summary refs log tree commit diff stats
path: root/lib/CodeGen
diff options
context:
space:
mode:
author dim <dim@FreeBSD.org> 2012-08-15 19:34:23 +0000
committer dim <dim@FreeBSD.org> 2012-08-15 19:34:23 +0000
commit 721c201bd55ffb73cb2ba8d39e0570fa38c44e15 (patch)
tree eacfc83d988e4b9d11114387ae7dc41243f2a363 /lib/CodeGen
parent 2b2816e083a455f7a656ae88b0fd059d1688bb36 (diff)
download FreeBSD-src-721c201bd55ffb73cb2ba8d39e0570fa38c44e15.zip
FreeBSD-src-721c201bd55ffb73cb2ba8d39e0570fa38c44e15.tar.gz
Vendor import of llvm trunk r161861:
http://llvm.org/svn/llvm-project/llvm/trunk@161861
Diffstat (limited to 'lib/CodeGen')
-rw-r--r--lib/CodeGen/AggressiveAntiDepBreaker.cpp43
-rw-r--r--lib/CodeGen/AllocationOrder.cpp2
-rw-r--r--lib/CodeGen/Analysis.cpp100
-rw-r--r--lib/CodeGen/AsmPrinter/ARMException.cpp4
-rw-r--r--lib/CodeGen/AsmPrinter/AsmPrinter.cpp34
-rw-r--r--lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp33
-rw-r--r--lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp99
-rw-r--r--lib/CodeGen/AsmPrinter/DwarfCompileUnit.h2
-rw-r--r--lib/CodeGen/AsmPrinter/DwarfDebug.cpp43
-rw-r--r--lib/CodeGen/AsmPrinter/DwarfDebug.h22
-rw-r--r--lib/CodeGen/AsmPrinter/DwarfException.h11
-rw-r--r--lib/CodeGen/BranchFolding.cpp202
-rw-r--r--lib/CodeGen/CMakeLists.txt7
-rw-r--r--lib/CodeGen/CalcSpillWeights.cpp53
-rw-r--r--lib/CodeGen/CallingConvLower.cpp8
-rw-r--r--lib/CodeGen/CodeGen.cpp4
-rw-r--r--lib/CodeGen/CodePlacementOpt.cpp2
-rw-r--r--lib/CodeGen/CriticalAntiDepBreaker.cpp81
-rw-r--r--lib/CodeGen/CriticalAntiDepBreaker.h2
-rw-r--r--lib/CodeGen/DFAPacketizer.cpp91
-rw-r--r--lib/CodeGen/DeadMachineInstructionElim.cpp11
-rw-r--r--lib/CodeGen/DwarfEHPrepare.cpp12
-rw-r--r--lib/CodeGen/EarlyIfConversion.cpp803
-rw-r--r--lib/CodeGen/ExecutionDepsFix.cpp5
-rw-r--r--lib/CodeGen/ExpandPostRAPseudos.cpp11
-rw-r--r--lib/CodeGen/IfConversion.cpp45
-rw-r--r--lib/CodeGen/InlineSpiller.cpp20
-rw-r--r--lib/CodeGen/InterferenceCache.cpp93
-rw-r--r--lib/CodeGen/InterferenceCache.h34
-rw-r--r--lib/CodeGen/IntrinsicLowering.cpp6
-rw-r--r--lib/CodeGen/LLVMTargetMachine.cpp70
-rw-r--r--lib/CodeGen/LexicalScopes.cpp2
-rw-r--r--lib/CodeGen/LiveDebugVariables.cpp39
-rw-r--r--lib/CodeGen/LiveInterval.cpp286
-rw-r--r--lib/CodeGen/LiveIntervalAnalysis.cpp738
-rw-r--r--lib/CodeGen/LiveIntervalUnion.cpp24
-rw-r--r--lib/CodeGen/LiveIntervalUnion.h26
-rw-r--r--lib/CodeGen/LiveRangeCalc.cpp120
-rw-r--r--lib/CodeGen/LiveRangeCalc.h63
-rw-r--r--lib/CodeGen/LiveRangeEdit.cpp120
-rw-r--r--lib/CodeGen/LiveRegMatrix.cpp152
-rw-r--r--lib/CodeGen/LiveRegMatrix.h148
-rw-r--r--lib/CodeGen/LiveVariables.cpp69
-rw-r--r--lib/CodeGen/LocalStackSlotAllocation.cpp3
-rw-r--r--lib/CodeGen/MachineBasicBlock.cpp80
-rw-r--r--lib/CodeGen/MachineBlockPlacement.cpp111
-rw-r--r--lib/CodeGen/MachineCSE.cpp69
-rw-r--r--lib/CodeGen/MachineCopyPropagation.cpp50
-rw-r--r--lib/CodeGen/MachineFunction.cpp41
-rw-r--r--lib/CodeGen/MachineFunctionPrinterPass.cpp9
-rw-r--r--lib/CodeGen/MachineInstr.cpp295
-rw-r--r--lib/CodeGen/MachineInstrBundle.cpp4
-rw-r--r--lib/CodeGen/MachineLICM.cpp18
-rw-r--r--lib/CodeGen/MachineLoopInfo.cpp16
-rw-r--r--lib/CodeGen/MachinePassRegistry.cpp13
-rw-r--r--lib/CodeGen/MachineRegisterInfo.cpp103
-rw-r--r--lib/CodeGen/MachineSSAUpdater.cpp47
-rw-r--r--lib/CodeGen/MachineScheduler.cpp926
-rw-r--r--lib/CodeGen/MachineSink.cpp17
-rw-r--r--lib/CodeGen/MachineTraceMetrics.cpp1153
-rw-r--r--lib/CodeGen/MachineTraceMetrics.h341
-rw-r--r--lib/CodeGen/MachineVerifier.cpp746
-rw-r--r--lib/CodeGen/PHIElimination.cpp184
-rw-r--r--lib/CodeGen/Passes.cpp273
-rw-r--r--lib/CodeGen/PeepholeOptimizer.cpp156
-rw-r--r--lib/CodeGen/PostRASchedulerList.cpp39
-rw-r--r--lib/CodeGen/ProcessImplicitDefs.cpp374
-rw-r--r--lib/CodeGen/PrologEpilogInserter.cpp2
-rw-r--r--lib/CodeGen/RegAllocBase.cpp161
-rw-r--r--lib/CodeGen/RegAllocBase.h85
-rw-r--r--lib/CodeGen/RegAllocBasic.cpp171
-rw-r--r--lib/CodeGen/RegAllocFast.cpp55
-rw-r--r--lib/CodeGen/RegAllocGreedy.cpp238
-rw-r--r--lib/CodeGen/RegAllocPBQP.cpp187
-rw-r--r--lib/CodeGen/RegisterClassInfo.cpp7
-rw-r--r--lib/CodeGen/RegisterClassInfo.h132
-rw-r--r--lib/CodeGen/RegisterCoalescer.cpp1252
-rw-r--r--lib/CodeGen/RegisterCoalescer.h29
-rw-r--r--lib/CodeGen/RegisterPressure.cpp841
-rw-r--r--lib/CodeGen/RegisterScavenging.cpp25
-rw-r--r--lib/CodeGen/RenderMachineFunction.cpp1013
-rw-r--r--lib/CodeGen/RenderMachineFunction.h338
-rw-r--r--lib/CodeGen/ScheduleDAG.cpp23
-rw-r--r--lib/CodeGen/ScheduleDAGInstrs.cpp405
-rw-r--r--lib/CodeGen/ScoreboardHazardRecognizer.cpp26
-rw-r--r--lib/CodeGen/SelectionDAG/CMakeLists.txt2
-rw-r--r--lib/CodeGen/SelectionDAG/DAGCombiner.cpp336
-rw-r--r--lib/CodeGen/SelectionDAG/FastISel.cpp55
-rw-r--r--lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp2
-rw-r--r--lib/CodeGen/SelectionDAG/InstrEmitter.cpp58
-rw-r--r--lib/CodeGen/SelectionDAG/InstrEmitter.h6
-rw-r--r--lib/CodeGen/SelectionDAG/LegalizeDAG.cpp1007
-rw-r--r--lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp33
-rw-r--r--lib/CodeGen/SelectionDAG/LegalizeTypes.cpp20
-rw-r--r--lib/CodeGen/SelectionDAG/LegalizeTypes.h7
-rw-r--r--lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp10
-rw-r--r--lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp57
-rw-r--r--lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp79
-rw-r--r--lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp6
-rw-r--r--lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp15
-rw-r--r--lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp42
-rw-r--r--lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h7
-rw-r--r--lib/CodeGen/SelectionDAG/SelectionDAG.cpp218
-rw-r--r--lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp542
-rw-r--r--lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h14
-rw-r--r--lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp8
-rw-r--r--lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp79
-rw-r--r--lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp2
-rw-r--r--lib/CodeGen/SelectionDAG/TargetLowering.cpp162
-rw-r--r--lib/CodeGen/ShadowStackGC.cpp8
-rw-r--r--lib/CodeGen/SjLjEHPrepare.cpp20
-rw-r--r--lib/CodeGen/SlotIndexes.cpp3
-rw-r--r--lib/CodeGen/SpillPlacement.cpp11
-rw-r--r--lib/CodeGen/SplitKit.cpp27
-rw-r--r--lib/CodeGen/StackProtector.cpp25
-rw-r--r--lib/CodeGen/StackSlotColoring.cpp3
-rw-r--r--lib/CodeGen/StrongPHIElimination.cpp4
-rw-r--r--lib/CodeGen/TailDuplication.cpp34
-rw-r--r--lib/CodeGen/TargetInstrInfoImpl.cpp210
-rw-r--r--lib/CodeGen/TargetLoweringObjectFileImpl.cpp62
-rw-r--r--lib/CodeGen/TwoAddressInstructionPass.cpp752
-rw-r--r--lib/CodeGen/VirtRegMap.cpp179
-rw-r--r--lib/CodeGen/VirtRegMap.h7
123 files changed, 10812 insertions, 7098 deletions
diff --git a/lib/CodeGen/AggressiveAntiDepBreaker.cpp b/lib/CodeGen/AggressiveAntiDepBreaker.cpp
index 822a564..205480a 100644
--- a/lib/CodeGen/AggressiveAntiDepBreaker.cpp
+++ b/lib/CodeGen/AggressiveAntiDepBreaker.cpp
@@ -16,10 +16,10 @@
#define DEBUG_TYPE "post-RA-sched"
#include "AggressiveAntiDepBreaker.h"
-#include "RegisterClassInfo.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/RegisterClassInfo.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetInstrInfo.h"
@@ -157,8 +157,8 @@ void AggressiveAntiDepBreaker::StartBlock(MachineBasicBlock *BB) {
// In a return block, examine the function live-out regs.
for (MachineRegisterInfo::liveout_iterator I = MRI.liveout_begin(),
E = MRI.liveout_end(); I != E; ++I) {
- for (const uint16_t *Alias = TRI->getOverlaps(*I);
- unsigned Reg = *Alias; ++Alias) {
+ for (MCRegAliasIterator AI(*I, TRI, true); AI.isValid(); ++AI) {
+ unsigned Reg = *AI;
State->UnionGroups(Reg, 0);
KillIndices[Reg] = BB->size();
DefIndices[Reg] = ~0u;
@@ -173,8 +173,8 @@ void AggressiveAntiDepBreaker::StartBlock(MachineBasicBlock *BB) {
SE = BB->succ_end(); SI != SE; ++SI)
for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(),
E = (*SI)->livein_end(); I != E; ++I) {
- for (const uint16_t *Alias = TRI->getOverlaps(*I);
- unsigned Reg = *Alias; ++Alias) {
+ for (MCRegAliasIterator AI(*I, TRI, true); AI.isValid(); ++AI) {
+ unsigned Reg = *AI;
State->UnionGroups(Reg, 0);
KillIndices[Reg] = BB->size();
DefIndices[Reg] = ~0u;
@@ -189,8 +189,8 @@ void AggressiveAntiDepBreaker::StartBlock(MachineBasicBlock *BB) {
for (const uint16_t *I = TRI->getCalleeSavedRegs(&MF); *I; ++I) {
unsigned Reg = *I;
if (!IsReturnBlock && !Pristine.test(Reg)) continue;
- for (const uint16_t *Alias = TRI->getOverlaps(Reg);
- unsigned AliasReg = *Alias; ++Alias) {
+ for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) {
+ unsigned AliasReg = *AI;
State->UnionGroups(AliasReg, 0);
KillIndices[AliasReg] = BB->size();
DefIndices[AliasReg] = ~0u;
@@ -265,10 +265,8 @@ void AggressiveAntiDepBreaker::GetPassthruRegs(MachineInstr *MI,
IsImplicitDefUse(MI, MO)) {
const unsigned Reg = MO.getReg();
PassthruRegs.insert(Reg);
- for (const uint16_t *Subreg = TRI->getSubRegisters(Reg);
- *Subreg; ++Subreg) {
- PassthruRegs.insert(*Subreg);
- }
+ for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs)
+ PassthruRegs.insert(*SubRegs);
}
}
}
@@ -333,9 +331,8 @@ void AggressiveAntiDepBreaker::HandleLastUse(unsigned Reg, unsigned KillIdx,
DEBUG(dbgs() << "->g" << State->GetGroup(Reg) << tag);
}
// Repeat for subregisters.
- for (const uint16_t *Subreg = TRI->getSubRegisters(Reg);
- *Subreg; ++Subreg) {
- unsigned SubregReg = *Subreg;
+ for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) {
+ unsigned SubregReg = *SubRegs;
if (!State->IsLive(SubregReg)) {
KillIndices[SubregReg] = KillIdx;
DefIndices[SubregReg] = ~0u;
@@ -392,8 +389,8 @@ void AggressiveAntiDepBreaker::PrescanInstruction(MachineInstr *MI,
// Any aliased that are live at this point are completely or
// partially defined here, so group those aliases with Reg.
- for (const uint16_t *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
- unsigned AliasReg = *Alias;
+ for (MCRegAliasIterator AI(Reg, TRI, false); AI.isValid(); ++AI) {
+ unsigned AliasReg = *AI;
if (State->IsLive(AliasReg)) {
State->UnionGroups(Reg, AliasReg);
DEBUG(dbgs() << "->g" << State->GetGroup(Reg) << "(via " <<
@@ -404,7 +401,7 @@ void AggressiveAntiDepBreaker::PrescanInstruction(MachineInstr *MI,
// Note register reference...
const TargetRegisterClass *RC = NULL;
if (i < MI->getDesc().getNumOperands())
- RC = TII->getRegClass(MI->getDesc(), i, TRI);
+ RC = TII->getRegClass(MI->getDesc(), i, TRI, MF);
AggressiveAntiDepState::RegisterReference RR = { &MO, RC };
RegRefs.insert(std::make_pair(Reg, RR));
}
@@ -423,9 +420,8 @@ void AggressiveAntiDepBreaker::PrescanInstruction(MachineInstr *MI,
continue;
// Update def for Reg and aliases.
- for (const uint16_t *Alias = TRI->getOverlaps(Reg);
- unsigned AliasReg = *Alias; ++Alias)
- DefIndices[AliasReg] = Count;
+ for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
+ DefIndices[*AI] = Count;
}
}
@@ -479,7 +475,7 @@ void AggressiveAntiDepBreaker::ScanInstruction(MachineInstr *MI,
// Note register reference...
const TargetRegisterClass *RC = NULL;
if (i < MI->getDesc().getNumOperands())
- RC = TII->getRegClass(MI->getDesc(), i, TRI);
+ RC = TII->getRegClass(MI->getDesc(), i, TRI, MF);
AggressiveAntiDepState::RegisterReference RR = { &MO, RC };
RegRefs.insert(std::make_pair(Reg, RR));
}
@@ -678,9 +674,8 @@ bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters(
goto next_super_reg;
} else {
bool found = false;
- for (const uint16_t *Alias = TRI->getAliasSet(NewReg);
- *Alias; ++Alias) {
- unsigned AliasReg = *Alias;
+ for (MCRegAliasIterator AI(NewReg, TRI, false); AI.isValid(); ++AI) {
+ unsigned AliasReg = *AI;
if (State->IsLive(AliasReg) ||
(KillIndices[Reg] > DefIndices[AliasReg])) {
DEBUG(dbgs() << "(alias " << TRI->getName(AliasReg) << " live)");
diff --git a/lib/CodeGen/AllocationOrder.cpp b/lib/CodeGen/AllocationOrder.cpp
index 87f6431..32ad34a 100644
--- a/lib/CodeGen/AllocationOrder.cpp
+++ b/lib/CodeGen/AllocationOrder.cpp
@@ -15,9 +15,9 @@
//===----------------------------------------------------------------------===//
#include "AllocationOrder.h"
-#include "RegisterClassInfo.h"
#include "VirtRegMap.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterClassInfo.h"
using namespace llvm;
diff --git a/lib/CodeGen/Analysis.cpp b/lib/CodeGen/Analysis.cpp
index 00874d4..447f398 100644
--- a/lib/CodeGen/Analysis.cpp
+++ b/lib/CodeGen/Analysis.cpp
@@ -203,6 +203,63 @@ ISD::CondCode llvm::getICmpCondCode(ICmpInst::Predicate Pred) {
}
}
+
+/// getNoopInput - If V is a noop (i.e., lowers to no machine code), look
+/// through it (and any transitive noop operands to it) and return its input
+/// value. This is used to determine if a tail call can be formed.
+///
+static const Value *getNoopInput(const Value *V, const TargetLowering &TLI) {
+ // If V is not an instruction, it can't be looked through.
+ const Instruction *I = dyn_cast<Instruction>(V);
+ if (I == 0 || !I->hasOneUse() || I->getNumOperands() == 0) return V;
+
+ Value *Op = I->getOperand(0);
+
+ // Look through truly no-op truncates.
+ if (isa<TruncInst>(I) &&
+ TLI.isTruncateFree(I->getOperand(0)->getType(), I->getType()))
+ return getNoopInput(I->getOperand(0), TLI);
+
+ // Look through truly no-op bitcasts.
+ if (isa<BitCastInst>(I)) {
+ // No type change at all.
+ if (Op->getType() == I->getType())
+ return getNoopInput(Op, TLI);
+
+ // Pointer to pointer cast.
+ if (Op->getType()->isPointerTy() && I->getType()->isPointerTy())
+ return getNoopInput(Op, TLI);
+
+ if (isa<VectorType>(Op->getType()) && isa<VectorType>(I->getType()) &&
+ TLI.isTypeLegal(EVT::getEVT(Op->getType())) &&
+ TLI.isTypeLegal(EVT::getEVT(I->getType())))
+ return getNoopInput(Op, TLI);
+ }
+
+ // Look through inttoptr.
+ if (isa<IntToPtrInst>(I) && !isa<VectorType>(I->getType())) {
+ // Make sure this isn't a truncating or extending cast. We could support
+ // this eventually, but don't bother for now.
+ if (TLI.getPointerTy().getSizeInBits() ==
+ cast<IntegerType>(Op->getType())->getBitWidth())
+ return getNoopInput(Op, TLI);
+ }
+
+ // Look through ptrtoint.
+ if (isa<PtrToIntInst>(I) && !isa<VectorType>(I->getType())) {
+ // Make sure this isn't a truncating or extending cast. We could support
+ // this eventually, but don't bother for now.
+ if (TLI.getPointerTy().getSizeInBits() ==
+ cast<IntegerType>(I->getType())->getBitWidth())
+ return getNoopInput(Op, TLI);
+ }
+
+
+ // Otherwise it's not something we can look through.
+ return V;
+}
+
+
/// Test if the given instruction is in a position to be optimized
/// with a tail-call. This roughly means that it's in a block with
/// a return and there's nothing that needs to be scheduled
@@ -226,7 +283,8 @@ bool llvm::isInTailCallPosition(ImmutableCallSite CS, Attributes CalleeRetAttr,
// been fully understood.
if (!Ret &&
(!TLI.getTargetMachine().Options.GuaranteedTailCallOpt ||
- !isa<UnreachableInst>(Term))) return false;
+ !isa<UnreachableInst>(Term)))
+ return false;
// If I will have a chain, make sure no other instruction that will have a
// chain interposes between I and the return.
@@ -264,28 +322,28 @@ bool llvm::isInTailCallPosition(ImmutableCallSite CS, Attributes CalleeRetAttr,
return false;
// Otherwise, make sure the unmodified return value of I is the return value.
- for (const Instruction *U = dyn_cast<Instruction>(Ret->getOperand(0)); ;
- U = dyn_cast<Instruction>(U->getOperand(0))) {
- if (!U)
- return false;
- if (!U->hasOneUse())
+ // We handle two cases: multiple return values + scalars.
+ Value *RetVal = Ret->getOperand(0);
+ if (!isa<InsertValueInst>(RetVal) || !isa<StructType>(RetVal->getType()))
+ // Handle scalars first.
+ return getNoopInput(Ret->getOperand(0), TLI) == I;
+
+ // If this is an aggregate return, look through the insert/extract values and
+ // see if each is transparent.
+ for (unsigned i = 0, e =cast<StructType>(RetVal->getType())->getNumElements();
+ i != e; ++i) {
+ const Value *InScalar = FindInsertedValue(RetVal, i);
+ if (InScalar == 0) return false;
+ InScalar = getNoopInput(InScalar, TLI);
+
+ // If the scalar value being inserted is an extractvalue of the right index
+ // from the call, then everything is good.
+ const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(InScalar);
+ if (EVI == 0 || EVI->getOperand(0) != I || EVI->getNumIndices() != 1 ||
+ EVI->getIndices()[0] != i)
return false;
- if (U == I)
- break;
- // Check for a truly no-op truncate.
- if (isa<TruncInst>(U) &&
- TLI.isTruncateFree(U->getOperand(0)->getType(), U->getType()))
- continue;
- // Check for a truly no-op bitcast.
- if (isa<BitCastInst>(U) &&
- (U->getOperand(0)->getType() == U->getType() ||
- (U->getOperand(0)->getType()->isPointerTy() &&
- U->getType()->isPointerTy())))
- continue;
- // Otherwise it's not a true no-op.
- return false;
}
-
+
return true;
}
diff --git a/lib/CodeGen/AsmPrinter/ARMException.cpp b/lib/CodeGen/AsmPrinter/ARMException.cpp
index b60fda8..bf5d8c4 100644
--- a/lib/CodeGen/AsmPrinter/ARMException.cpp
+++ b/lib/CodeGen/AsmPrinter/ARMException.cpp
@@ -44,9 +44,7 @@ EnableARMEHABIDescriptors("arm-enable-ehabi-descriptors", cl::Hidden,
ARMException::ARMException(AsmPrinter *A)
- : DwarfException(A),
- shouldEmitTable(false), shouldEmitMoves(false), shouldEmitTableModule(false)
- {}
+ : DwarfException(A) {}
ARMException::~ARMException() {}
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index b0b2ff4..d9be7a1 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -15,6 +15,7 @@
#include "llvm/CodeGen/AsmPrinter.h"
#include "DwarfDebug.h"
#include "DwarfException.h"
+#include "llvm/DebugInfo.h"
#include "llvm/Module.h"
#include "llvm/CodeGen/GCMetadataPrinter.h"
#include "llvm/CodeGen/MachineConstantPool.h"
@@ -24,7 +25,6 @@
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/Analysis/ConstantFolding.h"
-#include "llvm/Analysis/DebugInfo.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
@@ -475,10 +475,8 @@ void AsmPrinter::EmitFunctionHeader() {
void AsmPrinter::EmitFunctionEntryLabel() {
// The function label could have already been emitted if two symbols end up
// conflicting due to asm renaming. Detect this and emit an error.
- if (CurrentFnSym->isUndefined()) {
- OutStreamer.ForceCodeRegion();
+ if (CurrentFnSym->isUndefined())
return OutStreamer.EmitLabel(CurrentFnSym);
- }
report_fatal_error("'" + Twine(CurrentFnSym->getName()) +
"' label emitted multiple times to assembly file");
@@ -615,7 +613,7 @@ bool AsmPrinter::needsSEHMoves() {
}
bool AsmPrinter::needsRelocationsForDwarfStringPool() const {
- return MAI->doesDwarfUseRelocationsForStringPool();
+ return MAI->doesDwarfUseRelocationsAcrossSections();
}
void AsmPrinter::emitPrologLabel(const MachineInstr &MI) {
@@ -798,8 +796,8 @@ void AsmPrinter::EmitDwarfRegOp(const MachineLocation &MLoc) const {
const TargetRegisterInfo *TRI = TM.getRegisterInfo();
int Reg = TRI->getDwarfRegNum(MLoc.getReg(), false);
- for (const uint16_t *SR = TRI->getSuperRegisters(MLoc.getReg());
- *SR && Reg < 0; ++SR) {
+ for (MCSuperRegIterator SR(MLoc.getReg(), TRI); SR.isValid() && Reg < 0;
+ ++SR) {
Reg = TRI->getDwarfRegNum(*SR, false);
// FIXME: Get the bit range this register uses of the superregister
// so that we can produce a DW_OP_bit_piece
@@ -1085,15 +1083,6 @@ void AsmPrinter::EmitJumpTableInfo() {
EmitAlignment(Log2_32(MJTI->getEntryAlignment(*TM.getTargetData())));
- // If we know the form of the jump table, go ahead and tag it as such.
- if (!JTInDiffSection) {
- if (MJTI->getEntryKind() == MachineJumpTableInfo::EK_LabelDifference32) {
- OutStreamer.EmitJumpTable32Region();
- } else {
- OutStreamer.EmitDataRegion();
- }
- }
-
for (unsigned JTI = 0, e = JT.size(); JTI != e; ++JTI) {
const std::vector<MachineBasicBlock*> &JTBBs = JT[JTI].MBBs;
@@ -1399,13 +1388,14 @@ void AsmPrinter::EmitLabelPlusOffset(const MCSymbol *Label, uint64_t Offset,
unsigned Size)
const {
- // Emit Label+Offset
- const MCExpr *Plus =
- MCBinaryExpr::CreateAdd(MCSymbolRefExpr::Create(Label, OutContext),
- MCConstantExpr::Create(Offset, OutContext),
- OutContext);
+ // Emit Label+Offset (or just Label if Offset is zero)
+ const MCExpr *Expr = MCSymbolRefExpr::Create(Label, OutContext);
+ if (Offset)
+ Expr = MCBinaryExpr::CreateAdd(Expr,
+ MCConstantExpr::Create(Offset, OutContext),
+ OutContext);
- OutStreamer.EmitValue(Plus, 4, 0/*AddrSpace*/);
+ OutStreamer.EmitValue(Expr, Size, 0/*AddrSpace*/);
}
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
index d605854..db43b06 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
@@ -326,11 +326,11 @@ void AsmPrinter::EmitInlineAsm(const MachineInstr *MI) const {
OpNo += InlineAsm::getNumOperandRegisters(OpFlags) + 1;
}
- // We may have a location metadata attached to the end of the
- // instruction, and at no point should see metadata at any
- // other point while processing. It's an error if so.
+ // We may have a location metadata attached to the end of the
+ // instruction, and at no point should see metadata at any
+ // other point while processing. It's an error if so.
if (OpNo >= MI->getNumOperands() ||
- MI->getOperand(OpNo).isMetadata()) {
+ MI->getOperand(OpNo).isMetadata()) {
Error = true;
} else {
unsigned OpFlags = MI->getOperand(OpNo).getImm();
@@ -409,9 +409,28 @@ void AsmPrinter::PrintSpecial(const MachineInstr *MI, raw_ostream &OS,
/// instruction, using the specified assembler variant. Targets should
/// override this to format as appropriate.
bool AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
- unsigned AsmVariant, const char *ExtraCode,
- raw_ostream &O) {
- // Target doesn't support this yet!
+ unsigned AsmVariant, const char *ExtraCode,
+ raw_ostream &O) {
+ // Does this asm operand have a single letter operand modifier?
+ if (ExtraCode && ExtraCode[0]) {
+ if (ExtraCode[1] != 0) return true; // Unknown modifier.
+
+ const MachineOperand &MO = MI->getOperand(OpNo);
+ switch (ExtraCode[0]) {
+ default:
+ return true; // Unknown modifier.
+ case 'c': // Substitute immediate value without immediate syntax
+ if (MO.getType() != MachineOperand::MO_Immediate)
+ return true;
+ O << MO.getImm();
+ return false;
+ case 'n': // Negate the immediate constant.
+ if (MO.getType() != MachineOperand::MO_Immediate)
+ return true;
+ O << -MO.getImm();
+ return false;
+ }
+ }
return true;
}
diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
index cc5b642..d30e5bb 100644
--- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
@@ -7,7 +7,7 @@
//
//===----------------------------------------------------------------------===//
//
-// This file contains support for writing dwarf compile unit.
+// This file contains support for constructing a dwarf compile unit.
//
//===----------------------------------------------------------------------===//
@@ -17,9 +17,9 @@
#include "DwarfCompileUnit.h"
#include "DwarfDebug.h"
#include "llvm/Constants.h"
+#include "llvm/DIBuilder.h"
#include "llvm/GlobalVariable.h"
#include "llvm/Instructions.h"
-#include "llvm/Analysis/DIBuilder.h"
#include "llvm/Support/Debug.h"
#include "llvm/Target/Mangler.h"
#include "llvm/Target/TargetData.h"
@@ -33,7 +33,7 @@ using namespace llvm;
/// CompileUnit - Compile unit constructor.
CompileUnit::CompileUnit(unsigned I, unsigned L, DIE *D, AsmPrinter *A,
- DwarfDebug *DW)
+ DwarfDebug *DW)
: ID(I), Language(L), CUDie(D), Asm(A), DD(DW), IndexTyDie(0) {
DIEIntegerOne = new (DIEValueAllocator) DIEInteger(1);
}
@@ -198,7 +198,7 @@ void CompileUnit::addSourceLine(DIE *Die, DIObjCProperty Ty) {
return;
DIFile File = Ty.getFile();
unsigned FileID = DD->GetOrCreateSourceID(File.getFilename(),
- File.getDirectory());
+ File.getDirectory());
assert(FileID && "Invalid file id");
addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID);
addUInt(Die, dwarf::DW_AT_decl_line, 0, Line);
@@ -308,7 +308,8 @@ void CompileUnit::addComplexAddress(DbgVariable *&DV, DIE *Die,
addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst);
addUInt(Block, 0, dwarf::DW_FORM_udata, DV->getAddrElement(++i));
} else if (Element == DIBuilder::OpDeref) {
- addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_deref);
+ if (!Location.isReg())
+ addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_deref);
} else llvm_unreachable("unknown DIBuilder Opcode");
}
@@ -418,27 +419,12 @@ void CompileUnit::addBlockByrefAddress(DbgVariable *&DV, DIE *Die,
// Decode the original location, and use that as the start of the byref
// variable's location.
- const TargetRegisterInfo *RI = Asm->TM.getRegisterInfo();
- unsigned Reg = RI->getDwarfRegNum(Location.getReg(), false);
DIEBlock *Block = new (DIEValueAllocator) DIEBlock();
- if (Location.isReg()) {
- if (Reg < 32)
- addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_reg0 + Reg);
- else {
- addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_regx);
- addUInt(Block, 0, dwarf::DW_FORM_udata, Reg);
- }
- } else {
- if (Reg < 32)
- addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_breg0 + Reg);
- else {
- addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_bregx);
- addUInt(Block, 0, dwarf::DW_FORM_udata, Reg);
- }
-
- addUInt(Block, 0, dwarf::DW_FORM_sdata, Location.getOffset());
- }
+ if (Location.isReg())
+ addRegisterOp(Block, Location.getReg());
+ else
+ addRegisterOffset(Block, Location.getReg(), Location.getOffset());
// If we started with a pointer to the __Block_byref... struct, then
// the first thing we need to do is dereference the pointer (DW_OP_deref).
@@ -646,8 +632,7 @@ DIE *CompileUnit::getOrCreateTypeDIE(const MDNode *TyNode) {
}
/// addType - Add a new type attribute to the specified entity.
-void CompileUnit::addType(DIE *Entity, DIType Ty,
- unsigned Attribute) {
+void CompileUnit::addType(DIE *Entity, DIType Ty, unsigned Attribute) {
if (!Ty.Verify())
return;
@@ -776,6 +761,11 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) {
Buffer.addChild(ElemDie);
}
}
+ DIType DTy = CTy.getTypeDerivedFrom();
+ if (DTy.Verify()) {
+ addType(&Buffer, DTy);
+ addUInt(&Buffer, dwarf::DW_AT_enum_class, dwarf::DW_FORM_flag, 1);
+ }
}
break;
case dwarf::DW_TAG_subroutine_type: {
@@ -801,9 +791,9 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) {
// Add prototype flag if we're dealing with a C language and the
// function has been prototyped.
if (isPrototyped &&
- (Language == dwarf::DW_LANG_C89 ||
- Language == dwarf::DW_LANG_C99 ||
- Language == dwarf::DW_LANG_ObjC))
+ (Language == dwarf::DW_LANG_C89 ||
+ Language == dwarf::DW_LANG_C99 ||
+ Language == dwarf::DW_LANG_ObjC))
addUInt(&Buffer, dwarf::DW_AT_prototyped, dwarf::DW_FORM_flag, 1);
}
break;
@@ -846,19 +836,19 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) {
addUInt(ElemDie, dwarf::DW_AT_external, dwarf::DW_FORM_flag, 1);
addSourceLine(ElemDie, DV);
} else if (Element.isDerivedType()) {
- DIDerivedType DDTy(Element);
- if (DDTy.getTag() == dwarf::DW_TAG_friend) {
- ElemDie = new DIE(dwarf::DW_TAG_friend);
- addType(ElemDie, DDTy.getTypeDerivedFrom(), dwarf::DW_AT_friend);
- } else
- ElemDie = createMemberDIE(DIDerivedType(Element));
+ DIDerivedType DDTy(Element);
+ if (DDTy.getTag() == dwarf::DW_TAG_friend) {
+ ElemDie = new DIE(dwarf::DW_TAG_friend);
+ addType(ElemDie, DDTy.getTypeDerivedFrom(), dwarf::DW_AT_friend);
+ } else
+ ElemDie = createMemberDIE(DIDerivedType(Element));
} else if (Element.isObjCProperty()) {
DIObjCProperty Property(Element);
ElemDie = new DIE(Property.getTag());
StringRef PropertyName = Property.getObjCPropertyName();
addString(ElemDie, dwarf::DW_AT_APPLE_property_name, PropertyName);
- addType(ElemDie, Property.getType());
- addSourceLine(ElemDie, Property);
+ addType(ElemDie, Property.getType());
+ addSourceLine(ElemDie, Property);
StringRef GetterName = Property.getObjCPropertyGetterName();
if (!GetterName.empty())
addString(ElemDie, dwarf::DW_AT_APPLE_property_getter, GetterName);
@@ -925,19 +915,21 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) {
if (!Name.empty())
addString(&Buffer, dwarf::DW_AT_name, Name);
- if (Tag == dwarf::DW_TAG_enumeration_type || Tag == dwarf::DW_TAG_class_type
- || Tag == dwarf::DW_TAG_structure_type || Tag == dwarf::DW_TAG_union_type)
- {
+ if (Tag == dwarf::DW_TAG_enumeration_type ||
+ Tag == dwarf::DW_TAG_class_type ||
+ Tag == dwarf::DW_TAG_structure_type ||
+ Tag == dwarf::DW_TAG_union_type) {
// Add size if non-zero (derived types might be zero-sized.)
+ // TODO: Do we care about size for enum forward declarations?
if (Size)
addUInt(&Buffer, dwarf::DW_AT_byte_size, 0, Size);
- else {
+ else if (!CTy.isForwardDecl())
// Add zero size if it is not a forward declaration.
- if (CTy.isForwardDecl())
- addUInt(&Buffer, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1);
- else
- addUInt(&Buffer, dwarf::DW_AT_byte_size, 0, 0);
- }
+ addUInt(&Buffer, dwarf::DW_AT_byte_size, 0, 0);
+
+ // If we're a forward decl, say so.
+ if (CTy.isForwardDecl())
+ addUInt(&Buffer, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1);
// Add source line info if available.
if (!CTy.isForwardDecl())
@@ -968,7 +960,7 @@ CompileUnit::getOrCreateTemplateTypeParameterDIE(DITemplateTypeParameter TP) {
/// getOrCreateTemplateValueParameterDIE - Find existing DIE or create new DIE
/// for the given DITemplateValueParameter.
DIE *
-CompileUnit::getOrCreateTemplateValueParameterDIE(DITemplateValueParameter TPV) {
+CompileUnit::getOrCreateTemplateValueParameterDIE(DITemplateValueParameter TPV){
DIE *ParamDIE = getDIE(TPV);
if (ParamDIE)
return ParamDIE;
@@ -1015,17 +1007,17 @@ DIE *CompileUnit::getOrCreateSubprogramDIE(DISubprogram SP) {
if (SPDie)
return SPDie;
+ SPDie = new DIE(dwarf::DW_TAG_subprogram);
+
+ // DW_TAG_inlined_subroutine may refer to this DIE.
+ insertDIE(SP, SPDie);
+
DISubprogram SPDecl = SP.getFunctionDeclaration();
DIE *DeclDie = NULL;
if (SPDecl.isSubprogram()) {
DeclDie = getOrCreateSubprogramDIE(SPDecl);
}
- SPDie = new DIE(dwarf::DW_TAG_subprogram);
-
- // DW_TAG_inlined_subroutine may refer to this DIE.
- insertDIE(SP, SPDie);
-
// Add to context owner.
addToContextOwner(SPDie, SP.getContext());
@@ -1240,7 +1232,8 @@ void CompileUnit::createGlobalVariableDIE(const MDNode *N) {
}
/// constructSubrangeDIE - Construct subrange DIE from DISubrange.
-void CompileUnit::constructSubrangeDIE(DIE &Buffer, DISubrange SR, DIE *IndexTy){
+void CompileUnit::constructSubrangeDIE(DIE &Buffer, DISubrange SR,
+ DIE *IndexTy) {
DIE *DW_Subrange = new DIE(dwarf::DW_TAG_subrange_type);
addDIEEntry(DW_Subrange, dwarf::DW_AT_type, dwarf::DW_FORM_ref4, IndexTy);
uint64_t L = SR.getLo();
diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
index 45e407e..b4ff9e8 100644
--- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
+++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
@@ -15,7 +15,7 @@
#define CODEGEN_ASMPRINTER_DWARFCOMPILEUNIT_H
#include "DIE.h"
-#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/DebugInfo.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/OwningPtr.h"
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
index cb78878..649684a 100644
--- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -17,9 +17,10 @@
#include "DwarfAccelTable.h"
#include "DwarfCompileUnit.h"
#include "llvm/Constants.h"
+#include "llvm/DebugInfo.h"
+#include "llvm/DIBuilder.h"
#include "llvm/Module.h"
#include "llvm/Instructions.h"
-#include "llvm/ADT/Triple.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/MC/MCAsmInfo.h"
@@ -32,11 +33,10 @@
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetOptions.h"
-#include "llvm/Analysis/DebugInfo.h"
-#include "llvm/Analysis/DIBuilder.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/Triple.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
@@ -117,7 +117,6 @@ DIType DbgVariable::getType() const {
if (getName() == DT.getName())
return (DT.getTypeDerivedFrom());
}
- return Ty;
}
return Ty;
}
@@ -127,6 +126,7 @@ DIType DbgVariable::getType() const {
DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M)
: Asm(A), MMI(Asm->MMI), FirstCU(0),
AbbreviationsSet(InitAbbreviationsSetSize),
+ SourceIdMap(DIEValueAllocator), StringPool(DIEValueAllocator),
PrevLabel(NULL) {
NextStringPoolNumber = 0;
@@ -566,7 +566,7 @@ CompileUnit *DwarfDebug::constructCompileUnit(const MDNode *N) {
NewCU->addUInt(Die, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr, 0);
// DW_AT_stmt_list is a offset of line number information for this
// compile unit in debug_line section.
- if (Asm->MAI->doesDwarfRequireRelocationForSectionOffset())
+ if (Asm->MAI->doesDwarfUseRelocationsAcrossSections())
NewCU->addLabel(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_data4,
Asm->GetTempSymbol("section_line"));
else
@@ -1310,8 +1310,9 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) {
MOE = MI->operands_end(); MOI != MOE; ++MOI) {
if (!MOI->isReg() || !MOI->isDef() || !MOI->getReg())
continue;
- for (const uint16_t *AI = TRI->getOverlaps(MOI->getReg());
- unsigned Reg = *AI; ++AI) {
+ for (MCRegAliasIterator AI(MOI->getReg(), TRI, true);
+ AI.isValid(); ++AI) {
+ unsigned Reg = *AI;
const MDNode *Var = LiveUserVar[Reg];
if (!Var)
continue;
@@ -1381,7 +1382,7 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) {
MF->getFunction()->getContext());
recordSourceLine(FnStartDL.getLine(), FnStartDL.getCol(),
FnStartDL.getScope(MF->getFunction()->getContext()),
- 0);
+ DWARF2_LINE_DEFAULT_IS_STMT ? DWARF2_FLAG_IS_STMT : 0);
}
}
@@ -1421,6 +1422,12 @@ void DwarfDebug::endFunction(const MachineFunction *MF) {
DIVariable DV(Variables.getElement(i));
if (!DV || !DV.Verify() || !ProcessedVars.insert(DV))
continue;
+ // Check that DbgVariable for DV wasn't created earlier, when
+ // findAbstractVariable() was called for inlined instance of DV.
+ LLVMContext &Ctx = DV->getContext();
+ DIVariable CleanDV = cleanseInlinedVariable(DV, Ctx);
+ if (AbstractVariables.lookup(CleanDV))
+ continue;
if (LexicalScope *Scope = LScopes.findAbstractScope(DV.getContext()))
addScopeVariable(Scope, new DbgVariable(DV, NULL));
}
@@ -1623,7 +1630,7 @@ void DwarfDebug::emitDIE(DIE *Die) {
// DW_AT_range Value encodes offset in debug_range section.
DIEInteger *V = cast<DIEInteger>(Values[i]);
- if (Asm->MAI->doesDwarfUseLabelOffsetForRanges()) {
+ if (Asm->MAI->doesDwarfUseRelocationsAcrossSections()) {
Asm->EmitLabelPlusOffset(DwarfDebugRangeSectionSym,
V->getValue(),
4);
@@ -1636,10 +1643,14 @@ void DwarfDebug::emitDIE(DIE *Die) {
break;
}
case dwarf::DW_AT_location: {
- if (DIELabel *L = dyn_cast<DIELabel>(Values[i]))
- Asm->EmitLabelDifference(L->getValue(), DwarfDebugLocSectionSym, 4);
- else
+ if (DIELabel *L = dyn_cast<DIELabel>(Values[i])) {
+ if (Asm->MAI->doesDwarfUseRelocationsAcrossSections())
+ Asm->EmitLabelReference(L->getValue(), 4);
+ else
+ Asm->EmitLabelDifference(L->getValue(), DwarfDebugLocSectionSym, 4);
+ } else {
Values[i]->EmitValue(Asm, Form);
+ }
break;
}
case dwarf::DW_AT_accessibility: {
@@ -2049,9 +2060,11 @@ void DwarfDebug::emitDebugLoc() {
if (Element == DIBuilder::OpPlus) {
Asm->EmitInt8(dwarf::DW_OP_plus_uconst);
Asm->EmitULEB128(DV.getAddrElement(++i));
- } else if (Element == DIBuilder::OpDeref)
- Asm->EmitInt8(dwarf::DW_OP_deref);
- else llvm_unreachable("unknown Opcode found in complex address");
+ } else if (Element == DIBuilder::OpDeref) {
+ if (!Entry.Loc.isReg())
+ Asm->EmitInt8(dwarf::DW_OP_deref);
+ } else
+ llvm_unreachable("unknown Opcode found in complex address");
}
}
}
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.h b/lib/CodeGen/AsmPrinter/DwarfDebug.h
index 83f30f5..d1d6512 100644
--- a/lib/CodeGen/AsmPrinter/DwarfDebug.h
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.h
@@ -14,11 +14,11 @@
#ifndef CODEGEN_ASMPRINTER_DWARFDEBUG_H__
#define CODEGEN_ASMPRINTER_DWARFDEBUG_H__
+#include "DIE.h"
+#include "llvm/DebugInfo.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/LexicalScopes.h"
#include "llvm/MC/MachineLocation.h"
-#include "llvm/Analysis/DebugInfo.h"
-#include "DIE.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/FoldingSet.h"
#include "llvm/ADT/SmallPtrSet.h"
@@ -188,6 +188,9 @@ class DwarfDebug {
/// MMI - Collected machine module information.
MachineModuleInfo *MMI;
+ /// DIEValueAllocator - All DIEValues are allocated through this allocator.
+ BumpPtrAllocator DIEValueAllocator;
+
//===--------------------------------------------------------------------===//
// Attributes used to construct specific Dwarf sections.
//
@@ -210,11 +213,11 @@ class DwarfDebug {
/// SourceIdMap - Source id map, i.e. pair of source filename and directory,
/// separated by a zero byte, mapped to a unique id.
- StringMap<unsigned> SourceIdMap;
+ StringMap<unsigned, BumpPtrAllocator&> SourceIdMap;
/// StringPool - A String->Symbol mapping of strings used by indirect
/// references.
- StringMap<std::pair<MCSymbol*, unsigned> > StringPool;
+ StringMap<std::pair<MCSymbol*, unsigned>, BumpPtrAllocator&> StringPool;
unsigned NextStringPoolNumber;
/// SectionMap - Provides a unique id per text section.
@@ -232,7 +235,7 @@ class DwarfDebug {
/// ScopeVariables - Collection of dbg variables of a scope.
DenseMap<LexicalScope *, SmallVector<DbgVariable *, 8> > ScopeVariables;
- /// AbstractVariables - Collection on abstract variables.
+ /// AbstractVariables - Collection of abstract variables.
DenseMap<const MDNode *, DbgVariable *> AbstractVariables;
/// DotDebugLocEntries - Collection of DotDebugLocEntry.
@@ -292,9 +295,6 @@ class DwarfDebug {
std::vector<FunctionDebugFrameInfo> DebugFrames;
- // DIEValueAllocator - All DIEValues are allocated through this allocator.
- BumpPtrAllocator DIEValueAllocator;
-
// Section Symbols: these are assembler temporary labels that are emitted at
// the beginning of each supported dwarf section. These are used to form
// section offsets and are created by EmitSectionLabels.
@@ -333,9 +333,6 @@ private:
/// of the function.
DIE *constructInlinedScopeDIE(CompileUnit *TheCU, LexicalScope *Scope);
- /// constructVariableDIE - Construct a DIE for the given DbgVariable.
- DIE *constructVariableDIE(DbgVariable *DV, LexicalScope *S);
-
/// constructScopeDIE - Construct a DIE for this scope.
DIE *constructScopeDIE(CompileUnit *TheCU, LexicalScope *Scope);
@@ -517,9 +514,6 @@ public:
/// in the SourceIds map.
unsigned GetOrCreateSourceID(StringRef DirName, StringRef FullName);
- /// createSubprogramDIE - Create new DIE using SP.
- DIE *createSubprogramDIE(DISubprogram SP);
-
/// getStringPool - returns the entry into the start of the pool.
MCSymbol *getStringPool();
diff --git a/lib/CodeGen/AsmPrinter/DwarfException.h b/lib/CodeGen/AsmPrinter/DwarfException.h
index b5f86ab..75f6056 100644
--- a/lib/CodeGen/AsmPrinter/DwarfException.h
+++ b/lib/CodeGen/AsmPrinter/DwarfException.h
@@ -175,17 +175,6 @@ public:
};
class ARMException : public DwarfException {
- /// shouldEmitTable - Per-function flag to indicate if EH tables should
- /// be emitted.
- bool shouldEmitTable;
-
- /// shouldEmitMoves - Per-function flag to indicate if frame moves info
- /// should be emitted.
- bool shouldEmitMoves;
-
- /// shouldEmitTableModule - Per-module flag to indicate if EH tables
- /// should be emitted.
- bool shouldEmitTableModule;
public:
//===--------------------------------------------------------------------===//
// Main entry points.
diff --git a/lib/CodeGen/BranchFolding.cpp b/lib/CodeGen/BranchFolding.cpp
index ef1d2ba..fb65bb7 100644
--- a/lib/CodeGen/BranchFolding.cpp
+++ b/lib/CodeGen/BranchFolding.cpp
@@ -137,9 +137,8 @@ bool BranchFolder::OptimizeImpDefsBlock(MachineBasicBlock *MBB) {
break;
unsigned Reg = I->getOperand(0).getReg();
ImpDefRegs.insert(Reg);
- for (const uint16_t *SubRegs = TRI->getSubRegisters(Reg);
- unsigned SubReg = *SubRegs; ++SubRegs)
- ImpDefRegs.insert(SubReg);
+ for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs)
+ ImpDefRegs.insert(*SubRegs);
++I;
}
if (ImpDefRegs.empty())
@@ -188,7 +187,7 @@ bool BranchFolder::OptimizeFunction(MachineFunction &MF,
// Use a RegScavenger to help update liveness when required.
MachineRegisterInfo &MRI = MF.getRegInfo();
- if (MRI.tracksLiveness() && TRI->requiresRegisterScavenging(MF))
+ if (MRI.tracksLiveness() && TRI->trackLivenessAfterRegAlloc(MF))
RS = new RegScavenger();
else
MRI.invalidateLiveness();
@@ -819,10 +818,8 @@ bool BranchFolder::TryTailMergeBlocks(MachineBasicBlock *SuccBB,
}
bool BranchFolder::TailMergeBlocks(MachineFunction &MF) {
-
- if (!EnableTailMerge) return false;
-
bool MadeChange = false;
+ if (!EnableTailMerge) return MadeChange;
// First find blocks with no successors.
MergePotentials.clear();
@@ -839,6 +836,7 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) {
if (MergePotentials.size() == TailMergeThreshold)
for (unsigned i = 0, e = MergePotentials.size(); i != e; ++i)
TriedMerging.insert(MergePotentials[i].getBlock());
+
// See if we can do any tail merging on those.
if (MergePotentials.size() >= 2)
MadeChange |= TryTailMergeBlocks(NULL, NULL);
@@ -864,88 +862,97 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) {
for (MachineFunction::iterator I = llvm::next(MF.begin()), E = MF.end();
I != E; ++I) {
- if (I->pred_size() >= 2) {
- SmallPtrSet<MachineBasicBlock *, 8> UniquePreds;
- MachineBasicBlock *IBB = I;
- MachineBasicBlock *PredBB = prior(I);
- MergePotentials.clear();
- for (MachineBasicBlock::pred_iterator P = I->pred_begin(),
- E2 = I->pred_end();
- P != E2 && MergePotentials.size() < TailMergeThreshold; ++P) {
- MachineBasicBlock *PBB = *P;
- if (TriedMerging.count(PBB))
- continue;
- // Skip blocks that loop to themselves, can't tail merge these.
- if (PBB == IBB)
- continue;
- // Visit each predecessor only once.
- if (!UniquePreds.insert(PBB))
- continue;
- // Skip blocks which may jump to a landing pad. Can't tail merge these.
- if (PBB->getLandingPadSuccessor())
- continue;
- MachineBasicBlock *TBB = 0, *FBB = 0;
- SmallVector<MachineOperand, 4> Cond;
- if (!TII->AnalyzeBranch(*PBB, TBB, FBB, Cond, true)) {
- // Failing case: IBB is the target of a cbr, and
- // we cannot reverse the branch.
- SmallVector<MachineOperand, 4> NewCond(Cond);
- if (!Cond.empty() && TBB == IBB) {
- if (TII->ReverseBranchCondition(NewCond))
+ if (I->pred_size() < 2) continue;
+ SmallPtrSet<MachineBasicBlock *, 8> UniquePreds;
+ MachineBasicBlock *IBB = I;
+ MachineBasicBlock *PredBB = prior(I);
+ MergePotentials.clear();
+ for (MachineBasicBlock::pred_iterator P = I->pred_begin(),
+ E2 = I->pred_end();
+ P != E2 && MergePotentials.size() < TailMergeThreshold; ++P) {
+ MachineBasicBlock *PBB = *P;
+ if (TriedMerging.count(PBB))
+ continue;
+
+ // Skip blocks that loop to themselves, can't tail merge these.
+ if (PBB == IBB)
+ continue;
+
+ // Visit each predecessor only once.
+ if (!UniquePreds.insert(PBB))
+ continue;
+
+ // Skip blocks which may jump to a landing pad. Can't tail merge these.
+ if (PBB->getLandingPadSuccessor())
+ continue;
+
+ MachineBasicBlock *TBB = 0, *FBB = 0;
+ SmallVector<MachineOperand, 4> Cond;
+ if (!TII->AnalyzeBranch(*PBB, TBB, FBB, Cond, true)) {
+ // Failing case: IBB is the target of a cbr, and we cannot reverse the
+ // branch.
+ SmallVector<MachineOperand, 4> NewCond(Cond);
+ if (!Cond.empty() && TBB == IBB) {
+ if (TII->ReverseBranchCondition(NewCond))
+ continue;
+ // This is the QBB case described above
+ if (!FBB)
+ FBB = llvm::next(MachineFunction::iterator(PBB));
+ }
+
+ // Failing case: the only way IBB can be reached from PBB is via
+ // exception handling. Happens for landing pads. Would be nice to have
+ // a bit in the edge so we didn't have to do all this.
+ if (IBB->isLandingPad()) {
+ MachineFunction::iterator IP = PBB; IP++;
+ MachineBasicBlock *PredNextBB = NULL;
+ if (IP != MF.end())
+ PredNextBB = IP;
+ if (TBB == NULL) {
+ if (IBB != PredNextBB) // fallthrough
+ continue;
+ } else if (FBB) {
+ if (TBB != IBB && FBB != IBB) // cbr then ubr
+ continue;
+ } else if (Cond.empty()) {
+ if (TBB != IBB) // ubr
+ continue;
+ } else {
+ if (TBB != IBB && IBB != PredNextBB) // cbr
continue;
- // This is the QBB case described above
- if (!FBB)
- FBB = llvm::next(MachineFunction::iterator(PBB));
- }
- // Failing case: the only way IBB can be reached from PBB is via
- // exception handling. Happens for landing pads. Would be nice
- // to have a bit in the edge so we didn't have to do all this.
- if (IBB->isLandingPad()) {
- MachineFunction::iterator IP = PBB; IP++;
- MachineBasicBlock *PredNextBB = NULL;
- if (IP != MF.end())
- PredNextBB = IP;
- if (TBB == NULL) {
- if (IBB != PredNextBB) // fallthrough
- continue;
- } else if (FBB) {
- if (TBB != IBB && FBB != IBB) // cbr then ubr
- continue;
- } else if (Cond.empty()) {
- if (TBB != IBB) // ubr
- continue;
- } else {
- if (TBB != IBB && IBB != PredNextBB) // cbr
- continue;
- }
- }
- // Remove the unconditional branch at the end, if any.
- if (TBB && (Cond.empty() || FBB)) {
- DebugLoc dl; // FIXME: this is nowhere
- TII->RemoveBranch(*PBB);
- if (!Cond.empty())
- // reinsert conditional branch only, for now
- TII->InsertBranch(*PBB, (TBB == IBB) ? FBB : TBB, 0, NewCond, dl);
}
- MergePotentials.push_back(MergePotentialsElt(HashEndOfMBB(PBB), *P));
}
+
+ // Remove the unconditional branch at the end, if any.
+ if (TBB && (Cond.empty() || FBB)) {
+ DebugLoc dl; // FIXME: this is nowhere
+ TII->RemoveBranch(*PBB);
+ if (!Cond.empty())
+ // reinsert conditional branch only, for now
+ TII->InsertBranch(*PBB, (TBB == IBB) ? FBB : TBB, 0, NewCond, dl);
+ }
+
+ MergePotentials.push_back(MergePotentialsElt(HashEndOfMBB(PBB), *P));
}
- // If this is a large problem, avoid visiting the same basic blocks
- // multiple times.
- if (MergePotentials.size() == TailMergeThreshold)
- for (unsigned i = 0, e = MergePotentials.size(); i != e; ++i)
- TriedMerging.insert(MergePotentials[i].getBlock());
- if (MergePotentials.size() >= 2)
- MadeChange |= TryTailMergeBlocks(IBB, PredBB);
- // Reinsert an unconditional branch if needed.
- // The 1 below can occur as a result of removing blocks in
- // TryTailMergeBlocks.
- PredBB = prior(I); // this may have been changed in TryTailMergeBlocks
- if (MergePotentials.size() == 1 &&
- MergePotentials.begin()->getBlock() != PredBB)
- FixTail(MergePotentials.begin()->getBlock(), IBB, TII);
}
+
+ // If this is a large problem, avoid visiting the same basic blocks multiple
+ // times.
+ if (MergePotentials.size() == TailMergeThreshold)
+ for (unsigned i = 0, e = MergePotentials.size(); i != e; ++i)
+ TriedMerging.insert(MergePotentials[i].getBlock());
+
+ if (MergePotentials.size() >= 2)
+ MadeChange |= TryTailMergeBlocks(IBB, PredBB);
+
+ // Reinsert an unconditional branch if needed. The 1 below can occur as a
+ // result of removing blocks in TryTailMergeBlocks.
+ PredBB = prior(I); // this may have been changed in TryTailMergeBlocks
+ if (MergePotentials.size() == 1 &&
+ MergePotentials.begin()->getBlock() != PredBB)
+ FixTail(MergePotentials.begin()->getBlock(), IBB, TII);
}
+
return MadeChange;
}
@@ -1459,7 +1466,7 @@ static MachineBasicBlock *findFalseBlock(MachineBasicBlock *BB,
}
/// findHoistingInsertPosAndDeps - Find the location to move common instructions
-/// in successors to. The location is ususally just before the terminator,
+/// in successors to. The location is usually just before the terminator,
/// however if the terminator is a conditional branch and its previous
/// instruction is the flag setting instruction, the previous instruction is
/// the preferred location. This function also gathers uses and defs of the
@@ -1483,9 +1490,8 @@ MachineBasicBlock::iterator findHoistingInsertPosAndDeps(MachineBasicBlock *MBB,
if (!Reg)
continue;
if (MO.isUse()) {
- Uses.insert(Reg);
- for (const uint16_t *AS = TRI->getAliasSet(Reg); *AS; ++AS)
- Uses.insert(*AS);
+ for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
+ Uses.insert(*AI);
} else if (!MO.isDead())
// Don't try to hoist code in the rare case the terminator defines a
// register that is later used.
@@ -1545,18 +1551,16 @@ MachineBasicBlock::iterator findHoistingInsertPosAndDeps(MachineBasicBlock *MBB,
if (!Reg)
continue;
if (MO.isUse()) {
- Uses.insert(Reg);
- for (const uint16_t *AS = TRI->getAliasSet(Reg); *AS; ++AS)
- Uses.insert(*AS);
+ for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
+ Uses.insert(*AI);
} else {
if (Uses.count(Reg)) {
Uses.erase(Reg);
- for (const uint16_t *SR = TRI->getSubRegisters(Reg); *SR; ++SR)
- Uses.erase(*SR); // Use getSubRegisters to be conservative
+ for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs)
+ Uses.erase(*SubRegs); // Use sub-registers to be conservative
}
- Defs.insert(Reg);
- for (const uint16_t *AS = TRI->getAliasSet(Reg); *AS; ++AS)
- Defs.insert(*AS);
+ for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
+ Defs.insert(*AI);
}
}
@@ -1683,8 +1687,8 @@ bool BranchFolder::HoistCommonCodeInSuccs(MachineBasicBlock *MBB) {
unsigned Reg = MO.getReg();
if (!Reg || !LocalDefsSet.count(Reg))
continue;
- for (const uint16_t *OR = TRI->getOverlaps(Reg); *OR; ++OR)
- LocalDefsSet.erase(*OR);
+ for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
+ LocalDefsSet.erase(*AI);
}
// Track local defs so we can update liveins.
@@ -1696,8 +1700,8 @@ bool BranchFolder::HoistCommonCodeInSuccs(MachineBasicBlock *MBB) {
if (!Reg)
continue;
LocalDefs.push_back(Reg);
- for (const uint16_t *OR = TRI->getOverlaps(Reg); *OR; ++OR)
- LocalDefsSet.insert(*OR);
+ for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
+ LocalDefsSet.insert(*AI);
}
HasDups = true;
diff --git a/lib/CodeGen/CMakeLists.txt b/lib/CodeGen/CMakeLists.txt
index 21729cd..2e189ad 100644
--- a/lib/CodeGen/CMakeLists.txt
+++ b/lib/CodeGen/CMakeLists.txt
@@ -11,6 +11,7 @@ add_llvm_library(LLVMCodeGen
DeadMachineInstructionElim.cpp
DFAPacketizer.cpp
DwarfEHPrepare.cpp
+ EarlyIfConversion.cpp
EdgeBundles.cpp
ExecutionDepsFix.cpp
ExpandISelPseudos.cpp
@@ -30,6 +31,7 @@ add_llvm_library(LLVMCodeGen
LiveInterval.cpp
LiveIntervalAnalysis.cpp
LiveIntervalUnion.cpp
+ LiveRegMatrix.cpp
LiveStackAnalysis.cpp
LiveVariables.cpp
LiveRangeCalc.cpp
@@ -59,6 +61,7 @@ add_llvm_library(LLVMCodeGen
MachineSSAUpdater.cpp
MachineScheduler.cpp
MachineSink.cpp
+ MachineTraceMetrics.cpp
MachineVerifier.cpp
OcamlGC.cpp
OptimizePHIs.cpp
@@ -77,8 +80,8 @@ add_llvm_library(LLVMCodeGen
RegAllocPBQP.cpp
RegisterClassInfo.cpp
RegisterCoalescer.cpp
+ RegisterPressure.cpp
RegisterScavenging.cpp
- RenderMachineFunction.cpp
ScheduleDAG.cpp
ScheduleDAGInstrs.cpp
ScheduleDAGPrinter.cpp
@@ -103,5 +106,7 @@ add_llvm_library(LLVMCodeGen
VirtRegMap.cpp
)
+add_dependencies(LLVMCodeGen intrinsics_gen)
+
add_subdirectory(SelectionDAG)
add_subdirectory(AsmPrinter)
diff --git a/lib/CodeGen/CalcSpillWeights.cpp b/lib/CodeGen/CalcSpillWeights.cpp
index ea16a25..939af3f 100644
--- a/lib/CodeGen/CalcSpillWeights.cpp
+++ b/lib/CodeGen/CalcSpillWeights.cpp
@@ -39,18 +39,20 @@ void CalculateSpillWeights::getAnalysisUsage(AnalysisUsage &au) const {
MachineFunctionPass::getAnalysisUsage(au);
}
-bool CalculateSpillWeights::runOnMachineFunction(MachineFunction &fn) {
+bool CalculateSpillWeights::runOnMachineFunction(MachineFunction &MF) {
DEBUG(dbgs() << "********** Compute Spill Weights **********\n"
<< "********** Function: "
- << fn.getFunction()->getName() << '\n');
-
- LiveIntervals &lis = getAnalysis<LiveIntervals>();
- VirtRegAuxInfo vrai(fn, lis, getAnalysis<MachineLoopInfo>());
- for (LiveIntervals::iterator I = lis.begin(), E = lis.end(); I != E; ++I) {
- LiveInterval &li = *I->second;
- if (TargetRegisterInfo::isVirtualRegister(li.reg))
- vrai.CalculateWeightAndHint(li);
+ << MF.getFunction()->getName() << '\n');
+
+ LiveIntervals &LIS = getAnalysis<LiveIntervals>();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ VirtRegAuxInfo VRAI(MF, LIS, getAnalysis<MachineLoopInfo>());
+ for (unsigned i = 0, e = MRI.getNumVirtRegs(); i != e; ++i) {
+ unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
+ if (MRI.reg_nodbg_empty(Reg))
+ continue;
+ VRAI.CalculateWeightAndHint(LIS.getInterval(Reg));
}
return false;
}
@@ -86,6 +88,27 @@ static unsigned copyHint(const MachineInstr *mi, unsigned reg,
return tri.getMatchingSuperReg(hreg, sub, rc);
}
+// Check if all values in LI are rematerializable
+static bool isRematerializable(const LiveInterval &LI,
+ const LiveIntervals &LIS,
+ const TargetInstrInfo &TII) {
+ for (LiveInterval::const_vni_iterator I = LI.vni_begin(), E = LI.vni_end();
+ I != E; ++I) {
+ const VNInfo *VNI = *I;
+ if (VNI->isUnused())
+ continue;
+ if (VNI->isPHIDef())
+ return false;
+
+ MachineInstr *MI = LIS.getInstructionFromIndex(VNI->def);
+ assert(MI && "Dead valno in interval");
+
+ if (!TII.isTriviallyReMaterializable(MI, LIS.getAliasAnalysis()))
+ return false;
+ }
+ return true;
+}
+
void VirtRegAuxInfo::CalculateWeightAndHint(LiveInterval &li) {
MachineRegisterInfo &mri = MF.getRegInfo();
const TargetRegisterInfo &tri = *MF.getTarget().getRegisterInfo();
@@ -171,17 +194,11 @@ void VirtRegAuxInfo::CalculateWeightAndHint(LiveInterval &li) {
}
// If all of the definitions of the interval are re-materializable,
- // it is a preferred candidate for spilling. If none of the defs are
- // loads, then it's potentially very cheap to re-materialize.
+ // it is a preferred candidate for spilling.
// FIXME: this gets much more complicated once we support non-trivial
// re-materialization.
- bool isLoad = false;
- if (LIS.isReMaterializable(li, 0, isLoad)) {
- if (isLoad)
- totalWeight *= 0.9F;
- else
- totalWeight *= 0.5F;
- }
+ if (isRematerializable(li, LIS, *MF.getTarget().getInstrInfo()))
+ totalWeight *= 0.5F;
li.weight = normalizeSpillWeight(totalWeight, li.getSize());
}
diff --git a/lib/CodeGen/CallingConvLower.cpp b/lib/CodeGen/CallingConvLower.cpp
index 2b7dfdb..0b747fd 100644
--- a/lib/CodeGen/CallingConvLower.cpp
+++ b/lib/CodeGen/CallingConvLower.cpp
@@ -49,8 +49,7 @@ void CCState::HandleByVal(unsigned ValNo, MVT ValVT,
Size = MinSize;
if (MinAlign > (int)Align)
Align = MinAlign;
- if (MF.getFrameInfo()->getMaxAlignment() < Align)
- MF.getFrameInfo()->setMaxAlignment(Align);
+ MF.getFrameInfo()->ensureMaxAlignment(Align);
TM.getTargetLowering()->HandleByVal(this, Size);
unsigned Offset = AllocateStack(Size, Align);
addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
@@ -58,9 +57,8 @@ void CCState::HandleByVal(unsigned ValNo, MVT ValVT,
/// MarkAllocated - Mark a register and all of its aliases as allocated.
void CCState::MarkAllocated(unsigned Reg) {
- for (const uint16_t *Alias = TRI.getOverlaps(Reg);
- unsigned Reg = *Alias; ++Alias)
- UsedRegs[Reg/32] |= 1 << (Reg&31);
+ for (MCRegAliasIterator AI(Reg, &TRI, true); AI.isValid(); ++AI)
+ UsedRegs[*AI/32] |= 1 << (*AI&31);
}
/// AnalyzeFormalArguments - Analyze an array of argument values,
diff --git a/lib/CodeGen/CodeGen.cpp b/lib/CodeGen/CodeGen.cpp
index a81bb5c..fb2c2e8 100644
--- a/lib/CodeGen/CodeGen.cpp
+++ b/lib/CodeGen/CodeGen.cpp
@@ -23,6 +23,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
initializeCalculateSpillWeightsPass(Registry);
initializeCodePlacementOptPass(Registry);
initializeDeadMachineInstructionElimPass(Registry);
+ initializeEarlyIfConverterPass(Registry);
initializeExpandPostRAPass(Registry);
initializeExpandISelPseudosPass(Registry);
initializeFinalizeMachineBundlesPass(Registry);
@@ -53,7 +54,6 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
initializeProcessImplicitDefsPass(Registry);
initializePEIPass(Registry);
initializeRegisterCoalescerPass(Registry);
- initializeRenderMachineFunctionPass(Registry);
initializeSlotIndexesPass(Registry);
initializeStackProtectorPass(Registry);
initializeStackSlotColoringPass(Registry);
@@ -65,7 +65,9 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
initializeUnreachableBlockElimPass(Registry);
initializeUnreachableMachineBlockElimPass(Registry);
initializeVirtRegMapPass(Registry);
+ initializeVirtRegRewriterPass(Registry);
initializeLowerIntrinsicsPass(Registry);
+ initializeMachineFunctionPrinterPassPass(Registry);
}
void LLVMInitializeCodeGen(LLVMPassRegistryRef R) {
diff --git a/lib/CodeGen/CodePlacementOpt.cpp b/lib/CodeGen/CodePlacementOpt.cpp
index c13c05e..99233df 100644
--- a/lib/CodeGen/CodePlacementOpt.cpp
+++ b/lib/CodeGen/CodePlacementOpt.cpp
@@ -201,7 +201,7 @@ bool CodePlacementOpt::EliminateUnconditionalJumpsToTop(MachineFunction &MF,
// fallthrough edge.
if (!Prior->isSuccessor(End))
goto next_pred;
- // Otherwise we can stop scanning and procede to move the blocks.
+ // Otherwise we can stop scanning and proceed to move the blocks.
break;
}
// If we hit a switch or something complicated, don't move anything
diff --git a/lib/CodeGen/CriticalAntiDepBreaker.cpp b/lib/CodeGen/CriticalAntiDepBreaker.cpp
index bad5010..a9de1c749 100644
--- a/lib/CodeGen/CriticalAntiDepBreaker.cpp
+++ b/lib/CodeGen/CriticalAntiDepBreaker.cpp
@@ -62,17 +62,11 @@ void CriticalAntiDepBreaker::StartBlock(MachineBasicBlock *BB) {
// In a return block, examine the function live-out regs.
for (MachineRegisterInfo::liveout_iterator I = MRI.liveout_begin(),
E = MRI.liveout_end(); I != E; ++I) {
- unsigned Reg = *I;
- Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
- KillIndices[Reg] = BBSize;
- DefIndices[Reg] = ~0u;
-
- // Repeat, for all aliases.
- for (const uint16_t *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
- unsigned AliasReg = *Alias;
- Classes[AliasReg] = reinterpret_cast<TargetRegisterClass *>(-1);
- KillIndices[AliasReg] = BBSize;
- DefIndices[AliasReg] = ~0u;
+ for (MCRegAliasIterator AI(*I, TRI, true); AI.isValid(); ++AI) {
+ unsigned Reg = *AI;
+ Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
+ KillIndices[Reg] = BBSize;
+ DefIndices[Reg] = ~0u;
}
}
}
@@ -84,17 +78,11 @@ void CriticalAntiDepBreaker::StartBlock(MachineBasicBlock *BB) {
SE = BB->succ_end(); SI != SE; ++SI)
for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(),
E = (*SI)->livein_end(); I != E; ++I) {
- unsigned Reg = *I;
- Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
- KillIndices[Reg] = BBSize;
- DefIndices[Reg] = ~0u;
-
- // Repeat, for all aliases.
- for (const uint16_t *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
- unsigned AliasReg = *Alias;
- Classes[AliasReg] = reinterpret_cast<TargetRegisterClass *>(-1);
- KillIndices[AliasReg] = BBSize;
- DefIndices[AliasReg] = ~0u;
+ for (MCRegAliasIterator AI(*I, TRI, true); AI.isValid(); ++AI) {
+ unsigned Reg = *AI;
+ Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
+ KillIndices[Reg] = BBSize;
+ DefIndices[Reg] = ~0u;
}
}
@@ -104,18 +92,12 @@ void CriticalAntiDepBreaker::StartBlock(MachineBasicBlock *BB) {
const MachineFrameInfo *MFI = MF.getFrameInfo();
BitVector Pristine = MFI->getPristineRegs(BB);
for (const uint16_t *I = TRI->getCalleeSavedRegs(&MF); *I; ++I) {
- unsigned Reg = *I;
- if (!IsReturnBlock && !Pristine.test(Reg)) continue;
- Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
- KillIndices[Reg] = BBSize;
- DefIndices[Reg] = ~0u;
-
- // Repeat, for all aliases.
- for (const uint16_t *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
- unsigned AliasReg = *Alias;
- Classes[AliasReg] = reinterpret_cast<TargetRegisterClass *>(-1);
- KillIndices[AliasReg] = BBSize;
- DefIndices[AliasReg] = ~0u;
+ if (!IsReturnBlock && !Pristine.test(*I)) continue;
+ for (MCRegAliasIterator AI(*I, TRI, true); AI.isValid(); ++AI) {
+ unsigned Reg = *AI;
+ Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
+ KillIndices[Reg] = BBSize;
+ DefIndices[Reg] = ~0u;
}
}
}
@@ -208,7 +190,7 @@ void CriticalAntiDepBreaker::PrescanInstruction(MachineInstr *MI) {
const TargetRegisterClass *NewRC = 0;
if (i < MI->getDesc().getNumOperands())
- NewRC = TII->getRegClass(MI->getDesc(), i, TRI);
+ NewRC = TII->getRegClass(MI->getDesc(), i, TRI, MF);
// For now, only allow the register to be changed if its register
// class is consistent across all uses.
@@ -218,11 +200,11 @@ void CriticalAntiDepBreaker::PrescanInstruction(MachineInstr *MI) {
Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
// Now check for aliases.
- for (const uint16_t *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
+ for (MCRegAliasIterator AI(Reg, TRI, false); AI.isValid(); ++AI) {
// If an alias of the reg is used during the live range, give up.
// Note that this allows us to skip checking if AntiDepReg
// overlaps with any of the aliases, among other things.
- unsigned AliasReg = *Alias;
+ unsigned AliasReg = *AI;
if (Classes[AliasReg]) {
Classes[AliasReg] = reinterpret_cast<TargetRegisterClass *>(-1);
Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
@@ -236,9 +218,8 @@ void CriticalAntiDepBreaker::PrescanInstruction(MachineInstr *MI) {
if (MO.isUse() && Special) {
if (!KeepRegs.test(Reg)) {
KeepRegs.set(Reg);
- for (const uint16_t *Subreg = TRI->getSubRegisters(Reg);
- *Subreg; ++Subreg)
- KeepRegs.set(*Subreg);
+ for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs)
+ KeepRegs.set(*SubRegs);
}
}
}
@@ -247,7 +228,7 @@ void CriticalAntiDepBreaker::PrescanInstruction(MachineInstr *MI) {
void CriticalAntiDepBreaker::ScanInstruction(MachineInstr *MI,
unsigned Count) {
// Update liveness.
- // Proceding upwards, registers that are defed but not used in this
+ // Proceeding upwards, registers that are defed but not used in this
// instruction are now dead.
if (!TII->isPredicated(MI)) {
@@ -282,9 +263,8 @@ void CriticalAntiDepBreaker::ScanInstruction(MachineInstr *MI,
Classes[Reg] = 0;
RegRefs.erase(Reg);
// Repeat, for all subregs.
- for (const uint16_t *Subreg = TRI->getSubRegisters(Reg);
- *Subreg; ++Subreg) {
- unsigned SubregReg = *Subreg;
+ for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) {
+ unsigned SubregReg = *SubRegs;
DefIndices[SubregReg] = Count;
KillIndices[SubregReg] = ~0u;
KeepRegs.reset(SubregReg);
@@ -292,11 +272,8 @@ void CriticalAntiDepBreaker::ScanInstruction(MachineInstr *MI,
RegRefs.erase(SubregReg);
}
// Conservatively mark super-registers as unusable.
- for (const uint16_t *Super = TRI->getSuperRegisters(Reg);
- *Super; ++Super) {
- unsigned SuperReg = *Super;
- Classes[SuperReg] = reinterpret_cast<TargetRegisterClass *>(-1);
- }
+ for (MCSuperRegIterator SR(Reg, TRI); SR.isValid(); ++SR)
+ Classes[*SR] = reinterpret_cast<TargetRegisterClass *>(-1);
}
}
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
@@ -308,7 +285,7 @@ void CriticalAntiDepBreaker::ScanInstruction(MachineInstr *MI,
const TargetRegisterClass *NewRC = 0;
if (i < MI->getDesc().getNumOperands())
- NewRC = TII->getRegClass(MI->getDesc(), i, TRI);
+ NewRC = TII->getRegClass(MI->getDesc(), i, TRI, MF);
// For now, only allow the register to be changed if its register
// class is consistent across all uses.
@@ -328,8 +305,8 @@ void CriticalAntiDepBreaker::ScanInstruction(MachineInstr *MI,
"Kill and Def maps aren't consistent for Reg!");
}
// Repeat, for all aliases.
- for (const uint16_t *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
- unsigned AliasReg = *Alias;
+ for (MCRegAliasIterator AI(Reg, TRI, false); AI.isValid(); ++AI) {
+ unsigned AliasReg = *AI;
if (KillIndices[AliasReg] == ~0u) {
KillIndices[AliasReg] = Count;
DefIndices[AliasReg] = ~0u;
diff --git a/lib/CodeGen/CriticalAntiDepBreaker.h b/lib/CodeGen/CriticalAntiDepBreaker.h
index 7746259..ad95c48 100644
--- a/lib/CodeGen/CriticalAntiDepBreaker.h
+++ b/lib/CodeGen/CriticalAntiDepBreaker.h
@@ -17,11 +17,11 @@
#define LLVM_CODEGEN_CRITICALANTIDEPBREAKER_H
#include "AntiDepBreaker.h"
-#include "RegisterClassInfo.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterClassInfo.h"
#include "llvm/CodeGen/ScheduleDAG.h"
#include "llvm/ADT/BitVector.h"
#include <map>
diff --git a/lib/CodeGen/DFAPacketizer.cpp b/lib/CodeGen/DFAPacketizer.cpp
index 5ff641c..ff2f113 100644
--- a/lib/CodeGen/DFAPacketizer.cpp
+++ b/lib/CodeGen/DFAPacketizer.cpp
@@ -23,10 +23,10 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/CodeGen/ScheduleDAGInstrs.h"
#include "llvm/CodeGen/DFAPacketizer.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBundle.h"
-#include "llvm/CodeGen/ScheduleDAGInstrs.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/MC/MCInstrItineraries.h"
using namespace llvm;
@@ -100,22 +100,23 @@ void DFAPacketizer::reserveResources(llvm::MachineInstr *MI) {
reserveResources(&MID);
}
-namespace {
+namespace llvm {
// DefaultVLIWScheduler - This class extends ScheduleDAGInstrs and overrides
// Schedule method to build the dependence graph.
class DefaultVLIWScheduler : public ScheduleDAGInstrs {
public:
DefaultVLIWScheduler(MachineFunction &MF, MachineLoopInfo &MLI,
- MachineDominatorTree &MDT, bool IsPostRA);
+ MachineDominatorTree &MDT, bool IsPostRA);
// Schedule - Actual scheduling work.
void schedule();
};
-} // end anonymous namespace
+}
DefaultVLIWScheduler::DefaultVLIWScheduler(
MachineFunction &MF, MachineLoopInfo &MLI, MachineDominatorTree &MDT,
bool IsPostRA) :
ScheduleDAGInstrs(MF, MLI, MDT, IsPostRA) {
+ CanHandleTerminators = true;
}
void DefaultVLIWScheduler::schedule() {
@@ -129,49 +130,25 @@ VLIWPacketizerList::VLIWPacketizerList(
bool IsPostRA) : TM(MF.getTarget()), MF(MF) {
TII = TM.getInstrInfo();
ResourceTracker = TII->CreateTargetScheduleState(&TM, 0);
- SchedulerImpl = new DefaultVLIWScheduler(MF, MLI, MDT, IsPostRA);
+ VLIWScheduler = new DefaultVLIWScheduler(MF, MLI, MDT, IsPostRA);
}
// VLIWPacketizerList Dtor
VLIWPacketizerList::~VLIWPacketizerList() {
- delete SchedulerImpl;
- delete ResourceTracker;
-}
-
-// ignorePseudoInstruction - ignore pseudo instructions.
-bool VLIWPacketizerList::ignorePseudoInstruction(MachineInstr *MI,
- MachineBasicBlock *MBB) {
- if (MI->isDebugValue())
- return true;
-
- if (TII->isSchedulingBoundary(MI, MBB, MF))
- return true;
-
- return false;
-}
-
-// isSoloInstruction - return true if instruction I must end previous
-// packet.
-bool VLIWPacketizerList::isSoloInstruction(MachineInstr *I) {
- if (I->isInlineAsm())
- return true;
-
- return false;
-}
+ if (VLIWScheduler)
+ delete VLIWScheduler;
-// addToPacket - Add I to the current packet and reserve resource.
-void VLIWPacketizerList::addToPacket(MachineInstr *MI) {
- CurrentPacketMIs.push_back(MI);
- ResourceTracker->reserveResources(MI);
+ if (ResourceTracker)
+ delete ResourceTracker;
}
// endPacket - End the current packet, bundle packet instructions and reset
// DFA state.
void VLIWPacketizerList::endPacket(MachineBasicBlock *MBB,
- MachineInstr *I) {
+ MachineInstr *MI) {
if (CurrentPacketMIs.size() > 1) {
MachineInstr *MIFirst = CurrentPacketMIs.front();
- finalizeBundle(*MBB, MIFirst, I);
+ finalizeBundle(*MBB, MIFirst, MI);
}
CurrentPacketMIs.clear();
ResourceTracker->clearResources();
@@ -181,31 +158,35 @@ void VLIWPacketizerList::endPacket(MachineBasicBlock *MBB,
void VLIWPacketizerList::PacketizeMIs(MachineBasicBlock *MBB,
MachineBasicBlock::iterator BeginItr,
MachineBasicBlock::iterator EndItr) {
- assert(MBB->end() == EndItr && "Bad EndIndex");
-
- SchedulerImpl->enterRegion(MBB, BeginItr, EndItr, MBB->size());
-
- // Build the DAG without reordering instructions.
- SchedulerImpl->schedule();
-
- // Remember scheduling units.
- SUnits = SchedulerImpl->SUnits;
+ assert(VLIWScheduler && "VLIW Scheduler is not initialized!");
+ VLIWScheduler->startBlock(MBB);
+ VLIWScheduler->enterRegion(MBB, BeginItr, EndItr, MBB->size());
+ VLIWScheduler->schedule();
+
+ // Generate MI -> SU map.
+ MIToSUnit.clear();
+ for (unsigned i = 0, e = VLIWScheduler->SUnits.size(); i != e; ++i) {
+ SUnit *SU = &VLIWScheduler->SUnits[i];
+ MIToSUnit[SU->getInstr()] = SU;
+ }
// The main packetizer loop.
for (; BeginItr != EndItr; ++BeginItr) {
MachineInstr *MI = BeginItr;
- // Ignore pseudo instructions.
- if (ignorePseudoInstruction(MI, MBB))
- continue;
+ this->initPacketizerState();
// End the current packet if needed.
- if (isSoloInstruction(MI)) {
+ if (this->isSoloInstruction(MI)) {
endPacket(MBB, MI);
continue;
}
- SUnit *SUI = SchedulerImpl->getSUnit(MI);
+ // Ignore pseudo instructions.
+ if (this->ignorePseudoInstruction(MI, MBB))
+ continue;
+
+ SUnit *SUI = MIToSUnit[MI];
assert(SUI && "Missing SUnit Info!");
// Ask DFA if machine resource is available for MI.
@@ -215,13 +196,13 @@ void VLIWPacketizerList::PacketizeMIs(MachineBasicBlock *MBB,
for (std::vector<MachineInstr*>::iterator VI = CurrentPacketMIs.begin(),
VE = CurrentPacketMIs.end(); VI != VE; ++VI) {
MachineInstr *MJ = *VI;
- SUnit *SUJ = SchedulerImpl->getSUnit(MJ);
+ SUnit *SUJ = MIToSUnit[MJ];
assert(SUJ && "Missing SUnit Info!");
// Is it legal to packetize SUI and SUJ together.
- if (!isLegalToPacketizeTogether(SUI, SUJ)) {
+ if (!this->isLegalToPacketizeTogether(SUI, SUJ)) {
// Allow packetization if dependency can be pruned.
- if (!isLegalToPruneDependencies(SUI, SUJ)) {
+ if (!this->isLegalToPruneDependencies(SUI, SUJ)) {
// End the packet if dependency cannot be pruned.
endPacket(MBB, MI);
break;
@@ -234,11 +215,11 @@ void VLIWPacketizerList::PacketizeMIs(MachineBasicBlock *MBB,
}
// Add MI to the current packet.
- addToPacket(MI);
+ BeginItr = this->addToPacket(MI);
} // For all instructions in BB.
// End any packet left behind.
endPacket(MBB, EndItr);
-
- SchedulerImpl->exitRegion();
+ VLIWScheduler->exitRegion();
+ VLIWScheduler->finishBlock();
}
diff --git a/lib/CodeGen/DeadMachineInstructionElim.cpp b/lib/CodeGen/DeadMachineInstructionElim.cpp
index aa10d1d..b4394e8 100644
--- a/lib/CodeGen/DeadMachineInstructionElim.cpp
+++ b/lib/CodeGen/DeadMachineInstructionElim.cpp
@@ -171,9 +171,8 @@ bool DeadMachineInstructionElim::runOnMachineFunction(MachineFunction &MF) {
// Check the subreg set, not the alias set, because a def
// of a super-register may still be partially live after
// this def.
- for (const uint16_t *SubRegs = TRI->getSubRegisters(Reg);
- *SubRegs; ++SubRegs)
- LivePhysRegs.reset(*SubRegs);
+ for (MCSubRegIterator SR(Reg, TRI); SR.isValid(); ++SR)
+ LivePhysRegs.reset(*SR);
}
} else if (MO.isRegMask()) {
// Register mask of preserved registers. All clobbers are dead.
@@ -187,10 +186,8 @@ bool DeadMachineInstructionElim::runOnMachineFunction(MachineFunction &MF) {
if (MO.isReg() && MO.isUse()) {
unsigned Reg = MO.getReg();
if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
- LivePhysRegs.set(Reg);
- for (const uint16_t *AliasSet = TRI->getAliasSet(Reg);
- *AliasSet; ++AliasSet)
- LivePhysRegs.set(*AliasSet);
+ for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
+ LivePhysRegs.set(*AI);
}
}
}
diff --git a/lib/CodeGen/DwarfEHPrepare.cpp b/lib/CodeGen/DwarfEHPrepare.cpp
index 944dd4f..7095624 100644
--- a/lib/CodeGen/DwarfEHPrepare.cpp
+++ b/lib/CodeGen/DwarfEHPrepare.cpp
@@ -39,7 +39,7 @@ namespace {
Constant *RewindFunction;
bool InsertUnwindResumeCalls(Function &Fn);
- Instruction *GetExceptionObject(ResumeInst *RI);
+ Value *GetExceptionObject(ResumeInst *RI);
public:
static char ID; // Pass identification, replacement for typeid.
@@ -68,9 +68,9 @@ FunctionPass *llvm::createDwarfEHPass(const TargetMachine *tm) {
/// GetExceptionObject - Return the exception object from the value passed into
/// the 'resume' instruction (typically an aggregate). Clean up any dead
/// instructions, including the 'resume' instruction.
-Instruction *DwarfEHPrepare::GetExceptionObject(ResumeInst *RI) {
+Value *DwarfEHPrepare::GetExceptionObject(ResumeInst *RI) {
Value *V = RI->getOperand(0);
- Instruction *ExnObj = 0;
+ Value *ExnObj = 0;
InsertValueInst *SelIVI = dyn_cast<InsertValueInst>(V);
LoadInst *SelLoad = 0;
InsertValueInst *ExcIVI = 0;
@@ -81,7 +81,7 @@ Instruction *DwarfEHPrepare::GetExceptionObject(ResumeInst *RI) {
ExcIVI = dyn_cast<InsertValueInst>(SelIVI->getOperand(0));
if (ExcIVI && isa<UndefValue>(ExcIVI->getOperand(0)) &&
ExcIVI->getNumIndices() == 1 && *ExcIVI->idx_begin() == 0) {
- ExnObj = cast<Instruction>(ExcIVI->getOperand(1));
+ ExnObj = ExcIVI->getOperand(1);
SelLoad = dyn_cast<LoadInst>(SelIVI->getOperand(1));
EraseIVIs = true;
}
@@ -139,7 +139,7 @@ bool DwarfEHPrepare::InsertUnwindResumeCalls(Function &Fn) {
// _Unwind_Resume to the end of the single resume block.
ResumeInst *RI = Resumes.front();
BasicBlock *UnwindBB = RI->getParent();
- Instruction *ExnObj = GetExceptionObject(RI);
+ Value *ExnObj = GetExceptionObject(RI);
// Call the _Unwind_Resume function.
CallInst *CI = CallInst::Create(RewindFunction, ExnObj, "", UnwindBB);
@@ -162,7 +162,7 @@ bool DwarfEHPrepare::InsertUnwindResumeCalls(Function &Fn) {
BasicBlock *Parent = RI->getParent();
BranchInst::Create(UnwindBB, Parent);
- Instruction *ExnObj = GetExceptionObject(RI);
+ Value *ExnObj = GetExceptionObject(RI);
PN->addIncoming(ExnObj, Parent);
++NumResumesLowered;
diff --git a/lib/CodeGen/EarlyIfConversion.cpp b/lib/CodeGen/EarlyIfConversion.cpp
new file mode 100644
index 0000000..f9347ef
--- /dev/null
+++ b/lib/CodeGen/EarlyIfConversion.cpp
@@ -0,0 +1,803 @@
+//===-- EarlyIfConversion.cpp - If-conversion on SSA form machine code ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Early if-conversion is for out-of-order CPUs that don't have a lot of
+// predicable instructions. The goal is to eliminate conditional branches that
+// may mispredict.
+//
+// Instructions from both sides of the branch are executed speculatively, and a
+// cmov instruction selects the result.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "early-ifcvt"
+#include "MachineTraceMetrics.h"
+#include "llvm/Function.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SparseSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/MC/MCInstrItineraries.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+// Absolute maximum number of instructions allowed per speculated block.
+// This bypasses all other heuristics, so it should be set fairly high.
+static cl::opt<unsigned>
+BlockInstrLimit("early-ifcvt-limit", cl::init(30), cl::Hidden,
+ cl::desc("Maximum number of instructions per speculated block."));
+
+// Stress testing mode - disable heuristics.
+static cl::opt<bool> Stress("stress-early-ifcvt", cl::Hidden,
+ cl::desc("Turn all knobs to 11"));
+
+STATISTIC(NumDiamondsSeen, "Number of diamonds");
+STATISTIC(NumDiamondsConv, "Number of diamonds converted");
+STATISTIC(NumTrianglesSeen, "Number of triangles");
+STATISTIC(NumTrianglesConv, "Number of triangles converted");
+
+//===----------------------------------------------------------------------===//
+// SSAIfConv
+//===----------------------------------------------------------------------===//
+//
+// The SSAIfConv class performs if-conversion on SSA form machine code after
+// determining if it is possible. The class contains no heuristics; external
+// code should be used to determine when if-conversion is a good idea.
+//
+// SSAIfConv can convert both triangles and diamonds:
+//
+// Triangle: Head Diamond: Head
+// | \ / \_
+// | \ / |
+// | [TF]BB FBB TBB
+// | / \ /
+// | / \ /
+// Tail Tail
+//
+// Instructions in the conditional blocks TBB and/or FBB are spliced into the
+// Head block, and phis in the Tail block are converted to select instructions.
+//
+namespace {
+class SSAIfConv {
+ const TargetInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+ MachineRegisterInfo *MRI;
+
+public:
+ /// The block containing the conditional branch.
+ MachineBasicBlock *Head;
+
+ /// The block containing phis after the if-then-else.
+ MachineBasicBlock *Tail;
+
+ /// The 'true' conditional block as determined by AnalyzeBranch.
+ MachineBasicBlock *TBB;
+
+ /// The 'false' conditional block as determined by AnalyzeBranch.
+ MachineBasicBlock *FBB;
+
+ /// isTriangle - When there is no 'else' block, either TBB or FBB will be
+ /// equal to Tail.
+ bool isTriangle() const { return TBB == Tail || FBB == Tail; }
+
+ /// Returns the Tail predecessor for the True side.
+ MachineBasicBlock *getTPred() const { return TBB == Tail ? Head : TBB; }
+
+ /// Returns the Tail predecessor for the False side.
+ MachineBasicBlock *getFPred() const { return FBB == Tail ? Head : FBB; }
+
+ /// Information about each phi in the Tail block.
+ struct PHIInfo {
+ MachineInstr *PHI;
+ unsigned TReg, FReg;
+ // Latencies from Cond+Branch, TReg, and FReg to DstReg.
+ int CondCycles, TCycles, FCycles;
+
+ PHIInfo(MachineInstr *phi)
+ : PHI(phi), TReg(0), FReg(0), CondCycles(0), TCycles(0), FCycles(0) {}
+ };
+
+ SmallVector<PHIInfo, 8> PHIs;
+
+private:
+ /// The branch condition determined by AnalyzeBranch.
+ SmallVector<MachineOperand, 4> Cond;
+
+ /// Instructions in Head that define values used by the conditional blocks.
+ /// The hoisted instructions must be inserted after these instructions.
+ SmallPtrSet<MachineInstr*, 8> InsertAfter;
+
+ /// Register units clobbered by the conditional blocks.
+ BitVector ClobberedRegUnits;
+
+ // Scratch pad for findInsertionPoint.
+ SparseSet<unsigned> LiveRegUnits;
+
+ /// Insertion point in Head for speculatively executed instructions from TBB
+ /// and FBB.
+ MachineBasicBlock::iterator InsertionPoint;
+
+ /// Return true if all non-terminator instructions in MBB can be safely
+ /// speculated.
+ bool canSpeculateInstrs(MachineBasicBlock *MBB);
+
+ /// Find a valid insertion point in Head.
+ bool findInsertionPoint();
+
+ /// Replace PHI instructions in Tail with selects.
+ void replacePHIInstrs();
+
+ /// Insert selects and rewrite PHI operands to use them.
+ void rewritePHIOperands();
+
+public:
+ /// runOnMachineFunction - Initialize per-function data structures.
+ void runOnMachineFunction(MachineFunction &MF) {
+ TII = MF.getTarget().getInstrInfo();
+ TRI = MF.getTarget().getRegisterInfo();
+ MRI = &MF.getRegInfo();
+ LiveRegUnits.clear();
+ LiveRegUnits.setUniverse(TRI->getNumRegUnits());
+ ClobberedRegUnits.clear();
+ ClobberedRegUnits.resize(TRI->getNumRegUnits());
+ }
+
+ /// canConvertIf - If the sub-CFG headed by MBB can be if-converted,
+ /// initialize the internal state, and return true.
+ bool canConvertIf(MachineBasicBlock *MBB);
+
+ /// convertIf - If-convert the last block passed to canConvertIf(), assuming
+ /// it is possible. Add any erased blocks to RemovedBlocks.
+ void convertIf(SmallVectorImpl<MachineBasicBlock*> &RemovedBlocks);
+};
+} // end anonymous namespace
+
+
+/// canSpeculateInstrs - Returns true if all the instructions in MBB can safely
+/// be speculated. The terminators are not considered.
+///
+/// If instructions use any values that are defined in the head basic block,
+/// the defining instructions are added to InsertAfter.
+///
+/// Any clobbered regunits are added to ClobberedRegUnits.
+///
+bool SSAIfConv::canSpeculateInstrs(MachineBasicBlock *MBB) {
+ // Reject any live-in physregs. It's probably CPSR/EFLAGS, and very hard to
+ // get right.
+ if (!MBB->livein_empty()) {
+ DEBUG(dbgs() << "BB#" << MBB->getNumber() << " has live-ins.\n");
+ return false;
+ }
+
+ unsigned InstrCount = 0;
+
+ // Check all instructions, except the terminators. It is assumed that
+ // terminators never have side effects or define any used register values.
+ for (MachineBasicBlock::iterator I = MBB->begin(),
+ E = MBB->getFirstTerminator(); I != E; ++I) {
+ if (I->isDebugValue())
+ continue;
+
+ if (++InstrCount > BlockInstrLimit && !Stress) {
+ DEBUG(dbgs() << "BB#" << MBB->getNumber() << " has more than "
+ << BlockInstrLimit << " instructions.\n");
+ return false;
+ }
+
+ // There shouldn't normally be any phis in a single-predecessor block.
+ if (I->isPHI()) {
+ DEBUG(dbgs() << "Can't hoist: " << *I);
+ return false;
+ }
+
+ // Don't speculate loads. Note that it may be possible and desirable to
+ // speculate GOT or constant pool loads that are guaranteed not to trap,
+ // but we don't support that for now.
+ if (I->mayLoad()) {
+ DEBUG(dbgs() << "Won't speculate load: " << *I);
+ return false;
+ }
+
+ // We never speculate stores, so an AA pointer isn't necessary.
+ bool DontMoveAcrossStore = true;
+ if (!I->isSafeToMove(TII, 0, DontMoveAcrossStore)) {
+ DEBUG(dbgs() << "Can't speculate: " << *I);
+ return false;
+ }
+
+ // Check for any dependencies on Head instructions.
+ for (MIOperands MO(I); MO.isValid(); ++MO) {
+ if (MO->isRegMask()) {
+ DEBUG(dbgs() << "Won't speculate regmask: " << *I);
+ return false;
+ }
+ if (!MO->isReg())
+ continue;
+ unsigned Reg = MO->getReg();
+
+ // Remember clobbered regunits.
+ if (MO->isDef() && TargetRegisterInfo::isPhysicalRegister(Reg))
+ for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units)
+ ClobberedRegUnits.set(*Units);
+
+ if (!MO->readsReg() || !TargetRegisterInfo::isVirtualRegister(Reg))
+ continue;
+ MachineInstr *DefMI = MRI->getVRegDef(Reg);
+ if (!DefMI || DefMI->getParent() != Head)
+ continue;
+ if (InsertAfter.insert(DefMI))
+ DEBUG(dbgs() << "BB#" << MBB->getNumber() << " depends on " << *DefMI);
+ if (DefMI->isTerminator()) {
+ DEBUG(dbgs() << "Can't insert instructions below terminator.\n");
+ return false;
+ }
+ }
+ }
+ return true;
+}
+
+
+/// Find an insertion point in Head for the speculated instructions. The
+/// insertion point must be:
+///
+/// 1. Before any terminators.
+/// 2. After any instructions in InsertAfter.
+/// 3. Not have any clobbered regunits live.
+///
+/// This function sets InsertionPoint and returns true when successful; it
+/// returns false if no valid insertion point could be found.
+///
+bool SSAIfConv::findInsertionPoint() {
+ // Keep track of live regunits before the current position.
+ // Only track RegUnits that are also in ClobberedRegUnits.
+ LiveRegUnits.clear();
+ SmallVector<unsigned, 8> Reads;
+ MachineBasicBlock::iterator FirstTerm = Head->getFirstTerminator();
+ MachineBasicBlock::iterator I = Head->end();
+ MachineBasicBlock::iterator B = Head->begin();
+ while (I != B) {
+ --I;
+ // Some of the conditional code depends on I.
+ if (InsertAfter.count(I)) {
+ DEBUG(dbgs() << "Can't insert code after " << *I);
+ return false;
+ }
+
+ // Update live regunits.
+ for (MIOperands MO(I); MO.isValid(); ++MO) {
+ // We're ignoring regmask operands. That is conservatively correct.
+ if (!MO->isReg())
+ continue;
+ unsigned Reg = MO->getReg();
+ if (!TargetRegisterInfo::isPhysicalRegister(Reg))
+ continue;
+ // I clobbers Reg, so it isn't live before I.
+ if (MO->isDef())
+ for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units)
+ LiveRegUnits.erase(*Units);
+ // Unless I reads Reg.
+ if (MO->readsReg())
+ Reads.push_back(Reg);
+ }
+ // Anything read by I is live before I.
+ while (!Reads.empty())
+ for (MCRegUnitIterator Units(Reads.pop_back_val(), TRI); Units.isValid();
+ ++Units)
+ if (ClobberedRegUnits.test(*Units))
+ LiveRegUnits.insert(*Units);
+
+ // We can't insert before a terminator.
+ if (I != FirstTerm && I->isTerminator())
+ continue;
+
+ // Some of the clobbered registers are live before I, not a valid insertion
+ // point.
+ if (!LiveRegUnits.empty()) {
+ DEBUG({
+ dbgs() << "Would clobber";
+ for (SparseSet<unsigned>::const_iterator
+ i = LiveRegUnits.begin(), e = LiveRegUnits.end(); i != e; ++i)
+ dbgs() << ' ' << PrintRegUnit(*i, TRI);
+ dbgs() << " live before " << *I;
+ });
+ continue;
+ }
+
+ // This is a valid insertion point.
+ InsertionPoint = I;
+ DEBUG(dbgs() << "Can insert before " << *I);
+ return true;
+ }
+ DEBUG(dbgs() << "No legal insertion point found.\n");
+ return false;
+}
+
+
+
+/// canConvertIf - analyze the sub-cfg rooted in MBB, and return true if it is
+/// a potential candidate for if-conversion. Fill out the internal state.
+///
+bool SSAIfConv::canConvertIf(MachineBasicBlock *MBB) {
+ Head = MBB;
+ TBB = FBB = Tail = 0;
+
+ if (Head->succ_size() != 2)
+ return false;
+ MachineBasicBlock *Succ0 = Head->succ_begin()[0];
+ MachineBasicBlock *Succ1 = Head->succ_begin()[1];
+
+ // Canonicalize so Succ0 has MBB as its single predecessor.
+ if (Succ0->pred_size() != 1)
+ std::swap(Succ0, Succ1);
+
+ if (Succ0->pred_size() != 1 || Succ0->succ_size() != 1)
+ return false;
+
+ Tail = Succ0->succ_begin()[0];
+
+ // This is not a triangle.
+ if (Tail != Succ1) {
+ // Check for a diamond. We won't deal with any critical edges.
+ if (Succ1->pred_size() != 1 || Succ1->succ_size() != 1 ||
+ Succ1->succ_begin()[0] != Tail)
+ return false;
+ DEBUG(dbgs() << "\nDiamond: BB#" << Head->getNumber()
+ << " -> BB#" << Succ0->getNumber()
+ << "/BB#" << Succ1->getNumber()
+ << " -> BB#" << Tail->getNumber() << '\n');
+
+ // Live-in physregs are tricky to get right when speculating code.
+ if (!Tail->livein_empty()) {
+ DEBUG(dbgs() << "Tail has live-ins.\n");
+ return false;
+ }
+ } else {
+ DEBUG(dbgs() << "\nTriangle: BB#" << Head->getNumber()
+ << " -> BB#" << Succ0->getNumber()
+ << " -> BB#" << Tail->getNumber() << '\n');
+ }
+
+ // This is a triangle or a diamond.
+ // If Tail doesn't have any phis, there must be side effects.
+ if (Tail->empty() || !Tail->front().isPHI()) {
+ DEBUG(dbgs() << "No phis in tail.\n");
+ return false;
+ }
+
+ // The branch we're looking to eliminate must be analyzable.
+ Cond.clear();
+ if (TII->AnalyzeBranch(*Head, TBB, FBB, Cond)) {
+ DEBUG(dbgs() << "Branch not analyzable.\n");
+ return false;
+ }
+
+ // This is weird, probably some sort of degenerate CFG.
+ if (!TBB) {
+ DEBUG(dbgs() << "AnalyzeBranch didn't find conditional branch.\n");
+ return false;
+ }
+
+ // AnalyzeBranch doesn't set FBB on a fall-through branch.
+ // Make sure it is always set.
+ FBB = TBB == Succ0 ? Succ1 : Succ0;
+
+ // Any phis in the tail block must be convertible to selects.
+ PHIs.clear();
+ MachineBasicBlock *TPred = getTPred();
+ MachineBasicBlock *FPred = getFPred();
+ for (MachineBasicBlock::iterator I = Tail->begin(), E = Tail->end();
+ I != E && I->isPHI(); ++I) {
+ PHIs.push_back(&*I);
+ PHIInfo &PI = PHIs.back();
+ // Find PHI operands corresponding to TPred and FPred.
+ for (unsigned i = 1; i != PI.PHI->getNumOperands(); i += 2) {
+ if (PI.PHI->getOperand(i+1).getMBB() == TPred)
+ PI.TReg = PI.PHI->getOperand(i).getReg();
+ if (PI.PHI->getOperand(i+1).getMBB() == FPred)
+ PI.FReg = PI.PHI->getOperand(i).getReg();
+ }
+ assert(TargetRegisterInfo::isVirtualRegister(PI.TReg) && "Bad PHI");
+ assert(TargetRegisterInfo::isVirtualRegister(PI.FReg) && "Bad PHI");
+
+ // Get target information.
+ if (!TII->canInsertSelect(*Head, Cond, PI.TReg, PI.FReg,
+ PI.CondCycles, PI.TCycles, PI.FCycles)) {
+ DEBUG(dbgs() << "Can't convert: " << *PI.PHI);
+ return false;
+ }
+ }
+
+ // Check that the conditional instructions can be speculated.
+ InsertAfter.clear();
+ ClobberedRegUnits.reset();
+ if (TBB != Tail && !canSpeculateInstrs(TBB))
+ return false;
+ if (FBB != Tail && !canSpeculateInstrs(FBB))
+ return false;
+
+ // Try to find a valid insertion point for the speculated instructions in the
+ // head basic block.
+ if (!findInsertionPoint())
+ return false;
+
+ if (isTriangle())
+ ++NumTrianglesSeen;
+ else
+ ++NumDiamondsSeen;
+ return true;
+}
+
+/// replacePHIInstrs - Completely replace PHI instructions with selects.
+/// This is possible when the only Tail predecessors are the if-converted
+/// blocks.
+void SSAIfConv::replacePHIInstrs() {
+ assert(Tail->pred_size() == 2 && "Cannot replace PHIs");
+ MachineBasicBlock::iterator FirstTerm = Head->getFirstTerminator();
+ assert(FirstTerm != Head->end() && "No terminators");
+ DebugLoc HeadDL = FirstTerm->getDebugLoc();
+
+ // Convert all PHIs to select instructions inserted before FirstTerm.
+ for (unsigned i = 0, e = PHIs.size(); i != e; ++i) {
+ PHIInfo &PI = PHIs[i];
+ DEBUG(dbgs() << "If-converting " << *PI.PHI);
+ assert(PI.PHI->getNumOperands() == 5 && "Unexpected PHI operands.");
+ unsigned DstReg = PI.PHI->getOperand(0).getReg();
+ TII->insertSelect(*Head, FirstTerm, HeadDL, DstReg, Cond, PI.TReg, PI.FReg);
+ DEBUG(dbgs() << " --> " << *llvm::prior(FirstTerm));
+ PI.PHI->eraseFromParent();
+ PI.PHI = 0;
+ }
+}
+
+/// rewritePHIOperands - When there are additional Tail predecessors, insert
+/// select instructions in Head and rewrite PHI operands to use the selects.
+/// Keep the PHI instructions in Tail to handle the other predecessors.
+void SSAIfConv::rewritePHIOperands() {
+ MachineBasicBlock::iterator FirstTerm = Head->getFirstTerminator();
+ assert(FirstTerm != Head->end() && "No terminators");
+ DebugLoc HeadDL = FirstTerm->getDebugLoc();
+
+ // Convert all PHIs to select instructions inserted before FirstTerm.
+ for (unsigned i = 0, e = PHIs.size(); i != e; ++i) {
+ PHIInfo &PI = PHIs[i];
+ DEBUG(dbgs() << "If-converting " << *PI.PHI);
+ unsigned PHIDst = PI.PHI->getOperand(0).getReg();
+ unsigned DstReg = MRI->createVirtualRegister(MRI->getRegClass(PHIDst));
+ TII->insertSelect(*Head, FirstTerm, HeadDL, DstReg, Cond, PI.TReg, PI.FReg);
+ DEBUG(dbgs() << " --> " << *llvm::prior(FirstTerm));
+
+ // Rewrite PHI operands TPred -> (DstReg, Head), remove FPred.
+ for (unsigned i = PI.PHI->getNumOperands(); i != 1; i -= 2) {
+ MachineBasicBlock *MBB = PI.PHI->getOperand(i-1).getMBB();
+ if (MBB == getTPred()) {
+ PI.PHI->getOperand(i-1).setMBB(Head);
+ PI.PHI->getOperand(i-2).setReg(DstReg);
+ } else if (MBB == getFPred()) {
+ PI.PHI->RemoveOperand(i-1);
+ PI.PHI->RemoveOperand(i-2);
+ }
+ }
+ DEBUG(dbgs() << " --> " << *PI.PHI);
+ }
+}
+
+/// convertIf - Execute the if conversion after canConvertIf has determined the
+/// feasibility.
+///
+/// Any basic blocks erased will be added to RemovedBlocks.
+///
+void SSAIfConv::convertIf(SmallVectorImpl<MachineBasicBlock*> &RemovedBlocks) {
+ assert(Head && Tail && TBB && FBB && "Call canConvertIf first.");
+
+ // Update statistics.
+ if (isTriangle())
+ ++NumTrianglesConv;
+ else
+ ++NumDiamondsConv;
+
+ // Move all instructions into Head, except for the terminators.
+ if (TBB != Tail)
+ Head->splice(InsertionPoint, TBB, TBB->begin(), TBB->getFirstTerminator());
+ if (FBB != Tail)
+ Head->splice(InsertionPoint, FBB, FBB->begin(), FBB->getFirstTerminator());
+
+ // Are there extra Tail predecessors?
+ bool ExtraPreds = Tail->pred_size() != 2;
+ if (ExtraPreds)
+ rewritePHIOperands();
+ else
+ replacePHIInstrs();
+
+ // Fix up the CFG, temporarily leave Head without any successors.
+ Head->removeSuccessor(TBB);
+ Head->removeSuccessor(FBB);
+ if (TBB != Tail)
+ TBB->removeSuccessor(Tail);
+ if (FBB != Tail)
+ FBB->removeSuccessor(Tail);
+
+ // Fix up Head's terminators.
+ // It should become a single branch or a fallthrough.
+ DebugLoc HeadDL = Head->getFirstTerminator()->getDebugLoc();
+ TII->RemoveBranch(*Head);
+
+ // Erase the now empty conditional blocks. It is likely that Head can fall
+ // through to Tail, and we can join the two blocks.
+ if (TBB != Tail) {
+ RemovedBlocks.push_back(TBB);
+ TBB->eraseFromParent();
+ }
+ if (FBB != Tail) {
+ RemovedBlocks.push_back(FBB);
+ FBB->eraseFromParent();
+ }
+
+ assert(Head->succ_empty() && "Additional head successors?");
+ if (!ExtraPreds && Head->isLayoutSuccessor(Tail)) {
+ // Splice Tail onto the end of Head.
+ DEBUG(dbgs() << "Joining tail BB#" << Tail->getNumber()
+ << " into head BB#" << Head->getNumber() << '\n');
+ Head->splice(Head->end(), Tail,
+ Tail->begin(), Tail->end());
+ Head->transferSuccessorsAndUpdatePHIs(Tail);
+ RemovedBlocks.push_back(Tail);
+ Tail->eraseFromParent();
+ } else {
+ // We need a branch to Tail, let code placement work it out later.
+ DEBUG(dbgs() << "Converting to unconditional branch.\n");
+ SmallVector<MachineOperand, 0> EmptyCond;
+ TII->InsertBranch(*Head, Tail, 0, EmptyCond, HeadDL);
+ Head->addSuccessor(Tail);
+ }
+ DEBUG(dbgs() << *Head);
+}
+
+
+//===----------------------------------------------------------------------===//
+// EarlyIfConverter Pass
+//===----------------------------------------------------------------------===//
+
+namespace {
+class EarlyIfConverter : public MachineFunctionPass {
+ const TargetInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+ const MCSchedModel *SchedModel;
+ MachineRegisterInfo *MRI;
+ MachineDominatorTree *DomTree;
+ MachineLoopInfo *Loops;
+ MachineTraceMetrics *Traces;
+ MachineTraceMetrics::Ensemble *MinInstr;
+ SSAIfConv IfConv;
+
+public:
+ static char ID;
+ EarlyIfConverter() : MachineFunctionPass(ID) {}
+ void getAnalysisUsage(AnalysisUsage &AU) const;
+ bool runOnMachineFunction(MachineFunction &MF);
+
+private:
+ bool tryConvertIf(MachineBasicBlock*);
+ void updateDomTree(ArrayRef<MachineBasicBlock*> Removed);
+ void updateLoops(ArrayRef<MachineBasicBlock*> Removed);
+ void invalidateTraces();
+ bool shouldConvertIf();
+};
+} // end anonymous namespace
+
+char EarlyIfConverter::ID = 0;
+char &llvm::EarlyIfConverterID = EarlyIfConverter::ID;
+
+INITIALIZE_PASS_BEGIN(EarlyIfConverter,
+ "early-ifcvt", "Early If Converter", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(MachineTraceMetrics)
+INITIALIZE_PASS_END(EarlyIfConverter,
+ "early-ifcvt", "Early If Converter", false, false)
+
+void EarlyIfConverter::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<MachineBranchProbabilityInfo>();
+ AU.addRequired<MachineDominatorTree>();
+ AU.addPreserved<MachineDominatorTree>();
+ AU.addRequired<MachineLoopInfo>();
+ AU.addPreserved<MachineLoopInfo>();
+ AU.addRequired<MachineTraceMetrics>();
+ AU.addPreserved<MachineTraceMetrics>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+/// Update the dominator tree after if-conversion erased some blocks.
+void EarlyIfConverter::updateDomTree(ArrayRef<MachineBasicBlock*> Removed) {
+ // convertIf can remove TBB and FBB, and Tail can be merged into Head.
+ // TBB and FBB should not dominate any blocks.
+ // Tail children should be transferred to Head.
+ MachineDomTreeNode *HeadNode = DomTree->getNode(IfConv.Head);
+ for (unsigned i = 0, e = Removed.size(); i != e; ++i) {
+ MachineDomTreeNode *Node = DomTree->getNode(Removed[i]);
+ assert(Node != HeadNode && "Cannot erase the head node");
+ while (Node->getNumChildren()) {
+ assert(Node->getBlock() == IfConv.Tail && "Unexpected children");
+ DomTree->changeImmediateDominator(Node->getChildren().back(), HeadNode);
+ }
+ DomTree->eraseNode(Removed[i]);
+ }
+}
+
+/// Update LoopInfo after if-conversion.
+void EarlyIfConverter::updateLoops(ArrayRef<MachineBasicBlock*> Removed) {
+ if (!Loops)
+ return;
+ // If-conversion doesn't change loop structure, and it doesn't mess with back
+ // edges, so updating LoopInfo is simply removing the dead blocks.
+ for (unsigned i = 0, e = Removed.size(); i != e; ++i)
+ Loops->removeBlock(Removed[i]);
+}
+
+/// Invalidate MachineTraceMetrics before if-conversion. The Head, Tail, TBB and FBB blocks currently recorded in IfConv are invalidated.
+void EarlyIfConverter::invalidateTraces() {
+ Traces->verifyAnalysis(); // Verify once before the invalidations...
+ Traces->invalidate(IfConv.Head);
+ Traces->invalidate(IfConv.Tail);
+ Traces->invalidate(IfConv.TBB);
+ Traces->invalidate(IfConv.FBB);
+ Traces->verifyAnalysis(); // ...and once more after them.
+}
+
+// Adjust cycles with downward saturation: returns Cyc + Delta, or 0 when a negative Delta is larger in magnitude than Cyc.
+static unsigned adjCycles(unsigned Cyc, int Delta) {
+ if (Delta < 0 && Cyc + Delta > Cyc) // The unsigned sum wrapped around, i.e. the true result would be negative.
+ return 0;
+ return Cyc + Delta;
+}
+
+/// Apply cost model and heuristics to the if-conversion in IfConv.
+/// Return true if the conversion is a good idea.
+/// The conversion is rejected when the resource length exceeds the shorter critical path by more than CritLimit, or when a tail PHI's condition, TBB value or FBB value lies more than CritLimit cycles beyond that PHI's depth plus slack.
+bool EarlyIfConverter::shouldConvertIf() {
+ // Stress testing mode disables all cost considerations.
+ if (Stress)
+ return true;
+
+ if (!MinInstr) // Look the ensemble up lazily, only when it has not been set yet.
+ MinInstr = Traces->getEnsemble(MachineTraceMetrics::TS_MinInstrCount);
+
+ MachineTraceMetrics::Trace TBBTrace = MinInstr->getTrace(IfConv.getTPred());
+ MachineTraceMetrics::Trace FBBTrace = MinInstr->getTrace(IfConv.getFPred());
+ DEBUG(dbgs() << "TBB: " << TBBTrace << "FBB: " << FBBTrace);
+ unsigned MinCrit = std::min(TBBTrace.getCriticalPath(),
+ FBBTrace.getCriticalPath());
+
+ // Set a somewhat arbitrary limit on the critical path extension we accept.
+ unsigned CritLimit = SchedModel->MispredictPenalty/2; // Half the misprediction penalty (integer division).
+
+ // If-conversion only makes sense when there is unexploited ILP. Compute the
+ // maximum-ILP resource length of the trace after if-conversion. Compare it
+ // to the shortest critical path.
+ SmallVector<const MachineBasicBlock*, 1> ExtraBlocks;
+ if (IfConv.TBB != IfConv.Tail) // Only count TBB when it is a block distinct from Tail.
+ ExtraBlocks.push_back(IfConv.TBB);
+ unsigned ResLength = FBBTrace.getResourceLength(ExtraBlocks);
+ DEBUG(dbgs() << "Resource length " << ResLength
+ << ", minimal critical path " << MinCrit << '\n');
+ if (ResLength > MinCrit + CritLimit) {
+ DEBUG(dbgs() << "Not enough available ILP.\n");
+ return false;
+ }
+
+ // Assume that the depth of the first head terminator will also be the depth
+ // of the select instruction inserted, as determined by the flag dependency.
+ // TBB / FBB data dependencies may delay the select even more.
+ MachineTraceMetrics::Trace HeadTrace = MinInstr->getTrace(IfConv.Head);
+ unsigned BranchDepth =
+ HeadTrace.getInstrCycles(IfConv.Head->getFirstTerminator()).Depth;
+ DEBUG(dbgs() << "Branch depth: " << BranchDepth << '\n');
+
+ // Look at all the tail phis, and compute the critical path extension caused
+ // by inserting select instructions.
+ MachineTraceMetrics::Trace TailTrace = MinInstr->getTrace(IfConv.Tail);
+ for (unsigned i = 0, e = IfConv.PHIs.size(); i != e; ++i) {
+ SSAIfConv::PHIInfo &PI = IfConv.PHIs[i];
+ unsigned Slack = TailTrace.getInstrSlack(PI.PHI);
+ unsigned MaxDepth = Slack + TailTrace.getInstrCycles(PI.PHI).Depth; // The PHI's depth plus its slack: the threshold for the three checks below.
+ DEBUG(dbgs() << "Slack " << Slack << ":\t" << *PI.PHI);
+
+ // The condition is pulled into the critical path.
+ unsigned CondDepth = adjCycles(BranchDepth, PI.CondCycles);
+ if (CondDepth > MaxDepth) {
+ unsigned Extra = CondDepth - MaxDepth;
+ DEBUG(dbgs() << "Condition adds " << Extra << " cycles.\n");
+ if (Extra > CritLimit) {
+ DEBUG(dbgs() << "Exceeds limit of " << CritLimit << '\n');
+ return false;
+ }
+ }
+
+ // The TBB value is pulled into the critical path.
+ unsigned TDepth = adjCycles(TBBTrace.getPHIDepth(PI.PHI), PI.TCycles);
+ if (TDepth > MaxDepth) {
+ unsigned Extra = TDepth - MaxDepth;
+ DEBUG(dbgs() << "TBB data adds " << Extra << " cycles.\n");
+ if (Extra > CritLimit) {
+ DEBUG(dbgs() << "Exceeds limit of " << CritLimit << '\n');
+ return false;
+ }
+ }
+
+ // The FBB value is pulled into the critical path.
+ unsigned FDepth = adjCycles(FBBTrace.getPHIDepth(PI.PHI), PI.FCycles);
+ if (FDepth > MaxDepth) {
+ unsigned Extra = FDepth - MaxDepth;
+ DEBUG(dbgs() << "FBB data adds " << Extra << " cycles.\n");
+ if (Extra > CritLimit) {
+ DEBUG(dbgs() << "Exceeds limit of " << CritLimit << '\n');
+ return false;
+ }
+ }
+ }
+ return true; // No check above rejected the conversion.
+}
+
+/// Attempt repeated if-conversion on MBB, return true if successful.
+/// Keeps converting for as long as both canConvertIf() and shouldConvertIf() accept; the dominator tree and loop info are updated after each conversion.
+bool EarlyIfConverter::tryConvertIf(MachineBasicBlock *MBB) {
+ bool Changed = false;
+ while (IfConv.canConvertIf(MBB) && shouldConvertIf()) {
+ // If-convert MBB and update analyses.
+ invalidateTraces(); // Done before convertIf() changes the blocks.
+ SmallVector<MachineBasicBlock*, 4> RemovedBlocks;
+ IfConv.convertIf(RemovedBlocks);
+ Changed = true;
+ updateDomTree(RemovedBlocks);
+ updateLoops(RemovedBlocks);
+ }
+ return Changed;
+}
+
+bool EarlyIfConverter::runOnMachineFunction(MachineFunction &MF) { // Cache target and analysis handles, then try if-conversion on every block; returns true if any conversion happened.
+ DEBUG(dbgs() << "********** EARLY IF-CONVERSION **********\n"
+ << "********** Function: "
+ << ((Value*)MF.getFunction())->getName() << '\n');
+ TII = MF.getTarget().getInstrInfo();
+ TRI = MF.getTarget().getRegisterInfo();
+ SchedModel = MF.getTarget().getInstrItineraryData()->SchedModel; // NOTE(review): getInstrItineraryData() is dereferenced without a null check -- confirm it cannot be null here.
+ MRI = &MF.getRegInfo();
+ DomTree = &getAnalysis<MachineDominatorTree>();
+ Loops = getAnalysisIfAvailable<MachineLoopInfo>(); // May be null; updateLoops() checks for that.
+ Traces = &getAnalysis<MachineTraceMetrics>();
+ MinInstr = 0; // Reset; shouldConvertIf() fetches the ensemble on demand.
+
+ bool Changed = false;
+ IfConv.runOnMachineFunction(MF);
+
+ // Visit blocks in dominator tree post-order. The post-order enables nested
+ // if-conversion in a single pass. The tryConvertIf() function may erase
+ // blocks, but only blocks dominated by the head block. This makes it safe to
+ // update the dominator tree while the post-order iterator is still active.
+ for (po_iterator<MachineDominatorTree*>
+ I = po_begin(DomTree), E = po_end(DomTree); I != E; ++I)
+ if (tryConvertIf(I->getBlock()))
+ Changed = true;
+
+ MF.verify(this, "After early if-conversion");
+ return Changed;
+}
diff --git a/lib/CodeGen/ExecutionDepsFix.cpp b/lib/CodeGen/ExecutionDepsFix.cpp
index a48c540..fee8e47 100644
--- a/lib/CodeGen/ExecutionDepsFix.cpp
+++ b/lib/CodeGen/ExecutionDepsFix.cpp
@@ -59,7 +59,7 @@ struct DomainValue {
// Pointer to the next DomainValue in a chain. When two DomainValues are
// merged, Victim.Next is set to point to Victor, so old DomainValue
- // references can be updated by folowing the chain.
+ // references can be updated by following the chain.
DomainValue *Next;
// Twiddleable instructions using or defining these registers.
@@ -666,7 +666,8 @@ bool ExeDepsFix::runOnMachineFunction(MachineFunction &mf) {
// or -1.
AliasMap.resize(TRI->getNumRegs(), -1);
for (unsigned i = 0, e = RC->getNumRegs(); i != e; ++i)
- for (const uint16_t *AI = TRI->getOverlaps(RC->getRegister(i)); *AI; ++AI)
+ for (MCRegAliasIterator AI(RC->getRegister(i), TRI, true);
+ AI.isValid(); ++AI)
AliasMap[*AI] = i;
}
diff --git a/lib/CodeGen/ExpandPostRAPseudos.cpp b/lib/CodeGen/ExpandPostRAPseudos.cpp
index b14afc2..7a17331 100644
--- a/lib/CodeGen/ExpandPostRAPseudos.cpp
+++ b/lib/CodeGen/ExpandPostRAPseudos.cpp
@@ -131,13 +131,16 @@ bool ExpandPostRA::LowerSubregToReg(MachineInstr *MI) {
} else {
TII->copyPhysReg(*MBB, MI, MI->getDebugLoc(), DstSubReg, InsReg,
MI->getOperand(2).isKill());
+
+ // Implicitly define DstReg for subsequent uses.
+ MachineBasicBlock::iterator CopyMI = MI;
+ --CopyMI;
+ CopyMI->addRegisterDefined(DstReg);
+
// Transfer the kill/dead flags, if needed.
if (MI->getOperand(0).isDead())
TransferDeadFlag(MI, DstSubReg, TRI);
- DEBUG({
- MachineBasicBlock::iterator dMI = MI;
- dbgs() << "subreg: " << *(--dMI);
- });
+ DEBUG(dbgs() << "subreg: " << *CopyMI);
}
DEBUG(dbgs() << '\n');
diff --git a/lib/CodeGen/IfConversion.cpp b/lib/CodeGen/IfConversion.cpp
index 75ae5b9..4214ba1 100644
--- a/lib/CodeGen/IfConversion.cpp
+++ b/lib/CodeGen/IfConversion.cpp
@@ -18,6 +18,7 @@
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/MC/MCInstrItineraries.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetLowering.h"
@@ -155,7 +156,9 @@ namespace {
const TargetRegisterInfo *TRI;
const InstrItineraryData *InstrItins;
const MachineBranchProbabilityInfo *MBPI;
+ MachineRegisterInfo *MRI;
+ bool PreRegAlloc;
bool MadeChange;
int FnNum;
public:
@@ -263,14 +266,20 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) {
TII = MF.getTarget().getInstrInfo();
TRI = MF.getTarget().getRegisterInfo();
MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
+ MRI = &MF.getRegInfo();
InstrItins = MF.getTarget().getInstrItineraryData();
if (!TII) return false;
- // Tail merge tend to expose more if-conversion opportunities.
- BranchFolder BF(true, false);
- bool BFChange = BF.OptimizeFunction(MF, TII,
+ PreRegAlloc = MRI->isSSA();
+
+ bool BFChange = false;
+ if (!PreRegAlloc) {
+ // Tail merge tend to expose more if-conversion opportunities.
+ BranchFolder BF(true, false);
+ BFChange = BF.OptimizeFunction(MF, TII,
MF.getTarget().getRegisterInfo(),
getAnalysisIfAvailable<MachineModuleInfo>());
+ }
DEBUG(dbgs() << "\nIfcvt: function (" << ++FnNum << ") \'"
<< MF.getFunction()->getName() << "\'");
@@ -621,7 +630,7 @@ void IfConverter::ScanInstructions(BBInfo &BBI) {
if (BBI.IsDone)
return;
- bool AlreadyPredicated = BBI.Predicate.size() > 0;
+ bool AlreadyPredicated = !BBI.Predicate.empty();
// First analyze the end of BB branches.
BBI.TrueBB = BBI.FalseBB = NULL;
BBI.BrCond.clear();
@@ -786,8 +795,8 @@ IfConverter::BBInfo &IfConverter::AnalyzeBlock(MachineBasicBlock *BB,
unsigned Dups = 0;
unsigned Dups2 = 0;
- bool TNeedSub = TrueBBI.Predicate.size() > 0;
- bool FNeedSub = FalseBBI.Predicate.size() > 0;
+ bool TNeedSub = !TrueBBI.Predicate.empty();
+ bool FNeedSub = !FalseBBI.Predicate.empty();
bool Enqueued = false;
BranchProbability Prediction = MBPI->getEdgeProbability(BB, TrueBBI.BB);
@@ -962,9 +971,8 @@ static void InitPredRedefs(MachineBasicBlock *BB, SmallSet<unsigned,4> &Redefs,
E = BB->livein_end(); I != E; ++I) {
unsigned Reg = *I;
Redefs.insert(Reg);
- for (const uint16_t *Subreg = TRI->getSubRegisters(Reg);
- *Subreg; ++Subreg)
- Redefs.insert(*Subreg);
+ for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs)
+ Redefs.insert(*SubRegs);
}
}
@@ -983,8 +991,8 @@ static void UpdatePredRedefs(MachineInstr *MI, SmallSet<unsigned,4> &Redefs,
Defs.push_back(Reg);
else if (MO.isKill()) {
Redefs.erase(Reg);
- for (const uint16_t *SR = TRI->getSubRegisters(Reg); *SR; ++SR)
- Redefs.erase(*SR);
+ for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs)
+ Redefs.erase(*SubRegs);
}
}
for (unsigned i = 0, e = Defs.size(); i != e; ++i) {
@@ -993,11 +1001,12 @@ static void UpdatePredRedefs(MachineInstr *MI, SmallSet<unsigned,4> &Redefs,
if (AddImpUse)
// Treat predicated update as read + write.
MI->addOperand(MachineOperand::CreateReg(Reg, false/*IsDef*/,
- true/*IsImp*/,false/*IsKill*/));
+ true/*IsImp*/,false/*IsKill*/,
+ false/*IsDead*/,true/*IsUndef*/));
} else {
Redefs.insert(Reg);
- for (const uint16_t *SR = TRI->getSubRegisters(Reg); *SR; ++SR)
- Redefs.insert(*SR);
+ for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs)
+ Redefs.insert(*SubRegs);
}
}
}
@@ -1335,8 +1344,8 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind,
// These are defined before ctrl flow reach the 'false' instructions.
// They cannot be modified by the 'true' instructions.
ExtUses.insert(Reg);
- for (const uint16_t *SR = TRI->getSubRegisters(Reg); *SR; ++SR)
- ExtUses.insert(*SR);
+ for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs)
+ ExtUses.insert(*SubRegs);
}
}
@@ -1344,8 +1353,8 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind,
unsigned Reg = Defs[i];
if (!ExtUses.count(Reg)) {
RedefsByFalse.insert(Reg);
- for (const uint16_t *SR = TRI->getSubRegisters(Reg); *SR; ++SR)
- RedefsByFalse.insert(*SR);
+ for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs)
+ RedefsByFalse.insert(*SubRegs);
}
}
}
diff --git a/lib/CodeGen/InlineSpiller.cpp b/lib/CodeGen/InlineSpiller.cpp
index d5ea666..07e37af 100644
--- a/lib/CodeGen/InlineSpiller.cpp
+++ b/lib/CodeGen/InlineSpiller.cpp
@@ -52,7 +52,6 @@ static cl::opt<bool> DisableHoisting("disable-spill-hoist", cl::Hidden,
namespace {
class InlineSpiller : public Spiller {
- MachineFunctionPass &Pass;
MachineFunction &MF;
LiveIntervals &LIS;
LiveStacks &LSS;
@@ -137,8 +136,7 @@ public:
InlineSpiller(MachineFunctionPass &pass,
MachineFunction &mf,
VirtRegMap &vrm)
- : Pass(pass),
- MF(mf),
+ : MF(mf),
LIS(pass.getAnalysis<LiveIntervals>()),
LSS(pass.getAnalysis<LiveStacks>()),
AA(&pass.getAnalysis<AliasAnalysis>()),
@@ -578,11 +576,11 @@ MachineInstr *InlineSpiller::traceSiblingValue(unsigned UseReg, VNInfo *UseVNI,
if (unsigned SrcReg = isFullCopyOf(MI, Reg)) {
if (isSibling(SrcReg)) {
LiveInterval &SrcLI = LIS.getInterval(SrcReg);
- LiveRange *SrcLR = SrcLI.getLiveRangeContaining(VNI->def.getRegSlot(true));
- assert(SrcLR && "Copy from non-existing value");
+ LiveRangeQuery SrcQ(SrcLI, VNI->def);
+ assert(SrcQ.valueIn() && "Copy from non-existing value");
// Check if this COPY kills its source.
- SVI->second.KillsSource = (SrcLR->end == VNI->def);
- VNInfo *SrcVNI = SrcLR->valno;
+ SVI->second.KillsSource = SrcQ.isKill();
+ VNInfo *SrcVNI = SrcQ.valueIn();
DEBUG(dbgs() << "copy of " << PrintReg(SrcReg) << ':'
<< SrcVNI->id << '@' << SrcVNI->def
<< " kill=" << unsigned(SVI->second.KillsSource) << '\n');
@@ -1083,6 +1081,10 @@ void InlineSpiller::insertReload(LiveInterval &NewLI,
MRI.getRegClass(NewLI.reg), &TRI);
--MI; // Point to load instruction.
SlotIndex LoadIdx = LIS.InsertMachineInstrInMaps(MI).getRegSlot();
+ // Some (out-of-tree) targets have early-clobber (EC) reload instructions.
+ if (MachineOperand *MO = MI->findRegisterDefOperand(NewLI.reg))
+ if (MO->isEarlyClobber())
+ LoadIdx = LoadIdx.getRegSlot(true);
DEBUG(dbgs() << "\treload: " << LoadIdx << '\t' << *MI);
VNInfo *LoadVNI = NewLI.getNextValue(LoadIdx, LIS.getVNInfoAllocator());
NewLI.addRange(LiveRange(LoadIdx, Idx, LoadVNI));
@@ -1275,8 +1277,8 @@ void InlineSpiller::spill(LiveRangeEdit &edit) {
DEBUG(dbgs() << "Inline spilling "
<< MRI.getRegClass(edit.getReg())->getName()
- << ':' << edit.getParent() << "\nFrom original "
- << LIS.getInterval(Original) << '\n');
+ << ':' << PrintReg(edit.getReg()) << ' ' << edit.getParent()
+ << "\nFrom original " << LIS.getInterval(Original) << '\n');
assert(edit.getParent().isSpillable() &&
"Attempting to spill already spilled value.");
assert(DeadDefs.empty() && "Previous spill didn't remove dead defs");
diff --git a/lib/CodeGen/InterferenceCache.cpp b/lib/CodeGen/InterferenceCache.cpp
index 8368b58..1541bf0 100644
--- a/lib/CodeGen/InterferenceCache.cpp
+++ b/lib/CodeGen/InterferenceCache.cpp
@@ -39,7 +39,7 @@ InterferenceCache::Entry *InterferenceCache::get(unsigned PhysReg) {
unsigned E = PhysRegEntries[PhysReg];
if (E < CacheEntries && Entries[E].getPhysReg() == PhysReg) {
if (!Entries[E].valid(LIUArray, TRI))
- Entries[E].revalidate();
+ Entries[E].revalidate(LIUArray, TRI);
return &Entries[E];
}
// No valid entry exists, pick the next round-robin entry.
@@ -61,13 +61,15 @@ InterferenceCache::Entry *InterferenceCache::get(unsigned PhysReg) {
}
/// revalidate - LIU contents have changed, update tags.
-void InterferenceCache::Entry::revalidate() {
+void InterferenceCache::Entry::revalidate(LiveIntervalUnion *LIUArray,
+ const TargetRegisterInfo *TRI) {
// Invalidate all block entries.
++Tag;
// Invalidate all iterators.
PrevPos = SlotIndex();
- for (unsigned i = 0, e = Aliases.size(); i != e; ++i)
- Aliases[i].second = Aliases[i].first->getTag();
+ unsigned i = 0;
+ for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units, ++i)
+ RegUnits[i].VirtTag = LIUArray[*Units].getTag();
}
void InterferenceCache::Entry::reset(unsigned physReg,
@@ -79,28 +81,23 @@ void InterferenceCache::Entry::reset(unsigned physReg,
++Tag;
PhysReg = physReg;
Blocks.resize(MF->getNumBlockIDs());
- Aliases.clear();
- for (const uint16_t *AS = TRI->getOverlaps(PhysReg); *AS; ++AS) {
- LiveIntervalUnion *LIU = LIUArray + *AS;
- Aliases.push_back(std::make_pair(LIU, LIU->getTag()));
- }
// Reset iterators.
PrevPos = SlotIndex();
- unsigned e = Aliases.size();
- Iters.resize(e);
- for (unsigned i = 0; i != e; ++i)
- Iters[i].setMap(Aliases[i].first->getMap());
+ RegUnits.clear();
+ for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
+ RegUnits.push_back(LIUArray[*Units]);
+ RegUnits.back().Fixed = &LIS->getRegUnit(*Units);
+ }
}
bool InterferenceCache::Entry::valid(LiveIntervalUnion *LIUArray,
const TargetRegisterInfo *TRI) {
- unsigned i = 0, e = Aliases.size();
- for (const uint16_t *AS = TRI->getOverlaps(PhysReg); *AS; ++AS, ++i) {
- LiveIntervalUnion *LIU = LIUArray + *AS;
- if (i == e || Aliases[i].first != LIU)
+ unsigned i = 0, e = RegUnits.size();
+ for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units, ++i) {
+ if (i == e)
return false;
- if (LIU->changedSince(Aliases[i].second))
+ if (LIUArray[*Units].changedSince(RegUnits[i].VirtTag))
return false;
}
return i == e;
@@ -112,12 +109,20 @@ void InterferenceCache::Entry::update(unsigned MBBNum) {
// Use advanceTo only when possible.
if (PrevPos != Start) {
- if (!PrevPos.isValid() || Start < PrevPos)
- for (unsigned i = 0, e = Iters.size(); i != e; ++i)
- Iters[i].find(Start);
- else
- for (unsigned i = 0, e = Iters.size(); i != e; ++i)
- Iters[i].advanceTo(Start);
+ if (!PrevPos.isValid() || Start < PrevPos) {
+ for (unsigned i = 0, e = RegUnits.size(); i != e; ++i) {
+ RegUnitInfo &RUI = RegUnits[i];
+ RUI.VirtI.find(Start);
+ RUI.FixedI = RUI.Fixed->find(Start);
+ }
+ } else {
+ for (unsigned i = 0, e = RegUnits.size(); i != e; ++i) {
+ RegUnitInfo &RUI = RegUnits[i];
+ RUI.VirtI.advanceTo(Start);
+ if (RUI.FixedI != RUI.Fixed->end())
+ RUI.FixedI = RUI.Fixed->advanceTo(RUI.FixedI, Start);
+ }
+ }
PrevPos = Start;
}
@@ -129,9 +134,9 @@ void InterferenceCache::Entry::update(unsigned MBBNum) {
BI->Tag = Tag;
BI->First = BI->Last = SlotIndex();
- // Check for first interference.
- for (unsigned i = 0, e = Iters.size(); i != e; ++i) {
- Iter &I = Iters[i];
+ // Check for first interference from virtregs.
+ for (unsigned i = 0, e = RegUnits.size(); i != e; ++i) {
+ LiveIntervalUnion::SegmentIter &I = RegUnits[i].VirtI;
if (!I.valid())
continue;
SlotIndex StartI = I.start();
@@ -141,6 +146,19 @@ void InterferenceCache::Entry::update(unsigned MBBNum) {
BI->First = StartI;
}
+ // Same thing for fixed interference.
+ for (unsigned i = 0, e = RegUnits.size(); i != e; ++i) {
+ LiveInterval::const_iterator I = RegUnits[i].FixedI;
+ LiveInterval::const_iterator E = RegUnits[i].Fixed->end();
+ if (I == E)
+ continue;
+ SlotIndex StartI = I->start;
+ if (StartI >= Stop)
+ continue;
+ if (!BI->First.isValid() || StartI < BI->First)
+ BI->First = StartI;
+ }
+
// Also check for register mask interference.
RegMaskSlots = LIS->getRegMaskSlotsInBlock(MBBNum);
RegMaskBits = LIS->getRegMaskBitsInBlock(MBBNum);
@@ -168,8 +186,8 @@ void InterferenceCache::Entry::update(unsigned MBBNum) {
}
// Check for last interference in block.
- for (unsigned i = 0, e = Iters.size(); i != e; ++i) {
- Iter &I = Iters[i];
+ for (unsigned i = 0, e = RegUnits.size(); i != e; ++i) {
+ LiveIntervalUnion::SegmentIter &I = RegUnits[i].VirtI;
if (!I.valid() || I.start() >= Stop)
continue;
I.advanceTo(Stop);
@@ -183,6 +201,23 @@ void InterferenceCache::Entry::update(unsigned MBBNum) {
++I;
}
+ // Fixed interference.
+ for (unsigned i = 0, e = RegUnits.size(); i != e; ++i) {
+ LiveInterval::iterator &I = RegUnits[i].FixedI;
+ LiveInterval *LI = RegUnits[i].Fixed;
+ if (I == LI->end() || I->start >= Stop)
+ continue;
+ I = LI->advanceTo(I, Stop);
+ bool Backup = I == LI->end() || I->start >= Stop;
+ if (Backup)
+ --I;
+ SlotIndex StopI = I->end;
+ if (!BI->Last.isValid() || StopI > BI->Last)
+ BI->Last = StopI;
+ if (Backup)
+ ++I;
+ }
+
// Also check for register mask interference.
SlotIndex Limit = BI->Last.isValid() ? BI->Last : Start;
for (unsigned i = RegMaskSlots.size();
diff --git a/lib/CodeGen/InterferenceCache.h b/lib/CodeGen/InterferenceCache.h
index 485a325..3c928a5 100644
--- a/lib/CodeGen/InterferenceCache.h
+++ b/lib/CodeGen/InterferenceCache.h
@@ -7,7 +7,8 @@
//
//===----------------------------------------------------------------------===//
//
-// InterferenceCache remembers per-block interference in LiveIntervalUnions.
+// InterferenceCache remembers per-block interference from LiveIntervalUnions,
+// fixed RegUnit interference, and register masks.
//
//===----------------------------------------------------------------------===//
@@ -59,14 +60,31 @@ class InterferenceCache {
/// PrevPos - The previous position the iterators were moved to.
SlotIndex PrevPos;
- /// AliasTags - A LiveIntervalUnion pointer and tag for each alias of
- /// PhysReg.
- SmallVector<std::pair<LiveIntervalUnion*, unsigned>, 8> Aliases;
+ /// RegUnitInfo - Information tracked about each RegUnit in PhysReg.
+ /// When PrevPos is set, the iterators are valid as if advanceTo(PrevPos)
+ /// had just been called.
+ struct RegUnitInfo {
+ /// Iterator pointing into the LiveIntervalUnion containing virtual
+ /// register interference.
+ LiveIntervalUnion::SegmentIter VirtI;
- typedef LiveIntervalUnion::SegmentIter Iter;
+ /// Tag of the LIU last time we looked.
+ unsigned VirtTag;
- /// Iters - an iterator for each alias
- SmallVector<Iter, 8> Iters;
+ /// Fixed interference in RegUnit.
+ LiveInterval *Fixed;
+
+ /// Iterator pointing into the fixed RegUnit interference.
+ LiveInterval::iterator FixedI;
+
+ RegUnitInfo(LiveIntervalUnion &LIU) : VirtTag(LIU.getTag()), Fixed(0) {
+ VirtI.setMap(LIU.getMap());
+ }
+ };
+
+ /// Info for each RegUnit in PhysReg. It is very rare for a PhysReg to have
+ /// more than 4 RegUnits.
+ SmallVector<RegUnitInfo, 4> RegUnits;
/// Blocks - Interference for each block in the function.
SmallVector<BlockInterference, 8> Blocks;
@@ -91,7 +109,7 @@ class InterferenceCache {
bool hasRefs() const { return RefCount > 0; }
- void revalidate();
+ void revalidate(LiveIntervalUnion *LIUArray, const TargetRegisterInfo *TRI);
/// valid - Return true if this is a valid entry for physReg.
bool valid(LiveIntervalUnion *LIUArray, const TargetRegisterInfo *TRI);
diff --git a/lib/CodeGen/IntrinsicLowering.cpp b/lib/CodeGen/IntrinsicLowering.cpp
index a9ca42f..8d2282a 100644
--- a/lib/CodeGen/IntrinsicLowering.cpp
+++ b/lib/CodeGen/IntrinsicLowering.cpp
@@ -11,17 +11,17 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/CodeGen/IntrinsicLowering.h"
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
+#include "llvm/IRBuilder.h"
#include "llvm/Module.h"
#include "llvm/Type.h"
-#include "llvm/CodeGen/IntrinsicLowering.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/Support/CallSite.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/IRBuilder.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetData.h"
-#include "llvm/ADT/SmallVector.h"
using namespace llvm;
template <class ArgIt>
diff --git a/lib/CodeGen/LLVMTargetMachine.cpp b/lib/CodeGen/LLVMTargetMachine.cpp
index a1f479a..cac0c83 100644
--- a/lib/CodeGen/LLVMTargetMachine.cpp
+++ b/lib/CodeGen/LLVMTargetMachine.cpp
@@ -13,6 +13,7 @@
#include "llvm/Transforms/Scalar.h"
#include "llvm/PassManager.h"
+#include "llvm/Assembly/PrintModulePass.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/MachineFunctionAnalysis.h"
@@ -78,40 +79,15 @@ LLVMTargetMachine::LLVMTargetMachine(const Target &T, StringRef Triple,
"and that InitializeAllTargetMCs() is being invoked!");
}
-/// Turn exception handling constructs into something the code generators can
-/// handle.
-static void addPassesToHandleExceptions(TargetMachine *TM,
- PassManagerBase &PM) {
- switch (TM->getMCAsmInfo()->getExceptionHandlingType()) {
- case ExceptionHandling::SjLj:
- // SjLj piggy-backs on dwarf for this bit. The cleanups done apply to both
- // Dwarf EH prepare needs to be run after SjLj prepare. Otherwise,
- // catch info can get misplaced when a selector ends up more than one block
- // removed from the parent invoke(s). This could happen when a landing
- // pad is shared by multiple invokes and is also a target of a normal
- // edge from elsewhere.
- PM.add(createSjLjEHPreparePass(TM->getTargetLowering()));
- // FALLTHROUGH
- case ExceptionHandling::DwarfCFI:
- case ExceptionHandling::ARM:
- case ExceptionHandling::Win64:
- PM.add(createDwarfEHPass(TM));
- break;
- case ExceptionHandling::None:
- PM.add(createLowerInvokePass(TM->getTargetLowering()));
-
- // The lower invoke pass may create unreachable code. Remove it.
- PM.add(createUnreachableBlockEliminationPass());
- break;
- }
-}
-
/// addPassesToX helper drives creation and initialization of TargetPassConfig.
static MCContext *addPassesToGenerateCode(LLVMTargetMachine *TM,
PassManagerBase &PM,
- bool DisableVerify) {
+ bool DisableVerify,
+ AnalysisID StartAfter,
+ AnalysisID StopAfter) {
// Targets may override createPassConfig to provide a target-specific subclass.
TargetPassConfig *PassConfig = TM->createPassConfig(PM);
+ PassConfig->setStartStopPasses(StartAfter, StopAfter);
// Set PassConfig options provided by TargetMachine.
PassConfig->setDisableVerify(DisableVerify);
@@ -120,7 +96,7 @@ static MCContext *addPassesToGenerateCode(LLVMTargetMachine *TM,
PassConfig->addIRPasses();
- addPassesToHandleExceptions(TM, PM);
+ PassConfig->addPassesToHandleExceptions();
PassConfig->addISelPrepare();
@@ -155,16 +131,30 @@ static MCContext *addPassesToGenerateCode(LLVMTargetMachine *TM,
bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM,
formatted_raw_ostream &Out,
CodeGenFileType FileType,
- bool DisableVerify) {
+ bool DisableVerify,
+ AnalysisID StartAfter,
+ AnalysisID StopAfter) {
// Add common CodeGen passes.
- MCContext *Context = addPassesToGenerateCode(this, PM, DisableVerify);
+ MCContext *Context = addPassesToGenerateCode(this, PM, DisableVerify,
+ StartAfter, StopAfter);
if (!Context)
return true;
+ if (StopAfter) {
+ // FIXME: The intent is that this should eventually write out a YAML file,
+ // containing the LLVM IR, the machine-level IR (when stopping after a
+ // machine-level pass), and whatever other information is needed to
+ // deserialize the code and resume compilation. For now, just write the
+ // LLVM IR.
+ PM.add(createPrintModulePass(&Out));
+ return false;
+ }
+
if (hasMCSaveTempLabels())
Context->setAllowTemporaryLabels(false);
const MCAsmInfo &MAI = *getMCAsmInfo();
+ const MCRegisterInfo &MRI = *getRegisterInfo();
const MCSubtargetInfo &STI = getSubtarget<MCSubtargetInfo>();
OwningPtr<MCStreamer> AsmStreamer;
@@ -180,7 +170,8 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM,
MCAsmBackend *MAB = 0;
if (ShowMCEncoding) {
const MCSubtargetInfo &STI = getSubtarget<MCSubtargetInfo>();
- MCE = getTarget().createMCCodeEmitter(*getInstrInfo(), STI, *Context);
+ MCE = getTarget().createMCCodeEmitter(*getInstrInfo(), MRI, STI,
+ *Context);
MAB = getTarget().createMCAsmBackend(getTargetTriple());
}
@@ -198,8 +189,8 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM,
case CGFT_ObjectFile: {
// Create the code emitter for the target if it exists. If not, .o file
// emission fails.
- MCCodeEmitter *MCE = getTarget().createMCCodeEmitter(*getInstrInfo(), STI,
- *Context);
+ MCCodeEmitter *MCE = getTarget().createMCCodeEmitter(*getInstrInfo(), MRI,
+ STI, *Context);
MCAsmBackend *MAB = getTarget().createMCAsmBackend(getTargetTriple());
if (MCE == 0 || MAB == 0)
return true;
@@ -242,7 +233,7 @@ bool LLVMTargetMachine::addPassesToEmitMachineCode(PassManagerBase &PM,
JITCodeEmitter &JCE,
bool DisableVerify) {
// Add common CodeGen passes.
- MCContext *Context = addPassesToGenerateCode(this, PM, DisableVerify);
+ MCContext *Context = addPassesToGenerateCode(this, PM, DisableVerify, 0, 0);
if (!Context)
return true;
@@ -262,7 +253,7 @@ bool LLVMTargetMachine::addPassesToEmitMC(PassManagerBase &PM,
raw_ostream &Out,
bool DisableVerify) {
// Add common CodeGen passes.
- Ctx = addPassesToGenerateCode(this, PM, DisableVerify);
+ Ctx = addPassesToGenerateCode(this, PM, DisableVerify, 0, 0);
if (!Ctx)
return true;
@@ -271,9 +262,10 @@ bool LLVMTargetMachine::addPassesToEmitMC(PassManagerBase &PM,
// Create the code emitter for the target if it exists. If not, .o file
// emission fails.
+ const MCRegisterInfo &MRI = *getRegisterInfo();
const MCSubtargetInfo &STI = getSubtarget<MCSubtargetInfo>();
- MCCodeEmitter *MCE = getTarget().createMCCodeEmitter(*getInstrInfo(),STI,
- *Ctx);
+ MCCodeEmitter *MCE = getTarget().createMCCodeEmitter(*getInstrInfo(), MRI,
+ STI, *Ctx);
MCAsmBackend *MAB = getTarget().createMCAsmBackend(getTargetTriple());
if (MCE == 0 || MAB == 0)
return true;
diff --git a/lib/CodeGen/LexicalScopes.cpp b/lib/CodeGen/LexicalScopes.cpp
index f1abcbb..6b6b9d0 100644
--- a/lib/CodeGen/LexicalScopes.cpp
+++ b/lib/CodeGen/LexicalScopes.cpp
@@ -16,8 +16,8 @@
#define DEBUG_TYPE "lexicalscopes"
#include "llvm/CodeGen/LexicalScopes.h"
+#include "llvm/DebugInfo.h"
#include "llvm/Function.h"
-#include "llvm/Analysis/DebugInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/Support/Debug.h"
diff --git a/lib/CodeGen/LiveDebugVariables.cpp b/lib/CodeGen/LiveDebugVariables.cpp
index 2187833..d631726 100644
--- a/lib/CodeGen/LiveDebugVariables.cpp
+++ b/lib/CodeGen/LiveDebugVariables.cpp
@@ -23,9 +23,9 @@
#include "LiveDebugVariables.h"
#include "VirtRegMap.h"
#include "llvm/Constants.h"
+#include "llvm/DebugInfo.h"
#include "llvm/Metadata.h"
#include "llvm/Value.h"
-#include "llvm/Analysis/DebugInfo.h"
#include "llvm/ADT/IntervalMap.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/LexicalScopes.h"
@@ -243,7 +243,7 @@ public:
/// computeIntervals - Compute the live intervals of all locations after
/// collecting all their def points.
- void computeIntervals(MachineRegisterInfo &MRI,
+ void computeIntervals(MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
LiveIntervals &LIS, MachineDominatorTree &MDT,
UserValueScopes &UVS);
@@ -618,6 +618,7 @@ UserValue::addDefsFromCopies(LiveInterval *LI, unsigned LocNo,
void
UserValue::computeIntervals(MachineRegisterInfo &MRI,
+ const TargetRegisterInfo &TRI,
LiveIntervals &LIS,
MachineDominatorTree &MDT,
UserValueScopes &UVS) {
@@ -634,15 +635,32 @@ UserValue::computeIntervals(MachineRegisterInfo &MRI,
unsigned LocNo = Defs[i].second;
const MachineOperand &Loc = locations[LocNo];
+ if (!Loc.isReg()) {
+ extendDef(Idx, LocNo, 0, 0, 0, LIS, MDT, UVS);
+ continue;
+ }
+
// Register locations are constrained to where the register value is live.
- if (Loc.isReg() && LIS.hasInterval(Loc.getReg())) {
- LiveInterval *LI = &LIS.getInterval(Loc.getReg());
- const VNInfo *VNI = LI->getVNInfoAt(Idx);
+ if (TargetRegisterInfo::isVirtualRegister(Loc.getReg())) {
+ LiveInterval *LI = 0;
+ const VNInfo *VNI = 0;
+ if (LIS.hasInterval(Loc.getReg())) {
+ LI = &LIS.getInterval(Loc.getReg());
+ VNI = LI->getVNInfoAt(Idx);
+ }
SmallVector<SlotIndex, 16> Kills;
extendDef(Idx, LocNo, LI, VNI, &Kills, LIS, MDT, UVS);
- addDefsFromCopies(LI, LocNo, Kills, Defs, MRI, LIS);
- } else
- extendDef(Idx, LocNo, 0, 0, 0, LIS, MDT, UVS);
+ if (LI)
+ addDefsFromCopies(LI, LocNo, Kills, Defs, MRI, LIS);
+ continue;
+ }
+
+ // For physregs, use the live range of the first regunit as a guide.
+ unsigned Unit = *MCRegUnitIterator(Loc.getReg(), &TRI);
+ LiveInterval *LI = &LIS.getRegUnit(Unit);
+ const VNInfo *VNI = LI->getVNInfoAt(Idx);
+ // Don't track copies from physregs, it is too expensive.
+ extendDef(Idx, LocNo, LI, VNI, 0, LIS, MDT, UVS);
}
// Finally, erase all the undefs.
@@ -656,7 +674,7 @@ UserValue::computeIntervals(MachineRegisterInfo &MRI,
void LDVImpl::computeIntervals() {
for (unsigned i = 0, e = userValues.size(); i != e; ++i) {
UserValueScopes UVS(userValues[i]->getDebugLoc(), LS);
- userValues[i]->computeIntervals(MF->getRegInfo(), *LIS, *MDT, UVS);
+ userValues[i]->computeIntervals(MF->getRegInfo(), *TRI, *LIS, *MDT, UVS);
userValues[i]->mapVirtRegs(this);
}
}
@@ -721,7 +739,8 @@ renameRegister(unsigned OldReg, unsigned NewReg, unsigned SubIdx) {
if (TargetRegisterInfo::isVirtualRegister(NewReg))
mapVirtReg(NewReg, UV);
- virtRegToEqClass.erase(OldReg);
+ if (OldReg != NewReg)
+ virtRegToEqClass.erase(OldReg);
do {
UV->renameRegister(OldReg, NewReg, SubIdx, TRI);
diff --git a/lib/CodeGen/LiveInterval.cpp b/lib/CodeGen/LiveInterval.cpp
index ac18843..0a795e6 100644
--- a/lib/CodeGen/LiveInterval.cpp
+++ b/lib/CodeGen/LiveInterval.cpp
@@ -48,6 +48,26 @@ LiveInterval::iterator LiveInterval::find(SlotIndex Pos) {
return I;
}
+VNInfo *LiveInterval::createDeadDef(SlotIndex Def,
+ VNInfo::Allocator &VNInfoAllocator) {
+ assert(!Def.isDead() && "Cannot define a value at the dead slot");
+ iterator I = find(Def);
+ if (I == end()) {
+ VNInfo *VNI = getNextValue(Def, VNInfoAllocator);
+ ranges.push_back(LiveRange(Def, Def.getDeadSlot(), VNI));
+ return VNI;
+ }
+ if (SlotIndex::isSameInstr(Def, I->start)) {
+ assert(I->start == Def && "Cannot insert def, already live");
+ assert(I->valno->def == Def && "Inconsistent existing value def");
+ return I->valno;
+ }
+ assert(SlotIndex::isEarlierInstr(Def, I->start) && "Already live at def");
+ VNInfo *VNI = getNextValue(Def, VNInfoAllocator);
+ ranges.insert(I, LiveRange(Def, Def.getDeadSlot(), VNI));
+ return VNI;
+}
+
/// killedInRange - Return true if the interval has kills in [Start,End).
bool LiveInterval::killedInRange(SlotIndex Start, SlotIndex End) const {
Ranges::const_iterator r =
@@ -140,7 +160,7 @@ void LiveInterval::markValNoForDeletion(VNInfo *ValNo) {
valnos.pop_back();
} while (!valnos.empty() && valnos.back()->isUnused());
} else {
- ValNo->setIsUnused(true);
+ ValNo->markUnused();
}
}
@@ -176,16 +196,16 @@ void LiveInterval::extendIntervalEndTo(Ranges::iterator I, SlotIndex NewEnd) {
// If NewEnd was in the middle of an interval, make sure to get its endpoint.
I->end = std::max(NewEnd, prior(MergeTo)->end);
- // Erase any dead ranges.
- ranges.erase(llvm::next(I), MergeTo);
-
// If the newly formed range now touches the range after it and if they have
// the same value number, merge the two ranges into one range.
- Ranges::iterator Next = llvm::next(I);
- if (Next != ranges.end() && Next->start <= I->end && Next->valno == ValNo) {
- I->end = Next->end;
- ranges.erase(Next);
+ if (MergeTo != ranges.end() && MergeTo->start <= I->end &&
+ MergeTo->valno == ValNo) {
+ I->end = MergeTo->end;
+ ++MergeTo;
}
+
+ // Erase any dead ranges.
+ ranges.erase(llvm::next(I), MergeTo);
}
@@ -353,18 +373,6 @@ void LiveInterval::removeValNo(VNInfo *ValNo) {
markValNoForDeletion(ValNo);
}
-/// findDefinedVNInfo - Find the VNInfo defined by the specified
-/// index (register interval).
-VNInfo *LiveInterval::findDefinedVNInfoForRegInt(SlotIndex Idx) const {
- for (LiveInterval::const_vni_iterator i = vni_begin(), e = vni_end();
- i != e; ++i) {
- if ((*i)->def == Idx)
- return *i;
- }
-
- return 0;
-}
-
/// join - Join two live intervals (this, and other) together. This applies
/// mappings to the value numbers in the LHS/RHS intervals as specified. If
/// the intervals are not joinable, this aborts.
@@ -373,6 +381,8 @@ void LiveInterval::join(LiveInterval &Other,
const int *RHSValNoAssignments,
SmallVector<VNInfo*, 16> &NewVNInfo,
MachineRegisterInfo *MRI) {
+ verify();
+
// Determine if any of our live range values are mapped. This is uncommon, so
// we want to avoid the interval scan if not.
bool MustMapCurValNos = false;
@@ -440,16 +450,148 @@ void LiveInterval::join(LiveInterval &Other,
valnos.resize(NumNewVals); // shrinkify
// Okay, now insert the RHS live ranges into the LHS.
- iterator InsertPos = begin();
unsigned RangeNo = 0;
for (iterator I = Other.begin(), E = Other.end(); I != E; ++I, ++RangeNo) {
// Map the valno in the other live range to the current live range.
I->valno = NewVNInfo[OtherAssignments[RangeNo]];
assert(I->valno && "Adding a dead range?");
- InsertPos = addRangeFrom(*I, InsertPos);
+ }
+ mergeIntervalRanges(Other);
+
+ verify();
+}
+
+/// \brief Helper function for merging in another LiveInterval's ranges.
+///
+/// This is a helper routine implementing an efficient merge of another
+/// LiveInterval's ranges into the current interval.
+///
+/// \param LHSValNo If non-NULL, set as the new value number for every range
+/// from RHS which is merged into the LHS.
+/// \param RHSValNo If non-NULL, then only ranges in RHS whose original value
+/// number matches this value number will be merged into LHS.
+void LiveInterval::mergeIntervalRanges(const LiveInterval &RHS,
+ VNInfo *LHSValNo,
+ const VNInfo *RHSValNo) {
+ if (RHS.empty())
+ return;
+
+ // Ensure we're starting with a valid range. Note that we don't verify RHS
+ // because it may have had its value numbers adjusted in preparation for
+ // merging.
+ verify();
+
+ // The strategy for merging these efficiently is as follows:
+ //
+ // 1) Find the beginning of the impacted ranges in the LHS.
+ // 2) Create a new, merged sub-sequence of ranges merging from the position in
+ // #1 until either LHS or RHS is exhausted. Any part of LHS between RHS
+ // entries being merged will be copied into this new range.
+ // 3) Replace the relevant section in LHS with these newly merged ranges.
+ // 4) Append any remaining ranges from RHS if LHS is exhausted in #2.
+ //
+ // We don't follow the typical in-place merge strategy for sorted ranges of
+ // appending the new ranges to the back and then using std::inplace_merge
+ // because one step of the merge can both mutate the original elements and
+ // remove elements from the original. Essentially, because the merge includes
+ // collapsing overlapping ranges, a more complex approach is required.
+
+ // We do an initial binary search to optimize for a common pattern: a large
+ // LHS, and a very small RHS.
+ const_iterator RI = RHS.begin(), RE = RHS.end();
+ iterator LE = end(), LI = std::upper_bound(begin(), LE, *RI);
+
+ // Merge into NewRanges until one of the ranges is exhausted.
+ SmallVector<LiveRange, 4> NewRanges;
+
+ // Keep track of where to begin the replacement.
+ iterator ReplaceI = LI;
+
+ // If there are preceding ranges in the LHS, put the last one into NewRanges
+ // so we can optionally extend it. Adjust the replacement point accordingly.
+ if (LI != begin()) {
+ ReplaceI = llvm::prior(LI);
+ NewRanges.push_back(*ReplaceI);
+ }
+
+ // Now loop over the mergeable portions of both LHS and RHS, merging into
+ // NewRanges.
+ while (LI != LE && RI != RE) {
+ // Skip incoming ranges with the wrong value.
+ if (RHSValNo && RI->valno != RHSValNo) {
+ ++RI;
+ continue;
+ }
+
+ // Select the first range. We pick the earliest start point, and then the
+ // largest range.
+ LiveRange R = *LI;
+ if (*RI < R) {
+ R = *RI;
+ ++RI;
+ if (LHSValNo)
+ R.valno = LHSValNo;
+ } else {
+ ++LI;
+ }
+
+ if (NewRanges.empty()) {
+ NewRanges.push_back(R);
+ continue;
+ }
+
+ LiveRange &LastR = NewRanges.back();
+ if (R.valno == LastR.valno) {
+ // Try to merge this range into the last one.
+ if (R.start <= LastR.end) {
+ LastR.end = std::max(LastR.end, R.end);
+ continue;
+ }
+ } else {
+ // We can't merge ranges across a value number.
+ assert(R.start >= LastR.end &&
+ "Cannot overlap two LiveRanges with differing ValID's");
+ }
+
+ // If all else fails, just append the range.
+ NewRanges.push_back(R);
+ }
+ assert(RI == RE || LI == LE);
+
+ // Check for being able to merge into the trailing sequence of ranges on the LHS.
+ if (!NewRanges.empty())
+ for (; LI != LE && (LI->valno == NewRanges.back().valno &&
+ LI->start <= NewRanges.back().end);
+ ++LI)
+ NewRanges.back().end = std::max(NewRanges.back().end, LI->end);
+
+ // Replace the ranges in the LHS with the newly merged ones. It would be
+ // really nice if there were a move-supporting 'replace' directly in
+ // SmallVector, but as there is not, we pay the price of copies to avoid
+ // wasted memory allocations.
+ SmallVectorImpl<LiveRange>::iterator NRI = NewRanges.begin(),
+ NRE = NewRanges.end();
+ for (; ReplaceI != LI && NRI != NRE; ++ReplaceI, ++NRI)
+ *ReplaceI = *NRI;
+ if (NRI == NRE)
+ ranges.erase(ReplaceI, LI);
+ else
+ ranges.insert(LI, NRI, NRE);
+
+ // And finally insert any trailing end of RHS (if we have one).
+ for (; RI != RE; ++RI) {
+ LiveRange R = *RI;
+ if (LHSValNo)
+ R.valno = LHSValNo;
+ if (!ranges.empty() &&
+ ranges.back().valno == R.valno && R.start <= ranges.back().end)
+ ranges.back().end = std::max(ranges.back().end, R.end);
+ else
+ ranges.push_back(R);
}
- ComputeJoinedWeight(Other);
+ // Ensure we finished with a valid new sequence of ranges.
+ verify();
}
/// MergeRangesInAsValue - Merge all of the intervals in RHS into this live
@@ -458,38 +600,20 @@ void LiveInterval::join(LiveInterval &Other,
/// the overlapping LiveRanges have the specified value number.
void LiveInterval::MergeRangesInAsValue(const LiveInterval &RHS,
VNInfo *LHSValNo) {
- // TODO: Make this more efficient.
- iterator InsertPos = begin();
- for (const_iterator I = RHS.begin(), E = RHS.end(); I != E; ++I) {
- // Map the valno in the other live range to the current live range.
- LiveRange Tmp = *I;
- Tmp.valno = LHSValNo;
- InsertPos = addRangeFrom(Tmp, InsertPos);
- }
+ mergeIntervalRanges(RHS, LHSValNo);
}
-
/// MergeValueInAsValue - Merge all of the live ranges of a specific val#
/// in RHS into this live interval as the specified value number.
/// The LiveRanges in RHS are allowed to overlap with LiveRanges in the
/// current interval, it will replace the value numbers of the overlaped
/// live ranges with the specified value number.
-void LiveInterval::MergeValueInAsValue(
- const LiveInterval &RHS,
- const VNInfo *RHSValNo, VNInfo *LHSValNo) {
- // TODO: Make this more efficient.
- iterator InsertPos = begin();
- for (const_iterator I = RHS.begin(), E = RHS.end(); I != E; ++I) {
- if (I->valno != RHSValNo)
- continue;
- // Map the valno in the other live range to the current live range.
- LiveRange Tmp = *I;
- Tmp.valno = LHSValNo;
- InsertPos = addRangeFrom(Tmp, InsertPos);
- }
+void LiveInterval::MergeValueInAsValue(const LiveInterval &RHS,
+ const VNInfo *RHSValNo,
+ VNInfo *LHSValNo) {
+ mergeIntervalRanges(RHS, LHSValNo, RHSValNo);
}
-
/// MergeValueNumberInto - This method is called when two value nubmers
/// are found to be equivalent. This eliminates V1, replacing all
/// LiveRanges with the V1 value number with the V2 value number. This can
@@ -543,9 +667,6 @@ VNInfo* LiveInterval::MergeValueNumberInto(VNInfo *V1, VNInfo *V2) {
}
}
- // Merge the relevant flags.
- V2->mergeFlags(V1);
-
// Now that V1 is dead, remove it.
markValNoForDeletion(V1);
@@ -569,6 +690,8 @@ void LiveInterval::Copy(const LiveInterval &RHS,
const LiveRange &LR = RHS.ranges[i];
addRange(LiveRange(LR.start, LR.end, getValNumInfo(LR.valno->id)));
}
+
+ verify();
}
unsigned LiveInterval::getSize() const {
@@ -578,29 +701,6 @@ unsigned LiveInterval::getSize() const {
return Sum;
}
-/// ComputeJoinedWeight - Set the weight of a live interval Joined
-/// after Other has been merged into it.
-void LiveInterval::ComputeJoinedWeight(const LiveInterval &Other) {
- // If either of these intervals was spilled, the weight is the
- // weight of the non-spilled interval. This can only happen with
- // iterative coalescers.
-
- if (Other.weight != HUGE_VALF) {
- weight += Other.weight;
- }
- else if (weight == HUGE_VALF &&
- !TargetRegisterInfo::isPhysicalRegister(reg)) {
- // Remove this assert if you have an iterative coalescer
- assert(0 && "Joining to spilled interval");
- weight = Other.weight;
- }
- else {
- // Otherwise the weight stays the same
- // Remove this assert if you have an iterative coalescer
- assert(0 && "Joining from spilled interval");
- }
-}
-
raw_ostream& llvm::operator<<(raw_ostream& os, const LiveRange &LR) {
return os << '[' << LR.start << ',' << LR.end << ':' << LR.valno->id << ")";
}
@@ -609,15 +709,10 @@ void LiveRange::dump() const {
dbgs() << *this << "\n";
}
-void LiveInterval::print(raw_ostream &OS, const TargetRegisterInfo *TRI) const {
- OS << PrintReg(reg, TRI);
- if (weight != 0)
- OS << ',' << weight;
-
+void LiveInterval::print(raw_ostream &OS) const {
if (empty())
- OS << " EMPTY";
+ OS << "EMPTY";
else {
- OS << " = ";
for (LiveInterval::Ranges::const_iterator I = ranges.begin(),
E = ranges.end(); I != E; ++I) {
OS << *I;
@@ -639,9 +734,7 @@ void LiveInterval::print(raw_ostream &OS, const TargetRegisterInfo *TRI) const {
} else {
OS << vni->def;
if (vni->isPHIDef())
- OS << "-phidef";
- if (vni->hasPHIKill())
- OS << "-phikill";
+ OS << "-phi";
}
}
}
@@ -651,6 +744,23 @@ void LiveInterval::dump() const {
dbgs() << *this << "\n";
}
+#ifndef NDEBUG
+void LiveInterval::verify() const {
+ for (const_iterator I = begin(), E = end(); I != E; ++I) {
+ assert(I->start.isValid());
+ assert(I->end.isValid());
+ assert(I->start < I->end);
+ assert(I->valno != 0);
+ assert(I->valno == valnos[I->valno->id]);
+ if (llvm::next(I) != E) {
+ assert(I->end <= llvm::next(I)->start);
+ if (I->end == llvm::next(I)->start)
+ assert(I->valno != llvm::next(I)->valno);
+ }
+ }
+}
+#endif
+
void LiveRange::print(raw_ostream &os) const {
os << *this;
@@ -712,13 +822,13 @@ void ConnectedVNInfoEqClasses::Distribute(LiveInterval *LIV[],
MachineOperand &MO = RI.getOperand();
MachineInstr *MI = MO.getParent();
++RI;
- if (MO.isUse() && MO.isUndef())
- continue;
// DBG_VALUE instructions should have been eliminated earlier.
- SlotIndex Idx = LIS.getInstructionIndex(MI);
- Idx = Idx.getRegSlot(MO.isUse());
- const VNInfo *VNI = LI.getVNInfoAt(Idx);
- assert(VNI && "Interval not live at use.");
+ LiveRangeQuery LRQ(LI, LIS.getInstructionIndex(MI));
+ const VNInfo *VNI = MO.readsReg() ? LRQ.valueIn() : LRQ.valueDefined();
+ // In the case of an <undef> use that isn't tied to any def, VNI will be
+ // NULL. If the use is tied to a def, VNI will be the defined value.
+ if (!VNI)
+ continue;
MO.setReg(LIV[getEqClass(VNI)]->reg);
}
diff --git a/lib/CodeGen/LiveIntervalAnalysis.cpp b/lib/CodeGen/LiveIntervalAnalysis.cpp
index 934cc12..d0f8ae1 100644
--- a/lib/CodeGen/LiveIntervalAnalysis.cpp
+++ b/lib/CodeGen/LiveIntervalAnalysis.cpp
@@ -20,6 +20,7 @@
#include "llvm/Value.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/LiveVariables.h"
+#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
@@ -31,20 +32,20 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/DenseSet.h"
-#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/STLExtras.h"
+#include "LiveRangeCalc.h"
#include <algorithm>
#include <limits>
#include <cmath>
using namespace llvm;
-// Hidden options for help debugging.
-static cl::opt<bool> DisableReMat("disable-rematerialization",
- cl::init(false), cl::Hidden);
-
-STATISTIC(numIntervals , "Number of original intervals");
+// Switch to the new experimental algorithm for computing live intervals.
+static cl::opt<bool>
+NewLiveIntervals("new-live-intervals", cl::Hidden,
+ cl::desc("Use new algorithm forcomputing live intervals"));
char LiveIntervals::ID = 0;
+char &llvm::LiveIntervalsID = LiveIntervals::ID;
INITIALIZE_PASS_BEGIN(LiveIntervals, "liveintervals",
"Live Interval Analysis", false, false)
INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
@@ -61,23 +62,35 @@ void LiveIntervals::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<LiveVariables>();
AU.addPreserved<LiveVariables>();
AU.addPreservedID(MachineLoopInfoID);
+ AU.addRequiredTransitiveID(MachineDominatorsID);
AU.addPreservedID(MachineDominatorsID);
AU.addPreserved<SlotIndexes>();
AU.addRequiredTransitive<SlotIndexes>();
MachineFunctionPass::getAnalysisUsage(AU);
}
+LiveIntervals::LiveIntervals() : MachineFunctionPass(ID),
+ DomTree(0), LRCalc(0) {
+ initializeLiveIntervalsPass(*PassRegistry::getPassRegistry());
+}
+
+LiveIntervals::~LiveIntervals() {
+ delete LRCalc;
+}
+
void LiveIntervals::releaseMemory() {
// Free the live intervals themselves.
- for (DenseMap<unsigned, LiveInterval*>::iterator I = r2iMap_.begin(),
- E = r2iMap_.end(); I != E; ++I)
- delete I->second;
-
- r2iMap_.clear();
+ for (unsigned i = 0, e = VirtRegIntervals.size(); i != e; ++i)
+ delete VirtRegIntervals[TargetRegisterInfo::index2VirtReg(i)];
+ VirtRegIntervals.clear();
RegMaskSlots.clear();
RegMaskBits.clear();
RegMaskBlocks.clear();
+ for (unsigned i = 0, e = RegUnitIntervals.size(); i != e; ++i)
+ delete RegUnitIntervals[i];
+ RegUnitIntervals.clear();
+
// Release VNInfo memory regions, VNInfo objects don't need to be dtor'd.
VNInfoAllocator.Reset();
}
@@ -85,20 +98,34 @@ void LiveIntervals::releaseMemory() {
/// runOnMachineFunction - Register allocate the whole function
///
bool LiveIntervals::runOnMachineFunction(MachineFunction &fn) {
- mf_ = &fn;
- mri_ = &mf_->getRegInfo();
- tm_ = &fn.getTarget();
- tri_ = tm_->getRegisterInfo();
- tii_ = tm_->getInstrInfo();
- aa_ = &getAnalysis<AliasAnalysis>();
- lv_ = &getAnalysis<LiveVariables>();
- indexes_ = &getAnalysis<SlotIndexes>();
- allocatableRegs_ = tri_->getAllocatableSet(fn);
- reservedRegs_ = tri_->getReservedRegs(fn);
-
- computeIntervals();
-
- numIntervals += getNumIntervals();
+ MF = &fn;
+ MRI = &MF->getRegInfo();
+ TM = &fn.getTarget();
+ TRI = TM->getRegisterInfo();
+ TII = TM->getInstrInfo();
+ AA = &getAnalysis<AliasAnalysis>();
+ LV = &getAnalysis<LiveVariables>();
+ Indexes = &getAnalysis<SlotIndexes>();
+ DomTree = &getAnalysis<MachineDominatorTree>();
+ if (!LRCalc)
+ LRCalc = new LiveRangeCalc();
+ AllocatableRegs = TRI->getAllocatableSet(fn);
+ ReservedRegs = TRI->getReservedRegs(fn);
+
+ // Allocate space for all virtual registers.
+ VirtRegIntervals.resize(MRI->getNumVirtRegs());
+
+ if (NewLiveIntervals) {
+ // This is the new way of computing live intervals.
+ // It is independent of LiveVariables, and it can run at any time.
+ computeVirtRegs();
+ computeRegMasks();
+ } else {
+ // This is the old way of computing live intervals.
+ // It depends on LiveVariables.
+ computeIntervals();
+ }
+ computeLiveInRegUnits();
DEBUG(dump());
return true;
@@ -108,27 +135,24 @@ bool LiveIntervals::runOnMachineFunction(MachineFunction &fn) {
void LiveIntervals::print(raw_ostream &OS, const Module* ) const {
OS << "********** INTERVALS **********\n";
- // Dump the physregs.
- for (unsigned Reg = 1, RegE = tri_->getNumRegs(); Reg != RegE; ++Reg)
- if (const LiveInterval *LI = r2iMap_.lookup(Reg)) {
- LI->print(OS, tri_);
- OS << '\n';
- }
+ // Dump the regunits.
+ for (unsigned i = 0, e = RegUnitIntervals.size(); i != e; ++i)
+ if (LiveInterval *LI = RegUnitIntervals[i])
+ OS << PrintRegUnit(i, TRI) << " = " << *LI << '\n';
// Dump the virtregs.
- for (unsigned Reg = 0, RegE = mri_->getNumVirtRegs(); Reg != RegE; ++Reg)
- if (const LiveInterval *LI =
- r2iMap_.lookup(TargetRegisterInfo::index2VirtReg(Reg))) {
- LI->print(OS, tri_);
- OS << '\n';
- }
+ for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) {
+ unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
+ if (hasInterval(Reg))
+ OS << PrintReg(Reg) << " = " << getInterval(Reg) << '\n';
+ }
printInstrs(OS);
}
void LiveIntervals::printInstrs(raw_ostream &OS) const {
OS << "********** MACHINEINSTRS **********\n";
- mf_->print(OS, indexes_);
+ MF->print(OS, Indexes);
}
void LiveIntervals::dumpInstrs() const {
@@ -176,13 +200,13 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb,
MachineOperand& MO,
unsigned MOIdx,
LiveInterval &interval) {
- DEBUG(dbgs() << "\t\tregister: " << PrintReg(interval.reg, tri_));
+ DEBUG(dbgs() << "\t\tregister: " << PrintReg(interval.reg, TRI));
// Virtual registers may be defined multiple times (due to phi
// elimination and 2-addr elimination). Much of what we do only has to be
// done once for the vreg. We use an empty interval to detect the first
// time we see a vreg.
- LiveVariables::VarInfo& vi = lv_->getVarInfo(interval.reg);
+ LiveVariables::VarInfo& vi = LV->getVarInfo(interval.reg);
if (interval.empty()) {
// Get the Idx of the defining instructions.
SlotIndex defIndex = MIIdx.getRegSlot(MO.isEarlyClobber());
@@ -226,22 +250,22 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb,
DEBUG(dbgs() << " +" << NewLR);
interval.addRange(NewLR);
- bool PHIJoin = lv_->isPHIJoin(interval.reg);
+ bool PHIJoin = LV->isPHIJoin(interval.reg);
if (PHIJoin) {
- // A phi join register is killed at the end of the MBB and revived as a new
- // valno in the killing blocks.
+ // A phi join register is killed at the end of the MBB and revived as a
+ // new valno in the killing blocks.
assert(vi.AliveBlocks.empty() && "Phi join can't pass through blocks");
DEBUG(dbgs() << " phi-join");
- ValNo->setHasPHIKill(true);
} else {
// Iterate over all of the blocks that the variable is completely
// live in, adding [insrtIndex(begin), instrIndex(end)+4) to the
// live interval.
for (SparseBitVector<>::iterator I = vi.AliveBlocks.begin(),
E = vi.AliveBlocks.end(); I != E; ++I) {
- MachineBasicBlock *aliveBlock = mf_->getBlockNumbered(*I);
- LiveRange LR(getMBBStartIdx(aliveBlock), getMBBEndIdx(aliveBlock), ValNo);
+ MachineBasicBlock *aliveBlock = MF->getBlockNumbered(*I);
+ LiveRange LR(getMBBStartIdx(aliveBlock), getMBBEndIdx(aliveBlock),
+ ValNo);
interval.addRange(LR);
DEBUG(dbgs() << " +" << LR);
}
@@ -260,7 +284,6 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb,
assert(getInstructionFromIndex(Start) == 0 &&
"PHI def index points at actual instruction.");
ValNo = interval.getNextValue(Start, VNInfoAllocator);
- ValNo->setIsPHIDef(true);
}
LiveRange LR(Start, killIdx, ValNo);
interval.addRange(LR);
@@ -319,11 +342,8 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb,
interval.addRange(LiveRange(RedefIndex, RedefIndex.getDeadSlot(),
OldValNo));
- DEBUG({
- dbgs() << " RESULT: ";
- interval.print(dbgs(), tri_);
- });
- } else if (lv_->isPHIJoin(interval.reg)) {
+ DEBUG(dbgs() << " RESULT: " << interval);
+ } else if (LV->isPHIJoin(interval.reg)) {
// In the case of PHI elimination, each variable definition is only
// live until the end of the block. We've already taken care of the
// rest of the live range.
@@ -337,7 +357,6 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb,
SlotIndex killIndex = getMBBEndIdx(mbb);
LiveRange LR(defIndex, killIndex, ValNo);
interval.addRange(LR);
- ValNo->setHasPHIKill(true);
DEBUG(dbgs() << " phi-join +" << LR);
} else {
llvm_unreachable("Multiply defined register");
@@ -347,101 +366,6 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb,
DEBUG(dbgs() << '\n');
}
-static bool isRegLiveIntoSuccessor(const MachineBasicBlock *MBB, unsigned Reg) {
- for (MachineBasicBlock::const_succ_iterator SI = MBB->succ_begin(),
- SE = MBB->succ_end();
- SI != SE; ++SI) {
- const MachineBasicBlock* succ = *SI;
- if (succ->isLiveIn(Reg))
- return true;
- }
- return false;
-}
-
-void LiveIntervals::handlePhysicalRegisterDef(MachineBasicBlock *MBB,
- MachineBasicBlock::iterator mi,
- SlotIndex MIIdx,
- MachineOperand& MO,
- LiveInterval &interval) {
- DEBUG(dbgs() << "\t\tregister: " << PrintReg(interval.reg, tri_));
-
- SlotIndex baseIndex = MIIdx;
- SlotIndex start = baseIndex.getRegSlot(MO.isEarlyClobber());
- SlotIndex end = start;
-
- // If it is not used after definition, it is considered dead at
- // the instruction defining it. Hence its interval is:
- // [defSlot(def), defSlot(def)+1)
- // For earlyclobbers, the defSlot was pushed back one; the extra
- // advance below compensates.
- if (MO.isDead()) {
- DEBUG(dbgs() << " dead");
- end = start.getDeadSlot();
- goto exit;
- }
-
- // If it is not dead on definition, it must be killed by a
- // subsequent instruction. Hence its interval is:
- // [defSlot(def), useSlot(kill)+1)
- baseIndex = baseIndex.getNextIndex();
- while (++mi != MBB->end()) {
-
- if (mi->isDebugValue())
- continue;
- if (getInstructionFromIndex(baseIndex) == 0)
- baseIndex = indexes_->getNextNonNullIndex(baseIndex);
-
- if (mi->killsRegister(interval.reg, tri_)) {
- DEBUG(dbgs() << " killed");
- end = baseIndex.getRegSlot();
- goto exit;
- } else {
- int DefIdx = mi->findRegisterDefOperandIdx(interval.reg,false,false,tri_);
- if (DefIdx != -1) {
- if (mi->isRegTiedToUseOperand(DefIdx)) {
- // Two-address instruction.
- end = baseIndex.getRegSlot(mi->getOperand(DefIdx).isEarlyClobber());
- } else {
- // Another instruction redefines the register before it is ever read.
- // Then the register is essentially dead at the instruction that
- // defines it. Hence its interval is:
- // [defSlot(def), defSlot(def)+1)
- DEBUG(dbgs() << " dead");
- end = start.getDeadSlot();
- }
- goto exit;
- }
- }
-
- baseIndex = baseIndex.getNextIndex();
- }
-
- // If we get here the register *should* be live out.
- assert(!isAllocatable(interval.reg) && "Physregs shouldn't be live out!");
-
- // FIXME: We need saner rules for reserved regs.
- if (isReserved(interval.reg)) {
- end = start.getDeadSlot();
- } else {
- // Unreserved, unallocable registers like EFLAGS can be live across basic
- // block boundaries.
- assert(isRegLiveIntoSuccessor(MBB, interval.reg) &&
- "Unreserved reg not live-out?");
- end = getMBBEndIdx(MBB);
- }
-exit:
- assert(start < end && "did not find end of interval?");
-
- // Already exists? Extend old live interval.
- VNInfo *ValNo = interval.getVNInfoAt(start);
- bool Extend = ValNo != 0;
- if (!Extend)
- ValNo = interval.getNextValue(start, VNInfoAllocator);
- LiveRange LR(start, end, ValNo);
- interval.addRange(LR);
- DEBUG(dbgs() << " +" << LR << '\n');
-}
-
void LiveIntervals::handleRegisterDef(MachineBasicBlock *MBB,
MachineBasicBlock::iterator MI,
SlotIndex MIIdx,
@@ -450,93 +374,6 @@ void LiveIntervals::handleRegisterDef(MachineBasicBlock *MBB,
if (TargetRegisterInfo::isVirtualRegister(MO.getReg()))
handleVirtualRegisterDef(MBB, MI, MIIdx, MO, MOIdx,
getOrCreateInterval(MO.getReg()));
- else
- handlePhysicalRegisterDef(MBB, MI, MIIdx, MO,
- getOrCreateInterval(MO.getReg()));
-}
-
-void LiveIntervals::handleLiveInRegister(MachineBasicBlock *MBB,
- SlotIndex MIIdx,
- LiveInterval &interval) {
- assert(TargetRegisterInfo::isPhysicalRegister(interval.reg) &&
- "Only physical registers can be live in.");
- assert((!isAllocatable(interval.reg) || MBB->getParent()->begin() ||
- MBB->isLandingPad()) &&
- "Allocatable live-ins only valid for entry blocks and landing pads.");
-
- DEBUG(dbgs() << "\t\tlivein register: " << PrintReg(interval.reg, tri_));
-
- // Look for kills, if it reaches a def before it's killed, then it shouldn't
- // be considered a livein.
- MachineBasicBlock::iterator mi = MBB->begin();
- MachineBasicBlock::iterator E = MBB->end();
- // Skip over DBG_VALUE at the start of the MBB.
- if (mi != E && mi->isDebugValue()) {
- while (++mi != E && mi->isDebugValue())
- ;
- if (mi == E)
- // MBB is empty except for DBG_VALUE's.
- return;
- }
-
- SlotIndex baseIndex = MIIdx;
- SlotIndex start = baseIndex;
- if (getInstructionFromIndex(baseIndex) == 0)
- baseIndex = indexes_->getNextNonNullIndex(baseIndex);
-
- SlotIndex end = baseIndex;
- bool SeenDefUse = false;
-
- while (mi != E) {
- if (mi->killsRegister(interval.reg, tri_)) {
- DEBUG(dbgs() << " killed");
- end = baseIndex.getRegSlot();
- SeenDefUse = true;
- break;
- } else if (mi->modifiesRegister(interval.reg, tri_)) {
- // Another instruction redefines the register before it is ever read.
- // Then the register is essentially dead at the instruction that defines
- // it. Hence its interval is:
- // [defSlot(def), defSlot(def)+1)
- DEBUG(dbgs() << " dead");
- end = start.getDeadSlot();
- SeenDefUse = true;
- break;
- }
-
- while (++mi != E && mi->isDebugValue())
- // Skip over DBG_VALUE.
- ;
- if (mi != E)
- baseIndex = indexes_->getNextNonNullIndex(baseIndex);
- }
-
- // Live-in register might not be used at all.
- if (!SeenDefUse) {
- if (isAllocatable(interval.reg) ||
- !isRegLiveIntoSuccessor(MBB, interval.reg)) {
- // Allocatable registers are never live through.
- // Non-allocatable registers that aren't live into any successors also
- // aren't live through.
- DEBUG(dbgs() << " dead");
- return;
- } else {
- // If we get here the register is non-allocatable and live into some
- // successor. We'll conservatively assume it's live-through.
- DEBUG(dbgs() << " live through");
- end = getMBBEndIdx(MBB);
- }
- }
-
- SlotIndex defIdx = getMBBStartIdx(MBB);
- assert(getInstructionFromIndex(defIdx) == 0 &&
- "PHI def index points at actual instruction.");
- VNInfo *vni = interval.getNextValue(defIdx, VNInfoAllocator);
- vni->setIsPHIDef(true);
- LiveRange LR(start, end, vni);
-
- interval.addRange(LR);
- DEBUG(dbgs() << " +" << LR << '\n');
}
/// computeIntervals - computes the live intervals for virtual
@@ -546,12 +383,12 @@ void LiveIntervals::handleLiveInRegister(MachineBasicBlock *MBB,
void LiveIntervals::computeIntervals() {
DEBUG(dbgs() << "********** COMPUTING LIVE INTERVALS **********\n"
<< "********** Function: "
- << ((Value*)mf_->getFunction())->getName() << '\n');
+ << ((Value*)MF->getFunction())->getName() << '\n');
- RegMaskBlocks.resize(mf_->getNumBlockIDs());
+ RegMaskBlocks.resize(MF->getNumBlockIDs());
SmallVector<unsigned, 8> UndefUses;
- for (MachineFunction::iterator MBBI = mf_->begin(), E = mf_->end();
+ for (MachineFunction::iterator MBBI = MF->begin(), E = MF->end();
MBBI != E; ++MBBI) {
MachineBasicBlock *MBB = MBBI;
RegMaskBlocks[MBB->getNumber()].first = RegMaskSlots.size();
@@ -564,22 +401,16 @@ void LiveIntervals::computeIntervals() {
DEBUG(dbgs() << "BB#" << MBB->getNumber()
<< ":\t\t# derived from " << MBB->getName() << "\n");
- // Create intervals for live-ins to this BB first.
- for (MachineBasicBlock::livein_iterator LI = MBB->livein_begin(),
- LE = MBB->livein_end(); LI != LE; ++LI) {
- handleLiveInRegister(MBB, MIIndex, getOrCreateInterval(*LI));
- }
-
// Skip over empty initial indices.
if (getInstructionFromIndex(MIIndex) == 0)
- MIIndex = indexes_->getNextNonNullIndex(MIIndex);
+ MIIndex = Indexes->getNextNonNullIndex(MIIndex);
for (MachineBasicBlock::iterator MI = MBB->begin(), miEnd = MBB->end();
MI != miEnd; ++MI) {
DEBUG(dbgs() << MIIndex << "\t" << *MI);
if (MI->isDebugValue())
continue;
- assert(indexes_->getInstructionFromIndex(MIIndex) == MI &&
+ assert(Indexes->getInstructionFromIndex(MIIndex) == MI &&
"Lost SlotIndex synchronization");
// Handle defs.
@@ -593,7 +424,7 @@ void LiveIntervals::computeIntervals() {
continue;
}
- if (!MO.isReg() || !MO.getReg())
+ if (!MO.isReg() || !TargetRegisterInfo::isVirtualRegister(MO.getReg()))
continue;
// handle register defs - build intervals
@@ -604,7 +435,7 @@ void LiveIntervals::computeIntervals() {
}
// Move to the next instr slot.
- MIIndex = indexes_->getNextNonNullIndex(MIIndex);
+ MIIndex = Indexes->getNextNonNullIndex(MIIndex);
}
// Compute the number of register mask instructions in this block.
@@ -626,14 +457,147 @@ LiveInterval* LiveIntervals::createInterval(unsigned reg) {
return new LiveInterval(reg, Weight);
}
-/// dupInterval - Duplicate a live interval. The caller is responsible for
-/// managing the allocated memory.
-LiveInterval* LiveIntervals::dupInterval(LiveInterval *li) {
- LiveInterval *NewLI = createInterval(li->reg);
- NewLI->Copy(*li, mri_, getVNInfoAllocator());
- return NewLI;
+
+/// computeVirtRegInterval - Compute the live interval of a virtual register,
+/// based on defs and uses.
+void LiveIntervals::computeVirtRegInterval(LiveInterval *LI) {
+ assert(LRCalc && "LRCalc not initialized.");
+ assert(LI->empty() && "Should only compute empty intervals.");
+ LRCalc->reset(MF, getSlotIndexes(), DomTree, &getVNInfoAllocator());
+ LRCalc->createDeadDefs(LI);
+ LRCalc->extendToUses(LI);
+}
+
+void LiveIntervals::computeVirtRegs() {
+ for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) {
+ unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
+ if (MRI->reg_nodbg_empty(Reg))
+ continue;
+ LiveInterval *LI = createInterval(Reg);
+ VirtRegIntervals[Reg] = LI;
+ computeVirtRegInterval(LI);
+ }
+}
+
+void LiveIntervals::computeRegMasks() {
+ RegMaskBlocks.resize(MF->getNumBlockIDs());
+
+ // Find all instructions with regmask operands.
+ for (MachineFunction::iterator MBBI = MF->begin(), E = MF->end();
+ MBBI != E; ++MBBI) {
+ MachineBasicBlock *MBB = MBBI;
+ std::pair<unsigned, unsigned> &RMB = RegMaskBlocks[MBB->getNumber()];
+ RMB.first = RegMaskSlots.size();
+ for (MachineBasicBlock::iterator MI = MBB->begin(), ME = MBB->end();
+ MI != ME; ++MI)
+ for (MIOperands MO(MI); MO.isValid(); ++MO) {
+ if (!MO->isRegMask())
+ continue;
+ RegMaskSlots.push_back(Indexes->getInstructionIndex(MI).getRegSlot());
+ RegMaskBits.push_back(MO->getRegMask());
+ }
+ // Compute the number of register mask instructions in this block.
+ RMB.second = RegMaskSlots.size() - RMB.first;;
+ }
}
+//===----------------------------------------------------------------------===//
+// Register Unit Liveness
+//===----------------------------------------------------------------------===//
+//
+// Fixed interference typically comes from ABI boundaries: Function arguments
+// and return values are passed in fixed registers, and so are exception
+// pointers entering landing pads. Certain instructions require values to be
+// present in specific registers. That is also represented through fixed
+// interference.
+//
+
+/// computeRegUnitInterval - Compute the live interval of a register unit, based
+/// on the uses and defs of aliasing registers. The interval should be empty,
+/// or contain only dead phi-defs from ABI blocks.
+void LiveIntervals::computeRegUnitInterval(LiveInterval *LI) {
+ unsigned Unit = LI->reg;
+
+ assert(LRCalc && "LRCalc not initialized.");
+ LRCalc->reset(MF, getSlotIndexes(), DomTree, &getVNInfoAllocator());
+
+ // The physregs aliasing Unit are the roots and their super-registers.
+ // Create all values as dead defs before extending to uses. Note that roots
+ // may share super-registers. That's OK because createDeadDefs() is
+ // idempotent. It is very rare for a register unit to have multiple roots, so
+ // uniquing super-registers is probably not worthwhile.
+ for (MCRegUnitRootIterator Roots(Unit, TRI); Roots.isValid(); ++Roots) {
+ unsigned Root = *Roots;
+ if (!MRI->reg_empty(Root))
+ LRCalc->createDeadDefs(LI, Root);
+ for (MCSuperRegIterator Supers(Root, TRI); Supers.isValid(); ++Supers) {
+ if (!MRI->reg_empty(*Supers))
+ LRCalc->createDeadDefs(LI, *Supers);
+ }
+ }
+
+ // Now extend LI to reach all uses.
+ // Ignore uses of reserved registers. We only track defs of those.
+ for (MCRegUnitRootIterator Roots(Unit, TRI); Roots.isValid(); ++Roots) {
+ unsigned Root = *Roots;
+ if (!isReserved(Root) && !MRI->reg_empty(Root))
+ LRCalc->extendToUses(LI, Root);
+ for (MCSuperRegIterator Supers(Root, TRI); Supers.isValid(); ++Supers) {
+ unsigned Reg = *Supers;
+ if (!isReserved(Reg) && !MRI->reg_empty(Reg))
+ LRCalc->extendToUses(LI, Reg);
+ }
+ }
+}
+
+
+/// computeLiveInRegUnits - Precompute the live ranges of any register units
+/// that are live-in to an ABI block somewhere. Register values can appear
+/// without a corresponding def when entering the entry block or a landing pad.
+///
+void LiveIntervals::computeLiveInRegUnits() {
+ RegUnitIntervals.resize(TRI->getNumRegUnits());
+ DEBUG(dbgs() << "Computing live-in reg-units in ABI blocks.\n");
+
+ // Keep track of the intervals allocated.
+ SmallVector<LiveInterval*, 8> NewIntvs;
+
+ // Check all basic blocks for live-ins.
+ for (MachineFunction::const_iterator MFI = MF->begin(), MFE = MF->end();
+ MFI != MFE; ++MFI) {
+ const MachineBasicBlock *MBB = MFI;
+
+ // We only care about ABI blocks: Entry + landing pads.
+ if ((MFI != MF->begin() && !MBB->isLandingPad()) || MBB->livein_empty())
+ continue;
+
+ // Create phi-defs at Begin for all live-in registers.
+ SlotIndex Begin = Indexes->getMBBStartIdx(MBB);
+ DEBUG(dbgs() << Begin << "\tBB#" << MBB->getNumber());
+ for (MachineBasicBlock::livein_iterator LII = MBB->livein_begin(),
+ LIE = MBB->livein_end(); LII != LIE; ++LII) {
+ for (MCRegUnitIterator Units(*LII, TRI); Units.isValid(); ++Units) {
+ unsigned Unit = *Units;
+ LiveInterval *Intv = RegUnitIntervals[Unit];
+ if (!Intv) {
+ Intv = RegUnitIntervals[Unit] = new LiveInterval(Unit, HUGE_VALF);
+ NewIntvs.push_back(Intv);
+ }
+ VNInfo *VNI = Intv->createDeadDef(Begin, getVNInfoAllocator());
+ (void)VNI;
+ DEBUG(dbgs() << ' ' << PrintRegUnit(Unit, TRI) << '#' << VNI->id);
+ }
+ }
+ DEBUG(dbgs() << '\n');
+ }
+ DEBUG(dbgs() << "Created " << NewIntvs.size() << " new intervals.\n");
+
+ // Compute the 'normal' part of the intervals.
+ for (unsigned i = 0, e = NewIntvs.size(); i != e; ++i)
+ computeRegUnitInterval(NewIntvs[i]);
+}
+
+
/// shrinkToUses - After removing some uses of a register, shrink its live
/// range to just the remaining uses. This method does not compute reaching
/// defs for new uses, and it doesn't remove dead defs.
@@ -649,14 +613,13 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li,
SmallPtrSet<MachineBasicBlock*, 16> LiveOut;
// Visit all instructions reading li->reg.
- for (MachineRegisterInfo::reg_iterator I = mri_->reg_begin(li->reg);
+ for (MachineRegisterInfo::reg_iterator I = MRI->reg_begin(li->reg);
MachineInstr *UseMI = I.skipInstruction();) {
if (UseMI->isDebugValue() || !UseMI->readsVirtualRegister(li->reg))
continue;
SlotIndex Idx = getInstructionIndex(UseMI).getRegSlot();
- // Note: This intentionally picks up the wrong VNI in case of an EC redef.
- // See below.
- VNInfo *VNI = li->getVNInfoBefore(Idx);
+ LiveRangeQuery LRQ(*li, Idx);
+ VNInfo *VNI = LRQ.valueIn();
if (!VNI) {
// This shouldn't happen: readsVirtualRegister returns true, but there is
// no live value. It is likely caused by a target getting <undef> flags
@@ -667,13 +630,10 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li,
continue;
}
// Special case: An early-clobber tied operand reads and writes the
- // register one slot early. The getVNInfoBefore call above would have
- // picked up the value defined by UseMI. Adjust the kill slot and value.
- if (SlotIndex::isSameInstr(VNI->def, Idx)) {
- Idx = VNI->def;
- VNI = li->getVNInfoBefore(Idx);
- assert(VNI && "Early-clobber tied value not available");
- }
+ // register one slot early.
+ if (VNInfo *DefVNI = LRQ.valueDefined())
+ Idx = DefVNI->def;
+
WorkList.push_back(std::make_pair(Idx, VNI));
}
@@ -747,7 +707,7 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li,
continue;
if (VNI->isPHIDef()) {
// This is a dead PHI. Remove it.
- VNI->setIsUnused(true);
+ VNI->markUnused();
NewLI.removeRange(*LII);
DEBUG(dbgs() << "Dead PHI at " << VNI->def << " may separate interval\n");
CanSeparate = true;
@@ -755,7 +715,7 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li,
// This is a dead def. Make sure the instruction knows.
MachineInstr *MI = getInstructionFromIndex(VNI->def);
assert(MI && "No instruction defining live value");
- MI->addRegisterDead(li->reg, tri_);
+ MI->addRegisterDead(li->reg, TRI);
if (dead && MI->allDefsAreDead()) {
DEBUG(dbgs() << "All defs dead: " << VNI->def << '\t' << *MI);
dead->push_back(MI);
@@ -775,13 +735,11 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li,
//
void LiveIntervals::addKillFlags() {
- for (iterator I = begin(), E = end(); I != E; ++I) {
- unsigned Reg = I->first;
- if (TargetRegisterInfo::isPhysicalRegister(Reg))
+ for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) {
+ unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
+ if (MRI->reg_nodbg_empty(Reg))
continue;
- if (mri_->reg_nodbg_empty(Reg))
- continue;
- LiveInterval *LI = I->second;
+ LiveInterval *LI = &getInterval(Reg);
// Every instruction that kills Reg corresponds to a live range end point.
for (LiveInterval::iterator RI = LI->begin(), RE = LI->end(); RI != RE;
@@ -797,101 +755,6 @@ void LiveIntervals::addKillFlags() {
}
}
-/// getReMatImplicitUse - If the remat definition MI has one (for now, we only
-/// allow one) virtual register operand, then its uses are implicitly using
-/// the register. Returns the virtual register.
-unsigned LiveIntervals::getReMatImplicitUse(const LiveInterval &li,
- MachineInstr *MI) const {
- unsigned RegOp = 0;
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- MachineOperand &MO = MI->getOperand(i);
- if (!MO.isReg() || !MO.isUse())
- continue;
- unsigned Reg = MO.getReg();
- if (Reg == 0 || Reg == li.reg)
- continue;
-
- if (TargetRegisterInfo::isPhysicalRegister(Reg) && !isAllocatable(Reg))
- continue;
- RegOp = MO.getReg();
- break; // Found vreg operand - leave the loop.
- }
- return RegOp;
-}
-
-/// isValNoAvailableAt - Return true if the val# of the specified interval
-/// which reaches the given instruction also reaches the specified use index.
-bool LiveIntervals::isValNoAvailableAt(const LiveInterval &li, MachineInstr *MI,
- SlotIndex UseIdx) const {
- VNInfo *UValNo = li.getVNInfoAt(UseIdx);
- return UValNo && UValNo == li.getVNInfoAt(getInstructionIndex(MI));
-}
-
-/// isReMaterializable - Returns true if the definition MI of the specified
-/// val# of the specified interval is re-materializable.
-bool
-LiveIntervals::isReMaterializable(const LiveInterval &li,
- const VNInfo *ValNo, MachineInstr *MI,
- const SmallVectorImpl<LiveInterval*> *SpillIs,
- bool &isLoad) {
- if (DisableReMat)
- return false;
-
- if (!tii_->isTriviallyReMaterializable(MI, aa_))
- return false;
-
- // Target-specific code can mark an instruction as being rematerializable
- // if it has one virtual reg use, though it had better be something like
- // a PIC base register which is likely to be live everywhere.
- unsigned ImpUse = getReMatImplicitUse(li, MI);
- if (ImpUse) {
- const LiveInterval &ImpLi = getInterval(ImpUse);
- for (MachineRegisterInfo::use_nodbg_iterator
- ri = mri_->use_nodbg_begin(li.reg), re = mri_->use_nodbg_end();
- ri != re; ++ri) {
- MachineInstr *UseMI = &*ri;
- SlotIndex UseIdx = getInstructionIndex(UseMI);
- if (li.getVNInfoAt(UseIdx) != ValNo)
- continue;
- if (!isValNoAvailableAt(ImpLi, MI, UseIdx))
- return false;
- }
-
- // If a register operand of the re-materialized instruction is going to
- // be spilled next, then it's not legal to re-materialize this instruction.
- if (SpillIs)
- for (unsigned i = 0, e = SpillIs->size(); i != e; ++i)
- if (ImpUse == (*SpillIs)[i]->reg)
- return false;
- }
- return true;
-}
-
-/// isReMaterializable - Returns true if every definition of MI of every
-/// val# of the specified interval is re-materializable.
-bool
-LiveIntervals::isReMaterializable(const LiveInterval &li,
- const SmallVectorImpl<LiveInterval*> *SpillIs,
- bool &isLoad) {
- isLoad = false;
- for (LiveInterval::const_vni_iterator i = li.vni_begin(), e = li.vni_end();
- i != e; ++i) {
- const VNInfo *VNI = *i;
- if (VNI->isUnused())
- continue; // Dead val#.
- // Is the def for the val# rematerializable?
- MachineInstr *ReMatDefMI = getInstructionFromIndex(VNI->def);
- if (!ReMatDefMI)
- return false;
- bool DefIsLoad = false;
- if (!ReMatDefMI ||
- !isReMaterializable(li, VNI, ReMatDefMI, SpillIs, DefIsLoad))
- return false;
- isLoad |= DefIsLoad;
- }
- return true;
-}
-
MachineBasicBlock*
LiveIntervals::intervalIsInOneMBB(const LiveInterval &LI) const {
// A local live range must be fully contained inside the block, meaning it is
@@ -911,11 +774,30 @@ LiveIntervals::intervalIsInOneMBB(const LiveInterval &LI) const {
// getMBBFromIndex doesn't need to search the MBB table when both indexes
// belong to proper instructions.
- MachineBasicBlock *MBB1 = indexes_->getMBBFromIndex(Start);
- MachineBasicBlock *MBB2 = indexes_->getMBBFromIndex(Stop);
+ MachineBasicBlock *MBB1 = Indexes->getMBBFromIndex(Start);
+ MachineBasicBlock *MBB2 = Indexes->getMBBFromIndex(Stop);
return MBB1 == MBB2 ? MBB1 : NULL;
}
+bool
+LiveIntervals::hasPHIKill(const LiveInterval &LI, const VNInfo *VNI) const {
+ for (LiveInterval::const_vni_iterator I = LI.vni_begin(), E = LI.vni_end();
+ I != E; ++I) {
+ const VNInfo *PHI = *I;
+ if (PHI->isUnused() || !PHI->isPHIDef())
+ continue;
+ const MachineBasicBlock *PHIMBB = getMBBFromIndex(PHI->def);
+ // Conservatively return true instead of scanning huge predecessor lists.
+ if (PHIMBB->pred_size() > 100)
+ return true;
+ for (MachineBasicBlock::const_pred_iterator
+ PI = PHIMBB->pred_begin(), PE = PHIMBB->pred_end(); PI != PE; ++PI)
+ if (VNI == LI.getVNInfoBefore(Indexes->getMBBEndIdx(*PI)))
+ return true;
+ }
+ return false;
+}
+
float
LiveIntervals::getSpillWeight(bool isDef, bool isUse, unsigned loopDepth) {
// Limit the loop depth ridiculousness.
@@ -940,7 +822,6 @@ LiveRange LiveIntervals::addLiveRangeToEndOfBlock(unsigned reg,
VNInfo* VN = Interval.getNextValue(
SlotIndex(getInstructionIndex(startInst).getRegSlot()),
getVNInfoAllocator());
- VN->setHasPHIKill(true);
LiveRange LR(
SlotIndex(getInstructionIndex(startInst).getRegSlot()),
getMBBEndIdx(startInst->getParent()), VN);
@@ -990,7 +871,7 @@ bool LiveIntervals::checkRegMaskInterference(LiveInterval &LI,
if (!Found) {
// This is the first overlap. Initialize UsableRegs to all ones.
UsableRegs.clear();
- UsableRegs.resize(tri_->getNumRegs(), true);
+ UsableRegs.resize(TRI->getNumRegs(), true);
Found = true;
}
// Remove usable registers clobbered by this mask.
@@ -1101,6 +982,9 @@ public:
BundleRanges BR = createBundleRanges(Entering, Internal, Exiting);
+ Entering.clear();
+ Internal.clear();
+ Exiting.clear();
collectRanges(MI, Entering, Internal, Exiting, hasRegMaskOp, OldIdx);
assert(!hasRegMaskOp && "Can't have RegMask operand in bundle.");
@@ -1176,78 +1060,44 @@ private:
// TODO: Currently we're skipping uses that are reserved or have no
// interval, but we're not updating their kills. This should be
// fixed.
- if (!LIS.hasInterval(Reg) ||
- (TargetRegisterInfo::isPhysicalRegister(Reg) && LIS.isReserved(Reg)))
+ if (TargetRegisterInfo::isPhysicalRegister(Reg) && LIS.isReserved(Reg))
continue;
- LiveInterval* LI = &LIS.getInterval(Reg);
-
- if (MO.readsReg()) {
- LiveRange* LR = LI->getLiveRangeContaining(OldIdx);
- if (LR != 0)
- Entering.insert(std::make_pair(LI, LR));
- }
- if (MO.isDef()) {
- if (MO.isEarlyClobber()) {
- LiveRange* LR = LI->getLiveRangeContaining(OldIdx.getRegSlot(true));
- assert(LR != 0 && "No EC range?");
- if (LR->end > OldIdx.getDeadSlot())
- Exiting.insert(std::make_pair(LI, LR));
- else
- Internal.insert(std::make_pair(LI, LR));
- } else if (MO.isDead()) {
- LiveRange* LR = LI->getLiveRangeContaining(OldIdx.getRegSlot());
- assert(LR != 0 && "No dead-def range?");
- Internal.insert(std::make_pair(LI, LR));
- } else {
- LiveRange* LR = LI->getLiveRangeContaining(OldIdx.getDeadSlot());
- assert(LR && LR->end > OldIdx.getDeadSlot() &&
- "Non-dead-def should have live range exiting.");
- Exiting.insert(std::make_pair(LI, LR));
- }
+ // Collect ranges for register units. These live ranges are computed on
+ // demand, so just skip any that haven't been computed yet.
+ if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ for (MCRegUnitIterator Units(Reg, &TRI); Units.isValid(); ++Units)
+ if (LiveInterval *LI = LIS.getCachedRegUnit(*Units))
+ collectRanges(MO, LI, Entering, Internal, Exiting, OldIdx);
+ } else {
+ // Collect ranges for individual virtual registers.
+ collectRanges(MO, &LIS.getInterval(Reg),
+ Entering, Internal, Exiting, OldIdx);
}
}
}
- // Collect IntRangePairs for all operands of MI that may need fixing.
- void collectRangesInBundle(MachineInstr* MI, RangeSet& Entering,
- RangeSet& Exiting, SlotIndex MIStartIdx,
- SlotIndex MIEndIdx) {
- for (MachineInstr::mop_iterator MOI = MI->operands_begin(),
- MOE = MI->operands_end();
- MOI != MOE; ++MOI) {
- const MachineOperand& MO = *MOI;
- assert(!MO.isRegMask() && "Can't have RegMasks in bundles.");
- if (!MO.isReg() || MO.getReg() == 0)
- continue;
-
- unsigned Reg = MO.getReg();
-
- // TODO: Currently we're skipping uses that are reserved or have no
- // interval, but we're not updating their kills. This should be
- // fixed.
- if (!LIS.hasInterval(Reg) ||
- (TargetRegisterInfo::isPhysicalRegister(Reg) && LIS.isReserved(Reg)))
- continue;
-
- LiveInterval* LI = &LIS.getInterval(Reg);
-
- if (MO.readsReg()) {
- LiveRange* LR = LI->getLiveRangeContaining(MIStartIdx);
- if (LR != 0)
- Entering.insert(std::make_pair(LI, LR));
- }
- if (MO.isDef()) {
- assert(!MO.isEarlyClobber() && "Early clobbers not allowed in bundles.");
- assert(!MO.isDead() && "Dead-defs not allowed in bundles.");
- LiveRange* LR = LI->getLiveRangeContaining(MIEndIdx.getDeadSlot());
- assert(LR != 0 && "Internal ranges not allowed in bundles.");
+ void collectRanges(const MachineOperand &MO, LiveInterval *LI,
+ RangeSet &Entering, RangeSet &Internal, RangeSet &Exiting,
+ SlotIndex OldIdx) {
+ if (MO.readsReg()) {
+ LiveRange* LR = LI->getLiveRangeContaining(OldIdx);
+ if (LR != 0)
+ Entering.insert(std::make_pair(LI, LR));
+ }
+ if (MO.isDef()) {
+ LiveRange* LR = LI->getLiveRangeContaining(OldIdx.getRegSlot());
+ assert(LR != 0 && "No live range for def?");
+ if (LR->end > OldIdx.getDeadSlot())
Exiting.insert(std::make_pair(LI, LR));
- }
+ else
+ Internal.insert(std::make_pair(LI, LR));
}
}
- BundleRanges createBundleRanges(RangeSet& Entering, RangeSet& Internal, RangeSet& Exiting) {
+ BundleRanges createBundleRanges(RangeSet& Entering,
+ RangeSet& Internal,
+ RangeSet& Exiting) {
BundleRanges BR;
for (RangeSet::iterator EI = Entering.begin(), EE = Entering.end();
@@ -1284,7 +1134,8 @@ private:
return; // Bail out if we don't have kill flags on the old register.
MachineInstr* NewKillMI = LIS.getInstructionFromIndex(newKillIdx);
assert(OldKillMI->killsRegister(reg) && "Old 'kill' instr isn't a kill.");
- assert(!NewKillMI->killsRegister(reg) && "New kill instr is already a kill.");
+ assert(!NewKillMI->killsRegister(reg) &&
+ "New kill instr is already a kill.");
OldKillMI->clearRegisterKills(reg, &TRI);
NewKillMI->addRegisterKilled(reg, &TRI);
}
@@ -1523,22 +1374,23 @@ private:
};
void LiveIntervals::handleMove(MachineInstr* MI) {
- SlotIndex OldIndex = indexes_->getInstructionIndex(MI);
- indexes_->removeMachineInstrFromMaps(MI);
+ SlotIndex OldIndex = Indexes->getInstructionIndex(MI);
+ Indexes->removeMachineInstrFromMaps(MI);
SlotIndex NewIndex = MI->isInsideBundle() ?
- indexes_->getInstructionIndex(MI) :
- indexes_->insertMachineInstrInMaps(MI);
+ Indexes->getInstructionIndex(MI) :
+ Indexes->insertMachineInstrInMaps(MI);
assert(getMBBStartIdx(MI->getParent()) <= OldIndex &&
OldIndex < getMBBEndIdx(MI->getParent()) &&
"Cannot handle moves across basic block boundaries.");
assert(!MI->isBundled() && "Can't handle bundled instructions yet.");
- HMEditor HME(*this, *mri_, *tri_, NewIndex);
+ HMEditor HME(*this, *MRI, *TRI, NewIndex);
HME.moveAllRangesFrom(MI, OldIndex);
}
-void LiveIntervals::handleMoveIntoBundle(MachineInstr* MI, MachineInstr* BundleStart) {
- SlotIndex NewIndex = indexes_->getInstructionIndex(BundleStart);
- HMEditor HME(*this, *mri_, *tri_, NewIndex);
+void LiveIntervals::handleMoveIntoBundle(MachineInstr* MI,
+ MachineInstr* BundleStart) {
+ SlotIndex NewIndex = Indexes->getInstructionIndex(BundleStart);
+ HMEditor HME(*this, *MRI, *TRI, NewIndex);
HME.moveAllRangesInto(MI, BundleStart);
}
diff --git a/lib/CodeGen/LiveIntervalUnion.cpp b/lib/CodeGen/LiveIntervalUnion.cpp
index 60a6880..dadd02b 100644
--- a/lib/CodeGen/LiveIntervalUnion.cpp
+++ b/lib/CodeGen/LiveIntervalUnion.cpp
@@ -81,7 +81,6 @@ void LiveIntervalUnion::extract(LiveInterval &VirtReg) {
void
LiveIntervalUnion::print(raw_ostream &OS, const TargetRegisterInfo *TRI) const {
- OS << "LIU " << PrintReg(RepReg, TRI);
if (empty()) {
OS << " empty\n";
return;
@@ -209,3 +208,26 @@ bool LiveIntervalUnion::Query::checkLoopInterference(MachineLoopRange *Loop) {
VRI = VirtReg->advanceTo(VRI, Overlaps.start());
}
}
+
+void LiveIntervalUnion::Array::init(LiveIntervalUnion::Allocator &Alloc,
+ unsigned NSize) {
+ // Reuse existing allocation.
+ if (NSize == Size)
+ return;
+ clear();
+ Size = NSize;
+ LIUs = static_cast<LiveIntervalUnion*>(
+ malloc(sizeof(LiveIntervalUnion)*NSize));
+ for (unsigned i = 0; i != Size; ++i)
+ new(LIUs + i) LiveIntervalUnion(Alloc);
+}
+
+void LiveIntervalUnion::Array::clear() {
+ if (!LIUs)
+ return;
+ for (unsigned i = 0; i != Size; ++i)
+ LIUs[i].~LiveIntervalUnion();
+ free(LIUs);
+ Size = 0;
+ LIUs = 0;
+}
diff --git a/lib/CodeGen/LiveIntervalUnion.h b/lib/CodeGen/LiveIntervalUnion.h
index dbf5ac1..cd4e690 100644
--- a/lib/CodeGen/LiveIntervalUnion.h
+++ b/lib/CodeGen/LiveIntervalUnion.h
@@ -60,13 +60,11 @@ public:
class Query;
private:
- const unsigned RepReg; // representative register number
unsigned Tag; // unique tag for current contents.
LiveSegments Segments; // union of virtual reg segments
public:
- LiveIntervalUnion(unsigned r, Allocator &a) : RepReg(r), Tag(0), Segments(a)
- {}
+ explicit LiveIntervalUnion(Allocator &a) : Tag(0), Segments(a) {}
// Iterate over all segments in the union of live virtual registers ordered
// by their starting position.
@@ -183,6 +181,28 @@ public:
Query(const Query&); // DO NOT IMPLEMENT
void operator=(const Query&); // DO NOT IMPLEMENT
};
+
+ // Array of LiveIntervalUnions.
+ class Array {
+ unsigned Size;
+ LiveIntervalUnion *LIUs;
+ public:
+ Array() : Size(0), LIUs(0) {}
+ ~Array() { clear(); }
+
+ // Initialize the array to have Size entries.
+ // Reuse an existing allocation if the size matches.
+ void init(LiveIntervalUnion::Allocator&, unsigned Size);
+
+ unsigned size() const { return Size; }
+
+ void clear();
+
+ LiveIntervalUnion& operator[](unsigned idx) {
+ assert(idx < Size && "idx out of bounds");
+ return LIUs[idx];
+ }
+ };
};
} // end namespace llvm
diff --git a/lib/CodeGen/LiveRangeCalc.cpp b/lib/CodeGen/LiveRangeCalc.cpp
index d8ab791..d828f25 100644
--- a/lib/CodeGen/LiveRangeCalc.cpp
+++ b/lib/CodeGen/LiveRangeCalc.cpp
@@ -14,10 +14,19 @@
#define DEBUG_TYPE "regalloc"
#include "LiveRangeCalc.h"
#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
using namespace llvm;
-void LiveRangeCalc::reset(const MachineFunction *MF) {
+void LiveRangeCalc::reset(const MachineFunction *MF,
+ SlotIndexes *SI,
+ MachineDominatorTree *MDT,
+ VNInfo::Allocator *VNIA) {
+ MRI = &MF->getRegInfo();
+ Indexes = SI;
+ DomTree = MDT;
+ Alloc = VNIA;
+
unsigned N = MF->getNumBlockIDs();
Seen.clear();
Seen.resize(N);
@@ -26,8 +35,72 @@ void LiveRangeCalc::reset(const MachineFunction *MF) {
}
+void LiveRangeCalc::createDeadDefs(LiveInterval *LI, unsigned Reg) {
+ assert(MRI && Indexes && "call reset() first");
+
+ // Visit all def operands. If the same instruction has multiple defs of Reg,
+ // LI->createDeadDef() will deduplicate.
+ for (MachineRegisterInfo::def_iterator
+ I = MRI->def_begin(Reg), E = MRI->def_end(); I != E; ++I) {
+ const MachineInstr *MI = &*I;
+ // Find the corresponding slot index.
+ SlotIndex Idx;
+ if (MI->isPHI())
+ // PHI defs begin at the basic block start index.
+ Idx = Indexes->getMBBStartIdx(MI->getParent());
+ else
+ // Instructions are either normal 'r', or early clobber 'e'.
+ Idx = Indexes->getInstructionIndex(MI)
+ .getRegSlot(I.getOperand().isEarlyClobber());
+
+ // Create the def in LI. This may find an existing def.
+ LI->createDeadDef(Idx, *Alloc);
+ }
+}
+
+
+void LiveRangeCalc::extendToUses(LiveInterval *LI, unsigned Reg) {
+ assert(MRI && Indexes && "call reset() first");
+
+ // Visit all operands that read Reg. This may include partial defs.
+ for (MachineRegisterInfo::reg_nodbg_iterator I = MRI->reg_nodbg_begin(Reg),
+ E = MRI->reg_nodbg_end(); I != E; ++I) {
+ const MachineOperand &MO = I.getOperand();
+ if (!MO.readsReg())
+ continue;
+ // MI is reading Reg. We may have visited MI before if it happens to be
+ // reading Reg multiple times. That is OK, extend() is idempotent.
+ const MachineInstr *MI = &*I;
+
+ // Find the SlotIndex being read.
+ SlotIndex Idx;
+ if (MI->isPHI()) {
+ assert(!MO.isDef() && "Cannot handle PHI def of partial register.");
+ // PHI operands are paired: (Reg, PredMBB).
+ // Extend the live range to be live-out from PredMBB.
+ Idx = Indexes->getMBBEndIdx(MI->getOperand(I.getOperandNo()+1).getMBB());
+ } else {
+ // This is a normal instruction.
+ Idx = Indexes->getInstructionIndex(MI).getRegSlot();
+ // Check for early-clobber redefs.
+ unsigned DefIdx;
+ if (MO.isDef()) {
+ if (MO.isEarlyClobber())
+ Idx = Idx.getRegSlot(true);
+ } else if (MI->isRegTiedToDefOperand(I.getOperandNo(), &DefIdx)) {
+ // FIXME: This would be a lot easier if tied early-clobber uses also
+ // had an early-clobber flag.
+ if (MI->getOperand(DefIdx).isEarlyClobber())
+ Idx = Idx.getRegSlot(true);
+ }
+ }
+ extend(LI, Idx, Reg);
+ }
+}
+
+
// Transfer information from the LiveIn vector to the live ranges.
-void LiveRangeCalc::updateLiveIns(VNInfo *OverrideVNI, SlotIndexes *Indexes) {
+void LiveRangeCalc::updateLiveIns(VNInfo *OverrideVNI) {
for (SmallVectorImpl<LiveInBlock>::iterator I = LiveIn.begin(),
E = LiveIn.end(); I != E; ++I) {
if (!I->DomNode)
@@ -56,9 +129,7 @@ void LiveRangeCalc::updateLiveIns(VNInfo *OverrideVNI, SlotIndexes *Indexes) {
void LiveRangeCalc::extend(LiveInterval *LI,
SlotIndex Kill,
- SlotIndexes *Indexes,
- MachineDominatorTree *DomTree,
- VNInfo::Allocator *Alloc) {
+ unsigned PhysReg) {
assert(LI && "Missing live range");
assert(Kill.isValid() && "Invalid SlotIndex");
assert(Indexes && "Missing SlotIndexes");
@@ -75,34 +146,31 @@ void LiveRangeCalc::extend(LiveInterval *LI,
// multiple values, and we may need to create even more phi-defs to preserve
// VNInfo SSA form. Perform a search for all predecessor blocks where we
// know the dominating VNInfo.
- VNInfo *VNI = findReachingDefs(LI, KillMBB, Kill, Indexes, DomTree);
+ VNInfo *VNI = findReachingDefs(LI, KillMBB, Kill, PhysReg);
// When there were multiple different values, we may need new PHIs.
if (!VNI)
- updateSSA(Indexes, DomTree, Alloc);
+ updateSSA();
- updateLiveIns(VNI, Indexes);
+ updateLiveIns(VNI);
}
// This function is called by a client after using the low-level API to add
// live-out and live-in blocks. The unique value optimization is not
// available, SplitEditor::transferValues handles that case directly anyway.
-void LiveRangeCalc::calculateValues(SlotIndexes *Indexes,
- MachineDominatorTree *DomTree,
- VNInfo::Allocator *Alloc) {
+void LiveRangeCalc::calculateValues() {
assert(Indexes && "Missing SlotIndexes");
assert(DomTree && "Missing dominator tree");
- updateSSA(Indexes, DomTree, Alloc);
- updateLiveIns(0, Indexes);
+ updateSSA();
+ updateLiveIns(0);
}
VNInfo *LiveRangeCalc::findReachingDefs(LiveInterval *LI,
MachineBasicBlock *KillMBB,
SlotIndex Kill,
- SlotIndexes *Indexes,
- MachineDominatorTree *DomTree) {
+ unsigned PhysReg) {
// Blocks where LI should be live-in.
SmallVector<MachineBasicBlock*, 16> WorkList(1, KillMBB);
@@ -113,7 +181,22 @@ VNInfo *LiveRangeCalc::findReachingDefs(LiveInterval *LI,
// Using Seen as a visited set, perform a BFS for all reaching defs.
for (unsigned i = 0; i != WorkList.size(); ++i) {
MachineBasicBlock *MBB = WorkList[i];
- assert(!MBB->pred_empty() && "Value live-in to entry block?");
+
+#ifndef NDEBUG
+ if (MBB->pred_empty()) {
+ MBB->getParent()->verify();
+ llvm_unreachable("Use not jointly dominated by defs.");
+ }
+
+ if (TargetRegisterInfo::isPhysicalRegister(PhysReg) &&
+ !MBB->isLiveIn(PhysReg)) {
+ MBB->getParent()->verify();
+ errs() << "The register needs to be live in to BB#" << MBB->getNumber()
+ << ", but is missing from the live-in list.\n";
+ llvm_unreachable("Invalid global physical register");
+ }
+#endif
+
for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(),
PE = MBB->pred_end(); PI != PE; ++PI) {
MachineBasicBlock *Pred = *PI;
@@ -168,9 +251,7 @@ VNInfo *LiveRangeCalc::findReachingDefs(LiveInterval *LI,
// This is essentially the same iterative algorithm that SSAUpdater uses,
// except we already have a dominator tree, so we don't have to recompute it.
-void LiveRangeCalc::updateSSA(SlotIndexes *Indexes,
- MachineDominatorTree *DomTree,
- VNInfo::Allocator *Alloc) {
+void LiveRangeCalc::updateSSA() {
assert(Indexes && "Missing SlotIndexes");
assert(DomTree && "Missing dominator tree");
@@ -238,7 +319,6 @@ void LiveRangeCalc::updateSSA(SlotIndexes *Indexes,
SlotIndex Start, End;
tie(Start, End) = Indexes->getMBBRange(MBB);
VNInfo *VNI = I->LI->getNextValue(Start, *Alloc);
- VNI->setIsPHIDef(true);
I->Value = VNI;
// This block is done, we know the final value.
I->DomNode = 0;
diff --git a/lib/CodeGen/LiveRangeCalc.h b/lib/CodeGen/LiveRangeCalc.h
index b8c8585..909829b 100644
--- a/lib/CodeGen/LiveRangeCalc.h
+++ b/lib/CodeGen/LiveRangeCalc.h
@@ -34,6 +34,11 @@ template <class NodeT> class DomTreeNodeBase;
typedef DomTreeNodeBase<MachineBasicBlock> MachineDomTreeNode;
class LiveRangeCalc {
+ const MachineRegisterInfo *MRI;
+ SlotIndexes *Indexes;
+ MachineDominatorTree *DomTree;
+ VNInfo::Allocator *Alloc;
+
/// Seen - Bit vector of active entries in LiveOut, also used as a visited
/// set by findReachingDefs. One entry per basic block, indexed by block
/// number. This is kept as a separate bit vector because it can be cleared
@@ -100,26 +105,27 @@ class LiveRangeCalc {
/// to be live-in are added to LiveIn. If a unique reaching def is found,
/// its value is returned, if Kill is jointly dominated by multiple values,
/// NULL is returned.
+ ///
+ /// PhysReg, when set, is used to verify live-in lists on basic blocks.
VNInfo *findReachingDefs(LiveInterval *LI,
MachineBasicBlock *KillMBB,
SlotIndex Kill,
- SlotIndexes *Indexes,
- MachineDominatorTree *DomTree);
+ unsigned PhysReg);
/// updateSSA - Compute the values that will be live in to all requested
/// blocks in LiveIn. Create PHI-def values as required to preserve SSA form.
///
/// Every live-in block must be jointly dominated by the added live-out
/// blocks. No values are read from the live ranges.
- void updateSSA(SlotIndexes *Indexes,
- MachineDominatorTree *DomTree,
- VNInfo::Allocator *Alloc);
+ void updateSSA();
/// updateLiveIns - Add liveness as specified in the LiveIn vector, using VNI
/// as a wildcard value for LiveIn entries without a value.
- void updateLiveIns(VNInfo *VNI, SlotIndexes*);
+ void updateLiveIns(VNInfo *VNI);
public:
+ LiveRangeCalc() : MRI(0), Indexes(0), DomTree(0), Alloc(0) {}
+
//===--------------------------------------------------------------------===//
// High-level interface.
//===--------------------------------------------------------------------===//
@@ -132,14 +138,14 @@ public:
/// that may overlap a previously computed live range, and before the first
/// live range in a function. If live ranges are not known to be
/// non-overlapping, call reset before each.
- void reset(const MachineFunction *MF);
+ void reset(const MachineFunction *MF,
+ SlotIndexes*,
+ MachineDominatorTree*,
+ VNInfo::Allocator*);
/// calculate - Calculate the live range of a virtual register from its defs
/// and uses. LI must be empty with no values.
- void calculate(LiveInterval *LI,
- MachineRegisterInfo *MRI,
- SlotIndexes *Indexes,
- VNInfo::Allocator *Alloc);
+ void calculate(LiveInterval *LI);
//===--------------------------------------------------------------------===//
// Mid-level interface.
@@ -154,21 +160,30 @@ public:
/// Kill is not dominated by a single existing value, PHI-defs are inserted
/// as required to preserve SSA form. If Kill is known to be dominated by a
/// single existing value, Alloc may be null.
- void extend(LiveInterval *LI,
- SlotIndex Kill,
- SlotIndexes *Indexes,
- MachineDominatorTree *DomTree,
- VNInfo::Allocator *Alloc);
+ ///
+ /// PhysReg, when set, is used to verify live-in lists on basic blocks.
+ void extend(LiveInterval *LI, SlotIndex Kill, unsigned PhysReg = 0);
+
+ /// createDeadDefs - Create a dead def in LI for every def operand of Reg.
+ /// Each instruction defining Reg gets a new VNInfo with a corresponding
+ /// minimal live range.
+ void createDeadDefs(LiveInterval *LI, unsigned Reg);
- /// extendToUses - Extend the live range of LI to reach all uses.
+ /// createDeadDefs - Create a dead def in LI for every def of LI->reg.
+ void createDeadDefs(LiveInterval *LI) {
+ createDeadDefs(LI, LI->reg);
+ }
+
+ /// extendToUses - Extend the live range of LI to reach all uses of Reg.
///
/// All uses must be jointly dominated by existing liveness. PHI-defs are
/// inserted as needed to preserve SSA form.
- void extendToUses(LiveInterval *LI,
- MachineRegisterInfo *MRI,
- SlotIndexes *Indexes,
- MachineDominatorTree *DomTree,
- VNInfo::Allocator *Alloc);
+ void extendToUses(LiveInterval *LI, unsigned Reg);
+
+ /// extendToUses - Extend the live range of LI to reach all uses of LI->reg.
+ void extendToUses(LiveInterval *LI) {
+ extendToUses(LI, LI->reg);
+ }
//===--------------------------------------------------------------------===//
// Low-level interface.
@@ -216,9 +231,7 @@ public:
///
/// Every predecessor of a live-in block must have been given a value with
/// setLiveOutValue, the value may be null for live-through blocks.
- void calculateValues(SlotIndexes *Indexes,
- MachineDominatorTree *DomTree,
- VNInfo::Allocator *Alloc);
+ void calculateValues();
};
} // end namespace llvm
diff --git a/lib/CodeGen/LiveRangeEdit.cpp b/lib/CodeGen/LiveRangeEdit.cpp
index 695f536..b4ce9aa 100644
--- a/lib/CodeGen/LiveRangeEdit.cpp
+++ b/lib/CodeGen/LiveRangeEdit.cpp
@@ -38,7 +38,7 @@ LiveInterval &LiveRangeEdit::createFrom(unsigned OldReg) {
VRM->setIsSplitFromReg(VReg, VRM->getOriginal(OldReg));
}
LiveInterval &LI = LIS.getOrCreateInterval(VReg);
- newRegs_.push_back(&LI);
+ NewRegs.push_back(&LI);
return LI;
}
@@ -46,16 +46,16 @@ bool LiveRangeEdit::checkRematerializable(VNInfo *VNI,
const MachineInstr *DefMI,
AliasAnalysis *aa) {
assert(DefMI && "Missing instruction");
- scannedRemattable_ = true;
+ ScannedRemattable = true;
if (!TII.isTriviallyReMaterializable(DefMI, aa))
return false;
- remattable_.insert(VNI);
+ Remattable.insert(VNI);
return true;
}
void LiveRangeEdit::scanRemattable(AliasAnalysis *aa) {
- for (LiveInterval::vni_iterator I = parent_.vni_begin(),
- E = parent_.vni_end(); I != E; ++I) {
+ for (LiveInterval::vni_iterator I = getParent().vni_begin(),
+ E = getParent().vni_end(); I != E; ++I) {
VNInfo *VNI = *I;
if (VNI->isUnused())
continue;
@@ -64,13 +64,13 @@ void LiveRangeEdit::scanRemattable(AliasAnalysis *aa) {
continue;
checkRematerializable(VNI, DefMI, aa);
}
- scannedRemattable_ = true;
+ ScannedRemattable = true;
}
bool LiveRangeEdit::anyRematerializable(AliasAnalysis *aa) {
- if (!scannedRemattable_)
+ if (!ScannedRemattable)
scanRemattable(aa);
- return !remattable_.empty();
+ return !Remattable.empty();
}
/// allUsesAvailableAt - Return true if all registers used by OrigMI at
@@ -82,12 +82,16 @@ bool LiveRangeEdit::allUsesAvailableAt(const MachineInstr *OrigMI,
UseIdx = UseIdx.getRegSlot(true);
for (unsigned i = 0, e = OrigMI->getNumOperands(); i != e; ++i) {
const MachineOperand &MO = OrigMI->getOperand(i);
- if (!MO.isReg() || !MO.getReg() || MO.isDef())
- continue;
- // Reserved registers are OK.
- if (MO.isUndef() || !LIS.hasInterval(MO.getReg()))
+ if (!MO.isReg() || !MO.getReg() || !MO.readsReg())
continue;
+ // We can't remat physreg uses, unless it is a constant.
+ if (TargetRegisterInfo::isPhysicalRegister(MO.getReg())) {
+ if (MRI.isConstantPhysReg(MO.getReg(), VRM->getMachineFunction()))
+ continue;
+ return false;
+ }
+
LiveInterval &li = LIS.getInterval(MO.getReg());
const VNInfo *OVNI = li.getVNInfoAt(OrigIdx);
if (!OVNI)
@@ -101,10 +105,10 @@ bool LiveRangeEdit::allUsesAvailableAt(const MachineInstr *OrigMI,
bool LiveRangeEdit::canRematerializeAt(Remat &RM,
SlotIndex UseIdx,
bool cheapAsAMove) {
- assert(scannedRemattable_ && "Call anyRematerializable first");
+ assert(ScannedRemattable && "Call anyRematerializable first");
// Use scanRemattable info.
- if (!remattable_.count(RM.ParentVNI))
+ if (!Remattable.count(RM.ParentVNI))
return false;
// No defining instruction provided.
@@ -136,13 +140,13 @@ SlotIndex LiveRangeEdit::rematerializeAt(MachineBasicBlock &MBB,
bool Late) {
assert(RM.OrigMI && "Invalid remat");
TII.reMaterialize(MBB, MI, DestReg, 0, RM.OrigMI, tri);
- rematted_.insert(RM.ParentVNI);
+ Rematted.insert(RM.ParentVNI);
return LIS.getSlotIndexes()->insertMachineInstrInMaps(--MI, Late)
.getRegSlot();
}
void LiveRangeEdit::eraseVirtReg(unsigned Reg) {
- if (delegate_ && delegate_->LRE_CanEraseVirtReg(Reg))
+ if (TheDelegate && TheDelegate->LRE_CanEraseVirtReg(Reg))
LIS.removeInterval(Reg);
}
@@ -173,6 +177,19 @@ bool LiveRangeEdit::foldAsLoad(LiveInterval *LI,
if (!DefMI || !UseMI)
return false;
+ // Since we're moving the DefMI load, make sure we're not extending any live
+ // ranges.
+ if (!allUsesAvailableAt(DefMI,
+ LIS.getInstructionIndex(DefMI),
+ LIS.getInstructionIndex(UseMI)))
+ return false;
+
+ // We also need to make sure it is safe to move the load.
+ // Assume there are stores between DefMI and UseMI.
+ bool SawStore = true;
+ if (!DefMI->isSafeToMove(&TII, 0, SawStore))
+ return false;
+
DEBUG(dbgs() << "Try to fold single def: " << *DefMI
<< " into single use: " << *UseMI);
@@ -220,14 +237,22 @@ void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl<MachineInstr*> &Dead,
DEBUG(dbgs() << "Deleting dead def " << Idx << '\t' << *MI);
+ // Collect virtual registers to be erased after MI is gone.
+ SmallVector<unsigned, 8> RegsToErase;
+ bool ReadsPhysRegs = false;
+
// Check for live intervals that may shrink
for (MachineInstr::mop_iterator MOI = MI->operands_begin(),
MOE = MI->operands_end(); MOI != MOE; ++MOI) {
if (!MOI->isReg())
continue;
unsigned Reg = MOI->getReg();
- if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ if (!TargetRegisterInfo::isVirtualRegister(Reg)) {
+ // Check if MI reads any unreserved physregs.
+ if (Reg && MOI->readsReg() && !LIS.isReserved(Reg))
+ ReadsPhysRegs = true;
continue;
+ }
LiveInterval &LI = LIS.getInterval(Reg);
// Shrink read registers, unless it is likely to be expensive and
@@ -242,22 +267,49 @@ void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl<MachineInstr*> &Dead,
// Remove defined value.
if (MOI->isDef()) {
if (VNInfo *VNI = LI.getVNInfoAt(Idx)) {
- if (delegate_)
- delegate_->LRE_WillShrinkVirtReg(LI.reg);
+ if (TheDelegate)
+ TheDelegate->LRE_WillShrinkVirtReg(LI.reg);
LI.removeValNo(VNI);
- if (LI.empty()) {
- ToShrink.remove(&LI);
- eraseVirtReg(Reg);
- }
+ if (LI.empty())
+ RegsToErase.push_back(Reg);
}
}
}
- if (delegate_)
- delegate_->LRE_WillEraseInstruction(MI);
- LIS.RemoveMachineInstrFromMaps(MI);
- MI->eraseFromParent();
- ++NumDCEDeleted;
+ // Currently, we don't support DCE of physreg live ranges. If MI reads
+ // any unreserved physregs, don't erase the instruction, but turn it into
+ // a KILL instead. This way, the physreg live ranges don't end up
+ // dangling.
+ // FIXME: It would be better to have something like shrinkToUses() for
+ // physregs. That could potentially enable more DCE and it would free up
+ // the physreg. It would not happen often, though.
+ if (ReadsPhysRegs) {
+ MI->setDesc(TII.get(TargetOpcode::KILL));
+ // Remove all operands that aren't physregs.
+ for (unsigned i = MI->getNumOperands(); i; --i) {
+ const MachineOperand &MO = MI->getOperand(i-1);
+ if (MO.isReg() && TargetRegisterInfo::isPhysicalRegister(MO.getReg()))
+ continue;
+ MI->RemoveOperand(i-1);
+ }
+ DEBUG(dbgs() << "Converted physregs to:\t" << *MI);
+ } else {
+ if (TheDelegate)
+ TheDelegate->LRE_WillEraseInstruction(MI);
+ LIS.RemoveMachineInstrFromMaps(MI);
+ MI->eraseFromParent();
+ ++NumDCEDeleted;
+ }
+
+ // Erase any virtregs that are now empty and unused. There may be <undef>
+ // uses around. Keep the empty live range in that case.
+ for (unsigned i = 0, e = RegsToErase.size(); i != e; ++i) {
+ unsigned Reg = RegsToErase[i];
+ if (LIS.hasInterval(Reg) && MRI.reg_nodbg_empty(Reg)) {
+ ToShrink.remove(&LIS.getInterval(Reg));
+ eraseVirtReg(Reg);
+ }
+ }
}
if (ToShrink.empty())
@@ -268,8 +320,8 @@ void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl<MachineInstr*> &Dead,
ToShrink.pop_back();
if (foldAsLoad(LI, Dead))
continue;
- if (delegate_)
- delegate_->LRE_WillShrinkVirtReg(LI->reg);
+ if (TheDelegate)
+ TheDelegate->LRE_WillShrinkVirtReg(LI->reg);
if (!LIS.shrinkToUses(LI, &Dead))
continue;
@@ -304,10 +356,14 @@ void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl<MachineInstr*> &Dead,
// interval must contain all the split products, and LI doesn't.
if (IsOriginal)
VRM->setIsSplitFromReg(Dups.back()->reg, 0);
- if (delegate_)
- delegate_->LRE_DidCloneVirtReg(Dups.back()->reg, LI->reg);
+ if (TheDelegate)
+ TheDelegate->LRE_DidCloneVirtReg(Dups.back()->reg, LI->reg);
}
ConEQ.Distribute(&Dups[0], MRI);
+ DEBUG({
+ for (unsigned i = 0; i != NumComp; ++i)
+ dbgs() << '\t' << *Dups[i] << '\n';
+ });
}
}
diff --git a/lib/CodeGen/LiveRegMatrix.cpp b/lib/CodeGen/LiveRegMatrix.cpp
new file mode 100644
index 0000000..cdb1776
--- /dev/null
+++ b/lib/CodeGen/LiveRegMatrix.cpp
@@ -0,0 +1,152 @@
+//===-- LiveRegMatrix.cpp - Track register interference -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the LiveRegMatrix analysis pass.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "regalloc"
+#include "LiveRegMatrix.h"
+#include "VirtRegMap.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+STATISTIC(NumAssigned , "Number of registers assigned");
+STATISTIC(NumUnassigned , "Number of registers unassigned");
+
+char LiveRegMatrix::ID = 0;
+INITIALIZE_PASS_BEGIN(LiveRegMatrix, "liveregmatrix",
+ "Live Register Matrix", false, false)
+INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
+INITIALIZE_PASS_DEPENDENCY(VirtRegMap)
+INITIALIZE_PASS_END(LiveRegMatrix, "liveregmatrix",
+ "Live Register Matrix", false, false)
+
+LiveRegMatrix::LiveRegMatrix() : MachineFunctionPass(ID),
+ UserTag(0), RegMaskTag(0), RegMaskVirtReg(0) {}
+
+void LiveRegMatrix::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addRequiredTransitive<LiveIntervals>();
+ AU.addRequiredTransitive<VirtRegMap>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+bool LiveRegMatrix::runOnMachineFunction(MachineFunction &MF) {
+ TRI = MF.getTarget().getRegisterInfo();
+ MRI = &MF.getRegInfo();
+ LIS = &getAnalysis<LiveIntervals>();
+ VRM = &getAnalysis<VirtRegMap>();
+
+ unsigned NumRegUnits = TRI->getNumRegUnits();
+ if (NumRegUnits != Matrix.size())
+ Queries.reset(new LiveIntervalUnion::Query[NumRegUnits]);
+ Matrix.init(LIUAlloc, NumRegUnits);
+
+ // Make sure no stale queries get reused.
+ invalidateVirtRegs();
+ return false;
+}
+
+void LiveRegMatrix::releaseMemory() {
+ for (unsigned i = 0, e = Matrix.size(); i != e; ++i) {
+ Matrix[i].clear();
+ Queries[i].clear();
+ }
+}
+
+void LiveRegMatrix::assign(LiveInterval &VirtReg, unsigned PhysReg) {
+ DEBUG(dbgs() << "assigning " << PrintReg(VirtReg.reg, TRI)
+ << " to " << PrintReg(PhysReg, TRI) << ':');
+ assert(!VRM->hasPhys(VirtReg.reg) && "Duplicate VirtReg assignment");
+ VRM->assignVirt2Phys(VirtReg.reg, PhysReg);
+ MRI->setPhysRegUsed(PhysReg);
+ for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
+ DEBUG(dbgs() << ' ' << PrintRegUnit(*Units, TRI));
+ Matrix[*Units].unify(VirtReg);
+ }
+ ++NumAssigned;
+ DEBUG(dbgs() << '\n');
+}
+
+void LiveRegMatrix::unassign(LiveInterval &VirtReg) {
+ unsigned PhysReg = VRM->getPhys(VirtReg.reg);
+ DEBUG(dbgs() << "unassigning " << PrintReg(VirtReg.reg, TRI)
+ << " from " << PrintReg(PhysReg, TRI) << ':');
+ VRM->clearVirt(VirtReg.reg);
+ for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
+ DEBUG(dbgs() << ' ' << PrintRegUnit(*Units, TRI));
+ Matrix[*Units].extract(VirtReg);
+ }
+ ++NumUnassigned;
+ DEBUG(dbgs() << '\n');
+}
+
+bool LiveRegMatrix::checkRegMaskInterference(LiveInterval &VirtReg,
+ unsigned PhysReg) {
+ // Check if the cached information is valid.
+ // The same BitVector can be reused for all PhysRegs.
+ // We could cache multiple VirtRegs if it becomes necessary.
+ if (RegMaskVirtReg != VirtReg.reg || RegMaskTag != UserTag) {
+ RegMaskVirtReg = VirtReg.reg;
+ RegMaskTag = UserTag;
+ RegMaskUsable.clear();
+ LIS->checkRegMaskInterference(VirtReg, RegMaskUsable);
+ }
+
+ // The BitVector is indexed by PhysReg, not register unit.
+ // Regmask interference is more fine grained than regunits.
+ // For example, a Win64 call can clobber %ymm8 yet preserve %xmm8.
+ return !RegMaskUsable.empty() && (!PhysReg || !RegMaskUsable.test(PhysReg));
+}
+
+bool LiveRegMatrix::checkRegUnitInterference(LiveInterval &VirtReg,
+ unsigned PhysReg) {
+ if (VirtReg.empty())
+ return false;
+ for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units)
+ if (VirtReg.overlaps(LIS->getRegUnit(*Units)))
+ return true;
+ return false;
+}
+
+LiveIntervalUnion::Query &LiveRegMatrix::query(LiveInterval &VirtReg,
+ unsigned RegUnit) {
+ LiveIntervalUnion::Query &Q = Queries[RegUnit];
+ Q.init(UserTag, &VirtReg, &Matrix[RegUnit]);
+ return Q;
+}
+
+LiveRegMatrix::InterferenceKind
+LiveRegMatrix::checkInterference(LiveInterval &VirtReg, unsigned PhysReg) {
+ if (VirtReg.empty())
+ return IK_Free;
+
+ // Regmask interference is the fastest check.
+ if (checkRegMaskInterference(VirtReg, PhysReg))
+ return IK_RegMask;
+
+ // Check for fixed interference.
+ if (checkRegUnitInterference(VirtReg, PhysReg))
+ return IK_RegUnit;
+
+ // Check the matrix for virtual register interference.
+ for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units)
+ if (query(VirtReg, *Units).checkInterference())
+ return IK_VirtReg;
+
+ return IK_Free;
+}
diff --git a/lib/CodeGen/LiveRegMatrix.h b/lib/CodeGen/LiveRegMatrix.h
new file mode 100644
index 0000000..b3e2d7f
--- /dev/null
+++ b/lib/CodeGen/LiveRegMatrix.h
@@ -0,0 +1,148 @@
+//===-- LiveRegMatrix.h - Track register interference ---------*- C++ -*---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// The LiveRegMatrix analysis pass keeps track of virtual register interference
+// along two dimensions: Slot indexes and register units. The matrix is used by
+// register allocators to ensure that no interfering virtual registers get
+// assigned to overlapping physical registers.
+//
+// Register units are defined in MCRegisterInfo.h, they represent the smallest
+// unit of interference when dealing with overlapping physical registers. The
+// LiveRegMatrix is represented as a LiveIntervalUnion per register unit. When
+// a virtual register is assigned to a physical register, the live range for
+// the virtual register is inserted into the LiveIntervalUnion for each regunit
+// in the physreg.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_LIVEREGMATRIX_H
+#define LLVM_CODEGEN_LIVEREGMATRIX_H
+
+#include "LiveIntervalUnion.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+
+namespace llvm {
+
+class LiveInterval;
+class LiveIntervalAnalysis;
+class MachineRegisterInfo;
+class TargetRegisterInfo;
+class VirtRegMap;
+
+class LiveRegMatrix : public MachineFunctionPass {
+ const TargetRegisterInfo *TRI;
+ MachineRegisterInfo *MRI;
+ LiveIntervals *LIS;
+ VirtRegMap *VRM;
+
+ // UserTag changes whenever virtual registers have been modified.
+ unsigned UserTag;
+
+ // The matrix is represented as a LiveIntervalUnion per register unit.
+ LiveIntervalUnion::Allocator LIUAlloc;
+ LiveIntervalUnion::Array Matrix;
+
+ // Cached queries per register unit.
+ OwningArrayPtr<LiveIntervalUnion::Query> Queries;
+
+ // Cached register mask interference info.
+ unsigned RegMaskTag;
+ unsigned RegMaskVirtReg;
+ BitVector RegMaskUsable;
+
+ // MachineFunctionPass boilerplate.
+ virtual void getAnalysisUsage(AnalysisUsage&) const;
+ virtual bool runOnMachineFunction(MachineFunction&);
+ virtual void releaseMemory();
+public:
+ static char ID;
+ LiveRegMatrix();
+
+ //===--------------------------------------------------------------------===//
+ // High-level interface.
+ //===--------------------------------------------------------------------===//
+ //
+ // Check for interference before assigning virtual registers to physical
+ // registers.
+ //
+
+ /// Invalidate cached interference queries after modifying virtual register
+ /// live ranges. Interference checks may return stale information unless
+ /// caches are invalidated.
+ void invalidateVirtRegs() { ++UserTag; }
+
+ enum InterferenceKind {
+ /// No interference, go ahead and assign.
+ IK_Free = 0,
+
+ /// Virtual register interference. There are interfering virtual registers
+ /// assigned to PhysReg or its aliases. This interference could be resolved
+ /// by unassigning those other virtual registers.
+ IK_VirtReg,
+
+ /// Register unit interference. A fixed live range is in the way, typically
+ /// argument registers for a call. This can't be resolved by unassigning
+ /// other virtual registers.
+ IK_RegUnit,
+
+ /// RegMask interference. The live range is crossing an instruction with a
+ /// regmask operand that doesn't preserve PhysReg. This typically means
+ /// VirtReg is live across a call, and PhysReg isn't call-preserved.
+ IK_RegMask
+ };
+
+ /// Check for interference before assigning VirtReg to PhysReg.
+ /// If this function returns IK_Free, it is legal to assign(VirtReg, PhysReg).
+ /// When there is more than one kind of interference, the InterferenceKind
+ /// with the highest enum value is returned.
+ InterferenceKind checkInterference(LiveInterval &VirtReg, unsigned PhysReg);
+
+ /// Assign VirtReg to PhysReg.
+ /// This will mark VirtReg's live range as occupied in the LiveRegMatrix and
+ /// update VirtRegMap. The live range is expected to be available in PhysReg.
+ void assign(LiveInterval &VirtReg, unsigned PhysReg);
+
+ /// Unassign VirtReg from its PhysReg.
+ /// Assuming that VirtReg was previously assigned to a PhysReg, this undoes
+ /// the assignment and updates VirtRegMap accordingly.
+ void unassign(LiveInterval &VirtReg);
+
+ //===--------------------------------------------------------------------===//
+ // Low-level interface.
+ //===--------------------------------------------------------------------===//
+ //
+ // Provide access to the underlying LiveIntervalUnions.
+ //
+
+ /// Check for regmask interference only.
+ /// Return true if VirtReg crosses a regmask operand that clobbers PhysReg.
+ /// If PhysReg is null, check if VirtReg crosses any regmask operands.
+ bool checkRegMaskInterference(LiveInterval &VirtReg, unsigned PhysReg = 0);
+
+ /// Check for regunit interference only.
+ /// Return true if VirtReg overlaps a fixed assignment of one of PhysReg's
+ /// register units.
+ bool checkRegUnitInterference(LiveInterval &VirtReg, unsigned PhysReg);
+
+ /// Query a line of the assigned virtual register matrix directly.
+ /// Use MCRegUnitIterator to enumerate all regunits in the desired PhysReg.
+ /// This returns a reference to an internal Query data structure that is only
+ /// valid until the next query() call.
+ LiveIntervalUnion::Query &query(LiveInterval &VirtReg, unsigned RegUnit);
+
+ /// Directly access the live interval unions per regunit.
+ /// This returns an array indexed by the regunit number.
+ LiveIntervalUnion *getLiveUnions() { return &Matrix[0]; }
+};
+
+} // end namespace llvm
+
+#endif // LLVM_CODEGEN_LIVEREGMATRIX_H
diff --git a/lib/CodeGen/LiveVariables.cpp b/lib/CodeGen/LiveVariables.cpp
index 5a0d97d..348ed3a 100644
--- a/lib/CodeGen/LiveVariables.cpp
+++ b/lib/CodeGen/LiveVariables.cpp
@@ -192,8 +192,8 @@ MachineInstr *LiveVariables::FindLastPartialDef(unsigned Reg,
unsigned LastDefReg = 0;
unsigned LastDefDist = 0;
MachineInstr *LastDef = NULL;
- for (const uint16_t *SubRegs = TRI->getSubRegisters(Reg);
- unsigned SubReg = *SubRegs; ++SubRegs) {
+ for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) {
+ unsigned SubReg = *SubRegs;
MachineInstr *Def = PhysRegDef[SubReg];
if (!Def)
continue;
@@ -216,9 +216,8 @@ MachineInstr *LiveVariables::FindLastPartialDef(unsigned Reg,
unsigned DefReg = MO.getReg();
if (TRI->isSubRegister(Reg, DefReg)) {
PartDefRegs.insert(DefReg);
- for (const uint16_t *SubRegs = TRI->getSubRegisters(DefReg);
- unsigned SubReg = *SubRegs; ++SubRegs)
- PartDefRegs.insert(SubReg);
+ for (MCSubRegIterator SubRegs(DefReg, TRI); SubRegs.isValid(); ++SubRegs)
+ PartDefRegs.insert(*SubRegs);
}
}
return LastDef;
@@ -247,8 +246,8 @@ void LiveVariables::HandlePhysRegUse(unsigned Reg, MachineInstr *MI) {
true/*IsImp*/));
PhysRegDef[Reg] = LastPartialDef;
SmallSet<unsigned, 8> Processed;
- for (const uint16_t *SubRegs = TRI->getSubRegisters(Reg);
- unsigned SubReg = *SubRegs; ++SubRegs) {
+ for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) {
+ unsigned SubReg = *SubRegs;
if (Processed.count(SubReg))
continue;
if (PartDefRegs.count(SubReg))
@@ -259,7 +258,7 @@ void LiveVariables::HandlePhysRegUse(unsigned Reg, MachineInstr *MI) {
false/*IsDef*/,
true/*IsImp*/));
PhysRegDef[SubReg] = LastPartialDef;
- for (const uint16_t *SS = TRI->getSubRegisters(SubReg); *SS; ++SS)
+ for (MCSubRegIterator SS(SubReg, TRI); SS.isValid(); ++SS)
Processed.insert(*SS);
}
}
@@ -271,9 +270,8 @@ void LiveVariables::HandlePhysRegUse(unsigned Reg, MachineInstr *MI) {
// Remember this use.
PhysRegUse[Reg] = MI;
- for (const uint16_t *SubRegs = TRI->getSubRegisters(Reg);
- unsigned SubReg = *SubRegs; ++SubRegs)
- PhysRegUse[SubReg] = MI;
+ for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs)
+ PhysRegUse[*SubRegs] = MI;
}
/// FindLastRefOrPartRef - Return the last reference or partial reference of
@@ -287,8 +285,8 @@ MachineInstr *LiveVariables::FindLastRefOrPartRef(unsigned Reg) {
MachineInstr *LastRefOrPartRef = LastUse ? LastUse : LastDef;
unsigned LastRefOrPartRefDist = DistanceMap[LastRefOrPartRef];
unsigned LastPartDefDist = 0;
- for (const uint16_t *SubRegs = TRI->getSubRegisters(Reg);
- unsigned SubReg = *SubRegs; ++SubRegs) {
+ for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) {
+ unsigned SubReg = *SubRegs;
MachineInstr *Def = PhysRegDef[SubReg];
if (Def && Def != LastDef) {
// There was a def of this sub-register in between. This is a partial
@@ -336,8 +334,8 @@ bool LiveVariables::HandlePhysRegKill(unsigned Reg, MachineInstr *MI) {
MachineInstr *LastPartDef = 0;
unsigned LastPartDefDist = 0;
SmallSet<unsigned, 8> PartUses;
- for (const uint16_t *SubRegs = TRI->getSubRegisters(Reg);
- unsigned SubReg = *SubRegs; ++SubRegs) {
+ for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) {
+ unsigned SubReg = *SubRegs;
MachineInstr *Def = PhysRegDef[SubReg];
if (Def && Def != LastDef) {
// There was a def of this sub-register in between. This is a partial
@@ -351,7 +349,7 @@ bool LiveVariables::HandlePhysRegKill(unsigned Reg, MachineInstr *MI) {
}
if (MachineInstr *Use = PhysRegUse[SubReg]) {
PartUses.insert(SubReg);
- for (const uint16_t *SS = TRI->getSubRegisters(SubReg); *SS; ++SS)
+ for (MCSubRegIterator SS(SubReg, TRI); SS.isValid(); ++SS)
PartUses.insert(*SS);
unsigned Dist = DistanceMap[Use];
if (Dist > LastRefOrPartRefDist) {
@@ -367,8 +365,8 @@ bool LiveVariables::HandlePhysRegKill(unsigned Reg, MachineInstr *MI) {
// EAX<dead> = op AL<imp-def>
// That is, EAX def is dead but AL def extends past it.
PhysRegDef[Reg]->addRegisterDead(Reg, TRI, true);
- for (const uint16_t *SubRegs = TRI->getSubRegisters(Reg);
- unsigned SubReg = *SubRegs; ++SubRegs) {
+ for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) {
+ unsigned SubReg = *SubRegs;
if (!PartUses.count(SubReg))
continue;
bool NeedDef = true;
@@ -388,11 +386,10 @@ bool LiveVariables::HandlePhysRegKill(unsigned Reg, MachineInstr *MI) {
else {
LastRefOrPartRef->addRegisterKilled(SubReg, TRI, true);
PhysRegUse[SubReg] = LastRefOrPartRef;
- for (const uint16_t *SSRegs = TRI->getSubRegisters(SubReg);
- unsigned SSReg = *SSRegs; ++SSRegs)
- PhysRegUse[SSReg] = LastRefOrPartRef;
+ for (MCSubRegIterator SS(SubReg, TRI); SS.isValid(); ++SS)
+ PhysRegUse[*SS] = LastRefOrPartRef;
}
- for (const uint16_t *SS = TRI->getSubRegisters(SubReg); *SS; ++SS)
+ for (MCSubRegIterator SS(SubReg, TRI); SS.isValid(); ++SS)
PartUses.erase(*SS);
}
} else if (LastRefOrPartRef == PhysRegDef[Reg] && LastRefOrPartRef != MI) {
@@ -434,7 +431,7 @@ void LiveVariables::HandleRegMask(const MachineOperand &MO) {
// Kill the largest clobbered super-register.
// This avoids needless implicit operands.
unsigned Super = Reg;
- for (const uint16_t *SR = TRI->getSuperRegisters(Reg); *SR; ++SR)
+ for (MCSuperRegIterator SR(Reg, TRI); SR.isValid(); ++SR)
if ((PhysRegDef[*SR] || PhysRegUse[*SR]) && MO.clobbersPhysReg(*SR))
Super = *SR;
HandlePhysRegKill(Super, 0);
@@ -447,11 +444,11 @@ void LiveVariables::HandlePhysRegDef(unsigned Reg, MachineInstr *MI,
SmallSet<unsigned, 32> Live;
if (PhysRegDef[Reg] || PhysRegUse[Reg]) {
Live.insert(Reg);
- for (const uint16_t *SS = TRI->getSubRegisters(Reg); *SS; ++SS)
- Live.insert(*SS);
+ for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs)
+ Live.insert(*SubRegs);
} else {
- for (const uint16_t *SubRegs = TRI->getSubRegisters(Reg);
- unsigned SubReg = *SubRegs; ++SubRegs) {
+ for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) {
+ unsigned SubReg = *SubRegs;
// If a register isn't itself defined, but all parts that make it up
// are defined, then consider it also defined.
// e.g.
@@ -462,7 +459,7 @@ void LiveVariables::HandlePhysRegDef(unsigned Reg, MachineInstr *MI,
continue;
if (PhysRegDef[SubReg] || PhysRegUse[SubReg]) {
Live.insert(SubReg);
- for (const uint16_t *SS = TRI->getSubRegisters(SubReg); *SS; ++SS)
+ for (MCSubRegIterator SS(SubReg, TRI); SS.isValid(); ++SS)
Live.insert(*SS);
}
}
@@ -472,8 +469,8 @@ void LiveVariables::HandlePhysRegDef(unsigned Reg, MachineInstr *MI,
// is referenced.
HandlePhysRegKill(Reg, MI);
// Only some of the sub-registers are used.
- for (const uint16_t *SubRegs = TRI->getSubRegisters(Reg);
- unsigned SubReg = *SubRegs; ++SubRegs) {
+ for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) {
+ unsigned SubReg = *SubRegs;
if (!Live.count(SubReg))
// Skip if this sub-register isn't defined.
continue;
@@ -491,8 +488,8 @@ void LiveVariables::UpdatePhysRegDefs(MachineInstr *MI,
Defs.pop_back();
PhysRegDef[Reg] = MI;
PhysRegUse[Reg] = NULL;
- for (const uint16_t *SubRegs = TRI->getSubRegisters(Reg);
- unsigned SubReg = *SubRegs; ++SubRegs) {
+ for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) {
+ unsigned SubReg = *SubRegs;
PhysRegDef[SubReg] = MI;
PhysRegUse[SubReg] = NULL;
}
@@ -576,7 +573,8 @@ bool LiveVariables::runOnMachineFunction(MachineFunction &mf) {
unsigned MOReg = MO.getReg();
if (MO.isUse()) {
MO.setIsKill(false);
- UseRegs.push_back(MOReg);
+ if (MO.readsReg())
+ UseRegs.push_back(MOReg);
} else /*MO.isDef()*/ {
MO.setIsDead(false);
DefRegs.push_back(MOReg);
@@ -732,8 +730,9 @@ void LiveVariables::analyzePHINodes(const MachineFunction& Fn) {
for (MachineBasicBlock::const_iterator BBI = I->begin(), BBE = I->end();
BBI != BBE && BBI->isPHI(); ++BBI)
for (unsigned i = 1, e = BBI->getNumOperands(); i != e; i += 2)
- PHIVarInfo[BBI->getOperand(i + 1).getMBB()->getNumber()]
- .push_back(BBI->getOperand(i).getReg());
+ if (BBI->getOperand(i).readsReg())
+ PHIVarInfo[BBI->getOperand(i + 1).getMBB()->getNumber()]
+ .push_back(BBI->getOperand(i).getReg());
}
bool LiveVariables::VarInfo::isLiveIn(const MachineBasicBlock &MBB,
diff --git a/lib/CodeGen/LocalStackSlotAllocation.cpp b/lib/CodeGen/LocalStackSlotAllocation.cpp
index 238bf52..fbc9e20 100644
--- a/lib/CodeGen/LocalStackSlotAllocation.cpp
+++ b/lib/CodeGen/LocalStackSlotAllocation.cpp
@@ -314,7 +314,8 @@ bool LocalStackSlotPass::insertFrameReferenceRegisters(MachineFunction &Fn) {
// No previously defined register was in range, so create a
// new one.
int64_t InstrOffset = TRI->getFrameIndexInstrOffset(MI, idx);
- const TargetRegisterClass *RC = TRI->getPointerRegClass();
+ const MachineFunction *MF = MI->getParent()->getParent();
+ const TargetRegisterClass *RC = TRI->getPointerRegClass(*MF);
BaseReg = Fn.getRegInfo().createVirtualRegister(RC);
DEBUG(dbgs() << " Materializing base register " << BaseReg <<
diff --git a/lib/CodeGen/MachineBasicBlock.cpp b/lib/CodeGen/MachineBasicBlock.cpp
index 1abb8f2..fa6b450 100644
--- a/lib/CodeGen/MachineBasicBlock.cpp
+++ b/lib/CodeGen/MachineBasicBlock.cpp
@@ -109,7 +109,8 @@ void ilist_traits<MachineInstr>::removeNodeFromList(MachineInstr *N) {
assert(N->getParent() != 0 && "machine instruction not in a basic block");
// Remove from the use/def lists.
- N->RemoveRegOperandsFromUseLists();
+ if (MachineFunction *MF = N->getParent()->getParent())
+ N->RemoveRegOperandsFromUseLists(MF->getRegInfo());
N->setParent(0);
@@ -271,11 +272,9 @@ void MachineBasicBlock::print(raw_ostream &OS, SlotIndexes *Indexes) const {
}
if (isLandingPad()) { OS << Comma << "EH LANDING PAD"; Comma = ", "; }
if (hasAddressTaken()) { OS << Comma << "ADDRESS TAKEN"; Comma = ", "; }
- if (Alignment) {
+ if (Alignment)
OS << Comma << "Align " << Alignment << " (" << (1u << Alignment)
<< " bytes)";
- Comma = ", ";
- }
OS << '\n';
@@ -312,8 +311,11 @@ void MachineBasicBlock::print(raw_ostream &OS, SlotIndexes *Indexes) const {
if (!succ_empty()) {
if (Indexes) OS << '\t';
OS << " Successors according to CFG:";
- for (const_succ_iterator SI = succ_begin(), E = succ_end(); SI != E; ++SI)
+ for (const_succ_iterator SI = succ_begin(), E = succ_end(); SI != E; ++SI) {
OS << " BB#" << (*SI)->getNumber();
+ if (!Weights.empty())
+ OS << '(' << *getWeightIterator(SI) << ')';
+ }
OS << '\n';
}
}
@@ -479,18 +481,42 @@ MachineBasicBlock::removeSuccessor(succ_iterator I) {
void MachineBasicBlock::replaceSuccessor(MachineBasicBlock *Old,
MachineBasicBlock *New) {
- uint32_t weight = 0;
- succ_iterator SI = std::find(Successors.begin(), Successors.end(), Old);
+ if (Old == New)
+ return;
- // If Weight list is empty it means we don't use it (disabled optimization).
- if (!Weights.empty()) {
- weight_iterator WI = getWeightIterator(SI);
- weight = *WI;
+ succ_iterator E = succ_end();
+ succ_iterator NewI = E;
+ succ_iterator OldI = E;
+ for (succ_iterator I = succ_begin(); I != E; ++I) {
+ if (*I == Old) {
+ OldI = I;
+ if (NewI != E)
+ break;
+ }
+ if (*I == New) {
+ NewI = I;
+ if (OldI != E)
+ break;
+ }
}
+ assert(OldI != E && "Old is not a successor of this block");
+ Old->removePredecessor(this);
- // Update the successor information.
- removeSuccessor(SI);
- addSuccessor(New, weight);
+ // If New isn't already a successor, let it take Old's place.
+ if (NewI == E) {
+ New->addPredecessor(this);
+ *OldI = New;
+ return;
+ }
+
+ // New is already a successor.
+ // Update its weight instead of adding a duplicate edge.
+ if (!Weights.empty()) {
+ weight_iterator OldWI = getWeightIterator(OldI);
+ *getWeightIterator(NewI) += *OldWI;
+ Weights.erase(OldWI);
+ }
+ Successors.erase(OldI);
}
void MachineBasicBlock::addPredecessor(MachineBasicBlock *pred) {
@@ -509,14 +535,13 @@ void MachineBasicBlock::transferSuccessors(MachineBasicBlock *fromMBB) {
while (!fromMBB->succ_empty()) {
MachineBasicBlock *Succ = *fromMBB->succ_begin();
- uint32_t weight = 0;
-
+ uint32_t Weight = 0;
// If Weight list is empty it means we don't use it (disabled optimization).
if (!fromMBB->Weights.empty())
- weight = *fromMBB->Weights.begin();
+ Weight = *fromMBB->Weights.begin();
- addSuccessor(Succ, weight);
+ addSuccessor(Succ, Weight);
fromMBB->removeSuccessor(Succ);
}
}
@@ -528,7 +553,10 @@ MachineBasicBlock::transferSuccessorsAndUpdatePHIs(MachineBasicBlock *fromMBB) {
while (!fromMBB->succ_empty()) {
MachineBasicBlock *Succ = *fromMBB->succ_begin();
- addSuccessor(Succ);
+ uint32_t Weight = 0;
+ if (!fromMBB->Weights.empty())
+ Weight = *fromMBB->Weights.begin();
+ addSuccessor(Succ, Weight);
fromMBB->removeSuccessor(Succ);
// Fix up any PHI nodes in the successor.
@@ -542,9 +570,12 @@ MachineBasicBlock::transferSuccessorsAndUpdatePHIs(MachineBasicBlock *fromMBB) {
}
}
+bool MachineBasicBlock::isPredecessor(const MachineBasicBlock *MBB) const {
+ return std::find(pred_begin(), pred_end(), MBB) != pred_end();
+}
+
bool MachineBasicBlock::isSuccessor(const MachineBasicBlock *MBB) const {
- const_succ_iterator I = std::find(Successors.begin(), Successors.end(), MBB);
- return I != Successors.end();
+ return std::find(succ_begin(), succ_end(), MBB) != succ_end();
}
bool MachineBasicBlock::isLayoutSuccessor(const MachineBasicBlock *MBB) const {
@@ -596,6 +627,11 @@ bool MachineBasicBlock::canFallThrough() {
MachineBasicBlock *
MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) {
+ // Splitting the critical edge to a landing pad block is non-trivial. Don't do
+ // it in this generic function.
+ if (Succ->isLandingPad())
+ return NULL;
+
MachineFunction *MF = getParent();
DebugLoc dl; // FIXME: this is nowhere
@@ -670,7 +706,7 @@ MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) {
// Inherit live-ins from the successor
for (MachineBasicBlock::livein_iterator I = Succ->livein_begin(),
- E = Succ->livein_end(); I != E; ++I)
+ E = Succ->livein_end(); I != E; ++I)
NMBB->addLiveIn(*I);
// Update LiveVariables.
diff --git a/lib/CodeGen/MachineBlockPlacement.cpp b/lib/CodeGen/MachineBlockPlacement.cpp
index 5ba6851..c4dca2c 100644
--- a/lib/CodeGen/MachineBlockPlacement.cpp
+++ b/lib/CodeGen/MachineBlockPlacement.cpp
@@ -11,7 +11,7 @@
// structure and branch probability estimates.
//
// The pass strives to preserve the structure of the CFG (that is, retain
-// a topological ordering of basic blocks) in the absense of a *strong* signal
+// a topological ordering of basic blocks) in the absence of a *strong* signal
// to the contrary from probabilities. However, within the CFG structure, it
// attempts to choose an ordering which favors placing more likely sequences of
// blocks adjacent to each other.
@@ -63,17 +63,13 @@ namespace {
///
/// This is the datastructure representing a chain of consecutive blocks that
/// are profitable to layout together in order to maximize fallthrough
-/// probabilities. We also can use a block chain to represent a sequence of
-/// basic blocks which have some external (correctness) requirement for
-/// sequential layout.
+/// probabilities and code locality. We also can use a block chain to represent
+/// a sequence of basic blocks which have some external (correctness)
+/// requirement for sequential layout.
///
-/// Eventually, the block chains will form a directed graph over the function.
-/// We provide an SCC-supporting-iterator in order to quicky build and walk the
-/// SCCs of block chains within a function.
-///
-/// The block chains also have support for calculating and caching probability
-/// information related to the chain itself versus other chains. This is used
-/// for ranking during the final layout of block chains.
+/// Chains can be built around a single basic block and can be merged to grow
+/// them. They participate in a block-to-chain mapping, which is updated
+/// automatically as chains are merged together.
class BlockChain {
/// \brief The sequence of blocks belonging to this chain.
///
@@ -179,10 +175,11 @@ class MachineBlockPlacement : public MachineFunctionPass {
/// \brief Allocator and owner of BlockChain structures.
///
- /// We build BlockChains lazily by merging together high probability BB
- /// sequences acording to the "Algo2" in the paper mentioned at the top of
- /// the file. To reduce malloc traffic, we allocate them using this slab-like
- /// allocator, and destroy them after the pass completes.
+ /// We build BlockChains lazily while processing the loop structure of
+ /// a function. To reduce malloc traffic, we allocate them using this
+ /// slab-like allocator, and destroy them after the pass completes. An
+ /// important guarantee is that this allocator produces stable pointers to
+ /// the chains.
SpecificBumpPtrAllocator<BlockChain> ChainAllocator;
/// \brief Function wide BasicBlock to BlockChain mapping.
@@ -329,7 +326,7 @@ MachineBasicBlock *MachineBlockPlacement::selectBestSuccessor(
// the MBPI analysis, we manually compute probabilities using the edge
// weights. This is suboptimal as it means that the somewhat subtle
// definition of edge weight semantics is encoded here as well. We should
- // improve the MBPI interface to effeciently support query patterns such as
+ // improve the MBPI interface to efficiently support query patterns such as
// this.
uint32_t BestWeight = 0;
uint32_t WeightScale = 0;
@@ -988,8 +985,22 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) {
// boiler plate.
Cond.clear();
MachineBasicBlock *TBB = 0, *FBB = 0; // For AnalyzeBranch.
- if (!TII->AnalyzeBranch(*PrevBB, TBB, FBB, Cond))
+ if (!TII->AnalyzeBranch(*PrevBB, TBB, FBB, Cond)) {
+ // If PrevBB has a two-way branch, try to re-order the branches
+ // such that we branch to the successor with higher weight first.
+ if (TBB && !Cond.empty() && FBB &&
+ MBPI->getEdgeWeight(PrevBB, FBB) > MBPI->getEdgeWeight(PrevBB, TBB) &&
+ !TII->ReverseBranchCondition(Cond)) {
+ DEBUG(dbgs() << "Reverse order of the two branches: "
+ << getBlockName(PrevBB) << "\n");
+ DEBUG(dbgs() << " Edge weight: " << MBPI->getEdgeWeight(PrevBB, FBB)
+ << " vs " << MBPI->getEdgeWeight(PrevBB, TBB) << "\n");
+ DebugLoc dl; // FIXME: this is nowhere
+ TII->RemoveBranch(*PrevBB);
+ TII->InsertBranch(*PrevBB, FBB, TBB, Cond, dl);
+ }
PrevBB->updateTerminator();
+ }
}
// Fixup the last block.
@@ -1000,29 +1011,63 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) {
// Walk through the backedges of the function now that we have fully laid out
// the basic blocks and align the destination of each backedge. We don't rely
- // on the loop info here so that we can align backedges in unnatural CFGs and
- // backedges that were introduced purely because of the loop rotations done
- // during this layout pass.
- // FIXME: This isn't quite right, we shouldn't align backedges that result
- // from blocks being sunken below the exit block for the function.
+ // exclusively on the loop info here so that we can align backedges in
+ // unnatural CFGs and backedges that were introduced purely because of the
+ // loop rotations done during this layout pass.
if (F.getFunction()->hasFnAttr(Attribute::OptimizeForSize))
return;
unsigned Align = TLI->getPrefLoopAlignment();
if (!Align)
return; // Don't care about loop alignment.
+ if (FunctionChain.begin() == FunctionChain.end())
+ return; // Empty chain.
- SmallPtrSet<MachineBasicBlock *, 16> PreviousBlocks;
- for (BlockChain::iterator BI = FunctionChain.begin(),
+ const BranchProbability ColdProb(1, 5); // 20%
+ BlockFrequency EntryFreq = MBFI->getBlockFreq(F.begin());
+ BlockFrequency WeightedEntryFreq = EntryFreq * ColdProb;
+ for (BlockChain::iterator BI = llvm::next(FunctionChain.begin()),
BE = FunctionChain.end();
BI != BE; ++BI) {
- PreviousBlocks.insert(*BI);
- // Set alignment on the destination of all the back edges in the new
- // ordering.
- for (MachineBasicBlock::succ_iterator SI = (*BI)->succ_begin(),
- SE = (*BI)->succ_end();
- SI != SE; ++SI)
- if (PreviousBlocks.count(*SI))
- (*SI)->setAlignment(Align);
+ // Don't align non-looping basic blocks. These are unlikely to execute
+ // enough times to matter in practice. Note that we'll still handle
+ // unnatural CFGs inside of a natural outer loop (the common case) and
+ // rotated loops.
+ MachineLoop *L = MLI->getLoopFor(*BI);
+ if (!L)
+ continue;
+
+ // If the block is cold relative to the function entry don't waste space
+ // aligning it.
+ BlockFrequency Freq = MBFI->getBlockFreq(*BI);
+ if (Freq < WeightedEntryFreq)
+ continue;
+
+ // If the block is cold relative to its loop header, don't align it
+ // regardless of what edges into the block exist.
+ MachineBasicBlock *LoopHeader = L->getHeader();
+ BlockFrequency LoopHeaderFreq = MBFI->getBlockFreq(LoopHeader);
+ if (Freq < (LoopHeaderFreq * ColdProb))
+ continue;
+
+ // Check for the existence of a non-layout predecessor which would benefit
+ // from aligning this block.
+ MachineBasicBlock *LayoutPred = *llvm::prior(BI);
+
+ // Force alignment if all the predecessors are jumps. We already checked
+ // that the block isn't cold above.
+ if (!LayoutPred->isSuccessor(*BI)) {
+ (*BI)->setAlignment(Align);
+ continue;
+ }
+
+ // Align this block if the layout predecessor's edge into this block is
+ // cold relative to the block. When this is true, other predecessors make up
+ // all of the hot entries into the block and thus alignment is likely to be
+ // important.
+ BranchProbability LayoutProb = MBPI->getEdgeProbability(LayoutPred, *BI);
+ BlockFrequency LayoutEdgeFreq = MBFI->getBlockFreq(LayoutPred) * LayoutProb;
+ if (LayoutEdgeFreq <= (Freq * ColdProb))
+ (*BI)->setAlignment(Align);
}
}
@@ -1053,7 +1098,7 @@ namespace {
///
/// A separate pass to compute interesting statistics for evaluating block
/// placement. This is separate from the actual placement pass so that they can
-/// be computed in the absense of any placement transformations or when using
+/// be computed in the absence of any placement transformations or when using
/// alternative placement strategies.
class MachineBlockPlacementStats : public MachineFunctionPass {
/// \brief A handle to the branch probability pass.
diff --git a/lib/CodeGen/MachineCSE.cpp b/lib/CodeGen/MachineCSE.cpp
index a63688e..896461f 100644
--- a/lib/CodeGen/MachineCSE.cpp
+++ b/lib/CodeGen/MachineCSE.cpp
@@ -84,7 +84,7 @@ namespace {
bool PerformTrivialCoalescing(MachineInstr *MI, MachineBasicBlock *MBB);
bool isPhysDefTriviallyDead(unsigned Reg,
MachineBasicBlock::const_iterator I,
- MachineBasicBlock::const_iterator E) const ;
+ MachineBasicBlock::const_iterator E) const;
bool hasLivePhysRegDefUses(const MachineInstr *MI,
const MachineBasicBlock *MBB,
SmallSet<unsigned,8> &PhysRefs,
@@ -100,8 +100,7 @@ namespace {
void ExitScope(MachineBasicBlock *MBB);
bool ProcessBlock(MachineBasicBlock *MBB);
void ExitScopeIfDone(MachineDomTreeNode *Node,
- DenseMap<MachineDomTreeNode*, unsigned> &OpenChildren,
- DenseMap<MachineDomTreeNode*, MachineDomTreeNode*> &ParentMap);
+ DenseMap<MachineDomTreeNode*, unsigned> &OpenChildren);
bool PerformCSE(MachineDomTreeNode *Node);
};
} // end anonymous namespace
@@ -216,11 +215,12 @@ bool MachineCSE::hasLivePhysRegDefUses(const MachineInstr *MI,
if (MO.isDef() &&
(MO.isDead() || isPhysDefTriviallyDead(Reg, I, MBB->end())))
continue;
- PhysRefs.insert(Reg);
+ // Reading constant physregs is ok.
+ if (!MRI->isConstantPhysReg(Reg, *MBB->getParent()))
+ for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
+ PhysRefs.insert(*AI);
if (MO.isDef())
PhysDefs.push_back(Reg);
- for (const uint16_t *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias)
- PhysRefs.insert(*Alias);
}
return !PhysRefs.empty();
@@ -326,6 +326,29 @@ bool MachineCSE::isProfitableToCSE(unsigned CSReg, unsigned Reg,
MachineInstr *CSMI, MachineInstr *MI) {
// FIXME: Heuristics that work around the lack of live range splitting.
+ // If CSReg is used at all uses of Reg, CSE should not increase register
+ // pressure of CSReg.
+ bool MayIncreasePressure = true;
+ if (TargetRegisterInfo::isVirtualRegister(CSReg) &&
+ TargetRegisterInfo::isVirtualRegister(Reg)) {
+ MayIncreasePressure = false;
+ SmallPtrSet<MachineInstr*, 8> CSUses;
+ for (MachineRegisterInfo::use_nodbg_iterator I =MRI->use_nodbg_begin(CSReg),
+ E = MRI->use_nodbg_end(); I != E; ++I) {
+ MachineInstr *Use = &*I;
+ CSUses.insert(Use);
+ }
+ for (MachineRegisterInfo::use_nodbg_iterator I = MRI->use_nodbg_begin(Reg),
+ E = MRI->use_nodbg_end(); I != E; ++I) {
+ MachineInstr *Use = &*I;
+ if (!CSUses.count(Use)) {
+ MayIncreasePressure = true;
+ break;
+ }
+ }
+ }
+ if (!MayIncreasePressure) return true;
+
// Heuristics #1: Don't CSE "cheap" computation if the def is not local or in
// an immediate predecessor. We don't want to increase register pressure and
// end up causing other computation to be spilled.
@@ -396,6 +419,7 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) {
bool Changed = false;
SmallVector<std::pair<unsigned, unsigned>, 8> CSEPairs;
+ SmallVector<unsigned, 2> ImplicitDefsToUpdate;
for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E; ) {
MachineInstr *MI = &*I;
++I;
@@ -437,7 +461,7 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) {
// used, then it's not safe to replace it with a common subexpression.
// It's also not safe if the instruction uses physical registers.
bool CrossMBBPhysDef = false;
- SmallSet<unsigned,8> PhysRefs;
+ SmallSet<unsigned, 8> PhysRefs;
SmallVector<unsigned, 2> PhysDefs;
if (FoundCSE && hasLivePhysRegDefUses(MI, MBB, PhysRefs, PhysDefs)) {
FoundCSE = false;
@@ -465,21 +489,31 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) {
// Check if it's profitable to perform this CSE.
bool DoCSE = true;
- unsigned NumDefs = MI->getDesc().getNumDefs();
+ unsigned NumDefs = MI->getDesc().getNumDefs() +
+ MI->getDesc().getNumImplicitDefs();
+
for (unsigned i = 0, e = MI->getNumOperands(); NumDefs && i != e; ++i) {
MachineOperand &MO = MI->getOperand(i);
if (!MO.isReg() || !MO.isDef())
continue;
unsigned OldReg = MO.getReg();
unsigned NewReg = CSMI->getOperand(i).getReg();
- if (OldReg == NewReg)
+
+ // Go through implicit defs of CSMI and MI; if a def is not dead at MI,
+ // we should make sure it is not dead at CSMI.
+ if (MO.isImplicit() && !MO.isDead() && CSMI->getOperand(i).isDead())
+ ImplicitDefsToUpdate.push_back(i);
+ if (OldReg == NewReg) {
+ --NumDefs;
continue;
+ }
assert(TargetRegisterInfo::isVirtualRegister(OldReg) &&
TargetRegisterInfo::isVirtualRegister(NewReg) &&
"Do not CSE physical register defs!");
if (!isProfitableToCSE(NewReg, OldReg, CSMI, MI)) {
+ DEBUG(dbgs() << "*** Not profitable, avoid CSE!\n");
DoCSE = false;
break;
}
@@ -488,6 +522,7 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) {
// within the register class of the new instruction.
const TargetRegisterClass *OldRC = MRI->getRegClass(OldReg);
if (!MRI->constrainRegClass(NewReg, OldRC)) {
+ DEBUG(dbgs() << "*** Not the same register class, avoid CSE!\n");
DoCSE = false;
break;
}
@@ -503,6 +538,11 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) {
MRI->clearKillFlags(CSEPairs[i].second);
}
+ // Go through implicit defs of CSMI and MI; if a def is not dead at MI,
+ // we should make sure it is not dead at CSMI.
+ for (unsigned i = 0, e = ImplicitDefsToUpdate.size(); i != e; ++i)
+ CSMI->getOperand(ImplicitDefsToUpdate[i]).setIsDead(false);
+
if (CrossMBBPhysDef) {
// Add physical register defs now coming in from a predecessor to MBB
// livein list.
@@ -522,11 +562,11 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) {
++NumCommutes;
Changed = true;
} else {
- DEBUG(dbgs() << "*** Not profitable, avoid CSE!\n");
VNT.insert(MI, CurrVN++);
Exps.push_back(MI);
}
CSEPairs.clear();
+ ImplicitDefsToUpdate.clear();
}
return Changed;
@@ -537,8 +577,7 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) {
/// up the dominator tree to destroy ancestors which are now done.
void
MachineCSE::ExitScopeIfDone(MachineDomTreeNode *Node,
- DenseMap<MachineDomTreeNode*, unsigned> &OpenChildren,
- DenseMap<MachineDomTreeNode*, MachineDomTreeNode*> &ParentMap) {
+ DenseMap<MachineDomTreeNode*, unsigned> &OpenChildren) {
if (OpenChildren[Node])
return;
@@ -546,7 +585,7 @@ MachineCSE::ExitScopeIfDone(MachineDomTreeNode *Node,
ExitScope(Node->getBlock());
// Now traverse upwards to pop ancestors whose offspring are all done.
- while (MachineDomTreeNode *Parent = ParentMap[Node]) {
+ while (MachineDomTreeNode *Parent = Node->getIDom()) {
unsigned Left = --OpenChildren[Parent];
if (Left != 0)
break;
@@ -558,7 +597,6 @@ MachineCSE::ExitScopeIfDone(MachineDomTreeNode *Node,
bool MachineCSE::PerformCSE(MachineDomTreeNode *Node) {
SmallVector<MachineDomTreeNode*, 32> Scopes;
SmallVector<MachineDomTreeNode*, 8> WorkList;
- DenseMap<MachineDomTreeNode*, MachineDomTreeNode*> ParentMap;
DenseMap<MachineDomTreeNode*, unsigned> OpenChildren;
CurrVN = 0;
@@ -573,7 +611,6 @@ bool MachineCSE::PerformCSE(MachineDomTreeNode *Node) {
OpenChildren[Node] = NumChildren;
for (unsigned i = 0; i != NumChildren; ++i) {
MachineDomTreeNode *Child = Children[i];
- ParentMap[Child] = Node;
WorkList.push_back(Child);
}
} while (!WorkList.empty());
@@ -586,7 +623,7 @@ bool MachineCSE::PerformCSE(MachineDomTreeNode *Node) {
EnterScope(MBB);
Changed |= ProcessBlock(MBB);
// If it's a leaf node, it's done. Traverse upwards to pop ancestors.
- ExitScopeIfDone(Node, OpenChildren, ParentMap);
+ ExitScopeIfDone(Node, OpenChildren);
}
return Changed;
diff --git a/lib/CodeGen/MachineCopyPropagation.cpp b/lib/CodeGen/MachineCopyPropagation.cpp
index 9730eaa..bac3aa2 100644
--- a/lib/CodeGen/MachineCopyPropagation.cpp
+++ b/lib/CodeGen/MachineCopyPropagation.cpp
@@ -62,28 +62,16 @@ void
MachineCopyPropagation::SourceNoLongerAvailable(unsigned Reg,
SourceMap &SrcMap,
DenseMap<unsigned, MachineInstr*> &AvailCopyMap) {
- SourceMap::iterator SI = SrcMap.find(Reg);
- if (SI != SrcMap.end()) {
- const DestList& Defs = SI->second;
- for (DestList::const_iterator I = Defs.begin(), E = Defs.end();
- I != E; ++I) {
- unsigned MappedDef = *I;
- // Source of copy is no longer available for propagation.
- if (AvailCopyMap.erase(MappedDef)) {
- for (const uint16_t *SR = TRI->getSubRegisters(MappedDef); *SR; ++SR)
- AvailCopyMap.erase(*SR);
- }
- }
- }
- for (const uint16_t *AS = TRI->getAliasSet(Reg); *AS; ++AS) {
- SI = SrcMap.find(*AS);
+ for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) {
+ SourceMap::iterator SI = SrcMap.find(*AI);
if (SI != SrcMap.end()) {
const DestList& Defs = SI->second;
for (DestList::const_iterator I = Defs.begin(), E = Defs.end();
I != E; ++I) {
unsigned MappedDef = *I;
+ // Source of copy is no longer available for propagation.
if (AvailCopyMap.erase(MappedDef)) {
- for (const uint16_t *SR = TRI->getSubRegisters(MappedDef); *SR; ++SR)
+ for (MCSubRegIterator SR(MappedDef, TRI); SR.isValid(); ++SR)
AvailCopyMap.erase(*SR);
}
}
@@ -188,11 +176,8 @@ bool MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) {
}
// If Src is defined by a previous copy, it cannot be eliminated.
- CI = CopyMap.find(Src);
- if (CI != CopyMap.end())
- MaybeDeadCopies.remove(CI->second);
- for (const uint16_t *AS = TRI->getAliasSet(Src); *AS; ++AS) {
- CI = CopyMap.find(*AS);
+ for (MCRegAliasIterator AI(Src, TRI, true); AI.isValid(); ++AI) {
+ CI = CopyMap.find(*AI);
if (CI != CopyMap.end())
MaybeDeadCopies.remove(CI->second);
}
@@ -211,13 +196,13 @@ bool MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) {
// Remember Def is defined by the copy.
// ... Make sure to clear the def maps of aliases first.
- for (const uint16_t *AS = TRI->getAliasSet(Def); *AS; ++AS) {
- CopyMap.erase(*AS);
- AvailCopyMap.erase(*AS);
+ for (MCRegAliasIterator AI(Def, TRI, false); AI.isValid(); ++AI) {
+ CopyMap.erase(*AI);
+ AvailCopyMap.erase(*AI);
}
CopyMap[Def] = MI;
AvailCopyMap[Def] = MI;
- for (const uint16_t *SR = TRI->getSubRegisters(Def); *SR; ++SR) {
+ for (MCSubRegIterator SR(Def, TRI); SR.isValid(); ++SR) {
CopyMap[*SR] = MI;
AvailCopyMap[*SR] = MI;
}
@@ -256,11 +241,8 @@ bool MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) {
// If 'Reg' is defined by a copy, the copy is no longer a candidate
// for elimination.
- DenseMap<unsigned, MachineInstr*>::iterator CI = CopyMap.find(Reg);
- if (CI != CopyMap.end())
- MaybeDeadCopies.remove(CI->second);
- for (const uint16_t *AS = TRI->getAliasSet(Reg); *AS; ++AS) {
- CI = CopyMap.find(*AS);
+ for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) {
+ DenseMap<unsigned, MachineInstr*>::iterator CI = CopyMap.find(*AI);
if (CI != CopyMap.end())
MaybeDeadCopies.remove(CI->second);
}
@@ -296,11 +278,9 @@ bool MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) {
unsigned Reg = Defs[i];
// No longer defined by a copy.
- CopyMap.erase(Reg);
- AvailCopyMap.erase(Reg);
- for (const uint16_t *AS = TRI->getAliasSet(Reg); *AS; ++AS) {
- CopyMap.erase(*AS);
- AvailCopyMap.erase(*AS);
+ for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) {
+ CopyMap.erase(*AI);
+ AvailCopyMap.erase(*AI);
}
// If 'Reg' is previously source of a copy, it is no longer available for
diff --git a/lib/CodeGen/MachineFunction.cpp b/lib/CodeGen/MachineFunction.cpp
index d8c2f6a..d4aede8a 100644
--- a/lib/CodeGen/MachineFunction.cpp
+++ b/lib/CodeGen/MachineFunction.cpp
@@ -14,6 +14,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/DebugInfo.h"
#include "llvm/Function.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
@@ -26,7 +27,6 @@
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/Analysis/ConstantFolding.h"
-#include "llvm/Analysis/DebugInfo.h"
#include "llvm/Support/Debug.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetLowering.h"
@@ -60,7 +60,7 @@ MachineFunction::MachineFunction(const Function *F, const TargetMachine &TM,
MFInfo = 0;
FrameInfo = new (Allocator) MachineFrameInfo(*TM.getFrameLowering());
if (Fn->hasFnAttr(Attribute::StackAlignment))
- FrameInfo->setMaxAlignment(Attribute::getStackAlignmentFromAttrs(
+ FrameInfo->ensureMaxAlignment(Attribute::getStackAlignmentFromAttrs(
Fn->getAttributes().getFnAttributes()));
ConstantPool = new (Allocator) MachineConstantPool(TM.getTargetData());
Alignment = TM.getTargetLowering()->getMinFunctionAlignment();
@@ -84,9 +84,13 @@ MachineFunction::~MachineFunction() {
MFInfo->~MachineFunctionInfo();
Allocator.Deallocate(MFInfo);
}
- FrameInfo->~MachineFrameInfo(); Allocator.Deallocate(FrameInfo);
- ConstantPool->~MachineConstantPool(); Allocator.Deallocate(ConstantPool);
-
+
+ FrameInfo->~MachineFrameInfo();
+ Allocator.Deallocate(FrameInfo);
+
+ ConstantPool->~MachineConstantPool();
+ Allocator.Deallocate(ConstantPool);
+
if (JumpTableInfo) {
JumpTableInfo->~MachineJumpTableInfo();
Allocator.Deallocate(JumpTableInfo);
@@ -98,7 +102,7 @@ MachineFunction::~MachineFunction() {
MachineJumpTableInfo *MachineFunction::
getOrCreateJumpTableInfo(unsigned EntryKind) {
if (JumpTableInfo) return JumpTableInfo;
-
+
JumpTableInfo = new (Allocator)
MachineJumpTableInfo((MachineJumpTableInfo::JTEntryKind)EntryKind);
return JumpTableInfo;
@@ -116,12 +120,12 @@ void MachineFunction::RenumberBlocks(MachineBasicBlock *MBB) {
MBBI = begin();
else
MBBI = MBB;
-
+
// Figure out the block number this should have.
unsigned BlockNo = 0;
if (MBBI != begin())
BlockNo = prior(MBBI)->getNumber()+1;
-
+
for (; MBBI != E; ++MBBI, ++BlockNo) {
if (MBBI->getNumber() != (int)BlockNo) {
// Remove use of the old number.
@@ -130,7 +134,7 @@ void MachineFunction::RenumberBlocks(MachineBasicBlock *MBB) {
"MBB number mismatch!");
MBBNumbering[MBBI->getNumber()] = 0;
}
-
+
// If BlockNo is already taken, set that block's number to -1.
if (MBBNumbering[BlockNo])
MBBNumbering[BlockNo]->setNumber(-1);
@@ -138,7 +142,7 @@ void MachineFunction::RenumberBlocks(MachineBasicBlock *MBB) {
MBBNumbering[BlockNo] = MBBI;
MBBI->setNumber(BlockNo);
}
- }
+ }
// Okay, all the blocks are renumbered. If we have compactified the block
// numbering, shrink MBBNumbering now.
@@ -295,16 +299,16 @@ void MachineFunction::print(raw_ostream &OS, SlotIndexes *Indexes) const {
// Print Frame Information
FrameInfo->print(*this, OS);
-
+
// Print JumpTable Information
if (JumpTableInfo)
JumpTableInfo->print(OS);
// Print Constant Pool
ConstantPool->print(OS);
-
+
const TargetRegisterInfo *TRI = getTarget().getRegisterInfo();
-
+
if (RegInfo && !RegInfo->livein_empty()) {
OS << "Function Live Ins: ";
for (MachineRegisterInfo::livein_iterator
@@ -324,7 +328,7 @@ void MachineFunction::print(raw_ostream &OS, SlotIndexes *Indexes) const {
OS << ' ' << PrintReg(*I, TRI);
OS << '\n';
}
-
+
for (const_iterator BB = begin(), E = end(); BB != E; ++BB) {
OS << '\n';
BB->print(OS, Indexes);
@@ -411,10 +415,9 @@ unsigned MachineFunction::addLiveIn(unsigned PReg,
MCSymbol *MachineFunction::getJTISymbol(unsigned JTI, MCContext &Ctx,
bool isLinkerPrivate) const {
assert(JumpTableInfo && "No jump tables");
-
assert(JTI < JumpTableInfo->getJumpTables().size() && "Invalid JTI!");
const MCAsmInfo &MAI = *getTarget().getMCAsmInfo();
-
+
const char *Prefix = isLinkerPrivate ? MAI.getLinkerPrivateGlobalPrefix() :
MAI.getPrivateGlobalPrefix();
SmallString<60> Name;
@@ -691,7 +694,7 @@ static bool CanShareConstantPoolEntry(const Constant *A, const Constant *B,
else if (B->getType() != IntTy)
B = ConstantFoldInstOperands(Instruction::BitCast, IntTy,
const_cast<Constant*>(B), TD);
-
+
return A == B;
}
@@ -714,7 +717,7 @@ unsigned MachineConstantPool::getConstantPoolIndex(const Constant *C,
Constants[i].Alignment = Alignment;
return i;
}
-
+
Constants.push_back(MachineConstantPoolEntry(C, Alignment));
return Constants.size()-1;
}
@@ -723,7 +726,7 @@ unsigned MachineConstantPool::getConstantPoolIndex(MachineConstantPoolValue *V,
unsigned Alignment) {
assert(Alignment && "Alignment must be specified!");
if (Alignment > PoolAlignment) PoolAlignment = Alignment;
-
+
// Check to see if we already have this constant.
//
// FIXME, this could be made much more efficient for large constant pools.
diff --git a/lib/CodeGen/MachineFunctionPrinterPass.cpp b/lib/CodeGen/MachineFunctionPrinterPass.cpp
index 2aaa798..0102ac7 100644
--- a/lib/CodeGen/MachineFunctionPrinterPass.cpp
+++ b/lib/CodeGen/MachineFunctionPrinterPass.cpp
@@ -14,7 +14,9 @@
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/SlotIndexes.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Debug.h"
using namespace llvm;
@@ -28,6 +30,7 @@ struct MachineFunctionPrinterPass : public MachineFunctionPass {
raw_ostream &OS;
const std::string Banner;
+ MachineFunctionPrinterPass() : MachineFunctionPass(ID), OS(dbgs()) { }
MachineFunctionPrinterPass(raw_ostream &os, const std::string &banner)
: MachineFunctionPass(ID), OS(os), Banner(banner) {}
@@ -40,7 +43,7 @@ struct MachineFunctionPrinterPass : public MachineFunctionPass {
bool runOnMachineFunction(MachineFunction &MF) {
OS << "# " << Banner << ":\n";
- MF.print(OS);
+ MF.print(OS, getAnalysisIfAvailable<SlotIndexes>());
return false;
}
};
@@ -48,6 +51,10 @@ struct MachineFunctionPrinterPass : public MachineFunctionPass {
char MachineFunctionPrinterPass::ID = 0;
}
+char &MachineFunctionPrinterPassID = MachineFunctionPrinterPass::ID;
+INITIALIZE_PASS(MachineFunctionPrinterPass, "print-machineinstrs",
+ "Machine Function Printer", false, false)
+
namespace llvm {
/// Returns a newly-created MachineFunction Printer pass. The
/// default banner is empty.
diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp
index e553a04..b166849 100644
--- a/lib/CodeGen/MachineInstr.cpp
+++ b/lib/CodeGen/MachineInstr.cpp
@@ -13,6 +13,7 @@
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/Constants.h"
+#include "llvm/DebugInfo.h"
#include "llvm/Function.h"
#include "llvm/InlineAsm.h"
#include "llvm/LLVMContext.h"
@@ -33,7 +34,6 @@
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/Analysis/DebugInfo.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/LeakDetector.h"
@@ -47,55 +47,6 @@ using namespace llvm;
// MachineOperand Implementation
//===----------------------------------------------------------------------===//
-/// AddRegOperandToRegInfo - Add this register operand to the specified
-/// MachineRegisterInfo. If it is null, then the next/prev fields should be
-/// explicitly nulled out.
-void MachineOperand::AddRegOperandToRegInfo(MachineRegisterInfo *RegInfo) {
- assert(isReg() && "Can only add reg operand to use lists");
-
- // If the reginfo pointer is null, just explicitly null out or next/prev
- // pointers, to ensure they are not garbage.
- if (RegInfo == 0) {
- Contents.Reg.Prev = 0;
- Contents.Reg.Next = 0;
- return;
- }
-
- // Otherwise, add this operand to the head of the registers use/def list.
- MachineOperand **Head = &RegInfo->getRegUseDefListHead(getReg());
-
- // For SSA values, we prefer to keep the definition at the start of the list.
- // we do this by skipping over the definition if it is at the head of the
- // list.
- if (*Head && (*Head)->isDef())
- Head = &(*Head)->Contents.Reg.Next;
-
- Contents.Reg.Next = *Head;
- if (Contents.Reg.Next) {
- assert(getReg() == Contents.Reg.Next->getReg() &&
- "Different regs on the same list!");
- Contents.Reg.Next->Contents.Reg.Prev = &Contents.Reg.Next;
- }
-
- Contents.Reg.Prev = Head;
- *Head = this;
-}
-
-/// RemoveRegOperandFromRegInfo - Remove this register operand from the
-/// MachineRegisterInfo it is linked with.
-void MachineOperand::RemoveRegOperandFromRegInfo() {
- assert(isOnRegUseList() && "Reg operand is not on a use list");
- // Unlink this from the doubly linked list of operands.
- MachineOperand *NextOp = Contents.Reg.Next;
- *Contents.Reg.Prev = NextOp;
- if (NextOp) {
- assert(NextOp->getReg() == getReg() && "Corrupt reg use/def chain!");
- NextOp->Contents.Reg.Prev = Contents.Reg.Prev;
- }
- Contents.Reg.Prev = 0;
- Contents.Reg.Next = 0;
-}
-
void MachineOperand::setReg(unsigned Reg) {
if (getReg() == Reg) return; // No change.
@@ -105,9 +56,10 @@ void MachineOperand::setReg(unsigned Reg) {
if (MachineInstr *MI = getParent())
if (MachineBasicBlock *MBB = MI->getParent())
if (MachineFunction *MF = MBB->getParent()) {
- RemoveRegOperandFromRegInfo();
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+ MRI.removeRegOperandFromUseList(this);
SmallContents.RegNo = Reg;
- AddRegOperandToRegInfo(&MF->getRegInfo());
+ MRI.addRegOperandToUseList(this);
return;
}
@@ -136,15 +88,36 @@ void MachineOperand::substPhysReg(unsigned Reg, const TargetRegisterInfo &TRI) {
setReg(Reg);
}
+/// Change a def to a use, or a use to a def.
+void MachineOperand::setIsDef(bool Val) {
+ assert(isReg() && "Wrong MachineOperand accessor");
+ assert((!Val || !isDebug()) && "Marking a debug operation as def");
+ if (IsDef == Val)
+ return;
+ // MRI may keep uses and defs in different list positions.
+ if (MachineInstr *MI = getParent())
+ if (MachineBasicBlock *MBB = MI->getParent())
+ if (MachineFunction *MF = MBB->getParent()) {
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+ MRI.removeRegOperandFromUseList(this);
+ IsDef = Val;
+ MRI.addRegOperandToUseList(this);
+ return;
+ }
+ IsDef = Val;
+}
+
/// ChangeToImmediate - Replace this operand with a new immediate operand of
/// the specified value. If an operand is known to be an immediate already,
/// the setImm method should be used.
void MachineOperand::ChangeToImmediate(int64_t ImmVal) {
// If this operand is currently a register operand, and if this is in a
// function, deregister the operand from the register's use/def list.
- if (isReg() && getParent() && getParent()->getParent() &&
- getParent()->getParent()->getParent())
- RemoveRegOperandFromRegInfo();
+ if (isReg() && isOnRegUseList())
+ if (MachineInstr *MI = getParent())
+ if (MachineBasicBlock *MBB = MI->getParent())
+ if (MachineFunction *MF = MBB->getParent())
+ MF->getRegInfo().removeRegOperandFromUseList(this);
OpKind = MO_Immediate;
Contents.ImmVal = ImmVal;
@@ -156,24 +129,20 @@ void MachineOperand::ChangeToImmediate(int64_t ImmVal) {
void MachineOperand::ChangeToRegister(unsigned Reg, bool isDef, bool isImp,
bool isKill, bool isDead, bool isUndef,
bool isDebug) {
- // If this operand is already a register operand, use setReg to update the
+ MachineRegisterInfo *RegInfo = 0;
+ if (MachineInstr *MI = getParent())
+ if (MachineBasicBlock *MBB = MI->getParent())
+ if (MachineFunction *MF = MBB->getParent())
+ RegInfo = &MF->getRegInfo();
+ // If this operand is already a register operand, remove it from the
// register's use/def lists.
- if (isReg()) {
- assert(!isEarlyClobber());
- setReg(Reg);
- } else {
- // Otherwise, change this to a register and set the reg#.
- OpKind = MO_Register;
- SmallContents.RegNo = Reg;
-
- // If this operand is embedded in a function, add the operand to the
- // register's use/def list.
- if (MachineInstr *MI = getParent())
- if (MachineBasicBlock *MBB = MI->getParent())
- if (MachineFunction *MF = MBB->getParent())
- AddRegOperandToRegInfo(&MF->getRegInfo());
- }
+ if (RegInfo && isReg())
+ RegInfo->removeRegOperandFromUseList(this);
+ // Change this to a register and set the reg#.
+ OpKind = MO_Register;
+ SmallContents.RegNo = Reg;
+ SubReg = 0;
IsDef = isDef;
IsImp = isImp;
IsKill = isKill;
@@ -182,11 +151,18 @@ void MachineOperand::ChangeToRegister(unsigned Reg, bool isDef, bool isImp,
IsInternalRead = false;
IsEarlyClobber = false;
IsDebug = isDebug;
- SubReg = 0;
+ // Ensure isOnRegUseList() returns false.
+ Contents.Reg.Prev = 0;
+
+ // If this operand is embedded in a function, add the operand to the
+ // register's use/def list.
+ if (RegInfo)
+ RegInfo->addRegOperandToUseList(this);
}
/// isIdenticalTo - Return true if this operand is identical to the specified
-/// operand.
+/// operand. Note that this should stay in sync with the hash_value overload
+/// below.
bool MachineOperand::isIdenticalTo(const MachineOperand &Other) const {
if (getType() != Other.getType() ||
getTargetFlags() != Other.getTargetFlags())
@@ -207,6 +183,7 @@ bool MachineOperand::isIdenticalTo(const MachineOperand &Other) const {
case MachineOperand::MO_FrameIndex:
return getIndex() == Other.getIndex();
case MachineOperand::MO_ConstantPoolIndex:
+ case MachineOperand::MO_TargetIndex:
return getIndex() == Other.getIndex() && getOffset() == Other.getOffset();
case MachineOperand::MO_JumpTableIndex:
return getIndex() == Other.getIndex();
@@ -227,6 +204,47 @@ bool MachineOperand::isIdenticalTo(const MachineOperand &Other) const {
llvm_unreachable("Invalid machine operand type");
}
+// Note: this must stay exactly in sync with isIdenticalTo above.
+hash_code llvm::hash_value(const MachineOperand &MO) {
+ switch (MO.getType()) {
+ case MachineOperand::MO_Register:
+ return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getReg(),
+ MO.getSubReg(), MO.isDef());
+ case MachineOperand::MO_Immediate:
+ return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getImm());
+ case MachineOperand::MO_CImmediate:
+ return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getCImm());
+ case MachineOperand::MO_FPImmediate:
+ return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getFPImm());
+ case MachineOperand::MO_MachineBasicBlock:
+ return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getMBB());
+ case MachineOperand::MO_FrameIndex:
+ return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getIndex());
+ case MachineOperand::MO_ConstantPoolIndex:
+ case MachineOperand::MO_TargetIndex:
+ return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getIndex(),
+ MO.getOffset());
+ case MachineOperand::MO_JumpTableIndex:
+ return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getIndex());
+ case MachineOperand::MO_ExternalSymbol:
+ return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getOffset(),
+ MO.getSymbolName());
+ case MachineOperand::MO_GlobalAddress:
+ return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getGlobal(),
+ MO.getOffset());
+ case MachineOperand::MO_BlockAddress:
+ return hash_combine(MO.getType(), MO.getTargetFlags(),
+ MO.getBlockAddress());
+ case MachineOperand::MO_RegisterMask:
+ return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getRegMask());
+ case MachineOperand::MO_Metadata:
+ return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getMetadata());
+ case MachineOperand::MO_MCSymbol:
+ return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getMCSymbol());
+ }
+ llvm_unreachable("Invalid machine operand type");
+}
+
/// print - Print the specified machine operand.
///
void MachineOperand::print(raw_ostream &OS, const TargetMachine *TM) const {
@@ -255,12 +273,16 @@ void MachineOperand::print(raw_ostream &OS, const TargetMachine *TM) const {
OS << "imp-";
OS << "def";
NeedComma = true;
+ // <def,read-undef> only makes sense when getSubReg() is set.
+ // Don't clutter the output otherwise.
+ if (isUndef() && getSubReg())
+ OS << ",read-undef";
} else if (isImplicit()) {
OS << "imp-use";
NeedComma = true;
}
- if (isKill() || isDead() || isUndef() || isInternalRead()) {
+ if (isKill() || isDead() || (isUndef() && isUse()) || isInternalRead()) {
if (NeedComma) OS << ',';
NeedComma = false;
if (isKill()) {
@@ -271,7 +293,7 @@ void MachineOperand::print(raw_ostream &OS, const TargetMachine *TM) const {
OS << "dead";
NeedComma = true;
}
- if (isUndef()) {
+ if (isUndef() && isUse()) {
if (NeedComma) OS << ',';
OS << "undef";
NeedComma = true;
@@ -308,6 +330,11 @@ void MachineOperand::print(raw_ostream &OS, const TargetMachine *TM) const {
if (getOffset()) OS << "+" << getOffset();
OS << '>';
break;
+ case MachineOperand::MO_TargetIndex:
+ OS << "<ti#" << getIndex();
+ if (getOffset()) OS << "+" << getOffset();
+ OS << '>';
+ break;
case MachineOperand::MO_JumpTableIndex:
OS << "<jt#" << getIndex() << '>';
break;
@@ -605,24 +632,21 @@ MachineRegisterInfo *MachineInstr::getRegInfo() {
/// RemoveRegOperandsFromUseLists - Unlink all of the register operands in
/// this instruction from their respective use lists. This requires that the
/// operands already be on their use lists.
-void MachineInstr::RemoveRegOperandsFromUseLists() {
- for (unsigned i = 0, e = Operands.size(); i != e; ++i) {
+void MachineInstr::RemoveRegOperandsFromUseLists(MachineRegisterInfo &MRI) {
+ for (unsigned i = 0, e = Operands.size(); i != e; ++i)
if (Operands[i].isReg())
- Operands[i].RemoveRegOperandFromRegInfo();
- }
+ MRI.removeRegOperandFromUseList(&Operands[i]);
}
/// AddRegOperandsToUseLists - Add all of the register operands in
/// this instruction from their respective use lists. This requires that the
/// operands not be on their use lists yet.
-void MachineInstr::AddRegOperandsToUseLists(MachineRegisterInfo &RegInfo) {
- for (unsigned i = 0, e = Operands.size(); i != e; ++i) {
+void MachineInstr::AddRegOperandsToUseLists(MachineRegisterInfo &MRI) {
+ for (unsigned i = 0, e = Operands.size(); i != e; ++i)
if (Operands[i].isReg())
- Operands[i].AddRegOperandToRegInfo(&RegInfo);
- }
+ MRI.addRegOperandToUseList(&Operands[i]);
}
-
/// addOperand - Add the specified operand to the instruction. If it is an
/// implicit operand, it is added to the end of the operand list. If it is
/// an explicit operand it is added at the end of the explicit operand list
@@ -650,13 +674,15 @@ void MachineInstr::addOperand(const MachineOperand &Op) {
while (OpNo && Operands[OpNo-1].isReg() && Operands[OpNo-1].isImplicit()) {
--OpNo;
if (RegInfo)
- Operands[OpNo].RemoveRegOperandFromRegInfo();
+ RegInfo->removeRegOperandFromUseList(&Operands[OpNo]);
}
}
// OpNo now points as the desired insertion point. Unless this is a variadic
// instruction, only implicit regs are allowed beyond MCID->getNumOperands().
- assert((isImpReg || MCID->isVariadic() || OpNo < MCID->getNumOperands()) &&
+ // RegMask operands go between the explicit and implicit operands.
+ assert((isImpReg || Op.isRegMask() || MCID->isVariadic() ||
+ OpNo < MCID->getNumOperands()) &&
"Trying to add an operand to a machine instr that is already done!");
// All operands from OpNo have been removed from RegInfo. If the Operands
@@ -665,7 +691,7 @@ void MachineInstr::addOperand(const MachineOperand &Op) {
if (Reallocate)
for (unsigned i = 0; i != OpNo; ++i)
if (Operands[i].isReg())
- Operands[i].RemoveRegOperandFromRegInfo();
+ RegInfo->removeRegOperandFromUseList(&Operands[i]);
// Insert the new operand at OpNo.
Operands.insert(Operands.begin() + OpNo, Op);
@@ -676,13 +702,15 @@ void MachineInstr::addOperand(const MachineOperand &Op) {
if (Reallocate)
for (unsigned i = 0; i != OpNo; ++i)
if (Operands[i].isReg())
- Operands[i].AddRegOperandToRegInfo(RegInfo);
+ RegInfo->addRegOperandToUseList(&Operands[i]);
// When adding a register operand, tell RegInfo about it.
if (Operands[OpNo].isReg()) {
- // Add the new operand to RegInfo, even when RegInfo is NULL.
- // This will initialize the linked list pointers.
- Operands[OpNo].AddRegOperandToRegInfo(RegInfo);
+ // Ensure isOnRegUseList() returns false, regardless of Op's status.
+ Operands[OpNo].Contents.Reg.Prev = 0;
+ // Add the new operand to RegInfo.
+ if (RegInfo)
+ RegInfo->addRegOperandToUseList(&Operands[OpNo]);
// If the register operand is flagged as early, mark the operand as such.
if (MCID->getOperandConstraint(OpNo, MCOI::EARLY_CLOBBER) != -1)
Operands[OpNo].setIsEarlyClobber(true);
@@ -692,7 +720,7 @@ void MachineInstr::addOperand(const MachineOperand &Op) {
if (RegInfo) {
for (unsigned i = OpNo + 1, e = Operands.size(); i != e; ++i) {
assert(Operands[i].isReg() && "Should only be an implicit reg!");
- Operands[i].AddRegOperandToRegInfo(RegInfo);
+ RegInfo->addRegOperandToUseList(&Operands[i]);
}
}
}
@@ -702,12 +730,13 @@ void MachineInstr::addOperand(const MachineOperand &Op) {
///
void MachineInstr::RemoveOperand(unsigned OpNo) {
assert(OpNo < Operands.size() && "Invalid operand number");
+ MachineRegisterInfo *RegInfo = getRegInfo();
// Special case removing the last one.
if (OpNo == Operands.size()-1) {
// If needed, remove from the reg def/use list.
- if (Operands.back().isReg() && Operands.back().isOnRegUseList())
- Operands.back().RemoveRegOperandFromRegInfo();
+ if (RegInfo && Operands.back().isReg() && Operands.back().isOnRegUseList())
+ RegInfo->removeRegOperandFromUseList(&Operands.back());
Operands.pop_back();
return;
@@ -716,11 +745,10 @@ void MachineInstr::RemoveOperand(unsigned OpNo) {
// Otherwise, we are removing an interior operand. If we have reginfo to
// update, remove all operands that will be shifted down from their reg lists,
// move everything down, then re-add them.
- MachineRegisterInfo *RegInfo = getRegInfo();
if (RegInfo) {
for (unsigned i = OpNo, e = Operands.size(); i != e; ++i) {
if (Operands[i].isReg())
- Operands[i].RemoveRegOperandFromRegInfo();
+ RegInfo->removeRegOperandFromUseList(&Operands[i]);
}
}
@@ -729,7 +757,7 @@ void MachineInstr::RemoveOperand(unsigned OpNo) {
if (RegInfo) {
for (unsigned i = OpNo, e = Operands.size(); i != e; ++i) {
if (Operands[i].isReg())
- Operands[i].AddRegOperandToRegInfo(RegInfo);
+ RegInfo->addRegOperandToUseList(&Operands[i]);
}
}
}
@@ -868,7 +896,8 @@ void MachineInstr::eraseFromParent() {
MBB->erase(MI);
}
}
- getParent()->erase(this);
+ // Erase the individual instruction, which may itself be inside a bundle.
+ getParent()->erase_instr(this);
}
@@ -938,9 +967,13 @@ const TargetRegisterClass*
MachineInstr::getRegClassConstraint(unsigned OpIdx,
const TargetInstrInfo *TII,
const TargetRegisterInfo *TRI) const {
+ assert(getParent() && "Can't have an MBB reference here!");
+ assert(getParent()->getParent() && "Can't have an MF reference here!");
+ const MachineFunction &MF = *getParent()->getParent();
+
// Most opcodes have fixed constraints in their MCInstrDesc.
if (!isInlineAsm())
- return TII->getRegClass(getDesc(), OpIdx, TRI);
+ return TII->getRegClass(getDesc(), OpIdx, TRI, MF);
if (!getOperand(OpIdx).isReg())
return NULL;
@@ -962,7 +995,7 @@ MachineInstr::getRegClassConstraint(unsigned OpIdx,
// Assume that all registers in a memory operand are pointers.
if (InlineAsm::getKind(Flag) == InlineAsm::Kind_Mem)
- return TRI->getPointerRegClass();
+ return TRI->getPointerRegClass(MF);
return NULL;
}
@@ -1530,12 +1563,14 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM) const {
const MachineRegisterInfo &MRI = MF->getRegInfo();
if (MRI.use_empty(Reg) && !MRI.isLiveOut(Reg)) {
bool HasAliasLive = false;
- for (const uint16_t *Alias = TM->getRegisterInfo()->getAliasSet(Reg);
- unsigned AliasReg = *Alias; ++Alias)
+ for (MCRegAliasIterator AI(Reg, TM->getRegisterInfo(), true);
+ AI.isValid(); ++AI) {
+ unsigned AliasReg = *AI;
if (!MRI.use_empty(AliasReg) || MRI.isLiveOut(AliasReg)) {
HasAliasLive = true;
break;
}
+ }
if (!HasAliasLive) {
OmittedAnyCallClobbers = true;
continue;
@@ -1667,7 +1702,8 @@ bool MachineInstr::addRegisterKilled(unsigned IncomingReg,
const TargetRegisterInfo *RegInfo,
bool AddIfNotFound) {
bool isPhysReg = TargetRegisterInfo::isPhysicalRegister(IncomingReg);
- bool hasAliases = isPhysReg && RegInfo->getAliasSet(IncomingReg);
+ bool hasAliases = isPhysReg &&
+ MCRegAliasIterator(IncomingReg, RegInfo, false).isValid();
bool Found = false;
SmallVector<unsigned,4> DeadOps;
for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
@@ -1739,7 +1775,8 @@ bool MachineInstr::addRegisterDead(unsigned IncomingReg,
const TargetRegisterInfo *RegInfo,
bool AddIfNotFound) {
bool isPhysReg = TargetRegisterInfo::isPhysicalRegister(IncomingReg);
- bool hasAliases = isPhysReg && RegInfo->getAliasSet(IncomingReg);
+ bool hasAliases = isPhysReg &&
+ MCRegAliasIterator(IncomingReg, RegInfo, false).isValid();
bool Found = false;
SmallVector<unsigned,4> DeadOps;
for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
@@ -1758,9 +1795,7 @@ bool MachineInstr::addRegisterDead(unsigned IncomingReg,
// There exists a super-register that's marked dead.
if (RegInfo->isSuperRegister(IncomingReg, Reg))
return true;
- if (RegInfo->getSubRegisters(IncomingReg) &&
- RegInfo->getSuperRegisters(Reg) &&
- RegInfo->isSubRegister(IncomingReg, Reg))
+ if (RegInfo->isSubRegister(IncomingReg, Reg))
DeadOps.push_back(i);
}
}
@@ -1841,52 +1876,16 @@ void MachineInstr::setPhysRegsDeadExcept(ArrayRef<unsigned> UsedRegs,
unsigned
MachineInstrExpressionTrait::getHashValue(const MachineInstr* const &MI) {
// Build up a buffer of hash code components.
- //
- // FIXME: This is a total hack. We should have a hash_value overload for
- // MachineOperand, but currently that doesn't work because there are many
- // different ideas of "equality" and thus different sets of information that
- // contribute to the hash code. This one happens to want to take a specific
- // subset. And it's still not clear that this routine uses the *correct*
- // subset of information when computing the hash code. The goal is to use the
- // same inputs for the hash code here that MachineInstr::isIdenticalTo uses to
- // test for equality when passed the 'IgnoreVRegDefs' filter flag. It would
- // be very useful to factor the selection of relevant inputs out of the two
- // functions and into a common routine, but it's not clear how that can be
- // done.
SmallVector<size_t, 8> HashComponents;
HashComponents.reserve(MI->getNumOperands() + 1);
HashComponents.push_back(MI->getOpcode());
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
const MachineOperand &MO = MI->getOperand(i);
- switch (MO.getType()) {
- default: break;
- case MachineOperand::MO_Register:
- if (MO.isDef() && TargetRegisterInfo::isVirtualRegister(MO.getReg()))
- continue; // Skip virtual register defs.
- HashComponents.push_back(hash_combine(MO.getType(), MO.getReg()));
- break;
- case MachineOperand::MO_Immediate:
- HashComponents.push_back(hash_combine(MO.getType(), MO.getImm()));
- break;
- case MachineOperand::MO_FrameIndex:
- case MachineOperand::MO_ConstantPoolIndex:
- case MachineOperand::MO_JumpTableIndex:
- HashComponents.push_back(hash_combine(MO.getType(), MO.getIndex()));
- break;
- case MachineOperand::MO_MachineBasicBlock:
- HashComponents.push_back(hash_combine(MO.getType(), MO.getMBB()));
- break;
- case MachineOperand::MO_GlobalAddress:
- HashComponents.push_back(hash_combine(MO.getType(), MO.getGlobal()));
- break;
- case MachineOperand::MO_BlockAddress:
- HashComponents.push_back(hash_combine(MO.getType(),
- MO.getBlockAddress()));
- break;
- case MachineOperand::MO_MCSymbol:
- HashComponents.push_back(hash_combine(MO.getType(), MO.getMCSymbol()));
- break;
- }
+ if (MO.isReg() && MO.isDef() &&
+ TargetRegisterInfo::isVirtualRegister(MO.getReg()))
+ continue; // Skip virtual register defs.
+
+ HashComponents.push_back(hash_value(MO));
}
return hash_combine_range(HashComponents.begin(), HashComponents.end());
}
diff --git a/lib/CodeGen/MachineInstrBundle.cpp b/lib/CodeGen/MachineInstrBundle.cpp
index 73489a7..b7de7bf 100644
--- a/lib/CodeGen/MachineInstrBundle.cpp
+++ b/lib/CodeGen/MachineInstrBundle.cpp
@@ -169,8 +169,8 @@ void llvm::finalizeBundle(MachineBasicBlock &MBB,
}
if (!MO.isDead()) {
- for (const uint16_t *SubRegs = TRI->getSubRegisters(Reg);
- unsigned SubReg = *SubRegs; ++SubRegs) {
+ for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) {
+ unsigned SubReg = *SubRegs;
if (LocalDefSet.insert(SubReg))
LocalDefs.push_back(SubReg);
}
diff --git a/lib/CodeGen/MachineLICM.cpp b/lib/CodeGen/MachineLICM.cpp
index 8c562cc..efec481 100644
--- a/lib/CodeGen/MachineLICM.cpp
+++ b/lib/CodeGen/MachineLICM.cpp
@@ -445,8 +445,8 @@ void MachineLICM::ProcessMI(MachineInstr *MI,
}
if (MO.isImplicit()) {
- for (const uint16_t *AS = TRI->getOverlaps(Reg); *AS; ++AS)
- PhysRegClobbers.set(*AS);
+ for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
+ PhysRegClobbers.set(*AI);
if (!MO.isDead())
// Non-dead implicit def? This cannot be hoisted.
RuledOut = true;
@@ -465,7 +465,7 @@ void MachineLICM::ProcessMI(MachineInstr *MI,
// If we have already seen another instruction that defines the same
// register, then this is not safe. Two defs is indicated by setting a
// PhysRegClobbers bit.
- for (const uint16_t *AS = TRI->getOverlaps(Reg); *AS; ++AS) {
+ for (MCRegAliasIterator AS(Reg, TRI, true); AS.isValid(); ++AS) {
if (PhysRegDefs.test(*AS))
PhysRegClobbers.set(*AS);
if (PhysRegClobbers.test(*AS))
@@ -517,8 +517,8 @@ void MachineLICM::HoistRegionPostRA() {
for (MachineBasicBlock::livein_iterator I = BB->livein_begin(),
E = BB->livein_end(); I != E; ++I) {
unsigned Reg = *I;
- for (const uint16_t *AS = TRI->getOverlaps(Reg); *AS; ++AS)
- PhysRegDefs.set(*AS);
+ for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
+ PhysRegDefs.set(*AI);
}
SpeculationState = SpeculateUnknown;
@@ -540,8 +540,8 @@ void MachineLICM::HoistRegionPostRA() {
unsigned Reg = MO.getReg();
if (!Reg)
continue;
- for (const uint16_t *AS = TRI->getOverlaps(Reg); *AS; ++AS)
- TermRegs.set(*AS);
+ for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
+ TermRegs.set(*AI);
}
}
@@ -1260,11 +1260,11 @@ MachineInstr *MachineLICM::ExtractHoistableLoad(MachineInstr *MI) {
if (NewOpc == 0) return 0;
const MCInstrDesc &MID = TII->get(NewOpc);
if (MID.getNumDefs() != 1) return 0;
- const TargetRegisterClass *RC = TII->getRegClass(MID, LoadRegIndex, TRI);
+ MachineFunction &MF = *MI->getParent()->getParent();
+ const TargetRegisterClass *RC = TII->getRegClass(MID, LoadRegIndex, TRI, MF);
// Ok, we're unfolding. Create a temporary register and do the unfold.
unsigned Reg = MRI->createVirtualRegister(RC);
- MachineFunction &MF = *MI->getParent()->getParent();
SmallVector<MachineInstr *, 2> NewMIs;
bool Success =
TII->unfoldMemoryOperand(MF, MI, Reg,
diff --git a/lib/CodeGen/MachineLoopInfo.cpp b/lib/CodeGen/MachineLoopInfo.cpp
index 189cb2b..9f3829e 100644
--- a/lib/CodeGen/MachineLoopInfo.cpp
+++ b/lib/CodeGen/MachineLoopInfo.cpp
@@ -9,7 +9,7 @@
//
// This file defines the MachineLoopInfo class that is used to identify natural
// loops and determine the loop depth of various nodes of the CFG. Note that
-// the loops identified may actually be several natural loops that share the
+// the loops identified may actually be several natural loops that share the
// same header node... not just a single natural loop.
//
//===----------------------------------------------------------------------===//
@@ -17,17 +17,13 @@
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/Analysis/LoopInfoImpl.h"
#include "llvm/Support/Debug.h"
using namespace llvm;
-namespace llvm {
-#define MLB class LoopBase<MachineBasicBlock, MachineLoop>
-TEMPLATE_INSTANTIATION(MLB);
-#undef MLB
-#define MLIB class LoopInfoBase<MachineBasicBlock, MachineLoop>
-TEMPLATE_INSTANTIATION(MLIB);
-#undef MLIB
-}
+// Explicitly instantiate methods in LoopInfoImpl.h for MI-level Loops.
+template class llvm::LoopBase<MachineBasicBlock, MachineLoop>;
+template class llvm::LoopInfoBase<MachineBasicBlock, MachineLoop>;
char MachineLoopInfo::ID = 0;
INITIALIZE_PASS_BEGIN(MachineLoopInfo, "machine-loops",
@@ -40,7 +36,7 @@ char &llvm::MachineLoopInfoID = MachineLoopInfo::ID;
bool MachineLoopInfo::runOnMachineFunction(MachineFunction &) {
releaseMemory();
- LI.Calculate(getAnalysis<MachineDominatorTree>().getBase()); // Update
+ LI.Analyze(getAnalysis<MachineDominatorTree>().getBase());
return false;
}
diff --git a/lib/CodeGen/MachinePassRegistry.cpp b/lib/CodeGen/MachinePassRegistry.cpp
index 58e067b..cb204fd 100644
--- a/lib/CodeGen/MachinePassRegistry.cpp
+++ b/lib/CodeGen/MachinePassRegistry.cpp
@@ -18,6 +18,19 @@ using namespace llvm;
void MachinePassRegistryListener::anchor() { }
+/// setDefault - Set the default constructor by name.
+void MachinePassRegistry::setDefault(StringRef Name) {
+ MachinePassCtor Ctor = 0;
+ for(MachinePassRegistryNode *R = getList(); R; R = R->getNext()) {
+ if (R->getName() == Name) {
+ Ctor = R->getCtor();
+ break;
+ }
+ }
+ assert(Ctor && "Unregistered pass name");
+ setDefault(Ctor);
+}
+
/// Add - Adds a function pass to the registration list.
///
void MachinePassRegistry::Add(MachinePassRegistryNode *Node) {
diff --git a/lib/CodeGen/MachineRegisterInfo.cpp b/lib/CodeGen/MachineRegisterInfo.cpp
index 7ea1517..5fb938f 100644
--- a/lib/CodeGen/MachineRegisterInfo.cpp
+++ b/lib/CodeGen/MachineRegisterInfo.cpp
@@ -102,17 +102,9 @@ MachineRegisterInfo::createVirtualRegister(const TargetRegisterClass *RegClass){
// New virtual register number.
unsigned Reg = TargetRegisterInfo::index2VirtReg(getNumVirtRegs());
-
- // Add a reg, but keep track of whether the vector reallocated or not.
- const unsigned FirstVirtReg = TargetRegisterInfo::index2VirtReg(0);
- void *ArrayBase = getNumVirtRegs() == 0 ? 0 : &VRegInfo[FirstVirtReg];
VRegInfo.grow(Reg);
VRegInfo[Reg].first = RegClass;
RegAllocHints.grow(Reg);
-
- if (ArrayBase && &VRegInfo[FirstVirtReg] != ArrayBase)
- // The vector reallocated, handle this now.
- HandleVRegListReallocation();
return Reg;
}
@@ -126,21 +118,68 @@ void MachineRegisterInfo::clearVirtRegs() {
VRegInfo.clear();
}
-/// HandleVRegListReallocation - We just added a virtual register to the
-/// VRegInfo info list and it reallocated. Update the use/def lists info
-/// pointers.
-void MachineRegisterInfo::HandleVRegListReallocation() {
- // The back pointers for the vreg lists point into the previous vector.
- // Update them to point to their correct slots.
- for (unsigned i = 0, e = getNumVirtRegs(); i != e; ++i) {
- unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
- MachineOperand *List = VRegInfo[Reg].second;
- if (!List) continue;
- // Update the back-pointer to be accurate once more.
- List->Contents.Reg.Prev = &VRegInfo[Reg].second;
+/// Add MO to the linked list of operands for its register.
+void MachineRegisterInfo::addRegOperandToUseList(MachineOperand *MO) {
+ assert(!MO->isOnRegUseList() && "Already on list");
+ MachineOperand *&HeadRef = getRegUseDefListHead(MO->getReg());
+ MachineOperand *const Head = HeadRef;
+
+ // Head points to the first list element.
+ // Next is NULL on the last list element.
+ // Prev pointers are circular, so Head->Prev == Last.
+
+ // Head is NULL for an empty list.
+ if (!Head) {
+ MO->Contents.Reg.Prev = MO;
+ MO->Contents.Reg.Next = 0;
+ HeadRef = MO;
+ return;
+ }
+ assert(MO->getReg() == Head->getReg() && "Different regs on the same list!");
+
+ // Insert MO between Last and Head in the circular Prev chain.
+ MachineOperand *Last = Head->Contents.Reg.Prev;
+ assert(Last && "Inconsistent use list");
+ assert(MO->getReg() == Last->getReg() && "Different regs on the same list!");
+ Head->Contents.Reg.Prev = MO;
+ MO->Contents.Reg.Prev = Last;
+
+ // Def operands always precede uses. This allows def_iterator to stop early.
+ // Insert def operands at the front, and use operands at the back.
+ if (MO->isDef()) {
+ // Insert def at the front.
+ MO->Contents.Reg.Next = Head;
+ HeadRef = MO;
+ } else {
+ // Insert use at the end.
+ MO->Contents.Reg.Next = 0;
+ Last->Contents.Reg.Next = MO;
}
}
+/// Remove MO from its use-def list.
+void MachineRegisterInfo::removeRegOperandFromUseList(MachineOperand *MO) {
+ assert(MO->isOnRegUseList() && "Operand not on use list");
+ MachineOperand *&HeadRef = getRegUseDefListHead(MO->getReg());
+ MachineOperand *const Head = HeadRef;
+ assert(Head && "List already empty");
+
+ // Unlink this from the doubly linked list of operands.
+ MachineOperand *Next = MO->Contents.Reg.Next;
+ MachineOperand *Prev = MO->Contents.Reg.Prev;
+
+ // Prev links are circular, next link is NULL instead of looping back to Head.
+ if (MO == Head)
+ HeadRef = Next;
+ else
+ Prev->Contents.Reg.Next = Next;
+
+ (Next ? Next : Head)->Contents.Reg.Prev = Prev;
+
+ MO->Contents.Reg.Prev = 0;
+ MO->Contents.Reg.Next = 0;
+}
+
/// replaceRegWith - Replace all instances of FromReg with ToReg in the
/// machine function. This is like llvm-level X->replaceAllUsesWith(Y),
/// except that it also changes any definitions of the register as well.
@@ -162,14 +201,20 @@ void MachineRegisterInfo::replaceRegWith(unsigned FromReg, unsigned ToReg) {
MachineInstr *MachineRegisterInfo::getVRegDef(unsigned Reg) const {
// Since we are in SSA form, we can use the first definition.
def_iterator I = def_begin(Reg);
+ assert((I.atEnd() || llvm::next(I) == def_end()) &&
+ "getVRegDef assumes a single definition or no definition");
return !I.atEnd() ? &*I : 0;
}
-bool MachineRegisterInfo::hasOneUse(unsigned RegNo) const {
- use_iterator UI = use_begin(RegNo);
- if (UI == use_end())
- return false;
- return ++UI == use_end();
+/// getUniqueVRegDef - Return the unique machine instr that defines the
+/// specified virtual register or null if none is found. If there are
+/// multiple definitions or no definition, return null.
+MachineInstr *MachineRegisterInfo::getUniqueVRegDef(unsigned Reg) const {
+ if (def_empty(Reg)) return 0;
+ def_iterator I = def_begin(Reg);
+ if (llvm::next(I) != def_end())
+ return 0;
+ return &*I;
}
bool MachineRegisterInfo::hasOneNonDBGUse(unsigned RegNo) const {
@@ -268,15 +313,15 @@ bool MachineRegisterInfo::isConstantPhysReg(unsigned PhysReg,
assert(TargetRegisterInfo::isPhysicalRegister(PhysReg));
// Check if any overlapping register is modified.
- for (const uint16_t *R = TRI->getOverlaps(PhysReg); *R; ++R)
- if (!def_empty(*R))
+ for (MCRegAliasIterator AI(PhysReg, TRI, true); AI.isValid(); ++AI)
+ if (!def_empty(*AI))
return false;
// Check if any overlapping register is allocatable so it may be used later.
if (AllocatableRegs.empty())
AllocatableRegs = TRI->getAllocatableSet(MF);
- for (const uint16_t *R = TRI->getOverlaps(PhysReg); *R; ++R)
- if (AllocatableRegs.test(*R))
+ for (MCRegAliasIterator AI(PhysReg, TRI, true); AI.isValid(); ++AI)
+ if (AllocatableRegs.test(*AI))
return false;
return true;
}
diff --git a/lib/CodeGen/MachineSSAUpdater.cpp b/lib/CodeGen/MachineSSAUpdater.cpp
index 070a557..076547a 100644
--- a/lib/CodeGen/MachineSSAUpdater.cpp
+++ b/lib/CodeGen/MachineSSAUpdater.cpp
@@ -42,7 +42,7 @@ MachineSSAUpdater::MachineSSAUpdater(MachineFunction &MF,
}
MachineSSAUpdater::~MachineSSAUpdater() {
- delete &getAvailableVals(AV);
+ delete static_cast<AvailableValsTy*>(AV);
}
/// Initialize - Reset this object to get ready for a new set of SSA
@@ -241,30 +241,6 @@ void MachineSSAUpdater::ReplaceRegWith(unsigned OldReg, unsigned NewReg) {
I->second = NewReg;
}
-/// MachinePHIiter - Iterator for PHI operands. This is used for the
-/// PHI_iterator in the SSAUpdaterImpl template.
-namespace {
- class MachinePHIiter {
- private:
- MachineInstr *PHI;
- unsigned idx;
-
- public:
- explicit MachinePHIiter(MachineInstr *P) // begin iterator
- : PHI(P), idx(1) {}
- MachinePHIiter(MachineInstr *P, bool) // end iterator
- : PHI(P), idx(PHI->getNumOperands()) {}
-
- MachinePHIiter &operator++() { idx += 2; return *this; }
- bool operator==(const MachinePHIiter& x) const { return idx == x.idx; }
- bool operator!=(const MachinePHIiter& x) const { return !operator==(x); }
- unsigned getIncomingValue() { return PHI->getOperand(idx).getReg(); }
- MachineBasicBlock *getIncomingBlock() {
- return PHI->getOperand(idx+1).getMBB();
- }
- };
-}
-
/// SSAUpdaterTraits<MachineSSAUpdater> - Traits for the SSAUpdaterImpl
/// template, specialized for MachineSSAUpdater.
namespace llvm {
@@ -279,7 +255,26 @@ public:
static BlkSucc_iterator BlkSucc_begin(BlkT *BB) { return BB->succ_begin(); }
static BlkSucc_iterator BlkSucc_end(BlkT *BB) { return BB->succ_end(); }
- typedef MachinePHIiter PHI_iterator;
+ /// Iterator for PHI operands.
+ class PHI_iterator {
+ private:
+ MachineInstr *PHI;
+ unsigned idx;
+
+ public:
+ explicit PHI_iterator(MachineInstr *P) // begin iterator
+ : PHI(P), idx(1) {}
+ PHI_iterator(MachineInstr *P, bool) // end iterator
+ : PHI(P), idx(PHI->getNumOperands()) {}
+
+ PHI_iterator &operator++() { idx += 2; return *this; }
+ bool operator==(const PHI_iterator& x) const { return idx == x.idx; }
+ bool operator!=(const PHI_iterator& x) const { return !operator==(x); }
+ unsigned getIncomingValue() { return PHI->getOperand(idx).getReg(); }
+ MachineBasicBlock *getIncomingBlock() {
+ return PHI->getOperand(idx+1).getMBB();
+ }
+ };
static inline PHI_iterator PHI_begin(PhiT *PHI) { return PHI_iterator(PHI); }
static inline PHI_iterator PHI_end(PhiT *PHI) {
return PHI_iterator(PHI, true);
diff --git a/lib/CodeGen/MachineScheduler.cpp b/lib/CodeGen/MachineScheduler.cpp
index 1d3241b..a1dc948 100644
--- a/lib/CodeGen/MachineScheduler.cpp
+++ b/lib/CodeGen/MachineScheduler.cpp
@@ -17,9 +17,13 @@
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/MachineScheduler.h"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/RegisterClassInfo.h"
+#include "llvm/CodeGen/RegisterPressure.h"
#include "llvm/CodeGen/ScheduleDAGInstrs.h"
-#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/MC/MCInstrItineraries.h"
+#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
@@ -50,6 +54,15 @@ static bool ViewMISchedDAGs = false;
// Machine Instruction Scheduling Pass and Registry
//===----------------------------------------------------------------------===//
+MachineSchedContext::MachineSchedContext():
+ MF(0), MLI(0), MDT(0), PassConfig(0), AA(0), LIS(0) {
+ RegClassInfo = new RegisterClassInfo();
+}
+
+MachineSchedContext::~MachineSchedContext() {
+ delete RegClassInfo;
+}
+
namespace {
/// MachineScheduler runs after coalescing and before register allocation.
class MachineScheduler : public MachineSchedContext,
@@ -122,6 +135,29 @@ DefaultSchedRegistry("default", "Use the target's default scheduler choice.",
/// default scheduler if the target does not set a default.
static ScheduleDAGInstrs *createConvergingSched(MachineSchedContext *C);
+
+/// Decrement this iterator until reaching the top or a non-debug instr.
+static MachineBasicBlock::iterator
+priorNonDebug(MachineBasicBlock::iterator I, MachineBasicBlock::iterator Beg) {
+ assert(I != Beg && "reached the top of the region, cannot decrement");
+ while (--I != Beg) {
+ if (!I->isDebugValue())
+ break;
+ }
+ return I;
+}
+
+/// If this iterator is a debug value, increment until reaching the End or a
+/// non-debug instruction.
+static MachineBasicBlock::iterator
+nextIfDebug(MachineBasicBlock::iterator I, MachineBasicBlock::iterator End) {
+ for(; I != End; ++I) {
+ if (!I->isDebugValue())
+ break;
+ }
+ return I;
+}
+
/// Top-level MachineScheduler pass driver.
///
/// Visit blocks in function order. Divide each block into scheduling regions
@@ -139,6 +175,8 @@ static ScheduleDAGInstrs *createConvergingSched(MachineSchedContext *C);
/// design would be to split blocks at scheduling boundaries, but LLVM has a
/// general bias against block splitting purely for implementation simplicity.
bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) {
+ DEBUG(dbgs() << "Before MISsched:\n"; mf.print(dbgs()));
+
// Initialize the context of the pass.
MF = &mf;
MLI = &getAnalysis<MachineLoopInfo>();
@@ -149,6 +187,8 @@ bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) {
LIS = &getAnalysis<LiveIntervals>();
const TargetInstrInfo *TII = MF->getTarget().getInstrInfo();
+ RegClassInfo->runOnMachineFunction(*MF);
+
// Select the scheduler, or set the default.
MachineSchedRegistry::ScheduleDAGCtor Ctor = MachineSchedOpt;
if (Ctor == useDefaultMachineSched) {
@@ -163,13 +203,16 @@ bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) {
OwningPtr<ScheduleDAGInstrs> Scheduler(Ctor(this));
// Visit all machine basic blocks.
+ //
+ // TODO: Visit blocks in global postorder or postorder within the bottom-up
+ // loop tree. Then we can optionally compute global RegPressure.
for (MachineFunction::iterator MBB = MF->begin(), MBBEnd = MF->end();
MBB != MBBEnd; ++MBB) {
Scheduler->startBlock(MBB);
// Break the block into scheduling regions [I, RegionEnd), and schedule each
- // region as soon as it is discovered. RegionEnd points the the scheduling
+ // region as soon as it is discovered. RegionEnd points to the scheduling
// boundary at the bottom of the region. The DAG does not include RegionEnd,
// but the region does (i.e. the next RegionEnd is above the previous
// RegionBegin). If the current block has no terminator then RegionEnd ==
@@ -181,6 +224,7 @@ bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) {
unsigned RemainingCount = MBB->size();
for(MachineBasicBlock::iterator RegionEnd = MBB->end();
RegionEnd != MBB->begin(); RegionEnd = Scheduler->begin()) {
+
// Avoid decrementing RegionEnd for blocks with no terminator.
if (RegionEnd != MBB->end()
|| TII->isSchedulingBoundary(llvm::prior(RegionEnd), MBB, *MF)) {
@@ -207,7 +251,8 @@ bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) {
Scheduler->exitRegion();
continue;
}
- DEBUG(dbgs() << "MachineScheduling " << MF->getFunction()->getName()
+ DEBUG(dbgs() << "********** MI Scheduling **********\n");
+ DEBUG(dbgs() << MF->getFunction()->getName()
<< ":BB#" << MBB->getNumber() << "\n From: " << *I << " To: ";
if (RegionEnd != MBB->end()) dbgs() << *RegionEnd;
else dbgs() << "End";
@@ -260,6 +305,9 @@ public:
/// be scheduled at the bottom.
virtual SUnit *pickNode(bool &IsTopNode) = 0;
+ /// Notify MachineSchedStrategy that ScheduleDAGMI has scheduled a node.
+ virtual void schedNode(SUnit *SU, bool IsTopNode) = 0;
+
/// When all predecessor dependencies have been resolved, free this node for
/// top-down scheduling.
virtual void releaseTopNode(SUnit *SU) = 0;
@@ -279,22 +327,45 @@ namespace {
/// machine instructions while updating LiveIntervals.
class ScheduleDAGMI : public ScheduleDAGInstrs {
AliasAnalysis *AA;
+ RegisterClassInfo *RegClassInfo;
MachineSchedStrategy *SchedImpl;
+ MachineBasicBlock::iterator LiveRegionEnd;
+
+ /// Register pressure in this region computed by buildSchedGraph.
+ IntervalPressure RegPressure;
+ RegPressureTracker RPTracker;
+
+ /// List of pressure sets that exceed the target's pressure limit before
+ /// scheduling, listed in increasing set ID order. Each pressure set is paired
+ /// with its max pressure in the currently scheduled regions.
+ std::vector<PressureElement> RegionCriticalPSets;
+
/// The top of the unscheduled zone.
MachineBasicBlock::iterator CurrentTop;
+ IntervalPressure TopPressure;
+ RegPressureTracker TopRPTracker;
/// The bottom of the unscheduled zone.
MachineBasicBlock::iterator CurrentBottom;
+ IntervalPressure BotPressure;
+ RegPressureTracker BotRPTracker;
+#ifndef NDEBUG
/// The number of instructions scheduled so far. Used to cut off the
/// scheduler at the point determined by misched-cutoff.
unsigned NumInstrsScheduled;
+#endif
public:
ScheduleDAGMI(MachineSchedContext *C, MachineSchedStrategy *S):
ScheduleDAGInstrs(*C->MF, *C->MLI, *C->MDT, /*IsPostRA=*/false, C->LIS),
- AA(C->AA), SchedImpl(S), CurrentTop(), CurrentBottom(),
- NumInstrsScheduled(0) {}
+ AA(C->AA), RegClassInfo(C->RegClassInfo), SchedImpl(S),
+ RPTracker(RegPressure), CurrentTop(), TopRPTracker(TopPressure),
+ CurrentBottom(), BotRPTracker(BotPressure) {
+#ifndef NDEBUG
+ NumInstrsScheduled = 0;
+#endif
+ }
~ScheduleDAGMI() {
delete SchedImpl;
@@ -303,22 +374,68 @@ public:
MachineBasicBlock::iterator top() const { return CurrentTop; }
MachineBasicBlock::iterator bottom() const { return CurrentBottom; }
- /// Implement ScheduleDAGInstrs interface.
+ /// Implement the ScheduleDAGInstrs interface for handling the next scheduling
+ /// region. This covers all instructions in a block, while schedule() may only
+ /// cover a subset.
+ void enterRegion(MachineBasicBlock *bb,
+ MachineBasicBlock::iterator begin,
+ MachineBasicBlock::iterator end,
+ unsigned endcount);
+
+ /// Implement ScheduleDAGInstrs interface for scheduling a sequence of
+ /// reorderable instructions.
void schedule();
+ /// Get current register pressure for the top scheduled instructions.
+ const IntervalPressure &getTopPressure() const { return TopPressure; }
+ const RegPressureTracker &getTopRPTracker() const { return TopRPTracker; }
+
+ /// Get current register pressure for the bottom scheduled instructions.
+ const IntervalPressure &getBotPressure() const { return BotPressure; }
+ const RegPressureTracker &getBotRPTracker() const { return BotRPTracker; }
+
+ /// Get register pressure for the entire scheduling region before scheduling.
+ const IntervalPressure &getRegPressure() const { return RegPressure; }
+
+ const std::vector<PressureElement> &getRegionCriticalPSets() const {
+ return RegionCriticalPSets;
+ }
+
+ /// getIssueWidth - Return the max instructions per scheduling group.
+ unsigned getIssueWidth() const {
+ return (InstrItins && InstrItins->SchedModel)
+ ? InstrItins->SchedModel->IssueWidth : 1;
+ }
+
+ /// getNumMicroOps - Return the number of issue slots required for this MI.
+ unsigned getNumMicroOps(MachineInstr *MI) const {
+ if (!InstrItins) return 1;
+ int UOps = InstrItins->getNumMicroOps(MI->getDesc().getSchedClass());
+ return (UOps >= 0) ? UOps : TII->getNumMicroOps(InstrItins, MI);
+ }
+
protected:
+ void initRegPressure();
+ void updateScheduledPressure(std::vector<unsigned> NewMaxPressure);
+
void moveInstruction(MachineInstr *MI, MachineBasicBlock::iterator InsertPos);
bool checkSchedLimit();
+ void releaseRoots();
+
void releaseSucc(SUnit *SU, SDep *SuccEdge);
void releaseSuccessors(SUnit *SU);
void releasePred(SUnit *SU, SDep *PredEdge);
void releasePredecessors(SUnit *SU);
+
+ void placeDebugValues();
};
} // namespace
/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. When
/// NumPredsLeft reaches zero, release the successor node.
+///
+/// FIXME: Adjust SuccSU height based on MinLatency.
void ScheduleDAGMI::releaseSucc(SUnit *SU, SDep *SuccEdge) {
SUnit *SuccSU = SuccEdge->getSUnit();
@@ -345,6 +462,8 @@ void ScheduleDAGMI::releaseSuccessors(SUnit *SU) {
/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. When
/// NumSuccsLeft reaches zero, release the predecessor node.
+///
+/// FIXME: Adjust PredSU height based on MinLatency.
void ScheduleDAGMI::releasePred(SUnit *SU, SDep *PredEdge) {
SUnit *PredSU = PredEdge->getSUnit();
@@ -371,12 +490,17 @@ void ScheduleDAGMI::releasePredecessors(SUnit *SU) {
void ScheduleDAGMI::moveInstruction(MachineInstr *MI,
MachineBasicBlock::iterator InsertPos) {
- // Fix RegionBegin if the first instruction moves down.
+ // Advance RegionBegin if the first instruction moves down.
if (&*RegionBegin == MI)
- RegionBegin = llvm::next(RegionBegin);
+ ++RegionBegin;
+
+ // Update the instruction stream.
BB->splice(InsertPos, BB, MI);
+
+ // Update LiveIntervals
LIS->handleMove(MI);
- // Fix RegionBegin if another instruction moves above the first instruction.
+
+ // Recede RegionBegin if an instruction moves above the first.
if (RegionBegin == InsertPos)
RegionBegin = MI;
}
@@ -392,12 +516,114 @@ bool ScheduleDAGMI::checkSchedLimit() {
return true;
}
+/// enterRegion - Called back from MachineScheduler::runOnMachineFunction after
+/// crossing a scheduling boundary. [begin, end) includes all instructions in
+/// the region, including the boundary itself and single-instruction regions
+/// that don't get scheduled.
+void ScheduleDAGMI::enterRegion(MachineBasicBlock *bb,
+ MachineBasicBlock::iterator begin,
+ MachineBasicBlock::iterator end,
+ unsigned endcount)
+{
+ ScheduleDAGInstrs::enterRegion(bb, begin, end, endcount);
+
+ // For convenience remember the end of the liveness region.
+ LiveRegionEnd =
+ (RegionEnd == bb->end()) ? RegionEnd : llvm::next(RegionEnd);
+}
+
+// Set up the register pressure trackers for the top scheduled and bottom
+// scheduled regions.
+void ScheduleDAGMI::initRegPressure() {
+ TopRPTracker.init(&MF, RegClassInfo, LIS, BB, RegionBegin);
+ BotRPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd);
+
+ // Close the RPTracker to finalize live ins.
+ RPTracker.closeRegion();
+
+ DEBUG(RPTracker.getPressure().dump(TRI));
+
+ // Initialize the live ins and live outs.
+ TopRPTracker.addLiveRegs(RPTracker.getPressure().LiveInRegs);
+ BotRPTracker.addLiveRegs(RPTracker.getPressure().LiveOutRegs);
+
+ // Close one end of the tracker so we can call
+ // getMaxUpward/DownwardPressureDelta before advancing across any
+ // instructions. This converts currently live regs into live ins/outs.
+ TopRPTracker.closeTop();
+ BotRPTracker.closeBottom();
+
+ // Account for liveness generated by the region boundary.
+ if (LiveRegionEnd != RegionEnd)
+ BotRPTracker.recede();
+
+ assert(BotRPTracker.getPos() == RegionEnd && "Can't find the region bottom");
+
+ // Cache the list of excess pressure sets in this region. This will also track
+ // the max pressure in the scheduled code for these sets.
+ RegionCriticalPSets.clear();
+ std::vector<unsigned> RegionPressure = RPTracker.getPressure().MaxSetPressure;
+ for (unsigned i = 0, e = RegionPressure.size(); i < e; ++i) {
+ unsigned Limit = TRI->getRegPressureSetLimit(i);
+ if (RegionPressure[i] > Limit)
+ RegionCriticalPSets.push_back(PressureElement(i, 0));
+ }
+ DEBUG(dbgs() << "Excess PSets: ";
+ for (unsigned i = 0, e = RegionCriticalPSets.size(); i != e; ++i)
+ dbgs() << TRI->getRegPressureSetName(
+ RegionCriticalPSets[i].PSetID) << " ";
+ dbgs() << "\n");
+}
+
+// FIXME: When the pressure tracker deals in pressure differences then we won't
+// iterate over all RegionCriticalPSets[i].
+void ScheduleDAGMI::
+updateScheduledPressure(std::vector<unsigned> NewMaxPressure) {
+ for (unsigned i = 0, e = RegionCriticalPSets.size(); i < e; ++i) {
+ unsigned ID = RegionCriticalPSets[i].PSetID;
+ int &MaxUnits = RegionCriticalPSets[i].UnitIncrease;
+ if ((int)NewMaxPressure[ID] > MaxUnits)
+ MaxUnits = NewMaxPressure[ID];
+ }
+}
+
+// Release all DAG roots for scheduling.
+void ScheduleDAGMI::releaseRoots() {
+ SmallVector<SUnit*, 16> BotRoots;
+
+ for (std::vector<SUnit>::iterator
+ I = SUnits.begin(), E = SUnits.end(); I != E; ++I) {
+ // A SUnit is ready to top schedule if it has no predecessors.
+ if (I->Preds.empty())
+ SchedImpl->releaseTopNode(&(*I));
+ // A SUnit is ready to bottom schedule if it has no successors.
+ if (I->Succs.empty())
+ BotRoots.push_back(&(*I));
+ }
+ // Release bottom roots in reverse order so the higher priority nodes appear
+ // first. This is more natural and slightly more efficient.
+ for (SmallVectorImpl<SUnit*>::const_reverse_iterator
+ I = BotRoots.rbegin(), E = BotRoots.rend(); I != E; ++I)
+ SchedImpl->releaseBottomNode(*I);
+}
+
/// schedule - Called back from MachineScheduler::runOnMachineFunction
-/// after setting up the current scheduling region.
+/// after setting up the current scheduling region. [RegionBegin, RegionEnd)
+/// only includes instructions that have DAG nodes, not scheduling boundaries.
void ScheduleDAGMI::schedule() {
- buildSchedGraph(AA);
+ // Initialize the register pressure tracker used by buildSchedGraph.
+ RPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd);
+
+ // Account for liveness generated by the region boundary.
+ if (LiveRegionEnd != RegionEnd)
+ RPTracker.recede();
+
+ // Build the DAG, and compute current register pressure.
+ buildSchedGraph(AA, &RPTracker);
+
+ // Initialize top/bottom trackers after computing region pressure.
+ initRegPressure();
- DEBUG(dbgs() << "********** MI Scheduling **********\n");
DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
SUnits[su].dumpAll(this));
@@ -410,22 +636,12 @@ void ScheduleDAGMI::schedule() {
releasePredecessors(&ExitSU);
// Release all DAG roots for scheduling.
- for (std::vector<SUnit>::iterator I = SUnits.begin(), E = SUnits.end();
- I != E; ++I) {
- // A SUnit is ready to top schedule if it has no predecessors.
- if (I->Preds.empty())
- SchedImpl->releaseTopNode(&(*I));
- // A SUnit is ready to bottom schedule if it has no successors.
- if (I->Succs.empty())
- SchedImpl->releaseBottomNode(&(*I));
- }
+ releaseRoots();
- CurrentTop = RegionBegin;
+ CurrentTop = nextIfDebug(RegionBegin, RegionEnd);
CurrentBottom = RegionEnd;
bool IsTopNode = false;
while (SUnit *SU = SchedImpl->pickNode(IsTopNode)) {
- DEBUG(dbgs() << "*** " << (IsTopNode ? "Top" : "Bottom")
- << " Scheduling Instruction:\n"; SU->dump(this));
if (!checkSchedLimit())
break;
@@ -435,28 +651,69 @@ void ScheduleDAGMI::schedule() {
if (IsTopNode) {
assert(SU->isTopReady() && "node still has unscheduled dependencies");
if (&*CurrentTop == MI)
- ++CurrentTop;
- else
+ CurrentTop = nextIfDebug(++CurrentTop, CurrentBottom);
+ else {
moveInstruction(MI, CurrentTop);
+ TopRPTracker.setPos(MI);
+ }
+
+ // Update top scheduled pressure.
+ TopRPTracker.advance();
+ assert(TopRPTracker.getPos() == CurrentTop && "out of sync");
+ updateScheduledPressure(TopRPTracker.getPressure().MaxSetPressure);
+
// Release dependent instructions for scheduling.
releaseSuccessors(SU);
}
else {
assert(SU->isBottomReady() && "node still has unscheduled dependencies");
- if (&*llvm::prior(CurrentBottom) == MI)
- --CurrentBottom;
+ MachineBasicBlock::iterator priorII =
+ priorNonDebug(CurrentBottom, CurrentTop);
+ if (&*priorII == MI)
+ CurrentBottom = priorII;
else {
- if (&*CurrentTop == MI)
- CurrentTop = llvm::next(CurrentTop);
+ if (&*CurrentTop == MI) {
+ CurrentTop = nextIfDebug(++CurrentTop, priorII);
+ TopRPTracker.setPos(CurrentTop);
+ }
moveInstruction(MI, CurrentBottom);
CurrentBottom = MI;
}
+ // Update bottom scheduled pressure.
+ BotRPTracker.recede();
+ assert(BotRPTracker.getPos() == CurrentBottom && "out of sync");
+ updateScheduledPressure(BotRPTracker.getPressure().MaxSetPressure);
+
// Release dependent instructions for scheduling.
releasePredecessors(SU);
}
SU->isScheduled = true;
+ SchedImpl->schedNode(SU, IsTopNode);
}
assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");
+
+ placeDebugValues();
+}
+
+/// Reinsert any remaining debug_values, just like the PostRA scheduler.
+void ScheduleDAGMI::placeDebugValues() {
+ // If first instruction was a DBG_VALUE then put it back.
+ if (FirstDbgValue) {
+ BB->splice(RegionBegin, BB, FirstDbgValue);
+ RegionBegin = FirstDbgValue;
+ }
+
+ for (std::vector<std::pair<MachineInstr *, MachineInstr *> >::iterator
+ DI = DbgValues.end(), DE = DbgValues.begin(); DI != DE; --DI) {
+ std::pair<MachineInstr *, MachineInstr *> P = *prior(DI);
+ MachineInstr *DbgValue = P.first;
+ MachineBasicBlock::iterator OrigPrevMI = P.second;
+ BB->splice(++OrigPrevMI, BB, DbgValue);
+ if (OrigPrevMI == llvm::prior(RegionEnd))
+ RegionEnd = DbgValue;
+ }
+ DbgValues.clear();
+ FirstDbgValue = NULL;
}
//===----------------------------------------------------------------------===//
@@ -464,56 +721,603 @@ void ScheduleDAGMI::schedule() {
//===----------------------------------------------------------------------===//
namespace {
+/// ReadyQueue encapsulates vector of "ready" SUnits with basic convenience
+/// methods for pushing and removing nodes. ReadyQueues are uniquely identified
+/// by an ID. SUnit::NodeQueueId is a mask of the ReadyQueues the SUnit is in.
+class ReadyQueue {
+ unsigned ID;
+ std::string Name;
+ std::vector<SUnit*> Queue;
+
+public:
+ ReadyQueue(unsigned id, const Twine &name): ID(id), Name(name.str()) {}
+
+ unsigned getID() const { return ID; }
+
+ StringRef getName() const { return Name; }
+
+ // SU is in this queue if its NodeQueueId is a superset of this ID.
+ bool isInQueue(SUnit *SU) const { return (SU->NodeQueueId & ID); }
+
+ bool empty() const { return Queue.empty(); }
+
+ unsigned size() const { return Queue.size(); }
+
+ typedef std::vector<SUnit*>::iterator iterator;
+
+ iterator begin() { return Queue.begin(); }
+
+ iterator end() { return Queue.end(); }
+
+ iterator find(SUnit *SU) {
+ return std::find(Queue.begin(), Queue.end(), SU);
+ }
+
+ void push(SUnit *SU) {
+ Queue.push_back(SU);
+ SU->NodeQueueId |= ID;
+ }
+
+ void remove(iterator I) {
+ (*I)->NodeQueueId &= ~ID;
+ *I = Queue.back();
+ Queue.pop_back();
+ }
+
+ void dump() {
+ dbgs() << Name << ": ";
+ for (unsigned i = 0, e = Queue.size(); i < e; ++i)
+ dbgs() << Queue[i]->NodeNum << " ";
+ dbgs() << "\n";
+ }
+};
+
/// ConvergingScheduler shrinks the unscheduled zone using heuristics to balance
/// the schedule.
class ConvergingScheduler : public MachineSchedStrategy {
+
+ /// Store the state used by ConvergingScheduler heuristics, required for the
+ /// lifetime of one invocation of pickNode().
+ struct SchedCandidate {
+ // The best SUnit candidate.
+ SUnit *SU;
+
+ // Register pressure values for the best candidate.
+ RegPressureDelta RPDelta;
+
+ SchedCandidate(): SU(NULL) {}
+ };
+ /// Represent the type of SchedCandidate found within a single queue.
+ enum CandResult {
+ NoCand, NodeOrder, SingleExcess, SingleCritical, SingleMax, MultiPressure };
+
+ /// Each Scheduling boundary is associated with ready queues. It tracks the
+ /// current cycle in whichever direction it has moved, and maintains the state
+ /// of "hazards" and other interlocks at the current cycle.
+ struct SchedBoundary {
+ ScheduleDAGMI *DAG;
+
+ ReadyQueue Available;
+ ReadyQueue Pending;
+ bool CheckPending;
+
+ ScheduleHazardRecognizer *HazardRec;
+
+ unsigned CurrCycle;
+ unsigned IssueCount;
+
+ /// MinReadyCycle - Cycle of the soonest available instruction.
+ unsigned MinReadyCycle;
+
+ // Remember the greatest min operand latency.
+ unsigned MaxMinLatency;
+
+ /// Pending queues extend the ready queues with the same ID and the
+ /// PendingFlag set.
+ SchedBoundary(unsigned ID, const Twine &Name):
+ DAG(0), Available(ID, Name+".A"),
+ Pending(ID << ConvergingScheduler::LogMaxQID, Name+".P"),
+ CheckPending(false), HazardRec(0), CurrCycle(0), IssueCount(0),
+ MinReadyCycle(UINT_MAX), MaxMinLatency(0) {}
+
+ ~SchedBoundary() { delete HazardRec; }
+
+ bool isTop() const {
+ return Available.getID() == ConvergingScheduler::TopQID;
+ }
+
+ bool checkHazard(SUnit *SU);
+
+ void releaseNode(SUnit *SU, unsigned ReadyCycle);
+
+ void bumpCycle();
+
+ void bumpNode(SUnit *SU);
+
+ void releasePending();
+
+ void removeReady(SUnit *SU);
+
+ SUnit *pickOnlyChoice();
+ };
+
ScheduleDAGMI *DAG;
+ const TargetRegisterInfo *TRI;
- unsigned NumTopReady;
- unsigned NumBottomReady;
+ // State of the top and bottom scheduled instruction boundaries.
+ SchedBoundary Top;
+ SchedBoundary Bot;
public:
- virtual void initialize(ScheduleDAGMI *dag) {
- DAG = dag;
+ /// SUnit::NodeQueueId: 0 (none), 1 (top), 2 (bot), 3 (both)
+ enum {
+ TopQID = 1,
+ BotQID = 2,
+ LogMaxQID = 2
+ };
+
+ ConvergingScheduler():
+ DAG(0), TRI(0), Top(TopQID, "TopQ"), Bot(BotQID, "BotQ") {}
+
+ virtual void initialize(ScheduleDAGMI *dag);
+
+ virtual SUnit *pickNode(bool &IsTopNode);
+
+ virtual void schedNode(SUnit *SU, bool IsTopNode);
+
+ virtual void releaseTopNode(SUnit *SU);
+
+ virtual void releaseBottomNode(SUnit *SU);
+
+protected:
+ SUnit *pickNodeBidrectional(bool &IsTopNode);
- assert((!ForceTopDown || !ForceBottomUp) &&
- "-misched-topdown incompatible with -misched-bottomup");
+ CandResult pickNodeFromQueue(ReadyQueue &Q,
+ const RegPressureTracker &RPTracker,
+ SchedCandidate &Candidate);
+#ifndef NDEBUG
+ void traceCandidate(const char *Label, const ReadyQueue &Q, SUnit *SU,
+ PressureElement P = PressureElement());
+#endif
+};
+} // namespace
+
+void ConvergingScheduler::initialize(ScheduleDAGMI *dag) {
+ DAG = dag;
+ TRI = DAG->TRI;
+ Top.DAG = dag;
+ Bot.DAG = dag;
+
+ // Initialize the HazardRecognizers.
+ const TargetMachine &TM = DAG->MF.getTarget();
+ const InstrItineraryData *Itin = TM.getInstrItineraryData();
+ Top.HazardRec = TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG);
+ Bot.HazardRec = TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG);
+
+ assert((!ForceTopDown || !ForceBottomUp) &&
+ "-misched-topdown incompatible with -misched-bottomup");
+}
+
+void ConvergingScheduler::releaseTopNode(SUnit *SU) {
+ if (SU->isScheduled)
+ return;
+
+ for (SUnit::succ_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I) {
+ unsigned PredReadyCycle = I->getSUnit()->TopReadyCycle;
+ unsigned Latency =
+ DAG->computeOperandLatency(I->getSUnit(), SU, *I, /*FindMin=*/true);
+#ifndef NDEBUG
+ Top.MaxMinLatency = std::max(Latency, Top.MaxMinLatency);
+#endif
+ if (SU->TopReadyCycle < PredReadyCycle + Latency)
+ SU->TopReadyCycle = PredReadyCycle + Latency;
}
+ Top.releaseNode(SU, SU->TopReadyCycle);
+}
- virtual SUnit *pickNode(bool &IsTopNode) {
- if (DAG->top() == DAG->bottom())
- return NULL;
+void ConvergingScheduler::releaseBottomNode(SUnit *SU) {
+ if (SU->isScheduled)
+ return;
- // As an initial placeholder heuristic, schedule in the direction that has
- // the fewest choices.
- SUnit *SU;
- if (ForceTopDown || (!ForceBottomUp && NumTopReady <= NumBottomReady)) {
- SU = DAG->getSUnit(DAG->top());
- IsTopNode = true;
+ assert(SU->getInstr() && "Scheduled SUnit must have instr");
+
+ for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+ I != E; ++I) {
+ unsigned SuccReadyCycle = I->getSUnit()->BotReadyCycle;
+ unsigned Latency =
+ DAG->computeOperandLatency(SU, I->getSUnit(), *I, /*FindMin=*/true);
+#ifndef NDEBUG
+ Bot.MaxMinLatency = std::max(Latency, Bot.MaxMinLatency);
+#endif
+ if (SU->BotReadyCycle < SuccReadyCycle + Latency)
+ SU->BotReadyCycle = SuccReadyCycle + Latency;
+ }
+ Bot.releaseNode(SU, SU->BotReadyCycle);
+}
+
+/// Does this SU have a hazard within the current instruction group.
+///
+/// The scheduler supports two modes of hazard recognition. The first is the
+/// ScheduleHazardRecognizer API. It is a fully general hazard recognizer that
+/// supports highly complicated in-order reservation tables
+/// (ScoreboardHazardRecognizer) and arbitrary target-specific logic.
+///
+/// The second is a streamlined mechanism that checks for hazards based on
+/// simple counters that the scheduler itself maintains. It explicitly checks
+/// for instruction dispatch limitations, including the number of micro-ops that
+/// can dispatch per cycle.
+///
+/// TODO: Also check whether the SU must start a new group.
+bool ConvergingScheduler::SchedBoundary::checkHazard(SUnit *SU) {
+ if (HazardRec->isEnabled())
+ return HazardRec->getHazardType(SU) != ScheduleHazardRecognizer::NoHazard;
+
+ if (IssueCount + DAG->getNumMicroOps(SU->getInstr()) > DAG->getIssueWidth())
+ return true;
+
+ return false;
+}
+
+void ConvergingScheduler::SchedBoundary::releaseNode(SUnit *SU,
+ unsigned ReadyCycle) {
+ if (ReadyCycle < MinReadyCycle)
+ MinReadyCycle = ReadyCycle;
+
+ // Check for interlocks first. For the purpose of other heuristics, an
+ // instruction that cannot issue appears as if it's not in the ReadyQueue.
+ if (ReadyCycle > CurrCycle || checkHazard(SU))
+ Pending.push(SU);
+ else
+ Available.push(SU);
+}
+
+/// Move the boundary of scheduled code by one cycle.
+void ConvergingScheduler::SchedBoundary::bumpCycle() {
+ unsigned Width = DAG->getIssueWidth();
+ IssueCount = (IssueCount <= Width) ? 0 : IssueCount - Width;
+
+ assert(MinReadyCycle < UINT_MAX && "MinReadyCycle uninitialized");
+ unsigned NextCycle = std::max(CurrCycle + 1, MinReadyCycle);
+
+ if (!HazardRec->isEnabled()) {
+ // Bypass HazardRec virtual calls.
+ CurrCycle = NextCycle;
+ }
+ else {
+ // Bypass getHazardType calls in case of long latency.
+ for (; CurrCycle != NextCycle; ++CurrCycle) {
+ if (isTop())
+ HazardRec->AdvanceCycle();
+ else
+ HazardRec->RecedeCycle();
}
- else {
- SU = DAG->getSUnit(llvm::prior(DAG->bottom()));
- IsTopNode = false;
+ }
+ CheckPending = true;
+
+ DEBUG(dbgs() << "*** " << Available.getName() << " cycle "
+ << CurrCycle << '\n');
+}
+
+/// Move the boundary of scheduled code by one SUnit.
+void ConvergingScheduler::SchedBoundary::bumpNode(SUnit *SU) {
+ // Update the reservation table.
+ if (HazardRec->isEnabled()) {
+ if (!isTop() && SU->isCall) {
+ // Calls are scheduled with their preceding instructions. For bottom-up
+ // scheduling, clear the pipeline state before emitting.
+ HazardRec->Reset();
}
- if (SU->isTopReady()) {
- assert(NumTopReady > 0 && "bad ready count");
- --NumTopReady;
+ HazardRec->EmitInstruction(SU);
+ }
+ // Check the instruction group dispatch limit.
+ // TODO: Check if this SU must end a dispatch group.
+ IssueCount += DAG->getNumMicroOps(SU->getInstr());
+ if (IssueCount >= DAG->getIssueWidth()) {
+ DEBUG(dbgs() << "*** Max instrs at cycle " << CurrCycle << '\n');
+ bumpCycle();
+ }
+}
+
+/// Release pending ready nodes into the available queue. This makes them
+/// visible to heuristics.
+void ConvergingScheduler::SchedBoundary::releasePending() {
+ // If the available queue is empty, it is safe to reset MinReadyCycle.
+ if (Available.empty())
+ MinReadyCycle = UINT_MAX;
+
+ // Check to see if any of the pending instructions are ready to issue. If
+ // so, add them to the available queue.
+ for (unsigned i = 0, e = Pending.size(); i != e; ++i) {
+ SUnit *SU = *(Pending.begin()+i);
+ unsigned ReadyCycle = isTop() ? SU->TopReadyCycle : SU->BotReadyCycle;
+
+ if (ReadyCycle < MinReadyCycle)
+ MinReadyCycle = ReadyCycle;
+
+ if (ReadyCycle > CurrCycle)
+ continue;
+
+ if (checkHazard(SU))
+ continue;
+
+ Available.push(SU);
+ Pending.remove(Pending.begin()+i);
+ --i; --e;
+ }
+ CheckPending = false;
+}
+
+/// Remove SU from the ready set for this boundary.
+void ConvergingScheduler::SchedBoundary::removeReady(SUnit *SU) {
+ if (Available.isInQueue(SU))
+ Available.remove(Available.find(SU));
+ else {
+ assert(Pending.isInQueue(SU) && "bad ready count");
+ Pending.remove(Pending.find(SU));
+ }
+}
+
+/// If this queue only has one ready candidate, return it. As a side effect,
+/// advance the cycle until at least one node is ready. If multiple instructions
+/// are ready, return NULL.
+SUnit *ConvergingScheduler::SchedBoundary::pickOnlyChoice() {
+ if (CheckPending)
+ releasePending();
+
+ for (unsigned i = 0; Available.empty(); ++i) {
+ assert(i <= (HazardRec->getMaxLookAhead() + MaxMinLatency) &&
+ "permanent hazard"); (void)i;
+ bumpCycle();
+ releasePending();
+ }
+ if (Available.size() == 1)
+ return *Available.begin();
+ return NULL;
+}
+
+#ifndef NDEBUG
+void ConvergingScheduler::traceCandidate(const char *Label, const ReadyQueue &Q,
+ SUnit *SU, PressureElement P) {
+ dbgs() << Label << " " << Q.getName() << " ";
+ if (P.isValid())
+ dbgs() << TRI->getRegPressureSetName(P.PSetID) << ":" << P.UnitIncrease
+ << " ";
+ else
+ dbgs() << " ";
+ SU->dump(DAG);
+}
+#endif
+
+/// pickNodeFromQueue helper that returns true if the LHS reg pressure effect is
+/// more desirable than RHS from scheduling standpoint.
+static bool compareRPDelta(const RegPressureDelta &LHS,
+ const RegPressureDelta &RHS) {
+ // Compare each component of pressure in decreasing order of importance
+ // without checking if any are valid. Invalid PressureElements are assumed to
+ // have UnitIncrease==0, so are neutral.
+
+ // Avoid exceeding the target's limit.
+ if (LHS.Excess.UnitIncrease != RHS.Excess.UnitIncrease)
+ return LHS.Excess.UnitIncrease < RHS.Excess.UnitIncrease;
+
+ // Avoid increasing the max critical pressure in the scheduled region.
+ if (LHS.CriticalMax.UnitIncrease != RHS.CriticalMax.UnitIncrease)
+ return LHS.CriticalMax.UnitIncrease < RHS.CriticalMax.UnitIncrease;
+
+ // Avoid increasing the max pressure of the entire region.
+ if (LHS.CurrentMax.UnitIncrease != RHS.CurrentMax.UnitIncrease)
+ return LHS.CurrentMax.UnitIncrease < RHS.CurrentMax.UnitIncrease;
+
+ return false;
+}
+
+/// Pick the best candidate from the top queue.
+///
+/// TODO: getMaxPressureDelta results can be mostly cached for each SUnit during
+/// DAG building. To adjust for the current scheduling location we need to
+/// maintain the number of vreg uses remaining to be top-scheduled.
+ConvergingScheduler::CandResult ConvergingScheduler::
+pickNodeFromQueue(ReadyQueue &Q, const RegPressureTracker &RPTracker,
+ SchedCandidate &Candidate) {
+ DEBUG(Q.dump());
+
+ // getMaxPressureDelta temporarily modifies the tracker.
+ RegPressureTracker &TempTracker = const_cast<RegPressureTracker&>(RPTracker);
+
+ // BestSU remains NULL if no top candidates beat the best existing candidate.
+ CandResult FoundCandidate = NoCand;
+ for (ReadyQueue::iterator I = Q.begin(), E = Q.end(); I != E; ++I) {
+ RegPressureDelta RPDelta;
+ TempTracker.getMaxPressureDelta((*I)->getInstr(), RPDelta,
+ DAG->getRegionCriticalPSets(),
+ DAG->getRegPressure().MaxSetPressure);
+
+ // Initialize the candidate if needed.
+ if (!Candidate.SU) {
+ Candidate.SU = *I;
+ Candidate.RPDelta = RPDelta;
+ FoundCandidate = NodeOrder;
+ continue;
+ }
+ // Avoid exceeding the target's limit.
+ if (RPDelta.Excess.UnitIncrease < Candidate.RPDelta.Excess.UnitIncrease) {
+ DEBUG(traceCandidate("ECAND", Q, *I, RPDelta.Excess));
+ Candidate.SU = *I;
+ Candidate.RPDelta = RPDelta;
+ FoundCandidate = SingleExcess;
+ continue;
+ }
+ if (RPDelta.Excess.UnitIncrease > Candidate.RPDelta.Excess.UnitIncrease)
+ continue;
+ if (FoundCandidate == SingleExcess)
+ FoundCandidate = MultiPressure;
+
+ // Avoid increasing the max critical pressure in the scheduled region.
+ if (RPDelta.CriticalMax.UnitIncrease
+ < Candidate.RPDelta.CriticalMax.UnitIncrease) {
+ DEBUG(traceCandidate("PCAND", Q, *I, RPDelta.CriticalMax));
+ Candidate.SU = *I;
+ Candidate.RPDelta = RPDelta;
+ FoundCandidate = SingleCritical;
+ continue;
+ }
+ if (RPDelta.CriticalMax.UnitIncrease
+ > Candidate.RPDelta.CriticalMax.UnitIncrease)
+ continue;
+ if (FoundCandidate == SingleCritical)
+ FoundCandidate = MultiPressure;
+
+ // Avoid increasing the max pressure of the entire region.
+ if (RPDelta.CurrentMax.UnitIncrease
+ < Candidate.RPDelta.CurrentMax.UnitIncrease) {
+ DEBUG(traceCandidate("MCAND", Q, *I, RPDelta.CurrentMax));
+ Candidate.SU = *I;
+ Candidate.RPDelta = RPDelta;
+ FoundCandidate = SingleMax;
+ continue;
}
- if (SU->isBottomReady()) {
- assert(NumBottomReady > 0 && "bad ready count");
- --NumBottomReady;
+ if (RPDelta.CurrentMax.UnitIncrease
+ > Candidate.RPDelta.CurrentMax.UnitIncrease)
+ continue;
+ if (FoundCandidate == SingleMax)
+ FoundCandidate = MultiPressure;
+
+ // Fall through to original instruction order.
+ // Only consider node order if Candidate was chosen from this Q.
+ if (FoundCandidate == NoCand)
+ continue;
+
+ if ((Q.getID() == TopQID && (*I)->NodeNum < Candidate.SU->NodeNum)
+ || (Q.getID() == BotQID && (*I)->NodeNum > Candidate.SU->NodeNum)) {
+ DEBUG(traceCandidate("NCAND", Q, *I));
+ Candidate.SU = *I;
+ Candidate.RPDelta = RPDelta;
+ FoundCandidate = NodeOrder;
}
+ }
+ return FoundCandidate;
+}
+
+/// Pick the best candidate node from either the top or bottom queue.
+SUnit *ConvergingScheduler::pickNodeBidrectional(bool &IsTopNode) {
+ // Schedule as far as possible in the direction of no choice. This is most
+ // efficient, but also provides the best heuristics for CriticalPSets.
+ if (SUnit *SU = Bot.pickOnlyChoice()) {
+ IsTopNode = false;
return SU;
}
+ if (SUnit *SU = Top.pickOnlyChoice()) {
+ IsTopNode = true;
+ return SU;
+ }
+ SchedCandidate BotCand;
+ // Prefer bottom scheduling when heuristics are silent.
+ CandResult BotResult = pickNodeFromQueue(Bot.Available,
+ DAG->getBotRPTracker(), BotCand);
+ assert(BotResult != NoCand && "failed to find the first candidate");
+
+ // If either Q has a single candidate that provides the least increase in
+ // Excess pressure, we can immediately schedule from that Q.
+ //
+ // RegionCriticalPSets summarizes the pressure within the scheduled region and
+ // affects picking from either Q. If scheduling in one direction must
+ // increase pressure for one of the excess PSets, then schedule in that
+ // direction first to provide more freedom in the other direction.
+ if (BotResult == SingleExcess || BotResult == SingleCritical) {
+ IsTopNode = false;
+ return BotCand.SU;
+ }
+ // Check if the top Q has a better candidate.
+ SchedCandidate TopCand;
+ CandResult TopResult = pickNodeFromQueue(Top.Available,
+ DAG->getTopRPTracker(), TopCand);
+ assert(TopResult != NoCand && "failed to find the first candidate");
+
+ if (TopResult == SingleExcess || TopResult == SingleCritical) {
+ IsTopNode = true;
+ return TopCand.SU;
+ }
+ // If either Q has a single candidate that minimizes pressure above the
+ // original region's pressure pick it.
+ if (BotResult == SingleMax) {
+ IsTopNode = false;
+ return BotCand.SU;
+ }
+ if (TopResult == SingleMax) {
+ IsTopNode = true;
+ return TopCand.SU;
+ }
+ // Check for a salient pressure difference and pick the best from either side.
+ if (compareRPDelta(TopCand.RPDelta, BotCand.RPDelta)) {
+ IsTopNode = true;
+ return TopCand.SU;
+ }
+ // Otherwise prefer the bottom candidate in node order.
+ IsTopNode = false;
+ return BotCand.SU;
+}
- virtual void releaseTopNode(SUnit *SU) {
- ++NumTopReady;
+/// Pick the best node to balance the schedule. Implements MachineSchedStrategy.
+SUnit *ConvergingScheduler::pickNode(bool &IsTopNode) {
+ if (DAG->top() == DAG->bottom()) {
+ assert(Top.Available.empty() && Top.Pending.empty() &&
+ Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");
+ return NULL;
}
- virtual void releaseBottomNode(SUnit *SU) {
- ++NumBottomReady;
+ SUnit *SU;
+ if (ForceTopDown) {
+ SU = Top.pickOnlyChoice();
+ if (!SU) {
+ SchedCandidate TopCand;
+ CandResult TopResult =
+ pickNodeFromQueue(Top.Available, DAG->getTopRPTracker(), TopCand);
+ assert(TopResult != NoCand && "failed to find the first candidate");
+ (void)TopResult;
+ SU = TopCand.SU;
+ }
+ IsTopNode = true;
}
-};
-} // namespace
+ else if (ForceBottomUp) {
+ SU = Bot.pickOnlyChoice();
+ if (!SU) {
+ SchedCandidate BotCand;
+ CandResult BotResult =
+ pickNodeFromQueue(Bot.Available, DAG->getBotRPTracker(), BotCand);
+ assert(BotResult != NoCand && "failed to find the first candidate");
+ (void)BotResult;
+ SU = BotCand.SU;
+ }
+ IsTopNode = false;
+ }
+ else {
+ SU = pickNodeBidrectional(IsTopNode);
+ }
+ if (SU->isTopReady())
+ Top.removeReady(SU);
+ if (SU->isBottomReady())
+ Bot.removeReady(SU);
+
+ DEBUG(dbgs() << "*** " << (IsTopNode ? "Top" : "Bottom")
+ << " Scheduling Instruction in cycle "
+ << (IsTopNode ? Top.CurrCycle : Bot.CurrCycle) << '\n';
+ SU->dump(DAG));
+ return SU;
+}
+
+/// Update the scheduler's state after scheduling a node. This is the same node
+/// that was just returned by pickNode(). However, ScheduleDAGMI needs to update
+/// its state based on the current cycle before MachineSchedStrategy does.
+void ConvergingScheduler::schedNode(SUnit *SU, bool IsTopNode) {
+ if (IsTopNode) {
+ SU->TopReadyCycle = Top.CurrCycle;
+ Top.bumpNode(SU);
+ }
+ else {
+ SU->BotReadyCycle = Bot.CurrCycle;
+ Bot.bumpNode(SU);
+ }
+}
/// Create the standard converging machine scheduler. This will be used as the
/// default scheduler if the target does not set a default.
@@ -592,6 +1396,8 @@ public:
return SU;
}
+ virtual void schedNode(SUnit *SU, bool IsTopNode) {}
+
virtual void releaseTopNode(SUnit *SU) {
TopQ.push(SU);
}
diff --git a/lib/CodeGen/MachineSink.cpp b/lib/CodeGen/MachineSink.cpp
index 1ce546b..bc383cb 100644
--- a/lib/CodeGen/MachineSink.cpp
+++ b/lib/CodeGen/MachineSink.cpp
@@ -99,6 +99,16 @@ namespace {
bool PerformTrivialForwardCoalescing(MachineInstr *MI,
MachineBasicBlock *MBB);
};
+
+ // SuccessorSorter - Sort Successors according to their loop depth.
+ struct SuccessorSorter {
+ SuccessorSorter(MachineLoopInfo *LoopInfo) : LI(LoopInfo) {}
+ bool operator()(const MachineBasicBlock *LHS,
+ const MachineBasicBlock *RHS) const {
+ return LI->getLoopDepth(LHS) < LI->getLoopDepth(RHS);
+ }
+ MachineLoopInfo *LI;
+ };
} // end anonymous namespace
char MachineSinking::ID = 0;
@@ -526,8 +536,11 @@ MachineBasicBlock *MachineSinking::FindSuccToSinkTo(MachineInstr *MI,
// Otherwise, we should look at all the successors and decide which one
// we should sink to.
- for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
- E = MBB->succ_end(); SI != E; ++SI) {
+ // We give successors with smaller loop depth higher priority.
+ SmallVector<MachineBasicBlock*, 4> Succs(MBB->succ_begin(), MBB->succ_end());
+ std::stable_sort(Succs.begin(), Succs.end(), SuccessorSorter(LI));
+ for (SmallVector<MachineBasicBlock*, 4>::iterator SI = Succs.begin(),
+ E = Succs.end(); SI != E; ++SI) {
MachineBasicBlock *SuccBlock = *SI;
bool LocalUse = false;
if (AllUsesDominatedByBlock(Reg, SuccBlock, MBB,
diff --git a/lib/CodeGen/MachineTraceMetrics.cpp b/lib/CodeGen/MachineTraceMetrics.cpp
new file mode 100644
index 0000000..1a3aa60
--- /dev/null
+++ b/lib/CodeGen/MachineTraceMetrics.cpp
@@ -0,0 +1,1153 @@
+//===- lib/CodeGen/MachineTraceMetrics.cpp ----------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "machine-trace-metrics"
+#include "MachineTraceMetrics.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/MC/MCInstrItineraries.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/SparseSet.h"
+
+using namespace llvm;
+
+char MachineTraceMetrics::ID = 0;
+char &llvm::MachineTraceMetricsID = MachineTraceMetrics::ID;
+
+INITIALIZE_PASS_BEGIN(MachineTraceMetrics,
+ "machine-trace-metrics", "Machine Trace Metrics", false, true)
+INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_END(MachineTraceMetrics,
+ "machine-trace-metrics", "Machine Trace Metrics", false, true)
+
+MachineTraceMetrics::MachineTraceMetrics()
+ : MachineFunctionPass(ID), MF(0), TII(0), TRI(0), MRI(0), Loops(0) {
+ std::fill(Ensembles, array_endof(Ensembles), (Ensemble*)0);
+}
+
+void MachineTraceMetrics::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addRequired<MachineBranchProbabilityInfo>();
+ AU.addRequired<MachineLoopInfo>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+bool MachineTraceMetrics::runOnMachineFunction(MachineFunction &Func) {
+ MF = &Func;
+ TII = MF->getTarget().getInstrInfo();
+ TRI = MF->getTarget().getRegisterInfo();
+ ItinData = MF->getTarget().getInstrItineraryData();
+ MRI = &MF->getRegInfo();
+ Loops = &getAnalysis<MachineLoopInfo>();
+ BlockInfo.resize(MF->getNumBlockIDs());
+ return false;
+}
+
+void MachineTraceMetrics::releaseMemory() {
+ MF = 0;
+ BlockInfo.clear();
+ for (unsigned i = 0; i != TS_NumStrategies; ++i) {
+ delete Ensembles[i];
+ Ensembles[i] = 0;
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// Fixed block information
+//===----------------------------------------------------------------------===//
+//
+// The number of instructions in a basic block and the CPU resources used by
+// those instructions don't depend on any given trace strategy.
+
+/// Compute the resource usage in basic block MBB.
+const MachineTraceMetrics::FixedBlockInfo*
+MachineTraceMetrics::getResources(const MachineBasicBlock *MBB) {
+ assert(MBB && "No basic block");
+ FixedBlockInfo *FBI = &BlockInfo[MBB->getNumber()];
+ if (FBI->hasResources())
+ return FBI;
+
+ // Compute resource usage in the block.
+ // FIXME: Compute per-functional unit counts.
+ FBI->HasCalls = false;
+ unsigned InstrCount = 0;
+ for (MachineBasicBlock::const_iterator I = MBB->begin(), E = MBB->end();
+ I != E; ++I) {
+ const MachineInstr *MI = I;
+ if (MI->isTransient())
+ continue;
+ ++InstrCount;
+ if (MI->isCall())
+ FBI->HasCalls = true;
+ }
+ FBI->InstrCount = InstrCount;
+ return FBI;
+}
+
+//===----------------------------------------------------------------------===//
+// Ensemble utility functions
+//===----------------------------------------------------------------------===//
+
+MachineTraceMetrics::Ensemble::Ensemble(MachineTraceMetrics *ct)
+ : MTM(*ct) {
+ BlockInfo.resize(MTM.BlockInfo.size());
+}
+
+// Virtual destructor serves as an anchor.
+MachineTraceMetrics::Ensemble::~Ensemble() {}
+
+const MachineLoop*
+MachineTraceMetrics::Ensemble::getLoopFor(const MachineBasicBlock *MBB) const {
+ return MTM.Loops->getLoopFor(MBB);
+}
+
+// Update resource-related information in the TraceBlockInfo for MBB.
+// Only update resources related to the trace above MBB.
+void MachineTraceMetrics::Ensemble::
+computeDepthResources(const MachineBasicBlock *MBB) {
+ TraceBlockInfo *TBI = &BlockInfo[MBB->getNumber()];
+
+ // Compute resources from trace above. The top block is simple.
+ if (!TBI->Pred) {
+ TBI->InstrDepth = 0;
+ TBI->Head = MBB->getNumber();
+ return;
+ }
+
+ // Compute from the block above. A post-order traversal ensures the
+ // predecessor is always computed first.
+ TraceBlockInfo *PredTBI = &BlockInfo[TBI->Pred->getNumber()];
+ assert(PredTBI->hasValidDepth() && "Trace above has not been computed yet");
+ const FixedBlockInfo *PredFBI = MTM.getResources(TBI->Pred);
+ TBI->InstrDepth = PredTBI->InstrDepth + PredFBI->InstrCount;
+ TBI->Head = PredTBI->Head;
+}
+
+// Update resource-related information in the TraceBlockInfo for MBB.
+// Only update resources related to the trace below MBB.
+void MachineTraceMetrics::Ensemble::
+computeHeightResources(const MachineBasicBlock *MBB) {
+ TraceBlockInfo *TBI = &BlockInfo[MBB->getNumber()];
+
+ // Compute resources for the current block.
+ TBI->InstrHeight = MTM.getResources(MBB)->InstrCount;
+
+ // The trace tail is done.
+ if (!TBI->Succ) {
+ TBI->Tail = MBB->getNumber();
+ return;
+ }
+
+ // Compute from the block below. A post-order traversal ensures the
+ // successor is always computed first.
+ TraceBlockInfo *SuccTBI = &BlockInfo[TBI->Succ->getNumber()];
+ assert(SuccTBI->hasValidHeight() && "Trace below has not been computed yet");
+ TBI->InstrHeight += SuccTBI->InstrHeight;
+ TBI->Tail = SuccTBI->Tail;
+}
+
+// Check if depth resources for MBB are valid and return the TBI.
+// Return NULL if the resources have been invalidated.
+const MachineTraceMetrics::TraceBlockInfo*
+MachineTraceMetrics::Ensemble::
+getDepthResources(const MachineBasicBlock *MBB) const {
+ const TraceBlockInfo *TBI = &BlockInfo[MBB->getNumber()];
+ return TBI->hasValidDepth() ? TBI : 0;
+}
+
+// Check if height resources for MBB are valid and return the TBI.
+// Return NULL if the resources have been invalidated.
+const MachineTraceMetrics::TraceBlockInfo*
+MachineTraceMetrics::Ensemble::
+getHeightResources(const MachineBasicBlock *MBB) const {
+ const TraceBlockInfo *TBI = &BlockInfo[MBB->getNumber()];
+ return TBI->hasValidHeight() ? TBI : 0;
+}
+
+//===----------------------------------------------------------------------===//
+// Trace Selection Strategies
+//===----------------------------------------------------------------------===//
+//
+// A trace selection strategy is implemented as a sub-class of Ensemble. The
+// trace through a block B is computed by two DFS traversals of the CFG
+// starting from B. One upwards, and one downwards. During the upwards DFS,
+// pickTracePred() is called on the post-ordered blocks. During the downwards
+// DFS, pickTraceSucc() is called in a post-order.
+//
+
+// We never allow traces that leave loops, but we do allow traces to enter
+// nested loops. We also never allow traces to contain back-edges.
+//
+// This means that a loop header can never appear above the center block of a
+// trace, except as the trace head. Below the center block, loop exiting edges
+// are banned.
+//
+// Return true if an edge from the From loop to the To loop is leaving a loop.
+// Either of To and From can be null.
+static bool isExitingLoop(const MachineLoop *From, const MachineLoop *To) {
+ return From && !From->contains(To);
+}
+
+// MinInstrCountEnsemble - Pick the trace that executes the least number of
+// instructions.
+namespace {
+class MinInstrCountEnsemble : public MachineTraceMetrics::Ensemble {
+ const char *getName() const { return "MinInstr"; }
+ const MachineBasicBlock *pickTracePred(const MachineBasicBlock*);
+ const MachineBasicBlock *pickTraceSucc(const MachineBasicBlock*);
+
+public:
+ MinInstrCountEnsemble(MachineTraceMetrics *mtm)
+ : MachineTraceMetrics::Ensemble(mtm) {}
+};
+}
+
+// Select the preferred predecessor for MBB.
+const MachineBasicBlock*
+MinInstrCountEnsemble::pickTracePred(const MachineBasicBlock *MBB) {
+ if (MBB->pred_empty())
+ return 0;
+ const MachineLoop *CurLoop = getLoopFor(MBB);
+ // Don't leave loops, and never follow back-edges.
+ if (CurLoop && MBB == CurLoop->getHeader())
+ return 0;
+ unsigned CurCount = MTM.getResources(MBB)->InstrCount;
+ const MachineBasicBlock *Best = 0;
+ unsigned BestDepth = 0;
+ for (MachineBasicBlock::const_pred_iterator
+ I = MBB->pred_begin(), E = MBB->pred_end(); I != E; ++I) {
+ const MachineBasicBlock *Pred = *I;
+ const MachineTraceMetrics::TraceBlockInfo *PredTBI =
+ getDepthResources(Pred);
+ // Ignore cycles that aren't natural loops.
+ if (!PredTBI)
+ continue;
+ // Pick the predecessor that would give this block the smallest InstrDepth.
+ unsigned Depth = PredTBI->InstrDepth + CurCount;
+ if (!Best || Depth < BestDepth)
+ Best = Pred, BestDepth = Depth;
+ }
+ return Best;
+}
+
+// Select the preferred successor for MBB, or null if the trace ends at MBB.
+const MachineBasicBlock*
+MinInstrCountEnsemble::pickTraceSucc(const MachineBasicBlock *MBB) {
+ if (MBB->succ_empty())
+ return 0;
+ const MachineLoop *CurLoop = getLoopFor(MBB);
+ const MachineBasicBlock *Best = 0;
+ unsigned BestHeight = 0;
+ for (MachineBasicBlock::const_succ_iterator
+ I = MBB->succ_begin(), E = MBB->succ_end(); I != E; ++I) {
+ const MachineBasicBlock *Succ = *I;
+ // Don't consider back-edges.
+ if (CurLoop && Succ == CurLoop->getHeader())
+ continue;
+ // Don't consider successors exiting CurLoop.
+ if (isExitingLoop(CurLoop, getLoopFor(Succ)))
+ continue;
+ const MachineTraceMetrics::TraceBlockInfo *SuccTBI =
+ getHeightResources(Succ);
+ // Ignore cycles that aren't natural loops.
+ if (!SuccTBI)
+ continue;
+ // Pick the successor that would give this block the smallest InstrHeight.
+ unsigned Height = SuccTBI->InstrHeight;
+ if (!Best || Height < BestHeight)
+ Best = Succ, BestHeight = Height;
+ }
+ return Best;
+}
+
+// Get an Ensemble sub-class for the requested trace strategy.
+MachineTraceMetrics::Ensemble *
+MachineTraceMetrics::getEnsemble(MachineTraceMetrics::Strategy strategy) {
+ assert(strategy < TS_NumStrategies && "Invalid trace strategy enum");
+ Ensemble *&E = Ensembles[strategy];
+ if (E)
+ return E;
+
+ // Allocate new Ensemble on demand.
+ switch (strategy) {
+ case TS_MinInstrCount: return (E = new MinInstrCountEnsemble(this));
+ default: llvm_unreachable("Invalid trace strategy enum");
+ }
+}
+
+void MachineTraceMetrics::invalidate(const MachineBasicBlock *MBB) {
+ DEBUG(dbgs() << "Invalidate traces through BB#" << MBB->getNumber() << '\n');
+ BlockInfo[MBB->getNumber()].invalidate();
+ for (unsigned i = 0; i != TS_NumStrategies; ++i)
+ if (Ensembles[i])
+ Ensembles[i]->invalidate(MBB);
+}
+
+void MachineTraceMetrics::verifyAnalysis() const {
+ if (!MF)
+ return;
+#ifndef NDEBUG
+ assert(BlockInfo.size() == MF->getNumBlockIDs() && "Outdated BlockInfo size");
+ for (unsigned i = 0; i != TS_NumStrategies; ++i)
+ if (Ensembles[i])
+ Ensembles[i]->verify();
+#endif
+}
+
+//===----------------------------------------------------------------------===//
+// Trace building
+//===----------------------------------------------------------------------===//
+//
+// Traces are built by two CFG traversals. To avoid recomputing too much, use a
+// set abstraction that confines the search to the current loop, and doesn't
+// revisit blocks.
+
+namespace {
+struct LoopBounds {
+ MutableArrayRef<MachineTraceMetrics::TraceBlockInfo> Blocks;
+ SmallPtrSet<const MachineBasicBlock*, 8> Visited;
+ const MachineLoopInfo *Loops;
+ bool Downward;
+ LoopBounds(MutableArrayRef<MachineTraceMetrics::TraceBlockInfo> blocks,
+ const MachineLoopInfo *loops)
+ : Blocks(blocks), Loops(loops), Downward(false) {}
+};
+}
+
+// Specialize po_iterator_storage in order to prune the post-order traversal so
+// it is limited to the current loop and doesn't traverse the loop back edges.
+namespace llvm {
+template<>
+class po_iterator_storage<LoopBounds, true> {
+ LoopBounds &LB;
+public:
+ po_iterator_storage(LoopBounds &lb) : LB(lb) {}
+ void finishPostorder(const MachineBasicBlock*) {}
+
+ bool insertEdge(const MachineBasicBlock *From, const MachineBasicBlock *To) {
+ // Skip already visited To blocks.
+ MachineTraceMetrics::TraceBlockInfo &TBI = LB.Blocks[To->getNumber()];
+ if (LB.Downward ? TBI.hasValidHeight() : TBI.hasValidDepth())
+ return false;
+ // From is null once when To is the trace center block.
+ if (From) {
+ if (const MachineLoop *FromLoop = LB.Loops->getLoopFor(From)) {
+ // Don't follow backedges, don't leave FromLoop when going upwards.
+ if ((LB.Downward ? To : From) == FromLoop->getHeader())
+ return false;
+ // Don't leave FromLoop.
+ if (isExitingLoop(FromLoop, LB.Loops->getLoopFor(To)))
+ return false;
+ }
+ }
+ // To is a new block. Mark the block as visited in case the CFG has cycles
+ // that MachineLoopInfo didn't recognize as a natural loop.
+ return LB.Visited.insert(To);
+ }
+};
+}
+
+/// Compute the trace through MBB.
+void MachineTraceMetrics::Ensemble::computeTrace(const MachineBasicBlock *MBB) {
+ DEBUG(dbgs() << "Computing " << getName() << " trace through BB#"
+ << MBB->getNumber() << '\n');
+ // Set up loop bounds for the backwards post-order traversal.
+ LoopBounds Bounds(BlockInfo, MTM.Loops);
+
+ // Run an upwards post-order search for the trace start.
+ Bounds.Downward = false;
+ Bounds.Visited.clear();
+ typedef ipo_ext_iterator<const MachineBasicBlock*, LoopBounds> UpwardPO;
+ for (UpwardPO I = ipo_ext_begin(MBB, Bounds), E = ipo_ext_end(MBB, Bounds);
+ I != E; ++I) {
+ DEBUG(dbgs() << " pred for BB#" << I->getNumber() << ": ");
+ TraceBlockInfo &TBI = BlockInfo[I->getNumber()];
+ // All the predecessors have been visited, pick the preferred one.
+ TBI.Pred = pickTracePred(*I);
+ DEBUG({
+ if (TBI.Pred)
+ dbgs() << "BB#" << TBI.Pred->getNumber() << '\n';
+ else
+ dbgs() << "null\n";
+ });
+ // The trace leading to I is now known, compute the depth resources.
+ computeDepthResources(*I);
+ }
+
+ // Run a downwards post-order search for the trace end.
+ Bounds.Downward = true;
+ Bounds.Visited.clear();
+ typedef po_ext_iterator<const MachineBasicBlock*, LoopBounds> DownwardPO;
+ for (DownwardPO I = po_ext_begin(MBB, Bounds), E = po_ext_end(MBB, Bounds);
+ I != E; ++I) {
+ DEBUG(dbgs() << " succ for BB#" << I->getNumber() << ": ");
+ TraceBlockInfo &TBI = BlockInfo[I->getNumber()];
+ // All the successors have been visited, pick the preferred one.
+ TBI.Succ = pickTraceSucc(*I);
+ DEBUG({
+ if (TBI.Succ)
+ dbgs() << "BB#" << TBI.Succ->getNumber() << '\n';
+ else
+ dbgs() << "null\n";
+ });
+ // The trace leaving I is now known, compute the height resources.
+ computeHeightResources(*I);
+ }
+}
+
+/// Invalidate traces through BadMBB.
+void
+MachineTraceMetrics::Ensemble::invalidate(const MachineBasicBlock *BadMBB) {
+ SmallVector<const MachineBasicBlock*, 16> WorkList;
+ TraceBlockInfo &BadTBI = BlockInfo[BadMBB->getNumber()];
+
+ // Invalidate height resources of blocks above MBB.
+ if (BadTBI.hasValidHeight()) {
+ BadTBI.invalidateHeight();
+ WorkList.push_back(BadMBB);
+ do {
+ const MachineBasicBlock *MBB = WorkList.pop_back_val();
+ DEBUG(dbgs() << "Invalidate BB#" << MBB->getNumber() << ' ' << getName()
+ << " height.\n");
+ // Find any MBB predecessors that have MBB as their preferred successor.
+ // They are the only ones that need to be invalidated.
+ for (MachineBasicBlock::const_pred_iterator
+ I = MBB->pred_begin(), E = MBB->pred_end(); I != E; ++I) {
+ TraceBlockInfo &TBI = BlockInfo[(*I)->getNumber()];
+ if (!TBI.hasValidHeight())
+ continue;
+ if (TBI.Succ == MBB) {
+ TBI.invalidateHeight();
+ WorkList.push_back(*I);
+ continue;
+ }
+ // Verify that TBI.Succ is actually a *I successor.
+ assert((!TBI.Succ || (*I)->isSuccessor(TBI.Succ)) && "CFG changed");
+ }
+ } while (!WorkList.empty());
+ }
+
+ // Invalidate depth resources of blocks below MBB.
+ if (BadTBI.hasValidDepth()) {
+ BadTBI.invalidateDepth();
+ WorkList.push_back(BadMBB);
+ do {
+ const MachineBasicBlock *MBB = WorkList.pop_back_val();
+ DEBUG(dbgs() << "Invalidate BB#" << MBB->getNumber() << ' ' << getName()
+ << " depth.\n");
+ // Find any MBB successors that have MBB as their preferred predecessor.
+ // They are the only ones that need to be invalidated.
+ for (MachineBasicBlock::const_succ_iterator
+ I = MBB->succ_begin(), E = MBB->succ_end(); I != E; ++I) {
+ TraceBlockInfo &TBI = BlockInfo[(*I)->getNumber()];
+ if (!TBI.hasValidDepth())
+ continue;
+ if (TBI.Pred == MBB) {
+ TBI.invalidateDepth();
+ WorkList.push_back(*I);
+ continue;
+ }
+ // Verify that TBI.Pred is actually a *I predecessor.
+ assert((!TBI.Pred || (*I)->isPredecessor(TBI.Pred)) && "CFG changed");
+ }
+ } while (!WorkList.empty());
+ }
+
+ // Clear any per-instruction data. We only have to do this for BadMBB itself
+ // because the instructions in that block may change. Other blocks may be
+ // invalidated, but their instructions will stay the same, so there is no
+ // need to erase the Cycle entries. They will be overwritten when we
+ // recompute.
+ for (MachineBasicBlock::const_iterator I = BadMBB->begin(), E = BadMBB->end();
+ I != E; ++I)
+ Cycles.erase(I);
+}
+
+void MachineTraceMetrics::Ensemble::verify() const {
+#ifndef NDEBUG
+ assert(BlockInfo.size() == MTM.MF->getNumBlockIDs() &&
+ "Outdated BlockInfo size");
+ for (unsigned Num = 0, e = BlockInfo.size(); Num != e; ++Num) {
+ const TraceBlockInfo &TBI = BlockInfo[Num];
+ if (TBI.hasValidDepth() && TBI.Pred) {
+ const MachineBasicBlock *MBB = MTM.MF->getBlockNumbered(Num);
+ assert(MBB->isPredecessor(TBI.Pred) && "CFG doesn't match trace");
+ assert(BlockInfo[TBI.Pred->getNumber()].hasValidDepth() &&
+ "Trace is broken, depth should have been invalidated.");
+ const MachineLoop *Loop = getLoopFor(MBB);
+ assert(!(Loop && MBB == Loop->getHeader()) && "Trace contains backedge");
+ }
+ if (TBI.hasValidHeight() && TBI.Succ) {
+ const MachineBasicBlock *MBB = MTM.MF->getBlockNumbered(Num);
+ assert(MBB->isSuccessor(TBI.Succ) && "CFG doesn't match trace");
+ assert(BlockInfo[TBI.Succ->getNumber()].hasValidHeight() &&
+ "Trace is broken, height should have been invalidated.");
+ const MachineLoop *Loop = getLoopFor(MBB);
+ const MachineLoop *SuccLoop = getLoopFor(TBI.Succ);
+ assert(!(Loop && Loop == SuccLoop && TBI.Succ == Loop->getHeader()) &&
+ "Trace contains backedge");
+ }
+ }
+#endif
+}
+
+//===----------------------------------------------------------------------===//
+// Data Dependencies
+//===----------------------------------------------------------------------===//
+//
+// Compute the depth and height of each instruction based on data dependencies
+// and instruction latencies. These cycle numbers assume that the CPU can issue
+// an infinite number of instructions per cycle as long as their dependencies
+// are ready.
+
+// A data dependency is represented as a defining MI and operand numbers on the
+// defining and using MI.
+namespace {
+struct DataDep {
+ const MachineInstr *DefMI;
+ unsigned DefOp;
+ unsigned UseOp;
+
+ DataDep(const MachineInstr *DefMI, unsigned DefOp, unsigned UseOp)
+ : DefMI(DefMI), DefOp(DefOp), UseOp(UseOp) {}
+
+ /// Create a DataDep from an SSA form virtual register.
+ DataDep(const MachineRegisterInfo *MRI, unsigned VirtReg, unsigned UseOp)
+ : UseOp(UseOp) {
+ assert(TargetRegisterInfo::isVirtualRegister(VirtReg));
+ MachineRegisterInfo::def_iterator DefI = MRI->def_begin(VirtReg);
+ assert(!DefI.atEnd() && "Register has no defs");
+ DefMI = &*DefI;
+ DefOp = DefI.getOperandNo();
+ assert((++DefI).atEnd() && "Register has multiple defs");
+ }
+};
+}
+
+// Get the input data dependencies that must be ready before UseMI can issue.
+// Return true if UseMI has any physreg operands.
+static bool getDataDeps(const MachineInstr *UseMI,
+ SmallVectorImpl<DataDep> &Deps,
+ const MachineRegisterInfo *MRI) {
+ bool HasPhysRegs = false;
+ for (ConstMIOperands MO(UseMI); MO.isValid(); ++MO) {
+ if (!MO->isReg())
+ continue;
+ unsigned Reg = MO->getReg();
+ if (!Reg)
+ continue;
+ if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ HasPhysRegs = true;
+ continue;
+ }
+ // Collect virtual register reads.
+ if (MO->readsReg())
+ Deps.push_back(DataDep(MRI, Reg, MO.getOperandNo()));
+ }
+ return HasPhysRegs;
+}
+
+// Get the input data dependencies of a PHI instruction, using Pred as the
+// preferred predecessor.
+// This will add at most one dependency to Deps.
+static void getPHIDeps(const MachineInstr *UseMI,
+ SmallVectorImpl<DataDep> &Deps,
+ const MachineBasicBlock *Pred,
+ const MachineRegisterInfo *MRI) {
+ // No predecessor at the beginning of a trace. Ignore dependencies.
+ if (!Pred)
+ return;
+ assert(UseMI->isPHI() && UseMI->getNumOperands() % 2 && "Bad PHI");
+ for (unsigned i = 1; i != UseMI->getNumOperands(); i += 2) {
+ if (UseMI->getOperand(i + 1).getMBB() == Pred) {
+ unsigned Reg = UseMI->getOperand(i).getReg();
+ Deps.push_back(DataDep(MRI, Reg, i));
+ return;
+ }
+ }
+}
+
+// Keep track of physreg data dependencies by recording each live register unit.
+// Associate each regunit with an instruction operand. Depending on the
+// direction instructions are scanned, it could be the operand that defined the
+// regunit, or the highest operand to read the regunit.
+namespace {
+struct LiveRegUnit {
+ unsigned RegUnit;
+ unsigned Cycle;
+ const MachineInstr *MI;
+ unsigned Op;
+
+ unsigned getSparseSetIndex() const { return RegUnit; }
+
+ LiveRegUnit(unsigned RU) : RegUnit(RU), Cycle(0), MI(0), Op(0) {}
+};
+}
+
+// Identify physreg dependencies for UseMI, and update the live regunit
+// tracking set when scanning instructions downwards.
+static void updatePhysDepsDownwards(const MachineInstr *UseMI,
+ SmallVectorImpl<DataDep> &Deps,
+ SparseSet<LiveRegUnit> &RegUnits,
+ const TargetRegisterInfo *TRI) {
+ SmallVector<unsigned, 8> Kills;
+ SmallVector<unsigned, 8> LiveDefOps;
+
+ for (ConstMIOperands MO(UseMI); MO.isValid(); ++MO) {
+ if (!MO->isReg())
+ continue;
+ unsigned Reg = MO->getReg();
+ if (!TargetRegisterInfo::isPhysicalRegister(Reg))
+ continue;
+ // Track live defs and kills for updating RegUnits.
+ if (MO->isDef()) {
+ if (MO->isDead())
+ Kills.push_back(Reg);
+ else
+ LiveDefOps.push_back(MO.getOperandNo());
+ } else if (MO->isKill())
+ Kills.push_back(Reg);
+ // Identify dependencies.
+ if (!MO->readsReg())
+ continue;
+ for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units) {
+ SparseSet<LiveRegUnit>::iterator I = RegUnits.find(*Units);
+ if (I == RegUnits.end())
+ continue;
+ Deps.push_back(DataDep(I->MI, I->Op, MO.getOperandNo()));
+ break;
+ }
+ }
+
+ // Update RegUnits to reflect live registers after UseMI.
+ // First kills.
+ for (unsigned i = 0, e = Kills.size(); i != e; ++i)
+ for (MCRegUnitIterator Units(Kills[i], TRI); Units.isValid(); ++Units)
+ RegUnits.erase(*Units);
+
+ // Second, live defs.
+ for (unsigned i = 0, e = LiveDefOps.size(); i != e; ++i) {
+ unsigned DefOp = LiveDefOps[i];
+ for (MCRegUnitIterator Units(UseMI->getOperand(DefOp).getReg(), TRI);
+ Units.isValid(); ++Units) {
+ LiveRegUnit &LRU = RegUnits[*Units];
+ LRU.MI = UseMI;
+ LRU.Op = DefOp;
+ }
+ }
+}
+
+/// The length of the critical path through a trace is the maximum of two path
+/// lengths:
+///
+/// 1. The maximum height+depth over all instructions in the trace center block.
+///
+/// 2. The longest cross-block dependency chain. For small blocks, it is
+///    possible that the critical path through the trace doesn't include any
+///    instructions in the block.
+///
+/// This function computes the second number from the live-in list of the
+/// center block: for every virtual register live into TBI whose defining
+/// instruction sits in a block with the same trace head, the live-in height is
+/// added to the depth of that instruction, and the largest sum is returned
+/// (0 when no live-in qualifies). Both the instruction depths and the
+/// instruction heights of TBI must be valid.
+unsigned MachineTraceMetrics::Ensemble::
+computeCrossBlockCriticalPath(const TraceBlockInfo &TBI) {
+  assert(TBI.HasValidInstrDepths && "Missing depth info");
+  assert(TBI.HasValidInstrHeights && "Missing height info");
+  unsigned MaxLen = 0;
+  for (unsigned i = 0, e = TBI.LiveIns.size(); i != e; ++i) {
+    const LiveInReg &LIR = TBI.LiveIns[i];
+    // Only virtual-register live-ins are considered; other entries are skipped.
+    if (!TargetRegisterInfo::isVirtualRegister(LIR.Reg))
+      continue;
+    const MachineInstr *DefMI = MTM.MRI->getVRegDef(LIR.Reg);
+    // Ignore dependencies outside the current trace.
+    const TraceBlockInfo &DefTBI = BlockInfo[DefMI->getParent()->getNumber()];
+    if (!DefTBI.hasValidDepth() || DefTBI.Head != TBI.Head)
+      continue;
+    // Use lookup() rather than operator[] so that this read-only query never
+    // inserts a default entry into the Cycles map.
+    unsigned Len = LIR.Height + Cycles.lookup(DefMI).Depth;
+    MaxLen = std::max(MaxLen, Len);
+  }
+  return MaxLen;
+}
+
+/// Compute instruction depths (earliest issue cycles) for the instructions in
+/// MBB and in the trace blocks above it that lack them. The trace through MBB must already be computed.
+void MachineTraceMetrics::Ensemble::
+computeInstrDepths(const MachineBasicBlock *MBB) {
+ // The top of the trace may already be computed, and HasValidInstrDepths
+ // implies Head->HasValidInstrDepths, so we only need to start from the first
+ // block in the trace that needs to be recomputed. Walk upwards collecting blocks until one is valid.
+ SmallVector<const MachineBasicBlock*, 8> Stack; // Blocks to process; the topmost one ends up at the back.
+ do {
+ TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
+ assert(TBI.hasValidDepth() && "Incomplete trace");
+ if (TBI.HasValidInstrDepths)
+ break;
+ Stack.push_back(MBB);
+ MBB = TBI.Pred; // Step to the trace predecessor; null for the first block in the trace.
+ } while (MBB);
+
+ // FIXME: If MBB is non-null at this point, it is the last pre-computed block
+ // in the trace. We should track any live-out physregs that were defined in
+ // the trace. This is quite rare in SSA form, typically created by CSE
+ // hoisting a compare. As written, RegUnits always starts out empty.
+ SparseSet<LiveRegUnit> RegUnits;
+ RegUnits.setUniverse(MTM.TRI->getNumRegUnits());
+
+ // Go through trace blocks in top-down order, stopping after the center block.
+ SmallVector<DataDep, 8> Deps; // Scratch list, cleared and refilled for every instruction.
+ while (!Stack.empty()) {
+ MBB = Stack.pop_back_val();
+ DEBUG(dbgs() << "Depths for BB#" << MBB->getNumber() << ":\n");
+ TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
+ TBI.HasValidInstrDepths = true;
+ TBI.CriticalPath = 0;
+
+ // Also compute the critical path length through MBB when possible, i.e. when heights are already valid.
+ if (TBI.HasValidInstrHeights)
+ TBI.CriticalPath = computeCrossBlockCriticalPath(TBI);
+
+ for (MachineBasicBlock::const_iterator I = MBB->begin(), E = MBB->end();
+ I != E; ++I) {
+ const MachineInstr *UseMI = I;
+
+ // Collect all data dependencies: PHIs via the trace predecessor, others via their operands.
+ Deps.clear();
+ if (UseMI->isPHI())
+ getPHIDeps(UseMI, Deps, TBI.Pred, MTM.MRI);
+ else if (getDataDeps(UseMI, Deps, MTM.MRI)) // A true result triggers the physreg scan below.
+ updatePhysDepsDownwards(UseMI, Deps, RegUnits, MTM.TRI);
+
+ // Filter and process dependencies, computing the earliest issue cycle (max over all in-trace deps).
+ unsigned Cycle = 0;
+ for (unsigned i = 0, e = Deps.size(); i != e; ++i) {
+ const DataDep &Dep = Deps[i];
+ const TraceBlockInfo&DepTBI =
+ BlockInfo[Dep.DefMI->getParent()->getNumber()];
+ // Ignore dependencies from outside the current trace (different trace head or no valid depth).
+ if (!DepTBI.hasValidDepth() || DepTBI.Head != TBI.Head)
+ continue;
+ assert(DepTBI.HasValidInstrDepths && "Inconsistent dependency");
+ unsigned DepCycle = Cycles.lookup(Dep.DefMI).Depth;
+ // Add latency if DefMI is a real instruction. Transients get latency 0.
+ if (!Dep.DefMI->isTransient())
+ DepCycle += MTM.TII->computeOperandLatency(MTM.ItinData,
+ Dep.DefMI, Dep.DefOp,
+ UseMI, Dep.UseOp,
+ /* FindMin = */ false);
+ Cycle = std::max(Cycle, DepCycle);
+ }
+ // Remember the instruction depth in the per-instruction Cycles map.
+ InstrCycles &MICycles = Cycles[UseMI];
+ MICycles.Depth = Cycle;
+
+ if (!TBI.HasValidInstrHeights) {
+ DEBUG(dbgs() << Cycle << '\t' << *UseMI);
+ continue;
+ }
+ // Update critical path length with the depth + height of this instruction.
+ TBI.CriticalPath = std::max(TBI.CriticalPath, Cycle + MICycles.Height);
+ DEBUG(dbgs() << TBI.CriticalPath << '\t' << Cycle << '\t' << *UseMI);
+ }
+ }
+}
+
+// Identify physreg dependencies for MI when scanning instructions upwards.
+// Return the issue height of MI after considering any live regunits; RegUnits is updated for MI's physreg defs and reads.
+// Height is the issue height computed from virtual register dependencies alone.
+static unsigned updatePhysDepsUpwards(const MachineInstr *MI, unsigned Height,
+ SparseSet<LiveRegUnit> &RegUnits,
+ const InstrItineraryData *ItinData,
+ const TargetInstrInfo *TII,
+ const TargetRegisterInfo *TRI) {
+ SmallVector<unsigned, 8> ReadOps; // Operand numbers of the physreg operands that MI reads.
+ for (ConstMIOperands MO(MI); MO.isValid(); ++MO) {
+ if (!MO->isReg())
+ continue;
+ unsigned Reg = MO->getReg();
+ if (!TargetRegisterInfo::isPhysicalRegister(Reg))
+ continue;
+ if (MO->readsReg())
+ ReadOps.push_back(MO.getOperandNo()); // Handled after the final height of MI is known.
+ if (!MO->isDef())
+ continue;
+ // This is a def of Reg. Remove corresponding entries from RegUnits, and
+ // update MI Height to consider the physreg dependencies.
+ for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units) {
+ SparseSet<LiveRegUnit>::iterator I = RegUnits.find(*Units);
+ if (I == RegUnits.end())
+ continue;
+ unsigned DepHeight = I->Cycle; // Height recorded for the user of this regunit below MI.
+ if (!MI->isTransient()) {
+ // We may not know the UseMI of this dependency, if it came from the
+ // live-in list.
+ if (I->MI)
+ DepHeight += TII->computeOperandLatency(ItinData,
+ MI, MO.getOperandNo(),
+ I->MI, I->Op);
+ else
+ // No UseMI. Just use the MI latency instead.
+ DepHeight += TII->getInstrLatency(ItinData, MI);
+ }
+ Height = std::max(Height, DepHeight);
+ // This regunit is dead above MI.
+ RegUnits.erase(I);
+ }
+ }
+
+ // Now we know the height of MI. Update any regunits read.
+ for (unsigned i = 0, e = ReadOps.size(); i != e; ++i) {
+ unsigned Reg = MI->getOperand(ReadOps[i]).getReg();
+ for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units) {
+ LiveRegUnit &LRU = RegUnits[*Units];
+ // Set the height to the highest reader of the unit; an entry already owned by MI is left alone.
+ if (LRU.Cycle <= Height && LRU.MI != MI) {
+ LRU.Cycle = Height;
+ LRU.MI = MI;
+ LRU.Op = ReadOps[i];
+ }
+ }
+ }
+
+ return Height;
+}
+
+
+typedef DenseMap<const MachineInstr *, unsigned> MIHeightMap;
+
+// Propagate the height requirement of UseMI up to the instruction that
+// defines Dep. The candidate height is UseHeight, plus the operand latency
+// from Dep.DefMI to UseMI unless Dep.DefMI is transient. Heights keeps the
+// largest candidate recorded so far for each defining instruction.
+// Returns true only when Dep.DefMI had no entry in Heights before this call.
+static bool pushDepHeight(const DataDep &Dep,
+                          const MachineInstr *UseMI, unsigned UseHeight,
+                          MIHeightMap &Heights,
+                          const InstrItineraryData *ItinData,
+                          const TargetInstrInfo *TII) {
+  // Transient defs contribute no latency of their own.
+  if (!Dep.DefMI->isTransient())
+    UseHeight += TII->computeOperandLatency(ItinData, Dep.DefMI, Dep.DefOp,
+                                            UseMI, Dep.UseOp);
+
+  // Try to record the candidate; insert() leaves an existing entry untouched.
+  std::pair<MIHeightMap::iterator, bool> Inserted =
+    Heights.insert(std::make_pair(Dep.DefMI, UseHeight));
+  if (Inserted.second)
+    return true;
+
+  // DefMI has been pushed before: keep whichever height is larger.
+  unsigned &Recorded = Inserted.first->second;
+  Recorded = std::max(Recorded, UseHeight);
+  return false;
+}
+
+/// Record the register defined by DefMI as live-in to the blocks of Trace.
+/// Trace is walked from its last element towards its first; every block
+/// visited gets the register appended to its live-in list, and the walk stops
+/// (without adding anything) at the block that contains DefMI.
+/// NOTE(review): the register is read from operand 0 of DefMI, so this
+/// presumably expects the relevant def to be the first operand - confirm for
+/// instructions with more than one def.
+void MachineTraceMetrics::Ensemble::
+addLiveIns(const MachineInstr *DefMI,
+           ArrayRef<const MachineBasicBlock*> Trace) {
+  assert(!Trace.empty() && "Trace should contain at least one block");
+  const unsigned Reg = DefMI->getOperand(0).getReg();
+  assert(TargetRegisterInfo::isVirtualRegister(Reg));
+  const MachineBasicBlock *DefMBB = DefMI->getParent();
+
+  // Visit the trace from Trace.back() towards Trace.front().
+  unsigned Idx = Trace.size();
+  while (Idx != 0) {
+    const MachineBasicBlock *MBB = Trace[--Idx];
+    // Stop at the block containing DefMI.
+    if (MBB == DefMBB)
+      break;
+    // Add the register with the default height of 0; the height will be
+    // updated later.
+    BlockInfo[MBB->getNumber()].LiveIns.push_back(LiveInReg(Reg));
+  }
+}
+
+/// Compute instruction heights in the trace through MBB. This updates MBB and
+/// the blocks below it in the trace that lack valid heights, rebuilding their
+/// live-in lists. It is assumed that the trace has already been computed.
+void MachineTraceMetrics::Ensemble::
+computeInstrHeights(const MachineBasicBlock *MBB) {
+ // The bottom of the trace may already be computed.
+ // Find the blocks that need updating by walking down until a block with valid heights is found.
+ SmallVector<const MachineBasicBlock*, 8> Stack; // Blocks to process; the bottom-most one ends up at the back.
+ do {
+ TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
+ assert(TBI.hasValidHeight() && "Incomplete trace");
+ if (TBI.HasValidInstrHeights)
+ break;
+ Stack.push_back(MBB);
+ TBI.LiveIns.clear(); // The live-in list is rebuilt from scratch below.
+ MBB = TBI.Succ; // Step to the trace successor; null for the last block in the trace.
+ } while (MBB);
+
+ // As we move upwards in the trace, keep track of instructions that are
+ // required by deeper trace instructions. Map MI -> height required so far.
+ MIHeightMap Heights;
+
+ // For physregs, the def isn't known when we see the use.
+ // Instead, keep track of the highest use of each regunit.
+ SparseSet<LiveRegUnit> RegUnits;
+ RegUnits.setUniverse(MTM.TRI->getNumRegUnits());
+
+ // If the bottom of the trace was already precomputed, initialize heights
+ // from its live-in list.
+ // MBB is the highest precomputed block in the trace.
+ if (MBB) {
+ TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
+ for (unsigned i = 0, e = TBI.LiveIns.size(); i != e; ++i) {
+ LiveInReg LI = TBI.LiveIns[i];
+ if (TargetRegisterInfo::isVirtualRegister(LI.Reg)) {
+ // For virtual registers, the def latency is included. Keep the max if the def is seen twice.
+ unsigned &Height = Heights[MTM.MRI->getVRegDef(LI.Reg)];
+ if (Height < LI.Height)
+ Height = LI.Height;
+ } else {
+ // For register units, the def latency is not included because we don't
+ // know the def yet. Only the Cycle field of the entry is assigned here.
+ RegUnits[LI.Reg].Cycle = LI.Height;
+ }
+ }
+ }
+
+ // Go through the trace blocks in bottom-up order.
+ SmallVector<DataDep, 8> Deps; // Scratch list, cleared and refilled for every instruction.
+ for (;!Stack.empty(); Stack.pop_back()) {
+ MBB = Stack.back();
+ DEBUG(dbgs() << "Heights for BB#" << MBB->getNumber() << ":\n");
+ TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
+ TBI.HasValidInstrHeights = true;
+ TBI.CriticalPath = 0;
+
+ // Get dependencies from PHIs in the trace successor.
+ const MachineBasicBlock *Succ = TBI.Succ;
+ // If MBB is the last block in the trace, and it has a back-edge to the
+ // loop header, get loop-carried dependencies from PHIs in the header. For
+ // that purpose, pretend that all the loop header PHIs have height 0.
+ if (!Succ)
+ if (const MachineLoop *Loop = getLoopFor(MBB))
+ if (MBB->isSuccessor(Loop->getHeader()))
+ Succ = Loop->getHeader();
+
+ if (Succ) {
+ for (MachineBasicBlock::const_iterator I = Succ->begin(), E = Succ->end();
+ I != E && I->isPHI(); ++I) {
+ const MachineInstr *PHI = I;
+ Deps.clear();
+ getPHIDeps(PHI, Deps, MBB, MTM.MRI);
+ if (!Deps.empty()) {
+ // Loop header PHI heights are all 0; PHIs in a real trace successor use their recorded height.
+ unsigned Height = TBI.Succ ? Cycles.lookup(PHI).Height : 0;
+ DEBUG(dbgs() << "pred\t" << Height << '\t' << *PHI);
+ if (pushDepHeight(Deps.front(), PHI, Height,
+ Heights, MTM.ItinData, MTM.TII))
+ addLiveIns(Deps.front().DefMI, Stack); // Stack doubles as the trace; its back() is MBB.
+ }
+ }
+ }
+
+ // Go through the block backwards.
+ for (MachineBasicBlock::const_iterator BI = MBB->end(), BB = MBB->begin();
+ BI != BB;) {
+ const MachineInstr *MI = --BI;
+
+ // Find the MI height as determined by virtual register uses in the
+ // trace below. It stays 0 when nothing below has pushed a height for MI.
+ unsigned Cycle = 0;
+ MIHeightMap::iterator HeightI = Heights.find(MI);
+ if (HeightI != Heights.end()) {
+ Cycle = HeightI->second;
+ // We won't be seeing any more MI uses.
+ Heights.erase(HeightI);
+ }
+
+ // Don't process PHI deps. They depend on the specific predecessor, and
+ // we'll get them when visiting the predecessor.
+ Deps.clear();
+ bool HasPhysRegs = !MI->isPHI() && getDataDeps(MI, Deps, MTM.MRI);
+
+ // There may also be regunit dependencies to include in the height.
+ if (HasPhysRegs)
+ Cycle = updatePhysDepsUpwards(MI, Cycle, RegUnits,
+ MTM.ItinData, MTM.TII, MTM.TRI);
+
+ // Update the required height of any virtual registers read by MI.
+ for (unsigned i = 0, e = Deps.size(); i != e; ++i)
+ if (pushDepHeight(Deps[i], MI, Cycle, Heights, MTM.ItinData, MTM.TII))
+ addLiveIns(Deps[i].DefMI, Stack); // First sighting of this def: record it as live-in.
+
+ InstrCycles &MICycles = Cycles[MI];
+ MICycles.Height = Cycle;
+ if (!TBI.HasValidInstrDepths) {
+ DEBUG(dbgs() << Cycle << '\t' << *MI);
+ continue;
+ }
+ // Update critical path length with the height + depth of this instruction.
+ TBI.CriticalPath = std::max(TBI.CriticalPath, Cycle + MICycles.Depth);
+ DEBUG(dbgs() << TBI.CriticalPath << '\t' << Cycle << '\t' << *MI);
+ }
+
+ // Update virtual live-in heights. They were added by addLiveIns() with a 0
+ // height because the final height isn't known until now.
+ DEBUG(dbgs() << "BB#" << MBB->getNumber() << " Live-ins:");
+ for (unsigned i = 0, e = TBI.LiveIns.size(); i != e; ++i) {
+ LiveInReg &LIR = TBI.LiveIns[i];
+ const MachineInstr *DefMI = MTM.MRI->getVRegDef(LIR.Reg);
+ LIR.Height = Heights.lookup(DefMI);
+ DEBUG(dbgs() << ' ' << PrintReg(LIR.Reg) << '@' << LIR.Height);
+ }
+
+ // Transfer the live regunits to the live-in list, after the virtual registers.
+ for (SparseSet<LiveRegUnit>::const_iterator
+ RI = RegUnits.begin(), RE = RegUnits.end(); RI != RE; ++RI) {
+ TBI.LiveIns.push_back(LiveInReg(RI->RegUnit, RI->Cycle));
+ DEBUG(dbgs() << ' ' << PrintRegUnit(RI->RegUnit, MTM.TRI)
+ << '@' << RI->Cycle);
+ }
+ DEBUG(dbgs() << '\n');
+
+ if (!TBI.HasValidInstrDepths)
+ continue;
+ // Add live-ins to the critical path length.
+ TBI.CriticalPath = std::max(TBI.CriticalPath,
+ computeCrossBlockCriticalPath(TBI));
+ DEBUG(dbgs() << "Critical path: " << TBI.CriticalPath << '\n');
+ }
+}
+
+/// Return a Trace handle for the trace through MBB. computeTrace(),
+/// computeInstrDepths() and computeInstrHeights() are invoked for MBB, in that
+/// order, on every call before the handle is created.
+MachineTraceMetrics::Trace
+MachineTraceMetrics::Ensemble::getTrace(const MachineBasicBlock *MBB) {
+  // FIXME: Check cache tags, recompute as needed.
+  computeTrace(MBB);
+  computeInstrDepths(MBB);
+  computeInstrHeights(MBB);
+
+  // The handle refers to this ensemble's info for the block numbered like MBB.
+  TraceBlockInfo &CenterTBI = BlockInfo[MBB->getNumber()];
+  return Trace(*this, CenterTBI);
+}
+
+/// Return the slack of MI: the critical path length of the trace minus the
+/// sum of MI's depth and height. MI must be non-null and must belong to the
+/// trace center block.
+unsigned
+MachineTraceMetrics::Trace::getInstrSlack(const MachineInstr *MI) const {
+  assert(MI && "Not an instruction.");
+  assert(getBlockNum() == unsigned(MI->getParent()->getNumber()) &&
+         "MI must be in the trace center block");
+  const InstrCycles Cyc = getInstrCycles(MI);
+  // Length of the longest dependency chain running through MI.
+  const unsigned PathThroughMI = Cyc.Depth + Cyc.Height;
+  return getCriticalPath() - PathThroughMI;
+}
+
+/// Return the depth of PHI as seen from this trace: the depth of the
+/// instruction behind the single dependency that getPHIDeps() reports for PHI
+/// with respect to the trace center block, plus the def-to-PHI operand latency
+/// unless that instruction is transient.
+unsigned
+MachineTraceMetrics::Trace::getPHIDepth(const MachineInstr *PHI) const {
+  const MachineBasicBlock *MBB = TE.MTM.MF->getBlockNumbered(getBlockNum());
+  SmallVector<DataDep, 1> Deps;
+  getPHIDeps(PHI, Deps, MBB, TE.MTM.MRI);
+  assert(Deps.size() == 1 && "PHI doesn't have MBB as a predecessor");
+
+  const DataDep &Dep = Deps[0];
+  const unsigned DefDepth = getInstrCycles(Dep.DefMI).Depth;
+  // Transients get latency 0, so their depth is passed through unchanged.
+  if (Dep.DefMI->isTransient())
+    return DefDepth;
+  // Add latency because DefMI is a real instruction.
+  return DefDepth + TE.MTM.TII->computeOperandLatency(TE.MTM.ItinData,
+                                                      Dep.DefMI, Dep.DefOp,
+                                                      PHI, Dep.UseOp,
+                                                      /* FindMin = */ false);
+}
+
+/// Return the resource depth of the top of the trace center block or, when
+/// Bottom is set, of its bottom: the number of instructions counted above the
+/// block, plus the block's own instruction count when Bottom is set, divided
+/// by the issue width of the schedule model if it provides a non-zero one.
+unsigned MachineTraceMetrics::Trace::getResourceDepth(bool Bottom) const {
+  // For now, the resource depth is derived from instruction count / issue
+  // width. Eventually, it should be computed per functional unit, returning
+  // the max.
+  unsigned Instrs = TBI.InstrDepth;
+  if (Bottom)
+    Instrs += TE.MTM.BlockInfo[getBlockNum()].InstrCount;
+
+  // Assume issue width 1 without a schedule model or with a zero issue width.
+  const MCSchedModel *Model = TE.MTM.ItinData->SchedModel;
+  if (!Model || Model->IssueWidth == 0)
+    return Instrs;
+  return Instrs / Model->IssueWidth;
+}
+
+/// Return the resource length of the trace: the instruction count above and
+/// below the center block, plus the instruction counts of all Extrablocks,
+/// divided by the issue width of the schedule model if it provides a non-zero
+/// one.
+unsigned MachineTraceMetrics::Trace::
+getResourceLength(ArrayRef<const MachineBasicBlock*> Extrablocks) const {
+  unsigned Instrs = TBI.InstrDepth + TBI.InstrHeight;
+  // Blocks in Extrablocks are counted as if they were part of the trace.
+  const unsigned NumExtra = Extrablocks.size();
+  for (unsigned Idx = 0; Idx != NumExtra; ++Idx)
+    Instrs += TE.MTM.getResources(Extrablocks[Idx])->InstrCount;
+
+  // Assume issue width 1 without a schedule model or with a zero issue width.
+  const MCSchedModel *Model = TE.MTM.ItinData->SchedModel;
+  if (!Model || Model->IssueWidth == 0)
+    return Instrs;
+  return Instrs / Model->IssueWidth;
+}
+
+/// Print the ensemble name followed by one line per entry of BlockInfo,
+/// labelled with the entry's index as the block number.
+void MachineTraceMetrics::Ensemble::print(raw_ostream &OS) const {
+  OS << getName() << " ensemble:\n";
+  const unsigned NumBlocks = BlockInfo.size();
+  for (unsigned Num = 0; Num != NumBlocks; ++Num) {
+    OS << " BB#" << Num << '\t';
+    BlockInfo[Num].print(OS);
+    OS << '\n';
+  }
+}
+
+/// Print a one-line summary of this block's trace info: the depth half, the
+/// height half, and the critical path when both instruction depths and
+/// instruction heights are valid.
+void MachineTraceMetrics::TraceBlockInfo::print(raw_ostream &OS) const {
+  // Depth half: instruction count above, trace predecessor and trace head.
+  if (!hasValidDepth()) {
+    OS << "depth invalid";
+  } else {
+    OS << "depth=" << InstrDepth;
+    if (!Pred)
+      OS << " pred=null";
+    else
+      OS << " pred=BB#" << Pred->getNumber();
+    OS << " head=BB#" << Head;
+    if (HasValidInstrDepths)
+      OS << " +instrs";
+  }
+  OS << ", ";
+
+  // Height half: instruction count below, trace successor and trace tail.
+  if (!hasValidHeight()) {
+    OS << "height invalid";
+  } else {
+    OS << "height=" << InstrHeight;
+    if (!Succ)
+      OS << " succ=null";
+    else
+      OS << " succ=BB#" << Succ->getNumber();
+    OS << " tail=BB#" << Tail;
+    if (HasValidInstrHeights)
+      OS << " +instrs";
+  }
+
+  // The critical path is only meaningful with both kinds of instruction data.
+  if (HasValidInstrDepths && HasValidInstrHeights)
+    OS << ", crit=" << CriticalPath;
+}
+
+/// Print a summary of the trace: head, center and tail block numbers, the
+/// instruction count and critical path when they are valid, then the chain of
+/// trace predecessors and the chain of trace successors of the center block.
+void MachineTraceMetrics::Trace::print(raw_ostream &OS) const {
+  const unsigned MBBNum = getBlockNum();
+
+  OS << TE.getName() << " trace BB#" << TBI.Head << " --> BB#" << MBBNum
+     << " --> BB#" << TBI.Tail << ':';
+  if (TBI.hasValidHeight() && TBI.hasValidDepth())
+    OS << ' ' << getInstrCount() << " instrs.";
+  if (TBI.HasValidInstrDepths && TBI.HasValidInstrHeights)
+    OS << ' ' << TBI.CriticalPath << " cycles.";
+
+  // Follow the Pred links upwards for as long as the depth info is valid.
+  OS << "\nBB#" << MBBNum;
+  for (const MachineTraceMetrics::TraceBlockInfo *Block = &TBI;
+       Block->hasValidDepth() && Block->Pred;) {
+    unsigned Num = Block->Pred->getNumber();
+    OS << " <- BB#" << Num;
+    Block = &TE.BlockInfo[Num];
+  }
+
+  // Follow the Succ links downwards for as long as the height info is valid.
+  OS << "\n ";
+  for (const MachineTraceMetrics::TraceBlockInfo *Block = &TBI;
+       Block->hasValidHeight() && Block->Succ;) {
+    unsigned Num = Block->Succ->getNumber();
+    OS << " -> BB#" << Num;
+    Block = &TE.BlockInfo[Num];
+  }
+  OS << '\n';
+}
diff --git a/lib/CodeGen/MachineTraceMetrics.h b/lib/CodeGen/MachineTraceMetrics.h
new file mode 100644
index 0000000..c5b86f3
--- /dev/null
+++ b/lib/CodeGen/MachineTraceMetrics.h
@@ -0,0 +1,341 @@
+//===- lib/CodeGen/MachineTraceMetrics.h - Super-scalar metrics -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interface for the MachineTraceMetrics analysis pass
+// that estimates CPU resource usage and critical data dependency paths through
+// preferred traces. This is useful for super-scalar CPUs where execution speed
+// can be limited both by data dependencies and by limited execution resources.
+//
+// Out-of-order CPUs will often be executing instructions from multiple basic
+// blocks at the same time. This makes it difficult to estimate the resource
+// usage accurately in a single basic block. Resources can be estimated better
+// by looking at a trace through the current basic block.
+//
+// For every block, the MachineTraceMetrics pass will pick a preferred trace
+// that passes through the block. The trace is chosen based on loop structure,
+// branch probabilities, and resource usage. The intention is to pick likely
+// traces that would be the most affected by code transformations.
+//
+// It is expensive to compute a full arbitrary trace for every block, so to
+// save some computations, traces are chosen to be convergent. This means that
+// if the traces through basic blocks A and B ever cross when moving away from
+// A and B, they never diverge again. This applies in both directions - If the
+// traces meet above A and B, they won't diverge when going further back.
+//
+// Traces tend to align with loops. The trace through a block in an inner loop
+// will begin at the loop entry block and end at a back edge. If there are
+// nested loops, the trace may begin and end at those instead.
+//
+// For each trace, we compute the critical path length, which is the number of
+// cycles required to execute the trace when execution is limited by data
+// dependencies only. We also compute the resource height, which is the number
+// of cycles required to execute all instructions in the trace when ignoring
+// data dependencies.
+//
+// Every instruction in the current block has a slack - the number of cycles
+// execution of the instruction can be delayed without extending the critical
+// path.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_MACHINE_TRACE_METRICS_H
+#define LLVM_CODEGEN_MACHINE_TRACE_METRICS_H
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+
+namespace llvm {
+
+class InstrItineraryData;
+class MachineBasicBlock;
+class MachineInstr;
+class MachineLoop;
+class MachineLoopInfo;
+class MachineRegisterInfo;
+class TargetInstrInfo;
+class TargetRegisterInfo;
+class raw_ostream;
+
+class MachineTraceMetrics : public MachineFunctionPass {
+  const MachineFunction *MF;
+  const TargetInstrInfo *TII;
+  const TargetRegisterInfo *TRI;
+  const InstrItineraryData *ItinData;
+  const MachineRegisterInfo *MRI;
+  const MachineLoopInfo *Loops;
+
+public:
+  class Ensemble;
+  class Trace;
+  static char ID;
+  MachineTraceMetrics();
+  void getAnalysisUsage(AnalysisUsage&) const;
+  bool runOnMachineFunction(MachineFunction&);
+  void releaseMemory();
+  void verifyAnalysis() const;
+
+  friend class Ensemble;
+  friend class Trace;
+
+  /// Per-basic block information that doesn't depend on the trace through the
+  /// block.
+  struct FixedBlockInfo {
+    /// The number of non-trivial instructions in the block.
+    /// Doesn't count PHI and COPY instructions that are likely to be removed.
+    /// The value ~0u marks the information as not computed.
+    unsigned InstrCount;
+
+    /// True when the block contains calls.
+    bool HasCalls;
+
+    FixedBlockInfo() : InstrCount(~0u), HasCalls(false) {}
+
+    /// Returns true when resource information for this block has been computed.
+    bool hasResources() const { return InstrCount != ~0u; }
+
+    /// Invalidate resource information.
+    void invalidate() { InstrCount = ~0u; }
+  };
+
+  /// Get the fixed resource information about MBB. Compute it on demand.
+  const FixedBlockInfo *getResources(const MachineBasicBlock*);
+
+  /// A virtual register or regunit required by a basic block or its trace
+  /// successors.
+  struct LiveInReg {
+    /// The virtual register required, or a register unit.
+    unsigned Reg;
+
+    /// For virtual registers: Minimum height of the defining instruction.
+    /// For regunits: Height of the highest user in the trace.
+    unsigned Height;
+
+    LiveInReg(unsigned Reg, unsigned Height = 0) : Reg(Reg), Height(Height) {}
+  };
+
+  /// Per-basic block information that relates to a specific trace through the
+  /// block. Convergent traces means that only one of these is required per
+  /// block in a trace ensemble.
+  struct TraceBlockInfo {
+    /// Trace predecessor, or NULL for the first block in the trace.
+    /// Valid when hasValidDepth().
+    const MachineBasicBlock *Pred;
+
+    /// Trace successor, or NULL for the last block in the trace.
+    /// Valid when hasValidHeight().
+    const MachineBasicBlock *Succ;
+
+    /// The block number of the head of the trace. (When hasValidDepth()).
+    unsigned Head;
+
+    /// The block number of the tail of the trace. (When hasValidHeight()).
+    unsigned Tail;
+
+    /// Accumulated number of instructions in the trace above this block.
+    /// Does not include instructions in this block.
+    unsigned InstrDepth;
+
+    /// Accumulated number of instructions in the trace below this block.
+    /// Includes instructions in this block.
+    unsigned InstrHeight;
+
+    /// Start out with no trace neighbors and with every cached quantity
+    /// marked invalid. Head, Tail and CriticalPath are only meaningful once
+    /// the corresponding validity predicates hold, but they are still given
+    /// defined values so that copying a fresh TraceBlockInfo (it is stored by
+    /// value in a SmallVector) never reads indeterminate values.
+    TraceBlockInfo() :
+      Pred(0), Succ(0),
+      Head(~0u), Tail(~0u),
+      InstrDepth(~0u), InstrHeight(~0u),
+      HasValidInstrDepths(false), HasValidInstrHeights(false),
+      CriticalPath(0) {}
+
+    /// Returns true if the depth resources have been computed from the trace
+    /// above this block.
+    bool hasValidDepth() const { return InstrDepth != ~0u; }
+
+    /// Returns true if the height resources have been computed from the trace
+    /// below this block.
+    bool hasValidHeight() const { return InstrHeight != ~0u; }
+
+    /// Invalidate depth resources when some block above this one has changed.
+    void invalidateDepth() { InstrDepth = ~0u; HasValidInstrDepths = false; }
+
+    /// Invalidate height resources when a block below this one has changed.
+    void invalidateHeight() { InstrHeight = ~0u; HasValidInstrHeights = false; }
+
+    // Data-dependency-related information. Per-instruction depth and height
+    // are computed from data dependencies in the current trace, using
+    // itinerary data.
+
+    /// Instruction depths have been computed. This implies hasValidDepth().
+    bool HasValidInstrDepths;
+
+    /// Instruction heights have been computed. This implies hasValidHeight().
+    bool HasValidInstrHeights;
+
+    /// Critical path length. This is the number of cycles in the longest data
+    /// dependency chain through the trace. This is only valid when both
+    /// HasValidInstrDepths and HasValidInstrHeights are set.
+    unsigned CriticalPath;
+
+    /// Live-in registers. These registers are defined above the current block
+    /// and used by this block or a block below it.
+    /// This does not include PHI uses in the current block, but it does
+    /// include PHI uses in deeper blocks.
+    SmallVector<LiveInReg, 4> LiveIns;
+
+    void print(raw_ostream&) const;
+  };
+
+  /// InstrCycles represents the cycle height and depth of an instruction in a
+  /// trace.
+  struct InstrCycles {
+    /// Earliest issue cycle as determined by data dependencies and instruction
+    /// latencies from the beginning of the trace. Data dependencies from
+    /// before the trace are not included.
+    unsigned Depth;
+
+    /// Minimum number of cycles from when this instruction is issued to the
+    /// end of the trace, as determined by data dependencies and instruction
+    /// latencies.
+    unsigned Height;
+  };
+
+  /// A trace represents a plausible sequence of executed basic blocks that
+  /// passes through the current basic block. The Trace class serves as a
+  /// handle to internal cached data structures.
+  class Trace {
+    Ensemble &TE;
+    TraceBlockInfo &TBI;
+
+    /// Number of the trace center block, i.e. the index of TBI within
+    /// TE.BlockInfo.
+    unsigned getBlockNum() const { return &TBI - &TE.BlockInfo[0]; }
+
+  public:
+    explicit Trace(Ensemble &te, TraceBlockInfo &tbi) : TE(te), TBI(tbi) {}
+    void print(raw_ostream&) const;
+
+    /// Compute the total number of instructions in the trace.
+    unsigned getInstrCount() const {
+      return TBI.InstrDepth + TBI.InstrHeight;
+    }
+
+    /// Return the resource depth of the top/bottom of the trace center block.
+    /// This is the number of cycles required to execute all instructions from
+    /// the trace head to the trace center block. The resource depth only
+    /// considers execution resources, it ignores data dependencies.
+    /// When Bottom is set, instructions in the trace center block are included.
+    unsigned getResourceDepth(bool Bottom) const;
+
+    /// Return the resource length of the trace. This is the number of cycles
+    /// required to execute the instructions in the trace if they were all
+    /// independent, exposing the maximum instruction-level parallelism.
+    ///
+    /// Any blocks in Extrablocks are included as if they were part of the
+    /// trace.
+    unsigned getResourceLength(ArrayRef<const MachineBasicBlock*> Extrablocks =
+                               ArrayRef<const MachineBasicBlock*>()) const;
+
+    /// Return the length of the (data dependency) critical path through the
+    /// trace.
+    unsigned getCriticalPath() const { return TBI.CriticalPath; }
+
+    /// Return the depth and height of MI. The depth is only valid for
+    /// instructions in or above the trace center block. The height is only
+    /// valid for instructions in or below the trace center block.
+    InstrCycles getInstrCycles(const MachineInstr *MI) const {
+      return TE.Cycles.lookup(MI);
+    }
+
+    /// Return the slack of MI. This is the number of cycles MI can be delayed
+    /// before the critical path becomes longer.
+    /// MI must be an instruction in the trace center block.
+    unsigned getInstrSlack(const MachineInstr *MI) const;
+
+    /// Return the Depth of a PHI instruction in a trace center block successor.
+    /// The PHI does not have to be part of the trace.
+    unsigned getPHIDepth(const MachineInstr *PHI) const;
+  };
+
+  /// A trace ensemble is a collection of traces selected using the same
+  /// strategy, for example 'minimum resource height'. There is one trace for
+  /// every block in the function.
+  class Ensemble {
+    SmallVector<TraceBlockInfo, 4> BlockInfo;
+    DenseMap<const MachineInstr*, InstrCycles> Cycles;
+    friend class Trace;
+
+    void computeTrace(const MachineBasicBlock*);
+    void computeDepthResources(const MachineBasicBlock*);
+    void computeHeightResources(const MachineBasicBlock*);
+    unsigned computeCrossBlockCriticalPath(const TraceBlockInfo&);
+    void computeInstrDepths(const MachineBasicBlock*);
+    void computeInstrHeights(const MachineBasicBlock*);
+    void addLiveIns(const MachineInstr *DefMI,
+                    ArrayRef<const MachineBasicBlock*> Trace);
+
+  protected:
+    MachineTraceMetrics &MTM;
+    virtual const MachineBasicBlock *pickTracePred(const MachineBasicBlock*) =0;
+    virtual const MachineBasicBlock *pickTraceSucc(const MachineBasicBlock*) =0;
+    explicit Ensemble(MachineTraceMetrics*);
+    const MachineLoop *getLoopFor(const MachineBasicBlock*) const;
+    const TraceBlockInfo *getDepthResources(const MachineBasicBlock*) const;
+    const TraceBlockInfo *getHeightResources(const MachineBasicBlock*) const;
+
+  public:
+    virtual ~Ensemble();
+    virtual const char *getName() const =0;
+    void print(raw_ostream&) const;
+    void invalidate(const MachineBasicBlock *MBB);
+    void verify() const;
+
+    /// Get the trace that passes through MBB.
+    /// The trace is computed on demand.
+    Trace getTrace(const MachineBasicBlock *MBB);
+  };
+
+  /// Strategies for selecting traces.
+  enum Strategy {
+    /// Select the trace through a block that has the fewest instructions.
+    TS_MinInstrCount,
+
+    TS_NumStrategies
+  };
+
+  /// Get the trace ensemble representing the given trace selection strategy.
+  /// The returned Ensemble object is owned by the MachineTraceMetrics analysis,
+  /// and valid for the lifetime of the analysis pass.
+  Ensemble *getEnsemble(Strategy);
+
+  /// Invalidate cached information about MBB. This must be called *before* MBB
+  /// is erased, or the CFG is otherwise changed.
+  ///
+  /// This invalidates per-block information about resource usage for MBB only,
+  /// and it invalidates per-trace information for any trace that passes
+  /// through MBB.
+  ///
+  /// Call Ensemble::getTrace() again to update any trace handles.
+  void invalidate(const MachineBasicBlock *MBB);
+
+private:
+  // One entry per basic block, indexed by block number.
+  SmallVector<FixedBlockInfo, 4> BlockInfo;
+
+  // One ensemble per strategy.
+  Ensemble* Ensembles[TS_NumStrategies];
+};
+
+/// Stream a trace handle by delegating to Trace::print(); returns OS so that
+/// insertions can be chained.
+inline raw_ostream &operator<<(raw_ostream &OS,
+                               const MachineTraceMetrics::Trace &Tr) {
+  Tr.print(OS);
+  return OS;
+}
+
+/// Stream a trace ensemble by delegating to Ensemble::print(); returns OS so
+/// that insertions can be chained.
+inline raw_ostream &operator<<(raw_ostream &OS,
+                               const MachineTraceMetrics::Ensemble &En) {
+  En.print(OS);
+  return OS;
+}
+} // end namespace llvm
+
+#endif
diff --git a/lib/CodeGen/MachineVerifier.cpp b/lib/CodeGen/MachineVerifier.cpp
index 74ba94d..172402e 100644
--- a/lib/CodeGen/MachineVerifier.cpp
+++ b/lib/CodeGen/MachineVerifier.cpp
@@ -89,8 +89,8 @@ namespace {
void addRegWithSubRegs(RegVector &RV, unsigned Reg) {
RV.push_back(Reg);
if (TargetRegisterInfo::isPhysicalRegister(Reg))
- for (const uint16_t *R = TRI->getSubRegisters(Reg); *R; R++)
- RV.push_back(*R);
+ for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs)
+ RV.push_back(*SubRegs);
}
struct BBInfo {
@@ -191,9 +191,11 @@ namespace {
void visitMachineFunctionBefore();
void visitMachineBasicBlockBefore(const MachineBasicBlock *MBB);
+ void visitMachineBundleBefore(const MachineInstr *MI);
void visitMachineInstrBefore(const MachineInstr *MI);
void visitMachineOperand(const MachineOperand *MO, unsigned MONum);
void visitMachineInstrAfter(const MachineInstr *MI);
+ void visitMachineBundleAfter(const MachineInstr *MI);
void visitMachineBasicBlockAfter(const MachineBasicBlock *MBB);
void visitMachineFunctionAfter();
@@ -201,6 +203,10 @@ namespace {
void report(const char *msg, const MachineBasicBlock *MBB);
void report(const char *msg, const MachineInstr *MI);
void report(const char *msg, const MachineOperand *MO, unsigned MONum);
+ void report(const char *msg, const MachineFunction *MF,
+ const LiveInterval &LI);
+ void report(const char *msg, const MachineBasicBlock *MBB,
+ const LiveInterval &LI);
void checkLiveness(const MachineOperand *MO, unsigned MONum);
void markReachable(const MachineBasicBlock *MBB);
@@ -210,6 +216,10 @@ namespace {
void calcRegsRequired();
void verifyLiveVariables();
void verifyLiveIntervals();
+ void verifyLiveInterval(const LiveInterval&);
+ void verifyLiveIntervalValue(const LiveInterval&, VNInfo*);
+ void verifyLiveIntervalSegment(const LiveInterval&,
+ LiveInterval::const_iterator);
};
struct MachineVerifierPass : public MachineFunctionPass {
@@ -288,6 +298,8 @@ bool MachineVerifier::runOnMachineFunction(MachineFunction &MF) {
for (MachineFunction::const_iterator MFI = MF.begin(), MFE = MF.end();
MFI!=MFE; ++MFI) {
visitMachineBasicBlockBefore(MFI);
+ // Keep track of the current bundle header.
+ const MachineInstr *CurBundle = 0;
for (MachineBasicBlock::const_instr_iterator MBBI = MFI->instr_begin(),
MBBE = MFI->instr_end(); MBBI != MBBE; ++MBBI) {
if (MBBI->getParent() != MFI) {
@@ -295,15 +307,21 @@ bool MachineVerifier::runOnMachineFunction(MachineFunction &MF) {
*OS << "Instruction: " << *MBBI;
continue;
}
- // Skip BUNDLE instruction for now. FIXME: We should add code to verify
- // the BUNDLE's specifically.
- if (MBBI->isBundle())
- continue;
+ // Is this a bundle header?
+ if (!MBBI->isInsideBundle()) {
+ if (CurBundle)
+ visitMachineBundleAfter(CurBundle);
+ CurBundle = MBBI;
+ visitMachineBundleBefore(CurBundle);
+ } else if (!CurBundle)
+ report("No bundle header", MBBI);
visitMachineInstrBefore(MBBI);
for (unsigned I = 0, E = MBBI->getNumOperands(); I != E; ++I)
visitMachineOperand(&MBBI->getOperand(I), I);
visitMachineInstrAfter(MBBI);
}
+ if (CurBundle)
+ visitMachineBundleAfter(CurBundle);
visitMachineBasicBlockAfter(MFI);
}
visitMachineFunctionAfter();
@@ -340,9 +358,9 @@ void MachineVerifier::report(const char *msg, const MachineFunction *MF) {
void MachineVerifier::report(const char *msg, const MachineBasicBlock *MBB) {
assert(MBB);
report(msg, MBB->getParent());
- *OS << "- basic block: " << MBB->getName()
- << " " << (void*)MBB
- << " (BB#" << MBB->getNumber() << ")";
+ *OS << "- basic block: BB#" << MBB->getNumber()
+ << ' ' << MBB->getName()
+ << " (" << (void*)MBB << ')';
if (Indexes)
*OS << " [" << Indexes->getMBBStartIdx(MBB)
<< ';' << Indexes->getMBBEndIdx(MBB) << ')';
@@ -367,6 +385,28 @@ void MachineVerifier::report(const char *msg,
*OS << "\n";
}
+void MachineVerifier::report(const char *msg, const MachineFunction *MF,
+ const LiveInterval &LI) { // Report msg against MF, then describe LI.
+ report(msg, MF); // Reuse the function-level overload for the common header.
+ *OS << "- interval: ";
+ if (TargetRegisterInfo::isVirtualRegister(LI.reg))
+ *OS << PrintReg(LI.reg, TRI);
+ else // Any non-virtual LI.reg is printed as a register unit.
+ *OS << PrintRegUnit(LI.reg, TRI);
+ *OS << ' ' << LI << '\n'; // The interval itself follows its register name.
+}
+
+void MachineVerifier::report(const char *msg, const MachineBasicBlock *MBB,
+ const LiveInterval &LI) { // Report msg against MBB, then describe LI.
+ report(msg, MBB); // Reuse the basic-block overload for the common header.
+ *OS << "- interval: ";
+ if (TargetRegisterInfo::isVirtualRegister(LI.reg))
+ *OS << PrintReg(LI.reg, TRI);
+ else // Any non-virtual LI.reg is printed as a register unit.
+ *OS << PrintRegUnit(LI.reg, TRI);
+ *OS << ' ' << LI << '\n'; // The interval itself follows its register name.
+}
+
void MachineVerifier::markReachable(const MachineBasicBlock *MBB) {
BBInfo &MInfo = MBBInfoMap[MBB];
if (!MInfo.reachable) {
@@ -384,10 +424,10 @@ void MachineVerifier::visitMachineFunctionBefore() {
// A sub-register of a reserved register is also reserved
for (int Reg = regsReserved.find_first(); Reg>=0;
Reg = regsReserved.find_next(Reg)) {
- for (const uint16_t *Sub = TRI->getSubRegisters(Reg); *Sub; ++Sub) {
+ for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) {
// FIXME: This should probably be:
- // assert(regsReserved.test(*Sub) && "Non-reserved sub-register");
- regsReserved.set(*Sub);
+ // assert(regsReserved.test(*SubRegs) && "Non-reserved sub-register");
+ regsReserved.set(*SubRegs);
}
}
@@ -466,8 +506,8 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) {
report("MBB exits via unconditional fall-through but its successor "
"differs from its CFG successor!", MBB);
}
- if (!MBB->empty() && MBB->back().isBarrier() &&
- !TII->isPredicated(&MBB->back())) {
+ if (!MBB->empty() && getBundleStart(&MBB->back())->isBarrier() &&
+ !TII->isPredicated(getBundleStart(&MBB->back()))) {
report("MBB exits via unconditional fall-through but ends with a "
"barrier instruction!", MBB);
}
@@ -487,10 +527,10 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) {
if (MBB->empty()) {
report("MBB exits via unconditional branch but doesn't contain "
"any instructions!", MBB);
- } else if (!MBB->back().isBarrier()) {
+ } else if (!getBundleStart(&MBB->back())->isBarrier()) {
report("MBB exits via unconditional branch but doesn't end with a "
"barrier instruction!", MBB);
- } else if (!MBB->back().isTerminator()) {
+ } else if (!getBundleStart(&MBB->back())->isTerminator()) {
report("MBB exits via unconditional branch but the branch isn't a "
"terminator instruction!", MBB);
}
@@ -510,10 +550,10 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) {
if (MBB->empty()) {
report("MBB exits via conditional branch/fall-through but doesn't "
"contain any instructions!", MBB);
- } else if (MBB->back().isBarrier()) {
+ } else if (getBundleStart(&MBB->back())->isBarrier()) {
report("MBB exits via conditional branch/fall-through but ends with a "
"barrier instruction!", MBB);
- } else if (!MBB->back().isTerminator()) {
+ } else if (!getBundleStart(&MBB->back())->isTerminator()) {
report("MBB exits via conditional branch/fall-through but the branch "
"isn't a terminator instruction!", MBB);
}
@@ -530,10 +570,10 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) {
if (MBB->empty()) {
report("MBB exits via conditional branch/branch but doesn't "
"contain any instructions!", MBB);
- } else if (!MBB->back().isBarrier()) {
+ } else if (!getBundleStart(&MBB->back())->isBarrier()) {
report("MBB exits via conditional branch/branch but doesn't end with a "
"barrier instruction!", MBB);
- } else if (!MBB->back().isTerminator()) {
+ } else if (!getBundleStart(&MBB->back())->isTerminator()) {
report("MBB exits via conditional branch/branch but the branch "
"isn't a terminator instruction!", MBB);
}
@@ -554,8 +594,8 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) {
continue;
}
regsLive.insert(*I);
- for (const uint16_t *R = TRI->getSubRegisters(*I); *R; R++)
- regsLive.insert(*R);
+ for (MCSubRegIterator SubRegs(*I, TRI); SubRegs.isValid(); ++SubRegs)
+ regsLive.insert(*SubRegs);
}
regsLiveInButUnused = regsLive;
@@ -564,8 +604,8 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) {
BitVector PR = MFI->getPristineRegs(MBB);
for (int I = PR.find_first(); I>0; I = PR.find_next(I)) {
regsLive.insert(I);
- for (const uint16_t *R = TRI->getSubRegisters(I); *R; R++)
- regsLive.insert(*R);
+ for (MCSubRegIterator SubRegs(I, TRI); SubRegs.isValid(); ++SubRegs)
+ regsLive.insert(*SubRegs);
}
regsKilled.clear();
@@ -575,6 +615,30 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) {
lastIndex = Indexes->getMBBStartIdx(MBB);
}
+// Called for every bundle header, including normal stand-alone unbundled
+// instructions. Checks slot-index ordering and terminator placement.
+void MachineVerifier::visitMachineBundleBefore(const MachineInstr *MI) {
+ if (Indexes && Indexes->hasIndex(MI)) { // Skipped when MI has no slot index.
+ SlotIndex idx = Indexes->getInstructionIndex(MI);
+ if (!(idx > lastIndex)) { // Indexes must be strictly increasing.
+ report("Instruction index out of order", MI);
+ *OS << "Last instruction was at " << lastIndex << '\n';
+ }
+ lastIndex = idx; // Updated even when the order check failed.
+ }
+
+ // Ensure non-terminators don't follow terminators.
+ // Predicated terminators formed by if conversion are ignored here.
+ // FIXME: If conversion shouldn't need to violate this rule.
+ if (MI->isTerminator() && !TII->isPredicated(MI)) {
+ if (!FirstTerminator)
+ FirstTerminator = MI; // Only the first unpredicated terminator is kept.
+ } else if (FirstTerminator) {
+ report("Non-terminator instruction after the first terminator", MI);
+ *OS << "First terminator was:\t" << *FirstTerminator;
+ }
+}
+
void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) {
const MCInstrDesc &MCID = MI->getDesc();
if (MI->getNumOperands() < MCID.getNumOperands()) {
@@ -608,17 +672,6 @@ void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) {
}
}
- // Ensure non-terminators don't follow terminators.
- // Ignore predicated terminators formed by if conversion.
- // FIXME: If conversion shouldn't need to violate this rule.
- if (MI->isTerminator() && !TII->isPredicated(MI)) {
- if (!FirstTerminator)
- FirstTerminator = MI;
- } else if (FirstTerminator) {
- report("Non-terminator instruction after the first terminator", MI);
- *OS << "First terminator was:\t" << *FirstTerminator;
- }
-
StringRef ErrorInfo;
if (!TII->verifyInstruction(MI, ErrorInfo))
report(ErrorInfo.data(), MI);
@@ -634,7 +687,7 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {
if (MONum < MCID.getNumDefs()) {
if (!MO->isReg())
report("Explicit definition must be a register", MO, MONum);
- else if (!MO->isDef())
+ else if (!MO->isDef() && !MCOI.isOptionalDef())
report("Explicit definition marked as use", MO, MONum);
else if (MO->isImplicit())
report("Explicit definition marked as implicit", MO, MONum);
@@ -662,6 +715,12 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {
if (MRI->tracksLiveness() && !MI->isDebugValue())
checkLiveness(MO, MONum);
+ // Verify two-address constraints after leaving SSA form.
+ unsigned DefIdx;
+ if (!MRI->isSSA() && MO->isUse() &&
+ MI->isRegTiedToDefOperand(MONum, &DefIdx) &&
+ Reg != MI->getOperand(DefIdx).getReg())
+ report("Two-address instruction operands must be identical", MO, MONum);
// Check register classes.
if (MONum < MCID.getNumOperands() && !MO->isImplicit()) {
@@ -672,7 +731,8 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {
report("Illegal subregister index for physical register", MO, MONum);
return;
}
- if (const TargetRegisterClass *DRC = TII->getRegClass(MCID,MONum,TRI)) {
+ if (const TargetRegisterClass *DRC =
+ TII->getRegClass(MCID, MONum, TRI, *MF)) {
if (!DRC->contains(Reg)) {
report("Illegal physical register for instruction", MO, MONum);
*OS << TRI->getName(Reg) << " is not a "
@@ -698,7 +758,8 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {
return;
}
}
- if (const TargetRegisterClass *DRC = TII->getRegClass(MCID,MONum,TRI)) {
+ if (const TargetRegisterClass *DRC =
+ TII->getRegClass(MCID, MONum, TRI, *MF)) {
if (SubIdx) {
const TargetRegisterClass *SuperRC =
TRI->getLargestLegalSuperClass(RC);
@@ -761,20 +822,7 @@ void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) {
if (MO->readsReg()) {
regsLiveInButUnused.erase(Reg);
- bool isKill = false;
- unsigned defIdx;
- if (MI->isRegTiedToDefOperand(MONum, &defIdx)) {
- // A two-addr use counts as a kill if use and def are the same.
- unsigned DefReg = MI->getOperand(defIdx).getReg();
- if (Reg == DefReg)
- isKill = true;
- else if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
- report("Two-address instruction operands must be identical", MO, MONum);
- }
- } else
- isKill = MO->isKill();
-
- if (isKill)
+ if (MO->isKill())
addRegWithSubRegs(regsKilled, Reg);
// Check that LiveVars knows this kill.
@@ -786,23 +834,44 @@ void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) {
}
// Check LiveInts liveness and kill.
- if (TargetRegisterInfo::isVirtualRegister(Reg) &&
- LiveInts && !LiveInts->isNotInMIMap(MI)) {
- SlotIndex UseIdx = LiveInts->getInstructionIndex(MI).getRegSlot(true);
- if (LiveInts->hasInterval(Reg)) {
- const LiveInterval &LI = LiveInts->getInterval(Reg);
- if (!LI.liveAt(UseIdx)) {
- report("No live range at use", MO, MONum);
- *OS << UseIdx << " is not live in " << LI << '\n';
+ if (LiveInts && !LiveInts->isNotInMIMap(MI)) {
+ SlotIndex UseIdx = LiveInts->getInstructionIndex(MI);
+ // Check the cached regunit intervals.
+ if (TargetRegisterInfo::isPhysicalRegister(Reg) && !isReserved(Reg)) {
+ for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units) {
+ if (const LiveInterval *LI = LiveInts->getCachedRegUnit(*Units)) {
+ LiveRangeQuery LRQ(*LI, UseIdx);
+ if (!LRQ.valueIn()) {
+ report("No live range at use", MO, MONum);
+ *OS << UseIdx << " is not live in " << PrintRegUnit(*Units, TRI)
+ << ' ' << *LI << '\n';
+ }
+ if (MO->isKill() && !LRQ.isKill()) {
+ report("Live range continues after kill flag", MO, MONum);
+ *OS << PrintRegUnit(*Units, TRI) << ' ' << *LI << '\n';
+ }
+ }
}
- // Check for extra kill flags.
- // Note that we allow missing kill flags for now.
- if (MO->isKill() && !LI.killedAt(UseIdx.getRegSlot())) {
- report("Live range continues after kill flag", MO, MONum);
- *OS << "Live range: " << LI << '\n';
+ }
+
+ if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ if (LiveInts->hasInterval(Reg)) {
+ // This is a virtual register interval.
+ const LiveInterval &LI = LiveInts->getInterval(Reg);
+ LiveRangeQuery LRQ(LI, UseIdx);
+ if (!LRQ.valueIn()) {
+ report("No live range at use", MO, MONum);
+ *OS << UseIdx << " is not live in " << LI << '\n';
+ }
+ // Check for extra kill flags.
+ // Note that we allow missing kill flags for now.
+ if (MO->isKill() && !LRQ.isKill()) {
+ report("Live range continues after kill flag", MO, MONum);
+ *OS << "Live range: " << LI << '\n';
+ }
+ } else {
+ report("Virtual register has no live interval", MO, MONum);
}
- } else {
- report("Virtual register has no Live interval", MO, MONum);
}
}
@@ -812,6 +881,8 @@ void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) {
// Reserved registers may be used even when 'dead'.
if (!isReserved(Reg))
report("Using an undefined physical register", MO, MONum);
+ } else if (MRI->def_empty(Reg)) {
+ report("Reading virtual register without a def", MO, MONum);
} else {
BBInfo &MInfo = MBBInfoMap[MI->getParent()];
// We don't know which virtual registers are live in, so only complain
@@ -841,12 +912,13 @@ void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) {
// Check LiveInts for a live range, but only for virtual registers.
if (LiveInts && TargetRegisterInfo::isVirtualRegister(Reg) &&
!LiveInts->isNotInMIMap(MI)) {
- SlotIndex DefIdx = LiveInts->getInstructionIndex(MI).getRegSlot();
+ SlotIndex DefIdx = LiveInts->getInstructionIndex(MI);
+ DefIdx = DefIdx.getRegSlot(MO->isEarlyClobber());
if (LiveInts->hasInterval(Reg)) {
const LiveInterval &LI = LiveInts->getInterval(Reg);
if (const VNInfo *VNI = LI.getVNInfoAt(DefIdx)) {
assert(VNI && "NULL valno is not allowed");
- if (VNI->def != DefIdx && !MO->isEarlyClobber()) {
+ if (VNI->def != DefIdx) {
report("Inconsistent valno->def", MO, MONum);
*OS << "Valno " << VNI->id << " is not defined at "
<< DefIdx << " in " << LI << '\n';
@@ -863,6 +935,13 @@ void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) {
}
void MachineVerifier::visitMachineInstrAfter(const MachineInstr *MI) {
+}
+
+// This function gets called after visiting all instructions in a bundle. The
+// argument points to the bundle header.
+// Normal stand-alone instructions are also considered 'bundles', and this
+// function is called for all of them.
+void MachineVerifier::visitMachineBundleAfter(const MachineInstr *MI) {
BBInfo &MInfo = MBBInfoMap[MI->getParent()];
set_union(MInfo.regsKilled, regsKilled);
set_subtract(regsLive, regsKilled); regsKilled.clear();
@@ -876,15 +955,6 @@ void MachineVerifier::visitMachineInstrAfter(const MachineInstr *MI) {
}
set_subtract(regsLive, regsDead); regsDead.clear();
set_union(regsLive, regsDefined); regsDefined.clear();
-
- if (Indexes && Indexes->hasIndex(MI)) {
- SlotIndex idx = Indexes->getInstructionIndex(MI);
- if (!(idx > lastIndex)) {
- report("Instruction index out of order", MI);
- *OS << "Last instruction was at " << lastIndex << '\n';
- }
- lastIndex = idx;
- }
}
void
@@ -1025,7 +1095,21 @@ void MachineVerifier::visitMachineFunctionAfter() {
// Now check liveness info if available
calcRegsRequired();
- if (MRI->isSSA() && !MF->empty()) {
+ // Check for killed virtual registers that should be live out.
+ for (MachineFunction::const_iterator MFI = MF->begin(), MFE = MF->end();
+ MFI != MFE; ++MFI) {
+ BBInfo &MInfo = MBBInfoMap[MFI];
+ for (RegSet::iterator
+ I = MInfo.vregsRequired.begin(), E = MInfo.vregsRequired.end(); I != E;
+ ++I)
+ if (MInfo.regsKilled.count(*I)) {
+ report("Virtual register killed in block, but needed live out.", MFI);
+ *OS << "Virtual register " << PrintReg(*I)
+ << " is used after the block.\n";
+ }
+ }
+
+ if (!MF->empty()) {
BBInfo &MInfo = MBBInfoMap[&MF->front()];
for (RegSet::iterator
I = MInfo.vregsRequired.begin(), E = MInfo.vregsRequired.end(); I != E;
@@ -1069,292 +1153,298 @@ void MachineVerifier::verifyLiveVariables() {
void MachineVerifier::verifyLiveIntervals() {
assert(LiveInts && "Don't call verifyLiveIntervals without LiveInts");
- for (LiveIntervals::const_iterator LVI = LiveInts->begin(),
- LVE = LiveInts->end(); LVI != LVE; ++LVI) {
- const LiveInterval &LI = *LVI->second;
+ for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) {
+ unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
// Spilling and splitting may leave unused registers around. Skip them.
- if (MRI->use_empty(LI.reg))
+ if (MRI->reg_nodbg_empty(Reg))
continue;
- // Physical registers have much weirdness going on, mostly from coalescing.
- // We should probably fix it, but for now just ignore them.
- if (TargetRegisterInfo::isPhysicalRegister(LI.reg))
+ if (!LiveInts->hasInterval(Reg)) {
+ report("Missing live interval for virtual register", MF);
+ *OS << PrintReg(Reg, TRI) << " still has defs or uses\n";
continue;
+ }
- assert(LVI->first == LI.reg && "Invalid reg to interval mapping");
+ const LiveInterval &LI = LiveInts->getInterval(Reg);
+ assert(Reg == LI.reg && "Invalid reg to interval mapping");
+ verifyLiveInterval(LI);
+ }
- for (LiveInterval::const_vni_iterator I = LI.vni_begin(), E = LI.vni_end();
- I!=E; ++I) {
- VNInfo *VNI = *I;
- const VNInfo *DefVNI = LI.getVNInfoAt(VNI->def);
+ // Verify all the cached regunit intervals.
+ for (unsigned i = 0, e = TRI->getNumRegUnits(); i != e; ++i)
+ if (const LiveInterval *LI = LiveInts->getCachedRegUnit(i))
+ verifyLiveInterval(*LI);
+}
- if (!DefVNI) {
- if (!VNI->isUnused()) {
- report("Valno not live at def and not marked unused", MF);
- *OS << "Valno #" << VNI->id << " in " << LI << '\n';
- }
- continue;
- }
+void MachineVerifier::verifyLiveIntervalValue(const LiveInterval &LI,
+ VNInfo *VNI) {
+ if (VNI->isUnused())
+ return;
- if (VNI->isUnused())
- continue;
+ const VNInfo *DefVNI = LI.getVNInfoAt(VNI->def);
- if (DefVNI != VNI) {
- report("Live range at def has different valno", MF);
- *OS << "Valno #" << VNI->id << " is defined at " << VNI->def
- << " where valno #" << DefVNI->id << " is live in " << LI << '\n';
- continue;
- }
+ if (!DefVNI) {
+ report("Valno not live at def and not marked unused", MF, LI);
+ *OS << "Valno #" << VNI->id << '\n';
+ return;
+ }
- const MachineBasicBlock *MBB = LiveInts->getMBBFromIndex(VNI->def);
- if (!MBB) {
- report("Invalid definition index", MF);
- *OS << "Valno #" << VNI->id << " is defined at " << VNI->def
- << " in " << LI << '\n';
- continue;
- }
+ if (DefVNI != VNI) {
+ report("Live range at def has different valno", MF, LI);
+ *OS << "Valno #" << VNI->id << " is defined at " << VNI->def
+ << " where valno #" << DefVNI->id << " is live\n";
+ return;
+ }
- if (VNI->isPHIDef()) {
- if (VNI->def != LiveInts->getMBBStartIdx(MBB)) {
- report("PHIDef value is not defined at MBB start", MF);
- *OS << "Valno #" << VNI->id << " is defined at " << VNI->def
- << ", not at the beginning of BB#" << MBB->getNumber()
- << " in " << LI << '\n';
- }
- } else {
- // Non-PHI def.
- const MachineInstr *MI = LiveInts->getInstructionFromIndex(VNI->def);
- if (!MI) {
- report("No instruction at def index", MF);
- *OS << "Valno #" << VNI->id << " is defined at " << VNI->def
- << " in " << LI << '\n';
- continue;
- }
+ const MachineBasicBlock *MBB = LiveInts->getMBBFromIndex(VNI->def);
+ if (!MBB) {
+ report("Invalid definition index", MF, LI);
+ *OS << "Valno #" << VNI->id << " is defined at " << VNI->def
+ << " in " << LI << '\n';
+ return;
+ }
- bool hasDef = false;
- bool isEarlyClobber = false;
- for (ConstMIBundleOperands MOI(MI); MOI.isValid(); ++MOI) {
- if (!MOI->isReg() || !MOI->isDef())
- continue;
- if (TargetRegisterInfo::isVirtualRegister(LI.reg)) {
- if (MOI->getReg() != LI.reg)
- continue;
- } else {
- if (!TargetRegisterInfo::isPhysicalRegister(MOI->getReg()) ||
- !TRI->regsOverlap(LI.reg, MOI->getReg()))
- continue;
- }
- hasDef = true;
- if (MOI->isEarlyClobber())
- isEarlyClobber = true;
- }
+ if (VNI->isPHIDef()) {
+ if (VNI->def != LiveInts->getMBBStartIdx(MBB)) {
+ report("PHIDef value is not defined at MBB start", MBB, LI);
+ *OS << "Valno #" << VNI->id << " is defined at " << VNI->def
+ << ", not at the beginning of BB#" << MBB->getNumber() << '\n';
+ }
+ return;
+ }
- if (!hasDef) {
- report("Defining instruction does not modify register", MI);
- *OS << "Valno #" << VNI->id << " in " << LI << '\n';
- }
+ // Non-PHI def.
+ const MachineInstr *MI = LiveInts->getInstructionFromIndex(VNI->def);
+ if (!MI) {
+ report("No instruction at def index", MBB, LI);
+ *OS << "Valno #" << VNI->id << " is defined at " << VNI->def << '\n';
+ return;
+ }
- // Early clobber defs begin at USE slots, but other defs must begin at
- // DEF slots.
- if (isEarlyClobber) {
- if (!VNI->def.isEarlyClobber()) {
- report("Early clobber def must be at an early-clobber slot", MF);
- *OS << "Valno #" << VNI->id << " is defined at " << VNI->def
- << " in " << LI << '\n';
- }
- } else if (!VNI->def.isRegister()) {
- report("Non-PHI, non-early clobber def must be at a register slot",
- MF);
- *OS << "Valno #" << VNI->id << " is defined at " << VNI->def
- << " in " << LI << '\n';
- }
- }
+ bool hasDef = false;
+ bool isEarlyClobber = false;
+ for (ConstMIBundleOperands MOI(MI); MOI.isValid(); ++MOI) {
+ if (!MOI->isReg() || !MOI->isDef())
+ continue;
+ if (TargetRegisterInfo::isVirtualRegister(LI.reg)) {
+ if (MOI->getReg() != LI.reg)
+ continue;
+ } else {
+ if (!TargetRegisterInfo::isPhysicalRegister(MOI->getReg()) ||
+ !TRI->hasRegUnit(MOI->getReg(), LI.reg))
+ continue;
}
+ hasDef = true;
+ if (MOI->isEarlyClobber())
+ isEarlyClobber = true;
+ }
- for (LiveInterval::const_iterator I = LI.begin(), E = LI.end(); I!=E; ++I) {
- const VNInfo *VNI = I->valno;
- assert(VNI && "Live range has no valno");
+ if (!hasDef) {
+ report("Defining instruction does not modify register", MI);
+ *OS << "Valno #" << VNI->id << " in " << LI << '\n';
+ }
- if (VNI->id >= LI.getNumValNums() || VNI != LI.getValNumInfo(VNI->id)) {
- report("Foreign valno in live range", MF);
- I->print(*OS);
- *OS << " has a valno not in " << LI << '\n';
- }
+ // Early clobber defs begin at USE slots, but other defs must begin at
+ // DEF slots.
+ if (isEarlyClobber) {
+ if (!VNI->def.isEarlyClobber()) {
+ report("Early clobber def must be at an early-clobber slot", MBB, LI);
+ *OS << "Valno #" << VNI->id << " is defined at " << VNI->def << '\n';
+ }
+ } else if (!VNI->def.isRegister()) {
+ report("Non-PHI, non-early clobber def must be at a register slot",
+ MBB, LI);
+ *OS << "Valno #" << VNI->id << " is defined at " << VNI->def << '\n';
+ }
+}
- if (VNI->isUnused()) {
- report("Live range valno is marked unused", MF);
- I->print(*OS);
- *OS << " in " << LI << '\n';
- }
+void
+MachineVerifier::verifyLiveIntervalSegment(const LiveInterval &LI,
+ LiveInterval::const_iterator I) {
+ const VNInfo *VNI = I->valno;
+ assert(VNI && "Live range has no valno");
+
+ if (VNI->id >= LI.getNumValNums() || VNI != LI.getValNumInfo(VNI->id)) {
+ report("Foreign valno in live range", MF, LI);
+ *OS << *I << " has a bad valno\n";
+ }
- const MachineBasicBlock *MBB = LiveInts->getMBBFromIndex(I->start);
- if (!MBB) {
- report("Bad start of live segment, no basic block", MF);
- I->print(*OS);
- *OS << " in " << LI << '\n';
- continue;
- }
- SlotIndex MBBStartIdx = LiveInts->getMBBStartIdx(MBB);
- if (I->start != MBBStartIdx && I->start != VNI->def) {
- report("Live segment must begin at MBB entry or valno def", MBB);
- I->print(*OS);
- *OS << " in " << LI << '\n' << "Basic block starts at "
- << MBBStartIdx << '\n';
- }
+ if (VNI->isUnused()) {
+ report("Live range valno is marked unused", MF, LI);
+ *OS << *I << '\n';
+ }
- const MachineBasicBlock *EndMBB =
- LiveInts->getMBBFromIndex(I->end.getPrevSlot());
- if (!EndMBB) {
- report("Bad end of live segment, no basic block", MF);
- I->print(*OS);
- *OS << " in " << LI << '\n';
- continue;
- }
+ const MachineBasicBlock *MBB = LiveInts->getMBBFromIndex(I->start);
+ if (!MBB) {
+ report("Bad start of live segment, no basic block", MF, LI);
+ *OS << *I << '\n';
+ return;
+ }
+ SlotIndex MBBStartIdx = LiveInts->getMBBStartIdx(MBB);
+ if (I->start != MBBStartIdx && I->start != VNI->def) {
+ report("Live segment must begin at MBB entry or valno def", MBB, LI);
+ *OS << *I << '\n';
+ }
- // No more checks for live-out segments.
- if (I->end == LiveInts->getMBBEndIdx(EndMBB))
- continue;
+ const MachineBasicBlock *EndMBB =
+ LiveInts->getMBBFromIndex(I->end.getPrevSlot());
+ if (!EndMBB) {
+ report("Bad end of live segment, no basic block", MF, LI);
+ *OS << *I << '\n';
+ return;
+ }
- // The live segment is ending inside EndMBB
- const MachineInstr *MI =
- LiveInts->getInstructionFromIndex(I->end.getPrevSlot());
- if (!MI) {
- report("Live segment doesn't end at a valid instruction", EndMBB);
- I->print(*OS);
- *OS << " in " << LI << '\n' << "Basic block starts at "
- << MBBStartIdx << '\n';
+ // No more checks for live-out segments.
+ if (I->end == LiveInts->getMBBEndIdx(EndMBB))
+ return;
+
+ // RegUnit intervals are allowed dead phis.
+ if (!TargetRegisterInfo::isVirtualRegister(LI.reg) && VNI->isPHIDef() &&
+ I->start == VNI->def && I->end == VNI->def.getDeadSlot())
+ return;
+
+ // The live segment is ending inside EndMBB
+ const MachineInstr *MI =
+ LiveInts->getInstructionFromIndex(I->end.getPrevSlot());
+ if (!MI) {
+ report("Live segment doesn't end at a valid instruction", EndMBB, LI);
+ *OS << *I << '\n';
+ return;
+ }
+
+ // The block slot must refer to a basic block boundary.
+ if (I->end.isBlock()) {
+ report("Live segment ends at B slot of an instruction", EndMBB, LI);
+ *OS << *I << '\n';
+ }
+
+ if (I->end.isDead()) {
+ // Segment ends on the dead slot.
+ // That means there must be a dead def.
+ if (!SlotIndex::isSameInstr(I->start, I->end)) {
+ report("Live segment ending at dead slot spans instructions", EndMBB, LI);
+ *OS << *I << '\n';
+ }
+ }
+
+ // A live segment can only end at an early-clobber slot if it is being
+ // redefined by an early-clobber def.
+ if (I->end.isEarlyClobber()) {
+ if (I+1 == LI.end() || (I+1)->start != I->end) {
+ report("Live segment ending at early clobber slot must be "
+ "redefined by an EC def in the same instruction", EndMBB, LI);
+ *OS << *I << '\n';
+ }
+ }
+
+ // The following checks only apply to virtual registers. Physreg liveness
+ // is too weird to check.
+ if (TargetRegisterInfo::isVirtualRegister(LI.reg)) {
+ // A live range can end with either a redefinition, a kill flag on a
+ // use, or a dead flag on a def.
+ bool hasRead = false;
+ bool hasDeadDef = false;
+ for (ConstMIBundleOperands MOI(MI); MOI.isValid(); ++MOI) {
+ if (!MOI->isReg() || MOI->getReg() != LI.reg)
continue;
- }
+ if (MOI->readsReg())
+ hasRead = true;
+ if (MOI->isDef() && MOI->isDead())
+ hasDeadDef = true;
+ }
- // The block slot must refer to a basic block boundary.
- if (I->end.isBlock()) {
- report("Live segment ends at B slot of an instruction", MI);
+ if (I->end.isDead()) {
+ if (!hasDeadDef) {
+ report("Instruction doesn't have a dead def operand", MI);
I->print(*OS);
*OS << " in " << LI << '\n';
}
-
- if (I->end.isDead()) {
- // Segment ends on the dead slot.
- // That means there must be a dead def.
- if (!SlotIndex::isSameInstr(I->start, I->end)) {
- report("Live segment ending at dead slot spans instructions", MI);
- I->print(*OS);
- *OS << " in " << LI << '\n';
- }
- }
-
- // A live segment can only end at an early-clobber slot if it is being
- // redefined by an early-clobber def.
- if (I->end.isEarlyClobber()) {
- if (I+1 == E || (I+1)->start != I->end) {
- report("Live segment ending at early clobber slot must be "
- "redefined by an EC def in the same instruction", MI);
- I->print(*OS);
- *OS << " in " << LI << '\n';
- }
+ } else {
+ if (!hasRead) {
+ report("Instruction ending live range doesn't read the register", MI);
+ *OS << *I << " in " << LI << '\n';
}
+ }
+ }
- // The following checks only apply to virtual registers. Physreg liveness
- // is too weird to check.
- if (TargetRegisterInfo::isVirtualRegister(LI.reg)) {
- // A live range can end with either a redefinition, a kill flag on a
- // use, or a dead flag on a def.
- bool hasRead = false;
- bool hasDeadDef = false;
- for (ConstMIBundleOperands MOI(MI); MOI.isValid(); ++MOI) {
- if (!MOI->isReg() || MOI->getReg() != LI.reg)
- continue;
- if (MOI->readsReg())
- hasRead = true;
- if (MOI->isDef() && MOI->isDead())
- hasDeadDef = true;
- }
-
- if (I->end.isDead()) {
- if (!hasDeadDef) {
- report("Instruction doesn't have a dead def operand", MI);
- I->print(*OS);
- *OS << " in " << LI << '\n';
- }
- } else {
- if (!hasRead) {
- report("Instruction ending live range doesn't read the register",
- MI);
- I->print(*OS);
- *OS << " in " << LI << '\n';
- }
- }
- }
+ // Now check all the basic blocks in this live segment.
+ MachineFunction::const_iterator MFI = MBB;
+ // Is this live range the beginning of a non-PHIDef VN?
+ if (I->start == VNI->def && !VNI->isPHIDef()) {
+ // Not live-in to any blocks.
+ if (MBB == EndMBB)
+ return;
+ // Skip this block.
+ ++MFI;
+ }
+ for (;;) {
+ assert(LiveInts->isLiveInToMBB(LI, MFI));
+ // We don't know how to track physregs into a landing pad.
+ if (!TargetRegisterInfo::isVirtualRegister(LI.reg) &&
+ MFI->isLandingPad()) {
+ if (&*MFI == EndMBB)
+ break;
+ ++MFI;
+ continue;
+ }
- // Now check all the basic blocks in this live segment.
- MachineFunction::const_iterator MFI = MBB;
- // Is this live range the beginning of a non-PHIDef VN?
- if (I->start == VNI->def && !VNI->isPHIDef()) {
- // Not live-in to any blocks.
- if (MBB == EndMBB)
- continue;
- // Skip this block.
- ++MFI;
+ // Is VNI a PHI-def in the current block?
+ bool IsPHI = VNI->isPHIDef() &&
+ VNI->def == LiveInts->getMBBStartIdx(MFI);
+
+ // Check that VNI is live-out of all predecessors.
+ for (MachineBasicBlock::const_pred_iterator PI = MFI->pred_begin(),
+ PE = MFI->pred_end(); PI != PE; ++PI) {
+ SlotIndex PEnd = LiveInts->getMBBEndIdx(*PI);
+ const VNInfo *PVNI = LI.getVNInfoBefore(PEnd);
+
+ // All predecessors must have a live-out value.
+ if (!PVNI) {
+ report("Register not marked live out of predecessor", *PI, LI);
+ *OS << "Valno #" << VNI->id << " live into BB#" << MFI->getNumber()
+ << '@' << LiveInts->getMBBStartIdx(MFI) << ", not live before "
+ << PEnd << '\n';
+ continue;
}
- for (;;) {
- assert(LiveInts->isLiveInToMBB(LI, MFI));
- // We don't know how to track physregs into a landing pad.
- if (TargetRegisterInfo::isPhysicalRegister(LI.reg) &&
- MFI->isLandingPad()) {
- if (&*MFI == EndMBB)
- break;
- ++MFI;
- continue;
- }
- // Check that VNI is live-out of all predecessors.
- for (MachineBasicBlock::const_pred_iterator PI = MFI->pred_begin(),
- PE = MFI->pred_end(); PI != PE; ++PI) {
- SlotIndex PEnd = LiveInts->getMBBEndIdx(*PI);
- const VNInfo *PVNI = LI.getVNInfoBefore(PEnd);
-
- if (VNI->isPHIDef() && VNI->def == LiveInts->getMBBStartIdx(MFI))
- continue;
-
- if (!PVNI) {
- report("Register not marked live out of predecessor", *PI);
- *OS << "Valno #" << VNI->id << " live into BB#" << MFI->getNumber()
- << '@' << LiveInts->getMBBStartIdx(MFI) << ", not live before "
- << PEnd << " in " << LI << '\n';
- continue;
- }
- if (PVNI != VNI) {
- report("Different value live out of predecessor", *PI);
- *OS << "Valno #" << PVNI->id << " live out of BB#"
- << (*PI)->getNumber() << '@' << PEnd
- << "\nValno #" << VNI->id << " live into BB#" << MFI->getNumber()
- << '@' << LiveInts->getMBBStartIdx(MFI) << " in " << LI << '\n';
- }
- }
- if (&*MFI == EndMBB)
- break;
- ++MFI;
+ // Only PHI-defs can take different predecessor values.
+ if (!IsPHI && PVNI != VNI) {
+ report("Different value live out of predecessor", *PI, LI);
+ *OS << "Valno #" << PVNI->id << " live out of BB#"
+ << (*PI)->getNumber() << '@' << PEnd
+ << "\nValno #" << VNI->id << " live into BB#" << MFI->getNumber()
+ << '@' << LiveInts->getMBBStartIdx(MFI) << '\n';
}
}
+ if (&*MFI == EndMBB)
+ break;
+ ++MFI;
+ }
+}
- // Check the LI only has one connected component.
- if (TargetRegisterInfo::isVirtualRegister(LI.reg)) {
- ConnectedVNInfoEqClasses ConEQ(*LiveInts);
- unsigned NumComp = ConEQ.Classify(&LI);
- if (NumComp > 1) {
- report("Multiple connected components in live interval", MF);
- *OS << NumComp << " components in " << LI << '\n';
- for (unsigned comp = 0; comp != NumComp; ++comp) {
- *OS << comp << ": valnos";
- for (LiveInterval::const_vni_iterator I = LI.vni_begin(),
- E = LI.vni_end(); I!=E; ++I)
- if (comp == ConEQ.getEqClass(*I))
- *OS << ' ' << (*I)->id;
- *OS << '\n';
- }
+void MachineVerifier::verifyLiveInterval(const LiveInterval &LI) { // Verify one interval.
+ for (LiveInterval::const_vni_iterator I = LI.vni_begin(), E = LI.vni_end();
+ I!=E; ++I)
+ verifyLiveIntervalValue(LI, *I); // First pass: each value number.
+
+ for (LiveInterval::const_iterator I = LI.begin(), E = LI.end(); I!=E; ++I)
+ verifyLiveIntervalSegment(LI, I); // Second pass: each live segment.
+
+ // Check the LI only has one connected component (virtual registers only).
+ if (TargetRegisterInfo::isVirtualRegister(LI.reg)) {
+ ConnectedVNInfoEqClasses ConEQ(*LiveInts);
+ unsigned NumComp = ConEQ.Classify(&LI);
+ if (NumComp > 1) {
+ report("Multiple connected components in live interval", MF, LI);
+ for (unsigned comp = 0; comp != NumComp; ++comp) { // One output line per component.
+ *OS << comp << ": valnos";
+ for (LiveInterval::const_vni_iterator I = LI.vni_begin(),
+ E = LI.vni_end(); I!=E; ++I)
+ if (comp == ConEQ.getEqClass(*I)) // Value number belongs to this component.
+ *OS << ' ' << (*I)->id;
+ *OS << '\n';
+ }
+ }
+ }
}
-
diff --git a/lib/CodeGen/PHIElimination.cpp b/lib/CodeGen/PHIElimination.cpp
index 0ed4c34..e6e23da 100644
--- a/lib/CodeGen/PHIElimination.cpp
+++ b/lib/CodeGen/PHIElimination.cpp
@@ -171,23 +171,30 @@ bool PHIElimination::EliminatePHINodes(MachineFunction &MF,
return true;
}
+/// isImplicitlyDefined - Return true if every def of VirtReg in MRI's def list satisfies isImplicitDef().
+/// This includes registers with no defs: the loop then finds no counterexample and falls through to true.
+static bool isImplicitlyDefined(unsigned VirtReg,
+ const MachineRegisterInfo *MRI) {
+ for (MachineRegisterInfo::def_iterator DI = MRI->def_begin(VirtReg),
+ DE = MRI->def_end(); DI != DE; ++DI)
+ if (!DI->isImplicitDef())
+ return false;
+ return true;
+}
+
/// isSourceDefinedByImplicitDef - Return true if all sources of the phi node
/// are implicit_def's.
static bool isSourceDefinedByImplicitDef(const MachineInstr *MPhi,
const MachineRegisterInfo *MRI) {
- for (unsigned i = 1; i != MPhi->getNumOperands(); i += 2) {
- unsigned SrcReg = MPhi->getOperand(i).getReg();
- const MachineInstr *DefMI = MRI->getVRegDef(SrcReg);
- if (!DefMI || !DefMI->isImplicitDef())
+ for (unsigned i = 1; i != MPhi->getNumOperands(); i += 2)
+ if (!isImplicitlyDefined(MPhi->getOperand(i).getReg(), MRI))
return false;
- }
return true;
}
-
/// LowerAtomicPHINode - Lower the PHI node at the top of the specified block,
-/// under the assuption that it needs to be lowered in a way that supports
+/// under the assumption that it needs to be lowered in a way that supports
/// atomic execution of PHIs. This lowering method is always correct all of the
/// time.
///
@@ -287,7 +294,8 @@ void PHIElimination::LowerAtomicPHINode(
for (int i = NumSrcs - 1; i >= 0; --i) {
unsigned SrcReg = MPhi->getOperand(i*2+1).getReg();
unsigned SrcSubReg = MPhi->getOperand(i*2+1).getSubReg();
-
+ bool SrcUndef = MPhi->getOperand(i*2+1).isUndef() ||
+ isImplicitlyDefined(SrcReg, MRI);
assert(TargetRegisterInfo::isVirtualRegister(SrcReg) &&
"Machine PHI Operands must all be virtual registers!");
@@ -295,14 +303,6 @@ void PHIElimination::LowerAtomicPHINode(
// path the PHI.
MachineBasicBlock &opBlock = *MPhi->getOperand(i*2+2).getMBB();
- // If source is defined by an implicit def, there is no need to insert a
- // copy.
- MachineInstr *DefMI = MRI->getVRegDef(SrcReg);
- if (DefMI->isImplicitDef()) {
- ImpDefs.insert(DefMI);
- continue;
- }
-
// Check to make sure we haven't already emitted the copy for this block.
// This can happen because PHI nodes may have multiple entries for the same
// basic block.
@@ -315,12 +315,27 @@ void PHIElimination::LowerAtomicPHINode(
findPHICopyInsertPoint(&opBlock, &MBB, SrcReg);
// Insert the copy.
- if (!reusedIncoming && IncomingReg)
- BuildMI(opBlock, InsertPos, MPhi->getDebugLoc(),
- TII->get(TargetOpcode::COPY), IncomingReg).addReg(SrcReg, 0, SrcSubReg);
+ if (!reusedIncoming && IncomingReg) {
+ if (SrcUndef) {
+ // The source register is undefined, so there is no need for a real
+ // COPY, but we still need to ensure joint dominance by defs.
+ // Insert an IMPLICIT_DEF instruction.
+ BuildMI(opBlock, InsertPos, MPhi->getDebugLoc(),
+ TII->get(TargetOpcode::IMPLICIT_DEF), IncomingReg);
+
+ // Clean up the old implicit-def, if there even was one.
+ if (MachineInstr *DefMI = MRI->getVRegDef(SrcReg))
+ if (DefMI->isImplicitDef())
+ ImpDefs.insert(DefMI);
+ } else {
+ BuildMI(opBlock, InsertPos, MPhi->getDebugLoc(),
+ TII->get(TargetOpcode::COPY), IncomingReg)
+ .addReg(SrcReg, 0, SrcSubReg);
+ }
+ }
// Now update live variable information if we have it. Otherwise we're done
- if (!LV) continue;
+ if (SrcUndef || !LV) continue;
// We want to be able to insert a kill of the register if this PHI (aka, the
// copy we just inserted) is the last use of the source value. Live
@@ -340,39 +355,35 @@ void PHIElimination::LowerAtomicPHINode(
// add a kill marker in this block saying that it kills the incoming value!
if (!ValueIsUsed && !LV->isLiveOut(SrcReg, opBlock)) {
// In our final twist, we have to decide which instruction kills the
- // register. In most cases this is the copy, however, the first
- // terminator instruction at the end of the block may also use the value.
- // In this case, we should mark *it* as being the killing block, not the
- // copy.
- MachineBasicBlock::iterator KillInst;
- MachineBasicBlock::iterator Term = opBlock.getFirstTerminator();
- if (Term != opBlock.end() && Term->readsRegister(SrcReg)) {
- KillInst = Term;
-
- // Check that no other terminators use values.
-#ifndef NDEBUG
- for (MachineBasicBlock::iterator TI = llvm::next(Term);
- TI != opBlock.end(); ++TI) {
- if (TI->isDebugValue())
- continue;
- assert(!TI->readsRegister(SrcReg) &&
- "Terminator instructions cannot use virtual registers unless"
- "they are the first terminator in a block!");
- }
-#endif
- } else if (reusedIncoming || !IncomingReg) {
- // We may have to rewind a bit if we didn't insert a copy this time.
- KillInst = Term;
- while (KillInst != opBlock.begin()) {
- --KillInst;
- if (KillInst->isDebugValue())
- continue;
- if (KillInst->readsRegister(SrcReg))
- break;
+ // register. In most cases this is the copy, however, terminator
+ // instructions at the end of the block may also use the value. In this
+ // case, we should mark the last such terminator as being the killing
+ // instruction, not the copy.
+ MachineBasicBlock::iterator KillInst = opBlock.end();
+ MachineBasicBlock::iterator FirstTerm = opBlock.getFirstTerminator();
+ for (MachineBasicBlock::iterator Term = FirstTerm;
+ Term != opBlock.end(); ++Term) {
+ if (Term->readsRegister(SrcReg))
+ KillInst = Term;
+ }
+
+ if (KillInst == opBlock.end()) {
+ // No terminator uses the register.
+
+ if (reusedIncoming || !IncomingReg) {
+ // We may have to rewind a bit if we didn't insert a copy this time.
+ KillInst = FirstTerm;
+ while (KillInst != opBlock.begin()) {
+ --KillInst;
+ if (KillInst->isDebugValue())
+ continue;
+ if (KillInst->readsRegister(SrcReg))
+ break;
+ }
+ } else {
+ // We just inserted this copy.
+ KillInst = prior(InsertPos);
}
- } else {
- // We just inserted this copy.
- KillInst = prior(InsertPos);
}
assert(KillInst->readsRegister(SrcReg) && "Cannot find kill instruction");
@@ -412,28 +423,71 @@ bool PHIElimination::SplitPHIEdges(MachineFunction &MF,
if (MBB.empty() || !MBB.front().isPHI() || MBB.isLandingPad())
return false; // Quick exit for basic blocks without PHIs.
+ const MachineLoop *CurLoop = MLI ? MLI->getLoopFor(&MBB) : 0;
+ bool IsLoopHeader = CurLoop && &MBB == CurLoop->getHeader();
+
bool Changed = false;
for (MachineBasicBlock::iterator BBI = MBB.begin(), BBE = MBB.end();
BBI != BBE && BBI->isPHI(); ++BBI) {
for (unsigned i = 1, e = BBI->getNumOperands(); i != e; i += 2) {
unsigned Reg = BBI->getOperand(i).getReg();
MachineBasicBlock *PreMBB = BBI->getOperand(i+1).getMBB();
- // We break edges when registers are live out from the predecessor block
- // (not considering PHI nodes). If the register is live in to this block
- // anyway, we would gain nothing from splitting.
+ // Is there a critical edge from PreMBB to MBB?
+ if (PreMBB->succ_size() == 1)
+ continue;
+
// Avoid splitting backedges of loops. It would introduce small
// out-of-line blocks into the loop which is very bad for code placement.
- if (PreMBB != &MBB &&
- !LV.isLiveIn(Reg, MBB) && LV.isLiveOut(Reg, *PreMBB)) {
- if (!MLI ||
- !(MLI->getLoopFor(PreMBB) == MLI->getLoopFor(&MBB) &&
- MLI->isLoopHeader(&MBB))) {
- if (PreMBB->SplitCriticalEdge(&MBB, this)) {
- Changed = true;
- ++NumCriticalEdgesSplit;
- }
- }
+ if (PreMBB == &MBB)
+ continue;
+ const MachineLoop *PreLoop = MLI ? MLI->getLoopFor(PreMBB) : 0;
+ if (IsLoopHeader && PreLoop == CurLoop)
+ continue;
+
+ // LV doesn't consider a phi use live-out, so isLiveOut only returns true
+ // when the source register is live-out for some other reason than a phi
+ // use. That means the copy we will insert in PreMBB won't be a kill, and
+ // there is a risk it may not be coalesced away.
+ //
+ // If the copy would be a kill, there is no need to split the edge.
+ if (!LV.isLiveOut(Reg, *PreMBB))
+ continue;
+
+ DEBUG(dbgs() << PrintReg(Reg) << " live-out before critical edge BB#"
+ << PreMBB->getNumber() << " -> BB#" << MBB.getNumber()
+ << ": " << *BBI);
+
+ // If Reg is not live-in to MBB, it means it must be live-in to some
+ // other PreMBB successor, and we can avoid the interference by splitting
+ // the edge.
+ //
+ // If Reg *is* live-in to MBB, the interference is inevitable and a copy
+ // is likely to be left after coalescing. If we are looking at a loop
+ // exiting edge, split it so we won't insert code in the loop, otherwise
+ // don't bother.
+ bool ShouldSplit = !LV.isLiveIn(Reg, MBB);
+
+ // Check for a loop exiting edge.
+ if (!ShouldSplit && CurLoop != PreLoop) {
+ DEBUG({
+ dbgs() << "Split wouldn't help, maybe avoid loop copies?\n";
+ if (PreLoop) dbgs() << "PreLoop: " << *PreLoop;
+ if (CurLoop) dbgs() << "CurLoop: " << *CurLoop;
+ });
+ // This edge could be entering a loop, exiting a loop, or it could be
+ // both: Jumping directly from one loop to the header of a sibling
+ // loop.
+ // Split unless this edge is entering CurLoop from an outer loop.
+ ShouldSplit = PreLoop && !PreLoop->contains(CurLoop);
+ }
+ if (!ShouldSplit)
+ continue;
+ if (!PreMBB->SplitCriticalEdge(&MBB, this)) {
+ DEBUG(dbgs() << "Failed to split ciritcal edge.\n");
+ continue;
}
+ Changed = true;
+ ++NumCriticalEdgesSplit;
}
}
return Changed;
diff --git a/lib/CodeGen/Passes.cpp b/lib/CodeGen/Passes.cpp
index 490547b..cfa3eec 100644
--- a/lib/CodeGen/Passes.cpp
+++ b/lib/CodeGen/Passes.cpp
@@ -22,6 +22,7 @@
#include "llvm/CodeGen/RegAllocRegistry.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetOptions.h"
+#include "llvm/MC/MCAsmInfo.h"
#include "llvm/Assembly/PrintModulePass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
@@ -48,6 +49,8 @@ static cl::opt<bool> DisableSSC("disable-ssc", cl::Hidden,
cl::desc("Disable Stack Slot Coloring"));
static cl::opt<bool> DisableMachineDCE("disable-machine-dce", cl::Hidden,
cl::desc("Disable Machine Dead Code Elimination"));
+static cl::opt<bool> EnableEarlyIfConversion("enable-early-ifcvt", cl::Hidden,
+ cl::desc("Enable Early If-conversion"));
static cl::opt<bool> DisableMachineLICM("disable-machine-licm", cl::Hidden,
cl::desc("Disable Machine LICM"));
static cl::opt<bool> DisableMachineCSE("disable-machine-cse", cl::Hidden,
@@ -80,15 +83,23 @@ static cl::opt<bool> PrintGCInfo("print-gc", cl::Hidden,
static cl::opt<bool> VerifyMachineCode("verify-machineinstrs", cl::Hidden,
cl::desc("Verify generated machine code"),
cl::init(getenv("LLVM_VERIFY_MACHINEINSTRS")!=NULL));
+static cl::opt<std::string>
+PrintMachineInstrs("print-machineinstrs", cl::ValueOptional,
+ cl::desc("Print machine instrs"),
+ cl::value_desc("pass-name"), cl::init("option-unspecified"));
+
+// Experimental option to run live interval analysis early.
+static cl::opt<bool> EarlyLiveIntervals("early-live-intervals", cl::Hidden,
+ cl::desc("Run live interval analysis earlier in the pipeline"));
/// Allow standard passes to be disabled by command line options. This supports
/// simple binary flags that either suppress the pass or do nothing.
/// i.e. -disable-mypass=false has no effect.
/// These should be converted to boolOrDefault in order to use applyOverride.
-static AnalysisID applyDisable(AnalysisID ID, bool Override) {
+static AnalysisID applyDisable(AnalysisID PassID, bool Override) {
if (Override)
- return &NoPassID;
- return ID;
+ return 0;
+ return PassID;
}
/// Allow Pass selection to be overriden by command line options. This supports
@@ -101,13 +112,13 @@ static AnalysisID applyOverride(AnalysisID TargetID, cl::boolOrDefault Override,
case cl::BOU_UNSET:
return TargetID;
case cl::BOU_TRUE:
- if (TargetID != &NoPassID)
+ if (TargetID)
return TargetID;
- if (StandardID == &NoPassID)
+ if (StandardID == 0)
report_fatal_error("Target cannot enable pass");
return StandardID;
case cl::BOU_FALSE:
- return &NoPassID;
+ return 0;
}
llvm_unreachable("Invalid command line option state");
}
@@ -149,6 +160,9 @@ static AnalysisID overridePass(AnalysisID StandardID, AnalysisID TargetID) {
if (StandardID == &DeadMachineInstructionElimID)
return applyDisable(TargetID, DisableMachineDCE);
+ if (StandardID == &EarlyIfConverterID)
+ return applyDisable(TargetID, !EnableEarlyIfConversion);
+
if (StandardID == &MachineLICMID)
return applyDisable(TargetID, DisableMachineLICM);
@@ -178,9 +192,6 @@ INITIALIZE_PASS(TargetPassConfig, "targetpassconfig",
"Target Pass Configuration", false, false)
char TargetPassConfig::ID = 0;
-static char NoPassIDAnchor = 0;
-char &llvm::NoPassID = NoPassIDAnchor;
-
// Pseudo Pass IDs.
char TargetPassConfig::EarlyTailDuplicateID = 0;
char TargetPassConfig::PostRAMachineLICMID = 0;
@@ -193,9 +204,13 @@ public:
// that are part of a standard pass pipeline without overridding the entire
// pipeline. This mechanism allows target options to inherit a standard pass's
// user interface. For example, a target may disable a standard pass by
- // default by substituting NoPass, and the user may still enable that standard
- // pass with an explicit command line option.
+ // default by substituting a pass ID of zero, and the user may still enable
+ // that standard pass with an explicit command line option.
DenseMap<AnalysisID,AnalysisID> TargetPasses;
+
+ /// Store the pairs of <AnalysisID, AnalysisID> of which the second pass
+ /// is inserted after each instance of the first one.
+ SmallVector<std::pair<AnalysisID, AnalysisID>, 4> InsertedPasses;
};
} // namespace llvm
@@ -207,7 +222,8 @@ TargetPassConfig::~TargetPassConfig() {
// Out of line constructor provides default values for pass options and
// registers all common codegen passes.
TargetPassConfig::TargetPassConfig(TargetMachine *tm, PassManagerBase &pm)
- : ImmutablePass(ID), TM(tm), PM(&pm), Impl(0), Initialized(false),
+ : ImmutablePass(ID), PM(&pm), StartAfter(0), StopAfter(0),
+ Started(true), Stopped(false), TM(tm), Impl(0), Initialized(false),
DisableVerify(false),
EnableTailMerge(true) {
@@ -218,11 +234,22 @@ TargetPassConfig::TargetPassConfig(TargetMachine *tm, PassManagerBase &pm)
initializeCodeGen(*PassRegistry::getPassRegistry());
// Substitute Pseudo Pass IDs for real ones.
- substitutePass(EarlyTailDuplicateID, TailDuplicateID);
- substitutePass(PostRAMachineLICMID, MachineLICMID);
+ substitutePass(&EarlyTailDuplicateID, &TailDuplicateID);
+ substitutePass(&PostRAMachineLICMID, &MachineLICMID);
+
+ // Disable early if-conversion. Targets that are ready can enable it.
+ disablePass(&EarlyIfConverterID);
// Temporarily disable experimental passes.
- substitutePass(MachineSchedulerID, NoPassID);
+ substitutePass(&MachineSchedulerID, 0);
+}
+
+/// Insert InsertedPassID pass after TargetPassID. This only records the (TargetPassID, InsertedPassID) pair in Impl->InsertedPasses; the two IDs must differ.
+void TargetPassConfig::insertPass(AnalysisID TargetPassID,
+ AnalysisID InsertedPassID) {
+ assert(TargetPassID != InsertedPassID && "Insert a pass after itself!");
+ std::pair<AnalysisID, AnalysisID> P(TargetPassID, InsertedPassID);
+ Impl->InsertedPasses.push_back(P);
}
/// createPassConfig - Create a pass configuration object to be used by
@@ -244,8 +271,9 @@ void TargetPassConfig::setOpt(bool &Opt, bool Val) {
Opt = Val;
}
-void TargetPassConfig::substitutePass(char &StandardID, char &TargetID) {
- Impl->TargetPasses[&StandardID] = &TargetID;
+void TargetPassConfig::substitutePass(AnalysisID StandardID,
+ AnalysisID TargetID) {
+ Impl->TargetPasses[StandardID] = TargetID;
}
AnalysisID TargetPassConfig::getPassSubstitution(AnalysisID ID) const {
@@ -256,29 +284,62 @@ AnalysisID TargetPassConfig::getPassSubstitution(AnalysisID ID) const {
return I->second;
}
-/// Add a CodeGen pass at this point in the pipeline after checking for target
-/// and command line overrides.
-AnalysisID TargetPassConfig::addPass(char &ID) {
+/// Add a pass to the PassManager if that pass is supposed to be run: P is
+/// handed to PM only while Started is set and Stopped is clear. Afterwards P's
+/// ID is compared against StopAfter and StartAfter and the Stopped/Started
+/// flags are updated, so a flag change takes effect for the passes that follow
+/// P. Reports a fatal error if Stopped is set while Started is not.
+void TargetPassConfig::addPass(Pass *P) {
assert(!Initialized && "PassConfig is immutable");
- AnalysisID TargetID = getPassSubstitution(&ID);
- AnalysisID FinalID = overridePass(&ID, TargetID);
- if (FinalID == &NoPassID)
+ // Cache the Pass ID here in case the pass manager finds this pass is
+ // redundant with ones already scheduled / available, and deletes it.
+ // Fundamentally, once we add the pass to the manager, we no longer own it
+ // and shouldn't reference it.
+ AnalysisID PassID = P->getPassID();
+
+ if (Started && !Stopped)
+ PM->add(P);
+ if (StopAfter == PassID)
+ Stopped = true;
+ if (StartAfter == PassID)
+ Started = true;
+ if (Stopped && !Started)
+ report_fatal_error("Cannot stop compilation after pass that is not run");
+}
+
+/// Add a CodeGen pass at this point in the pipeline after checking for target
+/// and command line overrides. Returns the pass ID actually used, or 0 (adding nothing) when the pass is disabled.
+AnalysisID TargetPassConfig::addPass(AnalysisID PassID) {
+ AnalysisID TargetID = getPassSubstitution(PassID);
+ AnalysisID FinalID = overridePass(PassID, TargetID);
+ if (FinalID == 0)
return FinalID;
Pass *P = Pass::createPass(FinalID);
if (!P)
llvm_unreachable("Pass ID not registered");
- PM->add(P);
+ addPass(P);
+ // Add the passes recorded in InsertedPasses to follow P, if there are any; entries are matched on the requested PassID, not on FinalID.
+ for (SmallVector<std::pair<AnalysisID, AnalysisID>, 4>::iterator
+ I = Impl->InsertedPasses.begin(), E = Impl->InsertedPasses.end();
+ I != E; ++I) {
+ if ((*I).first == PassID) {
+ assert((*I).second && "Illegal Pass ID!");
+ Pass *NP = Pass::createPass((*I).second);
+ assert(NP && "Pass ID not registered");
+ addPass(NP);
+ }
+ }
return FinalID;
}
-void TargetPassConfig::printAndVerify(const char *Banner) const {
+void TargetPassConfig::printAndVerify(const char *Banner) {
if (TM->shouldPrintMachineCode())
- PM->add(createMachineFunctionPrinterPass(dbgs(), Banner));
+ addPass(createMachineFunctionPrinterPass(dbgs(), Banner));
if (VerifyMachineCode)
- PM->add(createMachineVerifierPass(Banner));
+ addPass(createMachineVerifierPass(Banner));
}
/// Add common target configurable passes that perform LLVM IR to IR transforms
@@ -288,46 +349,73 @@ void TargetPassConfig::addIRPasses() {
// Add TypeBasedAliasAnalysis before BasicAliasAnalysis so that
// BasicAliasAnalysis wins if they disagree. This is intended to help
// support "obvious" type-punning idioms.
- PM->add(createTypeBasedAliasAnalysisPass());
- PM->add(createBasicAliasAnalysisPass());
+ addPass(createTypeBasedAliasAnalysisPass());
+ addPass(createBasicAliasAnalysisPass());
// Before running any passes, run the verifier to determine if the input
// coming from the front-end and/or optimizer is valid.
if (!DisableVerify)
- PM->add(createVerifierPass());
+ addPass(createVerifierPass());
// Run loop strength reduction before anything else.
if (getOptLevel() != CodeGenOpt::None && !DisableLSR) {
- PM->add(createLoopStrengthReducePass(getTargetLowering()));
+ addPass(createLoopStrengthReducePass(getTargetLowering()));
if (PrintLSR)
- PM->add(createPrintFunctionPass("\n\n*** Code after LSR ***\n", &dbgs()));
+ addPass(createPrintFunctionPass("\n\n*** Code after LSR ***\n", &dbgs()));
}
- PM->add(createGCLoweringPass());
+ addPass(createGCLoweringPass());
// Make sure that no unreachable blocks are instruction selected.
- PM->add(createUnreachableBlockEliminationPass());
+ addPass(createUnreachableBlockEliminationPass());
+}
+
+/// Turn exception handling constructs into something the code generators can
+/// handle. The passes added depend on the exception handling type reported by TM's MCAsmInfo.
+void TargetPassConfig::addPassesToHandleExceptions() {
+ switch (TM->getMCAsmInfo()->getExceptionHandlingType()) {
+ case ExceptionHandling::SjLj:
+ // SjLj piggy-backs on dwarf for this bit. The cleanups done apply to both.
+ // Dwarf EH prepare needs to be run after SjLj prepare. Otherwise,
+ // catch info can get misplaced when a selector ends up more than one block
+ // removed from the parent invoke(s). This could happen when a landing
+ // pad is shared by multiple invokes and is also a target of a normal
+ // edge from elsewhere.
+ addPass(createSjLjEHPreparePass(TM->getTargetLowering()));
+ // FALLTHROUGH
+ case ExceptionHandling::DwarfCFI:
+ case ExceptionHandling::ARM:
+ case ExceptionHandling::Win64:
+ addPass(createDwarfEHPass(TM));
+ break;
+ case ExceptionHandling::None:
+ addPass(createLowerInvokePass(TM->getTargetLowering()));
+
+ // The lower invoke pass may create unreachable code. Remove it.
+ addPass(createUnreachableBlockEliminationPass());
+ break;
+ }
}
/// Add common passes that perform LLVM IR to IR transforms in preparation for
/// instruction selection.
void TargetPassConfig::addISelPrepare() {
if (getOptLevel() != CodeGenOpt::None && !DisableCGP)
- PM->add(createCodeGenPreparePass(getTargetLowering()));
+ addPass(createCodeGenPreparePass(getTargetLowering()));
- PM->add(createStackProtectorPass(getTargetLowering()));
+ addPass(createStackProtectorPass(getTargetLowering()));
addPreISel();
if (PrintISelInput)
- PM->add(createPrintFunctionPass("\n\n"
+ addPass(createPrintFunctionPass("\n\n"
"*** Final LLVM Code input to ISel ***\n",
&dbgs()));
// All passes which modify the LLVM IR are now complete; run the verifier
// to ensure that the IR is valid.
if (!DisableVerify)
- PM->add(createVerifierPass());
+ addPass(createVerifierPass());
}
/// Add the complete set of target-independent postISel code generator passes.
@@ -349,11 +437,26 @@ void TargetPassConfig::addISelPrepare() {
/// TODO: We could use a single addPre/Post(ID) hook to allow pass injection
/// before/after any target-independent pass. But it's currently overkill.
void TargetPassConfig::addMachinePasses() {
+ // Insert a machine instr printer pass after the specified pass.
+ // If -print-machineinstrs specified, print machineinstrs after all passes.
+ if (StringRef(PrintMachineInstrs.getValue()).equals(""))
+ TM->Options.PrintMachineCode = true;
+ else if (!StringRef(PrintMachineInstrs.getValue())
+ .equals("option-unspecified")) {
+ const PassRegistry *PR = PassRegistry::getPassRegistry();
+ const PassInfo *TPI = PR->getPassInfo(PrintMachineInstrs.getValue());
+ const PassInfo *IPI = PR->getPassInfo(StringRef("print-machineinstrs"));
+ assert (TPI && IPI && "Pass ID not registered!");
+ const char *TID = (char *)(TPI->getTypeInfo());
+ const char *IID = (char *)(IPI->getTypeInfo());
+ insertPass(TID, IID);
+ }
+
// Print the instruction selected machine code...
printAndVerify("After Instruction Selection");
// Expand pseudo-instructions emitted by ISel.
- addPass(ExpandISelPseudosID);
+ addPass(&ExpandISelPseudosID);
// Add passes that optimize machine instructions in SSA form.
if (getOptLevel() != CodeGenOpt::None) {
@@ -362,7 +465,7 @@ void TargetPassConfig::addMachinePasses() {
else {
// If the target requests it, assign local variables to stack slots relative
// to one another and simplify frame index references where possible.
- addPass(LocalStackSlotAllocationID);
+ addPass(&LocalStackSlotAllocationID);
}
// Run pre-ra passes.
@@ -381,7 +484,7 @@ void TargetPassConfig::addMachinePasses() {
printAndVerify("After PostRegAlloc passes");
// Insert prolog/epilog code. Eliminate abstract frame index references...
- addPass(PrologEpilogCodeInserterID);
+ addPass(&PrologEpilogCodeInserterID);
printAndVerify("After PrologEpilogCodeInserter");
/// Add passes that optimize machine instructions after register allocation.
@@ -389,7 +492,7 @@ void TargetPassConfig::addMachinePasses() {
addMachineLateOptimization();
// Expand pseudo instructions before second scheduling pass.
- addPass(ExpandPostRAPseudosID);
+ addPass(&ExpandPostRAPseudosID);
printAndVerify("After ExpandPostRAPseudos");
// Run pre-sched2 passes.
@@ -398,14 +501,14 @@ void TargetPassConfig::addMachinePasses() {
// Second pass scheduler.
if (getOptLevel() != CodeGenOpt::None) {
- addPass(PostRASchedulerID);
+ addPass(&PostRASchedulerID);
printAndVerify("After PostRAScheduler");
}
// GC
- addPass(GCMachineCodeAnalysisID);
+ addPass(&GCMachineCodeAnalysisID);
if (PrintGCInfo)
- PM->add(createGCInfoPrinter(dbgs()));
+ addPass(createGCInfoPrinter(dbgs()));
// Basic block placement.
if (getOptLevel() != CodeGenOpt::None)
@@ -418,30 +521,31 @@ void TargetPassConfig::addMachinePasses() {
/// Add passes that optimize machine instructions in SSA form.
void TargetPassConfig::addMachineSSAOptimization() {
// Pre-ra tail duplication.
- if (addPass(EarlyTailDuplicateID) != &NoPassID)
+ if (addPass(&EarlyTailDuplicateID))
printAndVerify("After Pre-RegAlloc TailDuplicate");
// Optimize PHIs before DCE: removing dead PHI cycles may make more
// instructions dead.
- addPass(OptimizePHIsID);
+ addPass(&OptimizePHIsID);
// If the target requests it, assign local variables to stack slots relative
// to one another and simplify frame index references where possible.
- addPass(LocalStackSlotAllocationID);
+ addPass(&LocalStackSlotAllocationID);
// With optimization, dead code should already be eliminated. However
// there is one known exception: lowered code for arguments that are only
// used by tail calls, where the tail calls reuse the incoming stack
// arguments directly (see t11 in test/CodeGen/X86/sibcall.ll).
- addPass(DeadMachineInstructionElimID);
+ addPass(&DeadMachineInstructionElimID);
printAndVerify("After codegen DCE pass");
- addPass(MachineLICMID);
- addPass(MachineCSEID);
- addPass(MachineSinkingID);
+ addPass(&EarlyIfConverterID);
+ addPass(&MachineLICMID);
+ addPass(&MachineCSEID);
+ addPass(&MachineSinkingID);
printAndVerify("After Machine LICM, CSE and Sinking passes");
- addPass(PeepholeOptimizerID);
+ addPass(&PeepholeOptimizerID);
printAndVerify("After codegen peephole optimization pass");
}
@@ -519,10 +623,10 @@ FunctionPass *TargetPassConfig::createRegAllocPass(bool Optimized) {
/// Add the minimum set of target-independent passes that are required for
/// register allocation. No coalescing or scheduling.
void TargetPassConfig::addFastRegAlloc(FunctionPass *RegAllocPass) {
- addPass(PHIEliminationID);
- addPass(TwoAddressInstructionPassID);
+ addPass(&PHIEliminationID);
+ addPass(&TwoAddressInstructionPassID);
- PM->add(RegAllocPass);
+ addPass(RegAllocPass);
printAndVerify("After Register Allocation");
}
@@ -530,42 +634,51 @@ void TargetPassConfig::addFastRegAlloc(FunctionPass *RegAllocPass) {
/// optimized register allocation, including coalescing, machine instruction
/// scheduling, and register allocation itself.
void TargetPassConfig::addOptimizedRegAlloc(FunctionPass *RegAllocPass) {
+ addPass(&ProcessImplicitDefsID);
+
// LiveVariables currently requires pure SSA form.
//
// FIXME: Once TwoAddressInstruction pass no longer uses kill flags,
// LiveVariables can be removed completely, and LiveIntervals can be directly
// computed. (We still either need to regenerate kill flags after regalloc, or
// preferably fix the scavenger to not depend on them).
- addPass(LiveVariablesID);
+ addPass(&LiveVariablesID);
// Add passes that move from transformed SSA into conventional SSA. This is a
// "copy coalescing" problem.
//
if (!EnableStrongPHIElim) {
// Edge splitting is smarter with machine loop info.
- addPass(MachineLoopInfoID);
- addPass(PHIEliminationID);
+ addPass(&MachineLoopInfoID);
+ addPass(&PHIEliminationID);
}
- addPass(TwoAddressInstructionPassID);
- // FIXME: Either remove this pass completely, or fix it so that it works on
- // SSA form. We could modify LiveIntervals to be independent of this pass, But
- // it would be even better to simply eliminate *all* IMPLICIT_DEFs before
- // leaving SSA.
- addPass(ProcessImplicitDefsID);
+ // Eventually, we want to run LiveIntervals before PHI elimination.
+ if (EarlyLiveIntervals)
+ addPass(&LiveIntervalsID);
+
+ addPass(&TwoAddressInstructionPassID);
if (EnableStrongPHIElim)
- addPass(StrongPHIEliminationID);
+ addPass(&StrongPHIEliminationID);
- addPass(RegisterCoalescerID);
+ addPass(&RegisterCoalescerID);
// PreRA instruction scheduling.
- if (addPass(MachineSchedulerID) != &NoPassID)
+ if (addPass(&MachineSchedulerID))
printAndVerify("After Machine Scheduling");
// Add the selected register allocation pass.
- PM->add(RegAllocPass);
- printAndVerify("After Register Allocation");
+ addPass(RegAllocPass);
+ printAndVerify("After Register Allocation, before rewriter");
+
+ // Allow targets to change the register assignments before rewriting.
+ if (addPreRewrite())
+ printAndVerify("After pre-rewrite passes");
+
+ // Finally rewrite virtual registers.
+ addPass(&VirtRegRewriterID);
+ printAndVerify("After Virtual Register Rewriter");
// FinalizeRegAlloc is convenient until MachineInstrBundles is more mature,
// but eventually, all users of it should probably be moved to addPostRA and
@@ -579,12 +692,12 @@ void TargetPassConfig::addOptimizedRegAlloc(FunctionPass *RegAllocPass) {
//
// FIXME: Re-enable coloring with register when it's capable of adding
// kill markers.
- addPass(StackSlotColoringID);
+ addPass(&StackSlotColoringID);
// Run post-ra machine LICM to hoist reloads / remats.
//
// FIXME: can this move into MachineLateOptimization?
- addPass(PostRAMachineLICMID);
+ addPass(&PostRAMachineLICMID);
printAndVerify("After StackSlotColoring and postra Machine LICM");
}
@@ -596,33 +709,33 @@ void TargetPassConfig::addOptimizedRegAlloc(FunctionPass *RegAllocPass) {
/// Add passes that optimize machine instructions after register allocation.
void TargetPassConfig::addMachineLateOptimization() {
// Branch folding must be run after regalloc and prolog/epilog insertion.
- if (addPass(BranchFolderPassID) != &NoPassID)
+ if (addPass(&BranchFolderPassID))
printAndVerify("After BranchFolding");
// Tail duplication.
- if (addPass(TailDuplicateID) != &NoPassID)
+ if (addPass(&TailDuplicateID))
printAndVerify("After TailDuplicate");
// Copy propagation.
- if (addPass(MachineCopyPropagationID) != &NoPassID)
+ if (addPass(&MachineCopyPropagationID))
printAndVerify("After copy propagation pass");
}
/// Add standard basic block placement passes.
void TargetPassConfig::addBlockPlacement() {
- AnalysisID ID = &NoPassID;
+ AnalysisID PassID = 0;
if (!DisableBlockPlacement) {
// MachineBlockPlacement is a new pass which subsumes the functionality of
// CodPlacementOpt. The old code placement pass can be restored by
// disabling block placement, but eventually it will be removed.
- ID = addPass(MachineBlockPlacementID);
+ PassID = addPass(&MachineBlockPlacementID);
} else {
- ID = addPass(CodePlacementOptID);
+ PassID = addPass(&CodePlacementOptID);
}
- if (ID != &NoPassID) {
+ if (PassID) {
// Run a separate pass to collect block placement statistics.
if (EnableBlockPlacementStats)
- addPass(MachineBlockPlacementStatsID);
+ addPass(&MachineBlockPlacementStatsID);
printAndVerify("After machine block placement.");
}
diff --git a/lib/CodeGen/PeepholeOptimizer.cpp b/lib/CodeGen/PeepholeOptimizer.cpp
index 9c5c029..6bc7e37 100644
--- a/lib/CodeGen/PeepholeOptimizer.cpp
+++ b/lib/CodeGen/PeepholeOptimizer.cpp
@@ -31,6 +31,15 @@
// same flag that the "cmp" instruction sets and that "bz" uses, then we can
// eliminate the "cmp" instruction.
//
+// Another instance, in this code:
+//
+// sub r1, r3 | sub r1, imm
+// cmp r3, r1 or cmp r1, r3 | cmp r1, imm
+// bge L1
+//
+// If the branch instruction can use flag from "sub", then we can replace
+// "sub" with "subs" and eliminate the "cmp" instruction.
+//
// - Optimize Bitcast pairs:
//
// v1 = bitcast v0
@@ -69,6 +78,7 @@ STATISTIC(NumReuse, "Number of extension results reused");
STATISTIC(NumBitcasts, "Number of bitcasts eliminated");
STATISTIC(NumCmps, "Number of compares eliminated");
STATISTIC(NumImmFold, "Number of move immediate folded");
+STATISTIC(NumLoadFold, "Number of loads folded");
namespace {
class PeepholeOptimizer : public MachineFunctionPass {
@@ -95,16 +105,17 @@ namespace {
}
private:
- bool OptimizeBitcastInstr(MachineInstr *MI, MachineBasicBlock *MBB);
- bool OptimizeCmpInstr(MachineInstr *MI, MachineBasicBlock *MBB);
- bool OptimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB,
+ bool optimizeBitcastInstr(MachineInstr *MI, MachineBasicBlock *MBB);
+ bool optimizeCmpInstr(MachineInstr *MI, MachineBasicBlock *MBB);
+ bool optimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB,
SmallPtrSet<MachineInstr*, 8> &LocalMIs);
bool isMoveImmediate(MachineInstr *MI,
SmallSet<unsigned, 4> &ImmDefRegs,
DenseMap<unsigned, MachineInstr*> &ImmDefMIs);
- bool FoldImmediate(MachineInstr *MI, MachineBasicBlock *MBB,
+ bool foldImmediate(MachineInstr *MI, MachineBasicBlock *MBB,
SmallSet<unsigned, 4> &ImmDefRegs,
DenseMap<unsigned, MachineInstr*> &ImmDefMIs);
+ bool isLoadFoldable(MachineInstr *MI, unsigned &FoldAsLoadDefReg);
};
}
@@ -116,7 +127,7 @@ INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
INITIALIZE_PASS_END(PeepholeOptimizer, "peephole-opts",
"Peephole Optimizations", false, false)
-/// OptimizeExtInstr - If instruction is a copy-like instruction, i.e. it reads
+/// optimizeExtInstr - If instruction is a copy-like instruction, i.e. it reads
/// a single register and writes a single register and it does not modify the
/// source, and if the source value is preserved as a sub-register of the
/// result, then replace all reachable uses of the source with the subreg of the
@@ -126,7 +137,7 @@ INITIALIZE_PASS_END(PeepholeOptimizer, "peephole-opts",
/// the code. Since this code does not currently share EXTRACTs, just ignore all
/// debug uses.
bool PeepholeOptimizer::
-OptimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB,
+optimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB,
SmallPtrSet<MachineInstr*, 8> &LocalMIs) {
unsigned SrcReg, DstReg, SubIdx;
if (!TII->isCoalescableExtInstr(*MI, SrcReg, DstReg, SubIdx))
@@ -136,16 +147,30 @@ OptimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB,
TargetRegisterInfo::isPhysicalRegister(SrcReg))
return false;
- MachineRegisterInfo::use_nodbg_iterator UI = MRI->use_nodbg_begin(SrcReg);
- if (++UI == MRI->use_nodbg_end())
+ if (MRI->hasOneNonDBGUse(SrcReg))
// No other uses.
return false;
+ // Ensure DstReg can get a register class that actually supports
+ // sub-registers. Don't change the class until we commit.
+ const TargetRegisterClass *DstRC = MRI->getRegClass(DstReg);
+ DstRC = TM->getRegisterInfo()->getSubClassWithSubReg(DstRC, SubIdx);
+ if (!DstRC)
+ return false;
+
+ // The ext instr may be operating on a sub-register of SrcReg as well.
+ // PPC::EXTSW is a 32 -> 64-bit sign extension, but it reads a 64-bit
+ // register.
+ // If UseSrcSubIdx is set, SubIdx also applies to SrcReg, and only uses of
+ // SrcReg:SubIdx should be replaced.
+ bool UseSrcSubIdx = TM->getRegisterInfo()->
+ getSubClassWithSubReg(MRI->getRegClass(SrcReg), SubIdx) != 0;
+
// The source has other uses. See if we can replace the other uses with use of
// the result of the extension.
SmallPtrSet<MachineBasicBlock*, 4> ReachedBBs;
- UI = MRI->use_nodbg_begin(DstReg);
- for (MachineRegisterInfo::use_nodbg_iterator UE = MRI->use_nodbg_end();
+ for (MachineRegisterInfo::use_nodbg_iterator
+ UI = MRI->use_nodbg_begin(DstReg), UE = MRI->use_nodbg_end();
UI != UE; ++UI)
ReachedBBs.insert(UI->getParent());
@@ -156,8 +181,8 @@ OptimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB,
SmallVector<MachineOperand*, 8> ExtendedUses;
bool ExtendLife = true;
- UI = MRI->use_nodbg_begin(SrcReg);
- for (MachineRegisterInfo::use_nodbg_iterator UE = MRI->use_nodbg_end();
+ for (MachineRegisterInfo::use_nodbg_iterator
+ UI = MRI->use_nodbg_begin(SrcReg), UE = MRI->use_nodbg_end();
UI != UE; ++UI) {
MachineOperand &UseMO = UI.getOperand();
MachineInstr *UseMI = &*UI;
@@ -169,6 +194,10 @@ OptimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB,
continue;
}
+ // Only accept uses of SrcReg:SubIdx.
+ if (UseSrcSubIdx && UseMO.getSubReg() != SubIdx)
+ continue;
+
// It's an error to translate this:
//
// %reg1025 = <sext> %reg1024
@@ -223,9 +252,9 @@ OptimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB,
// Look for PHI uses of the extended result, we don't want to extend the
// liveness of a PHI input. It breaks all kinds of assumptions down
// stream. A PHI use is expected to be the kill of its source values.
- UI = MRI->use_nodbg_begin(DstReg);
for (MachineRegisterInfo::use_nodbg_iterator
- UE = MRI->use_nodbg_end(); UI != UE; ++UI)
+ UI = MRI->use_nodbg_begin(DstReg), UE = MRI->use_nodbg_end();
+ UI != UE; ++UI)
if (UI->isPHI())
PHIBBs.insert(UI->getParent());
@@ -238,14 +267,20 @@ OptimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB,
continue;
// About to add uses of DstReg, clear DstReg's kill flags.
- if (!Changed)
+ if (!Changed) {
MRI->clearKillFlags(DstReg);
+ MRI->constrainRegClass(DstReg, DstRC);
+ }
unsigned NewVR = MRI->createVirtualRegister(RC);
- BuildMI(*UseMBB, UseMI, UseMI->getDebugLoc(),
- TII->get(TargetOpcode::COPY), NewVR)
+ MachineInstr *Copy = BuildMI(*UseMBB, UseMI, UseMI->getDebugLoc(),
+ TII->get(TargetOpcode::COPY), NewVR)
.addReg(DstReg, 0, SubIdx);
-
+ // SubIdx applies to both SrcReg and DstReg when UseSrcSubIdx is set.
+ if (UseSrcSubIdx) {
+ Copy->getOperand(0).setSubReg(SubIdx);
+ Copy->getOperand(0).setIsUndef();
+ }
UseMO->setReg(NewVR);
++NumReuse;
Changed = true;
@@ -255,7 +290,7 @@ OptimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB,
return Changed;
}
-/// OptimizeBitcastInstr - If the instruction is a bitcast instruction A that
+/// optimizeBitcastInstr - If the instruction is a bitcast instruction A that
/// cannot be optimized away during isel (e.g. ARM::VMOVSR, which bitcast
/// a value cross register classes), and the source is defined by another
/// bitcast instruction B. And if the register class of source of B matches
@@ -265,7 +300,7 @@ OptimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB,
/// %vreg3<def> = VMOVRS %vreg0
/// Replace all uses of vreg3 with vreg1.
-bool PeepholeOptimizer::OptimizeBitcastInstr(MachineInstr *MI,
+bool PeepholeOptimizer::optimizeBitcastInstr(MachineInstr *MI,
MachineBasicBlock *MBB) {
unsigned NumDefs = MI->getDesc().getNumDefs();
unsigned NumSrcs = MI->getDesc().getNumOperands() - NumDefs;
@@ -327,22 +362,23 @@ bool PeepholeOptimizer::OptimizeBitcastInstr(MachineInstr *MI,
return true;
}
-/// OptimizeCmpInstr - If the instruction is a compare and the previous
+/// optimizeCmpInstr - If the instruction is a compare and the previous
/// instruction it's comparing against all ready sets (or could be modified to
/// set) the same flag as the compare, then we can remove the comparison and use
/// the flag from the previous instruction.
-bool PeepholeOptimizer::OptimizeCmpInstr(MachineInstr *MI,
+bool PeepholeOptimizer::optimizeCmpInstr(MachineInstr *MI,
MachineBasicBlock *MBB) {
// If this instruction is a comparison against zero and isn't comparing a
// physical register, we can try to optimize it.
- unsigned SrcReg;
+ unsigned SrcReg, SrcReg2;
int CmpMask, CmpValue;
- if (!TII->AnalyzeCompare(MI, SrcReg, CmpMask, CmpValue) ||
- TargetRegisterInfo::isPhysicalRegister(SrcReg))
+ if (!TII->analyzeCompare(MI, SrcReg, SrcReg2, CmpMask, CmpValue) ||
+ TargetRegisterInfo::isPhysicalRegister(SrcReg) ||
+ (SrcReg2 != 0 && TargetRegisterInfo::isPhysicalRegister(SrcReg2)))
return false;
// Attempt to optimize the comparison instruction.
- if (TII->OptimizeCompareInstr(MI, SrcReg, CmpMask, CmpValue, MRI)) {
+ if (TII->optimizeCompareInstr(MI, SrcReg, SrcReg2, CmpMask, CmpValue, MRI)) {
++NumCmps;
return true;
}
@@ -350,6 +386,30 @@ bool PeepholeOptimizer::OptimizeCmpInstr(MachineInstr *MI,
return false;
}
+/// isLoadFoldable - Check whether MI is a candidate for folding into a later
+/// instruction. We only fold loads to virtual registers and the virtual
+/// register defined has a single use.
+bool PeepholeOptimizer::isLoadFoldable(MachineInstr *MI,
+ unsigned &FoldAsLoadDefReg) {
+ if (!MI->canFoldAsLoad() || !MI->mayLoad())
+ return false;
+ const MCInstrDesc &MCID = MI->getDesc();
+ if (MCID.getNumDefs() != 1)
+ return false;
+
+ unsigned Reg = MI->getOperand(0).getReg();
+ // To reduce compilation time, we check MRI->hasOneUse when inserting
+ // loads. It should be checked when processing uses of the load, since
+ // uses can be removed during peephole.
+ if (!MI->getOperand(0).getSubReg() &&
+ TargetRegisterInfo::isVirtualRegister(Reg) &&
+ MRI->hasOneUse(Reg)) {
+ FoldAsLoadDefReg = Reg;
+ return true;
+ }
+ return false;
+}
+
bool PeepholeOptimizer::isMoveImmediate(MachineInstr *MI,
SmallSet<unsigned, 4> &ImmDefRegs,
DenseMap<unsigned, MachineInstr*> &ImmDefMIs) {
@@ -368,10 +428,10 @@ bool PeepholeOptimizer::isMoveImmediate(MachineInstr *MI,
return false;
}
-/// FoldImmediate - Try folding register operands that are defined by move
+/// foldImmediate - Try folding register operands that are defined by move
/// immediate instructions, i.e. a trivial constant folding optimization, if
/// and only if the def and use are in the same BB.
-bool PeepholeOptimizer::FoldImmediate(MachineInstr *MI, MachineBasicBlock *MBB,
+bool PeepholeOptimizer::foldImmediate(MachineInstr *MI, MachineBasicBlock *MBB,
SmallSet<unsigned, 4> &ImmDefRegs,
DenseMap<unsigned, MachineInstr*> &ImmDefMIs) {
for (unsigned i = 0, e = MI->getDesc().getNumOperands(); i != e; ++i) {
@@ -407,6 +467,7 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
SmallPtrSet<MachineInstr*, 8> LocalMIs;
SmallSet<unsigned, 4> ImmDefRegs;
DenseMap<unsigned, MachineInstr*> ImmDefMIs;
+ unsigned FoldAsLoadDefReg;
for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
MachineBasicBlock *MBB = &*I;
@@ -414,6 +475,7 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
LocalMIs.clear();
ImmDefRegs.clear();
ImmDefMIs.clear();
+ FoldAsLoadDefReg = 0;
bool First = true;
MachineBasicBlock::iterator PMII;
@@ -422,15 +484,20 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
MachineInstr *MI = &*MII;
LocalMIs.insert(MI);
+ // If there exists an instruction which belongs to the following
+ // categories, we will discard the load candidate.
if (MI->isLabel() || MI->isPHI() || MI->isImplicitDef() ||
MI->isKill() || MI->isInlineAsm() || MI->isDebugValue() ||
MI->hasUnmodeledSideEffects()) {
+ FoldAsLoadDefReg = 0;
++MII;
continue;
}
+ if (MI->mayStore() || MI->isCall())
+ FoldAsLoadDefReg = 0;
if (MI->isBitcast()) {
- if (OptimizeBitcastInstr(MI, MBB)) {
+ if (optimizeBitcastInstr(MI, MBB)) {
// MI is deleted.
LocalMIs.erase(MI);
Changed = true;
@@ -438,7 +505,7 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
continue;
}
} else if (MI->isCompare()) {
- if (OptimizeCmpInstr(MI, MBB)) {
+ if (optimizeCmpInstr(MI, MBB)) {
// MI is deleted.
LocalMIs.erase(MI);
Changed = true;
@@ -450,11 +517,36 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
if (isMoveImmediate(MI, ImmDefRegs, ImmDefMIs)) {
SeenMoveImm = true;
} else {
- Changed |= OptimizeExtInstr(MI, MBB, LocalMIs);
+ Changed |= optimizeExtInstr(MI, MBB, LocalMIs);
if (SeenMoveImm)
- Changed |= FoldImmediate(MI, MBB, ImmDefRegs, ImmDefMIs);
+ Changed |= foldImmediate(MI, MBB, ImmDefRegs, ImmDefMIs);
}
+ // Check whether MI is a load candidate for folding into a later
+ // instruction. If MI is not a candidate, check whether we can fold an
+ // earlier load into MI.
+ if (!isLoadFoldable(MI, FoldAsLoadDefReg) && FoldAsLoadDefReg) {
+ // We need to fold load after optimizeCmpInstr, since optimizeCmpInstr
+ // can enable folding by converting SUB to CMP.
+ MachineInstr *DefMI = 0;
+ MachineInstr *FoldMI = TII->optimizeLoadInstr(MI, MRI,
+ FoldAsLoadDefReg, DefMI);
+ if (FoldMI) {
+ // Update LocalMIs since we replaced MI with FoldMI and deleted DefMI.
+ LocalMIs.erase(MI);
+ LocalMIs.erase(DefMI);
+ LocalMIs.insert(FoldMI);
+ MI->eraseFromParent();
+ DefMI->eraseFromParent();
+ ++NumLoadFold;
+
+ // MI is replaced with FoldMI.
+ Changed = true;
+ PMII = FoldMI;
+ MII = llvm::next(PMII);
+ continue;
+ }
+ }
First = false;
PMII = MII;
++MII;
diff --git a/lib/CodeGen/PostRASchedulerList.cpp b/lib/CodeGen/PostRASchedulerList.cpp
index 24d3e5a..7449ff5 100644
--- a/lib/CodeGen/PostRASchedulerList.cpp
+++ b/lib/CodeGen/PostRASchedulerList.cpp
@@ -22,7 +22,6 @@
#include "AntiDepBreaker.h"
#include "AggressiveAntiDepBreaker.h"
#include "CriticalAntiDepBreaker.h"
-#include "RegisterClassInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/LatencyPriorityQueue.h"
#include "llvm/CodeGen/SchedulerRegistry.h"
@@ -31,6 +30,7 @@
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterClassInfo.h"
#include "llvm/CodeGen/ScheduleDAGInstrs.h"
#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
#include "llvm/Analysis/AliasAnalysis.h"
@@ -78,7 +78,6 @@ AntiDepBreaker::~AntiDepBreaker() { }
namespace {
class PostRAScheduler : public MachineFunctionPass {
- AliasAnalysis *AA;
const TargetInstrInfo *TII;
RegisterClassInfo RegClassInfo;
@@ -206,6 +205,10 @@ SchedulePostRATDList::SchedulePostRATDList(
const InstrItineraryData *InstrItins = TM.getInstrItineraryData();
HazardRec =
TM.getInstrInfo()->CreateTargetPostRAHazardRecognizer(InstrItins, this);
+
+ assert((AntiDepMode == TargetSubtargetInfo::ANTIDEP_NONE ||
+ MRI.tracksLiveness()) &&
+ "Live-ins must be accurate for anti-dependency breaking");
AntiDepBreak =
((AntiDepMode == TargetSubtargetInfo::ANTIDEP_ALL) ?
(AntiDepBreaker *)new AggressiveAntiDepBreaker(MF, RCI, CriticalPathRCs) :
@@ -423,9 +426,8 @@ void SchedulePostRATDList::StartBlockForKills(MachineBasicBlock *BB) {
unsigned Reg = *I;
LiveRegs.set(Reg);
// Repeat, for all subregs.
- for (const uint16_t *Subreg = TRI->getSubRegisters(Reg);
- *Subreg; ++Subreg)
- LiveRegs.set(*Subreg);
+ for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs)
+ LiveRegs.set(*SubRegs);
}
}
else {
@@ -437,9 +439,8 @@ void SchedulePostRATDList::StartBlockForKills(MachineBasicBlock *BB) {
unsigned Reg = *I;
LiveRegs.set(Reg);
// Repeat, for all subregs.
- for (const uint16_t *Subreg = TRI->getSubRegisters(Reg);
- *Subreg; ++Subreg)
- LiveRegs.set(*Subreg);
+ for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs)
+ LiveRegs.set(*SubRegs);
}
}
}
@@ -464,10 +465,9 @@ bool SchedulePostRATDList::ToggleKillFlag(MachineInstr *MI,
MO.setIsKill(false);
bool AllDead = true;
const unsigned SuperReg = MO.getReg();
- for (const uint16_t *Subreg = TRI->getSubRegisters(SuperReg);
- *Subreg; ++Subreg) {
- if (LiveRegs.test(*Subreg)) {
- MI->addOperand(MachineOperand::CreateReg(*Subreg,
+ for (MCSubRegIterator SubRegs(SuperReg, TRI); SubRegs.isValid(); ++SubRegs) {
+ if (LiveRegs.test(*SubRegs)) {
+ MI->addOperand(MachineOperand::CreateReg(*SubRegs,
true /*IsDef*/,
true /*IsImp*/,
false /*IsKill*/,
@@ -517,9 +517,8 @@ void SchedulePostRATDList::FixupKills(MachineBasicBlock *MBB) {
LiveRegs.reset(Reg);
// Repeat for all subregs.
- for (const uint16_t *Subreg = TRI->getSubRegisters(Reg);
- *Subreg; ++Subreg)
- LiveRegs.reset(*Subreg);
+ for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs)
+ LiveRegs.reset(*SubRegs);
}
// Examine all used registers and set/clear kill flag. When a
@@ -536,9 +535,8 @@ void SchedulePostRATDList::FixupKills(MachineBasicBlock *MBB) {
if (!killedRegs.test(Reg)) {
kill = true;
// A register is not killed if any subregs are live...
- for (const uint16_t *Subreg = TRI->getSubRegisters(Reg);
- *Subreg; ++Subreg) {
- if (LiveRegs.test(*Subreg)) {
+ for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) {
+ if (LiveRegs.test(*SubRegs)) {
kill = false;
break;
}
@@ -570,9 +568,8 @@ void SchedulePostRATDList::FixupKills(MachineBasicBlock *MBB) {
LiveRegs.set(Reg);
- for (const uint16_t *Subreg = TRI->getSubRegisters(Reg);
- *Subreg; ++Subreg)
- LiveRegs.set(*Subreg);
+ for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs)
+ LiveRegs.set(*SubRegs);
}
}
}
diff --git a/lib/CodeGen/ProcessImplicitDefs.cpp b/lib/CodeGen/ProcessImplicitDefs.cpp
index 1ad3479..34d075c 100644
--- a/lib/CodeGen/ProcessImplicitDefs.cpp
+++ b/lib/CodeGen/ProcessImplicitDefs.cpp
@@ -9,297 +9,163 @@
#define DEBUG_TYPE "processimplicitdefs"
-#include "llvm/CodeGen/ProcessImplicitDefs.h"
-
-#include "llvm/ADT/DepthFirstIterator.h"
-#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SetVector.h"
#include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/CodeGen/LiveVariables.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-
using namespace llvm;
+namespace {
+/// Process IMPLICIT_DEF instructions and make sure there is one implicit_def
+/// for each use. Add isUndef marker to implicit_def defs and their uses.
+class ProcessImplicitDefs : public MachineFunctionPass {
+ const TargetInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+ MachineRegisterInfo *MRI;
+
+ SmallSetVector<MachineInstr*, 16> WorkList;
+
+ void processImplicitDef(MachineInstr *MI);
+ bool canTurnIntoImplicitDef(MachineInstr *MI);
+
+public:
+ static char ID;
+
+ ProcessImplicitDefs() : MachineFunctionPass(ID) {
+ initializeProcessImplicitDefsPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &au) const;
+
+ virtual bool runOnMachineFunction(MachineFunction &fn);
+};
+} // end anonymous namespace
+
char ProcessImplicitDefs::ID = 0;
char &llvm::ProcessImplicitDefsID = ProcessImplicitDefs::ID;
INITIALIZE_PASS_BEGIN(ProcessImplicitDefs, "processimpdefs",
"Process Implicit Definitions", false, false)
-INITIALIZE_PASS_DEPENDENCY(LiveVariables)
INITIALIZE_PASS_END(ProcessImplicitDefs, "processimpdefs",
"Process Implicit Definitions", false, false)
void ProcessImplicitDefs::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesCFG();
AU.addPreserved<AliasAnalysis>();
- AU.addPreserved<LiveVariables>();
- AU.addPreservedID(MachineLoopInfoID);
- AU.addPreservedID(MachineDominatorsID);
- AU.addPreservedID(TwoAddressInstructionPassID);
- AU.addPreservedID(PHIEliminationID);
MachineFunctionPass::getAnalysisUsage(AU);
}
-bool
-ProcessImplicitDefs::CanTurnIntoImplicitDef(MachineInstr *MI,
- unsigned Reg, unsigned OpIdx,
- SmallSet<unsigned, 8> &ImpDefRegs) {
- switch(OpIdx) {
- case 1:
- return MI->isCopy() && (!MI->getOperand(0).readsReg() ||
- ImpDefRegs.count(MI->getOperand(0).getReg()));
- case 2:
- return MI->isSubregToReg() && (!MI->getOperand(0).readsReg() ||
- ImpDefRegs.count(MI->getOperand(0).getReg()));
- default: return false;
- }
-}
-
-static bool isUndefCopy(MachineInstr *MI, unsigned Reg,
- SmallSet<unsigned, 8> &ImpDefRegs) {
- if (MI->isCopy()) {
- MachineOperand &MO0 = MI->getOperand(0);
- MachineOperand &MO1 = MI->getOperand(1);
- if (MO1.getReg() != Reg)
- return false;
- if (!MO0.readsReg() || ImpDefRegs.count(MO0.getReg()))
- return true;
+bool ProcessImplicitDefs::canTurnIntoImplicitDef(MachineInstr *MI) {
+ if (!MI->isCopyLike() &&
+ !MI->isInsertSubreg() &&
+ !MI->isRegSequence() &&
+ !MI->isPHI())
return false;
- }
- return false;
+ for (MIOperands MO(MI); MO.isValid(); ++MO)
+ if (MO->isReg() && MO->isUse() && MO->readsReg())
+ return false;
+ return true;
}
-/// processImplicitDefs - Process IMPLICIT_DEF instructions and make sure
-/// there is one implicit_def for each use. Add isUndef marker to
-/// implicit_def defs and their uses.
-bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &fn) {
-
- DEBUG(dbgs() << "********** PROCESS IMPLICIT DEFS **********\n"
- << "********** Function: "
- << ((Value*)fn.getFunction())->getName() << '\n');
-
- bool Changed = false;
-
- TII = fn.getTarget().getInstrInfo();
- TRI = fn.getTarget().getRegisterInfo();
- MRI = &fn.getRegInfo();
- LV = getAnalysisIfAvailable<LiveVariables>();
-
- SmallSet<unsigned, 8> ImpDefRegs;
- SmallVector<MachineInstr*, 8> ImpDefMIs;
- SmallVector<MachineInstr*, 4> RUses;
- SmallPtrSet<MachineBasicBlock*,16> Visited;
- SmallPtrSet<MachineInstr*, 8> ModInsts;
-
- MachineBasicBlock *Entry = fn.begin();
- for (df_ext_iterator<MachineBasicBlock*, SmallPtrSet<MachineBasicBlock*,16> >
- DFI = df_ext_begin(Entry, Visited), E = df_ext_end(Entry, Visited);
- DFI != E; ++DFI) {
- MachineBasicBlock *MBB = *DFI;
- for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end();
- I != E; ) {
- MachineInstr *MI = &*I;
- ++I;
- if (MI->isImplicitDef()) {
- ImpDefMIs.push_back(MI);
- // Is this a sub-register read-modify-write?
- if (MI->getOperand(0).readsReg())
- continue;
- unsigned Reg = MI->getOperand(0).getReg();
- ImpDefRegs.insert(Reg);
- if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
- for (const uint16_t *SS = TRI->getSubRegisters(Reg); *SS; ++SS)
- ImpDefRegs.insert(*SS);
- }
+void ProcessImplicitDefs::processImplicitDef(MachineInstr *MI) {
+ DEBUG(dbgs() << "Processing " << *MI);
+ unsigned Reg = MI->getOperand(0).getReg();
+
+ if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ // For virtual registers, mark all uses as <undef>, and convert users to
+ // implicit-def when possible.
+ for (MachineRegisterInfo::use_nodbg_iterator UI =
+ MRI->use_nodbg_begin(Reg),
+ UE = MRI->use_nodbg_end(); UI != UE; ++UI) {
+ MachineOperand &MO = UI.getOperand();
+ MO.setIsUndef();
+ MachineInstr *UserMI = MO.getParent();
+ if (!canTurnIntoImplicitDef(UserMI))
continue;
- }
-
- // Eliminate %reg1032:sub<def> = COPY undef.
- if (MI->isCopy() && MI->getOperand(0).readsReg()) {
- MachineOperand &MO = MI->getOperand(1);
- if (MO.isUndef() || ImpDefRegs.count(MO.getReg())) {
- if (LV && MO.isKill()) {
- LiveVariables::VarInfo& vi = LV->getVarInfo(MO.getReg());
- vi.removeKill(MI);
- }
- unsigned Reg = MI->getOperand(0).getReg();
- MI->eraseFromParent();
- Changed = true;
-
- // A REG_SEQUENCE may have been expanded into partial definitions.
- // If this was the last one, mark Reg as implicitly defined.
- if (TargetRegisterInfo::isVirtualRegister(Reg) && MRI->def_empty(Reg))
- ImpDefRegs.insert(Reg);
- continue;
- }
- }
-
- bool ChangedToImpDef = false;
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- MachineOperand& MO = MI->getOperand(i);
- if (!MO.isReg() || !MO.readsReg())
- continue;
- unsigned Reg = MO.getReg();
- if (!Reg)
- continue;
- if (!ImpDefRegs.count(Reg))
- continue;
- // Use is a copy, just turn it into an implicit_def.
- if (CanTurnIntoImplicitDef(MI, Reg, i, ImpDefRegs)) {
- bool isKill = MO.isKill();
- MI->setDesc(TII->get(TargetOpcode::IMPLICIT_DEF));
- for (int j = MI->getNumOperands() - 1, ee = 0; j > ee; --j)
- MI->RemoveOperand(j);
- if (isKill) {
- ImpDefRegs.erase(Reg);
- if (LV) {
- LiveVariables::VarInfo& vi = LV->getVarInfo(Reg);
- vi.removeKill(MI);
- }
- }
- ChangedToImpDef = true;
- Changed = true;
- break;
- }
-
- Changed = true;
- MO.setIsUndef();
- // This is a partial register redef of an implicit def.
- // Make sure the whole register is defined by the instruction.
- if (MO.isDef()) {
- MI->addRegisterDefined(Reg);
- continue;
- }
- if (MO.isKill() || MI->isRegTiedToDefOperand(i)) {
- // Make sure other reads of Reg are also marked <undef>.
- for (unsigned j = i+1; j != e; ++j) {
- MachineOperand &MOJ = MI->getOperand(j);
- if (MOJ.isReg() && MOJ.getReg() == Reg && MOJ.readsReg())
- MOJ.setIsUndef();
- }
- ImpDefRegs.erase(Reg);
- }
- }
-
- if (ChangedToImpDef) {
- // Backtrack to process this new implicit_def.
- --I;
- } else {
- for (unsigned i = 0; i != MI->getNumOperands(); ++i) {
- MachineOperand& MO = MI->getOperand(i);
- if (!MO.isReg() || !MO.isDef())
- continue;
- ImpDefRegs.erase(MO.getReg());
- }
- }
+ DEBUG(dbgs() << "Converting to IMPLICIT_DEF: " << *UserMI);
+ UserMI->setDesc(TII->get(TargetOpcode::IMPLICIT_DEF));
+ WorkList.insert(UserMI);
}
+ MI->eraseFromParent();
+ return;
+ }
- // Any outstanding liveout implicit_def's?
- for (unsigned i = 0, e = ImpDefMIs.size(); i != e; ++i) {
- MachineInstr *MI = ImpDefMIs[i];
- unsigned Reg = MI->getOperand(0).getReg();
- if (TargetRegisterInfo::isPhysicalRegister(Reg) ||
- !ImpDefRegs.count(Reg)) {
- // Delete all "local" implicit_def's. That include those which define
- // physical registers since they cannot be liveout.
- MI->eraseFromParent();
- Changed = true;
+ // This is a physreg implicit-def.
+ // Look for the first instruction to use or define an alias.
+ MachineBasicBlock::instr_iterator UserMI = MI;
+ MachineBasicBlock::instr_iterator UserE = MI->getParent()->instr_end();
+ bool Found = false;
+ for (++UserMI; UserMI != UserE; ++UserMI) {
+ for (MIOperands MO(UserMI); MO.isValid(); ++MO) {
+ if (!MO->isReg())
continue;
- }
-
- // If there are multiple defs of the same register and at least one
- // is not an implicit_def, do not insert implicit_def's before the
- // uses.
- bool Skip = false;
- SmallVector<MachineInstr*, 4> DeadImpDefs;
- for (MachineRegisterInfo::def_iterator DI = MRI->def_begin(Reg),
- DE = MRI->def_end(); DI != DE; ++DI) {
- MachineInstr *DeadImpDef = &*DI;
- if (!DeadImpDef->isImplicitDef()) {
- Skip = true;
- break;
- }
- DeadImpDefs.push_back(DeadImpDef);
- }
- if (Skip)
+ unsigned UserReg = MO->getReg();
+ if (!TargetRegisterInfo::isPhysicalRegister(UserReg) ||
+ !TRI->regsOverlap(Reg, UserReg))
continue;
+ // UserMI uses or redefines Reg. Set <undef> flags on all uses.
+ Found = true;
+ if (MO->isUse())
+ MO->setIsUndef();
+ }
+ if (Found)
+ break;
+ }
- // The only implicit_def which we want to keep are those that are live
- // out of its block.
- for (unsigned j = 0, ee = DeadImpDefs.size(); j != ee; ++j)
- DeadImpDefs[j]->eraseFromParent();
- Changed = true;
-
- // Process each use instruction once.
- for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(Reg),
- UE = MRI->use_end(); UI != UE; ++UI) {
- if (UI.getOperand().isUndef())
- continue;
- MachineInstr *RMI = &*UI;
- if (ModInsts.insert(RMI))
- RUses.push_back(RMI);
- }
+ // If we found the using MI, we can erase the IMPLICIT_DEF.
+ if (Found) {
+ DEBUG(dbgs() << "Physreg user: " << *UserMI);
+ MI->eraseFromParent();
+ return;
+ }
- for (unsigned i = 0, e = RUses.size(); i != e; ++i) {
- MachineInstr *RMI = RUses[i];
+ // Using instr wasn't found, it could be in another block.
+ // Leave the physreg IMPLICIT_DEF, but trim any extra operands.
+ for (unsigned i = MI->getNumOperands() - 1; i; --i)
+ MI->RemoveOperand(i);
+ DEBUG(dbgs() << "Keeping physreg: " << *MI);
+}
- // Turn a copy use into an implicit_def.
- if (isUndefCopy(RMI, Reg, ImpDefRegs)) {
- RMI->setDesc(TII->get(TargetOpcode::IMPLICIT_DEF));
+/// processImplicitDefs - Process IMPLICIT_DEF instructions and turn them into
+/// <undef> operands.
+bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &MF) {
- bool isKill = false;
- SmallVector<unsigned, 4> Ops;
- for (unsigned j = 0, ee = RMI->getNumOperands(); j != ee; ++j) {
- MachineOperand &RRMO = RMI->getOperand(j);
- if (RRMO.isReg() && RRMO.getReg() == Reg) {
- Ops.push_back(j);
- if (RRMO.isKill())
- isKill = true;
- }
- }
- // Leave the other operands along.
- for (unsigned j = 0, ee = Ops.size(); j != ee; ++j) {
- unsigned OpIdx = Ops[j];
- RMI->RemoveOperand(OpIdx-j);
- }
+ DEBUG(dbgs() << "********** PROCESS IMPLICIT DEFS **********\n"
+ << "********** Function: "
+ << ((Value*)MF.getFunction())->getName() << '\n');
- // Update LiveVariables varinfo if the instruction is a kill.
- if (LV && isKill) {
- LiveVariables::VarInfo& vi = LV->getVarInfo(Reg);
- vi.removeKill(RMI);
- }
- continue;
- }
+ bool Changed = false;
- // Replace Reg with a new vreg that's marked implicit.
- const TargetRegisterClass* RC = MRI->getRegClass(Reg);
- unsigned NewVReg = MRI->createVirtualRegister(RC);
- bool isKill = true;
- for (unsigned j = 0, ee = RMI->getNumOperands(); j != ee; ++j) {
- MachineOperand &RRMO = RMI->getOperand(j);
- if (RRMO.isReg() && RRMO.getReg() == Reg) {
- RRMO.setReg(NewVReg);
- RRMO.setIsUndef();
- if (isKill) {
- // Only the first operand of NewVReg is marked kill.
- RRMO.setIsKill();
- isKill = false;
- }
- }
- }
- }
- RUses.clear();
- ModInsts.clear();
- }
- ImpDefRegs.clear();
- ImpDefMIs.clear();
+ TII = MF.getTarget().getInstrInfo();
+ TRI = MF.getTarget().getRegisterInfo();
+ MRI = &MF.getRegInfo();
+ assert(MRI->isSSA() && "ProcessImplicitDefs only works on SSA form.");
+ assert(WorkList.empty() && "Inconsistent worklist state");
+
+ for (MachineFunction::iterator MFI = MF.begin(), MFE = MF.end();
+ MFI != MFE; ++MFI) {
+ // Scan the basic block for implicit defs.
+ for (MachineBasicBlock::instr_iterator MBBI = MFI->instr_begin(),
+ MBBE = MFI->instr_end(); MBBI != MBBE; ++MBBI)
+ if (MBBI->isImplicitDef())
+ WorkList.insert(MBBI);
+
+ if (WorkList.empty())
+ continue;
+
+ DEBUG(dbgs() << "BB#" << MFI->getNumber() << " has " << WorkList.size()
+ << " implicit defs.\n");
+ Changed = true;
+
+ // Drain the WorkList to recursively process any new implicit defs.
+ do processImplicitDef(WorkList.pop_back_val());
+ while (!WorkList.empty());
}
-
return Changed;
}
-
diff --git a/lib/CodeGen/PrologEpilogInserter.cpp b/lib/CodeGen/PrologEpilogInserter.cpp
index 458915e..c791ffb 100644
--- a/lib/CodeGen/PrologEpilogInserter.cpp
+++ b/lib/CodeGen/PrologEpilogInserter.cpp
@@ -302,7 +302,7 @@ void PEI::insertCSRSpillsAndRestores(MachineFunction &Fn) {
const TargetRegisterInfo *TRI = Fn.getTarget().getRegisterInfo();
MachineBasicBlock::iterator I;
- if (! ShrinkWrapThisFunction) {
+ if (!ShrinkWrapThisFunction) {
// Spill using target interface.
I = EntryBlock->begin();
if (!TFI->spillCalleeSavedRegisters(*EntryBlock, I, CSI, TRI)) {
diff --git a/lib/CodeGen/RegAllocBase.cpp b/lib/CodeGen/RegAllocBase.cpp
index b00eceb..993dbc7 100644
--- a/lib/CodeGen/RegAllocBase.cpp
+++ b/lib/CodeGen/RegAllocBase.cpp
@@ -14,6 +14,7 @@
#define DEBUG_TYPE "regalloc"
#include "RegAllocBase.h"
+#include "LiveRegMatrix.h"
#include "Spiller.h"
#include "VirtRegMap.h"
#include "llvm/ADT/Statistic.h"
@@ -34,8 +35,6 @@
using namespace llvm;
-STATISTIC(NumAssigned , "Number of registers assigned");
-STATISTIC(NumUnassigned , "Number of registers unassigned");
STATISTIC(NumNewQueued , "Number of new live ranges queued");
// Temporary verification option until we can put verification inside
@@ -47,85 +46,20 @@ VerifyRegAlloc("verify-regalloc", cl::location(RegAllocBase::VerifyEnabled),
const char *RegAllocBase::TimerGroupName = "Register Allocation";
bool RegAllocBase::VerifyEnabled = false;
-#ifndef NDEBUG
-// Verify each LiveIntervalUnion.
-void RegAllocBase::verify() {
- LiveVirtRegBitSet VisitedVRegs;
- OwningArrayPtr<LiveVirtRegBitSet>
- unionVRegs(new LiveVirtRegBitSet[PhysReg2LiveUnion.numRegs()]);
-
- // Verify disjoint unions.
- for (unsigned PhysReg = 0; PhysReg < PhysReg2LiveUnion.numRegs(); ++PhysReg) {
- DEBUG(PhysReg2LiveUnion[PhysReg].print(dbgs(), TRI));
- LiveVirtRegBitSet &VRegs = unionVRegs[PhysReg];
- PhysReg2LiveUnion[PhysReg].verify(VRegs);
- // Union + intersection test could be done efficiently in one pass, but
- // don't add a method to SparseBitVector unless we really need it.
- assert(!VisitedVRegs.intersects(VRegs) && "vreg in multiple unions");
- VisitedVRegs |= VRegs;
- }
-
- // Verify vreg coverage.
- for (LiveIntervals::iterator liItr = LIS->begin(), liEnd = LIS->end();
- liItr != liEnd; ++liItr) {
- unsigned reg = liItr->first;
- if (TargetRegisterInfo::isPhysicalRegister(reg)) continue;
- if (!VRM->hasPhys(reg)) continue; // spilled?
- unsigned PhysReg = VRM->getPhys(reg);
- if (!unionVRegs[PhysReg].test(reg)) {
- dbgs() << "LiveVirtReg " << reg << " not in union " <<
- TRI->getName(PhysReg) << "\n";
- llvm_unreachable("unallocated live vreg");
- }
- }
- // FIXME: I'm not sure how to verify spilled intervals.
-}
-#endif //!NDEBUG
-
//===----------------------------------------------------------------------===//
// RegAllocBase Implementation
//===----------------------------------------------------------------------===//
-// Instantiate a LiveIntervalUnion for each physical register.
-void RegAllocBase::LiveUnionArray::init(LiveIntervalUnion::Allocator &allocator,
- unsigned NRegs) {
- NumRegs = NRegs;
- Array =
- static_cast<LiveIntervalUnion*>(malloc(sizeof(LiveIntervalUnion)*NRegs));
- for (unsigned r = 0; r != NRegs; ++r)
- new(Array + r) LiveIntervalUnion(r, allocator);
-}
-
-void RegAllocBase::init(VirtRegMap &vrm, LiveIntervals &lis) {
- NamedRegionTimer T("Initialize", TimerGroupName, TimePassesIsEnabled);
+void RegAllocBase::init(VirtRegMap &vrm,
+ LiveIntervals &lis,
+ LiveRegMatrix &mat) {
TRI = &vrm.getTargetRegInfo();
MRI = &vrm.getRegInfo();
VRM = &vrm;
LIS = &lis;
+ Matrix = &mat;
MRI->freezeReservedRegs(vrm.getMachineFunction());
RegClassInfo.runOnMachineFunction(vrm.getMachineFunction());
-
- const unsigned NumRegs = TRI->getNumRegs();
- if (NumRegs != PhysReg2LiveUnion.numRegs()) {
- PhysReg2LiveUnion.init(UnionAllocator, NumRegs);
- // Cache an interferece query for each physical reg
- Queries.reset(new LiveIntervalUnion::Query[PhysReg2LiveUnion.numRegs()]);
- }
-}
-
-void RegAllocBase::LiveUnionArray::clear() {
- if (!Array)
- return;
- for (unsigned r = 0; r != NumRegs; ++r)
- Array[r].~LiveIntervalUnion();
- free(Array);
- NumRegs = 0;
- Array = 0;
-}
-
-void RegAllocBase::releaseMemory() {
- for (unsigned r = 0, e = PhysReg2LiveUnion.numRegs(); r != e; ++r)
- PhysReg2LiveUnion[r].clear();
}
// Visit all the live registers. If they are already assigned to a physical
@@ -133,35 +67,14 @@ void RegAllocBase::releaseMemory() {
// them on the priority queue for later assignment.
void RegAllocBase::seedLiveRegs() {
NamedRegionTimer T("Seed Live Regs", TimerGroupName, TimePassesIsEnabled);
- for (LiveIntervals::iterator I = LIS->begin(), E = LIS->end(); I != E; ++I) {
- unsigned RegNum = I->first;
- LiveInterval &VirtReg = *I->second;
- if (TargetRegisterInfo::isPhysicalRegister(RegNum))
- PhysReg2LiveUnion[RegNum].unify(VirtReg);
- else
- enqueue(&VirtReg);
+ for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) {
+ unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
+ if (MRI->reg_nodbg_empty(Reg))
+ continue;
+ enqueue(&LIS->getInterval(Reg));
}
}
-void RegAllocBase::assign(LiveInterval &VirtReg, unsigned PhysReg) {
- DEBUG(dbgs() << "assigning " << PrintReg(VirtReg.reg, TRI)
- << " to " << PrintReg(PhysReg, TRI) << '\n');
- assert(!VRM->hasPhys(VirtReg.reg) && "Duplicate VirtReg assignment");
- VRM->assignVirt2Phys(VirtReg.reg, PhysReg);
- MRI->setPhysRegUsed(PhysReg);
- PhysReg2LiveUnion[PhysReg].unify(VirtReg);
- ++NumAssigned;
-}
-
-void RegAllocBase::unassign(LiveInterval &VirtReg, unsigned PhysReg) {
- DEBUG(dbgs() << "unassigning " << PrintReg(VirtReg.reg, TRI)
- << " from " << PrintReg(PhysReg, TRI) << '\n');
- assert(VRM->getPhys(VirtReg.reg) == PhysReg && "Inconsistent unassign");
- PhysReg2LiveUnion[PhysReg].extract(VirtReg);
- VRM->clearVirt(VirtReg.reg);
- ++NumUnassigned;
-}
-
// Top-level driver to manage the queue of unassigned VirtRegs and call the
// selectOrSplit implementation.
void RegAllocBase::allocatePhysRegs() {
@@ -179,14 +92,14 @@ void RegAllocBase::allocatePhysRegs() {
}
// Invalidate all interference queries, live ranges could have changed.
- invalidateVirtRegs();
+ Matrix->invalidateVirtRegs();
// selectOrSplit requests the allocator to return an available physical
// register if possible and populate a list of new live intervals that
// result from splitting.
DEBUG(dbgs() << "\nselectOrSplit "
<< MRI->getRegClass(VirtReg->reg)->getName()
- << ':' << *VirtReg << '\n');
+ << ':' << PrintReg(VirtReg->reg) << ' ' << *VirtReg << '\n');
typedef SmallVector<LiveInterval*, 4> VirtRegVec;
VirtRegVec SplitVRegs;
unsigned AvailablePhysReg = selectOrSplit(*VirtReg, SplitVRegs);
@@ -211,7 +124,7 @@ void RegAllocBase::allocatePhysRegs() {
}
if (AvailablePhysReg)
- assign(*VirtReg, AvailablePhysReg);
+ Matrix->assign(*VirtReg, AvailablePhysReg);
for (VirtRegVec::iterator I = SplitVRegs.begin(), E = SplitVRegs.end();
I != E; ++I) {
@@ -230,51 +143,3 @@ void RegAllocBase::allocatePhysRegs() {
}
}
}
-
-// Check if this live virtual register interferes with a physical register. If
-// not, then check for interference on each register that aliases with the
-// physical register. Return the interfering register.
-unsigned RegAllocBase::checkPhysRegInterference(LiveInterval &VirtReg,
- unsigned PhysReg) {
- for (const uint16_t *AliasI = TRI->getOverlaps(PhysReg); *AliasI; ++AliasI)
- if (query(VirtReg, *AliasI).checkInterference())
- return *AliasI;
- return 0;
-}
-
-// Add newly allocated physical registers to the MBB live in sets.
-void RegAllocBase::addMBBLiveIns(MachineFunction *MF) {
- NamedRegionTimer T("MBB Live Ins", TimerGroupName, TimePassesIsEnabled);
- SlotIndexes *Indexes = LIS->getSlotIndexes();
- if (MF->size() <= 1)
- return;
-
- LiveIntervalUnion::SegmentIter SI;
- for (unsigned PhysReg = 0; PhysReg < PhysReg2LiveUnion.numRegs(); ++PhysReg) {
- LiveIntervalUnion &LiveUnion = PhysReg2LiveUnion[PhysReg];
- if (LiveUnion.empty())
- continue;
- DEBUG(dbgs() << PrintReg(PhysReg, TRI) << " live-in:");
- MachineFunction::iterator MBB = llvm::next(MF->begin());
- MachineFunction::iterator MFE = MF->end();
- SlotIndex Start, Stop;
- tie(Start, Stop) = Indexes->getMBBRange(MBB);
- SI.setMap(LiveUnion.getMap());
- SI.find(Start);
- while (SI.valid()) {
- if (SI.start() <= Start) {
- if (!MBB->isLiveIn(PhysReg))
- MBB->addLiveIn(PhysReg);
- DEBUG(dbgs() << "\tBB#" << MBB->getNumber() << ':'
- << PrintReg(SI.value()->reg, TRI));
- } else if (SI.start() > Stop)
- MBB = Indexes->getMBBFromIndex(SI.start().getPrevIndex());
- if (++MBB == MFE)
- break;
- tie(Start, Stop) = Indexes->getMBBRange(MBB);
- SI.advanceTo(Start);
- }
- DEBUG(dbgs() << '\n');
- }
-}
-
diff --git a/lib/CodeGen/RegAllocBase.h b/lib/CodeGen/RegAllocBase.h
index 072fe2b..db0c8e1 100644
--- a/lib/CodeGen/RegAllocBase.h
+++ b/lib/CodeGen/RegAllocBase.h
@@ -37,9 +37,9 @@
#ifndef LLVM_CODEGEN_REGALLOCBASE
#define LLVM_CODEGEN_REGALLOCBASE
-#include "llvm/ADT/OwningPtr.h"
#include "LiveIntervalUnion.h"
-#include "RegisterClassInfo.h"
+#include "llvm/CodeGen/RegisterClassInfo.h"
+#include "llvm/ADT/OwningPtr.h"
namespace llvm {
@@ -47,6 +47,7 @@ template<typename T> class SmallVectorImpl;
class TargetRegisterInfo;
class VirtRegMap;
class LiveIntervals;
+class LiveRegMatrix;
class Spiller;
/// RegAllocBase provides the register allocation driver and interface that can
@@ -56,69 +57,20 @@ class Spiller;
/// live range splitting. They must also override enqueue/dequeue to provide an
/// assignment order.
class RegAllocBase {
- LiveIntervalUnion::Allocator UnionAllocator;
-
- // Cache tag for PhysReg2LiveUnion entries. Increment whenever virtual
- // registers may have changed.
- unsigned UserTag;
-
- // Array of LiveIntervalUnions indexed by physical register.
- class LiveUnionArray {
- unsigned NumRegs;
- LiveIntervalUnion *Array;
- public:
- LiveUnionArray(): NumRegs(0), Array(0) {}
- ~LiveUnionArray() { clear(); }
-
- unsigned numRegs() const { return NumRegs; }
-
- void init(LiveIntervalUnion::Allocator &, unsigned NRegs);
-
- void clear();
-
- LiveIntervalUnion& operator[](unsigned PhysReg) {
- assert(PhysReg < NumRegs && "physReg out of bounds");
- return Array[PhysReg];
- }
- };
-
- LiveUnionArray PhysReg2LiveUnion;
-
- // Current queries, one per physreg. They must be reinitialized each time we
- // query on a new live virtual register.
- OwningArrayPtr<LiveIntervalUnion::Query> Queries;
-
protected:
const TargetRegisterInfo *TRI;
MachineRegisterInfo *MRI;
VirtRegMap *VRM;
LiveIntervals *LIS;
+ LiveRegMatrix *Matrix;
RegisterClassInfo RegClassInfo;
- RegAllocBase(): UserTag(0), TRI(0), MRI(0), VRM(0), LIS(0) {}
+ RegAllocBase(): TRI(0), MRI(0), VRM(0), LIS(0), Matrix(0) {}
virtual ~RegAllocBase() {}
// A RegAlloc pass should call this before allocatePhysRegs.
- void init(VirtRegMap &vrm, LiveIntervals &lis);
-
- // Get an initialized query to check interferences between lvr and preg. Note
- // that Query::init must be called at least once for each physical register
- // before querying a new live virtual register. This ties Queries and
- // PhysReg2LiveUnion together.
- LiveIntervalUnion::Query &query(LiveInterval &VirtReg, unsigned PhysReg) {
- Queries[PhysReg].init(UserTag, &VirtReg, &PhysReg2LiveUnion[PhysReg]);
- return Queries[PhysReg];
- }
-
- // Get direct access to the underlying LiveIntervalUnion for PhysReg.
- LiveIntervalUnion &getLiveUnion(unsigned PhysReg) {
- return PhysReg2LiveUnion[PhysReg];
- }
-
- // Invalidate all cached information about virtual registers - live ranges may
- // have changed.
- void invalidateVirtRegs() { ++UserTag; }
+ void init(VirtRegMap &vrm, LiveIntervals &lis, LiveRegMatrix &mat);
// The top-level driver. The output is a VirtRegMap that us updated with
// physical register assignments.
@@ -140,31 +92,6 @@ protected:
virtual unsigned selectOrSplit(LiveInterval &VirtReg,
SmallVectorImpl<LiveInterval*> &splitLVRs) = 0;
- // A RegAlloc pass should call this when PassManager releases its memory.
- virtual void releaseMemory();
-
- // Helper for checking interference between a live virtual register and a
- // physical register, including all its register aliases. If an interference
- // exists, return the interfering register, which may be preg or an alias.
- unsigned checkPhysRegInterference(LiveInterval& VirtReg, unsigned PhysReg);
-
- /// assign - Assign VirtReg to PhysReg.
- /// This should not be called from selectOrSplit for the current register.
- void assign(LiveInterval &VirtReg, unsigned PhysReg);
-
- /// unassign - Undo a previous assignment of VirtReg to PhysReg.
- /// This can be invoked from selectOrSplit, but be careful to guarantee that
- /// allocation is making progress.
- void unassign(LiveInterval &VirtReg, unsigned PhysReg);
-
- /// addMBBLiveIns - Add physreg liveins to basic blocks.
- void addMBBLiveIns(MachineFunction *);
-
-#ifndef NDEBUG
- // Verify each LiveIntervalUnion.
- void verify();
-#endif
-
// Use this group name for NamedRegionTimer.
static const char *TimerGroupName;
diff --git a/lib/CodeGen/RegAllocBasic.cpp b/lib/CodeGen/RegAllocBasic.cpp
index 77ee314..3a03807 100644
--- a/lib/CodeGen/RegAllocBasic.cpp
+++ b/lib/CodeGen/RegAllocBasic.cpp
@@ -13,11 +13,12 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "regalloc"
+#include "AllocationOrder.h"
#include "RegAllocBase.h"
#include "LiveDebugVariables.h"
-#include "RenderMachineFunction.h"
#include "Spiller.h"
#include "VirtRegMap.h"
+#include "LiveRegMatrix.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Function.h"
#include "llvm/PassAnalysisSupport.h"
@@ -64,10 +65,6 @@ class RABasic : public MachineFunctionPass, public RegAllocBase
// context
MachineFunction *MF;
- // analyses
- LiveStacks *LS;
- RenderMachineFunction *RMF;
-
// state
std::auto_ptr<Spiller> SpillerInstance;
std::priority_queue<LiveInterval*, std::vector<LiveInterval*>,
@@ -118,9 +115,6 @@ public:
bool spillInterferences(LiveInterval &VirtReg, unsigned PhysReg,
SmallVectorImpl<LiveInterval*> &SplitVRegs);
- void spillReg(LiveInterval &VirtReg, unsigned PhysReg,
- SmallVectorImpl<LiveInterval*> &SplitVRegs);
-
static char ID;
};
@@ -139,7 +133,7 @@ RABasic::RABasic(): MachineFunctionPass(ID) {
initializeMachineDominatorTreePass(*PassRegistry::getPassRegistry());
initializeMachineLoopInfoPass(*PassRegistry::getPassRegistry());
initializeVirtRegMapPass(*PassRegistry::getPassRegistry());
- initializeRenderMachineFunctionPass(*PassRegistry::getPassRegistry());
+ initializeLiveRegMatrixPass(*PassRegistry::getPassRegistry());
}
void RABasic::getAnalysisUsage(AnalysisUsage &AU) const {
@@ -147,6 +141,7 @@ void RABasic::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<AliasAnalysis>();
AU.addPreserved<AliasAnalysis>();
AU.addRequired<LiveIntervals>();
+ AU.addPreserved<LiveIntervals>();
AU.addPreserved<SlotIndexes>();
AU.addRequired<LiveDebugVariables>();
AU.addPreserved<LiveDebugVariables>();
@@ -159,41 +154,15 @@ void RABasic::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addPreserved<MachineLoopInfo>();
AU.addRequired<VirtRegMap>();
AU.addPreserved<VirtRegMap>();
- DEBUG(AU.addRequired<RenderMachineFunction>());
+ AU.addRequired<LiveRegMatrix>();
+ AU.addPreserved<LiveRegMatrix>();
MachineFunctionPass::getAnalysisUsage(AU);
}
void RABasic::releaseMemory() {
SpillerInstance.reset(0);
- RegAllocBase::releaseMemory();
}
-// Helper for spillInterferences() that spills all interfering vregs currently
-// assigned to this physical register.
-void RABasic::spillReg(LiveInterval& VirtReg, unsigned PhysReg,
- SmallVectorImpl<LiveInterval*> &SplitVRegs) {
- LiveIntervalUnion::Query &Q = query(VirtReg, PhysReg);
- assert(Q.seenAllInterferences() && "need collectInterferences()");
- const SmallVectorImpl<LiveInterval*> &PendingSpills = Q.interferingVRegs();
-
- for (SmallVectorImpl<LiveInterval*>::const_iterator I = PendingSpills.begin(),
- E = PendingSpills.end(); I != E; ++I) {
- LiveInterval &SpilledVReg = **I;
- DEBUG(dbgs() << "extracting from " <<
- TRI->getName(PhysReg) << " " << SpilledVReg << '\n');
-
- // Deallocate the interfering vreg by removing it from the union.
- // A LiveInterval instance may not be in a union during modification!
- unassign(SpilledVReg, PhysReg);
-
- // Spill the extracted interval.
- LiveRangeEdit LRE(SpilledVReg, SplitVRegs, *MF, *LIS, VRM);
- spiller().spill(LRE);
- }
- // After extracting segments, the query's results are invalid. But keep the
- // contents valid until we're done accessing pendingSpills.
- Q.clear();
-}
// Spill or split all live virtual registers currently unified under PhysReg
// that interfere with VirtReg. The newly spilled or split live intervals are
@@ -202,22 +171,41 @@ bool RABasic::spillInterferences(LiveInterval &VirtReg, unsigned PhysReg,
SmallVectorImpl<LiveInterval*> &SplitVRegs) {
// Record each interference and determine if all are spillable before mutating
// either the union or live intervals.
- unsigned NumInterferences = 0;
+ SmallVector<LiveInterval*, 8> Intfs;
+
// Collect interferences assigned to any alias of the physical register.
- for (const uint16_t *asI = TRI->getOverlaps(PhysReg); *asI; ++asI) {
- LiveIntervalUnion::Query &QAlias = query(VirtReg, *asI);
- NumInterferences += QAlias.collectInterferingVRegs();
- if (QAlias.seenUnspillableVReg()) {
+ for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
+ LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, *Units);
+ Q.collectInterferingVRegs();
+ if (Q.seenUnspillableVReg())
return false;
+ for (unsigned i = Q.interferingVRegs().size(); i; --i) {
+ LiveInterval *Intf = Q.interferingVRegs()[i - 1];
+ if (!Intf->isSpillable() || Intf->weight > VirtReg.weight)
+ return false;
+ Intfs.push_back(Intf);
}
}
DEBUG(dbgs() << "spilling " << TRI->getName(PhysReg) <<
" interferences with " << VirtReg << "\n");
- assert(NumInterferences > 0 && "expect interference");
+ assert(!Intfs.empty() && "expected interference");
// Spill each interfering vreg allocated to PhysReg or an alias.
- for (const uint16_t *AliasI = TRI->getOverlaps(PhysReg); *AliasI; ++AliasI)
- spillReg(VirtReg, *AliasI, SplitVRegs);
+ for (unsigned i = 0, e = Intfs.size(); i != e; ++i) {
+ LiveInterval &Spill = *Intfs[i];
+
+ // Skip duplicates.
+ if (!VRM->hasPhys(Spill.reg))
+ continue;
+
+ // Deallocate the interfering vreg by removing it from the union.
+ // A LiveInterval instance may not be in a union during modification!
+ Matrix->unassign(Spill);
+
+ // Spill the extracted interval.
+ LiveRangeEdit LRE(&Spill, SplitVRegs, *MF, *LIS, VRM);
+ spiller().spill(LRE);
+ }
return true;
}
@@ -235,49 +223,36 @@ bool RABasic::spillInterferences(LiveInterval &VirtReg, unsigned PhysReg,
// selectOrSplit().
unsigned RABasic::selectOrSplit(LiveInterval &VirtReg,
SmallVectorImpl<LiveInterval*> &SplitVRegs) {
- // Check for register mask interference. When live ranges cross calls, the
- // set of usable registers is reduced to the callee-saved ones.
- bool CrossRegMasks = LIS->checkRegMaskInterference(VirtReg, UsableRegs);
-
// Populate a list of physical register spill candidates.
SmallVector<unsigned, 8> PhysRegSpillCands;
// Check for an available register in this class.
- ArrayRef<unsigned> Order =
- RegClassInfo.getOrder(MRI->getRegClass(VirtReg.reg));
- for (ArrayRef<unsigned>::iterator I = Order.begin(), E = Order.end(); I != E;
- ++I) {
- unsigned PhysReg = *I;
-
- // If PhysReg is clobbered by a register mask, it isn't useful for
- // allocation or spilling.
- if (CrossRegMasks && !UsableRegs.test(PhysReg))
- continue;
-
- // Check interference and as a side effect, intialize queries for this
- // VirtReg and its aliases.
- unsigned interfReg = checkPhysRegInterference(VirtReg, PhysReg);
- if (interfReg == 0) {
- // Found an available register.
+ AllocationOrder Order(VirtReg.reg, *VRM, RegClassInfo);
+ while (unsigned PhysReg = Order.next()) {
+ // Check for interference in PhysReg
+ switch (Matrix->checkInterference(VirtReg, PhysReg)) {
+ case LiveRegMatrix::IK_Free:
+ // PhysReg is available, allocate it.
return PhysReg;
- }
- LiveIntervalUnion::Query &IntfQ = query(VirtReg, interfReg);
- IntfQ.collectInterferingVRegs(1);
- LiveInterval *interferingVirtReg = IntfQ.interferingVRegs().front();
- // The current VirtReg must either be spillable, or one of its interferences
- // must have less spill weight.
- if (interferingVirtReg->weight < VirtReg.weight ) {
+ case LiveRegMatrix::IK_VirtReg:
+ // Only virtual registers in the way, we may be able to spill them.
PhysRegSpillCands.push_back(PhysReg);
+ continue;
+
+ default:
+ // RegMask or RegUnit interference.
+ continue;
}
}
+
// Try to spill another interfering reg with less spill weight.
for (SmallVectorImpl<unsigned>::iterator PhysRegI = PhysRegSpillCands.begin(),
- PhysRegE = PhysRegSpillCands.end(); PhysRegI != PhysRegE; ++PhysRegI) {
-
- if (!spillInterferences(VirtReg, *PhysRegI, SplitVRegs)) continue;
+ PhysRegE = PhysRegSpillCands.end(); PhysRegI != PhysRegE; ++PhysRegI) {
+ if (!spillInterferences(VirtReg, *PhysRegI, SplitVRegs))
+ continue;
- assert(checkPhysRegInterference(VirtReg, *PhysRegI) == 0 &&
+ assert(!Matrix->checkInterference(VirtReg, *PhysRegI) &&
"Interference after spill.");
// Tell the caller to allocate to this newly freed physical register.
return *PhysRegI;
@@ -287,7 +262,7 @@ unsigned RABasic::selectOrSplit(LiveInterval &VirtReg,
DEBUG(dbgs() << "spilling: " << VirtReg << '\n');
if (!VirtReg.isSpillable())
return ~0u;
- LiveRangeEdit LRE(VirtReg, SplitVRegs, *MF, *LIS, VRM);
+ LiveRangeEdit LRE(&VirtReg, SplitVRegs, *MF, *LIS, VRM);
spiller().spill(LRE);
// The live virtual register requesting allocation was spilled, so tell
@@ -301,53 +276,17 @@ bool RABasic::runOnMachineFunction(MachineFunction &mf) {
<< ((Value*)mf.getFunction())->getName() << '\n');
MF = &mf;
- DEBUG(RMF = &getAnalysis<RenderMachineFunction>());
-
- RegAllocBase::init(getAnalysis<VirtRegMap>(), getAnalysis<LiveIntervals>());
+ RegAllocBase::init(getAnalysis<VirtRegMap>(),
+ getAnalysis<LiveIntervals>(),
+ getAnalysis<LiveRegMatrix>());
SpillerInstance.reset(createInlineSpiller(*this, *MF, *VRM));
allocatePhysRegs();
- addMBBLiveIns(MF);
-
// Diagnostic output before rewriting
DEBUG(dbgs() << "Post alloc VirtRegMap:\n" << *VRM << "\n");
- // optional HTML output
- DEBUG(RMF->renderMachineFunction("After basic register allocation.", VRM));
-
- // FIXME: Verification currently must run before VirtRegRewriter. We should
- // make the rewriter a separate pass and override verifyAnalysis instead. When
- // that happens, verification naturally falls under VerifyMachineCode.
-#ifndef NDEBUG
- if (VerifyEnabled) {
- // Verify accuracy of LiveIntervals. The standard machine code verifier
- // ensures that each LiveIntervals covers all uses of the virtual reg.
-
- // FIXME: MachineVerifier is badly broken when using the standard
- // spiller. Always use -spiller=inline with -verify-regalloc. Even with the
- // inline spiller, some tests fail to verify because the coalescer does not
- // always generate verifiable code.
- MF->verify(this, "In RABasic::verify");
-
- // Verify that LiveIntervals are partitioned into unions and disjoint within
- // the unions.
- verify();
- }
-#endif // !NDEBUG
-
- // Run rewriter
- VRM->rewrite(LIS->getSlotIndexes());
-
- // Write out new DBG_VALUE instructions.
- getAnalysis<LiveDebugVariables>().emitDebugValues(VRM);
-
- // All machine operands and other references to virtual registers have been
- // replaced. Remove the virtual registers and release all the transient data.
- VRM->clearAllVirt();
- MRI->clearVirtRegs();
releaseMemory();
-
return true;
}
diff --git a/lib/CodeGen/RegAllocFast.cpp b/lib/CodeGen/RegAllocFast.cpp
index e09b7f8..6b3a48e 100644
--- a/lib/CodeGen/RegAllocFast.cpp
+++ b/lib/CodeGen/RegAllocFast.cpp
@@ -13,7 +13,6 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "regalloc"
-#include "RegisterClassInfo.h"
#include "llvm/BasicBlock.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
@@ -22,6 +21,7 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/RegAllocRegistry.h"
+#include "llvm/CodeGen/RegisterClassInfo.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Support/CommandLine.h"
@@ -77,7 +77,7 @@ namespace {
explicit LiveReg(unsigned v)
: LastUse(0), VirtReg(v), PhysReg(0), LastOpNum(0), Dirty(false) {}
- unsigned getSparseSetKey() const {
+ unsigned getSparseSetIndex() const {
return TargetRegisterInfo::virtReg2Index(VirtReg);
}
};
@@ -201,20 +201,16 @@ int RAFast::getStackSpaceFor(unsigned VirtReg, const TargetRegisterClass *RC) {
/// its virtual register, and it is guaranteed to be a block-local register.
///
bool RAFast::isLastUseOfLocalReg(MachineOperand &MO) {
- // Check for non-debug uses or defs following MO.
- // This is the most likely way to fail - fast path it.
- MachineOperand *Next = &MO;
- while ((Next = Next->getNextOperandForReg()))
- if (!Next->isDebug())
- return false;
-
// If the register has ever been spilled or reloaded, we conservatively assume
// it is a global register used in multiple blocks.
if (StackSlotForVirtReg[MO.getReg()] != -1)
return false;
// Check that the use/def chain has exactly one operand - MO.
- return &MRI->reg_nodbg_begin(MO.getReg()).getOperand() == &MO;
+ MachineRegisterInfo::reg_nodbg_iterator I = MRI->reg_nodbg_begin(MO.getReg());
+ if (&I.getOperand() != &MO)
+ return false;
+ return ++I == MRI->reg_nodbg_end();
}
/// addKillFlag - Set kill flags on last use of a virtual register.
@@ -354,8 +350,8 @@ void RAFast::usePhysReg(MachineOperand &MO) {
}
// Maybe a superregister is reserved?
- for (const uint16_t *AS = TRI->getAliasSet(PhysReg);
- unsigned Alias = *AS; ++AS) {
+ for (MCRegAliasIterator AI(PhysReg, TRI, false); AI.isValid(); ++AI) {
+ unsigned Alias = *AI;
switch (PhysRegState[Alias]) {
case regDisabled:
break;
@@ -408,8 +404,8 @@ void RAFast::definePhysReg(MachineInstr *MI, unsigned PhysReg,
// This is a disabled register, disable all aliases.
PhysRegState[PhysReg] = NewState;
- for (const uint16_t *AS = TRI->getAliasSet(PhysReg);
- unsigned Alias = *AS; ++AS) {
+ for (MCRegAliasIterator AI(PhysReg, TRI, false); AI.isValid(); ++AI) {
+ unsigned Alias = *AI;
switch (unsigned VirtReg = PhysRegState[Alias]) {
case regDisabled:
break;
@@ -456,8 +452,8 @@ unsigned RAFast::calcSpillCost(unsigned PhysReg) const {
// This is a disabled register, add up cost of aliases.
DEBUG(dbgs() << PrintReg(PhysReg, TRI) << " is disabled.\n");
unsigned Cost = 0;
- for (const uint16_t *AS = TRI->getAliasSet(PhysReg);
- unsigned Alias = *AS; ++AS) {
+ for (MCRegAliasIterator AI(PhysReg, TRI, false); AI.isValid(); ++AI) {
+ unsigned Alias = *AI;
if (UsedInInstr.test(Alias))
return spillImpossible;
switch (unsigned VirtReg = PhysRegState[Alias]) {
@@ -659,9 +655,10 @@ RAFast::reloadVirtReg(MachineInstr *MI, unsigned OpNum,
// Return true if the operand kills its register.
bool RAFast::setPhysReg(MachineInstr *MI, unsigned OpNum, unsigned PhysReg) {
MachineOperand &MO = MI->getOperand(OpNum);
+ bool Dead = MO.isDead();
if (!MO.getSubReg()) {
MO.setReg(PhysReg);
- return MO.isKill() || MO.isDead();
+ return MO.isKill() || Dead;
}
// Handle subregister index.
@@ -674,7 +671,13 @@ bool RAFast::setPhysReg(MachineInstr *MI, unsigned OpNum, unsigned PhysReg) {
MI->addRegisterKilled(PhysReg, TRI, true);
return true;
}
- return MO.isDead();
+
+ // A <def,read-undef> of a sub-register requires an implicit def of the full
+ // register.
+ if (MO.isDef() && MO.isUndef())
+ MI->addRegisterDefined(PhysReg, TRI);
+
+ return Dead;
}
// Handle special instruction operand like early clobbers and tied ops when
@@ -704,13 +707,10 @@ void RAFast::handleThroughOperands(MachineInstr *MI,
if (!MO.isReg() || !MO.isDef()) continue;
unsigned Reg = MO.getReg();
if (!Reg || !TargetRegisterInfo::isPhysicalRegister(Reg)) continue;
- UsedInInstr.set(Reg);
- if (ThroughRegs.count(PhysRegState[Reg]))
- definePhysReg(MI, Reg, regFree);
- for (const uint16_t *AS = TRI->getAliasSet(Reg); *AS; ++AS) {
- UsedInInstr.set(*AS);
- if (ThroughRegs.count(PhysRegState[*AS]))
- definePhysReg(MI, *AS, regFree);
+ for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) {
+ UsedInInstr.set(*AI);
+ if (ThroughRegs.count(PhysRegState[*AI]))
+ definePhysReg(MI, *AI, regFree);
}
}
@@ -1029,9 +1029,8 @@ void RAFast::AllocateBasicBlock() {
if (!Reg || !TargetRegisterInfo::isPhysicalRegister(Reg)) continue;
// Look for physreg defs and tied uses.
if (!MO.isDef() && !MI->isRegTiedToDefOperand(i)) continue;
- UsedInInstr.set(Reg);
- for (const uint16_t *AS = TRI->getAliasSet(Reg); *AS; ++AS)
- UsedInInstr.set(*AS);
+ for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
+ UsedInInstr.set(*AI);
}
}
diff --git a/lib/CodeGen/RegAllocGreedy.cpp b/lib/CodeGen/RegAllocGreedy.cpp
index 3f2a617..6ac5428 100644
--- a/lib/CodeGen/RegAllocGreedy.cpp
+++ b/lib/CodeGen/RegAllocGreedy.cpp
@@ -16,6 +16,7 @@
#include "AllocationOrder.h"
#include "InterferenceCache.h"
#include "LiveDebugVariables.h"
+#include "LiveRegMatrix.h"
#include "RegAllocBase.h"
#include "Spiller.h"
#include "SpillPlacement.h"
@@ -73,7 +74,6 @@ class RAGreedy : public MachineFunctionPass,
// analyses
SlotIndexes *Indexes;
- LiveStacks *LS;
MachineDominatorTree *DomTree;
MachineLoopInfo *Loops;
EdgeBundles *Bundles;
@@ -168,19 +168,6 @@ class RAGreedy : public MachineFunctionPass,
}
};
- // Register mask interference. The current VirtReg is checked for register
- // mask interference on entry to selectOrSplit(). If there is no
- // interference, UsableRegs is left empty. If there is interference,
- // UsableRegs has a bit mask of registers that can be used without register
- // mask interference.
- BitVector UsableRegs;
-
- /// clobberedByRegMask - Returns true if PhysReg is not directly usable
- /// because of register mask clobbers.
- bool clobberedByRegMask(unsigned PhysReg) const {
- return !UsableRegs.empty() && !UsableRegs.test(PhysReg);
- }
-
// splitting state.
std::auto_ptr<SplitAnalysis> SA;
std::auto_ptr<SplitEditor> SE;
@@ -286,6 +273,8 @@ private:
SmallVectorImpl<LiveInterval*>&);
unsigned tryBlockSplit(LiveInterval&, AllocationOrder&,
SmallVectorImpl<LiveInterval*>&);
+ unsigned tryInstructionSplit(LiveInterval&, AllocationOrder&,
+ SmallVectorImpl<LiveInterval*>&);
unsigned tryLocalSplit(LiveInterval&, AllocationOrder&,
SmallVectorImpl<LiveInterval*>&);
unsigned trySplit(LiveInterval&, AllocationOrder&,
@@ -327,6 +316,7 @@ RAGreedy::RAGreedy(): MachineFunctionPass(ID) {
initializeMachineDominatorTreePass(*PassRegistry::getPassRegistry());
initializeMachineLoopInfoPass(*PassRegistry::getPassRegistry());
initializeVirtRegMapPass(*PassRegistry::getPassRegistry());
+ initializeLiveRegMatrixPass(*PassRegistry::getPassRegistry());
initializeEdgeBundlesPass(*PassRegistry::getPassRegistry());
initializeSpillPlacementPass(*PassRegistry::getPassRegistry());
}
@@ -336,6 +326,7 @@ void RAGreedy::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<AliasAnalysis>();
AU.addPreserved<AliasAnalysis>();
AU.addRequired<LiveIntervals>();
+ AU.addPreserved<LiveIntervals>();
AU.addRequired<SlotIndexes>();
AU.addPreserved<SlotIndexes>();
AU.addRequired<LiveDebugVariables>();
@@ -349,6 +340,8 @@ void RAGreedy::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addPreserved<MachineLoopInfo>();
AU.addRequired<VirtRegMap>();
AU.addPreserved<VirtRegMap>();
+ AU.addRequired<LiveRegMatrix>();
+ AU.addPreserved<LiveRegMatrix>();
AU.addRequired<EdgeBundles>();
AU.addRequired<SpillPlacement>();
MachineFunctionPass::getAnalysisUsage(AU);
@@ -360,8 +353,8 @@ void RAGreedy::getAnalysisUsage(AnalysisUsage &AU) const {
//===----------------------------------------------------------------------===//
bool RAGreedy::LRE_CanEraseVirtReg(unsigned VirtReg) {
- if (unsigned PhysReg = VRM->getPhys(VirtReg)) {
- unassign(LIS->getInterval(VirtReg), PhysReg);
+ if (VRM->hasPhys(VirtReg)) {
+ Matrix->unassign(LIS->getInterval(VirtReg));
return true;
}
// Unassigned virtreg is probably in the priority queue.
@@ -370,13 +363,12 @@ bool RAGreedy::LRE_CanEraseVirtReg(unsigned VirtReg) {
}
void RAGreedy::LRE_WillShrinkVirtReg(unsigned VirtReg) {
- unsigned PhysReg = VRM->getPhys(VirtReg);
- if (!PhysReg)
+ if (!VRM->hasPhys(VirtReg))
return;
// Register is assigned, put it back on the queue for reassignment.
LiveInterval &LI = LIS->getInterval(VirtReg);
- unassign(LI, PhysReg);
+ Matrix->unassign(LI);
enqueue(&LI);
}
@@ -398,7 +390,6 @@ void RAGreedy::releaseMemory() {
SpillerInstance.reset(0);
ExtraRegInfo.clear();
GlobalCand.clear();
- RegAllocBase::releaseMemory();
}
void RAGreedy::enqueue(LiveInterval *LI) {
@@ -450,12 +441,9 @@ unsigned RAGreedy::tryAssign(LiveInterval &VirtReg,
SmallVectorImpl<LiveInterval*> &NewVRegs) {
Order.rewind();
unsigned PhysReg;
- while ((PhysReg = Order.next())) {
- if (clobberedByRegMask(PhysReg))
- continue;
- if (!checkPhysRegInterference(VirtReg, PhysReg))
+ while ((PhysReg = Order.next()))
+ if (!Matrix->checkInterference(VirtReg, PhysReg))
break;
- }
if (!PhysReg || Order.isHint(PhysReg))
return PhysReg;
@@ -464,7 +452,7 @@ unsigned RAGreedy::tryAssign(LiveInterval &VirtReg,
// If we missed a simple hint, try to cheaply evict interference from the
// preferred register.
if (unsigned Hint = MRI->getSimpleHint(VirtReg.reg))
- if (Order.isHint(Hint) && !clobberedByRegMask(Hint)) {
+ if (Order.isHint(Hint)) {
DEBUG(dbgs() << "missed hint " << PrintReg(Hint, TRI) << '\n');
EvictionCost MaxCost(1);
if (canEvictInterference(VirtReg, Hint, true, MaxCost)) {
@@ -527,6 +515,10 @@ bool RAGreedy::shouldEvict(LiveInterval &A, bool IsHint,
/// @returns True when interference can be evicted cheaper than MaxCost.
bool RAGreedy::canEvictInterference(LiveInterval &VirtReg, unsigned PhysReg,
bool IsHint, EvictionCost &MaxCost) {
+ // It is only possible to evict virtual register interference.
+ if (Matrix->checkInterference(VirtReg, PhysReg) > LiveRegMatrix::IK_VirtReg)
+ return false;
+
// Find VirtReg's cascade number. This will be unassigned if VirtReg was never
// involved in an eviction before. If a cascade number was assigned, deny
// evicting anything with the same or a newer cascade number. This prevents
@@ -539,8 +531,8 @@ bool RAGreedy::canEvictInterference(LiveInterval &VirtReg, unsigned PhysReg,
Cascade = NextCascade;
EvictionCost Cost;
- for (const uint16_t *AliasI = TRI->getOverlaps(PhysReg); *AliasI; ++AliasI) {
- LiveIntervalUnion::Query &Q = query(VirtReg, *AliasI);
+ for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
+ LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, *Units);
// If there is 10 or more interferences, chances are one is heavier.
if (Q.collectInterferingVRegs(10) >= 10)
return false;
@@ -548,15 +540,21 @@ bool RAGreedy::canEvictInterference(LiveInterval &VirtReg, unsigned PhysReg,
// Check if any interfering live range is heavier than MaxWeight.
for (unsigned i = Q.interferingVRegs().size(); i; --i) {
LiveInterval *Intf = Q.interferingVRegs()[i - 1];
- if (TargetRegisterInfo::isPhysicalRegister(Intf->reg))
- return false;
+ assert(TargetRegisterInfo::isVirtualRegister(Intf->reg) &&
+ "Only expecting virtual register interference from query");
// Never evict spill products. They cannot split or spill.
if (getStage(*Intf) == RS_Done)
return false;
// Once a live range becomes small enough, it is urgent that we find a
// register for it. This is indicated by an infinite spill weight. These
// urgent live ranges get to evict almost anything.
- bool Urgent = !VirtReg.isSpillable() && Intf->isSpillable();
+ //
+ // Also allow urgent evictions of unspillable ranges from a strictly
+ // larger allocation order.
+ bool Urgent = !VirtReg.isSpillable() &&
+ (Intf->isSpillable() ||
+ RegClassInfo.getNumAllocatableRegs(MRI->getRegClass(VirtReg.reg)) <
+ RegClassInfo.getNumAllocatableRegs(MRI->getRegClass(Intf->reg)));
// Only evict older cascades or live ranges without a cascade.
unsigned IntfCascade = ExtraRegInfo[Intf->reg].Cascade;
if (Cascade <= IntfCascade) {
@@ -597,19 +595,29 @@ void RAGreedy::evictInterference(LiveInterval &VirtReg, unsigned PhysReg,
DEBUG(dbgs() << "evicting " << PrintReg(PhysReg, TRI)
<< " interference: Cascade " << Cascade << '\n');
- for (const uint16_t *AliasI = TRI->getOverlaps(PhysReg); *AliasI; ++AliasI) {
- LiveIntervalUnion::Query &Q = query(VirtReg, *AliasI);
+
+ // Collect all interfering virtregs first.
+ SmallVector<LiveInterval*, 8> Intfs;
+ for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
+ LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, *Units);
assert(Q.seenAllInterferences() && "Didn't check all interfererences.");
- for (unsigned i = 0, e = Q.interferingVRegs().size(); i != e; ++i) {
- LiveInterval *Intf = Q.interferingVRegs()[i];
- unassign(*Intf, VRM->getPhys(Intf->reg));
- assert((ExtraRegInfo[Intf->reg].Cascade < Cascade ||
- VirtReg.isSpillable() < Intf->isSpillable()) &&
- "Cannot decrease cascade number, illegal eviction");
- ExtraRegInfo[Intf->reg].Cascade = Cascade;
- ++NumEvicted;
- NewVRegs.push_back(Intf);
- }
+ ArrayRef<LiveInterval*> IVR = Q.interferingVRegs();
+ Intfs.append(IVR.begin(), IVR.end());
+ }
+
+ // Evict them second. This will invalidate the queries.
+ for (unsigned i = 0, e = Intfs.size(); i != e; ++i) {
+ LiveInterval *Intf = Intfs[i];
+ // The same VirtReg may be present in multiple RegUnits. Skip duplicates.
+ if (!VRM->hasPhys(Intf->reg))
+ continue;
+ Matrix->unassign(*Intf);
+ assert((ExtraRegInfo[Intf->reg].Cascade < Cascade ||
+ VirtReg.isSpillable() < Intf->isSpillable()) &&
+ "Cannot decrease cascade number, illegal eviction");
+ ExtraRegInfo[Intf->reg].Cascade = Cascade;
+ ++NumEvicted;
+ NewVRegs.push_back(Intf);
}
}
@@ -636,8 +644,6 @@ unsigned RAGreedy::tryEvict(LiveInterval &VirtReg,
Order.rewind();
while (unsigned PhysReg = Order.next()) {
- if (clobberedByRegMask(PhysReg))
- continue;
if (TRI->getCostPerUse(PhysReg) >= CostPerUseLimit)
continue;
// The first use of a callee-saved register in a function has cost 1.
@@ -1183,7 +1189,7 @@ unsigned RAGreedy::tryRegionSplit(LiveInterval &VirtReg, AllocationOrder &Order,
return 0;
// Prepare split editor.
- LiveRangeEdit LREdit(VirtReg, NewVRegs, *MF, *LIS, VRM, this);
+ LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this);
SE->reset(LREdit, SplitSpillMode);
// Assign all edge bundles to the preferred candidate, or NoCand.
@@ -1231,7 +1237,7 @@ unsigned RAGreedy::tryBlockSplit(LiveInterval &VirtReg, AllocationOrder &Order,
assert(&SA->getParent() == &VirtReg && "Live range wasn't analyzed");
unsigned Reg = VirtReg.reg;
bool SingleInstrs = RegClassInfo.isProperSubClass(MRI->getRegClass(Reg));
- LiveRangeEdit LREdit(VirtReg, NewVRegs, *MF, *LIS, VRM, this);
+ LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this);
SE->reset(LREdit, SplitSpillMode);
ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks();
for (unsigned i = 0; i != UseBlocks.size(); ++i) {
@@ -1265,6 +1271,65 @@ unsigned RAGreedy::tryBlockSplit(LiveInterval &VirtReg, AllocationOrder &Order,
return 0;
}
+
+//===----------------------------------------------------------------------===//
+// Per-Instruction Splitting
+//===----------------------------------------------------------------------===//
+
+/// tryInstructionSplit - Split a live range around individual instructions.
+/// This is normally not worthwhile since the spiller is doing essentially the
+/// same thing. However, when the live range is in a constrained register
+/// class, it may help to insert copies such that parts of the live range can
+/// be moved to a larger register class.
+///
+/// This is similar to spilling to a larger register class.
+unsigned
+RAGreedy::tryInstructionSplit(LiveInterval &VirtReg, AllocationOrder &Order,
+ SmallVectorImpl<LiveInterval*> &NewVRegs) {
+ // There is no point to this if there are no larger sub-classes.
+ if (!RegClassInfo.isProperSubClass(MRI->getRegClass(VirtReg.reg)))
+ return 0;
+
+ // Always enable split spill mode, since we're effectively spilling to a
+ // register.
+ LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this);
+ SE->reset(LREdit, SplitEditor::SM_Size);
+
+ ArrayRef<SlotIndex> Uses = SA->getUseSlots();
+ if (Uses.size() <= 1)
+ return 0;
+
+ DEBUG(dbgs() << "Split around " << Uses.size() << " individual instrs.\n");
+
+ // Split around every non-copy instruction.
+ for (unsigned i = 0; i != Uses.size(); ++i) {
+ if (const MachineInstr *MI = Indexes->getInstructionFromIndex(Uses[i]))
+ if (MI->isFullCopy()) {
+ DEBUG(dbgs() << " skip:\t" << Uses[i] << '\t' << *MI);
+ continue;
+ }
+ SE->openIntv();
+ SlotIndex SegStart = SE->enterIntvBefore(Uses[i]);
+ SlotIndex SegStop = SE->leaveIntvAfter(Uses[i]);
+ SE->useIntv(SegStart, SegStop);
+ }
+
+ if (LREdit.empty()) {
+ DEBUG(dbgs() << "All uses were copies.\n");
+ return 0;
+ }
+
+ SmallVector<unsigned, 8> IntvMap;
+ SE->finish(&IntvMap);
+ DebugVars->splitRegister(VirtReg.reg, LREdit.regs());
+ ExtraRegInfo.resize(MRI->getNumVirtRegs());
+
+ // Assign all new registers to RS_Spill. This was the last chance.
+ setStage(LREdit.begin(), LREdit.end(), RS_Spill);
+ return 0;
+}
+
+
//===----------------------------------------------------------------------===//
// Local Splitting
//===----------------------------------------------------------------------===//
@@ -1291,9 +1356,9 @@ void RAGreedy::calcGapWeights(unsigned PhysReg,
GapWeight.assign(NumGaps, 0.0f);
// Add interference from each overlapping register.
- for (const uint16_t *AI = TRI->getOverlaps(PhysReg); *AI; ++AI) {
- if (!query(const_cast<LiveInterval&>(SA->getParent()), *AI)
- .checkInterference())
+ for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
+ if (!Matrix->query(const_cast<LiveInterval&>(SA->getParent()), *Units)
+ .checkInterference())
continue;
// We know that VirtReg is a continuous interval from FirstInstr to
@@ -1303,7 +1368,8 @@ void RAGreedy::calcGapWeights(unsigned PhysReg,
// surrounding the instruction. The exception is interference before
// StartIdx and after StopIdx.
//
- LiveIntervalUnion::SegmentIter IntI = getLiveUnion(*AI).find(StartIdx);
+ LiveIntervalUnion::SegmentIter IntI =
+ Matrix->getLiveUnions()[*Units] .find(StartIdx);
for (unsigned Gap = 0; IntI.valid() && IntI.start() < StopIdx; ++IntI) {
// Skip the gaps before IntI.
while (Uses[Gap+1].getBoundaryIndex() < IntI.start())
@@ -1323,6 +1389,30 @@ void RAGreedy::calcGapWeights(unsigned PhysReg,
break;
}
}
+
+ // Add fixed interference.
+ for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
+ const LiveInterval &LI = LIS->getRegUnit(*Units);
+ LiveInterval::const_iterator I = LI.find(StartIdx);
+ LiveInterval::const_iterator E = LI.end();
+
+ // Same loop as above. Mark any overlapped gaps as HUGE_VALF.
+ for (unsigned Gap = 0; I != E && I->start < StopIdx; ++I) {
+ while (Uses[Gap+1].getBoundaryIndex() < I->start)
+ if (++Gap == NumGaps)
+ break;
+ if (Gap == NumGaps)
+ break;
+
+ for (; Gap != NumGaps; ++Gap) {
+ GapWeight[Gap] = HUGE_VALF;
+ if (Uses[Gap+1].getBaseIndex() >= I->end)
+ break;
+ }
+ if (Gap == NumGaps)
+ break;
+ }
+ }
}
/// tryLocalSplit - Try to split VirtReg into smaller intervals inside its only
@@ -1355,7 +1445,7 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order,
// If VirtReg is live across any register mask operands, compute a list of
// gaps with register masks.
SmallVector<unsigned, 8> RegMaskGaps;
- if (!UsableRegs.empty()) {
+ if (Matrix->checkRegMaskInterference(VirtReg)) {
// Get regmask slots for the whole block.
ArrayRef<SlotIndex> RMS = LIS->getRegMaskSlotsInBlock(BI.MBB->getNumber());
DEBUG(dbgs() << RMS.size() << " regmasks in block:");
@@ -1417,7 +1507,7 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order,
calcGapWeights(PhysReg, GapWeight);
// Remove any gaps with regmask clobbers.
- if (clobberedByRegMask(PhysReg))
+ if (Matrix->checkRegMaskInterference(VirtReg, PhysReg))
for (unsigned i = 0, e = RegMaskGaps.size(); i != e; ++i)
GapWeight[RegMaskGaps[i]] = HUGE_VALF;
@@ -1512,7 +1602,7 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order,
<< '-' << Uses[BestAfter] << ", " << BestDiff
<< ", " << (BestAfter - BestBefore + 1) << " instrs\n");
- LiveRangeEdit LREdit(VirtReg, NewVRegs, *MF, *LIS, VRM, this);
+ LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this);
SE->reset(LREdit);
SE->openIntv();
@@ -1561,7 +1651,10 @@ unsigned RAGreedy::trySplit(LiveInterval &VirtReg, AllocationOrder &Order,
if (LIS->intervalIsInOneMBB(VirtReg)) {
NamedRegionTimer T("Local Splitting", TimerGroupName, TimePassesIsEnabled);
SA->analyze(&VirtReg);
- return tryLocalSplit(VirtReg, Order, NewVRegs);
+ unsigned PhysReg = tryLocalSplit(VirtReg, Order, NewVRegs);
+ if (PhysReg || !NewVRegs.empty())
+ return PhysReg;
+ return tryInstructionSplit(VirtReg, Order, NewVRegs);
}
NamedRegionTimer T("Global Splitting", TimerGroupName, TimePassesIsEnabled);
@@ -1574,7 +1667,7 @@ unsigned RAGreedy::trySplit(LiveInterval &VirtReg, AllocationOrder &Order,
// an assertion when the coalescer is fixed.
if (SA->didRepairRange()) {
// VirtReg has changed, so all cached queries are invalid.
- invalidateVirtRegs();
+ Matrix->invalidateVirtRegs();
if (unsigned PhysReg = tryAssign(VirtReg, Order, NewVRegs))
return PhysReg;
}
@@ -1599,11 +1692,6 @@ unsigned RAGreedy::trySplit(LiveInterval &VirtReg, AllocationOrder &Order,
unsigned RAGreedy::selectOrSplit(LiveInterval &VirtReg,
SmallVectorImpl<LiveInterval*> &NewVRegs) {
- // Check if VirtReg is live across any calls.
- UsableRegs.clear();
- if (LIS->checkRegMaskInterference(VirtReg, UsableRegs))
- DEBUG(dbgs() << "Live across regmasks.\n");
-
// First try assigning a free register.
AllocationOrder Order(VirtReg.reg, *VRM, RegClassInfo);
if (unsigned PhysReg = tryAssign(VirtReg, Order, NewVRegs))
@@ -1644,7 +1732,7 @@ unsigned RAGreedy::selectOrSplit(LiveInterval &VirtReg,
// Finally spill VirtReg itself.
NamedRegionTimer T("Spiller", TimerGroupName, TimePassesIsEnabled);
- LiveRangeEdit LRE(VirtReg, NewVRegs, *MF, *LIS, VRM, this);
+ LiveRangeEdit LRE(&VirtReg, NewVRegs, *MF, *LIS, VRM, this);
spiller().spill(LRE);
setStage(NewVRegs.begin(), NewVRegs.end(), RS_Done);
@@ -1665,7 +1753,9 @@ bool RAGreedy::runOnMachineFunction(MachineFunction &mf) {
if (VerifyEnabled)
MF->verify(this, "Before greedy register allocator");
- RegAllocBase::init(getAnalysis<VirtRegMap>(), getAnalysis<LiveIntervals>());
+ RegAllocBase::init(getAnalysis<VirtRegMap>(),
+ getAnalysis<LiveIntervals>(),
+ getAnalysis<LiveRegMatrix>());
Indexes = &getAnalysis<SlotIndexes>();
DomTree = &getAnalysis<MachineDominatorTree>();
SpillerInstance.reset(createInlineSpiller(*this, *MF, *VRM));
@@ -1679,30 +1769,10 @@ bool RAGreedy::runOnMachineFunction(MachineFunction &mf) {
ExtraRegInfo.clear();
ExtraRegInfo.resize(MRI->getNumVirtRegs());
NextCascade = 1;
- IntfCache.init(MF, &getLiveUnion(0), Indexes, LIS, TRI);
+ IntfCache.init(MF, Matrix->getLiveUnions(), Indexes, LIS, TRI);
GlobalCand.resize(32); // This will grow as needed.
allocatePhysRegs();
- addMBBLiveIns(MF);
- LIS->addKillFlags();
-
- // Run rewriter
- {
- NamedRegionTimer T("Rewriter", TimerGroupName, TimePassesIsEnabled);
- VRM->rewrite(Indexes);
- }
-
- // Write out new DBG_VALUE instructions.
- {
- NamedRegionTimer T("Emit Debug Info", TimerGroupName, TimePassesIsEnabled);
- DebugVars->emitDebugValues(VRM);
- }
-
- // All machine operands and other references to virtual registers have been
- // replaced. Remove the virtual registers and release all the transient data.
- VRM->clearAllVirt();
- MRI->clearVirtRegs();
releaseMemory();
-
return true;
}
diff --git a/lib/CodeGen/RegAllocPBQP.cpp b/lib/CodeGen/RegAllocPBQP.cpp
index a284614..d0db26b 100644
--- a/lib/CodeGen/RegAllocPBQP.cpp
+++ b/lib/CodeGen/RegAllocPBQP.cpp
@@ -31,7 +31,6 @@
#define DEBUG_TYPE "regalloc"
-#include "RenderMachineFunction.h"
#include "Spiller.h"
#include "VirtRegMap.h"
#include "RegisterCoalescer.h"
@@ -98,7 +97,6 @@ public:
initializeLiveStacksPass(*PassRegistry::getPassRegistry());
initializeMachineLoopInfoPass(*PassRegistry::getPassRegistry());
initializeVirtRegMapPass(*PassRegistry::getPassRegistry());
- initializeRenderMachineFunctionPass(*PassRegistry::getPassRegistry());
}
/// Return the pass name.
@@ -134,7 +132,6 @@ private:
const TargetInstrInfo *tii;
const MachineLoopInfo *loopInfo;
MachineRegisterInfo *mri;
- RenderMachineFunction *rmf;
std::auto_ptr<Spiller> spiller;
LiveIntervals *lis;
@@ -196,7 +193,7 @@ std::auto_ptr<PBQPRAProblem> PBQPBuilder::build(MachineFunction *mf,
const RegSet &vregs) {
typedef std::vector<const LiveInterval*> LIVector;
- ArrayRef<SlotIndex> regMaskSlots = lis->getRegMaskSlots();
+ LiveIntervals *LIS = const_cast<LiveIntervals*>(lis);
MachineRegisterInfo *mri = &mf->getRegInfo();
const TargetRegisterInfo *tri = mf->getTarget().getRegisterInfo();
@@ -205,12 +202,11 @@ std::auto_ptr<PBQPRAProblem> PBQPBuilder::build(MachineFunction *mf,
RegSet pregs;
// Collect the set of preg intervals, record that they're used in the MF.
- for (LiveIntervals::const_iterator itr = lis->begin(), end = lis->end();
- itr != end; ++itr) {
- if (TargetRegisterInfo::isPhysicalRegister(itr->first)) {
- pregs.insert(itr->first);
- mri->setPhysRegUsed(itr->first);
- }
+ for (unsigned Reg = 1, e = tri->getNumRegs(); Reg != e; ++Reg) {
+ if (mri->def_empty(Reg))
+ continue;
+ pregs.insert(Reg);
+ mri->setPhysRegUsed(Reg);
}
BitVector reservedRegs = tri->getReservedRegs(*mf);
@@ -220,7 +216,11 @@ std::auto_ptr<PBQPRAProblem> PBQPBuilder::build(MachineFunction *mf,
vregItr != vregEnd; ++vregItr) {
unsigned vreg = *vregItr;
const TargetRegisterClass *trc = mri->getRegClass(vreg);
- const LiveInterval *vregLI = &lis->getInterval(vreg);
+ LiveInterval *vregLI = &LIS->getInterval(vreg);
+
+ // Record any overlaps with regmask operands.
+ BitVector regMaskOverlaps(tri->getNumRegs());
+ LIS->checkRegMaskInterference(*vregLI, regMaskOverlaps);
// Compute an initial allowed set for the current vreg.
typedef std::vector<unsigned> VRAllowed;
@@ -228,80 +228,26 @@ std::auto_ptr<PBQPRAProblem> PBQPBuilder::build(MachineFunction *mf,
ArrayRef<uint16_t> rawOrder = trc->getRawAllocationOrder(*mf);
for (unsigned i = 0; i != rawOrder.size(); ++i) {
unsigned preg = rawOrder[i];
- if (!reservedRegs.test(preg)) {
- vrAllowed.push_back(preg);
- }
- }
-
- RegSet overlappingPRegs;
-
- // Record physical registers whose ranges overlap.
- for (RegSet::const_iterator pregItr = pregs.begin(),
- pregEnd = pregs.end();
- pregItr != pregEnd; ++pregItr) {
- unsigned preg = *pregItr;
- const LiveInterval *pregLI = &lis->getInterval(preg);
-
- if (pregLI->empty()) {
+ if (reservedRegs.test(preg))
continue;
- }
- if (vregLI->overlaps(*pregLI))
- overlappingPRegs.insert(preg);
- }
+ // vregLI crosses a regmask operand that clobbers preg.
+ if (!regMaskOverlaps.empty() && !regMaskOverlaps.test(preg))
+ continue;
- // Record any overlaps with regmask operands.
- BitVector regMaskOverlaps(tri->getNumRegs());
- for (ArrayRef<SlotIndex>::iterator rmItr = regMaskSlots.begin(),
- rmEnd = regMaskSlots.end();
- rmItr != rmEnd; ++rmItr) {
- SlotIndex rmIdx = *rmItr;
- if (vregLI->liveAt(rmIdx)) {
- MachineInstr *rmMI = lis->getInstructionFromIndex(rmIdx);
- const uint32_t* regMask = 0;
- for (MachineInstr::mop_iterator mopItr = rmMI->operands_begin(),
- mopEnd = rmMI->operands_end();
- mopItr != mopEnd; ++mopItr) {
- if (mopItr->isRegMask()) {
- regMask = mopItr->getRegMask();
- break;
- }
+ // vregLI overlaps fixed regunit interference.
+ bool Interference = false;
+ for (MCRegUnitIterator Units(preg, tri); Units.isValid(); ++Units) {
+ if (vregLI->overlaps(LIS->getRegUnit(*Units))) {
+ Interference = true;
+ break;
}
- assert(regMask != 0 && "Couldn't find register mask.");
- regMaskOverlaps.setBitsNotInMask(regMask);
}
- }
+ if (Interference)
+ continue;
- for (unsigned preg = 0; preg < tri->getNumRegs(); ++preg) {
- if (regMaskOverlaps.test(preg))
- overlappingPRegs.insert(preg);
- }
-
- for (RegSet::const_iterator pregItr = overlappingPRegs.begin(),
- pregEnd = overlappingPRegs.end();
- pregItr != pregEnd; ++pregItr) {
- unsigned preg = *pregItr;
-
- // Remove the register from the allowed set.
- VRAllowed::iterator eraseItr =
- std::find(vrAllowed.begin(), vrAllowed.end(), preg);
-
- if (eraseItr != vrAllowed.end()) {
- vrAllowed.erase(eraseItr);
- }
-
- // Also remove any aliases.
- const uint16_t *aliasItr = tri->getAliasSet(preg);
- if (aliasItr != 0) {
- for (; *aliasItr != 0; ++aliasItr) {
- VRAllowed::iterator eraseItr =
- std::find(vrAllowed.begin(), vrAllowed.end(), *aliasItr);
-
- if (eraseItr != vrAllowed.end()) {
- vrAllowed.erase(eraseItr);
- }
- }
- }
+ // preg is usable for this virtual register.
+ vrAllowed.push_back(preg);
}
// Construct the node.
@@ -379,7 +325,7 @@ std::auto_ptr<PBQPRAProblem> PBQPBuilderWithCoalescing::build(
PBQP::Graph &g = p->getGraph();
const TargetMachine &tm = mf->getTarget();
- CoalescerPair cp(*tm.getInstrInfo(), *tm.getRegisterInfo());
+ CoalescerPair cp(*tm.getRegisterInfo());
// Scan the machine function and add a coalescing cost whenever CoalescerPair
// gives the Ok.
@@ -498,21 +444,17 @@ void RegAllocPBQP::getAnalysisUsage(AnalysisUsage &au) const {
au.addRequired<MachineLoopInfo>();
au.addPreserved<MachineLoopInfo>();
au.addRequired<VirtRegMap>();
- au.addRequired<RenderMachineFunction>();
MachineFunctionPass::getAnalysisUsage(au);
}
void RegAllocPBQP::findVRegIntervalsToAlloc() {
// Iterate over all live ranges.
- for (LiveIntervals::iterator itr = lis->begin(), end = lis->end();
- itr != end; ++itr) {
-
- // Ignore physical ones.
- if (TargetRegisterInfo::isPhysicalRegister(itr->first))
+ for (unsigned i = 0, e = mri->getNumVirtRegs(); i != e; ++i) {
+ unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
+ if (mri->reg_nodbg_empty(Reg))
continue;
-
- LiveInterval *li = itr->second;
+ LiveInterval *li = &lis->getInterval(Reg);
// If this live interval is non-empty we will use pbqp to allocate it.
// Empty intervals we allocate in a simple post-processing stage in
@@ -544,16 +486,17 @@ bool RegAllocPBQP::mapPBQPToRegAlloc(const PBQPRAProblem &problem,
if (problem.isPRegOption(vreg, alloc)) {
unsigned preg = problem.getPRegForOption(vreg, alloc);
- DEBUG(dbgs() << "VREG " << vreg << " -> " << tri->getName(preg) << "\n");
+ DEBUG(dbgs() << "VREG " << PrintReg(vreg, tri) << " -> "
+ << tri->getName(preg) << "\n");
assert(preg != 0 && "Invalid preg selected.");
vrm->assignVirt2Phys(vreg, preg);
} else if (problem.isSpillOption(vreg, alloc)) {
vregsToAlloc.erase(vreg);
SmallVector<LiveInterval*, 8> newSpills;
- LiveRangeEdit LRE(lis->getInterval(vreg), newSpills, *mf, *lis, vrm);
+ LiveRangeEdit LRE(&lis->getInterval(vreg), newSpills, *mf, *lis, vrm);
spiller->spill(LRE);
- DEBUG(dbgs() << "VREG " << vreg << " -> SPILLED (Cost: "
+ DEBUG(dbgs() << "VREG " << PrintReg(vreg, tri) << " -> SPILLED (Cost: "
<< LRE.getParent().weight << ", New vregs: ");
// Copy any newly inserted live intervals into the list of regs to
@@ -561,7 +504,7 @@ bool RegAllocPBQP::mapPBQPToRegAlloc(const PBQPRAProblem &problem,
for (LiveRangeEdit::iterator itr = LRE.begin(), end = LRE.end();
itr != end; ++itr) {
assert(!(*itr)->empty() && "Empty spill range.");
- DEBUG(dbgs() << (*itr)->reg << " ");
+ DEBUG(dbgs() << PrintReg((*itr)->reg, tri) << " ");
vregsToAlloc.insert((*itr)->reg);
}
@@ -579,9 +522,6 @@ bool RegAllocPBQP::mapPBQPToRegAlloc(const PBQPRAProblem &problem,
void RegAllocPBQP::finalizeAlloc() const {
- typedef LiveIntervals::iterator LIIterator;
- typedef LiveInterval::Ranges::const_iterator LRIterator;
-
// First allocate registers for the empty intervals.
for (RegSet::const_iterator
itr = emptyIntervalVRegs.begin(), end = emptyIntervalVRegs.end();
@@ -597,51 +537,6 @@ void RegAllocPBQP::finalizeAlloc() const {
vrm->assignVirt2Phys(li->reg, physReg);
}
-
- // Finally iterate over the basic blocks to compute and set the live-in sets.
- SmallVector<MachineBasicBlock*, 8> liveInMBBs;
- MachineBasicBlock *entryMBB = &*mf->begin();
-
- for (LIIterator liItr = lis->begin(), liEnd = lis->end();
- liItr != liEnd; ++liItr) {
-
- const LiveInterval *li = liItr->second;
- unsigned reg = 0;
-
- // Get the physical register for this interval
- if (TargetRegisterInfo::isPhysicalRegister(li->reg)) {
- reg = li->reg;
- } else if (vrm->isAssignedReg(li->reg)) {
- reg = vrm->getPhys(li->reg);
- } else {
- // Ranges which are assigned a stack slot only are ignored.
- continue;
- }
-
- if (reg == 0) {
- // Filter out zero regs - they're for intervals that were spilled.
- continue;
- }
-
- // Iterate over the ranges of the current interval...
- for (LRIterator lrItr = li->begin(), lrEnd = li->end();
- lrItr != lrEnd; ++lrItr) {
-
- // Find the set of basic blocks which this range is live into...
- if (lis->findLiveInMBBs(lrItr->start, lrItr->end, liveInMBBs)) {
- // And add the physreg for this interval to their live-in sets.
- for (unsigned i = 0; i != liveInMBBs.size(); ++i) {
- if (liveInMBBs[i] != entryMBB) {
- if (!liveInMBBs[i]->isLiveIn(reg)) {
- liveInMBBs[i]->addLiveIn(reg);
- }
- }
- }
- liveInMBBs.clear();
- }
- }
- }
-
}
bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) {
@@ -655,7 +550,6 @@ bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) {
lis = &getAnalysis<LiveIntervals>();
lss = &getAnalysis<LiveStacks>();
loopInfo = &getAnalysis<MachineLoopInfo>();
- rmf = &getAnalysis<RenderMachineFunction>();
vrm = &getAnalysis<VirtRegMap>();
spiller.reset(createInlineSpiller(*this, MF, *vrm));
@@ -719,22 +613,11 @@ bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) {
// Finalise allocation, allocate empty ranges.
finalizeAlloc();
-
- rmf->renderMachineFunction("After PBQP register allocation.", vrm);
-
vregsToAlloc.clear();
emptyIntervalVRegs.clear();
DEBUG(dbgs() << "Post alloc VirtRegMap:\n" << *vrm << "\n");
- // Run rewriter
- vrm->rewrite(lis->getSlotIndexes());
-
- // All machine operands and other references to virtual registers have been
- // replaced. Remove the virtual registers.
- vrm->clearAllVirt();
- mri->clearVirtRegs();
-
return true;
}
diff --git a/lib/CodeGen/RegisterClassInfo.cpp b/lib/CodeGen/RegisterClassInfo.cpp
index 17165fa..652bc30 100644
--- a/lib/CodeGen/RegisterClassInfo.cpp
+++ b/lib/CodeGen/RegisterClassInfo.cpp
@@ -15,8 +15,8 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "regalloc"
-#include "RegisterClassInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/RegisterClassInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
@@ -50,9 +50,8 @@ void RegisterClassInfo::runOnMachineFunction(const MachineFunction &mf) {
CSRNum.clear();
CSRNum.resize(TRI->getNumRegs(), 0);
for (unsigned N = 0; unsigned Reg = CSR[N]; ++N)
- for (const uint16_t *AS = TRI->getOverlaps(Reg);
- unsigned Alias = *AS; ++AS)
- CSRNum[Alias] = N + 1; // 0 means no CSR, 1 means CalleeSaved[0], ...
+ for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
+ CSRNum[*AI] = N + 1; // 0 means no CSR, 1 means CalleeSaved[0], ...
Update = true;
}
CalleeSaved = CSR;
diff --git a/lib/CodeGen/RegisterClassInfo.h b/lib/CodeGen/RegisterClassInfo.h
deleted file mode 100644
index 400e1f4..0000000
--- a/lib/CodeGen/RegisterClassInfo.h
+++ /dev/null
@@ -1,132 +0,0 @@
-//===-- RegisterClassInfo.h - Dynamic Register Class Info -*- C++ -*-------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the RegisterClassInfo class which provides dynamic
-// information about target register classes. Callee saved and reserved
-// registers depends on calling conventions and other dynamic information, so
-// some things cannot be determined statically.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_CODEGEN_REGISTERCLASSINFO_H
-#define LLVM_CODEGEN_REGISTERCLASSINFO_H
-
-#include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/BitVector.h"
-#include "llvm/ADT/OwningPtr.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-
-namespace llvm {
-
-class RegisterClassInfo {
- struct RCInfo {
- unsigned Tag;
- unsigned NumRegs;
- bool ProperSubClass;
- OwningArrayPtr<unsigned> Order;
-
- RCInfo() : Tag(0), NumRegs(0), ProperSubClass(false) {}
- operator ArrayRef<unsigned>() const {
- return makeArrayRef(Order.get(), NumRegs);
- }
- };
-
- // Brief cached information for each register class.
- OwningArrayPtr<RCInfo> RegClass;
-
- // Tag changes whenever cached information needs to be recomputed. An RCInfo
- // entry is valid when its tag matches.
- unsigned Tag;
-
- const MachineFunction *MF;
- const TargetRegisterInfo *TRI;
-
- // Callee saved registers of last MF. Assumed to be valid until the next
- // runOnFunction() call.
- const uint16_t *CalleeSaved;
-
- // Map register number to CalleeSaved index + 1;
- SmallVector<uint8_t, 4> CSRNum;
-
- // Reserved registers in the current MF.
- BitVector Reserved;
-
- // Compute all information about RC.
- void compute(const TargetRegisterClass *RC) const;
-
- // Return an up-to-date RCInfo for RC.
- const RCInfo &get(const TargetRegisterClass *RC) const {
- const RCInfo &RCI = RegClass[RC->getID()];
- if (Tag != RCI.Tag)
- compute(RC);
- return RCI;
- }
-
-public:
- RegisterClassInfo();
-
- /// runOnFunction - Prepare to answer questions about MF. This must be called
- /// before any other methods are used.
- void runOnMachineFunction(const MachineFunction &MF);
-
- /// getNumAllocatableRegs - Returns the number of actually allocatable
- /// registers in RC in the current function.
- unsigned getNumAllocatableRegs(const TargetRegisterClass *RC) const {
- return get(RC).NumRegs;
- }
-
- /// getOrder - Returns the preferred allocation order for RC. The order
- /// contains no reserved registers, and registers that alias callee saved
- /// registers come last.
- ArrayRef<unsigned> getOrder(const TargetRegisterClass *RC) const {
- return get(RC);
- }
-
- /// isProperSubClass - Returns true if RC has a legal super-class with more
- /// allocatable registers.
- ///
- /// Register classes like GR32_NOSP are not proper sub-classes because %esp
- /// is not allocatable. Similarly, tGPR is not a proper sub-class in Thumb
- /// mode because the GPR super-class is not legal.
- bool isProperSubClass(const TargetRegisterClass *RC) const {
- return get(RC).ProperSubClass;
- }
-
- /// getLastCalleeSavedAlias - Returns the last callee saved register that
- /// overlaps PhysReg, or 0 if Reg doesn't overlap a CSR.
- unsigned getLastCalleeSavedAlias(unsigned PhysReg) const {
- assert(TargetRegisterInfo::isPhysicalRegister(PhysReg));
- if (unsigned N = CSRNum[PhysReg])
- return CalleeSaved[N-1];
- return 0;
- }
-
- /// isReserved - Returns true when PhysReg is a reserved register.
- ///
- /// Reserved registers may belong to an allocatable register class, but the
- /// target has explicitly requested that they are not used.
- ///
- bool isReserved(unsigned PhysReg) const {
- return Reserved.test(PhysReg);
- }
-
- /// isAllocatable - Returns true when PhysReg belongs to an allocatable
- /// register class and it hasn't been reserved.
- ///
- /// Allocatable registers may show up in the allocation order of some virtual
- /// register, so a register allocator needs to track its liveness and
- /// availability.
- bool isAllocatable(unsigned PhysReg) const {
- return TRI->isInAllocatableClass(PhysReg) && !isReserved(PhysReg);
- }
-};
-} // end namespace llvm
-
-#endif
-
diff --git a/lib/CodeGen/RegisterCoalescer.cpp b/lib/CodeGen/RegisterCoalescer.cpp
index 75f88ca..9906334 100644
--- a/lib/CodeGen/RegisterCoalescer.cpp
+++ b/lib/CodeGen/RegisterCoalescer.cpp
@@ -16,34 +16,35 @@
#define DEBUG_TYPE "regalloc"
#include "RegisterCoalescer.h"
#include "LiveDebugVariables.h"
-#include "RegisterClassInfo.h"
#include "VirtRegMap.h"
#include "llvm/Pass.h"
#include "llvm/Value.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
-#include "llvm/CodeGen/MachineInstr.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
-#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/CodeGen/LiveRangeEdit.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetOptions.h"
+#include "llvm/CodeGen/RegisterClassInfo.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/OwningPtr.h"
-#include "llvm/ADT/SmallSet.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/STLExtras.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetRegisterInfo.h"
#include <algorithm>
#include <cmath>
using namespace llvm;
@@ -53,8 +54,6 @@ STATISTIC(numCrossRCs , "Number of cross class joins performed");
STATISTIC(numCommutes , "Number of instruction commuting performed");
STATISTIC(numExtends , "Number of copies extended");
STATISTIC(NumReMats , "Number of instructions re-materialized");
-STATISTIC(numPeep , "Number of identity moves eliminated after coalescing");
-STATISTIC(numAborts , "Number of times interval joining aborted");
STATISTIC(NumInflated , "Number of register classes inflated");
static cl::opt<bool>
@@ -63,22 +62,13 @@ EnableJoining("join-liveintervals",
cl::init(true));
static cl::opt<bool>
-DisableCrossClassJoin("disable-cross-class-join",
- cl::desc("Avoid coalescing cross register class copies"),
- cl::init(false), cl::Hidden);
-
-static cl::opt<bool>
-EnablePhysicalJoin("join-physregs",
- cl::desc("Join physical register copies"),
- cl::init(false), cl::Hidden);
-
-static cl::opt<bool>
VerifyCoalescing("verify-coalescing",
cl::desc("Verify machine instrs before and after register coalescing"),
cl::Hidden);
namespace {
- class RegisterCoalescer : public MachineFunctionPass {
+ class RegisterCoalescer : public MachineFunctionPass,
+ private LiveRangeEdit::Delegate {
MachineFunction* MF;
MachineRegisterInfo* MRI;
const TargetMachine* TM;
@@ -90,87 +80,83 @@ namespace {
AliasAnalysis *AA;
RegisterClassInfo RegClassInfo;
- /// JoinedCopies - Keep track of copies eliminated due to coalescing.
- ///
- SmallPtrSet<MachineInstr*, 32> JoinedCopies;
+ /// WorkList - Copy instructions yet to be coalesced.
+ SmallVector<MachineInstr*, 8> WorkList;
+
+ /// ErasedInstrs - Set of instruction pointers that have been erased, and
+ /// that may be present in WorkList.
+ SmallPtrSet<MachineInstr*, 8> ErasedInstrs;
+
+ /// Dead instructions that are about to be deleted.
+ SmallVector<MachineInstr*, 8> DeadDefs;
+
+ /// Virtual registers to be considered for register class inflation.
+ SmallVector<unsigned, 8> InflateRegs;
- /// ReMatCopies - Keep track of copies eliminated due to remat.
- ///
- SmallPtrSet<MachineInstr*, 32> ReMatCopies;
+ /// Recursively eliminate dead defs in DeadDefs.
+ void eliminateDeadDefs();
- /// ReMatDefs - Keep track of definition instructions which have
- /// been remat'ed.
- SmallPtrSet<MachineInstr*, 8> ReMatDefs;
+ /// LiveRangeEdit callback.
+ void LRE_WillEraseInstruction(MachineInstr *MI);
- /// joinIntervals - join compatible live intervals
- void joinIntervals();
+ /// joinAllIntervals - join compatible live intervals
+ void joinAllIntervals();
- /// CopyCoalesceInMBB - Coalesce copies in the specified MBB, putting
- /// copies that cannot yet be coalesced into the "TryAgain" list.
- void CopyCoalesceInMBB(MachineBasicBlock *MBB,
- std::vector<MachineInstr*> &TryAgain);
+ /// copyCoalesceInMBB - Coalesce copies in the specified MBB, putting
+ /// copies that cannot yet be coalesced into WorkList.
+ void copyCoalesceInMBB(MachineBasicBlock *MBB);
- /// JoinCopy - Attempt to join intervals corresponding to SrcReg/DstReg,
+ /// copyCoalesceWorkList - Try to coalesce all copies in WorkList after
+ /// position From. Return true if any progress was made.
+ bool copyCoalesceWorkList(unsigned From = 0);
+
+ /// joinCopy - Attempt to join intervals corresponding to SrcReg/DstReg,
/// which are the src/dst of the copy instruction CopyMI. This returns
/// true if the copy was successfully coalesced away. If it is not
/// currently possible to coalesce this interval, but it may be possible if
/// other things get coalesced, then it returns true by reference in
/// 'Again'.
- bool JoinCopy(MachineInstr *TheCopy, bool &Again);
+ bool joinCopy(MachineInstr *TheCopy, bool &Again);
- /// JoinIntervals - Attempt to join these two intervals. On failure, this
+ /// joinIntervals - Attempt to join these two intervals. On failure, this
/// returns false. The output "SrcInt" will not have been modified, so we
/// can use this information below to update aliases.
- bool JoinIntervals(CoalescerPair &CP);
+ bool joinIntervals(CoalescerPair &CP);
+
+ /// Attempt joining with a reserved physreg.
+ bool joinReservedPhysReg(CoalescerPair &CP);
- /// AdjustCopiesBackFrom - We found a non-trivially-coalescable copy. If
+ /// adjustCopiesBackFrom - We found a non-trivially-coalescable copy. If
/// the source value number is defined by a copy from the destination reg
/// see if we can merge these two destination reg valno# into a single
/// value number, eliminating a copy.
- bool AdjustCopiesBackFrom(const CoalescerPair &CP, MachineInstr *CopyMI);
+ bool adjustCopiesBackFrom(const CoalescerPair &CP, MachineInstr *CopyMI);
- /// HasOtherReachingDefs - Return true if there are definitions of IntB
+ /// hasOtherReachingDefs - Return true if there are definitions of IntB
/// other than BValNo val# that can reach uses of AValno val# of IntA.
- bool HasOtherReachingDefs(LiveInterval &IntA, LiveInterval &IntB,
+ bool hasOtherReachingDefs(LiveInterval &IntA, LiveInterval &IntB,
VNInfo *AValNo, VNInfo *BValNo);
- /// RemoveCopyByCommutingDef - We found a non-trivially-coalescable copy.
+ /// removeCopyByCommutingDef - We found a non-trivially-coalescable copy.
/// If the source value number is defined by a commutable instruction and
/// its other operand is coalesced to the copy dest register, see if we
/// can transform the copy into a noop by commuting the definition.
- bool RemoveCopyByCommutingDef(const CoalescerPair &CP,MachineInstr *CopyMI);
+ bool removeCopyByCommutingDef(const CoalescerPair &CP,MachineInstr *CopyMI);
- /// ReMaterializeTrivialDef - If the source of a copy is defined by a
+ /// reMaterializeTrivialDef - If the source of a copy is defined by a
/// trivial computation, replace the copy by rematerializing the definition.
- /// If PreserveSrcInt is true, make sure SrcInt is valid after the call.
- bool ReMaterializeTrivialDef(LiveInterval &SrcInt, bool PreserveSrcInt,
- unsigned DstReg, MachineInstr *CopyMI);
-
- /// shouldJoinPhys - Return true if a physreg copy should be joined.
- bool shouldJoinPhys(CoalescerPair &CP);
-
- /// isWinToJoinCrossClass - Return true if it's profitable to coalesce
- /// two virtual registers from different register classes.
- bool isWinToJoinCrossClass(unsigned SrcReg,
- unsigned DstReg,
- const TargetRegisterClass *SrcRC,
- const TargetRegisterClass *DstRC,
- const TargetRegisterClass *NewRC);
-
- /// UpdateRegDefsUses - Replace all defs and uses of SrcReg to DstReg and
+ bool reMaterializeTrivialDef(LiveInterval &SrcInt, unsigned DstReg,
+ MachineInstr *CopyMI);
+
+ /// canJoinPhys - Return true if a physreg copy should be joined.
+ bool canJoinPhys(CoalescerPair &CP);
+
+ /// updateRegDefsUses - Replace all defs and uses of SrcReg with DstReg and
/// update the subregister number if it is not zero. If DstReg is a
/// physical register and the existing subregister number of the def / use
/// being updated is not zero, make sure to set it to the correct physical
/// subregister.
- void UpdateRegDefsUses(const CoalescerPair &CP);
-
- /// RemoveDeadDef - If a def of a live interval is now determined dead,
- /// remove the val# it defines. If the live interval becomes empty, remove
- /// it as well.
- bool RemoveDeadDef(LiveInterval &li, MachineInstr *DefMI);
-
- /// markAsJoined - Remember that CopyMI has already been joined.
- void markAsJoined(MachineInstr *CopyMI);
+ void updateRegDefsUses(unsigned SrcReg, unsigned DstReg, unsigned SubIdx);
/// eliminateUndefCopy - Handle copies of undef values.
bool eliminateUndefCopy(MachineInstr *CopyMI, const CoalescerPair &CP);
@@ -233,7 +219,8 @@ static bool isMoveInstr(const TargetRegisterInfo &tri, const MachineInstr *MI,
}
bool CoalescerPair::setRegisters(const MachineInstr *MI) {
- SrcReg = DstReg = SubIdx = 0;
+ SrcReg = DstReg = 0;
+ SrcIdx = DstIdx = 0;
NewRC = 0;
Flipped = CrossClass = false;
@@ -271,39 +258,44 @@ bool CoalescerPair::setRegisters(const MachineInstr *MI) {
}
} else {
// Both registers are virtual.
+ const TargetRegisterClass *SrcRC = MRI.getRegClass(Src);
+ const TargetRegisterClass *DstRC = MRI.getRegClass(Dst);
// Both registers have subreg indices.
if (SrcSub && DstSub) {
- // For now we only handle the case of identical indices in commensurate
- // registers: Dreg:ssub_1 + Dreg:ssub_1 -> Dreg
- // FIXME: Handle Qreg:ssub_3 + Dreg:ssub_1 as QReg:dsub_1 + Dreg.
- if (SrcSub != DstSub)
+ // Different sub-registers of the same register are never coalescable.
+ if (Src == Dst && SrcSub != DstSub)
return false;
- const TargetRegisterClass *SrcRC = MRI.getRegClass(Src);
- const TargetRegisterClass *DstRC = MRI.getRegClass(Dst);
- if (!TRI.getCommonSubClass(DstRC, SrcRC))
+
+ NewRC = TRI.getCommonSuperRegClass(SrcRC, SrcSub, DstRC, DstSub,
+ SrcIdx, DstIdx);
+ if (!NewRC)
return false;
- SrcSub = DstSub = 0;
+ } else if (DstSub) {
+ // SrcReg will be merged with a sub-register of DstReg.
+ SrcIdx = DstSub;
+ NewRC = TRI.getMatchingSuperRegClass(DstRC, SrcRC, DstSub);
+ } else if (SrcSub) {
+ // DstReg will be merged with a sub-register of SrcReg.
+ DstIdx = SrcSub;
+ NewRC = TRI.getMatchingSuperRegClass(SrcRC, DstRC, SrcSub);
+ } else {
+ // This is a straight copy without sub-registers.
+ NewRC = TRI.getCommonSubClass(DstRC, SrcRC);
}
- // There can be no SrcSub.
- if (SrcSub) {
+ // The combined constraint may be impossible to satisfy.
+ if (!NewRC)
+ return false;
+
+ // Prefer SrcReg to be a sub-register of DstReg.
+ // FIXME: Coalescer should support subregs symmetrically.
+ if (DstIdx && !SrcIdx) {
std::swap(Src, Dst);
- DstSub = SrcSub;
- SrcSub = 0;
- assert(!Flipped && "Unexpected flip");
- Flipped = true;
+ std::swap(SrcIdx, DstIdx);
+ Flipped = !Flipped;
}
- // Find the new register class.
- const TargetRegisterClass *SrcRC = MRI.getRegClass(Src);
- const TargetRegisterClass *DstRC = MRI.getRegClass(Dst);
- if (DstSub)
- NewRC = TRI.getMatchingSuperRegClass(DstRC, SrcRC, DstSub);
- else
- NewRC = TRI.getCommonSubClass(DstRC, SrcRC);
- if (!NewRC)
- return false;
CrossClass = NewRC != DstRC || NewRC != SrcRC;
}
// Check our invariants
@@ -312,14 +304,14 @@ bool CoalescerPair::setRegisters(const MachineInstr *MI) {
"Cannot have a physical SubIdx");
SrcReg = Src;
DstReg = Dst;
- SubIdx = DstSub;
return true;
}
bool CoalescerPair::flip() {
- if (SubIdx || TargetRegisterInfo::isPhysicalRegister(DstReg))
+ if (TargetRegisterInfo::isPhysicalRegister(DstReg))
return false;
std::swap(SrcReg, DstReg);
+ std::swap(SrcIdx, DstIdx);
Flipped = !Flipped;
return true;
}
@@ -343,7 +335,7 @@ bool CoalescerPair::isCoalescable(const MachineInstr *MI) const {
if (TargetRegisterInfo::isPhysicalRegister(DstReg)) {
if (!TargetRegisterInfo::isPhysicalRegister(Dst))
return false;
- assert(!SubIdx && "Inconsistent CoalescerPair state.");
+ assert(!DstIdx && !SrcIdx && "Inconsistent CoalescerPair state.");
// DstSub could be set for a physreg from INSERT_SUBREG.
if (DstSub)
Dst = TRI.getSubReg(Dst, DstSub);
@@ -357,7 +349,7 @@ bool CoalescerPair::isCoalescable(const MachineInstr *MI) const {
if (DstReg != Dst)
return false;
// Registers match, do the subregisters line up?
- return compose(TRI, SubIdx, SrcSub) == DstSub;
+ return compose(TRI, SrcIdx, SrcSub) == compose(TRI, DstIdx, DstSub);
}
}
@@ -375,19 +367,18 @@ void RegisterCoalescer::getAnalysisUsage(AnalysisUsage &AU) const {
MachineFunctionPass::getAnalysisUsage(AU);
}
-void RegisterCoalescer::markAsJoined(MachineInstr *CopyMI) {
- /// Joined copies are not deleted immediately, but kept in JoinedCopies.
- JoinedCopies.insert(CopyMI);
+void RegisterCoalescer::eliminateDeadDefs() {
+ SmallVector<LiveInterval*, 8> NewRegs;
+ LiveRangeEdit(0, NewRegs, *MF, *LIS, 0, this).eliminateDeadDefs(DeadDefs);
+}
- /// Mark all register operands of CopyMI as <undef> so they won't affect dead
- /// code elimination.
- for (MachineInstr::mop_iterator I = CopyMI->operands_begin(),
- E = CopyMI->operands_end(); I != E; ++I)
- if (I->isReg())
- I->setIsUndef(true);
+// Callback from eliminateDeadDefs().
+void RegisterCoalescer::LRE_WillEraseInstruction(MachineInstr *MI) {
+ // MI may be in WorkList. Make sure we don't visit it.
+ ErasedInstrs.insert(MI);
}
-/// AdjustCopiesBackFrom - We found a non-trivially-coalescable copy with IntA
+/// adjustCopiesBackFrom - We found a non-trivially-coalescable copy with IntA
/// being the source and IntB being the dest, thus this defines a value number
/// in IntB. If the source value number (in IntA) is defined by a copy from B,
/// see if we can merge these two pieces of B into a single value number,
@@ -402,12 +393,10 @@ void RegisterCoalescer::markAsJoined(MachineInstr *CopyMI) {
///
/// This returns true if an interval was modified.
///
-bool RegisterCoalescer::AdjustCopiesBackFrom(const CoalescerPair &CP,
- MachineInstr *CopyMI) {
- // Bail if there is no dst interval - can happen when merging physical subreg
- // operations.
- if (!LIS->hasInterval(CP.getDstReg()))
- return false;
+bool RegisterCoalescer::adjustCopiesBackFrom(const CoalescerPair &CP,
+ MachineInstr *CopyMI) {
+ assert(!CP.isPartial() && "This doesn't work for partial copies.");
+ assert(!CP.isPhys() && "This doesn't work for physreg copies.");
LiveInterval &IntA =
LIS->getInterval(CP.isFlipped() ? CP.getDstReg() : CP.getSrcReg());
@@ -457,24 +446,7 @@ bool RegisterCoalescer::AdjustCopiesBackFrom(const CoalescerPair &CP,
// IntB, we can merge them.
if (ValLR+1 != BLR) return false;
- // If a live interval is a physical register, conservatively check if any
- // of its aliases is overlapping the live interval of the virtual register.
- // If so, do not coalesce.
- if (TargetRegisterInfo::isPhysicalRegister(IntB.reg)) {
- for (const uint16_t *AS = TRI->getAliasSet(IntB.reg); *AS; ++AS)
- if (LIS->hasInterval(*AS) && IntA.overlaps(LIS->getInterval(*AS))) {
- DEBUG({
- dbgs() << "\t\tInterfere with alias ";
- LIS->getInterval(*AS).print(dbgs(), TRI);
- });
- return false;
- }
- }
-
- DEBUG({
- dbgs() << "Extending: ";
- IntB.print(dbgs(), TRI);
- });
+ DEBUG(dbgs() << "Extending: " << PrintReg(IntB.reg, TRI));
SlotIndex FillerStart = ValLR->end, FillerEnd = BLR->start;
// We are about to delete CopyMI, so need to remove it as the 'instruction
@@ -487,33 +459,10 @@ bool RegisterCoalescer::AdjustCopiesBackFrom(const CoalescerPair &CP,
// two value numbers.
IntB.addRange(LiveRange(FillerStart, FillerEnd, BValNo));
- // If the IntB live range is assigned to a physical register, and if that
- // physreg has sub-registers, update their live intervals as well.
- if (TargetRegisterInfo::isPhysicalRegister(IntB.reg)) {
- for (const uint16_t *SR = TRI->getSubRegisters(IntB.reg); *SR; ++SR) {
- if (!LIS->hasInterval(*SR))
- continue;
- LiveInterval &SRLI = LIS->getInterval(*SR);
- SRLI.addRange(LiveRange(FillerStart, FillerEnd,
- SRLI.getNextValue(FillerStart,
- LIS->getVNInfoAllocator())));
- }
- }
-
// Okay, merge "B1" into the same value number as "B0".
- if (BValNo != ValLR->valno) {
- // If B1 is killed by a PHI, then the merged live range must also be killed
- // by the same PHI, as B0 and B1 can not overlap.
- bool HasPHIKill = BValNo->hasPHIKill();
+ if (BValNo != ValLR->valno)
IntB.MergeValueNumberInto(BValNo, ValLR->valno);
- if (HasPHIKill)
- ValLR->valno->setHasPHIKill(true);
- }
- DEBUG({
- dbgs() << " result = ";
- IntB.print(dbgs(), TRI);
- dbgs() << "\n";
- });
+ DEBUG(dbgs() << " result = " << IntB << '\n');
// If the source instruction was killing the source register before the
// merge, unset the isKill marker given the live range has been extended.
@@ -525,8 +474,7 @@ bool RegisterCoalescer::AdjustCopiesBackFrom(const CoalescerPair &CP,
// Rewrite the copy. If the copy instruction was killing the destination
// register before the merge, find the last use and trim the live range. That
// will also add the isKill marker.
- CopyMI->substituteRegister(IntA.reg, IntB.reg, CP.getSubIdx(),
- *TRI);
+ CopyMI->substituteRegister(IntA.reg, IntB.reg, 0, *TRI);
if (ALR->end == CopyIdx)
LIS->shrinkToUses(&IntA);
@@ -534,12 +482,17 @@ bool RegisterCoalescer::AdjustCopiesBackFrom(const CoalescerPair &CP,
return true;
}
-/// HasOtherReachingDefs - Return true if there are definitions of IntB
+/// hasOtherReachingDefs - Return true if there are definitions of IntB
/// other than BValNo val# that can reach uses of AValno val# of IntA.
-bool RegisterCoalescer::HasOtherReachingDefs(LiveInterval &IntA,
- LiveInterval &IntB,
- VNInfo *AValNo,
- VNInfo *BValNo) {
+bool RegisterCoalescer::hasOtherReachingDefs(LiveInterval &IntA,
+ LiveInterval &IntB,
+ VNInfo *AValNo,
+ VNInfo *BValNo) {
+ // If AValNo has PHI kills, conservatively assume that IntB defs can reach
+ // the PHI values.
+ if (LIS->hasPHIKill(IntA, AValNo))
+ return true;
+
for (LiveInterval::iterator AI = IntA.begin(), AE = IntA.end();
AI != AE; ++AI) {
if (AI->valno != AValNo) continue;
@@ -559,7 +512,7 @@ bool RegisterCoalescer::HasOtherReachingDefs(LiveInterval &IntA,
return false;
}
-/// RemoveCopyByCommutingDef - We found a non-trivially-coalescable copy with
+/// removeCopyByCommutingDef - We found a non-trivially-coalescable copy with
/// IntA being the source and IntB being the dest, thus this defines a value
/// number in IntB. If the source value number (in IntA) is defined by a
/// commutable instruction and its other operand is coalesced to the copy dest
@@ -582,18 +535,9 @@ bool RegisterCoalescer::HasOtherReachingDefs(LiveInterval &IntA,
///
/// This returns true if an interval was modified.
///
-bool RegisterCoalescer::RemoveCopyByCommutingDef(const CoalescerPair &CP,
- MachineInstr *CopyMI) {
- // FIXME: For now, only eliminate the copy by commuting its def when the
- // source register is a virtual register. We want to guard against cases
- // where the copy is a back edge copy and commuting the def lengthen the
- // live interval of the source register to the entire loop.
- if (CP.isPhys() && CP.isFlipped())
- return false;
-
- // Bail if there is no dst interval.
- if (!LIS->hasInterval(CP.getDstReg()))
- return false;
+bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP,
+ MachineInstr *CopyMI) {
+ assert (!CP.isPhys());
SlotIndex CopyIdx = LIS->getInstructionIndex(CopyMI).getRegSlot();
@@ -613,10 +557,7 @@ bool RegisterCoalescer::RemoveCopyByCommutingDef(const CoalescerPair &CP,
// AValNo is the value number in A that defines the copy, A3 in the example.
VNInfo *AValNo = IntA.getVNInfoAt(CopyIdx.getRegSlot(true));
assert(AValNo && "COPY source not live");
-
- // If other defs can reach uses of this def, then it's not safe to perform
- // the optimization.
- if (AValNo->isPHIDef() || AValNo->isUnused() || AValNo->hasPHIKill())
+ if (AValNo->isPHIDef() || AValNo->isUnused())
return false;
MachineInstr *DefMI = LIS->getInstructionFromIndex(AValNo->def);
if (!DefMI)
@@ -647,17 +588,9 @@ bool RegisterCoalescer::RemoveCopyByCommutingDef(const CoalescerPair &CP,
// Make sure there are no other definitions of IntB that would reach the
// uses which the new definition can reach.
- if (HasOtherReachingDefs(IntA, IntB, AValNo, BValNo))
+ if (hasOtherReachingDefs(IntA, IntB, AValNo, BValNo))
return false;
- // Abort if the aliases of IntB.reg have values that are not simply the
- // clobbers from the superreg.
- if (TargetRegisterInfo::isPhysicalRegister(IntB.reg))
- for (const uint16_t *AS = TRI->getAliasSet(IntB.reg); *AS; ++AS)
- if (LIS->hasInterval(*AS) &&
- HasOtherReachingDefs(IntA, LIS->getInterval(*AS), AValNo, 0))
- return false;
-
// If some of the uses of IntA.reg are already coalesced away, return false.
// It's not possible to determine whether it's safe to perform the coalescing.
for (MachineRegisterInfo::use_nodbg_iterator UI =
@@ -666,13 +599,14 @@ bool RegisterCoalescer::RemoveCopyByCommutingDef(const CoalescerPair &CP,
MachineInstr *UseMI = &*UI;
SlotIndex UseIdx = LIS->getInstructionIndex(UseMI);
LiveInterval::iterator ULR = IntA.FindLiveRangeContaining(UseIdx);
- if (ULR == IntA.end())
+ if (ULR == IntA.end() || ULR->valno != AValNo)
continue;
- if (ULR->valno == AValNo && JoinedCopies.count(UseMI))
+ // If this use is tied to a def, we can't rewrite the register.
+ if (UseMI->isRegTiedToDefOperand(UI.getOperandNo()))
return false;
}
- DEBUG(dbgs() << "\tRemoveCopyByCommutingDef: " << AValNo->def << '\t'
+ DEBUG(dbgs() << "\tremoveCopyByCommutingDef: " << AValNo->def << '\t'
<< *DefMI);
// At this point we have decided that it is legal to do this
@@ -709,8 +643,6 @@ bool RegisterCoalescer::RemoveCopyByCommutingDef(const CoalescerPair &CP,
MachineOperand &UseMO = UI.getOperand();
MachineInstr *UseMI = &*UI;
++UI;
- if (JoinedCopies.count(UseMI))
- continue;
if (UseMI->isDebugValue()) {
// FIXME These don't have an instruction index. Not clear we have enough
// info to decide whether to do this replacement or not. For now do it.
@@ -721,6 +653,8 @@ bool RegisterCoalescer::RemoveCopyByCommutingDef(const CoalescerPair &CP,
LiveInterval::iterator ULR = IntA.FindLiveRangeContaining(UseIdx);
if (ULR == IntA.end() || ULR->valno != AValNo)
continue;
+ // Kill flags are no longer accurate. They are recomputed after RA.
+ UseMO.setIsKill(false);
if (TargetRegisterInfo::isPhysicalRegister(NewReg))
UseMO.substPhysReg(NewReg, *TRI);
else
@@ -742,7 +676,9 @@ bool RegisterCoalescer::RemoveCopyByCommutingDef(const CoalescerPair &CP,
DEBUG(dbgs() << "\t\tnoop: " << DefIdx << '\t' << *UseMI);
assert(DVNI->def == DefIdx);
BValNo = IntB.MergeValueNumberInto(BValNo, DVNI);
- markAsJoined(UseMI);
+ ErasedInstrs.insert(UseMI);
+ LIS->RemoveMachineInstrFromMaps(UseMI);
+ UseMI->eraseFromParent();
}
// Extend BValNo by merging in IntA live ranges of AValNo. Val# definition
@@ -762,12 +698,11 @@ bool RegisterCoalescer::RemoveCopyByCommutingDef(const CoalescerPair &CP,
return true;
}
-/// ReMaterializeTrivialDef - If the source of a copy is defined by a trivial
+/// reMaterializeTrivialDef - If the source of a copy is defined by a trivial
/// computation, replace the copy by rematerializing the definition.
-bool RegisterCoalescer::ReMaterializeTrivialDef(LiveInterval &SrcInt,
- bool preserveSrcInt,
- unsigned DstReg,
- MachineInstr *CopyMI) {
+bool RegisterCoalescer::reMaterializeTrivialDef(LiveInterval &SrcInt,
+ unsigned DstReg,
+ MachineInstr *CopyMI) {
SlotIndex CopyIdx = LIS->getInstructionIndex(CopyMI).getRegSlot(true);
LiveInterval::iterator SrcLR = SrcInt.FindLiveRangeContaining(CopyIdx);
assert(SrcLR != SrcInt.end() && "Live range not found!");
@@ -792,7 +727,7 @@ bool RegisterCoalescer::ReMaterializeTrivialDef(LiveInterval &SrcInt,
// Make sure the copy destination register class fits the instruction
// definition register class. The mismatch can happen as a result of earlier
// extract_subreg, insert_subreg, subreg_to_reg coalescing.
- const TargetRegisterClass *RC = TII->getRegClass(MCID, 0, TRI);
+ const TargetRegisterClass *RC = TII->getRegClass(MCID, 0, TRI, *MF);
if (TargetRegisterInfo::isVirtualRegister(DstReg)) {
if (MRI->getRegClass(DstReg) != RC)
return false;
@@ -838,23 +773,21 @@ bool RegisterCoalescer::ReMaterializeTrivialDef(LiveInterval &SrcInt,
SlotIndex NewMIIdx = LIS->getInstructionIndex(NewMI);
for (unsigned i = 0, e = NewMIImplDefs.size(); i != e; ++i) {
- unsigned reg = NewMIImplDefs[i];
- LiveInterval &li = LIS->getInterval(reg);
- VNInfo *DeadDefVN = li.getNextValue(NewMIIdx.getRegSlot(),
- LIS->getVNInfoAllocator());
- LiveRange lr(NewMIIdx.getRegSlot(), NewMIIdx.getDeadSlot(), DeadDefVN);
- li.addRange(lr);
+ unsigned Reg = NewMIImplDefs[i];
+ for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units)
+ if (LiveInterval *LI = LIS->getCachedRegUnit(*Units))
+ LI->createDeadDef(NewMIIdx.getRegSlot(), LIS->getVNInfoAllocator());
}
CopyMI->eraseFromParent();
- ReMatCopies.insert(CopyMI);
- ReMatDefs.insert(DefMI);
+ ErasedInstrs.insert(CopyMI);
DEBUG(dbgs() << "Remat: " << *NewMI);
++NumReMats;
// The source interval can become smaller because we removed a use.
- if (preserveSrcInt)
- LIS->shrinkToUses(&SrcInt);
+ LIS->shrinkToUses(&SrcInt, &DeadDefs);
+ if (!DeadDefs.empty())
+ eliminateDeadDefs();
return true;
}
@@ -902,51 +835,40 @@ bool RegisterCoalescer::eliminateUndefCopy(MachineInstr *CopyMI,
return true;
}
-/// UpdateRegDefsUses - Replace all defs and uses of SrcReg to DstReg and
+/// updateRegDefsUses - Replace all defs and uses of SrcReg with DstReg and
/// update the subregister number if it is not zero. If DstReg is a
/// physical register and the existing subregister number of the def / use
/// being updated is not zero, make sure to set it to the correct physical
/// subregister.
-void
-RegisterCoalescer::UpdateRegDefsUses(const CoalescerPair &CP) {
- bool DstIsPhys = CP.isPhys();
- unsigned SrcReg = CP.getSrcReg();
- unsigned DstReg = CP.getDstReg();
- unsigned SubIdx = CP.getSubIdx();
+void RegisterCoalescer::updateRegDefsUses(unsigned SrcReg,
+ unsigned DstReg,
+ unsigned SubIdx) {
+ bool DstIsPhys = TargetRegisterInfo::isPhysicalRegister(DstReg);
+ LiveInterval *DstInt = DstIsPhys ? 0 : &LIS->getInterval(DstReg);
// Update LiveDebugVariables.
LDV->renameRegister(SrcReg, DstReg, SubIdx);
for (MachineRegisterInfo::reg_iterator I = MRI->reg_begin(SrcReg);
MachineInstr *UseMI = I.skipInstruction();) {
- // A PhysReg copy that won't be coalesced can perhaps be rematerialized
- // instead.
- if (DstIsPhys) {
- if (UseMI->isFullCopy() &&
- UseMI->getOperand(1).getReg() == SrcReg &&
- UseMI->getOperand(0).getReg() != SrcReg &&
- UseMI->getOperand(0).getReg() != DstReg &&
- !JoinedCopies.count(UseMI) &&
- ReMaterializeTrivialDef(LIS->getInterval(SrcReg), false,
- UseMI->getOperand(0).getReg(), UseMI))
- continue;
- }
-
SmallVector<unsigned,8> Ops;
bool Reads, Writes;
tie(Reads, Writes) = UseMI->readsWritesVirtualRegister(SrcReg, &Ops);
+ // If SrcReg wasn't read, it may still be the case that DstReg is live-in
+ // because SrcReg is a sub-register.
+ if (DstInt && !Reads && SubIdx)
+ Reads = DstInt->liveAt(LIS->getInstructionIndex(UseMI));
+
// Replace SrcReg with DstReg in all UseMI operands.
for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
MachineOperand &MO = UseMI->getOperand(Ops[i]);
- // Make sure we don't create read-modify-write defs accidentally. We
- // assume here that a SrcReg def cannot be joined into a live DstReg. If
- // RegisterCoalescer starts tracking partially live registers, we will
- // need to check the actual LiveInterval to determine if DstReg is live
- // here.
- if (SubIdx && !Reads)
- MO.setIsUndef();
+ // Adjust <undef> flags in case of sub-register joins. We don't want to
+ // turn a full def into a read-modify-write sub-register def and vice
+ // versa.
+ if (SubIdx && MO.isDef())
+ MO.setIsUndef(!Reads);
if (DstIsPhys)
MO.substPhysReg(DstReg, *TRI);
@@ -954,10 +876,6 @@ RegisterCoalescer::UpdateRegDefsUses(const CoalescerPair &CP) {
MO.substVirtReg(DstReg, SubIdx, *TRI);
}
- // This instruction is a copy that will be removed.
- if (JoinedCopies.count(UseMI))
- continue;
-
DEBUG({
dbgs() << "\t\tupdated: ";
if (!UseMI->isDebugValue())
@@ -967,210 +885,107 @@ RegisterCoalescer::UpdateRegDefsUses(const CoalescerPair &CP) {
}
}
-/// removeIntervalIfEmpty - Check if the live interval of a physical register
-/// is empty, if so remove it and also remove the empty intervals of its
-/// sub-registers. Return true if live interval is removed.
-static bool removeIntervalIfEmpty(LiveInterval &li, LiveIntervals *LIS,
- const TargetRegisterInfo *TRI) {
- if (li.empty()) {
- if (TargetRegisterInfo::isPhysicalRegister(li.reg))
- for (const uint16_t* SR = TRI->getSubRegisters(li.reg); *SR; ++SR) {
- if (!LIS->hasInterval(*SR))
- continue;
- LiveInterval &sli = LIS->getInterval(*SR);
- if (sli.empty())
- LIS->removeInterval(*SR);
- }
- LIS->removeInterval(li.reg);
- return true;
- }
- return false;
-}
-
-/// RemoveDeadDef - If a def of a live interval is now determined dead, remove
-/// the val# it defines. If the live interval becomes empty, remove it as well.
-bool RegisterCoalescer::RemoveDeadDef(LiveInterval &li,
- MachineInstr *DefMI) {
- SlotIndex DefIdx = LIS->getInstructionIndex(DefMI).getRegSlot();
- LiveInterval::iterator MLR = li.FindLiveRangeContaining(DefIdx);
- if (DefIdx != MLR->valno->def)
- return false;
- li.removeValNo(MLR->valno);
- return removeIntervalIfEmpty(li, LIS, TRI);
-}
-
-/// shouldJoinPhys - Return true if a copy involving a physreg should be joined.
-/// We need to be careful about coalescing a source physical register with a
-/// virtual register. Once the coalescing is done, it cannot be broken and these
-/// are not spillable! If the destination interval uses are far away, think
-/// twice about coalescing them!
-bool RegisterCoalescer::shouldJoinPhys(CoalescerPair &CP) {
- bool Allocatable = LIS->isAllocatable(CP.getDstReg());
- LiveInterval &JoinVInt = LIS->getInterval(CP.getSrcReg());
-
+/// canJoinPhys - Return true if a copy involving a physreg should be joined.
+bool RegisterCoalescer::canJoinPhys(CoalescerPair &CP) {
/// Always join simple intervals that are defined by a single copy from a
/// reserved register. This doesn't increase register pressure, so it is
/// always beneficial.
- if (!Allocatable && CP.isFlipped() && JoinVInt.containsOneValue())
- return true;
-
- if (!EnablePhysicalJoin) {
- DEBUG(dbgs() << "\tPhysreg joins disabled.\n");
- return false;
- }
-
- // Only coalesce to allocatable physreg, we don't want to risk modifying
- // reserved registers.
- if (!Allocatable) {
- DEBUG(dbgs() << "\tRegister is an unallocatable physreg.\n");
- return false; // Not coalescable.
- }
-
- // Don't join with physregs that have a ridiculous number of live
- // ranges. The data structure performance is really bad when that
- // happens.
- if (LIS->hasInterval(CP.getDstReg()) &&
- LIS->getInterval(CP.getDstReg()).ranges.size() > 1000) {
- ++numAborts;
- DEBUG(dbgs()
- << "\tPhysical register live interval too complicated, abort!\n");
+ if (!RegClassInfo.isReserved(CP.getDstReg())) {
+ DEBUG(dbgs() << "\tCan only merge into reserved registers.\n");
return false;
}
- // FIXME: Why are we skipping this test for partial copies?
- // CodeGen/X86/phys_subreg_coalesce-3.ll needs it.
- if (!CP.isPartial()) {
- const TargetRegisterClass *RC = MRI->getRegClass(CP.getSrcReg());
- unsigned Threshold = RegClassInfo.getNumAllocatableRegs(RC) * 2;
- unsigned Length = LIS->getApproximateInstructionCount(JoinVInt);
- if (Length > Threshold) {
- ++numAborts;
- DEBUG(dbgs() << "\tMay tie down a physical register, abort!\n");
- return false;
- }
- }
- return true;
-}
-
-/// isWinToJoinCrossClass - Return true if it's profitable to coalesce
-/// two virtual registers from different register classes.
-bool
-RegisterCoalescer::isWinToJoinCrossClass(unsigned SrcReg,
- unsigned DstReg,
- const TargetRegisterClass *SrcRC,
- const TargetRegisterClass *DstRC,
- const TargetRegisterClass *NewRC) {
- unsigned NewRCCount = RegClassInfo.getNumAllocatableRegs(NewRC);
- // This heuristics is good enough in practice, but it's obviously not *right*.
- // 4 is a magic number that works well enough for x86, ARM, etc. It filter
- // out all but the most restrictive register classes.
- if (NewRCCount > 4 ||
- // Early exit if the function is fairly small, coalesce aggressively if
- // that's the case. For really special register classes with 3 or
- // fewer registers, be a bit more careful.
- (LIS->getFuncInstructionCount() / NewRCCount) < 8)
- return true;
- LiveInterval &SrcInt = LIS->getInterval(SrcReg);
- LiveInterval &DstInt = LIS->getInterval(DstReg);
- unsigned SrcSize = LIS->getApproximateInstructionCount(SrcInt);
- unsigned DstSize = LIS->getApproximateInstructionCount(DstInt);
-
- // Coalesce aggressively if the intervals are small compared to the number of
- // registers in the new class. The number 4 is fairly arbitrary, chosen to be
- // less aggressive than the 8 used for the whole function size.
- const unsigned ThresSize = 4 * NewRCCount;
- if (SrcSize <= ThresSize && DstSize <= ThresSize)
+ LiveInterval &JoinVInt = LIS->getInterval(CP.getSrcReg());
+ if (CP.isFlipped() && JoinVInt.containsOneValue())
return true;
- // Estimate *register use density*. If it doubles or more, abort.
- unsigned SrcUses = std::distance(MRI->use_nodbg_begin(SrcReg),
- MRI->use_nodbg_end());
- unsigned DstUses = std::distance(MRI->use_nodbg_begin(DstReg),
- MRI->use_nodbg_end());
- unsigned NewUses = SrcUses + DstUses;
- unsigned NewSize = SrcSize + DstSize;
- if (SrcRC != NewRC && SrcSize > ThresSize) {
- unsigned SrcRCCount = RegClassInfo.getNumAllocatableRegs(SrcRC);
- if (NewUses*SrcSize*SrcRCCount > 2*SrcUses*NewSize*NewRCCount)
- return false;
- }
- if (DstRC != NewRC && DstSize > ThresSize) {
- unsigned DstRCCount = RegClassInfo.getNumAllocatableRegs(DstRC);
- if (NewUses*DstSize*DstRCCount > 2*DstUses*NewSize*NewRCCount)
- return false;
- }
- return true;
+ DEBUG(dbgs() << "\tCannot join defs into reserved register.\n");
+ return false;
}
-
-/// JoinCopy - Attempt to join intervals corresponding to SrcReg/DstReg,
+/// joinCopy - Attempt to join intervals corresponding to SrcReg/DstReg,
/// which are the src/dst of the copy instruction CopyMI. This returns true
/// if the copy was successfully coalesced away. If it is not currently
/// possible to coalesce this interval, but it may be possible if other
/// things get coalesced, then it returns true by reference in 'Again'.
-bool RegisterCoalescer::JoinCopy(MachineInstr *CopyMI, bool &Again) {
+bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) {
Again = false;
- if (JoinedCopies.count(CopyMI) || ReMatCopies.count(CopyMI))
- return false; // Already done.
-
DEBUG(dbgs() << LIS->getInstructionIndex(CopyMI) << '\t' << *CopyMI);
- CoalescerPair CP(*TII, *TRI);
+ CoalescerPair CP(*TRI);
if (!CP.setRegisters(CopyMI)) {
DEBUG(dbgs() << "\tNot coalescable.\n");
return false;
}
- // If they are already joined we continue.
- if (CP.getSrcReg() == CP.getDstReg()) {
- markAsJoined(CopyMI);
- DEBUG(dbgs() << "\tCopy already coalesced.\n");
- return false; // Not coalescable.
+ // Dead code elimination. This really should be handled by MachineDCE, but
+ // sometimes dead copies slip through, and we can't generate invalid live
+ // ranges.
+ if (!CP.isPhys() && CopyMI->allDefsAreDead()) {
+ DEBUG(dbgs() << "\tCopy is dead.\n");
+ DeadDefs.push_back(CopyMI);
+ eliminateDeadDefs();
+ return true;
}
// Eliminate undefs.
if (!CP.isPhys() && eliminateUndefCopy(CopyMI, CP)) {
- markAsJoined(CopyMI);
DEBUG(dbgs() << "\tEliminated copy of <undef> value.\n");
+ LIS->RemoveMachineInstrFromMaps(CopyMI);
+ CopyMI->eraseFromParent();
return false; // Not coalescable.
}
- DEBUG(dbgs() << "\tConsidering merging " << PrintReg(CP.getSrcReg(), TRI)
- << " with " << PrintReg(CP.getDstReg(), TRI, CP.getSubIdx())
- << "\n");
+ // Coalesced copies are normally removed immediately, but transformations
+ // like removeCopyByCommutingDef() can inadvertently create identity copies.
+ // When that happens, just join the values and remove the copy.
+ if (CP.getSrcReg() == CP.getDstReg()) {
+ LiveInterval &LI = LIS->getInterval(CP.getSrcReg());
+ DEBUG(dbgs() << "\tCopy already coalesced: " << LI << '\n');
+ LiveRangeQuery LRQ(LI, LIS->getInstructionIndex(CopyMI));
+ if (VNInfo *DefVNI = LRQ.valueDefined()) {
+ VNInfo *ReadVNI = LRQ.valueIn();
+ assert(ReadVNI && "No value before copy and no <undef> flag.");
+ assert(ReadVNI != DefVNI && "Cannot read and define the same value.");
+ LI.MergeValueNumberInto(DefVNI, ReadVNI);
+ DEBUG(dbgs() << "\tMerged values: " << LI << '\n');
+ }
+ LIS->RemoveMachineInstrFromMaps(CopyMI);
+ CopyMI->eraseFromParent();
+ return true;
+ }
// Enforce policies.
if (CP.isPhys()) {
- if (!shouldJoinPhys(CP)) {
+ DEBUG(dbgs() << "\tConsidering merging " << PrintReg(CP.getSrcReg(), TRI)
+ << " with " << PrintReg(CP.getDstReg(), TRI, CP.getSrcIdx())
+ << '\n');
+ if (!canJoinPhys(CP)) {
// Before giving up coalescing, if definition of source is defined by
// trivial computation, try rematerializing it.
if (!CP.isFlipped() &&
- ReMaterializeTrivialDef(LIS->getInterval(CP.getSrcReg()), true,
+ reMaterializeTrivialDef(LIS->getInterval(CP.getSrcReg()),
CP.getDstReg(), CopyMI))
return true;
return false;
}
} else {
- // Avoid constraining virtual register regclass too much.
- if (CP.isCrossClass()) {
- DEBUG(dbgs() << "\tCross-class to " << CP.getNewRC()->getName() << ".\n");
- if (DisableCrossClassJoin) {
- DEBUG(dbgs() << "\tCross-class joins disabled.\n");
- return false;
- }
- if (!isWinToJoinCrossClass(CP.getSrcReg(), CP.getDstReg(),
- MRI->getRegClass(CP.getSrcReg()),
- MRI->getRegClass(CP.getDstReg()),
- CP.getNewRC())) {
- DEBUG(dbgs() << "\tAvoid coalescing to constrained register class.\n");
- Again = true; // May be possible to coalesce later.
- return false;
- }
- }
+ DEBUG({
+ dbgs() << "\tConsidering merging to " << CP.getNewRC()->getName()
+ << " with ";
+ if (CP.getDstIdx() && CP.getSrcIdx())
+ dbgs() << PrintReg(CP.getDstReg()) << " in "
+ << TRI->getSubRegIndexName(CP.getDstIdx()) << " and "
+ << PrintReg(CP.getSrcReg()) << " in "
+ << TRI->getSubRegIndexName(CP.getSrcIdx()) << '\n';
+ else
+ dbgs() << PrintReg(CP.getSrcReg(), TRI) << " in "
+ << PrintReg(CP.getDstReg(), TRI, CP.getSrcIdx()) << '\n';
+ });
// When possible, let DstReg be the larger interval.
- if (!CP.getSubIdx() && LIS->getInterval(CP.getSrcReg()).ranges.size() >
+ if (!CP.isPartial() && LIS->getInterval(CP.getSrcReg()).ranges.size() >
LIS->getInterval(CP.getDstReg()).ranges.size())
CP.flip();
}
@@ -1179,21 +994,22 @@ bool RegisterCoalescer::JoinCopy(MachineInstr *CopyMI, bool &Again) {
// Otherwise, if one of the intervals being joined is a physreg, this method
// always canonicalizes DstInt to be it. The output "SrcInt" will not have
// been modified, so we can use this information below to update aliases.
- if (!JoinIntervals(CP)) {
+ if (!joinIntervals(CP)) {
// Coalescing failed.
// If definition of source is defined by trivial computation, try
// rematerializing it.
if (!CP.isFlipped() &&
- ReMaterializeTrivialDef(LIS->getInterval(CP.getSrcReg()), true,
+ reMaterializeTrivialDef(LIS->getInterval(CP.getSrcReg()),
CP.getDstReg(), CopyMI))
return true;
// If we can eliminate the copy without merging the live ranges, do so now.
- if (!CP.isPartial()) {
- if (AdjustCopiesBackFrom(CP, CopyMI) ||
- RemoveCopyByCommutingDef(CP, CopyMI)) {
- markAsJoined(CopyMI);
+ if (!CP.isPartial() && !CP.isPhys()) {
+ if (adjustCopiesBackFrom(CP, CopyMI) ||
+ removeCopyByCommutingDef(CP, CopyMI)) {
+ LIS->RemoveMachineInstrFromMaps(CopyMI);
+ CopyMI->eraseFromParent();
DEBUG(dbgs() << "\tTrivial!\n");
return true;
}
@@ -1212,29 +1028,21 @@ bool RegisterCoalescer::JoinCopy(MachineInstr *CopyMI, bool &Again) {
MRI->setRegClass(CP.getDstReg(), CP.getNewRC());
}
- // Remember to delete the copy instruction.
- markAsJoined(CopyMI);
+ // Removing sub-register copies can ease the register class constraints.
+ // Make sure we attempt to inflate the register class of DstReg.
+ if (!CP.isPhys() && RegClassInfo.isProperSubClass(CP.getNewRC()))
+ InflateRegs.push_back(CP.getDstReg());
- UpdateRegDefsUses(CP);
+ // CopyMI has been erased by joinIntervals at this point. Remove it from
+ // ErasedInstrs since copyCoalesceWorkList() won't add a successful join back
+ // to the work list. This keeps ErasedInstrs from growing needlessly.
+ ErasedInstrs.erase(CopyMI);
- // If we have extended the live range of a physical register, make sure we
- // update live-in lists as well.
- if (CP.isPhys()) {
- SmallVector<MachineBasicBlock*, 16> BlockSeq;
- // JoinIntervals invalidates the VNInfos in SrcInt, but we only need the
- // ranges for this, and they are preserved.
- LiveInterval &SrcInt = LIS->getInterval(CP.getSrcReg());
- for (LiveInterval::const_iterator I = SrcInt.begin(), E = SrcInt.end();
- I != E; ++I ) {
- LIS->findLiveInMBBs(I->start, I->end, BlockSeq);
- for (unsigned idx = 0, size = BlockSeq.size(); idx != size; ++idx) {
- MachineBasicBlock &block = *BlockSeq[idx];
- if (!block.isLiveIn(CP.getDstReg()))
- block.addLiveIn(CP.getDstReg());
- }
- BlockSeq.clear();
- }
- }
+ // Rewrite all SrcReg operands to DstReg.
+ // Also update DstReg operands to include DstIdx if it is set.
+ if (CP.getDstIdx())
+ updateRegDefsUses(CP.getDstReg(), CP.getDstReg(), CP.getDstIdx());
+ updateRegDefsUses(CP.getSrcReg(), CP.getDstReg(), CP.getSrcIdx());
// SrcReg is guaranteed to be the register whose live interval that is
// being merged.
@@ -1244,16 +1052,56 @@ bool RegisterCoalescer::JoinCopy(MachineInstr *CopyMI, bool &Again) {
TRI->UpdateRegAllocHint(CP.getSrcReg(), CP.getDstReg(), *MF);
DEBUG({
- LiveInterval &DstInt = LIS->getInterval(CP.getDstReg());
- dbgs() << "\tJoined. Result = ";
- DstInt.print(dbgs(), TRI);
- dbgs() << "\n";
+ dbgs() << "\tJoined. Result = " << PrintReg(CP.getDstReg(), TRI);
+ if (!CP.isPhys())
+ dbgs() << LIS->getInterval(CP.getDstReg());
+ dbgs() << '\n';
});
++numJoins;
return true;
}
+/// Attempt joining with a reserved physreg.
+bool RegisterCoalescer::joinReservedPhysReg(CoalescerPair &CP) {
+ assert(CP.isPhys() && "Must be a physreg copy");
+ assert(RegClassInfo.isReserved(CP.getDstReg()) && "Not a reserved register");
+ LiveInterval &RHS = LIS->getInterval(CP.getSrcReg());
+ DEBUG(dbgs() << "\t\tRHS = " << PrintReg(CP.getSrcReg()) << ' ' << RHS
+ << '\n');
+
+ assert(CP.isFlipped() && RHS.containsOneValue() &&
+ "Invalid join with reserved register");
+
+ // Optimization for reserved registers like ESP. We can only merge with a
+ // reserved physreg if RHS has a single value that is a copy of CP.DstReg().
+ // The live range of the reserved register will look like a set of dead defs
+ // - we don't properly track the live range of reserved registers.
+
+ // Deny any overlapping intervals. This depends on all the reserved
+ // register live ranges to look like dead defs.
+ for (MCRegUnitIterator UI(CP.getDstReg(), TRI); UI.isValid(); ++UI)
+ if (RHS.overlaps(LIS->getRegUnit(*UI))) {
+ DEBUG(dbgs() << "\t\tInterference: " << PrintRegUnit(*UI, TRI) << '\n');
+ return false;
+ }
+
+ // Skip any value computations, we are not adding new values to the
+ // reserved register. Also skip merging the live ranges, the reserved
+ // register live range doesn't need to be accurate as long as all the
+ // defs are there.
+
+ // Delete the identity copy.
+ MachineInstr *CopyMI = MRI->getVRegDef(RHS.reg);
+ LIS->RemoveMachineInstrFromMaps(CopyMI);
+ CopyMI->eraseFromParent();
+
+ // We don't track kills for reserved registers.
+ MRI->clearKillFlags(CP.getSrcReg());
+
+ return true;
+}
+
/// ComputeUltimateVN - Assuming we are going to join two live intervals,
/// compute what the resultant value numbers for each value in the input two
/// ranges will be. This is complicated by copies between the two which can
@@ -1320,144 +1168,70 @@ static bool RegistersDefinedFromSameValue(LiveIntervals &li,
const TargetRegisterInfo &tri,
CoalescerPair &CP,
VNInfo *VNI,
- LiveRange *LR,
+ VNInfo *OtherVNI,
SmallVector<MachineInstr*, 8> &DupCopies) {
// FIXME: This is very conservative. For example, we don't handle
// physical registers.
MachineInstr *MI = li.getInstructionFromIndex(VNI->def);
- if (!MI || !MI->isFullCopy() || CP.isPartial() || CP.isPhys())
+ if (!MI || CP.isPartial() || CP.isPhys())
return false;
- unsigned Dst = MI->getOperand(0).getReg();
- unsigned Src = MI->getOperand(1).getReg();
-
- if (!TargetRegisterInfo::isVirtualRegister(Src) ||
- !TargetRegisterInfo::isVirtualRegister(Dst))
+ unsigned A = CP.getDstReg();
+ if (!TargetRegisterInfo::isVirtualRegister(A))
return false;
- unsigned A = CP.getDstReg();
unsigned B = CP.getSrcReg();
-
- if (B == Dst)
- std::swap(A, B);
- assert(Dst == A);
-
- VNInfo *Other = LR->valno;
- const MachineInstr *OtherMI = li.getInstructionFromIndex(Other->def);
-
- if (!OtherMI || !OtherMI->isFullCopy())
+ if (!TargetRegisterInfo::isVirtualRegister(B))
return false;
- unsigned OtherDst = OtherMI->getOperand(0).getReg();
- unsigned OtherSrc = OtherMI->getOperand(1).getReg();
-
- if (!TargetRegisterInfo::isVirtualRegister(OtherSrc) ||
- !TargetRegisterInfo::isVirtualRegister(OtherDst))
+ MachineInstr *OtherMI = li.getInstructionFromIndex(OtherVNI->def);
+ if (!OtherMI)
return false;
- assert(OtherDst == B);
-
- if (Src != OtherSrc)
- return false;
+ if (MI->isImplicitDef()) {
+ DupCopies.push_back(MI);
+ return true;
+ } else {
+ if (!MI->isFullCopy())
+ return false;
+ unsigned Src = MI->getOperand(1).getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(Src))
+ return false;
+ if (!OtherMI->isFullCopy())
+ return false;
+ unsigned OtherSrc = OtherMI->getOperand(1).getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(OtherSrc))
+ return false;
- // If the copies use two different value numbers of X, we cannot merge
- // A and B.
- LiveInterval &SrcInt = li.getInterval(Src);
- // getVNInfoBefore returns NULL for undef copies. In this case, the
- // optimization is still safe.
- if (SrcInt.getVNInfoBefore(Other->def) != SrcInt.getVNInfoBefore(VNI->def))
- return false;
+ if (Src != OtherSrc)
+ return false;
- DupCopies.push_back(MI);
+ // If the copies use two different value numbers of X, we cannot merge
+ // A and B.
+ LiveInterval &SrcInt = li.getInterval(Src);
+ // getVNInfoBefore returns NULL for undef copies. In this case, the
+ // optimization is still safe.
+ if (SrcInt.getVNInfoBefore(OtherVNI->def) !=
+ SrcInt.getVNInfoBefore(VNI->def))
+ return false;
- return true;
+ DupCopies.push_back(MI);
+ return true;
+ }
}
-/// JoinIntervals - Attempt to join these two intervals. On failure, this
+/// joinIntervals - Attempt to join these two intervals. On failure, this
/// returns false.
-bool RegisterCoalescer::JoinIntervals(CoalescerPair &CP) {
- LiveInterval &RHS = LIS->getInterval(CP.getSrcReg());
- DEBUG({ dbgs() << "\t\tRHS = "; RHS.print(dbgs(), TRI); dbgs() << "\n"; });
-
- // If a live interval is a physical register, check for interference with any
- // aliases. The interference check implemented here is a bit more conservative
- // than the full interfeence check below. We allow overlapping live ranges
- // only when one is a copy of the other.
- if (CP.isPhys()) {
- // Optimization for reserved registers like ESP.
- // We can only merge with a reserved physreg if RHS has a single value that
- // is a copy of CP.DstReg(). The live range of the reserved register will
- // look like a set of dead defs - we don't properly track the live range of
- // reserved registers.
- if (RegClassInfo.isReserved(CP.getDstReg())) {
- assert(CP.isFlipped() && RHS.containsOneValue() &&
- "Invalid join with reserved register");
- // Deny any overlapping intervals. This depends on all the reserved
- // register live ranges to look like dead defs.
- for (const uint16_t *AS = TRI->getOverlaps(CP.getDstReg()); *AS; ++AS) {
- if (!LIS->hasInterval(*AS)) {
- // Make sure at least DstReg itself exists before attempting a join.
- if (*AS == CP.getDstReg())
- LIS->getOrCreateInterval(CP.getDstReg());
- continue;
- }
- if (RHS.overlaps(LIS->getInterval(*AS))) {
- DEBUG(dbgs() << "\t\tInterference: " << PrintReg(*AS, TRI) << '\n');
- return false;
- }
- }
- // Skip any value computations, we are not adding new values to the
- // reserved register. Also skip merging the live ranges, the reserved
- // register live range doesn't need to be accurate as long as all the
- // defs are there.
- return true;
- }
-
- // Check if a register mask clobbers DstReg.
- BitVector UsableRegs;
- if (LIS->checkRegMaskInterference(RHS, UsableRegs) &&
- !UsableRegs.test(CP.getDstReg())) {
- DEBUG(dbgs() << "\t\tRegister mask interference.\n");
- return false;
- }
+bool RegisterCoalescer::joinIntervals(CoalescerPair &CP) {
+ // Handle physreg joins separately.
+ if (CP.isPhys())
+ return joinReservedPhysReg(CP);
- for (const uint16_t *AS = TRI->getAliasSet(CP.getDstReg()); *AS; ++AS){
- if (!LIS->hasInterval(*AS))
- continue;
- const LiveInterval &LHS = LIS->getInterval(*AS);
- LiveInterval::const_iterator LI = LHS.begin();
- for (LiveInterval::const_iterator RI = RHS.begin(), RE = RHS.end();
- RI != RE; ++RI) {
- LI = std::lower_bound(LI, LHS.end(), RI->start);
- // Does LHS have an overlapping live range starting before RI?
- if ((LI != LHS.begin() && LI[-1].end > RI->start) &&
- (RI->start != RI->valno->def ||
- !CP.isCoalescable(LIS->getInstructionFromIndex(RI->start)))) {
- DEBUG({
- dbgs() << "\t\tInterference from alias: ";
- LHS.print(dbgs(), TRI);
- dbgs() << "\n\t\tOverlap at " << RI->start << " and no copy.\n";
- });
- return false;
- }
-
- // Check that LHS ranges beginning in this range are copies.
- for (; LI != LHS.end() && LI->start < RI->end; ++LI) {
- if (LI->start != LI->valno->def ||
- !CP.isCoalescable(LIS->getInstructionFromIndex(LI->start))) {
- DEBUG({
- dbgs() << "\t\tInterference from alias: ";
- LHS.print(dbgs(), TRI);
- dbgs() << "\n\t\tDef at " << LI->start << " is not a copy.\n";
- });
- return false;
- }
- }
- }
- }
- }
+ LiveInterval &RHS = LIS->getInterval(CP.getSrcReg());
+ DEBUG(dbgs() << "\t\tRHS = " << PrintReg(CP.getSrcReg()) << ' ' << RHS
+ << '\n');
// Compute the final value assignment, assuming that the live ranges can be
// coalesced.
@@ -1468,9 +1242,11 @@ bool RegisterCoalescer::JoinIntervals(CoalescerPair &CP) {
SmallVector<VNInfo*, 16> NewVNInfo;
SmallVector<MachineInstr*, 8> DupCopies;
+ SmallVector<MachineInstr*, 8> DeadCopies;
LiveInterval &LHS = LIS->getOrCreateInterval(CP.getDstReg());
- DEBUG({ dbgs() << "\t\tLHS = "; LHS.print(dbgs(), TRI); dbgs() << "\n"; });
+ DEBUG(dbgs() << "\t\tLHS = " << PrintReg(CP.getDstReg(), TRI) << ' ' << LHS
+ << '\n');
// Loop over the value numbers of the LHS, seeing if any are defined from
// the RHS.
@@ -1481,21 +1257,24 @@ bool RegisterCoalescer::JoinIntervals(CoalescerPair &CP) {
continue;
MachineInstr *MI = LIS->getInstructionFromIndex(VNI->def);
assert(MI && "Missing def");
- if (!MI->isCopyLike()) // Src not defined by a copy?
+ if (!MI->isCopyLike() && !MI->isImplicitDef()) // Src not defined by a copy?
continue;
// Figure out the value # from the RHS.
- LiveRange *lr = RHS.getLiveRangeContaining(VNI->def.getPrevSlot());
+ VNInfo *OtherVNI = RHS.getVNInfoBefore(VNI->def);
// The copy could be to an aliased physreg.
- if (!lr) continue;
+ if (!OtherVNI)
+ continue;
// DstReg is known to be a register in the LHS interval. If the src is
// from the RHS interval, we can use its value #.
- if (!CP.isCoalescable(MI) &&
- !RegistersDefinedFromSameValue(*LIS, *TRI, CP, VNI, lr, DupCopies))
+ if (CP.isCoalescable(MI))
+ DeadCopies.push_back(MI);
+ else if (!RegistersDefinedFromSameValue(*LIS, *TRI, CP, VNI, OtherVNI,
+ DupCopies))
continue;
- LHSValsDefinedFromRHS[VNI] = lr->valno;
+ LHSValsDefinedFromRHS[VNI] = OtherVNI;
}
// Loop over the value numbers of the RHS, seeing if any are defined from
@@ -1507,21 +1286,24 @@ bool RegisterCoalescer::JoinIntervals(CoalescerPair &CP) {
continue;
MachineInstr *MI = LIS->getInstructionFromIndex(VNI->def);
assert(MI && "Missing def");
- if (!MI->isCopyLike()) // Src not defined by a copy?
+ if (!MI->isCopyLike() && !MI->isImplicitDef()) // Src not defined by a copy?
continue;
// Figure out the value # from the LHS.
- LiveRange *lr = LHS.getLiveRangeContaining(VNI->def.getPrevSlot());
+ VNInfo *OtherVNI = LHS.getVNInfoBefore(VNI->def);
// The copy could be to an aliased physreg.
- if (!lr) continue;
+ if (!OtherVNI)
+ continue;
// DstReg is known to be a register in the RHS interval. If the src is
// from the LHS interval, we can use its value #.
- if (!CP.isCoalescable(MI) &&
- !RegistersDefinedFromSameValue(*LIS, *TRI, CP, VNI, lr, DupCopies))
+ if (CP.isCoalescable(MI))
+ DeadCopies.push_back(MI);
+ else if (!RegistersDefinedFromSameValue(*LIS, *TRI, CP, VNI, OtherVNI,
+ DupCopies))
continue;
- RHSValsDefinedFromLHS[VNI] = lr->valno;
+ RHSValsDefinedFromLHS[VNI] = OtherVNI;
}
LHSValNoAssignments.resize(LHS.getNumValNums(), -1);
@@ -1563,6 +1345,10 @@ bool RegisterCoalescer::JoinIntervals(CoalescerPair &CP) {
LiveInterval::const_iterator J = RHS.begin();
LiveInterval::const_iterator JE = RHS.end();
+ // Collect interval end points that will no longer be kills.
+ SmallVector<MachineInstr*, 8> LHSOldKills;
+ SmallVector<MachineInstr*, 8> RHSOldKills;
+
// Skip ahead until the first place of potential sharing.
if (I != IE && J != JE) {
if (I->start < J->start) {
@@ -1576,20 +1362,21 @@ bool RegisterCoalescer::JoinIntervals(CoalescerPair &CP) {
while (I != IE && J != JE) {
// Determine if these two live ranges overlap.
- bool Overlaps;
- if (I->start < J->start) {
- Overlaps = I->end > J->start;
- } else {
- Overlaps = J->end > I->start;
- }
-
// If so, check value # info to determine if they are really different.
- if (Overlaps) {
+ if (I->end > J->start && J->end > I->start) {
// If the live range overlap will map to the same value number in the
// result liverange, we can still coalesce them. If not, we can't.
if (LHSValNoAssignments[I->valno->id] !=
RHSValNoAssignments[J->valno->id])
return false;
+
+ // Extended live ranges should no longer be killed.
+ if (!I->end.isBlock() && I->end < J->end)
+ if (MachineInstr *MI = LIS->getInstructionFromIndex(I->end))
+ LHSOldKills.push_back(MI);
+ if (!J->end.isBlock() && J->end < I->end)
+ if (MachineInstr *MI = LIS->getInstructionFromIndex(J->end))
+ RHSOldKills.push_back(MI);
}
if (I->end < J->end)
@@ -1598,47 +1385,48 @@ bool RegisterCoalescer::JoinIntervals(CoalescerPair &CP) {
++J;
}
- // Update kill info. Some live ranges are extended due to copy coalescing.
- for (DenseMap<VNInfo*, VNInfo*>::iterator I = LHSValsDefinedFromRHS.begin(),
- E = LHSValsDefinedFromRHS.end(); I != E; ++I) {
- VNInfo *VNI = I->first;
- unsigned LHSValID = LHSValNoAssignments[VNI->id];
- if (VNI->hasPHIKill())
- NewVNInfo[LHSValID]->setHasPHIKill(true);
- }
-
- // Update kill info. Some live ranges are extended due to copy coalescing.
- for (DenseMap<VNInfo*, VNInfo*>::iterator I = RHSValsDefinedFromLHS.begin(),
- E = RHSValsDefinedFromLHS.end(); I != E; ++I) {
- VNInfo *VNI = I->first;
- unsigned RHSValID = RHSValNoAssignments[VNI->id];
- if (VNI->hasPHIKill())
- NewVNInfo[RHSValID]->setHasPHIKill(true);
- }
+ // Clear kill flags where live ranges are extended.
+ while (!LHSOldKills.empty())
+ LHSOldKills.pop_back_val()->clearRegisterKills(LHS.reg, TRI);
+ while (!RHSOldKills.empty())
+ RHSOldKills.pop_back_val()->clearRegisterKills(RHS.reg, TRI);
if (LHSValNoAssignments.empty())
LHSValNoAssignments.push_back(-1);
if (RHSValNoAssignments.empty())
RHSValNoAssignments.push_back(-1);
+ // Now erase all the redundant copies.
+ for (unsigned i = 0, e = DeadCopies.size(); i != e; ++i) {
+ MachineInstr *MI = DeadCopies[i];
+ if (!ErasedInstrs.insert(MI))
+ continue;
+ DEBUG(dbgs() << "\t\terased:\t" << LIS->getInstructionIndex(MI)
+ << '\t' << *MI);
+ LIS->RemoveMachineInstrFromMaps(MI);
+ MI->eraseFromParent();
+ }
+
SmallVector<unsigned, 8> SourceRegisters;
for (SmallVector<MachineInstr*, 8>::iterator I = DupCopies.begin(),
E = DupCopies.end(); I != E; ++I) {
MachineInstr *MI = *I;
+ if (!ErasedInstrs.insert(MI))
+ continue;
- // We have pretended that the assignment to B in
+ // If MI is a copy, then we have pretended that the assignment to B in
// A = X
// B = X
// was actually a copy from A. Now that we decided to coalesce A and B,
// transform the code into
// A = X
- // X = X
- // and mark the X as coalesced to keep the illusion.
- unsigned Src = MI->getOperand(1).getReg();
- SourceRegisters.push_back(Src);
- MI->getOperand(0).substVirtReg(Src, 0, *TRI);
-
- markAsJoined(MI);
+ // In the case of the implicit_def, we just have to remove it.
+ if (!MI->isImplicitDef()) {
+ unsigned Src = MI->getOperand(1).getReg();
+ SourceRegisters.push_back(Src);
+ }
+ LIS->RemoveMachineInstrFromMaps(MI);
+ MI->eraseFromParent();
}
// If B = X was the last use of X in a liverange, we have to shrink it now
@@ -1678,73 +1466,58 @@ namespace {
};
}
-void RegisterCoalescer::CopyCoalesceInMBB(MachineBasicBlock *MBB,
- std::vector<MachineInstr*> &TryAgain) {
- DEBUG(dbgs() << MBB->getName() << ":\n");
-
- SmallVector<MachineInstr*, 8> VirtCopies;
- SmallVector<MachineInstr*, 8> PhysCopies;
- SmallVector<MachineInstr*, 8> ImpDefCopies;
- for (MachineBasicBlock::iterator MII = MBB->begin(), E = MBB->end();
- MII != E;) {
- MachineInstr *Inst = MII++;
-
- // If this isn't a copy nor a extract_subreg, we can't join intervals.
- unsigned SrcReg, DstReg;
- if (Inst->isCopy()) {
- DstReg = Inst->getOperand(0).getReg();
- SrcReg = Inst->getOperand(1).getReg();
- } else if (Inst->isSubregToReg()) {
- DstReg = Inst->getOperand(0).getReg();
- SrcReg = Inst->getOperand(2).getReg();
- } else
+// Try joining WorkList copies starting from index From.
+// Null out any successful joins.
+bool RegisterCoalescer::copyCoalesceWorkList(unsigned From) {
+ assert(From <= WorkList.size() && "Out of range");
+ bool Progress = false;
+ for (unsigned i = From, e = WorkList.size(); i != e; ++i) {
+ if (!WorkList[i])
continue;
-
- bool SrcIsPhys = TargetRegisterInfo::isPhysicalRegister(SrcReg);
- bool DstIsPhys = TargetRegisterInfo::isPhysicalRegister(DstReg);
- if (LIS->hasInterval(SrcReg) && LIS->getInterval(SrcReg).empty())
- ImpDefCopies.push_back(Inst);
- else if (SrcIsPhys || DstIsPhys)
- PhysCopies.push_back(Inst);
- else
- VirtCopies.push_back(Inst);
- }
-
- // Try coalescing implicit copies and insert_subreg <undef> first,
- // followed by copies to / from physical registers, then finally copies
- // from virtual registers to virtual registers.
- for (unsigned i = 0, e = ImpDefCopies.size(); i != e; ++i) {
- MachineInstr *TheCopy = ImpDefCopies[i];
- bool Again = false;
- if (!JoinCopy(TheCopy, Again))
- if (Again)
- TryAgain.push_back(TheCopy);
- }
- for (unsigned i = 0, e = PhysCopies.size(); i != e; ++i) {
- MachineInstr *TheCopy = PhysCopies[i];
- bool Again = false;
- if (!JoinCopy(TheCopy, Again))
- if (Again)
- TryAgain.push_back(TheCopy);
- }
- for (unsigned i = 0, e = VirtCopies.size(); i != e; ++i) {
- MachineInstr *TheCopy = VirtCopies[i];
+ // Skip instruction pointers that have already been erased, for example by
+ // dead code elimination.
+ if (ErasedInstrs.erase(WorkList[i])) {
+ WorkList[i] = 0;
+ continue;
+ }
bool Again = false;
- if (!JoinCopy(TheCopy, Again))
- if (Again)
- TryAgain.push_back(TheCopy);
+ bool Success = joinCopy(WorkList[i], Again);
+ Progress |= Success;
+ if (Success || !Again)
+ WorkList[i] = 0;
}
+ return Progress;
}
-void RegisterCoalescer::joinIntervals() {
+void
+RegisterCoalescer::copyCoalesceInMBB(MachineBasicBlock *MBB) {
+ DEBUG(dbgs() << MBB->getName() << ":\n");
+
+ // Collect all copy-like instructions in MBB. Don't start coalescing anything
+ // yet, it might invalidate the iterator.
+ const unsigned PrevSize = WorkList.size();
+ for (MachineBasicBlock::iterator MII = MBB->begin(), E = MBB->end();
+ MII != E; ++MII)
+ if (MII->isCopyLike())
+ WorkList.push_back(MII);
+
+ // Try coalescing the collected copies immediately, and remove the nulls.
+ // This prevents the WorkList from getting too large since most copies are
+ // joinable on the first attempt.
+ if (copyCoalesceWorkList(PrevSize))
+ WorkList.erase(std::remove(WorkList.begin() + PrevSize, WorkList.end(),
+ (MachineInstr*)0), WorkList.end());
+}
+
+void RegisterCoalescer::joinAllIntervals() {
DEBUG(dbgs() << "********** JOINING INTERVALS ***********\n");
+ assert(WorkList.empty() && "Old data still around.");
- std::vector<MachineInstr*> TryAgainList;
if (Loops->empty()) {
// If there are no loops in the function, join intervals in function order.
for (MachineFunction::iterator I = MF->begin(), E = MF->end();
I != E; ++I)
- CopyCoalesceInMBB(I, TryAgainList);
+ copyCoalesceInMBB(I);
} else {
// Otherwise, join intervals in inner loops before other intervals.
// Unfortunately we can't just iterate over loop hierarchy here because
@@ -1763,34 +1536,20 @@ void RegisterCoalescer::joinIntervals() {
// Finally, join intervals in loop nest order.
for (unsigned i = 0, e = MBBs.size(); i != e; ++i)
- CopyCoalesceInMBB(MBBs[i].second, TryAgainList);
+ copyCoalesceInMBB(MBBs[i].second);
}
// Joining intervals can allow other intervals to be joined. Iteratively join
// until we make no progress.
- bool ProgressMade = true;
- while (ProgressMade) {
- ProgressMade = false;
-
- for (unsigned i = 0, e = TryAgainList.size(); i != e; ++i) {
- MachineInstr *&TheCopy = TryAgainList[i];
- if (!TheCopy)
- continue;
-
- bool Again = false;
- bool Success = JoinCopy(TheCopy, Again);
- if (Success || !Again) {
- TheCopy= 0; // Mark this one as done.
- ProgressMade = true;
- }
- }
- }
+ while (copyCoalesceWorkList())
+ /* empty */ ;
}
void RegisterCoalescer::releaseMemory() {
- JoinedCopies.clear();
- ReMatCopies.clear();
- ReMatDefs.clear();
+ ErasedInstrs.clear();
+ WorkList.clear();
+ DeadDefs.clear();
+ InflateRegs.clear();
}
bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) {
@@ -1814,138 +1573,11 @@ bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) {
RegClassInfo.runOnMachineFunction(fn);
// Join (coalesce) intervals if requested.
- if (EnableJoining) {
- joinIntervals();
- DEBUG({
- dbgs() << "********** INTERVALS POST JOINING **********\n";
- for (LiveIntervals::iterator I = LIS->begin(), E = LIS->end();
- I != E; ++I){
- I->second->print(dbgs(), TRI);
- dbgs() << "\n";
- }
- });
- }
-
- // Perform a final pass over the instructions and compute spill weights
- // and remove identity moves.
- SmallVector<unsigned, 4> DeadDefs, InflateRegs;
- for (MachineFunction::iterator mbbi = MF->begin(), mbbe = MF->end();
- mbbi != mbbe; ++mbbi) {
- MachineBasicBlock* mbb = mbbi;
- for (MachineBasicBlock::iterator mii = mbb->begin(), mie = mbb->end();
- mii != mie; ) {
- MachineInstr *MI = mii;
- if (JoinedCopies.count(MI)) {
- // Delete all coalesced copies.
- bool DoDelete = true;
- assert(MI->isCopyLike() && "Unrecognized copy instruction");
- unsigned SrcReg = MI->getOperand(MI->isSubregToReg() ? 2 : 1).getReg();
- unsigned DstReg = MI->getOperand(0).getReg();
-
- // Collect candidates for register class inflation.
- if (TargetRegisterInfo::isVirtualRegister(SrcReg) &&
- RegClassInfo.isProperSubClass(MRI->getRegClass(SrcReg)))
- InflateRegs.push_back(SrcReg);
- if (TargetRegisterInfo::isVirtualRegister(DstReg) &&
- RegClassInfo.isProperSubClass(MRI->getRegClass(DstReg)))
- InflateRegs.push_back(DstReg);
-
- if (TargetRegisterInfo::isPhysicalRegister(SrcReg) &&
- MI->getNumOperands() > 2)
- // Do not delete extract_subreg, insert_subreg of physical
- // registers unless the definition is dead. e.g.
- // %DO<def> = INSERT_SUBREG %D0<undef>, %S0<kill>, 1
- // or else the scavenger may complain. LowerSubregs will
- // delete them later.
- DoDelete = false;
-
- if (MI->allDefsAreDead()) {
- if (TargetRegisterInfo::isVirtualRegister(SrcReg) &&
- LIS->hasInterval(SrcReg))
- LIS->shrinkToUses(&LIS->getInterval(SrcReg));
- DoDelete = true;
- }
- if (!DoDelete) {
- // We need the instruction to adjust liveness, so make it a KILL.
- if (MI->isSubregToReg()) {
- MI->RemoveOperand(3);
- MI->RemoveOperand(1);
- }
- MI->setDesc(TII->get(TargetOpcode::KILL));
- mii = llvm::next(mii);
- } else {
- LIS->RemoveMachineInstrFromMaps(MI);
- mii = mbbi->erase(mii);
- ++numPeep;
- }
- continue;
- }
-
- // Now check if this is a remat'ed def instruction which is now dead.
- if (ReMatDefs.count(MI)) {
- bool isDead = true;
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- const MachineOperand &MO = MI->getOperand(i);
- if (!MO.isReg())
- continue;
- unsigned Reg = MO.getReg();
- if (!Reg)
- continue;
- DeadDefs.push_back(Reg);
- if (TargetRegisterInfo::isVirtualRegister(Reg)) {
- // Remat may also enable register class inflation.
- if (RegClassInfo.isProperSubClass(MRI->getRegClass(Reg)))
- InflateRegs.push_back(Reg);
- }
- if (MO.isDead())
- continue;
- if (TargetRegisterInfo::isPhysicalRegister(Reg) ||
- !MRI->use_nodbg_empty(Reg)) {
- isDead = false;
- break;
- }
- }
- if (isDead) {
- while (!DeadDefs.empty()) {
- unsigned DeadDef = DeadDefs.back();
- DeadDefs.pop_back();
- RemoveDeadDef(LIS->getInterval(DeadDef), MI);
- }
- LIS->RemoveMachineInstrFromMaps(mii);
- mii = mbbi->erase(mii);
- continue;
- } else
- DeadDefs.clear();
- }
-
- ++mii;
-
- // Check for now unnecessary kill flags.
- if (LIS->isNotInMIMap(MI)) continue;
- SlotIndex DefIdx = LIS->getInstructionIndex(MI).getRegSlot();
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- MachineOperand &MO = MI->getOperand(i);
- if (!MO.isReg() || !MO.isKill()) continue;
- unsigned reg = MO.getReg();
- if (!reg || !LIS->hasInterval(reg)) continue;
- if (!LIS->getInterval(reg).killedAt(DefIdx)) {
- MO.setIsKill(false);
- continue;
- }
- // When leaving a kill flag on a physreg, check if any subregs should
- // remain alive.
- if (!TargetRegisterInfo::isPhysicalRegister(reg))
- continue;
- for (const uint16_t *SR = TRI->getSubRegisters(reg);
- unsigned S = *SR; ++SR)
- if (LIS->hasInterval(S) && LIS->getInterval(S).liveAt(DefIdx))
- MI->addRegisterDefined(S, TRI);
- }
- }
- }
+ if (EnableJoining)
+ joinAllIntervals();
// After deleting a lot of copies, register classes may be less constrained.
- // Removing sub-register opreands may alow GR32_ABCD -> GR32 and DPR_VFP2 ->
+ // Removing sub-register operands may allow GR32_ABCD -> GR32 and DPR_VFP2 ->
// DPR inflation.
array_pod_sort(InflateRegs.begin(), InflateRegs.end());
InflateRegs.erase(std::unique(InflateRegs.begin(), InflateRegs.end()),
diff --git a/lib/CodeGen/RegisterCoalescer.h b/lib/CodeGen/RegisterCoalescer.h
index 310b933..8a6df98 100644
--- a/lib/CodeGen/RegisterCoalescer.h
+++ b/lib/CodeGen/RegisterCoalescer.h
@@ -26,7 +26,6 @@ namespace llvm {
/// two registers can be coalesced, CoalescerPair can determine if a copy
/// instruction would become an identity copy after coalescing.
class CoalescerPair {
- const TargetInstrInfo &TII;
const TargetRegisterInfo &TRI;
/// DstReg - The register that will be left after coalescing. It can be a
@@ -36,10 +35,13 @@ namespace llvm {
/// SrcReg - the virtual register that will be coalesced into dstReg.
unsigned SrcReg;
- /// subReg_ - The subregister index of srcReg in DstReg. It is possible the
- /// coalesce SrcReg into a subreg of the larger DstReg when DstReg is a
- /// virtual register.
- unsigned SubIdx;
+ /// DstIdx - The sub-register index of the old DstReg in the new coalesced
+ /// register.
+ unsigned DstIdx;
+
+ /// SrcIdx - The sub-register index of the old SrcReg in the new coalesced
+ /// register.
+ unsigned SrcIdx;
/// Partial - True when the original copy was a partial subregister copy.
bool Partial;
@@ -52,12 +54,13 @@ namespace llvm {
bool Flipped;
/// NewRC - The register class of the coalesced register, or NULL if DstReg
- /// is a physreg.
+ /// is a physreg. This register class may be a super-register of both
+ /// SrcReg and DstReg.
const TargetRegisterClass *NewRC;
public:
- CoalescerPair(const TargetInstrInfo &tii, const TargetRegisterInfo &tri)
- : TII(tii), TRI(tri), DstReg(0), SrcReg(0), SubIdx(0),
+ CoalescerPair(const TargetRegisterInfo &tri)
+ : TRI(tri), DstReg(0), SrcReg(0), DstIdx(0), SrcIdx(0),
Partial(false), CrossClass(false), Flipped(false), NewRC(0) {}
/// setRegisters - set registers to match the copy instruction MI. Return
@@ -94,9 +97,13 @@ namespace llvm {
/// getSrcReg - Return the virtual register that will be coalesced away.
unsigned getSrcReg() const { return SrcReg; }
- /// getSubIdx - Return the subregister index in DstReg that SrcReg will be
- /// coalesced into, or 0.
- unsigned getSubIdx() const { return SubIdx; }
+ /// getDstIdx - Return the subregister index that DstReg will be coalesced
+ /// into, or 0.
+ unsigned getDstIdx() const { return DstIdx; }
+
+ /// getSrcIdx - Return the subregister index that SrcReg will be coalesced
+ /// into, or 0.
+ unsigned getSrcIdx() const { return SrcIdx; }
/// getNewRC - Return the register class of the coalesced register.
const TargetRegisterClass *getNewRC() const { return NewRC; }
diff --git a/lib/CodeGen/RegisterPressure.cpp b/lib/CodeGen/RegisterPressure.cpp
new file mode 100644
index 0000000..43448c8
--- /dev/null
+++ b/lib/CodeGen/RegisterPressure.cpp
@@ -0,0 +1,841 @@
+//===-- RegisterPressure.cpp - Dynamic Register Pressure ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the RegisterPressure class which can be used to track
+// MachineInstr level register pressure.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/LiveInterval.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterClassInfo.h"
+#include "llvm/CodeGen/RegisterPressure.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+/// Increase register pressure for each set impacted by this register class.
+static void increaseSetPressure(std::vector<unsigned> &CurrSetPressure,
+ std::vector<unsigned> &MaxSetPressure,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) {
+ unsigned Weight = TRI->getRegClassWeight(RC).RegWeight;
+ for (const int *PSet = TRI->getRegClassPressureSets(RC);
+ *PSet != -1; ++PSet) {
+ CurrSetPressure[*PSet] += Weight;
+ if (&CurrSetPressure != &MaxSetPressure
+ && CurrSetPressure[*PSet] > MaxSetPressure[*PSet]) {
+ MaxSetPressure[*PSet] = CurrSetPressure[*PSet];
+ }
+ }
+}
+
+/// Decrease register pressure for each set impacted by this register class.
+static void decreaseSetPressure(std::vector<unsigned> &CurrSetPressure,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) {
+ unsigned Weight = TRI->getRegClassWeight(RC).RegWeight;
+ for (const int *PSet = TRI->getRegClassPressureSets(RC);
+ *PSet != -1; ++PSet) {
+ assert(CurrSetPressure[*PSet] >= Weight && "register pressure underflow");
+ CurrSetPressure[*PSet] -= Weight;
+ }
+}
+
+/// Directly increase pressure only within this RegisterPressure result.
+void RegisterPressure::increase(const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) {
+ increaseSetPressure(MaxSetPressure, MaxSetPressure, RC, TRI);
+}
+
+/// Directly decrease pressure only within this RegisterPressure result.
+void RegisterPressure::decrease(const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) {
+ decreaseSetPressure(MaxSetPressure, RC, TRI);
+}
+
+void RegisterPressure::dump(const TargetRegisterInfo *TRI) {
+ dbgs() << "Live In: ";
+ for (unsigned i = 0, e = LiveInRegs.size(); i < e; ++i)
+ dbgs() << PrintReg(LiveInRegs[i], TRI) << " ";
+ dbgs() << '\n';
+ dbgs() << "Live Out: ";
+ for (unsigned i = 0, e = LiveOutRegs.size(); i < e; ++i)
+ dbgs() << PrintReg(LiveOutRegs[i], TRI) << " ";
+ dbgs() << '\n';
+ for (unsigned i = 0, e = MaxSetPressure.size(); i < e; ++i) {
+ if (MaxSetPressure[i] != 0)
+ dbgs() << TRI->getRegPressureSetName(i) << "=" << MaxSetPressure[i]
+ << '\n';
+ }
+}
+
+/// Increase the current pressure as impacted by these physical registers and
+/// bump the high water mark if needed.
+void RegPressureTracker::increasePhysRegPressure(ArrayRef<unsigned> Regs) {
+ for (unsigned I = 0, E = Regs.size(); I != E; ++I)
+ increaseSetPressure(CurrSetPressure, P.MaxSetPressure,
+ TRI->getMinimalPhysRegClass(Regs[I]), TRI);
+}
+
+/// Simply decrease the current pressure as impacted by these physical
+/// registers.
+void RegPressureTracker::decreasePhysRegPressure(ArrayRef<unsigned> Regs) {
+ for (unsigned I = 0, E = Regs.size(); I != E; ++I)
+ decreaseSetPressure(CurrSetPressure, TRI->getMinimalPhysRegClass(Regs[I]),
+ TRI);
+}
+
+/// Increase the current pressure as impacted by these virtual registers and
+/// bump the high water mark if needed.
+void RegPressureTracker::increaseVirtRegPressure(ArrayRef<unsigned> Regs) {
+ for (unsigned I = 0, E = Regs.size(); I != E; ++I)
+ increaseSetPressure(CurrSetPressure, P.MaxSetPressure,
+ MRI->getRegClass(Regs[I]), TRI);
+}
+
+/// Simply decrease the current pressure as impacted by these virtual registers.
+void RegPressureTracker::decreaseVirtRegPressure(ArrayRef<unsigned> Regs) {
+ for (unsigned I = 0, E = Regs.size(); I != E; ++I)
+ decreaseSetPressure(CurrSetPressure, MRI->getRegClass(Regs[I]), TRI);
+}
+
+/// Clear the result so it can be used for another round of pressure tracking.
+void IntervalPressure::reset() {
+ TopIdx = BottomIdx = SlotIndex();
+ MaxSetPressure.clear();
+ LiveInRegs.clear();
+ LiveOutRegs.clear();
+}
+
+/// Clear the result so it can be used for another round of pressure tracking.
+void RegionPressure::reset() {
+ TopPos = BottomPos = MachineBasicBlock::const_iterator();
+ MaxSetPressure.clear();
+ LiveInRegs.clear();
+ LiveOutRegs.clear();
+}
+
+/// If the current top is not less than or equal to the next index, open it.
+/// We happen to need the SlotIndex for the next top for pressure update.
+void IntervalPressure::openTop(SlotIndex NextTop) {
+ if (TopIdx <= NextTop)
+ return;
+ TopIdx = SlotIndex();
+ LiveInRegs.clear();
+}
+
+/// If the current top is the previous instruction (before receding), open it.
+void RegionPressure::openTop(MachineBasicBlock::const_iterator PrevTop) {
+ if (TopPos != PrevTop)
+ return;
+ TopPos = MachineBasicBlock::const_iterator();
+ LiveInRegs.clear();
+}
+
+/// If the current bottom is not greater than the previous index, open it.
+void IntervalPressure::openBottom(SlotIndex PrevBottom) {
+ if (BottomIdx > PrevBottom)
+ return;
+ BottomIdx = SlotIndex();
+ LiveInRegs.clear();
+}
+
+/// If the current bottom is the previous instr (before advancing), open it.
+void RegionPressure::openBottom(MachineBasicBlock::const_iterator PrevBottom) {
+ if (BottomPos != PrevBottom)
+ return;
+ BottomPos = MachineBasicBlock::const_iterator();
+ LiveInRegs.clear();
+}
+
+/// Setup the RegPressureTracker.
+///
+/// TODO: Add support for pressure without LiveIntervals.
+void RegPressureTracker::init(const MachineFunction *mf,
+ const RegisterClassInfo *rci,
+ const LiveIntervals *lis,
+ const MachineBasicBlock *mbb,
+ MachineBasicBlock::const_iterator pos)
+{
+ MF = mf;
+ TRI = MF->getTarget().getRegisterInfo();
+ RCI = rci;
+ MRI = &MF->getRegInfo();
+ MBB = mbb;
+
+ if (RequireIntervals) {
+ assert(lis && "IntervalPressure requires LiveIntervals");
+ LIS = lis;
+ }
+
+ CurrPos = pos;
+ while (CurrPos != MBB->end() && CurrPos->isDebugValue())
+ ++CurrPos;
+
+ CurrSetPressure.assign(TRI->getNumRegPressureSets(), 0);
+
+ if (RequireIntervals)
+ static_cast<IntervalPressure&>(P).reset();
+ else
+ static_cast<RegionPressure&>(P).reset();
+ P.MaxSetPressure = CurrSetPressure;
+
+ LivePhysRegs.clear();
+ LivePhysRegs.setUniverse(TRI->getNumRegs());
+ LiveVirtRegs.clear();
+ LiveVirtRegs.setUniverse(MRI->getNumVirtRegs());
+}
+
+/// Does this pressure result have a valid top position and live ins.
+bool RegPressureTracker::isTopClosed() const {
+ if (RequireIntervals)
+ return static_cast<IntervalPressure&>(P).TopIdx.isValid();
+ return (static_cast<RegionPressure&>(P).TopPos ==
+ MachineBasicBlock::const_iterator());
+}
+
+/// Does this pressure result have a valid bottom position and live outs.
+bool RegPressureTracker::isBottomClosed() const {
+ if (RequireIntervals)
+ return static_cast<IntervalPressure&>(P).BottomIdx.isValid();
+ return (static_cast<RegionPressure&>(P).BottomPos ==
+ MachineBasicBlock::const_iterator());
+}
+
+/// Set the boundary for the top of the region and summarize live ins.
+void RegPressureTracker::closeTop() {
+ if (RequireIntervals)
+ static_cast<IntervalPressure&>(P).TopIdx =
+ LIS->getInstructionIndex(CurrPos).getRegSlot();
+ else
+ static_cast<RegionPressure&>(P).TopPos = CurrPos;
+
+ assert(P.LiveInRegs.empty() && "inconsistent max pressure result");
+ P.LiveInRegs.reserve(LivePhysRegs.size() + LiveVirtRegs.size());
+ P.LiveInRegs.append(LivePhysRegs.begin(), LivePhysRegs.end());
+ for (SparseSet<unsigned>::const_iterator I =
+ LiveVirtRegs.begin(), E = LiveVirtRegs.end(); I != E; ++I)
+ P.LiveInRegs.push_back(*I);
+ std::sort(P.LiveInRegs.begin(), P.LiveInRegs.end());
+ P.LiveInRegs.erase(std::unique(P.LiveInRegs.begin(), P.LiveInRegs.end()),
+ P.LiveInRegs.end());
+}
+
+/// Set the boundary for the bottom of the region and summarize live outs.
+void RegPressureTracker::closeBottom() {
+ if (RequireIntervals)
+ if (CurrPos == MBB->end())
+ static_cast<IntervalPressure&>(P).BottomIdx = LIS->getMBBEndIdx(MBB);
+ else
+ static_cast<IntervalPressure&>(P).BottomIdx =
+ LIS->getInstructionIndex(CurrPos).getRegSlot();
+ else
+ static_cast<RegionPressure&>(P).BottomPos = CurrPos;
+
+ assert(P.LiveOutRegs.empty() && "inconsistent max pressure result");
+ P.LiveOutRegs.reserve(LivePhysRegs.size() + LiveVirtRegs.size());
+ P.LiveOutRegs.append(LivePhysRegs.begin(), LivePhysRegs.end());
+ for (SparseSet<unsigned>::const_iterator I =
+ LiveVirtRegs.begin(), E = LiveVirtRegs.end(); I != E; ++I)
+ P.LiveOutRegs.push_back(*I);
+ std::sort(P.LiveOutRegs.begin(), P.LiveOutRegs.end());
+ P.LiveOutRegs.erase(std::unique(P.LiveOutRegs.begin(), P.LiveOutRegs.end()),
+ P.LiveOutRegs.end());
+}
+
+/// Finalize the region boundaries and record live ins and live outs.
+void RegPressureTracker::closeRegion() {
+ if (!isTopClosed() && !isBottomClosed()) {
+ assert(LivePhysRegs.empty() && LiveVirtRegs.empty() &&
+ "no region boundary");
+ return;
+ }
+ if (!isBottomClosed())
+ closeBottom();
+ else if (!isTopClosed())
+ closeTop();
+ // If both top and bottom are closed, do nothing.
+}
+
+/// Return true if Reg aliases a register in Regs SparseSet.
+static bool hasRegAlias(unsigned Reg, SparseSet<unsigned> &Regs,
+ const TargetRegisterInfo *TRI) {
+ assert(!TargetRegisterInfo::isVirtualRegister(Reg) && "only for physregs");
+ for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
+ if (Regs.count(*AI))
+ return true;
+ return false;
+}
+
+/// Return an iterator to an entry of the unsorted Regs SmallVector that aliases
+/// Reg (or is Reg itself), or Regs.end() if none. Only valid for physregs.
+static SmallVectorImpl<unsigned>::iterator
+findRegAlias(unsigned Reg, SmallVectorImpl<unsigned> &Regs,
+ const TargetRegisterInfo *TRI) {
+ for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) {
+ SmallVectorImpl<unsigned>::iterator I =
+ std::find(Regs.begin(), Regs.end(), *AI);
+ if (I != Regs.end())
+ return I;
+ }
+ return Regs.end();
+}
+
+/// Return an iterator to Reg's entry in the Regs SmallVector, or Regs.end() if
+/// absent. Virtual registers use a linear search; physregs also match aliases.
+static SmallVectorImpl<unsigned>::iterator
+findReg(unsigned Reg, bool isVReg, SmallVectorImpl<unsigned> &Regs,
+ const TargetRegisterInfo *TRI) {
+ if(isVReg)
+ return std::find(Regs.begin(), Regs.end(), Reg);
+ return findRegAlias(Reg, Regs, TRI);
+}
+
+/// Collect this instruction's unique uses and defs into SmallVectors for
+/// processing defs and uses in order.
+template<bool isVReg>
+struct RegisterOperands {
+ SmallVector<unsigned, 8> Uses;
+ SmallVector<unsigned, 8> Defs;
+ SmallVector<unsigned, 8> DeadDefs;
+
+ /// Push this operand's register onto the correct vector.
+ void collect(const MachineOperand &MO, const TargetRegisterInfo *TRI) {
+ if (MO.readsReg()) {
+ if (findReg(MO.getReg(), isVReg, Uses, TRI) == Uses.end())
+ Uses.push_back(MO.getReg());
+ }
+ if (MO.isDef()) {
+ if (MO.isDead()) {
+ if (findReg(MO.getReg(), isVReg, DeadDefs, TRI) == DeadDefs.end())
+ DeadDefs.push_back(MO.getReg());
+ }
+ else {
+ if (findReg(MO.getReg(), isVReg, Defs, TRI) == Defs.end())
+ Defs.push_back(MO.getReg());
+ }
+ }
+ }
+};
+typedef RegisterOperands<false> PhysRegOperands;
+typedef RegisterOperands<true> VirtRegOperands;
+
+/// Collect physical and virtual register operands.
+static void collectOperands(const MachineInstr *MI,
+ PhysRegOperands &PhysRegOpers,
+ VirtRegOperands &VirtRegOpers,
+ const TargetRegisterInfo *TRI,
+ const RegisterClassInfo *RCI) {
+ for(ConstMIBundleOperands OperI(MI); OperI.isValid(); ++OperI) {
+ const MachineOperand &MO = *OperI;
+ if (!MO.isReg() || !MO.getReg())
+ continue;
+
+ if (TargetRegisterInfo::isVirtualRegister(MO.getReg()))
+ VirtRegOpers.collect(MO, TRI);
+ else if (RCI->isAllocatable(MO.getReg()))
+ PhysRegOpers.collect(MO, TRI);
+ }
+ // Remove redundant physreg dead defs.
+ for (unsigned i = PhysRegOpers.DeadDefs.size(); i > 0; --i) {
+ unsigned Reg = PhysRegOpers.DeadDefs[i-1];
+ if (findRegAlias(Reg, PhysRegOpers.Defs, TRI) != PhysRegOpers.Defs.end())
+ PhysRegOpers.DeadDefs.erase(&PhysRegOpers.DeadDefs[i-1]);
+ }
+}
+
+/// Force liveness of registers.
+void RegPressureTracker::addLiveRegs(ArrayRef<unsigned> Regs) {
+ for (unsigned i = 0, e = Regs.size(); i != e; ++i) {
+ if (TargetRegisterInfo::isVirtualRegister(Regs[i])) {
+ if (LiveVirtRegs.insert(Regs[i]).second)
+ increaseVirtRegPressure(Regs[i]);
+ }
+ else {
+ if (!hasRegAlias(Regs[i], LivePhysRegs, TRI)) {
+ LivePhysRegs.insert(Regs[i]);
+ increasePhysRegPressure(Regs[i]);
+ }
+ }
+ }
+}
+
+/// Add PhysReg to the live in set and increase max pressure.
+void RegPressureTracker::discoverPhysLiveIn(unsigned Reg) {
+ assert(!LivePhysRegs.count(Reg) && "avoid bumping max pressure twice");
+ if (findRegAlias(Reg, P.LiveInRegs, TRI) != P.LiveInRegs.end())
+ return;
+
+ // At live in discovery, unconditionally increase the high water mark.
+ P.LiveInRegs.push_back(Reg);
+ P.increase(TRI->getMinimalPhysRegClass(Reg), TRI);
+}
+
+/// Add PhysReg to the live out set and increase max pressure.
+void RegPressureTracker::discoverPhysLiveOut(unsigned Reg) {
+ assert(!LivePhysRegs.count(Reg) && "avoid bumping max pressure twice");
+ if (findRegAlias(Reg, P.LiveOutRegs, TRI) != P.LiveOutRegs.end())
+ return;
+
+ // At live out discovery, unconditionally increase the high water mark.
+ P.LiveOutRegs.push_back(Reg);
+ P.increase(TRI->getMinimalPhysRegClass(Reg), TRI);
+}
+
+/// Add VirtReg to the live in set and increase max pressure.
+void RegPressureTracker::discoverVirtLiveIn(unsigned Reg) {
+ assert(!LiveVirtRegs.count(Reg) && "avoid bumping max pressure twice");
+ if (std::find(P.LiveInRegs.begin(), P.LiveInRegs.end(), Reg) !=
+ P.LiveInRegs.end())
+ return;
+
+ // At live in discovery, unconditionally increase the high water mark.
+ P.LiveInRegs.push_back(Reg);
+ P.increase(MRI->getRegClass(Reg), TRI);
+}
+
+/// Add VirtReg to the live out set and increase max pressure.
+void RegPressureTracker::discoverVirtLiveOut(unsigned Reg) {
+ assert(!LiveVirtRegs.count(Reg) && "avoid bumping max pressure twice");
+ if (std::find(P.LiveOutRegs.begin(), P.LiveOutRegs.end(), Reg) !=
+ P.LiveOutRegs.end())
+ return;
+
+ // At live out discovery, unconditionally increase the high water mark.
+ P.LiveOutRegs.push_back(Reg);
+ P.increase(MRI->getRegClass(Reg), TRI);
+}
+
+/// Recede across the previous instruction.
+bool RegPressureTracker::recede() {
+ // Check for the top of the analyzable region.
+ if (CurrPos == MBB->begin()) {
+ closeRegion();
+ return false;
+ }
+ if (!isBottomClosed())
+ closeBottom();
+
+ // Open the top of the region using block iterators.
+ if (!RequireIntervals && isTopClosed())
+ static_cast<RegionPressure&>(P).openTop(CurrPos);
+
+ // Find the previous instruction.
+ do
+ --CurrPos;
+ while (CurrPos != MBB->begin() && CurrPos->isDebugValue());
+
+ if (CurrPos->isDebugValue()) {
+ closeRegion();
+ return false;
+ }
+ SlotIndex SlotIdx;
+ if (RequireIntervals)
+ SlotIdx = LIS->getInstructionIndex(CurrPos).getRegSlot();
+
+ // Open the top of the region using slot indexes.
+ if (RequireIntervals && isTopClosed())
+ static_cast<IntervalPressure&>(P).openTop(SlotIdx);
+
+ PhysRegOperands PhysRegOpers;
+ VirtRegOperands VirtRegOpers;
+ collectOperands(CurrPos, PhysRegOpers, VirtRegOpers, TRI, RCI);
+
+ // Boost pressure for all dead defs together.
+ increasePhysRegPressure(PhysRegOpers.DeadDefs);
+ increaseVirtRegPressure(VirtRegOpers.DeadDefs);
+ decreasePhysRegPressure(PhysRegOpers.DeadDefs);
+ decreaseVirtRegPressure(VirtRegOpers.DeadDefs);
+
+ // Kill liveness at live defs.
+ // TODO: consider earlyclobbers?
+ for (unsigned i = 0, e = PhysRegOpers.Defs.size(); i < e; ++i) {
+ unsigned Reg = PhysRegOpers.Defs[i];
+ if (LivePhysRegs.erase(Reg))
+ decreasePhysRegPressure(Reg);
+ else
+ discoverPhysLiveOut(Reg);
+ }
+ for (unsigned i = 0, e = VirtRegOpers.Defs.size(); i < e; ++i) {
+ unsigned Reg = VirtRegOpers.Defs[i];
+ if (LiveVirtRegs.erase(Reg))
+ decreaseVirtRegPressure(Reg);
+ else
+ discoverVirtLiveOut(Reg);
+ }
+
+ // Generate liveness for uses.
+ for (unsigned i = 0, e = PhysRegOpers.Uses.size(); i < e; ++i) {
+ unsigned Reg = PhysRegOpers.Uses[i];
+ if (!hasRegAlias(Reg, LivePhysRegs, TRI)) {
+ increasePhysRegPressure(Reg);
+ LivePhysRegs.insert(Reg);
+ }
+ }
+ for (unsigned i = 0, e = VirtRegOpers.Uses.size(); i < e; ++i) {
+ unsigned Reg = VirtRegOpers.Uses[i];
+ if (!LiveVirtRegs.count(Reg)) {
+ // Adjust liveouts if LiveIntervals are available.
+ if (RequireIntervals) {
+ const LiveInterval *LI = &LIS->getInterval(Reg);
+ if (!LI->killedAt(SlotIdx))
+ discoverVirtLiveOut(Reg);
+ }
+ increaseVirtRegPressure(Reg);
+ LiveVirtRegs.insert(Reg);
+ }
+ }
+ return true;
+}
+
+/// Advance across the current instruction.
+bool RegPressureTracker::advance() {
+ // Check for the bottom of the analyzable region.
+ if (CurrPos == MBB->end()) {
+ closeRegion();
+ return false;
+ }
+ if (!isTopClosed())
+ closeTop();
+
+ SlotIndex SlotIdx;
+ if (RequireIntervals)
+ SlotIdx = LIS->getInstructionIndex(CurrPos).getRegSlot();
+
+ // Open the bottom of the region using slot indexes.
+ if (isBottomClosed()) {
+ if (RequireIntervals)
+ static_cast<IntervalPressure&>(P).openBottom(SlotIdx);
+ else
+ static_cast<RegionPressure&>(P).openBottom(CurrPos);
+ }
+
+ PhysRegOperands PhysRegOpers;
+ VirtRegOperands VirtRegOpers;
+ collectOperands(CurrPos, PhysRegOpers, VirtRegOpers, TRI, RCI);
+
+ // Kill liveness at last uses.
+ for (unsigned i = 0, e = PhysRegOpers.Uses.size(); i < e; ++i) {
+ unsigned Reg = PhysRegOpers.Uses[i];
+ if (!hasRegAlias(Reg, LivePhysRegs, TRI))
+ discoverPhysLiveIn(Reg);
+ else {
+ // Allocatable physregs are always single-use before regalloc.
+ decreasePhysRegPressure(Reg);
+ LivePhysRegs.erase(Reg);
+ }
+ }
+ for (unsigned i = 0, e = VirtRegOpers.Uses.size(); i < e; ++i) {
+ unsigned Reg = VirtRegOpers.Uses[i];
+ if (RequireIntervals) {
+ const LiveInterval *LI = &LIS->getInterval(Reg);
+ if (LI->killedAt(SlotIdx)) {
+ if (LiveVirtRegs.erase(Reg))
+ decreaseVirtRegPressure(Reg);
+ else
+ discoverVirtLiveIn(Reg);
+ }
+ }
+ else if (!LiveVirtRegs.count(Reg)) {
+ discoverVirtLiveIn(Reg);
+ increaseVirtRegPressure(Reg);
+ }
+ }
+
+ // Generate liveness for defs.
+ for (unsigned i = 0, e = PhysRegOpers.Defs.size(); i < e; ++i) {
+ unsigned Reg = PhysRegOpers.Defs[i];
+ if (!hasRegAlias(Reg, LivePhysRegs, TRI)) {
+ increasePhysRegPressure(Reg);
+ LivePhysRegs.insert(Reg);
+ }
+ }
+ for (unsigned i = 0, e = VirtRegOpers.Defs.size(); i < e; ++i) {
+ unsigned Reg = VirtRegOpers.Defs[i];
+ if (LiveVirtRegs.insert(Reg).second)
+ increaseVirtRegPressure(Reg);
+ }
+
+ // Boost pressure for all dead defs together.
+ increasePhysRegPressure(PhysRegOpers.DeadDefs);
+ increaseVirtRegPressure(VirtRegOpers.DeadDefs);
+ decreasePhysRegPressure(PhysRegOpers.DeadDefs);
+ decreaseVirtRegPressure(VirtRegOpers.DeadDefs);
+
+ // Find the next instruction.
+ do
+ ++CurrPos;
+ while (CurrPos != MBB->end() && CurrPos->isDebugValue());
+ return true;
+}
+
+/// Find the max change in excess pressure across all sets.
+static void computeExcessPressureDelta(ArrayRef<unsigned> OldPressureVec,
+ ArrayRef<unsigned> NewPressureVec,
+ RegPressureDelta &Delta,
+ const TargetRegisterInfo *TRI) {
+ int ExcessUnits = 0;
+ unsigned PSetID = ~0U;
+ for (unsigned i = 0, e = OldPressureVec.size(); i < e; ++i) {
+ unsigned POld = OldPressureVec[i];
+ unsigned PNew = NewPressureVec[i];
+ int PDiff = (int)PNew - (int)POld;
+ if (!PDiff) // No change in this set in the common case.
+ continue;
+ // Only consider change beyond the limit.
+ unsigned Limit = TRI->getRegPressureSetLimit(i);
+ if (Limit > POld) {
+ if (Limit > PNew)
+ PDiff = 0; // Under the limit
+ else
+ PDiff = PNew - Limit; // Just exceeded limit.
+ }
+ else if (Limit > PNew)
+ PDiff = Limit - POld; // Just obeyed limit.
+
+ if (std::abs(PDiff) > std::abs(ExcessUnits)) {
+ ExcessUnits = PDiff;
+ PSetID = i;
+ }
+ }
+ Delta.Excess.PSetID = PSetID;
+ Delta.Excess.UnitIncrease = ExcessUnits;
+}
+
+/// Find the max change in max pressure that either surpasses a critical PSet
+/// limit or exceeds the current MaxPressureLimit.
+///
+/// FIXME: comparing each element of the old and new MaxPressure vectors here is
+/// silly. It's done now to demonstrate the concept but will go away with a
+/// RegPressureTracker API change to work with pressure differences.
+static void computeMaxPressureDelta(ArrayRef<unsigned> OldMaxPressureVec,
+ ArrayRef<unsigned> NewMaxPressureVec,
+ ArrayRef<PressureElement> CriticalPSets,
+ ArrayRef<unsigned> MaxPressureLimit,
+ RegPressureDelta &Delta) {
+ Delta.CriticalMax = PressureElement();
+ Delta.CurrentMax = PressureElement();
+
+ unsigned CritIdx = 0, CritEnd = CriticalPSets.size();
+ for (unsigned i = 0, e = OldMaxPressureVec.size(); i < e; ++i) {
+ unsigned POld = OldMaxPressureVec[i];
+ unsigned PNew = NewMaxPressureVec[i];
+ if (PNew == POld) // No change in this set in the common case.
+ continue;
+
+ while (CritIdx != CritEnd && CriticalPSets[CritIdx].PSetID < i)
+ ++CritIdx;
+
+ if (CritIdx != CritEnd && CriticalPSets[CritIdx].PSetID == i) {
+ int PDiff = (int)PNew - (int)CriticalPSets[CritIdx].UnitIncrease;
+ if (PDiff > Delta.CriticalMax.UnitIncrease) {
+ Delta.CriticalMax.PSetID = i;
+ Delta.CriticalMax.UnitIncrease = PDiff;
+ }
+ }
+
+ // Find the greatest increase above MaxPressureLimit.
+ // (Ignores negative MDiff).
+ int MDiff = (int)PNew - (int)MaxPressureLimit[i];
+ if (MDiff > Delta.CurrentMax.UnitIncrease) {
+ Delta.CurrentMax.PSetID = i;
+ Delta.CurrentMax.UnitIncrease = PNew;
+ }
+ }
+}
+
+/// Record the upward impact of a single instruction on current register
+/// pressure. Unlike the advance/recede pressure tracking interface, this does
+/// not discover live in/outs.
+///
+/// This is intended for speculative queries. It leaves pressure inconsistent
+/// with the current position, so must be restored by the caller.
+void RegPressureTracker::bumpUpwardPressure(const MachineInstr *MI) {
+ // Account for register pressure similar to RegPressureTracker::recede().
+ PhysRegOperands PhysRegOpers;
+ VirtRegOperands VirtRegOpers;
+ collectOperands(MI, PhysRegOpers, VirtRegOpers, TRI, RCI);
+
+ // Boost max pressure for all dead defs together. The increase may raise
+ // P.MaxSetPressure; the matching decrease only lowers CurrSetPressure.
+ increasePhysRegPressure(PhysRegOpers.DeadDefs);
+ increaseVirtRegPressure(VirtRegOpers.DeadDefs);
+ decreasePhysRegPressure(PhysRegOpers.DeadDefs);
+ decreaseVirtRegPressure(VirtRegOpers.DeadDefs);
+
+ // Kill liveness at live defs.
+ decreasePhysRegPressure(PhysRegOpers.Defs);
+ decreaseVirtRegPressure(VirtRegOpers.Defs);
+
+ // Generate liveness for uses.
+ for (unsigned i = 0, e = PhysRegOpers.Uses.size(); i < e; ++i) {
+ unsigned Reg = PhysRegOpers.Uses[i];
+ if (!hasRegAlias(Reg, LivePhysRegs, TRI))
+ increasePhysRegPressure(Reg);
+ }
+ for (unsigned i = 0, e = VirtRegOpers.Uses.size(); i < e; ++i) {
+ unsigned Reg = VirtRegOpers.Uses[i];
+ if (!LiveVirtRegs.count(Reg))
+ increaseVirtRegPressure(Reg);
+ }
+}
+
+/// Consider the pressure increase caused by traversing this instruction
+/// bottom-up. Find the pressure set with the most change beyond its pressure
+/// limit based on the tracker's current pressure, and return the change in
+/// number of register units of that pressure set introduced by this
+/// instruction.
+///
+/// This assumes that the current LiveOut set is sufficient.
+///
+/// FIXME: This is expensive for an on-the-fly query. We need to cache the
+/// result per-SUnit with enough information to adjust for the current
+/// scheduling position. But this works as a proof of concept.
+void RegPressureTracker::
+getMaxUpwardPressureDelta(const MachineInstr *MI, RegPressureDelta &Delta,
+ ArrayRef<PressureElement> CriticalPSets,
+ ArrayRef<unsigned> MaxPressureLimit) {
+ // Snapshot Pressure.
+ // FIXME: The snapshot heap space should persist. But I'm planning to
+ // summarize the pressure effect so we don't need to snapshot at all.
+ std::vector<unsigned> SavedPressure = CurrSetPressure;
+ std::vector<unsigned> SavedMaxPressure = P.MaxSetPressure;
+
+ bumpUpwardPressure(MI);
+
+ computeExcessPressureDelta(SavedPressure, CurrSetPressure, Delta, TRI);
+ computeMaxPressureDelta(SavedMaxPressure, P.MaxSetPressure, CriticalPSets,
+ MaxPressureLimit, Delta);
+ assert(Delta.CriticalMax.UnitIncrease >= 0 &&
+ Delta.CurrentMax.UnitIncrease >= 0 && "cannot decrease max pressure");
+
+ // Restore the tracker's state.
+ P.MaxSetPressure.swap(SavedMaxPressure);
+ CurrSetPressure.swap(SavedPressure);
+}
+
+/// Helper to find a vreg use between two indices [PriorUseIdx, NextUseIdx).
+static bool findUseBetween(unsigned Reg,
+ SlotIndex PriorUseIdx, SlotIndex NextUseIdx,
+ const MachineRegisterInfo *MRI,
+ const LiveIntervals *LIS) {
+ for (MachineRegisterInfo::use_nodbg_iterator
+ UI = MRI->use_nodbg_begin(Reg), UE = MRI->use_nodbg_end();
+ UI != UE; UI.skipInstruction()) {
+ const MachineInstr* MI = &*UI;
+ SlotIndex InstSlot = LIS->getInstructionIndex(MI).getRegSlot();
+ if (InstSlot >= PriorUseIdx && InstSlot < NextUseIdx)
+ return true;
+ }
+ return false;
+}
+
+/// Record the downward impact of a single instruction on current register
+/// pressure. Unlike the advance/recede pressure tracking interface, this does
+/// not discover live in/outs.
+///
+/// This is intended for speculative queries. It leaves pressure inconsistent
+/// with the current position, so must be restored by the caller.
+void RegPressureTracker::bumpDownwardPressure(const MachineInstr *MI) {
+ // Account for register pressure similar to RegPressureTracker::advance().
+ PhysRegOperands PhysRegOpers;
+ VirtRegOperands VirtRegOpers;
+ collectOperands(MI, PhysRegOpers, VirtRegOpers, TRI, RCI);
+
+ // Kill liveness at last uses. Assume allocatable physregs are single-use
+ // rather than checking LiveIntervals.
+ decreasePhysRegPressure(PhysRegOpers.Uses);
+ if (RequireIntervals) {
+ SlotIndex SlotIdx = LIS->getInstructionIndex(MI).getRegSlot();
+ for (unsigned i = 0, e = VirtRegOpers.Uses.size(); i < e; ++i) {
+ unsigned Reg = VirtRegOpers.Uses[i];
+ const LiveInterval *LI = &LIS->getInterval(Reg);
+ // FIXME: allow the caller to pass in the list of vreg uses that remain to
+ // be bottom-scheduled to avoid searching uses at each query.
+ SlotIndex CurrIdx = LIS->getInstructionIndex(CurrPos).getRegSlot();
+ if (LI->killedAt(SlotIdx)
+ && !findUseBetween(Reg, CurrIdx, SlotIdx, MRI, LIS)) {
+ decreaseVirtRegPressure(Reg);
+ }
+ }
+ }
+
+ // Generate liveness for defs.
+ increasePhysRegPressure(PhysRegOpers.Defs);
+ increaseVirtRegPressure(VirtRegOpers.Defs);
+
+ // Boost pressure for all dead defs together.
+ increasePhysRegPressure(PhysRegOpers.DeadDefs);
+ increaseVirtRegPressure(VirtRegOpers.DeadDefs);
+ decreasePhysRegPressure(PhysRegOpers.DeadDefs);
+ decreaseVirtRegPressure(VirtRegOpers.DeadDefs);
+}
+
+/// Consider the pressure increase caused by traversing this instruction
+/// top-down. Find the pressure set with the most change beyond its pressure
+/// limit based on the tracker's current pressure, and report in Delta the
+/// excess register units of that pressure set introduced by this instruction.
+///
+/// This assumes that the current LiveIn set is sufficient.
+void RegPressureTracker::
+getMaxDownwardPressureDelta(const MachineInstr *MI, RegPressureDelta &Delta,
+ ArrayRef<PressureElement> CriticalPSets,
+ ArrayRef<unsigned> MaxPressureLimit) {
+ // Snapshot Pressure.
+ std::vector<unsigned> SavedPressure = CurrSetPressure;
+ std::vector<unsigned> SavedMaxPressure = P.MaxSetPressure;
+
+ bumpDownwardPressure(MI);
+
+ computeExcessPressureDelta(SavedPressure, CurrSetPressure, Delta, TRI);
+ computeMaxPressureDelta(SavedMaxPressure, P.MaxSetPressure, CriticalPSets,
+ MaxPressureLimit, Delta);
+ assert(Delta.CriticalMax.UnitIncrease >= 0 &&
+ Delta.CurrentMax.UnitIncrease >= 0 && "cannot decrease max pressure");
+
+ // Restore the tracker's state.
+ P.MaxSetPressure.swap(SavedMaxPressure);
+ CurrSetPressure.swap(SavedPressure);
+}
+
+/// Get the pressure of each PSet after traversing this instruction bottom-up.
+void RegPressureTracker::
+getUpwardPressure(const MachineInstr *MI,
+ std::vector<unsigned> &PressureResult,
+ std::vector<unsigned> &MaxPressureResult) {
+ // Snapshot pressure.
+ PressureResult = CurrSetPressure;
+ MaxPressureResult = P.MaxSetPressure;
+
+ bumpUpwardPressure(MI);
+
+ // Current pressure becomes the result. Restore current pressure.
+ P.MaxSetPressure.swap(MaxPressureResult);
+ CurrSetPressure.swap(PressureResult);
+}
+
+/// Get the pressure of each PSet after traversing this instruction top-down.
+void RegPressureTracker::
+getDownwardPressure(const MachineInstr *MI,
+ std::vector<unsigned> &PressureResult,
+ std::vector<unsigned> &MaxPressureResult) {
+ // Snapshot pressure.
+ PressureResult = CurrSetPressure;
+ MaxPressureResult = P.MaxSetPressure;
+
+ bumpDownwardPressure(MI);
+
+ // Current pressure becomes the result. Restore current pressure.
+ P.MaxSetPressure.swap(MaxPressureResult);
+ CurrSetPressure.swap(PressureResult);
+}
diff --git a/lib/CodeGen/RegisterScavenging.cpp b/lib/CodeGen/RegisterScavenging.cpp
index 03bd82e..d673794 100644
--- a/lib/CodeGen/RegisterScavenging.cpp
+++ b/lib/CodeGen/RegisterScavenging.cpp
@@ -37,16 +37,13 @@ using namespace llvm;
void RegScavenger::setUsed(unsigned Reg) {
RegsAvailable.reset(Reg);
- for (const uint16_t *SubRegs = TRI->getSubRegisters(Reg);
- unsigned SubReg = *SubRegs; ++SubRegs)
- RegsAvailable.reset(SubReg);
+ for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs)
+ RegsAvailable.reset(*SubRegs);
}
bool RegScavenger::isAliasUsed(unsigned Reg) const {
- if (isUsed(Reg))
- return true;
- for (const uint16_t *R = TRI->getAliasSet(Reg); *R; ++R)
- if (isUsed(*R))
+ for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
+ if (isUsed(*AI))
return true;
return false;
}
@@ -114,8 +111,8 @@ void RegScavenger::enterBasicBlock(MachineBasicBlock *mbb) {
void RegScavenger::addRegWithSubRegs(BitVector &BV, unsigned Reg) {
BV.set(Reg);
- for (const uint16_t *R = TRI->getSubRegisters(Reg); *R; R++)
- BV.set(*R);
+ for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs)
+ BV.set(*SubRegs);
}
void RegScavenger::forward() {
@@ -195,9 +192,8 @@ void RegScavenger::forward() {
// Ideally we would like a way to model this, but leaving the
// insert_subreg around causes both correctness and performance issues.
bool SubUsed = false;
- for (const uint16_t *SubRegs = TRI->getSubRegisters(Reg);
- unsigned SubReg = *SubRegs; ++SubRegs)
- if (isUsed(SubReg)) {
+ for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs)
+ if (isUsed(*SubRegs)) {
SubUsed = true;
break;
}
@@ -296,9 +292,8 @@ unsigned RegScavenger::findSurvivorReg(MachineBasicBlock::iterator StartMI,
isVirtKillInsn = true;
continue;
}
- Candidates.reset(MO.getReg());
- for (const uint16_t *R = TRI->getAliasSet(MO.getReg()); *R; R++)
- Candidates.reset(*R);
+ for (MCRegAliasIterator AI(MO.getReg(), TRI, true); AI.isValid(); ++AI)
+ Candidates.reset(*AI);
}
// If we're not in a virtual reg's live range, this is a valid
// restore point.
diff --git a/lib/CodeGen/RenderMachineFunction.cpp b/lib/CodeGen/RenderMachineFunction.cpp
deleted file mode 100644
index 6020908..0000000
--- a/lib/CodeGen/RenderMachineFunction.cpp
+++ /dev/null
@@ -1,1013 +0,0 @@
-//===-- llvm/CodeGen/RenderMachineFunction.cpp - MF->HTML -----------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "rendermf"
-
-#include "RenderMachineFunction.h"
-
-#include "VirtRegMap.h"
-
-#include "llvm/Function.h"
-#include "llvm/Module.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/CodeGen/LiveIntervalAnalysis.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineInstr.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetMachine.h"
-
-#include <sstream>
-
-using namespace llvm;
-
-char RenderMachineFunction::ID = 0;
-INITIALIZE_PASS_BEGIN(RenderMachineFunction, "rendermf",
- "Render machine functions (and related info) to HTML pages",
- false, false)
-INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
-INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
-INITIALIZE_PASS_END(RenderMachineFunction, "rendermf",
- "Render machine functions (and related info) to HTML pages",
- false, false)
-
-static cl::opt<std::string>
-outputFileSuffix("rmf-file-suffix",
- cl::desc("Appended to function name to get output file name "
- "(default: \".html\")"),
- cl::init(".html"), cl::Hidden);
-
-static cl::opt<std::string>
-machineFuncsToRender("rmf-funcs",
- cl::desc("Comma separated list of functions to render"
- ", or \"*\"."),
- cl::init(""), cl::Hidden);
-
-static cl::opt<std::string>
-pressureClasses("rmf-classes",
- cl::desc("Register classes to render pressure for."),
- cl::init(""), cl::Hidden);
-
-static cl::opt<std::string>
-showIntervals("rmf-intervals",
- cl::desc("Live intervals to show alongside code."),
- cl::init(""), cl::Hidden);
-
-static cl::opt<bool>
-filterEmpty("rmf-filter-empty-intervals",
- cl::desc("Don't display empty intervals."),
- cl::init(true), cl::Hidden);
-
-static cl::opt<bool>
-showEmptyIndexes("rmf-empty-indexes",
- cl::desc("Render indexes not associated with instructions or "
- "MBB starts."),
- cl::init(false), cl::Hidden);
-
-static cl::opt<bool>
-useFancyVerticals("rmf-fancy-verts",
- cl::desc("Use SVG for vertical text."),
- cl::init(true), cl::Hidden);
-
-static cl::opt<bool>
-prettyHTML("rmf-pretty-html",
- cl::desc("Pretty print HTML. For debugging the renderer only.."),
- cl::init(false), cl::Hidden);
-
-
-namespace llvm {
-
- bool MFRenderingOptions::renderingOptionsProcessed;
- std::set<std::string> MFRenderingOptions::mfNamesToRender;
- bool MFRenderingOptions::renderAllMFs = false;
-
- std::set<std::string> MFRenderingOptions::classNamesToRender;
- bool MFRenderingOptions::renderAllClasses = false;
-
- std::set<std::pair<unsigned, unsigned> >
- MFRenderingOptions::intervalNumsToRender;
- unsigned MFRenderingOptions::intervalTypesToRender = ExplicitOnly;
-
- template <typename OutputItr>
- void MFRenderingOptions::splitComaSeperatedList(const std::string &s,
- OutputItr outItr) {
- std::string::const_iterator curPos = s.begin();
- std::string::const_iterator nextComa = std::find(curPos, s.end(), ',');
- while (nextComa != s.end()) {
- std::string elem;
- std::copy(curPos, nextComa, std::back_inserter(elem));
- *outItr = elem;
- ++outItr;
- curPos = llvm::next(nextComa);
- nextComa = std::find(curPos, s.end(), ',');
- }
-
- if (curPos != s.end()) {
- std::string elem;
- std::copy(curPos, s.end(), std::back_inserter(elem));
- *outItr = elem;
- ++outItr;
- }
- }
-
- void MFRenderingOptions::processOptions() {
- if (!renderingOptionsProcessed) {
- processFuncNames();
- processRegClassNames();
- processIntervalNumbers();
- renderingOptionsProcessed = true;
- }
- }
-
- void MFRenderingOptions::processFuncNames() {
- if (machineFuncsToRender == "*") {
- renderAllMFs = true;
- } else {
- splitComaSeperatedList(machineFuncsToRender,
- std::inserter(mfNamesToRender,
- mfNamesToRender.begin()));
- }
- }
-
- void MFRenderingOptions::processRegClassNames() {
- if (pressureClasses == "*") {
- renderAllClasses = true;
- } else {
- splitComaSeperatedList(pressureClasses,
- std::inserter(classNamesToRender,
- classNamesToRender.begin()));
- }
- }
-
- void MFRenderingOptions::processIntervalNumbers() {
- std::set<std::string> intervalRanges;
- splitComaSeperatedList(showIntervals,
- std::inserter(intervalRanges,
- intervalRanges.begin()));
- std::for_each(intervalRanges.begin(), intervalRanges.end(),
- processIntervalRange);
- }
-
- void MFRenderingOptions::processIntervalRange(
- const std::string &intervalRangeStr) {
- if (intervalRangeStr == "*") {
- intervalTypesToRender |= All;
- } else if (intervalRangeStr == "virt-nospills*") {
- intervalTypesToRender |= VirtNoSpills;
- } else if (intervalRangeStr == "spills*") {
- intervalTypesToRender |= VirtSpills;
- } else if (intervalRangeStr == "virt*") {
- intervalTypesToRender |= AllVirt;
- } else if (intervalRangeStr == "phys*") {
- intervalTypesToRender |= AllPhys;
- } else {
- std::istringstream iss(intervalRangeStr);
- unsigned reg1, reg2;
- if ((iss >> reg1 >> std::ws)) {
- if (iss.eof()) {
- intervalNumsToRender.insert(std::make_pair(reg1, reg1 + 1));
- } else {
- char c;
- iss >> c;
- if (c == '-' && (iss >> reg2)) {
- intervalNumsToRender.insert(std::make_pair(reg1, reg2 + 1));
- } else {
- dbgs() << "Warning: Invalid interval range \""
- << intervalRangeStr << "\" in -rmf-intervals. Skipping.\n";
- }
- }
- } else {
- dbgs() << "Warning: Invalid interval number \""
- << intervalRangeStr << "\" in -rmf-intervals. Skipping.\n";
- }
- }
- }
-
- void MFRenderingOptions::setup(MachineFunction *mf,
- const TargetRegisterInfo *tri,
- LiveIntervals *lis,
- const RenderMachineFunction *rmf) {
- this->mf = mf;
- this->tri = tri;
- this->lis = lis;
- this->rmf = rmf;
-
- clear();
- }
-
- void MFRenderingOptions::clear() {
- regClassesTranslatedToCurrentFunction = false;
- regClassSet.clear();
-
- intervalsTranslatedToCurrentFunction = false;
- intervalSet.clear();
- }
-
- void MFRenderingOptions::resetRenderSpecificOptions() {
- intervalSet.clear();
- intervalsTranslatedToCurrentFunction = false;
- }
-
- bool MFRenderingOptions::shouldRenderCurrentMachineFunction() const {
- processOptions();
-
- return (renderAllMFs ||
- mfNamesToRender.find(mf->getFunction()->getName()) !=
- mfNamesToRender.end());
- }
-
- const MFRenderingOptions::RegClassSet& MFRenderingOptions::regClasses() const{
- translateRegClassNamesToCurrentFunction();
- return regClassSet;
- }
-
- const MFRenderingOptions::IntervalSet& MFRenderingOptions::intervals() const {
- translateIntervalNumbersToCurrentFunction();
- return intervalSet;
- }
-
- bool MFRenderingOptions::renderEmptyIndexes() const {
- return showEmptyIndexes;
- }
-
- bool MFRenderingOptions::fancyVerticals() const {
- return useFancyVerticals;
- }
-
- void MFRenderingOptions::translateRegClassNamesToCurrentFunction() const {
- if (!regClassesTranslatedToCurrentFunction) {
- processOptions();
- for (TargetRegisterInfo::regclass_iterator rcItr = tri->regclass_begin(),
- rcEnd = tri->regclass_end();
- rcItr != rcEnd; ++rcItr) {
- const TargetRegisterClass *trc = *rcItr;
- if (renderAllClasses ||
- classNamesToRender.find(trc->getName()) !=
- classNamesToRender.end()) {
- regClassSet.insert(trc);
- }
- }
- regClassesTranslatedToCurrentFunction = true;
- }
- }
-
- void MFRenderingOptions::translateIntervalNumbersToCurrentFunction() const {
- if (!intervalsTranslatedToCurrentFunction) {
- processOptions();
-
- // If we're not just doing explicit then do a copy over all matching
- // types.
- if (intervalTypesToRender != ExplicitOnly) {
- for (LiveIntervals::iterator liItr = lis->begin(), liEnd = lis->end();
- liItr != liEnd; ++liItr) {
- LiveInterval *li = liItr->second;
-
- if (filterEmpty && li->empty())
- continue;
-
- if ((TargetRegisterInfo::isPhysicalRegister(li->reg) &&
- (intervalTypesToRender & AllPhys))) {
- intervalSet.insert(li);
- } else if (TargetRegisterInfo::isVirtualRegister(li->reg)) {
- if (((intervalTypesToRender & VirtNoSpills) && !rmf->isSpill(li)) ||
- ((intervalTypesToRender & VirtSpills) && rmf->isSpill(li))) {
- intervalSet.insert(li);
- }
- }
- }
- }
-
- // If we need to process the explicit list...
- if (intervalTypesToRender != All) {
- for (std::set<std::pair<unsigned, unsigned> >::const_iterator
- regRangeItr = intervalNumsToRender.begin(),
- regRangeEnd = intervalNumsToRender.end();
- regRangeItr != regRangeEnd; ++regRangeItr) {
- const std::pair<unsigned, unsigned> &range = *regRangeItr;
- for (unsigned reg = range.first; reg != range.second; ++reg) {
- if (lis->hasInterval(reg)) {
- intervalSet.insert(&lis->getInterval(reg));
- }
- }
- }
- }
-
- intervalsTranslatedToCurrentFunction = true;
- }
- }
-
- // ---------- TargetRegisterExtraInformation implementation ----------
-
- TargetRegisterExtraInfo::TargetRegisterExtraInfo()
- : mapsPopulated(false) {
- }
-
- void TargetRegisterExtraInfo::setup(MachineFunction *mf,
- MachineRegisterInfo *mri,
- const TargetRegisterInfo *tri,
- LiveIntervals *lis) {
- this->mf = mf;
- this->mri = mri;
- this->tri = tri;
- this->lis = lis;
- }
-
- void TargetRegisterExtraInfo::reset() {
- if (!mapsPopulated) {
- initWorst();
- //initBounds();
- initCapacity();
- mapsPopulated = true;
- }
-
- resetPressureAndLiveStates();
- }
-
- void TargetRegisterExtraInfo::clear() {
- prWorst.clear();
- vrWorst.clear();
- capacityMap.clear();
- pressureMap.clear();
- //liveStatesMap.clear();
- mapsPopulated = false;
- }
-
- void TargetRegisterExtraInfo::initWorst() {
- assert(!mapsPopulated && prWorst.empty() && vrWorst.empty() &&
- "Worst map already initialised?");
-
- // Start with the physical registers.
- for (unsigned preg = 1; preg < tri->getNumRegs(); ++preg) {
- WorstMapLine &pregLine = prWorst[preg];
-
- for (TargetRegisterInfo::regclass_iterator rcItr = tri->regclass_begin(),
- rcEnd = tri->regclass_end();
- rcItr != rcEnd; ++rcItr) {
- const TargetRegisterClass *trc = *rcItr;
-
- unsigned numOverlaps = 0;
- for (TargetRegisterClass::iterator rItr = trc->begin(),
- rEnd = trc->end();
- rItr != rEnd; ++rItr) {
- unsigned trcPReg = *rItr;
- if (tri->regsOverlap(preg, trcPReg))
- ++numOverlaps;
- }
-
- pregLine[trc] = numOverlaps;
- }
- }
-
- // Now the register classes.
- for (TargetRegisterInfo::regclass_iterator rc1Itr = tri->regclass_begin(),
- rcEnd = tri->regclass_end();
- rc1Itr != rcEnd; ++rc1Itr) {
- const TargetRegisterClass *trc1 = *rc1Itr;
- WorstMapLine &classLine = vrWorst[trc1];
-
- for (TargetRegisterInfo::regclass_iterator rc2Itr = tri->regclass_begin();
- rc2Itr != rcEnd; ++rc2Itr) {
- const TargetRegisterClass *trc2 = *rc2Itr;
-
- unsigned worst = 0;
-
- for (TargetRegisterClass::iterator trc1Itr = trc1->begin(),
- trc1End = trc1->end();
- trc1Itr != trc1End; ++trc1Itr) {
- unsigned trc1Reg = *trc1Itr;
- unsigned trc1RegWorst = 0;
-
- for (TargetRegisterClass::iterator trc2Itr = trc2->begin(),
- trc2End = trc2->end();
- trc2Itr != trc2End; ++trc2Itr) {
- unsigned trc2Reg = *trc2Itr;
- if (tri->regsOverlap(trc1Reg, trc2Reg))
- ++trc1RegWorst;
- }
- if (trc1RegWorst > worst) {
- worst = trc1RegWorst;
- }
- }
-
- if (worst != 0) {
- classLine[trc2] = worst;
- }
- }
- }
- }
-
- unsigned TargetRegisterExtraInfo::getWorst(
- unsigned reg,
- const TargetRegisterClass *trc) const {
- const WorstMapLine *wml = 0;
- if (TargetRegisterInfo::isPhysicalRegister(reg)) {
- PRWorstMap::const_iterator prwItr = prWorst.find(reg);
- assert(prwItr != prWorst.end() && "Missing prWorst entry.");
- wml = &prwItr->second;
- } else {
- const TargetRegisterClass *regTRC = mri->getRegClass(reg);
- VRWorstMap::const_iterator vrwItr = vrWorst.find(regTRC);
- assert(vrwItr != vrWorst.end() && "Missing vrWorst entry.");
- wml = &vrwItr->second;
- }
-
- WorstMapLine::const_iterator wmlItr = wml->find(trc);
- if (wmlItr == wml->end())
- return 0;
-
- return wmlItr->second;
- }
-
- void TargetRegisterExtraInfo::initCapacity() {
- assert(!mapsPopulated && capacityMap.empty() &&
- "Capacity map already initialised?");
-
- for (TargetRegisterInfo::regclass_iterator rcItr = tri->regclass_begin(),
- rcEnd = tri->regclass_end();
- rcItr != rcEnd; ++rcItr) {
- const TargetRegisterClass *trc = *rcItr;
- unsigned capacity = trc->getRawAllocationOrder(*mf).size();
-
- if (capacity != 0)
- capacityMap[trc] = capacity;
- }
- }
-
- unsigned TargetRegisterExtraInfo::getCapacity(
- const TargetRegisterClass *trc) const {
- CapacityMap::const_iterator cmItr = capacityMap.find(trc);
- assert(cmItr != capacityMap.end() &&
- "vreg with unallocable register class");
- return cmItr->second;
- }
-
- void TargetRegisterExtraInfo::resetPressureAndLiveStates() {
- pressureMap.clear();
- //liveStatesMap.clear();
-
- // Iterate over all slots.
-
-
- // Iterate over all live intervals.
- for (LiveIntervals::iterator liItr = lis->begin(),
- liEnd = lis->end();
- liItr != liEnd; ++liItr) {
- LiveInterval *li = liItr->second;
-
- if (TargetRegisterInfo::isPhysicalRegister(li->reg))
- continue;
-
- // For all ranges in the current interal.
- for (LiveInterval::iterator lrItr = li->begin(),
- lrEnd = li->end();
- lrItr != lrEnd; ++lrItr) {
- LiveRange *lr = &*lrItr;
-
- // For all slots in the current range.
- for (SlotIndex i = lr->start; i != lr->end; i = i.getNextSlot()) {
-
- // Record increased pressure at index for all overlapping classes.
- for (TargetRegisterInfo::regclass_iterator
- rcItr = tri->regclass_begin(),
- rcEnd = tri->regclass_end();
- rcItr != rcEnd; ++rcItr) {
- const TargetRegisterClass *trc = *rcItr;
-
- if (trc->getRawAllocationOrder(*mf).empty())
- continue;
-
- unsigned worstAtI = getWorst(li->reg, trc);
-
- if (worstAtI != 0) {
- pressureMap[i][trc] += worstAtI;
- }
- }
- }
- }
- }
- }
-
- unsigned TargetRegisterExtraInfo::getPressureAtSlot(
- const TargetRegisterClass *trc,
- SlotIndex i) const {
- PressureMap::const_iterator pmItr = pressureMap.find(i);
- if (pmItr == pressureMap.end())
- return 0;
- const PressureMapLine &pmLine = pmItr->second;
- PressureMapLine::const_iterator pmlItr = pmLine.find(trc);
- if (pmlItr == pmLine.end())
- return 0;
- return pmlItr->second;
- }
-
- bool TargetRegisterExtraInfo::classOverCapacityAtSlot(
- const TargetRegisterClass *trc,
- SlotIndex i) const {
- return (getPressureAtSlot(trc, i) > getCapacity(trc));
- }
-
- // ---------- MachineFunctionRenderer implementation ----------
-
- void RenderMachineFunction::Spacer::print(raw_ostream &os) const {
- if (!prettyHTML)
- return;
- for (unsigned i = 0; i < ns; ++i) {
- os << " ";
- }
- }
-
- RenderMachineFunction::Spacer RenderMachineFunction::s(unsigned ns) const {
- return Spacer(ns);
- }
-
- raw_ostream& operator<<(raw_ostream &os, const RenderMachineFunction::Spacer &s) {
- s.print(os);
- return os;
- }
-
- template <typename Iterator>
- std::string RenderMachineFunction::escapeChars(Iterator sBegin, Iterator sEnd) const {
- std::string r;
-
- for (Iterator sItr = sBegin; sItr != sEnd; ++sItr) {
- char c = *sItr;
-
- switch (c) {
- case '<': r.append("&lt;"); break;
- case '>': r.append("&gt;"); break;
- case '&': r.append("&amp;"); break;
- case ' ': r.append("&nbsp;"); break;
- case '\"': r.append("&quot;"); break;
- default: r.push_back(c); break;
- }
- }
-
- return r;
- }
-
- RenderMachineFunction::LiveState
- RenderMachineFunction::getLiveStateAt(const LiveInterval *li,
- SlotIndex i) const {
- const MachineInstr *mi = sis->getInstructionFromIndex(i);
-
- // For uses/defs recorded use/def indexes override current liveness and
- // instruction operands (Only for the interval which records the indexes).
- // FIXME: This is all wrong, uses and defs share the same slots.
- if (i.isEarlyClobber() || i.isRegister()) {
- UseDefs::const_iterator udItr = useDefs.find(li);
- if (udItr != useDefs.end()) {
- const SlotSet &slotSet = udItr->second;
- if (slotSet.count(i)) {
- if (i.isEarlyClobber()) {
- return Used;
- }
- // else
- return Defined;
- }
- }
- }
-
- // If the slot is a load/store, or there's no info in the use/def set then
- // use liveness and instruction operand info.
- if (li->liveAt(i)) {
-
- if (mi == 0) {
- if (vrm == 0 ||
- (vrm->getStackSlot(li->reg) == VirtRegMap::NO_STACK_SLOT)) {
- return AliveReg;
- } else {
- return AliveStack;
- }
- } else {
- if (i.isRegister() && mi->definesRegister(li->reg, tri)) {
- return Defined;
- } else if (i.isEarlyClobber() && mi->readsRegister(li->reg)) {
- return Used;
- } else {
- if (vrm == 0 ||
- (vrm->getStackSlot(li->reg) == VirtRegMap::NO_STACK_SLOT)) {
- return AliveReg;
- } else {
- return AliveStack;
- }
- }
- }
- }
- return Dead;
- }
-
- RenderMachineFunction::PressureState
- RenderMachineFunction::getPressureStateAt(const TargetRegisterClass *trc,
- SlotIndex i) const {
- if (trei.getPressureAtSlot(trc, i) == 0) {
- return Zero;
- } else if (trei.classOverCapacityAtSlot(trc, i)){
- return High;
- }
- return Low;
- }
-
- /// \brief Render a machine instruction.
- void RenderMachineFunction::renderMachineInstr(raw_ostream &os,
- const MachineInstr *mi) const {
- std::string s;
- raw_string_ostream oss(s);
- oss << *mi;
-
- os << escapeChars(oss.str());
- }
-
- template <typename T>
- void RenderMachineFunction::renderVertical(const Spacer &indent,
- raw_ostream &os,
- const T &t) const {
- if (ro.fancyVerticals()) {
- os << indent << "<object\n"
- << indent + s(2) << "class=\"obj\"\n"
- << indent + s(2) << "type=\"image/svg+xml\"\n"
- << indent + s(2) << "width=\"14px\"\n"
- << indent + s(2) << "height=\"55px\"\n"
- << indent + s(2) << "data=\"data:image/svg+xml,\n"
- << indent + s(4) << "<svg xmlns='http://www.w3.org/2000/svg'>\n"
- << indent + s(6) << "<text x='-55' y='10' "
- "font-family='Courier' font-size='12' "
- "transform='rotate(-90)' "
- "text-rendering='optimizeSpeed' "
- "fill='#000'>" << t << "</text>\n"
- << indent + s(4) << "</svg>\">\n"
- << indent << "</object>\n";
- } else {
- std::ostringstream oss;
- oss << t;
- std::string tStr(oss.str());
-
- os << indent;
- for (std::string::iterator tStrItr = tStr.begin(), tStrEnd = tStr.end();
- tStrItr != tStrEnd; ++tStrItr) {
- os << *tStrItr << "<br/>";
- }
- os << "\n";
- }
- }
-
- void RenderMachineFunction::insertCSS(const Spacer &indent,
- raw_ostream &os) const {
- os << indent << "<style type=\"text/css\">\n"
- << indent + s(2) << "body { font-color: black; }\n"
- << indent + s(2) << "table.code td { font-family: monospace; "
- "border-width: 0px; border-style: solid; "
- "border-bottom: 1px solid #dddddd; white-space: nowrap; }\n"
- << indent + s(2) << "table.code td.p-z { background-color: #000000; }\n"
- << indent + s(2) << "table.code td.p-l { background-color: #00ff00; }\n"
- << indent + s(2) << "table.code td.p-h { background-color: #ff0000; }\n"
- << indent + s(2) << "table.code td.l-n { background-color: #ffffff; }\n"
- << indent + s(2) << "table.code td.l-d { background-color: #ff0000; }\n"
- << indent + s(2) << "table.code td.l-u { background-color: #ffff00; }\n"
- << indent + s(2) << "table.code td.l-r { background-color: #000000; }\n"
- << indent + s(2) << "table.code td.l-s { background-color: #770000; }\n"
- << indent + s(2) << "table.code th { border-width: 0px; "
- "border-style: solid; }\n"
- << indent << "</style>\n";
- }
-
- void RenderMachineFunction::renderFunctionSummary(
- const Spacer &indent, raw_ostream &os,
- const char * const renderContextStr) const {
- os << indent << "<h1>Function: " << mf->getFunction()->getName()
- << "</h1>\n"
- << indent << "<h2>Rendering context: " << renderContextStr << "</h2>\n";
- }
-
-
- void RenderMachineFunction::renderPressureTableLegend(
- const Spacer &indent,
- raw_ostream &os) const {
- os << indent << "<h2>Rendering Pressure Legend:</h2>\n"
- << indent << "<table class=\"code\">\n"
- << indent + s(2) << "<tr>\n"
- << indent + s(4) << "<th>Pressure</th><th>Description</th>"
- "<th>Appearance</th>\n"
- << indent + s(2) << "</tr>\n"
- << indent + s(2) << "<tr>\n"
- << indent + s(4) << "<td>No Pressure</td>"
- "<td>No physical registers of this class requested.</td>"
- "<td class=\"p-z\">&nbsp;&nbsp;</td>\n"
- << indent + s(2) << "</tr>\n"
- << indent + s(2) << "<tr>\n"
- << indent + s(4) << "<td>Low Pressure</td>"
- "<td>Sufficient physical registers to meet demand.</td>"
- "<td class=\"p-l\">&nbsp;&nbsp;</td>\n"
- << indent + s(2) << "</tr>\n"
- << indent + s(2) << "<tr>\n"
- << indent + s(4) << "<td>High Pressure</td>"
- "<td>Potentially insufficient physical registers to meet demand.</td>"
- "<td class=\"p-h\">&nbsp;&nbsp;</td>\n"
- << indent + s(2) << "</tr>\n"
- << indent << "</table>\n";
- }
-
- template <typename CellType>
- void RenderMachineFunction::renderCellsWithRLE(
- const Spacer &indent, raw_ostream &os,
- const std::pair<CellType, unsigned> &rleAccumulator,
- const std::map<CellType, std::string> &cellTypeStrs) const {
-
- if (rleAccumulator.second == 0)
- return;
-
- typename std::map<CellType, std::string>::const_iterator ctsItr =
- cellTypeStrs.find(rleAccumulator.first);
-
- assert(ctsItr != cellTypeStrs.end() && "No string for given cell type.");
-
- os << indent + s(4) << "<td class=\"" << ctsItr->second << "\"";
- if (rleAccumulator.second > 1)
- os << " colspan=" << rleAccumulator.second;
- os << "></td>\n";
- }
-
-
- void RenderMachineFunction::renderCodeTablePlusPI(const Spacer &indent,
- raw_ostream &os) const {
-
- std::map<LiveState, std::string> lsStrs;
- lsStrs[Dead] = "l-n";
- lsStrs[Defined] = "l-d";
- lsStrs[Used] = "l-u";
- lsStrs[AliveReg] = "l-r";
- lsStrs[AliveStack] = "l-s";
-
- std::map<PressureState, std::string> psStrs;
- psStrs[Zero] = "p-z";
- psStrs[Low] = "p-l";
- psStrs[High] = "p-h";
-
- // Open the table...
-
- os << indent << "<table cellpadding=0 cellspacing=0 class=\"code\">\n"
- << indent + s(2) << "<tr>\n";
-
- // Render the header row...
-
- os << indent + s(4) << "<th>index</th>\n"
- << indent + s(4) << "<th>instr</th>\n";
-
- // Render class names if necessary...
- if (!ro.regClasses().empty()) {
- for (MFRenderingOptions::RegClassSet::const_iterator
- rcItr = ro.regClasses().begin(),
- rcEnd = ro.regClasses().end();
- rcItr != rcEnd; ++rcItr) {
- const TargetRegisterClass *trc = *rcItr;
- os << indent + s(4) << "<th>\n";
- renderVertical(indent + s(6), os, trc->getName());
- os << indent + s(4) << "</th>\n";
- }
- }
-
- // FIXME: Is there a nicer way to insert space between columns in HTML?
- if (!ro.regClasses().empty() && !ro.intervals().empty())
- os << indent + s(4) << "<th>&nbsp;&nbsp;</th>\n";
-
- // Render interval numbers if necessary...
- if (!ro.intervals().empty()) {
- for (MFRenderingOptions::IntervalSet::const_iterator
- liItr = ro.intervals().begin(),
- liEnd = ro.intervals().end();
- liItr != liEnd; ++liItr) {
-
- const LiveInterval *li = *liItr;
- os << indent + s(4) << "<th>\n";
- renderVertical(indent + s(6), os, li->reg);
- os << indent + s(4) << "</th>\n";
- }
- }
-
- os << indent + s(2) << "</tr>\n";
-
- // End header row, start with the data rows...
-
- MachineInstr *mi = 0;
-
- // Data rows:
- for (SlotIndex i = sis->getZeroIndex(); i != sis->getLastIndex();
- i = i.getNextSlot()) {
-
- // Render the slot column.
- os << indent + s(2) << "<tr height=6ex>\n";
-
- // Render the code column.
- if (i.isBlock()) {
- MachineBasicBlock *mbb = sis->getMBBFromIndex(i);
- mi = sis->getInstructionFromIndex(i);
-
- if (i == sis->getMBBStartIdx(mbb) || mi != 0 ||
- ro.renderEmptyIndexes()) {
- os << indent + s(4) << "<td rowspan=4>" << i << "&nbsp;</td>\n"
- << indent + s(4) << "<td rowspan=4>\n";
-
- if (i == sis->getMBBStartIdx(mbb)) {
- os << indent + s(6) << "BB#" << mbb->getNumber() << ":&nbsp;\n";
- } else if (mi != 0) {
- os << indent + s(6) << "&nbsp;&nbsp;";
- renderMachineInstr(os, mi);
- } else {
- // Empty interval - leave blank.
- }
- os << indent + s(4) << "</td>\n";
- } else {
- i = i.getDeadSlot(); // <- Will be incremented to the next index.
- continue;
- }
- }
-
- // Render the class columns.
- if (!ro.regClasses().empty()) {
- std::pair<PressureState, unsigned> psRLEAccumulator(Zero, 0);
- for (MFRenderingOptions::RegClassSet::const_iterator
- rcItr = ro.regClasses().begin(),
- rcEnd = ro.regClasses().end();
- rcItr != rcEnd; ++rcItr) {
- const TargetRegisterClass *trc = *rcItr;
- PressureState newPressure = getPressureStateAt(trc, i);
-
- if (newPressure == psRLEAccumulator.first) {
- ++psRLEAccumulator.second;
- } else {
- renderCellsWithRLE(indent + s(4), os, psRLEAccumulator, psStrs);
- psRLEAccumulator.first = newPressure;
- psRLEAccumulator.second = 1;
- }
- }
- renderCellsWithRLE(indent + s(4), os, psRLEAccumulator, psStrs);
- }
-
- // FIXME: Is there a nicer way to insert space between columns in HTML?
- if (!ro.regClasses().empty() && !ro.intervals().empty())
- os << indent + s(4) << "<td width=2em></td>\n";
-
- if (!ro.intervals().empty()) {
- std::pair<LiveState, unsigned> lsRLEAccumulator(Dead, 0);
- for (MFRenderingOptions::IntervalSet::const_iterator
- liItr = ro.intervals().begin(),
- liEnd = ro.intervals().end();
- liItr != liEnd; ++liItr) {
- const LiveInterval *li = *liItr;
- LiveState newLiveness = getLiveStateAt(li, i);
-
- if (newLiveness == lsRLEAccumulator.first) {
- ++lsRLEAccumulator.second;
- } else {
- renderCellsWithRLE(indent + s(4), os, lsRLEAccumulator, lsStrs);
- lsRLEAccumulator.first = newLiveness;
- lsRLEAccumulator.second = 1;
- }
- }
- renderCellsWithRLE(indent + s(4), os, lsRLEAccumulator, lsStrs);
- }
- os << indent + s(2) << "</tr>\n";
- }
-
- os << indent << "</table>\n";
-
- if (!ro.regClasses().empty())
- renderPressureTableLegend(indent, os);
- }
-
- void RenderMachineFunction::renderFunctionPage(
- raw_ostream &os,
- const char * const renderContextStr) const {
- os << "<html>\n"
- << s(2) << "<head>\n"
- << s(4) << "<title>" << fqn << "</title>\n";
-
- insertCSS(s(4), os);
-
- os << s(2) << "<head>\n"
- << s(2) << "<body >\n";
-
- renderFunctionSummary(s(4), os, renderContextStr);
-
- os << s(4) << "<br/><br/><br/>\n";
-
- //renderLiveIntervalInfoTable(" ", os);
-
- os << s(4) << "<br/><br/><br/>\n";
-
- renderCodeTablePlusPI(s(4), os);
-
- os << s(2) << "</body>\n"
- << "</html>\n";
- }
-
- void RenderMachineFunction::getAnalysisUsage(AnalysisUsage &au) const {
- au.addRequired<SlotIndexes>();
- au.addRequired<LiveIntervals>();
- au.setPreservesAll();
- MachineFunctionPass::getAnalysisUsage(au);
- }
-
- bool RenderMachineFunction::runOnMachineFunction(MachineFunction &fn) {
-
- mf = &fn;
- mri = &mf->getRegInfo();
- tri = mf->getTarget().getRegisterInfo();
- lis = &getAnalysis<LiveIntervals>();
- sis = &getAnalysis<SlotIndexes>();
-
- trei.setup(mf, mri, tri, lis);
- ro.setup(mf, tri, lis, this);
- spillIntervals.clear();
- spillFor.clear();
- useDefs.clear();
-
- fqn = mf->getFunction()->getParent()->getModuleIdentifier() + "." +
- mf->getFunction()->getName().str();
-
- return false;
- }
-
- void RenderMachineFunction::releaseMemory() {
- trei.clear();
- ro.clear();
- spillIntervals.clear();
- spillFor.clear();
- useDefs.clear();
- }
-
- void RenderMachineFunction::rememberUseDefs(const LiveInterval *li) {
-
- if (!ro.shouldRenderCurrentMachineFunction())
- return;
-
- for (MachineRegisterInfo::reg_iterator rItr = mri->reg_begin(li->reg),
- rEnd = mri->reg_end();
- rItr != rEnd; ++rItr) {
- const MachineInstr *mi = &*rItr;
- if (mi->readsRegister(li->reg)) {
- useDefs[li].insert(lis->getInstructionIndex(mi).getRegSlot(true));
- }
- if (mi->definesRegister(li->reg)) {
- useDefs[li].insert(lis->getInstructionIndex(mi).getRegSlot());
- }
- }
- }
-
- void RenderMachineFunction::rememberSpills(
- const LiveInterval *li,
- const std::vector<LiveInterval*> &spills) {
-
- if (!ro.shouldRenderCurrentMachineFunction())
- return;
-
- for (std::vector<LiveInterval*>::const_iterator siItr = spills.begin(),
- siEnd = spills.end();
- siItr != siEnd; ++siItr) {
- const LiveInterval *spill = *siItr;
- spillIntervals[li].insert(spill);
- spillFor[spill] = li;
- }
- }
-
- bool RenderMachineFunction::isSpill(const LiveInterval *li) const {
- SpillForMap::const_iterator sfItr = spillFor.find(li);
- if (sfItr == spillFor.end())
- return false;
- return true;
- }
-
- void RenderMachineFunction::renderMachineFunction(
- const char *renderContextStr,
- const VirtRegMap *vrm,
- const char *renderSuffix) {
- if (!ro.shouldRenderCurrentMachineFunction())
- return;
-
- this->vrm = vrm;
- trei.reset();
-
- std::string rpFileName(mf->getFunction()->getName().str() +
- (renderSuffix ? renderSuffix : "") +
- outputFileSuffix);
-
- std::string errMsg;
- raw_fd_ostream outFile(rpFileName.c_str(), errMsg, raw_fd_ostream::F_Binary);
-
- renderFunctionPage(outFile, renderContextStr);
-
- ro.resetRenderSpecificOptions();
- }
-
- std::string RenderMachineFunction::escapeChars(const std::string &s) const {
- return escapeChars(s.begin(), s.end());
- }
-
-}
diff --git a/lib/CodeGen/RenderMachineFunction.h b/lib/CodeGen/RenderMachineFunction.h
deleted file mode 100644
index 8571992..0000000
--- a/lib/CodeGen/RenderMachineFunction.h
+++ /dev/null
@@ -1,338 +0,0 @@
-//===-- llvm/CodeGen/RenderMachineFunction.h - MF->HTML -*- C++ -*---------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_CODEGEN_RENDERMACHINEFUNCTION_H
-#define LLVM_CODEGEN_RENDERMACHINEFUNCTION_H
-
-#include "llvm/CodeGen/LiveInterval.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/SlotIndexes.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-
-#include <algorithm>
-#include <map>
-#include <set>
-#include <string>
-
-namespace llvm {
-
- class LiveInterval;
- class LiveIntervals;
- class MachineInstr;
- class MachineRegisterInfo;
- class RenderMachineFunction;
- class TargetRegisterClass;
- class TargetRegisterInfo;
- class VirtRegMap;
- class raw_ostream;
-
- /// \brief Helper class to process rendering options. Tries to be as lazy as
- /// possible.
- class MFRenderingOptions {
- public:
-
- struct RegClassComp {
- bool operator()(const TargetRegisterClass *trc1,
- const TargetRegisterClass *trc2) const {
- std::string trc1Name(trc1->getName()), trc2Name(trc2->getName());
- return std::lexicographical_compare(trc1Name.begin(), trc1Name.end(),
- trc2Name.begin(), trc2Name.end());
- }
- };
-
- typedef std::set<const TargetRegisterClass*, RegClassComp> RegClassSet;
-
- struct IntervalComp {
- bool operator()(const LiveInterval *li1, const LiveInterval *li2) const {
- return li1->reg < li2->reg;
- }
- };
-
- typedef std::set<const LiveInterval*, IntervalComp> IntervalSet;
-
- /// Initialise the rendering options.
- void setup(MachineFunction *mf, const TargetRegisterInfo *tri,
- LiveIntervals *lis, const RenderMachineFunction *rmf);
-
- /// Clear translations of options to the current function.
- void clear();
-
- /// Reset any options computed for this specific rendering.
- void resetRenderSpecificOptions();
-
- /// Should we render the current function.
- bool shouldRenderCurrentMachineFunction() const;
-
- /// Return the set of register classes to render pressure for.
- const RegClassSet& regClasses() const;
-
- /// Return the set of live intervals to render liveness for.
- const IntervalSet& intervals() const;
-
- /// Render indexes which are not associated with instructions / MBB starts.
- bool renderEmptyIndexes() const;
-
- /// Return whether or not to render using SVG for fancy vertical text.
- bool fancyVerticals() const;
-
- private:
-
- static bool renderingOptionsProcessed;
- static std::set<std::string> mfNamesToRender;
- static bool renderAllMFs;
-
- static std::set<std::string> classNamesToRender;
- static bool renderAllClasses;
-
-
- static std::set<std::pair<unsigned, unsigned> > intervalNumsToRender;
- typedef enum { ExplicitOnly = 0,
- AllPhys = 1,
- VirtNoSpills = 2,
- VirtSpills = 4,
- AllVirt = 6,
- All = 7 }
- IntervalTypesToRender;
- static unsigned intervalTypesToRender;
-
- template <typename OutputItr>
- static void splitComaSeperatedList(const std::string &s, OutputItr outItr);
-
- static void processOptions();
-
- static void processFuncNames();
- static void processRegClassNames();
- static void processIntervalNumbers();
-
- static void processIntervalRange(const std::string &intervalRangeStr);
-
- MachineFunction *mf;
- const TargetRegisterInfo *tri;
- LiveIntervals *lis;
- const RenderMachineFunction *rmf;
-
- mutable bool regClassesTranslatedToCurrentFunction;
- mutable RegClassSet regClassSet;
-
- mutable bool intervalsTranslatedToCurrentFunction;
- mutable IntervalSet intervalSet;
-
- void translateRegClassNamesToCurrentFunction() const;
-
- void translateIntervalNumbersToCurrentFunction() const;
- };
-
- /// \brief Provide extra information about the physical and virtual registers
- /// in the function being compiled.
- class TargetRegisterExtraInfo {
- public:
- TargetRegisterExtraInfo();
-
- /// \brief Set up TargetRegisterExtraInfo with pointers to necessary
- /// sources of information.
- void setup(MachineFunction *mf, MachineRegisterInfo *mri,
- const TargetRegisterInfo *tri, LiveIntervals *lis);
-
- /// \brief Recompute tables for changed function.
- void reset();
-
- /// \brief Free all tables in TargetRegisterExtraInfo.
- void clear();
-
- /// \brief Maximum number of registers from trc which alias reg.
- unsigned getWorst(unsigned reg, const TargetRegisterClass *trc) const;
-
- /// \brief Returns the number of allocable registers in trc.
- unsigned getCapacity(const TargetRegisterClass *trc) const;
-
- /// \brief Return the number of registers of class trc that may be
- /// needed at slot i.
- unsigned getPressureAtSlot(const TargetRegisterClass *trc,
- SlotIndex i) const;
-
- /// \brief Return true if the number of registers of type trc that may be
- /// needed at slot i is greater than the capacity of trc.
- bool classOverCapacityAtSlot(const TargetRegisterClass *trc,
- SlotIndex i) const;
-
- private:
-
- MachineFunction *mf;
- MachineRegisterInfo *mri;
- const TargetRegisterInfo *tri;
- LiveIntervals *lis;
-
- typedef std::map<const TargetRegisterClass*, unsigned> WorstMapLine;
- typedef std::map<const TargetRegisterClass*, WorstMapLine> VRWorstMap;
- VRWorstMap vrWorst;
-
- typedef std::map<unsigned, WorstMapLine> PRWorstMap;
- PRWorstMap prWorst;
-
- typedef std::map<const TargetRegisterClass*, unsigned> CapacityMap;
- CapacityMap capacityMap;
-
- typedef std::map<const TargetRegisterClass*, unsigned> PressureMapLine;
- typedef std::map<SlotIndex, PressureMapLine> PressureMap;
- PressureMap pressureMap;
-
- bool mapsPopulated;
-
- /// \brief Initialise the 'worst' table.
- void initWorst();
-
- /// \brief Initialise the 'capacity' table.
- void initCapacity();
-
- /// \brief Initialise/Reset the 'pressure' and live states tables.
- void resetPressureAndLiveStates();
- };
-
- /// \brief Render MachineFunction objects and related information to a HTML
- /// page.
- class RenderMachineFunction : public MachineFunctionPass {
- public:
- static char ID;
-
- RenderMachineFunction() : MachineFunctionPass(ID) {
- initializeRenderMachineFunctionPass(*PassRegistry::getPassRegistry());
- }
-
- virtual void getAnalysisUsage(AnalysisUsage &au) const;
-
- virtual bool runOnMachineFunction(MachineFunction &fn);
-
- virtual void releaseMemory();
-
- void rememberUseDefs(const LiveInterval *li);
-
- void rememberSpills(const LiveInterval *li,
- const std::vector<LiveInterval*> &spills);
-
- bool isSpill(const LiveInterval *li) const;
-
- /// \brief Render this machine function to HTML.
- ///
- /// @param renderContextStr This parameter will be included in the top of
- /// the html file to explain where (in the
- /// codegen pipeline) this function was rendered
- /// from. Set it to something like
- /// "Pre-register-allocation".
- /// @param vrm If non-null the VRM will be queried to determine
- /// whether a virtual register was allocated to a
- /// physical register or spilled.
- /// @param renderFilePrefix This string will be appended to the function
- /// name (before the output file suffix) to enable
- /// multiple renderings from the same function.
- void renderMachineFunction(const char *renderContextStr,
- const VirtRegMap *vrm = 0,
- const char *renderSuffix = 0);
-
- private:
- class Spacer;
- friend raw_ostream& operator<<(raw_ostream &os, const Spacer &s);
-
- std::string fqn;
-
- MachineFunction *mf;
- MachineRegisterInfo *mri;
- const TargetRegisterInfo *tri;
- LiveIntervals *lis;
- SlotIndexes *sis;
- const VirtRegMap *vrm;
-
- TargetRegisterExtraInfo trei;
- MFRenderingOptions ro;
-
-
-
- // Utilities.
- typedef enum { Dead, Defined, Used, AliveReg, AliveStack } LiveState;
- LiveState getLiveStateAt(const LiveInterval *li, SlotIndex i) const;
-
- typedef enum { Zero, Low, High } PressureState;
- PressureState getPressureStateAt(const TargetRegisterClass *trc,
- SlotIndex i) const;
-
- typedef std::map<const LiveInterval*, std::set<const LiveInterval*> >
- SpillIntervals;
- SpillIntervals spillIntervals;
-
- typedef std::map<const LiveInterval*, const LiveInterval*> SpillForMap;
- SpillForMap spillFor;
-
- typedef std::set<SlotIndex> SlotSet;
- typedef std::map<const LiveInterval*, SlotSet> UseDefs;
- UseDefs useDefs;
-
- // ---------- Rendering methods ----------
-
- /// For inserting spaces when pretty printing.
- class Spacer {
- public:
- explicit Spacer(unsigned numSpaces) : ns(numSpaces) {}
- Spacer operator+(const Spacer &o) const { return Spacer(ns + o.ns); }
- void print(raw_ostream &os) const;
- private:
- unsigned ns;
- };
-
- Spacer s(unsigned ns) const;
-
- template <typename Iterator>
- std::string escapeChars(Iterator sBegin, Iterator sEnd) const;
-
- /// \brief Render a machine instruction.
- void renderMachineInstr(raw_ostream &os,
- const MachineInstr *mi) const;
-
- /// \brief Render vertical text.
- template <typename T>
- void renderVertical(const Spacer &indent,
- raw_ostream &os,
- const T &t) const;
-
- /// \brief Insert CSS layout info.
- void insertCSS(const Spacer &indent,
- raw_ostream &os) const;
-
- /// \brief Render a brief summary of the function (including rendering
- /// context).
- void renderFunctionSummary(const Spacer &indent,
- raw_ostream &os,
- const char * const renderContextStr) const;
-
- /// \brief Render a legend for the pressure table.
- void renderPressureTableLegend(const Spacer &indent,
- raw_ostream &os) const;
-
- /// \brief Render a consecutive set of HTML cells of the same class using
- /// the colspan attribute for run-length encoding.
- template <typename CellType>
- void renderCellsWithRLE(
- const Spacer &indent, raw_ostream &os,
- const std::pair<CellType, unsigned> &rleAccumulator,
- const std::map<CellType, std::string> &cellTypeStrs) const;
-
- /// \brief Render code listing, potentially with register pressure
- /// and live intervals shown alongside.
- void renderCodeTablePlusPI(const Spacer &indent,
- raw_ostream &os) const;
-
- /// \brief Render the HTML page representing the MachineFunction.
- void renderFunctionPage(raw_ostream &os,
- const char * const renderContextStr) const;
-
- std::string escapeChars(const std::string &s) const;
- };
-}
-
-#endif /* LLVM_CODEGEN_RENDERMACHINEFUNCTION_H */
diff --git a/lib/CodeGen/ScheduleDAG.cpp b/lib/CodeGen/ScheduleDAG.cpp
index 8fd6426..752f8e4 100644
--- a/lib/CodeGen/ScheduleDAG.cpp
+++ b/lib/CodeGen/ScheduleDAG.cpp
@@ -64,10 +64,27 @@ const MCInstrDesc *ScheduleDAG::getNodeDesc(const SDNode *Node) const {
/// specified node.
bool SUnit::addPred(const SDep &D) {
// If this node already has this depenence, don't add a redundant one.
- for (SmallVector<SDep, 4>::const_iterator I = Preds.begin(), E = Preds.end();
- I != E; ++I)
- if (*I == D)
+ for (SmallVector<SDep, 4>::iterator I = Preds.begin(), E = Preds.end();
+ I != E; ++I) {
+ if (I->overlaps(D)) {
+ // Extend the latency if needed. Equivalent to removePred(I) + addPred(D).
+ if (I->getLatency() < D.getLatency()) {
+ SUnit *PredSU = I->getSUnit();
+ // Find the corresponding successor in N.
+ SDep ForwardD = *I;
+ ForwardD.setSUnit(this);
+ for (SmallVector<SDep, 4>::iterator II = PredSU->Succs.begin(),
+ EE = PredSU->Succs.end(); II != EE; ++II) {
+ if (*II == ForwardD) {
+ II->setLatency(D.getLatency());
+ break;
+ }
+ }
+ I->setLatency(D.getLatency());
+ }
return false;
+ }
+ }
// Now add a corresponding succ to N.
SDep P = D;
P.setSUnit(this);
diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp
index d46eb89..9c1dba3 100644
--- a/lib/CodeGen/ScheduleDAGInstrs.cpp
+++ b/lib/CodeGen/ScheduleDAGInstrs.cpp
@@ -21,17 +21,24 @@
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/CodeGen/RegisterPressure.h"
#include "llvm/CodeGen/ScheduleDAGInstrs.h"
#include "llvm/MC/MCInstrItineraries.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallPtrSet.h"
using namespace llvm;
+static cl::opt<bool> EnableAASchedMI("enable-aa-sched-mi", cl::Hidden,
+ cl::ZeroOrMore, cl::init(false),
+ cl::desc("Enable use of AA during MI GAD construction"));
+
ScheduleDAGInstrs::ScheduleDAGInstrs(MachineFunction &mf,
const MachineLoopInfo &mli,
const MachineDominatorTree &mdt,
@@ -40,7 +47,7 @@ ScheduleDAGInstrs::ScheduleDAGInstrs(MachineFunction &mf,
: ScheduleDAG(mf), MLI(mli), MDT(mdt), MFI(mf.getFrameInfo()),
InstrItins(mf.getTarget().getInstrItineraryData()), LIS(lis),
IsPostRA(IsPostRAFlag), UnitLatencies(false), CanHandleTerminators(false),
- LoopRegs(MLI, MDT), FirstDbgValue(0) {
+ LoopRegs(MDT), FirstDbgValue(0) {
assert((IsPostRA || LIS) && "PreRA scheduling requires LiveIntervals");
DbgValues.clear();
assert(!(IsPostRA && MRI.getNumVirtRegs()) &&
@@ -126,7 +133,8 @@ static const Value *getUnderlyingObjectForInstr(const MachineInstr *MI,
return 0;
}
-void ScheduleDAGInstrs::startBlock(MachineBasicBlock *BB) {
+void ScheduleDAGInstrs::startBlock(MachineBasicBlock *bb) {
+ BB = bb;
LoopRegs.Deps.clear();
if (MachineLoop *ML = MLI.getLoopFor(BB))
if (BB == ML->getLoopLatch())
@@ -134,7 +142,8 @@ void ScheduleDAGInstrs::startBlock(MachineBasicBlock *BB) {
}
void ScheduleDAGInstrs::finishBlock() {
- // Nothing to do.
+ // Subclasses should no longer refer to the old block.
+ BB = 0;
}
/// Initialize the map with the number of registers.
@@ -159,7 +168,7 @@ void ScheduleDAGInstrs::enterRegion(MachineBasicBlock *bb,
MachineBasicBlock::iterator begin,
MachineBasicBlock::iterator end,
unsigned endcount) {
- BB = bb;
+ assert(bb == BB && "startBlock should set BB");
RegionBegin = begin;
RegionEnd = end;
EndIndex = endcount;
@@ -232,7 +241,8 @@ void ScheduleDAGInstrs::addPhysRegDataDeps(SUnit *SU,
unsigned SpecialAddressLatency = ST.getSpecialAddressLatency();
unsigned DataLatency = SU->Latency;
- for (const uint16_t *Alias = TRI->getOverlaps(MO.getReg()); *Alias; ++Alias) {
+ for (MCRegAliasIterator Alias(MO.getReg(), TRI, true);
+ Alias.isValid(); ++Alias) {
if (!Uses.contains(*Alias))
continue;
std::vector<SUnit*> &UseList = Uses[*Alias];
@@ -261,10 +271,12 @@ void ScheduleDAGInstrs::addPhysRegDataDeps(SUnit *SU,
// Adjust the dependence latency using operand def/use
// information (if any), and then allow the target to
// perform its own adjustments.
- const SDep& dep = SDep(SU, SDep::Data, LDataLatency, *Alias);
+ SDep dep(SU, SDep::Data, LDataLatency, *Alias);
if (!UnitLatencies) {
- computeOperandLatency(SU, UseSU, const_cast<SDep &>(dep));
- ST.adjustSchedDependency(SU, UseSU, const_cast<SDep &>(dep));
+ unsigned Latency = computeOperandLatency(SU, UseSU, dep);
+ dep.setLatency(Latency);
+
+ ST.adjustSchedDependency(SU, UseSU, dep);
}
UseSU->addPred(dep);
}
@@ -285,7 +297,8 @@ void ScheduleDAGInstrs::addPhysRegDeps(SUnit *SU, unsigned OperIdx) {
// TODO: Using a latency of 1 here for output dependencies assumes
// there's no cost for reusing registers.
SDep::Kind Kind = MO.isUse() ? SDep::Anti : SDep::Output;
- for (const uint16_t *Alias = TRI->getOverlaps(MO.getReg()); *Alias; ++Alias) {
+ for (MCRegAliasIterator Alias(MO.getReg(), TRI, true);
+ Alias.isValid(); ++Alias) {
if (!Defs.contains(*Alias))
continue;
std::vector<SUnit *> &DefList = Defs[*Alias];
@@ -398,9 +411,10 @@ void ScheduleDAGInstrs::addVRegDefDeps(SUnit *SU, unsigned OperIdx) {
const MachineInstr *MI = SU->getInstr();
unsigned Reg = MI->getOperand(OperIdx).getReg();
- // SSA defs do not have output/anti dependencies.
+ // Singly defined vregs do not have output/anti dependencies.
// The current operand is a def, so we have at least one.
- if (llvm::next(MRI.def_begin(Reg)) == MRI.def_end())
+ // Check here if there are any others...
+ if (MRI.hasOneDef(Reg))
return;
// Add output dependence to the next nearest def of this vreg.
@@ -410,7 +424,7 @@ void ScheduleDAGInstrs::addVRegDefDeps(SUnit *SU, unsigned OperIdx) {
// uses. We're conservative for now until we have a way to guarantee the uses
// are not eliminated sometime during scheduling. The output dependence edge
// is also useful if output latency exceeds def-use latency.
- VReg2SUnitMap::iterator DefI = findVRegDef(Reg);
+ VReg2SUnitMap::iterator DefI = VRegDefs.find(Reg);
if (DefI == VRegDefs.end())
VRegDefs.insert(VReg2SUnit(Reg, SU));
else {
@@ -436,10 +450,11 @@ void ScheduleDAGInstrs::addVRegUseDeps(SUnit *SU, unsigned OperIdx) {
// Lookup this operand's reaching definition.
assert(LIS && "vreg dependencies requires LiveIntervals");
- SlotIndex UseIdx = LIS->getInstructionIndex(MI).getRegSlot();
- LiveInterval *LI = &LIS->getInterval(Reg);
- VNInfo *VNI = LI->getVNInfoBefore(UseIdx);
+ LiveRangeQuery LRQ(LIS->getInterval(Reg), LIS->getInstructionIndex(MI));
+ VNInfo *VNI = LRQ.valueIn();
+
// VNI will be valid because MachineOperand::readsReg() is checked by caller.
+ assert(VNI && "No value to read by operand");
MachineInstr *Def = LIS->getInstructionFromIndex(VNI->def);
// Phis and other noninstructions (after coalescing) have a NULL Def.
if (Def) {
@@ -449,11 +464,13 @@ void ScheduleDAGInstrs::addVRegUseDeps(SUnit *SU, unsigned OperIdx) {
// Create a data dependence.
//
// TODO: Handle "special" address latencies cleanly.
- const SDep &dep = SDep(DefSU, SDep::Data, DefSU->Latency, Reg);
+ SDep dep(DefSU, SDep::Data, DefSU->Latency, Reg);
if (!UnitLatencies) {
// Adjust the dependence latency using operand def/use information, then
// allow the target to perform its own adjustments.
- computeOperandLatency(DefSU, SU, const_cast<SDep &>(dep));
+ unsigned Latency = computeOperandLatency(DefSU, SU, const_cast<SDep &>(dep));
+ dep.setLatency(Latency);
+
const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>();
ST.adjustSchedDependency(DefSU, SU, const_cast<SDep &>(dep));
}
@@ -462,11 +479,217 @@ void ScheduleDAGInstrs::addVRegUseDeps(SUnit *SU, unsigned OperIdx) {
}
// Add antidependence to the following def of the vreg it uses.
- VReg2SUnitMap::iterator DefI = findVRegDef(Reg);
+ VReg2SUnitMap::iterator DefI = VRegDefs.find(Reg);
if (DefI != VRegDefs.end() && DefI->SU != SU)
DefI->SU->addPred(SDep(SU, SDep::Anti, 0, Reg));
}
+/// Return true if MI is an instruction we are unable to reason about
+/// (like a call or something with unmodeled side effects).
+static inline bool isGlobalMemoryObject(AliasAnalysis *AA, MachineInstr *MI) {
+ if (MI->isCall() || MI->hasUnmodeledSideEffects() ||
+ (MI->hasVolatileMemoryRef() &&
+ (!MI->mayLoad() || !MI->isInvariantLoad(AA))))
+ return true;
+ return false;
+}
+
+// This MI might either have incomplete info, or be known to be unsafe
+// to deal with (e.g. a volatile object).
+static inline bool isUnsafeMemoryObject(MachineInstr *MI,
+ const MachineFrameInfo *MFI) {
+ if (!MI || MI->memoperands_empty())
+ return true;
+ // We purposefully do not check for hasOneMemOperand() here
+ // in hope to trigger an assert downstream in order to
+ // finish implementation.
+ if ((*MI->memoperands_begin())->isVolatile() ||
+ MI->hasUnmodeledSideEffects())
+ return true;
+
+ const Value *V = (*MI->memoperands_begin())->getValue();
+ if (!V)
+ return true;
+
+ V = getUnderlyingObject(V);
+ if (const PseudoSourceValue *PSV = dyn_cast<PseudoSourceValue>(V)) {
+ // Similarly to getUnderlyingObjectForInstr:
+ // For now, ignore PseudoSourceValues which may alias LLVM IR values
+ // because the code that uses this function has no way to cope with
+ // such aliases.
+ if (PSV->isAliased(MFI))
+ return true;
+ }
+ // Does this pointer refer to a distinct and identifiable object?
+ if (!isIdentifiedObject(V))
+ return true;
+
+ return false;
+}
+
+/// This returns true if the two MIs need a chain edge between them.
+/// If these are not even memory operations, we still may need
+/// chain deps between them. The question really is - could
+/// these two MIs be reordered during scheduling from memory dependency
+/// point of view.
+static bool MIsNeedChainEdge(AliasAnalysis *AA, const MachineFrameInfo *MFI,
+ MachineInstr *MIa,
+ MachineInstr *MIb) {
+ // Cover a trivial case - no edge is needed to itself.
+ if (MIa == MIb)
+ return false;
+
+ if (isUnsafeMemoryObject(MIa, MFI) || isUnsafeMemoryObject(MIb, MFI))
+ return true;
+
+ // If we are dealing with two "normal" loads, we do not need an edge
+ // between them - they could be reordered.
+ if (!MIa->mayStore() && !MIb->mayStore())
+ return false;
+
+ // To this point analysis is generic. From here on we do need AA.
+ if (!AA)
+ return true;
+
+ MachineMemOperand *MMOa = *MIa->memoperands_begin();
+ MachineMemOperand *MMOb = *MIb->memoperands_begin();
+
+ // FIXME: Need to handle multiple memory operands to support all targets.
+ if (!MIa->hasOneMemOperand() || !MIb->hasOneMemOperand())
+ llvm_unreachable("Multiple memory operands.");
+
+ // The following interface to AA is fashioned after DAGCombiner::isAlias
+ // and operates with MachineMemOperand offset with some important
+ // assumptions:
+ // - LLVM fundamentally assumes flat address spaces.
+ // - MachineOperand offset can *only* result from legalization and
+ // cannot affect queries other than the trivial case of overlap
+ // checking.
+ // - These offsets never wrap and never step outside
+ // of allocated objects.
+ // - There should never be any negative offsets here.
+ //
+ // FIXME: Modify API to hide this math from "user"
+ // FIXME: Even before we go to AA we can reason locally about some
+ // memory objects. It can save compile time, and possibly catch some
+ // corner cases not currently covered.
+
+ assert ((MMOa->getOffset() >= 0) && "Negative MachineMemOperand offset");
+ assert ((MMOb->getOffset() >= 0) && "Negative MachineMemOperand offset");
+
+ int64_t MinOffset = std::min(MMOa->getOffset(), MMOb->getOffset());
+ int64_t Overlapa = MMOa->getSize() + MMOa->getOffset() - MinOffset;
+ int64_t Overlapb = MMOb->getSize() + MMOb->getOffset() - MinOffset;
+
+ AliasAnalysis::AliasResult AAResult = AA->alias(
+ AliasAnalysis::Location(MMOa->getValue(), Overlapa,
+ MMOa->getTBAAInfo()),
+ AliasAnalysis::Location(MMOb->getValue(), Overlapb,
+ MMOb->getTBAAInfo()));
+
+ return (AAResult != AliasAnalysis::NoAlias);
+}
+
+/// This recursive function iterates over chain deps of SUb looking for
+/// "latest" node that needs a chain edge to SUa.
+static unsigned
+iterateChainSucc(AliasAnalysis *AA, const MachineFrameInfo *MFI,
+ SUnit *SUa, SUnit *SUb, SUnit *ExitSU, unsigned *Depth,
+ SmallPtrSet<const SUnit*, 16> &Visited) {
+ if (!SUa || !SUb || SUb == ExitSU)
+ return *Depth;
+
+ // Remember visited nodes.
+ if (!Visited.insert(SUb))
+ return *Depth;
+ // If there is _some_ dependency already in place, do not
+ // descend any further.
+ // TODO: Need to make sure that if that dependency got eliminated or ignored
+ // for any reason in the future, we would not violate DAG topology.
+ // Currently it does not happen, but makes an implicit assumption about
+ // future implementation.
+ //
+ // Independently, if we encounter node that is some sort of global
+ // object (like a call) we already have full set of dependencies to it
+ // and we can stop descending.
+ if (SUa->isSucc(SUb) ||
+ isGlobalMemoryObject(AA, SUb->getInstr()))
+ return *Depth;
+
+ // If we do need an edge, or we have exceeded depth budget,
+ // add that edge to the predecessors chain of SUb,
+ // and stop descending.
+ if (*Depth > 200 ||
+ MIsNeedChainEdge(AA, MFI, SUa->getInstr(), SUb->getInstr())) {
+ SUb->addPred(SDep(SUa, SDep::Order, /*Latency=*/0, /*Reg=*/0,
+ /*isNormalMemory=*/true));
+ return *Depth;
+ }
+ // Track current depth.
+ (*Depth)++;
+ // Iterate over chain dependencies only.
+ for (SUnit::const_succ_iterator I = SUb->Succs.begin(), E = SUb->Succs.end();
+ I != E; ++I)
+ if (I->isCtrl())
+ iterateChainSucc (AA, MFI, SUa, I->getSUnit(), ExitSU, Depth, Visited);
+ return *Depth;
+}
+
+/// This function assumes that "downward" from SU there exist
+/// tail/leaf of already constructed DAG. It iterates downward and
+/// checks whether SU can be aliasing any node dominated
+/// by it.
+static void adjustChainDeps(AliasAnalysis *AA, const MachineFrameInfo *MFI,
+ SUnit *SU, SUnit *ExitSU, std::set<SUnit *> &CheckList,
+ unsigned LatencyToLoad) {
+ if (!SU)
+ return;
+
+ SmallPtrSet<const SUnit*, 16> Visited;
+ unsigned Depth = 0;
+
+ for (std::set<SUnit *>::iterator I = CheckList.begin(), IE = CheckList.end();
+ I != IE; ++I) {
+ if (SU == *I)
+ continue;
+ if (MIsNeedChainEdge(AA, MFI, SU->getInstr(), (*I)->getInstr())) {
+ unsigned Latency = ((*I)->getInstr()->mayLoad()) ? LatencyToLoad : 0;
+ (*I)->addPred(SDep(SU, SDep::Order, Latency, /*Reg=*/0,
+ /*isNormalMemory=*/true));
+ }
+ // Now go through all the chain successors and iterate from them.
+ // Keep track of visited nodes.
+ for (SUnit::const_succ_iterator J = (*I)->Succs.begin(),
+ JE = (*I)->Succs.end(); J != JE; ++J)
+ if (J->isCtrl())
+ iterateChainSucc (AA, MFI, SU, J->getSUnit(),
+ ExitSU, &Depth, Visited);
+ }
+}
+
+/// Check whether two objects need a chain edge, if so, add it
+/// otherwise remember the rejected SU.
+static inline
+void addChainDependency (AliasAnalysis *AA, const MachineFrameInfo *MFI,
+ SUnit *SUa, SUnit *SUb,
+ std::set<SUnit *> &RejectList,
+ unsigned TrueMemOrderLatency = 0,
+ bool isNormalMemory = false) {
+ // If this is a false dependency,
+ // do not add the edge, but remember the rejected node.
+ if (!EnableAASchedMI ||
+ MIsNeedChainEdge(AA, MFI, SUa->getInstr(), SUb->getInstr()))
+ SUb->addPred(SDep(SUa, SDep::Order, TrueMemOrderLatency, /*Reg=*/0,
+ isNormalMemory));
+ else {
+ // Duplicate entries should be ignored.
+ RejectList.insert(SUb);
+ DEBUG(dbgs() << "\tReject chain dep between SU("
+ << SUa->NodeNum << ") and SU("
+ << SUb->NodeNum << ")\n");
+ }
+}
+
/// Create an SUnit for each real instruction, numbered in top-down toplological
/// order. The instruction order A < B, implies that no edge exists from B to A.
///
@@ -502,7 +725,11 @@ void ScheduleDAGInstrs::initSUnits() {
}
}
-void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA) {
+/// If RPTracker is non-null, compute register pressure as a side effect. The
+/// DAG builder is an efficient place to do it because it already visits
+/// operands.
+void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
+ RegPressureTracker *RPTracker) {
// Create an SUnit for each real instruction.
initSUnits();
@@ -518,6 +745,7 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA) {
// that are known not to alias
std::map<const Value *, SUnit *> AliasMemDefs, NonAliasMemDefs;
std::map<const Value *, std::vector<SUnit *> > AliasMemUses, NonAliasMemUses;
+ std::set<SUnit*> RejectMemNodes;
// Remove any stale debug info; sometimes BuildSchedGraph is called again
// without emitting the info from the previous call.
@@ -553,6 +781,10 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA) {
PrevMI = MI;
continue;
}
+ if (RPTracker) {
+ RPTracker->recede();
+ assert(RPTracker->getPos() == prior(MII) && "RPTracker can't find MI");
+ }
assert((!MI->isTerminator() || CanHandleTerminators) && !MI->isLabel() &&
"Cannot schedule terminators or labels!");
@@ -587,11 +819,8 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA) {
// after stack slots are lowered to actual addresses.
// TODO: Use an AliasAnalysis and do real alias-analysis queries, and
// produce more precise dependence information.
-#define STORE_LOAD_LATENCY 1
- unsigned TrueMemOrderLatency = 0;
- if (MI->isCall() || MI->hasUnmodeledSideEffects() ||
- (MI->hasVolatileMemoryRef() &&
- (!MI->mayLoad() || !MI->isInvariantLoad(AA)))) {
+ unsigned TrueMemOrderLatency = MI->mayStore() ? 1 : 0;
+ if (isGlobalMemoryObject(AA, MI)) {
// Be conservative with these and add dependencies on all memory
// references, even those that are known to not alias.
for (std::map<const Value *, SUnit *>::iterator I =
@@ -603,36 +832,48 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA) {
for (unsigned i = 0, e = I->second.size(); i != e; ++i)
I->second[i]->addPred(SDep(SU, SDep::Order, TrueMemOrderLatency));
}
- NonAliasMemDefs.clear();
- NonAliasMemUses.clear();
// Add SU to the barrier chain.
if (BarrierChain)
BarrierChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
BarrierChain = SU;
+ // This is a barrier event that acts as a pivotal node in the DAG,
+ // so it is safe to clear list of exposed nodes.
+ adjustChainDeps(AA, MFI, SU, &ExitSU, RejectMemNodes,
+ TrueMemOrderLatency);
+ RejectMemNodes.clear();
+ NonAliasMemDefs.clear();
+ NonAliasMemUses.clear();
// fall-through
new_alias_chain:
// Chain all possibly aliasing memory references though SU.
- if (AliasChain)
- AliasChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
+ if (AliasChain) {
+ unsigned ChainLatency = 0;
+ if (AliasChain->getInstr()->mayLoad())
+ ChainLatency = TrueMemOrderLatency;
+ addChainDependency(AA, MFI, SU, AliasChain, RejectMemNodes,
+ ChainLatency);
+ }
AliasChain = SU;
for (unsigned k = 0, m = PendingLoads.size(); k != m; ++k)
- PendingLoads[k]->addPred(SDep(SU, SDep::Order, TrueMemOrderLatency));
+ addChainDependency(AA, MFI, SU, PendingLoads[k], RejectMemNodes,
+ TrueMemOrderLatency);
for (std::map<const Value *, SUnit *>::iterator I = AliasMemDefs.begin(),
- E = AliasMemDefs.end(); I != E; ++I) {
- I->second->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
- }
+ E = AliasMemDefs.end(); I != E; ++I)
+ addChainDependency(AA, MFI, SU, I->second, RejectMemNodes);
for (std::map<const Value *, std::vector<SUnit *> >::iterator I =
AliasMemUses.begin(), E = AliasMemUses.end(); I != E; ++I) {
for (unsigned i = 0, e = I->second.size(); i != e; ++i)
- I->second[i]->addPred(SDep(SU, SDep::Order, TrueMemOrderLatency));
+ addChainDependency(AA, MFI, SU, I->second[i], RejectMemNodes,
+ TrueMemOrderLatency);
}
+ adjustChainDeps(AA, MFI, SU, &ExitSU, RejectMemNodes,
+ TrueMemOrderLatency);
PendingLoads.clear();
AliasMemDefs.clear();
AliasMemUses.clear();
} else if (MI->mayStore()) {
bool MayAlias = true;
- TrueMemOrderLatency = STORE_LOAD_LATENCY;
if (const Value *V = getUnderlyingObjectForInstr(MI, MFI, MayAlias)) {
// A store to a specific PseudoSourceValue. Add precise dependencies.
// Record the def in MemDefs, first adding a dep if there is
@@ -642,8 +883,8 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA) {
std::map<const Value *, SUnit *>::iterator IE =
((MayAlias) ? AliasMemDefs.end() : NonAliasMemDefs.end());
if (I != IE) {
- I->second->addPred(SDep(SU, SDep::Order, /*Latency=*/0, /*Reg=*/0,
- /*isNormalMemory=*/true));
+ addChainDependency(AA, MFI, SU, I->second, RejectMemNodes,
+ 0, true);
I->second = SU;
} else {
if (MayAlias)
@@ -658,20 +899,28 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA) {
((MayAlias) ? AliasMemUses.end() : NonAliasMemUses.end());
if (J != JE) {
for (unsigned i = 0, e = J->second.size(); i != e; ++i)
- J->second[i]->addPred(SDep(SU, SDep::Order, TrueMemOrderLatency,
- /*Reg=*/0, /*isNormalMemory=*/true));
+ addChainDependency(AA, MFI, SU, J->second[i], RejectMemNodes,
+ TrueMemOrderLatency, true);
J->second.clear();
}
if (MayAlias) {
// Add dependencies from all the PendingLoads, i.e. loads
// with no underlying object.
for (unsigned k = 0, m = PendingLoads.size(); k != m; ++k)
- PendingLoads[k]->addPred(SDep(SU, SDep::Order, TrueMemOrderLatency));
+ addChainDependency(AA, MFI, SU, PendingLoads[k], RejectMemNodes,
+ TrueMemOrderLatency);
// Add dependence on alias chain, if needed.
if (AliasChain)
- AliasChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
+ addChainDependency(AA, MFI, SU, AliasChain, RejectMemNodes);
+ // But we also should check dependent instructions for the
+ // SU in question.
+ adjustChainDeps(AA, MFI, SU, &ExitSU, RejectMemNodes,
+ TrueMemOrderLatency);
}
// Add dependence on barrier chain, if needed.
+ // There is no point to check aliasing on barrier event. Even if
+ // SU and barrier _could_ be reordered, they should not. In addition,
+ // we have lost all RejectMemNodes below barrier.
if (BarrierChain)
BarrierChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
} else {
@@ -688,7 +937,6 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA) {
/*isArtificial=*/true));
} else if (MI->mayLoad()) {
bool MayAlias = true;
- TrueMemOrderLatency = 0;
if (MI->isInvariantLoad(AA)) {
// Invariant load, no chain dependencies needed!
} else {
@@ -700,8 +948,7 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA) {
std::map<const Value *, SUnit *>::iterator IE =
((MayAlias) ? AliasMemDefs.end() : NonAliasMemDefs.end());
if (I != IE)
- I->second->addPred(SDep(SU, SDep::Order, /*Latency=*/0, /*Reg=*/0,
- /*isNormalMemory=*/true));
+ addChainDependency(AA, MFI, SU, I->second, RejectMemNodes, 0, true);
if (MayAlias)
AliasMemUses[V].push_back(SU);
else
@@ -711,15 +958,16 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA) {
// potentially aliasing stores.
for (std::map<const Value *, SUnit *>::iterator I =
AliasMemDefs.begin(), E = AliasMemDefs.end(); I != E; ++I)
- I->second->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
+ addChainDependency(AA, MFI, SU, I->second, RejectMemNodes);
PendingLoads.push_back(SU);
MayAlias = true;
}
-
+ if (MayAlias)
+ adjustChainDeps(AA, MFI, SU, &ExitSU, RejectMemNodes, /*Latency=*/0);
// Add dependencies on alias and barrier chains, if needed.
if (MayAlias && AliasChain)
- AliasChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
+ addChainDependency(AA, MFI, SU, AliasChain, RejectMemNodes);
if (BarrierChain)
BarrierChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
}
@@ -735,8 +983,9 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA) {
}
void ScheduleDAGInstrs::computeLatency(SUnit *SU) {
- // Compute the latency for the node.
- if (!InstrItins || InstrItins->isEmpty()) {
+ // Compute the latency for the node. We only provide a default for missing
+ // itineraries. Empty itineraries still have latency properties.
+ if (!InstrItins) {
SU->Latency = 1;
// Simplistic target-independent heuristic: assume that loads take
@@ -748,63 +997,15 @@ void ScheduleDAGInstrs::computeLatency(SUnit *SU) {
}
}
-void ScheduleDAGInstrs::computeOperandLatency(SUnit *Def, SUnit *Use,
- SDep& dep) const {
- if (!InstrItins || InstrItins->isEmpty())
- return;
-
+unsigned ScheduleDAGInstrs::computeOperandLatency(SUnit *Def, SUnit *Use,
+ const SDep& dep,
+ bool FindMin) const {
// For a data dependency with a known register...
if ((dep.getKind() != SDep::Data) || (dep.getReg() == 0))
- return;
-
- const unsigned Reg = dep.getReg();
-
- // ... find the definition of the register in the defining
- // instruction
- MachineInstr *DefMI = Def->getInstr();
- int DefIdx = DefMI->findRegisterDefOperandIdx(Reg);
- if (DefIdx != -1) {
- const MachineOperand &MO = DefMI->getOperand(DefIdx);
- if (MO.isReg() && MO.isImplicit() &&
- DefIdx >= (int)DefMI->getDesc().getNumOperands()) {
- // This is an implicit def, getOperandLatency() won't return the correct
- // latency. e.g.
- // %D6<def>, %D7<def> = VLD1q16 %R2<kill>, 0, ..., %Q3<imp-def>
- // %Q1<def> = VMULv8i16 %Q1<kill>, %Q3<kill>, ...
- // What we want is to compute latency between def of %D6/%D7 and use of
- // %Q3 instead.
- unsigned Op2 = DefMI->findRegisterDefOperandIdx(Reg, false, true, TRI);
- if (DefMI->getOperand(Op2).isReg())
- DefIdx = Op2;
- }
- MachineInstr *UseMI = Use->getInstr();
- // For all uses of the register, calculate the maxmimum latency
- int Latency = -1;
- if (UseMI) {
- for (unsigned i = 0, e = UseMI->getNumOperands(); i != e; ++i) {
- const MachineOperand &MO = UseMI->getOperand(i);
- if (!MO.isReg() || !MO.isUse())
- continue;
- unsigned MOReg = MO.getReg();
- if (MOReg != Reg)
- continue;
-
- int UseCycle = TII->getOperandLatency(InstrItins, DefMI, DefIdx,
- UseMI, i);
- Latency = std::max(Latency, UseCycle);
- }
- } else {
- // UseMI is null, then it must be a scheduling barrier.
- if (!InstrItins || InstrItins->isEmpty())
- return;
- unsigned DefClass = DefMI->getDesc().getSchedClass();
- Latency = InstrItins->getOperandCycle(DefClass, DefIdx);
- }
+ return 1;
- // If we found a latency, then replace the existing dependence latency.
- if (Latency >= 0)
- dep.setLatency(Latency);
- }
+ return TII->computeOperandLatency(InstrItins, TRI, Def->getInstr(),
+ Use->getInstr(), dep.getReg(), FindMin);
}
void ScheduleDAGInstrs::dumpNode(const SUnit *SU) const {
diff --git a/lib/CodeGen/ScoreboardHazardRecognizer.cpp b/lib/CodeGen/ScoreboardHazardRecognizer.cpp
index 3d22035..e675366 100644
--- a/lib/CodeGen/ScoreboardHazardRecognizer.cpp
+++ b/lib/CodeGen/ScoreboardHazardRecognizer.cpp
@@ -39,13 +39,11 @@ ScoreboardHazardRecognizer(const InstrItineraryData *II,
DebugType = ParentDebugType;
#endif
- // Determine the maximum depth of any itinerary. This determines the
- // depth of the scoreboard. We always make the scoreboard at least 1
- // cycle deep to avoid dealing with the boundary condition.
+ // Determine the maximum depth of any itinerary. This determines the depth of
+ // the scoreboard. We always make the scoreboard at least 1 cycle deep to
+ // avoid dealing with the boundary condition.
unsigned ScoreboardDepth = 1;
if (ItinData && !ItinData->isEmpty()) {
- IssueWidth = ItinData->IssueWidth;
-
for (unsigned idx = 0; ; ++idx) {
if (ItinData->isEndMarker(idx))
break;
@@ -63,16 +61,26 @@ ScoreboardHazardRecognizer(const InstrItineraryData *II,
// Find the next power-of-2 >= ItinDepth
while (ItinDepth > ScoreboardDepth) {
ScoreboardDepth *= 2;
+ // Don't set MaxLookAhead until we find at least one nonzero stage.
+ // This way, an itinerary with no stages has MaxLookAhead==0, which
+ // completely bypasses the scoreboard hazard logic.
+ MaxLookAhead = ScoreboardDepth;
}
}
- MaxLookAhead = ScoreboardDepth;
}
ReservedScoreboard.reset(ScoreboardDepth);
RequiredScoreboard.reset(ScoreboardDepth);
- DEBUG(dbgs() << "Using scoreboard hazard recognizer: Depth = "
- << ScoreboardDepth << '\n');
+ // If MaxLookAhead is not set above, then we are not enabled.
+ if (!isEnabled())
+ DEBUG(dbgs() << "Disabled scoreboard hazard recognizer\n");
+ else {
+ // A nonempty itinerary must have a SchedModel.
+ IssueWidth = ItinData->SchedModel->IssueWidth;
+ DEBUG(dbgs() << "Using scoreboard hazard recognizer: Depth = "
+ << ScoreboardDepth << '\n');
+ }
}
void ScoreboardHazardRecognizer::Reset() {
@@ -151,7 +159,7 @@ ScoreboardHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {
}
if (!freeUnits) {
- DEBUG(dbgs() << "*** Hazard in cycle " << (cycle + i) << ", ");
+ DEBUG(dbgs() << "*** Hazard in cycle +" << StageCycle << ", ");
DEBUG(dbgs() << "SU(" << SU->NodeNum << "): ");
DEBUG(DAG->dumpNode(SU));
return Hazard;
diff --git a/lib/CodeGen/SelectionDAG/CMakeLists.txt b/lib/CodeGen/SelectionDAG/CMakeLists.txt
index a6bdc3b..75e8167 100644
--- a/lib/CodeGen/SelectionDAG/CMakeLists.txt
+++ b/lib/CodeGen/SelectionDAG/CMakeLists.txt
@@ -23,3 +23,5 @@ add_llvm_library(LLVMSelectionDAG
TargetLowering.cpp
TargetSelectionDAGInfo.cpp
)
+
+add_dependencies(LLVMSelectionDAG intrinsics_gen)
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 0914c66..4e29879 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -215,6 +215,7 @@ namespace {
SDValue visitFADD(SDNode *N);
SDValue visitFSUB(SDNode *N);
SDValue visitFMUL(SDNode *N);
+ SDValue visitFMA(SDNode *N);
SDValue visitFDIV(SDNode *N);
SDValue visitFREM(SDNode *N);
SDValue visitFCOPYSIGN(SDNode *N);
@@ -227,6 +228,9 @@ namespace {
SDValue visitFP_EXTEND(SDNode *N);
SDValue visitFNEG(SDNode *N);
SDValue visitFABS(SDNode *N);
+ SDValue visitFCEIL(SDNode *N);
+ SDValue visitFTRUNC(SDNode *N);
+ SDValue visitFFLOOR(SDNode *N);
SDValue visitBRCOND(SDNode *N);
SDValue visitBR_CC(SDNode *N);
SDValue visitLOAD(SDNode *N);
@@ -328,15 +332,12 @@ namespace {
class WorkListRemover : public SelectionDAG::DAGUpdateListener {
DAGCombiner &DC;
public:
- explicit WorkListRemover(DAGCombiner &dc) : DC(dc) {}
+ explicit WorkListRemover(DAGCombiner &dc)
+ : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}
virtual void NodeDeleted(SDNode *N, SDNode *E) {
DC.removeFromWorkList(N);
}
-
- virtual void NodeUpdated(SDNode *N) {
- // Ignore updates.
- }
};
}
@@ -619,8 +620,7 @@ SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
N->getValueType(i) == To[i].getValueType()) &&
"Cannot combine value to value of different type!"));
WorkListRemover DeadNodes(*this);
- DAG.ReplaceAllUsesWith(N, To, &DeadNodes);
-
+ DAG.ReplaceAllUsesWith(N, To);
if (AddTo) {
// Push the new nodes and any users onto the worklist
for (unsigned i = 0, e = NumTo; i != e; ++i) {
@@ -650,7 +650,7 @@ CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
// Replace all uses. If any nodes become isomorphic to other nodes and
// are deleted, make sure to remove them from our worklist.
WorkListRemover DeadNodes(*this);
- DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New, &DeadNodes);
+ DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New);
// Push the new node and any (possibly new) users onto the worklist.
AddToWorkList(TLO.New.getNode());
@@ -707,9 +707,8 @@ void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) {
Trunc.getNode()->dump(&DAG);
dbgs() << '\n');
WorkListRemover DeadNodes(*this);
- DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), Trunc, &DeadNodes);
- DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), SDValue(ExtLoad, 1),
- &DeadNodes);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), Trunc);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), SDValue(ExtLoad, 1));
removeFromWorkList(Load);
DAG.DeleteNode(Load);
AddToWorkList(Trunc.getNode());
@@ -961,8 +960,8 @@ bool DAGCombiner::PromoteLoad(SDValue Op) {
Result.getNode()->dump(&DAG);
dbgs() << '\n');
WorkListRemover DeadNodes(*this);
- DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result, &DeadNodes);
- DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLD.getValue(1), &DeadNodes);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLD.getValue(1));
removeFromWorkList(N);
DAG.DeleteNode(N);
AddToWorkList(Result.getNode());
@@ -1047,12 +1046,12 @@ void DAGCombiner::Run(CombineLevel AtLevel) {
DAG.TransferDbgValues(SDValue(N, 0), RV);
WorkListRemover DeadNodes(*this);
if (N->getNumValues() == RV.getNode()->getNumValues())
- DAG.ReplaceAllUsesWith(N, RV.getNode(), &DeadNodes);
+ DAG.ReplaceAllUsesWith(N, RV.getNode());
else {
assert(N->getValueType(0) == RV.getValueType() &&
N->getNumValues() == 1 && "Type mismatch");
SDValue OpV = RV;
- DAG.ReplaceAllUsesWith(N, &OpV, &DeadNodes);
+ DAG.ReplaceAllUsesWith(N, &OpV);
}
// Push the new node and any users onto the worklist
@@ -1131,6 +1130,7 @@ SDValue DAGCombiner::visit(SDNode *N) {
case ISD::FADD: return visitFADD(N);
case ISD::FSUB: return visitFSUB(N);
case ISD::FMUL: return visitFMUL(N);
+ case ISD::FMA: return visitFMA(N);
case ISD::FDIV: return visitFDIV(N);
case ISD::FREM: return visitFREM(N);
case ISD::FCOPYSIGN: return visitFCOPYSIGN(N);
@@ -1143,6 +1143,9 @@ SDValue DAGCombiner::visit(SDNode *N) {
case ISD::FP_EXTEND: return visitFP_EXTEND(N);
case ISD::FNEG: return visitFNEG(N);
case ISD::FABS: return visitFABS(N);
+ case ISD::FFLOOR: return visitFFLOOR(N);
+ case ISD::FCEIL: return visitFCEIL(N);
+ case ISD::FTRUNC: return visitFTRUNC(N);
case ISD::BRCOND: return visitBRCOND(N);
case ISD::BR_CC: return visitBR_CC(N);
case ISD::LOAD: return visitLOAD(N);
@@ -1325,10 +1328,12 @@ SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) {
// Replacing results may cause a different MERGE_VALUES to suddenly
// be CSE'd with N, and carry its uses with it. Iterate until no
// uses remain, to ensure that the node can be safely deleted.
+ // First add the users of this node to the work list so that they
+ // can be tried again once they have new operands.
+ AddUsersToWorkList(N);
do {
for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
- DAG.ReplaceAllUsesOfValueWith(SDValue(N, i), N->getOperand(i),
- &DeadNodes);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, i), N->getOperand(i));
} while (!N->use_empty());
removeFromWorkList(N);
DAG.DeleteNode(N);
@@ -1640,7 +1645,7 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
if (N1.getOpcode() == ISD::ADD && N0C && N1C1) {
SDValue NewC = DAG.getConstant((N0C->getAPIntValue() - N1C1->getAPIntValue()), VT);
return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, NewC,
- N1.getOperand(0));
+ N1.getOperand(0));
}
// fold ((A+(B+or-C))-B) -> A+or-C
if (N0.getOpcode() == ISD::ADD &&
@@ -2341,7 +2346,7 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) {
// We also handle SCALAR_TO_VECTOR because xor/or/and operations are cheaper
// on scalars.
if ((N0.getOpcode() == ISD::BITCAST || N0.getOpcode() == ISD::SCALAR_TO_VECTOR)
- && Level == AfterLegalizeVectorOps) {
+ && Level == AfterLegalizeTypes) {
SDValue In0 = N0.getOperand(0);
SDValue In1 = N1.getOperand(0);
EVT In0Ty = In0.getValueType();
@@ -2528,7 +2533,14 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
Load->getOffset(), Load->getMemoryVT(),
Load->getMemOperand());
// Replace uses of the EXTLOAD with the new ZEXTLOAD.
- CombineTo(Load, NewLoad.getValue(0), NewLoad.getValue(1));
+ if (Load->getNumValues() == 3) {
+ // PRE/POST_INC loads have 3 values.
+ SDValue To[] = { NewLoad.getValue(0), NewLoad.getValue(1),
+ NewLoad.getValue(2) };
+ CombineTo(Load, To, 3, true);
+ } else {
+ CombineTo(Load, NewLoad.getValue(0), NewLoad.getValue(1));
+ }
}
// Fold the AND away, taking care not to fold to the old load node if we
@@ -2710,6 +2722,34 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
}
}
+ if (N0.getOpcode() == ISD::ADD && N1.getOpcode() == ISD::SRL &&
+ VT.getSizeInBits() <= 64) {
+ if (ConstantSDNode *ADDI = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
+ APInt ADDC = ADDI->getAPIntValue();
+ if (!TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
+ // Look for (and (add x, c1), (lshr y, c2)). If C1 wasn't a legal
+ // immediate for an add, but it is legal if its top c2 bits are set,
+ // transform the ADD so the immediate doesn't need to be materialized
+ // in a register.
+ if (ConstantSDNode *SRLI = dyn_cast<ConstantSDNode>(N1.getOperand(1))) {
+ APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
+ SRLI->getZExtValue());
+ if (DAG.MaskedValueIsZero(N0.getOperand(1), Mask)) {
+ ADDC |= Mask;
+ if (TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
+ SDValue NewAdd =
+ DAG.getNode(ISD::ADD, N0.getDebugLoc(), VT,
+ N0.getOperand(0), DAG.getConstant(ADDC, VT));
+ CombineTo(N0.getNode(), NewAdd);
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+ }
+ }
+ }
+ }
+ }
+
+
return SDValue();
}
@@ -4526,8 +4566,10 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
SDValue Op = N0.getOperand(0);
if (Op.getValueType().bitsLT(VT)) {
Op = DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, Op);
+ AddToWorkList(Op.getNode());
} else if (Op.getValueType().bitsGT(VT)) {
Op = DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, Op);
+ AddToWorkList(Op.getNode());
}
return DAG.getZeroExtendInReg(Op, N->getDebugLoc(),
N0.getValueType().getScalarType());
@@ -5012,6 +5054,10 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
EVT PtrType = N0.getOperand(1).getValueType();
+ if (PtrType == MVT::Untyped || PtrType.isExtended())
+ // It's not possible to generate a constant of extended or untyped type.
+ return SDValue();
+
// For big endian targets, we need to adjust the offset to the pointer to
// load the correct bytes.
if (TLI.isBigEndian()) {
@@ -5041,8 +5087,7 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
// Replace the old load's chain with the new load's chain.
WorkListRemover DeadNodes(*this);
- DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1),
- &DeadNodes);
+ DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
// Shift the result left, if we've swallowed a left shift.
SDValue Result = Load;
@@ -5225,7 +5270,7 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
SDValue EltNo = N0->getOperand(1);
if (isa<ConstantSDNode>(EltNo) && isTypeLegal(NVT)) {
int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
-
+ EVT IndexTy = N0->getOperand(1).getValueType();
int Index = isLE ? (Elt*SizeRatio) : (Elt*SizeRatio + (SizeRatio-1));
SDValue V = DAG.getNode(ISD::BITCAST, N->getDebugLoc(),
@@ -5233,7 +5278,7 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
N->getDebugLoc(), TrTy, V,
- DAG.getConstant(Index, MVT::i32));
+ DAG.getConstant(Index, IndexTy));
}
}
@@ -5607,7 +5652,7 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
if (FoldedVOp.getNode()) return FoldedVOp;
}
- // fold (fadd c1, c2) -> (fadd c1, c2)
+ // fold (fadd c1, c2) -> c1 + c2
if (N0CFP && N1CFP && VT != MVT::ppcf128)
return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N0, N1);
// canonicalize constant to RHS
@@ -5636,6 +5681,26 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
DAG.getNode(ISD::FADD, N->getDebugLoc(), VT,
N0.getOperand(1), N1));
+ // FADD -> FMA combines:
+ if ((DAG.getTarget().Options.AllowFPOpFusion == FPOpFusion::Fast ||
+ DAG.getTarget().Options.UnsafeFPMath) &&
+ DAG.getTarget().getTargetLowering()->isFMAFasterThanMulAndAdd(VT) &&
+ TLI.isOperationLegalOrCustom(ISD::FMA, VT)) {
+
+ // fold (fadd (fmul x, y), z) -> (fma x, y, z)
+ if (N0.getOpcode() == ISD::FMUL && N0->hasOneUse()) {
+ return DAG.getNode(ISD::FMA, N->getDebugLoc(), VT,
+ N0.getOperand(0), N0.getOperand(1), N1);
+ }
+
+ // fold (fadd x, (fmul y, z)) -> (fma y, z, x)
+ // Note: Commutes FADD operands.
+ if (N1.getOpcode() == ISD::FMUL && N1->hasOneUse()) {
+ return DAG.getNode(ISD::FMA, N->getDebugLoc(), VT,
+ N1.getOperand(0), N1.getOperand(1), N0);
+ }
+ }
+
return SDValue();
}
@@ -5645,6 +5710,7 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {
ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
EVT VT = N->getValueType(0);
+ DebugLoc dl = N->getDebugLoc();
// fold vector ops
if (VT.isVector()) {
@@ -5665,17 +5731,21 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {
if (isNegatibleForFree(N1, LegalOperations, TLI, &DAG.getTarget().Options))
return GetNegatedExpression(N1, DAG, LegalOperations);
if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
- return DAG.getNode(ISD::FNEG, N->getDebugLoc(), VT, N1);
+ return DAG.getNode(ISD::FNEG, dl, VT, N1);
}
// fold (fsub A, (fneg B)) -> (fadd A, B)
if (isNegatibleForFree(N1, LegalOperations, TLI, &DAG.getTarget().Options))
- return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N0,
+ return DAG.getNode(ISD::FADD, dl, VT, N0,
GetNegatedExpression(N1, DAG, LegalOperations));
// If 'unsafe math' is enabled, fold
+ // (fsub x, x) -> 0.0 &
// (fsub x, (fadd x, y)) -> (fneg y) &
// (fsub x, (fadd y, x)) -> (fneg y)
if (DAG.getTarget().Options.UnsafeFPMath) {
+ if (N0 == N1)
+ return DAG.getConstantFP(0.0f, VT);
+
if (N1.getOpcode() == ISD::FADD) {
SDValue N10 = N1->getOperand(0);
SDValue N11 = N1->getOperand(1);
@@ -5689,6 +5759,40 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {
}
}
+ // FSUB -> FMA combines:
+ if ((DAG.getTarget().Options.AllowFPOpFusion == FPOpFusion::Fast ||
+ DAG.getTarget().Options.UnsafeFPMath) &&
+ DAG.getTarget().getTargetLowering()->isFMAFasterThanMulAndAdd(VT) &&
+ TLI.isOperationLegalOrCustom(ISD::FMA, VT)) {
+
+ // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
+ if (N0.getOpcode() == ISD::FMUL && N0->hasOneUse()) {
+ return DAG.getNode(ISD::FMA, dl, VT,
+ N0.getOperand(0), N0.getOperand(1),
+ DAG.getNode(ISD::FNEG, dl, VT, N1));
+ }
+
+ // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
+ // Note: Commutes FSUB operands.
+ if (N1.getOpcode() == ISD::FMUL && N1->hasOneUse()) {
+ return DAG.getNode(ISD::FMA, dl, VT,
+ DAG.getNode(ISD::FNEG, dl, VT,
+ N1.getOperand(0)),
+ N1.getOperand(1), N0);
+ }
+
+ // fold (fsub (fneg (fmul x, y)), z) -> (fma (fneg x), y, (fneg z))
+ if (N0.getOpcode() == ISD::FNEG &&
+ N0.getOperand(0).getOpcode() == ISD::FMUL &&
+ N0->hasOneUse() && N0.getOperand(0).hasOneUse()) {
+ SDValue N00 = N0.getOperand(0).getOperand(0);
+ SDValue N01 = N0.getOperand(0).getOperand(1);
+ return DAG.getNode(ISD::FMA, dl, VT,
+ DAG.getNode(ISD::FNEG, dl, VT, N00), N01,
+ DAG.getNode(ISD::FNEG, dl, VT, N1));
+ }
+ }
+
return SDValue();
}
@@ -5720,6 +5824,9 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) {
if (DAG.getTarget().Options.UnsafeFPMath &&
ISD::isBuildVectorAllZeros(N1.getNode()))
return N1;
+ // fold (fmul A, 1.0) -> A
+ if (N1CFP && N1CFP->isExactlyValue(1.0))
+ return N0;
// fold (fmul X, 2.0) -> (fadd X, X)
if (N1CFP && N1CFP->isExactlyValue(+2.0))
return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N0, N0);
@@ -5753,6 +5860,26 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) {
return SDValue();
}
+SDValue DAGCombiner::visitFMA(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue N2 = N->getOperand(2);
+ ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+ ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
+ EVT VT = N->getValueType(0);
+
+ if (N0CFP && N0CFP->isExactlyValue(1.0))
+ return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N1, N2);
+ if (N1CFP && N1CFP->isExactlyValue(1.0))
+ return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N0, N2);
+
+ // Canonicalize (fma c, x, y) -> (fma x, c, y)
+ if (N0CFP && !N1CFP)
+ return DAG.getNode(ISD::FMA, N->getDebugLoc(), VT, N1, N0, N2);
+
+ return SDValue();
+}
+
SDValue DAGCombiner::visitFDIV(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
@@ -5893,6 +6020,38 @@ SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
return DAG.getNode(ISD::UINT_TO_FP, N->getDebugLoc(), VT, N0);
}
+ // The next optimizations are desirable only if SELECT_CC can be lowered.
+ // Check against MVT::Other for SELECT_CC, which is a workaround for targets
+ // having to say they don't support SELECT_CC on every type the DAG knows
+ // about, since there is no way to mark an opcode illegal at all value types
+ // (See also visitSELECT)
+ if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, MVT::Other)) {
+ // fold (sint_to_fp (setcc x, y, cc)) -> (select_cc x, y, -1.0, 0.0, cc)
+ if (N0.getOpcode() == ISD::SETCC && N0.getValueType() == MVT::i1 &&
+ !VT.isVector() &&
+ (!LegalOperations ||
+ TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) {
+ SDValue Ops[] =
+ { N0.getOperand(0), N0.getOperand(1),
+ DAG.getConstantFP(-1.0, VT) , DAG.getConstantFP(0.0, VT),
+ N0.getOperand(2) };
+ return DAG.getNode(ISD::SELECT_CC, N->getDebugLoc(), VT, Ops, 5);
+ }
+
+ // fold (sint_to_fp (zext (setcc x, y, cc))) ->
+ // (select_cc x, y, 1.0, 0.0, cc)
+ if (N0.getOpcode() == ISD::ZERO_EXTEND &&
+ N0.getOperand(0).getOpcode() == ISD::SETCC &&!VT.isVector() &&
+ (!LegalOperations ||
+ TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) {
+ SDValue Ops[] =
+ { N0.getOperand(0).getOperand(0), N0.getOperand(0).getOperand(1),
+ DAG.getConstantFP(1.0, VT) , DAG.getConstantFP(0.0, VT),
+ N0.getOperand(0).getOperand(2) };
+ return DAG.getNode(ISD::SELECT_CC, N->getDebugLoc(), VT, Ops, 5);
+ }
+ }
+
return SDValue();
}
@@ -5918,6 +6077,25 @@ SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) {
return DAG.getNode(ISD::SINT_TO_FP, N->getDebugLoc(), VT, N0);
}
+ // The next optimizations are desirable only if SELECT_CC can be lowered.
+ // Check against MVT::Other for SELECT_CC, which is a workaround for targets
+ // having to say they don't support SELECT_CC on every type the DAG knows
+ // about, since there is no way to mark an opcode illegal at all value types
+ // (See also visitSELECT)
+ if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, MVT::Other)) {
+ // fold (uint_to_fp (setcc x, y, cc)) -> (select_cc x, y, 1.0, 0.0, cc)
+
+ if (N0.getOpcode() == ISD::SETCC && !VT.isVector() &&
+ (!LegalOperations ||
+ TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) {
+ SDValue Ops[] =
+ { N0.getOperand(0), N0.getOperand(1),
+ DAG.getConstantFP(1.0, VT), DAG.getConstantFP(0.0, VT),
+ N0.getOperand(2) };
+ return DAG.getNode(ISD::SELECT_CC, N->getDebugLoc(), VT, Ops, 5);
+ }
+ }
+
return SDValue();
}
@@ -6071,6 +6249,42 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) {
return SDValue();
}
+SDValue DAGCombiner::visitFCEIL(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+ EVT VT = N->getValueType(0);
+
+ // fold (fceil c1) -> fceil(c1)
+ if (N0CFP && VT != MVT::ppcf128)
+ return DAG.getNode(ISD::FCEIL, N->getDebugLoc(), VT, N0);
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitFTRUNC(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+ EVT VT = N->getValueType(0);
+
+ // fold (ftrunc c1) -> ftrunc(c1)
+ if (N0CFP && VT != MVT::ppcf128)
+ return DAG.getNode(ISD::FTRUNC, N->getDebugLoc(), VT, N0);
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitFFLOOR(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+ EVT VT = N->getValueType(0);
+
+ // fold (ffloor c1) -> ffloor(c1)
+ if (N0CFP && VT != MVT::ppcf128)
+ return DAG.getNode(ISD::FFLOOR, N->getDebugLoc(), VT, N0);
+
+ return SDValue();
+}
+
SDValue DAGCombiner::visitFABS(SDNode *N) {
SDValue N0 = N->getOperand(0);
ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
@@ -6185,7 +6399,7 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) {
}
// Replace the uses of SRL with SETCC
WorkListRemover DeadNodes(*this);
- DAG.ReplaceAllUsesOfValueWith(N1, SetCC, &DeadNodes);
+ DAG.ReplaceAllUsesOfValueWith(N1, SetCC);
removeFromWorkList(N1.getNode());
DAG.DeleteNode(N1.getNode());
return SDValue(N, 0); // Return N so it doesn't get rechecked!
@@ -6214,7 +6428,7 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) {
Tmp.getNode()->dump(&DAG);
dbgs() << '\n');
WorkListRemover DeadNodes(*this);
- DAG.ReplaceAllUsesOfValueWith(N1, Tmp, &DeadNodes);
+ DAG.ReplaceAllUsesOfValueWith(N1, Tmp);
removeFromWorkList(TheXor);
DAG.DeleteNode(TheXor);
return DAG.getNode(ISD::BRCOND, N->getDebugLoc(),
@@ -6240,7 +6454,7 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) {
Equal ? ISD::SETEQ : ISD::SETNE);
// Replace the uses of XOR with SETCC
WorkListRemover DeadNodes(*this);
- DAG.ReplaceAllUsesOfValueWith(N1, SetCC, &DeadNodes);
+ DAG.ReplaceAllUsesOfValueWith(N1, SetCC);
removeFromWorkList(N1.getNode());
DAG.DeleteNode(N1.getNode());
return DAG.getNode(ISD::BRCOND, N->getDebugLoc(),
@@ -6431,21 +6645,17 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
dbgs() << '\n');
WorkListRemover DeadNodes(*this);
if (isLoad) {
- DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0),
- &DeadNodes);
- DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2),
- &DeadNodes);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
} else {
- DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1),
- &DeadNodes);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
}
// Finally, since the node is now dead, remove it from the graph.
DAG.DeleteNode(N);
// Replace the uses of Ptr with uses of the updated base value.
- DAG.ReplaceAllUsesOfValueWith(Ptr, Result.getValue(isLoad ? 1 : 0),
- &DeadNodes);
+ DAG.ReplaceAllUsesOfValueWith(Ptr, Result.getValue(isLoad ? 1 : 0));
removeFromWorkList(Ptr.getNode());
DAG.DeleteNode(Ptr.getNode());
@@ -6559,13 +6769,10 @@ bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
dbgs() << '\n');
WorkListRemover DeadNodes(*this);
if (isLoad) {
- DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0),
- &DeadNodes);
- DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2),
- &DeadNodes);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
} else {
- DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1),
- &DeadNodes);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
}
// Finally, since the node is now dead, remove it from the graph.
@@ -6573,8 +6780,7 @@ bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
// Replace the uses of Use with uses of the updated base value.
DAG.ReplaceAllUsesOfValueWith(SDValue(Op, 0),
- Result.getValue(isLoad ? 1 : 0),
- &DeadNodes);
+ Result.getValue(isLoad ? 1 : 0));
removeFromWorkList(Op);
DAG.DeleteNode(Op);
return true;
@@ -6609,7 +6815,7 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) {
Chain.getNode()->dump(&DAG);
dbgs() << "\n");
WorkListRemover DeadNodes(*this);
- DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain, &DeadNodes);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
if (N->use_empty()) {
removeFromWorkList(N);
@@ -6629,11 +6835,10 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) {
Undef.getNode()->dump(&DAG);
dbgs() << " and 2 other values\n");
WorkListRemover DeadNodes(*this);
- DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Undef, &DeadNodes);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Undef);
DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1),
- DAG.getUNDEF(N->getValueType(1)),
- &DeadNodes);
- DAG.ReplaceAllUsesOfValueWith(SDValue(N, 2), Chain, &DeadNodes);
+ DAG.getUNDEF(N->getValueType(1)));
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 2), Chain);
removeFromWorkList(N);
DAG.DeleteNode(N);
return SDValue(N, 0); // Return N so it doesn't get rechecked!
@@ -6955,8 +7160,7 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
AddToWorkList(NewLD.getNode());
AddToWorkList(NewVal.getNode());
WorkListRemover DeadNodes(*this);
- DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLD.getValue(1),
- &DeadNodes);
+ DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLD.getValue(1));
++OpsNarrowed;
return NewST;
}
@@ -7013,8 +7217,7 @@ SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) {
AddToWorkList(NewLD.getNode());
AddToWorkList(NewST.getNode());
WorkListRemover DeadNodes(*this);
- DAG.ReplaceAllUsesOfValueWith(Value.getValue(1), NewLD.getValue(1),
- &DeadNodes);
+ DAG.ReplaceAllUsesOfValueWith(Value.getValue(1), NewLD.getValue(1));
++LdStFP2Int;
return NewST;
}
@@ -7058,7 +7261,8 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
SDValue Tmp;
switch (CFP->getValueType(0).getSimpleVT().SimpleTy) {
default: llvm_unreachable("Unknown FP type");
- case MVT::f80: // We don't do this for these yet.
+ case MVT::f16: // We don't do this for these yet.
+ case MVT::f80:
case MVT::f128:
case MVT::ppcf128:
break;
@@ -7323,8 +7527,9 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
OrigElt -= NumElem;
}
+ EVT IndexTy = N->getOperand(1).getValueType();
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, N->getDebugLoc(), NVT,
- InVec, DAG.getConstant(OrigElt, MVT::i32));
+ InVec, DAG.getConstant(OrigElt, IndexTy));
}
// Perform only after legalization to ensure build_vector / vector_shuffle
@@ -7472,7 +7677,7 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
WorkListRemover DeadNodes(*this);
SDValue From[] = { SDValue(N, 0), SDValue(LN0,1) };
SDValue To[] = { Load, Chain };
- DAG.ReplaceAllUsesOfValuesWith(From, To, 2, &DeadNodes);
+ DAG.ReplaceAllUsesOfValuesWith(From, To, 2);
// Since we're explcitly calling ReplaceAllUses, add the new node to the
// worklist explicitly as well.
AddToWorkList(Load.getNode());
@@ -7489,6 +7694,11 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
unsigned NumInScalars = N->getNumOperands();
DebugLoc dl = N->getDebugLoc();
EVT VT = N->getValueType(0);
+
+ // A vector built entirely of undefs is undef.
+ if (ISD::allOperandsUndef(N))
+ return DAG.getUNDEF(VT);
+
// Check to see if this is a BUILD_VECTOR of a bunch of values
// which come from any_extend or zero_extend nodes. If so, we can create
// a new BUILD_VECTOR using bit-casts which may enable other BUILD_VECTOR
@@ -7496,12 +7706,11 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
// using shuffles.
EVT SourceType = MVT::Other;
bool AllAnyExt = true;
- bool AllUndef = true;
+
for (unsigned i = 0; i != NumInScalars; ++i) {
SDValue In = N->getOperand(i);
// Ignore undef inputs.
if (In.getOpcode() == ISD::UNDEF) continue;
- AllUndef = false;
bool AnyExt = In.getOpcode() == ISD::ANY_EXTEND;
bool ZeroExt = In.getOpcode() == ISD::ZERO_EXTEND;
@@ -7529,9 +7738,6 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
AllAnyExt &= AnyExt;
}
- if (AllUndef)
- return DAG.getUNDEF(VT);
-
// In order to have valid types, all of the inputs must be extended from the
// same source type and all of the inputs must be any or zero extend.
// Scalar sizes must be a power of two.
@@ -7707,6 +7913,10 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
if (N->getNumOperands() == 1)
return N->getOperand(0);
+ // Check if all of the operands are undefs.
+ if (ISD::allOperandsUndef(N))
+ return DAG.getUNDEF(N->getValueType(0));
+
return SDValue();
}
diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp
index 0c1ac69..683fac6 100644
--- a/lib/CodeGen/SelectionDAG/FastISel.cpp
+++ b/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -40,6 +40,7 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "isel"
+#include "llvm/DebugInfo.h"
#include "llvm/Function.h"
#include "llvm/GlobalVariable.h"
#include "llvm/Instructions.h"
@@ -51,10 +52,10 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/Analysis/DebugInfo.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Support/ErrorHandling.h"
@@ -484,7 +485,7 @@ bool FastISel::SelectGetElementPtr(const User *I) {
if (const ConstantInt *CI = dyn_cast<ConstantInt>(Idx)) {
if (CI->isZero()) continue;
// N = N + Offset
- TotalOffs +=
+ TotalOffs +=
TD.getTypeAllocSize(Ty)*cast<ConstantInt>(CI)->getSExtValue();
if (TotalOffs >= MaxOffs) {
N = FastEmit_ri_(VT, ISD::ADD, N, NIsKill, TotalOffs, VT);
@@ -573,7 +574,10 @@ bool FastISel::SelectCall(const User *I) {
// At -O0 we don't care about the lifetime intrinsics.
case Intrinsic::lifetime_start:
case Intrinsic::lifetime_end:
+ // The donothing intrinsic does, well, nothing.
+ case Intrinsic::donothing:
return true;
+
case Intrinsic::dbg_declare: {
const DbgDeclareInst *DI = cast<DbgDeclareInst>(Call);
if (!DIVariable(DI->getVariable()).Verify() ||
@@ -642,7 +646,7 @@ bool FastISel::SelectCall(const User *I) {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
.addCImm(CI).addImm(DI->getOffset())
.addMetadata(DI->getVariable());
- else
+ else
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
.addImm(CI->getZExtValue()).addImm(DI->getOffset())
.addMetadata(DI->getVariable());
@@ -786,13 +790,24 @@ FastISel::SelectInstruction(const Instruction *I) {
MachineBasicBlock::iterator SavedInsertPt = FuncInfo.InsertPt;
+ // As a special case, don't handle calls to builtin library functions that
+ // may be translated directly to target instructions.
+ if (const CallInst *Call = dyn_cast<CallInst>(I)) {
+ const Function *F = Call->getCalledFunction();
+ LibFunc::Func Func;
+ if (F && !F->hasLocalLinkage() && F->hasName() &&
+ LibInfo->getLibFunc(F->getName(), Func) &&
+ LibInfo->hasOptimizedCodeGen(Func))
+ return false;
+ }
+
// First, try doing target-independent selection.
if (SelectOperator(I, I->getOpcode())) {
++NumFastIselSuccessIndependent;
DL = DebugLoc();
return true;
}
- // Remove dead code. However, ignore call instructions since we've flushed
+ // Remove dead code. However, ignore call instructions since we've flushed
// the local value map and recomputed the insert point.
if (!isa<CallInst>(I)) {
recomputeInsertPt();
@@ -1037,7 +1052,8 @@ FastISel::SelectOperator(const User *I, unsigned Opcode) {
}
}
-FastISel::FastISel(FunctionLoweringInfo &funcInfo)
+FastISel::FastISel(FunctionLoweringInfo &funcInfo,
+ const TargetLibraryInfo *libInfo)
: FuncInfo(funcInfo),
MRI(FuncInfo.MF->getRegInfo()),
MFI(*FuncInfo.MF->getFrameInfo()),
@@ -1046,7 +1062,8 @@ FastISel::FastISel(FunctionLoweringInfo &funcInfo)
TD(*TM.getTargetData()),
TII(*TM.getInstrInfo()),
TLI(*TM.getTargetLowering()),
- TRI(*TM.getRegisterInfo()) {
+ TRI(*TM.getRegisterInfo()),
+ LibInfo(libInfo) {
}
FastISel::~FastISel() {}
@@ -1306,6 +1323,30 @@ unsigned FastISel::FastEmitInst_rri(unsigned MachineInstOpcode,
return ResultReg;
}
+unsigned FastISel::FastEmitInst_rrii(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill,
+ unsigned Op1, bool Op1IsKill,
+ uint64_t Imm1, uint64_t Imm2) {
+ unsigned ResultReg = createResultReg(RC);
+ const MCInstrDesc &II = TII.get(MachineInstOpcode);
+
+ if (II.getNumDefs() >= 1)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
+ .addReg(Op0, Op0IsKill * RegState::Kill)
+ .addReg(Op1, Op1IsKill * RegState::Kill)
+ .addImm(Imm1).addImm(Imm2);
+ else {
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
+ .addReg(Op0, Op0IsKill * RegState::Kill)
+ .addReg(Op1, Op1IsKill * RegState::Kill)
+ .addImm(Imm1).addImm(Imm2);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
+ ResultReg).addReg(II.ImplicitDefs[0]);
+ }
+ return ResultReg;
+}
+
unsigned FastISel::FastEmitInst_i(unsigned MachineInstOpcode,
const TargetRegisterClass *RC,
uint64_t Imm) {
@@ -1345,6 +1386,8 @@ unsigned FastISel::FastEmitInst_extractsubreg(MVT RetVT,
unsigned ResultReg = createResultReg(TLI.getRegClassFor(RetVT));
assert(TargetRegisterInfo::isVirtualRegister(Op0) &&
"Cannot yet extract from physregs");
+ const TargetRegisterClass *RC = MRI.getRegClass(Op0);
+ MRI.constrainRegClass(Op0, TRI.getSubClassWithSubReg(RC, Idx));
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
DL, TII.get(TargetOpcode::COPY), ResultReg)
.addReg(Op0, getKillRegState(Op0IsKill), Idx);
diff --git a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
index 8dde919..3e18ea7 100644
--- a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
+++ b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
@@ -15,13 +15,13 @@
#define DEBUG_TYPE "function-lowering-info"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
+#include "llvm/DebugInfo.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
#include "llvm/Instructions.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/LLVMContext.h"
#include "llvm/Module.h"
-#include "llvm/Analysis/DebugInfo.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
index 1467d88..4488d27 100644
--- a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
+++ b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
@@ -48,16 +48,31 @@ unsigned InstrEmitter::CountResults(SDNode *Node) {
return N;
}
-/// CountOperands - The inputs to target nodes have any actual inputs first,
+/// countOperands - The inputs to target nodes have any actual inputs first,
/// followed by an optional chain operand, then an optional glue operand.
/// Compute the number of actual operands that will go into the resulting
/// MachineInstr.
-unsigned InstrEmitter::CountOperands(SDNode *Node) {
+///
+/// Also count physreg RegisterSDNode and RegisterMaskSDNode operands preceding
+/// the chain and glue. These operands may be implicit on the machine instr.
+static unsigned countOperands(SDNode *Node, unsigned &NumImpUses) {
unsigned N = Node->getNumOperands();
while (N && Node->getOperand(N - 1).getValueType() == MVT::Glue)
--N;
if (N && Node->getOperand(N - 1).getValueType() == MVT::Other)
--N; // Ignore chain if it exists.
+
+ // Count RegisterSDNode and RegisterMaskSDNode operands for NumImpUses.
+ for (unsigned I = N; I; --I) {
+ if (isa<RegisterMaskSDNode>(Node->getOperand(I - 1)))
+ continue;
+ if (RegisterSDNode *RN = dyn_cast<RegisterSDNode>(Node->getOperand(I - 1)))
+ if (TargetRegisterInfo::isPhysicalRegister(RN->getReg()))
+ continue;
+ NumImpUses = N - I;
+ break;
+ }
+
return N;
}
@@ -114,8 +129,10 @@ EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned,
if (User->isMachineOpcode()) {
const MCInstrDesc &II = TII->get(User->getMachineOpcode());
const TargetRegisterClass *RC = 0;
- if (i+II.getNumDefs() < II.getNumOperands())
- RC = TII->getRegClass(II, i+II.getNumDefs(), TRI);
+ if (i+II.getNumDefs() < II.getNumOperands()) {
+ RC = TRI->getAllocatableClass(
+ TII->getRegClass(II, i+II.getNumDefs(), TRI, *MF));
+ }
if (!UseRC)
UseRC = RC;
else if (RC) {
@@ -196,7 +213,8 @@ void InstrEmitter::CreateVirtualRegisters(SDNode *Node, MachineInstr *MI,
// is a vreg in the same register class, use the CopyToReg'd destination
// register instead of creating a new vreg.
unsigned VRBase = 0;
- const TargetRegisterClass *RC = TII->getRegClass(II, i, TRI);
+ const TargetRegisterClass *RC =
+ TRI->getAllocatableClass(TII->getRegClass(II, i, TRI, *MF));
if (II.OpInfo[i].isOptionalDef()) {
// Optional def must be a physical register.
unsigned NumResults = CountResults(Node);
@@ -293,7 +311,7 @@ InstrEmitter::AddRegisterOperand(MachineInstr *MI, SDValue Op,
if (II) {
const TargetRegisterClass *DstRC = 0;
if (IIOpNum < II->getNumOperands())
- DstRC = TII->getRegClass(*II, IIOpNum, TRI);
+ DstRC = TRI->getAllocatableClass(TII->getRegClass(*II,IIOpNum,TRI,*MF));
assert((DstRC || (MI->isVariadic() && IIOpNum >= MCID.getNumOperands())) &&
"Don't have operand info for this instruction!");
if (DstRC && !MRI->constrainRegClass(VReg, DstRC, MinRCSize)) {
@@ -334,8 +352,7 @@ InstrEmitter::AddRegisterOperand(MachineInstr *MI, SDValue Op,
/// AddOperand - Add the specified operand to the specified machine instr. II
/// specifies the instruction information for the node, and IIOpNum is the
-/// operand number (in the II) that we are adding. IIOpNum and II are used for
-/// assertions only.
+/// operand number (in the II) that we are adding.
void InstrEmitter::AddOperand(MachineInstr *MI, SDValue Op,
unsigned IIOpNum,
const MCInstrDesc *II,
@@ -350,7 +367,11 @@ void InstrEmitter::AddOperand(MachineInstr *MI, SDValue Op,
const ConstantFP *CFP = F->getConstantFPValue();
MI->addOperand(MachineOperand::CreateFPImm(CFP));
} else if (RegisterSDNode *R = dyn_cast<RegisterSDNode>(Op)) {
- MI->addOperand(MachineOperand::CreateReg(R->getReg(), false));
+ // Turn additional physreg operands into implicit uses on non-variadic
+ // instructions. This is used by call and return instructions passing
+ // arguments in registers.
+ bool Imp = II && (IIOpNum >= II->getNumOperands() && !II->isVariadic());
+ MI->addOperand(MachineOperand::CreateReg(R->getReg(), false, Imp));
} else if (RegisterMaskSDNode *RM = dyn_cast<RegisterMaskSDNode>(Op)) {
MI->addOperand(MachineOperand::CreateRegMask(RM->getRegMask()));
} else if (GlobalAddressSDNode *TGA = dyn_cast<GlobalAddressSDNode>(Op)) {
@@ -390,6 +411,10 @@ void InstrEmitter::AddOperand(MachineInstr *MI, SDValue Op,
} else if (BlockAddressSDNode *BA = dyn_cast<BlockAddressSDNode>(Op)) {
MI->addOperand(MachineOperand::CreateBA(BA->getBlockAddress(),
BA->getTargetFlags()));
+ } else if (TargetIndexSDNode *TI = dyn_cast<TargetIndexSDNode>(Op)) {
+ MI->addOperand(MachineOperand::CreateTargetIndex(TI->getIndex(),
+ TI->getOffset(),
+ TI->getTargetFlags()));
} else {
assert(Op.getValueType() != MVT::Other &&
Op.getValueType() != MVT::Glue &&
@@ -458,7 +483,8 @@ void InstrEmitter::EmitSubregNode(SDNode *Node,
unsigned SrcReg, DstReg, DefSubIdx;
if (DefMI &&
TII->isCoalescableExtInstr(*DefMI, SrcReg, DstReg, DefSubIdx) &&
- SubIdx == DefSubIdx) {
+ SubIdx == DefSubIdx &&
+ TRC == MRI->getRegClass(SrcReg)) {
// Optimize these:
// r1025 = s/zext r1024, 4
// r1026 = extract_subreg r1025, 4
@@ -467,6 +493,7 @@ void InstrEmitter::EmitSubregNode(SDNode *Node,
VRBase = MRI->createVirtualRegister(TRC);
BuildMI(*MBB, InsertPos, Node->getDebugLoc(),
TII->get(TargetOpcode::COPY), VRBase).addReg(SrcReg);
+ MRI->clearKillFlags(SrcReg);
} else {
// VReg may not support a SubIdx sub-register, and we may need to
// constrain its register class or issue a COPY to a compatible register
@@ -548,7 +575,8 @@ InstrEmitter::EmitCopyToRegClassNode(SDNode *Node,
// Create the new VReg in the destination class and emit a copy.
unsigned DstRCIdx = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
- const TargetRegisterClass *DstRC = TRI->getRegClass(DstRCIdx);
+ const TargetRegisterClass *DstRC =
+ TRI->getAllocatableClass(TRI->getRegClass(DstRCIdx));
unsigned NewVReg = MRI->createVirtualRegister(DstRC);
BuildMI(*MBB, InsertPos, Node->getDebugLoc(), TII->get(TargetOpcode::COPY),
NewVReg).addReg(VReg);
@@ -566,7 +594,7 @@ void InstrEmitter::EmitRegSequence(SDNode *Node,
bool IsClone, bool IsCloned) {
unsigned DstRCIdx = cast<ConstantSDNode>(Node->getOperand(0))->getZExtValue();
const TargetRegisterClass *RC = TRI->getRegClass(DstRCIdx);
- unsigned NewVReg = MRI->createVirtualRegister(RC);
+ unsigned NewVReg = MRI->createVirtualRegister(TRI->getAllocatableClass(RC));
MachineInstr *MI = BuildMI(*MF, Node->getDebugLoc(),
TII->get(TargetOpcode::REG_SEQUENCE), NewVReg);
unsigned NumOps = Node->getNumOperands();
@@ -691,7 +719,8 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned,
const MCInstrDesc &II = TII->get(Opc);
unsigned NumResults = CountResults(Node);
- unsigned NodeOperands = CountOperands(Node);
+ unsigned NumImpUses = 0;
+ unsigned NodeOperands = countOperands(Node, NumImpUses);
bool HasPhysRegOuts = NumResults > II.getNumDefs() && II.getImplicitDefs()!=0;
#ifndef NDEBUG
unsigned NumMIOperands = NodeOperands + NumResults;
@@ -700,7 +729,8 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned,
"Too few operands for a variadic node!");
else
assert(NumMIOperands >= II.getNumOperands() &&
- NumMIOperands <= II.getNumOperands()+II.getNumImplicitDefs() &&
+ NumMIOperands <= II.getNumOperands() + II.getNumImplicitDefs() +
+ NumImpUses &&
"#operands for dag node doesn't match .td file!");
#endif
diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.h b/lib/CodeGen/SelectionDAG/InstrEmitter.h
index c081f38..9eddee9 100644
--- a/lib/CodeGen/SelectionDAG/InstrEmitter.h
+++ b/lib/CodeGen/SelectionDAG/InstrEmitter.h
@@ -105,12 +105,6 @@ public:
/// (which do not go into the machine instrs.)
static unsigned CountResults(SDNode *Node);
- /// CountOperands - The inputs to target nodes have any actual inputs first,
- /// followed by an optional chain operand, then flag operands. Compute
- /// the number of actual operands that will go into the resulting
- /// MachineInstr.
- static unsigned CountOperands(SDNode *Node);
-
/// EmitDbgValue - Generate machine instruction for a dbg_value node.
///
MachineInstr *EmitDbgValue(SDDbgValue *SD,
diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index a96a997..908ebb9 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -11,7 +11,11 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/CallingConv.h"
+#include "llvm/Constants.h"
+#include "llvm/DebugInfo.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/LLVMContext.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
@@ -20,10 +24,6 @@
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetMachine.h"
-#include "llvm/CallingConv.h"
-#include "llvm/Constants.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/LLVMContext.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
@@ -70,6 +70,9 @@ private:
SDValue OptimizeFloatStore(StoreSDNode *ST);
+ void LegalizeLoadOps(SDNode *Node);
+ void LegalizeStoreOps(SDNode *Node);
+
/// PerformInsertVectorEltInMemory - Some target cannot handle a variable
/// insertion index for the INSERT_VECTOR_ELT instruction. In this case, it
/// is necessary to spill the vector being inserted into to memory, perform
@@ -150,21 +153,21 @@ public:
// Node replacement helpers
void ReplacedNode(SDNode *N) {
if (N->use_empty()) {
- DAG.RemoveDeadNode(N, this);
+ DAG.RemoveDeadNode(N);
} else {
ForgetNode(N);
}
}
void ReplaceNode(SDNode *Old, SDNode *New) {
- DAG.ReplaceAllUsesWith(Old, New, this);
+ DAG.ReplaceAllUsesWith(Old, New);
ReplacedNode(Old);
}
void ReplaceNode(SDValue Old, SDValue New) {
- DAG.ReplaceAllUsesWith(Old, New, this);
+ DAG.ReplaceAllUsesWith(Old, New);
ReplacedNode(Old.getNode());
}
void ReplaceNode(SDNode *Old, const SDValue *New) {
- DAG.ReplaceAllUsesWith(Old, New, this);
+ DAG.ReplaceAllUsesWith(Old, New);
ReplacedNode(Old);
}
};
@@ -203,7 +206,8 @@ SelectionDAGLegalize::ShuffleWithNarrowerEltType(EVT NVT, EVT VT, DebugLoc dl,
}
SelectionDAGLegalize::SelectionDAGLegalize(SelectionDAG &dag)
- : TM(dag.getTarget()), TLI(dag.getTargetLoweringInfo()),
+ : SelectionDAG::DAGUpdateListener(dag),
+ TM(dag.getTarget()), TLI(dag.getTargetLoweringInfo()),
DAG(dag) {
}
@@ -424,7 +428,7 @@ ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG,
DebugLoc dl = LD->getDebugLoc();
if (VT.isFloatingPoint() || VT.isVector()) {
EVT intVT = EVT::getIntegerVT(*DAG.getContext(), LoadedVT.getSizeInBits());
- if (TLI.isTypeLegal(intVT)) {
+ if (TLI.isTypeLegal(intVT) && TLI.isTypeLegal(LoadedVT)) {
// Expand to a (misaligned) integer load of the same size,
// then bitconvert to floating point or vector.
SDValue newLoad = DAG.getLoad(intVT, dl, Chain, Ptr, LD->getPointerInfo(),
@@ -432,8 +436,9 @@ ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG,
LD->isNonTemporal(),
LD->isInvariant(), LD->getAlignment());
SDValue Result = DAG.getNode(ISD::BITCAST, dl, LoadedVT, newLoad);
- if (VT.isFloatingPoint() && LoadedVT != VT)
- Result = DAG.getNode(ISD::FP_EXTEND, dl, VT, Result);
+ if (LoadedVT != VT)
+ Result = DAG.getNode(VT.isFloatingPoint() ? ISD::FP_EXTEND :
+ ISD::ANY_EXTEND, dl, VT, Result);
ValResult = Result;
ChainResult = Chain;
@@ -638,9 +643,8 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) {
// probably means that we need to integrate dag combiner and legalizer
// together.
// We generally can't do this one for long doubles.
- SDValue Tmp1 = ST->getChain();
- SDValue Tmp2 = ST->getBasePtr();
- SDValue Tmp3;
+ SDValue Chain = ST->getChain();
+ SDValue Ptr = ST->getBasePtr();
unsigned Alignment = ST->getAlignment();
bool isVolatile = ST->isVolatile();
bool isNonTemporal = ST->isNonTemporal();
@@ -648,19 +652,19 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) {
if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(ST->getValue())) {
if (CFP->getValueType(0) == MVT::f32 &&
TLI.isTypeLegal(MVT::i32)) {
- Tmp3 = DAG.getConstant(CFP->getValueAPF().
+ SDValue Con = DAG.getConstant(CFP->getValueAPF().
bitcastToAPInt().zextOrTrunc(32),
MVT::i32);
- return DAG.getStore(Tmp1, dl, Tmp3, Tmp2, ST->getPointerInfo(),
+ return DAG.getStore(Chain, dl, Con, Ptr, ST->getPointerInfo(),
isVolatile, isNonTemporal, Alignment);
}
if (CFP->getValueType(0) == MVT::f64) {
// If this target supports 64-bit registers, do a single 64-bit store.
if (TLI.isTypeLegal(MVT::i64)) {
- Tmp3 = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt().
+ SDValue Con = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt().
zextOrTrunc(64), MVT::i64);
- return DAG.getStore(Tmp1, dl, Tmp3, Tmp2, ST->getPointerInfo(),
+ return DAG.getStore(Chain, dl, Con, Ptr, ST->getPointerInfo(),
isVolatile, isNonTemporal, Alignment);
}
@@ -673,11 +677,11 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) {
SDValue Hi = DAG.getConstant(IntVal.lshr(32).trunc(32), MVT::i32);
if (TLI.isBigEndian()) std::swap(Lo, Hi);
- Lo = DAG.getStore(Tmp1, dl, Lo, Tmp2, ST->getPointerInfo(), isVolatile,
+ Lo = DAG.getStore(Chain, dl, Lo, Ptr, ST->getPointerInfo(), isVolatile,
isNonTemporal, Alignment);
- Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2,
+ Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
DAG.getIntPtrConstant(4));
- Hi = DAG.getStore(Tmp1, dl, Hi, Tmp2,
+ Hi = DAG.getStore(Chain, dl, Hi, Ptr,
ST->getPointerInfo().getWithOffset(4),
isVolatile, isNonTemporal, MinAlign(Alignment, 4U));
@@ -688,14 +692,448 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) {
return SDValue(0, 0);
}
+void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) {
+ StoreSDNode *ST = cast<StoreSDNode>(Node);
+ SDValue Chain = ST->getChain();
+ SDValue Ptr = ST->getBasePtr();
+ DebugLoc dl = Node->getDebugLoc();
+
+ unsigned Alignment = ST->getAlignment();
+ bool isVolatile = ST->isVolatile();
+ bool isNonTemporal = ST->isNonTemporal();
+
+ if (!ST->isTruncatingStore()) {
+ if (SDNode *OptStore = OptimizeFloatStore(ST).getNode()) {
+ ReplaceNode(ST, OptStore);
+ return;
+ }
+
+ {
+ SDValue Value = ST->getValue();
+ EVT VT = Value.getValueType();
+ switch (TLI.getOperationAction(ISD::STORE, VT)) {
+ default: llvm_unreachable("This action is not supported yet!");
+ case TargetLowering::Legal:
+ // If this is an unaligned store and the target doesn't support it,
+ // expand it.
+ if (!TLI.allowsUnalignedMemoryAccesses(ST->getMemoryVT())) {
+ Type *Ty = ST->getMemoryVT().getTypeForEVT(*DAG.getContext());
+ unsigned ABIAlignment= TLI.getTargetData()->getABITypeAlignment(Ty);
+ if (ST->getAlignment() < ABIAlignment)
+ ExpandUnalignedStore(cast<StoreSDNode>(Node),
+ DAG, TLI, this);
+ }
+ break;
+ case TargetLowering::Custom: {
+ SDValue Res = TLI.LowerOperation(SDValue(Node, 0), DAG);
+ if (Res.getNode())
+ ReplaceNode(SDValue(Node, 0), Res);
+ return;
+ }
+ case TargetLowering::Promote: {
+ assert(VT.isVector() && "Unknown legal promote case!");
+ Value = DAG.getNode(ISD::BITCAST, dl,
+ TLI.getTypeToPromoteTo(ISD::STORE, VT), Value);
+ SDValue Result =
+ DAG.getStore(Chain, dl, Value, Ptr,
+ ST->getPointerInfo(), isVolatile,
+ isNonTemporal, Alignment);
+ ReplaceNode(SDValue(Node, 0), Result);
+ break;
+ }
+ }
+ return;
+ }
+ } else {
+ SDValue Value = ST->getValue();
+
+ EVT StVT = ST->getMemoryVT();
+ unsigned StWidth = StVT.getSizeInBits();
+
+ if (StWidth != StVT.getStoreSizeInBits()) {
+ // Promote to a byte-sized store with upper bits zero if not
+ // storing an integral number of bytes. For example, promote
+ // TRUNCSTORE:i1 X -> TRUNCSTORE:i8 (and X, 1)
+ EVT NVT = EVT::getIntegerVT(*DAG.getContext(),
+ StVT.getStoreSizeInBits());
+ Value = DAG.getZeroExtendInReg(Value, dl, StVT);
+ SDValue Result =
+ DAG.getTruncStore(Chain, dl, Value, Ptr, ST->getPointerInfo(),
+ NVT, isVolatile, isNonTemporal, Alignment);
+ ReplaceNode(SDValue(Node, 0), Result);
+ } else if (StWidth & (StWidth - 1)) {
+ // If not storing a power-of-2 number of bits, expand as two stores.
+ assert(!StVT.isVector() && "Unsupported truncstore!");
+ unsigned RoundWidth = 1 << Log2_32(StWidth);
+ assert(RoundWidth < StWidth);
+ unsigned ExtraWidth = StWidth - RoundWidth;
+ assert(ExtraWidth < RoundWidth);
+ assert(!(RoundWidth % 8) && !(ExtraWidth % 8) &&
+ "Store size not an integral number of bytes!");
+ EVT RoundVT = EVT::getIntegerVT(*DAG.getContext(), RoundWidth);
+ EVT ExtraVT = EVT::getIntegerVT(*DAG.getContext(), ExtraWidth);
+ SDValue Lo, Hi;
+ unsigned IncrementSize;
+
+ if (TLI.isLittleEndian()) {
+ // TRUNCSTORE:i24 X -> TRUNCSTORE:i16 X, TRUNCSTORE@+2:i8 (srl X, 16)
+ // Store the bottom RoundWidth bits.
+ Lo = DAG.getTruncStore(Chain, dl, Value, Ptr, ST->getPointerInfo(),
+ RoundVT,
+ isVolatile, isNonTemporal, Alignment);
+
+ // Store the remaining ExtraWidth bits.
+ IncrementSize = RoundWidth / 8;
+ Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
+ DAG.getIntPtrConstant(IncrementSize));
+ Hi = DAG.getNode(ISD::SRL, dl, Value.getValueType(), Value,
+ DAG.getConstant(RoundWidth,
+ TLI.getShiftAmountTy(Value.getValueType())));
+ Hi = DAG.getTruncStore(Chain, dl, Hi, Ptr,
+ ST->getPointerInfo().getWithOffset(IncrementSize),
+ ExtraVT, isVolatile, isNonTemporal,
+ MinAlign(Alignment, IncrementSize));
+ } else {
+ // Big endian - avoid unaligned stores.
+ // TRUNCSTORE:i24 X -> TRUNCSTORE:i16 (srl X, 8), TRUNCSTORE@+2:i8 X
+ // Store the top RoundWidth bits.
+ Hi = DAG.getNode(ISD::SRL, dl, Value.getValueType(), Value,
+ DAG.getConstant(ExtraWidth,
+ TLI.getShiftAmountTy(Value.getValueType())));
+ Hi = DAG.getTruncStore(Chain, dl, Hi, Ptr, ST->getPointerInfo(),
+ RoundVT, isVolatile, isNonTemporal, Alignment);
+
+ // Store the remaining ExtraWidth bits.
+ IncrementSize = RoundWidth / 8;
+ Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
+ DAG.getIntPtrConstant(IncrementSize));
+ Lo = DAG.getTruncStore(Chain, dl, Value, Ptr,
+ ST->getPointerInfo().getWithOffset(IncrementSize),
+ ExtraVT, isVolatile, isNonTemporal,
+ MinAlign(Alignment, IncrementSize));
+ }
+
+ // The order of the stores doesn't matter.
+ SDValue Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi);
+ ReplaceNode(SDValue(Node, 0), Result);
+ } else {
+ switch (TLI.getTruncStoreAction(ST->getValue().getValueType(), StVT)) {
+ default: llvm_unreachable("This action is not supported yet!");
+ case TargetLowering::Legal:
+ // If this is an unaligned store and the target doesn't support it,
+ // expand it.
+ if (!TLI.allowsUnalignedMemoryAccesses(ST->getMemoryVT())) {
+ Type *Ty = ST->getMemoryVT().getTypeForEVT(*DAG.getContext());
+ unsigned ABIAlignment= TLI.getTargetData()->getABITypeAlignment(Ty);
+ if (ST->getAlignment() < ABIAlignment)
+ ExpandUnalignedStore(cast<StoreSDNode>(Node), DAG, TLI, this);
+ }
+ break;
+ case TargetLowering::Custom: {
+ SDValue Res = TLI.LowerOperation(SDValue(Node, 0), DAG);
+ if (Res.getNode())
+ ReplaceNode(SDValue(Node, 0), Res);
+ return;
+ }
+ case TargetLowering::Expand:
+ assert(!StVT.isVector() &&
+ "Vector Stores are handled in LegalizeVectorOps");
+
+ // TRUNCSTORE:i16 i32 -> STORE i16
+ assert(TLI.isTypeLegal(StVT) &&
+ "Do not know how to expand this store!");
+ Value = DAG.getNode(ISD::TRUNCATE, dl, StVT, Value);
+ SDValue Result =
+ DAG.getStore(Chain, dl, Value, Ptr, ST->getPointerInfo(),
+ isVolatile, isNonTemporal, Alignment);
+ ReplaceNode(SDValue(Node, 0), Result);
+ break;
+ }
+ }
+ }
+}
+
+void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) {
+ LoadSDNode *LD = cast<LoadSDNode>(Node);
+ SDValue Chain = LD->getChain(); // The chain.
+ SDValue Ptr = LD->getBasePtr(); // The base pointer.
+ SDValue Value; // The value returned by the load op.
+ DebugLoc dl = Node->getDebugLoc();
+
+ ISD::LoadExtType ExtType = LD->getExtensionType();
+ if (ExtType == ISD::NON_EXTLOAD) {
+ EVT VT = Node->getValueType(0);
+ SDValue RVal = SDValue(Node, 0);
+ SDValue RChain = SDValue(Node, 1);
+
+ switch (TLI.getOperationAction(Node->getOpcode(), VT)) {
+ default: llvm_unreachable("This action is not supported yet!");
+ case TargetLowering::Legal:
+ // If this is an unaligned load and the target doesn't support it,
+ // expand it.
+ if (!TLI.allowsUnalignedMemoryAccesses(LD->getMemoryVT())) {
+ Type *Ty = LD->getMemoryVT().getTypeForEVT(*DAG.getContext());
+ unsigned ABIAlignment =
+ TLI.getTargetData()->getABITypeAlignment(Ty);
+ if (LD->getAlignment() < ABIAlignment){
+ ExpandUnalignedLoad(cast<LoadSDNode>(Node),
+ DAG, TLI, RVal, RChain);
+ }
+ }
+ break;
+ case TargetLowering::Custom: {
+ SDValue Res = TLI.LowerOperation(RVal, DAG);
+ if (Res.getNode()) {
+ RVal = Res;
+ RChain = Res.getValue(1);
+ }
+ break;
+ }
+ case TargetLowering::Promote: {
+ // Only promote a load of vector type to another.
+ assert(VT.isVector() && "Cannot promote this load!");
+ // Change base type to a different vector type.
+ EVT NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), VT);
+
+ SDValue Res = DAG.getLoad(NVT, dl, Chain, Ptr, LD->getPointerInfo(),
+ LD->isVolatile(), LD->isNonTemporal(),
+ LD->isInvariant(), LD->getAlignment());
+ RVal = DAG.getNode(ISD::BITCAST, dl, VT, Res);
+ RChain = Res.getValue(1);
+ break;
+ }
+ }
+ if (RChain.getNode() != Node) {
+ assert(RVal.getNode() != Node && "Load must be completely replaced");
+ DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 0), RVal);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 1), RChain);
+ ReplacedNode(Node);
+ }
+ return;
+ }
+
+ EVT SrcVT = LD->getMemoryVT();
+ unsigned SrcWidth = SrcVT.getSizeInBits();
+ unsigned Alignment = LD->getAlignment();
+ bool isVolatile = LD->isVolatile();
+ bool isNonTemporal = LD->isNonTemporal();
+
+ if (SrcWidth != SrcVT.getStoreSizeInBits() &&
+ // Some targets pretend to have an i1 loading operation, and actually
+ // load an i8. This trick is correct for ZEXTLOAD because the top 7
+ // bits are guaranteed to be zero; it helps the optimizers understand
+ // that these bits are zero. It is also useful for EXTLOAD, since it
+ // tells the optimizers that those bits are undefined. It would be
+ // nice to have an effective generic way of getting these benefits...
+ // Until such a way is found, don't insist on promoting i1 here.
+ (SrcVT != MVT::i1 ||
+ TLI.getLoadExtAction(ExtType, MVT::i1) == TargetLowering::Promote)) {
+ // Promote to a byte-sized load if not loading an integral number of
+ // bytes. For example, promote EXTLOAD:i20 -> EXTLOAD:i24.
+ unsigned NewWidth = SrcVT.getStoreSizeInBits();
+ EVT NVT = EVT::getIntegerVT(*DAG.getContext(), NewWidth);
+ SDValue Ch;
+
+ // The extra bits are guaranteed to be zero, since we stored them that
+ // way. A zext load from NVT thus automatically gives zext from SrcVT.
+
+ ISD::LoadExtType NewExtType =
+ ExtType == ISD::ZEXTLOAD ? ISD::ZEXTLOAD : ISD::EXTLOAD;
+
+ SDValue Result =
+ DAG.getExtLoad(NewExtType, dl, Node->getValueType(0),
+ Chain, Ptr, LD->getPointerInfo(),
+ NVT, isVolatile, isNonTemporal, Alignment);
+
+ Ch = Result.getValue(1); // The chain.
+
+ if (ExtType == ISD::SEXTLOAD)
+ // Having the top bits zero doesn't help when sign extending.
+ Result = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl,
+ Result.getValueType(),
+ Result, DAG.getValueType(SrcVT));
+ else if (ExtType == ISD::ZEXTLOAD || NVT == Result.getValueType())
+ // All the top bits are guaranteed to be zero - inform the optimizers.
+ Result = DAG.getNode(ISD::AssertZext, dl,
+ Result.getValueType(), Result,
+ DAG.getValueType(SrcVT));
+
+ Value = Result;
+ Chain = Ch;
+ } else if (SrcWidth & (SrcWidth - 1)) {
+ // If not loading a power-of-2 number of bits, expand as two loads.
+ assert(!SrcVT.isVector() && "Unsupported extload!");
+ unsigned RoundWidth = 1 << Log2_32(SrcWidth);
+ assert(RoundWidth < SrcWidth);
+ unsigned ExtraWidth = SrcWidth - RoundWidth;
+ assert(ExtraWidth < RoundWidth);
+ assert(!(RoundWidth % 8) && !(ExtraWidth % 8) &&
+ "Load size not an integral number of bytes!");
+ EVT RoundVT = EVT::getIntegerVT(*DAG.getContext(), RoundWidth);
+ EVT ExtraVT = EVT::getIntegerVT(*DAG.getContext(), ExtraWidth);
+ SDValue Lo, Hi, Ch;
+ unsigned IncrementSize;
+
+ if (TLI.isLittleEndian()) {
+ // EXTLOAD:i24 -> ZEXTLOAD:i16 | (shl EXTLOAD@+2:i8, 16)
+ // Load the bottom RoundWidth bits.
+ Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, Node->getValueType(0),
+ Chain, Ptr,
+ LD->getPointerInfo(), RoundVT, isVolatile,
+ isNonTemporal, Alignment);
+
+ // Load the remaining ExtraWidth bits.
+ IncrementSize = RoundWidth / 8;
+ Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
+ DAG.getIntPtrConstant(IncrementSize));
+ Hi = DAG.getExtLoad(ExtType, dl, Node->getValueType(0), Chain, Ptr,
+ LD->getPointerInfo().getWithOffset(IncrementSize),
+ ExtraVT, isVolatile, isNonTemporal,
+ MinAlign(Alignment, IncrementSize));
+
+ // Build a factor node to remember that this load is independent of
+ // the other one.
+ Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
+ Hi.getValue(1));
+
+ // Move the top bits to the right place.
+ Hi = DAG.getNode(ISD::SHL, dl, Hi.getValueType(), Hi,
+ DAG.getConstant(RoundWidth,
+ TLI.getShiftAmountTy(Hi.getValueType())));
+
+ // Join the hi and lo parts.
+ Value = DAG.getNode(ISD::OR, dl, Node->getValueType(0), Lo, Hi);
+ } else {
+ // Big endian - avoid unaligned loads.
+ // EXTLOAD:i24 -> (shl EXTLOAD:i16, 8) | ZEXTLOAD@+2:i8
+ // Load the top RoundWidth bits.
+ Hi = DAG.getExtLoad(ExtType, dl, Node->getValueType(0), Chain, Ptr,
+ LD->getPointerInfo(), RoundVT, isVolatile,
+ isNonTemporal, Alignment);
+
+ // Load the remaining ExtraWidth bits.
+ IncrementSize = RoundWidth / 8;
+ Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
+ DAG.getIntPtrConstant(IncrementSize));
+ Lo = DAG.getExtLoad(ISD::ZEXTLOAD,
+ dl, Node->getValueType(0), Chain, Ptr,
+ LD->getPointerInfo().getWithOffset(IncrementSize),
+ ExtraVT, isVolatile, isNonTemporal,
+ MinAlign(Alignment, IncrementSize));
+
+ // Build a factor node to remember that this load is independent of
+ // the other one.
+ Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
+ Hi.getValue(1));
+
+ // Move the top bits to the right place.
+ Hi = DAG.getNode(ISD::SHL, dl, Hi.getValueType(), Hi,
+ DAG.getConstant(ExtraWidth,
+ TLI.getShiftAmountTy(Hi.getValueType())));
+
+ // Join the hi and lo parts.
+ Value = DAG.getNode(ISD::OR, dl, Node->getValueType(0), Lo, Hi);
+ }
+
+ Chain = Ch;
+ } else {
+ bool isCustom = false;
+ switch (TLI.getLoadExtAction(ExtType, SrcVT)) {
+ default: llvm_unreachable("This action is not supported yet!");
+ case TargetLowering::Custom:
+ isCustom = true;
+ // FALLTHROUGH
+ case TargetLowering::Legal: {
+ Value = SDValue(Node, 0);
+ Chain = SDValue(Node, 1);
+
+ if (isCustom) {
+ SDValue Res = TLI.LowerOperation(SDValue(Node, 0), DAG);
+ if (Res.getNode()) {
+ Value = Res;
+ Chain = Res.getValue(1);
+ }
+ } else {
+ // If this is an unaligned load and the target doesn't support it,
+ // expand it.
+ if (!TLI.allowsUnalignedMemoryAccesses(LD->getMemoryVT())) {
+ Type *Ty =
+ LD->getMemoryVT().getTypeForEVT(*DAG.getContext());
+ unsigned ABIAlignment =
+ TLI.getTargetData()->getABITypeAlignment(Ty);
+ if (LD->getAlignment() < ABIAlignment){
+ ExpandUnalignedLoad(cast<LoadSDNode>(Node),
+ DAG, TLI, Value, Chain);
+ }
+ }
+ }
+ break;
+ }
+ case TargetLowering::Expand:
+ if (!TLI.isLoadExtLegal(ISD::EXTLOAD, SrcVT) && TLI.isTypeLegal(SrcVT)) {
+ SDValue Load = DAG.getLoad(SrcVT, dl, Chain, Ptr,
+ LD->getPointerInfo(),
+ LD->isVolatile(), LD->isNonTemporal(),
+ LD->isInvariant(), LD->getAlignment());
+ unsigned ExtendOp;
+ switch (ExtType) {
+ case ISD::EXTLOAD:
+ ExtendOp = (SrcVT.isFloatingPoint() ?
+ ISD::FP_EXTEND : ISD::ANY_EXTEND);
+ break;
+ case ISD::SEXTLOAD: ExtendOp = ISD::SIGN_EXTEND; break;
+ case ISD::ZEXTLOAD: ExtendOp = ISD::ZERO_EXTEND; break;
+ default: llvm_unreachable("Unexpected extend load type!");
+ }
+ Value = DAG.getNode(ExtendOp, dl, Node->getValueType(0), Load);
+ Chain = Load.getValue(1);
+ break;
+ }
+
+ assert(!SrcVT.isVector() &&
+ "Vector Loads are handled in LegalizeVectorOps");
+
+ // FIXME: This does not work for vectors on most targets. Sign- and
+ // zero-extend operations are currently folded into extending loads,
+ // whether they are legal or not, and then we end up here without any
+ // support for legalizing them.
+ assert(ExtType != ISD::EXTLOAD &&
+ "EXTLOAD should always be supported!");
+ // Turn the unsupported load into an EXTLOAD followed by an explicit
+ // zero/sign extend inreg.
+ SDValue Result = DAG.getExtLoad(ISD::EXTLOAD, dl, Node->getValueType(0),
+ Chain, Ptr, LD->getPointerInfo(), SrcVT,
+ LD->isVolatile(), LD->isNonTemporal(),
+ LD->getAlignment());
+ SDValue ValRes;
+ if (ExtType == ISD::SEXTLOAD)
+ ValRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl,
+ Result.getValueType(),
+ Result, DAG.getValueType(SrcVT));
+ else
+ ValRes = DAG.getZeroExtendInReg(Result, dl, SrcVT.getScalarType());
+ Value = ValRes;
+ Chain = Result.getValue(1);
+ break;
+ }
+ }
+
+ // Since loads produce two values, make sure to remember that we legalized
+ // both of them.
+ if (Chain.getNode() != Node) {
+ assert(Value.getNode() != Node && "Load must be completely replaced");
+ DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 0), Value);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 1), Chain);
+ ReplacedNode(Node);
+ }
+}
+
/// LegalizeOp - Return a legal replacement for the given operation, with
/// all legal operands.
void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
if (Node->getOpcode() == ISD::TargetConstant) // Allow illegal target nodes.
return;
- DebugLoc dl = Node->getDebugLoc();
-
for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i)
assert(TLI.getTypeAction(*DAG.getContext(), Node->getValueType(i)) ==
TargetLowering::TypeLegal &&
@@ -708,9 +1146,6 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
Node->getOperand(i).getOpcode() == ISD::TargetConstant) &&
"Unexpected illegal type!");
- SDValue Tmp1, Tmp2, Tmp3, Tmp4;
- bool isCustom = false;
-
// Figure out the correct action; the way to query this varies by opcode
TargetLowering::LegalizeAction Action = TargetLowering::Legal;
bool SimpleFinishLegalizing = true;
@@ -816,9 +1251,7 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
}
if (SimpleFinishLegalizing) {
- SmallVector<SDValue, 8> Ops;
- for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i)
- Ops.push_back(Node->getOperand(i));
+ SDNode *NewNode = Node;
switch (Node->getOpcode()) {
default: break;
case ISD::SHL:
@@ -828,11 +1261,14 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
case ISD::ROTR:
// Legalizing shifts/rotates requires adjusting the shift amount
// to the appropriate width.
- if (!Ops[1].getValueType().isVector()) {
- SDValue SAO = DAG.getShiftAmountOperand(Ops[0].getValueType(), Ops[1]);
+ if (!Node->getOperand(1).getValueType().isVector()) {
+ SDValue SAO =
+ DAG.getShiftAmountOperand(Node->getOperand(0).getValueType(),
+ Node->getOperand(1));
HandleSDNode Handle(SAO);
LegalizeOp(SAO.getNode());
- Ops[1] = Handle.getValue();
+ NewNode = DAG.UpdateNodeOperands(Node, Node->getOperand(0),
+ Handle.getValue());
}
break;
case ISD::SRL_PARTS:
@@ -840,18 +1276,21 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
case ISD::SHL_PARTS:
// Legalizing shifts/rotates requires adjusting the shift amount
// to the appropriate width.
- if (!Ops[2].getValueType().isVector()) {
- SDValue SAO = DAG.getShiftAmountOperand(Ops[0].getValueType(), Ops[2]);
+ if (!Node->getOperand(2).getValueType().isVector()) {
+ SDValue SAO =
+ DAG.getShiftAmountOperand(Node->getOperand(0).getValueType(),
+ Node->getOperand(2));
HandleSDNode Handle(SAO);
LegalizeOp(SAO.getNode());
- Ops[2] = Handle.getValue();
+ NewNode = DAG.UpdateNodeOperands(Node, Node->getOperand(0),
+ Node->getOperand(1),
+ Handle.getValue());
}
break;
}
- SDNode *NewNode = DAG.UpdateNodeOperands(Node, Ops.data(), Ops.size());
if (NewNode != Node) {
- DAG.ReplaceAllUsesWith(Node, NewNode, this);
+ DAG.ReplaceAllUsesWith(Node, NewNode);
for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i)
DAG.TransferDbgValues(SDValue(Node, i), SDValue(NewNode, i));
ReplacedNode(Node);
@@ -860,27 +1299,27 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
switch (Action) {
case TargetLowering::Legal:
return;
- case TargetLowering::Custom:
+ case TargetLowering::Custom: {
// FIXME: The handling for custom lowering with multiple results is
// a complete mess.
- Tmp1 = TLI.LowerOperation(SDValue(Node, 0), DAG);
- if (Tmp1.getNode()) {
+ SDValue Res = TLI.LowerOperation(SDValue(Node, 0), DAG);
+ if (Res.getNode()) {
SmallVector<SDValue, 8> ResultVals;
for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i) {
if (e == 1)
- ResultVals.push_back(Tmp1);
+ ResultVals.push_back(Res);
else
- ResultVals.push_back(Tmp1.getValue(i));
+ ResultVals.push_back(Res.getValue(i));
}
- if (Tmp1.getNode() != Node || Tmp1.getResNo() != 0) {
- DAG.ReplaceAllUsesWith(Node, ResultVals.data(), this);
+ if (Res.getNode() != Node || Res.getResNo() != 0) {
+ DAG.ReplaceAllUsesWith(Node, ResultVals.data());
for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i)
DAG.TransferDbgValues(SDValue(Node, i), ResultVals[i]);
ReplacedNode(Node);
}
return;
}
-
+ }
// FALL THROUGH
case TargetLowering::Expand:
ExpandNode(Node);
@@ -904,428 +1343,10 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
case ISD::CALLSEQ_END:
break;
case ISD::LOAD: {
- LoadSDNode *LD = cast<LoadSDNode>(Node);
- Tmp1 = LD->getChain(); // Legalize the chain.
- Tmp2 = LD->getBasePtr(); // Legalize the base pointer.
-
- ISD::LoadExtType ExtType = LD->getExtensionType();
- if (ExtType == ISD::NON_EXTLOAD) {
- EVT VT = Node->getValueType(0);
- Tmp3 = SDValue(Node, 0);
- Tmp4 = SDValue(Node, 1);
-
- switch (TLI.getOperationAction(Node->getOpcode(), VT)) {
- default: llvm_unreachable("This action is not supported yet!");
- case TargetLowering::Legal:
- // If this is an unaligned load and the target doesn't support it,
- // expand it.
- if (!TLI.allowsUnalignedMemoryAccesses(LD->getMemoryVT())) {
- Type *Ty = LD->getMemoryVT().getTypeForEVT(*DAG.getContext());
- unsigned ABIAlignment = TLI.getTargetData()->getABITypeAlignment(Ty);
- if (LD->getAlignment() < ABIAlignment){
- ExpandUnalignedLoad(cast<LoadSDNode>(Node),
- DAG, TLI, Tmp3, Tmp4);
- }
- }
- break;
- case TargetLowering::Custom:
- Tmp1 = TLI.LowerOperation(Tmp3, DAG);
- if (Tmp1.getNode()) {
- Tmp3 = Tmp1;
- Tmp4 = Tmp1.getValue(1);
- }
- break;
- case TargetLowering::Promote: {
- // Only promote a load of vector type to another.
- assert(VT.isVector() && "Cannot promote this load!");
- // Change base type to a different vector type.
- EVT NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), VT);
-
- Tmp1 = DAG.getLoad(NVT, dl, Tmp1, Tmp2, LD->getPointerInfo(),
- LD->isVolatile(), LD->isNonTemporal(),
- LD->isInvariant(), LD->getAlignment());
- Tmp3 = DAG.getNode(ISD::BITCAST, dl, VT, Tmp1);
- Tmp4 = Tmp1.getValue(1);
- break;
- }
- }
- if (Tmp4.getNode() != Node) {
- assert(Tmp3.getNode() != Node && "Load must be completely replaced");
- DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 0), Tmp3);
- DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 1), Tmp4);
- ReplacedNode(Node);
- }
- return;
- }
-
- EVT SrcVT = LD->getMemoryVT();
- unsigned SrcWidth = SrcVT.getSizeInBits();
- unsigned Alignment = LD->getAlignment();
- bool isVolatile = LD->isVolatile();
- bool isNonTemporal = LD->isNonTemporal();
-
- if (SrcWidth != SrcVT.getStoreSizeInBits() &&
- // Some targets pretend to have an i1 loading operation, and actually
- // load an i8. This trick is correct for ZEXTLOAD because the top 7
- // bits are guaranteed to be zero; it helps the optimizers understand
- // that these bits are zero. It is also useful for EXTLOAD, since it
- // tells the optimizers that those bits are undefined. It would be
- // nice to have an effective generic way of getting these benefits...
- // Until such a way is found, don't insist on promoting i1 here.
- (SrcVT != MVT::i1 ||
- TLI.getLoadExtAction(ExtType, MVT::i1) == TargetLowering::Promote)) {
- // Promote to a byte-sized load if not loading an integral number of
- // bytes. For example, promote EXTLOAD:i20 -> EXTLOAD:i24.
- unsigned NewWidth = SrcVT.getStoreSizeInBits();
- EVT NVT = EVT::getIntegerVT(*DAG.getContext(), NewWidth);
- SDValue Ch;
-
- // The extra bits are guaranteed to be zero, since we stored them that
- // way. A zext load from NVT thus automatically gives zext from SrcVT.
-
- ISD::LoadExtType NewExtType =
- ExtType == ISD::ZEXTLOAD ? ISD::ZEXTLOAD : ISD::EXTLOAD;
-
- SDValue Result =
- DAG.getExtLoad(NewExtType, dl, Node->getValueType(0),
- Tmp1, Tmp2, LD->getPointerInfo(),
- NVT, isVolatile, isNonTemporal, Alignment);
-
- Ch = Result.getValue(1); // The chain.
-
- if (ExtType == ISD::SEXTLOAD)
- // Having the top bits zero doesn't help when sign extending.
- Result = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl,
- Result.getValueType(),
- Result, DAG.getValueType(SrcVT));
- else if (ExtType == ISD::ZEXTLOAD || NVT == Result.getValueType())
- // All the top bits are guaranteed to be zero - inform the optimizers.
- Result = DAG.getNode(ISD::AssertZext, dl,
- Result.getValueType(), Result,
- DAG.getValueType(SrcVT));
-
- Tmp1 = Result;
- Tmp2 = Ch;
- } else if (SrcWidth & (SrcWidth - 1)) {
- // If not loading a power-of-2 number of bits, expand as two loads.
- assert(!SrcVT.isVector() && "Unsupported extload!");
- unsigned RoundWidth = 1 << Log2_32(SrcWidth);
- assert(RoundWidth < SrcWidth);
- unsigned ExtraWidth = SrcWidth - RoundWidth;
- assert(ExtraWidth < RoundWidth);
- assert(!(RoundWidth % 8) && !(ExtraWidth % 8) &&
- "Load size not an integral number of bytes!");
- EVT RoundVT = EVT::getIntegerVT(*DAG.getContext(), RoundWidth);
- EVT ExtraVT = EVT::getIntegerVT(*DAG.getContext(), ExtraWidth);
- SDValue Lo, Hi, Ch;
- unsigned IncrementSize;
-
- if (TLI.isLittleEndian()) {
- // EXTLOAD:i24 -> ZEXTLOAD:i16 | (shl EXTLOAD@+2:i8, 16)
- // Load the bottom RoundWidth bits.
- Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, Node->getValueType(0),
- Tmp1, Tmp2,
- LD->getPointerInfo(), RoundVT, isVolatile,
- isNonTemporal, Alignment);
-
- // Load the remaining ExtraWidth bits.
- IncrementSize = RoundWidth / 8;
- Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2,
- DAG.getIntPtrConstant(IncrementSize));
- Hi = DAG.getExtLoad(ExtType, dl, Node->getValueType(0), Tmp1, Tmp2,
- LD->getPointerInfo().getWithOffset(IncrementSize),
- ExtraVT, isVolatile, isNonTemporal,
- MinAlign(Alignment, IncrementSize));
-
- // Build a factor node to remember that this load is independent of
- // the other one.
- Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
- Hi.getValue(1));
-
- // Move the top bits to the right place.
- Hi = DAG.getNode(ISD::SHL, dl, Hi.getValueType(), Hi,
- DAG.getConstant(RoundWidth,
- TLI.getShiftAmountTy(Hi.getValueType())));
-
- // Join the hi and lo parts.
- Tmp1 = DAG.getNode(ISD::OR, dl, Node->getValueType(0), Lo, Hi);
- } else {
- // Big endian - avoid unaligned loads.
- // EXTLOAD:i24 -> (shl EXTLOAD:i16, 8) | ZEXTLOAD@+2:i8
- // Load the top RoundWidth bits.
- Hi = DAG.getExtLoad(ExtType, dl, Node->getValueType(0), Tmp1, Tmp2,
- LD->getPointerInfo(), RoundVT, isVolatile,
- isNonTemporal, Alignment);
-
- // Load the remaining ExtraWidth bits.
- IncrementSize = RoundWidth / 8;
- Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2,
- DAG.getIntPtrConstant(IncrementSize));
- Lo = DAG.getExtLoad(ISD::ZEXTLOAD,
- dl, Node->getValueType(0), Tmp1, Tmp2,
- LD->getPointerInfo().getWithOffset(IncrementSize),
- ExtraVT, isVolatile, isNonTemporal,
- MinAlign(Alignment, IncrementSize));
-
- // Build a factor node to remember that this load is independent of
- // the other one.
- Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
- Hi.getValue(1));
-
- // Move the top bits to the right place.
- Hi = DAG.getNode(ISD::SHL, dl, Hi.getValueType(), Hi,
- DAG.getConstant(ExtraWidth,
- TLI.getShiftAmountTy(Hi.getValueType())));
-
- // Join the hi and lo parts.
- Tmp1 = DAG.getNode(ISD::OR, dl, Node->getValueType(0), Lo, Hi);
- }
-
- Tmp2 = Ch;
- } else {
- switch (TLI.getLoadExtAction(ExtType, SrcVT)) {
- default: llvm_unreachable("This action is not supported yet!");
- case TargetLowering::Custom:
- isCustom = true;
- // FALLTHROUGH
- case TargetLowering::Legal:
- Tmp1 = SDValue(Node, 0);
- Tmp2 = SDValue(Node, 1);
-
- if (isCustom) {
- Tmp3 = TLI.LowerOperation(SDValue(Node, 0), DAG);
- if (Tmp3.getNode()) {
- Tmp1 = Tmp3;
- Tmp2 = Tmp3.getValue(1);
- }
- } else {
- // If this is an unaligned load and the target doesn't support it,
- // expand it.
- if (!TLI.allowsUnalignedMemoryAccesses(LD->getMemoryVT())) {
- Type *Ty =
- LD->getMemoryVT().getTypeForEVT(*DAG.getContext());
- unsigned ABIAlignment =
- TLI.getTargetData()->getABITypeAlignment(Ty);
- if (LD->getAlignment() < ABIAlignment){
- ExpandUnalignedLoad(cast<LoadSDNode>(Node),
- DAG, TLI, Tmp1, Tmp2);
- }
- }
- }
- break;
- case TargetLowering::Expand:
- if (!TLI.isLoadExtLegal(ISD::EXTLOAD, SrcVT) && TLI.isTypeLegal(SrcVT)) {
- SDValue Load = DAG.getLoad(SrcVT, dl, Tmp1, Tmp2,
- LD->getPointerInfo(),
- LD->isVolatile(), LD->isNonTemporal(),
- LD->isInvariant(), LD->getAlignment());
- unsigned ExtendOp;
- switch (ExtType) {
- case ISD::EXTLOAD:
- ExtendOp = (SrcVT.isFloatingPoint() ?
- ISD::FP_EXTEND : ISD::ANY_EXTEND);
- break;
- case ISD::SEXTLOAD: ExtendOp = ISD::SIGN_EXTEND; break;
- case ISD::ZEXTLOAD: ExtendOp = ISD::ZERO_EXTEND; break;
- default: llvm_unreachable("Unexpected extend load type!");
- }
- Tmp1 = DAG.getNode(ExtendOp, dl, Node->getValueType(0), Load);
- Tmp2 = Load.getValue(1);
- break;
- }
-
- assert(!SrcVT.isVector() &&
- "Vector Loads are handled in LegalizeVectorOps");
-
- // FIXME: This does not work for vectors on most targets. Sign- and
- // zero-extend operations are currently folded into extending loads,
- // whether they are legal or not, and then we end up here without any
- // support for legalizing them.
- assert(ExtType != ISD::EXTLOAD &&
- "EXTLOAD should always be supported!");
- // Turn the unsupported load into an EXTLOAD followed by an explicit
- // zero/sign extend inreg.
- SDValue Result = DAG.getExtLoad(ISD::EXTLOAD, dl, Node->getValueType(0),
- Tmp1, Tmp2, LD->getPointerInfo(), SrcVT,
- LD->isVolatile(), LD->isNonTemporal(),
- LD->getAlignment());
- SDValue ValRes;
- if (ExtType == ISD::SEXTLOAD)
- ValRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl,
- Result.getValueType(),
- Result, DAG.getValueType(SrcVT));
- else
- ValRes = DAG.getZeroExtendInReg(Result, dl, SrcVT.getScalarType());
- Tmp1 = ValRes;
- Tmp2 = Result.getValue(1);
- break;
- }
- }
-
- // Since loads produce two values, make sure to remember that we legalized
- // both of them.
- if (Tmp2.getNode() != Node) {
- assert(Tmp1.getNode() != Node && "Load must be completely replaced");
- DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 0), Tmp1);
- DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 1), Tmp2);
- ReplacedNode(Node);
- }
- break;
+ return LegalizeLoadOps(Node);
}
case ISD::STORE: {
- StoreSDNode *ST = cast<StoreSDNode>(Node);
- Tmp1 = ST->getChain();
- Tmp2 = ST->getBasePtr();
- unsigned Alignment = ST->getAlignment();
- bool isVolatile = ST->isVolatile();
- bool isNonTemporal = ST->isNonTemporal();
-
- if (!ST->isTruncatingStore()) {
- if (SDNode *OptStore = OptimizeFloatStore(ST).getNode()) {
- ReplaceNode(ST, OptStore);
- break;
- }
-
- {
- Tmp3 = ST->getValue();
- EVT VT = Tmp3.getValueType();
- switch (TLI.getOperationAction(ISD::STORE, VT)) {
- default: llvm_unreachable("This action is not supported yet!");
- case TargetLowering::Legal:
- // If this is an unaligned store and the target doesn't support it,
- // expand it.
- if (!TLI.allowsUnalignedMemoryAccesses(ST->getMemoryVT())) {
- Type *Ty = ST->getMemoryVT().getTypeForEVT(*DAG.getContext());
- unsigned ABIAlignment= TLI.getTargetData()->getABITypeAlignment(Ty);
- if (ST->getAlignment() < ABIAlignment)
- ExpandUnalignedStore(cast<StoreSDNode>(Node),
- DAG, TLI, this);
- }
- break;
- case TargetLowering::Custom:
- Tmp1 = TLI.LowerOperation(SDValue(Node, 0), DAG);
- if (Tmp1.getNode())
- ReplaceNode(SDValue(Node, 0), Tmp1);
- break;
- case TargetLowering::Promote: {
- assert(VT.isVector() && "Unknown legal promote case!");
- Tmp3 = DAG.getNode(ISD::BITCAST, dl,
- TLI.getTypeToPromoteTo(ISD::STORE, VT), Tmp3);
- SDValue Result =
- DAG.getStore(Tmp1, dl, Tmp3, Tmp2,
- ST->getPointerInfo(), isVolatile,
- isNonTemporal, Alignment);
- ReplaceNode(SDValue(Node, 0), Result);
- break;
- }
- }
- break;
- }
- } else {
- Tmp3 = ST->getValue();
-
- EVT StVT = ST->getMemoryVT();
- unsigned StWidth = StVT.getSizeInBits();
-
- if (StWidth != StVT.getStoreSizeInBits()) {
- // Promote to a byte-sized store with upper bits zero if not
- // storing an integral number of bytes. For example, promote
- // TRUNCSTORE:i1 X -> TRUNCSTORE:i8 (and X, 1)
- EVT NVT = EVT::getIntegerVT(*DAG.getContext(),
- StVT.getStoreSizeInBits());
- Tmp3 = DAG.getZeroExtendInReg(Tmp3, dl, StVT);
- SDValue Result =
- DAG.getTruncStore(Tmp1, dl, Tmp3, Tmp2, ST->getPointerInfo(),
- NVT, isVolatile, isNonTemporal, Alignment);
- ReplaceNode(SDValue(Node, 0), Result);
- } else if (StWidth & (StWidth - 1)) {
- // If not storing a power-of-2 number of bits, expand as two stores.
- assert(!StVT.isVector() && "Unsupported truncstore!");
- unsigned RoundWidth = 1 << Log2_32(StWidth);
- assert(RoundWidth < StWidth);
- unsigned ExtraWidth = StWidth - RoundWidth;
- assert(ExtraWidth < RoundWidth);
- assert(!(RoundWidth % 8) && !(ExtraWidth % 8) &&
- "Store size not an integral number of bytes!");
- EVT RoundVT = EVT::getIntegerVT(*DAG.getContext(), RoundWidth);
- EVT ExtraVT = EVT::getIntegerVT(*DAG.getContext(), ExtraWidth);
- SDValue Lo, Hi;
- unsigned IncrementSize;
-
- if (TLI.isLittleEndian()) {
- // TRUNCSTORE:i24 X -> TRUNCSTORE:i16 X, TRUNCSTORE@+2:i8 (srl X, 16)
- // Store the bottom RoundWidth bits.
- Lo = DAG.getTruncStore(Tmp1, dl, Tmp3, Tmp2, ST->getPointerInfo(),
- RoundVT,
- isVolatile, isNonTemporal, Alignment);
-
- // Store the remaining ExtraWidth bits.
- IncrementSize = RoundWidth / 8;
- Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2,
- DAG.getIntPtrConstant(IncrementSize));
- Hi = DAG.getNode(ISD::SRL, dl, Tmp3.getValueType(), Tmp3,
- DAG.getConstant(RoundWidth,
- TLI.getShiftAmountTy(Tmp3.getValueType())));
- Hi = DAG.getTruncStore(Tmp1, dl, Hi, Tmp2,
- ST->getPointerInfo().getWithOffset(IncrementSize),
- ExtraVT, isVolatile, isNonTemporal,
- MinAlign(Alignment, IncrementSize));
- } else {
- // Big endian - avoid unaligned stores.
- // TRUNCSTORE:i24 X -> TRUNCSTORE:i16 (srl X, 8), TRUNCSTORE@+2:i8 X
- // Store the top RoundWidth bits.
- Hi = DAG.getNode(ISD::SRL, dl, Tmp3.getValueType(), Tmp3,
- DAG.getConstant(ExtraWidth,
- TLI.getShiftAmountTy(Tmp3.getValueType())));
- Hi = DAG.getTruncStore(Tmp1, dl, Hi, Tmp2, ST->getPointerInfo(),
- RoundVT, isVolatile, isNonTemporal, Alignment);
-
- // Store the remaining ExtraWidth bits.
- IncrementSize = RoundWidth / 8;
- Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2,
- DAG.getIntPtrConstant(IncrementSize));
- Lo = DAG.getTruncStore(Tmp1, dl, Tmp3, Tmp2,
- ST->getPointerInfo().getWithOffset(IncrementSize),
- ExtraVT, isVolatile, isNonTemporal,
- MinAlign(Alignment, IncrementSize));
- }
-
- // The order of the stores doesn't matter.
- SDValue Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi);
- ReplaceNode(SDValue(Node, 0), Result);
- } else {
- switch (TLI.getTruncStoreAction(ST->getValue().getValueType(), StVT)) {
- default: llvm_unreachable("This action is not supported yet!");
- case TargetLowering::Legal:
- // If this is an unaligned store and the target doesn't support it,
- // expand it.
- if (!TLI.allowsUnalignedMemoryAccesses(ST->getMemoryVT())) {
- Type *Ty = ST->getMemoryVT().getTypeForEVT(*DAG.getContext());
- unsigned ABIAlignment= TLI.getTargetData()->getABITypeAlignment(Ty);
- if (ST->getAlignment() < ABIAlignment)
- ExpandUnalignedStore(cast<StoreSDNode>(Node), DAG, TLI, this);
- }
- break;
- case TargetLowering::Custom:
- ReplaceNode(SDValue(Node, 0),
- TLI.LowerOperation(SDValue(Node, 0), DAG));
- break;
- case TargetLowering::Expand:
- assert(!StVT.isVector() &&
- "Vector Stores are handled in LegalizeVectorOps");
-
- // TRUNCSTORE:i16 i32 -> STORE i16
- assert(TLI.isTypeLegal(StVT) && "Do not know how to expand this store!");
- Tmp3 = DAG.getNode(ISD::TRUNCATE, dl, StVT, Tmp3);
- SDValue Result =
- DAG.getStore(Tmp1, dl, Tmp3, Tmp2, ST->getPointerInfo(),
- isVolatile, isNonTemporal, Alignment);
- ReplaceNode(SDValue(Node, 0), Result);
- break;
- }
- }
- }
- break;
+ return LegalizeStoreOps(Node);
}
}
}
@@ -1795,11 +1816,13 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node,
if (isTailCall)
InChain = TCChain;
- std::pair<SDValue, SDValue> CallInfo =
- TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false,
+ TargetLowering::
+ CallLoweringInfo CLI(InChain, RetTy, isSigned, !isSigned, false, false,
0, TLI.getLibcallCallingConv(LC), isTailCall,
/*doesNotReturn=*/false, /*isReturnValueUsed=*/true,
Callee, Args, DAG, Node->getDebugLoc());
+ std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI);
+
if (!CallInfo.second.getNode())
// It's a tailcall, return the chain (which is the DAG root).
@@ -1828,11 +1851,13 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, EVT RetVT,
TLI.getPointerTy());
Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
- std::pair<SDValue,SDValue> CallInfo =
- TLI.LowerCallTo(DAG.getEntryNode(), RetTy, isSigned, !isSigned, false,
- false, 0, TLI.getLibcallCallingConv(LC), /*isTailCall=*/false,
+ TargetLowering::
+ CallLoweringInfo CLI(DAG.getEntryNode(), RetTy, isSigned, !isSigned, false,
+ false, 0, TLI.getLibcallCallingConv(LC),
+ /*isTailCall=*/false,
/*doesNotReturn=*/false, /*isReturnValueUsed=*/true,
Callee, Args, DAG, dl);
+ std::pair<SDValue,SDValue> CallInfo = TLI.LowerCallTo(CLI);
return CallInfo.first;
}
@@ -1860,11 +1885,12 @@ SelectionDAGLegalize::ExpandChainLibCall(RTLIB::Libcall LC,
TLI.getPointerTy());
Type *RetTy = Node->getValueType(0).getTypeForEVT(*DAG.getContext());
- std::pair<SDValue, SDValue> CallInfo =
- TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false,
+ TargetLowering::
+ CallLoweringInfo CLI(InChain, RetTy, isSigned, !isSigned, false, false,
0, TLI.getLibcallCallingConv(LC), /*isTailCall=*/false,
/*doesNotReturn=*/false, /*isReturnValueUsed=*/true,
Callee, Args, DAG, Node->getDebugLoc());
+ std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI);
return CallInfo;
}
@@ -1919,9 +1945,11 @@ static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned,
return TLI.getLibcallName(LC) != 0;
}
-/// UseDivRem - Only issue divrem libcall if both quotient and remainder are
+/// useDivRem - Only issue divrem libcall if both quotient and remainder are
/// needed.
-static bool UseDivRem(SDNode *Node, bool isSigned, bool isDIV) {
+static bool useDivRem(SDNode *Node, bool isSigned, bool isDIV) {
+ // The other use might have been replaced with a divrem already.
+ unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
unsigned OtherOpcode = 0;
if (isSigned)
OtherOpcode = isDIV ? ISD::SREM : ISD::SDIV;
@@ -1935,7 +1963,7 @@ static bool UseDivRem(SDNode *Node, bool isSigned, bool isDIV) {
SDNode *User = *UI;
if (User == Node)
continue;
- if (User->getOpcode() == OtherOpcode &&
+ if ((User->getOpcode() == OtherOpcode || User->getOpcode() == DivRemOpc) &&
User->getOperand(0) == Op0 &&
User->getOperand(1) == Op1)
return true;
@@ -1992,11 +2020,12 @@ SelectionDAGLegalize::ExpandDivRemLibCall(SDNode *Node,
TLI.getPointerTy());
DebugLoc dl = Node->getDebugLoc();
- std::pair<SDValue, SDValue> CallInfo =
- TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false,
+ TargetLowering::
+ CallLoweringInfo CLI(InChain, RetTy, isSigned, !isSigned, false, false,
0, TLI.getLibcallCallingConv(LC), /*isTailCall=*/false,
/*doesNotReturn=*/false, /*isReturnValueUsed=*/true,
Callee, Args, DAG, dl);
+ std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI);
// Remainder is loaded back from the stack frame.
SDValue Rem = DAG.getLoad(RetVT, dl, CallInfo.second, FIPtr,
@@ -2570,14 +2599,17 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
// If the target didn't lower this, lower it to '__sync_synchronize()' call
// FIXME: handle "fence singlethread" more efficiently.
TargetLowering::ArgListTy Args;
- std::pair<SDValue, SDValue> CallResult =
- TLI.LowerCallTo(Node->getOperand(0), Type::getVoidTy(*DAG.getContext()),
+ TargetLowering::
+ CallLoweringInfo CLI(Node->getOperand(0),
+ Type::getVoidTy(*DAG.getContext()),
false, false, false, false, 0, CallingConv::C,
/*isTailCall=*/false,
/*doesNotReturn=*/false, /*isReturnValueUsed=*/true,
DAG.getExternalSymbol("__sync_synchronize",
TLI.getPointerTy()),
Args, DAG, dl);
+ std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI);
+
Results.push_back(CallResult.second);
break;
}
@@ -2647,13 +2679,16 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
case ISD::TRAP: {
// If this operation is not supported, lower it to 'abort()' call
TargetLowering::ArgListTy Args;
- std::pair<SDValue, SDValue> CallResult =
- TLI.LowerCallTo(Node->getOperand(0), Type::getVoidTy(*DAG.getContext()),
+ TargetLowering::
+ CallLoweringInfo CLI(Node->getOperand(0),
+ Type::getVoidTy(*DAG.getContext()),
false, false, false, false, 0, CallingConv::C,
/*isTailCall=*/false,
/*doesNotReturn=*/false, /*isReturnValueUsed=*/true,
DAG.getExternalSymbol("abort", TLI.getPointerTy()),
Args, DAG, dl);
+ std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI);
+
Results.push_back(CallResult.second);
break;
}
@@ -3059,7 +3094,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
"Don't know how to expand this subtraction!");
Tmp1 = DAG.getNode(ISD::XOR, dl, VT, Node->getOperand(1),
DAG.getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), VT));
- Tmp1 = DAG.getNode(ISD::ADD, dl, VT, Tmp2, DAG.getConstant(1, VT));
+ Tmp1 = DAG.getNode(ISD::ADD, dl, VT, Tmp1, DAG.getConstant(1, VT));
Results.push_back(DAG.getNode(ISD::ADD, dl, VT, Node->getOperand(0), Tmp1));
break;
}
@@ -3074,7 +3109,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
Tmp3 = Node->getOperand(1);
if (TLI.isOperationLegalOrCustom(DivRemOpc, VT) ||
(isDivRemLibcallAvailable(Node, isSigned, TLI) &&
- UseDivRem(Node, isSigned, false))) {
+ useDivRem(Node, isSigned, false))) {
Tmp1 = DAG.getNode(DivRemOpc, dl, VTs, Tmp2, Tmp3).getValue(1);
} else if (TLI.isOperationLegalOrCustom(DivOpc, VT)) {
// X % Y -> X-X/Y*Y
@@ -3102,7 +3137,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
SDVTList VTs = DAG.getVTList(VT, VT);
if (TLI.isOperationLegalOrCustom(DivRemOpc, VT) ||
(isDivRemLibcallAvailable(Node, isSigned, TLI) &&
- UseDivRem(Node, isSigned, true)))
+ useDivRem(Node, isSigned, true)))
Tmp1 = DAG.getNode(DivRemOpc, dl, VTs, Node->getOperand(0),
Node->getOperand(1));
else if (isSigned)
diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index 95ddb1e..e8e968a 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -588,18 +588,14 @@ SDValue DAGTypeLegalizer::PromoteIntRes_TRUNCATE(SDNode *N) {
unsigned NumElts = InVT.getVectorNumElements();
assert(NumElts == NVT.getVectorNumElements() &&
"Dst and Src must have the same number of elements");
- EVT EltVT = InVT.getScalarType();
assert(isPowerOf2_32(NumElts) &&
"Promoted vector type must be a power of two");
- EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts/2);
+ SDValue EOp1, EOp2;
+ GetSplitVector(InOp, EOp1, EOp2);
+
EVT HalfNVT = EVT::getVectorVT(*DAG.getContext(), NVT.getScalarType(),
NumElts/2);
-
- SDValue EOp1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, HalfVT, InOp,
- DAG.getIntPtrConstant(0));
- SDValue EOp2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, HalfVT, InOp,
- DAG.getIntPtrConstant(NumElts/2));
EOp1 = DAG.getNode(ISD::TRUNCATE, dl, HalfNVT, EOp1);
EOp2 = DAG.getNode(ISD::TRUNCATE, dl, HalfNVT, EOp2);
@@ -2273,9 +2269,9 @@ void DAGTypeLegalizer::ExpandIntRes_XMULO(SDNode *N,
// A divide for UMULO will be faster than a function call. Select to
// make sure we aren't using 0.
SDValue isZero = DAG.getSetCC(dl, TLI.getSetCCResultType(VT),
- RHS, DAG.getConstant(0, VT), ISD::SETNE);
+ RHS, DAG.getConstant(0, VT), ISD::SETNE);
SDValue NotZero = DAG.getNode(ISD::SELECT, dl, VT, isZero,
- DAG.getConstant(1, VT), RHS);
+ DAG.getConstant(1, VT), RHS);
SDValue DIV = DAG.getNode(ISD::UDIV, DL, LHS.getValueType(), MUL, NotZero);
SDValue Overflow;
Overflow = DAG.getSetCC(DL, N->getValueType(1), DIV, LHS, ISD::SETNE);
@@ -2296,8 +2292,8 @@ void DAGTypeLegalizer::ExpandIntRes_XMULO(SDNode *N,
SDValue Temp = DAG.CreateStackTemporary(PtrVT);
// Temporary for the overflow value, default it to zero.
SDValue Chain = DAG.getStore(DAG.getEntryNode(), dl,
- DAG.getConstant(0, PtrVT), Temp,
- MachinePointerInfo(), false, false, 0);
+ DAG.getConstant(0, PtrVT), Temp,
+ MachinePointerInfo(), false, false, 0);
TargetLowering::ArgListTy Args;
TargetLowering::ArgListEntry Entry;
@@ -2319,16 +2315,17 @@ void DAGTypeLegalizer::ExpandIntRes_XMULO(SDNode *N,
Args.push_back(Entry);
SDValue Func = DAG.getExternalSymbol(TLI.getLibcallName(LC), PtrVT);
- std::pair<SDValue, SDValue> CallInfo =
- TLI.LowerCallTo(Chain, RetTy, true, false, false, false,
- 0, TLI.getLibcallCallingConv(LC),
- /*isTailCall=*/false,
- /*doesNotReturn=*/false, /*isReturnValueUsed=*/true,
- Func, Args, DAG, dl);
+ TargetLowering::
+ CallLoweringInfo CLI(Chain, RetTy, true, false, false, false,
+ 0, TLI.getLibcallCallingConv(LC),
+ /*isTailCall=*/false,
+ /*doesNotReturn=*/false, /*isReturnValueUsed=*/true,
+ Func, Args, DAG, dl);
+ std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI);
SplitInteger(CallInfo.first, Lo, Hi);
SDValue Temp2 = DAG.getLoad(PtrVT, dl, CallInfo.second, Temp,
- MachinePointerInfo(), false, false, false, 0);
+ MachinePointerInfo(), false, false, false, 0);
SDValue Ofl = DAG.getSetCC(dl, N->getValueType(1), Temp2,
DAG.getConstant(0, PtrVT),
ISD::SETNE);
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
index 439aa4d..39337ff 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
@@ -628,7 +628,8 @@ namespace {
public:
explicit NodeUpdateListener(DAGTypeLegalizer &dtl,
SmallSetVector<SDNode*, 16> &nta)
- : DTL(dtl), NodesToAnalyze(nta) {}
+ : SelectionDAG::DAGUpdateListener(dtl.getDAG()),
+ DTL(dtl), NodesToAnalyze(nta) {}
virtual void NodeDeleted(SDNode *N, SDNode *E) {
assert(N->getNodeId() != DAGTypeLegalizer::ReadyToProcess &&
@@ -680,7 +681,7 @@ void DAGTypeLegalizer::ReplaceValueWith(SDValue From, SDValue To) {
SmallSetVector<SDNode*, 16> NodesToAnalyze;
NodeUpdateListener NUL(*this, NodesToAnalyze);
do {
- DAG.ReplaceAllUsesOfValueWith(From, To, &NUL);
+ DAG.ReplaceAllUsesOfValueWith(From, To);
// The old node may still be present in a map like ExpandedIntegers or
// PromotedIntegers. Inform maps about the replacement.
@@ -709,7 +710,7 @@ void DAGTypeLegalizer::ReplaceValueWith(SDValue From, SDValue To) {
SDValue NewVal(M, i);
if (M->getNodeId() == Processed)
RemapValue(NewVal);
- DAG.ReplaceAllUsesOfValueWith(OldVal, NewVal, &NUL);
+ DAG.ReplaceAllUsesOfValueWith(OldVal, NewVal);
// OldVal may be a target of the ReplacedValues map which was marked
// NewNode to force reanalysis because it was updated. Ensure that
// anything that ReplacedValues mapped to OldVal will now be mapped
@@ -950,7 +951,7 @@ SDValue DAGTypeLegalizer::DisintegrateMERGE_VALUES(SDNode *N, unsigned ResNo) {
for (unsigned i = 0, e = N->getNumValues(); i != e; ++i)
if (i != ResNo)
ReplaceValueWith(SDValue(N, i), SDValue(N->getOperand(i)));
- return SDValue(N, ResNo);
+ return SDValue(N->getOperand(ResNo));
}
/// GetSplitDestVTs - Compute the VTs needed for the low/hi parts of a type
@@ -1054,12 +1055,14 @@ SDValue DAGTypeLegalizer::MakeLibCall(RTLIB::Libcall LC, EVT RetVT,
TLI.getPointerTy());
Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
- std::pair<SDValue,SDValue> CallInfo =
- TLI.LowerCallTo(DAG.getEntryNode(), RetTy, isSigned, !isSigned, false,
+ TargetLowering::
+ CallLoweringInfo CLI(DAG.getEntryNode(), RetTy, isSigned, !isSigned, false,
false, 0, TLI.getLibcallCallingConv(LC),
/*isTailCall=*/false,
/*doesNotReturn=*/false, /*isReturnValueUsed=*/true,
Callee, Args, DAG, dl);
+ std::pair<SDValue,SDValue> CallInfo = TLI.LowerCallTo(CLI);
+
return CallInfo.first;
}
@@ -1086,11 +1089,12 @@ DAGTypeLegalizer::ExpandChainLibCall(RTLIB::Libcall LC,
TLI.getPointerTy());
Type *RetTy = Node->getValueType(0).getTypeForEVT(*DAG.getContext());
- std::pair<SDValue, SDValue> CallInfo =
- TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false,
+ TargetLowering::
+ CallLoweringInfo CLI(InChain, RetTy, isSigned, !isSigned, false, false,
0, TLI.getLibcallCallingConv(LC), /*isTailCall=*/false,
/*doesNotReturn=*/false, /*isReturnValueUsed=*/true,
Callee, Args, DAG, Node->getDebugLoc());
+ std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI);
return CallInfo;
}
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index e866445..94fc976 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -135,6 +135,8 @@ public:
ReplacedValues[SDValue(Old, i)] = SDValue(New, i);
}
+ SelectionDAG &getDAG() const { return DAG; }
+
private:
SDNode *AnalyzeNewNode(SDNode *N);
void AnalyzeNewValue(SDValue &Val);
@@ -151,7 +153,7 @@ private:
/// DisintegrateMERGE_VALUES - Replace each result of the given MERGE_VALUES
/// node with the corresponding input operand, except for the result 'ResNo',
- /// which is returned.
+ /// for which the corresponding input operand is returned.
SDValue DisintegrateMERGE_VALUES(SDNode *N, unsigned ResNo);
SDValue GetVectorElementPointer(SDValue VecPtr, EVT EltVT, SDValue Index);
@@ -509,10 +511,12 @@ private:
void ScalarizeVectorResult(SDNode *N, unsigned OpNo);
SDValue ScalarizeVecRes_MERGE_VALUES(SDNode *N, unsigned ResNo);
SDValue ScalarizeVecRes_BinOp(SDNode *N);
+ SDValue ScalarizeVecRes_TernaryOp(SDNode *N);
SDValue ScalarizeVecRes_UnaryOp(SDNode *N);
SDValue ScalarizeVecRes_InregOp(SDNode *N);
SDValue ScalarizeVecRes_BITCAST(SDNode *N);
+ SDValue ScalarizeVecRes_BUILD_VECTOR(SDNode *N);
SDValue ScalarizeVecRes_CONVERT_RNDSAT(SDNode *N);
SDValue ScalarizeVecRes_EXTRACT_SUBVECTOR(SDNode *N);
SDValue ScalarizeVecRes_FP_ROUND(SDNode *N);
@@ -553,6 +557,7 @@ private:
// Vector Result Splitting: <128 x ty> -> 2 x <64 x ty>.
void SplitVectorResult(SDNode *N, unsigned OpNo);
void SplitVecRes_BinOp(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_TernaryOp(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_UnaryOp(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_InregOp(SDNode *N, SDValue &Lo, SDValue &Hi);
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
index a8ff7c6..06f6bd6 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
@@ -168,6 +168,7 @@ void DAGTypeLegalizer::ExpandRes_EXTRACT_VECTOR_ELT(SDNode *N, SDValue &Lo,
SDValue &Hi) {
SDValue OldVec = N->getOperand(0);
unsigned OldElts = OldVec.getValueType().getVectorNumElements();
+ EVT OldEltVT = OldVec.getValueType().getVectorElementType();
DebugLoc dl = N->getDebugLoc();
// Convert to a vector of the expanded element type, for example
@@ -175,6 +176,15 @@ void DAGTypeLegalizer::ExpandRes_EXTRACT_VECTOR_ELT(SDNode *N, SDValue &Lo,
EVT OldVT = N->getValueType(0);
EVT NewVT = TLI.getTypeToTransformTo(*DAG.getContext(), OldVT);
+ if (OldVT != OldEltVT) {
+ // The result of EXTRACT_VECTOR_ELT may be larger than the element type of
+ // the input vector. If so, extend the elements of the input vector to the
+ // same bitwidth as the result before expanding.
+ assert(OldEltVT.bitsLT(OldVT) && "Result type smaller then element type!");
+ EVT NVecVT = EVT::getVectorVT(*DAG.getContext(), OldVT, OldElts);
+ OldVec = DAG.getNode(ISD::ANY_EXTEND, dl, NVecVT, N->getOperand(0));
+ }
+
SDValue NewVec = DAG.getNode(ISD::BITCAST, dl,
EVT::getVectorVT(*DAG.getContext(),
NewVT, 2*OldElts),
diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index 9fe4480..704f99b 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -71,6 +71,9 @@ class VectorLegalizer {
// operands to a different type and bitcasting the result back to the
// original type.
SDValue PromoteVectorOp(SDValue Op);
+ // Implements [SU]INT_TO_FP vector promotion; this is a [zs]ext of the input
+ // operand to the next size up.
+ SDValue PromoteVectorOpINT_TO_FP(SDValue Op);
public:
bool Run();
@@ -231,9 +234,19 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
switch (TLI.getOperationAction(Node->getOpcode(), QueryType)) {
case TargetLowering::Promote:
- // "Promote" the operation by bitcasting
- Result = PromoteVectorOp(Op);
- Changed = true;
+ switch (Op.getOpcode()) {
+ default:
+ // "Promote" the operation by bitcasting
+ Result = PromoteVectorOp(Op);
+ Changed = true;
+ break;
+ case ISD::SINT_TO_FP:
+ case ISD::UINT_TO_FP:
+ // "Promote" the operation by extending the operand.
+ Result = PromoteVectorOpINT_TO_FP(Op);
+ Changed = true;
+ break;
+ }
break;
case TargetLowering::Legal: break;
case TargetLowering::Custom: {
@@ -293,6 +306,44 @@ SDValue VectorLegalizer::PromoteVectorOp(SDValue Op) {
return DAG.getNode(ISD::BITCAST, dl, VT, Op);
}
+SDValue VectorLegalizer::PromoteVectorOpINT_TO_FP(SDValue Op) {
+ // INT_TO_FP operations may require the input operand be promoted even
+ // when the type is otherwise legal.
+ EVT VT = Op.getOperand(0).getValueType();
+ assert(Op.getNode()->getNumValues() == 1 &&
+ "Can't promote a vector with multiple results!");
+
+ // Normal getTypeToPromoteTo() doesn't work here, as that will promote
+ // by widening the vector w/ the same element width and twice the number
+ // of elements. We want the other way around, the same number of elements,
+ // each twice the width.
+ //
+ // Increase the bitwidth of the element to the next pow-of-two
+ // (which is greater than 8 bits).
+ unsigned NumElts = VT.getVectorNumElements();
+ EVT EltVT = VT.getVectorElementType();
+ EltVT = EVT::getIntegerVT(*DAG.getContext(), 2 * EltVT.getSizeInBits());
+ assert(EltVT.isSimple() && "Promoting to a non-simple vector type!");
+
+ // Build the promoted vector type: same element count, doubled element width.
+ MVT NVT = MVT::getVectorVT(EltVT.getSimpleVT(), NumElts);
+
+ DebugLoc dl = Op.getDebugLoc();
+ SmallVector<SDValue, 4> Operands(Op.getNumOperands());
+
+ unsigned Opc = Op.getOpcode() == ISD::UINT_TO_FP ? ISD::ZERO_EXTEND :
+ ISD::SIGN_EXTEND;
+ for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
+ if (Op.getOperand(j).getValueType().isVector())
+ Operands[j] = DAG.getNode(Opc, dl, NVT, Op.getOperand(j));
+ else
+ Operands[j] = Op.getOperand(j);
+ }
+
+ return DAG.getNode(Op.getOpcode(), dl, Op.getValueType(), &Operands[0],
+ Operands.size());
+}
+
SDValue VectorLegalizer::ExpandLoad(SDValue Op) {
DebugLoc dl = Op.getDebugLoc();
diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 5f23f01..4709202 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -48,7 +48,7 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) {
case ISD::MERGE_VALUES: R = ScalarizeVecRes_MERGE_VALUES(N, ResNo);break;
case ISD::BITCAST: R = ScalarizeVecRes_BITCAST(N); break;
- case ISD::BUILD_VECTOR: R = N->getOperand(0); break;
+ case ISD::BUILD_VECTOR: R = ScalarizeVecRes_BUILD_VECTOR(N); break;
case ISD::CONVERT_RNDSAT: R = ScalarizeVecRes_CONVERT_RNDSAT(N); break;
case ISD::EXTRACT_SUBVECTOR: R = ScalarizeVecRes_EXTRACT_SUBVECTOR(N); break;
case ISD::FP_ROUND: R = ScalarizeVecRes_FP_ROUND(N); break;
@@ -115,6 +115,9 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) {
case ISD::SRL:
R = ScalarizeVecRes_BinOp(N);
break;
+ case ISD::FMA:
+ R = ScalarizeVecRes_TernaryOp(N);
+ break;
}
// If R is null, the sub-method took care of registering the result.
@@ -129,6 +132,14 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_BinOp(SDNode *N) {
LHS.getValueType(), LHS, RHS);
}
+SDValue DAGTypeLegalizer::ScalarizeVecRes_TernaryOp(SDNode *N) {
+ SDValue Op0 = GetScalarizedVector(N->getOperand(0));
+ SDValue Op1 = GetScalarizedVector(N->getOperand(1));
+ SDValue Op2 = GetScalarizedVector(N->getOperand(2));
+ return DAG.getNode(N->getOpcode(), N->getDebugLoc(),
+ Op0.getValueType(), Op0, Op1, Op2);
+}
+
SDValue DAGTypeLegalizer::ScalarizeVecRes_MERGE_VALUES(SDNode *N,
unsigned ResNo) {
SDValue Op = DisintegrateMERGE_VALUES(N, ResNo);
@@ -141,6 +152,16 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_BITCAST(SDNode *N) {
NewVT, N->getOperand(0));
}
+SDValue DAGTypeLegalizer::ScalarizeVecRes_BUILD_VECTOR(SDNode *N) {
+ EVT EltVT = N->getValueType(0).getVectorElementType();
+ SDValue InOp = N->getOperand(0);
+ // The BUILD_VECTOR operands may be of wider element types and
+ // we may need to truncate them back to the requested return type.
+ if (EltVT.isInteger())
+ return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), EltVT, InOp);
+ return InOp;
+}
+
SDValue DAGTypeLegalizer::ScalarizeVecRes_CONVERT_RNDSAT(SDNode *N) {
EVT NewVT = N->getValueType(0).getVectorElementType();
SDValue Op0 = GetScalarizedVector(N->getOperand(0));
@@ -436,7 +457,7 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
N->dump(&DAG);
dbgs() << "\n");
SDValue Lo, Hi;
-
+
// See if the target wants to custom expand this node.
if (CustomLowerNode(N, N->getValueType(ResNo), true))
return;
@@ -448,7 +469,8 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
N->dump(&DAG);
dbgs() << "\n";
#endif
- llvm_unreachable("Do not know how to split the result of this operator!");
+ report_fatal_error("Do not know how to split the result of this "
+ "operator!\n");
case ISD::MERGE_VALUES: SplitRes_MERGE_VALUES(N, ResNo, Lo, Hi); break;
case ISD::VSELECT:
@@ -529,6 +551,9 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
case ISD::FREM:
SplitVecRes_BinOp(N, Lo, Hi);
break;
+ case ISD::FMA:
+ SplitVecRes_TernaryOp(N, Lo, Hi);
+ break;
}
// If Lo/Hi is null, the sub-method took care of registering results etc.
@@ -548,6 +573,22 @@ void DAGTypeLegalizer::SplitVecRes_BinOp(SDNode *N, SDValue &Lo,
Hi = DAG.getNode(N->getOpcode(), dl, LHSHi.getValueType(), LHSHi, RHSHi);
}
+void DAGTypeLegalizer::SplitVecRes_TernaryOp(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDValue Op0Lo, Op0Hi;
+ GetSplitVector(N->getOperand(0), Op0Lo, Op0Hi);
+ SDValue Op1Lo, Op1Hi;
+ GetSplitVector(N->getOperand(1), Op1Lo, Op1Hi);
+ SDValue Op2Lo, Op2Hi;
+ GetSplitVector(N->getOperand(2), Op2Lo, Op2Hi);
+ DebugLoc dl = N->getDebugLoc();
+
+ Lo = DAG.getNode(N->getOpcode(), dl, Op0Lo.getValueType(),
+ Op0Lo, Op1Lo, Op2Lo);
+ Hi = DAG.getNode(N->getOpcode(), dl, Op0Hi.getValueType(),
+ Op0Hi, Op1Hi, Op2Hi);
+}
+
void DAGTypeLegalizer::SplitVecRes_BITCAST(SDNode *N, SDValue &Lo,
SDValue &Hi) {
// We know the result is a vector. The input may be either a vector or a
@@ -977,7 +1018,9 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) {
N->dump(&DAG);
dbgs() << "\n";
#endif
- llvm_unreachable("Do not know how to split this operator's operand!");
+ report_fatal_error("Do not know how to split this operator's "
+ "operand!\n");
+
case ISD::SETCC: Res = SplitVecOp_VSETCC(N); break;
case ISD::BITCAST: Res = SplitVecOp_BITCAST(N); break;
case ISD::EXTRACT_SUBVECTOR: Res = SplitVecOp_EXTRACT_SUBVECTOR(N); break;
@@ -1203,15 +1246,15 @@ SDValue DAGTypeLegalizer::SplitVecOp_FP_ROUND(SDNode *N) {
DebugLoc DL = N->getDebugLoc();
GetSplitVector(N->getOperand(0), Lo, Hi);
EVT InVT = Lo.getValueType();
-
+
EVT OutVT = EVT::getVectorVT(*DAG.getContext(), ResVT.getVectorElementType(),
InVT.getVectorNumElements());
-
+
Lo = DAG.getNode(ISD::FP_ROUND, DL, OutVT, Lo, N->getOperand(1));
Hi = DAG.getNode(ISD::FP_ROUND, DL, OutVT, Hi, N->getOperand(1));
-
+
return DAG.getNode(ISD::CONCAT_VECTORS, DL, ResVT, Lo, Hi);
-}
+}
@@ -1755,8 +1798,8 @@ SDValue DAGTypeLegalizer::WidenVecRes_CONCAT_VECTORS(SDNode *N) {
if (InputWidened)
InOp = GetWidenedVector(InOp);
for (unsigned j=0; j < NumInElts; ++j)
- Ops[Idx++] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp,
- DAG.getIntPtrConstant(j));
+ Ops[Idx++] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp,
+ DAG.getIntPtrConstant(j));
}
SDValue UndefVal = DAG.getUNDEF(EltVT);
for (; Idx < WidenNumElts; ++Idx)
@@ -1816,7 +1859,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_CONVERT_RNDSAT(SDNode *N) {
InOp = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InWidenVT, InOp,
DAG.getIntPtrConstant(0));
return DAG.getConvertRndSat(WidenVT, dl, InOp, DTyOp, STyOp, RndOp,
- SatOp, CvtCode);
+ SatOp, CvtCode);
}
}
@@ -1832,7 +1875,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_CONVERT_RNDSAT(SDNode *N) {
SDValue ExtVal = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, InEltVT, InOp,
DAG.getIntPtrConstant(i));
Ops[i] = DAG.getConvertRndSat(WidenVT, dl, ExtVal, DTyOp, STyOp, RndOp,
- SatOp, CvtCode);
+ SatOp, CvtCode);
}
SDValue UndefVal = DAG.getUNDEF(EltVT);
@@ -1936,7 +1979,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_SELECT(SDNode *N) {
Cond1 = GetWidenedVector(Cond1);
if (Cond1.getValueType() != CondWidenVT)
- Cond1 = ModifyToType(Cond1, CondWidenVT);
+ Cond1 = ModifyToType(Cond1, CondWidenVT);
}
SDValue InOp1 = GetWidenedVector(N->getOperand(1));
@@ -2202,7 +2245,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_SETCC(SDNode *N) {
SDValue CC = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl,
ResVT, WideSETCC, DAG.getIntPtrConstant(0));
- return PromoteTargetBoolean(CC, N->getValueType(0));
+ return PromoteTargetBoolean(CC, N->getValueType(0));
}
@@ -2371,10 +2414,8 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVector<SDValue, 16> &LdChain,
NewVT = FindMemType(DAG, TLI, LdWidth, WidenVT, LdAlign, WidthDiff);
NewVTWidth = NewVT.getSizeInBits();
L = DAG.getLoad(NewVT, dl, Chain, BasePtr,
- LD->getPointerInfo().getWithOffset(Offset),
- isVolatile,
- isNonTemporal, isInvariant,
- MinAlign(Align, Increment));
+ LD->getPointerInfo().getWithOffset(Offset), isVolatile,
+ isNonTemporal, isInvariant, MinAlign(Align, Increment));
LdChain.push_back(L.getValue(1));
if (L->getValueType(0).isVector()) {
SmallVector<SDValue, 16> Loads;
@@ -2563,7 +2604,7 @@ void DAGTypeLegalizer::GenWidenVectorStores(SmallVector<SDValue, 16>& StChain,
Offset += Increment;
BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
DAG.getIntPtrConstant(Increment));
- } while (StWidth != 0 && StWidth >= NewVTWidth);
+ } while (StWidth != 0 && StWidth >= NewVTWidth);
// Restore index back to be relative to the original widen element type
Idx = Idx * NewVTWidth / ValEltWidth;
}
diff --git a/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp b/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp
index ff0136e..c3794d5 100644
--- a/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp
+++ b/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp
@@ -50,7 +50,7 @@ ResourcePriorityQueue::ResourcePriorityQueue(SelectionDAGISel *IS) :
const TargetMachine &tm = (*IS->MF).getTarget();
ResourcesModel = tm.getInstrInfo()->CreateTargetScheduleState(&tm,NULL);
- // This hard requirment could be relaxed, but for now
+ // This hard requirement could be relaxed, but for now
// do not let it procede.
assert (ResourcesModel && "Unimplemented CreateTargetScheduleState.");
@@ -318,7 +318,7 @@ void ResourcePriorityQueue::reserveResources(SUnit *SU) {
// If packet is now full, reset the state so in the next cycle
// we start fresh.
- if (Packet.size() >= InstrItins->IssueWidth) {
+ if (Packet.size() >= InstrItins->SchedModel->IssueWidth) {
ResourcesModel->clearResources();
Packet.clear();
}
@@ -353,7 +353,7 @@ signed ResourcePriorityQueue::rawRegPressureDelta(SUnit *SU, unsigned RCId) {
}
/// Estimates change in reg pressure from this SU.
-/// It is acheived by trivial tracking of defined
+/// It is achieved by trivial tracking of defined
/// and used vregs in dependent instructions.
/// The RawPressure flag makes this function to ignore
/// existing reg file sizes, and report raw def/use
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
index 24da432..b7ce48a 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
@@ -441,19 +441,14 @@ static bool CheckForLiveRegDef(SUnit *SU, unsigned Reg,
SmallVector<unsigned, 4> &LRegs,
const TargetRegisterInfo *TRI) {
bool Added = false;
- if (LiveRegDefs[Reg] && LiveRegDefs[Reg] != SU) {
- if (RegAdded.insert(Reg)) {
- LRegs.push_back(Reg);
- Added = true;
- }
- }
- for (const uint16_t *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias)
- if (LiveRegDefs[*Alias] && LiveRegDefs[*Alias] != SU) {
- if (RegAdded.insert(*Alias)) {
- LRegs.push_back(*Alias);
+ for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) {
+ if (LiveRegDefs[*AI] && LiveRegDefs[*AI] != SU) {
+ if (RegAdded.insert(*AI)) {
+ LRegs.push_back(*AI);
Added = true;
}
}
+ }
return Added;
}
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
index 2cb5d37..bf0a437 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
@@ -266,7 +266,8 @@ static void GetCostForDef(const ScheduleDAGSDNodes::RegDefIter &RegDefPos,
const TargetLowering *TLI,
const TargetInstrInfo *TII,
const TargetRegisterInfo *TRI,
- unsigned &RegClass, unsigned &Cost) {
+ unsigned &RegClass, unsigned &Cost,
+ const MachineFunction &MF) {
EVT VT = RegDefPos.GetValue();
// Special handling for untyped values. These values can only come from
@@ -285,7 +286,7 @@ static void GetCostForDef(const ScheduleDAGSDNodes::RegDefIter &RegDefPos,
unsigned Idx = RegDefPos.GetIdx();
const MCInstrDesc Desc = TII->get(Opcode);
- const TargetRegisterClass *RC = TII->getRegClass(Desc, Idx, TRI);
+ const TargetRegisterClass *RC = TII->getRegClass(Desc, Idx, TRI, MF);
RegClass = RC->getID();
// FIXME: Cost arbitrarily set to 1 because there doesn't seem to be a
// better way to determine it.
@@ -852,7 +853,7 @@ void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) {
}
/// After backtracking, the hazard checker needs to be restored to a state
-/// corresponding the the current cycle.
+/// corresponding to the current cycle.
void ScheduleDAGRRList::RestoreHazardCheckerBottomUp() {
HazardRec->Reset();
@@ -1181,7 +1182,7 @@ static void CheckForLiveRegDef(SUnit *SU, unsigned Reg,
SmallSet<unsigned, 4> &RegAdded,
SmallVector<unsigned, 4> &LRegs,
const TargetRegisterInfo *TRI) {
- for (const uint16_t *AliasI = TRI->getOverlaps(Reg); *AliasI; ++AliasI) {
+ for (MCRegAliasIterator AliasI(Reg, TRI, true); AliasI.isValid(); ++AliasI) {
// Check if Ref is live.
if (!LiveRegDefs[*AliasI]) continue;
@@ -1920,7 +1921,7 @@ bool RegReductionPQBase::HighRegPressure(const SUnit *SU) const {
for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG);
RegDefPos.IsValid(); RegDefPos.Advance()) {
unsigned RCId, Cost;
- GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost);
+ GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost, MF);
if ((RegPressure[RCId] + Cost) >= RegLimit[RCId])
return true;
@@ -2034,7 +2035,7 @@ void RegReductionPQBase::scheduledNode(SUnit *SU) {
continue;
unsigned RCId, Cost;
- GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost);
+ GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost, MF);
RegPressure[RCId] += Cost;
break;
}
@@ -2049,7 +2050,7 @@ void RegReductionPQBase::scheduledNode(SUnit *SU) {
if (SkipRegDefs > 0)
continue;
unsigned RCId, Cost;
- GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost);
+ GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost, MF);
if (RegPressure[RCId] < Cost) {
// Register pressure tracking is imprecise. This can happen. But we try
// hard not to let it happen because it likely results in poor scheduling.
@@ -2330,22 +2331,21 @@ static int BUCompareLatency(SUnit *left, SUnit *right, bool checkPref,
// and latency.
if (!checkPref || (left->SchedulingPref == Sched::ILP ||
right->SchedulingPref == Sched::ILP)) {
- if (DisableSchedCycles) {
+ // If neither instruction stalls (!LStall && !RStall) and HazardRecognizer
+ // is enabled, grouping instructions by cycle, then its height is already
+ // covered so only its depth matters. We also reach this point if both stall
+ // but have the same height.
+ if (!SPQ->getHazardRec()->isEnabled()) {
if (LHeight != RHeight)
return LHeight > RHeight ? 1 : -1;
}
- else {
- // If neither instruction stalls (!LStall && !RStall) then
- // its height is already covered so only its depth matters. We also reach
- // this if both stall but have the same height.
- int LDepth = left->getDepth() - LPenalty;
- int RDepth = right->getDepth() - RPenalty;
- if (LDepth != RDepth) {
- DEBUG(dbgs() << " Comparing latency of SU (" << left->NodeNum
- << ") depth " << LDepth << " vs SU (" << right->NodeNum
- << ") depth " << RDepth << "\n");
- return LDepth < RDepth ? 1 : -1;
- }
+ int LDepth = left->getDepth() - LPenalty;
+ int RDepth = right->getDepth() - RPenalty;
+ if (LDepth != RDepth) {
+ DEBUG(dbgs() << " Comparing latency of SU (" << left->NodeNum
+ << ") depth " << LDepth << " vs SU (" << right->NodeNum
+ << ") depth " << RDepth << "\n");
+ return LDepth < RDepth ? 1 : -1;
}
if (left->Latency != right->Latency)
return left->Latency > right->Latency ? 1 : -1;
@@ -2363,7 +2363,7 @@ static bool BURRSort(SUnit *left, SUnit *right, RegReductionPQBase *SPQ) {
bool RHasPhysReg = right->hasPhysRegDefs;
if (LHasPhysReg != RHasPhysReg) {
#ifndef NDEBUG
- const char *PhysRegMsg[] = {" has no physreg", " defines a physreg"};
+ const char *const PhysRegMsg[] = {" has no physreg"," defines a physreg"};
#endif
DEBUG(dbgs() << " SU (" << left->NodeNum << ") "
<< PhysRegMsg[LHasPhysReg] << " SU(" << right->NodeNum << ") "
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
index 75940ec..84e41fc 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
@@ -61,6 +61,7 @@ namespace llvm {
if (isa<BasicBlockSDNode>(Node)) return true;
if (isa<FrameIndexSDNode>(Node)) return true;
if (isa<ConstantPoolSDNode>(Node)) return true;
+ if (isa<TargetIndexSDNode>(Node)) return true;
if (isa<JumpTableSDNode>(Node)) return true;
if (isa<ExternalSymbolSDNode>(Node)) return true;
if (isa<BlockAddressSDNode>(Node)) return true;
@@ -98,12 +99,6 @@ namespace llvm {
///
virtual void computeLatency(SUnit *SU);
- /// computeOperandLatency - Override dependence edge latency using
- /// operand use/def information
- ///
- virtual void computeOperandLatency(SUnit *Def, SUnit *Use,
- SDep& dep) const { }
-
virtual void computeOperandLatency(SDNode *Def, SDNode *Use,
unsigned OpIdx, SDep& dep) const;
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 92671d1..f4fe892 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -14,16 +14,16 @@
#include "llvm/CodeGen/SelectionDAG.h"
#include "SDNodeOrdering.h"
#include "SDNodeDbgValue.h"
+#include "llvm/CallingConv.h"
#include "llvm/Constants.h"
-#include "llvm/Analysis/DebugInfo.h"
-#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/DebugInfo.h"
+#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
#include "llvm/GlobalAlias.h"
#include "llvm/GlobalVariable.h"
#include "llvm/Intrinsics.h"
-#include "llvm/DerivedTypes.h"
+#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Assembly/Writer.h"
-#include "llvm/CallingConv.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
@@ -71,7 +71,9 @@ static const fltSemantics *EVTToAPFloatSemantics(EVT VT) {
}
}
-SelectionDAG::DAGUpdateListener::~DAGUpdateListener() {}
+// Default null implementations of the callbacks.
+void SelectionDAG::DAGUpdateListener::NodeDeleted(SDNode*, SDNode*) {}
+void SelectionDAG::DAGUpdateListener::NodeUpdated(SDNode*) {}
//===----------------------------------------------------------------------===//
// ConstantFPSDNode Class
@@ -217,6 +219,22 @@ bool ISD::isScalarToVector(const SDNode *N) {
return true;
}
+/// allOperandsUndef - Return true if the node has at least one operand
+/// and all operands of the specified node are ISD::UNDEF.
+bool ISD::allOperandsUndef(const SDNode *N) {
+ // Return false if the node has no operands.
+ // This is "logically inconsistent" with the definition of "all" but
+ // is probably the desired behavior.
+ if (N->getNumOperands() == 0)
+ return false;
+
+ for (unsigned i = 0, e = N->getNumOperands(); i != e ; ++i)
+ if (N->getOperand(i).getOpcode() != ISD::UNDEF)
+ return false;
+
+ return true;
+}
+
/// getSetCCSwappedOperands - Return the operation corresponding to (Y op X)
/// when given the operation for (X op Y).
ISD::CondCode ISD::getSetCCSwappedOperands(ISD::CondCode Operation) {
@@ -385,6 +403,7 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) {
ID.AddPointer(GA->getGlobal());
ID.AddInteger(GA->getOffset());
ID.AddInteger(GA->getTargetFlags());
+ ID.AddInteger(GA->getAddressSpace());
break;
}
case ISD::BasicBlock:
@@ -420,16 +439,25 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) {
ID.AddInteger(CP->getTargetFlags());
break;
}
+ case ISD::TargetIndex: {
+ const TargetIndexSDNode *TI = cast<TargetIndexSDNode>(N);
+ ID.AddInteger(TI->getIndex());
+ ID.AddInteger(TI->getOffset());
+ ID.AddInteger(TI->getTargetFlags());
+ break;
+ }
case ISD::LOAD: {
const LoadSDNode *LD = cast<LoadSDNode>(N);
ID.AddInteger(LD->getMemoryVT().getRawBits());
ID.AddInteger(LD->getRawSubclassData());
+ ID.AddInteger(LD->getPointerInfo().getAddrSpace());
break;
}
case ISD::STORE: {
const StoreSDNode *ST = cast<StoreSDNode>(N);
ID.AddInteger(ST->getMemoryVT().getRawBits());
ID.AddInteger(ST->getRawSubclassData());
+ ID.AddInteger(ST->getPointerInfo().getAddrSpace());
break;
}
case ISD::ATOMIC_CMP_SWAP:
@@ -449,6 +477,12 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) {
const AtomicSDNode *AT = cast<AtomicSDNode>(N);
ID.AddInteger(AT->getMemoryVT().getRawBits());
ID.AddInteger(AT->getRawSubclassData());
+ ID.AddInteger(AT->getPointerInfo().getAddrSpace());
+ break;
+ }
+ case ISD::PREFETCH: {
+ const MemSDNode *PF = cast<MemSDNode>(N);
+ ID.AddInteger(PF->getPointerInfo().getAddrSpace());
break;
}
case ISD::VECTOR_SHUFFLE: {
@@ -465,6 +499,10 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) {
break;
}
} // end switch (N->getOpcode())
+
+ // Target specific memory nodes could also have address spaces to check.
+ if (N->isTargetMemoryOpcode())
+ ID.AddInteger(cast<MemSDNode>(N)->getPointerInfo().getAddrSpace());
}
/// AddNodeIDNode - Generic routine for adding a nodes info to the NodeID
@@ -544,16 +582,15 @@ void SelectionDAG::RemoveDeadNodes() {
/// RemoveDeadNodes - This method deletes the unreachable nodes in the
/// given list, and any nodes that become unreachable as a result.
-void SelectionDAG::RemoveDeadNodes(SmallVectorImpl<SDNode *> &DeadNodes,
- DAGUpdateListener *UpdateListener) {
+void SelectionDAG::RemoveDeadNodes(SmallVectorImpl<SDNode *> &DeadNodes) {
// Process the worklist, deleting the nodes and adding their uses to the
// worklist.
while (!DeadNodes.empty()) {
SDNode *N = DeadNodes.pop_back_val();
- if (UpdateListener)
- UpdateListener->NodeDeleted(N, 0);
+ for (DAGUpdateListener *DUL = UpdateListeners; DUL; DUL = DUL->Next)
+ DUL->NodeDeleted(N, 0);
// Take the node out of the appropriate CSE map.
RemoveNodeFromCSEMaps(N);
@@ -574,7 +611,7 @@ void SelectionDAG::RemoveDeadNodes(SmallVectorImpl<SDNode *> &DeadNodes,
}
}
-void SelectionDAG::RemoveDeadNode(SDNode *N, DAGUpdateListener *UpdateListener){
+void SelectionDAG::RemoveDeadNode(SDNode *N){
SmallVector<SDNode*, 16> DeadNodes(1, N);
// Create a dummy node that adds a reference to the root node, preventing
@@ -582,7 +619,7 @@ void SelectionDAG::RemoveDeadNode(SDNode *N, DAGUpdateListener *UpdateListener){
// dead node.)
HandleSDNode Dummy(getRoot());
- RemoveDeadNodes(DeadNodes, UpdateListener);
+ RemoveDeadNodes(DeadNodes);
}
void SelectionDAG::DeleteNode(SDNode *N) {
@@ -684,8 +721,7 @@ bool SelectionDAG::RemoveNodeFromCSEMaps(SDNode *N) {
/// node. This transfer can potentially trigger recursive merging.
///
void
-SelectionDAG::AddModifiedNodeToCSEMaps(SDNode *N,
- DAGUpdateListener *UpdateListener) {
+SelectionDAG::AddModifiedNodeToCSEMaps(SDNode *N) {
// For node types that aren't CSE'd, just act as if no identical node
// already exists.
if (!doNotCSE(N)) {
@@ -694,20 +730,19 @@ SelectionDAG::AddModifiedNodeToCSEMaps(SDNode *N,
// If there was already an existing matching node, use ReplaceAllUsesWith
// to replace the dead one with the existing one. This can cause
// recursive merging of other unrelated nodes down the line.
- ReplaceAllUsesWith(N, Existing, UpdateListener);
+ ReplaceAllUsesWith(N, Existing);
- // N is now dead. Inform the listener if it exists and delete it.
- if (UpdateListener)
- UpdateListener->NodeDeleted(N, Existing);
+ // N is now dead. Inform the listeners and delete it.
+ for (DAGUpdateListener *DUL = UpdateListeners; DUL; DUL = DUL->Next)
+ DUL->NodeDeleted(N, Existing);
DeleteNodeNotInCSEMaps(N);
return;
}
}
- // If the node doesn't already exist, we updated it. Inform a listener if
- // it exists.
- if (UpdateListener)
- UpdateListener->NodeUpdated(N);
+ // If the node doesn't already exist, we updated it. Inform listeners.
+ for (DAGUpdateListener *DUL = UpdateListeners; DUL; DUL = DUL->Next)
+ DUL->NodeUpdated(N);
}
/// FindModifiedNodeSlot - Find a slot for the specified node if its operands
@@ -855,7 +890,7 @@ unsigned SelectionDAG::getEVTAlignment(EVT VT) const {
SelectionDAG::SelectionDAG(const TargetMachine &tm, CodeGenOpt::Level OL)
: TM(tm), TLI(*tm.getTargetLowering()), TSI(*tm.getSelectionDAGInfo()),
OptLevel(OL), EntryNode(ISD::EntryToken, DebugLoc(), getVTList(MVT::Other)),
- Root(getEntryNode()), Ordering(0) {
+ Root(getEntryNode()), Ordering(0), UpdateListeners(0) {
AllNodes.push_back(&EntryNode);
Ordering = new SDNodeOrdering();
DbgInfo = new SDDbgInfo();
@@ -867,6 +902,7 @@ void SelectionDAG::init(MachineFunction &mf) {
}
SelectionDAG::~SelectionDAG() {
+ assert(!UpdateListeners && "Dangling registered DAGUpdateListeners");
allnodes_clear();
delete Ordering;
delete DbgInfo;
@@ -1084,6 +1120,7 @@ SDValue SelectionDAG::getGlobalAddress(const GlobalValue *GV, DebugLoc DL,
ID.AddPointer(GV);
ID.AddInteger(Offset);
ID.AddInteger(TargetFlags);
+ ID.AddInteger(GV->getType()->getAddressSpace());
void *IP = 0;
if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
return SDValue(E, 0);
@@ -1183,6 +1220,24 @@ SDValue SelectionDAG::getConstantPool(MachineConstantPoolValue *C, EVT VT,
return SDValue(N, 0);
}
+SDValue SelectionDAG::getTargetIndex(int Index, EVT VT, int64_t Offset,
+ unsigned char TargetFlags) {
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ISD::TargetIndex, getVTList(VT), 0, 0);
+ ID.AddInteger(Index);
+ ID.AddInteger(Offset);
+ ID.AddInteger(TargetFlags);
+ void *IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return SDValue(E, 0);
+
+ SDNode *N = new (NodeAllocator) TargetIndexSDNode(Index, VT, Offset,
+ TargetFlags);
+ CSEMap.InsertNode(N, IP);
+ AllNodes.push_back(N);
+ return SDValue(N, 0);
+}
+
SDValue SelectionDAG::getBasicBlock(MachineBasicBlock *MBB) {
FoldingSetNodeID ID;
AddNodeIDNode(ID, ISD::BasicBlock, getVTList(MVT::Other), 0, 0);
@@ -1949,6 +2004,7 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, APInt &KnownZero,
APInt InMask = APInt::getLowBitsSet(BitWidth, VT.getSizeInBits());
ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1);
KnownZero |= (~InMask);
+ KnownOne &= (~KnownZero);
return;
}
case ISD::FGETSIGN:
@@ -2246,8 +2302,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const{
}
// Handle LOADX separately here. EXTLOAD case will fallthrough.
- if (Op.getOpcode() == ISD::LOAD) {
- LoadSDNode *LD = cast<LoadSDNode>(Op);
+ if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Op)) {
unsigned ExtType = LD->getExtensionType();
switch (ExtType) {
default: break;
@@ -2428,6 +2483,24 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL,
case ISD::FABS:
V.clearSign();
return getConstantFP(V, VT);
+ case ISD::FCEIL: {
+ APFloat::opStatus fs = V.roundToIntegral(APFloat::rmTowardPositive);
+ if (fs == APFloat::opOK || fs == APFloat::opInexact)
+ return getConstantFP(V, VT);
+ break;
+ }
+ case ISD::FTRUNC: {
+ APFloat::opStatus fs = V.roundToIntegral(APFloat::rmTowardZero);
+ if (fs == APFloat::opOK || fs == APFloat::opInexact)
+ return getConstantFP(V, VT);
+ break;
+ }
+ case ISD::FFLOOR: {
+ APFloat::opStatus fs = V.roundToIntegral(APFloat::rmTowardNegative);
+ if (fs == APFloat::opOK || fs == APFloat::opInexact)
+ return getConstantFP(V, VT);
+ break;
+ }
case ISD::FP_EXTEND: {
bool ignored;
// This can return overflow, underflow, or inexact; we don't care.
@@ -2675,6 +2748,11 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,
if (N1 == N2) return N1;
break;
case ISD::CONCAT_VECTORS:
+ // Concat of UNDEFs is UNDEF.
+ if (N1.getOpcode() == ISD::UNDEF &&
+ N2.getOpcode() == ISD::UNDEF)
+ return getUNDEF(VT);
+
// A CONCAT_VECTOR with all operands BUILD_VECTOR can be simplified to
// one big BUILD_VECTOR.
if (N1.getOpcode() == ISD::BUILD_VECTOR &&
@@ -3708,8 +3786,8 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, DebugLoc dl, SDValue Dst,
Entry.Node = Src; Args.push_back(Entry);
Entry.Node = Size; Args.push_back(Entry);
// FIXME: pass in DebugLoc
- std::pair<SDValue,SDValue> CallResult =
- TLI.LowerCallTo(Chain, Type::getVoidTy(*getContext()),
+ TargetLowering::
+ CallLoweringInfo CLI(Chain, Type::getVoidTy(*getContext()),
false, false, false, false, 0,
TLI.getLibcallCallingConv(RTLIB::MEMCPY),
/*isTailCall=*/false,
@@ -3717,6 +3795,8 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, DebugLoc dl, SDValue Dst,
getExternalSymbol(TLI.getLibcallName(RTLIB::MEMCPY),
TLI.getPointerTy()),
Args, *this, dl);
+ std::pair<SDValue,SDValue> CallResult = TLI.LowerCallTo(CLI);
+
return CallResult.second;
}
@@ -3761,8 +3841,8 @@ SDValue SelectionDAG::getMemmove(SDValue Chain, DebugLoc dl, SDValue Dst,
Entry.Node = Src; Args.push_back(Entry);
Entry.Node = Size; Args.push_back(Entry);
// FIXME: pass in DebugLoc
- std::pair<SDValue,SDValue> CallResult =
- TLI.LowerCallTo(Chain, Type::getVoidTy(*getContext()),
+ TargetLowering::
+ CallLoweringInfo CLI(Chain, Type::getVoidTy(*getContext()),
false, false, false, false, 0,
TLI.getLibcallCallingConv(RTLIB::MEMMOVE),
/*isTailCall=*/false,
@@ -3770,6 +3850,8 @@ SDValue SelectionDAG::getMemmove(SDValue Chain, DebugLoc dl, SDValue Dst,
getExternalSymbol(TLI.getLibcallName(RTLIB::MEMMOVE),
TLI.getPointerTy()),
Args, *this, dl);
+ std::pair<SDValue,SDValue> CallResult = TLI.LowerCallTo(CLI);
+
return CallResult.second;
}
@@ -3822,8 +3904,8 @@ SDValue SelectionDAG::getMemset(SDValue Chain, DebugLoc dl, SDValue Dst,
Entry.isSExt = false;
Args.push_back(Entry);
// FIXME: pass in DebugLoc
- std::pair<SDValue,SDValue> CallResult =
- TLI.LowerCallTo(Chain, Type::getVoidTy(*getContext()),
+ TargetLowering::
+ CallLoweringInfo CLI(Chain, Type::getVoidTy(*getContext()),
false, false, false, false, 0,
TLI.getLibcallCallingConv(RTLIB::MEMSET),
/*isTailCall=*/false,
@@ -3831,6 +3913,8 @@ SDValue SelectionDAG::getMemset(SDValue Chain, DebugLoc dl, SDValue Dst,
getExternalSymbol(TLI.getLibcallName(RTLIB::MEMSET),
TLI.getPointerTy()),
Args, *this, dl);
+ std::pair<SDValue,SDValue> CallResult = TLI.LowerCallTo(CLI);
+
return CallResult.second;
}
@@ -3874,6 +3958,7 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT,
ID.AddInteger(MemVT.getRawBits());
SDValue Ops[] = {Chain, Ptr, Cmp, Swp};
AddNodeIDNode(ID, Opcode, VTs, Ops, 4);
+ ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
void* IP = 0;
if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
cast<AtomicSDNode>(E)->refineAlignment(MMO);
@@ -3946,6 +4031,7 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT,
ID.AddInteger(MemVT.getRawBits());
SDValue Ops[] = {Chain, Ptr, Val};
AddNodeIDNode(ID, Opcode, VTs, Ops, 3);
+ ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
void* IP = 0;
if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
cast<AtomicSDNode>(E)->refineAlignment(MMO);
@@ -4002,6 +4088,7 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT,
ID.AddInteger(MemVT.getRawBits());
SDValue Ops[] = {Chain, Ptr};
AddNodeIDNode(ID, Opcode, VTs, Ops, 2);
+ ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
void* IP = 0;
if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
cast<AtomicSDNode>(E)->refineAlignment(MMO);
@@ -4079,6 +4166,7 @@ SelectionDAG::getMemIntrinsicNode(unsigned Opcode, DebugLoc dl, SDVTList VTList,
if (VTList.VTs[VTList.NumVTs-1] != MVT::Glue) {
FoldingSetNodeID ID;
AddNodeIDNode(ID, Opcode, VTList, Ops, NumOps);
+ ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
void *IP = 0;
if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
cast<MemIntrinsicSDNode>(E)->refineAlignment(MMO);
@@ -4198,6 +4286,7 @@ SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType,
ID.AddInteger(encodeMemSDNodeFlags(ExtType, AM, MMO->isVolatile(),
MMO->isNonTemporal(),
MMO->isInvariant()));
+ ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
void *IP = 0;
if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
cast<LoadSDNode>(E)->refineAlignment(MMO);
@@ -4287,6 +4376,7 @@ SDValue SelectionDAG::getStore(SDValue Chain, DebugLoc dl, SDValue Val,
ID.AddInteger(VT.getRawBits());
ID.AddInteger(encodeMemSDNodeFlags(false, ISD::UNINDEXED, MMO->isVolatile(),
MMO->isNonTemporal(), MMO->isInvariant()));
+ ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
void *IP = 0;
if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
cast<StoreSDNode>(E)->refineAlignment(MMO);
@@ -4354,6 +4444,7 @@ SDValue SelectionDAG::getTruncStore(SDValue Chain, DebugLoc dl, SDValue Val,
ID.AddInteger(SVT.getRawBits());
ID.AddInteger(encodeMemSDNodeFlags(true, ISD::UNINDEXED, MMO->isVolatile(),
MMO->isNonTemporal(), MMO->isInvariant()));
+ ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
void *IP = 0;
if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
cast<StoreSDNode>(E)->refineAlignment(MMO);
@@ -4378,6 +4469,7 @@ SelectionDAG::getIndexedStore(SDValue OrigStore, DebugLoc dl, SDValue Base,
AddNodeIDNode(ID, ISD::STORE, VTs, Ops, 4);
ID.AddInteger(ST->getMemoryVT().getRawBits());
ID.AddInteger(ST->getRawSubclassData());
+ ID.AddInteger(ST->getPointerInfo().getAddrSpace());
void *IP = 0;
if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
return SDValue(E, 0);
@@ -4654,13 +4746,7 @@ SDVTList SelectionDAG::getVTList(const EVT *VTs, unsigned NumVTs) {
if (I->NumVTs != NumVTs || VTs[0] != I->VTs[0] || VTs[1] != I->VTs[1])
continue;
- bool NoMatch = false;
- for (unsigned i = 2; i != NumVTs; ++i)
- if (VTs[i] != I->VTs[i]) {
- NoMatch = true;
- break;
- }
- if (!NoMatch)
+ if (std::equal(&VTs[2], &VTs[NumVTs], &I->VTs[2]))
return *I;
}
@@ -5237,11 +5323,7 @@ namespace {
/// pointed to by a use iterator is deleted, increment the use iterator
/// so that it doesn't dangle.
///
-/// This class also manages a "downlink" DAGUpdateListener, to forward
-/// messages to ReplaceAllUsesWith's callers.
-///
class RAUWUpdateListener : public SelectionDAG::DAGUpdateListener {
- SelectionDAG::DAGUpdateListener *DownLink;
SDNode::use_iterator &UI;
SDNode::use_iterator &UE;
@@ -5249,21 +5331,13 @@ class RAUWUpdateListener : public SelectionDAG::DAGUpdateListener {
// Increment the iterator as needed.
while (UI != UE && N == *UI)
++UI;
-
- // Then forward the message.
- if (DownLink) DownLink->NodeDeleted(N, E);
- }
-
- virtual void NodeUpdated(SDNode *N) {
- // Just forward the message.
- if (DownLink) DownLink->NodeUpdated(N);
}
public:
- RAUWUpdateListener(SelectionDAG::DAGUpdateListener *dl,
+ RAUWUpdateListener(SelectionDAG &d,
SDNode::use_iterator &ui,
SDNode::use_iterator &ue)
- : DownLink(dl), UI(ui), UE(ue) {}
+ : SelectionDAG::DAGUpdateListener(d), UI(ui), UE(ue) {}
};
}
@@ -5273,8 +5347,7 @@ public:
///
/// This version assumes From has a single result value.
///
-void SelectionDAG::ReplaceAllUsesWith(SDValue FromN, SDValue To,
- DAGUpdateListener *UpdateListener) {
+void SelectionDAG::ReplaceAllUsesWith(SDValue FromN, SDValue To) {
SDNode *From = FromN.getNode();
assert(From->getNumValues() == 1 && FromN.getResNo() == 0 &&
"Cannot replace with this method!");
@@ -5288,7 +5361,7 @@ void SelectionDAG::ReplaceAllUsesWith(SDValue FromN, SDValue To,
// is replaced by To, we don't want to replace of all its users with To
// too. See PR3018 for more info.
SDNode::use_iterator UI = From->use_begin(), UE = From->use_end();
- RAUWUpdateListener Listener(UpdateListener, UI, UE);
+ RAUWUpdateListener Listener(*this, UI, UE);
while (UI != UE) {
SDNode *User = *UI;
@@ -5307,7 +5380,7 @@ void SelectionDAG::ReplaceAllUsesWith(SDValue FromN, SDValue To,
// Now that we have modified User, add it back to the CSE maps. If it
// already exists there, recursively merge the results together.
- AddModifiedNodeToCSEMaps(User, &Listener);
+ AddModifiedNodeToCSEMaps(User);
}
// If we just RAUW'd the root, take note.
@@ -5321,8 +5394,7 @@ void SelectionDAG::ReplaceAllUsesWith(SDValue FromN, SDValue To,
/// This version assumes that for each value of From, there is a
/// corresponding value in To in the same position with the same type.
///
-void SelectionDAG::ReplaceAllUsesWith(SDNode *From, SDNode *To,
- DAGUpdateListener *UpdateListener) {
+void SelectionDAG::ReplaceAllUsesWith(SDNode *From, SDNode *To) {
#ifndef NDEBUG
for (unsigned i = 0, e = From->getNumValues(); i != e; ++i)
assert((!From->hasAnyUseOfValue(i) ||
@@ -5337,7 +5409,7 @@ void SelectionDAG::ReplaceAllUsesWith(SDNode *From, SDNode *To,
// Iterate over just the existing users of From. See the comments in
// the ReplaceAllUsesWith above.
SDNode::use_iterator UI = From->use_begin(), UE = From->use_end();
- RAUWUpdateListener Listener(UpdateListener, UI, UE);
+ RAUWUpdateListener Listener(*this, UI, UE);
while (UI != UE) {
SDNode *User = *UI;
@@ -5356,7 +5428,7 @@ void SelectionDAG::ReplaceAllUsesWith(SDNode *From, SDNode *To,
// Now that we have modified User, add it back to the CSE maps. If it
// already exists there, recursively merge the results together.
- AddModifiedNodeToCSEMaps(User, &Listener);
+ AddModifiedNodeToCSEMaps(User);
}
// If we just RAUW'd the root, take note.
@@ -5369,16 +5441,14 @@ void SelectionDAG::ReplaceAllUsesWith(SDNode *From, SDNode *To,
///
/// This version can replace From with any result values. To must match the
/// number and types of values returned by From.
-void SelectionDAG::ReplaceAllUsesWith(SDNode *From,
- const SDValue *To,
- DAGUpdateListener *UpdateListener) {
+void SelectionDAG::ReplaceAllUsesWith(SDNode *From, const SDValue *To) {
if (From->getNumValues() == 1) // Handle the simple case efficiently.
- return ReplaceAllUsesWith(SDValue(From, 0), To[0], UpdateListener);
+ return ReplaceAllUsesWith(SDValue(From, 0), To[0]);
// Iterate over just the existing users of From. See the comments in
// the ReplaceAllUsesWith above.
SDNode::use_iterator UI = From->use_begin(), UE = From->use_end();
- RAUWUpdateListener Listener(UpdateListener, UI, UE);
+ RAUWUpdateListener Listener(*this, UI, UE);
while (UI != UE) {
SDNode *User = *UI;
@@ -5398,7 +5468,7 @@ void SelectionDAG::ReplaceAllUsesWith(SDNode *From,
// Now that we have modified User, add it back to the CSE maps. If it
// already exists there, recursively merge the results together.
- AddModifiedNodeToCSEMaps(User, &Listener);
+ AddModifiedNodeToCSEMaps(User);
}
// If we just RAUW'd the root, take note.
@@ -5409,14 +5479,13 @@ void SelectionDAG::ReplaceAllUsesWith(SDNode *From,
/// ReplaceAllUsesOfValueWith - Replace any uses of From with To, leaving
/// uses of other values produced by From.getNode() alone. The Deleted
/// vector is handled the same way as for ReplaceAllUsesWith.
-void SelectionDAG::ReplaceAllUsesOfValueWith(SDValue From, SDValue To,
- DAGUpdateListener *UpdateListener){
+void SelectionDAG::ReplaceAllUsesOfValueWith(SDValue From, SDValue To){
// Handle the really simple, really trivial case efficiently.
if (From == To) return;
// Handle the simple, trivial, case efficiently.
if (From.getNode()->getNumValues() == 1) {
- ReplaceAllUsesWith(From, To, UpdateListener);
+ ReplaceAllUsesWith(From, To);
return;
}
@@ -5424,7 +5493,7 @@ void SelectionDAG::ReplaceAllUsesOfValueWith(SDValue From, SDValue To,
// the ReplaceAllUsesWith above.
SDNode::use_iterator UI = From.getNode()->use_begin(),
UE = From.getNode()->use_end();
- RAUWUpdateListener Listener(UpdateListener, UI, UE);
+ RAUWUpdateListener Listener(*this, UI, UE);
while (UI != UE) {
SDNode *User = *UI;
bool UserRemovedFromCSEMaps = false;
@@ -5460,7 +5529,7 @@ void SelectionDAG::ReplaceAllUsesOfValueWith(SDValue From, SDValue To,
// Now that we have modified User, add it back to the CSE maps. If it
// already exists there, recursively merge the results together.
- AddModifiedNodeToCSEMaps(User, &Listener);
+ AddModifiedNodeToCSEMaps(User);
}
// If we just RAUW'd the root, take note.
@@ -5489,11 +5558,10 @@ namespace {
/// handled the same way as for ReplaceAllUsesWith.
void SelectionDAG::ReplaceAllUsesOfValuesWith(const SDValue *From,
const SDValue *To,
- unsigned Num,
- DAGUpdateListener *UpdateListener){
+ unsigned Num){
// Handle the simple, trivial case efficiently.
if (Num == 1)
- return ReplaceAllUsesOfValueWith(*From, *To, UpdateListener);
+ return ReplaceAllUsesOfValueWith(*From, *To);
// Read up all the uses and make records of them. This helps
// processing new uses that are introduced during the
@@ -5538,7 +5606,7 @@ void SelectionDAG::ReplaceAllUsesOfValuesWith(const SDValue *From,
// Now that we have modified User, add it back to the CSE maps. If it
// already exists there, recursively merge the results together.
- AddModifiedNodeToCSEMaps(User, UpdateListener);
+ AddModifiedNodeToCSEMaps(User);
}
}
@@ -5579,7 +5647,7 @@ unsigned SelectionDAG::AssignTopologicalOrder() {
}
}
- // Visit all the nodes. As we iterate, moves nodes into sorted order,
+ // Visit all the nodes. As we iterate, move nodes into sorted order,
// such that by the time the end is reached all nodes will be sorted.
for (allnodes_iterator I = allnodes_begin(),E = allnodes_end(); I != E; ++I) {
SDNode *N = I;
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index f1e879b..ba5bd79 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -21,6 +21,7 @@
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Constants.h"
#include "llvm/CallingConv.h"
+#include "llvm/DebugInfo.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
#include "llvm/GlobalVariable.h"
@@ -42,7 +43,6 @@
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
-#include "llvm/Analysis/DebugInfo.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetInstrInfo.h"
@@ -51,6 +51,7 @@
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/IntegersSubsetMapping.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
@@ -843,7 +844,7 @@ void SelectionDAGBuilder::clear() {
}
/// clearDanglingDebugInfo - Clear the dangling debug information
-/// map. This function is seperated from the clear so that debug
+/// map. This function is separated from the clear so that debug
/// information that is dangling in a basic block can be properly
/// resolved in a different basic block. This allows the
/// SelectionDAG to resolve dangling debug information attached
@@ -941,7 +942,7 @@ void SelectionDAGBuilder::visit(unsigned Opcode, const User &I) {
default: llvm_unreachable("Unknown instruction type encountered!");
// Build the switch statement using the Instruction.def file.
#define HANDLE_INST(NUM, OPCODE, CLASS) \
- case Instruction::OPCODE: visit##OPCODE((CLASS&)I); break;
+ case Instruction::OPCODE: visit##OPCODE((const CLASS&)I); break;
#include "llvm/Instruction.def"
}
@@ -1578,17 +1579,18 @@ void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB,
} else
Cond = DAG.getSetCC(dl, MVT::i1, CondLHS, getValue(CB.CmpRHS), CB.CC);
} else {
- assert(CB.CC == ISD::SETLE && "Can handle only LE ranges now");
+ assert(CB.CC == ISD::SETCC_INVALID &&
+ "Condition is undefined for to-the-range belonging check.");
const APInt& Low = cast<ConstantInt>(CB.CmpLHS)->getValue();
const APInt& High = cast<ConstantInt>(CB.CmpRHS)->getValue();
SDValue CmpOp = getValue(CB.CmpMHS);
EVT VT = CmpOp.getValueType();
-
- if (cast<ConstantInt>(CB.CmpLHS)->isMinValue(true)) {
+
+ if (cast<ConstantInt>(CB.CmpLHS)->isMinValue(false)) {
Cond = DAG.getSetCC(dl, MVT::i1, CmpOp, DAG.getConstant(High, VT),
- ISD::SETLE);
+ ISD::SETULE);
} else {
SDValue SUB = DAG.getNode(ISD::SUB, dl,
VT, CmpOp, DAG.getConstant(Low, VT));
@@ -1826,9 +1828,13 @@ void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) {
MachineBasicBlock *LandingPad = FuncInfo.MBBMap[I.getSuccessor(1)];
const Value *Callee(I.getCalledValue());
+ const Function *Fn = dyn_cast<Function>(Callee);
if (isa<InlineAsm>(Callee))
visitInlineAsm(&I);
- else
+ else if (Fn && Fn->isIntrinsic()) {
+ assert(Fn->getIntrinsicID() == Intrinsic::donothing);
+ // Ignore invokes to @llvm.donothing: jump directly to the next BB.
+ } else
LowerCallTo(&I, getValue(Callee), false, LandingPad);
// If the value of the invoke is used outside of its defining block, make it
@@ -1901,8 +1907,6 @@ bool SelectionDAGBuilder::handleSmallSwitchRange(CaseRec& CR,
const Value* SV,
MachineBasicBlock *Default,
MachineBasicBlock *SwitchBB) {
- Case& BackCase = *(CR.Range.second-1);
-
// Size is the number of Cases represented by this range.
size_t Size = CR.Range.second - CR.Range.first;
if (Size > 3)
@@ -1970,11 +1974,28 @@ bool SelectionDAGBuilder::handleSmallSwitchRange(CaseRec& CR,
}
}
+ // Order cases by weight so the most likely case will be checked first.
+ BranchProbabilityInfo *BPI = FuncInfo.BPI;
+ if (BPI) {
+ for (CaseItr I = CR.Range.first, IE = CR.Range.second; I != IE; ++I) {
+ uint32_t IWeight = BPI->getEdgeWeight(SwitchBB->getBasicBlock(),
+ I->BB->getBasicBlock());
+ for (CaseItr J = CR.Range.first; J < I; ++J) {
+ uint32_t JWeight = BPI->getEdgeWeight(SwitchBB->getBasicBlock(),
+ J->BB->getBasicBlock());
+ if (IWeight > JWeight)
+ std::swap(*I, *J);
+ }
+ }
+ }
// Rearrange the case blocks so that the last one falls through if possible.
- if (NextBlock && Default != NextBlock && BackCase.BB != NextBlock) {
+ Case &BackCase = *(CR.Range.second-1);
+ if (Size > 1 &&
+ NextBlock && Default != NextBlock && BackCase.BB != NextBlock) {
// The last case block won't fall through into 'NextBlock' if we emit the
// branches in this order. See if rearranging a case value would help.
- for (CaseItr I = CR.Range.first, E = CR.Range.second-1; I != E; ++I) {
+ // We start at the bottom as it's the case with the least weight.
+ for (Case *I = &*(CR.Range.second-2), *E = &*CR.Range.first-1; I != E; --I){
if (I->BB == NextBlock) {
std::swap(*I, BackCase);
break;
@@ -2006,7 +2027,7 @@ bool SelectionDAGBuilder::handleSmallSwitchRange(CaseRec& CR,
CC = ISD::SETEQ;
LHS = SV; RHS = I->High; MHS = NULL;
} else {
- CC = ISD::SETLE;
+ CC = ISD::SETCC_INVALID;
LHS = I->Low; MHS = SV; RHS = I->High;
}
@@ -2031,14 +2052,14 @@ bool SelectionDAGBuilder::handleSmallSwitchRange(CaseRec& CR,
}
static inline bool areJTsAllowed(const TargetLowering &TLI) {
- return !TLI.getTargetMachine().Options.DisableJumpTables &&
+ return TLI.supportJumpTables() &&
(TLI.isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) ||
TLI.isOperationLegalOrCustom(ISD::BRIND, MVT::Other));
}
static APInt ComputeRange(const APInt &First, const APInt &Last) {
uint32_t BitWidth = std::max(Last.getBitWidth(), First.getBitWidth()) + 1;
- APInt LastExt = Last.sext(BitWidth), FirstExt = First.sext(BitWidth);
+ APInt LastExt = Last.zext(BitWidth), FirstExt = First.zext(BitWidth);
return (LastExt - FirstExt + 1ULL);
}
@@ -2104,7 +2125,7 @@ bool SelectionDAGBuilder::handleJTSwitchCase(CaseRec &CR,
const APInt &Low = cast<ConstantInt>(I->Low)->getValue();
const APInt &High = cast<ConstantInt>(I->High)->getValue();
- if (Low.sle(TEI) && TEI.sle(High)) {
+ if (Low.ule(TEI) && TEI.ule(High)) {
DestBBs.push_back(I->BB);
if (TEI==High)
++I;
@@ -2261,7 +2282,7 @@ bool SelectionDAGBuilder::handleBTSplitSwitchCase(CaseRec& CR,
// Create a CaseBlock record representing a conditional branch to
// the LHS node if the value being switched on SV is less than C.
// Otherwise, branch to LHS.
- CaseBlock CB(ISD::SETLT, SV, C, NULL, TrueBB, FalseBB, CR.CaseBB);
+ CaseBlock CB(ISD::SETULT, SV, C, NULL, TrueBB, FalseBB, CR.CaseBB);
if (CR.CaseBB == SwitchBB)
visitSwitchCase(CB, SwitchBB);
@@ -2333,7 +2354,7 @@ bool SelectionDAGBuilder::handleBitTestsSwitchCase(CaseRec& CR,
// Optimize the case where all the case values fit in a
// word without having to subtract minValue. In this case,
// we can optimize away the subtraction.
- if (minValue.isNonNegative() && maxValue.slt(IntPtrBits)) {
+ if (maxValue.ult(IntPtrBits)) {
cmpRange = maxValue;
} else {
lowBound = minValue;
@@ -2407,57 +2428,46 @@ bool SelectionDAGBuilder::handleBitTestsSwitchCase(CaseRec& CR,
/// Clusterify - Transform simple list of Cases into list of CaseRange's
size_t SelectionDAGBuilder::Clusterify(CaseVector& Cases,
const SwitchInst& SI) {
- size_t numCmps = 0;
+
+ /// Use a shorter form of declaration, and also
+ /// show that we want to use IntegersSubsetMapping as the Clusterifier.
+ typedef IntegersSubsetMapping<MachineBasicBlock> Clusterifier;
+
+ Clusterifier TheClusterifier;
- BranchProbabilityInfo *BPI = FuncInfo.BPI;
// Start with "simple" cases
for (SwitchInst::ConstCaseIt i = SI.case_begin(), e = SI.case_end();
i != e; ++i) {
const BasicBlock *SuccBB = i.getCaseSuccessor();
MachineBasicBlock *SMBB = FuncInfo.MBBMap[SuccBB];
- uint32_t ExtraWeight = BPI ? BPI->getEdgeWeight(SI.getParent(), SuccBB) : 0;
-
- Cases.push_back(Case(i.getCaseValue(), i.getCaseValue(),
- SMBB, ExtraWeight));
- }
- std::sort(Cases.begin(), Cases.end(), CaseCmp());
-
- // Merge case into clusters
- if (Cases.size() >= 2)
- // Must recompute end() each iteration because it may be
- // invalidated by erase if we hold on to it
- for (CaseItr I = Cases.begin(), J = llvm::next(Cases.begin());
- J != Cases.end(); ) {
- const APInt& nextValue = cast<ConstantInt>(J->Low)->getValue();
- const APInt& currentValue = cast<ConstantInt>(I->High)->getValue();
- MachineBasicBlock* nextBB = J->BB;
- MachineBasicBlock* currentBB = I->BB;
-
- // If the two neighboring cases go to the same destination, merge them
- // into a single case.
- if ((nextValue - currentValue == 1) && (currentBB == nextBB)) {
- I->High = J->High;
- J = Cases.erase(J);
-
- if (BranchProbabilityInfo *BPI = FuncInfo.BPI) {
- uint32_t CurWeight = currentBB->getBasicBlock() ?
- BPI->getEdgeWeight(SI.getParent(), currentBB->getBasicBlock()) : 16;
- uint32_t NextWeight = nextBB->getBasicBlock() ?
- BPI->getEdgeWeight(SI.getParent(), nextBB->getBasicBlock()) : 16;
-
- BPI->setEdgeWeight(SI.getParent(), currentBB->getBasicBlock(),
- CurWeight + NextWeight);
- }
- } else {
- I = J++;
- }
+ TheClusterifier.add(i.getCaseValueEx(), SMBB);
+ }
+
+ TheClusterifier.optimize();
+
+ BranchProbabilityInfo *BPI = FuncInfo.BPI;
+ size_t numCmps = 0;
+ for (Clusterifier::RangeIterator i = TheClusterifier.begin(),
+ e = TheClusterifier.end(); i != e; ++i, ++numCmps) {
+ Clusterifier::Cluster &C = *i;
+ unsigned W = 0;
+ if (BPI) {
+ W = BPI->getEdgeWeight(SI.getParent(), C.second->getBasicBlock());
+ if (!W)
+ W = 16;
+ W *= C.first.Weight;
+ BPI->setEdgeWeight(SI.getParent(), C.second->getBasicBlock(), W);
}
- for (CaseItr I=Cases.begin(), E=Cases.end(); I!=E; ++I, ++numCmps) {
- if (I->Low != I->High)
- // A range counts double, since it requires two compares.
- ++numCmps;
+ // FIXME: This currently works with ConstantInt-based numbers.
+ // Changing it to be APInt-based is too heavy a change for this commit.
+ Cases.push_back(Case(C.first.getLow().toConstantInt(),
+ C.first.getHigh().toConstantInt(), C.second, W));
+
+ if (C.first.getLow() != C.first.getHigh())
+ // A range counts double, since it requires two compares.
+ ++numCmps;
}
return numCmps;
@@ -2804,7 +2814,7 @@ void SelectionDAGBuilder::visitExtractElement(const User &I) {
}
// Utility for visitShuffleVector - Return true if every element in Mask,
-// begining from position Pos and ending in Pos+Size, falls within the
+// beginning from position Pos and ending in Pos+Size, falls within the
// specified sequential range [L, L+Pos). or is undef.
static bool isSequentialInRange(const SmallVectorImpl<int> &Mask,
unsigned Pos, unsigned Size, int Low) {
@@ -4914,6 +4924,16 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
case Intrinsic::pow:
visitPow(I);
return 0;
+ case Intrinsic::fabs:
+ setValue(&I, DAG.getNode(ISD::FABS, dl,
+ getValue(I.getArgOperand(0)).getValueType(),
+ getValue(I.getArgOperand(0))));
+ return 0;
+ case Intrinsic::floor:
+ setValue(&I, DAG.getNode(ISD::FFLOOR, dl,
+ getValue(I.getArgOperand(0)).getValueType(),
+ getValue(I.getArgOperand(0))));
+ return 0;
case Intrinsic::fma:
setValue(&I, DAG.getNode(ISD::FMA, dl,
getValue(I.getArgOperand(0)).getValueType(),
@@ -4921,6 +4941,29 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
getValue(I.getArgOperand(1)),
getValue(I.getArgOperand(2))));
return 0;
+ case Intrinsic::fmuladd: {
+ EVT VT = TLI.getValueType(I.getType());
+ if (TM.Options.AllowFPOpFusion != FPOpFusion::Strict &&
+ TLI.isOperationLegal(ISD::FMA, VT) &&
+ TLI.isFMAFasterThanMulAndAdd(VT)){
+ setValue(&I, DAG.getNode(ISD::FMA, dl,
+ getValue(I.getArgOperand(0)).getValueType(),
+ getValue(I.getArgOperand(0)),
+ getValue(I.getArgOperand(1)),
+ getValue(I.getArgOperand(2))));
+ } else {
+ SDValue Mul = DAG.getNode(ISD::FMUL, dl,
+ getValue(I.getArgOperand(0)).getValueType(),
+ getValue(I.getArgOperand(0)),
+ getValue(I.getArgOperand(1)));
+ SDValue Add = DAG.getNode(ISD::FADD, dl,
+ getValue(I.getArgOperand(0)).getValueType(),
+ Mul,
+ getValue(I.getArgOperand(2)));
+ setValue(&I, Add);
+ }
+ return 0;
+ }
case Intrinsic::convert_to_fp16:
setValue(&I, DAG.getNode(ISD::FP32_TO_FP16, dl,
MVT::i16, getValue(I.getArgOperand(0))));
@@ -5077,16 +5120,21 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
return 0;
}
TargetLowering::ArgListTy Args;
- std::pair<SDValue, SDValue> Result =
- TLI.LowerCallTo(getRoot(), I.getType(),
+ TargetLowering::
+ CallLoweringInfo CLI(getRoot(), I.getType(),
false, false, false, false, 0, CallingConv::C,
/*isTailCall=*/false,
/*doesNotRet=*/false, /*isReturnValueUsed=*/true,
DAG.getExternalSymbol(TrapFuncName.data(), TLI.getPointerTy()),
Args, DAG, getCurDebugLoc());
+ std::pair<SDValue, SDValue> Result = TLI.LowerCallTo(CLI);
DAG.setRoot(Result.second);
return 0;
}
+ case Intrinsic::debugtrap: {
+ DAG.setRoot(DAG.getNode(ISD::DEBUGTRAP, dl,MVT::Other, getRoot()));
+ return 0;
+ }
case Intrinsic::uadd_with_overflow:
case Intrinsic::sadd_with_overflow:
case Intrinsic::usub_with_overflow:
@@ -5139,6 +5187,9 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
case Intrinsic::lifetime_end:
// Discard region information.
return 0;
+ case Intrinsic::donothing:
+ // ignore
+ return 0;
}
}
@@ -5157,14 +5208,13 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
// Check whether the function can return without sret-demotion.
SmallVector<ISD::OutputArg, 4> Outs;
- SmallVector<uint64_t, 4> Offsets;
GetReturnInfo(RetTy, CS.getAttributes().getRetAttributes(),
- Outs, TLI, &Offsets);
+ Outs, TLI);
bool CanLowerReturn = TLI.CanLowerReturn(CS.getCallingConv(),
- DAG.getMachineFunction(),
- FTy->isVarArg(), Outs,
- FTy->getContext());
+ DAG.getMachineFunction(),
+ FTy->isVarArg(), Outs,
+ FTy->getContext());
SDValue DemoteStackSlot;
int DemoteStackIdx = -100;
@@ -5247,16 +5297,10 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
if (isTailCall && TM.Options.EnableFastISel)
isTailCall = false;
- std::pair<SDValue,SDValue> Result =
- TLI.LowerCallTo(getRoot(), RetTy,
- CS.paramHasAttr(0, Attribute::SExt),
- CS.paramHasAttr(0, Attribute::ZExt), FTy->isVarArg(),
- CS.paramHasAttr(0, Attribute::InReg), FTy->getNumParams(),
- CS.getCallingConv(),
- isTailCall,
- CS.doesNotReturn(),
- !CS.getInstruction()->use_empty(),
- Callee, Args, DAG, getCurDebugLoc());
+ TargetLowering::
+ CallLoweringInfo CLI(getRoot(), RetTy, FTy, isTailCall, Callee, Args, DAG,
+ getCurDebugLoc(), CS);
+ std::pair<SDValue,SDValue> Result = TLI.LowerCallTo(CLI);
assert((isTailCall || Result.second.getNode()) &&
"Non-null chain expected with non-tail call!");
assert((Result.second.getNode() || !Result.first.getNode()) &&
@@ -5272,7 +5316,13 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
ComputeValueVTs(TLI, PtrRetTy, PVTs);
assert(PVTs.size() == 1 && "Pointers should fit in one register");
EVT PtrVT = PVTs[0];
- unsigned NumValues = Outs.size();
+
+ SmallVector<EVT, 4> RetTys;
+ SmallVector<uint64_t, 4> Offsets;
+ RetTy = FTy->getReturnType();
+ ComputeValueVTs(TLI, RetTy, RetTys, &Offsets);
+
+ unsigned NumValues = RetTys.size();
SmallVector<SDValue, 4> Values(NumValues);
SmallVector<SDValue, 4> Chains(NumValues);
@@ -5280,8 +5330,7 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
SDValue Add = DAG.getNode(ISD::ADD, getCurDebugLoc(), PtrVT,
DemoteStackSlot,
DAG.getConstant(Offsets[i], PtrVT));
- SDValue L = DAG.getLoad(Outs[i].VT, getCurDebugLoc(), Result.second,
- Add,
+ SDValue L = DAG.getLoad(RetTys[i], getCurDebugLoc(), Result.second, Add,
MachinePointerInfo::getFixedStack(DemoteStackIdx, Offsets[i]),
false, false, false, 1);
Values[i] = L;
@@ -5292,30 +5341,10 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
MVT::Other, &Chains[0], NumValues);
PendingLoads.push_back(Chain);
- // Collect the legal value parts into potentially illegal values
- // that correspond to the original function's return values.
- SmallVector<EVT, 4> RetTys;
- RetTy = FTy->getReturnType();
- ComputeValueVTs(TLI, RetTy, RetTys);
- ISD::NodeType AssertOp = ISD::DELETED_NODE;
- SmallVector<SDValue, 4> ReturnValues;
- unsigned CurReg = 0;
- for (unsigned I = 0, E = RetTys.size(); I != E; ++I) {
- EVT VT = RetTys[I];
- EVT RegisterVT = TLI.getRegisterType(RetTy->getContext(), VT);
- unsigned NumRegs = TLI.getNumRegisters(RetTy->getContext(), VT);
-
- SDValue ReturnValue =
- getCopyFromParts(DAG, getCurDebugLoc(), &Values[CurReg], NumRegs,
- RegisterVT, VT, AssertOp);
- ReturnValues.push_back(ReturnValue);
- CurReg += NumRegs;
- }
-
setValue(CS.getInstruction(),
DAG.getNode(ISD::MERGE_VALUES, getCurDebugLoc(),
DAG.getVTList(&RetTys[0], RetTys.size()),
- &ReturnValues[0], ReturnValues.size()));
+ &Values[0], Values.size()));
}
// Assign order to nodes here. If the call does not produce a result, it won't
@@ -5482,6 +5511,22 @@ bool SelectionDAGBuilder::visitMemCmpCall(const CallInst &I) {
return false;
}
+/// visitUnaryFloatCall - If a call instruction is a unary floating-point
+/// operation (as expected), translate it to an SDNode with the specified opcode
+/// and return true.
+bool SelectionDAGBuilder::visitUnaryFloatCall(const CallInst &I,
+ unsigned Opcode) {
+ // Sanity check that it really is a unary floating-point call.
+ if (I.getNumArgOperands() != 1 ||
+ !I.getArgOperand(0)->getType()->isFloatingPointTy() ||
+ I.getType() != I.getArgOperand(0)->getType() ||
+ !I.onlyReadsMemory())
+ return false;
+
+ SDValue Tmp = getValue(I.getArgOperand(0));
+ setValue(&I, DAG.getNode(Opcode, getCurDebugLoc(), Tmp.getValueType(), Tmp));
+ return true;
+}
void SelectionDAGBuilder::visitCall(const CallInst &I) {
// Handle inline assembly differently.
@@ -5512,150 +5557,97 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) {
// Check for well-known libc/libm calls. If the function is internal, it
// can't be a library call.
- if (!F->hasLocalLinkage() && F->hasName()) {
- StringRef Name = F->getName();
- if ((LibInfo->has(LibFunc::copysign) && Name == "copysign") ||
- (LibInfo->has(LibFunc::copysignf) && Name == "copysignf") ||
- (LibInfo->has(LibFunc::copysignl) && Name == "copysignl")) {
+ LibFunc::Func Func;
+ if (!F->hasLocalLinkage() && F->hasName() &&
+ LibInfo->getLibFunc(F->getName(), Func) &&
+ LibInfo->hasOptimizedCodeGen(Func)) {
+ switch (Func) {
+ default: break;
+ case LibFunc::copysign:
+ case LibFunc::copysignf:
+ case LibFunc::copysignl:
if (I.getNumArgOperands() == 2 && // Basic sanity checks.
I.getArgOperand(0)->getType()->isFloatingPointTy() &&
I.getType() == I.getArgOperand(0)->getType() &&
- I.getType() == I.getArgOperand(1)->getType()) {
+ I.getType() == I.getArgOperand(1)->getType() &&
+ I.onlyReadsMemory()) {
SDValue LHS = getValue(I.getArgOperand(0));
SDValue RHS = getValue(I.getArgOperand(1));
setValue(&I, DAG.getNode(ISD::FCOPYSIGN, getCurDebugLoc(),
LHS.getValueType(), LHS, RHS));
return;
}
- } else if ((LibInfo->has(LibFunc::fabs) && Name == "fabs") ||
- (LibInfo->has(LibFunc::fabsf) && Name == "fabsf") ||
- (LibInfo->has(LibFunc::fabsl) && Name == "fabsl")) {
- if (I.getNumArgOperands() == 1 && // Basic sanity checks.
- I.getArgOperand(0)->getType()->isFloatingPointTy() &&
- I.getType() == I.getArgOperand(0)->getType()) {
- SDValue Tmp = getValue(I.getArgOperand(0));
- setValue(&I, DAG.getNode(ISD::FABS, getCurDebugLoc(),
- Tmp.getValueType(), Tmp));
+ break;
+ case LibFunc::fabs:
+ case LibFunc::fabsf:
+ case LibFunc::fabsl:
+ if (visitUnaryFloatCall(I, ISD::FABS))
return;
- }
- } else if ((LibInfo->has(LibFunc::sin) && Name == "sin") ||
- (LibInfo->has(LibFunc::sinf) && Name == "sinf") ||
- (LibInfo->has(LibFunc::sinl) && Name == "sinl")) {
- if (I.getNumArgOperands() == 1 && // Basic sanity checks.
- I.getArgOperand(0)->getType()->isFloatingPointTy() &&
- I.getType() == I.getArgOperand(0)->getType() &&
- I.onlyReadsMemory()) {
- SDValue Tmp = getValue(I.getArgOperand(0));
- setValue(&I, DAG.getNode(ISD::FSIN, getCurDebugLoc(),
- Tmp.getValueType(), Tmp));
+ break;
+ case LibFunc::sin:
+ case LibFunc::sinf:
+ case LibFunc::sinl:
+ if (visitUnaryFloatCall(I, ISD::FSIN))
return;
- }
- } else if ((LibInfo->has(LibFunc::cos) && Name == "cos") ||
- (LibInfo->has(LibFunc::cosf) && Name == "cosf") ||
- (LibInfo->has(LibFunc::cosl) && Name == "cosl")) {
- if (I.getNumArgOperands() == 1 && // Basic sanity checks.
- I.getArgOperand(0)->getType()->isFloatingPointTy() &&
- I.getType() == I.getArgOperand(0)->getType() &&
- I.onlyReadsMemory()) {
- SDValue Tmp = getValue(I.getArgOperand(0));
- setValue(&I, DAG.getNode(ISD::FCOS, getCurDebugLoc(),
- Tmp.getValueType(), Tmp));
+ break;
+ case LibFunc::cos:
+ case LibFunc::cosf:
+ case LibFunc::cosl:
+ if (visitUnaryFloatCall(I, ISD::FCOS))
return;
- }
- } else if ((LibInfo->has(LibFunc::sqrt) && Name == "sqrt") ||
- (LibInfo->has(LibFunc::sqrtf) && Name == "sqrtf") ||
- (LibInfo->has(LibFunc::sqrtl) && Name == "sqrtl")) {
- if (I.getNumArgOperands() == 1 && // Basic sanity checks.
- I.getArgOperand(0)->getType()->isFloatingPointTy() &&
- I.getType() == I.getArgOperand(0)->getType() &&
- I.onlyReadsMemory()) {
- SDValue Tmp = getValue(I.getArgOperand(0));
- setValue(&I, DAG.getNode(ISD::FSQRT, getCurDebugLoc(),
- Tmp.getValueType(), Tmp));
+ break;
+ case LibFunc::sqrt:
+ case LibFunc::sqrtf:
+ case LibFunc::sqrtl:
+ if (visitUnaryFloatCall(I, ISD::FSQRT))
return;
- }
- } else if ((LibInfo->has(LibFunc::floor) && Name == "floor") ||
- (LibInfo->has(LibFunc::floorf) && Name == "floorf") ||
- (LibInfo->has(LibFunc::floorl) && Name == "floorl")) {
- if (I.getNumArgOperands() == 1 && // Basic sanity checks.
- I.getArgOperand(0)->getType()->isFloatingPointTy() &&
- I.getType() == I.getArgOperand(0)->getType()) {
- SDValue Tmp = getValue(I.getArgOperand(0));
- setValue(&I, DAG.getNode(ISD::FFLOOR, getCurDebugLoc(),
- Tmp.getValueType(), Tmp));
+ break;
+ case LibFunc::floor:
+ case LibFunc::floorf:
+ case LibFunc::floorl:
+ if (visitUnaryFloatCall(I, ISD::FFLOOR))
return;
- }
- } else if ((LibInfo->has(LibFunc::nearbyint) && Name == "nearbyint") ||
- (LibInfo->has(LibFunc::nearbyintf) && Name == "nearbyintf") ||
- (LibInfo->has(LibFunc::nearbyintl) && Name == "nearbyintl")) {
- if (I.getNumArgOperands() == 1 && // Basic sanity checks.
- I.getArgOperand(0)->getType()->isFloatingPointTy() &&
- I.getType() == I.getArgOperand(0)->getType()) {
- SDValue Tmp = getValue(I.getArgOperand(0));
- setValue(&I, DAG.getNode(ISD::FNEARBYINT, getCurDebugLoc(),
- Tmp.getValueType(), Tmp));
+ break;
+ case LibFunc::nearbyint:
+ case LibFunc::nearbyintf:
+ case LibFunc::nearbyintl:
+ if (visitUnaryFloatCall(I, ISD::FNEARBYINT))
return;
- }
- } else if ((LibInfo->has(LibFunc::ceil) && Name == "ceil") ||
- (LibInfo->has(LibFunc::ceilf) && Name == "ceilf") ||
- (LibInfo->has(LibFunc::ceill) && Name == "ceill")) {
- if (I.getNumArgOperands() == 1 && // Basic sanity checks.
- I.getArgOperand(0)->getType()->isFloatingPointTy() &&
- I.getType() == I.getArgOperand(0)->getType()) {
- SDValue Tmp = getValue(I.getArgOperand(0));
- setValue(&I, DAG.getNode(ISD::FCEIL, getCurDebugLoc(),
- Tmp.getValueType(), Tmp));
+ break;
+ case LibFunc::ceil:
+ case LibFunc::ceilf:
+ case LibFunc::ceill:
+ if (visitUnaryFloatCall(I, ISD::FCEIL))
return;
- }
- } else if ((LibInfo->has(LibFunc::rint) && Name == "rint") ||
- (LibInfo->has(LibFunc::rintf) && Name == "rintf") ||
- (LibInfo->has(LibFunc::rintl) && Name == "rintl")) {
- if (I.getNumArgOperands() == 1 && // Basic sanity checks.
- I.getArgOperand(0)->getType()->isFloatingPointTy() &&
- I.getType() == I.getArgOperand(0)->getType()) {
- SDValue Tmp = getValue(I.getArgOperand(0));
- setValue(&I, DAG.getNode(ISD::FRINT, getCurDebugLoc(),
- Tmp.getValueType(), Tmp));
+ break;
+ case LibFunc::rint:
+ case LibFunc::rintf:
+ case LibFunc::rintl:
+ if (visitUnaryFloatCall(I, ISD::FRINT))
return;
- }
- } else if ((LibInfo->has(LibFunc::trunc) && Name == "trunc") ||
- (LibInfo->has(LibFunc::truncf) && Name == "truncf") ||
- (LibInfo->has(LibFunc::truncl) && Name == "truncl")) {
- if (I.getNumArgOperands() == 1 && // Basic sanity checks.
- I.getArgOperand(0)->getType()->isFloatingPointTy() &&
- I.getType() == I.getArgOperand(0)->getType()) {
- SDValue Tmp = getValue(I.getArgOperand(0));
- setValue(&I, DAG.getNode(ISD::FTRUNC, getCurDebugLoc(),
- Tmp.getValueType(), Tmp));
+ break;
+ case LibFunc::trunc:
+ case LibFunc::truncf:
+ case LibFunc::truncl:
+ if (visitUnaryFloatCall(I, ISD::FTRUNC))
return;
- }
- } else if ((LibInfo->has(LibFunc::log2) && Name == "log2") ||
- (LibInfo->has(LibFunc::log2f) && Name == "log2f") ||
- (LibInfo->has(LibFunc::log2l) && Name == "log2l")) {
- if (I.getNumArgOperands() == 1 && // Basic sanity checks.
- I.getArgOperand(0)->getType()->isFloatingPointTy() &&
- I.getType() == I.getArgOperand(0)->getType() &&
- I.onlyReadsMemory()) {
- SDValue Tmp = getValue(I.getArgOperand(0));
- setValue(&I, DAG.getNode(ISD::FLOG2, getCurDebugLoc(),
- Tmp.getValueType(), Tmp));
+ break;
+ case LibFunc::log2:
+ case LibFunc::log2f:
+ case LibFunc::log2l:
+ if (visitUnaryFloatCall(I, ISD::FLOG2))
return;
- }
- } else if ((LibInfo->has(LibFunc::exp2) && Name == "exp2") ||
- (LibInfo->has(LibFunc::exp2f) && Name == "exp2f") ||
- (LibInfo->has(LibFunc::exp2l) && Name == "exp2l")) {
- if (I.getNumArgOperands() == 1 && // Basic sanity checks.
- I.getArgOperand(0)->getType()->isFloatingPointTy() &&
- I.getType() == I.getArgOperand(0)->getType() &&
- I.onlyReadsMemory()) {
- SDValue Tmp = getValue(I.getArgOperand(0));
- setValue(&I, DAG.getNode(ISD::FEXP2, getCurDebugLoc(),
- Tmp.getValueType(), Tmp));
+ break;
+ case LibFunc::exp2:
+ case LibFunc::exp2f:
+ case LibFunc::exp2l:
+ if (visitUnaryFloatCall(I, ISD::FEXP2))
return;
- }
- } else if (Name == "memcmp") {
+ break;
+ case LibFunc::memcmp:
if (visitMemCmpCall(I))
return;
+ break;
}
}
}
@@ -5952,11 +5944,11 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
SDISelAsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
if (OpInfo.ConstraintVT != Input.ConstraintVT) {
- std::pair<unsigned, const TargetRegisterClass*> MatchRC =
- TLI.getRegForInlineAsmConstraint(OpInfo.ConstraintCode,
+ std::pair<unsigned, const TargetRegisterClass*> MatchRC =
+ TLI.getRegForInlineAsmConstraint(OpInfo.ConstraintCode,
OpInfo.ConstraintVT);
- std::pair<unsigned, const TargetRegisterClass*> InputRC =
- TLI.getRegForInlineAsmConstraint(Input.ConstraintCode,
+ std::pair<unsigned, const TargetRegisterClass*> InputRC =
+ TLI.getRegForInlineAsmConstraint(Input.ConstraintCode,
Input.ConstraintVT);
if ((OpInfo.ConstraintVT.isInteger() !=
Input.ConstraintVT.isInteger()) ||
@@ -6225,8 +6217,15 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
assert((OpInfo.ConstraintType == TargetLowering::C_RegisterClass ||
OpInfo.ConstraintType == TargetLowering::C_Register) &&
"Unknown constraint type!");
- assert(!OpInfo.isIndirect &&
- "Don't know how to handle indirect register inputs yet!");
+
+ // TODO: Support this.
+ if (OpInfo.isIndirect) {
+ LLVMContext &Ctx = *DAG.getContext();
+ Ctx.emitError(CS.getInstruction(),
+ "Don't know how to handle indirect register inputs yet "
+ "for constraint '" + Twine(OpInfo.ConstraintCode) + "'");
+ break;
+ }
// Copy the input into the appropriate registers.
if (OpInfo.AssignedRegs.Regs.empty()) {
@@ -6369,24 +6368,18 @@ void SelectionDAGBuilder::visitVACopy(const CallInst &I) {
/// FIXME: When all targets are
/// migrated to using LowerCall, this hook should be integrated into SDISel.
std::pair<SDValue, SDValue>
-TargetLowering::LowerCallTo(SDValue Chain, Type *RetTy,
- bool RetSExt, bool RetZExt, bool isVarArg,
- bool isInreg, unsigned NumFixedArgs,
- CallingConv::ID CallConv, bool isTailCall,
- bool doesNotRet, bool isReturnValueUsed,
- SDValue Callee,
- ArgListTy &Args, SelectionDAG &DAG,
- DebugLoc dl) const {
+TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
// Handle all of the outgoing arguments.
- SmallVector<ISD::OutputArg, 32> Outs;
- SmallVector<SDValue, 32> OutVals;
+ CLI.Outs.clear();
+ CLI.OutVals.clear();
+ ArgListTy &Args = CLI.Args;
for (unsigned i = 0, e = Args.size(); i != e; ++i) {
SmallVector<EVT, 4> ValueVTs;
ComputeValueVTs(*this, Args[i].Ty, ValueVTs);
for (unsigned Value = 0, NumValues = ValueVTs.size();
Value != NumValues; ++Value) {
EVT VT = ValueVTs[Value];
- Type *ArgTy = VT.getTypeForEVT(RetTy->getContext());
+ Type *ArgTy = VT.getTypeForEVT(CLI.RetTy->getContext());
SDValue Op = SDValue(Args[i].Node.getNode(),
Args[i].Node.getResNo() + Value);
ISD::ArgFlagsTy Flags;
@@ -6419,8 +6412,8 @@ TargetLowering::LowerCallTo(SDValue Chain, Type *RetTy,
Flags.setNest();
Flags.setOrigAlign(OriginalAlignment);
- EVT PartVT = getRegisterType(RetTy->getContext(), VT);
- unsigned NumParts = getNumRegisters(RetTy->getContext(), VT);
+ EVT PartVT = getRegisterType(CLI.RetTy->getContext(), VT);
+ unsigned NumParts = getNumRegisters(CLI.RetTy->getContext(), VT);
SmallVector<SDValue, 4> Parts(NumParts);
ISD::NodeType ExtendKind = ISD::ANY_EXTEND;
@@ -6429,89 +6422,88 @@ TargetLowering::LowerCallTo(SDValue Chain, Type *RetTy,
else if (Args[i].isZExt)
ExtendKind = ISD::ZERO_EXTEND;
- getCopyToParts(DAG, dl, Op, &Parts[0], NumParts,
+ getCopyToParts(CLI.DAG, CLI.DL, Op, &Parts[0], NumParts,
PartVT, ExtendKind);
for (unsigned j = 0; j != NumParts; ++j) {
// if it isn't first piece, alignment must be 1
ISD::OutputArg MyFlags(Flags, Parts[j].getValueType(),
- i < NumFixedArgs);
+ i < CLI.NumFixedArgs);
if (NumParts > 1 && j == 0)
MyFlags.Flags.setSplit();
else if (j != 0)
MyFlags.Flags.setOrigAlign(1);
- Outs.push_back(MyFlags);
- OutVals.push_back(Parts[j]);
+ CLI.Outs.push_back(MyFlags);
+ CLI.OutVals.push_back(Parts[j]);
}
}
}
// Handle the incoming return values from the call.
- SmallVector<ISD::InputArg, 32> Ins;
+ CLI.Ins.clear();
SmallVector<EVT, 4> RetTys;
- ComputeValueVTs(*this, RetTy, RetTys);
+ ComputeValueVTs(*this, CLI.RetTy, RetTys);
for (unsigned I = 0, E = RetTys.size(); I != E; ++I) {
EVT VT = RetTys[I];
- EVT RegisterVT = getRegisterType(RetTy->getContext(), VT);
- unsigned NumRegs = getNumRegisters(RetTy->getContext(), VT);
+ EVT RegisterVT = getRegisterType(CLI.RetTy->getContext(), VT);
+ unsigned NumRegs = getNumRegisters(CLI.RetTy->getContext(), VT);
for (unsigned i = 0; i != NumRegs; ++i) {
ISD::InputArg MyFlags;
MyFlags.VT = RegisterVT.getSimpleVT();
- MyFlags.Used = isReturnValueUsed;
- if (RetSExt)
+ MyFlags.Used = CLI.IsReturnValueUsed;
+ if (CLI.RetSExt)
MyFlags.Flags.setSExt();
- if (RetZExt)
+ if (CLI.RetZExt)
MyFlags.Flags.setZExt();
- if (isInreg)
+ if (CLI.IsInReg)
MyFlags.Flags.setInReg();
- Ins.push_back(MyFlags);
+ CLI.Ins.push_back(MyFlags);
}
}
SmallVector<SDValue, 4> InVals;
- Chain = LowerCall(Chain, Callee, CallConv, isVarArg, doesNotRet, isTailCall,
- Outs, OutVals, Ins, dl, DAG, InVals);
+ CLI.Chain = LowerCall(CLI, InVals);
// Verify that the target's LowerCall behaved as expected.
- assert(Chain.getNode() && Chain.getValueType() == MVT::Other &&
+ assert(CLI.Chain.getNode() && CLI.Chain.getValueType() == MVT::Other &&
"LowerCall didn't return a valid chain!");
- assert((!isTailCall || InVals.empty()) &&
+ assert((!CLI.IsTailCall || InVals.empty()) &&
"LowerCall emitted a return value for a tail call!");
- assert((isTailCall || InVals.size() == Ins.size()) &&
+ assert((CLI.IsTailCall || InVals.size() == CLI.Ins.size()) &&
"LowerCall didn't emit the correct number of values!");
// For a tail call, the return value is merely live-out and there aren't
// any nodes in the DAG representing it. Return a special value to
// indicate that a tail call has been emitted and no more Instructions
// should be processed in the current block.
- if (isTailCall) {
- DAG.setRoot(Chain);
+ if (CLI.IsTailCall) {
+ CLI.DAG.setRoot(CLI.Chain);
return std::make_pair(SDValue(), SDValue());
}
- DEBUG(for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
+ DEBUG(for (unsigned i = 0, e = CLI.Ins.size(); i != e; ++i) {
assert(InVals[i].getNode() &&
"LowerCall emitted a null value!");
- assert(EVT(Ins[i].VT) == InVals[i].getValueType() &&
+ assert(EVT(CLI.Ins[i].VT) == InVals[i].getValueType() &&
"LowerCall emitted a value with the wrong type!");
});
// Collect the legal value parts into potentially illegal values
// that correspond to the original function's return values.
ISD::NodeType AssertOp = ISD::DELETED_NODE;
- if (RetSExt)
+ if (CLI.RetSExt)
AssertOp = ISD::AssertSext;
- else if (RetZExt)
+ else if (CLI.RetZExt)
AssertOp = ISD::AssertZext;
SmallVector<SDValue, 4> ReturnValues;
unsigned CurReg = 0;
for (unsigned I = 0, E = RetTys.size(); I != E; ++I) {
EVT VT = RetTys[I];
- EVT RegisterVT = getRegisterType(RetTy->getContext(), VT);
- unsigned NumRegs = getNumRegisters(RetTy->getContext(), VT);
+ EVT RegisterVT = getRegisterType(CLI.RetTy->getContext(), VT);
+ unsigned NumRegs = getNumRegisters(CLI.RetTy->getContext(), VT);
- ReturnValues.push_back(getCopyFromParts(DAG, dl, &InVals[CurReg],
+ ReturnValues.push_back(getCopyFromParts(CLI.DAG, CLI.DL, &InVals[CurReg],
NumRegs, RegisterVT, VT,
AssertOp));
CurReg += NumRegs;
@@ -6521,12 +6513,12 @@ TargetLowering::LowerCallTo(SDValue Chain, Type *RetTy,
// such a node, so we just return a null return value in that case. In
// that case, nothing will actually look at the value.
if (ReturnValues.empty())
- return std::make_pair(SDValue(), Chain);
+ return std::make_pair(SDValue(), CLI.Chain);
- SDValue Res = DAG.getNode(ISD::MERGE_VALUES, dl,
- DAG.getVTList(&RetTys[0], RetTys.size()),
+ SDValue Res = CLI.DAG.getNode(ISD::MERGE_VALUES, CLI.DL,
+ CLI.DAG.getVTList(&RetTys[0], RetTys.size()),
&ReturnValues[0], ReturnValues.size());
- return std::make_pair(Res, Chain);
+ return std::make_pair(Res, CLI.Chain);
}
void TargetLowering::LowerOperationWrapper(SDNode *N,
@@ -6746,7 +6738,7 @@ void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) {
// Note down frame index.
if (FrameIndexSDNode *FI =
- dyn_cast<FrameIndexSDNode>(ArgValues[0].getNode()))
+ dyn_cast<FrameIndexSDNode>(ArgValues[0].getNode()))
FuncInfo->setArgumentFrameIndex(I, FI->getIndex());
SDValue Res = DAG.getMergeValues(&ArgValues[0], NumValues,
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
index 8393b41..4090002 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
@@ -180,17 +180,6 @@ private:
typedef std::vector<CaseRec> CaseRecVector;
- /// The comparison function for sorting the switch case values in the vector.
- /// WARNING: Case ranges should be disjoint!
- struct CaseCmp {
- bool operator()(const Case &C1, const Case &C2) {
- assert(isa<ConstantInt>(C1.Low) && isa<ConstantInt>(C2.High));
- const ConstantInt* CI1 = cast<const ConstantInt>(C1.Low);
- const ConstantInt* CI2 = cast<const ConstantInt>(C2.High);
- return CI1->getValue().slt(CI2->getValue());
- }
- };
-
struct CaseBitsCmp {
bool operator()(const CaseBits &C1, const CaseBits &C2) {
return C1.Bits > C2.Bits;
@@ -351,7 +340,7 @@ public:
void clear();
/// clearDanglingDebugInfo - Clear the dangling debug information
- /// map. This function is seperated from the clear so that debug
+ /// map. This function is separated from the clear so that debug
/// information that is dangling in a basic block can be properly
/// resolved in a different basic block. This allows the
/// SelectionDAG to resolve dangling debug information attached
@@ -531,6 +520,7 @@ private:
void visitPHI(const PHINode &I);
void visitCall(const CallInst &I);
bool visitMemCmpCall(const CallInst &I);
+ bool visitUnaryFloatCall(const CallInst &I, unsigned Opcode);
void visitAtomicLoad(const LoadInst &I);
void visitAtomicStore(const StoreInst &I);
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
index f981afb..13cd011 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
@@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//
#include "ScheduleDAGSDNodes.h"
+#include "llvm/DebugInfo.h"
#include "llvm/Function.h"
#include "llvm/Intrinsics.h"
#include "llvm/Assembly/Writer.h"
@@ -19,7 +20,6 @@
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
-#include "llvm/Analysis/DebugInfo.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetIntrinsicInfo.h"
#include "llvm/Target/TargetMachine.h"
@@ -100,6 +100,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::EH_SJLJ_SETJMP: return "EH_SJLJ_SETJMP";
case ISD::EH_SJLJ_LONGJMP: return "EH_SJLJ_LONGJMP";
case ISD::ConstantPool: return "ConstantPool";
+ case ISD::TargetIndex: return "TargetIndex";
case ISD::ExternalSymbol: return "ExternalSymbol";
case ISD::BlockAddress: return "BlockAddress";
case ISD::INTRINSIC_WO_CHAIN:
@@ -265,6 +266,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::STACKSAVE: return "stacksave";
case ISD::STACKRESTORE: return "stackrestore";
case ISD::TRAP: return "trap";
+ case ISD::DEBUGTRAP: return "debugtrap";
// Bit manipulation
case ISD::BSWAP: return "bswap";
@@ -408,6 +410,10 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const {
OS << " " << offset;
if (unsigned int TF = CP->getTargetFlags())
OS << " [TF=" << TF << ']';
+ } else if (const TargetIndexSDNode *TI = dyn_cast<TargetIndexSDNode>(this)) {
+ OS << "<" << TI->getIndex() << '+' << TI->getOffset() << ">";
+ if (unsigned TF = TI->getTargetFlags())
+ OS << " [TF=" << TF << ']';
} else if (const BasicBlockSDNode *BBDN = dyn_cast<BasicBlockSDNode>(this)) {
OS << "<";
const Value *LBB = (const Value*)BBDN->getBasicBlock()->getBasicBlock();
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index 605509b..4e5e3ba 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -14,12 +14,8 @@
#define DEBUG_TYPE "isel"
#include "ScheduleDAGSDNodes.h"
#include "SelectionDAGBuilder.h"
-#include "llvm/CodeGen/FunctionLoweringInfo.h"
-#include "llvm/CodeGen/SelectionDAGISel.h"
-#include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/Analysis/BranchProbabilityInfo.h"
-#include "llvm/Analysis/DebugInfo.h"
#include "llvm/Constants.h"
+#include "llvm/DebugInfo.h"
#include "llvm/Function.h"
#include "llvm/InlineAsm.h"
#include "llvm/Instructions.h"
@@ -27,7 +23,10 @@
#include "llvm/IntrinsicInst.h"
#include "llvm/LLVMContext.h"
#include "llvm/Module.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/CodeGen/FastISel.h"
+#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/GCStrategy.h"
#include "llvm/CodeGen/GCMetadata.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
@@ -38,6 +37,7 @@
#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
#include "llvm/CodeGen/SchedulerRegistry.h"
#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetIntrinsicInfo.h"
#include "llvm/Target/TargetInstrInfo.h"
@@ -263,8 +263,6 @@ void TargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI,
// SelectionDAGISel code
//===----------------------------------------------------------------------===//
-void SelectionDAGISel::ISelUpdater::anchor() { }
-
SelectionDAGISel::SelectionDAGISel(const TargetMachine &tm,
CodeGenOpt::Level OL) :
MachineFunctionPass(ID), TM(tm), TLI(*tm.getTargetLowering()),
@@ -451,9 +449,9 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
}
}
}
- done:;
}
+ done:
// Determine if there is a call to setjmp in the machine function.
MF->setExposesReturnsTwice(Fn.callsFunctionThatReturnsTwice());
@@ -468,8 +466,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
// If To is also scheduled to be replaced, find what its ultimate
// replacement is.
for (;;) {
- DenseMap<unsigned, unsigned>::iterator J =
- FuncInfo->RegFixups.find(To);
+ DenseMap<unsigned, unsigned>::iterator J = FuncInfo->RegFixups.find(To);
if (J == E) break;
To = J->second;
}
@@ -703,6 +700,25 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
CurDAG->clear();
}
+namespace {
+/// ISelUpdater - helper class to handle updates of the instruction selection
+/// graph.
+class ISelUpdater : public SelectionDAG::DAGUpdateListener {
+ SelectionDAG::allnodes_iterator &ISelPosition;
+public:
+ ISelUpdater(SelectionDAG &DAG, SelectionDAG::allnodes_iterator &isp)
+ : SelectionDAG::DAGUpdateListener(DAG), ISelPosition(isp) {}
+
+ /// NodeDeleted - Handle nodes deleted from the graph. If the node being
+ /// deleted is the current ISelPosition node, update ISelPosition.
+ ///
+ virtual void NodeDeleted(SDNode *N, SDNode *E) {
+ if (ISelPosition == SelectionDAG::allnodes_iterator(N))
+ ++ISelPosition;
+ }
+};
+} // end anonymous namespace
+
void SelectionDAGISel::DoInstructionSelection() {
DEBUG(errs() << "===== Instruction selection begins: BB#"
<< FuncInfo->MBB->getNumber()
@@ -719,9 +735,13 @@ void SelectionDAGISel::DoInstructionSelection() {
// a reference to the root node, preventing it from being deleted,
// and tracking any changes of the root.
HandleSDNode Dummy(CurDAG->getRoot());
- ISelPosition = SelectionDAG::allnodes_iterator(CurDAG->getRoot().getNode());
+ SelectionDAG::allnodes_iterator ISelPosition (CurDAG->getRoot().getNode());
++ISelPosition;
+ // Make sure that ISelPosition gets properly updated when nodes are deleted
+ // in calls made from this function.
+ ISelUpdater ISU(*CurDAG, ISelPosition);
+
// The AllNodes list is now topological-sorted. Visit the
// nodes by starting at the end of the list (the root of the
// graph) and preceding back toward the beginning (the entry
@@ -748,10 +768,8 @@ void SelectionDAGISel::DoInstructionSelection() {
// If after the replacement this node is not used any more,
// remove this dead node.
- if (Node->use_empty()) { // Don't delete EntryToken, etc.
- ISelUpdater ISU(ISelPosition);
- CurDAG->RemoveDeadNode(Node, &ISU);
- }
+ if (Node->use_empty()) // Don't delete EntryToken, etc.
+ CurDAG->RemoveDeadNode(Node);
}
CurDAG->setRoot(Dummy.getValue());
@@ -961,7 +979,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
// Initialize the Fast-ISel state, if needed.
FastISel *FastIS = 0;
if (TM.Options.EnableFastISel)
- FastIS = TLI.createFastISel(*FuncInfo);
+ FastIS = TLI.createFastISel(*FuncInfo, LibInfo);
// Iterate over all basic blocks in the function.
ReversePostOrderTraversal<const Function*> RPOT(&Fn);
@@ -1680,8 +1698,6 @@ UpdateChainsAndGlue(SDNode *NodeToMatch, SDValue InputChain,
bool isMorphNodeTo) {
SmallVector<SDNode*, 4> NowDeadNodes;
- ISelUpdater ISU(ISelPosition);
-
// Now that all the normal results are replaced, we replace the chain and
// glue results if present.
if (!ChainNodesMatched.empty()) {
@@ -1705,7 +1721,7 @@ UpdateChainsAndGlue(SDNode *NodeToMatch, SDValue InputChain,
if (ChainVal.getValueType() == MVT::Glue)
ChainVal = ChainVal.getValue(ChainVal->getNumValues()-2);
assert(ChainVal.getValueType() == MVT::Other && "Not a chain?");
- CurDAG->ReplaceAllUsesOfValueWith(ChainVal, InputChain, &ISU);
+ CurDAG->ReplaceAllUsesOfValueWith(ChainVal, InputChain);
// If the node became dead and we haven't already seen it, delete it.
if (ChainNode->use_empty() &&
@@ -1728,7 +1744,7 @@ UpdateChainsAndGlue(SDNode *NodeToMatch, SDValue InputChain,
assert(FRN->getValueType(FRN->getNumValues()-1) == MVT::Glue &&
"Doesn't have a glue result");
CurDAG->ReplaceAllUsesOfValueWith(SDValue(FRN, FRN->getNumValues()-1),
- InputGlue, &ISU);
+ InputGlue);
// If the node became dead and we haven't already seen it, delete it.
if (FRN->use_empty() &&
@@ -1738,7 +1754,7 @@ UpdateChainsAndGlue(SDNode *NodeToMatch, SDValue InputChain,
}
if (!NowDeadNodes.empty())
- CurDAG->RemoveDeadNodes(NowDeadNodes, &ISU);
+ CurDAG->RemoveDeadNodes(NowDeadNodes);
DEBUG(errs() << "ISEL: Match complete!\n");
}
@@ -1759,7 +1775,7 @@ enum ChainResult {
/// The walk we do here is guaranteed to be small because we quickly get down to
/// already selected nodes "below" us.
static ChainResult
-WalkChainUsers(SDNode *ChainedNode,
+WalkChainUsers(const SDNode *ChainedNode,
SmallVectorImpl<SDNode*> &ChainedNodesInPattern,
SmallVectorImpl<SDNode*> &InteriorChainedNodes) {
ChainResult Result = CR_Simple;
@@ -1992,14 +2008,14 @@ CheckSame(const unsigned char *MatcherTable, unsigned &MatcherIndex,
/// CheckPatternPredicate - Implements OP_CheckPatternPredicate.
LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
CheckPatternPredicate(const unsigned char *MatcherTable, unsigned &MatcherIndex,
- SelectionDAGISel &SDISel) {
+ const SelectionDAGISel &SDISel) {
return SDISel.CheckPatternPredicate(MatcherTable[MatcherIndex++]);
}
/// CheckNodePredicate - Implements OP_CheckNodePredicate.
LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
CheckNodePredicate(const unsigned char *MatcherTable, unsigned &MatcherIndex,
- SelectionDAGISel &SDISel, SDNode *N) {
+ const SelectionDAGISel &SDISel, SDNode *N) {
return SDISel.CheckNodePredicate(N, MatcherTable[MatcherIndex++]);
}
@@ -2062,7 +2078,7 @@ CheckInteger(const unsigned char *MatcherTable, unsigned &MatcherIndex,
LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
CheckAndImm(const unsigned char *MatcherTable, unsigned &MatcherIndex,
- SDValue N, SelectionDAGISel &SDISel) {
+ SDValue N, const SelectionDAGISel &SDISel) {
int64_t Val = MatcherTable[MatcherIndex++];
if (Val & 128)
Val = GetVBR(Val, MatcherTable, MatcherIndex);
@@ -2075,7 +2091,7 @@ CheckAndImm(const unsigned char *MatcherTable, unsigned &MatcherIndex,
LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
CheckOrImm(const unsigned char *MatcherTable, unsigned &MatcherIndex,
- SDValue N, SelectionDAGISel &SDISel) {
+ SDValue N, const SelectionDAGISel &SDISel) {
int64_t Val = MatcherTable[MatcherIndex++];
if (Val & 128)
Val = GetVBR(Val, MatcherTable, MatcherIndex);
@@ -2094,7 +2110,8 @@ CheckOrImm(const unsigned char *MatcherTable, unsigned &MatcherIndex,
/// MatcherIndex to continue with.
static unsigned IsPredicateKnownToFail(const unsigned char *Table,
unsigned Index, SDValue N,
- bool &Result, SelectionDAGISel &SDISel,
+ bool &Result,
+ const SelectionDAGISel &SDISel,
SmallVectorImpl<std::pair<SDValue, SDNode*> > &RecordedNodes) {
switch (Table[Index++]) {
default:
@@ -2759,9 +2776,14 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
(SDNode*) 0));
}
- } else {
+ } else if (NodeToMatch->getOpcode() != ISD::DELETED_NODE) {
Res = MorphNode(NodeToMatch, TargetOpc, VTList, Ops.data(), Ops.size(),
EmitNodeInfo);
+ } else {
+ // NodeToMatch was eliminated by CSE when the target changed the DAG.
+ // We will visit the equivalent node later.
+ DEBUG(dbgs() << "Node was eliminated by CSE\n");
+ return 0;
}
// If the node had chain/glue results, update our notion of the current
@@ -2959,6 +2981,7 @@ void SelectionDAGISel::CannotYetSelect(SDNode *N) {
N->getOpcode() != ISD::INTRINSIC_WO_CHAIN &&
N->getOpcode() != ISD::INTRINSIC_VOID) {
N->printrFull(Msg, CurDAG);
+ Msg << "\nIn function: " << MF->getFunction()->getName();
} else {
bool HasInputChain = N->getOperand(0).getValueType() == MVT::Other;
unsigned iid =
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
index 6cde05a..173ffac 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
@@ -13,13 +13,13 @@
#include "ScheduleDAGSDNodes.h"
#include "llvm/Constants.h"
+#include "llvm/DebugInfo.h"
#include "llvm/Function.h"
#include "llvm/Assembly/Writer.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
-#include "llvm/Analysis/DebugInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Support/Debug.h"
diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index e341e15..f0c50c1 100644
--- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -25,6 +25,7 @@
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
@@ -32,13 +33,6 @@
#include <cctype>
using namespace llvm;
-/// We are in the process of implementing a new TypeLegalization action
-/// - the promotion of vector elements. This feature is disabled by default
-/// and only enabled using this flag.
-static cl::opt<bool>
-AllowPromoteIntElem("promote-elements", cl::Hidden, cl::init(true),
- cl::desc("Allow promotion of integer vector element types"));
-
/// InitLibcallNames - Set default libcall names.
///
static void InitLibcallNames(const char **Names) {
@@ -521,8 +515,7 @@ static void InitCmpLibcallCCs(ISD::CondCode *CCs) {
/// NOTE: The constructor takes ownership of TLOF.
TargetLowering::TargetLowering(const TargetMachine &tm,
const TargetLoweringObjectFile *tlof)
- : TM(tm), TD(TM.getTargetData()), TLOF(*tlof),
- mayPromoteElements(AllowPromoteIntElem) {
+ : TM(tm), TD(TM.getTargetData()), TLOF(*tlof) {
// All operations default to being supported.
memset(OpActions, 0, sizeof(OpActions));
memset(LoadExtActions, 0, sizeof(LoadExtActions));
@@ -604,6 +597,7 @@ TargetLowering::TargetLowering(const TargetMachine &tm,
IntDivIsCheap = false;
Pow2DivIsCheap = false;
JumpIsExpensive = false;
+ predictableSelectIsExpensive = false;
StackPointerRegisterToSaveRestore = 0;
ExceptionPointerRegister = 0;
ExceptionSelectorRegister = 0;
@@ -618,6 +612,7 @@ TargetLowering::TargetLowering(const TargetMachine &tm,
MinStackArgumentAlignment = 1;
ShouldFoldAtomicFences = false;
InsertFencesForAtomic = false;
+ SupportJumpTables = true;
InitLibcallNames(LibcallRoutineNames);
InitCmpLibcallCCs(CmpLibcallCCs);
@@ -708,42 +703,34 @@ bool TargetLowering::isLegalRC(const TargetRegisterClass *RC) const {
return false;
}
-/// hasLegalSuperRegRegClasses - Return true if the specified register class
-/// has one or more super-reg register classes that are legal.
-bool
-TargetLowering::hasLegalSuperRegRegClasses(const TargetRegisterClass *RC) const{
- if (*RC->superregclasses_begin() == 0)
- return false;
- for (TargetRegisterInfo::regclass_iterator I = RC->superregclasses_begin(),
- E = RC->superregclasses_end(); I != E; ++I) {
- const TargetRegisterClass *RRC = *I;
- if (isLegalRC(RRC))
- return true;
- }
- return false;
-}
-
/// findRepresentativeClass - Return the largest legal super-reg register class
/// of the register class for the specified type and its associated "cost".
std::pair<const TargetRegisterClass*, uint8_t>
TargetLowering::findRepresentativeClass(EVT VT) const {
+ const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo();
const TargetRegisterClass *RC = RegClassForVT[VT.getSimpleVT().SimpleTy];
if (!RC)
return std::make_pair(RC, 0);
+
+ // Compute the set of all super-register classes.
+ BitVector SuperRegRC(TRI->getNumRegClasses());
+ for (SuperRegClassIterator RCI(RC, TRI); RCI.isValid(); ++RCI)
+ SuperRegRC.setBitsInMask(RCI.getMask());
+
+ // Find the first legal register class with the largest spill size.
const TargetRegisterClass *BestRC = RC;
- for (TargetRegisterInfo::regclass_iterator I = RC->superregclasses_begin(),
- E = RC->superregclasses_end(); I != E; ++I) {
- const TargetRegisterClass *RRC = *I;
- if (RRC->isASubClass() || !isLegalRC(RRC))
+ for (int i = SuperRegRC.find_first(); i >= 0; i = SuperRegRC.find_next(i)) {
+ const TargetRegisterClass *SuperRC = TRI->getRegClass(i);
+ // We want the largest possible spill size.
+ if (SuperRC->getSize() <= BestRC->getSize())
+ continue;
+ if (!isLegalRC(SuperRC))
continue;
- if (!hasLegalSuperRegRegClasses(RRC))
- return std::make_pair(RRC, 1);
- BestRC = RRC;
+ BestRC = SuperRC;
}
return std::make_pair(BestRC, 1);
}
-
/// computeRegisterProperties - Once all of the register classes are added,
/// this allows us to compute derived properties we expose.
void TargetLowering::computeRegisterProperties() {
@@ -835,11 +822,8 @@ void TargetLowering::computeRegisterProperties() {
unsigned NElts = VT.getVectorNumElements();
if (NElts != 1) {
bool IsLegalWiderType = false;
- // If we allow the promotion of vector elements using a flag,
- // then return TypePromoteInteger on vector elements.
// First try to promote the elements of integer vectors. If no legal
// promotion was found, fallback to the widen-vector method.
- if (mayPromoteElements)
for (unsigned nVT = i+1; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) {
EVT SVT = (MVT::SimpleValueType)nVT;
// Promote vectors of integers to vectors with the same number
@@ -940,9 +924,12 @@ unsigned TargetLowering::getVectorTypeBreakdown(LLVMContext &Context, EVT VT,
unsigned NumElts = VT.getVectorNumElements();
// If there is a wider vector type with the same element type as this one,
- // we should widen to that legal vector type. This handles things like
- // <2 x float> -> <4 x float>.
- if (NumElts != 1 && getTypeAction(Context, VT) == TypeWidenVector) {
+ // or a promoted vector type that has the same number of elements which
+ // are wider, then we should convert to that legal vector type.
+ // This handles things like <2 x float> -> <4 x float> and
+ // <4 x i1> -> <4 x i32>.
+ LegalizeTypeAction TA = getTypeAction(Context, VT);
+ if (NumElts != 1 && (TA == TypeWidenVector || TA == TypePromoteInteger)) {
RegisterVT = getTypeToTransformTo(Context, VT);
if (isTypeLegal(RegisterVT)) {
IntermediateVT = RegisterVT;
@@ -1000,13 +987,11 @@ unsigned TargetLowering::getVectorTypeBreakdown(LLVMContext &Context, EVT VT,
/// TODO: Move this out of TargetLowering.cpp.
void llvm::GetReturnInfo(Type* ReturnType, Attributes attr,
SmallVectorImpl<ISD::OutputArg> &Outs,
- const TargetLowering &TLI,
- SmallVectorImpl<uint64_t> *Offsets) {
+ const TargetLowering &TLI) {
SmallVector<EVT, 4> ValueVTs;
ComputeValueVTs(TLI, ReturnType, ValueVTs);
unsigned NumValues = ValueVTs.size();
if (NumValues == 0) return;
- unsigned Offset = 0;
for (unsigned j = 0, f = NumValues; j != f; ++j) {
EVT VT = ValueVTs[j];
@@ -1029,8 +1014,6 @@ void llvm::GetReturnInfo(Type* ReturnType, Attributes attr,
unsigned NumParts = TLI.getNumRegisters(ReturnType->getContext(), VT);
EVT PartVT = TLI.getRegisterType(ReturnType->getContext(), VT);
- unsigned PartSize = TLI.getTargetData()->getTypeAllocSize(
- PartVT.getTypeForEVT(ReturnType->getContext()));
// 'inreg' on function refers to return value
ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy();
@@ -1045,10 +1028,6 @@ void llvm::GetReturnInfo(Type* ReturnType, Attributes attr,
for (unsigned i = 0; i < NumParts; ++i) {
Outs.push_back(ISD::OutputArg(Flags, PartVT, /*isFixed=*/true));
- if (Offsets) {
- Offsets->push_back(Offset);
- Offset += PartSize;
- }
}
}
}
@@ -2019,7 +1998,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
}
}
- // Make sure we're not loosing bits from the constant.
+ // Make sure we're not losing bits from the constant.
if (MinBits < C1.getBitWidth() && MinBits > C1.getActiveBits()) {
EVT MinVT = EVT::getIntegerVT(*DAG.getContext(), MinBits);
if (isTypeDesirableForOp(ISD::SETCC, MinVT)) {
@@ -2343,6 +2322,55 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
}
}
}
+
+ if (C1.getMinSignedBits() <= 64 &&
+ !isLegalICmpImmediate(C1.getSExtValue())) {
+ // (X & -256) == 256 -> (X >> 8) == 1
+ if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
+ N0.getOpcode() == ISD::AND && N0.hasOneUse()) {
+ if (ConstantSDNode *AndRHS =
+ dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
+ const APInt &AndRHSC = AndRHS->getAPIntValue();
+ if ((-AndRHSC).isPowerOf2() && (AndRHSC & C1) == C1) {
+ unsigned ShiftBits = AndRHSC.countTrailingZeros();
+ EVT ShiftTy = DCI.isBeforeLegalize() ?
+ getPointerTy() : getShiftAmountTy(N0.getValueType());
+ EVT CmpTy = N0.getValueType();
+ SDValue Shift = DAG.getNode(ISD::SRL, dl, CmpTy, N0.getOperand(0),
+ DAG.getConstant(ShiftBits, ShiftTy));
+ SDValue CmpRHS = DAG.getConstant(C1.lshr(ShiftBits), CmpTy);
+ return DAG.getSetCC(dl, VT, Shift, CmpRHS, Cond);
+ }
+ }
+ } else if (Cond == ISD::SETULT || Cond == ISD::SETUGE ||
+ Cond == ISD::SETULE || Cond == ISD::SETUGT) {
+ bool AdjOne = (Cond == ISD::SETULE || Cond == ISD::SETUGT);
+ // X < 0x100000000 -> (X >> 32) < 1
+ // X >= 0x100000000 -> (X >> 32) >= 1
+ // X <= 0x0ffffffff -> (X >> 32) < 1
+ // X > 0x0ffffffff -> (X >> 32) >= 1
+ unsigned ShiftBits;
+ APInt NewC = C1;
+ ISD::CondCode NewCond = Cond;
+ if (AdjOne) {
+ ShiftBits = C1.countTrailingOnes();
+ NewC = NewC + 1;
+ NewCond = (Cond == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
+ } else {
+ ShiftBits = C1.countTrailingZeros();
+ }
+ NewC = NewC.lshr(ShiftBits);
+ if (ShiftBits && isLegalICmpImmediate(NewC.getSExtValue())) {
+ EVT ShiftTy = DCI.isBeforeLegalize() ?
+ getPointerTy() : getShiftAmountTy(N0.getValueType());
+ EVT CmpTy = N0.getValueType();
+ SDValue Shift = DAG.getNode(ISD::SRL, dl, CmpTy, N0,
+ DAG.getConstant(ShiftBits, ShiftTy));
+ SDValue CmpRHS = DAG.getConstant(NewC, CmpTy);
+ return DAG.getSetCC(dl, VT, Shift, CmpRHS, NewCond);
+ }
+ }
+ }
}
if (isa<ConstantFPSDNode>(N0.getNode())) {
@@ -2411,25 +2439,33 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
}
if (N0 == N1) {
+ // The sext(setcc()) => setcc() optimization relies on the appropriate
+ // constant being emitted.
+ uint64_t EqVal;
+ switch (getBooleanContents(N0.getValueType().isVector())) {
+ case UndefinedBooleanContent:
+ case ZeroOrOneBooleanContent:
+ EqVal = ISD::isTrueWhenEqual(Cond);
+ break;
+ case ZeroOrNegativeOneBooleanContent:
+ EqVal = ISD::isTrueWhenEqual(Cond) ? -1 : 0;
+ break;
+ }
+
// We can always fold X == X for integer setcc's.
if (N0.getValueType().isInteger()) {
- switch (getBooleanContents(N0.getValueType().isVector())) {
- case UndefinedBooleanContent:
- case ZeroOrOneBooleanContent:
- return DAG.getConstant(ISD::isTrueWhenEqual(Cond), VT);
- case ZeroOrNegativeOneBooleanContent:
- return DAG.getConstant(ISD::isTrueWhenEqual(Cond) ? -1 : 0, VT);
- }
+ return DAG.getConstant(EqVal, VT);
}
unsigned UOF = ISD::getUnorderedFlavor(Cond);
if (UOF == 2) // FP operators that are undefined on NaNs.
- return DAG.getConstant(ISD::isTrueWhenEqual(Cond), VT);
+ return DAG.getConstant(EqVal, VT);
if (UOF == unsigned(ISD::isTrueWhenEqual(Cond)))
- return DAG.getConstant(UOF, VT);
+ return DAG.getConstant(EqVal, VT);
// Otherwise, we can't fold it. However, we can simplify it to SETUO/SETO
// if it is not already.
ISD::CondCode NewCond = UOF == 0 ? ISD::SETO : ISD::SETUO;
- if (NewCond != Cond)
+ if (NewCond != Cond && (DCI.isBeforeLegalizeOps() ||
+ getCondCodeAction(NewCond, N0.getValueType()) == Legal))
return DAG.getSetCC(dl, VT, N0, N1, NewCond);
}
@@ -2998,10 +3034,12 @@ TargetLowering::AsmOperandInfoVector TargetLowering::ParseConstraints(
AsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
if (OpInfo.ConstraintVT != Input.ConstraintVT) {
- std::pair<unsigned, const TargetRegisterClass*> MatchRC =
- getRegForInlineAsmConstraint(OpInfo.ConstraintCode, OpInfo.ConstraintVT);
- std::pair<unsigned, const TargetRegisterClass*> InputRC =
- getRegForInlineAsmConstraint(Input.ConstraintCode, Input.ConstraintVT);
+ std::pair<unsigned, const TargetRegisterClass*> MatchRC =
+ getRegForInlineAsmConstraint(OpInfo.ConstraintCode,
+ OpInfo.ConstraintVT);
+ std::pair<unsigned, const TargetRegisterClass*> InputRC =
+ getRegForInlineAsmConstraint(Input.ConstraintCode,
+ Input.ConstraintVT);
if ((OpInfo.ConstraintVT.isInteger() !=
Input.ConstraintVT.isInteger()) ||
(MatchRC.second != InputRC.second)) {
diff --git a/lib/CodeGen/ShadowStackGC.cpp b/lib/CodeGen/ShadowStackGC.cpp
index 0016047..8a6b120 100644
--- a/lib/CodeGen/ShadowStackGC.cpp
+++ b/lib/CodeGen/ShadowStackGC.cpp
@@ -26,13 +26,13 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "shadowstackgc"
-#include "llvm/CodeGen/GCs.h"
-#include "llvm/ADT/StringExtras.h"
-#include "llvm/CodeGen/GCStrategy.h"
+#include "llvm/IRBuilder.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/Module.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/CodeGen/GCStrategy.h"
+#include "llvm/CodeGen/GCs.h"
#include "llvm/Support/CallSite.h"
-#include "llvm/Support/IRBuilder.h"
using namespace llvm;
diff --git a/lib/CodeGen/SjLjEHPrepare.cpp b/lib/CodeGen/SjLjEHPrepare.cpp
index 9a86f32..980bd74 100644
--- a/lib/CodeGen/SjLjEHPrepare.cpp
+++ b/lib/CodeGen/SjLjEHPrepare.cpp
@@ -13,28 +13,28 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "sjljehprepare"
-#include "llvm/Transforms/Scalar.h"
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
+#include "llvm/IRBuilder.h"
#include "llvm/Instructions.h"
#include "llvm/Intrinsics.h"
#include "llvm/LLVMContext.h"
#include "llvm/Module.h"
#include "llvm/Pass.h"
-#include "llvm/CodeGen/Passes.h"
-#include "llvm/Target/TargetData.h"
-#include "llvm/Target/TargetLowering.h"
-#include "llvm/Transforms/Utils/BasicBlockUtils.h"
-#include "llvm/Transforms/Utils/Local.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/IRBuilder.h"
-#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
#include <set>
using namespace llvm;
diff --git a/lib/CodeGen/SlotIndexes.cpp b/lib/CodeGen/SlotIndexes.cpp
index 26cf259..c8c3fb3 100644
--- a/lib/CodeGen/SlotIndexes.cpp
+++ b/lib/CodeGen/SlotIndexes.cpp
@@ -62,7 +62,6 @@ bool SlotIndexes::runOnMachineFunction(MachineFunction &fn) {
assert(mi2iMap.empty() &&
"MachineInstr -> Index mapping non-empty at initial numbering?");
- functionSize = 0;
unsigned index = 0;
MBBRanges.resize(mf->getNumBlockIDs());
idx2MBBMap.reserve(mf->size());
@@ -89,8 +88,6 @@ bool SlotIndexes::runOnMachineFunction(MachineFunction &fn) {
// Save this base index in the maps.
mi2iMap.insert(std::make_pair(mi, SlotIndex(&indexList.back(),
SlotIndex::Slot_Block)));
-
- ++functionSize;
}
// We insert one blank instructions between basic blocks.
diff --git a/lib/CodeGen/SpillPlacement.cpp b/lib/CodeGen/SpillPlacement.cpp
index 6f33f54..320128a 100644
--- a/lib/CodeGen/SpillPlacement.cpp
+++ b/lib/CodeGen/SpillPlacement.cpp
@@ -207,6 +207,17 @@ void SpillPlacement::activate(unsigned n) {
return;
ActiveNodes->set(n);
nodes[n].clear();
+
+ // Very large bundles usually come from big switches, indirect branches,
+ // landing pads, or loops with many 'continue' statements. It is difficult to
+ // allocate registers when so many different blocks are involved.
+ //
+ // Give a small negative bias to large bundles such that 1/32 of the
+ // connected blocks need to be interested before we consider expanding the
+ // region through the bundle. This helps compile time by limiting the number
+ // of blocks visited and the number of links in the Hopfield network.
+ if (bundles->getBlocks(n).size() > 100)
+ nodes[n].Bias = -0.0625f;
}
diff --git a/lib/CodeGen/SplitKit.cpp b/lib/CodeGen/SplitKit.cpp
index 9959f74..4a2b7ec 100644
--- a/lib/CodeGen/SplitKit.cpp
+++ b/lib/CodeGen/SplitKit.cpp
@@ -345,9 +345,11 @@ void SplitEditor::reset(LiveRangeEdit &LRE, ComplementSpillMode SM) {
Values.clear();
// Reset the LiveRangeCalc instances needed for this spill mode.
- LRCalc[0].reset(&VRM.getMachineFunction());
+ LRCalc[0].reset(&VRM.getMachineFunction(), LIS.getSlotIndexes(), &MDT,
+ &LIS.getVNInfoAllocator());
if (SpillMode)
- LRCalc[1].reset(&VRM.getMachineFunction());
+ LRCalc[1].reset(&VRM.getMachineFunction(), LIS.getSlotIndexes(), &MDT,
+ &LIS.getVNInfoAllocator());
// We don't need an AliasAnalysis since we will only be performing
// cheap-as-a-copy remats anyway.
@@ -650,7 +652,7 @@ void SplitEditor::removeBackCopies(SmallVectorImpl<VNInfo*> &Copies) {
// Adjust RegAssign if a register assignment is killed at VNI->def. We
// want to avoid calculating the live range of the source register if
// possible.
- AssignI.find(VNI->def.getPrevSlot());
+ AssignI.find(Def.getPrevSlot());
if (!AssignI.valid() || AssignI.start() >= Def)
continue;
// If MI doesn't kill the assigned register, just leave it.
@@ -737,6 +739,8 @@ void SplitEditor::hoistCopiesForSize() {
for (LiveInterval::vni_iterator VI = LI->vni_begin(), VE = LI->vni_end();
VI != VE; ++VI) {
VNInfo *VNI = *VI;
+ if (VNI->isUnused())
+ continue;
VNInfo *ParentVNI = Edit->getParent().getVNInfoAt(VNI->def);
assert(ParentVNI && "Parent not live at complement def");
@@ -810,6 +814,8 @@ void SplitEditor::hoistCopiesForSize() {
for (LiveInterval::vni_iterator VI = LI->vni_begin(), VE = LI->vni_end();
VI != VE; ++VI) {
VNInfo *VNI = *VI;
+ if (VNI->isUnused())
+ continue;
VNInfo *ParentVNI = Edit->getParent().getVNInfoAt(VNI->def);
const DomPair &Dom = NearestDom[ParentVNI->id];
if (!Dom.first || Dom.second == VNI->def)
@@ -924,11 +930,9 @@ bool SplitEditor::transferValues() {
DEBUG(dbgs() << '\n');
}
- LRCalc[0].calculateValues(LIS.getSlotIndexes(), &MDT,
- &LIS.getVNInfoAllocator());
+ LRCalc[0].calculateValues();
if (SpillMode)
- LRCalc[1].calculateValues(LIS.getSlotIndexes(), &MDT,
- &LIS.getVNInfoAllocator());
+ LRCalc[1].calculateValues();
return Skipped;
}
@@ -953,8 +957,7 @@ void SplitEditor::extendPHIKillRanges() {
if (Edit->getParent().liveAt(LastUse)) {
assert(RegAssign.lookup(LastUse) == RegIdx &&
"Different register assignment in phi predecessor");
- LRC.extend(LI, End,
- LIS.getSlotIndexes(), &MDT, &LIS.getVNInfoAllocator());
+ LRC.extend(LI, End);
}
}
}
@@ -1004,8 +1007,7 @@ void SplitEditor::rewriteAssigned(bool ExtendRanges) {
} else
Idx = Idx.getRegSlot(true);
- getLRCalc(RegIdx).extend(LI, Idx.getNextSlot(), LIS.getSlotIndexes(),
- &MDT, &LIS.getVNInfoAllocator());
+ getLRCalc(RegIdx).extend(LI, Idx.getNextSlot());
}
}
@@ -1049,8 +1051,7 @@ void SplitEditor::finish(SmallVectorImpl<unsigned> *LRMap) {
if (ParentVNI->isUnused())
continue;
unsigned RegIdx = RegAssign.lookup(ParentVNI->def);
- VNInfo *VNI = defValue(RegIdx, ParentVNI, ParentVNI->def);
- VNI->setIsPHIDef(ParentVNI->isPHIDef());
+ defValue(RegIdx, ParentVNI, ParentVNI->def);
// Force rematted values to be recomputed everywhere.
// The new live ranges may be truncated.
diff --git a/lib/CodeGen/StackProtector.cpp b/lib/CodeGen/StackProtector.cpp
index 43a6ad8..f1eab1f 100644
--- a/lib/CodeGen/StackProtector.cpp
+++ b/lib/CodeGen/StackProtector.cpp
@@ -28,6 +28,7 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetLowering.h"
+#include "llvm/ADT/Triple.h"
using namespace llvm;
// SSPBufferSize - The lower bound for a buffer to be considered for stack
@@ -46,7 +47,7 @@ namespace {
Function *F;
Module *M;
- DominatorTree* DT;
+ DominatorTree *DT;
/// InsertStackProtectors - Insert code into the prologue and epilogue of
/// the function.
@@ -70,8 +71,8 @@ namespace {
}
StackProtector(const TargetLowering *tli)
: FunctionPass(ID), TLI(tli) {
- initializeStackProtectorPass(*PassRegistry::getPassRegistry());
- }
+ initializeStackProtectorPass(*PassRegistry::getPassRegistry());
+ }
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.addPreserved<DominatorTree>();
@@ -95,7 +96,7 @@ bool StackProtector::runOnFunction(Function &Fn) {
DT = getAnalysisIfAvailable<DominatorTree>();
if (!RequiresStackProtector()) return false;
-
+
return InsertStackProtectors();
}
@@ -111,6 +112,8 @@ bool StackProtector::RequiresStackProtector() const {
return false;
const TargetData *TD = TLI->getTargetData();
+ const TargetMachine &TM = TLI->getTargetMachine();
+ Triple Trip(TM.getTargetTriple());
for (Function::iterator I = F->begin(), E = F->end(); I != E; ++I) {
BasicBlock *BB = I;
@@ -123,11 +126,17 @@ bool StackProtector::RequiresStackProtector() const {
// protectors.
return true;
- if (ArrayType *AT = dyn_cast<ArrayType>(AI->getAllocatedType()))
+ if (ArrayType *AT = dyn_cast<ArrayType>(AI->getAllocatedType())) {
+ // If we're on a non-Darwin platform, don't add stack protectors
+ // unless the array is a character array.
+ if (!Trip.isOSDarwin() && !AT->getElementType()->isIntegerTy(8))
+ continue;
+
// If an array has more than SSPBufferSize bytes of allocated space,
// then we emit stack protectors.
if (SSPBufferSize <= TD->getTypeAllocSize(AT))
return true;
+ }
}
}
@@ -159,17 +168,17 @@ bool StackProtector::InsertStackProtectors() {
// StackGuardSlot = alloca i8*
// StackGuard = load __stack_chk_guard
// call void @llvm.stackprotect.create(StackGuard, StackGuardSlot)
- //
+ //
PointerType *PtrTy = Type::getInt8PtrTy(RI->getContext());
unsigned AddressSpace, Offset;
if (TLI->getStackCookieLocation(AddressSpace, Offset)) {
Constant *OffsetVal =
ConstantInt::get(Type::getInt32Ty(RI->getContext()), Offset);
-
+
StackGuardVar = ConstantExpr::getIntToPtr(OffsetVal,
PointerType::get(PtrTy, AddressSpace));
} else {
- StackGuardVar = M->getOrInsertGlobal("__stack_chk_guard", PtrTy);
+ StackGuardVar = M->getOrInsertGlobal("__stack_chk_guard", PtrTy);
}
BasicBlock &Entry = F->getEntryBlock();
diff --git a/lib/CodeGen/StackSlotColoring.cpp b/lib/CodeGen/StackSlotColoring.cpp
index 1e940b1..20da36e 100644
--- a/lib/CodeGen/StackSlotColoring.cpp
+++ b/lib/CodeGen/StackSlotColoring.cpp
@@ -46,7 +46,6 @@ STATISTIC(NumDead, "Number of trivially dead stack accesses eliminated");
namespace {
class StackSlotColoring : public MachineFunctionPass {
- bool ColorWithRegs;
LiveStacks* LS;
MachineFrameInfo *MFI;
const TargetInstrInfo *TII;
@@ -82,7 +81,7 @@ namespace {
public:
static char ID; // Pass identification
StackSlotColoring() :
- MachineFunctionPass(ID), ColorWithRegs(false), NextColor(-1) {
+ MachineFunctionPass(ID), NextColor(-1) {
initializeStackSlotColoringPass(*PassRegistry::getPassRegistry());
}
diff --git a/lib/CodeGen/StrongPHIElimination.cpp b/lib/CodeGen/StrongPHIElimination.cpp
index c6fdc73..5b06195 100644
--- a/lib/CodeGen/StrongPHIElimination.cpp
+++ b/lib/CodeGen/StrongPHIElimination.cpp
@@ -672,8 +672,8 @@ void StrongPHIElimination::InsertCopiesForPHI(MachineInstr *PHI,
LiveInterval &SrcInterval = LI->getInterval(SrcReg);
SlotIndex PredIndex = LI->getMBBEndIdx(PredBB);
VNInfo *SrcVNI = SrcInterval.getVNInfoBefore(PredIndex);
+ (void)SrcVNI;
assert(SrcVNI);
- SrcVNI->setHasPHIKill(true);
continue;
}
@@ -744,7 +744,6 @@ void StrongPHIElimination::InsertCopiesForPHI(MachineInstr *PHI,
SlotIndex PHIIndex = LI->getInstructionIndex(PHI);
VNInfo *DestVNI = DestLI.getVNInfoAt(PHIIndex.getRegSlot());
assert(DestVNI);
- DestVNI->setIsPHIDef(true);
// Prior to PHI elimination, the live ranges of PHIs begin at their defining
// instruction. After PHI elimination, PHI instructions are replaced by VNs
@@ -777,7 +776,6 @@ void StrongPHIElimination::InsertCopiesForPHI(MachineInstr *PHI,
SlotIndex DestCopyIndex = LI->getInstructionIndex(CopyInstr);
VNInfo *CopyVNI = CopyLI.getNextValue(MBBStartIndex,
LI->getVNInfoAllocator());
- CopyVNI->setIsPHIDef(true);
CopyLI.addRange(LiveRange(MBBStartIndex,
DestCopyIndex.getRegSlot(),
CopyVNI));
diff --git a/lib/CodeGen/TailDuplication.cpp b/lib/CodeGen/TailDuplication.cpp
index 8ebfbca..a813fa6 100644
--- a/lib/CodeGen/TailDuplication.cpp
+++ b/lib/CodeGen/TailDuplication.cpp
@@ -20,12 +20,15 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/MachineSSAUpdater.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/OwningPtr.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/Statistic.h"
@@ -57,8 +60,10 @@ namespace {
/// TailDuplicatePass - Perform tail duplication.
class TailDuplicatePass : public MachineFunctionPass {
const TargetInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
MachineModuleInfo *MMI;
MachineRegisterInfo *MRI;
+ OwningPtr<RegScavenger> RS;
bool PreRegAlloc;
// SSAUpdateVRs - A list of virtual registers for which to update SSA form.
@@ -124,9 +129,13 @@ INITIALIZE_PASS(TailDuplicatePass, "tailduplication", "Tail Duplication",
bool TailDuplicatePass::runOnMachineFunction(MachineFunction &MF) {
TII = MF.getTarget().getInstrInfo();
+ TRI = MF.getTarget().getRegisterInfo();
MRI = &MF.getRegInfo();
MMI = getAnalysisIfAvailable<MachineModuleInfo>();
PreRegAlloc = MRI->isSSA();
+ RS.reset();
+ if (MRI->tracksLiveness() && TRI->trackLivenessAfterRegAlloc(MF))
+ RS.reset(new RegScavenger());
bool MadeChange = false;
while (TailDuplicateBlocks(MF))
@@ -272,8 +281,8 @@ TailDuplicatePass::TailDuplicateAndUpdate(MachineBasicBlock *MBB,
continue;
unsigned Dst = Copy->getOperand(0).getReg();
unsigned Src = Copy->getOperand(1).getReg();
- MachineRegisterInfo::use_iterator UI = MRI->use_begin(Src);
- if (++UI == MRI->use_end()) {
+ if (MRI->hasOneNonDBGUse(Src) &&
+ MRI->constrainRegClass(Src, MRI->getRegClass(Dst))) {
// Copy is the only use. Do trivial copy propagation here.
MRI->replaceRegWith(Dst, Src);
Copy->eraseFromParent();
@@ -429,8 +438,10 @@ void TailDuplicatePass::DuplicateInstruction(MachineInstr *MI,
AddSSAUpdateEntry(Reg, NewReg, PredBB);
} else {
DenseMap<unsigned, unsigned>::iterator VI = LocalVRMap.find(Reg);
- if (VI != LocalVRMap.end())
+ if (VI != LocalVRMap.end()) {
MO.setReg(VI->second);
+ MRI->constrainRegClass(VI->second, MRI->getRegClass(Reg));
+ }
}
}
PredBB->insert(PredBB->instr_end(), NewMI);
@@ -775,6 +786,23 @@ TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB,
// Remove PredBB's unconditional branch.
TII->RemoveBranch(*PredBB);
+ if (RS && !TailBB->livein_empty()) {
+ // Update PredBB livein.
+ RS->enterBasicBlock(PredBB);
+ if (!PredBB->empty())
+ RS->forward(prior(PredBB->end()));
+ BitVector RegsLiveAtExit(TRI->getNumRegs());
+ RS->getRegsUsed(RegsLiveAtExit, false);
+ for (MachineBasicBlock::livein_iterator I = TailBB->livein_begin(),
+ E = TailBB->livein_end(); I != E; ++I) {
+ if (!RegsLiveAtExit[*I])
+ // If a register is previously livein to the tail but it's not live
+ // at the end of predecessor BB, then it should be added to its
+ // livein list.
+ PredBB->addLiveIn(*I);
+ }
+ }
+
// Clone the contents of TailBB into PredBB.
DenseMap<unsigned, unsigned> LocalVRMap;
SmallVector<std::pair<unsigned,unsigned>, 4> CopyInfos;
diff --git a/lib/CodeGen/TargetInstrInfoImpl.cpp b/lib/CodeGen/TargetInstrInfoImpl.cpp
index 2beb928..ddee6b2 100644
--- a/lib/CodeGen/TargetInstrInfoImpl.cpp
+++ b/lib/CodeGen/TargetInstrInfoImpl.cpp
@@ -501,6 +501,14 @@ CreateTargetHazardRecognizer(const TargetMachine *TM,
return new ScheduleHazardRecognizer();
}
+// Default implementation of CreateTargetMIHazardRecognizer.
+ScheduleHazardRecognizer *TargetInstrInfoImpl::
+CreateTargetMIHazardRecognizer(const InstrItineraryData *II,
+ const ScheduleDAG *DAG) const {
+ return (ScheduleHazardRecognizer *)
+ new ScoreboardHazardRecognizer(II, DAG, "misched");
+}
+
// Default implementation of CreateTargetPostRAHazardRecognizer.
ScheduleHazardRecognizer *TargetInstrInfoImpl::
CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II,
@@ -509,6 +517,10 @@ CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II,
new ScoreboardHazardRecognizer(II, DAG, "post-RA-sched");
}
+//===----------------------------------------------------------------------===//
+// SelectionDAG latency interface.
+//===----------------------------------------------------------------------===//
+
int
TargetInstrInfoImpl::getOperandLatency(const InstrItineraryData *ItinData,
SDNode *DefNode, unsigned DefIdx,
@@ -537,3 +549,201 @@ int TargetInstrInfoImpl::getInstrLatency(const InstrItineraryData *ItinData,
return ItinData->getStageLatency(get(N->getMachineOpcode()).getSchedClass());
}
+//===----------------------------------------------------------------------===//
+// MachineInstr latency interface.
+//===----------------------------------------------------------------------===//
+
+unsigned
+TargetInstrInfoImpl::getNumMicroOps(const InstrItineraryData *ItinData,
+ const MachineInstr *MI) const {
+ if (!ItinData || ItinData->isEmpty())
+ return 1; // No usable itinerary: report a single micro-op.
+
+ unsigned Class = MI->getDesc().getSchedClass();
+ int UOps = ItinData->Itineraries[Class].NumMicroOps;
+ if (UOps >= 0) // A non-negative itinerary entry is returned unchanged.
+ return UOps;
+
+ // The # of u-ops is dynamically determined. The specific target should
+ // override this function to return the right number.
+ return 1;
+}
+
+/// Return the default expected latency for a def based on its opcode: LoadLatency for loads, HighLatency for high-latency defs, otherwise 1.
+unsigned TargetInstrInfo::defaultDefLatency(const MCSchedModel *SchedModel,
+ const MachineInstr *DefMI) const {
+ if (DefMI->mayLoad())
+ return SchedModel->LoadLatency;
+ if (isHighLatencyDef(DefMI->getOpcode()))
+ return SchedModel->HighLatency;
+ return 1;
+}
+
+unsigned TargetInstrInfoImpl::
+getInstrLatency(const InstrItineraryData *ItinData,
+ const MachineInstr *MI,
+ unsigned *PredCost) const { // PredCost is never read or written by this default implementation.
+ // Default to one cycle for no itinerary. However, an "empty" itinerary may
+ // still have a MinLatency property, which getStageLatency checks.
+ if (!ItinData)
+ return MI->mayLoad() ? 2 : 1; // Loads are the exception to the one-cycle default: they report 2.
+
+ return ItinData->getStageLatency(MI->getDesc().getSchedClass());
+}
+
+bool TargetInstrInfoImpl::hasLowDefLatency(const InstrItineraryData *ItinData,
+ const MachineInstr *DefMI,
+ unsigned DefIdx) const {
+ if (!ItinData || ItinData->isEmpty())
+ return false; // Without itinerary data a low def latency is never reported.
+
+ unsigned DefClass = DefMI->getDesc().getSchedClass();
+ int DefCycle = ItinData->getOperandCycle(DefClass, DefIdx);
+ return (DefCycle != -1 && DefCycle <= 1); // -1 is excluded explicitly; any other cycle of at most 1 counts as low.
+}
+
+/// Both DefMI and UseMI must be valid. By default, call directly to the
+/// itinerary. This may be overridden by the target.
+int TargetInstrInfoImpl::
+getOperandLatency(const InstrItineraryData *ItinData,
+ const MachineInstr *DefMI, unsigned DefIdx,
+ const MachineInstr *UseMI, unsigned UseIdx) const {
+ unsigned DefClass = DefMI->getDesc().getSchedClass();
+ unsigned UseClass = UseMI->getDesc().getSchedClass();
+ return ItinData->getOperandLatency(DefClass, DefIdx, UseClass, UseIdx); // ItinData is dereferenced without a null check here.
+}
+
+/// If we can determine the operand latency from the def only, without itinerary
+/// lookup, do so. Otherwise return -1.
+static int computeDefOperandLatency(
+ const TargetInstrInfo *TII, const InstrItineraryData *ItinData,
+ const MachineInstr *DefMI, bool FindMin) {
+
+ // Let the target hook getInstrLatency handle missing itineraries.
+ if (!ItinData)
+ return TII->getInstrLatency(ItinData, DefMI);
+
+ // Return a latency based on the itinerary properties and defining instruction
+ // if possible. Some common subtargets don't require per-operand latency,
+ // especially for minimum latencies.
+ if (FindMin) {
+ // If MinLatency is valid, call getInstrLatency. This uses Stage latency if
+ // it exists before defaulting to MinLatency.
+ if (ItinData->SchedModel->MinLatency >= 0)
+ return TII->getInstrLatency(ItinData, DefMI);
+
+ // If MinLatency is invalid, OperandLatency is interpreted as MinLatency.
+ // For empty itineraries, short-circuit the check and default to one cycle.
+ if (ItinData->isEmpty())
+ return 1;
+ }
+ else if(ItinData->isEmpty()) // Expected latency with an empty itinerary: use the default def latency.
+ return TII->defaultDefLatency(ItinData->SchedModel, DefMI);
+
+ // ...operand lookup required
+ return -1;
+}
+
+/// computeOperandLatency - Compute and return the latency of the given data
+/// dependent def and use when the operand indices are already known.
+///
+/// FindMin may be set to get the minimum vs. expected latency.
+unsigned TargetInstrInfo::
+computeOperandLatency(const InstrItineraryData *ItinData,
+ const MachineInstr *DefMI, unsigned DefIdx,
+ const MachineInstr *UseMI, unsigned UseIdx,
+ bool FindMin) const {
+
+ int DefLatency = computeDefOperandLatency(this, ItinData, DefMI, FindMin);
+ if (DefLatency >= 0) // A non-negative result was resolved from the def alone; no operand lookup is needed.
+ return DefLatency;
+
+ assert(ItinData && !ItinData->isEmpty() && "computeDefOperandLatency fail");
+
+ int OperLatency = getOperandLatency(ItinData, DefMI, DefIdx, UseMI, UseIdx);
+ if (OperLatency >= 0)
+ return OperLatency;
+
+ // No operand latency was found.
+ unsigned InstrLatency = getInstrLatency(ItinData, DefMI);
+
+ // Expected latency is the max of the stage latency and itinerary props.
+ if (!FindMin)
+ InstrLatency = std::max(InstrLatency,
+ defaultDefLatency(ItinData->SchedModel, DefMI));
+ return InstrLatency;
+}
+
+/// computeOperandLatency - Compute and return the latency of the given data
+/// dependent def and use. DefMI must be a valid def. UseMI may be NULL for an
+/// unknown use. Depending on the subtarget's itinerary properties, this may or
+/// may not need to call getOperandLatency().
+///
+/// FindMin may be set to get the minimum vs. expected latency. Minimum
+/// latency is used for scheduling groups, while expected latency is for
+/// instruction cost and critical path.
+///
+/// For most subtargets, we don't need DefIdx or UseIdx to compute min latency.
+/// DefMI must be a valid definition, but UseMI may be NULL for an unknown use.
+unsigned TargetInstrInfo::
+computeOperandLatency(const InstrItineraryData *ItinData,
+ const TargetRegisterInfo *TRI,
+ const MachineInstr *DefMI, const MachineInstr *UseMI,
+ unsigned Reg, bool FindMin) const {
+
+ int DefLatency = computeDefOperandLatency(this, ItinData, DefMI, FindMin);
+ if (DefLatency >= 0) // A non-negative result was resolved from the def alone; no operand lookup is needed.
+ return DefLatency;
+
+ assert(ItinData && !ItinData->isEmpty() && "computeDefOperandLatency fail");
+
+ // Find the definition of the register in the defining instruction.
+ int DefIdx = DefMI->findRegisterDefOperandIdx(Reg);
+ if (DefIdx != -1) {
+ const MachineOperand &MO = DefMI->getOperand(DefIdx);
+ if (MO.isReg() && MO.isImplicit() &&
+ DefIdx >= (int)DefMI->getDesc().getNumOperands()) {
+ // This is an implicit def, getOperandLatency() won't return the correct
+ // latency. e.g.
+ // %D6<def>, %D7<def> = VLD1q16 %R2<kill>, 0, ..., %Q3<imp-def>
+ // %Q1<def> = VMULv8i16 %Q1<kill>, %Q3<kill>, ...
+ // What we want is to compute latency between def of %D6/%D7 and use of
+ // %Q3 instead.
+ unsigned Op2 = DefMI->findRegisterDefOperandIdx(Reg, false, true, TRI); // NOTE(review): stored as unsigned and not checked against -1 before getOperand() - confirm this lookup cannot fail here.
+ if (DefMI->getOperand(Op2).isReg())
+ DefIdx = Op2;
+ }
+ // For all uses of the register, calculate the maximum latency
+ int OperLatency = -1;
+
+ // If UseMI is null, then it must be a scheduling barrier.
+ if (!UseMI) {
+ unsigned DefClass = DefMI->getDesc().getSchedClass();
+ OperLatency = ItinData->getOperandCycle(DefClass, DefIdx);
+ }
+ else {
+ for (unsigned i = 0, e = UseMI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = UseMI->getOperand(i);
+ if (!MO.isReg() || !MO.isUse())
+ continue;
+ unsigned MOReg = MO.getReg();
+ if (MOReg != Reg) // Only operands that read exactly Reg contribute to the maximum.
+ continue;
+
+ int UseCycle = getOperandLatency(ItinData, DefMI, DefIdx, UseMI, i);
+ OperLatency = std::max(OperLatency, UseCycle);
+ }
+ }
+ // If we found an operand latency, we're done.
+ if (OperLatency >= 0)
+ return OperLatency;
+ }
+ // No operand latency was found.
+ unsigned InstrLatency = getInstrLatency(ItinData, DefMI);
+
+ // Expected latency is the max of the stage latency and itinerary props.
+ if (!FindMin)
+ InstrLatency = std::max(InstrLatency,
+ defaultDefLatency(ItinData->SchedModel, DefMI));
+ return InstrLatency;
+}
diff --git a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
index 9925185..2a2fa9e 100644
--- a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
+++ b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
@@ -93,8 +93,9 @@ getELFKindForNamedSection(StringRef Name, SectionKind K) {
// N.B.: The defaults used in here are no the same ones used in MC.
// We follow gcc, MC follows gas. For example, given ".section .eh_frame",
// both gas and MC will produce a section with no flags. Given
- // section(".eh_frame") gcc will produce
- // .section .eh_frame,"a",@progbits
+ // section(".eh_frame") gcc will produce:
+ //
+ // .section .eh_frame,"a",@progbits
if (Name.empty() || Name[0] != '.') return K;
// Some lame default implementation based on some magic section names.
@@ -349,10 +350,17 @@ TargetLoweringObjectFileELF::getStaticCtorSection(unsigned Priority) const {
if (Priority == 65535)
return StaticCtorSection;
- std::string Name = std::string(".ctors.") + utostr(65535 - Priority);
- return getContext().getELFSection(Name, ELF::SHT_PROGBITS,
- ELF::SHF_ALLOC |ELF::SHF_WRITE,
- SectionKind::getDataRel());
+ if (UseInitArray) {
+ std::string Name = std::string(".init_array.") + utostr(Priority);
+ return getContext().getELFSection(Name, ELF::SHT_INIT_ARRAY,
+ ELF::SHF_ALLOC | ELF::SHF_WRITE,
+ SectionKind::getDataRel());
+ } else {
+ std::string Name = std::string(".ctors.") + utostr(65535 - Priority);
+ return getContext().getELFSection(Name, ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC |ELF::SHF_WRITE,
+ SectionKind::getDataRel());
+ }
}
const MCSection *
@@ -362,10 +370,35 @@ TargetLoweringObjectFileELF::getStaticDtorSection(unsigned Priority) const {
if (Priority == 65535)
return StaticDtorSection;
- std::string Name = std::string(".dtors.") + utostr(65535 - Priority);
- return getContext().getELFSection(Name, ELF::SHT_PROGBITS,
- ELF::SHF_ALLOC |ELF::SHF_WRITE,
- SectionKind::getDataRel());
+ if (UseInitArray) {
+ std::string Name = std::string(".fini_array.") + utostr(Priority);
+ return getContext().getELFSection(Name, ELF::SHT_FINI_ARRAY,
+ ELF::SHF_ALLOC | ELF::SHF_WRITE,
+ SectionKind::getDataRel());
+ } else {
+ std::string Name = std::string(".dtors.") + utostr(65535 - Priority);
+ return getContext().getELFSection(Name, ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC |ELF::SHF_WRITE,
+ SectionKind::getDataRel());
+ }
+}
+
+void
+TargetLoweringObjectFileELF::InitializeELF(bool UseInitArray_) {
+ UseInitArray = UseInitArray_; // Recorded in the UseInitArray member.
+ if (!UseInitArray)
+ return; // StaticCtorSection and StaticDtorSection are left untouched in this case.
+
+ StaticCtorSection = // Repoint at an SHT_INIT_ARRAY section named ".init_array".
+ getContext().getELFSection(".init_array", ELF::SHT_INIT_ARRAY,
+ ELF::SHF_WRITE |
+ ELF::SHF_ALLOC,
+ SectionKind::getDataRel());
+ StaticDtorSection = // Repoint at an SHT_FINI_ARRAY section named ".fini_array".
+ getContext().getELFSection(".fini_array", ELF::SHT_FINI_ARRAY,
+ ELF::SHF_WRITE |
+ ELF::SHF_ALLOC,
+ SectionKind::getDataRel());
}
//===----------------------------------------------------------------------===//
@@ -379,7 +412,7 @@ emitModuleFlags(MCStreamer &Streamer,
ArrayRef<Module::ModuleFlagEntry> ModuleFlags,
Mangler *Mang, const TargetMachine &TM) const {
unsigned VersionVal = 0;
- unsigned GCFlags = 0;
+ unsigned ImageInfoFlags = 0;
StringRef SectionVal;
for (ArrayRef<Module::ModuleFlagEntry>::iterator
@@ -396,8 +429,9 @@ emitModuleFlags(MCStreamer &Streamer,
if (Key == "Objective-C Image Info Version")
VersionVal = cast<ConstantInt>(Val)->getZExtValue();
else if (Key == "Objective-C Garbage Collection" ||
- Key == "Objective-C GC Only")
- GCFlags |= cast<ConstantInt>(Val)->getZExtValue();
+ Key == "Objective-C GC Only" ||
+ Key == "Objective-C Is Simulated")
+ ImageInfoFlags |= cast<ConstantInt>(Val)->getZExtValue();
else if (Key == "Objective-C Image Info Section")
SectionVal = cast<MDString>(Val)->getString();
}
@@ -424,7 +458,7 @@ emitModuleFlags(MCStreamer &Streamer,
Streamer.EmitLabel(getContext().
GetOrCreateSymbol(StringRef("L_OBJC_IMAGE_INFO")));
Streamer.EmitIntValue(VersionVal, 4);
- Streamer.EmitIntValue(GCFlags, 4);
+ Streamer.EmitIntValue(ImageInfoFlags, 4);
Streamer.AddBlankLine();
}
diff --git a/lib/CodeGen/TwoAddressInstructionPass.cpp b/lib/CodeGen/TwoAddressInstructionPass.cpp
index c30b133..aa601af 100644
--- a/lib/CodeGen/TwoAddressInstructionPass.cpp
+++ b/lib/CodeGen/TwoAddressInstructionPass.cpp
@@ -30,6 +30,7 @@
#define DEBUG_TYPE "twoaddrinstr"
#include "llvm/CodeGen/Passes.h"
#include "llvm/Function.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/LiveVariables.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
@@ -55,18 +56,19 @@ STATISTIC(NumCommuted , "Number of instructions commuted to coalesce");
STATISTIC(NumAggrCommuted , "Number of instructions aggressively commuted");
STATISTIC(NumConvertedTo3Addr, "Number of instructions promoted to 3-address");
STATISTIC(Num3AddrSunk, "Number of 3-address instructions sunk");
-STATISTIC(NumReMats, "Number of instructions re-materialized");
-STATISTIC(NumDeletes, "Number of dead instructions deleted");
STATISTIC(NumReSchedUps, "Number of instructions re-scheduled up");
STATISTIC(NumReSchedDowns, "Number of instructions re-scheduled down");
namespace {
class TwoAddressInstructionPass : public MachineFunctionPass {
+ MachineFunction *MF;
const TargetInstrInfo *TII;
const TargetRegisterInfo *TRI;
const InstrItineraryData *InstrItins;
MachineRegisterInfo *MRI;
LiveVariables *LV;
+ SlotIndexes *Indexes;
+ LiveIntervals *LIS;
AliasAnalysis *AA;
CodeGenOpt::Level OptLevel;
@@ -92,17 +94,10 @@ namespace {
unsigned Reg,
MachineBasicBlock::iterator OldPos);
- bool isProfitableToReMat(unsigned Reg, const TargetRegisterClass *RC,
- MachineInstr *MI, MachineInstr *DefMI,
- MachineBasicBlock *MBB, unsigned Loc);
-
bool NoUseAfterLastDef(unsigned Reg, MachineBasicBlock *MBB, unsigned Dist,
unsigned &LastDef);
- MachineInstr *FindLastUseInMBB(unsigned Reg, MachineBasicBlock *MBB,
- unsigned Dist);
-
- bool isProfitableToCommute(unsigned regB, unsigned regC,
+ bool isProfitableToCommute(unsigned regA, unsigned regB, unsigned regC,
MachineInstr *MI, MachineBasicBlock *MBB,
unsigned Dist);
@@ -117,14 +112,6 @@ namespace {
MachineFunction::iterator &mbbi,
unsigned RegA, unsigned RegB, unsigned Dist);
- typedef std::pair<std::pair<unsigned, bool>, MachineInstr*> NewKill;
- bool canUpdateDeletedKills(SmallVector<unsigned, 4> &Kills,
- SmallVector<NewKill, 4> &NewKills,
- MachineBasicBlock *MBB, unsigned Dist);
- bool DeleteUnusedInstr(MachineBasicBlock::iterator &mi,
- MachineBasicBlock::iterator &nmi,
- MachineFunction::iterator &mbbi, unsigned Dist);
-
bool isDefTooClose(unsigned Reg, unsigned Dist,
MachineInstr *MI, MachineBasicBlock *MBB);
@@ -150,6 +137,11 @@ namespace {
void ProcessCopy(MachineInstr *MI, MachineBasicBlock *MBB,
SmallPtrSet<MachineInstr*, 8> &Processed);
+ typedef SmallVector<std::pair<unsigned, unsigned>, 4> TiedPairList;
+ typedef SmallDenseMap<unsigned, TiedPairList> TiedOperandMap;
+ bool collectTiedOperands(MachineInstr *MI, TiedOperandMap&);
+ void processTiedPairs(MachineInstr *MI, TiedPairList&, unsigned &Dist);
+
void CoalesceExtSubRegs(SmallVector<unsigned,4> &Srcs, unsigned DstReg);
/// EliminateRegSequences - Eliminate REG_SEQUENCE instructions as part
@@ -167,6 +159,8 @@ namespace {
AU.setPreservesCFG();
AU.addRequired<AliasAnalysis>();
AU.addPreserved<LiveVariables>();
+ AU.addPreserved<SlotIndexes>();
+ AU.addPreserved<LiveIntervals>();
AU.addPreservedID(MachineLoopInfoID);
AU.addPreservedID(MachineDominatorsID);
MachineFunctionPass::getAnalysisUsage(AU);
@@ -241,7 +235,7 @@ bool TwoAddressInstructionPass::Sink3AddrInstruction(MachineBasicBlock *MBB,
// appropriate location, we can try to sink the current instruction
// past it.
if (!KillMI || KillMI->getParent() != MBB || KillMI == MI ||
- KillMI->isTerminator())
+ KillMI == OldPos || KillMI->isTerminator())
return false;
// If any of the definitions are used by another instruction between the
@@ -284,6 +278,7 @@ bool TwoAddressInstructionPass::Sink3AddrInstruction(MachineBasicBlock *MBB,
}
}
}
+ assert(KillMO && "Didn't find kill");
// Update kill and LV information.
KillMO->setIsKill(false);
@@ -297,59 +292,13 @@ bool TwoAddressInstructionPass::Sink3AddrInstruction(MachineBasicBlock *MBB,
MBB->remove(MI);
MBB->insert(KillPos, MI);
+ if (LIS)
+ LIS->handleMove(MI);
+
++Num3AddrSunk;
return true;
}
-/// isTwoAddrUse - Return true if the specified MI is using the specified
-/// register as a two-address operand.
-static bool isTwoAddrUse(MachineInstr *UseMI, unsigned Reg) {
- const MCInstrDesc &MCID = UseMI->getDesc();
- for (unsigned i = 0, e = MCID.getNumOperands(); i != e; ++i) {
- MachineOperand &MO = UseMI->getOperand(i);
- if (MO.isReg() && MO.getReg() == Reg &&
- (MO.isDef() || UseMI->isRegTiedToDefOperand(i)))
- // Earlier use is a two-address one.
- return true;
- }
- return false;
-}
-
-/// isProfitableToReMat - Return true if the heuristics determines it is likely
-/// to be profitable to re-materialize the definition of Reg rather than copy
-/// the register.
-bool
-TwoAddressInstructionPass::isProfitableToReMat(unsigned Reg,
- const TargetRegisterClass *RC,
- MachineInstr *MI, MachineInstr *DefMI,
- MachineBasicBlock *MBB, unsigned Loc) {
- bool OtherUse = false;
- for (MachineRegisterInfo::use_nodbg_iterator UI = MRI->use_nodbg_begin(Reg),
- UE = MRI->use_nodbg_end(); UI != UE; ++UI) {
- MachineOperand &UseMO = UI.getOperand();
- MachineInstr *UseMI = UseMO.getParent();
- MachineBasicBlock *UseMBB = UseMI->getParent();
- if (UseMBB == MBB) {
- DenseMap<MachineInstr*, unsigned>::iterator DI = DistanceMap.find(UseMI);
- if (DI != DistanceMap.end() && DI->second == Loc)
- continue; // Current use.
- OtherUse = true;
- // There is at least one other use in the MBB that will clobber the
- // register.
- if (isTwoAddrUse(UseMI, Reg))
- return true;
- }
- }
-
- // If other uses in MBB are not two-address uses, then don't remat.
- if (OtherUse)
- return false;
-
- // No other uses in the same block, remat if it's defined in the same
- // block so it does not unnecessarily extend the live range.
- return MBB == DefMI->getParent();
-}
-
/// NoUseAfterLastDef - Return true if there are no intervening uses between the
/// last instruction in the MBB that defines the specified register and the
/// two-address instruction which is being processed. It also returns the last
@@ -377,31 +326,6 @@ bool TwoAddressInstructionPass::NoUseAfterLastDef(unsigned Reg,
return !(LastUse > LastDef && LastUse < Dist);
}
-MachineInstr *TwoAddressInstructionPass::FindLastUseInMBB(unsigned Reg,
- MachineBasicBlock *MBB,
- unsigned Dist) {
- unsigned LastUseDist = 0;
- MachineInstr *LastUse = 0;
- for (MachineRegisterInfo::reg_iterator I = MRI->reg_begin(Reg),
- E = MRI->reg_end(); I != E; ++I) {
- MachineOperand &MO = I.getOperand();
- MachineInstr *MI = MO.getParent();
- if (MI->getParent() != MBB || MI->isDebugValue())
- continue;
- DenseMap<MachineInstr*, unsigned>::iterator DI = DistanceMap.find(MI);
- if (DI == DistanceMap.end())
- continue;
- if (DI->second >= Dist)
- continue;
-
- if (MO.isUse() && DI->second > LastUseDist) {
- LastUse = DI->first;
- LastUseDist = DI->second;
- }
- }
- return LastUse;
-}
-
/// isCopyToReg - Return true if the specified MI is a copy instruction or
/// a extract_subreg instruction. It also returns the source and destination
/// registers and whether they are physical registers by reference.
@@ -483,32 +407,6 @@ static bool isTwoAddrUse(MachineInstr &MI, unsigned Reg, unsigned &DstReg) {
return false;
}
-/// findLocalKill - Look for an instruction below MI in the MBB that kills the
-/// specified register. Returns null if there are any other Reg use between the
-/// instructions.
-static
-MachineInstr *findLocalKill(unsigned Reg, MachineBasicBlock *MBB,
- MachineInstr *MI, MachineRegisterInfo *MRI,
- DenseMap<MachineInstr*, unsigned> &DistanceMap) {
- MachineInstr *KillMI = 0;
- for (MachineRegisterInfo::use_nodbg_iterator
- UI = MRI->use_nodbg_begin(Reg),
- UE = MRI->use_nodbg_end(); UI != UE; ++UI) {
- MachineInstr *UseMI = &*UI;
- if (UseMI == MI || UseMI->getParent() != MBB)
- continue;
- if (DistanceMap.count(UseMI))
- continue;
- if (!UI.getOperand().isKill())
- return 0;
- if (KillMI)
- return 0; // -O0 kill markers cannot be trusted?
- KillMI = UseMI;
- }
-
- return KillMI;
-}
-
/// findOnlyInterestingUse - Given a register, if has a single in-basic block
/// use, return the use instruction if it's a copy or a two-address use.
static
@@ -564,10 +462,11 @@ regsAreCompatible(unsigned RegA, unsigned RegB, const TargetRegisterInfo *TRI) {
}
-/// isProfitableToReMat - Return true if it's potentially profitable to commute
+/// isProfitableToCommute - Return true if it's potentially profitable to commute
/// the two-address instruction that's being processed.
bool
-TwoAddressInstructionPass::isProfitableToCommute(unsigned regB, unsigned regC,
+TwoAddressInstructionPass::isProfitableToCommute(unsigned regA, unsigned regB,
+ unsigned regC,
MachineInstr *MI, MachineBasicBlock *MBB,
unsigned Dist) {
if (OptLevel == CodeGenOpt::None)
@@ -604,15 +503,15 @@ TwoAddressInstructionPass::isProfitableToCommute(unsigned regB, unsigned regC,
// %reg1026<def> = ADD %reg1024, %reg1025
// r0 = MOV %reg1026
// Commute the ADD to hopefully eliminate an otherwise unavoidable copy.
- unsigned FromRegB = getMappedReg(regB, SrcRegMap);
- unsigned FromRegC = getMappedReg(regC, SrcRegMap);
- unsigned ToRegB = getMappedReg(regB, DstRegMap);
- unsigned ToRegC = getMappedReg(regC, DstRegMap);
- if ((FromRegB && ToRegB && !regsAreCompatible(FromRegB, ToRegB, TRI)) &&
- ((!FromRegC && !ToRegC) ||
- regsAreCompatible(FromRegB, ToRegC, TRI) ||
- regsAreCompatible(FromRegC, ToRegB, TRI)))
- return true;
+ unsigned ToRegA = getMappedReg(regA, DstRegMap);
+ if (ToRegA) {
+ unsigned FromRegB = getMappedReg(regB, SrcRegMap);
+ unsigned FromRegC = getMappedReg(regC, SrcRegMap);
+ bool BComp = !FromRegB || regsAreCompatible(FromRegB, ToRegA, TRI);
+ bool CComp = !FromRegC || regsAreCompatible(FromRegC, ToRegA, TRI);
+ if (BComp != CComp)
+ return !BComp && CComp;
+ }
// If there is a use of regC between its last def (could be livein) and this
// instruction, then bail.
@@ -653,6 +552,8 @@ TwoAddressInstructionPass::CommuteInstruction(MachineBasicBlock::iterator &mi,
if (LV)
// Update live variables
LV->replaceKillInstruction(RegC, MI, NewMI);
+ if (Indexes)
+ Indexes->replaceMachineInstrInMaps(MI, NewMI);
mbbi->insert(mi, NewMI); // Insert the new inst
mbbi->erase(mi); // Nuke the old inst.
@@ -701,6 +602,9 @@ TwoAddressInstructionPass::ConvertInstTo3Addr(MachineBasicBlock::iterator &mi,
DEBUG(dbgs() << "2addr: TO 3-ADDR: " << *NewMI);
bool Sunk = false;
+ if (Indexes)
+ Indexes->replaceMachineInstrInMaps(mi, NewMI);
+
if (NewMI->findRegisterUseOperand(RegB, false, TRI))
// FIXME: Temporary workaround. If the new instruction doesn't
// uses RegB, convertToThreeAddress must have created more
@@ -810,92 +714,6 @@ void TwoAddressInstructionPass::ProcessCopy(MachineInstr *MI,
return;
}
-/// isSafeToDelete - If the specified instruction does not produce any side
-/// effects and all of its defs are dead, then it's safe to delete.
-static bool isSafeToDelete(MachineInstr *MI,
- const TargetInstrInfo *TII,
- SmallVector<unsigned, 4> &Kills) {
- if (MI->mayStore() || MI->isCall())
- return false;
- if (MI->isTerminator() || MI->hasUnmodeledSideEffects())
- return false;
-
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- MachineOperand &MO = MI->getOperand(i);
- if (!MO.isReg())
- continue;
- if (MO.isDef() && !MO.isDead())
- return false;
- if (MO.isUse() && MO.isKill())
- Kills.push_back(MO.getReg());
- }
- return true;
-}
-
-/// canUpdateDeletedKills - Check if all the registers listed in Kills are
-/// killed by instructions in MBB preceding the current instruction at
-/// position Dist. If so, return true and record information about the
-/// preceding kills in NewKills.
-bool TwoAddressInstructionPass::
-canUpdateDeletedKills(SmallVector<unsigned, 4> &Kills,
- SmallVector<NewKill, 4> &NewKills,
- MachineBasicBlock *MBB, unsigned Dist) {
- while (!Kills.empty()) {
- unsigned Kill = Kills.back();
- Kills.pop_back();
- if (TargetRegisterInfo::isPhysicalRegister(Kill))
- return false;
-
- MachineInstr *LastKill = FindLastUseInMBB(Kill, MBB, Dist);
- if (!LastKill)
- return false;
-
- bool isModRef = LastKill->definesRegister(Kill);
- NewKills.push_back(std::make_pair(std::make_pair(Kill, isModRef),
- LastKill));
- }
- return true;
-}
-
-/// DeleteUnusedInstr - If an instruction with a tied register operand can
-/// be safely deleted, just delete it.
-bool
-TwoAddressInstructionPass::DeleteUnusedInstr(MachineBasicBlock::iterator &mi,
- MachineBasicBlock::iterator &nmi,
- MachineFunction::iterator &mbbi,
- unsigned Dist) {
- // Check if the instruction has no side effects and if all its defs are dead.
- SmallVector<unsigned, 4> Kills;
- if (!isSafeToDelete(mi, TII, Kills))
- return false;
-
- // If this instruction kills some virtual registers, we need to
- // update the kill information. If it's not possible to do so,
- // then bail out.
- SmallVector<NewKill, 4> NewKills;
- if (!canUpdateDeletedKills(Kills, NewKills, &*mbbi, Dist))
- return false;
-
- if (LV) {
- while (!NewKills.empty()) {
- MachineInstr *NewKill = NewKills.back().second;
- unsigned Kill = NewKills.back().first.first;
- bool isDead = NewKills.back().first.second;
- NewKills.pop_back();
- if (LV->removeVirtualRegisterKilled(Kill, mi)) {
- if (isDead)
- LV->addVirtualRegisterDead(Kill, NewKill);
- else
- LV->addVirtualRegisterKilled(Kill, NewKill);
- }
- }
- }
-
- mbbi->erase(mi); // Nuke the old inst.
- mi = nmi;
- return true;
-}
-
/// RescheduleMIBelowKill - If there is one more local instruction that reads
/// 'Reg' and it kills 'Reg, consider moving the instruction below the kill
/// instruction in order to eliminate the need for the copy.
@@ -904,14 +722,19 @@ TwoAddressInstructionPass::RescheduleMIBelowKill(MachineBasicBlock *MBB,
MachineBasicBlock::iterator &mi,
MachineBasicBlock::iterator &nmi,
unsigned Reg) {
+ // Bail immediately if we don't have LV available. We use it to find kills
+ // efficiently.
+ if (!LV)
+ return false;
+
MachineInstr *MI = &*mi;
DenseMap<MachineInstr*, unsigned>::iterator DI = DistanceMap.find(MI);
if (DI == DistanceMap.end())
// Must be created from unfolded load. Don't waste time trying this.
return false;
- MachineInstr *KillMI = findLocalKill(Reg, MBB, mi, MRI, DistanceMap);
- if (!KillMI || KillMI->isCopy() || KillMI->isCopyLike())
+ MachineInstr *KillMI = LV->getVarInfo(Reg).findKill(MBB);
+ if (!KillMI || MI == KillMI || KillMI->isCopy() || KillMI->isCopyLike())
// Don't mess with copies, they may be coalesced later.
return false;
@@ -998,6 +821,12 @@ TwoAddressInstructionPass::RescheduleMIBelowKill(MachineBasicBlock *MBB,
((MO.isKill() && Uses.count(MOReg)) || Kills.count(MOReg)))
// Don't want to extend other live ranges and update kills.
return false;
+ if (MOReg == Reg && !MO.isKill())
+ // We can't schedule across a use of the register in question.
+ return false;
+ // Ensure that if this is the register in question, it's the kill we expect.
+ assert((MOReg != Reg || OtherMI == KillMI) &&
+ "Found multiple kills of a register in a basic block");
}
}
}
@@ -1011,20 +840,13 @@ TwoAddressInstructionPass::RescheduleMIBelowKill(MachineBasicBlock *MBB,
MBB->splice(KillPos, MBB, From, To);
DistanceMap.erase(DI);
- if (LV) {
- // Update live variables
- LV->removeVirtualRegisterKilled(Reg, KillMI);
- LV->addVirtualRegisterKilled(Reg, MI);
- } else {
- for (unsigned i = 0, e = KillMI->getNumOperands(); i != e; ++i) {
- MachineOperand &MO = KillMI->getOperand(i);
- if (!MO.isReg() || !MO.isUse() || MO.getReg() != Reg)
- continue;
- MO.setIsKill(false);
- }
- MI->addRegisterKilled(Reg, 0);
- }
+ // Update live variables
+ LV->removeVirtualRegisterKilled(Reg, KillMI);
+ LV->addVirtualRegisterKilled(Reg, MI);
+ if (LIS)
+ LIS->handleMove(MI);
+ DEBUG(dbgs() << "\trescheduled below kill: " << *KillMI);
return true;
}
@@ -1045,7 +867,7 @@ bool TwoAddressInstructionPass::isDefTooClose(unsigned Reg, unsigned Dist,
return true; // Below MI
unsigned DefDist = DDI->second;
assert(Dist > DefDist && "Visited def already?");
- if (TII->getInstrLatency(InstrItins, DefMI) > (int)(Dist - DefDist))
+ if (TII->getInstrLatency(InstrItins, DefMI) > (Dist - DefDist))
return true;
}
return false;
@@ -1060,14 +882,19 @@ TwoAddressInstructionPass::RescheduleKillAboveMI(MachineBasicBlock *MBB,
MachineBasicBlock::iterator &mi,
MachineBasicBlock::iterator &nmi,
unsigned Reg) {
+ // Bail immediately if we don't have LV available. We use it to find kills
+ // efficiently.
+ if (!LV)
+ return false;
+
MachineInstr *MI = &*mi;
DenseMap<MachineInstr*, unsigned>::iterator DI = DistanceMap.find(MI);
if (DI == DistanceMap.end())
// Must be created from unfolded load. Don't waste time trying this.
return false;
- MachineInstr *KillMI = findLocalKill(Reg, MBB, mi, MRI, DistanceMap);
- if (!KillMI || KillMI->isCopy() || KillMI->isCopyLike())
+ MachineInstr *KillMI = LV->getVarInfo(Reg).findKill(MBB);
+ if (!KillMI || MI == KillMI || KillMI->isCopy() || KillMI->isCopyLike())
// Don't mess with copies, they may be coalesced later.
return false;
@@ -1093,6 +920,8 @@ TwoAddressInstructionPass::RescheduleKillAboveMI(MachineBasicBlock *MBB,
continue;
if (isDefTooClose(MOReg, DI->second, MI, MBB))
return false;
+ if (MOReg == Reg && !MO.isKill())
+ return false;
Uses.insert(MOReg);
if (MO.isKill() && MOReg != Reg)
Kills.insert(MOReg);
@@ -1134,6 +963,9 @@ TwoAddressInstructionPass::RescheduleKillAboveMI(MachineBasicBlock *MBB,
if (Kills.count(MOReg))
// Don't want to extend other live ranges and update kills.
return false;
+ if (OtherMI != MI && MOReg == Reg && !MO.isKill())
+ // We can't schedule across a use of the register in question.
+ return false;
} else {
OtherDefs.push_back(MOReg);
}
@@ -1164,19 +996,13 @@ TwoAddressInstructionPass::RescheduleKillAboveMI(MachineBasicBlock *MBB,
nmi = llvm::prior(InsertPos); // Backtrack so we process the moved instr.
DistanceMap.erase(DI);
- if (LV) {
- // Update live variables
- LV->removeVirtualRegisterKilled(Reg, KillMI);
- LV->addVirtualRegisterKilled(Reg, MI);
- } else {
- for (unsigned i = 0, e = KillMI->getNumOperands(); i != e; ++i) {
- MachineOperand &MO = KillMI->getOperand(i);
- if (!MO.isReg() || !MO.isUse() || MO.getReg() != Reg)
- continue;
- MO.setIsKill(false);
- }
- MI->addRegisterKilled(Reg, 0);
- }
+ // Update live variables
+ LV->removeVirtualRegisterKilled(Reg, KillMI);
+ LV->addVirtualRegisterKilled(Reg, MI);
+ if (LIS)
+ LIS->handleMove(KillMI);
+
+ DEBUG(dbgs() << "\trescheduled kill: " << *KillMI);
return true;
}
@@ -1201,15 +1027,10 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi,
assert(TargetRegisterInfo::isVirtualRegister(regB) &&
"cannot make instruction into two-address form");
-
- // If regA is dead and the instruction can be deleted, just delete
- // it so it doesn't clobber regB.
bool regBKilled = isKilled(MI, regB, MRI, TII);
- if (!regBKilled && MI.getOperand(DstIdx).isDead() &&
- DeleteUnusedInstr(mi, nmi, mbbi, Dist)) {
- ++NumDeletes;
- return true; // Done with this instruction.
- }
+
+ if (TargetRegisterInfo::isVirtualRegister(regA))
+ ScanUses(regA, &*mbbi, Processed);
// Check if it is profitable to commute the operands.
unsigned SrcOp1, SrcOp2;
@@ -1230,7 +1051,7 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi,
// If C dies but B does not, swap the B and C operands.
// This makes the live ranges of A and C joinable.
TryCommute = true;
- else if (isProfitableToCommute(regB, regC, &MI, mbbi, Dist)) {
+ else if (isProfitableToCommute(regA, regB, regC, &MI, mbbi, Dist)) {
TryCommute = true;
AggressiveCommute = true;
}
@@ -1252,9 +1073,6 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi,
return true;
}
- if (TargetRegisterInfo::isVirtualRegister(regA))
- ScanUses(regA, &*mbbi, Processed);
-
if (MI.isConvertibleTo3Addr()) {
// This instruction is potentially convertible to a true
// three-address instruction. Check if it is profitable.
@@ -1293,15 +1111,14 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi,
if (NewOpc != 0) {
const MCInstrDesc &UnfoldMCID = TII->get(NewOpc);
if (UnfoldMCID.getNumDefs() == 1) {
- MachineFunction &MF = *mbbi->getParent();
-
// Unfold the load.
DEBUG(dbgs() << "2addr: UNFOLDING: " << MI);
const TargetRegisterClass *RC =
- TII->getRegClass(UnfoldMCID, LoadRegIndex, TRI);
+ TRI->getAllocatableClass(
+ TII->getRegClass(UnfoldMCID, LoadRegIndex, TRI, *MF));
unsigned Reg = MRI->createVirtualRegister(RC);
SmallVector<MachineInstr *, 2> NewMIs;
- if (!TII->unfoldMemoryOperand(MF, &MI, Reg,
+ if (!TII->unfoldMemoryOperand(*MF, &MI, Reg,
/*UnfoldLoad=*/true,/*UnfoldStore=*/false,
NewMIs)) {
DEBUG(dbgs() << "2addr: ABANDONING UNFOLD\n");
@@ -1378,15 +1195,177 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi,
return false;
}
+// Collect tied operands of MI that need to be handled.
+// Rewrite trivial cases immediately.
+ // Return true if any tied operands were found, including the trivial ones.
+bool TwoAddressInstructionPass::
+collectTiedOperands(MachineInstr *MI, TiedOperandMap &TiedOperands) {
+ const MCInstrDesc &MCID = MI->getDesc();
+ bool AnyOps = false;
+ unsigned NumOps = MI->isInlineAsm() ?
+ MI->getNumOperands() : MCID.getNumOperands();
+
+ for (unsigned SrcIdx = 0; SrcIdx < NumOps; ++SrcIdx) {
+ unsigned DstIdx = 0;
+ if (!MI->isRegTiedToDefOperand(SrcIdx, &DstIdx))
+ continue;
+ AnyOps = true;
+ MachineOperand &SrcMO = MI->getOperand(SrcIdx);
+ MachineOperand &DstMO = MI->getOperand(DstIdx);
+ unsigned SrcReg = SrcMO.getReg();
+ unsigned DstReg = DstMO.getReg();
+ // Tied constraint already satisfied?
+ if (SrcReg == DstReg)
+ continue;
+
+ assert(SrcReg && SrcMO.isUse() && "two address instruction invalid");
+
+ // Deal with <undef> uses immediately - simply rewrite the src operand.
+ if (SrcMO.isUndef()) {
+ // Constrain the DstReg register class if required.
+ if (TargetRegisterInfo::isVirtualRegister(DstReg))
+ if (const TargetRegisterClass *RC = TII->getRegClass(MCID, SrcIdx,
+ TRI, *MF))
+ MRI->constrainRegClass(DstReg, RC);
+ SrcMO.setReg(DstReg);
+ DEBUG(dbgs() << "\t\trewrite undef:\t" << *MI);
+ continue;
+ }
+ TiedOperands[SrcReg].push_back(std::make_pair(SrcIdx, DstIdx));
+ }
+ return AnyOps;
+}
+
+// Process a list of tied MI operands that all use the same source register.
+// The tied pairs are of the form (SrcIdx, DstIdx).
+void
+TwoAddressInstructionPass::processTiedPairs(MachineInstr *MI,
+ TiedPairList &TiedPairs,
+ unsigned &Dist) {
+ bool IsEarlyClobber = false;
+ bool RemovedKillFlag = false;
+ bool AllUsesCopied = true;
+ unsigned LastCopiedReg = 0;
+ unsigned RegB = 0;
+ for (unsigned tpi = 0, tpe = TiedPairs.size(); tpi != tpe; ++tpi) {
+ unsigned SrcIdx = TiedPairs[tpi].first;
+ unsigned DstIdx = TiedPairs[tpi].second;
+
+ const MachineOperand &DstMO = MI->getOperand(DstIdx);
+ unsigned RegA = DstMO.getReg();
+ IsEarlyClobber |= DstMO.isEarlyClobber();
+
+ // Grab RegB from the instruction because it may have changed if the
+ // instruction was commuted.
+ RegB = MI->getOperand(SrcIdx).getReg();
+
+ if (RegA == RegB) {
+ // The register is tied to multiple destinations (or else we would
+ // not have continued this far), but this use of the register
+ // already matches the tied destination. Leave it.
+ AllUsesCopied = false;
+ continue;
+ }
+ LastCopiedReg = RegA;
+
+ assert(TargetRegisterInfo::isVirtualRegister(RegB) &&
+ "cannot make instruction into two-address form");
+
+#ifndef NDEBUG
+ // First, verify that we don't have a use of "a" in the instruction
+ // (a = b + a for example) because our transformation will not
+ // work. This should never occur because we are in SSA form.
+ for (unsigned i = 0; i != MI->getNumOperands(); ++i)
+ assert(i == DstIdx ||
+ !MI->getOperand(i).isReg() ||
+ MI->getOperand(i).getReg() != RegA);
+#endif
+
+ // Emit a copy.
+ BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
+ TII->get(TargetOpcode::COPY), RegA).addReg(RegB);
+
+ // Update DistanceMap.
+ MachineBasicBlock::iterator PrevMI = MI;
+ --PrevMI;
+ DistanceMap.insert(std::make_pair(PrevMI, Dist));
+ DistanceMap[MI] = ++Dist;
+
+ SlotIndex CopyIdx;
+ if (Indexes)
+ CopyIdx = Indexes->insertMachineInstrInMaps(PrevMI).getRegSlot();
+
+ DEBUG(dbgs() << "\t\tprepend:\t" << *PrevMI);
+
+ MachineOperand &MO = MI->getOperand(SrcIdx);
+ assert(MO.isReg() && MO.getReg() == RegB && MO.isUse() &&
+ "inconsistent operand info for 2-reg pass");
+ if (MO.isKill()) {
+ MO.setIsKill(false);
+ RemovedKillFlag = true;
+ }
+
+ // Make sure regA is a legal regclass for the SrcIdx operand.
+ if (TargetRegisterInfo::isVirtualRegister(RegA) &&
+ TargetRegisterInfo::isVirtualRegister(RegB))
+ MRI->constrainRegClass(RegA, MRI->getRegClass(RegB));
+
+ MO.setReg(RegA);
+
+ // Propagate SrcRegMap.
+ SrcRegMap[RegA] = RegB;
+ }
+
+
+ if (AllUsesCopied) {
+ if (!IsEarlyClobber) {
+ // Replace other (un-tied) uses of regB with LastCopiedReg.
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (MO.isReg() && MO.getReg() == RegB && MO.isUse()) {
+ if (MO.isKill()) {
+ MO.setIsKill(false);
+ RemovedKillFlag = true;
+ }
+ MO.setReg(LastCopiedReg);
+ }
+ }
+ }
+
+ // Update live variables for regB.
+ if (RemovedKillFlag && LV && LV->getVarInfo(RegB).removeKill(MI)) {
+ MachineBasicBlock::iterator PrevMI = MI;
+ --PrevMI;
+ LV->addVirtualRegisterKilled(RegB, PrevMI);
+ }
+
+ } else if (RemovedKillFlag) {
+ // Some tied uses of regB matched their destination registers, so
+ // regB is still used in this instruction, but a kill flag was
+ // removed from a different tied use of regB, so now we need to add
+ // a kill flag to one of the remaining uses of regB.
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (MO.isReg() && MO.getReg() == RegB && MO.isUse()) {
+ MO.setIsKill(true);
+ break;
+ }
+ }
+ }
+}
+
/// runOnMachineFunction - Reduce two-address instructions to two operands.
///
-bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) {
- const TargetMachine &TM = MF.getTarget();
- MRI = &MF.getRegInfo();
+bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &Func) {
+ MF = &Func;
+ const TargetMachine &TM = MF->getTarget();
+ MRI = &MF->getRegInfo();
TII = TM.getInstrInfo();
TRI = TM.getRegisterInfo();
InstrItins = TM.getInstrItineraryData();
+ Indexes = getAnalysisIfAvailable<SlotIndexes>();
LV = getAnalysisIfAvailable<LiveVariables>();
+ LIS = getAnalysisIfAvailable<LiveIntervals>();
AA = &getAnalysis<AliasAnalysis>();
OptLevel = TM.getOptLevel();
@@ -1394,20 +1373,15 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) {
DEBUG(dbgs() << "********** REWRITING TWO-ADDR INSTRS **********\n");
DEBUG(dbgs() << "********** Function: "
- << MF.getFunction()->getName() << '\n');
+ << MF->getFunction()->getName() << '\n');
// This pass takes the function out of SSA form.
MRI->leaveSSA();
- // ReMatRegs - Keep track of the registers whose def's are remat'ed.
- BitVector ReMatRegs(MRI->getNumVirtRegs());
-
- typedef DenseMap<unsigned, SmallVector<std::pair<unsigned, unsigned>, 4> >
- TiedOperandMap;
- TiedOperandMap TiedOperands(4);
+ TiedOperandMap TiedOperands;
SmallPtrSet<MachineInstr*, 8> Processed;
- for (MachineFunction::iterator mbbi = MF.begin(), mbbe = MF.end();
+ for (MachineFunction::iterator mbbi = MF->begin(), mbbe = MF->end();
mbbi != mbbe; ++mbbi) {
unsigned Dist = 0;
DistanceMap.clear();
@@ -1426,188 +1400,63 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) {
if (mi->isRegSequence())
RegSequences.push_back(&*mi);
- const MCInstrDesc &MCID = mi->getDesc();
- bool FirstTied = true;
-
DistanceMap.insert(std::make_pair(mi, ++Dist));
ProcessCopy(&*mi, &*mbbi, Processed);
// First scan through all the tied register uses in this instruction
// and record a list of pairs of tied operands for each register.
- unsigned NumOps = mi->isInlineAsm()
- ? mi->getNumOperands() : MCID.getNumOperands();
- for (unsigned SrcIdx = 0; SrcIdx < NumOps; ++SrcIdx) {
- unsigned DstIdx = 0;
- if (!mi->isRegTiedToDefOperand(SrcIdx, &DstIdx))
- continue;
-
- if (FirstTied) {
- FirstTied = false;
- ++NumTwoAddressInstrs;
- DEBUG(dbgs() << '\t' << *mi);
- }
-
- assert(mi->getOperand(SrcIdx).isReg() &&
- mi->getOperand(SrcIdx).getReg() &&
- mi->getOperand(SrcIdx).isUse() &&
- "two address instruction invalid");
-
- unsigned regB = mi->getOperand(SrcIdx).getReg();
- TiedOperands[regB].push_back(std::make_pair(SrcIdx, DstIdx));
+ if (!collectTiedOperands(mi, TiedOperands)) {
+ mi = nmi;
+ continue;
}
- // Now iterate over the information collected above.
- for (TiedOperandMap::iterator OI = TiedOperands.begin(),
- OE = TiedOperands.end(); OI != OE; ++OI) {
- SmallVector<std::pair<unsigned, unsigned>, 4> &TiedPairs = OI->second;
-
- // If the instruction has a single pair of tied operands, try some
- // transformations that may either eliminate the tied operands or
- // improve the opportunities for coalescing away the register copy.
- if (TiedOperands.size() == 1 && TiedPairs.size() == 1) {
+ ++NumTwoAddressInstrs;
+ MadeChange = true;
+ DEBUG(dbgs() << '\t' << *mi);
+
+ // If the instruction has a single pair of tied operands, try some
+ // transformations that may either eliminate the tied operands or
+ // improve the opportunities for coalescing away the register copy.
+ if (TiedOperands.size() == 1) {
+ SmallVector<std::pair<unsigned, unsigned>, 4> &TiedPairs
+ = TiedOperands.begin()->second;
+ if (TiedPairs.size() == 1) {
unsigned SrcIdx = TiedPairs[0].first;
unsigned DstIdx = TiedPairs[0].second;
-
- // If the registers are already equal, nothing needs to be done.
- if (mi->getOperand(SrcIdx).getReg() ==
- mi->getOperand(DstIdx).getReg())
- break; // Done with this instruction.
-
- if (TryInstructionTransform(mi, nmi, mbbi, SrcIdx, DstIdx, Dist,
- Processed))
- break; // The tied operands have been eliminated.
- }
-
- bool IsEarlyClobber = false;
- bool RemovedKillFlag = false;
- bool AllUsesCopied = true;
- unsigned LastCopiedReg = 0;
- unsigned regB = OI->first;
- for (unsigned tpi = 0, tpe = TiedPairs.size(); tpi != tpe; ++tpi) {
- unsigned SrcIdx = TiedPairs[tpi].first;
- unsigned DstIdx = TiedPairs[tpi].second;
-
- const MachineOperand &DstMO = mi->getOperand(DstIdx);
- unsigned regA = DstMO.getReg();
- IsEarlyClobber |= DstMO.isEarlyClobber();
-
- // Grab regB from the instruction because it may have changed if the
- // instruction was commuted.
- regB = mi->getOperand(SrcIdx).getReg();
-
- if (regA == regB) {
- // The register is tied to multiple destinations (or else we would
- // not have continued this far), but this use of the register
- // already matches the tied destination. Leave it.
- AllUsesCopied = false;
+ unsigned SrcReg = mi->getOperand(SrcIdx).getReg();
+ unsigned DstReg = mi->getOperand(DstIdx).getReg();
+ if (SrcReg != DstReg &&
+ TryInstructionTransform(mi, nmi, mbbi, SrcIdx, DstIdx, Dist,
+ Processed)) {
+ // The tied operands have been eliminated or shifted further down the
+ // block to ease elimination. Continue processing with 'nmi'.
+ TiedOperands.clear();
+ mi = nmi;
continue;
}
- LastCopiedReg = regA;
-
- assert(TargetRegisterInfo::isVirtualRegister(regB) &&
- "cannot make instruction into two-address form");
-
-#ifndef NDEBUG
- // First, verify that we don't have a use of "a" in the instruction
- // (a = b + a for example) because our transformation will not
- // work. This should never occur because we are in SSA form.
- for (unsigned i = 0; i != mi->getNumOperands(); ++i)
- assert(i == DstIdx ||
- !mi->getOperand(i).isReg() ||
- mi->getOperand(i).getReg() != regA);
-#endif
-
- // Emit a copy or rematerialize the definition.
- const TargetRegisterClass *rc = MRI->getRegClass(regB);
- MachineInstr *DefMI = MRI->getVRegDef(regB);
- // If it's safe and profitable, remat the definition instead of
- // copying it.
- if (DefMI &&
- DefMI->isAsCheapAsAMove() &&
- DefMI->isSafeToReMat(TII, AA, regB) &&
- isProfitableToReMat(regB, rc, mi, DefMI, mbbi, Dist)){
- DEBUG(dbgs() << "2addr: REMATTING : " << *DefMI << "\n");
- unsigned regASubIdx = mi->getOperand(DstIdx).getSubReg();
- TII->reMaterialize(*mbbi, mi, regA, regASubIdx, DefMI, *TRI);
- ReMatRegs.set(TargetRegisterInfo::virtReg2Index(regB));
- ++NumReMats;
- } else {
- BuildMI(*mbbi, mi, mi->getDebugLoc(), TII->get(TargetOpcode::COPY),
- regA).addReg(regB);
- }
-
- MachineBasicBlock::iterator prevMI = prior(mi);
- // Update DistanceMap.
- DistanceMap.insert(std::make_pair(prevMI, Dist));
- DistanceMap[mi] = ++Dist;
-
- DEBUG(dbgs() << "\t\tprepend:\t" << *prevMI);
-
- MachineOperand &MO = mi->getOperand(SrcIdx);
- assert(MO.isReg() && MO.getReg() == regB && MO.isUse() &&
- "inconsistent operand info for 2-reg pass");
- if (MO.isKill()) {
- MO.setIsKill(false);
- RemovedKillFlag = true;
- }
- MO.setReg(regA);
}
+ }
- if (AllUsesCopied) {
- if (!IsEarlyClobber) {
- // Replace other (un-tied) uses of regB with LastCopiedReg.
- for (unsigned i = 0, e = mi->getNumOperands(); i != e; ++i) {
- MachineOperand &MO = mi->getOperand(i);
- if (MO.isReg() && MO.getReg() == regB && MO.isUse()) {
- if (MO.isKill()) {
- MO.setIsKill(false);
- RemovedKillFlag = true;
- }
- MO.setReg(LastCopiedReg);
- }
- }
- }
-
- // Update live variables for regB.
- if (RemovedKillFlag && LV && LV->getVarInfo(regB).removeKill(mi))
- LV->addVirtualRegisterKilled(regB, prior(mi));
-
- } else if (RemovedKillFlag) {
- // Some tied uses of regB matched their destination registers, so
- // regB is still used in this instruction, but a kill flag was
- // removed from a different tied use of regB, so now we need to add
- // a kill flag to one of the remaining uses of regB.
- for (unsigned i = 0, e = mi->getNumOperands(); i != e; ++i) {
- MachineOperand &MO = mi->getOperand(i);
- if (MO.isReg() && MO.getReg() == regB && MO.isUse()) {
- MO.setIsKill(true);
- break;
- }
- }
- }
-
- // Schedule the source copy / remat inserted to form two-address
- // instruction. FIXME: Does it matter the distance map may not be
- // accurate after it's scheduled?
- TII->scheduleTwoAddrSource(prior(mi), mi, *TRI);
-
- MadeChange = true;
-
+ // Now iterate over the information collected above.
+ for (TiedOperandMap::iterator OI = TiedOperands.begin(),
+ OE = TiedOperands.end(); OI != OE; ++OI) {
+ processTiedPairs(mi, OI->second, Dist);
DEBUG(dbgs() << "\t\trewrite to:\t" << *mi);
+ }
- // Rewrite INSERT_SUBREG as COPY now that we no longer need SSA form.
- if (mi->isInsertSubreg()) {
- // From %reg = INSERT_SUBREG %reg, %subreg, subidx
- // To %reg:subidx = COPY %subreg
- unsigned SubIdx = mi->getOperand(3).getImm();
- mi->RemoveOperand(3);
- assert(mi->getOperand(0).getSubReg() == 0 && "Unexpected subreg idx");
- mi->getOperand(0).setSubReg(SubIdx);
- mi->RemoveOperand(1);
- mi->setDesc(TII->get(TargetOpcode::COPY));
- DEBUG(dbgs() << "\t\tconvert to:\t" << *mi);
- }
+ // Rewrite INSERT_SUBREG as COPY now that we no longer need SSA form.
+ if (mi->isInsertSubreg()) {
+ // From %reg = INSERT_SUBREG %reg, %subreg, subidx
+ // To %reg:subidx = COPY %subreg
+ unsigned SubIdx = mi->getOperand(3).getImm();
+ mi->RemoveOperand(3);
+ assert(mi->getOperand(0).getSubReg() == 0 && "Unexpected subreg idx");
+ mi->getOperand(0).setSubReg(SubIdx);
+ mi->getOperand(0).setIsUndef(mi->getOperand(1).isUndef());
+ mi->RemoveOperand(1);
+ mi->setDesc(TII->get(TargetOpcode::COPY));
+ DEBUG(dbgs() << "\t\tconvert to:\t" << *mi);
}
// Clear TiedOperands here instead of at the top of the loop
@@ -1617,15 +1466,6 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) {
}
}
- // Some remat'ed instructions are dead.
- for (int i = ReMatRegs.find_first(); i != -1; i = ReMatRegs.find_next(i)) {
- unsigned VReg = TargetRegisterInfo::index2VirtReg(i);
- if (MRI->use_nodbg_empty(VReg)) {
- MachineInstr *DefMI = MRI->getVRegDef(VReg);
- DefMI->eraseFromParent();
- }
- }
-
// Eliminate REG_SEQUENCE instructions. Their whole purpose was to preseve
// SSA form. It's now safe to de-SSA.
MadeChange |= EliminateRegSequences();
@@ -1694,9 +1534,10 @@ TwoAddressInstructionPass::CoalesceExtSubRegs(SmallVector<unsigned,4> &Srcs,
continue;
// Check that the instructions are all in the same basic block.
- MachineInstr *SrcDefMI = MRI->getVRegDef(SrcReg);
- MachineInstr *DstDefMI = MRI->getVRegDef(DstReg);
- if (SrcDefMI->getParent() != DstDefMI->getParent())
+ MachineInstr *SrcDefMI = MRI->getUniqueVRegDef(SrcReg);
+ MachineInstr *DstDefMI = MRI->getUniqueVRegDef(DstReg);
+ if (!SrcDefMI || !DstDefMI ||
+ SrcDefMI->getParent() != DstDefMI->getParent())
continue;
// If there are no other uses than copies which feed into
@@ -1832,6 +1673,11 @@ bool TwoAddressInstructionPass::EliminateRegSequences() {
SmallVector<unsigned, 4> RealSrcs;
SmallSet<unsigned, 4> Seen;
for (unsigned i = 1, e = MI->getNumOperands(); i < e; i += 2) {
+ // Nothing needs to be inserted for <undef> operands.
+ if (MI->getOperand(i).isUndef()) {
+ MI->getOperand(i).setReg(0);
+ continue;
+ }
unsigned SrcReg = MI->getOperand(i).getReg();
unsigned SrcSubIdx = MI->getOperand(i).getSubReg();
unsigned SubIdx = MI->getOperand(i+1).getImm();
@@ -1841,7 +1687,7 @@ bool TwoAddressInstructionPass::EliminateRegSequences() {
MachineInstr *DefMI = NULL;
if (!MI->getOperand(i).getSubReg() &&
!TargetRegisterInfo::isPhysicalRegister(SrcReg)) {
- DefMI = MRI->getVRegDef(SrcReg);
+ DefMI = MRI->getUniqueVRegDef(SrcReg);
}
if (DefMI && DefMI->isImplicitDef()) {
diff --git a/lib/CodeGen/VirtRegMap.cpp b/lib/CodeGen/VirtRegMap.cpp
index 3bab93b..93840f0 100644
--- a/lib/CodeGen/VirtRegMap.cpp
+++ b/lib/CodeGen/VirtRegMap.cpp
@@ -18,12 +18,14 @@
#define DEBUG_TYPE "regalloc"
#include "VirtRegMap.h"
+#include "LiveDebugVariables.h"
#include "llvm/Function.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/SlotIndexes.h"
+#include "llvm/CodeGen/Passes.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
@@ -104,11 +106,149 @@ void VirtRegMap::assignVirt2StackSlot(unsigned virtReg, int SS) {
Virt2StackSlotMap[virtReg] = SS;
}
-void VirtRegMap::rewrite(SlotIndexes *Indexes) {
+void VirtRegMap::print(raw_ostream &OS, const Module*) const {
+ OS << "********** REGISTER MAP **********\n";
+ for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) {
+ unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
+ if (Virt2PhysMap[Reg] != (unsigned)VirtRegMap::NO_PHYS_REG) {
+ OS << '[' << PrintReg(Reg, TRI) << " -> "
+ << PrintReg(Virt2PhysMap[Reg], TRI) << "] "
+ << MRI->getRegClass(Reg)->getName() << "\n";
+ }
+ }
+
+ for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) {
+ unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
+ if (Virt2StackSlotMap[Reg] != VirtRegMap::NO_STACK_SLOT) {
+ OS << '[' << PrintReg(Reg, TRI) << " -> fi#" << Virt2StackSlotMap[Reg]
+ << "] " << MRI->getRegClass(Reg)->getName() << "\n";
+ }
+ }
+ OS << '\n';
+}
+
+void VirtRegMap::dump() const {
+ print(dbgs());
+}
+
+//===----------------------------------------------------------------------===//
+// VirtRegRewriter
+//===----------------------------------------------------------------------===//
+//
+// The VirtRegRewriter is the last of the register allocator passes.
+// It rewrites virtual registers to physical registers as specified in the
+// VirtRegMap analysis. It also updates live-in information on basic blocks
+// according to LiveIntervals.
+//
+namespace {
+class VirtRegRewriter : public MachineFunctionPass {
+ MachineFunction *MF;
+ const TargetMachine *TM;
+ const TargetRegisterInfo *TRI;
+ const TargetInstrInfo *TII;
+ MachineRegisterInfo *MRI;
+ SlotIndexes *Indexes;
+ LiveIntervals *LIS;
+ VirtRegMap *VRM;
+
+ void rewrite();
+ void addMBBLiveIns();
+public:
+ static char ID;
+ VirtRegRewriter() : MachineFunctionPass(ID) {}
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+
+ virtual bool runOnMachineFunction(MachineFunction&);
+};
+} // end anonymous namespace
+
+char &llvm::VirtRegRewriterID = VirtRegRewriter::ID;
+
+INITIALIZE_PASS_BEGIN(VirtRegRewriter, "virtregrewriter",
+ "Virtual Register Rewriter", false, false)
+INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
+INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
+INITIALIZE_PASS_DEPENDENCY(LiveDebugVariables)
+INITIALIZE_PASS_DEPENDENCY(VirtRegMap)
+INITIALIZE_PASS_END(VirtRegRewriter, "virtregrewriter",
+ "Virtual Register Rewriter", false, false)
+
+char VirtRegRewriter::ID = 0;
+
+void VirtRegRewriter::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ AU.addRequired<LiveIntervals>();
+ AU.addRequired<SlotIndexes>();
+ AU.addPreserved<SlotIndexes>();
+ AU.addRequired<LiveDebugVariables>();
+ AU.addRequired<VirtRegMap>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+bool VirtRegRewriter::runOnMachineFunction(MachineFunction &fn) {
+ MF = &fn;
+ TM = &MF->getTarget();
+ TRI = TM->getRegisterInfo();
+ TII = TM->getInstrInfo();
+ MRI = &MF->getRegInfo();
+ Indexes = &getAnalysis<SlotIndexes>();
+ LIS = &getAnalysis<LiveIntervals>();
+ VRM = &getAnalysis<VirtRegMap>();
DEBUG(dbgs() << "********** REWRITE VIRTUAL REGISTERS **********\n"
<< "********** Function: "
<< MF->getFunction()->getName() << '\n');
- DEBUG(dump());
+ DEBUG(VRM->dump());
+
+ // Add kill flags while we still have virtual registers.
+ LIS->addKillFlags();
+
+ // Live-in lists on basic blocks are required for physregs.
+ addMBBLiveIns();
+
+ // Rewrite virtual registers.
+ rewrite();
+
+ // Write out new DBG_VALUE instructions.
+ getAnalysis<LiveDebugVariables>().emitDebugValues(VRM);
+
+ // All machine operands and other references to virtual registers have been
+ // replaced. Remove the virtual registers and release all the transient data.
+ VRM->clearAllVirt();
+ MRI->clearVirtRegs();
+ return true;
+}
+
+// Compute MBB live-in lists from virtual register live ranges and their
+// assignments.
+void VirtRegRewriter::addMBBLiveIns() {
+ SmallVector<MachineBasicBlock*, 16> LiveIn;
+ for (unsigned Idx = 0, IdxE = MRI->getNumVirtRegs(); Idx != IdxE; ++Idx) {
+ unsigned VirtReg = TargetRegisterInfo::index2VirtReg(Idx);
+ if (MRI->reg_nodbg_empty(VirtReg))
+ continue;
+ LiveInterval &LI = LIS->getInterval(VirtReg);
+ if (LI.empty() || LIS->intervalIsInOneMBB(LI))
+ continue;
+ // This is a virtual register that is live across basic blocks. Its
+ // assigned PhysReg must be marked as live-in to those blocks.
+ unsigned PhysReg = VRM->getPhys(VirtReg);
+ assert(PhysReg != VirtRegMap::NO_PHYS_REG && "Unmapped virtual register.");
+
+ // Scan the segments of LI.
+ for (LiveInterval::const_iterator I = LI.begin(), E = LI.end(); I != E;
+ ++I) {
+ if (!Indexes->findLiveInMBBs(I->start, I->end, LiveIn))
+ continue;
+ for (unsigned i = 0, e = LiveIn.size(); i != e; ++i)
+ if (!LiveIn[i]->isLiveIn(PhysReg))
+ LiveIn[i]->addLiveIn(PhysReg);
+ LiveIn.clear();
+ }
+ }
+}
+
+void VirtRegRewriter::rewrite() {
SmallVector<unsigned, 8> SuperDeads;
SmallVector<unsigned, 8> SuperDefs;
SmallVector<unsigned, 8> SuperKills;
@@ -135,8 +275,9 @@ void VirtRegMap::rewrite(SlotIndexes *Indexes) {
if (!MO.isReg() || !TargetRegisterInfo::isVirtualRegister(MO.getReg()))
continue;
unsigned VirtReg = MO.getReg();
- unsigned PhysReg = getPhys(VirtReg);
- assert(PhysReg != NO_PHYS_REG && "Instruction uses unmapped VirtReg");
+ unsigned PhysReg = VRM->getPhys(VirtReg);
+ assert(PhysReg != VirtRegMap::NO_PHYS_REG &&
+ "Instruction uses unmapped VirtReg");
assert(!Reserved.test(PhysReg) && "Reserved register assignment");
// Preserve semantics of sub-register operands.
@@ -207,31 +348,3 @@ void VirtRegMap::rewrite(SlotIndexes *Indexes) {
if (!MRI->reg_nodbg_empty(Reg))
MRI->setPhysRegUsed(Reg);
}
-
-void VirtRegMap::print(raw_ostream &OS, const Module* M) const {
- const TargetRegisterInfo* TRI = MF->getTarget().getRegisterInfo();
- const MachineRegisterInfo &MRI = MF->getRegInfo();
-
- OS << "********** REGISTER MAP **********\n";
- for (unsigned i = 0, e = MRI.getNumVirtRegs(); i != e; ++i) {
- unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
- if (Virt2PhysMap[Reg] != (unsigned)VirtRegMap::NO_PHYS_REG) {
- OS << '[' << PrintReg(Reg, TRI) << " -> "
- << PrintReg(Virt2PhysMap[Reg], TRI) << "] "
- << MRI.getRegClass(Reg)->getName() << "\n";
- }
- }
-
- for (unsigned i = 0, e = MRI.getNumVirtRegs(); i != e; ++i) {
- unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
- if (Virt2StackSlotMap[Reg] != VirtRegMap::NO_STACK_SLOT) {
- OS << '[' << PrintReg(Reg, TRI) << " -> fi#" << Virt2StackSlotMap[Reg]
- << "] " << MRI.getRegClass(Reg)->getName() << "\n";
- }
- }
- OS << '\n';
-}
-
-void VirtRegMap::dump() const {
- print(dbgs());
-}
diff --git a/lib/CodeGen/VirtRegMap.h b/lib/CodeGen/VirtRegMap.h
index 8cac311..c320985 100644
--- a/lib/CodeGen/VirtRegMap.h
+++ b/lib/CodeGen/VirtRegMap.h
@@ -177,13 +177,6 @@ namespace llvm {
/// the specified stack slot
void assignVirt2StackSlot(unsigned virtReg, int frameIndex);
- /// rewrite - Rewrite all instructions in MF to use only physical registers
- /// by mapping all virtual register operands to their assigned physical
- /// registers.
- ///
- /// @param Indexes Optionally remove deleted instructions from indexes.
- void rewrite(SlotIndexes *Indexes);
-
void print(raw_ostream &OS, const Module* M = 0) const;
void dump() const;
};
OpenPOWER on IntegriCloud