summaryrefslogtreecommitdiffstats
path: root/contrib/llvm/lib/CodeGen
diff options
context:
space:
mode:
Diffstat (limited to 'contrib/llvm/lib/CodeGen')
-rw-r--r--contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp4
-rw-r--r--contrib/llvm/lib/CodeGen/AllocationOrder.cpp5
-rw-r--r--contrib/llvm/lib/CodeGen/Analysis.cpp18
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp2
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp171
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp4
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp234
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/DIE.cpp18
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/DIE.h8
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp4
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.h4
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp2
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp67
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h7
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp203
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h27
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/DwarfException.cpp4
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/DwarfException.h40
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp4
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/Win64Exception.cpp2
-rw-r--r--contrib/llvm/lib/CodeGen/BranchFolding.cpp25
-rw-r--r--contrib/llvm/lib/CodeGen/CalcSpillWeights.cpp6
-rw-r--r--contrib/llvm/lib/CodeGen/CallingConvLower.cpp4
-rw-r--r--contrib/llvm/lib/CodeGen/CodeGen.cpp2
-rw-r--r--contrib/llvm/lib/CodeGen/CodePlacementOpt.cpp2
-rw-r--r--contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp2
-rw-r--r--contrib/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp8
-rw-r--r--contrib/llvm/lib/CodeGen/EarlyIfConversion.cpp10
-rw-r--r--contrib/llvm/lib/CodeGen/ExecutionDepsFix.cpp11
-rw-r--r--contrib/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp4
-rw-r--r--contrib/llvm/lib/CodeGen/GCStrategy.cpp14
-rw-r--r--contrib/llvm/lib/CodeGen/IfConversion.cpp6
-rw-r--r--contrib/llvm/lib/CodeGen/InlineSpiller.cpp6
-rw-r--r--contrib/llvm/lib/CodeGen/IntrinsicLowering.cpp8
-rw-r--r--contrib/llvm/lib/CodeGen/LiveDebugVariables.cpp3
-rw-r--r--contrib/llvm/lib/CodeGen/LiveInterval.cpp97
-rw-r--r--contrib/llvm/lib/CodeGen/LiveIntervalAnalysis.cpp831
-rw-r--r--contrib/llvm/lib/CodeGen/LiveIntervalUnion.h4
-rw-r--r--contrib/llvm/lib/CodeGen/LiveRangeCalc.cpp6
-rw-r--r--contrib/llvm/lib/CodeGen/LiveRangeEdit.cpp11
-rw-r--r--contrib/llvm/lib/CodeGen/LiveRegMatrix.cpp4
-rw-r--r--contrib/llvm/lib/CodeGen/LiveRegMatrix.h2
-rw-r--r--contrib/llvm/lib/CodeGen/LiveStackAnalysis.cpp5
-rw-r--r--contrib/llvm/lib/CodeGen/LiveVariables.cpp42
-rw-r--r--contrib/llvm/lib/CodeGen/MachineBasicBlock.cpp90
-rw-r--r--contrib/llvm/lib/CodeGen/MachineBlockPlacement.cpp6
-rw-r--r--contrib/llvm/lib/CodeGen/MachineBranchProbabilityInfo.cpp20
-rw-r--r--contrib/llvm/lib/CodeGen/MachineCSE.cpp70
-rw-r--r--contrib/llvm/lib/CodeGen/MachineCopyPropagation.cpp13
-rw-r--r--contrib/llvm/lib/CodeGen/MachineFunction.cpp49
-rw-r--r--contrib/llvm/lib/CodeGen/MachineFunctionPrinterPass.cpp2
-rw-r--r--contrib/llvm/lib/CodeGen/MachineInstr.cpp337
-rw-r--r--contrib/llvm/lib/CodeGen/MachineInstrBundle.cpp62
-rw-r--r--contrib/llvm/lib/CodeGen/MachineLICM.cpp2
-rw-r--r--contrib/llvm/lib/CodeGen/MachineLoopInfo.cpp2
-rw-r--r--contrib/llvm/lib/CodeGen/MachineModuleInfo.cpp2
-rw-r--r--contrib/llvm/lib/CodeGen/MachineModuleInfoImpls.cpp4
-rw-r--r--contrib/llvm/lib/CodeGen/MachinePostDominators.cpp55
-rw-r--r--contrib/llvm/lib/CodeGen/MachineRegisterInfo.cpp18
-rw-r--r--contrib/llvm/lib/CodeGen/MachineScheduler.cpp1458
-rw-r--r--contrib/llvm/lib/CodeGen/MachineSink.cpp2
-rw-r--r--contrib/llvm/lib/CodeGen/MachineTraceMetrics.cpp74
-rw-r--r--contrib/llvm/lib/CodeGen/MachineTraceMetrics.h13
-rw-r--r--contrib/llvm/lib/CodeGen/MachineVerifier.cpp157
-rw-r--r--contrib/llvm/lib/CodeGen/Passes.cpp17
-rw-r--r--contrib/llvm/lib/CodeGen/PeepholeOptimizer.cpp5
-rw-r--r--contrib/llvm/lib/CodeGen/PostRASchedulerList.cpp9
-rw-r--r--contrib/llvm/lib/CodeGen/ProcessImplicitDefs.cpp3
-rw-r--r--contrib/llvm/lib/CodeGen/PrologEpilogInserter.cpp8
-rw-r--r--contrib/llvm/lib/CodeGen/RegAllocBasic.cpp3
-rw-r--r--contrib/llvm/lib/CodeGen/RegAllocFast.cpp73
-rw-r--r--contrib/llvm/lib/CodeGen/RegAllocGreedy.cpp8
-rw-r--r--contrib/llvm/lib/CodeGen/RegAllocPBQP.cpp17
-rw-r--r--contrib/llvm/lib/CodeGen/RegisterClassInfo.cpp10
-rw-r--r--contrib/llvm/lib/CodeGen/RegisterCoalescer.cpp1077
-rw-r--r--contrib/llvm/lib/CodeGen/RegisterCoalescer.h7
-rw-r--r--contrib/llvm/lib/CodeGen/RegisterPressure.cpp35
-rw-r--r--contrib/llvm/lib/CodeGen/RegisterScavenging.cpp7
-rw-r--r--contrib/llvm/lib/CodeGen/ScheduleDAG.cpp2
-rw-r--r--contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp346
-rw-r--r--contrib/llvm/lib/CodeGen/ScheduleDAGPrinter.cpp3
-rw-r--r--contrib/llvm/lib/CodeGen/ScoreboardHazardRecognizer.cpp2
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp1190
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp4
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp8
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp55
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp147
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp50
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp43
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp2
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h3
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp48
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp73
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp27
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/SDNodeOrdering.h4
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp182
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp44
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp18
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h9
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp2
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp284
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp372
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h19
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp11
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp31
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp7
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp43
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/TargetSelectionDAGInfo.cpp2
-rw-r--r--contrib/llvm/lib/CodeGen/ShrinkWrapping.cpp22
-rw-r--r--contrib/llvm/lib/CodeGen/SjLjEHPrepare.cpp86
-rw-r--r--contrib/llvm/lib/CodeGen/SlotIndexes.cpp4
-rw-r--r--contrib/llvm/lib/CodeGen/SplitKit.cpp2
-rw-r--r--contrib/llvm/lib/CodeGen/StackColoring.cpp783
-rw-r--r--contrib/llvm/lib/CodeGen/StackProtector.cpp69
-rw-r--r--contrib/llvm/lib/CodeGen/StackSlotColoring.cpp6
-rw-r--r--contrib/llvm/lib/CodeGen/StrongPHIElimination.cpp11
-rw-r--r--contrib/llvm/lib/CodeGen/TailDuplication.cpp3
-rw-r--r--contrib/llvm/lib/CodeGen/TargetInstrInfoImpl.cpp120
-rw-r--r--contrib/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp12
-rw-r--r--contrib/llvm/lib/CodeGen/TargetSchedule.cpp306
-rw-r--r--contrib/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp499
-rw-r--r--contrib/llvm/lib/CodeGen/VirtRegMap.cpp16
-rw-r--r--contrib/llvm/lib/CodeGen/VirtRegMap.h4
123 files changed, 7267 insertions, 3363 deletions
diff --git a/contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp b/contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp
index 205480a..7a1c049 100644
--- a/contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp
+++ b/contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp
@@ -635,7 +635,7 @@ bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters(
--R;
const unsigned NewSuperReg = Order[R];
// Don't consider non-allocatable registers
- if (!RegClassInfo.isAllocatable(NewSuperReg)) continue;
+ if (!MRI.isAllocatable(NewSuperReg)) continue;
// Don't replace a register with itself.
if (NewSuperReg == SuperReg) continue;
@@ -818,7 +818,7 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies(
DEBUG(dbgs() << "\tAntidep reg: " << TRI->getName(AntiDepReg));
assert(AntiDepReg != 0 && "Anti-dependence on reg0?");
- if (!RegClassInfo.isAllocatable(AntiDepReg)) {
+ if (!MRI.isAllocatable(AntiDepReg)) {
// Don't break anti-dependencies on non-allocatable registers.
DEBUG(dbgs() << " (non-allocatable)\n");
continue;
diff --git a/contrib/llvm/lib/CodeGen/AllocationOrder.cpp b/contrib/llvm/lib/CodeGen/AllocationOrder.cpp
index 32ad34a..7cde136 100644
--- a/contrib/llvm/lib/CodeGen/AllocationOrder.cpp
+++ b/contrib/llvm/lib/CodeGen/AllocationOrder.cpp
@@ -29,6 +29,7 @@ AllocationOrder::AllocationOrder(unsigned VirtReg,
const TargetRegisterClass *RC = VRM.getRegInfo().getRegClass(VirtReg);
std::pair<unsigned, unsigned> HintPair =
VRM.getRegInfo().getRegAllocationHint(VirtReg);
+ const MachineRegisterInfo &MRI = VRM.getRegInfo();
// HintPair.second is a register, phys or virt.
Hint = HintPair.second;
@@ -52,7 +53,7 @@ AllocationOrder::AllocationOrder(unsigned VirtReg,
unsigned *P = new unsigned[Order.size()];
Begin = P;
for (unsigned i = 0; i != Order.size(); ++i)
- if (!RCI.isReserved(Order[i]))
+ if (!MRI.isReserved(Order[i]))
*P++ = Order[i];
End = P;
@@ -69,7 +70,7 @@ AllocationOrder::AllocationOrder(unsigned VirtReg,
// The hint must be a valid physreg for allocation.
if (Hint && (!TargetRegisterInfo::isPhysicalRegister(Hint) ||
- !RC->contains(Hint) || RCI.isReserved(Hint)))
+ !RC->contains(Hint) || MRI.isReserved(Hint)))
Hint = 0;
}
diff --git a/contrib/llvm/lib/CodeGen/Analysis.cpp b/contrib/llvm/lib/CodeGen/Analysis.cpp
index 447f398..5162ad7 100644
--- a/contrib/llvm/lib/CodeGen/Analysis.cpp
+++ b/contrib/llvm/lib/CodeGen/Analysis.cpp
@@ -21,7 +21,7 @@
#include "llvm/Module.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/SelectionDAG.h"
-#include "llvm/Target/TargetData.h"
+#include "llvm/DataLayout.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Support/ErrorHandling.h"
@@ -79,7 +79,7 @@ void llvm::ComputeValueVTs(const TargetLowering &TLI, Type *Ty,
uint64_t StartingOffset) {
// Given a struct type, recursively traverse the elements.
if (StructType *STy = dyn_cast<StructType>(Ty)) {
- const StructLayout *SL = TLI.getTargetData()->getStructLayout(STy);
+ const StructLayout *SL = TLI.getDataLayout()->getStructLayout(STy);
for (StructType::element_iterator EB = STy->element_begin(),
EI = EB,
EE = STy->element_end();
@@ -91,7 +91,7 @@ void llvm::ComputeValueVTs(const TargetLowering &TLI, Type *Ty,
// Given an array type, recursively traverse the elements.
if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
Type *EltTy = ATy->getElementType();
- uint64_t EltSize = TLI.getTargetData()->getTypeAllocSize(EltTy);
+ uint64_t EltSize = TLI.getDataLayout()->getTypeAllocSize(EltTy);
for (unsigned i = 0, e = ATy->getNumElements(); i != e; ++i)
ComputeValueVTs(TLI, EltTy, ValueVTs, Offsets,
StartingOffset + i * EltSize);
@@ -314,11 +314,13 @@ bool llvm::isInTailCallPosition(ImmutableCallSite CS, Attributes CalleeRetAttr,
// the return. Ignore noalias because it doesn't affect the call sequence.
const Function *F = ExitBB->getParent();
Attributes CallerRetAttr = F->getAttributes().getRetAttributes();
- if ((CalleeRetAttr ^ CallerRetAttr) & ~Attribute::NoAlias)
+ if (AttrBuilder(CalleeRetAttr).removeAttribute(Attributes::NoAlias) !=
+ AttrBuilder(CallerRetAttr).removeAttribute(Attributes::NoAlias))
return false;
// It's not safe to eliminate the sign / zero extension of the return value.
- if ((CallerRetAttr & Attribute::ZExt) || (CallerRetAttr & Attribute::SExt))
+ if (CallerRetAttr.hasAttribute(Attributes::ZExt) ||
+ CallerRetAttr.hasAttribute(Attributes::SExt))
return false;
// Otherwise, make sure the unmodified return value of I is the return value.
@@ -354,11 +356,13 @@ bool llvm::isInTailCallPosition(SelectionDAG &DAG, SDNode *Node,
// Conservatively require the attributes of the call to match those of
// the return. Ignore noalias because it doesn't affect the call sequence.
Attributes CallerRetAttr = F->getAttributes().getRetAttributes();
- if (CallerRetAttr & ~Attribute::NoAlias)
+ if (AttrBuilder(CallerRetAttr)
+ .removeAttribute(Attributes::NoAlias).hasAttributes())
return false;
// It's not safe to eliminate the sign / zero extension of the return value.
- if ((CallerRetAttr & Attribute::ZExt) || (CallerRetAttr & Attribute::SExt))
+ if (CallerRetAttr.hasAttribute(Attributes::ZExt) ||
+ CallerRetAttr.hasAttribute(Attributes::SExt))
return false;
// Check if the only use is a function return node.
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp
index bf5d8c4..b2ebf04 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp
@@ -24,7 +24,7 @@
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Target/Mangler.h"
-#include "llvm/Target/TargetData.h"
+#include "llvm/DataLayout.h"
#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index d9be7a1..d74a703 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -33,7 +33,7 @@
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Target/Mangler.h"
-#include "llvm/Target/TargetData.h"
+#include "llvm/DataLayout.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
@@ -67,7 +67,7 @@ static gcp_map_type &getGCMap(void *&P) {
/// getGVAlignmentLog2 - Return the alignment to use for the specified global
/// value in log2 form. This rounds up to the preferred alignment if possible
/// and legal.
-static unsigned getGVAlignmentLog2(const GlobalValue *GV, const TargetData &TD,
+static unsigned getGVAlignmentLog2(const GlobalValue *GV, const DataLayout &TD,
unsigned InBits = 0) {
unsigned NumBits = 0;
if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV))
@@ -131,9 +131,9 @@ const TargetLoweringObjectFile &AsmPrinter::getObjFileLowering() const {
}
-/// getTargetData - Return information about data layout.
-const TargetData &AsmPrinter::getTargetData() const {
- return *TM.getTargetData();
+/// getDataLayout - Return information about data layout.
+const DataLayout &AsmPrinter::getDataLayout() const {
+ return *TM.getDataLayout();
}
/// getCurrentSection() - Return the current section we are emitting to.
@@ -160,7 +160,7 @@ bool AsmPrinter::doInitialization(Module &M) {
const_cast<TargetLoweringObjectFile&>(getObjFileLowering())
.Initialize(OutContext, TM);
- Mang = new Mangler(OutContext, *TM.getTargetData());
+ Mang = new Mangler(OutContext, *TM.getDataLayout());
// Allow the target to emit any magic that it wants at the start of the file.
EmitStartOfAsmFile(M);
@@ -213,16 +213,16 @@ void AsmPrinter::EmitLinkage(unsigned Linkage, MCSymbol *GVSym) const {
case GlobalValue::CommonLinkage:
case GlobalValue::LinkOnceAnyLinkage:
case GlobalValue::LinkOnceODRLinkage:
+ case GlobalValue::LinkOnceODRAutoHideLinkage:
case GlobalValue::WeakAnyLinkage:
case GlobalValue::WeakODRLinkage:
case GlobalValue::LinkerPrivateWeakLinkage:
- case GlobalValue::LinkerPrivateWeakDefAutoLinkage:
if (MAI->getWeakDefDirective() != 0) {
// .globl _foo
OutStreamer.EmitSymbolAttribute(GVSym, MCSA_Global);
if ((GlobalValue::LinkageTypes)Linkage !=
- GlobalValue::LinkerPrivateWeakDefAutoLinkage)
+ GlobalValue::LinkOnceODRAutoHideLinkage)
// .weak_definition _foo
OutStreamer.EmitSymbolAttribute(GVSym, MCSA_WeakDefinition);
else
@@ -280,7 +280,7 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
SectionKind GVKind = TargetLoweringObjectFile::getKindForGlobal(GV, TM);
- const TargetData *TD = TM.getTargetData();
+ const DataLayout *TD = TM.getDataLayout();
uint64_t Size = TD->getTypeAllocSize(GV->getType()->getElementType());
// If the alignment is specified, we *must* obey it. Overaligning a global
@@ -312,8 +312,8 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
return;
}
- if (MAI->getLCOMMDirectiveType() != LCOMM::None &&
- (MAI->getLCOMMDirectiveType() != LCOMM::NoAlignment || Align == 1)) {
+ if (Align == 1 ||
+ MAI->getLCOMMDirectiveAlignmentType() != LCOMM::NoAlignment) {
// .lcomm _foo, 42
OutStreamer.EmitLocalCommonSymbol(GVSym, Size, Align);
return;
@@ -482,9 +482,8 @@ void AsmPrinter::EmitFunctionEntryLabel() {
"' label emitted multiple times to assembly file");
}
-
-/// EmitComments - Pretty-print comments for instructions.
-static void EmitComments(const MachineInstr &MI, raw_ostream &CommentOS) {
+/// emitComments - Pretty-print comments for instructions.
+static void emitComments(const MachineInstr &MI, raw_ostream &CommentOS) {
const MachineFunction *MF = MI.getParent()->getParent();
const TargetMachine &TM = MF->getTarget();
@@ -519,16 +518,16 @@ static void EmitComments(const MachineInstr &MI, raw_ostream &CommentOS) {
CommentOS << " Reload Reuse\n";
}
-/// EmitImplicitDef - This method emits the specified machine instruction
+/// emitImplicitDef - This method emits the specified machine instruction
/// that is an implicit def.
-static void EmitImplicitDef(const MachineInstr *MI, AsmPrinter &AP) {
+static void emitImplicitDef(const MachineInstr *MI, AsmPrinter &AP) {
unsigned RegNo = MI->getOperand(0).getReg();
AP.OutStreamer.AddComment(Twine("implicit-def: ") +
AP.TM.getRegisterInfo()->getName(RegNo));
AP.OutStreamer.AddBlankLine();
}
-static void EmitKill(const MachineInstr *MI, AsmPrinter &AP) {
+static void emitKill(const MachineInstr *MI, AsmPrinter &AP) {
std::string Str = "kill:";
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
const MachineOperand &Op = MI->getOperand(i);
@@ -541,10 +540,10 @@ static void EmitKill(const MachineInstr *MI, AsmPrinter &AP) {
AP.OutStreamer.AddBlankLine();
}
-/// EmitDebugValueComment - This method handles the target-independent form
+/// emitDebugValueComment - This method handles the target-independent form
/// of DBG_VALUE, returning true if it was able to do so. A false return
/// means the target will need to handle MI in EmitInstruction.
-static bool EmitDebugValueComment(const MachineInstr *MI, AsmPrinter &AP) {
+static bool emitDebugValueComment(const MachineInstr *MI, AsmPrinter &AP) {
// This code handles only the 3-operand target-independent form.
if (MI->getNumOperands() != 3)
return false;
@@ -674,7 +673,7 @@ void AsmPrinter::EmitFunctionBody() {
}
if (isVerbose())
- EmitComments(*II, OutStreamer.GetCommentOS());
+ emitComments(*II, OutStreamer.GetCommentOS());
switch (II->getOpcode()) {
case TargetOpcode::PROLOG_LABEL:
@@ -690,15 +689,15 @@ void AsmPrinter::EmitFunctionBody() {
break;
case TargetOpcode::DBG_VALUE:
if (isVerbose()) {
- if (!EmitDebugValueComment(II, *this))
+ if (!emitDebugValueComment(II, *this))
EmitInstruction(II);
}
break;
case TargetOpcode::IMPLICIT_DEF:
- if (isVerbose()) EmitImplicitDef(II, *this);
+ if (isVerbose()) emitImplicitDef(II, *this);
break;
case TargetOpcode::KILL:
- if (isVerbose()) EmitKill(II, *this);
+ if (isVerbose()) emitKill(II, *this);
break;
default:
if (!TM.hasMCUseLoc())
@@ -992,7 +991,7 @@ void AsmPrinter::EmitConstantPool() {
Kind = SectionKind::getReadOnlyWithRelLocal();
break;
case 0:
- switch (TM.getTargetData()->getTypeAllocSize(CPE.getType())) {
+ switch (TM.getDataLayout()->getTypeAllocSize(CPE.getType())) {
case 4: Kind = SectionKind::getMergeableConst4(); break;
case 8: Kind = SectionKind::getMergeableConst8(); break;
case 16: Kind = SectionKind::getMergeableConst16();break;
@@ -1038,7 +1037,7 @@ void AsmPrinter::EmitConstantPool() {
OutStreamer.EmitFill(NewOffset - Offset, 0/*fillval*/, 0/*addrspace*/);
Type *Ty = CPE.getType();
- Offset = NewOffset + TM.getTargetData()->getTypeAllocSize(Ty);
+ Offset = NewOffset + TM.getDataLayout()->getTypeAllocSize(Ty);
OutStreamer.EmitLabel(GetCPISymbol(CPI));
if (CPE.isMachineConstantPoolEntry())
@@ -1081,7 +1080,12 @@ void AsmPrinter::EmitJumpTableInfo() {
JTInDiffSection = true;
}
- EmitAlignment(Log2_32(MJTI->getEntryAlignment(*TM.getTargetData())));
+ EmitAlignment(Log2_32(MJTI->getEntryAlignment(*TM.getDataLayout())));
+
+ // Jump tables in code sections are marked with a data_region directive
+ // where that's supported.
+ if (!JTInDiffSection)
+ OutStreamer.EmitDataRegion(MCDR_DataRegionJT32);
for (unsigned JTI = 0, e = JT.size(); JTI != e; ++JTI) {
const std::vector<MachineBasicBlock*> &JTBBs = JT[JTI].MBBs;
@@ -1123,6 +1127,8 @@ void AsmPrinter::EmitJumpTableInfo() {
for (unsigned ii = 0, ee = JTBBs.size(); ii != ee; ++ii)
EmitJumpTableEntry(MJTI, JTBBs[ii], JTI);
}
+ if (!JTInDiffSection)
+ OutStreamer.EmitDataRegion(MCDR_DataRegionEnd);
}
/// EmitJumpTableEntry - Emit a jump table entry for the specified MBB to the
@@ -1190,7 +1196,7 @@ void AsmPrinter::EmitJumpTableEntry(const MachineJumpTableInfo *MJTI,
assert(Value && "Unknown entry kind!");
- unsigned EntrySize = MJTI->getEntrySize(*TM.getTargetData());
+ unsigned EntrySize = MJTI->getEntrySize(*TM.getDataLayout());
OutStreamer.EmitValue(Value, EntrySize, /*addrspace*/0);
}
@@ -1292,7 +1298,7 @@ void AsmPrinter::EmitXXStructorList(const Constant *List, bool isCtor) {
}
// Emit the function pointers in the target-specific order
- const TargetData *TD = TM.getTargetData();
+ const DataLayout *TD = TM.getDataLayout();
unsigned Align = Log2_32(TD->getPointerPrefAlignment());
std::stable_sort(Structors.begin(), Structors.end(), priority_order);
for (unsigned i = 0, e = Structors.size(); i != e; ++i) {
@@ -1408,7 +1414,7 @@ void AsmPrinter::EmitLabelPlusOffset(const MCSymbol *Label, uint64_t Offset,
// if required for correctness.
//
void AsmPrinter::EmitAlignment(unsigned NumBits, const GlobalValue *GV) const {
- if (GV) NumBits = getGVAlignmentLog2(GV, *TM.getTargetData(), NumBits);
+ if (GV) NumBits = getGVAlignmentLog2(GV, *TM.getDataLayout(), NumBits);
if (NumBits == 0) return; // 1-byte aligned: no need to emit alignment.
@@ -1422,9 +1428,9 @@ void AsmPrinter::EmitAlignment(unsigned NumBits, const GlobalValue *GV) const {
// Constant emission.
//===----------------------------------------------------------------------===//
-/// LowerConstant - Lower the specified LLVM Constant to an MCExpr.
+/// lowerConstant - Lower the specified LLVM Constant to an MCExpr.
///
-static const MCExpr *LowerConstant(const Constant *CV, AsmPrinter &AP) {
+static const MCExpr *lowerConstant(const Constant *CV, AsmPrinter &AP) {
MCContext &Ctx = AP.OutContext;
if (CV->isNullValue() || isa<UndefValue>(CV))
@@ -1447,12 +1453,12 @@ static const MCExpr *LowerConstant(const Constant *CV, AsmPrinter &AP) {
switch (CE->getOpcode()) {
default:
// If the code isn't optimized, there may be outstanding folding
- // opportunities. Attempt to fold the expression using TargetData as a
+ // opportunities. Attempt to fold the expression using DataLayout as a
// last resort before giving up.
if (Constant *C =
- ConstantFoldConstantExpression(CE, AP.TM.getTargetData()))
+ ConstantFoldConstantExpression(CE, AP.TM.getDataLayout()))
if (C != CE)
- return LowerConstant(C, AP);
+ return lowerConstant(C, AP);
// Otherwise report the problem to the user.
{
@@ -1464,21 +1470,20 @@ static const MCExpr *LowerConstant(const Constant *CV, AsmPrinter &AP) {
report_fatal_error(OS.str());
}
case Instruction::GetElementPtr: {
- const TargetData &TD = *AP.TM.getTargetData();
+ const DataLayout &TD = *AP.TM.getDataLayout();
// Generate a symbolic expression for the byte address
const Constant *PtrVal = CE->getOperand(0);
SmallVector<Value*, 8> IdxVec(CE->op_begin()+1, CE->op_end());
int64_t Offset = TD.getIndexedOffset(PtrVal->getType(), IdxVec);
- const MCExpr *Base = LowerConstant(CE->getOperand(0), AP);
+ const MCExpr *Base = lowerConstant(CE->getOperand(0), AP);
if (Offset == 0)
return Base;
// Truncate/sext the offset to the pointer size.
- if (TD.getPointerSizeInBits() != 64) {
- int SExtAmount = 64-TD.getPointerSizeInBits();
- Offset = (Offset << SExtAmount) >> SExtAmount;
- }
+ unsigned Width = TD.getPointerSizeInBits();
+ if (Width < 64)
+ Offset = SignExtend64(Offset, Width);
return MCBinaryExpr::CreateAdd(Base, MCConstantExpr::Create(Offset, Ctx),
Ctx);
@@ -1491,26 +1496,26 @@ static const MCExpr *LowerConstant(const Constant *CV, AsmPrinter &AP) {
// is reasonable to treat their delta as a 32-bit value.
// FALL THROUGH.
case Instruction::BitCast:
- return LowerConstant(CE->getOperand(0), AP);
+ return lowerConstant(CE->getOperand(0), AP);
case Instruction::IntToPtr: {
- const TargetData &TD = *AP.TM.getTargetData();
+ const DataLayout &TD = *AP.TM.getDataLayout();
// Handle casts to pointers by changing them into casts to the appropriate
// integer type. This promotes constant folding and simplifies this code.
Constant *Op = CE->getOperand(0);
Op = ConstantExpr::getIntegerCast(Op, TD.getIntPtrType(CV->getContext()),
false/*ZExt*/);
- return LowerConstant(Op, AP);
+ return lowerConstant(Op, AP);
}
case Instruction::PtrToInt: {
- const TargetData &TD = *AP.TM.getTargetData();
+ const DataLayout &TD = *AP.TM.getDataLayout();
// Support only foldable casts to/from pointers that can be eliminated by
// changing the pointer to the appropriately sized integer type.
Constant *Op = CE->getOperand(0);
Type *Ty = CE->getType();
- const MCExpr *OpExpr = LowerConstant(Op, AP);
+ const MCExpr *OpExpr = lowerConstant(Op, AP);
// We can emit the pointer value into this slot if the slot is an
// integer slot equal to the size of the pointer.
@@ -1536,8 +1541,8 @@ static const MCExpr *LowerConstant(const Constant *CV, AsmPrinter &AP) {
case Instruction::And:
case Instruction::Or:
case Instruction::Xor: {
- const MCExpr *LHS = LowerConstant(CE->getOperand(0), AP);
- const MCExpr *RHS = LowerConstant(CE->getOperand(1), AP);
+ const MCExpr *LHS = lowerConstant(CE->getOperand(0), AP);
+ const MCExpr *RHS = lowerConstant(CE->getOperand(1), AP);
switch (CE->getOpcode()) {
default: llvm_unreachable("Unknown binary operator constant cast expr");
case Instruction::Add: return MCBinaryExpr::CreateAdd(LHS, RHS, Ctx);
@@ -1554,7 +1559,7 @@ static const MCExpr *LowerConstant(const Constant *CV, AsmPrinter &AP) {
}
}
-static void EmitGlobalConstantImpl(const Constant *C, unsigned AddrSpace,
+static void emitGlobalConstantImpl(const Constant *C, unsigned AddrSpace,
AsmPrinter &AP);
/// isRepeatedByteSequence - Determine whether the given value is
@@ -1578,7 +1583,7 @@ static int isRepeatedByteSequence(const Value *V, TargetMachine &TM) {
if (const ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
if (CI->getBitWidth() > 64) return -1;
- uint64_t Size = TM.getTargetData()->getTypeAllocSize(V->getType());
+ uint64_t Size = TM.getDataLayout()->getTypeAllocSize(V->getType());
uint64_t Value = CI->getZExtValue();
// Make sure the constant is at least 8 bits long and has a power
@@ -1616,13 +1621,13 @@ static int isRepeatedByteSequence(const Value *V, TargetMachine &TM) {
return -1;
}
-static void EmitGlobalConstantDataSequential(const ConstantDataSequential *CDS,
+static void emitGlobalConstantDataSequential(const ConstantDataSequential *CDS,
unsigned AddrSpace,AsmPrinter &AP){
// See if we can aggregate this into a .fill, if so, emit it as such.
int Value = isRepeatedByteSequence(CDS, AP.TM);
if (Value != -1) {
- uint64_t Bytes = AP.TM.getTargetData()->getTypeAllocSize(CDS->getType());
+ uint64_t Bytes = AP.TM.getDataLayout()->getTypeAllocSize(CDS->getType());
// Don't emit a 1-byte object as a .fill.
if (Bytes > 1)
return AP.OutStreamer.EmitFill(Bytes, Value, AddrSpace);
@@ -1672,7 +1677,7 @@ static void EmitGlobalConstantDataSequential(const ConstantDataSequential *CDS,
}
}
- const TargetData &TD = *AP.TM.getTargetData();
+ const DataLayout &TD = *AP.TM.getDataLayout();
unsigned Size = TD.getTypeAllocSize(CDS->getType());
unsigned EmittedSize = TD.getTypeAllocSize(CDS->getType()->getElementType()) *
CDS->getNumElements();
@@ -1681,28 +1686,28 @@ static void EmitGlobalConstantDataSequential(const ConstantDataSequential *CDS,
}
-static void EmitGlobalConstantArray(const ConstantArray *CA, unsigned AddrSpace,
+static void emitGlobalConstantArray(const ConstantArray *CA, unsigned AddrSpace,
AsmPrinter &AP) {
// See if we can aggregate some values. Make sure it can be
// represented as a series of bytes of the constant value.
int Value = isRepeatedByteSequence(CA, AP.TM);
if (Value != -1) {
- uint64_t Bytes = AP.TM.getTargetData()->getTypeAllocSize(CA->getType());
+ uint64_t Bytes = AP.TM.getDataLayout()->getTypeAllocSize(CA->getType());
AP.OutStreamer.EmitFill(Bytes, Value, AddrSpace);
}
else {
for (unsigned i = 0, e = CA->getNumOperands(); i != e; ++i)
- EmitGlobalConstantImpl(CA->getOperand(i), AddrSpace, AP);
+ emitGlobalConstantImpl(CA->getOperand(i), AddrSpace, AP);
}
}
-static void EmitGlobalConstantVector(const ConstantVector *CV,
+static void emitGlobalConstantVector(const ConstantVector *CV,
unsigned AddrSpace, AsmPrinter &AP) {
for (unsigned i = 0, e = CV->getType()->getNumElements(); i != e; ++i)
- EmitGlobalConstantImpl(CV->getOperand(i), AddrSpace, AP);
+ emitGlobalConstantImpl(CV->getOperand(i), AddrSpace, AP);
- const TargetData &TD = *AP.TM.getTargetData();
+ const DataLayout &TD = *AP.TM.getDataLayout();
unsigned Size = TD.getTypeAllocSize(CV->getType());
unsigned EmittedSize = TD.getTypeAllocSize(CV->getType()->getElementType()) *
CV->getType()->getNumElements();
@@ -1710,10 +1715,10 @@ static void EmitGlobalConstantVector(const ConstantVector *CV,
AP.OutStreamer.EmitZeros(Padding, AddrSpace);
}
-static void EmitGlobalConstantStruct(const ConstantStruct *CS,
+static void emitGlobalConstantStruct(const ConstantStruct *CS,
unsigned AddrSpace, AsmPrinter &AP) {
// Print the fields in successive locations. Pad to align if needed!
- const TargetData *TD = AP.TM.getTargetData();
+ const DataLayout *TD = AP.TM.getDataLayout();
unsigned Size = TD->getTypeAllocSize(CS->getType());
const StructLayout *Layout = TD->getStructLayout(CS->getType());
uint64_t SizeSoFar = 0;
@@ -1727,7 +1732,7 @@ static void EmitGlobalConstantStruct(const ConstantStruct *CS,
SizeSoFar += FieldSize + PadSize;
// Now print the actual field value.
- EmitGlobalConstantImpl(Field, AddrSpace, AP);
+ emitGlobalConstantImpl(Field, AddrSpace, AP);
// Insert padding - this may include padding to increase the size of the
// current field up to the ABI size (if the struct is not packed) as well
@@ -1738,7 +1743,7 @@ static void EmitGlobalConstantStruct(const ConstantStruct *CS,
"Layout of constant struct may be incorrect!");
}
-static void EmitGlobalConstantFP(const ConstantFP *CFP, unsigned AddrSpace,
+static void emitGlobalConstantFP(const ConstantFP *CFP, unsigned AddrSpace,
AsmPrinter &AP) {
if (CFP->getType()->isHalfTy()) {
if (AP.isVerbose()) {
@@ -1793,7 +1798,7 @@ static void EmitGlobalConstantFP(const ConstantFP *CFP, unsigned AddrSpace,
<< DoubleVal.convertToDouble() << '\n';
}
- if (AP.TM.getTargetData()->isBigEndian()) {
+ if (AP.TM.getDataLayout()->isBigEndian()) {
AP.OutStreamer.EmitIntValue(p[1], 2, AddrSpace);
AP.OutStreamer.EmitIntValue(p[0], 8, AddrSpace);
} else {
@@ -1802,7 +1807,7 @@ static void EmitGlobalConstantFP(const ConstantFP *CFP, unsigned AddrSpace,
}
// Emit the tail padding for the long double.
- const TargetData &TD = *AP.TM.getTargetData();
+ const DataLayout &TD = *AP.TM.getDataLayout();
AP.OutStreamer.EmitZeros(TD.getTypeAllocSize(CFP->getType()) -
TD.getTypeStoreSize(CFP->getType()), AddrSpace);
return;
@@ -1814,7 +1819,7 @@ static void EmitGlobalConstantFP(const ConstantFP *CFP, unsigned AddrSpace,
// API needed to prevent premature destruction.
APInt API = CFP->getValueAPF().bitcastToAPInt();
const uint64_t *p = API.getRawData();
- if (AP.TM.getTargetData()->isBigEndian()) {
+ if (AP.TM.getDataLayout()->isBigEndian()) {
AP.OutStreamer.EmitIntValue(p[0], 8, AddrSpace);
AP.OutStreamer.EmitIntValue(p[1], 8, AddrSpace);
} else {
@@ -1823,9 +1828,9 @@ static void EmitGlobalConstantFP(const ConstantFP *CFP, unsigned AddrSpace,
}
}
-static void EmitGlobalConstantLargeInt(const ConstantInt *CI,
+static void emitGlobalConstantLargeInt(const ConstantInt *CI,
unsigned AddrSpace, AsmPrinter &AP) {
- const TargetData *TD = AP.TM.getTargetData();
+ const DataLayout *TD = AP.TM.getDataLayout();
unsigned BitWidth = CI->getBitWidth();
assert((BitWidth & 63) == 0 && "only support multiples of 64-bits");
@@ -1839,9 +1844,9 @@ static void EmitGlobalConstantLargeInt(const ConstantInt *CI,
}
}
-static void EmitGlobalConstantImpl(const Constant *CV, unsigned AddrSpace,
+static void emitGlobalConstantImpl(const Constant *CV, unsigned AddrSpace,
AsmPrinter &AP) {
- const TargetData *TD = AP.TM.getTargetData();
+ const DataLayout *TD = AP.TM.getDataLayout();
uint64_t Size = TD->getTypeAllocSize(CV->getType());
if (isa<ConstantAggregateZero>(CV) || isa<UndefValue>(CV))
return AP.OutStreamer.EmitZeros(Size, AddrSpace);
@@ -1858,13 +1863,13 @@ static void EmitGlobalConstantImpl(const Constant *CV, unsigned AddrSpace,
AP.OutStreamer.EmitIntValue(CI->getZExtValue(), Size, AddrSpace);
return;
default:
- EmitGlobalConstantLargeInt(CI, AddrSpace, AP);
+ emitGlobalConstantLargeInt(CI, AddrSpace, AP);
return;
}
}
if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CV))
- return EmitGlobalConstantFP(CFP, AddrSpace, AP);
+ return emitGlobalConstantFP(CFP, AddrSpace, AP);
if (isa<ConstantPointerNull>(CV)) {
AP.OutStreamer.EmitIntValue(0, Size, AddrSpace);
@@ -1872,19 +1877,19 @@ static void EmitGlobalConstantImpl(const Constant *CV, unsigned AddrSpace,
}
if (const ConstantDataSequential *CDS = dyn_cast<ConstantDataSequential>(CV))
- return EmitGlobalConstantDataSequential(CDS, AddrSpace, AP);
+ return emitGlobalConstantDataSequential(CDS, AddrSpace, AP);
if (const ConstantArray *CVA = dyn_cast<ConstantArray>(CV))
- return EmitGlobalConstantArray(CVA, AddrSpace, AP);
+ return emitGlobalConstantArray(CVA, AddrSpace, AP);
if (const ConstantStruct *CVS = dyn_cast<ConstantStruct>(CV))
- return EmitGlobalConstantStruct(CVS, AddrSpace, AP);
+ return emitGlobalConstantStruct(CVS, AddrSpace, AP);
if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV)) {
// Look through bitcasts, which might not be able to be MCExpr'ized (e.g. of
// vectors).
if (CE->getOpcode() == Instruction::BitCast)
- return EmitGlobalConstantImpl(CE->getOperand(0), AddrSpace, AP);
+ return emitGlobalConstantImpl(CE->getOperand(0), AddrSpace, AP);
if (Size > 8) {
// If the constant expression's size is greater than 64-bits, then we have
@@ -1892,23 +1897,23 @@ static void EmitGlobalConstantImpl(const Constant *CV, unsigned AddrSpace,
// that way.
Constant *New = ConstantFoldConstantExpression(CE, TD);
if (New && New != CE)
- return EmitGlobalConstantImpl(New, AddrSpace, AP);
+ return emitGlobalConstantImpl(New, AddrSpace, AP);
}
}
if (const ConstantVector *V = dyn_cast<ConstantVector>(CV))
- return EmitGlobalConstantVector(V, AddrSpace, AP);
+ return emitGlobalConstantVector(V, AddrSpace, AP);
// Otherwise, it must be a ConstantExpr. Lower it to an MCExpr, then emit it
// thread the streamer with EmitValue.
- AP.OutStreamer.EmitValue(LowerConstant(CV, AP), Size, AddrSpace);
+ AP.OutStreamer.EmitValue(lowerConstant(CV, AP), Size, AddrSpace);
}
/// EmitGlobalConstant - Print a general LLVM constant to the .s file.
void AsmPrinter::EmitGlobalConstant(const Constant *CV, unsigned AddrSpace) {
- uint64_t Size = TM.getTargetData()->getTypeAllocSize(CV->getType());
+ uint64_t Size = TM.getDataLayout()->getTypeAllocSize(CV->getType());
if (Size)
- EmitGlobalConstantImpl(CV, AddrSpace, *this);
+ emitGlobalConstantImpl(CV, AddrSpace, *this);
else if (MAI->hasSubsectionsViaSymbols()) {
// If the global has zero size, emit a single byte so that two labels don't
// look like they are at the same location.
@@ -2023,8 +2028,8 @@ static void PrintChildLoopComment(raw_ostream &OS, const MachineLoop *Loop,
}
}
-/// EmitBasicBlockLoopComments - Pretty-print comments for basic blocks.
-static void EmitBasicBlockLoopComments(const MachineBasicBlock &MBB,
+/// emitBasicBlockLoopComments - Pretty-print comments for basic blocks.
+static void emitBasicBlockLoopComments(const MachineBasicBlock &MBB,
const MachineLoopInfo *LI,
const AsmPrinter &AP) {
// Add loop depth information
@@ -2090,7 +2095,7 @@ void AsmPrinter::EmitBasicBlockStart(const MachineBasicBlock *MBB) const {
if (const BasicBlock *BB = MBB->getBasicBlock())
if (BB->hasName())
OutStreamer.AddComment("%" + BB->getName());
- EmitBasicBlockLoopComments(*MBB, LI, *this);
+ emitBasicBlockLoopComments(*MBB, LI, *this);
}
// Print the main label for the block.
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
index 90d511c..d94e1fe 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
@@ -18,7 +18,7 @@
#include "llvm/MC/MCSection.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
-#include "llvm/Target/TargetData.h"
+#include "llvm/DataLayout.h"
#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetMachine.h"
@@ -112,7 +112,7 @@ unsigned AsmPrinter::GetSizeOfEncodedValue(unsigned Encoding) const {
switch (Encoding & 0x07) {
default: llvm_unreachable("Invalid encoded value.");
- case dwarf::DW_EH_PE_absptr: return TM.getTargetData()->getPointerSize();
+ case dwarf::DW_EH_PE_absptr: return TM.getDataLayout()->getPointerSize();
case dwarf::DW_EH_PE_udata2: return 2;
case dwarf::DW_EH_PE_udata4: return 4;
case dwarf::DW_EH_PE_udata8: return 8;
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
index db43b06..50f0fc3 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
@@ -43,10 +43,10 @@ namespace {
};
}
-/// SrcMgrDiagHandler - This callback is invoked when the SourceMgr for an
+/// srcMgrDiagHandler - This callback is invoked when the SourceMgr for an
/// inline asm has an error in it. diagInfo is a pointer to the SrcMgrDiagInfo
/// struct above.
-static void SrcMgrDiagHandler(const SMDiagnostic &Diag, void *diagInfo) {
+static void srcMgrDiagHandler(const SMDiagnostic &Diag, void *diagInfo) {
SrcMgrDiagInfo *DiagInfo = static_cast<SrcMgrDiagInfo *>(diagInfo);
assert(DiagInfo && "Diagnostic context not passed down?");
@@ -68,7 +68,8 @@ static void SrcMgrDiagHandler(const SMDiagnostic &Diag, void *diagInfo) {
}
/// EmitInlineAsm - Emit a blob of inline asm to the output streamer.
-void AsmPrinter::EmitInlineAsm(StringRef Str, const MDNode *LocMDNode) const {
+void AsmPrinter::EmitInlineAsm(StringRef Str, const MDNode *LocMDNode,
+ InlineAsm::AsmDialect Dialect) const {
assert(!Str.empty() && "Can't emit empty inline asm block");
// Remember if the buffer is nul terminated or not so we can avoid a copy.
@@ -91,12 +92,12 @@ void AsmPrinter::EmitInlineAsm(StringRef Str, const MDNode *LocMDNode) const {
LLVMContext &LLVMCtx = MMI->getModule()->getContext();
bool HasDiagHandler = false;
if (LLVMCtx.getInlineAsmDiagnosticHandler() != 0) {
- // If the source manager has an issue, we arrange for SrcMgrDiagHandler
+ // If the source manager has an issue, we arrange for srcMgrDiagHandler
// to be invoked, getting DiagInfo passed into it.
DiagInfo.LocInfo = LocMDNode;
DiagInfo.DiagHandler = LLVMCtx.getInlineAsmDiagnosticHandler();
DiagInfo.DiagContext = LLVMCtx.getInlineAsmDiagnosticContext();
- SrcMgr.setDiagHandler(SrcMgrDiagHandler, &DiagInfo);
+ SrcMgr.setDiagHandler(srcMgrDiagHandler, &DiagInfo);
HasDiagHandler = true;
}
@@ -126,6 +127,7 @@ void AsmPrinter::EmitInlineAsm(StringRef Str, const MDNode *LocMDNode) const {
if (!TAP)
report_fatal_error("Inline asm not supported by this streamer because"
" we don't have an asm parser for this target\n");
+ Parser->setAssemblerDialect(Dialect);
Parser->setTargetParser(*TAP.get());
// Don't implicitly switch to the text section before the asm.
@@ -135,71 +137,113 @@ void AsmPrinter::EmitInlineAsm(StringRef Str, const MDNode *LocMDNode) const {
report_fatal_error("Error parsing inline asm\n");
}
+static void EmitMSInlineAsmStr(const char *AsmStr, const MachineInstr *MI,
+ MachineModuleInfo *MMI, int InlineAsmVariant,
+ AsmPrinter *AP, unsigned LocCookie,
+ raw_ostream &OS) {
+ // Switch to the inline assembly variant.
+ OS << "\t.intel_syntax\n\t";
-/// EmitInlineAsm - This method formats and emits the specified machine
-/// instruction that is an inline asm.
-void AsmPrinter::EmitInlineAsm(const MachineInstr *MI) const {
- assert(MI->isInlineAsm() && "printInlineAsm only works on inline asms");
-
+ const char *LastEmitted = AsmStr; // One past the last character emitted.
unsigned NumOperands = MI->getNumOperands();
- // Count the number of register definitions to find the asm string.
- unsigned NumDefs = 0;
- for (; MI->getOperand(NumDefs).isReg() && MI->getOperand(NumDefs).isDef();
- ++NumDefs)
- assert(NumDefs != NumOperands-2 && "No asm string?");
+ while (*LastEmitted) {
+ switch (*LastEmitted) {
+ default: {
+ // Not a special case, emit the string section literally.
+ const char *LiteralEnd = LastEmitted+1;
+ while (*LiteralEnd && *LiteralEnd != '{' && *LiteralEnd != '|' &&
+ *LiteralEnd != '}' && *LiteralEnd != '$' && *LiteralEnd != '\n')
+ ++LiteralEnd;
- assert(MI->getOperand(NumDefs).isSymbol() && "No asm string?");
+ OS.write(LastEmitted, LiteralEnd-LastEmitted);
+ LastEmitted = LiteralEnd;
+ break;
+ }
+ case '\n':
+ ++LastEmitted; // Consume newline character.
+ OS << '\n'; // Indent code with newline.
+ break;
+ case '$': {
+ ++LastEmitted; // Consume '$' character.
+ bool Done = true;
- // Disassemble the AsmStr, printing out the literal pieces, the operands, etc.
- const char *AsmStr = MI->getOperand(NumDefs).getSymbolName();
+ // Handle escapes.
+ switch (*LastEmitted) {
+ default: Done = false; break;
+ case '$':
+ ++LastEmitted; // Consume second '$' character.
+ break;
+ }
+ if (Done) break;
- // If this asmstr is empty, just print the #APP/#NOAPP markers.
- // These are useful to see where empty asm's wound up.
- if (AsmStr[0] == 0) {
- // Don't emit the comments if writing to a .o file.
- if (!OutStreamer.hasRawTextSupport()) return;
+ const char *IDStart = LastEmitted;
+ const char *IDEnd = IDStart;
+ while (*IDEnd >= '0' && *IDEnd <= '9') ++IDEnd;
- OutStreamer.EmitRawText(Twine("\t")+MAI->getCommentString()+
- MAI->getInlineAsmStart());
- OutStreamer.EmitRawText(Twine("\t")+MAI->getCommentString()+
- MAI->getInlineAsmEnd());
- return;
- }
+ unsigned Val;
+ if (StringRef(IDStart, IDEnd-IDStart).getAsInteger(10, Val))
+ report_fatal_error("Bad $ operand number in inline asm string: '" +
+ Twine(AsmStr) + "'");
+ LastEmitted = IDEnd;
- // Emit the #APP start marker. This has to happen even if verbose-asm isn't
- // enabled, so we use EmitRawText.
- if (OutStreamer.hasRawTextSupport())
- OutStreamer.EmitRawText(Twine("\t")+MAI->getCommentString()+
- MAI->getInlineAsmStart());
+ if (Val >= NumOperands-1)
+ report_fatal_error("Invalid $ operand number in inline asm string: '" +
+ Twine(AsmStr) + "'");
- // Get the !srcloc metadata node if we have it, and decode the loc cookie from
- // it.
- unsigned LocCookie = 0;
- const MDNode *LocMD = 0;
- for (unsigned i = MI->getNumOperands(); i != 0; --i) {
- if (MI->getOperand(i-1).isMetadata() &&
- (LocMD = MI->getOperand(i-1).getMetadata()) &&
- LocMD->getNumOperands() != 0) {
- if (const ConstantInt *CI = dyn_cast<ConstantInt>(LocMD->getOperand(0))) {
- LocCookie = CI->getZExtValue();
- break;
- }
- }
- }
+ // Okay, we finally have a value number. Ask the target to print this
+ // operand!
+ unsigned OpNo = InlineAsm::MIOp_FirstOperand;
- // Emit the inline asm to a temporary string so we can emit it through
- // EmitInlineAsm.
- SmallString<256> StringData;
- raw_svector_ostream OS(StringData);
+ bool Error = false;
- OS << '\t';
+ // Scan to find the machine operand number for the operand.
+ for (; Val; --Val) {
+ if (OpNo >= MI->getNumOperands()) break;
+ unsigned OpFlags = MI->getOperand(OpNo).getImm();
+ OpNo += InlineAsm::getNumOperandRegisters(OpFlags) + 1;
+ }
- // The variant of the current asmprinter.
- int AsmPrinterVariant = MAI->getAssemblerDialect();
+ // We may have a location metadata attached to the end of the
+ // instruction, and at no point should see metadata at any
+ // other point while processing. It's an error if so.
+ if (OpNo >= MI->getNumOperands() ||
+ MI->getOperand(OpNo).isMetadata()) {
+ Error = true;
+ } else {
+ unsigned OpFlags = MI->getOperand(OpNo).getImm();
+ ++OpNo; // Skip over the ID number.
+
+ if (InlineAsm::isMemKind(OpFlags)) {
+ Error = AP->PrintAsmMemoryOperand(MI, OpNo, InlineAsmVariant,
+ /*Modifier*/ 0, OS);
+ } else {
+ Error = AP->PrintAsmOperand(MI, OpNo, InlineAsmVariant,
+ /*Modifier*/ 0, OS);
+ }
+ }
+ if (Error) {
+ std::string msg;
+ raw_string_ostream Msg(msg);
+ Msg << "invalid operand in inline asm: '" << AsmStr << "'";
+ MMI->getModule()->getContext().emitError(LocCookie, Msg.str());
+ }
+ break;
+ }
+ }
+ }
+ OS << "\n\t.att_syntax\n" << (char)0; // null terminate string.
+}
+static void EmitGCCInlineAsmStr(const char *AsmStr, const MachineInstr *MI,
+ MachineModuleInfo *MMI, int InlineAsmVariant,
+ int AsmPrinterVariant, AsmPrinter *AP,
+ unsigned LocCookie, raw_ostream &OS) {
int CurVariant = -1; // The number of the {.|.|.} region we are in.
const char *LastEmitted = AsmStr; // One past the last character emitted.
+ unsigned NumOperands = MI->getNumOperands();
+
+ OS << '\t';
while (*LastEmitted) {
switch (*LastEmitted) {
@@ -272,7 +316,7 @@ void AsmPrinter::EmitInlineAsm(const MachineInstr *MI) const {
" string: '" + Twine(AsmStr) + "'");
std::string Val(StrStart, StrEnd);
- PrintSpecial(MI, OS, Val.c_str());
+ AP->PrintSpecial(MI, OS, Val.c_str());
LastEmitted = StrEnd+1;
break;
}
@@ -340,13 +384,12 @@ void AsmPrinter::EmitInlineAsm(const MachineInstr *MI) const {
// FIXME: What if the operand isn't an MBB, report error?
OS << *MI->getOperand(OpNo).getMBB()->getSymbol();
else {
- AsmPrinter *AP = const_cast<AsmPrinter*>(this);
if (InlineAsm::isMemKind(OpFlags)) {
- Error = AP->PrintAsmMemoryOperand(MI, OpNo, AsmPrinterVariant,
+ Error = AP->PrintAsmMemoryOperand(MI, OpNo, InlineAsmVariant,
Modifier[0] ? Modifier : 0,
OS);
} else {
- Error = AP->PrintAsmOperand(MI, OpNo, AsmPrinterVariant,
+ Error = AP->PrintAsmOperand(MI, OpNo, InlineAsmVariant,
Modifier[0] ? Modifier : 0, OS);
}
}
@@ -363,7 +406,74 @@ void AsmPrinter::EmitInlineAsm(const MachineInstr *MI) const {
}
}
OS << '\n' << (char)0; // null terminate string.
- EmitInlineAsm(OS.str(), LocMD);
+}
+
+/// EmitInlineAsm - This method formats and emits the specified machine
+/// instruction that is an inline asm.
+void AsmPrinter::EmitInlineAsm(const MachineInstr *MI) const {
+ assert(MI->isInlineAsm() && "printInlineAsm only works on inline asms");
+
+ // Count the number of register definitions to find the asm string.
+ unsigned NumDefs = 0;
+ for (; MI->getOperand(NumDefs).isReg() && MI->getOperand(NumDefs).isDef();
+ ++NumDefs)
+ assert(NumDefs != MI->getNumOperands()-2 && "No asm string?");
+
+ assert(MI->getOperand(NumDefs).isSymbol() && "No asm string?");
+
+ // Disassemble the AsmStr, printing out the literal pieces, the operands, etc.
+ const char *AsmStr = MI->getOperand(NumDefs).getSymbolName();
+
+ // If this asmstr is empty, just print the #APP/#NOAPP markers.
+ // These are useful to see where empty asm's wound up.
+ if (AsmStr[0] == 0) {
+ // Don't emit the comments if writing to a .o file.
+ if (!OutStreamer.hasRawTextSupport()) return;
+
+ OutStreamer.EmitRawText(Twine("\t")+MAI->getCommentString()+
+ MAI->getInlineAsmStart());
+ OutStreamer.EmitRawText(Twine("\t")+MAI->getCommentString()+
+ MAI->getInlineAsmEnd());
+ return;
+ }
+
+ // Emit the #APP start marker. This has to happen even if verbose-asm isn't
+ // enabled, so we use EmitRawText.
+ if (OutStreamer.hasRawTextSupport())
+ OutStreamer.EmitRawText(Twine("\t")+MAI->getCommentString()+
+ MAI->getInlineAsmStart());
+
+ // Get the !srcloc metadata node if we have it, and decode the loc cookie from
+ // it.
+ unsigned LocCookie = 0;
+ const MDNode *LocMD = 0;
+ for (unsigned i = MI->getNumOperands(); i != 0; --i) {
+ if (MI->getOperand(i-1).isMetadata() &&
+ (LocMD = MI->getOperand(i-1).getMetadata()) &&
+ LocMD->getNumOperands() != 0) {
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(LocMD->getOperand(0))) {
+ LocCookie = CI->getZExtValue();
+ break;
+ }
+ }
+ }
+
+ // Emit the inline asm to a temporary string so we can emit it through
+ // EmitInlineAsm.
+ SmallString<256> StringData;
+ raw_svector_ostream OS(StringData);
+
+ // The variant of the current asmprinter.
+ int AsmPrinterVariant = MAI->getAssemblerDialect();
+ InlineAsm::AsmDialect InlineAsmVariant = MI->getInlineAsmDialect();
+ AsmPrinter *AP = const_cast<AsmPrinter*>(this);
+ if (InlineAsmVariant == InlineAsm::AD_ATT)
+ EmitGCCInlineAsmStr(AsmStr, MI, MMI, InlineAsmVariant, AsmPrinterVariant,
+ AP, LocCookie, OS);
+ else
+ EmitMSInlineAsmStr(AsmStr, MI, MMI, InlineAsmVariant, AP, LocCookie, OS);
+
+ EmitInlineAsm(OS.str(), LocMD, MI->getInlineAsmDialect());
// Emit the #NOAPP end marker. This has to happen even if verbose-asm isn't
// enabled, so we use EmitRawText.
@@ -409,8 +519,8 @@ void AsmPrinter::PrintSpecial(const MachineInstr *MI, raw_ostream &OS,
/// instruction, using the specified assembler variant. Targets should
/// override this to format as appropriate.
bool AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
- unsigned AsmVariant, const char *ExtraCode,
- raw_ostream &O) {
+ unsigned AsmVariant, const char *ExtraCode,
+ raw_ostream &O) {
// Does this asm operand have a single letter operand modifier?
if (ExtraCode && ExtraCode[0]) {
if (ExtraCode[1] != 0) return true; // Unknown modifier.
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.cpp
index 3776848..4d73b3c 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.cpp
@@ -17,7 +17,7 @@
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
-#include "llvm/Target/TargetData.h"
+#include "llvm/DataLayout.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
@@ -182,6 +182,12 @@ void DIEValue::dump() {
void DIEInteger::EmitValue(AsmPrinter *Asm, unsigned Form) const {
unsigned Size = ~0U;
switch (Form) {
+ case dwarf::DW_FORM_flag_present:
+ // Emit something to keep the lines and comments in sync.
+ // FIXME: Is there a better way to do this?
+ if (Asm->OutStreamer.hasRawTextSupport())
+ Asm->OutStreamer.EmitRawText(StringRef(""));
+ return;
case dwarf::DW_FORM_flag: // Fall thru
case dwarf::DW_FORM_ref1: // Fall thru
case dwarf::DW_FORM_data1: Size = 1; break;
@@ -193,7 +199,8 @@ void DIEInteger::EmitValue(AsmPrinter *Asm, unsigned Form) const {
case dwarf::DW_FORM_data8: Size = 8; break;
case dwarf::DW_FORM_udata: Asm->EmitULEB128(Integer); return;
case dwarf::DW_FORM_sdata: Asm->EmitSLEB128(Integer); return;
- case dwarf::DW_FORM_addr: Size = Asm->getTargetData().getPointerSize(); break;
+ case dwarf::DW_FORM_addr:
+ Size = Asm->getDataLayout().getPointerSize(); break;
default: llvm_unreachable("DIE Value form not supported yet");
}
Asm->OutStreamer.EmitIntValue(Integer, Size, 0/*addrspace*/);
@@ -203,6 +210,7 @@ void DIEInteger::EmitValue(AsmPrinter *Asm, unsigned Form) const {
///
unsigned DIEInteger::SizeOf(AsmPrinter *AP, unsigned Form) const {
switch (Form) {
+ case dwarf::DW_FORM_flag_present: return 0;
case dwarf::DW_FORM_flag: // Fall thru
case dwarf::DW_FORM_ref1: // Fall thru
case dwarf::DW_FORM_data1: return sizeof(int8_t);
@@ -214,7 +222,7 @@ unsigned DIEInteger::SizeOf(AsmPrinter *AP, unsigned Form) const {
case dwarf::DW_FORM_data8: return sizeof(int64_t);
case dwarf::DW_FORM_udata: return MCAsmInfo::getULEB128Size(Integer);
case dwarf::DW_FORM_sdata: return MCAsmInfo::getSLEB128Size(Integer);
- case dwarf::DW_FORM_addr: return AP->getTargetData().getPointerSize();
+ case dwarf::DW_FORM_addr: return AP->getDataLayout().getPointerSize();
default: llvm_unreachable("DIE Value form not supported yet");
}
}
@@ -241,7 +249,7 @@ void DIELabel::EmitValue(AsmPrinter *AP, unsigned Form) const {
unsigned DIELabel::SizeOf(AsmPrinter *AP, unsigned Form) const {
if (Form == dwarf::DW_FORM_data4) return 4;
if (Form == dwarf::DW_FORM_strp) return 4;
- return AP->getTargetData().getPointerSize();
+ return AP->getDataLayout().getPointerSize();
}
#ifndef NDEBUG
@@ -265,7 +273,7 @@ void DIEDelta::EmitValue(AsmPrinter *AP, unsigned Form) const {
unsigned DIEDelta::SizeOf(AsmPrinter *AP, unsigned Form) const {
if (Form == dwarf::DW_FORM_data4) return 4;
if (Form == dwarf::DW_FORM_strp) return 4;
- return AP->getTargetData().getPointerSize();
+ return AP->getDataLayout().getPointerSize();
}
#ifndef NDEBUG
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.h
index f93ea1b..28a96f3 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.h
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.h
@@ -214,9 +214,6 @@ namespace llvm {
///
virtual unsigned SizeOf(AsmPrinter *AP, unsigned Form) const = 0;
- // Implement isa/cast/dyncast.
- static bool classof(const DIEValue *) { return true; }
-
#ifndef NDEBUG
virtual void print(raw_ostream &O) = 0;
void dump();
@@ -257,7 +254,6 @@ namespace llvm {
virtual unsigned SizeOf(AsmPrinter *AP, unsigned Form) const;
// Implement isa/cast/dyncast.
- static bool classof(const DIEInteger *) { return true; }
static bool classof(const DIEValue *I) { return I->getType() == isInteger; }
#ifndef NDEBUG
@@ -286,7 +282,6 @@ namespace llvm {
virtual unsigned SizeOf(AsmPrinter *AP, unsigned Form) const;
// Implement isa/cast/dyncast.
- static bool classof(const DIELabel *) { return true; }
static bool classof(const DIEValue *L) { return L->getType() == isLabel; }
#ifndef NDEBUG
@@ -313,7 +308,6 @@ namespace llvm {
virtual unsigned SizeOf(AsmPrinter *AP, unsigned Form) const;
// Implement isa/cast/dyncast.
- static bool classof(const DIEDelta *) { return true; }
static bool classof(const DIEValue *D) { return D->getType() == isDelta; }
#ifndef NDEBUG
@@ -343,7 +337,6 @@ namespace llvm {
}
// Implement isa/cast/dyncast.
- static bool classof(const DIEEntry *) { return true; }
static bool classof(const DIEValue *E) { return E->getType() == isEntry; }
#ifndef NDEBUG
@@ -383,7 +376,6 @@ namespace llvm {
virtual unsigned SizeOf(AsmPrinter *AP, unsigned Form) const;
// Implement isa/cast/dyncast.
- static bool classof(const DIEBlock *) { return true; }
static bool classof(const DIEValue *E) { return E->getType() == isBlock; }
#ifndef NDEBUG
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp
index 454a923..05e0f2f 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp
@@ -133,8 +133,8 @@ void DwarfAccelTable::EmitHeader(AsmPrinter *Asm) {
}
}
-// Walk through and emit the buckets for the table. This will look
-// like a list of numbers of how many elements are in each bucket.
+// Walk through and emit the buckets for the table. Each index is
+// an offset into the list of hashes.
void DwarfAccelTable::EmitBuckets(AsmPrinter *Asm) {
unsigned index = 0;
for (size_t i = 0, e = Buckets.size(); i < e; ++i) {
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.h
index 963b8cd..92d1bbe 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.h
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.h
@@ -237,8 +237,8 @@ private:
#endif
};
- DwarfAccelTable(const DwarfAccelTable&); // DO NOT IMPLEMENT
- void operator=(const DwarfAccelTable&); // DO NOT IMPLEMENT
+ DwarfAccelTable(const DwarfAccelTable&) LLVM_DELETED_FUNCTION;
+ void operator=(const DwarfAccelTable&) LLVM_DELETED_FUNCTION;
// Internal Functions
void EmitHeader(AsmPrinter *);
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp
index d975f1f..4fdd5ca 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp
@@ -25,7 +25,7 @@
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Target/Mangler.h"
-#include "llvm/Target/TargetData.h"
+#include "llvm/DataLayout.h"
#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetMachine.h"
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
index d30e5bb..2b07dda 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
@@ -22,7 +22,7 @@
#include "llvm/Instructions.h"
#include "llvm/Support/Debug.h"
#include "llvm/Target/Mangler.h"
-#include "llvm/Target/TargetData.h"
+#include "llvm/DataLayout.h"
#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
@@ -51,6 +51,15 @@ DIEEntry *CompileUnit::createDIEEntry(DIE *Entry) {
return Value;
}
+/// addFlag - Add a flag that is true.
+void CompileUnit::addFlag(DIE *Die, unsigned Attribute) {
+ if (!DD->useDarwinGDBCompat())
+ Die->addValue(Attribute, dwarf::DW_FORM_flag_present,
+ DIEIntegerOne);
+ else
+ addUInt(Die, Attribute, dwarf::DW_FORM_flag, 1);
+}
+
/// addUInt - Add an unsigned integer attribute data and value.
///
void CompileUnit::addUInt(DIE *Die, unsigned Attribute,
@@ -501,7 +510,7 @@ bool CompileUnit::addConstantFPValue(DIE *Die, const MachineOperand &MO) {
const char *FltPtr = (const char*)FltVal.getRawData();
int NumBytes = FltVal.getBitWidth() / 8; // 8 bits per byte.
- bool LittleEndian = Asm->getTargetData().isLittleEndian();
+ bool LittleEndian = Asm->getDataLayout().isLittleEndian();
int Incr = (LittleEndian ? 1 : -1);
int Start = (LittleEndian ? 0 : NumBytes - 1);
int Stop = (LittleEndian ? NumBytes : -1);
@@ -543,7 +552,7 @@ bool CompileUnit::addConstantValue(DIE *Die, const ConstantInt *CI,
const uint64_t *Ptr64 = Val.getRawData();
int NumBytes = Val.getBitWidth() / 8; // 8 bits per byte.
- bool LittleEndian = Asm->getTargetData().isLittleEndian();
+ bool LittleEndian = Asm->getDataLayout().isLittleEndian();
// Output the constant to DWARF one byte at a time.
for (int i = 0; i < NumBytes; i++) {
@@ -794,7 +803,7 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) {
(Language == dwarf::DW_LANG_C89 ||
Language == dwarf::DW_LANG_C99 ||
Language == dwarf::DW_LANG_ObjC))
- addUInt(&Buffer, dwarf::DW_AT_prototyped, dwarf::DW_FORM_flag, 1);
+ addFlag(&Buffer, dwarf::DW_AT_prototyped);
}
break;
case dwarf::DW_TAG_structure_type:
@@ -825,15 +834,15 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) {
addUInt(ElemDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1,
dwarf::DW_ACCESS_public);
if (SP.isExplicit())
- addUInt(ElemDie, dwarf::DW_AT_explicit, dwarf::DW_FORM_flag, 1);
+ addFlag(ElemDie, dwarf::DW_AT_explicit);
}
else if (Element.isVariable()) {
DIVariable DV(Element);
ElemDie = new DIE(dwarf::DW_TAG_variable);
addString(ElemDie, dwarf::DW_AT_name, DV.getName());
addType(ElemDie, DV.getType());
- addUInt(ElemDie, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1);
- addUInt(ElemDie, dwarf::DW_AT_external, dwarf::DW_FORM_flag, 1);
+ addFlag(ElemDie, dwarf::DW_AT_declaration);
+ addFlag(ElemDie, dwarf::DW_AT_external);
addSourceLine(ElemDie, DV);
} else if (Element.isDerivedType()) {
DIDerivedType DDTy(Element);
@@ -883,7 +892,7 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) {
}
if (CTy.isAppleBlockExtension())
- addUInt(&Buffer, dwarf::DW_AT_APPLE_block, dwarf::DW_FORM_flag, 1);
+ addFlag(&Buffer, dwarf::DW_AT_APPLE_block);
DICompositeType ContainingType = CTy.getContainingType();
if (DIDescriptor(ContainingType).isCompositeType())
@@ -895,8 +904,7 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) {
}
if (CTy.isObjcClassComplete())
- addUInt(&Buffer, dwarf::DW_AT_APPLE_objc_complete_type,
- dwarf::DW_FORM_flag, 1);
+ addFlag(&Buffer, dwarf::DW_AT_APPLE_objc_complete_type);
// Add template parameters to a class, structure or union types.
// FIXME: The support isn't in the metadata for this yet.
@@ -929,7 +937,7 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) {
// If we're a forward decl, say so.
if (CTy.isForwardDecl())
- addUInt(&Buffer, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1);
+ addFlag(&Buffer, dwarf::DW_AT_declaration);
// Add source line info if available.
if (!CTy.isForwardDecl())
@@ -1028,8 +1036,10 @@ DIE *CompileUnit::getOrCreateSubprogramDIE(DISubprogram SP) {
// AT_specification code in order to work around a bug in older
// gdbs that requires the linkage name to resolve multiple template
// functions.
+ // TODO: Remove this set of code when we get rid of the old gdb
+ // compatibility.
StringRef LinkageName = SP.getLinkageName();
- if (!LinkageName.empty())
+ if (!LinkageName.empty() && DD->useDarwinGDBCompat())
addString(SPDie, dwarf::DW_AT_MIPS_linkage_name,
getRealLinkageName(LinkageName));
@@ -1043,6 +1053,11 @@ DIE *CompileUnit::getOrCreateSubprogramDIE(DISubprogram SP) {
return SPDie;
}
+ // Add the linkage name if we have one.
+ if (!LinkageName.empty() && !DD->useDarwinGDBCompat())
+ addString(SPDie, dwarf::DW_AT_MIPS_linkage_name,
+ getRealLinkageName(LinkageName));
+
// Constructors and operators for anonymous aggregates do not have names.
if (!SP.getName().empty())
addString(SPDie, dwarf::DW_AT_name, SP.getName());
@@ -1055,7 +1070,7 @@ DIE *CompileUnit::getOrCreateSubprogramDIE(DISubprogram SP) {
(Language == dwarf::DW_LANG_C89 ||
Language == dwarf::DW_LANG_C99 ||
Language == dwarf::DW_LANG_ObjC))
- addUInt(SPDie, dwarf::DW_AT_prototyped, dwarf::DW_FORM_flag, 1);
+ addFlag(SPDie, dwarf::DW_AT_prototyped);
// Add Return Type.
DICompositeType SPTy = SP.getType();
@@ -1079,7 +1094,7 @@ DIE *CompileUnit::getOrCreateSubprogramDIE(DISubprogram SP) {
}
if (!SP.isDefinition()) {
- addUInt(SPDie, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1);
+ addFlag(SPDie, dwarf::DW_AT_declaration);
// Add arguments. Do not add arguments for subprogram definition. They will
// be handled while processing variables.
@@ -1090,22 +1105,22 @@ DIE *CompileUnit::getOrCreateSubprogramDIE(DISubprogram SP) {
if (SPTag == dwarf::DW_TAG_subroutine_type)
for (unsigned i = 1, N = Args.getNumElements(); i < N; ++i) {
DIE *Arg = new DIE(dwarf::DW_TAG_formal_parameter);
- DIType ATy = DIType(DIType(Args.getElement(i)));
+ DIType ATy = DIType(Args.getElement(i));
addType(Arg, ATy);
if (ATy.isArtificial())
- addUInt(Arg, dwarf::DW_AT_artificial, dwarf::DW_FORM_flag, 1);
+ addFlag(Arg, dwarf::DW_AT_artificial);
SPDie->addChild(Arg);
}
}
if (SP.isArtificial())
- addUInt(SPDie, dwarf::DW_AT_artificial, dwarf::DW_FORM_flag, 1);
+ addFlag(SPDie, dwarf::DW_AT_artificial);
if (!SP.isLocalToUnit())
- addUInt(SPDie, dwarf::DW_AT_external, dwarf::DW_FORM_flag, 1);
+ addFlag(SPDie, dwarf::DW_AT_external);
if (SP.isOptimized())
- addUInt(SPDie, dwarf::DW_AT_APPLE_optimized, dwarf::DW_FORM_flag, 1);
+ addFlag(SPDie, dwarf::DW_AT_APPLE_optimized);
if (unsigned isa = Asm->getISAEncoding()) {
addUInt(SPDie, dwarf::DW_AT_APPLE_isa, dwarf::DW_FORM_flag, isa);
@@ -1168,7 +1183,7 @@ void CompileUnit::createGlobalVariableDIE(const MDNode *N) {
// Add scoping info.
if (!GV.isLocalToUnit())
- addUInt(VariableDIE, dwarf::DW_AT_external, dwarf::DW_FORM_flag, 1);
+ addFlag(VariableDIE, dwarf::DW_AT_external);
// Add line number info.
addSourceLine(VariableDIE, GV);
@@ -1193,8 +1208,7 @@ void CompileUnit::createGlobalVariableDIE(const MDNode *N) {
addDIEEntry(VariableSpecDIE, dwarf::DW_AT_specification,
dwarf::DW_FORM_ref4, VariableDIE);
addBlock(VariableSpecDIE, dwarf::DW_AT_location, 0, Block);
- addUInt(VariableDIE, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag,
- 1);
+ addFlag(VariableDIE, dwarf::DW_AT_declaration);
addDie(VariableSpecDIE);
} else {
addBlock(VariableDIE, dwarf::DW_AT_location, 0, Block);
@@ -1213,7 +1227,7 @@ void CompileUnit::createGlobalVariableDIE(const MDNode *N) {
addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_constu);
SmallVector<Value*, 3> Idx(CE->op_begin()+1, CE->op_end());
addUInt(Block, 0, dwarf::DW_FORM_udata,
- Asm->getTargetData().getIndexedOffset(Ptr->getType(), Idx));
+ Asm->getDataLayout().getIndexedOffset(Ptr->getType(), Idx));
addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus);
addBlock(VariableDIE, dwarf::DW_AT_location, 0, Block);
}
@@ -1260,7 +1274,7 @@ void CompileUnit::constructArrayTypeDIE(DIE &Buffer,
DICompositeType *CTy) {
Buffer.setTag(dwarf::DW_TAG_array_type);
if (CTy->getTag() == dwarf::DW_TAG_vector_type)
- addUInt(&Buffer, dwarf::DW_AT_GNU_vector, dwarf::DW_FORM_flag, 1);
+ addFlag(&Buffer, dwarf::DW_AT_GNU_vector);
// Emit derived type.
addType(&Buffer, CTy->getTypeDerivedFrom());
@@ -1333,8 +1347,7 @@ DIE *CompileUnit::constructVariableDIE(DbgVariable *DV, bool isScopeAbstract) {
}
if (DV->isArtificial())
- addUInt(VariableDie, dwarf::DW_AT_artificial,
- dwarf::DW_FORM_flag, 1);
+ addFlag(VariableDie, dwarf::DW_AT_artificial);
if (isScopeAbstract) {
DV->setDIE(VariableDie);
@@ -1446,7 +1459,7 @@ DIE *CompileUnit::createMemberDIE(DIDerivedType DT) {
Offset -= FieldOffset;
// Maybe we need to work from the other end.
- if (Asm->getTargetData().isLittleEndian())
+ if (Asm->getDataLayout().isLittleEndian())
Offset = FieldSize - (Offset + Size);
addUInt(MemberDie, dwarf::DW_AT_bit_offset, 0, Offset);
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
index b4ff9e8..fad9b6e 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
@@ -176,6 +176,9 @@ public:
}
public:
+ /// addFlag - Add a flag that is true to the DIE.
+ void addFlag(DIE *Die, unsigned Attribute);
+
/// addUInt - Add an unsigned integer attribute data and value.
///
void addUInt(DIE *Die, unsigned Attribute, unsigned Form, uint64_t Integer);
@@ -280,8 +283,8 @@ public:
/// for the given DITemplateTypeParameter.
DIE *getOrCreateTemplateTypeParameterDIE(DITemplateTypeParameter TP);
- /// getOrCreateTemplateValueParameterDIE - Find existing DIE or create new DIE
- /// for the given DITemplateValueParameter.
+ /// getOrCreateTemplateValueParameterDIE - Find existing DIE or create
+ /// new DIE for the given DITemplateValueParameter.
DIE *getOrCreateTemplateValueParameterDIE(DITemplateValueParameter TVP);
/// createDIEEntry - Creates a new DIEEntry to be a proxy for a debug
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
index 649684a..367b523 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -27,7 +27,7 @@
#include "llvm/MC/MCSection.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
-#include "llvm/Target/TargetData.h"
+#include "llvm/DataLayout.h"
#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetMachine.h"
@@ -54,9 +54,29 @@ static cl::opt<bool> UnknownLocations("use-unknown-locations", cl::Hidden,
cl::desc("Make an absence of debug location information explicit."),
cl::init(false));
-static cl::opt<bool> DwarfAccelTables("dwarf-accel-tables", cl::Hidden,
+namespace {
+ enum DefaultOnOff {
+ Default, Enable, Disable
+ };
+}
+
+static cl::opt<DefaultOnOff> DwarfAccelTables("dwarf-accel-tables", cl::Hidden,
cl::desc("Output prototype dwarf accelerator tables."),
- cl::init(false));
+ cl::values(
+ clEnumVal(Default, "Default for platform"),
+ clEnumVal(Enable, "Enabled"),
+ clEnumVal(Disable, "Disabled"),
+ clEnumValEnd),
+ cl::init(Default));
+
+static cl::opt<DefaultOnOff> DarwinGDBCompat("darwin-gdb-compat", cl::Hidden,
+ cl::desc("Compatibility with Darwin gdb."),
+ cl::values(
+ clEnumVal(Default, "Default for platform"),
+ clEnumVal(Enable, "Enabled"),
+ clEnumVal(Disable, "Disabled"),
+ clEnumValEnd),
+ cl::init(Default));
namespace {
const char *DWARFGroupName = "DWARF Emission";
@@ -135,10 +155,25 @@ DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M)
DwarfDebugRangeSectionSym = DwarfDebugLocSectionSym = 0;
FunctionBeginSym = FunctionEndSym = 0;
- // Turn on accelerator tables for Darwin.
- if (Triple(M->getTargetTriple()).isOSDarwin())
- DwarfAccelTables = true;
-
+ // Turn on accelerator tables and older gdb compatibility
+ // for Darwin.
+ bool isDarwin = Triple(M->getTargetTriple()).isOSDarwin();
+ if (DarwinGDBCompat == Default) {
+ if (isDarwin)
+ isDarwinGDBCompat = true;
+ else
+ isDarwinGDBCompat = false;
+ } else
+ isDarwinGDBCompat = DarwinGDBCompat == Enable ? true : false;
+
+ if (DwarfAccelTables == Default) {
+ if (isDarwin)
+ hasDwarfAccelTables = true;
+ else
+ hasDwarfAccelTables = false;
+ } else
+ hasDwarfAccelTables = DwarfAccelTables == Enable ? true : false;
+
{
NamedRegionTimer T(DbgTimerName, DWARFGroupName, TimePassesIsEnabled);
beginModule(M);
@@ -272,44 +307,51 @@ DIE *DwarfDebug::updateSubprogramScopeDIE(CompileUnit *SPCU,
assert(SPDie && "Unable to find subprogram DIE!");
DISubprogram SP(SPNode);
- DISubprogram SPDecl = SP.getFunctionDeclaration();
- if (!SPDecl.isSubprogram()) {
- // There is not any need to generate specification DIE for a function
- // defined at compile unit level. If a function is defined inside another
- // function then gdb prefers the definition at top level and but does not
- // expect specification DIE in parent function. So avoid creating
- // specification DIE for a function defined inside a function.
- if (SP.isDefinition() && !SP.getContext().isCompileUnit() &&
- !SP.getContext().isFile() &&
- !isSubprogramContext(SP.getContext())) {
- SPCU->addUInt(SPDie, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1);
-
- // Add arguments.
- DICompositeType SPTy = SP.getType();
- DIArray Args = SPTy.getTypeArray();
- unsigned SPTag = SPTy.getTag();
- if (SPTag == dwarf::DW_TAG_subroutine_type)
- for (unsigned i = 1, N = Args.getNumElements(); i < N; ++i) {
- DIE *Arg = new DIE(dwarf::DW_TAG_formal_parameter);
- DIType ATy = DIType(DIType(Args.getElement(i)));
- SPCU->addType(Arg, ATy);
- if (ATy.isArtificial())
- SPCU->addUInt(Arg, dwarf::DW_AT_artificial, dwarf::DW_FORM_flag, 1);
- SPDie->addChild(Arg);
- }
- DIE *SPDeclDie = SPDie;
- SPDie = new DIE(dwarf::DW_TAG_subprogram);
- SPCU->addDIEEntry(SPDie, dwarf::DW_AT_specification, dwarf::DW_FORM_ref4,
- SPDeclDie);
- SPCU->addDie(SPDie);
- }
- }
- // Pick up abstract subprogram DIE.
+ // If we're updating an abstract DIE, then we will be adding the children and
+ // object pointer later on. But what we don't want to do is process the
+ // concrete DIE twice.
if (DIE *AbsSPDIE = AbstractSPDies.lookup(SPNode)) {
+ // Pick up abstract subprogram DIE.
SPDie = new DIE(dwarf::DW_TAG_subprogram);
SPCU->addDIEEntry(SPDie, dwarf::DW_AT_abstract_origin,
dwarf::DW_FORM_ref4, AbsSPDIE);
SPCU->addDie(SPDie);
+ } else {
+ DISubprogram SPDecl = SP.getFunctionDeclaration();
+ if (!SPDecl.isSubprogram()) {
+ // There is not any need to generate specification DIE for a function
+ // defined at compile unit level. If a function is defined inside another
+ // function then gdb prefers the definition at top level and but does not
+ // expect specification DIE in parent function. So avoid creating
+ // specification DIE for a function defined inside a function.
+ if (SP.isDefinition() && !SP.getContext().isCompileUnit() &&
+ !SP.getContext().isFile() &&
+ !isSubprogramContext(SP.getContext())) {
+ SPCU->addFlag(SPDie, dwarf::DW_AT_declaration);
+
+ // Add arguments.
+ DICompositeType SPTy = SP.getType();
+ DIArray Args = SPTy.getTypeArray();
+ unsigned SPTag = SPTy.getTag();
+ if (SPTag == dwarf::DW_TAG_subroutine_type)
+ for (unsigned i = 1, N = Args.getNumElements(); i < N; ++i) {
+ DIE *Arg = new DIE(dwarf::DW_TAG_formal_parameter);
+ DIType ATy = DIType(Args.getElement(i));
+ SPCU->addType(Arg, ATy);
+ if (ATy.isArtificial())
+ SPCU->addFlag(Arg, dwarf::DW_AT_artificial);
+ if (ATy.isObjectPointer())
+ SPCU->addDIEEntry(SPDie, dwarf::DW_AT_object_pointer,
+ dwarf::DW_FORM_ref4, Arg);
+ SPDie->addChild(Arg);
+ }
+ DIE *SPDeclDie = SPDie;
+ SPDie = new DIE(dwarf::DW_TAG_subprogram);
+ SPCU->addDIEEntry(SPDie, dwarf::DW_AT_specification, dwarf::DW_FORM_ref4,
+ SPDeclDie);
+ SPCU->addDie(SPDie);
+ }
+ }
}
SPCU->addLabel(SPDie, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr,
@@ -346,7 +388,7 @@ DIE *DwarfDebug::constructLexicalScopeDIE(CompileUnit *TheCU,
// DW_AT_ranges appropriately.
TheCU->addUInt(ScopeDIE, dwarf::DW_AT_ranges, dwarf::DW_FORM_data4,
DebugRangeSymbols.size()
- * Asm->getTargetData().getPointerSize());
+ * Asm->getDataLayout().getPointerSize());
for (SmallVector<InsnRange, 4>::const_iterator RI = Ranges.begin(),
RE = Ranges.end(); RI != RE; ++RI) {
DebugRangeSymbols.push_back(getLabelBeforeInsn(RI->first));
@@ -386,7 +428,7 @@ DIE *DwarfDebug::constructInlinedScopeDIE(CompileUnit *TheCU,
DISubprogram InlinedSP = getDISubprogram(DS);
DIE *OriginDIE = TheCU->getDIE(InlinedSP);
if (!OriginDIE) {
- DEBUG(dbgs() << "Unable to find original DIE for inlined subprogram.");
+ DEBUG(dbgs() << "Unable to find original DIE for an inlined subprogram.");
return NULL;
}
@@ -395,7 +437,7 @@ DIE *DwarfDebug::constructInlinedScopeDIE(CompileUnit *TheCU,
const MCSymbol *EndLabel = getLabelAfterInsn(RI->second);
if (StartLabel == 0 || EndLabel == 0) {
- llvm_unreachable("Unexpected Start and End labels for a inlined scope!");
+ llvm_unreachable("Unexpected Start and End labels for an inlined scope!");
}
assert(StartLabel->isDefined() &&
"Invalid starting label for an inlined scope!");
@@ -412,7 +454,7 @@ DIE *DwarfDebug::constructInlinedScopeDIE(CompileUnit *TheCU,
// DW_AT_ranges appropriately.
TheCU->addUInt(ScopeDIE, dwarf::DW_AT_ranges, dwarf::DW_FORM_data4,
DebugRangeSymbols.size()
- * Asm->getTargetData().getPointerSize());
+ * Asm->getDataLayout().getPointerSize());
for (SmallVector<InsnRange, 4>::const_iterator RI = Ranges.begin(),
RE = Ranges.end(); RI != RE; ++RI) {
DebugRangeSymbols.push_back(getLabelBeforeInsn(RI->first));
@@ -461,21 +503,26 @@ DIE *DwarfDebug::constructScopeDIE(CompileUnit *TheCU, LexicalScope *Scope) {
return NULL;
SmallVector<DIE *, 8> Children;
+ DIE *ObjectPointer = NULL;
// Collect arguments for current function.
if (LScopes.isCurrentFunctionScope(Scope))
for (unsigned i = 0, N = CurrentFnArguments.size(); i < N; ++i)
if (DbgVariable *ArgDV = CurrentFnArguments[i])
if (DIE *Arg =
- TheCU->constructVariableDIE(ArgDV, Scope->isAbstractScope()))
+ TheCU->constructVariableDIE(ArgDV, Scope->isAbstractScope())) {
Children.push_back(Arg);
+ if (ArgDV->isObjectPointer()) ObjectPointer = Arg;
+ }
// Collect lexical scope children first.
const SmallVector<DbgVariable *, 8> &Variables = ScopeVariables.lookup(Scope);
for (unsigned i = 0, N = Variables.size(); i < N; ++i)
if (DIE *Variable =
- TheCU->constructVariableDIE(Variables[i], Scope->isAbstractScope()))
+ TheCU->constructVariableDIE(Variables[i], Scope->isAbstractScope())) {
Children.push_back(Variable);
+ if (Variables[i]->isObjectPointer()) ObjectPointer = Variable;
+ }
const SmallVector<LexicalScope *, 4> &Scopes = Scope->getChildren();
for (unsigned j = 0, M = Scopes.size(); j < M; ++j)
if (DIE *Nested = constructScopeDIE(TheCU, Scopes[j]))
@@ -509,6 +556,10 @@ DIE *DwarfDebug::constructScopeDIE(CompileUnit *TheCU, LexicalScope *Scope) {
E = Children.end(); I != E; ++I)
ScopeDIE->addChild(*I);
+ if (DS.isSubprogram() && ObjectPointer != NULL)
+ TheCU->addDIEEntry(ScopeDIE, dwarf::DW_AT_object_pointer,
+ dwarf::DW_FORM_ref4, ObjectPointer);
+
if (DS.isSubprogram())
TheCU->addPubTypes(DISubprogram(DS));
@@ -556,7 +607,8 @@ CompileUnit *DwarfDebug::constructCompileUnit(const MDNode *N) {
unsigned ID = GetOrCreateSourceID(FN, CompilationDir);
DIE *Die = new DIE(dwarf::DW_TAG_compile_unit);
- CompileUnit *NewCU = new CompileUnit(ID, DIUnit.getLanguage(), Die, Asm, this);
+ CompileUnit *NewCU = new CompileUnit(ID, DIUnit.getLanguage(), Die,
+ Asm, this);
NewCU->addString(Die, dwarf::DW_AT_producer, DIUnit.getProducer());
NewCU->addUInt(Die, dwarf::DW_AT_language, dwarf::DW_FORM_data2,
DIUnit.getLanguage());
@@ -575,7 +627,7 @@ CompileUnit *DwarfDebug::constructCompileUnit(const MDNode *N) {
if (!CompilationDir.empty())
NewCU->addString(Die, dwarf::DW_AT_comp_dir, CompilationDir);
if (DIUnit.isOptimized())
- NewCU->addUInt(Die, dwarf::DW_AT_APPLE_optimized, dwarf::DW_FORM_flag, 1);
+ NewCU->addFlag(Die, dwarf::DW_AT_APPLE_optimized);
StringRef Flags = DIUnit.getFlags();
if (!Flags.empty())
@@ -755,7 +807,7 @@ void DwarfDebug::endModule() {
LexicalScope *Scope =
new LexicalScope(NULL, DIDescriptor(SP), NULL, false);
DeadFnScopeMap[SP] = Scope;
-
+
// Construct subprogram DIE and add variables DIEs.
CompileUnit *SPCU = CUMap.lookup(TheCU);
assert(SPCU && "Unable to find Compile Unit!");
@@ -802,9 +854,9 @@ void DwarfDebug::endModule() {
Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("data_end"));
// End text sections.
- for (unsigned i = 1, N = SectionMap.size(); i <= N; ++i) {
- Asm->OutStreamer.SwitchSection(SectionMap[i]);
- Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("section_end", i));
+ for (unsigned I = 0, E = SectionMap.size(); I != E; ++I) {
+ Asm->OutStreamer.SwitchSection(SectionMap[I]);
+ Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("section_end", I+1));
}
// Compute DIE offsets and sizes.
@@ -816,8 +868,8 @@ void DwarfDebug::endModule() {
// Corresponding abbreviations into a abbrev section.
emitAbbreviations();
- // Emit info into a dwarf accelerator table sections.
- if (DwarfAccelTables) {
+ // Emit info into the dwarf accelerator table sections.
+ if (useDwarfAccelTables()) {
emitAccelNames();
emitAccelObjC();
emitAccelNamespaces();
@@ -825,7 +877,10 @@ void DwarfDebug::endModule() {
}
// Emit info into a debug pubtypes section.
- emitDebugPubTypes();
+ // TODO: When we don't need the option anymore we can
+ // remove all of the code that adds to the table.
+ if (useDarwinGDBCompat())
+ emitDebugPubTypes();
// Emit info into a debug loc section.
emitDebugLoc();
@@ -840,7 +895,11 @@ void DwarfDebug::endModule() {
emitDebugMacInfo();
// Emit inline info.
- emitDebugInlineInfo();
+ // TODO: When we don't need the option anymore we
+ // can remove all of the code that this section
+ // depends upon.
+ if (useDarwinGDBCompat())
+ emitDebugInlineInfo();
// Emit info into a debug str section.
emitDebugStr();
@@ -1014,7 +1073,7 @@ DwarfDebug::collectVariableInfo(const MachineFunction *MF,
if (AbsVar)
AbsVar->setMInsn(MInsn);
- // Simple ranges that are fully coalesced.
+ // Simplify ranges that are fully coalesced.
if (History.size() <= 1 || (History.size() == 2 &&
MInsn->isIdenticalTo(History.back()))) {
RegVar->setMInsn(MInsn);
@@ -1267,7 +1326,7 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) {
// Coalesce identical entries at the end of History.
if (History.size() >= 2 &&
Prev->isIdenticalTo(History[History.size() - 2])) {
- DEBUG(dbgs() << "Coalesce identical DBG_VALUE entries:\n"
+ DEBUG(dbgs() << "Coalescing identical DBG_VALUE entries:\n"
<< "\t" << *Prev
<< "\t" << *History[History.size() - 2] << "\n");
History.pop_back();
@@ -1283,7 +1342,7 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) {
PrevMBB->getLastNonDebugInstr();
if (LastMI == PrevMBB->end()) {
// Drop DBG_VALUE for empty range.
- DEBUG(dbgs() << "Drop DBG_VALUE for empty range:\n"
+ DEBUG(dbgs() << "Dropping DBG_VALUE for empty range:\n"
<< "\t" << *Prev << "\n");
History.pop_back();
}
@@ -1300,9 +1359,10 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) {
if (!MI->isLabel())
AtBlockEntry = false;
- // First known non DBG_VALUE location marks beginning of function
- // body.
- if (PrologEndLoc.isUnknown() && !MI->getDebugLoc().isUnknown())
+ // First known non-DBG_VALUE and non-frame setup location marks
+ // the beginning of the function body.
+ if (!MI->getFlag(MachineInstr::FrameSetup) &&
+ (PrologEndLoc.isUnknown() && !MI->getDebugLoc().isUnknown()))
PrologEndLoc = MI->getDebugLoc();
// Check if the instruction clobbers any registers with debug vars.
@@ -1382,7 +1442,7 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) {
MF->getFunction()->getContext());
recordSourceLine(FnStartDL.getLine(), FnStartDL.getCol(),
FnStartDL.getScope(MF->getFunction()->getContext()),
- DWARF2_LINE_DEFAULT_IS_STMT ? DWARF2_FLAG_IS_STMT : 0);
+ 0);
}
}
@@ -1439,8 +1499,7 @@ void DwarfDebug::endFunction(const MachineFunction *MF) {
DIE *CurFnDIE = constructScopeDIE(TheCU, FnScope);
if (!MF->getTarget().Options.DisableFramePointerElim(*MF))
- TheCU->addUInt(CurFnDIE, dwarf::DW_AT_APPLE_omit_frame_ptr,
- dwarf::DW_FORM_flag, 1);
+ TheCU->addFlag(CurFnDIE, dwarf::DW_AT_APPLE_omit_frame_ptr);
DebugFrames.push_back(FunctionDebugFrameInfo(Asm->getFunctionNumber(),
MMI->getFrameMoves()));
@@ -1710,7 +1769,7 @@ void DwarfDebug::emitDebugInfo() {
Asm->EmitSectionOffset(Asm->GetTempSymbol("abbrev_begin"),
DwarfAbbrevSectionSym);
Asm->OutStreamer.AddComment("Address Size (in bytes)");
- Asm->EmitInt8(Asm->getTargetData().getPointerSize());
+ Asm->EmitInt8(Asm->getDataLayout().getPointerSize());
emitDIE(Die);
Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("info_end", TheCU->getID()));
@@ -1756,14 +1815,14 @@ void DwarfDebug::emitEndOfLineMatrix(unsigned SectionEnd) {
Asm->EmitInt8(0);
Asm->OutStreamer.AddComment("Op size");
- Asm->EmitInt8(Asm->getTargetData().getPointerSize() + 1);
+ Asm->EmitInt8(Asm->getDataLayout().getPointerSize() + 1);
Asm->OutStreamer.AddComment("DW_LNE_set_address");
Asm->EmitInt8(dwarf::DW_LNE_set_address);
Asm->OutStreamer.AddComment("Section end label");
Asm->OutStreamer.EmitSymbolValue(Asm->GetTempSymbol("section_end",SectionEnd),
- Asm->getTargetData().getPointerSize(),
+ Asm->getDataLayout().getPointerSize(),
0/*AddrSpace*/);
// Mark end of matrix.
@@ -1992,7 +2051,7 @@ void DwarfDebug::emitDebugLoc() {
// Start the dwarf loc section.
Asm->OutStreamer.SwitchSection(
Asm->getObjFileLowering().getDwarfLocSection());
- unsigned char Size = Asm->getTargetData().getPointerSize();
+ unsigned char Size = Asm->getDataLayout().getPointerSize();
Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("debug_loc", 0));
unsigned index = 1;
for (SmallVector<DotDebugLocEntry, 4>::iterator
@@ -2089,7 +2148,7 @@ void DwarfDebug::emitDebugRanges() {
// Start the dwarf ranges section.
Asm->OutStreamer.SwitchSection(
Asm->getObjFileLowering().getDwarfRangesSection());
- unsigned char Size = Asm->getTargetData().getPointerSize();
+ unsigned char Size = Asm->getDataLayout().getPointerSize();
for (SmallVector<const MCSymbol *, 8>::iterator
I = DebugRangeSymbols.begin(), E = DebugRangeSymbols.end();
I != E; ++I) {
@@ -2147,7 +2206,7 @@ void DwarfDebug::emitDebugInlineInfo() {
Asm->OutStreamer.AddComment("Dwarf Version");
Asm->EmitInt16(dwarf::DWARF_VERSION);
Asm->OutStreamer.AddComment("Address Size (in bytes)");
- Asm->EmitInt8(Asm->getTargetData().getPointerSize());
+ Asm->EmitInt8(Asm->getDataLayout().getPointerSize());
for (SmallVector<const MDNode *, 4>::iterator I = InlinedSPNodes.begin(),
E = InlinedSPNodes.end(); I != E; ++I) {
@@ -2178,7 +2237,7 @@ void DwarfDebug::emitDebugInlineInfo() {
if (Asm->isVerbose()) Asm->OutStreamer.AddComment("low_pc");
Asm->OutStreamer.EmitSymbolValue(LI->first,
- Asm->getTargetData().getPointerSize(),0);
+ Asm->getDataLayout().getPointerSize(),0);
}
}
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h
index d1d6512..61d9a51 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h
@@ -21,9 +21,9 @@
#include "llvm/MC/MachineLocation.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/FoldingSet.h"
+#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/StringMap.h"
-#include "llvm/ADT/UniqueVector.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/DebugLoc.h"
@@ -96,7 +96,8 @@ typedef struct DotDebugLocEntry {
DotDebugLocEntry(const MCSymbol *B, const MCSymbol *E, const ConstantFP *FPtr)
: Begin(B), End(E), Variable(0), Merged(false),
Constant(true) { Constants.CFP = FPtr; EntryKind = E_ConstantFP; }
- DotDebugLocEntry(const MCSymbol *B, const MCSymbol *E, const ConstantInt *IPtr)
+ DotDebugLocEntry(const MCSymbol *B, const MCSymbol *E,
+ const ConstantInt *IPtr)
: Begin(B), End(E), Variable(0), Merged(false),
Constant(true) { Constants.CIP = IPtr; EntryKind = E_ConstantInt; }
@@ -158,11 +159,19 @@ public:
bool isArtificial() const {
if (Var.isArtificial())
return true;
- if (Var.getTag() == dwarf::DW_TAG_arg_variable
- && getType().isArtificial())
+ if (getType().isArtificial())
return true;
return false;
}
+
+ bool isObjectPointer() const {
+ if (Var.isObjectPointer())
+ return true;
+ if (getType().isObjectPointer())
+ return true;
+ return false;
+ }
+
bool variableHasComplexAddress() const {
assert(Var.Verify() && "Invalid complex DbgVariable!");
return Var.hasComplexAddress();
@@ -222,7 +231,7 @@ class DwarfDebug {
/// SectionMap - Provides a unique id per text section.
///
- UniqueVector<const MCSection*> SectionMap;
+ SetVector<const MCSection*> SectionMap;
/// CurrentFnArguments - List of Arguments (DbgValues) for current function.
SmallVector<DbgVariable *, 8> CurrentFnArguments;
@@ -307,6 +316,9 @@ class DwarfDebug {
// table for the same directory as DW_at_comp_dir.
StringRef CompilationDir;
+ // A holder for the DarwinGDBCompat flag so that the compile unit can use it.
+ bool isDarwinGDBCompat;
+ bool hasDwarfAccelTables;
private:
/// assignAbbrevNumber - Define a unique number for the abbreviation.
@@ -520,6 +532,11 @@ public:
/// getStringPoolEntry - returns an entry into the string pool with the given
/// string text.
MCSymbol *getStringPoolEntry(StringRef Str);
+
+ /// useDarwinGDBCompat - returns whether or not to limit some of our debug
+ /// output to the limitations of darwin gdb.
+ bool useDarwinGDBCompat() { return isDarwinGDBCompat; }
+ bool useDwarfAccelTables() { return hasDwarfAccelTables; }
};
} // End of namespace llvm
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfException.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfException.cpp
index 70cc2e5..08fb6b3 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfException.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfException.cpp
@@ -24,7 +24,7 @@
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Target/Mangler.h"
-#include "llvm/Target/TargetData.h"
+#include "llvm/DataLayout.h"
#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetMachine.h"
@@ -417,7 +417,7 @@ void DwarfException::EmitExceptionTable() {
// that we're omitting that bit.
TTypeEncoding = dwarf::DW_EH_PE_omit;
// dwarf::DW_EH_PE_absptr
- TypeFormatSize = Asm->getTargetData().getPointerSize();
+ TypeFormatSize = Asm->getDataLayout().getPointerSize();
} else {
// Okay, we have actual filters or typeinfos to emit. As such, we need to
// pick a type encoding for them. We're about to emit a list of pointers to
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfException.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfException.h
index 75f6056..fe9e493 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfException.h
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfException.h
@@ -43,26 +43,6 @@ protected:
/// MMI - Collected machine module information.
MachineModuleInfo *MMI;
- /// EmitExceptionTable - Emit landing pads and actions.
- ///
- /// The general organization of the table is complex, but the basic concepts
- /// are easy. First there is a header which describes the location and
- /// organization of the three components that follow.
- /// 1. The landing pad site information describes the range of code covered
- /// by the try. In our case it's an accumulation of the ranges covered
- /// by the invokes in the try. There is also a reference to the landing
- /// pad that handles the exception once processed. Finally an index into
- /// the actions table.
- /// 2. The action table, in our case, is composed of pairs of type ids
- /// and next action offset. Starting with the action index from the
- /// landing pad site, each type Id is checked for a match to the current
- /// exception. If it matches then the exception and type id are passed
- /// on to the landing pad. Otherwise the next action is looked up. This
- /// chain is terminated with a next action of zero. If no type id is
- /// found the frame is unwound and handling continues.
- /// 3. Type id table contains references to all the C++ typeinfo for all
- /// catches in the function. This tables is reversed indexed base 1.
-
/// SharedTypeIds - How many leading type ids two landing pads have in common.
static unsigned SharedTypeIds(const LandingPadInfo *L,
const LandingPadInfo *R);
@@ -119,6 +99,26 @@ protected:
const RangeMapType &PadMap,
const SmallVectorImpl<const LandingPadInfo *> &LPs,
const SmallVectorImpl<unsigned> &FirstActions);
+
+ /// EmitExceptionTable - Emit landing pads and actions.
+ ///
+ /// The general organization of the table is complex, but the basic concepts
+ /// are easy. First there is a header which describes the location and
+ /// organization of the three components that follow.
+ /// 1. The landing pad site information describes the range of code covered
+ /// by the try. In our case it's an accumulation of the ranges covered
+ /// by the invokes in the try. There is also a reference to the landing
+ /// pad that handles the exception once processed. Finally an index into
+ /// the actions table.
+ /// 2. The action table, in our case, is composed of pairs of type ids
+ /// and next action offset. Starting with the action index from the
+ /// landing pad site, each type Id is checked for a match to the current
+ /// exception. If it matches then the exception and type id are passed
+ /// on to the landing pad. Otherwise the next action is looked up. This
+ /// chain is terminated with a next action of zero. If no type id is
+ /// found the frame is unwound and handling continues.
+ /// 3. Type id table contains references to all the C++ typeinfo for all
+ /// catches in the function. This tables is reversed indexed base 1.
void EmitExceptionTable();
public:
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp
index 1153817..f7c0119 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp
@@ -20,7 +20,7 @@
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/Target/Mangler.h"
-#include "llvm/Target/TargetData.h"
+#include "llvm/DataLayout.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/ADT/SmallString.h"
@@ -91,7 +91,7 @@ void OcamlGCMetadataPrinter::beginAssembly(AsmPrinter &AP) {
/// either condition is detected in a function which uses the GC.
///
void OcamlGCMetadataPrinter::finishAssembly(AsmPrinter &AP) {
- unsigned IntPtrSize = AP.TM.getTargetData()->getPointerSize();
+ unsigned IntPtrSize = AP.TM.getDataLayout()->getPointerSize();
AP.OutStreamer.SwitchSection(AP.getObjFileLowering().getTextSection());
EmitCamlGlobal(getModule(), AP, "code_end");
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/Win64Exception.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/Win64Exception.cpp
index b83aa5a..70742a8 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/Win64Exception.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/Win64Exception.cpp
@@ -24,7 +24,7 @@
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Target/Mangler.h"
-#include "llvm/Target/TargetData.h"
+#include "llvm/DataLayout.h"
#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetMachine.h"
diff --git a/contrib/llvm/lib/CodeGen/BranchFolding.cpp b/contrib/llvm/lib/CodeGen/BranchFolding.cpp
index fb65bb7..6f4c5a2 100644
--- a/contrib/llvm/lib/CodeGen/BranchFolding.cpp
+++ b/contrib/llvm/lib/CodeGen/BranchFolding.cpp
@@ -357,9 +357,8 @@ static unsigned ComputeCommonTailLength(MachineBasicBlock *MBB1,
if (I1 == MBB1->begin() && I2 != MBB2->begin()) {
--I2;
while (I2->isDebugValue()) {
- if (I2 == MBB2->begin()) {
+ if (I2 == MBB2->begin())
return TailLen;
- }
--I2;
}
++I2;
@@ -482,21 +481,19 @@ bool
BranchFolder::MergePotentialsElt::operator<(const MergePotentialsElt &o) const {
if (getHash() < o.getHash())
return true;
- else if (getHash() > o.getHash())
+ if (getHash() > o.getHash())
return false;
- else if (getBlock()->getNumber() < o.getBlock()->getNumber())
+ if (getBlock()->getNumber() < o.getBlock()->getNumber())
return true;
- else if (getBlock()->getNumber() > o.getBlock()->getNumber())
+ if (getBlock()->getNumber() > o.getBlock()->getNumber())
return false;
- else {
- // _GLIBCXX_DEBUG checks strict weak ordering, which involves comparing
- // an object with itself.
+ // _GLIBCXX_DEBUG checks strict weak ordering, which involves comparing
+ // an object with itself.
#ifndef _GLIBCXX_DEBUG
- llvm_unreachable("Predecessor appears twice");
+ llvm_unreachable("Predecessor appears twice");
#else
- return false;
+ return false;
#endif
- }
}
/// CountTerminators - Count the number of terminators in the given
@@ -574,7 +571,8 @@ static bool ProfitableToMerge(MachineBasicBlock *MBB1,
// instructions that would be deleted in the merge.
MachineFunction *MF = MBB1->getParent();
if (EffectiveTailLen >= 2 &&
- MF->getFunction()->hasFnAttr(Attribute::OptimizeForSize) &&
+ MF->getFunction()->getFnAttributes().
+ hasAttribute(Attributes::OptimizeForSize) &&
(I1 == MBB1->begin() || I2 == MBB2->begin()))
return true;
@@ -1554,8 +1552,7 @@ MachineBasicBlock::iterator findHoistingInsertPosAndDeps(MachineBasicBlock *MBB,
for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
Uses.insert(*AI);
} else {
- if (Uses.count(Reg)) {
- Uses.erase(Reg);
+ if (Uses.erase(Reg)) {
for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs)
Uses.erase(*SubRegs); // Use sub-registers to be conservative
}
diff --git a/contrib/llvm/lib/CodeGen/CalcSpillWeights.cpp b/contrib/llvm/lib/CodeGen/CalcSpillWeights.cpp
index 939af3f..dee339a 100644
--- a/contrib/llvm/lib/CodeGen/CalcSpillWeights.cpp
+++ b/contrib/llvm/lib/CodeGen/CalcSpillWeights.cpp
@@ -9,7 +9,6 @@
#define DEBUG_TYPE "calcspillweights"
-#include "llvm/Function.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/CodeGen/CalcSpillWeights.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
@@ -42,8 +41,7 @@ void CalculateSpillWeights::getAnalysisUsage(AnalysisUsage &au) const {
bool CalculateSpillWeights::runOnMachineFunction(MachineFunction &MF) {
DEBUG(dbgs() << "********** Compute Spill Weights **********\n"
- << "********** Function: "
- << MF.getFunction()->getName() << '\n');
+ << "********** Function: " << MF.getName() << '\n');
LiveIntervals &LIS = getAnalysis<LiveIntervals>();
MachineRegisterInfo &MRI = MF.getRegInfo();
@@ -166,7 +164,7 @@ void VirtRegAuxInfo::CalculateWeightAndHint(LiveInterval &li) {
continue;
float hweight = Hint[hint] += weight;
if (TargetRegisterInfo::isPhysicalRegister(hint)) {
- if (hweight > bestPhys && LIS.isAllocatable(hint))
+ if (hweight > bestPhys && mri.isAllocatable(hint))
bestPhys = hweight, hintPhys = hint;
} else {
if (hweight > bestVirt)
diff --git a/contrib/llvm/lib/CodeGen/CallingConvLower.cpp b/contrib/llvm/lib/CodeGen/CallingConvLower.cpp
index 0b747fd..22b9140 100644
--- a/contrib/llvm/lib/CodeGen/CallingConvLower.cpp
+++ b/contrib/llvm/lib/CodeGen/CallingConvLower.cpp
@@ -18,7 +18,7 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetData.h"
+#include "llvm/DataLayout.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetLowering.h"
using namespace llvm;
@@ -50,7 +50,7 @@ void CCState::HandleByVal(unsigned ValNo, MVT ValVT,
if (MinAlign > (int)Align)
Align = MinAlign;
MF.getFrameInfo()->ensureMaxAlignment(Align);
- TM.getTargetLowering()->HandleByVal(this, Size);
+ TM.getTargetLowering()->HandleByVal(this, Size, Align);
unsigned Offset = AllocateStack(Size, Align);
addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
}
diff --git a/contrib/llvm/lib/CodeGen/CodeGen.cpp b/contrib/llvm/lib/CodeGen/CodeGen.cpp
index fb2c2e8..a53f6f8 100644
--- a/contrib/llvm/lib/CodeGen/CodeGen.cpp
+++ b/contrib/llvm/lib/CodeGen/CodeGen.cpp
@@ -41,6 +41,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
initializeMachineCopyPropagationPass(Registry);
initializeMachineCSEPass(Registry);
initializeMachineDominatorTreePass(Registry);
+ initializeMachinePostDominatorTreePass(Registry);
initializeMachineLICMPass(Registry);
initializeMachineLoopInfoPass(Registry);
initializeMachineModuleInfoPass(Registry);
@@ -56,6 +57,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
initializeRegisterCoalescerPass(Registry);
initializeSlotIndexesPass(Registry);
initializeStackProtectorPass(Registry);
+ initializeStackColoringPass(Registry);
initializeStackSlotColoringPass(Registry);
initializeStrongPHIEliminationPass(Registry);
initializeTailDuplicatePassPass(Registry);
diff --git a/contrib/llvm/lib/CodeGen/CodePlacementOpt.cpp b/contrib/llvm/lib/CodeGen/CodePlacementOpt.cpp
index 99233df..d8e06c3 100644
--- a/contrib/llvm/lib/CodeGen/CodePlacementOpt.cpp
+++ b/contrib/llvm/lib/CodeGen/CodePlacementOpt.cpp
@@ -373,7 +373,7 @@ bool CodePlacementOpt::OptimizeIntraLoopEdges(MachineFunction &MF) {
///
bool CodePlacementOpt::AlignLoops(MachineFunction &MF) {
const Function *F = MF.getFunction();
- if (F->hasFnAttr(Attribute::OptimizeForSize))
+ if (F->getFnAttributes().hasAttribute(Attributes::OptimizeForSize))
return false;
unsigned Align = TLI->getPrefLoopAlignment();
diff --git a/contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp b/contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp
index a9de1c749..377b471 100644
--- a/contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp
+++ b/contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp
@@ -527,7 +527,7 @@ BreakAntiDependencies(const std::vector<SUnit>& SUnits,
if (Edge->getKind() == SDep::Anti) {
AntiDepReg = Edge->getReg();
assert(AntiDepReg != 0 && "Anti-dependence on reg0?");
- if (!RegClassInfo.isAllocatable(AntiDepReg))
+ if (!MRI.isAllocatable(AntiDepReg))
// Don't break anti-dependencies on non-allocatable registers.
AntiDepReg = 0;
else if (KeepRegs.test(AntiDepReg))
diff --git a/contrib/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp b/contrib/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp
index b4394e8..8964269 100644
--- a/contrib/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp
+++ b/contrib/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp
@@ -33,7 +33,6 @@ namespace {
const MachineRegisterInfo *MRI;
const TargetInstrInfo *TII;
BitVector LivePhysRegs;
- BitVector ReservedRegs;
public:
static char ID; // Pass identification, replacement for typeid
@@ -70,7 +69,7 @@ bool DeadMachineInstructionElim::isDead(const MachineInstr *MI) const {
unsigned Reg = MO.getReg();
if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
// Don't delete live physreg defs, or any reserved register defs.
- if (LivePhysRegs.test(Reg) || ReservedRegs.test(Reg))
+ if (LivePhysRegs.test(Reg) || MRI->isReserved(Reg))
return false;
} else {
if (!MRI->use_nodbg_empty(Reg))
@@ -90,9 +89,6 @@ bool DeadMachineInstructionElim::runOnMachineFunction(MachineFunction &MF) {
TRI = MF.getTarget().getRegisterInfo();
TII = MF.getTarget().getInstrInfo();
- // Treat reserved registers as always live.
- ReservedRegs = TRI->getReservedRegs(MF);
-
// Loop over all instructions in all blocks, from bottom to top, so that it's
// more likely that chains of dependent but ultimately dead instructions will
// be cleaned up.
@@ -101,7 +97,7 @@ bool DeadMachineInstructionElim::runOnMachineFunction(MachineFunction &MF) {
MachineBasicBlock *MBB = &*I;
// Start out assuming that reserved registers are live out of this block.
- LivePhysRegs = ReservedRegs;
+ LivePhysRegs = MRI->getReservedRegs();
// Also add any explicit live-out physregs for this block.
if (!MBB->empty() && MBB->back().isReturn())
diff --git a/contrib/llvm/lib/CodeGen/EarlyIfConversion.cpp b/contrib/llvm/lib/CodeGen/EarlyIfConversion.cpp
index f9347ef..d5d8404 100644
--- a/contrib/llvm/lib/CodeGen/EarlyIfConversion.cpp
+++ b/contrib/llvm/lib/CodeGen/EarlyIfConversion.cpp
@@ -18,7 +18,6 @@
#define DEBUG_TYPE "early-ifcvt"
#include "MachineTraceMetrics.h"
-#include "llvm/Function.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/SetVector.h"
@@ -32,9 +31,9 @@
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
-#include "llvm/MC/MCInstrItineraries.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
@@ -775,11 +774,11 @@ bool EarlyIfConverter::tryConvertIf(MachineBasicBlock *MBB) {
bool EarlyIfConverter::runOnMachineFunction(MachineFunction &MF) {
DEBUG(dbgs() << "********** EARLY IF-CONVERSION **********\n"
- << "********** Function: "
- << ((Value*)MF.getFunction())->getName() << '\n');
+ << "********** Function: " << MF.getName() << '\n');
TII = MF.getTarget().getInstrInfo();
TRI = MF.getTarget().getRegisterInfo();
- SchedModel = MF.getTarget().getInstrItineraryData()->SchedModel;
+ SchedModel =
+ MF.getTarget().getSubtarget<TargetSubtargetInfo>().getSchedModel();
MRI = &MF.getRegInfo();
DomTree = &getAnalysis<MachineDominatorTree>();
Loops = getAnalysisIfAvailable<MachineLoopInfo>();
@@ -798,6 +797,5 @@ bool EarlyIfConverter::runOnMachineFunction(MachineFunction &MF) {
if (tryConvertIf(I->getBlock()))
Changed = true;
- MF.verify(this, "After early if-conversion");
return Changed;
}
diff --git a/contrib/llvm/lib/CodeGen/ExecutionDepsFix.cpp b/contrib/llvm/lib/CodeGen/ExecutionDepsFix.cpp
index fee8e47..ed78f19 100644
--- a/contrib/llvm/lib/CodeGen/ExecutionDepsFix.cpp
+++ b/contrib/llvm/lib/CodeGen/ExecutionDepsFix.cpp
@@ -626,9 +626,12 @@ void ExeDepsFix::visitSoftInstr(MachineInstr *mi, unsigned mask) {
}
dv->Instrs.push_back(mi);
- // Finally set all defs and non-collapsed uses to dv.
- for (unsigned i = 0, e = mi->getDesc().getNumOperands(); i != e; ++i) {
- MachineOperand &mo = mi->getOperand(i);
+ // Finally set all defs and non-collapsed uses to dv. We must iterate through
+ // all the operators, including imp-def ones.
+ for (MachineInstr::mop_iterator ii = mi->operands_begin(),
+ ee = mi->operands_end();
+ ii != ee; ++ii) {
+ MachineOperand &mo = *ii;
if (!mo.isReg()) continue;
int rx = regIndex(mo.getReg());
if (rx < 0) continue;
@@ -654,7 +657,7 @@ bool ExeDepsFix::runOnMachineFunction(MachineFunction &mf) {
bool anyregs = false;
for (TargetRegisterClass::const_iterator I = RC->begin(), E = RC->end();
I != E; ++I)
- if (MF->getRegInfo().isPhysRegOrOverlapUsed(*I)) {
+ if (MF->getRegInfo().isPhysRegUsed(*I)) {
anyregs = true;
break;
}
diff --git a/contrib/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp b/contrib/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp
index 7a17331..ffe4b63 100644
--- a/contrib/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp
+++ b/contrib/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp
@@ -14,7 +14,6 @@
#define DEBUG_TYPE "postrapseudos"
#include "llvm/CodeGen/Passes.h"
-#include "llvm/Function.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -190,8 +189,7 @@ bool ExpandPostRA::LowerCopy(MachineInstr *MI) {
bool ExpandPostRA::runOnMachineFunction(MachineFunction &MF) {
DEBUG(dbgs() << "Machine Function\n"
<< "********** EXPANDING POST-RA PSEUDO INSTRS **********\n"
- << "********** Function: "
- << MF.getFunction()->getName() << '\n');
+ << "********** Function: " << MF.getName() << '\n');
TRI = MF.getTarget().getRegisterInfo();
TII = MF.getTarget().getInstrInfo();
diff --git a/contrib/llvm/lib/CodeGen/GCStrategy.cpp b/contrib/llvm/lib/CodeGen/GCStrategy.cpp
index 506b5cf..f4755bb 100644
--- a/contrib/llvm/lib/CodeGen/GCStrategy.cpp
+++ b/contrib/llvm/lib/CodeGen/GCStrategy.cpp
@@ -20,6 +20,7 @@
#include "llvm/IntrinsicInst.h"
#include "llvm/Module.h"
#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/DominatorInternals.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -387,9 +388,16 @@ void GCMachineCodeAnalysis::FindStackOffsets(MachineFunction &MF) {
const TargetFrameLowering *TFI = TM->getFrameLowering();
assert(TFI && "TargetRegisterInfo not available!");
- for (GCFunctionInfo::roots_iterator RI = FI->roots_begin(),
- RE = FI->roots_end(); RI != RE; ++RI)
- RI->StackOffset = TFI->getFrameIndexOffset(MF, RI->Num);
+ for (GCFunctionInfo::roots_iterator RI = FI->roots_begin();
+ RI != FI->roots_end();) {
+ // If the root references a dead object, no need to keep it.
+ if (MF.getFrameInfo()->isDeadObjectIndex(RI->Num)) {
+ RI = FI->removeStackRoot(RI);
+ } else {
+ RI->StackOffset = TFI->getFrameIndexOffset(MF, RI->Num);
+ ++RI;
+ }
+ }
}
bool GCMachineCodeAnalysis::runOnMachineFunction(MachineFunction &MF) {
diff --git a/contrib/llvm/lib/CodeGen/IfConversion.cpp b/contrib/llvm/lib/CodeGen/IfConversion.cpp
index 4214ba1..31e36f0 100644
--- a/contrib/llvm/lib/CodeGen/IfConversion.cpp
+++ b/contrib/llvm/lib/CodeGen/IfConversion.cpp
@@ -13,7 +13,6 @@
#define DEBUG_TYPE "ifcvt"
#include "BranchFolding.h"
-#include "llvm/Function.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
@@ -282,7 +281,7 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) {
}
DEBUG(dbgs() << "\nIfcvt: function (" << ++FnNum << ") \'"
- << MF.getFunction()->getName() << "\'");
+ << MF.getName() << "\'");
if (FnNum < IfCvtFnStart || (IfCvtFnStop != -1 && FnNum > IfCvtFnStop)) {
DEBUG(dbgs() << " skipped\n");
@@ -997,14 +996,13 @@ static void UpdatePredRedefs(MachineInstr *MI, SmallSet<unsigned,4> &Redefs,
}
for (unsigned i = 0, e = Defs.size(); i != e; ++i) {
unsigned Reg = Defs[i];
- if (Redefs.count(Reg)) {
+ if (!Redefs.insert(Reg)) {
if (AddImpUse)
// Treat predicated update as read + write.
MI->addOperand(MachineOperand::CreateReg(Reg, false/*IsDef*/,
true/*IsImp*/,false/*IsKill*/,
false/*IsDead*/,true/*IsUndef*/));
} else {
- Redefs.insert(Reg);
for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs)
Redefs.insert(*SubRegs);
}
diff --git a/contrib/llvm/lib/CodeGen/InlineSpiller.cpp b/contrib/llvm/lib/CodeGen/InlineSpiller.cpp
index 07e37af..37828a7 100644
--- a/contrib/llvm/lib/CodeGen/InlineSpiller.cpp
+++ b/contrib/llvm/lib/CodeGen/InlineSpiller.cpp
@@ -613,7 +613,7 @@ MachineInstr *InlineSpiller::traceSiblingValue(unsigned UseReg, VNInfo *UseVNI,
propagateSiblingValue(SVI);
} while (!WorkList.empty());
- // Look up the value we were looking for. We already did this lokup at the
+ // Look up the value we were looking for. We already did this lookup at the
// top of the function, but SibValues may have been invalidated.
SVI = SibValues.find(UseVNI);
assert(SVI != SibValues.end() && "Didn't compute requested info");
@@ -863,7 +863,7 @@ bool InlineSpiller::reMaterializeFor(LiveInterval &VirtReg,
// If the instruction also writes VirtReg.reg, it had better not require the
// same register for uses and defs.
SmallVector<std::pair<MachineInstr*, unsigned>, 8> Ops;
- MIBundleOperands::RegInfo RI =
+ MIBundleOperands::VirtRegInfo RI =
MIBundleOperands(MI).analyzeVirtReg(VirtReg.reg, &Ops);
if (RI.Tied) {
markValueUsed(&VirtReg, ParentVNI);
@@ -1142,7 +1142,7 @@ void InlineSpiller::spillAroundUses(unsigned Reg) {
// Analyze instruction.
SmallVector<std::pair<MachineInstr*, unsigned>, 8> Ops;
- MIBundleOperands::RegInfo RI =
+ MIBundleOperands::VirtRegInfo RI =
MIBundleOperands(MI).analyzeVirtReg(Reg, &Ops);
// Find the slot index where this instruction reads and writes OldLI.
diff --git a/contrib/llvm/lib/CodeGen/IntrinsicLowering.cpp b/contrib/llvm/lib/CodeGen/IntrinsicLowering.cpp
index 8d2282a..6120ae56 100644
--- a/contrib/llvm/lib/CodeGen/IntrinsicLowering.cpp
+++ b/contrib/llvm/lib/CodeGen/IntrinsicLowering.cpp
@@ -21,7 +21,7 @@
#include "llvm/Support/CallSite.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetData.h"
+#include "llvm/DataLayout.h"
using namespace llvm;
template <class ArgIt>
@@ -457,7 +457,7 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) {
break; // Strip out annotate intrinsic
case Intrinsic::memcpy: {
- IntegerType *IntPtr = TD.getIntPtrType(Context);
+ Type *IntPtr = TD.getIntPtrType(Context);
Value *Size = Builder.CreateIntCast(CI->getArgOperand(2), IntPtr,
/* isSigned */ false);
Value *Ops[3];
@@ -468,7 +468,7 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) {
break;
}
case Intrinsic::memmove: {
- IntegerType *IntPtr = TD.getIntPtrType(Context);
+ Type *IntPtr = TD.getIntPtrType(Context);
Value *Size = Builder.CreateIntCast(CI->getArgOperand(2), IntPtr,
/* isSigned */ false);
Value *Ops[3];
@@ -479,7 +479,7 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) {
break;
}
case Intrinsic::memset: {
- IntegerType *IntPtr = TD.getIntPtrType(Context);
+ Type *IntPtr = TD.getIntPtrType(Context);
Value *Size = Builder.CreateIntCast(CI->getArgOperand(2), IntPtr,
/* isSigned */ false);
Value *Ops[3];
diff --git a/contrib/llvm/lib/CodeGen/LiveDebugVariables.cpp b/contrib/llvm/lib/CodeGen/LiveDebugVariables.cpp
index d631726..defc127 100644
--- a/contrib/llvm/lib/CodeGen/LiveDebugVariables.cpp
+++ b/contrib/llvm/lib/CodeGen/LiveDebugVariables.cpp
@@ -687,8 +687,7 @@ bool LDVImpl::runOnMachineFunction(MachineFunction &mf) {
clear();
LS.initialize(mf);
DEBUG(dbgs() << "********** COMPUTING LIVE DEBUG VARIABLES: "
- << ((Value*)mf.getFunction())->getName()
- << " **********\n");
+ << mf.getName() << " **********\n");
bool Changed = collectDebugValues(mf);
computeIntervals();
diff --git a/contrib/llvm/lib/CodeGen/LiveInterval.cpp b/contrib/llvm/lib/CodeGen/LiveInterval.cpp
index 0a795e6..8585cbb 100644
--- a/contrib/llvm/lib/CodeGen/LiveInterval.cpp
+++ b/contrib/llvm/lib/CodeGen/LiveInterval.cpp
@@ -27,6 +27,7 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetRegisterInfo.h"
+#include "RegisterCoalescer.h"
#include <algorithm>
using namespace llvm;
@@ -58,8 +59,16 @@ VNInfo *LiveInterval::createDeadDef(SlotIndex Def,
return VNI;
}
if (SlotIndex::isSameInstr(Def, I->start)) {
- assert(I->start == Def && "Cannot insert def, already live");
- assert(I->valno->def == Def && "Inconsistent existing value def");
+ assert(I->valno->def == I->start && "Inconsistent existing value def");
+
+ // It is possible to have both normal and early-clobber defs of the same
+ // register on an instruction. It doesn't make a lot of sense, but it is
+ // possible to specify in inline assembly.
+ //
+ // Just convert everything to early-clobber.
+ Def = std::min(Def, I->start);
+ if (Def != I->start)
+ I->start = I->valno->def = Def;
return I->valno;
}
assert(SlotIndex::isEarlierInstr(Def, I->start) && "Already live at def");
@@ -68,21 +77,6 @@ VNInfo *LiveInterval::createDeadDef(SlotIndex Def,
return VNI;
}
-/// killedInRange - Return true if the interval has kills in [Start,End).
-bool LiveInterval::killedInRange(SlotIndex Start, SlotIndex End) const {
- Ranges::const_iterator r =
- std::lower_bound(ranges.begin(), ranges.end(), End);
-
- // Now r points to the first interval with start >= End, or ranges.end().
- if (r == ranges.begin())
- return false;
-
- --r;
- // Now r points to the last interval with end <= End.
- // r->end is the kill point.
- return r->end >= Start && r->end < End;
-}
-
// overlaps - Return true if the intersection of the two live intervals is
// not empty.
//
@@ -142,6 +136,48 @@ bool LiveInterval::overlapsFrom(const LiveInterval& other,
return false;
}
+bool LiveInterval::overlaps(const LiveInterval &Other,
+ const CoalescerPair &CP,
+ const SlotIndexes &Indexes) const {
+ assert(!empty() && "empty interval");
+ if (Other.empty())
+ return false;
+
+ // Use binary searches to find initial positions.
+ const_iterator I = find(Other.beginIndex());
+ const_iterator IE = end();
+ if (I == IE)
+ return false;
+ const_iterator J = Other.find(I->start);
+ const_iterator JE = Other.end();
+ if (J == JE)
+ return false;
+
+ for (;;) {
+ // J has just been advanced to satisfy:
+ assert(J->end >= I->start);
+ // Check for an overlap.
+ if (J->start < I->end) {
+ // I and J are overlapping. Find the later start.
+ SlotIndex Def = std::max(I->start, J->start);
+ // Allow the overlap if Def is a coalescable copy.
+ if (Def.isBlock() ||
+ !CP.isCoalescable(Indexes.getInstructionFromIndex(Def)))
+ return true;
+ }
+ // Advance the iterator that ends first to check for more overlaps.
+ if (J->end > I->end) {
+ std::swap(I, J);
+ std::swap(IE, JE);
+ }
+ // Advance J until J->end >= I->start.
+ do
+ if (++J == JE)
+ return false;
+ while (J->end < I->start);
+ }
+}
+
/// overlaps - Return true if the live interval overlaps a range specified
/// by [Start, End).
bool LiveInterval::overlaps(SlotIndex Start, SlotIndex End) const {
@@ -399,7 +435,7 @@ void LiveInterval::join(LiveInterval &Other,
// If we have to apply a mapping to our base interval assignment, rewrite it
// now.
- if (MustMapCurValNos) {
+ if (MustMapCurValNos && !empty()) {
// Map the first live range.
iterator OutIt = begin();
@@ -673,27 +709,6 @@ VNInfo* LiveInterval::MergeValueNumberInto(VNInfo *V1, VNInfo *V2) {
return V2;
}
-void LiveInterval::Copy(const LiveInterval &RHS,
- MachineRegisterInfo *MRI,
- VNInfo::Allocator &VNInfoAllocator) {
- ranges.clear();
- valnos.clear();
- std::pair<unsigned, unsigned> Hint = MRI->getRegAllocationHint(RHS.reg);
- MRI->setRegAllocationHint(reg, Hint.first, Hint.second);
-
- weight = RHS.weight;
- for (unsigned i = 0, e = RHS.getNumValNums(); i != e; ++i) {
- const VNInfo *VNI = RHS.getValNumInfo(i);
- createValueCopy(VNI, VNInfoAllocator);
- }
- for (unsigned i = 0, e = RHS.ranges.size(); i != e; ++i) {
- const LiveRange &LR = RHS.ranges[i];
- addRange(LiveRange(LR.start, LR.end, getValNumInfo(LR.valno->id)));
- }
-
- verify();
-}
-
unsigned LiveInterval::getSize() const {
unsigned Sum = 0;
for (const_iterator I = begin(), E = end(); I != E; ++I)
@@ -705,9 +720,11 @@ raw_ostream& llvm::operator<<(raw_ostream& os, const LiveRange &LR) {
return os << '[' << LR.start << ',' << LR.end << ':' << LR.valno->id << ")";
}
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void LiveRange::dump() const {
dbgs() << *this << "\n";
}
+#endif
void LiveInterval::print(raw_ostream &OS) const {
if (empty())
@@ -740,9 +757,11 @@ void LiveInterval::print(raw_ostream &OS) const {
}
}
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void LiveInterval::dump() const {
dbgs() << *this << "\n";
}
+#endif
#ifndef NDEBUG
void LiveInterval::verify() const {
diff --git a/contrib/llvm/lib/CodeGen/LiveIntervalAnalysis.cpp b/contrib/llvm/lib/CodeGen/LiveIntervalAnalysis.cpp
index d0f8ae1..4e75d89 100644
--- a/contrib/llvm/lib/CodeGen/LiveIntervalAnalysis.cpp
+++ b/contrib/llvm/lib/CodeGen/LiveIntervalAnalysis.cpp
@@ -34,6 +34,7 @@
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/STLExtras.h"
#include "LiveRangeCalc.h"
+#include "VirtRegMap.h"
#include <algorithm>
#include <limits>
#include <cmath>
@@ -109,8 +110,6 @@ bool LiveIntervals::runOnMachineFunction(MachineFunction &fn) {
DomTree = &getAnalysis<MachineDominatorTree>();
if (!LRCalc)
LRCalc = new LiveRangeCalc();
- AllocatableRegs = TRI->getAllocatableSet(fn);
- ReservedRegs = TRI->getReservedRegs(fn);
// Allocate space for all virtual registers.
VirtRegIntervals.resize(MRI->getNumVirtRegs());
@@ -147,6 +146,11 @@ void LiveIntervals::print(raw_ostream &OS, const Module* ) const {
OS << PrintReg(Reg) << " = " << getInterval(Reg) << '\n';
}
+ OS << "RegMasks:";
+ for (unsigned i = 0, e = RegMaskSlots.size(); i != e; ++i)
+ OS << ' ' << RegMaskSlots[i];
+ OS << '\n';
+
printInstrs(OS);
}
@@ -155,9 +159,11 @@ void LiveIntervals::printInstrs(raw_ostream &OS) const {
MF->print(OS, Indexes);
}
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void LiveIntervals::dumpInstrs() const {
printInstrs(dbgs());
}
+#endif
static
bool MultipleDefsBySameMI(const MachineInstr &MI, unsigned MOIdx) {
@@ -382,8 +388,7 @@ void LiveIntervals::handleRegisterDef(MachineBasicBlock *MBB,
/// which a variable is live
void LiveIntervals::computeIntervals() {
DEBUG(dbgs() << "********** COMPUTING LIVE INTERVALS **********\n"
- << "********** Function: "
- << ((Value*)MF->getFunction())->getName() << '\n');
+ << "********** Function: " << MF->getName() << '\n');
RegMaskBlocks.resize(MF->getNumBlockIDs());
@@ -440,7 +445,7 @@ void LiveIntervals::computeIntervals() {
// Compute the number of register mask instructions in this block.
std::pair<unsigned, unsigned> &RMB = RegMaskBlocks[MBB->getNumber()];
- RMB.second = RegMaskSlots.size() - RMB.first;;
+ RMB.second = RegMaskSlots.size() - RMB.first;
}
// Create empty intervals for registers defined by implicit_def's (except
@@ -497,7 +502,7 @@ void LiveIntervals::computeRegMasks() {
RegMaskBits.push_back(MO->getRegMask());
}
// Compute the number of register mask instructions in this block.
- RMB.second = RegMaskSlots.size() - RMB.first;;
+ RMB.second = RegMaskSlots.size() - RMB.first;
}
}
@@ -540,11 +545,11 @@ void LiveIntervals::computeRegUnitInterval(LiveInterval *LI) {
// Ignore uses of reserved registers. We only track defs of those.
for (MCRegUnitRootIterator Roots(Unit, TRI); Roots.isValid(); ++Roots) {
unsigned Root = *Roots;
- if (!isReserved(Root) && !MRI->reg_empty(Root))
+ if (!MRI->isReserved(Root) && !MRI->reg_empty(Root))
LRCalc->extendToUses(LI, Root);
for (MCSuperRegIterator Supers(Root, TRI); Supers.isValid(); ++Supers) {
unsigned Reg = *Supers;
- if (!isReserved(Reg) && !MRI->reg_empty(Reg))
+ if (!MRI->isReserved(Reg) && !MRI->reg_empty(Reg))
LRCalc->extendToUses(LI, Reg);
}
}
@@ -729,17 +734,100 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li,
return CanSeparate;
}
+void LiveIntervals::extendToIndices(LiveInterval *LI,
+ ArrayRef<SlotIndex> Indices) {
+ assert(LRCalc && "LRCalc not initialized.");
+ LRCalc->reset(MF, getSlotIndexes(), DomTree, &getVNInfoAllocator());
+ for (unsigned i = 0, e = Indices.size(); i != e; ++i)
+ LRCalc->extend(LI, Indices[i]);
+}
+
+void LiveIntervals::pruneValue(LiveInterval *LI, SlotIndex Kill,
+ SmallVectorImpl<SlotIndex> *EndPoints) {
+ LiveRangeQuery LRQ(*LI, Kill);
+ VNInfo *VNI = LRQ.valueOut();
+ if (!VNI)
+ return;
+
+ MachineBasicBlock *KillMBB = Indexes->getMBBFromIndex(Kill);
+ SlotIndex MBBStart, MBBEnd;
+ tie(MBBStart, MBBEnd) = Indexes->getMBBRange(KillMBB);
+
+ // If VNI isn't live out from KillMBB, the value is trivially pruned.
+ if (LRQ.endPoint() < MBBEnd) {
+ LI->removeRange(Kill, LRQ.endPoint());
+ if (EndPoints) EndPoints->push_back(LRQ.endPoint());
+ return;
+ }
+
+ // VNI is live out of KillMBB.
+ LI->removeRange(Kill, MBBEnd);
+ if (EndPoints) EndPoints->push_back(MBBEnd);
+
+ // Find all blocks that are reachable from KillMBB without leaving VNI's live
+ // range. It is possible that KillMBB itself is reachable, so start a DFS
+ // from each successor.
+ typedef SmallPtrSet<MachineBasicBlock*, 9> VisitedTy;
+ VisitedTy Visited;
+ for (MachineBasicBlock::succ_iterator
+ SuccI = KillMBB->succ_begin(), SuccE = KillMBB->succ_end();
+ SuccI != SuccE; ++SuccI) {
+ for (df_ext_iterator<MachineBasicBlock*, VisitedTy>
+ I = df_ext_begin(*SuccI, Visited), E = df_ext_end(*SuccI, Visited);
+ I != E;) {
+ MachineBasicBlock *MBB = *I;
+
+ // Check if VNI is live in to MBB.
+ tie(MBBStart, MBBEnd) = Indexes->getMBBRange(MBB);
+ LiveRangeQuery LRQ(*LI, MBBStart);
+ if (LRQ.valueIn() != VNI) {
+ // This block isn't part of the VNI live range. Prune the search.
+ I.skipChildren();
+ continue;
+ }
+
+ // Prune the search if VNI is killed in MBB.
+ if (LRQ.endPoint() < MBBEnd) {
+ LI->removeRange(MBBStart, LRQ.endPoint());
+ if (EndPoints) EndPoints->push_back(LRQ.endPoint());
+ I.skipChildren();
+ continue;
+ }
+
+ // VNI is live through MBB.
+ LI->removeRange(MBBStart, MBBEnd);
+ if (EndPoints) EndPoints->push_back(MBBEnd);
+ ++I;
+ }
+ }
+}
//===----------------------------------------------------------------------===//
// Register allocator hooks.
//
-void LiveIntervals::addKillFlags() {
+void LiveIntervals::addKillFlags(const VirtRegMap *VRM) {
+ // Keep track of regunit ranges.
+ SmallVector<std::pair<LiveInterval*, LiveInterval::iterator>, 8> RU;
+
for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) {
unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
if (MRI->reg_nodbg_empty(Reg))
continue;
LiveInterval *LI = &getInterval(Reg);
+ if (LI->empty())
+ continue;
+
+ // Find the regunit intervals for the assigned register. They may overlap
+ // the virtual register live range, cancelling any kills.
+ RU.clear();
+ for (MCRegUnitIterator Units(VRM->getPhys(Reg), TRI); Units.isValid();
+ ++Units) {
+ LiveInterval *RUInt = &getRegUnit(*Units);
+ if (RUInt->empty())
+ continue;
+ RU.push_back(std::make_pair(RUInt, RUInt->find(LI->begin()->end)));
+ }
// Every instruction that kills Reg corresponds to a live range end point.
for (LiveInterval::iterator RI = LI->begin(), RE = LI->end(); RI != RE;
@@ -750,7 +838,32 @@ void LiveIntervals::addKillFlags() {
MachineInstr *MI = getInstructionFromIndex(RI->end);
if (!MI)
continue;
- MI->addRegisterKilled(Reg, NULL);
+
+ // Check if any of the reguints are live beyond the end of RI. That could
+ // happen when a physreg is defined as a copy of a virtreg:
+ //
+ // %EAX = COPY %vreg5
+ // FOO %vreg5 <--- MI, cancel kill because %EAX is live.
+ // BAR %EAX<kill>
+ //
+ // There should be no kill flag on FOO when %vreg5 is rewritten as %EAX.
+ bool CancelKill = false;
+ for (unsigned u = 0, e = RU.size(); u != e; ++u) {
+ LiveInterval *RInt = RU[u].first;
+ LiveInterval::iterator &I = RU[u].second;
+ if (I == RInt->end())
+ continue;
+ I = RInt->advanceTo(I, RI->end);
+ if (I == RInt->end() || I->start >= RI->end)
+ continue;
+ // I is overlapping RI.
+ CancelKill = true;
+ break;
+ }
+ if (CancelKill)
+ MI->clearRegisterKills(Reg, NULL);
+ else
+ MI->addRegisterKilled(Reg, NULL);
}
}
}
@@ -900,497 +1013,321 @@ private:
LiveIntervals& LIS;
const MachineRegisterInfo& MRI;
const TargetRegisterInfo& TRI;
+ SlotIndex OldIdx;
SlotIndex NewIdx;
-
- typedef std::pair<LiveInterval*, LiveRange*> IntRangePair;
- typedef DenseSet<IntRangePair> RangeSet;
-
- struct RegRanges {
- LiveRange* Use;
- LiveRange* EC;
- LiveRange* Dead;
- LiveRange* Def;
- RegRanges() : Use(0), EC(0), Dead(0), Def(0) {}
- };
- typedef DenseMap<unsigned, RegRanges> BundleRanges;
+ SmallPtrSet<LiveInterval*, 8> Updated;
+ bool UpdateFlags;
public:
HMEditor(LiveIntervals& LIS, const MachineRegisterInfo& MRI,
- const TargetRegisterInfo& TRI, SlotIndex NewIdx)
- : LIS(LIS), MRI(MRI), TRI(TRI), NewIdx(NewIdx) {}
-
- // Update intervals for all operands of MI from OldIdx to NewIdx.
- // This assumes that MI used to be at OldIdx, and now resides at
- // NewIdx.
- void moveAllRangesFrom(MachineInstr* MI, SlotIndex OldIdx) {
- assert(NewIdx != OldIdx && "No-op move? That's a bit strange.");
-
- // Collect the operands.
- RangeSet Entering, Internal, Exiting;
- bool hasRegMaskOp = false;
- collectRanges(MI, Entering, Internal, Exiting, hasRegMaskOp, OldIdx);
-
- // To keep the LiveRanges valid within an interval, move the ranges closest
- // to the destination first. This prevents ranges from overlapping, to that
- // APIs like removeRange still work.
- if (NewIdx < OldIdx) {
- moveAllEnteringFrom(OldIdx, Entering);
- moveAllInternalFrom(OldIdx, Internal);
- moveAllExitingFrom(OldIdx, Exiting);
- }
- else {
- moveAllExitingFrom(OldIdx, Exiting);
- moveAllInternalFrom(OldIdx, Internal);
- moveAllEnteringFrom(OldIdx, Entering);
- }
-
- if (hasRegMaskOp)
- updateRegMaskSlots(OldIdx);
-
-#ifndef NDEBUG
- LIValidator validator;
- validator = std::for_each(Entering.begin(), Entering.end(), validator);
- validator = std::for_each(Internal.begin(), Internal.end(), validator);
- validator = std::for_each(Exiting.begin(), Exiting.end(), validator);
- assert(validator.rangesOk() && "moveAllOperandsFrom broke liveness.");
-#endif
-
+ const TargetRegisterInfo& TRI,
+ SlotIndex OldIdx, SlotIndex NewIdx, bool UpdateFlags)
+ : LIS(LIS), MRI(MRI), TRI(TRI), OldIdx(OldIdx), NewIdx(NewIdx),
+ UpdateFlags(UpdateFlags) {}
+
+ // FIXME: UpdateFlags is a workaround that creates live intervals for all
+ // physregs, even those that aren't needed for regalloc, in order to update
+ // kill flags. This is wasteful. Eventually, LiveVariables will strip all kill
+ // flags, and postRA passes will use a live register utility instead.
+ LiveInterval *getRegUnitLI(unsigned Unit) {
+ if (UpdateFlags)
+ return &LIS.getRegUnit(Unit);
+ return LIS.getCachedRegUnit(Unit);
}
- // Update intervals for all operands of MI to refer to BundleStart's
- // SlotIndex.
- void moveAllRangesInto(MachineInstr* MI, MachineInstr* BundleStart) {
- if (MI == BundleStart)
- return; // Bundling instr with itself - nothing to do.
-
- SlotIndex OldIdx = LIS.getSlotIndexes()->getInstructionIndex(MI);
- assert(LIS.getSlotIndexes()->getInstructionFromIndex(OldIdx) == MI &&
- "SlotIndex <-> Instruction mapping broken for MI");
-
- // Collect all ranges already in the bundle.
- MachineBasicBlock::instr_iterator BII(BundleStart);
- RangeSet Entering, Internal, Exiting;
- bool hasRegMaskOp = false;
- collectRanges(BII, Entering, Internal, Exiting, hasRegMaskOp, NewIdx);
- assert(!hasRegMaskOp && "Can't have RegMask operand in bundle.");
- for (++BII; &*BII == MI || BII->isInsideBundle(); ++BII) {
- if (&*BII == MI)
+ /// Update all live ranges touched by MI, assuming a move from OldIdx to
+ /// NewIdx.
+ void updateAllRanges(MachineInstr *MI) {
+ DEBUG(dbgs() << "handleMove " << OldIdx << " -> " << NewIdx << ": " << *MI);
+ bool hasRegMask = false;
+ for (MIOperands MO(MI); MO.isValid(); ++MO) {
+ if (MO->isRegMask())
+ hasRegMask = true;
+ if (!MO->isReg())
continue;
- collectRanges(BII, Entering, Internal, Exiting, hasRegMaskOp, NewIdx);
- assert(!hasRegMaskOp && "Can't have RegMask operand in bundle.");
- }
-
- BundleRanges BR = createBundleRanges(Entering, Internal, Exiting);
-
- Entering.clear();
- Internal.clear();
- Exiting.clear();
- collectRanges(MI, Entering, Internal, Exiting, hasRegMaskOp, OldIdx);
- assert(!hasRegMaskOp && "Can't have RegMask operand in bundle.");
+ // Aggressively clear all kill flags.
+ // They are reinserted by VirtRegRewriter.
+ if (MO->isUse())
+ MO->setIsKill(false);
- DEBUG(dbgs() << "Entering: " << Entering.size() << "\n");
- DEBUG(dbgs() << "Internal: " << Internal.size() << "\n");
- DEBUG(dbgs() << "Exiting: " << Exiting.size() << "\n");
-
- moveAllEnteringFromInto(OldIdx, Entering, BR);
- moveAllInternalFromInto(OldIdx, Internal, BR);
- moveAllExitingFromInto(OldIdx, Exiting, BR);
-
-
-#ifndef NDEBUG
- LIValidator validator;
- validator = std::for_each(Entering.begin(), Entering.end(), validator);
- validator = std::for_each(Internal.begin(), Internal.end(), validator);
- validator = std::for_each(Exiting.begin(), Exiting.end(), validator);
- assert(validator.rangesOk() && "moveAllOperandsInto broke liveness.");
-#endif
- }
-
-private:
-
-#ifndef NDEBUG
- class LIValidator {
- private:
- DenseSet<const LiveInterval*> Checked, Bogus;
- public:
- void operator()(const IntRangePair& P) {
- const LiveInterval* LI = P.first;
- if (Checked.count(LI))
- return;
- Checked.insert(LI);
- if (LI->empty())
- return;
- SlotIndex LastEnd = LI->begin()->start;
- for (LiveInterval::const_iterator LRI = LI->begin(), LRE = LI->end();
- LRI != LRE; ++LRI) {
- const LiveRange& LR = *LRI;
- if (LastEnd > LR.start || LR.start >= LR.end)
- Bogus.insert(LI);
- LastEnd = LR.end;
- }
- }
-
- bool rangesOk() const {
- return Bogus.empty();
- }
- };
-#endif
-
- // Collect IntRangePairs for all operands of MI that may need fixing.
- // Treat's MI's index as OldIdx (regardless of what it is in SlotIndexes'
- // maps).
- void collectRanges(MachineInstr* MI, RangeSet& Entering, RangeSet& Internal,
- RangeSet& Exiting, bool& hasRegMaskOp, SlotIndex OldIdx) {
- hasRegMaskOp = false;
- for (MachineInstr::mop_iterator MOI = MI->operands_begin(),
- MOE = MI->operands_end();
- MOI != MOE; ++MOI) {
- const MachineOperand& MO = *MOI;
-
- if (MO.isRegMask()) {
- hasRegMaskOp = true;
+ unsigned Reg = MO->getReg();
+ if (!Reg)
continue;
- }
-
- if (!MO.isReg() || MO.getReg() == 0)
- continue;
-
- unsigned Reg = MO.getReg();
-
- // TODO: Currently we're skipping uses that are reserved or have no
- // interval, but we're not updating their kills. This should be
- // fixed.
- if (TargetRegisterInfo::isPhysicalRegister(Reg) && LIS.isReserved(Reg))
+ if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ updateRange(LIS.getInterval(Reg));
continue;
-
- // Collect ranges for register units. These live ranges are computed on
- // demand, so just skip any that haven't been computed yet.
- if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
- for (MCRegUnitIterator Units(Reg, &TRI); Units.isValid(); ++Units)
- if (LiveInterval *LI = LIS.getCachedRegUnit(*Units))
- collectRanges(MO, LI, Entering, Internal, Exiting, OldIdx);
- } else {
- // Collect ranges for individual virtual registers.
- collectRanges(MO, &LIS.getInterval(Reg),
- Entering, Internal, Exiting, OldIdx);
}
+
+ // For physregs, only update the regunits that actually have a
+ // precomputed live range.
+ for (MCRegUnitIterator Units(Reg, &TRI); Units.isValid(); ++Units)
+ if (LiveInterval *LI = getRegUnitLI(*Units))
+ updateRange(*LI);
}
+ if (hasRegMask)
+ updateRegMaskSlots();
}
- void collectRanges(const MachineOperand &MO, LiveInterval *LI,
- RangeSet &Entering, RangeSet &Internal, RangeSet &Exiting,
- SlotIndex OldIdx) {
- if (MO.readsReg()) {
- LiveRange* LR = LI->getLiveRangeContaining(OldIdx);
- if (LR != 0)
- Entering.insert(std::make_pair(LI, LR));
- }
- if (MO.isDef()) {
- LiveRange* LR = LI->getLiveRangeContaining(OldIdx.getRegSlot());
- assert(LR != 0 && "No live range for def?");
- if (LR->end > OldIdx.getDeadSlot())
- Exiting.insert(std::make_pair(LI, LR));
+private:
+ /// Update a single live range, assuming an instruction has been moved from
+ /// OldIdx to NewIdx.
+ void updateRange(LiveInterval &LI) {
+ if (!Updated.insert(&LI))
+ return;
+ DEBUG({
+ dbgs() << " ";
+ if (TargetRegisterInfo::isVirtualRegister(LI.reg))
+ dbgs() << PrintReg(LI.reg);
else
- Internal.insert(std::make_pair(LI, LR));
- }
+ dbgs() << PrintRegUnit(LI.reg, &TRI);
+ dbgs() << ":\t" << LI << '\n';
+ });
+ if (SlotIndex::isEarlierInstr(OldIdx, NewIdx))
+ handleMoveDown(LI);
+ else
+ handleMoveUp(LI);
+ DEBUG(dbgs() << " -->\t" << LI << '\n');
+ LI.verify();
}
- BundleRanges createBundleRanges(RangeSet& Entering,
- RangeSet& Internal,
- RangeSet& Exiting) {
- BundleRanges BR;
-
- for (RangeSet::iterator EI = Entering.begin(), EE = Entering.end();
- EI != EE; ++EI) {
- LiveInterval* LI = EI->first;
- LiveRange* LR = EI->second;
- BR[LI->reg].Use = LR;
- }
+ /// Update LI to reflect an instruction has been moved downwards from OldIdx
+ /// to NewIdx.
+ ///
+ /// 1. Live def at OldIdx:
+ /// Move def to NewIdx, assert endpoint after NewIdx.
+ ///
+ /// 2. Live def at OldIdx, killed at NewIdx:
+ /// Change to dead def at NewIdx.
+ /// (Happens when bundling def+kill together).
+ ///
+ /// 3. Dead def at OldIdx:
+ /// Move def to NewIdx, possibly across another live value.
+ ///
+ /// 4. Def at OldIdx AND at NewIdx:
+ /// Remove live range [OldIdx;NewIdx) and value defined at OldIdx.
+ /// (Happens when bundling multiple defs together).
+ ///
+ /// 5. Value read at OldIdx, killed before NewIdx:
+ /// Extend kill to NewIdx.
+ ///
+ void handleMoveDown(LiveInterval &LI) {
+ // First look for a kill at OldIdx.
+ LiveInterval::iterator I = LI.find(OldIdx.getBaseIndex());
+ LiveInterval::iterator E = LI.end();
+ // Is LI even live at OldIdx?
+ if (I == E || SlotIndex::isEarlierInstr(OldIdx, I->start))
+ return;
- for (RangeSet::iterator II = Internal.begin(), IE = Internal.end();
- II != IE; ++II) {
- LiveInterval* LI = II->first;
- LiveRange* LR = II->second;
- if (LR->end.isDead()) {
- BR[LI->reg].Dead = LR;
- } else {
- BR[LI->reg].EC = LR;
- }
+ // Handle a live-in value.
+ if (!SlotIndex::isSameInstr(I->start, OldIdx)) {
+ bool isKill = SlotIndex::isSameInstr(OldIdx, I->end);
+ // If the live-in value already extends to NewIdx, there is nothing to do.
+ if (!SlotIndex::isEarlierInstr(I->end, NewIdx))
+ return;
+ // Aggressively remove all kill flags from the old kill point.
+ // Kill flags shouldn't be used while live intervals exist, they will be
+ // reinserted by VirtRegRewriter.
+ if (MachineInstr *KillMI = LIS.getInstructionFromIndex(I->end))
+ for (MIBundleOperands MO(KillMI); MO.isValid(); ++MO)
+ if (MO->isReg() && MO->isUse())
+ MO->setIsKill(false);
+ // Adjust I->end to reach NewIdx. This may temporarily make LI invalid by
+ // overlapping ranges. Case 5 above.
+ I->end = NewIdx.getRegSlot(I->end.isEarlyClobber());
+ // If this was a kill, there may also be a def. Otherwise we're done.
+ if (!isKill)
+ return;
+ ++I;
}
- for (RangeSet::iterator EI = Exiting.begin(), EE = Exiting.end();
- EI != EE; ++EI) {
- LiveInterval* LI = EI->first;
- LiveRange* LR = EI->second;
- BR[LI->reg].Def = LR;
+ // Check for a def at OldIdx.
+ if (I == E || !SlotIndex::isSameInstr(OldIdx, I->start))
+ return;
+ // We have a def at OldIdx.
+ VNInfo *DefVNI = I->valno;
+ assert(DefVNI->def == I->start && "Inconsistent def");
+ DefVNI->def = NewIdx.getRegSlot(I->start.isEarlyClobber());
+ // If the defined value extends beyond NewIdx, just move the def down.
+ // This is case 1 above.
+ if (SlotIndex::isEarlierInstr(NewIdx, I->end)) {
+ I->start = DefVNI->def;
+ return;
}
-
- return BR;
- }
-
- void moveKillFlags(unsigned reg, SlotIndex OldIdx, SlotIndex newKillIdx) {
- MachineInstr* OldKillMI = LIS.getInstructionFromIndex(OldIdx);
- if (!OldKillMI->killsRegister(reg))
- return; // Bail out if we don't have kill flags on the old register.
- MachineInstr* NewKillMI = LIS.getInstructionFromIndex(newKillIdx);
- assert(OldKillMI->killsRegister(reg) && "Old 'kill' instr isn't a kill.");
- assert(!NewKillMI->killsRegister(reg) &&
- "New kill instr is already a kill.");
- OldKillMI->clearRegisterKills(reg, &TRI);
- NewKillMI->addRegisterKilled(reg, &TRI);
- }
-
- void updateRegMaskSlots(SlotIndex OldIdx) {
- SmallVectorImpl<SlotIndex>::iterator RI =
- std::lower_bound(LIS.RegMaskSlots.begin(), LIS.RegMaskSlots.end(),
- OldIdx);
- assert(*RI == OldIdx && "No RegMask at OldIdx.");
- *RI = NewIdx;
- assert(*prior(RI) < *RI && *RI < *next(RI) &&
- "RegSlots out of order. Did you move one call across another?");
- }
-
- // Return the last use of reg between NewIdx and OldIdx.
- SlotIndex findLastUseBefore(unsigned Reg, SlotIndex OldIdx) {
- SlotIndex LastUse = NewIdx;
- for (MachineRegisterInfo::use_nodbg_iterator
- UI = MRI.use_nodbg_begin(Reg),
- UE = MRI.use_nodbg_end();
- UI != UE; UI.skipInstruction()) {
- const MachineInstr* MI = &*UI;
- SlotIndex InstSlot = LIS.getSlotIndexes()->getInstructionIndex(MI);
- if (InstSlot > LastUse && InstSlot < OldIdx)
- LastUse = InstSlot;
+ // The remaining possibilities are now:
+ // 2. Live def at OldIdx, killed at NewIdx: isSameInstr(I->end, NewIdx).
+ // 3. Dead def at OldIdx: I->end = OldIdx.getDeadSlot().
+ // In either case, it is possible that there is an existing def at NewIdx.
+ assert((I->end == OldIdx.getDeadSlot() ||
+ SlotIndex::isSameInstr(I->end, NewIdx)) &&
+ "Cannot move def below kill");
+ LiveInterval::iterator NewI = LI.advanceTo(I, NewIdx.getRegSlot());
+ if (NewI != E && SlotIndex::isSameInstr(NewI->start, NewIdx)) {
+ // There is an existing def at NewIdx, case 4 above. The def at OldIdx is
+ // coalesced into that value.
+ assert(NewI->valno != DefVNI && "Multiple defs of value?");
+ LI.removeValNo(DefVNI);
+ return;
}
- return LastUse;
+ // There was no existing def at NewIdx. Turn *I into a dead def at NewIdx.
+ // If the def at OldIdx was dead, we allow it to be moved across other LI
+ // values. The new range should be placed immediately before NewI, move any
+ // intermediate ranges up.
+ assert(NewI != I && "Inconsistent iterators");
+ std::copy(llvm::next(I), NewI, I);
+ *llvm::prior(NewI) = LiveRange(DefVNI->def, NewIdx.getDeadSlot(), DefVNI);
}
- void moveEnteringUpFrom(SlotIndex OldIdx, IntRangePair& P) {
- LiveInterval* LI = P.first;
- LiveRange* LR = P.second;
- bool LiveThrough = LR->end > OldIdx.getRegSlot();
- if (LiveThrough)
+ /// Update LI to reflect an instruction has been moved upwards from OldIdx
+ /// to NewIdx.
+ ///
+ /// 1. Live def at OldIdx:
+ /// Hoist def to NewIdx.
+ ///
+ /// 2. Dead def at OldIdx:
+ /// Hoist def+end to NewIdx, possibly move across other values.
+ ///
+ /// 3. Dead def at OldIdx AND existing def at NewIdx:
+ /// Remove value defined at OldIdx, coalescing it with existing value.
+ ///
+ /// 4. Live def at OldIdx AND existing def at NewIdx:
+ /// Remove value defined at NewIdx, hoist OldIdx def to NewIdx.
+ /// (Happens when bundling multiple defs together).
+ ///
+ /// 5. Value killed at OldIdx:
+ /// Hoist kill to NewIdx, then scan for last kill between NewIdx and
+ /// OldIdx.
+ ///
+ void handleMoveUp(LiveInterval &LI) {
+ // First look for a kill at OldIdx.
+ LiveInterval::iterator I = LI.find(OldIdx.getBaseIndex());
+ LiveInterval::iterator E = LI.end();
+ // Is LI even live at OldIdx?
+ if (I == E || SlotIndex::isEarlierInstr(OldIdx, I->start))
return;
- SlotIndex LastUse = findLastUseBefore(LI->reg, OldIdx);
- if (LastUse != NewIdx)
- moveKillFlags(LI->reg, NewIdx, LastUse);
- LR->end = LastUse.getRegSlot();
- }
- void moveEnteringDownFrom(SlotIndex OldIdx, IntRangePair& P) {
- LiveInterval* LI = P.first;
- LiveRange* LR = P.second;
- // Extend the LiveRange if NewIdx is past the end.
- if (NewIdx > LR->end) {
- // Move kill flags if OldIdx was not originally the end
- // (otherwise LR->end points to an invalid slot).
- if (LR->end.getRegSlot() != OldIdx.getRegSlot()) {
- assert(LR->end > OldIdx && "LiveRange does not cover original slot");
- moveKillFlags(LI->reg, LR->end, NewIdx);
+ // Handle a live-in value.
+ if (!SlotIndex::isSameInstr(I->start, OldIdx)) {
+ // If the live-in value isn't killed here, there is nothing to do.
+ if (!SlotIndex::isSameInstr(OldIdx, I->end))
+ return;
+ // Adjust I->end to end at NewIdx. If we are hoisting a kill above
+ // another use, we need to search for that use. Case 5 above.
+ I->end = NewIdx.getRegSlot(I->end.isEarlyClobber());
+ ++I;
+ // If OldIdx also defines a value, there couldn't have been another use.
+ if (I == E || !SlotIndex::isSameInstr(I->start, OldIdx)) {
+ // No def, search for the new kill.
+ // This can never be an early clobber kill since there is no def.
+ llvm::prior(I)->end = findLastUseBefore(LI.reg).getRegSlot();
+ return;
}
- LR->end = NewIdx.getRegSlot();
- }
- }
-
- void moveAllEnteringFrom(SlotIndex OldIdx, RangeSet& Entering) {
- bool GoingUp = NewIdx < OldIdx;
-
- if (GoingUp) {
- for (RangeSet::iterator EI = Entering.begin(), EE = Entering.end();
- EI != EE; ++EI)
- moveEnteringUpFrom(OldIdx, *EI);
- } else {
- for (RangeSet::iterator EI = Entering.begin(), EE = Entering.end();
- EI != EE; ++EI)
- moveEnteringDownFrom(OldIdx, *EI);
}
- }
-
- void moveInternalFrom(SlotIndex OldIdx, IntRangePair& P) {
- LiveInterval* LI = P.first;
- LiveRange* LR = P.second;
- assert(OldIdx < LR->start && LR->start < OldIdx.getDeadSlot() &&
- LR->end <= OldIdx.getDeadSlot() &&
- "Range should be internal to OldIdx.");
- LiveRange Tmp(*LR);
- Tmp.start = NewIdx.getRegSlot(LR->start.isEarlyClobber());
- Tmp.valno->def = Tmp.start;
- Tmp.end = LR->end.isDead() ? NewIdx.getDeadSlot() : NewIdx.getRegSlot();
- LI->removeRange(*LR);
- LI->addRange(Tmp);
- }
-
- void moveAllInternalFrom(SlotIndex OldIdx, RangeSet& Internal) {
- for (RangeSet::iterator II = Internal.begin(), IE = Internal.end();
- II != IE; ++II)
- moveInternalFrom(OldIdx, *II);
- }
-
- void moveExitingFrom(SlotIndex OldIdx, IntRangePair& P) {
- LiveRange* LR = P.second;
- assert(OldIdx < LR->start && LR->start < OldIdx.getDeadSlot() &&
- "Range should start in OldIdx.");
- assert(LR->end > OldIdx.getDeadSlot() && "Range should exit OldIdx.");
- SlotIndex NewStart = NewIdx.getRegSlot(LR->start.isEarlyClobber());
- LR->start = NewStart;
- LR->valno->def = NewStart;
- }
-
- void moveAllExitingFrom(SlotIndex OldIdx, RangeSet& Exiting) {
- for (RangeSet::iterator EI = Exiting.begin(), EE = Exiting.end();
- EI != EE; ++EI)
- moveExitingFrom(OldIdx, *EI);
- }
- void moveEnteringUpFromInto(SlotIndex OldIdx, IntRangePair& P,
- BundleRanges& BR) {
- LiveInterval* LI = P.first;
- LiveRange* LR = P.second;
- bool LiveThrough = LR->end > OldIdx.getRegSlot();
- if (LiveThrough) {
- assert((LR->start < NewIdx || BR[LI->reg].Def == LR) &&
- "Def in bundle should be def range.");
- assert((BR[LI->reg].Use == 0 || BR[LI->reg].Use == LR) &&
- "If bundle has use for this reg it should be LR.");
- BR[LI->reg].Use = LR;
+ // Now deal with the def at OldIdx.
+ assert(I != E && SlotIndex::isSameInstr(I->start, OldIdx) && "No def?");
+ VNInfo *DefVNI = I->valno;
+ assert(DefVNI->def == I->start && "Inconsistent def");
+ DefVNI->def = NewIdx.getRegSlot(I->start.isEarlyClobber());
+
+ // Check for an existing def at NewIdx.
+ LiveInterval::iterator NewI = LI.find(NewIdx.getRegSlot());
+ if (SlotIndex::isSameInstr(NewI->start, NewIdx)) {
+ assert(NewI->valno != DefVNI && "Same value defined more than once?");
+ // There is an existing def at NewIdx.
+ if (I->end.isDead()) {
+ // Case 3: Remove the dead def at OldIdx.
+ LI.removeValNo(DefVNI);
+ return;
+ }
+ // Case 4: Replace def at NewIdx with live def at OldIdx.
+ I->start = DefVNI->def;
+ LI.removeValNo(NewI->valno);
return;
}
- SlotIndex LastUse = findLastUseBefore(LI->reg, OldIdx);
- moveKillFlags(LI->reg, OldIdx, LastUse);
-
- if (LR->start < NewIdx) {
- // Becoming a new entering range.
- assert(BR[LI->reg].Dead == 0 && BR[LI->reg].Def == 0 &&
- "Bundle shouldn't be re-defining reg mid-range.");
- assert((BR[LI->reg].Use == 0 || BR[LI->reg].Use == LR) &&
- "Bundle shouldn't have different use range for same reg.");
- LR->end = LastUse.getRegSlot();
- BR[LI->reg].Use = LR;
- } else {
- // Becoming a new Dead-def.
- assert(LR->start == NewIdx.getRegSlot(LR->start.isEarlyClobber()) &&
- "Live range starting at unexpected slot.");
- assert(BR[LI->reg].Def == LR && "Reg should have def range.");
- assert(BR[LI->reg].Dead == 0 &&
- "Can't have def and dead def of same reg in a bundle.");
- LR->end = LastUse.getDeadSlot();
- BR[LI->reg].Dead = BR[LI->reg].Def;
- BR[LI->reg].Def = 0;
- }
- }
-
- void moveEnteringDownFromInto(SlotIndex OldIdx, IntRangePair& P,
- BundleRanges& BR) {
- LiveInterval* LI = P.first;
- LiveRange* LR = P.second;
- if (NewIdx > LR->end) {
- // Range extended to bundle. Add to bundle uses.
- // Note: Currently adds kill flags to bundle start.
- assert(BR[LI->reg].Use == 0 &&
- "Bundle already has use range for reg.");
- moveKillFlags(LI->reg, LR->end, NewIdx);
- LR->end = NewIdx.getRegSlot();
- BR[LI->reg].Use = LR;
- } else {
- assert(BR[LI->reg].Use != 0 &&
- "Bundle should already have a use range for reg.");
- }
- }
-
- void moveAllEnteringFromInto(SlotIndex OldIdx, RangeSet& Entering,
- BundleRanges& BR) {
- bool GoingUp = NewIdx < OldIdx;
-
- if (GoingUp) {
- for (RangeSet::iterator EI = Entering.begin(), EE = Entering.end();
- EI != EE; ++EI)
- moveEnteringUpFromInto(OldIdx, *EI, BR);
- } else {
- for (RangeSet::iterator EI = Entering.begin(), EE = Entering.end();
- EI != EE; ++EI)
- moveEnteringDownFromInto(OldIdx, *EI, BR);
+ // There is no existing def at NewIdx. Hoist DefVNI.
+ if (!I->end.isDead()) {
+ // Leave the end point of a live def.
+ I->start = DefVNI->def;
+ return;
}
- }
- void moveInternalFromInto(SlotIndex OldIdx, IntRangePair& P,
- BundleRanges& BR) {
- // TODO: Sane rules for moving ranges into bundles.
+ // DefVNI is a dead def. It may have been moved across other values in LI,
+ // so move I up to NewI. Slide [NewI;I) down one position.
+ std::copy_backward(NewI, I, llvm::next(I));
+ *NewI = LiveRange(DefVNI->def, NewIdx.getDeadSlot(), DefVNI);
}
- void moveAllInternalFromInto(SlotIndex OldIdx, RangeSet& Internal,
- BundleRanges& BR) {
- for (RangeSet::iterator II = Internal.begin(), IE = Internal.end();
- II != IE; ++II)
- moveInternalFromInto(OldIdx, *II, BR);
+ void updateRegMaskSlots() {
+ SmallVectorImpl<SlotIndex>::iterator RI =
+ std::lower_bound(LIS.RegMaskSlots.begin(), LIS.RegMaskSlots.end(),
+ OldIdx);
+ assert(RI != LIS.RegMaskSlots.end() && *RI == OldIdx.getRegSlot() &&
+ "No RegMask at OldIdx.");
+ *RI = NewIdx.getRegSlot();
+ assert((RI == LIS.RegMaskSlots.begin() ||
+ SlotIndex::isEarlierInstr(*llvm::prior(RI), *RI)) &&
+ "Cannot move regmask instruction above another call");
+ assert((llvm::next(RI) == LIS.RegMaskSlots.end() ||
+ SlotIndex::isEarlierInstr(*RI, *llvm::next(RI))) &&
+ "Cannot move regmask instruction below another call");
}
- void moveExitingFromInto(SlotIndex OldIdx, IntRangePair& P,
- BundleRanges& BR) {
- LiveInterval* LI = P.first;
- LiveRange* LR = P.second;
-
- assert(LR->start.isRegister() &&
- "Don't know how to merge exiting ECs into bundles yet.");
+ // Return the last use of reg between NewIdx and OldIdx.
+ SlotIndex findLastUseBefore(unsigned Reg) {
+ SlotIndex LastUse = NewIdx;
- if (LR->end > NewIdx.getDeadSlot()) {
- // This range is becoming an exiting range on the bundle.
- // If there was an old dead-def of this reg, delete it.
- if (BR[LI->reg].Dead != 0) {
- LI->removeRange(*BR[LI->reg].Dead);
- BR[LI->reg].Dead = 0;
+ if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ for (MachineRegisterInfo::use_nodbg_iterator
+ UI = MRI.use_nodbg_begin(Reg),
+ UE = MRI.use_nodbg_end();
+ UI != UE; UI.skipInstruction()) {
+ const MachineInstr* MI = &*UI;
+ SlotIndex InstSlot = LIS.getSlotIndexes()->getInstructionIndex(MI);
+ if (InstSlot > LastUse && InstSlot < OldIdx)
+ LastUse = InstSlot;
}
- assert(BR[LI->reg].Def == 0 &&
- "Can't have two defs for the same variable exiting a bundle.");
- LR->start = NewIdx.getRegSlot();
- LR->valno->def = LR->start;
- BR[LI->reg].Def = LR;
} else {
- // This range is becoming internal to the bundle.
- assert(LR->end == NewIdx.getRegSlot() &&
- "Can't bundle def whose kill is before the bundle");
- if (BR[LI->reg].Dead || BR[LI->reg].Def) {
- // Already have a def for this. Just delete range.
- LI->removeRange(*LR);
- } else {
- // Make range dead, record.
- LR->end = NewIdx.getDeadSlot();
- BR[LI->reg].Dead = LR;
- assert(BR[LI->reg].Use == LR &&
- "Range becoming dead should currently be use.");
+ MachineInstr* MI = LIS.getSlotIndexes()->getInstructionFromIndex(NewIdx);
+ MachineBasicBlock::iterator MII(MI);
+ ++MII;
+ MachineBasicBlock* MBB = MI->getParent();
+ for (; MII != MBB->end() && LIS.getInstructionIndex(MII) < OldIdx; ++MII){
+ for (MachineInstr::mop_iterator MOI = MII->operands_begin(),
+ MOE = MII->operands_end();
+ MOI != MOE; ++MOI) {
+ const MachineOperand& mop = *MOI;
+ if (!mop.isReg() || mop.getReg() == 0 ||
+ TargetRegisterInfo::isVirtualRegister(mop.getReg()))
+ continue;
+
+ if (TRI.hasRegUnit(mop.getReg(), Reg))
+ LastUse = LIS.getInstructionIndex(MII);
+ }
}
- // In both cases the range is no longer a use on the bundle.
- BR[LI->reg].Use = 0;
}
+ return LastUse;
}
-
- void moveAllExitingFromInto(SlotIndex OldIdx, RangeSet& Exiting,
- BundleRanges& BR) {
- for (RangeSet::iterator EI = Exiting.begin(), EE = Exiting.end();
- EI != EE; ++EI)
- moveExitingFromInto(OldIdx, *EI, BR);
- }
-
};
-void LiveIntervals::handleMove(MachineInstr* MI) {
+void LiveIntervals::handleMove(MachineInstr* MI, bool UpdateFlags) {
+ assert(!MI->isBundled() && "Can't handle bundled instructions yet.");
SlotIndex OldIndex = Indexes->getInstructionIndex(MI);
Indexes->removeMachineInstrFromMaps(MI);
- SlotIndex NewIndex = MI->isInsideBundle() ?
- Indexes->getInstructionIndex(MI) :
- Indexes->insertMachineInstrInMaps(MI);
+ SlotIndex NewIndex = Indexes->insertMachineInstrInMaps(MI);
assert(getMBBStartIdx(MI->getParent()) <= OldIndex &&
OldIndex < getMBBEndIdx(MI->getParent()) &&
"Cannot handle moves across basic block boundaries.");
- assert(!MI->isBundled() && "Can't handle bundled instructions yet.");
- HMEditor HME(*this, *MRI, *TRI, NewIndex);
- HME.moveAllRangesFrom(MI, OldIndex);
+ HMEditor HME(*this, *MRI, *TRI, OldIndex, NewIndex, UpdateFlags);
+ HME.updateAllRanges(MI);
}
void LiveIntervals::handleMoveIntoBundle(MachineInstr* MI,
- MachineInstr* BundleStart) {
+ MachineInstr* BundleStart,
+ bool UpdateFlags) {
+ SlotIndex OldIndex = Indexes->getInstructionIndex(MI);
SlotIndex NewIndex = Indexes->getInstructionIndex(BundleStart);
- HMEditor HME(*this, *MRI, *TRI, NewIndex);
- HME.moveAllRangesInto(MI, BundleStart);
+ HMEditor HME(*this, *MRI, *TRI, OldIndex, NewIndex, UpdateFlags);
+ HME.updateAllRanges(MI);
}
diff --git a/contrib/llvm/lib/CodeGen/LiveIntervalUnion.h b/contrib/llvm/lib/CodeGen/LiveIntervalUnion.h
index cd4e690..4d41fca 100644
--- a/contrib/llvm/lib/CodeGen/LiveIntervalUnion.h
+++ b/contrib/llvm/lib/CodeGen/LiveIntervalUnion.h
@@ -178,8 +178,8 @@ public:
bool checkLoopInterference(MachineLoopRange*);
private:
- Query(const Query&); // DO NOT IMPLEMENT
- void operator=(const Query&); // DO NOT IMPLEMENT
+ Query(const Query&) LLVM_DELETED_FUNCTION;
+ void operator=(const Query&) LLVM_DELETED_FUNCTION;
};
// Array of LiveIntervalUnions.
diff --git a/contrib/llvm/lib/CodeGen/LiveRangeCalc.cpp b/contrib/llvm/lib/CodeGen/LiveRangeCalc.cpp
index d828f25..c3ff4f1 100644
--- a/contrib/llvm/lib/CodeGen/LiveRangeCalc.cpp
+++ b/contrib/llvm/lib/CodeGen/LiveRangeCalc.cpp
@@ -65,7 +65,11 @@ void LiveRangeCalc::extendToUses(LiveInterval *LI, unsigned Reg) {
// Visit all operands that read Reg. This may include partial defs.
for (MachineRegisterInfo::reg_nodbg_iterator I = MRI->reg_nodbg_begin(Reg),
E = MRI->reg_nodbg_end(); I != E; ++I) {
- const MachineOperand &MO = I.getOperand();
+ MachineOperand &MO = I.getOperand();
+ // Clear all kill flags. They will be reinserted after register allocation
+ // by LiveIntervalAnalysis::addKillFlags().
+ if (MO.isUse())
+ MO.setIsKill(false);
if (!MO.readsReg())
continue;
// MI is reading Reg. We may have visited MI before if it happens to be
diff --git a/contrib/llvm/lib/CodeGen/LiveRangeEdit.cpp b/contrib/llvm/lib/CodeGen/LiveRangeEdit.cpp
index b4ce9aa..f8fbc7d 100644
--- a/contrib/llvm/lib/CodeGen/LiveRangeEdit.cpp
+++ b/contrib/llvm/lib/CodeGen/LiveRangeEdit.cpp
@@ -87,7 +87,7 @@ bool LiveRangeEdit::allUsesAvailableAt(const MachineInstr *OrigMI,
// We can't remat physreg uses, unless it is a constant.
if (TargetRegisterInfo::isPhysicalRegister(MO.getReg())) {
- if (MRI.isConstantPhysReg(MO.getReg(), VRM->getMachineFunction()))
+ if (MRI.isConstantPhysReg(MO.getReg(), *OrigMI->getParent()->getParent()))
continue;
return false;
}
@@ -96,6 +96,13 @@ bool LiveRangeEdit::allUsesAvailableAt(const MachineInstr *OrigMI,
const VNInfo *OVNI = li.getVNInfoAt(OrigIdx);
if (!OVNI)
continue;
+
+ // Don't allow rematerialization immediately after the original def.
+ // It would be incorrect if OrigMI redefines the register.
+ // See PR14098.
+ if (SlotIndex::isSameInstr(OrigIdx, UseIdx))
+ return false;
+
if (OVNI != li.getVNInfoAt(UseIdx))
return false;
}
@@ -249,7 +256,7 @@ void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl<MachineInstr*> &Dead,
unsigned Reg = MOI->getReg();
if (!TargetRegisterInfo::isVirtualRegister(Reg)) {
// Check if MI reads any unreserved physregs.
- if (Reg && MOI->readsReg() && !LIS.isReserved(Reg))
+ if (Reg && MOI->readsReg() && !MRI.isReserved(Reg))
ReadsPhysRegs = true;
continue;
}
diff --git a/contrib/llvm/lib/CodeGen/LiveRegMatrix.cpp b/contrib/llvm/lib/CodeGen/LiveRegMatrix.cpp
index cdb1776..7f22478 100644
--- a/contrib/llvm/lib/CodeGen/LiveRegMatrix.cpp
+++ b/contrib/llvm/lib/CodeGen/LiveRegMatrix.cpp
@@ -13,6 +13,7 @@
#define DEBUG_TYPE "regalloc"
#include "LiveRegMatrix.h"
+#include "RegisterCoalescer.h"
#include "VirtRegMap.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
@@ -117,8 +118,9 @@ bool LiveRegMatrix::checkRegUnitInterference(LiveInterval &VirtReg,
unsigned PhysReg) {
if (VirtReg.empty())
return false;
+ CoalescerPair CP(VirtReg.reg, PhysReg, *TRI);
for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units)
- if (VirtReg.overlaps(LIS->getRegUnit(*Units)))
+ if (VirtReg.overlaps(LIS->getRegUnit(*Units), CP, *LIS->getSlotIndexes()))
return true;
return false;
}
diff --git a/contrib/llvm/lib/CodeGen/LiveRegMatrix.h b/contrib/llvm/lib/CodeGen/LiveRegMatrix.h
index b3e2d7f..8f22c24 100644
--- a/contrib/llvm/lib/CodeGen/LiveRegMatrix.h
+++ b/contrib/llvm/lib/CodeGen/LiveRegMatrix.h
@@ -15,7 +15,7 @@
// Register units are defined in MCRegisterInfo.h, they represent the smallest
// unit of interference when dealing with overlapping physical registers. The
// LiveRegMatrix is represented as a LiveIntervalUnion per register unit. When
-// a virtual register is assigned to a physicval register, the live range for
+// a virtual register is assigned to a physical register, the live range for
// the virtual register is inserted into the LiveIntervalUnion for each regunit
// in the physreg.
//
diff --git a/contrib/llvm/lib/CodeGen/LiveStackAnalysis.cpp b/contrib/llvm/lib/CodeGen/LiveStackAnalysis.cpp
index 939e795..f0b522b 100644
--- a/contrib/llvm/lib/CodeGen/LiveStackAnalysis.cpp
+++ b/contrib/llvm/lib/CodeGen/LiveStackAnalysis.cpp
@@ -25,7 +25,10 @@
using namespace llvm;
char LiveStacks::ID = 0;
-INITIALIZE_PASS(LiveStacks, "livestacks",
+INITIALIZE_PASS_BEGIN(LiveStacks, "livestacks",
+ "Live Stack Slot Analysis", false, false)
+INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
+INITIALIZE_PASS_END(LiveStacks, "livestacks",
"Live Stack Slot Analysis", false, false)
char &llvm::LiveStacksID = LiveStacks::ID;
diff --git a/contrib/llvm/lib/CodeGen/LiveVariables.cpp b/contrib/llvm/lib/CodeGen/LiveVariables.cpp
index 348ed3a..6ea933d 100644
--- a/contrib/llvm/lib/CodeGen/LiveVariables.cpp
+++ b/contrib/llvm/lib/CodeGen/LiveVariables.cpp
@@ -65,6 +65,7 @@ LiveVariables::VarInfo::findKill(const MachineBasicBlock *MBB) const {
}
void LiveVariables::VarInfo::dump() const {
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
dbgs() << " Alive in blocks: ";
for (SparseBitVector<>::iterator I = AliveBlocks.begin(),
E = AliveBlocks.end(); I != E; ++I)
@@ -77,6 +78,7 @@ void LiveVariables::VarInfo::dump() const {
dbgs() << "\n #" << i << ": " << *Kills[i];
dbgs() << "\n";
}
+#endif
}
/// getVarInfo - Get (possibly creating) a VarInfo object for the given vreg.
@@ -501,8 +503,6 @@ bool LiveVariables::runOnMachineFunction(MachineFunction &mf) {
MRI = &mf.getRegInfo();
TRI = MF->getTarget().getRegisterInfo();
- ReservedRegisters = TRI->getReservedRegs(mf);
-
unsigned NumRegs = TRI->getNumRegs();
PhysRegDef = new MachineInstr*[NumRegs];
PhysRegUse = new MachineInstr*[NumRegs];
@@ -586,7 +586,7 @@ bool LiveVariables::runOnMachineFunction(MachineFunction &mf) {
unsigned MOReg = UseRegs[i];
if (TargetRegisterInfo::isVirtualRegister(MOReg))
HandleVirtRegUse(MOReg, MBB, MI);
- else if (!ReservedRegisters[MOReg])
+ else if (!MRI->isReserved(MOReg))
HandlePhysRegUse(MOReg, MI);
}
@@ -599,7 +599,7 @@ bool LiveVariables::runOnMachineFunction(MachineFunction &mf) {
unsigned MOReg = DefRegs[i];
if (TargetRegisterInfo::isVirtualRegister(MOReg))
HandleVirtRegDef(MOReg, MI);
- else if (!ReservedRegisters[MOReg])
+ else if (!MRI->isReserved(MOReg))
HandlePhysRegDef(MOReg, MI, Defs);
}
UpdatePhysRegDefs(MI, Defs);
@@ -806,18 +806,44 @@ void LiveVariables::addNewBlock(MachineBasicBlock *BB,
MachineBasicBlock *SuccBB) {
const unsigned NumNew = BB->getNumber();
- // All registers used by PHI nodes in SuccBB must be live through BB.
- for (MachineBasicBlock::iterator BBI = SuccBB->begin(),
- BBE = SuccBB->end(); BBI != BBE && BBI->isPHI(); ++BBI)
+ SmallSet<unsigned, 16> Defs, Kills;
+
+ MachineBasicBlock::iterator BBI = SuccBB->begin(), BBE = SuccBB->end();
+ for (; BBI != BBE && BBI->isPHI(); ++BBI) {
+ // Record the def of the PHI node.
+ Defs.insert(BBI->getOperand(0).getReg());
+
+ // All registers used by PHI nodes in SuccBB must be live through BB.
for (unsigned i = 1, e = BBI->getNumOperands(); i != e; i += 2)
if (BBI->getOperand(i+1).getMBB() == BB)
getVarInfo(BBI->getOperand(i).getReg()).AliveBlocks.set(NumNew);
+ }
+
+ // Record all vreg defs and kills of all instructions in SuccBB.
+ for (; BBI != BBE; ++BBI) {
+ for (MachineInstr::mop_iterator I = BBI->operands_begin(),
+ E = BBI->operands_end(); I != E; ++I) {
+ if (I->isReg() && TargetRegisterInfo::isVirtualRegister(I->getReg())) {
+ if (I->isDef())
+ Defs.insert(I->getReg());
+ else if (I->isKill())
+ Kills.insert(I->getReg());
+ }
+ }
+ }
// Update info for all live variables
for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) {
unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
+
+ // If the Defs is defined in the successor it can't be live in BB.
+ if (Defs.count(Reg))
+ continue;
+
+ // If the register is either killed in or live through SuccBB it's also live
+ // through BB.
VarInfo &VI = getVarInfo(Reg);
- if (!VI.AliveBlocks.test(NumNew) && VI.isLiveIn(*SuccBB, Reg, *MRI))
+ if (Kills.count(Reg) || VI.AliveBlocks.test(SuccBB->getNumber()))
VI.AliveBlocks.set(NumNew);
}
}
diff --git a/contrib/llvm/lib/CodeGen/MachineBasicBlock.cpp b/contrib/llvm/lib/CodeGen/MachineBasicBlock.cpp
index fa6b450..18d021d 100644
--- a/contrib/llvm/lib/CodeGen/MachineBasicBlock.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineBasicBlock.cpp
@@ -21,7 +21,7 @@
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetData.h"
+#include "llvm/DataLayout.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Assembly/Writer.h"
@@ -145,7 +145,8 @@ MachineBasicBlock::iterator MachineBasicBlock::getFirstNonPHI() {
instr_iterator I = instr_begin(), E = instr_end();
while (I != E && I->isPHI())
++I;
- assert(!I->isInsideBundle() && "First non-phi MI cannot be inside a bundle!");
+ assert((I == E || !I->isInsideBundle()) &&
+ "First non-phi MI cannot be inside a bundle!");
return I;
}
@@ -156,7 +157,7 @@ MachineBasicBlock::SkipPHIsAndLabels(MachineBasicBlock::iterator I) {
++I;
// FIXME: This needs to change if we wish to bundle labels / dbg_values
// inside the bundle.
- assert(!I->isInsideBundle() &&
+ assert((I == E || !I->isInsideBundle()) &&
"First non-phi / non-label instruction is inside a bundle!");
return I;
}
@@ -228,9 +229,11 @@ const MachineBasicBlock *MachineBasicBlock::getLandingPadSuccessor() const {
return 0;
}
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void MachineBasicBlock::dump() const {
print(dbgs());
}
+#endif
StringRef MachineBasicBlock::getName() const {
if (const BasicBlock *LBB = getBasicBlock())
@@ -243,7 +246,7 @@ StringRef MachineBasicBlock::getName() const {
std::string MachineBasicBlock::getFullName() const {
std::string Name;
if (getParent())
- Name = (getParent()->getFunction()->getName() + ":").str();
+ Name = (getParent()->getName() + ":").str();
if (getBasicBlock())
Name += getBasicBlock()->getName();
else
@@ -942,12 +945,11 @@ MachineBasicBlock::findDebugLoc(instr_iterator MBBI) {
/// getSuccWeight - Return weight of the edge from this block to MBB.
///
-uint32_t MachineBasicBlock::getSuccWeight(const MachineBasicBlock *succ) const {
+uint32_t MachineBasicBlock::getSuccWeight(const_succ_iterator Succ) const {
if (Weights.empty())
return 0;
- const_succ_iterator I = std::find(Successors.begin(), Successors.end(), succ);
- return *getWeightIterator(I);
+ return *getWeightIterator(Succ);
}
/// getWeightIterator - Return wight iterator corresonding to the I successor
@@ -970,6 +972,80 @@ getWeightIterator(MachineBasicBlock::const_succ_iterator I) const {
return Weights.begin() + index;
}
+/// Return whether (physical) register "Reg" has been <def>ined and not <kill>ed
+/// as of just before "MI".
+///
+/// Search is localised to a neighborhood of
+/// Neighborhood instructions before (searching for defs or kills) and N
+/// instructions after (searching just for defs) MI.
+MachineBasicBlock::LivenessQueryResult
+MachineBasicBlock::computeRegisterLiveness(const TargetRegisterInfo *TRI,
+ unsigned Reg, MachineInstr *MI,
+ unsigned Neighborhood) {
+
+ unsigned N = Neighborhood;
+ MachineBasicBlock *MBB = MI->getParent();
+
+ // Start by searching backwards from MI, looking for kills, reads or defs.
+
+ MachineBasicBlock::iterator I(MI);
+ // If this is the first insn in the block, don't search backwards.
+ if (I != MBB->begin()) {
+ do {
+ --I;
+
+ MachineOperandIteratorBase::PhysRegInfo Analysis =
+ MIOperands(I).analyzePhysReg(Reg, TRI);
+
+ if (Analysis.Kills)
+ // Register killed, so isn't live.
+ return LQR_Dead;
+
+ else if (Analysis.DefinesOverlap || Analysis.ReadsOverlap)
+ // Defined or read without a previous kill - live.
+ return (Analysis.Defines || Analysis.Reads) ?
+ LQR_Live : LQR_OverlappingLive;
+
+ } while (I != MBB->begin() && --N > 0);
+ }
+
+ // Did we get to the start of the block?
+ if (I == MBB->begin()) {
+ // If so, the register's state is definitely defined by the live-in state.
+ for (MCRegAliasIterator RAI(Reg, TRI, /*IncludeSelf=*/true);
+ RAI.isValid(); ++RAI) {
+ if (MBB->isLiveIn(*RAI))
+ return (*RAI == Reg) ? LQR_Live : LQR_OverlappingLive;
+ }
+
+ return LQR_Dead;
+ }
+
+ N = Neighborhood;
+
+ // Try searching forwards from MI, looking for reads or defs.
+ I = MachineBasicBlock::iterator(MI);
+ // If this is the last insn in the block, don't search forwards.
+ if (I != MBB->end()) {
+ for (++I; I != MBB->end() && N > 0; ++I, --N) {
+ MachineOperandIteratorBase::PhysRegInfo Analysis =
+ MIOperands(I).analyzePhysReg(Reg, TRI);
+
+ if (Analysis.ReadsOverlap)
+ // Used, therefore must have been live.
+ return (Analysis.Reads) ?
+ LQR_Live : LQR_OverlappingLive;
+
+ else if (Analysis.DefinesOverlap)
+ // Defined (but not read) therefore cannot have been live.
+ return LQR_Dead;
+ }
+ }
+
+ // At this point we have no idea of the liveness of the register.
+ return LQR_Unknown;
+}
+
void llvm::WriteAsOperand(raw_ostream &OS, const MachineBasicBlock *MBB,
bool t) {
OS << "BB#" << MBB->getNumber();
diff --git a/contrib/llvm/lib/CodeGen/MachineBlockPlacement.cpp b/contrib/llvm/lib/CodeGen/MachineBlockPlacement.cpp
index c4dca2c..cd3f199 100644
--- a/contrib/llvm/lib/CodeGen/MachineBlockPlacement.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineBlockPlacement.cpp
@@ -500,11 +500,10 @@ void MachineBlockPlacement::buildChain(
assert(BB);
assert(BlockToChain[BB] == &Chain);
assert(*llvm::prior(Chain.end()) == BB);
- MachineBasicBlock *BestSucc = 0;
// Look for the best viable successor if there is one to place immediately
// after this block.
- BestSucc = selectBestSuccessor(BB, Chain, BlockFilter);
+ MachineBasicBlock *BestSucc = selectBestSuccessor(BB, Chain, BlockFilter);
// If an immediate successor isn't available, look for the best viable
// block among those we've identified as not violating the loop's CFG at
@@ -1014,7 +1013,8 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) {
// exclusively on the loop info here so that we can align backedges in
// unnatural CFGs and backedges that were introduced purely because of the
// loop rotations done during this layout pass.
- if (F.getFunction()->hasFnAttr(Attribute::OptimizeForSize))
+ if (F.getFunction()->getFnAttributes().
+ hasAttribute(Attributes::OptimizeForSize))
return;
unsigned Align = TLI->getPrefLoopAlignment();
if (!Align)
diff --git a/contrib/llvm/lib/CodeGen/MachineBranchProbabilityInfo.cpp b/contrib/llvm/lib/CodeGen/MachineBranchProbabilityInfo.cpp
index 0cc1af0..4479211 100644
--- a/contrib/llvm/lib/CodeGen/MachineBranchProbabilityInfo.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineBranchProbabilityInfo.cpp
@@ -38,7 +38,7 @@ getSumForBlock(const MachineBasicBlock *MBB, uint32_t &Scale) const {
Scale = 1;
for (MachineBasicBlock::const_succ_iterator I = MBB->succ_begin(),
E = MBB->succ_end(); I != E; ++I) {
- uint32_t Weight = getEdgeWeight(MBB, *I);
+ uint32_t Weight = getEdgeWeight(MBB, I);
Sum += Weight;
}
@@ -53,22 +53,30 @@ getSumForBlock(const MachineBasicBlock *MBB, uint32_t &Scale) const {
Sum = 0;
for (MachineBasicBlock::const_succ_iterator I = MBB->succ_begin(),
E = MBB->succ_end(); I != E; ++I) {
- uint32_t Weight = getEdgeWeight(MBB, *I);
+ uint32_t Weight = getEdgeWeight(MBB, I);
Sum += Weight / Scale;
}
assert(Sum <= UINT32_MAX);
return Sum;
}
-uint32_t
-MachineBranchProbabilityInfo::getEdgeWeight(const MachineBasicBlock *Src,
- const MachineBasicBlock *Dst) const {
+uint32_t MachineBranchProbabilityInfo::
+getEdgeWeight(const MachineBasicBlock *Src,
+ MachineBasicBlock::const_succ_iterator Dst) const {
uint32_t Weight = Src->getSuccWeight(Dst);
if (!Weight)
return DEFAULT_WEIGHT;
return Weight;
}
+uint32_t MachineBranchProbabilityInfo::
+getEdgeWeight(const MachineBasicBlock *Src,
+ const MachineBasicBlock *Dst) const {
+ // This is a linear search. Try to use the const_succ_iterator version when
+ // possible.
+ return getEdgeWeight(Src, std::find(Src->succ_begin(), Src->succ_end(), Dst));
+}
+
bool MachineBranchProbabilityInfo::isEdgeHot(MachineBasicBlock *Src,
MachineBasicBlock *Dst) const {
// Hot probability is at least 4/5 = 80%
@@ -82,7 +90,7 @@ MachineBranchProbabilityInfo::getHotSucc(MachineBasicBlock *MBB) const {
MachineBasicBlock *MaxSucc = 0;
for (MachineBasicBlock::const_succ_iterator I = MBB->succ_begin(),
E = MBB->succ_end(); I != E; ++I) {
- uint32_t Weight = getEdgeWeight(MBB, *I);
+ uint32_t Weight = getEdgeWeight(MBB, I);
if (Weight > MaxWeight) {
MaxWeight = Weight;
MaxSucc = *I;
diff --git a/contrib/llvm/lib/CodeGen/MachineCSE.cpp b/contrib/llvm/lib/CodeGen/MachineCSE.cpp
index 896461f..dbc41de 100644
--- a/contrib/llvm/lib/CodeGen/MachineCSE.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineCSE.cpp
@@ -63,8 +63,6 @@ namespace {
virtual void releaseMemory() {
ScopeMap.clear();
Exps.clear();
- AllocatableRegs.clear();
- ReservedRegs.clear();
}
private:
@@ -78,8 +76,6 @@ namespace {
ScopedHTType VNT;
SmallVector<MachineInstr*, 64> Exps;
unsigned CurrVN;
- BitVector AllocatableRegs;
- BitVector ReservedRegs;
bool PerformTrivialCoalescing(MachineInstr *MI, MachineBasicBlock *MBB);
bool isPhysDefTriviallyDead(unsigned Reg,
@@ -88,7 +84,8 @@ namespace {
bool hasLivePhysRegDefUses(const MachineInstr *MI,
const MachineBasicBlock *MBB,
SmallSet<unsigned,8> &PhysRefs,
- SmallVector<unsigned,2> &PhysDefs) const;
+ SmallVector<unsigned,2> &PhysDefs,
+ bool &PhysUseDef) const;
bool PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI,
SmallSet<unsigned,8> &PhysRefs,
SmallVector<unsigned,2> &PhysDefs,
@@ -198,31 +195,52 @@ MachineCSE::isPhysDefTriviallyDead(unsigned Reg,
bool MachineCSE::hasLivePhysRegDefUses(const MachineInstr *MI,
const MachineBasicBlock *MBB,
SmallSet<unsigned,8> &PhysRefs,
- SmallVector<unsigned,2> &PhysDefs) const{
- MachineBasicBlock::const_iterator I = MI; I = llvm::next(I);
+ SmallVector<unsigned,2> &PhysDefs,
+ bool &PhysUseDef) const{
+ // First, add all uses to PhysRefs.
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
const MachineOperand &MO = MI->getOperand(i);
- if (!MO.isReg())
+ if (!MO.isReg() || MO.isDef())
continue;
unsigned Reg = MO.getReg();
if (!Reg)
continue;
if (TargetRegisterInfo::isVirtualRegister(Reg))
continue;
- // If the def is dead, it's ok. But the def may not marked "dead". That's
- // common since this pass is run before livevariables. We can scan
- // forward a few instructions and check if it is obviously dead.
- if (MO.isDef() &&
- (MO.isDead() || isPhysDefTriviallyDead(Reg, I, MBB->end())))
- continue;
// Reading constant physregs is ok.
if (!MRI->isConstantPhysReg(Reg, *MBB->getParent()))
for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
PhysRefs.insert(*AI);
- if (MO.isDef())
+ }
+
+ // Next, collect all defs into PhysDefs. If any is already in PhysRefs
+ // (which currently contains only uses), set the PhysUseDef flag.
+ PhysUseDef = false;
+ MachineBasicBlock::const_iterator I = MI; I = llvm::next(I);
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || !MO.isDef())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg)
+ continue;
+ if (TargetRegisterInfo::isVirtualRegister(Reg))
+ continue;
+ // Check against PhysRefs even if the def is "dead".
+ if (PhysRefs.count(Reg))
+ PhysUseDef = true;
+ // If the def is dead, it's ok. But the def may not marked "dead". That's
+ // common since this pass is run before livevariables. We can scan
+ // forward a few instructions and check if it is obviously dead.
+ if (!MO.isDead() && !isPhysDefTriviallyDead(Reg, I, MBB->end()))
PhysDefs.push_back(Reg);
}
+ // Finally, add all defs to PhysRefs as well.
+ for (unsigned i = 0, e = PhysDefs.size(); i != e; ++i)
+ for (MCRegAliasIterator AI(PhysDefs[i], TRI, true); AI.isValid(); ++AI)
+ PhysRefs.insert(*AI);
+
return !PhysRefs.empty();
}
@@ -242,7 +260,7 @@ bool MachineCSE::PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI,
return false;
for (unsigned i = 0, e = PhysDefs.size(); i != e; ++i) {
- if (AllocatableRegs.test(PhysDefs[i]) || ReservedRegs.test(PhysDefs[i]))
+ if (MRI->isAllocatable(PhysDefs[i]) || MRI->isReserved(PhysDefs[i]))
// Avoid extending live range of physical registers if they are
//allocatable or reserved.
return false;
@@ -411,8 +429,8 @@ void MachineCSE::ExitScope(MachineBasicBlock *MBB) {
DEBUG(dbgs() << "Exiting: " << MBB->getName() << '\n');
DenseMap<MachineBasicBlock*, ScopeType*>::iterator SI = ScopeMap.find(MBB);
assert(SI != ScopeMap.end());
- ScopeMap.erase(SI);
delete SI->second;
+ ScopeMap.erase(SI);
}
bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) {
@@ -463,16 +481,22 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) {
bool CrossMBBPhysDef = false;
SmallSet<unsigned, 8> PhysRefs;
SmallVector<unsigned, 2> PhysDefs;
- if (FoundCSE && hasLivePhysRegDefUses(MI, MBB, PhysRefs, PhysDefs)) {
+ bool PhysUseDef = false;
+ if (FoundCSE && hasLivePhysRegDefUses(MI, MBB, PhysRefs,
+ PhysDefs, PhysUseDef)) {
FoundCSE = false;
// ... Unless the CS is local or is in the sole predecessor block
// and it also defines the physical register which is not clobbered
// in between and the physical register uses were not clobbered.
- unsigned CSVN = VNT.lookup(MI);
- MachineInstr *CSMI = Exps[CSVN];
- if (PhysRegDefsReach(CSMI, MI, PhysRefs, PhysDefs, CrossMBBPhysDef))
- FoundCSE = true;
+ // This can never be the case if the instruction both uses and
+ // defines the same physical register, which was detected above.
+ if (!PhysUseDef) {
+ unsigned CSVN = VNT.lookup(MI);
+ MachineInstr *CSMI = Exps[CSVN];
+ if (PhysRegDefsReach(CSMI, MI, PhysRefs, PhysDefs, CrossMBBPhysDef))
+ FoundCSE = true;
+ }
}
if (!FoundCSE) {
@@ -635,7 +659,5 @@ bool MachineCSE::runOnMachineFunction(MachineFunction &MF) {
MRI = &MF.getRegInfo();
AA = &getAnalysis<AliasAnalysis>();
DT = &getAnalysis<MachineDominatorTree>();
- AllocatableRegs = TRI->getAllocatableSet(MF);
- ReservedRegs = TRI->getReservedRegs(MF);
return PerformCSE(DT->getRootNode());
}
diff --git a/contrib/llvm/lib/CodeGen/MachineCopyPropagation.cpp b/contrib/llvm/lib/CodeGen/MachineCopyPropagation.cpp
index bac3aa2..4a79328 100644
--- a/contrib/llvm/lib/CodeGen/MachineCopyPropagation.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineCopyPropagation.cpp
@@ -16,6 +16,7 @@
#include "llvm/Pass.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
@@ -32,7 +33,7 @@ STATISTIC(NumDeletes, "Number of dead copies deleted");
namespace {
class MachineCopyPropagation : public MachineFunctionPass {
const TargetRegisterInfo *TRI;
- BitVector ReservedRegs;
+ MachineRegisterInfo *MRI;
public:
static char ID; // Pass identification, replacement for typeid
@@ -146,8 +147,8 @@ bool MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) {
DenseMap<unsigned, MachineInstr*>::iterator CI = AvailCopyMap.find(Src);
if (CI != AvailCopyMap.end()) {
MachineInstr *CopyMI = CI->second;
- if (!ReservedRegs.test(Def) &&
- (!ReservedRegs.test(Src) || NoInterveningSideEffect(CopyMI, MI)) &&
+ if (!MRI->isReserved(Def) &&
+ (!MRI->isReserved(Src) || NoInterveningSideEffect(CopyMI, MI)) &&
isNopCopy(CopyMI, Def, Src, TRI)) {
// The two copies cancel out and the source of the first copy
// hasn't been overridden, eliminate the second one. e.g.
@@ -259,7 +260,7 @@ bool MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) {
DI = MaybeDeadCopies.begin(), DE = MaybeDeadCopies.end();
DI != DE; ++DI) {
unsigned Reg = (*DI)->getOperand(0).getReg();
- if (ReservedRegs.test(Reg) || !MaskMO.clobbersPhysReg(Reg))
+ if (MRI->isReserved(Reg) || !MaskMO.clobbersPhysReg(Reg))
continue;
(*DI)->eraseFromParent();
Changed = true;
@@ -296,7 +297,7 @@ bool MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) {
for (SmallSetVector<MachineInstr*, 8>::iterator
DI = MaybeDeadCopies.begin(), DE = MaybeDeadCopies.end();
DI != DE; ++DI) {
- if (!ReservedRegs.test((*DI)->getOperand(0).getReg())) {
+ if (!MRI->isReserved((*DI)->getOperand(0).getReg())) {
(*DI)->eraseFromParent();
Changed = true;
++NumDeletes;
@@ -311,7 +312,7 @@ bool MachineCopyPropagation::runOnMachineFunction(MachineFunction &MF) {
bool Changed = false;
TRI = MF.getTarget().getRegisterInfo();
- ReservedRegs = TRI->getReservedRegs(MF);
+ MRI = &MF.getRegInfo();
for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I)
Changed |= CopyPropagateBlock(*I);
diff --git a/contrib/llvm/lib/CodeGen/MachineFunction.cpp b/contrib/llvm/lib/CodeGen/MachineFunction.cpp
index d4aede8a..91d5211 100644
--- a/contrib/llvm/lib/CodeGen/MachineFunction.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineFunction.cpp
@@ -28,7 +28,7 @@
#include "llvm/MC/MCContext.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Target/TargetData.h"
+#include "llvm/DataLayout.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetFrameLowering.h"
@@ -59,13 +59,13 @@ MachineFunction::MachineFunction(const Function *F, const TargetMachine &TM,
RegInfo = 0;
MFInfo = 0;
FrameInfo = new (Allocator) MachineFrameInfo(*TM.getFrameLowering());
- if (Fn->hasFnAttr(Attribute::StackAlignment))
- FrameInfo->ensureMaxAlignment(Attribute::getStackAlignmentFromAttrs(
- Fn->getAttributes().getFnAttributes()));
- ConstantPool = new (Allocator) MachineConstantPool(TM.getTargetData());
+ if (Fn->getFnAttributes().hasAttribute(Attributes::StackAlignment))
+ FrameInfo->ensureMaxAlignment(Fn->getAttributes().
+ getFnAttributes().getStackAlignment());
+ ConstantPool = new (Allocator) MachineConstantPool(TM.getDataLayout());
Alignment = TM.getTargetLowering()->getMinFunctionAlignment();
// FIXME: Shouldn't use pref alignment if explicit alignment is set on Fn.
- if (!Fn->hasFnAttr(Attribute::OptimizeForSize))
+ if (!Fn->getFnAttributes().hasAttribute(Attributes::OptimizeForSize))
Alignment = std::max(Alignment,
TM.getTargetLowering()->getPrefFunctionAlignment());
FunctionNumber = FunctionNum;
@@ -284,12 +284,19 @@ MachineFunction::extractStoreMemRefs(MachineInstr::mmo_iterator Begin,
return std::make_pair(Result, Result + Num);
}
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void MachineFunction::dump() const {
print(dbgs());
}
+#endif
+
+StringRef MachineFunction::getName() const {
+ assert(getFunction() && "No function!");
+ return getFunction()->getName();
+}
void MachineFunction::print(raw_ostream &OS, SlotIndexes *Indexes) const {
- OS << "# Machine code for function " << Fn->getName() << ": ";
+ OS << "# Machine code for function " << getName() << ": ";
if (RegInfo) {
OS << (RegInfo->isSSA() ? "SSA" : "Post SSA");
if (!RegInfo->tracksLiveness())
@@ -334,7 +341,7 @@ void MachineFunction::print(raw_ostream &OS, SlotIndexes *Indexes) const {
BB->print(OS, Indexes);
}
- OS << "\n# End machine code for function " << Fn->getName() << ".\n\n";
+ OS << "\n# End machine code for function " << getName() << ".\n\n";
}
namespace llvm {
@@ -344,7 +351,7 @@ namespace llvm {
DOTGraphTraits (bool isSimple=false) : DefaultDOTGraphTraits(isSimple) {}
static std::string getGraphName(const MachineFunction *F) {
- return "CFG for '" + F->getFunction()->getName().str() + "' function";
+ return "CFG for '" + F->getName().str() + "' function";
}
std::string getNodeLabel(const MachineBasicBlock *Node,
@@ -377,7 +384,7 @@ namespace llvm {
void MachineFunction::viewCFG() const
{
#ifndef NDEBUG
- ViewGraph(this, "mf" + getFunction()->getName());
+ ViewGraph(this, "mf" + getName());
#else
errs() << "MachineFunction::viewCFG is only available in debug builds on "
<< "systems with Graphviz or gv!\n";
@@ -387,7 +394,7 @@ void MachineFunction::viewCFG() const
void MachineFunction::viewCFGOnly() const
{
#ifndef NDEBUG
- ViewGraph(this, "mf" + getFunction()->getName(), true);
+ ViewGraph(this, "mf" + getName(), true);
#else
errs() << "MachineFunction::viewCFGOnly is only available in debug builds on "
<< "systems with Graphviz or gv!\n";
@@ -453,7 +460,9 @@ int MachineFrameInfo::CreateFixedObject(uint64_t Size, int64_t SPOffset,
unsigned StackAlign = TFI.getStackAlignment();
unsigned Align = MinAlign(SPOffset, StackAlign);
Objects.insert(Objects.begin(), StackObject(Size, Align, SPOffset, Immutable,
- /*isSS*/false, false));
+ /*isSS*/ false,
+ /*NeedSP*/ false,
+ /*Alloca*/ 0));
return -++NumFixedObjects;
}
@@ -525,16 +534,18 @@ void MachineFrameInfo::print(const MachineFunction &MF, raw_ostream &OS) const{
}
}
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void MachineFrameInfo::dump(const MachineFunction &MF) const {
print(MF, dbgs());
}
+#endif
//===----------------------------------------------------------------------===//
// MachineJumpTableInfo implementation
//===----------------------------------------------------------------------===//
/// getEntrySize - Return the size of each entry in the jump table.
-unsigned MachineJumpTableInfo::getEntrySize(const TargetData &TD) const {
+unsigned MachineJumpTableInfo::getEntrySize(const DataLayout &TD) const {
// The size of a jump table entry is 4 bytes unless the entry is just the
// address of a block, in which case it is the pointer size.
switch (getEntryKind()) {
@@ -553,7 +564,7 @@ unsigned MachineJumpTableInfo::getEntrySize(const TargetData &TD) const {
}
/// getEntryAlignment - Return the alignment of each entry in the jump table.
-unsigned MachineJumpTableInfo::getEntryAlignment(const TargetData &TD) const {
+unsigned MachineJumpTableInfo::getEntryAlignment(const DataLayout &TD) const {
// The alignment of a jump table entry is the alignment of int32 unless the
// entry is just the address of a block, in which case it is the pointer
// alignment.
@@ -622,7 +633,9 @@ void MachineJumpTableInfo::print(raw_ostream &OS) const {
OS << '\n';
}
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void MachineJumpTableInfo::dump() const { print(dbgs()); }
+#endif
//===----------------------------------------------------------------------===//
@@ -657,7 +670,7 @@ MachineConstantPool::~MachineConstantPool() {
/// CanShareConstantPoolEntry - Test whether the given two constants
/// can be allocated the same constant pool entry.
static bool CanShareConstantPoolEntry(const Constant *A, const Constant *B,
- const TargetData *TD) {
+ const DataLayout *TD) {
// Handle the trivial case quickly.
if (A == B) return true;
@@ -681,7 +694,7 @@ static bool CanShareConstantPoolEntry(const Constant *A, const Constant *B,
// Try constant folding a bitcast of both instructions to an integer. If we
// get two identical ConstantInt's, then we are good to share them. We use
// the constant folding APIs to do this so that we get the benefit of
- // TargetData.
+ // DataLayout.
if (isa<PointerType>(A->getType()))
A = ConstantFoldInstOperands(Instruction::PtrToInt, IntTy,
const_cast<Constant*>(A), TD);
@@ -749,10 +762,12 @@ void MachineConstantPool::print(raw_ostream &OS) const {
if (Constants[i].isMachineConstantPoolEntry())
Constants[i].Val.MachineCPVal->print(OS);
else
- OS << *(Value*)Constants[i].Val.ConstVal;
+ OS << *(const Value*)Constants[i].Val.ConstVal;
OS << ", align=" << Constants[i].getAlignment();
OS << "\n";
}
}
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void MachineConstantPool::dump() const { print(dbgs()); }
+#endif
diff --git a/contrib/llvm/lib/CodeGen/MachineFunctionPrinterPass.cpp b/contrib/llvm/lib/CodeGen/MachineFunctionPrinterPass.cpp
index 0102ac7..ed94efb 100644
--- a/contrib/llvm/lib/CodeGen/MachineFunctionPrinterPass.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineFunctionPrinterPass.cpp
@@ -51,7 +51,7 @@ struct MachineFunctionPrinterPass : public MachineFunctionPass {
char MachineFunctionPrinterPass::ID = 0;
}
-char &MachineFunctionPrinterPassID = MachineFunctionPrinterPass::ID;
+char &llvm::MachineFunctionPrinterPassID = MachineFunctionPrinterPass::ID;
INITIALIZE_PASS(MachineFunctionPrinterPass, "print-machineinstrs",
"Machine Function Printer", false, false)
diff --git a/contrib/llvm/lib/CodeGen/MachineInstr.cpp b/contrib/llvm/lib/CodeGen/MachineInstr.cpp
index b166849..ce8d520 100644
--- a/contrib/llvm/lib/CodeGen/MachineInstr.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineInstr.cpp
@@ -111,6 +111,7 @@ void MachineOperand::setIsDef(bool Val) {
/// the specified value. If an operand is known to be an immediate already,
/// the setImm method should be used.
void MachineOperand::ChangeToImmediate(int64_t ImmVal) {
+ assert((!isReg() || !isTied()) && "Cannot change a tied operand into an imm");
// If this operand is currently a register operand, and if this is in a
// function, deregister the operand from the register's use/def list.
if (isReg() && isOnRegUseList())
@@ -136,7 +137,8 @@ void MachineOperand::ChangeToRegister(unsigned Reg, bool isDef, bool isImp,
RegInfo = &MF->getRegInfo();
// If this operand is already a register operand, remove it from the
// register's use/def lists.
- if (RegInfo && isReg())
+ bool WasReg = isReg();
+ if (RegInfo && WasReg)
RegInfo->removeRegOperandFromUseList(this);
// Change this to a register and set the reg#.
@@ -153,6 +155,9 @@ void MachineOperand::ChangeToRegister(unsigned Reg, bool isDef, bool isImp,
IsDebug = isDebug;
// Ensure isOnRegUseList() returns false.
Contents.Reg.Prev = 0;
+ // Preserve the tie when the operand was already a register.
+ if (!WasReg)
+ TiedTo = 0;
// If this operand is embedded in a function, add the operand to the
// register's use/def list.
@@ -193,7 +198,8 @@ bool MachineOperand::isIdenticalTo(const MachineOperand &Other) const {
return !strcmp(getSymbolName(), Other.getSymbolName()) &&
getOffset() == Other.getOffset();
case MachineOperand::MO_BlockAddress:
- return getBlockAddress() == Other.getBlockAddress();
+ return getBlockAddress() == Other.getBlockAddress() &&
+ getOffset() == Other.getOffset();
case MO_RegisterMask:
return getRegMask() == Other.getRegMask();
case MachineOperand::MO_MCSymbol:
@@ -208,8 +214,8 @@ bool MachineOperand::isIdenticalTo(const MachineOperand &Other) const {
hash_code llvm::hash_value(const MachineOperand &MO) {
switch (MO.getType()) {
case MachineOperand::MO_Register:
- return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getReg(),
- MO.getSubReg(), MO.isDef());
+ // Register operands don't have target flags.
+ return hash_combine(MO.getType(), MO.getReg(), MO.getSubReg(), MO.isDef());
case MachineOperand::MO_Immediate:
return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getImm());
case MachineOperand::MO_CImmediate:
@@ -234,7 +240,7 @@ hash_code llvm::hash_value(const MachineOperand &MO) {
MO.getOffset());
case MachineOperand::MO_BlockAddress:
return hash_combine(MO.getType(), MO.getTargetFlags(),
- MO.getBlockAddress());
+ MO.getBlockAddress(), MO.getOffset());
case MachineOperand::MO_RegisterMask:
return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getRegMask());
case MachineOperand::MO_Metadata:
@@ -262,7 +268,7 @@ void MachineOperand::print(raw_ostream &OS, const TargetMachine *TM) const {
OS << PrintReg(getReg(), TRI, getSubReg());
if (isDef() || isKill() || isDead() || isImplicit() || isUndef() ||
- isInternalRead() || isEarlyClobber()) {
+ isInternalRead() || isEarlyClobber() || isTied()) {
OS << '<';
bool NeedComma = false;
if (isDef()) {
@@ -282,27 +288,32 @@ void MachineOperand::print(raw_ostream &OS, const TargetMachine *TM) const {
NeedComma = true;
}
- if (isKill() || isDead() || (isUndef() && isUse()) || isInternalRead()) {
+ if (isKill()) {
if (NeedComma) OS << ',';
- NeedComma = false;
- if (isKill()) {
- OS << "kill";
- NeedComma = true;
- }
- if (isDead()) {
- OS << "dead";
- NeedComma = true;
- }
- if (isUndef() && isUse()) {
- if (NeedComma) OS << ',';
- OS << "undef";
- NeedComma = true;
- }
- if (isInternalRead()) {
- if (NeedComma) OS << ',';
- OS << "internal";
- NeedComma = true;
- }
+ OS << "kill";
+ NeedComma = true;
+ }
+ if (isDead()) {
+ if (NeedComma) OS << ',';
+ OS << "dead";
+ NeedComma = true;
+ }
+ if (isUndef() && isUse()) {
+ if (NeedComma) OS << ',';
+ OS << "undef";
+ NeedComma = true;
+ }
+ if (isInternalRead()) {
+ if (NeedComma) OS << ',';
+ OS << "internal";
+ NeedComma = true;
+ }
+ if (isTied()) {
+ if (NeedComma) OS << ',';
+ OS << "tied";
+ if (TiedTo != 15)
+ OS << unsigned(TiedTo - 1);
+ NeedComma = true;
}
OS << '>';
}
@@ -352,6 +363,7 @@ void MachineOperand::print(raw_ostream &OS, const TargetMachine *TM) const {
case MachineOperand::MO_BlockAddress:
OS << '<';
WriteAsOperand(OS, getBlockAddress(), /*PrintType=*/false);
+ if (getOffset()) OS << "+" << getOffset();
OS << '>';
break;
case MachineOperand::MO_RegisterMask:
@@ -528,20 +540,6 @@ void MachineInstr::addImplicitDefUseOperands() {
/// MachineInstr ctor - This constructor creates a MachineInstr and adds the
/// implicit operands. It reserves space for the number of operands specified by
/// the MCInstrDesc.
-MachineInstr::MachineInstr(const MCInstrDesc &tid, bool NoImp)
- : MCID(&tid), Flags(0), AsmPrinterFlags(0),
- NumMemRefs(0), MemRefs(0), Parent(0) {
- unsigned NumImplicitOps = 0;
- if (!NoImp)
- NumImplicitOps = MCID->getNumImplicitDefs() + MCID->getNumImplicitUses();
- Operands.reserve(NumImplicitOps + MCID->getNumOperands());
- if (!NoImp)
- addImplicitDefUseOperands();
- // Make sure that we get added to a machine basicblock
- LeakDetector::addGarbageObject(this);
-}
-
-/// MachineInstr ctor - As above, but with a DebugLoc.
MachineInstr::MachineInstr(const MCInstrDesc &tid, const DebugLoc dl,
bool NoImp)
: MCID(&tid), Flags(0), AsmPrinterFlags(0),
@@ -559,21 +557,6 @@ MachineInstr::MachineInstr(const MCInstrDesc &tid, const DebugLoc dl,
/// MachineInstr ctor - Work exactly the same as the ctor two above, except
/// that the MachineInstr is created and added to the end of the specified
/// basic block.
-MachineInstr::MachineInstr(MachineBasicBlock *MBB, const MCInstrDesc &tid)
- : MCID(&tid), Flags(0), AsmPrinterFlags(0),
- NumMemRefs(0), MemRefs(0), Parent(0) {
- assert(MBB && "Cannot use inserting ctor with null basic block!");
- unsigned NumImplicitOps =
- MCID->getNumImplicitDefs() + MCID->getNumImplicitUses();
- Operands.reserve(NumImplicitOps + MCID->getNumOperands());
- addImplicitDefUseOperands();
- // Make sure that we get added to a machine basicblock
- LeakDetector::addGarbageObject(this);
- MBB->push_back(this); // Add instruction to end of basic block!
-}
-
-/// MachineInstr ctor - As above, but with a DebugLoc.
-///
MachineInstr::MachineInstr(MachineBasicBlock *MBB, const DebugLoc dl,
const MCInstrDesc &tid)
: MCID(&tid), Flags(0), AsmPrinterFlags(0),
@@ -673,6 +656,7 @@ void MachineInstr::addOperand(const MachineOperand &Op) {
if (!isImpReg && !isInlineAsm()) {
while (OpNo && Operands[OpNo-1].isReg() && Operands[OpNo-1].isImplicit()) {
--OpNo;
+ assert(!Operands[OpNo].isTied() && "Cannot move tied operands");
if (RegInfo)
RegInfo->removeRegOperandFromUseList(&Operands[OpNo]);
}
@@ -708,12 +692,25 @@ void MachineInstr::addOperand(const MachineOperand &Op) {
if (Operands[OpNo].isReg()) {
// Ensure isOnRegUseList() returns false, regardless of Op's status.
Operands[OpNo].Contents.Reg.Prev = 0;
+ // Ignore existing ties. This is not a property that can be copied.
+ Operands[OpNo].TiedTo = 0;
// Add the new operand to RegInfo.
if (RegInfo)
RegInfo->addRegOperandToUseList(&Operands[OpNo]);
- // If the register operand is flagged as early, mark the operand as such.
- if (MCID->getOperandConstraint(OpNo, MCOI::EARLY_CLOBBER) != -1)
- Operands[OpNo].setIsEarlyClobber(true);
+ // The MCID operand information isn't accurate until we start adding
+ // explicit operands. The implicit operands are added first, then the
+ // explicits are inserted before them.
+ if (!isImpReg) {
+ // Tie uses to defs as indicated in MCInstrDesc.
+ if (Operands[OpNo].isUse()) {
+ int DefIdx = MCID->getOperandConstraint(OpNo, MCOI::TIED_TO);
+ if (DefIdx != -1)
+ tieOperands(DefIdx, OpNo);
+ }
+ // If the register operand is flagged as early, mark the operand as such.
+ if (MCID->getOperandConstraint(OpNo, MCOI::EARLY_CLOBBER) != -1)
+ Operands[OpNo].setIsEarlyClobber(true);
+ }
}
// Re-add all the implicit ops.
@@ -730,6 +727,7 @@ void MachineInstr::addOperand(const MachineOperand &Op) {
///
void MachineInstr::RemoveOperand(unsigned OpNo) {
assert(OpNo < Operands.size() && "Invalid operand number");
+ untieRegOperand(OpNo);
MachineRegisterInfo *RegInfo = getRegInfo();
// Special case removing the last one.
@@ -752,6 +750,13 @@ void MachineInstr::RemoveOperand(unsigned OpNo) {
}
}
+#ifndef NDEBUG
+ // Moving tied operands would break the ties.
+ for (unsigned i = OpNo + 1, e = Operands.size(); i != e; ++i)
+ if (Operands[i].isReg())
+ assert(!Operands[i].isTied() && "Cannot move tied operands");
+#endif
+
Operands.erase(Operands.begin()+OpNo);
if (RegInfo) {
@@ -935,6 +940,12 @@ bool MachineInstr::isStackAligningInlineAsm() const {
return false;
}
+InlineAsm::AsmDialect MachineInstr::getInlineAsmDialect() const {
+ assert(isInlineAsm() && "getInlineAsmDialect() only works for inline asms!");
+ unsigned ExtraInfo = getOperand(InlineAsm::MIOp_ExtraInfo).getImm();
+ return InlineAsm::AsmDialect((ExtraInfo & InlineAsm::Extra_AsmDialect) != 0);
+}
+
int MachineInstr::findInlineAsmFlagIdx(unsigned OpIdx,
unsigned *GroupNo) const {
assert(isInlineAsm() && "Expected an inline asm instruction");
@@ -1004,9 +1015,10 @@ MachineInstr::getRegClassConstraint(unsigned OpIdx,
unsigned MachineInstr::getBundleSize() const {
assert(isBundle() && "Expecting a bundle");
- MachineBasicBlock::const_instr_iterator I = *this;
+ const MachineBasicBlock *MBB = getParent();
+ MachineBasicBlock::const_instr_iterator I = *this, E = MBB->instr_end();
unsigned Size = 0;
- while ((++I)->isInsideBundle()) {
+ while ((++I != E) && I->isInsideBundle()) {
++Size;
}
assert(Size > 1 && "Malformed bundle");
@@ -1114,107 +1126,99 @@ int MachineInstr::findFirstPredOperandIdx() const {
return -1;
}
-/// isRegTiedToUseOperand - Given the index of a register def operand,
-/// check if the register def is tied to a source operand, due to either
-/// two-address elimination or inline assembly constraints. Returns the
-/// first tied use operand index by reference is UseOpIdx is not null.
-bool MachineInstr::
-isRegTiedToUseOperand(unsigned DefOpIdx, unsigned *UseOpIdx) const {
- if (isInlineAsm()) {
- assert(DefOpIdx > InlineAsm::MIOp_FirstOperand);
- const MachineOperand &MO = getOperand(DefOpIdx);
- if (!MO.isReg() || !MO.isDef() || MO.getReg() == 0)
- return false;
- // Determine the actual operand index that corresponds to this index.
- unsigned DefNo = 0;
- int FlagIdx = findInlineAsmFlagIdx(DefOpIdx, &DefNo);
- if (FlagIdx < 0)
- return false;
+// MachineOperand::TiedTo is 4 bits wide.
+const unsigned TiedMax = 15;
- // Which part of the group is DefOpIdx?
- unsigned DefPart = DefOpIdx - (FlagIdx + 1);
-
- for (unsigned i = InlineAsm::MIOp_FirstOperand, e = getNumOperands();
- i != e; ++i) {
- const MachineOperand &FMO = getOperand(i);
- if (!FMO.isImm())
- continue;
- if (i+1 >= e || !getOperand(i+1).isReg() || !getOperand(i+1).isUse())
- continue;
- unsigned Idx;
- if (InlineAsm::isUseOperandTiedToDef(FMO.getImm(), Idx) &&
- Idx == DefNo) {
- if (UseOpIdx)
- *UseOpIdx = (unsigned)i + 1 + DefPart;
- return true;
- }
- }
- return false;
+/// tieOperands - Mark operands at DefIdx and UseIdx as tied to each other.
+///
+/// Use and def operands can be tied together, indicated by a non-zero TiedTo
+/// field. TiedTo can have these values:
+///
+/// 0: Operand is not tied to anything.
+/// 1 to TiedMax-1: Tied to getOperand(TiedTo-1).
+/// TiedMax: Tied to an operand >= TiedMax-1.
+///
+/// The tied def must be one of the first TiedMax operands on a normal
+/// instruction. INLINEASM instructions allow more tied defs.
+///
+void MachineInstr::tieOperands(unsigned DefIdx, unsigned UseIdx) {
+ MachineOperand &DefMO = getOperand(DefIdx);
+ MachineOperand &UseMO = getOperand(UseIdx);
+ assert(DefMO.isDef() && "DefIdx must be a def operand");
+ assert(UseMO.isUse() && "UseIdx must be a use operand");
+ assert(!DefMO.isTied() && "Def is already tied to another use");
+ assert(!UseMO.isTied() && "Use is already tied to another def");
+
+ if (DefIdx < TiedMax)
+ UseMO.TiedTo = DefIdx + 1;
+ else {
+ // Inline asm can use the group descriptors to find tied operands, but on
+ // normal instruction, the tied def must be within the first TiedMax
+ // operands.
+ assert(isInlineAsm() && "DefIdx out of range");
+ UseMO.TiedTo = TiedMax;
}
- assert(getOperand(DefOpIdx).isDef() && "DefOpIdx is not a def!");
- const MCInstrDesc &MCID = getDesc();
- for (unsigned i = 0, e = MCID.getNumOperands(); i != e; ++i) {
- const MachineOperand &MO = getOperand(i);
- if (MO.isReg() && MO.isUse() &&
- MCID.getOperandConstraint(i, MCOI::TIED_TO) == (int)DefOpIdx) {
- if (UseOpIdx)
- *UseOpIdx = (unsigned)i;
- return true;
- }
- }
- return false;
+ // UseIdx can be out of range, we'll search for it in findTiedOperandIdx().
+ DefMO.TiedTo = std::min(UseIdx + 1, TiedMax);
}
-/// isRegTiedToDefOperand - Return true if the operand of the specified index
-/// is a register use and it is tied to an def operand. It also returns the def
-/// operand index by reference.
-bool MachineInstr::
-isRegTiedToDefOperand(unsigned UseOpIdx, unsigned *DefOpIdx) const {
- if (isInlineAsm()) {
- const MachineOperand &MO = getOperand(UseOpIdx);
- if (!MO.isReg() || !MO.isUse() || MO.getReg() == 0)
- return false;
+/// Given the index of a tied register operand, find the operand it is tied to.
+/// Defs are tied to uses and vice versa. Returns the index of the tied operand
+/// which must exist.
+unsigned MachineInstr::findTiedOperandIdx(unsigned OpIdx) const {
+ const MachineOperand &MO = getOperand(OpIdx);
+ assert(MO.isTied() && "Operand isn't tied");
- // Find the flag operand corresponding to UseOpIdx
- int FlagIdx = findInlineAsmFlagIdx(UseOpIdx);
- if (FlagIdx < 0)
- return false;
+ // Normally TiedTo is in range.
+ if (MO.TiedTo < TiedMax)
+ return MO.TiedTo - 1;
- const MachineOperand &UFMO = getOperand(FlagIdx);
- unsigned DefNo;
- if (InlineAsm::isUseOperandTiedToDef(UFMO.getImm(), DefNo)) {
- if (!DefOpIdx)
- return true;
-
- unsigned DefIdx = InlineAsm::MIOp_FirstOperand;
- // Remember to adjust the index. First operand is asm string, second is
- // the HasSideEffects and AlignStack bits, then there is a flag for each.
- while (DefNo) {
- const MachineOperand &FMO = getOperand(DefIdx);
- assert(FMO.isImm());
- // Skip over this def.
- DefIdx += InlineAsm::getNumOperandRegisters(FMO.getImm()) + 1;
- --DefNo;
- }
- *DefOpIdx = DefIdx + UseOpIdx - FlagIdx;
- return true;
+ // Uses on normal instructions can be out of range.
+ if (!isInlineAsm()) {
+ // Normal tied defs must be in the 0..TiedMax-1 range.
+ if (MO.isUse())
+ return TiedMax - 1;
+ // MO is a def. Search for the tied use.
+ for (unsigned i = TiedMax - 1, e = getNumOperands(); i != e; ++i) {
+ const MachineOperand &UseMO = getOperand(i);
+ if (UseMO.isReg() && UseMO.isUse() && UseMO.TiedTo == OpIdx + 1)
+ return i;
}
- return false;
+ llvm_unreachable("Can't find tied use");
}
- const MCInstrDesc &MCID = getDesc();
- if (UseOpIdx >= MCID.getNumOperands())
- return false;
- const MachineOperand &MO = getOperand(UseOpIdx);
- if (!MO.isReg() || !MO.isUse())
- return false;
- int DefIdx = MCID.getOperandConstraint(UseOpIdx, MCOI::TIED_TO);
- if (DefIdx == -1)
- return false;
- if (DefOpIdx)
- *DefOpIdx = (unsigned)DefIdx;
- return true;
+ // Now deal with inline asm by parsing the operand group descriptor flags.
+ // Find the beginning of each operand group.
+ SmallVector<unsigned, 8> GroupIdx;
+ unsigned OpIdxGroup = ~0u;
+ unsigned NumOps;
+ for (unsigned i = InlineAsm::MIOp_FirstOperand, e = getNumOperands(); i < e;
+ i += NumOps) {
+ const MachineOperand &FlagMO = getOperand(i);
+ assert(FlagMO.isImm() && "Invalid tied operand on inline asm");
+ unsigned CurGroup = GroupIdx.size();
+ GroupIdx.push_back(i);
+ NumOps = 1 + InlineAsm::getNumOperandRegisters(FlagMO.getImm());
+ // OpIdx belongs to this operand group.
+ if (OpIdx > i && OpIdx < i + NumOps)
+ OpIdxGroup = CurGroup;
+ unsigned TiedGroup;
+ if (!InlineAsm::isUseOperandTiedToDef(FlagMO.getImm(), TiedGroup))
+ continue;
+ // Operands in this group are tied to operands in TiedGroup which must be
+ // earlier. Find the number of operands between the two groups.
+ unsigned Delta = i - GroupIdx[TiedGroup];
+
+ // OpIdx is a use tied to TiedGroup.
+ if (OpIdxGroup == CurGroup)
+ return OpIdx - Delta;
+
+ // OpIdx is a def tied to this use group.
+ if (OpIdxGroup == TiedGroup)
+ return OpIdx + Delta;
+ }
+ llvm_unreachable("Invalid tied operand on inline asm");
}
/// clearKillInfo - Clears kill flags on all operands.
@@ -1292,7 +1296,12 @@ bool MachineInstr::isSafeToMove(const TargetInstrInfo *TII,
AliasAnalysis *AA,
bool &SawStore) const {
// Ignore stuff that we obviously can't move.
- if (mayStore() || isCall()) {
+ //
+ // Treat volatile loads as stores. This is not strictly necessary for
+ // volatiles, but it is required for atomic loads. It is not allowed to move
+ // a load across an atomic load with Ordering > Monotonic.
+ if (mayStore() || isCall() ||
+ (mayLoad() && hasOrderedMemoryRef())) {
SawStore = true;
return false;
}
@@ -1308,8 +1317,8 @@ bool MachineInstr::isSafeToMove(const TargetInstrInfo *TII,
// load.
if (mayLoad() && !isInvariantLoad(AA))
// Otherwise, this is a real load. If there is a store between the load and
- // end of block, or if the load is volatile, we can't move it.
- return !SawStore && !hasVolatileMemoryRef();
+ // end of block, we can't move it.
+ return !SawStore;
return true;
}
@@ -1340,11 +1349,11 @@ bool MachineInstr::isSafeToReMat(const TargetInstrInfo *TII,
return true;
}
-/// hasVolatileMemoryRef - Return true if this instruction may have a
-/// volatile memory reference, or if the information describing the
-/// memory reference is not available. Return false if it is known to
-/// have no volatile memory references.
-bool MachineInstr::hasVolatileMemoryRef() const {
+/// hasOrderedMemoryRef - Return true if this instruction may have an ordered
+/// or volatile memory reference, or if the information describing the memory
+/// reference is not available. Return false if it is known to have no ordered
+/// memory references.
+bool MachineInstr::hasOrderedMemoryRef() const {
// An instruction known never to access memory won't have a volatile access.
if (!mayStore() &&
!mayLoad() &&
@@ -1357,9 +1366,9 @@ bool MachineInstr::hasVolatileMemoryRef() const {
if (memoperands_empty())
return true;
- // Check the memory reference information for volatile references.
+ // Check the memory reference information for ordered references.
for (mmo_iterator I = memoperands_begin(), E = memoperands_end(); I != E; ++I)
- if ((*I)->isVolatile())
+ if (!(*I)->isUnordered())
return true;
return false;
@@ -1461,7 +1470,9 @@ void MachineInstr::copyImplicitOps(const MachineInstr *MI) {
}
void MachineInstr::dump() const {
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
dbgs() << " " << *this;
+#endif
}
static void printDebugLoc(DebugLoc DL, const MachineFunction *MF,
@@ -1540,6 +1551,10 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM) const {
OS << " [sideeffect]";
if (ExtraInfo & InlineAsm::Extra_IsAlignStack)
OS << " [alignstack]";
+ if (getInlineAsmDialect() == InlineAsm::AD_ATT)
+ OS << " [attdialect]";
+ if (getInlineAsmDialect() == InlineAsm::AD_Intel)
+ OS << " [inteldialect]";
StartOp = AsmDescOp = InlineAsm::MIOp_FirstOperand;
FirstOp = false;
diff --git a/contrib/llvm/lib/CodeGen/MachineInstrBundle.cpp b/contrib/llvm/lib/CodeGen/MachineInstrBundle.cpp
index b7de7bf..1f7fbfc 100644
--- a/contrib/llvm/lib/CodeGen/MachineInstrBundle.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineInstrBundle.cpp
@@ -109,10 +109,10 @@ void llvm::finalizeBundle(MachineBasicBlock &MBB,
MachineInstrBuilder MIB = BuildMI(MBB, FirstMI, FirstMI->getDebugLoc(),
TII->get(TargetOpcode::BUNDLE));
- SmallVector<unsigned, 8> LocalDefs;
- SmallSet<unsigned, 8> LocalDefSet;
+ SmallVector<unsigned, 32> LocalDefs;
+ SmallSet<unsigned, 32> LocalDefSet;
SmallSet<unsigned, 8> DeadDefSet;
- SmallSet<unsigned, 8> KilledDefSet;
+ SmallSet<unsigned, 16> KilledDefSet;
SmallVector<unsigned, 8> ExternUses;
SmallSet<unsigned, 8> ExternUseSet;
SmallSet<unsigned, 8> KilledUseSet;
@@ -181,7 +181,7 @@ void llvm::finalizeBundle(MachineBasicBlock &MBB,
Defs.clear();
}
- SmallSet<unsigned, 8> Added;
+ SmallSet<unsigned, 32> Added;
for (unsigned i = 0, e = LocalDefs.size(); i != e; ++i) {
unsigned Reg = LocalDefs[i];
if (Added.insert(Reg)) {
@@ -248,10 +248,10 @@ bool llvm::finalizeBundles(MachineFunction &MF) {
// MachineOperand iterator
//===----------------------------------------------------------------------===//
-MachineOperandIteratorBase::RegInfo
+MachineOperandIteratorBase::VirtRegInfo
MachineOperandIteratorBase::analyzeVirtReg(unsigned Reg,
SmallVectorImpl<std::pair<MachineInstr*, unsigned> > *Ops) {
- RegInfo RI = { false, false, false };
+ VirtRegInfo RI = { false, false, false };
for(; isValid(); ++*this) {
MachineOperand &MO = deref();
if (!MO.isReg() || MO.getReg() != Reg)
@@ -276,3 +276,53 @@ MachineOperandIteratorBase::analyzeVirtReg(unsigned Reg,
}
return RI;
}
+
+MachineOperandIteratorBase::PhysRegInfo
+MachineOperandIteratorBase::analyzePhysReg(unsigned Reg,
+ const TargetRegisterInfo *TRI) {
+ bool AllDefsDead = true;
+ PhysRegInfo PRI = {false, false, false, false, false, false, false};
+
+ assert(TargetRegisterInfo::isPhysicalRegister(Reg) &&
+ "analyzePhysReg not given a physical register!");
+ for (; isValid(); ++*this) {
+ MachineOperand &MO = deref();
+
+ if (MO.isRegMask() && MO.clobbersPhysReg(Reg))
+ PRI.Clobbers = true; // Regmask clobbers Reg.
+
+ if (!MO.isReg())
+ continue;
+
+ unsigned MOReg = MO.getReg();
+ if (!MOReg || !TargetRegisterInfo::isPhysicalRegister(MOReg))
+ continue;
+
+ bool IsRegOrSuperReg = MOReg == Reg || TRI->isSubRegister(MOReg, Reg);
+ bool IsRegOrOverlapping = MOReg == Reg || TRI->regsOverlap(MOReg, Reg);
+
+ if (IsRegOrSuperReg && MO.readsReg()) {
+ // Reg or a super-reg is read, and perhaps killed also.
+ PRI.Reads = true;
+ PRI.Kills = MO.isKill();
+ } if (IsRegOrOverlapping && MO.readsReg()) {
+ PRI.ReadsOverlap = true;// Reg or an overlapping register is read.
+ }
+
+ if (!MO.isDef())
+ continue;
+
+ if (IsRegOrSuperReg) {
+ PRI.Defines = true; // Reg or a super-register is defined.
+ if (!MO.isDead())
+ AllDefsDead = false;
+ }
+ if (IsRegOrOverlapping)
+ PRI.Clobbers = true; // Reg or an overlapping reg is defined.
+ }
+
+ if (AllDefsDead && PRI.Defines)
+ PRI.DefinesDead = true; // Reg or super-register was defined and was dead.
+
+ return PRI;
+}
diff --git a/contrib/llvm/lib/CodeGen/MachineLICM.cpp b/contrib/llvm/lib/CodeGen/MachineLICM.cpp
index efec481..169443e 100644
--- a/contrib/llvm/lib/CodeGen/MachineLICM.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineLICM.cpp
@@ -334,7 +334,7 @@ bool MachineLICM::runOnMachineFunction(MachineFunction &MF) {
DEBUG(dbgs() << "******** Pre-regalloc Machine LICM: ");
else
DEBUG(dbgs() << "******** Post-regalloc Machine LICM: ");
- DEBUG(dbgs() << MF.getFunction()->getName() << " ********\n");
+ DEBUG(dbgs() << MF.getName() << " ********\n");
if (PreRegAlloc) {
// Estimate register pressure during pre-regalloc pass.
diff --git a/contrib/llvm/lib/CodeGen/MachineLoopInfo.cpp b/contrib/llvm/lib/CodeGen/MachineLoopInfo.cpp
index 9f3829e..27afeec 100644
--- a/contrib/llvm/lib/CodeGen/MachineLoopInfo.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineLoopInfo.cpp
@@ -74,6 +74,8 @@ MachineBasicBlock *MachineLoop::getBottomBlock() {
return BotMBB;
}
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void MachineLoop::dump() const {
print(dbgs());
}
+#endif
diff --git a/contrib/llvm/lib/CodeGen/MachineModuleInfo.cpp b/contrib/llvm/lib/CodeGen/MachineModuleInfo.cpp
index ea98b23..005bf78 100644
--- a/contrib/llvm/lib/CodeGen/MachineModuleInfo.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineModuleInfo.cpp
@@ -25,7 +25,7 @@
using namespace llvm;
using namespace llvm::dwarf;
-// Handle the Pass registration stuff necessary to use TargetData's.
+// Handle the Pass registration stuff necessary to use DataLayout's.
INITIALIZE_PASS(MachineModuleInfo, "machinemoduleinfo",
"Machine Module Information", false, false)
char MachineModuleInfo::ID = 0;
diff --git a/contrib/llvm/lib/CodeGen/MachineModuleInfoImpls.cpp b/contrib/llvm/lib/CodeGen/MachineModuleInfoImpls.cpp
index 5ab56c0..a1c7e9f 100644
--- a/contrib/llvm/lib/CodeGen/MachineModuleInfoImpls.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineModuleInfoImpls.cpp
@@ -21,8 +21,8 @@ using namespace llvm;
//===----------------------------------------------------------------------===//
// Out of line virtual method.
-void MachineModuleInfoMachO::Anchor() {}
-void MachineModuleInfoELF::Anchor() {}
+void MachineModuleInfoMachO::anchor() {}
+void MachineModuleInfoELF::anchor() {}
static int SortSymbolPair(const void *LHS, const void *RHS) {
typedef std::pair<MCSymbol*, MachineModuleInfoImpl::StubValueTy> PairTy;
diff --git a/contrib/llvm/lib/CodeGen/MachinePostDominators.cpp b/contrib/llvm/lib/CodeGen/MachinePostDominators.cpp
new file mode 100644
index 0000000..c3f6e92
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/MachinePostDominators.cpp
@@ -0,0 +1,55 @@
+//===- MachinePostDominators.cpp -Machine Post Dominator Calculation ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements simple dominator construction algorithms for finding
+// post dominators on machine functions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachinePostDominators.h"
+
+using namespace llvm;
+
+char MachinePostDominatorTree::ID = 0;
+
+//declare initializeMachinePostDominatorTreePass
+INITIALIZE_PASS(MachinePostDominatorTree, "machinepostdomtree",
+ "MachinePostDominator Tree Construction", true, true)
+
+MachinePostDominatorTree::MachinePostDominatorTree() : MachineFunctionPass(ID) {
+ initializeMachinePostDominatorTreePass(*PassRegistry::getPassRegistry());
+ DT = new DominatorTreeBase<MachineBasicBlock>(true); //true indicate
+ // postdominator
+}
+
+FunctionPass *
+MachinePostDominatorTree::createMachinePostDominatorTreePass() {
+ return new MachinePostDominatorTree();
+}
+
+bool
+MachinePostDominatorTree::runOnMachineFunction(MachineFunction &F) {
+ DT->recalculate(F);
+ return false;
+}
+
+MachinePostDominatorTree::~MachinePostDominatorTree() {
+ delete DT;
+}
+
+void
+MachinePostDominatorTree::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+void
+MachinePostDominatorTree::print(llvm::raw_ostream &OS, const Module *M) const {
+ DT->print(OS);
+}
diff --git a/contrib/llvm/lib/CodeGen/MachineRegisterInfo.cpp b/contrib/llvm/lib/CodeGen/MachineRegisterInfo.cpp
index 5fb938f..95d7a7d 100644
--- a/contrib/llvm/lib/CodeGen/MachineRegisterInfo.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineRegisterInfo.cpp
@@ -21,7 +21,7 @@ MachineRegisterInfo::MachineRegisterInfo(const TargetRegisterInfo &TRI)
: TRI(&TRI), IsSSA(true), TracksLiveness(true) {
VRegInfo.reserve(256);
RegAllocHints.reserve(256);
- UsedPhysRegs.resize(TRI.getNumRegs());
+ UsedRegUnits.resize(TRI.getNumRegUnits());
UsedPhysRegMask.resize(TRI.getNumRegs());
// Create the physreg use/def lists.
@@ -32,7 +32,7 @@ MachineRegisterInfo::MachineRegisterInfo(const TargetRegisterInfo &TRI)
MachineRegisterInfo::~MachineRegisterInfo() {
#ifndef NDEBUG
clearVirtRegs();
- for (unsigned i = 0, e = UsedPhysRegs.size(); i != e; ++i)
+ for (unsigned i = 0, e = TRI->getNumRegs(); i != e; ++i)
assert(!PhysRegUseDefLists[i] &&
"PhysRegUseDefLists has entries after all instructions are deleted");
#endif
@@ -306,22 +306,18 @@ void MachineRegisterInfo::dumpUses(unsigned Reg) const {
void MachineRegisterInfo::freezeReservedRegs(const MachineFunction &MF) {
ReservedRegs = TRI->getReservedRegs(MF);
+ assert(ReservedRegs.size() == TRI->getNumRegs() &&
+ "Invalid ReservedRegs vector from target");
}
bool MachineRegisterInfo::isConstantPhysReg(unsigned PhysReg,
const MachineFunction &MF) const {
assert(TargetRegisterInfo::isPhysicalRegister(PhysReg));
- // Check if any overlapping register is modified.
+ // Check if any overlapping register is modified, or allocatable so it may be
+ // used later.
for (MCRegAliasIterator AI(PhysReg, TRI, true); AI.isValid(); ++AI)
- if (!def_empty(*AI))
- return false;
-
- // Check if any overlapping register is allocatable so it may be used later.
- if (AllocatableRegs.empty())
- AllocatableRegs = TRI->getAllocatableSet(MF);
- for (MCRegAliasIterator AI(PhysReg, TRI, true); AI.isValid(); ++AI)
- if (AllocatableRegs.test(*AI))
+ if (!def_empty(*AI) || isAllocatable(*AI))
return false;
return true;
}
diff --git a/contrib/llvm/lib/CodeGen/MachineScheduler.cpp b/contrib/llvm/lib/CodeGen/MachineScheduler.cpp
index a1dc948..a4817d0 100644
--- a/contrib/llvm/lib/CodeGen/MachineScheduler.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineScheduler.cpp
@@ -18,11 +18,8 @@
#include "llvm/CodeGen/MachineScheduler.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/RegisterClassInfo.h"
-#include "llvm/CodeGen/RegisterPressure.h"
-#include "llvm/CodeGen/ScheduleDAGInstrs.h"
+#include "llvm/CodeGen/ScheduleDAGILP.h"
#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/MC/MCInstrItineraries.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
@@ -35,10 +32,12 @@
using namespace llvm;
-static cl::opt<bool> ForceTopDown("misched-topdown", cl::Hidden,
- cl::desc("Force top-down list scheduling"));
-static cl::opt<bool> ForceBottomUp("misched-bottomup", cl::Hidden,
- cl::desc("Force bottom-up list scheduling"));
+namespace llvm {
+cl::opt<bool> ForceTopDown("misched-topdown", cl::Hidden,
+ cl::desc("Force top-down list scheduling"));
+cl::opt<bool> ForceBottomUp("misched-bottomup", cl::Hidden,
+ cl::desc("Force bottom-up list scheduling"));
+}
#ifndef NDEBUG
static cl::opt<bool> ViewMISchedDAGs("view-misched-dags", cl::Hidden,
@@ -50,6 +49,15 @@ static cl::opt<unsigned> MISchedCutoff("misched-cutoff", cl::Hidden,
static bool ViewMISchedDAGs = false;
#endif // NDEBUG
+// Threshold to very roughly model an out-of-order processor's instruction
+// buffers. If the actual value of this threshold matters much in practice, then
+// it can be specified by the machine model. For now, it's an experimental
+// tuning knob to determine when and if it matters.
+static cl::opt<unsigned> ILPWindow("ilp-window", cl::Hidden,
+ cl::desc("Allow expected latency to exceed the critical path by N cycles "
+ "before attempting to balance ILP"),
+ cl::init(10U));
+
//===----------------------------------------------------------------------===//
// Machine Instruction Scheduling Pass and Registry
//===----------------------------------------------------------------------===//
@@ -221,7 +229,7 @@ bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) {
// The Scheduler may insert instructions during either schedule() or
// exitRegion(), even for empty regions. So the local iterators 'I' and
// 'RegionEnd' are invalid across these calls.
- unsigned RemainingCount = MBB->size();
+ unsigned RemainingInstrs = MBB->size();
for(MachineBasicBlock::iterator RegionEnd = MBB->end();
RegionEnd != MBB->begin(); RegionEnd = Scheduler->begin()) {
@@ -230,19 +238,19 @@ bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) {
|| TII->isSchedulingBoundary(llvm::prior(RegionEnd), MBB, *MF)) {
--RegionEnd;
// Count the boundary instruction.
- --RemainingCount;
+ --RemainingInstrs;
}
// The next region starts above the previous region. Look backward in the
// instruction stream until we find the nearest boundary.
MachineBasicBlock::iterator I = RegionEnd;
- for(;I != MBB->begin(); --I, --RemainingCount) {
+ for(;I != MBB->begin(); --I, --RemainingInstrs) {
if (TII->isSchedulingBoundary(llvm::prior(I), MBB, *MF))
break;
}
// Notify the scheduler of the region, even if we may skip scheduling
// it. Perhaps it still needs to be bundled.
- Scheduler->enterRegion(MBB, I, RegionEnd, RemainingCount);
+ Scheduler->enterRegion(MBB, I, RegionEnd, RemainingInstrs);
// Skip empty scheduling regions (0 or 1 schedulable instructions).
if (I == RegionEnd || I == llvm::prior(RegionEnd)) {
@@ -252,11 +260,11 @@ bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) {
continue;
}
DEBUG(dbgs() << "********** MI Scheduling **********\n");
- DEBUG(dbgs() << MF->getFunction()->getName()
+ DEBUG(dbgs() << MF->getName()
<< ":BB#" << MBB->getNumber() << "\n From: " << *I << " To: ";
if (RegionEnd != MBB->end()) dbgs() << *RegionEnd;
else dbgs() << "End";
- dbgs() << " Remaining: " << RemainingCount << "\n");
+ dbgs() << " Remaining: " << RemainingInstrs << "\n");
// Schedule a region: possibly reorder instructions.
// This invalidates 'RegionEnd' and 'I'.
@@ -269,7 +277,7 @@ bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) {
// scheduler for the top of it's scheduled region.
RegionEnd = Scheduler->begin();
}
- assert(RemainingCount == 0 && "Instruction count mismatch!");
+ assert(RemainingInstrs == 0 && "Instruction count mismatch!");
Scheduler->finishBlock();
}
Scheduler->finalizeSchedule();
@@ -281,157 +289,20 @@ void MachineScheduler::print(raw_ostream &O, const Module* m) const {
// unimplemented
}
-//===----------------------------------------------------------------------===//
-// MachineSchedStrategy - Interface to a machine scheduling algorithm.
-//===----------------------------------------------------------------------===//
-
-namespace {
-class ScheduleDAGMI;
-
-/// MachineSchedStrategy - Interface used by ScheduleDAGMI to drive the selected
-/// scheduling algorithm.
-///
-/// If this works well and targets wish to reuse ScheduleDAGMI, we may expose it
-/// in ScheduleDAGInstrs.h
-class MachineSchedStrategy {
-public:
- virtual ~MachineSchedStrategy() {}
-
- /// Initialize the strategy after building the DAG for a new region.
- virtual void initialize(ScheduleDAGMI *DAG) = 0;
-
- /// Pick the next node to schedule, or return NULL. Set IsTopNode to true to
- /// schedule the node at the top of the unscheduled region. Otherwise it will
- /// be scheduled at the bottom.
- virtual SUnit *pickNode(bool &IsTopNode) = 0;
-
- /// Notify MachineSchedStrategy that ScheduleDAGMI has scheduled a node.
- virtual void schedNode(SUnit *SU, bool IsTopNode) = 0;
-
- /// When all predecessor dependencies have been resolved, free this node for
- /// top-down scheduling.
- virtual void releaseTopNode(SUnit *SU) = 0;
- /// When all successor dependencies have been resolved, free this node for
- /// bottom-up scheduling.
- virtual void releaseBottomNode(SUnit *SU) = 0;
-};
-} // namespace
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+void ReadyQueue::dump() {
+ dbgs() << Name << ": ";
+ for (unsigned i = 0, e = Queue.size(); i < e; ++i)
+ dbgs() << Queue[i]->NodeNum << " ";
+ dbgs() << "\n";
+}
+#endif
//===----------------------------------------------------------------------===//
// ScheduleDAGMI - Base class for MachineInstr scheduling with LiveIntervals
// preservation.
//===----------------------------------------------------------------------===//
-namespace {
-/// ScheduleDAGMI is an implementation of ScheduleDAGInstrs that schedules
-/// machine instructions while updating LiveIntervals.
-class ScheduleDAGMI : public ScheduleDAGInstrs {
- AliasAnalysis *AA;
- RegisterClassInfo *RegClassInfo;
- MachineSchedStrategy *SchedImpl;
-
- MachineBasicBlock::iterator LiveRegionEnd;
-
- /// Register pressure in this region computed by buildSchedGraph.
- IntervalPressure RegPressure;
- RegPressureTracker RPTracker;
-
- /// List of pressure sets that exceed the target's pressure limit before
- /// scheduling, listed in increasing set ID order. Each pressure set is paired
- /// with its max pressure in the currently scheduled regions.
- std::vector<PressureElement> RegionCriticalPSets;
-
- /// The top of the unscheduled zone.
- MachineBasicBlock::iterator CurrentTop;
- IntervalPressure TopPressure;
- RegPressureTracker TopRPTracker;
-
- /// The bottom of the unscheduled zone.
- MachineBasicBlock::iterator CurrentBottom;
- IntervalPressure BotPressure;
- RegPressureTracker BotRPTracker;
-
-#ifndef NDEBUG
- /// The number of instructions scheduled so far. Used to cut off the
- /// scheduler at the point determined by misched-cutoff.
- unsigned NumInstrsScheduled;
-#endif
-public:
- ScheduleDAGMI(MachineSchedContext *C, MachineSchedStrategy *S):
- ScheduleDAGInstrs(*C->MF, *C->MLI, *C->MDT, /*IsPostRA=*/false, C->LIS),
- AA(C->AA), RegClassInfo(C->RegClassInfo), SchedImpl(S),
- RPTracker(RegPressure), CurrentTop(), TopRPTracker(TopPressure),
- CurrentBottom(), BotRPTracker(BotPressure) {
-#ifndef NDEBUG
- NumInstrsScheduled = 0;
-#endif
- }
-
- ~ScheduleDAGMI() {
- delete SchedImpl;
- }
-
- MachineBasicBlock::iterator top() const { return CurrentTop; }
- MachineBasicBlock::iterator bottom() const { return CurrentBottom; }
-
- /// Implement the ScheduleDAGInstrs interface for handling the next scheduling
- /// region. This covers all instructions in a block, while schedule() may only
- /// cover a subset.
- void enterRegion(MachineBasicBlock *bb,
- MachineBasicBlock::iterator begin,
- MachineBasicBlock::iterator end,
- unsigned endcount);
-
- /// Implement ScheduleDAGInstrs interface for scheduling a sequence of
- /// reorderable instructions.
- void schedule();
-
- /// Get current register pressure for the top scheduled instructions.
- const IntervalPressure &getTopPressure() const { return TopPressure; }
- const RegPressureTracker &getTopRPTracker() const { return TopRPTracker; }
-
- /// Get current register pressure for the bottom scheduled instructions.
- const IntervalPressure &getBotPressure() const { return BotPressure; }
- const RegPressureTracker &getBotRPTracker() const { return BotRPTracker; }
-
- /// Get register pressure for the entire scheduling region before scheduling.
- const IntervalPressure &getRegPressure() const { return RegPressure; }
-
- const std::vector<PressureElement> &getRegionCriticalPSets() const {
- return RegionCriticalPSets;
- }
-
- /// getIssueWidth - Return the max instructions per scheduling group.
- unsigned getIssueWidth() const {
- return (InstrItins && InstrItins->SchedModel)
- ? InstrItins->SchedModel->IssueWidth : 1;
- }
-
- /// getNumMicroOps - Return the number of issue slots required for this MI.
- unsigned getNumMicroOps(MachineInstr *MI) const {
- if (!InstrItins) return 1;
- int UOps = InstrItins->getNumMicroOps(MI->getDesc().getSchedClass());
- return (UOps >= 0) ? UOps : TII->getNumMicroOps(InstrItins, MI);
- }
-
-protected:
- void initRegPressure();
- void updateScheduledPressure(std::vector<unsigned> NewMaxPressure);
-
- void moveInstruction(MachineInstr *MI, MachineBasicBlock::iterator InsertPos);
- bool checkSchedLimit();
-
- void releaseRoots();
-
- void releaseSucc(SUnit *SU, SDep *SuccEdge);
- void releaseSuccessors(SUnit *SU);
- void releasePred(SUnit *SU, SDep *PredEdge);
- void releasePredecessors(SUnit *SU);
-
- void placeDebugValues();
-};
-} // namespace
-
/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. When
/// NumPredsLeft reaches zero, release the successor node.
///
@@ -498,7 +369,7 @@ void ScheduleDAGMI::moveInstruction(MachineInstr *MI,
BB->splice(InsertPos, BB, MI);
// Update LiveIntervals
- LIS->handleMove(MI);
+ LIS->handleMove(MI, /*UpdateFlags=*/true);
// Recede RegionBegin if an instruction moves above the first.
if (RegionBegin == InsertPos)
@@ -565,6 +436,9 @@ void ScheduleDAGMI::initRegPressure() {
std::vector<unsigned> RegionPressure = RPTracker.getPressure().MaxSetPressure;
for (unsigned i = 0, e = RegionPressure.size(); i < e; ++i) {
unsigned Limit = TRI->getRegPressureSetLimit(i);
+ DEBUG(dbgs() << TRI->getRegPressureSetName(i)
+ << "Limit " << Limit
+ << " Actual " << RegionPressure[i] << "\n");
if (RegionPressure[i] > Limit)
RegionCriticalPSets.push_back(PressureElement(i, 0));
}
@@ -587,6 +461,74 @@ updateScheduledPressure(std::vector<unsigned> NewMaxPressure) {
}
}
+/// schedule - Called back from MachineScheduler::runOnMachineFunction
+/// after setting up the current scheduling region. [RegionBegin, RegionEnd)
+/// only includes instructions that have DAG nodes, not scheduling boundaries.
+///
+/// This is a skeletal driver, with all the functionality pushed into helpers,
+/// so that it can be easilly extended by experimental schedulers. Generally,
+/// implementing MachineSchedStrategy should be sufficient to implement a new
+/// scheduling algorithm. However, if a scheduler further subclasses
+/// ScheduleDAGMI then it will want to override this virtual method in order to
+/// update any specialized state.
+void ScheduleDAGMI::schedule() {
+ buildDAGWithRegPressure();
+
+ postprocessDAG();
+
+ DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
+ SUnits[su].dumpAll(this));
+
+ if (ViewMISchedDAGs) viewGraph();
+
+ initQueues();
+
+ bool IsTopNode = false;
+ while (SUnit *SU = SchedImpl->pickNode(IsTopNode)) {
+ assert(!SU->isScheduled && "Node already scheduled");
+ if (!checkSchedLimit())
+ break;
+
+ scheduleMI(SU, IsTopNode);
+
+ updateQueues(SU, IsTopNode);
+ }
+ assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");
+
+ placeDebugValues();
+
+ DEBUG({
+ unsigned BBNum = top()->getParent()->getNumber();
+ dbgs() << "*** Final schedule for BB#" << BBNum << " ***\n";
+ dumpSchedule();
+ dbgs() << '\n';
+ });
+}
+
+/// Build the DAG and setup three register pressure trackers.
+void ScheduleDAGMI::buildDAGWithRegPressure() {
+ // Initialize the register pressure tracker used by buildSchedGraph.
+ RPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd);
+
+ // Account for liveness generate by the region boundary.
+ if (LiveRegionEnd != RegionEnd)
+ RPTracker.recede();
+
+ // Build the DAG, and compute current register pressure.
+ buildSchedGraph(AA, &RPTracker);
+ if (ViewMISchedDAGs) viewGraph();
+
+ // Initialize top/bottom trackers after computing region pressure.
+ initRegPressure();
+}
+
+/// Apply each ScheduleDAGMutation step in order.
+void ScheduleDAGMI::postprocessDAG() {
+ for (unsigned i = 0, e = Mutations.size(); i < e; ++i) {
+ Mutations[i]->apply(this);
+ }
+}
+
// Release all DAG roots for scheduling.
void ScheduleDAGMI::releaseRoots() {
SmallVector<SUnit*, 16> BotRoots;
@@ -607,28 +549,10 @@ void ScheduleDAGMI::releaseRoots() {
SchedImpl->releaseBottomNode(*I);
}
-/// schedule - Called back from MachineScheduler::runOnMachineFunction
-/// after setting up the current scheduling region. [RegionBegin, RegionEnd)
-/// only includes instructions that have DAG nodes, not scheduling boundaries.
-void ScheduleDAGMI::schedule() {
- // Initialize the register pressure tracker used by buildSchedGraph.
- RPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd);
-
- // Account for liveness generate by the region boundary.
- if (LiveRegionEnd != RegionEnd)
- RPTracker.recede();
-
- // Build the DAG, and compute current register pressure.
- buildSchedGraph(AA, &RPTracker);
-
- // Initialize top/bottom trackers after computing region pressure.
- initRegPressure();
-
- DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
- SUnits[su].dumpAll(this));
-
- if (ViewMISchedDAGs) viewGraph();
+/// Identify DAG roots and setup scheduler queues.
+void ScheduleDAGMI::initQueues() {
+ // Initialize the strategy before modifying the DAG.
SchedImpl->initialize(this);
// Release edges from the special Entry node or to the special Exit node.
@@ -638,61 +562,64 @@ void ScheduleDAGMI::schedule() {
// Release all DAG roots for scheduling.
releaseRoots();
+ SchedImpl->registerRoots();
+
CurrentTop = nextIfDebug(RegionBegin, RegionEnd);
CurrentBottom = RegionEnd;
- bool IsTopNode = false;
- while (SUnit *SU = SchedImpl->pickNode(IsTopNode)) {
- if (!checkSchedLimit())
- break;
-
- // Move the instruction to its new location in the instruction stream.
- MachineInstr *MI = SU->getInstr();
-
- if (IsTopNode) {
- assert(SU->isTopReady() && "node still has unscheduled dependencies");
- if (&*CurrentTop == MI)
- CurrentTop = nextIfDebug(++CurrentTop, CurrentBottom);
- else {
- moveInstruction(MI, CurrentTop);
- TopRPTracker.setPos(MI);
- }
+}
- // Update top scheduled pressure.
- TopRPTracker.advance();
- assert(TopRPTracker.getPos() == CurrentTop && "out of sync");
- updateScheduledPressure(TopRPTracker.getPressure().MaxSetPressure);
+/// Move an instruction and update register pressure.
+void ScheduleDAGMI::scheduleMI(SUnit *SU, bool IsTopNode) {
+ // Move the instruction to its new location in the instruction stream.
+ MachineInstr *MI = SU->getInstr();
- // Release dependent instructions for scheduling.
- releaseSuccessors(SU);
+ if (IsTopNode) {
+ assert(SU->isTopReady() && "node still has unscheduled dependencies");
+ if (&*CurrentTop == MI)
+ CurrentTop = nextIfDebug(++CurrentTop, CurrentBottom);
+ else {
+ moveInstruction(MI, CurrentTop);
+ TopRPTracker.setPos(MI);
}
+
+ // Update top scheduled pressure.
+ TopRPTracker.advance();
+ assert(TopRPTracker.getPos() == CurrentTop && "out of sync");
+ updateScheduledPressure(TopRPTracker.getPressure().MaxSetPressure);
+ }
+ else {
+ assert(SU->isBottomReady() && "node still has unscheduled dependencies");
+ MachineBasicBlock::iterator priorII =
+ priorNonDebug(CurrentBottom, CurrentTop);
+ if (&*priorII == MI)
+ CurrentBottom = priorII;
else {
- assert(SU->isBottomReady() && "node still has unscheduled dependencies");
- MachineBasicBlock::iterator priorII =
- priorNonDebug(CurrentBottom, CurrentTop);
- if (&*priorII == MI)
- CurrentBottom = priorII;
- else {
- if (&*CurrentTop == MI) {
- CurrentTop = nextIfDebug(++CurrentTop, priorII);
- TopRPTracker.setPos(CurrentTop);
- }
- moveInstruction(MI, CurrentBottom);
- CurrentBottom = MI;
+ if (&*CurrentTop == MI) {
+ CurrentTop = nextIfDebug(++CurrentTop, priorII);
+ TopRPTracker.setPos(CurrentTop);
}
- // Update bottom scheduled pressure.
- BotRPTracker.recede();
- assert(BotRPTracker.getPos() == CurrentBottom && "out of sync");
- updateScheduledPressure(BotRPTracker.getPressure().MaxSetPressure);
-
- // Release dependent instructions for scheduling.
- releasePredecessors(SU);
+ moveInstruction(MI, CurrentBottom);
+ CurrentBottom = MI;
}
- SU->isScheduled = true;
- SchedImpl->schedNode(SU, IsTopNode);
+ // Update bottom scheduled pressure.
+ BotRPTracker.recede();
+ assert(BotRPTracker.getPos() == CurrentBottom && "out of sync");
+ updateScheduledPressure(BotRPTracker.getPressure().MaxSetPressure);
}
- assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");
+}
- placeDebugValues();
+/// Update scheduler queues after scheduling an instruction.
+void ScheduleDAGMI::updateQueues(SUnit *SU, bool IsTopNode) {
+ // Release dependent instructions for scheduling.
+ if (IsTopNode)
+ releaseSuccessors(SU);
+ else
+ releasePredecessors(SU);
+
+ SU->isScheduled = true;
+
+ // Notify the scheduling strategy after updating the DAG.
+ SchedImpl->schedNode(SU, IsTopNode);
}
/// Reinsert any remaining debug_values, just like the PostRA scheduler.
@@ -716,91 +643,146 @@ void ScheduleDAGMI::placeDebugValues() {
FirstDbgValue = NULL;
}
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+void ScheduleDAGMI::dumpSchedule() const {
+ for (MachineBasicBlock::iterator MI = begin(), ME = end(); MI != ME; ++MI) {
+ if (SUnit *SU = getSUnit(&(*MI)))
+ SU->dump(this);
+ else
+ dbgs() << "Missing SUnit\n";
+ }
+}
+#endif
+
//===----------------------------------------------------------------------===//
// ConvergingScheduler - Implementation of the standard MachineSchedStrategy.
//===----------------------------------------------------------------------===//
namespace {
-/// ReadyQueue encapsulates vector of "ready" SUnits with basic convenience
-/// methods for pushing and removing nodes. ReadyQueue's are uniquely identified
-/// by an ID. SUnit::NodeQueueId is a mask of the ReadyQueues the SUnit is in.
-class ReadyQueue {
- unsigned ID;
- std::string Name;
- std::vector<SUnit*> Queue;
-
+/// ConvergingScheduler shrinks the unscheduled zone using heuristics to balance
+/// the schedule.
+class ConvergingScheduler : public MachineSchedStrategy {
public:
- ReadyQueue(unsigned id, const Twine &name): ID(id), Name(name.str()) {}
-
- unsigned getID() const { return ID; }
-
- StringRef getName() const { return Name; }
-
- // SU is in this queue if it's NodeQueueID is a superset of this ID.
- bool isInQueue(SUnit *SU) const { return (SU->NodeQueueId & ID); }
-
- bool empty() const { return Queue.empty(); }
-
- unsigned size() const { return Queue.size(); }
-
- typedef std::vector<SUnit*>::iterator iterator;
+ /// Represent the type of SchedCandidate found within a single queue.
+ /// pickNodeBidirectional depends on these listed by decreasing priority.
+ enum CandReason {
+ NoCand, SingleExcess, SingleCritical, ResourceReduce, ResourceDemand,
+ BotHeightReduce, BotPathReduce, TopDepthReduce, TopPathReduce,
+ SingleMax, MultiPressure, NextDefUse, NodeOrder};
- iterator begin() { return Queue.begin(); }
+#ifndef NDEBUG
+ static const char *getReasonStr(ConvergingScheduler::CandReason Reason);
+#endif
- iterator end() { return Queue.end(); }
+ /// Policy for scheduling the next instruction in the candidate's zone.
+ struct CandPolicy {
+ bool ReduceLatency;
+ unsigned ReduceResIdx;
+ unsigned DemandResIdx;
- iterator find(SUnit *SU) {
- return std::find(Queue.begin(), Queue.end(), SU);
- }
+ CandPolicy(): ReduceLatency(false), ReduceResIdx(0), DemandResIdx(0) {}
+ };
- void push(SUnit *SU) {
- Queue.push_back(SU);
- SU->NodeQueueId |= ID;
- }
+ /// Status of an instruction's critical resource consumption.
+ struct SchedResourceDelta {
+ // Count critical resources in the scheduled region required by SU.
+ unsigned CritResources;
- void remove(iterator I) {
- (*I)->NodeQueueId &= ~ID;
- *I = Queue.back();
- Queue.pop_back();
- }
+ // Count critical resources from another region consumed by SU.
+ unsigned DemandedResources;
- void dump() {
- dbgs() << Name << ": ";
- for (unsigned i = 0, e = Queue.size(); i < e; ++i)
- dbgs() << Queue[i]->NodeNum << " ";
- dbgs() << "\n";
- }
-};
+ SchedResourceDelta(): CritResources(0), DemandedResources(0) {}
-/// ConvergingScheduler shrinks the unscheduled zone using heuristics to balance
-/// the schedule.
-class ConvergingScheduler : public MachineSchedStrategy {
+ bool operator==(const SchedResourceDelta &RHS) const {
+ return CritResources == RHS.CritResources
+ && DemandedResources == RHS.DemandedResources;
+ }
+ bool operator!=(const SchedResourceDelta &RHS) const {
+ return !operator==(RHS);
+ }
+ };
/// Store the state used by ConvergingScheduler heuristics, required for the
/// lifetime of one invocation of pickNode().
struct SchedCandidate {
+ CandPolicy Policy;
+
// The best SUnit candidate.
SUnit *SU;
+ // The reason for this candidate.
+ CandReason Reason;
+
// Register pressure values for the best candidate.
RegPressureDelta RPDelta;
- SchedCandidate(): SU(NULL) {}
+ // Critical resource consumption of the best candidate.
+ SchedResourceDelta ResDelta;
+
+ SchedCandidate(const CandPolicy &policy)
+ : Policy(policy), SU(NULL), Reason(NoCand) {}
+
+ bool isValid() const { return SU; }
+
+ // Copy the status of another candidate without changing policy.
+ void setBest(SchedCandidate &Best) {
+ assert(Best.Reason != NoCand && "uninitialized Sched candidate");
+ SU = Best.SU;
+ Reason = Best.Reason;
+ RPDelta = Best.RPDelta;
+ ResDelta = Best.ResDelta;
+ }
+
+ void initResourceDelta(const ScheduleDAGMI *DAG,
+ const TargetSchedModel *SchedModel);
+ };
+
+ /// Summarize the unscheduled region.
+ struct SchedRemainder {
+ // Critical path through the DAG in expected latency.
+ unsigned CriticalPath;
+
+ // Unscheduled resources
+ SmallVector<unsigned, 16> RemainingCounts;
+ // Critical resource for the unscheduled zone.
+ unsigned CritResIdx;
+ // Number of micro-ops left to schedule.
+ unsigned RemainingMicroOps;
+ // Is the unscheduled zone resource limited.
+ bool IsResourceLimited;
+
+ unsigned MaxRemainingCount;
+
+ void reset() {
+ CriticalPath = 0;
+ RemainingCounts.clear();
+ CritResIdx = 0;
+ RemainingMicroOps = 0;
+ IsResourceLimited = false;
+ MaxRemainingCount = 0;
+ }
+
+ SchedRemainder() { reset(); }
+
+ void init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel);
};
- /// Represent the type of SchedCandidate found within a single queue.
- enum CandResult {
- NoCand, NodeOrder, SingleExcess, SingleCritical, SingleMax, MultiPressure };
/// Each Scheduling boundary is associated with ready queues. It tracks the
- /// current cycle in whichever direction at has moved, and maintains the state
+ /// current cycle in the direction of movement, and maintains the state
/// of "hazards" and other interlocks at the current cycle.
struct SchedBoundary {
ScheduleDAGMI *DAG;
+ const TargetSchedModel *SchedModel;
+ SchedRemainder *Rem;
ReadyQueue Available;
ReadyQueue Pending;
bool CheckPending;
+ // For heuristics, keep a list of the nodes that immediately depend on the
+ // most recently scheduled node.
+ SmallPtrSet<const SUnit*, 8> NextSUs;
+
ScheduleHazardRecognizer *HazardRec;
unsigned CurrCycle;
@@ -809,29 +791,88 @@ class ConvergingScheduler : public MachineSchedStrategy {
/// MinReadyCycle - Cycle of the soonest available instruction.
unsigned MinReadyCycle;
+ // The expected latency of the critical path in this scheduled zone.
+ unsigned ExpectedLatency;
+
+ // Resources used in the scheduled zone beyond this boundary.
+ SmallVector<unsigned, 16> ResourceCounts;
+
+ // Cache the critical resources ID in this scheduled zone.
+ unsigned CritResIdx;
+
+ // Is the scheduled region resource limited vs. latency limited.
+ bool IsResourceLimited;
+
+ unsigned ExpectedCount;
+
+ // Policy flag: attempt to find ILP until expected latency is covered.
+ bool ShouldIncreaseILP;
+
+#ifndef NDEBUG
// Remember the greatest min operand latency.
unsigned MaxMinLatency;
+#endif
+
+ void reset() {
+ Available.clear();
+ Pending.clear();
+ CheckPending = false;
+ NextSUs.clear();
+ HazardRec = 0;
+ CurrCycle = 0;
+ IssueCount = 0;
+ MinReadyCycle = UINT_MAX;
+ ExpectedLatency = 0;
+ ResourceCounts.resize(1);
+ assert(!ResourceCounts[0] && "nonzero count for bad resource");
+ CritResIdx = 0;
+ IsResourceLimited = false;
+ ExpectedCount = 0;
+ ShouldIncreaseILP = false;
+#ifndef NDEBUG
+ MaxMinLatency = 0;
+#endif
+ // Reserve a zero-count for invalid CritResIdx.
+ ResourceCounts.resize(1);
+ }
/// Pending queues extend the ready queues with the same ID and the
/// PendingFlag set.
SchedBoundary(unsigned ID, const Twine &Name):
- DAG(0), Available(ID, Name+".A"),
- Pending(ID << ConvergingScheduler::LogMaxQID, Name+".P"),
- CheckPending(false), HazardRec(0), CurrCycle(0), IssueCount(0),
- MinReadyCycle(UINT_MAX), MaxMinLatency(0) {}
+ DAG(0), SchedModel(0), Rem(0), Available(ID, Name+".A"),
+ Pending(ID << ConvergingScheduler::LogMaxQID, Name+".P") {
+ reset();
+ }
~SchedBoundary() { delete HazardRec; }
+ void init(ScheduleDAGMI *dag, const TargetSchedModel *smodel,
+ SchedRemainder *rem);
+
bool isTop() const {
return Available.getID() == ConvergingScheduler::TopQID;
}
+ unsigned getUnscheduledLatency(SUnit *SU) const {
+ if (isTop())
+ return SU->getHeight();
+ return SU->getDepth();
+ }
+
+ unsigned getCriticalCount() const {
+ return ResourceCounts[CritResIdx];
+ }
+
bool checkHazard(SUnit *SU);
+ void checkILPPolicy();
+
void releaseNode(SUnit *SU, unsigned ReadyCycle);
void bumpCycle();
+ void countResource(unsigned PIdx, unsigned Cycles);
+
void bumpNode(SUnit *SU);
void releasePending();
@@ -841,10 +882,13 @@ class ConvergingScheduler : public MachineSchedStrategy {
SUnit *pickOnlyChoice();
};
+private:
ScheduleDAGMI *DAG;
+ const TargetSchedModel *SchedModel;
const TargetRegisterInfo *TRI;
// State of the top and bottom scheduled instruction boundaries.
+ SchedRemainder Rem;
SchedBoundary Top;
SchedBoundary Bot;
@@ -857,7 +901,7 @@ public:
};
ConvergingScheduler():
- DAG(0), TRI(0), Top(TopQID, "TopQ"), Bot(BotQID, "BotQ") {}
+ DAG(0), SchedModel(0), TRI(0), Top(TopQID, "TopQ"), Bot(BotQID, "BotQ") {}
virtual void initialize(ScheduleDAGMI *dag);
@@ -869,28 +913,80 @@ public:
virtual void releaseBottomNode(SUnit *SU);
+ virtual void registerRoots();
+
protected:
- SUnit *pickNodeBidrectional(bool &IsTopNode);
+ void balanceZones(
+ ConvergingScheduler::SchedBoundary &CriticalZone,
+ ConvergingScheduler::SchedCandidate &CriticalCand,
+ ConvergingScheduler::SchedBoundary &OppositeZone,
+ ConvergingScheduler::SchedCandidate &OppositeCand);
+
+ void checkResourceLimits(ConvergingScheduler::SchedCandidate &TopCand,
+ ConvergingScheduler::SchedCandidate &BotCand);
+
+ void tryCandidate(SchedCandidate &Cand,
+ SchedCandidate &TryCand,
+ SchedBoundary &Zone,
+ const RegPressureTracker &RPTracker,
+ RegPressureTracker &TempTracker);
+
+ SUnit *pickNodeBidirectional(bool &IsTopNode);
+
+ void pickNodeFromQueue(SchedBoundary &Zone,
+ const RegPressureTracker &RPTracker,
+ SchedCandidate &Candidate);
- CandResult pickNodeFromQueue(ReadyQueue &Q,
- const RegPressureTracker &RPTracker,
- SchedCandidate &Candidate);
#ifndef NDEBUG
- void traceCandidate(const char *Label, const ReadyQueue &Q, SUnit *SU,
- PressureElement P = PressureElement());
+ void traceCandidate(const SchedCandidate &Cand, const SchedBoundary &Zone);
#endif
};
} // namespace
+void ConvergingScheduler::SchedRemainder::
+init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel) {
+ reset();
+ if (!SchedModel->hasInstrSchedModel())
+ return;
+ RemainingCounts.resize(SchedModel->getNumProcResourceKinds());
+ for (std::vector<SUnit>::iterator
+ I = DAG->SUnits.begin(), E = DAG->SUnits.end(); I != E; ++I) {
+ const MCSchedClassDesc *SC = DAG->getSchedClass(&*I);
+ RemainingMicroOps += SchedModel->getNumMicroOps(I->getInstr(), SC);
+ for (TargetSchedModel::ProcResIter
+ PI = SchedModel->getWriteProcResBegin(SC),
+ PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
+ unsigned PIdx = PI->ProcResourceIdx;
+ unsigned Factor = SchedModel->getResourceFactor(PIdx);
+ RemainingCounts[PIdx] += (Factor * PI->Cycles);
+ }
+ }
+}
+
+void ConvergingScheduler::SchedBoundary::
+init(ScheduleDAGMI *dag, const TargetSchedModel *smodel, SchedRemainder *rem) {
+ reset();
+ DAG = dag;
+ SchedModel = smodel;
+ Rem = rem;
+ if (SchedModel->hasInstrSchedModel())
+ ResourceCounts.resize(SchedModel->getNumProcResourceKinds());
+}
+
void ConvergingScheduler::initialize(ScheduleDAGMI *dag) {
DAG = dag;
+ SchedModel = DAG->getSchedModel();
TRI = DAG->TRI;
- Top.DAG = dag;
- Bot.DAG = dag;
+ Rem.init(DAG, SchedModel);
+ Top.init(DAG, SchedModel, &Rem);
+ Bot.init(DAG, SchedModel, &Rem);
+
+ // Initialize resource counts.
- // Initialize the HazardRecognizers.
+ // Initialize the HazardRecognizers. If itineraries don't exist, are empty, or
+ // are disabled, then these HazardRecs will be disabled.
+ const InstrItineraryData *Itin = SchedModel->getInstrItineraries();
const TargetMachine &TM = DAG->MF.getTarget();
- const InstrItineraryData *Itin = TM.getInstrItineraryData();
Top.HazardRec = TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG);
Bot.HazardRec = TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG);
@@ -905,13 +1001,12 @@ void ConvergingScheduler::releaseTopNode(SUnit *SU) {
for (SUnit::succ_iterator I = SU->Preds.begin(), E = SU->Preds.end();
I != E; ++I) {
unsigned PredReadyCycle = I->getSUnit()->TopReadyCycle;
- unsigned Latency =
- DAG->computeOperandLatency(I->getSUnit(), SU, *I, /*FindMin=*/true);
+ unsigned MinLatency = I->getMinLatency();
#ifndef NDEBUG
- Top.MaxMinLatency = std::max(Latency, Top.MaxMinLatency);
+ Top.MaxMinLatency = std::max(MinLatency, Top.MaxMinLatency);
#endif
- if (SU->TopReadyCycle < PredReadyCycle + Latency)
- SU->TopReadyCycle = PredReadyCycle + Latency;
+ if (SU->TopReadyCycle < PredReadyCycle + MinLatency)
+ SU->TopReadyCycle = PredReadyCycle + MinLatency;
}
Top.releaseNode(SU, SU->TopReadyCycle);
}
@@ -925,17 +1020,27 @@ void ConvergingScheduler::releaseBottomNode(SUnit *SU) {
for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
I != E; ++I) {
unsigned SuccReadyCycle = I->getSUnit()->BotReadyCycle;
- unsigned Latency =
- DAG->computeOperandLatency(SU, I->getSUnit(), *I, /*FindMin=*/true);
+ unsigned MinLatency = I->getMinLatency();
#ifndef NDEBUG
- Bot.MaxMinLatency = std::max(Latency, Bot.MaxMinLatency);
+ Bot.MaxMinLatency = std::max(MinLatency, Bot.MaxMinLatency);
#endif
- if (SU->BotReadyCycle < SuccReadyCycle + Latency)
- SU->BotReadyCycle = SuccReadyCycle + Latency;
+ if (SU->BotReadyCycle < SuccReadyCycle + MinLatency)
+ SU->BotReadyCycle = SuccReadyCycle + MinLatency;
}
Bot.releaseNode(SU, SU->BotReadyCycle);
}
+void ConvergingScheduler::registerRoots() {
+ Rem.CriticalPath = DAG->ExitSU.getDepth();
+ // Some roots may not feed into ExitSU. Check all of them in case.
+ for (std::vector<SUnit*>::const_iterator
+ I = Bot.Available.begin(), E = Bot.Available.end(); I != E; ++I) {
+ if ((*I)->getDepth() > Rem.CriticalPath)
+ Rem.CriticalPath = (*I)->getDepth();
+ }
+ DEBUG(dbgs() << "Critical Path: " << Rem.CriticalPath << '\n');
+}
+
/// Does this SU have a hazard within the current instruction group.
///
/// The scheduler supports two modes of hazard recognition. The first is the
@@ -953,14 +1058,27 @@ bool ConvergingScheduler::SchedBoundary::checkHazard(SUnit *SU) {
if (HazardRec->isEnabled())
return HazardRec->getHazardType(SU) != ScheduleHazardRecognizer::NoHazard;
- if (IssueCount + DAG->getNumMicroOps(SU->getInstr()) > DAG->getIssueWidth())
+ unsigned uops = SchedModel->getNumMicroOps(SU->getInstr());
+ if ((IssueCount > 0) && (IssueCount + uops > SchedModel->getIssueWidth())) {
+ DEBUG(dbgs() << " SU(" << SU->NodeNum << ") uops="
+ << SchedModel->getNumMicroOps(SU->getInstr()) << '\n');
return true;
-
+ }
return false;
}
+/// If expected latency is covered, disable ILP policy.
+void ConvergingScheduler::SchedBoundary::checkILPPolicy() {
+ if (ShouldIncreaseILP
+ && (IsResourceLimited || ExpectedLatency <= CurrCycle)) {
+ ShouldIncreaseILP = false;
+ DEBUG(dbgs() << "Disable ILP: " << Available.getName() << '\n');
+ }
+}
+
void ConvergingScheduler::SchedBoundary::releaseNode(SUnit *SU,
unsigned ReadyCycle) {
+
if (ReadyCycle < MinReadyCycle)
MinReadyCycle = ReadyCycle;
@@ -970,15 +1088,31 @@ void ConvergingScheduler::SchedBoundary::releaseNode(SUnit *SU,
Pending.push(SU);
else
Available.push(SU);
+
+ // Record this node as an immediate dependent of the scheduled node.
+ NextSUs.insert(SU);
+
+ // If CriticalPath has been computed, then check if the unscheduled nodes
+ // exceed the ILP window. Before registerRoots, CriticalPath==0.
+ if (Rem->CriticalPath && (ExpectedLatency + getUnscheduledLatency(SU)
+ > Rem->CriticalPath + ILPWindow)) {
+ ShouldIncreaseILP = true;
+ DEBUG(dbgs() << "Increase ILP: " << Available.getName() << " "
+ << ExpectedLatency << " + " << getUnscheduledLatency(SU) << '\n');
+ }
}
/// Move the boundary of scheduled code by one cycle.
void ConvergingScheduler::SchedBoundary::bumpCycle() {
- unsigned Width = DAG->getIssueWidth();
+ unsigned Width = SchedModel->getIssueWidth();
IssueCount = (IssueCount <= Width) ? 0 : IssueCount - Width;
+ unsigned NextCycle = CurrCycle + 1;
assert(MinReadyCycle < UINT_MAX && "MinReadyCycle uninitialized");
- unsigned NextCycle = std::max(CurrCycle + 1, MinReadyCycle);
+ if (MinReadyCycle > NextCycle) {
+ IssueCount = 0;
+ NextCycle = MinReadyCycle;
+ }
if (!HazardRec->isEnabled()) {
// Bypass HazardRec virtual calls.
@@ -994,11 +1128,39 @@ void ConvergingScheduler::SchedBoundary::bumpCycle() {
}
}
CheckPending = true;
+ IsResourceLimited = getCriticalCount() > std::max(ExpectedLatency, CurrCycle);
- DEBUG(dbgs() << "*** " << Available.getName() << " cycle "
+ DEBUG(dbgs() << " *** " << Available.getName() << " cycle "
<< CurrCycle << '\n');
}
+/// Add the given processor resource to this scheduled zone.
+void ConvergingScheduler::SchedBoundary::countResource(unsigned PIdx,
+ unsigned Cycles) {
+ unsigned Factor = SchedModel->getResourceFactor(PIdx);
+ DEBUG(dbgs() << " " << SchedModel->getProcResource(PIdx)->Name
+ << " +(" << Cycles << "x" << Factor
+ << ") / " << SchedModel->getLatencyFactor() << '\n');
+
+ unsigned Count = Factor * Cycles;
+ ResourceCounts[PIdx] += Count;
+ assert(Rem->RemainingCounts[PIdx] >= Count && "resource double counted");
+ Rem->RemainingCounts[PIdx] -= Count;
+
+ // Reset MaxRemainingCount for sanity.
+ Rem->MaxRemainingCount = 0;
+
+ // Check if this resource exceeds the current critical resource by a full
+ // cycle. If so, it becomes the critical resource.
+ if ((int)(ResourceCounts[PIdx] - ResourceCounts[CritResIdx])
+ >= (int)SchedModel->getLatencyFactor()) {
+ CritResIdx = PIdx;
+ DEBUG(dbgs() << " *** Critical resource "
+ << SchedModel->getProcResource(PIdx)->Name << " x"
+ << ResourceCounts[PIdx] << '\n');
+ }
+}
+
/// Move the boundary of scheduled code by one SUnit.
void ConvergingScheduler::SchedBoundary::bumpNode(SUnit *SU) {
// Update the reservation table.
@@ -1010,11 +1172,38 @@ void ConvergingScheduler::SchedBoundary::bumpNode(SUnit *SU) {
}
HazardRec->EmitInstruction(SU);
}
+ // Update resource counts and critical resource.
+ if (SchedModel->hasInstrSchedModel()) {
+ const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
+ Rem->RemainingMicroOps -= SchedModel->getNumMicroOps(SU->getInstr(), SC);
+ for (TargetSchedModel::ProcResIter
+ PI = SchedModel->getWriteProcResBegin(SC),
+ PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
+ countResource(PI->ProcResourceIdx, PI->Cycles);
+ }
+ }
+ if (isTop()) {
+ if (SU->getDepth() > ExpectedLatency)
+ ExpectedLatency = SU->getDepth();
+ }
+ else {
+ if (SU->getHeight() > ExpectedLatency)
+ ExpectedLatency = SU->getHeight();
+ }
+
+ IsResourceLimited = getCriticalCount() > std::max(ExpectedLatency, CurrCycle);
+
// Check the instruction group dispatch limit.
// TODO: Check if this SU must end a dispatch group.
- IssueCount += DAG->getNumMicroOps(SU->getInstr());
- if (IssueCount >= DAG->getIssueWidth()) {
- DEBUG(dbgs() << "*** Max instrs at cycle " << CurrCycle << '\n');
+ IssueCount += SchedModel->getNumMicroOps(SU->getInstr());
+
+ // checkHazard prevents scheduling multiple instructions per cycle that exceed
+ // issue width. However, we commonly reach the maximum. In this case
+ // opportunistically bump the cycle to avoid uselessly checking everything in
+ // the readyQ. Furthermore, a single instruction may produce more than one
+ // cycle's worth of micro-ops.
+ if (IssueCount >= SchedModel->getIssueWidth()) {
+ DEBUG(dbgs() << " *** Max instrs at cycle " << CurrCycle << '\n');
bumpCycle();
}
}
@@ -1045,6 +1234,7 @@ void ConvergingScheduler::SchedBoundary::releasePending() {
Pending.remove(Pending.begin()+i);
--i; --e;
}
+ DEBUG(if (!Pending.empty()) Pending.dump());
CheckPending = false;
}
@@ -1059,12 +1249,23 @@ void ConvergingScheduler::SchedBoundary::removeReady(SUnit *SU) {
}
/// If this queue only has one ready candidate, return it. As a side effect,
-/// advance the cycle until at least one node is ready. If multiple instructions
-/// are ready, return NULL.
+/// defer any nodes that now hit a hazard, and advance the cycle until at least
+/// one node is ready. If multiple instructions are ready, return NULL.
SUnit *ConvergingScheduler::SchedBoundary::pickOnlyChoice() {
if (CheckPending)
releasePending();
+ if (IssueCount > 0) {
+ // Defer any ready instrs that now have a hazard.
+ for (ReadyQueue::iterator I = Available.begin(); I != Available.end();) {
+ if (checkHazard(*I)) {
+ Pending.push(*I);
+ I = Available.remove(I);
+ continue;
+ }
+ ++I;
+ }
+ }
for (unsigned i = 0; Available.empty(); ++i) {
assert(i <= (HazardRec->getMaxLookAhead() + MaxMinLatency) &&
"permanent hazard"); (void)i;
@@ -1076,18 +1277,262 @@ SUnit *ConvergingScheduler::SchedBoundary::pickOnlyChoice() {
return NULL;
}
-#ifndef NDEBUG
-void ConvergingScheduler::traceCandidate(const char *Label, const ReadyQueue &Q,
- SUnit *SU, PressureElement P) {
- dbgs() << Label << " " << Q.getName() << " ";
- if (P.isValid())
- dbgs() << TRI->getRegPressureSetName(P.PSetID) << ":" << P.UnitIncrease
- << " ";
- else
- dbgs() << " ";
- SU->dump(DAG);
+/// Record the candidate policy for opposite zones with different critical
+/// resources.
+///
+/// If the CriticalZone is latency limited, don't force a policy for the
+/// candidates here. Instead, When releasing each candidate, releaseNode
+/// compares the region's critical path to the candidate's height or depth and
+/// the scheduled zone's expected latency then sets ShouldIncreaseILP.
+void ConvergingScheduler::balanceZones(
+ ConvergingScheduler::SchedBoundary &CriticalZone,
+ ConvergingScheduler::SchedCandidate &CriticalCand,
+ ConvergingScheduler::SchedBoundary &OppositeZone,
+ ConvergingScheduler::SchedCandidate &OppositeCand) {
+
+ if (!CriticalZone.IsResourceLimited)
+ return;
+
+ SchedRemainder *Rem = CriticalZone.Rem;
+
+ // If the critical zone is overconsuming a resource relative to the
+ // remainder, try to reduce it.
+ unsigned RemainingCritCount =
+ Rem->RemainingCounts[CriticalZone.CritResIdx];
+ if ((int)(Rem->MaxRemainingCount - RemainingCritCount)
+ > (int)SchedModel->getLatencyFactor()) {
+ CriticalCand.Policy.ReduceResIdx = CriticalZone.CritResIdx;
+ DEBUG(dbgs() << "Balance " << CriticalZone.Available.getName() << " reduce "
+ << SchedModel->getProcResource(CriticalZone.CritResIdx)->Name
+ << '\n');
+ }
+ // If the other zone is underconsuming a resource relative to the full zone,
+ // try to increase it.
+ unsigned OppositeCount =
+ OppositeZone.ResourceCounts[CriticalZone.CritResIdx];
+ if ((int)(OppositeZone.ExpectedCount - OppositeCount)
+ > (int)SchedModel->getLatencyFactor()) {
+ OppositeCand.Policy.DemandResIdx = CriticalZone.CritResIdx;
+ DEBUG(dbgs() << "Balance " << OppositeZone.Available.getName() << " demand "
+ << SchedModel->getProcResource(OppositeZone.CritResIdx)->Name
+ << '\n');
+ }
+}
+
+/// Determine if the scheduled zones exceed resource limits or critical path and
+/// set each candidate's ReduceHeight policy accordingly.
+void ConvergingScheduler::checkResourceLimits(
+ ConvergingScheduler::SchedCandidate &TopCand,
+ ConvergingScheduler::SchedCandidate &BotCand) {
+
+ Bot.checkILPPolicy();
+ Top.checkILPPolicy();
+ if (Bot.ShouldIncreaseILP)
+ BotCand.Policy.ReduceLatency = true;
+ if (Top.ShouldIncreaseILP)
+ TopCand.Policy.ReduceLatency = true;
+
+ // Handle resource-limited regions.
+ if (Top.IsResourceLimited && Bot.IsResourceLimited
+ && Top.CritResIdx == Bot.CritResIdx) {
+ // If the scheduled critical resource in both zones is no longer the
+ // critical remaining resource, attempt to reduce resource height both ways.
+ if (Top.CritResIdx != Rem.CritResIdx) {
+ TopCand.Policy.ReduceResIdx = Top.CritResIdx;
+ BotCand.Policy.ReduceResIdx = Bot.CritResIdx;
+ DEBUG(dbgs() << "Reduce scheduled "
+ << SchedModel->getProcResource(Top.CritResIdx)->Name << '\n');
+ }
+ return;
+ }
+ // Handle latency-limited regions.
+ if (!Top.IsResourceLimited && !Bot.IsResourceLimited) {
+ // If the total scheduled expected latency exceeds the region's critical
+ // path then reduce latency both ways.
+ //
+ // Just because a zone is not resource limited does not mean it is latency
+ // limited. Unbuffered resource, such as max micro-ops may cause CurrCycle
+ // to exceed expected latency.
+ if ((Top.ExpectedLatency + Bot.ExpectedLatency >= Rem.CriticalPath)
+ && (Rem.CriticalPath > Top.CurrCycle + Bot.CurrCycle)) {
+ TopCand.Policy.ReduceLatency = true;
+ BotCand.Policy.ReduceLatency = true;
+ DEBUG(dbgs() << "Reduce scheduled latency " << Top.ExpectedLatency
+ << " + " << Bot.ExpectedLatency << '\n');
+ }
+ return;
+ }
+ // The critical resource is different in each zone, so request balancing.
+
+ // Compute the cost of each zone.
+ Rem.MaxRemainingCount = std::max(
+ Rem.RemainingMicroOps * SchedModel->getMicroOpFactor(),
+ Rem.RemainingCounts[Rem.CritResIdx]);
+ Top.ExpectedCount = std::max(Top.ExpectedLatency, Top.CurrCycle);
+ Top.ExpectedCount = std::max(
+ Top.getCriticalCount(),
+ Top.ExpectedCount * SchedModel->getLatencyFactor());
+ Bot.ExpectedCount = std::max(Bot.ExpectedLatency, Bot.CurrCycle);
+ Bot.ExpectedCount = std::max(
+ Bot.getCriticalCount(),
+ Bot.ExpectedCount * SchedModel->getLatencyFactor());
+
+ balanceZones(Top, TopCand, Bot, BotCand);
+ balanceZones(Bot, BotCand, Top, TopCand);
+}
+
+void ConvergingScheduler::SchedCandidate::
+initResourceDelta(const ScheduleDAGMI *DAG,
+ const TargetSchedModel *SchedModel) {
+ if (!Policy.ReduceResIdx && !Policy.DemandResIdx)
+ return;
+
+ const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
+ for (TargetSchedModel::ProcResIter
+ PI = SchedModel->getWriteProcResBegin(SC),
+ PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
+ if (PI->ProcResourceIdx == Policy.ReduceResIdx)
+ ResDelta.CritResources += PI->Cycles;
+ if (PI->ProcResourceIdx == Policy.DemandResIdx)
+ ResDelta.DemandedResources += PI->Cycles;
+ }
+}
+
+/// Return true if this heuristic determines order.
+static bool tryLess(unsigned TryVal, unsigned CandVal,
+ ConvergingScheduler::SchedCandidate &TryCand,
+ ConvergingScheduler::SchedCandidate &Cand,
+ ConvergingScheduler::CandReason Reason) {
+ if (TryVal < CandVal) {
+ TryCand.Reason = Reason;
+ return true;
+ }
+ if (TryVal > CandVal) {
+ if (Cand.Reason > Reason)
+ Cand.Reason = Reason;
+ return true;
+ }
+ return false;
+}
+static bool tryGreater(unsigned TryVal, unsigned CandVal,
+ ConvergingScheduler::SchedCandidate &TryCand,
+ ConvergingScheduler::SchedCandidate &Cand,
+ ConvergingScheduler::CandReason Reason) {
+ if (TryVal > CandVal) {
+ TryCand.Reason = Reason;
+ return true;
+ }
+ if (TryVal < CandVal) {
+ if (Cand.Reason > Reason)
+ Cand.Reason = Reason;
+ return true;
+ }
+ return false;
+}
+
+/// Apply a set of heursitics to a new candidate. Heuristics are currently
+/// hierarchical. This may be more efficient than a graduated cost model because
+/// we don't need to evaluate all aspects of the model for each node in the
+/// queue. But it's really done to make the heuristics easier to debug and
+/// statistically analyze.
+///
+/// \param Cand provides the policy and current best candidate.
+/// \param TryCand refers to the next SUnit candidate, otherwise uninitialized.
+/// \param Zone describes the scheduled zone that we are extending.
+/// \param RPTracker describes reg pressure within the scheduled zone.
+/// \param TempTracker is a scratch pressure tracker to reuse in queries.
+void ConvergingScheduler::tryCandidate(SchedCandidate &Cand,
+ SchedCandidate &TryCand,
+ SchedBoundary &Zone,
+ const RegPressureTracker &RPTracker,
+ RegPressureTracker &TempTracker) {
+
+ // Always initialize TryCand's RPDelta.
+ TempTracker.getMaxPressureDelta(TryCand.SU->getInstr(), TryCand.RPDelta,
+ DAG->getRegionCriticalPSets(),
+ DAG->getRegPressure().MaxSetPressure);
+
+ // Initialize the candidate if needed.
+ if (!Cand.isValid()) {
+ TryCand.Reason = NodeOrder;
+ return;
+ }
+ // Avoid exceeding the target's limit.
+ if (tryLess(TryCand.RPDelta.Excess.UnitIncrease,
+ Cand.RPDelta.Excess.UnitIncrease, TryCand, Cand, SingleExcess))
+ return;
+ if (Cand.Reason == SingleExcess)
+ Cand.Reason = MultiPressure;
+
+ // Avoid increasing the max critical pressure in the scheduled region.
+ if (tryLess(TryCand.RPDelta.CriticalMax.UnitIncrease,
+ Cand.RPDelta.CriticalMax.UnitIncrease,
+ TryCand, Cand, SingleCritical))
+ return;
+ if (Cand.Reason == SingleCritical)
+ Cand.Reason = MultiPressure;
+
+ // Avoid critical resource consumption and balance the schedule.
+ TryCand.initResourceDelta(DAG, SchedModel);
+ if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources,
+ TryCand, Cand, ResourceReduce))
+ return;
+ if (tryGreater(TryCand.ResDelta.DemandedResources,
+ Cand.ResDelta.DemandedResources,
+ TryCand, Cand, ResourceDemand))
+ return;
+
+ // Avoid serializing long latency dependence chains.
+ if (Cand.Policy.ReduceLatency) {
+ if (Zone.isTop()) {
+ if (Cand.SU->getDepth() * SchedModel->getLatencyFactor()
+ > Zone.ExpectedCount) {
+ if (tryLess(TryCand.SU->getDepth(), Cand.SU->getDepth(),
+ TryCand, Cand, TopDepthReduce))
+ return;
+ }
+ if (tryGreater(TryCand.SU->getHeight(), Cand.SU->getHeight(),
+ TryCand, Cand, TopPathReduce))
+ return;
+ }
+ else {
+ if (Cand.SU->getHeight() * SchedModel->getLatencyFactor()
+ > Zone.ExpectedCount) {
+ if (tryLess(TryCand.SU->getHeight(), Cand.SU->getHeight(),
+ TryCand, Cand, BotHeightReduce))
+ return;
+ }
+ if (tryGreater(TryCand.SU->getDepth(), Cand.SU->getDepth(),
+ TryCand, Cand, BotPathReduce))
+ return;
+ }
+ }
+
+ // Avoid increasing the max pressure of the entire region.
+ if (tryLess(TryCand.RPDelta.CurrentMax.UnitIncrease,
+ Cand.RPDelta.CurrentMax.UnitIncrease, TryCand, Cand, SingleMax))
+ return;
+ if (Cand.Reason == SingleMax)
+ Cand.Reason = MultiPressure;
+
+ // Prefer immediate defs/users of the last scheduled instruction. This is a
+ // nice pressure avoidance strategy that also conserves the processor's
+ // register renaming resources and keeps the machine code readable.
+ if (Zone.NextSUs.count(TryCand.SU) && !Zone.NextSUs.count(Cand.SU)) {
+ TryCand.Reason = NextDefUse;
+ return;
+ }
+ if (!Zone.NextSUs.count(TryCand.SU) && Zone.NextSUs.count(Cand.SU)) {
+ if (Cand.Reason > NextDefUse)
+ Cand.Reason = NextDefUse;
+ return;
+ }
+ // Fall through to original instruction order.
+ if ((Zone.isTop() && TryCand.SU->NodeNum < Cand.SU->NodeNum)
+ || (!Zone.isTop() && TryCand.SU->NodeNum > Cand.SU->NodeNum)) {
+ TryCand.Reason = NodeOrder;
+ }
}
-#endif
/// pickNodeFromQueue helper that returns true if the LHS reg pressure effect is
/// more desirable than RHS from scheduling standpoint.
@@ -1098,109 +1543,144 @@ static bool compareRPDelta(const RegPressureDelta &LHS,
// have UnitIncrease==0, so are neutral.
// Avoid increasing the max critical pressure in the scheduled region.
- if (LHS.Excess.UnitIncrease != RHS.Excess.UnitIncrease)
+ if (LHS.Excess.UnitIncrease != RHS.Excess.UnitIncrease) {
+ DEBUG(dbgs() << "RP excess top - bot: "
+ << (LHS.Excess.UnitIncrease - RHS.Excess.UnitIncrease) << '\n');
return LHS.Excess.UnitIncrease < RHS.Excess.UnitIncrease;
-
+ }
// Avoid increasing the max critical pressure in the scheduled region.
- if (LHS.CriticalMax.UnitIncrease != RHS.CriticalMax.UnitIncrease)
+ if (LHS.CriticalMax.UnitIncrease != RHS.CriticalMax.UnitIncrease) {
+ DEBUG(dbgs() << "RP critical top - bot: "
+ << (LHS.CriticalMax.UnitIncrease - RHS.CriticalMax.UnitIncrease)
+ << '\n');
return LHS.CriticalMax.UnitIncrease < RHS.CriticalMax.UnitIncrease;
-
+ }
// Avoid increasing the max pressure of the entire region.
- if (LHS.CurrentMax.UnitIncrease != RHS.CurrentMax.UnitIncrease)
+ if (LHS.CurrentMax.UnitIncrease != RHS.CurrentMax.UnitIncrease) {
+ DEBUG(dbgs() << "RP current top - bot: "
+ << (LHS.CurrentMax.UnitIncrease - RHS.CurrentMax.UnitIncrease)
+ << '\n');
return LHS.CurrentMax.UnitIncrease < RHS.CurrentMax.UnitIncrease;
-
+ }
return false;
}
+#ifndef NDEBUG
+const char *ConvergingScheduler::getReasonStr(
+ ConvergingScheduler::CandReason Reason) {
+ switch (Reason) {
+ case NoCand: return "NOCAND ";
+ case SingleExcess: return "REG-EXCESS";
+ case SingleCritical: return "REG-CRIT ";
+ case SingleMax: return "REG-MAX ";
+ case MultiPressure: return "REG-MULTI ";
+ case ResourceReduce: return "RES-REDUCE";
+ case ResourceDemand: return "RES-DEMAND";
+ case TopDepthReduce: return "TOP-DEPTH ";
+ case TopPathReduce: return "TOP-PATH ";
+ case BotHeightReduce:return "BOT-HEIGHT";
+ case BotPathReduce: return "BOT-PATH ";
+ case NextDefUse: return "DEF-USE ";
+ case NodeOrder: return "ORDER ";
+ };
+ llvm_unreachable("Unknown reason!");
+}
+
+void ConvergingScheduler::traceCandidate(const SchedCandidate &Cand,
+ const SchedBoundary &Zone) {
+ const char *Label = getReasonStr(Cand.Reason);
+ PressureElement P;
+ unsigned ResIdx = 0;
+ unsigned Latency = 0;
+ switch (Cand.Reason) {
+ default:
+ break;
+ case SingleExcess:
+ P = Cand.RPDelta.Excess;
+ break;
+ case SingleCritical:
+ P = Cand.RPDelta.CriticalMax;
+ break;
+ case SingleMax:
+ P = Cand.RPDelta.CurrentMax;
+ break;
+ case ResourceReduce:
+ ResIdx = Cand.Policy.ReduceResIdx;
+ break;
+ case ResourceDemand:
+ ResIdx = Cand.Policy.DemandResIdx;
+ break;
+ case TopDepthReduce:
+ Latency = Cand.SU->getDepth();
+ break;
+ case TopPathReduce:
+ Latency = Cand.SU->getHeight();
+ break;
+ case BotHeightReduce:
+ Latency = Cand.SU->getHeight();
+ break;
+ case BotPathReduce:
+ Latency = Cand.SU->getDepth();
+ break;
+ }
+ dbgs() << Label << " " << Zone.Available.getName() << " ";
+ if (P.isValid())
+ dbgs() << TRI->getRegPressureSetName(P.PSetID) << ":" << P.UnitIncrease
+ << " ";
+ else
+ dbgs() << " ";
+ if (ResIdx)
+ dbgs() << SchedModel->getProcResource(ResIdx)->Name << " ";
+ else
+ dbgs() << " ";
+ if (Latency)
+ dbgs() << Latency << " cycles ";
+ else
+ dbgs() << " ";
+ Cand.SU->dump(DAG);
+}
+#endif
+
/// Pick the best candidate from the top queue.
///
/// TODO: getMaxPressureDelta results can be mostly cached for each SUnit during
/// DAG building. To adjust for the current scheduling location we need to
/// maintain the number of vreg uses remaining to be top-scheduled.
-ConvergingScheduler::CandResult ConvergingScheduler::
-pickNodeFromQueue(ReadyQueue &Q, const RegPressureTracker &RPTracker,
- SchedCandidate &Candidate) {
+void ConvergingScheduler::pickNodeFromQueue(SchedBoundary &Zone,
+ const RegPressureTracker &RPTracker,
+ SchedCandidate &Cand) {
+ ReadyQueue &Q = Zone.Available;
+
DEBUG(Q.dump());
// getMaxPressureDelta temporarily modifies the tracker.
RegPressureTracker &TempTracker = const_cast<RegPressureTracker&>(RPTracker);
- // BestSU remains NULL if no top candidates beat the best existing candidate.
- CandResult FoundCandidate = NoCand;
for (ReadyQueue::iterator I = Q.begin(), E = Q.end(); I != E; ++I) {
- RegPressureDelta RPDelta;
- TempTracker.getMaxPressureDelta((*I)->getInstr(), RPDelta,
- DAG->getRegionCriticalPSets(),
- DAG->getRegPressure().MaxSetPressure);
-
- // Initialize the candidate if needed.
- if (!Candidate.SU) {
- Candidate.SU = *I;
- Candidate.RPDelta = RPDelta;
- FoundCandidate = NodeOrder;
- continue;
- }
- // Avoid exceeding the target's limit.
- if (RPDelta.Excess.UnitIncrease < Candidate.RPDelta.Excess.UnitIncrease) {
- DEBUG(traceCandidate("ECAND", Q, *I, RPDelta.Excess));
- Candidate.SU = *I;
- Candidate.RPDelta = RPDelta;
- FoundCandidate = SingleExcess;
- continue;
- }
- if (RPDelta.Excess.UnitIncrease > Candidate.RPDelta.Excess.UnitIncrease)
- continue;
- if (FoundCandidate == SingleExcess)
- FoundCandidate = MultiPressure;
-
- // Avoid increasing the max critical pressure in the scheduled region.
- if (RPDelta.CriticalMax.UnitIncrease
- < Candidate.RPDelta.CriticalMax.UnitIncrease) {
- DEBUG(traceCandidate("PCAND", Q, *I, RPDelta.CriticalMax));
- Candidate.SU = *I;
- Candidate.RPDelta = RPDelta;
- FoundCandidate = SingleCritical;
- continue;
- }
- if (RPDelta.CriticalMax.UnitIncrease
- > Candidate.RPDelta.CriticalMax.UnitIncrease)
- continue;
- if (FoundCandidate == SingleCritical)
- FoundCandidate = MultiPressure;
-
- // Avoid increasing the max pressure of the entire region.
- if (RPDelta.CurrentMax.UnitIncrease
- < Candidate.RPDelta.CurrentMax.UnitIncrease) {
- DEBUG(traceCandidate("MCAND", Q, *I, RPDelta.CurrentMax));
- Candidate.SU = *I;
- Candidate.RPDelta = RPDelta;
- FoundCandidate = SingleMax;
- continue;
- }
- if (RPDelta.CurrentMax.UnitIncrease
- > Candidate.RPDelta.CurrentMax.UnitIncrease)
- continue;
- if (FoundCandidate == SingleMax)
- FoundCandidate = MultiPressure;
-
- // Fall through to original instruction order.
- // Only consider node order if Candidate was chosen from this Q.
- if (FoundCandidate == NoCand)
- continue;
- if ((Q.getID() == TopQID && (*I)->NodeNum < Candidate.SU->NodeNum)
- || (Q.getID() == BotQID && (*I)->NodeNum > Candidate.SU->NodeNum)) {
- DEBUG(traceCandidate("NCAND", Q, *I));
- Candidate.SU = *I;
- Candidate.RPDelta = RPDelta;
- FoundCandidate = NodeOrder;
+ SchedCandidate TryCand(Cand.Policy);
+ TryCand.SU = *I;
+ tryCandidate(Cand, TryCand, Zone, RPTracker, TempTracker);
+ if (TryCand.Reason != NoCand) {
+ // Initialize resource delta if needed in case future heuristics query it.
+ if (TryCand.ResDelta == SchedResourceDelta())
+ TryCand.initResourceDelta(DAG, SchedModel);
+ Cand.setBest(TryCand);
+ DEBUG(traceCandidate(Cand, Zone));
}
+ TryCand.SU = *I;
}
- return FoundCandidate;
+}
+
+static void tracePick(const ConvergingScheduler::SchedCandidate &Cand,
+ bool IsTop) {
+ DEBUG(dbgs() << "Pick " << (IsTop ? "top" : "bot")
+ << " SU(" << Cand.SU->NodeNum << ") "
+ << ConvergingScheduler::getReasonStr(Cand.Reason) << '\n');
}
/// Pick the best candidate node from either the top or bottom queue.
-SUnit *ConvergingScheduler::pickNodeBidrectional(bool &IsTopNode) {
+SUnit *ConvergingScheduler::pickNodeBidirectional(bool &IsTopNode) {
// Schedule as far as possible in the direction of no choice. This is most
// efficient, but also provides the best heuristics for CriticalPSets.
if (SUnit *SU = Bot.pickOnlyChoice()) {
@@ -1211,11 +1691,14 @@ SUnit *ConvergingScheduler::pickNodeBidrectional(bool &IsTopNode) {
IsTopNode = true;
return SU;
}
- SchedCandidate BotCand;
+ CandPolicy NoPolicy;
+ SchedCandidate BotCand(NoPolicy);
+ SchedCandidate TopCand(NoPolicy);
+ checkResourceLimits(TopCand, BotCand);
+
// Prefer bottom scheduling when heuristics are silent.
- CandResult BotResult = pickNodeFromQueue(Bot.Available,
- DAG->getBotRPTracker(), BotCand);
- assert(BotResult != NoCand && "failed to find the first candidate");
+ pickNodeFromQueue(Bot, DAG->getBotRPTracker(), BotCand);
+ assert(BotCand.Reason != NoCand && "failed to find the first candidate");
// If either Q has a single candidate that provides the least increase in
// Excess pressure, we can immediately schedule from that Q.
@@ -1224,37 +1707,41 @@ SUnit *ConvergingScheduler::pickNodeBidrectional(bool &IsTopNode) {
// affects picking from either Q. If scheduling in one direction must
// increase pressure for one of the excess PSets, then schedule in that
// direction first to provide more freedom in the other direction.
- if (BotResult == SingleExcess || BotResult == SingleCritical) {
+ if (BotCand.Reason == SingleExcess || BotCand.Reason == SingleCritical) {
IsTopNode = false;
+ tracePick(BotCand, IsTopNode);
return BotCand.SU;
}
// Check if the top Q has a better candidate.
- SchedCandidate TopCand;
- CandResult TopResult = pickNodeFromQueue(Top.Available,
- DAG->getTopRPTracker(), TopCand);
- assert(TopResult != NoCand && "failed to find the first candidate");
+ pickNodeFromQueue(Top, DAG->getTopRPTracker(), TopCand);
+ assert(TopCand.Reason != NoCand && "failed to find the first candidate");
- if (TopResult == SingleExcess || TopResult == SingleCritical) {
- IsTopNode = true;
- return TopCand.SU;
- }
// If either Q has a single candidate that minimizes pressure above the
// original region's pressure pick it.
- if (BotResult == SingleMax) {
+ if (TopCand.Reason <= SingleMax || BotCand.Reason <= SingleMax) {
+ if (TopCand.Reason < BotCand.Reason) {
+ IsTopNode = true;
+ tracePick(TopCand, IsTopNode);
+ return TopCand.SU;
+ }
IsTopNode = false;
+ tracePick(BotCand, IsTopNode);
return BotCand.SU;
}
- if (TopResult == SingleMax) {
+ // Check for a salient pressure difference and pick the best from either side.
+ if (compareRPDelta(TopCand.RPDelta, BotCand.RPDelta)) {
IsTopNode = true;
+ tracePick(TopCand, IsTopNode);
return TopCand.SU;
}
- // Check for a salient pressure difference and pick the best from either side.
- if (compareRPDelta(TopCand.RPDelta, BotCand.RPDelta)) {
+ // Otherwise prefer the bottom candidate, in node order if all else failed.
+ if (TopCand.Reason < BotCand.Reason) {
IsTopNode = true;
+ tracePick(TopCand, IsTopNode);
return TopCand.SU;
}
- // Otherwise prefer the bottom candidate in node order.
IsTopNode = false;
+ tracePick(BotCand, IsTopNode);
return BotCand.SU;
}
@@ -1266,33 +1753,34 @@ SUnit *ConvergingScheduler::pickNode(bool &IsTopNode) {
return NULL;
}
SUnit *SU;
- if (ForceTopDown) {
- SU = Top.pickOnlyChoice();
- if (!SU) {
- SchedCandidate TopCand;
- CandResult TopResult =
- pickNodeFromQueue(Top.Available, DAG->getTopRPTracker(), TopCand);
- assert(TopResult != NoCand && "failed to find the first candidate");
- (void)TopResult;
- SU = TopCand.SU;
+ do {
+ if (ForceTopDown) {
+ SU = Top.pickOnlyChoice();
+ if (!SU) {
+ CandPolicy NoPolicy;
+ SchedCandidate TopCand(NoPolicy);
+ pickNodeFromQueue(Top, DAG->getTopRPTracker(), TopCand);
+ assert(TopCand.Reason != NoCand && "failed to find the first candidate");
+ SU = TopCand.SU;
+ }
+ IsTopNode = true;
}
- IsTopNode = true;
- }
- else if (ForceBottomUp) {
- SU = Bot.pickOnlyChoice();
- if (!SU) {
- SchedCandidate BotCand;
- CandResult BotResult =
- pickNodeFromQueue(Bot.Available, DAG->getBotRPTracker(), BotCand);
- assert(BotResult != NoCand && "failed to find the first candidate");
- (void)BotResult;
- SU = BotCand.SU;
+ else if (ForceBottomUp) {
+ SU = Bot.pickOnlyChoice();
+ if (!SU) {
+ CandPolicy NoPolicy;
+ SchedCandidate BotCand(NoPolicy);
+ pickNodeFromQueue(Bot, DAG->getBotRPTracker(), BotCand);
+ assert(BotCand.Reason != NoCand && "failed to find the first candidate");
+ SU = BotCand.SU;
+ }
+ IsTopNode = false;
}
- IsTopNode = false;
- }
- else {
- SU = pickNodeBidrectional(IsTopNode);
- }
+ else {
+ SU = pickNodeBidirectional(IsTopNode);
+ }
+ } while (SU->isScheduled);
+
if (SU->isTopReady())
Top.removeReady(SU);
if (SU->isBottomReady())
@@ -1331,6 +1819,86 @@ ConvergingSchedRegistry("converge", "Standard converging scheduler.",
createConvergingSched);
//===----------------------------------------------------------------------===//
+// ILP Scheduler. Currently for experimental analysis of heuristics.
+//===----------------------------------------------------------------------===//
+
+namespace {
+/// \brief Order nodes by the ILP metric.
+struct ILPOrder {
+ ScheduleDAGILP *ILP;
+ bool MaximizeILP;
+
+ ILPOrder(ScheduleDAGILP *ilp, bool MaxILP): ILP(ilp), MaximizeILP(MaxILP) {}
+
+ /// \brief Apply a less-than relation on node priority.
+ bool operator()(const SUnit *A, const SUnit *B) const {
+ // Return true if A comes after B in the Q.
+ if (MaximizeILP)
+ return ILP->getILP(A) < ILP->getILP(B);
+ else
+ return ILP->getILP(A) > ILP->getILP(B);
+ }
+};
+
+/// \brief Schedule based on the ILP metric.
+class ILPScheduler : public MachineSchedStrategy {
+ ScheduleDAGILP ILP;
+ ILPOrder Cmp;
+
+ std::vector<SUnit*> ReadyQ;
+public:
+ ILPScheduler(bool MaximizeILP)
+ : ILP(/*BottomUp=*/true), Cmp(&ILP, MaximizeILP) {}
+
+ virtual void initialize(ScheduleDAGMI *DAG) {
+ ReadyQ.clear();
+ ILP.resize(DAG->SUnits.size());
+ }
+
+ virtual void registerRoots() {
+ for (std::vector<SUnit*>::const_iterator
+ I = ReadyQ.begin(), E = ReadyQ.end(); I != E; ++I) {
+ ILP.computeILP(*I);
+ }
+ }
+
+ /// Implement MachineSchedStrategy interface.
+ /// -----------------------------------------
+
+ virtual SUnit *pickNode(bool &IsTopNode) {
+ if (ReadyQ.empty()) return NULL;
+ pop_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
+ SUnit *SU = ReadyQ.back();
+ ReadyQ.pop_back();
+ IsTopNode = false;
+ DEBUG(dbgs() << "*** Scheduling " << *SU->getInstr()
+ << " ILP: " << ILP.getILP(SU) << '\n');
+ return SU;
+ }
+
+ virtual void schedNode(SUnit *, bool) {}
+
+ virtual void releaseTopNode(SUnit *) { /*only called for top roots*/ }
+
+ virtual void releaseBottomNode(SUnit *SU) {
+ ReadyQ.push_back(SU);
+ std::push_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
+ }
+};
+} // namespace
+
+static ScheduleDAGInstrs *createILPMaxScheduler(MachineSchedContext *C) {
+ return new ScheduleDAGMI(C, new ILPScheduler(true));
+}
+static ScheduleDAGInstrs *createILPMinScheduler(MachineSchedContext *C) {
+ return new ScheduleDAGMI(C, new ILPScheduler(false));
+}
+static MachineSchedRegistry ILPMaxRegistry(
+ "ilpmax", "Schedule bottom-up for max ILP", createILPMaxScheduler);
+static MachineSchedRegistry ILPMinRegistry(
+ "ilpmin", "Schedule bottom-up for min ILP", createILPMinScheduler);
+
+//===----------------------------------------------------------------------===//
// Machine Instruction Shuffler for Correctness Testing
//===----------------------------------------------------------------------===//
diff --git a/contrib/llvm/lib/CodeGen/MachineSink.cpp b/contrib/llvm/lib/CodeGen/MachineSink.cpp
index bc383cb..b117f8c 100644
--- a/contrib/llvm/lib/CodeGen/MachineSink.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineSink.cpp
@@ -49,7 +49,6 @@ namespace {
MachineDominatorTree *DT; // Machine dominator tree
MachineLoopInfo *LI;
AliasAnalysis *AA;
- BitVector AllocatableSet; // Which physregs are allocatable?
// Remember which edges have been considered for breaking.
SmallSet<std::pair<MachineBasicBlock*,MachineBasicBlock*>, 8>
@@ -229,7 +228,6 @@ bool MachineSinking::runOnMachineFunction(MachineFunction &MF) {
DT = &getAnalysis<MachineDominatorTree>();
LI = &getAnalysis<MachineLoopInfo>();
AA = &getAnalysis<AliasAnalysis>();
- AllocatableSet = TRI->getAllocatableSet(MF);
bool EverMadeChange = false;
diff --git a/contrib/llvm/lib/CodeGen/MachineTraceMetrics.cpp b/contrib/llvm/lib/CodeGen/MachineTraceMetrics.cpp
index 1a3aa60..9686b04 100644
--- a/contrib/llvm/lib/CodeGen/MachineTraceMetrics.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineTraceMetrics.cpp
@@ -14,9 +14,10 @@
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
-#include "llvm/MC/MCInstrItineraries.h"
+#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/PostOrderIterator.h"
@@ -50,9 +51,11 @@ bool MachineTraceMetrics::runOnMachineFunction(MachineFunction &Func) {
MF = &Func;
TII = MF->getTarget().getInstrInfo();
TRI = MF->getTarget().getRegisterInfo();
- ItinData = MF->getTarget().getInstrItineraryData();
MRI = &MF->getRegInfo();
Loops = &getAnalysis<MachineLoopInfo>();
+ const TargetSubtargetInfo &ST =
+ MF->getTarget().getSubtarget<TargetSubtargetInfo>();
+ SchedModel.init(*ST.getSchedModel(), &ST, TII);
BlockInfo.resize(MF->getNumBlockIDs());
return false;
}
@@ -674,7 +677,7 @@ computeCrossBlockCriticalPath(const TraceBlockInfo &TBI) {
const MachineInstr *DefMI = MTM.MRI->getVRegDef(LIR.Reg);
// Ignore dependencies outside the current trace.
const TraceBlockInfo &DefTBI = BlockInfo[DefMI->getParent()->getNumber()];
- if (!DefTBI.hasValidDepth() || DefTBI.Head != TBI.Head)
+ if (!DefTBI.isEarlierInSameTrace(TBI))
continue;
unsigned Len = LIR.Height + Cycles[DefMI].Depth;
MaxLen = std::max(MaxLen, Len);
@@ -737,16 +740,15 @@ computeInstrDepths(const MachineBasicBlock *MBB) {
const TraceBlockInfo&DepTBI =
BlockInfo[Dep.DefMI->getParent()->getNumber()];
// Ignore dependencies from outside the current trace.
- if (!DepTBI.hasValidDepth() || DepTBI.Head != TBI.Head)
+ if (!DepTBI.isEarlierInSameTrace(TBI))
continue;
assert(DepTBI.HasValidInstrDepths && "Inconsistent dependency");
unsigned DepCycle = Cycles.lookup(Dep.DefMI).Depth;
// Add latency if DefMI is a real instruction. Transients get latency 0.
if (!Dep.DefMI->isTransient())
- DepCycle += MTM.TII->computeOperandLatency(MTM.ItinData,
- Dep.DefMI, Dep.DefOp,
- UseMI, Dep.UseOp,
- /* FindMin = */ false);
+ DepCycle += MTM.SchedModel
+ .computeOperandLatency(Dep.DefMI, Dep.DefOp, UseMI, Dep.UseOp,
+ /* FindMin = */ false);
Cycle = std::max(Cycle, DepCycle);
}
// Remember the instruction depth.
@@ -769,7 +771,7 @@ computeInstrDepths(const MachineBasicBlock *MBB) {
// Height is the issue height computed from virtual register dependencies alone.
static unsigned updatePhysDepsUpwards(const MachineInstr *MI, unsigned Height,
SparseSet<LiveRegUnit> &RegUnits,
- const InstrItineraryData *ItinData,
+ const TargetSchedModel &SchedModel,
const TargetInstrInfo *TII,
const TargetRegisterInfo *TRI) {
SmallVector<unsigned, 8> ReadOps;
@@ -792,14 +794,10 @@ static unsigned updatePhysDepsUpwards(const MachineInstr *MI, unsigned Height,
unsigned DepHeight = I->Cycle;
if (!MI->isTransient()) {
// We may not know the UseMI of this dependency, if it came from the
- // live-in list.
- if (I->MI)
- DepHeight += TII->computeOperandLatency(ItinData,
- MI, MO.getOperandNo(),
- I->MI, I->Op);
- else
- // No UseMI. Just use the MI latency instead.
- DepHeight += TII->getInstrLatency(ItinData, MI);
+ // live-in list. SchedModel can handle a NULL UseMI.
+ DepHeight += SchedModel
+ .computeOperandLatency(MI, MO.getOperandNo(), I->MI, I->Op,
+ /* FindMin = */ false);
}
Height = std::max(Height, DepHeight);
// This regunit is dead above MI.
@@ -832,12 +830,12 @@ typedef DenseMap<const MachineInstr *, unsigned> MIHeightMap;
static bool pushDepHeight(const DataDep &Dep,
const MachineInstr *UseMI, unsigned UseHeight,
MIHeightMap &Heights,
- const InstrItineraryData *ItinData,
+ const TargetSchedModel &SchedModel,
const TargetInstrInfo *TII) {
// Adjust height by Dep.DefMI latency.
if (!Dep.DefMI->isTransient())
- UseHeight += TII->computeOperandLatency(ItinData, Dep.DefMI, Dep.DefOp,
- UseMI, Dep.UseOp);
+ UseHeight += SchedModel.computeOperandLatency(Dep.DefMI, Dep.DefOp,
+ UseMI, Dep.UseOp, false);
// Update Heights[DefMI] to be the maximum height seen.
MIHeightMap::iterator I;
@@ -852,14 +850,14 @@ static bool pushDepHeight(const DataDep &Dep,
return false;
}
-/// Assuming that DefMI was used by Trace.back(), add it to the live-in lists
-/// of all the blocks in Trace. Stop when reaching the block that contains
-/// DefMI.
+/// Assuming that the virtual register defined by DefMI:DefOp was used by
+/// Trace.back(), add it to the live-in lists of all the blocks in Trace. Stop
+/// when reaching the block that contains DefMI.
void MachineTraceMetrics::Ensemble::
-addLiveIns(const MachineInstr *DefMI,
+addLiveIns(const MachineInstr *DefMI, unsigned DefOp,
ArrayRef<const MachineBasicBlock*> Trace) {
assert(!Trace.empty() && "Trace should contain at least one block");
- unsigned Reg = DefMI->getOperand(0).getReg();
+ unsigned Reg = DefMI->getOperand(DefOp).getReg();
assert(TargetRegisterInfo::isVirtualRegister(Reg));
const MachineBasicBlock *DefMBB = DefMI->getParent();
@@ -951,8 +949,8 @@ computeInstrHeights(const MachineBasicBlock *MBB) {
unsigned Height = TBI.Succ ? Cycles.lookup(PHI).Height : 0;
DEBUG(dbgs() << "pred\t" << Height << '\t' << *PHI);
if (pushDepHeight(Deps.front(), PHI, Height,
- Heights, MTM.ItinData, MTM.TII))
- addLiveIns(Deps.front().DefMI, Stack);
+ Heights, MTM.SchedModel, MTM.TII))
+ addLiveIns(Deps.front().DefMI, Deps.front().DefOp, Stack);
}
}
}
@@ -980,12 +978,12 @@ computeInstrHeights(const MachineBasicBlock *MBB) {
// There may also be regunit dependencies to include in the height.
if (HasPhysRegs)
Cycle = updatePhysDepsUpwards(MI, Cycle, RegUnits,
- MTM.ItinData, MTM.TII, MTM.TRI);
+ MTM.SchedModel, MTM.TII, MTM.TRI);
// Update the required height of any virtual registers read by MI.
for (unsigned i = 0, e = Deps.size(); i != e; ++i)
- if (pushDepHeight(Deps[i], MI, Cycle, Heights, MTM.ItinData, MTM.TII))
- addLiveIns(Deps[i].DefMI, Stack);
+ if (pushDepHeight(Deps[i], MI, Cycle, Heights, MTM.SchedModel, MTM.TII))
+ addLiveIns(Deps[i].DefMI, Deps[i].DefOp, Stack);
InstrCycles &MICycles = Cycles[MI];
MICycles.Height = Cycle;
@@ -1054,10 +1052,8 @@ MachineTraceMetrics::Trace::getPHIDepth(const MachineInstr *PHI) const {
unsigned DepCycle = getInstrCycles(Dep.DefMI).Depth;
// Add latency if DefMI is a real instruction. Transients get latency 0.
if (!Dep.DefMI->isTransient())
- DepCycle += TE.MTM.TII->computeOperandLatency(TE.MTM.ItinData,
- Dep.DefMI, Dep.DefOp,
- PHI, Dep.UseOp,
- /* FindMin = */ false);
+ DepCycle += TE.MTM.SchedModel
+ .computeOperandLatency(Dep.DefMI, Dep.DefOp, PHI, Dep.UseOp, false);
return DepCycle;
}
@@ -1068,9 +1064,8 @@ unsigned MachineTraceMetrics::Trace::getResourceDepth(bool Bottom) const {
unsigned Instrs = TBI.InstrDepth;
if (Bottom)
Instrs += TE.MTM.BlockInfo[getBlockNum()].InstrCount;
- if (const MCSchedModel *Model = TE.MTM.ItinData->SchedModel)
- if (Model->IssueWidth != 0)
- return Instrs / Model->IssueWidth;
+ if (unsigned IW = TE.MTM.SchedModel.getIssueWidth())
+ Instrs /= IW;
// Assume issue width 1 without a schedule model.
return Instrs;
}
@@ -1080,9 +1075,8 @@ getResourceLength(ArrayRef<const MachineBasicBlock*> Extrablocks) const {
unsigned Instrs = TBI.InstrDepth + TBI.InstrHeight;
for (unsigned i = 0, e = Extrablocks.size(); i != e; ++i)
Instrs += TE.MTM.getResources(Extrablocks[i])->InstrCount;
- if (const MCSchedModel *Model = TE.MTM.ItinData->SchedModel)
- if (Model->IssueWidth != 0)
- return Instrs / Model->IssueWidth;
+ if (unsigned IW = TE.MTM.SchedModel.getIssueWidth())
+ Instrs /= IW;
// Assume issue width 1 without a schedule model.
return Instrs;
}
diff --git a/contrib/llvm/lib/CodeGen/MachineTraceMetrics.h b/contrib/llvm/lib/CodeGen/MachineTraceMetrics.h
index c5b86f3..460730b 100644
--- a/contrib/llvm/lib/CodeGen/MachineTraceMetrics.h
+++ b/contrib/llvm/lib/CodeGen/MachineTraceMetrics.h
@@ -50,6 +50,7 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/TargetSchedule.h"
namespace llvm {
@@ -67,9 +68,9 @@ class MachineTraceMetrics : public MachineFunctionPass {
const MachineFunction *MF;
const TargetInstrInfo *TII;
const TargetRegisterInfo *TRI;
- const InstrItineraryData *ItinData;
const MachineRegisterInfo *MRI;
const MachineLoopInfo *Loops;
+ TargetSchedModel SchedModel;
public:
class Ensemble;
@@ -164,6 +165,14 @@ public:
/// Invalidate height resources when a block below this one has changed.
void invalidateHeight() { InstrHeight = ~0u; HasValidInstrHeights = false; }
+ /// Determine if this block belongs to the same trace as TBI and comes
+ /// before it in the trace.
+ /// Also returns true when TBI == this.
+ bool isEarlierInSameTrace(const TraceBlockInfo &TBI) const {
+ return hasValidDepth() && TBI.hasValidDepth() &&
+ Head == TBI.Head && InstrDepth <= TBI.InstrDepth;
+ }
+
// Data-dependency-related information. Per-instruction depth and height
// are computed from data dependencies in the current trace, using
// itinerary data.
@@ -270,7 +279,7 @@ public:
unsigned computeCrossBlockCriticalPath(const TraceBlockInfo&);
void computeInstrDepths(const MachineBasicBlock*);
void computeInstrHeights(const MachineBasicBlock*);
- void addLiveIns(const MachineInstr *DefMI,
+ void addLiveIns(const MachineInstr *DefMI, unsigned DefOp,
ArrayRef<const MachineBasicBlock*> Trace);
protected:
diff --git a/contrib/llvm/lib/CodeGen/MachineVerifier.cpp b/contrib/llvm/lib/CodeGen/MachineVerifier.cpp
index f745b41..69a3ae8 100644
--- a/contrib/llvm/lib/CodeGen/MachineVerifier.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineVerifier.cpp
@@ -23,8 +23,9 @@
// the verifier errors.
//===----------------------------------------------------------------------===//
+#include "llvm/BasicBlock.h"
+#include "llvm/InlineAsm.h"
#include "llvm/Instructions.h"
-#include "llvm/Function.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/LiveVariables.h"
#include "llvm/CodeGen/LiveStackAnalysis.h"
@@ -73,11 +74,12 @@ namespace {
typedef SmallVector<const uint32_t*, 4> RegMaskVector;
typedef DenseSet<unsigned> RegSet;
typedef DenseMap<unsigned, const MachineInstr*> RegMap;
+ typedef SmallPtrSet<const MachineBasicBlock*, 8> BlockSet;
const MachineInstr *FirstTerminator;
+ BlockSet FunctionBlocks;
BitVector regsReserved;
- BitVector regsAllocatable;
RegSet regsLive;
RegVector regsDefined, regsDead, regsKilled;
RegMaskVector regMasks;
@@ -117,6 +119,9 @@ namespace {
// block. This set is disjoint from regsLiveOut.
RegSet vregsRequired;
+ // Set versions of block's predecessor and successor lists.
+ BlockSet Preds, Succs;
+
BBInfo() : reachable(false) {}
// Add register to vregsPassed if it belongs there. Return true if
@@ -180,7 +185,7 @@ namespace {
}
bool isAllocatable(unsigned Reg) {
- return Reg < regsAllocatable.size() && regsAllocatable.test(Reg);
+ return Reg < TRI->getNumRegs() && MRI->isAllocatable(Reg);
}
// Analysis information if available
@@ -208,6 +213,8 @@ namespace {
void report(const char *msg, const MachineBasicBlock *MBB,
const LiveInterval &LI);
+ void verifyInlineAsm(const MachineInstr *MI);
+
void checkLiveness(const MachineOperand *MO, unsigned MONum);
void markReachable(const MachineBasicBlock *MBB);
void calcRegsPassed();
@@ -352,7 +359,7 @@ void MachineVerifier::report(const char *msg, const MachineFunction *MF) {
MF->print(*OS, Indexes);
}
*OS << "*** Bad machine code: " << msg << " ***\n"
- << "- function: " << MF->getFunction()->getName() << "\n";
+ << "- function: " << MF->getName() << "\n";
}
void MachineVerifier::report(const char *msg, const MachineBasicBlock *MBB) {
@@ -360,7 +367,7 @@ void MachineVerifier::report(const char *msg, const MachineBasicBlock *MBB) {
report(msg, MBB->getParent());
*OS << "- basic block: BB#" << MBB->getNumber()
<< ' ' << MBB->getName()
- << " (" << (void*)MBB << ')';
+ << " (" << (const void*)MBB << ')';
if (Indexes)
*OS << " [" << Indexes->getMBBStartIdx(MBB)
<< ';' << Indexes->getMBBEndIdx(MBB) << ')';
@@ -419,7 +426,7 @@ void MachineVerifier::markReachable(const MachineBasicBlock *MBB) {
void MachineVerifier::visitMachineFunctionBefore() {
lastIndex = SlotIndex();
- regsReserved = TRI->getReservedRegs(*MF);
+ regsReserved = MRI->getReservedRegs();
// A sub-register of a reserved register is also reserved
for (int Reg = regsReserved.find_first(); Reg>=0;
@@ -431,9 +438,23 @@ void MachineVerifier::visitMachineFunctionBefore() {
}
}
- regsAllocatable = TRI->getAllocatableSet(*MF);
-
markReachable(&MF->front());
+
+ // Build a set of the basic blocks in the function.
+ FunctionBlocks.clear();
+ for (MachineFunction::const_iterator
+ I = MF->begin(), E = MF->end(); I != E; ++I) {
+ FunctionBlocks.insert(I);
+ BBInfo &MInfo = MBBInfoMap[I];
+
+ MInfo.Preds.insert(I->pred_begin(), I->pred_end());
+ if (MInfo.Preds.size() != I->pred_size())
+ report("MBB has duplicate entries in its predecessor list.", I);
+
+ MInfo.Succs.insert(I->succ_begin(), I->succ_end());
+ if (MInfo.Succs.size() != I->succ_size())
+ report("MBB has duplicate entries in its successor list.", I);
+ }
}
// Does iterator point to a and b as the first two elements?
@@ -470,6 +491,25 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) {
E = MBB->succ_end(); I != E; ++I) {
if ((*I)->isLandingPad())
LandingPadSuccs.insert(*I);
+ if (!FunctionBlocks.count(*I))
+ report("MBB has successor that isn't part of the function.", MBB);
+ if (!MBBInfoMap[*I].Preds.count(MBB)) {
+ report("Inconsistent CFG", MBB);
+ *OS << "MBB is not in the predecessor list of the successor BB#"
+ << (*I)->getNumber() << ".\n";
+ }
+ }
+
+ // Check the predecessor list.
+ for (MachineBasicBlock::const_pred_iterator I = MBB->pred_begin(),
+ E = MBB->pred_end(); I != E; ++I) {
+ if (!FunctionBlocks.count(*I))
+ report("MBB has predecessor that isn't part of the function.", MBB);
+ if (!MBBInfoMap[*I].Succs.count(MBB)) {
+ report("Inconsistent CFG", MBB);
+ *OS << "MBB is not in the successor list of the predecessor BB#"
+ << (*I)->getNumber() << ".\n";
+ }
}
const MCAsmInfo *AsmInfo = TM->getMCAsmInfo();
@@ -540,7 +580,15 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) {
++MBBI;
if (MBBI == MF->end()) {
report("MBB conditionally falls through out of function!", MBB);
- } if (MBB->succ_size() != 2) {
+ } if (MBB->succ_size() == 1) {
+ // A conditional branch with only one successor is weird, but allowed.
+ if (&*MBBI != TBB)
+ report("MBB exits via conditional branch/fall-through but only has "
+ "one CFG successor!", MBB);
+ else if (TBB != *MBB->succ_begin())
+ report("MBB exits via conditional branch/fall-through but the CFG "
+ "successor don't match the actual successor!", MBB);
+ } else if (MBB->succ_size() != 2) {
report("MBB exits via conditional branch/fall-through but doesn't have "
"exactly two CFG successors!", MBB);
} else if (!matchPair(MBB->succ_begin(), TBB, MBBI)) {
@@ -560,7 +608,15 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) {
} else if (TBB && FBB) {
// Block conditionally branches somewhere, otherwise branches
// somewhere else.
- if (MBB->succ_size() != 2) {
+ if (MBB->succ_size() == 1) {
+ // A conditional branch with only one successor is weird, but allowed.
+ if (FBB != TBB)
+ report("MBB exits via conditional branch/branch through but only has "
+ "one CFG successor!", MBB);
+ else if (TBB != *MBB->succ_begin())
+ report("MBB exits via conditional branch/branch through but the CFG "
+ "successor don't match the actual successor!", MBB);
+ } else if (MBB->succ_size() != 2) {
report("MBB exits via conditional branch/branch but doesn't have "
"exactly two CFG successors!", MBB);
} else if (!matchPair(MBB->succ_begin(), TBB, FBB)) {
@@ -639,6 +695,50 @@ void MachineVerifier::visitMachineBundleBefore(const MachineInstr *MI) {
}
}
+// The operands on an INLINEASM instruction must follow a template.
+// Verify that the flag operands make sense.
+void MachineVerifier::verifyInlineAsm(const MachineInstr *MI) {
+ // The first two operands on INLINEASM are the asm string and global flags.
+ if (MI->getNumOperands() < 2) {
+ report("Too few operands on inline asm", MI);
+ return;
+ }
+ if (!MI->getOperand(0).isSymbol())
+ report("Asm string must be an external symbol", MI);
+ if (!MI->getOperand(1).isImm())
+ report("Asm flags must be an immediate", MI);
+ // Allowed flags are Extra_HasSideEffects = 1, Extra_IsAlignStack = 2,
+ // Extra_AsmDialect = 4, Extra_MayLoad = 8, and Extra_MayStore = 16.
+ if (!isUInt<5>(MI->getOperand(1).getImm()))
+ report("Unknown asm flags", &MI->getOperand(1), 1);
+
+ assert(InlineAsm::MIOp_FirstOperand == 2 && "Asm format changed");
+
+ unsigned OpNo = InlineAsm::MIOp_FirstOperand;
+ unsigned NumOps;
+ for (unsigned e = MI->getNumOperands(); OpNo < e; OpNo += NumOps) {
+ const MachineOperand &MO = MI->getOperand(OpNo);
+ // There may be implicit ops after the fixed operands.
+ if (!MO.isImm())
+ break;
+ NumOps = 1 + InlineAsm::getNumOperandRegisters(MO.getImm());
+ }
+
+ if (OpNo > MI->getNumOperands())
+ report("Missing operands in last group", MI);
+
+ // An optional MDNode follows the groups.
+ if (OpNo < MI->getNumOperands() && MI->getOperand(OpNo).isMetadata())
+ ++OpNo;
+
+ // All trailing operands must be implicit registers.
+ for (unsigned e = MI->getNumOperands(); OpNo < e; ++OpNo) {
+ const MachineOperand &MO = MI->getOperand(OpNo);
+ if (!MO.isReg() || !MO.isImplicit())
+ report("Expected implicit register after groups", &MO, OpNo);
+ }
+}
+
void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) {
const MCInstrDesc &MCID = MI->getDesc();
if (MI->getNumOperands() < MCID.getNumOperands()) {
@@ -647,6 +747,10 @@ void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) {
<< MI->getNumExplicitOperands() << " given.\n";
}
+ // Check the tied operands.
+ if (MI->isInlineAsm())
+ verifyInlineAsm(MI);
+
// Check the MachineMemOperands for basic consistency.
for (MachineInstr::mmo_iterator I = MI->memoperands_begin(),
E = MI->memoperands_end(); I != E; ++I) {
@@ -702,6 +806,17 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {
if (MO->isImplicit())
report("Explicit operand marked as implicit", MO, MONum);
}
+
+ int TiedTo = MCID.getOperandConstraint(MONum, MCOI::TIED_TO);
+ if (TiedTo != -1) {
+ if (!MO->isReg())
+ report("Tied use must be a register", MO, MONum);
+ else if (!MO->isTied())
+ report("Operand should be tied", MO, MONum);
+ else if (unsigned(TiedTo) != MI->findTiedOperandIdx(MONum))
+ report("Tied def doesn't match MCInstrDesc", MO, MONum);
+ } else if (MO->isReg() && MO->isTied())
+ report("Explicit operand should not be tied", MO, MONum);
} else {
// ARM adds %reg0 operands to indicate predicates. We'll allow that.
if (MO->isReg() && !MO->isImplicit() && !MI->isVariadic() && MO->getReg())
@@ -716,6 +831,28 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {
if (MRI->tracksLiveness() && !MI->isDebugValue())
checkLiveness(MO, MONum);
+ // Verify the consistency of tied operands.
+ if (MO->isTied()) {
+ unsigned OtherIdx = MI->findTiedOperandIdx(MONum);
+ const MachineOperand &OtherMO = MI->getOperand(OtherIdx);
+ if (!OtherMO.isReg())
+ report("Must be tied to a register", MO, MONum);
+ if (!OtherMO.isTied())
+ report("Missing tie flags on tied operand", MO, MONum);
+ if (MI->findTiedOperandIdx(OtherIdx) != MONum)
+ report("Inconsistent tie links", MO, MONum);
+ if (MONum < MCID.getNumDefs()) {
+ if (OtherIdx < MCID.getNumOperands()) {
+ if (-1 == MCID.getOperandConstraint(OtherIdx, MCOI::TIED_TO))
+ report("Explicit def tied to explicit use without tie constraint",
+ MO, MONum);
+ } else {
+ if (!OtherMO.isImplicit())
+ report("Explicit def should be tied to implicit use", MO, MONum);
+ }
+ }
+ }
+
// Verify two-address constraints after leaving SSA form.
unsigned DefIdx;
if (!MRI->isSSA() && MO->isUse() &&
diff --git a/contrib/llvm/lib/CodeGen/Passes.cpp b/contrib/llvm/lib/CodeGen/Passes.cpp
index cfa3eec..4ea21d4 100644
--- a/contrib/llvm/lib/CodeGen/Passes.cpp
+++ b/contrib/llvm/lib/CodeGen/Passes.cpp
@@ -49,8 +49,8 @@ static cl::opt<bool> DisableSSC("disable-ssc", cl::Hidden,
cl::desc("Disable Stack Slot Coloring"));
static cl::opt<bool> DisableMachineDCE("disable-machine-dce", cl::Hidden,
cl::desc("Disable Machine Dead Code Elimination"));
-static cl::opt<bool> EnableEarlyIfConversion("enable-early-ifcvt", cl::Hidden,
- cl::desc("Enable Early If-conversion"));
+static cl::opt<bool> DisableEarlyIfConversion("disable-early-ifcvt", cl::Hidden,
+ cl::desc("Disable Early If-conversion"));
static cl::opt<bool> DisableMachineLICM("disable-machine-licm", cl::Hidden,
cl::desc("Disable Machine LICM"));
static cl::opt<bool> DisableMachineCSE("disable-machine-cse", cl::Hidden,
@@ -161,7 +161,7 @@ static AnalysisID overridePass(AnalysisID StandardID, AnalysisID TargetID) {
return applyDisable(TargetID, DisableMachineDCE);
if (StandardID == &EarlyIfConverterID)
- return applyDisable(TargetID, !EnableEarlyIfConversion);
+ return applyDisable(TargetID, DisableEarlyIfConversion);
if (StandardID == &MachineLICMID)
return applyDisable(TargetID, DisableMachineLICM);
@@ -447,8 +447,8 @@ void TargetPassConfig::addMachinePasses() {
const PassInfo *TPI = PR->getPassInfo(PrintMachineInstrs.getValue());
const PassInfo *IPI = PR->getPassInfo(StringRef("print-machineinstrs"));
assert (TPI && IPI && "Pass ID not registered!");
- const char *TID = (char *)(TPI->getTypeInfo());
- const char *IID = (char *)(IPI->getTypeInfo());
+ const char *TID = (const char *)(TPI->getTypeInfo());
+ const char *IID = (const char *)(IPI->getTypeInfo());
insertPass(TID, IID);
}
@@ -456,7 +456,8 @@ void TargetPassConfig::addMachinePasses() {
printAndVerify("After Instruction Selection");
// Expand pseudo-instructions emitted by ISel.
- addPass(&ExpandISelPseudosID);
+ if (addPass(&ExpandISelPseudosID))
+ printAndVerify("After ExpandISelPseudos");
// Add passes that optimize machine instructions in SSA form.
if (getOptLevel() != CodeGenOpt::None) {
@@ -528,6 +529,10 @@ void TargetPassConfig::addMachineSSAOptimization() {
// instructions dead.
addPass(&OptimizePHIsID);
+ // This pass merges large allocas. StackSlotColoring is a different pass
+ // which merges spill slots.
+ addPass(&StackColoringID);
+
// If the target requests it, assign local variables to stack slots relative
// to one another and simplify frame index references where possible.
addPass(&LocalStackSlotAllocationID);
diff --git a/contrib/llvm/lib/CodeGen/PeepholeOptimizer.cpp b/contrib/llvm/lib/CodeGen/PeepholeOptimizer.cpp
index 9099862..a795ac8 100644
--- a/contrib/llvm/lib/CodeGen/PeepholeOptimizer.cpp
+++ b/contrib/llvm/lib/CodeGen/PeepholeOptimizer.cpp
@@ -527,6 +527,11 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
SeenMoveImm = true;
} else {
Changed |= optimizeExtInstr(MI, MBB, LocalMIs);
+ // optimizeExtInstr might have created new instructions after MI
+ // and before the already incremented MII. Adjust MII so that the
+ // next iteration sees the new instructions.
+ MII = MI;
+ ++MII;
if (SeenMoveImm)
Changed |= foldImmediate(MI, MBB, ImmDefRegs, ImmDefMIs);
}
diff --git a/contrib/llvm/lib/CodeGen/PostRASchedulerList.cpp b/contrib/llvm/lib/CodeGen/PostRASchedulerList.cpp
index 7449ff5..d57bc73 100644
--- a/contrib/llvm/lib/CodeGen/PostRASchedulerList.cpp
+++ b/contrib/llvm/lib/CodeGen/PostRASchedulerList.cpp
@@ -240,6 +240,7 @@ void SchedulePostRATDList::exitRegion() {
ScheduleDAGInstrs::exitRegion();
}
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// dumpSchedule - dump the scheduled Sequence.
void SchedulePostRATDList::dumpSchedule() const {
for (unsigned i = 0, e = Sequence.size(); i != e; i++) {
@@ -249,6 +250,7 @@ void SchedulePostRATDList::dumpSchedule() const {
dbgs() << "**** NOOP ****\n";
}
}
+#endif
bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) {
TII = Fn.getTarget().getInstrInfo();
@@ -298,7 +300,7 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) {
static int bbcnt = 0;
if (bbcnt++ % DebugDiv != DebugMod)
continue;
- dbgs() << "*** DEBUG scheduling " << Fn.getFunction()->getName()
+ dbgs() << "*** DEBUG scheduling " << Fn.getName()
<< ":BB#" << MBB->getNumber() << " ***\n";
}
#endif
@@ -488,7 +490,6 @@ void SchedulePostRATDList::FixupKills(MachineBasicBlock *MBB) {
DEBUG(dbgs() << "Fixup kills for BB#" << MBB->getNumber() << '\n');
BitVector killedRegs(TRI->getNumRegs());
- BitVector ReservedRegs = TRI->getReservedRegs(MF);
StartBlockForKills(MBB);
@@ -529,7 +530,7 @@ void SchedulePostRATDList::FixupKills(MachineBasicBlock *MBB) {
MachineOperand &MO = MI->getOperand(i);
if (!MO.isReg() || !MO.isUse()) continue;
unsigned Reg = MO.getReg();
- if ((Reg == 0) || ReservedRegs.test(Reg)) continue;
+ if ((Reg == 0) || MRI.isReserved(Reg)) continue;
bool kill = false;
if (!killedRegs.test(Reg)) {
@@ -564,7 +565,7 @@ void SchedulePostRATDList::FixupKills(MachineBasicBlock *MBB) {
MachineOperand &MO = MI->getOperand(i);
if (!MO.isReg() || !MO.isUse() || MO.isUndef()) continue;
unsigned Reg = MO.getReg();
- if ((Reg == 0) || ReservedRegs.test(Reg)) continue;
+ if ((Reg == 0) || MRI.isReserved(Reg)) continue;
LiveRegs.set(Reg);
diff --git a/contrib/llvm/lib/CodeGen/ProcessImplicitDefs.cpp b/contrib/llvm/lib/CodeGen/ProcessImplicitDefs.cpp
index 34d075c..e4e18c3 100644
--- a/contrib/llvm/lib/CodeGen/ProcessImplicitDefs.cpp
+++ b/contrib/llvm/lib/CodeGen/ProcessImplicitDefs.cpp
@@ -137,8 +137,7 @@ void ProcessImplicitDefs::processImplicitDef(MachineInstr *MI) {
bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &MF) {
DEBUG(dbgs() << "********** PROCESS IMPLICIT DEFS **********\n"
- << "********** Function: "
- << ((Value*)MF.getFunction())->getName() << '\n');
+ << "********** Function: " << MF.getName() << '\n');
bool Changed = false;
diff --git a/contrib/llvm/lib/CodeGen/PrologEpilogInserter.cpp b/contrib/llvm/lib/CodeGen/PrologEpilogInserter.cpp
index c791ffb..77554d6 100644
--- a/contrib/llvm/lib/CodeGen/PrologEpilogInserter.cpp
+++ b/contrib/llvm/lib/CodeGen/PrologEpilogInserter.cpp
@@ -96,7 +96,7 @@ bool PEI::runOnMachineFunction(MachineFunction &Fn) {
placeCSRSpillsAndRestores(Fn);
// Add the code to save and restore the callee saved registers
- if (!F->hasFnAttr(Attribute::Naked))
+ if (!F->getFnAttributes().hasAttribute(Attributes::Naked))
insertCSRSpillsAndRestores(Fn);
// Allow the target machine to make final modifications to the function
@@ -111,7 +111,7 @@ bool PEI::runOnMachineFunction(MachineFunction &Fn) {
// called functions. Because of this, calculateCalleeSavedRegisters()
// must be called before this function in order to set the AdjustsStack
// and MaxCallFrameSize variables.
- if (!F->hasFnAttr(Attribute::Naked))
+ if (!F->getFnAttributes().hasAttribute(Attributes::Naked))
insertPrologEpilogCode(Fn);
// Replace all MO_FrameIndex operands with physical register references
@@ -221,13 +221,13 @@ void PEI::calculateCalleeSavedRegisters(MachineFunction &Fn) {
return;
// In Naked functions we aren't going to save any registers.
- if (Fn.getFunction()->hasFnAttr(Attribute::Naked))
+ if (Fn.getFunction()->getFnAttributes().hasAttribute(Attributes::Naked))
return;
std::vector<CalleeSavedInfo> CSI;
for (unsigned i = 0; CSRegs[i]; ++i) {
unsigned Reg = CSRegs[i];
- if (Fn.getRegInfo().isPhysRegOrOverlapUsed(Reg)) {
+ if (Fn.getRegInfo().isPhysRegUsed(Reg)) {
// If the reg is modified, save it!
CSI.push_back(CalleeSavedInfo(Reg));
}
diff --git a/contrib/llvm/lib/CodeGen/RegAllocBasic.cpp b/contrib/llvm/lib/CodeGen/RegAllocBasic.cpp
index 3a03807..8a49609 100644
--- a/contrib/llvm/lib/CodeGen/RegAllocBasic.cpp
+++ b/contrib/llvm/lib/CodeGen/RegAllocBasic.cpp
@@ -20,7 +20,6 @@
#include "VirtRegMap.h"
#include "LiveRegMatrix.h"
#include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/Function.h"
#include "llvm/PassAnalysisSupport.h"
#include "llvm/CodeGen/CalcSpillWeights.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
@@ -273,7 +272,7 @@ unsigned RABasic::selectOrSplit(LiveInterval &VirtReg,
bool RABasic::runOnMachineFunction(MachineFunction &mf) {
DEBUG(dbgs() << "********** BASIC REGISTER ALLOCATION **********\n"
<< "********** Function: "
- << ((Value*)mf.getFunction())->getName() << '\n');
+ << mf.getName() << '\n');
MF = &mf;
RegAllocBase::init(getAnalysis<VirtRegMap>(),
diff --git a/contrib/llvm/lib/CodeGen/RegAllocFast.cpp b/contrib/llvm/lib/CodeGen/RegAllocFast.cpp
index 6b3a48e..8892216 100644
--- a/contrib/llvm/lib/CodeGen/RegAllocFast.cpp
+++ b/contrib/llvm/lib/CodeGen/RegAllocFast.cpp
@@ -113,9 +113,11 @@ namespace {
// PhysRegState - One of the RegState enums, or a virtreg.
std::vector<unsigned> PhysRegState;
- // UsedInInstr - BitVector of physregs that are used in the current
- // instruction, and so cannot be allocated.
- BitVector UsedInInstr;
+ typedef SparseSet<unsigned> UsedInInstrSet;
+
+ // UsedInInstr - Set of physregs that are used in the current instruction,
+ // and so cannot be allocated.
+ UsedInInstrSet UsedInInstr;
// SkippedInstrs - Descriptors of instructions whose clobber list was
// ignored because all registers were spilled. It is still necessary to
@@ -173,7 +175,7 @@ namespace {
unsigned VirtReg, unsigned Hint);
LiveRegMap::iterator reloadVirtReg(MachineInstr *MI, unsigned OpNum,
unsigned VirtReg, unsigned Hint);
- void spillAll(MachineInstr *MI);
+ void spillAll(MachineBasicBlock::iterator MI);
bool setPhysReg(MachineInstr *MI, unsigned OpNum, unsigned PhysReg);
void addRetOperands(MachineBasicBlock *MBB);
};
@@ -312,7 +314,7 @@ void RAFast::spillVirtReg(MachineBasicBlock::iterator MI,
}
/// spillAll - Spill all dirty virtregs without killing them.
-void RAFast::spillAll(MachineInstr *MI) {
+void RAFast::spillAll(MachineBasicBlock::iterator MI) {
if (LiveVirtRegs.empty()) return;
isBulkSpilling = true;
// The LiveRegMap is keyed by an unsigned (the virtreg number), so the order
@@ -340,7 +342,7 @@ void RAFast::usePhysReg(MachineOperand &MO) {
PhysRegState[PhysReg] = regFree;
// Fall through
case regFree:
- UsedInInstr.set(PhysReg);
+ UsedInInstr.insert(PhysReg);
MO.setIsKill();
return;
default:
@@ -360,13 +362,13 @@ void RAFast::usePhysReg(MachineOperand &MO) {
"Instruction is not using a subregister of a reserved register");
// Leave the superregister in the working set.
PhysRegState[Alias] = regFree;
- UsedInInstr.set(Alias);
+ UsedInInstr.insert(Alias);
MO.getParent()->addRegisterKilled(Alias, TRI, true);
return;
case regFree:
if (TRI->isSuperRegister(PhysReg, Alias)) {
// Leave the superregister in the working set.
- UsedInInstr.set(Alias);
+ UsedInInstr.insert(Alias);
MO.getParent()->addRegisterKilled(Alias, TRI, true);
return;
}
@@ -380,7 +382,7 @@ void RAFast::usePhysReg(MachineOperand &MO) {
// All aliases are disabled, bring register into working set.
PhysRegState[PhysReg] = regFree;
- UsedInInstr.set(PhysReg);
+ UsedInInstr.insert(PhysReg);
MO.setIsKill();
}
@@ -389,7 +391,7 @@ void RAFast::usePhysReg(MachineOperand &MO) {
/// reserved instead of allocated.
void RAFast::definePhysReg(MachineInstr *MI, unsigned PhysReg,
RegState NewState) {
- UsedInInstr.set(PhysReg);
+ UsedInInstr.insert(PhysReg);
switch (unsigned VirtReg = PhysRegState[PhysReg]) {
case regDisabled:
break;
@@ -429,7 +431,7 @@ void RAFast::definePhysReg(MachineInstr *MI, unsigned PhysReg,
// can be allocated directly.
// Returns spillImpossible when PhysReg or an alias can't be spilled.
unsigned RAFast::calcSpillCost(unsigned PhysReg) const {
- if (UsedInInstr.test(PhysReg)) {
+ if (UsedInInstr.count(PhysReg)) {
DEBUG(dbgs() << PrintReg(PhysReg, TRI) << " is already used in instr.\n");
return spillImpossible;
}
@@ -454,7 +456,7 @@ unsigned RAFast::calcSpillCost(unsigned PhysReg) const {
unsigned Cost = 0;
for (MCRegAliasIterator AI(PhysReg, TRI, false); AI.isValid(); ++AI) {
unsigned Alias = *AI;
- if (UsedInInstr.test(Alias))
+ if (UsedInInstr.count(Alias))
return spillImpossible;
switch (unsigned VirtReg = PhysRegState[Alias]) {
case regDisabled:
@@ -509,7 +511,7 @@ RAFast::LiveRegMap::iterator RAFast::allocVirtReg(MachineInstr *MI,
// Ignore invalid hints.
if (Hint && (!TargetRegisterInfo::isPhysicalRegister(Hint) ||
- !RC->contains(Hint) || !RegClassInfo.isAllocatable(Hint)))
+ !RC->contains(Hint) || !MRI->isAllocatable(Hint)))
Hint = 0;
// Take hint when possible.
@@ -530,7 +532,7 @@ RAFast::LiveRegMap::iterator RAFast::allocVirtReg(MachineInstr *MI,
// First try to find a completely free register.
for (ArrayRef<unsigned>::iterator I = AO.begin(), E = AO.end(); I != E; ++I) {
unsigned PhysReg = *I;
- if (PhysRegState[PhysReg] == regFree && !UsedInInstr.test(PhysReg)) {
+ if (PhysRegState[PhysReg] == regFree && !UsedInInstr.count(PhysReg)) {
assignVirtToPhysReg(*LRI, PhysReg);
return LRI;
}
@@ -596,7 +598,7 @@ RAFast::defineVirtReg(MachineInstr *MI, unsigned OpNum,
LRI->LastUse = MI;
LRI->LastOpNum = OpNum;
LRI->Dirty = true;
- UsedInInstr.set(LRI->PhysReg);
+ UsedInInstr.insert(LRI->PhysReg);
return LRI;
}
@@ -646,7 +648,7 @@ RAFast::reloadVirtReg(MachineInstr *MI, unsigned OpNum,
assert(LRI->PhysReg && "Register not assigned");
LRI->LastUse = MI;
LRI->LastOpNum = OpNum;
- UsedInInstr.set(LRI->PhysReg);
+ UsedInInstr.insert(LRI->PhysReg);
return LRI;
}
@@ -708,7 +710,7 @@ void RAFast::handleThroughOperands(MachineInstr *MI,
unsigned Reg = MO.getReg();
if (!Reg || !TargetRegisterInfo::isPhysicalRegister(Reg)) continue;
for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) {
- UsedInInstr.set(*AI);
+ UsedInInstr.insert(*AI);
if (ThroughRegs.count(PhysRegState[*AI]))
definePhysReg(MI, *AI, regFree);
}
@@ -756,7 +758,7 @@ void RAFast::handleThroughOperands(MachineInstr *MI,
}
// Restore UsedInInstr to a state usable for allocating normal virtual uses.
- UsedInInstr.reset();
+ UsedInInstr.clear();
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
MachineOperand &MO = MI->getOperand(i);
if (!MO.isReg() || (MO.isDef() && !MO.isEarlyClobber())) continue;
@@ -764,12 +766,12 @@ void RAFast::handleThroughOperands(MachineInstr *MI,
if (!Reg || !TargetRegisterInfo::isPhysicalRegister(Reg)) continue;
DEBUG(dbgs() << "\tSetting " << PrintReg(Reg, TRI)
<< " as used in instr\n");
- UsedInInstr.set(Reg);
+ UsedInInstr.insert(Reg);
}
// Also mark PartialDefs as used to avoid reallocation.
for (unsigned i = 0, e = PartialDefs.size(); i != e; ++i)
- UsedInInstr.set(PartialDefs[i]);
+ UsedInInstr.insert(PartialDefs[i]);
}
/// addRetOperand - ensure that a return instruction has an operand for each
@@ -838,7 +840,7 @@ void RAFast::AllocateBasicBlock() {
// Add live-in registers as live.
for (MachineBasicBlock::livein_iterator I = MBB->livein_begin(),
E = MBB->livein_end(); I != E; ++I)
- if (RegClassInfo.isAllocatable(*I))
+ if (MRI->isAllocatable(*I))
definePhysReg(MII, *I, regReserved);
SmallVector<unsigned, 8> VirtDead;
@@ -942,7 +944,7 @@ void RAFast::AllocateBasicBlock() {
}
// Track registers used by instruction.
- UsedInInstr.reset();
+ UsedInInstr.clear();
// First scan.
// Mark physreg uses and early clobbers as used.
@@ -954,6 +956,11 @@ void RAFast::AllocateBasicBlock() {
bool hasPhysDefs = false;
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
MachineOperand &MO = MI->getOperand(i);
+ // Make sure MRI knows about registers clobbered by regmasks.
+ if (MO.isRegMask()) {
+ MRI->addPhysRegsUsedFromRegMask(MO.getRegMask());
+ continue;
+ }
if (!MO.isReg()) continue;
unsigned Reg = MO.getReg();
if (!Reg) continue;
@@ -970,7 +977,7 @@ void RAFast::AllocateBasicBlock() {
}
continue;
}
- if (!RegClassInfo.isAllocatable(Reg)) continue;
+ if (!MRI->isAllocatable(Reg)) continue;
if (MO.isUse()) {
usePhysReg(MO);
} else if (MO.isEarlyClobber()) {
@@ -1016,11 +1023,13 @@ void RAFast::AllocateBasicBlock() {
}
}
- MRI->addPhysRegsUsed(UsedInInstr);
+ for (UsedInInstrSet::iterator
+ I = UsedInInstr.begin(), E = UsedInInstr.end(); I != E; ++I)
+ MRI->setPhysRegUsed(*I);
// Track registers defined by instruction - early clobbers and tied uses at
// this point.
- UsedInInstr.reset();
+ UsedInInstr.clear();
if (hasEarlyClobbers) {
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
MachineOperand &MO = MI->getOperand(i);
@@ -1030,7 +1039,7 @@ void RAFast::AllocateBasicBlock() {
// Look for physreg defs and tied uses.
if (!MO.isDef() && !MI->isRegTiedToDefOperand(i)) continue;
for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
- UsedInInstr.set(*AI);
+ UsedInInstr.insert(*AI);
}
}
@@ -1058,7 +1067,7 @@ void RAFast::AllocateBasicBlock() {
unsigned Reg = MO.getReg();
if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
- if (!RegClassInfo.isAllocatable(Reg)) continue;
+ if (!MRI->isAllocatable(Reg)) continue;
definePhysReg(MI, Reg, (MO.isImplicit() || MO.isDead()) ?
regFree : regReserved);
continue;
@@ -1080,7 +1089,9 @@ void RAFast::AllocateBasicBlock() {
killVirtReg(VirtDead[i]);
VirtDead.clear();
- MRI->addPhysRegsUsed(UsedInInstr);
+ for (UsedInInstrSet::iterator
+ I = UsedInInstr.begin(), E = UsedInInstr.end(); I != E; ++I)
+ MRI->setPhysRegUsed(*I);
if (CopyDst && CopyDst == CopySrc && CopyDstSub == CopySrcSub) {
DEBUG(dbgs() << "-- coalescing: " << *MI);
@@ -1110,8 +1121,7 @@ void RAFast::AllocateBasicBlock() {
///
bool RAFast::runOnMachineFunction(MachineFunction &Fn) {
DEBUG(dbgs() << "********** FAST REGISTER ALLOCATION **********\n"
- << "********** Function: "
- << ((Value*)Fn.getFunction())->getName() << '\n');
+ << "********** Function: " << Fn.getName() << '\n');
MF = &Fn;
MRI = &MF->getRegInfo();
TM = &Fn.getTarget();
@@ -1119,7 +1129,8 @@ bool RAFast::runOnMachineFunction(MachineFunction &Fn) {
TII = TM->getInstrInfo();
MRI->freezeReservedRegs(Fn);
RegClassInfo.runOnMachineFunction(Fn);
- UsedInInstr.resize(TRI->getNumRegs());
+ UsedInInstr.clear();
+ UsedInInstr.setUniverse(TRI->getNumRegs());
assert(!MRI->isSSA() && "regalloc requires leaving SSA");
diff --git a/contrib/llvm/lib/CodeGen/RegAllocGreedy.cpp b/contrib/llvm/lib/CodeGen/RegAllocGreedy.cpp
index 6ac5428..06f69c1e 100644
--- a/contrib/llvm/lib/CodeGen/RegAllocGreedy.cpp
+++ b/contrib/llvm/lib/CodeGen/RegAllocGreedy.cpp
@@ -24,7 +24,6 @@
#include "VirtRegMap.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/Function.h"
#include "llvm/PassAnalysisSupport.h"
#include "llvm/CodeGen/CalcSpillWeights.h"
#include "llvm/CodeGen/EdgeBundles.h"
@@ -331,9 +330,9 @@ void RAGreedy::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addPreserved<SlotIndexes>();
AU.addRequired<LiveDebugVariables>();
AU.addPreserved<LiveDebugVariables>();
- AU.addRequired<CalculateSpillWeights>();
AU.addRequired<LiveStacks>();
AU.addPreserved<LiveStacks>();
+ AU.addRequired<CalculateSpillWeights>();
AU.addRequired<MachineDominatorTree>();
AU.addPreserved<MachineDominatorTree>();
AU.addRequired<MachineLoopInfo>();
@@ -509,7 +508,7 @@ bool RAGreedy::shouldEvict(LiveInterval &A, bool IsHint,
///
/// @param VirtReg Live range that is about to be assigned.
/// @param PhysReg Desired register for assignment.
-/// @prarm IsHint True when PhysReg is VirtReg's preferred register.
+/// @param IsHint True when PhysReg is VirtReg's preferred register.
/// @param MaxCost Only look for cheaper candidates and update with new cost
/// when returning true.
/// @returns True when interference can be evicted cheaper than MaxCost.
@@ -1746,8 +1745,7 @@ unsigned RAGreedy::selectOrSplit(LiveInterval &VirtReg,
bool RAGreedy::runOnMachineFunction(MachineFunction &mf) {
DEBUG(dbgs() << "********** GREEDY REGISTER ALLOCATION **********\n"
- << "********** Function: "
- << ((Value*)mf.getFunction())->getName() << '\n');
+ << "********** Function: " << mf.getName() << '\n');
MF = &mf;
if (VerifyEnabled)
diff --git a/contrib/llvm/lib/CodeGen/RegAllocPBQP.cpp b/contrib/llvm/lib/CodeGen/RegAllocPBQP.cpp
index d0db26b..02ebce7 100644
--- a/contrib/llvm/lib/CodeGen/RegAllocPBQP.cpp
+++ b/contrib/llvm/lib/CodeGen/RegAllocPBQP.cpp
@@ -118,7 +118,6 @@ private:
typedef std::vector<AllowedSet> AllowedSetMap;
typedef std::pair<unsigned, unsigned> RegPair;
typedef std::map<RegPair, PBQP::PBQPNum> CoalesceMap;
- typedef std::vector<PBQP::Graph::NodeItr> NodeVector;
typedef std::set<unsigned> RegSet;
@@ -192,7 +191,6 @@ std::auto_ptr<PBQPRAProblem> PBQPBuilder::build(MachineFunction *mf,
const MachineLoopInfo *loopInfo,
const RegSet &vregs) {
- typedef std::vector<const LiveInterval*> LIVector;
LiveIntervals *LIS = const_cast<LiveIntervals*>(lis);
MachineRegisterInfo *mri = &mf->getRegInfo();
const TargetRegisterInfo *tri = mf->getTarget().getRegisterInfo();
@@ -209,8 +207,6 @@ std::auto_ptr<PBQPRAProblem> PBQPBuilder::build(MachineFunction *mf,
mri->setPhysRegUsed(Reg);
}
- BitVector reservedRegs = tri->getReservedRegs(*mf);
-
// Iterate over vregs.
for (RegSet::const_iterator vregItr = vregs.begin(), vregEnd = vregs.end();
vregItr != vregEnd; ++vregItr) {
@@ -219,7 +215,7 @@ std::auto_ptr<PBQPRAProblem> PBQPBuilder::build(MachineFunction *mf,
LiveInterval *vregLI = &LIS->getInterval(vreg);
// Record any overlaps with regmask operands.
- BitVector regMaskOverlaps(tri->getNumRegs());
+ BitVector regMaskOverlaps;
LIS->checkRegMaskInterference(*vregLI, regMaskOverlaps);
// Compute an initial allowed set for the current vreg.
@@ -228,7 +224,7 @@ std::auto_ptr<PBQPRAProblem> PBQPBuilder::build(MachineFunction *mf,
ArrayRef<uint16_t> rawOrder = trc->getRawAllocationOrder(*mf);
for (unsigned i = 0; i != rawOrder.size(); ++i) {
unsigned preg = rawOrder[i];
- if (reservedRegs.test(preg))
+ if (mri->isReserved(preg))
continue;
// vregLI crosses a regmask operand that clobbers preg.
@@ -358,7 +354,7 @@ std::auto_ptr<PBQPRAProblem> PBQPBuilderWithCoalescing::build(
loopInfo->getLoopDepth(mbb));
if (cp.isPhys()) {
- if (!lis->isAllocatable(dst)) {
+ if (!mf->getRegInfo().isAllocatable(dst)) {
continue;
}
@@ -433,6 +429,7 @@ void RegAllocPBQP::getAnalysisUsage(AnalysisUsage &au) const {
au.addRequired<SlotIndexes>();
au.addPreserved<SlotIndexes>();
au.addRequired<LiveIntervals>();
+ au.addPreserved<LiveIntervals>();
//au.addRequiredID(SplitCriticalEdgesID);
if (customPassID)
au.addRequiredID(*customPassID);
@@ -444,6 +441,7 @@ void RegAllocPBQP::getAnalysisUsage(AnalysisUsage &au) const {
au.addRequired<MachineLoopInfo>();
au.addPreserved<MachineLoopInfo>();
au.addRequired<VirtRegMap>();
+ au.addPreserved<VirtRegMap>();
MachineFunctionPass::getAnalysisUsage(au);
}
@@ -556,7 +554,7 @@ bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) {
mri->freezeReservedRegs(MF);
- DEBUG(dbgs() << "PBQP Register Allocating for " << mf->getFunction()->getName() << "\n");
+ DEBUG(dbgs() << "PBQP Register Allocating for " << mf->getName() << "\n");
// Allocator main loop:
//
@@ -570,11 +568,12 @@ bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) {
// Find the vreg intervals in need of allocation.
findVRegIntervalsToAlloc();
+#ifndef NDEBUG
const Function* func = mf->getFunction();
std::string fqn =
func->getParent()->getModuleIdentifier() + "." +
func->getName().str();
- (void)fqn;
+#endif
// If there are non-empty intervals allocate them using pbqp.
if (!vregsToAlloc.empty()) {
diff --git a/contrib/llvm/lib/CodeGen/RegisterClassInfo.cpp b/contrib/llvm/lib/CodeGen/RegisterClassInfo.cpp
index 652bc30..805d235 100644
--- a/contrib/llvm/lib/CodeGen/RegisterClassInfo.cpp
+++ b/contrib/llvm/lib/CodeGen/RegisterClassInfo.cpp
@@ -15,8 +15,9 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "regalloc"
-#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/RegisterClassInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
@@ -57,10 +58,11 @@ void RegisterClassInfo::runOnMachineFunction(const MachineFunction &mf) {
CalleeSaved = CSR;
// Different reserved registers?
- BitVector RR = TRI->getReservedRegs(*MF);
- if (RR != Reserved)
+ const BitVector &RR = MF->getRegInfo().getReservedRegs();
+ if (Reserved.size() != RR.size() || RR != Reserved) {
Update = true;
- Reserved = RR;
+ Reserved = RR;
+ }
// Invalidate cached information from previous function.
if (Update)
diff --git a/contrib/llvm/lib/CodeGen/RegisterCoalescer.cpp b/contrib/llvm/lib/CodeGen/RegisterCoalescer.cpp
index 9906334..2538f10 100644
--- a/contrib/llvm/lib/CodeGen/RegisterCoalescer.cpp
+++ b/contrib/llvm/lib/CodeGen/RegisterCoalescer.cpp
@@ -55,6 +55,8 @@ STATISTIC(numCommutes , "Number of instruction commuting performed");
STATISTIC(numExtends , "Number of copies extended");
STATISTIC(NumReMats , "Number of instructions re-materialized");
STATISTIC(NumInflated , "Number of register classes inflated");
+STATISTIC(NumLaneConflicts, "Number of dead lane conflicts tested");
+STATISTIC(NumLaneResolves, "Number of dead lane conflicts resolved");
static cl::opt<bool>
EnableJoining("join-liveintervals",
@@ -123,6 +125,9 @@ namespace {
/// can use this information below to update aliases.
bool joinIntervals(CoalescerPair &CP);
+ /// Attempt joining two virtual registers. Return true on success.
+ bool joinVirtRegs(CoalescerPair &CP);
+
/// Attempt joining with a reserved physreg.
bool joinReservedPhysReg(CoalescerPair &CP);
@@ -193,12 +198,6 @@ INITIALIZE_PASS_END(RegisterCoalescer, "simple-register-coalescing",
char RegisterCoalescer::ID = 0;
-static unsigned compose(const TargetRegisterInfo &tri, unsigned a, unsigned b) {
- if (!a) return b;
- if (!b) return a;
- return tri.composeSubRegIndices(a, b);
-}
-
static bool isMoveInstr(const TargetRegisterInfo &tri, const MachineInstr *MI,
unsigned &Src, unsigned &Dst,
unsigned &SrcSub, unsigned &DstSub) {
@@ -209,8 +208,8 @@ static bool isMoveInstr(const TargetRegisterInfo &tri, const MachineInstr *MI,
SrcSub = MI->getOperand(1).getSubReg();
} else if (MI->isSubregToReg()) {
Dst = MI->getOperand(0).getReg();
- DstSub = compose(tri, MI->getOperand(0).getSubReg(),
- MI->getOperand(3).getImm());
+ DstSub = tri.composeSubRegIndices(MI->getOperand(0).getSubReg(),
+ MI->getOperand(3).getImm());
Src = MI->getOperand(2).getReg();
SrcSub = MI->getOperand(2).getSubReg();
} else
@@ -349,7 +348,8 @@ bool CoalescerPair::isCoalescable(const MachineInstr *MI) const {
if (DstReg != Dst)
return false;
// Registers match, do the subregisters line up?
- return compose(TRI, SrcIdx, SrcSub) == compose(TRI, DstIdx, DstSub);
+ return TRI.composeSubRegIndices(SrcIdx, SrcSub) ==
+ TRI.composeSubRegIndices(DstIdx, DstSub);
}
}
@@ -425,7 +425,8 @@ bool RegisterCoalescer::adjustCopiesBackFrom(const CoalescerPair &CP,
// If AValNo is defined as a copy from IntB, we can potentially process this.
// Get the instruction that defines this value number.
MachineInstr *ACopyMI = LIS->getInstructionFromIndex(AValNo->def);
- if (!CP.isCoalescable(ACopyMI))
+ // Don't allow any partial copies, even if isCoalescable() allows them.
+ if (!CP.isCoalescable(ACopyMI) || !ACopyMI->isFullCopy())
return false;
// Get the LiveRange in IntB that this value number starts with.
@@ -583,7 +584,7 @@ bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP,
MachineOperand &NewDstMO = DefMI->getOperand(NewDstIdx);
unsigned NewReg = NewDstMO.getReg();
- if (NewReg != IntB.reg || !NewDstMO.isKill())
+ if (NewReg != IntB.reg || !LiveRangeQuery(IntB, AValNo->def).isKill())
return false;
// Make sure there are no other definitions of IntB that would reach the
@@ -849,8 +850,17 @@ void RegisterCoalescer::updateRegDefsUses(unsigned SrcReg,
// Update LiveDebugVariables.
LDV->renameRegister(SrcReg, DstReg, SubIdx);
+ SmallPtrSet<MachineInstr*, 8> Visited;
for (MachineRegisterInfo::reg_iterator I = MRI->reg_begin(SrcReg);
MachineInstr *UseMI = I.skipInstruction();) {
+ // Each instruction can only be rewritten once because sub-register
+ // composition is not always idempotent. When SrcReg != DstReg, rewriting
+ // the UseMI operands removes them from the SrcReg use-def chain, but when
+ // SrcReg is DstReg we could encounter UseMI twice if it has multiple
+ // operands mentioning the virtual register.
+ if (SrcReg == DstReg && !Visited.insert(UseMI))
+ continue;
+
SmallVector<unsigned,8> Ops;
bool Reads, Writes;
tie(Reads, Writes) = UseMI->readsWritesVirtualRegister(SrcReg, &Ops);
@@ -890,7 +900,7 @@ bool RegisterCoalescer::canJoinPhys(CoalescerPair &CP) {
/// Always join simple intervals that are defined by a single copy from a
/// reserved register. This doesn't increase register pressure, so it is
/// always beneficial.
- if (!RegClassInfo.isReserved(CP.getDstReg())) {
+ if (!MRI->isReserved(CP.getDstReg())) {
DEBUG(dbgs() << "\tCan only merge into reserved registers.\n");
return false;
}
@@ -1065,7 +1075,7 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) {
/// Attempt joining with a reserved physreg.
bool RegisterCoalescer::joinReservedPhysReg(CoalescerPair &CP) {
assert(CP.isPhys() && "Must be a physreg copy");
- assert(RegClassInfo.isReserved(CP.getDstReg()) && "Not a reserved register");
+ assert(MRI->isReserved(CP.getDstReg()) && "Not a reserved register");
LiveInterval &RHS = LIS->getInterval(CP.getSrcReg());
DEBUG(dbgs() << "\t\tRHS = " << PrintReg(CP.getSrcReg()) << ' ' << RHS
<< '\n');
@@ -1102,347 +1112,797 @@ bool RegisterCoalescer::joinReservedPhysReg(CoalescerPair &CP) {
return true;
}
-/// ComputeUltimateVN - Assuming we are going to join two live intervals,
-/// compute what the resultant value numbers for each value in the input two
-/// ranges will be. This is complicated by copies between the two which can
-/// and will commonly cause multiple value numbers to be merged into one.
-///
-/// VN is the value number that we're trying to resolve. InstDefiningValue
-/// keeps track of the new InstDefiningValue assignment for the result
-/// LiveInterval. ThisFromOther/OtherFromThis are sets that keep track of
-/// whether a value in this or other is a copy from the opposite set.
-/// ThisValNoAssignments/OtherValNoAssignments keep track of value #'s that have
-/// already been assigned.
-///
-/// ThisFromOther[x] - If x is defined as a copy from the other interval, this
-/// contains the value number the copy is from.
-///
-static unsigned ComputeUltimateVN(VNInfo *VNI,
- SmallVector<VNInfo*, 16> &NewVNInfo,
- DenseMap<VNInfo*, VNInfo*> &ThisFromOther,
- DenseMap<VNInfo*, VNInfo*> &OtherFromThis,
- SmallVector<int, 16> &ThisValNoAssignments,
- SmallVector<int, 16> &OtherValNoAssignments) {
- unsigned VN = VNI->id;
-
- // If the VN has already been computed, just return it.
- if (ThisValNoAssignments[VN] >= 0)
- return ThisValNoAssignments[VN];
- assert(ThisValNoAssignments[VN] != -2 && "Cyclic value numbers");
-
- // If this val is not a copy from the other val, then it must be a new value
- // number in the destination.
- DenseMap<VNInfo*, VNInfo*>::iterator I = ThisFromOther.find(VNI);
- if (I == ThisFromOther.end()) {
- NewVNInfo.push_back(VNI);
- return ThisValNoAssignments[VN] = NewVNInfo.size()-1;
- }
- VNInfo *OtherValNo = I->second;
-
- // Otherwise, this *is* a copy from the RHS. If the other side has already
- // been computed, return it.
- if (OtherValNoAssignments[OtherValNo->id] >= 0)
- return ThisValNoAssignments[VN] = OtherValNoAssignments[OtherValNo->id];
-
- // Mark this value number as currently being computed, then ask what the
- // ultimate value # of the other value is.
- ThisValNoAssignments[VN] = -2;
- unsigned UltimateVN =
- ComputeUltimateVN(OtherValNo, NewVNInfo, OtherFromThis, ThisFromOther,
- OtherValNoAssignments, ThisValNoAssignments);
- return ThisValNoAssignments[VN] = UltimateVN;
-}
+//===----------------------------------------------------------------------===//
+// Interference checking and interval joining
+//===----------------------------------------------------------------------===//
+//
+// In the easiest case, the two live ranges being joined are disjoint, and
+// there is no interference to consider. It is quite common, though, to have
+// overlapping live ranges, and we need to check if the interference can be
+// resolved.
+//
+// The live range of a single SSA value forms a sub-tree of the dominator tree.
+// This means that two SSA values overlap if and only if the def of one value
+// is contained in the live range of the other value. As a special case, the
+// overlapping values can be defined at the same index.
+//
+// The interference from an overlapping def can be resolved in these cases:
+//
+// 1. Coalescable copies. The value is defined by a copy that would become an
+// identity copy after joining SrcReg and DstReg. The copy instruction will
+// be removed, and the value will be merged with the source value.
+//
+// There can be several copies back and forth, causing many values to be
+// merged into one. We compute a list of ultimate values in the joined live
+// range as well as a mappings from the old value numbers.
+//
+// 2. IMPLICIT_DEF. This instruction is only inserted to ensure all PHI
+// predecessors have a live out value. It doesn't cause real interference,
+// and can be merged into the value it overlaps. Like a coalescable copy, it
+// can be erased after joining.
+//
+// 3. Copy of external value. The overlapping def may be a copy of a value that
+// is already in the other register. This is like a coalescable copy, but
+// the live range of the source register must be trimmed after erasing the
+// copy instruction:
+//
+// %src = COPY %ext
+// %dst = COPY %ext <-- Remove this COPY, trim the live range of %ext.
+//
+// 4. Clobbering undefined lanes. Vector registers are sometimes built by
+// defining one lane at a time:
+//
+// %dst:ssub0<def,read-undef> = FOO
+// %src = BAR
+// %dst:ssub1<def> = COPY %src
+//
+// The live range of %src overlaps the %dst value defined by FOO, but
+// merging %src into %dst:ssub1 is only going to clobber the ssub1 lane
+// which was undef anyway.
+//
+// The value mapping is more complicated in this case. The final live range
+// will have different value numbers for both FOO and BAR, but there is no
+// simple mapping from old to new values. It may even be necessary to add
+// new PHI values.
+//
+// 5. Clobbering dead lanes. A def may clobber a lane of a vector register that
+// is live, but never read. This can happen because we don't compute
+// individual live ranges per lane.
+//
+// %dst<def> = FOO
+// %src = BAR
+// %dst:ssub1<def> = COPY %src
+//
+// This kind of interference is only resolved locally. If the clobbered
+// lane value escapes the block, the join is aborted.
+namespace {
+/// Track information about values in a single virtual register about to be
+/// joined. Objects of this class are always created in pairs - one for each
+/// side of the CoalescerPair.
+class JoinVals {
+ LiveInterval &LI;
+
+ // Location of this register in the final joined register.
+ // Either CP.DstIdx or CP.SrcIdx.
+ unsigned SubIdx;
+
+ // Values that will be present in the final live range.
+ SmallVectorImpl<VNInfo*> &NewVNInfo;
+
+ const CoalescerPair &CP;
+ LiveIntervals *LIS;
+ SlotIndexes *Indexes;
+ const TargetRegisterInfo *TRI;
+
+ // Value number assignments. Maps value numbers in LI to entries in NewVNInfo.
+ // This is suitable for passing to LiveInterval::join().
+ SmallVector<int, 8> Assignments;
+
+ // Conflict resolution for overlapping values.
+ enum ConflictResolution {
+ // No overlap, simply keep this value.
+ CR_Keep,
+
+ // Merge this value into OtherVNI and erase the defining instruction.
+ // Used for IMPLICIT_DEF, coalescable copies, and copies from external
+ // values.
+ CR_Erase,
+
+ // Merge this value into OtherVNI but keep the defining instruction.
+ // This is for the special case where OtherVNI is defined by the same
+ // instruction.
+ CR_Merge,
+
+ // Keep this value, and have it replace OtherVNI where possible. This
+ // complicates value mapping since OtherVNI maps to two different values
+ // before and after this def.
+ // Used when clobbering undefined or dead lanes.
+ CR_Replace,
+
+ // Unresolved conflict. Visit later when all values have been mapped.
+ CR_Unresolved,
+
+ // Unresolvable conflict. Abort the join.
+ CR_Impossible
+ };
-// Find out if we have something like
-// A = X
-// B = X
-// if so, we can pretend this is actually
-// A = X
-// B = A
-// which allows us to coalesce A and B.
-// VNI is the definition of B. LR is the life range of A that includes
-// the slot just before B. If we return true, we add "B = X" to DupCopies.
-// This implies that A dominates B.
-static bool RegistersDefinedFromSameValue(LiveIntervals &li,
- const TargetRegisterInfo &tri,
- CoalescerPair &CP,
- VNInfo *VNI,
- VNInfo *OtherVNI,
- SmallVector<MachineInstr*, 8> &DupCopies) {
- // FIXME: This is very conservative. For example, we don't handle
- // physical registers.
-
- MachineInstr *MI = li.getInstructionFromIndex(VNI->def);
-
- if (!MI || CP.isPartial() || CP.isPhys())
- return false;
+ // Per-value info for LI. The lane bit masks are all relative to the final
+ // joined register, so they can be compared directly between SrcReg and
+ // DstReg.
+ struct Val {
+ ConflictResolution Resolution;
- unsigned A = CP.getDstReg();
- if (!TargetRegisterInfo::isVirtualRegister(A))
- return false;
+ // Lanes written by this def, 0 for unanalyzed values.
+ unsigned WriteLanes;
- unsigned B = CP.getSrcReg();
- if (!TargetRegisterInfo::isVirtualRegister(B))
- return false;
+ // Lanes with defined values in this register. Other lanes are undef and
+ // safe to clobber.
+ unsigned ValidLanes;
- MachineInstr *OtherMI = li.getInstructionFromIndex(OtherVNI->def);
- if (!OtherMI)
- return false;
+ // Value in LI being redefined by this def.
+ VNInfo *RedefVNI;
- if (MI->isImplicitDef()) {
- DupCopies.push_back(MI);
- return true;
- } else {
- if (!MI->isFullCopy())
- return false;
- unsigned Src = MI->getOperand(1).getReg();
- if (!TargetRegisterInfo::isVirtualRegister(Src))
- return false;
- if (!OtherMI->isFullCopy())
- return false;
- unsigned OtherSrc = OtherMI->getOperand(1).getReg();
- if (!TargetRegisterInfo::isVirtualRegister(OtherSrc))
- return false;
+ // Value in the other live range that overlaps this def, if any.
+ VNInfo *OtherVNI;
- if (Src != OtherSrc)
- return false;
+ // Is this value an IMPLICIT_DEF?
+ bool IsImplicitDef;
- // If the copies use two different value numbers of X, we cannot merge
- // A and B.
- LiveInterval &SrcInt = li.getInterval(Src);
- // getVNInfoBefore returns NULL for undef copies. In this case, the
- // optimization is still safe.
- if (SrcInt.getVNInfoBefore(OtherVNI->def) !=
- SrcInt.getVNInfoBefore(VNI->def))
- return false;
+ // True when the live range of this value will be pruned because of an
+ // overlapping CR_Replace value in the other live range.
+ bool Pruned;
- DupCopies.push_back(MI);
- return true;
- }
-}
+ // True once Pruned above has been computed.
+ bool PrunedComputed;
-/// joinIntervals - Attempt to join these two intervals. On failure, this
-/// returns false.
-bool RegisterCoalescer::joinIntervals(CoalescerPair &CP) {
- // Handle physreg joins separately.
- if (CP.isPhys())
- return joinReservedPhysReg(CP);
+ Val() : Resolution(CR_Keep), WriteLanes(0), ValidLanes(0),
+ RedefVNI(0), OtherVNI(0), IsImplicitDef(false), Pruned(false),
+ PrunedComputed(false) {}
- LiveInterval &RHS = LIS->getInterval(CP.getSrcReg());
- DEBUG(dbgs() << "\t\tRHS = " << PrintReg(CP.getSrcReg()) << ' ' << RHS
- << '\n');
+ bool isAnalyzed() const { return WriteLanes != 0; }
+ };
- // Compute the final value assignment, assuming that the live ranges can be
- // coalesced.
- SmallVector<int, 16> LHSValNoAssignments;
- SmallVector<int, 16> RHSValNoAssignments;
- DenseMap<VNInfo*, VNInfo*> LHSValsDefinedFromRHS;
- DenseMap<VNInfo*, VNInfo*> RHSValsDefinedFromLHS;
- SmallVector<VNInfo*, 16> NewVNInfo;
+ // One entry per value number in LI.
+ SmallVector<Val, 8> Vals;
+
+ unsigned computeWriteLanes(const MachineInstr *DefMI, bool &Redef);
+ VNInfo *stripCopies(VNInfo *VNI);
+ ConflictResolution analyzeValue(unsigned ValNo, JoinVals &Other);
+ void computeAssignment(unsigned ValNo, JoinVals &Other);
+ bool taintExtent(unsigned, unsigned, JoinVals&,
+ SmallVectorImpl<std::pair<SlotIndex, unsigned> >&);
+ bool usesLanes(MachineInstr *MI, unsigned, unsigned, unsigned);
+ bool isPrunedValue(unsigned ValNo, JoinVals &Other);
+
+public:
+ JoinVals(LiveInterval &li, unsigned subIdx,
+ SmallVectorImpl<VNInfo*> &newVNInfo,
+ const CoalescerPair &cp,
+ LiveIntervals *lis,
+ const TargetRegisterInfo *tri)
+ : LI(li), SubIdx(subIdx), NewVNInfo(newVNInfo), CP(cp), LIS(lis),
+ Indexes(LIS->getSlotIndexes()), TRI(tri),
+ Assignments(LI.getNumValNums(), -1), Vals(LI.getNumValNums())
+ {}
+
+ /// Analyze defs in LI and compute a value mapping in NewVNInfo.
+ /// Returns false if any conflicts were impossible to resolve.
+ bool mapValues(JoinVals &Other);
+
+ /// Try to resolve conflicts that require all values to be mapped.
+ /// Returns false if any conflicts were impossible to resolve.
+ bool resolveConflicts(JoinVals &Other);
+
+ /// Prune the live range of values in Other.LI where they would conflict with
+ /// CR_Replace values in LI. Collect end points for restoring the live range
+ /// after joining.
+ void pruneValues(JoinVals &Other, SmallVectorImpl<SlotIndex> &EndPoints);
+
+ /// Erase any machine instructions that have been coalesced away.
+ /// Add erased instructions to ErasedInstrs.
+ /// Add foreign virtual registers to ShrinkRegs if their live range ended at
+ /// the erased instrs.
+ void eraseInstrs(SmallPtrSet<MachineInstr*, 8> &ErasedInstrs,
+ SmallVectorImpl<unsigned> &ShrinkRegs);
+
+ /// Get the value assignments suitable for passing to LiveInterval::join.
+ const int *getAssignments() const { return Assignments.data(); }
+};
+} // end anonymous namespace
+
+/// Compute the bitmask of lanes actually written by DefMI.
+/// Set Redef if there are any partial register definitions that depend on the
+/// previous value of the register.
+unsigned JoinVals::computeWriteLanes(const MachineInstr *DefMI, bool &Redef) {
+ unsigned L = 0;
+ for (ConstMIOperands MO(DefMI); MO.isValid(); ++MO) {
+ if (!MO->isReg() || MO->getReg() != LI.reg || !MO->isDef())
+ continue;
+ L |= TRI->getSubRegIndexLaneMask(
+ TRI->composeSubRegIndices(SubIdx, MO->getSubReg()));
+ if (MO->readsReg())
+ Redef = true;
+ }
+ return L;
+}
- SmallVector<MachineInstr*, 8> DupCopies;
- SmallVector<MachineInstr*, 8> DeadCopies;
+/// Find the ultimate value that VNI was copied from.
+VNInfo *JoinVals::stripCopies(VNInfo *VNI) {
+ while (!VNI->isPHIDef()) {
+ MachineInstr *MI = Indexes->getInstructionFromIndex(VNI->def);
+ assert(MI && "No defining instruction");
+ if (!MI->isFullCopy())
+ break;
+ unsigned Reg = MI->getOperand(1).getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ break;
+ LiveRangeQuery LRQ(LIS->getInterval(Reg), VNI->def);
+ if (!LRQ.valueIn())
+ break;
+ VNI = LRQ.valueIn();
+ }
+ return VNI;
+}
- LiveInterval &LHS = LIS->getOrCreateInterval(CP.getDstReg());
- DEBUG(dbgs() << "\t\tLHS = " << PrintReg(CP.getDstReg(), TRI) << ' ' << LHS
- << '\n');
+/// Analyze ValNo in this live range, and set all fields of Vals[ValNo].
+/// Return a conflict resolution when possible, but leave the hard cases as
+/// CR_Unresolved.
+/// Recursively calls computeAssignment() on this and Other, guaranteeing that
+/// both OtherVNI and RedefVNI have been analyzed and mapped before returning.
+/// The recursion always goes upwards in the dominator tree, making loops
+/// impossible.
+JoinVals::ConflictResolution
+JoinVals::analyzeValue(unsigned ValNo, JoinVals &Other) {
+ Val &V = Vals[ValNo];
+ assert(!V.isAnalyzed() && "Value has already been analyzed!");
+ VNInfo *VNI = LI.getValNumInfo(ValNo);
+ if (VNI->isUnused()) {
+ V.WriteLanes = ~0u;
+ return CR_Keep;
+ }
- // Loop over the value numbers of the LHS, seeing if any are defined from
- // the RHS.
- for (LiveInterval::vni_iterator i = LHS.vni_begin(), e = LHS.vni_end();
- i != e; ++i) {
- VNInfo *VNI = *i;
- if (VNI->isUnused() || VNI->isPHIDef())
- continue;
- MachineInstr *MI = LIS->getInstructionFromIndex(VNI->def);
- assert(MI && "Missing def");
- if (!MI->isCopyLike() && !MI->isImplicitDef()) // Src not defined by a copy?
- continue;
+ // Get the instruction defining this value, compute the lanes written.
+ const MachineInstr *DefMI = 0;
+ if (VNI->isPHIDef()) {
+ // Conservatively assume that all lanes in a PHI are valid.
+ V.ValidLanes = V.WriteLanes = TRI->getSubRegIndexLaneMask(SubIdx);
+ } else {
+ DefMI = Indexes->getInstructionFromIndex(VNI->def);
+ bool Redef = false;
+ V.ValidLanes = V.WriteLanes = computeWriteLanes(DefMI, Redef);
+
+ // If this is a read-modify-write instruction, there may be more valid
+ // lanes than the ones written by this instruction.
+ // This only covers partial redef operands. DefMI may have normal use
+ // operands reading the register. They don't contribute valid lanes.
+ //
+ // This adds ssub1 to the set of valid lanes in %src:
+ //
+ // %src:ssub1<def> = FOO
+ //
+ // This leaves only ssub1 valid, making any other lanes undef:
+ //
+ // %src:ssub1<def,read-undef> = FOO %src:ssub2
+ //
+ // The <read-undef> flag on the def operand means that old lane values are
+ // not important.
+ if (Redef) {
+ V.RedefVNI = LiveRangeQuery(LI, VNI->def).valueIn();
+ assert(V.RedefVNI && "Instruction is reading nonexistent value");
+ computeAssignment(V.RedefVNI->id, Other);
+ V.ValidLanes |= Vals[V.RedefVNI->id].ValidLanes;
+ }
- // Figure out the value # from the RHS.
- VNInfo *OtherVNI = RHS.getVNInfoBefore(VNI->def);
- // The copy could be to an aliased physreg.
- if (!OtherVNI)
- continue;
+ // An IMPLICIT_DEF writes undef values.
+ if (DefMI->isImplicitDef()) {
+ V.IsImplicitDef = true;
+ V.ValidLanes &= ~V.WriteLanes;
+ }
+ }
- // DstReg is known to be a register in the LHS interval. If the src is
- // from the RHS interval, we can use its value #.
- if (CP.isCoalescable(MI))
- DeadCopies.push_back(MI);
- else if (!RegistersDefinedFromSameValue(*LIS, *TRI, CP, VNI, OtherVNI,
- DupCopies))
- continue;
+ // Find the value in Other that overlaps VNI->def, if any.
+ LiveRangeQuery OtherLRQ(Other.LI, VNI->def);
+
+ // It is possible that both values are defined by the same instruction, or
+ // the values are PHIs defined in the same block. When that happens, the two
+ // values should be merged into one, but not into any preceding value.
+ // The first value defined or visited gets CR_Keep, the other gets CR_Merge.
+ if (VNInfo *OtherVNI = OtherLRQ.valueDefined()) {
+ assert(SlotIndex::isSameInstr(VNI->def, OtherVNI->def) && "Broken LRQ");
+
+ // One value stays, the other is merged. Keep the earlier one, or the first
+ // one we see.
+ if (OtherVNI->def < VNI->def)
+ Other.computeAssignment(OtherVNI->id, *this);
+ else if (VNI->def < OtherVNI->def && OtherLRQ.valueIn()) {
+ // This is an early-clobber def overlapping a live-in value in the other
+ // register. Not mergeable.
+ V.OtherVNI = OtherLRQ.valueIn();
+ return CR_Impossible;
+ }
+ V.OtherVNI = OtherVNI;
+ Val &OtherV = Other.Vals[OtherVNI->id];
+ // Keep this value, check for conflicts when analyzing OtherVNI.
+ if (!OtherV.isAnalyzed())
+ return CR_Keep;
+ // Both sides have been analyzed now.
+ // Allow overlapping PHI values. Any real interference would show up in a
+ // predecessor, the PHI itself can't introduce any conflicts.
+ if (VNI->isPHIDef())
+ return CR_Merge;
+ if (V.ValidLanes & OtherV.ValidLanes)
+ // Overlapping lanes can't be resolved.
+ return CR_Impossible;
+ else
+ return CR_Merge;
+ }
- LHSValsDefinedFromRHS[VNI] = OtherVNI;
+ // No simultaneous def. Is Other live at the def?
+ V.OtherVNI = OtherLRQ.valueIn();
+ if (!V.OtherVNI)
+ // No overlap, no conflict.
+ return CR_Keep;
+
+ assert(!SlotIndex::isSameInstr(VNI->def, V.OtherVNI->def) && "Broken LRQ");
+
+ // We have overlapping values, or possibly a kill of Other.
+ // Recursively compute assignments up the dominator tree.
+ Other.computeAssignment(V.OtherVNI->id, *this);
+ const Val &OtherV = Other.Vals[V.OtherVNI->id];
+
+ // Allow overlapping PHI values. Any real interference would show up in a
+ // predecessor, the PHI itself can't introduce any conflicts.
+ if (VNI->isPHIDef())
+ return CR_Replace;
+
+ // Check for simple erasable conflicts.
+ if (DefMI->isImplicitDef())
+ return CR_Erase;
+
+ // Include the non-conflict where DefMI is a coalescable copy that kills
+ // OtherVNI. We still want the copy erased and value numbers merged.
+ if (CP.isCoalescable(DefMI)) {
+ // Some of the lanes copied from OtherVNI may be undef, making them undef
+ // here too.
+ V.ValidLanes &= ~V.WriteLanes | OtherV.ValidLanes;
+ return CR_Erase;
}
- // Loop over the value numbers of the RHS, seeing if any are defined from
- // the LHS.
- for (LiveInterval::vni_iterator i = RHS.vni_begin(), e = RHS.vni_end();
- i != e; ++i) {
- VNInfo *VNI = *i;
- if (VNI->isUnused() || VNI->isPHIDef())
- continue;
- MachineInstr *MI = LIS->getInstructionFromIndex(VNI->def);
- assert(MI && "Missing def");
- if (!MI->isCopyLike() && !MI->isImplicitDef()) // Src not defined by a copy?
- continue;
+ // This may not be a real conflict if DefMI simply kills Other and defines
+ // VNI.
+ if (OtherLRQ.isKill() && OtherLRQ.endPoint() <= VNI->def)
+ return CR_Keep;
+
+ // Handle the case where VNI and OtherVNI can be proven to be identical:
+ //
+ // %other = COPY %ext
+ // %this = COPY %ext <-- Erase this copy
+ //
+ if (DefMI->isFullCopy() && !CP.isPartial() &&
+ stripCopies(VNI) == stripCopies(V.OtherVNI))
+ return CR_Erase;
+
+ // If the lanes written by this instruction were all undef in OtherVNI, it is
+ // still safe to join the live ranges. This can't be done with a simple value
+ // mapping, though - OtherVNI will map to multiple values:
+ //
+ // 1 %dst:ssub0 = FOO <-- OtherVNI
+ // 2 %src = BAR <-- VNI
+ // 3 %dst:ssub1 = COPY %src<kill> <-- Eliminate this copy.
+ // 4 BAZ %dst<kill>
+ // 5 QUUX %src<kill>
+ //
+ // Here OtherVNI will map to itself in [1;2), but to VNI in [2;5). CR_Replace
+ // handles this complex value mapping.
+ if ((V.WriteLanes & OtherV.ValidLanes) == 0)
+ return CR_Replace;
+
+ // If the other live range is killed by DefMI and the live ranges are still
+ // overlapping, it must be because we're looking at an early clobber def:
+ //
+ // %dst<def,early-clobber> = ASM %src<kill>
+ //
+ // In this case, it is illegal to merge the two live ranges since the early
+ // clobber def would clobber %src before it was read.
+ if (OtherLRQ.isKill()) {
+ // This case where the def doesn't overlap the kill is handled above.
+ assert(VNI->def.isEarlyClobber() &&
+ "Only early clobber defs can overlap a kill");
+ return CR_Impossible;
+ }
- // Figure out the value # from the LHS.
- VNInfo *OtherVNI = LHS.getVNInfoBefore(VNI->def);
- // The copy could be to an aliased physreg.
- if (!OtherVNI)
- continue;
+ // VNI is clobbering live lanes in OtherVNI, but there is still the
+ // possibility that no instructions actually read the clobbered lanes.
+ // If we're clobbering all the lanes in OtherVNI, at least one must be read.
+ // Otherwise Other.LI wouldn't be live here.
+ if ((TRI->getSubRegIndexLaneMask(Other.SubIdx) & ~V.WriteLanes) == 0)
+ return CR_Impossible;
+
+ // We need to verify that no instructions are reading the clobbered lanes. To
+ // save compile time, we'll only check that locally. Don't allow the tainted
+ // value to escape the basic block.
+ MachineBasicBlock *MBB = Indexes->getMBBFromIndex(VNI->def);
+ if (OtherLRQ.endPoint() >= Indexes->getMBBEndIdx(MBB))
+ return CR_Impossible;
+
+ // There are still some things that could go wrong besides clobbered lanes
+ // being read, for example OtherVNI may be only partially redefined in MBB,
+ // and some clobbered lanes could escape the block. Save this analysis for
+ // resolveConflicts() when all values have been mapped. We need to know
+ // RedefVNI and WriteLanes for any later defs in MBB, and we can't compute
+ // that now - the recursive analyzeValue() calls must go upwards in the
+ // dominator tree.
+ return CR_Unresolved;
+}
- // DstReg is known to be a register in the RHS interval. If the src is
- // from the LHS interval, we can use its value #.
- if (CP.isCoalescable(MI))
- DeadCopies.push_back(MI);
- else if (!RegistersDefinedFromSameValue(*LIS, *TRI, CP, VNI, OtherVNI,
- DupCopies))
- continue;
+/// Compute the value assignment for ValNo in LI.
+/// This may be called recursively by analyzeValue(), but never for a ValNo on
+/// the stack.
+void JoinVals::computeAssignment(unsigned ValNo, JoinVals &Other) {
+ Val &V = Vals[ValNo];
+ if (V.isAnalyzed()) {
+ // Recursion should always move up the dominator tree, so ValNo is not
+ // supposed to reappear before it has been assigned.
+ assert(Assignments[ValNo] != -1 && "Bad recursion?");
+ return;
+ }
+ switch ((V.Resolution = analyzeValue(ValNo, Other))) {
+ case CR_Erase:
+ case CR_Merge:
+ // Merge this ValNo into OtherVNI.
+ assert(V.OtherVNI && "OtherVNI not assigned, can't merge.");
+ assert(Other.Vals[V.OtherVNI->id].isAnalyzed() && "Missing recursion");
+ Assignments[ValNo] = Other.Assignments[V.OtherVNI->id];
+ DEBUG(dbgs() << "\t\tmerge " << PrintReg(LI.reg) << ':' << ValNo << '@'
+ << LI.getValNumInfo(ValNo)->def << " into "
+ << PrintReg(Other.LI.reg) << ':' << V.OtherVNI->id << '@'
+ << V.OtherVNI->def << " --> @"
+ << NewVNInfo[Assignments[ValNo]]->def << '\n');
+ break;
+ case CR_Replace:
+ case CR_Unresolved:
+ // The other value is going to be pruned if this join is successful.
+ assert(V.OtherVNI && "OtherVNI not assigned, can't prune");
+ Other.Vals[V.OtherVNI->id].Pruned = true;
+ // Fall through.
+ default:
+ // This value number needs to go in the final joined live range.
+ Assignments[ValNo] = NewVNInfo.size();
+ NewVNInfo.push_back(LI.getValNumInfo(ValNo));
+ break;
+ }
+}
- RHSValsDefinedFromLHS[VNI] = OtherVNI;
+bool JoinVals::mapValues(JoinVals &Other) {
+ for (unsigned i = 0, e = LI.getNumValNums(); i != e; ++i) {
+ computeAssignment(i, Other);
+ if (Vals[i].Resolution == CR_Impossible) {
+ DEBUG(dbgs() << "\t\tinterference at " << PrintReg(LI.reg) << ':' << i
+ << '@' << LI.getValNumInfo(i)->def << '\n');
+ return false;
+ }
}
+ return true;
+}
- LHSValNoAssignments.resize(LHS.getNumValNums(), -1);
- RHSValNoAssignments.resize(RHS.getNumValNums(), -1);
- NewVNInfo.reserve(LHS.getNumValNums() + RHS.getNumValNums());
+/// Assuming ValNo is going to clobber some valid lanes in Other.LI, compute
+/// the extent of the tainted lanes in the block.
+///
+/// Multiple values in Other.LI can be affected since partial redefinitions can
+/// preserve previously tainted lanes.
+///
+/// 1 %dst = VLOAD <-- Define all lanes in %dst
+/// 2 %src = FOO <-- ValNo to be joined with %dst:ssub0
+/// 3 %dst:ssub1 = BAR <-- Partial redef doesn't clear taint in ssub0
+/// 4 %dst:ssub0 = COPY %src <-- Conflict resolved, ssub0 wasn't read
+///
+/// For each ValNo in Other that is affected, add an (EndIndex, TaintedLanes)
+/// entry to TaintedVals.
+///
+/// Returns false if the tainted lanes extend beyond the basic block.
+bool JoinVals::
+taintExtent(unsigned ValNo, unsigned TaintedLanes, JoinVals &Other,
+ SmallVectorImpl<std::pair<SlotIndex, unsigned> > &TaintExtent) {
+ VNInfo *VNI = LI.getValNumInfo(ValNo);
+ MachineBasicBlock *MBB = Indexes->getMBBFromIndex(VNI->def);
+ SlotIndex MBBEnd = Indexes->getMBBEndIdx(MBB);
+
+ // Scan Other.LI from VNI.def to MBBEnd.
+ LiveInterval::iterator OtherI = Other.LI.find(VNI->def);
+ assert(OtherI != Other.LI.end() && "No conflict?");
+ do {
+ // OtherI is pointing to a tainted value. Abort the join if the tainted
+ // lanes escape the block.
+ SlotIndex End = OtherI->end;
+ if (End >= MBBEnd) {
+ DEBUG(dbgs() << "\t\ttaints global " << PrintReg(Other.LI.reg) << ':'
+ << OtherI->valno->id << '@' << OtherI->start << '\n');
+ return false;
+ }
+ DEBUG(dbgs() << "\t\ttaints local " << PrintReg(Other.LI.reg) << ':'
+ << OtherI->valno->id << '@' << OtherI->start
+ << " to " << End << '\n');
+ // A dead def is not a problem.
+ if (End.isDead())
+ break;
+ TaintExtent.push_back(std::make_pair(End, TaintedLanes));
+
+ // Check for another def in the MBB.
+ if (++OtherI == Other.LI.end() || OtherI->start >= MBBEnd)
+ break;
+
+ // Lanes written by the new def are no longer tainted.
+ const Val &OV = Other.Vals[OtherI->valno->id];
+ TaintedLanes &= ~OV.WriteLanes;
+ if (!OV.RedefVNI)
+ break;
+ } while (TaintedLanes);
+ return true;
+}
- for (LiveInterval::vni_iterator i = LHS.vni_begin(), e = LHS.vni_end();
- i != e; ++i) {
- VNInfo *VNI = *i;
- unsigned VN = VNI->id;
- if (LHSValNoAssignments[VN] >= 0 || VNI->isUnused())
+/// Return true if MI uses any of the given Lanes from Reg.
+/// This does not include partial redefinitions of Reg.
+bool JoinVals::usesLanes(MachineInstr *MI, unsigned Reg, unsigned SubIdx,
+ unsigned Lanes) {
+ if (MI->isDebugValue())
+ return false;
+ for (ConstMIOperands MO(MI); MO.isValid(); ++MO) {
+ if (!MO->isReg() || MO->isDef() || MO->getReg() != Reg)
continue;
- ComputeUltimateVN(VNI, NewVNInfo,
- LHSValsDefinedFromRHS, RHSValsDefinedFromLHS,
- LHSValNoAssignments, RHSValNoAssignments);
- }
- for (LiveInterval::vni_iterator i = RHS.vni_begin(), e = RHS.vni_end();
- i != e; ++i) {
- VNInfo *VNI = *i;
- unsigned VN = VNI->id;
- if (RHSValNoAssignments[VN] >= 0 || VNI->isUnused())
+ if (!MO->readsReg())
continue;
- // If this value number isn't a copy from the LHS, it's a new number.
- if (RHSValsDefinedFromLHS.find(VNI) == RHSValsDefinedFromLHS.end()) {
- NewVNInfo.push_back(VNI);
- RHSValNoAssignments[VN] = NewVNInfo.size()-1;
+ if (Lanes & TRI->getSubRegIndexLaneMask(
+ TRI->composeSubRegIndices(SubIdx, MO->getSubReg())))
+ return true;
+ }
+ return false;
+}
+
+bool JoinVals::resolveConflicts(JoinVals &Other) {
+ for (unsigned i = 0, e = LI.getNumValNums(); i != e; ++i) {
+ Val &V = Vals[i];
+ assert (V.Resolution != CR_Impossible && "Unresolvable conflict");
+ if (V.Resolution != CR_Unresolved)
continue;
- }
+ DEBUG(dbgs() << "\t\tconflict at " << PrintReg(LI.reg) << ':' << i
+ << '@' << LI.getValNumInfo(i)->def << '\n');
+ ++NumLaneConflicts;
+ assert(V.OtherVNI && "Inconsistent conflict resolution.");
+ VNInfo *VNI = LI.getValNumInfo(i);
+ const Val &OtherV = Other.Vals[V.OtherVNI->id];
+
+ // VNI is known to clobber some lanes in OtherVNI. If we go ahead with the
+ // join, those lanes will be tainted with a wrong value. Get the extent of
+ // the tainted lanes.
+ unsigned TaintedLanes = V.WriteLanes & OtherV.ValidLanes;
+ SmallVector<std::pair<SlotIndex, unsigned>, 8> TaintExtent;
+ if (!taintExtent(i, TaintedLanes, Other, TaintExtent))
+ // Tainted lanes would extend beyond the basic block.
+ return false;
- ComputeUltimateVN(VNI, NewVNInfo,
- RHSValsDefinedFromLHS, LHSValsDefinedFromRHS,
- RHSValNoAssignments, LHSValNoAssignments);
- }
+ assert(!TaintExtent.empty() && "There should be at least one conflict.");
- // Armed with the mappings of LHS/RHS values to ultimate values, walk the
- // interval lists to see if these intervals are coalescable.
- LiveInterval::const_iterator I = LHS.begin();
- LiveInterval::const_iterator IE = LHS.end();
- LiveInterval::const_iterator J = RHS.begin();
- LiveInterval::const_iterator JE = RHS.end();
-
- // Collect interval end points that will no longer be kills.
- SmallVector<MachineInstr*, 8> LHSOldKills;
- SmallVector<MachineInstr*, 8> RHSOldKills;
-
- // Skip ahead until the first place of potential sharing.
- if (I != IE && J != JE) {
- if (I->start < J->start) {
- I = std::upper_bound(I, IE, J->start);
- if (I != LHS.begin()) --I;
- } else if (J->start < I->start) {
- J = std::upper_bound(J, JE, I->start);
- if (J != RHS.begin()) --J;
+ // Now look at the instructions from VNI->def to TaintExtent (inclusive).
+ MachineBasicBlock *MBB = Indexes->getMBBFromIndex(VNI->def);
+ MachineBasicBlock::iterator MI = MBB->begin();
+ if (!VNI->isPHIDef()) {
+ MI = Indexes->getInstructionFromIndex(VNI->def);
+ // No need to check the instruction defining VNI for reads.
+ ++MI;
}
- }
-
- while (I != IE && J != JE) {
- // Determine if these two live ranges overlap.
- // If so, check value # info to determine if they are really different.
- if (I->end > J->start && J->end > I->start) {
- // If the live range overlap will map to the same value number in the
- // result liverange, we can still coalesce them. If not, we can't.
- if (LHSValNoAssignments[I->valno->id] !=
- RHSValNoAssignments[J->valno->id])
+ assert(!SlotIndex::isSameInstr(VNI->def, TaintExtent.front().first) &&
+ "Interference ends on VNI->def. Should have been handled earlier");
+ MachineInstr *LastMI =
+ Indexes->getInstructionFromIndex(TaintExtent.front().first);
+ assert(LastMI && "Range must end at a proper instruction");
+ unsigned TaintNum = 0;
+ for(;;) {
+ assert(MI != MBB->end() && "Bad LastMI");
+ if (usesLanes(MI, Other.LI.reg, Other.SubIdx, TaintedLanes)) {
+ DEBUG(dbgs() << "\t\ttainted lanes used by: " << *MI);
return false;
-
- // Extended live ranges should no longer be killed.
- if (!I->end.isBlock() && I->end < J->end)
- if (MachineInstr *MI = LIS->getInstructionFromIndex(I->end))
- LHSOldKills.push_back(MI);
- if (!J->end.isBlock() && J->end < I->end)
- if (MachineInstr *MI = LIS->getInstructionFromIndex(J->end))
- RHSOldKills.push_back(MI);
+ }
+ // LastMI is the last instruction to use the current value.
+ if (&*MI == LastMI) {
+ if (++TaintNum == TaintExtent.size())
+ break;
+ LastMI = Indexes->getInstructionFromIndex(TaintExtent[TaintNum].first);
+ assert(LastMI && "Range must end at a proper instruction");
+ TaintedLanes = TaintExtent[TaintNum].second;
+ }
+ ++MI;
}
- if (I->end < J->end)
- ++I;
- else
- ++J;
- }
-
- // Clear kill flags where live ranges are extended.
- while (!LHSOldKills.empty())
- LHSOldKills.pop_back_val()->clearRegisterKills(LHS.reg, TRI);
- while (!RHSOldKills.empty())
- RHSOldKills.pop_back_val()->clearRegisterKills(RHS.reg, TRI);
-
- if (LHSValNoAssignments.empty())
- LHSValNoAssignments.push_back(-1);
- if (RHSValNoAssignments.empty())
- RHSValNoAssignments.push_back(-1);
-
- // Now erase all the redundant copies.
- for (unsigned i = 0, e = DeadCopies.size(); i != e; ++i) {
- MachineInstr *MI = DeadCopies[i];
- if (!ErasedInstrs.insert(MI))
- continue;
- DEBUG(dbgs() << "\t\terased:\t" << LIS->getInstructionIndex(MI)
- << '\t' << *MI);
- LIS->RemoveMachineInstrFromMaps(MI);
- MI->eraseFromParent();
+ // The tainted lanes are unused.
+ V.Resolution = CR_Replace;
+ ++NumLaneResolves;
}
+ return true;
+}
- SmallVector<unsigned, 8> SourceRegisters;
- for (SmallVector<MachineInstr*, 8>::iterator I = DupCopies.begin(),
- E = DupCopies.end(); I != E; ++I) {
- MachineInstr *MI = *I;
- if (!ErasedInstrs.insert(MI))
- continue;
+// Determine if ValNo is a copy of a value number in LI or Other.LI that will
+// be pruned:
+//
+// %dst = COPY %src
+// %src = COPY %dst <-- This value to be pruned.
+// %dst = COPY %src <-- This value is a copy of a pruned value.
+//
+bool JoinVals::isPrunedValue(unsigned ValNo, JoinVals &Other) {
+ Val &V = Vals[ValNo];
+ if (V.Pruned || V.PrunedComputed)
+ return V.Pruned;
+
+ if (V.Resolution != CR_Erase && V.Resolution != CR_Merge)
+ return V.Pruned;
+
+ // Follow copies up the dominator tree and check if any intermediate value
+ // has been pruned.
+ V.PrunedComputed = true;
+ V.Pruned = Other.isPrunedValue(V.OtherVNI->id, *this);
+ return V.Pruned;
+}
- // If MI is a copy, then we have pretended that the assignment to B in
- // A = X
- // B = X
- // was actually a copy from A. Now that we decided to coalesce A and B,
- // transform the code into
- // A = X
- // In the case of the implicit_def, we just have to remove it.
- if (!MI->isImplicitDef()) {
- unsigned Src = MI->getOperand(1).getReg();
- SourceRegisters.push_back(Src);
+void JoinVals::pruneValues(JoinVals &Other,
+ SmallVectorImpl<SlotIndex> &EndPoints) {
+ for (unsigned i = 0, e = LI.getNumValNums(); i != e; ++i) {
+ SlotIndex Def = LI.getValNumInfo(i)->def;
+ switch (Vals[i].Resolution) {
+ case CR_Keep:
+ break;
+ case CR_Replace: {
+ // This value takes precedence over the value in Other.LI.
+ LIS->pruneValue(&Other.LI, Def, &EndPoints);
+ // Check if we're replacing an IMPLICIT_DEF value. The IMPLICIT_DEF
+ // instructions are only inserted to provide a live-out value for PHI
+ // predecessors, so the instruction should simply go away once its value
+ // has been replaced.
+ Val &OtherV = Other.Vals[Vals[i].OtherVNI->id];
+ bool EraseImpDef = OtherV.IsImplicitDef && OtherV.Resolution == CR_Keep;
+ if (!Def.isBlock()) {
+ // Remove <def,read-undef> flags. This def is now a partial redef.
+ // Also remove <def,dead> flags since the joined live range will
+ // continue past this instruction.
+ for (MIOperands MO(Indexes->getInstructionFromIndex(Def));
+ MO.isValid(); ++MO)
+ if (MO->isReg() && MO->isDef() && MO->getReg() == LI.reg) {
+ MO->setIsUndef(EraseImpDef);
+ MO->setIsDead(false);
+ }
+ // This value will reach instructions below, but we need to make sure
+ // the live range also reaches the instruction at Def.
+ if (!EraseImpDef)
+ EndPoints.push_back(Def);
+ }
+ DEBUG(dbgs() << "\t\tpruned " << PrintReg(Other.LI.reg) << " at " << Def
+ << ": " << Other.LI << '\n');
+ break;
+ }
+ case CR_Erase:
+ case CR_Merge:
+ if (isPrunedValue(i, Other)) {
+ // This value is ultimately a copy of a pruned value in LI or Other.LI.
+ // We can no longer trust the value mapping computed by
+ // computeAssignment(), the value that was originally copied could have
+ // been replaced.
+ LIS->pruneValue(&LI, Def, &EndPoints);
+ DEBUG(dbgs() << "\t\tpruned all of " << PrintReg(LI.reg) << " at "
+ << Def << ": " << LI << '\n');
+ }
+ break;
+ case CR_Unresolved:
+ case CR_Impossible:
+ llvm_unreachable("Unresolved conflicts");
}
- LIS->RemoveMachineInstrFromMaps(MI);
- MI->eraseFromParent();
}
+}
- // If B = X was the last use of X in a liverange, we have to shrink it now
- // that B = X is gone.
- for (SmallVector<unsigned, 8>::iterator I = SourceRegisters.begin(),
- E = SourceRegisters.end(); I != E; ++I) {
- LIS->shrinkToUses(&LIS->getInterval(*I));
+void JoinVals::eraseInstrs(SmallPtrSet<MachineInstr*, 8> &ErasedInstrs,
+ SmallVectorImpl<unsigned> &ShrinkRegs) {
+ for (unsigned i = 0, e = LI.getNumValNums(); i != e; ++i) {
+ // Get the def location before markUnused() below invalidates it.
+ SlotIndex Def = LI.getValNumInfo(i)->def;
+ switch (Vals[i].Resolution) {
+ case CR_Keep:
+ // If an IMPLICIT_DEF value is pruned, it doesn't serve a purpose any
+ // longer. The IMPLICIT_DEF instructions are only inserted by
+ // PHIElimination to guarantee that all PHI predecessors have a value.
+ if (!Vals[i].IsImplicitDef || !Vals[i].Pruned)
+ break;
+ // Remove value number i from LI. Note that this VNInfo is still present
+ // in NewVNInfo, so it will appear as an unused value number in the final
+ // joined interval.
+ LI.getValNumInfo(i)->markUnused();
+ LI.removeValNo(LI.getValNumInfo(i));
+ DEBUG(dbgs() << "\t\tremoved " << i << '@' << Def << ": " << LI << '\n');
+ // FALL THROUGH.
+
+ case CR_Erase: {
+ MachineInstr *MI = Indexes->getInstructionFromIndex(Def);
+ assert(MI && "No instruction to erase");
+ if (MI->isCopy()) {
+ unsigned Reg = MI->getOperand(1).getReg();
+ if (TargetRegisterInfo::isVirtualRegister(Reg) &&
+ Reg != CP.getSrcReg() && Reg != CP.getDstReg())
+ ShrinkRegs.push_back(Reg);
+ }
+ ErasedInstrs.insert(MI);
+ DEBUG(dbgs() << "\t\terased:\t" << Def << '\t' << *MI);
+ LIS->RemoveMachineInstrFromMaps(MI);
+ MI->eraseFromParent();
+ break;
+ }
+ default:
+ break;
+ }
}
+}
+
+bool RegisterCoalescer::joinVirtRegs(CoalescerPair &CP) {
+ SmallVector<VNInfo*, 16> NewVNInfo;
+ LiveInterval &RHS = LIS->getInterval(CP.getSrcReg());
+ LiveInterval &LHS = LIS->getInterval(CP.getDstReg());
+ JoinVals RHSVals(RHS, CP.getSrcIdx(), NewVNInfo, CP, LIS, TRI);
+ JoinVals LHSVals(LHS, CP.getDstIdx(), NewVNInfo, CP, LIS, TRI);
+
+ DEBUG(dbgs() << "\t\tRHS = " << PrintReg(CP.getSrcReg()) << ' ' << RHS
+ << "\n\t\tLHS = " << PrintReg(CP.getDstReg()) << ' ' << LHS
+ << '\n');
+
+ // First compute NewVNInfo and the simple value mappings.
+ // Detect impossible conflicts early.
+ if (!LHSVals.mapValues(RHSVals) || !RHSVals.mapValues(LHSVals))
+ return false;
+
+ // Some conflicts can only be resolved after all values have been mapped.
+ if (!LHSVals.resolveConflicts(RHSVals) || !RHSVals.resolveConflicts(LHSVals))
+ return false;
- // If we get here, we know that we can coalesce the live ranges. Ask the
- // intervals to coalesce themselves now.
- LHS.join(RHS, &LHSValNoAssignments[0], &RHSValNoAssignments[0], NewVNInfo,
+ // All clear, the live ranges can be merged.
+
+ // The merging algorithm in LiveInterval::join() can't handle conflicting
+ // value mappings, so we need to remove any live ranges that overlap a
+ // CR_Replace resolution. Collect a set of end points that can be used to
+ // restore the live range after joining.
+ SmallVector<SlotIndex, 8> EndPoints;
+ LHSVals.pruneValues(RHSVals, EndPoints);
+ RHSVals.pruneValues(LHSVals, EndPoints);
+
+ // Erase COPY and IMPLICIT_DEF instructions. This may cause some external
+ // registers to require trimming.
+ SmallVector<unsigned, 8> ShrinkRegs;
+ LHSVals.eraseInstrs(ErasedInstrs, ShrinkRegs);
+ RHSVals.eraseInstrs(ErasedInstrs, ShrinkRegs);
+ while (!ShrinkRegs.empty())
+ LIS->shrinkToUses(&LIS->getInterval(ShrinkRegs.pop_back_val()));
+
+ // Join RHS into LHS.
+ LHS.join(RHS, LHSVals.getAssignments(), RHSVals.getAssignments(), NewVNInfo,
MRI);
+
+ // Kill flags are going to be wrong if the live ranges were overlapping.
+ // Eventually, we should simply clear all kill flags when computing live
+ // ranges. They are reinserted after register allocation.
+ MRI->clearKillFlags(LHS.reg);
+ MRI->clearKillFlags(RHS.reg);
+
+ if (EndPoints.empty())
+ return true;
+
+ // Recompute the parts of the live range we had to remove because of
+ // CR_Replace conflicts.
+ DEBUG(dbgs() << "\t\trestoring liveness to " << EndPoints.size()
+ << " points: " << LHS << '\n');
+ LIS->extendToIndices(&LHS, EndPoints);
return true;
}
+/// joinIntervals - Attempt to join these two intervals. On failure, this
+/// returns false.
+bool RegisterCoalescer::joinIntervals(CoalescerPair &CP) {
+ return CP.isPhys() ? joinReservedPhysReg(CP) : joinVirtRegs(CP);
+}
+
namespace {
// DepthMBBCompare - Comparison predicate that sort first based on the loop
// depth of the basic block (the unsigned), and then on the MBB number.
@@ -1564,8 +2024,7 @@ bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) {
Loops = &getAnalysis<MachineLoopInfo>();
DEBUG(dbgs() << "********** SIMPLE REGISTER COALESCING **********\n"
- << "********** Function: "
- << ((Value*)MF->getFunction())->getName() << '\n');
+ << "********** Function: " << MF->getName() << '\n');
if (VerifyCoalescing)
MF->verify(this, "Before register coalescing");
diff --git a/contrib/llvm/lib/CodeGen/RegisterCoalescer.h b/contrib/llvm/lib/CodeGen/RegisterCoalescer.h
index 8a6df98..47c3df1 100644
--- a/contrib/llvm/lib/CodeGen/RegisterCoalescer.h
+++ b/contrib/llvm/lib/CodeGen/RegisterCoalescer.h
@@ -63,6 +63,13 @@ namespace llvm {
: TRI(tri), DstReg(0), SrcReg(0), DstIdx(0), SrcIdx(0),
Partial(false), CrossClass(false), Flipped(false), NewRC(0) {}
+ /// Create a CoalescerPair representing a virtreg-to-physreg copy.
+ /// No need to call setRegisters().
+ CoalescerPair(unsigned VirtReg, unsigned PhysReg,
+ const TargetRegisterInfo &tri)
+ : TRI(tri), DstReg(PhysReg), SrcReg(VirtReg), DstIdx(0), SrcIdx(0),
+ Partial(false), CrossClass(false), Flipped(false), NewRC(0) {}
+
/// setRegisters - set registers to match the copy instruction MI. Return
/// false if MI is not a coalescable copy instruction.
bool setRegisters(const MachineInstr*);
diff --git a/contrib/llvm/lib/CodeGen/RegisterPressure.cpp b/contrib/llvm/lib/CodeGen/RegisterPressure.cpp
index 43448c8..543c426 100644
--- a/contrib/llvm/lib/CodeGen/RegisterPressure.cpp
+++ b/contrib/llvm/lib/CodeGen/RegisterPressure.cpp
@@ -63,7 +63,8 @@ void RegisterPressure::decrease(const TargetRegisterClass *RC,
decreaseSetPressure(MaxSetPressure, RC, TRI);
}
-void RegisterPressure::dump(const TargetRegisterInfo *TRI) {
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+void RegisterPressure::dump(const TargetRegisterInfo *TRI) const {
dbgs() << "Live In: ";
for (unsigned i = 0, e = LiveInRegs.size(); i < e; ++i)
dbgs() << PrintReg(LiveInRegs[i], TRI) << " ";
@@ -78,6 +79,7 @@ void RegisterPressure::dump(const TargetRegisterInfo *TRI) {
<< '\n';
}
}
+#endif
/// Increase the current pressure as impacted by these physical registers and
/// bump the high water mark if needed.
@@ -320,10 +322,8 @@ struct RegisterOperands {
if (findReg(MO.getReg(), isVReg, DeadDefs, TRI) == DeadDefs.end())
DeadDefs.push_back(MO.getReg());
}
- else {
- if (findReg(MO.getReg(), isVReg, Defs, TRI) == Defs.end())
- Defs.push_back(MO.getReg());
- }
+ else if (findReg(MO.getReg(), isVReg, Defs, TRI) == Defs.end())
+ Defs.push_back(MO.getReg());
}
}
};
@@ -335,7 +335,7 @@ static void collectOperands(const MachineInstr *MI,
PhysRegOperands &PhysRegOpers,
VirtRegOperands &VirtRegOpers,
const TargetRegisterInfo *TRI,
- const RegisterClassInfo *RCI) {
+ const MachineRegisterInfo *MRI) {
for(ConstMIBundleOperands OperI(MI); OperI.isValid(); ++OperI) {
const MachineOperand &MO = *OperI;
if (!MO.isReg() || !MO.getReg())
@@ -343,7 +343,7 @@ static void collectOperands(const MachineInstr *MI,
if (TargetRegisterInfo::isVirtualRegister(MO.getReg()))
VirtRegOpers.collect(MO, TRI);
- else if (RCI->isAllocatable(MO.getReg()))
+ else if (MRI->isAllocatable(MO.getReg()))
PhysRegOpers.collect(MO, TRI);
}
// Remove redundant physreg dead defs.
@@ -449,7 +449,7 @@ bool RegPressureTracker::recede() {
PhysRegOperands PhysRegOpers;
VirtRegOperands VirtRegOpers;
- collectOperands(CurrPos, PhysRegOpers, VirtRegOpers, TRI, RCI);
+ collectOperands(CurrPos, PhysRegOpers, VirtRegOpers, TRI, MRI);
// Boost pressure for all dead defs together.
increasePhysRegPressure(PhysRegOpers.DeadDefs);
@@ -522,7 +522,7 @@ bool RegPressureTracker::advance() {
PhysRegOperands PhysRegOpers;
VirtRegOperands VirtRegOpers;
- collectOperands(CurrPos, PhysRegOpers, VirtRegOpers, TRI, RCI);
+ collectOperands(CurrPos, PhysRegOpers, VirtRegOpers, TRI, MRI);
// Kill liveness at last uses.
for (unsigned i = 0, e = PhysRegOpers.Uses.size(); i < e; ++i) {
@@ -664,7 +664,7 @@ void RegPressureTracker::bumpUpwardPressure(const MachineInstr *MI) {
// Account for register pressure similar to RegPressureTracker::recede().
PhysRegOperands PhysRegOpers;
VirtRegOperands VirtRegOpers;
- collectOperands(MI, PhysRegOpers, VirtRegOpers, TRI, RCI);
+ collectOperands(MI, PhysRegOpers, VirtRegOpers, TRI, MRI);
// Boost max pressure for all dead defs together.
// Since CurrSetPressure and MaxSetPressure
@@ -674,9 +674,16 @@ void RegPressureTracker::bumpUpwardPressure(const MachineInstr *MI) {
decreaseVirtRegPressure(VirtRegOpers.DeadDefs);
// Kill liveness at live defs.
- decreasePhysRegPressure(PhysRegOpers.Defs);
- decreaseVirtRegPressure(VirtRegOpers.Defs);
-
+ for (unsigned i = 0, e = PhysRegOpers.Defs.size(); i < e; ++i) {
+ unsigned Reg = PhysRegOpers.Defs[i];
+ if (!findReg(Reg, false, PhysRegOpers.Uses, TRI))
+ decreasePhysRegPressure(PhysRegOpers.Defs);
+ }
+ for (unsigned i = 0, e = VirtRegOpers.Defs.size(); i < e; ++i) {
+ unsigned Reg = VirtRegOpers.Defs[i];
+ if (!findReg(Reg, true, VirtRegOpers.Uses, TRI))
+ decreaseVirtRegPressure(VirtRegOpers.Defs);
+ }
// Generate liveness for uses.
for (unsigned i = 0, e = PhysRegOpers.Uses.size(); i < e; ++i) {
unsigned Reg = PhysRegOpers.Uses[i];
@@ -750,7 +757,7 @@ void RegPressureTracker::bumpDownwardPressure(const MachineInstr *MI) {
// Account for register pressure similar to RegPressureTracker::recede().
PhysRegOperands PhysRegOpers;
VirtRegOperands VirtRegOpers;
- collectOperands(MI, PhysRegOpers, VirtRegOpers, TRI, RCI);
+ collectOperands(MI, PhysRegOpers, VirtRegOpers, TRI, MRI);
// Kill liveness at last uses. Assume allocatable physregs are single-use
// rather than checking LiveIntervals.
diff --git a/contrib/llvm/lib/CodeGen/RegisterScavenging.cpp b/contrib/llvm/lib/CodeGen/RegisterScavenging.cpp
index d673794..5ec6564 100644
--- a/contrib/llvm/lib/CodeGen/RegisterScavenging.cpp
+++ b/contrib/llvm/lib/CodeGen/RegisterScavenging.cpp
@@ -92,9 +92,6 @@ void RegScavenger::enterBasicBlock(MachineBasicBlock *mbb) {
KillRegs.resize(NumPhysRegs);
DefRegs.resize(NumPhysRegs);
- // Create reserved registers bitvector.
- ReservedRegs = TRI->getReservedRegs(MF);
-
// Create callee-saved registers bitvector.
CalleeSavedRegs.resize(NumPhysRegs);
const uint16_t *CSRegs = TRI->getCalleeSavedRegs(&MF);
@@ -225,9 +222,9 @@ void RegScavenger::getRegsUsed(BitVector &used, bool includeReserved) {
used = RegsAvailable;
used.flip();
if (includeReserved)
- used |= ReservedRegs;
+ used |= MRI->getReservedRegs();
else
- used.reset(ReservedRegs);
+ used.reset(MRI->getReservedRegs());
}
unsigned RegScavenger::FindUnusedReg(const TargetRegisterClass *RC) const {
diff --git a/contrib/llvm/lib/CodeGen/ScheduleDAG.cpp b/contrib/llvm/lib/CodeGen/ScheduleDAG.cpp
index 752f8e4..9a65071 100644
--- a/contrib/llvm/lib/CodeGen/ScheduleDAG.cpp
+++ b/contrib/llvm/lib/CodeGen/ScheduleDAG.cpp
@@ -279,6 +279,7 @@ void SUnit::ComputeHeight() {
} while (!WorkList.empty());
}
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// SUnit - Scheduling unit. It's an wrapper around either a single SDNode or
/// a group of nodes flagged together.
void SUnit::dump(const ScheduleDAG *G) const {
@@ -336,6 +337,7 @@ void SUnit::dumpAll(const ScheduleDAG *G) const {
}
dbgs() << "\n";
}
+#endif
#ifndef NDEBUG
/// VerifyScheduledDAG - Verify that all SUnits were scheduled and that
diff --git a/contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp b/contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp
index 9c1dba3..a4d4a93 100644
--- a/contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp
+++ b/contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp
@@ -22,6 +22,7 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/CodeGen/RegisterPressure.h"
+#include "llvm/CodeGen/ScheduleDAGILP.h"
#include "llvm/CodeGen/ScheduleDAGInstrs.h"
#include "llvm/MC/MCInstrItineraries.h"
#include "llvm/Target/TargetMachine.h"
@@ -30,6 +31,7 @@
#include "llvm/Target/TargetSubtargetInfo.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallPtrSet.h"
@@ -44,14 +46,15 @@ ScheduleDAGInstrs::ScheduleDAGInstrs(MachineFunction &mf,
const MachineDominatorTree &mdt,
bool IsPostRAFlag,
LiveIntervals *lis)
- : ScheduleDAG(mf), MLI(mli), MDT(mdt), MFI(mf.getFrameInfo()),
- InstrItins(mf.getTarget().getInstrItineraryData()), LIS(lis),
- IsPostRA(IsPostRAFlag), UnitLatencies(false), CanHandleTerminators(false),
- LoopRegs(MDT), FirstDbgValue(0) {
+ : ScheduleDAG(mf), MLI(mli), MDT(mdt), MFI(mf.getFrameInfo()), LIS(lis),
+ IsPostRA(IsPostRAFlag), CanHandleTerminators(false), FirstDbgValue(0) {
assert((IsPostRA || LIS) && "PreRA scheduling requires LiveIntervals");
DbgValues.clear();
assert(!(IsPostRA && MRI.getNumVirtRegs()) &&
"Virtual registers must be removed prior to PostRA scheduling");
+
+ const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>();
+ SchedModel.init(*ST.getSchedModel(), &ST, TII);
}
/// getUnderlyingObjectFromInt - This is the function that does the work of
@@ -68,7 +71,7 @@ static const Value *getUnderlyingObjectFromInt(const Value *V) {
// object. We don't have to worry about the case where the
// object address is somehow being computed by the multiply,
// because our callers only care when the result is an
- // identifibale object.
+ // identifiable object.
if (U->getOpcode() != Instruction::Add ||
(!isa<ConstantInt>(U->getOperand(1)) &&
Operator::getOpcode(U->getOperand(1)) != Instruction::Mul))
@@ -135,10 +138,6 @@ static const Value *getUnderlyingObjectForInstr(const MachineInstr *MI,
void ScheduleDAGInstrs::startBlock(MachineBasicBlock *bb) {
BB = bb;
- LoopRegs.Deps.clear();
- if (MachineLoop *ML = MLI.getLoopFor(BB))
- if (BB == ML->getLoopLatch())
- LoopRegs.VisitLoop(ML);
}
void ScheduleDAGInstrs::finishBlock() {
@@ -174,9 +173,6 @@ void ScheduleDAGInstrs::enterRegion(MachineBasicBlock *bb,
EndIndex = endcount;
MISUnitMap.clear();
- // Check to see if the scheduler cares about latencies.
- UnitLatencies = forceUnitLatencies();
-
ScheduleDAG::clearDAG();
}
@@ -209,7 +205,7 @@ void ScheduleDAGInstrs::addSchedBarrierDeps() {
if (Reg == 0) continue;
if (TRI->isPhysicalRegister(Reg))
- Uses[Reg].push_back(&ExitSU);
+ Uses[Reg].push_back(PhysRegSUOper(&ExitSU, -1));
else {
assert(!IsPostRA && "Virtual register encountered after regalloc.");
addVRegUseDeps(&ExitSU, i);
@@ -225,59 +221,44 @@ void ScheduleDAGInstrs::addSchedBarrierDeps() {
E = (*SI)->livein_end(); I != E; ++I) {
unsigned Reg = *I;
if (!Uses.contains(Reg))
- Uses[Reg].push_back(&ExitSU);
+ Uses[Reg].push_back(PhysRegSUOper(&ExitSU, -1));
}
}
}
/// MO is an operand of SU's instruction that defines a physical register. Add
/// data dependencies from SU to any uses of the physical register.
-void ScheduleDAGInstrs::addPhysRegDataDeps(SUnit *SU,
- const MachineOperand &MO) {
+void ScheduleDAGInstrs::addPhysRegDataDeps(SUnit *SU, unsigned OperIdx) {
+ const MachineOperand &MO = SU->getInstr()->getOperand(OperIdx);
assert(MO.isDef() && "expect physreg def");
// Ask the target if address-backscheduling is desirable, and if so how much.
const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>();
- unsigned SpecialAddressLatency = ST.getSpecialAddressLatency();
- unsigned DataLatency = SU->Latency;
for (MCRegAliasIterator Alias(MO.getReg(), TRI, true);
Alias.isValid(); ++Alias) {
if (!Uses.contains(*Alias))
continue;
- std::vector<SUnit*> &UseList = Uses[*Alias];
+ std::vector<PhysRegSUOper> &UseList = Uses[*Alias];
for (unsigned i = 0, e = UseList.size(); i != e; ++i) {
- SUnit *UseSU = UseList[i];
+ SUnit *UseSU = UseList[i].SU;
if (UseSU == SU)
continue;
- unsigned LDataLatency = DataLatency;
- // Optionally add in a special extra latency for nodes that
- // feed addresses.
- // TODO: Perhaps we should get rid of
- // SpecialAddressLatency and just move this into
- // adjustSchedDependency for the targets that care about it.
- if (SpecialAddressLatency != 0 && !UnitLatencies &&
- UseSU != &ExitSU) {
- MachineInstr *UseMI = UseSU->getInstr();
- const MCInstrDesc &UseMCID = UseMI->getDesc();
- int RegUseIndex = UseMI->findRegisterUseOperandIdx(*Alias);
- assert(RegUseIndex >= 0 && "UseMI doesn't use register!");
- if (RegUseIndex >= 0 &&
- (UseMI->mayLoad() || UseMI->mayStore()) &&
- (unsigned)RegUseIndex < UseMCID.getNumOperands() &&
- UseMCID.OpInfo[RegUseIndex].isLookupPtrRegClass())
- LDataLatency += SpecialAddressLatency;
- }
- // Adjust the dependence latency using operand def/use
- // information (if any), and then allow the target to
- // perform its own adjustments.
- SDep dep(SU, SDep::Data, LDataLatency, *Alias);
- if (!UnitLatencies) {
- unsigned Latency = computeOperandLatency(SU, UseSU, dep);
- dep.setLatency(Latency);
-
- ST.adjustSchedDependency(SU, UseSU, dep);
- }
+
+ SDep dep(SU, SDep::Data, *Alias);
+
+ // Adjust the dependence latency using operand def/use information,
+ // then allow the target to perform its own adjustments.
+ int UseOp = UseList[i].OpIdx;
+ MachineInstr *RegUse = UseOp < 0 ? 0 : UseSU->getInstr();
+ dep.setLatency(
+ SchedModel.computeOperandLatency(SU->getInstr(), OperIdx,
+ RegUse, UseOp, /*FindMin=*/false));
+ dep.setMinLatency(
+ SchedModel.computeOperandLatency(SU->getInstr(), OperIdx,
+ RegUse, UseOp, /*FindMin=*/true));
+
+ ST.adjustSchedDependency(SU, UseSU, dep);
UseSU->addPred(dep);
}
}
@@ -301,20 +282,23 @@ void ScheduleDAGInstrs::addPhysRegDeps(SUnit *SU, unsigned OperIdx) {
Alias.isValid(); ++Alias) {
if (!Defs.contains(*Alias))
continue;
- std::vector<SUnit *> &DefList = Defs[*Alias];
+ std::vector<PhysRegSUOper> &DefList = Defs[*Alias];
for (unsigned i = 0, e = DefList.size(); i != e; ++i) {
- SUnit *DefSU = DefList[i];
+ SUnit *DefSU = DefList[i].SU;
if (DefSU == &ExitSU)
continue;
if (DefSU != SU &&
(Kind != SDep::Output || !MO.isDead() ||
!DefSU->getInstr()->registerDefIsDead(*Alias))) {
if (Kind == SDep::Anti)
- DefSU->addPred(SDep(SU, Kind, 0, /*Reg=*/*Alias));
+ DefSU->addPred(SDep(SU, Kind, /*Reg=*/*Alias));
else {
- unsigned AOLat = TII->getOutputLatency(InstrItins, MI, OperIdx,
- DefSU->getInstr());
- DefSU->addPred(SDep(SU, Kind, AOLat, /*Reg=*/*Alias));
+ SDep Dep(SU, Kind, /*Reg=*/*Alias);
+ unsigned OutLatency =
+ SchedModel.computeOutputLatency(MI, OperIdx, DefSU->getInstr());
+ Dep.setMinLatency(OutLatency);
+ Dep.setLatency(OutLatency);
+ DefSU->addPred(Dep);
}
}
}
@@ -324,61 +308,14 @@ void ScheduleDAGInstrs::addPhysRegDeps(SUnit *SU, unsigned OperIdx) {
// Either insert a new Reg2SUnits entry with an empty SUnits list, or
// retrieve the existing SUnits list for this register's uses.
// Push this SUnit on the use list.
- Uses[MO.getReg()].push_back(SU);
+ Uses[MO.getReg()].push_back(PhysRegSUOper(SU, OperIdx));
}
else {
- addPhysRegDataDeps(SU, MO);
+ addPhysRegDataDeps(SU, OperIdx);
// Either insert a new Reg2SUnits entry with an empty SUnits list, or
// retrieve the existing SUnits list for this register's defs.
- std::vector<SUnit *> &DefList = Defs[MO.getReg()];
-
- // If a def is going to wrap back around to the top of the loop,
- // backschedule it.
- if (!UnitLatencies && DefList.empty()) {
- LoopDependencies::LoopDeps::iterator I = LoopRegs.Deps.find(MO.getReg());
- if (I != LoopRegs.Deps.end()) {
- const MachineOperand *UseMO = I->second.first;
- unsigned Count = I->second.second;
- const MachineInstr *UseMI = UseMO->getParent();
- unsigned UseMOIdx = UseMO - &UseMI->getOperand(0);
- const MCInstrDesc &UseMCID = UseMI->getDesc();
- const TargetSubtargetInfo &ST =
- TM.getSubtarget<TargetSubtargetInfo>();
- unsigned SpecialAddressLatency = ST.getSpecialAddressLatency();
- // TODO: If we knew the total depth of the region here, we could
- // handle the case where the whole loop is inside the region but
- // is large enough that the isScheduleHigh trick isn't needed.
- if (UseMOIdx < UseMCID.getNumOperands()) {
- // Currently, we only support scheduling regions consisting of
- // single basic blocks. Check to see if the instruction is in
- // the same region by checking to see if it has the same parent.
- if (UseMI->getParent() != MI->getParent()) {
- unsigned Latency = SU->Latency;
- if (UseMCID.OpInfo[UseMOIdx].isLookupPtrRegClass())
- Latency += SpecialAddressLatency;
- // This is a wild guess as to the portion of the latency which
- // will be overlapped by work done outside the current
- // scheduling region.
- Latency -= std::min(Latency, Count);
- // Add the artificial edge.
- ExitSU.addPred(SDep(SU, SDep::Order, Latency,
- /*Reg=*/0, /*isNormalMemory=*/false,
- /*isMustAlias=*/false,
- /*isArtificial=*/true));
- } else if (SpecialAddressLatency > 0 &&
- UseMCID.OpInfo[UseMOIdx].isLookupPtrRegClass()) {
- // The entire loop body is within the current scheduling region
- // and the latency of this operation is assumed to be greater
- // than the latency of the loop.
- // TODO: Recursively mark data-edge predecessors as
- // isScheduleHigh too.
- SU->isScheduleHigh = true;
- }
- }
- LoopRegs.Deps.erase(I);
- }
- }
+ std::vector<PhysRegSUOper> &DefList = Defs[MO.getReg()];
// clear this register's use list
if (Uses.contains(MO.getReg()))
@@ -393,11 +330,11 @@ void ScheduleDAGInstrs::addPhysRegDeps(SUnit *SU, unsigned OperIdx) {
// the block. Instead, we leave only one call at the back of the
// DefList.
if (SU->isCall) {
- while (!DefList.empty() && DefList.back()->isCall)
+ while (!DefList.empty() && DefList.back().SU->isCall)
DefList.pop_back();
}
// Defs are pushed in the order they are visited and never reordered.
- DefList.push_back(SU);
+ DefList.push_back(PhysRegSUOper(SU, OperIdx));
}
}
@@ -430,9 +367,12 @@ void ScheduleDAGInstrs::addVRegDefDeps(SUnit *SU, unsigned OperIdx) {
else {
SUnit *DefSU = DefI->SU;
if (DefSU != SU && DefSU != &ExitSU) {
- unsigned OutLatency = TII->getOutputLatency(InstrItins, MI, OperIdx,
- DefSU->getInstr());
- DefSU->addPred(SDep(SU, SDep::Output, OutLatency, Reg));
+ SDep Dep(SU, SDep::Output, Reg);
+ unsigned OutLatency =
+ SchedModel.computeOutputLatency(MI, OperIdx, DefSU->getInstr());
+ Dep.setMinLatency(OutLatency);
+ Dep.setLatency(OutLatency);
+ DefSU->addPred(Dep);
}
DefI->SU = SU;
}
@@ -462,18 +402,17 @@ void ScheduleDAGInstrs::addVRegUseDeps(SUnit *SU, unsigned OperIdx) {
if (DefSU) {
// The reaching Def lives within this scheduling region.
// Create a data dependence.
- //
- // TODO: Handle "special" address latencies cleanly.
- SDep dep(DefSU, SDep::Data, DefSU->Latency, Reg);
- if (!UnitLatencies) {
- // Adjust the dependence latency using operand def/use information, then
- // allow the target to perform its own adjustments.
- unsigned Latency = computeOperandLatency(DefSU, SU, const_cast<SDep &>(dep));
- dep.setLatency(Latency);
-
- const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>();
- ST.adjustSchedDependency(DefSU, SU, const_cast<SDep &>(dep));
- }
+ SDep dep(DefSU, SDep::Data, Reg);
+ // Adjust the dependence latency using operand def/use information, then
+ // allow the target to perform its own adjustments.
+ int DefOp = Def->findRegisterDefOperandIdx(Reg);
+ dep.setLatency(
+ SchedModel.computeOperandLatency(Def, DefOp, MI, OperIdx, false));
+ dep.setMinLatency(
+ SchedModel.computeOperandLatency(Def, DefOp, MI, OperIdx, true));
+
+ const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>();
+ ST.adjustSchedDependency(DefSU, SU, const_cast<SDep &>(dep));
SU->addPred(dep);
}
}
@@ -481,14 +420,14 @@ void ScheduleDAGInstrs::addVRegUseDeps(SUnit *SU, unsigned OperIdx) {
// Add antidependence to the following def of the vreg it uses.
VReg2SUnitMap::iterator DefI = VRegDefs.find(Reg);
if (DefI != VRegDefs.end() && DefI->SU != SU)
- DefI->SU->addPred(SDep(SU, SDep::Anti, 0, Reg));
+ DefI->SU->addPred(SDep(SU, SDep::Anti, Reg));
}
/// Return true if MI is an instruction we are unable to reason about
/// (like a call or something with unmodeled side effects).
static inline bool isGlobalMemoryObject(AliasAnalysis *AA, MachineInstr *MI) {
if (MI->isCall() || MI->hasUnmodeledSideEffects() ||
- (MI->hasVolatileMemoryRef() &&
+ (MI->hasOrderedMemoryRef() &&
(!MI->mayLoad() || !MI->isInvariantLoad(AA))))
return true;
return false;
@@ -621,8 +560,7 @@ iterateChainSucc(AliasAnalysis *AA, const MachineFrameInfo *MFI,
// and stop descending.
if (*Depth > 200 ||
MIsNeedChainEdge(AA, MFI, SUa->getInstr(), SUb->getInstr())) {
- SUb->addPred(SDep(SUa, SDep::Order, /*Latency=*/0, /*Reg=*/0,
- /*isNormalMemory=*/true));
+ SUb->addPred(SDep(SUa, SDep::MayAliasMem));
return *Depth;
}
// Track current depth.
@@ -653,9 +591,9 @@ static void adjustChainDeps(AliasAnalysis *AA, const MachineFrameInfo *MFI,
if (SU == *I)
continue;
if (MIsNeedChainEdge(AA, MFI, SU->getInstr(), (*I)->getInstr())) {
- unsigned Latency = ((*I)->getInstr()->mayLoad()) ? LatencyToLoad : 0;
- (*I)->addPred(SDep(SU, SDep::Order, Latency, /*Reg=*/0,
- /*isNormalMemory=*/true));
+ SDep Dep(SU, SDep::MayAliasMem);
+ Dep.setLatency(((*I)->getInstr()->mayLoad()) ? LatencyToLoad : 0);
+ (*I)->addPred(Dep);
}
// Now go through all the chain successors and iterate from them.
// Keep track of visited nodes.
@@ -678,9 +616,11 @@ void addChainDependency (AliasAnalysis *AA, const MachineFrameInfo *MFI,
// If this is a false dependency,
// do not add the edge, but rememeber the rejected node.
if (!EnableAASchedMI ||
- MIsNeedChainEdge(AA, MFI, SUa->getInstr(), SUb->getInstr()))
- SUb->addPred(SDep(SUa, SDep::Order, TrueMemOrderLatency, /*Reg=*/0,
- isNormalMemory));
+ MIsNeedChainEdge(AA, MFI, SUa->getInstr(), SUb->getInstr())) {
+ SDep Dep(SUa, isNormalMemory ? SDep::MayAliasMem : SDep::Barrier);
+ Dep.setLatency(TrueMemOrderLatency);
+ SUb->addPred(Dep);
+ }
else {
// Duplicate entries should be ignored.
RejectList.insert(SUb);
@@ -718,10 +658,7 @@ void ScheduleDAGInstrs::initSUnits() {
SU->isCommutable = MI->isCommutable();
// Assign the Latency field of SU using target-provided information.
- if (UnitLatencies)
- SU->Latency = 1;
- else
- computeLatency(SU);
+ SU->Latency = SchedModel.computeInstrLatency(SU->getInstr());
}
}
@@ -825,16 +762,19 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
// references, even those that are known to not alias.
for (std::map<const Value *, SUnit *>::iterator I =
NonAliasMemDefs.begin(), E = NonAliasMemDefs.end(); I != E; ++I) {
- I->second->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
+ I->second->addPred(SDep(SU, SDep::Barrier));
}
for (std::map<const Value *, std::vector<SUnit *> >::iterator I =
NonAliasMemUses.begin(), E = NonAliasMemUses.end(); I != E; ++I) {
- for (unsigned i = 0, e = I->second.size(); i != e; ++i)
- I->second[i]->addPred(SDep(SU, SDep::Order, TrueMemOrderLatency));
+ for (unsigned i = 0, e = I->second.size(); i != e; ++i) {
+ SDep Dep(SU, SDep::Barrier);
+ Dep.setLatency(TrueMemOrderLatency);
+ I->second[i]->addPred(Dep);
+ }
}
// Add SU to the barrier chain.
if (BarrierChain)
- BarrierChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
+ BarrierChain->addPred(SDep(SU, SDep::Barrier));
BarrierChain = SU;
// This is a barrier event that acts as a pivotal node in the DAG,
// so it is safe to clear list of exposed nodes.
@@ -922,7 +862,7 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
// SU and barrier _could_ be reordered, they should not. In addition,
// we have lost all RejectMemNodes below barrier.
if (BarrierChain)
- BarrierChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
+ BarrierChain->addPred(SDep(SU, SDep::Barrier));
} else {
// Treat all other stores conservatively.
goto new_alias_chain;
@@ -931,10 +871,7 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
if (!ExitSU.isPred(SU))
// Push store's up a bit to avoid them getting in between cmp
// and branches.
- ExitSU.addPred(SDep(SU, SDep::Order, 0,
- /*Reg=*/0, /*isNormalMemory=*/false,
- /*isMustAlias=*/false,
- /*isArtificial=*/true));
+ ExitSU.addPred(SDep(SU, SDep::Artificial));
} else if (MI->mayLoad()) {
bool MayAlias = true;
if (MI->isInvariantLoad(AA)) {
@@ -969,7 +906,7 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
if (MayAlias && AliasChain)
addChainDependency(AA, MFI, SU, AliasChain, RejectMemNodes);
if (BarrierChain)
- BarrierChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
+ BarrierChain->addPred(SDep(SU, SDep::Barrier));
}
}
}
@@ -982,34 +919,10 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
PendingLoads.clear();
}
-void ScheduleDAGInstrs::computeLatency(SUnit *SU) {
- // Compute the latency for the node. We only provide a default for missing
- // itineraries. Empty itineraries still have latency properties.
- if (!InstrItins) {
- SU->Latency = 1;
-
- // Simplistic target-independent heuristic: assume that loads take
- // extra time.
- if (SU->getInstr()->mayLoad())
- SU->Latency += 2;
- } else {
- SU->Latency = TII->getInstrLatency(InstrItins, SU->getInstr());
- }
-}
-
-unsigned ScheduleDAGInstrs::computeOperandLatency(SUnit *Def, SUnit *Use,
- const SDep& dep,
- bool FindMin) const {
- // For a data dependency with a known register...
- if ((dep.getKind() != SDep::Data) || (dep.getReg() == 0))
- return 1;
-
- return TII->computeOperandLatency(InstrItins, TRI, Def->getInstr(),
- Use->getInstr(), dep.getReg(), FindMin);
-}
-
void ScheduleDAGInstrs::dumpNode(const SUnit *SU) const {
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
SU->getInstr()->dump();
+#endif
}
std::string ScheduleDAGInstrs::getGraphNodeLabel(const SUnit *SU) const {
@@ -1029,3 +942,94 @@ std::string ScheduleDAGInstrs::getGraphNodeLabel(const SUnit *SU) const {
std::string ScheduleDAGInstrs::getDAGName() const {
return "dag." + BB->getFullName();
}
+
+namespace {
+/// \brief Manage the stack used by a reverse depth-first search over the DAG.
+class SchedDAGReverseDFS {
+ std::vector<std::pair<const SUnit*, SUnit::const_pred_iterator> > DFSStack;
+public:
+ bool isComplete() const { return DFSStack.empty(); }
+
+ void follow(const SUnit *SU) {
+ DFSStack.push_back(std::make_pair(SU, SU->Preds.begin()));
+ }
+ void advance() { ++DFSStack.back().second; }
+
+ void backtrack() { DFSStack.pop_back(); }
+
+ const SUnit *getCurr() const { return DFSStack.back().first; }
+
+ SUnit::const_pred_iterator getPred() const { return DFSStack.back().second; }
+
+ SUnit::const_pred_iterator getPredEnd() const {
+ return getCurr()->Preds.end();
+ }
+};
+} // anonymous
+
+void ScheduleDAGILP::resize(unsigned NumSUnits) {
+ ILPValues.resize(NumSUnits);
+}
+
+ILPValue ScheduleDAGILP::getILP(const SUnit *SU) {
+ return ILPValues[SU->NodeNum];
+}
+
+// A leaf node has an ILP of 1/1.
+static ILPValue initILP(const SUnit *SU) {
+ unsigned Cnt = SU->getInstr()->isTransient() ? 0 : 1;
+ return ILPValue(Cnt, 1 + SU->getDepth());
+}
+
+/// Compute an ILP metric for all nodes in the subDAG reachable via depth-first
+/// search from this root.
+void ScheduleDAGILP::computeILP(const SUnit *Root) {
+ if (!IsBottomUp)
+ llvm_unreachable("Top-down ILP metric is unimplemnted");
+
+ SchedDAGReverseDFS DFS;
+ // Mark a node visited by validating it.
+ ILPValues[Root->NodeNum] = initILP(Root);
+ DFS.follow(Root);
+ for (;;) {
+ // Traverse the leftmost path as far as possible.
+ while (DFS.getPred() != DFS.getPredEnd()) {
+ const SUnit *PredSU = DFS.getPred()->getSUnit();
+ DFS.advance();
+ // If the pred is already valid, skip it.
+ if (ILPValues[PredSU->NodeNum].isValid())
+ continue;
+ ILPValues[PredSU->NodeNum] = initILP(PredSU);
+ DFS.follow(PredSU);
+ }
+ // Visit the top of the stack in postorder and backtrack.
+ unsigned PredCount = ILPValues[DFS.getCurr()->NodeNum].InstrCount;
+ DFS.backtrack();
+ if (DFS.isComplete())
+ break;
+ // Add the recently finished predecessor's bottom-up descendent count.
+ ILPValues[DFS.getCurr()->NodeNum].InstrCount += PredCount;
+ }
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+void ILPValue::print(raw_ostream &OS) const {
+ if (!isValid())
+ OS << "BADILP";
+ OS << InstrCount << " / " << Cycles << " = "
+ << format("%g", ((double)InstrCount / Cycles));
+}
+
+void ILPValue::dump() const {
+ dbgs() << *this << '\n';
+}
+
+namespace llvm {
+
+raw_ostream &operator<<(raw_ostream &OS, const ILPValue &Val) {
+ Val.print(OS);
+ return OS;
+}
+
+} // namespace llvm
+#endif // !NDEBUG || LLVM_ENABLE_DUMP
diff --git a/contrib/llvm/lib/CodeGen/ScheduleDAGPrinter.cpp b/contrib/llvm/lib/CodeGen/ScheduleDAGPrinter.cpp
index 38feee9..6e781b1 100644
--- a/contrib/llvm/lib/CodeGen/ScheduleDAGPrinter.cpp
+++ b/contrib/llvm/lib/CodeGen/ScheduleDAGPrinter.cpp
@@ -12,7 +12,6 @@
//===----------------------------------------------------------------------===//
#include "llvm/Constants.h"
-#include "llvm/Function.h"
#include "llvm/Assembly/Writer.h"
#include "llvm/CodeGen/ScheduleDAG.h"
#include "llvm/CodeGen/MachineConstantPool.h"
@@ -35,7 +34,7 @@ namespace llvm {
DOTGraphTraits (bool isSimple=false) : DefaultDOTGraphTraits(isSimple) {}
static std::string getGraphName(const ScheduleDAG *G) {
- return G->MF.getFunction()->getName();
+ return G->MF.getName();
}
static bool renderGraphFromBottomUp() {
diff --git a/contrib/llvm/lib/CodeGen/ScoreboardHazardRecognizer.cpp b/contrib/llvm/lib/CodeGen/ScoreboardHazardRecognizer.cpp
index e675366..2cd84d6 100644
--- a/contrib/llvm/lib/CodeGen/ScoreboardHazardRecognizer.cpp
+++ b/contrib/llvm/lib/CodeGen/ScoreboardHazardRecognizer.cpp
@@ -89,6 +89,7 @@ void ScoreboardHazardRecognizer::Reset() {
ReservedScoreboard.reset();
}
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void ScoreboardHazardRecognizer::Scoreboard::dump() const {
dbgs() << "Scoreboard:\n";
@@ -104,6 +105,7 @@ void ScoreboardHazardRecognizer::Scoreboard::dump() const {
dbgs() << '\n';
}
}
+#endif
bool ScoreboardHazardRecognizer::atIssueLimit() const {
if (IssueWidth == 0)
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 4e29879..37d7731 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -23,7 +23,7 @@
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/Target/TargetData.h"
+#include "llvm/DataLayout.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
@@ -194,6 +194,7 @@ namespace {
SDValue visitOR(SDNode *N);
SDValue visitXOR(SDNode *N);
SDValue SimplifyVBinOp(SDNode *N);
+ SDValue SimplifyVUnaryOp(SDNode *N);
SDValue visitSHL(SDNode *N);
SDValue visitSRA(SDNode *N);
SDValue visitSRL(SDNode *N);
@@ -269,6 +270,8 @@ namespace {
SDValue ReduceLoadWidth(SDNode *N);
SDValue ReduceLoadOpStoreWidth(SDNode *N);
SDValue TransformFPLoadStorePair(SDNode *N);
+ SDValue reduceBuildVecExtToExtBuildVec(SDNode *N);
+ SDValue reduceBuildVecConvertToConvertBuildVec(SDNode *N);
SDValue GetDemandedBits(SDValue V, const APInt &Mask);
@@ -300,6 +303,11 @@ namespace {
/// looking for a better chain (aliasing node.)
SDValue FindBetterChain(SDNode *N, SDValue Chain);
+ /// Merge consecutive store operations into a wide store.
+ /// This optimization uses wide integers or vectors when possible.
+ /// \return True if some memory operations were changed.
+ bool MergeConsecutiveStores(StoreSDNode *N);
+
public:
DAGCombiner(SelectionDAG &D, AliasAnalysis &A, CodeGenOpt::Level OL)
: DAG(D), TLI(D.getTargetLoweringInfo()), Level(BeforeLegalizeTypes),
@@ -385,10 +393,6 @@ static char isNegatibleForFree(SDValue Op, bool LegalOperations,
const TargetLowering &TLI,
const TargetOptions *Options,
unsigned Depth = 0) {
- // No compile time optimizations on this type.
- if (Op.getValueType() == MVT::ppcf128)
- return 0;
-
// fneg is removable even if it has multiple uses.
if (Op.getOpcode() == ISD::FNEG) return 2;
@@ -413,7 +417,7 @@ static char isNegatibleForFree(SDValue Op, bool LegalOperations,
!TLI.isOperationLegalOrCustom(ISD::FSUB, Op.getValueType()))
return 0;
- // fold (fsub (fadd A, B)) -> (fsub (fneg A), B)
+ // fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI,
Options, Depth + 1))
return V;
@@ -1643,7 +1647,8 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
return N0.getOperand(0);
// fold C2-(A+C1) -> (C2-C1)-A
if (N1.getOpcode() == ISD::ADD && N0C && N1C1) {
- SDValue NewC = DAG.getConstant((N0C->getAPIntValue() - N1C1->getAPIntValue()), VT);
+ SDValue NewC = DAG.getConstant(N0C->getAPIntValue() - N1C1->getAPIntValue(),
+ VT);
return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, NewC,
N1.getOperand(0));
}
@@ -2345,16 +2350,19 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) {
// we don't want to undo this promotion.
// We also handle SCALAR_TO_VECTOR because xor/or/and operations are cheaper
// on scalars.
- if ((N0.getOpcode() == ISD::BITCAST || N0.getOpcode() == ISD::SCALAR_TO_VECTOR)
- && Level == AfterLegalizeTypes) {
+ if ((N0.getOpcode() == ISD::BITCAST ||
+ N0.getOpcode() == ISD::SCALAR_TO_VECTOR) &&
+ Level == AfterLegalizeTypes) {
SDValue In0 = N0.getOperand(0);
SDValue In1 = N1.getOperand(0);
EVT In0Ty = In0.getValueType();
EVT In1Ty = In1.getValueType();
- // If both incoming values are integers, and the original types are the same.
+ DebugLoc DL = N->getDebugLoc();
+ // If both incoming values are integers, and the original types are the
+ // same.
if (In0Ty.isInteger() && In1Ty.isInteger() && In0Ty == In1Ty) {
- SDValue Op = DAG.getNode(N->getOpcode(), N->getDebugLoc(), In0Ty, In0, In1);
- SDValue BC = DAG.getNode(N0.getOpcode(), N->getDebugLoc(), VT, Op);
+ SDValue Op = DAG.getNode(N->getOpcode(), DL, In0Ty, In0, In1);
+ SDValue BC = DAG.getNode(N0.getOpcode(), DL, VT, Op);
AddToWorkList(Op.getNode());
return BC;
}
@@ -2496,8 +2504,18 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
// lanes of the constant together.
EVT VT = Vector->getValueType(0);
unsigned BitWidth = VT.getVectorElementType().getSizeInBits();
+
+ // If the splat value has been compressed to a bitlength lower
+ // than the size of the vector lane, we need to re-expand it to
+ // the lane size.
+ if (BitWidth > SplatBitSize)
+ for (SplatValue = SplatValue.zextOrTrunc(BitWidth);
+ SplatBitSize < BitWidth;
+ SplatBitSize = SplatBitSize * 2)
+ SplatValue |= SplatValue.shl(SplatBitSize);
+
Constant = APInt::getAllOnesValue(BitWidth);
- for (unsigned i = 0, n = VT.getVectorNumElements(); i < n; ++i)
+ for (unsigned i = 0, n = SplatBitSize/BitWidth; i < n; ++i)
Constant &= SplatValue.lshr(i*BitWidth).zextOrTrunc(BitWidth);
}
}
@@ -2984,7 +3002,7 @@ SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) {
SDValue ShAmt = DAG.getConstant(16, getShiftAmountTy(VT));
if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT))
return DAG.getNode(ISD::ROTL, N->getDebugLoc(), VT, BSwap, ShAmt);
- else if (TLI.isOperationLegalOrCustom(ISD::ROTR, VT))
+ if (TLI.isOperationLegalOrCustom(ISD::ROTR, VT))
return DAG.getNode(ISD::ROTR, N->getDebugLoc(), VT, BSwap, ShAmt);
return DAG.getNode(ISD::OR, N->getDebugLoc(), VT,
DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, BSwap, ShAmt),
@@ -3202,11 +3220,8 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, DebugLoc DL) {
if ((LShVal + RShVal) != OpSizeInBits)
return 0;
- SDValue Rot;
- if (HasROTL)
- Rot = DAG.getNode(ISD::ROTL, DL, VT, LHSShiftArg, LHSShiftAmt);
- else
- Rot = DAG.getNode(ISD::ROTR, DL, VT, LHSShiftArg, RHSShiftAmt);
+ SDValue Rot = DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT,
+ LHSShiftArg, HasROTL ? LHSShiftAmt : RHSShiftAmt);
// If there is an AND of either shifted operand, apply it to the result.
if (LHSMask.getNode() || RHSMask.getNode()) {
@@ -3239,12 +3254,8 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, DebugLoc DL) {
if (ConstantSDNode *SUBC =
dyn_cast<ConstantSDNode>(RHSShiftAmt.getOperand(0))) {
if (SUBC->getAPIntValue() == OpSizeInBits) {
- if (HasROTL)
- return DAG.getNode(ISD::ROTL, DL, VT,
- LHSShiftArg, LHSShiftAmt).getNode();
- else
- return DAG.getNode(ISD::ROTR, DL, VT,
- LHSShiftArg, RHSShiftAmt).getNode();
+ return DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT, LHSShiftArg,
+ HasROTL ? LHSShiftAmt : RHSShiftAmt).getNode();
}
}
}
@@ -3256,25 +3267,21 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, DebugLoc DL) {
if (ConstantSDNode *SUBC =
dyn_cast<ConstantSDNode>(LHSShiftAmt.getOperand(0))) {
if (SUBC->getAPIntValue() == OpSizeInBits) {
- if (HasROTR)
- return DAG.getNode(ISD::ROTR, DL, VT,
- LHSShiftArg, RHSShiftAmt).getNode();
- else
- return DAG.getNode(ISD::ROTL, DL, VT,
- LHSShiftArg, LHSShiftAmt).getNode();
+ return DAG.getNode(HasROTR ? ISD::ROTR : ISD::ROTL, DL, VT, LHSShiftArg,
+ HasROTR ? RHSShiftAmt : LHSShiftAmt).getNode();
}
}
}
// Look for sign/zext/any-extended or truncate cases:
- if ((LHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND
- || LHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND
- || LHSShiftAmt.getOpcode() == ISD::ANY_EXTEND
- || LHSShiftAmt.getOpcode() == ISD::TRUNCATE) &&
- (RHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND
- || RHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND
- || RHSShiftAmt.getOpcode() == ISD::ANY_EXTEND
- || RHSShiftAmt.getOpcode() == ISD::TRUNCATE)) {
+ if ((LHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
+ LHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
+ LHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
+ LHSShiftAmt.getOpcode() == ISD::TRUNCATE) &&
+ (RHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
+ RHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
+ RHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
+ RHSShiftAmt.getOpcode() == ISD::TRUNCATE)) {
SDValue LExtOp0 = LHSShiftAmt.getOperand(0);
SDValue RExtOp0 = RHSShiftAmt.getOperand(0);
if (RExtOp0.getOpcode() == ISD::SUB &&
@@ -4046,7 +4053,8 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) {
if (VT.isInteger() &&
(VT0 == MVT::i1 ||
(VT0.isInteger() &&
- TLI.getBooleanContents(false) == TargetLowering::ZeroOrOneBooleanContent)) &&
+ TLI.getBooleanContents(false) ==
+ TargetLowering::ZeroOrOneBooleanContent)) &&
N1C && N2C && N1C->isNullValue() && N2C->getAPIntValue() == 1) {
SDValue XORNode;
if (VT == VT0)
@@ -4412,20 +4420,18 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
// If the desired elements are smaller or larger than the source
// elements we can use a matching integer vector type and then
// truncate/sign extend
- else {
- EVT MatchingElementType =
- EVT::getIntegerVT(*DAG.getContext(),
- N0VT.getScalarType().getSizeInBits());
- EVT MatchingVectorType =
- EVT::getVectorVT(*DAG.getContext(), MatchingElementType,
- N0VT.getVectorNumElements());
+ EVT MatchingElementType =
+ EVT::getIntegerVT(*DAG.getContext(),
+ N0VT.getScalarType().getSizeInBits());
+ EVT MatchingVectorType =
+ EVT::getVectorVT(*DAG.getContext(), MatchingElementType,
+ N0VT.getVectorNumElements());
- if (SVT == MatchingVectorType) {
- SDValue VsetCC = DAG.getSetCC(N->getDebugLoc(), MatchingVectorType,
- N0.getOperand(0), N0.getOperand(1),
- cast<CondCodeSDNode>(N0.getOperand(2))->get());
- return DAG.getSExtOrTrunc(VsetCC, N->getDebugLoc(), VT);
- }
+ if (SVT == MatchingVectorType) {
+ SDValue VsetCC = DAG.getSetCC(N->getDebugLoc(), MatchingVectorType,
+ N0.getOperand(0), N0.getOperand(1),
+ cast<CondCodeSDNode>(N0.getOperand(2))->get());
+ return DAG.getSExtOrTrunc(VsetCC, N->getDebugLoc(), VT);
}
}
@@ -5235,13 +5241,12 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
// if the source is smaller than the dest, we still need an extend
return DAG.getNode(N0.getOpcode(), N->getDebugLoc(), VT,
N0.getOperand(0));
- else if (N0.getOperand(0).getValueType().bitsGT(VT))
+ if (N0.getOperand(0).getValueType().bitsGT(VT))
// if the source is larger than the dest, than we just need the truncate
return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, N0.getOperand(0));
- else
- // if the source and dest are the same type, we can drop both the extend
- // and the truncate.
- return N0.getOperand(0);
+ // if the source and dest are the same type, we can drop both the extend
+ // and the truncate.
+ return N0.getOperand(0);
}
// Fold extract-and-trunc into a narrow extract. For example:
@@ -5301,6 +5306,48 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
if (Reduced.getNode())
return Reduced;
}
+ // fold (trunc (concat ... x ...)) -> (concat ..., (trunc x), ...)),
+ // where ... are all 'undef'.
+ if (N0.getOpcode() == ISD::CONCAT_VECTORS && !LegalTypes) {
+ SmallVector<EVT, 8> VTs;
+ SDValue V;
+ unsigned Idx = 0;
+ unsigned NumDefs = 0;
+
+ for (unsigned i = 0, e = N0.getNumOperands(); i != e; ++i) {
+ SDValue X = N0.getOperand(i);
+ if (X.getOpcode() != ISD::UNDEF) {
+ V = X;
+ Idx = i;
+ NumDefs++;
+ }
+ // Stop if more than one members are non-undef.
+ if (NumDefs > 1)
+ break;
+ VTs.push_back(EVT::getVectorVT(*DAG.getContext(),
+ VT.getVectorElementType(),
+ X.getValueType().getVectorNumElements()));
+ }
+
+ if (NumDefs == 0)
+ return DAG.getUNDEF(VT);
+
+ if (NumDefs == 1) {
+ assert(V.getNode() && "The single defined operand is empty!");
+ SmallVector<SDValue, 8> Opnds;
+ for (unsigned i = 0, e = VTs.size(); i != e; ++i) {
+ if (i != Idx) {
+ Opnds.push_back(DAG.getUNDEF(VTs[i]));
+ continue;
+ }
+ SDValue NV = DAG.getNode(ISD::TRUNCATE, V.getDebugLoc(), VTs[i], V);
+ AddToWorkList(NV.getNode());
+ Opnds.push_back(NV);
+ }
+ return DAG.getNode(ISD::CONCAT_VECTORS, N->getDebugLoc(), VT,
+ &Opnds[0], Opnds.size());
+ }
+ }
// Simplify the operands using demanded-bits information.
if (!VT.isVector() &&
@@ -5338,7 +5385,7 @@ SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) {
!LD2->isVolatile() &&
DAG.isConsecutiveLoad(LD2, LD1, LD1VT.getSizeInBits()/8, 1)) {
unsigned Align = LD1->getAlignment();
- unsigned NewAlign = TLI.getTargetData()->
+ unsigned NewAlign = TLI.getDataLayout()->
getABITypeAlignment(VT.getTypeForEVT(*DAG.getContext()));
if (NewAlign <= Align &&
@@ -5407,7 +5454,7 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) {
!cast<LoadSDNode>(N0)->isVolatile() &&
(!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT))) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
- unsigned Align = TLI.getTargetData()->
+ unsigned Align = TLI.getDataLayout()->
getABITypeAlignment(VT.getTypeForEVT(*DAG.getContext()));
unsigned OrigAlign = LN0->getAlignment();
@@ -5430,7 +5477,8 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) {
// This often reduces constant pool loads.
if (((N0.getOpcode() == ISD::FNEG && !TLI.isFNegFree(VT)) ||
(N0.getOpcode() == ISD::FABS && !TLI.isFAbsFree(VT))) &&
- N0.getNode()->hasOneUse() && VT.isInteger() && !VT.isVector()) {
+ N0.getNode()->hasOneUse() && VT.isInteger() &&
+ !VT.isVector() && !N0.getValueType().isVector()) {
SDValue NewConv = DAG.getNode(ISD::BITCAST, N0.getDebugLoc(), VT,
N0.getOperand(0));
AddToWorkList(NewConv.getNode());
@@ -5653,7 +5701,7 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
}
// fold (fadd c1, c2) -> c1 + c2
- if (N0CFP && N1CFP && VT != MVT::ppcf128)
+ if (N0CFP && N1CFP)
return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N0, N1);
// canonicalize constant to RHS
if (N0CFP && !N1CFP)
@@ -5664,12 +5712,12 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
return N0;
// fold (fadd A, (fneg B)) -> (fsub A, B)
if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) &&
- isNegatibleForFree(N1, LegalOperations, TLI, &DAG.getTarget().Options) == 2)
+ isNegatibleForFree(N1, LegalOperations, TLI, &DAG.getTarget().Options) == 2)
return DAG.getNode(ISD::FSUB, N->getDebugLoc(), VT, N0,
GetNegatedExpression(N1, DAG, LegalOperations));
// fold (fadd (fneg A), B) -> (fsub B, A)
if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) &&
- isNegatibleForFree(N0, LegalOperations, TLI, &DAG.getTarget().Options) == 2)
+ isNegatibleForFree(N0, LegalOperations, TLI, &DAG.getTarget().Options) == 2)
return DAG.getNode(ISD::FSUB, N->getDebugLoc(), VT, N1,
GetNegatedExpression(N0, DAG, LegalOperations));
@@ -5681,6 +5729,139 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
DAG.getNode(ISD::FADD, N->getDebugLoc(), VT,
N0.getOperand(1), N1));
+ // If allow, fold (fadd (fneg x), x) -> 0.0
+ if (DAG.getTarget().Options.UnsafeFPMath &&
+ N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1) {
+ return DAG.getConstantFP(0.0, VT);
+ }
+
+ // If allow, fold (fadd x, (fneg x)) -> 0.0
+ if (DAG.getTarget().Options.UnsafeFPMath &&
+ N1.getOpcode() == ISD::FNEG && N1.getOperand(0) == N0) {
+ return DAG.getConstantFP(0.0, VT);
+ }
+
+ // In unsafe math mode, we can fold chains of FADD's of the same value
+ // into multiplications. This transform is not safe in general because
+ // we are reducing the number of rounding steps.
+ if (DAG.getTarget().Options.UnsafeFPMath &&
+ TLI.isOperationLegalOrCustom(ISD::FMUL, VT) &&
+ !N0CFP && !N1CFP) {
+ if (N0.getOpcode() == ISD::FMUL) {
+ ConstantFPSDNode *CFP00 = dyn_cast<ConstantFPSDNode>(N0.getOperand(0));
+ ConstantFPSDNode *CFP01 = dyn_cast<ConstantFPSDNode>(N0.getOperand(1));
+
+ // (fadd (fmul c, x), x) -> (fmul c+1, x)
+ if (CFP00 && !CFP01 && N0.getOperand(1) == N1) {
+ SDValue NewCFP = DAG.getNode(ISD::FADD, N->getDebugLoc(), VT,
+ SDValue(CFP00, 0),
+ DAG.getConstantFP(1.0, VT));
+ return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT,
+ N1, NewCFP);
+ }
+
+ // (fadd (fmul x, c), x) -> (fmul c+1, x)
+ if (CFP01 && !CFP00 && N0.getOperand(0) == N1) {
+ SDValue NewCFP = DAG.getNode(ISD::FADD, N->getDebugLoc(), VT,
+ SDValue(CFP01, 0),
+ DAG.getConstantFP(1.0, VT));
+ return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT,
+ N1, NewCFP);
+ }
+
+ // (fadd (fadd x, x), x) -> (fmul 3.0, x)
+ if (!CFP00 && !CFP01 && N0.getOperand(0) == N0.getOperand(1) &&
+ N0.getOperand(0) == N1) {
+ return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT,
+ N1, DAG.getConstantFP(3.0, VT));
+ }
+
+ // (fadd (fmul c, x), (fadd x, x)) -> (fmul c+2, x)
+ if (CFP00 && !CFP01 && N1.getOpcode() == ISD::FADD &&
+ N1.getOperand(0) == N1.getOperand(1) &&
+ N0.getOperand(1) == N1.getOperand(0)) {
+ SDValue NewCFP = DAG.getNode(ISD::FADD, N->getDebugLoc(), VT,
+ SDValue(CFP00, 0),
+ DAG.getConstantFP(2.0, VT));
+ return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT,
+ N0.getOperand(1), NewCFP);
+ }
+
+ // (fadd (fmul x, c), (fadd x, x)) -> (fmul c+2, x)
+ if (CFP01 && !CFP00 && N1.getOpcode() == ISD::FADD &&
+ N1.getOperand(0) == N1.getOperand(1) &&
+ N0.getOperand(0) == N1.getOperand(0)) {
+ SDValue NewCFP = DAG.getNode(ISD::FADD, N->getDebugLoc(), VT,
+ SDValue(CFP01, 0),
+ DAG.getConstantFP(2.0, VT));
+ return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT,
+ N0.getOperand(0), NewCFP);
+ }
+ }
+
+ if (N1.getOpcode() == ISD::FMUL) {
+ ConstantFPSDNode *CFP10 = dyn_cast<ConstantFPSDNode>(N1.getOperand(0));
+ ConstantFPSDNode *CFP11 = dyn_cast<ConstantFPSDNode>(N1.getOperand(1));
+
+ // (fadd x, (fmul c, x)) -> (fmul c+1, x)
+ if (CFP10 && !CFP11 && N1.getOperand(1) == N0) {
+ SDValue NewCFP = DAG.getNode(ISD::FADD, N->getDebugLoc(), VT,
+ SDValue(CFP10, 0),
+ DAG.getConstantFP(1.0, VT));
+ return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT,
+ N0, NewCFP);
+ }
+
+ // (fadd x, (fmul x, c)) -> (fmul c+1, x)
+ if (CFP11 && !CFP10 && N1.getOperand(0) == N0) {
+ SDValue NewCFP = DAG.getNode(ISD::FADD, N->getDebugLoc(), VT,
+ SDValue(CFP11, 0),
+ DAG.getConstantFP(1.0, VT));
+ return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT,
+ N0, NewCFP);
+ }
+
+ // (fadd x, (fadd x, x)) -> (fmul 3.0, x)
+ if (!CFP10 && !CFP11 && N1.getOperand(0) == N1.getOperand(1) &&
+ N1.getOperand(0) == N0) {
+ return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT,
+ N0, DAG.getConstantFP(3.0, VT));
+ }
+
+ // (fadd (fadd x, x), (fmul c, x)) -> (fmul c+2, x)
+ if (CFP10 && !CFP11 && N1.getOpcode() == ISD::FADD &&
+ N1.getOperand(0) == N1.getOperand(1) &&
+ N0.getOperand(1) == N1.getOperand(0)) {
+ SDValue NewCFP = DAG.getNode(ISD::FADD, N->getDebugLoc(), VT,
+ SDValue(CFP10, 0),
+ DAG.getConstantFP(2.0, VT));
+ return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT,
+ N0.getOperand(1), NewCFP);
+ }
+
+ // (fadd (fadd x, x), (fmul x, c)) -> (fmul c+2, x)
+ if (CFP11 && !CFP10 && N1.getOpcode() == ISD::FADD &&
+ N1.getOperand(0) == N1.getOperand(1) &&
+ N0.getOperand(0) == N1.getOperand(0)) {
+ SDValue NewCFP = DAG.getNode(ISD::FADD, N->getDebugLoc(), VT,
+ SDValue(CFP11, 0),
+ DAG.getConstantFP(2.0, VT));
+ return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT,
+ N0.getOperand(0), NewCFP);
+ }
+ }
+
+ // (fadd (fadd x, x), (fadd x, x)) -> (fmul 4.0, x)
+ if (N0.getOpcode() == ISD::FADD && N1.getOpcode() == ISD::FADD &&
+ N0.getOperand(0) == N0.getOperand(1) &&
+ N1.getOperand(0) == N1.getOperand(1) &&
+ N0.getOperand(0) == N1.getOperand(0)) {
+ return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT,
+ N0.getOperand(0),
+ DAG.getConstantFP(4.0, VT));
+ }
+ }
+
// FADD -> FMA combines:
if ((DAG.getTarget().Options.AllowFPOpFusion == FPOpFusion::Fast ||
DAG.getTarget().Options.UnsafeFPMath) &&
@@ -5692,8 +5873,8 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
return DAG.getNode(ISD::FMA, N->getDebugLoc(), VT,
N0.getOperand(0), N0.getOperand(1), N1);
}
-
- // fold (fadd x, (fmul y, z)) -> (fma x, y, z)
+
+ // fold (fadd x, (fmul y, z)) -> (fma y, z, x)
// Note: Commutes FADD operands.
if (N1.getOpcode() == ISD::FMUL && N1->hasOneUse()) {
return DAG.getNode(ISD::FMA, N->getDebugLoc(), VT,
@@ -5719,7 +5900,7 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {
}
// fold (fsub c1, c2) -> c1-c2
- if (N0CFP && N1CFP && VT != MVT::ppcf128)
+ if (N0CFP && N1CFP)
return DAG.getNode(ISD::FSUB, N->getDebugLoc(), VT, N0, N1);
// fold (fsub A, 0) -> A
if (DAG.getTarget().Options.UnsafeFPMath &&
@@ -5811,7 +5992,7 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) {
}
// fold (fmul c1, c2) -> c1*c2
- if (N0CFP && N1CFP && VT != MVT::ppcf128)
+ if (N0CFP && N1CFP)
return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, N0, N1);
// canonicalize constant to RHS
if (N0CFP && !N1CFP)
@@ -5867,7 +6048,14 @@ SDValue DAGCombiner::visitFMA(SDNode *N) {
ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
EVT VT = N->getValueType(0);
+ DebugLoc dl = N->getDebugLoc();
+ if (DAG.getTarget().Options.UnsafeFPMath) {
+ if (N0CFP && N0CFP->isZero())
+ return N2;
+ if (N1CFP && N1CFP->isZero())
+ return N2;
+ }
if (N0CFP && N0CFP->isExactlyValue(1.0))
return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N1, N2);
if (N1CFP && N1CFP->isExactlyValue(1.0))
@@ -5877,6 +6065,58 @@ SDValue DAGCombiner::visitFMA(SDNode *N) {
if (N0CFP && !N1CFP)
return DAG.getNode(ISD::FMA, N->getDebugLoc(), VT, N1, N0, N2);
+ // (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2)
+ if (DAG.getTarget().Options.UnsafeFPMath && N1CFP &&
+ N2.getOpcode() == ISD::FMUL &&
+ N0 == N2.getOperand(0) &&
+ N2.getOperand(1).getOpcode() == ISD::ConstantFP) {
+ return DAG.getNode(ISD::FMUL, dl, VT, N0,
+ DAG.getNode(ISD::FADD, dl, VT, N1, N2.getOperand(1)));
+ }
+
+
+ // (fma (fmul x, c1), c2, y) -> (fma x, c1*c2, y)
+ if (DAG.getTarget().Options.UnsafeFPMath &&
+ N0.getOpcode() == ISD::FMUL && N1CFP &&
+ N0.getOperand(1).getOpcode() == ISD::ConstantFP) {
+ return DAG.getNode(ISD::FMA, dl, VT,
+ N0.getOperand(0),
+ DAG.getNode(ISD::FMUL, dl, VT, N1, N0.getOperand(1)),
+ N2);
+ }
+
+ // (fma x, 1, y) -> (fadd x, y)
+ // (fma x, -1, y) -> (fadd (fneg x), y)
+ if (N1CFP) {
+ if (N1CFP->isExactlyValue(1.0))
+ return DAG.getNode(ISD::FADD, dl, VT, N0, N2);
+
+ if (N1CFP->isExactlyValue(-1.0) &&
+ (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))) {
+ SDValue RHSNeg = DAG.getNode(ISD::FNEG, dl, VT, N0);
+ AddToWorkList(RHSNeg.getNode());
+ return DAG.getNode(ISD::FADD, dl, VT, N2, RHSNeg);
+ }
+ }
+
+ // (fma x, c, x) -> (fmul x, (c+1))
+ if (DAG.getTarget().Options.UnsafeFPMath && N1CFP && N0 == N2) {
+ return DAG.getNode(ISD::FMUL, dl, VT,
+ N0,
+ DAG.getNode(ISD::FADD, dl, VT,
+ N1, DAG.getConstantFP(1.0, VT)));
+ }
+
+ // (fma x, c, (fneg x)) -> (fmul x, (c-1))
+ if (DAG.getTarget().Options.UnsafeFPMath && N1CFP &&
+ N2.getOpcode() == ISD::FNEG && N2.getOperand(0) == N0) {
+ return DAG.getNode(ISD::FMUL, dl, VT,
+ N0,
+ DAG.getNode(ISD::FADD, dl, VT,
+ N1, DAG.getConstantFP(-1.0, VT)));
+ }
+
+
return SDValue();
}
@@ -5895,11 +6135,11 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) {
}
// fold (fdiv c1, c2) -> c1/c2
- if (N0CFP && N1CFP && VT != MVT::ppcf128)
+ if (N0CFP && N1CFP)
return DAG.getNode(ISD::FDIV, N->getDebugLoc(), VT, N0, N1);
// fold (fdiv X, c2) -> fmul X, 1/c2 if losing precision is acceptable.
- if (N1CFP && VT != MVT::ppcf128 && DAG.getTarget().Options.UnsafeFPMath) {
+ if (N1CFP && DAG.getTarget().Options.UnsafeFPMath) {
// Compute the reciprocal 1.0 / c2.
APFloat N1APF = N1CFP->getValueAPF();
APFloat Recip(N1APF.getSemantics(), 1); // 1.0
@@ -5942,7 +6182,7 @@ SDValue DAGCombiner::visitFREM(SDNode *N) {
EVT VT = N->getValueType(0);
// fold (frem c1, c2) -> fmod(c1,c2)
- if (N0CFP && N1CFP && VT != MVT::ppcf128)
+ if (N0CFP && N1CFP)
return DAG.getNode(ISD::FREM, N->getDebugLoc(), VT, N0, N1);
return SDValue();
@@ -5955,7 +6195,7 @@ SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) {
ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
EVT VT = N->getValueType(0);
- if (N0CFP && N1CFP && VT != MVT::ppcf128) // Constant fold
+ if (N0CFP && N1CFP) // Constant fold
return DAG.getNode(ISD::FCOPYSIGN, N->getDebugLoc(), VT, N0, N1);
if (N1CFP) {
@@ -6005,7 +6245,7 @@ SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
EVT OpVT = N0.getValueType();
// fold (sint_to_fp c1) -> c1fp
- if (N0C && OpVT != MVT::ppcf128 &&
+ if (N0C &&
// ...but only if the target supports immediate floating-point values
(!LegalOperations ||
TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT)))
@@ -6062,7 +6302,7 @@ SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) {
EVT OpVT = N0.getValueType();
// fold (uint_to_fp c1) -> c1fp
- if (N0C && OpVT != MVT::ppcf128 &&
+ if (N0C &&
// ...but only if the target supports immediate floating-point values
(!LegalOperations ||
TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT)))
@@ -6117,7 +6357,7 @@ SDValue DAGCombiner::visitFP_TO_UINT(SDNode *N) {
EVT VT = N->getValueType(0);
// fold (fp_to_uint c1fp) -> c1
- if (N0CFP && VT != MVT::ppcf128)
+ if (N0CFP)
return DAG.getNode(ISD::FP_TO_UINT, N->getDebugLoc(), VT, N0);
return SDValue();
@@ -6130,7 +6370,7 @@ SDValue DAGCombiner::visitFP_ROUND(SDNode *N) {
EVT VT = N->getValueType(0);
// fold (fp_round c1fp) -> c1fp
- if (N0CFP && N0.getValueType() != MVT::ppcf128)
+ if (N0CFP)
return DAG.getNode(ISD::FP_ROUND, N->getDebugLoc(), VT, N0, N1);
// fold (fp_round (fp_extend x)) -> x
@@ -6184,7 +6424,7 @@ SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) {
return SDValue();
// fold (fp_extend c1fp) -> c1fp
- if (N0CFP && VT != MVT::ppcf128)
+ if (N0CFP)
return DAG.getNode(ISD::FP_EXTEND, N->getDebugLoc(), VT, N0);
// Turn fp_extend(fp_round(X, 1)) -> x since the fp_round doesn't affect the
@@ -6225,6 +6465,11 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
+ if (VT.isVector()) {
+ SDValue FoldedVOp = SimplifyVUnaryOp(N);
+ if (FoldedVOp.getNode()) return FoldedVOp;
+ }
+
if (isNegatibleForFree(N0, LegalOperations, DAG.getTargetLoweringInfo(),
&DAG.getTarget().Options))
return GetNegatedExpression(N0, DAG, LegalOperations);
@@ -6246,6 +6491,17 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) {
}
}
+ // (fneg (fmul c, x)) -> (fmul -c, x)
+ if (N0.getOpcode() == ISD::FMUL) {
+ ConstantFPSDNode *CFP1 = dyn_cast<ConstantFPSDNode>(N0.getOperand(1));
+ if (CFP1) {
+ return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT,
+ N0.getOperand(0),
+ DAG.getNode(ISD::FNEG, N->getDebugLoc(), VT,
+ N0.getOperand(1)));
+ }
+ }
+
return SDValue();
}
@@ -6255,7 +6511,7 @@ SDValue DAGCombiner::visitFCEIL(SDNode *N) {
EVT VT = N->getValueType(0);
// fold (fceil c1) -> fceil(c1)
- if (N0CFP && VT != MVT::ppcf128)
+ if (N0CFP)
return DAG.getNode(ISD::FCEIL, N->getDebugLoc(), VT, N0);
return SDValue();
@@ -6267,7 +6523,7 @@ SDValue DAGCombiner::visitFTRUNC(SDNode *N) {
EVT VT = N->getValueType(0);
// fold (ftrunc c1) -> ftrunc(c1)
- if (N0CFP && VT != MVT::ppcf128)
+ if (N0CFP)
return DAG.getNode(ISD::FTRUNC, N->getDebugLoc(), VT, N0);
return SDValue();
@@ -6279,7 +6535,7 @@ SDValue DAGCombiner::visitFFLOOR(SDNode *N) {
EVT VT = N->getValueType(0);
// fold (ffloor c1) -> ffloor(c1)
- if (N0CFP && VT != MVT::ppcf128)
+ if (N0CFP)
return DAG.getNode(ISD::FFLOOR, N->getDebugLoc(), VT, N0);
return SDValue();
@@ -6290,8 +6546,13 @@ SDValue DAGCombiner::visitFABS(SDNode *N) {
ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
EVT VT = N->getValueType(0);
+ if (VT.isVector()) {
+ SDValue FoldedVOp = SimplifyVUnaryOp(N);
+ if (FoldedVOp.getNode()) return FoldedVOp;
+ }
+
// fold (fabs c1) -> fabs(c1)
- if (N0CFP && VT != MVT::ppcf128)
+ if (N0CFP)
return DAG.getNode(ISD::FABS, N->getDebugLoc(), VT, N0);
// fold (fabs (fabs x)) -> (fabs x)
if (N0.getOpcode() == ISD::FABS)
@@ -6511,7 +6772,7 @@ static bool canFoldInAddressingMode(SDNode *N, SDNode *Use,
} else
return false;
- TargetLowering::AddrMode AM;
+ AddrMode AM;
if (N->getOpcode() == ISD::ADD) {
ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
if (Offset)
@@ -7138,7 +7399,7 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
unsigned NewAlign = MinAlign(LD->getAlignment(), PtrOff);
Type *NewVTTy = NewVT.getTypeForEVT(*DAG.getContext());
- if (NewAlign < TLI.getTargetData()->getABITypeAlignment(NewVTTy))
+ if (NewAlign < TLI.getDataLayout()->getABITypeAlignment(NewVTTy))
return SDValue();
SDValue NewPtr = DAG.getNode(ISD::ADD, LD->getDebugLoc(),
@@ -7200,7 +7461,7 @@ SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) {
unsigned LDAlign = LD->getAlignment();
unsigned STAlign = ST->getAlignment();
Type *IntVTTy = IntVT.getTypeForEVT(*DAG.getContext());
- unsigned ABIAlign = TLI.getTargetData()->getABITypeAlignment(IntVTTy);
+ unsigned ABIAlign = TLI.getDataLayout()->getABITypeAlignment(IntVTTy);
if (LDAlign < ABIAlign || STAlign < ABIAlign)
return SDValue();
@@ -7225,6 +7486,433 @@ SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) {
return SDValue();
}
+/// Returns the base pointer and an integer offset from that object.
+static std::pair<SDValue, int64_t> GetPointerBaseAndOffset(SDValue Ptr) {
+ if (Ptr->getOpcode() == ISD::ADD && isa<ConstantSDNode>(Ptr->getOperand(1))) {
+ int64_t Offset = cast<ConstantSDNode>(Ptr->getOperand(1))->getSExtValue();
+ SDValue Base = Ptr->getOperand(0);
+ return std::make_pair(Base, Offset);
+ }
+
+ return std::make_pair(Ptr, 0);
+}
+
+/// Holds a pointer to an LSBaseSDNode as well as information on where it
+/// is located in a sequence of memory operations connected by a chain.
+struct MemOpLink {
+ MemOpLink (LSBaseSDNode *N, int64_t Offset, unsigned Seq):
+ MemNode(N), OffsetFromBase(Offset), SequenceNum(Seq) { }
+ // Ptr to the mem node.
+ LSBaseSDNode *MemNode;
+ // Offset from the base ptr.
+ int64_t OffsetFromBase;
+ // What is the sequence number of this mem node.
+ // Lowest mem operand in the DAG starts at zero.
+ unsigned SequenceNum;
+};
+
+/// Sorts store nodes in a link according to their offset from a shared
+// base ptr.
+struct ConsecutiveMemoryChainSorter {
+ bool operator()(MemOpLink LHS, MemOpLink RHS) {
+ return LHS.OffsetFromBase < RHS.OffsetFromBase;
+ }
+};
+
+bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
+ EVT MemVT = St->getMemoryVT();
+ int64_t ElementSizeBytes = MemVT.getSizeInBits()/8;
+
+ // Don't merge vectors into wider inputs.
+ if (MemVT.isVector() || !MemVT.isSimple())
+ return false;
+
+ // Perform an early exit check. Do not bother looking at stored values that
+ // are not constants or loads.
+ SDValue StoredVal = St->getValue();
+ bool IsLoadSrc = isa<LoadSDNode>(StoredVal);
+ if (!isa<ConstantSDNode>(StoredVal) && !isa<ConstantFPSDNode>(StoredVal) &&
+ !IsLoadSrc)
+ return false;
+
+ // Only look at ends of store sequences.
+ SDValue Chain = SDValue(St, 1);
+ if (Chain->hasOneUse() && Chain->use_begin()->getOpcode() == ISD::STORE)
+ return false;
+
+ // This holds the base pointer and the offset in bytes from the base pointer.
+ std::pair<SDValue, int64_t> BasePtr =
+ GetPointerBaseAndOffset(St->getBasePtr());
+
+ // We must have a base and an offset.
+ if (!BasePtr.first.getNode())
+ return false;
+
+ // Do not handle stores to undef base pointers.
+ if (BasePtr.first.getOpcode() == ISD::UNDEF)
+ return false;
+
+ SmallVector<MemOpLink, 8> StoreNodes;
+ // Walk up the chain and look for nodes with offsets from the same
+ // base pointer. Stop when reaching an instruction with a different kind
+ // or instruction which has a different base pointer.
+ unsigned Seq = 0;
+ StoreSDNode *Index = St;
+ while (Index) {
+ // If the chain has more than one use, then we can't reorder the mem ops.
+ if (Index != St && !SDValue(Index, 1)->hasOneUse())
+ break;
+
+ // Find the base pointer and offset for this memory node.
+ std::pair<SDValue, int64_t> Ptr =
+ GetPointerBaseAndOffset(Index->getBasePtr());
+
+ // Check that the base pointer is the same as the original one.
+ if (Ptr.first.getNode() != BasePtr.first.getNode())
+ break;
+
+ // Check that the alignment is the same.
+ if (Index->getAlignment() != St->getAlignment())
+ break;
+
+ // The memory operands must not be volatile.
+ if (Index->isVolatile() || Index->isIndexed())
+ break;
+
+ // No truncation.
+ if (StoreSDNode *St = dyn_cast<StoreSDNode>(Index))
+ if (St->isTruncatingStore())
+ break;
+
+ // The stored memory type must be the same.
+ if (Index->getMemoryVT() != MemVT)
+ break;
+
+ // We do not allow unaligned stores because we want to prevent overriding
+ // stores.
+ if (Index->getAlignment()*8 != MemVT.getSizeInBits())
+ break;
+
+ // We found a potential memory operand to merge.
+ StoreNodes.push_back(MemOpLink(Index, Ptr.second, Seq++));
+
+ // Move up the chain to the next memory operation.
+ Index = dyn_cast<StoreSDNode>(Index->getChain().getNode());
+ }
+
+ // Check if there is anything to merge.
+ if (StoreNodes.size() < 2)
+ return false;
+
+ // Sort the memory operands according to their distance from the base pointer.
+ std::sort(StoreNodes.begin(), StoreNodes.end(),
+ ConsecutiveMemoryChainSorter());
+
+ // Scan the memory operations on the chain and find the first non-consecutive
+ // store memory address.
+ unsigned LastConsecutiveStore = 0;
+ int64_t StartAddress = StoreNodes[0].OffsetFromBase;
+ for (unsigned i=1; i<StoreNodes.size(); ++i) {
+ int64_t CurrAddress = StoreNodes[i].OffsetFromBase;
+ if (CurrAddress - StartAddress != (ElementSizeBytes * i))
+ break;
+
+ // Mark this node as useful.
+ LastConsecutiveStore = i;
+ }
+
+ // The node with the lowest store address.
+ LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
+
+ // Store the constants into memory as one consecutive store.
+ if (!IsLoadSrc) {
+ unsigned LastLegalType = 0;
+ unsigned LastLegalVectorType = 0;
+ bool NonZero = false;
+ for (unsigned i=0; i<LastConsecutiveStore+1; ++i) {
+ StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
+ SDValue StoredVal = St->getValue();
+
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(StoredVal)) {
+ NonZero |= !C->isNullValue();
+ } else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(StoredVal)) {
+ NonZero |= !C->getConstantFPValue()->isNullValue();
+ } else {
+ // Non constant.
+ break;
+ }
+
+ // Find a legal type for the constant store.
+ unsigned StoreBW = (i+1) * ElementSizeBytes * 8;
+ EVT StoreTy = EVT::getIntegerVT(*DAG.getContext(), StoreBW);
+ if (TLI.isTypeLegal(StoreTy))
+ LastLegalType = i+1;
+
+ // Find a legal type for the vector store.
+ EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT, i+1);
+ if (TLI.isTypeLegal(Ty))
+ LastLegalVectorType = i + 1;
+ }
+
+ // We only use vectors if the constant is known to be zero.
+ if (NonZero)
+ LastLegalVectorType = 0;
+
+ // Check if we found a legal integer type to store.
+ if (LastLegalType == 0 && LastLegalVectorType == 0)
+ return false;
+
+ bool UseVector = LastLegalVectorType > LastLegalType;
+ unsigned NumElem = UseVector ? LastLegalVectorType : LastLegalType;
+
+ // Make sure we have something to merge.
+ if (NumElem < 2)
+ return false;
+
+ unsigned EarliestNodeUsed = 0;
+ for (unsigned i=0; i < NumElem; ++i) {
+ // Find a chain for the new wide-store operand. Notice that some
+ // of the store nodes that we found may not be selected for inclusion
+ // in the wide store. The chain we use needs to be the chain of the
+ // earliest store node which is *used* and replaced by the wide store.
+ if (StoreNodes[i].SequenceNum > StoreNodes[EarliestNodeUsed].SequenceNum)
+ EarliestNodeUsed = i;
+ }
+
+ // The earliest Node in the DAG.
+ LSBaseSDNode *EarliestOp = StoreNodes[EarliestNodeUsed].MemNode;
+ DebugLoc DL = StoreNodes[0].MemNode->getDebugLoc();
+
+ SDValue StoredVal;
+ if (UseVector) {
+ // Find a legal type for the vector store.
+ EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT, NumElem);
+ assert(TLI.isTypeLegal(Ty) && "Illegal vector store");
+ StoredVal = DAG.getConstant(0, Ty);
+ } else {
+ unsigned StoreBW = NumElem * ElementSizeBytes * 8;
+ APInt StoreInt(StoreBW, 0);
+
+ // Construct a single integer constant which is made of the smaller
+ // constant inputs.
+ bool IsLE = TLI.isLittleEndian();
+ for (unsigned i = 0; i < NumElem ; ++i) {
+ unsigned Idx = IsLE ?(NumElem - 1 - i) : i;
+ StoreSDNode *St = cast<StoreSDNode>(StoreNodes[Idx].MemNode);
+ SDValue Val = St->getValue();
+ StoreInt<<=ElementSizeBytes*8;
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val)) {
+ StoreInt|=C->getAPIntValue().zext(StoreBW);
+ } else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Val)) {
+ StoreInt|= C->getValueAPF().bitcastToAPInt().zext(StoreBW);
+ } else {
+ assert(false && "Invalid constant element type");
+ }
+ }
+
+ // Create the new Load and Store operations.
+ EVT StoreTy = EVT::getIntegerVT(*DAG.getContext(), StoreBW);
+ StoredVal = DAG.getConstant(StoreInt, StoreTy);
+ }
+
+ SDValue NewStore = DAG.getStore(EarliestOp->getChain(), DL, StoredVal,
+ FirstInChain->getBasePtr(),
+ FirstInChain->getPointerInfo(),
+ false, false,
+ FirstInChain->getAlignment());
+
+ // Replace the first store with the new store
+ CombineTo(EarliestOp, NewStore);
+ // Erase all other stores.
+ for (unsigned i = 0; i < NumElem ; ++i) {
+ if (StoreNodes[i].MemNode == EarliestOp)
+ continue;
+ StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
+ // ReplaceAllUsesWith will replace all uses that existed when it was
+ // called, but graph optimizations may cause new ones to appear. For
+ // example, the case in pr14333 looks like
+ //
+ // St's chain -> St -> another store -> X
+ //
+ // And the only difference from St to the other store is the chain.
+ // When we change it's chain to be St's chain they become identical,
+ // get CSEed and the net result is that X is now a use of St.
+ // Since we know that St is redundant, just iterate.
+ while (!St->use_empty())
+ DAG.ReplaceAllUsesWith(SDValue(St, 0), St->getChain());
+ removeFromWorkList(St);
+ DAG.DeleteNode(St);
+ }
+
+ return true;
+ }
+
+ // Below we handle the case of multiple consecutive stores that
+ // come from multiple consecutive loads. We merge them into a single
+ // wide load and a single wide store.
+
+ // Look for load nodes which are used by the stored values.
+ SmallVector<MemOpLink, 8> LoadNodes;
+
+ // Find acceptable loads. Loads need to have the same chain (token factor),
+ // must not be zext, volatile, indexed, and they must be consecutive.
+ SDValue LdBasePtr;
+ for (unsigned i=0; i<LastConsecutiveStore+1; ++i) {
+ StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
+ LoadSDNode *Ld = dyn_cast<LoadSDNode>(St->getValue());
+ if (!Ld) break;
+
+ // Loads must only have one use.
+ if (!Ld->hasNUsesOfValue(1, 0))
+ break;
+
+ // Check that the alignment is the same as the stores.
+ if (Ld->getAlignment() != St->getAlignment())
+ break;
+
+ // The memory operands must not be volatile.
+ if (Ld->isVolatile() || Ld->isIndexed())
+ break;
+
+ // We do not accept ext loads.
+ if (Ld->getExtensionType() != ISD::NON_EXTLOAD)
+ break;
+
+ // The stored memory type must be the same.
+ if (Ld->getMemoryVT() != MemVT)
+ break;
+
+ std::pair<SDValue, int64_t> LdPtr =
+ GetPointerBaseAndOffset(Ld->getBasePtr());
+
+ // If this is not the first ptr that we check.
+ if (LdBasePtr.getNode()) {
+ // The base ptr must be the same.
+ if (LdPtr.first != LdBasePtr)
+ break;
+ } else {
+ // Check that all other base pointers are the same as this one.
+ LdBasePtr = LdPtr.first;
+ }
+
+ // We found a potential memory operand to merge.
+ LoadNodes.push_back(MemOpLink(Ld, LdPtr.second, 0));
+ }
+
+ if (LoadNodes.size() < 2)
+ return false;
+
+ // Scan the memory operations on the chain and find the first non-consecutive
+ // load memory address. These variables hold the index in the store node
+ // array.
+ unsigned LastConsecutiveLoad = 0;
+ // This variable refers to the size and not index in the array.
+ unsigned LastLegalVectorType = 0;
+ unsigned LastLegalIntegerType = 0;
+ StartAddress = LoadNodes[0].OffsetFromBase;
+ SDValue FirstChain = LoadNodes[0].MemNode->getChain();
+ for (unsigned i = 1; i < LoadNodes.size(); ++i) {
+ // All loads much share the same chain.
+ if (LoadNodes[i].MemNode->getChain() != FirstChain)
+ break;
+
+ int64_t CurrAddress = LoadNodes[i].OffsetFromBase;
+ if (CurrAddress - StartAddress != (ElementSizeBytes * i))
+ break;
+ LastConsecutiveLoad = i;
+
+ // Find a legal type for the vector store.
+ EVT StoreTy = EVT::getVectorVT(*DAG.getContext(), MemVT, i+1);
+ if (TLI.isTypeLegal(StoreTy))
+ LastLegalVectorType = i + 1;
+
+ // Find a legal type for the integer store.
+ unsigned StoreBW = (i+1) * ElementSizeBytes * 8;
+ StoreTy = EVT::getIntegerVT(*DAG.getContext(), StoreBW);
+ if (TLI.isTypeLegal(StoreTy))
+ LastLegalIntegerType = i + 1;
+ }
+
+ // Only use vector types if the vector type is larger than the integer type.
+ // If they are the same, use integers.
+ bool UseVectorTy = LastLegalVectorType > LastLegalIntegerType;
+ unsigned LastLegalType = std::max(LastLegalVectorType, LastLegalIntegerType);
+
+ // We add +1 here because the LastXXX variables refer to location while
+ // the NumElem refers to array/index size.
+ unsigned NumElem = std::min(LastConsecutiveStore, LastConsecutiveLoad) + 1;
+ NumElem = std::min(LastLegalType, NumElem);
+
+ if (NumElem < 2)
+ return false;
+
+ // The earliest Node in the DAG.
+ unsigned EarliestNodeUsed = 0;
+ LSBaseSDNode *EarliestOp = StoreNodes[EarliestNodeUsed].MemNode;
+ for (unsigned i=1; i<NumElem; ++i) {
+ // Find a chain for the new wide-store operand. Notice that some
+ // of the store nodes that we found may not be selected for inclusion
+ // in the wide store. The chain we use needs to be the chain of the
+ // earliest store node which is *used* and replaced by the wide store.
+ if (StoreNodes[i].SequenceNum > StoreNodes[EarliestNodeUsed].SequenceNum)
+ EarliestNodeUsed = i;
+ }
+
+ // Find if it is better to use vectors or integers to load and store
+ // to memory.
+ EVT JointMemOpVT;
+ if (UseVectorTy) {
+ JointMemOpVT = EVT::getVectorVT(*DAG.getContext(), MemVT, NumElem);
+ } else {
+ unsigned StoreBW = NumElem * ElementSizeBytes * 8;
+ JointMemOpVT = EVT::getIntegerVT(*DAG.getContext(), StoreBW);
+ }
+
+ DebugLoc LoadDL = LoadNodes[0].MemNode->getDebugLoc();
+ DebugLoc StoreDL = StoreNodes[0].MemNode->getDebugLoc();
+
+ LoadSDNode *FirstLoad = cast<LoadSDNode>(LoadNodes[0].MemNode);
+ SDValue NewLoad = DAG.getLoad(JointMemOpVT, LoadDL,
+ FirstLoad->getChain(),
+ FirstLoad->getBasePtr(),
+ FirstLoad->getPointerInfo(),
+ false, false, false,
+ FirstLoad->getAlignment());
+
+ SDValue NewStore = DAG.getStore(EarliestOp->getChain(), StoreDL, NewLoad,
+ FirstInChain->getBasePtr(),
+ FirstInChain->getPointerInfo(), false, false,
+ FirstInChain->getAlignment());
+
+ // Replace one of the loads with the new load.
+ LoadSDNode *Ld = cast<LoadSDNode>(LoadNodes[0].MemNode);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1),
+ SDValue(NewLoad.getNode(), 1));
+
+ // Remove the rest of the load chains.
+ for (unsigned i = 1; i < NumElem ; ++i) {
+ // Replace all chain users of the old load nodes with the chain of the new
+ // load node.
+ LoadSDNode *Ld = cast<LoadSDNode>(LoadNodes[i].MemNode);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), Ld->getChain());
+ }
+
+ // Replace the first store with the new store.
+ CombineTo(EarliestOp, NewStore);
+ // Erase all other stores.
+ for (unsigned i = 0; i < NumElem ; ++i) {
+ // Remove all Store nodes.
+ if (StoreNodes[i].MemNode == EarliestOp)
+ continue;
+ StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(St, 0), St->getChain());
+ removeFromWorkList(St);
+ DAG.DeleteNode(St);
+ }
+
+ return true;
+}
+
SDValue DAGCombiner::visitSTORE(SDNode *N) {
StoreSDNode *ST = cast<StoreSDNode>(N);
SDValue Chain = ST->getChain();
@@ -7237,7 +7925,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
ST->isUnindexed()) {
unsigned OrigAlign = ST->getAlignment();
EVT SVT = Value.getOperand(0).getValueType();
- unsigned Align = TLI.getTargetData()->
+ unsigned Align = TLI.getDataLayout()->
getABITypeAlignment(SVT.getTypeForEVT(*DAG.getContext()));
if (Align <= OrigAlign &&
((!LegalOperations && !ST->isVolatile()) ||
@@ -7426,6 +8114,11 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
ST->getAlignment());
}
+ // Only perform this optimization before the types are legal, because we
+ // don't want to perform this optimization on every DAGCombine invocation.
+ if (!LegalTypes && MergeConsecutiveStores(ST))
+ return SDValue(N, 0);
+
return ReduceLoadOpStoreWidth(N);
}
@@ -7504,9 +8197,9 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
// Transform: (EXTRACT_VECTOR_ELT( VECTOR_SHUFFLE )) -> EXTRACT_VECTOR_ELT.
// We only perform this optimization before the op legalization phase because
- // we may introduce new vector instructions which are not backed by TD patterns.
- // For example on AVX, extracting elements from a wide vector without using
- // extract_subvector.
+ // we may introduce new vector instructions which are not backed by TD
+ // patterns. For example on AVX, extracting elements from a wide vector
+ // without using extract_subvector.
if (InVec.getOpcode() == ISD::VECTOR_SHUFFLE
&& ConstEltNo && !LegalOperations) {
int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
@@ -7625,7 +8318,7 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
// Check the resultant load doesn't need a higher alignment than the
// original load.
unsigned NewAlign =
- TLI.getTargetData()
+ TLI.getDataLayout()
->getABITypeAlignment(LVT.getTypeForEVT(*DAG.getContext()));
if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, LVT))
@@ -7690,15 +8383,21 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
return SDValue();
}
-SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
+// Simplify (build_vec (ext )) to (bitcast (build_vec ))
+SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) {
+ // We perform this optimization post type-legalization because
+ // the type-legalizer often scalarizes integer-promoted vectors.
+ // Performing this optimization before may create bit-casts which
+ // will be type-legalized to complex code sequences.
+ // We perform this optimization only before the operation legalizer because we
+ // may introduce illegal operations.
+ if (Level != AfterLegalizeVectorOps && Level != AfterLegalizeTypes)
+ return SDValue();
+
unsigned NumInScalars = N->getNumOperands();
DebugLoc dl = N->getDebugLoc();
EVT VT = N->getValueType(0);
- // A vector built entirely of undefs is undef.
- if (ISD::allOperandsUndef(N))
- return DAG.getUNDEF(VT);
-
// Check to see if this is a BUILD_VECTOR of a bunch of values
// which come from any_extend or zero_extend nodes. If so, we can create
// a new BUILD_VECTOR using bit-casts which may enable other BUILD_VECTOR
@@ -7741,64 +8440,141 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
// In order to have valid types, all of the inputs must be extended from the
// same source type and all of the inputs must be any or zero extend.
// Scalar sizes must be a power of two.
- EVT OutScalarTy = N->getValueType(0).getScalarType();
+ EVT OutScalarTy = VT.getScalarType();
bool ValidTypes = SourceType != MVT::Other &&
isPowerOf2_32(OutScalarTy.getSizeInBits()) &&
isPowerOf2_32(SourceType.getSizeInBits());
- // We perform this optimization post type-legalization because
- // the type-legalizer often scalarizes integer-promoted vectors.
- // Performing this optimization before may create bit-casts which
- // will be type-legalized to complex code sequences.
- // We perform this optimization only before the operation legalizer because we
- // may introduce illegal operations.
// Create a new simpler BUILD_VECTOR sequence which other optimizations can
// turn into a single shuffle instruction.
- if ((Level == AfterLegalizeVectorOps || Level == AfterLegalizeTypes) &&
- ValidTypes) {
- bool isLE = TLI.isLittleEndian();
- unsigned ElemRatio = OutScalarTy.getSizeInBits()/SourceType.getSizeInBits();
- assert(ElemRatio > 1 && "Invalid element size ratio");
- SDValue Filler = AllAnyExt ? DAG.getUNDEF(SourceType):
- DAG.getConstant(0, SourceType);
-
- unsigned NewBVElems = ElemRatio * N->getValueType(0).getVectorNumElements();
- SmallVector<SDValue, 8> Ops(NewBVElems, Filler);
-
- // Populate the new build_vector
- for (unsigned i=0; i < N->getNumOperands(); ++i) {
- SDValue Cast = N->getOperand(i);
- assert((Cast.getOpcode() == ISD::ANY_EXTEND ||
- Cast.getOpcode() == ISD::ZERO_EXTEND ||
- Cast.getOpcode() == ISD::UNDEF) && "Invalid cast opcode");
- SDValue In;
- if (Cast.getOpcode() == ISD::UNDEF)
- In = DAG.getUNDEF(SourceType);
- else
- In = Cast->getOperand(0);
- unsigned Index = isLE ? (i * ElemRatio) :
- (i * ElemRatio + (ElemRatio - 1));
+ if (!ValidTypes)
+ return SDValue();
+
+ bool isLE = TLI.isLittleEndian();
+ unsigned ElemRatio = OutScalarTy.getSizeInBits()/SourceType.getSizeInBits();
+ assert(ElemRatio > 1 && "Invalid element size ratio");
+ SDValue Filler = AllAnyExt ? DAG.getUNDEF(SourceType):
+ DAG.getConstant(0, SourceType);
+
+ unsigned NewBVElems = ElemRatio * VT.getVectorNumElements();
+ SmallVector<SDValue, 8> Ops(NewBVElems, Filler);
+
+ // Populate the new build_vector
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+ SDValue Cast = N->getOperand(i);
+ assert((Cast.getOpcode() == ISD::ANY_EXTEND ||
+ Cast.getOpcode() == ISD::ZERO_EXTEND ||
+ Cast.getOpcode() == ISD::UNDEF) && "Invalid cast opcode");
+ SDValue In;
+ if (Cast.getOpcode() == ISD::UNDEF)
+ In = DAG.getUNDEF(SourceType);
+ else
+ In = Cast->getOperand(0);
+ unsigned Index = isLE ? (i * ElemRatio) :
+ (i * ElemRatio + (ElemRatio - 1));
+
+ assert(Index < Ops.size() && "Invalid index");
+ Ops[Index] = In;
+ }
+
+ // The type of the new BUILD_VECTOR node.
+ EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SourceType, NewBVElems);
+ assert(VecVT.getSizeInBits() == VT.getSizeInBits() &&
+ "Invalid vector size");
+ // Check if the new vector type is legal.
+ if (!isTypeLegal(VecVT)) return SDValue();
+
+ // Make the new BUILD_VECTOR.
+ SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, VecVT, &Ops[0], Ops.size());
+
+ // The new BUILD_VECTOR node has the potential to be further optimized.
+ AddToWorkList(BV.getNode());
+ // Bitcast to the desired type.
+ return DAG.getNode(ISD::BITCAST, dl, VT, BV);
+}
+
+SDValue DAGCombiner::reduceBuildVecConvertToConvertBuildVec(SDNode *N) {
+ EVT VT = N->getValueType(0);
+
+ unsigned NumInScalars = N->getNumOperands();
+ DebugLoc dl = N->getDebugLoc();
+
+ EVT SrcVT = MVT::Other;
+ unsigned Opcode = ISD::DELETED_NODE;
+ unsigned NumDefs = 0;
- assert(Index < Ops.size() && "Invalid index");
- Ops[Index] = In;
+ for (unsigned i = 0; i != NumInScalars; ++i) {
+ SDValue In = N->getOperand(i);
+ unsigned Opc = In.getOpcode();
+
+ if (Opc == ISD::UNDEF)
+ continue;
+
+ // If all scalar values are floats and converted from integers.
+ if (Opcode == ISD::DELETED_NODE &&
+ (Opc == ISD::UINT_TO_FP || Opc == ISD::SINT_TO_FP)) {
+ Opcode = Opc;
+ // If not supported by target, bail out.
+ if (TLI.getOperationAction(Opcode, VT) != TargetLowering::Legal &&
+ TLI.getOperationAction(Opcode, VT) != TargetLowering::Custom)
+ return SDValue();
}
+ if (Opc != Opcode)
+ return SDValue();
- // The type of the new BUILD_VECTOR node.
- EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SourceType, NewBVElems);
- assert(VecVT.getSizeInBits() == N->getValueType(0).getSizeInBits() &&
- "Invalid vector size");
- // Check if the new vector type is legal.
- if (!isTypeLegal(VecVT)) return SDValue();
+ EVT InVT = In.getOperand(0).getValueType();
- // Make the new BUILD_VECTOR.
- SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(),
- VecVT, &Ops[0], Ops.size());
+ // If all scalar values are typed differently, bail out. It's chosen to
+ // simplify BUILD_VECTOR of integer types.
+ if (SrcVT == MVT::Other)
+ SrcVT = InVT;
+ if (SrcVT != InVT)
+ return SDValue();
+ NumDefs++;
+ }
+
+ // If the vector has just one element defined, it's not worth to fold it into
+ // a vectorized one.
+ if (NumDefs < 2)
+ return SDValue();
- // The new BUILD_VECTOR node has the potential to be further optimized.
- AddToWorkList(BV.getNode());
- // Bitcast to the desired type.
- return DAG.getNode(ISD::BITCAST, dl, N->getValueType(0), BV);
+ assert((Opcode == ISD::UINT_TO_FP || Opcode == ISD::SINT_TO_FP)
+ && "Should only handle conversion from integer to float.");
+ assert(SrcVT != MVT::Other && "Cannot determine source type!");
+
+ EVT NVT = EVT::getVectorVT(*DAG.getContext(), SrcVT, NumInScalars);
+ SmallVector<SDValue, 8> Opnds;
+ for (unsigned i = 0; i != NumInScalars; ++i) {
+ SDValue In = N->getOperand(i);
+
+ if (In.getOpcode() == ISD::UNDEF)
+ Opnds.push_back(DAG.getUNDEF(SrcVT));
+ else
+ Opnds.push_back(In.getOperand(0));
}
+ SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, NVT,
+ &Opnds[0], Opnds.size());
+ AddToWorkList(BV.getNode());
+
+ return DAG.getNode(Opcode, dl, VT, BV);
+}
+
+SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
+ unsigned NumInScalars = N->getNumOperands();
+ DebugLoc dl = N->getDebugLoc();
+ EVT VT = N->getValueType(0);
+
+ // A vector built entirely of undefs is undef.
+ if (ISD::allOperandsUndef(N))
+ return DAG.getUNDEF(VT);
+
+ SDValue V = reduceBuildVecExtToExtBuildVec(N);
+ if (V.getNode())
+ return V;
+
+ V = reduceBuildVecConvertToConvertBuildVec(N);
+ if (V.getNode())
+ return V;
// Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT
// operations. If so, and if the EXTRACT_VECTOR_ELT vector inputs come from
@@ -7876,15 +8652,22 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
if (VecIn1.getValueType().getSizeInBits()*2 != VT.getSizeInBits())
return SDValue();
+ // If the input vector type has a different base type to the output
+ // vector type, bail out.
+ if (VecIn1.getValueType().getVectorElementType() !=
+ VT.getVectorElementType())
+ return SDValue();
+
// Widen the input vector by adding undef values.
- VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, N->getDebugLoc(), VT,
+ VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT,
VecIn1, DAG.getUNDEF(VecIn1.getValueType()));
}
// If VecIn2 is unused then change it to undef.
VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(VT);
- // Check that we were able to transform all incoming values to the same type.
+ // Check that we were able to transform all incoming values to the same
+ // type.
if (VecIn2.getValueType() != VecIn1.getValueType() ||
VecIn1.getValueType() != VT)
return SDValue();
@@ -7897,7 +8680,7 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
SDValue Ops[2];
Ops[0] = VecIn1;
Ops[1] = VecIn2;
- return DAG.getVectorShuffle(VT, N->getDebugLoc(), Ops[0], Ops[1], &Mask[0]);
+ return DAG.getVectorShuffle(VT, dl, Ops[0], Ops[1], &Mask[0]);
}
return SDValue();
@@ -7933,8 +8716,8 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) {
return SDValue();
// Only handle cases where both indexes are constants with the same type.
- ConstantSDNode *InsIdx = dyn_cast<ConstantSDNode>(N->getOperand(1));
- ConstantSDNode *ExtIdx = dyn_cast<ConstantSDNode>(V->getOperand(2));
+ ConstantSDNode *ExtIdx = dyn_cast<ConstantSDNode>(N->getOperand(1));
+ ConstantSDNode *InsIdx = dyn_cast<ConstantSDNode>(V->getOperand(2));
if (InsIdx && ExtIdx &&
InsIdx->getValueType(0).getSizeInBits() <= 64 &&
@@ -7951,6 +8734,21 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) {
}
}
+ if (V->getOpcode() == ISD::CONCAT_VECTORS) {
+ // Combine:
+ // (extract_subvec (concat V1, V2, ...), i)
+ // Into:
+ // Vi if possible
+ // Only operand 0 is checked as 'concat' assumes all inputs of the same type.
+ if (V->getOperand(0).getValueType() != NVT)
+ return SDValue();
+ unsigned Idx = dyn_cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
+ unsigned NumElems = NVT.getVectorNumElements();
+ assert((Idx % NumElems) == 0 &&
+ "IDX in concat is not a multiple of the result vector length.");
+ return V->getOperand(Idx / NumElems);
+ }
+
return SDValue();
}
@@ -8266,6 +9064,44 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
return SDValue();
}
+/// SimplifyVUnaryOp - Visit a binary vector operation, like FABS/FNEG.
+SDValue DAGCombiner::SimplifyVUnaryOp(SDNode *N) {
+ // After legalize, the target may be depending on adds and other
+ // binary ops to provide legal ways to construct constants or other
+ // things. Simplifying them may result in a loss of legality.
+ if (LegalOperations) return SDValue();
+
+ assert(N->getValueType(0).isVector() &&
+ "SimplifyVUnaryOp only works on vectors!");
+
+ SDValue N0 = N->getOperand(0);
+
+ if (N0.getOpcode() != ISD::BUILD_VECTOR)
+ return SDValue();
+
+ // Operand is a BUILD_VECTOR node, see if we can constant fold it.
+ SmallVector<SDValue, 8> Ops;
+ for (unsigned i = 0, e = N0.getNumOperands(); i != e; ++i) {
+ SDValue Op = N0.getOperand(i);
+ if (Op.getOpcode() != ISD::UNDEF &&
+ Op.getOpcode() != ISD::ConstantFP)
+ break;
+ EVT EltVT = Op.getValueType();
+ SDValue FoldOp = DAG.getNode(N->getOpcode(), N0.getDebugLoc(), EltVT, Op);
+ if (FoldOp.getOpcode() != ISD::UNDEF &&
+ FoldOp.getOpcode() != ISD::ConstantFP)
+ break;
+ Ops.push_back(FoldOp);
+ AddToWorkList(FoldOp.getNode());
+ }
+
+ if (Ops.size() != N0.getNumOperands())
+ return SDValue();
+
+ return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(),
+ N0.getValueType(), &Ops[0], Ops.size());
+}
+
SDValue DAGCombiner::SimplifySelect(DebugLoc DL, SDValue N0,
SDValue N1, SDValue N2){
assert(N0.getOpcode() ==ISD::SETCC && "First argument must be a SetCC node!");
@@ -8349,6 +9185,10 @@ bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
if ((LLD->hasAnyUseOfValue(1) && LLD->isPredecessorOf(CondNode)) ||
(RLD->hasAnyUseOfValue(1) && RLD->isPredecessorOf(CondNode)))
return false;
+ // The loads must not depend on one another.
+ if (LLD->isPredecessorOf(RLD) ||
+ RLD->isPredecessorOf(LLD))
+ return false;
Addr = DAG.getNode(ISD::SELECT, TheSelect->getDebugLoc(),
LLD->getBasePtr().getValueType(),
TheSelect->getOperand(0), LLD->getBasePtr(),
@@ -8468,7 +9308,7 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1,
const_cast<ConstantFP*>(TV->getConstantFPValue())
};
Type *FPTy = Elts[0]->getType();
- const TargetData &TD = *TLI.getTargetData();
+ const DataLayout &TD = *TLI.getDataLayout();
// Create a ConstantArray of the two constants.
Constant *CA = ConstantArray::get(ArrayType::get(FPTy, 2), Elts);
@@ -8583,34 +9423,38 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1,
return SDValue();
// Get a SetCC of the condition
- // FIXME: Should probably make sure that setcc is legal if we ever have a
- // target where it isn't.
- SDValue Temp, SCC;
- // cast from setcc result type to select result type
- if (LegalTypes) {
- SCC = DAG.getSetCC(DL, TLI.getSetCCResultType(N0.getValueType()),
- N0, N1, CC);
- if (N2.getValueType().bitsLT(SCC.getValueType()))
- Temp = DAG.getZeroExtendInReg(SCC, N2.getDebugLoc(), N2.getValueType());
- else
+ // NOTE: Don't create a SETCC if it's not legal on this target.
+ if (!LegalOperations ||
+ TLI.isOperationLegal(ISD::SETCC,
+ LegalTypes ? TLI.getSetCCResultType(N0.getValueType()) : MVT::i1)) {
+ SDValue Temp, SCC;
+ // cast from setcc result type to select result type
+ if (LegalTypes) {
+ SCC = DAG.getSetCC(DL, TLI.getSetCCResultType(N0.getValueType()),
+ N0, N1, CC);
+ if (N2.getValueType().bitsLT(SCC.getValueType()))
+ Temp = DAG.getZeroExtendInReg(SCC, N2.getDebugLoc(),
+ N2.getValueType());
+ else
+ Temp = DAG.getNode(ISD::ZERO_EXTEND, N2.getDebugLoc(),
+ N2.getValueType(), SCC);
+ } else {
+ SCC = DAG.getSetCC(N0.getDebugLoc(), MVT::i1, N0, N1, CC);
Temp = DAG.getNode(ISD::ZERO_EXTEND, N2.getDebugLoc(),
N2.getValueType(), SCC);
- } else {
- SCC = DAG.getSetCC(N0.getDebugLoc(), MVT::i1, N0, N1, CC);
- Temp = DAG.getNode(ISD::ZERO_EXTEND, N2.getDebugLoc(),
- N2.getValueType(), SCC);
- }
+ }
- AddToWorkList(SCC.getNode());
- AddToWorkList(Temp.getNode());
+ AddToWorkList(SCC.getNode());
+ AddToWorkList(Temp.getNode());
- if (N2C->getAPIntValue() == 1)
- return Temp;
+ if (N2C->getAPIntValue() == 1)
+ return Temp;
- // shl setcc result by log2 n2c
- return DAG.getNode(ISD::SHL, DL, N2.getValueType(), Temp,
- DAG.getConstant(N2C->getAPIntValue().logBase2(),
- getShiftAmountTy(Temp.getValueType())));
+ // shl setcc result by log2 n2c
+ return DAG.getNode(ISD::SHL, DL, N2.getValueType(), Temp,
+ DAG.getConstant(N2C->getAPIntValue().logBase2(),
+ getShiftAmountTy(Temp.getValueType())));
+ }
}
// Check to see if this is the equivalent of setcc
@@ -8729,7 +9573,7 @@ SDValue DAGCombiner::BuildUDIV(SDNode *N) {
// to alias with anything but itself. Provides base object and offset as
// results.
static bool FindBaseOffset(SDValue Ptr, SDValue &Base, int64_t &Offset,
- const GlobalValue *&GV, void *&CV) {
+ const GlobalValue *&GV, const void *&CV) {
// Assume it is a primitive operation.
Base = Ptr; Offset = 0; GV = 0; CV = 0;
@@ -8754,8 +9598,8 @@ static bool FindBaseOffset(SDValue Ptr, SDValue &Base, int64_t &Offset,
// for ConstantSDNodes since the same constant pool entry may be represented
// by multiple nodes with different offsets.
if (ConstantPoolSDNode *C = dyn_cast<ConstantPoolSDNode>(Base)) {
- CV = C->isMachineConstantPoolEntry() ? (void *)C->getMachineCPVal()
- : (void *)C->getConstVal();
+ CV = C->isMachineConstantPoolEntry() ? (const void *)C->getMachineCPVal()
+ : (const void *)C->getConstVal();
Offset += C->getOffset();
return false;
}
@@ -8780,7 +9624,7 @@ bool DAGCombiner::isAlias(SDValue Ptr1, int64_t Size1,
SDValue Base1, Base2;
int64_t Offset1, Offset2;
const GlobalValue *GV1, *GV2;
- void *CV1, *CV2;
+ const void *CV1, *CV2;
bool isFrameIndex1 = FindBaseOffset(Ptr1, Base1, Offset1, GV1, CV1);
bool isFrameIndex2 = FindBaseOffset(Ptr2, Base2, Offset2, GV2, CV2);
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
index 683fac6..4854cf7 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -53,7 +53,7 @@
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Analysis/Loads.h"
-#include "llvm/Target/TargetData.h"
+#include "llvm/DataLayout.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Target/TargetLowering.h"
@@ -1059,7 +1059,7 @@ FastISel::FastISel(FunctionLoweringInfo &funcInfo,
MFI(*FuncInfo.MF->getFrameInfo()),
MCP(*FuncInfo.MF->getConstantPool()),
TM(FuncInfo.MF->getTarget()),
- TD(*TM.getTargetData()),
+ TD(*TM.getDataLayout()),
TII(*TM.getInstrInfo()),
TLI(*TM.getTargetLowering()),
TRI(*TM.getRegisterInfo()),
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
index 3e18ea7..a418290 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
@@ -29,7 +29,7 @@
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetData.h"
+#include "llvm/DataLayout.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetOptions.h"
@@ -80,9 +80,9 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf) {
if (const AllocaInst *AI = dyn_cast<AllocaInst>(I))
if (const ConstantInt *CUI = dyn_cast<ConstantInt>(AI->getArraySize())) {
Type *Ty = AI->getAllocatedType();
- uint64_t TySize = TLI.getTargetData()->getTypeAllocSize(Ty);
+ uint64_t TySize = TLI.getDataLayout()->getTypeAllocSize(Ty);
unsigned Align =
- std::max((unsigned)TLI.getTargetData()->getPrefTypeAlignment(Ty),
+ std::max((unsigned)TLI.getDataLayout()->getPrefTypeAlignment(Ty),
AI->getAlignment());
TySize *= CUI->getZExtValue(); // Get total allocated size.
@@ -97,7 +97,7 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf) {
cast<ArrayType>(Ty)->getElementType()->isIntegerTy(8)));
StaticAllocaMap[AI] =
MF->getFrameInfo()->CreateStackObject(TySize, Align, false,
- MayNeedSP);
+ MayNeedSP, AI);
}
for (; BB != EB; ++BB)
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
index 4488d27..a8381b2 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
@@ -20,7 +20,7 @@
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/Target/TargetData.h"
+#include "llvm/DataLayout.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetLowering.h"
@@ -55,7 +55,8 @@ unsigned InstrEmitter::CountResults(SDNode *Node) {
///
/// Also count physreg RegisterSDNode and RegisterMaskSDNode operands preceding
/// the chain and glue. These operands may be implicit on the machine instr.
-static unsigned countOperands(SDNode *Node, unsigned &NumImpUses) {
+static unsigned countOperands(SDNode *Node, unsigned NumExpUses,
+ unsigned &NumImpUses) {
unsigned N = Node->getNumOperands();
while (N && Node->getOperand(N - 1).getValueType() == MVT::Glue)
--N;
@@ -63,7 +64,8 @@ static unsigned countOperands(SDNode *Node, unsigned &NumImpUses) {
--N; // Ignore chain if it exists.
// Count RegisterSDNode and RegisterMaskSDNode operands for NumImpUses.
- for (unsigned I = N; I; --I) {
+ NumImpUses = N - NumExpUses;
+ for (unsigned I = N; I > NumExpUses; --I) {
if (isa<RegisterMaskSDNode>(Node->getOperand(I - 1)))
continue;
if (RegisterSDNode *RN = dyn_cast<RegisterSDNode>(Node->getOperand(I - 1)))
@@ -312,8 +314,6 @@ InstrEmitter::AddRegisterOperand(MachineInstr *MI, SDValue Op,
const TargetRegisterClass *DstRC = 0;
if (IIOpNum < II->getNumOperands())
DstRC = TRI->getAllocatableClass(TII->getRegClass(*II,IIOpNum,TRI,*MF));
- assert((DstRC || (MI->isVariadic() && IIOpNum >= MCID.getNumOperands())) &&
- "Don't have operand info for this instruction!");
if (DstRC && !MRI->constrainRegClass(VReg, DstRC, MinRCSize)) {
unsigned NewVReg = MRI->createVirtualRegister(DstRC);
BuildMI(*MBB, InsertPos, Op.getNode()->getDebugLoc(),
@@ -390,10 +390,10 @@ void InstrEmitter::AddOperand(MachineInstr *MI, SDValue Op,
Type *Type = CP->getType();
// MachineConstantPool wants an explicit alignment.
if (Align == 0) {
- Align = TM->getTargetData()->getPrefTypeAlignment(Type);
+ Align = TM->getDataLayout()->getPrefTypeAlignment(Type);
if (Align == 0) {
// Alignment of vector types. FIXME!
- Align = TM->getTargetData()->getTypeAllocSize(Type);
+ Align = TM->getDataLayout()->getTypeAllocSize(Type);
}
}
@@ -410,6 +410,7 @@ void InstrEmitter::AddOperand(MachineInstr *MI, SDValue Op,
ES->getTargetFlags()));
} else if (BlockAddressSDNode *BA = dyn_cast<BlockAddressSDNode>(Op)) {
MI->addOperand(MachineOperand::CreateBA(BA->getBlockAddress(),
+ BA->getOffset(),
BA->getTargetFlags()));
} else if (TargetIndexSDNode *TI = dyn_cast<TargetIndexSDNode>(Op)) {
MI->addOperand(MachineOperand::CreateTargetIndex(TI->getIndex(),
@@ -720,7 +721,8 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned,
const MCInstrDesc &II = TII->get(Opc);
unsigned NumResults = CountResults(Node);
unsigned NumImpUses = 0;
- unsigned NodeOperands = countOperands(Node, NumImpUses);
+ unsigned NodeOperands =
+ countOperands(Node, II.getNumOperands() - II.getNumDefs(), NumImpUses);
bool HasPhysRegOuts = NumResults > II.getNumDefs() && II.getImplicitDefs()!=0;
#ifndef NDEBUG
unsigned NumMIOperands = NodeOperands + NumResults;
@@ -870,6 +872,17 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned,
break;
}
+ case ISD::LIFETIME_START:
+ case ISD::LIFETIME_END: {
+ unsigned TarOp = (Node->getOpcode() == ISD::LIFETIME_START) ?
+ TargetOpcode::LIFETIME_START : TargetOpcode::LIFETIME_END;
+
+ FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Node->getOperand(1));
+ BuildMI(*MBB, InsertPos, Node->getDebugLoc(), TII->get(TarOp))
+ .addFrameIndex(FI->getIndex());
+ break;
+ }
+
case ISD::INLINEASM: {
unsigned NumOps = Node->getNumOperands();
if (Node->getOperand(NumOps-1).getValueType() == MVT::Glue)
@@ -884,25 +897,30 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned,
const char *AsmStr = cast<ExternalSymbolSDNode>(AsmStrV)->getSymbol();
MI->addOperand(MachineOperand::CreateES(AsmStr));
- // Add the HasSideEffect and isAlignStack bits.
+ // Add the HasSideEffect, isAlignStack, AsmDialect, MayLoad and MayStore
+ // bits.
int64_t ExtraInfo =
cast<ConstantSDNode>(Node->getOperand(InlineAsm::Op_ExtraInfo))->
getZExtValue();
MI->addOperand(MachineOperand::CreateImm(ExtraInfo));
+ // Remember to operand index of the group flags.
+ SmallVector<unsigned, 8> GroupIdx;
+
// Add all of the operand registers to the instruction.
for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
unsigned Flags =
cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue();
- unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags);
+ const unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags);
+ GroupIdx.push_back(MI->getNumOperands());
MI->addOperand(MachineOperand::CreateImm(Flags));
++i; // Skip the ID value.
switch (InlineAsm::getKind(Flags)) {
default: llvm_unreachable("Bad flags!");
case InlineAsm::Kind_RegDef:
- for (; NumVals; --NumVals, ++i) {
+ for (unsigned j = 0; j != NumVals; ++j, ++i) {
unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg();
// FIXME: Add dead flags for physical and virtual registers defined.
// For now, mark physical register defs as implicit to help fast
@@ -913,7 +931,7 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned,
break;
case InlineAsm::Kind_RegDefEarlyClobber:
case InlineAsm::Kind_Clobber:
- for (; NumVals; --NumVals, ++i) {
+ for (unsigned j = 0; j != NumVals; ++j, ++i) {
unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg();
MI->addOperand(MachineOperand::CreateReg(Reg, /*isDef=*/ true,
/*isImp=*/ TargetRegisterInfo::isPhysicalRegister(Reg),
@@ -928,9 +946,20 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned,
case InlineAsm::Kind_Mem: // Addressing mode.
// The addressing mode has been selected, just add all of the
// operands to the machine instruction.
- for (; NumVals; --NumVals, ++i)
+ for (unsigned j = 0; j != NumVals; ++j, ++i)
AddOperand(MI, Node->getOperand(i), 0, 0, VRBaseMap,
/*IsDebug=*/false, IsClone, IsCloned);
+
+ // Manually set isTied bits.
+ if (InlineAsm::getKind(Flags) == InlineAsm::Kind_RegUse) {
+ unsigned DefGroup = 0;
+ if (InlineAsm::isUseOperandTiedToDef(Flags, DefGroup)) {
+ unsigned DefIdx = GroupIdx[DefGroup] + 1;
+ unsigned UseIdx = GroupIdx.back() + 1;
+ for (unsigned j = 0; j != NumVals; ++j)
+ MI->tieOperands(DefIdx + j, UseIdx + j);
+ }
+ }
break;
}
}
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index 908ebb9..abf40b7 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -22,7 +22,7 @@
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetLowering.h"
-#include "llvm/Target/TargetData.h"
+#include "llvm/DataLayout.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
@@ -718,7 +718,7 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) {
// expand it.
if (!TLI.allowsUnalignedMemoryAccesses(ST->getMemoryVT())) {
Type *Ty = ST->getMemoryVT().getTypeForEVT(*DAG.getContext());
- unsigned ABIAlignment= TLI.getTargetData()->getABITypeAlignment(Ty);
+ unsigned ABIAlignment= TLI.getDataLayout()->getABITypeAlignment(Ty);
if (ST->getAlignment() < ABIAlignment)
ExpandUnalignedStore(cast<StoreSDNode>(Node),
DAG, TLI, this);
@@ -824,7 +824,7 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) {
// expand it.
if (!TLI.allowsUnalignedMemoryAccesses(ST->getMemoryVT())) {
Type *Ty = ST->getMemoryVT().getTypeForEVT(*DAG.getContext());
- unsigned ABIAlignment= TLI.getTargetData()->getABITypeAlignment(Ty);
+ unsigned ABIAlignment= TLI.getDataLayout()->getABITypeAlignment(Ty);
if (ST->getAlignment() < ABIAlignment)
ExpandUnalignedStore(cast<StoreSDNode>(Node), DAG, TLI, this);
}
@@ -869,25 +869,24 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) {
switch (TLI.getOperationAction(Node->getOpcode(), VT)) {
default: llvm_unreachable("This action is not supported yet!");
case TargetLowering::Legal:
- // If this is an unaligned load and the target doesn't support it,
- // expand it.
- if (!TLI.allowsUnalignedMemoryAccesses(LD->getMemoryVT())) {
- Type *Ty = LD->getMemoryVT().getTypeForEVT(*DAG.getContext());
- unsigned ABIAlignment =
- TLI.getTargetData()->getABITypeAlignment(Ty);
- if (LD->getAlignment() < ABIAlignment){
- ExpandUnalignedLoad(cast<LoadSDNode>(Node),
- DAG, TLI, RVal, RChain);
- }
- }
- break;
+ // If this is an unaligned load and the target doesn't support it,
+ // expand it.
+ if (!TLI.allowsUnalignedMemoryAccesses(LD->getMemoryVT())) {
+ Type *Ty = LD->getMemoryVT().getTypeForEVT(*DAG.getContext());
+ unsigned ABIAlignment =
+ TLI.getDataLayout()->getABITypeAlignment(Ty);
+ if (LD->getAlignment() < ABIAlignment){
+ ExpandUnalignedLoad(cast<LoadSDNode>(Node), DAG, TLI, RVal, RChain);
+ }
+ }
+ break;
case TargetLowering::Custom: {
- SDValue Res = TLI.LowerOperation(RVal, DAG);
- if (Res.getNode()) {
- RVal = Res;
- RChain = Res.getValue(1);
- }
- break;
+ SDValue Res = TLI.LowerOperation(RVal, DAG);
+ if (Res.getNode()) {
+ RVal = Res;
+ RChain = Res.getValue(1);
+ }
+ break;
}
case TargetLowering::Promote: {
// Only promote a load of vector type to another.
@@ -1060,7 +1059,7 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) {
Type *Ty =
LD->getMemoryVT().getTypeForEVT(*DAG.getContext());
unsigned ABIAlignment =
- TLI.getTargetData()->getABITypeAlignment(Ty);
+ TLI.getDataLayout()->getABITypeAlignment(Ty);
if (LD->getAlignment() < ABIAlignment){
ExpandUnalignedLoad(cast<LoadSDNode>(Node),
DAG, TLI, Value, Chain);
@@ -1241,6 +1240,19 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
if (Action == TargetLowering::Legal)
Action = TargetLowering::Custom;
break;
+ case ISD::DEBUGTRAP:
+ Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0));
+ if (Action == TargetLowering::Expand) {
+ // replace ISD::DEBUGTRAP with ISD::TRAP
+ SDValue NewVal;
+ NewVal = DAG.getNode(ISD::TRAP, Node->getDebugLoc(), Node->getVTList(),
+ Node->getOperand(0));
+ ReplaceNode(Node, NewVal.getNode());
+ LegalizeOp(NewVal.getNode());
+ return;
+ }
+ break;
+
default:
if (Node->getOpcode() >= ISD::BUILTIN_OP_END) {
Action = TargetLowering::Legal;
@@ -1588,26 +1600,71 @@ void SelectionDAGLegalize::LegalizeSetCCCondCode(EVT VT,
break;
case TargetLowering::Expand: {
ISD::CondCode CC1 = ISD::SETCC_INVALID, CC2 = ISD::SETCC_INVALID;
+ ISD::CondCode InvCC = ISD::SETCC_INVALID;
unsigned Opc = 0;
switch (CCCode) {
default: llvm_unreachable("Don't know how to expand this condition!");
- case ISD::SETOEQ: CC1 = ISD::SETEQ; CC2 = ISD::SETO; Opc = ISD::AND; break;
- case ISD::SETOGT: CC1 = ISD::SETGT; CC2 = ISD::SETO; Opc = ISD::AND; break;
- case ISD::SETOGE: CC1 = ISD::SETGE; CC2 = ISD::SETO; Opc = ISD::AND; break;
- case ISD::SETOLT: CC1 = ISD::SETLT; CC2 = ISD::SETO; Opc = ISD::AND; break;
- case ISD::SETOLE: CC1 = ISD::SETLE; CC2 = ISD::SETO; Opc = ISD::AND; break;
- case ISD::SETONE: CC1 = ISD::SETNE; CC2 = ISD::SETO; Opc = ISD::AND; break;
- case ISD::SETUEQ: CC1 = ISD::SETEQ; CC2 = ISD::SETUO; Opc = ISD::OR; break;
- case ISD::SETUGT: CC1 = ISD::SETGT; CC2 = ISD::SETUO; Opc = ISD::OR; break;
- case ISD::SETUGE: CC1 = ISD::SETGE; CC2 = ISD::SETUO; Opc = ISD::OR; break;
- case ISD::SETULT: CC1 = ISD::SETLT; CC2 = ISD::SETUO; Opc = ISD::OR; break;
- case ISD::SETULE: CC1 = ISD::SETLE; CC2 = ISD::SETUO; Opc = ISD::OR; break;
- case ISD::SETUNE: CC1 = ISD::SETNE; CC2 = ISD::SETUO; Opc = ISD::OR; break;
- // FIXME: Implement more expansions.
- }
-
- SDValue SetCC1 = DAG.getSetCC(dl, VT, LHS, RHS, CC1);
- SDValue SetCC2 = DAG.getSetCC(dl, VT, LHS, RHS, CC2);
+ case ISD::SETO:
+ assert(TLI.getCondCodeAction(ISD::SETOEQ, OpVT)
+ == TargetLowering::Legal
+ && "If SETO is expanded, SETOEQ must be legal!");
+ CC1 = ISD::SETOEQ; CC2 = ISD::SETOEQ; Opc = ISD::AND; break;
+ case ISD::SETUO:
+ assert(TLI.getCondCodeAction(ISD::SETUNE, OpVT)
+ == TargetLowering::Legal
+ && "If SETUO is expanded, SETUNE must be legal!");
+ CC1 = ISD::SETUNE; CC2 = ISD::SETUNE; Opc = ISD::OR; break;
+ case ISD::SETOEQ:
+ case ISD::SETOGT:
+ case ISD::SETOGE:
+ case ISD::SETOLT:
+ case ISD::SETOLE:
+ case ISD::SETONE:
+ case ISD::SETUEQ:
+ case ISD::SETUNE:
+ case ISD::SETUGT:
+ case ISD::SETUGE:
+ case ISD::SETULT:
+ case ISD::SETULE:
+ // If we are floating point, assign and break, otherwise fall through.
+ if (!OpVT.isInteger()) {
+ // We can use the 4th bit to tell if we are the unordered
+ // or ordered version of the opcode.
+ CC2 = ((unsigned)CCCode & 0x8U) ? ISD::SETUO : ISD::SETO;
+ Opc = ((unsigned)CCCode & 0x8U) ? ISD::OR : ISD::AND;
+ CC1 = (ISD::CondCode)(((int)CCCode & 0x7) | 0x10);
+ break;
+ }
+ // Fallthrough if we are unsigned integer.
+ case ISD::SETLE:
+ case ISD::SETGT:
+ case ISD::SETGE:
+ case ISD::SETLT:
+ case ISD::SETNE:
+ case ISD::SETEQ:
+ InvCC = ISD::getSetCCSwappedOperands(CCCode);
+ if (TLI.getCondCodeAction(InvCC, OpVT) == TargetLowering::Expand) {
+ // We only support using the inverted operation and not a
+ // different manner of supporting expanding these cases.
+ llvm_unreachable("Don't know how to expand this condition!");
+ }
+ LHS = DAG.getSetCC(dl, VT, RHS, LHS, InvCC);
+ RHS = SDValue();
+ CC = SDValue();
+ return;
+ }
+
+ SDValue SetCC1, SetCC2;
+ if (CCCode != ISD::SETO && CCCode != ISD::SETUO) {
+ // If we aren't the ordered or unorder operation,
+ // then the pattern is (LHS CC1 RHS) Opc (LHS CC2 RHS).
+ SetCC1 = DAG.getSetCC(dl, VT, LHS, RHS, CC1);
+ SetCC2 = DAG.getSetCC(dl, VT, LHS, RHS, CC2);
+ } else {
+ // Otherwise, the pattern is (LHS CC1 LHS) Opc (RHS CC2 RHS)
+ SetCC1 = DAG.getSetCC(dl, VT, LHS, LHS, CC1);
+ SetCC2 = DAG.getSetCC(dl, VT, RHS, RHS, CC2);
+ }
LHS = DAG.getNode(Opc, dl, VT, SetCC1, SetCC2);
RHS = SDValue();
CC = SDValue();
@@ -1626,7 +1683,7 @@ SDValue SelectionDAGLegalize::EmitStackConvert(SDValue SrcOp,
DebugLoc dl) {
// Create the stack frame object.
unsigned SrcAlign =
- TLI.getTargetData()->getPrefTypeAlignment(SrcOp.getValueType().
+ TLI.getDataLayout()->getPrefTypeAlignment(SrcOp.getValueType().
getTypeForEVT(*DAG.getContext()));
SDValue FIPtr = DAG.CreateStackTemporary(SlotVT, SrcAlign);
@@ -1638,7 +1695,7 @@ SDValue SelectionDAGLegalize::EmitStackConvert(SDValue SrcOp,
unsigned SlotSize = SlotVT.getSizeInBits();
unsigned DestSize = DestVT.getSizeInBits();
Type *DestType = DestVT.getTypeForEVT(*DAG.getContext());
- unsigned DestAlign = TLI.getTargetData()->getPrefTypeAlignment(DestType);
+ unsigned DestAlign = TLI.getDataLayout()->getPrefTypeAlignment(DestType);
// Emit a store to the stack slot. Use a truncstore if the input value is
// later than DestVT.
@@ -2042,7 +2099,7 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned,
SDValue Op0,
EVT DestVT,
DebugLoc dl) {
- if (Op0.getValueType() == MVT::i32) {
+ if (Op0.getValueType() == MVT::i32 && TLI.isTypeLegal(MVT::f64)) {
// simple 32-bit [signed|unsigned] integer to float/double expansion
// Get the stack frame index of a 8 byte buffer.
@@ -2787,7 +2844,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
// Increment the pointer, VAList, to the next vaarg
Tmp3 = DAG.getNode(ISD::ADD, dl, TLI.getPointerTy(), VAList,
- DAG.getConstant(TLI.getTargetData()->
+ DAG.getConstant(TLI.getDataLayout()->
getTypeAllocSize(VT.getTypeForEVT(*DAG.getContext())),
TLI.getPointerTy()));
// Store the incremented VAList to the legalized pointer
@@ -3109,6 +3166,8 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
Tmp3 = Node->getOperand(1);
if (TLI.isOperationLegalOrCustom(DivRemOpc, VT) ||
(isDivRemLibcallAvailable(Node, isSigned, TLI) &&
+ // If div is legal, it's better to do the normal expansion
+ !TLI.isOperationLegalOrCustom(DivOpc, Node->getValueType(0)) &&
useDivRem(Node, isSigned, false))) {
Tmp1 = DAG.getNode(DivRemOpc, dl, VTs, Tmp2, Tmp3).getValue(1);
} else if (TLI.isOperationLegalOrCustom(DivOpc, VT)) {
@@ -3366,7 +3425,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
EVT PTy = TLI.getPointerTy();
- const TargetData &TD = *TLI.getTargetData();
+ const DataLayout &TD = *TLI.getDataLayout();
unsigned EntrySize =
DAG.getMachineFunction().getJumpTableInfo()->getEntrySize(TD);
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
index e393896..92dc5a9 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
@@ -1245,32 +1245,30 @@ bool DAGTypeLegalizer::ExpandFloatOperand(SDNode *N, unsigned OpNo) {
DEBUG(dbgs() << "Expand float operand: "; N->dump(&DAG); dbgs() << "\n");
SDValue Res = SDValue();
- if (TLI.getOperationAction(N->getOpcode(), N->getOperand(OpNo).getValueType())
- == TargetLowering::Custom)
- Res = TLI.LowerOperation(SDValue(N, 0), DAG);
-
- if (Res.getNode() == 0) {
- switch (N->getOpcode()) {
- default:
- #ifndef NDEBUG
- dbgs() << "ExpandFloatOperand Op #" << OpNo << ": ";
- N->dump(&DAG); dbgs() << "\n";
- #endif
- llvm_unreachable("Do not know how to expand this operator's operand!");
-
- case ISD::BITCAST: Res = ExpandOp_BITCAST(N); break;
- case ISD::BUILD_VECTOR: Res = ExpandOp_BUILD_VECTOR(N); break;
- case ISD::EXTRACT_ELEMENT: Res = ExpandOp_EXTRACT_ELEMENT(N); break;
-
- case ISD::BR_CC: Res = ExpandFloatOp_BR_CC(N); break;
- case ISD::FP_ROUND: Res = ExpandFloatOp_FP_ROUND(N); break;
- case ISD::FP_TO_SINT: Res = ExpandFloatOp_FP_TO_SINT(N); break;
- case ISD::FP_TO_UINT: Res = ExpandFloatOp_FP_TO_UINT(N); break;
- case ISD::SELECT_CC: Res = ExpandFloatOp_SELECT_CC(N); break;
- case ISD::SETCC: Res = ExpandFloatOp_SETCC(N); break;
- case ISD::STORE: Res = ExpandFloatOp_STORE(cast<StoreSDNode>(N),
- OpNo); break;
- }
+ // See if the target wants to custom expand this node.
+ if (CustomLowerNode(N, N->getOperand(OpNo).getValueType(), false))
+ return false;
+
+ switch (N->getOpcode()) {
+ default:
+#ifndef NDEBUG
+ dbgs() << "ExpandFloatOperand Op #" << OpNo << ": ";
+ N->dump(&DAG); dbgs() << "\n";
+#endif
+ llvm_unreachable("Do not know how to expand this operator's operand!");
+
+ case ISD::BITCAST: Res = ExpandOp_BITCAST(N); break;
+ case ISD::BUILD_VECTOR: Res = ExpandOp_BUILD_VECTOR(N); break;
+ case ISD::EXTRACT_ELEMENT: Res = ExpandOp_EXTRACT_ELEMENT(N); break;
+
+ case ISD::BR_CC: Res = ExpandFloatOp_BR_CC(N); break;
+ case ISD::FP_ROUND: Res = ExpandFloatOp_FP_ROUND(N); break;
+ case ISD::FP_TO_SINT: Res = ExpandFloatOp_FP_TO_SINT(N); break;
+ case ISD::FP_TO_UINT: Res = ExpandFloatOp_FP_TO_UINT(N); break;
+ case ISD::SELECT_CC: Res = ExpandFloatOp_SELECT_CC(N); break;
+ case ISD::SETCC: Res = ExpandFloatOp_SETCC(N); break;
+ case ISD::STORE: Res = ExpandFloatOp_STORE(cast<StoreSDNode>(N),
+ OpNo); break;
}
// If the result is null, the sub-method took care of registering results etc.
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index e8e968a..a370fae 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -644,8 +644,9 @@ SDValue DAGTypeLegalizer::PromoteIntRes_XMULO(SDNode *N, unsigned ResNo) {
EVT SmallVT = LHS.getValueType();
// To determine if the result overflowed in a larger type, we extend the
- // input to the larger type, do the multiply, then check the high bits of
- // the result to see if the overflow happened.
+ // input to the larger type, do the multiply (checking if it overflows),
+ // then also check the high bits of the result to see if overflow happened
+ // there.
if (N->getOpcode() == ISD::SMULO) {
LHS = SExtPromotedInteger(LHS);
RHS = SExtPromotedInteger(RHS);
@@ -653,24 +654,31 @@ SDValue DAGTypeLegalizer::PromoteIntRes_XMULO(SDNode *N, unsigned ResNo) {
LHS = ZExtPromotedInteger(LHS);
RHS = ZExtPromotedInteger(RHS);
}
- SDValue Mul = DAG.getNode(ISD::MUL, DL, LHS.getValueType(), LHS, RHS);
+ SDVTList VTs = DAG.getVTList(LHS.getValueType(), N->getValueType(1));
+ SDValue Mul = DAG.getNode(N->getOpcode(), DL, VTs, LHS, RHS);
- // Overflow occurred iff the high part of the result does not
- // zero/sign-extend the low part.
+ // Overflow occurred if it occurred in the larger type, or if the high part
+ // of the result does not zero/sign-extend the low part. Check this second
+ // possibility first.
SDValue Overflow;
if (N->getOpcode() == ISD::UMULO) {
- // Unsigned overflow occurred iff the high part is non-zero.
+ // Unsigned overflow occurred if the high part is non-zero.
SDValue Hi = DAG.getNode(ISD::SRL, DL, Mul.getValueType(), Mul,
DAG.getIntPtrConstant(SmallVT.getSizeInBits()));
Overflow = DAG.getSetCC(DL, N->getValueType(1), Hi,
DAG.getConstant(0, Hi.getValueType()), ISD::SETNE);
} else {
- // Signed overflow occurred iff the high part does not sign extend the low.
+ // Signed overflow occurred if the high part does not sign extend the low.
SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, Mul.getValueType(),
Mul, DAG.getValueType(SmallVT));
Overflow = DAG.getSetCC(DL, N->getValueType(1), SExt, Mul, ISD::SETNE);
}
+ // The only other way for overflow to occur is if the multiplication in the
+ // larger type itself overflowed.
+ Overflow = DAG.getNode(ISD::OR, DL, N->getValueType(1), Overflow,
+ SDValue(Mul.getNode(), 1));
+
// Use the calculated overflow everywhere.
ReplaceValueWith(SDValue(N, 1), Overflow);
return Mul;
@@ -2253,32 +2261,35 @@ void DAGTypeLegalizer::ExpandIntRes_UADDSUBO(SDNode *N,
void DAGTypeLegalizer::ExpandIntRes_XMULO(SDNode *N,
SDValue &Lo, SDValue &Hi) {
EVT VT = N->getValueType(0);
- Type *RetTy = VT.getTypeForEVT(*DAG.getContext());
- EVT PtrVT = TLI.getPointerTy();
- Type *PtrTy = PtrVT.getTypeForEVT(*DAG.getContext());
DebugLoc dl = N->getDebugLoc();
// A divide for UMULO should be faster than a function call.
if (N->getOpcode() == ISD::UMULO) {
SDValue LHS = N->getOperand(0), RHS = N->getOperand(1);
- DebugLoc DL = N->getDebugLoc();
- SDValue MUL = DAG.getNode(ISD::MUL, DL, LHS.getValueType(), LHS, RHS);
+ SDValue MUL = DAG.getNode(ISD::MUL, dl, LHS.getValueType(), LHS, RHS);
SplitInteger(MUL, Lo, Hi);
// A divide for UMULO will be faster than a function call. Select to
// make sure we aren't using 0.
SDValue isZero = DAG.getSetCC(dl, TLI.getSetCCResultType(VT),
- RHS, DAG.getConstant(0, VT), ISD::SETNE);
+ RHS, DAG.getConstant(0, VT), ISD::SETEQ);
SDValue NotZero = DAG.getNode(ISD::SELECT, dl, VT, isZero,
DAG.getConstant(1, VT), RHS);
- SDValue DIV = DAG.getNode(ISD::UDIV, DL, LHS.getValueType(), MUL, NotZero);
- SDValue Overflow;
- Overflow = DAG.getSetCC(DL, N->getValueType(1), DIV, LHS, ISD::SETNE);
+ SDValue DIV = DAG.getNode(ISD::UDIV, dl, VT, MUL, NotZero);
+ SDValue Overflow = DAG.getSetCC(dl, N->getValueType(1), DIV, LHS,
+ ISD::SETNE);
+ Overflow = DAG.getNode(ISD::SELECT, dl, N->getValueType(1), isZero,
+ DAG.getConstant(0, N->getValueType(1)),
+ Overflow);
ReplaceValueWith(SDValue(N, 1), Overflow);
return;
}
+ Type *RetTy = VT.getTypeForEVT(*DAG.getContext());
+ EVT PtrVT = TLI.getPointerTy();
+ Type *PtrTy = PtrVT.getTypeForEVT(*DAG.getContext());
+
// Replace this with a libcall that will check overflow.
RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
if (VT == MVT::i32)
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
index 39337ff..644e36e 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
@@ -15,7 +15,7 @@
#include "LegalizeTypes.h"
#include "llvm/CallingConv.h"
-#include "llvm/Target/TargetData.h"
+#include "llvm/DataLayout.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index 94fc976..20b7ce6 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -625,6 +625,7 @@ private:
SDValue WidenVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N);
SDValue WidenVecRes_VSETCC(SDNode* N);
+ SDValue WidenVecRes_Ternary(SDNode *N);
SDValue WidenVecRes_Binary(SDNode *N);
SDValue WidenVecRes_Convert(SDNode *N);
SDValue WidenVecRes_POWI(SDNode *N);
@@ -633,7 +634,7 @@ private:
SDValue WidenVecRes_InregOp(SDNode *N);
// Widen Vector Operand.
- bool WidenVectorOperand(SDNode *N, unsigned ResNo);
+ bool WidenVectorOperand(SDNode *N, unsigned OpNo);
SDValue WidenVecOp_BITCAST(SDNode *N);
SDValue WidenVecOp_CONCAT_VECTORS(SDNode *N);
SDValue WidenVecOp_EXTRACT_VECTOR_ELT(SDNode *N);
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
index 06f6bd6..6bcb3b2 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
@@ -20,7 +20,7 @@
//===----------------------------------------------------------------------===//
#include "LegalizeTypes.h"
-#include "llvm/Target/TargetData.h"
+#include "llvm/DataLayout.h"
using namespace llvm;
//===----------------------------------------------------------------------===//
@@ -94,14 +94,48 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) {
if (InVT.isVector() && OutVT.isInteger()) {
// Handle cases like i64 = BITCAST v1i64 on x86, where the operand
// is legal but the result is not.
- EVT NVT = EVT::getVectorVT(*DAG.getContext(), NOutVT, 2);
+ unsigned NumElems = 2;
+ EVT ElemVT = NOutVT;
+ EVT NVT = EVT::getVectorVT(*DAG.getContext(), ElemVT, NumElems);
+
+ // If <ElemVT * N> is not a legal type, try <ElemVT/2 * (N*2)>.
+ while (!isTypeLegal(NVT)) {
+ unsigned NewSizeInBits = ElemVT.getSizeInBits() / 2;
+ // If the element size is smaller than byte, bail.
+ if (NewSizeInBits < 8)
+ break;
+ NumElems *= 2;
+ ElemVT = EVT::getIntegerVT(*DAG.getContext(), NewSizeInBits);
+ NVT = EVT::getVectorVT(*DAG.getContext(), ElemVT, NumElems);
+ }
if (isTypeLegal(NVT)) {
SDValue CastInOp = DAG.getNode(ISD::BITCAST, dl, NVT, InOp);
- Lo = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NOutVT, CastInOp,
- DAG.getIntPtrConstant(0));
- Hi = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NOutVT, CastInOp,
- DAG.getIntPtrConstant(1));
+
+ SmallVector<SDValue, 8> Vals;
+ for (unsigned i = 0; i < NumElems; ++i)
+ Vals.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ElemVT,
+ CastInOp, DAG.getIntPtrConstant(i)));
+
+ // Build Lo, Hi pair by pairing extracted elements if needed.
+ unsigned Slot = 0;
+ for (unsigned e = Vals.size(); e - Slot > 2; Slot += 2, e += 1) {
+ // Each iteration will BUILD_PAIR two nodes and append the result until
+ // there are only two nodes left, i.e. Lo and Hi.
+ SDValue LHS = Vals[Slot];
+ SDValue RHS = Vals[Slot + 1];
+
+ if (TLI.isBigEndian())
+ std::swap(LHS, RHS);
+
+ Vals.push_back(DAG.getNode(ISD::BUILD_PAIR, dl,
+ EVT::getIntegerVT(
+ *DAG.getContext(),
+ LHS.getValueType().getSizeInBits() << 1),
+ LHS, RHS));
+ }
+ Lo = Vals[Slot++];
+ Hi = Vals[Slot++];
if (TLI.isBigEndian())
std::swap(Lo, Hi);
@@ -116,7 +150,7 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) {
// Create the stack frame object. Make sure it is aligned for both
// the source and expanded destination types.
unsigned Alignment =
- TLI.getTargetData()->getPrefTypeAlignment(NOutVT.
+ TLI.getDataLayout()->getPrefTypeAlignment(NOutVT.
getTypeForEVT(*DAG.getContext()));
SDValue StackPtr = DAG.CreateStackTemporary(InVT, Alignment);
int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index 704f99b..22f8d51 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -64,6 +64,7 @@ class VectorLegalizer {
// Implement vselect in terms of XOR, AND, OR when blend is not supported
// by the target.
SDValue ExpandVSELECT(SDValue Op);
+ SDValue ExpandSELECT(SDValue Op);
SDValue ExpandLoad(SDValue Op);
SDValue ExpandStore(SDValue Op);
SDValue ExpandFNEG(SDValue Op);
@@ -220,6 +221,7 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
case ISD::FRINT:
case ISD::FNEARBYINT:
case ISD::FFLOOR:
+ case ISD::FMA:
case ISD::SIGN_EXTEND_INREG:
QueryType = Node->getValueType(0);
break;
@@ -260,6 +262,8 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
case TargetLowering::Expand:
if (Node->getOpcode() == ISD::VSELECT)
Result = ExpandVSELECT(Op);
+ else if (Node->getOpcode() == ISD::SELECT)
+ Result = ExpandSELECT(Op);
else if (Node->getOpcode() == ISD::UINT_TO_FP)
Result = ExpandUINT_TO_FLOAT(Op);
else if (Node->getOpcode() == ISD::FNEG)
@@ -435,6 +439,66 @@ SDValue VectorLegalizer::ExpandStore(SDValue Op) {
return TF;
}
+SDValue VectorLegalizer::ExpandSELECT(SDValue Op) {
+ // Lower a select instruction where the condition is a scalar and the
+ // operands are vectors. Lower this select to VSELECT and implement it
+ // using XOR AND OR. The selector bit is broadcasted.
+ EVT VT = Op.getValueType();
+ DebugLoc DL = Op.getDebugLoc();
+
+ SDValue Mask = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1);
+ SDValue Op2 = Op.getOperand(2);
+
+ assert(VT.isVector() && !Mask.getValueType().isVector()
+ && Op1.getValueType() == Op2.getValueType() && "Invalid type");
+
+ unsigned NumElem = VT.getVectorNumElements();
+
+ // If we can't even use the basic vector operations of
+ // AND,OR,XOR, we will have to scalarize the op.
+ // Notice that the operation may be 'promoted' which means that it is
+ // 'bitcasted' to another type which is handled.
+ // Also, we need to be able to construct a splat vector using BUILD_VECTOR.
+ if (TLI.getOperationAction(ISD::AND, VT) == TargetLowering::Expand ||
+ TLI.getOperationAction(ISD::XOR, VT) == TargetLowering::Expand ||
+ TLI.getOperationAction(ISD::OR, VT) == TargetLowering::Expand ||
+ TLI.getOperationAction(ISD::BUILD_VECTOR, VT) == TargetLowering::Expand)
+ return DAG.UnrollVectorOp(Op.getNode());
+
+ // Generate a mask operand.
+ EVT MaskTy = TLI.getSetCCResultType(VT);
+ assert(MaskTy.isVector() && "Invalid CC type");
+ assert(MaskTy.getSizeInBits() == Op1.getValueType().getSizeInBits()
+ && "Invalid mask size");
+
+ // What is the size of each element in the vector mask.
+ EVT BitTy = MaskTy.getScalarType();
+
+ Mask = DAG.getNode(ISD::SELECT, DL, BitTy, Mask,
+ DAG.getConstant(APInt::getAllOnesValue(BitTy.getSizeInBits()), BitTy),
+ DAG.getConstant(0, BitTy));
+
+ // Broadcast the mask so that the entire vector is all-one or all zero.
+ SmallVector<SDValue, 8> Ops(NumElem, Mask);
+ Mask = DAG.getNode(ISD::BUILD_VECTOR, DL, MaskTy, &Ops[0], Ops.size());
+
+ // Bitcast the operands to be the same type as the mask.
+ // This is needed when we select between FP types because
+ // the mask is a vector of integers.
+ Op1 = DAG.getNode(ISD::BITCAST, DL, MaskTy, Op1);
+ Op2 = DAG.getNode(ISD::BITCAST, DL, MaskTy, Op2);
+
+ SDValue AllOnes = DAG.getConstant(
+ APInt::getAllOnesValue(BitTy.getSizeInBits()), MaskTy);
+ SDValue NotMask = DAG.getNode(ISD::XOR, DL, MaskTy, Mask, AllOnes);
+
+ Op1 = DAG.getNode(ISD::AND, DL, MaskTy, Op1, Mask);
+ Op2 = DAG.getNode(ISD::AND, DL, MaskTy, Op2, NotMask);
+ SDValue Val = DAG.getNode(ISD::OR, DL, MaskTy, Op1, Op2);
+ return DAG.getNode(ISD::BITCAST, DL, Op.getValueType(), Val);
+}
+
SDValue VectorLegalizer::ExpandVSELECT(SDValue Op) {
// Implement VSELECT in terms of XOR, AND, OR
// on platforms which do not support blend natively.
@@ -449,12 +513,17 @@ SDValue VectorLegalizer::ExpandVSELECT(SDValue Op) {
// AND,OR,XOR, we will have to scalarize the op.
// Notice that the operation may be 'promoted' which means that it is
// 'bitcasted' to another type which is handled.
+ // This operation also isn't safe with AND, OR, XOR when the boolean
+ // type is 0/1 as we need an all ones vector constant to mask with.
+ // FIXME: Sign extend 1 to all ones if thats legal on the target.
if (TLI.getOperationAction(ISD::AND, VT) == TargetLowering::Expand ||
TLI.getOperationAction(ISD::XOR, VT) == TargetLowering::Expand ||
- TLI.getOperationAction(ISD::OR, VT) == TargetLowering::Expand)
+ TLI.getOperationAction(ISD::OR, VT) == TargetLowering::Expand ||
+ TLI.getBooleanContents(true) !=
+ TargetLowering::ZeroOrNegativeOneBooleanContent)
return DAG.UnrollVectorOp(Op.getNode());
- assert(VT.getSizeInBits() == Op.getOperand(1).getValueType().getSizeInBits()
+ assert(VT.getSizeInBits() == Op1.getValueType().getSizeInBits()
&& "Invalid mask size");
// Bitcast the operands to be the same type as the mask.
// This is needed when we select between FP types because
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 4709202..d51a6eb 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -21,7 +21,7 @@
//===----------------------------------------------------------------------===//
#include "LegalizeTypes.h"
-#include "llvm/Target/TargetData.h"
+#include "llvm/DataLayout.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -749,7 +749,7 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo,
SDValue EltPtr = GetVectorElementPointer(StackPtr, EltVT, Idx);
Type *VecType = VecVT.getTypeForEVT(*DAG.getContext());
unsigned Alignment =
- TLI.getTargetData()->getPrefTypeAlignment(VecType);
+ TLI.getDataLayout()->getPrefTypeAlignment(VecType);
Store = DAG.getTruncStore(Store, dl, Elt, EltPtr, MachinePointerInfo(), EltVT,
false, false, 0);
@@ -1366,6 +1366,9 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
case ISD::FTRUNC:
Res = WidenVecRes_Unary(N);
break;
+ case ISD::FMA:
+ Res = WidenVecRes_Ternary(N);
+ break;
}
// If Res is null, the sub-method took care of registering the result.
@@ -1373,6 +1376,16 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
SetWidenedVector(SDValue(N, ResNo), Res);
}
+SDValue DAGTypeLegalizer::WidenVecRes_Ternary(SDNode *N) {
+ // Ternary op widening.
+ DebugLoc dl = N->getDebugLoc();
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue InOp1 = GetWidenedVector(N->getOperand(0));
+ SDValue InOp2 = GetWidenedVector(N->getOperand(1));
+ SDValue InOp3 = GetWidenedVector(N->getOperand(2));
+ return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2, InOp3);
+}
+
SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N) {
// Binary op widening.
unsigned Opcode = N->getOpcode();
@@ -2069,16 +2082,20 @@ SDValue DAGTypeLegalizer::WidenVecRes_VSETCC(SDNode *N) {
//===----------------------------------------------------------------------===//
// Widen Vector Operand
//===----------------------------------------------------------------------===//
-bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned ResNo) {
- DEBUG(dbgs() << "Widen node operand " << ResNo << ": ";
+bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned OpNo) {
+ DEBUG(dbgs() << "Widen node operand " << OpNo << ": ";
N->dump(&DAG);
dbgs() << "\n");
SDValue Res = SDValue();
+ // See if the target wants to custom widen this node.
+ if (CustomLowerNode(N, N->getOperand(OpNo).getValueType(), false))
+ return false;
+
switch (N->getOpcode()) {
default:
#ifndef NDEBUG
- dbgs() << "WidenVectorOperand op #" << ResNo << ": ";
+ dbgs() << "WidenVectorOperand op #" << OpNo << ": ";
N->dump(&DAG);
dbgs() << "\n";
#endif
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SDNodeOrdering.h b/contrib/llvm/lib/CodeGen/SelectionDAG/SDNodeOrdering.h
index f88b26d..d2269f8 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/SDNodeOrdering.h
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SDNodeOrdering.h
@@ -28,8 +28,8 @@ class SDNode;
class SDNodeOrdering {
DenseMap<const SDNode*, unsigned> OrderMap;
- void operator=(const SDNodeOrdering&); // Do not implement.
- SDNodeOrdering(const SDNodeOrdering&); // Do not implement.
+ void operator=(const SDNodeOrdering&) LLVM_DELETED_FUNCTION;
+ SDNodeOrdering(const SDNodeOrdering&) LLVM_DELETED_FUNCTION;
public:
SDNodeOrdering() {}
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
index b7ce48a..2ecdd89 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
@@ -13,11 +13,12 @@
#define DEBUG_TYPE "pre-RA-sched"
#include "ScheduleDAGSDNodes.h"
+#include "InstrEmitter.h"
#include "llvm/InlineAsm.h"
#include "llvm/CodeGen/SchedulerRegistry.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetData.h"
+#include "llvm/DataLayout.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Support/Debug.h"
#include "llvm/ADT/SmallSet.h"
@@ -34,6 +35,10 @@ STATISTIC(NumPRCopies, "Number of physical copies");
static RegisterScheduler
fastDAGScheduler("fast", "Fast suboptimal list scheduling",
createFastDAGScheduler);
+static RegisterScheduler
+ linearizeDAGScheduler("linearize", "Linearize DAG, no scheduling",
+ createDAGLinearizer);
+
namespace {
/// FastPriorityQueue - A degenerate priority queue that considers
@@ -331,7 +336,9 @@ SUnit *ScheduleDAGFast::CopyAndMoveSuccessors(SUnit *SU) {
}
}
if (isNewLoad) {
- AddPred(NewSU, SDep(LoadSU, SDep::Order, LoadSU->Latency));
+ SDep D(LoadSU, SDep::Barrier);
+ D.setLatency(LoadSU->Latency);
+ AddPred(NewSU, D);
}
++NumUnfolds;
@@ -407,9 +414,12 @@ void ScheduleDAGFast::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg,
for (unsigned i = 0, e = DelDeps.size(); i != e; ++i) {
RemovePred(DelDeps[i].first, DelDeps[i].second);
}
-
- AddPred(CopyFromSU, SDep(SU, SDep::Data, SU->Latency, Reg));
- AddPred(CopyToSU, SDep(CopyFromSU, SDep::Data, CopyFromSU->Latency, 0));
+ SDep FromDep(SU, SDep::Data, Reg);
+ FromDep.setLatency(SU->Latency);
+ AddPred(CopyFromSU, FromDep);
+ SDep ToDep(CopyFromSU, SDep::Data, 0);
+ ToDep.setLatency(CopyFromSU->Latency);
+ AddPred(CopyToSU, ToDep);
Copies.push_back(CopyFromSU);
Copies.push_back(CopyToSU);
@@ -586,18 +596,14 @@ void ScheduleDAGFast::ListScheduleBottomUp() {
InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies);
DEBUG(dbgs() << "Adding an edge from SU # " << TrySU->NodeNum
<< " to SU #" << Copies.front()->NodeNum << "\n");
- AddPred(TrySU, SDep(Copies.front(), SDep::Order, /*Latency=*/1,
- /*Reg=*/0, /*isNormalMemory=*/false,
- /*isMustAlias=*/false, /*isArtificial=*/true));
+ AddPred(TrySU, SDep(Copies.front(), SDep::Artificial));
NewDef = Copies.back();
}
DEBUG(dbgs() << "Adding an edge from SU # " << NewDef->NodeNum
<< " to SU #" << TrySU->NodeNum << "\n");
LiveRegDefs[Reg] = NewDef;
- AddPred(NewDef, SDep(TrySU, SDep::Order, /*Latency=*/1,
- /*Reg=*/0, /*isNormalMemory=*/false,
- /*isMustAlias=*/false, /*isArtificial=*/true));
+ AddPred(NewDef, SDep(TrySU, SDep::Artificial));
TrySU->isAvailable = false;
CurSU = NewDef;
}
@@ -629,6 +635,155 @@ void ScheduleDAGFast::ListScheduleBottomUp() {
#endif
}
+
+namespace {
+//===----------------------------------------------------------------------===//
+// ScheduleDAGLinearize - No scheduling scheduler, it simply linearize the
+// DAG in topological order.
+// IMPORTANT: this may not work for targets with phyreg dependency.
+//
+class ScheduleDAGLinearize : public ScheduleDAGSDNodes {
+public:
+ ScheduleDAGLinearize(MachineFunction &mf) : ScheduleDAGSDNodes(mf) {}
+
+ void Schedule();
+
+ MachineBasicBlock *EmitSchedule(MachineBasicBlock::iterator &InsertPos);
+
+private:
+ std::vector<SDNode*> Sequence;
+ DenseMap<SDNode*, SDNode*> GluedMap; // Cache glue to its user
+
+ void ScheduleNode(SDNode *N);
+};
+} // end anonymous namespace
+
+void ScheduleDAGLinearize::ScheduleNode(SDNode *N) {
+ if (N->getNodeId() != 0)
+ llvm_unreachable(0);
+
+ if (!N->isMachineOpcode() &&
+ (N->getOpcode() == ISD::EntryToken || isPassiveNode(N)))
+ // These nodes do not need to be translated into MIs.
+ return;
+
+ DEBUG(dbgs() << "\n*** Scheduling: ");
+ DEBUG(N->dump(DAG));
+ Sequence.push_back(N);
+
+ unsigned NumOps = N->getNumOperands();
+ if (unsigned NumLeft = NumOps) {
+ SDNode *GluedOpN = 0;
+ do {
+ const SDValue &Op = N->getOperand(NumLeft-1);
+ SDNode *OpN = Op.getNode();
+
+ if (NumLeft == NumOps && Op.getValueType() == MVT::Glue) {
+ // Schedule glue operand right above N.
+ GluedOpN = OpN;
+ assert(OpN->getNodeId() != 0 && "Glue operand not ready?");
+ OpN->setNodeId(0);
+ ScheduleNode(OpN);
+ continue;
+ }
+
+ if (OpN == GluedOpN)
+ // Glue operand is already scheduled.
+ continue;
+
+ DenseMap<SDNode*, SDNode*>::iterator DI = GluedMap.find(OpN);
+ if (DI != GluedMap.end() && DI->second != N)
+ // Users of glues are counted against the glued users.
+ OpN = DI->second;
+
+ unsigned Degree = OpN->getNodeId();
+ assert(Degree > 0 && "Predecessor over-released!");
+ OpN->setNodeId(--Degree);
+ if (Degree == 0)
+ ScheduleNode(OpN);
+ } while (--NumLeft);
+ }
+}
+
+/// findGluedUser - Find the representative use of a glue value by walking
+/// the use chain.
+static SDNode *findGluedUser(SDNode *N) {
+ while (SDNode *Glued = N->getGluedUser())
+ N = Glued;
+ return N;
+}
+
+void ScheduleDAGLinearize::Schedule() {
+ DEBUG(dbgs() << "********** DAG Linearization **********\n");
+
+ SmallVector<SDNode*, 8> Glues;
+ unsigned DAGSize = 0;
+ for (SelectionDAG::allnodes_iterator I = DAG->allnodes_begin(),
+ E = DAG->allnodes_end(); I != E; ++I) {
+ SDNode *N = I;
+
+ // Use node id to record degree.
+ unsigned Degree = N->use_size();
+ N->setNodeId(Degree);
+ unsigned NumVals = N->getNumValues();
+ if (NumVals && N->getValueType(NumVals-1) == MVT::Glue &&
+ N->hasAnyUseOfValue(NumVals-1)) {
+ SDNode *User = findGluedUser(N);
+ if (User) {
+ Glues.push_back(N);
+ GluedMap.insert(std::make_pair(N, User));
+ }
+ }
+
+ if (N->isMachineOpcode() ||
+ (N->getOpcode() != ISD::EntryToken && !isPassiveNode(N)))
+ ++DAGSize;
+ }
+
+ for (unsigned i = 0, e = Glues.size(); i != e; ++i) {
+ SDNode *Glue = Glues[i];
+ SDNode *GUser = GluedMap[Glue];
+ unsigned Degree = Glue->getNodeId();
+ unsigned UDegree = GUser->getNodeId();
+
+ // Glue user must be scheduled together with the glue operand. So other
+ // users of the glue operand must be treated as its users.
+ SDNode *ImmGUser = Glue->getGluedUser();
+ for (SDNode::use_iterator ui = Glue->use_begin(), ue = Glue->use_end();
+ ui != ue; ++ui)
+ if (*ui == ImmGUser)
+ --Degree;
+ GUser->setNodeId(UDegree + Degree);
+ Glue->setNodeId(1);
+ }
+
+ Sequence.reserve(DAGSize);
+ ScheduleNode(DAG->getRoot().getNode());
+}
+
+MachineBasicBlock*
+ScheduleDAGLinearize::EmitSchedule(MachineBasicBlock::iterator &InsertPos) {
+ InstrEmitter Emitter(BB, InsertPos);
+ DenseMap<SDValue, unsigned> VRBaseMap;
+
+ DEBUG({
+ dbgs() << "\n*** Final schedule ***\n";
+ });
+
+ // FIXME: Handle dbg_values.
+ unsigned NumNodes = Sequence.size();
+ for (unsigned i = 0; i != NumNodes; ++i) {
+ SDNode *N = Sequence[NumNodes-i-1];
+ DEBUG(N->dump(DAG));
+ Emitter.EmitNode(N, false, false, VRBaseMap);
+ }
+
+ DEBUG(dbgs() << '\n');
+
+ InsertPos = Emitter.getInsertPos();
+ return Emitter.getBlock();
+}
+
//===----------------------------------------------------------------------===//
// Public Constructor Functions
//===----------------------------------------------------------------------===//
@@ -637,3 +792,8 @@ llvm::ScheduleDAGSDNodes *
llvm::createFastDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {
return new ScheduleDAGFast(*IS->MF);
}
+
+llvm::ScheduleDAGSDNodes *
+llvm::createDAGLinearizer(SelectionDAGISel *IS, CodeGenOpt::Level) {
+ return new ScheduleDAGLinearize(*IS->MF);
+}
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
index bf0a437..c554569 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
@@ -22,7 +22,7 @@
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetData.h"
+#include "llvm/DataLayout.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetLowering.h"
@@ -656,6 +656,8 @@ void ScheduleDAGRRList::EmitNode(SUnit *SU) {
break;
case ISD::MERGE_VALUES:
case ISD::TokenFactor:
+ case ISD::LIFETIME_START:
+ case ISD::LIFETIME_END:
case ISD::CopyToReg:
case ISD::CopyFromReg:
case ISD::EH_LABEL:
@@ -1056,7 +1058,9 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
// Add a data dependency to reflect that NewSU reads the value defined
// by LoadSU.
- AddPred(NewSU, SDep(LoadSU, SDep::Data, LoadSU->Latency));
+ SDep D(LoadSU, SDep::Data, 0);
+ D.setLatency(LoadSU->Latency);
+ AddPred(NewSU, D);
if (isNewLoad)
AvailableQueue->addNode(LoadSU);
@@ -1138,17 +1142,18 @@ void ScheduleDAGRRList::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg,
// Avoid scheduling the def-side copy before other successors. Otherwise
// we could introduce another physreg interference on the copy and
// continue inserting copies indefinitely.
- SDep D(CopyFromSU, SDep::Order, /*Latency=*/0,
- /*Reg=*/0, /*isNormalMemory=*/false,
- /*isMustAlias=*/false, /*isArtificial=*/true);
- AddPred(SuccSU, D);
+ AddPred(SuccSU, SDep(CopyFromSU, SDep::Artificial));
}
}
for (unsigned i = 0, e = DelDeps.size(); i != e; ++i)
RemovePred(DelDeps[i].first, DelDeps[i].second);
- AddPred(CopyFromSU, SDep(SU, SDep::Data, SU->Latency, Reg));
- AddPred(CopyToSU, SDep(CopyFromSU, SDep::Data, CopyFromSU->Latency, 0));
+ SDep FromDep(SU, SDep::Data, Reg);
+ FromDep.setLatency(SU->Latency);
+ AddPred(CopyFromSU, FromDep);
+ SDep ToDep(CopyFromSU, SDep::Data, 0);
+ ToDep.setLatency(CopyFromSU->Latency);
+ AddPred(CopyToSU, ToDep);
AvailableQueue->updateNode(SU);
AvailableQueue->addNode(CopyFromSU);
@@ -1357,9 +1362,7 @@ SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() {
if (!BtSU->isPending)
AvailableQueue->remove(BtSU);
}
- AddPred(TrySU, SDep(BtSU, SDep::Order, /*Latency=*/1,
- /*Reg=*/0, /*isNormalMemory=*/false,
- /*isMustAlias=*/false, /*isArtificial=*/true));
+ AddPred(TrySU, SDep(BtSU, SDep::Artificial));
// If one or more successors has been unscheduled, then the current
// node is no longer avaialable. Schedule a successor that's now
@@ -1411,20 +1414,14 @@ SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() {
InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies);
DEBUG(dbgs() << " Adding an edge from SU #" << TrySU->NodeNum
<< " to SU #" << Copies.front()->NodeNum << "\n");
- AddPred(TrySU, SDep(Copies.front(), SDep::Order, /*Latency=*/1,
- /*Reg=*/0, /*isNormalMemory=*/false,
- /*isMustAlias=*/false,
- /*isArtificial=*/true));
+ AddPred(TrySU, SDep(Copies.front(), SDep::Artificial));
NewDef = Copies.back();
}
DEBUG(dbgs() << " Adding an edge from SU #" << NewDef->NodeNum
<< " to SU #" << TrySU->NodeNum << "\n");
LiveRegDefs[Reg] = NewDef;
- AddPred(NewDef, SDep(TrySU, SDep::Order, /*Latency=*/1,
- /*Reg=*/0, /*isNormalMemory=*/false,
- /*isMustAlias=*/false,
- /*isArtificial=*/true));
+ AddPred(NewDef, SDep(TrySU, SDep::Artificial));
TrySU->isAvailable = false;
CurSU = NewDef;
}
@@ -1756,6 +1753,7 @@ public:
return V;
}
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void dump(ScheduleDAG *DAG) const {
// Emulate pop() without clobbering NodeQueueIds.
std::vector<SUnit*> DumpQueue = Queue;
@@ -1766,6 +1764,7 @@ public:
SU->dump(DAG);
}
}
+#endif
};
typedef RegReductionPriorityQueue<bu_ls_rr_sort>
@@ -1893,6 +1892,7 @@ unsigned RegReductionPQBase::getNodePriority(const SUnit *SU) const {
//===----------------------------------------------------------------------===//
void RegReductionPQBase::dumpRegPressure() const {
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(),
E = TRI->regclass_end(); I != E; ++I) {
const TargetRegisterClass *RC = *I;
@@ -1902,6 +1902,7 @@ void RegReductionPQBase::dumpRegPressure() const {
DEBUG(dbgs() << RC->getName() << ": " << RP << " / " << RegLimit[Id]
<< '\n');
}
+#endif
}
bool RegReductionPQBase::HighRegPressure(const SUnit *SU) const {
@@ -2930,10 +2931,7 @@ void RegReductionPQBase::AddPseudoTwoAddrDeps() {
!scheduleDAG->IsReachable(SuccSU, SU)) {
DEBUG(dbgs() << " Adding a pseudo-two-addr edge from SU #"
<< SU->NodeNum << " to SU #" << SuccSU->NodeNum << "\n");
- scheduleDAG->AddPred(SU, SDep(SuccSU, SDep::Order, /*Latency=*/0,
- /*Reg=*/0, /*isNormalMemory=*/false,
- /*isMustAlias=*/false,
- /*isArtificial=*/true));
+ scheduleDAG->AddPred(SU, SDep(SuccSU, SDep::Artificial));
}
}
}
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
index 748668c..a197fcb 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
@@ -485,14 +485,15 @@ void ScheduleDAGSDNodes::AddSchedEdges() {
if(isChain && OpN->getOpcode() == ISD::TokenFactor)
OpLatency = 0;
- const SDep &dep = SDep(OpSU, isChain ? SDep::Order : SDep::Data,
- OpLatency, PhysReg);
+ SDep Dep = isChain ? SDep(OpSU, SDep::Barrier)
+ : SDep(OpSU, SDep::Data, PhysReg);
+ Dep.setLatency(OpLatency);
if (!isChain && !UnitLatencies) {
- computeOperandLatency(OpN, N, i, const_cast<SDep &>(dep));
- ST.adjustSchedDependency(OpSU, SU, const_cast<SDep &>(dep));
+ computeOperandLatency(OpN, N, i, Dep);
+ ST.adjustSchedDependency(OpSU, SU, Dep);
}
- if (!SU->addPred(dep) && !dep.isCtrl() && OpSU->NumRegDefsLeft > 1) {
+ if (!SU->addPred(Dep) && !Dep.isCtrl() && OpSU->NumRegDefsLeft > 1) {
// Multiple register uses are combined in the same SUnit. For example,
// we could have a set of glued nodes with all their defs consumed by
// another set of glued nodes. Register pressure tracking sees this as
@@ -643,6 +644,7 @@ void ScheduleDAGSDNodes::computeOperandLatency(SDNode *Def, SDNode *Use,
}
void ScheduleDAGSDNodes::dumpNode(const SUnit *SU) const {
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
if (!SU->getNode()) {
dbgs() << "PHYS REG COPY\n";
return;
@@ -659,8 +661,10 @@ void ScheduleDAGSDNodes::dumpNode(const SUnit *SU) const {
dbgs() << "\n";
GluedNodes.pop_back();
}
+#endif
}
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void ScheduleDAGSDNodes::dumpSchedule() const {
for (unsigned i = 0, e = Sequence.size(); i != e; i++) {
if (SUnit *SU = Sequence[i])
@@ -669,6 +673,7 @@ void ScheduleDAGSDNodes::dumpSchedule() const {
dbgs() << "**** NOOP ****\n";
}
}
+#endif
#ifndef NDEBUG
/// VerifyScheduledSequence - Verify that all SUnits were scheduled and that
@@ -827,8 +832,7 @@ EmitSchedule(MachineBasicBlock::iterator &InsertPos) {
}
SmallVector<SDNode *, 4> GluedNodes;
- for (SDNode *N = SU->getNode()->getGluedNode(); N;
- N = N->getGluedNode())
+ for (SDNode *N = SU->getNode()->getGluedNode(); N; N = N->getGluedNode())
GluedNodes.push_back(N);
while (!GluedNodes.empty()) {
SDNode *N = GluedNodes.back();
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
index 84e41fc..907356f 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
@@ -114,7 +114,8 @@ namespace llvm {
/// EmitSchedule - Insert MachineInstrs into the MachineBasicBlock
/// according to the order specified in Sequence.
///
- MachineBasicBlock *EmitSchedule(MachineBasicBlock::iterator &InsertPos);
+ virtual MachineBasicBlock*
+ EmitSchedule(MachineBasicBlock::iterator &InsertPos);
virtual void dumpNode(const SUnit *SU) const;
@@ -158,6 +159,12 @@ namespace llvm {
void InitNodeNumDefs();
};
+ protected:
+ /// ForceUnitLatencies - Return true if all scheduling edges should be given
+ /// a latency value of one. The default is to return false; schedulers may
+ /// override this as needed.
+ virtual bool forceUnitLatencies() const { return false; }
+
private:
/// ClusterNeighboringLoads - Cluster loads from "near" addresses into
/// combined SUnits.
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp
index c851291..30f03ac 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp
@@ -25,7 +25,7 @@
#include "llvm/CodeGen/SchedulerRegistry.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetData.h"
+#include "llvm/DataLayout.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index f4fe892..f000ce3 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -29,7 +29,7 @@
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetData.h"
+#include "llvm/DataLayout.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetSelectionDAGInfo.h"
#include "llvm/Target/TargetOptions.h"
@@ -91,11 +91,6 @@ bool ConstantFPSDNode::isValueValidForType(EVT VT,
const APFloat& Val) {
assert(VT.isFloatingPoint() && "Can only convert between FP types");
- // PPC long double cannot be converted to any other type.
- if (VT == MVT::ppcf128 ||
- &Val.getSemantics() == &APFloat::PPCDoubleDouble)
- return false;
-
// convert modifies in place, so make a copy.
APFloat Val2 = APFloat(Val);
bool losesInfo;
@@ -136,13 +131,11 @@ bool ISD::isBuildVectorAllOnes(const SDNode *N) {
// constants are.
SDValue NotZero = N->getOperand(i);
unsigned EltSize = N->getValueType(0).getVectorElementType().getSizeInBits();
- if (isa<ConstantSDNode>(NotZero)) {
- if (cast<ConstantSDNode>(NotZero)->getAPIntValue().countTrailingOnes() <
- EltSize)
+ if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(NotZero)) {
+ if (CN->getAPIntValue().countTrailingOnes() < EltSize)
return false;
- } else if (isa<ConstantFPSDNode>(NotZero)) {
- if (cast<ConstantFPSDNode>(NotZero)->getValueAPF()
- .bitcastToAPInt().countTrailingOnes() < EltSize)
+ } else if (ConstantFPSDNode *CFPN = dyn_cast<ConstantFPSDNode>(NotZero)) {
+ if (CFPN->getValueAPF().bitcastToAPInt().countTrailingOnes() < EltSize)
return false;
} else
return false;
@@ -179,11 +172,11 @@ bool ISD::isBuildVectorAllZeros(const SDNode *N) {
// Do not accept build_vectors that aren't all constants or which have non-0
// elements.
SDValue Zero = N->getOperand(i);
- if (isa<ConstantSDNode>(Zero)) {
- if (!cast<ConstantSDNode>(Zero)->isNullValue())
+ if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Zero)) {
+ if (!CN->isNullValue())
return false;
- } else if (isa<ConstantFPSDNode>(Zero)) {
- if (!cast<ConstantFPSDNode>(Zero)->getValueAPF().isPosZero())
+ } else if (ConstantFPSDNode *CFPN = dyn_cast<ConstantFPSDNode>(Zero)) {
+ if (!CFPN->getValueAPF().isPosZero())
return false;
} else
return false;
@@ -494,8 +487,10 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) {
}
case ISD::TargetBlockAddress:
case ISD::BlockAddress: {
- ID.AddPointer(cast<BlockAddressSDNode>(N)->getBlockAddress());
- ID.AddInteger(cast<BlockAddressSDNode>(N)->getTargetFlags());
+ const BlockAddressSDNode *BA = cast<BlockAddressSDNode>(N);
+ ID.AddPointer(BA->getBlockAddress());
+ ID.AddInteger(BA->getOffset());
+ ID.AddInteger(BA->getTargetFlags());
break;
}
} // end switch (N->getOpcode())
@@ -883,7 +878,7 @@ unsigned SelectionDAG::getEVTAlignment(EVT VT) const {
PointerType::get(Type::getInt8Ty(*getContext()), 0) :
VT.getTypeForEVT(*getContext());
- return TLI.getTargetData()->getABITypeAlignment(Ty);
+ return TLI.getDataLayout()->getABITypeAlignment(Ty);
}
// EntryNode could meaningfully have debug info if we can find it...
@@ -1097,10 +1092,9 @@ SDValue SelectionDAG::getGlobalAddress(const GlobalValue *GV, DebugLoc DL,
"Cannot set target flags on target-independent globals");
// Truncate (with sign-extension) the offset value to the pointer size.
- EVT PTy = TLI.getPointerTy();
- unsigned BitWidth = PTy.getSizeInBits();
+ unsigned BitWidth = TLI.getPointerTy().getSizeInBits();
if (BitWidth < 64)
- Offset = (Offset << (64 - BitWidth) >> (64 - BitWidth));
+ Offset = SignExtend64(Offset, BitWidth);
const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV);
if (!GVar) {
@@ -1174,7 +1168,7 @@ SDValue SelectionDAG::getConstantPool(const Constant *C, EVT VT,
assert((TargetFlags == 0 || isTarget) &&
"Cannot set target flags on target-independent globals");
if (Alignment == 0)
- Alignment = TLI.getTargetData()->getPrefTypeAlignment(C->getType());
+ Alignment = TLI.getDataLayout()->getPrefTypeAlignment(C->getType());
unsigned Opc = isTarget ? ISD::TargetConstantPool : ISD::ConstantPool;
FoldingSetNodeID ID;
AddNodeIDNode(ID, Opc, getVTList(VT), 0, 0);
@@ -1201,7 +1195,7 @@ SDValue SelectionDAG::getConstantPool(MachineConstantPoolValue *C, EVT VT,
assert((TargetFlags == 0 || isTarget) &&
"Cannot set target flags on target-independent globals");
if (Alignment == 0)
- Alignment = TLI.getTargetData()->getPrefTypeAlignment(C->getType());
+ Alignment = TLI.getDataLayout()->getPrefTypeAlignment(C->getType());
unsigned Opc = isTarget ? ISD::TargetConstantPool : ISD::ConstantPool;
FoldingSetNodeID ID;
AddNodeIDNode(ID, Opc, getVTList(VT), 0, 0);
@@ -1471,6 +1465,7 @@ SDValue SelectionDAG::getEHLabel(DebugLoc dl, SDValue Root, MCSymbol *Label) {
SDValue SelectionDAG::getBlockAddress(const BlockAddress *BA, EVT VT,
+ int64_t Offset,
bool isTarget,
unsigned char TargetFlags) {
unsigned Opc = isTarget ? ISD::TargetBlockAddress : ISD::BlockAddress;
@@ -1478,12 +1473,14 @@ SDValue SelectionDAG::getBlockAddress(const BlockAddress *BA, EVT VT,
FoldingSetNodeID ID;
AddNodeIDNode(ID, Opc, getVTList(VT), 0, 0);
ID.AddPointer(BA);
+ ID.AddInteger(Offset);
ID.AddInteger(TargetFlags);
void *IP = 0;
if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
return SDValue(E, 0);
- SDNode *N = new (NodeAllocator) BlockAddressSDNode(Opc, VT, BA, TargetFlags);
+ SDNode *N = new (NodeAllocator) BlockAddressSDNode(Opc, VT, BA, Offset,
+ TargetFlags);
CSEMap.InsertNode(N, IP);
AllNodes.push_back(N);
return SDValue(N, 0);
@@ -1542,7 +1539,7 @@ SDValue SelectionDAG::CreateStackTemporary(EVT VT, unsigned minAlign) {
unsigned ByteSize = VT.getStoreSize();
Type *Ty = VT.getTypeForEVT(*getContext());
unsigned StackAlign =
- std::max((unsigned)TLI.getTargetData()->getPrefTypeAlignment(Ty), minAlign);
+ std::max((unsigned)TLI.getDataLayout()->getPrefTypeAlignment(Ty), minAlign);
int FrameIdx = FrameInfo->CreateStackObject(ByteSize, StackAlign, false);
return getFrameIndex(FrameIdx, TLI.getPointerTy());
@@ -1555,7 +1552,7 @@ SDValue SelectionDAG::CreateStackTemporary(EVT VT1, EVT VT2) {
VT2.getStoreSizeInBits())/8;
Type *Ty1 = VT1.getTypeForEVT(*getContext());
Type *Ty2 = VT2.getTypeForEVT(*getContext());
- const TargetData *TD = TLI.getTargetData();
+ const DataLayout *TD = TLI.getDataLayout();
unsigned Align = std::max(TD->getPrefTypeAlignment(Ty1),
TD->getPrefTypeAlignment(Ty2));
@@ -1610,10 +1607,6 @@ SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1,
}
if (ConstantFPSDNode *N1C = dyn_cast<ConstantFPSDNode>(N1.getNode())) {
if (ConstantFPSDNode *N2C = dyn_cast<ConstantFPSDNode>(N2.getNode())) {
- // No compile time operations on this type yet.
- if (N1C->getValueType(0) == MVT::ppcf128)
- return SDValue();
-
APFloat::cmpResult R = N1C->getValueAPF().compare(N2C->getValueAPF());
switch (Cond) {
default: break;
@@ -2445,8 +2438,6 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL,
return getConstant(Val.zextOrTrunc(VT.getSizeInBits()), VT);
case ISD::UINT_TO_FP:
case ISD::SINT_TO_FP: {
- // No compile time operations on ppcf128.
- if (VT == MVT::ppcf128) break;
APFloat apf(APInt::getNullValue(VT.getSizeInBits()));
(void)apf.convertFromAPInt(Val,
Opcode==ISD::SINT_TO_FP,
@@ -2455,9 +2446,9 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL,
}
case ISD::BITCAST:
if (VT == MVT::f32 && C->getValueType(0) == MVT::i32)
- return getConstantFP(Val.bitsToFloat(), VT);
+ return getConstantFP(APFloat(Val), VT);
else if (VT == MVT::f64 && C->getValueType(0) == MVT::i64)
- return getConstantFP(Val.bitsToDouble(), VT);
+ return getConstantFP(APFloat(Val), VT);
break;
case ISD::BSWAP:
return getConstant(Val.byteSwap(), VT);
@@ -2475,61 +2466,59 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL,
// Constant fold unary operations with a floating point constant operand.
if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Operand.getNode())) {
APFloat V = C->getValueAPF(); // make copy
- if (VT != MVT::ppcf128 && Operand.getValueType() != MVT::ppcf128) {
- switch (Opcode) {
- case ISD::FNEG:
- V.changeSign();
+ switch (Opcode) {
+ case ISD::FNEG:
+ V.changeSign();
+ return getConstantFP(V, VT);
+ case ISD::FABS:
+ V.clearSign();
+ return getConstantFP(V, VT);
+ case ISD::FCEIL: {
+ APFloat::opStatus fs = V.roundToIntegral(APFloat::rmTowardPositive);
+ if (fs == APFloat::opOK || fs == APFloat::opInexact)
return getConstantFP(V, VT);
- case ISD::FABS:
- V.clearSign();
+ break;
+ }
+ case ISD::FTRUNC: {
+ APFloat::opStatus fs = V.roundToIntegral(APFloat::rmTowardZero);
+ if (fs == APFloat::opOK || fs == APFloat::opInexact)
return getConstantFP(V, VT);
- case ISD::FCEIL: {
- APFloat::opStatus fs = V.roundToIntegral(APFloat::rmTowardPositive);
- if (fs == APFloat::opOK || fs == APFloat::opInexact)
- return getConstantFP(V, VT);
- break;
- }
- case ISD::FTRUNC: {
- APFloat::opStatus fs = V.roundToIntegral(APFloat::rmTowardZero);
- if (fs == APFloat::opOK || fs == APFloat::opInexact)
- return getConstantFP(V, VT);
- break;
- }
- case ISD::FFLOOR: {
- APFloat::opStatus fs = V.roundToIntegral(APFloat::rmTowardNegative);
- if (fs == APFloat::opOK || fs == APFloat::opInexact)
- return getConstantFP(V, VT);
- break;
- }
- case ISD::FP_EXTEND: {
- bool ignored;
- // This can return overflow, underflow, or inexact; we don't care.
- // FIXME need to be more flexible about rounding mode.
- (void)V.convert(*EVTToAPFloatSemantics(VT),
- APFloat::rmNearestTiesToEven, &ignored);
+ break;
+ }
+ case ISD::FFLOOR: {
+ APFloat::opStatus fs = V.roundToIntegral(APFloat::rmTowardNegative);
+ if (fs == APFloat::opOK || fs == APFloat::opInexact)
return getConstantFP(V, VT);
- }
- case ISD::FP_TO_SINT:
- case ISD::FP_TO_UINT: {
- integerPart x[2];
- bool ignored;
- assert(integerPartWidth >= 64);
- // FIXME need to be more flexible about rounding mode.
- APFloat::opStatus s = V.convertToInteger(x, VT.getSizeInBits(),
- Opcode==ISD::FP_TO_SINT,
- APFloat::rmTowardZero, &ignored);
- if (s==APFloat::opInvalidOp) // inexact is OK, in fact usual
- break;
- APInt api(VT.getSizeInBits(), x);
- return getConstant(api, VT);
- }
- case ISD::BITCAST:
- if (VT == MVT::i32 && C->getValueType(0) == MVT::f32)
- return getConstant((uint32_t)V.bitcastToAPInt().getZExtValue(), VT);
- else if (VT == MVT::i64 && C->getValueType(0) == MVT::f64)
- return getConstant(V.bitcastToAPInt().getZExtValue(), VT);
+ break;
+ }
+ case ISD::FP_EXTEND: {
+ bool ignored;
+ // This can return overflow, underflow, or inexact; we don't care.
+ // FIXME need to be more flexible about rounding mode.
+ (void)V.convert(*EVTToAPFloatSemantics(VT),
+ APFloat::rmNearestTiesToEven, &ignored);
+ return getConstantFP(V, VT);
+ }
+ case ISD::FP_TO_SINT:
+ case ISD::FP_TO_UINT: {
+ integerPart x[2];
+ bool ignored;
+ assert(integerPartWidth >= 64);
+ // FIXME need to be more flexible about rounding mode.
+ APFloat::opStatus s = V.convertToInteger(x, VT.getSizeInBits(),
+ Opcode==ISD::FP_TO_SINT,
+ APFloat::rmTowardZero, &ignored);
+ if (s==APFloat::opInvalidOp) // inexact is OK, in fact usual
break;
- }
+ APInt api(VT.getSizeInBits(), x);
+ return getConstant(api, VT);
+ }
+ case ISD::BITCAST:
+ if (VT == MVT::i32 && C->getValueType(0) == MVT::f32)
+ return getConstant((uint32_t)V.bitcastToAPInt().getZExtValue(), VT);
+ else if (VT == MVT::i64 && C->getValueType(0) == MVT::f64)
+ return getConstant(V.bitcastToAPInt().getZExtValue(), VT);
+ break;
}
}
@@ -2817,6 +2806,24 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,
if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N2))
if (CFP->getValueAPF().isZero())
return N1;
+ } else if (Opcode == ISD::FMUL) {
+ ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N1);
+ SDValue V = N2;
+
+ // If the first operand isn't the constant, try the second
+ if (!CFP) {
+ CFP = dyn_cast<ConstantFPSDNode>(N2);
+ V = N1;
+ }
+
+ if (CFP) {
+ // 0*x --> 0
+ if (CFP->isZero())
+ return SDValue(CFP,0);
+ // 1*x --> x
+ if (CFP->isExactlyValue(1.0))
+ return V;
+ }
}
}
assert(VT.isFloatingPoint() && "This operator only applies to FP types!");
@@ -2935,17 +2942,13 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,
// expanding large vector constants.
if (N2C && N1.getOpcode() == ISD::BUILD_VECTOR) {
SDValue Elt = N1.getOperand(N2C->getZExtValue());
- EVT VEltTy = N1.getValueType().getVectorElementType();
- if (Elt.getValueType() != VEltTy) {
+
+ if (VT != Elt.getValueType())
// If the vector element type is not legal, the BUILD_VECTOR operands
- // are promoted and implicitly truncated. Make that explicit here.
- Elt = getNode(ISD::TRUNCATE, DL, VEltTy, Elt);
- }
- if (VT != VEltTy) {
- // If the vector element type is not legal, the EXTRACT_VECTOR_ELT
- // result is implicitly extended.
- Elt = getNode(ISD::ANY_EXTEND, DL, VT, Elt);
- }
+ // are promoted and implicitly truncated, and the result implicitly
+ // extended. Make that explicit here.
+ Elt = getAnyExtOrTrunc(Elt, DL, VT);
+
return Elt;
}
@@ -3036,7 +3039,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,
// Cannonicalize constant to RHS if commutative
std::swap(N1CFP, N2CFP);
std::swap(N1, N2);
- } else if (N2CFP && VT != MVT::ppcf128) {
+ } else if (N2CFP) {
APFloat V1 = N1CFP->getValueAPF(), V2 = N2CFP->getValueAPF();
APFloat::opStatus s;
switch (Opcode) {
@@ -3435,7 +3438,7 @@ static bool FindOptimalMemOpLowering(std::vector<EVT> &MemOps,
DAG.getMachineFunction());
if (VT == MVT::Other) {
- if (DstAlign >= TLI.getTargetData()->getPointerPrefAlignment() ||
+ if (DstAlign >= TLI.getDataLayout()->getPointerPrefAlignment() ||
TLI.allowsUnalignedMemoryAccesses(VT)) {
VT = TLI.getPointerTy();
} else {
@@ -3503,7 +3506,9 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl,
bool DstAlignCanChange = false;
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo *MFI = MF.getFrameInfo();
- bool OptSize = MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize);
+ bool OptSize =
+ MF.getFunction()->getFnAttributes().
+ hasAttribute(Attributes::OptimizeForSize);
FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst);
if (FI && !MFI->isFixedObjectIndex(FI->getIndex()))
DstAlignCanChange = true;
@@ -3523,7 +3528,7 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl,
if (DstAlignCanChange) {
Type *Ty = MemOps[0].getTypeForEVT(*DAG.getContext());
- unsigned NewAlign = (unsigned) TLI.getTargetData()->getABITypeAlignment(Ty);
+ unsigned NewAlign = (unsigned) TLI.getDataLayout()->getABITypeAlignment(Ty);
if (NewAlign > Align) {
// Give the stack frame object a larger alignment if needed.
if (MFI->getObjectAlignment(FI->getIndex()) < NewAlign)
@@ -3596,7 +3601,8 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, DebugLoc dl,
bool DstAlignCanChange = false;
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo *MFI = MF.getFrameInfo();
- bool OptSize = MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize);
+ bool OptSize = MF.getFunction()->getFnAttributes().
+ hasAttribute(Attributes::OptimizeForSize);
FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst);
if (FI && !MFI->isFixedObjectIndex(FI->getIndex()))
DstAlignCanChange = true;
@@ -3612,7 +3618,7 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, DebugLoc dl,
if (DstAlignCanChange) {
Type *Ty = MemOps[0].getTypeForEVT(*DAG.getContext());
- unsigned NewAlign = (unsigned) TLI.getTargetData()->getABITypeAlignment(Ty);
+ unsigned NewAlign = (unsigned) TLI.getDataLayout()->getABITypeAlignment(Ty);
if (NewAlign > Align) {
// Give the stack frame object a larger alignment if needed.
if (MFI->getObjectAlignment(FI->getIndex()) < NewAlign)
@@ -3674,7 +3680,8 @@ static SDValue getMemsetStores(SelectionDAG &DAG, DebugLoc dl,
bool DstAlignCanChange = false;
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo *MFI = MF.getFrameInfo();
- bool OptSize = MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize);
+ bool OptSize = MF.getFunction()->getFnAttributes().
+ hasAttribute(Attributes::OptimizeForSize);
FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst);
if (FI && !MFI->isFixedObjectIndex(FI->getIndex()))
DstAlignCanChange = true;
@@ -3687,7 +3694,7 @@ static SDValue getMemsetStores(SelectionDAG &DAG, DebugLoc dl,
if (DstAlignCanChange) {
Type *Ty = MemOps[0].getTypeForEVT(*DAG.getContext());
- unsigned NewAlign = (unsigned) TLI.getTargetData()->getABITypeAlignment(Ty);
+ unsigned NewAlign = (unsigned) TLI.getDataLayout()->getABITypeAlignment(Ty);
if (NewAlign > Align) {
// Give the stack frame object a larger alignment if needed.
if (MFI->getObjectAlignment(FI->getIndex()) < NewAlign)
@@ -3781,7 +3788,7 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, DebugLoc dl, SDValue Dst,
// Emit a library call.
TargetLowering::ArgListTy Args;
TargetLowering::ArgListEntry Entry;
- Entry.Ty = TLI.getTargetData()->getIntPtrType(*getContext());
+ Entry.Ty = TLI.getDataLayout()->getIntPtrType(*getContext());
Entry.Node = Dst; Args.push_back(Entry);
Entry.Node = Src; Args.push_back(Entry);
Entry.Node = Size; Args.push_back(Entry);
@@ -3836,7 +3843,7 @@ SDValue SelectionDAG::getMemmove(SDValue Chain, DebugLoc dl, SDValue Dst,
// Emit a library call.
TargetLowering::ArgListTy Args;
TargetLowering::ArgListEntry Entry;
- Entry.Ty = TLI.getTargetData()->getIntPtrType(*getContext());
+ Entry.Ty = TLI.getDataLayout()->getIntPtrType(*getContext());
Entry.Node = Dst; Args.push_back(Entry);
Entry.Node = Src; Args.push_back(Entry);
Entry.Node = Size; Args.push_back(Entry);
@@ -3885,7 +3892,7 @@ SDValue SelectionDAG::getMemset(SDValue Chain, DebugLoc dl, SDValue Dst,
return Result;
// Emit a library call.
- Type *IntPtrTy = TLI.getTargetData()->getIntPtrType(*getContext());
+ Type *IntPtrTy = TLI.getDataLayout()->getIntPtrType(*getContext());
TargetLowering::ArgListTy Args;
TargetLowering::ArgListEntry Entry;
Entry.Node = Dst; Entry.Ty = IntPtrTy;
@@ -3923,17 +3930,21 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT,
SDValue Swp, MachinePointerInfo PtrInfo,
unsigned Alignment,
AtomicOrdering Ordering,
- SynchronizationScope SynchScope) {
+ SynchronizationScope SynchScope) {
if (Alignment == 0) // Ensure that codegen never sees alignment 0
Alignment = getEVTAlignment(MemVT);
MachineFunction &MF = getMachineFunction();
- unsigned Flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore;
+ // All atomics are load and store, except for ATMOIC_LOAD and ATOMIC_STORE.
// For now, atomics are considered to be volatile always.
// FIXME: Volatile isn't really correct; we should keep track of atomic
// orderings in the memoperand.
- Flags |= MachineMemOperand::MOVolatile;
+ unsigned Flags = MachineMemOperand::MOVolatile;
+ if (Opcode != ISD::ATOMIC_STORE)
+ Flags |= MachineMemOperand::MOLoad;
+ if (Opcode != ISD::ATOMIC_LOAD)
+ Flags |= MachineMemOperand::MOStore;
MachineMemOperand *MMO =
MF.getMachineMemOperand(PtrInfo, Flags, MemVT.getStoreSize(), Alignment);
@@ -3983,17 +3994,17 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT,
Alignment = getEVTAlignment(MemVT);
MachineFunction &MF = getMachineFunction();
- // A monotonic store does not load; a release store "loads" in the sense
- // that other stores cannot be sunk past it.
+ // An atomic store does not load. An atomic load does not store.
// (An atomicrmw obviously both loads and stores.)
- unsigned Flags = MachineMemOperand::MOStore;
- if (Opcode != ISD::ATOMIC_STORE || Ordering > Monotonic)
- Flags |= MachineMemOperand::MOLoad;
-
- // For now, atomics are considered to be volatile always.
+ // For now, atomics are considered to be volatile always, and they are
+ // chained as such.
// FIXME: Volatile isn't really correct; we should keep track of atomic
// orderings in the memoperand.
- Flags |= MachineMemOperand::MOVolatile;
+ unsigned Flags = MachineMemOperand::MOVolatile;
+ if (Opcode != ISD::ATOMIC_STORE)
+ Flags |= MachineMemOperand::MOLoad;
+ if (Opcode != ISD::ATOMIC_LOAD)
+ Flags |= MachineMemOperand::MOStore;
MachineMemOperand *MMO =
MF.getMachineMemOperand(MachinePointerInfo(PtrVal), Flags,
@@ -4056,16 +4067,17 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT,
Alignment = getEVTAlignment(MemVT);
MachineFunction &MF = getMachineFunction();
- // A monotonic load does not store; an acquire load "stores" in the sense
- // that other loads cannot be hoisted past it.
- unsigned Flags = MachineMemOperand::MOLoad;
- if (Ordering > Monotonic)
- Flags |= MachineMemOperand::MOStore;
-
- // For now, atomics are considered to be volatile always.
+ // An atomic store does not load. An atomic load does not store.
+ // (An atomicrmw obviously both loads and stores.)
+ // For now, atomics are considered to be volatile always, and they are
+ // chained as such.
// FIXME: Volatile isn't really correct; we should keep track of atomic
// orderings in the memoperand.
- Flags |= MachineMemOperand::MOVolatile;
+ unsigned Flags = MachineMemOperand::MOVolatile;
+ if (Opcode != ISD::ATOMIC_STORE)
+ Flags |= MachineMemOperand::MOLoad;
+ if (Opcode != ISD::ATOMIC_LOAD)
+ Flags |= MachineMemOperand::MOStore;
MachineMemOperand *MMO =
MF.getMachineMemOperand(MachinePointerInfo(PtrVal), Flags,
@@ -4157,6 +4169,8 @@ SelectionDAG::getMemIntrinsicNode(unsigned Opcode, DebugLoc dl, SDVTList VTList,
assert((Opcode == ISD::INTRINSIC_VOID ||
Opcode == ISD::INTRINSIC_W_CHAIN ||
Opcode == ISD::PREFETCH ||
+ Opcode == ISD::LIFETIME_START ||
+ Opcode == ISD::LIFETIME_END ||
(Opcode <= INT_MAX &&
(int)Opcode >= ISD::FIRST_TARGET_MEMORY_OPCODE)) &&
"Opcode is not a memory-accessing opcode!");
@@ -4226,7 +4240,7 @@ SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType,
bool isVolatile, bool isNonTemporal, bool isInvariant,
unsigned Alignment, const MDNode *TBAAInfo,
const MDNode *Ranges) {
- assert(Chain.getValueType() == MVT::Other &&
+ assert(Chain.getValueType() == MVT::Other &&
"Invalid chain type");
if (Alignment == 0) // Ensure that codegen never sees alignment 0
Alignment = getEVTAlignment(VT);
@@ -4284,7 +4298,7 @@ SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType,
AddNodeIDNode(ID, ISD::LOAD, VTs, Ops, 3);
ID.AddInteger(MemVT.getRawBits());
ID.AddInteger(encodeMemSDNodeFlags(ExtType, AM, MMO->isVolatile(),
- MMO->isNonTemporal(),
+ MMO->isNonTemporal(),
MMO->isInvariant()));
ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
void *IP = 0;
@@ -4303,7 +4317,7 @@ SDValue SelectionDAG::getLoad(EVT VT, DebugLoc dl,
SDValue Chain, SDValue Ptr,
MachinePointerInfo PtrInfo,
bool isVolatile, bool isNonTemporal,
- bool isInvariant, unsigned Alignment,
+ bool isInvariant, unsigned Alignment,
const MDNode *TBAAInfo,
const MDNode *Ranges) {
SDValue Undef = getUNDEF(Ptr.getValueType());
@@ -4332,7 +4346,7 @@ SelectionDAG::getIndexedLoad(SDValue OrigLoad, DebugLoc dl, SDValue Base,
"Load is already a indexed load!");
return getLoad(AM, LD->getExtensionType(), OrigLoad.getValueType(), dl,
LD->getChain(), Base, Offset, LD->getPointerInfo(),
- LD->getMemoryVT(), LD->isVolatile(), LD->isNonTemporal(),
+ LD->getMemoryVT(), LD->isVolatile(), LD->isNonTemporal(),
false, LD->getAlignment());
}
@@ -4340,7 +4354,7 @@ SDValue SelectionDAG::getStore(SDValue Chain, DebugLoc dl, SDValue Val,
SDValue Ptr, MachinePointerInfo PtrInfo,
bool isVolatile, bool isNonTemporal,
unsigned Alignment, const MDNode *TBAAInfo) {
- assert(Chain.getValueType() == MVT::Other &&
+ assert(Chain.getValueType() == MVT::Other &&
"Invalid chain type");
if (Alignment == 0) // Ensure that codegen never sees alignment 0
Alignment = getEVTAlignment(Val.getValueType());
@@ -4365,7 +4379,7 @@ SDValue SelectionDAG::getStore(SDValue Chain, DebugLoc dl, SDValue Val,
SDValue SelectionDAG::getStore(SDValue Chain, DebugLoc dl, SDValue Val,
SDValue Ptr, MachineMemOperand *MMO) {
- assert(Chain.getValueType() == MVT::Other &&
+ assert(Chain.getValueType() == MVT::Other &&
"Invalid chain type");
EVT VT = Val.getValueType();
SDVTList VTs = getVTList(MVT::Other);
@@ -4394,7 +4408,7 @@ SDValue SelectionDAG::getTruncStore(SDValue Chain, DebugLoc dl, SDValue Val,
EVT SVT,bool isVolatile, bool isNonTemporal,
unsigned Alignment,
const MDNode *TBAAInfo) {
- assert(Chain.getValueType() == MVT::Other &&
+ assert(Chain.getValueType() == MVT::Other &&
"Invalid chain type");
if (Alignment == 0) // Ensure that codegen never sees alignment 0
Alignment = getEVTAlignment(SVT);
@@ -4421,7 +4435,7 @@ SDValue SelectionDAG::getTruncStore(SDValue Chain, DebugLoc dl, SDValue Val,
MachineMemOperand *MMO) {
EVT VT = Val.getValueType();
- assert(Chain.getValueType() == MVT::Other &&
+ assert(Chain.getValueType() == MVT::Other &&
"Invalid chain type");
if (VT == SVT)
return getStore(Chain, dl, Val, Ptr, MMO);
@@ -6074,7 +6088,7 @@ unsigned SelectionDAG::InferPtrAlignment(SDValue Ptr) const {
unsigned PtrWidth = TLI.getPointerTy().getSizeInBits();
APInt KnownZero(PtrWidth, 0), KnownOne(PtrWidth, 0);
llvm::ComputeMaskedBits(const_cast<GlobalValue*>(GV), KnownZero, KnownOne,
- TLI.getTargetData());
+ TLI.getDataLayout());
unsigned AlignBits = KnownZero.countTrailingOnes();
unsigned Align = AlignBits ? 1 << std::min(31U, AlignBits) : 0;
if (Align)
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index ba5bd79..3fbf7c2 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -19,6 +19,7 @@
#include "llvm/ADT/SmallSet.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Constants.h"
#include "llvm/CallingConv.h"
#include "llvm/DebugInfo.h"
@@ -43,7 +44,7 @@
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
-#include "llvm/Target/TargetData.h"
+#include "llvm/DataLayout.h"
#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetIntrinsicInfo.h"
@@ -88,7 +89,7 @@ static const unsigned MaxParallelChains = 64;
static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL,
const SDValue *Parts, unsigned NumParts,
- EVT PartVT, EVT ValueVT);
+ EVT PartVT, EVT ValueVT, const Value *V);
/// getCopyFromParts - Create a value that contains the specified legal parts
/// combined into the value they represent. If the parts combine to a type
@@ -98,9 +99,11 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL,
static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc DL,
const SDValue *Parts,
unsigned NumParts, EVT PartVT, EVT ValueVT,
+ const Value *V,
ISD::NodeType AssertOp = ISD::DELETED_NODE) {
if (ValueVT.isVector())
- return getCopyFromPartsVector(DAG, DL, Parts, NumParts, PartVT, ValueVT);
+ return getCopyFromPartsVector(DAG, DL, Parts, NumParts,
+ PartVT, ValueVT, V);
assert(NumParts > 0 && "No parts to assemble!");
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
@@ -124,9 +127,9 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc DL,
if (RoundParts > 2) {
Lo = getCopyFromParts(DAG, DL, Parts, RoundParts / 2,
- PartVT, HalfVT);
+ PartVT, HalfVT, V);
Hi = getCopyFromParts(DAG, DL, Parts + RoundParts / 2,
- RoundParts / 2, PartVT, HalfVT);
+ RoundParts / 2, PartVT, HalfVT, V);
} else {
Lo = DAG.getNode(ISD::BITCAST, DL, HalfVT, Parts[0]);
Hi = DAG.getNode(ISD::BITCAST, DL, HalfVT, Parts[1]);
@@ -142,7 +145,7 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc DL,
unsigned OddParts = NumParts - RoundParts;
EVT OddVT = EVT::getIntegerVT(*DAG.getContext(), OddParts * PartBits);
Hi = getCopyFromParts(DAG, DL,
- Parts + RoundParts, OddParts, PartVT, OddVT);
+ Parts + RoundParts, OddParts, PartVT, OddVT, V);
// Combine the round and odd parts.
Lo = Val;
@@ -171,7 +174,7 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc DL,
assert(ValueVT.isFloatingPoint() && PartVT.isInteger() &&
!PartVT.isVector() && "Unexpected split");
EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), ValueVT.getSizeInBits());
- Val = getCopyFromParts(DAG, DL, Parts, NumParts, PartVT, IntVT);
+ Val = getCopyFromParts(DAG, DL, Parts, NumParts, PartVT, IntVT, V);
}
}
@@ -209,14 +212,14 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc DL,
llvm_unreachable("Unknown mismatch!");
}
-/// getCopyFromParts - Create a value that contains the specified legal parts
-/// combined into the value they represent. If the parts combine to a type
-/// larger then ValueVT then AssertOp can be used to specify whether the extra
-/// bits are known to be zero (ISD::AssertZext) or sign extended from ValueVT
-/// (ISD::AssertSext).
+/// getCopyFromPartsVector - Create a value that contains the specified legal
+/// parts combined into the value they represent. If the parts combine to a
+/// type larger then ValueVT then AssertOp can be used to specify whether the
+/// extra bits are known to be zero (ISD::AssertZext) or sign extended from
+/// ValueVT (ISD::AssertSext).
static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL,
const SDValue *Parts, unsigned NumParts,
- EVT PartVT, EVT ValueVT) {
+ EVT PartVT, EVT ValueVT, const Value *V) {
assert(ValueVT.isVector() && "Not a vector value");
assert(NumParts > 0 && "No parts to assemble!");
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
@@ -242,7 +245,7 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL,
// as appropriate.
for (unsigned i = 0; i != NumParts; ++i)
Ops[i] = getCopyFromParts(DAG, DL, &Parts[i], 1,
- PartVT, IntermediateVT);
+ PartVT, IntermediateVT, V);
} else if (NumParts > 0) {
// If the intermediate type was expanded, build the intermediate
// operands from the parts.
@@ -251,7 +254,7 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL,
unsigned Factor = NumParts / NumIntermediates;
for (unsigned i = 0; i != NumIntermediates; ++i)
Ops[i] = getCopyFromParts(DAG, DL, &Parts[i * Factor], Factor,
- PartVT, IntermediateVT);
+ PartVT, IntermediateVT, V);
}
// Build a vector with BUILD_VECTOR or CONCAT_VECTORS from the
@@ -299,8 +302,19 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL,
return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
// Handle cases such as i8 -> <1 x i1>
- assert(ValueVT.getVectorNumElements() == 1 &&
- "Only trivial scalar-to-vector conversions should get here!");
+ if (ValueVT.getVectorNumElements() != 1) {
+ LLVMContext &Ctx = *DAG.getContext();
+ Twine ErrMsg("non-trivial scalar-to-vector conversion");
+ if (const Instruction *I = dyn_cast_or_null<Instruction>(V)) {
+ if (const CallInst *CI = dyn_cast<CallInst>(I))
+ if (isa<InlineAsm>(CI->getCalledValue()))
+ ErrMsg = ErrMsg + ", possible invalid constraint for vector type";
+ Ctx.emitError(I, ErrMsg);
+ } else {
+ Ctx.emitError(ErrMsg);
+ }
+ report_fatal_error("Cannot handle scalar-to-vector conversion!");
+ }
if (ValueVT.getVectorNumElements() == 1 &&
ValueVT.getVectorElementType() != PartVT) {
@@ -312,25 +326,22 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL,
return DAG.getNode(ISD::BUILD_VECTOR, DL, ValueVT, Val);
}
-
-
-
static void getCopyToPartsVector(SelectionDAG &DAG, DebugLoc dl,
SDValue Val, SDValue *Parts, unsigned NumParts,
- EVT PartVT);
+ EVT PartVT, const Value *V);
/// getCopyToParts - Create a series of nodes that contain the specified value
/// split into legal parts. If the parts contain more bits than Val, then, for
/// integers, ExtendKind can be used to specify how to generate the extra bits.
static void getCopyToParts(SelectionDAG &DAG, DebugLoc DL,
SDValue Val, SDValue *Parts, unsigned NumParts,
- EVT PartVT,
+ EVT PartVT, const Value *V,
ISD::NodeType ExtendKind = ISD::ANY_EXTEND) {
EVT ValueVT = Val.getValueType();
// Handle the vector case separately.
if (ValueVT.isVector())
- return getCopyToPartsVector(DAG, DL, Val, Parts, NumParts, PartVT);
+ return getCopyToPartsVector(DAG, DL, Val, Parts, NumParts, PartVT, V);
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
unsigned PartBits = PartVT.getSizeInBits();
@@ -382,7 +393,19 @@ static void getCopyToParts(SelectionDAG &DAG, DebugLoc DL,
"Failed to tile the value with PartVT!");
if (NumParts == 1) {
- assert(PartVT == ValueVT && "Type conversion failed!");
+ if (PartVT != ValueVT) {
+ LLVMContext &Ctx = *DAG.getContext();
+ Twine ErrMsg("scalar-to-vector conversion failed");
+ if (const Instruction *I = dyn_cast_or_null<Instruction>(V)) {
+ if (const CallInst *CI = dyn_cast<CallInst>(I))
+ if (isa<InlineAsm>(CI->getCalledValue()))
+ ErrMsg = ErrMsg + ", possible invalid constraint for vector type";
+ Ctx.emitError(I, ErrMsg);
+ } else {
+ Ctx.emitError(ErrMsg);
+ }
+ }
+
Parts[0] = Val;
return;
}
@@ -397,7 +420,7 @@ static void getCopyToParts(SelectionDAG &DAG, DebugLoc DL,
unsigned OddParts = NumParts - RoundParts;
SDValue OddVal = DAG.getNode(ISD::SRL, DL, ValueVT, Val,
DAG.getIntPtrConstant(RoundBits));
- getCopyToParts(DAG, DL, OddVal, Parts + RoundParts, OddParts, PartVT);
+ getCopyToParts(DAG, DL, OddVal, Parts + RoundParts, OddParts, PartVT, V);
if (TLI.isBigEndian())
// The odd parts were reversed by getCopyToParts - unreverse them.
@@ -443,7 +466,7 @@ static void getCopyToParts(SelectionDAG &DAG, DebugLoc DL,
/// value split into legal parts.
static void getCopyToPartsVector(SelectionDAG &DAG, DebugLoc DL,
SDValue Val, SDValue *Parts, unsigned NumParts,
- EVT PartVT) {
+ EVT PartVT, const Value *V) {
EVT ValueVT = Val.getValueType();
assert(ValueVT.isVector() && "Not a vector");
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
@@ -529,7 +552,7 @@ static void getCopyToPartsVector(SelectionDAG &DAG, DebugLoc DL,
// If the register was not expanded, promote or copy the value,
// as appropriate.
for (unsigned i = 0; i != NumParts; ++i)
- getCopyToParts(DAG, DL, Ops[i], &Parts[i], 1, PartVT);
+ getCopyToParts(DAG, DL, Ops[i], &Parts[i], 1, PartVT, V);
} else if (NumParts > 0) {
// If the intermediate type was expanded, split each the value into
// legal parts.
@@ -537,13 +560,10 @@ static void getCopyToPartsVector(SelectionDAG &DAG, DebugLoc DL,
"Must expand into a divisible number of parts!");
unsigned Factor = NumParts / NumIntermediates;
for (unsigned i = 0; i != NumIntermediates; ++i)
- getCopyToParts(DAG, DL, Ops[i], &Parts[i*Factor], Factor, PartVT);
+ getCopyToParts(DAG, DL, Ops[i], &Parts[i*Factor], Factor, PartVT, V);
}
}
-
-
-
namespace {
/// RegsForValue - This struct represents the registers (physical or virtual)
/// that a particular set of values is assigned, and the type information
@@ -621,14 +641,15 @@ namespace {
/// If the Flag pointer is NULL, no flag is used.
SDValue getCopyFromRegs(SelectionDAG &DAG, FunctionLoweringInfo &FuncInfo,
DebugLoc dl,
- SDValue &Chain, SDValue *Flag) const;
+ SDValue &Chain, SDValue *Flag,
+ const Value *V = 0) const;
/// getCopyToRegs - Emit a series of CopyToReg nodes that copies the
/// specified value into the registers specified by this object. This uses
/// Chain/Flag as the input and updates them for the output Chain/Flag.
/// If the Flag pointer is NULL, no flag is used.
void getCopyToRegs(SDValue Val, SelectionDAG &DAG, DebugLoc dl,
- SDValue &Chain, SDValue *Flag) const;
+ SDValue &Chain, SDValue *Flag, const Value *V) const;
/// AddInlineAsmOperands - Add this value to the specified inlineasm node
/// operand list. This adds the code marker, matching input operand index
@@ -647,7 +668,8 @@ namespace {
SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG,
FunctionLoweringInfo &FuncInfo,
DebugLoc dl,
- SDValue &Chain, SDValue *Flag) const {
+ SDValue &Chain, SDValue *Flag,
+ const Value *V) const {
// A Value with type {} or [0 x %t] needs no registers.
if (ValueVTs.empty())
return SDValue();
@@ -721,7 +743,7 @@ SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG,
}
Values[Value] = getCopyFromParts(DAG, dl, Parts.begin(),
- NumRegs, RegisterVT, ValueVT);
+ NumRegs, RegisterVT, ValueVT, V);
Part += NumRegs;
Parts.clear();
}
@@ -736,7 +758,8 @@ SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG,
/// Chain/Flag as the input and updates them for the output Chain/Flag.
/// If the Flag pointer is NULL, no flag is used.
void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG, DebugLoc dl,
- SDValue &Chain, SDValue *Flag) const {
+ SDValue &Chain, SDValue *Flag,
+ const Value *V) const {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
// Get the list of the values's legal parts.
@@ -748,7 +771,7 @@ void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG, DebugLoc dl,
EVT RegisterVT = RegVTs[Value];
getCopyToParts(DAG, dl, Val.getValue(Val.getResNo() + Value),
- &Parts[Part], NumParts, RegisterVT);
+ &Parts[Part], NumParts, RegisterVT, V);
Part += NumParts;
}
@@ -824,7 +847,8 @@ void SelectionDAGBuilder::init(GCFunctionInfo *gfi, AliasAnalysis &aa,
AA = &aa;
GFI = gfi;
LibInfo = li;
- TD = DAG.getTarget().getTargetData();
+ TD = DAG.getTarget().getDataLayout();
+ Context = DAG.getContext();
LPadToCallSiteMap.clear();
}
@@ -992,7 +1016,7 @@ SDValue SelectionDAGBuilder::getValue(const Value *V) {
unsigned InReg = It->second;
RegsForValue RFV(*DAG.getContext(), TLI, InReg, V->getType());
SDValue Chain = DAG.getEntryNode();
- N = RFV.getCopyFromRegs(DAG, FuncInfo, getCurDebugLoc(), Chain, NULL);
+ N = RFV.getCopyFromRegs(DAG, FuncInfo, getCurDebugLoc(), Chain, NULL, V);
resolveDanglingDebugInfo(V, N);
return N;
}
@@ -1147,7 +1171,7 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) {
unsigned InReg = FuncInfo.InitializeRegForValue(Inst);
RegsForValue RFV(*DAG.getContext(), TLI, InReg, Inst->getType());
SDValue Chain = DAG.getEntryNode();
- return RFV.getCopyFromRegs(DAG, FuncInfo, getCurDebugLoc(), Chain, NULL);
+ return RFV.getCopyFromRegs(DAG, FuncInfo, getCurDebugLoc(), Chain, NULL, V);
}
llvm_unreachable("Can't get register for value!");
@@ -1203,9 +1227,9 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
ISD::NodeType ExtendKind = ISD::ANY_EXTEND;
const Function *F = I.getParent()->getParent();
- if (F->paramHasAttr(0, Attribute::SExt))
+ if (F->getRetAttributes().hasAttribute(Attributes::SExt))
ExtendKind = ISD::SIGN_EXTEND;
- else if (F->paramHasAttr(0, Attribute::ZExt))
+ else if (F->getRetAttributes().hasAttribute(Attributes::ZExt))
ExtendKind = ISD::ZERO_EXTEND;
if (ExtendKind != ISD::ANY_EXTEND && VT.isInteger())
@@ -1216,11 +1240,11 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
SmallVector<SDValue, 4> Parts(NumParts);
getCopyToParts(DAG, getCurDebugLoc(),
SDValue(RetOp.getNode(), RetOp.getResNo() + j),
- &Parts[0], NumParts, PartVT, ExtendKind);
+ &Parts[0], NumParts, PartVT, &I, ExtendKind);
// 'inreg' on function refers to return value
ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy();
- if (F->paramHasAttr(0, Attribute::InReg))
+ if (F->getRetAttributes().hasAttribute(Attributes::InReg))
Flags.setInReg();
// Propagate extension type if any
@@ -1231,7 +1255,7 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
for (unsigned i = 0; i < NumParts; ++i) {
Outs.push_back(ISD::OutputArg(Flags, Parts[i].getValueType(),
- /*isfixed=*/true));
+ /*isfixed=*/true, 0, 0));
OutVals.push_back(Parts[i]);
}
}
@@ -1601,7 +1625,10 @@ void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB,
// Update successor info
addSuccessorWithWeight(SwitchBB, CB.TrueBB, CB.TrueWeight);
- addSuccessorWithWeight(SwitchBB, CB.FalseBB, CB.FalseWeight);
+ // TrueBB and FalseBB are always different unless the incoming IR is
+ // degenerate. This only happens when running llc on weird IR.
+ if (CB.TrueBB != CB.FalseBB)
+ addSuccessorWithWeight(SwitchBB, CB.FalseBB, CB.FalseWeight);
// Set NextBlock to be the MBB immediately after the current one, if any.
// This is used to avoid emitting unnecessary branches to the next block.
@@ -1762,6 +1789,7 @@ void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B,
/// visitBitTestCase - this function produces one "bit test"
void SelectionDAGBuilder::visitBitTestCase(BitTestBlock &BB,
MachineBasicBlock* NextMBB,
+ uint32_t BranchWeightToNext,
unsigned Reg,
BitTestCase &B,
MachineBasicBlock *SwitchBB) {
@@ -1799,8 +1827,10 @@ void SelectionDAGBuilder::visitBitTestCase(BitTestBlock &BB,
ISD::SETNE);
}
- addSuccessorWithWeight(SwitchBB, B.TargetBB);
- addSuccessorWithWeight(SwitchBB, NextMBB);
+ // The branch weight from SwitchBB to B.TargetBB is B.ExtraWeight.
+ addSuccessorWithWeight(SwitchBB, B.TargetBB, B.ExtraWeight);
+ // The branch weight from SwitchBB to NextMBB is BranchWeightToNext.
+ addSuccessorWithWeight(SwitchBB, NextMBB, BranchWeightToNext);
SDValue BrAnd = DAG.getNode(ISD::BRCOND, getCurDebugLoc(),
MVT::Other, getControlRoot(),
@@ -1923,6 +1953,7 @@ bool SelectionDAGBuilder::handleSmallSwitchRange(CaseRec& CR,
if (++BBI != FuncInfo.MF->end())
NextBlock = BBI;
+ BranchProbabilityInfo *BPI = FuncInfo.BPI;
// If any two of the cases has the same destination, and if one value
// is the same as the other, but has one bit unset that the other has set,
// use bit manipulation to do two compares at once. For example:
@@ -1956,8 +1987,12 @@ bool SelectionDAGBuilder::handleSmallSwitchRange(CaseRec& CR,
ISD::SETEQ);
// Update successor info.
- addSuccessorWithWeight(SwitchBB, Small.BB);
- addSuccessorWithWeight(SwitchBB, Default);
+ // Both Small and Big will jump to Small.BB, so we sum up the weights.
+ addSuccessorWithWeight(SwitchBB, Small.BB,
+ Small.ExtraWeight + Big.ExtraWeight);
+ addSuccessorWithWeight(SwitchBB, Default,
+ // The default destination is the first successor in IR.
+ BPI ? BPI->getEdgeWeight(SwitchBB->getBasicBlock(), (unsigned)0) : 0);
// Insert the true branch.
SDValue BrCond = DAG.getNode(ISD::BRCOND, DL, MVT::Other,
@@ -1975,14 +2010,13 @@ bool SelectionDAGBuilder::handleSmallSwitchRange(CaseRec& CR,
}
// Order cases by weight so the most likely case will be checked first.
- BranchProbabilityInfo *BPI = FuncInfo.BPI;
+ uint32_t UnhandledWeights = 0;
if (BPI) {
for (CaseItr I = CR.Range.first, IE = CR.Range.second; I != IE; ++I) {
- uint32_t IWeight = BPI->getEdgeWeight(SwitchBB->getBasicBlock(),
- I->BB->getBasicBlock());
+ uint32_t IWeight = I->ExtraWeight;
+ UnhandledWeights += IWeight;
for (CaseItr J = CR.Range.first; J < I; ++J) {
- uint32_t JWeight = BPI->getEdgeWeight(SwitchBB->getBasicBlock(),
- J->BB->getBasicBlock());
+ uint32_t JWeight = J->ExtraWeight;
if (IWeight > JWeight)
std::swap(*I, *J);
}
@@ -2031,10 +2065,12 @@ bool SelectionDAGBuilder::handleSmallSwitchRange(CaseRec& CR,
LHS = I->Low; MHS = SV; RHS = I->High;
}
- uint32_t ExtraWeight = I->ExtraWeight;
+ // The false weight should be sum of all un-handled cases.
+ UnhandledWeights -= I->ExtraWeight;
CaseBlock CB(CC, LHS, RHS, MHS, /* truebb */ I->BB, /* falsebb */ FallThrough,
/* me */ CurBlock,
- /* trueweight */ ExtraWeight / 2, /* falseweight */ ExtraWeight / 2);
+ /* trueweight */ I->ExtraWeight,
+ /* falseweight */ UnhandledWeights);
// If emitting the first comparison, just call visitSwitchCase to emit the
// code into the current block. Otherwise, push the CaseBlock onto the
@@ -2079,7 +2115,7 @@ bool SelectionDAGBuilder::handleJTSwitchCase(CaseRec &CR,
for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++I)
TSize += I->size();
- if (!areJTsAllowed(TLI) || TSize.ult(4))
+ if (!areJTsAllowed(TLI) || TSize.ult(TLI.getMinimumJumpTableEntries()))
return false;
APInt Range = ComputeRange(First, Last);
@@ -2134,13 +2170,28 @@ bool SelectionDAGBuilder::handleJTSwitchCase(CaseRec &CR,
}
}
+ // Calculate weight for each unique destination in CR.
+ DenseMap<MachineBasicBlock*, uint32_t> DestWeights;
+ if (FuncInfo.BPI)
+ for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++I) {
+ DenseMap<MachineBasicBlock*, uint32_t>::iterator Itr =
+ DestWeights.find(I->BB);
+ if (Itr != DestWeights.end())
+ Itr->second += I->ExtraWeight;
+ else
+ DestWeights[I->BB] = I->ExtraWeight;
+ }
+
// Update successor info. Add one edge to each unique successor.
BitVector SuccsHandled(CR.CaseBB->getParent()->getNumBlockIDs());
for (std::vector<MachineBasicBlock*>::iterator I = DestBBs.begin(),
E = DestBBs.end(); I != E; ++I) {
if (!SuccsHandled[(*I)->getNumber()]) {
SuccsHandled[(*I)->getNumber()] = true;
- addSuccessorWithWeight(JumpTableBB, *I);
+ DenseMap<MachineBasicBlock*, uint32_t>::iterator Itr =
+ DestWeights.find(*I);
+ addSuccessorWithWeight(JumpTableBB, *I,
+ Itr != DestWeights.end() ? Itr->second : 0);
}
}
@@ -2371,7 +2422,7 @@ bool SelectionDAGBuilder::handleBitTestsSwitchCase(CaseRec& CR,
if (i == count) {
assert((count < 3) && "Too much destinations to test!");
- CasesBits.push_back(CaseBits(0, Dest, 0));
+ CasesBits.push_back(CaseBits(0, Dest, 0, 0/*Weight*/));
count++;
}
@@ -2380,6 +2431,7 @@ bool SelectionDAGBuilder::handleBitTestsSwitchCase(CaseRec& CR,
uint64_t lo = (lowValue - lowBound).getZExtValue();
uint64_t hi = (highValue - lowBound).getZExtValue();
+ CasesBits[i].ExtraWeight += I->ExtraWeight;
for (uint64_t j = lo; j <= hi; j++) {
CasesBits[i].Mask |= 1ULL << j;
@@ -2407,7 +2459,7 @@ bool SelectionDAGBuilder::handleBitTestsSwitchCase(CaseRec& CR,
CurMF->insert(BBI, CaseBB);
BTC.push_back(BitTestCase(CasesBits[i].Mask,
CaseBB,
- CasesBits[i].BB));
+ CasesBits[i].BB, CasesBits[i].ExtraWeight));
// Put SV in a virtual register to make it available from the new blocks.
ExportFromCurrentBlock(SV);
@@ -2435,30 +2487,25 @@ size_t SelectionDAGBuilder::Clusterify(CaseVector& Cases,
Clusterifier TheClusterifier;
+ BranchProbabilityInfo *BPI = FuncInfo.BPI;
// Start with "simple" cases
for (SwitchInst::ConstCaseIt i = SI.case_begin(), e = SI.case_end();
i != e; ++i) {
const BasicBlock *SuccBB = i.getCaseSuccessor();
MachineBasicBlock *SMBB = FuncInfo.MBBMap[SuccBB];
- TheClusterifier.add(i.getCaseValueEx(), SMBB);
+ TheClusterifier.add(i.getCaseValueEx(), SMBB,
+ BPI ? BPI->getEdgeWeight(SI.getParent(), i.getSuccessorIndex()) : 0);
}
TheClusterifier.optimize();
- BranchProbabilityInfo *BPI = FuncInfo.BPI;
size_t numCmps = 0;
for (Clusterifier::RangeIterator i = TheClusterifier.begin(),
e = TheClusterifier.end(); i != e; ++i, ++numCmps) {
Clusterifier::Cluster &C = *i;
- unsigned W = 0;
- if (BPI) {
- W = BPI->getEdgeWeight(SI.getParent(), C.second->getBasicBlock());
- if (!W)
- W = 16;
- W *= C.first.Weight;
- BPI->setEdgeWeight(SI.getParent(), C.second->getBasicBlock(), W);
- }
+ // Update edge weight for the cluster.
+ unsigned W = C.first.Weight;
// FIXME: Currently work with ConstantInt based numbers.
// Changing it to APInt based is a pretty heavy for this commit.
@@ -2540,9 +2587,10 @@ void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) {
if (handleSmallSwitchRange(CR, WorkList, SV, Default, SwitchMBB))
continue;
- // If the switch has more than 5 blocks, and at least 40% dense, and the
+ // If the switch has more than N blocks, and is at least 40% dense, and the
// target supports indirect branches, then emit a jump table rather than
// lowering the switch to a binary tree of conditional branches.
+ // N defaults to 4 and is controlled via TLS.getMinimumJumpTableEntries().
if (handleJTSwitchCase(CR, WorkList, SV, Default, SwitchMBB))
continue;
@@ -2556,14 +2604,14 @@ void SelectionDAGBuilder::visitIndirectBr(const IndirectBrInst &I) {
MachineBasicBlock *IndirectBrMBB = FuncInfo.MBB;
// Update machine-CFG edges with unique successors.
- SmallVector<BasicBlock*, 32> succs;
- succs.reserve(I.getNumSuccessors());
- for (unsigned i = 0, e = I.getNumSuccessors(); i != e; ++i)
- succs.push_back(I.getSuccessor(i));
- array_pod_sort(succs.begin(), succs.end());
- succs.erase(std::unique(succs.begin(), succs.end()), succs.end());
- for (unsigned i = 0, e = succs.size(); i != e; ++i) {
- MachineBasicBlock *Succ = FuncInfo.MBBMap[succs[i]];
+ SmallSet<BasicBlock*, 32> Done;
+ for (unsigned i = 0, e = I.getNumSuccessors(); i != e; ++i) {
+ BasicBlock *BB = I.getSuccessor(i);
+ bool Inserted = Done.insert(BB);
+ if (!Inserted)
+ continue;
+
+ MachineBasicBlock *Succ = FuncInfo.MBBMap[BB];
addSuccessorWithWeight(IndirectBrMBB, Succ);
}
@@ -3160,9 +3208,9 @@ void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) {
return; // getValue will auto-populate this.
Type *Ty = I.getAllocatedType();
- uint64_t TySize = TLI.getTargetData()->getTypeAllocSize(Ty);
+ uint64_t TySize = TLI.getDataLayout()->getTypeAllocSize(Ty);
unsigned Align =
- std::max((unsigned)TLI.getTargetData()->getPrefTypeAlignment(Ty),
+ std::max((unsigned)TLI.getDataLayout()->getPrefTypeAlignment(Ty),
I.getAlignment());
SDValue AllocSize = getValue(I.getArraySize());
@@ -3460,7 +3508,7 @@ void SelectionDAGBuilder::visitAtomicLoad(const LoadInst &I) {
SDValue InChain = getRoot();
- EVT VT = EVT::getEVT(I.getType());
+ EVT VT = TLI.getValueType(I.getType());
if (I.getAlignment() * 8 < VT.getSizeInBits())
report_fatal_error("Cannot generate unaligned atomic load");
@@ -3490,7 +3538,7 @@ void SelectionDAGBuilder::visitAtomicStore(const StoreInst &I) {
SDValue InChain = getRoot();
- EVT VT = EVT::getEVT(I.getValueOperand()->getType());
+ EVT VT = TLI.getValueType(I.getValueOperand()->getType());
if (I.getAlignment() * 8 < VT.getSizeInBits())
report_fatal_error("Cannot generate unaligned atomic store");
@@ -4352,7 +4400,7 @@ static SDValue ExpandPowI(DebugLoc DL, SDValue LHS, SDValue RHS,
return DAG.getConstantFP(1.0, LHS.getValueType());
const Function *F = DAG.getMachineFunction().getFunction();
- if (!F->hasFnAttr(Attribute::OptimizeForSize) ||
+ if (!F->getFnAttributes().hasAttribute(Attributes::OptimizeForSize) ||
// If optimizing for size, don't insert too many multiplies. This
// inserts up to 5 multiplies.
CountPopulation_32(Val)+Log2_32(Val) < 7) {
@@ -4850,7 +4898,21 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
Res = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, DestVT,
getValue(I.getArgOperand(0)),
getValue(I.getArgOperand(1)),
- DAG.getConstant(Idx, MVT::i32));
+ DAG.getIntPtrConstant(Idx));
+ setValue(&I, Res);
+ return 0;
+ }
+ case Intrinsic::x86_avx_vextractf128_pd_256:
+ case Intrinsic::x86_avx_vextractf128_ps_256:
+ case Intrinsic::x86_avx_vextractf128_si_256:
+ case Intrinsic::x86_avx2_vextracti128: {
+ DebugLoc dl = getCurDebugLoc();
+ EVT DestVT = TLI.getValueType(I.getType());
+ uint64_t Idx = (cast<ConstantInt>(I.getArgOperand(1))->getZExtValue() & 1) *
+ DestVT.getVectorNumElements();
+ Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT,
+ getValue(I.getArgOperand(0)),
+ DAG.getIntPtrConstant(Idx));
setValue(&I, Res);
return 0;
}
@@ -5113,10 +5175,13 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
return 0;
}
+ case Intrinsic::debugtrap:
case Intrinsic::trap: {
StringRef TrapFuncName = TM.Options.getTrapFunctionName();
if (TrapFuncName.empty()) {
- DAG.setRoot(DAG.getNode(ISD::TRAP, dl,MVT::Other, getRoot()));
+ ISD::NodeType Op = (Intrinsic == Intrinsic::trap) ?
+ ISD::TRAP : ISD::DEBUGTRAP;
+ DAG.setRoot(DAG.getNode(Op, dl,MVT::Other, getRoot()));
return 0;
}
TargetLowering::ArgListTy Args;
@@ -5131,10 +5196,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
DAG.setRoot(Result.second);
return 0;
}
- case Intrinsic::debugtrap: {
- DAG.setRoot(DAG.getNode(ISD::DEBUGTRAP, dl,MVT::Other, getRoot()));
- return 0;
- }
+
case Intrinsic::uadd_with_overflow:
case Intrinsic::sadd_with_overflow:
case Intrinsic::usub_with_overflow:
@@ -5177,14 +5239,40 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
rw==1)); /* write */
return 0;
}
+ case Intrinsic::lifetime_start:
+ case Intrinsic::lifetime_end: {
+ bool IsStart = (Intrinsic == Intrinsic::lifetime_start);
+ // Stack coloring is not enabled in O0, discard region information.
+ if (TM.getOptLevel() == CodeGenOpt::None)
+ return 0;
+
+ SmallVector<Value *, 4> Allocas;
+ GetUnderlyingObjects(I.getArgOperand(1), Allocas, TD);
+
+ for (SmallVector<Value*, 4>::iterator Object = Allocas.begin(),
+ E = Allocas.end(); Object != E; ++Object) {
+ AllocaInst *LifetimeObject = dyn_cast_or_null<AllocaInst>(*Object);
+ // Could not find an Alloca.
+ if (!LifetimeObject)
+ continue;
+
+ int FI = FuncInfo.StaticAllocaMap[LifetimeObject];
+
+ SDValue Ops[2];
+ Ops[0] = getRoot();
+ Ops[1] = DAG.getFrameIndex(FI, TLI.getPointerTy(), true);
+ unsigned Opcode = (IsStart ? ISD::LIFETIME_START : ISD::LIFETIME_END);
+
+ Res = DAG.getNode(Opcode, dl, MVT::Other, Ops, 2);
+ DAG.setRoot(Res);
+ }
+ }
case Intrinsic::invariant_start:
- case Intrinsic::lifetime_start:
// Discard region information.
setValue(&I, DAG.getUNDEF(TLI.getPointerTy()));
return 0;
case Intrinsic::invariant_end:
- case Intrinsic::lifetime_end:
// Discard region information.
return 0;
case Intrinsic::donothing:
@@ -5220,9 +5308,9 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
int DemoteStackIdx = -100;
if (!CanLowerReturn) {
- uint64_t TySize = TLI.getTargetData()->getTypeAllocSize(
+ uint64_t TySize = TLI.getDataLayout()->getTypeAllocSize(
FTy->getReturnType());
- unsigned Align = TLI.getTargetData()->getPrefTypeAlignment(
+ unsigned Align = TLI.getDataLayout()->getPrefTypeAlignment(
FTy->getReturnType());
MachineFunction &MF = DAG.getMachineFunction();
DemoteStackIdx = MF.getFrameInfo()->CreateStackObject(TySize, Align, false);
@@ -5254,12 +5342,12 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
Entry.Node = ArgNode; Entry.Ty = V->getType();
unsigned attrInd = i - CS.arg_begin() + 1;
- Entry.isSExt = CS.paramHasAttr(attrInd, Attribute::SExt);
- Entry.isZExt = CS.paramHasAttr(attrInd, Attribute::ZExt);
- Entry.isInReg = CS.paramHasAttr(attrInd, Attribute::InReg);
- Entry.isSRet = CS.paramHasAttr(attrInd, Attribute::StructRet);
- Entry.isNest = CS.paramHasAttr(attrInd, Attribute::Nest);
- Entry.isByVal = CS.paramHasAttr(attrInd, Attribute::ByVal);
+ Entry.isSExt = CS.paramHasAttr(attrInd, Attributes::SExt);
+ Entry.isZExt = CS.paramHasAttr(attrInd, Attributes::ZExt);
+ Entry.isInReg = CS.paramHasAttr(attrInd, Attributes::InReg);
+ Entry.isSRet = CS.paramHasAttr(attrInd, Attributes::StructRet);
+ Entry.isNest = CS.paramHasAttr(attrInd, Attributes::Nest);
+ Entry.isByVal = CS.paramHasAttr(attrInd, Attributes::ByVal);
Entry.Alignment = CS.getParamAlignment(attrInd);
Args.push_back(Entry);
}
@@ -5687,7 +5775,7 @@ public:
/// MVT::Other.
EVT getCallOperandValEVT(LLVMContext &Context,
const TargetLowering &TLI,
- const TargetData *TD) const {
+ const DataLayout *TD) const {
if (CallOperandVal == 0) return MVT::Other;
if (isa<BasicBlock>(CallOperandVal))
@@ -5991,8 +6079,8 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
// Otherwise, create a stack slot and emit a store to it before the
// asm.
Type *Ty = OpVal->getType();
- uint64_t TySize = TLI.getTargetData()->getTypeAllocSize(Ty);
- unsigned Align = TLI.getTargetData()->getPrefTypeAlignment(Ty);
+ uint64_t TySize = TLI.getDataLayout()->getTypeAllocSize(Ty);
+ unsigned Align = TLI.getDataLayout()->getPrefTypeAlignment(Ty);
MachineFunction &MF = DAG.getMachineFunction();
int SSFI = MF.getFrameInfo()->CreateStackObject(TySize, Align, false);
SDValue StackSlot = DAG.getFrameIndex(SSFI, TLI.getPointerTy());
@@ -6040,12 +6128,36 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
const MDNode *SrcLoc = CS.getInstruction()->getMetadata("srcloc");
AsmNodeOperands.push_back(DAG.getMDNode(SrcLoc));
- // Remember the HasSideEffect and AlignStack bits as operand 3.
+ // Remember the HasSideEffect, AlignStack, AsmDialect, MayLoad and MayStore
+ // bits as operand 3.
unsigned ExtraInfo = 0;
if (IA->hasSideEffects())
ExtraInfo |= InlineAsm::Extra_HasSideEffects;
if (IA->isAlignStack())
ExtraInfo |= InlineAsm::Extra_IsAlignStack;
+ // Set the asm dialect.
+ ExtraInfo |= IA->getDialect() * InlineAsm::Extra_AsmDialect;
+
+ // Determine if this InlineAsm MayLoad or MayStore based on the constraints.
+ for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) {
+ TargetLowering::AsmOperandInfo &OpInfo = TargetConstraints[i];
+
+ // Compute the constraint code and ConstraintType to use.
+ TLI.ComputeConstraintToUse(OpInfo, SDValue());
+
+ // Ideally, we would only check against memory constraints. However, the
+ // meaning of an other constraint can be target-specific and we can't easily
+ // reason about it. Therefore, be conservative and set MayLoad/MayStore
+ // for other constriants as well.
+ if (OpInfo.ConstraintType == TargetLowering::C_Memory ||
+ OpInfo.ConstraintType == TargetLowering::C_Other) {
+ if (OpInfo.Type == InlineAsm::isInput)
+ ExtraInfo |= InlineAsm::Extra_MayLoad;
+ else if (OpInfo.Type == InlineAsm::isOutput)
+ ExtraInfo |= InlineAsm::Extra_MayStore;
+ }
+ }
+
AsmNodeOperands.push_back(DAG.getTargetConstant(ExtraInfo,
TLI.getPointerTy()));
@@ -6155,7 +6267,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
// Use the produced MatchedRegs object to
MatchedRegs.getCopyToRegs(InOperandVal, DAG, getCurDebugLoc(),
- Chain, &Flag);
+ Chain, &Flag, CS.getInstruction());
MatchedRegs.AddInlineAsmOperands(InlineAsm::Kind_RegUse,
true, OpInfo.getMatchedOperand(),
DAG, AsmNodeOperands);
@@ -6237,7 +6349,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
}
OpInfo.AssignedRegs.getCopyToRegs(InOperandVal, DAG, getCurDebugLoc(),
- Chain, &Flag);
+ Chain, &Flag, CS.getInstruction());
OpInfo.AssignedRegs.AddInlineAsmOperands(InlineAsm::Kind_RegUse, false, 0,
DAG, AsmNodeOperands);
@@ -6268,7 +6380,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
// and set it as the value of the call.
if (!RetValRegs.Regs.empty()) {
SDValue Val = RetValRegs.getCopyFromRegs(DAG, FuncInfo, getCurDebugLoc(),
- Chain, &Flag);
+ Chain, &Flag, CS.getInstruction());
// FIXME: Why don't we do this for inline asms with MRVs?
if (CS.getType()->isSingleValueType() && CS.getType()->isSized()) {
@@ -6308,7 +6420,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
RegsForValue &OutRegs = IndirectStoresToEmit[i].first;
const Value *Ptr = IndirectStoresToEmit[i].second;
SDValue OutVal = OutRegs.getCopyFromRegs(DAG, FuncInfo, getCurDebugLoc(),
- Chain, &Flag);
+ Chain, &Flag, IA);
StoresToEmit.push_back(std::make_pair(OutVal, Ptr));
}
@@ -6338,7 +6450,7 @@ void SelectionDAGBuilder::visitVAStart(const CallInst &I) {
}
void SelectionDAGBuilder::visitVAArg(const VAArgInst &I) {
- const TargetData &TD = *TLI.getTargetData();
+ const DataLayout &TD = *TLI.getDataLayout();
SDValue V = DAG.getVAArg(TLI.getValueType(I.getType()), getCurDebugLoc(),
getRoot(), getValue(I.getOperand(0)),
DAG.getSrcValue(I.getOperand(0)),
@@ -6384,7 +6496,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
Args[i].Node.getResNo() + Value);
ISD::ArgFlagsTy Flags;
unsigned OriginalAlignment =
- getTargetData()->getABITypeAlignment(ArgTy);
+ getDataLayout()->getABITypeAlignment(ArgTy);
if (Args[i].isZExt)
Flags.setZExt();
@@ -6398,7 +6510,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
Flags.setByVal();
PointerType *Ty = cast<PointerType>(Args[i].Ty);
Type *ElementTy = Ty->getElementType();
- Flags.setByValSize(getTargetData()->getTypeAllocSize(ElementTy));
+ Flags.setByValSize(getDataLayout()->getTypeAllocSize(ElementTy));
// For ByVal, alignment should come from FE. BE will guess if this
// info is not there but there are cases it cannot get right.
unsigned FrameAlign;
@@ -6423,12 +6535,13 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
ExtendKind = ISD::ZERO_EXTEND;
getCopyToParts(CLI.DAG, CLI.DL, Op, &Parts[0], NumParts,
- PartVT, ExtendKind);
+ PartVT, CLI.CS ? CLI.CS->getInstruction() : 0, ExtendKind);
for (unsigned j = 0; j != NumParts; ++j) {
// if it isn't first piece, alignment must be 1
ISD::OutputArg MyFlags(Flags, Parts[j].getValueType(),
- i < CLI.NumFixedArgs);
+ i < CLI.NumFixedArgs,
+ i, j*Parts[j].getValueType().getStoreSize());
if (NumParts > 1 && j == 0)
MyFlags.Flags.setSplit();
else if (j != 0)
@@ -6504,7 +6617,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
unsigned NumRegs = getNumRegisters(CLI.RetTy->getContext(), VT);
ReturnValues.push_back(getCopyFromParts(CLI.DAG, CLI.DL, &InVals[CurReg],
- NumRegs, RegisterVT, VT,
+ NumRegs, RegisterVT, VT, NULL,
AssertOp));
CurReg += NumRegs;
}
@@ -6543,7 +6656,7 @@ SelectionDAGBuilder::CopyValueToVirtualRegister(const Value *V, unsigned Reg) {
RegsForValue RFV(V->getContext(), TLI, Reg, V->getType());
SDValue Chain = DAG.getEntryNode();
- RFV.getCopyToRegs(Op, DAG, getCurDebugLoc(), Chain, 0);
+ RFV.getCopyToRegs(Op, DAG, getCurDebugLoc(), Chain, 0, V);
PendingExports.push_back(Chain);
}
@@ -6573,7 +6686,7 @@ void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) {
const Function &F = *LLVMBB->getParent();
SelectionDAG &DAG = SDB->DAG;
DebugLoc dl = SDB->getCurDebugLoc();
- const TargetData *TD = TLI.getTargetData();
+ const DataLayout *TD = TLI.getDataLayout();
SmallVector<ISD::InputArg, 16> Ins;
// Check whether the function can return without sret-demotion.
@@ -6591,7 +6704,7 @@ void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) {
ISD::ArgFlagsTy Flags;
Flags.setSRet();
EVT RegisterVT = TLI.getRegisterType(*DAG.getContext(), ValueVTs[0]);
- ISD::InputArg RetArg(Flags, RegisterVT, true);
+ ISD::InputArg RetArg(Flags, RegisterVT, true, 0, 0);
Ins.push_back(RetArg);
}
@@ -6610,15 +6723,15 @@ void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) {
unsigned OriginalAlignment =
TD->getABITypeAlignment(ArgTy);
- if (F.paramHasAttr(Idx, Attribute::ZExt))
+ if (F.getParamAttributes(Idx).hasAttribute(Attributes::ZExt))
Flags.setZExt();
- if (F.paramHasAttr(Idx, Attribute::SExt))
+ if (F.getParamAttributes(Idx).hasAttribute(Attributes::SExt))
Flags.setSExt();
- if (F.paramHasAttr(Idx, Attribute::InReg))
+ if (F.getParamAttributes(Idx).hasAttribute(Attributes::InReg))
Flags.setInReg();
- if (F.paramHasAttr(Idx, Attribute::StructRet))
+ if (F.getParamAttributes(Idx).hasAttribute(Attributes::StructRet))
Flags.setSRet();
- if (F.paramHasAttr(Idx, Attribute::ByVal)) {
+ if (F.getParamAttributes(Idx).hasAttribute(Attributes::ByVal)) {
Flags.setByVal();
PointerType *Ty = cast<PointerType>(I->getType());
Type *ElementTy = Ty->getElementType();
@@ -6632,14 +6745,15 @@ void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) {
FrameAlign = TLI.getByValTypeAlignment(ElementTy);
Flags.setByValAlign(FrameAlign);
}
- if (F.paramHasAttr(Idx, Attribute::Nest))
+ if (F.getParamAttributes(Idx).hasAttribute(Attributes::Nest))
Flags.setNest();
Flags.setOrigAlign(OriginalAlignment);
EVT RegisterVT = TLI.getRegisterType(*CurDAG->getContext(), VT);
unsigned NumRegs = TLI.getNumRegisters(*CurDAG->getContext(), VT);
for (unsigned i = 0; i != NumRegs; ++i) {
- ISD::InputArg MyFlags(Flags, RegisterVT, isArgValueUsed);
+ ISD::InputArg MyFlags(Flags, RegisterVT, isArgValueUsed,
+ Idx-1, i*RegisterVT.getStoreSize());
if (NumRegs > 1 && i == 0)
MyFlags.Flags.setSplit();
// if it isn't first piece, alignment must be 1
@@ -6685,7 +6799,7 @@ void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) {
EVT RegVT = TLI.getRegisterType(*CurDAG->getContext(), VT);
ISD::NodeType AssertOp = ISD::DELETED_NODE;
SDValue ArgValue = getCopyFromParts(DAG, dl, &InVals[0], 1,
- RegVT, VT, AssertOp);
+ RegVT, VT, NULL, AssertOp);
MachineFunction& MF = SDB->DAG.getMachineFunction();
MachineRegisterInfo& RegInfo = MF.getRegInfo();
@@ -6719,14 +6833,14 @@ void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) {
if (!I->use_empty()) {
ISD::NodeType AssertOp = ISD::DELETED_NODE;
- if (F.paramHasAttr(Idx, Attribute::SExt))
+ if (F.getParamAttributes(Idx).hasAttribute(Attributes::SExt))
AssertOp = ISD::AssertSext;
- else if (F.paramHasAttr(Idx, Attribute::ZExt))
+ else if (F.getParamAttributes(Idx).hasAttribute(Attributes::ZExt))
AssertOp = ISD::AssertZext;
ArgValues.push_back(getCopyFromParts(DAG, dl, &InVals[i],
NumParts, PartVT, VT,
- AssertOp));
+ NULL, AssertOp));
}
i += NumParts;
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
index 4090002..9e46d96 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
@@ -66,7 +66,7 @@ class ShuffleVectorInst;
class SIToFPInst;
class StoreInst;
class SwitchInst;
-class TargetData;
+class DataLayout;
class TargetLibraryInfo;
class TargetLowering;
class TruncInst;
@@ -150,9 +150,11 @@ private:
uint64_t Mask;
MachineBasicBlock* BB;
unsigned Bits;
+ uint32_t ExtraWeight;
- CaseBits(uint64_t mask, MachineBasicBlock* bb, unsigned bits):
- Mask(mask), BB(bb), Bits(bits) { }
+ CaseBits(uint64_t mask, MachineBasicBlock* bb, unsigned bits,
+ uint32_t Weight):
+ Mask(mask), BB(bb), Bits(bits), ExtraWeight(Weight) { }
};
typedef std::vector<Case> CaseVector;
@@ -247,11 +249,13 @@ private:
typedef std::pair<JumpTableHeader, JumpTable> JumpTableBlock;
struct BitTestCase {
- BitTestCase(uint64_t M, MachineBasicBlock* T, MachineBasicBlock* Tr):
- Mask(M), ThisBB(T), TargetBB(Tr) { }
+ BitTestCase(uint64_t M, MachineBasicBlock* T, MachineBasicBlock* Tr,
+ uint32_t Weight):
+ Mask(M), ThisBB(T), TargetBB(Tr), ExtraWeight(Weight) { }
uint64_t Mask;
MachineBasicBlock *ThisBB;
MachineBasicBlock *TargetBB;
+ uint32_t ExtraWeight;
};
typedef SmallVector<BitTestCase, 3> BitTestInfo;
@@ -281,7 +285,7 @@ public:
const TargetMachine &TM;
const TargetLowering &TLI;
SelectionDAG &DAG;
- const TargetData *TD;
+ const DataLayout *TD;
AliasAnalysis *AA;
const TargetLibraryInfo *LibInfo;
@@ -325,7 +329,7 @@ public:
CodeGenOpt::Level ol)
: SDNodeOrder(0), TM(dag.getTarget()), TLI(dag.getTargetLoweringInfo()),
DAG(dag), FuncInfo(funcinfo), OptLevel(ol),
- HasTailCall(false), Context(dag.getContext()) {
+ HasTailCall(false) {
}
void init(GCFunctionInfo *gfi, AliasAnalysis &aa,
@@ -452,6 +456,7 @@ public:
void visitBitTestHeader(BitTestBlock &B, MachineBasicBlock *SwitchBB);
void visitBitTestCase(BitTestBlock &BB,
MachineBasicBlock* NextMBB,
+ uint32_t BranchWeightToNext,
unsigned Reg,
BitTestCase &B,
MachineBasicBlock *SwitchBB);
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
index 13cd011..6f3ce7a 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
@@ -267,6 +267,8 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::STACKRESTORE: return "stackrestore";
case ISD::TRAP: return "trap";
case ISD::DEBUGTRAP: return "debugtrap";
+ case ISD::LIFETIME_START: return "lifetime.start";
+ case ISD::LIFETIME_END: return "lifetime.end";
// Bit manipulation
case ISD::BSWAP: return "bswap";
@@ -331,7 +333,7 @@ void SDNode::dump(const SelectionDAG *G) const {
}
void SDNode::print_types(raw_ostream &OS, const SelectionDAG *G) const {
- OS << (void*)this << ": ";
+ OS << (const void*)this << ": ";
for (unsigned i = 0, e = getNumValues(); i != e; ++i) {
if (i) OS << ",";
@@ -473,11 +475,16 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const {
OS << "<" << *M->getMemOperand() << ">";
} else if (const BlockAddressSDNode *BA =
dyn_cast<BlockAddressSDNode>(this)) {
+ int64_t offset = BA->getOffset();
OS << "<";
WriteAsOperand(OS, BA->getBlockAddress()->getFunction(), false);
OS << ", ";
WriteAsOperand(OS, BA->getBlockAddress()->getBasicBlock(), false);
OS << ">";
+ if (offset > 0)
+ OS << " + " << offset;
+ else
+ OS << " " << offset;
if (unsigned int TF = BA->getTargetFlags())
OS << " [TF=" << TF << ']';
}
@@ -559,7 +566,7 @@ static void DumpNodesr(raw_ostream &OS, const SDNode *N, unsigned indent,
child->printr(OS, G);
once.insert(child);
} else { // Just the address. FIXME: also print the child's opcode.
- OS << (void*)child;
+ OS << (const void*)child;
if (unsigned RN = N->getOperand(i).getResNo())
OS << ":" << RN;
}
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index 4e5e3ba..c314fa5 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -474,6 +474,11 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
MRI.replaceRegWith(From, To);
}
+ // Freeze the set of reserved registers now that MachineFrameInfo has been
+ // set up. All the information required by getReservedRegs() should be
+ // available now.
+ MRI.freezeReservedRegs(*MF);
+
// Release function-specific state. SDB and CurDAG are already cleared
// at this point.
FuncInfo->clear();
@@ -554,7 +559,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
#endif
{
BlockNumber = FuncInfo->MBB->getNumber();
- BlockName = MF->getFunction()->getName().str() + ":" +
+ BlockName = MF->getName().str() + ":" +
FuncInfo->MBB->getBasicBlock()->getName().str();
}
DEBUG(dbgs() << "Initial selection DAG: BB#" << BlockNumber
@@ -1209,7 +1214,12 @@ SelectionDAGISel::FinishBasicBlock() {
CodeGenAndEmitDAG();
}
+ uint32_t UnhandledWeight = 0;
+ for (unsigned j = 0, ej = SDB->BitTestCases[i].Cases.size(); j != ej; ++j)
+ UnhandledWeight += SDB->BitTestCases[i].Cases[j].ExtraWeight;
+
for (unsigned j = 0, ej = SDB->BitTestCases[i].Cases.size(); j != ej; ++j) {
+ UnhandledWeight -= SDB->BitTestCases[i].Cases[j].ExtraWeight;
// Set the current basic block to the mbb we wish to insert the code into
FuncInfo->MBB = SDB->BitTestCases[i].Cases[j].ThisBB;
FuncInfo->InsertPt = FuncInfo->MBB->end();
@@ -1217,12 +1227,14 @@ SelectionDAGISel::FinishBasicBlock() {
if (j+1 != ej)
SDB->visitBitTestCase(SDB->BitTestCases[i],
SDB->BitTestCases[i].Cases[j+1].ThisBB,
+ UnhandledWeight,
SDB->BitTestCases[i].Reg,
SDB->BitTestCases[i].Cases[j],
FuncInfo->MBB);
else
SDB->visitBitTestCase(SDB->BitTestCases[i],
SDB->BitTestCases[i].Default,
+ UnhandledWeight,
SDB->BitTestCases[i].Reg,
SDB->BitTestCases[i].Cases[j],
FuncInfo->MBB);
@@ -1794,10 +1806,13 @@ WalkChainUsers(const SDNode *ChainedNode,
User->getOpcode() == ISD::HANDLENODE) // Root of the graph.
continue;
- if (User->getOpcode() == ISD::CopyToReg ||
- User->getOpcode() == ISD::CopyFromReg ||
- User->getOpcode() == ISD::INLINEASM ||
- User->getOpcode() == ISD::EH_LABEL) {
+ unsigned UserOpcode = User->getOpcode();
+ if (UserOpcode == ISD::CopyToReg ||
+ UserOpcode == ISD::CopyFromReg ||
+ UserOpcode == ISD::INLINEASM ||
+ UserOpcode == ISD::EH_LABEL ||
+ UserOpcode == ISD::LIFETIME_START ||
+ UserOpcode == ISD::LIFETIME_END) {
// If their node ID got reset to -1 then they've already been selected.
// Treat them like a MachineOpcode.
if (User->getNodeId() == -1)
@@ -1994,7 +2009,7 @@ MorphNode(SDNode *Node, unsigned TargetOpc, SDVTList VTList,
return Res;
}
-/// CheckPatternPredicate - Implements OP_CheckPatternPredicate.
+/// CheckSame - Implements OP_CheckSame.
LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
CheckSame(const unsigned char *MatcherTable, unsigned &MatcherIndex,
SDValue N,
@@ -2213,6 +2228,8 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
case ISD::CopyFromReg:
case ISD::CopyToReg:
case ISD::EH_LABEL:
+ case ISD::LIFETIME_START:
+ case ISD::LIFETIME_END:
NodeToMatch->setNodeId(-1); // Mark selected.
return 0;
case ISD::AssertSext:
@@ -2981,7 +2998,7 @@ void SelectionDAGISel::CannotYetSelect(SDNode *N) {
N->getOpcode() != ISD::INTRINSIC_WO_CHAIN &&
N->getOpcode() != ISD::INTRINSIC_VOID) {
N->printrFull(Msg, CurDAG);
- Msg << "\nIn function: " << MF->getFunction()->getName();
+ Msg << "\nIn function: " << MF->getName();
} else {
bool HasInputChain = N->getOperand(0).getValueType() == MVT::Other;
unsigned iid =
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
index 173ffac..3921635 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
@@ -14,7 +14,6 @@
#include "ScheduleDAGSDNodes.h"
#include "llvm/Constants.h"
#include "llvm/DebugInfo.h"
-#include "llvm/Function.h"
#include "llvm/Assembly/Writer.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/MachineConstantPool.h"
@@ -50,7 +49,7 @@ namespace llvm {
template<typename EdgeIter>
static std::string getEdgeSourceLabel(const void *Node, EdgeIter I) {
- return itostr(I - SDNodeIterator::begin((SDNode *) Node));
+ return itostr(I - SDNodeIterator::begin((const SDNode *) Node));
}
/// edgeTargetsEdgeSource - This method returns true if this outgoing edge
@@ -73,7 +72,7 @@ namespace llvm {
}
static std::string getGraphName(const SelectionDAG *G) {
- return G->getMachineFunction().getFunction()->getName();
+ return G->getMachineFunction().getName();
}
static bool renderGraphFromBottomUp() {
@@ -146,7 +145,7 @@ std::string DOTGraphTraits<SelectionDAG*>::getNodeLabel(const SDNode *Node,
void SelectionDAG::viewGraph(const std::string &Title) {
// This code is only for debugging!
#ifndef NDEBUG
- ViewGraph(this, "dag." + getMachineFunction().getFunction()->getName(),
+ ViewGraph(this, "dag." + getMachineFunction().getName(),
false, Title);
#else
errs() << "SelectionDAG::viewGraph is only available in debug builds on "
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 6820175..49f55e2 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -14,7 +14,7 @@
#include "llvm/Target/TargetLowering.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCExpr.h"
-#include "llvm/Target/TargetData.h"
+#include "llvm/DataLayout.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
@@ -515,7 +515,7 @@ static void InitCmpLibcallCCs(ISD::CondCode *CCs) {
/// NOTE: The constructor takes ownership of TLOF.
TargetLowering::TargetLowering(const TargetMachine &tm,
const TargetLoweringObjectFile *tlof)
- : TM(tm), TD(TM.getTargetData()), TLOF(*tlof) {
+ : TM(tm), TD(TM.getDataLayout()), TLOF(*tlof) {
// All operations default to being supported.
memset(OpActions, 0, sizeof(OpActions));
memset(LoadExtActions, 0, sizeof(LoadExtActions));
@@ -583,8 +583,13 @@ TargetLowering::TargetLowering(const TargetMachine &tm,
// Default ISD::TRAP to expand (which turns it into abort).
setOperationAction(ISD::TRAP, MVT::Other, Expand);
+ // On most systems, DEBUGTRAP and TRAP have no difference. The "Expand"
+ // here is to inform DAG Legalizer to replace DEBUGTRAP with TRAP.
+ //
+ setOperationAction(ISD::DEBUGTRAP, MVT::Other, Expand);
+
IsLittleEndian = TD->isLittleEndian();
- PointerTy = MVT::getIntegerVT(8*TD->getPointerSize());
+ PointerTy = MVT::getIntegerVT(8*TD->getPointerSize(0));
memset(RegClassForVT, 0,MVT::LAST_VALUETYPE*sizeof(TargetRegisterClass*));
memset(TargetDAGCombineArray, 0, array_lengthof(TargetDAGCombineArray));
maxStoresPerMemset = maxStoresPerMemcpy = maxStoresPerMemmove = 8;
@@ -613,6 +618,7 @@ TargetLowering::TargetLowering(const TargetMachine &tm,
ShouldFoldAtomicFences = false;
InsertFencesForAtomic = false;
SupportJumpTables = true;
+ MinimumJumpTableEntries = 4;
InitLibcallNames(LibcallRoutineNames);
InitCmpLibcallCCs(CmpLibcallCCs);
@@ -624,7 +630,7 @@ TargetLowering::~TargetLowering() {
}
MVT TargetLowering::getShiftAmountTy(EVT LHSTy) const {
- return MVT::getIntegerVT(8*TD->getPointerSize());
+ return MVT::getIntegerVT(8*TD->getPointerSize(0));
}
/// canOpTrap - Returns true if the operation can trap for the value type.
@@ -772,7 +778,7 @@ void TargetLowering::computeRegisterProperties() {
LegalIntReg = IntReg;
} else {
RegisterTypeForVT[IntReg] = TransformToType[IntReg] =
- (MVT::SimpleValueType)LegalIntReg;
+ (const MVT::SimpleValueType)LegalIntReg;
ValueTypeActions.setTypeAction(IVT, TypePromoteInteger);
}
}
@@ -898,10 +904,9 @@ const char *TargetLowering::getTargetNodeName(unsigned Opcode) const {
return NULL;
}
-
EVT TargetLowering::getSetCCResultType(EVT VT) const {
assert(!VT.isVector() && "No default SetCC type for vectors!");
- return PointerTy.SimpleTy;
+ return getPointerTy(0).SimpleTy;
}
MVT::SimpleValueType TargetLowering::getCmpLibcallReturnType() const {
@@ -997,9 +1002,9 @@ void llvm::GetReturnInfo(Type* ReturnType, Attributes attr,
EVT VT = ValueVTs[j];
ISD::NodeType ExtendKind = ISD::ANY_EXTEND;
- if (attr & Attribute::SExt)
+ if (attr.hasAttribute(Attributes::SExt))
ExtendKind = ISD::SIGN_EXTEND;
- else if (attr & Attribute::ZExt)
+ else if (attr.hasAttribute(Attributes::ZExt))
ExtendKind = ISD::ZERO_EXTEND;
// FIXME: C calling convention requires the return type to be promoted to
@@ -1017,18 +1022,17 @@ void llvm::GetReturnInfo(Type* ReturnType, Attributes attr,
// 'inreg' on function refers to return value
ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy();
- if (attr & Attribute::InReg)
+ if (attr.hasAttribute(Attributes::InReg))
Flags.setInReg();
// Propagate extension type if any
- if (attr & Attribute::SExt)
+ if (attr.hasAttribute(Attributes::SExt))
Flags.setSExt();
- else if (attr & Attribute::ZExt)
+ else if (attr.hasAttribute(Attributes::ZExt))
Flags.setZExt();
- for (unsigned i = 0; i < NumParts; ++i) {
- Outs.push_back(ISD::OutputArg(Flags, PartVT, /*isFixed=*/true));
- }
+ for (unsigned i = 0; i < NumParts; ++i)
+ Outs.push_back(ISD::OutputArg(Flags, PartVT, /*isFixed=*/true, 0, 0));
}
}
@@ -1062,7 +1066,7 @@ SDValue TargetLowering::getPICJumpTableRelocBase(SDValue Table,
if ((JTEncoding == MachineJumpTableInfo::EK_GPRel64BlockAddress) ||
(JTEncoding == MachineJumpTableInfo::EK_GPRel32BlockAddress))
- return DAG.getGLOBAL_OFFSET_TABLE(getPointerTy());
+ return DAG.getGLOBAL_OFFSET_TABLE(getPointerTy(0));
return Table;
}
@@ -2441,7 +2445,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
if (N0 == N1) {
// The sext(setcc()) => setcc() optimization relies on the appropriate
// constant being emitted.
- uint64_t EqVal;
+ uint64_t EqVal = 0;
switch (getBooleanContents(N0.getValueType().isVector())) {
case UndefinedBooleanContent:
case ZeroOrOneBooleanContent:
@@ -2954,8 +2958,9 @@ TargetLowering::AsmOperandInfoVector TargetLowering::ParseConstraints(
EVT::getEVT(IntegerType::get(OpTy->getContext(), BitSize), true);
break;
}
- } else if (dyn_cast<PointerType>(OpTy)) {
- OpInfo.ConstraintVT = MVT::getIntegerVT(8*TD->getPointerSize());
+ } else if (PointerType *PT = dyn_cast<PointerType>(OpTy)) {
+ OpInfo.ConstraintVT = MVT::getIntegerVT(
+ 8*TD->getPointerSize(PT->getAddressSpace()));
} else {
OpInfo.ConstraintVT = EVT::getEVT(OpTy, true);
}
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/TargetSelectionDAGInfo.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/TargetSelectionDAGInfo.cpp
index a081e3c..f769b44 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/TargetSelectionDAGInfo.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/TargetSelectionDAGInfo.cpp
@@ -16,7 +16,7 @@
using namespace llvm;
TargetSelectionDAGInfo::TargetSelectionDAGInfo(const TargetMachine &TM)
- : TD(TM.getTargetData()) {
+ : TD(TM.getDataLayout()) {
}
TargetSelectionDAGInfo::~TargetSelectionDAGInfo() {
diff --git a/contrib/llvm/lib/CodeGen/ShrinkWrapping.cpp b/contrib/llvm/lib/CodeGen/ShrinkWrapping.cpp
index 21ae2f5..4fbe1b3 100644
--- a/contrib/llvm/lib/CodeGen/ShrinkWrapping.cpp
+++ b/contrib/llvm/lib/CodeGen/ShrinkWrapping.cpp
@@ -159,7 +159,7 @@ void PEI::initShrinkWrappingInfo() {
// via --shrink-wrap-func=<funcname>.
#ifndef NDEBUG
if (ShrinkWrapFunc != "") {
- std::string MFName = MF->getFunction()->getName().str();
+ std::string MFName = MF->getName().str();
ShrinkWrapThisFunction = (MFName == ShrinkWrapFunc);
}
#endif
@@ -187,7 +187,7 @@ void PEI::placeCSRSpillsAndRestores(MachineFunction &Fn) {
DEBUG(if (ShrinkWrapThisFunction) {
dbgs() << "Place CSR spills/restores for "
- << MF->getFunction()->getName() << "\n";
+ << MF->getName() << "\n";
});
if (calculateSets(Fn))
@@ -364,7 +364,7 @@ bool PEI::calculateSets(MachineFunction &Fn) {
// If no CSRs used, we are done.
if (CSI.empty()) {
DEBUG(if (ShrinkWrapThisFunction)
- dbgs() << "DISABLED: " << Fn.getFunction()->getName()
+ dbgs() << "DISABLED: " << Fn.getName()
<< ": uses no callee-saved registers\n");
return false;
}
@@ -384,7 +384,7 @@ bool PEI::calculateSets(MachineFunction &Fn) {
// implementation to functions with <= 500 MBBs.
if (Fn.size() > 500) {
DEBUG(if (ShrinkWrapThisFunction)
- dbgs() << "DISABLED: " << Fn.getFunction()->getName()
+ dbgs() << "DISABLED: " << Fn.getName()
<< ": too large (" << Fn.size() << " MBBs)\n");
ShrinkWrapThisFunction = false;
}
@@ -466,7 +466,7 @@ bool PEI::calculateSets(MachineFunction &Fn) {
}
if (allCSRUsesInEntryBlock) {
- DEBUG(dbgs() << "DISABLED: " << Fn.getFunction()->getName()
+ DEBUG(dbgs() << "DISABLED: " << Fn.getName()
<< ": all CSRs used in EntryBlock\n");
ShrinkWrapThisFunction = false;
} else {
@@ -478,7 +478,7 @@ bool PEI::calculateSets(MachineFunction &Fn) {
allCSRsUsedInEntryFanout = false;
}
if (allCSRsUsedInEntryFanout) {
- DEBUG(dbgs() << "DISABLED: " << Fn.getFunction()->getName()
+ DEBUG(dbgs() << "DISABLED: " << Fn.getName()
<< ": all CSRs used in imm successors of EntryBlock\n");
ShrinkWrapThisFunction = false;
}
@@ -505,7 +505,7 @@ bool PEI::calculateSets(MachineFunction &Fn) {
if (dominatesExitNodes) {
CSRUsedInChokePoints |= CSRUsed[MBB];
if (CSRUsedInChokePoints == UsedCSRegs) {
- DEBUG(dbgs() << "DISABLED: " << Fn.getFunction()->getName()
+ DEBUG(dbgs() << "DISABLED: " << Fn.getName()
<< ": all CSRs used in choke point(s) at "
<< getBasicBlockName(MBB) << "\n");
ShrinkWrapThisFunction = false;
@@ -521,7 +521,7 @@ bool PEI::calculateSets(MachineFunction &Fn) {
return false;
DEBUG({
- dbgs() << "ENABLED: " << Fn.getFunction()->getName();
+ dbgs() << "ENABLED: " << Fn.getName();
if (HasFastExitPath)
dbgs() << " (fast exit path)";
dbgs() << "\n";
@@ -861,7 +861,7 @@ void PEI::placeSpillsAndRestores(MachineFunction &Fn) {
DEBUG(if (ShrinkWrapDebugging >= BasicInfo) {
dbgs() << "-----------------------------------------------------------\n";
dbgs() << "total iterations = " << iterations << " ( "
- << Fn.getFunction()->getName()
+ << Fn.getName()
<< " " << numSRReducedThisFunc
<< " " << Fn.size()
<< " )\n";
@@ -984,7 +984,7 @@ void PEI::verifySpillRestorePlacement() {
if (isReturnBlock(SBB) || SBB->succ_size() == 0) {
if (restored != spilled) {
CSRegSet notRestored = (spilled - restored);
- DEBUG(dbgs() << MF->getFunction()->getName() << ": "
+ DEBUG(dbgs() << MF->getName() << ": "
<< stringifyCSRegSet(notRestored)
<< " spilled at " << getBasicBlockName(MBB)
<< " are never restored on path to return "
@@ -1032,7 +1032,7 @@ void PEI::verifySpillRestorePlacement() {
}
if (spilled != restored) {
CSRegSet notSpilled = (restored - spilled);
- DEBUG(dbgs() << MF->getFunction()->getName() << ": "
+ DEBUG(dbgs() << MF->getName() << ": "
<< stringifyCSRegSet(notSpilled)
<< " restored at " << getBasicBlockName(MBB)
<< " are never spilled\n");
diff --git a/contrib/llvm/lib/CodeGen/SjLjEHPrepare.cpp b/contrib/llvm/lib/CodeGen/SjLjEHPrepare.cpp
index 980bd74..4b566fc 100644
--- a/contrib/llvm/lib/CodeGen/SjLjEHPrepare.cpp
+++ b/contrib/llvm/lib/CodeGen/SjLjEHPrepare.cpp
@@ -30,7 +30,7 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetData.h"
+#include "llvm/DataLayout.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
@@ -191,58 +191,43 @@ setupFunctionContext(Function &F, ArrayRef<LandingPadInst*> LPads) {
// that needs to be restored on all exits from the function. This is an alloca
// because the value needs to be added to the global context list.
unsigned Align =
- TLI->getTargetData()->getPrefTypeAlignment(FunctionContextTy);
+ TLI->getDataLayout()->getPrefTypeAlignment(FunctionContextTy);
FuncCtx =
new AllocaInst(FunctionContextTy, 0, Align, "fn_context", EntryBB->begin());
// Fill in the function context structure.
- Type *Int32Ty = Type::getInt32Ty(F.getContext());
- Value *Zero = ConstantInt::get(Int32Ty, 0);
- Value *One = ConstantInt::get(Int32Ty, 1);
- Value *Two = ConstantInt::get(Int32Ty, 2);
- Value *Three = ConstantInt::get(Int32Ty, 3);
- Value *Four = ConstantInt::get(Int32Ty, 4);
-
- Value *Idxs[2] = { Zero, 0 };
-
for (unsigned I = 0, E = LPads.size(); I != E; ++I) {
LandingPadInst *LPI = LPads[I];
IRBuilder<> Builder(LPI->getParent()->getFirstInsertionPt());
// Reference the __data field.
- Idxs[1] = Two;
- Value *FCData = Builder.CreateGEP(FuncCtx, Idxs, "__data");
+ Value *FCData = Builder.CreateConstGEP2_32(FuncCtx, 0, 2, "__data");
// The exception values come back in context->__data[0].
- Idxs[1] = Zero;
- Value *ExceptionAddr = Builder.CreateGEP(FCData, Idxs, "exception_gep");
+ Value *ExceptionAddr = Builder.CreateConstGEP2_32(FCData, 0, 0,
+ "exception_gep");
Value *ExnVal = Builder.CreateLoad(ExceptionAddr, true, "exn_val");
- ExnVal = Builder.CreateIntToPtr(ExnVal, Type::getInt8PtrTy(F.getContext()));
+ ExnVal = Builder.CreateIntToPtr(ExnVal, Builder.getInt8PtrTy());
- Idxs[1] = One;
- Value *SelectorAddr = Builder.CreateGEP(FCData, Idxs, "exn_selector_gep");
+ Value *SelectorAddr = Builder.CreateConstGEP2_32(FCData, 0, 1,
+ "exn_selector_gep");
Value *SelVal = Builder.CreateLoad(SelectorAddr, true, "exn_selector_val");
substituteLPadValues(LPI, ExnVal, SelVal);
}
// Personality function
- Idxs[1] = Three;
+ IRBuilder<> Builder(EntryBB->getTerminator());
if (!PersonalityFn)
PersonalityFn = LPads[0]->getPersonalityFn();
- Value *PersonalityFieldPtr =
- GetElementPtrInst::Create(FuncCtx, Idxs, "pers_fn_gep",
- EntryBB->getTerminator());
- new StoreInst(PersonalityFn, PersonalityFieldPtr, true,
- EntryBB->getTerminator());
+ Value *PersonalityFieldPtr = Builder.CreateConstGEP2_32(FuncCtx, 0, 3,
+ "pers_fn_gep");
+ Builder.CreateStore(PersonalityFn, PersonalityFieldPtr, /*isVolatile=*/true);
// LSDA address
- Value *LSDA = CallInst::Create(LSDAAddrFn, "lsda_addr",
- EntryBB->getTerminator());
- Idxs[1] = Four;
- Value *LSDAFieldPtr = GetElementPtrInst::Create(FuncCtx, Idxs, "lsda_gep",
- EntryBB->getTerminator());
- new StoreInst(LSDA, LSDAFieldPtr, true, EntryBB->getTerminator());
+ Value *LSDA = Builder.CreateCall(LSDAAddrFn, "lsda_addr");
+ Value *LSDAFieldPtr = Builder.CreateConstGEP2_32(FuncCtx, 0, 4, "lsda_gep");
+ Builder.CreateStore(LSDA, LSDAFieldPtr, /*isVolatile=*/true);
return FuncCtx;
}
@@ -417,48 +402,31 @@ bool SjLjEHPrepare::setupEntryBlockAndCallSites(Function &F) {
Value *FuncCtx =
setupFunctionContext(F, makeArrayRef(LPads.begin(), LPads.end()));
BasicBlock *EntryBB = F.begin();
- Type *Int32Ty = Type::getInt32Ty(F.getContext());
-
- Value *Idxs[2] = {
- ConstantInt::get(Int32Ty, 0), 0
- };
+ IRBuilder<> Builder(EntryBB->getTerminator());
// Get a reference to the jump buffer.
- Idxs[1] = ConstantInt::get(Int32Ty, 5);
- Value *JBufPtr = GetElementPtrInst::Create(FuncCtx, Idxs, "jbuf_gep",
- EntryBB->getTerminator());
+ Value *JBufPtr = Builder.CreateConstGEP2_32(FuncCtx, 0, 5, "jbuf_gep");
// Save the frame pointer.
- Idxs[1] = ConstantInt::get(Int32Ty, 0);
- Value *FramePtr = GetElementPtrInst::Create(JBufPtr, Idxs, "jbuf_fp_gep",
- EntryBB->getTerminator());
+ Value *FramePtr = Builder.CreateConstGEP2_32(JBufPtr, 0, 0, "jbuf_fp_gep");
- Value *Val = CallInst::Create(FrameAddrFn,
- ConstantInt::get(Int32Ty, 0),
- "fp",
- EntryBB->getTerminator());
- new StoreInst(Val, FramePtr, true, EntryBB->getTerminator());
+ Value *Val = Builder.CreateCall(FrameAddrFn, Builder.getInt32(0), "fp");
+ Builder.CreateStore(Val, FramePtr, /*isVolatile=*/true);
// Save the stack pointer.
- Idxs[1] = ConstantInt::get(Int32Ty, 2);
- Value *StackPtr = GetElementPtrInst::Create(JBufPtr, Idxs, "jbuf_sp_gep",
- EntryBB->getTerminator());
+ Value *StackPtr = Builder.CreateConstGEP2_32(JBufPtr, 0, 2, "jbuf_sp_gep");
- Val = CallInst::Create(StackAddrFn, "sp", EntryBB->getTerminator());
- new StoreInst(Val, StackPtr, true, EntryBB->getTerminator());
+ Val = Builder.CreateCall(StackAddrFn, "sp");
+ Builder.CreateStore(Val, StackPtr, /*isVolatile=*/true);
// Call the setjmp instrinsic. It fills in the rest of the jmpbuf.
- Value *SetjmpArg = CastInst::Create(Instruction::BitCast, JBufPtr,
- Type::getInt8PtrTy(F.getContext()), "",
- EntryBB->getTerminator());
- CallInst::Create(BuiltinSetjmpFn, SetjmpArg, "", EntryBB->getTerminator());
+ Value *SetjmpArg = Builder.CreateBitCast(JBufPtr, Builder.getInt8PtrTy());
+ Builder.CreateCall(BuiltinSetjmpFn, SetjmpArg);
// Store a pointer to the function context so that the back-end will know
// where to look for it.
- Value *FuncCtxArg = CastInst::Create(Instruction::BitCast, FuncCtx,
- Type::getInt8PtrTy(F.getContext()), "",
- EntryBB->getTerminator());
- CallInst::Create(FuncCtxFn, FuncCtxArg, "", EntryBB->getTerminator());
+ Value *FuncCtxArg = Builder.CreateBitCast(FuncCtx, Builder.getInt8PtrTy());
+ Builder.CreateCall(FuncCtxFn, FuncCtxArg);
// At this point, we are all set up, update the invoke instructions to mark
// their call_site values.
diff --git a/contrib/llvm/lib/CodeGen/SlotIndexes.cpp b/contrib/llvm/lib/CodeGen/SlotIndexes.cpp
index c8c3fb3..95faafab 100644
--- a/contrib/llvm/lib/CodeGen/SlotIndexes.cpp
+++ b/contrib/llvm/lib/CodeGen/SlotIndexes.cpp
@@ -143,6 +143,7 @@ void SlotIndexes::renumberIndexes(IndexList::iterator curItr) {
}
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void SlotIndexes::dump() const {
for (IndexList::const_iterator itr = indexList.begin();
itr != indexList.end(); ++itr) {
@@ -159,6 +160,7 @@ void SlotIndexes::dump() const {
dbgs() << "BB#" << i << "\t[" << MBBRanges[i].first << ';'
<< MBBRanges[i].second << ")\n";
}
+#endif
// Print a SlotIndex to a raw_ostream.
void SlotIndex::print(raw_ostream &os) const {
@@ -168,9 +170,11 @@ void SlotIndex::print(raw_ostream &os) const {
os << "invalid";
}
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
// Dump a SlotIndex to stderr.
void SlotIndex::dump() const {
print(dbgs());
dbgs() << "\n";
}
+#endif
diff --git a/contrib/llvm/lib/CodeGen/SplitKit.cpp b/contrib/llvm/lib/CodeGen/SplitKit.cpp
index 4a2b7ec..dca15ee 100644
--- a/contrib/llvm/lib/CodeGen/SplitKit.cpp
+++ b/contrib/llvm/lib/CodeGen/SplitKit.cpp
@@ -356,6 +356,7 @@ void SplitEditor::reset(LiveRangeEdit &LRE, ComplementSpillMode SM) {
Edit->anyRematerializable(0);
}
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void SplitEditor::dump() const {
if (RegAssign.empty()) {
dbgs() << " empty\n";
@@ -366,6 +367,7 @@ void SplitEditor::dump() const {
dbgs() << " [" << I.start() << ';' << I.stop() << "):" << I.value();
dbgs() << '\n';
}
+#endif
VNInfo *SplitEditor::defValue(unsigned RegIdx,
const VNInfo *ParentVNI,
diff --git a/contrib/llvm/lib/CodeGen/StackColoring.cpp b/contrib/llvm/lib/CodeGen/StackColoring.cpp
new file mode 100644
index 0000000..1cbee84
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/StackColoring.cpp
@@ -0,0 +1,783 @@
+//===-- StackColoring.cpp -------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass implements the stack-coloring optimization that looks for
+// lifetime markers machine instructions (LIFESTART_BEGIN and LIFESTART_END),
+// which represent the possible lifetime of stack slots. It attempts to
+// merge disjoint stack slots and reduce the used stack space.
+// NOTE: This pass is not StackSlotColoring, which optimizes spill slots.
+//
+// TODO: In the future we plan to improve stack coloring in the following ways:
+// 1. Allow merging multiple small slots into a single larger slot at different
+// offsets.
+// 2. Merge this pass with StackSlotColoring and allow merging of allocas with
+// spill slots.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "stackcoloring"
+#include "MachineTraceMetrics.h"
+#include "llvm/Function.h"
+#include "llvm/Module.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SparseSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/LiveInterval.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/SlotIndexes.h"
+#include "llvm/DebugInfo.h"
+#include "llvm/Instructions.h"
+#include "llvm/MC/MCInstrItineraries.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+static cl::opt<bool>
+DisableColoring("no-stack-coloring",
+ cl::init(false), cl::Hidden,
+ cl::desc("Disable stack coloring"));
+
+/// The user may write code that uses allocas outside of the declared lifetime
+/// zone. This can happen when the user returns a reference to a local
+/// data-structure. We can detect these cases and decide not to optimize the
+/// code. If this flag is enabled, we try to save the user.
+static cl::opt<bool>
+ProtectFromEscapedAllocas("protect-from-escaped-allocas",
+ cl::init(false), cl::Hidden,
+ cl::desc("Do not optimize lifetime zones that are broken"));
+
+STATISTIC(NumMarkerSeen, "Number of lifetime markers found.");
+STATISTIC(StackSpaceSaved, "Number of bytes saved due to merging slots.");
+STATISTIC(StackSlotMerged, "Number of stack slot merged.");
+STATISTIC(EscapedAllocas,
+ "Number of allocas that escaped the lifetime region");
+
+//===----------------------------------------------------------------------===//
+// StackColoring Pass
+//===----------------------------------------------------------------------===//
+
+namespace {
+/// StackColoring - A machine pass for merging disjoint stack allocations,
+/// marked by the LIFETIME_START and LIFETIME_END pseudo instructions.
+class StackColoring : public MachineFunctionPass {
+ MachineFrameInfo *MFI;
+ MachineFunction *MF;
+
+ /// A class representing liveness information for a single basic block.
+ /// Each bit in the BitVector represents the liveness property
+ /// for a different stack slot.
+ struct BlockLifetimeInfo {
+ /// Which slots BEGINs in each basic block.
+ BitVector Begin;
+ /// Which slots ENDs in each basic block.
+ BitVector End;
+ /// Which slots are marked as LIVE_IN, coming into each basic block.
+ BitVector LiveIn;
+ /// Which slots are marked as LIVE_OUT, coming out of each basic block.
+ BitVector LiveOut;
+ };
+
+ /// Maps active slots (per bit) for each basic block.
+ DenseMap<MachineBasicBlock*, BlockLifetimeInfo> BlockLiveness;
+
+ /// Maps serial numbers to basic blocks.
+ DenseMap<MachineBasicBlock*, int> BasicBlocks;
+ /// Maps basic blocks to a serial number.
+ SmallVector<MachineBasicBlock*, 8> BasicBlockNumbering;
+
+ /// Maps liveness intervals for each slot.
+ SmallVector<LiveInterval*, 16> Intervals;
+ /// VNInfo is used for the construction of LiveIntervals.
+ VNInfo::Allocator VNInfoAllocator;
+ /// SlotIndex analysis object.
+ SlotIndexes *Indexes;
+
+ /// The list of lifetime markers found. These markers are to be removed
+ /// once the coloring is done.
+ SmallVector<MachineInstr*, 8> Markers;
+
+ /// SlotSizeSorter - A Sort utility for arranging stack slots according
+ /// to their size.
+ struct SlotSizeSorter {
+ MachineFrameInfo *MFI;
+ SlotSizeSorter(MachineFrameInfo *mfi) : MFI(mfi) { }
+ bool operator()(int LHS, int RHS) {
+ // We use -1 to denote a uninteresting slot. Place these slots at the end.
+ if (LHS == -1) return false;
+ if (RHS == -1) return true;
+ // Sort according to size.
+ return MFI->getObjectSize(LHS) > MFI->getObjectSize(RHS);
+ }
+};
+
+public:
+ static char ID;
+ StackColoring() : MachineFunctionPass(ID) {
+ initializeStackColoringPass(*PassRegistry::getPassRegistry());
+ }
+ void getAnalysisUsage(AnalysisUsage &AU) const;
+ bool runOnMachineFunction(MachineFunction &MF);
+
+private:
+ /// Debug.
+ void dump();
+
+ /// Removes all of the lifetime marker instructions from the function.
+ /// \returns true if any markers were removed.
+ bool removeAllMarkers();
+
+ /// Scan the machine function and find all of the lifetime markers.
+ /// Record the findings in the BEGIN and END vectors.
+ /// \returns the number of markers found.
+ unsigned collectMarkers(unsigned NumSlot);
+
+ /// Perform the dataflow calculation and calculate the lifetime for each of
+ /// the slots, based on the BEGIN/END vectors. Set the LifetimeLIVE_IN and
+ /// LifetimeLIVE_OUT maps that represent which stack slots are live coming
+ /// in and out blocks.
+ void calculateLocalLiveness();
+
+ /// Construct the LiveIntervals for the slots.
+ void calculateLiveIntervals(unsigned NumSlots);
+
+ /// Go over the machine function and change instructions which use stack
+ /// slots to use the joint slots.
+ void remapInstructions(DenseMap<int, int> &SlotRemap);
+
+ /// The input program may contain intructions which are not inside lifetime
+ /// markers. This can happen due to a bug in the compiler or due to a bug in
+ /// user code (for example, returning a reference to a local variable).
+ /// This procedure checks all of the instructions in the function and
+ /// invalidates lifetime ranges which do not contain all of the instructions
+ /// which access that frame slot.
+ void removeInvalidSlotRanges();
+
+ /// Map entries which point to other entries to their destination.
+ /// A->B->C becomes A->C.
+ void expungeSlotMap(DenseMap<int, int> &SlotRemap, unsigned NumSlots);
+};
+} // end anonymous namespace
+
+char StackColoring::ID = 0;
+char &llvm::StackColoringID = StackColoring::ID;
+
+INITIALIZE_PASS_BEGIN(StackColoring,
+ "stack-coloring", "Merge disjoint stack slots", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
+INITIALIZE_PASS_END(StackColoring,
+ "stack-coloring", "Merge disjoint stack slots", false, false)
+
+void StackColoring::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<MachineDominatorTree>();
+ AU.addPreserved<MachineDominatorTree>();
+ AU.addRequired<SlotIndexes>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+void StackColoring::dump() {
+ for (df_iterator<MachineFunction*> FI = df_begin(MF), FE = df_end(MF);
+ FI != FE; ++FI) {
+ unsigned Num = BasicBlocks[*FI];
+ DEBUG(dbgs()<<"Inspecting block #"<<Num<<" ["<<FI->getName()<<"]\n");
+ Num = 0;
+ DEBUG(dbgs()<<"BEGIN : {");
+ for (unsigned i=0; i < BlockLiveness[*FI].Begin.size(); ++i)
+ DEBUG(dbgs()<<BlockLiveness[*FI].Begin.test(i)<<" ");
+ DEBUG(dbgs()<<"}\n");
+
+ DEBUG(dbgs()<<"END : {");
+ for (unsigned i=0; i < BlockLiveness[*FI].End.size(); ++i)
+ DEBUG(dbgs()<<BlockLiveness[*FI].End.test(i)<<" ");
+
+ DEBUG(dbgs()<<"}\n");
+
+ DEBUG(dbgs()<<"LIVE_IN: {");
+ for (unsigned i=0; i < BlockLiveness[*FI].LiveIn.size(); ++i)
+ DEBUG(dbgs()<<BlockLiveness[*FI].LiveIn.test(i)<<" ");
+
+ DEBUG(dbgs()<<"}\n");
+ DEBUG(dbgs()<<"LIVEOUT: {");
+ for (unsigned i=0; i < BlockLiveness[*FI].LiveOut.size(); ++i)
+ DEBUG(dbgs()<<BlockLiveness[*FI].LiveOut.test(i)<<" ");
+ DEBUG(dbgs()<<"}\n");
+ }
+}
+
+unsigned StackColoring::collectMarkers(unsigned NumSlot) {
+ unsigned MarkersFound = 0;
+ // Scan the function to find all lifetime markers.
+ // NOTE: We use the a reverse-post-order iteration to ensure that we obtain a
+ // deterministic numbering, and because we'll need a post-order iteration
+ // later for solving the liveness dataflow problem.
+ for (df_iterator<MachineFunction*> FI = df_begin(MF), FE = df_end(MF);
+ FI != FE; ++FI) {
+
+ // Assign a serial number to this basic block.
+ BasicBlocks[*FI] = BasicBlockNumbering.size();
+ BasicBlockNumbering.push_back(*FI);
+
+ BlockLiveness[*FI].Begin.resize(NumSlot);
+ BlockLiveness[*FI].End.resize(NumSlot);
+
+ for (MachineBasicBlock::iterator BI = (*FI)->begin(), BE = (*FI)->end();
+ BI != BE; ++BI) {
+
+ if (BI->getOpcode() != TargetOpcode::LIFETIME_START &&
+ BI->getOpcode() != TargetOpcode::LIFETIME_END)
+ continue;
+
+ Markers.push_back(BI);
+
+ bool IsStart = BI->getOpcode() == TargetOpcode::LIFETIME_START;
+ MachineOperand &MI = BI->getOperand(0);
+ unsigned Slot = MI.getIndex();
+
+ MarkersFound++;
+
+ const AllocaInst *Allocation = MFI->getObjectAllocation(Slot);
+ if (Allocation) {
+ DEBUG(dbgs()<<"Found a lifetime marker for slot #"<<Slot<<
+ " with allocation: "<< Allocation->getName()<<"\n");
+ }
+
+ if (IsStart) {
+ BlockLiveness[*FI].Begin.set(Slot);
+ } else {
+ if (BlockLiveness[*FI].Begin.test(Slot)) {
+ // Allocas that start and end within a single block are handled
+ // specially when computing the LiveIntervals to avoid pessimizing
+ // the liveness propagation.
+ BlockLiveness[*FI].Begin.reset(Slot);
+ } else {
+ BlockLiveness[*FI].End.set(Slot);
+ }
+ }
+ }
+ }
+
+ // Update statistics.
+ NumMarkerSeen += MarkersFound;
+ return MarkersFound;
+}
+
+void StackColoring::calculateLocalLiveness() {
+ // Perform a standard reverse dataflow computation to solve for
+ // global liveness. The BEGIN set here is equivalent to KILL in the standard
+ // formulation, and END is equivalent to GEN. The result of this computation
+ // is a map from blocks to bitvectors where the bitvectors represent which
+ // allocas are live in/out of that block.
+ SmallPtrSet<MachineBasicBlock*, 8> BBSet(BasicBlockNumbering.begin(),
+ BasicBlockNumbering.end());
+ unsigned NumSSMIters = 0;
+ bool changed = true;
+ while (changed) {
+ changed = false;
+ ++NumSSMIters;
+
+ SmallPtrSet<MachineBasicBlock*, 8> NextBBSet;
+
+ for (SmallVector<MachineBasicBlock*, 8>::iterator
+ PI = BasicBlockNumbering.begin(), PE = BasicBlockNumbering.end();
+ PI != PE; ++PI) {
+
+ MachineBasicBlock *BB = *PI;
+ if (!BBSet.count(BB)) continue;
+
+ BitVector LocalLiveIn;
+ BitVector LocalLiveOut;
+
+ // Forward propagation from begins to ends.
+ for (MachineBasicBlock::pred_iterator PI = BB->pred_begin(),
+ PE = BB->pred_end(); PI != PE; ++PI)
+ LocalLiveIn |= BlockLiveness[*PI].LiveOut;
+ LocalLiveIn |= BlockLiveness[BB].End;
+ LocalLiveIn.reset(BlockLiveness[BB].Begin);
+
+ // Reverse propagation from ends to begins.
+ for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(),
+ SE = BB->succ_end(); SI != SE; ++SI)
+ LocalLiveOut |= BlockLiveness[*SI].LiveIn;
+ LocalLiveOut |= BlockLiveness[BB].Begin;
+ LocalLiveOut.reset(BlockLiveness[BB].End);
+
+ LocalLiveIn |= LocalLiveOut;
+ LocalLiveOut |= LocalLiveIn;
+
+ // After adopting the live bits, we need to turn-off the bits which
+ // are de-activated in this block.
+ LocalLiveOut.reset(BlockLiveness[BB].End);
+ LocalLiveIn.reset(BlockLiveness[BB].Begin);
+
+ // If we have both BEGIN and END markers in the same basic block then
+ // we know that the BEGIN marker comes after the END, because we already
+ // handle the case where the BEGIN comes before the END when collecting
+ // the markers (and building the BEGIN/END vectore).
+ // Want to enable the LIVE_IN and LIVE_OUT of slots that have both
+ // BEGIN and END because it means that the value lives before and after
+ // this basic block.
+ BitVector LocalEndBegin = BlockLiveness[BB].End;
+ LocalEndBegin &= BlockLiveness[BB].Begin;
+ LocalLiveIn |= LocalEndBegin;
+ LocalLiveOut |= LocalEndBegin;
+
+ if (LocalLiveIn.test(BlockLiveness[BB].LiveIn)) {
+ changed = true;
+ BlockLiveness[BB].LiveIn |= LocalLiveIn;
+
+ for (MachineBasicBlock::pred_iterator PI = BB->pred_begin(),
+ PE = BB->pred_end(); PI != PE; ++PI)
+ NextBBSet.insert(*PI);
+ }
+
+ if (LocalLiveOut.test(BlockLiveness[BB].LiveOut)) {
+ changed = true;
+ BlockLiveness[BB].LiveOut |= LocalLiveOut;
+
+ for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(),
+ SE = BB->succ_end(); SI != SE; ++SI)
+ NextBBSet.insert(*SI);
+ }
+ }
+
+ BBSet = NextBBSet;
+ }// while changed.
+}
+
+void StackColoring::calculateLiveIntervals(unsigned NumSlots) {
+ SmallVector<SlotIndex, 16> Starts;
+ SmallVector<SlotIndex, 16> Finishes;
+
+ // For each block, find which slots are active within this block
+ // and update the live intervals.
+ for (MachineFunction::iterator MBB = MF->begin(), MBBe = MF->end();
+ MBB != MBBe; ++MBB) {
+ Starts.clear();
+ Starts.resize(NumSlots);
+ Finishes.clear();
+ Finishes.resize(NumSlots);
+
+ // Create the interval for the basic blocks with lifetime markers in them.
+ for (SmallVector<MachineInstr*, 8>::iterator it = Markers.begin(),
+ e = Markers.end(); it != e; ++it) {
+ MachineInstr *MI = *it;
+ if (MI->getParent() != MBB)
+ continue;
+
+ assert((MI->getOpcode() == TargetOpcode::LIFETIME_START ||
+ MI->getOpcode() == TargetOpcode::LIFETIME_END) &&
+ "Invalid Lifetime marker");
+
+ bool IsStart = MI->getOpcode() == TargetOpcode::LIFETIME_START;
+ MachineOperand &Mo = MI->getOperand(0);
+ int Slot = Mo.getIndex();
+ assert(Slot >= 0 && "Invalid slot");
+
+ SlotIndex ThisIndex = Indexes->getInstructionIndex(MI);
+
+ if (IsStart) {
+ if (!Starts[Slot].isValid() || Starts[Slot] > ThisIndex)
+ Starts[Slot] = ThisIndex;
+ } else {
+ if (!Finishes[Slot].isValid() || Finishes[Slot] < ThisIndex)
+ Finishes[Slot] = ThisIndex;
+ }
+ }
+
+ // Create the interval of the blocks that we previously found to be 'alive'.
+ BitVector Alive = BlockLiveness[MBB].LiveIn;
+ Alive |= BlockLiveness[MBB].LiveOut;
+
+ if (Alive.any()) {
+ for (int pos = Alive.find_first(); pos != -1;
+ pos = Alive.find_next(pos)) {
+ if (!Starts[pos].isValid())
+ Starts[pos] = Indexes->getMBBStartIdx(MBB);
+ if (!Finishes[pos].isValid())
+ Finishes[pos] = Indexes->getMBBEndIdx(MBB);
+ }
+ }
+
+ for (unsigned i = 0; i < NumSlots; ++i) {
+ assert(Starts[i].isValid() == Finishes[i].isValid() && "Unmatched range");
+ if (!Starts[i].isValid())
+ continue;
+
+ assert(Starts[i] && Finishes[i] && "Invalid interval");
+ VNInfo *ValNum = Intervals[i]->getValNumInfo(0);
+ SlotIndex S = Starts[i];
+ SlotIndex F = Finishes[i];
+ if (S < F) {
+ // We have a single consecutive region.
+ Intervals[i]->addRange(LiveRange(S, F, ValNum));
+ } else {
+ // We have two non consecutive regions. This happens when
+ // LIFETIME_START appears after the LIFETIME_END marker.
+ SlotIndex NewStart = Indexes->getMBBStartIdx(MBB);
+ SlotIndex NewFin = Indexes->getMBBEndIdx(MBB);
+ Intervals[i]->addRange(LiveRange(NewStart, F, ValNum));
+ Intervals[i]->addRange(LiveRange(S, NewFin, ValNum));
+ }
+ }
+ }
+}
+
+bool StackColoring::removeAllMarkers() {
+ unsigned Count = 0;
+ for (unsigned i = 0; i < Markers.size(); ++i) {
+ Markers[i]->eraseFromParent();
+ Count++;
+ }
+ Markers.clear();
+
+ DEBUG(dbgs()<<"Removed "<<Count<<" markers.\n");
+ return Count;
+}
+
+void StackColoring::remapInstructions(DenseMap<int, int> &SlotRemap) {
+ unsigned FixedInstr = 0;
+ unsigned FixedMemOp = 0;
+ unsigned FixedDbg = 0;
+ MachineModuleInfo *MMI = &MF->getMMI();
+
+ // Remap debug information that refers to stack slots.
+ MachineModuleInfo::VariableDbgInfoMapTy &VMap = MMI->getVariableDbgInfo();
+ for (MachineModuleInfo::VariableDbgInfoMapTy::iterator VI = VMap.begin(),
+ VE = VMap.end(); VI != VE; ++VI) {
+ const MDNode *Var = VI->first;
+ if (!Var) continue;
+ std::pair<unsigned, DebugLoc> &VP = VI->second;
+ if (SlotRemap.count(VP.first)) {
+ DEBUG(dbgs()<<"Remapping debug info for ["<<Var->getName()<<"].\n");
+ VP.first = SlotRemap[VP.first];
+ FixedDbg++;
+ }
+ }
+
+ // Keep a list of *allocas* which need to be remapped.
+ DenseMap<const AllocaInst*, const AllocaInst*> Allocas;
+ for (DenseMap<int, int>::iterator it = SlotRemap.begin(),
+ e = SlotRemap.end(); it != e; ++it) {
+ const AllocaInst *From = MFI->getObjectAllocation(it->first);
+ const AllocaInst *To = MFI->getObjectAllocation(it->second);
+ assert(To && From && "Invalid allocation object");
+ Allocas[From] = To;
+ }
+
+ // Remap all instructions to the new stack slots.
+ MachineFunction::iterator BB, BBE;
+ MachineBasicBlock::iterator I, IE;
+ for (BB = MF->begin(), BBE = MF->end(); BB != BBE; ++BB)
+ for (I = BB->begin(), IE = BB->end(); I != IE; ++I) {
+
+ // Skip lifetime markers. We'll remove them soon.
+ if (I->getOpcode() == TargetOpcode::LIFETIME_START ||
+ I->getOpcode() == TargetOpcode::LIFETIME_END)
+ continue;
+
+ // Update the MachineMemOperand to use the new alloca.
+ for (MachineInstr::mmo_iterator MM = I->memoperands_begin(),
+ E = I->memoperands_end(); MM != E; ++MM) {
+ MachineMemOperand *MMO = *MM;
+
+ const Value *V = MMO->getValue();
+
+ if (!V)
+ continue;
+
+ // Climb up and find the original alloca.
+ V = GetUnderlyingObject(V);
+ // If we did not find one, or if the one that we found is not in our
+ // map, then move on.
+ if (!V || !isa<AllocaInst>(V)) {
+ // Clear mem operand since we don't know for sure that it doesn't
+ // alias a merged alloca.
+ MMO->setValue(0);
+ continue;
+ }
+ const AllocaInst *AI= cast<AllocaInst>(V);
+ if (!Allocas.count(AI))
+ continue;
+
+ MMO->setValue(Allocas[AI]);
+ FixedMemOp++;
+ }
+
+ // Update all of the machine instruction operands.
+ for (unsigned i = 0 ; i < I->getNumOperands(); ++i) {
+ MachineOperand &MO = I->getOperand(i);
+
+ if (!MO.isFI())
+ continue;
+ int FromSlot = MO.getIndex();
+
+ // Don't touch arguments.
+ if (FromSlot<0)
+ continue;
+
+ // Only look at mapped slots.
+ if (!SlotRemap.count(FromSlot))
+ continue;
+
+ // In a debug build, check that the instruction that we are modifying is
+ // inside the expected live range. If the instruction is not inside
+ // the calculated range then it means that the alloca usage moved
+ // outside of the lifetime markers, or that the user has a bug.
+ // NOTE: Alloca address calculations which happen outside the lifetime
+ // zone are are okay, despite the fact that we don't have a good way
+ // for validating all of the usages of the calculation.
+#ifndef NDEBUG
+ bool TouchesMemory = I->mayLoad() || I->mayStore();
+ // If we *don't* protect the user from escaped allocas, don't bother
+ // validating the instructions.
+ if (!I->isDebugValue() && TouchesMemory && ProtectFromEscapedAllocas) {
+ SlotIndex Index = Indexes->getInstructionIndex(I);
+ LiveInterval *Interval = Intervals[FromSlot];
+ assert(Interval->find(Index) != Interval->end() &&
+ "Found instruction usage outside of live range.");
+ }
+#endif
+
+ // Fix the machine instructions.
+ int ToSlot = SlotRemap[FromSlot];
+ MO.setIndex(ToSlot);
+ FixedInstr++;
+ }
+ }
+
+ DEBUG(dbgs()<<"Fixed "<<FixedMemOp<<" machine memory operands.\n");
+ DEBUG(dbgs()<<"Fixed "<<FixedDbg<<" debug locations.\n");
+ DEBUG(dbgs()<<"Fixed "<<FixedInstr<<" machine instructions.\n");
+}
+
+void StackColoring::removeInvalidSlotRanges() {
+ MachineFunction::iterator BB, BBE;
+ MachineBasicBlock::iterator I, IE;
+ for (BB = MF->begin(), BBE = MF->end(); BB != BBE; ++BB)
+ for (I = BB->begin(), IE = BB->end(); I != IE; ++I) {
+
+ if (I->getOpcode() == TargetOpcode::LIFETIME_START ||
+ I->getOpcode() == TargetOpcode::LIFETIME_END || I->isDebugValue())
+ continue;
+
+ // Some intervals are suspicious! In some cases we find address
+ // calculations outside of the lifetime zone, but not actual memory
+ // read or write. Memory accesses outside of the lifetime zone are a clear
+ // violation, but address calculations are okay. This can happen when
+ // GEPs are hoisted outside of the lifetime zone.
+ // So, in here we only check instructions which can read or write memory.
+ if (!I->mayLoad() && !I->mayStore())
+ continue;
+
+ // Check all of the machine operands.
+ for (unsigned i = 0 ; i < I->getNumOperands(); ++i) {
+ MachineOperand &MO = I->getOperand(i);
+
+ if (!MO.isFI())
+ continue;
+
+ int Slot = MO.getIndex();
+
+ if (Slot<0)
+ continue;
+
+ if (Intervals[Slot]->empty())
+ continue;
+
+ // Check that the used slot is inside the calculated lifetime range.
+ // If it is not, warn about it and invalidate the range.
+ LiveInterval *Interval = Intervals[Slot];
+ SlotIndex Index = Indexes->getInstructionIndex(I);
+ if (Interval->find(Index) == Interval->end()) {
+ Intervals[Slot]->clear();
+ DEBUG(dbgs()<<"Invalidating range #"<<Slot<<"\n");
+ EscapedAllocas++;
+ }
+ }
+ }
+}
+
+void StackColoring::expungeSlotMap(DenseMap<int, int> &SlotRemap,
+ unsigned NumSlots) {
+ // Expunge slot remap map.
+ for (unsigned i=0; i < NumSlots; ++i) {
+ // If we are remapping i
+ if (SlotRemap.count(i)) {
+ int Target = SlotRemap[i];
+ // As long as our target is mapped to something else, follow it.
+ while (SlotRemap.count(Target)) {
+ Target = SlotRemap[Target];
+ SlotRemap[i] = Target;
+ }
+ }
+ }
+}
+
+bool StackColoring::runOnMachineFunction(MachineFunction &Func) {
+ DEBUG(dbgs() << "********** Stack Coloring **********\n"
+ << "********** Function: "
+ << ((const Value*)Func.getFunction())->getName() << '\n');
+ MF = &Func;
+ MFI = MF->getFrameInfo();
+ Indexes = &getAnalysis<SlotIndexes>();
+ BlockLiveness.clear();
+ BasicBlocks.clear();
+ BasicBlockNumbering.clear();
+ Markers.clear();
+ Intervals.clear();
+ VNInfoAllocator.Reset();
+
+ unsigned NumSlots = MFI->getObjectIndexEnd();
+
+ // If there are no stack slots then there are no markers to remove.
+ if (!NumSlots)
+ return false;
+
+ SmallVector<int, 8> SortedSlots;
+
+ SortedSlots.reserve(NumSlots);
+ Intervals.reserve(NumSlots);
+
+ unsigned NumMarkers = collectMarkers(NumSlots);
+
+ unsigned TotalSize = 0;
+ DEBUG(dbgs()<<"Found "<<NumMarkers<<" markers and "<<NumSlots<<" slots\n");
+ DEBUG(dbgs()<<"Slot structure:\n");
+
+ for (int i=0; i < MFI->getObjectIndexEnd(); ++i) {
+ DEBUG(dbgs()<<"Slot #"<<i<<" - "<<MFI->getObjectSize(i)<<" bytes.\n");
+ TotalSize += MFI->getObjectSize(i);
+ }
+
+ DEBUG(dbgs()<<"Total Stack size: "<<TotalSize<<" bytes\n\n");
+
+ // Don't continue because there are not enough lifetime markers, or the
+ // stack is too small, or we are told not to optimize the slots.
+ if (NumMarkers < 2 || TotalSize < 16 || DisableColoring) {
+ DEBUG(dbgs()<<"Will not try to merge slots.\n");
+ return removeAllMarkers();
+ }
+
+ for (unsigned i=0; i < NumSlots; ++i) {
+ LiveInterval *LI = new LiveInterval(i, 0);
+ Intervals.push_back(LI);
+ LI->getNextValue(Indexes->getZeroIndex(), VNInfoAllocator);
+ SortedSlots.push_back(i);
+ }
+
+ // Calculate the liveness of each block.
+ calculateLocalLiveness();
+
+ // Propagate the liveness information.
+ calculateLiveIntervals(NumSlots);
+
+ // Search for allocas which are used outside of the declared lifetime
+ // markers.
+ if (ProtectFromEscapedAllocas)
+ removeInvalidSlotRanges();
+
+ // Maps old slots to new slots.
+ DenseMap<int, int> SlotRemap;
+ unsigned RemovedSlots = 0;
+ unsigned ReducedSize = 0;
+
+ // Do not bother looking at empty intervals.
+ for (unsigned I = 0; I < NumSlots; ++I) {
+ if (Intervals[SortedSlots[I]]->empty())
+ SortedSlots[I] = -1;
+ }
+
+ // This is a simple greedy algorithm for merging allocas. First, sort the
+ // slots, placing the largest slots first. Next, perform an n^2 scan and look
+ // for disjoint slots. When you find disjoint slots, merge the samller one
+ // into the bigger one and update the live interval. Remove the small alloca
+ // and continue.
+
+ // Sort the slots according to their size. Place unused slots at the end.
+ std::sort(SortedSlots.begin(), SortedSlots.end(), SlotSizeSorter(MFI));
+
+ bool Chanded = true;
+ while (Chanded) {
+ Chanded = false;
+ for (unsigned I = 0; I < NumSlots; ++I) {
+ if (SortedSlots[I] == -1)
+ continue;
+
+ for (unsigned J=I+1; J < NumSlots; ++J) {
+ if (SortedSlots[J] == -1)
+ continue;
+
+ int FirstSlot = SortedSlots[I];
+ int SecondSlot = SortedSlots[J];
+ LiveInterval *First = Intervals[FirstSlot];
+ LiveInterval *Second = Intervals[SecondSlot];
+ assert (!First->empty() && !Second->empty() && "Found an empty range");
+
+ // Merge disjoint slots.
+ if (!First->overlaps(*Second)) {
+ Chanded = true;
+ First->MergeRangesInAsValue(*Second, First->getValNumInfo(0));
+ SlotRemap[SecondSlot] = FirstSlot;
+ SortedSlots[J] = -1;
+ DEBUG(dbgs()<<"Merging #"<<FirstSlot<<" and slots #"<<
+ SecondSlot<<" together.\n");
+ unsigned MaxAlignment = std::max(MFI->getObjectAlignment(FirstSlot),
+ MFI->getObjectAlignment(SecondSlot));
+
+ assert(MFI->getObjectSize(FirstSlot) >=
+ MFI->getObjectSize(SecondSlot) &&
+ "Merging a small object into a larger one");
+
+ RemovedSlots+=1;
+ ReducedSize += MFI->getObjectSize(SecondSlot);
+ MFI->setObjectAlignment(FirstSlot, MaxAlignment);
+ MFI->RemoveStackObject(SecondSlot);
+ }
+ }
+ }
+ }// While changed.
+
+ // Record statistics.
+ StackSpaceSaved += ReducedSize;
+ StackSlotMerged += RemovedSlots;
+ DEBUG(dbgs()<<"Merge "<<RemovedSlots<<" slots. Saved "<<
+ ReducedSize<<" bytes\n");
+
+ // Scan the entire function and update all machine operands that use frame
+ // indices to use the remapped frame index.
+ expungeSlotMap(SlotRemap, NumSlots);
+ remapInstructions(SlotRemap);
+
+ // Release the intervals.
+ for (unsigned I = 0; I < NumSlots; ++I) {
+ delete Intervals[I];
+ }
+
+ return removeAllMarkers();
+}
diff --git a/contrib/llvm/lib/CodeGen/StackProtector.cpp b/contrib/llvm/lib/CodeGen/StackProtector.cpp
index f1eab1f..31e9ec0 100644
--- a/contrib/llvm/lib/CodeGen/StackProtector.cpp
+++ b/contrib/llvm/lib/CodeGen/StackProtector.cpp
@@ -26,18 +26,12 @@
#include "llvm/Module.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Target/TargetData.h"
+#include "llvm/DataLayout.h"
#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetOptions.h"
#include "llvm/ADT/Triple.h"
using namespace llvm;
-// SSPBufferSize - The lower bound for a buffer to be considered for stack
-// smashing protection.
-static cl::opt<unsigned>
-SSPBufferSize("stack-protector-buffer-size", cl::init(8),
- cl::desc("Lower bound for a buffer to be considered for "
- "stack protection"));
-
namespace {
class StackProtector : public FunctionPass {
/// TLI - Keep a pointer of a TargetLowering to consult for determining
@@ -61,6 +55,11 @@ namespace {
/// check fails.
BasicBlock *CreateFailBB();
+ /// ContainsProtectableArray - Check whether the type either is an array or
+ /// contains an array of sufficient size so that we need stack protectors
+ /// for it.
+ bool ContainsProtectableArray(Type *Ty, bool InStruct = false) const;
+
/// RequiresStackProtector - Check whether or not this function needs a
/// stack protector based upon the stack protector level.
bool RequiresStackProtector() const;
@@ -100,21 +99,50 @@ bool StackProtector::runOnFunction(Function &Fn) {
return InsertStackProtectors();
}
+/// ContainsProtectableArray - Check whether the type either is an array or
+/// contains a char array of sufficient size so that we need stack protectors
+/// for it.
+bool StackProtector::ContainsProtectableArray(Type *Ty, bool InStruct) const {
+ if (!Ty) return false;
+ if (ArrayType *AT = dyn_cast<ArrayType>(Ty)) {
+ const TargetMachine &TM = TLI->getTargetMachine();
+ if (!AT->getElementType()->isIntegerTy(8)) {
+ Triple Trip(TM.getTargetTriple());
+
+ // If we're on a non-Darwin platform or we're inside of a structure, don't
+ // add stack protectors unless the array is a character array.
+ if (InStruct || !Trip.isOSDarwin())
+ return false;
+ }
+
+ // If an array has more than SSPBufferSize bytes of allocated space, then we
+ // emit stack protectors.
+ if (TM.Options.SSPBufferSize <= TLI->getDataLayout()->getTypeAllocSize(AT))
+ return true;
+ }
+
+ const StructType *ST = dyn_cast<StructType>(Ty);
+ if (!ST) return false;
+
+ for (StructType::element_iterator I = ST->element_begin(),
+ E = ST->element_end(); I != E; ++I)
+ if (ContainsProtectableArray(*I, true))
+ return true;
+
+ return false;
+}
+
/// RequiresStackProtector - Check whether or not this function needs a stack
/// protector based upon the stack protector level. The heuristic we use is to
/// add a guard variable to functions that call alloca, and functions with
/// buffers larger than SSPBufferSize bytes.
bool StackProtector::RequiresStackProtector() const {
- if (F->hasFnAttr(Attribute::StackProtectReq))
+ if (F->getFnAttributes().hasAttribute(Attributes::StackProtectReq))
return true;
- if (!F->hasFnAttr(Attribute::StackProtect))
+ if (!F->getFnAttributes().hasAttribute(Attributes::StackProtect))
return false;
- const TargetData *TD = TLI->getTargetData();
- const TargetMachine &TM = TLI->getTargetMachine();
- Triple Trip(TM.getTargetTriple());
-
for (Function::iterator I = F->begin(), E = F->end(); I != E; ++I) {
BasicBlock *BB = I;
@@ -126,17 +154,8 @@ bool StackProtector::RequiresStackProtector() const {
// protectors.
return true;
- if (ArrayType *AT = dyn_cast<ArrayType>(AI->getAllocatedType())) {
- // If we're on a non-Darwin platform, don't add stack protectors
- // unless the array is a character array.
- if (!Trip.isOSDarwin() && !AT->getElementType()->isIntegerTy(8))
- continue;
-
- // If an array has more than SSPBufferSize bytes of allocated space,
- // then we emit stack protectors.
- if (SSPBufferSize <= TD->getTypeAllocSize(AT))
- return true;
- }
+ if (ContainsProtectableArray(AI->getAllocatedType()))
+ return true;
}
}
diff --git a/contrib/llvm/lib/CodeGen/StackSlotColoring.cpp b/contrib/llvm/lib/CodeGen/StackSlotColoring.cpp
index 20da36e..d349abc 100644
--- a/contrib/llvm/lib/CodeGen/StackSlotColoring.cpp
+++ b/contrib/llvm/lib/CodeGen/StackSlotColoring.cpp
@@ -11,8 +11,7 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "stackcoloring"
-#include "llvm/Function.h"
+#define DEBUG_TYPE "stackslotcoloring"
#include "llvm/Module.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
@@ -391,8 +390,7 @@ bool StackSlotColoring::RemoveDeadStores(MachineBasicBlock* MBB) {
bool StackSlotColoring::runOnMachineFunction(MachineFunction &MF) {
DEBUG({
dbgs() << "********** Stack Slot Coloring **********\n"
- << "********** Function: "
- << MF.getFunction()->getName() << '\n';
+ << "********** Function: " << MF.getName() << '\n';
});
MFI = MF.getFrameInfo();
diff --git a/contrib/llvm/lib/CodeGen/StrongPHIElimination.cpp b/contrib/llvm/lib/CodeGen/StrongPHIElimination.cpp
index 5b06195..39fd600 100644
--- a/contrib/llvm/lib/CodeGen/StrongPHIElimination.cpp
+++ b/contrib/llvm/lib/CodeGen/StrongPHIElimination.cpp
@@ -404,9 +404,9 @@ bool StrongPHIElimination::runOnMachineFunction(MachineFunction &MF) {
}
void StrongPHIElimination::addReg(unsigned Reg) {
- if (RegNodeMap.count(Reg))
- return;
- RegNodeMap[Reg] = new (Allocator) Node(Reg);
+ Node *&N = RegNodeMap[Reg];
+ if (!N)
+ N = new (Allocator) Node(Reg);
}
StrongPHIElimination::Node*
@@ -714,8 +714,9 @@ void StrongPHIElimination::InsertCopiesForPHI(MachineInstr *PHI,
assert(getRegColor(CopyReg) == CopyReg);
}
- if (!InsertedSrcCopyMap.count(std::make_pair(PredBB, PHIColor)))
- InsertedSrcCopyMap[std::make_pair(PredBB, PHIColor)] = CopyInstr;
+ // Insert into map if not already there.
+ InsertedSrcCopyMap.insert(std::make_pair(std::make_pair(PredBB, PHIColor),
+ CopyInstr));
}
SrcMO.setReg(CopyReg);
diff --git a/contrib/llvm/lib/CodeGen/TailDuplication.cpp b/contrib/llvm/lib/CodeGen/TailDuplication.cpp
index a813fa6..1497d1b 100644
--- a/contrib/llvm/lib/CodeGen/TailDuplication.cpp
+++ b/contrib/llvm/lib/CodeGen/TailDuplication.cpp
@@ -552,7 +552,8 @@ TailDuplicatePass::shouldTailDuplicate(const MachineFunction &MF,
// compensate for the duplication.
unsigned MaxDuplicateCount;
if (TailDuplicateSize.getNumOccurrences() == 0 &&
- MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize))
+ MF.getFunction()->getFnAttributes().
+ hasAttribute(Attributes::OptimizeForSize))
MaxDuplicateCount = 1;
else
MaxDuplicateCount = TailDuplicateSize;
diff --git a/contrib/llvm/lib/CodeGen/TargetInstrInfoImpl.cpp b/contrib/llvm/lib/CodeGen/TargetInstrInfoImpl.cpp
index ddee6b2..4439192 100644
--- a/contrib/llvm/lib/CodeGen/TargetInstrInfoImpl.cpp
+++ b/contrib/llvm/lib/CodeGen/TargetInstrInfoImpl.cpp
@@ -99,17 +99,8 @@ MachineInstr *TargetInstrInfoImpl::commuteInstruction(MachineInstr *MI,
if (NewMI) {
// Create a new instruction.
- bool Reg0IsDead = HasDef ? MI->getOperand(0).isDead() : false;
MachineFunction &MF = *MI->getParent()->getParent();
- if (HasDef)
- return BuildMI(MF, MI->getDebugLoc(), MI->getDesc())
- .addReg(Reg0, RegState::Define | getDeadRegState(Reg0IsDead), SubReg0)
- .addReg(Reg2, getKillRegState(Reg2IsKill), SubReg2)
- .addReg(Reg1, getKillRegState(Reg1IsKill), SubReg1);
- else
- return BuildMI(MF, MI->getDebugLoc(), MI->getDesc())
- .addReg(Reg2, getKillRegState(Reg2IsKill), SubReg2)
- .addReg(Reg1, getKillRegState(Reg1IsKill), SubReg1);
+ MI = MF.CloneMachineInstr(MI);
}
if (HasDef) {
@@ -572,6 +563,8 @@ TargetInstrInfoImpl::getNumMicroOps(const InstrItineraryData *ItinData,
/// Return the default expected latency for a def based on it's opcode.
unsigned TargetInstrInfo::defaultDefLatency(const MCSchedModel *SchedModel,
const MachineInstr *DefMI) const {
+ if (DefMI->isTransient())
+ return 0;
if (DefMI->mayLoad())
return SchedModel->LoadLatency;
if (isHighLatencyDef(DefMI->getOpcode()))
@@ -615,13 +608,13 @@ getOperandLatency(const InstrItineraryData *ItinData,
/// If we can determine the operand latency from the def only, without itinerary
/// lookup, do so. Otherwise return -1.
-static int computeDefOperandLatency(
- const TargetInstrInfo *TII, const InstrItineraryData *ItinData,
- const MachineInstr *DefMI, bool FindMin) {
+int TargetInstrInfo::computeDefOperandLatency(
+ const InstrItineraryData *ItinData,
+ const MachineInstr *DefMI, bool FindMin) const {
// Let the target hook getInstrLatency handle missing itineraries.
if (!ItinData)
- return TII->getInstrLatency(ItinData, DefMI);
+ return getInstrLatency(ItinData, DefMI);
// Return a latency based on the itinerary properties and defining instruction
// if possible. Some common subtargets don't require per-operand latency,
@@ -630,7 +623,7 @@ static int computeDefOperandLatency(
// If MinLatency is valid, call getInstrLatency. This uses Stage latency if
// it exists before defaulting to MinLatency.
if (ItinData->SchedModel->MinLatency >= 0)
- return TII->getInstrLatency(ItinData, DefMI);
+ return getInstrLatency(ItinData, DefMI);
// If MinLatency is invalid, OperandLatency is interpreted as MinLatency.
// For empty itineraries, short-cirtuit the check and default to one cycle.
@@ -638,29 +631,42 @@ static int computeDefOperandLatency(
return 1;
}
else if(ItinData->isEmpty())
- return TII->defaultDefLatency(ItinData->SchedModel, DefMI);
+ return defaultDefLatency(ItinData->SchedModel, DefMI);
// ...operand lookup required
return -1;
}
/// computeOperandLatency - Compute and return the latency of the given data
-/// dependent def and use when the operand indices are already known.
+/// dependent def and use when the operand indices are already known. UseMI may
+/// be NULL for an unknown use.
+///
+/// FindMin may be set to get the minimum vs. expected latency. Minimum
+/// latency is used for scheduling groups, while expected latency is for
+/// instruction cost and critical path.
///
-/// FindMin may be set to get the minimum vs. expected latency.
+/// Depending on the subtarget's itinerary properties, this may or may not need
+/// to call getOperandLatency(). For most subtargets, we don't need DefIdx or
+/// UseIdx to compute min latency.
unsigned TargetInstrInfo::
computeOperandLatency(const InstrItineraryData *ItinData,
const MachineInstr *DefMI, unsigned DefIdx,
const MachineInstr *UseMI, unsigned UseIdx,
bool FindMin) const {
- int DefLatency = computeDefOperandLatency(this, ItinData, DefMI, FindMin);
+ int DefLatency = computeDefOperandLatency(ItinData, DefMI, FindMin);
if (DefLatency >= 0)
return DefLatency;
assert(ItinData && !ItinData->isEmpty() && "computeDefOperandLatency fail");
- int OperLatency = getOperandLatency(ItinData, DefMI, DefIdx, UseMI, UseIdx);
+ int OperLatency = 0;
+ if (UseMI)
+ OperLatency = getOperandLatency(ItinData, DefMI, DefIdx, UseMI, UseIdx);
+ else {
+ unsigned DefClass = DefMI->getDesc().getSchedClass();
+ OperLatency = ItinData->getOperandCycle(DefClass, DefIdx);
+ }
if (OperLatency >= 0)
return OperLatency;
@@ -673,77 +679,3 @@ computeOperandLatency(const InstrItineraryData *ItinData,
defaultDefLatency(ItinData->SchedModel, DefMI));
return InstrLatency;
}
-
-/// computeOperandLatency - Compute and return the latency of the given data
-/// dependent def and use. DefMI must be a valid def. UseMI may be NULL for an
-/// unknown use. Depending on the subtarget's itinerary properties, this may or
-/// may not need to call getOperandLatency().
-///
-/// FindMin may be set to get the minimum vs. expected latency. Minimum
-/// latency is used for scheduling groups, while expected latency is for
-/// instruction cost and critical path.
-///
-/// For most subtargets, we don't need DefIdx or UseIdx to compute min latency.
-/// DefMI must be a valid definition, but UseMI may be NULL for an unknown use.
-unsigned TargetInstrInfo::
-computeOperandLatency(const InstrItineraryData *ItinData,
- const TargetRegisterInfo *TRI,
- const MachineInstr *DefMI, const MachineInstr *UseMI,
- unsigned Reg, bool FindMin) const {
-
- int DefLatency = computeDefOperandLatency(this, ItinData, DefMI, FindMin);
- if (DefLatency >= 0)
- return DefLatency;
-
- assert(ItinData && !ItinData->isEmpty() && "computeDefOperandLatency fail");
-
- // Find the definition of the register in the defining instruction.
- int DefIdx = DefMI->findRegisterDefOperandIdx(Reg);
- if (DefIdx != -1) {
- const MachineOperand &MO = DefMI->getOperand(DefIdx);
- if (MO.isReg() && MO.isImplicit() &&
- DefIdx >= (int)DefMI->getDesc().getNumOperands()) {
- // This is an implicit def, getOperandLatency() won't return the correct
- // latency. e.g.
- // %D6<def>, %D7<def> = VLD1q16 %R2<kill>, 0, ..., %Q3<imp-def>
- // %Q1<def> = VMULv8i16 %Q1<kill>, %Q3<kill>, ...
- // What we want is to compute latency between def of %D6/%D7 and use of
- // %Q3 instead.
- unsigned Op2 = DefMI->findRegisterDefOperandIdx(Reg, false, true, TRI);
- if (DefMI->getOperand(Op2).isReg())
- DefIdx = Op2;
- }
- // For all uses of the register, calculate the maxmimum latency
- int OperLatency = -1;
-
- // UseMI is null, then it must be a scheduling barrier.
- if (!UseMI) {
- unsigned DefClass = DefMI->getDesc().getSchedClass();
- OperLatency = ItinData->getOperandCycle(DefClass, DefIdx);
- }
- else {
- for (unsigned i = 0, e = UseMI->getNumOperands(); i != e; ++i) {
- const MachineOperand &MO = UseMI->getOperand(i);
- if (!MO.isReg() || !MO.isUse())
- continue;
- unsigned MOReg = MO.getReg();
- if (MOReg != Reg)
- continue;
-
- int UseCycle = getOperandLatency(ItinData, DefMI, DefIdx, UseMI, i);
- OperLatency = std::max(OperLatency, UseCycle);
- }
- }
- // If we found an operand latency, we're done.
- if (OperLatency >= 0)
- return OperLatency;
- }
- // No operand latency was found.
- unsigned InstrLatency = getInstrLatency(ItinData, DefMI);
-
- // Expected latency is the max of the stage latency and itinerary props.
- if (!FindMin)
- InstrLatency = std::max(InstrLatency,
- defaultDefLatency(ItinData->SchedModel, DefMI));
- return InstrLatency;
-}
diff --git a/contrib/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/contrib/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
index 2a2fa9e..8f5d770 100644
--- a/contrib/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
+++ b/contrib/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
@@ -27,7 +27,7 @@
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Target/Mangler.h"
-#include "llvm/Target/TargetData.h"
+#include "llvm/DataLayout.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Support/Dwarf.h"
@@ -77,9 +77,9 @@ void TargetLoweringObjectFileELF::emitPersonalityValue(MCStreamer &Streamer,
Flags,
SectionKind::getDataRel(),
0, Label->getName());
- unsigned Size = TM.getTargetData()->getPointerSize();
+ unsigned Size = TM.getDataLayout()->getPointerSize();
Streamer.SwitchSection(Sec);
- Streamer.EmitValueToAlignment(TM.getTargetData()->getPointerABIAlignment());
+ Streamer.EmitValueToAlignment(TM.getDataLayout()->getPointerABIAlignment());
Streamer.EmitSymbolAttribute(Label, MCSA_ELF_TypeObject);
const MCExpr *E = MCConstantExpr::Create(Size, getContext());
Streamer.EmitELFSize(Label, E);
@@ -247,7 +247,7 @@ SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
// FIXME: this is getting the alignment of the character, not the
// alignment of the global!
unsigned Align =
- TM.getTargetData()->getPreferredAlignment(cast<GlobalVariable>(GV));
+ TM.getDataLayout()->getPreferredAlignment(cast<GlobalVariable>(GV));
const char *SizeSpec = ".rodata.str1.";
if (Kind.isMergeable2ByteCString())
@@ -522,14 +522,14 @@ SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
// FIXME: Alignment check should be handled by section classifier.
if (Kind.isMergeable1ByteCString() &&
- TM.getTargetData()->getPreferredAlignment(cast<GlobalVariable>(GV)) < 32)
+ TM.getDataLayout()->getPreferredAlignment(cast<GlobalVariable>(GV)) < 32)
return CStringSection;
// Do not put 16-bit arrays in the UString section if they have an
// externally visible label, this runs into issues with certain linker
// versions.
if (Kind.isMergeable2ByteCString() && !GV->hasExternalLinkage() &&
- TM.getTargetData()->getPreferredAlignment(cast<GlobalVariable>(GV)) < 32)
+ TM.getDataLayout()->getPreferredAlignment(cast<GlobalVariable>(GV)) < 32)
return UStringSection;
if (Kind.isMergeableConst()) {
diff --git a/contrib/llvm/lib/CodeGen/TargetSchedule.cpp b/contrib/llvm/lib/CodeGen/TargetSchedule.cpp
new file mode 100644
index 0000000..ca3b0e0
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/TargetSchedule.cpp
@@ -0,0 +1,306 @@
+//===-- llvm/Target/TargetSchedule.cpp - Sched Machine Model ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a wrapper around MCSchedModel that allows the interface
+// to benefit from information currently only available in TargetInstrInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/TargetSchedule.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+static cl::opt<bool> EnableSchedModel("schedmodel", cl::Hidden, cl::init(true),
+ cl::desc("Use TargetSchedModel for latency lookup"));
+
+static cl::opt<bool> EnableSchedItins("scheditins", cl::Hidden, cl::init(true),
+ cl::desc("Use InstrItineraryData for latency lookup"));
+
+bool TargetSchedModel::hasInstrSchedModel() const {
+ return EnableSchedModel && SchedModel.hasInstrSchedModel();
+}
+
+bool TargetSchedModel::hasInstrItineraries() const {
+ return EnableSchedItins && !InstrItins.isEmpty();
+}
+
+static unsigned gcd(unsigned Dividend, unsigned Divisor) {
+ // Dividend and Divisor will be naturally swapped as needed.
+ while(Divisor) {
+ unsigned Rem = Dividend % Divisor;
+ Dividend = Divisor;
+ Divisor = Rem;
+ };
+ return Dividend;
+}
+static unsigned lcm(unsigned A, unsigned B) {
+ unsigned LCM = (uint64_t(A) * B) / gcd(A, B);
+ assert((LCM >= A && LCM >= B) && "LCM overflow");
+ return LCM;
+}
+
+void TargetSchedModel::init(const MCSchedModel &sm,
+ const TargetSubtargetInfo *sti,
+ const TargetInstrInfo *tii) {
+ SchedModel = sm;
+ STI = sti;
+ TII = tii;
+ STI->initInstrItins(InstrItins);
+
+ unsigned NumRes = SchedModel.getNumProcResourceKinds();
+ ResourceFactors.resize(NumRes);
+ ResourceLCM = SchedModel.IssueWidth;
+ for (unsigned Idx = 0; Idx < NumRes; ++Idx) {
+ unsigned NumUnits = SchedModel.getProcResource(Idx)->NumUnits;
+ if (NumUnits > 0)
+ ResourceLCM = lcm(ResourceLCM, NumUnits);
+ }
+ MicroOpFactor = ResourceLCM / SchedModel.IssueWidth;
+ for (unsigned Idx = 0; Idx < NumRes; ++Idx) {
+ unsigned NumUnits = SchedModel.getProcResource(Idx)->NumUnits;
+ ResourceFactors[Idx] = NumUnits ? (ResourceLCM / NumUnits) : 0;
+ }
+}
+
+unsigned TargetSchedModel::getNumMicroOps(const MachineInstr *MI,
+ const MCSchedClassDesc *SC) const {
+ if (hasInstrItineraries()) {
+ int UOps = InstrItins.getNumMicroOps(MI->getDesc().getSchedClass());
+ return (UOps >= 0) ? UOps : TII->getNumMicroOps(&InstrItins, MI);
+ }
+ if (hasInstrSchedModel()) {
+ if (!SC)
+ SC = resolveSchedClass(MI);
+ if (SC->isValid())
+ return SC->NumMicroOps;
+ }
+ return MI->isTransient() ? 0 : 1;
+}
+
+// The machine model may explicitly specify an invalid latency, which
+// effectively means infinite latency. Since users of the TargetSchedule API
+// don't know how to handle this, we convert it to a very large latency that is
+// easy to distinguish when debugging the DAG but won't induce overflow.
+static unsigned convertLatency(int Cycles) {
+ return Cycles >= 0 ? Cycles : 1000;
+}
+
+/// If we can determine the operand latency from the def only, without machine
+/// model or itinerary lookup, do so. Otherwise return -1.
+int TargetSchedModel::getDefLatency(const MachineInstr *DefMI,
+ bool FindMin) const {
+
+ // Return a latency based on the itinerary properties and defining instruction
+ // if possible. Some common subtargets don't require per-operand latency,
+ // especially for minimum latencies.
+ if (FindMin) {
+ // If MinLatency is invalid, then use the itinerary for MinLatency. If no
+ // itinerary exists either, then use single cycle latency.
+ if (SchedModel.MinLatency < 0 && !hasInstrItineraries()) {
+ return 1;
+ }
+ return SchedModel.MinLatency;
+ }
+ else if (!hasInstrSchedModel() && !hasInstrItineraries()) {
+ return TII->defaultDefLatency(&SchedModel, DefMI);
+ }
+ // ...operand lookup required
+ return -1;
+}
+
+/// Return the MCSchedClassDesc for this instruction. Some SchedClasses require
+/// evaluation of predicates that depend on instruction operands or flags.
+const MCSchedClassDesc *TargetSchedModel::
+resolveSchedClass(const MachineInstr *MI) const {
+
+ // Get the definition's scheduling class descriptor from this machine model.
+ unsigned SchedClass = MI->getDesc().getSchedClass();
+ const MCSchedClassDesc *SCDesc = SchedModel.getSchedClassDesc(SchedClass);
+
+#ifndef NDEBUG
+ unsigned NIter = 0;
+#endif
+ while (SCDesc->isVariant()) {
+ assert(++NIter < 6 && "Variants are nested deeper than the magic number");
+
+ SchedClass = STI->resolveSchedClass(SchedClass, MI, this);
+ SCDesc = SchedModel.getSchedClassDesc(SchedClass);
+ }
+ return SCDesc;
+}
+
+/// Find the def index of this operand. This index maps to the machine model and
+/// is independent of use operands. Def operands may be reordered with uses or
+/// merged with uses without affecting the def index (e.g. before/after
+/// regalloc). However, an instruction's def operands must never be reordered
+/// with respect to each other.
+static unsigned findDefIdx(const MachineInstr *MI, unsigned DefOperIdx) {
+ unsigned DefIdx = 0;
+ for (unsigned i = 0; i != DefOperIdx; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (MO.isReg() && MO.isDef())
+ ++DefIdx;
+ }
+ return DefIdx;
+}
+
+/// Find the use index of this operand. This is independent of the instruction's
+/// def operands.
+///
+/// Note that uses are not determined by the operand's isUse property, which
+/// is simply the inverse of isDef. Here we consider any readsReg operand to be
+/// a "use". The machine model allows an operand to be both a Def and Use.
+static unsigned findUseIdx(const MachineInstr *MI, unsigned UseOperIdx) {
+ unsigned UseIdx = 0;
+ for (unsigned i = 0; i != UseOperIdx; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (MO.isReg() && MO.readsReg())
+ ++UseIdx;
+ }
+ return UseIdx;
+}
+
+// Top-level API for clients that know the operand indices.
+unsigned TargetSchedModel::computeOperandLatency(
+ const MachineInstr *DefMI, unsigned DefOperIdx,
+ const MachineInstr *UseMI, unsigned UseOperIdx,
+ bool FindMin) const {
+
+ int DefLatency = getDefLatency(DefMI, FindMin);
+ if (DefLatency >= 0)
+ return DefLatency;
+
+ if (hasInstrItineraries()) {
+ int OperLatency = 0;
+ if (UseMI) {
+ OperLatency =
+ TII->getOperandLatency(&InstrItins, DefMI, DefOperIdx, UseMI, UseOperIdx);
+ }
+ else {
+ unsigned DefClass = DefMI->getDesc().getSchedClass();
+ OperLatency = InstrItins.getOperandCycle(DefClass, DefOperIdx);
+ }
+ if (OperLatency >= 0)
+ return OperLatency;
+
+ // No operand latency was found.
+ unsigned InstrLatency = TII->getInstrLatency(&InstrItins, DefMI);
+
+ // Expected latency is the max of the stage latency and itinerary props.
+ // Rather than directly querying InstrItins stage latency, we call a TII
+ // hook to allow subtargets to specialize latency. This hook is only
+ // applicable to the InstrItins model. InstrSchedModel should model all
+ // special cases without TII hooks.
+ if (!FindMin)
+ InstrLatency = std::max(InstrLatency,
+ TII->defaultDefLatency(&SchedModel, DefMI));
+ return InstrLatency;
+ }
+ assert(!FindMin && hasInstrSchedModel() &&
+ "Expected a SchedModel for this cpu");
+ const MCSchedClassDesc *SCDesc = resolveSchedClass(DefMI);
+ unsigned DefIdx = findDefIdx(DefMI, DefOperIdx);
+ if (DefIdx < SCDesc->NumWriteLatencyEntries) {
+ // Lookup the definition's write latency in SubtargetInfo.
+ const MCWriteLatencyEntry *WLEntry =
+ STI->getWriteLatencyEntry(SCDesc, DefIdx);
+ unsigned WriteID = WLEntry->WriteResourceID;
+ unsigned Latency = convertLatency(WLEntry->Cycles);
+ if (!UseMI)
+ return Latency;
+
+ // Lookup the use's latency adjustment in SubtargetInfo.
+ const MCSchedClassDesc *UseDesc = resolveSchedClass(UseMI);
+ if (UseDesc->NumReadAdvanceEntries == 0)
+ return Latency;
+ unsigned UseIdx = findUseIdx(UseMI, UseOperIdx);
+ return Latency - STI->getReadAdvanceCycles(UseDesc, UseIdx, WriteID);
+ }
+ // If DefIdx does not exist in the model (e.g. implicit defs), then return
+ // unit latency (defaultDefLatency may be too conservative).
+#ifndef NDEBUG
+ if (SCDesc->isValid() && !DefMI->getOperand(DefOperIdx).isImplicit()
+ && !DefMI->getDesc().OpInfo[DefOperIdx].isOptionalDef()) {
+ std::string Err;
+ raw_string_ostream ss(Err);
+ ss << "DefIdx " << DefIdx << " exceeds machine model writes for "
+ << *DefMI;
+ report_fatal_error(ss.str());
+ }
+#endif
+ return DefMI->isTransient() ? 0 : 1;
+}
+
+unsigned TargetSchedModel::computeInstrLatency(const MachineInstr *MI) const {
+ // For the itinerary model, fall back to the old subtarget hook.
+ // Allow subtargets to compute Bundle latencies outside the machine model.
+ if (hasInstrItineraries() || MI->isBundle())
+ return TII->getInstrLatency(&InstrItins, MI);
+
+ if (hasInstrSchedModel()) {
+ const MCSchedClassDesc *SCDesc = resolveSchedClass(MI);
+ if (SCDesc->isValid()) {
+ unsigned Latency = 0;
+ for (unsigned DefIdx = 0, DefEnd = SCDesc->NumWriteLatencyEntries;
+ DefIdx != DefEnd; ++DefIdx) {
+ // Lookup the definition's write latency in SubtargetInfo.
+ const MCWriteLatencyEntry *WLEntry =
+ STI->getWriteLatencyEntry(SCDesc, DefIdx);
+ Latency = std::max(Latency, convertLatency(WLEntry->Cycles));
+ }
+ return Latency;
+ }
+ }
+ return TII->defaultDefLatency(&SchedModel, MI);
+}
+
+unsigned TargetSchedModel::
+computeOutputLatency(const MachineInstr *DefMI, unsigned DefOperIdx,
+ const MachineInstr *DepMI) const {
+ // MinLatency == -1 is for in-order processors that always have unit
+ // MinLatency. MinLatency > 0 is for in-order processors with varying min
+ // latencies, but since this is not a RAW dep, we always use unit latency.
+ if (SchedModel.MinLatency != 0)
+ return 1;
+
+ // MinLatency == 0 indicates an out-of-order processor that can dispatch
+ // WAW dependencies in the same cycle.
+
+ // Treat predication as a data dependency for out-of-order cpus. In-order
+ // cpus do not need to treat predicated writes specially.
+ //
+ // TODO: The following hack exists because predication passes do not
+ // correctly append imp-use operands, and readsReg() strangely returns false
+ // for predicated defs.
+ unsigned Reg = DefMI->getOperand(DefOperIdx).getReg();
+ const MachineFunction &MF = *DefMI->getParent()->getParent();
+ const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo();
+ if (!DepMI->readsRegister(Reg, TRI) && TII->isPredicated(DepMI))
+ return computeInstrLatency(DefMI);
+
+ // If we have a per operand scheduling model, check if this def is writing
+ // an unbuffered resource. If so, it treated like an in-order cpu.
+ if (hasInstrSchedModel()) {
+ const MCSchedClassDesc *SCDesc = resolveSchedClass(DefMI);
+ if (SCDesc->isValid()) {
+ for (const MCWriteProcResEntry *PRI = STI->getWriteProcResBegin(SCDesc),
+ *PRE = STI->getWriteProcResEnd(SCDesc); PRI != PRE; ++PRI) {
+ if (!SchedModel.getProcResource(PRI->ProcResourceIdx)->IsBuffered)
+ return 1;
+ }
+ }
+ }
+ return 0;
+}
diff --git a/contrib/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp b/contrib/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp
index aa601af..a9058bc 100644
--- a/contrib/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp
+++ b/contrib/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp
@@ -60,116 +60,108 @@ STATISTIC(NumReSchedUps, "Number of instructions re-scheduled up");
STATISTIC(NumReSchedDowns, "Number of instructions re-scheduled down");
namespace {
- class TwoAddressInstructionPass : public MachineFunctionPass {
- MachineFunction *MF;
- const TargetInstrInfo *TII;
- const TargetRegisterInfo *TRI;
- const InstrItineraryData *InstrItins;
- MachineRegisterInfo *MRI;
- LiveVariables *LV;
- SlotIndexes *Indexes;
- LiveIntervals *LIS;
- AliasAnalysis *AA;
- CodeGenOpt::Level OptLevel;
-
- // DistanceMap - Keep track the distance of a MI from the start of the
- // current basic block.
- DenseMap<MachineInstr*, unsigned> DistanceMap;
-
- // SrcRegMap - A map from virtual registers to physical registers which
- // are likely targets to be coalesced to due to copies from physical
- // registers to virtual registers. e.g. v1024 = move r0.
- DenseMap<unsigned, unsigned> SrcRegMap;
-
- // DstRegMap - A map from virtual registers to physical registers which
- // are likely targets to be coalesced to due to copies to physical
- // registers from virtual registers. e.g. r1 = move v1024.
- DenseMap<unsigned, unsigned> DstRegMap;
-
- /// RegSequences - Keep track the list of REG_SEQUENCE instructions seen
- /// during the initial walk of the machine function.
- SmallVector<MachineInstr*, 16> RegSequences;
-
- bool Sink3AddrInstruction(MachineBasicBlock *MBB, MachineInstr *MI,
- unsigned Reg,
- MachineBasicBlock::iterator OldPos);
-
- bool NoUseAfterLastDef(unsigned Reg, MachineBasicBlock *MBB, unsigned Dist,
- unsigned &LastDef);
-
- bool isProfitableToCommute(unsigned regA, unsigned regB, unsigned regC,
- MachineInstr *MI, MachineBasicBlock *MBB,
- unsigned Dist);
+class TwoAddressInstructionPass : public MachineFunctionPass {
+ MachineFunction *MF;
+ const TargetInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+ const InstrItineraryData *InstrItins;
+ MachineRegisterInfo *MRI;
+ LiveVariables *LV;
+ SlotIndexes *Indexes;
+ LiveIntervals *LIS;
+ AliasAnalysis *AA;
+ CodeGenOpt::Level OptLevel;
+
+ // The current basic block being processed.
+ MachineBasicBlock *MBB;
+
+ // DistanceMap - Keep track the distance of a MI from the start of the
+ // current basic block.
+ DenseMap<MachineInstr*, unsigned> DistanceMap;
+
+ // Set of already processed instructions in the current block.
+ SmallPtrSet<MachineInstr*, 8> Processed;
- bool CommuteInstruction(MachineBasicBlock::iterator &mi,
- MachineFunction::iterator &mbbi,
- unsigned RegB, unsigned RegC, unsigned Dist);
+ // SrcRegMap - A map from virtual registers to physical registers which are
+ // likely targets to be coalesced to due to copies from physical registers to
+ // virtual registers. e.g. v1024 = move r0.
+ DenseMap<unsigned, unsigned> SrcRegMap;
- bool isProfitableToConv3Addr(unsigned RegA, unsigned RegB);
+ // DstRegMap - A map from virtual registers to physical registers which are
+ // likely targets to be coalesced to due to copies to physical registers from
+ // virtual registers. e.g. r1 = move v1024.
+ DenseMap<unsigned, unsigned> DstRegMap;
- bool ConvertInstTo3Addr(MachineBasicBlock::iterator &mi,
- MachineBasicBlock::iterator &nmi,
- MachineFunction::iterator &mbbi,
- unsigned RegA, unsigned RegB, unsigned Dist);
+ /// RegSequences - Keep track the list of REG_SEQUENCE instructions seen
+ /// during the initial walk of the machine function.
+ SmallVector<MachineInstr*, 16> RegSequences;
- bool isDefTooClose(unsigned Reg, unsigned Dist,
- MachineInstr *MI, MachineBasicBlock *MBB);
+ bool sink3AddrInstruction(MachineInstr *MI, unsigned Reg,
+ MachineBasicBlock::iterator OldPos);
- bool RescheduleMIBelowKill(MachineBasicBlock *MBB,
- MachineBasicBlock::iterator &mi,
- MachineBasicBlock::iterator &nmi,
- unsigned Reg);
- bool RescheduleKillAboveMI(MachineBasicBlock *MBB,
- MachineBasicBlock::iterator &mi,
- MachineBasicBlock::iterator &nmi,
- unsigned Reg);
+ bool noUseAfterLastDef(unsigned Reg, unsigned Dist, unsigned &LastDef);
- bool TryInstructionTransform(MachineBasicBlock::iterator &mi,
- MachineBasicBlock::iterator &nmi,
- MachineFunction::iterator &mbbi,
- unsigned SrcIdx, unsigned DstIdx,
- unsigned Dist,
- SmallPtrSet<MachineInstr*, 8> &Processed);
+ bool isProfitableToCommute(unsigned regA, unsigned regB, unsigned regC,
+ MachineInstr *MI, unsigned Dist);
- void ScanUses(unsigned DstReg, MachineBasicBlock *MBB,
- SmallPtrSet<MachineInstr*, 8> &Processed);
+ bool commuteInstruction(MachineBasicBlock::iterator &mi,
+ unsigned RegB, unsigned RegC, unsigned Dist);
- void ProcessCopy(MachineInstr *MI, MachineBasicBlock *MBB,
- SmallPtrSet<MachineInstr*, 8> &Processed);
+ bool isProfitableToConv3Addr(unsigned RegA, unsigned RegB);
- typedef SmallVector<std::pair<unsigned, unsigned>, 4> TiedPairList;
- typedef SmallDenseMap<unsigned, TiedPairList> TiedOperandMap;
- bool collectTiedOperands(MachineInstr *MI, TiedOperandMap&);
- void processTiedPairs(MachineInstr *MI, TiedPairList&, unsigned &Dist);
+ bool convertInstTo3Addr(MachineBasicBlock::iterator &mi,
+ MachineBasicBlock::iterator &nmi,
+ unsigned RegA, unsigned RegB, unsigned Dist);
- void CoalesceExtSubRegs(SmallVector<unsigned,4> &Srcs, unsigned DstReg);
+ bool isDefTooClose(unsigned Reg, unsigned Dist, MachineInstr *MI);
- /// EliminateRegSequences - Eliminate REG_SEQUENCE instructions as part
- /// of the de-ssa process. This replaces sources of REG_SEQUENCE as
- /// sub-register references of the register defined by REG_SEQUENCE.
- bool EliminateRegSequences();
+ bool rescheduleMIBelowKill(MachineBasicBlock::iterator &mi,
+ MachineBasicBlock::iterator &nmi,
+ unsigned Reg);
+ bool rescheduleKillAboveMI(MachineBasicBlock::iterator &mi,
+ MachineBasicBlock::iterator &nmi,
+ unsigned Reg);
- public:
- static char ID; // Pass identification, replacement for typeid
- TwoAddressInstructionPass() : MachineFunctionPass(ID) {
- initializeTwoAddressInstructionPassPass(*PassRegistry::getPassRegistry());
- }
+ bool tryInstructionTransform(MachineBasicBlock::iterator &mi,
+ MachineBasicBlock::iterator &nmi,
+ unsigned SrcIdx, unsigned DstIdx,
+ unsigned Dist);
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
- AU.setPreservesCFG();
- AU.addRequired<AliasAnalysis>();
- AU.addPreserved<LiveVariables>();
- AU.addPreserved<SlotIndexes>();
- AU.addPreserved<LiveIntervals>();
- AU.addPreservedID(MachineLoopInfoID);
- AU.addPreservedID(MachineDominatorsID);
- MachineFunctionPass::getAnalysisUsage(AU);
- }
+ void scanUses(unsigned DstReg);
- /// runOnMachineFunction - Pass entry point.
- bool runOnMachineFunction(MachineFunction&);
- };
-}
+ void processCopy(MachineInstr *MI);
+
+ typedef SmallVector<std::pair<unsigned, unsigned>, 4> TiedPairList;
+ typedef SmallDenseMap<unsigned, TiedPairList> TiedOperandMap;
+ bool collectTiedOperands(MachineInstr *MI, TiedOperandMap&);
+ void processTiedPairs(MachineInstr *MI, TiedPairList&, unsigned &Dist);
+
+ /// eliminateRegSequences - Eliminate REG_SEQUENCE instructions as part of
+ /// the de-ssa process. This replaces sources of REG_SEQUENCE as sub-register
+ /// references of the register defined by REG_SEQUENCE.
+ bool eliminateRegSequences();
+
+public:
+ static char ID; // Pass identification, replacement for typeid
+ TwoAddressInstructionPass() : MachineFunctionPass(ID) {
+ initializeTwoAddressInstructionPassPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ AU.addRequired<AliasAnalysis>();
+ AU.addPreserved<LiveVariables>();
+ AU.addPreserved<SlotIndexes>();
+ AU.addPreserved<LiveIntervals>();
+ AU.addPreservedID(MachineLoopInfoID);
+ AU.addPreservedID(MachineDominatorsID);
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ /// runOnMachineFunction - Pass entry point.
+ bool runOnMachineFunction(MachineFunction&);
+};
+} // end anonymous namespace
char TwoAddressInstructionPass::ID = 0;
INITIALIZE_PASS_BEGIN(TwoAddressInstructionPass, "twoaddressinstruction",
@@ -180,13 +172,13 @@ INITIALIZE_PASS_END(TwoAddressInstructionPass, "twoaddressinstruction",
char &llvm::TwoAddressInstructionPassID = TwoAddressInstructionPass::ID;
-/// Sink3AddrInstruction - A two-address instruction has been converted to a
+/// sink3AddrInstruction - A two-address instruction has been converted to a
/// three-address instruction to avoid clobbering a register. Try to sink it
/// past the instruction that would kill the above mentioned register to reduce
/// register pressure.
-bool TwoAddressInstructionPass::Sink3AddrInstruction(MachineBasicBlock *MBB,
- MachineInstr *MI, unsigned SavedReg,
- MachineBasicBlock::iterator OldPos) {
+bool TwoAddressInstructionPass::
+sink3AddrInstruction(MachineInstr *MI, unsigned SavedReg,
+ MachineBasicBlock::iterator OldPos) {
// FIXME: Shouldn't we be trying to do this before we three-addressify the
// instruction? After this transformation is done, we no longer need
// the instruction to be in three-address form.
@@ -299,13 +291,12 @@ bool TwoAddressInstructionPass::Sink3AddrInstruction(MachineBasicBlock *MBB,
return true;
}
-/// NoUseAfterLastDef - Return true if there are no intervening uses between the
+/// noUseAfterLastDef - Return true if there are no intervening uses between the
/// last instruction in the MBB that defines the specified register and the
/// two-address instruction which is being processed. It also returns the last
/// def location by reference
-bool TwoAddressInstructionPass::NoUseAfterLastDef(unsigned Reg,
- MachineBasicBlock *MBB, unsigned Dist,
- unsigned &LastDef) {
+bool TwoAddressInstructionPass::noUseAfterLastDef(unsigned Reg, unsigned Dist,
+ unsigned &LastDef) {
LastDef = 0;
unsigned LastUse = Dist;
for (MachineRegisterInfo::reg_iterator I = MRI->reg_begin(Reg),
@@ -465,10 +456,9 @@ regsAreCompatible(unsigned RegA, unsigned RegB, const TargetRegisterInfo *TRI) {
/// isProfitableToCommute - Return true if it's potentially profitable to commute
/// the two-address instruction that's being processed.
bool
-TwoAddressInstructionPass::isProfitableToCommute(unsigned regA, unsigned regB,
- unsigned regC,
- MachineInstr *MI, MachineBasicBlock *MBB,
- unsigned Dist) {
+TwoAddressInstructionPass::
+isProfitableToCommute(unsigned regA, unsigned regB, unsigned regC,
+ MachineInstr *MI, unsigned Dist) {
if (OptLevel == CodeGenOpt::None)
return false;
@@ -516,13 +506,13 @@ TwoAddressInstructionPass::isProfitableToCommute(unsigned regA, unsigned regB,
// If there is a use of regC between its last def (could be livein) and this
// instruction, then bail.
unsigned LastDefC = 0;
- if (!NoUseAfterLastDef(regC, MBB, Dist, LastDefC))
+ if (!noUseAfterLastDef(regC, Dist, LastDefC))
return false;
// If there is a use of regB between its last def (could be livein) and this
// instruction, then go ahead and make this transformation.
unsigned LastDefB = 0;
- if (!NoUseAfterLastDef(regB, MBB, Dist, LastDefB))
+ if (!noUseAfterLastDef(regB, Dist, LastDefB))
return true;
// Since there are no intervening uses for both registers, then commute
@@ -530,13 +520,12 @@ TwoAddressInstructionPass::isProfitableToCommute(unsigned regA, unsigned regB,
return LastDefB && LastDefC && LastDefC > LastDefB;
}
-/// CommuteInstruction - Commute a two-address instruction and update the basic
+/// commuteInstruction - Commute a two-address instruction and update the basic
/// block, distance map, and live variables if needed. Return true if it is
/// successful.
-bool
-TwoAddressInstructionPass::CommuteInstruction(MachineBasicBlock::iterator &mi,
- MachineFunction::iterator &mbbi,
- unsigned RegB, unsigned RegC, unsigned Dist) {
+bool TwoAddressInstructionPass::
+commuteInstruction(MachineBasicBlock::iterator &mi,
+ unsigned RegB, unsigned RegC, unsigned Dist) {
MachineInstr *MI = mi;
DEBUG(dbgs() << "2addr: COMMUTING : " << *MI);
MachineInstr *NewMI = TII->commuteInstruction(MI);
@@ -555,8 +544,8 @@ TwoAddressInstructionPass::CommuteInstruction(MachineBasicBlock::iterator &mi,
if (Indexes)
Indexes->replaceMachineInstrInMaps(MI, NewMI);
- mbbi->insert(mi, NewMI); // Insert the new inst
- mbbi->erase(mi); // Nuke the old inst.
+ MBB->insert(mi, NewMI); // Insert the new inst
+ MBB->erase(mi); // Nuke the old inst.
mi = NewMI;
DistanceMap.insert(std::make_pair(NewMI, Dist));
}
@@ -588,51 +577,51 @@ TwoAddressInstructionPass::isProfitableToConv3Addr(unsigned RegA,unsigned RegB){
return (ToRegA && !regsAreCompatible(FromRegB, ToRegA, TRI));
}
-/// ConvertInstTo3Addr - Convert the specified two-address instruction into a
+/// convertInstTo3Addr - Convert the specified two-address instruction into a
/// three address one. Return true if this transformation was successful.
bool
-TwoAddressInstructionPass::ConvertInstTo3Addr(MachineBasicBlock::iterator &mi,
+TwoAddressInstructionPass::convertInstTo3Addr(MachineBasicBlock::iterator &mi,
MachineBasicBlock::iterator &nmi,
- MachineFunction::iterator &mbbi,
unsigned RegA, unsigned RegB,
unsigned Dist) {
- MachineInstr *NewMI = TII->convertToThreeAddress(mbbi, mi, LV);
- if (NewMI) {
- DEBUG(dbgs() << "2addr: CONVERTING 2-ADDR: " << *mi);
- DEBUG(dbgs() << "2addr: TO 3-ADDR: " << *NewMI);
- bool Sunk = false;
+ // FIXME: Why does convertToThreeAddress() need an iterator reference?
+ MachineFunction::iterator MFI = MBB;
+ MachineInstr *NewMI = TII->convertToThreeAddress(MFI, mi, LV);
+ assert(MBB == MFI && "convertToThreeAddress changed iterator reference");
+ if (!NewMI)
+ return false;
- if (Indexes)
- Indexes->replaceMachineInstrInMaps(mi, NewMI);
+ DEBUG(dbgs() << "2addr: CONVERTING 2-ADDR: " << *mi);
+ DEBUG(dbgs() << "2addr: TO 3-ADDR: " << *NewMI);
+ bool Sunk = false;
- if (NewMI->findRegisterUseOperand(RegB, false, TRI))
- // FIXME: Temporary workaround. If the new instruction doesn't
- // uses RegB, convertToThreeAddress must have created more
- // then one instruction.
- Sunk = Sink3AddrInstruction(mbbi, NewMI, RegB, mi);
+ if (Indexes)
+ Indexes->replaceMachineInstrInMaps(mi, NewMI);
- mbbi->erase(mi); // Nuke the old inst.
+ if (NewMI->findRegisterUseOperand(RegB, false, TRI))
+ // FIXME: Temporary workaround. If the new instruction doesn't
+ // uses RegB, convertToThreeAddress must have created more
+ // then one instruction.
+ Sunk = sink3AddrInstruction(NewMI, RegB, mi);
- if (!Sunk) {
- DistanceMap.insert(std::make_pair(NewMI, Dist));
- mi = NewMI;
- nmi = llvm::next(mi);
- }
+ MBB->erase(mi); // Nuke the old inst.
- // Update source and destination register maps.
- SrcRegMap.erase(RegA);
- DstRegMap.erase(RegB);
- return true;
+ if (!Sunk) {
+ DistanceMap.insert(std::make_pair(NewMI, Dist));
+ mi = NewMI;
+ nmi = llvm::next(mi);
}
- return false;
+ // Update source and destination register maps.
+ SrcRegMap.erase(RegA);
+ DstRegMap.erase(RegB);
+ return true;
}
-/// ScanUses - Scan forward recursively for only uses, update maps if the use
+/// scanUses - Scan forward recursively for only uses, update maps if the use
/// is a copy or a two-address instruction.
void
-TwoAddressInstructionPass::ScanUses(unsigned DstReg, MachineBasicBlock *MBB,
- SmallPtrSet<MachineInstr*, 8> &Processed) {
+TwoAddressInstructionPass::scanUses(unsigned DstReg) {
SmallVector<unsigned, 4> VirtRegPairs;
bool IsDstPhys;
bool IsCopy = false;
@@ -676,7 +665,7 @@ TwoAddressInstructionPass::ScanUses(unsigned DstReg, MachineBasicBlock *MBB,
}
}
-/// ProcessCopy - If the specified instruction is not yet processed, process it
+/// processCopy - If the specified instruction is not yet processed, process it
/// if it's a copy. For a copy instruction, we find the physical registers the
/// source and destination registers might be mapped to. These are kept in
/// point-to maps used to determine future optimizations. e.g.
@@ -688,9 +677,7 @@ TwoAddressInstructionPass::ScanUses(unsigned DstReg, MachineBasicBlock *MBB,
/// coalesced to r0 (from the input side). v1025 is mapped to r1. v1026 is
/// potentially joined with r1 on the output side. It's worthwhile to commute
/// 'add' to eliminate a copy.
-void TwoAddressInstructionPass::ProcessCopy(MachineInstr *MI,
- MachineBasicBlock *MBB,
- SmallPtrSet<MachineInstr*, 8> &Processed) {
+void TwoAddressInstructionPass::processCopy(MachineInstr *MI) {
if (Processed.count(MI))
return;
@@ -707,21 +694,20 @@ void TwoAddressInstructionPass::ProcessCopy(MachineInstr *MI,
assert(SrcRegMap[DstReg] == SrcReg &&
"Can't map to two src physical registers!");
- ScanUses(DstReg, MBB, Processed);
+ scanUses(DstReg);
}
Processed.insert(MI);
return;
}
-/// RescheduleMIBelowKill - If there is one more local instruction that reads
+/// rescheduleMIBelowKill - If there is one more local instruction that reads
/// 'Reg' and it kills 'Reg, consider moving the instruction below the kill
/// instruction in order to eliminate the need for the copy.
-bool
-TwoAddressInstructionPass::RescheduleMIBelowKill(MachineBasicBlock *MBB,
- MachineBasicBlock::iterator &mi,
- MachineBasicBlock::iterator &nmi,
- unsigned Reg) {
+bool TwoAddressInstructionPass::
+rescheduleMIBelowKill(MachineBasicBlock::iterator &mi,
+ MachineBasicBlock::iterator &nmi,
+ unsigned Reg) {
// Bail immediately if we don't have LV available. We use it to find kills
// efficiently.
if (!LV)
@@ -853,8 +839,7 @@ TwoAddressInstructionPass::RescheduleMIBelowKill(MachineBasicBlock *MBB,
/// isDefTooClose - Return true if the re-scheduling will put the given
/// instruction too close to the defs of its register dependencies.
bool TwoAddressInstructionPass::isDefTooClose(unsigned Reg, unsigned Dist,
- MachineInstr *MI,
- MachineBasicBlock *MBB) {
+ MachineInstr *MI) {
for (MachineRegisterInfo::def_iterator DI = MRI->def_begin(Reg),
DE = MRI->def_end(); DI != DE; ++DI) {
MachineInstr *DefMI = &*DI;
@@ -873,15 +858,14 @@ bool TwoAddressInstructionPass::isDefTooClose(unsigned Reg, unsigned Dist,
return false;
}
-/// RescheduleKillAboveMI - If there is one more local instruction that reads
+/// rescheduleKillAboveMI - If there is one more local instruction that reads
/// 'Reg' and it kills 'Reg, consider moving the kill instruction above the
/// current two-address instruction in order to eliminate the need for the
/// copy.
-bool
-TwoAddressInstructionPass::RescheduleKillAboveMI(MachineBasicBlock *MBB,
- MachineBasicBlock::iterator &mi,
- MachineBasicBlock::iterator &nmi,
- unsigned Reg) {
+bool TwoAddressInstructionPass::
+rescheduleKillAboveMI(MachineBasicBlock::iterator &mi,
+ MachineBasicBlock::iterator &nmi,
+ unsigned Reg) {
// Bail immediately if we don't have LV available. We use it to find kills
// efficiently.
if (!LV)
@@ -918,7 +902,7 @@ TwoAddressInstructionPass::RescheduleKillAboveMI(MachineBasicBlock *MBB,
if (MO.isUse()) {
if (!MOReg)
continue;
- if (isDefTooClose(MOReg, DI->second, MI, MBB))
+ if (isDefTooClose(MOReg, DI->second, MI))
return false;
if (MOReg == Reg && !MO.isKill())
return false;
@@ -1006,18 +990,16 @@ TwoAddressInstructionPass::RescheduleKillAboveMI(MachineBasicBlock *MBB,
return true;
}
-/// TryInstructionTransform - For the case where an instruction has a single
+/// tryInstructionTransform - For the case where an instruction has a single
/// pair of tied register operands, attempt some transformations that may
/// either eliminate the tied operands or improve the opportunities for
/// coalescing away the register copy. Returns true if no copy needs to be
/// inserted to untie mi's operands (either because they were untied, or
/// because mi was rescheduled, and will be visited again later).
bool TwoAddressInstructionPass::
-TryInstructionTransform(MachineBasicBlock::iterator &mi,
+tryInstructionTransform(MachineBasicBlock::iterator &mi,
MachineBasicBlock::iterator &nmi,
- MachineFunction::iterator &mbbi,
- unsigned SrcIdx, unsigned DstIdx, unsigned Dist,
- SmallPtrSet<MachineInstr*, 8> &Processed) {
+ unsigned SrcIdx, unsigned DstIdx, unsigned Dist) {
if (OptLevel == CodeGenOpt::None)
return false;
@@ -1030,7 +1012,7 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi,
bool regBKilled = isKilled(MI, regB, MRI, TII);
if (TargetRegisterInfo::isVirtualRegister(regA))
- ScanUses(regA, &*mbbi, Processed);
+ scanUses(regA);
// Check if it is profitable to commute the operands.
unsigned SrcOp1, SrcOp2;
@@ -1051,7 +1033,7 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi,
// If C dies but B does not, swap the B and C operands.
// This makes the live ranges of A and C joinable.
TryCommute = true;
- else if (isProfitableToCommute(regA, regB, regC, &MI, mbbi, Dist)) {
+ else if (isProfitableToCommute(regA, regB, regC, &MI, Dist)) {
TryCommute = true;
AggressiveCommute = true;
}
@@ -1059,7 +1041,7 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi,
}
// If it's profitable to commute, try to do so.
- if (TryCommute && CommuteInstruction(mi, mbbi, regB, regC, Dist)) {
+ if (TryCommute && commuteInstruction(mi, regB, regC, Dist)) {
++NumCommuted;
if (AggressiveCommute)
++NumAggrCommuted;
@@ -1068,7 +1050,7 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi,
// If there is one more use of regB later in the same MBB, consider
// re-schedule this MI below it.
- if (RescheduleMIBelowKill(mbbi, mi, nmi, regB)) {
+ if (rescheduleMIBelowKill(mi, nmi, regB)) {
++NumReSchedDowns;
return true;
}
@@ -1078,7 +1060,7 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi,
// three-address instruction. Check if it is profitable.
if (!regBKilled || isProfitableToConv3Addr(regA, regB)) {
// Try to convert it.
- if (ConvertInstTo3Addr(mi, nmi, mbbi, regA, regB, Dist)) {
+ if (convertInstTo3Addr(mi, nmi, regA, regB, Dist)) {
++NumConvertedTo3Addr;
return true; // Done with this instruction.
}
@@ -1087,7 +1069,7 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi,
// If there is one more use of regB later in the same MBB, consider
// re-schedule it before this MI if it's legal.
- if (RescheduleKillAboveMI(mbbi, mi, nmi, regB)) {
+ if (rescheduleKillAboveMI(mi, nmi, regB)) {
++NumReSchedUps;
return true;
}
@@ -1131,8 +1113,8 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi,
// Tentatively insert the instructions into the block so that they
// look "normal" to the transformation logic.
- mbbi->insert(mi, NewMIs[0]);
- mbbi->insert(mi, NewMIs[1]);
+ MBB->insert(mi, NewMIs[0]);
+ MBB->insert(mi, NewMIs[1]);
DEBUG(dbgs() << "2addr: NEW LOAD: " << *NewMIs[0]
<< "2addr: NEW INST: " << *NewMIs[1]);
@@ -1142,8 +1124,7 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi,
unsigned NewSrcIdx = NewMIs[1]->findRegisterUseOperandIdx(regB);
MachineBasicBlock::iterator NewMI = NewMIs[1];
bool TransformSuccess =
- TryInstructionTransform(NewMI, mi, mbbi,
- NewSrcIdx, NewDstIdx, Dist, Processed);
+ tryInstructionTransform(NewMI, mi, NewSrcIdx, NewDstIdx, Dist);
if (TransformSuccess ||
NewMIs[1]->getOperand(NewSrcIdx).isKill()) {
// Success, or at least we made an improvement. Keep the unfolded
@@ -1202,8 +1183,7 @@ bool TwoAddressInstructionPass::
collectTiedOperands(MachineInstr *MI, TiedOperandMap &TiedOperands) {
const MCInstrDesc &MCID = MI->getDesc();
bool AnyOps = false;
- unsigned NumOps = MI->isInlineAsm() ?
- MI->getNumOperands() : MCID.getNumOperands();
+ unsigned NumOps = MI->getNumOperands();
for (unsigned SrcIdx = 0; SrcIdx < NumOps; ++SrcIdx) {
unsigned DstIdx = 0;
@@ -1373,22 +1353,21 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &Func) {
DEBUG(dbgs() << "********** REWRITING TWO-ADDR INSTRS **********\n");
DEBUG(dbgs() << "********** Function: "
- << MF->getFunction()->getName() << '\n');
+ << MF->getName() << '\n');
// This pass takes the function out of SSA form.
MRI->leaveSSA();
TiedOperandMap TiedOperands;
-
- SmallPtrSet<MachineInstr*, 8> Processed;
- for (MachineFunction::iterator mbbi = MF->begin(), mbbe = MF->end();
- mbbi != mbbe; ++mbbi) {
+ for (MachineFunction::iterator MBBI = MF->begin(), MBBE = MF->end();
+ MBBI != MBBE; ++MBBI) {
+ MBB = MBBI;
unsigned Dist = 0;
DistanceMap.clear();
SrcRegMap.clear();
DstRegMap.clear();
Processed.clear();
- for (MachineBasicBlock::iterator mi = mbbi->begin(), me = mbbi->end();
+ for (MachineBasicBlock::iterator mi = MBB->begin(), me = MBB->end();
mi != me; ) {
MachineBasicBlock::iterator nmi = llvm::next(mi);
if (mi->isDebugValue()) {
@@ -1402,7 +1381,7 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &Func) {
DistanceMap.insert(std::make_pair(mi, ++Dist));
- ProcessCopy(&*mi, &*mbbi, Processed);
+ processCopy(&*mi);
// First scan through all the tied register uses in this instruction
// and record a list of pairs of tied operands for each register.
@@ -1427,8 +1406,7 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &Func) {
unsigned SrcReg = mi->getOperand(SrcIdx).getReg();
unsigned DstReg = mi->getOperand(DstIdx).getReg();
if (SrcReg != DstReg &&
- TryInstructionTransform(mi, nmi, mbbi, SrcIdx, DstIdx, Dist,
- Processed)) {
+ tryInstructionTransform(mi, nmi, SrcIdx, DstIdx, Dist)) {
// The tied operands have been eliminated or shifted further down the
// block to ease elimination. Continue processing with 'nmi'.
TiedOperands.clear();
@@ -1468,7 +1446,7 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &Func) {
// Eliminate REG_SEQUENCE instructions. Their whole purpose was to preseve
// SSA form. It's now safe to de-SSA.
- MadeChange |= EliminateRegSequences();
+ MadeChange |= eliminateRegSequences();
return MadeChange;
}
@@ -1515,127 +1493,6 @@ static MachineInstr *findFirstDef(unsigned Reg, MachineRegisterInfo *MRI) {
return First;
}
-/// CoalesceExtSubRegs - If a number of sources of the REG_SEQUENCE are
-/// EXTRACT_SUBREG from the same register and to the same virtual register
-/// with different sub-register indices, attempt to combine the
-/// EXTRACT_SUBREGs and pre-coalesce them. e.g.
-/// %reg1026<def> = VLDMQ %reg1025<kill>, 260, pred:14, pred:%reg0
-/// %reg1029:6<def> = EXTRACT_SUBREG %reg1026, 6
-/// %reg1029:5<def> = EXTRACT_SUBREG %reg1026<kill>, 5
-/// Since D subregs 5, 6 can combine to a Q register, we can coalesce
-/// reg1026 to reg1029.
-void
-TwoAddressInstructionPass::CoalesceExtSubRegs(SmallVector<unsigned,4> &Srcs,
- unsigned DstReg) {
- SmallSet<unsigned, 4> Seen;
- for (unsigned i = 0, e = Srcs.size(); i != e; ++i) {
- unsigned SrcReg = Srcs[i];
- if (!Seen.insert(SrcReg))
- continue;
-
- // Check that the instructions are all in the same basic block.
- MachineInstr *SrcDefMI = MRI->getUniqueVRegDef(SrcReg);
- MachineInstr *DstDefMI = MRI->getUniqueVRegDef(DstReg);
- if (!SrcDefMI || !DstDefMI ||
- SrcDefMI->getParent() != DstDefMI->getParent())
- continue;
-
- // If there are no other uses than copies which feed into
- // the reg_sequence, then we might be able to coalesce them.
- bool CanCoalesce = true;
- SmallVector<unsigned, 4> SrcSubIndices, DstSubIndices;
- for (MachineRegisterInfo::use_nodbg_iterator
- UI = MRI->use_nodbg_begin(SrcReg),
- UE = MRI->use_nodbg_end(); UI != UE; ++UI) {
- MachineInstr *UseMI = &*UI;
- if (!UseMI->isCopy() || UseMI->getOperand(0).getReg() != DstReg) {
- CanCoalesce = false;
- break;
- }
- SrcSubIndices.push_back(UseMI->getOperand(1).getSubReg());
- DstSubIndices.push_back(UseMI->getOperand(0).getSubReg());
- }
-
- if (!CanCoalesce || SrcSubIndices.size() < 2)
- continue;
-
- // Check that the source subregisters can be combined.
- std::sort(SrcSubIndices.begin(), SrcSubIndices.end());
- unsigned NewSrcSubIdx = 0;
- if (!TRI->canCombineSubRegIndices(MRI->getRegClass(SrcReg), SrcSubIndices,
- NewSrcSubIdx))
- continue;
-
- // Check that the destination subregisters can also be combined.
- std::sort(DstSubIndices.begin(), DstSubIndices.end());
- unsigned NewDstSubIdx = 0;
- if (!TRI->canCombineSubRegIndices(MRI->getRegClass(DstReg), DstSubIndices,
- NewDstSubIdx))
- continue;
-
- // If neither source nor destination can be combined to the full register,
- // just give up. This could be improved if it ever matters.
- if (NewSrcSubIdx != 0 && NewDstSubIdx != 0)
- continue;
-
- // Now that we know that all the uses are extract_subregs and that those
- // subregs can somehow be combined, scan all the extract_subregs again to
- // make sure the subregs are in the right order and can be composed.
- MachineInstr *SomeMI = 0;
- CanCoalesce = true;
- for (MachineRegisterInfo::use_nodbg_iterator
- UI = MRI->use_nodbg_begin(SrcReg),
- UE = MRI->use_nodbg_end(); UI != UE; ++UI) {
- MachineInstr *UseMI = &*UI;
- assert(UseMI->isCopy());
- unsigned DstSubIdx = UseMI->getOperand(0).getSubReg();
- unsigned SrcSubIdx = UseMI->getOperand(1).getSubReg();
- assert(DstSubIdx != 0 && "missing subreg from RegSequence elimination");
- if ((NewDstSubIdx == 0 &&
- TRI->composeSubRegIndices(NewSrcSubIdx, DstSubIdx) != SrcSubIdx) ||
- (NewSrcSubIdx == 0 &&
- TRI->composeSubRegIndices(NewDstSubIdx, SrcSubIdx) != DstSubIdx)) {
- CanCoalesce = false;
- break;
- }
- // Keep track of one of the uses. Preferably the first one which has a
- // <def,undef> flag.
- if (!SomeMI || UseMI->getOperand(0).isUndef())
- SomeMI = UseMI;
- }
- if (!CanCoalesce)
- continue;
-
- // Insert a copy to replace the original.
- MachineInstr *CopyMI = BuildMI(*SomeMI->getParent(), SomeMI,
- SomeMI->getDebugLoc(),
- TII->get(TargetOpcode::COPY))
- .addReg(DstReg, RegState::Define |
- getUndefRegState(SomeMI->getOperand(0).isUndef()),
- NewDstSubIdx)
- .addReg(SrcReg, 0, NewSrcSubIdx);
-
- // Remove all the old extract instructions.
- for (MachineRegisterInfo::use_nodbg_iterator
- UI = MRI->use_nodbg_begin(SrcReg),
- UE = MRI->use_nodbg_end(); UI != UE; ) {
- MachineInstr *UseMI = &*UI;
- ++UI;
- if (UseMI == CopyMI)
- continue;
- assert(UseMI->isCopy());
- // Move any kills to the new copy or extract instruction.
- if (UseMI->getOperand(1).isKill()) {
- CopyMI->getOperand(1).setIsKill();
- if (LV)
- // Update live variables
- LV->replaceKillInstruction(SrcReg, UseMI, &*CopyMI);
- }
- UseMI->eraseFromParent();
- }
- }
-}
-
static bool HasOtherRegSequenceUses(unsigned Reg, MachineInstr *RegSeq,
MachineRegisterInfo *MRI) {
for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(Reg),
@@ -1647,7 +1504,7 @@ static bool HasOtherRegSequenceUses(unsigned Reg, MachineInstr *RegSeq,
return false;
}
-/// EliminateRegSequences - Eliminate REG_SEQUENCE instructions as part
+/// eliminateRegSequences - Eliminate REG_SEQUENCE instructions as part
/// of the de-ssa process. This replaces sources of REG_SEQUENCE as
/// sub-register references of the register defined by REG_SEQUENCE. e.g.
///
@@ -1655,7 +1512,7 @@ static bool HasOtherRegSequenceUses(unsigned Reg, MachineInstr *RegSeq,
/// %reg1031<def> = REG_SEQUENCE %reg1029<kill>, 5, %reg1030<kill>, 6
/// =>
/// %reg1031:5<def>, %reg1031:6<def> = VLD1q16 %reg1024<kill>, ...
-bool TwoAddressInstructionPass::EliminateRegSequences() {
+bool TwoAddressInstructionPass::eliminateRegSequences() {
if (RegSequences.empty())
return false;
@@ -1759,10 +1616,6 @@ bool TwoAddressInstructionPass::EliminateRegSequences() {
if (MO.isReg() && MO.isDef() && MO.getReg() == DstReg)
MO.setIsUndef();
}
- // Make sure there is a full non-subreg imp-def operand on the
- // instruction. This shouldn't be necessary, but it seems that at least
- // RAFast requires it.
- Def->addRegisterDefined(DstReg, TRI);
DEBUG(dbgs() << "First def: " << *Def);
}
@@ -1775,12 +1628,6 @@ bool TwoAddressInstructionPass::EliminateRegSequences() {
DEBUG(dbgs() << "Eliminated: " << *MI);
MI->eraseFromParent();
}
-
- // Try coalescing some EXTRACT_SUBREG instructions. This can create
- // INSERT_SUBREG instructions that must have <undef> flags added by
- // LiveIntervalAnalysis, so only run it when LiveVariables is available.
- if (LV)
- CoalesceExtSubRegs(RealSrcs, DstReg);
}
RegSequences.clear();
diff --git a/contrib/llvm/lib/CodeGen/VirtRegMap.cpp b/contrib/llvm/lib/CodeGen/VirtRegMap.cpp
index 93840f0..bb93bdc 100644
--- a/contrib/llvm/lib/CodeGen/VirtRegMap.cpp
+++ b/contrib/llvm/lib/CodeGen/VirtRegMap.cpp
@@ -19,8 +19,8 @@
#define DEBUG_TYPE "regalloc"
#include "VirtRegMap.h"
#include "LiveDebugVariables.h"
-#include "llvm/Function.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/LiveStackAnalysis.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -127,9 +127,11 @@ void VirtRegMap::print(raw_ostream &OS, const Module*) const {
OS << '\n';
}
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VirtRegMap::dump() const {
print(dbgs());
}
+#endif
//===----------------------------------------------------------------------===//
// VirtRegRewriter
@@ -170,6 +172,7 @@ INITIALIZE_PASS_BEGIN(VirtRegRewriter, "virtregrewriter",
INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
INITIALIZE_PASS_DEPENDENCY(LiveDebugVariables)
+INITIALIZE_PASS_DEPENDENCY(LiveStacks)
INITIALIZE_PASS_DEPENDENCY(VirtRegMap)
INITIALIZE_PASS_END(VirtRegRewriter, "virtregrewriter",
"Virtual Register Rewriter", false, false)
@@ -182,6 +185,8 @@ void VirtRegRewriter::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<SlotIndexes>();
AU.addPreserved<SlotIndexes>();
AU.addRequired<LiveDebugVariables>();
+ AU.addRequired<LiveStacks>();
+ AU.addPreserved<LiveStacks>();
AU.addRequired<VirtRegMap>();
MachineFunctionPass::getAnalysisUsage(AU);
}
@@ -197,11 +202,11 @@ bool VirtRegRewriter::runOnMachineFunction(MachineFunction &fn) {
VRM = &getAnalysis<VirtRegMap>();
DEBUG(dbgs() << "********** REWRITE VIRTUAL REGISTERS **********\n"
<< "********** Function: "
- << MF->getFunction()->getName() << '\n');
+ << MF->getName() << '\n');
DEBUG(VRM->dump());
// Add kill flags while we still have virtual registers.
- LIS->addKillFlags();
+ LIS->addKillFlags(VRM);
// Live-in lists on basic blocks are required for physregs.
addMBBLiveIns();
@@ -252,9 +257,6 @@ void VirtRegRewriter::rewrite() {
SmallVector<unsigned, 8> SuperDeads;
SmallVector<unsigned, 8> SuperDefs;
SmallVector<unsigned, 8> SuperKills;
-#ifndef NDEBUG
- BitVector Reserved = TRI->getReservedRegs(*MF);
-#endif
for (MachineFunction::iterator MBBI = MF->begin(), MBBE = MF->end();
MBBI != MBBE; ++MBBI) {
@@ -278,7 +280,7 @@ void VirtRegRewriter::rewrite() {
unsigned PhysReg = VRM->getPhys(VirtReg);
assert(PhysReg != VirtRegMap::NO_PHYS_REG &&
"Instruction uses unmapped VirtReg");
- assert(!Reserved.test(PhysReg) && "Reserved register assignment");
+ assert(!MRI->isReserved(PhysReg) && "Reserved register assignment");
// Preserve semantics of sub-register operands.
if (MO.getSubReg()) {
diff --git a/contrib/llvm/lib/CodeGen/VirtRegMap.h b/contrib/llvm/lib/CodeGen/VirtRegMap.h
index c320985..7974dda 100644
--- a/contrib/llvm/lib/CodeGen/VirtRegMap.h
+++ b/contrib/llvm/lib/CodeGen/VirtRegMap.h
@@ -63,8 +63,8 @@ namespace llvm {
/// createSpillSlot - Allocate a spill slot for RC from MFI.
unsigned createSpillSlot(const TargetRegisterClass *RC);
- VirtRegMap(const VirtRegMap&); // DO NOT IMPLEMENT
- void operator=(const VirtRegMap&); // DO NOT IMPLEMENT
+ VirtRegMap(const VirtRegMap&) LLVM_DELETED_FUNCTION;
+ void operator=(const VirtRegMap&) LLVM_DELETED_FUNCTION;
public:
static char ID;
OpenPOWER on IntegriCloud