Diffstat (limited to 'contrib/llvm/lib/CodeGen')
-rw-r--r--  contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp | 159
-rw-r--r--  contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.h | 12
-rw-r--r--  contrib/llvm/lib/CodeGen/Analysis.cpp | 58
-rw-r--r--  contrib/llvm/lib/CodeGen/AntiDepBreaker.h | 14
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp | 2
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/AddressPool.h | 2
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 348
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp | 6
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterHandler.h | 7
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp | 2
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/ByteStreamer.h | 1
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp | 2075
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h | 310
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/DIE.cpp | 47
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp | 3
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/DIEHash.h | 2
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp | 72
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.h | 3
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp | 230
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.h | 109
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/DebugLocEntry.h | 16
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp | 49
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp | 161
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h | 30
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp | 559
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h | 148
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/DwarfException.h | 8
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp | 63
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h | 46
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/DwarfFile.cpp | 39
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/DwarfFile.h | 18
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp | 186
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h | 24
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/EHStreamer.h | 1
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.cpp | 411
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.h | 138
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/WinException.cpp | 60
-rw-r--r--  contrib/llvm/lib/CodeGen/AtomicExpandPass.cpp | 1168
-rw-r--r--  contrib/llvm/lib/CodeGen/BranchFolding.cpp | 263
-rw-r--r--  contrib/llvm/lib/CodeGen/BranchFolding.h | 32
-rw-r--r--  contrib/llvm/lib/CodeGen/BuiltinGCs.cpp | 139
-rw-r--r--  contrib/llvm/lib/CodeGen/CalcSpillWeights.cpp | 17
-rw-r--r--  contrib/llvm/lib/CodeGen/CallingConvLower.cpp | 41
-rw-r--r--  contrib/llvm/lib/CodeGen/CodeGen.cpp | 10
-rw-r--r--  contrib/llvm/lib/CodeGen/CodeGenPrepare.cpp | 265
-rw-r--r--  contrib/llvm/lib/CodeGen/CoreCLRGC.cpp | 54
-rw-r--r--  contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp | 65
-rw-r--r--  contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.h | 10
-rw-r--r--  contrib/llvm/lib/CodeGen/DFAPacketizer.cpp | 183
-rw-r--r--  contrib/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp | 7
-rw-r--r--  contrib/llvm/lib/CodeGen/DetectDeadLanes.cpp | 602
-rw-r--r--  contrib/llvm/lib/CodeGen/EarlyIfConversion.cpp | 19
-rw-r--r--  contrib/llvm/lib/CodeGen/ErlangGC.cpp | 46
-rw-r--r--  contrib/llvm/lib/CodeGen/ExecutionDepsFix.cpp | 25
-rw-r--r--  contrib/llvm/lib/CodeGen/ExpandISelPseudos.cpp | 7
-rw-r--r--  contrib/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp | 30
-rw-r--r--  contrib/llvm/lib/CodeGen/FuncletLayout.cpp | 4
-rw-r--r--  contrib/llvm/lib/CodeGen/GCRootLowering.cpp | 7
-rw-r--r--  contrib/llvm/lib/CodeGen/GlobalISel/GlobalISel.cpp | 30
-rw-r--r--  contrib/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp | 164
-rw-r--r--  contrib/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp | 104
-rw-r--r--  contrib/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp | 897
-rw-r--r--  contrib/llvm/lib/CodeGen/GlobalISel/RegisterBank.cpp | 107
-rw-r--r--  contrib/llvm/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp | 663
-rw-r--r--  contrib/llvm/lib/CodeGen/GlobalMerge.cpp | 28
-rw-r--r--  contrib/llvm/lib/CodeGen/IfConversion.cpp | 254
-rw-r--r--  contrib/llvm/lib/CodeGen/ImplicitNullChecks.cpp | 264
-rw-r--r--  contrib/llvm/lib/CodeGen/InlineSpiller.cpp | 1140
-rw-r--r--  contrib/llvm/lib/CodeGen/InterleavedAccessPass.cpp | 132
-rw-r--r--  contrib/llvm/lib/CodeGen/LLVMTargetMachine.cpp | 66
-rw-r--r--  contrib/llvm/lib/CodeGen/LexicalScopes.cpp | 13
-rw-r--r--  contrib/llvm/lib/CodeGen/LiveDebugValues.cpp | 420
-rw-r--r--  contrib/llvm/lib/CodeGen/LiveDebugVariables.cpp | 69
-rw-r--r--  contrib/llvm/lib/CodeGen/LiveDebugVariables.h | 2
-rw-r--r--  contrib/llvm/lib/CodeGen/LiveInterval.cpp | 340
-rw-r--r--  contrib/llvm/lib/CodeGen/LiveIntervalAnalysis.cpp | 552
-rw-r--r--  contrib/llvm/lib/CodeGen/LivePhysRegs.cpp | 65
-rw-r--r--  contrib/llvm/lib/CodeGen/LiveRangeCalc.cpp | 30
-rw-r--r--  contrib/llvm/lib/CodeGen/LiveRangeCalc.h | 5
-rw-r--r--  contrib/llvm/lib/CodeGen/LiveRangeEdit.cpp | 99
-rw-r--r--  contrib/llvm/lib/CodeGen/LiveRangeUtils.h | 62
-rw-r--r--  contrib/llvm/lib/CodeGen/LiveStackAnalysis.cpp | 2
-rw-r--r--  contrib/llvm/lib/CodeGen/LiveVariables.cpp | 65
-rw-r--r--  contrib/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp | 64
-rw-r--r--  contrib/llvm/lib/CodeGen/LowerEmuTLS.cpp | 162
-rw-r--r--  contrib/llvm/lib/CodeGen/MIRParser/MILexer.cpp | 68
-rw-r--r--  contrib/llvm/lib/CodeGen/MIRParser/MILexer.h | 5
-rw-r--r--  contrib/llvm/lib/CodeGen/MIRParser/MIParser.cpp | 263
-rw-r--r--  contrib/llvm/lib/CodeGen/MIRParser/MIParser.h | 60
-rw-r--r--  contrib/llvm/lib/CodeGen/MIRParser/MIRParser.cpp | 248
-rw-r--r--  contrib/llvm/lib/CodeGen/MIRPrinter.cpp | 52
-rw-r--r--  contrib/llvm/lib/CodeGen/MachineBasicBlock.cpp | 273
-rw-r--r--  contrib/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp | 119
-rw-r--r--  contrib/llvm/lib/CodeGen/MachineBlockPlacement.cpp | 901
-rw-r--r--  contrib/llvm/lib/CodeGen/MachineBranchProbabilityInfo.cpp | 24
-rw-r--r--  contrib/llvm/lib/CodeGen/MachineCSE.cpp | 15
-rw-r--r--  contrib/llvm/lib/CodeGen/MachineCombiner.cpp | 25
-rw-r--r--  contrib/llvm/lib/CodeGen/MachineCopyPropagation.cpp | 369
-rw-r--r--  contrib/llvm/lib/CodeGen/MachineDominators.cpp | 30
-rw-r--r--  contrib/llvm/lib/CodeGen/MachineFunction.cpp | 124
-rw-r--r--  contrib/llvm/lib/CodeGen/MachineFunctionPass.cpp | 29
-rw-r--r--  contrib/llvm/lib/CodeGen/MachineInstr.cpp | 261
-rw-r--r--  contrib/llvm/lib/CodeGen/MachineInstrBundle.cpp | 15
-rw-r--r--  contrib/llvm/lib/CodeGen/MachineLICM.cpp | 41
-rw-r--r--  contrib/llvm/lib/CodeGen/MachineLoopInfo.cpp | 9
-rw-r--r--  contrib/llvm/lib/CodeGen/MachineModuleInfo.cpp | 3
-rw-r--r--  contrib/llvm/lib/CodeGen/MachineRegionInfo.cpp | 6
-rw-r--r--  contrib/llvm/lib/CodeGen/MachineRegisterInfo.cpp | 41
-rw-r--r--  contrib/llvm/lib/CodeGen/MachineSSAUpdater.cpp | 1
-rw-r--r--  contrib/llvm/lib/CodeGen/MachineScheduler.cpp | 736
-rw-r--r--  contrib/llvm/lib/CodeGen/MachineSink.cpp | 160
-rw-r--r--  contrib/llvm/lib/CodeGen/MachineTraceMetrics.cpp | 111
-rw-r--r--  contrib/llvm/lib/CodeGen/MachineVerifier.cpp | 341
-rw-r--r--  contrib/llvm/lib/CodeGen/OcamlGC.cpp | 36
-rw-r--r--  contrib/llvm/lib/CodeGen/OptimizePHIs.cpp | 2
-rw-r--r--  contrib/llvm/lib/CodeGen/PHIElimination.cpp | 39
-rw-r--r--  contrib/llvm/lib/CodeGen/ParallelCG.cpp | 105
-rw-r--r--  contrib/llvm/lib/CodeGen/PatchableFunction.cpp | 88
-rw-r--r--  contrib/llvm/lib/CodeGen/PeepholeOptimizer.cpp | 32
-rw-r--r--  contrib/llvm/lib/CodeGen/PostRAHazardRecognizer.cpp | 98
-rw-r--r--  contrib/llvm/lib/CodeGen/PostRASchedulerList.cpp | 63
-rw-r--r--  contrib/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp | 94
-rw-r--r--  contrib/llvm/lib/CodeGen/PrologEpilogInserter.cpp | 524
-rw-r--r--  contrib/llvm/lib/CodeGen/PseudoSourceValue.cpp | 5
-rw-r--r--  contrib/llvm/lib/CodeGen/RegAllocBase.cpp | 12
-rw-r--r--  contrib/llvm/lib/CodeGen/RegAllocBase.h | 10
-rw-r--r--  contrib/llvm/lib/CodeGen/RegAllocBasic.cpp | 5
-rw-r--r--  contrib/llvm/lib/CodeGen/RegAllocFast.cpp | 81
-rw-r--r--  contrib/llvm/lib/CodeGen/RegAllocGreedy.cpp | 67
-rw-r--r--  contrib/llvm/lib/CodeGen/RegAllocPBQP.cpp | 23
-rw-r--r--  contrib/llvm/lib/CodeGen/RegUsageInfoCollector.cpp | 142
-rw-r--r--  contrib/llvm/lib/CodeGen/RegUsageInfoPropagate.cpp | 131
-rw-r--r--  contrib/llvm/lib/CodeGen/RegisterCoalescer.cpp | 259
-rw-r--r--  contrib/llvm/lib/CodeGen/RegisterPressure.cpp | 695
-rw-r--r--  contrib/llvm/lib/CodeGen/RegisterScavenging.cpp | 148
-rw-r--r--  contrib/llvm/lib/CodeGen/RegisterUsageInfo.cpp | 93
-rw-r--r--  contrib/llvm/lib/CodeGen/RenameIndependentSubregs.cpp | 388
-rw-r--r--  contrib/llvm/lib/CodeGen/SafeStack.cpp | 851
-rw-r--r--  contrib/llvm/lib/CodeGen/SafeStackColoring.cpp | 291
-rw-r--r--  contrib/llvm/lib/CodeGen/SafeStackColoring.h | 149
-rw-r--r--  contrib/llvm/lib/CodeGen/SafeStackLayout.cpp | 139
-rw-r--r--  contrib/llvm/lib/CodeGen/SafeStackLayout.h | 68
-rw-r--r--  contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp | 993
-rw-r--r--  contrib/llvm/lib/CodeGen/ScheduleDAGPrinter.cpp | 1
-rw-r--r--  contrib/llvm/lib/CodeGen/ScoreboardHazardRecognizer.cpp | 23
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 1703
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp | 65
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp | 80
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp | 8
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h | 6
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 773
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp | 67
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp | 179
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp | 101
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h | 179
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp | 32
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp | 196
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp | 461
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp | 16
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h | 11
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp | 288
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp | 4
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 1587
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 1178
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h | 180
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp | 10
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp | 563
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGTargetInfo.cpp (renamed from contrib/llvm/lib/CodeGen/SelectionDAG/TargetSelectionDAGInfo.cpp) | 10
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp | 634
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.h | 34
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp | 705
-rw-r--r--  contrib/llvm/lib/CodeGen/ShadowStackGC.cpp | 55
-rw-r--r--  contrib/llvm/lib/CodeGen/ShadowStackGCLowering.cpp | 6
-rw-r--r--  contrib/llvm/lib/CodeGen/SjLjEHPrepare.cpp | 145
-rw-r--r--  contrib/llvm/lib/CodeGen/SlotIndexes.cpp | 43
-rw-r--r--  contrib/llvm/lib/CodeGen/SpillPlacement.cpp | 91
-rw-r--r--  contrib/llvm/lib/CodeGen/SpillPlacement.h | 6
-rw-r--r--  contrib/llvm/lib/CodeGen/Spiller.h | 4
-rw-r--r--  contrib/llvm/lib/CodeGen/SplitKit.cpp | 244
-rw-r--r--  contrib/llvm/lib/CodeGen/SplitKit.h | 84
-rw-r--r--  contrib/llvm/lib/CodeGen/StackColoring.cpp | 619
-rw-r--r--  contrib/llvm/lib/CodeGen/StackMapLivenessAnalysis.cpp | 8
-rw-r--r--  contrib/llvm/lib/CodeGen/StackMaps.cpp | 4
-rw-r--r--  contrib/llvm/lib/CodeGen/StackProtector.cpp | 204
-rw-r--r--  contrib/llvm/lib/CodeGen/StackSlotColoring.cpp | 47
-rw-r--r--  contrib/llvm/lib/CodeGen/StatepointExampleGC.cpp | 55
-rw-r--r--  contrib/llvm/lib/CodeGen/TailDuplication.cpp | 963
-rw-r--r--  contrib/llvm/lib/CodeGen/TailDuplicator.cpp | 932
-rw-r--r--  contrib/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp | 18
-rw-r--r--  contrib/llvm/lib/CodeGen/TargetInstrInfo.cpp | 383
-rw-r--r--  contrib/llvm/lib/CodeGen/TargetLoweringBase.cpp | 237
-rw-r--r--  contrib/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp | 144
-rw-r--r--  contrib/llvm/lib/CodeGen/TargetPassConfig.cpp (renamed from contrib/llvm/lib/CodeGen/Passes.cpp) | 119
-rw-r--r--  contrib/llvm/lib/CodeGen/TargetRegisterInfo.cpp | 26
-rw-r--r--  contrib/llvm/lib/CodeGen/TargetSchedule.cpp | 27
-rw-r--r--  contrib/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp | 212
-rw-r--r--  contrib/llvm/lib/CodeGen/UnreachableBlockElim.cpp | 66
-rw-r--r--  contrib/llvm/lib/CodeGen/VirtRegMap.cpp | 49
-rw-r--r--  contrib/llvm/lib/CodeGen/WinEHPrepare.cpp | 23
-rw-r--r--  contrib/llvm/lib/CodeGen/XRayInstrumentation.cpp | 96
-rw-r--r--  contrib/llvm/lib/CodeGen/module.modulemap | 1
201 files changed, 25562 insertions, 12616 deletions
diff --git a/contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp b/contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp
index 4060db7..a736884 100644
--- a/contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp
+++ b/contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp
@@ -180,7 +180,7 @@ void AggressiveAntiDepBreaker::FinishBlock() {
State = nullptr;
}
-void AggressiveAntiDepBreaker::Observe(MachineInstr *MI, unsigned Count,
+void AggressiveAntiDepBreaker::Observe(MachineInstr &MI, unsigned Count,
unsigned InsertPosIndex) {
assert(Count < InsertPosIndex && "Instruction index out of expected range!");
@@ -190,7 +190,7 @@ void AggressiveAntiDepBreaker::Observe(MachineInstr *MI, unsigned Count,
ScanInstruction(MI, Count);
DEBUG(dbgs() << "Observe: ");
- DEBUG(MI->dump());
+ DEBUG(MI.dump());
DEBUG(dbgs() << "\tRegs:");
std::vector<unsigned> &DefIndices = State->GetDefIndices();
@@ -214,9 +214,8 @@ void AggressiveAntiDepBreaker::Observe(MachineInstr *MI, unsigned Count,
DEBUG(dbgs() << '\n');
}
-bool AggressiveAntiDepBreaker::IsImplicitDefUse(MachineInstr *MI,
- MachineOperand& MO)
-{
+bool AggressiveAntiDepBreaker::IsImplicitDefUse(MachineInstr &MI,
+ MachineOperand &MO) {
if (!MO.isReg() || !MO.isImplicit())
return false;
@@ -226,19 +225,19 @@ bool AggressiveAntiDepBreaker::IsImplicitDefUse(MachineInstr *MI,
MachineOperand *Op = nullptr;
if (MO.isDef())
- Op = MI->findRegisterUseOperand(Reg, true);
+ Op = MI.findRegisterUseOperand(Reg, true);
else
- Op = MI->findRegisterDefOperand(Reg);
+ Op = MI.findRegisterDefOperand(Reg);
return(Op && Op->isImplicit());
}
-void AggressiveAntiDepBreaker::GetPassthruRegs(MachineInstr *MI,
- std::set<unsigned>& PassthruRegs) {
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- MachineOperand &MO = MI->getOperand(i);
+void AggressiveAntiDepBreaker::GetPassthruRegs(
+ MachineInstr &MI, std::set<unsigned> &PassthruRegs) {
+ for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI.getOperand(i);
if (!MO.isReg()) continue;
- if ((MO.isDef() && MI->isRegTiedToUseOperand(i)) ||
+ if ((MO.isDef() && MI.isRegTiedToUseOperand(i)) ||
IsImplicitDefUse(MI, MO)) {
const unsigned Reg = MO.getReg();
for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true);
@@ -313,28 +312,30 @@ void AggressiveAntiDepBreaker::HandleLastUse(unsigned Reg, unsigned KillIdx,
DEBUG(if (header) {
dbgs() << header << TRI->getName(Reg); header = nullptr; });
DEBUG(dbgs() << "->g" << State->GetGroup(Reg) << tag);
- }
- // Repeat for subregisters.
- for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) {
- unsigned SubregReg = *SubRegs;
- if (!State->IsLive(SubregReg)) {
- KillIndices[SubregReg] = KillIdx;
- DefIndices[SubregReg] = ~0u;
- RegRefs.erase(SubregReg);
- State->LeaveGroup(SubregReg);
- DEBUG(if (header) {
- dbgs() << header << TRI->getName(Reg); header = nullptr; });
- DEBUG(dbgs() << " " << TRI->getName(SubregReg) << "->g" <<
- State->GetGroup(SubregReg) << tag);
+ // Repeat for subregisters. Note that we only do this if the superregister
+ // was not live because otherwise, regardless of whether we have an explicit
+ // use of the subregister, the subregister's contents are needed for the
+ // uses of the superregister.
+ for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) {
+ unsigned SubregReg = *SubRegs;
+ if (!State->IsLive(SubregReg)) {
+ KillIndices[SubregReg] = KillIdx;
+ DefIndices[SubregReg] = ~0u;
+ RegRefs.erase(SubregReg);
+ State->LeaveGroup(SubregReg);
+ DEBUG(if (header) {
+ dbgs() << header << TRI->getName(Reg); header = nullptr; });
+ DEBUG(dbgs() << " " << TRI->getName(SubregReg) << "->g" <<
+ State->GetGroup(SubregReg) << tag);
+ }
}
}
DEBUG(if (!header && footer) dbgs() << footer);
}
-void AggressiveAntiDepBreaker::PrescanInstruction(MachineInstr *MI,
- unsigned Count,
- std::set<unsigned>& PassthruRegs) {
+void AggressiveAntiDepBreaker::PrescanInstruction(
+ MachineInstr &MI, unsigned Count, std::set<unsigned> &PassthruRegs) {
std::vector<unsigned> &DefIndices = State->GetDefIndices();
std::multimap<unsigned, AggressiveAntiDepState::RegisterReference>&
RegRefs = State->GetRegRefs();
@@ -344,8 +345,8 @@ void AggressiveAntiDepBreaker::PrescanInstruction(MachineInstr *MI,
// dead, or because only a subregister is live at the def. If we
// don't do this the dead def will be incorrectly merged into the
// previous def.
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- MachineOperand &MO = MI->getOperand(i);
+ for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI.getOperand(i);
if (!MO.isReg() || !MO.isDef()) continue;
unsigned Reg = MO.getReg();
if (Reg == 0) continue;
@@ -354,8 +355,8 @@ void AggressiveAntiDepBreaker::PrescanInstruction(MachineInstr *MI,
}
DEBUG(dbgs() << "\tDef Groups:");
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- MachineOperand &MO = MI->getOperand(i);
+ for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI.getOperand(i);
if (!MO.isReg() || !MO.isDef()) continue;
unsigned Reg = MO.getReg();
if (Reg == 0) continue;
@@ -367,8 +368,8 @@ void AggressiveAntiDepBreaker::PrescanInstruction(MachineInstr *MI,
// defined in a call must not be changed (ABI). Inline assembly may
// reference either system calls or the register directly. Skip it until we
// can tell user specified registers from compiler-specified.
- if (MI->isCall() || MI->hasExtraDefRegAllocReq() ||
- TII->isPredicated(MI) || MI->isInlineAsm()) {
+ if (MI.isCall() || MI.hasExtraDefRegAllocReq() || TII->isPredicated(MI) ||
+ MI.isInlineAsm()) {
DEBUG(if (State->GetGroup(Reg) != 0) dbgs() << "->g0(alloc-req)");
State->UnionGroups(Reg, 0);
}
@@ -386,8 +387,8 @@ void AggressiveAntiDepBreaker::PrescanInstruction(MachineInstr *MI,
// Note register reference...
const TargetRegisterClass *RC = nullptr;
- if (i < MI->getDesc().getNumOperands())
- RC = TII->getRegClass(MI->getDesc(), i, TRI, MF);
+ if (i < MI.getDesc().getNumOperands())
+ RC = TII->getRegClass(MI.getDesc(), i, TRI, MF);
AggressiveAntiDepState::RegisterReference RR = { &MO, RC };
RegRefs.insert(std::make_pair(Reg, RR));
}
@@ -396,13 +397,13 @@ void AggressiveAntiDepBreaker::PrescanInstruction(MachineInstr *MI,
// Scan the register defs for this instruction and update
// live-ranges.
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- MachineOperand &MO = MI->getOperand(i);
+ for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI.getOperand(i);
if (!MO.isReg() || !MO.isDef()) continue;
unsigned Reg = MO.getReg();
if (Reg == 0) continue;
// Ignore KILLs and passthru registers for liveness...
- if (MI->isKill() || (PassthruRegs.count(Reg) != 0))
+ if (MI.isKill() || (PassthruRegs.count(Reg) != 0))
continue;
// Update def for Reg and aliases.
@@ -421,7 +422,7 @@ void AggressiveAntiDepBreaker::PrescanInstruction(MachineInstr *MI,
}
}
-void AggressiveAntiDepBreaker::ScanInstruction(MachineInstr *MI,
+void AggressiveAntiDepBreaker::ScanInstruction(MachineInstr &MI,
unsigned Count) {
DEBUG(dbgs() << "\tUse Groups:");
std::multimap<unsigned, AggressiveAntiDepState::RegisterReference>&
@@ -444,14 +445,13 @@ void AggressiveAntiDepBreaker::ScanInstruction(MachineInstr *MI,
// instruction which may not be executed. The second R6 def may or may not
// re-define R6 so it's not safe to change it since the last R6 use cannot be
// changed.
- bool Special = MI->isCall() ||
- MI->hasExtraSrcRegAllocReq() ||
- TII->isPredicated(MI) || MI->isInlineAsm();
+ bool Special = MI.isCall() || MI.hasExtraSrcRegAllocReq() ||
+ TII->isPredicated(MI) || MI.isInlineAsm();
// Scan the register uses for this instruction and update
// live-ranges, groups and RegRefs.
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- MachineOperand &MO = MI->getOperand(i);
+ for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI.getOperand(i);
if (!MO.isReg() || !MO.isUse()) continue;
unsigned Reg = MO.getReg();
if (Reg == 0) continue;
@@ -471,8 +471,8 @@ void AggressiveAntiDepBreaker::ScanInstruction(MachineInstr *MI,
// Note register reference...
const TargetRegisterClass *RC = nullptr;
- if (i < MI->getDesc().getNumOperands())
- RC = TII->getRegClass(MI->getDesc(), i, TRI, MF);
+ if (i < MI.getDesc().getNumOperands())
+ RC = TII->getRegClass(MI.getDesc(), i, TRI, MF);
AggressiveAntiDepState::RegisterReference RR = { &MO, RC };
RegRefs.insert(std::make_pair(Reg, RR));
}
@@ -481,12 +481,12 @@ void AggressiveAntiDepBreaker::ScanInstruction(MachineInstr *MI,
// Form a group of all defs and uses of a KILL instruction to ensure
// that all registers are renamed as a group.
- if (MI->isKill()) {
+ if (MI.isKill()) {
DEBUG(dbgs() << "\tKill Group:");
unsigned FirstReg = 0;
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- MachineOperand &MO = MI->getOperand(i);
+ for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI.getOperand(i);
if (!MO.isReg()) continue;
unsigned Reg = MO.getReg();
if (Reg == 0) continue;
@@ -563,13 +563,16 @@ bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters(
if (RegRefs.count(Reg) > 0) {
DEBUG(dbgs() << "\t\t" << TRI->getName(Reg) << ":");
- BitVector BV = GetRenameRegisters(Reg);
- RenameRegisterMap.insert(std::pair<unsigned, BitVector>(Reg, BV));
+ BitVector &BV = RenameRegisterMap[Reg];
+ assert(BV.empty());
+ BV = GetRenameRegisters(Reg);
- DEBUG(dbgs() << " ::");
- DEBUG(for (int r = BV.find_first(); r != -1; r = BV.find_next(r))
- dbgs() << " " << TRI->getName(r));
- DEBUG(dbgs() << "\n");
+ DEBUG({
+ dbgs() << " ::";
+ for (int r = BV.find_first(); r != -1; r = BV.find_next(r))
+ dbgs() << " " << TRI->getName(r);
+ dbgs() << "\n";
+ });
}
}
@@ -650,8 +653,7 @@ bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters(
DEBUG(dbgs() << " " << TRI->getName(NewReg));
// Check if Reg can be renamed to NewReg.
- BitVector BV = RenameRegisterMap[Reg];
- if (!BV.test(NewReg)) {
+ if (!RenameRegisterMap[Reg].test(NewReg)) {
DEBUG(dbgs() << "(no rename)");
goto next_super_reg;
}
@@ -785,6 +787,8 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies(
DEBUG(dbgs() << '\n');
#endif
+ BitVector RegAliases(TRI->getNumRegs());
+
// Attempt to break anti-dependence edges. Walk the instructions
// from the bottom up, tracking information about liveness as we go
// to help determine which registers are available.
@@ -792,13 +796,13 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies(
unsigned Count = InsertPosIndex - 1;
for (MachineBasicBlock::iterator I = End, E = Begin;
I != E; --Count) {
- MachineInstr *MI = --I;
+ MachineInstr &MI = *--I;
- if (MI->isDebugValue())
+ if (MI.isDebugValue())
continue;
DEBUG(dbgs() << "Anti: ");
- DEBUG(MI->dump());
+ DEBUG(MI.dump());
std::set<unsigned> PassthruRegs;
GetPassthruRegs(MI, PassthruRegs);
@@ -809,13 +813,13 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies(
// The dependence edges that represent anti- and output-
// dependencies that are candidates for breaking.
std::vector<const SDep *> Edges;
- const SUnit *PathSU = MISUnitMap[MI];
+ const SUnit *PathSU = MISUnitMap[&MI];
AntiDepEdges(PathSU, Edges);
// If MI is not on the critical path, then we don't rename
// registers in the CriticalPathSet.
BitVector *ExcludeRegs = nullptr;
- if (MI == CriticalPathMI) {
+ if (&MI == CriticalPathMI) {
CriticalPathSU = CriticalPathStep(CriticalPathSU);
CriticalPathMI = (CriticalPathSU) ? CriticalPathSU->getInstr() : nullptr;
} else if (CriticalPathSet.any()) {
@@ -824,7 +828,7 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies(
// Ignore KILL instructions (they form a group in ScanInstruction
// but don't cause any anti-dependence breaking themselves)
- if (!MI->isKill()) {
+ if (!MI.isKill()) {
// Attempt to break each anti-dependency...
for (unsigned i = 0, e = Edges.size(); i != e; ++i) {
const SDep *Edge = Edges[i];
@@ -854,7 +858,7 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies(
continue;
} else {
// No anti-dep breaking for implicit deps
- MachineOperand *AntiDepOp = MI->findRegisterDefOperand(AntiDepReg);
+ MachineOperand *AntiDepOp = MI.findRegisterDefOperand(AntiDepReg);
assert(AntiDepOp && "Can't find index for defined register operand");
if (!AntiDepOp || AntiDepOp->isImplicit()) {
DEBUG(dbgs() << " (implicit)\n");
@@ -896,6 +900,29 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies(
}
if (AntiDepReg == 0) continue;
+
+ // If the definition of the anti-dependency register does not start
+ // a new live range, bail out. This can happen if the anti-dep
+ // register is a sub-register of another register whose live range
+ // spans over PathSU. In such a case, PathSU defines only a part of
+ // the larger register.
+ RegAliases.reset();
+ for (MCRegAliasIterator AI(AntiDepReg, TRI, true); AI.isValid(); ++AI)
+ RegAliases.set(*AI);
+ for (SDep S : PathSU->Succs) {
+ SDep::Kind K = S.getKind();
+ if (K != SDep::Data && K != SDep::Output && K != SDep::Anti)
+ continue;
+ unsigned R = S.getReg();
+ if (!RegAliases[R])
+ continue;
+ if (R == AntiDepReg || TRI->isSubRegister(AntiDepReg, R))
+ continue;
+ AntiDepReg = 0;
+ break;
+ }
+
+ if (AntiDepReg == 0) continue;
}
assert(AntiDepReg != 0);
@@ -938,7 +965,7 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies(
for (DbgValueVector::iterator DVI = DbgValues.begin(),
DVE = DbgValues.end(); DVI != DVE; ++DVI)
if (DVI->second == Q.second.Operand->getParent())
- UpdateDbgValue(DVI->first, AntiDepReg, NewReg);
+ UpdateDbgValue(*DVI->first, AntiDepReg, NewReg);
}
// We just went back in time and modified history; the
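
The guard added to BreakAntiDependencies above inspects PathSU's successor edges and abandons renaming when AntiDepReg overlaps a register that stays live across the def, i.e. when the def only writes part of a wider live register. A minimal self-contained sketch of that overlap test, with the alias and sub-register sets modeled as plain bitsets rather than MCRegAliasIterator/TargetRegisterInfo (all names below are illustrative, not LLVM API):

// Illustrative sketch, not part of the patch: registers are small
// integers; AliasSets[R] and SubRegSets[R] hold R's aliases and
// sub-registers respectively.
#include <bitset>
#include <vector>

constexpr unsigned NumRegs = 64;

struct DepEdge {
  enum Kind { Data, Output, Anti, Order };
  Kind K;       // dependence kind carried by the edge
  unsigned Reg; // register the edge refers to
};

bool canRenameAntiDepReg(unsigned Reg,
                         const std::vector<std::bitset<NumRegs>> &AliasSets,
                         const std::vector<std::bitset<NumRegs>> &SubRegSets,
                         const std::vector<DepEdge> &Succs) {
  const std::bitset<NumRegs> &Aliases = AliasSets[Reg];
  for (const DepEdge &E : Succs) {
    if (E.K == DepEdge::Order)
      continue; // only data/output/anti edges name registers
    if (!Aliases.test(E.Reg))
      continue; // no overlap with the anti-dep register
    if (E.Reg == Reg || SubRegSets[Reg].test(E.Reg))
      continue; // the register itself or one of its sub-registers
    return false; // def clobbers part of a wider live range: don't rename
  }
  return true;
}
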
diff --git a/contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.h b/contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.h
index eba7383..f97e666 100644
--- a/contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.h
+++ b/contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.h
@@ -144,7 +144,7 @@ class LLVM_LIBRARY_VISIBILITY AggressiveAntiDepState {
/// Update liveness information to account for the current
/// instruction, which will not be scheduled.
///
- void Observe(MachineInstr *MI, unsigned Count,
+ void Observe(MachineInstr &MI, unsigned Count,
unsigned InsertPosIndex) override;
/// Finish anti-dep breaking for a basic block.
@@ -156,19 +156,19 @@ class LLVM_LIBRARY_VISIBILITY AggressiveAntiDepState {
/// Return true if MO represents a register
/// that is both implicitly used and defined in MI
- bool IsImplicitDefUse(MachineInstr *MI, MachineOperand& MO);
+ bool IsImplicitDefUse(MachineInstr &MI, MachineOperand &MO);
/// If MI implicitly def/uses a register, then
/// return that register and all subregisters.
- void GetPassthruRegs(MachineInstr *MI, std::set<unsigned>& PassthruRegs);
+ void GetPassthruRegs(MachineInstr &MI, std::set<unsigned> &PassthruRegs);
void HandleLastUse(unsigned Reg, unsigned KillIdx, const char *tag,
const char *header = nullptr,
const char *footer = nullptr);
- void PrescanInstruction(MachineInstr *MI, unsigned Count,
- std::set<unsigned>& PassthruRegs);
- void ScanInstruction(MachineInstr *MI, unsigned Count);
+ void PrescanInstruction(MachineInstr &MI, unsigned Count,
+ std::set<unsigned> &PassthruRegs);
+ void ScanInstruction(MachineInstr &MI, unsigned Count);
BitVector GetRenameRegisters(unsigned Reg);
bool FindSuitableFreeRegisters(unsigned AntiDepGroupIndex,
RenameOrderType& RenameOrder,
diff --git a/contrib/llvm/lib/CodeGen/Analysis.cpp b/contrib/llvm/lib/CodeGen/Analysis.cpp
index 75579a2..d690734 100644
--- a/contrib/llvm/lib/CodeGen/Analysis.cpp
+++ b/contrib/llvm/lib/CodeGen/Analysis.cpp
@@ -15,7 +15,6 @@
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
-#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
@@ -624,7 +623,9 @@ bool llvm::canBeOmittedFromSymbolTable(const GlobalValue *GV) {
if (!GV->hasLinkOnceODRLinkage())
return false;
- if (GV->hasUnnamedAddr())
+ // We assume that anyone who sets global unnamed_addr on a non-constant knows
+ // what they're doing.
+ if (GV->hasGlobalUnnamedAddr())
return true;
// If it is a non constant variable, it needs to be uniqued across shared
@@ -634,47 +635,36 @@ bool llvm::canBeOmittedFromSymbolTable(const GlobalValue *GV) {
return false;
}
- // An alias can point to a variable. We could try to resolve the alias to
- // decide, but for now just don't hide them.
- if (isa<GlobalAlias>(GV))
- return false;
-
- GlobalStatus GS;
- if (GlobalStatus::analyzeGlobal(GV, GS))
- return false;
-
- return !GS.IsCompared;
+ return GV->hasAtLeastLocalUnnamedAddr();
}
static void collectFuncletMembers(
DenseMap<const MachineBasicBlock *, int> &FuncletMembership, int Funclet,
const MachineBasicBlock *MBB) {
- // Add this MBB to our funclet.
- auto P = FuncletMembership.insert(std::make_pair(MBB, Funclet));
+ SmallVector<const MachineBasicBlock *, 16> Worklist = {MBB};
+ while (!Worklist.empty()) {
+ const MachineBasicBlock *Visiting = Worklist.pop_back_val();
+ // Don't follow blocks which start new funclets.
+ if (Visiting->isEHPad() && Visiting != MBB)
+ continue;
- // Don't revisit blocks.
- if (!P.second) {
- assert(P.first->second == Funclet && "MBB is part of two funclets!");
- return;
- }
+ // Add this MBB to our funclet.
+ auto P = FuncletMembership.insert(std::make_pair(Visiting, Funclet));
- bool IsReturn = false;
- int NumTerminators = 0;
- for (const MachineInstr &MI : MBB->terminators()) {
- IsReturn |= MI.isReturn();
- ++NumTerminators;
- }
- assert((!IsReturn || NumTerminators == 1) &&
- "Expected only one terminator when a return is present!");
+ // Don't revisit blocks.
+ if (!P.second) {
+ assert(P.first->second == Funclet && "MBB is part of two funclets!");
+ continue;
+ }
- // Returns are boundaries where funclet transfer can occur, don't follow
- // successors.
- if (IsReturn)
- return;
+ // Returns are boundaries where funclet transfer can occur, don't follow
+ // successors.
+ if (Visiting->isReturnBlock())
+ continue;
- for (const MachineBasicBlock *SMBB : MBB->successors())
- if (!SMBB->isEHPad())
- collectFuncletMembers(FuncletMembership, Funclet, SMBB);
+ for (const MachineBasicBlock *Succ : Visiting->successors())
+ Worklist.push_back(Succ);
+ }
}
DenseMap<const MachineBasicBlock *, int>
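
The rewritten collectFuncletMembers above replaces per-successor recursion with an explicit worklist, so funclet discovery no longer grows the native stack with the depth of the CFG. The same recursion-to-worklist pattern in isolation, over deliberately toy types (a hedged sketch, not LLVM's data structures):

// Illustrative stand-ins: IsEntryOfRegion plays the role of isEHPad(),
// IsReturn the role of isReturnBlock().
#include <map>
#include <vector>

struct Block {
  std::vector<const Block *> Succs;
  bool IsEntryOfRegion = false;
  bool IsReturn = false;
};

// Assign every block reachable from Entry to region Id, stopping at
// other region entries and at return blocks, without recursion.
void collectRegion(std::map<const Block *, int> &Membership, int Id,
                   const Block *Entry) {
  std::vector<const Block *> Worklist = {Entry};
  while (!Worklist.empty()) {
    const Block *B = Worklist.back();
    Worklist.pop_back();
    if (B->IsEntryOfRegion && B != Entry)
      continue; // don't follow blocks that start a different region
    if (!Membership.insert({B, Id}).second)
      continue; // already visited
    if (B->IsReturn)
      continue; // region-transfer boundary: don't follow successors
    for (const Block *S : B->Succs)
      Worklist.push_back(S);
  }
}
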
diff --git a/contrib/llvm/lib/CodeGen/AntiDepBreaker.h b/contrib/llvm/lib/CodeGen/AntiDepBreaker.h
index 9f05200..04f7f41 100644
--- a/contrib/llvm/lib/CodeGen/AntiDepBreaker.h
+++ b/contrib/llvm/lib/CodeGen/AntiDepBreaker.h
@@ -47,18 +47,18 @@ public:
/// Update liveness information to account for the current
/// instruction, which will not be scheduled.
- virtual void Observe(MachineInstr *MI, unsigned Count,
- unsigned InsertPosIndex) =0;
-
+ virtual void Observe(MachineInstr &MI, unsigned Count,
+ unsigned InsertPosIndex) = 0;
+
/// Finish anti-dep breaking for a basic block.
virtual void FinishBlock() =0;
/// Update DBG_VALUE if dependency breaker is updating
/// other machine instruction to use NewReg.
- void UpdateDbgValue(MachineInstr *MI, unsigned OldReg, unsigned NewReg) {
- assert (MI->isDebugValue() && "MI is not DBG_VALUE!");
- if (MI && MI->getOperand(0).isReg() && MI->getOperand(0).getReg() == OldReg)
- MI->getOperand(0).setReg(NewReg);
+ void UpdateDbgValue(MachineInstr &MI, unsigned OldReg, unsigned NewReg) {
+ assert(MI.isDebugValue() && "MI is not DBG_VALUE!");
+ if (MI.getOperand(0).isReg() && MI.getOperand(0).getReg() == OldReg)
+ MI.getOperand(0).setReg(NewReg);
}
};
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp
index ade2d71..5294c98 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp
@@ -12,7 +12,6 @@
//===----------------------------------------------------------------------===//
#include "DwarfException.h"
-#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/Twine.h"
#include "llvm/CodeGen/AsmPrinter.h"
@@ -28,7 +27,6 @@
#include "llvm/MC/MCSection.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
-#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Dwarf.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/Target/TargetFrameLowering.h"
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/AddressPool.h b/contrib/llvm/lib/CodeGen/AsmPrinter/AddressPool.h
index 211fc98..ba3e3b7 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/AddressPool.h
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/AddressPool.h
@@ -11,10 +11,10 @@
#define LLVM_LIB_CODEGEN_ASMPRINTER_ADDRESSPOOL_H
#include "llvm/ADT/DenseMap.h"
+#include "llvm/MC/MCSymbol.h"
namespace llvm {
class MCSection;
-class MCSymbol;
class AsmPrinter;
// Collection of addresses for this unit and assorted labels.
// A Symbol->unsigned mapping of addresses used by indirect
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 5f67d3d..272bace 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -12,11 +12,10 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/AsmPrinter.h"
+#include "CodeViewDebug.h"
#include "DwarfDebug.h"
#include "DwarfException.h"
#include "WinException.h"
-#include "WinCodeViewLineTables.h"
-#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/CodeGen/Analysis.h"
@@ -125,6 +124,10 @@ AsmPrinter::~AsmPrinter() {
}
}
+bool AsmPrinter::isPositionIndependent() const {
+ return TM.isPositionIndependent();
+}
+
/// getFunctionNumber - Return a unique ID for the current function.
///
unsigned AsmPrinter::getFunctionNumber() const {
@@ -248,12 +251,13 @@ bool AsmPrinter::doInitialization(Module &M) {
if (MAI->doesSupportDebugInformation()) {
bool EmitCodeView = MMI->getModule()->getCodeViewFlag();
if (EmitCodeView && TM.getTargetTriple().isKnownWindowsMSVCEnvironment()) {
- Handlers.push_back(HandlerInfo(new WinCodeViewLineTables(this),
+ Handlers.push_back(HandlerInfo(new CodeViewDebug(this),
DbgTimerName,
CodeViewLineTablesGroupName));
}
if (!EmitCodeView || MMI->getModule()->getDwarfVersion()) {
DD = new DwarfDebug(this, &M);
+ DD->beginModule();
Handlers.push_back(HandlerInfo(DD, DbgTimerName, DWARFGroupName));
}
}
@@ -319,21 +323,17 @@ void AsmPrinter::EmitLinkage(const GlobalValue *GV, MCSymbol *GVSym) const {
OutStreamer->EmitSymbolAttribute(GVSym, MCSA_Weak);
}
return;
- case GlobalValue::AppendingLinkage:
- // FIXME: appending linkage variables should go into a section of
- // their name or something. For now, just emit them as external.
case GlobalValue::ExternalLinkage:
- // If external or appending, declare as a global symbol.
- // .globl _foo
+ // If external, declare as a global symbol: .globl _foo
OutStreamer->EmitSymbolAttribute(GVSym, MCSA_Global);
return;
case GlobalValue::PrivateLinkage:
case GlobalValue::InternalLinkage:
return;
+ case GlobalValue::AppendingLinkage:
case GlobalValue::AvailableExternallyLinkage:
- llvm_unreachable("Should never emit this");
case GlobalValue::ExternalWeakLinkage:
- llvm_unreachable("Don't know how to emit these");
+ llvm_unreachable("Should never emit this");
}
llvm_unreachable("Unknown linkage type!");
}
@@ -347,51 +347,17 @@ MCSymbol *AsmPrinter::getSymbol(const GlobalValue *GV) const {
return TM.getSymbol(GV, *Mang);
}
-static MCSymbol *getOrCreateEmuTLSControlSym(MCSymbol *GVSym, MCContext &C) {
- return C.getOrCreateSymbol(Twine("__emutls_v.") + GVSym->getName());
-}
-
-static MCSymbol *getOrCreateEmuTLSInitSym(MCSymbol *GVSym, MCContext &C) {
- return C.getOrCreateSymbol(Twine("__emutls_t.") + GVSym->getName());
-}
-
-/// EmitEmulatedTLSControlVariable - Emit the control variable for an emulated TLS variable.
-void AsmPrinter::EmitEmulatedTLSControlVariable(const GlobalVariable *GV,
- MCSymbol *EmittedSym,
- bool AllZeroInitValue) {
- MCSection *TLSVarSection = getObjFileLowering().getDataSection();
- OutStreamer->SwitchSection(TLSVarSection);
- MCSymbol *GVSym = getSymbol(GV);
- EmitLinkage(GV, EmittedSym); // same linkage as GV
- const DataLayout &DL = GV->getParent()->getDataLayout();
- uint64_t Size = DL.getTypeAllocSize(GV->getType()->getElementType());
- unsigned AlignLog = getGVAlignmentLog2(GV, DL);
- unsigned WordSize = DL.getPointerSize();
- unsigned Alignment = DL.getPointerABIAlignment();
- EmitAlignment(Log2_32(Alignment));
- OutStreamer->EmitLabel(EmittedSym);
- OutStreamer->EmitIntValue(Size, WordSize);
- OutStreamer->EmitIntValue((1 << AlignLog), WordSize);
- OutStreamer->EmitIntValue(0, WordSize);
- if (GV->hasInitializer() && !AllZeroInitValue) {
- OutStreamer->EmitSymbolValue(
- getOrCreateEmuTLSInitSym(GVSym, OutContext), WordSize);
- } else
- OutStreamer->EmitIntValue(0, WordSize);
- if (MAI->hasDotTypeDotSizeDirective())
- OutStreamer->emitELFSize(cast<MCSymbolELF>(EmittedSym),
- MCConstantExpr::create(4 * WordSize, OutContext));
- OutStreamer->AddBlankLine(); // End of the __emutls_v.* variable.
-}
-
/// EmitGlobalVariable - Emit the specified global variable to the .s file.
void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
- bool IsEmuTLSVar =
- GV->getThreadLocalMode() != llvm::GlobalVariable::NotThreadLocal &&
- TM.Options.EmulatedTLS;
+ bool IsEmuTLSVar = TM.Options.EmulatedTLS && GV->isThreadLocal();
assert(!(IsEmuTLSVar && GV->hasCommonLinkage()) &&
"No emulated TLS variables in the common section");
+ // Never emit TLS variable xyz in emulated TLS model.
+ // The initialization value is in __emutls_t.xyz instead of xyz.
+ if (IsEmuTLSVar)
+ return;
+
if (GV->hasInitializer()) {
// Check to see if this is a special global used by LLVM, if so, emit it.
if (EmitSpecialLLVMGlobal(GV))
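
With the change above, the AsmPrinter never emits an emulated-TLS variable xyz itself: the control record and, for non-zero initializers, the initial image are produced under separate names (the LowerEmuTLS.cpp entry in the diffstat is the pass that now builds them). The naming scheme used by the deleted helpers, as a tiny sketch with hypothetical helper names:

// Illustrative sketch. "__emutls_v." prefixes the control record the
// emutls runtime reads (size, alignment, slot, init pointer).
#include <string>

std::string emuTLSControlName(const std::string &Var) {
  return "__emutls_v." + Var;
}

// "__emutls_t." prefixes the initial-value image, emitted only when
// the initializer is not all-zero.
std::string emuTLSInitName(const std::string &Var) {
  return "__emutls_t." + Var;
}
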
@@ -402,7 +368,7 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
if (GlobalGOTEquivs.count(getSymbol(GV)))
return;
- if (isVerbose() && !IsEmuTLSVar) {
+ if (isVerbose()) {
// When printing the control variable __emutls_v.*,
// we don't need to print the original TLS variable name.
GV->printAsOperand(OutStreamer->GetCommentOS(),
@@ -412,11 +378,11 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
}
MCSymbol *GVSym = getSymbol(GV);
- MCSymbol *EmittedSym = IsEmuTLSVar ?
- getOrCreateEmuTLSControlSym(GVSym, OutContext) : GVSym;
- // getOrCreateEmuTLSControlSym only creates the symbol with name and default attributes.
- // GV's or GVSym's attributes will be used for the EmittedSym.
+ MCSymbol *EmittedSym = GVSym;
+ // getOrCreateEmuTLSControlSym only creates the symbol with name and default
+ // attributes.
+ // GV's or GVSym's attributes will be used for the EmittedSym.
EmitVisibility(EmittedSym, GV->getVisibility(), !GV->isDeclaration());
if (!GV->hasInitializer()) // External globals require no extra code.
@@ -440,48 +406,47 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
// sections and expected to be contiguous (e.g. ObjC metadata).
unsigned AlignLog = getGVAlignmentLog2(GV, DL);
- bool AllZeroInitValue = false;
- const Constant *InitValue = GV->getInitializer();
- if (isa<ConstantAggregateZero>(InitValue))
- AllZeroInitValue = true;
- else {
- const ConstantInt *InitIntValue = dyn_cast<ConstantInt>(InitValue);
- if (InitIntValue && InitIntValue->isZero())
- AllZeroInitValue = true;
- }
- if (IsEmuTLSVar)
- EmitEmulatedTLSControlVariable(GV, EmittedSym, AllZeroInitValue);
-
for (const HandlerInfo &HI : Handlers) {
NamedRegionTimer T(HI.TimerName, HI.TimerGroupName, TimePassesIsEnabled);
HI.Handler->setSymbolSize(GVSym, Size);
}
- // Handle common and BSS local symbols (.lcomm).
- if (GVKind.isCommon() || GVKind.isBSSLocal()) {
- assert(!(IsEmuTLSVar && GVKind.isCommon()) &&
- "No emulated TLS variables in the common section");
+ // Handle common symbols
+ if (GVKind.isCommon()) {
if (Size == 0) Size = 1; // .comm Foo, 0 is undefined, avoid it.
unsigned Align = 1 << AlignLog;
+ if (!getObjFileLowering().getCommDirectiveSupportsAlignment())
+ Align = 0;
- // Handle common symbols.
- if (GVKind.isCommon()) {
- if (!getObjFileLowering().getCommDirectiveSupportsAlignment())
- Align = 0;
+ // .comm _foo, 42, 4
+ OutStreamer->EmitCommonSymbol(GVSym, Size, Align);
+ return;
+ }
- // .comm _foo, 42, 4
- OutStreamer->EmitCommonSymbol(GVSym, Size, Align);
- return;
- }
+ // Determine to which section this global should be emitted.
+ MCSection *TheSection =
+ getObjFileLowering().SectionForGlobal(GV, GVKind, *Mang, TM);
- // Handle local BSS symbols.
- if (MAI->hasMachoZeroFillDirective()) {
- MCSection *TheSection =
- getObjFileLowering().SectionForGlobal(GV, GVKind, *Mang, TM);
- // .zerofill __DATA, __bss, _foo, 400, 5
- OutStreamer->EmitZerofill(TheSection, GVSym, Size, Align);
- return;
- }
+ // If we have a bss global going to a section that supports the
+ // zerofill directive, do so here.
+ if (GVKind.isBSS() && MAI->hasMachoZeroFillDirective() &&
+ TheSection->isVirtualSection()) {
+ if (Size == 0)
+ Size = 1; // zerofill of 0 bytes is undefined.
+ unsigned Align = 1 << AlignLog;
+ EmitLinkage(GV, GVSym);
+ // .zerofill __DATA, __bss, _foo, 400, 5
+ OutStreamer->EmitZerofill(TheSection, GVSym, Size, Align);
+ return;
+ }
+
+ // If this is a BSS local symbol and we are emitting in the BSS
+ // section use .lcomm/.comm directive.
+ if (GVKind.isBSSLocal() &&
+ getObjFileLowering().getBSSSection() == TheSection) {
+ if (Size == 0)
+ Size = 1; // .comm Foo, 0 is undefined, avoid it.
+ unsigned Align = 1 << AlignLog;
// Use .lcomm only if it supports user-specified alignment.
// Otherwise, while it would still be correct to use .lcomm in some
@@ -505,30 +470,6 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
return;
}
- if (IsEmuTLSVar && AllZeroInitValue)
- return; // No need of initialization values.
-
- MCSymbol *EmittedInitSym = IsEmuTLSVar ?
- getOrCreateEmuTLSInitSym(GVSym, OutContext) : GVSym;
- // getOrCreateEmuTLSInitSym only creates the symbol with name and default attributes.
- // GV's or GVSym's attributes will be used for the EmittedInitSym.
-
- MCSection *TheSection = IsEmuTLSVar ?
- getObjFileLowering().getReadOnlySection() :
- getObjFileLowering().SectionForGlobal(GV, GVKind, *Mang, TM);
-
- // Handle the zerofill directive on darwin, which is a special form of BSS
- // emission.
- if (GVKind.isBSSExtern() && MAI->hasMachoZeroFillDirective() && !IsEmuTLSVar) {
- if (Size == 0) Size = 1; // zerofill of 0 bytes is undefined.
-
- // .globl _foo
- OutStreamer->EmitSymbolAttribute(GVSym, MCSA_Global);
- // .zerofill __DATA, __common, _foo, 400, 5
- OutStreamer->EmitZerofill(TheSection, GVSym, Size, 1 << AlignLog);
- return;
- }
-
// Handle thread local data for mach-o which requires us to output an
// additional structure of data and mangle the original symbol so that we
// can reference it later.
@@ -539,7 +480,7 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
// TLOF class. This will also make it more obvious that stuff like
// MCStreamer::EmitTBSSSymbol is macho specific and only called from macho
// specific code.
- if (GVKind.isThreadLocal() && MAI->hasMachoTBSSDirective() && !IsEmuTLSVar) {
+ if (GVKind.isThreadLocal() && MAI->hasMachoTBSSDirective()) {
// Emit the .tbss symbol
MCSymbol *MangSym =
OutContext.getOrCreateSymbol(GVSym->getName() + Twine("$tlv$init"));
@@ -581,11 +522,11 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
return;
}
+ MCSymbol *EmittedInitSym = GVSym;
+
OutStreamer->SwitchSection(TheSection);
- // emutls_t.* symbols are only used in the current compilation unit.
- if (!IsEmuTLSVar)
- EmitLinkage(GV, EmittedInitSym);
+ EmitLinkage(GV, EmittedInitSym);
EmitAlignment(AlignLog, GV);
OutStreamer->EmitLabel(EmittedInitSym);
@@ -696,20 +637,20 @@ static void emitComments(const MachineInstr &MI, raw_ostream &CommentOS) {
// We assume a single instruction only has a spill or reload, not
// both.
const MachineMemOperand *MMO;
- if (TII->isLoadFromStackSlotPostFE(&MI, FI)) {
+ if (TII->isLoadFromStackSlotPostFE(MI, FI)) {
if (FrameInfo->isSpillSlotObjectIndex(FI)) {
MMO = *MI.memoperands_begin();
CommentOS << MMO->getSize() << "-byte Reload\n";
}
- } else if (TII->hasLoadFromStackSlot(&MI, MMO, FI)) {
+ } else if (TII->hasLoadFromStackSlot(MI, MMO, FI)) {
if (FrameInfo->isSpillSlotObjectIndex(FI))
CommentOS << MMO->getSize() << "-byte Folded Reload\n";
- } else if (TII->isStoreToStackSlotPostFE(&MI, FI)) {
+ } else if (TII->isStoreToStackSlotPostFE(MI, FI)) {
if (FrameInfo->isSpillSlotObjectIndex(FI)) {
MMO = *MI.memoperands_begin();
CommentOS << MMO->getSize() << "-byte Spill\n";
}
- } else if (TII->hasStoreToStackSlot(&MI, MMO, FI)) {
+ } else if (TII->hasStoreToStackSlot(MI, MMO, FI)) {
if (FrameInfo->isSpillSlotObjectIndex(FI))
CommentOS << MMO->getSize() << "-byte Folded Spill\n";
}
@@ -745,7 +686,7 @@ static void emitKill(const MachineInstr *MI, AsmPrinter &AP) {
AP.MF->getSubtarget().getRegisterInfo())
<< (Op.isDef() ? "<def>" : "<kill>");
}
- AP.OutStreamer->AddComment(Str);
+ AP.OutStreamer->AddComment(OS.str());
AP.OutStreamer->AddBlankLine();
}
@@ -1065,8 +1006,9 @@ static bool isGOTEquivalentCandidate(const GlobalVariable *GV,
// Global GOT equivalents are unnamed private globals with a constant
// pointer initializer to another global symbol. They must point to a
// GlobalVariable or Function, i.e., as GlobalValue.
- if (!GV->hasUnnamedAddr() || !GV->hasInitializer() || !GV->isConstant() ||
- !GV->isDiscardableIfUnused() || !dyn_cast<GlobalValue>(GV->getOperand(0)))
+ if (!GV->hasGlobalUnnamedAddr() || !GV->hasInitializer() ||
+ !GV->isConstant() || !GV->isDiscardableIfUnused() ||
+ !dyn_cast<GlobalValue>(GV->getOperand(0)))
return false;
// To be a got equivalent, at least one of its users need to be a constant
@@ -1118,6 +1060,52 @@ void AsmPrinter::emitGlobalGOTEquivs() {
EmitGlobalVariable(GV);
}
+void AsmPrinter::emitGlobalIndirectSymbol(Module &M,
+ const GlobalIndirectSymbol& GIS) {
+ MCSymbol *Name = getSymbol(&GIS);
+
+ if (GIS.hasExternalLinkage() || !MAI->getWeakRefDirective())
+ OutStreamer->EmitSymbolAttribute(Name, MCSA_Global);
+ else if (GIS.hasWeakLinkage() || GIS.hasLinkOnceLinkage())
+ OutStreamer->EmitSymbolAttribute(Name, MCSA_WeakReference);
+ else
+ assert(GIS.hasLocalLinkage() && "Invalid alias or ifunc linkage");
+
+ // Set the symbol type to function if the alias has a function type.
+ // This affects codegen when the aliasee is not a function.
+ if (GIS.getType()->getPointerElementType()->isFunctionTy()) {
+ OutStreamer->EmitSymbolAttribute(Name, MCSA_ELF_TypeFunction);
+ if (isa<GlobalIFunc>(GIS))
+ OutStreamer->EmitSymbolAttribute(Name, MCSA_ELF_TypeIndFunction);
+ }
+
+ EmitVisibility(Name, GIS.getVisibility());
+
+ const MCExpr *Expr = lowerConstant(GIS.getIndirectSymbol());
+
+ if (isa<GlobalAlias>(&GIS) && MAI->hasAltEntry() && isa<MCBinaryExpr>(Expr))
+ OutStreamer->EmitSymbolAttribute(Name, MCSA_AltEntry);
+
+ // Emit the directives as assignments aka .set:
+ OutStreamer->EmitAssignment(Name, Expr);
+
+ if (auto *GA = dyn_cast<GlobalAlias>(&GIS)) {
+ // If the aliasee does not correspond to a symbol in the output, i.e. the
+ // alias is not of an object or the aliased object is private, then set the
+ // size of the alias symbol from the type of the alias. We don't do this in
+ // other situations as the alias and aliasee having differing types but same
+ // size may be intentional.
+ const GlobalObject *BaseObject = GA->getBaseObject();
+ if (MAI->hasDotTypeDotSizeDirective() && GA->getValueType()->isSized() &&
+ (!BaseObject || BaseObject->hasPrivateLinkage())) {
+ const DataLayout &DL = M.getDataLayout();
+ uint64_t Size = DL.getTypeAllocSize(GA->getValueType());
+ OutStreamer->emitELFSize(cast<MCSymbolELF>(Name),
+ MCConstantExpr::create(Size, OutContext));
+ }
+ }
+}
+
bool AsmPrinter::doFinalization(Module &M) {
// Set the MachineFunction to nullptr so that we can catch attempted
// accesses to MF specific features at the module level and so that
@@ -1191,55 +1179,35 @@ bool AsmPrinter::doFinalization(Module &M) {
// to notice uses in operands (due to constant exprs etc). This should
// happen with the MC stuff eventually.
- // Print out module-level global variables here.
- for (const auto &G : M.globals()) {
- if (!G.hasExternalWeakLinkage())
+ // Print out module-level global objects here.
+ for (const auto &GO : M.global_objects()) {
+ if (!GO.hasExternalWeakLinkage())
continue;
- OutStreamer->EmitSymbolAttribute(getSymbol(&G), MCSA_WeakReference);
- }
-
- for (const auto &F : M) {
- if (!F.hasExternalWeakLinkage())
- continue;
- OutStreamer->EmitSymbolAttribute(getSymbol(&F), MCSA_WeakReference);
+ OutStreamer->EmitSymbolAttribute(getSymbol(&GO), MCSA_WeakReference);
}
}
OutStreamer->AddBlankLine();
- for (const auto &Alias : M.aliases()) {
- MCSymbol *Name = getSymbol(&Alias);
-
- if (Alias.hasExternalLinkage() || !MAI->getWeakRefDirective())
- OutStreamer->EmitSymbolAttribute(Name, MCSA_Global);
- else if (Alias.hasWeakLinkage() || Alias.hasLinkOnceLinkage())
- OutStreamer->EmitSymbolAttribute(Name, MCSA_WeakReference);
- else
- assert(Alias.hasLocalLinkage() && "Invalid alias linkage");
-
- // Set the symbol type to function if the alias has a function type.
- // This affects codegen when the aliasee is not a function.
- if (Alias.getType()->getPointerElementType()->isFunctionTy())
- OutStreamer->EmitSymbolAttribute(Name, MCSA_ELF_TypeFunction);
-
- EmitVisibility(Name, Alias.getVisibility());
- // Emit the directives as assignments aka .set:
- OutStreamer->EmitAssignment(Name, lowerConstant(Alias.getAliasee()));
-
- // If the aliasee does not correspond to a symbol in the output, i.e. the
- // alias is not of an object or the aliased object is private, then set the
- // size of the alias symbol from the type of the alias. We don't do this in
- // other situations as the alias and aliasee having differing types but same
- // size may be intentional.
- const GlobalObject *BaseObject = Alias.getBaseObject();
- if (MAI->hasDotTypeDotSizeDirective() && Alias.getValueType()->isSized() &&
- (!BaseObject || BaseObject->hasPrivateLinkage())) {
- const DataLayout &DL = M.getDataLayout();
- uint64_t Size = DL.getTypeAllocSize(Alias.getValueType());
- OutStreamer->emitELFSize(cast<MCSymbolELF>(Name),
- MCConstantExpr::create(Size, OutContext));
+ // Print aliases in topological order, that is, for each alias a = b,
+ // b must be printed before a.
+ // This is because on some targets (e.g. PowerPC) the linker expects aliases in
+ // such an order to generate correct TOC information.
+ SmallVector<const GlobalAlias *, 16> AliasStack;
+ SmallPtrSet<const GlobalAlias *, 16> AliasVisited;
+ for (const auto &Alias : M.aliases()) {
+ for (const GlobalAlias *Cur = &Alias; Cur;
+ Cur = dyn_cast<GlobalAlias>(Cur->getAliasee())) {
+ if (!AliasVisited.insert(Cur).second)
+ break;
+ AliasStack.push_back(Cur);
}
+ for (const GlobalAlias *AncestorAlias : reverse(AliasStack))
+ emitGlobalIndirectSymbol(M, *AncestorAlias);
+ AliasStack.clear();
}
+ for (const auto &IFunc : M.ifuncs())
+ emitGlobalIndirectSymbol(M, IFunc);
GCModuleInfo *MI = getAnalysisIfAvailable<GCModuleInfo>();
assert(MI && "AsmPrinter didn't require GCModuleInfo?");
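
The alias loop above pushes each chain a -> b -> c onto a stack and pops it in reverse, so an aliasee is always printed before anything that refers to it, and the visited set keeps chains that share a tail from emitting a symbol twice. The chain walk in isolation (toy types; the real code keys off GlobalAlias and its aliasee):

// Illustrative sketch: Aliasee is null when the target is a plain
// global object rather than another alias.
#include <set>
#include <string>
#include <vector>

struct Alias {
  std::string Name;
  const Alias *Aliasee = nullptr;
};

void emitAliasesInTopologicalOrder(const std::vector<Alias> &Aliases,
                                   std::vector<std::string> &Out) {
  std::set<const Alias *> Visited;
  std::vector<const Alias *> Stack;
  for (const Alias &A : Aliases) {
    // Walk the chain until a non-alias or an already-emitted node.
    for (const Alias *Cur = &A; Cur; Cur = Cur->Aliasee) {
      if (!Visited.insert(Cur).second)
        break; // tail already emitted by an earlier chain
      Stack.push_back(Cur);
    }
    // Pop in reverse: the deepest aliasee comes out first.
    while (!Stack.empty()) {
      Out.push_back(Stack.back()->Name);
      Stack.pop_back();
    }
  }
}
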
@@ -1252,9 +1220,10 @@ bool AsmPrinter::doFinalization(Module &M) {
// Emit __morestack address if needed for indirect calls.
if (MMI->usesMorestackAddr()) {
+ unsigned Align = 1;
MCSection *ReadOnlySection = getObjFileLowering().getSectionForConstant(
getDataLayout(), SectionKind::getReadOnly(),
- /*C=*/nullptr);
+ /*C=*/nullptr, Align);
OutStreamer->SwitchSection(ReadOnlySection);
MCSymbol *AddrSymbol =
@@ -1344,8 +1313,8 @@ void AsmPrinter::EmitConstantPool() {
if (!CPE.isMachineConstantPoolEntry())
C = CPE.Val.ConstVal;
- MCSection *S =
- getObjFileLowering().getSectionForConstant(getDataLayout(), Kind, C);
+ MCSection *S = getObjFileLowering().getSectionForConstant(getDataLayout(),
+ Kind, C, Align);
// The number of sections are small, just do a linear search from the
// last section to the first.
@@ -1443,7 +1412,7 @@ void AsmPrinter::EmitJumpTableInfo() {
// For the EK_LabelDifference32 entry, if using .set avoids a relocation,
/// emit a .set directive for each unique entry.
if (MJTI->getEntryKind() == MachineJumpTableInfo::EK_LabelDifference32 &&
- MAI->doesSetDirectiveSuppressesReloc()) {
+ MAI->doesSetDirectiveSuppressReloc()) {
SmallPtrSet<const MachineBasicBlock*, 16> EmittedSets;
const TargetLowering *TLI = MF->getSubtarget().getTargetLowering();
const MCExpr *Base = TLI->getPICJumpTableRelocBaseExpr(MF,JTI,OutContext);
@@ -1524,7 +1493,7 @@ void AsmPrinter::EmitJumpTableEntry(const MachineJumpTableInfo *MJTI,
// If the .set directive avoids relocations, this is emitted as:
// .set L4_5_set_123, LBB123 - LJTI1_2
// .word L4_5_set_123
- if (MAI->doesSetDirectiveSuppressesReloc()) {
+ if (MAI->doesSetDirectiveSuppressReloc()) {
Value = MCSymbolRefExpr::create(GetJTSetSymbol(UID, MBB->getNumber()),
OutContext);
break;
@@ -1555,7 +1524,7 @@ bool AsmPrinter::EmitSpecialLLVMGlobal(const GlobalVariable *GV) {
}
// Ignore debug and non-emitted data. This handles llvm.compiler.used.
- if (StringRef(GV->getSection()) == "llvm.metadata" ||
+ if (GV->getSection() == "llvm.metadata" ||
GV->hasAvailableExternallyLinkage())
return true;
@@ -1589,7 +1558,7 @@ bool AsmPrinter::EmitSpecialLLVMGlobal(const GlobalVariable *GV) {
return true;
}
- return false;
+ report_fatal_error("unknown special variable");
}
/// EmitLLVMUsedList - For targets that define a MAI::UsedDirective, mark each
@@ -1648,7 +1617,8 @@ void AsmPrinter::EmitXXStructorList(const DataLayout &DL, const Constant *List,
S.Priority = Priority->getLimitedValue(65535);
S.Func = CS->getOperand(1);
if (ETy->getNumElements() == 3 && !CS->getOperand(2)->isNullValue())
- S.ComdatKey = dyn_cast<GlobalValue>(CS->getOperand(2)->stripPointerCasts());
+ S.ComdatKey =
+ dyn_cast<GlobalValue>(CS->getOperand(2)->stripPointerCasts());
}
// Emit the function pointers in the target-specific order
@@ -1789,10 +1759,6 @@ const MCExpr *AsmPrinter::lowerConstant(const Constant *CV) {
llvm_unreachable("Unknown constant value to lower!");
}
- if (const MCExpr *RelocExpr
- = getObjFileLowering().getExecutableRelativeSymbol(CE, *Mang, TM))
- return RelocExpr;
-
switch (CE->getOpcode()) {
default:
// If the code isn't optimized, there may be outstanding folding
@@ -1868,10 +1834,34 @@ const MCExpr *AsmPrinter::lowerConstant(const Constant *CV) {
return MCBinaryExpr::createAnd(OpExpr, MaskExpr, Ctx);
}
+ case Instruction::Sub: {
+ GlobalValue *LHSGV;
+ APInt LHSOffset;
+ if (IsConstantOffsetFromGlobal(CE->getOperand(0), LHSGV, LHSOffset,
+ getDataLayout())) {
+ GlobalValue *RHSGV;
+ APInt RHSOffset;
+ if (IsConstantOffsetFromGlobal(CE->getOperand(1), RHSGV, RHSOffset,
+ getDataLayout())) {
+ const MCExpr *RelocExpr = getObjFileLowering().lowerRelativeReference(
+ LHSGV, RHSGV, *Mang, TM);
+ if (!RelocExpr)
+ RelocExpr = MCBinaryExpr::createSub(
+ MCSymbolRefExpr::create(getSymbol(LHSGV), Ctx),
+ MCSymbolRefExpr::create(getSymbol(RHSGV), Ctx), Ctx);
+ int64_t Addend = (LHSOffset - RHSOffset).getSExtValue();
+ if (Addend != 0)
+ RelocExpr = MCBinaryExpr::createAdd(
+ RelocExpr, MCConstantExpr::create(Addend, Ctx), Ctx);
+ return RelocExpr;
+ }
+ }
+ }
+ // else fallthrough
+
// The MC library also has a right-shift operator, but it isn't consistently
// signed or unsigned between different targets.
case Instruction::Add:
- case Instruction::Sub:
case Instruction::Mul:
case Instruction::SDiv:
case Instruction::SRem:
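
The new Instruction::Sub case above fires when both operands are constant offsets from globals, (GA + c1) - (GB + c2): it first asks the target via lowerRelativeReference, otherwise builds the symbol difference GA - GB, and folds the two offsets into a single addend. Just the folding arithmetic, as a hedged sketch with symbols reduced to strings (hypothetical helper, not the MC expression API):

// Lower (LHS + LHSOff) - (RHS + RHSOff) to "LHS - RHS [+ addend]",
// the shape a streamer can emit as a link-time difference.
#include <cstdint>
#include <string>

std::string lowerGlobalDifference(const std::string &LHS, int64_t LHSOff,
                                  const std::string &RHS, int64_t RHSOff) {
  std::string Expr = LHS + " - " + RHS;
  int64_t Addend = LHSOff - RHSOff; // fold both constant offsets
  if (Addend != 0)
    Expr += " + " + std::to_string(Addend);
  return Expr;
}

// Example: lowerGlobalDifference("b", 4, "a", 0) yields "b - a + 4".
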
@@ -1964,7 +1954,7 @@ static void emitGlobalConstantDataSequential(const DataLayout &DL,
uint64_t Bytes = DL.getTypeAllocSize(CDS->getType());
// Don't emit a 1-byte object as a .fill.
if (Bytes > 1)
- return AP.OutStreamer->EmitFill(Bytes, Value);
+ return AP.OutStreamer->emitFill(Bytes, Value);
}
// If this can be emitted with .ascii/.asciz, emit it as such.
@@ -2003,7 +1993,7 @@ static void emitGlobalConstantArray(const DataLayout &DL,
if (Value != -1) {
uint64_t Bytes = DL.getTypeAllocSize(CA->getType());
- AP.OutStreamer->EmitFill(Bytes, Value);
+ AP.OutStreamer->emitFill(Bytes, Value);
}
else {
for (unsigned i = 0, e = CA->getNumOperands(); i != e; ++i) {
@@ -2582,7 +2572,7 @@ isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const {
// If we are the operands of one of the branches, this is not a fall
// through. Note that targets with delay slots will usually bundle
// terminators with the delay slot instruction.
- for (ConstMIBundleOperands OP(&MI); OP.isValid(); ++OP) {
+ for (ConstMIBundleOperands OP(MI); OP.isValid(); ++OP) {
if (OP->isJTI())
return false;
if (OP->isMBB() && OP->getMBB() == MBB)
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
index 504c5d2..60f40d0 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
@@ -178,8 +178,7 @@ void AsmPrinter::emitDwarfStringOffset(DwarfStringPoolEntryRef S) const {
/// EmitDwarfRegOp - Emit dwarf register operation.
void AsmPrinter::EmitDwarfRegOp(ByteStreamer &Streamer,
const MachineLocation &MLoc) const {
- DebugLocDwarfExpression Expr(*MF->getSubtarget().getRegisterInfo(),
- getDwarfDebug()->getDwarfVersion(), Streamer);
+ DebugLocDwarfExpression Expr(getDwarfDebug()->getDwarfVersion(), Streamer);
const MCRegisterInfo *MRI = MMI->getContext().getRegisterInfo();
int Reg = MRI->getDwarfRegNum(MLoc.getReg(), false);
if (Reg < 0) {
@@ -193,7 +192,8 @@ void AsmPrinter::EmitDwarfRegOp(ByteStreamer &Streamer,
"nop (could not find a dwarf register number)");
// Attempt to find a valid super- or sub-register.
- if (!Expr.AddMachineRegPiece(MLoc.getReg()))
+ if (!Expr.AddMachineRegPiece(*MF->getSubtarget().getRegisterInfo(),
+ MLoc.getReg()))
Expr.EmitOp(dwarf::DW_OP_nop,
"nop (could not find a dwarf register number)");
return;
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterHandler.h b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterHandler.h
index e59961f..638226e 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterHandler.h
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterHandler.h
@@ -19,11 +19,14 @@
namespace llvm {
+class AsmPrinter;
class MachineBasicBlock;
class MachineFunction;
class MachineInstr;
class MCSymbol;
+typedef MCSymbol *ExceptionSymbolProvider(AsmPrinter *Asm);
+
/// \brief Collects and handles AsmPrinter objects required to build debug
/// or EH information.
class AsmPrinterHandler {
@@ -51,6 +54,10 @@ public:
/// beginFunction at all.
virtual void endFunction(const MachineFunction *MF) = 0;
+ virtual void beginFragment(const MachineBasicBlock *MBB,
+ ExceptionSymbolProvider ESP) {}
+ virtual void endFragment() {}
+
/// \brief Emit target-specific EH funclet machinery.
virtual void beginFunclet(const MachineBasicBlock &MBB,
MCSymbol *Sym = nullptr) {}
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
index 5633aa4..2ce6c18 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
@@ -23,10 +23,10 @@
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCParser/MCTargetAsmParser.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbol.h"
-#include "llvm/MC/MCTargetAsmParser.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/SourceMgr.h"
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/ByteStreamer.h b/contrib/llvm/lib/CodeGen/AsmPrinter/ByteStreamer.h
index df1997b..aaf6180 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/ByteStreamer.h
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/ByteStreamer.h
@@ -16,7 +16,6 @@
#define LLVM_LIB_CODEGEN_ASMPRINTER_BYTESTREAMER_H
#include "DIEHash.h"
-#include "llvm/ADT/ArrayRef.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/Support/LEB128.h"
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp
new file mode 100644
index 0000000..ebf80de
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp
@@ -0,0 +1,2075 @@
+//===-- llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp --*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for writing Microsoft CodeView debug info.
+//
+//===----------------------------------------------------------------------===//
+
+#include "CodeViewDebug.h"
+#include "llvm/ADT/TinyPtrVector.h"
+#include "llvm/DebugInfo/CodeView/ByteStream.h"
+#include "llvm/DebugInfo/CodeView/CVTypeVisitor.h"
+#include "llvm/DebugInfo/CodeView/CodeView.h"
+#include "llvm/DebugInfo/CodeView/FieldListRecordBuilder.h"
+#include "llvm/DebugInfo/CodeView/Line.h"
+#include "llvm/DebugInfo/CodeView/SymbolRecord.h"
+#include "llvm/DebugInfo/CodeView/TypeDumper.h"
+#include "llvm/DebugInfo/CodeView/TypeIndex.h"
+#include "llvm/DebugInfo/CodeView/TypeRecord.h"
+#include "llvm/DebugInfo/CodeView/TypeVisitorCallbacks.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCSectionCOFF.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/COFF.h"
+#include "llvm/Support/ScopedPrinter.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+
+using namespace llvm;
+using namespace llvm::codeview;
+
+CodeViewDebug::CodeViewDebug(AsmPrinter *AP)
+ : DebugHandlerBase(AP), OS(*Asm->OutStreamer), CurFn(nullptr) {
+  // If the module doesn't have named metadata anchors or the COFF debug
+  // section is not available, skip any debug info related work.
+ if (!MMI->getModule()->getNamedMetadata("llvm.dbg.cu") ||
+ !AP->getObjFileLowering().getCOFFDebugSymbolsSection()) {
+ Asm = nullptr;
+ return;
+ }
+
+ // Tell MMI that we have debug info.
+ MMI->setDebugInfoAvailability(true);
+}
+
+StringRef CodeViewDebug::getFullFilepath(const DIFile *File) {
+ std::string &Filepath = FileToFilepathMap[File];
+ if (!Filepath.empty())
+ return Filepath;
+
+ StringRef Dir = File->getDirectory(), Filename = File->getFilename();
+
+ // Clang emits directory and relative filename info into the IR, but CodeView
+ // operates on full paths. We could change Clang to emit full paths too, but
+  // that would increase the IR size and is probably not needed for other users.
+ // For now, just concatenate and canonicalize the path here.
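+  // A ':' at index 1 means the filename already begins with a drive letter
+  // (e.g. "C:"), so treat it as an absolute path.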
+ if (Filename.find(':') == 1)
+ Filepath = Filename;
+ else
+ Filepath = (Dir + "\\" + Filename).str();
+
+ // Canonicalize the path. We have to do it textually because we may no longer
+  // have access to the file in the filesystem.
+ // First, replace all slashes with backslashes.
+ std::replace(Filepath.begin(), Filepath.end(), '/', '\\');
+
+  // Replace all "\.\" with "\".
+ size_t Cursor = 0;
+ while ((Cursor = Filepath.find("\\.\\", Cursor)) != std::string::npos)
+ Filepath.erase(Cursor, 2);
+
+ // Replace all "\XXX\..\" with "\". Don't try too hard though as the original
+ // path should be well-formatted, e.g. start with a drive letter, etc.
+ Cursor = 0;
+ while ((Cursor = Filepath.find("\\..\\", Cursor)) != std::string::npos) {
+ // Something's wrong if the path starts with "\..\", abort.
+ if (Cursor == 0)
+ break;
+
+ size_t PrevSlash = Filepath.rfind('\\', Cursor - 1);
+ if (PrevSlash == std::string::npos)
+ // Something's wrong, abort.
+ break;
+
+ Filepath.erase(PrevSlash, Cursor + 3 - PrevSlash);
+ // The next ".." might be following the one we've just erased.
+ Cursor = PrevSlash;
+ }
+
+ // Remove all duplicate backslashes.
+ Cursor = 0;
+ while ((Cursor = Filepath.find("\\\\", Cursor)) != std::string::npos)
+ Filepath.erase(Cursor, 1);
+
+ return Filepath;
+}
+
+unsigned CodeViewDebug::maybeRecordFile(const DIFile *F) {
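+  // CodeView file ids are 1-based. Tentatively assign the next id; it is
+  // kept only if this file has not been recorded before.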
+ unsigned NextId = FileIdMap.size() + 1;
+ auto Insertion = FileIdMap.insert(std::make_pair(F, NextId));
+ if (Insertion.second) {
+ // We have to compute the full filepath and emit a .cv_file directive.
+ StringRef FullPath = getFullFilepath(F);
+ NextId = OS.EmitCVFileDirective(NextId, FullPath);
+ assert(NextId == FileIdMap.size() && ".cv_file directive failed");
+ }
+ return Insertion.first->second;
+}
+
+CodeViewDebug::InlineSite &
+CodeViewDebug::getInlineSite(const DILocation *InlinedAt,
+ const DISubprogram *Inlinee) {
+ auto SiteInsertion = CurFn->InlineSites.insert({InlinedAt, InlineSite()});
+ InlineSite *Site = &SiteInsertion.first->second;
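+  // First time we've seen this inline site: allocate a site-specific
+  // function id and remember the inlinee.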
+ if (SiteInsertion.second) {
+ Site->SiteFuncId = NextFuncId++;
+ Site->Inlinee = Inlinee;
+ InlinedSubprograms.insert(Inlinee);
+ getFuncIdForSubprogram(Inlinee);
+ }
+ return *Site;
+}
+
+static StringRef getPrettyScopeName(const DIScope *Scope) {
+ StringRef ScopeName = Scope->getName();
+ if (!ScopeName.empty())
+ return ScopeName;
+
+ switch (Scope->getTag()) {
+ case dwarf::DW_TAG_enumeration_type:
+ case dwarf::DW_TAG_class_type:
+ case dwarf::DW_TAG_structure_type:
+ case dwarf::DW_TAG_union_type:
+ return "<unnamed-tag>";
+ case dwarf::DW_TAG_namespace:
+ return "`anonymous namespace'";
+ }
+
+ return StringRef();
+}
+
+static const DISubprogram *getQualifiedNameComponents(
+ const DIScope *Scope, SmallVectorImpl<StringRef> &QualifiedNameComponents) {
+ const DISubprogram *ClosestSubprogram = nullptr;
+ while (Scope != nullptr) {
+ if (ClosestSubprogram == nullptr)
+ ClosestSubprogram = dyn_cast<DISubprogram>(Scope);
+ StringRef ScopeName = getPrettyScopeName(Scope);
+ if (!ScopeName.empty())
+ QualifiedNameComponents.push_back(ScopeName);
+ Scope = Scope->getScope().resolve();
+ }
+ return ClosestSubprogram;
+}
+
+static std::string getQualifiedName(ArrayRef<StringRef> QualifiedNameComponents,
+ StringRef TypeName) {
+ std::string FullyQualifiedName;
+ for (StringRef QualifiedNameComponent : reverse(QualifiedNameComponents)) {
+ FullyQualifiedName.append(QualifiedNameComponent);
+ FullyQualifiedName.append("::");
+ }
+ FullyQualifiedName.append(TypeName);
+ return FullyQualifiedName;
+}
+
+static std::string getFullyQualifiedName(const DIScope *Scope, StringRef Name) {
+ SmallVector<StringRef, 5> QualifiedNameComponents;
+ getQualifiedNameComponents(Scope, QualifiedNameComponents);
+ return getQualifiedName(QualifiedNameComponents, Name);
+}
+
+struct CodeViewDebug::TypeLoweringScope {
+ TypeLoweringScope(CodeViewDebug &CVD) : CVD(CVD) { ++CVD.TypeEmissionLevel; }
+ ~TypeLoweringScope() {
+ // Don't decrement TypeEmissionLevel until after emitting deferred types, so
+ // inner TypeLoweringScopes don't attempt to emit deferred types.
+ if (CVD.TypeEmissionLevel == 1)
+ CVD.emitDeferredCompleteTypes();
+ --CVD.TypeEmissionLevel;
+ }
+ CodeViewDebug &CVD;
+};
+
+static std::string getFullyQualifiedName(const DIScope *Ty) {
+ const DIScope *Scope = Ty->getScope().resolve();
+ return getFullyQualifiedName(Scope, getPrettyScopeName(Ty));
+}
+
+TypeIndex CodeViewDebug::getScopeIndex(const DIScope *Scope) {
+ // No scope means global scope and that uses the zero index.
+ if (!Scope || isa<DIFile>(Scope))
+ return TypeIndex();
+
+ assert(!isa<DIType>(Scope) && "shouldn't make a namespace scope for a type");
+
+ // Check if we've already translated this scope.
+ auto I = TypeIndices.find({Scope, nullptr});
+ if (I != TypeIndices.end())
+ return I->second;
+
+ // Build the fully qualified name of the scope.
+ std::string ScopeName = getFullyQualifiedName(Scope);
+ TypeIndex TI =
+ TypeTable.writeStringId(StringIdRecord(TypeIndex(), ScopeName));
+ return recordTypeIndexForDINode(Scope, TI);
+}
+
+TypeIndex CodeViewDebug::getFuncIdForSubprogram(const DISubprogram *SP) {
+ assert(SP);
+
+ // Check if we've already translated this subprogram.
+ auto I = TypeIndices.find({SP, nullptr});
+ if (I != TypeIndices.end())
+ return I->second;
+
+ // The display name includes function template arguments. Drop them to match
+ // MSVC.
+ StringRef DisplayName = SP->getDisplayName().split('<').first;
+
+ const DIScope *Scope = SP->getScope().resolve();
+ TypeIndex TI;
+ if (const auto *Class = dyn_cast_or_null<DICompositeType>(Scope)) {
+ // If the scope is a DICompositeType, then this must be a method. Member
+ // function types take some special handling, and require access to the
+ // subprogram.
+ TypeIndex ClassType = getTypeIndex(Class);
+ MemberFuncIdRecord MFuncId(ClassType, getMemberFunctionType(SP, Class),
+ DisplayName);
+ TI = TypeTable.writeMemberFuncId(MFuncId);
+ } else {
+ // Otherwise, this must be a free function.
+ TypeIndex ParentScope = getScopeIndex(Scope);
+ FuncIdRecord FuncId(ParentScope, getTypeIndex(SP->getType()), DisplayName);
+ TI = TypeTable.writeFuncId(FuncId);
+ }
+
+ return recordTypeIndexForDINode(SP, TI);
+}
+
+TypeIndex CodeViewDebug::getMemberFunctionType(const DISubprogram *SP,
+ const DICompositeType *Class) {
+ // Always use the method declaration as the key for the function type. The
+ // method declaration contains the this adjustment.
+ if (SP->getDeclaration())
+ SP = SP->getDeclaration();
+ assert(!SP->getDeclaration() && "should use declaration as key");
+
+ // Key the MemberFunctionRecord into the map as {SP, Class}. It won't collide
+  // with the MemberFuncIdRecord, which is keyed as {SP, nullptr}.
+ auto I = TypeIndices.find({SP, Class});
+ if (I != TypeIndices.end())
+ return I->second;
+
+ // Make sure complete type info for the class is emitted *after* the member
+ // function type, as the complete class type is likely to reference this
+ // member function type.
+ TypeLoweringScope S(*this);
+ TypeIndex TI =
+ lowerTypeMemberFunction(SP->getType(), Class, SP->getThisAdjustment());
+ return recordTypeIndexForDINode(SP, TI, Class);
+}
+
+TypeIndex CodeViewDebug::recordTypeIndexForDINode(const DINode *Node,
+ TypeIndex TI,
+ const DIType *ClassTy) {
+ auto InsertResult = TypeIndices.insert({{Node, ClassTy}, TI});
+ (void)InsertResult;
+ assert(InsertResult.second && "DINode was already assigned a type index");
+ return TI;
+}
+
+unsigned CodeViewDebug::getPointerSizeInBytes() {
+ return MMI->getModule()->getDataLayout().getPointerSizeInBits() / 8;
+}
+
+void CodeViewDebug::recordLocalVariable(LocalVariable &&Var,
+ const DILocation *InlinedAt) {
+ if (InlinedAt) {
+ // This variable was inlined. Associate it with the InlineSite.
+ const DISubprogram *Inlinee = Var.DIVar->getScope()->getSubprogram();
+ InlineSite &Site = getInlineSite(InlinedAt, Inlinee);
+ Site.InlinedLocals.emplace_back(Var);
+ } else {
+ // This variable goes in the main ProcSym.
+ CurFn->Locals.emplace_back(Var);
+ }
+}
+
+static void addLocIfNotPresent(SmallVectorImpl<const DILocation *> &Locs,
+ const DILocation *Loc) {
+ auto B = Locs.begin(), E = Locs.end();
+ if (std::find(B, E, Loc) == E)
+ Locs.push_back(Loc);
+}
+
+void CodeViewDebug::maybeRecordLocation(const DebugLoc &DL,
+ const MachineFunction *MF) {
+ // Skip this instruction if it has the same location as the previous one.
+ if (DL == CurFn->LastLoc)
+ return;
+
+ const DIScope *Scope = DL.get()->getScope();
+ if (!Scope)
+ return;
+
+  // Skip this location if its line number is larger than the maximum we can
+  // record.
+ LineInfo LI(DL.getLine(), DL.getLine(), /*IsStatement=*/true);
+ if (LI.getStartLine() != DL.getLine() || LI.isAlwaysStepInto() ||
+ LI.isNeverStepInto())
+ return;
+
+ ColumnInfo CI(DL.getCol(), /*EndColumn=*/0);
+ if (CI.getStartColumn() != DL.getCol())
+ return;
+
+ if (!CurFn->HaveLineInfo)
+ CurFn->HaveLineInfo = true;
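+  // Reuse the previously recorded file id when this location is in the same
+  // file as the last one.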
+ unsigned FileId = 0;
+ if (CurFn->LastLoc.get() && CurFn->LastLoc->getFile() == DL->getFile())
+ FileId = CurFn->LastFileId;
+ else
+ FileId = CurFn->LastFileId = maybeRecordFile(DL->getFile());
+ CurFn->LastLoc = DL;
+
+ unsigned FuncId = CurFn->FuncId;
+ if (const DILocation *SiteLoc = DL->getInlinedAt()) {
+ const DILocation *Loc = DL.get();
+
+ // If this location was actually inlined from somewhere else, give it the ID
+ // of the inline call site.
+ FuncId =
+ getInlineSite(SiteLoc, Loc->getScope()->getSubprogram()).SiteFuncId;
+
+ // Ensure we have links in the tree of inline call sites.
+ bool FirstLoc = true;
+ while ((SiteLoc = Loc->getInlinedAt())) {
+ InlineSite &Site =
+ getInlineSite(SiteLoc, Loc->getScope()->getSubprogram());
+ if (!FirstLoc)
+ addLocIfNotPresent(Site.ChildSites, Loc);
+ FirstLoc = false;
+ Loc = SiteLoc;
+ }
+ addLocIfNotPresent(CurFn->ChildSites, Loc);
+ }
+
+ OS.EmitCVLocDirective(FuncId, FileId, DL.getLine(), DL.getCol(),
+ /*PrologueEnd=*/false,
+ /*IsStmt=*/false, DL->getFilename());
+}
+
+void CodeViewDebug::emitCodeViewMagicVersion() {
+ OS.EmitValueToAlignment(4);
+ OS.AddComment("Debug section magic");
+ OS.EmitIntValue(COFF::DEBUG_SECTION_MAGIC, 4);
+}
+
+void CodeViewDebug::endModule() {
+ if (!Asm || !MMI->hasDebugInfo())
+ return;
+
+ assert(Asm != nullptr);
+
+ // The COFF .debug$S section consists of several subsections, each starting
+ // with a 4-byte control code (e.g. 0xF1, 0xF2, etc) and then a 4-byte length
+ // of the payload followed by the payload itself. The subsections are 4-byte
+ // aligned.
+
+ // Use the generic .debug$S section, and make a subsection for all the inlined
+ // subprograms.
+ switchToDebugSectionForSymbol(nullptr);
+ emitInlineeLinesSubsection();
+
+ // Emit per-function debug information.
+ for (auto &P : FnDebugInfo)
+ if (!P.first->isDeclarationForLinker())
+ emitDebugInfoForFunction(P.first, P.second);
+
+ // Emit global variable debug information.
+ setCurrentSubprogram(nullptr);
+ emitDebugInfoForGlobals();
+
+ // Emit retained types.
+ emitDebugInfoForRetainedTypes();
+
+ // Switch back to the generic .debug$S section after potentially processing
+ // comdat symbol sections.
+ switchToDebugSectionForSymbol(nullptr);
+
+ // Emit UDT records for any types used by global variables.
+ if (!GlobalUDTs.empty()) {
+ MCSymbol *SymbolsEnd = beginCVSubsection(ModuleSubstreamKind::Symbols);
+ emitDebugInfoForUDTs(GlobalUDTs);
+ endCVSubsection(SymbolsEnd);
+ }
+
+  // This subsection holds a table mapping file indices to string table offsets.
+ OS.AddComment("File index to string table offset subsection");
+ OS.EmitCVFileChecksumsDirective();
+
+ // This subsection holds the string table.
+ OS.AddComment("String table");
+ OS.EmitCVStringTableDirective();
+
+ // Emit type information last, so that any types we translate while emitting
+ // function info are included.
+ emitTypeInformation();
+
+ clear();
+}
+
+static void emitNullTerminatedSymbolName(MCStreamer &OS, StringRef S) {
+ // Microsoft's linker seems to have trouble with symbol names longer than
+ // 0xffd8 bytes.
+ S = S.substr(0, 0xffd8);
+ SmallString<32> NullTerminatedString(S);
+ NullTerminatedString.push_back('\0');
+ OS.EmitBytes(NullTerminatedString);
+}
+
+void CodeViewDebug::emitTypeInformation() {
+ // Do nothing if we have no debug info or if no non-trivial types were emitted
+ // to TypeTable during codegen.
+ NamedMDNode *CU_Nodes = MMI->getModule()->getNamedMetadata("llvm.dbg.cu");
+ if (!CU_Nodes)
+ return;
+ if (TypeTable.empty())
+ return;
+
+  // Start the .debug$T section with the debug section magic, 0x4.
+ OS.SwitchSection(Asm->getObjFileLowering().getCOFFDebugTypesSection());
+ emitCodeViewMagicVersion();
+
+ SmallString<8> CommentPrefix;
+ if (OS.isVerboseAsm()) {
+ CommentPrefix += '\t';
+ CommentPrefix += Asm->MAI->getCommentString();
+ CommentPrefix += ' ';
+ }
+
+ CVTypeDumper CVTD(nullptr, /*PrintRecordBytes=*/false);
+ TypeTable.ForEachRecord(
+ [&](TypeIndex Index, StringRef Record) {
+ if (OS.isVerboseAsm()) {
+ // Emit a block comment describing the type record for readability.
+ SmallString<512> CommentBlock;
+ raw_svector_ostream CommentOS(CommentBlock);
+ ScopedPrinter SP(CommentOS);
+ SP.setPrefix(CommentPrefix);
+ CVTD.setPrinter(&SP);
+ Error E = CVTD.dump({Record.bytes_begin(), Record.bytes_end()});
+ if (E) {
+ logAllUnhandledErrors(std::move(E), errs(), "error: ");
+ llvm_unreachable("produced malformed type record");
+ }
+ // emitRawComment will insert its own tab and comment string before
+ // the first line, so strip off our first one. It also prints its own
+ // newline.
+ OS.emitRawComment(
+ CommentOS.str().drop_front(CommentPrefix.size() - 1).rtrim());
+ } else {
+#ifndef NDEBUG
+ // Assert that the type data is valid even if we aren't dumping
+ // comments. The MSVC linker doesn't do much type record validation,
+ // so the first link of an invalid type record can succeed while
+ // subsequent links will fail with LNK1285.
+ ByteStream<> Stream({Record.bytes_begin(), Record.bytes_end()});
+ CVTypeArray Types;
+ StreamReader Reader(Stream);
+ Error E = Reader.readArray(Types, Reader.getLength());
+ if (!E) {
+ TypeVisitorCallbacks C;
+ E = CVTypeVisitor(C).visitTypeStream(Types);
+ }
+ if (E) {
+ logAllUnhandledErrors(std::move(E), errs(), "error: ");
+ llvm_unreachable("produced malformed type record");
+ }
+#endif
+ }
+ OS.EmitBinaryData(Record);
+ });
+}
+
+void CodeViewDebug::emitInlineeLinesSubsection() {
+ if (InlinedSubprograms.empty())
+ return;
+
+ OS.AddComment("Inlinee lines subsection");
+ MCSymbol *InlineEnd = beginCVSubsection(ModuleSubstreamKind::InlineeLines);
+
+ // We don't provide any extra file info.
+ // FIXME: Find out if debuggers use this info.
+ OS.AddComment("Inlinee lines signature");
+ OS.EmitIntValue(unsigned(InlineeLinesSignature::Normal), 4);
+
+ for (const DISubprogram *SP : InlinedSubprograms) {
+ assert(TypeIndices.count({SP, nullptr}));
+ TypeIndex InlineeIdx = TypeIndices[{SP, nullptr}];
+
+ OS.AddBlankLine();
+ unsigned FileId = maybeRecordFile(SP->getFile());
+ OS.AddComment("Inlined function " + SP->getDisplayName() + " starts at " +
+ SP->getFilename() + Twine(':') + Twine(SP->getLine()));
+ OS.AddBlankLine();
+    // The filechecksum table uses 8-byte entries for now, and file ids start at
+ // 1.
+ unsigned FileOffset = (FileId - 1) * 8;
+ OS.AddComment("Type index of inlined function");
+ OS.EmitIntValue(InlineeIdx.getIndex(), 4);
+ OS.AddComment("Offset into filechecksum table");
+ OS.EmitIntValue(FileOffset, 4);
+ OS.AddComment("Starting line number");
+ OS.EmitIntValue(SP->getLine(), 4);
+ }
+
+ endCVSubsection(InlineEnd);
+}
+
+void CodeViewDebug::collectInlineSiteChildren(
+ SmallVectorImpl<unsigned> &Children, const FunctionInfo &FI,
+ const InlineSite &Site) {
+ for (const DILocation *ChildSiteLoc : Site.ChildSites) {
+ auto I = FI.InlineSites.find(ChildSiteLoc);
+ const InlineSite &ChildSite = I->second;
+ Children.push_back(ChildSite.SiteFuncId);
+ collectInlineSiteChildren(Children, FI, ChildSite);
+ }
+}
+
+void CodeViewDebug::emitInlinedCallSite(const FunctionInfo &FI,
+ const DILocation *InlinedAt,
+ const InlineSite &Site) {
+ MCSymbol *InlineBegin = MMI->getContext().createTempSymbol(),
+ *InlineEnd = MMI->getContext().createTempSymbol();
+
+ assert(TypeIndices.count({Site.Inlinee, nullptr}));
+ TypeIndex InlineeIdx = TypeIndices[{Site.Inlinee, nullptr}];
+
+ // SymbolRecord
+ OS.AddComment("Record length");
+ OS.emitAbsoluteSymbolDiff(InlineEnd, InlineBegin, 2); // RecordLength
+ OS.EmitLabel(InlineBegin);
+ OS.AddComment("Record kind: S_INLINESITE");
+ OS.EmitIntValue(SymbolKind::S_INLINESITE, 2); // RecordKind
+
+ OS.AddComment("PtrParent");
+ OS.EmitIntValue(0, 4);
+ OS.AddComment("PtrEnd");
+ OS.EmitIntValue(0, 4);
+ OS.AddComment("Inlinee type index");
+ OS.EmitIntValue(InlineeIdx.getIndex(), 4);
+
+ unsigned FileId = maybeRecordFile(Site.Inlinee->getFile());
+ unsigned StartLineNum = Site.Inlinee->getLine();
+ SmallVector<unsigned, 3> SecondaryFuncIds;
+ collectInlineSiteChildren(SecondaryFuncIds, FI, Site);
+
+ OS.EmitCVInlineLinetableDirective(Site.SiteFuncId, FileId, StartLineNum,
+ FI.Begin, FI.End, SecondaryFuncIds);
+
+ OS.EmitLabel(InlineEnd);
+
+ emitLocalVariableList(Site.InlinedLocals);
+
+ // Recurse on child inlined call sites before closing the scope.
+ for (const DILocation *ChildSite : Site.ChildSites) {
+ auto I = FI.InlineSites.find(ChildSite);
+ assert(I != FI.InlineSites.end() &&
+ "child site not in function inline site map");
+ emitInlinedCallSite(FI, ChildSite, I->second);
+ }
+
+ // Close the scope.
+ OS.AddComment("Record length");
+ OS.EmitIntValue(2, 2); // RecordLength
+ OS.AddComment("Record kind: S_INLINESITE_END");
+ OS.EmitIntValue(SymbolKind::S_INLINESITE_END, 2); // RecordKind
+}
+
+void CodeViewDebug::switchToDebugSectionForSymbol(const MCSymbol *GVSym) {
+ // If we have a symbol, it may be in a section that is COMDAT. If so, find the
+ // comdat key. A section may be comdat because of -ffunction-sections or
+ // because it is comdat in the IR.
+ MCSectionCOFF *GVSec =
+ GVSym ? dyn_cast<MCSectionCOFF>(&GVSym->getSection()) : nullptr;
+ const MCSymbol *KeySym = GVSec ? GVSec->getCOMDATSymbol() : nullptr;
+
+ MCSectionCOFF *DebugSec = cast<MCSectionCOFF>(
+ Asm->getObjFileLowering().getCOFFDebugSymbolsSection());
+ DebugSec = OS.getContext().getAssociativeCOFFSection(DebugSec, KeySym);
+
+ OS.SwitchSection(DebugSec);
+
+ // Emit the magic version number if this is the first time we've switched to
+ // this section.
+ if (ComdatDebugSections.insert(DebugSec).second)
+ emitCodeViewMagicVersion();
+}
+
+void CodeViewDebug::emitDebugInfoForFunction(const Function *GV,
+ FunctionInfo &FI) {
+ // For each function there is a separate subsection
+ // which holds the PC to file:line table.
+ const MCSymbol *Fn = Asm->getSymbol(GV);
+ assert(Fn);
+
+  // Switch to a comdat section, if appropriate.
+ switchToDebugSectionForSymbol(Fn);
+
+ std::string FuncName;
+ auto *SP = GV->getSubprogram();
+ assert(SP);
+ setCurrentSubprogram(SP);
+
+ // If we have a display name, build the fully qualified name by walking the
+ // chain of scopes.
+ if (!SP->getDisplayName().empty())
+ FuncName =
+ getFullyQualifiedName(SP->getScope().resolve(), SP->getDisplayName());
+
+ // If our DISubprogram name is empty, use the mangled name.
+ if (FuncName.empty())
+ FuncName = GlobalValue::getRealLinkageName(GV->getName());
+
+ // Emit a symbol subsection, required by VS2012+ to find function boundaries.
+ OS.AddComment("Symbol subsection for " + Twine(FuncName));
+ MCSymbol *SymbolsEnd = beginCVSubsection(ModuleSubstreamKind::Symbols);
+ {
+ MCSymbol *ProcRecordBegin = MMI->getContext().createTempSymbol(),
+ *ProcRecordEnd = MMI->getContext().createTempSymbol();
+ OS.AddComment("Record length");
+ OS.emitAbsoluteSymbolDiff(ProcRecordEnd, ProcRecordBegin, 2);
+ OS.EmitLabel(ProcRecordBegin);
+
+ if (GV->hasLocalLinkage()) {
+ OS.AddComment("Record kind: S_LPROC32_ID");
+ OS.EmitIntValue(unsigned(SymbolKind::S_LPROC32_ID), 2);
+ } else {
+ OS.AddComment("Record kind: S_GPROC32_ID");
+ OS.EmitIntValue(unsigned(SymbolKind::S_GPROC32_ID), 2);
+ }
+
+    // These fields are filled in by tools like CVPACK, which run after the fact.
+ OS.AddComment("PtrParent");
+ OS.EmitIntValue(0, 4);
+ OS.AddComment("PtrEnd");
+ OS.EmitIntValue(0, 4);
+ OS.AddComment("PtrNext");
+ OS.EmitIntValue(0, 4);
+ // This is the important bit that tells the debugger where the function
+    // code is located and what its size is:
+ OS.AddComment("Code size");
+ OS.emitAbsoluteSymbolDiff(FI.End, Fn, 4);
+ OS.AddComment("Offset after prologue");
+ OS.EmitIntValue(0, 4);
+ OS.AddComment("Offset before epilogue");
+ OS.EmitIntValue(0, 4);
+ OS.AddComment("Function type index");
+ OS.EmitIntValue(getFuncIdForSubprogram(GV->getSubprogram()).getIndex(), 4);
+ OS.AddComment("Function section relative address");
+ OS.EmitCOFFSecRel32(Fn);
+ OS.AddComment("Function section index");
+ OS.EmitCOFFSectionIndex(Fn);
+ OS.AddComment("Flags");
+ OS.EmitIntValue(0, 1);
+ // Emit the function display name as a null-terminated string.
+ OS.AddComment("Function name");
+ // Truncate the name so we won't overflow the record length field.
+ emitNullTerminatedSymbolName(OS, FuncName);
+ OS.EmitLabel(ProcRecordEnd);
+
+ emitLocalVariableList(FI.Locals);
+
+ // Emit inlined call site information. Only emit functions inlined directly
+ // into the parent function. We'll emit the other sites recursively as part
+ // of their parent inline site.
+ for (const DILocation *InlinedAt : FI.ChildSites) {
+ auto I = FI.InlineSites.find(InlinedAt);
+ assert(I != FI.InlineSites.end() &&
+ "child site not in function inline site map");
+ emitInlinedCallSite(FI, InlinedAt, I->second);
+ }
+
+ if (SP != nullptr)
+ emitDebugInfoForUDTs(LocalUDTs);
+
+ // We're done with this function.
+ OS.AddComment("Record length");
+ OS.EmitIntValue(0x0002, 2);
+ OS.AddComment("Record kind: S_PROC_ID_END");
+ OS.EmitIntValue(unsigned(SymbolKind::S_PROC_ID_END), 2);
+ }
+ endCVSubsection(SymbolsEnd);
+
+ // We have an assembler directive that takes care of the whole line table.
+ OS.EmitCVLinetableDirective(FI.FuncId, Fn, FI.End);
+}
+
+CodeViewDebug::LocalVarDefRange
+CodeViewDebug::createDefRangeMem(uint16_t CVRegister, int Offset) {
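+  // Build a def range for a variable stored in memory at CVRegister + Offset.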
+ LocalVarDefRange DR;
+ DR.InMemory = -1;
+ DR.DataOffset = Offset;
+ assert(DR.DataOffset == Offset && "truncation");
+ DR.StructOffset = 0;
+ DR.CVRegister = CVRegister;
+ return DR;
+}
+
+CodeViewDebug::LocalVarDefRange
+CodeViewDebug::createDefRangeReg(uint16_t CVRegister) {
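+  // Build a def range for a variable that lives entirely in a register.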
+ LocalVarDefRange DR;
+ DR.InMemory = 0;
+ DR.DataOffset = 0;
+ DR.StructOffset = 0;
+ DR.CVRegister = CVRegister;
+ return DR;
+}
+
+void CodeViewDebug::collectVariableInfoFromMMITable(
+ DenseSet<InlinedVariable> &Processed) {
+ const TargetSubtargetInfo &TSI = Asm->MF->getSubtarget();
+ const TargetFrameLowering *TFI = TSI.getFrameLowering();
+ const TargetRegisterInfo *TRI = TSI.getRegisterInfo();
+
+ for (const MachineModuleInfo::VariableDbgInfo &VI :
+ MMI->getVariableDbgInfo()) {
+ if (!VI.Var)
+ continue;
+ assert(VI.Var->isValidLocationForIntrinsic(VI.Loc) &&
+ "Expected inlined-at fields to agree");
+
+ Processed.insert(InlinedVariable(VI.Var, VI.Loc->getInlinedAt()));
+ LexicalScope *Scope = LScopes.findLexicalScope(VI.Loc);
+
+    // If the variable scope is not found, skip this variable.
+ if (!Scope)
+ continue;
+
+ // Get the frame register used and the offset.
+ unsigned FrameReg = 0;
+ int FrameOffset = TFI->getFrameIndexReference(*Asm->MF, VI.Slot, FrameReg);
+ uint16_t CVReg = TRI->getCodeViewRegNum(FrameReg);
+
+ // Calculate the label ranges.
+ LocalVarDefRange DefRange = createDefRangeMem(CVReg, FrameOffset);
+ for (const InsnRange &Range : Scope->getRanges()) {
+ const MCSymbol *Begin = getLabelBeforeInsn(Range.first);
+ const MCSymbol *End = getLabelAfterInsn(Range.second);
+ End = End ? End : Asm->getFunctionEnd();
+ DefRange.Ranges.emplace_back(Begin, End);
+ }
+
+ LocalVariable Var;
+ Var.DIVar = VI.Var;
+ Var.DefRanges.emplace_back(std::move(DefRange));
+ recordLocalVariable(std::move(Var), VI.Loc->getInlinedAt());
+ }
+}
+
+void CodeViewDebug::collectVariableInfo(const DISubprogram *SP) {
+ DenseSet<InlinedVariable> Processed;
+ // Grab the variable info that was squirreled away in the MMI side-table.
+ collectVariableInfoFromMMITable(Processed);
+
+ const TargetRegisterInfo *TRI = Asm->MF->getSubtarget().getRegisterInfo();
+
+ for (const auto &I : DbgValues) {
+ InlinedVariable IV = I.first;
+ if (Processed.count(IV))
+ continue;
+ const DILocalVariable *DIVar = IV.first;
+ const DILocation *InlinedAt = IV.second;
+
+ // Instruction ranges, specifying where IV is accessible.
+ const auto &Ranges = I.second;
+
+ LexicalScope *Scope = nullptr;
+ if (InlinedAt)
+ Scope = LScopes.findInlinedScope(DIVar->getScope(), InlinedAt);
+ else
+ Scope = LScopes.findLexicalScope(DIVar->getScope());
+    // If the variable scope is not found, skip this variable.
+ if (!Scope)
+ continue;
+
+ LocalVariable Var;
+ Var.DIVar = DIVar;
+
+ // Calculate the definition ranges.
+ for (auto I = Ranges.begin(), E = Ranges.end(); I != E; ++I) {
+ const InsnRange &Range = *I;
+ const MachineInstr *DVInst = Range.first;
+ assert(DVInst->isDebugValue() && "Invalid History entry");
+ const DIExpression *DIExpr = DVInst->getDebugExpression();
+
+      // For now, bail if there is a complex DWARF expression.
+ if (DIExpr && DIExpr->getNumElements() > 0)
+ continue;
+
+ // Bail if operand 0 is not a valid register. This means the variable is a
+ // simple constant, or is described by a complex expression.
+ // FIXME: Find a way to represent constant variables, since they are
+ // relatively common.
+ unsigned Reg =
+ DVInst->getOperand(0).isReg() ? DVInst->getOperand(0).getReg() : 0;
+ if (Reg == 0)
+ continue;
+
+      // There are two cases we can handle: the value lives in memory
+      // (indirect) or in a register.
+ bool IsIndirect = DVInst->getOperand(1).isImm();
+ unsigned CVReg = TRI->getCodeViewRegNum(DVInst->getOperand(0).getReg());
+ {
+ LocalVarDefRange DefRange;
+ if (IsIndirect) {
+ int64_t Offset = DVInst->getOperand(1).getImm();
+ DefRange = createDefRangeMem(CVReg, Offset);
+ } else {
+ DefRange = createDefRangeReg(CVReg);
+ }
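+        // Start a new def range only when the location differs from the
+        // previous one; identical locations just get more label ranges.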
+ if (Var.DefRanges.empty() ||
+ Var.DefRanges.back().isDifferentLocation(DefRange)) {
+ Var.DefRanges.emplace_back(std::move(DefRange));
+ }
+ }
+
+ // Compute the label range.
+ const MCSymbol *Begin = getLabelBeforeInsn(Range.first);
+ const MCSymbol *End = getLabelAfterInsn(Range.second);
+ if (!End) {
+ if (std::next(I) != E)
+ End = getLabelBeforeInsn(std::next(I)->first);
+ else
+ End = Asm->getFunctionEnd();
+ }
+
+ // If the last range end is our begin, just extend the last range.
+ // Otherwise make a new range.
+ SmallVectorImpl<std::pair<const MCSymbol *, const MCSymbol *>> &Ranges =
+ Var.DefRanges.back().Ranges;
+ if (!Ranges.empty() && Ranges.back().second == Begin)
+ Ranges.back().second = End;
+ else
+ Ranges.emplace_back(Begin, End);
+
+ // FIXME: Do more range combining.
+ }
+
+ recordLocalVariable(std::move(Var), InlinedAt);
+ }
+}
+
+void CodeViewDebug::beginFunction(const MachineFunction *MF) {
+ assert(!CurFn && "Can't process two functions at once!");
+
+ if (!Asm || !MMI->hasDebugInfo() || !MF->getFunction()->getSubprogram())
+ return;
+
+ DebugHandlerBase::beginFunction(MF);
+
+ const Function *GV = MF->getFunction();
+ assert(FnDebugInfo.count(GV) == false);
+ CurFn = &FnDebugInfo[GV];
+ CurFn->FuncId = NextFuncId++;
+ CurFn->Begin = Asm->getFunctionBegin();
+
+ // Find the end of the function prolog. First known non-DBG_VALUE and
+ // non-frame setup location marks the beginning of the function body.
+  // FIXME: is there a simpler way to do this? Can we just search
+ // for the first instruction of the function, not the last of the prolog?
+ DebugLoc PrologEndLoc;
+ bool EmptyPrologue = true;
+ for (const auto &MBB : *MF) {
+ for (const auto &MI : MBB) {
+ if (!MI.isDebugValue() && !MI.getFlag(MachineInstr::FrameSetup) &&
+ MI.getDebugLoc()) {
+ PrologEndLoc = MI.getDebugLoc();
+ break;
+ } else if (!MI.isDebugValue()) {
+ EmptyPrologue = false;
+ }
+ }
+ }
+
+ // Record beginning of function if we have a non-empty prologue.
+ if (PrologEndLoc && !EmptyPrologue) {
+ DebugLoc FnStartDL = PrologEndLoc.getFnDebugLoc();
+ maybeRecordLocation(FnStartDL, MF);
+ }
+}
+
+void CodeViewDebug::addToUDTs(const DIType *Ty, TypeIndex TI) {
+ // Don't record empty UDTs.
+ if (Ty->getName().empty())
+ return;
+
+ SmallVector<StringRef, 5> QualifiedNameComponents;
+ const DISubprogram *ClosestSubprogram = getQualifiedNameComponents(
+ Ty->getScope().resolve(), QualifiedNameComponents);
+
+ std::string FullyQualifiedName =
+ getQualifiedName(QualifiedNameComponents, getPrettyScopeName(Ty));
+
+ if (ClosestSubprogram == nullptr)
+ GlobalUDTs.emplace_back(std::move(FullyQualifiedName), TI);
+ else if (ClosestSubprogram == CurrentSubprogram)
+ LocalUDTs.emplace_back(std::move(FullyQualifiedName), TI);
+
+  // TODO: What if the ClosestSubprogram is neither null nor the current
+  // subprogram? Currently, the UDT just gets dropped on the floor.
+  //
+  // The current behavior is not desirable. To get maximal fidelity, we would
+  // need to perform all type translation before beginning emission of .debug$S
+  // and then make LocalUDTs a member of FunctionInfo.
+}
+
+TypeIndex CodeViewDebug::lowerType(const DIType *Ty, const DIType *ClassTy) {
+ // Generic dispatch for lowering an unknown type.
+ switch (Ty->getTag()) {
+ case dwarf::DW_TAG_array_type:
+ return lowerTypeArray(cast<DICompositeType>(Ty));
+ case dwarf::DW_TAG_typedef:
+ return lowerTypeAlias(cast<DIDerivedType>(Ty));
+ case dwarf::DW_TAG_base_type:
+ return lowerTypeBasic(cast<DIBasicType>(Ty));
+ case dwarf::DW_TAG_pointer_type:
+ case dwarf::DW_TAG_reference_type:
+ case dwarf::DW_TAG_rvalue_reference_type:
+ return lowerTypePointer(cast<DIDerivedType>(Ty));
+ case dwarf::DW_TAG_ptr_to_member_type:
+ return lowerTypeMemberPointer(cast<DIDerivedType>(Ty));
+ case dwarf::DW_TAG_const_type:
+ case dwarf::DW_TAG_volatile_type:
+ return lowerTypeModifier(cast<DIDerivedType>(Ty));
+ case dwarf::DW_TAG_subroutine_type:
+ if (ClassTy) {
+ // The member function type of a member function pointer has no
+ // ThisAdjustment.
+ return lowerTypeMemberFunction(cast<DISubroutineType>(Ty), ClassTy,
+ /*ThisAdjustment=*/0);
+ }
+ return lowerTypeFunction(cast<DISubroutineType>(Ty));
+ case dwarf::DW_TAG_enumeration_type:
+ return lowerTypeEnum(cast<DICompositeType>(Ty));
+ case dwarf::DW_TAG_class_type:
+ case dwarf::DW_TAG_structure_type:
+ return lowerTypeClass(cast<DICompositeType>(Ty));
+ case dwarf::DW_TAG_union_type:
+ return lowerTypeUnion(cast<DICompositeType>(Ty));
+ default:
+ // Use the null type index.
+ return TypeIndex();
+ }
+}
+
+TypeIndex CodeViewDebug::lowerTypeAlias(const DIDerivedType *Ty) {
+ DITypeRef UnderlyingTypeRef = Ty->getBaseType();
+ TypeIndex UnderlyingTypeIndex = getTypeIndex(UnderlyingTypeRef);
+ StringRef TypeName = Ty->getName();
+
+ addToUDTs(Ty, UnderlyingTypeIndex);
+
+ if (UnderlyingTypeIndex == TypeIndex(SimpleTypeKind::Int32Long) &&
+ TypeName == "HRESULT")
+ return TypeIndex(SimpleTypeKind::HResult);
+ if (UnderlyingTypeIndex == TypeIndex(SimpleTypeKind::UInt16Short) &&
+ TypeName == "wchar_t")
+ return TypeIndex(SimpleTypeKind::WideCharacter);
+
+ return UnderlyingTypeIndex;
+}
+
+TypeIndex CodeViewDebug::lowerTypeArray(const DICompositeType *Ty) {
+ DITypeRef ElementTypeRef = Ty->getBaseType();
+ TypeIndex ElementTypeIndex = getTypeIndex(ElementTypeRef);
+ // IndexType is size_t, which depends on the bitness of the target.
+ TypeIndex IndexType = Asm->MAI->getPointerSize() == 8
+ ? TypeIndex(SimpleTypeKind::UInt64Quad)
+ : TypeIndex(SimpleTypeKind::UInt32Long);
+
+ uint64_t ElementSize = getBaseTypeSize(ElementTypeRef) / 8;
+
+ bool UndefinedSubrange = false;
+
+ // FIXME:
+  // There is a bug in the front-end where an array of a structure, which was
+  // declared as an incomplete structure first, ends up not getting a size
+  // assigned to it. (PR28303)
+ // Example:
+ // struct A(*p)[3];
+ // struct A { int f; } a[3];
+ //
+ // This needs to be fixed in the front-end, but in the meantime we don't want
+ // to trigger an assertion because of this.
+ if (Ty->getSizeInBits() == 0) {
+ UndefinedSubrange = true;
+ }
+
+ // Add subranges to array type.
+ DINodeArray Elements = Ty->getElements();
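+  // Walk the subranges from the innermost dimension outward, wrapping the
+  // element type in one ArrayRecord per dimension.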
+ for (int i = Elements.size() - 1; i >= 0; --i) {
+ const DINode *Element = Elements[i];
+ assert(Element->getTag() == dwarf::DW_TAG_subrange_type);
+
+ const DISubrange *Subrange = cast<DISubrange>(Element);
+ assert(Subrange->getLowerBound() == 0 &&
+ "codeview doesn't support subranges with lower bounds");
+ int64_t Count = Subrange->getCount();
+
+    // A variable-length array (VLA) has a Count of -1.
+    // Replace it with a Count of 1, assuming that is the minimum VLA length.
+ // FIXME: Make front-end support VLA subrange and emit LF_DIMVARLU.
+ if (Count == -1) {
+ Count = 1;
+ UndefinedSubrange = true;
+ }
+
+ StringRef Name = (i == 0) ? Ty->getName() : "";
+ // Update the element size and element type index for subsequent subranges.
+ ElementSize *= Count;
+ ElementTypeIndex = TypeTable.writeArray(
+ ArrayRecord(ElementTypeIndex, IndexType, ElementSize, Name));
+ }
+
+ (void)UndefinedSubrange;
+ assert(UndefinedSubrange || ElementSize == (Ty->getSizeInBits() / 8));
+
+ return ElementTypeIndex;
+}
+
+TypeIndex CodeViewDebug::lowerTypeBasic(const DIBasicType *Ty) {
+ TypeIndex Index;
+ dwarf::TypeKind Kind;
+ uint32_t ByteSize;
+
+ Kind = static_cast<dwarf::TypeKind>(Ty->getEncoding());
+ ByteSize = Ty->getSizeInBits() / 8;
+
+ SimpleTypeKind STK = SimpleTypeKind::None;
+ switch (Kind) {
+ case dwarf::DW_ATE_address:
+ // FIXME: Translate
+ break;
+ case dwarf::DW_ATE_boolean:
+ switch (ByteSize) {
+ case 1: STK = SimpleTypeKind::Boolean8; break;
+ case 2: STK = SimpleTypeKind::Boolean16; break;
+ case 4: STK = SimpleTypeKind::Boolean32; break;
+ case 8: STK = SimpleTypeKind::Boolean64; break;
+ case 16: STK = SimpleTypeKind::Boolean128; break;
+ }
+ break;
+ case dwarf::DW_ATE_complex_float:
+ switch (ByteSize) {
+ case 2: STK = SimpleTypeKind::Complex16; break;
+ case 4: STK = SimpleTypeKind::Complex32; break;
+ case 8: STK = SimpleTypeKind::Complex64; break;
+ case 10: STK = SimpleTypeKind::Complex80; break;
+ case 16: STK = SimpleTypeKind::Complex128; break;
+ }
+ break;
+ case dwarf::DW_ATE_float:
+ switch (ByteSize) {
+ case 2: STK = SimpleTypeKind::Float16; break;
+ case 4: STK = SimpleTypeKind::Float32; break;
+ case 6: STK = SimpleTypeKind::Float48; break;
+ case 8: STK = SimpleTypeKind::Float64; break;
+ case 10: STK = SimpleTypeKind::Float80; break;
+ case 16: STK = SimpleTypeKind::Float128; break;
+ }
+ break;
+ case dwarf::DW_ATE_signed:
+ switch (ByteSize) {
+ case 1: STK = SimpleTypeKind::SByte; break;
+ case 2: STK = SimpleTypeKind::Int16Short; break;
+ case 4: STK = SimpleTypeKind::Int32; break;
+ case 8: STK = SimpleTypeKind::Int64Quad; break;
+ case 16: STK = SimpleTypeKind::Int128Oct; break;
+ }
+ break;
+ case dwarf::DW_ATE_unsigned:
+ switch (ByteSize) {
+ case 1: STK = SimpleTypeKind::Byte; break;
+ case 2: STK = SimpleTypeKind::UInt16Short; break;
+ case 4: STK = SimpleTypeKind::UInt32; break;
+ case 8: STK = SimpleTypeKind::UInt64Quad; break;
+ case 16: STK = SimpleTypeKind::UInt128Oct; break;
+ }
+ break;
+ case dwarf::DW_ATE_UTF:
+ switch (ByteSize) {
+ case 2: STK = SimpleTypeKind::Character16; break;
+ case 4: STK = SimpleTypeKind::Character32; break;
+ }
+ break;
+ case dwarf::DW_ATE_signed_char:
+ if (ByteSize == 1)
+ STK = SimpleTypeKind::SignedCharacter;
+ break;
+ case dwarf::DW_ATE_unsigned_char:
+ if (ByteSize == 1)
+ STK = SimpleTypeKind::UnsignedCharacter;
+ break;
+ default:
+ break;
+ }
+
+ // Apply some fixups based on the source-level type name.
+ if (STK == SimpleTypeKind::Int32 && Ty->getName() == "long int")
+ STK = SimpleTypeKind::Int32Long;
+ if (STK == SimpleTypeKind::UInt32 && Ty->getName() == "long unsigned int")
+ STK = SimpleTypeKind::UInt32Long;
+ if (STK == SimpleTypeKind::UInt16Short &&
+ (Ty->getName() == "wchar_t" || Ty->getName() == "__wchar_t"))
+ STK = SimpleTypeKind::WideCharacter;
+ if ((STK == SimpleTypeKind::SignedCharacter ||
+ STK == SimpleTypeKind::UnsignedCharacter) &&
+ Ty->getName() == "char")
+ STK = SimpleTypeKind::NarrowCharacter;
+
+ return TypeIndex(STK);
+}
+
+TypeIndex CodeViewDebug::lowerTypePointer(const DIDerivedType *Ty) {
+ TypeIndex PointeeTI = getTypeIndex(Ty->getBaseType());
+
+  // While processing the type being pointed to, it is possible we already
+ // created this pointer type. If so, we check here and return the existing
+ // pointer type.
+ auto I = TypeIndices.find({Ty, nullptr});
+ if (I != TypeIndices.end())
+ return I->second;
+
+ // Pointers to simple types can use SimpleTypeMode, rather than having a
+ // dedicated pointer type record.
+ if (PointeeTI.isSimple() &&
+ PointeeTI.getSimpleMode() == SimpleTypeMode::Direct &&
+ Ty->getTag() == dwarf::DW_TAG_pointer_type) {
+ SimpleTypeMode Mode = Ty->getSizeInBits() == 64
+ ? SimpleTypeMode::NearPointer64
+ : SimpleTypeMode::NearPointer32;
+ return TypeIndex(PointeeTI.getSimpleKind(), Mode);
+ }
+
+ PointerKind PK =
+ Ty->getSizeInBits() == 64 ? PointerKind::Near64 : PointerKind::Near32;
+ PointerMode PM = PointerMode::Pointer;
+ switch (Ty->getTag()) {
+ default: llvm_unreachable("not a pointer tag type");
+ case dwarf::DW_TAG_pointer_type:
+ PM = PointerMode::Pointer;
+ break;
+ case dwarf::DW_TAG_reference_type:
+ PM = PointerMode::LValueReference;
+ break;
+ case dwarf::DW_TAG_rvalue_reference_type:
+ PM = PointerMode::RValueReference;
+ break;
+ }
+ // FIXME: MSVC folds qualifiers into PointerOptions in the context of a method
+  // 'this' pointer, but not in normal contexts. Figure out what we're supposed to
+ // do.
+ PointerOptions PO = PointerOptions::None;
+ PointerRecord PR(PointeeTI, PK, PM, PO, Ty->getSizeInBits() / 8);
+ return TypeTable.writePointer(PR);
+}
+
+static PointerToMemberRepresentation
+translatePtrToMemberRep(unsigned SizeInBytes, bool IsPMF, unsigned Flags) {
+ // SizeInBytes being zero generally implies that the member pointer type was
+ // incomplete, which can happen if it is part of a function prototype. In this
+ // case, use the unknown model instead of the general model.
+ if (IsPMF) {
+ switch (Flags & DINode::FlagPtrToMemberRep) {
+ case 0:
+ return SizeInBytes == 0 ? PointerToMemberRepresentation::Unknown
+ : PointerToMemberRepresentation::GeneralFunction;
+ case DINode::FlagSingleInheritance:
+ return PointerToMemberRepresentation::SingleInheritanceFunction;
+ case DINode::FlagMultipleInheritance:
+ return PointerToMemberRepresentation::MultipleInheritanceFunction;
+ case DINode::FlagVirtualInheritance:
+ return PointerToMemberRepresentation::VirtualInheritanceFunction;
+ }
+ } else {
+ switch (Flags & DINode::FlagPtrToMemberRep) {
+ case 0:
+ return SizeInBytes == 0 ? PointerToMemberRepresentation::Unknown
+ : PointerToMemberRepresentation::GeneralData;
+ case DINode::FlagSingleInheritance:
+ return PointerToMemberRepresentation::SingleInheritanceData;
+ case DINode::FlagMultipleInheritance:
+ return PointerToMemberRepresentation::MultipleInheritanceData;
+ case DINode::FlagVirtualInheritance:
+ return PointerToMemberRepresentation::VirtualInheritanceData;
+ }
+ }
+ llvm_unreachable("invalid ptr to member representation");
+}
+
+TypeIndex CodeViewDebug::lowerTypeMemberPointer(const DIDerivedType *Ty) {
+ assert(Ty->getTag() == dwarf::DW_TAG_ptr_to_member_type);
+ TypeIndex ClassTI = getTypeIndex(Ty->getClassType());
+ TypeIndex PointeeTI = getTypeIndex(Ty->getBaseType(), Ty->getClassType());
+ PointerKind PK = Asm->MAI->getPointerSize() == 8 ? PointerKind::Near64
+ : PointerKind::Near32;
+ bool IsPMF = isa<DISubroutineType>(Ty->getBaseType());
+ PointerMode PM = IsPMF ? PointerMode::PointerToMemberFunction
+ : PointerMode::PointerToDataMember;
+ PointerOptions PO = PointerOptions::None; // FIXME
+ assert(Ty->getSizeInBits() / 8 <= 0xff && "pointer size too big");
+ uint8_t SizeInBytes = Ty->getSizeInBits() / 8;
+ MemberPointerInfo MPI(
+ ClassTI, translatePtrToMemberRep(SizeInBytes, IsPMF, Ty->getFlags()));
+ PointerRecord PR(PointeeTI, PK, PM, PO, SizeInBytes, MPI);
+ return TypeTable.writePointer(PR);
+}
+
+/// Given a DWARF calling convention, get the CodeView equivalent. If we don't
+/// have a translation, use the NearC convention.
+static CallingConvention dwarfCCToCodeView(unsigned DwarfCC) {
+ switch (DwarfCC) {
+ case dwarf::DW_CC_normal: return CallingConvention::NearC;
+ case dwarf::DW_CC_BORLAND_msfastcall: return CallingConvention::NearFast;
+ case dwarf::DW_CC_BORLAND_thiscall: return CallingConvention::ThisCall;
+ case dwarf::DW_CC_BORLAND_stdcall: return CallingConvention::NearStdCall;
+ case dwarf::DW_CC_BORLAND_pascal: return CallingConvention::NearPascal;
+ case dwarf::DW_CC_LLVM_vectorcall: return CallingConvention::NearVector;
+ }
+ return CallingConvention::NearC;
+}
+
+TypeIndex CodeViewDebug::lowerTypeModifier(const DIDerivedType *Ty) {
+ ModifierOptions Mods = ModifierOptions::None;
+ bool IsModifier = true;
+ const DIType *BaseTy = Ty;
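+  // Peel off const/volatile wrappers, accumulating modifier flags until we
+  // reach a non-modifier type.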
+ while (IsModifier && BaseTy) {
+ // FIXME: Need to add DWARF tag for __unaligned.
+ switch (BaseTy->getTag()) {
+ case dwarf::DW_TAG_const_type:
+ Mods |= ModifierOptions::Const;
+ break;
+ case dwarf::DW_TAG_volatile_type:
+ Mods |= ModifierOptions::Volatile;
+ break;
+ default:
+ IsModifier = false;
+ break;
+ }
+ if (IsModifier)
+ BaseTy = cast<DIDerivedType>(BaseTy)->getBaseType().resolve();
+ }
+ TypeIndex ModifiedTI = getTypeIndex(BaseTy);
+
+  // While processing the base type, it is possible we already created this
+  // modifier type. If so, we check here and return the existing modifier type.
+ auto I = TypeIndices.find({Ty, nullptr});
+ if (I != TypeIndices.end())
+ return I->second;
+
+ ModifierRecord MR(ModifiedTI, Mods);
+ return TypeTable.writeModifier(MR);
+}
+
+TypeIndex CodeViewDebug::lowerTypeFunction(const DISubroutineType *Ty) {
+ SmallVector<TypeIndex, 8> ReturnAndArgTypeIndices;
+ for (DITypeRef ArgTypeRef : Ty->getTypeArray())
+ ReturnAndArgTypeIndices.push_back(getTypeIndex(ArgTypeRef));
+
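+  // The first entry of the type array is the return type; the remaining
+  // entries are the parameter types.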
+ TypeIndex ReturnTypeIndex = TypeIndex::Void();
+ ArrayRef<TypeIndex> ArgTypeIndices = None;
+ if (!ReturnAndArgTypeIndices.empty()) {
+ auto ReturnAndArgTypesRef = makeArrayRef(ReturnAndArgTypeIndices);
+ ReturnTypeIndex = ReturnAndArgTypesRef.front();
+ ArgTypeIndices = ReturnAndArgTypesRef.drop_front();
+ }
+
+ ArgListRecord ArgListRec(TypeRecordKind::ArgList, ArgTypeIndices);
+ TypeIndex ArgListIndex = TypeTable.writeArgList(ArgListRec);
+
+ CallingConvention CC = dwarfCCToCodeView(Ty->getCC());
+
+ ProcedureRecord Procedure(ReturnTypeIndex, CC, FunctionOptions::None,
+ ArgTypeIndices.size(), ArgListIndex);
+ return TypeTable.writeProcedure(Procedure);
+}
+
+TypeIndex CodeViewDebug::lowerTypeMemberFunction(const DISubroutineType *Ty,
+ const DIType *ClassTy,
+ int ThisAdjustment) {
+ // Lower the containing class type.
+ TypeIndex ClassType = getTypeIndex(ClassTy);
+
+ SmallVector<TypeIndex, 8> ReturnAndArgTypeIndices;
+ for (DITypeRef ArgTypeRef : Ty->getTypeArray())
+ ReturnAndArgTypeIndices.push_back(getTypeIndex(ArgTypeRef));
+
+ TypeIndex ReturnTypeIndex = TypeIndex::Void();
+ ArrayRef<TypeIndex> ArgTypeIndices = None;
+ if (!ReturnAndArgTypeIndices.empty()) {
+ auto ReturnAndArgTypesRef = makeArrayRef(ReturnAndArgTypeIndices);
+ ReturnTypeIndex = ReturnAndArgTypesRef.front();
+ ArgTypeIndices = ReturnAndArgTypesRef.drop_front();
+ }
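+  // For member functions, the first parameter is the implicit 'this'
+  // pointer; split it out of the argument list.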
+ TypeIndex ThisTypeIndex = TypeIndex::Void();
+ if (!ArgTypeIndices.empty()) {
+ ThisTypeIndex = ArgTypeIndices.front();
+ ArgTypeIndices = ArgTypeIndices.drop_front();
+ }
+
+ ArgListRecord ArgListRec(TypeRecordKind::ArgList, ArgTypeIndices);
+ TypeIndex ArgListIndex = TypeTable.writeArgList(ArgListRec);
+
+ CallingConvention CC = dwarfCCToCodeView(Ty->getCC());
+
+ // TODO: Need to use the correct values for:
+ // FunctionOptions
+ // ThisPointerAdjustment.
+ TypeIndex TI = TypeTable.writeMemberFunction(MemberFunctionRecord(
+ ReturnTypeIndex, ClassType, ThisTypeIndex, CC, FunctionOptions::None,
+ ArgTypeIndices.size(), ArgListIndex, ThisAdjustment));
+
+ return TI;
+}
+
+static MemberAccess translateAccessFlags(unsigned RecordTag, unsigned Flags) {
+ switch (Flags & DINode::FlagAccessibility) {
+ case DINode::FlagPrivate: return MemberAccess::Private;
+ case DINode::FlagPublic: return MemberAccess::Public;
+ case DINode::FlagProtected: return MemberAccess::Protected;
+ case 0:
+ // If there was no explicit access control, provide the default for the tag.
+ return RecordTag == dwarf::DW_TAG_class_type ? MemberAccess::Private
+ : MemberAccess::Public;
+ }
+ llvm_unreachable("access flags are exclusive");
+}
+
+static MethodOptions translateMethodOptionFlags(const DISubprogram *SP) {
+ if (SP->isArtificial())
+ return MethodOptions::CompilerGenerated;
+
+ // FIXME: Handle other MethodOptions.
+
+ return MethodOptions::None;
+}
+
+static MethodKind translateMethodKindFlags(const DISubprogram *SP,
+ bool Introduced) {
+ switch (SP->getVirtuality()) {
+ case dwarf::DW_VIRTUALITY_none:
+ break;
+ case dwarf::DW_VIRTUALITY_virtual:
+ return Introduced ? MethodKind::IntroducingVirtual : MethodKind::Virtual;
+ case dwarf::DW_VIRTUALITY_pure_virtual:
+ return Introduced ? MethodKind::PureIntroducingVirtual
+ : MethodKind::PureVirtual;
+ default:
+ llvm_unreachable("unhandled virtuality case");
+ }
+
+ // FIXME: Get Clang to mark DISubprogram as static and do something with it.
+
+ return MethodKind::Vanilla;
+}
+
+static TypeRecordKind getRecordKind(const DICompositeType *Ty) {
+ switch (Ty->getTag()) {
+ case dwarf::DW_TAG_class_type: return TypeRecordKind::Class;
+ case dwarf::DW_TAG_structure_type: return TypeRecordKind::Struct;
+ }
+ llvm_unreachable("unexpected tag");
+}
+
+/// Return ClassOptions that should be present on both the forward declaration
+/// and the definition of a tag type.
+static ClassOptions getCommonClassOptions(const DICompositeType *Ty) {
+ ClassOptions CO = ClassOptions::None;
+
+ // MSVC always sets this flag, even for local types. Clang doesn't always
+ // appear to give every type a linkage name, which may be problematic for us.
+ // FIXME: Investigate the consequences of not following them here.
+ if (!Ty->getIdentifier().empty())
+ CO |= ClassOptions::HasUniqueName;
+
+ // Put the Nested flag on a type if it appears immediately inside a tag type.
+ // Do not walk the scope chain. Do not attempt to compute ContainsNestedClass
+ // here. That flag is only set on definitions, and not forward declarations.
+ const DIScope *ImmediateScope = Ty->getScope().resolve();
+ if (ImmediateScope && isa<DICompositeType>(ImmediateScope))
+ CO |= ClassOptions::Nested;
+
+ // Put the Scoped flag on function-local types.
+ for (const DIScope *Scope = ImmediateScope; Scope != nullptr;
+ Scope = Scope->getScope().resolve()) {
+ if (isa<DISubprogram>(Scope)) {
+ CO |= ClassOptions::Scoped;
+ break;
+ }
+ }
+
+ return CO;
+}
+
+TypeIndex CodeViewDebug::lowerTypeEnum(const DICompositeType *Ty) {
+ ClassOptions CO = getCommonClassOptions(Ty);
+ TypeIndex FTI;
+ unsigned EnumeratorCount = 0;
+
+ if (Ty->isForwardDecl()) {
+ CO |= ClassOptions::ForwardReference;
+ } else {
+ FieldListRecordBuilder Fields;
+ for (const DINode *Element : Ty->getElements()) {
+ // We assume that the frontend provides all members in source declaration
+ // order, which is what MSVC does.
+ if (auto *Enumerator = dyn_cast_or_null<DIEnumerator>(Element)) {
+ Fields.writeEnumerator(EnumeratorRecord(
+ MemberAccess::Public, APSInt::getUnsigned(Enumerator->getValue()),
+ Enumerator->getName()));
+ EnumeratorCount++;
+ }
+ }
+ FTI = TypeTable.writeFieldList(Fields);
+ }
+
+ std::string FullName = getFullyQualifiedName(Ty);
+
+ return TypeTable.writeEnum(EnumRecord(EnumeratorCount, CO, FTI, FullName,
+ Ty->getIdentifier(),
+ getTypeIndex(Ty->getBaseType())));
+}
+
+//===----------------------------------------------------------------------===//
+// ClassInfo
+//===----------------------------------------------------------------------===//
+
+struct llvm::ClassInfo {
+ struct MemberInfo {
+ const DIDerivedType *MemberTypeNode;
+ uint64_t BaseOffset;
+ };
+ // [MemberInfo]
+ typedef std::vector<MemberInfo> MemberList;
+
+ typedef TinyPtrVector<const DISubprogram *> MethodsList;
+ // MethodName -> MethodsList
+ typedef MapVector<MDString *, MethodsList> MethodsMap;
+
+ /// Base classes.
+ std::vector<const DIDerivedType *> Inheritance;
+
+ /// Direct members.
+ MemberList Members;
+ // Direct overloaded methods gathered by name.
+ MethodsMap Methods;
+
+ std::vector<const DICompositeType *> NestedClasses;
+};
+
+void CodeViewDebug::clear() {
+ assert(CurFn == nullptr);
+ FileIdMap.clear();
+ FnDebugInfo.clear();
+ FileToFilepathMap.clear();
+ LocalUDTs.clear();
+ GlobalUDTs.clear();
+ TypeIndices.clear();
+ CompleteTypeIndices.clear();
+}
+
+void CodeViewDebug::collectMemberInfo(ClassInfo &Info,
+ const DIDerivedType *DDTy) {
+ if (!DDTy->getName().empty()) {
+ Info.Members.push_back({DDTy, 0});
+ return;
+ }
+ // An unnamed member must represent a nested struct or union. Add all the
+ // indirect fields to the current record.
+ assert((DDTy->getOffsetInBits() % 8) == 0 && "Unnamed bitfield member!");
+ uint64_t Offset = DDTy->getOffsetInBits();
+ const DIType *Ty = DDTy->getBaseType().resolve();
+ const DICompositeType *DCTy = cast<DICompositeType>(Ty);
+ ClassInfo NestedInfo = collectClassInfo(DCTy);
+ for (const ClassInfo::MemberInfo &IndirectField : NestedInfo.Members)
+ Info.Members.push_back(
+ {IndirectField.MemberTypeNode, IndirectField.BaseOffset + Offset});
+}
+
+ClassInfo CodeViewDebug::collectClassInfo(const DICompositeType *Ty) {
+ ClassInfo Info;
+ // Add elements to structure type.
+ DINodeArray Elements = Ty->getElements();
+ for (auto *Element : Elements) {
+ // We assume that the frontend provides all members in source declaration
+ // order, which is what MSVC does.
+ if (!Element)
+ continue;
+ if (auto *SP = dyn_cast<DISubprogram>(Element)) {
+ Info.Methods[SP->getRawName()].push_back(SP);
+ } else if (auto *DDTy = dyn_cast<DIDerivedType>(Element)) {
+ if (DDTy->getTag() == dwarf::DW_TAG_member) {
+ collectMemberInfo(Info, DDTy);
+ } else if (DDTy->getTag() == dwarf::DW_TAG_inheritance) {
+ Info.Inheritance.push_back(DDTy);
+ } else if (DDTy->getTag() == dwarf::DW_TAG_friend) {
+ // Ignore friend members. It appears that MSVC emitted info about
+ // friends in the past, but modern versions do not.
+ }
+ // FIXME: Get Clang to emit function virtual table here and handle it.
+ } else if (auto *Composite = dyn_cast<DICompositeType>(Element)) {
+ Info.NestedClasses.push_back(Composite);
+ }
+ // Skip other unrecognized kinds of elements.
+ }
+ return Info;
+}
+
+TypeIndex CodeViewDebug::lowerTypeClass(const DICompositeType *Ty) {
+ // First, construct the forward decl. Don't look into Ty to compute the
+ // forward decl options, since it might not be available in all TUs.
+ TypeRecordKind Kind = getRecordKind(Ty);
+ ClassOptions CO =
+ ClassOptions::ForwardReference | getCommonClassOptions(Ty);
+ std::string FullName = getFullyQualifiedName(Ty);
+ TypeIndex FwdDeclTI = TypeTable.writeClass(ClassRecord(
+ Kind, 0, CO, HfaKind::None, WindowsRTClassKind::None, TypeIndex(),
+ TypeIndex(), TypeIndex(), 0, FullName, Ty->getIdentifier()));
+ if (!Ty->isForwardDecl())
+ DeferredCompleteTypes.push_back(Ty);
+ return FwdDeclTI;
+}
+
+TypeIndex CodeViewDebug::lowerCompleteTypeClass(const DICompositeType *Ty) {
+ // Construct the field list and complete type record.
+ TypeRecordKind Kind = getRecordKind(Ty);
+ ClassOptions CO = getCommonClassOptions(Ty);
+ TypeIndex FieldTI;
+ TypeIndex VShapeTI;
+ unsigned FieldCount;
+ bool ContainsNestedClass;
+ std::tie(FieldTI, VShapeTI, FieldCount, ContainsNestedClass) =
+ lowerRecordFieldList(Ty);
+
+ if (ContainsNestedClass)
+ CO |= ClassOptions::ContainsNestedClass;
+
+ std::string FullName = getFullyQualifiedName(Ty);
+
+ uint64_t SizeInBytes = Ty->getSizeInBits() / 8;
+
+ TypeIndex ClassTI = TypeTable.writeClass(ClassRecord(
+ Kind, FieldCount, CO, HfaKind::None, WindowsRTClassKind::None, FieldTI,
+ TypeIndex(), VShapeTI, SizeInBytes, FullName, Ty->getIdentifier()));
+
+ TypeTable.writeUdtSourceLine(UdtSourceLineRecord(
+ ClassTI, TypeTable.writeStringId(StringIdRecord(
+ TypeIndex(0x0), getFullFilepath(Ty->getFile()))),
+ Ty->getLine()));
+
+ addToUDTs(Ty, ClassTI);
+
+ return ClassTI;
+}
+
+TypeIndex CodeViewDebug::lowerTypeUnion(const DICompositeType *Ty) {
+ ClassOptions CO =
+ ClassOptions::ForwardReference | getCommonClassOptions(Ty);
+ std::string FullName = getFullyQualifiedName(Ty);
+ TypeIndex FwdDeclTI =
+ TypeTable.writeUnion(UnionRecord(0, CO, HfaKind::None, TypeIndex(), 0,
+ FullName, Ty->getIdentifier()));
+ if (!Ty->isForwardDecl())
+ DeferredCompleteTypes.push_back(Ty);
+ return FwdDeclTI;
+}
+
+TypeIndex CodeViewDebug::lowerCompleteTypeUnion(const DICompositeType *Ty) {
+ ClassOptions CO = ClassOptions::Sealed | getCommonClassOptions(Ty);
+ TypeIndex FieldTI;
+ unsigned FieldCount;
+ bool ContainsNestedClass;
+ std::tie(FieldTI, std::ignore, FieldCount, ContainsNestedClass) =
+ lowerRecordFieldList(Ty);
+
+ if (ContainsNestedClass)
+ CO |= ClassOptions::ContainsNestedClass;
+
+ uint64_t SizeInBytes = Ty->getSizeInBits() / 8;
+ std::string FullName = getFullyQualifiedName(Ty);
+
+ TypeIndex UnionTI = TypeTable.writeUnion(
+ UnionRecord(FieldCount, CO, HfaKind::None, FieldTI, SizeInBytes, FullName,
+ Ty->getIdentifier()));
+
+ TypeTable.writeUdtSourceLine(UdtSourceLineRecord(
+ UnionTI, TypeTable.writeStringId(StringIdRecord(
+ TypeIndex(0x0), getFullFilepath(Ty->getFile()))),
+ Ty->getLine()));
+
+ addToUDTs(Ty, UnionTI);
+
+ return UnionTI;
+}
+
+std::tuple<TypeIndex, TypeIndex, unsigned, bool>
+CodeViewDebug::lowerRecordFieldList(const DICompositeType *Ty) {
+ // Manually count members. MSVC appears to count everything that generates a
+ // field list record. Each individual overload in a method overload group
+ // contributes to this count, even though the overload group is a single field
+ // list record.
+ unsigned MemberCount = 0;
+ ClassInfo Info = collectClassInfo(Ty);
+ FieldListRecordBuilder Fields;
+
+ // Create base classes.
+ for (const DIDerivedType *I : Info.Inheritance) {
+ if (I->getFlags() & DINode::FlagVirtual) {
+ // Virtual base.
+ // FIXME: Emit VBPtrOffset when the frontend provides it.
+ unsigned VBPtrOffset = 0;
+ // FIXME: Despite the accessor name, the offset is really in bytes.
+ unsigned VBTableIndex = I->getOffsetInBits() / 4;
+ Fields.writeVirtualBaseClass(VirtualBaseClassRecord(
+ translateAccessFlags(Ty->getTag(), I->getFlags()),
+ getTypeIndex(I->getBaseType()), getVBPTypeIndex(), VBPtrOffset,
+ VBTableIndex));
+ } else {
+ assert(I->getOffsetInBits() % 8 == 0 &&
+ "bases must be on byte boundaries");
+ Fields.writeBaseClass(BaseClassRecord(
+ translateAccessFlags(Ty->getTag(), I->getFlags()),
+ getTypeIndex(I->getBaseType()), I->getOffsetInBits() / 8));
+ }
+ }
+
+ // Create members.
+ for (ClassInfo::MemberInfo &MemberInfo : Info.Members) {
+ const DIDerivedType *Member = MemberInfo.MemberTypeNode;
+ TypeIndex MemberBaseType = getTypeIndex(Member->getBaseType());
+ StringRef MemberName = Member->getName();
+ MemberAccess Access =
+ translateAccessFlags(Ty->getTag(), Member->getFlags());
+
+ if (Member->isStaticMember()) {
+ Fields.writeStaticDataMember(
+ StaticDataMemberRecord(Access, MemberBaseType, MemberName));
+ MemberCount++;
+ continue;
+ }
+
+ // Data member.
+ uint64_t MemberOffsetInBits =
+ Member->getOffsetInBits() + MemberInfo.BaseOffset;
+ if (Member->isBitField()) {
+ uint64_t StartBitOffset = MemberOffsetInBits;
+ if (const auto *CI =
+ dyn_cast_or_null<ConstantInt>(Member->getStorageOffsetInBits())) {
+ MemberOffsetInBits = CI->getZExtValue() + MemberInfo.BaseOffset;
+ }
+ StartBitOffset -= MemberOffsetInBits;
+ MemberBaseType = TypeTable.writeBitField(BitFieldRecord(
+ MemberBaseType, Member->getSizeInBits(), StartBitOffset));
+ }
+ uint64_t MemberOffsetInBytes = MemberOffsetInBits / 8;
+ Fields.writeDataMember(DataMemberRecord(Access, MemberBaseType,
+ MemberOffsetInBytes, MemberName));
+ MemberCount++;
+ }
+
+ // Create methods.
+ for (auto &MethodItr : Info.Methods) {
+ StringRef Name = MethodItr.first->getString();
+
+ std::vector<OneMethodRecord> Methods;
+ for (const DISubprogram *SP : MethodItr.second) {
+ TypeIndex MethodType = getMemberFunctionType(SP, Ty);
+ bool Introduced = SP->getFlags() & DINode::FlagIntroducedVirtual;
+
+ unsigned VFTableOffset = -1;
+ if (Introduced)
+ VFTableOffset = SP->getVirtualIndex() * getPointerSizeInBytes();
+
+ Methods.push_back(
+ OneMethodRecord(MethodType, translateMethodKindFlags(SP, Introduced),
+ translateMethodOptionFlags(SP),
+ translateAccessFlags(Ty->getTag(), SP->getFlags()),
+ VFTableOffset, Name));
+ MemberCount++;
+ }
+ assert(Methods.size() > 0 && "Empty methods map entry");
+ if (Methods.size() == 1)
+ Fields.writeOneMethod(Methods[0]);
+ else {
+ TypeIndex MethodList =
+ TypeTable.writeMethodOverloadList(MethodOverloadListRecord(Methods));
+ Fields.writeOverloadedMethod(
+ OverloadedMethodRecord(Methods.size(), MethodList, Name));
+ }
+ }
+
+ // Create nested classes.
+ for (const DICompositeType *Nested : Info.NestedClasses) {
+ NestedTypeRecord R(getTypeIndex(DITypeRef(Nested)), Nested->getName());
+ Fields.writeNestedType(R);
+ MemberCount++;
+ }
+
+ TypeIndex FieldTI = TypeTable.writeFieldList(Fields);
+ return std::make_tuple(FieldTI, TypeIndex(), MemberCount,
+ !Info.NestedClasses.empty());
+}
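// A worked example of the bit-field arithmetic above (a sketch; the exact
// metadata depends on the frontend):
struct FlagBits {
  unsigned a : 3;   // DI member offset 0, storage offset 0
  unsigned b : 5;   // DI member offset 3, storage offset 0
};
// For 'b', MemberOffsetInBits starts at 3; the storage-offset constant (0)
// then replaces it, so StartBitOffset = 3 - 0 = 3. The member type becomes
// BitFieldRecord(unsigned, /*Size=*/5, /*Start=*/3) and the data member is
// emitted at byte offset 0.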
+
+TypeIndex CodeViewDebug::getVBPTypeIndex() {
+ if (!VBPType.getIndex()) {
+ // Make a 'const int *' type.
+ ModifierRecord MR(TypeIndex::Int32(), ModifierOptions::Const);
+ TypeIndex ModifiedTI = TypeTable.writeModifier(MR);
+
+ PointerKind PK = getPointerSizeInBytes() == 8 ? PointerKind::Near64
+ : PointerKind::Near32;
+ PointerMode PM = PointerMode::Pointer;
+ PointerOptions PO = PointerOptions::None;
+ PointerRecord PR(ModifiedTI, PK, PM, PO, getPointerSizeInBytes());
+
+ VBPType = TypeTable.writePointer(PR);
+ }
+
+ return VBPType;
+}
+
+TypeIndex CodeViewDebug::getTypeIndex(DITypeRef TypeRef, DITypeRef ClassTyRef) {
+ const DIType *Ty = TypeRef.resolve();
+ const DIType *ClassTy = ClassTyRef.resolve();
+
+ // The null DIType is the void type. Don't try to hash it.
+ if (!Ty)
+ return TypeIndex::Void();
+
+ // Check if we've already translated this type. Don't try to do a
+ // get-or-create style insertion that caches the hash lookup across the
+ // lowerType call, because lowerType itself will update the TypeIndices map.
+ auto I = TypeIndices.find({Ty, ClassTy});
+ if (I != TypeIndices.end())
+ return I->second;
+
+ TypeLoweringScope S(*this);
+ TypeIndex TI = lowerType(Ty, ClassTy);
+ return recordTypeIndexForDINode(Ty, TI, ClassTy);
+}
+
+TypeIndex CodeViewDebug::getCompleteTypeIndex(DITypeRef TypeRef) {
+ const DIType *Ty = TypeRef.resolve();
+
+ // The null DIType is the void type. Don't try to hash it.
+ if (!Ty)
+ return TypeIndex::Void();
+
+ // If this is a non-record type, the complete type index is the same as the
+ // normal type index. Just call getTypeIndex.
+ switch (Ty->getTag()) {
+ case dwarf::DW_TAG_class_type:
+ case dwarf::DW_TAG_structure_type:
+ case dwarf::DW_TAG_union_type:
+ break;
+ default:
+ return getTypeIndex(Ty);
+ }
+
+ // Check if we've already translated the complete record type. Lowering a
+ // complete type should never trigger lowering another complete type, so we
+ // can reuse the hash table lookup result.
+ const auto *CTy = cast<DICompositeType>(Ty);
+ auto InsertResult = CompleteTypeIndices.insert({CTy, TypeIndex()});
+ if (!InsertResult.second)
+ return InsertResult.first->second;
+
+ TypeLoweringScope S(*this);
+
+ // Make sure the forward declaration is emitted first. It's unclear if this
+ // is necessary, but MSVC does it, and we should follow suit until we can show
+ // otherwise.
+ TypeIndex FwdDeclTI = getTypeIndex(CTy);
+
+ // Just use the forward decl if we don't have complete type info. This might
+ // happen if the frontend is using modules and expects the complete definition
+ // to be emitted elsewhere.
+ if (CTy->isForwardDecl())
+ return FwdDeclTI;
+
+ TypeIndex TI;
+ switch (CTy->getTag()) {
+ case dwarf::DW_TAG_class_type:
+ case dwarf::DW_TAG_structure_type:
+ TI = lowerCompleteTypeClass(CTy);
+ break;
+ case dwarf::DW_TAG_union_type:
+ TI = lowerCompleteTypeUnion(CTy);
+ break;
+ default:
+ llvm_unreachable("not a record");
+ }
+
+ InsertResult.first->second = TI;
+ return TI;
+}
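// To make the complete-vs-forward distinction above concrete (a sketch, not
// from the patch):
struct T { int x; };
void g() {
  T t;   // the S_LOCAL symbol for 't' uses getCompleteTypeIndex(T)
}
struct U {
  T t;   // the field list for U refers to T via getTypeIndex(T), i.e. the
         // forward declaration, which is what breaks cycles in the type graph
};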
+
+/// Emit all the deferred complete record types. Try to do this in FIFO order,
+/// and do this until fixpoint, as each complete record type typically
+/// references many other record types.
+void CodeViewDebug::emitDeferredCompleteTypes() {
+ SmallVector<const DICompositeType *, 4> TypesToEmit;
+ while (!DeferredCompleteTypes.empty()) {
+ std::swap(DeferredCompleteTypes, TypesToEmit);
+ for (const DICompositeType *RecordTy : TypesToEmit)
+ getCompleteTypeIndex(RecordTy);
+ TypesToEmit.clear();
+ }
+}
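// Why the drain-until-empty loop above is needed (a sketch, not from the
// patch): completing one record can defer more records.
struct B;
struct A { B *pb; };   // completing A lowers 'B *', which creates B's forward
                       // decl and pushes B onto DeferredCompleteTypes
struct B { int x; };   // a later pass of the loop then completes B
// Each pass swaps the worklist out and lowers it; CompleteTypeIndices
// memoizes results, so every record is completed at most once and the loop
// terminates.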
+
+void CodeViewDebug::emitLocalVariableList(ArrayRef<LocalVariable> Locals) {
+ // Get the sorted list of parameters and emit them first.
+ SmallVector<const LocalVariable *, 6> Params;
+ for (const LocalVariable &L : Locals)
+ if (L.DIVar->isParameter())
+ Params.push_back(&L);
+ std::sort(Params.begin(), Params.end(),
+ [](const LocalVariable *L, const LocalVariable *R) {
+ return L->DIVar->getArg() < R->DIVar->getArg();
+ });
+ for (const LocalVariable *L : Params)
+ emitLocalVariable(*L);
+
+ // Next emit all non-parameters in the order that we found them.
+ for (const LocalVariable &L : Locals)
+ if (!L.DIVar->isParameter())
+ emitLocalVariable(L);
+}
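// Sketch of the resulting order (assuming every variable survives
// optimization): for
void h(int a, int b) { int t0, t1; /* ... */ }
// the records are emitted as a, b (parameters, sorted by argument number),
// then t0, t1 (non-parameters, in the order they were collected).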
+
+void CodeViewDebug::emitLocalVariable(const LocalVariable &Var) {
+ // LocalSym record, see SymbolRecord.h for more info.
+ MCSymbol *LocalBegin = MMI->getContext().createTempSymbol(),
+ *LocalEnd = MMI->getContext().createTempSymbol();
+ OS.AddComment("Record length");
+ OS.emitAbsoluteSymbolDiff(LocalEnd, LocalBegin, 2);
+ OS.EmitLabel(LocalBegin);
+
+ OS.AddComment("Record kind: S_LOCAL");
+ OS.EmitIntValue(unsigned(SymbolKind::S_LOCAL), 2);
+
+ LocalSymFlags Flags = LocalSymFlags::None;
+ if (Var.DIVar->isParameter())
+ Flags |= LocalSymFlags::IsParameter;
+ if (Var.DefRanges.empty())
+ Flags |= LocalSymFlags::IsOptimizedOut;
+
+ OS.AddComment("TypeIndex");
+ TypeIndex TI = getCompleteTypeIndex(Var.DIVar->getType());
+ OS.EmitIntValue(TI.getIndex(), 4);
+ OS.AddComment("Flags");
+ OS.EmitIntValue(static_cast<uint16_t>(Flags), 2);
+ // Truncate the name so we won't overflow the record length field.
+ emitNullTerminatedSymbolName(OS, Var.DIVar->getName());
+ OS.EmitLabel(LocalEnd);
+
+ // Calculate the on disk prefix of the appropriate def range record. The
+ // records and on disk formats are described in SymbolRecord.h. BytePrefix
+ // should be big enough to hold all forms without memory allocation.
+ SmallString<20> BytePrefix;
+ for (const LocalVarDefRange &DefRange : Var.DefRanges) {
+ BytePrefix.clear();
+ // FIXME: Handle bitpieces.
+ if (DefRange.StructOffset != 0)
+ continue;
+
+ if (DefRange.InMemory) {
+ DefRangeRegisterRelSym Sym(DefRange.CVRegister, 0, DefRange.DataOffset, 0,
+ 0, 0, ArrayRef<LocalVariableAddrGap>());
+ ulittle16_t SymKind = ulittle16_t(S_DEFRANGE_REGISTER_REL);
+ BytePrefix +=
+ StringRef(reinterpret_cast<const char *>(&SymKind), sizeof(SymKind));
+ BytePrefix +=
+ StringRef(reinterpret_cast<const char *>(&Sym.Header),
+ sizeof(Sym.Header) - sizeof(LocalVariableAddrRange));
+ } else {
+ assert(DefRange.DataOffset == 0 && "unexpected offset into register");
+ // Unclear what matters here.
+ DefRangeRegisterSym Sym(DefRange.CVRegister, 0, 0, 0, 0,
+ ArrayRef<LocalVariableAddrGap>());
+ ulittle16_t SymKind = ulittle16_t(S_DEFRANGE_REGISTER);
+ BytePrefix +=
+ StringRef(reinterpret_cast<const char *>(&SymKind), sizeof(SymKind));
+ BytePrefix +=
+ StringRef(reinterpret_cast<const char *>(&Sym.Header),
+ sizeof(Sym.Header) - sizeof(LocalVariableAddrRange));
+ }
+ OS.EmitCVDefRangeDirective(DefRange.Ranges, BytePrefix);
+ }
+}
+
+void CodeViewDebug::endFunction(const MachineFunction *MF) {
+ if (!Asm || !CurFn) // We haven't created any debug info for this function.
+ return;
+
+ const Function *GV = MF->getFunction();
+ assert(FnDebugInfo.count(GV));
+ assert(CurFn == &FnDebugInfo[GV]);
+
+ collectVariableInfo(GV->getSubprogram());
+
+ DebugHandlerBase::endFunction(MF);
+
+ // Don't emit anything if we don't have any line tables.
+ if (!CurFn->HaveLineInfo) {
+ FnDebugInfo.erase(GV);
+ CurFn = nullptr;
+ return;
+ }
+
+ CurFn->End = Asm->getFunctionEnd();
+
+ CurFn = nullptr;
+}
+
+void CodeViewDebug::beginInstruction(const MachineInstr *MI) {
+ DebugHandlerBase::beginInstruction(MI);
+
+ // Ignore DBG_VALUE locations and function prologue.
+ if (!Asm || !CurFn || MI->isDebugValue() ||
+ MI->getFlag(MachineInstr::FrameSetup))
+ return;
+ DebugLoc DL = MI->getDebugLoc();
+ if (DL == PrevInstLoc || !DL)
+ return;
+ maybeRecordLocation(DL, Asm->MF);
+}
+
+MCSymbol *CodeViewDebug::beginCVSubsection(ModuleSubstreamKind Kind) {
+ MCSymbol *BeginLabel = MMI->getContext().createTempSymbol(),
+ *EndLabel = MMI->getContext().createTempSymbol();
+ OS.EmitIntValue(unsigned(Kind), 4);
+ OS.AddComment("Subsection size");
+ OS.emitAbsoluteSymbolDiff(EndLabel, BeginLabel, 4);
+ OS.EmitLabel(BeginLabel);
+ return EndLabel;
+}
+
+void CodeViewDebug::endCVSubsection(MCSymbol *EndLabel) {
+ OS.EmitLabel(EndLabel);
+ // Every subsection must be aligned to a 4-byte boundary.
+ OS.EmitValueToAlignment(4);
+}
+
+void CodeViewDebug::emitDebugInfoForUDTs(
+ ArrayRef<std::pair<std::string, TypeIndex>> UDTs) {
+ for (const std::pair<std::string, codeview::TypeIndex> &UDT : UDTs) {
+ MCSymbol *UDTRecordBegin = MMI->getContext().createTempSymbol(),
+ *UDTRecordEnd = MMI->getContext().createTempSymbol();
+ OS.AddComment("Record length");
+ OS.emitAbsoluteSymbolDiff(UDTRecordEnd, UDTRecordBegin, 2);
+ OS.EmitLabel(UDTRecordBegin);
+
+ OS.AddComment("Record kind: S_UDT");
+ OS.EmitIntValue(unsigned(SymbolKind::S_UDT), 2);
+
+ OS.AddComment("Type");
+ OS.EmitIntValue(UDT.second.getIndex(), 4);
+
+ emitNullTerminatedSymbolName(OS, UDT.first);
+ OS.EmitLabel(UDTRecordEnd);
+ }
+}
+
+void CodeViewDebug::emitDebugInfoForGlobals() {
+ NamedMDNode *CUs = MMI->getModule()->getNamedMetadata("llvm.dbg.cu");
+ for (const MDNode *Node : CUs->operands()) {
+ const auto *CU = cast<DICompileUnit>(Node);
+
+ // First, emit all globals that are not in a comdat in a single symbol
+ // substream. MSVC doesn't like it if the substream is empty, so only open
+ // it if we have at least one global to emit.
+ switchToDebugSectionForSymbol(nullptr);
+ MCSymbol *EndLabel = nullptr;
+ for (const DIGlobalVariable *G : CU->getGlobalVariables()) {
+ if (const auto *GV = dyn_cast_or_null<GlobalVariable>(G->getVariable())) {
+ if (!GV->hasComdat() && !GV->isDeclarationForLinker()) {
+ if (!EndLabel) {
+ OS.AddComment("Symbol subsection for globals");
+ EndLabel = beginCVSubsection(ModuleSubstreamKind::Symbols);
+ }
+ emitDebugInfoForGlobal(G, Asm->getSymbol(GV));
+ }
+ }
+ }
+ if (EndLabel)
+ endCVSubsection(EndLabel);
+
+ // Second, emit each global that is in a comdat into its own .debug$S
+ // section along with its own symbol substream.
+ for (const DIGlobalVariable *G : CU->getGlobalVariables()) {
+ if (const auto *GV = dyn_cast_or_null<GlobalVariable>(G->getVariable())) {
+ if (GV->hasComdat()) {
+ MCSymbol *GVSym = Asm->getSymbol(GV);
+ OS.AddComment("Symbol subsection for " +
+ Twine(GlobalValue::getRealLinkageName(GV->getName())));
+ switchToDebugSectionForSymbol(GVSym);
+ EndLabel = beginCVSubsection(ModuleSubstreamKind::Symbols);
+ emitDebugInfoForGlobal(G, GVSym);
+ endCVSubsection(EndLabel);
+ }
+ }
+ }
+ }
+}
+
+void CodeViewDebug::emitDebugInfoForRetainedTypes() {
+ NamedMDNode *CUs = MMI->getModule()->getNamedMetadata("llvm.dbg.cu");
+ for (const MDNode *Node : CUs->operands()) {
+ for (auto *Ty : cast<DICompileUnit>(Node)->getRetainedTypes()) {
+ if (DIType *RT = dyn_cast<DIType>(Ty)) {
+ getTypeIndex(RT);
+ // FIXME: Add to global/local DTU list.
+ }
+ }
+ }
+}
+
+void CodeViewDebug::emitDebugInfoForGlobal(const DIGlobalVariable *DIGV,
+ MCSymbol *GVSym) {
+ // DataSym record, see SymbolRecord.h for more info.
+ // FIXME: Thread local data, etc
+ MCSymbol *DataBegin = MMI->getContext().createTempSymbol(),
+ *DataEnd = MMI->getContext().createTempSymbol();
+ OS.AddComment("Record length");
+ OS.emitAbsoluteSymbolDiff(DataEnd, DataBegin, 2);
+ OS.EmitLabel(DataBegin);
+ const auto *GV = cast<GlobalVariable>(DIGV->getVariable());
+ if (DIGV->isLocalToUnit()) {
+ if (GV->isThreadLocal()) {
+ OS.AddComment("Record kind: S_LTHREAD32");
+ OS.EmitIntValue(unsigned(SymbolKind::S_LTHREAD32), 2);
+ } else {
+ OS.AddComment("Record kind: S_LDATA32");
+ OS.EmitIntValue(unsigned(SymbolKind::S_LDATA32), 2);
+ }
+ } else {
+ if (GV->isThreadLocal()) {
+ OS.AddComment("Record kind: S_GTHREAD32");
+ OS.EmitIntValue(unsigned(SymbolKind::S_GTHREAD32), 2);
+ } else {
+ OS.AddComment("Record kind: S_GDATA32");
+ OS.EmitIntValue(unsigned(SymbolKind::S_GDATA32), 2);
+ }
+ }
+ OS.AddComment("Type");
+ OS.EmitIntValue(getCompleteTypeIndex(DIGV->getType()).getIndex(), 4);
+ OS.AddComment("DataOffset");
+ OS.EmitCOFFSecRel32(GVSym);
+ OS.AddComment("Segment");
+ OS.EmitCOFFSectionIndex(GVSym);
+ OS.AddComment("Name");
+ emitNullTerminatedSymbolName(OS, DIGV->getName());
+ OS.EmitLabel(DataEnd);
+}
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h b/contrib/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h
new file mode 100644
index 0000000..e4bbd61
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h
@@ -0,0 +1,310 @@
+//===-- llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h ----*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for writing Microsoft CodeView debug info.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_CODEVIEWDEBUG_H
+#define LLVM_LIB_CODEGEN_ASMPRINTER_CODEVIEWDEBUG_H
+
+#include "DebugHandlerBase.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/DebugInfo/CodeView/MemoryTypeTableBuilder.h"
+#include "llvm/DebugInfo/CodeView/TypeIndex.h"
+#include "llvm/IR/DebugInfo.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+
+namespace llvm {
+
+class StringRef;
+class LexicalScope;
+struct ClassInfo;
+
+/// \brief Collects and handles line table information in a CodeView format.
+class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase {
+ MCStreamer &OS;
+ codeview::MemoryTypeTableBuilder TypeTable;
+
+ /// Represents the most general definition range.
+ struct LocalVarDefRange {
+ /// Indicates that variable data is stored in memory relative to the
+ /// specified register.
+ int InMemory : 1;
+
+ /// Offset of variable data in memory.
+ int DataOffset : 31;
+
+ /// Offset of the data into the user level struct. If zero, no splitting
+ /// occurred.
+ uint16_t StructOffset;
+
+ /// Register containing the data or the register base of the memory
+ /// location containing the data.
+ uint16_t CVRegister;
+
+ /// Compares all location fields. This includes all fields except the label
+ /// ranges.
+ bool isDifferentLocation(LocalVarDefRange &O) {
+ return InMemory != O.InMemory || DataOffset != O.DataOffset ||
+ StructOffset != O.StructOffset || CVRegister != O.CVRegister;
+ }
+
+ SmallVector<std::pair<const MCSymbol *, const MCSymbol *>, 1> Ranges;
+ };
+
+ static LocalVarDefRange createDefRangeMem(uint16_t CVRegister, int Offset);
+ static LocalVarDefRange createDefRangeReg(uint16_t CVRegister);
+
+ /// Similar to DbgVariable in DwarfDebug, but not dwarf-specific.
+ struct LocalVariable {
+ const DILocalVariable *DIVar = nullptr;
+ SmallVector<LocalVarDefRange, 1> DefRanges;
+ };
+
+ struct InlineSite {
+ SmallVector<LocalVariable, 1> InlinedLocals;
+ SmallVector<const DILocation *, 1> ChildSites;
+ const DISubprogram *Inlinee = nullptr;
+
+ /// The ID of the inline site or function used with .cv_loc. Not a type
+ /// index.
+ unsigned SiteFuncId = 0;
+ };
+
+ // Store debug information for each function: its inline sites, local
+ // variables, and the labels bounding the function.
+ struct FunctionInfo {
+ /// Map from inlined call site to inlined instructions and child inlined
+ /// call sites. Listed in program order.
+ std::unordered_map<const DILocation *, InlineSite> InlineSites;
+
+ /// Ordered list of top-level inlined call sites.
+ SmallVector<const DILocation *, 1> ChildSites;
+
+ SmallVector<LocalVariable, 1> Locals;
+
+ DebugLoc LastLoc;
+ const MCSymbol *Begin = nullptr;
+ const MCSymbol *End = nullptr;
+ unsigned FuncId = 0;
+ unsigned LastFileId = 0;
+ bool HaveLineInfo = false;
+ };
+ FunctionInfo *CurFn;
+
+ /// The set of comdat .debug$S sections that we've seen so far. Each section
+ /// must start with a magic version number, which must be emitted only once.
+ /// This set tracks which sections we've already opened.
+ DenseSet<MCSectionCOFF *> ComdatDebugSections;
+
+ /// Switch to the appropriate .debug$S section for GVSym. If GVSym, the symbol
+ /// of an emitted global value, is in a comdat COFF section, this will switch
+ /// to a new .debug$S section in that comdat. This method ensures that the
+ /// section starts with the magic version number on first use. If GVSym is
+ /// null, uses the main .debug$S section.
+ void switchToDebugSectionForSymbol(const MCSymbol *GVSym);
+
+ /// The next available function index for use with our .cv_* directives. Not
+ /// to be confused with type indices for LF_FUNC_ID records.
+ unsigned NextFuncId = 0;
+
+ InlineSite &getInlineSite(const DILocation *InlinedAt,
+ const DISubprogram *Inlinee);
+
+ codeview::TypeIndex getFuncIdForSubprogram(const DISubprogram *SP);
+
+ static void collectInlineSiteChildren(SmallVectorImpl<unsigned> &Children,
+ const FunctionInfo &FI,
+ const InlineSite &Site);
+
+ /// Remember some debug info about each function. Keep it in a stable order to
+ /// emit at the end of the TU.
+ MapVector<const Function *, FunctionInfo> FnDebugInfo;
+
+ /// Map from DIFile to .cv_file id.
+ DenseMap<const DIFile *, unsigned> FileIdMap;
+
+ /// All inlined subprograms in the order they should be emitted.
+ SmallSetVector<const DISubprogram *, 4> InlinedSubprograms;
+
+ /// Map from a pair of a DI metadata node and a DI type (or scope), where the
+ /// second element may be nullptr, to CodeView type indices. Primarily indexed
+ /// by {DIType*, DIType*} and {DISubprogram*, DIType*}.
+ ///
+ /// The second entry in the key is needed for methods, as DISubroutineTypes
+ /// representing static method types are shared with non-method function
+ /// types.
+ DenseMap<std::pair<const DINode *, const DIType *>, codeview::TypeIndex>
+ TypeIndices;
+
+ /// Map from DICompositeType* to complete type index. Non-record types are
+ /// always looked up in the normal TypeIndices map.
+ DenseMap<const DICompositeType *, codeview::TypeIndex> CompleteTypeIndices;
+
+ /// Complete record types to emit after all active type lowerings are
+ /// finished.
+ SmallVector<const DICompositeType *, 4> DeferredCompleteTypes;
+
+ /// Number of type lowering frames active on the stack.
+ unsigned TypeEmissionLevel = 0;
+
+ codeview::TypeIndex VBPType;
+
+ const DISubprogram *CurrentSubprogram = nullptr;
+
+ // The UDTs we have seen while processing types; each entry is a pair of type
+ // name and type index.
+ std::vector<std::pair<std::string, codeview::TypeIndex>> LocalUDTs,
+ GlobalUDTs;
+
+ typedef std::map<const DIFile *, std::string> FileToFilepathMapTy;
+ FileToFilepathMapTy FileToFilepathMap;
+ StringRef getFullFilepath(const DIFile *S);
+
+ unsigned maybeRecordFile(const DIFile *F);
+
+ void maybeRecordLocation(const DebugLoc &DL, const MachineFunction *MF);
+
+ void clear();
+
+ void setCurrentSubprogram(const DISubprogram *SP) {
+ CurrentSubprogram = SP;
+ LocalUDTs.clear();
+ }
+
+ /// Emit the magic version number at the start of a CodeView type or symbol
+ /// section. Appears at the front of every .debug$S or .debug$T section.
+ void emitCodeViewMagicVersion();
+
+ void emitTypeInformation();
+
+ void emitInlineeLinesSubsection();
+
+ void emitDebugInfoForFunction(const Function *GV, FunctionInfo &FI);
+
+ void emitDebugInfoForGlobals();
+
+ void emitDebugInfoForRetainedTypes();
+
+ void emitDebugInfoForUDTs(
+ ArrayRef<std::pair<std::string, codeview::TypeIndex>> UDTs);
+
+ void emitDebugInfoForGlobal(const DIGlobalVariable *DIGV, MCSymbol *GVSym);
+
+ /// Opens a subsection of the given kind in a .debug$S codeview section.
+ /// Returns an end label for use with endCVSubsection when the subsection is
+ /// finished.
+ MCSymbol *beginCVSubsection(codeview::ModuleSubstreamKind Kind);
+
+ void endCVSubsection(MCSymbol *EndLabel);
+
+ void emitInlinedCallSite(const FunctionInfo &FI, const DILocation *InlinedAt,
+ const InlineSite &Site);
+
+ typedef DbgValueHistoryMap::InlinedVariable InlinedVariable;
+
+ void collectVariableInfo(const DISubprogram *SP);
+
+ void collectVariableInfoFromMMITable(DenseSet<InlinedVariable> &Processed);
+
+ /// Records information about a local variable in the appropriate scope. In
+ /// particular, locals from inlined code live inside the inlining site.
+ void recordLocalVariable(LocalVariable &&Var, const DILocation *Loc);
+
+ /// Emits local variables in the appropriate order.
+ void emitLocalVariableList(ArrayRef<LocalVariable> Locals);
+
+ /// Emits an S_LOCAL record and its associated defined ranges.
+ void emitLocalVariable(const LocalVariable &Var);
+
+ /// Translates the DIType to codeview if necessary and returns a type index
+ /// for it.
+ codeview::TypeIndex getTypeIndex(DITypeRef TypeRef,
+ DITypeRef ClassTyRef = DITypeRef());
+
+ codeview::TypeIndex getMemberFunctionType(const DISubprogram *SP,
+ const DICompositeType *Class);
+
+ codeview::TypeIndex getScopeIndex(const DIScope *Scope);
+
+ codeview::TypeIndex getVBPTypeIndex();
+
+ void addToUDTs(const DIType *Ty, codeview::TypeIndex TI);
+
+ codeview::TypeIndex lowerType(const DIType *Ty, const DIType *ClassTy);
+ codeview::TypeIndex lowerTypeAlias(const DIDerivedType *Ty);
+ codeview::TypeIndex lowerTypeArray(const DICompositeType *Ty);
+ codeview::TypeIndex lowerTypeBasic(const DIBasicType *Ty);
+ codeview::TypeIndex lowerTypePointer(const DIDerivedType *Ty);
+ codeview::TypeIndex lowerTypeMemberPointer(const DIDerivedType *Ty);
+ codeview::TypeIndex lowerTypeModifier(const DIDerivedType *Ty);
+ codeview::TypeIndex lowerTypeFunction(const DISubroutineType *Ty);
+ codeview::TypeIndex lowerTypeMemberFunction(const DISubroutineType *Ty,
+ const DIType *ClassTy,
+ int ThisAdjustment);
+ codeview::TypeIndex lowerTypeEnum(const DICompositeType *Ty);
+ codeview::TypeIndex lowerTypeClass(const DICompositeType *Ty);
+ codeview::TypeIndex lowerTypeUnion(const DICompositeType *Ty);
+
+ /// Symbol records should point to complete types, but type records should
+ /// always point to incomplete types to avoid cycles in the type graph. Only
+ /// use this entry point when generating symbol records. The complete and
+ /// incomplete type indices only differ for record types. All other types use
+ /// the same index.
+ codeview::TypeIndex getCompleteTypeIndex(DITypeRef TypeRef);
+
+ codeview::TypeIndex lowerCompleteTypeClass(const DICompositeType *Ty);
+ codeview::TypeIndex lowerCompleteTypeUnion(const DICompositeType *Ty);
+
+ struct TypeLoweringScope;
+
+ void emitDeferredCompleteTypes();
+
+ void collectMemberInfo(ClassInfo &Info, const DIDerivedType *DDTy);
+ ClassInfo collectClassInfo(const DICompositeType *Ty);
+
+ /// Common record member lowering functionality for record types, which are
+ /// structs, classes, and unions. Returns the field list index, the vshape
+ /// table index, the member count, and whether the record contains a nested
+ /// class.
+ std::tuple<codeview::TypeIndex, codeview::TypeIndex, unsigned, bool>
+ lowerRecordFieldList(const DICompositeType *Ty);
+
+ /// Inserts {{Node, ClassTy}, TI} into TypeIndices and checks for duplicates.
+ codeview::TypeIndex recordTypeIndexForDINode(const DINode *Node,
+ codeview::TypeIndex TI,
+ const DIType *ClassTy = nullptr);
+
+ unsigned getPointerSizeInBytes();
+
+public:
+ CodeViewDebug(AsmPrinter *Asm);
+
+ void setSymbolSize(const llvm::MCSymbol *, uint64_t) override {}
+
+ /// \brief Emit the COFF section that holds the line table information.
+ void endModule() override;
+
+ /// \brief Gather pre-function debug information.
+ void beginFunction(const MachineFunction *MF) override;
+
+ /// \brief Gather post-function debug information.
+ void endFunction(const MachineFunction *) override;
+
+ /// \brief Process beginning of an instruction.
+ void beginInstruction(const MachineInstr *MI) override;
+};
+} // End of namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.cpp
index 7b0cdbd..2aaa85a 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.cpp
@@ -32,39 +32,6 @@
using namespace llvm;
//===----------------------------------------------------------------------===//
-// EmittingAsmStreamer Implementation
-//===----------------------------------------------------------------------===//
-unsigned EmittingAsmStreamer::emitULEB128(uint64_t Value, const char *Desc,
- unsigned PadTo) {
- AP->EmitULEB128(Value, Desc, PadTo);
- return 0;
-}
-
-unsigned EmittingAsmStreamer::emitInt8(unsigned char Value) {
- AP->EmitInt8(Value);
- return 0;
-}
-
-unsigned EmittingAsmStreamer::emitBytes(StringRef Data) {
- AP->OutStreamer->EmitBytes(Data);
- return 0;
-}
-
-//===----------------------------------------------------------------------===//
-// SizeReporterAsmStreamer Implementation
-//===----------------------------------------------------------------------===//
-unsigned SizeReporterAsmStreamer::emitULEB128(uint64_t Value, const char *Desc,
- unsigned PadTo) {
- return getULEB128Size(Value);
-}
-
-unsigned SizeReporterAsmStreamer::emitInt8(unsigned char Value) { return 1; }
-
-unsigned SizeReporterAsmStreamer::emitBytes(StringRef Data) {
- return Data.size();
-}
-
-//===----------------------------------------------------------------------===//
// DIEAbbrevData Implementation
//===----------------------------------------------------------------------===//
@@ -512,20 +479,6 @@ void DIEEntry::print(raw_ostream &O) const {
}
//===----------------------------------------------------------------------===//
-// DIETypeSignature Implementation
-//===----------------------------------------------------------------------===//
-void DIETypeSignature::EmitValue(const AsmPrinter *Asm,
- dwarf::Form Form) const {
- assert(Form == dwarf::DW_FORM_ref_sig8);
- Asm->OutStreamer->EmitIntValue(Unit->getTypeSignature(), 8);
-}
-
-LLVM_DUMP_METHOD
-void DIETypeSignature::print(raw_ostream &O) const {
- O << format("Type Unit: 0x%lx", Unit->getTypeSignature());
-}
-
-//===----------------------------------------------------------------------===//
// DIELoc Implementation
//===----------------------------------------------------------------------===//
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp
index 0201065..74c47d1 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp
@@ -279,7 +279,7 @@ void DIEHash::hashLocList(const DIELocList &LocList) {
// Hash an individual attribute \param Attr based on the type of attribute and
// the form.
-void DIEHash::hashAttribute(DIEValue Value, dwarf::Tag Tag) {
+void DIEHash::hashAttribute(const DIEValue &Value, dwarf::Tag Tag) {
dwarf::Attribute Attribute = Value.getAttribute();
// Other attribute values use the letter 'A' as the marker, and the value
@@ -353,7 +353,6 @@ void DIEHash::hashAttribute(DIEValue Value, dwarf::Tag Tag) {
case DIEValue::isExpr:
case DIEValue::isLabel:
case DIEValue::isDelta:
- case DIEValue::isTypeSignature:
llvm_unreachable("Add support for additional value types.");
}
}
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DIEHash.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DIEHash.h
index 44f0ce8..996cd7e 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DIEHash.h
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DIEHash.h
@@ -131,7 +131,7 @@ private:
void hashLocList(const DIELocList &LocList);
/// \brief Hashes an individual attribute.
- void hashAttribute(DIEValue Value, dwarf::Tag Tag);
+ void hashAttribute(const DIEValue &Value, dwarf::Tag Tag);
/// \brief Hashes an attribute that refers to another DIE.
void hashDIEEntry(dwarf::Attribute Attribute, dwarf::Tag Tag,
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp
index 3c46a99..adc536f 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp
@@ -15,7 +15,9 @@
#include "llvm/IR/DebugInfo.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
#include <algorithm>
#include <map>
using namespace llvm;
@@ -40,7 +42,7 @@ void DbgValueHistoryMap::startInstrRange(InlinedVariable Var,
assert(MI.isDebugValue() && "not a DBG_VALUE");
auto &Ranges = VarInstrRanges[Var];
if (!Ranges.empty() && Ranges.back().second == nullptr &&
- Ranges.back().first->isIdenticalTo(&MI)) {
+ Ranges.back().first->isIdenticalTo(MI)) {
DEBUG(dbgs() << "Coalescing identical DBG_VALUE entries:\n"
<< "\t" << Ranges.back().first << "\t" << MI << "\n");
return;
@@ -122,26 +124,6 @@ static void clobberRegisterUses(RegDescribedVarsMap &RegVars, unsigned RegNo,
clobberRegisterUses(RegVars, I, HistMap, ClobberingInstr);
}
-// \brief Collect all registers clobbered by @MI and apply the functor
-// @Func to their RegNo.
-// @Func should be a functor with a void(unsigned) signature. We're
-// not using std::function here for performance reasons. It has a
-// small but measurable impact. By using a functor instead of a
-// std::set& here, we can avoid the overhead of constructing
-// temporaries in calculateDbgValueHistory, which has a significant
-// performance impact.
-template<typename Callable>
-static void applyToClobberedRegisters(const MachineInstr &MI,
- const TargetRegisterInfo *TRI,
- Callable Func) {
- for (const MachineOperand &MO : MI.operands()) {
- if (!MO.isReg() || !MO.isDef() || !MO.getReg())
- continue;
- for (MCRegAliasIterator AI(MO.getReg(), TRI, true); AI.isValid(); ++AI)
- Func(*AI);
- }
-}
-
// \brief Returns the first instruction in @MBB which corresponds to
// the function epilogue, or nullptr if @MBB doesn't contain an epilogue.
static const MachineInstr *getFirstEpilogueInst(const MachineBasicBlock &MBB) {
@@ -156,12 +138,12 @@ static const MachineInstr *getFirstEpilogueInst(const MachineBasicBlock &MBB) {
E = MBB.rend();
I != E; ++I) {
if (I->getDebugLoc() != LastLoc)
- return Res;
+ return &*Res;
Res = &*I;
}
// If all instructions have the same debug location, assume whole MBB is
// an epilogue.
- return MBB.begin();
+ return &*MBB.begin();
}
// \brief Collect registers that are modified in the function body (their
@@ -173,10 +155,23 @@ static void collectChangingRegs(const MachineFunction *MF,
auto FirstEpilogueInst = getFirstEpilogueInst(MBB);
for (const auto &MI : MBB) {
+ // Avoid looking at prologue or epilogue instructions.
if (&MI == FirstEpilogueInst)
break;
- if (!MI.getFlag(MachineInstr::FrameSetup))
- applyToClobberedRegisters(MI, TRI, [&](unsigned r) { Regs.set(r); });
+ if (MI.getFlag(MachineInstr::FrameSetup))
+ continue;
+
+ // Look for register defs and register masks. Register masks are
+ // typically on calls and they clobber everything not in the mask.
+ for (const MachineOperand &MO : MI.operands()) {
+ if (MO.isReg() && MO.isDef() && MO.getReg()) {
+ for (MCRegAliasIterator AI(MO.getReg(), TRI, true); AI.isValid();
+ ++AI)
+ Regs.set(*AI);
+ } else if (MO.isRegMask()) {
+ Regs.setBitsNotInMask(MO.getRegMask());
+ }
+ }
}
}
}
@@ -187,16 +182,35 @@ void llvm::calculateDbgValueHistory(const MachineFunction *MF,
BitVector ChangingRegs(TRI->getNumRegs());
collectChangingRegs(MF, TRI, ChangingRegs);
+ const TargetLowering *TLI = MF->getSubtarget().getTargetLowering();
+ unsigned SP = TLI->getStackPointerRegisterToSaveRestore();
RegDescribedVarsMap RegVars;
for (const auto &MBB : *MF) {
for (const auto &MI : MBB) {
if (!MI.isDebugValue()) {
// Not a DBG_VALUE instruction. It may clobber registers which describe
// some variables.
- applyToClobberedRegisters(MI, TRI, [&](unsigned RegNo) {
- if (ChangingRegs.test(RegNo))
- clobberRegisterUses(RegVars, RegNo, Result, MI);
- });
+ for (const MachineOperand &MO : MI.operands()) {
+ if (MO.isReg() && MO.isDef() && MO.getReg()) {
+ // If this is a register def operand, it may end a debug value
+ // range.
+ for (MCRegAliasIterator AI(MO.getReg(), TRI, true); AI.isValid();
+ ++AI)
+ if (ChangingRegs.test(*AI))
+ clobberRegisterUses(RegVars, *AI, Result, MI);
+ } else if (MO.isRegMask()) {
+ // If this is a register mask operand, clobber all debug values in
+ // non-CSRs.
+ for (int I = ChangingRegs.find_first(); I != -1;
+ I = ChangingRegs.find_next(I)) {
+ // Don't consider SP to be clobbered by register masks.
+ if (unsigned(I) != SP && TRI->isPhysicalRegister(I) &&
+ MO.clobbersPhysReg(I)) {
+ clobberRegisterUses(RegVars, I, Result, MI);
+ }
+ }
+ }
+ }
continue;
}
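// A sketch of what the new regmask handling means in practice (annotation,
// not from the patch, and the MIR below is schematic):
//
//   DBG_VALUE %EBX, ... !"x"     ; x described in a callee-saved register
//   DBG_VALUE %EAX, ... !"y"     ; y described in a caller-saved register
//   CALL64pcrel32 @callee, <regmask preserving EBX, ...>
//
// The call's register mask clobbers everything it does not preserve, so the
// range for 'y' (EAX) ends at the call while the range for 'x' (EBX)
// survives. The stack pointer is explicitly exempted from mask clobbers.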
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.h
index 546d1b4..16d2d7f 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.h
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.h
@@ -12,13 +12,12 @@
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/IR/DebugInfoMetadata.h"
namespace llvm {
class MachineFunction;
class MachineInstr;
-class DILocalVariable;
-class DILocation;
class TargetRegisterInfo;
// For each user variable, keep a list of instruction ranges where this variable
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp
new file mode 100644
index 0000000..16ffe2e
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp
@@ -0,0 +1,230 @@
+//===-- llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp -------*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Common functionality for different debug information format backends.
+// LLVM currently supports DWARF and CodeView.
+//
+//===----------------------------------------------------------------------===//
+
+#include "DebugHandlerBase.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/IR/DebugInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+
+using namespace llvm;
+
+DebugHandlerBase::DebugHandlerBase(AsmPrinter *A) : Asm(A), MMI(Asm->MMI) {}
+
+// Each LexicalScope has a first and a last instruction marking the beginning
+// and the end of the scope, respectively. Create an inverse map listing which
+// scopes start (and end) at each instruction. One instruction may start (or
+// end) multiple scopes. Ignore scopes that are not reachable.
+void DebugHandlerBase::identifyScopeMarkers() {
+ SmallVector<LexicalScope *, 4> WorkList;
+ WorkList.push_back(LScopes.getCurrentFunctionScope());
+ while (!WorkList.empty()) {
+ LexicalScope *S = WorkList.pop_back_val();
+
+ const SmallVectorImpl<LexicalScope *> &Children = S->getChildren();
+ if (!Children.empty())
+ WorkList.append(Children.begin(), Children.end());
+
+ if (S->isAbstractScope())
+ continue;
+
+ for (const InsnRange &R : S->getRanges()) {
+ assert(R.first && "InsnRange does not have first instruction!");
+ assert(R.second && "InsnRange does not have second instruction!");
+ requestLabelBeforeInsn(R.first);
+ requestLabelAfterInsn(R.second);
+ }
+ }
+}
+
+// Return Label preceding the instruction.
+MCSymbol *DebugHandlerBase::getLabelBeforeInsn(const MachineInstr *MI) {
+ MCSymbol *Label = LabelsBeforeInsn.lookup(MI);
+ assert(Label && "Didn't insert label before instruction");
+ return Label;
+}
+
+// Return Label immediately following the instruction.
+MCSymbol *DebugHandlerBase::getLabelAfterInsn(const MachineInstr *MI) {
+ return LabelsAfterInsn.lookup(MI);
+}
+
+// Determine the relative position of the pieces described by P1 and P2.
+// Returns -1 if P1 is entirely before P2, 0 if P1 and P2 overlap,
+// 1 if P1 is entirely after P2.
+int DebugHandlerBase::pieceCmp(const DIExpression *P1, const DIExpression *P2) {
+ unsigned l1 = P1->getBitPieceOffset();
+ unsigned l2 = P2->getBitPieceOffset();
+ unsigned r1 = l1 + P1->getBitPieceSize();
+ unsigned r2 = l2 + P2->getBitPieceSize();
+ if (r1 <= l2)
+ return -1;
+ else if (r2 <= l1)
+ return 1;
+ else
+ return 0;
+}
+
+/// Determine whether two variable pieces overlap.
+bool DebugHandlerBase::piecesOverlap(const DIExpression *P1, const DIExpression *P2) {
+ if (!P1->isBitPiece() || !P2->isBitPiece())
+ return true;
+ return pieceCmp(P1, P2) == 0;
+}
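// A quick check of the piece arithmetic above (values illustrative):
//   P1 = bit piece [0, 32), P2 = bit piece [32, 64)
//     l1 = 0, r1 = 32, l2 = 32, r2 = 64
//     r1 <= l2, so pieceCmp(P1, P2) == -1 and piecesOverlap(P1, P2) is false.
//   P3 = bit piece [16, 48) overlaps both: pieceCmp(P3, P1) == 0 and
//     pieceCmp(P3, P2) == 0.
// Note that if either expression is not a bit piece, piecesOverlap
// conservatively returns true.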
+
+/// If this type is derived from a base type then return base type size.
+uint64_t DebugHandlerBase::getBaseTypeSize(const DITypeRef TyRef) {
+ DIType *Ty = TyRef.resolve();
+ assert(Ty);
+ DIDerivedType *DDTy = dyn_cast<DIDerivedType>(Ty);
+ if (!DDTy)
+ return Ty->getSizeInBits();
+
+ unsigned Tag = DDTy->getTag();
+
+ if (Tag != dwarf::DW_TAG_member && Tag != dwarf::DW_TAG_typedef &&
+ Tag != dwarf::DW_TAG_const_type && Tag != dwarf::DW_TAG_volatile_type &&
+ Tag != dwarf::DW_TAG_restrict_type)
+ return DDTy->getSizeInBits();
+
+ DIType *BaseType = DDTy->getBaseType().resolve();
+
+ assert(BaseType && "Unexpected invalid base type");
+
+ // If this is a derived type, recurse to the base type, unless the base is a
+ // reference type, in which case the size is just the size of the field
+ // itself. Pointer types don't need this since they are a different kind of
+ // qualification on the type.
+ if (BaseType->getTag() == dwarf::DW_TAG_reference_type ||
+ BaseType->getTag() == dwarf::DW_TAG_rvalue_reference_type)
+ return Ty->getSizeInBits();
+
+ return getBaseTypeSize(BaseType);
+}
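// A sketch of the recursion above (illustrative, not from the patch):
typedef const int CInt;   // DW_TAG_typedef wrapping DW_TAG_const_type
// getBaseTypeSize on CInt walks typedef -> const -> int and returns 32.
struct R { int &ref; };   // member whose base is DW_TAG_reference_type:
// the walk stops at the reference and the size of the field itself is used.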
+
+void DebugHandlerBase::beginFunction(const MachineFunction *MF) {
+ // Grab the lexical scopes for the function, if we don't have any of those
+ // then we're not going to be able to do anything.
+ LScopes.initialize(*MF);
+ if (LScopes.empty())
+ return;
+
+ // Make sure that each lexical scope will have a begin/end label.
+ identifyScopeMarkers();
+
+ // Calculate history for local variables.
+ assert(DbgValues.empty() && "DbgValues map wasn't cleaned!");
+ calculateDbgValueHistory(MF, Asm->MF->getSubtarget().getRegisterInfo(),
+ DbgValues);
+
+ // Request labels for the full history.
+ for (const auto &I : DbgValues) {
+ const auto &Ranges = I.second;
+ if (Ranges.empty())
+ continue;
+
+ // The first mention of a function argument gets the CurrentFnBegin
+ // label, so arguments are visible when breaking at function entry.
+ const DILocalVariable *DIVar = Ranges.front().first->getDebugVariable();
+ if (DIVar->isParameter() &&
+ getDISubprogram(DIVar->getScope())->describes(MF->getFunction())) {
+ LabelsBeforeInsn[Ranges.front().first] = Asm->getFunctionBegin();
+ if (Ranges.front().first->getDebugExpression()->isBitPiece()) {
+ // Mark all non-overlapping initial pieces.
+ for (auto I = Ranges.begin(); I != Ranges.end(); ++I) {
+ const DIExpression *Piece = I->first->getDebugExpression();
+ if (std::all_of(Ranges.begin(), I,
+ [&](DbgValueHistoryMap::InstrRange Pred) {
+ return !piecesOverlap(Piece, Pred.first->getDebugExpression());
+ }))
+ LabelsBeforeInsn[I->first] = Asm->getFunctionBegin();
+ else
+ break;
+ }
+ }
+ }
+
+ for (const auto &Range : Ranges) {
+ requestLabelBeforeInsn(Range.first);
+ if (Range.second)
+ requestLabelAfterInsn(Range.second);
+ }
+ }
+
+ PrevInstLoc = DebugLoc();
+ PrevLabel = Asm->getFunctionBegin();
+}
+
+void DebugHandlerBase::beginInstruction(const MachineInstr *MI) {
+ if (!MMI->hasDebugInfo())
+ return;
+
+ assert(CurMI == nullptr);
+ CurMI = MI;
+
+ // Insert labels where requested.
+ DenseMap<const MachineInstr *, MCSymbol *>::iterator I =
+ LabelsBeforeInsn.find(MI);
+
+ // No label needed.
+ if (I == LabelsBeforeInsn.end())
+ return;
+
+ // Label already assigned.
+ if (I->second)
+ return;
+
+ if (!PrevLabel) {
+ PrevLabel = MMI->getContext().createTempSymbol();
+ Asm->OutStreamer->EmitLabel(PrevLabel);
+ }
+ I->second = PrevLabel;
+}
+
+void DebugHandlerBase::endInstruction() {
+ if (!MMI->hasDebugInfo())
+ return;
+
+ assert(CurMI != nullptr);
+ // Don't create a new label after DBG_VALUE instructions.
+ // They don't generate code.
+ if (!CurMI->isDebugValue())
+ PrevLabel = nullptr;
+
+ DenseMap<const MachineInstr *, MCSymbol *>::iterator I =
+ LabelsAfterInsn.find(CurMI);
+ CurMI = nullptr;
+
+ // No label needed.
+ if (I == LabelsAfterInsn.end())
+ return;
+
+ // Label already assigned.
+ if (I->second)
+ return;
+
+ // We need a label after this instruction.
+ if (!PrevLabel) {
+ PrevLabel = MMI->getContext().createTempSymbol();
+ Asm->OutStreamer->EmitLabel(PrevLabel);
+ }
+ I->second = PrevLabel;
+}
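// One consequence of the PrevLabel logic above (an illustrative trace, not
// from the patch): adjacent label requests share a single symbol.
//
//   I1   ; requestLabelAfterInsn(I1)
//   I2   ; requestLabelBeforeInsn(I2)
//
// endInstruction() for I1 creates a temp symbol, emits it after I1, and
// leaves it in PrevLabel; beginInstruction(I2) then finds PrevLabel still set
// and reuses the same symbol as the label before I2, so no duplicate label is
// emitted between the two instructions.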
+
+void DebugHandlerBase::endFunction(const MachineFunction *MF) {
+ DbgValues.clear();
+ LabelsBeforeInsn.clear();
+ LabelsAfterInsn.clear();
+}
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.h
new file mode 100644
index 0000000..b8bbcec
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.h
@@ -0,0 +1,109 @@
+//===-- llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.h --------*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Common functionality for different debug information format backends.
+// LLVM currently supports DWARF and CodeView.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_DEBUGHANDLERBASE_H
+#define LLVM_LIB_CODEGEN_ASMPRINTER_DEBUGHANDLERBASE_H
+
+#include "AsmPrinterHandler.h"
+#include "DbgValueHistoryCalculator.h"
+#include "llvm/CodeGen/LexicalScopes.h"
+#include "llvm/CodeGen/MachineInstr.h"
+
+namespace llvm {
+
+class AsmPrinter;
+class MachineModuleInfo;
+
+/// Base class for debug information backends. Common functionality related to
+/// tracking which variables and scopes are alive at a given PC live here.
+class DebugHandlerBase : public AsmPrinterHandler {
+protected:
+ DebugHandlerBase(AsmPrinter *A);
+
+ /// Target of debug info emission.
+ AsmPrinter *Asm;
+
+ /// Collected machine module information.
+ MachineModuleInfo *MMI;
+
+ /// Previous instruction's location information. This is used to
+ /// determine label locations that indicate scope boundaries in DWARF
+ /// debug info.
+ DebugLoc PrevInstLoc;
+ MCSymbol *PrevLabel = nullptr;
+
+ /// This location indicates end of function prologue and beginning of
+ /// function body.
+ DebugLoc PrologEndLoc;
+
+ /// If nonnull, stores the current machine instruction we're processing.
+ const MachineInstr *CurMI = nullptr;
+
+ LexicalScopes LScopes;
+
+ /// History of DBG_VALUE and clobber instructions for each user
+ /// variable. Variables are listed in order of appearance.
+ DbgValueHistoryMap DbgValues;
+
+ /// Maps each instruction to the label emitted before it.
+ /// FIXME: Make this private from DwarfDebug, we have the necessary accessors
+ /// for it.
+ DenseMap<const MachineInstr *, MCSymbol *> LabelsBeforeInsn;
+
+ /// Maps each instruction to the label emitted after it.
+ DenseMap<const MachineInstr *, MCSymbol *> LabelsAfterInsn;
+
+ /// Identify instructions that mark the beginning or end of a scope.
+ void identifyScopeMarkers();
+
+ /// Ensure that a label will be emitted before MI.
+ void requestLabelBeforeInsn(const MachineInstr *MI) {
+ LabelsBeforeInsn.insert(std::make_pair(MI, nullptr));
+ }
+
+ /// Ensure that a label will be emitted after MI.
+ void requestLabelAfterInsn(const MachineInstr *MI) {
+ LabelsAfterInsn.insert(std::make_pair(MI, nullptr));
+ }
+
+ // AsmPrinterHandler overrides.
+public:
+ void beginInstruction(const MachineInstr *MI) override;
+ void endInstruction() override;
+
+ void beginFunction(const MachineFunction *MF) override;
+ void endFunction(const MachineFunction *MF) override;
+
+ /// Return Label preceding the instruction.
+ MCSymbol *getLabelBeforeInsn(const MachineInstr *MI);
+
+ /// Return Label immediately following the instruction.
+ MCSymbol *getLabelAfterInsn(const MachineInstr *MI);
+
+ /// Determine the relative position of the pieces described by P1 and P2.
+ /// Returns -1 if P1 is entirely before P2, 0 if P1 and P2 overlap,
+ /// 1 if P1 is entirely after P2.
+ static int pieceCmp(const DIExpression *P1, const DIExpression *P2);
+
+ /// Determine whether two variable pieces overlap.
+ static bool piecesOverlap(const DIExpression *P1, const DIExpression *P2);
+
+ /// If this type is derived from a base type then return base type size.
+ static uint64_t getBaseTypeSize(const DITypeRef TyRef);
+};
+
+}
+
+#endif
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DebugLocEntry.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DebugLocEntry.h
index b60ab91..20acd45 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DebugLocEntry.h
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DebugLocEntry.h
@@ -11,11 +11,11 @@
#define LLVM_LIB_CODEGEN_ASMPRINTER_DEBUGLOCENTRY_H
#include "DebugLocStream.h"
-#include "llvm/ADT/SmallString.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MachineLocation.h"
+#include "llvm/Support/Debug.h"
namespace llvm {
class AsmPrinter;
@@ -76,6 +76,20 @@ public:
const DIExpression *getExpression() const { return Expression; }
friend bool operator==(const Value &, const Value &);
friend bool operator<(const Value &, const Value &);
+ void dump() const {
+ if (isLocation()) {
+ llvm::dbgs() << "Loc = { reg=" << Loc.getReg() << " ";
+ if (Loc.isIndirect())
+ llvm::dbgs() << '+' << Loc.getOffset();
+ llvm::dbgs() << "} ";
+ }
+ else if (isConstantInt())
+ Constant.CIP->dump();
+ else if (isConstantFP())
+ Constant.CFP->dump();
+ if (Expression)
+ Expression->dump();
+ }
};
private:
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp
index 6665c16..2eae1b2 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp
@@ -12,7 +12,6 @@
//===----------------------------------------------------------------------===//
#include "DwarfException.h"
-#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/Twine.h"
#include "llvm/CodeGen/AsmPrinter.h"
@@ -43,8 +42,7 @@ DwarfCFIExceptionBase::DwarfCFIExceptionBase(AsmPrinter *A)
: EHStreamer(A), shouldEmitCFI(false) {}
void DwarfCFIExceptionBase::markFunctionEnd() {
- if (shouldEmitCFI)
- Asm->OutStreamer->EmitCFIEndProc();
+ endFragment();
if (MMI->getLandingPads().empty())
return;
@@ -53,23 +51,28 @@ void DwarfCFIExceptionBase::markFunctionEnd() {
MMI->TidyLandingPads();
}
+void DwarfCFIExceptionBase::endFragment() {
+ if (shouldEmitCFI)
+ Asm->OutStreamer->EmitCFIEndProc();
+}
+
DwarfCFIException::DwarfCFIException(AsmPrinter *A)
: DwarfCFIExceptionBase(A), shouldEmitPersonality(false),
- shouldEmitLSDA(false), shouldEmitMoves(false),
- moveTypeModule(AsmPrinter::CFI_M_None) {}
+ forceEmitPersonality(false), shouldEmitLSDA(false),
+ shouldEmitMoves(false), moveTypeModule(AsmPrinter::CFI_M_None) {}
DwarfCFIException::~DwarfCFIException() {}
/// endModule - Emit all exception information that should come after the
/// content.
void DwarfCFIException::endModule() {
- if (moveTypeModule == AsmPrinter::CFI_M_Debug)
- Asm->OutStreamer->EmitCFISections(false, true);
-
// SjLj uses this pass and it doesn't need this info.
if (!Asm->MAI->usesCFIForEH())
return;
+ if (moveTypeModule == AsmPrinter::CFI_M_Debug)
+ Asm->OutStreamer->EmitCFISections(false, true);
+
const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
unsigned PerEncoding = TLOF.getPersonalityEncoding();
@@ -86,6 +89,10 @@ void DwarfCFIException::endModule() {
}
}
+static MCSymbol *getExceptionSym(AsmPrinter *Asm) {
+ return Asm->getCurExceptionSym();
+}
+
void DwarfCFIException::beginFunction(const MachineFunction *MF) {
shouldEmitMoves = shouldEmitPersonality = shouldEmitLSDA = false;
const Function *F = MF->getFunction();
@@ -109,7 +116,7 @@ void DwarfCFIException::beginFunction(const MachineFunction *MF) {
Per = dyn_cast<Function>(F->getPersonalityFn()->stripPointerCasts());
// Emit a personality function even when there are no landing pads
- bool forceEmitPersonality =
+ forceEmitPersonality =
// ...if a personality function is explicitly specified
F->hasPersonalityFn() &&
// ... and it's not known to be a noop in the absence of invokes
@@ -126,7 +133,13 @@ void DwarfCFIException::beginFunction(const MachineFunction *MF) {
shouldEmitLSDA = shouldEmitPersonality &&
LSDAEncoding != dwarf::DW_EH_PE_omit;
- shouldEmitCFI = shouldEmitPersonality || shouldEmitMoves;
+ shouldEmitCFI = MF->getMMI().getContext().getAsmInfo()->usesCFIForEH() &&
+ (shouldEmitPersonality || shouldEmitMoves);
+ beginFragment(&*MF->begin(), getExceptionSym);
+}
+
+void DwarfCFIException::beginFragment(const MachineBasicBlock *MBB,
+ ExceptionSymbolProvider ESP) {
if (!shouldEmitCFI)
return;
@@ -136,20 +149,24 @@ void DwarfCFIException::beginFunction(const MachineFunction *MF) {
if (!shouldEmitPersonality)
return;
+ auto *F = MBB->getParent()->getFunction();
+ auto *P = dyn_cast<Function>(F->getPersonalityFn()->stripPointerCasts());
+ assert(P && "Expected personality function");
+
// If we are forced to emit this personality, make sure to record
// it because it might not appear in any landingpad
if (forceEmitPersonality)
- MMI->addPersonality(Per);
+ MMI->addPersonality(P);
+ const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
+ unsigned PerEncoding = TLOF.getPersonalityEncoding();
const MCSymbol *Sym =
- TLOF.getCFIPersonalitySymbol(Per, *Asm->Mang, Asm->TM, MMI);
+ TLOF.getCFIPersonalitySymbol(P, *Asm->Mang, Asm->TM, MMI);
Asm->OutStreamer->EmitCFIPersonality(Sym, PerEncoding);
// Provide LSDA information.
- if (!shouldEmitLSDA)
- return;
-
- Asm->OutStreamer->EmitCFILsda(Asm->getCurExceptionSym(), LSDAEncoding);
+ if (shouldEmitLSDA)
+ Asm->OutStreamer->EmitCFILsda(ESP(Asm), TLOF.getLSDAEncoding());
}
/// endFunction - Gather and emit post-function exception information.
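The markFunctionEnd/endFragment split above prepares CFI emission for functions laid out in more than one fragment. A minimal standalone model of the pairing it enforces (assumed semantics, not the LLVM implementation; Streamer and CFIEmitter are illustrative stand-ins):

#include <cstdio>

// Stand-in for the MCStreamer CFI interface.
struct Streamer {
  void EmitCFIStartProc() { std::puts("\t.cfi_startproc"); }
  void EmitCFIEndProc() { std::puts("\t.cfi_endproc"); }
};

struct CFIEmitter {
  Streamer S;
  bool shouldEmitCFI = true;
  void beginFragment() { if (shouldEmitCFI) S.EmitCFIStartProc(); }
  void endFragment() { if (shouldEmitCFI) S.EmitCFIEndProc(); }
};

int main() {
  CFIEmitter E;
  E.beginFragment(); // first fragment of a function
  E.endFragment();
  E.beginFragment(); // a later fragment gets its own start/end pair
  E.endFragment();
}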
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
index 725063a..7822814c 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
@@ -19,9 +19,10 @@ namespace llvm {
DwarfCompileUnit::DwarfCompileUnit(unsigned UID, const DICompileUnit *Node,
AsmPrinter *A, DwarfDebug *DW,
DwarfFile *DWU)
- : DwarfUnit(UID, dwarf::DW_TAG_compile_unit, Node, A, DW, DWU),
+ : DwarfUnit(dwarf::DW_TAG_compile_unit, Node, A, DW, DWU), UniqueID(UID),
Skeleton(nullptr), BaseAddress(nullptr) {
insertDIE(Node, &getUnitDie());
+ MacroLabelBegin = Asm->createTempSymbol("cu_macro_begin");
}
/// addLabelAddress - Add a dwarf label attribute data and value using
@@ -83,8 +84,8 @@ static const ConstantExpr *getMergedGlobalExpr(const Value *V) {
// First operand points to a global struct.
Value *Ptr = CE->getOperand(0);
- if (!isa<GlobalValue>(Ptr) ||
- !isa<StructType>(cast<PointerType>(Ptr->getType())->getElementType()))
+ GlobalValue *GV = dyn_cast<GlobalValue>(Ptr);
+ if (!GV || !isa<StructType>(GV->getValueType()))
return nullptr;
// Second operand is zero.
@@ -147,61 +148,69 @@ DIE *DwarfCompileUnit::getOrCreateGlobalVariableDIE(
// Add location.
bool addToAccelTable = false;
if (auto *Global = dyn_cast_or_null<GlobalVariable>(GV->getVariable())) {
- addToAccelTable = true;
- DIELoc *Loc = new (DIEValueAllocator) DIELoc;
- const MCSymbol *Sym = Asm->getSymbol(Global);
- if (Global->isThreadLocal()) {
- if (Asm->TM.Options.EmulatedTLS) {
- // TODO: add debug info for emulated thread local mode.
- } else {
- // FIXME: Make this work with -gsplit-dwarf.
- unsigned PointerSize = Asm->getDataLayout().getPointerSize();
- assert((PointerSize == 4 || PointerSize == 8) &&
- "Add support for other sizes if necessary");
- // Based on GCC's support for TLS:
- if (!DD->useSplitDwarf()) {
- // 1) Start with a constNu of the appropriate pointer size
- addUInt(*Loc, dwarf::DW_FORM_data1, PointerSize == 4
- ? dwarf::DW_OP_const4u
- : dwarf::DW_OP_const8u);
- // 2) containing the (relocated) offset of the TLS variable
- // within the module's TLS block.
- addExpr(*Loc, dwarf::DW_FORM_udata,
- Asm->getObjFileLowering().getDebugThreadLocalSymbol(Sym));
+ // We cannot describe the location of dllimport'd variables: the computation
+ // of their address requires loads from the IAT.
+ if (!Global->hasDLLImportStorageClass()) {
+ addToAccelTable = true;
+ DIELoc *Loc = new (DIEValueAllocator) DIELoc;
+ const MCSymbol *Sym = Asm->getSymbol(Global);
+ if (Global->isThreadLocal()) {
+ if (Asm->TM.Options.EmulatedTLS) {
+ // TODO: add debug info for emulated thread local mode.
} else {
- addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_GNU_const_index);
- addUInt(*Loc, dwarf::DW_FORM_udata,
- DD->getAddressPool().getIndex(Sym, /* TLS */ true));
+ // FIXME: Make this work with -gsplit-dwarf.
+ unsigned PointerSize = Asm->getDataLayout().getPointerSize();
+ assert((PointerSize == 4 || PointerSize == 8) &&
+ "Add support for other sizes if necessary");
+ // Based on GCC's support for TLS:
+ if (!DD->useSplitDwarf()) {
+ // 1) Start with a constNu of the appropriate pointer size
+ addUInt(*Loc, dwarf::DW_FORM_data1, PointerSize == 4
+ ? dwarf::DW_OP_const4u
+ : dwarf::DW_OP_const8u);
+ // 2) containing the (relocated) offset of the TLS variable
+ // within the module's TLS block.
+ addExpr(*Loc, dwarf::DW_FORM_udata,
+ Asm->getObjFileLowering().getDebugThreadLocalSymbol(Sym));
+ } else {
+ addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_GNU_const_index);
+ addUInt(*Loc, dwarf::DW_FORM_udata,
+ DD->getAddressPool().getIndex(Sym, /* TLS */ true));
+ }
+ // 3) followed by an OP to make the debugger do a TLS lookup.
+ addUInt(*Loc, dwarf::DW_FORM_data1,
+ DD->useGNUTLSOpcode() ? dwarf::DW_OP_GNU_push_tls_address
+ : dwarf::DW_OP_form_tls_address);
}
- // 3) followed by an OP to make the debugger do a TLS lookup.
- addUInt(*Loc, dwarf::DW_FORM_data1,
- DD->useGNUTLSOpcode() ? dwarf::DW_OP_GNU_push_tls_address
- : dwarf::DW_OP_form_tls_address);
+ } else {
+ DD->addArangeLabel(SymbolCU(this, Sym));
+ addOpAddress(*Loc, Sym);
}
- } else {
- DD->addArangeLabel(SymbolCU(this, Sym));
- addOpAddress(*Loc, Sym);
- }
- addBlock(*VariableDIE, dwarf::DW_AT_location, Loc);
- addLinkageName(*VariableDIE, GV->getLinkageName());
+ addBlock(*VariableDIE, dwarf::DW_AT_location, Loc);
+ if (DD->useAllLinkageNames())
+ addLinkageName(*VariableDIE, GV->getLinkageName());
+ }
} else if (const ConstantInt *CI =
dyn_cast_or_null<ConstantInt>(GV->getVariable())) {
addConstantValue(*VariableDIE, CI, GTy);
} else if (const ConstantExpr *CE = getMergedGlobalExpr(GV->getVariable())) {
- addToAccelTable = true;
- // GV is a merged global.
- DIELoc *Loc = new (DIEValueAllocator) DIELoc;
- Value *Ptr = CE->getOperand(0);
- MCSymbol *Sym = Asm->getSymbol(cast<GlobalValue>(Ptr));
- DD->addArangeLabel(SymbolCU(this, Sym));
- addOpAddress(*Loc, Sym);
- addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_constu);
- SmallVector<Value *, 3> Idx(CE->op_begin() + 1, CE->op_end());
- addUInt(*Loc, dwarf::DW_FORM_udata,
- Asm->getDataLayout().getIndexedOffset(Ptr->getType(), Idx));
- addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_plus);
- addBlock(*VariableDIE, dwarf::DW_AT_location, Loc);
+ auto *Ptr = cast<GlobalValue>(CE->getOperand(0));
+ if (!Ptr->hasDLLImportStorageClass()) {
+ addToAccelTable = true;
+ // GV is a merged global.
+ DIELoc *Loc = new (DIEValueAllocator) DIELoc;
+ MCSymbol *Sym = Asm->getSymbol(Ptr);
+ DD->addArangeLabel(SymbolCU(this, Sym));
+ addOpAddress(*Loc, Sym);
+ addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_constu);
+ SmallVector<Value *, 3> Idx(CE->op_begin() + 1, CE->op_end());
+ addUInt(*Loc, dwarf::DW_FORM_udata,
+ Asm->getDataLayout().getIndexedOffsetInType(Ptr->getValueType(),
+ Idx));
+ addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_plus);
+ addBlock(*VariableDIE, dwarf::DW_AT_location, Loc);
+ }
}
if (addToAccelTable) {
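The reindented block above builds a three-step DWARF location expression for thread-local variables. A standalone sketch of the bytes it produces (not LLVM code; the opcode values are the standard DWARF ones, and the real implementation emits a relocation via getDebugThreadLocalSymbol or an address-pool index rather than a literal offset):

#include <cstdint>
#include <vector>

// Standard DWARF opcode values (DWARF4, section 7.7.1).
enum : uint8_t {
  DW_OP_const4u = 0x0c,
  DW_OP_const8u = 0x0e,
  DW_OP_form_tls_address = 0x9b, // or DW_OP_GNU_push_tls_address (0xe0)
};

std::vector<uint8_t> tlsLocation(uint64_t TLSBlockOffset,
                                 unsigned PointerSize) {
  std::vector<uint8_t> Expr;
  // 1) A constNu of the appropriate pointer size...
  Expr.push_back(PointerSize == 4 ? DW_OP_const4u : DW_OP_const8u);
  // 2) ...containing the offset of the variable within the module's
  //    TLS block (little-endian here for simplicity)...
  for (unsigned I = 0; I != PointerSize; ++I)
    Expr.push_back(uint8_t(TLSBlockOffset >> (8 * I)));
  // 3) ...followed by an OP that makes the debugger do a TLS lookup.
  Expr.push_back(DW_OP_form_tls_address);
  return Expr;
}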
@@ -285,7 +294,8 @@ DIE &DwarfCompileUnit::updateSubprogramScopeDIE(const DISubprogram *SP) {
DIE *SPDie = getOrCreateSubprogramDIE(SP, includeMinimalInlineScopes());
attachLowHighPC(*SPDie, Asm->getFunctionBegin(), Asm->getFunctionEnd());
- if (!DD->getCurrentFunction()->getTarget().Options.DisableFramePointerElim(
+ if (DD->useAppleExtensionAttributes() &&
+ !DD->getCurrentFunction()->getTarget().Options.DisableFramePointerElim(
*DD->getCurrentFunction()))
addFlag(*SPDie, dwarf::DW_AT_APPLE_omit_frame_ptr);
@@ -503,9 +513,20 @@ DIE *DwarfCompileUnit::constructVariableDIEImpl(const DbgVariable &DV,
addVariableAddress(DV, *VariableDie, Location);
} else if (RegOp.getReg())
addVariableAddress(DV, *VariableDie, MachineLocation(RegOp.getReg()));
- } else if (DVInsn->getOperand(0).isImm())
- addConstantValue(*VariableDie, DVInsn->getOperand(0), DV.getType());
- else if (DVInsn->getOperand(0).isFPImm())
+ } else if (DVInsn->getOperand(0).isImm()) {
+ // This variable is described by a single constant.
+ // Check whether it has a DIExpression.
+ auto *Expr = DV.getSingleExpression();
+ if (Expr && Expr->getNumElements()) {
+ DIELoc *Loc = new (DIEValueAllocator) DIELoc;
+ DIEDwarfExpression DwarfExpr(*Asm, *this, *Loc);
+ // If there is an expression, emit raw unsigned bytes.
+ DwarfExpr.AddUnsignedConstant(DVInsn->getOperand(0).getImm());
+ DwarfExpr.AddExpression(Expr->expr_op_begin(), Expr->expr_op_end());
+ addBlock(*VariableDie, dwarf::DW_AT_location, Loc);
+ } else
+ addConstantValue(*VariableDie, DVInsn->getOperand(0), DV.getType());
+ } else if (DVInsn->getOperand(0).isFPImm())
addConstantFPValue(*VariableDie, DVInsn->getOperand(0));
else if (DVInsn->getOperand(0).isCImm())
addConstantValue(*VariableDie, DVInsn->getOperand(0).getCImm(),
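In sketch form, the new branch distinguishes a bare immediate from one that carries a DIExpression (assumed semantics; describeImmediate and the byte encoding are illustrative, not the DIE API):

#include <cstdint>
#include <vector>

enum class Attr { ConstValue, LocationBlock };

// An immediate with a nonempty DIExpression becomes a DW_AT_location
// expression block; otherwise it stays a plain DW_AT_const_value.
Attr describeImmediate(uint64_t Imm, const std::vector<uint8_t> &ExprOps,
                       std::vector<uint8_t> &LocBlock) {
  if (ExprOps.empty())
    return Attr::ConstValue;               // addConstantValue(...)
  for (unsigned I = 0; I != 8; ++I)        // AddUnsignedConstant(Imm)
    LocBlock.push_back(uint8_t(Imm >> (8 * I)));
  LocBlock.insert(LocBlock.end(),          // AddExpression(...)
                  ExprOps.begin(), ExprOps.end());
  return Attr::LocationBlock;
}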
@@ -526,7 +547,8 @@ DIE *DwarfCompileUnit::constructVariableDIEImpl(const DbgVariable &DV,
const TargetFrameLowering *TFI = Asm->MF->getSubtarget().getFrameLowering();
int Offset = TFI->getFrameIndexReference(*Asm->MF, FI, FrameReg);
assert(Expr != DV.getExpression().end() && "Wrong number of expressions");
- DwarfExpr.AddMachineRegIndirect(FrameReg, Offset);
+ DwarfExpr.AddMachineRegIndirect(*Asm->MF->getSubtarget().getRegisterInfo(),
+ FrameReg, Offset);
DwarfExpr.AddExpression((*Expr)->expr_op_begin(), (*Expr)->expr_op_end());
++Expr;
}
@@ -683,25 +705,6 @@ void DwarfCompileUnit::finishSubprogramDefinition(const DISubprogram *SP) {
applySubprogramAttributesToDefinition(SP, *D);
}
}
-void DwarfCompileUnit::collectDeadVariables(const DISubprogram *SP) {
- assert(SP && "CU's subprogram list contains a non-subprogram");
- assert(SP->isDefinition() &&
- "CU's subprogram list contains a subprogram declaration");
- auto Variables = SP->getVariables();
- if (Variables.size() == 0)
- return;
-
- DIE *SPDIE = DU->getAbstractSPDies().lookup(SP);
- if (!SPDIE)
- SPDIE = getDIE(SP);
- assert(SPDIE);
- for (const DILocalVariable *DV : Variables) {
- DbgVariable NewVar(DV, /* IA */ nullptr, DD);
- auto VariableDie = constructVariableDIE(NewVar);
- applyVariableAttributes(NewVar, *VariableDie);
- SPDIE->addChild(std::move(VariableDie));
- }
-}
void DwarfCompileUnit::emitHeader(bool UseOffsets) {
// Don't bother labeling the .dwo unit, as its offset isn't used.
@@ -770,16 +773,16 @@ void DwarfCompileUnit::addComplexAddress(const DbgVariable &DV, DIE &Die,
const MachineLocation &Location) {
DIELoc *Loc = new (DIEValueAllocator) DIELoc;
DIEDwarfExpression DwarfExpr(*Asm, *this, *Loc);
- assert(DV.getExpression().size() == 1);
- const DIExpression *Expr = DV.getExpression().back();
+ const DIExpression *Expr = DV.getSingleExpression();
bool ValidReg;
+ const TargetRegisterInfo &TRI = *Asm->MF->getSubtarget().getRegisterInfo();
if (Location.getOffset()) {
- ValidReg = DwarfExpr.AddMachineRegIndirect(Location.getReg(),
+ ValidReg = DwarfExpr.AddMachineRegIndirect(TRI, Location.getReg(),
Location.getOffset());
if (ValidReg)
DwarfExpr.AddExpression(Expr->expr_op_begin(), Expr->expr_op_end());
} else
- ValidReg = DwarfExpr.AddMachineRegExpression(Expr, Location.getReg());
+ ValidReg = DwarfExpr.AddMachineRegExpression(TRI, Expr, Location.getReg());
// Now attach the location information to the DIE.
if (ValidReg)
@@ -824,7 +827,7 @@ bool DwarfCompileUnit::isDwoUnit() const {
}
bool DwarfCompileUnit::includeMinimalInlineScopes() const {
- return getCUNode()->getEmissionKind() == DIBuilder::LineTablesOnly ||
+ return getCUNode()->getEmissionKind() == DICompileUnit::LineTablesOnly ||
(DD->useSplitDwarf() && !Skeleton);
}
} // end llvm namespace
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
index 2e28467..90f74a3 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
@@ -15,12 +15,12 @@
#define LLVM_LIB_CODEGEN_ASMPRINTER_DWARFCOMPILEUNIT_H
#include "DwarfUnit.h"
-#include "llvm/ADT/StringRef.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/Support/Dwarf.h"
namespace llvm {
+class StringRef;
class AsmPrinter;
class DIE;
class DwarfDebug;
@@ -29,6 +29,12 @@ class MCSymbol;
class LexicalScope;
class DwarfCompileUnit : public DwarfUnit {
+ /// A numeric ID unique among all CUs in the module
+ unsigned UniqueID;
+
+ /// Offset of the UnitDie from beginning of debug info section.
+ unsigned DebugInfoOffset = 0;
+
/// The attribute index of DW_AT_stmt_list in the compile unit DIE, avoiding
/// the need to search for it in applyStmtList.
DIE::value_iterator StmtListValue;
@@ -39,6 +45,9 @@ class DwarfCompileUnit : public DwarfUnit {
/// The start of the unit within its section.
MCSymbol *LabelBegin;
+ /// The start of the unit macro info within macro section.
+ MCSymbol *MacroLabelBegin;
+
typedef llvm::SmallVector<const MDNode *, 8> ImportedEntityList;
typedef llvm::DenseMap<const MDNode *, ImportedEntityList>
ImportedEntityMap;
@@ -74,6 +83,10 @@ public:
DwarfCompileUnit(unsigned UID, const DICompileUnit *Node, AsmPrinter *A,
DwarfDebug *DW, DwarfFile *DWU);
+ unsigned getUniqueID() const { return UniqueID; }
+ unsigned getDebugInfoOffset() const { return DebugInfoOffset; }
+ void setDebugInfoOffset(unsigned DbgInfoOff) { DebugInfoOffset = DbgInfoOff; }
+
DwarfCompileUnit *getSkeleton() const {
return Skeleton;
}
@@ -105,7 +118,14 @@ public:
unsigned getOrCreateSourceID(StringRef FileName, StringRef DirName) override;
void addImportedEntity(const DIImportedEntity* IE) {
- ImportedEntities[IE->getScope()].push_back(IE);
+ DIScope *Scope = IE->getScope();
+ assert(Scope && "Invalid Scope encoding!");
+ if (!isa<DILocalScope>(Scope))
+ // No need to add imported entities that are not local declarations.
+ return;
+
+ auto *LocalScope = cast<DILocalScope>(Scope)->getNonLexicalBlockFileScope();
+ ImportedEntities[LocalScope].push_back(IE);
}
/// addRange - Add an address range to the list of ranges for this unit.
@@ -167,8 +187,6 @@ public:
void finishSubprogramDefinition(const DISubprogram *SP);
- void collectDeadVariables(const DISubprogram *SP);
-
/// Set the skeleton unit associated with this unit.
void setSkeleton(DwarfCompileUnit &Skel) { Skeleton = &Skel; }
@@ -189,6 +207,10 @@ public:
return LabelBegin;
}
+ MCSymbol *getMacroLabelBegin() const {
+ return MacroLabelBegin;
+ }
+
/// Add a new global name to the compile unit.
void addGlobalName(StringRef Name, DIE &Die, const DIScope *Context) override;
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
index f56c8e4..7fba768 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -26,7 +26,6 @@
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/IR/Constants.h"
-#include "llvm/IR/DIBuilder.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/Instructions.h"
@@ -54,6 +53,7 @@
#include "llvm/Target/TargetOptions.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
+
using namespace llvm;
#define DEBUG_TYPE "dwarfdebug"
@@ -105,13 +105,21 @@ DwarfPubSections("generate-dwarf-pub-sections", cl::Hidden,
clEnumVal(Disable, "Disabled"), clEnumValEnd),
cl::init(Default));
-static cl::opt<DefaultOnOff>
-DwarfLinkageNames("dwarf-linkage-names", cl::Hidden,
- cl::desc("Emit DWARF linkage-name attributes."),
- cl::values(clEnumVal(Default, "Default for platform"),
- clEnumVal(Enable, "Enabled"),
- clEnumVal(Disable, "Disabled"), clEnumValEnd),
- cl::init(Default));
+enum LinkageNameOption {
+ DefaultLinkageNames,
+ AllLinkageNames,
+ AbstractLinkageNames
+};
+static cl::opt<LinkageNameOption>
+ DwarfLinkageNames("dwarf-linkage-names", cl::Hidden,
+ cl::desc("Which DWARF linkage-name attributes to emit."),
+ cl::values(clEnumValN(DefaultLinkageNames, "Default",
+ "Default for platform"),
+ clEnumValN(AllLinkageNames, "All", "All"),
+ clEnumValN(AbstractLinkageNames, "Abstract",
+ "Abstract subprograms"),
+ clEnumValEnd),
+ cl::init(DefaultLinkageNames));
static const char *const DWARFGroupName = "DWARF Emission";
static const char *const DbgTimerName = "DWARF Debug Writer";
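In practice the hidden flag now takes the symbolic values listed above instead of Enable/Disable; presumably something like -dwarf-linkage-names=Abstract restricts linkage names to abstract subprograms, which the DwarfDebug constructor change below makes the default when tuning for SCE.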
@@ -130,28 +138,21 @@ void DebugLocDwarfExpression::EmitUnsigned(uint64_t Value) {
BS.EmitULEB128(Value, Twine(Value));
}
-bool DebugLocDwarfExpression::isFrameRegister(unsigned MachineReg) {
+bool DebugLocDwarfExpression::isFrameRegister(const TargetRegisterInfo &TRI,
+ unsigned MachineReg) {
// This information is not available while emitting .debug_loc entries.
return false;
}
//===----------------------------------------------------------------------===//
-/// resolve - Look in the DwarfDebug map for the MDNode that
-/// corresponds to the reference.
-template <typename T> T *DbgVariable::resolve(TypedDINodeRef<T> Ref) const {
- return DD->resolve(Ref);
-}
-
bool DbgVariable::isBlockByrefVariable() const {
assert(Var && "Invalid complex DbgVariable!");
- return Var->getType()
- .resolve(DD->getTypeIdentifierMap())
- ->isBlockByrefStruct();
+ return Var->getType().resolve()->isBlockByrefStruct();
}
const DIType *DbgVariable::getType() const {
- DIType *Ty = Var->getType().resolve(DD->getTypeIdentifierMap());
+ DIType *Ty = Var->getType().resolve();
// FIXME: isBlockByrefVariable should be reformulated in terms of complex
// addresses instead.
if (Ty->isBlockByrefStruct()) {
@@ -201,8 +202,8 @@ static LLVM_CONSTEXPR DwarfAccelTable::Atom TypeAtoms[] = {
DwarfAccelTable::Atom(dwarf::DW_ATOM_type_flags, dwarf::DW_FORM_data1)};
DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M)
- : Asm(A), MMI(Asm->MMI), DebugLocs(A->OutStreamer->isVerboseAsm()),
- PrevLabel(nullptr), InfoHolder(A, "info_string", DIEValueAllocator),
+ : DebugHandlerBase(A), DebugLocs(A->OutStreamer->isVerboseAsm()),
+ InfoHolder(A, "info_string", DIEValueAllocator),
SkeletonHolder(A, "skel_string", DIEValueAllocator),
IsDarwin(Triple(A->getTargetTriple()).isOSDarwin()),
AccelNames(DwarfAccelTable::Atom(dwarf::DW_ATOM_die_offset,
@@ -214,7 +215,6 @@ DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M)
AccelTypes(TypeAtoms), DebuggerTuning(DebuggerKind::Default) {
CurFn = nullptr;
- CurMI = nullptr;
Triple TT(Asm->getTargetTriple());
// Make sure we know our "debugger tuning." The target option takes
@@ -234,6 +234,8 @@ DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M)
else
HasDwarfAccelTables = DwarfAccelTables == Enable;
+ HasAppleExtensionAttributes = tuneForLLDB();
+
// Handle split DWARF. Off by default for now.
if (SplitDwarf == Default)
HasSplitDwarf = false;
@@ -246,11 +248,11 @@ DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M)
else
HasDwarfPubSections = DwarfPubSections == Enable;
- // SCE does not use linkage names.
- if (DwarfLinkageNames == Default)
- UseLinkageNames = !tuneForSCE();
+ // SCE defaults to linkage names only for abstract subprograms.
+ if (DwarfLinkageNames == DefaultLinkageNames)
+ UseAllLinkageNames = !tuneForSCE();
else
- UseLinkageNames = DwarfLinkageNames == Enable;
+ UseAllLinkageNames = DwarfLinkageNames == AllLinkageNames;
unsigned DwarfVersionNumber = Asm->TM.Options.MCOptions.DwarfVersion;
DwarfVersion = DwarfVersionNumber ? DwarfVersionNumber
@@ -265,12 +267,10 @@ DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M)
// https://sourceware.org/bugzilla/show_bug.cgi?id=11616
UseGNUTLSOpcode = tuneForGDB() || DwarfVersion < 3;
- Asm->OutStreamer->getContext().setDwarfVersion(DwarfVersion);
+ // GDB does not fully support the DWARF 4 representation for bitfields.
+ UseDWARF2Bitfields = (DwarfVersion < 4) || tuneForGDB();
- {
- NamedRegionTimer T(DbgTimerName, DWARFGroupName, TimePassesIsEnabled);
- beginModule();
- }
+ Asm->OutStreamer->getContext().setDwarfVersion(DwarfVersion);
}
// Define out of line so we don't have to include DwarfUnit.h in DwarfDebug.h.
@@ -297,7 +297,6 @@ static void getObjCClassCategory(StringRef In, StringRef &Class,
Class = In.slice(In.find('[') + 1, In.find('('));
Category = In.slice(In.find('[') + 1, In.find(' '));
- return;
}
static StringRef getObjCMethodName(StringRef In) {
@@ -367,8 +366,8 @@ void DwarfDebug::constructAbstractSubprogramScopeDIE(LexicalScope *Scope) {
// Find the subprogram's DwarfCompileUnit in the SPMap in case the subprogram
// was inlined from another compile unit.
- auto &CU = SPMap[SP];
- forBothCUs(*CU, [&](DwarfCompileUnit &CU) {
+ auto &CU = *CUMap.lookup(cast<DISubprogram>(SP)->getUnit());
+ forBothCUs(CU, [&](DwarfCompileUnit &CU) {
CU.constructAbstractSubprogramScopeDIE(Scope);
});
}
@@ -392,8 +391,11 @@ DwarfDebug::constructDwarfCompileUnit(const DICompileUnit *DIUnit) {
DwarfCompileUnit &NewCU = *OwnedUnit;
DIE &Die = NewCU.getUnitDie();
InfoHolder.addUnit(std::move(OwnedUnit));
- if (useSplitDwarf())
+ if (useSplitDwarf()) {
NewCU.setSkeleton(constructSkeletonCU(NewCU));
+ NewCU.addString(Die, dwarf::DW_AT_GNU_dwo_name,
+ DIUnit->getSplitDebugFilename());
+ }
// LTO with assembly output shares a single line table amongst multiple CUs.
// To avoid the compilation directory being ambiguous, let the line table
@@ -419,16 +421,18 @@ DwarfDebug::constructDwarfCompileUnit(const DICompileUnit *DIUnit) {
addGnuPubAttributes(NewCU, Die);
}
- if (DIUnit->isOptimized())
- NewCU.addFlag(Die, dwarf::DW_AT_APPLE_optimized);
+ if (useAppleExtensionAttributes()) {
+ if (DIUnit->isOptimized())
+ NewCU.addFlag(Die, dwarf::DW_AT_APPLE_optimized);
- StringRef Flags = DIUnit->getFlags();
- if (!Flags.empty())
- NewCU.addString(Die, dwarf::DW_AT_APPLE_flags, Flags);
+ StringRef Flags = DIUnit->getFlags();
+ if (!Flags.empty())
+ NewCU.addString(Die, dwarf::DW_AT_APPLE_flags, Flags);
- if (unsigned RVer = DIUnit->getRuntimeVersion())
- NewCU.addUInt(Die, dwarf::DW_AT_APPLE_major_runtime_vers,
- dwarf::DW_FORM_data1, RVer);
+ if (unsigned RVer = DIUnit->getRuntimeVersion())
+ NewCU.addUInt(Die, dwarf::DW_AT_APPLE_major_runtime_vers,
+ dwarf::DW_FORM_data1, RVer);
+ }
if (useSplitDwarf())
NewCU.initSection(Asm->getObjFileLowering().getDwarfInfoDWOSection());
@@ -460,48 +464,42 @@ void DwarfDebug::constructAndAddImportedEntityDIE(DwarfCompileUnit &TheCU,
// global DIEs and emit initial debug info sections. This is invoked by
// the target AsmPrinter.
void DwarfDebug::beginModule() {
+ NamedRegionTimer T(DbgTimerName, DWARFGroupName, TimePassesIsEnabled);
if (DisableDebugInfoPrinting)
return;
const Module *M = MMI->getModule();
- NamedMDNode *CU_Nodes = M->getNamedMetadata("llvm.dbg.cu");
- if (!CU_Nodes)
- return;
- TypeIdentifierMap = generateDITypeIdentifierMap(CU_Nodes);
-
- SingleCU = CU_Nodes->getNumOperands() == 1;
+ unsigned NumDebugCUs = std::distance(M->debug_compile_units_begin(),
+ M->debug_compile_units_end());
+ // Tell MMI whether we have debug info.
+ MMI->setDebugInfoAvailability(NumDebugCUs > 0);
+ SingleCU = NumDebugCUs == 1;
- for (MDNode *N : CU_Nodes->operands()) {
- auto *CUNode = cast<DICompileUnit>(N);
+ for (DICompileUnit *CUNode : M->debug_compile_units()) {
DwarfCompileUnit &CU = constructDwarfCompileUnit(CUNode);
for (auto *IE : CUNode->getImportedEntities())
CU.addImportedEntity(IE);
for (auto *GV : CUNode->getGlobalVariables())
CU.getOrCreateGlobalVariableDIE(GV);
- for (auto *SP : CUNode->getSubprograms())
- SPMap.insert(std::make_pair(SP, &CU));
for (auto *Ty : CUNode->getEnumTypes()) {
// The enum types array by design contains pointers to
// MDNodes rather than DIRefs. Unique them here.
- CU.getOrCreateTypeDIE(cast<DIType>(resolve(Ty->getRef())));
+ CU.getOrCreateTypeDIE(cast<DIType>(Ty));
}
for (auto *Ty : CUNode->getRetainedTypes()) {
// The retained types array by design contains pointers to
// MDNodes rather than DIRefs. Unique them here.
- DIType *RT = cast<DIType>(resolve(Ty->getRef()));
- if (!RT->isExternalTypeRef())
- // There is no point in force-emitting a forward declaration.
- CU.getOrCreateTypeDIE(RT);
+ if (DIType *RT = dyn_cast<DIType>(Ty))
+ if (!RT->isExternalTypeRef())
+ // There is no point in force-emitting a forward declaration.
+ CU.getOrCreateTypeDIE(RT);
}
// Emit imported_modules last so that the relevant context is already
// available.
for (auto *IE : CUNode->getImportedEntities())
constructAndAddImportedEntityDIE(CU, IE);
}
-
- // Tell MMI that we have debug info.
- MMI->setDebugInfoAvailability(true);
}
void DwarfDebug::finishVariableDefinitions() {
@@ -524,31 +522,13 @@ void DwarfDebug::finishVariableDefinitions() {
}
void DwarfDebug::finishSubprogramDefinitions() {
- for (const auto &P : SPMap)
- forBothCUs(*P.second, [&](DwarfCompileUnit &CU) {
- CU.finishSubprogramDefinition(cast<DISubprogram>(P.first));
- });
-}
-
-
-// Collect info for variables that were optimized out.
-void DwarfDebug::collectDeadVariables() {
- const Module *M = MMI->getModule();
-
- if (NamedMDNode *CU_Nodes = M->getNamedMetadata("llvm.dbg.cu")) {
- for (MDNode *N : CU_Nodes->operands()) {
- auto *TheCU = cast<DICompileUnit>(N);
- // Construct subprogram DIE and add variables DIEs.
- DwarfCompileUnit *SPCU =
- static_cast<DwarfCompileUnit *>(CUMap.lookup(TheCU));
- assert(SPCU && "Unable to find Compile Unit!");
- for (auto *SP : TheCU->getSubprograms()) {
- if (ProcessedSPNodes.count(SP) != 0)
- continue;
- SPCU->collectDeadVariables(SP);
- }
- }
- }
+ for (auto &F : MMI->getModule()->functions())
+ if (auto *SP = F.getSubprogram())
+ if (ProcessedSPNodes.count(SP) &&
+ SP->getUnit()->getEmissionKind() != DICompileUnit::NoDebug)
+ forBothCUs(*CUMap.lookup(SP->getUnit()), [&](DwarfCompileUnit &CU) {
+ CU.finishSubprogramDefinition(SP);
+ });
}
void DwarfDebug::finalizeModuleInfo() {
@@ -558,11 +538,6 @@ void DwarfDebug::finalizeModuleInfo() {
finishVariableDefinitions();
- // Collect info for variables that were optimized out.
- collectDeadVariables();
-
- unsigned MacroOffset = 0;
- std::unique_ptr<AsmStreamerBase> AS(new SizeReporterAsmStreamer(Asm));
// Handle anything that needs to be done on a per-unit basis after
// all other generation.
for (const auto &P : CUMap) {
@@ -617,13 +592,11 @@ void DwarfDebug::finalizeModuleInfo() {
}
auto *CUNode = cast<DICompileUnit>(P.first);
- if (CUNode->getMacros()) {
- // Compile Unit has macros, emit "DW_AT_macro_info" attribute.
- U.addUInt(U.getUnitDie(), dwarf::DW_AT_macro_info,
- dwarf::DW_FORM_sec_offset, MacroOffset);
- // Update macro section offset
- MacroOffset += handleMacroNodes(AS.get(), CUNode->getMacros(), U);
- }
+ // If the compile unit has macros, emit a "DW_AT_macro_info" attribute.
+ if (CUNode->getMacros())
+ U.addSectionLabel(U.getUnitDie(), dwarf::DW_AT_macro_info,
+ U.getMacroLabelBegin(),
+ TLOF.getDwarfMacinfoSection()->getBeginSymbol());
}
// Compute DIE offsets and sizes.
@@ -694,7 +667,6 @@ void DwarfDebug::endModule() {
}
// clean up.
- SPMap.clear();
AbstractVariables.clear();
}
@@ -717,7 +689,7 @@ DbgVariable *DwarfDebug::getExistingAbstractVariable(InlinedVariable IV) {
void DwarfDebug::createAbstractVariable(const DILocalVariable *Var,
LexicalScope *Scope) {
- auto AbsDbgVariable = make_unique<DbgVariable>(Var, /* IA */ nullptr, this);
+ auto AbsDbgVariable = make_unique<DbgVariable>(Var, /* IA */ nullptr);
InfoHolder.addScopeVariable(Scope, AbsDbgVariable.get());
AbstractVariables[Var] = std::move(AbsDbgVariable);
}
@@ -761,7 +733,7 @@ void DwarfDebug::collectVariableInfoFromMMITable(
continue;
ensureAbstractVariableIsCreatedIfScoped(Var, Scope->getScopeNode());
- auto RegVar = make_unique<DbgVariable>(Var.first, Var.second, this);
+ auto RegVar = make_unique<DbgVariable>(Var.first, Var.second);
RegVar->initializeMMI(VI.Expr, VI.Slot);
if (InfoHolder.addScopeVariable(Scope, RegVar.get()))
ConcreteVariables.push_back(std::move(RegVar));
@@ -793,29 +765,6 @@ static DebugLocEntry::Value getDebugLocValue(const MachineInstr *MI) {
llvm_unreachable("Unexpected 4-operand DBG_VALUE instruction!");
}
-// Determine the relative position of the pieces described by P1 and P2.
-// Returns -1 if P1 is entirely before P2, 0 if P1 and P2 overlap,
-// 1 if P1 is entirely after P2.
-static int pieceCmp(const DIExpression *P1, const DIExpression *P2) {
- unsigned l1 = P1->getBitPieceOffset();
- unsigned l2 = P2->getBitPieceOffset();
- unsigned r1 = l1 + P1->getBitPieceSize();
- unsigned r2 = l2 + P2->getBitPieceSize();
- if (r1 <= l2)
- return -1;
- else if (r2 <= l1)
- return 1;
- else
- return 0;
-}
-
-/// Determine whether two variable pieces overlap.
-static bool piecesOverlap(const DIExpression *P1, const DIExpression *P2) {
- if (!P1->isBitPiece() || !P2->isBitPiece())
- return true;
- return pieceCmp(P1, P2) == 0;
-}
-
/// \brief If this and Next are describing different pieces of the same
/// variable, merge them by appending Next's values to the current
/// list of values.
@@ -832,8 +781,9 @@ bool DebugLocEntry::MergeValues(const DebugLocEntry &Next) {
// sorted.
for (unsigned i = 0, j = 0; i < Values.size(); ++i) {
for (; j < Next.Values.size(); ++j) {
- int res = pieceCmp(cast<DIExpression>(Values[i].Expression),
- cast<DIExpression>(Next.Values[j].Expression));
+ int res = DebugHandlerBase::pieceCmp(
+ cast<DIExpression>(Values[i].Expression),
+ cast<DIExpression>(Next.Values[j].Expression));
if (res == 0) // The two expressions overlap, we can't merge.
return false;
// Values[i] is entirely before Next.Values[j],
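pieceCmp and piecesOverlap move to DebugHandlerBase rather than disappearing; a standalone restatement of the comparison MergeValues still relies on:

struct Piece { unsigned BitOffset, BitSize; };

// Relative position of two variable pieces: -1 if P1 lies entirely
// before P2, 1 if entirely after, 0 if their bit ranges overlap.
int pieceCmp(const Piece &P1, const Piece &P2) {
  unsigned L1 = P1.BitOffset, R1 = L1 + P1.BitSize;
  unsigned L2 = P2.BitOffset, R2 = L2 + P2.BitSize;
  if (R1 <= L2) return -1;
  if (R2 <= L1) return 1;
  return 0; // Overlap: the entries cannot be merged.
}

bool piecesOverlap(const Piece &P1, const Piece &P2) {
  return pieceCmp(P1, P2) == 0;
}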
@@ -944,7 +894,7 @@ DwarfDebug::buildLocationList(SmallVectorImpl<DebugLocEntry> &DebugLoc,
DEBUG({
dbgs() << CurEntry->getValues().size() << " Values:\n";
for (auto &Value : CurEntry->getValues())
- Value.getExpression()->dump();
+ Value.dump();
dbgs() << "-----\n";
});
@@ -957,12 +907,23 @@ DwarfDebug::buildLocationList(SmallVectorImpl<DebugLocEntry> &DebugLoc,
DbgVariable *DwarfDebug::createConcreteVariable(LexicalScope &Scope,
InlinedVariable IV) {
ensureAbstractVariableIsCreatedIfScoped(IV, Scope.getScopeNode());
- ConcreteVariables.push_back(
- make_unique<DbgVariable>(IV.first, IV.second, this));
+ ConcreteVariables.push_back(make_unique<DbgVariable>(IV.first, IV.second));
InfoHolder.addScopeVariable(&Scope, ConcreteVariables.back().get());
return ConcreteVariables.back().get();
}
+// Determine whether this DBG_VALUE is valid at the beginning of the function.
+static bool validAtEntry(const MachineInstr *MInsn) {
+ auto MBB = MInsn->getParent();
+ // Is it in the entry basic block?
+ if (!MBB->pred_empty())
+ return false;
+ for (MachineBasicBlock::const_reverse_iterator I(MInsn); I != MBB->rend();
+      ++I)
+ if (!(I->isDebugValue() || I->getFlag(MachineInstr::FrameSetup)))
+ return false;
+ return true;
+}
+
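A self-contained model of the rule validAtEntry encodes (assumed semantics; Insn and Block are illustrative stand-ins for MachineInstr and MachineBasicBlock):

#include <cstddef>
#include <vector>

struct Insn { bool IsDebugValue; bool IsFrameSetup; };
struct Block { bool HasPredecessors; std::vector<Insn> Insns; };

// A DBG_VALUE at Insns[Idx] describes its variable from function entry
// only if the block is the entry block (no predecessors) and everything
// before it is another debug value or a frame-setup instruction.
bool validAtEntry(const Block &MBB, size_t Idx) {
  if (MBB.HasPredecessors)
    return false;
  for (size_t I = 0; I != Idx; ++I)
    if (!MBB.Insns[I].IsDebugValue && !MBB.Insns[I].IsFrameSetup)
      return false;
  return true;
}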
// Find variables for each lexical scope.
void DwarfDebug::collectVariableInfo(DwarfCompileUnit &TheCU,
const DISubprogram *SP,
@@ -995,8 +956,11 @@ void DwarfDebug::collectVariableInfo(DwarfCompileUnit &TheCU,
const MachineInstr *MInsn = Ranges.front().first;
assert(MInsn->isDebugValue() && "History must begin with debug value");
- // Check if the first DBG_VALUE is valid for the rest of the function.
- if (Ranges.size() == 1 && Ranges.front().second == nullptr) {
+ // Check if there is a single DBG_VALUE, valid throughout the function.
+ // A single constant is also considered valid for the entire function.
+ if (Ranges.size() == 1 &&
+ (MInsn->getOperand(0).isImm() ||
+ (validAtEntry(MInsn) && Ranges.front().second == nullptr))) {
RegVar->initializeDbgValue(MInsn);
continue;
}
@@ -1008,7 +972,7 @@ void DwarfDebug::collectVariableInfo(DwarfCompileUnit &TheCU,
SmallVector<DebugLocEntry, 8> Entries;
buildLocationList(Entries, Ranges);
- // If the variable has an DIBasicType, extract it. Basic types cannot have
+ // If the variable has a DIBasicType, extract it. Basic types cannot have
// unique identifiers, so don't bother resolving the type with the
// identifier map.
const DIBasicType *BT = dyn_cast<DIBasicType>(
@@ -1027,25 +991,14 @@ void DwarfDebug::collectVariableInfo(DwarfCompileUnit &TheCU,
}
}
-// Return Label preceding the instruction.
-MCSymbol *DwarfDebug::getLabelBeforeInsn(const MachineInstr *MI) {
- MCSymbol *Label = LabelsBeforeInsn.lookup(MI);
- assert(Label && "Didn't insert label before instruction");
- return Label;
-}
-
-// Return Label immediately following the instruction.
-MCSymbol *DwarfDebug::getLabelAfterInsn(const MachineInstr *MI) {
- return LabelsAfterInsn.lookup(MI);
-}
-
// Process beginning of an instruction.
void DwarfDebug::beginInstruction(const MachineInstr *MI) {
- assert(CurMI == nullptr);
- CurMI = MI;
+ DebugHandlerBase::beginInstruction(MI);
+ assert(CurMI);
+
// Check if source location changes, but ignore DBG_VALUE locations.
if (!MI->isDebugValue()) {
- DebugLoc DL = MI->getDebugLoc();
+ const DebugLoc &DL = MI->getDebugLoc();
if (DL != PrevInstLoc) {
if (DL) {
unsigned Flags = 0;
@@ -1067,78 +1020,6 @@ void DwarfDebug::beginInstruction(const MachineInstr *MI) {
}
}
}
-
- // Insert labels where requested.
- DenseMap<const MachineInstr *, MCSymbol *>::iterator I =
- LabelsBeforeInsn.find(MI);
-
- // No label needed.
- if (I == LabelsBeforeInsn.end())
- return;
-
- // Label already assigned.
- if (I->second)
- return;
-
- if (!PrevLabel) {
- PrevLabel = MMI->getContext().createTempSymbol();
- Asm->OutStreamer->EmitLabel(PrevLabel);
- }
- I->second = PrevLabel;
-}
-
-// Process end of an instruction.
-void DwarfDebug::endInstruction() {
- assert(CurMI != nullptr);
- // Don't create a new label after DBG_VALUE instructions.
- // They don't generate code.
- if (!CurMI->isDebugValue())
- PrevLabel = nullptr;
-
- DenseMap<const MachineInstr *, MCSymbol *>::iterator I =
- LabelsAfterInsn.find(CurMI);
- CurMI = nullptr;
-
- // No label needed.
- if (I == LabelsAfterInsn.end())
- return;
-
- // Label already assigned.
- if (I->second)
- return;
-
- // We need a label after this instruction.
- if (!PrevLabel) {
- PrevLabel = MMI->getContext().createTempSymbol();
- Asm->OutStreamer->EmitLabel(PrevLabel);
- }
- I->second = PrevLabel;
-}
-
-// Each LexicalScope has first instruction and last instruction to mark
-// beginning and end of a scope respectively. Create an inverse map that list
-// scopes starts (and ends) with an instruction. One instruction may start (or
-// end) multiple scopes. Ignore scopes that are not reachable.
-void DwarfDebug::identifyScopeMarkers() {
- SmallVector<LexicalScope *, 4> WorkList;
- WorkList.push_back(LScopes.getCurrentFunctionScope());
- while (!WorkList.empty()) {
- LexicalScope *S = WorkList.pop_back_val();
-
- const SmallVectorImpl<LexicalScope *> &Children = S->getChildren();
- if (!Children.empty())
- WorkList.append(Children.begin(), Children.end());
-
- if (S->isAbstractScope())
- continue;
-
- for (const InsnRange &R : S->getRanges()) {
- assert(R.first && "InsnRange does not have first instruction!");
- assert(R.second && "InsnRange does not have second instruction!");
- requestLabelBeforeInsn(R.first);
- requestLabelAfterInsn(R.second);
- }
- }
}
static DebugLoc findPrologueEndLoc(const MachineFunction *MF) {
@@ -1167,15 +1048,10 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) {
// Grab the lexical scopes for the function, if we don't have any of those
// then we're not going to be able to do anything.
- LScopes.initialize(*MF);
+ DebugHandlerBase::beginFunction(MF);
if (LScopes.empty())
return;
- assert(DbgValues.empty() && "DbgValues map wasn't cleaned!");
-
- // Make sure that each lexical scope will have a begin/end label.
- identifyScopeMarkers();
-
// Set DwarfDwarfCompileUnitID in MCContext to the Compile Unit this function
// belongs to so that we add to the correct per-cu line table in the
// non-asm case.
@@ -1188,55 +1064,19 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) {
// isn't structurally identical (see: file path/name info from clang, which
// includes the directory of the cpp file being built, even when the file name
// is absolute (such as an <> lookup header)))
- DwarfCompileUnit *TheCU = SPMap.lookup(FnScope->getScopeNode());
- assert(TheCU && "Unable to find compile unit!");
+ auto *SP = cast<DISubprogram>(FnScope->getScopeNode());
+ DwarfCompileUnit *TheCU = CUMap.lookup(SP->getUnit());
+ if (!TheCU) {
+ assert(SP->getUnit()->getEmissionKind() == DICompileUnit::NoDebug &&
+ "DICompileUnit missing from llvm.dbg.cu?");
+ return;
+ }
if (Asm->OutStreamer->hasRawTextSupport())
// Use a single line table if we are generating assembly.
Asm->OutStreamer->getContext().setDwarfCompileUnitID(0);
else
Asm->OutStreamer->getContext().setDwarfCompileUnitID(TheCU->getUniqueID());
- // Calculate history for local variables.
- calculateDbgValueHistory(MF, Asm->MF->getSubtarget().getRegisterInfo(),
- DbgValues);
-
- // Request labels for the full history.
- for (const auto &I : DbgValues) {
- const auto &Ranges = I.second;
- if (Ranges.empty())
- continue;
-
- // The first mention of a function argument gets the CurrentFnBegin
- // label, so arguments are visible when breaking at function entry.
- const DILocalVariable *DIVar = Ranges.front().first->getDebugVariable();
- if (DIVar->isParameter() &&
- getDISubprogram(DIVar->getScope())->describes(MF->getFunction())) {
- LabelsBeforeInsn[Ranges.front().first] = Asm->getFunctionBegin();
- if (Ranges.front().first->getDebugExpression()->isBitPiece()) {
- // Mark all non-overlapping initial pieces.
- for (auto I = Ranges.begin(); I != Ranges.end(); ++I) {
- const DIExpression *Piece = I->first->getDebugExpression();
- if (std::all_of(Ranges.begin(), I,
- [&](DbgValueHistoryMap::InstrRange Pred) {
- return !piecesOverlap(Piece, Pred.first->getDebugExpression());
- }))
- LabelsBeforeInsn[I->first] = Asm->getFunctionBegin();
- else
- break;
- }
- }
- }
-
- for (const auto &Range : Ranges) {
- requestLabelBeforeInsn(Range.first);
- if (Range.second)
- requestLabelAfterInsn(Range.second);
- }
- }
-
- PrevInstLoc = DebugLoc();
- PrevLabel = Asm->getFunctionBegin();
-
// Record beginning of function.
PrologEndLoc = findPrologueEndLoc(MF);
if (DILocation *L = PrologEndLoc) {
@@ -1252,13 +1092,19 @@ void DwarfDebug::endFunction(const MachineFunction *MF) {
assert(CurFn == MF &&
"endFunction should be called with the same function as beginFunction");
- if (!MMI->hasDebugInfo() || LScopes.empty() ||
- !MF->getFunction()->getSubprogram()) {
+ const DISubprogram *SP = MF->getFunction()->getSubprogram();
+ if (!MMI->hasDebugInfo() || LScopes.empty() || !SP ||
+ SP->getUnit()->getEmissionKind() == DICompileUnit::NoDebug) {
// If we don't have a lexical scope for this function then there will
// be a hole in the range information. Keep note of this by setting the
// previously used section to nullptr.
PrevCU = nullptr;
CurFn = nullptr;
+ DebugHandlerBase::endFunction(MF);
+ // Mark functions that have a valid DISubprogram but no debug info on
+ // any instruction as processed.
+ if (SP)
+ ProcessedSPNodes.insert(SP);
return;
}
@@ -1266,8 +1112,8 @@ void DwarfDebug::endFunction(const MachineFunction *MF) {
Asm->OutStreamer->getContext().setDwarfCompileUnitID(0);
LexicalScope *FnScope = LScopes.getCurrentFunctionScope();
- auto *SP = cast<DISubprogram>(FnScope->getScopeNode());
- DwarfCompileUnit &TheCU = *SPMap.lookup(SP);
+ SP = cast<DISubprogram>(FnScope->getScopeNode());
+ DwarfCompileUnit &TheCU = *CUMap.lookup(SP->getUnit());
DenseSet<InlinedVariable> ProcessedVars;
collectVariableInfo(TheCU, SP, ProcessedVars);
@@ -1277,17 +1123,16 @@ void DwarfDebug::endFunction(const MachineFunction *MF) {
// Under -gmlt, skip building the subprogram if there are no inlined
// subroutines inside it.
- if (TheCU.getCUNode()->getEmissionKind() == DIBuilder::LineTablesOnly &&
+ if (TheCU.getCUNode()->getEmissionKind() == DICompileUnit::LineTablesOnly &&
LScopes.getAbstractScopesList().empty() && !IsDarwin) {
assert(InfoHolder.getScopeVariables().empty());
assert(DbgValues.empty());
// FIXME: This wouldn't be true in LTO with a -g (with inlining) CU followed
// by a -gmlt CU. Add a test and remove this assertion.
assert(AbstractVariables.empty());
- LabelsBeforeInsn.clear();
- LabelsAfterInsn.clear();
PrevLabel = nullptr;
CurFn = nullptr;
+ DebugHandlerBase::endFunction(MF);
return;
}
@@ -1319,11 +1164,9 @@ void DwarfDebug::endFunction(const MachineFunction *MF) {
// DbgVariables except those that are also in AbstractVariables (since they
// can be used cross-function)
InfoHolder.getScopeVariables().clear();
- DbgValues.clear();
- LabelsBeforeInsn.clear();
- LabelsAfterInsn.clear();
PrevLabel = nullptr;
CurFn = nullptr;
+ DebugHandlerBase::endFunction(MF);
}
// Register a source line with debug info. Returns the unique label that was
@@ -1535,7 +1378,7 @@ void DwarfDebug::emitDebugPubTypes(bool GnuStyle) {
&DwarfCompileUnit::getGlobalTypes);
}
-// Emit visible names into a debug str section.
+/// Emit null-terminated strings into a debug str section.
void DwarfDebug::emitDebugStr() {
DwarfFile &Holder = useSplitDwarf() ? SkeletonHolder : InfoHolder;
Holder.emitStrings(Asm->getObjFileLowering().getDwarfStrSection());
@@ -1554,8 +1397,7 @@ static void emitDebugLocValue(const AsmPrinter &AP, const DIBasicType *BT,
ByteStreamer &Streamer,
const DebugLocEntry::Value &Value,
unsigned PieceOffsetInBits) {
- DebugLocDwarfExpression DwarfExpr(*AP.MF->getSubtarget().getRegisterInfo(),
- AP.getDwarfDebug()->getDwarfVersion(),
+ DebugLocDwarfExpression DwarfExpr(AP.getDwarfDebug()->getDwarfVersion(),
Streamer);
// Regular entry.
if (Value.isInt()) {
@@ -1572,18 +1414,19 @@ static void emitDebugLocValue(const AsmPrinter &AP, const DIBasicType *BT,
AP.EmitDwarfRegOp(Streamer, Loc);
else {
// Complex address entry.
+ const TargetRegisterInfo &TRI = *AP.MF->getSubtarget().getRegisterInfo();
if (Loc.getOffset()) {
- DwarfExpr.AddMachineRegIndirect(Loc.getReg(), Loc.getOffset());
+ DwarfExpr.AddMachineRegIndirect(TRI, Loc.getReg(), Loc.getOffset());
DwarfExpr.AddExpression(Expr->expr_op_begin(), Expr->expr_op_end(),
PieceOffsetInBits);
} else
- DwarfExpr.AddMachineRegExpression(Expr, Loc.getReg(),
+ DwarfExpr.AddMachineRegExpression(TRI, Expr, Loc.getReg(),
PieceOffsetInBits);
}
+ } else if (Value.isConstantFP()) {
+ APInt RawBytes = Value.getConstantFP()->getValueAPF().bitcastToAPInt();
+ DwarfExpr.AddUnsignedConstant(RawBytes);
}
- // else ... ignore constant fp. There is not any good way to
- // to represent them here in dwarf.
- // FIXME: ^
}
void DebugLocEntry::finalize(const AsmPrinter &AP,
@@ -1608,8 +1451,7 @@ void DebugLocEntry::finalize(const AsmPrinter &AP,
assert(Offset <= PieceOffset && "overlapping or duplicate pieces");
if (Offset < PieceOffset) {
// The DWARF spec seriously mandates pieces with no locations for gaps.
- DebugLocDwarfExpression Expr(*AP.MF->getSubtarget().getRegisterInfo(),
- AP.getDwarfDebug()->getDwarfVersion(),
+ DebugLocDwarfExpression Expr(AP.getDwarfDebug()->getDwarfVersion(),
Streamer);
Expr.AddOpPiece(PieceOffset-Offset, 0);
Offset += PieceOffset-Offset;
@@ -1708,24 +1550,12 @@ void DwarfDebug::emitDebugARanges() {
}
}
- // Add terminating symbols for each section.
- for (const auto &I : SectionMap) {
- MCSection *Section = I.first;
- MCSymbol *Sym = nullptr;
-
- if (Section)
- Sym = Asm->OutStreamer->endSection(Section);
-
- // Insert a final terminator.
- SectionMap[Section].push_back(SymbolCU(nullptr, Sym));
- }
-
DenseMap<DwarfCompileUnit *, std::vector<ArangeSpan>> Spans;
for (auto &I : SectionMap) {
- const MCSection *Section = I.first;
+ MCSection *Section = I.first;
SmallVector<SymbolCU, 8> &List = I.second;
- if (List.size() < 2)
+ if (List.size() < 1)
continue;
// If we have no section (e.g. common), just write out
@@ -1735,26 +1565,29 @@ void DwarfDebug::emitDebugARanges() {
ArangeSpan Span;
Span.Start = Cur.Sym;
Span.End = nullptr;
- if (Cur.CU)
- Spans[Cur.CU].push_back(Span);
+ assert(Cur.CU);
+ Spans[Cur.CU].push_back(Span);
}
continue;
}
// Sort the symbols by offset within the section.
- std::sort(List.begin(), List.end(),
- [&](const SymbolCU &A, const SymbolCU &B) {
- unsigned IA = A.Sym ? Asm->OutStreamer->GetSymbolOrder(A.Sym) : 0;
- unsigned IB = B.Sym ? Asm->OutStreamer->GetSymbolOrder(B.Sym) : 0;
-
- // Symbols with no order assigned should be placed at the end.
- // (e.g. section end labels)
- if (IA == 0)
- return false;
- if (IB == 0)
- return true;
- return IA < IB;
- });
+ std::sort(
+ List.begin(), List.end(), [&](const SymbolCU &A, const SymbolCU &B) {
+ unsigned IA = A.Sym ? Asm->OutStreamer->GetSymbolOrder(A.Sym) : 0;
+ unsigned IB = B.Sym ? Asm->OutStreamer->GetSymbolOrder(B.Sym) : 0;
+
+ // Symbols with no order assigned should be placed at the end.
+ // (e.g. section end labels)
+ if (IA == 0)
+ return false;
+ if (IB == 0)
+ return true;
+ return IA < IB;
+ });
+
+ // Insert a final terminator.
+ List.push_back(SymbolCU(nullptr, Asm->OutStreamer->endSection(Section)));
// Build spans between each label.
const MCSymbol *StartSym = List[0].Sym;
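The terminator is now appended after sorting instead of before, so the section-end label can no longer be shuffled into the middle of the list. A sketch of the ordering rule (assumed semantics; Sym stands in for SymbolCU):

#include <algorithm>
#include <vector>

struct Sym { unsigned Order; };          // 0 means "no order assigned".

void sortThenTerminate(std::vector<Sym> &List) {
  std::sort(List.begin(), List.end(), [](const Sym &A, const Sym &B) {
    if (A.Order == 0) return false;      // A sorts after anything ordered.
    if (B.Order == 0) return true;       // B sorts after A.
    return A.Order < B.Order;
  });
  List.push_back(Sym{0});                // Terminator appended post-sort.
}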
@@ -1767,6 +1600,7 @@ void DwarfDebug::emitDebugARanges() {
ArangeSpan Span;
Span.Start = StartSym;
Span.End = Cur.Sym;
+ assert(Prev.CU);
Spans[Prev.CU].push_back(Span);
StartSym = Cur.Sym;
}
@@ -1787,9 +1621,10 @@ void DwarfDebug::emitDebugARanges() {
}
// Sort the CU list (again, to ensure consistent output order).
- std::sort(CUs.begin(), CUs.end(), [](const DwarfUnit *A, const DwarfUnit *B) {
- return A->getUniqueID() < B->getUniqueID();
- });
+ std::sort(CUs.begin(), CUs.end(),
+ [](const DwarfCompileUnit *A, const DwarfCompileUnit *B) {
+ return A->getUniqueID() < B->getUniqueID();
+ });
// Emit an arange table for each CU we used.
for (DwarfCompileUnit *CU : CUs) {
@@ -1827,7 +1662,7 @@ void DwarfDebug::emitDebugARanges() {
Asm->OutStreamer->AddComment("Segment Size (in bytes)");
Asm->EmitInt8(0);
- Asm->OutStreamer->EmitFill(Padding, 0xff);
+ Asm->OutStreamer->emitFill(Padding, 0xff);
for (const ArangeSpan &Span : List) {
Asm->EmitLabelReference(Span.Start, PtrSize);
@@ -1852,7 +1687,7 @@ void DwarfDebug::emitDebugARanges() {
}
}
-// Emit visible names into a debug ranges section.
+/// Emit address ranges into a debug ranges section.
void DwarfDebug::emitDebugRanges() {
// Start the dwarf ranges section.
Asm->OutStreamer->SwitchSection(
@@ -1894,65 +1729,56 @@ void DwarfDebug::emitDebugRanges() {
}
}
-unsigned DwarfDebug::handleMacroNodes(AsmStreamerBase *AS,
- DIMacroNodeArray Nodes,
- DwarfCompileUnit &U) {
- unsigned Size = 0;
+void DwarfDebug::handleMacroNodes(DIMacroNodeArray Nodes, DwarfCompileUnit &U) {
for (auto *MN : Nodes) {
if (auto *M = dyn_cast<DIMacro>(MN))
- Size += emitMacro(AS, *M);
+ emitMacro(*M);
else if (auto *F = dyn_cast<DIMacroFile>(MN))
- Size += emitMacroFile(AS, *F, U);
+ emitMacroFile(*F, U);
else
llvm_unreachable("Unexpected DI type!");
}
- return Size;
}
-unsigned DwarfDebug::emitMacro(AsmStreamerBase *AS, DIMacro &M) {
- int Size = 0;
- Size += AS->emitULEB128(M.getMacinfoType());
- Size += AS->emitULEB128(M.getLine());
+void DwarfDebug::emitMacro(DIMacro &M) {
+ Asm->EmitULEB128(M.getMacinfoType());
+ Asm->EmitULEB128(M.getLine());
StringRef Name = M.getName();
StringRef Value = M.getValue();
- Size += AS->emitBytes(Name);
+ Asm->OutStreamer->EmitBytes(Name);
if (!Value.empty()) {
// There should be one space between macro name and macro value.
- Size += AS->emitInt8(' ');
- Size += AS->emitBytes(Value);
+ Asm->EmitInt8(' ');
+ Asm->OutStreamer->EmitBytes(Value);
}
- Size += AS->emitInt8('\0');
- return Size;
+ Asm->EmitInt8('\0');
}
-unsigned DwarfDebug::emitMacroFile(AsmStreamerBase *AS, DIMacroFile &F,
- DwarfCompileUnit &U) {
- int Size = 0;
+void DwarfDebug::emitMacroFile(DIMacroFile &F, DwarfCompileUnit &U) {
assert(F.getMacinfoType() == dwarf::DW_MACINFO_start_file);
- Size += AS->emitULEB128(dwarf::DW_MACINFO_start_file);
- Size += AS->emitULEB128(F.getLine());
+ Asm->EmitULEB128(dwarf::DW_MACINFO_start_file);
+ Asm->EmitULEB128(F.getLine());
DIFile *File = F.getFile();
unsigned FID =
U.getOrCreateSourceID(File->getFilename(), File->getDirectory());
- Size += AS->emitULEB128(FID);
- Size += handleMacroNodes(AS, F.getElements(), U);
- Size += AS->emitULEB128(dwarf::DW_MACINFO_end_file);
- return Size;
+ Asm->EmitULEB128(FID);
+ handleMacroNodes(F.getElements(), U);
+ Asm->EmitULEB128(dwarf::DW_MACINFO_end_file);
}
-// Emit visible names into a debug macinfo section.
+/// Emit macros into a debug macinfo section.
void DwarfDebug::emitDebugMacinfo() {
- if (MCSection *Macinfo = Asm->getObjFileLowering().getDwarfMacinfoSection()) {
- // Start the dwarf macinfo section.
- Asm->OutStreamer->SwitchSection(Macinfo);
- }
- std::unique_ptr<AsmStreamerBase> AS(new EmittingAsmStreamer(Asm));
+ // Start the dwarf macinfo section.
+ Asm->OutStreamer->SwitchSection(
+ Asm->getObjFileLowering().getDwarfMacinfoSection());
+
for (const auto &P : CUMap) {
auto &TheCU = *P.second;
auto *SkCU = TheCU.getSkeleton();
DwarfCompileUnit &U = SkCU ? *SkCU : TheCU;
auto *CUNode = cast<DICompileUnit>(P.first);
- handleMacroNodes(AS.get(), CUNode->getMacros(), U);
+ Asm->OutStreamer->EmitLabel(U.getMacroLabelBegin());
+ handleMacroNodes(CUNode->getMacros(), U);
}
Asm->OutStreamer->AddComment("End Of Macro List Mark");
Asm->EmitInt8(0);
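The streaming rewrite above drops the size pre-computation pass: each CU's macros are emitted directly behind a MacroLabelBegin label that DW_AT_macro_info references. A self-contained sketch of the record layout being streamed, with a standard ULEB128 encoder (assumed semantics, not the AsmPrinter API):

#include <cstdint>
#include <string>
#include <vector>

void emitULEB128(std::vector<uint8_t> &Out, uint64_t V) {
  do {
    uint8_t Byte = V & 0x7f;
    V >>= 7;
    if (V) Byte |= 0x80;   // More bytes follow.
    Out.push_back(Byte);
  } while (V);
}

void emitMacroRecord(std::vector<uint8_t> &Out, uint64_t Type, uint64_t Line,
                     const std::string &Name, const std::string &Value) {
  emitULEB128(Out, Type);  // e.g. DW_MACINFO_define
  emitULEB128(Out, Line);
  Out.insert(Out.end(), Name.begin(), Name.end());
  if (!Value.empty()) {
    Out.push_back(' ');    // One space between macro name and value.
    Out.insert(Out.end(), Value.begin(), Value.end());
  }
  Out.push_back('\0');     // Records are NUL-terminated.
}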
@@ -1961,7 +1787,7 @@ void DwarfDebug::emitDebugMacinfo() {
// DWARF5 Experimental Separate Dwarf emitters.
void DwarfDebug::initSkeletonUnit(const DwarfUnit &U, DIE &Die,
- std::unique_ptr<DwarfUnit> NewU) {
+ std::unique_ptr<DwarfCompileUnit> NewU) {
NewU->addString(Die, dwarf::DW_AT_GNU_dwo_name,
U.getCUNode()->getSplitDebugFilename());
@@ -2050,21 +1876,19 @@ void DwarfDebug::addDwarfTypeUnitType(DwarfCompileUnit &CU,
if (!TypeUnitsUnderConstruction.empty() && AddrPool.hasBeenUsed())
return;
- const DwarfTypeUnit *&TU = DwarfTypeUnits[CTy];
- if (TU) {
- CU.addDIETypeSignature(RefDie, *TU);
+ auto Ins = TypeSignatures.insert(std::make_pair(CTy, 0));
+ if (!Ins.second) {
+ CU.addDIETypeSignature(RefDie, Ins.first->second);
return;
}
bool TopLevelType = TypeUnitsUnderConstruction.empty();
AddrPool.resetUsedFlag();
- auto OwnedUnit = make_unique<DwarfTypeUnit>(
- InfoHolder.getUnits().size() + TypeUnitsUnderConstruction.size(), CU, Asm,
- this, &InfoHolder, getDwoLineTable(CU));
+ auto OwnedUnit = make_unique<DwarfTypeUnit>(CU, Asm, this, &InfoHolder,
+ getDwoLineTable(CU));
DwarfTypeUnit &NewTU = *OwnedUnit;
DIE &UnitDie = NewTU.getUnitDie();
- TU = &NewTU;
TypeUnitsUnderConstruction.push_back(
std::make_pair(std::move(OwnedUnit), CTy));
@@ -2073,6 +1897,7 @@ void DwarfDebug::addDwarfTypeUnitType(DwarfCompileUnit &CU,
uint64_t Signature = makeTypeSignature(Identifier);
NewTU.setTypeSignature(Signature);
+ Ins.first->second = Signature;
if (useSplitDwarf())
NewTU.initSection(Asm->getObjFileLowering().getDwarfTypesDWOSection());
@@ -2096,7 +1921,7 @@ void DwarfDebug::addDwarfTypeUnitType(DwarfCompileUnit &CU,
// This is pessimistic as some of these types might not be dependent on
// the type that used an address.
for (const auto &TU : TypeUnitsToAdd)
- DwarfTypeUnits.erase(TU.second);
+ TypeSignatures.erase(TU.second);
// Construct this type in the CU directly.
// This is inefficient because all the dependent types will be rebuilt
@@ -2108,10 +1933,12 @@ void DwarfDebug::addDwarfTypeUnitType(DwarfCompileUnit &CU,
// If the type wasn't dependent on fission addresses, finish adding the type
// and all its dependent types.
- for (auto &TU : TypeUnitsToAdd)
- InfoHolder.addUnit(std::move(TU.first));
+ for (auto &TU : TypeUnitsToAdd) {
+ InfoHolder.computeSizeAndOffsetsForUnit(TU.first.get());
+ InfoHolder.emitUnit(TU.first.get(), useSplitDwarf());
+ }
}
- CU.addDIETypeSignature(RefDie, NewTU);
+ CU.addDIETypeSignature(RefDie, Signature);
}
// Accelerator table mutators - add each name along with its companion
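The DwarfTypeUnits map of unit pointers becomes a TypeSignatures map of 64-bit signatures, so a type emitted once can be referenced again without keeping its unit alive. A sketch of the caching pattern (assumed semantics; TypeSignatures here is keyed by an illustrative string identifier and makeTypeSignature is a hypothetical callback):

#include <cstdint>
#include <map>
#include <string>

std::map<std::string, uint64_t> TypeSignatures;

uint64_t getOrCreateSignature(
    const std::string &TypeId,
    uint64_t (*makeTypeSignature)(const std::string &)) {
  auto Ins = TypeSignatures.insert({TypeId, 0});
  if (!Ins.second)
    return Ins.first->second; // Already emitted: reuse the signature.
  uint64_t Sig = makeTypeSignature(TypeId);
  Ins.first->second = Sig;    // Record it for future references.
  return Sig;
}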
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h
index 460c186..6b06757 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h
@@ -14,14 +14,13 @@
#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_DWARFDEBUG_H
#define LLVM_LIB_CODEGEN_ASMPRINTER_DWARFDEBUG_H
-#include "AsmPrinterHandler.h"
#include "DbgValueHistoryCalculator.h"
+#include "DebugHandlerBase.h"
#include "DebugLocStream.h"
#include "DwarfAccelTable.h"
#include "DwarfFile.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
-#include "llvm/ADT/FoldingSet.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/StringMap.h"
@@ -69,15 +68,14 @@ class DbgVariable {
unsigned DebugLocListIndex = ~0u; /// Offset in DebugLocs.
const MachineInstr *MInsn = nullptr; /// DBG_VALUE instruction.
SmallVector<int, 1> FrameIndex; /// Frame index.
- DwarfDebug *DD;
public:
/// Construct a DbgVariable.
///
/// Creates a variable without any DW_AT_location. Call \a initializeMMI()
/// for MMI entries, or \a initializeDbgValue() for DBG_VALUE instructions.
- DbgVariable(const DILocalVariable *V, const DILocation *IA, DwarfDebug *DD)
- : Var(V), IA(IA), DD(DD) {}
+ DbgVariable(const DILocalVariable *V, const DILocation *IA)
+ : Var(V), IA(IA) {}
/// Initialize from the MMI table.
void initializeMMI(const DIExpression *E, int FI) {
@@ -111,6 +109,10 @@ public:
const DILocalVariable *getVariable() const { return Var; }
const DILocation *getInlinedAt() const { return IA; }
ArrayRef<const DIExpression *> getExpression() const { return Expr; }
+ const DIExpression *getSingleExpression() const {
+ assert(MInsn && Expr.size() <= 1);
+ return Expr.size() ? Expr[0] : nullptr;
+ }
void setDIE(DIE &D) { TheDIE = &D; }
DIE *getDIE() const { return TheDIE; }
void setDebugLocListIndex(unsigned O) { DebugLocListIndex = O; }
@@ -174,9 +176,9 @@ public:
const DIType *getType() const;
private:
- /// Look in the DwarfDebug map for the MDNode that
- /// corresponds to the reference.
- template <typename T> T *resolve(TypedDINodeRef<T> Ref) const;
+ template <typename T> T *resolve(TypedDINodeRef<T> Ref) const {
+ return Ref.resolve();
+ }
};
@@ -188,22 +190,13 @@ struct SymbolCU {
};
/// Collects and handles dwarf debug information.
-class DwarfDebug : public AsmPrinterHandler {
- /// Target of Dwarf emission.
- AsmPrinter *Asm;
-
- /// Collected machine module information.
- MachineModuleInfo *MMI;
-
+class DwarfDebug : public DebugHandlerBase {
/// All DIEValues are allocated through this allocator.
BumpPtrAllocator DIEValueAllocator;
/// Maps MDNode with its corresponding DwarfCompileUnit.
MapVector<const MDNode *, DwarfCompileUnit *> CUMap;
- /// Maps subprogram MDNode with its corresponding DwarfCompileUnit.
- MapVector<const MDNode *, DwarfCompileUnit *> SPMap;
-
/// Maps a CU DIE with its corresponding DwarfCompileUnit.
DenseMap<const DIE *, DwarfCompileUnit *> CUDieMap;
@@ -213,8 +206,6 @@ class DwarfDebug : public AsmPrinterHandler {
/// Size of each symbol emitted (for those symbols that have a specific size).
DenseMap<const MCSymbol *, uint64_t> SymSize;
- LexicalScopes LScopes;
-
/// Collection of abstract variables.
DenseMap<const MDNode *, std::unique_ptr<DbgVariable>> AbstractVariables;
SmallVector<std::unique_ptr<DbgVariable>, 64> ConcreteVariables;
@@ -227,32 +218,9 @@ class DwarfDebug : public AsmPrinterHandler {
/// create DIEs.
SmallPtrSet<const MDNode *, 16> ProcessedSPNodes;
- /// Maps instruction with label emitted before instruction.
- DenseMap<const MachineInstr *, MCSymbol *> LabelsBeforeInsn;
-
- /// Maps instruction with label emitted after instruction.
- DenseMap<const MachineInstr *, MCSymbol *> LabelsAfterInsn;
-
- /// History of DBG_VALUE and clobber instructions for each user
- /// variable. Variables are listed in order of appearance.
- DbgValueHistoryMap DbgValues;
-
- /// Previous instruction's location information. This is used to
- /// determine label location to indicate scope boundries in dwarf
- /// debug info.
- DebugLoc PrevInstLoc;
- MCSymbol *PrevLabel;
-
- /// This location indicates end of function prologue and beginning of
- /// function body.
- DebugLoc PrologEndLoc;
-
/// If nonnull, stores the current machine function we're processing.
const MachineFunction *CurFn;
- /// If nonnull, stores the current machine instruction we're processing.
- const MachineInstr *CurMI;
-
/// If nonnull, stores the CU in which the previous subprogram was contained.
const DwarfCompileUnit *PrevCU;
@@ -266,9 +234,9 @@ class DwarfDebug : public AsmPrinterHandler {
/// Holders for the various debug information flags that we might need to
/// have exposed. See accessor functions below for description.
- /// Map from MDNodes for user-defined types to the type units that
- /// describe them.
- DenseMap<const MDNode *, const DwarfTypeUnit *> DwarfTypeUnits;
+ /// Map from MDNodes for user-defined types to their type signatures. Also
+ /// used to keep track of which types we have emitted type units for.
+ DenseMap<const MDNode *, uint64_t> TypeSignatures;
SmallVector<
std::pair<std::unique_ptr<DwarfTypeUnit>, const DICompositeType *>, 1>
@@ -280,18 +248,19 @@ class DwarfDebug : public AsmPrinterHandler {
/// Whether to use the GNU TLS opcode (instead of the standard opcode).
bool UseGNUTLSOpcode;
- /// Whether to emit DW_AT_[MIPS_]linkage_name.
- bool UseLinkageNames;
+ /// Whether to use DWARF 2 bitfields (instead of the DWARF 4 format).
+ bool UseDWARF2Bitfields;
+
+ /// Whether to emit all linkage names, or just abstract subprograms.
+ bool UseAllLinkageNames;
/// Version of dwarf we're emitting.
unsigned DwarfVersion;
- /// Maps from a type identifier to the actual MDNode.
- DITypeIdentifierMap TypeIdentifierMap;
-
/// DWARF5 Experimental Options
/// @{
bool HasDwarfAccelTables;
+ bool HasAppleExtensionAttributes;
bool HasSplitDwarf;
/// Separated Dwarf Variables
@@ -324,9 +293,19 @@ class DwarfDebug : public AsmPrinterHandler {
// Identify a debugger for "tuning" the debug info.
DebuggerKind DebuggerTuning;
+ /// \defgroup DebuggerTuning Predicates to tune DWARF for a given debugger.
+ ///
+ /// Returns whether we are "tuning" for a given debugger.
+ /// Should be used only within the constructor, to set feature flags.
+ /// @{
+ bool tuneForGDB() const { return DebuggerTuning == DebuggerKind::GDB; }
+ bool tuneForLLDB() const { return DebuggerTuning == DebuggerKind::LLDB; }
+ bool tuneForSCE() const { return DebuggerTuning == DebuggerKind::SCE; }
+ /// @}
+
MCDwarfDwoLineTable *getDwoLineTable(const DwarfCompileUnit &);
- const SmallVectorImpl<std::unique_ptr<DwarfUnit>> &getUnits() {
+ const SmallVectorImpl<std::unique_ptr<DwarfCompileUnit>> &getUnits() {
return InfoHolder.getUnits();
}
@@ -347,9 +326,6 @@ class DwarfDebug : public AsmPrinterHandler {
/// Construct a DIE for this abstract scope.
void constructAbstractSubprogramScopeDIE(LexicalScope *Scope);
- /// Collect info for variables that were optimized out.
- void collectDeadVariables();
-
void finishVariableDefinitions();
void finishSubprogramDefinitions();
@@ -397,7 +373,7 @@ class DwarfDebug : public AsmPrinterHandler {
bool GnuStyle, MCSection *PSec, StringRef Name,
const StringMap<const DIE *> &(DwarfCompileUnit::*Accessor)() const);
- /// Emit visible names into a debug str section.
+ /// Emit null-terminated strings into a debug str section.
void emitDebugStr();
/// Emit variable locations into a debug loc section.
@@ -414,17 +390,15 @@ class DwarfDebug : public AsmPrinterHandler {
/// Emit macros into a debug macinfo section.
void emitDebugMacinfo();
- unsigned emitMacro(AsmStreamerBase *AS, DIMacro &M);
- unsigned emitMacroFile(AsmStreamerBase *AS, DIMacroFile &F,
- DwarfCompileUnit &U);
- unsigned handleMacroNodes(AsmStreamerBase *AS, DIMacroNodeArray Nodes,
- DwarfCompileUnit &U);
+ void emitMacro(DIMacro &M);
+ void emitMacroFile(DIMacroFile &F, DwarfCompileUnit &U);
+ void handleMacroNodes(DIMacroNodeArray Nodes, DwarfCompileUnit &U);
/// DWARF 5 Experimental Split Dwarf Emitters
/// Initialize common features of skeleton units.
void initSkeletonUnit(const DwarfUnit &U, DIE &Die,
- std::unique_ptr<DwarfUnit> NewU);
+ std::unique_ptr<DwarfCompileUnit> NewU);
/// Construct the split debug info compile unit for the debug info
/// section.
@@ -460,10 +434,6 @@ class DwarfDebug : public AsmPrinterHandler {
void recordSourceLine(unsigned Line, unsigned Col, const MDNode *Scope,
unsigned Flags);
- /// Identify instructions that mark the beginning or
- /// ending of a scope.
- void identifyScopeMarkers();
-
/// Populate LexicalScope entries with variables' info.
void collectVariableInfo(DwarfCompileUnit &TheCU, const DISubprogram *SP,
DenseSet<InlinedVariable> &ProcessedVars);
@@ -477,16 +447,6 @@ class DwarfDebug : public AsmPrinterHandler {
/// by MMI.
void collectVariableInfoFromMMITable(DenseSet<InlinedVariable> &P);
- /// Ensure that a label will be emitted before MI.
- void requestLabelBeforeInsn(const MachineInstr *MI) {
- LabelsBeforeInsn.insert(std::make_pair(MI, nullptr));
- }
-
- /// Ensure that a label will be emitted after MI.
- void requestLabelAfterInsn(const MachineInstr *MI) {
- LabelsAfterInsn.insert(std::make_pair(MI, nullptr));
- }
-
public:
//===--------------------------------------------------------------------===//
// Main entry points.
@@ -511,9 +471,6 @@ public:
/// Process beginning of an instruction.
void beginInstruction(const MachineInstr *MI) override;
- /// Process end of an instruction.
- void endInstruction() override;
-
/// Perform an MD5 checksum of \p Identifier and return the lower 64 bits.
static uint64_t makeTypeSignature(StringRef Identifier);
@@ -531,21 +488,17 @@ public:
SymSize[Sym] = Size;
}
- /// Returns whether to emit DW_AT_[MIPS_]linkage_name.
- bool useLinkageNames() const { return UseLinkageNames; }
+ /// Returns whether we should emit all DW_AT_[MIPS_]linkage_name.
+ /// If not, we still might emit them in certain cases (e.g. for abstract
+ /// subprograms).
+ bool useAllLinkageNames() const { return UseAllLinkageNames; }
/// Returns whether to use DW_OP_GNU_push_tls_address, instead of the
/// standard DW_OP_form_tls_address opcode
bool useGNUTLSOpcode() const { return UseGNUTLSOpcode; }
- /// \defgroup DebuggerTuning Predicates to tune DWARF for a given debugger.
- ///
- /// Returns whether we are "tuning" for a given debugger.
- /// @{
- bool tuneForGDB() const { return DebuggerTuning == DebuggerKind::GDB; }
- bool tuneForLLDB() const { return DebuggerTuning == DebuggerKind::LLDB; }
- bool tuneForSCE() const { return DebuggerTuning == DebuggerKind::SCE; }
- /// @}
+ /// Returns whether to use the DWARF2 format for bitfields instead of the
+ /// DWARF4 format.
+ bool useDWARF2Bitfields() const { return UseDWARF2Bitfields; }
// Experimental DWARF5 features.
@@ -553,6 +506,10 @@ public:
/// use to accelerate lookup.
bool useDwarfAccelTables() const { return HasDwarfAccelTables; }
+ bool useAppleExtensionAttributes() const {
+ return HasAppleExtensionAttributes;
+ }
+
/// Returns whether or not to change the current debug info for the
/// split dwarf proposal support.
bool useSplitDwarf() const { return HasSplitDwarf; }
@@ -577,12 +534,7 @@ public:
/// Find the MDNode for the given reference.
template <typename T> T *resolve(TypedDINodeRef<T> Ref) const {
- return Ref.resolve(TypeIdentifierMap);
- }
-
- /// Return the TypeIdentifierMap.
- const DITypeIdentifierMap &getTypeIdentifierMap() const {
- return TypeIdentifierMap;
+ return Ref.resolve();
}
/// Find the DwarfCompileUnit for the given CU Die.
@@ -608,12 +560,6 @@ public:
/// going to be null.
bool isLexicalScopeDIENull(LexicalScope *Scope);
- /// Return Label preceding the instruction.
- MCSymbol *getLabelBeforeInsn(const MachineInstr *MI);
-
- /// Return Label immediately following the instruction.
- MCSymbol *getLabelAfterInsn(const MachineInstr *MI);
-
// FIXME: Sink these functions down into DwarfFile/Dwarf*Unit.
SmallPtrSet<const MDNode *, 16> &getProcessedSPNodes() {
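The tuning predicates (tuneForGDB/tuneForLLDB/tuneForSCE) move out of the public interface, and their comment now restricts them to the constructor, where they set durable feature flags. A hedged sketch of that idiom; the exact policy below is an assumption for illustration, not this patch's actual defaults:

    // Inside the DwarfDebug constructor (illustrative policy only):
    // an SCE-tuned consumer prefers the DWARF 2 bitfield encoding...
    UseDWARF2Bitfields = tuneForSCE();
    // ...and the DW_AT_APPLE_* extension attributes only pay off for LLDB.
    HasAppleExtensionAttributes = tuneForLLDB();

The rest of the backend then queries the stable accessors (useDWARF2Bitfields(), useAppleExtensionAttributes()) instead of re-deriving policy from DebuggerTuning.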
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfException.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfException.h
index f4667b4..8287f28 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfException.h
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfException.h
@@ -16,6 +16,7 @@
#include "EHStreamer.h"
#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/MC/MCDwarf.h"
namespace llvm {
class MachineFunction;
@@ -29,12 +30,16 @@ protected:
bool shouldEmitCFI;
void markFunctionEnd() override;
+ void endFragment() override;
};
class LLVM_LIBRARY_VISIBILITY DwarfCFIException : public DwarfCFIExceptionBase {
/// Per-function flag to indicate if .cfi_personality should be emitted.
bool shouldEmitPersonality;
+ /// Per-function flag to indicate if .cfi_personality must be emitted.
+ bool forceEmitPersonality;
+
/// Per-function flag to indicate if .cfi_lsda should be emitted.
bool shouldEmitLSDA;
@@ -59,6 +64,9 @@ public:
/// Gather and emit post-function exception information.
void endFunction(const MachineFunction *) override;
+
+ void beginFragment(const MachineBasicBlock *MBB,
+ ExceptionSymbolProvider ESP) override;
};
class LLVM_LIBRARY_VISIBILITY ARMException : public DwarfCFIExceptionBase {
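DwarfCFIExceptionBase gains an endFragment() hook and DwarfCFIException a matching beginFragment(), so CFI can be opened and closed per machine-basic-block fragment rather than only per function. A minimal sketch of an override, assuming ExceptionSymbolProvider is the callback type declared alongside these hooks; the body is illustrative only:

    void DwarfCFIException::beginFragment(const MachineBasicBlock *MBB,
                                          ExceptionSymbolProvider ESP) {
      // Open a CFI scope for this fragment; personality/LSDA emission
      // would then follow the shouldEmit* flags tracked above.
      Asm->OutStreamer->EmitCFIStartProc(/*IsSimple=*/false);
    }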
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp
index 7b5b831..7dbc6cb 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp
@@ -65,8 +65,9 @@ void DwarfExpression::AddShr(unsigned ShiftBy) {
EmitOp(dwarf::DW_OP_shr);
}
-bool DwarfExpression::AddMachineRegIndirect(unsigned MachineReg, int Offset) {
- if (isFrameRegister(MachineReg)) {
+bool DwarfExpression::AddMachineRegIndirect(const TargetRegisterInfo &TRI,
+ unsigned MachineReg, int Offset) {
+ if (isFrameRegister(TRI, MachineReg)) {
// If variable offset is based in frame register then use fbreg.
EmitOp(dwarf::DW_OP_fbreg);
EmitSigned(Offset);
@@ -81,7 +82,8 @@ bool DwarfExpression::AddMachineRegIndirect(unsigned MachineReg, int Offset) {
return true;
}
-bool DwarfExpression::AddMachineRegPiece(unsigned MachineReg,
+bool DwarfExpression::AddMachineRegPiece(const TargetRegisterInfo &TRI,
+ unsigned MachineReg,
unsigned PieceSizeInBits,
unsigned PieceOffsetInBits) {
if (!TRI.isPhysicalRegister(MachineReg))
@@ -159,29 +161,37 @@ bool DwarfExpression::AddMachineRegPiece(unsigned MachineReg,
return CurPos > PieceOffsetInBits;
}
-void DwarfExpression::AddSignedConstant(int Value) {
- EmitOp(dwarf::DW_OP_consts);
- EmitSigned(Value);
- // The proper way to describe a constant value is
- // DW_OP_constu <const>, DW_OP_stack_value.
- // Unfortunately, DW_OP_stack_value was not available until DWARF-4,
- // so we will continue to generate DW_OP_constu <const> for DWARF-2
- // and DWARF-3. Technically, this is incorrect since DW_OP_const <const>
- // actually describes a value at a constant address, not a constant value.
- // However, in the past there was no better way to describe a constant
- // value, so the producers and consumers started to rely on heuristics
- // to disambiguate the value vs. location status of the expression.
- // See PR21176 for more details.
+void DwarfExpression::AddStackValue() {
if (DwarfVersion >= 4)
EmitOp(dwarf::DW_OP_stack_value);
}
-void DwarfExpression::AddUnsignedConstant(unsigned Value) {
+void DwarfExpression::AddSignedConstant(int64_t Value) {
+ EmitOp(dwarf::DW_OP_consts);
+ EmitSigned(Value);
+ AddStackValue();
+}
+
+void DwarfExpression::AddUnsignedConstant(uint64_t Value) {
EmitOp(dwarf::DW_OP_constu);
EmitUnsigned(Value);
- // cf. comment in DwarfExpression::AddSignedConstant().
- if (DwarfVersion >= 4)
- EmitOp(dwarf::DW_OP_stack_value);
+ AddStackValue();
+}
+
+void DwarfExpression::AddUnsignedConstant(const APInt &Value) {
+ unsigned Size = Value.getBitWidth();
+ const uint64_t *Data = Value.getRawData();
+
+ // Chop it up into 64-bit pieces, because that's the maximum that
+ // AddUnsignedConstant takes.
+ unsigned Offset = 0;
+ while (Offset < Size) {
+ AddUnsignedConstant(*Data++);
+ if (Offset == 0 && Size <= 64)
+ break;
+ AddOpPiece(std::min(Size-Offset, 64u), Offset);
+ Offset += 64;
+ }
}
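The new APInt overload slices a wide constant into 64-bit chunks, emitting each chunk through the existing uint64_t path and then marking the bits it covers with a piece operation. A worked trace, assuming DWARF 4+ (so DW_OP_stack_value is emitted) and two assumed uint64_t locals Lo and Hi:

    DebugLocDwarfExpression DwarfExpr(/*DwarfVersion=*/4, BS); // BS: a ByteStreamer
    DwarfExpr.AddUnsignedConstant(APInt(128, {Lo, Hi}));
    // Emits, in order:
    //   DW_OP_constu Lo, DW_OP_stack_value, piece op covering bits [0, 64)
    //   DW_OP_constu Hi, DW_OP_stack_value, piece op covering bits [64, 128)
    // A value of 64 bits or fewer takes the early break and emits only
    //   DW_OP_constu <value>, DW_OP_stack_value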
static unsigned getOffsetOrZero(unsigned OffsetInBits,
@@ -192,13 +202,14 @@ static unsigned getOffsetOrZero(unsigned OffsetInBits,
return OffsetInBits;
}
-bool DwarfExpression::AddMachineRegExpression(const DIExpression *Expr,
+bool DwarfExpression::AddMachineRegExpression(const TargetRegisterInfo &TRI,
+ const DIExpression *Expr,
unsigned MachineReg,
unsigned PieceOffsetInBits) {
auto I = Expr->expr_op_begin();
auto E = Expr->expr_op_end();
if (I == E)
- return AddMachineRegPiece(MachineReg);
+ return AddMachineRegPiece(TRI, MachineReg);
// Pattern-match combinations for which more efficient representations exist
// first.
@@ -208,7 +219,7 @@ bool DwarfExpression::AddMachineRegExpression(const DIExpression *Expr,
unsigned OffsetInBits = I->getArg(0);
unsigned SizeInBits = I->getArg(1);
// Piece always comes at the end of the expression.
- return AddMachineRegPiece(MachineReg, SizeInBits,
+ return AddMachineRegPiece(TRI, MachineReg, SizeInBits,
getOffsetOrZero(OffsetInBits, PieceOffsetInBits));
}
case dwarf::DW_OP_plus:
@@ -219,15 +230,15 @@ bool DwarfExpression::AddMachineRegExpression(const DIExpression *Expr,
if (N != E && N->getOp() == dwarf::DW_OP_deref) {
unsigned Offset = I->getArg(0);
ValidReg = AddMachineRegIndirect(
- MachineReg, I->getOp() == dwarf::DW_OP_plus ? Offset : -Offset);
+ TRI, MachineReg, I->getOp() == dwarf::DW_OP_plus ? Offset : -Offset);
std::advance(I, 2);
break;
} else
- ValidReg = AddMachineRegPiece(MachineReg);
+ ValidReg = AddMachineRegPiece(TRI, MachineReg);
}
case dwarf::DW_OP_deref: {
// [DW_OP_reg,DW_OP_deref] --> [DW_OP_breg].
- ValidReg = AddMachineRegIndirect(MachineReg);
+ ValidReg = AddMachineRegIndirect(TRI, MachineReg);
++I;
break;
}
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h
index 78ec937..5fff28d 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h
@@ -31,13 +31,10 @@ class DIELoc;
class DwarfExpression {
protected:
// Various convenience accessors that extract things out of AsmPrinter.
- const TargetRegisterInfo &TRI;
unsigned DwarfVersion;
public:
- DwarfExpression(const TargetRegisterInfo &TRI,
- unsigned DwarfVersion)
- : TRI(TRI), DwarfVersion(DwarfVersion) {}
+ DwarfExpression(unsigned DwarfVersion) : DwarfVersion(DwarfVersion) {}
virtual ~DwarfExpression() {}
/// Output a dwarf operand and an optional assembler comment.
@@ -48,7 +45,7 @@ public:
virtual void EmitUnsigned(uint64_t Value) = 0;
/// Return whether the given machine register is the frame register in the
/// current function.
- virtual bool isFrameRegister(unsigned MachineReg) = 0;
+ virtual bool isFrameRegister(const TargetRegisterInfo &TRI, unsigned MachineReg) = 0;
/// Emit a dwarf register operation.
void AddReg(int DwarfReg, const char *Comment = nullptr);
@@ -61,10 +58,24 @@ public:
void AddOpPiece(unsigned SizeInBits, unsigned OffsetInBits = 0);
/// Emit a shift-right dwarf expression.
void AddShr(unsigned ShiftBy);
+ /// Emit a DW_OP_stack_value, if supported.
+ ///
+ /// The proper way to describe a constant value is
+ /// DW_OP_constu <const>, DW_OP_stack_value.
+ /// Unfortunately, DW_OP_stack_value was not available until DWARF-4,
+ /// so we will continue to generate DW_OP_constu <const> for DWARF-2
+ /// and DWARF-3. Technically, this is incorrect since DW_OP_const <const>
+ /// actually describes a value at a constant address, not a constant value.
+ /// However, in the past there was no better way to describe a constant
+ /// value, so the producers and consumers started to rely on heuristics
+ /// to disambiguate the value vs. location status of the expression.
+ /// See PR21176 for more details.
+ void AddStackValue();
/// Emit an indirect dwarf register operation for the given machine register.
/// \return false if no DWARF register exists for MachineReg.
- bool AddMachineRegIndirect(unsigned MachineReg, int Offset = 0);
+ bool AddMachineRegIndirect(const TargetRegisterInfo &TRI, unsigned MachineReg,
+ int Offset = 0);
/// \brief Emit a partial DWARF register operation.
/// \param MachineReg the register
@@ -80,20 +91,24 @@ public:
/// subregisters that alias the register.
///
/// \return false if no DWARF register exists for MachineReg.
- bool AddMachineRegPiece(unsigned MachineReg, unsigned PieceSizeInBits = 0,
+ bool AddMachineRegPiece(const TargetRegisterInfo &TRI, unsigned MachineReg,
+ unsigned PieceSizeInBits = 0,
unsigned PieceOffsetInBits = 0);
/// Emit a signed constant.
- void AddSignedConstant(int Value);
+ void AddSignedConstant(int64_t Value);
+ /// Emit an unsigned constant.
+ void AddUnsignedConstant(uint64_t Value);
/// Emit an unsigned constant.
- void AddUnsignedConstant(unsigned Value);
+ void AddUnsignedConstant(const APInt &Value);
/// \brief Emit an entire expression on top of a machine register location.
///
/// \param PieceOffsetInBits If this is one piece out of a fragmented
/// location, this is the offset of the piece inside the entire variable.
/// \return false if no DWARF register exists for MachineReg.
- bool AddMachineRegExpression(const DIExpression *Expr, unsigned MachineReg,
+ bool AddMachineRegExpression(const TargetRegisterInfo &TRI,
+ const DIExpression *Expr, unsigned MachineReg,
unsigned PieceOffsetInBits = 0);
/// Emit the operations remaining in the DIExpressionIterator I.
/// \param PieceOffsetInBits If this is one piece out of a fragmented
@@ -108,14 +123,14 @@ class DebugLocDwarfExpression : public DwarfExpression {
ByteStreamer &BS;
public:
- DebugLocDwarfExpression(const TargetRegisterInfo &TRI,
- unsigned DwarfVersion, ByteStreamer &BS)
- : DwarfExpression(TRI, DwarfVersion), BS(BS) {}
+ DebugLocDwarfExpression(unsigned DwarfVersion, ByteStreamer &BS)
+ : DwarfExpression(DwarfVersion), BS(BS) {}
void EmitOp(uint8_t Op, const char *Comment = nullptr) override;
void EmitSigned(int64_t Value) override;
void EmitUnsigned(uint64_t Value) override;
- bool isFrameRegister(unsigned MachineReg) override;
+ bool isFrameRegister(const TargetRegisterInfo &TRI,
+ unsigned MachineReg) override;
};
/// DwarfExpression implementation for singular DW_AT_location.
@@ -129,7 +144,8 @@ public:
void EmitOp(uint8_t Op, const char *Comment = nullptr) override;
void EmitSigned(int64_t Value) override;
void EmitUnsigned(uint64_t Value) override;
- bool isFrameRegister(unsigned MachineReg) override;
+ bool isFrameRegister(const TargetRegisterInfo &TRI,
+ unsigned MachineReg) override;
};
}
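DwarfExpression no longer owns a TargetRegisterInfo; every register-related entry point now takes TRI as a parameter. Callers fetch it from the current function's subtarget, as the DwarfUnit.cpp hunks below do. A condensed sketch (Expr, Reg and Streamer are assumed locals):

    const TargetRegisterInfo &TRI = *Asm->MF->getSubtarget().getRegisterInfo();
    DebugLocDwarfExpression DwarfExpr(DD->getDwarfVersion(), Streamer);
    DwarfExpr.AddMachineRegExpression(TRI, Expr, Reg);

The design point: the builder is now stateless with respect to the target, so one instance no longer needs to be reconstructed when the subtarget (and hence TRI) changes between functions.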
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfFile.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfFile.cpp
index 51b27b4..e9fe98a 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfFile.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfFile.cpp
@@ -8,6 +8,7 @@
//===----------------------------------------------------------------------===//
#include "DwarfFile.h"
+#include "DwarfCompileUnit.h"
#include "DwarfDebug.h"
#include "DwarfUnit.h"
#include "llvm/ADT/STLExtras.h"
@@ -50,22 +51,25 @@ DIEAbbrev &DwarfFile::assignAbbrevNumber(DIE &Die) {
return *New;
}
-void DwarfFile::addUnit(std::unique_ptr<DwarfUnit> U) {
+void DwarfFile::addUnit(std::unique_ptr<DwarfCompileUnit> U) {
CUs.push_back(std::move(U));
}
// Emit the various dwarf units to the unit section USection with
// the abbreviations going into ASection.
void DwarfFile::emitUnits(bool UseOffsets) {
- for (const auto &TheU : CUs) {
- DIE &Die = TheU->getUnitDie();
- MCSection *USection = TheU->getSection();
- Asm->OutStreamer->SwitchSection(USection);
+ for (const auto &TheU : CUs)
+ emitUnit(TheU.get(), UseOffsets);
+}
- TheU->emitHeader(UseOffsets);
+void DwarfFile::emitUnit(DwarfUnit *TheU, bool UseOffsets) {
+ DIE &Die = TheU->getUnitDie();
+ MCSection *USection = TheU->getSection();
+ Asm->OutStreamer->SwitchSection(USection);
- Asm->emitDwarfDIE(Die);
- }
+ TheU->emitHeader(UseOffsets);
+
+ Asm->emitDwarfDIE(Die);
}
// Compute the size and offset for each DIE.
@@ -77,17 +81,20 @@ void DwarfFile::computeSizeAndOffsets() {
// DIE within each compile unit. All offsets are CU relative.
for (const auto &TheU : CUs) {
TheU->setDebugInfoOffset(SecOffset);
+ SecOffset += computeSizeAndOffsetsForUnit(TheU.get());
+ }
+}
- // CU-relative offset is reset to 0 here.
- unsigned Offset = sizeof(int32_t) + // Length of Unit Info
- TheU->getHeaderSize(); // Unit-specific headers
+unsigned DwarfFile::computeSizeAndOffsetsForUnit(DwarfUnit *TheU) {
+ // CU-relative offset is reset to 0 here.
+ unsigned Offset = sizeof(int32_t) + // Length of Unit Info
+ TheU->getHeaderSize(); // Unit-specific headers
- // EndOffset here is CU-relative, after laying out
- // all of the CU DIE.
- unsigned EndOffset = computeSizeAndOffset(TheU->getUnitDie(), Offset);
- SecOffset += EndOffset;
- }
+ // The return value here is CU-relative, after laying out
+ // all of the CU's DIEs.
+ return computeSizeAndOffset(TheU->getUnitDie(), Offset);
}
+
// Compute the size and offset of a DIE. The offset is relative to start of the
// CU. It returns the offset after laying out the DIE.
unsigned DwarfFile::computeSizeAndOffset(DIE &Die, unsigned Offset) {
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfFile.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfFile.h
index 8402027..b73d89b 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfFile.h
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfFile.h
@@ -16,14 +16,15 @@
#include "llvm/ADT/FoldingSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringMap.h"
+#include "llvm/IR/Metadata.h"
#include "llvm/Support/Allocator.h"
#include <memory>
-#include <string>
#include <vector>
namespace llvm {
class AsmPrinter;
class DbgVariable;
+class DwarfCompileUnit;
class DwarfUnit;
class DIEAbbrev;
class MCSymbol;
@@ -46,7 +47,7 @@ class DwarfFile {
std::vector<DIEAbbrev *> Abbreviations;
// A pointer to all units in the section.
- SmallVector<std::unique_ptr<DwarfUnit>, 1> CUs;
+ SmallVector<std::unique_ptr<DwarfCompileUnit>, 1> CUs;
DwarfStringPool StrPool;
@@ -66,7 +67,9 @@ public:
~DwarfFile();
- const SmallVectorImpl<std::unique_ptr<DwarfUnit>> &getUnits() { return CUs; }
+ const SmallVectorImpl<std::unique_ptr<DwarfCompileUnit>> &getUnits() {
+ return CUs;
+ }
/// \brief Compute the size and offset of a DIE given an incoming Offset.
unsigned computeSizeAndOffset(DIE &Die, unsigned Offset);
@@ -74,6 +77,10 @@ public:
/// \brief Compute the size and offset of all the DIEs.
void computeSizeAndOffsets();
+ /// \brief Compute the size and offset of all the DIEs in the given unit.
+ /// \returns The size of the root DIE.
+ unsigned computeSizeAndOffsetsForUnit(DwarfUnit *TheU);
+
/// Define a unique number for the abbreviation.
///
/// Compute the abbreviation for \c Die, look up its unique number, and
@@ -81,12 +88,15 @@ public:
DIEAbbrev &assignAbbrevNumber(DIE &Die);
/// \brief Add a unit to the list of CUs.
- void addUnit(std::unique_ptr<DwarfUnit> U);
+ void addUnit(std::unique_ptr<DwarfCompileUnit> U);
/// \brief Emit all of the units to the section listed with the given
/// abbreviation section.
void emitUnits(bool UseOffsets);
+ /// \brief Emit the given unit to its section.
+ void emitUnit(DwarfUnit *U, bool UseOffsets);
+
/// \brief Emit a set of abbreviations to the specific section.
void emitAbbrevs(MCSection *);
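Both the layout and emission loops are now thin wrappers over per-unit helpers, so a single unit can be sized and written in isolation. A hedged sketch of the unit-granular flow this enables (the call site is illustrative; only the two helpers are from this patch):

    // Lay out one unit, then emit it to its own section.
    Holder.computeSizeAndOffsetsForUnit(TheU);
    Holder.emitUnit(TheU, /*UseOffsets=*/true);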
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
index d75fea5..4100d72 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
@@ -46,9 +46,8 @@ GenerateDwarfTypeUnits("generate-type-units", cl::Hidden,
DIEDwarfExpression::DIEDwarfExpression(const AsmPrinter &AP, DwarfUnit &DU,
DIELoc &DIE)
- : DwarfExpression(*AP.MF->getSubtarget().getRegisterInfo(),
- AP.getDwarfDebug()->getDwarfVersion()),
- AP(AP), DU(DU), DIE(DIE) {}
+ : DwarfExpression(AP.getDwarfDebug()->getDwarfVersion()), AP(AP), DU(DU),
+ DIE(DIE) {}
void DIEDwarfExpression::EmitOp(uint8_t Op, const char* Comment) {
DU.addUInt(DIE, dwarf::DW_FORM_data1, Op);
@@ -59,25 +58,24 @@ void DIEDwarfExpression::EmitSigned(int64_t Value) {
void DIEDwarfExpression::EmitUnsigned(uint64_t Value) {
DU.addUInt(DIE, dwarf::DW_FORM_udata, Value);
}
-bool DIEDwarfExpression::isFrameRegister(unsigned MachineReg) {
+bool DIEDwarfExpression::isFrameRegister(const TargetRegisterInfo &TRI,
+ unsigned MachineReg) {
return MachineReg == TRI.getFrameRegister(*AP.MF);
}
-DwarfUnit::DwarfUnit(unsigned UID, dwarf::Tag UnitTag,
- const DICompileUnit *Node, AsmPrinter *A, DwarfDebug *DW,
- DwarfFile *DWU)
- : UniqueID(UID), CUNode(Node),
- UnitDie(*DIE::get(DIEValueAllocator, UnitTag)), DebugInfoOffset(0),
- Asm(A), DD(DW), DU(DWU), IndexTyDie(nullptr), Section(nullptr) {
+DwarfUnit::DwarfUnit(dwarf::Tag UnitTag, const DICompileUnit *Node,
+ AsmPrinter *A, DwarfDebug *DW, DwarfFile *DWU)
+ : CUNode(Node), UnitDie(*DIE::get(DIEValueAllocator, UnitTag)), Asm(A),
+ DD(DW), DU(DWU), IndexTyDie(nullptr), Section(nullptr) {
assert(UnitTag == dwarf::DW_TAG_compile_unit ||
UnitTag == dwarf::DW_TAG_type_unit);
}
-DwarfTypeUnit::DwarfTypeUnit(unsigned UID, DwarfCompileUnit &CU, AsmPrinter *A,
+DwarfTypeUnit::DwarfTypeUnit(DwarfCompileUnit &CU, AsmPrinter *A,
DwarfDebug *DW, DwarfFile *DWU,
MCDwarfDwoLineTable *SplitLineTable)
- : DwarfUnit(UID, dwarf::DW_TAG_type_unit, CU.getCUNode(), A, DW, DWU),
- CU(CU), SplitLineTable(SplitLineTable) {
+ : DwarfUnit(dwarf::DW_TAG_type_unit, CU.getCUNode(), A, DW, DWU), CU(CU),
+ SplitLineTable(SplitLineTable) {
if (SplitLineTable)
addSectionOffset(UnitDie, dwarf::DW_AT_stmt_list, 0);
}
@@ -268,7 +266,7 @@ void DwarfUnit::addDIEEntry(DIE &Die, dwarf::Attribute Attribute, DIE &Entry) {
addDIEEntry(Die, Attribute, DIEEntry(Entry));
}
-void DwarfUnit::addDIETypeSignature(DIE &Die, const DwarfTypeUnit &Type) {
+void DwarfUnit::addDIETypeSignature(DIE &Die, uint64_t Signature) {
// Flag the type unit reference as a declaration so that if it contains
// members (implicit special members, static data member definitions, member
// declarations for definitions in this CU, etc) consumers don't get confused
@@ -276,7 +274,7 @@ void DwarfUnit::addDIETypeSignature(DIE &Die, const DwarfTypeUnit &Type) {
addFlag(Die, dwarf::DW_AT_declaration);
Die.addValue(DIEValueAllocator, dwarf::DW_AT_signature,
- dwarf::DW_FORM_ref_sig8, DIETypeSignature(Type));
+ dwarf::DW_FORM_ref_sig8, DIEInteger(Signature));
}
void DwarfUnit::addDIETypeSignature(DIE &Die, dwarf::Attribute Attribute,
@@ -370,14 +368,16 @@ void DwarfUnit::addSourceLine(DIE &Die, const DINamespace *NS) {
bool DwarfUnit::addRegisterOpPiece(DIELoc &TheDie, unsigned Reg,
unsigned SizeInBits, unsigned OffsetInBits) {
DIEDwarfExpression Expr(*Asm, *this, TheDie);
- Expr.AddMachineRegPiece(Reg, SizeInBits, OffsetInBits);
+ Expr.AddMachineRegPiece(*Asm->MF->getSubtarget().getRegisterInfo(), Reg,
+ SizeInBits, OffsetInBits);
return true;
}
bool DwarfUnit::addRegisterOffset(DIELoc &TheDie, unsigned Reg,
int64_t Offset) {
DIEDwarfExpression Expr(*Asm, *this, TheDie);
- return Expr.AddMachineRegIndirect(Reg, Offset);
+ return Expr.AddMachineRegIndirect(*Asm->MF->getSubtarget().getRegisterInfo(),
+ Reg, Offset);
}
/* Byref variables, in Blocks, are declared by the programmer as "SomeType
@@ -561,32 +561,6 @@ static bool isUnsignedDIType(DwarfDebug *DD, const DIType *Ty) {
Ty->getTag() == dwarf::DW_TAG_unspecified_type;
}
-/// If this type is derived from a base type then return base type size.
-static uint64_t getBaseTypeSize(DwarfDebug *DD, const DIDerivedType *Ty) {
- unsigned Tag = Ty->getTag();
-
- if (Tag != dwarf::DW_TAG_member && Tag != dwarf::DW_TAG_typedef &&
- Tag != dwarf::DW_TAG_const_type && Tag != dwarf::DW_TAG_volatile_type &&
- Tag != dwarf::DW_TAG_restrict_type)
- return Ty->getSizeInBits();
-
- auto *BaseType = DD->resolve(Ty->getBaseType());
-
- assert(BaseType && "Unexpected invalid base type");
-
- // If this is a derived type, go ahead and get the base type, unless it's a
- // reference, in which case it's just the size of the field. Pointer types
- // have no need of this since they're a different kind of qualification on
- // the type.
- if (BaseType->getTag() == dwarf::DW_TAG_reference_type ||
- BaseType->getTag() == dwarf::DW_TAG_rvalue_reference_type)
- return Ty->getSizeInBits();
-
- if (auto *DT = dyn_cast<DIDerivedType>(BaseType))
- return getBaseTypeSize(DD, DT);
-
- return BaseType->getSizeInBits();
-}
-
void DwarfUnit::addConstantFPValue(DIE &Die, const MachineOperand &MO) {
assert(MO.isFPImm() && "Invalid machine operand!");
DIEBlock *Block = new (DIEValueAllocator) DIEBlock;
@@ -667,7 +641,7 @@ void DwarfUnit::addConstantValue(DIE &Die, const APInt &Val, bool Unsigned) {
}
void DwarfUnit::addLinkageName(DIE &Die, StringRef LinkageName) {
- if (!LinkageName.empty() && DD->useLinkageNames())
+ if (!LinkageName.empty())
addString(Die,
DD->getDwarfVersion() >= 4 ? dwarf::DW_AT_linkage_name
: dwarf::DW_AT_MIPS_linkage_name,
@@ -720,8 +694,6 @@ DIE *DwarfUnit::getOrCreateTypeDIE(const MDNode *TyNode) {
return nullptr;
auto *Ty = cast<DIType>(TyNode);
- assert(Ty == resolve(Ty->getRef()) &&
- "type was not uniqued, possible ODR violation.");
// DW_TAG_restrict_type is not supported in DWARF2
if (Ty->getTag() == dwarf::DW_TAG_restrict_type && DD->getDwarfVersion() <= 2)
@@ -903,6 +875,11 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, const DISubroutineType *CTy) {
Language == dwarf::DW_LANG_ObjC))
addFlag(Buffer, dwarf::DW_AT_prototyped);
+ // Add a DW_AT_calling_convention if this has an explicit convention.
+ if (CTy->getCC() && CTy->getCC() != dwarf::DW_CC_normal)
+ addUInt(Buffer, dwarf::DW_AT_calling_convention, dwarf::DW_FORM_data1,
+ CTy->getCC());
+
if (CTy->isLValueReference())
addFlag(Buffer, dwarf::DW_AT_reference);
@@ -1050,14 +1027,18 @@ void DwarfUnit::constructTemplateValueParameterDIE(
if (ConstantInt *CI = mdconst::dyn_extract<ConstantInt>(Val))
addConstantValue(ParamDIE, CI, resolve(VP->getType()));
else if (GlobalValue *GV = mdconst::dyn_extract<GlobalValue>(Val)) {
- // For declaration non-type template parameters (such as global values and
- // functions)
- DIELoc *Loc = new (DIEValueAllocator) DIELoc;
- addOpAddress(*Loc, Asm->getSymbol(GV));
- // Emit DW_OP_stack_value to use the address as the immediate value of the
- // parameter, rather than a pointer to it.
- addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_stack_value);
- addBlock(ParamDIE, dwarf::DW_AT_location, Loc);
+ // We cannot describe the location of dllimport'd entities: the
+ // computation of their address requires loads from the IAT.
+ if (!GV->hasDLLImportStorageClass()) {
+ // For declaration non-type template parameters (such as global values
+ // and functions)
+ DIELoc *Loc = new (DIEValueAllocator) DIELoc;
+ addOpAddress(*Loc, Asm->getSymbol(GV));
+ // Emit DW_OP_stack_value to use the address as the immediate value of
+ // the parameter, rather than a pointer to it.
+ addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_stack_value);
+ addBlock(ParamDIE, dwarf::DW_AT_location, Loc);
+ }
} else if (VP->getTag() == dwarf::DW_TAG_GNU_template_template_param) {
assert(isa<MDString>(Val));
addString(ParamDIE, dwarf::DW_AT_GNU_template_name,
@@ -1171,7 +1152,9 @@ bool DwarfUnit::applySubprogramDefinitionAttributes(const DISubprogram *SP,
assert(((LinkageName.empty() || DeclLinkageName.empty()) ||
LinkageName == DeclLinkageName) &&
"decl has a linkage name and it is different");
- if (DeclLinkageName.empty())
+ if (DeclLinkageName.empty() &&
+ // Always emit it for abstract subprograms.
+ (DD->useAllLinkageNames() || DU->getAbstractSPDies().lookup(SP)))
addLinkageName(SPDie, LinkageName);
if (!DeclDie)
@@ -1207,9 +1190,16 @@ void DwarfUnit::applySubprogramAttributes(const DISubprogram *SP, DIE &SPDie,
Language == dwarf::DW_LANG_ObjC))
addFlag(SPDie, dwarf::DW_AT_prototyped);
+ unsigned CC = 0;
DITypeRefArray Args;
- if (const DISubroutineType *SPTy = SP->getType())
+ if (const DISubroutineType *SPTy = SP->getType()) {
Args = SPTy->getTypeArray();
+ CC = SPTy->getCC();
+ }
+
+ // Add a DW_AT_calling_convention if this has an explicit convention.
+ if (CC && CC != dwarf::DW_CC_normal)
+ addUInt(SPDie, dwarf::DW_AT_calling_convention, dwarf::DW_FORM_data1, CC);
// Add a return type. If this is a type like a C/C++ void type we don't add a
// return type.
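Subroutine types and subprogram DIEs now both carry DW_AT_calling_convention whenever the front end recorded an explicit, non-default convention. An illustrative source-level trigger, assuming an x86 target where fastcall maps onto a non-default DW_CC value:

    // Both the DW_TAG_subroutine_type and the DW_TAG_subprogram for this
    // declaration would now get a DW_AT_calling_convention attribute.
    int __attribute__((fastcall)) scale(int X);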
@@ -1220,10 +1210,12 @@ void DwarfUnit::applySubprogramAttributes(const DISubprogram *SP, DIE &SPDie,
unsigned VK = SP->getVirtuality();
if (VK) {
addUInt(SPDie, dwarf::DW_AT_virtuality, dwarf::DW_FORM_data1, VK);
- DIELoc *Block = getDIELoc();
- addUInt(*Block, dwarf::DW_FORM_data1, dwarf::DW_OP_constu);
- addUInt(*Block, dwarf::DW_FORM_udata, SP->getVirtualIndex());
- addBlock(SPDie, dwarf::DW_AT_vtable_elem_location, Block);
+ if (SP->getVirtualIndex() != -1u) {
+ DIELoc *Block = getDIELoc();
+ addUInt(*Block, dwarf::DW_FORM_data1, dwarf::DW_OP_constu);
+ addUInt(*Block, dwarf::DW_FORM_udata, SP->getVirtualIndex());
+ addBlock(SPDie, dwarf::DW_AT_vtable_elem_location, Block);
+ }
ContainingTypeMap.insert(
std::make_pair(&SPDie, resolve(SP->getContainingType())));
}
@@ -1242,11 +1234,13 @@ void DwarfUnit::applySubprogramAttributes(const DISubprogram *SP, DIE &SPDie,
if (!SP->isLocalToUnit())
addFlag(SPDie, dwarf::DW_AT_external);
- if (SP->isOptimized())
- addFlag(SPDie, dwarf::DW_AT_APPLE_optimized);
+ if (DD->useAppleExtensionAttributes()) {
+ if (SP->isOptimized())
+ addFlag(SPDie, dwarf::DW_AT_APPLE_optimized);
- if (unsigned isa = Asm->getISAEncoding())
- addUInt(SPDie, dwarf::DW_AT_APPLE_isa, dwarf::DW_FORM_flag, isa);
+ if (unsigned isa = Asm->getISAEncoding())
+ addUInt(SPDie, dwarf::DW_AT_APPLE_isa, dwarf::DW_FORM_flag, isa);
+ }
if (SP->isLValueReference())
addFlag(SPDie, dwarf::DW_AT_reference);
@@ -1388,58 +1382,49 @@ void DwarfUnit::constructMemberDIE(DIE &Buffer, const DIDerivedType *DT) {
addBlock(MemberDie, dwarf::DW_AT_data_member_location, VBaseLocationDie);
} else {
uint64_t Size = DT->getSizeInBits();
- uint64_t FieldSize = getBaseTypeSize(DD, DT);
+ uint64_t FieldSize = DD->getBaseTypeSize(DT);
uint64_t OffsetInBytes;
- if (FieldSize && Size != FieldSize) {
+ bool IsBitfield = FieldSize && Size != FieldSize;
+ if (IsBitfield) {
// Handle bitfield, assume bytes are 8 bits.
- addUInt(MemberDie, dwarf::DW_AT_byte_size, None, FieldSize/8);
+ if (DD->useDWARF2Bitfields())
+ addUInt(MemberDie, dwarf::DW_AT_byte_size, None, FieldSize/8);
addUInt(MemberDie, dwarf::DW_AT_bit_size, None, Size);
- //
- // The DWARF 2 DW_AT_bit_offset counts the bits from the most
- // significant bit of the aligned storage unit containing the bit field to
- // the most significant bit of the bit field.
- //
- // FIXME: DWARF 4 states that DW_AT_data_bit_offset (which
- // counts from the beginning, regardless of endianness) should
- // be used instead.
- //
- //
- // Struct Align Align Align
- // v v v v
- // +-----------+-----*-----+-----*-----+--
- // | ... |b1|b2|b3|b4|
- // +-----------+-----*-----+-----*-----+--
- // | | |<-- Size ->| |
- // |<---- Offset --->| |<--->|
- // | | | \_ DW_AT_bit_offset (little endian)
- // | |<--->|
- // |<--------->| \_ StartBitOffset = DW_AT_bit_offset (big endian)
- // \ = DW_AT_data_bit_offset (biendian)
- // \_ OffsetInBytes
+
uint64_t Offset = DT->getOffsetInBits();
uint64_t Align = DT->getAlignInBits() ? DT->getAlignInBits() : FieldSize;
uint64_t AlignMask = ~(Align - 1);
// The bits from the start of the storage unit to the start of the field.
uint64_t StartBitOffset = Offset - (Offset & AlignMask);
- // The endian-dependent DWARF 2 offset.
- uint64_t DwarfBitOffset = Asm->getDataLayout().isLittleEndian()
- ? OffsetToAlignment(Offset + Size, Align)
- : StartBitOffset;
-
// The byte offset of the field's aligned storage unit inside the struct.
OffsetInBytes = (Offset - StartBitOffset) / 8;
- addUInt(MemberDie, dwarf::DW_AT_bit_offset, None, DwarfBitOffset);
- } else
+
+ if (DD->useDWARF2Bitfields()) {
+ uint64_t HiMark = (Offset + FieldSize) & AlignMask;
+ uint64_t FieldOffset = (HiMark - FieldSize);
+ Offset -= FieldOffset;
+
+ // Maybe we need to work from the other end.
+ if (Asm->getDataLayout().isLittleEndian())
+ Offset = FieldSize - (Offset + Size);
+
+ addUInt(MemberDie, dwarf::DW_AT_bit_offset, None, Offset);
+ OffsetInBytes = FieldOffset >> 3;
+ } else {
+ addUInt(MemberDie, dwarf::DW_AT_data_bit_offset, None, Offset);
+ }
+ } else {
// This is not a bitfield.
OffsetInBytes = DT->getOffsetInBits() / 8;
+ }
if (DD->getDwarfVersion() <= 2) {
DIELoc *MemLocationDie = new (DIEValueAllocator) DIELoc;
addUInt(*MemLocationDie, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst);
addUInt(*MemLocationDie, dwarf::DW_FORM_udata, OffsetInBytes);
addBlock(MemberDie, dwarf::DW_AT_data_member_location, MemLocationDie);
- } else
+ } else if (!IsBitfield || DD->useDWARF2Bitfields())
addUInt(MemberDie, dwarf::DW_AT_data_member_location, None,
OffsetInBytes);
}
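The rewritten bitfield logic keys off useDWARF2Bitfields(): older consumers keep the endian-dependent DW_AT_bit_offset encoding, while newer output uses DW_AT_data_bit_offset, which counts from the start of the storage unit regardless of endianness. A worked example for field B of struct { unsigned A : 3; unsigned B : 5; } on a little-endian target, where Size = 5, FieldSize = 32, Offset = 3, Align = 32:

    // DWARF 4 path: a single endian-independent attribute.
    //   DW_AT_bit_size = 5, DW_AT_data_bit_offset = 3
    // DWARF 2 path: HiMark = (3 + 32) & ~31 = 32, FieldOffset = 32 - 32 = 0,
    // so Offset stays 3; little-endian then flips it: 32 - (3 + 5) = 24.
    //   DW_AT_byte_size = 4, DW_AT_bit_size = 5, DW_AT_bit_offset = 24,
    //   DW_AT_data_member_location = 0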
@@ -1524,8 +1509,11 @@ void DwarfUnit::emitHeader(bool UseOffsets) {
// start of the section. Use a relocatable offset where needed to ensure
// linking doesn't invalidate that offset.
const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
- Asm->emitDwarfSymbolReference(TLOF.getDwarfAbbrevSection()->getBeginSymbol(),
- UseOffsets);
+ if (UseOffsets)
+ Asm->EmitInt32(0);
+ else
+ Asm->emitDwarfSymbolReference(
+ TLOF.getDwarfAbbrevSection()->getBeginSymbol(), false);
Asm->OutStreamer->AddComment("Address Size (in bytes)");
Asm->EmitInt8(Asm->getDataLayout().getPointerSize());
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h
index 82760bf..e225f92 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h
@@ -67,9 +67,6 @@ public:
/// source file.
class DwarfUnit {
protected:
- /// A numeric ID unique among all CUs in the module
- unsigned UniqueID;
-
/// MDNode for the compile unit.
const DICompileUnit *CUNode;
@@ -79,9 +76,6 @@ protected:
/// Unit debug information entry.
DIE &UnitDie;
- /// Offset of the UnitDie from beginning of debug info section.
- unsigned DebugInfoOffset;
-
/// Target of Dwarf emission.
AsmPrinter *Asm;
@@ -110,8 +104,8 @@ protected:
/// The section this unit will be emitted in.
MCSection *Section;
- DwarfUnit(unsigned UID, dwarf::Tag, const DICompileUnit *CU, AsmPrinter *A,
- DwarfDebug *DW, DwarfFile *DWU);
+ DwarfUnit(dwarf::Tag, const DICompileUnit *CU, AsmPrinter *A, DwarfDebug *DW,
+ DwarfFile *DWU);
bool applySubprogramDefinitionAttributes(const DISubprogram *SP, DIE &SPDie);
@@ -127,14 +121,10 @@ public:
// Accessors.
AsmPrinter* getAsmPrinter() const { return Asm; }
- unsigned getUniqueID() const { return UniqueID; }
uint16_t getLanguage() const { return CUNode->getSourceLanguage(); }
const DICompileUnit *getCUNode() const { return CUNode; }
DIE &getUnitDie() { return UnitDie; }
- unsigned getDebugInfoOffset() const { return DebugInfoOffset; }
- void setDebugInfoOffset(unsigned DbgInfoOff) { DebugInfoOffset = DbgInfoOff; }
-
/// Return true if this compile unit has something to write out.
bool hasContent() const { return UnitDie.hasChildren(); }
@@ -221,7 +211,7 @@ public:
void addDIEEntry(DIE &Die, dwarf::Attribute Attribute, DIEEntry Entry);
/// Add a type's DW_AT_signature and set the declaration flag.
- void addDIETypeSignature(DIE &Die, const DwarfTypeUnit &Type);
+ void addDIETypeSignature(DIE &Die, uint64_t Signature);
/// Add an attribute containing the type signature for a unique identifier.
void addDIETypeSignature(DIE &Die, dwarf::Attribute Attribute,
StringRef Identifier);
@@ -338,7 +328,7 @@ protected:
/// Look in the DwarfDebug map for the MDNode that corresponds to the
/// reference.
template <typename T> T *resolve(TypedDINodeRef<T> Ref) const {
- return DD->resolve(Ref);
+ return Ref.resolve();
}
private:
@@ -383,12 +373,10 @@ class DwarfTypeUnit : public DwarfUnit {
bool isDwoUnit() const override;
public:
- DwarfTypeUnit(unsigned UID, DwarfCompileUnit &CU, AsmPrinter *A,
- DwarfDebug *DW, DwarfFile *DWU,
- MCDwarfDwoLineTable *SplitLineTable = nullptr);
+ DwarfTypeUnit(DwarfCompileUnit &CU, AsmPrinter *A, DwarfDebug *DW,
+ DwarfFile *DWU, MCDwarfDwoLineTable *SplitLineTable = nullptr);
void setTypeSignature(uint64_t Signature) { TypeSignature = Signature; }
- uint64_t getTypeSignature() const { return TypeSignature; }
void setType(const DIE *Ty) { this->Ty = Ty; }
/// Emit the header for this unit, not including the initial length field.
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/EHStreamer.h b/contrib/llvm/lib/CodeGen/AsmPrinter/EHStreamer.h
index c6a0e9d..080fdd1 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/EHStreamer.h
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/EHStreamer.h
@@ -22,7 +22,6 @@ struct LandingPadInfo;
class MachineModuleInfo;
class MachineInstr;
class MachineFunction;
-class AsmPrinter;
class MCSymbol;
class MCSymbolRefExpr;
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.cpp
deleted file mode 100644
index 1e2f55b..0000000
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.cpp
+++ /dev/null
@@ -1,411 +0,0 @@
-//===-- llvm/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.cpp --*- C++ -*--===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains support for writing line tables info into COFF files.
-//
-//===----------------------------------------------------------------------===//
-
-#include "WinCodeViewLineTables.h"
-#include "llvm/MC/MCExpr.h"
-#include "llvm/MC/MCSymbol.h"
-#include "llvm/Support/COFF.h"
-
-namespace llvm {
-
-StringRef WinCodeViewLineTables::getFullFilepath(const MDNode *S) {
- assert(S);
- assert((isa<DICompileUnit>(S) || isa<DIFile>(S) || isa<DISubprogram>(S) ||
- isa<DILexicalBlockBase>(S)) &&
- "Unexpected scope info");
-
- auto *Scope = cast<DIScope>(S);
- StringRef Dir = Scope->getDirectory(),
- Filename = Scope->getFilename();
- std::string &Filepath =
- DirAndFilenameToFilepathMap[std::make_pair(Dir, Filename)];
- if (!Filepath.empty())
- return Filepath;
-
- // Clang emits directory and relative filename info into the IR, but CodeView
- // operates on full paths. We could change Clang to emit full paths too, but
- // that would increase the IR size and it's probably not needed by other users.
- // For now, just concatenate and canonicalize the path here.
- if (Filename.find(':') == 1)
- Filepath = Filename;
- else
- Filepath = (Dir + "\\" + Filename).str();
-
- // Canonicalize the path. We have to do it textually because we may no longer
- // have access to the file in the filesystem.
- // First, replace all slashes with backslashes.
- std::replace(Filepath.begin(), Filepath.end(), '/', '\\');
-
- // Replace all "\.\" with "\".
- size_t Cursor = 0;
- while ((Cursor = Filepath.find("\\.\\", Cursor)) != std::string::npos)
- Filepath.erase(Cursor, 2);
-
- // Replace all "\XXX\..\" with "\". Don't try too hard though as the original
- // path should be well-formatted, e.g. start with a drive letter, etc.
- Cursor = 0;
- while ((Cursor = Filepath.find("\\..\\", Cursor)) != std::string::npos) {
- // Something's wrong if the path starts with "\..\", abort.
- if (Cursor == 0)
- break;
-
- size_t PrevSlash = Filepath.rfind('\\', Cursor - 1);
- if (PrevSlash == std::string::npos)
- // Something's wrong, abort.
- break;
-
- Filepath.erase(PrevSlash, Cursor + 3 - PrevSlash);
- // The next ".." might be following the one we've just erased.
- Cursor = PrevSlash;
- }
-
- // Remove all duplicate backslashes.
- Cursor = 0;
- while ((Cursor = Filepath.find("\\\\", Cursor)) != std::string::npos)
- Filepath.erase(Cursor, 1);
-
- return Filepath;
-}
-
-void WinCodeViewLineTables::maybeRecordLocation(DebugLoc DL,
- const MachineFunction *MF) {
- const MDNode *Scope = DL.getScope();
- if (!Scope)
- return;
- unsigned LineNumber = DL.getLine();
- // Skip this line if it is longer than the maximum we can record.
- if (LineNumber > COFF::CVL_MaxLineNumber)
- return;
-
- unsigned ColumnNumber = DL.getCol();
- // Truncate the column number if it is longer than the maximum we can record.
- if (ColumnNumber > COFF::CVL_MaxColumnNumber)
- ColumnNumber = 0;
-
- StringRef Filename = getFullFilepath(Scope);
-
- // Skip this instruction if it has the same file:line as the previous one.
- assert(CurFn);
- if (!CurFn->Instrs.empty()) {
- const InstrInfoTy &LastInstr = InstrInfo[CurFn->Instrs.back()];
- if (LastInstr.Filename == Filename && LastInstr.LineNumber == LineNumber &&
- LastInstr.ColumnNumber == ColumnNumber)
- return;
- }
- FileNameRegistry.add(Filename);
-
- MCSymbol *MCL = Asm->MMI->getContext().createTempSymbol();
- Asm->OutStreamer->EmitLabel(MCL);
- CurFn->Instrs.push_back(MCL);
- InstrInfo[MCL] = InstrInfoTy(Filename, LineNumber, ColumnNumber);
-}
-
-WinCodeViewLineTables::WinCodeViewLineTables(AsmPrinter *AP)
- : Asm(nullptr), CurFn(nullptr) {
- MachineModuleInfo *MMI = AP->MMI;
-
- // If module doesn't have named metadata anchors or COFF debug section
- // is not available, skip any debug info related stuff.
- if (!MMI->getModule()->getNamedMetadata("llvm.dbg.cu") ||
- !AP->getObjFileLowering().getCOFFDebugSymbolsSection())
- return;
-
- // Tell MMI that we have debug info.
- MMI->setDebugInfoAvailability(true);
- Asm = AP;
-}
-
-void WinCodeViewLineTables::endModule() {
- if (FnDebugInfo.empty())
- return;
-
- assert(Asm != nullptr);
- Asm->OutStreamer->SwitchSection(
- Asm->getObjFileLowering().getCOFFDebugSymbolsSection());
- Asm->EmitInt32(COFF::DEBUG_SECTION_MAGIC);
-
- // The COFF .debug$S section consists of several subsections, each starting
- // with a 4-byte control code (e.g. 0xF1, 0xF2, etc) and then a 4-byte length
- // of the payload followed by the payload itself. The subsections are 4-byte
- // aligned.
-
- // Emit per-function debug information. This code is extracted into a
- // separate function for readability.
- for (size_t I = 0, E = VisitedFunctions.size(); I != E; ++I)
- emitDebugInfoForFunction(VisitedFunctions[I]);
-
- // This subsection holds a file index to string table offset mapping.
- Asm->OutStreamer->AddComment("File index to string table offset subsection");
- Asm->EmitInt32(COFF::DEBUG_INDEX_SUBSECTION);
- size_t NumFilenames = FileNameRegistry.Infos.size();
- Asm->EmitInt32(8 * NumFilenames);
- for (size_t I = 0, E = FileNameRegistry.Filenames.size(); I != E; ++I) {
- StringRef Filename = FileNameRegistry.Filenames[I];
- // For each unique filename, just write its offset in the string table.
- Asm->EmitInt32(FileNameRegistry.Infos[Filename].StartOffset);
- // The function name offset is not followed by any additional data.
- Asm->EmitInt32(0);
- }
-
- // This subsection holds the string table.
- Asm->OutStreamer->AddComment("String table");
- Asm->EmitInt32(COFF::DEBUG_STRING_TABLE_SUBSECTION);
- Asm->EmitInt32(FileNameRegistry.LastOffset);
- // The payload starts with a null character.
- Asm->EmitInt8(0);
-
- for (size_t I = 0, E = FileNameRegistry.Filenames.size(); I != E; ++I) {
- // Just emit unique filenames one by one, separated by a null character.
- Asm->OutStreamer->EmitBytes(FileNameRegistry.Filenames[I]);
- Asm->EmitInt8(0);
- }
-
- // No more subsections. Fill with zeros to align the end of the section by 4.
- Asm->OutStreamer->EmitFill((-FileNameRegistry.LastOffset) % 4, 0);
-
- clear();
-}
-
-static void EmitLabelDiff(MCStreamer &Streamer,
- const MCSymbol *From, const MCSymbol *To,
- unsigned int Size = 4) {
- MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
- MCContext &Context = Streamer.getContext();
- const MCExpr *FromRef = MCSymbolRefExpr::create(From, Variant, Context),
- *ToRef = MCSymbolRefExpr::create(To, Variant, Context);
- const MCExpr *AddrDelta =
- MCBinaryExpr::create(MCBinaryExpr::Sub, ToRef, FromRef, Context);
- Streamer.EmitValue(AddrDelta, Size);
-}
-
-void WinCodeViewLineTables::emitDebugInfoForFunction(const Function *GV) {
- // For each function there is a separate subsection
- // which holds the PC to file:line table.
- const MCSymbol *Fn = Asm->getSymbol(GV);
- assert(Fn);
-
- const FunctionInfo &FI = FnDebugInfo[GV];
- if (FI.Instrs.empty())
- return;
- assert(FI.End && "Don't know where the function ends?");
-
- StringRef GVName = GV->getName();
- StringRef FuncName;
- if (auto *SP = getDISubprogram(GV))
- FuncName = SP->getDisplayName();
-
- // FIXME Clang currently sets DisplayName to "bar" for a C++
- // "namespace_foo::bar" function, see PR21528. Luckily, dbghelp.dll is trying
- // to demangle display names anyways, so let's just put a mangled name into
- // the symbols subsection until Clang gives us what we need.
- if (GVName.startswith("\01?"))
- FuncName = GVName.substr(1);
- // Emit a symbol subsection, required by VS2012+ to find function boundaries.
- MCSymbol *SymbolsBegin = Asm->MMI->getContext().createTempSymbol(),
- *SymbolsEnd = Asm->MMI->getContext().createTempSymbol();
- Asm->OutStreamer->AddComment("Symbol subsection for " + Twine(FuncName));
- Asm->EmitInt32(COFF::DEBUG_SYMBOL_SUBSECTION);
- EmitLabelDiff(*Asm->OutStreamer, SymbolsBegin, SymbolsEnd);
- Asm->OutStreamer->EmitLabel(SymbolsBegin);
- {
- MCSymbol *ProcSegmentBegin = Asm->MMI->getContext().createTempSymbol(),
- *ProcSegmentEnd = Asm->MMI->getContext().createTempSymbol();
- EmitLabelDiff(*Asm->OutStreamer, ProcSegmentBegin, ProcSegmentEnd, 2);
- Asm->OutStreamer->EmitLabel(ProcSegmentBegin);
-
- Asm->EmitInt16(COFF::DEBUG_SYMBOL_TYPE_PROC_START);
- // Some bytes of this segment don't seem to be required for basic debugging,
- // so just fill them with zeroes.
- Asm->OutStreamer->EmitFill(12, 0);
- // This is the important bit that tells the debugger where the function
- // code is located and what its size is:
- EmitLabelDiff(*Asm->OutStreamer, Fn, FI.End);
- Asm->OutStreamer->EmitFill(12, 0);
- Asm->OutStreamer->EmitCOFFSecRel32(Fn);
- Asm->OutStreamer->EmitCOFFSectionIndex(Fn);
- Asm->EmitInt8(0);
- // Emit the function display name as a null-terminated string.
- Asm->OutStreamer->EmitBytes(FuncName);
- Asm->EmitInt8(0);
- Asm->OutStreamer->EmitLabel(ProcSegmentEnd);
-
- // We're done with this function.
- Asm->EmitInt16(0x0002);
- Asm->EmitInt16(COFF::DEBUG_SYMBOL_TYPE_PROC_END);
- }
- Asm->OutStreamer->EmitLabel(SymbolsEnd);
- // Every subsection must be aligned to a 4-byte boundary.
- Asm->OutStreamer->EmitFill((-FuncName.size()) % 4, 0);
-
- // PCs/Instructions are grouped into segments sharing the same filename.
- // Pre-calculate the lengths (in instructions) of these segments and store
- // them in a map for convenience. Each index in the map is the sequential
- // number of the respective instruction that starts a new segment.
- DenseMap<size_t, size_t> FilenameSegmentLengths;
- size_t LastSegmentEnd = 0;
- StringRef PrevFilename = InstrInfo[FI.Instrs[0]].Filename;
- for (size_t J = 1, F = FI.Instrs.size(); J != F; ++J) {
- if (PrevFilename == InstrInfo[FI.Instrs[J]].Filename)
- continue;
- FilenameSegmentLengths[LastSegmentEnd] = J - LastSegmentEnd;
- LastSegmentEnd = J;
- PrevFilename = InstrInfo[FI.Instrs[J]].Filename;
- }
- FilenameSegmentLengths[LastSegmentEnd] = FI.Instrs.size() - LastSegmentEnd;
-
- // Emit a line table subsection, required to do PC-to-file:line lookup.
- Asm->OutStreamer->AddComment("Line table subsection for " + Twine(FuncName));
- Asm->EmitInt32(COFF::DEBUG_LINE_TABLE_SUBSECTION);
- MCSymbol *LineTableBegin = Asm->MMI->getContext().createTempSymbol(),
- *LineTableEnd = Asm->MMI->getContext().createTempSymbol();
- EmitLabelDiff(*Asm->OutStreamer, LineTableBegin, LineTableEnd);
- Asm->OutStreamer->EmitLabel(LineTableBegin);
-
- // Identify the function this subsection is for.
- Asm->OutStreamer->EmitCOFFSecRel32(Fn);
- Asm->OutStreamer->EmitCOFFSectionIndex(Fn);
- // Insert flags after a 16-bit section index.
- Asm->EmitInt16(COFF::DEBUG_LINE_TABLES_HAVE_COLUMN_RECORDS);
-
- // Length of the function's code, in bytes.
- EmitLabelDiff(*Asm->OutStreamer, Fn, FI.End);
-
- // PC-to-linenumber lookup table:
- MCSymbol *FileSegmentEnd = nullptr;
-
- // The start of the last segment:
- size_t LastSegmentStart = 0;
-
- auto FinishPreviousChunk = [&] {
- if (!FileSegmentEnd)
- return;
- for (size_t ColSegI = LastSegmentStart,
- ColSegEnd = ColSegI + FilenameSegmentLengths[LastSegmentStart];
- ColSegI != ColSegEnd; ++ColSegI) {
- unsigned ColumnNumber = InstrInfo[FI.Instrs[ColSegI]].ColumnNumber;
- assert(ColumnNumber <= COFF::CVL_MaxColumnNumber);
- Asm->EmitInt16(ColumnNumber); // Start column
- Asm->EmitInt16(0); // End column
- }
- Asm->OutStreamer->EmitLabel(FileSegmentEnd);
- };
-
- for (size_t J = 0, F = FI.Instrs.size(); J != F; ++J) {
- MCSymbol *Instr = FI.Instrs[J];
- assert(InstrInfo.count(Instr));
-
- if (FilenameSegmentLengths.count(J)) {
- // We came to a beginning of a new filename segment.
- FinishPreviousChunk();
- StringRef CurFilename = InstrInfo[FI.Instrs[J]].Filename;
- assert(FileNameRegistry.Infos.count(CurFilename));
- size_t IndexInStringTable =
- FileNameRegistry.Infos[CurFilename].FilenameID;
- // Each segment starts with the offset of the filename
- // in the string table.
- Asm->OutStreamer->AddComment(
- "Segment for file '" + Twine(CurFilename) + "' begins");
- MCSymbol *FileSegmentBegin = Asm->MMI->getContext().createTempSymbol();
- Asm->OutStreamer->EmitLabel(FileSegmentBegin);
- Asm->EmitInt32(8 * IndexInStringTable);
-
- // Number of PC records in the lookup table.
- size_t SegmentLength = FilenameSegmentLengths[J];
- Asm->EmitInt32(SegmentLength);
-
- // Full size of the segment for this filename, including the prev two
- // records.
- FileSegmentEnd = Asm->MMI->getContext().createTempSymbol();
- EmitLabelDiff(*Asm->OutStreamer, FileSegmentBegin, FileSegmentEnd);
- LastSegmentStart = J;
- }
-
- // The first PC with the given linenumber and the linenumber itself.
- EmitLabelDiff(*Asm->OutStreamer, Fn, Instr);
- uint32_t LineNumber = InstrInfo[Instr].LineNumber;
- assert(LineNumber <= COFF::CVL_MaxLineNumber);
- uint32_t LineData = LineNumber | COFF::CVL_IsStatement;
- Asm->EmitInt32(LineData);
- }
-
- FinishPreviousChunk();
- Asm->OutStreamer->EmitLabel(LineTableEnd);
-}
-
-void WinCodeViewLineTables::beginFunction(const MachineFunction *MF) {
- assert(!CurFn && "Can't process two functions at once!");
-
- if (!Asm || !Asm->MMI->hasDebugInfo())
- return;
-
- const Function *GV = MF->getFunction();
- assert(FnDebugInfo.count(GV) == false);
- VisitedFunctions.push_back(GV);
- CurFn = &FnDebugInfo[GV];
-
- // Find the end of the function prolog.
- // FIXME: is there a simpler way to do this? Can we just search
- // for the first instruction of the function, not the last of the prolog?
- DebugLoc PrologEndLoc;
- bool EmptyPrologue = true;
- for (const auto &MBB : *MF) {
- if (PrologEndLoc)
- break;
- for (const auto &MI : MBB) {
- if (MI.isDebugValue())
- continue;
-
- // First known non-DBG_VALUE and non-frame setup location marks
- // the beginning of the function body.
- // FIXME: do we need the first subcondition?
- if (!MI.getFlag(MachineInstr::FrameSetup) && MI.getDebugLoc()) {
- PrologEndLoc = MI.getDebugLoc();
- break;
- }
- EmptyPrologue = false;
- }
- }
- // Record beginning of function if we have a non-empty prologue.
- if (PrologEndLoc && !EmptyPrologue) {
- DebugLoc FnStartDL = PrologEndLoc.getFnDebugLoc();
- maybeRecordLocation(FnStartDL, MF);
- }
-}
-
-void WinCodeViewLineTables::endFunction(const MachineFunction *MF) {
- if (!Asm || !CurFn) // We haven't created any debug info for this function.
- return;
-
- const Function *GV = MF->getFunction();
- assert(FnDebugInfo.count(GV));
- assert(CurFn == &FnDebugInfo[GV]);
-
- if (CurFn->Instrs.empty()) {
- FnDebugInfo.erase(GV);
- VisitedFunctions.pop_back();
- } else {
- CurFn->End = Asm->getFunctionEnd();
- }
- CurFn = nullptr;
-}
-
-void WinCodeViewLineTables::beginInstruction(const MachineInstr *MI) {
- // Ignore DBG_VALUE locations and function prologue.
- if (!Asm || MI->isDebugValue() || MI->getFlag(MachineInstr::FrameSetup))
- return;
- DebugLoc DL = MI->getDebugLoc();
- if (DL == PrevInstLoc || !DL)
- return;
- maybeRecordLocation(DL, Asm->MF);
-}
-}
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.h b/contrib/llvm/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.h
deleted file mode 100644
index 78068e0..0000000
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.h
+++ /dev/null
@@ -1,138 +0,0 @@
-//===-- llvm/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.h ----*- C++ -*--===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains support for writing line tables info into COFF files.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_WINCODEVIEWLINETABLES_H
-#define LLVM_LIB_CODEGEN_ASMPRINTER_WINCODEVIEWLINETABLES_H
-
-#include "AsmPrinterHandler.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/StringMap.h"
-#include "llvm/ADT/StringRef.h"
-#include "llvm/CodeGen/AsmPrinter.h"
-#include "llvm/CodeGen/LexicalScopes.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineModuleInfo.h"
-#include "llvm/IR/DebugInfo.h"
-#include "llvm/IR/DebugLoc.h"
-#include "llvm/MC/MCStreamer.h"
-#include "llvm/Target/TargetLoweringObjectFile.h"
-
-namespace llvm {
-/// \brief Collects and handles line tables information in a CodeView format.
-class LLVM_LIBRARY_VISIBILITY WinCodeViewLineTables : public AsmPrinterHandler {
- AsmPrinter *Asm;
- DebugLoc PrevInstLoc;
-
- // For each function, store a vector of labels to its instructions, as well as
- // to the end of the function.
- struct FunctionInfo {
- SmallVector<MCSymbol *, 10> Instrs;
- MCSymbol *End;
- FunctionInfo() : End(nullptr) {}
- } *CurFn;
-
- typedef DenseMap<const Function *, FunctionInfo> FnDebugInfoTy;
- FnDebugInfoTy FnDebugInfo;
- // Store the functions we've visited in a vector so we can maintain a stable
- // order while emitting subsections.
- SmallVector<const Function *, 10> VisitedFunctions;
-
- // InstrInfoTy - Holds the Filename:LineNumber information for every
- // instruction with a unique debug location.
- struct InstrInfoTy {
- StringRef Filename;
- unsigned LineNumber;
- unsigned ColumnNumber;
-
- InstrInfoTy() : LineNumber(0), ColumnNumber(0) {}
-
- InstrInfoTy(StringRef Filename, unsigned LineNumber, unsigned ColumnNumber)
- : Filename(Filename), LineNumber(LineNumber),
- ColumnNumber(ColumnNumber) {}
- };
- DenseMap<MCSymbol *, InstrInfoTy> InstrInfo;
-
- // FileNameRegistry - Manages filenames observed while generating debug info
- // by filtering out duplicates and bookkeeping the offsets in the string
- // table to be generated.
- struct FileNameRegistryTy {
- SmallVector<StringRef, 10> Filenames;
- struct PerFileInfo {
- size_t FilenameID, StartOffset;
- };
- StringMap<PerFileInfo> Infos;
-
- // The offset in the string table where we'll write the next unique
- // filename.
- size_t LastOffset;
-
- FileNameRegistryTy() {
- clear();
- }
-
- // Add Filename to the registry, if it was not observed before.
- void add(StringRef Filename) {
- if (Infos.count(Filename))
- return;
- size_t OldSize = Infos.size();
- Infos[Filename].FilenameID = OldSize;
- Infos[Filename].StartOffset = LastOffset;
- LastOffset += Filename.size() + 1;
- Filenames.push_back(Filename);
- }
-
- void clear() {
- LastOffset = 1;
- Infos.clear();
- Filenames.clear();
- }
- } FileNameRegistry;
-
- typedef std::map<std::pair<StringRef, StringRef>, std::string>
- DirAndFilenameToFilepathMapTy;
- DirAndFilenameToFilepathMapTy DirAndFilenameToFilepathMap;
- StringRef getFullFilepath(const MDNode *S);
-
- void maybeRecordLocation(DebugLoc DL, const MachineFunction *MF);
-
- void clear() {
- assert(CurFn == nullptr);
- FileNameRegistry.clear();
- InstrInfo.clear();
- }
-
- void emitDebugInfoForFunction(const Function *GV);
-
-public:
- WinCodeViewLineTables(AsmPrinter *Asm);
-
- void setSymbolSize(const llvm::MCSymbol *, uint64_t) override {}
-
- /// \brief Emit the COFF section that holds the line table information.
- void endModule() override;
-
- /// \brief Gather pre-function debug information.
- void beginFunction(const MachineFunction *MF) override;
-
- /// \brief Gather post-function debug information.
- void endFunction(const MachineFunction *) override;
-
- /// \brief Process beginning of an instruction.
- void beginInstruction(const MachineInstr *MI) override;
-
- /// \brief Process end of an instruction.
- void endInstruction() override {}
-};
-} // End of namespace llvm
-
-#endif
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/WinException.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/WinException.cpp
index 4da5b58..e5933d8 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/WinException.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/WinException.cpp
@@ -12,7 +12,6 @@
//===----------------------------------------------------------------------===//
#include "WinException.h"
-#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/Twine.h"
#include "llvm/CodeGen/AsmPrinter.h"
@@ -35,6 +34,7 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Target/TargetRegisterInfo.h"
@@ -125,10 +125,9 @@ void WinException::endFunction(const MachineFunction *MF) {
if (shouldEmitPersonality || shouldEmitLSDA) {
Asm->OutStreamer->PushSection();
- // Just switch sections to the right xdata section. This use of CurrentFnSym
- // assumes that we only emit the LSDA when ending the parent function.
- MCSection *XData = WinEH::UnwindEmitter::getXDataSection(Asm->CurrentFnSym,
- Asm->OutContext);
+ // Just switch sections to the right xdata section.
+ MCSection *XData = Asm->OutStreamer->getAssociatedXDataSection(
+ Asm->OutStreamer->getCurrentSectionOnly());
Asm->OutStreamer->SwitchSection(XData);
// Emit the tables appropriate to the personality function in use. If we
@@ -303,8 +302,17 @@ int WinException::getFrameIndexOffset(int FrameIndex,
const WinEHFuncInfo &FuncInfo) {
const TargetFrameLowering &TFI = *Asm->MF->getSubtarget().getFrameLowering();
unsigned UnusedReg;
- if (Asm->MAI->usesWindowsCFI())
- return TFI.getFrameIndexReferenceFromSP(*Asm->MF, FrameIndex, UnusedReg);
+ if (Asm->MAI->usesWindowsCFI()) {
+ int Offset =
+ TFI.getFrameIndexReferencePreferSP(*Asm->MF, FrameIndex, UnusedReg,
+ /*IgnoreSPUpdates*/ true);
+ assert(UnusedReg ==
+ Asm->MF->getSubtarget()
+ .getTargetLowering()
+ ->getStackPointerRegisterToSaveRestore());
+ return Offset;
+ }
+
// For 32-bit, offsets should be relative to the end of the EH registration
// node. For 64-bit, it's relative to SP at the end of the prologue.
assert(FuncInfo.EHRegNodeEndOffset != INT_MAX);
@@ -793,6 +801,7 @@ void WinException::emitCXXFrameHandler3Table(const MachineFunction *MF) {
const MCExpr *FrameAllocOffsetRef = nullptr;
if (HT.CatchObj.FrameIndex != INT_MAX) {
int Offset = getFrameIndexOffset(HT.CatchObj.FrameIndex, FuncInfo);
+ assert(Offset != 0 && "Illegal offset for catch object!");
FrameAllocOffsetRef = MCConstantExpr::create(Offset, Asm->OutContext);
} else {
FrameAllocOffsetRef = MCConstantExpr::create(0, Asm->OutContext);
@@ -945,15 +954,42 @@ void WinException::emitExceptHandlerTable(const MachineFunction *MF) {
// ScopeTableEntry ScopeRecord[];
// };
//
- // Only the EHCookieOffset field appears to vary, and it appears to be the
- // offset from the final saved SP value to the retaddr.
+ // Offsets are %ebp relative.
+ //
+ // The GS cookie is present only if the function needs stack protection.
+ // GSCookieOffset = -2 means that GS cookie is not used.
+ //
+ // The EH cookie is always present.
+ //
+ // The check is done the following way:
+ // (ebp+CookieXOROffset) ^ [ebp+CookieOffset] == _security_cookie
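+ //
+ // For example (illustrative offsets, not taken from real output): with
+ // GSCookieOffset = -8 and GSCookieXOROffset = 0, the runtime recomputes
+ // ebp ^ [ebp-8] and compares it against _security_cookie; a mismatch
+ // means the frame has been corrupted.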
+
+ // Retrieve the Guard Stack slot.
+ int GSCookieOffset = -2;
+ const MachineFrameInfo *MFI = MF->getFrameInfo();
+ if (MFI->hasStackProtectorIndex()) {
+ unsigned UnusedReg;
+ const TargetFrameLowering *TFI = MF->getSubtarget().getFrameLowering();
+ int SSPIdx = MFI->getStackProtectorIndex();
+ GSCookieOffset = TFI->getFrameIndexReference(*MF, SSPIdx, UnusedReg);
+ }
+
+ // Retrieve the EH Guard slot.
+ // TODO(etienneb): Get rid of this value and replace it with an assertion.
+ int EHCookieOffset = 9999;
+ if (FuncInfo.EHGuardFrameIndex != INT_MAX) {
+ unsigned UnusedReg;
+ const TargetFrameLowering *TFI = MF->getSubtarget().getFrameLowering();
+ int EHGuardIdx = FuncInfo.EHGuardFrameIndex;
+ EHCookieOffset = TFI->getFrameIndexReference(*MF, EHGuardIdx, UnusedReg);
+ }
+
AddComment("GSCookieOffset");
- OS.EmitIntValue(-2, 4);
+ OS.EmitIntValue(GSCookieOffset, 4);
AddComment("GSCookieXOROffset");
OS.EmitIntValue(0, 4);
- // FIXME: Calculate.
AddComment("EHCookieOffset");
- OS.EmitIntValue(9999, 4);
+ OS.EmitIntValue(EHCookieOffset, 4);
AddComment("EHCookieXOROffset");
OS.EmitIntValue(0, 4);
BaseState = -2;
diff --git a/contrib/llvm/lib/CodeGen/AtomicExpandPass.cpp b/contrib/llvm/lib/CodeGen/AtomicExpandPass.cpp
index d12fdb2..bf5cf10 100644
--- a/contrib/llvm/lib/CodeGen/AtomicExpandPass.cpp
+++ b/contrib/llvm/lib/CodeGen/AtomicExpandPass.cpp
@@ -8,10 +8,10 @@
//===----------------------------------------------------------------------===//
//
// This file contains a pass (at IR level) to replace atomic instructions with
-// target specific instruction which implement the same semantics in a way
-// which better fits the target backend. This can include the use of either
-// (intrinsic-based) load-linked/store-conditional loops, AtomicCmpXchg, or
-// type coercions.
+// __atomic_* library calls, or target specific instruction which implement the
+// same semantics in a way which better fits the target backend. This can
+// include the use of (intrinsic-based) load-linked/store-conditional loops,
+// AtomicCmpXchg, or type coercions.
//
//===----------------------------------------------------------------------===//
@@ -57,25 +57,121 @@ namespace {
StoreInst *convertAtomicStoreToIntegerType(StoreInst *SI);
bool expandAtomicStore(StoreInst *SI);
bool tryExpandAtomicRMW(AtomicRMWInst *AI);
- bool expandAtomicOpToLLSC(
- Instruction *I, Value *Addr, AtomicOrdering MemOpOrder,
- std::function<Value *(IRBuilder<> &, Value *)> PerformOp);
+ Value *
+ insertRMWLLSCLoop(IRBuilder<> &Builder, Type *ResultTy, Value *Addr,
+ AtomicOrdering MemOpOrder,
+ function_ref<Value *(IRBuilder<> &, Value *)> PerformOp);
+ void expandAtomicOpToLLSC(
+ Instruction *I, Type *ResultTy, Value *Addr, AtomicOrdering MemOpOrder,
+ function_ref<Value *(IRBuilder<> &, Value *)> PerformOp);
+ void expandPartwordAtomicRMW(
+ AtomicRMWInst *I,
+ TargetLoweringBase::AtomicExpansionKind ExpansionKind);
+ void expandPartwordCmpXchg(AtomicCmpXchgInst *I);
+
+ AtomicCmpXchgInst *convertCmpXchgToIntegerType(AtomicCmpXchgInst *CI);
+ static Value *insertRMWCmpXchgLoop(
+ IRBuilder<> &Builder, Type *ResultType, Value *Addr,
+ AtomicOrdering MemOpOrder,
+ function_ref<Value *(IRBuilder<> &, Value *)> PerformOp,
+ CreateCmpXchgInstFun CreateCmpXchg);
+
bool expandAtomicCmpXchg(AtomicCmpXchgInst *CI);
bool isIdempotentRMW(AtomicRMWInst *AI);
bool simplifyIdempotentRMW(AtomicRMWInst *AI);
+
+ bool expandAtomicOpToLibcall(Instruction *I, unsigned Size, unsigned Align,
+ Value *PointerOperand, Value *ValueOperand,
+ Value *CASExpected, AtomicOrdering Ordering,
+ AtomicOrdering Ordering2,
+ ArrayRef<RTLIB::Libcall> Libcalls);
+ void expandAtomicLoadToLibcall(LoadInst *LI);
+ void expandAtomicStoreToLibcall(StoreInst *LI);
+ void expandAtomicRMWToLibcall(AtomicRMWInst *I);
+ void expandAtomicCASToLibcall(AtomicCmpXchgInst *I);
+
+ friend bool
+ llvm::expandAtomicRMWToCmpXchg(AtomicRMWInst *AI,
+ CreateCmpXchgInstFun CreateCmpXchg);
};
}
char AtomicExpand::ID = 0;
char &llvm::AtomicExpandID = AtomicExpand::ID;
-INITIALIZE_TM_PASS(AtomicExpand, "atomic-expand",
- "Expand Atomic calls in terms of either load-linked & store-conditional or cmpxchg",
- false, false)
+INITIALIZE_TM_PASS(AtomicExpand, "atomic-expand", "Expand Atomic instructions",
+ false, false)
FunctionPass *llvm::createAtomicExpandPass(const TargetMachine *TM) {
return new AtomicExpand(TM);
}
+namespace {
+// Helper functions to retrieve the size of atomic instructions.
+unsigned getAtomicOpSize(LoadInst *LI) {
+ const DataLayout &DL = LI->getModule()->getDataLayout();
+ return DL.getTypeStoreSize(LI->getType());
+}
+
+unsigned getAtomicOpSize(StoreInst *SI) {
+ const DataLayout &DL = SI->getModule()->getDataLayout();
+ return DL.getTypeStoreSize(SI->getValueOperand()->getType());
+}
+
+unsigned getAtomicOpSize(AtomicRMWInst *RMWI) {
+ const DataLayout &DL = RMWI->getModule()->getDataLayout();
+ return DL.getTypeStoreSize(RMWI->getValOperand()->getType());
+}
+
+unsigned getAtomicOpSize(AtomicCmpXchgInst *CASI) {
+ const DataLayout &DL = CASI->getModule()->getDataLayout();
+ return DL.getTypeStoreSize(CASI->getCompareOperand()->getType());
+}
+
+// Helper functions to retrieve the alignment of atomic instructions.
+unsigned getAtomicOpAlign(LoadInst *LI) {
+ unsigned Align = LI->getAlignment();
+ // In the future, if this IR restriction is relaxed, we should
+ // return DataLayout::getABITypeAlignment when there's no align
+ // value.
+ assert(Align != 0 && "An atomic LoadInst always has an explicit alignment");
+ return Align;
+}
+
+unsigned getAtomicOpAlign(StoreInst *SI) {
+ unsigned Align = SI->getAlignment();
+ // In the future, if this IR restriction is relaxed, we should
+ // return DataLayout::getABITypeAlignment when there's no align
+ // value.
+ assert(Align != 0 && "An atomic StoreInst always has an explicit alignment");
+ return Align;
+}
+
+unsigned getAtomicOpAlign(AtomicRMWInst *RMWI) {
+ // TODO(PR27168): This instruction has no alignment attribute, but unlike the
+ // default alignment for load/store, the default here is to assume
+ // it has NATURAL alignment, not DataLayout-specified alignment.
+ const DataLayout &DL = RMWI->getModule()->getDataLayout();
+ return DL.getTypeStoreSize(RMWI->getValOperand()->getType());
+}
+
+unsigned getAtomicOpAlign(AtomicCmpXchgInst *CASI) {
+ // TODO(PR27168): same comment as above.
+ const DataLayout &DL = CASI->getModule()->getDataLayout();
+ return DL.getTypeStoreSize(CASI->getCompareOperand()->getType());
+}
+
+// Determine if a particular atomic operation has a supported size,
+// and is of appropriate alignment, to be passed through for target
+// lowering. (Versus turning into a __atomic libcall)
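+//
+// For example, assuming a target that reports 64 as its maximum supported
+// atomic width, a naturally aligned i32 load is lowered normally, while an
+// i128 cmpxchg or an underaligned i64 access is routed to the __atomic_*
+// libcalls below instead.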
+template <typename Inst>
+bool atomicSizeSupported(const TargetLowering *TLI, Inst *I) {
+ unsigned Size = getAtomicOpSize(I);
+ unsigned Align = getAtomicOpAlign(I);
+ return Align >= Size && Size <= TLI->getMaxAtomicSizeInBitsSupported() / 8;
+}
+
+} // end anonymous namespace
+
bool AtomicExpand::runOnFunction(Function &F) {
if (!TM || !TM->getSubtargetImpl(F)->enableAtomicExpand())
return false;
@@ -85,9 +181,10 @@ bool AtomicExpand::runOnFunction(Function &F) {
// Changing control-flow while iterating through it is a bad idea, so gather a
// list of all atomic instructions before we start.
- for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I) {
- if (I->isAtomic())
- AtomicInsts.push_back(&*I);
+ for (inst_iterator II = inst_begin(F), E = inst_end(F); II != E; ++II) {
+ Instruction *I = &*II;
+ if (I->isAtomic() && !isa<FenceInst>(I))
+ AtomicInsts.push_back(I);
}
bool MadeChange = false;
@@ -96,41 +193,67 @@ bool AtomicExpand::runOnFunction(Function &F) {
auto SI = dyn_cast<StoreInst>(I);
auto RMWI = dyn_cast<AtomicRMWInst>(I);
auto CASI = dyn_cast<AtomicCmpXchgInst>(I);
- assert((LI || SI || RMWI || CASI || isa<FenceInst>(I)) &&
- "Unknown atomic instruction");
+ assert((LI || SI || RMWI || CASI) && "Unknown atomic instruction");
+
+ // If the Size/Alignment is not supported, replace with a libcall.
+ if (LI) {
+ if (!atomicSizeSupported(TLI, LI)) {
+ expandAtomicLoadToLibcall(LI);
+ MadeChange = true;
+ continue;
+ }
+ } else if (SI) {
+ if (!atomicSizeSupported(TLI, SI)) {
+ expandAtomicStoreToLibcall(SI);
+ MadeChange = true;
+ continue;
+ }
+ } else if (RMWI) {
+ if (!atomicSizeSupported(TLI, RMWI)) {
+ expandAtomicRMWToLibcall(RMWI);
+ MadeChange = true;
+ continue;
+ }
+ } else if (CASI) {
+ if (!atomicSizeSupported(TLI, CASI)) {
+ expandAtomicCASToLibcall(CASI);
+ MadeChange = true;
+ continue;
+ }
+ }
- auto FenceOrdering = Monotonic;
- bool IsStore, IsLoad;
- if (TLI->getInsertFencesForAtomic()) {
- if (LI && isAtLeastAcquire(LI->getOrdering())) {
+ if (TLI->shouldInsertFencesForAtomic(I)) {
+ auto FenceOrdering = AtomicOrdering::Monotonic;
+ bool IsStore, IsLoad;
+ if (LI && isAcquireOrStronger(LI->getOrdering())) {
FenceOrdering = LI->getOrdering();
- LI->setOrdering(Monotonic);
+ LI->setOrdering(AtomicOrdering::Monotonic);
IsStore = false;
IsLoad = true;
- } else if (SI && isAtLeastRelease(SI->getOrdering())) {
+ } else if (SI && isReleaseOrStronger(SI->getOrdering())) {
FenceOrdering = SI->getOrdering();
- SI->setOrdering(Monotonic);
+ SI->setOrdering(AtomicOrdering::Monotonic);
IsStore = true;
IsLoad = false;
- } else if (RMWI && (isAtLeastRelease(RMWI->getOrdering()) ||
- isAtLeastAcquire(RMWI->getOrdering()))) {
+ } else if (RMWI && (isReleaseOrStronger(RMWI->getOrdering()) ||
+ isAcquireOrStronger(RMWI->getOrdering()))) {
FenceOrdering = RMWI->getOrdering();
- RMWI->setOrdering(Monotonic);
+ RMWI->setOrdering(AtomicOrdering::Monotonic);
IsStore = IsLoad = true;
} else if (CASI && !TLI->shouldExpandAtomicCmpXchgInIR(CASI) &&
- (isAtLeastRelease(CASI->getSuccessOrdering()) ||
- isAtLeastAcquire(CASI->getSuccessOrdering()))) {
+ (isReleaseOrStronger(CASI->getSuccessOrdering()) ||
+ isAcquireOrStronger(CASI->getSuccessOrdering()))) {
// If a compare and swap is lowered to LL/SC, we can do smarter fence
// insertion, with a stronger one on the success path than on the
// failure path. As a result, fence insertion is directly done by
// expandAtomicCmpXchg in that case.
FenceOrdering = CASI->getSuccessOrdering();
- CASI->setSuccessOrdering(Monotonic);
- CASI->setFailureOrdering(Monotonic);
+ CASI->setSuccessOrdering(AtomicOrdering::Monotonic);
+ CASI->setFailureOrdering(AtomicOrdering::Monotonic);
IsStore = IsLoad = true;
}
- if (FenceOrdering != Monotonic) {
+ if (FenceOrdering != AtomicOrdering::Monotonic) {
MadeChange |= bracketInstWithFences(I, FenceOrdering, IsStore, IsLoad);
}
}
@@ -143,7 +266,7 @@ bool AtomicExpand::runOnFunction(Function &F) {
assert(LI->getType()->isIntegerTy() && "invariant broken");
MadeChange = true;
}
-
+
MadeChange |= tryExpandAtomicLoad(LI);
} else if (SI) {
if (SI->getValueOperand()->getType()->isFloatingPointTy()) {
@@ -168,8 +291,30 @@ bool AtomicExpand::runOnFunction(Function &F) {
} else {
MadeChange |= tryExpandAtomicRMW(RMWI);
}
- } else if (CASI && TLI->shouldExpandAtomicCmpXchgInIR(CASI)) {
- MadeChange |= expandAtomicCmpXchg(CASI);
+ } else if (CASI) {
+ // TODO: when we're ready to make the change at the IR level, we can
+ // extend convertCmpXchgToInteger for floating point too.
+ assert(!CASI->getCompareOperand()->getType()->isFloatingPointTy() &&
+ "unimplemented - floating point not legal at IR level");
+ if (CASI->getCompareOperand()->getType()->isPointerTy()) {
+ // TODO: add a TLI hook to control this so that each target can
+ // convert to lowering the original type one at a time.
+ CASI = convertCmpXchgToIntegerType(CASI);
+ assert(CASI->getCompareOperand()->getType()->isIntegerTy() &&
+ "invariant broken");
+ MadeChange = true;
+ }
+
+ unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
+ unsigned ValueSize = getAtomicOpSize(CASI);
+ if (ValueSize < MinCASSize) {
+ assert(!TLI->shouldExpandAtomicCmpXchgInIR(CASI) &&
+ "MinCmpXchgSizeInBits not yet supported for LL/SC expansions.");
+ expandPartwordCmpXchg(CASI);
+ } else {
+ if (TLI->shouldExpandAtomicCmpXchgInIR(CASI))
+ MadeChange |= expandAtomicCmpXchg(CASI);
+ }
}
}
return MadeChange;
@@ -206,7 +351,7 @@ IntegerType *AtomicExpand::getCorrespondingIntegerType(Type *T,
}
/// Convert an atomic load of a non-integral type to an integer load of the
-/// equivelent bitwidth. See the function comment on
+/// equivalent bitwidth. See the function comment on
/// convertAtomicStoreToIntegerType for background.
LoadInst *AtomicExpand::convertAtomicLoadToIntegerType(LoadInst *LI) {
auto *M = LI->getModule();
@@ -237,9 +382,10 @@ bool AtomicExpand::tryExpandAtomicLoad(LoadInst *LI) {
case TargetLoweringBase::AtomicExpansionKind::None:
return false;
case TargetLoweringBase::AtomicExpansionKind::LLSC:
- return expandAtomicOpToLLSC(
- LI, LI->getPointerOperand(), LI->getOrdering(),
+ expandAtomicOpToLLSC(
+ LI, LI->getType(), LI->getPointerOperand(), LI->getOrdering(),
[](IRBuilder<> &Builder, Value *Loaded) { return Loaded; });
+ return true;
case TargetLoweringBase::AtomicExpansionKind::LLOnly:
return expandAtomicLoadToLL(LI);
case TargetLoweringBase::AtomicExpansionKind::CmpXChg:
@@ -283,7 +429,7 @@ bool AtomicExpand::expandAtomicLoadToCmpXchg(LoadInst *LI) {
}
/// Convert an atomic store of a non-integral type to an integer store of the
-/// equivelent bitwidth. We used to not support floating point or vector
+/// equivalent bitwidth. We used to not support floating point or vector
/// atomics in the IR at all. The backends learned to deal with the bitcast
/// idiom because that was the only way of expressing the notion of an atomic
/// float or vector store. The long term plan is to teach each backend to
@@ -380,32 +526,353 @@ bool AtomicExpand::tryExpandAtomicRMW(AtomicRMWInst *AI) {
switch (TLI->shouldExpandAtomicRMWInIR(AI)) {
case TargetLoweringBase::AtomicExpansionKind::None:
return false;
- case TargetLoweringBase::AtomicExpansionKind::LLSC:
- return expandAtomicOpToLLSC(AI, AI->getPointerOperand(), AI->getOrdering(),
- [&](IRBuilder<> &Builder, Value *Loaded) {
- return performAtomicOp(AI->getOperation(),
- Builder, Loaded,
- AI->getValOperand());
- });
- case TargetLoweringBase::AtomicExpansionKind::CmpXChg:
- return expandAtomicRMWToCmpXchg(AI, createCmpXchgInstFun);
+ case TargetLoweringBase::AtomicExpansionKind::LLSC: {
+ unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
+ unsigned ValueSize = getAtomicOpSize(AI);
+ if (ValueSize < MinCASSize) {
+ llvm_unreachable(
+ "MinCmpXchgSizeInBits not yet supported for LL/SC architectures.");
+ } else {
+ auto PerformOp = [&](IRBuilder<> &Builder, Value *Loaded) {
+ return performAtomicOp(AI->getOperation(), Builder, Loaded,
+ AI->getValOperand());
+ };
+ expandAtomicOpToLLSC(AI, AI->getType(), AI->getPointerOperand(),
+ AI->getOrdering(), PerformOp);
+ }
+ return true;
+ }
+ case TargetLoweringBase::AtomicExpansionKind::CmpXChg: {
+ unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
+ unsigned ValueSize = getAtomicOpSize(AI);
+ if (ValueSize < MinCASSize) {
+ expandPartwordAtomicRMW(AI,
+ TargetLoweringBase::AtomicExpansionKind::CmpXChg);
+ } else {
+ expandAtomicRMWToCmpXchg(AI, createCmpXchgInstFun);
+ }
+ return true;
+ }
default:
llvm_unreachable("Unhandled case in tryExpandAtomicRMW");
}
}
-bool AtomicExpand::expandAtomicOpToLLSC(
- Instruction *I, Value *Addr, AtomicOrdering MemOpOrder,
- std::function<Value *(IRBuilder<> &, Value *)> PerformOp) {
+namespace {
+
+/// Result values from createMaskInstrs helper.
+struct PartwordMaskValues {
+ Type *WordType;
+ Type *ValueType;
+ Value *AlignedAddr;
+ Value *ShiftAmt;
+ Value *Mask;
+ Value *Inv_Mask;
+};
+} // end anonymous namespace
+
+/// This is a helper function which builds instructions to provide
+/// values necessary for partword atomic operations. It takes an
+/// incoming address, Addr, and ValueType, and constructs the address,
+/// shift-amounts and masks needed to work with a larger value of size
+/// WordSize.
+///
+/// AlignedAddr: Addr rounded down to a multiple of WordSize
+///
+/// ShiftAmt: Number of bits to right-shift a WordSize value loaded
+/// from AlignAddr for it to have the same value as if
+/// ValueType was loaded from Addr.
+///
+/// Mask: Value to mask with the value loaded from AlignAddr to
+/// include only the part that would've been loaded from Addr.
+///
+/// Inv_Mask: The inverse of Mask.
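+///
+/// As a worked example (assuming a little-endian target and WordSize == 4):
+/// for an i16 value at byte offset 2, AlignedAddr == Addr & ~3,
+/// ShiftAmt == 16, Mask == 0xFFFF0000 and Inv_Mask == 0x0000FFFF.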
+
+static PartwordMaskValues createMaskInstrs(IRBuilder<> &Builder, Instruction *I,
+ Type *ValueType, Value *Addr,
+ unsigned WordSize) {
+ PartwordMaskValues Ret;
+
BasicBlock *BB = I->getParent();
Function *F = BB->getParent();
+ Module *M = I->getModule();
+
LLVMContext &Ctx = F->getContext();
+ const DataLayout &DL = M->getDataLayout();
+
+ unsigned ValueSize = DL.getTypeStoreSize(ValueType);
+
+ assert(ValueSize < WordSize);
+
+ Ret.ValueType = ValueType;
+ Ret.WordType = Type::getIntNTy(Ctx, WordSize * 8);
+
+ Type *WordPtrType =
+ Ret.WordType->getPointerTo(Addr->getType()->getPointerAddressSpace());
+
+ Value *AddrInt = Builder.CreatePtrToInt(Addr, DL.getIntPtrType(Ctx));
+ Ret.AlignedAddr = Builder.CreateIntToPtr(
+ Builder.CreateAnd(AddrInt, ~(uint64_t)(WordSize - 1)), WordPtrType,
+ "AlignedAddr");
+
+ Value *PtrLSB = Builder.CreateAnd(AddrInt, WordSize - 1, "PtrLSB");
+ if (DL.isLittleEndian()) {
+ // turn bytes into bits
+ Ret.ShiftAmt = Builder.CreateShl(PtrLSB, 3);
+ } else {
+ // turn bytes into bits, and count from the other side.
+ Ret.ShiftAmt =
+ Builder.CreateShl(Builder.CreateXor(PtrLSB, WordSize - ValueSize), 3);
+ }
+
+ Ret.ShiftAmt = Builder.CreateTrunc(Ret.ShiftAmt, Ret.WordType, "ShiftAmt");
+ Ret.Mask = Builder.CreateShl(
+ ConstantInt::get(Ret.WordType, (1 << ValueSize * 8) - 1), Ret.ShiftAmt,
+ "Mask");
+ Ret.Inv_Mask = Builder.CreateNot(Ret.Mask, "Inv_Mask");
+
+ return Ret;
+}
+
+/// Emit IR to implement a masked version of a given atomicrmw
+/// operation. (That is, only the bits under the Mask should be
+/// affected by the operation)
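+///
+/// For example, the Add case below assembles its result as:
+///   %NewVal         = add iN %Loaded, %Shifted_Inc
+///   %NewVal_Masked  = and iN %NewVal, %Mask
+///   %Loaded_MaskOut = and iN %Loaded, %Inv_Mask
+///   %FinalVal       = or iN %Loaded_MaskOut, %NewVal_Masked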
+static Value *performMaskedAtomicOp(AtomicRMWInst::BinOp Op,
+ IRBuilder<> &Builder, Value *Loaded,
+ Value *Shifted_Inc, Value *Inc,
+ const PartwordMaskValues &PMV) {
+ switch (Op) {
+ case AtomicRMWInst::Xchg: {
+ Value *Loaded_MaskOut = Builder.CreateAnd(Loaded, PMV.Inv_Mask);
+ Value *FinalVal = Builder.CreateOr(Loaded_MaskOut, Shifted_Inc);
+ return FinalVal;
+ }
+ case AtomicRMWInst::Or:
+ case AtomicRMWInst::Xor:
+ // Or/Xor won't affect any other bits, so can just be done
+ // directly.
+ return performAtomicOp(Op, Builder, Loaded, Shifted_Inc);
+ case AtomicRMWInst::Add:
+ case AtomicRMWInst::Sub:
+ case AtomicRMWInst::And:
+ case AtomicRMWInst::Nand: {
+ // The other arithmetic ops need to be masked into place.
+ Value *NewVal = performAtomicOp(Op, Builder, Loaded, Shifted_Inc);
+ Value *NewVal_Masked = Builder.CreateAnd(NewVal, PMV.Mask);
+ Value *Loaded_MaskOut = Builder.CreateAnd(Loaded, PMV.Inv_Mask);
+ Value *FinalVal = Builder.CreateOr(Loaded_MaskOut, NewVal_Masked);
+ return FinalVal;
+ }
+ case AtomicRMWInst::Max:
+ case AtomicRMWInst::Min:
+ case AtomicRMWInst::UMax:
+ case AtomicRMWInst::UMin: {
+ // Finally, comparison ops will operate on the full value, so
+ // truncate down to the original size, and expand out again after
+ // doing the operation.
+ Value *Loaded_Shiftdown = Builder.CreateTrunc(
+ Builder.CreateLShr(Loaded, PMV.ShiftAmt), PMV.ValueType);
+ Value *NewVal = performAtomicOp(Op, Builder, Loaded_Shiftdown, Inc);
+ Value *NewVal_Shiftup = Builder.CreateShl(
+ Builder.CreateZExt(NewVal, PMV.WordType), PMV.ShiftAmt);
+ Value *Loaded_MaskOut = Builder.CreateAnd(Loaded, PMV.Inv_Mask);
+ Value *FinalVal = Builder.CreateOr(Loaded_MaskOut, NewVal_Shiftup);
+ return FinalVal;
+ }
+ default:
+ llvm_unreachable("Unknown atomic op");
+ }
+}
+
+/// Expand a sub-word atomicrmw operation into an appropriate
+/// word-sized operation.
+///
+/// It will create an LL/SC or cmpxchg loop, as appropriate, the same
+/// way as a typical atomicrmw expansion. The only difference here is
+/// that the operation inside of the loop must operate only upon a
+/// part of the value.
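+///
+/// For example, 'atomicrmw add i8* %p, i8 %v monotonic' on a target whose
+/// minimum cmpxchg width is 32 bits becomes a cmpxchg loop on the aligned
+/// containing i32, with %v zero-extended and shifted into place and the old
+/// i8 value extracted from the loop's result.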
+void AtomicExpand::expandPartwordAtomicRMW(
+ AtomicRMWInst *AI, TargetLoweringBase::AtomicExpansionKind ExpansionKind) {
+
+ assert(ExpansionKind == TargetLoweringBase::AtomicExpansionKind::CmpXChg);
+
+ AtomicOrdering MemOpOrder = AI->getOrdering();
+
+ IRBuilder<> Builder(AI);
+
+ PartwordMaskValues PMV =
+ createMaskInstrs(Builder, AI, AI->getType(), AI->getPointerOperand(),
+ TLI->getMinCmpXchgSizeInBits() / 8);
+
+ Value *ValOperand_Shifted =
+ Builder.CreateShl(Builder.CreateZExt(AI->getValOperand(), PMV.WordType),
+ PMV.ShiftAmt, "ValOperand_Shifted");
+
+ auto PerformPartwordOp = [&](IRBuilder<> &Builder, Value *Loaded) {
+ return performMaskedAtomicOp(AI->getOperation(), Builder, Loaded,
+ ValOperand_Shifted, AI->getValOperand(), PMV);
+ };
+
+ // TODO: When we're ready to support LLSC conversions too, use
+ // insertRMWLLSCLoop here for ExpansionKind==LLSC.
+ Value *OldResult =
+ insertRMWCmpXchgLoop(Builder, PMV.WordType, PMV.AlignedAddr, MemOpOrder,
+ PerformPartwordOp, createCmpXchgInstFun);
+ Value *FinalOldResult = Builder.CreateTrunc(
+ Builder.CreateLShr(OldResult, PMV.ShiftAmt), PMV.ValueType);
+ AI->replaceAllUsesWith(FinalOldResult);
+ AI->eraseFromParent();
+}
+
+void AtomicExpand::expandPartwordCmpXchg(AtomicCmpXchgInst *CI) {
+ // The basic idea here is that we're expanding a cmpxchg of a
+ // smaller memory size up to a word-sized cmpxchg. To do this, we
+ // need to add a retry-loop for strong cmpxchg, so that
+ // modifications to other parts of the word don't cause a spurious
+ // failure.
+
+ // This generates code like the following:
+ // [[Setup mask values PMV.*]]
+ // %NewVal_Shifted = shl i32 %NewVal, %PMV.ShiftAmt
+ // %Cmp_Shifted = shl i32 %Cmp, %PMV.ShiftAmt
+ // %InitLoaded = load i32* %addr
+ // %InitLoaded_MaskOut = and i32 %InitLoaded, %PMV.Inv_Mask
+ // br partword.cmpxchg.loop
+ // partword.cmpxchg.loop:
+ // %Loaded_MaskOut = phi i32 [ %InitLoaded_MaskOut, %entry ],
+ // [ %OldVal_MaskOut, %partword.cmpxchg.failure ]
+ // %FullWord_NewVal = or i32 %Loaded_MaskOut, %NewVal_Shifted
+ // %FullWord_Cmp = or i32 %Loaded_MaskOut, %Cmp_Shifted
+ // %NewCI = cmpxchg i32* %PMV.AlignedAddr, i32 %FullWord_Cmp,
+ // i32 %FullWord_NewVal success_ordering failure_ordering
+ // %OldVal = extractvalue { i32, i1 } %NewCI, 0
+ // %Success = extractvalue { i32, i1 } %NewCI, 1
+ // br i1 %Success, label %partword.cmpxchg.end,
+ // label %partword.cmpxchg.failure
+ // partword.cmpxchg.failure:
+ // %OldVal_MaskOut = and i32 %OldVal, %PMV.Inv_Mask
+ // %ShouldContinue = icmp ne i32 %Loaded_MaskOut, %OldVal_MaskOut
+ // br i1 %ShouldContinue, label %partword.cmpxchg.loop,
+ // label %partword.cmpxchg.end
+ // partword.cmpxchg.end:
+ // %tmp1 = lshr i32 %OldVal, %PMV.ShiftAmt
+ // %FinalOldVal = trunc i32 %tmp1 to i8
+ // %tmp2 = insertvalue { i8, i1 } undef, i8 %FinalOldVal, 0
+ // %Res = insertvalue { i8, i1 } %tmp2, i1 %Success, 1
+
+ Value *Addr = CI->getPointerOperand();
+ Value *Cmp = CI->getCompareOperand();
+ Value *NewVal = CI->getNewValOperand();
+
+ BasicBlock *BB = CI->getParent();
+ Function *F = BB->getParent();
+ IRBuilder<> Builder(CI);
+ LLVMContext &Ctx = Builder.getContext();
+
+ const int WordSize = TLI->getMinCmpXchgSizeInBits() / 8;
+
+ BasicBlock *EndBB =
+ BB->splitBasicBlock(CI->getIterator(), "partword.cmpxchg.end");
+ auto FailureBB =
+ BasicBlock::Create(Ctx, "partword.cmpxchg.failure", F, EndBB);
+ auto LoopBB = BasicBlock::Create(Ctx, "partword.cmpxchg.loop", F, FailureBB);
+
+ // The split call above "helpfully" added a branch at the end of BB
+ // (to the wrong place).
+ std::prev(BB->end())->eraseFromParent();
+ Builder.SetInsertPoint(BB);
+
+ PartwordMaskValues PMV = createMaskInstrs(
+ Builder, CI, CI->getCompareOperand()->getType(), Addr, WordSize);
+
+ // Shift the incoming values over, into the right location in the word.
+ Value *NewVal_Shifted =
+ Builder.CreateShl(Builder.CreateZExt(NewVal, PMV.WordType), PMV.ShiftAmt);
+ Value *Cmp_Shifted =
+ Builder.CreateShl(Builder.CreateZExt(Cmp, PMV.WordType), PMV.ShiftAmt);
+
+ // Load the entire current word, and mask into place the expected and new
+ // values
+ LoadInst *InitLoaded = Builder.CreateLoad(PMV.WordType, PMV.AlignedAddr);
+ InitLoaded->setVolatile(CI->isVolatile());
+ Value *InitLoaded_MaskOut = Builder.CreateAnd(InitLoaded, PMV.Inv_Mask);
+ Builder.CreateBr(LoopBB);
+
+ // partword.cmpxchg.loop:
+ Builder.SetInsertPoint(LoopBB);
+ PHINode *Loaded_MaskOut = Builder.CreatePHI(PMV.WordType, 2);
+ Loaded_MaskOut->addIncoming(InitLoaded_MaskOut, BB);
+
+ // Mask/Or the expected and new values into place in the loaded word.
+ Value *FullWord_NewVal = Builder.CreateOr(Loaded_MaskOut, NewVal_Shifted);
+ Value *FullWord_Cmp = Builder.CreateOr(Loaded_MaskOut, Cmp_Shifted);
+ AtomicCmpXchgInst *NewCI = Builder.CreateAtomicCmpXchg(
+ PMV.AlignedAddr, FullWord_Cmp, FullWord_NewVal, CI->getSuccessOrdering(),
+ CI->getFailureOrdering(), CI->getSynchScope());
+ NewCI->setVolatile(CI->isVolatile());
+ // When we're building a strong cmpxchg, we need a loop, so you
+ // might think we could use a weak cmpxchg inside. But using a strong
+ // one allows the ShouldContinue comparison below, and we expect the
+ // underlying cmpxchg to be a machine instruction, which is strong
+ // anyway.
+ NewCI->setWeak(CI->isWeak());
+
+ Value *OldVal = Builder.CreateExtractValue(NewCI, 0);
+ Value *Success = Builder.CreateExtractValue(NewCI, 1);
+
+ if (CI->isWeak())
+ Builder.CreateBr(EndBB);
+ else
+ Builder.CreateCondBr(Success, EndBB, FailureBB);
+
+ // partword.cmpxchg.failure:
+ Builder.SetInsertPoint(FailureBB);
+ // Upon failure, check whether the masked-out part of the loaded value
+ // changed. If it did not, the mismatch was in the masked-in part, so the
+ // cmpxchg has genuinely failed and we exit; otherwise, retry the loop.
+ Value *OldVal_MaskOut = Builder.CreateAnd(OldVal, PMV.Inv_Mask);
+ Value *ShouldContinue = Builder.CreateICmpNE(Loaded_MaskOut, OldVal_MaskOut);
+ Builder.CreateCondBr(ShouldContinue, LoopBB, EndBB);
+
+ // Add the second value to the phi from above
+ Loaded_MaskOut->addIncoming(OldVal_MaskOut, FailureBB);
+
+ // partword.cmpxchg.end:
+ Builder.SetInsertPoint(CI);
+
+ Value *FinalOldVal = Builder.CreateTrunc(
+ Builder.CreateLShr(OldVal, PMV.ShiftAmt), PMV.ValueType);
+ Value *Res = UndefValue::get(CI->getType());
+ Res = Builder.CreateInsertValue(Res, FinalOldVal, 0);
+ Res = Builder.CreateInsertValue(Res, Success, 1);
+
+ CI->replaceAllUsesWith(Res);
+ CI->eraseFromParent();
+}
+
+void AtomicExpand::expandAtomicOpToLLSC(
+ Instruction *I, Type *ResultType, Value *Addr, AtomicOrdering MemOpOrder,
+ function_ref<Value *(IRBuilder<> &, Value *)> PerformOp) {
+ IRBuilder<> Builder(I);
+ Value *Loaded =
+ insertRMWLLSCLoop(Builder, ResultType, Addr, MemOpOrder, PerformOp);
+
+ I->replaceAllUsesWith(Loaded);
+ I->eraseFromParent();
+}
+
+Value *AtomicExpand::insertRMWLLSCLoop(
+ IRBuilder<> &Builder, Type *ResultTy, Value *Addr,
+ AtomicOrdering MemOpOrder,
+ function_ref<Value *(IRBuilder<> &, Value *)> PerformOp) {
+ LLVMContext &Ctx = Builder.getContext();
+ BasicBlock *BB = Builder.GetInsertBlock();
+ Function *F = BB->getParent();
// Given: atomicrmw some_op iN* %addr, iN %incr ordering
//
// The standard expansion we produce is:
// [...]
- // fence?
// atomicrmw.start:
// %loaded = @load.linked(%addr)
// %new = some_op iN %loaded, %incr
@@ -413,17 +880,13 @@ bool AtomicExpand::expandAtomicOpToLLSC(
// %try_again = icmp i32 ne %stored, 0
// br i1 %try_again, label %loop, label %atomicrmw.end
// atomicrmw.end:
- // fence?
// [...]
- BasicBlock *ExitBB = BB->splitBasicBlock(I->getIterator(), "atomicrmw.end");
+ BasicBlock *ExitBB =
+ BB->splitBasicBlock(Builder.GetInsertPoint(), "atomicrmw.end");
BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB);
- // This grabs the DebugLoc from I.
- IRBuilder<> Builder(I);
-
// The split call above "helpfully" added a branch at the end of BB (to the
- // wrong place), but we might want a fence too. It's easiest to just remove
- // the branch entirely.
+ // wrong place).
std::prev(BB->end())->eraseFromParent();
Builder.SetInsertPoint(BB);
Builder.CreateBr(LoopBB);
@@ -441,13 +904,53 @@ bool AtomicExpand::expandAtomicOpToLLSC(
Builder.CreateCondBr(TryAgain, LoopBB, ExitBB);
Builder.SetInsertPoint(ExitBB, ExitBB->begin());
+ return Loaded;
+}
- I->replaceAllUsesWith(Loaded);
- I->eraseFromParent();
+/// Convert an atomic cmpxchg of a non-integral type to an integer cmpxchg of
+/// the equivalent bitwidth. We used to not support pointer cmpxchg in the
+/// IR. As a migration step, we convert back to what used to be the standard
+/// way to represent a pointer cmpxchg so that we can update backends one by
+/// one.
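+///
+/// For example, on a target with 64-bit pointers, 'cmpxchg i8** %p, i8* %o,
+/// i8* %n seq_cst seq_cst' becomes an i64 cmpxchg on a bitcast of %p, with
+/// %o and %n converted via ptrtoint and the loaded result restored with
+/// inttoptr.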
+AtomicCmpXchgInst *AtomicExpand::convertCmpXchgToIntegerType(AtomicCmpXchgInst *CI) {
+ auto *M = CI->getModule();
+ Type *NewTy = getCorrespondingIntegerType(CI->getCompareOperand()->getType(),
+ M->getDataLayout());
- return true;
+ IRBuilder<> Builder(CI);
+
+ Value *Addr = CI->getPointerOperand();
+ Type *PT = PointerType::get(NewTy,
+ Addr->getType()->getPointerAddressSpace());
+ Value *NewAddr = Builder.CreateBitCast(Addr, PT);
+
+ Value *NewCmp = Builder.CreatePtrToInt(CI->getCompareOperand(), NewTy);
+ Value *NewNewVal = Builder.CreatePtrToInt(CI->getNewValOperand(), NewTy);
+
+
+ CI->getSuccessOrdering(),
+ CI->getFailureOrdering(),
+ CI->getSynchScope());
+ NewCI->setVolatile(CI->isVolatile());
+ NewCI->setWeak(CI->isWeak());
+ DEBUG(dbgs() << "Replaced " << *CI << " with " << *NewCI << "\n");
+
+ Value *OldVal = Builder.CreateExtractValue(NewCI, 0);
+ Value *Succ = Builder.CreateExtractValue(NewCI, 1);
+
+ OldVal = Builder.CreateIntToPtr(OldVal, CI->getCompareOperand()->getType());
+
+ Value *Res = UndefValue::get(CI->getType());
+ Res = Builder.CreateInsertValue(Res, OldVal, 0);
+ Res = Builder.CreateInsertValue(Res, Succ, 1);
+
+ CI->replaceAllUsesWith(Res);
+ CI->eraseFromParent();
+ return NewCI;
}
+
bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
AtomicOrdering SuccessOrder = CI->getSuccessOrdering();
AtomicOrdering FailureOrder = CI->getFailureOrdering();
@@ -455,37 +958,71 @@ bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
BasicBlock *BB = CI->getParent();
Function *F = BB->getParent();
LLVMContext &Ctx = F->getContext();
- // If getInsertFencesForAtomic() returns true, then the target does not want
- // to deal with memory orders, and emitLeading/TrailingFence should take care
- // of everything. Otherwise, emitLeading/TrailingFence are no-op and we
+ // If shouldInsertFencesForAtomic() returns true, then the target does not
+ // want to deal with memory orders, and emitLeading/TrailingFence should take
+ // care of everything. Otherwise, emitLeading/TrailingFence are no-op and we
// should preserve the ordering.
+ bool ShouldInsertFencesForAtomic = TLI->shouldInsertFencesForAtomic(CI);
AtomicOrdering MemOpOrder =
- TLI->getInsertFencesForAtomic() ? Monotonic : SuccessOrder;
+ ShouldInsertFencesForAtomic ? AtomicOrdering::Monotonic : SuccessOrder;
+
+ // In implementations which use a barrier to achieve release semantics, we can
+ // delay emitting this barrier until we know a store is actually going to be
+ // attempted. The cost of this delay is that we need 2 copies of the block
+ // emitting the load-linked, affecting code size.
+ //
+ // Ideally, this logic would be unconditional except for the minsize check
+ // since in other cases the extra blocks naturally collapse down to the
+ // minimal loop. Unfortunately, this puts too much stress on later
+ // optimisations so we avoid emitting the extra logic in those cases too.
+ bool HasReleasedLoadBB = !CI->isWeak() && ShouldInsertFencesForAtomic &&
+ SuccessOrder != AtomicOrdering::Monotonic &&
+ SuccessOrder != AtomicOrdering::Acquire &&
+ !F->optForMinSize();
+
+ // There's no overhead for sinking the release barrier in a weak cmpxchg, so
+ // do it even on minsize.
+ bool UseUnconditionalReleaseBarrier = F->optForMinSize() && !CI->isWeak();
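+
+ // As an illustration, on a fence-based target (e.g. ARM before v8) the
+ // HasReleasedLoadBB path sinks the leading dmb into cmpxchg.fencedstore,
+ // so a cmpxchg that fails its compare skips the release barrier entirely.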
// Given: cmpxchg some_op iN* %addr, iN %desired, iN %new success_ord fail_ord
//
// The full expansion we produce is:
// [...]
- // fence?
// cmpxchg.start:
- // %loaded = @load.linked(%addr)
- // %should_store = icmp eq %loaded, %desired
- // br i1 %should_store, label %cmpxchg.trystore,
+ // %unreleasedload = @load.linked(%addr)
+ // %should_store = icmp eq %unreleasedload, %desired
+ // br i1 %should_store, label %cmpxchg.fencedstore,
// label %cmpxchg.nostore
+ // cmpxchg.releasingstore:
+ // fence?
+ // br label cmpxchg.trystore
// cmpxchg.trystore:
+ // %loaded.trystore = phi [%unreleasedload, %releasingstore],
+ // [%releasedload, %cmpxchg.releasedload]
// %stored = @store_conditional(%new, %addr)
// %success = icmp eq i32 %stored, 0
- // br i1 %success, label %cmpxchg.success, label %loop/%cmpxchg.failure
+ // br i1 %success, label %cmpxchg.success,
+ // label %cmpxchg.releasedload/%cmpxchg.failure
+ // cmpxchg.releasedload:
+ // %releasedload = @load.linked(%addr)
+ // %should_store = icmp eq %releasedload, %desired
+ // br i1 %should_store, label %cmpxchg.trystore,
+ // label %cmpxchg.failure
// cmpxchg.success:
// fence?
// br label %cmpxchg.end
// cmpxchg.nostore:
+ // %loaded.nostore = phi [%unreleasedload, %cmpxchg.start],
+ // [%releasedload,
+ // %cmpxchg.releasedload/%cmpxchg.trystore]
// @load_linked_fail_balance()?
// br label %cmpxchg.failure
// cmpxchg.failure:
// fence?
// br label %cmpxchg.end
// cmpxchg.end:
+ // %loaded = phi [%loaded.nostore, %cmpxchg.failure],
+ // [%loaded.trystore, %cmpxchg.trystore]
// %success = phi i1 [true, %cmpxchg.success], [false, %cmpxchg.failure]
// %restmp = insertvalue { iN, i1 } undef, iN %loaded, 0
// %res = insertvalue { iN, i1 } %restmp, i1 %success, 1
@@ -494,8 +1031,13 @@ bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
auto FailureBB = BasicBlock::Create(Ctx, "cmpxchg.failure", F, ExitBB);
auto NoStoreBB = BasicBlock::Create(Ctx, "cmpxchg.nostore", F, FailureBB);
auto SuccessBB = BasicBlock::Create(Ctx, "cmpxchg.success", F, NoStoreBB);
- auto TryStoreBB = BasicBlock::Create(Ctx, "cmpxchg.trystore", F, SuccessBB);
- auto LoopBB = BasicBlock::Create(Ctx, "cmpxchg.start", F, TryStoreBB);
+ auto ReleasedLoadBB =
+ BasicBlock::Create(Ctx, "cmpxchg.releasedload", F, SuccessBB);
+ auto TryStoreBB =
+ BasicBlock::Create(Ctx, "cmpxchg.trystore", F, ReleasedLoadBB);
+ auto ReleasingStoreBB =
+ BasicBlock::Create(Ctx, "cmpxchg.fencedstore", F, TryStoreBB);
+ auto StartBB = BasicBlock::Create(Ctx, "cmpxchg.start", F, ReleasingStoreBB);
// This grabs the DebugLoc from CI
IRBuilder<> Builder(CI);
@@ -505,32 +1047,55 @@ bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
// the branch entirely.
std::prev(BB->end())->eraseFromParent();
Builder.SetInsertPoint(BB);
- TLI->emitLeadingFence(Builder, SuccessOrder, /*IsStore=*/true,
- /*IsLoad=*/true);
- Builder.CreateBr(LoopBB);
+ if (ShouldInsertFencesForAtomic && UseUnconditionalReleaseBarrier)
+ TLI->emitLeadingFence(Builder, SuccessOrder, /*IsStore=*/true,
+ /*IsLoad=*/true);
+ Builder.CreateBr(StartBB);
// Start the main loop block now that we've taken care of the preliminaries.
- Builder.SetInsertPoint(LoopBB);
- Value *Loaded = TLI->emitLoadLinked(Builder, Addr, MemOpOrder);
- Value *ShouldStore =
- Builder.CreateICmpEQ(Loaded, CI->getCompareOperand(), "should_store");
+ Builder.SetInsertPoint(StartBB);
+ Value *UnreleasedLoad = TLI->emitLoadLinked(Builder, Addr, MemOpOrder);
+ Value *ShouldStore = Builder.CreateICmpEQ(
+ UnreleasedLoad, CI->getCompareOperand(), "should_store");
// If the cmpxchg doesn't actually need any ordering when it fails, we can
// jump straight past that fence instruction (if it exists).
- Builder.CreateCondBr(ShouldStore, TryStoreBB, NoStoreBB);
+ Builder.CreateCondBr(ShouldStore, ReleasingStoreBB, NoStoreBB);
+
+ Builder.SetInsertPoint(ReleasingStoreBB);
+ if (ShouldInsertFencesForAtomic && !UseUnconditionalReleaseBarrier)
+ TLI->emitLeadingFence(Builder, SuccessOrder, /*IsStore=*/true,
+ /*IsLoad=*/true);
+ Builder.CreateBr(TryStoreBB);
Builder.SetInsertPoint(TryStoreBB);
Value *StoreSuccess = TLI->emitStoreConditional(
Builder, CI->getNewValOperand(), Addr, MemOpOrder);
StoreSuccess = Builder.CreateICmpEQ(
StoreSuccess, ConstantInt::get(Type::getInt32Ty(Ctx), 0), "success");
+ BasicBlock *RetryBB = HasReleasedLoadBB ? ReleasedLoadBB : StartBB;
Builder.CreateCondBr(StoreSuccess, SuccessBB,
- CI->isWeak() ? FailureBB : LoopBB);
-
- // Make sure later instructions don't get reordered with a fence if necessary.
+ CI->isWeak() ? FailureBB : RetryBB);
+
+ Builder.SetInsertPoint(ReleasedLoadBB);
+ Value *SecondLoad;
+ if (HasReleasedLoadBB) {
+ SecondLoad = TLI->emitLoadLinked(Builder, Addr, MemOpOrder);
+ ShouldStore = Builder.CreateICmpEQ(SecondLoad, CI->getCompareOperand(),
+ "should_store");
+
+ // If the cmpxchg doesn't actually need any ordering when it fails, we can
+ // jump straight past that fence instruction (if it exists).
+ Builder.CreateCondBr(ShouldStore, TryStoreBB, NoStoreBB);
+ } else
+ Builder.CreateUnreachable();
+
+ // Make sure later instructions don't get reordered with a fence if
+ // necessary.
Builder.SetInsertPoint(SuccessBB);
- TLI->emitTrailingFence(Builder, SuccessOrder, /*IsStore=*/true,
- /*IsLoad=*/true);
+ if (ShouldInsertFencesForAtomic)
+ TLI->emitTrailingFence(Builder, SuccessOrder, /*IsStore=*/true,
+ /*IsLoad=*/true);
Builder.CreateBr(ExitBB);
Builder.SetInsertPoint(NoStoreBB);
@@ -541,20 +1106,43 @@ bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
Builder.CreateBr(FailureBB);
Builder.SetInsertPoint(FailureBB);
- TLI->emitTrailingFence(Builder, FailureOrder, /*IsStore=*/true,
- /*IsLoad=*/true);
+ if (ShouldInsertFencesForAtomic)
+ TLI->emitTrailingFence(Builder, FailureOrder, /*IsStore=*/true,
+ /*IsLoad=*/true);
Builder.CreateBr(ExitBB);
// Finally, we have control-flow based knowledge of whether the cmpxchg
// succeeded or not. We expose this to later passes by converting any
- // subsequent "icmp eq/ne %loaded, %oldval" into a use of an appropriate PHI.
-
- // Setup the builder so we can create any PHIs we need.
+ // subsequent "icmp eq/ne %loaded, %oldval" into a use of an appropriate
+ // PHI.
Builder.SetInsertPoint(ExitBB, ExitBB->begin());
PHINode *Success = Builder.CreatePHI(Type::getInt1Ty(Ctx), 2);
Success->addIncoming(ConstantInt::getTrue(Ctx), SuccessBB);
Success->addIncoming(ConstantInt::getFalse(Ctx), FailureBB);
+ // Setup the builder so we can create any PHIs we need.
+ Value *Loaded;
+ if (!HasReleasedLoadBB)
+ Loaded = UnreleasedLoad;
+ else {
+ Builder.SetInsertPoint(TryStoreBB, TryStoreBB->begin());
+ PHINode *TryStoreLoaded = Builder.CreatePHI(UnreleasedLoad->getType(), 2);
+ TryStoreLoaded->addIncoming(UnreleasedLoad, ReleasingStoreBB);
+ TryStoreLoaded->addIncoming(SecondLoad, ReleasedLoadBB);
+
+ Builder.SetInsertPoint(NoStoreBB, NoStoreBB->begin());
+ PHINode *NoStoreLoaded = Builder.CreatePHI(UnreleasedLoad->getType(), 2);
+ NoStoreLoaded->addIncoming(UnreleasedLoad, StartBB);
+ NoStoreLoaded->addIncoming(SecondLoad, ReleasedLoadBB);
+
+ Builder.SetInsertPoint(ExitBB, ++ExitBB->begin());
+ PHINode *ExitLoaded = Builder.CreatePHI(UnreleasedLoad->getType(), 2);
+ ExitLoaded->addIncoming(TryStoreLoaded, SuccessBB);
+ ExitLoaded->addIncoming(NoStoreLoaded, FailureBB);
+
+ Loaded = ExitLoaded;
+ }
+
// Look for any users of the cmpxchg that are just comparing the loaded value
// against the desired one, and replace them with the CFG-derived version.
SmallVector<ExtractValueInst *, 2> PrunedInsts;
@@ -620,16 +1208,14 @@ bool AtomicExpand::simplifyIdempotentRMW(AtomicRMWInst* RMWI) {
return false;
}
-bool llvm::expandAtomicRMWToCmpXchg(AtomicRMWInst *AI,
- CreateCmpXchgInstFun CreateCmpXchg) {
- assert(AI);
-
- AtomicOrdering MemOpOrder =
- AI->getOrdering() == Unordered ? Monotonic : AI->getOrdering();
- Value *Addr = AI->getPointerOperand();
- BasicBlock *BB = AI->getParent();
+Value *AtomicExpand::insertRMWCmpXchgLoop(
+ IRBuilder<> &Builder, Type *ResultTy, Value *Addr,
+ AtomicOrdering MemOpOrder,
+ function_ref<Value *(IRBuilder<> &, Value *)> PerformOp,
+ CreateCmpXchgInstFun CreateCmpXchg) {
+ LLVMContext &Ctx = Builder.getContext();
+ BasicBlock *BB = Builder.GetInsertBlock();
Function *F = BB->getParent();
- LLVMContext &Ctx = F->getContext();
// Given: atomicrmw some_op iN* %addr, iN %incr ordering
//
@@ -646,34 +1232,34 @@ bool llvm::expandAtomicRMWToCmpXchg(AtomicRMWInst *AI,
// br i1 %success, label %atomicrmw.end, label %loop
// atomicrmw.end:
// [...]
- BasicBlock *ExitBB = BB->splitBasicBlock(AI->getIterator(), "atomicrmw.end");
+ BasicBlock *ExitBB =
+ BB->splitBasicBlock(Builder.GetInsertPoint(), "atomicrmw.end");
BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB);
- // This grabs the DebugLoc from AI.
- IRBuilder<> Builder(AI);
-
// The split call above "helpfully" added a branch at the end of BB (to the
// wrong place), but we want a load. It's easiest to just remove
// the branch entirely.
std::prev(BB->end())->eraseFromParent();
Builder.SetInsertPoint(BB);
- LoadInst *InitLoaded = Builder.CreateLoad(Addr);
+ LoadInst *InitLoaded = Builder.CreateLoad(ResultTy, Addr);
// Atomics require at least natural alignment.
- InitLoaded->setAlignment(AI->getType()->getPrimitiveSizeInBits() / 8);
+ InitLoaded->setAlignment(ResultTy->getPrimitiveSizeInBits() / 8);
Builder.CreateBr(LoopBB);
// Start the main loop block now that we've taken care of the preliminaries.
Builder.SetInsertPoint(LoopBB);
- PHINode *Loaded = Builder.CreatePHI(AI->getType(), 2, "loaded");
+ PHINode *Loaded = Builder.CreatePHI(ResultTy, 2, "loaded");
Loaded->addIncoming(InitLoaded, BB);
- Value *NewVal =
- performAtomicOp(AI->getOperation(), Builder, Loaded, AI->getValOperand());
+ Value *NewVal = PerformOp(Builder, Loaded);
Value *NewLoaded = nullptr;
Value *Success = nullptr;
- CreateCmpXchg(Builder, Addr, Loaded, NewVal, MemOpOrder,
+ CreateCmpXchg(Builder, Addr, Loaded, NewVal,
+ MemOpOrder == AtomicOrdering::Unordered
+ ? AtomicOrdering::Monotonic
+ : MemOpOrder,
Success, NewLoaded);
assert(Success && NewLoaded);
@@ -682,9 +1268,373 @@ bool llvm::expandAtomicRMWToCmpXchg(AtomicRMWInst *AI,
Builder.CreateCondBr(Success, ExitBB, LoopBB);
Builder.SetInsertPoint(ExitBB, ExitBB->begin());
+ return NewLoaded;
+}
- AI->replaceAllUsesWith(NewLoaded);
+// Note: This function is exposed externally by AtomicExpandUtils.h
+bool llvm::expandAtomicRMWToCmpXchg(AtomicRMWInst *AI,
+ CreateCmpXchgInstFun CreateCmpXchg) {
+ IRBuilder<> Builder(AI);
+ Value *Loaded = AtomicExpand::insertRMWCmpXchgLoop(
+ Builder, AI->getType(), AI->getPointerOperand(), AI->getOrdering(),
+ [&](IRBuilder<> &Builder, Value *Loaded) {
+ return performAtomicOp(AI->getOperation(), Builder, Loaded,
+ AI->getValOperand());
+ },
+ CreateCmpXchg);
+
+ AI->replaceAllUsesWith(Loaded);
AI->eraseFromParent();
+ return true;
+}
+// In order to use one of the sized library calls such as
+// __atomic_fetch_add_4, the alignment must be sufficient, the size
+// must be one of the potentially-specialized sizes, and the value
+// type must actually exist in C on the target (otherwise, the
+// function wouldn't actually be defined.)
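+//
+// For example, an i32 operation with alignment 4 can use
+// __atomic_fetch_add_4, while a 3-byte or underaligned access must fall
+// back to the generic __atomic_* calls that take an explicit size argument.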
+static bool canUseSizedAtomicCall(unsigned Size, unsigned Align,
+ const DataLayout &DL) {
+ // TODO: "LargestSize" is an approximation for "largest type that
+ // you can express in C". It seems to be the case that int128 is
+ // supported on all 64-bit platforms, otherwise only up to 64-bit
+ // integers are supported. If we get this wrong, then we'll try to
+ // call a sized libcall that doesn't actually exist. There should
+ // really be some more reliable way in LLVM of determining integer
+ // sizes which are valid in the target's C ABI...
+ unsigned LargestSize = DL.getLargestLegalIntTypeSizeInBits() >= 64 ? 16 : 8;
+ return Align >= Size &&
+ (Size == 1 || Size == 2 || Size == 4 || Size == 8 || Size == 16) &&
+ Size <= LargestSize;
+}
+
+void AtomicExpand::expandAtomicLoadToLibcall(LoadInst *I) {
+ static const RTLIB::Libcall Libcalls[6] = {
+ RTLIB::ATOMIC_LOAD, RTLIB::ATOMIC_LOAD_1, RTLIB::ATOMIC_LOAD_2,
+ RTLIB::ATOMIC_LOAD_4, RTLIB::ATOMIC_LOAD_8, RTLIB::ATOMIC_LOAD_16};
+ unsigned Size = getAtomicOpSize(I);
+ unsigned Align = getAtomicOpAlign(I);
+
+ bool expanded = expandAtomicOpToLibcall(
+ I, Size, Align, I->getPointerOperand(), nullptr, nullptr,
+ I->getOrdering(), AtomicOrdering::NotAtomic, Libcalls);
+ (void)expanded;
+ assert(expanded && "expandAtomicOpToLibcall shouldn't fail for Load");
+}
+
+void AtomicExpand::expandAtomicStoreToLibcall(StoreInst *I) {
+ static const RTLIB::Libcall Libcalls[6] = {
+ RTLIB::ATOMIC_STORE, RTLIB::ATOMIC_STORE_1, RTLIB::ATOMIC_STORE_2,
+ RTLIB::ATOMIC_STORE_4, RTLIB::ATOMIC_STORE_8, RTLIB::ATOMIC_STORE_16};
+ unsigned Size = getAtomicOpSize(I);
+ unsigned Align = getAtomicOpAlign(I);
+
+ bool expanded = expandAtomicOpToLibcall(
+ I, Size, Align, I->getPointerOperand(), I->getValueOperand(), nullptr,
+ I->getOrdering(), AtomicOrdering::NotAtomic, Libcalls);
+ (void)expanded;
+ assert(expanded && "expandAtomicOpToLibcall shouldn't fail for Store");
+}
+
+void AtomicExpand::expandAtomicCASToLibcall(AtomicCmpXchgInst *I) {
+ static const RTLIB::Libcall Libcalls[6] = {
+ RTLIB::ATOMIC_COMPARE_EXCHANGE, RTLIB::ATOMIC_COMPARE_EXCHANGE_1,
+ RTLIB::ATOMIC_COMPARE_EXCHANGE_2, RTLIB::ATOMIC_COMPARE_EXCHANGE_4,
+ RTLIB::ATOMIC_COMPARE_EXCHANGE_8, RTLIB::ATOMIC_COMPARE_EXCHANGE_16};
+ unsigned Size = getAtomicOpSize(I);
+ unsigned Align = getAtomicOpAlign(I);
+
+ bool expanded = expandAtomicOpToLibcall(
+ I, Size, Align, I->getPointerOperand(), I->getNewValOperand(),
+ I->getCompareOperand(), I->getSuccessOrdering(), I->getFailureOrdering(),
+ Libcalls);
+ (void)expanded;
+ assert(expanded && "expandAtomicOpToLibcall shouldn't fail for CAS");
+}
+
+static ArrayRef<RTLIB::Libcall> GetRMWLibcall(AtomicRMWInst::BinOp Op) {
+ static const RTLIB::Libcall LibcallsXchg[6] = {
+ RTLIB::ATOMIC_EXCHANGE, RTLIB::ATOMIC_EXCHANGE_1,
+ RTLIB::ATOMIC_EXCHANGE_2, RTLIB::ATOMIC_EXCHANGE_4,
+ RTLIB::ATOMIC_EXCHANGE_8, RTLIB::ATOMIC_EXCHANGE_16};
+ static const RTLIB::Libcall LibcallsAdd[6] = {
+ RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_ADD_1,
+ RTLIB::ATOMIC_FETCH_ADD_2, RTLIB::ATOMIC_FETCH_ADD_4,
+ RTLIB::ATOMIC_FETCH_ADD_8, RTLIB::ATOMIC_FETCH_ADD_16};
+ static const RTLIB::Libcall LibcallsSub[6] = {
+ RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_SUB_1,
+ RTLIB::ATOMIC_FETCH_SUB_2, RTLIB::ATOMIC_FETCH_SUB_4,
+ RTLIB::ATOMIC_FETCH_SUB_8, RTLIB::ATOMIC_FETCH_SUB_16};
+ static const RTLIB::Libcall LibcallsAnd[6] = {
+ RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_AND_1,
+ RTLIB::ATOMIC_FETCH_AND_2, RTLIB::ATOMIC_FETCH_AND_4,
+ RTLIB::ATOMIC_FETCH_AND_8, RTLIB::ATOMIC_FETCH_AND_16};
+ static const RTLIB::Libcall LibcallsOr[6] = {
+ RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_OR_1,
+ RTLIB::ATOMIC_FETCH_OR_2, RTLIB::ATOMIC_FETCH_OR_4,
+ RTLIB::ATOMIC_FETCH_OR_8, RTLIB::ATOMIC_FETCH_OR_16};
+ static const RTLIB::Libcall LibcallsXor[6] = {
+ RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_XOR_1,
+ RTLIB::ATOMIC_FETCH_XOR_2, RTLIB::ATOMIC_FETCH_XOR_4,
+ RTLIB::ATOMIC_FETCH_XOR_8, RTLIB::ATOMIC_FETCH_XOR_16};
+ static const RTLIB::Libcall LibcallsNand[6] = {
+ RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_NAND_1,
+ RTLIB::ATOMIC_FETCH_NAND_2, RTLIB::ATOMIC_FETCH_NAND_4,
+ RTLIB::ATOMIC_FETCH_NAND_8, RTLIB::ATOMIC_FETCH_NAND_16};
+
+ switch (Op) {
+ case AtomicRMWInst::BAD_BINOP:
+ llvm_unreachable("Should not have BAD_BINOP.");
+ case AtomicRMWInst::Xchg:
+ return makeArrayRef(LibcallsXchg);
+ case AtomicRMWInst::Add:
+ return makeArrayRef(LibcallsAdd);
+ case AtomicRMWInst::Sub:
+ return makeArrayRef(LibcallsSub);
+ case AtomicRMWInst::And:
+ return makeArrayRef(LibcallsAnd);
+ case AtomicRMWInst::Or:
+ return makeArrayRef(LibcallsOr);
+ case AtomicRMWInst::Xor:
+ return makeArrayRef(LibcallsXor);
+ case AtomicRMWInst::Nand:
+ return makeArrayRef(LibcallsNand);
+ case AtomicRMWInst::Max:
+ case AtomicRMWInst::Min:
+ case AtomicRMWInst::UMax:
+ case AtomicRMWInst::UMin:
+ // No atomic libcalls are available for max/min/umax/umin.
+ return {};
+ }
+ llvm_unreachable("Unexpected AtomicRMW operation.");
+}
+
+void AtomicExpand::expandAtomicRMWToLibcall(AtomicRMWInst *I) {
+ ArrayRef<RTLIB::Libcall> Libcalls = GetRMWLibcall(I->getOperation());
+
+ unsigned Size = getAtomicOpSize(I);
+ unsigned Align = getAtomicOpAlign(I);
+
+ bool Success = false;
+ if (!Libcalls.empty())
+ Success = expandAtomicOpToLibcall(
+ I, Size, Align, I->getPointerOperand(), I->getValOperand(), nullptr,
+ I->getOrdering(), AtomicOrdering::NotAtomic, Libcalls);
+
+ // The expansion failed: either there were no libcalls at all for
+ // the operation (min/max), or there were only size-specialized
+ // libcalls (add/sub/etc) and we needed a generic. So, expand to a
+ // CAS libcall, via a CAS loop, instead.
+ if (!Success) {
+ expandAtomicRMWToCmpXchg(I, [this](IRBuilder<> &Builder, Value *Addr,
+ Value *Loaded, Value *NewVal,
+ AtomicOrdering MemOpOrder,
+ Value *&Success, Value *&NewLoaded) {
+ // Create the CAS instruction normally...
+ AtomicCmpXchgInst *Pair = Builder.CreateAtomicCmpXchg(
+ Addr, Loaded, NewVal, MemOpOrder,
+ AtomicCmpXchgInst::getStrongestFailureOrdering(MemOpOrder));
+ Success = Builder.CreateExtractValue(Pair, 1, "success");
+ NewLoaded = Builder.CreateExtractValue(Pair, 0, "newloaded");
+
+ // ...and then expand the CAS into a libcall.
+ expandAtomicCASToLibcall(Pair);
+ });
+ }
+}
+
+// A helper routine for the above expandAtomic*ToLibcall functions.
+//
+// 'Libcalls' contains an array of enum values for the particular
+// ATOMIC libcalls to be emitted. All of the other arguments besides
+// 'I' are extracted from the Instruction subclass by the
+// caller. Depending on the particular call, some will be null.
+bool AtomicExpand::expandAtomicOpToLibcall(
+ Instruction *I, unsigned Size, unsigned Align, Value *PointerOperand,
+ Value *ValueOperand, Value *CASExpected, AtomicOrdering Ordering,
+ AtomicOrdering Ordering2, ArrayRef<RTLIB::Libcall> Libcalls) {
+ assert(Libcalls.size() == 6);
+
+ LLVMContext &Ctx = I->getContext();
+ Module *M = I->getModule();
+ const DataLayout &DL = M->getDataLayout();
+ IRBuilder<> Builder(I);
+ IRBuilder<> AllocaBuilder(&I->getFunction()->getEntryBlock().front());
+
+ bool UseSizedLibcall = canUseSizedAtomicCall(Size, Align, DL);
+ Type *SizedIntTy = Type::getIntNTy(Ctx, Size * 8);
+
+ unsigned AllocaAlignment = DL.getPrefTypeAlignment(SizedIntTy);
+
+ // TODO: the "order" argument type is "int", not int32. So
+ // getInt32Ty may be wrong if the arch uses e.g. 16-bit ints.
+ ConstantInt *SizeVal64 = ConstantInt::get(Type::getInt64Ty(Ctx), Size);
+ assert(Ordering != AtomicOrdering::NotAtomic && "expect atomic MO");
+ Constant *OrderingVal =
+ ConstantInt::get(Type::getInt32Ty(Ctx), (int)toCABI(Ordering));
+ Constant *Ordering2Val = nullptr;
+ if (CASExpected) {
+ assert(Ordering2 != AtomicOrdering::NotAtomic && "expect atomic MO");
+ Ordering2Val =
+ ConstantInt::get(Type::getInt32Ty(Ctx), (int)toCABI(Ordering2));
+ }
+ bool HasResult = I->getType() != Type::getVoidTy(Ctx);
+
+ RTLIB::Libcall RTLibType;
+ if (UseSizedLibcall) {
+ switch (Size) {
+ case 1: RTLibType = Libcalls[1]; break;
+ case 2: RTLibType = Libcalls[2]; break;
+ case 4: RTLibType = Libcalls[3]; break;
+ case 8: RTLibType = Libcalls[4]; break;
+ case 16: RTLibType = Libcalls[5]; break;
+ }
+ } else if (Libcalls[0] != RTLIB::UNKNOWN_LIBCALL) {
+ RTLibType = Libcalls[0];
+ } else {
+ // Can't use sized function, and there's no generic for this
+ // operation, so give up.
+ return false;
+ }
+
+ // Build up the function call. There are two kinds. First, the sized
+ // variants. These calls are going to be one of the following (with
+ // N=1,2,4,8,16):
+ // iN __atomic_load_N(iN *ptr, int ordering)
+ // void __atomic_store_N(iN *ptr, iN val, int ordering)
+ // iN __atomic_{exchange|fetch_*}_N(iN *ptr, iN val, int ordering)
+ // bool __atomic_compare_exchange_N(iN *ptr, iN *expected, iN desired,
+ // int success_order, int failure_order)
+ //
+ // Note that these functions can be used for non-integer atomic
+ // operations; the values just need to be bitcast to integers on the
+ // way in and out.
+ //
+ // And, then, the generic variants. They look like the following:
+ // void __atomic_load(size_t size, void *ptr, void *ret, int ordering)
+ // void __atomic_store(size_t size, void *ptr, void *val, int ordering)
+ // void __atomic_exchange(size_t size, void *ptr, void *val, void *ret,
+ // int ordering)
+ // bool __atomic_compare_exchange(size_t size, void *ptr, void *expected,
+ // void *desired, int success_order,
+ // int failure_order)
+ //
+ // The different signatures are built up depending on the
+ // 'UseSizedLibcall', 'CASExpected', 'ValueOperand', and 'HasResult'
+ // variables.
+
+ AllocaInst *AllocaCASExpected = nullptr;
+ Value *AllocaCASExpected_i8 = nullptr;
+ AllocaInst *AllocaValue = nullptr;
+ Value *AllocaValue_i8 = nullptr;
+ AllocaInst *AllocaResult = nullptr;
+ Value *AllocaResult_i8 = nullptr;
+
+ Type *ResultTy;
+ SmallVector<Value *, 6> Args;
+ AttributeSet Attr;
+
+ // 'size' argument.
+ if (!UseSizedLibcall) {
+ // Note, getIntPtrType is assumed equivalent to size_t.
+ Args.push_back(ConstantInt::get(DL.getIntPtrType(Ctx), Size));
+ }
+
+ // 'ptr' argument.
+ Value *PtrVal =
+ Builder.CreateBitCast(PointerOperand, Type::getInt8PtrTy(Ctx));
+ Args.push_back(PtrVal);
+
+ // 'expected' argument, if present.
+ if (CASExpected) {
+ AllocaCASExpected = AllocaBuilder.CreateAlloca(CASExpected->getType());
+ AllocaCASExpected->setAlignment(AllocaAlignment);
+ AllocaCASExpected_i8 =
+ Builder.CreateBitCast(AllocaCASExpected, Type::getInt8PtrTy(Ctx));
+ Builder.CreateLifetimeStart(AllocaCASExpected_i8, SizeVal64);
+ Builder.CreateAlignedStore(CASExpected, AllocaCASExpected, AllocaAlignment);
+ Args.push_back(AllocaCASExpected_i8);
+ }
+
+ // 'val' argument ('desired' for cas), if present.
+ if (ValueOperand) {
+ if (UseSizedLibcall) {
+ Value *IntValue =
+ Builder.CreateBitOrPointerCast(ValueOperand, SizedIntTy);
+ Args.push_back(IntValue);
+ } else {
+ AllocaValue = AllocaBuilder.CreateAlloca(ValueOperand->getType());
+ AllocaValue->setAlignment(AllocaAlignment);
+ AllocaValue_i8 =
+ Builder.CreateBitCast(AllocaValue, Type::getInt8PtrTy(Ctx));
+ Builder.CreateLifetimeStart(AllocaValue_i8, SizeVal64);
+ Builder.CreateAlignedStore(ValueOperand, AllocaValue, AllocaAlignment);
+ Args.push_back(AllocaValue_i8);
+ }
+ }
+
+ // 'ret' argument.
+ if (!CASExpected && HasResult && !UseSizedLibcall) {
+ AllocaResult = AllocaBuilder.CreateAlloca(I->getType());
+ AllocaResult->setAlignment(AllocaAlignment);
+ AllocaResult_i8 =
+ Builder.CreateBitCast(AllocaResult, Type::getInt8PtrTy(Ctx));
+ Builder.CreateLifetimeStart(AllocaResult_i8, SizeVal64);
+ Args.push_back(AllocaResult_i8);
+ }
+
+ // 'ordering' ('success_order' for cas) argument.
+ Args.push_back(OrderingVal);
+
+ // 'failure_order' argument, if present.
+ if (Ordering2Val)
+ Args.push_back(Ordering2Val);
+
+ // Now, the return type.
+ if (CASExpected) {
+ ResultTy = Type::getInt1Ty(Ctx);
+ Attr = Attr.addAttribute(Ctx, AttributeSet::ReturnIndex, Attribute::ZExt);
+ } else if (HasResult && UseSizedLibcall)
+ ResultTy = SizedIntTy;
+ else
+ ResultTy = Type::getVoidTy(Ctx);
+
+ // Done with setting up arguments and return types, create the call:
+ SmallVector<Type *, 6> ArgTys;
+ for (Value *Arg : Args)
+ ArgTys.push_back(Arg->getType());
+ FunctionType *FnType = FunctionType::get(ResultTy, ArgTys, false);
+ Constant *LibcallFn =
+ M->getOrInsertFunction(TLI->getLibcallName(RTLibType), FnType, Attr);
+ CallInst *Call = Builder.CreateCall(LibcallFn, Args);
+ Call->setAttributes(Attr);
+ Value *Result = Call;
+
+ // And then, extract the results...
+ if (ValueOperand && !UseSizedLibcall)
+ Builder.CreateLifetimeEnd(AllocaValue_i8, SizeVal64);
+
+ if (CASExpected) {
+ // The final result from the CAS is {load of 'expected' alloca, bool result
+ // from call}
+ Type *FinalResultTy = I->getType();
+ Value *V = UndefValue::get(FinalResultTy);
+ Value *ExpectedOut =
+ Builder.CreateAlignedLoad(AllocaCASExpected, AllocaAlignment);
+ Builder.CreateLifetimeEnd(AllocaCASExpected_i8, SizeVal64);
+ V = Builder.CreateInsertValue(V, ExpectedOut, 0);
+ V = Builder.CreateInsertValue(V, Result, 1);
+ I->replaceAllUsesWith(V);
+ } else if (HasResult) {
+ Value *V;
+ if (UseSizedLibcall)
+ V = Builder.CreateBitOrPointerCast(Result, I->getType());
+ else {
+ V = Builder.CreateAlignedLoad(AllocaResult, AllocaAlignment);
+ Builder.CreateLifetimeEnd(AllocaResult_i8, SizeVal64);
+ }
+ I->replaceAllUsesWith(V);
+ }
+ I->eraseFromParent();
return true;
}
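For readers unfamiliar with the libatomic interface this expansion targets, the following minimal sketch (not part of the patch) shows what the two strategies amount to for a 4-byte `atomicrmw nand`: the sized `__atomic_fetch_nand_4` entry point when one is available, and the compare-exchange loop that `expandAtomicRMWToCmpXchg` falls back to otherwise. The declarations are written out by hand here to keep the sketch self-contained; the prototypes follow the comment block in `expandAtomicOpToLibcall` above, and actually running this requires linking against a libatomic implementation.

```cpp
#include <cstdint>

extern "C" {
// Sized variant, usable when size and alignment permit (see
// canUseSizedAtomicCall). Returns the old value.
uint32_t __atomic_fetch_nand_4(volatile void *Ptr, uint32_t Val, int Order);
// Sized compare-exchange; everything else can be built on top of it.
bool __atomic_compare_exchange_4(volatile void *Ptr, uint32_t *Expected,
                                 uint32_t Desired, int SuccessOrder,
                                 int FailureOrder);
}

static const int SeqCst = 5; // __ATOMIC_SEQ_CST in the C ABI

// Expansion when a fetch_nand libcall exists for this size.
uint32_t fetchNandDirect(volatile uint32_t *P, uint32_t V) {
  return __atomic_fetch_nand_4(P, V, SeqCst);
}

// The CAS-loop fallback: load, compute the new value, and retry the
// compare-exchange until no other thread intervened. atomicrmw yields
// the value that was loaded before the update.
uint32_t fetchNandViaCASLoop(volatile uint32_t *P, uint32_t V) {
  uint32_t Loaded = *P;
  uint32_t NewVal;
  do {
    NewVal = ~(Loaded & V); // Nand
  } while (!__atomic_compare_exchange_4(P, &Loaded, NewVal, SeqCst, SeqCst));
  return Loaded;
}
```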
diff --git a/contrib/llvm/lib/CodeGen/BranchFolding.cpp b/contrib/llvm/lib/CodeGen/BranchFolding.cpp
index df5cac5..5dacbf9 100644
--- a/contrib/llvm/lib/CodeGen/BranchFolding.cpp
+++ b/contrib/llvm/lib/CodeGen/BranchFolding.cpp
@@ -27,10 +27,11 @@
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
-#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/Function.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
@@ -90,7 +91,7 @@ INITIALIZE_PASS(BranchFolderPass, "branch-folder",
"Control Flow Optimizer", false, false)
bool BranchFolderPass::runOnMachineFunction(MachineFunction &MF) {
- if (skipOptnoneFunction(*MF.getFunction()))
+ if (skipFunction(*MF.getFunction()))
return false;
TargetPassConfig *PassConfig = &getAnalysis<TargetPassConfig>();
@@ -98,8 +99,9 @@ bool BranchFolderPass::runOnMachineFunction(MachineFunction &MF) {
// HW that requires structurized CFG.
bool EnableTailMerge = !MF.getTarget().requiresStructuredCFG() &&
PassConfig->getEnableTailMerge();
- BranchFolder Folder(EnableTailMerge, /*CommonHoist=*/true,
- getAnalysis<MachineBlockFrequencyInfo>(),
+ BranchFolder::MBFIWrapper MBBFreqInfo(
+ getAnalysis<MachineBlockFrequencyInfo>());
+ BranchFolder Folder(EnableTailMerge, /*CommonHoist=*/true, MBBFreqInfo,
getAnalysis<MachineBranchProbabilityInfo>());
return Folder.OptimizeFunction(MF, MF.getSubtarget().getInstrInfo(),
MF.getSubtarget().getRegisterInfo(),
@@ -107,7 +109,7 @@ bool BranchFolderPass::runOnMachineFunction(MachineFunction &MF) {
}
BranchFolder::BranchFolder(bool defaultEnableTailMerge, bool CommonHoist,
- const MachineBlockFrequencyInfo &FreqInfo,
+ MBFIWrapper &FreqInfo,
const MachineBranchProbabilityInfo &ProbInfo)
: EnableHoistCommonCode(CommonHoist), MBBFreqInfo(FreqInfo),
MBPI(ProbInfo) {
@@ -135,6 +137,8 @@ void BranchFolder::RemoveDeadBlock(MachineBasicBlock *MBB) {
// Remove the block.
MF->erase(MBB);
FuncletMembership.erase(MBB);
+ if (MLI)
+ MLI->removeBlock(MBB);
}
/// OptimizeImpDefsBlock - If a basic block is just a bunch of implicit_def
@@ -167,7 +171,7 @@ bool BranchFolder::OptimizeImpDefsBlock(MachineBasicBlock *MBB) {
MachineBasicBlock::iterator FirstTerm = I;
while (I != MBB->end()) {
- if (!TII->isUnpredicatedTerminator(I))
+ if (!TII->isUnpredicatedTerminator(*I))
return false;
// See if it uses any of the implicitly defined registers.
for (const MachineOperand &MO : I->operands()) {
@@ -191,25 +195,26 @@ bool BranchFolder::OptimizeImpDefsBlock(MachineBasicBlock *MBB) {
}
/// OptimizeFunction - Perhaps branch folding, tail merging and other
-/// CFG optimizations on the given function.
+/// CFG optimizations on the given function. Block placement changes the layout
+/// and may create new tail merging opportunities.
bool BranchFolder::OptimizeFunction(MachineFunction &MF,
const TargetInstrInfo *tii,
const TargetRegisterInfo *tri,
- MachineModuleInfo *mmi) {
+ MachineModuleInfo *mmi,
+ MachineLoopInfo *mli, bool AfterPlacement) {
if (!tii) return false;
TriedMerging.clear();
+ AfterBlockPlacement = AfterPlacement;
TII = tii;
TRI = tri;
MMI = mmi;
- RS = nullptr;
+ MLI = mli;
- // Use a RegScavenger to help update liveness when required.
MachineRegisterInfo &MRI = MF.getRegInfo();
- if (MRI.tracksLiveness() && TRI->trackLivenessAfterRegAlloc(MF))
- RS = new RegScavenger();
- else
+ UpdateLiveIns = MRI.tracksLiveness() && TRI->trackLivenessAfterRegAlloc(MF);
+ if (!UpdateLiveIns)
MRI.invalidateLiveness();
// Fix CFG. The later algorithms expect it to be right.
@@ -217,7 +222,7 @@ bool BranchFolder::OptimizeFunction(MachineFunction &MF,
for (MachineBasicBlock &MBB : MF) {
MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
SmallVector<MachineOperand, 4> Cond;
- if (!TII->AnalyzeBranch(MBB, TBB, FBB, Cond, true))
+ if (!TII->analyzeBranch(MBB, TBB, FBB, Cond, true))
MadeChange |= MBB.CorrectExtraCFGEdges(TBB, FBB, !Cond.empty());
MadeChange |= OptimizeImpDefsBlock(&MBB);
}
@@ -228,7 +233,10 @@ bool BranchFolder::OptimizeFunction(MachineFunction &MF,
bool MadeChangeThisIteration = true;
while (MadeChangeThisIteration) {
MadeChangeThisIteration = TailMergeBlocks(MF);
- MadeChangeThisIteration |= OptimizeBranches(MF);
+ // No need to clean up if tail merging does not change anything after the
+ // block placement.
+ if (!AfterBlockPlacement || MadeChangeThisIteration)
+ MadeChangeThisIteration |= OptimizeBranches(MF);
if (EnableHoistCommonCode)
MadeChangeThisIteration |= HoistCommonCode(MF);
MadeChange |= MadeChangeThisIteration;
@@ -237,10 +245,8 @@ bool BranchFolder::OptimizeFunction(MachineFunction &MF,
// See if any jump tables have become dead as the code generator
// did its thing.
MachineJumpTableInfo *JTI = MF.getJumpTableInfo();
- if (!JTI) {
- delete RS;
+ if (!JTI)
return MadeChange;
- }
// Walk the function to find jump tables that are live.
BitVector JTIsLive(JTI->getJumpTables().size());
@@ -262,7 +268,6 @@ bool BranchFolder::OptimizeFunction(MachineFunction &MF,
MadeChange = true;
}
- delete RS;
return MadeChange;
}
@@ -271,10 +276,10 @@ bool BranchFolder::OptimizeFunction(MachineFunction &MF,
//===----------------------------------------------------------------------===//
/// HashMachineInstr - Compute a hash value for MI and its operands.
-static unsigned HashMachineInstr(const MachineInstr *MI) {
- unsigned Hash = MI->getOpcode();
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- const MachineOperand &Op = MI->getOperand(i);
+static unsigned HashMachineInstr(const MachineInstr &MI) {
+ unsigned Hash = MI.getOpcode();
+ for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+ const MachineOperand &Op = MI.getOperand(i);
// Merge in bits from the operand if easy. We can't use MachineOperand's
// hash_code here because it's not deterministic and we sort by hash value
@@ -311,12 +316,12 @@ static unsigned HashMachineInstr(const MachineInstr *MI) {
}
/// HashEndOfMBB - Hash the last instruction in the MBB.
-static unsigned HashEndOfMBB(const MachineBasicBlock *MBB) {
- MachineBasicBlock::const_iterator I = MBB->getLastNonDebugInstr();
- if (I == MBB->end())
+static unsigned HashEndOfMBB(const MachineBasicBlock &MBB) {
+ MachineBasicBlock::const_iterator I = MBB.getLastNonDebugInstr();
+ if (I == MBB.end())
return 0;
- return HashMachineInstr(I);
+ return HashMachineInstr(*I);
}
/// ComputeCommonTailLength - Given two machine basic blocks, compute the number
@@ -357,7 +362,7 @@ static unsigned ComputeCommonTailLength(MachineBasicBlock *MBB1,
--I2;
}
// I1, I2==first (untested) non-DBGs preceding known match
- if (!I1->isIdenticalTo(I2) ||
+ if (!I1->isIdenticalTo(*I2) ||
// FIXME: This check is dubious. It's used to get around a problem where
// people incorrectly expect inline asm directives to remain in the same
// relative order. This is untenable because normal compiler
@@ -394,15 +399,27 @@ static unsigned ComputeCommonTailLength(MachineBasicBlock *MBB1,
return TailLen;
}
-void BranchFolder::MaintainLiveIns(MachineBasicBlock *CurMBB,
- MachineBasicBlock *NewMBB) {
- if (RS) {
- RS->enterBasicBlock(CurMBB);
- if (!CurMBB->empty())
- RS->forward(std::prev(CurMBB->end()));
- for (unsigned int i = 1, e = TRI->getNumRegs(); i != e; i++)
- if (RS->isRegUsed(i, false))
- NewMBB->addLiveIn(i);
+void BranchFolder::computeLiveIns(MachineBasicBlock &MBB) {
+ if (!UpdateLiveIns)
+ return;
+
+ LiveRegs.init(TRI);
+ LiveRegs.addLiveOutsNoPristines(MBB);
+ for (MachineInstr &MI : make_range(MBB.rbegin(), MBB.rend()))
+ LiveRegs.stepBackward(MI);
+
+ for (unsigned Reg : LiveRegs) {
+ // Skip the register if we are about to add one of its super registers.
+ bool ContainsSuperReg = false;
+ for (MCSuperRegIterator SReg(Reg, TRI); SReg.isValid(); ++SReg) {
+ if (LiveRegs.contains(*SReg)) {
+ ContainsSuperReg = true;
+ break;
+ }
+ }
+ if (ContainsSuperReg)
+ continue;
+ MBB.addLiveIn(Reg);
}
}
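The replacement of the RegScavenger forward walk with a LivePhysRegs backward walk above is the classic liveness recurrence: start from the live-outs and step backward, where a def kills liveness and a use creates it. A toy sketch of that dataflow step, with plain integers standing in for physical registers (the real code additionally skips registers whose super-register is already live, which this toy ignores):

```cpp
#include <set>
#include <vector>

struct Instr {
  std::vector<int> Defs; // registers written
  std::vector<int> Uses; // registers read
};

// Walk the block bottom-up starting from its live-outs. What remains at
// the top is exactly the set the new code adds via MBB.addLiveIn().
std::set<int> computeLiveIns(const std::vector<Instr> &Block,
                             std::set<int> Live /* live-outs */) {
  for (auto I = Block.rbegin(), E = Block.rend(); I != E; ++I) {
    for (int D : I->Defs)
      Live.erase(D);
    for (int U : I->Uses)
      Live.insert(U);
  }
  return Live;
}
```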
@@ -410,12 +427,9 @@ void BranchFolder::MaintainLiveIns(MachineBasicBlock *CurMBB,
/// after it, replacing it with an unconditional branch to NewDest.
void BranchFolder::ReplaceTailWithBranchTo(MachineBasicBlock::iterator OldInst,
MachineBasicBlock *NewDest) {
- MachineBasicBlock *CurMBB = OldInst->getParent();
-
TII->ReplaceTailWithBranchTo(OldInst, NewDest);
- // For targets that use the register scavenger, we must maintain LiveIns.
- MaintainLiveIns(CurMBB, NewDest);
+ computeLiveIns(*NewDest);
++NumTailMerge;
}
@@ -445,16 +459,22 @@ MachineBasicBlock *BranchFolder::SplitMBBAt(MachineBasicBlock &CurMBB,
// Splice the code over.
NewMBB->splice(NewMBB->end(), &CurMBB, BBI1, CurMBB.end());
+ // NewMBB belongs to the same loop as CurMBB.
+ if (MLI)
+ if (MachineLoop *ML = MLI->getLoopFor(&CurMBB))
+ ML->addBasicBlockToLoop(NewMBB, MLI->getBase());
+
// NewMBB inherits CurMBB's block frequency.
MBBFreqInfo.setBlockFreq(NewMBB, MBBFreqInfo.getBlockFreq(&CurMBB));
- // For targets that use the register scavenger, we must maintain LiveIns.
- MaintainLiveIns(&CurMBB, NewMBB);
+ computeLiveIns(*NewMBB);
// Add the new block to the funclet.
const auto &FuncletI = FuncletMembership.find(&CurMBB);
- if (FuncletI != FuncletMembership.end())
- FuncletMembership[NewMBB] = FuncletI->second;
+ if (FuncletI != FuncletMembership.end()) {
+ auto n = FuncletI->second;
+ FuncletMembership[NewMBB] = n;
+ }
return NewMBB;
}
@@ -488,8 +508,7 @@ static void FixTail(MachineBasicBlock *CurMBB, MachineBasicBlock *SuccBB,
MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
SmallVector<MachineOperand, 4> Cond;
DebugLoc dl; // FIXME: this is nowhere
- if (I != MF->end() &&
- !TII->AnalyzeBranch(*CurMBB, TBB, FBB, Cond, true)) {
+ if (I != MF->end() && !TII->analyzeBranch(*CurMBB, TBB, FBB, Cond, true)) {
MachineBasicBlock *NextBB = &*I;
if (TBB == NextBB && !Cond.empty() && !FBB) {
if (!TII->ReverseBranchCondition(Cond)) {
@@ -537,6 +556,18 @@ void BranchFolder::MBFIWrapper::setBlockFreq(const MachineBasicBlock *MBB,
MergedBBFreq[MBB] = F;
}
+raw_ostream &
+BranchFolder::MBFIWrapper::printBlockFreq(raw_ostream &OS,
+ const MachineBasicBlock *MBB) const {
+ return MBFI.printBlockFreq(OS, getBlockFreq(MBB));
+}
+
+raw_ostream &
+BranchFolder::MBFIWrapper::printBlockFreq(raw_ostream &OS,
+ const BlockFrequency Freq) const {
+ return MBFI.printBlockFreq(OS, Freq);
+}
+
/// CountTerminators - Count the number of terminators in the given
/// block and set I to the position of the first non-terminator, if there
/// is one, or MBB->end() otherwise.
@@ -745,11 +776,8 @@ bool BranchFolder::CreateCommonTailOnlyBlock(MachineBasicBlock *&PredBB,
}
static void
-removeMMOsFromMemoryOperations(MachineBasicBlock::iterator MBBIStartPos,
- MachineBasicBlock &MBBCommon) {
- // Remove MMOs from memory operations in the common block
- // when they do not match the ones from the block being tail-merged.
- // This ensures later passes conservatively compute dependencies.
+mergeOperations(MachineBasicBlock::iterator MBBIStartPos,
+ MachineBasicBlock &MBBCommon) {
MachineBasicBlock *MBB = MBBIStartPos->getParent();
// Note CommonTailLen does not necessarily match the size of
// the common BB nor all its instructions because of debug
@@ -777,24 +805,33 @@ removeMMOsFromMemoryOperations(MachineBasicBlock::iterator MBBIStartPos,
assert(MBBICommon != MBBIECommon &&
"Reached BB end within common tail length!");
- assert(MBBICommon->isIdenticalTo(&*MBBI) && "Expected matching MIIs!");
+ assert(MBBICommon->isIdenticalTo(*MBBI) && "Expected matching MIIs!");
+ // Merge MMOs from memory operations in the common block.
if (MBBICommon->mayLoad() || MBBICommon->mayStore())
MBBICommon->setMemRefs(MBBICommon->mergeMemRefsWith(*MBBI));
+ // Drop undef flags if they aren't present in all merged instructions.
+ for (unsigned I = 0, E = MBBICommon->getNumOperands(); I != E; ++I) {
+ MachineOperand &MO = MBBICommon->getOperand(I);
+ if (MO.isReg() && MO.isUndef()) {
+ const MachineOperand &OtherMO = MBBI->getOperand(I);
+ if (!OtherMO.isUndef())
+ MO.setIsUndef(false);
+ }
+ }
++MBBI;
++MBBICommon;
}
}
-// See if any of the blocks in MergePotentials (which all have a common single
-// successor, or all have no successor) can be tail-merged. If there is a
-// successor, any blocks in MergePotentials that are not tail-merged and
-// are not immediately before Succ must have an unconditional branch to
-// Succ added (but the predecessor/successor lists need no adjustment).
-// The lone predecessor of Succ that falls through into Succ,
+// See if any of the blocks in MergePotentials (which all have SuccBB as a
+// successor, or all have no successor if it is null) can be tail-merged.
+// If there is a successor, any blocks in MergePotentials that are not
+// tail-merged and are not immediately before Succ must have an unconditional
+// branch to Succ added (but the predecessor/successor lists need no
+// adjustment). The lone predecessor of Succ that falls through into Succ,
// if any, is given in PredBB.
-
bool BranchFolder::TryTailMergeBlocks(MachineBasicBlock *SuccBB,
MachineBasicBlock *PredBB) {
bool MadeChange = false;
@@ -888,7 +925,7 @@ bool BranchFolder::TryTailMergeBlocks(MachineBasicBlock *SuccBB,
MachineBasicBlock *MBB = SameTails[commonTailIndex].getBlock();
- // Recompute commont tail MBB's edge weights and block frequency.
+ // Recompute common tail MBB's edge weights and block frequency.
setCommonTailEdgeWeights(*MBB);
// MBB is common tail. Adjust all other BB's to jump to this one.
@@ -900,8 +937,8 @@ bool BranchFolder::TryTailMergeBlocks(MachineBasicBlock *SuccBB,
continue;
DEBUG(dbgs() << "BB#" << SameTails[i].getBlock()->getNumber()
<< (i == e-1 ? "" : ", "));
- // Remove MMOs from memory operations as needed.
- removeMMOsFromMemoryOperations(SameTails[i].getTailStartPos(), *MBB);
+ // Merge operations (MMOs, undef flags)
+ mergeOperations(SameTails[i].getTailStartPos(), *MBB);
// Hack the end off BB i, making it jump to BB commonTailIndex instead.
ReplaceTailWithBranchTo(SameTails[i].getTailStartPos(), MBB);
// BB i is no longer a predecessor of SuccBB; remove it from the worklist.
@@ -920,23 +957,27 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) {
if (!EnableTailMerge) return MadeChange;
// First find blocks with no successors.
- MergePotentials.clear();
- for (MachineBasicBlock &MBB : MF) {
- if (MergePotentials.size() == TailMergeThreshold)
- break;
- if (!TriedMerging.count(&MBB) && MBB.succ_empty())
- MergePotentials.push_back(MergePotentialsElt(HashEndOfMBB(&MBB), &MBB));
- }
+ // Block placement does not create new tail merging opportunities for these
+ // blocks.
+ if (!AfterBlockPlacement) {
+ MergePotentials.clear();
+ for (MachineBasicBlock &MBB : MF) {
+ if (MergePotentials.size() == TailMergeThreshold)
+ break;
+ if (!TriedMerging.count(&MBB) && MBB.succ_empty())
+ MergePotentials.push_back(MergePotentialsElt(HashEndOfMBB(MBB), &MBB));
+ }
- // If this is a large problem, avoid visiting the same basic blocks
- // multiple times.
- if (MergePotentials.size() == TailMergeThreshold)
- for (unsigned i = 0, e = MergePotentials.size(); i != e; ++i)
- TriedMerging.insert(MergePotentials[i].getBlock());
+ // If this is a large problem, avoid visiting the same basic blocks
+ // multiple times.
+ if (MergePotentials.size() == TailMergeThreshold)
+ for (unsigned i = 0, e = MergePotentials.size(); i != e; ++i)
+ TriedMerging.insert(MergePotentials[i].getBlock());
- // See if we can do any tail merging on those.
- if (MergePotentials.size() >= 2)
- MadeChange |= TryTailMergeBlocks(nullptr, nullptr);
+ // See if we can do any tail merging on those.
+ if (MergePotentials.size() >= 2)
+ MadeChange |= TryTailMergeBlocks(nullptr, nullptr);
+ }
// Look at blocks (IBB) with multiple predecessors (PBB).
// We change each predecessor to a canonical form, by
@@ -964,6 +1005,24 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) {
MachineBasicBlock *IBB = &*I;
MachineBasicBlock *PredBB = &*std::prev(I);
MergePotentials.clear();
+ MachineLoop *ML;
+
+ // Bail if merging after placement and IBB is the loop header because
+ // -- If merging predecessors that belong to the same loop as IBB, the
+ // common tail of merged predecessors may become the loop top if block
+ // placement is called again and the predecessors may branch to this common
+ // tail and require more branches. This can be relaxed if
+ // MachineBlockPlacement::findBestLoopTop is more flexible.
+ // -- If merging predecessors that do not belong to the same loop as IBB, the
+ // loop info of IBB's loop and the other loops may be affected. Calling the
+ // block placement again may make big changes to the layout and eliminate the
+ // reason to do tail merging here.
+ if (AfterBlockPlacement && MLI) {
+ ML = MLI->getLoopFor(IBB);
+ if (ML && IBB == ML->getHeader())
+ continue;
+ }
+
for (MachineBasicBlock *PBB : I->predecessors()) {
if (MergePotentials.size() == TailMergeThreshold)
break;
@@ -983,9 +1042,16 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) {
if (PBB->hasEHPadSuccessor())
continue;
+ // After block placement, only consider predecessors that belong to the
+ // same loop as IBB. The reason is the same as above when skipping loop
+ // header.
+ if (AfterBlockPlacement && MLI)
+ if (ML != MLI->getLoopFor(PBB))
+ continue;
+
MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
SmallVector<MachineOperand, 4> Cond;
- if (!TII->AnalyzeBranch(*PBB, TBB, FBB, Cond, true)) {
+ if (!TII->analyzeBranch(*PBB, TBB, FBB, Cond, true)) {
// Failing case: IBB is the target of a cbr, and we cannot reverse the
// branch.
SmallVector<MachineOperand, 4> NewCond(Cond);
@@ -1033,7 +1099,7 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) {
NewCond, dl);
}
- MergePotentials.push_back(MergePotentialsElt(HashEndOfMBB(PBB), PBB));
+ MergePotentials.push_back(MergePotentialsElt(HashEndOfMBB(*PBB), PBB));
}
}
@@ -1211,7 +1277,7 @@ ReoptimizeBlock:
// where a BB jumps to more than one landing pad.
// TODO: Is it ever worth rewriting predecessors which don't already
// jump to a landing pad, and so can safely jump to the fallthrough?
- } else {
+ } else if (MBB->isSuccessor(&*FallThrough)) {
// Rewrite all predecessors of the old block to go to the fallthrough
// instead.
while (!MBB->pred_empty()) {
@@ -1234,7 +1300,7 @@ ReoptimizeBlock:
MachineBasicBlock *PriorTBB = nullptr, *PriorFBB = nullptr;
SmallVector<MachineOperand, 4> PriorCond;
bool PriorUnAnalyzable =
- TII->AnalyzeBranch(PrevBB, PriorTBB, PriorFBB, PriorCond, true);
+ TII->analyzeBranch(PrevBB, PriorTBB, PriorFBB, PriorCond, true);
if (!PriorUnAnalyzable) {
// If the CFG for the prior block has extra edges, remove them.
MadeChange |= PrevBB.CorrectExtraCFGEdges(PriorTBB, PriorFBB,
@@ -1275,11 +1341,11 @@ ReoptimizeBlock:
// DBG_VALUE at the beginning of MBB.
while (PrevBBIter != PrevBB.begin() && MBBIter != MBB->end()
&& PrevBBIter->isDebugValue() && MBBIter->isDebugValue()) {
- if (!MBBIter->isIdenticalTo(PrevBBIter))
+ if (!MBBIter->isIdenticalTo(*PrevBBIter))
break;
- MachineInstr *DuplicateDbg = MBBIter;
+ MachineInstr &DuplicateDbg = *MBBIter;
++MBBIter; -- PrevBBIter;
- DuplicateDbg->eraseFromParent();
+ DuplicateDbg.eraseFromParent();
}
}
PrevBB.splice(PrevBB.end(), MBB, MBB->begin(), MBB->end());
@@ -1371,7 +1437,8 @@ ReoptimizeBlock:
// Analyze the branch in the current block.
MachineBasicBlock *CurTBB = nullptr, *CurFBB = nullptr;
SmallVector<MachineOperand, 4> CurCond;
- bool CurUnAnalyzable= TII->AnalyzeBranch(*MBB, CurTBB, CurFBB, CurCond, true);
+ bool CurUnAnalyzable =
+ TII->analyzeBranch(*MBB, CurTBB, CurFBB, CurCond, true);
if (!CurUnAnalyzable) {
// If the CFG for the prior block has extra edges, remove them.
MadeChange |= MBB->CorrectExtraCFGEdges(CurTBB, CurFBB, !CurCond.empty());
@@ -1455,8 +1522,8 @@ ReoptimizeBlock:
// change this to an unconditional branch (and fix the CFG).
MachineBasicBlock *NewCurTBB = nullptr, *NewCurFBB = nullptr;
SmallVector<MachineOperand, 4> NewCurCond;
- bool NewCurUnAnalyzable = TII->AnalyzeBranch(*PMBB, NewCurTBB,
- NewCurFBB, NewCurCond, true);
+ bool NewCurUnAnalyzable = TII->analyzeBranch(
+ *PMBB, NewCurTBB, NewCurFBB, NewCurCond, true);
if (!NewCurUnAnalyzable && NewCurTBB && NewCurTBB == NewCurFBB) {
DebugLoc pdl = getBranchDebugLoc(*PMBB);
TII->RemoveBranch(*PMBB);
@@ -1502,9 +1569,9 @@ ReoptimizeBlock:
MachineBasicBlock *PredTBB = nullptr, *PredFBB = nullptr;
SmallVector<MachineOperand, 4> PredCond;
if (PredBB != MBB && !PredBB->canFallThrough() &&
- !TII->AnalyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true)
- && (!CurFallsThru || !CurTBB || !CurFBB)
- && (!CurFallsThru || MBB->getNumber() >= PredBB->getNumber())) {
+ !TII->analyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true) &&
+ (!CurFallsThru || !CurTBB || !CurFBB) &&
+ (!CurFallsThru || MBB->getNumber() >= PredBB->getNumber())) {
// If the current block doesn't fall through, just move it.
// If the current block can fall through and does not end with a
// conditional branch, we need to append an unconditional jump to
@@ -1560,7 +1627,7 @@ ReoptimizeBlock:
// Now check to see if the current block is sitting between PrevBB and
// a block to which it could fall through.
if (FallThrough != MF.end() &&
- !TII->AnalyzeBranch(PrevBB, PrevTBB, PrevFBB, PrevCond, true) &&
+ !TII->analyzeBranch(PrevBB, PrevTBB, PrevFBB, PrevCond, true) &&
PrevBB.isSuccessor(&*FallThrough)) {
MBB->moveAfter(&MF.back());
MadeChange = true;
@@ -1623,7 +1690,7 @@ MachineBasicBlock::iterator findHoistingInsertPosAndDeps(MachineBasicBlock *MBB,
SmallSet<unsigned,4> &Uses,
SmallSet<unsigned,4> &Defs) {
MachineBasicBlock::iterator Loc = MBB->getFirstTerminator();
- if (!TII->isUnpredicatedTerminator(Loc))
+ if (!TII->isUnpredicatedTerminator(*Loc))
return MBB->end();
for (const MachineOperand &MO : Loc->operands()) {
@@ -1685,7 +1752,7 @@ MachineBasicBlock::iterator findHoistingInsertPosAndDeps(MachineBasicBlock *MBB,
// Also avoid moving code above predicated instruction since it's hard to
// reason about register liveness with predicated instruction.
bool DontMoveAcrossStore = true;
- if (!PI->isSafeToMove(nullptr, DontMoveAcrossStore) || TII->isPredicated(PI))
+ if (!PI->isSafeToMove(nullptr, DontMoveAcrossStore) || TII->isPredicated(*PI))
return MBB->end();
@@ -1719,7 +1786,7 @@ MachineBasicBlock::iterator findHoistingInsertPosAndDeps(MachineBasicBlock *MBB,
bool BranchFolder::HoistCommonCodeInSuccs(MachineBasicBlock *MBB) {
MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
SmallVector<MachineOperand, 4> Cond;
- if (TII->AnalyzeBranch(*MBB, TBB, FBB, Cond, true) || !TBB || Cond.empty())
+ if (TII->analyzeBranch(*MBB, TBB, FBB, Cond, true) || !TBB || Cond.empty())
return false;
if (!FBB) FBB = findFalseBlock(MBB, TBB);
@@ -1762,10 +1829,10 @@ bool BranchFolder::HoistCommonCodeInSuccs(MachineBasicBlock *MBB) {
if (FIB == FIE)
break;
}
- if (!TIB->isIdenticalTo(FIB, MachineInstr::CheckKillDead))
+ if (!TIB->isIdenticalTo(*FIB, MachineInstr::CheckKillDead))
break;
- if (TII->isPredicated(TIB))
+ if (TII->isPredicated(*TIB))
// Hard to reason about register liveness with predicated instruction.
break;
@@ -1844,7 +1911,7 @@ bool BranchFolder::HoistCommonCodeInSuccs(MachineBasicBlock *MBB) {
if (!MO.isReg() || !MO.isDef() || MO.isDead())
continue;
unsigned Reg = MO.getReg();
- if (!Reg)
+ if (!Reg || TargetRegisterInfo::isVirtualRegister(Reg))
continue;
LocalDefs.push_back(Reg);
addRegAndItsAliases(Reg, TRI, LocalDefsSet);
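As background on the surrounding machinery (unchanged in spirit by this patch): tail merging buckets candidate blocks by a hash of their last instruction, and only blocks whose hashes collide are compared instruction by instruction. A toy sketch under those assumptions, with strings standing in for machine instructions:

```cpp
#include <functional>
#include <string>
#include <vector>

struct Block {
  std::vector<std::string> Insns;
};

// Analogue of HashEndOfMBB: hash only the last instruction, which is cheap
// and good enough to bucket candidates for pairwise comparison.
unsigned hashEndOfBlock(const Block &B) {
  return B.Insns.empty()
             ? 0u
             : static_cast<unsigned>(std::hash<std::string>{}(B.Insns.back()));
}

// Analogue of ComputeCommonTailLength (minus the debug-instruction and
// inline-asm special cases): length of the identical shared suffix.
std::size_t commonTailLength(const Block &A, const Block &B) {
  std::size_t N = 0;
  while (N < A.Insns.size() && N < B.Insns.size() &&
         A.Insns[A.Insns.size() - 1 - N] == B.Insns[B.Insns.size() - 1 - N])
    ++N;
  return N;
}
```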
diff --git a/contrib/llvm/lib/CodeGen/BranchFolding.h b/contrib/llvm/lib/CodeGen/BranchFolding.h
index d759d53..36a5a2e 100644
--- a/contrib/llvm/lib/CodeGen/BranchFolding.h
+++ b/contrib/llvm/lib/CodeGen/BranchFolding.h
@@ -11,6 +11,7 @@
#define LLVM_LIB_CODEGEN_BRANCHFOLDING_H
#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/Support/BlockFrequency.h"
#include <vector>
@@ -20,20 +21,23 @@ namespace llvm {
class MachineBranchProbabilityInfo;
class MachineFunction;
class MachineModuleInfo;
- class RegScavenger;
+ class MachineLoopInfo;
class TargetInstrInfo;
class TargetRegisterInfo;
class LLVM_LIBRARY_VISIBILITY BranchFolder {
public:
+ class MBFIWrapper;
+
explicit BranchFolder(bool defaultEnableTailMerge, bool CommonHoist,
- const MachineBlockFrequencyInfo &MBFI,
+ MBFIWrapper &MBFI,
const MachineBranchProbabilityInfo &MBPI);
- bool OptimizeFunction(MachineFunction &MF,
- const TargetInstrInfo *tii,
- const TargetRegisterInfo *tri,
- MachineModuleInfo *mmi);
+ bool OptimizeFunction(MachineFunction &MF, const TargetInstrInfo *tii,
+ const TargetRegisterInfo *tri, MachineModuleInfo *mmi,
+ MachineLoopInfo *mli = nullptr,
+ bool AfterPlacement = false);
+
private:
class MergePotentialsElt {
unsigned Hash;
@@ -91,13 +95,17 @@ namespace llvm {
};
std::vector<SameTailElt> SameTails;
+ bool AfterBlockPlacement;
bool EnableTailMerge;
bool EnableHoistCommonCode;
+ bool UpdateLiveIns;
const TargetInstrInfo *TII;
const TargetRegisterInfo *TRI;
MachineModuleInfo *MMI;
- RegScavenger *RS;
+ MachineLoopInfo *MLI;
+ LivePhysRegs LiveRegs;
+ public:
/// \brief This class keeps track of branch frequencies of newly created
/// blocks and tail-merged blocks.
class MBFIWrapper {
@@ -105,21 +113,25 @@ namespace llvm {
MBFIWrapper(const MachineBlockFrequencyInfo &I) : MBFI(I) {}
BlockFrequency getBlockFreq(const MachineBasicBlock *MBB) const;
void setBlockFreq(const MachineBasicBlock *MBB, BlockFrequency F);
+ raw_ostream &printBlockFreq(raw_ostream &OS,
+ const MachineBasicBlock *MBB) const;
+ raw_ostream &printBlockFreq(raw_ostream &OS,
+ const BlockFrequency Freq) const;
private:
const MachineBlockFrequencyInfo &MBFI;
DenseMap<const MachineBasicBlock *, BlockFrequency> MergedBBFreq;
};
- MBFIWrapper MBBFreqInfo;
+ private:
+ MBFIWrapper &MBBFreqInfo;
const MachineBranchProbabilityInfo &MBPI;
bool TailMergeBlocks(MachineFunction &MF);
bool TryTailMergeBlocks(MachineBasicBlock* SuccBB,
MachineBasicBlock* PredBB);
void setCommonTailEdgeWeights(MachineBasicBlock &TailMBB);
- void MaintainLiveIns(MachineBasicBlock *CurMBB,
- MachineBasicBlock *NewMBB);
+ void computeLiveIns(MachineBasicBlock &MBB);
void ReplaceTailWithBranchTo(MachineBasicBlock::iterator OldInst,
MachineBasicBlock *NewDest);
MachineBasicBlock *SplitMBBAt(MachineBasicBlock &CurMBB,
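To see how the widened interface is meant to be driven, here is a hedged fragment (not from the patch) of what a hypothetical tail-merging pass running after machine block placement would do inside its `runOnMachineFunction`; the analysis names follow the BranchFolding.cpp hunks above.

```cpp
// Fragment from a hypothetical MachineFunctionPass::runOnMachineFunction.
// Passing a MachineLoopInfo keeps loop membership up to date as blocks are
// split or removed; AfterPlacement=true enables the placement-aware
// restrictions on which predecessors may be merged.
BranchFolder::MBFIWrapper MBBFreqInfo(
    getAnalysis<MachineBlockFrequencyInfo>());
BranchFolder Folder(/*defaultEnableTailMerge=*/true, /*CommonHoist=*/false,
                    MBBFreqInfo,
                    getAnalysis<MachineBranchProbabilityInfo>());
Folder.OptimizeFunction(MF, MF.getSubtarget().getInstrInfo(),
                        MF.getSubtarget().getRegisterInfo(),
                        getAnalysisIfAvailable<MachineModuleInfo>(),
                        &getAnalysis<MachineLoopInfo>(),
                        /*AfterPlacement=*/true);
```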
diff --git a/contrib/llvm/lib/CodeGen/BuiltinGCs.cpp b/contrib/llvm/lib/CodeGen/BuiltinGCs.cpp
new file mode 100644
index 0000000..ff7c99d
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/BuiltinGCs.cpp
@@ -0,0 +1,139 @@
+//===-- BuiltinGCs.cpp - Boilerplate for our built in GC types --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the boilerplate required to define our various built in
+// gc lowering strategies.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/GCs.h"
+#include "llvm/CodeGen/GCStrategy.h"
+
+using namespace llvm;
+
+namespace {
+
+/// An example GC which attempts to be compatible with Erlang/OTP garbage
+/// collector.
+///
+/// The frametable emitter is in ErlangGCPrinter.cpp.
+class ErlangGC : public GCStrategy {
+public:
+ ErlangGC() {
+ InitRoots = false;
+ NeededSafePoints = 1 << GC::PostCall;
+ UsesMetadata = true;
+ CustomRoots = false;
+ }
+};
+
+/// An example GC which attempts to be compatible with Objective Caml 3.10.0
+///
+/// The frametable emitter is in OcamlGCPrinter.cpp.
+class OcamlGC : public GCStrategy {
+public:
+ OcamlGC() {
+ NeededSafePoints = 1 << GC::PostCall;
+ UsesMetadata = true;
+ }
+};
+
+/// A GC strategy for uncooperative targets. This implements lowering for the
+/// llvm.gc* intrinsics for targets that do not natively support them (which
+/// includes the C backend). Note that the code generated is not quite as
+/// efficient as algorithms which generate stack maps to identify roots.
+///
+/// In order to support this particular transformation, all stack roots are
+/// co-allocated on the stack. This allows a fully target-independent stack map
+/// while introducing only minor runtime overhead.
+class ShadowStackGC : public GCStrategy {
+public:
+ ShadowStackGC() {
+ InitRoots = true;
+ CustomRoots = true;
+ }
+};
+
+/// A GCStrategy which serves as an example for the usage of a statepoint-based
+/// lowering strategy. This GCStrategy is intended to be suitable as a default
+/// implementation usable with any collector which can consume the standard
+/// stackmap format generated by statepoints, uses the default address space to
+/// distinguish between gc managed and non-gc managed pointers, and has
+/// reasonable relocation semantics.
+class StatepointGC : public GCStrategy {
+public:
+ StatepointGC() {
+ UseStatepoints = true;
+ // These options are all gc.root specific; we specify them so that the
+ // gc.root lowering code doesn't run.
+ InitRoots = false;
+ NeededSafePoints = 0;
+ UsesMetadata = false;
+ CustomRoots = false;
+ }
+ Optional<bool> isGCManagedPointer(const Type *Ty) const override {
+ // Method is only valid on pointer typed values.
+ const PointerType *PT = cast<PointerType>(Ty);
+ // For the sake of this example GC, we arbitrarily pick addrspace(1) as our
+ // GC managed heap. We know that a pointer into this heap needs to be
+ // updated and that no other pointer does. Note that addrspace(1) is used
+ // only as an example, it has no special meaning, and is not reserved for
+ // GC usage.
+ return (1 == PT->getAddressSpace());
+ }
+};
+
+/// A GCStrategy for the CoreCLR Runtime. The strategy is similar to
+/// Statepoint-example GC, but differs from it in certain aspects, such as:
+/// 1) Base-pointers need not be explicitly tracked and reported for
+/// interior pointers
+/// 2) Uses a different format for encoding stack-maps
+/// 3) Location of Safe-point polls: polls are only needed before loop-back
+/// edges and before tail-calls (not needed at function-entry)
+///
+/// The above differences in behavior are to be implemented in upcoming
+/// checkins.
+class CoreCLRGC : public GCStrategy {
+public:
+ CoreCLRGC() {
+ UseStatepoints = true;
+ // These options are all gc.root specific; we specify them so that the
+ // gc.root lowering code doesn't run.
+ InitRoots = false;
+ NeededSafePoints = 0;
+ UsesMetadata = false;
+ CustomRoots = false;
+ }
+ Optional<bool> isGCManagedPointer(const Type *Ty) const override {
+ // Method is only valid on pointer typed values.
+ const PointerType *PT = cast<PointerType>(Ty);
+ // We pick addrspace(1) as our GC managed heap.
+ return (1 == PT->getAddressSpace());
+ }
+};
+}
+
+// Register all the above so that they can be found at runtime. Note that
+// these static initializers are important since the registration list is
+// constructed from their storage.
+static GCRegistry::Add<ErlangGC> A("erlang",
+ "erlang-compatible garbage collector");
+static GCRegistry::Add<OcamlGC> B("ocaml", "ocaml 3.10-compatible GC");
+static GCRegistry::Add<ShadowStackGC>
+ C("shadow-stack", "Very portable GC for uncooperative code generators");
+static GCRegistry::Add<StatepointGC> D("statepoint-example",
+ "an example strategy for statepoint");
+static GCRegistry::Add<CoreCLRGC> E("coreclr", "CoreCLR-compatible GC");
+
+// Provide hooks to ensure the containing library is fully loaded.
+void llvm::linkErlangGC() {}
+void llvm::linkOcamlGC() {}
+void llvm::linkShadowStackGC() {}
+void llvm::linkStatepointExampleGC() {}
+void llvm::linkCoreCLRGC() {}
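Registering an out-of-tree strategy follows the same pattern as the built-ins above; a minimal sketch, where `MyGC` and its option string are hypothetical:

```cpp
#include "llvm/CodeGen/GCStrategy.h"

using namespace llvm;

namespace {
// A hypothetical collector: uses post-call safe points and emits metadata,
// mirroring the flag choices of the strategies defined in this file.
class MyGC : public GCStrategy {
public:
  MyGC() {
    UsesMetadata = true;
    NeededSafePoints = 1 << GC::PostCall;
  }
};
} // end anonymous namespace

// The static object's constructor runs at load time and appends the entry
// to the registry, which is why these initializers must not be discarded
// by the linker (hence the link* hooks above).
static GCRegistry::Add<MyGC> X("my-gc", "example out-of-tree collector");
```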
diff --git a/contrib/llvm/lib/CodeGen/CalcSpillWeights.cpp b/contrib/llvm/lib/CodeGen/CalcSpillWeights.cpp
index 26aa46f..dc2d38a 100644
--- a/contrib/llvm/lib/CodeGen/CalcSpillWeights.cpp
+++ b/contrib/llvm/lib/CodeGen/CalcSpillWeights.cpp
@@ -121,7 +121,7 @@ static bool isRematerializable(const LiveInterval &LI,
}
}
- if (!TII.isTriviallyReMaterializable(MI, LIS.getAliasAnalysis()))
+ if (!TII.isTriviallyReMaterializable(*MI, LIS.getAliasAnalysis()))
return false;
}
return true;
@@ -170,8 +170,7 @@ VirtRegAuxInfo::calculateSpillWeightAndHint(LiveInterval &li) {
// Calculate instr weight.
bool reads, writes;
std::tie(reads, writes) = mi->readsWritesVirtualRegister(li.reg);
- weight = LiveIntervals::getSpillWeight(
- writes, reads, &MBFI, mi);
+ weight = LiveIntervals::getSpillWeight(writes, reads, &MBFI, *mi);
// Give extra weight to what looks like a loop induction variable update.
if (writes && isExiting && LIS.isLiveOutOfMBB(li, mbb))
@@ -192,11 +191,15 @@ VirtRegAuxInfo::calculateSpillWeightAndHint(LiveInterval &li) {
// FIXME: we probably shouldn't use floats at all.
volatile float hweight = Hint[hint] += weight;
if (TargetRegisterInfo::isPhysicalRegister(hint)) {
- if (hweight > bestPhys && mri.isAllocatable(hint))
- bestPhys = hweight, hintPhys = hint;
+ if (hweight > bestPhys && mri.isAllocatable(hint)) {
+ bestPhys = hweight;
+ hintPhys = hint;
+ }
} else {
- if (hweight > bestVirt)
- bestVirt = hweight, hintVirt = hint;
+ if (hweight > bestVirt) {
+ bestVirt = hweight;
+ hintVirt = hint;
+ }
}
}
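The hint bookkeeping that the patch untangles from comma operators reduces to a small accumulation loop: weights from all uses are summed per hint register, while the best physical and best virtual hints are tracked separately. A toy model of just that logic:

```cpp
#include <map>

struct HintState {
  std::map<unsigned, float> Hint; // accumulated weight per hint register
  float BestPhys = 0, BestVirt = 0;
  unsigned HintPhys = 0, HintVirt = 0;

  // Called once per copy-like use that hints register HintReg.
  void addUse(unsigned HintReg, bool IsPhysical, float Weight) {
    float HWeight = Hint[HintReg] += Weight;
    if (IsPhysical) {
      if (HWeight > BestPhys) {
        BestPhys = HWeight;
        HintPhys = HintReg;
      }
    } else if (HWeight > BestVirt) {
      BestVirt = HWeight;
      HintVirt = HintReg;
    }
  }
};
```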
diff --git a/contrib/llvm/lib/CodeGen/CallingConvLower.cpp b/contrib/llvm/lib/CodeGen/CallingConvLower.cpp
index 23c0d54..7d67bcf 100644
--- a/contrib/llvm/lib/CodeGen/CallingConvLower.cpp
+++ b/contrib/llvm/lib/CodeGen/CallingConvLower.cpp
@@ -51,9 +51,9 @@ void CCState::HandleByVal(unsigned ValNo, MVT ValVT,
Size = MinSize;
if (MinAlign > (int)Align)
Align = MinAlign;
- MF.getFrameInfo()->ensureMaxAlignment(Align);
+ ensureMaxAlignment(Align);
MF.getSubtarget().getTargetLowering()->HandleByVal(this, Size, Align);
- Size = unsigned(RoundUpToAlignment(Size, MinAlign));
+ Size = unsigned(alignTo(Size, MinAlign));
unsigned Offset = AllocateStack(Size, Align);
addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
}
@@ -236,6 +236,7 @@ void CCState::analyzeMustTailForwardedRegisters(
// variadic functions, so we need to assume we're not variadic so that we get
// all the registers that might be used in a non-variadic call.
SaveAndRestore<bool> SavedVarArg(IsVarArg, false);
+ SaveAndRestore<bool> SavedMustTail(AnalyzingMustTailForwardedRegs, true);
for (MVT RegVT : RegParmTypes) {
SmallVector<MCPhysReg, 8> RemainingRegs;
@@ -248,3 +249,39 @@ void CCState::analyzeMustTailForwardedRegisters(
}
}
}
+
+bool CCState::resultsCompatible(CallingConv::ID CalleeCC,
+ CallingConv::ID CallerCC, MachineFunction &MF,
+ LLVMContext &C,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ CCAssignFn CalleeFn, CCAssignFn CallerFn) {
+ if (CalleeCC == CallerCC)
+ return true;
+ SmallVector<CCValAssign, 4> RVLocs1;
+ CCState CCInfo1(CalleeCC, false, MF, RVLocs1, C);
+ CCInfo1.AnalyzeCallResult(Ins, CalleeFn);
+
+ SmallVector<CCValAssign, 4> RVLocs2;
+ CCState CCInfo2(CallerCC, false, MF, RVLocs2, C);
+ CCInfo2.AnalyzeCallResult(Ins, CallerFn);
+
+ if (RVLocs1.size() != RVLocs2.size())
+ return false;
+ for (unsigned I = 0, E = RVLocs1.size(); I != E; ++I) {
+ const CCValAssign &Loc1 = RVLocs1[I];
+ const CCValAssign &Loc2 = RVLocs2[I];
+ if (Loc1.getLocInfo() != Loc2.getLocInfo())
+ return false;
+ bool RegLoc1 = Loc1.isRegLoc();
+ if (RegLoc1 != Loc2.isRegLoc())
+ return false;
+ if (RegLoc1) {
+ if (Loc1.getLocReg() != Loc2.getLocReg())
+ return false;
+ } else {
+ if (Loc1.getLocMemOffset() != Loc2.getLocMemOffset())
+ return false;
+ }
+ }
+ return true;
+}
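The new `resultsCompatible` is the guard a target can apply before tail-calling between different calling conventions: every return value must land in the same kind of location, with the same register or stack offset and the same LocInfo. A self-contained toy model of the check:

```cpp
#include <vector>

struct ValLoc {
  bool IsReg;            // register location vs. stack location
  unsigned RegOrOffset;  // register number, or memory offset
  int LocInfo;           // extension/truncation, as in CCValAssign::LocInfo
};

// True iff the callee's and caller's return values are assigned to
// pairwise-identical locations, so a tail call cannot misplace a result.
bool resultsCompatible(const std::vector<ValLoc> &Callee,
                       const std::vector<ValLoc> &Caller) {
  if (Callee.size() != Caller.size())
    return false;
  for (std::size_t I = 0, E = Callee.size(); I != E; ++I) {
    const ValLoc &A = Callee[I], &B = Caller[I];
    if (A.LocInfo != B.LocInfo || A.IsReg != B.IsReg ||
        A.RegOrOffset != B.RegOrOffset)
      return false;
  }
  return true;
}
```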
diff --git a/contrib/llvm/lib/CodeGen/CodeGen.cpp b/contrib/llvm/lib/CodeGen/CodeGen.cpp
index dc13b5b..6679819 100644
--- a/contrib/llvm/lib/CodeGen/CodeGen.cpp
+++ b/contrib/llvm/lib/CodeGen/CodeGen.cpp
@@ -24,6 +24,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
initializeBranchFolderPassPass(Registry);
initializeCodeGenPreparePass(Registry);
initializeDeadMachineInstructionElimPass(Registry);
+ initializeDetectDeadLanesPass(Registry);
initializeDwarfEHPreparePass(Registry);
initializeEarlyIfConverterPass(Registry);
initializeExpandISelPseudosPass(Registry);
@@ -33,6 +34,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
initializeGCMachineCodeAnalysisPass(Registry);
initializeGCModuleInfoPass(Registry);
initializeIfConverterPass(Registry);
+ initializeInterleavedAccessPass(Registry);
initializeLiveDebugVariablesPass(Registry);
initializeLiveIntervalsPass(Registry);
initializeLiveStacksPass(Registry);
@@ -55,26 +57,32 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
initializeMachineSchedulerPass(Registry);
initializeMachineSinkingPass(Registry);
initializeMachineVerifierPassPass(Registry);
+ initializeXRayInstrumentationPass(Registry);
+ initializePatchableFunctionPass(Registry);
initializeOptimizePHIsPass(Registry);
initializePEIPass(Registry);
initializePHIEliminationPass(Registry);
initializePeepholeOptimizerPass(Registry);
initializePostMachineSchedulerPass(Registry);
+ initializePostRAHazardRecognizerPass(Registry);
initializePostRASchedulerPass(Registry);
+ initializePreISelIntrinsicLoweringLegacyPassPass(Registry);
initializeProcessImplicitDefsPass(Registry);
initializeRegisterCoalescerPass(Registry);
+ initializeRenameIndependentSubregsPass(Registry);
initializeShrinkWrapPass(Registry);
initializeSlotIndexesPass(Registry);
initializeStackColoringPass(Registry);
initializeStackMapLivenessPass(Registry);
initializeLiveDebugValuesPass(Registry);
+ initializeSafeStackPass(Registry);
initializeStackProtectorPass(Registry);
initializeStackSlotColoringPass(Registry);
initializeTailDuplicatePassPass(Registry);
initializeTargetPassConfigPass(Registry);
initializeTwoAddressInstructionPassPass(Registry);
initializeUnpackMachineBundlesPass(Registry);
- initializeUnreachableBlockElimPass(Registry);
+ initializeUnreachableBlockElimLegacyPassPass(Registry);
initializeUnreachableMachineBlockElimPass(Registry);
initializeVirtRegMapPass(Registry);
initializeVirtRegRewriterPass(Registry);
diff --git a/contrib/llvm/lib/CodeGen/CodeGenPrepare.cpp b/contrib/llvm/lib/CodeGen/CodeGenPrepare.cpp
index c8007a5..ede4041 100644
--- a/contrib/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/contrib/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -18,9 +18,11 @@
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
@@ -38,6 +40,7 @@
#include "llvm/IR/ValueHandle.h"
#include "llvm/IR/ValueMap.h"
#include "llvm/Pass.h"
+#include "llvm/Support/BranchProbability.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
@@ -111,6 +114,10 @@ static cl::opt<bool> StressExtLdPromotion(
cl::desc("Stress test ext(promotable(ld)) -> promoted(ext(ld)) "
"optimization in CodeGenPrepare"));
+static cl::opt<bool> DisablePreheaderProtect(
+ "disable-preheader-prot", cl::Hidden, cl::init(false),
+ cl::desc("Disable protection against removing loop preheaders"));
+
namespace {
typedef SmallPtrSet<Instruction *, 16> SetOfInstrs;
typedef PointerIntPair<Type *, 1, bool> TypeIsSExt;
@@ -122,6 +129,7 @@ class TypePromotionTransaction;
const TargetLowering *TLI;
const TargetTransformInfo *TTI;
const TargetLibraryInfo *TLInfo;
+ const LoopInfo *LI;
/// As we scan instructions optimizing them, this is the next instruction
/// to optimize. Transforms that can invalidate this should update it.
@@ -158,9 +166,10 @@ class TypePromotionTransaction;
const char *getPassName() const override { return "CodeGen Prepare"; }
void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addPreserved<DominatorTreeWrapperPass>();
+ // FIXME: When we can selectively preserve passes, preserve the domtree.
AU.addRequired<TargetLibraryInfoWrapperPass>();
AU.addRequired<TargetTransformInfoWrapperPass>();
+ AU.addRequired<LoopInfoWrapperPass>();
}
private:
@@ -203,7 +212,7 @@ FunctionPass *llvm::createCodeGenPreparePass(const TargetMachine *TM) {
}
bool CodeGenPrepare::runOnFunction(Function &F) {
- if (skipOptnoneFunction(F))
+ if (skipFunction(F))
return false;
DL = &F.getParent()->getDataLayout();
@@ -218,6 +227,7 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
TLI = TM->getSubtargetImpl(F)->getTargetLowering();
TLInfo = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
+ LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
OptSize = F.optForSize();
/// This optimization identifies DIV instructions that can be
@@ -359,6 +369,15 @@ bool CodeGenPrepare::eliminateFallThrough(Function &F) {
/// edges in ways that are non-optimal for isel. Start by eliminating these
/// blocks so we can split them the way we want them.
bool CodeGenPrepare::eliminateMostlyEmptyBlocks(Function &F) {
+ SmallPtrSet<BasicBlock *, 16> Preheaders;
+ SmallVector<Loop *, 16> LoopList(LI->begin(), LI->end());
+ while (!LoopList.empty()) {
+ Loop *L = LoopList.pop_back_val();
+ LoopList.insert(LoopList.end(), L->begin(), L->end());
+ if (BasicBlock *Preheader = L->getLoopPreheader())
+ Preheaders.insert(Preheader);
+ }
+
bool MadeChange = false;
// Note that this intentionally skips the entry block.
for (Function::iterator I = std::next(F.begin()), E = F.end(); I != E;) {
@@ -391,6 +410,14 @@ bool CodeGenPrepare::eliminateMostlyEmptyBlocks(Function &F) {
if (!canMergeBlocks(BB, DestBB))
continue;
+ // Do not delete loop preheaders if doing so would create a critical edge.
+ // Loop preheaders can be good locations to spill registers. If the
+ // preheader is deleted and we create a critical edge, registers may be
+ // spilled in the loop body instead.
+ if (!DisablePreheaderProtect && Preheaders.count(BB) &&
+ !(BB->getSinglePredecessor() && BB->getSinglePredecessor()->getSingleSuccessor()))
+ continue;
+
eliminateMostlyEmptyBlock(BB);
MadeChange = true;
}
@@ -612,7 +639,8 @@ simplifyRelocatesOffABase(GCRelocateInst *RelocatedBase,
continue;
// Create a Builder and replace the target callsite with a gep
- assert(RelocatedBase->getNextNode() && "Should always have one since it's not a terminator");
+ assert(RelocatedBase->getNextNode() &&
+ "Should always have one since it's not a terminator");
// Insert after RelocatedBase
IRBuilder<> Builder(RelocatedBase->getNextNode());
@@ -730,6 +758,11 @@ static bool SinkCast(CastInst *CI) {
// Preincrement use iterator so we don't invalidate it.
++UI;
+ // The first insertion point of a block containing an EH pad is after the
+ // pad. If the pad is the user, we cannot sink the cast past the pad.
+ if (User->isEHPad())
+ continue;
+
// If the block selected to receive the cast is an EH pad that does not
// allow non-PHI instructions before the terminator, we can't sink the
// cast.
@@ -854,10 +887,14 @@ static bool CombineUAddWithOverflow(CmpInst *CI) {
/// lose; some adjustment may be wanted there.
///
/// Return true if any changes are made.
-static bool SinkCmpExpression(CmpInst *CI) {
+static bool SinkCmpExpression(CmpInst *CI, const TargetLowering *TLI) {
BasicBlock *DefBB = CI->getParent();
- /// Only insert a cmp in each block once.
+ // Avoid sinking soft-FP comparisons, since this can move them into a loop.
+ if (TLI && TLI->useSoftFloat() && isa<FCmpInst>(CI))
+ return false;
+
+ // Only insert a cmp in each block once.
DenseMap<BasicBlock*, CmpInst*> InsertedCmps;
bool MadeChange = false;
@@ -905,8 +942,8 @@ static bool SinkCmpExpression(CmpInst *CI) {
return MadeChange;
}
-static bool OptimizeCmpExpression(CmpInst *CI) {
- if (SinkCmpExpression(CI))
+static bool OptimizeCmpExpression(CmpInst *CI, const TargetLowering *TLI) {
+ if (SinkCmpExpression(CI, TLI))
return true;
if (CombineUAddWithOverflow(CI))
@@ -1138,7 +1175,7 @@ static bool OptimizeExtractBits(BinaryOperator *ShiftI, ConstantInt *CI,
// %13 = icmp eq i1 %12, true
// br i1 %13, label %cond.load4, label %else5
//
-static void ScalarizeMaskedLoad(CallInst *CI) {
+static void scalarizeMaskedLoad(CallInst *CI) {
Value *Ptr = CI->getArgOperand(0);
Value *Alignment = CI->getArgOperand(1);
Value *Mask = CI->getArgOperand(2);
@@ -1284,7 +1321,7 @@ static void ScalarizeMaskedLoad(CallInst *CI) {
// store i32 %8, i32* %9
// br label %else2
// . . .
-static void ScalarizeMaskedStore(CallInst *CI) {
+static void scalarizeMaskedStore(CallInst *CI) {
Value *Src = CI->getArgOperand(0);
Value *Ptr = CI->getArgOperand(1);
Value *Alignment = CI->getArgOperand(2);
@@ -1403,7 +1440,7 @@ static void ScalarizeMaskedStore(CallInst *CI) {
// . . .
// % Result = select <16 x i1> %Mask, <16 x i32> %res.phi.select, <16 x i32> %Src
// ret <16 x i32> %Result
-static void ScalarizeMaskedGather(CallInst *CI) {
+static void scalarizeMaskedGather(CallInst *CI) {
Value *Ptrs = CI->getArgOperand(0);
Value *Alignment = CI->getArgOperand(1);
Value *Mask = CI->getArgOperand(2);
@@ -1538,7 +1575,7 @@ static void ScalarizeMaskedGather(CallInst *CI) {
// store i32 % Elt1, i32* % Ptr1, align 4
// br label %else2
// . . .
-static void ScalarizeMaskedScatter(CallInst *CI) {
+static void scalarizeMaskedScatter(CallInst *CI) {
Value *Src = CI->getArgOperand(0);
Value *Ptrs = CI->getArgOperand(1);
Value *Alignment = CI->getArgOperand(2);
@@ -1653,7 +1690,7 @@ static bool despeculateCountZeros(IntrinsicInst *CountZeros,
// Only handle legal scalar cases. Anything else requires too much work.
Type *Ty = CountZeros->getType();
unsigned SizeInBits = Ty->getPrimitiveSizeInBits();
- if (Ty->isVectorTy() || SizeInBits > DL->getLargestLegalIntTypeSize())
+ if (Ty->isVectorTy() || SizeInBits > DL->getLargestLegalIntTypeSizeInBits())
return false;
// The intrinsic will be sunk behind a compare against zero and branch.
@@ -1743,8 +1780,8 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool& ModifiedDT) {
// forbidden.
GlobalVariable *GV;
if ((GV = dyn_cast<GlobalVariable>(Val)) && GV->canIncreaseAlignment() &&
- GV->getAlignment() < PrefAlign &&
- DL->getTypeAllocSize(GV->getType()->getElementType()) >=
+ GV->getPointerAlignment(*DL) < PrefAlign &&
+ DL->getTypeAllocSize(GV->getValueType()) >=
MinSize + Offset2)
GV->setAlignment(PrefAlign);
}
@@ -1759,27 +1796,47 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool& ModifiedDT) {
}
}
+ // If we have a cold call site, try to sink addressing computation into the
+ // cold block. This interacts with our handling for loads and stores to
+ // ensure that we can fold a potential addressing computation into all of
+ // its uses. TODO: generalize this to work over profiling data.
+ if (!OptSize && CI->hasFnAttr(Attribute::Cold))
+ for (auto &Arg : CI->arg_operands()) {
+ if (!Arg->getType()->isPointerTy())
+ continue;
+ unsigned AS = Arg->getType()->getPointerAddressSpace();
+ return optimizeMemoryInst(CI, Arg, Arg->getType(), AS);
+ }
+
IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI);
if (II) {
switch (II->getIntrinsicID()) {
default: break;
case Intrinsic::objectsize: {
// Lower all uses of llvm.objectsize.*
- bool Min = (cast<ConstantInt>(II->getArgOperand(1))->getZExtValue() == 1);
+ uint64_t Size;
Type *ReturnTy = CI->getType();
- Constant *RetVal = ConstantInt::get(ReturnTy, Min ? 0 : -1ULL);
-
+ Constant *RetVal = nullptr;
+ ConstantInt *Op1 = cast<ConstantInt>(II->getArgOperand(1));
+ ObjSizeMode Mode = Op1->isZero() ? ObjSizeMode::Max : ObjSizeMode::Min;
+ if (getObjectSize(II->getArgOperand(0),
+ Size, *DL, TLInfo, false, Mode)) {
+ RetVal = ConstantInt::get(ReturnTy, Size);
+ } else {
+ RetVal = ConstantInt::get(ReturnTy,
+ Mode == ObjSizeMode::Min ? 0 : -1ULL);
+ }
// Substituting this can cause recursive simplifications, which can
// invalidate our iterator. Use a WeakVH to hold onto it in case this
// happens.
- WeakVH IterHandle(&*CurInstIterator);
+ Value *CurValue = &*CurInstIterator;
+ WeakVH IterHandle(CurValue);
- replaceAndRecursivelySimplify(CI, RetVal,
- TLInfo, nullptr);
+ replaceAndRecursivelySimplify(CI, RetVal, TLInfo, nullptr);
// If the iterator instruction was recursively deleted, start over at the
// start of the block.
- if (IterHandle != CurInstIterator.getNodePtrUnchecked()) {
+ if (IterHandle != CurValue) {
CurInstIterator = BB->begin();
SunkAddrs.clear();
}
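The objectsize hunk swaps an unconditional constant fold for an attempt at a real answer first. A sketch of the resulting decision logic, where `tryGetObjectSize` is a hypothetical stand-in for the `getObjectSize` query from MemoryBuiltins:

```cpp
#include <cstdint>
#include <optional>

enum class ObjSizeMode { Max, Min }; // arg1 == 0 selects Max, else Min

// Stand-in for the real analysis; always "unknown" in this toy.
std::optional<uint64_t> tryGetObjectSize(const void *) { return std::nullopt; }

uint64_t lowerObjectSize(const void *Obj, ObjSizeMode Mode) {
  if (std::optional<uint64_t> Size = tryGetObjectSize(Obj))
    return *Size; // a known, exact answer
  // Unknown object: Min mode must answer "at least 0", Max mode "at most
  // everything" -- the constants the old code returned unconditionally.
  return Mode == ObjSizeMode::Min ? 0 : UINT64_MAX;
}
```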
@@ -1788,7 +1845,7 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool& ModifiedDT) {
case Intrinsic::masked_load: {
// Scalarize unsupported vector masked load
if (!TTI->isLegalMaskedLoad(CI->getType())) {
- ScalarizeMaskedLoad(CI);
+ scalarizeMaskedLoad(CI);
ModifiedDT = true;
return true;
}
@@ -1796,7 +1853,7 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool& ModifiedDT) {
}
case Intrinsic::masked_store: {
if (!TTI->isLegalMaskedStore(CI->getArgOperand(0)->getType())) {
- ScalarizeMaskedStore(CI);
+ scalarizeMaskedStore(CI);
ModifiedDT = true;
return true;
}
@@ -1804,7 +1861,7 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool& ModifiedDT) {
}
case Intrinsic::masked_gather: {
if (!TTI->isLegalMaskedGather(CI->getType())) {
- ScalarizeMaskedGather(CI);
+ scalarizeMaskedGather(CI);
ModifiedDT = true;
return true;
}
@@ -1812,7 +1869,7 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool& ModifiedDT) {
}
case Intrinsic::masked_scatter: {
if (!TTI->isLegalMaskedScatter(CI->getArgOperand(0)->getType())) {
- ScalarizeMaskedScatter(CI);
+ scalarizeMaskedScatter(CI);
ModifiedDT = true;
return true;
}
@@ -2076,7 +2133,7 @@ void ExtAddrMode::print(raw_ostream &OS) const {
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
-void ExtAddrMode::dump() const {
+LLVM_DUMP_METHOD void ExtAddrMode::dump() const {
print(dbgs());
dbgs() << '\n';
}
@@ -3442,6 +3499,8 @@ static bool FindAllMemoryUses(
if (!MightBeFoldableInst(I))
return true;
+ const bool OptSize = I->getFunction()->optForSize();
+
// Loop over all the uses, recursively processing them.
for (Use &U : I->uses()) {
Instruction *UserI = cast<Instruction>(U.getUser());
@@ -3459,6 +3518,11 @@ static bool FindAllMemoryUses(
}
if (CallInst *CI = dyn_cast<CallInst>(UserI)) {
+ // If this is a cold call, we can sink the addressing calculation into
+ // the cold path. See optimizeCallInst
+ if (!OptSize && CI->hasFnAttr(Attribute::Cold))
+ continue;
+
InlineAsm *IA = dyn_cast<InlineAsm>(CI->getCalledValue());
if (!IA) return true;
@@ -3550,10 +3614,10 @@ isProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore,
if (!BaseReg && !ScaledReg)
return true;
- // If all uses of this instruction are ultimately load/store/inlineasm's,
- // check to see if their addressing modes will include this instruction. If
- // so, we can fold it into all uses, so it doesn't matter if it has multiple
- // uses.
+ // If all uses of this instruction can have the address mode sunk into them,
+ // we can remove the addressing mode and effectively trade one live register
+ // for another (at worst). In this context, folding an addressing mode into
+ // the use is just a particularly nice way of sinking it.
SmallVector<std::pair<Instruction*,unsigned>, 16> MemoryUses;
SmallPtrSet<Instruction*, 16> ConsideredInsts;
if (FindAllMemoryUses(I, MemoryUses, ConsideredInsts, TM))
@@ -3561,8 +3625,13 @@ isProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore,
// Now that we know that all uses of this instruction are part of a chain of
// computation involving only operations that could theoretically be folded
- // into a memory use, loop over each of these uses and see if they could
- // *actually* fold the instruction.
+ // into a memory use, loop over each of these memory operation uses and see
+ // if they could *actually* fold the instruction. The assumption is that
+ // addressing modes are cheap and that duplicating the computation involved
+ // many times is worthwhile, even on a fast path. For sinking candidates
+ // (i.e. cold call sites), this serves as a way to prevent excessive code
+ // growth since most architectures have some reasonably small and fast way
+ // to compute an effective address (e.g. LEA on x86).
SmallVector<Instruction*, 32> MatchedAddrModeInsts;
for (unsigned i = 0, e = MemoryUses.size(); i != e; ++i) {
Instruction *User = MemoryUses[i].first;
@@ -3616,6 +3685,11 @@ static bool IsNonLocalValue(Value *V, BasicBlock *BB) {
return false;
}
+/// Sink addressing mode computation immediately before MemoryInst if doing so
+/// can be done without increasing register pressure. The need for the
+/// register pressure constraint means this can end up being an all-or-nothing
+/// decision for all uses of the same addressing computation.
+///
/// Load and Store Instructions often have addressing modes that can do
/// significant amounts of computation. As such, instruction selection will try
/// to get the load or store to do as much computation as possible for the
@@ -3623,7 +3697,13 @@ static bool IsNonLocalValue(Value *V, BasicBlock *BB) {
/// such, we sink as much legal addressing mode work into the block as possible.
///
/// This method is used to optimize both load/store and inline asms with memory
-/// operands.
+/// operands. It's also used to sink addressing computations feeding into cold
+/// call sites into their (cold) basic block.
+///
+/// The motivation for handling sinking into cold blocks is that doing so can
+/// both enable other address mode sinking (by satisfying the register pressure
+/// constraint above) and reduce register pressure globally (by removing the
+/// addressing mode computation from the fast path entirely).
bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
Type *AccessTy, unsigned AddrSpace) {
Value *Repl = Addr;
@@ -3662,7 +3742,9 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
continue;
}
- // For non-PHIs, determine the addressing mode being computed.
+ // For non-PHIs, determine the addressing mode being computed. Note that
+ // the result may differ depending on what other uses our candidate
+ // addressing instructions might have.
SmallVector<Instruction*, 16> NewAddrModeInsts;
ExtAddrMode NewAddrMode = AddressingModeMatcher::Match(
V, AccessTy, AddrSpace, MemoryInst, NewAddrModeInsts, *TM,
@@ -3945,12 +4027,13 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
if (Repl->use_empty()) {
// This can cause recursive deletion, which can invalidate our iterator.
// Use a WeakVH to hold onto it in case this happens.
- WeakVH IterHandle(&*CurInstIterator);
+ Value *CurValue = &*CurInstIterator;
+ WeakVH IterHandle(CurValue);
BasicBlock *BB = CurInstIterator->getParent();
RecursivelyDeleteTriviallyDeadInstructions(Repl, TLInfo);
- if (IterHandle != CurInstIterator.getNodePtrUnchecked()) {
+ if (IterHandle != CurValue) {
// If the iterator instruction was recursively deleted, start over at the
// start of the block.
CurInstIterator = BB->begin();
@@ -4461,11 +4544,27 @@ static bool sinkSelectOperand(const TargetTransformInfo *TTI, Value *V) {
/// Returns true if a SelectInst should be turned into an explicit branch.
static bool isFormingBranchFromSelectProfitable(const TargetTransformInfo *TTI,
+ const TargetLowering *TLI,
SelectInst *SI) {
+ // If even a predictable select is cheap, then a branch can't be cheaper.
+ if (!TLI->isPredictableSelectExpensive())
+ return false;
+
// FIXME: This should use the same heuristics as IfConversion to determine
- // whether a select is better represented as a branch. This requires that
- // branch probability metadata is preserved for the select, which is not the
- // case currently.
+ // whether a select is better represented as a branch.
+
+ // If metadata tells us that the select condition is obviously predictable,
+ // then we want to replace the select with a branch.
+ uint64_t TrueWeight, FalseWeight;
+ if (SI->extractProfMetadata(TrueWeight, FalseWeight)) {
+ uint64_t Max = std::max(TrueWeight, FalseWeight);
+ uint64_t Sum = TrueWeight + FalseWeight;
+ if (Sum != 0) {
+ auto Probability = BranchProbability::getBranchProbability(Max, Sum);
+ if (Probability > TLI->getPredictableBranchThreshold())
+ return true;
+ }
+ }
CmpInst *Cmp = dyn_cast<CmpInst>(SI->getCondition());
@@ -4475,17 +4574,6 @@ static bool isFormingBranchFromSelectProfitable(const TargetTransformInfo *TTI,
if (!Cmp || !Cmp->hasOneUse())
return false;
- Value *CmpOp0 = Cmp->getOperand(0);
- Value *CmpOp1 = Cmp->getOperand(1);
-
- // Emit "cmov on compare with a memory operand" as a branch to avoid stalls
- // on a load from memory. But if the load is used more than once, do not
- // change the select to a branch because the load is probably needed
- // regardless of whether the branch is taken or not.
- if ((isa<LoadInst>(CmpOp0) && CmpOp0->hasOneUse()) ||
- (isa<LoadInst>(CmpOp1) && CmpOp1->hasOneUse()))
- return true;
-
// If either operand of the select is expensive and only needed on one side
// of the select, we should form a branch.
if (sinkSelectOperand(TTI, SI->getTrueValue()) ||
@@ -4502,7 +4590,8 @@ bool CodeGenPrepare::optimizeSelectInst(SelectInst *SI) {
bool VectorCond = !SI->getCondition()->getType()->isIntegerTy(1);
// Can we convert the 'select' to CF ?
- if (DisableSelectToBranch || OptSize || !TLI || VectorCond)
+ if (DisableSelectToBranch || OptSize || !TLI || VectorCond ||
+ SI->getMetadata(LLVMContext::MD_unpredictable))
return false;
TargetLowering::SelectSupportKind SelectKind;
@@ -4513,14 +4602,9 @@ bool CodeGenPrepare::optimizeSelectInst(SelectInst *SI) {
else
SelectKind = TargetLowering::ScalarValSelect;
- // Do we have efficient codegen support for this kind of 'selects' ?
- if (TLI->isSelectSupported(SelectKind)) {
- // We have efficient codegen support for the select instruction.
- // Check if it is profitable to keep this 'select'.
- if (!TLI->isPredictableSelectExpensive() ||
- !isFormingBranchFromSelectProfitable(TTI, SI))
- return false;
- }
+ if (TLI->isSelectSupported(SelectKind) &&
+ !isFormingBranchFromSelectProfitable(TTI, TLI, SI))
+ return false;
ModifiedDT = true;
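
A rough model of the new profitability test: when profile weights are heavily
skewed toward one arm, Max/Sum exceeds the target's predictable-branch
threshold and the select is rewritten as a branch. The 7/8 cutoff below is an
illustrative stand-in, not any target's actual getPredictableBranchThreshold():

    #include <cstdint>

    bool shouldFormBranch(uint64_t TrueWeight, uint64_t FalseWeight) {
      uint64_t Max = TrueWeight > FalseWeight ? TrueWeight : FalseWeight;
      uint64_t Sum = TrueWeight + FalseWeight;
      if (Sum == 0)
        return false;           // no usable profile data
      // Max/Sum > 7/8, compared without floating point (sketch: ignores
      // overflow for extremely large weights).
      return Max * 8 > Sum * 7;
    }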
@@ -5145,7 +5229,7 @@ bool CodeGenPrepare::optimizeInst(Instruction *I, bool& ModifiedDT) {
if (CmpInst *CI = dyn_cast<CmpInst>(I))
if (!TLI || !TLI->hasMultipleConditionRegisters())
- return OptimizeCmpExpression(CI);
+ return OptimizeCmpExpression(CI, TLI);
if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
stripInvariantGroupMetadata(*LI);
@@ -5221,7 +5305,7 @@ static bool makeBitReverse(Instruction &I, const DataLayout &DL,
return false;
SmallVector<Instruction*, 4> Insts;
- if (!recognizeBitReverseOrBSwapIdiom(&I, false, true, Insts))
+ if (!recognizeBSwapOrBitReverseIdiom(&I, false, true, Insts))
return false;
Instruction *LastInst = Insts.back();
I.replaceAllUsesWith(LastInst);
@@ -5249,12 +5333,13 @@ bool CodeGenPrepare::optimizeBlock(BasicBlock &BB, bool& ModifiedDT) {
for (auto &I : reverse(BB)) {
if (makeBitReverse(I, *DL, *TLI)) {
MadeBitReverse = MadeChange = true;
+ ModifiedDT = true;
break;
}
}
}
MadeChange |= dupRetToEnableTailCallOpts(&BB);
-
+
return MadeChange;
}
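
For reference, the kind of idiom the renamed recognizeBSwapOrBitReverseIdiom()
is meant to match, spelled out for a 32-bit byte swap:

    #include <cstdint>

    // Classic shift-and-mask byte swap the pass can collapse into a single
    // bswap intrinsic.
    uint32_t bswap32(uint32_t X) {
      return ((X & 0x000000FFu) << 24) | ((X & 0x0000FF00u) << 8) |
             ((X & 0x00FF0000u) >> 8)  | ((X & 0xFF000000u) >> 24);
    }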
@@ -5310,43 +5395,38 @@ bool CodeGenPrepare::sinkAndCmp(Function &F) {
if (!TLI || !TLI->isMaskAndBranchFoldingLegal())
return false;
bool MadeChange = false;
- for (Function::iterator I = F.begin(), E = F.end(); I != E; ) {
- BasicBlock *BB = &*I++;
-
+ for (BasicBlock &BB : F) {
// Does this BB end with the following?
// %andVal = and %val, #single-bit-set
// %icmpVal = icmp %andVal, 0
// br i1 %icmpVal, label %dest1, label %dest2
- BranchInst *Brcc = dyn_cast<BranchInst>(BB->getTerminator());
+ BranchInst *Brcc = dyn_cast<BranchInst>(BB.getTerminator());
if (!Brcc || !Brcc->isConditional())
continue;
ICmpInst *Cmp = dyn_cast<ICmpInst>(Brcc->getOperand(0));
- if (!Cmp || Cmp->getParent() != BB)
+ if (!Cmp || Cmp->getParent() != &BB)
continue;
ConstantInt *Zero = dyn_cast<ConstantInt>(Cmp->getOperand(1));
if (!Zero || !Zero->isZero())
continue;
Instruction *And = dyn_cast<Instruction>(Cmp->getOperand(0));
- if (!And || And->getOpcode() != Instruction::And || And->getParent() != BB)
+ if (!And || And->getOpcode() != Instruction::And || And->getParent() != &BB)
continue;
ConstantInt* Mask = dyn_cast<ConstantInt>(And->getOperand(1));
if (!Mask || !Mask->getUniqueInteger().isPowerOf2())
continue;
- DEBUG(dbgs() << "found and; icmp ?,0; brcc\n"); DEBUG(BB->dump());
+ DEBUG(dbgs() << "found and; icmp ?,0; brcc\n"); DEBUG(BB.dump());
// Push the "and; icmp" for any users that are conditional branches.
// Since there can only be one branch use per BB, we don't need to keep
// track of which BBs we insert into.
- for (Value::use_iterator UI = Cmp->use_begin(), E = Cmp->use_end();
- UI != E; ) {
- Use &TheUse = *UI;
+ for (Use &TheUse : Cmp->uses()) {
// Find brcc use.
- BranchInst *BrccUser = dyn_cast<BranchInst>(*UI);
- ++UI;
+ BranchInst *BrccUser = dyn_cast<BranchInst>(TheUse);
if (!BrccUser || !BrccUser->isConditional())
continue;
BasicBlock *UserBB = BrccUser->getParent();
- if (UserBB == BB) continue;
+ if (UserBB == &BB) continue;
DEBUG(dbgs() << "found Brcc use\n");
// Sink the "and; icmp" to use.
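
In C terms, the pattern matched and sunk here is a single-bit test feeding a
conditional branch; a minimal illustration (the mask value is arbitrary):

    // Compiles to: and %val, 8 ; icmp ne 0 ; br i1 ...
    bool testFlag(unsigned Val) {
      unsigned AndVal = Val & 0x8u;
      return AndVal != 0;
    }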
@@ -5365,29 +5445,6 @@ bool CodeGenPrepare::sinkAndCmp(Function &F) {
return MadeChange;
}
-/// \brief Retrieve the probabilities of a conditional branch. Returns true on
-/// success, or returns false if no or invalid metadata was found.
-static bool extractBranchMetadata(BranchInst *BI,
- uint64_t &ProbTrue, uint64_t &ProbFalse) {
- assert(BI->isConditional() &&
- "Looking for probabilities on unconditional branch?");
- auto *ProfileData = BI->getMetadata(LLVMContext::MD_prof);
- if (!ProfileData || ProfileData->getNumOperands() != 3)
- return false;
-
- const auto *CITrue =
- mdconst::dyn_extract<ConstantInt>(ProfileData->getOperand(1));
- const auto *CIFalse =
- mdconst::dyn_extract<ConstantInt>(ProfileData->getOperand(2));
- if (!CITrue || !CIFalse)
- return false;
-
- ProbTrue = CITrue->getValue().getZExtValue();
- ProbFalse = CIFalse->getValue().getZExtValue();
-
- return true;
-}
-
/// \brief Scale down both weights to fit into uint32_t.
static void scaleWeights(uint64_t &NewTrue, uint64_t &NewFalse) {
uint64_t NewMax = (NewTrue > NewFalse) ? NewTrue : NewFalse;
@@ -5456,11 +5513,9 @@ bool CodeGenPrepare::splitBranchCondition(Function &F) {
DEBUG(dbgs() << "Before branch condition splitting\n"; BB.dump());
// Create a new BB.
- auto *InsertBefore = std::next(Function::iterator(BB))
- .getNodePtrUnchecked();
- auto TmpBB = BasicBlock::Create(BB.getContext(),
- BB.getName() + ".cond.split",
- BB.getParent(), InsertBefore);
+ auto TmpBB =
+ BasicBlock::Create(BB.getContext(), BB.getName() + ".cond.split",
+ BB.getParent(), BB.getNextNode());
// Update original basic block by using the first condition directly by the
// branch instruction and removing the no longer needed and/or instruction.
@@ -5535,7 +5590,7 @@ bool CodeGenPrepare::splitBranchCondition(Function &F) {
// Another choice is to assume TrueProb for BB1 equals to TrueProb for
// TmpBB, but the math is more complicated.
uint64_t TrueWeight, FalseWeight;
- if (extractBranchMetadata(Br1, TrueWeight, FalseWeight)) {
+ if (Br1->extractProfMetadata(TrueWeight, FalseWeight)) {
uint64_t NewTrueWeight = TrueWeight;
uint64_t NewFalseWeight = TrueWeight + 2 * FalseWeight;
scaleWeights(NewTrueWeight, NewFalseWeight);
@@ -5568,7 +5623,7 @@ bool CodeGenPrepare::splitBranchCondition(Function &F) {
// assumes that
// FalseProb for BB1 == TrueProb for BB1 * FalseProb for TmpBB.
uint64_t TrueWeight, FalseWeight;
- if (extractBranchMetadata(Br1, TrueWeight, FalseWeight)) {
+ if (Br1->extractProfMetadata(TrueWeight, FalseWeight)) {
uint64_t NewTrueWeight = 2 * TrueWeight + FalseWeight;
uint64_t NewFalseWeight = FalseWeight;
scaleWeights(NewTrueWeight, NewFalseWeight);
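
Putting the weight rewrite and scaleWeights() together, a hedged sketch of the
arithmetic for this second split (helper name is illustrative): the inner
branch receives {2*T + F, F}, and both values are then shrunk to fit uint32_t.

    #include <cstdint>
    #include <utility>

    std::pair<uint32_t, uint32_t> rescaledWeights(uint64_t T, uint64_t F) {
      uint64_t NewTrue = 2 * T + F;
      uint64_t NewFalse = F;
      uint64_t Max = NewTrue > NewFalse ? NewTrue : NewFalse;
      uint64_t Scale = Max / UINT32_MAX + 1; // shrink factor, always >= 1
      return {static_cast<uint32_t>(NewTrue / Scale),
              static_cast<uint32_t>(NewFalse / Scale)};
    }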
diff --git a/contrib/llvm/lib/CodeGen/CoreCLRGC.cpp b/contrib/llvm/lib/CodeGen/CoreCLRGC.cpp
deleted file mode 100644
index ff7c0d5..0000000
--- a/contrib/llvm/lib/CodeGen/CoreCLRGC.cpp
+++ /dev/null
@@ -1,54 +0,0 @@
-//===-- CoreCLRGC.cpp - CoreCLR Runtime GC Strategy -----------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains a GCStrategy for the CoreCLR Runtime.
-// The strategy is similar to Statepoint-example GC, but differs from it in
-// certain aspects, such as:
-// 1) Base-pointers need not be explicitly tracked and reported for
-// interior pointers
-// 2) Uses a different format for encoding stack-maps
-// 3) Location of Safe-point polls: polls are only needed before loop-back edges
-// and before tail-calls (not needed at function-entry)
-//
-// The above differences in behavior are to be implemented in upcoming checkins.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/CodeGen/GCStrategy.h"
-#include "llvm/IR/DerivedTypes.h"
-#include "llvm/IR/Value.h"
-
-using namespace llvm;
-
-namespace {
-class CoreCLRGC : public GCStrategy {
-public:
- CoreCLRGC() {
- UseStatepoints = true;
- // These options are all gc.root specific, we specify them so that the
- // gc.root lowering code doesn't run.
- InitRoots = false;
- NeededSafePoints = 0;
- UsesMetadata = false;
- CustomRoots = false;
- }
- Optional<bool> isGCManagedPointer(const Type *Ty) const override {
- // Method is only valid on pointer typed values.
- const PointerType *PT = cast<PointerType>(Ty);
- // We pick addrspace(1) as our GC managed heap.
- return (1 == PT->getAddressSpace());
- }
-};
-}
-
-static GCRegistry::Add<CoreCLRGC> X("coreclr", "CoreCLR-compatible GC");
-
-namespace llvm {
-void linkCoreCLRGC() {}
-}
diff --git a/contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp b/contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp
index c924ba3..a0189a1 100644
--- a/contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp
+++ b/contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp
@@ -87,7 +87,7 @@ void CriticalAntiDepBreaker::FinishBlock() {
KeepRegs.reset();
}
-void CriticalAntiDepBreaker::Observe(MachineInstr *MI, unsigned Count,
+void CriticalAntiDepBreaker::Observe(MachineInstr &MI, unsigned Count,
unsigned InsertPosIndex) {
// Kill instructions can define registers but are really nops, and there might
// be a real definition earlier that needs to be paired with uses dominated by
@@ -96,7 +96,7 @@ void CriticalAntiDepBreaker::Observe(MachineInstr *MI, unsigned Count,
// FIXME: It may be possible to remove the isKill() restriction once PR18663
// has been properly fixed. There can be value in processing kills as seen in
// the AggressiveAntiDepBreaker class.
- if (MI->isDebugValue() || MI->isKill())
+ if (MI.isDebugValue() || MI.isKill())
return;
assert(Count < InsertPosIndex && "Instruction index out of expected range!");
@@ -146,7 +146,7 @@ static const SDep *CriticalPathStep(const SUnit *SU) {
return Next;
}
-void CriticalAntiDepBreaker::PrescanInstruction(MachineInstr *MI) {
+void CriticalAntiDepBreaker::PrescanInstruction(MachineInstr &MI) {
// It's not safe to change register allocation for source operands of
// instructions that have special allocation requirements. Also assume all
// registers used in a call must not be changed (ABI).
@@ -163,21 +163,20 @@ void CriticalAntiDepBreaker::PrescanInstruction(MachineInstr *MI) {
// instruction which may not be executed. The second R6 def may or may not
// re-define R6 so it's not safe to change it since the last R6 use cannot be
// changed.
- bool Special = MI->isCall() ||
- MI->hasExtraSrcRegAllocReq() ||
- TII->isPredicated(MI);
+ bool Special =
+ MI.isCall() || MI.hasExtraSrcRegAllocReq() || TII->isPredicated(MI);
// Scan the register operands for this instruction and update
// Classes and RegRefs.
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- MachineOperand &MO = MI->getOperand(i);
+ for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI.getOperand(i);
if (!MO.isReg()) continue;
unsigned Reg = MO.getReg();
if (Reg == 0) continue;
const TargetRegisterClass *NewRC = nullptr;
- if (i < MI->getDesc().getNumOperands())
- NewRC = TII->getRegClass(MI->getDesc(), i, TRI, MF);
+ if (i < MI.getDesc().getNumOperands())
+ NewRC = TII->getRegClass(MI.getDesc(), i, TRI, MF);
// For now, only allow the register to be changed if its register
// class is consistent across all uses.
@@ -212,7 +211,7 @@ void CriticalAntiDepBreaker::PrescanInstruction(MachineInstr *MI) {
// of a register? In the above 'xor' example, the uses of %eax are undef, so
// earlier instructions could still replace %eax even though the 'xor'
// itself can't be changed.
- if (MI->isRegTiedToUseOperand(i) &&
+ if (MI.isRegTiedToUseOperand(i) &&
Classes[Reg] == reinterpret_cast<TargetRegisterClass *>(-1)) {
for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true);
SubRegs.isValid(); ++SubRegs) {
@@ -234,18 +233,17 @@ void CriticalAntiDepBreaker::PrescanInstruction(MachineInstr *MI) {
}
}
-void CriticalAntiDepBreaker::ScanInstruction(MachineInstr *MI,
- unsigned Count) {
+void CriticalAntiDepBreaker::ScanInstruction(MachineInstr &MI, unsigned Count) {
// Update liveness.
// Proceeding upwards, registers that are defined but not used in this
// instruction are now dead.
- assert(!MI->isKill() && "Attempting to scan a kill instruction");
+ assert(!MI.isKill() && "Attempting to scan a kill instruction");
if (!TII->isPredicated(MI)) {
// Predicated defs are modeled as read + write, i.e. similar to two
// address updates.
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- MachineOperand &MO = MI->getOperand(i);
+ for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI.getOperand(i);
if (MO.isRegMask())
for (unsigned i = 0, e = TRI->getNumRegs(); i != e; ++i)
@@ -262,11 +260,13 @@ void CriticalAntiDepBreaker::ScanInstruction(MachineInstr *MI,
if (Reg == 0) continue;
if (!MO.isDef()) continue;
- // If we've already marked this reg as unchangeable, carry on.
- if (KeepRegs.test(Reg)) continue;
-
// Ignore two-addr defs.
- if (MI->isRegTiedToUseOperand(i)) continue;
+ if (MI.isRegTiedToUseOperand(i))
+ continue;
+
+ // If we've already marked this reg as unchangeable, don't remove
+ // it or any of its subregs from KeepRegs.
+ bool Keep = KeepRegs.test(Reg);
// For the reg itself and all subregs: update the def to current;
// reset the kill state, any restrictions, and references.
@@ -274,25 +274,26 @@ void CriticalAntiDepBreaker::ScanInstruction(MachineInstr *MI,
unsigned SubregReg = *SRI;
DefIndices[SubregReg] = Count;
KillIndices[SubregReg] = ~0u;
- KeepRegs.reset(SubregReg);
Classes[SubregReg] = nullptr;
RegRefs.erase(SubregReg);
+ if (!Keep)
+ KeepRegs.reset(SubregReg);
}
// Conservatively mark super-registers as unusable.
for (MCSuperRegIterator SR(Reg, TRI); SR.isValid(); ++SR)
Classes[*SR] = reinterpret_cast<TargetRegisterClass *>(-1);
}
}
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- MachineOperand &MO = MI->getOperand(i);
+ for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI.getOperand(i);
if (!MO.isReg()) continue;
unsigned Reg = MO.getReg();
if (Reg == 0) continue;
if (!MO.isUse()) continue;
const TargetRegisterClass *NewRC = nullptr;
- if (i < MI->getDesc().getNumOperands())
- NewRC = TII->getRegClass(MI->getDesc(), i, TRI, MF);
+ if (i < MI.getDesc().getNumOperands())
+ NewRC = TII->getRegClass(MI.getDesc(), i, TRI, MF);
// For now, only allow the register to be changed if its register
// class is consistent across all uses.
@@ -510,7 +511,7 @@ BreakAntiDependencies(const std::vector<SUnit>& SUnits,
unsigned Broken = 0;
unsigned Count = InsertPosIndex - 1;
for (MachineBasicBlock::iterator I = End, E = Begin; I != E; --Count) {
- MachineInstr *MI = --I;
+ MachineInstr &MI = *--I;
// Kill instructions can define registers but are really nops, and there
// might be a real definition earlier that needs to be paired with uses
// dominated by this kill.
@@ -518,7 +519,7 @@ BreakAntiDependencies(const std::vector<SUnit>& SUnits,
// FIXME: It may be possible to remove the isKill() restriction once PR18663
// has been properly fixed. There can be value in processing kills as seen
// in the AggressiveAntiDepBreaker class.
- if (MI->isDebugValue() || MI->isKill())
+ if (MI.isDebugValue() || MI.isKill())
continue;
// Check if this instruction has a dependence on the critical path that
@@ -535,7 +536,7 @@ BreakAntiDependencies(const std::vector<SUnit>& SUnits,
// edge per instruction. Note that we'd have to be able to break all of
// the anti-dependencies in an instruction in order to be effective.
unsigned AntiDepReg = 0;
- if (MI == CriticalPathMI) {
+ if (&MI == CriticalPathMI) {
if (const SDep *Edge = CriticalPathStep(CriticalPathSU)) {
const SUnit *NextSU = Edge->getSUnit();
@@ -585,7 +586,7 @@ BreakAntiDependencies(const std::vector<SUnit>& SUnits,
// If MI's defs have a special allocation requirement, don't allow
// any def registers to be changed. Also assume all registers
// defined in a call must not be changed (ABI).
- if (MI->isCall() || MI->hasExtraDefRegAllocReq() || TII->isPredicated(MI))
+ if (MI.isCall() || MI.hasExtraDefRegAllocReq() || TII->isPredicated(MI))
// If this instruction's defs have special allocation requirement, don't
// break this anti-dependency.
AntiDepReg = 0;
@@ -594,8 +595,8 @@ BreakAntiDependencies(const std::vector<SUnit>& SUnits,
// is invalid. If the instruction defines other registers,
// save a list of them so that we don't pick a new register
// that overlaps any of them.
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- MachineOperand &MO = MI->getOperand(i);
+ for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI.getOperand(i);
if (!MO.isReg()) continue;
unsigned Reg = MO.getReg();
if (Reg == 0) continue;
@@ -647,7 +648,7 @@ BreakAntiDependencies(const std::vector<SUnit>& SUnits,
for (DbgValueVector::iterator DVI = DbgValues.begin(),
DVE = DbgValues.end(); DVI != DVE; ++DVI)
if (DVI->second == Q->second->getParent())
- UpdateDbgValue(DVI->first, AntiDepReg, NewReg);
+ UpdateDbgValue(*DVI->first, AntiDepReg, NewReg);
}
// We just went back in time and modified history; the
diff --git a/contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.h b/contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.h
index 10b8739..678779f 100644
--- a/contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.h
+++ b/contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.h
@@ -19,17 +19,15 @@
#include "AntiDepBreaker.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterClassInfo.h"
#include "llvm/CodeGen/ScheduleDAG.h"
-#include <map>
namespace llvm {
class RegisterClassInfo;
class TargetInstrInfo;
class TargetRegisterInfo;
+class MachineFunction;
class LLVM_LIBRARY_VISIBILITY CriticalAntiDepBreaker : public AntiDepBreaker {
MachineFunction& MF;
@@ -84,15 +82,15 @@ class LLVM_LIBRARY_VISIBILITY CriticalAntiDepBreaker : public AntiDepBreaker {
/// Update liveness information to account for the current
/// instruction, which will not be scheduled.
- void Observe(MachineInstr *MI, unsigned Count,
+ void Observe(MachineInstr &MI, unsigned Count,
unsigned InsertPosIndex) override;
/// Finish anti-dep breaking for a basic block.
void FinishBlock() override;
private:
- void PrescanInstruction(MachineInstr *MI);
- void ScanInstruction(MachineInstr *MI, unsigned Count);
+ void PrescanInstruction(MachineInstr &MI);
+ void ScanInstruction(MachineInstr &MI, unsigned Count);
bool isNewRegClobberedByRefs(RegRefIter RegRefBegin,
RegRefIter RegRefEnd,
unsigned NewReg);
diff --git a/contrib/llvm/lib/CodeGen/DFAPacketizer.cpp b/contrib/llvm/lib/CodeGen/DFAPacketizer.cpp
index af6b6a3..2386af9 100644
--- a/contrib/llvm/lib/CodeGen/DFAPacketizer.cpp
+++ b/contrib/llvm/lib/CodeGen/DFAPacketizer.cpp
@@ -23,12 +23,15 @@
//
//===----------------------------------------------------------------------===//
+#define DEBUG_TYPE "packets"
+
#include "llvm/CodeGen/DFAPacketizer.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBundle.h"
#include "llvm/CodeGen/ScheduleDAGInstrs.h"
#include "llvm/MC/MCInstrItineraries.h"
#include "llvm/Target/TargetInstrInfo.h"
+
using namespace llvm;
// --------------------------------------------------------------------
@@ -44,8 +47,8 @@ namespace {
/// DFAPacketizerEmitter.cpp.
DFAInput getDFAInsnInput(const std::vector<unsigned> &InsnClass) {
DFAInput InsnInput = 0;
- assert ((InsnClass.size() <= DFA_MAX_RESTERMS) &&
- "Exceeded maximum number of DFA terms");
+ assert((InsnClass.size() <= DFA_MAX_RESTERMS) &&
+ "Exceeded maximum number of DFA terms");
for (auto U : InsnClass)
InsnInput = addDFAFuncUnits(InsnInput, U);
return InsnInput;
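
A toy model of how getDFAInsnInput() accumulates per-term function-unit masks
into a single input word; the field width is an illustrative constant, not the
generated DFA_MAX_RESOURCES value:

    #include <cstdint>
    #include <vector>

    using DFAInput = uint64_t;
    constexpr unsigned BitsPerTerm = 16; // stand-in field width

    DFAInput packInsnClass(const std::vector<unsigned> &InsnClass) {
      DFAInput In = 0;
      for (unsigned Units : InsnClass)
        In = (In << BitsPerTerm) | Units; // shift in the next term's units
      return In;
    }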
@@ -59,15 +62,16 @@ DFAPacketizer::DFAPacketizer(const InstrItineraryData *I,
InstrItins(I), CurrentState(0), DFAStateInputTable(SIT),
DFAStateEntryTable(SET) {
// Make sure DFA types are large enough for the number of terms & resources.
- assert((DFA_MAX_RESTERMS * DFA_MAX_RESOURCES) <= (8 * sizeof(DFAInput))
- && "(DFA_MAX_RESTERMS * DFA_MAX_RESOURCES) too big for DFAInput");
- assert((DFA_MAX_RESTERMS * DFA_MAX_RESOURCES) <= (8 * sizeof(DFAStateInput))
- && "(DFA_MAX_RESTERMS * DFA_MAX_RESOURCES) too big for DFAStateInput");
+ static_assert((DFA_MAX_RESTERMS * DFA_MAX_RESOURCES) <=
+ (8 * sizeof(DFAInput)),
+ "(DFA_MAX_RESTERMS * DFA_MAX_RESOURCES) too big for DFAInput");
+ static_assert(
+ (DFA_MAX_RESTERMS * DFA_MAX_RESOURCES) <= (8 * sizeof(DFAStateInput)),
+ "(DFA_MAX_RESTERMS * DFA_MAX_RESOURCES) too big for DFAStateInput");
}
-//
-// ReadTable - Read the DFA transition table and update CachedTable.
+// Read the DFA transition table and update CachedTable.
//
// Format of the transition tables:
// DFAStateInputTable[][2] = pairs of <Input, Transition> for all valid
@@ -80,8 +84,7 @@ void DFAPacketizer::ReadTable(unsigned int state) {
unsigned NextStateInTable = DFAStateEntryTable[state+1];
// Early exit in case CachedTable already contains this
// state's transitions.
- if (CachedTable.count(UnsignPair(state,
- DFAStateInputTable[ThisState][0])))
+ if (CachedTable.count(UnsignPair(state, DFAStateInputTable[ThisState][0])))
return;
for (unsigned i = ThisState; i < NextStateInTable; i++)
@@ -89,38 +92,41 @@ void DFAPacketizer::ReadTable(unsigned int state) {
DFAStateInputTable[i][1];
}
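
Reduced to its essentials, the cached table is a map from <state, input> pairs
to successor states, and a missing edge means a resource conflict; both
canReserveResources() and reserveResources() are lookups on it. A minimal
model:

    #include <map>
    #include <utility>

    using StateInput = std::pair<unsigned, unsigned>;
    std::map<StateInput, unsigned> Transitions; // <state, input> -> next state

    bool canReserve(unsigned State, unsigned Input) {
      return Transitions.count({State, Input}) != 0; // no edge: conflict
    }

    unsigned reserve(unsigned State, unsigned Input) {
      return Transitions.at({State, Input}); // advance the automaton
    }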
-//
-// getInsnInput - Return the DFAInput for an instruction class.
-//
+
+// Return the DFAInput for an instruction class.
DFAInput DFAPacketizer::getInsnInput(unsigned InsnClass) {
// Note: this logic must match that in DFAPacketizerDefs.h for input vectors.
DFAInput InsnInput = 0;
unsigned i = 0;
+ (void)i;
for (const InstrStage *IS = InstrItins->beginStage(InsnClass),
- *IE = InstrItins->endStage(InsnClass); IS != IE; ++IS, ++i) {
+ *IE = InstrItins->endStage(InsnClass); IS != IE; ++IS) {
InsnInput = addDFAFuncUnits(InsnInput, IS->getUnits());
- assert ((i < DFA_MAX_RESTERMS) && "Exceeded maximum number of DFA inputs");
+ assert((i++ < DFA_MAX_RESTERMS) && "Exceeded maximum number of DFA inputs");
}
return InsnInput;
}
-// getInsnInput - Return the DFAInput for an instruction class input vector.
+
+// Return the DFAInput for an instruction class input vector.
DFAInput DFAPacketizer::getInsnInput(const std::vector<unsigned> &InsnClass) {
return getDFAInsnInput(InsnClass);
}
-// canReserveResources - Check if the resources occupied by a MCInstrDesc
-// are available in the current state.
+
+// Check if the resources occupied by a MCInstrDesc are available in the
+// current state.
bool DFAPacketizer::canReserveResources(const llvm::MCInstrDesc *MID) {
unsigned InsnClass = MID->getSchedClass();
DFAInput InsnInput = getInsnInput(InsnClass);
UnsignPair StateTrans = UnsignPair(CurrentState, InsnInput);
ReadTable(CurrentState);
- return (CachedTable.count(StateTrans) != 0);
+ return CachedTable.count(StateTrans) != 0;
}
-// reserveResources - Reserve the resources occupied by a MCInstrDesc and
-// change the current state to reflect that change.
+
+// Reserve the resources occupied by a MCInstrDesc and change the current
+// state to reflect that change.
void DFAPacketizer::reserveResources(const llvm::MCInstrDesc *MID) {
unsigned InsnClass = MID->getSchedClass();
DFAInput InsnInput = getInsnInput(InsnClass);
@@ -131,34 +137,46 @@ void DFAPacketizer::reserveResources(const llvm::MCInstrDesc *MID) {
}
-// canReserveResources - Check if the resources occupied by a machine
-// instruction are available in the current state.
-bool DFAPacketizer::canReserveResources(llvm::MachineInstr *MI) {
- const llvm::MCInstrDesc &MID = MI->getDesc();
+// Check if the resources occupied by a machine instruction are available
+// in the current state.
+bool DFAPacketizer::canReserveResources(llvm::MachineInstr &MI) {
+ const llvm::MCInstrDesc &MID = MI.getDesc();
return canReserveResources(&MID);
}
-// reserveResources - Reserve the resources occupied by a machine
-// instruction and change the current state to reflect that change.
-void DFAPacketizer::reserveResources(llvm::MachineInstr *MI) {
- const llvm::MCInstrDesc &MID = MI->getDesc();
+
+// Reserve the resources occupied by a machine instruction and change the
+// current state to reflect that change.
+void DFAPacketizer::reserveResources(llvm::MachineInstr &MI) {
+ const llvm::MCInstrDesc &MID = MI.getDesc();
reserveResources(&MID);
}
+
namespace llvm {
-// DefaultVLIWScheduler - This class extends ScheduleDAGInstrs and overrides
-// Schedule method to build the dependence graph.
+// This class extends ScheduleDAGInstrs and overrides the schedule method
+// to build the dependence graph.
class DefaultVLIWScheduler : public ScheduleDAGInstrs {
private:
AliasAnalysis *AA;
+ /// Ordered list of DAG postprocessing steps.
+ std::vector<std::unique_ptr<ScheduleDAGMutation>> Mutations;
public:
DefaultVLIWScheduler(MachineFunction &MF, MachineLoopInfo &MLI,
AliasAnalysis *AA);
- // Schedule - Actual scheduling work.
+ // Actual scheduling work.
void schedule() override;
+
+ /// DefaultVLIWScheduler takes ownership of the Mutation object.
+ void addMutation(std::unique_ptr<ScheduleDAGMutation> Mutation) {
+ Mutations.push_back(std::move(Mutation));
+ }
+protected:
+ void postprocessDAG();
};
}
+
DefaultVLIWScheduler::DefaultVLIWScheduler(MachineFunction &MF,
MachineLoopInfo &MLI,
AliasAnalysis *AA)
@@ -166,42 +184,51 @@ DefaultVLIWScheduler::DefaultVLIWScheduler(MachineFunction &MF,
CanHandleTerminators = true;
}
+
+/// Apply each ScheduleDAGMutation step in order.
+void DefaultVLIWScheduler::postprocessDAG() {
+ for (auto &M : Mutations)
+ M->apply(this);
+}
+
+
void DefaultVLIWScheduler::schedule() {
// Build the scheduling graph.
buildSchedGraph(AA);
+ postprocessDAG();
}
-// VLIWPacketizerList Ctor
-VLIWPacketizerList::VLIWPacketizerList(MachineFunction &MF,
- MachineLoopInfo &MLI, AliasAnalysis *AA)
- : MF(MF), AA(AA) {
- TII = MF.getSubtarget().getInstrInfo();
+
+VLIWPacketizerList::VLIWPacketizerList(MachineFunction &mf,
+ MachineLoopInfo &mli, AliasAnalysis *aa)
+ : MF(mf), TII(mf.getSubtarget().getInstrInfo()), AA(aa) {
ResourceTracker = TII->CreateTargetScheduleState(MF.getSubtarget());
- VLIWScheduler = new DefaultVLIWScheduler(MF, MLI, AA);
+ VLIWScheduler = new DefaultVLIWScheduler(MF, mli, AA);
}
-// VLIWPacketizerList Dtor
+
VLIWPacketizerList::~VLIWPacketizerList() {
if (VLIWScheduler)
delete VLIWScheduler;
-
if (ResourceTracker)
delete ResourceTracker;
}
-// endPacket - End the current packet, bundle packet instructions and reset
-// DFA state.
+
+// End the current packet, bundle packet instructions and reset DFA state.
void VLIWPacketizerList::endPacket(MachineBasicBlock *MBB,
- MachineInstr *MI) {
+ MachineBasicBlock::iterator MI) {
if (CurrentPacketMIs.size() > 1) {
- MachineInstr *MIFirst = CurrentPacketMIs.front();
- finalizeBundle(*MBB, MIFirst->getIterator(), MI->getIterator());
+ MachineInstr &MIFirst = *CurrentPacketMIs.front();
+ finalizeBundle(*MBB, MIFirst.getIterator(), MI.getInstrIterator());
}
CurrentPacketMIs.clear();
ResourceTracker->clearResources();
+ DEBUG(dbgs() << "End packet\n");
}
-// PacketizeMIs - Bundle machine instructions into packets.
+
+// Bundle machine instructions into packets.
void VLIWPacketizerList::PacketizeMIs(MachineBasicBlock *MBB,
MachineBasicBlock::iterator BeginItr,
MachineBasicBlock::iterator EndItr) {
@@ -211,64 +238,88 @@ void VLIWPacketizerList::PacketizeMIs(MachineBasicBlock *MBB,
std::distance(BeginItr, EndItr));
VLIWScheduler->schedule();
+ DEBUG({
+ dbgs() << "Scheduling DAG of the packetize region\n";
+ for (SUnit &SU : VLIWScheduler->SUnits)
+ SU.dumpAll(VLIWScheduler);
+ });
+
// Generate MI -> SU map.
MIToSUnit.clear();
- for (unsigned i = 0, e = VLIWScheduler->SUnits.size(); i != e; ++i) {
- SUnit *SU = &VLIWScheduler->SUnits[i];
- MIToSUnit[SU->getInstr()] = SU;
- }
+ for (SUnit &SU : VLIWScheduler->SUnits)
+ MIToSUnit[SU.getInstr()] = &SU;
// The main packetizer loop.
for (; BeginItr != EndItr; ++BeginItr) {
- MachineInstr *MI = BeginItr;
-
- this->initPacketizerState();
+ MachineInstr &MI = *BeginItr;
+ initPacketizerState();
// End the current packet if needed.
- if (this->isSoloInstruction(MI)) {
+ if (isSoloInstruction(MI)) {
endPacket(MBB, MI);
continue;
}
// Ignore pseudo instructions.
- if (this->ignorePseudoInstruction(MI, MBB))
+ if (ignorePseudoInstruction(MI, MBB))
continue;
- SUnit *SUI = MIToSUnit[MI];
+ SUnit *SUI = MIToSUnit[&MI];
assert(SUI && "Missing SUnit Info!");
// Ask DFA if machine resource is available for MI.
+ DEBUG(dbgs() << "Checking resources for adding MI to packet " << MI);
+
bool ResourceAvail = ResourceTracker->canReserveResources(MI);
+ DEBUG({
+ if (ResourceAvail)
+ dbgs() << " Resources are available for adding MI to packet\n";
+ else
+ dbgs() << " Resources NOT available\n";
+ });
if (ResourceAvail && shouldAddToPacket(MI)) {
// Dependency check for MI with instructions in CurrentPacketMIs.
- for (std::vector<MachineInstr*>::iterator VI = CurrentPacketMIs.begin(),
- VE = CurrentPacketMIs.end(); VI != VE; ++VI) {
- MachineInstr *MJ = *VI;
+ for (auto MJ : CurrentPacketMIs) {
SUnit *SUJ = MIToSUnit[MJ];
assert(SUJ && "Missing SUnit Info!");
+ DEBUG(dbgs() << " Checking against MJ " << *MJ);
// Is it legal to packetize SUI and SUJ together.
- if (!this->isLegalToPacketizeTogether(SUI, SUJ)) {
+ if (!isLegalToPacketizeTogether(SUI, SUJ)) {
+ DEBUG(dbgs() << " Not legal to add MI, try to prune\n");
// Allow packetization if dependency can be pruned.
- if (!this->isLegalToPruneDependencies(SUI, SUJ)) {
+ if (!isLegalToPruneDependencies(SUI, SUJ)) {
// End the packet if dependency cannot be pruned.
+ DEBUG(dbgs() << " Could not prune dependencies for adding MI\n");
endPacket(MBB, MI);
break;
- } // !isLegalToPruneDependencies.
- } // !isLegalToPacketizeTogether.
- } // For all instructions in CurrentPacketMIs.
+ }
+ DEBUG(dbgs() << " Pruned dependence for adding MI\n");
+ }
+ }
} else {
+ DEBUG(if (ResourceAvail)
+ dbgs() << "Resources are available, but instruction should not be "
+ "added to packet\n " << MI);
// End the packet if the resource is not available, or if the instruction
// should not be added to the current packet.
endPacket(MBB, MI);
}
// Add MI to the current packet.
- BeginItr = this->addToPacket(MI);
- } // For all instructions in BB.
+ DEBUG(dbgs() << "* Adding MI to packet " << MI << '\n');
+ BeginItr = addToPacket(MI);
+ } // For all instructions in the packetization range.
// End any packet left behind.
endPacket(MBB, EndItr);
VLIWScheduler->exitRegion();
VLIWScheduler->finishBlock();
}
+
+
+// Add a DAG mutation object to the ordered list.
+void VLIWPacketizerList::addMutation(
+ std::unique_ptr<ScheduleDAGMutation> Mutation) {
+ VLIWScheduler->addMutation(std::move(Mutation));
+}
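
Hedged usage sketch for the new addMutation() hook: a target can register a
ScheduleDAGMutation that runs after buildSchedGraph() and adjusts dependence
edges before packetization. The class name and edge logic below are
hypothetical, and the header providing ScheduleDAGMutation is assumed to be
in scope:

    struct PruneTolerableDeps : public llvm::ScheduleDAGMutation {
      void apply(llvm::ScheduleDAGInstrs *DAG) override {
        // Walk DAG->SUnits here and drop or weaken edges the target can
        // tolerate inside a packet.
      }
    };

    // Registration, e.g. in the target's packetizer constructor:
    //   addMutation(llvm::make_unique<PruneTolerableDeps>());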
diff --git a/contrib/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp b/contrib/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp
index b11b497..0b8dc7a 100644
--- a/contrib/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp
+++ b/contrib/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp
@@ -42,6 +42,11 @@ namespace {
initializeDeadMachineInstructionElimPass(*PassRegistry::getPassRegistry());
}
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
private:
bool isDead(const MachineInstr *MI) const;
};
@@ -90,7 +95,7 @@ bool DeadMachineInstructionElim::isDead(const MachineInstr *MI) const {
}
bool DeadMachineInstructionElim::runOnMachineFunction(MachineFunction &MF) {
- if (skipOptnoneFunction(*MF.getFunction()))
+ if (skipFunction(*MF.getFunction()))
return false;
bool AnyChanges = false;
diff --git a/contrib/llvm/lib/CodeGen/DetectDeadLanes.cpp b/contrib/llvm/lib/CodeGen/DetectDeadLanes.cpp
new file mode 100644
index 0000000..1d9e79c
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/DetectDeadLanes.cpp
@@ -0,0 +1,602 @@
+//===- DetectDeadLanes.cpp - SubRegister Lane Usage Analysis --*- C++ -*---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// Analysis that tracks defined/used subregister lanes across COPY instructions
+/// and instructions that get lowered to a COPY (PHI, REG_SEQUENCE,
+/// INSERT_SUBREG, EXTRACT_SUBREG).
+/// The information is used to detect dead definitions and the usage of
+/// (completely) undefined values and mark the operands as such.
+/// This pass is necessary because the dead/undef status is not obvious anymore
+/// when subregisters are involved.
+///
+/// Example:
+/// %vreg0 = some definition
+/// %vreg1 = IMPLICIT_DEF
+/// %vreg2 = REG_SEQUENCE %vreg0, sub0, %vreg1, sub1
+/// %vreg3 = EXTRACT_SUBREG %vreg2, sub1
+/// = use %vreg3
+/// The %vreg0 definition is dead and %vreg3 contains an undefined value.
+//
+//===----------------------------------------------------------------------===//
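
Before the implementation, a toy rendition of the backward fixed point the
pass computes for used lanes, with lane masks as plain integers and
subregister composition reduced to a shift (all names illustrative):

    #include <cstdint>
    #include <deque>
    #include <vector>

    // A copy-like edge transfers the used lanes of Dst back to Src,
    // remapped by Shift.
    struct CopyEdge { unsigned Dst, Src, Shift; };

    void propagateUsedLanes(std::vector<uint32_t> &Used,
                            const std::vector<CopyEdge> &Copies) {
      std::deque<unsigned> Work;
      for (unsigned I = 0; I != Copies.size(); ++I)
        Work.push_back(I);
      while (!Work.empty()) {
        const CopyEdge &C = Copies[Work.front()];
        Work.pop_front();
        uint32_t OnSrc = Used[C.Dst] << C.Shift; // dst lanes seen on src
        if ((OnSrc & ~Used[C.Src]) == 0)
          continue;                              // no new bits: stable
        Used[C.Src] |= OnSrc;
        for (unsigned I = 0; I != Copies.size(); ++I)
          if (Copies[I].Dst == C.Src)            // requeue feeders of Src
            Work.push_back(I);
      }
    }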
+
+#include <deque>
+#include <vector>
+
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+#include "llvm/PassRegistry.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "detect-dead-lanes"
+
+namespace {
+
+/// Contains a bitmask of which lanes of a given virtual register are
+/// defined and which ones are actually used.
+struct VRegInfo {
+ LaneBitmask UsedLanes;
+ LaneBitmask DefinedLanes;
+};
+
+class DetectDeadLanes : public MachineFunctionPass {
+public:
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ static char ID;
+ DetectDeadLanes() : MachineFunctionPass(ID) {}
+
+ const char *getPassName() const override { return "Detect Dead Lanes"; }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+private:
+ /// Add used lane bits on the register used by operand \p MO. This translates
+ /// the bitmask based on the operand's subregister, and puts the register into
+ /// the worklist if any new bits were added.
+ void addUsedLanesOnOperand(const MachineOperand &MO, LaneBitmask UsedLanes);
+
+ /// Given a bitmask \p UsedLanes for the used lanes on a def output of a
+ /// COPY-like instruction, determine the lanes used on the use operands
+ /// and call addUsedLanesOnOperand() for them.
+ void transferUsedLanesStep(const MachineInstr &MI, LaneBitmask UsedLanes);
+
+ /// Given a use register operand \p Use and a mask of defined lanes, check
+ /// whether the operand belongs to a lowersToCopies() instruction; if so,
+ /// transfer the mask to the def and put the instruction into the worklist.
+ void transferDefinedLanesStep(const MachineOperand &Use,
+ LaneBitmask DefinedLanes);
+
+ /// Given a mask \p DefinedLanes of lanes defined at operand \p OpNum
+ /// of a COPY-like instruction, determine which lanes are defined at the
+ /// operand \p Def.
+ LaneBitmask transferDefinedLanes(const MachineOperand &Def, unsigned OpNum,
+ LaneBitmask DefinedLanes) const;
+
+ /// Given a mask \p UsedLanes used from the output of instruction \p MI,
+ /// determine which lanes are used from operand \p MO of this instruction.
+ LaneBitmask transferUsedLanes(const MachineInstr &MI, LaneBitmask UsedLanes,
+ const MachineOperand &MO) const;
+
+ bool runOnce(MachineFunction &MF);
+
+ LaneBitmask determineInitialDefinedLanes(unsigned Reg);
+ LaneBitmask determineInitialUsedLanes(unsigned Reg);
+
+ bool isUndefRegAtInput(const MachineOperand &MO,
+ const VRegInfo &RegInfo) const;
+
+ bool isUndefInput(const MachineOperand &MO, bool *CrossCopy) const;
+
+ const MachineRegisterInfo *MRI;
+ const TargetRegisterInfo *TRI;
+
+ void PutInWorklist(unsigned RegIdx) {
+ if (WorklistMembers.test(RegIdx))
+ return;
+ WorklistMembers.set(RegIdx);
+ Worklist.push_back(RegIdx);
+ }
+
+ VRegInfo *VRegInfos;
+ /// Worklist containing virtreg indexes.
+ std::deque<unsigned> Worklist;
+ BitVector WorklistMembers;
+ /// This bitvector is set for each vreg index where the vreg is defined
+ /// by an instruction where lowersToCopies()==true.
+ BitVector DefinedByCopy;
+};
+
+} // end anonymous namespace
+
+char DetectDeadLanes::ID = 0;
+char &llvm::DetectDeadLanesID = DetectDeadLanes::ID;
+
+INITIALIZE_PASS(DetectDeadLanes, "detect-dead-lanes", "Detect Dead Lanes",
+ false, false)
+
+/// Returns true if \p MI will get lowered to a series of COPY instructions.
+/// We call this a COPY-like instruction.
+static bool lowersToCopies(const MachineInstr &MI) {
+ // Note: We could support instructions with MCInstrDesc::isRegSequenceLike(),
+ // isExtractSubRegLike(), isInsertSubregLike() in the future even though they
+ // are not lowered to a COPY.
+ switch (MI.getOpcode()) {
+ case TargetOpcode::COPY:
+ case TargetOpcode::PHI:
+ case TargetOpcode::INSERT_SUBREG:
+ case TargetOpcode::REG_SEQUENCE:
+ case TargetOpcode::EXTRACT_SUBREG:
+ return true;
+ }
+ return false;
+}
+
+static bool isCrossCopy(const MachineRegisterInfo &MRI,
+ const MachineInstr &MI,
+ const TargetRegisterClass *DstRC,
+ const MachineOperand &MO) {
+ assert(lowersToCopies(MI));
+ unsigned SrcReg = MO.getReg();
+ const TargetRegisterClass *SrcRC = MRI.getRegClass(SrcReg);
+ if (DstRC == SrcRC)
+ return false;
+
+ unsigned SrcSubIdx = MO.getSubReg();
+
+ const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();
+ unsigned DstSubIdx = 0;
+ switch (MI.getOpcode()) {
+ case TargetOpcode::INSERT_SUBREG:
+ if (MI.getOperandNo(&MO) == 2)
+ DstSubIdx = MI.getOperand(3).getImm();
+ break;
+ case TargetOpcode::REG_SEQUENCE: {
+ unsigned OpNum = MI.getOperandNo(&MO);
+ DstSubIdx = MI.getOperand(OpNum+1).getImm();
+ break;
+ }
+ case TargetOpcode::EXTRACT_SUBREG: {
+ unsigned SubReg = MI.getOperand(2).getImm();
+ SrcSubIdx = TRI.composeSubRegIndices(SubReg, SrcSubIdx);
+ }
+ }
+
+ unsigned PreA, PreB; // Unused.
+ if (SrcSubIdx && DstSubIdx)
+ return !TRI.getCommonSuperRegClass(SrcRC, SrcSubIdx, DstRC, DstSubIdx, PreA,
+ PreB);
+ if (SrcSubIdx)
+ return !TRI.getMatchingSuperRegClass(SrcRC, DstRC, SrcSubIdx);
+ if (DstSubIdx)
+ return !TRI.getMatchingSuperRegClass(DstRC, SrcRC, DstSubIdx);
+ return !TRI.getCommonSubClass(SrcRC, DstRC);
+}
+
+void DetectDeadLanes::addUsedLanesOnOperand(const MachineOperand &MO,
+ LaneBitmask UsedLanes) {
+ if (!MO.readsReg())
+ return;
+ unsigned MOReg = MO.getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(MOReg))
+ return;
+
+ unsigned MOSubReg = MO.getSubReg();
+ if (MOSubReg != 0)
+ UsedLanes = TRI->composeSubRegIndexLaneMask(MOSubReg, UsedLanes);
+ UsedLanes &= MRI->getMaxLaneMaskForVReg(MOReg);
+
+ unsigned MORegIdx = TargetRegisterInfo::virtReg2Index(MOReg);
+ VRegInfo &MORegInfo = VRegInfos[MORegIdx];
+ LaneBitmask PrevUsedLanes = MORegInfo.UsedLanes;
+ // Any change at all?
+ if ((UsedLanes & ~PrevUsedLanes) == 0)
+ return;
+
+ // Set UsedLanes and remember instruction for further propagation.
+ MORegInfo.UsedLanes = PrevUsedLanes | UsedLanes;
+ if (DefinedByCopy.test(MORegIdx))
+ PutInWorklist(MORegIdx);
+}
+
+void DetectDeadLanes::transferUsedLanesStep(const MachineInstr &MI,
+ LaneBitmask UsedLanes) {
+ for (const MachineOperand &MO : MI.uses()) {
+ if (!MO.isReg() || !TargetRegisterInfo::isVirtualRegister(MO.getReg()))
+ continue;
+ LaneBitmask UsedOnMO = transferUsedLanes(MI, UsedLanes, MO);
+ addUsedLanesOnOperand(MO, UsedOnMO);
+ }
+}
+
+LaneBitmask DetectDeadLanes::transferUsedLanes(const MachineInstr &MI,
+ LaneBitmask UsedLanes,
+ const MachineOperand &MO) const {
+ unsigned OpNum = MI.getOperandNo(&MO);
+ assert(lowersToCopies(MI) && DefinedByCopy[
+ TargetRegisterInfo::virtReg2Index(MI.getOperand(0).getReg())]);
+
+ switch (MI.getOpcode()) {
+ case TargetOpcode::COPY:
+ case TargetOpcode::PHI:
+ return UsedLanes;
+ case TargetOpcode::REG_SEQUENCE: {
+ assert(OpNum % 2 == 1);
+ unsigned SubIdx = MI.getOperand(OpNum + 1).getImm();
+ return TRI->reverseComposeSubRegIndexLaneMask(SubIdx, UsedLanes);
+ }
+ case TargetOpcode::INSERT_SUBREG: {
+ unsigned SubIdx = MI.getOperand(3).getImm();
+ LaneBitmask MO2UsedLanes =
+ TRI->reverseComposeSubRegIndexLaneMask(SubIdx, UsedLanes);
+ if (OpNum == 2)
+ return MO2UsedLanes;
+
+ const MachineOperand &Def = MI.getOperand(0);
+ unsigned DefReg = Def.getReg();
+ const TargetRegisterClass *RC = MRI->getRegClass(DefReg);
+ LaneBitmask MO1UsedLanes;
+ if (RC->CoveredBySubRegs)
+ MO1UsedLanes = UsedLanes & ~TRI->getSubRegIndexLaneMask(SubIdx);
+ else
+ MO1UsedLanes = RC->LaneMask;
+
+ assert(OpNum == 1);
+ return MO1UsedLanes;
+ }
+ case TargetOpcode::EXTRACT_SUBREG: {
+ assert(OpNum == 1);
+ unsigned SubIdx = MI.getOperand(2).getImm();
+ return TRI->composeSubRegIndexLaneMask(SubIdx, UsedLanes);
+ }
+ default:
+ llvm_unreachable("function must be called with COPY-like instruction");
+ }
+}
+
+void DetectDeadLanes::transferDefinedLanesStep(const MachineOperand &Use,
+ LaneBitmask DefinedLanes) {
+ if (!Use.readsReg())
+ return;
+ // Check whether the operand writes a vreg and is part of a COPY-like
+ // instruction.
+ const MachineInstr &MI = *Use.getParent();
+ if (MI.getDesc().getNumDefs() != 1)
+ return;
+ // FIXME: PATCHPOINT instructions announce a Def that does not always exist,
+ // they really need to be modeled differently!
+ if (MI.getOpcode() == TargetOpcode::PATCHPOINT)
+ return;
+ const MachineOperand &Def = *MI.defs().begin();
+ unsigned DefReg = Def.getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(DefReg))
+ return;
+ unsigned DefRegIdx = TargetRegisterInfo::virtReg2Index(DefReg);
+ if (!DefinedByCopy.test(DefRegIdx))
+ return;
+
+ unsigned OpNum = MI.getOperandNo(&Use);
+ DefinedLanes =
+ TRI->reverseComposeSubRegIndexLaneMask(Use.getSubReg(), DefinedLanes);
+ DefinedLanes = transferDefinedLanes(Def, OpNum, DefinedLanes);
+
+ VRegInfo &RegInfo = VRegInfos[DefRegIdx];
+ LaneBitmask PrevDefinedLanes = RegInfo.DefinedLanes;
+ // Any change at all?
+ if ((DefinedLanes & ~PrevDefinedLanes) == 0)
+ return;
+
+ RegInfo.DefinedLanes = PrevDefinedLanes | DefinedLanes;
+ PutInWorklist(DefRegIdx);
+}
+
+LaneBitmask DetectDeadLanes::transferDefinedLanes(const MachineOperand &Def,
+ unsigned OpNum, LaneBitmask DefinedLanes) const {
+ const MachineInstr &MI = *Def.getParent();
+ // Translate DefinedLanes if necessary.
+ switch (MI.getOpcode()) {
+ case TargetOpcode::REG_SEQUENCE: {
+ unsigned SubIdx = MI.getOperand(OpNum + 1).getImm();
+ DefinedLanes = TRI->composeSubRegIndexLaneMask(SubIdx, DefinedLanes);
+ DefinedLanes &= TRI->getSubRegIndexLaneMask(SubIdx);
+ break;
+ }
+ case TargetOpcode::INSERT_SUBREG: {
+ unsigned SubIdx = MI.getOperand(3).getImm();
+ if (OpNum == 2) {
+ DefinedLanes = TRI->composeSubRegIndexLaneMask(SubIdx, DefinedLanes);
+ DefinedLanes &= TRI->getSubRegIndexLaneMask(SubIdx);
+ } else {
+ assert(OpNum == 1 && "INSERT_SUBREG must have two operands");
+ // Ignore lanes defined by operand 2.
+ DefinedLanes &= ~TRI->getSubRegIndexLaneMask(SubIdx);
+ }
+ break;
+ }
+ case TargetOpcode::EXTRACT_SUBREG: {
+ unsigned SubIdx = MI.getOperand(2).getImm();
+ assert(OpNum == 1 && "EXTRACT_SUBREG must have one register operand only");
+ DefinedLanes = TRI->reverseComposeSubRegIndexLaneMask(SubIdx, DefinedLanes);
+ break;
+ }
+ case TargetOpcode::COPY:
+ case TargetOpcode::PHI:
+ break;
+ default:
+ llvm_unreachable("function must be called with COPY-like instruction");
+ }
+
+ assert(Def.getSubReg() == 0 &&
+ "Should not have subregister defs in machine SSA phase");
+ DefinedLanes &= MRI->getMaxLaneMaskForVReg(Def.getReg());
+ return DefinedLanes;
+}
+
+LaneBitmask DetectDeadLanes::determineInitialDefinedLanes(unsigned Reg) {
+ // Live-In or unused registers have no definition but are considered fully
+ // defined.
+ if (!MRI->hasOneDef(Reg))
+ return ~0u;
+
+ const MachineOperand &Def = *MRI->def_begin(Reg);
+ const MachineInstr &DefMI = *Def.getParent();
+ if (lowersToCopies(DefMI)) {
+ // Start optimistically with no used or defined lanes for copy
+ // instructions. The following dataflow analysis will add more bits.
+ unsigned RegIdx = TargetRegisterInfo::virtReg2Index(Reg);
+ DefinedByCopy.set(RegIdx);
+ PutInWorklist(RegIdx);
+
+ if (Def.isDead())
+ return 0;
+
+ // COPY/PHI can copy across unrelated register classes (example: float/int)
+ // with incompatible subregister structure. Do not include these in the
+ // dataflow analysis since we cannot transfer lanemasks in a meaningful way.
+ const TargetRegisterClass *DefRC = MRI->getRegClass(Reg);
+
+ // Determine initially DefinedLanes.
+ LaneBitmask DefinedLanes = 0;
+ for (const MachineOperand &MO : DefMI.uses()) {
+ if (!MO.isReg() || !MO.readsReg())
+ continue;
+ unsigned MOReg = MO.getReg();
+ if (!MOReg)
+ continue;
+
+ LaneBitmask MODefinedLanes;
+ if (TargetRegisterInfo::isPhysicalRegister(MOReg)) {
+ MODefinedLanes = ~0u;
+ } else if (isCrossCopy(*MRI, DefMI, DefRC, MO)) {
+ MODefinedLanes = ~0u;
+ } else {
+ assert(TargetRegisterInfo::isVirtualRegister(MOReg));
+ if (MRI->hasOneDef(MOReg)) {
+ const MachineOperand &MODef = *MRI->def_begin(MOReg);
+ const MachineInstr &MODefMI = *MODef.getParent();
+ // Bits from copy-like operations will be added later.
+ if (lowersToCopies(MODefMI) || MODefMI.isImplicitDef())
+ continue;
+ }
+ unsigned MOSubReg = MO.getSubReg();
+ MODefinedLanes = MRI->getMaxLaneMaskForVReg(MOReg);
+ MODefinedLanes = TRI->reverseComposeSubRegIndexLaneMask(
+ MOSubReg, MODefinedLanes);
+ }
+
+ unsigned OpNum = DefMI.getOperandNo(&MO);
+ DefinedLanes |= transferDefinedLanes(Def, OpNum, MODefinedLanes);
+ }
+ return DefinedLanes;
+ }
+ if (DefMI.isImplicitDef() || Def.isDead())
+ return 0;
+
+ assert(Def.getSubReg() == 0 &&
+ "Should not have subregister defs in machine SSA phase");
+ return MRI->getMaxLaneMaskForVReg(Reg);
+}
+
+LaneBitmask DetectDeadLanes::determineInitialUsedLanes(unsigned Reg) {
+ LaneBitmask UsedLanes = 0;
+ for (const MachineOperand &MO : MRI->use_nodbg_operands(Reg)) {
+ if (!MO.readsReg())
+ continue;
+
+ const MachineInstr &UseMI = *MO.getParent();
+ if (UseMI.isKill())
+ continue;
+
+ unsigned SubReg = MO.getSubReg();
+ if (lowersToCopies(UseMI)) {
+ assert(UseMI.getDesc().getNumDefs() == 1);
+ const MachineOperand &Def = *UseMI.defs().begin();
+ unsigned DefReg = Def.getReg();
+ // The used lanes of COPY-like instruction operands are determined by the
+ // following dataflow analysis.
+ if (TargetRegisterInfo::isVirtualRegister(DefReg)) {
+ // But ignore copies across incompatible register classes.
+ bool CrossCopy = false;
+ if (lowersToCopies(UseMI)) {
+ const TargetRegisterClass *DstRC = MRI->getRegClass(DefReg);
+ CrossCopy = isCrossCopy(*MRI, UseMI, DstRC, MO);
+ if (CrossCopy)
+ DEBUG(dbgs() << "Copy accross incompatible classes: " << UseMI);
+ }
+
+ if (!CrossCopy)
+ continue;
+ }
+ }
+
+ // Shortcut: All lanes are used.
+ if (SubReg == 0)
+ return MRI->getMaxLaneMaskForVReg(Reg);
+
+ UsedLanes |= TRI->getSubRegIndexLaneMask(SubReg);
+ }
+ return UsedLanes;
+}
+
+bool DetectDeadLanes::isUndefRegAtInput(const MachineOperand &MO,
+ const VRegInfo &RegInfo) const {
+ unsigned SubReg = MO.getSubReg();
+ LaneBitmask Mask = TRI->getSubRegIndexLaneMask(SubReg);
+ return (RegInfo.DefinedLanes & RegInfo.UsedLanes & Mask) == 0;
+}
+
+bool DetectDeadLanes::isUndefInput(const MachineOperand &MO,
+ bool *CrossCopy) const {
+ if (!MO.isUse())
+ return false;
+ const MachineInstr &MI = *MO.getParent();
+ if (!lowersToCopies(MI))
+ return false;
+ const MachineOperand &Def = MI.getOperand(0);
+ unsigned DefReg = Def.getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(DefReg))
+ return false;
+ unsigned DefRegIdx = TargetRegisterInfo::virtReg2Index(DefReg);
+ if (!DefinedByCopy.test(DefRegIdx))
+ return false;
+
+ const VRegInfo &DefRegInfo = VRegInfos[DefRegIdx];
+ LaneBitmask UsedLanes = transferUsedLanes(MI, DefRegInfo.UsedLanes, MO);
+ if (UsedLanes != 0)
+ return false;
+
+ unsigned MOReg = MO.getReg();
+ if (TargetRegisterInfo::isVirtualRegister(MOReg)) {
+ const TargetRegisterClass *DstRC = MRI->getRegClass(DefReg);
+ *CrossCopy = isCrossCopy(*MRI, MI, DstRC, MO);
+ }
+ return true;
+}
+
+bool DetectDeadLanes::runOnce(MachineFunction &MF) {
+ // First pass: Populate defs/uses of vregs with initial values
+ unsigned NumVirtRegs = MRI->getNumVirtRegs();
+ for (unsigned RegIdx = 0; RegIdx < NumVirtRegs; ++RegIdx) {
+ unsigned Reg = TargetRegisterInfo::index2VirtReg(RegIdx);
+
+ // Determine used/defined lanes and add copy instructions to worklist.
+ VRegInfo &Info = VRegInfos[RegIdx];
+ Info.DefinedLanes = determineInitialDefinedLanes(Reg);
+ Info.UsedLanes = determineInitialUsedLanes(Reg);
+ }
+
+ // Iterate as long as defined lanes/used lanes keep changing.
+ while (!Worklist.empty()) {
+ unsigned RegIdx = Worklist.front();
+ Worklist.pop_front();
+ WorklistMembers.reset(RegIdx);
+ VRegInfo &Info = VRegInfos[RegIdx];
+ unsigned Reg = TargetRegisterInfo::index2VirtReg(RegIdx);
+
+ // Transfer UsedLanes to operands of DefMI (backwards dataflow).
+ MachineOperand &Def = *MRI->def_begin(Reg);
+ const MachineInstr &MI = *Def.getParent();
+ transferUsedLanesStep(MI, Info.UsedLanes);
+ // Transfer DefinedLanes to users of Reg (forward dataflow).
+ for (const MachineOperand &MO : MRI->use_nodbg_operands(Reg))
+ transferDefinedLanesStep(MO, Info.DefinedLanes);
+ }
+
+ DEBUG(
+ dbgs() << "Defined/Used lanes:\n";
+ for (unsigned RegIdx = 0; RegIdx < NumVirtRegs; ++RegIdx) {
+ unsigned Reg = TargetRegisterInfo::index2VirtReg(RegIdx);
+ const VRegInfo &Info = VRegInfos[RegIdx];
+ dbgs() << PrintReg(Reg, nullptr)
+ << " Used: " << PrintLaneMask(Info.UsedLanes)
+ << " Def: " << PrintLaneMask(Info.DefinedLanes) << '\n';
+ }
+ dbgs() << "\n";
+ );
+
+ bool Again = false;
+ // Mark operands as dead/unused.
+ for (MachineBasicBlock &MBB : MF) {
+ for (MachineInstr &MI : MBB) {
+ for (MachineOperand &MO : MI.operands()) {
+ if (!MO.isReg())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ continue;
+ unsigned RegIdx = TargetRegisterInfo::virtReg2Index(Reg);
+ const VRegInfo &RegInfo = VRegInfos[RegIdx];
+ if (MO.isDef() && !MO.isDead() && RegInfo.UsedLanes == 0) {
+ DEBUG(dbgs() << "Marking operand '" << MO << "' as dead in " << MI);
+ MO.setIsDead();
+ }
+ if (MO.readsReg()) {
+ bool CrossCopy = false;
+ if (isUndefRegAtInput(MO, RegInfo)) {
+ DEBUG(dbgs() << "Marking operand '" << MO << "' as undef in "
+ << MI);
+ MO.setIsUndef();
+ } else if (isUndefInput(MO, &CrossCopy)) {
+ DEBUG(dbgs() << "Marking operand '" << MO << "' as undef in "
+ << MI);
+ MO.setIsUndef();
+ if (CrossCopy)
+ Again = true;
+ }
+ }
+ }
+ }
+ }
+
+ return Again;
+}
+
+bool DetectDeadLanes::runOnMachineFunction(MachineFunction &MF) {
+ // Don't bother if we won't track subregister liveness later. This pass is
+ // required for correctness if subregister liveness is enabled because the
+ // register coalescer cannot deal with hidden dead defs. However, without
+ // subregister liveness enabled, the expected benefits of this pass are
+ // small, so we save the compile time.
+ if (!MF.getSubtarget().enableSubRegLiveness()) {
+ DEBUG(dbgs() << "Skipping Detect dead lanes pass\n");
+ return false;
+ }
+
+ MRI = &MF.getRegInfo();
+ TRI = MRI->getTargetRegisterInfo();
+
+ unsigned NumVirtRegs = MRI->getNumVirtRegs();
+ VRegInfos = new VRegInfo[NumVirtRegs];
+ WorklistMembers.resize(NumVirtRegs);
+ DefinedByCopy.resize(NumVirtRegs);
+
+ bool Again;
+ do {
+ Again = runOnce(MF);
+ } while(Again);
+
+ DefinedByCopy.clear();
+ WorklistMembers.clear();
+ delete[] VRegInfos;
+ return true;
+}
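The runOnce/runOnMachineFunction pair above is a standard monotone dataflow: seed every vreg's lane masks, then re-propagate whenever a mask grows until the worklist drains. The following standalone sketch models just that fixed-point skeleton; every type here is an illustrative stand-in (LLVM's LaneBitmask, WorklistMembers and VRegInfos are richer), not LLVM API.

    // Toy model of the DetectDeadLanes worklist fixed point.
    #include <cstdint>
    #include <deque>
    #include <vector>

    using LaneBitmask = uint32_t; // stand-in for llvm::LaneBitmask

    struct VRegInfo {
      LaneBitmask UsedLanes = 0;
      LaneBitmask DefinedLanes = 0;
    };

    struct DeadLaneModel {
      std::vector<VRegInfo> VRegInfos;
      std::vector<bool> WorklistMembers; // mirrors the pass's bitset
      std::deque<unsigned> Worklist;

      // Grow a mask; if it changed, requeue the vreg so its neighbours get
      // revisited. Masks only ever gain bits, which is what makes the
      // iteration monotone and guarantees termination.
      void addUsedLanes(unsigned RegIdx, LaneBitmask Lanes) {
        LaneBitmask &Mask = VRegInfos[RegIdx].UsedLanes;
        if ((Mask | Lanes) == Mask)
          return; // already at a fixed point for this vreg
        Mask |= Lanes;
        if (!WorklistMembers[RegIdx]) {
          WorklistMembers[RegIdx] = true;
          Worklist.push_back(RegIdx);
        }
      }
    };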
diff --git a/contrib/llvm/lib/CodeGen/EarlyIfConversion.cpp b/contrib/llvm/lib/CodeGen/EarlyIfConversion.cpp
index f3536d7..8c96124 100644
--- a/contrib/llvm/lib/CodeGen/EarlyIfConversion.cpp
+++ b/contrib/llvm/lib/CodeGen/EarlyIfConversion.cpp
@@ -278,7 +278,7 @@ bool SSAIfConv::findInsertionPoint() {
while (I != B) {
--I;
// Some of the conditional code depends on I.
- if (InsertAfter.count(I)) {
+ if (InsertAfter.count(&*I)) {
DEBUG(dbgs() << "Can't insert code after " << *I);
return false;
}
@@ -386,7 +386,7 @@ bool SSAIfConv::canConvertIf(MachineBasicBlock *MBB) {
// The branch we're looking to eliminate must be analyzable.
Cond.clear();
- if (TII->AnalyzeBranch(*Head, TBB, FBB, Cond)) {
+ if (TII->analyzeBranch(*Head, TBB, FBB, Cond)) {
DEBUG(dbgs() << "Branch not analyzable.\n");
return false;
}
@@ -480,7 +480,7 @@ void SSAIfConv::rewritePHIOperands() {
for (unsigned i = 0, e = PHIs.size(); i != e; ++i) {
PHIInfo &PI = PHIs[i];
unsigned DstReg = 0;
-
+
DEBUG(dbgs() << "If-converting " << *PI.PHI);
if (PI.TReg == PI.FReg) {
// We do not need the select instruction if both incoming values are
@@ -718,7 +718,7 @@ bool EarlyIfConverter::shouldConvertIf() {
// TBB / FBB data dependencies may delay the select even more.
MachineTraceMetrics::Trace HeadTrace = MinInstr->getTrace(IfConv.Head);
unsigned BranchDepth =
- HeadTrace.getInstrCycles(IfConv.Head->getFirstTerminator()).Depth;
+ HeadTrace.getInstrCycles(*IfConv.Head->getFirstTerminator()).Depth;
DEBUG(dbgs() << "Branch depth: " << BranchDepth << '\n');
// Look at all the tail phis, and compute the critical path extension caused
@@ -726,8 +726,8 @@ bool EarlyIfConverter::shouldConvertIf() {
MachineTraceMetrics::Trace TailTrace = MinInstr->getTrace(IfConv.Tail);
for (unsigned i = 0, e = IfConv.PHIs.size(); i != e; ++i) {
SSAIfConv::PHIInfo &PI = IfConv.PHIs[i];
- unsigned Slack = TailTrace.getInstrSlack(PI.PHI);
- unsigned MaxDepth = Slack + TailTrace.getInstrCycles(PI.PHI).Depth;
+ unsigned Slack = TailTrace.getInstrSlack(*PI.PHI);
+ unsigned MaxDepth = Slack + TailTrace.getInstrCycles(*PI.PHI).Depth;
DEBUG(dbgs() << "Slack " << Slack << ":\t" << *PI.PHI);
// The condition is pulled into the critical path.
@@ -742,7 +742,7 @@ bool EarlyIfConverter::shouldConvertIf() {
}
// The TBB value is pulled into the critical path.
- unsigned TDepth = adjCycles(TBBTrace.getPHIDepth(PI.PHI), PI.TCycles);
+ unsigned TDepth = adjCycles(TBBTrace.getPHIDepth(*PI.PHI), PI.TCycles);
if (TDepth > MaxDepth) {
unsigned Extra = TDepth - MaxDepth;
DEBUG(dbgs() << "TBB data adds " << Extra << " cycles.\n");
@@ -753,7 +753,7 @@ bool EarlyIfConverter::shouldConvertIf() {
}
// The FBB value is pulled into the critical path.
- unsigned FDepth = adjCycles(FBBTrace.getPHIDepth(PI.PHI), PI.FCycles);
+ unsigned FDepth = adjCycles(FBBTrace.getPHIDepth(*PI.PHI), PI.FCycles);
if (FDepth > MaxDepth) {
unsigned Extra = FDepth - MaxDepth;
DEBUG(dbgs() << "FBB data adds " << Extra << " cycles.\n");
@@ -785,6 +785,9 @@ bool EarlyIfConverter::tryConvertIf(MachineBasicBlock *MBB) {
bool EarlyIfConverter::runOnMachineFunction(MachineFunction &MF) {
DEBUG(dbgs() << "********** EARLY IF-CONVERSION **********\n"
<< "********** Function: " << MF.getName() << '\n');
+ if (skipFunction(*MF.getFunction()))
+ return false;
+
// Only run if conversion if the target wants it.
const TargetSubtargetInfo &STI = MF.getSubtarget();
if (!STI.enableEarlyIfConversion())
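Two mechanical themes run through this hunk and several below: TargetInstrInfo and trace-metrics APIs moving from MachineInstr* to MachineInstr& (hence the new *PI.PHI dereferences), and the skipFunction guard, the stock way for an optimization pass to honour optnone and opt-bisect. A hedged sketch of the guard pattern, with MyPass as a hypothetical pass name:

    // Hypothetical pass illustrating the skipFunction guard added above;
    // returning false reports "no change" to the pass manager.
    bool MyPass::runOnMachineFunction(MachineFunction &MF) {
      if (skipFunction(*MF.getFunction()))
        return false; // optnone or opt-bisect: leave this function alone
      // ... the actual transformation would go here ...
      return true;
    }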
diff --git a/contrib/llvm/lib/CodeGen/ErlangGC.cpp b/contrib/llvm/lib/CodeGen/ErlangGC.cpp
deleted file mode 100644
index 024946d..0000000
--- a/contrib/llvm/lib/CodeGen/ErlangGC.cpp
+++ /dev/null
@@ -1,46 +0,0 @@
-//===-- ErlangGC.cpp - Erlang/OTP GC strategy -------------------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the Erlang/OTP runtime-compatible garbage collector
-// (e.g. defines safe points, root initialization etc.)
-//
-// The frametable emitter is in ErlangGCPrinter.cpp.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/CodeGen/GCs.h"
-#include "llvm/CodeGen/GCStrategy.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/MC/MCContext.h"
-#include "llvm/MC/MCSymbol.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
-
-using namespace llvm;
-
-namespace {
-
-class ErlangGC : public GCStrategy {
-public:
- ErlangGC();
-};
-}
-
-static GCRegistry::Add<ErlangGC> X("erlang",
- "erlang-compatible garbage collector");
-
-void llvm::linkErlangGC() {}
-
-ErlangGC::ErlangGC() {
- InitRoots = false;
- NeededSafePoints = 1 << GC::PostCall;
- UsesMetadata = true;
- CustomRoots = false;
-}
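This deletion does not drop functionality: the built-in strategies were folded into the new BuiltinGCs.cpp elsewhere in this import. The registration idiom itself is unchanged; a sketch of what the consolidated entry plausibly looks like, with the strategy body copied from the file removed above:

    // Registration idiom for a built-in GC strategy (cf. BuiltinGCs.cpp);
    // the configuration matches the ErlangGC deleted above.
    class ErlangGC : public GCStrategy {
    public:
      ErlangGC() {
        InitRoots = false;                    // roots are not auto-initialized
        NeededSafePoints = 1 << GC::PostCall; // safe points after calls only
        UsesMetadata = true;                  // emits a frame table
        CustomRoots = false;
      }
    };
    static GCRegistry::Add<ErlangGC> X("erlang",
                                       "erlang-compatible garbage collector");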
diff --git a/contrib/llvm/lib/CodeGen/ExecutionDepsFix.cpp b/contrib/llvm/lib/CodeGen/ExecutionDepsFix.cpp
index c550008..566b8d5 100644
--- a/contrib/llvm/lib/CodeGen/ExecutionDepsFix.cpp
+++ b/contrib/llvm/lib/CodeGen/ExecutionDepsFix.cpp
@@ -168,6 +168,11 @@ public:
bool runOnMachineFunction(MachineFunction &MF) override;
+ MachineFunctionProperties getRequiredProperties() const override {
+ return MachineFunctionProperties().set(
+ MachineFunctionProperties::Property::AllVRegsAllocated);
+ }
+
const char *getPassName() const override {
return "Execution dependency fix";
}
@@ -315,7 +320,7 @@ void ExeDepsFix::collapse(DomainValue *dv, unsigned domain) {
// Collapse all the instructions.
while (!dv->Instrs.empty())
- TII->setExecutionDomain(dv->Instrs.pop_back_val(), domain);
+ TII->setExecutionDomain(*dv->Instrs.pop_back_val(), domain);
dv->setSingleDomain(domain);
// If there are multiple users, give them new, unique DomainValues.
@@ -455,7 +460,7 @@ void ExeDepsFix::visitInstr(MachineInstr *MI) {
return;
// Update instructions with explicit execution domains.
- std::pair<uint16_t, uint16_t> DomP = TII->getExecutionDomain(MI);
+ std::pair<uint16_t, uint16_t> DomP = TII->getExecutionDomain(*MI);
if (DomP.first) {
if (DomP.second)
visitSoftInstr(MI, DomP.second);
@@ -503,7 +508,7 @@ void ExeDepsFix::processDefs(MachineInstr *MI, bool Kill) {
// Break dependence on undef uses. Do this before updating LiveRegs below.
unsigned OpNum;
- unsigned Pref = TII->getUndefRegClearance(MI, OpNum, TRI);
+ unsigned Pref = TII->getUndefRegClearance(*MI, OpNum, TRI);
if (Pref) {
if (shouldBreakDependence(MI, OpNum, Pref))
UndefReads.push_back(std::make_pair(MI, OpNum));
@@ -526,9 +531,9 @@ void ExeDepsFix::processDefs(MachineInstr *MI, bool Kill) {
// Check clearance before partial register updates.
// Call breakDependence before setting LiveRegs[rx].Def.
- unsigned Pref = TII->getPartialRegUpdateClearance(MI, i, TRI);
+ unsigned Pref = TII->getPartialRegUpdateClearance(*MI, i, TRI);
if (Pref && shouldBreakDependence(MI, i, Pref))
- TII->breakPartialRegDependency(MI, i, TRI);
+ TII->breakPartialRegDependency(*MI, i, TRI);
// How many instructions since rx was last written?
LiveRegs[rx].Def = CurInstr;
@@ -553,7 +558,9 @@ void ExeDepsFix::processUndefReads(MachineBasicBlock *MBB) {
// Collect this block's live out register units.
LiveRegSet.init(TRI);
- LiveRegSet.addLiveOuts(MBB);
+ // We do not need to care about pristine registers as they are just preserved
+ // but not actually used in the function.
+ LiveRegSet.addLiveOutsNoPristines(*MBB);
MachineInstr *UndefMI = UndefReads.back().first;
unsigned OpIdx = UndefReads.back().second;
@@ -564,7 +571,7 @@ void ExeDepsFix::processUndefReads(MachineBasicBlock *MBB) {
if (UndefMI == &I) {
if (!LiveRegSet.contains(UndefMI->getOperand(OpIdx).getReg()))
- TII->breakPartialRegDependency(UndefMI, OpIdx, TRI);
+ TII->breakPartialRegDependency(*UndefMI, OpIdx, TRI);
UndefReads.pop_back();
if (UndefReads.empty())
@@ -638,7 +645,7 @@ void ExeDepsFix::visitSoftInstr(MachineInstr *mi, unsigned mask) {
// If the collapsed operands force a single domain, propagate the collapse.
if (isPowerOf2_32(available)) {
unsigned domain = countTrailingZeros(available);
- TII->setExecutionDomain(mi, domain);
+ TII->setExecutionDomain(*mi, domain);
visitHardInstr(mi, domain);
return;
}
@@ -719,6 +726,8 @@ void ExeDepsFix::visitSoftInstr(MachineInstr *mi, unsigned mask) {
}
bool ExeDepsFix::runOnMachineFunction(MachineFunction &mf) {
+ if (skipFunction(*mf.getFunction()))
+ return false;
MF = &mf;
TII = MF->getSubtarget().getInstrInfo();
TRI = MF->getSubtarget().getRegisterInfo();
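Besides the MachineInstr& migration, the one behavioural change in this file is the switch to addLiveOutsNoPristines. Pristine registers are callee-saved registers the function saves and restores but never otherwise touches, so treating them as live-out would needlessly block the undef-read dependency breaking. The two call sites from the hunk, side by side:

    // Old: pristine callee-saved registers counted as live-out, which could
    // stop breakPartialRegDependency from firing on untouched registers.
    LiveRegSet.addLiveOuts(MBB);
    // New: only registers genuinely live across the block end are added.
    LiveRegSet.addLiveOutsNoPristines(*MBB);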
diff --git a/contrib/llvm/lib/CodeGen/ExpandISelPseudos.cpp b/contrib/llvm/lib/CodeGen/ExpandISelPseudos.cpp
index 90ddac9..0ec79c2 100644
--- a/contrib/llvm/lib/CodeGen/ExpandISelPseudos.cpp
+++ b/contrib/llvm/lib/CodeGen/ExpandISelPseudos.cpp
@@ -53,13 +53,12 @@ bool ExpandISelPseudos::runOnMachineFunction(MachineFunction &MF) {
MachineBasicBlock *MBB = &*I;
for (MachineBasicBlock::iterator MBBI = MBB->begin(), MBBE = MBB->end();
MBBI != MBBE; ) {
- MachineInstr *MI = MBBI++;
+ MachineInstr &MI = *MBBI++;
// If MI is a pseudo, expand it.
- if (MI->usesCustomInsertionHook()) {
+ if (MI.usesCustomInsertionHook()) {
Changed = true;
- MachineBasicBlock *NewMBB =
- TLI->EmitInstrWithCustomInserter(MI, MBB);
+ MachineBasicBlock *NewMBB = TLI->EmitInstrWithCustomInserter(MI, MBB);
// The expansion may involve new basic blocks.
if (NewMBB != MBB) {
MBB = NewMBB;
diff --git a/contrib/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp b/contrib/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp
index e7bf143..ab2382e 100644
--- a/contrib/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp
+++ b/contrib/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp
@@ -51,7 +51,7 @@ private:
bool LowerSubregToReg(MachineInstr *MI);
bool LowerCopy(MachineInstr *MI);
- void TransferImplicitDefs(MachineInstr *MI);
+ void TransferImplicitOperands(MachineInstr *MI);
};
} // end anonymous namespace
@@ -61,20 +61,16 @@ char &llvm::ExpandPostRAPseudosID = ExpandPostRA::ID;
INITIALIZE_PASS(ExpandPostRA, "postrapseudos",
"Post-RA pseudo instruction expansion pass", false, false)
-/// TransferImplicitDefs - MI is a pseudo-instruction, and the lowered
-/// replacement instructions immediately precede it. Copy any implicit-def
+/// TransferImplicitOperands - MI is a pseudo-instruction, and the lowered
+/// replacement instructions immediately precede it. Copy any implicit
/// operands from MI to the replacement instruction.
-void
-ExpandPostRA::TransferImplicitDefs(MachineInstr *MI) {
+void ExpandPostRA::TransferImplicitOperands(MachineInstr *MI) {
MachineBasicBlock::iterator CopyMI = MI;
--CopyMI;
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- MachineOperand &MO = MI->getOperand(i);
- if (!MO.isReg() || !MO.isImplicit() || MO.isUse())
- continue;
- CopyMI->addOperand(MachineOperand::CreateReg(MO.getReg(), true, true));
- }
+ for (const MachineOperand &MO : MI->implicit_operands())
+ if (MO.isReg())
+ CopyMI->addOperand(MO);
}
bool ExpandPostRA::LowerSubregToReg(MachineInstr *MI) {
@@ -167,7 +163,7 @@ bool ExpandPostRA::LowerCopy(MachineInstr *MI) {
DstMO.getReg(), SrcMO.getReg(), SrcMO.isKill());
if (MI->getNumOperands() > 2)
- TransferImplicitDefs(MI);
+ TransferImplicitOperands(MI);
DEBUG({
MachineBasicBlock::iterator dMI = MI;
dbgs() << "replaced by: " << *(--dMI);
@@ -192,12 +188,12 @@ bool ExpandPostRA::runOnMachineFunction(MachineFunction &MF) {
mbbi != mbbe; ++mbbi) {
for (MachineBasicBlock::iterator mi = mbbi->begin(), me = mbbi->end();
mi != me;) {
- MachineInstr *MI = mi;
+ MachineInstr &MI = *mi;
// Advance iterator here because MI may be erased.
++mi;
// Only expand pseudos.
- if (!MI->isPseudo())
+ if (!MI.isPseudo())
continue;
// Give targets a chance to expand even standard pseudos.
@@ -207,12 +203,12 @@ bool ExpandPostRA::runOnMachineFunction(MachineFunction &MF) {
}
// Expand standard pseudos.
- switch (MI->getOpcode()) {
+ switch (MI.getOpcode()) {
case TargetOpcode::SUBREG_TO_REG:
- MadeChange |= LowerSubregToReg(MI);
+ MadeChange |= LowerSubregToReg(&MI);
break;
case TargetOpcode::COPY:
- MadeChange |= LowerCopy(MI);
+ MadeChange |= LowerCopy(&MI);
break;
case TargetOpcode::DBG_VALUE:
continue;
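The rename in this file is also a behavioural fix: the old loop forwarded only implicit defs and silently dropped implicit uses, while the new implicit_operands() walk copies every implicit register operand. Condensed from the hunk above:

    // Old: kept implicit defs only.
    if (!MO.isReg() || !MO.isImplicit() || MO.isUse())
      continue;
    CopyMI->addOperand(MachineOperand::CreateReg(MO.getReg(), true, true));
    // New: forward every implicit register operand verbatim, uses included.
    for (const MachineOperand &MO : MI->implicit_operands())
      if (MO.isReg())
        CopyMI->addOperand(MO);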
diff --git a/contrib/llvm/lib/CodeGen/FuncletLayout.cpp b/contrib/llvm/lib/CodeGen/FuncletLayout.cpp
index 8b2f505..b16f81c 100644
--- a/contrib/llvm/lib/CodeGen/FuncletLayout.cpp
+++ b/contrib/llvm/lib/CodeGen/FuncletLayout.cpp
@@ -28,6 +28,10 @@ public:
}
bool runOnMachineFunction(MachineFunction &F) override;
+ MachineFunctionProperties getRequiredProperties() const override {
+ return MachineFunctionProperties().set(
+ MachineFunctionProperties::Property::AllVRegsAllocated);
+ }
};
}
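getRequiredProperties is new pass-manager machinery in this import: a pass declares the invariants it expects, and they are verified before the pass runs. Both ExecutionDepsFix above and FuncletLayout here use the same declaration, which reads as "this pass must run after register allocation":

    // Declaration shared by post-RA passes in this import: the function must
    // have all virtual registers allocated before the pass may run.
    MachineFunctionProperties getRequiredProperties() const override {
      return MachineFunctionProperties().set(
          MachineFunctionProperties::Property::AllVRegsAllocated);
    }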
diff --git a/contrib/llvm/lib/CodeGen/GCRootLowering.cpp b/contrib/llvm/lib/CodeGen/GCRootLowering.cpp
index 484d317..326adab 100644
--- a/contrib/llvm/lib/CodeGen/GCRootLowering.cpp
+++ b/contrib/llvm/lib/CodeGen/GCRootLowering.cpp
@@ -64,7 +64,7 @@ class GCMachineCodeAnalysis : public MachineFunctionPass {
void FindSafePoints(MachineFunction &MF);
void VisitCallPoint(MachineBasicBlock::iterator MI);
MCSymbol *InsertLabel(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
- DebugLoc DL) const;
+ const DebugLoc &DL) const;
void FindStackOffsets(MachineFunction &MF);
@@ -170,8 +170,7 @@ static bool InsertRootInitializers(Function &F, AllocaInst **Roots,
for (AllocaInst **I = Roots, **E = Roots + Count; I != E; ++I)
if (!InitedRoots.count(*I)) {
StoreInst *SI = new StoreInst(
- ConstantPointerNull::get(cast<PointerType>(
- cast<PointerType>((*I)->getType())->getElementType())),
+ ConstantPointerNull::get(cast<PointerType>((*I)->getAllocatedType())),
*I);
SI->insertAfter(*I);
MadeChange = true;
@@ -271,7 +270,7 @@ void GCMachineCodeAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
MCSymbol *GCMachineCodeAnalysis::InsertLabel(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
- DebugLoc DL) const {
+ const DebugLoc &DL) const {
MCSymbol *Label = MBB.getParent()->getContext().createTempSymbol();
BuildMI(MBB, MI, DL, TII->get(TargetOpcode::GC_LABEL)).addSym(Label);
return Label;
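Two small cleanups in this file: DebugLoc is now passed by const reference (copying it is not free, since it tracks its underlying metadata node), and the root-initialization code uses AllocaInst::getAllocatedType() instead of digging the element type out of the pointer type. The equivalence, assuming Root is an AllocaInst*:

    // Both expressions name the type the alloca reserves storage for; the
    // new form skips the detour through the pointer type.
    Type *Old = cast<PointerType>(Root->getType())->getElementType();
    Type *New = Root->getAllocatedType();
    assert(Old == New && "an alloca's type points to its allocated type");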
diff --git a/contrib/llvm/lib/CodeGen/GlobalISel/GlobalISel.cpp b/contrib/llvm/lib/CodeGen/GlobalISel/GlobalISel.cpp
new file mode 100644
index 0000000..231e5ac
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/GlobalISel/GlobalISel.cpp
@@ -0,0 +1,30 @@
+//===-- llvm/CodeGen/GlobalISel/GlobalIsel.cpp --- GlobalISel ----*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file implements the common initialization routines for the
+/// GlobalISel library.
+//===----------------------------------------------------------------------===//
+
+#include "llvm/InitializePasses.h"
+#include "llvm/PassRegistry.h"
+
+using namespace llvm;
+
+#ifndef LLVM_BUILD_GLOBAL_ISEL
+
+void llvm::initializeGlobalISel(PassRegistry &Registry) {
+}
+
+#else
+
+void llvm::initializeGlobalISel(PassRegistry &Registry) {
+ initializeIRTranslatorPass(Registry);
+ initializeRegBankSelectPass(Registry);
+}
+#endif // LLVM_BUILD_GLOBAL_ISEL
diff --git a/contrib/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/contrib/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
new file mode 100644
index 0000000..b8a960c
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
@@ -0,0 +1,164 @@
+//===-- llvm/CodeGen/GlobalISel/IRTranslator.cpp - IRTranslator --*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file implements the IRTranslator class.
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/GlobalISel/IRTranslator.h"
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/GlobalISel/CallLowering.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/Constant.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/Value.h"
+#include "llvm/Target/TargetLowering.h"
+
+#define DEBUG_TYPE "irtranslator"
+
+using namespace llvm;
+
+char IRTranslator::ID = 0;
+INITIALIZE_PASS(IRTranslator, "irtranslator", "IRTranslator LLVM IR -> MI",
+ false, false);
+
+IRTranslator::IRTranslator() : MachineFunctionPass(ID), MRI(nullptr) {
+ initializeIRTranslatorPass(*PassRegistry::getPassRegistry());
+}
+
+unsigned IRTranslator::getOrCreateVReg(const Value &Val) {
+ unsigned &ValReg = ValToVReg[&Val];
+ // Check if this is the first time we see Val.
+ if (!ValReg) {
+ // Fill ValRegsSequence with the sequence of registers
+ // we need to concat together to produce the value.
+ assert(Val.getType()->isSized() &&
+ "Don't know how to create an empty vreg");
+ assert(!Val.getType()->isAggregateType() && "Not yet implemented");
+ unsigned Size = Val.getType()->getPrimitiveSizeInBits();
+ unsigned VReg = MRI->createGenericVirtualRegister(Size);
+ ValReg = VReg;
+ assert(!isa<Constant>(Val) && "Not yet implemented");
+ }
+ return ValReg;
+}
+
+MachineBasicBlock &IRTranslator::getOrCreateBB(const BasicBlock &BB) {
+ MachineBasicBlock *&MBB = BBToMBB[&BB];
+ if (!MBB) {
+ MachineFunction &MF = MIRBuilder.getMF();
+ MBB = MF.CreateMachineBasicBlock();
+ MF.push_back(MBB);
+ }
+ return *MBB;
+}
+
+bool IRTranslator::translateBinaryOp(unsigned Opcode, const Instruction &Inst) {
+ // Get or create a virtual register for each value.
+ // Unless the value is a Constant => loadimm cst?
+ // or inline constant each time?
+ // Creation of a virtual register needs to have a size.
+ unsigned Op0 = getOrCreateVReg(*Inst.getOperand(0));
+ unsigned Op1 = getOrCreateVReg(*Inst.getOperand(1));
+ unsigned Res = getOrCreateVReg(Inst);
+ MIRBuilder.buildInstr(Opcode, Inst.getType(), Res, Op0, Op1);
+ return true;
+}
+
+bool IRTranslator::translateReturn(const Instruction &Inst) {
+ assert(isa<ReturnInst>(Inst) && "Return expected");
+ const Value *Ret = cast<ReturnInst>(Inst).getReturnValue();
+ // The target may mess up with the insertion point, but
+ // this is not important as a return is the last instruction
+ // of the block anyway.
+ return CLI->lowerReturn(MIRBuilder, Ret, !Ret ? 0 : getOrCreateVReg(*Ret));
+}
+
+bool IRTranslator::translateBr(const Instruction &Inst) {
+ assert(isa<BranchInst>(Inst) && "Branch expected");
+ const BranchInst &BrInst = *cast<BranchInst>(&Inst);
+ if (BrInst.isUnconditional()) {
+ const BasicBlock &BrTgt = *cast<BasicBlock>(BrInst.getOperand(0));
+ MachineBasicBlock &TgtBB = getOrCreateBB(BrTgt);
+ MIRBuilder.buildInstr(TargetOpcode::G_BR, BrTgt.getType(), TgtBB);
+ } else {
+ assert(0 && "Not yet implemented");
+ }
+ // Link successors.
+ MachineBasicBlock &CurBB = MIRBuilder.getMBB();
+ for (const BasicBlock *Succ : BrInst.successors())
+ CurBB.addSuccessor(&getOrCreateBB(*Succ));
+ return true;
+}
+
+bool IRTranslator::translate(const Instruction &Inst) {
+ MIRBuilder.setDebugLoc(Inst.getDebugLoc());
+ switch(Inst.getOpcode()) {
+ case Instruction::Add:
+ return translateBinaryOp(TargetOpcode::G_ADD, Inst);
+ case Instruction::Or:
+ return translateBinaryOp(TargetOpcode::G_OR, Inst);
+ case Instruction::Br:
+ return translateBr(Inst);
+ case Instruction::Ret:
+ return translateReturn(Inst);
+
+ default:
+ llvm_unreachable("Opcode not supported");
+ }
+}
+
+
+void IRTranslator::finalize() {
+ // Release the memory used by the different maps we
+ // needed during the translation.
+ ValToVReg.clear();
+ Constants.clear();
+}
+
+bool IRTranslator::runOnMachineFunction(MachineFunction &MF) {
+ const Function &F = *MF.getFunction();
+ if (F.empty())
+ return false;
+ CLI = MF.getSubtarget().getCallLowering();
+ MIRBuilder.setMF(MF);
+ MRI = &MF.getRegInfo();
+ // Setup the arguments.
+ MachineBasicBlock &MBB = getOrCreateBB(F.front());
+ MIRBuilder.setMBB(MBB);
+ SmallVector<unsigned, 8> VRegArgs;
+ for (const Argument &Arg: F.args())
+ VRegArgs.push_back(getOrCreateVReg(Arg));
+ bool Succeeded =
+ CLI->lowerFormalArguments(MIRBuilder, F.getArgumentList(), VRegArgs);
+ if (!Succeeded)
+ report_fatal_error("Unable to lower arguments");
+
+ for (const BasicBlock &BB: F) {
+ MachineBasicBlock &MBB = getOrCreateBB(BB);
+ // Set the insertion point of all the following translations to
+ // the end of this basic block.
+ MIRBuilder.setMBB(MBB);
+ for (const Instruction &Inst: BB) {
+ bool Succeeded = translate(Inst);
+ if (!Succeeded) {
+ DEBUG(dbgs() << "Cannot translate: " << Inst << '\n');
+ report_fatal_error("Unable to translate instruction");
+ }
+ }
+ }
+
+ // Now that the MachineFrameInfo has been configured, no further changes to
+ // the reserved registers are possible.
+ MRI->freezeReservedRegs(MF);
+
+ return false;
+}
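getOrCreateVReg above leans on a classic DenseMap idiom: operator[] value-initializes the slot, and vreg number 0 is never handed out, so a zero entry doubles as "not translated yet". A standalone model of the idiom with toy types (not LLVM's):

    // Toy model of IRTranslator's lazy ValToVReg map.
    #include <unordered_map>

    struct Value; // opaque stand-in for llvm::Value

    unsigned getOrCreateVReg(const Value *V,
                             std::unordered_map<const Value *, unsigned> &Map,
                             unsigned &NextVReg) {
      unsigned &Reg = Map[V]; // first lookup inserts 0
      if (!Reg)
        Reg = ++NextVReg;     // allocate exactly once per Value, never 0
      return Reg;
    }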
diff --git a/contrib/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp b/contrib/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
new file mode 100644
index 0000000..2f19bcf
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
@@ -0,0 +1,104 @@
+//===-- llvm/CodeGen/GlobalISel/MachineIRBuilder.cpp - MIBuilder--*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file implements the MachineIRBuilder class.
+//===----------------------------------------------------------------------===//
+#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
+
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetOpcodes.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+
+using namespace llvm;
+
+void MachineIRBuilder::setMF(MachineFunction &MF) {
+ this->MF = &MF;
+ this->MBB = nullptr;
+ this->TII = MF.getSubtarget().getInstrInfo();
+ this->DL = DebugLoc();
+ this->MI = nullptr;
+}
+
+void MachineIRBuilder::setMBB(MachineBasicBlock &MBB, bool Beginning) {
+ this->MBB = &MBB;
+ Before = Beginning;
+ assert(&getMF() == MBB.getParent() &&
+ "Basic block is in a different function");
+}
+
+void MachineIRBuilder::setInstr(MachineInstr &MI, bool Before) {
+ assert(MI.getParent() && "Instruction is not part of a basic block");
+ setMBB(*MI.getParent());
+ this->MI = &MI;
+ this->Before = Before;
+}
+
+MachineBasicBlock::iterator MachineIRBuilder::getInsertPt() {
+ if (MI) {
+ if (Before)
+ return MI;
+ if (!MI->getNextNode())
+ return getMBB().end();
+ return MI->getNextNode();
+ }
+ return Before ? getMBB().begin() : getMBB().end();
+}
+
+//------------------------------------------------------------------------------
+// Build instruction variants.
+//------------------------------------------------------------------------------
+MachineInstr *MachineIRBuilder::buildInstr(unsigned Opcode, Type *Ty) {
+ MachineInstr *NewMI = BuildMI(getMF(), DL, getTII().get(Opcode));
+ if (Ty) {
+ assert(isPreISelGenericOpcode(Opcode) &&
+ "Only generic instruction can have a type");
+ NewMI->setType(Ty);
+ } else
+ assert(!isPreISelGenericOpcode(Opcode) &&
+ "Generic instruction must have a type");
+ getMBB().insert(getInsertPt(), NewMI);
+ return NewMI;
+}
+
+MachineInstr *MachineIRBuilder::buildInstr(unsigned Opcode, unsigned Res,
+ unsigned Op0, unsigned Op1) {
+ return buildInstr(Opcode, nullptr, Res, Op0, Op1);
+}
+
+MachineInstr *MachineIRBuilder::buildInstr(unsigned Opcode, Type *Ty,
+ unsigned Res, unsigned Op0,
+ unsigned Op1) {
+ MachineInstr *NewMI = buildInstr(Opcode, Ty);
+ MachineInstrBuilder(getMF(), NewMI)
+ .addReg(Res, RegState::Define)
+ .addReg(Op0)
+ .addReg(Op1);
+ return NewMI;
+}
+
+MachineInstr *MachineIRBuilder::buildInstr(unsigned Opcode, unsigned Res,
+ unsigned Op0) {
+ MachineInstr *NewMI = buildInstr(Opcode, nullptr);
+ MachineInstrBuilder(getMF(), NewMI).addReg(Res, RegState::Define).addReg(Op0);
+ return NewMI;
+}
+
+MachineInstr *MachineIRBuilder::buildInstr(unsigned Opcode) {
+ return buildInstr(Opcode, nullptr);
+}
+
+MachineInstr *MachineIRBuilder::buildInstr(unsigned Opcode, Type *Ty,
+ MachineBasicBlock &BB) {
+ MachineInstr *NewMI = buildInstr(Opcode, Ty);
+ MachineInstrBuilder(getMF(), NewMI).addMBB(&BB);
+ return NewMI;
+}
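Putting the two new files together, a client drives the builder in three steps: bind the function, pick a block, then emit generic instructions at the running insertion point. A hedged usage sketch using only the entry points defined above; MF, MBB, Ty, Res, Op0 and Op1 are assumed to exist in the caller:

    // Usage sketch for the MachineIRBuilder API defined above.
    MachineIRBuilder MIRBuilder;
    MIRBuilder.setMF(MF);   // bind the function and reset all builder state
    MIRBuilder.setMBB(MBB); // subsequent instructions append to MBB's end
    MachineInstr *Add =
        MIRBuilder.buildInstr(TargetOpcode::G_ADD, Ty, Res, Op0, Op1);
    (void)Add;              // the new instruction can be inspected further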
diff --git a/contrib/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp b/contrib/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp
new file mode 100644
index 0000000..419e270
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp
@@ -0,0 +1,897 @@
+//===- llvm/CodeGen/GlobalISel/RegBankSelect.cpp - RegBankSelect -*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file implements the RegBankSelect class.
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/GlobalISel/RegBankSelect.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/CodeGen/GlobalISel/RegisterBank.h"
+#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
+#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/Function.h"
+#include "llvm/Support/BlockFrequency.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+
+#define DEBUG_TYPE "regbankselect"
+
+using namespace llvm;
+
+static cl::opt<RegBankSelect::Mode> RegBankSelectMode(
+ cl::desc("Mode of the RegBankSelect pass"), cl::Hidden, cl::Optional,
+ cl::values(clEnumValN(RegBankSelect::Mode::Fast, "regbankselect-fast",
+ "Run the Fast mode (default mapping)"),
+ clEnumValN(RegBankSelect::Mode::Greedy, "regbankselect-greedy",
+ "Use the Greedy mode (best local mapping)"),
+ clEnumValEnd));
+
+char RegBankSelect::ID = 0;
+INITIALIZE_PASS_BEGIN(RegBankSelect, "regbankselect",
+ "Assign register bank of generic virtual registers",
+ false, false);
+INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfo)
+INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
+INITIALIZE_PASS_END(RegBankSelect, "regbankselect",
+ "Assign register bank of generic virtual registers", false,
+ false);
+
+RegBankSelect::RegBankSelect(Mode RunningMode)
+ : MachineFunctionPass(ID), RBI(nullptr), MRI(nullptr), TRI(nullptr),
+ MBFI(nullptr), MBPI(nullptr), OptMode(RunningMode) {
+ initializeRegBankSelectPass(*PassRegistry::getPassRegistry());
+ if (RegBankSelectMode.getNumOccurrences() != 0) {
+ OptMode = RegBankSelectMode;
+ if (RegBankSelectMode != RunningMode)
+ DEBUG(dbgs() << "RegBankSelect mode overridden by command line\n");
+ }
+}
+
+void RegBankSelect::init(MachineFunction &MF) {
+ RBI = MF.getSubtarget().getRegBankInfo();
+ assert(RBI && "Cannot work without RegisterBankInfo");
+ MRI = &MF.getRegInfo();
+ TRI = MF.getSubtarget().getRegisterInfo();
+ if (OptMode != Mode::Fast) {
+ MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
+ MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
+ } else {
+ MBFI = nullptr;
+ MBPI = nullptr;
+ }
+ MIRBuilder.setMF(MF);
+}
+
+void RegBankSelect::getAnalysisUsage(AnalysisUsage &AU) const {
+ if (OptMode != Mode::Fast) {
+ // We could preserve the information from these two analyses, but
+ // the APIs do not allow us to do so yet.
+ AU.addRequired<MachineBlockFrequencyInfo>();
+ AU.addRequired<MachineBranchProbabilityInfo>();
+ }
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+bool RegBankSelect::assignmentMatch(
+ unsigned Reg, const RegisterBankInfo::ValueMapping &ValMapping,
+ bool &OnlyAssign) const {
+ // By default we assume we will have to repair something.
+ OnlyAssign = false;
+ // Each part of a breakdown needs to end up in a different register.
+ // In other words, the assignment of Reg does not match.
+ if (ValMapping.BreakDown.size() > 1)
+ return false;
+
+ const RegisterBank *CurRegBank = RBI->getRegBank(Reg, *MRI, *TRI);
+ const RegisterBank *DesiredRegBank = ValMapping.BreakDown[0].RegBank;
+ // If Reg is free of assignment, a simple assignment will make the
+ // register banks match.
+ OnlyAssign = CurRegBank == nullptr;
+ DEBUG(dbgs() << "Does assignment already match: ";
+ if (CurRegBank) dbgs() << *CurRegBank; else dbgs() << "none";
+ dbgs() << " against ";
+ assert(DesiredRegBank && "The mapping must be valid");
+ dbgs() << *DesiredRegBank << '\n';);
+ return CurRegBank == DesiredRegBank;
+}
+
+void RegBankSelect::repairReg(
+ MachineOperand &MO, const RegisterBankInfo::ValueMapping &ValMapping,
+ RegBankSelect::RepairingPlacement &RepairPt,
+ const iterator_range<SmallVectorImpl<unsigned>::const_iterator> &NewVRegs) {
+ assert(ValMapping.BreakDown.size() == 1 && "Not yet implemented");
+ // An empty range of new register means no repairing.
+ assert(NewVRegs.begin() != NewVRegs.end() && "We should not have to repair");
+
+ // Assume we are repairing a use and thus, the original reg will be
+ // the source of the repairing.
+ unsigned Src = MO.getReg();
+ unsigned Dst = *NewVRegs.begin();
+
+ // If we repair a definition, swap the source and destination for
+ // the repairing.
+ if (MO.isDef())
+ std::swap(Src, Dst);
+
+ assert((RepairPt.getNumInsertPoints() == 1 ||
+ TargetRegisterInfo::isPhysicalRegister(Dst)) &&
+ "We are about to create several defs for Dst");
+
+ // Build the instruction used to repair, then clone it at the right places.
+ MachineInstr *MI = MIRBuilder.buildInstr(TargetOpcode::COPY, Dst, Src);
+ MI->removeFromParent();
+ DEBUG(dbgs() << "Copy: " << PrintReg(Src) << " to: " << PrintReg(Dst)
+ << '\n');
+ // TODO:
+ // Check if MI is legal. if not, we need to legalize all the
+ // instructions we are going to insert.
+ std::unique_ptr<MachineInstr *[]> NewInstrs(
+ new MachineInstr *[RepairPt.getNumInsertPoints()]);
+ bool IsFirst = true;
+ unsigned Idx = 0;
+ for (const std::unique_ptr<InsertPoint> &InsertPt : RepairPt) {
+ MachineInstr *CurMI;
+ if (IsFirst)
+ CurMI = MI;
+ else
+ CurMI = MIRBuilder.getMF().CloneMachineInstr(MI);
+ InsertPt->insert(*CurMI);
+ NewInstrs[Idx++] = CurMI;
+ IsFirst = false;
+ }
+ // TODO:
+ // Legalize NewInstrs if need be.
+}
+
+uint64_t RegBankSelect::getRepairCost(
+ const MachineOperand &MO,
+ const RegisterBankInfo::ValueMapping &ValMapping) const {
+ assert(MO.isReg() && "We should only repair register operand");
+ assert(!ValMapping.BreakDown.empty() && "Nothing to map??");
+
+ bool IsSameNumOfValues = ValMapping.BreakDown.size() == 1;
+ const RegisterBank *CurRegBank = RBI->getRegBank(MO.getReg(), *MRI, *TRI);
+ // If MO does not have a register bank, we should have just been
+ // able to set one unless we have to break the value down.
+ assert((!IsSameNumOfValues || CurRegBank) && "We should not have to repair");
+ // Def: Val <- NewDefs
+ // Same number of values: copy
+ // Different number: Val = build_sequence Defs1, Defs2, ...
+ // Use: NewSources <- Val.
+ // Same number of values: copy.
+ // Different number: Src1, Src2, ... =
+ // extract_value Val, Src1Begin, Src1Len, Src2Begin, Src2Len, ...
+ // We should remember that this value is available somewhere else to
+ // coalesce the value.
+
+ if (IsSameNumOfValues) {
+ const RegisterBank *DesiredRegBank = ValMapping.BreakDown[0].RegBank;
+ // If we repair a definition, swap the source and destination for
+ // the repairing.
+ if (MO.isDef())
+ std::swap(CurRegBank, DesiredRegBank);
+ // TODO: It may be possible to actually avoid the copy.
+ // If we repair something where the source is defined by a copy
+ // and the source of that copy is on the right bank, we can reuse
+ // it for free.
+ // E.g.,
+ // RegToRepair<BankA> = copy AlternativeSrc<BankB>
+ // = op RegToRepair<BankA>
+ // We can simply propagate AlternativeSrc instead of copying RegToRepair
+ // into a new virtual register.
+ // We would also need to propagate this information in the
+ // repairing placement.
+ unsigned Cost =
+ RBI->copyCost(*DesiredRegBank, *CurRegBank,
+ RegisterBankInfo::getSizeInBits(MO.getReg(), *MRI, *TRI));
+ // TODO: use a dedicated constant for ImpossibleCost.
+ if (Cost != UINT_MAX)
+ return Cost;
+ assert(false && "Legalization not available yet");
+ // Return the legalization cost of that repairing.
+ }
+ assert(false && "Complex repairing not implemented yet");
+ return 1;
+}
+
+RegisterBankInfo::InstructionMapping &RegBankSelect::findBestMapping(
+ MachineInstr &MI, RegisterBankInfo::InstructionMappings &PossibleMappings,
+ SmallVectorImpl<RepairingPlacement> &RepairPts) {
+
+ RegisterBankInfo::InstructionMapping *BestMapping = nullptr;
+ MappingCost Cost = MappingCost::ImpossibleCost();
+ SmallVector<RepairingPlacement, 4> LocalRepairPts;
+ for (RegisterBankInfo::InstructionMapping &CurMapping : PossibleMappings) {
+ MappingCost CurCost = computeMapping(MI, CurMapping, LocalRepairPts, &Cost);
+ if (CurCost < Cost) {
+ Cost = CurCost;
+ BestMapping = &CurMapping;
+ RepairPts.clear();
+ for (RepairingPlacement &RepairPt : LocalRepairPts)
+ RepairPts.emplace_back(std::move(RepairPt));
+ }
+ }
+ assert(BestMapping && "No suitable mapping for instruction");
+ return *BestMapping;
+}
+
+void RegBankSelect::tryAvoidingSplit(
+ RegBankSelect::RepairingPlacement &RepairPt, const MachineOperand &MO,
+ const RegisterBankInfo::ValueMapping &ValMapping) const {
+ const MachineInstr &MI = *MO.getParent();
+ assert(RepairPt.hasSplit() && "We should not have to adjust for split");
+ // Splitting should only occur for PHIs or between terminators,
+ // because we only do local repairing.
+ assert((MI.isPHI() || MI.isTerminator()) && "Why do we split?");
+
+ assert(&MI.getOperand(RepairPt.getOpIdx()) == &MO &&
+ "Repairing placement does not match operand");
+
+ // If we need splitting for phis, it is because we
+ // could not find an insertion point before the terminators of
+ // the predecessor block for this argument. In other words,
+ // the input value is defined by one of the terminators.
+ assert((!MI.isPHI() || !MO.isDef()) && "Need split for phi def?");
+
+ // We split to repair the use of a phi or a terminator.
+ if (!MO.isDef()) {
+ if (MI.isTerminator()) {
+ assert(&MI != &(*MI.getParent()->getFirstTerminator()) &&
+ "Need to split for the first terminator?!");
+ } else {
+ // For the PHI case, the split may not be actually required.
+ // In the copy case, a phi is already a copy on the incoming edge,
+ // therefore there is no need to split.
+ if (ValMapping.BreakDown.size() == 1)
+ // This is already a copy; there is nothing to do.
+ RepairPt.switchTo(RepairingPlacement::RepairingKind::Reassign);
+ }
+ return;
+ }
+
+ // At this point, we need to repair a definition of a terminator.
+
+ // Technically we need to fix the def of MI on all outgoing
+ // edges of MI to keep the repairing local. In other words, we
+ // will create several definitions of the same register. This
+ // does not work for SSA unless that definition is a physical
+ // register.
+ // However, there are other cases where we can get away with
+ // that while still keeping the repairing local.
+ assert(MI.isTerminator() && MO.isDef() &&
+ "This code is for the def of a terminator");
+
+ // Since we use RPO traversal, if we need to repair a definition
+ // this means this definition could be:
+ // 1. Used by PHIs (i.e., this VReg has been visited as part of the
+ // uses of a phi.), or
+ // 2. Part of a target specific instruction (i.e., the target applied
+ // some register class constraints when creating the instruction.)
+ // If the constraints come from #2, the target said that another mapping
+ // is supported, so we may just drop them. Indeed, if we do not change
+ // the number of registers holding that value, the uses will get fixed
+ // when we get to them.
+ // Uses in PHIs may already have been processed, though.
+ // If the constraints come from #1, those are weak constraints and
+ // no actual uses may rely on them. However, the problem remains mainly
+ // the same as for #2. If the value stays in one register, we could
+ // just switch the register bank of the definition, but we would need to
+ // account for a repairing cost for each phi we silently change.
+ //
+ // In any case, if the value needs to be broken down into several
+ // registers, the repairing is not local anymore as we need to patch
+ // every uses to rebuild the value in just one register.
+ //
+ // To summarize:
+ // - If the value is in a physical register, we can do the split and
+ // fix locally.
+ // Otherwise if the value is in a virtual register:
+ // - If the value remains in one register, we do not have to split;
+ // just switching the register bank would do, but we need to account
+ // in the repairing cost for all the phis we changed.
+ // - If the value spans several registers, then we cannot do a local
+ // repairing.
+
+ // Check if this is a physical or virtual register.
+ unsigned Reg = MO.getReg();
+ if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ // We are going to split every outgoing edges.
+ // Check that this is possible.
+ // FIXME: The machine representation is currently broken
+ // since it allows several terminators in one basic block.
+ // Because of that we would technically need a way to get
+ // the targets of just one terminator to know which edges
+ // we have to split.
+ // Assert that we do not hit the ill-formed representation.
+
+ // If there are other terminators before that one, some of
+ // the outgoing edges may not be dominated by this definition.
+ assert(&MI == &(*MI.getParent()->getFirstTerminator()) &&
+ "Do not know which outgoing edges are relevant");
+ const MachineInstr *Next = MI.getNextNode();
+ assert((!Next || Next->isUnconditionalBranch()) &&
+ "Do not know where each terminator ends up");
+ if (Next)
+ // If the next terminator uses Reg, this means we have
+ // to split right after MI and thus we need a way to ask
+ // which outgoing edges are affected.
+ assert(!Next->readsRegister(Reg) && "Need to split between terminators");
+ // We will split all the edges and repair there.
+ } else {
+ // This is a virtual register defined by a terminator.
+ if (ValMapping.BreakDown.size() == 1) {
+ // There is nothing to repair, but we may actually lie about
+ // the repairing cost because of the PHIs already processed,
+ // as stated above.
+ // The code will be correct, though.
+ assert(0 && "Repairing cost may not be accurate");
+ } else {
+ // We need to do non-local repairing. Basically, patch all
+ // the uses (i.e., phis) that we already processed.
+ // For now, just say this mapping is not possible.
+ RepairPt.switchTo(RepairingPlacement::RepairingKind::Impossible);
+ }
+ }
+}
+
+RegBankSelect::MappingCost RegBankSelect::computeMapping(
+ MachineInstr &MI, const RegisterBankInfo::InstructionMapping &InstrMapping,
+ SmallVectorImpl<RepairingPlacement> &RepairPts,
+ const RegBankSelect::MappingCost *BestCost) {
+ assert((MBFI || !BestCost) && "Costs comparison require MBFI");
+
+ // If mapped with InstrMapping, MI will have the recorded cost.
+ MappingCost Cost(MBFI ? MBFI->getBlockFreq(MI.getParent()) : 1);
+ bool Saturated = Cost.addLocalCost(InstrMapping.getCost());
+ assert(!Saturated && "Possible mapping saturated the cost");
+ DEBUG(dbgs() << "Evaluating mapping cost for: " << MI);
+ DEBUG(dbgs() << "With: " << InstrMapping << '\n');
+ RepairPts.clear();
+ if (BestCost && Cost > *BestCost)
+ return Cost;
+
+ // Moreover, to realize this mapping, the register bank of each operand must
+ // match this mapping. In other words, we may need to locally reassign the
+ // register banks. Account for that repairing cost as well.
+ // In this context, local means in the surrounding of MI.
+ for (unsigned OpIdx = 0, EndOpIdx = MI.getNumOperands(); OpIdx != EndOpIdx;
+ ++OpIdx) {
+ const MachineOperand &MO = MI.getOperand(OpIdx);
+ if (!MO.isReg())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg)
+ continue;
+ DEBUG(dbgs() << "Opd" << OpIdx);
+ const RegisterBankInfo::ValueMapping &ValMapping =
+ InstrMapping.getOperandMapping(OpIdx);
+ // If Reg is already properly mapped, this is free.
+ bool Assign;
+ if (assignmentMatch(Reg, ValMapping, Assign)) {
+ DEBUG(dbgs() << " is free (match).\n");
+ continue;
+ }
+ if (Assign) {
+ DEBUG(dbgs() << " is free (simple assignment).\n");
+ RepairPts.emplace_back(RepairingPlacement(MI, OpIdx, *TRI, *this,
+ RepairingPlacement::Reassign));
+ continue;
+ }
+
+ // Find the insertion point for the repairing code.
+ RepairPts.emplace_back(
+ RepairingPlacement(MI, OpIdx, *TRI, *this, RepairingPlacement::Insert));
+ RepairingPlacement &RepairPt = RepairPts.back();
+
+ // If we need to split a basic block to materialize this insertion point,
+ // we may give a higher cost to this mapping.
+ // Nevertheless, we may get away with the split, so try that first.
+ if (RepairPt.hasSplit())
+ tryAvoidingSplit(RepairPt, MO, ValMapping);
+
+ // Check that the materialization of the repairing is possible.
+ if (!RepairPt.canMaterialize())
+ return MappingCost::ImpossibleCost();
+
+ // Account for the split cost and repair cost.
+ // Unless the cost is already saturated or we do not care about the cost.
+ if (!BestCost || Saturated)
+ continue;
+
+ // To get accurate information we need MBFI and MBPI.
+ // Thus, if we end up here this information should be here.
+ assert(MBFI && MBPI && "Cost computation requires MBFI and MBPI");
+
+ // FIXME: We will have to rework the repairing cost model.
+ // The repairing cost depends on the register bank that MO has.
+ // However, when we break down the value into different values,
+ // MO may not have a register bank while still needing repairing.
+ // For the fast mode, we don't compute the cost so that is fine,
+ // but still for the repairing code, we will have to make a choice.
+ // For the greedy mode, we should choose greedily what is the best
+ // choice based on the next use of MO.
+
+ // Sums up the repairing cost of MO at each insertion point.
+ uint64_t RepairCost = getRepairCost(MO, ValMapping);
+ // Bias used for splitting: 5%.
+ const uint64_t PercentageForBias = 5;
+ uint64_t Bias = (RepairCost * PercentageForBias + 99) / 100;
+ // We should not need more than a couple of instructions to repair
+ // an assignment. In other words, the computation should not
+ // overflow because the repairing cost is free of basic block
+ // frequency.
+ assert(((RepairCost < RepairCost * PercentageForBias) &&
+ (RepairCost * PercentageForBias <
+ RepairCost * PercentageForBias + 99)) &&
+ "Repairing involves more than a billion of instructions?!");
+ for (const std::unique_ptr<InsertPoint> &InsertPt : RepairPt) {
+ assert(InsertPt->canMaterialize() && "We should not have made it here");
+ // We will be applying some basic block frequency, and those use uint64_t.
+ if (!InsertPt->isSplit())
+ Saturated = Cost.addLocalCost(RepairCost);
+ else {
+ uint64_t CostForInsertPt = RepairCost;
+ // Again, we shouldn't overflow here given that
+ // CostForInsertPt is frequency-free at this point.
+ assert(CostForInsertPt + Bias > CostForInsertPt &&
+ "Repairing + split bias overflows");
+ CostForInsertPt += Bias;
+ uint64_t PtCost = InsertPt->frequency(*this) * CostForInsertPt;
+ // Check if we just overflowed.
+ if ((Saturated = PtCost < CostForInsertPt))
+ Cost.saturate();
+ else
+ Saturated = Cost.addNonLocalCost(PtCost);
+ }
+
+ // Stop looking into what it takes to repair, this is already
+ // too expensive.
+ if (BestCost && Cost > *BestCost)
+ return Cost;
+
+ // No need to accumulate more cost information.
+ // We need to still gather the repairing information though.
+ if (Saturated)
+ break;
+ }
+ }
+ return Cost;
+}
+
+void RegBankSelect::applyMapping(
+ MachineInstr &MI, const RegisterBankInfo::InstructionMapping &InstrMapping,
+ SmallVectorImpl<RegBankSelect::RepairingPlacement> &RepairPts) {
+ // OpdMapper will hold all the information needed for the rewriting.
+ RegisterBankInfo::OperandsMapper OpdMapper(MI, InstrMapping, *MRI);
+
+ // First, place the repairing code.
+ for (RepairingPlacement &RepairPt : RepairPts) {
+ assert(RepairPt.canMaterialize() &&
+ RepairPt.getKind() != RepairingPlacement::Impossible &&
+ "This mapping is impossible");
+ assert(RepairPt.getKind() != RepairingPlacement::None &&
+ "This should not make its way in the list");
+ unsigned OpIdx = RepairPt.getOpIdx();
+ MachineOperand &MO = MI.getOperand(OpIdx);
+ const RegisterBankInfo::ValueMapping &ValMapping =
+ InstrMapping.getOperandMapping(OpIdx);
+ unsigned BreakDownSize = ValMapping.BreakDown.size();
+ (void)BreakDownSize;
+ unsigned Reg = MO.getReg();
+
+ switch (RepairPt.getKind()) {
+ case RepairingPlacement::Reassign:
+ assert(BreakDownSize == 1 &&
+ "Reassignment should only be for simple mapping");
+ MRI->setRegBank(Reg, *ValMapping.BreakDown[0].RegBank);
+ break;
+ case RepairingPlacement::Insert:
+ OpdMapper.createVRegs(OpIdx);
+ repairReg(MO, ValMapping, RepairPt, OpdMapper.getVRegs(OpIdx));
+ break;
+ default:
+ llvm_unreachable("Other kind should not happen");
+ }
+ }
+ // Second, rewrite the instruction.
+ DEBUG(dbgs() << "Actual mapping of the operands: " << OpdMapper << '\n');
+ RBI->applyMapping(OpdMapper);
+}
+
+void RegBankSelect::assignInstr(MachineInstr &MI) {
+ DEBUG(dbgs() << "Assign: " << MI);
+ // Remember the repairing placement for all the operands.
+ SmallVector<RepairingPlacement, 4> RepairPts;
+
+ RegisterBankInfo::InstructionMapping BestMapping;
+ if (OptMode == RegBankSelect::Mode::Fast) {
+ BestMapping = RBI->getInstrMapping(MI);
+ MappingCost DefaultCost = computeMapping(MI, BestMapping, RepairPts);
+ (void)DefaultCost;
+ assert(DefaultCost != MappingCost::ImpossibleCost() &&
+ "Default mapping is not suited");
+ } else {
+ RegisterBankInfo::InstructionMappings PossibleMappings =
+ RBI->getInstrPossibleMappings(MI);
+ assert(!PossibleMappings.empty() &&
+ "Do not know how to map this instruction");
+ BestMapping = std::move(findBestMapping(MI, PossibleMappings, RepairPts));
+ }
+ // Make sure the mapping is valid for MI.
+ assert(BestMapping.verify(MI) && "Invalid instruction mapping");
+
+ DEBUG(dbgs() << "Mapping: " << BestMapping << '\n');
+
+ // After this call, MI may not be valid anymore.
+ // Do not use it.
+ applyMapping(MI, BestMapping, RepairPts);
+}
+
+bool RegBankSelect::runOnMachineFunction(MachineFunction &MF) {
+ DEBUG(dbgs() << "Assign register banks for: " << MF.getName() << '\n');
+ const Function *F = MF.getFunction();
+ Mode SaveOptMode = OptMode;
+ if (F->hasFnAttribute(Attribute::OptimizeNone))
+ OptMode = Mode::Fast;
+ init(MF);
+ // Walk the function and assign register banks to all operands.
+ // Use a RPOT to make sure all registers are assigned before we choose
+ // the best mapping of the current instruction.
+ ReversePostOrderTraversal<MachineFunction*> RPOT(&MF);
+ for (MachineBasicBlock *MBB : RPOT) {
+ // Set a sensible insertion point so that subsequent calls to
+ // MIRBuilder insert into this block.
+ MIRBuilder.setMBB(*MBB);
+ for (MachineBasicBlock::iterator MII = MBB->begin(), End = MBB->end();
+ MII != End;) {
+ // MI might be invalidated by the assignment, so advance the
+ // iterator beforehand.
+ assignInstr(*MII++);
+ }
+ }
+ OptMode = SaveOptMode;
+ return false;
+}
+
+//------------------------------------------------------------------------------
+// Helper Classes Implementation
+//------------------------------------------------------------------------------
+RegBankSelect::RepairingPlacement::RepairingPlacement(
+ MachineInstr &MI, unsigned OpIdx, const TargetRegisterInfo &TRI, Pass &P,
+ RepairingPlacement::RepairingKind Kind)
+ // Default is, we are going to insert code to repair OpIdx.
+ : Kind(Kind),
+ OpIdx(OpIdx),
+ CanMaterialize(Kind != RepairingKind::Impossible),
+ HasSplit(false),
+ P(P) {
+ const MachineOperand &MO = MI.getOperand(OpIdx);
+ assert(MO.isReg() && "Trying to repair a non-reg operand");
+
+ if (Kind != RepairingKind::Insert)
+ return;
+
+ // Repairings for definitions happen after MI, uses happen before.
+ bool Before = !MO.isDef();
+
+ // Check if we are done with MI.
+ if (!MI.isPHI() && !MI.isTerminator()) {
+ addInsertPoint(MI, Before);
+ // We are done with the initialization.
+ return;
+ }
+
+ // Now, look for the special cases.
+ if (MI.isPHI()) {
+ // - PHIs must be the first instructions:
+ // * Before, we have to split the related incoming edge.
+ // * After, move the insertion point past the last phi.
+ if (!Before) {
+ MachineBasicBlock::iterator It = MI.getParent()->getFirstNonPHI();
+ if (It != MI.getParent()->end())
+ addInsertPoint(*It, /*Before*/ true);
+ else
+ addInsertPoint(*(--It), /*Before*/ false);
+ return;
+ }
+ // We repair a use of a phi, we may need to split the related edge.
+ MachineBasicBlock &Pred = *MI.getOperand(OpIdx + 1).getMBB();
+ // Check if we can move the insertion point prior to the
+ // terminators of the predecessor.
+ unsigned Reg = MO.getReg();
+ MachineBasicBlock::iterator It = Pred.getLastNonDebugInstr();
+ for (auto Begin = Pred.begin(); It != Begin && It->isTerminator(); --It)
+ if (It->modifiesRegister(Reg, &TRI)) {
+ // We cannot hoist the repairing code in the predecessor.
+ // Split the edge.
+ addInsertPoint(Pred, *MI.getParent());
+ return;
+ }
+ // At this point, we can insert in Pred.
+
+ // - If It is invalid, Pred is empty and we can insert in Pred
+ // wherever we want.
+ // - If It is valid, It is the last non-terminator of Pred; insert after It.
+ if (It == Pred.end())
+ addInsertPoint(Pred, /*Beginning*/ false);
+ else
+ addInsertPoint(*It, /*Before*/ false);
+ } else {
+ // - Terminators must be the last instructions:
+ // * Before, move the insert point before the first terminator.
+ // * After, we have to split the outgoing edges.
+ unsigned Reg = MO.getReg();
+ if (Before) {
+ // Check whether Reg is defined by any terminator.
+ MachineBasicBlock::iterator It = MI;
+ for (auto Begin = MI.getParent()->begin();
+ --It != Begin && It->isTerminator();)
+ if (It->modifiesRegister(Reg, &TRI)) {
+ // Insert the repairing code right after the definition.
+ addInsertPoint(*It, /*Before*/ false);
+ return;
+ }
+ addInsertPoint(*It, /*Before*/ true);
+ return;
+ }
+ // Make sure Reg is not redefined by other terminators, otherwise
+ // we do not know how to split.
+ for (MachineBasicBlock::iterator It = MI, End = MI.getParent()->end();
+ ++It != End;)
+ // The machine verifier should reject this kind of code.
+ assert(!It->modifiesRegister(Reg, &TRI) && "Do not know where to split");
+ // Split each outgoing edge.
+ MachineBasicBlock &Src = *MI.getParent();
+ for (auto &Succ : Src.successors())
+ addInsertPoint(Src, Succ);
+ }
+}
+
+void RegBankSelect::RepairingPlacement::addInsertPoint(MachineInstr &MI,
+ bool Before) {
+ addInsertPoint(*new InstrInsertPoint(MI, Before));
+}
+
+void RegBankSelect::RepairingPlacement::addInsertPoint(MachineBasicBlock &MBB,
+ bool Beginning) {
+ addInsertPoint(*new MBBInsertPoint(MBB, Beginning));
+}
+
+void RegBankSelect::RepairingPlacement::addInsertPoint(MachineBasicBlock &Src,
+ MachineBasicBlock &Dst) {
+ addInsertPoint(*new EdgeInsertPoint(Src, Dst, P));
+}
+
+void RegBankSelect::RepairingPlacement::addInsertPoint(
+ RegBankSelect::InsertPoint &Point) {
+ CanMaterialize &= Point.canMaterialize();
+ HasSplit |= Point.isSplit();
+ InsertPoints.emplace_back(&Point);
+}
+
+RegBankSelect::InstrInsertPoint::InstrInsertPoint(MachineInstr &Instr,
+ bool Before)
+ : InsertPoint(), Instr(Instr), Before(Before) {
+ // Since we do not support splitting, we do not need to update
+ // liveness and such, so do not do anything with P.
+ assert((!Before || !Instr.isPHI()) &&
+ "Splitting before phis requires more points");
+ assert((!Before || !Instr.getNextNode() || !Instr.getNextNode()->isPHI()) &&
+ "Splitting between phis does not make sense");
+}
+
+void RegBankSelect::InstrInsertPoint::materialize() {
+ if (isSplit()) {
+ // Slice and return the beginning of the new block.
+ // If we need to split between the terminators, we theoretically
+ // need to know where the first and second set of terminators end
+ // to update the successors properly.
+ // Now, in practice, we should have a maximum of 2 branch
+ // instructions; one conditional and one unconditional. Therefore
+ // we know how to update the successor by looking at the target of
+ // the unconditional branch.
+ // If we end up splitting at some point, then, we should update
+ // the liveness information and such. I.e., we would need to
+ // access P here.
+ // The machine verifier should actually make sure such cases
+ // cannot happen.
+ llvm_unreachable("Not yet implemented");
+ }
+ // Otherwise the insertion point is just the current or next
+ // instruction depending on Before. I.e., there is nothing to do
+ // here.
+}
+
+bool RegBankSelect::InstrInsertPoint::isSplit() const {
+ // If the insertion point is after a terminator, we need to split.
+ if (!Before)
+ return Instr.isTerminator();
+ // If we insert before an instruction that is after a terminator,
+ // we are still after a terminator.
+ return Instr.getPrevNode() && Instr.getPrevNode()->isTerminator();
+}
+
+uint64_t RegBankSelect::InstrInsertPoint::frequency(const Pass &P) const {
+ // Even if we need to split, because we insert between terminators,
+ // this split has actually the same frequency as the instruction.
+ const MachineBlockFrequencyInfo *MBFI =
+ P.getAnalysisIfAvailable<MachineBlockFrequencyInfo>();
+ if (!MBFI)
+ return 1;
+ return MBFI->getBlockFreq(Instr.getParent()).getFrequency();
+}
+
+uint64_t RegBankSelect::MBBInsertPoint::frequency(const Pass &P) const {
+ const MachineBlockFrequencyInfo *MBFI =
+ P.getAnalysisIfAvailable<MachineBlockFrequencyInfo>();
+ if (!MBFI)
+ return 1;
+ return MBFI->getBlockFreq(&MBB).getFrequency();
+}
+
+void RegBankSelect::EdgeInsertPoint::materialize() {
+ // If we end up repairing twice at the same place before materializing the
+ // insertion point, we may think we have to split an edge twice.
+ // We should have a factory for the insert point such that identical points
+ // are the same instance.
+ assert(Src.isSuccessor(DstOrSplit) && DstOrSplit->isPredecessor(&Src) &&
+ "This point has already been split");
+ MachineBasicBlock *NewBB = Src.SplitCriticalEdge(DstOrSplit, P);
+ assert(NewBB && "Invalid call to materialize");
+ // We reuse the destination block to hold the information of the new block.
+ DstOrSplit = NewBB;
+}
+
+uint64_t RegBankSelect::EdgeInsertPoint::frequency(const Pass &P) const {
+ const MachineBlockFrequencyInfo *MBFI =
+ P.getAnalysisIfAvailable<MachineBlockFrequencyInfo>();
+ if (!MBFI)
+ return 1;
+ if (WasMaterialized)
+ return MBFI->getBlockFreq(DstOrSplit).getFrequency();
+
+ const MachineBranchProbabilityInfo *MBPI =
+ P.getAnalysisIfAvailable<MachineBranchProbabilityInfo>();
+ if (!MBPI)
+ return 1;
+ // The basic block will be on the edge.
+ return (MBFI->getBlockFreq(&Src) * MBPI->getEdgeProbability(&Src, DstOrSplit))
+ .getFrequency();
+}
+
+bool RegBankSelect::EdgeInsertPoint::canMaterialize() const {
+ // If this is not a critical edge, we should not have used this insert
+ // point. Indeed, either the successor or the predecessor should
+ // have been used instead.
+ assert(Src.succ_size() > 1 && DstOrSplit->pred_size() > 1 &&
+ "Edge is not critical");
+ return Src.canSplitCriticalEdge(DstOrSplit);
+}
+
+RegBankSelect::MappingCost::MappingCost(const BlockFrequency &LocalFreq)
+ : LocalCost(0), NonLocalCost(0), LocalFreq(LocalFreq.getFrequency()) {}
+
+bool RegBankSelect::MappingCost::addLocalCost(uint64_t Cost) {
+ // Check if this overflows.
+ if (LocalCost + Cost < LocalCost) {
+ saturate();
+ return true;
+ }
+ LocalCost += Cost;
+ return isSaturated();
+}
+
+bool RegBankSelect::MappingCost::addNonLocalCost(uint64_t Cost) {
+ // Check if this overflows.
+ if (NonLocalCost + Cost < NonLocalCost) {
+ saturate();
+ return true;
+ }
+ NonLocalCost += Cost;
+ return isSaturated();
+}
+
+bool RegBankSelect::MappingCost::isSaturated() const {
+ return LocalCost == UINT64_MAX - 1 && NonLocalCost == UINT64_MAX &&
+ LocalFreq == UINT64_MAX;
+}
+
+void RegBankSelect::MappingCost::saturate() {
+ *this = ImpossibleCost();
+ --LocalCost;
+}
+
+RegBankSelect::MappingCost RegBankSelect::MappingCost::ImpossibleCost() {
+ return MappingCost(UINT64_MAX, UINT64_MAX, UINT64_MAX);
+}
+
+bool RegBankSelect::MappingCost::operator<(const MappingCost &Cost) const {
+ // Sort out the easy cases.
+ if (*this == Cost)
+ return false;
+ // If one is impossible to realize, the other is cheaper, unless it is
+ // impossible as well.
+ if ((*this == ImpossibleCost()) || (Cost == ImpossibleCost()))
+ return (*this == ImpossibleCost()) < (Cost == ImpossibleCost());
+ // If one is saturated the other is cheaper, unless it is saturated
+ // as well.
+ if (isSaturated() || Cost.isSaturated())
+ return isSaturated() < Cost.isSaturated();
+ // At this point we know both costs hold sensible values.
+
+ // If the two costs have different base frequencies, there is not much
+ // we can do but scale everything.
+ // However, if they have the same base frequency, we can avoid making
+ // complicated computations.
+ uint64_t ThisLocalAdjust;
+ uint64_t OtherLocalAdjust;
+ if (LLVM_LIKELY(LocalFreq == Cost.LocalFreq)) {
+
+ // At this point, we know the local costs are comparable.
+ // Handle the case that does not involve potential overflow first.
+ if (NonLocalCost == Cost.NonLocalCost)
+ // Since the non-local costs do not discriminate on the result,
+ // just compare the local costs.
+ return LocalCost < Cost.LocalCost;
+
+ // The base costs are comparable, so we only keep the relative
+ // values to increase our chances of avoiding overflows.
+ ThisLocalAdjust = 0;
+ OtherLocalAdjust = 0;
+ if (LocalCost < Cost.LocalCost)
+ OtherLocalAdjust = Cost.LocalCost - LocalCost;
+ else
+ ThisLocalAdjust = LocalCost - Cost.LocalCost;
+
+ } else {
+ ThisLocalAdjust = LocalCost;
+ OtherLocalAdjust = Cost.LocalCost;
+ }
+
+ // The non-local costs are comparable, just keep the relative value.
+ uint64_t ThisNonLocalAdjust = 0;
+ uint64_t OtherNonLocalAdjust = 0;
+ if (NonLocalCost < Cost.NonLocalCost)
+ OtherNonLocalAdjust = Cost.NonLocalCost - NonLocalCost;
+ else
+ ThisNonLocalAdjust = NonLocalCost - Cost.NonLocalCost;
+ // Scale everything to make them comparable.
+ uint64_t ThisScaledCost = ThisLocalAdjust * LocalFreq;
+ // Check for overflow on that operation.
+ bool ThisOverflows = ThisLocalAdjust && (ThisScaledCost < ThisLocalAdjust ||
+ ThisScaledCost < LocalFreq);
+ uint64_t OtherScaledCost = OtherLocalAdjust * Cost.LocalFreq;
+ // Check for overflow on the last operation.
+ bool OtherOverflows =
+ OtherLocalAdjust &&
+ (OtherScaledCost < OtherLocalAdjust || OtherScaledCost < Cost.LocalFreq);
+ // Add the non-local costs.
+ ThisOverflows |= ThisNonLocalAdjust &&
+ ThisScaledCost + ThisNonLocalAdjust < ThisNonLocalAdjust;
+ ThisScaledCost += ThisNonLocalAdjust;
+ OtherOverflows |= OtherNonLocalAdjust &&
+ OtherScaledCost + OtherNonLocalAdjust < OtherNonLocalAdjust;
+ OtherScaledCost += OtherNonLocalAdjust;
+ // If both overflow, we cannot compare without additional
+ // precision, e.g., APInt. Just give up in that case.
+ if (ThisOverflows && OtherOverflows)
+ return false;
+ // If one overflows but not the other, we can still compare.
+ if (ThisOverflows || OtherOverflows)
+ return ThisOverflows < OtherOverflows;
+ // Otherwise, just compare the values.
+ return ThisScaledCost < OtherScaledCost;
+}
+
+bool RegBankSelect::MappingCost::operator==(const MappingCost &Cost) const {
+ return LocalCost == Cost.LocalCost && NonLocalCost == Cost.NonLocalCost &&
+ LocalFreq == Cost.LocalFreq;
+}
diff --git a/contrib/llvm/lib/CodeGen/GlobalISel/RegisterBank.cpp b/contrib/llvm/lib/CodeGen/GlobalISel/RegisterBank.cpp
new file mode 100644
index 0000000..a911225
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/GlobalISel/RegisterBank.cpp
@@ -0,0 +1,107 @@
+//===- llvm/CodeGen/GlobalISel/RegisterBank.cpp - Register Bank --*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file implements the RegisterBank class.
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/GlobalISel/RegisterBank.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+
+#define DEBUG_TYPE "registerbank"
+
+using namespace llvm;
+
+const unsigned RegisterBank::InvalidID = UINT_MAX;
+
+RegisterBank::RegisterBank() : ID(InvalidID), Name(nullptr), Size(0) {}
+
+bool RegisterBank::verify(const TargetRegisterInfo &TRI) const {
+ assert(isValid() && "Invalid register bank");
+ assert(ContainedRegClasses.size() == TRI.getNumRegClasses() &&
+ "TRI does not match the initialization process?");
+ for (unsigned RCId = 0, End = TRI.getNumRegClasses(); RCId != End; ++RCId) {
+ const TargetRegisterClass &RC = *TRI.getRegClass(RCId);
+
+ if (!covers(RC))
+ continue;
+ // Verify that the register bank covers all the sub classes of the
+ // classes it covers.
+
+ // Use a different (slower, in this case) method than
+ // RegisterBankInfo to find the subclasses of RC, to make sure
+ // both agree on the coverage.
+ for (unsigned SubRCId = 0; SubRCId != End; ++SubRCId) {
+ const TargetRegisterClass &SubRC = *TRI.getRegClass(SubRCId);
+
+ if (!RC.hasSubClassEq(&SubRC))
+ continue;
+
+ // Verify that the Size of the register bank is big enough to cover
+ // all the register classes it covers.
+ assert((getSize() >= SubRC.getSize() * 8) &&
+ "Size is not big enough for all the subclasses!");
+ assert(covers(SubRC) && "Not all subclasses are covered");
+ }
+ }
+ return true;
+}
+
+bool RegisterBank::covers(const TargetRegisterClass &RC) const {
+ assert(isValid() && "RB hasn't been initialized yet");
+ return ContainedRegClasses.test(RC.getID());
+}
+
+bool RegisterBank::isValid() const {
+ return ID != InvalidID && Name != nullptr && Size != 0 &&
+ // A register bank that does not cover anything is useless.
+ !ContainedRegClasses.empty();
+}
+
+bool RegisterBank::operator==(const RegisterBank &OtherRB) const {
+ // There must be only one instance of a given register bank alive
+ // for the whole compilation.
+ // The RegisterBankInfo is supposed to enforce that.
+ assert((OtherRB.getID() != getID() || &OtherRB == this) &&
+ "ID does not uniquely identify a RegisterBank");
+ return &OtherRB == this;
+}
+
+void RegisterBank::dump(const TargetRegisterInfo *TRI) const {
+ print(dbgs(), /* IsForDebug */ true, TRI);
+}
+
+void RegisterBank::print(raw_ostream &OS, bool IsForDebug,
+ const TargetRegisterInfo *TRI) const {
+ OS << getName();
+ if (!IsForDebug)
+ return;
+ OS << "(ID:" << getID() << ", Size:" << getSize() << ")\n"
+ << "isValid:" << isValid() << '\n'
+ << "Number of Covered register classes: " << ContainedRegClasses.count()
+ << '\n';
+ // Print all the subclasses if we can.
+ // The register classes may not be properly initialized yet.
+ if (!TRI || ContainedRegClasses.empty())
+ return;
+ assert(ContainedRegClasses.size() == TRI->getNumRegClasses() &&
+ "TRI does not match the initialization process?");
+ bool IsFirst = true;
+ OS << "Covered register classes:\n";
+ for (unsigned RCId = 0, End = TRI->getNumRegClasses(); RCId != End; ++RCId) {
+ const TargetRegisterClass &RC = *TRI->getRegClass(RCId);
+
+ if (!covers(RC))
+ continue;
+
+ if (!IsFirst)
+ OS << ", ";
+ OS << TRI->getRegClassName(&RC);
+ IsFirst = false;
+ }
+}
diff --git a/contrib/llvm/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp b/contrib/llvm/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp
new file mode 100644
index 0000000..ef8e4f6
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp
@@ -0,0 +1,663 @@
+//===- llvm/CodeGen/GlobalISel/RegisterBankInfo.cpp --------------*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file implements the RegisterBankInfo class.
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/iterator_range.h"
+#include "llvm/CodeGen/GlobalISel/RegisterBank.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/Type.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetOpcodes.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+
+#include <algorithm> // For std::max.
+
+#define DEBUG_TYPE "registerbankinfo"
+
+using namespace llvm;
+
+const unsigned RegisterBankInfo::DefaultMappingID = UINT_MAX;
+const unsigned RegisterBankInfo::InvalidMappingID = UINT_MAX - 1;
+
+//------------------------------------------------------------------------------
+// RegisterBankInfo implementation.
+//------------------------------------------------------------------------------
+RegisterBankInfo::RegisterBankInfo(unsigned NumRegBanks)
+ : NumRegBanks(NumRegBanks) {
+ RegBanks.reset(new RegisterBank[NumRegBanks]);
+}
+
+bool RegisterBankInfo::verify(const TargetRegisterInfo &TRI) const {
+ DEBUG(for (unsigned Idx = 0, End = getNumRegBanks(); Idx != End; ++Idx) {
+ const RegisterBank &RegBank = getRegBank(Idx);
+ assert(Idx == RegBank.getID() &&
+ "ID does not match the index in the array");
+ dbgs() << "Verify " << RegBank << '\n';
+ assert(RegBank.verify(TRI) && "RegBank is invalid");
+ });
+ return true;
+}
+
+void RegisterBankInfo::createRegisterBank(unsigned ID, const char *Name) {
+ DEBUG(dbgs() << "Create register bank: " << ID << " with name \"" << Name
+ << "\"\n");
+ RegisterBank &RegBank = getRegBank(ID);
+ assert(RegBank.getID() == RegisterBank::InvalidID &&
+ "A register bank should be created only once");
+ RegBank.ID = ID;
+ RegBank.Name = Name;
+}
+
+void RegisterBankInfo::addRegBankCoverage(unsigned ID, unsigned RCId,
+ const TargetRegisterInfo &TRI,
+ bool AddTypeMapping) {
+ RegisterBank &RB = getRegBank(ID);
+ unsigned NbOfRegClasses = TRI.getNumRegClasses();
+
+ DEBUG(dbgs() << "Add coverage for: " << RB << '\n');
+
+ // Check if RB is under construction.
+ if (!RB.isValid())
+ RB.ContainedRegClasses.resize(NbOfRegClasses);
+ else if (RB.covers(*TRI.getRegClass(RCId)))
+ // If RB already covers this register class, there is nothing
+ // to do.
+ return;
+
+ BitVector &Covered = RB.ContainedRegClasses;
+ SmallVector<unsigned, 8> WorkList;
+
+ WorkList.push_back(RCId);
+ Covered.set(RCId);
+
+ unsigned &MaxSize = RB.Size;
+ do {
+ unsigned RCId = WorkList.pop_back_val();
+
+ const TargetRegisterClass &CurRC = *TRI.getRegClass(RCId);
+
+ DEBUG(dbgs() << "Examine: " << TRI.getRegClassName(&CurRC)
+ << "(Size*8: " << (CurRC.getSize() * 8) << ")\n");
+
+ // Remember the biggest size in bits.
+ MaxSize = std::max(MaxSize, CurRC.getSize() * 8);
+
+ // If we have been asked to record the type supported by this
+ // register bank, do it now.
+ if (AddTypeMapping)
+ for (MVT::SimpleValueType SVT :
+ make_range(CurRC.vt_begin(), CurRC.vt_end()))
+ recordRegBankForType(getRegBank(ID), SVT);
+
+ // Walk through all sub register classes and push them into the worklist.
+ bool First = true;
+ for (BitMaskClassIterator It(CurRC.getSubClassMask(), TRI); It.isValid();
+ ++It) {
+ unsigned SubRCId = It.getID();
+ if (!Covered.test(SubRCId)) {
+ if (First)
+ DEBUG(dbgs() << " Enqueue sub-class: ");
+ DEBUG(dbgs() << TRI.getRegClassName(TRI.getRegClass(SubRCId)) << ", ");
+ WorkList.push_back(SubRCId);
+ // Remember that we saw the sub class.
+ Covered.set(SubRCId);
+ First = false;
+ }
+ }
+ if (!First)
+ DEBUG(dbgs() << '\n');
+
+ // Also push all the register classes that can be accessed via a
+ // subreg index, i.e., its subreg-classes (which are different from
+ // its subclasses).
+ //
+ // Note: It would probably be faster to go the other way around
+ // and have this method add only super classes, since this
+ // information is available in a more efficient way. However, it
+ // feels less natural for the clients of this API, plus we will
+ // TableGen the whole bitset at some point, so compile time for
+ // the initialization is not very important.
+ First = true;
+ for (unsigned SubRCId = 0; SubRCId < NbOfRegClasses; ++SubRCId) {
+ if (Covered.test(SubRCId))
+ continue;
+ bool Pushed = false;
+ const TargetRegisterClass *SubRC = TRI.getRegClass(SubRCId);
+ for (SuperRegClassIterator SuperRCIt(SubRC, &TRI); SuperRCIt.isValid();
+ ++SuperRCIt) {
+ if (Pushed)
+ break;
+ for (BitMaskClassIterator It(SuperRCIt.getMask(), TRI); It.isValid();
+ ++It) {
+ unsigned SuperRCId = It.getID();
+ if (SuperRCId == RCId) {
+ if (First)
+ DEBUG(dbgs() << " Enqueue subreg-class: ");
+ DEBUG(dbgs() << TRI.getRegClassName(SubRC) << ", ");
+ WorkList.push_back(SubRCId);
+ // Remember that we saw the sub class.
+ Covered.set(SubRCId);
+ Pushed = true;
+ First = false;
+ break;
+ }
+ }
+ }
+ }
+ if (!First)
+ DEBUG(dbgs() << '\n');
+ } while (!WorkList.empty());
+}
+
+const RegisterBank *
+RegisterBankInfo::getRegBank(unsigned Reg, const MachineRegisterInfo &MRI,
+ const TargetRegisterInfo &TRI) const {
+ if (TargetRegisterInfo::isPhysicalRegister(Reg))
+ return &getRegBankFromRegClass(*TRI.getMinimalPhysRegClass(Reg));
+
+ assert(Reg && "NoRegister does not have a register bank");
+ const RegClassOrRegBank &RegClassOrBank = MRI.getRegClassOrRegBank(Reg);
+ if (RegClassOrBank.is<const RegisterBank *>())
+ return RegClassOrBank.get<const RegisterBank *>();
+ const TargetRegisterClass *RC =
+ RegClassOrBank.get<const TargetRegisterClass *>();
+ if (RC)
+ return &getRegBankFromRegClass(*RC);
+ return nullptr;
+}
+
+const RegisterBank *RegisterBankInfo::getRegBankFromConstraints(
+ const MachineInstr &MI, unsigned OpIdx, const TargetInstrInfo &TII,
+ const TargetRegisterInfo &TRI) const {
+ // The mapping of the registers may be available via the
+ // register class constraints.
+ const TargetRegisterClass *RC = MI.getRegClassConstraint(OpIdx, &TII, &TRI);
+
+ if (!RC)
+ return nullptr;
+
+ const RegisterBank &RegBank = getRegBankFromRegClass(*RC);
+ // Sanity check that the target properly implemented getRegBankFromRegClass.
+ assert(RegBank.covers(*RC) &&
+ "The mapping of the register bank does not make sense");
+ return &RegBank;
+}
+
+RegisterBankInfo::InstructionMapping
+RegisterBankInfo::getInstrMappingImpl(const MachineInstr &MI) const {
+ RegisterBankInfo::InstructionMapping Mapping(DefaultMappingID, /*Cost*/ 1,
+ MI.getNumOperands());
+ const MachineFunction &MF = *MI.getParent()->getParent();
+ const TargetSubtargetInfo &STI = MF.getSubtarget();
+ const TargetRegisterInfo &TRI = *STI.getRegisterInfo();
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+ // We may need to query the instruction encoding to guess the mapping.
+ const TargetInstrInfo &TII = *STI.getInstrInfo();
+
+ // Before doing anything complicated, check whether the mapping is
+ // directly available.
+ bool CompleteMapping = true;
+ // For copies we want to walk over the operands and try to find one
+ // that has a register bank.
+ bool isCopyLike = MI.isCopy() || MI.isPHI();
+ // Remember the register bank for reuse for copy-like instructions.
+ const RegisterBank *RegBank = nullptr;
+ // Remember the size of the register for reuse for copy-like instructions.
+ unsigned RegSize = 0;
+ for (unsigned OpIdx = 0, End = MI.getNumOperands(); OpIdx != End; ++OpIdx) {
+ const MachineOperand &MO = MI.getOperand(OpIdx);
+ if (!MO.isReg())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg)
+ continue;
+ // The register bank of Reg is just a side effect of the current
+ // execution and, in particular, there is no reason to believe this
+ // is the best default mapping for the current instruction. Keep
+ // it as an alternative register bank if we cannot figure out
+ // something.
+ const RegisterBank *AltRegBank = getRegBank(Reg, MRI, TRI);
+ // For copy-like instructions, we want to reuse the register bank
+ // that is already set on Reg, if any, since those instructions do
+ // not have any constraints.
+ const RegisterBank *CurRegBank = isCopyLike ? AltRegBank : nullptr;
+ if (!CurRegBank) {
+ // If this is a target specific instruction, we can deduce
+ // the register bank from the encoding constraints.
+ CurRegBank = getRegBankFromConstraints(MI, OpIdx, TII, TRI);
+ if (!CurRegBank) {
+ // Check if we can deduce the register bank from the type of
+ // the instruction.
+ Type *MITy = MI.getType();
+ if (MITy)
+ CurRegBank = getRegBankForType(
+ MVT::getVT(MITy, /*HandleUnknown*/ true).SimpleTy);
+ if (!CurRegBank)
+ // Use the currently assigned register bank.
+ // That may not make much sense though.
+ CurRegBank = AltRegBank;
+ if (!CurRegBank) {
+ // All our attempts failed, give up.
+ CompleteMapping = false;
+
+ if (!isCopyLike)
+ // MI does not carry enough information to guess the mapping.
+ return InstructionMapping();
+
+ // For copies, we want to keep iterating to find a register
+ // bank for the other operands if we did not find one yet.
+ if (RegBank)
+ break;
+ continue;
+ }
+ }
+ }
+ RegBank = CurRegBank;
+ RegSize = getSizeInBits(Reg, MRI, TRI);
+ Mapping.setOperandMapping(OpIdx, RegSize, *CurRegBank);
+ }
+
+ if (CompleteMapping)
+ return Mapping;
+
+ assert(isCopyLike && "We should have bailed on non-copies at this point");
+ // For copy-like instructions, if none of the operands has a register
+ // bank available, there is nothing we can propagate.
+ if (!RegBank)
+ return InstructionMapping();
+
+ // This is a copy-like instruction.
+ // Propagate RegBank to all operands that do not have a
+ // mapping yet.
+ for (unsigned OpIdx = 0, End = MI.getNumOperands(); OpIdx != End; ++OpIdx) {
+ const MachineOperand &MO = MI.getOperand(OpIdx);
+ // Don't assign a mapping for non-reg operands.
+ if (!MO.isReg())
+ continue;
+
+ // If a mapping already exists, do not touch it.
+ if (!static_cast<const InstructionMapping *>(&Mapping)
+ ->getOperandMapping(OpIdx)
+ .BreakDown.empty())
+ continue;
+
+ Mapping.setOperandMapping(OpIdx, RegSize, *RegBank);
+ }
+ return Mapping;
+}
+
+RegisterBankInfo::InstructionMapping
+RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
+ RegisterBankInfo::InstructionMapping Mapping = getInstrMappingImpl(MI);
+ if (Mapping.isValid())
+ return Mapping;
+ llvm_unreachable("The target must implement this");
+}
+
+RegisterBankInfo::InstructionMappings
+RegisterBankInfo::getInstrPossibleMappings(const MachineInstr &MI) const {
+ InstructionMappings PossibleMappings;
+ // Put the default mapping first.
+ PossibleMappings.push_back(getInstrMapping(MI));
+ // Then the alternative mapping, if any.
+ InstructionMappings AltMappings = getInstrAlternativeMappings(MI);
+ for (InstructionMapping &AltMapping : AltMappings)
+ PossibleMappings.emplace_back(std::move(AltMapping));
+#ifndef NDEBUG
+ for (const InstructionMapping &Mapping : PossibleMappings)
+ assert(Mapping.verify(MI) && "Mapping is invalid");
+#endif
+ return PossibleMappings;
+}
+
+RegisterBankInfo::InstructionMappings
+RegisterBankInfo::getInstrAlternativeMappings(const MachineInstr &MI) const {
+ // No alternative for MI.
+ return InstructionMappings();
+}
+
+void RegisterBankInfo::applyDefaultMapping(const OperandsMapper &OpdMapper) {
+ MachineInstr &MI = OpdMapper.getMI();
+ DEBUG(dbgs() << "Applying default-like mapping\n");
+ for (unsigned OpIdx = 0, EndIdx = MI.getNumOperands(); OpIdx != EndIdx;
+ ++OpIdx) {
+ DEBUG(dbgs() << "OpIdx " << OpIdx);
+ MachineOperand &MO = MI.getOperand(OpIdx);
+ if (!MO.isReg()) {
+ DEBUG(dbgs() << " is not a register, nothing to be done\n");
+ continue;
+ }
+ assert(
+ OpdMapper.getInstrMapping().getOperandMapping(OpIdx).BreakDown.size() ==
+ 1 &&
+ "This mapping is too complex for this function");
+ iterator_range<SmallVectorImpl<unsigned>::const_iterator> NewRegs =
+ OpdMapper.getVRegs(OpIdx);
+ if (NewRegs.begin() == NewRegs.end()) {
+ DEBUG(dbgs() << " has not been repaired, nothing to be done\n");
+ continue;
+ }
+ DEBUG(dbgs() << " changed, replace " << MO.getReg());
+ MO.setReg(*NewRegs.begin());
+ DEBUG(dbgs() << " with " << MO.getReg());
+ }
+}
+
+unsigned RegisterBankInfo::getSizeInBits(unsigned Reg,
+ const MachineRegisterInfo &MRI,
+ const TargetRegisterInfo &TRI) {
+ const TargetRegisterClass *RC = nullptr;
+ if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ // The size is not directly available for physical registers.
+ // Instead, we need to access a register class that contains Reg and
+ // get the size of that register class.
+ RC = TRI.getMinimalPhysRegClass(Reg);
+ } else {
+ unsigned RegSize = MRI.getSize(Reg);
+ // If Reg is not a generic register, query the register class to
+ // get its size.
+ if (RegSize)
+ return RegSize;
+ // Since Reg is not a generic register, it must have a register class.
+ RC = MRI.getRegClass(Reg);
+ }
+ assert(RC && "Unable to deduce the register class");
+ return RC->getSize() * 8;
+}
+
+//------------------------------------------------------------------------------
+// Helper classes implementation.
+//------------------------------------------------------------------------------
+void RegisterBankInfo::PartialMapping::dump() const {
+ print(dbgs());
+ dbgs() << '\n';
+}
+
+bool RegisterBankInfo::PartialMapping::verify() const {
+ assert(RegBank && "Register bank not set");
+ assert(Length && "Empty mapping");
+ assert((StartIdx <= getHighBitIdx()) && "Overflow, switch to APInt?");
+ // Check if the minimum width fits into RegBank.
+ assert(RegBank->getSize() >= Length && "Register bank too small for Mask");
+ return true;
+}
+
+void RegisterBankInfo::PartialMapping::print(raw_ostream &OS) const {
+ OS << "[" << StartIdx << ", " << getHighBitIdx() << "], RegBank = ";
+ if (RegBank)
+ OS << *RegBank;
+ else
+ OS << "nullptr";
+}
+
+bool RegisterBankInfo::ValueMapping::verify(unsigned ExpectedBitWidth) const {
+ assert(!BreakDown.empty() && "Value mapped nowhere?!");
+ unsigned OrigValueBitWidth = 0;
+ for (const RegisterBankInfo::PartialMapping &PartMap : BreakDown) {
+ // Check that each register bank is big enough to hold the partial value:
+ // this check is done by PartialMapping::verify
+ assert(PartMap.verify() && "Partial mapping is invalid");
+ // The original value should be completely mapped.
+ // Thus the maximum accessed index + 1 is the size of the original value.
+ OrigValueBitWidth =
+ std::max(OrigValueBitWidth, PartMap.getHighBitIdx() + 1);
+ }
+ assert(OrigValueBitWidth == ExpectedBitWidth && "BitWidth does not match");
+ APInt ValueMask(OrigValueBitWidth, 0);
+ for (const RegisterBankInfo::PartialMapping &PartMap : BreakDown) {
+ // Check that the union of the partial mappings covers the whole value,
+ // without overlaps.
+ // The high bit is exclusive in the APInt API, thus getHighBitIdx + 1.
+ APInt PartMapMask = APInt::getBitsSet(OrigValueBitWidth, PartMap.StartIdx,
+ PartMap.getHighBitIdx() + 1);
+ ValueMask ^= PartMapMask;
+ assert((ValueMask & PartMapMask) == PartMapMask &&
+ "Some partial mappings overlap");
+ }
+ assert(ValueMask.isAllOnesValue() && "Value is not fully mapped");
+ return true;
+}
+
+void RegisterBankInfo::ValueMapping::dump() const {
+ print(dbgs());
+ dbgs() << '\n';
+}
+
+void RegisterBankInfo::ValueMapping::print(raw_ostream &OS) const {
+ OS << "#BreakDown: " << BreakDown.size() << " ";
+ bool IsFirst = true;
+ for (const PartialMapping &PartMap : BreakDown) {
+ if (!IsFirst)
+ OS << ", ";
+ OS << '[' << PartMap << ']';
+ IsFirst = false;
+ }
+}
+
+void RegisterBankInfo::InstructionMapping::setOperandMapping(
+ unsigned OpIdx, unsigned MaskSize, const RegisterBank &RegBank) {
+ // Build the value mapping.
+ assert(MaskSize <= RegBank.getSize() && "Register bank is too small");
+
+ // Create the mapping object.
+ getOperandMapping(OpIdx).BreakDown.push_back(
+ PartialMapping(0, MaskSize, RegBank));
+}
+
+bool RegisterBankInfo::InstructionMapping::verify(
+ const MachineInstr &MI) const {
+ // Check that all the register operands are properly mapped.
+ // Check the constructor invariant.
+ assert(NumOperands == MI.getNumOperands() &&
+ "NumOperands must match, see constructor");
+ assert(MI.getParent() && MI.getParent()->getParent() &&
+ "MI must be connected to a MachineFunction");
+ const MachineFunction &MF = *MI.getParent()->getParent();
+ (void)MF;
+
+ for (unsigned Idx = 0; Idx < NumOperands; ++Idx) {
+ const MachineOperand &MO = MI.getOperand(Idx);
+ const RegisterBankInfo::ValueMapping &MOMapping = getOperandMapping(Idx);
+ (void)MOMapping;
+ if (!MO.isReg()) {
+ assert(MOMapping.BreakDown.empty() &&
+ "We should not care about non-reg mapping");
+ continue;
+ }
+ unsigned Reg = MO.getReg();
+ if (!Reg)
+ continue;
+ // Register size in bits.
+ // This size must match what the mapping expects.
+ assert(MOMapping.verify(getSizeInBits(
+ Reg, MF.getRegInfo(), *MF.getSubtarget().getRegisterInfo())) &&
+ "Value mapping is invalid");
+ }
+ return true;
+}
+
+void RegisterBankInfo::InstructionMapping::dump() const {
+ print(dbgs());
+ dbgs() << '\n';
+}
+
+void RegisterBankInfo::InstructionMapping::print(raw_ostream &OS) const {
+ OS << "ID: " << getID() << " Cost: " << getCost() << " Mapping: ";
+
+ for (unsigned OpIdx = 0; OpIdx != NumOperands; ++OpIdx) {
+ const ValueMapping &ValMapping = getOperandMapping(OpIdx);
+ if (OpIdx)
+ OS << ", ";
+ OS << "{ Idx: " << OpIdx << " Map: " << ValMapping << '}';
+ }
+}
+
+const int RegisterBankInfo::OperandsMapper::DontKnowIdx = -1;
+
+RegisterBankInfo::OperandsMapper::OperandsMapper(
+ MachineInstr &MI, const InstructionMapping &InstrMapping,
+ MachineRegisterInfo &MRI)
+ : MRI(MRI), MI(MI), InstrMapping(InstrMapping) {
+ unsigned NumOpds = MI.getNumOperands();
+ OpToNewVRegIdx.reset(new int[NumOpds]);
+ std::fill(&OpToNewVRegIdx[0], &OpToNewVRegIdx[NumOpds],
+ OperandsMapper::DontKnowIdx);
+ assert(InstrMapping.verify(MI) && "Invalid mapping for MI");
+}
+
+iterator_range<SmallVectorImpl<unsigned>::iterator>
+RegisterBankInfo::OperandsMapper::getVRegsMem(unsigned OpIdx) {
+ assert(OpIdx < getMI().getNumOperands() && "Out-of-bound access");
+ unsigned NumPartialVal =
+ getInstrMapping().getOperandMapping(OpIdx).BreakDown.size();
+ int StartIdx = OpToNewVRegIdx[OpIdx];
+
+ if (StartIdx == OperandsMapper::DontKnowIdx) {
+ // This is the first time we try to access OpIdx.
+ // Create the cells that will hold all the partial values at the
+ // end of the NewVRegs list.
+ StartIdx = NewVRegs.size();
+ OpToNewVRegIdx[OpIdx] = StartIdx;
+ for (unsigned i = 0; i < NumPartialVal; ++i)
+ NewVRegs.push_back(0);
+ }
+ SmallVectorImpl<unsigned>::iterator End =
+ getNewVRegsEnd(StartIdx, NumPartialVal);
+
+ return make_range(&NewVRegs[StartIdx], End);
+}
+
+SmallVectorImpl<unsigned>::const_iterator
+RegisterBankInfo::OperandsMapper::getNewVRegsEnd(unsigned StartIdx,
+ unsigned NumVal) const {
+ return const_cast<OperandsMapper *>(this)->getNewVRegsEnd(StartIdx, NumVal);
+}
+SmallVectorImpl<unsigned>::iterator
+RegisterBankInfo::OperandsMapper::getNewVRegsEnd(unsigned StartIdx,
+ unsigned NumVal) {
+ assert(NewVRegs.size() >= StartIdx + NumVal &&
+        "NewVRegs too small to contain all the partial mapping");
+ return NewVRegs.size() <= StartIdx + NumVal ? NewVRegs.end()
+ : &NewVRegs[StartIdx + NumVal];
+}
+
+void RegisterBankInfo::OperandsMapper::createVRegs(unsigned OpIdx) {
+ assert(OpIdx < getMI().getNumOperands() && "Out-of-bound access");
+ iterator_range<SmallVectorImpl<unsigned>::iterator> NewVRegsForOpIdx =
+ getVRegsMem(OpIdx);
+ const SmallVectorImpl<PartialMapping> &PartMapList =
+ getInstrMapping().getOperandMapping(OpIdx).BreakDown;
+ SmallVectorImpl<PartialMapping>::const_iterator PartMap = PartMapList.begin();
+ for (unsigned &NewVReg : NewVRegsForOpIdx) {
+ assert(PartMap != PartMapList.end() && "Out-of-bound access");
+ assert(NewVReg == 0 && "Register has already been created");
+ NewVReg = MRI.createGenericVirtualRegister(PartMap->Length);
+ MRI.setRegBank(NewVReg, *PartMap->RegBank);
+ ++PartMap;
+ }
+}
+
+void RegisterBankInfo::OperandsMapper::setVRegs(unsigned OpIdx,
+ unsigned PartialMapIdx,
+ unsigned NewVReg) {
+ assert(OpIdx < getMI().getNumOperands() && "Out-of-bound access");
+ assert(getInstrMapping().getOperandMapping(OpIdx).BreakDown.size() >
+ PartialMapIdx &&
+ "Out-of-bound access for partial mapping");
+ // Make sure the memory is initialized for that operand.
+ (void)getVRegsMem(OpIdx);
+ assert(NewVRegs[OpToNewVRegIdx[OpIdx] + PartialMapIdx] == 0 &&
+ "This value is already set");
+ NewVRegs[OpToNewVRegIdx[OpIdx] + PartialMapIdx] = NewVReg;
+}
+
+iterator_range<SmallVectorImpl<unsigned>::const_iterator>
+RegisterBankInfo::OperandsMapper::getVRegs(unsigned OpIdx,
+ bool ForDebug) const {
+ (void)ForDebug;
+ assert(OpIdx < getMI().getNumOperands() && "Out-of-bound access");
+ int StartIdx = OpToNewVRegIdx[OpIdx];
+
+ if (StartIdx == OperandsMapper::DontKnowIdx)
+ return make_range(NewVRegs.end(), NewVRegs.end());
+
+ unsigned PartMapSize =
+ getInstrMapping().getOperandMapping(OpIdx).BreakDown.size();
+ SmallVectorImpl<unsigned>::const_iterator End =
+ getNewVRegsEnd(StartIdx, PartMapSize);
+ iterator_range<SmallVectorImpl<unsigned>::const_iterator> Res =
+ make_range(&NewVRegs[StartIdx], End);
+#ifndef NDEBUG
+ for (unsigned VReg : Res)
+ assert((VReg || ForDebug) && "Some registers are uninitialized");
+#endif
+ return Res;
+}
+
+void RegisterBankInfo::OperandsMapper::dump() const {
+ print(dbgs(), true);
+ dbgs() << '\n';
+}
+
+void RegisterBankInfo::OperandsMapper::print(raw_ostream &OS,
+ bool ForDebug) const {
+ unsigned NumOpds = getMI().getNumOperands();
+ if (ForDebug) {
+ OS << "Mapping for " << getMI() << "\nwith " << getInstrMapping() << '\n';
+ // Print out the internal state of the index table.
+ OS << "Populated indices (CellNumber, IndexInNewVRegs): ";
+ bool IsFirst = true;
+ for (unsigned Idx = 0; Idx != NumOpds; ++Idx) {
+ if (OpToNewVRegIdx[Idx] != DontKnowIdx) {
+ if (!IsFirst)
+ OS << ", ";
+ OS << '(' << Idx << ", " << OpToNewVRegIdx[Idx] << ')';
+ IsFirst = false;
+ }
+ }
+ OS << '\n';
+ } else
+ OS << "Mapping ID: " << getInstrMapping().getID() << ' ';
+
+ OS << "Operand Mapping: ";
+ // If we have a function, we can pretty print the name of the registers.
+ // Otherwise we will print the raw numbers.
+ const TargetRegisterInfo *TRI =
+ getMI().getParent() && getMI().getParent()->getParent()
+ ? getMI().getParent()->getParent()->getSubtarget().getRegisterInfo()
+ : nullptr;
+ bool IsFirst = true;
+ for (unsigned Idx = 0; Idx != NumOpds; ++Idx) {
+ if (OpToNewVRegIdx[Idx] == DontKnowIdx)
+ continue;
+ if (!IsFirst)
+ OS << ", ";
+ IsFirst = false;
+ OS << '(' << PrintReg(getMI().getOperand(Idx).getReg(), TRI) << ", [";
+ bool IsFirstNewVReg = true;
+ for (unsigned VReg : getVRegs(Idx)) {
+ if (!IsFirstNewVReg)
+ OS << ", ";
+ IsFirstNewVReg = false;
+ OS << PrintReg(VReg, TRI);
+ }
+ OS << "])";
+ }
+}
diff --git a/contrib/llvm/lib/CodeGen/GlobalMerge.cpp b/contrib/llvm/lib/CodeGen/GlobalMerge.cpp
index dd9a840..8c760b7 100644
--- a/contrib/llvm/lib/CodeGen/GlobalMerge.cpp
+++ b/contrib/llvm/lib/CodeGen/GlobalMerge.cpp
@@ -59,7 +59,6 @@
// We use heuristics to discover the best global grouping we can (cf cl::opts).
// ===---------------------------------------------------------------------===//
-#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/SmallPtrSet.h"
@@ -92,6 +91,11 @@ EnableGlobalMerge("enable-global-merge", cl::Hidden,
cl::desc("Enable the global merge pass"),
cl::init(true));
+static cl::opt<unsigned>
+GlobalMergeMaxOffset("global-merge-max-offset", cl::Hidden,
+ cl::desc("Set maximum offset for global merge pass"),
+ cl::init(0));
+
static cl::opt<bool> GlobalMergeGroupByUse(
"global-merge-group-by-use", cl::Hidden,
cl::desc("Improve global merge pass to look at uses"), cl::init(true));
@@ -131,6 +135,8 @@ namespace {
/// Whether we should merge global variables that have external linkage.
bool MergeExternalGlobals;
+ bool IsMachO;
+
bool doMerge(SmallVectorImpl<GlobalVariable*> &Globals,
Module &M, bool isConst, unsigned AddrSpace) const;
/// \brief Merge everything in \p Globals for which the corresponding bit
@@ -158,10 +164,14 @@ namespace {
public:
static char ID; // Pass identification, replacement for typeid.
- explicit GlobalMerge(const TargetMachine *TM = nullptr,
- unsigned MaximalOffset = 0,
- bool OnlyOptimizeForSize = false,
- bool MergeExternalGlobals = false)
+ explicit GlobalMerge()
+ : FunctionPass(ID), TM(nullptr), MaxOffset(GlobalMergeMaxOffset),
+ OnlyOptimizeForSize(false), MergeExternalGlobals(false) {
+ initializeGlobalMergePass(*PassRegistry::getPassRegistry());
+ }
+
+ explicit GlobalMerge(const TargetMachine *TM, unsigned MaximalOffset,
+ bool OnlyOptimizeForSize, bool MergeExternalGlobals)
: FunctionPass(ID), TM(TM), MaxOffset(MaximalOffset),
OnlyOptimizeForSize(OnlyOptimizeForSize),
MergeExternalGlobals(MergeExternalGlobals) {
@@ -459,8 +469,7 @@ bool GlobalMerge::doMerge(const SmallVectorImpl<GlobalVariable *> &Globals,
// we can also emit an alias for internal linkage as it's safe to do so.
// It's not safe on Mach-O as the alias (and thus the portion of the
// MergedGlobals variable) may be dead stripped at link time.
- if (Linkage != GlobalValue::InternalLinkage ||
- !TM->getTargetTriple().isOSBinFormatMachO()) {
+ if (Linkage != GlobalValue::InternalLinkage || !IsMachO) {
GlobalAlias::create(Tys[idx], AddrSpace, Linkage, Name, GEP, &M);
}
@@ -513,6 +522,8 @@ bool GlobalMerge::doInitialization(Module &M) {
if (!EnableGlobalMerge)
return false;
+ IsMachO = Triple(M.getTargetTriple()).isOSBinFormatMachO();
+
auto &DL = M.getDataLayout();
DenseMap<unsigned, SmallVector<GlobalVariable*, 16> > Globals, ConstGlobals,
BSSGlobals;
@@ -550,7 +561,8 @@ bool GlobalMerge::doInitialization(Module &M) {
continue;
if (DL.getTypeAllocSize(Ty) < MaxOffset) {
- if (TargetLoweringObjectFile::getKindForGlobal(&GV, *TM).isBSSLocal())
+ if (TM &&
+ TargetLoweringObjectFile::getKindForGlobal(&GV, *TM).isBSSLocal())
BSSGlobals[AddressSpace].push_back(&GV);
else if (GV.isConstant())
ConstGlobals[AddressSpace].push_back(&GV);
diff --git a/contrib/llvm/lib/CodeGen/IfConversion.cpp b/contrib/llvm/lib/CodeGen/IfConversion.cpp
index c38c9d2..d225162 100644
--- a/contrib/llvm/lib/CodeGen/IfConversion.cpp
+++ b/contrib/llvm/lib/CodeGen/IfConversion.cpp
@@ -7,7 +7,8 @@
//
//===----------------------------------------------------------------------===//
//
-// This file implements the machine instruction level if-conversion pass.
+// This file implements the machine instruction level if-conversion pass, which
+// tries to convert conditional branches into predicated instructions.
//
//===----------------------------------------------------------------------===//
@@ -33,6 +34,7 @@
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
#include <algorithm>
+#include <utility>
using namespace llvm;
@@ -85,7 +87,7 @@ namespace {
/// BBInfo - One per MachineBasicBlock, this is used to cache the result
/// if-conversion feasibility analysis. This includes results from
- /// TargetInstrInfo::AnalyzeBranch() (i.e. TBB, FBB, and Cond), and its
+ /// TargetInstrInfo::analyzeBranch() (i.e. TBB, FBB, and Cond), and its
/// classification, and common tail block of its successors (if it's a
/// diamond shape), its size, whether it's predicable, and whether any
/// instruction can clobber the 'would-be' predicate.
@@ -94,7 +96,7 @@ namespace {
/// IsBeingAnalyzed - True if BB is currently being analyzed.
/// IsAnalyzed - True if BB has been analyzed (info is still valid).
/// IsEnqueued - True if BB has been enqueued to be ifcvt'ed.
- /// IsBrAnalyzable - True if AnalyzeBranch() returns false.
+ /// IsBrAnalyzable - True if analyzeBranch() returns false.
/// HasFallThrough - True if BB may fallthrough to the following BB.
/// IsUnpredicable - True if BB is known to be unpredicable.
/// ClobbersPred - True if BB could modify predicates (e.g. has
@@ -103,7 +105,7 @@ namespace {
/// ExtraCost - Extra cost for multi-cycle instructions.
/// ExtraCost2 - Some instructions are slower when predicated
/// BB - Corresponding MachineBasicBlock.
- /// TrueBB / FalseBB- See AnalyzeBranch().
+ /// TrueBB / FalseBB- See analyzeBranch().
/// BrCond - Conditions for end of block conditional branches.
/// Predicate - Predicate used in the BB.
struct BBInfo {
@@ -161,7 +163,6 @@ namespace {
const TargetLoweringBase *TLI;
const TargetInstrInfo *TII;
const TargetRegisterInfo *TRI;
- const MachineBlockFrequencyInfo *MBFI;
const MachineBranchProbabilityInfo *MBPI;
MachineRegisterInfo *MRI;
@@ -176,7 +177,7 @@ namespace {
public:
static char ID;
IfConverter(std::function<bool(const Function &)> Ftor = nullptr)
- : MachineFunctionPass(ID), FnNum(-1), PredicateFtor(Ftor) {
+ : MachineFunctionPass(ID), FnNum(-1), PredicateFtor(std::move(Ftor)) {
initializeIfConverterPass(*PassRegistry::getPassRegistry());
}
@@ -188,6 +189,11 @@ namespace {
bool runOnMachineFunction(MachineFunction &MF) override;
+ MachineFunctionProperties getRequiredProperties() const override {
+ return MachineFunctionProperties().set(
+ MachineFunctionProperties::Property::AllVRegsAllocated);
+ }
+
private:
bool ReverseBranchCondition(BBInfo &BBI);
bool ValidSimple(BBInfo &TrueBBI, unsigned &Dups,
@@ -198,10 +204,12 @@ namespace {
bool ValidDiamond(BBInfo &TrueBBI, BBInfo &FalseBBI,
unsigned &Dups1, unsigned &Dups2) const;
void ScanInstructions(BBInfo &BBI);
- void AnalyzeBlock(MachineBasicBlock *MBB, std::vector<IfcvtToken*> &Tokens);
+ void AnalyzeBlock(MachineBasicBlock *MBB,
+ std::vector<std::unique_ptr<IfcvtToken>> &Tokens);
bool FeasibilityAnalysis(BBInfo &BBI, SmallVectorImpl<MachineOperand> &Cond,
bool isTriangle = false, bool RevBranch = false);
- void AnalyzeBlocks(MachineFunction &MF, std::vector<IfcvtToken*> &Tokens);
+ void AnalyzeBlocks(MachineFunction &MF,
+ std::vector<std::unique_ptr<IfcvtToken>> &Tokens);
void InvalidatePreds(MachineBasicBlock *BB);
void RemoveExtraEdges(BBInfo &BBI);
bool IfConvertSimple(BBInfo &BBI, IfcvtKind Kind);
@@ -240,7 +248,8 @@ namespace {
}
// IfcvtTokenCmp - Used to sort if-conversion candidates.
- static bool IfcvtTokenCmp(IfcvtToken *C1, IfcvtToken *C2) {
+ static bool IfcvtTokenCmp(const std::unique_ptr<IfcvtToken> &C1,
+ const std::unique_ptr<IfcvtToken> &C2) {
int Incr1 = (C1->Kind == ICDiamond)
? -(int)(C1->NumDups + C1->NumDups2) : (int)C1->NumDups;
int Incr2 = (C2->Kind == ICDiamond)
@@ -273,14 +282,15 @@ INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
INITIALIZE_PASS_END(IfConverter, "if-converter", "If Converter", false, false)
bool IfConverter::runOnMachineFunction(MachineFunction &MF) {
- if (PredicateFtor && !PredicateFtor(*MF.getFunction()))
+ if (skipFunction(*MF.getFunction()) ||
+ (PredicateFtor && !PredicateFtor(*MF.getFunction())))
return false;
const TargetSubtargetInfo &ST = MF.getSubtarget();
TLI = ST.getTargetLowering();
TII = ST.getInstrInfo();
TRI = ST.getRegisterInfo();
- MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
+ BranchFolder::MBFIWrapper MBFI(getAnalysis<MachineBlockFrequencyInfo>());
MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
MRI = &MF.getRegInfo();
SchedModel.init(ST.getSchedModel(), &ST, TII);
@@ -292,7 +302,7 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) {
bool BFChange = false;
if (!PreRegAlloc) {
// Tail merging tends to expose more if-conversion opportunities.
- BranchFolder BF(true, false, *MBFI, *MBPI);
+ BranchFolder BF(true, false, MBFI, *MBPI);
BFChange = BF.OptimizeFunction(MF, TII, ST.getRegisterInfo(),
getAnalysisIfAvailable<MachineModuleInfo>());
}
@@ -309,7 +319,7 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) {
MF.RenumberBlocks();
BBAnalysis.resize(MF.getNumBlockIDs());
- std::vector<IfcvtToken*> Tokens;
+ std::vector<std::unique_ptr<IfcvtToken>> Tokens;
MadeChange = false;
unsigned NumIfCvts = NumSimple + NumSimpleFalse + NumTriangle +
NumTriangleRev + NumTriangleFalse + NumTriangleFRev + NumDiamonds;
@@ -319,15 +329,13 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) {
bool Change = false;
AnalyzeBlocks(MF, Tokens);
while (!Tokens.empty()) {
- IfcvtToken *Token = Tokens.back();
+ std::unique_ptr<IfcvtToken> Token = std::move(Tokens.back());
Tokens.pop_back();
BBInfo &BBI = Token->BBI;
IfcvtKind Kind = Token->Kind;
unsigned NumDups = Token->NumDups;
unsigned NumDups2 = Token->NumDups2;
- delete Token;
-
// If the block has been evicted out of the queue or it has already been
// marked dead (due to it being predicated), then skip it.
if (BBI.IsDone)
@@ -414,18 +422,11 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) {
MadeChange |= Change;
}
- // Delete tokens in case of early exit.
- while (!Tokens.empty()) {
- IfcvtToken *Token = Tokens.back();
- Tokens.pop_back();
- delete Token;
- }
-
Tokens.clear();
BBAnalysis.clear();
if (MadeChange && IfCvtBranchFold) {
- BranchFolder BF(false, false, *MBFI, *MBPI);
+ BranchFolder BF(false, false, MBFI, *MBPI);
BF.OptimizeFunction(MF, TII, MF.getSubtarget().getRegisterInfo(),
getAnalysisIfAvailable<MachineModuleInfo>());
}
@@ -586,7 +587,7 @@ bool IfConverter::ValidDiamond(BBInfo &TrueBBI, BBInfo &FalseBBI,
if (FIB == FIE)
break;
}
- if (!TIB->isIdenticalTo(FIB))
+ if (!TIB->isIdenticalTo(*FIB))
break;
++Dups1;
++TIB;
@@ -595,15 +596,19 @@ bool IfConverter::ValidDiamond(BBInfo &TrueBBI, BBInfo &FalseBBI,
// Now, in preparation for counting duplicate instructions at the ends of the
// blocks, move the end iterators up past any branch instructions.
- while (TIE != TIB) {
- --TIE;
- if (!TIE->isBranch())
- break;
- }
- while (FIE != FIB) {
- --FIE;
- if (!FIE->isBranch())
- break;
+ // If both blocks are returning, don't skip the branches, since they will
+ // likely both be identical return instructions. In such cases the return
+ // can be left unpredicated.
+ // Check for already containing all of the block.
+ if (TIB == TIE || FIB == FIE)
+ return true;
+ --TIE;
+ --FIE;
+ if (!TrueBBI.BB->succ_empty() || !FalseBBI.BB->succ_empty()) {
+ while (TIE != TIB && TIE->isBranch())
+ --TIE;
+ while (FIE != FIB && FIE->isBranch())
+ --FIE;
}
// If Dups1 includes all of a block, then don't count duplicate
@@ -626,7 +631,7 @@ bool IfConverter::ValidDiamond(BBInfo &TrueBBI, BBInfo &FalseBBI,
if (FIE == FIB)
break;
}
- if (!TIE->isIdenticalTo(FIE))
+ if (!TIE->isIdenticalTo(*FIE))
break;
++Dups2;
--TIE;
@@ -650,7 +655,7 @@ void IfConverter::ScanInstructions(BBInfo &BBI) {
BBI.TrueBB = BBI.FalseBB = nullptr;
BBI.BrCond.clear();
BBI.IsBrAnalyzable =
- !TII->AnalyzeBranch(*BBI.BB, BBI.TrueBB, BBI.FalseBB, BBI.BrCond);
+ !TII->analyzeBranch(*BBI.BB, BBI.TrueBB, BBI.FalseBB, BBI.BrCond);
BBI.HasFallThrough = BBI.IsBrAnalyzable && BBI.FalseBB == nullptr;
if (BBI.BrCond.size()) {
@@ -670,16 +675,45 @@ void IfConverter::ScanInstructions(BBInfo &BBI) {
BBI.ExtraCost = 0;
BBI.ExtraCost2 = 0;
BBI.ClobbersPred = false;
- for (MachineBasicBlock::iterator I = BBI.BB->begin(), E = BBI.BB->end();
- I != E; ++I) {
- if (I->isDebugValue())
+ for (auto &MI : *BBI.BB) {
+ if (MI.isDebugValue())
continue;
- if (I->isNotDuplicable())
+ // It's unsafe to duplicate convergent instructions in this context, so set
+ // BBI.CannotBeCopied to true if MI is convergent. To see why, consider the
+ // following CFG, which is subject to our "simple" transformation.
+ //
+ // BB0 // if (c1) goto BB1; else goto BB2;
+ // / \
+ // BB1 |
+ // | BB2 // if (c2) goto TBB; else goto FBB;
+ // | / |
+ // | / |
+ // TBB |
+ // | |
+ // | FBB
+ // |
+ // exit
+ //
+ // Suppose we want to move TBB's contents up into BB1 and BB2 (in BB1 they'd
+ // be unconditional, and in BB2, they'd be predicated upon c2), and suppose
+ // TBB contains a convergent instruction. This is safe iff doing so does
+ // not add a control-flow dependency to the convergent instruction -- i.e.,
+ // it's safe iff the set of control flows that leads us to the convergent
+ // instruction does not get smaller after the transformation.
+ //
+ // Originally we executed TBB if c1 || c2. After the transformation, there
+ // are two copies of TBB's instructions. We get to the first if c1, and we
+ // get to the second if !c1 && c2.
+ //
+ // There are clearly fewer ways to satisfy the condition "c1" than
+ // "c1 || c2". Since we've shrunk the set of control flows which lead to
+ // our convergent instruction, the transformation is unsafe.
+ if (MI.isNotDuplicable() || MI.isConvergent())
BBI.CannotBeCopied = true;
- bool isPredicated = TII->isPredicated(I);
- bool isCondBr = BBI.IsBrAnalyzable && I->isConditionalBranch();
+ bool isPredicated = TII->isPredicated(MI);
+ bool isCondBr = BBI.IsBrAnalyzable && MI.isConditionalBranch();
// A conditional branch is not predicable, but it may be eliminated.
if (isCondBr)
@@ -687,8 +721,8 @@ void IfConverter::ScanInstructions(BBInfo &BBI) {
if (!isPredicated) {
BBI.NonPredSize++;
- unsigned ExtraPredCost = TII->getPredicationCost(&*I);
- unsigned NumCycles = SchedModel.computeInstrLatency(&*I, false);
+ unsigned ExtraPredCost = TII->getPredicationCost(MI);
+ unsigned NumCycles = SchedModel.computeInstrLatency(&MI, false);
if (NumCycles > 1)
BBI.ExtraCost += NumCycles-1;
BBI.ExtraCost2 += ExtraPredCost;
@@ -712,10 +746,10 @@ void IfConverter::ScanInstructions(BBInfo &BBI) {
// FIXME: Make use of PredDefs? e.g. ADDC, SUBC sets predicates but are
// still potentially predicable.
std::vector<MachineOperand> PredDefs;
- if (TII->DefinesPredicate(I, PredDefs))
+ if (TII->DefinesPredicate(MI, PredDefs))
BBI.ClobbersPred = true;
- if (!TII->isPredicable(I)) {
+ if (!TII->isPredicable(MI)) {
BBI.IsUnpredicable = true;
return;
}
@@ -764,8 +798,8 @@ bool IfConverter::FeasibilityAnalysis(BBInfo &BBI,
/// AnalyzeBlock - Analyze the structure of the sub-CFG starting from
/// the specified block. Record its successors and whether it looks like an
/// if-conversion candidate.
-void IfConverter::AnalyzeBlock(MachineBasicBlock *MBB,
- std::vector<IfcvtToken*> &Tokens) {
+void IfConverter::AnalyzeBlock(
+ MachineBasicBlock *MBB, std::vector<std::unique_ptr<IfcvtToken>> &Tokens) {
struct BBState {
BBState(MachineBasicBlock *BB) : MBB(BB), SuccsAnalyzed(false) {}
MachineBasicBlock *MBB;
@@ -863,8 +897,8 @@ void IfConverter::AnalyzeBlock(MachineBasicBlock *MBB,
// \ /
// TailBB
// Note TailBB can be empty.
- Tokens.push_back(new IfcvtToken(BBI, ICDiamond, TNeedSub|FNeedSub, Dups,
- Dups2));
+ Tokens.push_back(llvm::make_unique<IfcvtToken>(
+ BBI, ICDiamond, TNeedSub | FNeedSub, Dups, Dups2));
Enqueued = true;
}
@@ -879,7 +913,8 @@ void IfConverter::AnalyzeBlock(MachineBasicBlock *MBB,
// | TBB
// | /
// FBB
- Tokens.push_back(new IfcvtToken(BBI, ICTriangle, TNeedSub, Dups));
+ Tokens.push_back(
+ llvm::make_unique<IfcvtToken>(BBI, ICTriangle, TNeedSub, Dups));
Enqueued = true;
}
@@ -887,7 +922,8 @@ void IfConverter::AnalyzeBlock(MachineBasicBlock *MBB,
MeetIfcvtSizeLimit(*TrueBBI.BB, TrueBBI.NonPredSize + TrueBBI.ExtraCost,
TrueBBI.ExtraCost2, Prediction) &&
FeasibilityAnalysis(TrueBBI, BBI.BrCond, true, true)) {
- Tokens.push_back(new IfcvtToken(BBI, ICTriangleRev, TNeedSub, Dups));
+ Tokens.push_back(
+ llvm::make_unique<IfcvtToken>(BBI, ICTriangleRev, TNeedSub, Dups));
Enqueued = true;
}
@@ -902,7 +938,8 @@ void IfConverter::AnalyzeBlock(MachineBasicBlock *MBB,
// | TBB---> exit
// |
// FBB
- Tokens.push_back(new IfcvtToken(BBI, ICSimple, TNeedSub, Dups));
+ Tokens.push_back(
+ llvm::make_unique<IfcvtToken>(BBI, ICSimple, TNeedSub, Dups));
Enqueued = true;
}
@@ -914,7 +951,8 @@ void IfConverter::AnalyzeBlock(MachineBasicBlock *MBB,
FalseBBI.NonPredSize + FalseBBI.ExtraCost,
FalseBBI.ExtraCost2, Prediction.getCompl()) &&
FeasibilityAnalysis(FalseBBI, RevCond, true)) {
- Tokens.push_back(new IfcvtToken(BBI, ICTriangleFalse, FNeedSub, Dups));
+ Tokens.push_back(llvm::make_unique<IfcvtToken>(BBI, ICTriangleFalse,
+ FNeedSub, Dups));
Enqueued = true;
}
@@ -924,7 +962,8 @@ void IfConverter::AnalyzeBlock(MachineBasicBlock *MBB,
FalseBBI.NonPredSize + FalseBBI.ExtraCost,
FalseBBI.ExtraCost2, Prediction.getCompl()) &&
FeasibilityAnalysis(FalseBBI, RevCond, true, true)) {
- Tokens.push_back(new IfcvtToken(BBI, ICTriangleFRev, FNeedSub, Dups));
+ Tokens.push_back(
+ llvm::make_unique<IfcvtToken>(BBI, ICTriangleFRev, FNeedSub, Dups));
Enqueued = true;
}
@@ -933,7 +972,8 @@ void IfConverter::AnalyzeBlock(MachineBasicBlock *MBB,
FalseBBI.NonPredSize + FalseBBI.ExtraCost,
FalseBBI.ExtraCost2, Prediction.getCompl()) &&
FeasibilityAnalysis(FalseBBI, RevCond)) {
- Tokens.push_back(new IfcvtToken(BBI, ICSimpleFalse, FNeedSub, Dups));
+ Tokens.push_back(
+ llvm::make_unique<IfcvtToken>(BBI, ICSimpleFalse, FNeedSub, Dups));
Enqueued = true;
}
}
@@ -947,8 +987,8 @@ void IfConverter::AnalyzeBlock(MachineBasicBlock *MBB,
/// AnalyzeBlocks - Analyze all blocks and find entries for all if-conversion
/// candidates.
-void IfConverter::AnalyzeBlocks(MachineFunction &MF,
- std::vector<IfcvtToken*> &Tokens) {
+void IfConverter::AnalyzeBlocks(
+ MachineFunction &MF, std::vector<std::unique_ptr<IfcvtToken>> &Tokens) {
for (auto &BB : MF)
AnalyzeBlock(&BB, Tokens);
@@ -1001,15 +1041,15 @@ static void InsertUncondBranch(MachineBasicBlock *BB, MachineBasicBlock *ToBB,
void IfConverter::RemoveExtraEdges(BBInfo &BBI) {
MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
SmallVector<MachineOperand, 4> Cond;
- if (!TII->AnalyzeBranch(*BBI.BB, TBB, FBB, Cond))
+ if (!TII->analyzeBranch(*BBI.BB, TBB, FBB, Cond))
BBI.BB->CorrectExtraCFGEdges(TBB, FBB, !Cond.empty());
}
/// Behaves like LiveRegUnits::StepForward() but also adds implicit uses to all
/// values defined in MI which are not live/used by MI.
-static void UpdatePredRedefs(MachineInstr *MI, LivePhysRegs &Redefs) {
+static void UpdatePredRedefs(MachineInstr &MI, LivePhysRegs &Redefs) {
SmallVector<std::pair<unsigned, const MachineOperand*>, 4> Clobbers;
- Redefs.stepForward(*MI, Clobbers);
+ Redefs.stepForward(MI, Clobbers);
// Now add the implicit uses for each of the clobbered values.
for (auto Reg : Clobbers) {
@@ -1046,7 +1086,7 @@ static void UpdatePredRedefs(MachineInstr *MI, LivePhysRegs &Redefs) {
* Remove kill flags from operands with a register in the @p DontKill set.
*/
static void RemoveKills(MachineInstr &MI, const LivePhysRegs &DontKill) {
- for (MIBundleOperands O(&MI); O.isValid(); ++O) {
+ for (MIBundleOperands O(MI); O.isValid(); ++O) {
if (!O->isReg() || !O->isKill())
continue;
if (DontKill.contains(O->getReg()))
@@ -1097,13 +1137,13 @@ bool IfConverter::IfConvertSimple(BBInfo &BBI, IfcvtKind Kind) {
// Initialize liveins to the first BB. These are potentially redefined by
// predicated instructions.
Redefs.init(TRI);
- Redefs.addLiveIns(CvtBBI->BB);
- Redefs.addLiveIns(NextBBI->BB);
+ Redefs.addLiveIns(*CvtBBI->BB);
+ Redefs.addLiveIns(*NextBBI->BB);
// Compute a set of registers which must not be killed by instructions in
// BB1: This is everything live-in to BB2.
DontKill.init(TRI);
- DontKill.addLiveIns(NextBBI->BB);
+ DontKill.addLiveIns(*NextBBI->BB);
if (CvtBBI->BB->pred_size() > 1) {
BBI.NonPredSize -= TII->RemoveBranch(*BBI.BB);
@@ -1202,8 +1242,8 @@ bool IfConverter::IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind) {
// Initialize liveins to the first BB. These are potentially redefined by
// predicated instructions.
Redefs.init(TRI);
- Redefs.addLiveIns(CvtBBI->BB);
- Redefs.addLiveIns(NextBBI->BB);
+ Redefs.addLiveIns(*CvtBBI->BB);
+ Redefs.addLiveIns(*NextBBI->BB);
DontKill.clear();
@@ -1357,7 +1397,7 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind,
// Initialize liveins to the first BB. These are potentially redefined by
// predicated instructions.
Redefs.init(TRI);
- Redefs.addLiveIns(BBI1->BB);
+ Redefs.addLiveIns(*BBI1->BB);
// Remove the duplicated instructions at the beginnings of both paths.
// Skip dbg_value instructions
@@ -1395,8 +1435,13 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind,
BBI.BB->splice(BBI.BB->end(), BBI1->BB, BBI1->BB->begin(), DI1);
BBI2->BB->erase(BBI2->BB->begin(), DI2);
- // Remove branch from 'true' block and remove duplicated instructions.
- BBI1->NonPredSize -= TII->RemoveBranch(*BBI1->BB);
+ // Remove branch from the 'true' block, unless it was not analyzable.
+ // Non-analyzable branches need to be preserved, since in such cases,
+ // the CFG structure is not an actual diamond (the join block may not
+ // be present).
+ if (BBI1->IsBrAnalyzable)
+ BBI1->NonPredSize -= TII->RemoveBranch(*BBI1->BB);
+ // Remove duplicated instructions.
DI1 = BBI1->BB->end();
for (unsigned i = 0; i != NumDups2; ) {
// NumDups2 only counted non-dbg_value instructions, so this won't
@@ -1413,8 +1458,10 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind,
// must be removed.
RemoveKills(BBI1->BB->begin(), BBI1->BB->end(), DontKill, *TRI);
- // Remove 'false' block branch and find the last instruction to predicate.
- BBI2->NonPredSize -= TII->RemoveBranch(*BBI2->BB);
+ // Remove 'false' block branch (unless it was not analyzable), and find
+ // the last instruction to predicate.
+ if (BBI2->IsBrAnalyzable)
+ BBI2->NonPredSize -= TII->RemoveBranch(*BBI2->BB);
DI2 = BBI2->BB->end();
while (NumDups2 != 0) {
// NumDups2 only counted non-dbg_value instructions, so this won't
@@ -1473,6 +1520,18 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind,
// Predicate the 'true' block.
PredicateBlock(*BBI1, BBI1->BB->end(), *Cond1, &RedefsByFalse);
+ // After predicating BBI1, if there is a predicated terminator in BBI1 and
+ // a non-predicated in BBI2, then we don't want to predicate the one from
+ // BBI2. The reason is that if we merged these blocks, we would end up with
+ // two predicated terminators in the same block.
+ if (!BBI2->BB->empty() && (DI2 == BBI2->BB->end())) {
+ MachineBasicBlock::iterator BBI1T = BBI1->BB->getFirstTerminator();
+ MachineBasicBlock::iterator BBI2T = BBI2->BB->getFirstTerminator();
+ if (BBI1T != BBI1->BB->end() && TII->isPredicated(*BBI1T) &&
+ BBI2T != BBI2->BB->end() && !TII->isPredicated(*BBI2T))
+ --DI2;
+ }
+
// Predicate the 'false' block.
PredicateBlock(*BBI2, DI2, *Cond2);
@@ -1488,6 +1547,12 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind,
BBInfo &TailBBI = BBAnalysis[TailBB->getNumber()];
bool CanMergeTail = !TailBBI.HasFallThrough &&
!TailBBI.BB->hasAddressTaken();
+ // The if-converted block can still have a predicated terminator
+ // (e.g. a predicated return). If that is the case, we cannot merge
+ // it with the tail block.
+ MachineBasicBlock::const_iterator TI = BBI.BB->getFirstTerminator();
+ if (TI != BBI.BB->end() && TII->isPredicated(*TI))
+ CanMergeTail = false;
// There may still be a fall-through edge from BBI1 or BBI2 to TailBB;
// check if there are any other predecessors besides those.
unsigned NumPreds = TailBB->pred_size();
@@ -1523,14 +1588,14 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind,
return true;
}
-static bool MaySpeculate(const MachineInstr *MI,
+static bool MaySpeculate(const MachineInstr &MI,
SmallSet<unsigned, 4> &LaterRedefs) {
bool SawStore = true;
- if (!MI->isSafeToMove(nullptr, SawStore))
+ if (!MI.isSafeToMove(nullptr, SawStore))
return false;
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- const MachineOperand &MO = MI->getOperand(i);
+ for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI.getOperand(i);
if (!MO.isReg())
continue;
unsigned Reg = MO.getReg();
@@ -1551,8 +1616,8 @@ void IfConverter::PredicateBlock(BBInfo &BBI,
SmallSet<unsigned, 4> *LaterRedefs) {
bool AnyUnpred = false;
bool MaySpec = LaterRedefs != nullptr;
- for (MachineBasicBlock::iterator I = BBI.BB->begin(); I != E; ++I) {
- if (I->isDebugValue() || TII->isPredicated(I))
+ for (MachineInstr &I : llvm::make_range(BBI.BB->begin(), E)) {
+ if (I.isDebugValue() || TII->isPredicated(I))
continue;
// It may be possible not to predicate an instruction if it's the 'true'
// side of a diamond and the 'false' side may re-define the instruction's
@@ -1566,7 +1631,7 @@ void IfConverter::PredicateBlock(BBInfo &BBI,
MaySpec = false;
if (!TII->PredicateInstruction(I, Cond)) {
#ifndef NDEBUG
- dbgs() << "Unable to predicate " << *I << "!\n";
+ dbgs() << "Unable to predicate " << I << "!\n";
#endif
llvm_unreachable(nullptr);
}
@@ -1593,25 +1658,24 @@ void IfConverter::CopyAndPredicateBlock(BBInfo &ToBBI, BBInfo &FromBBI,
bool IgnoreBr) {
MachineFunction &MF = *ToBBI.BB->getParent();
- for (MachineBasicBlock::iterator I = FromBBI.BB->begin(),
- E = FromBBI.BB->end(); I != E; ++I) {
+ for (auto &I : *FromBBI.BB) {
// Do not copy the end of the block branches.
- if (IgnoreBr && I->isBranch())
+ if (IgnoreBr && I.isBranch())
break;
- MachineInstr *MI = MF.CloneMachineInstr(I);
+ MachineInstr *MI = MF.CloneMachineInstr(&I);
ToBBI.BB->insert(ToBBI.BB->end(), MI);
ToBBI.NonPredSize++;
- unsigned ExtraPredCost = TII->getPredicationCost(&*I);
- unsigned NumCycles = SchedModel.computeInstrLatency(&*I, false);
+ unsigned ExtraPredCost = TII->getPredicationCost(I);
+ unsigned NumCycles = SchedModel.computeInstrLatency(&I, false);
if (NumCycles > 1)
ToBBI.ExtraCost += NumCycles-1;
ToBBI.ExtraCost2 += ExtraPredCost;
if (!TII->isPredicated(I) && !MI->isDebugValue()) {
- if (!TII->PredicateInstruction(MI, Cond)) {
+ if (!TII->PredicateInstruction(*MI, Cond)) {
#ifndef NDEBUG
- dbgs() << "Unable to predicate " << *I << "!\n";
+ dbgs() << "Unable to predicate " << I << "!\n";
#endif
llvm_unreachable(nullptr);
}
@@ -1619,7 +1683,7 @@ void IfConverter::CopyAndPredicateBlock(BBInfo &ToBBI, BBInfo &FromBBI,
// If the predicated instruction now redefines a register as the result of
// if-conversion, add an implicit kill.
- UpdatePredRedefs(MI, Redefs);
+ UpdatePredRedefs(*MI, Redefs);
// Some kill flags may not be correct anymore.
if (!DontKill.empty())
@@ -1659,8 +1723,16 @@ void IfConverter::MergeBlocks(BBInfo &ToBBI, BBInfo &FromBBI, bool AddEdges) {
assert(!FromBBI.BB->hasAddressTaken() &&
"Removing a BB whose address is taken!");
- ToBBI.BB->splice(ToBBI.BB->end(),
- FromBBI.BB, FromBBI.BB->begin(), FromBBI.BB->end());
+ // In case FromBBI.BB contains terminators (e.g. a return instruction),
+ // first move the non-terminator instructions, then the terminators.
+ MachineBasicBlock::iterator FromTI = FromBBI.BB->getFirstTerminator();
+ MachineBasicBlock::iterator ToTI = ToBBI.BB->getFirstTerminator();
+ ToBBI.BB->splice(ToTI, FromBBI.BB, FromBBI.BB->begin(), FromTI);
+
+ // If FromBB has a non-predicated terminator, it has to go at the end.
+ if (FromTI != FromBBI.BB->end() && !TII->isPredicated(*FromTI))
+ ToTI = ToBBI.BB->end();
+ ToBBI.BB->splice(ToTI, FromBBI.BB, FromTI, FromBBI.BB->end());
// Force normalizing the successors' probabilities of ToBBI.BB to convert all
// unknown probabilities into known ones.
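A sketch of the splice ordering introduced above, assuming only the MachineBasicBlock API used elsewhere in the pass; for example, with ToBB = [a; b; PRED_RET] and FromBB = [i1; i2; RET], the result is [a; b; i1; i2; PRED_RET; RET], keeping the unpredicated RET last:

#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/Target/TargetInstrInfo.h"

void spliceKeepingTerminatorsLast(llvm::MachineBasicBlock &ToBB,
                                  llvm::MachineBasicBlock &FromBB,
                                  const llvm::TargetInstrInfo &TII) {
  llvm::MachineBasicBlock::iterator FromTI = FromBB.getFirstTerminator();
  llvm::MachineBasicBlock::iterator ToTI = ToBB.getFirstTerminator();
  // Phase 1: move the non-terminators in front of ToBB's terminators.
  ToBB.splice(ToTI, &FromBB, FromBB.begin(), FromTI);
  // Phase 2: a non-predicated terminator must end the block, so append
  // the terminator range at the very end in that case.
  if (FromTI != FromBB.end() && !TII.isPredicated(*FromTI))
    ToTI = ToBB.end();
  ToBB.splice(ToTI, &FromBB, FromTI, FromBB.end());
}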
@@ -1768,5 +1840,5 @@ void IfConverter::MergeBlocks(BBInfo &ToBBI, BBInfo &FromBBI, bool AddEdges) {
FunctionPass *
llvm::createIfConverter(std::function<bool(const Function &)> Ftor) {
- return new IfConverter(Ftor);
+ return new IfConverter(std::move(Ftor));
}
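The std::move above matters because the factory takes its std::function by value; moving it into the pass avoids copying any captured state. A generic sketch of the sink-parameter pattern:

#include <functional>
#include <utility>

class Filter {
  std::function<bool(int)> Pred;
public:
  explicit Filter(std::function<bool(int)> F) : Pred(std::move(F)) {}
  bool test(int V) const { return Pred && Pred(V); }
};

Filter makeFilter(std::function<bool(int)> F) {
  return Filter(std::move(F)); // forward the by-value parameter with a move
}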
diff --git a/contrib/llvm/lib/CodeGen/ImplicitNullChecks.cpp b/contrib/llvm/lib/CodeGen/ImplicitNullChecks.cpp
index 39c1b9f..31d6bd0 100644
--- a/contrib/llvm/lib/CodeGen/ImplicitNullChecks.cpp
+++ b/contrib/llvm/lib/CodeGen/ImplicitNullChecks.cpp
@@ -28,6 +28,7 @@
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineMemOperand.h"
@@ -46,10 +47,9 @@
using namespace llvm;
-static cl::opt<unsigned> PageSize("imp-null-check-page-size",
- cl::desc("The page size of the target in "
- "bytes"),
- cl::init(4096));
+static cl::opt<int> PageSize("imp-null-check-page-size",
+ cl::desc("The page size of the target in bytes"),
+ cl::init(4096));
#define DEBUG_TYPE "implicit-null-checks"
@@ -60,7 +60,7 @@ namespace {
class ImplicitNullChecks : public MachineFunctionPass {
/// Represents one null check that can be made implicit.
- struct NullCheck {
+ class NullCheck {
// The memory operation the null check can be folded into.
MachineInstr *MemOperation;
@@ -76,27 +76,42 @@ class ImplicitNullChecks : public MachineFunctionPass {
// The block branched to if the pointer is null.
MachineBasicBlock *NullSucc;
- NullCheck()
- : MemOperation(), CheckOperation(), CheckBlock(), NotNullSucc(),
- NullSucc() {}
+ // If this is non-null, then MemOperation has a dependency on this
+ // instruction, and it needs to be hoisted to execute before MemOperation.
+ MachineInstr *OnlyDependency;
+ public:
explicit NullCheck(MachineInstr *memOperation, MachineInstr *checkOperation,
MachineBasicBlock *checkBlock,
MachineBasicBlock *notNullSucc,
- MachineBasicBlock *nullSucc)
+ MachineBasicBlock *nullSucc,
+ MachineInstr *onlyDependency)
: MemOperation(memOperation), CheckOperation(checkOperation),
- CheckBlock(checkBlock), NotNullSucc(notNullSucc), NullSucc(nullSucc) {
- }
+ CheckBlock(checkBlock), NotNullSucc(notNullSucc), NullSucc(nullSucc),
+ OnlyDependency(onlyDependency) {}
+
+ MachineInstr *getMemOperation() const { return MemOperation; }
+
+ MachineInstr *getCheckOperation() const { return CheckOperation; }
+
+ MachineBasicBlock *getCheckBlock() const { return CheckBlock; }
+
+ MachineBasicBlock *getNotNullSucc() const { return NotNullSucc; }
+
+ MachineBasicBlock *getNullSucc() const { return NullSucc; }
+
+ MachineInstr *getOnlyDependency() const { return OnlyDependency; }
};
const TargetInstrInfo *TII = nullptr;
const TargetRegisterInfo *TRI = nullptr;
+ AliasAnalysis *AA = nullptr;
MachineModuleInfo *MMI = nullptr;
bool analyzeBlockForNullChecks(MachineBasicBlock &MBB,
SmallVectorImpl<NullCheck> &NullCheckList);
MachineInstr *insertFaultingLoad(MachineInstr *LoadMI, MachineBasicBlock *MBB,
- MCSymbol *HandlerLabel);
+ MachineBasicBlock *HandlerMBB);
void rewriteNullChecks(ArrayRef<NullCheck> NullCheckList);
public:
@@ -107,6 +122,15 @@ public:
}
bool runOnMachineFunction(MachineFunction &MF) override;
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<AAResultsWrapperPass>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ MachineFunctionProperties getRequiredProperties() const override {
+ return MachineFunctionProperties().set(
+ MachineFunctionProperties::Property::AllVRegsAllocated);
+ }
};
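A minimal pass skeleton showing the same wiring in isolation; ExamplePass is a hypothetical name, and the property enum matches the one used in this tree:

#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
using namespace llvm;

namespace {
struct ExamplePass : MachineFunctionPass {
  static char ID;
  ExamplePass() : MachineFunctionPass(ID) {}

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<AAResultsWrapperPass>(); // make AA available to the pass
    MachineFunctionPass::getAnalysisUsage(AU);
  }

  MachineFunctionProperties getRequiredProperties() const override {
    // Run only after register allocation has assigned all vregs.
    return MachineFunctionProperties().set(
        MachineFunctionProperties::Property::AllVRegsAllocated);
  }

  bool runOnMachineFunction(MachineFunction &MF) override {
    AliasAnalysis *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
    (void)AA; // alias queries would go here
    return false;
  }
};
char ExamplePass::ID = 0;
} // end anonymous namespace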
/// \brief Detect re-ordering hazards and dependencies.
@@ -115,14 +139,22 @@ public:
/// machine instruction can be re-ordered from after the machine instructions
/// seen so far to before them.
class HazardDetector {
- DenseSet<unsigned> RegDefs;
+ static MachineInstr *getUnknownMI() {
+ return DenseMapInfo<MachineInstr *>::getTombstoneKey();
+ }
+
+ // Maps physical registers to the instruction defining them. If there has
+ // been more than one def of a specific register, that register is mapped to
+ // getUnknownMI().
+ DenseMap<unsigned, MachineInstr *> RegDefs;
DenseSet<unsigned> RegUses;
const TargetRegisterInfo &TRI;
bool hasSeenClobber;
+ AliasAnalysis &AA;
public:
- explicit HazardDetector(const TargetRegisterInfo &TRI) :
- TRI(TRI), hasSeenClobber(false) {}
+ explicit HazardDetector(const TargetRegisterInfo &TRI, AliasAnalysis &AA)
+ : TRI(TRI), hasSeenClobber(false), AA(AA) {}
/// \brief Make a note of \p MI for later queries to isSafeToHoist.
///
@@ -130,8 +162,10 @@ public:
void rememberInstruction(MachineInstr *MI);
/// \brief Return true if it is safe to hoist \p MI from after all the
- /// instructions seen so far (via rememberInstruction) to before it.
- bool isSafeToHoist(MachineInstr *MI);
+ /// instructions seen so far (via rememberInstruction) to before it. If \p MI
+ /// has one and only one transitive dependency, set \p Dependency to that
+ /// instruction. If there are more dependencies, return false.
+ bool isSafeToHoist(MachineInstr *MI, MachineInstr *&Dependency);
/// \brief Return true if this instance of HazardDetector has been clobbered
/// (i.e. has no more useful information).
@@ -170,15 +204,23 @@ void HazardDetector::rememberInstruction(MachineInstr *MI) {
if (!MO.isReg() || !MO.getReg())
continue;
- if (MO.isDef())
- RegDefs.insert(MO.getReg());
- else
+ if (MO.isDef()) {
+ auto It = RegDefs.find(MO.getReg());
+ if (It == RegDefs.end())
+ RegDefs.insert({MO.getReg(), MI});
+ else {
+ assert(It->second && "Found null MI?");
+ It->second = getUnknownMI();
+ }
+ } else
RegUses.insert(MO.getReg());
}
}
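A standalone sketch of the sentinel trick above, using only the standard library; Instr and unknownDef are hypothetical stand-ins for MachineInstr and the DenseMap tombstone key:

#include <cstdint>
#include <unordered_map>
#include <utility>

struct Instr;

inline Instr *unknownDef() {
  return reinterpret_cast<Instr *>(~static_cast<std::uintptr_t>(0));
}

inline void rememberDef(std::unordered_map<unsigned, Instr *> &RegDefs,
                        unsigned Reg, Instr *MI) {
  std::unordered_map<unsigned, Instr *>::iterator It = RegDefs.find(Reg);
  if (It == RegDefs.end())
    RegDefs.insert(std::make_pair(Reg, MI)); // first def: remember it
  else
    It->second = unknownDef(); // second def: precise info is lost
}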
-bool HazardDetector::isSafeToHoist(MachineInstr *MI) {
+bool HazardDetector::isSafeToHoist(MachineInstr *MI,
+ MachineInstr *&Dependency) {
assert(!isClobbered() && "isSafeToHoist cannot do anything useful!");
+ Dependency = nullptr;
// Right now we don't want to worry about LLVM's memory model. This can be
// made more precise later.
@@ -188,9 +230,54 @@ bool HazardDetector::isSafeToHoist(MachineInstr *MI) {
for (auto &MO : MI->operands()) {
if (MO.isReg() && MO.getReg()) {
- for (unsigned Reg : RegDefs)
- if (TRI.regsOverlap(Reg, MO.getReg()))
- return false; // We found a write-after-write or read-after-write
+ for (auto &RegDef : RegDefs) {
+ unsigned Reg = RegDef.first;
+ MachineInstr *MI = RegDef.second;
+ if (!TRI.regsOverlap(Reg, MO.getReg()))
+ continue;
+
+ // We found a write-after-write or read-after-write hazard; see if the
+ // instruction causing this dependency can be hoisted too.
+
+ if (MI == getUnknownMI())
+ // We don't have precise dependency information.
+ return false;
+
+ if (Dependency) {
+ if (Dependency == MI)
+ continue;
+ // We already have one dependency, and we can track only one.
+ return false;
+ }
+
+ // Now check if MI is actually a dependency that can be hoisted.
+
+ // We don't want to track transitive dependencies. We already know that
+ // MI is the only instruction that defines Reg, but we need to be sure
+ // that it does not use any registers that have been defined (trivially
+ // checked below by ensuring that there are no register uses), and that
+ // it is the only def for every register it defines (otherwise we could
+ // violate a write-after-write hazard).
+ auto IsMIOperandSafe = [&](MachineOperand &MO) {
+ if (!MO.isReg() || !MO.getReg())
+ return true;
+ if (MO.isUse())
+ return false;
+ assert((!MO.isDef() || RegDefs.count(MO.getReg())) &&
+ "All defs must be tracked in RegDefs by now!");
+ return !MO.isDef() || RegDefs.find(MO.getReg())->second == MI;
+ };
+
+ if (!all_of(MI->operands(), IsMIOperandSafe))
+ return false;
+
+ // Now check for speculation safety:
+ bool SawStore = true;
+ if (!MI->isSafeToMove(&AA, SawStore) || MI->mayLoad())
+ return false;
+
+ Dependency = MI;
+ }
if (MO.isDef())
for (unsigned Reg : RegUses)
@@ -206,6 +293,7 @@ bool ImplicitNullChecks::runOnMachineFunction(MachineFunction &MF) {
TII = MF.getSubtarget().getInstrInfo();
TRI = MF.getRegInfo().getTargetRegisterInfo();
MMI = &MF.getMMI();
+ AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
SmallVector<NullCheck, 16> NullCheckList;
@@ -218,6 +306,16 @@ bool ImplicitNullChecks::runOnMachineFunction(MachineFunction &MF) {
return !NullCheckList.empty();
}
+// Return true if any register aliasing \p Reg is live-in into \p MBB.
+static bool AnyAliasLiveIn(const TargetRegisterInfo *TRI,
+ MachineBasicBlock *MBB, unsigned Reg) {
+ for (MCRegAliasIterator AR(Reg, TRI, /*IncludeSelf*/ true); AR.isValid();
+ ++AR)
+ if (MBB->isLiveIn(*AR))
+ return true;
+ return false;
+}
+
/// Analyze MBB to check if its terminating branch can be turned into an
/// implicit null check. If yes, append a description of the said null check to
/// NullCheckList and return true, else return false.
@@ -234,7 +332,7 @@ bool ImplicitNullChecks::analyzeBlockForNullChecks(
MachineBranchPredicate MBP;
- if (TII->AnalyzeBranchPredicate(MBB, MBP, true))
+ if (TII->analyzeBranchPredicate(MBB, MBP, true))
return false;
// Is the predicate comparing an integer to zero?
@@ -319,22 +417,59 @@ bool ImplicitNullChecks::analyzeBlockForNullChecks(
unsigned PointerReg = MBP.LHS.getReg();
- HazardDetector HD(*TRI);
+ HazardDetector HD(*TRI, *AA);
for (auto MII = NotNullSucc->begin(), MIE = NotNullSucc->end(); MII != MIE;
++MII) {
- MachineInstr *MI = &*MII;
- unsigned BaseReg, Offset;
+ MachineInstr &MI = *MII;
+ unsigned BaseReg;
+ int64_t Offset;
+ MachineInstr *Dependency = nullptr;
if (TII->getMemOpBaseRegImmOfs(MI, BaseReg, Offset, TRI))
- if (MI->mayLoad() && !MI->isPredicable() && BaseReg == PointerReg &&
- Offset < PageSize && MI->getDesc().getNumDefs() <= 1 &&
- HD.isSafeToHoist(MI)) {
- NullCheckList.emplace_back(MI, MBP.ConditionDef, &MBB, NotNullSucc,
- NullSucc);
- return true;
+ if (MI.mayLoad() && !MI.isPredicable() && BaseReg == PointerReg &&
+ Offset < PageSize && MI.getDesc().getNumDefs() <= 1 &&
+ HD.isSafeToHoist(&MI, Dependency)) {
+
+ auto DependencyOperandIsOk = [&](MachineOperand &MO) {
+ assert(!(MO.isReg() && MO.isUse()) &&
+ "No transitive dependendencies please!");
+ if (!MO.isReg() || !MO.getReg() || !MO.isDef())
+ return true;
+
+ // Make sure that we won't clobber any live ins to the sibling block
+ // by hoisting Dependency. For instance, we can't hoist INST to
+ // before the null check (even if it is safe, and does not violate any
+ // dependencies in the non_null_block) if %rdx is live-in to
+ // _null_block.
+ //
+ // test %rcx, %rcx
+ // je _null_block
+ // _non_null_block:
+ // %rdx<def> = INST
+ // ...
+ if (AnyAliasLiveIn(TRI, NullSucc, MO.getReg()))
+ return false;
+
+ // Make sure Dependency isn't re-defining the base register. Otherwise we
+ // won't get the memory operation on the address we want.
+ if (TRI->regsOverlap(MO.getReg(), BaseReg))
+ return false;
+
+ return true;
+ };
+
+ bool DependencyOperandsAreOk =
+ !Dependency ||
+ all_of(Dependency->operands(), DependencyOperandIsOk);
+
+ if (DependencyOperandsAreOk) {
+ NullCheckList.emplace_back(&MI, MBP.ConditionDef, &MBB, NotNullSucc,
+ NullSucc, Dependency);
+ return true;
+ }
}
- HD.rememberInstruction(MI);
+ HD.rememberInstruction(&MI);
if (HD.isClobbered())
return false;
}
@@ -344,11 +479,12 @@ bool ImplicitNullChecks::analyzeBlockForNullChecks(
/// Wrap a machine load instruction, LoadMI, into a FAULTING_LOAD_OP machine
/// instruction. The FAULTING_LOAD_OP instruction does the same load as LoadMI
-/// (defining the same register), and branches to HandlerLabel if the load
+/// (defining the same register), and branches to HandlerMBB if the load
/// faults. The FAULTING_LOAD_OP instruction is inserted at the end of MBB.
-MachineInstr *ImplicitNullChecks::insertFaultingLoad(MachineInstr *LoadMI,
- MachineBasicBlock *MBB,
- MCSymbol *HandlerLabel) {
+MachineInstr *
+ImplicitNullChecks::insertFaultingLoad(MachineInstr *LoadMI,
+ MachineBasicBlock *MBB,
+ MachineBasicBlock *HandlerMBB) {
const unsigned NoRegister = 0; // Guaranteed to be the NoRegister value for
// all targets.
@@ -364,7 +500,7 @@ MachineInstr *ImplicitNullChecks::insertFaultingLoad(MachineInstr *LoadMI,
}
auto MIB = BuildMI(MBB, DL, TII->get(TargetOpcode::FAULTING_LOAD_OP), DefReg)
- .addSym(HandlerLabel)
+ .addMBB(HandlerMBB)
.addImm(LoadMI->getOpcode());
for (auto &MO : LoadMI->uses())
@@ -381,28 +517,51 @@ void ImplicitNullChecks::rewriteNullChecks(
DebugLoc DL;
for (auto &NC : NullCheckList) {
- MCSymbol *HandlerLabel = MMI->getContext().createTempSymbol();
-
// Remove the conditional branch dependent on the null check.
- unsigned BranchesRemoved = TII->RemoveBranch(*NC.CheckBlock);
+ unsigned BranchesRemoved = TII->RemoveBranch(*NC.getCheckBlock());
(void)BranchesRemoved;
assert(BranchesRemoved > 0 && "expected at least one branch!");
+ if (auto *DepMI = NC.getOnlyDependency()) {
+ DepMI->removeFromParent();
+ NC.getCheckBlock()->insert(NC.getCheckBlock()->end(), DepMI);
+ }
+
// Insert a faulting load where the conditional branch was originally. The
// check done earlier ensures that this bit of code motion is legal. We do not
// touch the successors list for any basic block since we haven't changed
// control flow, we've just made it implicit.
- insertFaultingLoad(NC.MemOperation, NC.CheckBlock, HandlerLabel);
- NC.MemOperation->eraseFromParent();
- NC.CheckOperation->eraseFromParent();
+ MachineInstr *FaultingLoad = insertFaultingLoad(
+ NC.getMemOperation(), NC.getCheckBlock(), NC.getNullSucc());
+ // Now the values defined by MemOperation, if any, are live-ins of
+ // the block containing MemOperation.
+ // The original load operation may have implicit defs alongside
+ // the loaded value.
+ MachineBasicBlock *MBB = NC.getMemOperation()->getParent();
+ for (const MachineOperand &MO : FaultingLoad->operands()) {
+ if (!MO.isReg() || !MO.isDef())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg || MBB->isLiveIn(Reg))
+ continue;
+ MBB->addLiveIn(Reg);
+ }
- // Insert an *unconditional* branch to not-null successor.
- TII->InsertBranch(*NC.CheckBlock, NC.NotNullSucc, nullptr, /*Cond=*/None,
- DL);
+ if (auto *DepMI = NC.getOnlyDependency()) {
+ for (auto &MO : DepMI->operands()) {
+ if (!MO.isReg() || !MO.getReg() || !MO.isDef())
+ continue;
+ if (!NC.getNotNullSucc()->isLiveIn(MO.getReg()))
+ NC.getNotNullSucc()->addLiveIn(MO.getReg());
+ }
+ }
+
+ NC.getMemOperation()->eraseFromParent();
+ NC.getCheckOperation()->eraseFromParent();
- // Emit the HandlerLabel as an EH_LABEL.
- BuildMI(*NC.NullSucc, NC.NullSucc->begin(), DL,
- TII->get(TargetOpcode::EH_LABEL)).addSym(HandlerLabel);
+ // Insert an *unconditional* branch to not-null successor.
+ TII->InsertBranch(*NC.getCheckBlock(), NC.getNotNullSucc(), nullptr,
+ /*Cond=*/None, DL);
NumImplicitNullChecks++;
}
@@ -412,5 +571,6 @@ char ImplicitNullChecks::ID = 0;
char &llvm::ImplicitNullChecksID = ImplicitNullChecks::ID;
INITIALIZE_PASS_BEGIN(ImplicitNullChecks, "implicit-null-checks",
"Implicit null checks", false, false)
+INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_END(ImplicitNullChecks, "implicit-null-checks",
"Implicit null checks", false, false)
diff --git a/contrib/llvm/lib/CodeGen/InlineSpiller.cpp b/contrib/llvm/lib/CodeGen/InlineSpiller.cpp
index e310132..197db77 100644
--- a/contrib/llvm/lib/CodeGen/InlineSpiller.cpp
+++ b/contrib/llvm/lib/CodeGen/InlineSpiller.cpp
@@ -13,6 +13,8 @@
//===----------------------------------------------------------------------===//
#include "Spiller.h"
+#include "SplitKit.h"
+#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/TinyPtrVector.h"
@@ -30,6 +32,7 @@
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/VirtRegMap.h"
+#include "llvm/IR/DebugInfo.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
@@ -48,13 +51,82 @@ STATISTIC(NumReloadsRemoved, "Number of reloads removed");
STATISTIC(NumFolded, "Number of folded stack accesses");
STATISTIC(NumFoldedLoads, "Number of folded loads");
STATISTIC(NumRemats, "Number of rematerialized defs for spilling");
-STATISTIC(NumOmitReloadSpill, "Number of omitted spills of reloads");
-STATISTIC(NumHoists, "Number of hoisted spills");
static cl::opt<bool> DisableHoisting("disable-spill-hoist", cl::Hidden,
cl::desc("Disable inline spill hoisting"));
namespace {
+class HoistSpillHelper : private LiveRangeEdit::Delegate {
+ MachineFunction &MF;
+ LiveIntervals &LIS;
+ LiveStacks &LSS;
+ AliasAnalysis *AA;
+ MachineDominatorTree &MDT;
+ MachineLoopInfo &Loops;
+ VirtRegMap &VRM;
+ MachineFrameInfo &MFI;
+ MachineRegisterInfo &MRI;
+ const TargetInstrInfo &TII;
+ const TargetRegisterInfo &TRI;
+ const MachineBlockFrequencyInfo &MBFI;
+
+ InsertPointAnalysis IPA;
+
+ // Map from StackSlot to its original register.
+ DenseMap<int, unsigned> StackSlotToReg;
+ // Map from a pair of (StackSlot, Original VNI) to a set of spills which
+ // have the same stack slot and hold equal values defined by the Original
+ // VNI. These spills are mergeable and are hoist candidates.
+ typedef MapVector<std::pair<int, VNInfo *>, SmallPtrSet<MachineInstr *, 16>>
+ MergeableSpillsMap;
+ MergeableSpillsMap MergeableSpills;
+
+ /// This is the map from original register to a set containing all its
+ /// siblings. To hoist a spill to another BB, we need to find a live
+ /// sibling there and use it as the source of the new spill.
+ DenseMap<unsigned, SmallSetVector<unsigned, 16>> Virt2SiblingsMap;
+
+ bool isSpillCandBB(unsigned OrigReg, VNInfo &OrigVNI, MachineBasicBlock &BB,
+ unsigned &LiveReg);
+
+ void rmRedundantSpills(
+ SmallPtrSet<MachineInstr *, 16> &Spills,
+ SmallVectorImpl<MachineInstr *> &SpillsToRm,
+ DenseMap<MachineDomTreeNode *, MachineInstr *> &SpillBBToSpill);
+
+ void getVisitOrders(
+ MachineBasicBlock *Root, SmallPtrSet<MachineInstr *, 16> &Spills,
+ SmallVectorImpl<MachineDomTreeNode *> &Orders,
+ SmallVectorImpl<MachineInstr *> &SpillsToRm,
+ DenseMap<MachineDomTreeNode *, unsigned> &SpillsToKeep,
+ DenseMap<MachineDomTreeNode *, MachineInstr *> &SpillBBToSpill);
+
+ void runHoistSpills(unsigned OrigReg, VNInfo &OrigVNI,
+ SmallPtrSet<MachineInstr *, 16> &Spills,
+ SmallVectorImpl<MachineInstr *> &SpillsToRm,
+ DenseMap<MachineBasicBlock *, unsigned> &SpillsToIns);
+
+public:
+ HoistSpillHelper(MachineFunctionPass &pass, MachineFunction &mf,
+ VirtRegMap &vrm)
+ : MF(mf), LIS(pass.getAnalysis<LiveIntervals>()),
+ LSS(pass.getAnalysis<LiveStacks>()),
+ AA(&pass.getAnalysis<AAResultsWrapperPass>().getAAResults()),
+ MDT(pass.getAnalysis<MachineDominatorTree>()),
+ Loops(pass.getAnalysis<MachineLoopInfo>()), VRM(vrm),
+ MFI(*mf.getFrameInfo()), MRI(mf.getRegInfo()),
+ TII(*mf.getSubtarget().getInstrInfo()),
+ TRI(*mf.getSubtarget().getRegisterInfo()),
+ MBFI(pass.getAnalysis<MachineBlockFrequencyInfo>()),
+ IPA(LIS, mf.getNumBlockIDs()) {}
+
+ void addToMergeableSpills(MachineInstr &Spill, int StackSlot,
+ unsigned Original);
+ bool rmFromMergeableSpills(MachineInstr &Spill, int StackSlot);
+ void hoistAllSpills();
+ void LRE_DidCloneVirtReg(unsigned, unsigned) override;
+};
+
class InlineSpiller : public Spiller {
MachineFunction &MF;
LiveIntervals &LIS;
@@ -85,56 +157,12 @@ class InlineSpiller : public Spiller {
// Values that failed to remat at some point.
SmallPtrSet<VNInfo*, 8> UsedValues;
-public:
- // Information about a value that was defined by a copy from a sibling
- // register.
- struct SibValueInfo {
- // True when all reaching defs were reloads: No spill is necessary.
- bool AllDefsAreReloads;
-
- // True when value is defined by an original PHI not from splitting.
- bool DefByOrigPHI;
-
- // True when the COPY defining this value killed its source.
- bool KillsSource;
-
- // The preferred register to spill.
- unsigned SpillReg;
-
- // The value of SpillReg that should be spilled.
- VNInfo *SpillVNI;
-
- // The block where SpillVNI should be spilled. Currently, this must be the
- // block containing SpillVNI->def.
- MachineBasicBlock *SpillMBB;
-
- // A defining instruction that is not a sibling copy or a reload, or NULL.
- // This can be used as a template for rematerialization.
- MachineInstr *DefMI;
-
- // List of values that depend on this one. These values are actually the
- // same, but live range splitting has placed them in different registers,
- // or SSA update needed to insert PHI-defs to preserve SSA form. This is
- // copies of the current value and phi-kills. Usually only phi-kills cause
- // more than one dependent value.
- TinyPtrVector<VNInfo*> Deps;
-
- SibValueInfo(unsigned Reg, VNInfo *VNI)
- : AllDefsAreReloads(true), DefByOrigPHI(false), KillsSource(false),
- SpillReg(Reg), SpillVNI(VNI), SpillMBB(nullptr), DefMI(nullptr) {}
-
- // Returns true when a def has been found.
- bool hasDef() const { return DefByOrigPHI || DefMI; }
- };
-
-private:
- // Values in RegsToSpill defined by sibling copies.
- typedef DenseMap<VNInfo*, SibValueInfo> SibValueMap;
- SibValueMap SibValues;
-
// Dead defs generated during spilling.
SmallVector<MachineInstr*, 8> DeadDefs;
+ // This object records spill information and performs the hoisting.
+ HoistSpillHelper HSpiller;
+
~InlineSpiller() override {}
public:
@@ -147,9 +175,11 @@ public:
MFI(*mf.getFrameInfo()), MRI(mf.getRegInfo()),
TII(*mf.getSubtarget().getInstrInfo()),
TRI(*mf.getSubtarget().getRegisterInfo()),
- MBFI(pass.getAnalysis<MachineBlockFrequencyInfo>()) {}
+ MBFI(pass.getAnalysis<MachineBlockFrequencyInfo>()),
+ HSpiller(pass, mf, vrm) {}
void spill(LiveRangeEdit &) override;
+ void postOptimization() override;
private:
bool isSnippet(const LiveInterval &SnipLI);
@@ -161,15 +191,11 @@ private:
}
bool isSibling(unsigned Reg);
- MachineInstr *traceSiblingValue(unsigned, VNInfo*, VNInfo*);
- void propagateSiblingValue(SibValueMap::iterator, VNInfo *VNI = nullptr);
- void analyzeSiblingValues();
-
- bool hoistSpill(LiveInterval &SpillLI, MachineInstr *CopyMI);
+ bool hoistSpillInsideBB(LiveInterval &SpillLI, MachineInstr &CopyMI);
void eliminateRedundantSpills(LiveInterval &LI, VNInfo *VNI);
void markValueUsed(LiveInterval*, VNInfo*);
- bool reMaterializeFor(LiveInterval&, MachineBasicBlock::iterator MI);
+ bool reMaterializeFor(LiveInterval &, MachineInstr &MI);
void reMaterializeAll();
bool coalesceStackAccess(MachineInstr *MI, unsigned Reg);
@@ -210,13 +236,13 @@ Spiller *createInlineSpiller(MachineFunctionPass &pass,
/// isFullCopyOf - If MI is a COPY to or from Reg, return the other register,
/// otherwise return 0.
-static unsigned isFullCopyOf(const MachineInstr *MI, unsigned Reg) {
- if (!MI->isFullCopy())
+static unsigned isFullCopyOf(const MachineInstr &MI, unsigned Reg) {
+ if (!MI.isFullCopy())
return 0;
- if (MI->getOperand(0).getReg() == Reg)
- return MI->getOperand(1).getReg();
- if (MI->getOperand(1).getReg() == Reg)
- return MI->getOperand(0).getReg();
+ if (MI.getOperand(0).getReg() == Reg)
+ return MI.getOperand(1).getReg();
+ if (MI.getOperand(1).getReg() == Reg)
+ return MI.getOperand(0).getReg();
return 0;
}
@@ -242,7 +268,7 @@ bool InlineSpiller::isSnippet(const LiveInterval &SnipLI) {
for (MachineRegisterInfo::reg_instr_nodbg_iterator
RI = MRI.reg_instr_nodbg_begin(SnipLI.reg),
E = MRI.reg_instr_nodbg_end(); RI != E; ) {
- MachineInstr *MI = &*(RI++);
+ MachineInstr &MI = *RI++;
// Allow copies to/from Reg.
if (isFullCopyOf(MI, Reg))
@@ -258,9 +284,9 @@ bool InlineSpiller::isSnippet(const LiveInterval &SnipLI) {
continue;
// Allow a single additional instruction.
- if (UseMI && MI != UseMI)
+ if (UseMI && &MI != UseMI)
return false;
- UseMI = MI;
+ UseMI = &MI;
}
return true;
}
@@ -281,14 +307,14 @@ void InlineSpiller::collectRegsToSpill() {
for (MachineRegisterInfo::reg_instr_iterator
RI = MRI.reg_instr_begin(Reg), E = MRI.reg_instr_end(); RI != E; ) {
- MachineInstr *MI = &*(RI++);
+ MachineInstr &MI = *RI++;
unsigned SnipReg = isFullCopyOf(MI, Reg);
if (!isSibling(SnipReg))
continue;
LiveInterval &SnipLI = LIS.getInterval(SnipReg);
if (!isSnippet(SnipLI))
continue;
- SnippetCopies.insert(MI);
+ SnippetCopies.insert(&MI);
if (isRegToSpill(SnipReg))
continue;
RegsToSpill.push_back(SnipReg);
@@ -297,418 +323,46 @@ void InlineSpiller::collectRegsToSpill() {
}
}
-
-//===----------------------------------------------------------------------===//
-// Sibling Values
-//===----------------------------------------------------------------------===//
-
-// After live range splitting, some values to be spilled may be defined by
-// copies from sibling registers. We trace the sibling copies back to the
-// original value if it still exists. We need it for rematerialization.
-//
-// Even when the value can't be rematerialized, we still want to determine if
-// the value has already been spilled, or we may want to hoist the spill from a
-// loop.
-
bool InlineSpiller::isSibling(unsigned Reg) {
return TargetRegisterInfo::isVirtualRegister(Reg) &&
VRM.getOriginal(Reg) == Original;
}
-#ifndef NDEBUG
-static raw_ostream &operator<<(raw_ostream &OS,
- const InlineSpiller::SibValueInfo &SVI) {
- OS << "spill " << PrintReg(SVI.SpillReg) << ':'
- << SVI.SpillVNI->id << '@' << SVI.SpillVNI->def;
- if (SVI.SpillMBB)
- OS << " in BB#" << SVI.SpillMBB->getNumber();
- if (SVI.AllDefsAreReloads)
- OS << " all-reloads";
- if (SVI.DefByOrigPHI)
- OS << " orig-phi";
- if (SVI.KillsSource)
- OS << " kill";
- OS << " deps[";
- for (VNInfo *Dep : SVI.Deps)
- OS << ' ' << Dep->id << '@' << Dep->def;
- OS << " ]";
- if (SVI.DefMI)
- OS << " def: " << *SVI.DefMI;
- else
- OS << '\n';
- return OS;
-}
-#endif
-
-/// propagateSiblingValue - Propagate the value in SVI to dependents if it is
-/// known. Otherwise remember the dependency for later.
+/// It is beneficial to spill to an earlier place in the same BB when
+/// there is an alternative def earlier in the same MBB.
+/// Hoist the spill as far as possible in SpillMBB. This can ease
+/// register pressure:
///
-/// @param SVIIter SibValues entry to propagate.
-/// @param VNI Dependent value, or NULL to propagate to all saved dependents.
-void InlineSpiller::propagateSiblingValue(SibValueMap::iterator SVIIter,
- VNInfo *VNI) {
- SibValueMap::value_type *SVI = &*SVIIter;
-
- // When VNI is non-NULL, add it to SVI's deps, and only propagate to that.
- TinyPtrVector<VNInfo*> FirstDeps;
- if (VNI) {
- FirstDeps.push_back(VNI);
- SVI->second.Deps.push_back(VNI);
- }
-
- // Has the value been completely determined yet? If not, defer propagation.
- if (!SVI->second.hasDef())
- return;
-
- // Work list of values to propagate.
- SmallSetVector<SibValueMap::value_type *, 8> WorkList;
- WorkList.insert(SVI);
-
- do {
- SVI = WorkList.pop_back_val();
- TinyPtrVector<VNInfo*> *Deps = VNI ? &FirstDeps : &SVI->second.Deps;
- VNI = nullptr;
-
- SibValueInfo &SV = SVI->second;
- if (!SV.SpillMBB)
- SV.SpillMBB = LIS.getMBBFromIndex(SV.SpillVNI->def);
-
- DEBUG(dbgs() << " prop to " << Deps->size() << ": "
- << SVI->first->id << '@' << SVI->first->def << ":\t" << SV);
-
- assert(SV.hasDef() && "Propagating undefined value");
-
- // Should this value be propagated as a preferred spill candidate? We don't
- // propagate values of registers that are about to spill.
- bool PropSpill = !DisableHoisting && !isRegToSpill(SV.SpillReg);
- unsigned SpillDepth = ~0u;
-
- for (VNInfo *Dep : *Deps) {
- SibValueMap::iterator DepSVI = SibValues.find(Dep);
- assert(DepSVI != SibValues.end() && "Dependent value not in SibValues");
- SibValueInfo &DepSV = DepSVI->second;
- if (!DepSV.SpillMBB)
- DepSV.SpillMBB = LIS.getMBBFromIndex(DepSV.SpillVNI->def);
-
- bool Changed = false;
-
- // Propagate defining instruction.
- if (!DepSV.hasDef()) {
- Changed = true;
- DepSV.DefMI = SV.DefMI;
- DepSV.DefByOrigPHI = SV.DefByOrigPHI;
- }
-
- // Propagate AllDefsAreReloads. For PHI values, this computes an AND of
- // all predecessors.
- if (!SV.AllDefsAreReloads && DepSV.AllDefsAreReloads) {
- Changed = true;
- DepSV.AllDefsAreReloads = false;
- }
-
- // Propagate best spill value.
- if (PropSpill && SV.SpillVNI != DepSV.SpillVNI) {
- if (SV.SpillMBB == DepSV.SpillMBB) {
- // DepSV is in the same block. Hoist when dominated.
- if (DepSV.KillsSource && SV.SpillVNI->def < DepSV.SpillVNI->def) {
- // This is an alternative def earlier in the same MBB.
- // Hoist the spill as far as possible in SpillMBB. This can ease
- // register pressure:
- //
- // x = def
- // y = use x
- // s = copy x
- //
- // Hoisting the spill of s to immediately after the def removes the
- // interference between x and y:
- //
- // x = def
- // spill x
- // y = use x<kill>
- //
- // This hoist only helps when the DepSV copy kills its source.
- Changed = true;
- DepSV.SpillReg = SV.SpillReg;
- DepSV.SpillVNI = SV.SpillVNI;
- DepSV.SpillMBB = SV.SpillMBB;
- }
- } else {
- // DepSV is in a different block.
- if (SpillDepth == ~0u)
- SpillDepth = Loops.getLoopDepth(SV.SpillMBB);
-
- // Also hoist spills to blocks with smaller loop depth, but make sure
- // that the new value dominates. Non-phi dependents are always
- // dominated, phis need checking.
-
- const BranchProbability MarginProb(4, 5); // 80%
- // Hoist a spill to outer loop if there are multiple dependents (it
- // can be beneficial if more than one dependents are hoisted) or
- // if DepSV (the hoisting source) is hotter than SV (the hoisting
- // destination) (we add a 80% margin to bias a little towards
- // loop depth).
- bool HoistCondition =
- (MBFI.getBlockFreq(DepSV.SpillMBB) >=
- (MBFI.getBlockFreq(SV.SpillMBB) * MarginProb)) ||
- Deps->size() > 1;
-
- if ((Loops.getLoopDepth(DepSV.SpillMBB) > SpillDepth) &&
- HoistCondition &&
- (!DepSVI->first->isPHIDef() ||
- MDT.dominates(SV.SpillMBB, DepSV.SpillMBB))) {
- Changed = true;
- DepSV.SpillReg = SV.SpillReg;
- DepSV.SpillVNI = SV.SpillVNI;
- DepSV.SpillMBB = SV.SpillMBB;
- }
- }
- }
-
- if (!Changed)
- continue;
-
- // Something changed in DepSVI. Propagate to dependents.
- WorkList.insert(&*DepSVI);
-
- DEBUG(dbgs() << " update " << DepSVI->first->id << '@'
- << DepSVI->first->def << " to:\t" << DepSV);
- }
- } while (!WorkList.empty());
-}
-
-/// traceSiblingValue - Trace a value that is about to be spilled back to the
-/// real defining instructions by looking through sibling copies. Always stay
-/// within the range of OrigVNI so the registers are known to carry the same
-/// value.
+/// x = def
+/// y = use x
+/// s = copy x
///
-/// Determine if the value is defined by all reloads, so spilling isn't
-/// necessary - the value is already in the stack slot.
+/// Hoisting the spill of s to immediately after the def removes the
+/// interference between x and y:
///
-/// Return a defining instruction that may be a candidate for rematerialization.
+/// x = def
+/// spill x
+/// y = use x<kill>
///
-MachineInstr *InlineSpiller::traceSiblingValue(unsigned UseReg, VNInfo *UseVNI,
- VNInfo *OrigVNI) {
- // Check if a cached value already exists.
- SibValueMap::iterator SVI;
- bool Inserted;
- std::tie(SVI, Inserted) =
- SibValues.insert(std::make_pair(UseVNI, SibValueInfo(UseReg, UseVNI)));
- if (!Inserted) {
- DEBUG(dbgs() << "Cached value " << PrintReg(UseReg) << ':'
- << UseVNI->id << '@' << UseVNI->def << ' ' << SVI->second);
- return SVI->second.DefMI;
- }
-
- DEBUG(dbgs() << "Tracing value " << PrintReg(UseReg) << ':'
- << UseVNI->id << '@' << UseVNI->def << '\n');
-
- // List of (Reg, VNI) that have been inserted into SibValues, but need to be
- // processed.
- SmallVector<std::pair<unsigned, VNInfo*>, 8> WorkList;
- WorkList.push_back(std::make_pair(UseReg, UseVNI));
-
- LiveInterval &OrigLI = LIS.getInterval(Original);
- do {
- unsigned Reg;
- VNInfo *VNI;
- std::tie(Reg, VNI) = WorkList.pop_back_val();
- DEBUG(dbgs() << " " << PrintReg(Reg) << ':' << VNI->id << '@' << VNI->def
- << ":\t");
-
- // First check if this value has already been computed.
- SVI = SibValues.find(VNI);
- assert(SVI != SibValues.end() && "Missing SibValues entry");
-
- // Trace through PHI-defs created by live range splitting.
- if (VNI->isPHIDef()) {
- // Stop at original PHIs. We don't know the value at the
- // predecessors. Look up the VNInfo for the current definition
- // in OrigLI, to properly determine whether or not this phi was
- // added by splitting.
- if (VNI->def == OrigLI.getVNInfoAt(VNI->def)->def) {
- DEBUG(dbgs() << "orig phi value\n");
- SVI->second.DefByOrigPHI = true;
- SVI->second.AllDefsAreReloads = false;
- propagateSiblingValue(SVI);
- continue;
- }
-
- // This is a PHI inserted by live range splitting. We could trace the
- // live-out value from predecessor blocks, but that search can be very
- // expensive if there are many predecessors and many more PHIs as
- // generated by tail-dup when it sees an indirectbr. Instead, look at
- // all the non-PHI defs that have the same value as OrigVNI. They must
- // jointly dominate VNI->def. This is not optimal since VNI may actually
- // be jointly dominated by a smaller subset of defs, so there is a change
- // we will miss a AllDefsAreReloads optimization.
-
- // Separate all values dominated by OrigVNI into PHIs and non-PHIs.
- SmallVector<VNInfo*, 8> PHIs, NonPHIs;
- LiveInterval &LI = LIS.getInterval(Reg);
-
- for (LiveInterval::vni_iterator VI = LI.vni_begin(), VE = LI.vni_end();
- VI != VE; ++VI) {
- VNInfo *VNI2 = *VI;
- if (VNI2->isUnused())
- continue;
- if (!OrigLI.containsOneValue() &&
- OrigLI.getVNInfoAt(VNI2->def) != OrigVNI)
- continue;
- if (VNI2->isPHIDef() && VNI2->def != OrigVNI->def)
- PHIs.push_back(VNI2);
- else
- NonPHIs.push_back(VNI2);
- }
- DEBUG(dbgs() << "split phi value, checking " << PHIs.size()
- << " phi-defs, and " << NonPHIs.size()
- << " non-phi/orig defs\n");
-
- // Create entries for all the PHIs. Don't add them to the worklist, we
- // are processing all of them in one go here.
- for (VNInfo *PHI : PHIs)
- SibValues.insert(std::make_pair(PHI, SibValueInfo(Reg, PHI)));
-
- // Add every PHI as a dependent of all the non-PHIs.
- for (VNInfo *NonPHI : NonPHIs) {
- // Known value? Try an insertion.
- std::tie(SVI, Inserted) =
- SibValues.insert(std::make_pair(NonPHI, SibValueInfo(Reg, NonPHI)));
- // Add all the PHIs as dependents of NonPHI.
- SVI->second.Deps.insert(SVI->second.Deps.end(), PHIs.begin(),
- PHIs.end());
- // This is the first time we see NonPHI, add it to the worklist.
- if (Inserted)
- WorkList.push_back(std::make_pair(Reg, NonPHI));
- else
- // Propagate to all inserted PHIs, not just VNI.
- propagateSiblingValue(SVI);
- }
-
- // Next work list item.
- continue;
- }
-
- MachineInstr *MI = LIS.getInstructionFromIndex(VNI->def);
- assert(MI && "Missing def");
-
- // Trace through sibling copies.
- if (unsigned SrcReg = isFullCopyOf(MI, Reg)) {
- if (isSibling(SrcReg)) {
- LiveInterval &SrcLI = LIS.getInterval(SrcReg);
- LiveQueryResult SrcQ = SrcLI.Query(VNI->def);
- assert(SrcQ.valueIn() && "Copy from non-existing value");
- // Check if this COPY kills its source.
- SVI->second.KillsSource = SrcQ.isKill();
- VNInfo *SrcVNI = SrcQ.valueIn();
- DEBUG(dbgs() << "copy of " << PrintReg(SrcReg) << ':'
- << SrcVNI->id << '@' << SrcVNI->def
- << " kill=" << unsigned(SVI->second.KillsSource) << '\n');
- // Known sibling source value? Try an insertion.
- std::tie(SVI, Inserted) = SibValues.insert(
- std::make_pair(SrcVNI, SibValueInfo(SrcReg, SrcVNI)));
- // This is the first time we see Src, add it to the worklist.
- if (Inserted)
- WorkList.push_back(std::make_pair(SrcReg, SrcVNI));
- propagateSiblingValue(SVI, VNI);
- // Next work list item.
- continue;
- }
- }
-
- // Track reachable reloads.
- SVI->second.DefMI = MI;
- SVI->second.SpillMBB = MI->getParent();
- int FI;
- if (Reg == TII.isLoadFromStackSlot(MI, FI) && FI == StackSlot) {
- DEBUG(dbgs() << "reload\n");
- propagateSiblingValue(SVI);
- // Next work list item.
- continue;
- }
-
- // Potential remat candidate.
- DEBUG(dbgs() << "def " << *MI);
- SVI->second.AllDefsAreReloads = false;
- propagateSiblingValue(SVI);
- } while (!WorkList.empty());
-
- // Look up the value we were looking for. We already did this lookup at the
- // top of the function, but SibValues may have been invalidated.
- SVI = SibValues.find(UseVNI);
- assert(SVI != SibValues.end() && "Didn't compute requested info");
- DEBUG(dbgs() << " traced to:\t" << SVI->second);
- return SVI->second.DefMI;
-}
-
-/// analyzeSiblingValues - Trace values defined by sibling copies back to
-/// something that isn't a sibling copy.
+/// This hoist only helps when the copy kills its source.
///
-/// Keep track of values that may be rematerializable.
-void InlineSpiller::analyzeSiblingValues() {
- SibValues.clear();
-
- // No siblings at all?
- if (Edit->getReg() == Original)
- return;
-
- LiveInterval &OrigLI = LIS.getInterval(Original);
- for (unsigned Reg : RegsToSpill) {
- LiveInterval &LI = LIS.getInterval(Reg);
- for (LiveInterval::const_vni_iterator VI = LI.vni_begin(),
- VE = LI.vni_end(); VI != VE; ++VI) {
- VNInfo *VNI = *VI;
- if (VNI->isUnused())
- continue;
- MachineInstr *DefMI = nullptr;
- if (!VNI->isPHIDef()) {
- DefMI = LIS.getInstructionFromIndex(VNI->def);
- assert(DefMI && "No defining instruction");
- }
- // Check possible sibling copies.
- if (VNI->isPHIDef() || DefMI->isCopy()) {
- VNInfo *OrigVNI = OrigLI.getVNInfoAt(VNI->def);
- assert(OrigVNI && "Def outside original live range");
- if (OrigVNI->def != VNI->def)
- DefMI = traceSiblingValue(Reg, VNI, OrigVNI);
- }
- if (DefMI && Edit->checkRematerializable(VNI, DefMI, AA)) {
- DEBUG(dbgs() << "Value " << PrintReg(Reg) << ':' << VNI->id << '@'
- << VNI->def << " may remat from " << *DefMI);
- }
- }
- }
-}
-
-/// hoistSpill - Given a sibling copy that defines a value to be spilled, insert
-/// a spill at a better location.
-bool InlineSpiller::hoistSpill(LiveInterval &SpillLI, MachineInstr *CopyMI) {
+bool InlineSpiller::hoistSpillInsideBB(LiveInterval &SpillLI,
+ MachineInstr &CopyMI) {
SlotIndex Idx = LIS.getInstructionIndex(CopyMI);
+#ifndef NDEBUG
VNInfo *VNI = SpillLI.getVNInfoAt(Idx.getRegSlot());
assert(VNI && VNI->def == Idx.getRegSlot() && "Not defined by copy");
- SibValueMap::iterator I = SibValues.find(VNI);
- if (I == SibValues.end())
- return false;
-
- const SibValueInfo &SVI = I->second;
+#endif
- // Let the normal folding code deal with the boring case.
- if (!SVI.AllDefsAreReloads && SVI.SpillVNI == VNI)
+ unsigned SrcReg = CopyMI.getOperand(1).getReg();
+ LiveInterval &SrcLI = LIS.getInterval(SrcReg);
+ VNInfo *SrcVNI = SrcLI.getVNInfoAt(Idx);
+ LiveQueryResult SrcQ = SrcLI.Query(Idx);
+ MachineBasicBlock *DefMBB = LIS.getMBBFromIndex(SrcVNI->def);
+ if (DefMBB != CopyMI.getParent() || !SrcQ.isKill())
return false;
- // SpillReg may have been deleted by remat and DCE.
- if (!LIS.hasInterval(SVI.SpillReg)) {
- DEBUG(dbgs() << "Stale interval: " << PrintReg(SVI.SpillReg) << '\n');
- SibValues.erase(I);
- return false;
- }
-
- LiveInterval &SibLI = LIS.getInterval(SVI.SpillReg);
- if (!SibLI.containsValue(SVI.SpillVNI)) {
- DEBUG(dbgs() << "Stale value: " << PrintReg(SVI.SpillReg) << '\n');
- SibValues.erase(I);
- return false;
- }
-
// Conservatively extend the stack slot range to the range of the original
// value. We may be able to do better with stack slot coloring by being more
// careful here.
@@ -719,35 +373,29 @@ bool InlineSpiller::hoistSpill(LiveInterval &SpillLI, MachineInstr *CopyMI) {
DEBUG(dbgs() << "\tmerged orig valno " << OrigVNI->id << ": "
<< *StackInt << '\n');
- // Already spilled everywhere.
- if (SVI.AllDefsAreReloads) {
- DEBUG(dbgs() << "\tno spill needed: " << SVI);
- ++NumOmitReloadSpill;
- return true;
- }
- // We are going to spill SVI.SpillVNI immediately after its def, so clear out
+ // We are going to spill SrcVNI immediately after its def, so clear out
// any later spills of the same value.
- eliminateRedundantSpills(SibLI, SVI.SpillVNI);
+ eliminateRedundantSpills(SrcLI, SrcVNI);
- MachineBasicBlock *MBB = LIS.getMBBFromIndex(SVI.SpillVNI->def);
+ MachineBasicBlock *MBB = LIS.getMBBFromIndex(SrcVNI->def);
MachineBasicBlock::iterator MII;
- if (SVI.SpillVNI->isPHIDef())
+ if (SrcVNI->isPHIDef())
MII = MBB->SkipPHIsAndLabels(MBB->begin());
else {
- MachineInstr *DefMI = LIS.getInstructionFromIndex(SVI.SpillVNI->def);
+ MachineInstr *DefMI = LIS.getInstructionFromIndex(SrcVNI->def);
assert(DefMI && "Defining instruction disappeared");
MII = DefMI;
++MII;
}
// Insert spill without kill flag immediately after def.
- TII.storeRegToStackSlot(*MBB, MII, SVI.SpillReg, false, StackSlot,
- MRI.getRegClass(SVI.SpillReg), &TRI);
+ TII.storeRegToStackSlot(*MBB, MII, SrcReg, false, StackSlot,
+ MRI.getRegClass(SrcReg), &TRI);
--MII; // Point to store instruction.
- LIS.InsertMachineInstrInMaps(MII);
- DEBUG(dbgs() << "\thoisted: " << SVI.SpillVNI->def << '\t' << *MII);
+ LIS.InsertMachineInstrInMaps(*MII);
+ DEBUG(dbgs() << "\thoisted: " << SrcVNI->def << '\t' << *MII);
+ HSpiller.addToMergeableSpills(*MII, StackSlot, Original);
++NumSpills;
- ++NumHoists;
return true;
}
@@ -778,8 +426,8 @@ void InlineSpiller::eliminateRedundantSpills(LiveInterval &SLI, VNInfo *VNI) {
for (MachineRegisterInfo::use_instr_nodbg_iterator
UI = MRI.use_instr_nodbg_begin(Reg), E = MRI.use_instr_nodbg_end();
UI != E; ) {
- MachineInstr *MI = &*(UI++);
- if (!MI->isCopy() && !MI->mayStore())
+ MachineInstr &MI = *UI++;
+ if (!MI.isCopy() && !MI.mayStore())
continue;
SlotIndex Idx = LIS.getInstructionIndex(MI);
if (LI->getVNInfoAt(Idx) != VNI)
@@ -800,12 +448,13 @@ void InlineSpiller::eliminateRedundantSpills(LiveInterval &SLI, VNInfo *VNI) {
// Erase spills.
int FI;
if (Reg == TII.isStoreToStackSlot(MI, FI) && FI == StackSlot) {
- DEBUG(dbgs() << "Redundant spill " << Idx << '\t' << *MI);
+ DEBUG(dbgs() << "Redundant spill " << Idx << '\t' << MI);
// eliminateDeadDefs won't normally remove stores, so switch opcode.
- MI->setDesc(TII.get(TargetOpcode::KILL));
- DeadDefs.push_back(MI);
+ MI.setDesc(TII.get(TargetOpcode::KILL));
+ DeadDefs.push_back(&MI);
++NumSpillsRemoved;
- --NumSpills;
+ if (HSpiller.rmFromMergeableSpills(MI, StackSlot))
+ --NumSpills;
}
}
} while (!WorkList.empty());
@@ -849,13 +498,12 @@ void InlineSpiller::markValueUsed(LiveInterval *LI, VNInfo *VNI) {
}
/// reMaterializeFor - Attempt to rematerialize before MI instead of reloading.
-bool InlineSpiller::reMaterializeFor(LiveInterval &VirtReg,
- MachineBasicBlock::iterator MI) {
+bool InlineSpiller::reMaterializeFor(LiveInterval &VirtReg, MachineInstr &MI) {
// Analyze instruction
SmallVector<std::pair<MachineInstr *, unsigned>, 8> Ops;
MIBundleOperands::VirtRegInfo RI =
- MIBundleOperands(MI).analyzeVirtReg(VirtReg.reg, &Ops);
+ MIBundleOperands(MI).analyzeVirtReg(VirtReg.reg, &Ops);
if (!RI.Reads)
return false;
@@ -865,26 +513,26 @@ bool InlineSpiller::reMaterializeFor(LiveInterval &VirtReg,
if (!ParentVNI) {
DEBUG(dbgs() << "\tadding <undef> flags: ");
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- MachineOperand &MO = MI->getOperand(i);
+ for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI.getOperand(i);
if (MO.isReg() && MO.isUse() && MO.getReg() == VirtReg.reg)
MO.setIsUndef();
}
- DEBUG(dbgs() << UseIdx << '\t' << *MI);
+ DEBUG(dbgs() << UseIdx << '\t' << MI);
return true;
}
- if (SnippetCopies.count(MI))
+ if (SnippetCopies.count(&MI))
return false;
- // Use an OrigVNI from traceSiblingValue when ParentVNI is a sibling copy.
+ LiveInterval &OrigLI = LIS.getInterval(Original);
+ VNInfo *OrigVNI = OrigLI.getVNInfoAt(UseIdx);
LiveRangeEdit::Remat RM(ParentVNI);
- SibValueMap::const_iterator SibI = SibValues.find(ParentVNI);
- if (SibI != SibValues.end())
- RM.OrigMI = SibI->second.DefMI;
- if (!Edit->canRematerializeAt(RM, UseIdx, false)) {
+ RM.OrigMI = LIS.getInstructionFromIndex(OrigVNI->def);
+
+ if (!Edit->canRematerializeAt(RM, OrigVNI, UseIdx, false)) {
markValueUsed(&VirtReg, ParentVNI);
- DEBUG(dbgs() << "\tcannot remat for " << UseIdx << '\t' << *MI);
+ DEBUG(dbgs() << "\tcannot remat for " << UseIdx << '\t' << MI);
return false;
}
@@ -892,7 +540,7 @@ bool InlineSpiller::reMaterializeFor(LiveInterval &VirtReg,
// same register for uses and defs.
if (RI.Tied) {
markValueUsed(&VirtReg, ParentVNI);
- DEBUG(dbgs() << "\tcannot remat tied reg: " << UseIdx << '\t' << *MI);
+ DEBUG(dbgs() << "\tcannot remat tied reg: " << UseIdx << '\t' << MI);
return false;
}
@@ -909,8 +557,8 @@ bool InlineSpiller::reMaterializeFor(LiveInterval &VirtReg,
unsigned NewVReg = Edit->createFrom(Original);
// Finally we can rematerialize OrigMI before MI.
- SlotIndex DefIdx = Edit->rematerializeAt(*MI->getParent(), MI, NewVReg, RM,
- TRI);
+ SlotIndex DefIdx =
+ Edit->rematerializeAt(*MI.getParent(), MI, NewVReg, RM, TRI);
(void)DefIdx;
DEBUG(dbgs() << "\tremat: " << DefIdx << '\t'
<< *LIS.getInstructionFromIndex(DefIdx));
@@ -923,7 +571,7 @@ bool InlineSpiller::reMaterializeFor(LiveInterval &VirtReg,
MO.setIsKill();
}
}
- DEBUG(dbgs() << "\t " << UseIdx << '\t' << *MI << '\n');
+ DEBUG(dbgs() << "\t " << UseIdx << '\t' << MI << '\n');
++NumRemats;
return true;
@@ -932,7 +580,6 @@ bool InlineSpiller::reMaterializeFor(LiveInterval &VirtReg,
/// reMaterializeAll - Try to rematerialize as many uses as possible,
/// and trim the live ranges after.
void InlineSpiller::reMaterializeAll() {
- // analyzeSiblingValues has already tested all relevant defining instructions.
if (!Edit->anyRematerializable(AA))
return;
@@ -945,10 +592,10 @@ void InlineSpiller::reMaterializeAll() {
for (MachineRegisterInfo::reg_bundle_iterator
RegI = MRI.reg_bundle_begin(Reg), E = MRI.reg_bundle_end();
RegI != E; ) {
- MachineInstr *MI = &*(RegI++);
+ MachineInstr &MI = *RegI++;
// Debug values are not allowed to affect codegen.
- if (MI->isDebugValue())
+ if (MI.isDebugValue())
continue;
anyRemat |= reMaterializeFor(LI, MI);
@@ -979,20 +626,22 @@ void InlineSpiller::reMaterializeAll() {
if (DeadDefs.empty())
return;
DEBUG(dbgs() << "Remat created " << DeadDefs.size() << " dead defs.\n");
- Edit->eliminateDeadDefs(DeadDefs, RegsToSpill);
-
- // Get rid of deleted and empty intervals.
+ Edit->eliminateDeadDefs(DeadDefs, RegsToSpill, AA);
+
+ // LiveRangeEdit::eliminateDeadDef is used to remove dead defining
+ // instructions after rematerialization. To remove a VNI for a vreg from its
+ // LiveInterval, LiveIntervals::removeVRegDefAt is used. However, after all
+ // non-PHI VNIs are removed, PHI VNIs are still left in the LiveInterval.
+ // So to get rid of an unused reg, we need to check whether it has a non-dbg
+ // reference instead of whether it has a non-empty interval.
unsigned ResultPos = 0;
for (unsigned Reg : RegsToSpill) {
- if (!LIS.hasInterval(Reg))
- continue;
-
- LiveInterval &LI = LIS.getInterval(Reg);
- if (LI.empty()) {
+ if (MRI.reg_nodbg_empty(Reg)) {
Edit->eraseVirtReg(Reg);
continue;
}
-
+ assert((LIS.hasInterval(Reg) && !LIS.getInterval(Reg).empty()) &&
+ "Reg with empty interval has reference");
RegsToSpill[ResultPos++] = Reg;
}
RegsToSpill.erase(RegsToSpill.begin() + ResultPos, RegsToSpill.end());
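The ResultPos loop above is the usual in-place filter idiom; the same shape over a plain vector, as a standalone sketch:

#include <vector>

template <typename Pred>
void eraseIf(std::vector<unsigned> &Regs, Pred ShouldErase) {
  unsigned ResultPos = 0;
  for (unsigned Reg : Regs)
    if (!ShouldErase(Reg))
      Regs[ResultPos++] = Reg; // compact the survivors to the front
  Regs.erase(Regs.begin() + ResultPos, Regs.end());
}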
@@ -1007,17 +656,20 @@ void InlineSpiller::reMaterializeAll() {
/// If MI is a load or store of StackSlot, it can be removed.
bool InlineSpiller::coalesceStackAccess(MachineInstr *MI, unsigned Reg) {
int FI = 0;
- unsigned InstrReg = TII.isLoadFromStackSlot(MI, FI);
+ unsigned InstrReg = TII.isLoadFromStackSlot(*MI, FI);
bool IsLoad = InstrReg;
if (!IsLoad)
- InstrReg = TII.isStoreToStackSlot(MI, FI);
+ InstrReg = TII.isStoreToStackSlot(*MI, FI);
// We have a stack access. Is it the right register and slot?
if (InstrReg != Reg || FI != StackSlot)
return false;
+ if (!IsLoad)
+ HSpiller.rmFromMergeableSpills(*MI, StackSlot);
+
DEBUG(dbgs() << "Coalescing stack access: " << *MI);
- LIS.RemoveMachineInstrFromMaps(MI);
+ LIS.RemoveMachineInstrFromMaps(*MI);
MI->eraseFromParent();
if (IsLoad) {
@@ -1049,7 +701,7 @@ static void dumpMachineInstrRangeWithSlotIndex(MachineBasicBlock::iterator B,
dbgs() << '\t' << header << ": " << NextLine;
for (MachineBasicBlock::iterator I = B; I != E; ++I) {
- SlotIndex Idx = LIS.getInstructionIndex(I).getRegSlot();
+ SlotIndex Idx = LIS.getInstructionIndex(*I).getRegSlot();
// If a register was passed in and this instruction has it as a
// destination that is marked as an early clobber, print the
@@ -1113,13 +765,13 @@ foldMemoryOperand(ArrayRef<std::pair<MachineInstr*, unsigned> > Ops,
MachineInstrSpan MIS(MI);
MachineInstr *FoldMI =
- LoadMI ? TII.foldMemoryOperand(MI, FoldOps, LoadMI)
- : TII.foldMemoryOperand(MI, FoldOps, StackSlot);
+ LoadMI ? TII.foldMemoryOperand(*MI, FoldOps, *LoadMI, &LIS)
+ : TII.foldMemoryOperand(*MI, FoldOps, StackSlot, &LIS);
if (!FoldMI)
return false;
// Remove LIS for any dead defs in the original MI not in FoldMI.
- for (MIBundleOperands MO(MI); MO.isValid(); ++MO) {
+ for (MIBundleOperands MO(*MI); MO.isValid(); ++MO) {
if (!MO->isReg())
continue;
unsigned Reg = MO->getReg();
@@ -1131,23 +783,27 @@ foldMemoryOperand(ArrayRef<std::pair<MachineInstr*, unsigned> > Ops,
if (MO->isUse())
continue;
MIBundleOperands::PhysRegInfo RI =
- MIBundleOperands(FoldMI).analyzePhysReg(Reg, &TRI);
+ MIBundleOperands(*FoldMI).analyzePhysReg(Reg, &TRI);
if (RI.FullyDefined)
continue;
// FoldMI does not define this physreg. Remove the LI segment.
assert(MO->isDead() && "Cannot fold physreg def");
- SlotIndex Idx = LIS.getInstructionIndex(MI).getRegSlot();
+ SlotIndex Idx = LIS.getInstructionIndex(*MI).getRegSlot();
LIS.removePhysRegDefAt(Reg, Idx);
}
- LIS.ReplaceMachineInstrInMaps(MI, FoldMI);
+ int FI;
+ if (TII.isStoreToStackSlot(*MI, FI) &&
+ HSpiller.rmFromMergeableSpills(*MI, FI))
+ --NumSpills;
+ LIS.ReplaceMachineInstrInMaps(*MI, *FoldMI);
MI->eraseFromParent();
// Insert any new instructions other than FoldMI into the LIS maps.
assert(!MIS.empty() && "Unexpected empty span of instructions!");
for (MachineInstr &MI : MIS)
if (&MI != FoldMI)
- LIS.InsertMachineInstrInMaps(&MI);
+ LIS.InsertMachineInstrInMaps(MI);
// TII.foldMemoryOperand may have left some implicit operands on the
// instruction. Strip them.
@@ -1165,9 +821,10 @@ foldMemoryOperand(ArrayRef<std::pair<MachineInstr*, unsigned> > Ops,
if (!WasCopy)
++NumFolded;
- else if (Ops.front().second == 0)
+ else if (Ops.front().second == 0) {
++NumSpills;
- else
+ HSpiller.addToMergeableSpills(*FoldMI, StackSlot, Original);
+ } else
++NumReloads;
return true;
}
@@ -1202,6 +859,7 @@ void InlineSpiller::insertSpill(unsigned NewVReg, bool isKill,
DEBUG(dumpMachineInstrRangeWithSlotIndex(std::next(MI), MIS.end(), LIS,
"spill"));
++NumSpills;
+ HSpiller.addToMergeableSpills(*std::next(MI), StackSlot, Original);
}
/// spillAroundUses - insert spill code around each use of Reg.
@@ -1246,17 +904,17 @@ void InlineSpiller::spillAroundUses(unsigned Reg) {
// Analyze instruction.
SmallVector<std::pair<MachineInstr*, unsigned>, 8> Ops;
MIBundleOperands::VirtRegInfo RI =
- MIBundleOperands(MI).analyzeVirtReg(Reg, &Ops);
+ MIBundleOperands(*MI).analyzeVirtReg(Reg, &Ops);
// Find the slot index where this instruction reads and writes OldLI.
// This is usually the def slot, except for tied early clobbers.
- SlotIndex Idx = LIS.getInstructionIndex(MI).getRegSlot();
+ SlotIndex Idx = LIS.getInstructionIndex(*MI).getRegSlot();
if (VNInfo *VNI = OldLI.getVNInfoAt(Idx.getRegSlot(true)))
if (SlotIndex::isSameInstr(Idx, VNI->def))
Idx = VNI->def;
// Check for a sibling copy.
- unsigned SibReg = isFullCopyOf(MI, Reg);
+ unsigned SibReg = isFullCopyOf(*MI, Reg);
if (SibReg && isSibling(SibReg)) {
// This may actually be a copy between snippets.
if (isRegToSpill(SibReg)) {
@@ -1265,8 +923,7 @@ void InlineSpiller::spillAroundUses(unsigned Reg) {
continue;
}
if (RI.Writes) {
- // Hoist the spill of a sib-reg copy.
- if (hoistSpill(OldLI, MI)) {
+ if (hoistSpillInsideBB(OldLI, *MI)) {
// This COPY is now dead, the value is already in the stack slot.
MI->getOperand(0).setIsDead();
DeadDefs.push_back(MI);
@@ -1339,7 +996,7 @@ void InlineSpiller::spillAll() {
// Hoisted spills may cause dead code.
if (!DeadDefs.empty()) {
DEBUG(dbgs() << "Eliminating " << DeadDefs.size() << " dead defs\n");
- Edit->eliminateDeadDefs(DeadDefs, RegsToSpill);
+ Edit->eliminateDeadDefs(DeadDefs, RegsToSpill, AA);
}
// Finally delete the SnippetCopies.
@@ -1347,11 +1004,11 @@ void InlineSpiller::spillAll() {
for (MachineRegisterInfo::reg_instr_iterator
RI = MRI.reg_instr_begin(Reg), E = MRI.reg_instr_end();
RI != E; ) {
- MachineInstr *MI = &*(RI++);
- assert(SnippetCopies.count(MI) && "Remaining use wasn't a snippet copy");
+ MachineInstr &MI = *(RI++);
+ assert(SnippetCopies.count(&MI) && "Remaining use wasn't a snippet copy");
// FIXME: Do this with a LiveRangeEdit callback.
LIS.RemoveMachineInstrFromMaps(MI);
- MI->eraseFromParent();
+ MI.eraseFromParent();
}
}
@@ -1379,7 +1036,6 @@ void InlineSpiller::spill(LiveRangeEdit &edit) {
assert(DeadDefs.empty() && "Previous spill didn't remove dead defs");
collectRegsToSpill();
- analyzeSiblingValues();
reMaterializeAll();
// Remat may handle everything.
@@ -1388,3 +1044,413 @@ void InlineSpiller::spill(LiveRangeEdit &edit) {
Edit->calculateRegClassAndHint(MF, Loops, MBFI);
}
+
+/// Optimizations run after all register selections and spills are done.
+///
+void InlineSpiller::postOptimization() { HSpiller.hoistAllSpills(); }
+
+/// When a spill is inserted, add the spill to the MergeableSpills map.
+///
+void HoistSpillHelper::addToMergeableSpills(MachineInstr &Spill, int StackSlot,
+ unsigned Original) {
+ StackSlotToReg[StackSlot] = Original;
+ SlotIndex Idx = LIS.getInstructionIndex(Spill);
+ VNInfo *OrigVNI = LIS.getInterval(Original).getVNInfoAt(Idx.getRegSlot());
+ std::pair<int, VNInfo *> MIdx = std::make_pair(StackSlot, OrigVNI);
+ MergeableSpills[MIdx].insert(&Spill);
+}
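
A minimal standalone sketch of the keying scheme used here, with stand-in types for LLVM's VNInfo and MachineInstr: spills are grouped by (stack slot, value number of the original register at the spill point), so two sibling-register spills of the same value land in the same mergeable set.

  #include <map>
  #include <set>
  #include <utility>

  struct VNInfo {};       // stand-in for llvm::VNInfo
  struct MachineInstr {}; // stand-in for llvm::MachineInstr

  int main() {
    // Key: (stack slot, value number); value: the set of mergeable spills.
    std::map<std::pair<int, VNInfo *>, std::set<MachineInstr *>> MergeableSpills;
    VNInfo VN;
    MachineInstr SpillA, SpillB; // sibling-register spills of one value
    MergeableSpills[{7, &VN}].insert(&SpillA);
    MergeableSpills[{7, &VN}].insert(&SpillB); // same key -> same set
    return MergeableSpills[{7, &VN}].size() == 2 ? 0 : 1;
  }
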
+
+/// When a spill is removed, remove the spill from the MergeableSpills map.
+/// Return true if the spill is removed successfully.
+///
+bool HoistSpillHelper::rmFromMergeableSpills(MachineInstr &Spill,
+ int StackSlot) {
+ int Original = StackSlotToReg[StackSlot];
+ if (!Original)
+ return false;
+ SlotIndex Idx = LIS.getInstructionIndex(Spill);
+ VNInfo *OrigVNI = LIS.getInterval(Original).getVNInfoAt(Idx.getRegSlot());
+ std::pair<int, VNInfo *> MIdx = std::make_pair(StackSlot, OrigVNI);
+ return MergeableSpills[MIdx].erase(&Spill);
+}
+
+/// Check BB to see if it is a possible target BB to place a hoisted spill,
+/// i.e., there should be a live sibling of OrigReg at the insertion point.
+///
+bool HoistSpillHelper::isSpillCandBB(unsigned OrigReg, VNInfo &OrigVNI,
+ MachineBasicBlock &BB, unsigned &LiveReg) {
+ SlotIndex Idx;
+ LiveInterval &OrigLI = LIS.getInterval(OrigReg);
+ MachineBasicBlock::iterator MI = IPA.getLastInsertPointIter(OrigLI, BB);
+ if (MI != BB.end())
+ Idx = LIS.getInstructionIndex(*MI);
+ else
+ Idx = LIS.getMBBEndIdx(&BB).getPrevSlot();
+ SmallSetVector<unsigned, 16> &Siblings = Virt2SiblingsMap[OrigReg];
+ assert((LIS.getInterval(OrigReg)).getVNInfoAt(Idx) == &OrigVNI &&
+ "Unexpected VNI");
+
+ for (auto const SibReg : Siblings) {
+ LiveInterval &LI = LIS.getInterval(SibReg);
+ VNInfo *VNI = LI.getVNInfoAt(Idx);
+ if (VNI) {
+ LiveReg = SibReg;
+ return true;
+ }
+ }
+ return false;
+}
+
+/// Remove redundant spills in the same BB. Save those redundant spills in
+/// SpillsToRm, and save the spill to keep and its BB in the SpillBBToSpill map.
+///
+void HoistSpillHelper::rmRedundantSpills(
+ SmallPtrSet<MachineInstr *, 16> &Spills,
+ SmallVectorImpl<MachineInstr *> &SpillsToRm,
+ DenseMap<MachineDomTreeNode *, MachineInstr *> &SpillBBToSpill) {
+ // For each spill seen, check SpillBBToSpill[] to see if its BB already has
+ // another spill inside. If a BB contains more than one spill, keep only the
+ // earlier spill with the smaller SlotIndex.
+ for (const auto CurrentSpill : Spills) {
+ MachineBasicBlock *Block = CurrentSpill->getParent();
+ MachineDomTreeNode *Node = MDT.DT->getNode(Block);
+ MachineInstr *PrevSpill = SpillBBToSpill[Node];
+ if (PrevSpill) {
+ SlotIndex PIdx = LIS.getInstructionIndex(*PrevSpill);
+ SlotIndex CIdx = LIS.getInstructionIndex(*CurrentSpill);
+ MachineInstr *SpillToRm = (CIdx > PIdx) ? CurrentSpill : PrevSpill;
+ MachineInstr *SpillToKeep = (CIdx > PIdx) ? PrevSpill : CurrentSpill;
+ SpillsToRm.push_back(SpillToRm);
+ SpillBBToSpill[MDT.DT->getNode(Block)] = SpillToKeep;
+ } else {
+ SpillBBToSpill[MDT.DT->getNode(Block)] = CurrentSpill;
+ }
+ }
+ for (const auto SpillToRm : SpillsToRm)
+ Spills.erase(SpillToRm);
+}
+
+/// Starting from \p Root, find a top-down traversal order of the dominator
+/// tree to visit all basic blocks containing the elements of \p Spills.
+/// Redundant spills will be found and put into \p SpillsToRm at the same
+/// time. \p SpillBBToSpill will be populated as part of the process and
+/// maps a basic block to the first store occurring in the basic block.
+/// \post SpillsToRm.union(Spills\@post) == Spills\@pre
+///
+void HoistSpillHelper::getVisitOrders(
+ MachineBasicBlock *Root, SmallPtrSet<MachineInstr *, 16> &Spills,
+ SmallVectorImpl<MachineDomTreeNode *> &Orders,
+ SmallVectorImpl<MachineInstr *> &SpillsToRm,
+ DenseMap<MachineDomTreeNode *, unsigned> &SpillsToKeep,
+ DenseMap<MachineDomTreeNode *, MachineInstr *> &SpillBBToSpill) {
+ // The set contains all the possible BB nodes to which we may hoist
+ // original spills.
+ SmallPtrSet<MachineDomTreeNode *, 8> WorkSet;
+ // Save the BB nodes on the path from the first BB node containing
+ // a non-redundant spill to the Root node.
+ SmallPtrSet<MachineDomTreeNode *, 8> NodesOnPath;
+ // All the spills to be hoisted must originate from a single def instruction
+ // of OrigReg, which means the def instruction should dominate all the spills
+ // to be hoisted. We choose the BB where the def instruction is located as
+ // the Root.
+ MachineDomTreeNode *RootIDomNode = MDT[Root]->getIDom();
+ // For every node on the dominator tree containing a spill, walk up the
+ // dominator tree towards the Root node until it is reached. If another
+ // node containing a spill lies in the middle of the path, the spill seen
+ // earlier is redundant and the node containing it will be removed. All the
+ // nodes on the path from the first node with a non-redundant spill to the
+ // Root node will be added to the WorkSet, which will contain all the
+ // possible locations where spills may be hoisted to once the loop below is
+ // done.
+ for (const auto Spill : Spills) {
+ MachineBasicBlock *Block = Spill->getParent();
+ MachineDomTreeNode *Node = MDT[Block];
+ MachineInstr *SpillToRm = nullptr;
+ while (Node != RootIDomNode) {
+ // If Node dominates Block, and it already contains a spill, the spill in
+ // Block will be redundant.
+ if (Node != MDT[Block] && SpillBBToSpill[Node]) {
+ SpillToRm = SpillBBToSpill[MDT[Block]];
+ break;
+ /// If we see the Node already in WorkSet, the path from the Node to
+ /// the Root node must already have been traversed by another spill,
+ /// so there is no need to repeat it.
+ } else if (WorkSet.count(Node)) {
+ break;
+ } else {
+ NodesOnPath.insert(Node);
+ }
+ Node = Node->getIDom();
+ }
+ if (SpillToRm) {
+ SpillsToRm.push_back(SpillToRm);
+ } else {
+ // Add a BB containing the original spills to SpillsToKeep -- i.e.,
+ // set the initial status before hoisting starts. The value for BBs
+ // containing original spills is set to 0, in order to distinguish
+ // them from BBs containing hoisted spills, which will be inserted
+ // into SpillsToKeep later during hoisting.
+ SpillsToKeep[MDT[Block]] = 0;
+ WorkSet.insert(NodesOnPath.begin(), NodesOnPath.end());
+ }
+ NodesOnPath.clear();
+ }
+
+ // Sort the nodes in WorkSet in top-down order and save the nodes
+ // in Orders. Orders will be used for hoisting in runHoistSpills.
+ unsigned idx = 0;
+ Orders.push_back(MDT.DT->getNode(Root));
+ do {
+ MachineDomTreeNode *Node = Orders[idx++];
+ const std::vector<MachineDomTreeNode *> &Children = Node->getChildren();
+ unsigned NumChildren = Children.size();
+ for (unsigned i = 0; i != NumChildren; ++i) {
+ MachineDomTreeNode *Child = Children[i];
+ if (WorkSet.count(Child))
+ Orders.push_back(Child);
+ }
+ } while (idx != Orders.size());
+ assert(Orders.size() == WorkSet.size() &&
+ "Orders have different size with WorkSet");
+
+#ifndef NDEBUG
+ DEBUG(dbgs() << "Orders size is " << Orders.size() << "\n");
+ SmallVector<MachineDomTreeNode *, 32>::reverse_iterator RIt = Orders.rbegin();
+ for (; RIt != Orders.rend(); RIt++)
+ DEBUG(dbgs() << "BB" << (*RIt)->getBlock()->getNumber() << ",");
+ DEBUG(dbgs() << "\n");
+#endif
+}
+
+/// Try to hoist spills according to BB hotness. The spills to be removed will
+/// be saved in \p SpillsToRm. The spills to be inserted will be saved in
+/// \p SpillsToIns.
+///
+void HoistSpillHelper::runHoistSpills(
+ unsigned OrigReg, VNInfo &OrigVNI, SmallPtrSet<MachineInstr *, 16> &Spills,
+ SmallVectorImpl<MachineInstr *> &SpillsToRm,
+ DenseMap<MachineBasicBlock *, unsigned> &SpillsToIns) {
+ // Visit order of dominator tree nodes.
+ SmallVector<MachineDomTreeNode *, 32> Orders;
+ // SpillsToKeep contains all the nodes where spills are to be inserted
+ // during hoisting. If the spill to be inserted is an original spill
+ // (not a hoisted one), the value of the map entry is 0. If the spill
+ // is a hoisted spill, the value of the map entry is the VReg to be used
+ // as the source of the spill.
+ DenseMap<MachineDomTreeNode *, unsigned> SpillsToKeep;
+ // Map from BB to the first spill inside of it.
+ DenseMap<MachineDomTreeNode *, MachineInstr *> SpillBBToSpill;
+
+ rmRedundantSpills(Spills, SpillsToRm, SpillBBToSpill);
+
+ MachineBasicBlock *Root = LIS.getMBBFromIndex(OrigVNI.def);
+ getVisitOrders(Root, Spills, Orders, SpillsToRm, SpillsToKeep,
+ SpillBBToSpill);
+
+ // SpillsInSubTreeMap keeps a map from a dom tree node to a pair of
+ // a node set and the cost of all the spills inside those nodes.
+ // The node set contains the locations where spills are to be inserted
+ // in the subtree of the current node.
+ typedef std::pair<SmallPtrSet<MachineDomTreeNode *, 16>, BlockFrequency>
+ NodesCostPair;
+ DenseMap<MachineDomTreeNode *, NodesCostPair> SpillsInSubTreeMap;
+ // Iterate the Orders set in reverse order, which will be a bottom-up order
+ // in the dominator tree. Once we visit a dom tree node, we know its
+ // children have already been visited and the spill locations in the
+ // subtrees of all the children have been determined.
+ SmallVector<MachineDomTreeNode *, 32>::reverse_iterator RIt = Orders.rbegin();
+ for (; RIt != Orders.rend(); RIt++) {
+ MachineBasicBlock *Block = (*RIt)->getBlock();
+
+ // If Block contains an original spill, simply continue.
+ if (SpillsToKeep.find(*RIt) != SpillsToKeep.end() && !SpillsToKeep[*RIt]) {
+ SpillsInSubTreeMap[*RIt].first.insert(*RIt);
+ // SpillsInSubTreeMap[*RIt].second contains the cost of the spill.
+ SpillsInSubTreeMap[*RIt].second = MBFI.getBlockFreq(Block);
+ continue;
+ }
+
+ // Collect spills in subtree of current node (*RIt) to
+ // SpillsInSubTreeMap[*RIt].first.
+ const std::vector<MachineDomTreeNode *> &Children = (*RIt)->getChildren();
+ unsigned NumChildren = Children.size();
+ for (unsigned i = 0; i != NumChildren; ++i) {
+ MachineDomTreeNode *Child = Children[i];
+ if (SpillsInSubTreeMap.find(Child) == SpillsInSubTreeMap.end())
+ continue;
+ // The stmt "SpillsInSubTree = SpillsInSubTreeMap[*RIt].first" below
+ // should be placed before getting the begin and end iterators of
+ // SpillsInSubTreeMap[Child].first, or else the iterators may be
+ // invalidated when SpillsInSubTreeMap[*RIt] is seen for the first time
+ // and the map grows, moving the original buckets in the map.
+ SmallPtrSet<MachineDomTreeNode *, 16> &SpillsInSubTree =
+ SpillsInSubTreeMap[*RIt].first;
+ BlockFrequency &SubTreeCost = SpillsInSubTreeMap[*RIt].second;
+ SubTreeCost += SpillsInSubTreeMap[Child].second;
+ auto BI = SpillsInSubTreeMap[Child].first.begin();
+ auto EI = SpillsInSubTreeMap[Child].first.end();
+ SpillsInSubTree.insert(BI, EI);
+ SpillsInSubTreeMap.erase(Child);
+ }
+
+ SmallPtrSet<MachineDomTreeNode *, 16> &SpillsInSubTree =
+ SpillsInSubTreeMap[*RIt].first;
+ BlockFrequency &SubTreeCost = SpillsInSubTreeMap[*RIt].second;
+ // No spills in subtree, simply continue.
+ if (SpillsInSubTree.empty())
+ continue;
+
+ // Check whether Block is a possible candidate to insert a spill.
+ unsigned LiveReg = 0;
+ if (!isSpillCandBB(OrigReg, OrigVNI, *Block, LiveReg))
+ continue;
+
+ // If there are multiple spills that could be merged, bias a little
+ // toward hoisting the spill.
+ BranchProbability MarginProb = (SpillsInSubTree.size() > 1)
+ ? BranchProbability(9, 10)
+ : BranchProbability(1, 1);
+ if (SubTreeCost > MBFI.getBlockFreq(Block) * MarginProb) {
+ // Hoist: Move spills to current Block.
+ for (const auto SpillBB : SpillsInSubTree) {
+ // When SpillBB is a BB that contains an original spill, insert the
+ // spill into SpillsToRm.
+ if (SpillsToKeep.find(SpillBB) != SpillsToKeep.end() &&
+ !SpillsToKeep[SpillBB]) {
+ MachineInstr *SpillToRm = SpillBBToSpill[SpillBB];
+ SpillsToRm.push_back(SpillToRm);
+ }
+ // SpillBB will not contain a spill anymore; remove it from SpillsToKeep.
+ SpillsToKeep.erase(SpillBB);
+ }
+ // Current Block is the BB containing the new hoisted spill. Add it to
+ // SpillsToKeep. LiveReg is the source of the new spill.
+ SpillsToKeep[*RIt] = LiveReg;
+ DEBUG({
+ dbgs() << "spills in BB: ";
+ for (const auto Rspill : SpillsInSubTree)
+ dbgs() << Rspill->getBlock()->getNumber() << " ";
+ dbgs() << "were promoted to BB" << (*RIt)->getBlock()->getNumber()
+ << "\n";
+ });
+ SpillsInSubTree.clear();
+ SpillsInSubTree.insert(*RIt);
+ SubTreeCost = MBFI.getBlockFreq(Block);
+ }
+ }
+ // For spills in SpillsToKeep with LiveReg set (i.e., not an original spill),
+ // save them to SpillsToIns.
+ for (const auto Ent : SpillsToKeep) {
+ if (Ent.second)
+ SpillsToIns[Ent.first->getBlock()] = Ent.second;
+ }
+}
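
The hoisting criterion above compares block frequencies scaled by a branch probability. A minimal sketch of that comparison, assuming a hypothetical helper shouldHoist and LLVM's BlockFrequency/BranchProbability types:

  #include "llvm/Support/BlockFrequency.h"
  #include "llvm/Support/BranchProbability.h"
  using namespace llvm;

  // Hoisting wins when the summed frequency of the subtree's spills exceeds
  // the candidate block's frequency; with more than one mergeable spill the
  // threshold is scaled by 9/10 to bias slightly toward hoisting.
  static bool shouldHoist(BlockFrequency SubTreeCost, BlockFrequency BlockFreq,
                          unsigned NumSpills) {
    BranchProbability MarginProb =
        NumSpills > 1 ? BranchProbability(9, 10) : BranchProbability(1, 1);
    return SubTreeCost > BlockFreq * MarginProb;
  }
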
+
+/// For spills with equal values, remove redundant spills and hoist the
+/// remaining ones to colder spots.
+///
+/// Spills with equal values will be collected into the same set in
+/// MergeableSpills when a spill is inserted. These equal spills originate
+/// from the same defining instruction and are dominated by it.
+/// Before hoisting all the equal spills, redundant spills inside the same
+/// BB are first marked for deletion. Then, starting from the spills left,
+/// walk up the dominator tree towards the Root node where the defining
+/// instruction is located, mark the dominated spills for deletion along the
+/// way, and collect the BB nodes on the path from non-dominated spills to
+/// the defining instruction into a WorkSet. The nodes in WorkSet are the
+/// candidate places where we consider hoisting the spills. We iterate the
+/// WorkSet in bottom-up order, and for each node we decide whether to hoist
+/// the spills inside its subtree to that node. In this way, we can get a
+/// local benefit even if hoisting all the equal spills to one cold place is
+/// impossible.
+///
+void HoistSpillHelper::hoistAllSpills() {
+ SmallVector<unsigned, 4> NewVRegs;
+ LiveRangeEdit Edit(nullptr, NewVRegs, MF, LIS, &VRM, this);
+
+ // Save the mapping between stackslot and its original reg.
+ DenseMap<int, unsigned> SlotToOrigReg;
+ for (unsigned i = 0, e = MRI.getNumVirtRegs(); i != e; ++i) {
+ unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
+ int Slot = VRM.getStackSlot(Reg);
+ if (Slot != VirtRegMap::NO_STACK_SLOT)
+ SlotToOrigReg[Slot] = VRM.getOriginal(Reg);
+ unsigned Original = VRM.getPreSplitReg(Reg);
+ if (!MRI.def_empty(Reg))
+ Virt2SiblingsMap[Original].insert(Reg);
+ }
+
+ // Each entry in MergeableSpills contains a spill set with equal values.
+ for (auto &Ent : MergeableSpills) {
+ int Slot = Ent.first.first;
+ unsigned OrigReg = SlotToOrigReg[Slot];
+ LiveInterval &OrigLI = LIS.getInterval(OrigReg);
+ VNInfo *OrigVNI = Ent.first.second;
+ SmallPtrSet<MachineInstr *, 16> &EqValSpills = Ent.second;
+ if (Ent.second.empty())
+ continue;
+
+ DEBUG({
+ dbgs() << "\nFor Slot" << Slot << " and VN" << OrigVNI->id << ":\n"
+ << "Equal spills in BB: ";
+ for (const auto spill : EqValSpills)
+ dbgs() << spill->getParent()->getNumber() << " ";
+ dbgs() << "\n";
+ });
+
+ // SpillsToRm is the spill set to be removed from EqValSpills.
+ SmallVector<MachineInstr *, 16> SpillsToRm;
+ // SpillsToIns is the spill set to be newly inserted after hoisting.
+ DenseMap<MachineBasicBlock *, unsigned> SpillsToIns;
+
+ runHoistSpills(OrigReg, *OrigVNI, EqValSpills, SpillsToRm, SpillsToIns);
+
+ DEBUG({
+ dbgs() << "Finally inserted spills in BB: ";
+ for (const auto Ispill : SpillsToIns)
+ dbgs() << Ispill.first->getNumber() << " ";
+ dbgs() << "\nFinally removed spills in BB: ";
+ for (const auto Rspill : SpillsToRm)
+ dbgs() << Rspill->getParent()->getNumber() << " ";
+ dbgs() << "\n";
+ });
+
+ // Stack live range update.
+ LiveInterval &StackIntvl = LSS.getInterval(Slot);
+ if (!SpillsToIns.empty() || !SpillsToRm.empty())
+ StackIntvl.MergeValueInAsValue(OrigLI, OrigVNI,
+ StackIntvl.getValNumInfo(0));
+
+ // Insert hoisted spills.
+ for (auto const Insert : SpillsToIns) {
+ MachineBasicBlock *BB = Insert.first;
+ unsigned LiveReg = Insert.second;
+ MachineBasicBlock::iterator MI = IPA.getLastInsertPointIter(OrigLI, *BB);
+ TII.storeRegToStackSlot(*BB, MI, LiveReg, false, Slot,
+ MRI.getRegClass(LiveReg), &TRI);
+ LIS.InsertMachineInstrRangeInMaps(std::prev(MI), MI);
+ ++NumSpills;
+ }
+
+ // Remove redundant spills or change them to dead instructions.
+ NumSpills -= SpillsToRm.size();
+ for (auto const RMEnt : SpillsToRm) {
+ RMEnt->setDesc(TII.get(TargetOpcode::KILL));
+ for (unsigned i = RMEnt->getNumOperands(); i; --i) {
+ MachineOperand &MO = RMEnt->getOperand(i - 1);
+ if (MO.isReg() && MO.isImplicit() && MO.isDef() && !MO.isDead())
+ RMEnt->RemoveOperand(i - 1);
+ }
+ }
+ Edit.eliminateDeadDefs(SpillsToRm, None, AA);
+ }
+}
+
+/// When a VirtReg is cloned, the \p New register should have the same physreg
+/// or stackslot as the \p Old register.
+void HoistSpillHelper::LRE_DidCloneVirtReg(unsigned New, unsigned Old) {
+ if (VRM.hasPhys(Old))
+ VRM.assignVirt2Phys(New, VRM.getPhys(Old));
+ else if (VRM.getStackSlot(Old) != VirtRegMap::NO_STACK_SLOT)
+ VRM.assignVirt2StackSlot(New, VRM.getStackSlot(Old));
+ else
+ llvm_unreachable("VReg should be assigned either physreg or stackslot");
+}
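
Taken together, the new hooks are meant to be driven once per function: the allocator spills as usual (each spill being recorded in MergeableSpills), and the hoisting clean-up runs at the end. A hypothetical driver sketch, assuming the spiller interface exposes the two entry points shown above:

  #include "Spiller.h"
  #include "llvm/ADT/ArrayRef.h"
  #include "llvm/CodeGen/LiveRangeEdit.h"
  using namespace llvm;

  void allocateAndCleanup(Spiller &SpillerImpl,
                          ArrayRef<LiveRangeEdit *> Edits) {
    for (LiveRangeEdit *LRE : Edits)
      SpillerImpl.spill(*LRE);      // records each spill in MergeableSpills
    SpillerImpl.postOptimization(); // merges and hoists equal-value spills
  }
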
diff --git a/contrib/llvm/lib/CodeGen/InterleavedAccessPass.cpp b/contrib/llvm/lib/CodeGen/InterleavedAccessPass.cpp
index 724f1d6..3f11119 100644
--- a/contrib/llvm/lib/CodeGen/InterleavedAccessPass.cpp
+++ b/contrib/llvm/lib/CodeGen/InterleavedAccessPass.cpp
@@ -1,6 +1,6 @@
-//=----------------------- InterleavedAccessPass.cpp -----------------------==//
+//===--------------------- InterleavedAccessPass.cpp ----------------------===//
//
-// The LLVM Compiler Infrastructure
+// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
@@ -8,16 +8,18 @@
//===----------------------------------------------------------------------===//
//
// This file implements the Interleaved Access pass, which identifies
-// interleaved memory accesses and transforms into target specific intrinsics.
+// interleaved memory accesses and transforms them into target specific
+// intrinsics.
//
// An interleaved load reads data from memory into several vectors, with
// DE-interleaving the data on a factor. An interleaved store writes several
// vectors to memory with RE-interleaving the data on a factor.
//
-// As interleaved accesses are hard to be identified in CodeGen (mainly because
-// the VECTOR_SHUFFLE DAG node is quite different from the shufflevector IR),
-// we identify and transform them to intrinsics in this pass. So the intrinsics
-// can be easily matched into target specific instructions later in CodeGen.
+// As interleaved accesses are difficult to identify in CodeGen (mainly
+// because the VECTOR_SHUFFLE DAG node is quite different from the shufflevector
+// IR), we identify and transform them to intrinsics in this pass so the
+// intrinsics can be easily matched into target specific instructions later in
+// CodeGen.
//
// E.g. An interleaved load (Factor = 2):
// %wide.vec = load <8 x i32>, <8 x i32>* %ptr
@@ -38,6 +40,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/Dominators.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
@@ -56,10 +59,6 @@ static cl::opt<bool> LowerInterleavedAccesses(
static unsigned MaxFactor; // The maximum supported interleave factor.
-namespace llvm {
-static void initializeInterleavedAccessPass(PassRegistry &);
-}
-
namespace {
class InterleavedAccess : public FunctionPass {
@@ -67,7 +66,7 @@ class InterleavedAccess : public FunctionPass {
public:
static char ID;
InterleavedAccess(const TargetMachine *TM = nullptr)
- : FunctionPass(ID), TM(TM), TLI(nullptr) {
+ : FunctionPass(ID), DT(nullptr), TM(TM), TLI(nullptr) {
initializeInterleavedAccessPass(*PassRegistry::getPassRegistry());
}
@@ -75,7 +74,13 @@ public:
bool runOnFunction(Function &F) override;
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<DominatorTreeWrapperPass>();
+ AU.addPreserved<DominatorTreeWrapperPass>();
+ }
+
private:
+ DominatorTree *DT;
const TargetMachine *TM;
const TargetLowering *TLI;
@@ -86,13 +91,26 @@ private:
/// \brief Transform an interleaved store into target specific intrinsics.
bool lowerInterleavedStore(StoreInst *SI,
SmallVector<Instruction *, 32> &DeadInsts);
+
+ /// \brief Returns true if the uses of an interleaved load by the
+ /// extractelement instructions in \p Extracts can be replaced by uses of the
+ /// shufflevector instructions in \p Shuffles instead. If so, the necessary
+ /// replacements are also performed.
+ bool tryReplaceExtracts(ArrayRef<ExtractElementInst *> Extracts,
+ ArrayRef<ShuffleVectorInst *> Shuffles);
};
} // end anonymous namespace.
char InterleavedAccess::ID = 0;
-INITIALIZE_TM_PASS(InterleavedAccess, "interleaved-access",
- "Lower interleaved memory accesses to target specific intrinsics",
- false, false)
+INITIALIZE_TM_PASS_BEGIN(
+ InterleavedAccess, "interleaved-access",
+ "Lower interleaved memory accesses to target specific intrinsics", false,
+ false)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_TM_PASS_END(
+ InterleavedAccess, "interleaved-access",
+ "Lower interleaved memory accesses to target specific intrinsics", false,
+ false)
FunctionPass *llvm::createInterleavedAccessPass(const TargetMachine *TM) {
return new InterleavedAccess(TM);
@@ -181,9 +199,18 @@ bool InterleavedAccess::lowerInterleavedLoad(
return false;
SmallVector<ShuffleVectorInst *, 4> Shuffles;
+ SmallVector<ExtractElementInst *, 4> Extracts;
- // Check if all users of this load are shufflevectors.
+ // Check if all users of this load are shufflevectors. If we encounter any
+ // users that are extractelement instructions, we save them to check later if
+ // they can be modified to extract from one of the shufflevectors instead of
+ // the load.
for (auto UI = LI->user_begin(), E = LI->user_end(); UI != E; UI++) {
+ auto *Extract = dyn_cast<ExtractElementInst>(*UI);
+ if (Extract && isa<ConstantInt>(Extract->getIndexOperand())) {
+ Extracts.push_back(Extract);
+ continue;
+ }
ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(*UI);
if (!SVI || !isa<UndefValue>(SVI->getOperand(1)))
return false;
@@ -219,6 +246,11 @@ bool InterleavedAccess::lowerInterleavedLoad(
Indices.push_back(Index);
}
+ // Try to modify users of the load that are extractelement instructions to
+ // use the shufflevector instructions instead of the load.
+ if (!tryReplaceExtracts(Extracts, Shuffles))
+ return false;
+
DEBUG(dbgs() << "IA: Found an interleaved load: " << *LI << "\n");
// Try to create target specific intrinsics to replace the load and shuffles.
@@ -232,6 +264,73 @@ bool InterleavedAccess::lowerInterleavedLoad(
return true;
}
+bool InterleavedAccess::tryReplaceExtracts(
+ ArrayRef<ExtractElementInst *> Extracts,
+ ArrayRef<ShuffleVectorInst *> Shuffles) {
+
+ // If there aren't any extractelement instructions to modify, there's nothing
+ // to do.
+ if (Extracts.empty())
+ return true;
+
+ // Maps extractelement instructions to vector-index pairs. The extractelement
+ // instructions will be modified to use the new vector and index operands.
+ DenseMap<ExtractElementInst *, std::pair<Value *, int>> ReplacementMap;
+
+ for (auto *Extract : Extracts) {
+
+ // The vector index that is extracted.
+ auto *IndexOperand = cast<ConstantInt>(Extract->getIndexOperand());
+ auto Index = IndexOperand->getSExtValue();
+
+ // Look for a suitable shufflevector instruction. The goal is to modify the
+ // extractelement instruction (which uses an interleaved load) to use one
+ // of the shufflevector instructions instead of the load.
+ for (auto *Shuffle : Shuffles) {
+
+ // If the shufflevector instruction doesn't dominate the extract, we
+ // can't create a use of it.
+ if (!DT->dominates(Shuffle, Extract))
+ continue;
+
+ // Inspect the indices of the shufflevector instruction. If the shuffle
+ // selects the same index that is extracted, we can modify the
+ // extractelement instruction.
+ SmallVector<int, 4> Indices;
+ Shuffle->getShuffleMask(Indices);
+ for (unsigned I = 0; I < Indices.size(); ++I)
+ if (Indices[I] == Index) {
+ assert(Extract->getOperand(0) == Shuffle->getOperand(0) &&
+ "Vector operations do not match");
+ ReplacementMap[Extract] = std::make_pair(Shuffle, I);
+ break;
+ }
+
+ // If we found a suitable shufflevector instruction, stop looking.
+ if (ReplacementMap.count(Extract))
+ break;
+ }
+
+ // If we did not find a suitable shufflevector instruction, the
+ // extractelement instruction cannot be modified, so we must give up.
+ if (!ReplacementMap.count(Extract))
+ return false;
+ }
+
+ // Finally, perform the replacements.
+ IRBuilder<> Builder(Extracts[0]->getContext());
+ for (auto &Replacement : ReplacementMap) {
+ auto *Extract = Replacement.first;
+ auto *Vector = Replacement.second.first;
+ auto Index = Replacement.second.second;
+ Builder.SetInsertPoint(Extract);
+ Extract->replaceAllUsesWith(Builder.CreateExtractElement(Vector, Index));
+ Extract->eraseFromParent();
+ }
+
+ return true;
+}
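
The replacement loop at the end of tryReplaceExtracts condenses to a single rewrite per map entry. A minimal sketch of that step, with rewriteExtract as a hypothetical helper name:

  #include "llvm/IR/IRBuilder.h"
  #include "llvm/IR/Instructions.h"
  using namespace llvm;

  // Redirect an extractelement that reads a lane of the wide interleaved load
  // to read lane NewLane of the dominating de-interleaving shuffle instead.
  static void rewriteExtract(ExtractElementInst *Extract,
                             ShuffleVectorInst *Shuffle, unsigned NewLane) {
    IRBuilder<> Builder(Extract); // insert right before the old extract
    Value *Repl = Builder.CreateExtractElement(Shuffle, NewLane);
    Extract->replaceAllUsesWith(Repl);
    Extract->eraseFromParent();
  }
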
+
bool InterleavedAccess::lowerInterleavedStore(
StoreInst *SI, SmallVector<Instruction *, 32> &DeadInsts) {
if (!SI->isSimple())
@@ -264,6 +363,7 @@ bool InterleavedAccess::runOnFunction(Function &F) {
DEBUG(dbgs() << "*** " << getPassName() << ": " << F.getName() << "\n");
+ DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
TLI = TM->getSubtargetImpl(F)->getTargetLowering();
MaxFactor = TLI->getMaxSupportedInterleaveFactor();
diff --git a/contrib/llvm/lib/CodeGen/LLVMTargetMachine.cpp b/contrib/llvm/lib/CodeGen/LLVMTargetMachine.cpp
index 1c27377..9eb43d2 100644
--- a/contrib/llvm/lib/CodeGen/LLVMTargetMachine.cpp
+++ b/contrib/llvm/lib/CodeGen/LLVMTargetMachine.cpp
@@ -18,6 +18,7 @@
#include "llvm/CodeGen/MachineFunctionAnalysis.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/IRPrintingPasses.h"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/Verifier.h"
@@ -42,6 +43,10 @@ static cl::opt<cl::boolOrDefault>
EnableFastISelOption("fast-isel", cl::Hidden,
cl::desc("Enable the \"fast\" instruction selector"));
+static cl::opt<bool>
+ EnableGlobalISel("global-isel", cl::Hidden, cl::init(false),
+ cl::desc("Enable the \"global\" instruction selector"));
+
void LLVMTargetMachine::initAsmInfo() {
MRI = TheTarget.createMCRegInfo(getTargetTriple().str());
MII = TheTarget.createMCInstrInfo();
@@ -65,8 +70,15 @@ void LLVMTargetMachine::initAsmInfo() {
if (Options.DisableIntegratedAS)
TmpAsmInfo->setUseIntegratedAssembler(false);
+ TmpAsmInfo->setPreserveAsmComments(Options.MCOptions.PreserveAsmComments);
+
if (Options.CompressDebugSections)
- TmpAsmInfo->setCompressDebugSections(true);
+ TmpAsmInfo->setCompressDebugSections(DebugCompressionType::DCT_ZlibGnu);
+
+ TmpAsmInfo->setRelaxELFRelocations(Options.RelaxELFRelocations);
+
+ if (Options.ExceptionModel != ExceptionHandling::None)
+ TmpAsmInfo->setExceptionsType(Options.ExceptionModel);
AsmInfo = TmpAsmInfo;
}
@@ -78,7 +90,10 @@ LLVMTargetMachine::LLVMTargetMachine(const Target &T,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL)
: TargetMachine(T, DataLayoutString, TT, CPU, FS, Options) {
- CodeGenInfo = T.createMCCodeGenInfo(TT.str(), RM, CM, OL);
+ T.adjustCodeGenOpts(TT, RM, CM);
+ this->RM = RM;
+ this->CMModel = CM;
+ this->OptLevel = OL;
}
TargetIRAnalysis LLVMTargetMachine::getTargetIRAnalysis() {
@@ -87,6 +102,20 @@ TargetIRAnalysis LLVMTargetMachine::getTargetIRAnalysis() {
});
}
+MachineModuleInfo &
+LLVMTargetMachine::addMachineModuleInfo(PassManagerBase &PM) const {
+ MachineModuleInfo *MMI = new MachineModuleInfo(*getMCAsmInfo(),
+ *getMCRegisterInfo(),
+ getObjFileLowering());
+ PM.add(MMI);
+ return *MMI;
+}
+
+void LLVMTargetMachine::addMachineFunctionAnalysis(PassManagerBase &PM,
+ MachineFunctionInitializer *MFInitializer) const {
+ PM.add(new MachineFunctionAnalysis(*this, MFInitializer));
+}
+
/// addPassesToX helper drives creation and initialization of TargetPassConfig.
static MCContext *
addPassesToGenerateCode(LLVMTargetMachine *TM, PassManagerBase &PM,
@@ -94,6 +123,12 @@ addPassesToGenerateCode(LLVMTargetMachine *TM, PassManagerBase &PM,
AnalysisID StartAfter, AnalysisID StopAfter,
MachineFunctionInitializer *MFInitializer = nullptr) {
+ // When in emulated TLS mode, add the LowerEmuTLS pass.
+ if (TM->Options.EmulatedTLS)
+ PM.add(createLowerEmuTLSPass(TM));
+
+ PM.add(createPreISelIntrinsicLoweringPass());
+
// Add internal analysis passes from the target machine.
PM.add(createTargetTransformInfoWrapperPass(TM->getTargetIRAnalysis()));
@@ -115,14 +150,8 @@ addPassesToGenerateCode(LLVMTargetMachine *TM, PassManagerBase &PM,
PassConfig->addISelPrepare();
- // Install a MachineModuleInfo class, which is an immutable pass that holds
- // all the per-module stuff we're generating, including MCContext.
- MachineModuleInfo *MMI = new MachineModuleInfo(
- *TM->getMCAsmInfo(), *TM->getMCRegisterInfo(), TM->getObjFileLowering());
- PM.add(MMI);
-
- // Set up a MachineFunction for the rest of CodeGen to work on.
- PM.add(new MachineFunctionAnalysis(*TM, MFInitializer));
+ MachineModuleInfo &MMI = TM->addMachineModuleInfo(PM);
+ TM->addMachineFunctionAnalysis(PM, MFInitializer);
// Enable FastISel with -fast, but allow that to be overridden.
TM->setO0WantsFastISel(EnableFastISelOption != cl::BOU_FALSE);
@@ -132,14 +161,25 @@ addPassesToGenerateCode(LLVMTargetMachine *TM, PassManagerBase &PM,
TM->setFastISel(true);
// Ask the target for an isel.
- if (PassConfig->addInstSelector())
+ if (LLVM_UNLIKELY(EnableGlobalISel)) {
+ if (PassConfig->addIRTranslator())
+ return nullptr;
+
+ // Before running the register bank selector, ask the target if it
+ // wants to run some passes.
+ PassConfig->addPreRegBankSelect();
+
+ if (PassConfig->addRegBankSelect())
+ return nullptr;
+
+ } else if (PassConfig->addInstSelector())
return nullptr;
PassConfig->addMachinePasses();
PassConfig->setInitialized();
- return &MMI->getContext();
+ return &MMI.getContext();
}
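
The branch introduced above amounts to the following pipeline choice; a condensed sketch using the TargetPassConfig hooks named in the patch, with addISelPasses as a hypothetical wrapper that returns true on success:

  #include "llvm/CodeGen/TargetPassConfig.h"
  using namespace llvm;

  static bool addISelPasses(TargetPassConfig *PassConfig, bool UseGlobalISel) {
    if (UseGlobalISel) {
      if (PassConfig->addIRTranslator())  // LLVM IR -> generic MachineInstrs
        return false;
      PassConfig->addPreRegBankSelect();  // target hook between the stages
      if (PassConfig->addRegBankSelect()) // assign register banks
        return false;
      return true;
    }
    // Classic SelectionDAG-based instruction selection.
    return !PassConfig->addInstSelector();
  }
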
bool LLVMTargetMachine::addPassesToEmitFile(
@@ -154,7 +194,7 @@ bool LLVMTargetMachine::addPassesToEmitFile(
return true;
if (StopAfter) {
- PM.add(createPrintMIRPass(outs()));
+ PM.add(createPrintMIRPass(Out));
return false;
}
diff --git a/contrib/llvm/lib/CodeGen/LexicalScopes.cpp b/contrib/llvm/lib/CodeGen/LexicalScopes.cpp
index be61a20..b810176 100644
--- a/contrib/llvm/lib/CodeGen/LexicalScopes.cpp
+++ b/contrib/llvm/lib/CodeGen/LexicalScopes.cpp
@@ -113,8 +113,7 @@ LexicalScope *LexicalScopes::findLexicalScope(const DILocation *DL) {
// The scope that we were created with could have an extra file - which
// isn't what we care about in this case.
- if (auto *File = dyn_cast<DILexicalBlockFile>(Scope))
- Scope = File->getScope();
+ Scope = Scope->getNonLexicalBlockFileScope();
if (auto *IA = DL->getInlinedAt()) {
auto I = InlinedLexicalScopeMap.find(std::make_pair(Scope, IA));
@@ -140,8 +139,8 @@ LexicalScope *LexicalScopes::getOrCreateLexicalScope(const DILocalScope *Scope,
/// getOrCreateRegularScope - Find or create a regular lexical scope.
LexicalScope *
LexicalScopes::getOrCreateRegularScope(const DILocalScope *Scope) {
- if (auto *File = dyn_cast<DILexicalBlockFile>(Scope))
- Scope = File->getScope();
+ assert(Scope && "Invalid Scope encoding!");
+ Scope = Scope->getNonLexicalBlockFileScope();
auto I = LexicalScopeMap.find(Scope);
if (I != LexicalScopeMap.end())
@@ -169,6 +168,8 @@ LexicalScopes::getOrCreateRegularScope(const DILocalScope *Scope) {
LexicalScope *
LexicalScopes::getOrCreateInlinedScope(const DILocalScope *Scope,
const DILocation *InlinedAt) {
+ assert(Scope && "Invalid Scope encoding!");
+ Scope = Scope->getNonLexicalBlockFileScope();
std::pair<const DILocalScope *, const DILocation *> P(Scope, InlinedAt);
auto I = InlinedLexicalScopeMap.find(P);
if (I != InlinedLexicalScopeMap.end())
@@ -192,9 +193,7 @@ LexicalScopes::getOrCreateInlinedScope(const DILocalScope *Scope,
LexicalScope *
LexicalScopes::getOrCreateAbstractScope(const DILocalScope *Scope) {
assert(Scope && "Invalid Scope encoding!");
-
- if (auto *File = dyn_cast<DILexicalBlockFile>(Scope))
- Scope = File->getScope();
+ Scope = Scope->getNonLexicalBlockFileScope();
auto I = AbstractScopeMap.find(Scope);
if (I != AbstractScopeMap.end())
return &I->second;
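
The helper introduced above, DILocalScope::getNonLexicalBlockFileScope(), behaves like the inline code it replaces; a minimal equivalent sketch:

  #include "llvm/IR/DebugInfoMetadata.h"
  using namespace llvm;

  // Strip a DILexicalBlockFile wrapper if present; otherwise return the
  // scope unchanged. This is what each removed dyn_cast sequence did inline.
  static const DILocalScope *stripBlockFile(const DILocalScope *Scope) {
    if (auto *File = dyn_cast<DILexicalBlockFile>(Scope))
      return File->getScope();
    return Scope;
  }
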
diff --git a/contrib/llvm/lib/CodeGen/LiveDebugValues.cpp b/contrib/llvm/lib/CodeGen/LiveDebugValues.cpp
index b9937e5..4ff88d5 100644
--- a/contrib/llvm/lib/CodeGen/LiveDebugValues.cpp
+++ b/contrib/llvm/lib/CodeGen/LiveDebugValues.cpp
@@ -18,22 +18,24 @@
///
//===----------------------------------------------------------------------===//
-#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/SparseBitVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/UniqueVector.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/Passes.h"
-#include "llvm/Support/CommandLine.h"
+#include "llvm/IR/DebugInfo.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
-#include <queue>
#include <list>
+#include <queue>
using namespace llvm;
@@ -43,48 +45,163 @@ STATISTIC(NumInserted, "Number of DBG_VALUE instructions inserted");
namespace {
+// \brief If @MI is a DBG_VALUE with debug value described by a defined
+// register, returns the number of this register; otherwise, returns 0.
+static unsigned isDbgValueDescribedByReg(const MachineInstr &MI) {
+ assert(MI.isDebugValue() && "expected a DBG_VALUE");
+ assert(MI.getNumOperands() == 4 && "malformed DBG_VALUE");
+ // If the location of the variable is described using a register (directly
+ // or indirectly), this register is always the first operand.
+ return MI.getOperand(0).isReg() ? MI.getOperand(0).getReg() : 0;
+}
+
class LiveDebugValues : public MachineFunctionPass {
private:
const TargetRegisterInfo *TRI;
const TargetInstrInfo *TII;
+ /// Based on std::pair so it can be used as an index into a DenseMap.
typedef std::pair<const DILocalVariable *, const DILocation *>
- InlinedVariable;
-
+ DebugVariableBase;
/// A potentially inlined instance of a variable.
- struct DebugVariable {
- const DILocalVariable *Var;
- const DILocation *InlinedAt;
+ struct DebugVariable : public DebugVariableBase {
+ DebugVariable(const DILocalVariable *Var, const DILocation *InlinedAt)
+ : DebugVariableBase(Var, InlinedAt) {}
- DebugVariable(const DILocalVariable *_var, const DILocation *_inlinedAt)
- : Var(_var), InlinedAt(_inlinedAt) {}
+ const DILocalVariable *getVar() const { return this->first; };
+ const DILocation *getInlinedAt() const { return this->second; };
- bool operator==(const DebugVariable &DV) const {
- return (Var == DV.Var) && (InlinedAt == DV.InlinedAt);
+ bool operator<(const DebugVariable &DV) const {
+ if (getVar() == DV.getVar())
+ return getInlinedAt() < DV.getInlinedAt();
+ return getVar() < DV.getVar();
}
};
- /// Member variables and functions for Range Extension across basic blocks.
+ /// A pair of debug variable and value location.
struct VarLoc {
- DebugVariable Var;
- const MachineInstr *MI; // MachineInstr should be a DBG_VALUE instr.
+ const DebugVariable Var;
+ const MachineInstr &MI; ///< Only used for cloning a new DBG_VALUE.
+
+ enum { InvalidKind = 0, RegisterKind } Kind;
+
+ /// The value location. Stored separately to avoid repeatedly
+ /// extracting it from MI.
+ union {
+ struct {
+ uint32_t RegNo;
+ uint32_t Offset;
+ } RegisterLoc;
+ uint64_t Hash;
+ } Loc;
+
+ VarLoc(const MachineInstr &MI)
+ : Var(MI.getDebugVariable(), MI.getDebugLoc()->getInlinedAt()), MI(MI),
+ Kind(InvalidKind) {
+ static_assert((sizeof(Loc) == sizeof(uint64_t)),
+ "hash does not cover all members of Loc");
+ assert(MI.isDebugValue() && "not a DBG_VALUE");
+ assert(MI.getNumOperands() == 4 && "malformed DBG_VALUE");
+ if (int RegNo = isDbgValueDescribedByReg(MI)) {
+ Kind = RegisterKind;
+ Loc.RegisterLoc.RegNo = RegNo;
+ uint64_t Offset =
+ MI.isIndirectDebugValue() ? MI.getOperand(1).getImm() : 0;
+ // We don't support offsets larger than 4GiB here. They are
+ // slated to be replaced with DIExpressions anyway.
+ if (Offset >= (1ULL << 32))
+ Kind = InvalidKind;
+ else
+ Loc.RegisterLoc.Offset = Offset;
+ }
+ }
+
+ /// If this variable is described by a register, return it,
+ /// otherwise return 0.
+ unsigned isDescribedByReg() const {
+ if (Kind == RegisterKind)
+ return Loc.RegisterLoc.RegNo;
+ return 0;
+ }
- VarLoc(DebugVariable _var, const MachineInstr *_mi) : Var(_var), MI(_mi) {}
+ void dump() const { MI.dump(); }
- bool operator==(const VarLoc &V) const;
+ bool operator==(const VarLoc &Other) const {
+ return Var == Other.Var && Loc.Hash == Other.Loc.Hash;
+ }
+
+ /// This operator guarantees that VarLocs are sorted by Variable first.
+ bool operator<(const VarLoc &Other) const {
+ if (Var == Other.Var)
+ return Loc.Hash < Other.Loc.Hash;
+ return Var < Other.Var;
+ }
};
- typedef std::list<VarLoc> VarLocList;
- typedef SmallDenseMap<const MachineBasicBlock *, VarLocList> VarLocInMBB;
+ typedef UniqueVector<VarLoc> VarLocMap;
+ typedef SparseBitVector<> VarLocSet;
+ typedef SmallDenseMap<const MachineBasicBlock *, VarLocSet> VarLocInMBB;
+
+ /// This holds the working set of currently open ranges. For fast
+ /// access, this is done both as a set of VarLocIDs, and a map of
+ /// DebugVariable to the most recent VarLocID. Note that a DBG_VALUE ends all
+ /// previous open ranges for the same variable.
+ class OpenRangesSet {
+ VarLocSet VarLocs;
+ SmallDenseMap<DebugVariableBase, unsigned, 8> Vars;
+
+ public:
+ const VarLocSet &getVarLocs() const { return VarLocs; }
+
+ /// Terminate all open ranges for Var by removing it from the set.
+ void erase(DebugVariable Var) {
+ auto It = Vars.find(Var);
+ if (It != Vars.end()) {
+ unsigned ID = It->second;
+ VarLocs.reset(ID);
+ Vars.erase(It);
+ }
+ }
+
+ /// Terminate all open ranges listed in \c KillSet by removing
+ /// them from the set.
+ void erase(const VarLocSet &KillSet, const VarLocMap &VarLocIDs) {
+ VarLocs.intersectWithComplement(KillSet);
+ for (unsigned ID : KillSet)
+ Vars.erase(VarLocIDs[ID].Var);
+ }
+
+ /// Insert a new range into the set.
+ void insert(unsigned VarLocID, DebugVariableBase Var) {
+ VarLocs.set(VarLocID);
+ Vars.insert({Var, VarLocID});
+ }
+
+ /// Empty the set.
+ void clear() {
+ VarLocs.clear();
+ Vars.clear();
+ }
+
+ /// Return whether the set is empty or not.
+ bool empty() const {
+ assert(Vars.empty() == VarLocs.empty() && "open ranges are inconsistent");
+ return VarLocs.empty();
+ }
+ };
- void transferDebugValue(MachineInstr &MI, VarLocList &OpenRanges);
- void transferRegisterDef(MachineInstr &MI, VarLocList &OpenRanges);
- bool transferTerminatorInst(MachineInstr &MI, VarLocList &OpenRanges,
- VarLocInMBB &OutLocs);
- bool transfer(MachineInstr &MI, VarLocList &OpenRanges, VarLocInMBB &OutLocs);
+ void transferDebugValue(const MachineInstr &MI, OpenRangesSet &OpenRanges,
+ VarLocMap &VarLocIDs);
+ void transferRegisterDef(MachineInstr &MI, OpenRangesSet &OpenRanges,
+ const VarLocMap &VarLocIDs);
+ bool transferTerminatorInst(MachineInstr &MI, OpenRangesSet &OpenRanges,
+ VarLocInMBB &OutLocs, const VarLocMap &VarLocIDs);
+ bool transfer(MachineInstr &MI, OpenRangesSet &OpenRanges,
+ VarLocInMBB &OutLocs, VarLocMap &VarLocIDs);
- bool join(MachineBasicBlock &MBB, VarLocInMBB &OutLocs, VarLocInMBB &InLocs);
+ bool join(MachineBasicBlock &MBB, VarLocInMBB &OutLocs, VarLocInMBB &InLocs,
+ const VarLocMap &VarLocIDs);
bool ExtendRanges(MachineFunction &MF);
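
The switch from VarLocList to bit sets lets the transfer and join steps become set algebra over VarLoc IDs. A minimal sketch of the three operations involved, using llvm::SparseBitVector:

  #include "llvm/ADT/SparseBitVector.h"
  using namespace llvm;

  void varLocSetAlgebra(SparseBitVector<> &OpenRanges,
                        const SparseBitVector<> &KillSet,
                        SparseBitVector<> &OutLocs,
                        SparseBitVector<> &InLocsT,
                        const SparseBitVector<> &PredOutLocs) {
    OpenRanges.intersectWithComplement(KillSet); // erase(): drop killed ranges
    OutLocs |= OpenRanges;  // transferTerminatorInst(): union into out-set
    InLocsT &= PredOutLocs; // join(): intersect across predecessors
  }
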
@@ -98,8 +215,14 @@ public:
/// information we preserve.
void getAnalysisUsage(AnalysisUsage &AU) const override;
+ MachineFunctionProperties getRequiredProperties() const override {
+ return MachineFunctionProperties().set(
+ MachineFunctionProperties::Property::AllVRegsAllocated);
+ }
+
/// Print to ostream with a message.
- void printVarLocInMBB(const VarLocInMBB &V, const char *msg,
+ void printVarLocInMBB(const MachineFunction &MF, const VarLocInMBB &V,
+ const VarLocMap &VarLocIDs, const char *msg,
raw_ostream &Out) const;
/// Calculate the liveness information for the given machine function.
@@ -124,109 +247,95 @@ LiveDebugValues::LiveDebugValues() : MachineFunctionPass(ID) {
/// Tell the pass manager which passes we depend on and what information we
/// preserve.
void LiveDebugValues::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
MachineFunctionPass::getAnalysisUsage(AU);
}
-// \brief If @MI is a DBG_VALUE with debug value described by a defined
-// register, returns the number of this register. In the other case, returns 0.
-static unsigned isDescribedByReg(const MachineInstr &MI) {
- assert(MI.isDebugValue());
- assert(MI.getNumOperands() == 4);
- // If location of variable is described using a register (directly or
- // indirecltly), this register is always a first operand.
- return MI.getOperand(0).isReg() ? MI.getOperand(0).getReg() : 0;
-}
-
-// \brief This function takes two DBG_VALUE instructions and returns true
-// if their offsets are equal; otherwise returns false.
-static bool areOffsetsEqual(const MachineInstr &MI1, const MachineInstr &MI2) {
- assert(MI1.isDebugValue());
- assert(MI1.getNumOperands() == 4);
-
- assert(MI2.isDebugValue());
- assert(MI2.getNumOperands() == 4);
-
- if (!MI1.isIndirectDebugValue() && !MI2.isIndirectDebugValue())
- return true;
-
- // Check if both MIs are indirect and they are equal.
- if (MI1.isIndirectDebugValue() && MI2.isIndirectDebugValue())
- return MI1.getOperand(1).getImm() == MI2.getOperand(1).getImm();
-
- return false;
-}
-
//===----------------------------------------------------------------------===//
// Debug Range Extension Implementation
//===----------------------------------------------------------------------===//
-void LiveDebugValues::printVarLocInMBB(const VarLocInMBB &V, const char *msg,
+void LiveDebugValues::printVarLocInMBB(const MachineFunction &MF,
+ const VarLocInMBB &V,
+ const VarLocMap &VarLocIDs,
+ const char *msg,
raw_ostream &Out) const {
- Out << "Printing " << msg << ":\n";
- for (const auto &L : V) {
- Out << "MBB: " << L.first->getName() << ":\n";
- for (const auto &VLL : L.second) {
- Out << " Var: " << VLL.Var.Var->getName();
+ for (const MachineBasicBlock &BB : MF) {
+ const auto &L = V.lookup(&BB);
+ Out << "MBB: " << BB.getName() << ":\n";
+ for (unsigned VLL : L) {
+ const VarLoc &VL = VarLocIDs[VLL];
+ Out << " Var: " << VL.Var.getVar()->getName();
Out << " MI: ";
- (*VLL.MI).dump();
+ VL.dump();
Out << "\n";
}
}
Out << "\n";
}
-bool LiveDebugValues::VarLoc::operator==(const VarLoc &V) const {
- return (Var == V.Var) && (isDescribedByReg(*MI) == isDescribedByReg(*V.MI)) &&
- (areOffsetsEqual(*MI, *V.MI));
-}
-
/// End all previous ranges related to @MI and start a new range from @MI
/// if it is a DBG_VALUE instr.
-void LiveDebugValues::transferDebugValue(MachineInstr &MI,
- VarLocList &OpenRanges) {
+void LiveDebugValues::transferDebugValue(const MachineInstr &MI,
+ OpenRangesSet &OpenRanges,
+ VarLocMap &VarLocIDs) {
if (!MI.isDebugValue())
return;
- const DILocalVariable *RawVar = MI.getDebugVariable();
- assert(RawVar->isValidLocationForIntrinsic(MI.getDebugLoc()) &&
+ const DILocalVariable *Var = MI.getDebugVariable();
+ const DILocation *DebugLoc = MI.getDebugLoc();
+ const DILocation *InlinedAt = DebugLoc->getInlinedAt();
+ assert(Var->isValidLocationForIntrinsic(DebugLoc) &&
"Expected inlined-at fields to agree");
- DebugVariable Var(RawVar, MI.getDebugLoc()->getInlinedAt());
// End all previous ranges of Var.
- OpenRanges.erase(
- std::remove_if(OpenRanges.begin(), OpenRanges.end(),
- [&](const VarLoc &V) { return (Var == V.Var); }),
- OpenRanges.end());
+ DebugVariable V(Var, InlinedAt);
+ OpenRanges.erase(V);
- // Add Var to OpenRanges from this DBG_VALUE.
+ // Add the VarLoc to OpenRanges from this DBG_VALUE.
// TODO: Currently handles DBG_VALUE which has only reg as location.
- if (isDescribedByReg(MI)) {
- VarLoc V(Var, &MI);
- OpenRanges.push_back(std::move(V));
+ if (isDbgValueDescribedByReg(MI)) {
+ VarLoc VL(MI);
+ unsigned ID = VarLocIDs.insert(VL);
+ OpenRanges.insert(ID, VL.Var);
}
}
/// A definition of a register may mark the end of a range.
void LiveDebugValues::transferRegisterDef(MachineInstr &MI,
- VarLocList &OpenRanges) {
+ OpenRangesSet &OpenRanges,
+ const VarLocMap &VarLocIDs) {
+ MachineFunction *MF = MI.getParent()->getParent();
+ const TargetLowering *TLI = MF->getSubtarget().getTargetLowering();
+ unsigned SP = TLI->getStackPointerRegisterToSaveRestore();
+ SparseBitVector<> KillSet;
for (const MachineOperand &MO : MI.operands()) {
- if (!(MO.isReg() && MO.isDef() && MO.getReg() &&
- TRI->isPhysicalRegister(MO.getReg())))
- continue;
- // Remove ranges of all aliased registers.
- for (MCRegAliasIterator RAI(MO.getReg(), TRI, true); RAI.isValid(); ++RAI)
- OpenRanges.erase(std::remove_if(OpenRanges.begin(), OpenRanges.end(),
- [&](const VarLoc &V) {
- return (*RAI ==
- isDescribedByReg(*V.MI));
- }),
- OpenRanges.end());
+ if (MO.isReg() && MO.isDef() && MO.getReg() &&
+ TRI->isPhysicalRegister(MO.getReg())) {
+ // Remove ranges of all aliased registers.
+ for (MCRegAliasIterator RAI(MO.getReg(), TRI, true); RAI.isValid(); ++RAI)
+ for (unsigned ID : OpenRanges.getVarLocs())
+ if (VarLocIDs[ID].isDescribedByReg() == *RAI)
+ KillSet.set(ID);
+ } else if (MO.isRegMask()) {
+ // Remove ranges of all clobbered registers. Register masks don't usually
+ // list SP as preserved. While the debug info may be off for an
+ // instruction or two around callee-cleanup calls, transferring the
+ // DEBUG_VALUE across the call is still a better user experience.
+ for (unsigned ID : OpenRanges.getVarLocs()) {
+ unsigned Reg = VarLocIDs[ID].isDescribedByReg();
+ if (Reg && Reg != SP && MO.clobbersPhysReg(Reg))
+ KillSet.set(ID);
+ }
+ }
}
+ OpenRanges.erase(KillSet, VarLocIDs);
}
/// Terminate all open ranges at the end of the current basic block.
bool LiveDebugValues::transferTerminatorInst(MachineInstr &MI,
- VarLocList &OpenRanges,
- VarLocInMBB &OutLocs) {
+ OpenRangesSet &OpenRanges,
+ VarLocInMBB &OutLocs,
+ const VarLocMap &VarLocIDs) {
bool Changed = false;
const MachineBasicBlock *CurMBB = MI.getParent();
if (!(MI.isTerminator() || (&MI == &CurMBB->instr_back())))
@@ -235,29 +344,23 @@ bool LiveDebugValues::transferTerminatorInst(MachineInstr &MI,
if (OpenRanges.empty())
return false;
- VarLocList &VLL = OutLocs[CurMBB];
-
- for (auto OR : OpenRanges) {
- // Copy OpenRanges to OutLocs, if not already present.
- assert(OR.MI->isDebugValue());
- DEBUG(dbgs() << "Add to OutLocs: "; OR.MI->dump(););
- if (std::find_if(VLL.begin(), VLL.end(),
- [&](const VarLoc &V) { return (OR == V); }) == VLL.end()) {
- VLL.push_back(std::move(OR));
- Changed = true;
- }
- }
+ DEBUG(for (unsigned ID : OpenRanges.getVarLocs()) {
+ // Copy OpenRanges to OutLocs, if not already present.
+ dbgs() << "Add to OutLocs: "; VarLocIDs[ID].dump();
+ });
+ VarLocSet &VLS = OutLocs[CurMBB];
+ Changed = VLS |= OpenRanges.getVarLocs();
OpenRanges.clear();
return Changed;
}
/// This routine creates OpenRanges and OutLocs.
-bool LiveDebugValues::transfer(MachineInstr &MI, VarLocList &OpenRanges,
- VarLocInMBB &OutLocs) {
+bool LiveDebugValues::transfer(MachineInstr &MI, OpenRangesSet &OpenRanges,
+ VarLocInMBB &OutLocs, VarLocMap &VarLocIDs) {
bool Changed = false;
- transferDebugValue(MI, OpenRanges);
- transferRegisterDef(MI, OpenRanges);
- Changed = transferTerminatorInst(MI, OpenRanges, OutLocs);
+ transferDebugValue(MI, OpenRanges, VarLocIDs);
+ transferRegisterDef(MI, OpenRanges, VarLocIDs);
+ Changed = transferTerminatorInst(MI, OpenRanges, OutLocs, VarLocIDs);
return Changed;
}
@@ -265,14 +368,14 @@ bool LiveDebugValues::transfer(MachineInstr &MI, VarLocList &OpenRanges,
/// inserting a new DBG_VALUE instruction at the start of the @MBB - if the same
/// source variable in all the predecessors of @MBB resides in the same location.
bool LiveDebugValues::join(MachineBasicBlock &MBB, VarLocInMBB &OutLocs,
- VarLocInMBB &InLocs) {
+ VarLocInMBB &InLocs, const VarLocMap &VarLocIDs) {
DEBUG(dbgs() << "join MBB: " << MBB.getName() << "\n");
bool Changed = false;
- VarLocList InLocsT; // Temporary incoming locations.
+ VarLocSet InLocsT; // Temporary incoming locations.
- // For all predecessors of this MBB, find the set of VarLocs that can be
- // joined.
+ // For all predecessors of this MBB, find the set of VarLocs that
+ // can be joined.
for (auto p : MBB.predecessors()) {
auto OL = OutLocs.find(p);
// Join is null in case of empty OutLocs from any of the pred.
@@ -284,44 +387,34 @@ bool LiveDebugValues::join(MachineBasicBlock &MBB, VarLocInMBB &OutLocs,
InLocsT = OL->second;
continue;
}
-
// Join with this predecessor.
- VarLocList &VLL = OL->second;
- InLocsT.erase(
- std::remove_if(InLocsT.begin(), InLocsT.end(), [&](VarLoc &ILT) {
- return (std::find_if(VLL.begin(), VLL.end(), [&](const VarLoc &V) {
- return (ILT == V);
- }) == VLL.end());
- }), InLocsT.end());
+ InLocsT &= OL->second;
}
if (InLocsT.empty())
return false;
- VarLocList &ILL = InLocs[&MBB];
+ VarLocSet &ILS = InLocs[&MBB];
// Insert DBG_VALUE instructions, if not already inserted.
- for (auto ILT : InLocsT) {
- if (std::find_if(ILL.begin(), ILL.end(), [&](const VarLoc &I) {
- return (ILT == I);
- }) == ILL.end()) {
- // This VarLoc is not found in InLocs i.e. it is not yet inserted. So, a
- // new range is started for the var from the mbb's beginning by inserting
- // a new DBG_VALUE. transfer() will end this range however appropriate.
- const MachineInstr *DMI = ILT.MI;
- MachineInstr *MI =
- BuildMI(MBB, MBB.instr_begin(), DMI->getDebugLoc(), DMI->getDesc(),
- DMI->isIndirectDebugValue(), DMI->getOperand(0).getReg(), 0,
- DMI->getDebugVariable(), DMI->getDebugExpression());
- if (DMI->isIndirectDebugValue())
- MI->getOperand(1).setImm(DMI->getOperand(1).getImm());
- DEBUG(dbgs() << "Inserted: "; MI->dump(););
- ++NumInserted;
- Changed = true;
-
- VarLoc V(ILT.Var, MI);
- ILL.push_back(std::move(V));
- }
+ VarLocSet Diff = InLocsT;
+ Diff.intersectWithComplement(ILS);
+ for (auto ID : Diff) {
+ // This VarLoc is not found in InLocs, i.e., it is not yet inserted. So a
+ // new range is started for the var from the mbb's beginning by inserting
+ // a new DBG_VALUE. transfer() will end this range as appropriate.
+ const VarLoc &DiffIt = VarLocIDs[ID];
+ const MachineInstr *DMI = &DiffIt.MI;
+ MachineInstr *MI =
+ BuildMI(MBB, MBB.instr_begin(), DMI->getDebugLoc(), DMI->getDesc(),
+ DMI->isIndirectDebugValue(), DMI->getOperand(0).getReg(), 0,
+ DMI->getDebugVariable(), DMI->getDebugExpression());
+ if (DMI->isIndirectDebugValue())
+ MI->getOperand(1).setImm(DMI->getOperand(1).getImm());
+ DEBUG(dbgs() << "Inserted: "; MI->dump(););
+ ILS.set(ID);
+ ++NumInserted;
+ Changed = true;
}
return Changed;
}
@@ -336,21 +429,27 @@ bool LiveDebugValues::ExtendRanges(MachineFunction &MF) {
bool OLChanged = false;
bool MBBJoined = false;
- VarLocList OpenRanges; // Ranges that are open until end of bb.
+ VarLocMap VarLocIDs; // Map VarLoc<>unique ID for use in bitvectors.
+ OpenRangesSet OpenRanges; // Ranges that are open until end of bb.
VarLocInMBB OutLocs; // Ranges that exist beyond bb.
VarLocInMBB InLocs; // Ranges that are incoming after joining.
DenseMap<unsigned int, MachineBasicBlock *> OrderToBB;
DenseMap<MachineBasicBlock *, unsigned int> BBToOrder;
std::priority_queue<unsigned int, std::vector<unsigned int>,
- std::greater<unsigned int>> Worklist;
+ std::greater<unsigned int>>
+ Worklist;
std::priority_queue<unsigned int, std::vector<unsigned int>,
- std::greater<unsigned int>> Pending;
+ std::greater<unsigned int>>
+ Pending;
+
// Initialize every mbb with OutLocs.
for (auto &MBB : MF)
for (auto &MI : MBB)
- transfer(MI, OpenRanges, OutLocs);
- DEBUG(printVarLocInMBB(OutLocs, "OutLocs after initialization", dbgs()));
+ transfer(MI, OpenRanges, OutLocs, VarLocIDs);
+
+ DEBUG(printVarLocInMBB(MF, OutLocs, VarLocIDs, "OutLocs after initialization",
+ dbgs()));
ReversePostOrderTraversal<MachineFunction *> RPOT(&MF);
unsigned int RPONumber = 0;
@@ -360,7 +459,6 @@ bool LiveDebugValues::ExtendRanges(MachineFunction &MF) {
Worklist.push(RPONumber);
++RPONumber;
}
-
// This is a standard "union of predecessor outs" dataflow problem.
// To solve it, we perform join() and transfer() using the two worklist method
// until the ranges converge.
@@ -373,21 +471,23 @@ bool LiveDebugValues::ExtendRanges(MachineFunction &MF) {
while (!Worklist.empty()) {
MachineBasicBlock *MBB = OrderToBB[Worklist.top()];
Worklist.pop();
- MBBJoined = join(*MBB, OutLocs, InLocs);
+ MBBJoined = join(*MBB, OutLocs, InLocs, VarLocIDs);
if (MBBJoined) {
MBBJoined = false;
Changed = true;
for (auto &MI : *MBB)
- OLChanged |= transfer(MI, OpenRanges, OutLocs);
- DEBUG(printVarLocInMBB(OutLocs, "OutLocs after propagating", dbgs()));
- DEBUG(printVarLocInMBB(InLocs, "InLocs after propagating", dbgs()));
+ OLChanged |= transfer(MI, OpenRanges, OutLocs, VarLocIDs);
+
+ DEBUG(printVarLocInMBB(MF, OutLocs, VarLocIDs,
+ "OutLocs after propagating", dbgs()));
+ DEBUG(printVarLocInMBB(MF, InLocs, VarLocIDs,
+ "InLocs after propagating", dbgs()));
if (OLChanged) {
OLChanged = false;
for (auto s : MBB->successors())
- if (!OnPending.count(s)) {
- OnPending.insert(s);
+ if (OnPending.insert(s).second) {
Pending.push(BBToOrder[s]);
}
}
@@ -399,8 +499,8 @@ bool LiveDebugValues::ExtendRanges(MachineFunction &MF) {
assert(Pending.empty() && "Pending should be empty");
}
- DEBUG(printVarLocInMBB(OutLocs, "Final OutLocs", dbgs()));
- DEBUG(printVarLocInMBB(InLocs, "Final InLocs", dbgs()));
+ DEBUG(printVarLocInMBB(MF, OutLocs, VarLocIDs, "Final OutLocs", dbgs()));
+ DEBUG(printVarLocInMBB(MF, InLocs, VarLocIDs, "Final InLocs", dbgs()));
return Changed;
}
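
The driver loop above is the classic two-worklist fixed-point scheme. A condensed standalone sketch of its shape, with RPO block numbers standing in for basic blocks and joinAndTransfer/successors as hypothetical callbacks:

  #include <functional>
  #include <queue>
  #include <vector>

  // Process blocks in reverse post order; when a block's out-set changes, its
  // successors are queued on Pending, and Pending becomes the next round's
  // Worklist. Duplicate pushes are harmless because joinAndTransfer is
  // monotone and eventually reports no change.
  void solveDataflow(unsigned NumBlocks,
                     std::function<bool(unsigned)> joinAndTransfer,
                     std::function<std::vector<unsigned>(unsigned)> successors) {
    std::priority_queue<unsigned, std::vector<unsigned>,
                        std::greater<unsigned>>
        Worklist, Pending;
    for (unsigned RPO = 0; RPO != NumBlocks; ++RPO)
      Worklist.push(RPO);
    while (!Worklist.empty()) {
      while (!Worklist.empty()) {
        unsigned BB = Worklist.top();
        Worklist.pop();
        if (joinAndTransfer(BB)) // out-set grew: revisit successors
          for (unsigned Succ : successors(BB))
            Pending.push(Succ);
      }
      Worklist.swap(Pending);
    }
  }
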
diff --git a/contrib/llvm/lib/CodeGen/LiveDebugVariables.cpp b/contrib/llvm/lib/CodeGen/LiveDebugVariables.cpp
index 6dac7db..966b4f1 100644
--- a/contrib/llvm/lib/CodeGen/LiveDebugVariables.cpp
+++ b/contrib/llvm/lib/CodeGen/LiveDebugVariables.cpp
@@ -42,6 +42,7 @@
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
#include <memory>
+#include <utility>
using namespace llvm;
@@ -84,7 +85,7 @@ class UserValueScopes {
SmallPtrSet<const MachineBasicBlock *, 4> LBlocks;
public:
- UserValueScopes(DebugLoc D, LexicalScopes &L) : DL(D), LS(L) {}
+ UserValueScopes(DebugLoc D, LexicalScopes &L) : DL(std::move(D)), LS(L) {}
/// dominates - Return true if current scope dominates at least one machine
/// instruction in a given machine basic block.
@@ -141,8 +142,8 @@ public:
/// UserValue - Create a new UserValue.
UserValue(const MDNode *var, const MDNode *expr, unsigned o, bool i,
DebugLoc L, LocMap::Allocator &alloc)
- : Variable(var), Expression(expr), offset(o), IsIndirect(i), dl(L),
- leader(this), next(nullptr), locInts(alloc) {}
+ : Variable(var), Expression(expr), offset(o), IsIndirect(i),
+ dl(std::move(L)), leader(this), next(nullptr), locInts(alloc) {}
/// getLeader - Get the leader of this value's equivalence class.
UserValue *getLeader() {
@@ -172,8 +173,10 @@ public:
return L1;
// Splice L2 before L1's members.
UserValue *End = L2;
- while (End->next)
- End->leader = L1, End = End->next;
+ while (End->next) {
+ End->leader = L1;
+ End = End->next;
+ }
End->leader = L1;
End->next = L1->next;
L1->next = L2;
@@ -302,7 +305,7 @@ class LDVImpl {
/// getUserValue - Find or create a UserValue.
UserValue *getUserValue(const MDNode *Var, const MDNode *Expr,
- unsigned Offset, bool IsIndirect, DebugLoc DL);
+ unsigned Offset, bool IsIndirect, const DebugLoc &DL);
/// lookupVirtReg - Find the EC leader for VirtReg or null.
UserValue *lookupVirtReg(unsigned VirtReg);
@@ -311,7 +314,7 @@ class LDVImpl {
/// @param MI DBG_VALUE instruction
/// @param Idx Last valid SlotIndex before instruction.
/// @return True if the DBG_VALUE instruction should be deleted.
- bool handleDebugValue(MachineInstr *MI, SlotIndex Idx);
+ bool handleDebugValue(MachineInstr &MI, SlotIndex Idx);
/// collectDebugValues - Collect and erase all DBG_VALUE instructions, adding
/// a UserValue def for each instruction.
@@ -355,7 +358,7 @@ public:
};
} // namespace
-static void printDebugLoc(DebugLoc DL, raw_ostream &CommentOS,
+static void printDebugLoc(const DebugLoc &DL, raw_ostream &CommentOS,
const LLVMContext &Ctx) {
if (!DL)
return;
@@ -456,7 +459,7 @@ void UserValue::mapVirtRegs(LDVImpl *LDV) {
UserValue *LDVImpl::getUserValue(const MDNode *Var, const MDNode *Expr,
unsigned Offset, bool IsIndirect,
- DebugLoc DL) {
+ const DebugLoc &DL) {
UserValue *&Leader = userVarMap[Var];
if (Leader) {
UserValue *UV = Leader->getLeader();
@@ -485,24 +488,23 @@ UserValue *LDVImpl::lookupVirtReg(unsigned VirtReg) {
return nullptr;
}
-bool LDVImpl::handleDebugValue(MachineInstr *MI, SlotIndex Idx) {
+bool LDVImpl::handleDebugValue(MachineInstr &MI, SlotIndex Idx) {
// DBG_VALUE loc, offset, variable
- if (MI->getNumOperands() != 4 ||
- !(MI->getOperand(1).isReg() || MI->getOperand(1).isImm()) ||
- !MI->getOperand(2).isMetadata()) {
- DEBUG(dbgs() << "Can't handle " << *MI);
+ if (MI.getNumOperands() != 4 ||
+ !(MI.getOperand(1).isReg() || MI.getOperand(1).isImm()) ||
+ !MI.getOperand(2).isMetadata()) {
+ DEBUG(dbgs() << "Can't handle " << MI);
return false;
}
// Get or create the UserValue for (variable,offset).
- bool IsIndirect = MI->isIndirectDebugValue();
- unsigned Offset = IsIndirect ? MI->getOperand(1).getImm() : 0;
- const MDNode *Var = MI->getDebugVariable();
- const MDNode *Expr = MI->getDebugExpression();
+ bool IsIndirect = MI.isIndirectDebugValue();
+ unsigned Offset = IsIndirect ? MI.getOperand(1).getImm() : 0;
+ const MDNode *Var = MI.getDebugVariable();
+ const MDNode *Expr = MI.getDebugExpression();
//here.
- UserValue *UV =
- getUserValue(Var, Expr, Offset, IsIndirect, MI->getDebugLoc());
- UV->addDef(Idx, MI->getOperand(0));
+ UserValue *UV = getUserValue(Var, Expr, Offset, IsIndirect, MI.getDebugLoc());
+ UV->addDef(Idx, MI.getOperand(0));
return true;
}
@@ -518,12 +520,13 @@ bool LDVImpl::collectDebugValues(MachineFunction &mf) {
continue;
}
// DBG_VALUE has no slot index, use the previous instruction instead.
- SlotIndex Idx = MBBI == MBB->begin() ?
- LIS->getMBBStartIdx(MBB) :
- LIS->getInstructionIndex(std::prev(MBBI)).getRegSlot();
+ SlotIndex Idx =
+ MBBI == MBB->begin()
+ ? LIS->getMBBStartIdx(MBB)
+ : LIS->getInstructionIndex(*std::prev(MBBI)).getRegSlot();
// Handle consecutive DBG_VALUE instructions with the same slot index.
do {
- if (handleDebugValue(MBBI, Idx)) {
+ if (handleDebugValue(*MBBI, Idx)) {
MBBI = MBB->erase(MBBI);
Changed = true;
} else
@@ -554,8 +557,10 @@ void UserValue::extendDef(SlotIndex Idx, unsigned LocNo, LiveRange *LR,
Kills->push_back(Start);
return;
}
- if (Segment->end < Stop)
- Stop = Segment->end, ToEnd = false;
+ if (Segment->end < Stop) {
+ Stop = Segment->end;
+ ToEnd = false;
+ }
}
// There could already be a short def at Start.
@@ -569,8 +574,10 @@ void UserValue::extendDef(SlotIndex Idx, unsigned LocNo, LiveRange *LR,
}
// Limited by the next def.
- if (I.valid() && I.start() < Stop)
- Stop = I.start(), ToEnd = false;
+ if (I.valid() && I.start() < Stop) {
+ Stop = I.start();
+ ToEnd = false;
+ }
// Limited by VNI's live range.
else if (!ToEnd && Kills)
Kills->push_back(Stop);
@@ -608,7 +615,7 @@ UserValue::addDefsFromCopies(LiveInterval *LI, unsigned LocNo,
// Is LocNo extended to reach this copy? If not, another def may be blocking
// it, or we are looking at a wrong value of LI.
- SlotIndex Idx = LIS.getInstructionIndex(MI);
+ SlotIndex Idx = LIS.getInstructionIndex(*MI);
LocMap::iterator I = locInts.find(Idx.getRegSlot(true));
if (!I.valid() || I.value() != LocNo)
continue;
@@ -1033,7 +1040,7 @@ bool LiveDebugVariables::doInitialization(Module &M) {
}
#ifndef NDEBUG
-void LiveDebugVariables::dump() {
+LLVM_DUMP_METHOD void LiveDebugVariables::dump() {
if (pImpl)
static_cast<LDVImpl*>(pImpl)->print(dbgs());
}
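The DebugLoc churn in this file follows one rule worth spelling out: parameters that are stored take the argument by value and std::move it into the member, while parameters that are only read become const references. A standalone sketch of both idioms, assuming a type whose copy has nontrivial cost (std::string stands in for DebugLoc):

    #include <string>
    #include <utility>

    class Holder {
      std::string Loc; // stands in for the stored DebugLoc

    public:
      // Sink parameter: by value + std::move. Lvalue callers pay one copy,
      // rvalue callers pay none; the member is always move-constructed.
      explicit Holder(std::string L) : Loc(std::move(L)) {}

      // Read-only parameter: const reference, never a copy.
      bool matches(const std::string &Other) const { return Loc == Other; }
    };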
diff --git a/contrib/llvm/lib/CodeGen/LiveDebugVariables.h b/contrib/llvm/lib/CodeGen/LiveDebugVariables.h
index 3d36f4d..afe87a5 100644
--- a/contrib/llvm/lib/CodeGen/LiveDebugVariables.h
+++ b/contrib/llvm/lib/CodeGen/LiveDebugVariables.h
@@ -21,12 +21,12 @@
#ifndef LLVM_LIB_CODEGEN_LIVEDEBUGVARIABLES_H
#define LLVM_LIB_CODEGEN_LIVEDEBUGVARIABLES_H
-#include "llvm/ADT/ArrayRef.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/IR/DebugInfo.h"
namespace llvm {
+template <typename T> class ArrayRef;
class LiveInterval;
class LiveIntervals;
class VirtRegMap;
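The header hunk above is a compile-time dependency trim: because LiveDebugVariables.h only mentions ArrayRef<unsigned> in a declaration, a forward declaration of the template suffices and the full #include can move to the .cpp files that call through it. The same trick in miniature (toy template and method, not LLVM's):

    // widget.h
    template <typename T> class Span; // forward declaration is sufficient here

    class Widget {
    public:
      // Declaring a parameter of incomplete type is legal; only the definition
      // and the call sites need Span's full definition (and its header).
      void splitRegister(Span<unsigned> NewRegs);
    };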
diff --git a/contrib/llvm/lib/CodeGen/LiveInterval.cpp b/contrib/llvm/lib/CodeGen/LiveInterval.cpp
index 5015800..93c5ca7 100644
--- a/contrib/llvm/lib/CodeGen/LiveInterval.cpp
+++ b/contrib/llvm/lib/CodeGen/LiveInterval.cpp
@@ -19,8 +19,9 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/LiveInterval.h"
+
+#include "LiveRangeUtils.h"
#include "RegisterCoalescer.h"
-#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
@@ -309,10 +310,12 @@ LiveRange::iterator LiveRange::find(SlotIndex Pos) {
size_t Len = size();
do {
size_t Mid = Len >> 1;
- if (Pos < I[Mid].end)
+ if (Pos < I[Mid].end) {
Len = Mid;
- else
- I += Mid + 1, Len -= Mid + 1;
+ } else {
+ I += Mid + 1;
+ Len -= Mid + 1;
+ }
} while (Len);
return I;
}
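The find() hunk above only unfolds a comma operator into braces; the algorithm itself is a lower-bound-style binary search that returns the first segment whose end lies after Pos. A self-contained version over a plain sorted vector, assuming the same "segments sorted by end" invariant:

    #include <vector>

    struct Seg { int start, end; }; // half-open [start,end), sorted, non-overlapping

    // First segment with end > Pos -- the only one that can contain Pos.
    std::vector<Seg>::iterator findSeg(std::vector<Seg> &Segs, int Pos) {
      auto I = Segs.begin();
      size_t Len = Segs.size();
      while (Len) {
        size_t Mid = Len >> 1;
        if (Pos < I[Mid].end) {
          Len = Mid;        // candidate is at Mid or to its left; keep I
        } else {
          I += Mid + 1;     // everything up to Mid ends too early
          Len -= Mid + 1;
        }
      }
      return I; // may be end() if Pos is past the last segment
    }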
@@ -814,239 +817,6 @@ void LiveInterval::clearSubRanges() {
SubRanges = nullptr;
}
-/// Helper function for constructMainRangeFromSubranges(): Search the CFG
-/// backwards until we find a place covered by a LiveRange segment that actually
-/// has a valno set.
-static VNInfo *searchForVNI(const SlotIndexes &Indexes, LiveRange &LR,
- const MachineBasicBlock *MBB,
- SmallPtrSetImpl<const MachineBasicBlock*> &Visited) {
- // We start the search at the end of MBB.
- SlotIndex EndIdx = Indexes.getMBBEndIdx(MBB);
- // In our use case we can't leave the area covered by the live segments without
- // finding an actual VNI def.
- LiveRange::iterator I = LR.find(EndIdx.getPrevSlot());
- assert(I != LR.end());
- LiveRange::Segment &S = *I;
- if (S.valno != nullptr)
- return S.valno;
-
- VNInfo *VNI = nullptr;
- // Continue at predecessors (we could even go to idom with domtree available).
- for (const MachineBasicBlock *Pred : MBB->predecessors()) {
- // Avoid going in circles.
- if (!Visited.insert(Pred).second)
- continue;
-
- VNI = searchForVNI(Indexes, LR, Pred, Visited);
- if (VNI != nullptr) {
- S.valno = VNI;
- break;
- }
- }
-
- return VNI;
-}
-
-static void determineMissingVNIs(const SlotIndexes &Indexes, LiveInterval &LI) {
- SmallPtrSet<const MachineBasicBlock*, 5> Visited;
-
- LiveRange::iterator OutIt;
- VNInfo *PrevValNo = nullptr;
- for (LiveRange::iterator I = LI.begin(), E = LI.end(); I != E; ++I) {
- LiveRange::Segment &S = *I;
- // Determine final VNI if necessary.
- if (S.valno == nullptr) {
- // This can only happen at the begin of a basic block.
- assert(S.start.isBlock() && "valno should only be missing at block begin");
-
- Visited.clear();
- const MachineBasicBlock *MBB = Indexes.getMBBFromIndex(S.start);
- for (const MachineBasicBlock *Pred : MBB->predecessors()) {
- VNInfo *VNI = searchForVNI(Indexes, LI, Pred, Visited);
- if (VNI != nullptr) {
- S.valno = VNI;
- break;
- }
- }
- assert(S.valno != nullptr && "could not determine valno");
- }
- // Merge with previous segment if it has the same VNI.
- if (PrevValNo == S.valno && OutIt->end == S.start) {
- OutIt->end = S.end;
- } else {
- // Didn't merge. Move OutIt to next segment.
- if (PrevValNo == nullptr)
- OutIt = LI.begin();
- else
- ++OutIt;
-
- if (OutIt != I)
- *OutIt = *I;
- PrevValNo = S.valno;
- }
- }
- // If we merged some segments chop off the end.
- ++OutIt;
- LI.segments.erase(OutIt, LI.end());
-}
-
-void LiveInterval::constructMainRangeFromSubranges(
- const SlotIndexes &Indexes, VNInfo::Allocator &VNIAllocator) {
- // The basic observations on which this algorithm is based:
- // - Each Def/ValNo in a subrange must have a corresponding def on the main
- // range, but no further defs/valnos are necessary.
- // - If any of the subranges is live at a point the main liverange has to be
- // live too; conversely, if no subrange is live the main range mustn't be
- // live either.
- // We do this by scanning through all the subranges simultaneously, creating new
- // segments in the main range as segments start/ends come up in the subranges.
- assert(hasSubRanges() && "expected subranges to be present");
- assert(segments.empty() && valnos.empty() && "expected empty main range");
-
- // Collect subrange, iterator pairs for the walk and determine first and last
- // SlotIndex involved.
- SmallVector<std::pair<const SubRange*, const_iterator>, 4> SRs;
- SlotIndex First;
- SlotIndex Last;
- for (const SubRange &SR : subranges()) {
- if (SR.empty())
- continue;
- SRs.push_back(std::make_pair(&SR, SR.begin()));
- if (!First.isValid() || SR.segments.front().start < First)
- First = SR.segments.front().start;
- if (!Last.isValid() || SR.segments.back().end > Last)
- Last = SR.segments.back().end;
- }
-
- // Walk over all subranges simultaneously.
- Segment CurrentSegment;
- bool ConstructingSegment = false;
- bool NeedVNIFixup = false;
- LaneBitmask ActiveMask = 0;
- SlotIndex Pos = First;
- while (true) {
- SlotIndex NextPos = Last;
- enum {
- NOTHING,
- BEGIN_SEGMENT,
- END_SEGMENT,
- } Event = NOTHING;
- // Which subregister lanes are affected by the current event.
- LaneBitmask EventMask = 0;
- // Whether a BEGIN_SEGMENT is also a valno definition point.
- bool IsDef = false;
- // Find the next begin or end of a subrange segment. Combine masks if we
- // have multiple begins/ends at the same position. Ends take precedence over
- // Begins.
- for (auto &SRP : SRs) {
- const SubRange &SR = *SRP.first;
- const_iterator &I = SRP.second;
- // Advance iterator of subrange to a segment involving Pos; the earlier
- // segments are already merged at this point.
- while (I != SR.end() &&
- (I->end < Pos ||
- (I->end == Pos && (ActiveMask & SR.LaneMask) == 0)))
- ++I;
- if (I == SR.end())
- continue;
- if ((ActiveMask & SR.LaneMask) == 0 &&
- Pos <= I->start && I->start <= NextPos) {
- // Merge multiple begins at the same position.
- if (I->start == NextPos && Event == BEGIN_SEGMENT) {
- EventMask |= SR.LaneMask;
- IsDef |= I->valno->def == I->start;
- } else if (I->start < NextPos || Event != END_SEGMENT) {
- Event = BEGIN_SEGMENT;
- NextPos = I->start;
- EventMask = SR.LaneMask;
- IsDef = I->valno->def == I->start;
- }
- }
- if ((ActiveMask & SR.LaneMask) != 0 &&
- Pos <= I->end && I->end <= NextPos) {
- // Merge multiple ends at the same position.
- if (I->end == NextPos && Event == END_SEGMENT)
- EventMask |= SR.LaneMask;
- else {
- Event = END_SEGMENT;
- NextPos = I->end;
- EventMask = SR.LaneMask;
- }
- }
- }
-
- // Advance scan position.
- Pos = NextPos;
- if (Event == BEGIN_SEGMENT) {
- if (ConstructingSegment && IsDef) {
- // Finish previous segment because we have to start a new one.
- CurrentSegment.end = Pos;
- append(CurrentSegment);
- ConstructingSegment = false;
- }
-
- // Start a new segment if necessary.
- if (!ConstructingSegment) {
- // Determine value number for the segment.
- VNInfo *VNI;
- if (IsDef) {
- VNI = getNextValue(Pos, VNIAllocator);
- } else {
- // We have to reuse an existing value number, if we are lucky
- // then we already passed one of the predecessor blocks and determined
- // its value number (with blocks in reverse postorder this would be
- // always true but we have no such guarantee).
- assert(Pos.isBlock());
- const MachineBasicBlock *MBB = Indexes.getMBBFromIndex(Pos);
- // See if any of the predecessor blocks has a lower number and a VNI
- for (const MachineBasicBlock *Pred : MBB->predecessors()) {
- SlotIndex PredEnd = Indexes.getMBBEndIdx(Pred);
- VNI = getVNInfoBefore(PredEnd);
- if (VNI != nullptr)
- break;
- }
- // Def will come later: We have to do an extra fixup pass.
- if (VNI == nullptr)
- NeedVNIFixup = true;
- }
-
- // In rare cases we can produce adjacent segments with the same value
- // number (if they come from different subranges, but happen to have
- // the same defining instruction). VNIFixup will fix those cases.
- if (!empty() && segments.back().end == Pos &&
- segments.back().valno == VNI)
- NeedVNIFixup = true;
- CurrentSegment.start = Pos;
- CurrentSegment.valno = VNI;
- ConstructingSegment = true;
- }
- ActiveMask |= EventMask;
- } else if (Event == END_SEGMENT) {
- assert(ConstructingSegment);
- // Finish segment if no lane is active anymore.
- ActiveMask &= ~EventMask;
- if (ActiveMask == 0) {
- CurrentSegment.end = Pos;
- append(CurrentSegment);
- ConstructingSegment = false;
- }
- } else {
- // We reached the end of the last subranges and can stop.
- assert(Event == NOTHING);
- break;
- }
- }
-
- // We might not be able to assign new valnos for all segments if the basic
- // block containing the definition comes after a segment using the valno.
- // Do a fixup pass for this uncommon case.
- if (NeedVNIFixup)
- determineMissingVNIs(Indexes, *this);
-
- assert(ActiveMask == 0 && !ConstructingSegment && "all segments ended");
- verify();
-}
-
unsigned LiveInterval::getSize() const {
unsigned Sum = 0;
for (const Segment &S : segments)
@@ -1055,12 +825,12 @@ unsigned LiveInterval::getSize() const {
}
raw_ostream& llvm::operator<<(raw_ostream& os, const LiveRange::Segment &S) {
- return os << '[' << S.start << ',' << S.end << ':' << S.valno->id << ")";
+ return os << '[' << S.start << ',' << S.end << ':' << S.valno->id << ')';
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
-void LiveRange::Segment::dump() const {
- dbgs() << *this << "\n";
+LLVM_DUMP_METHOD void LiveRange::Segment::dump() const {
+ dbgs() << *this << '\n';
}
#endif
@@ -1081,10 +851,10 @@ void LiveRange::print(raw_ostream &OS) const {
for (const_vni_iterator i = vni_begin(), e = vni_end(); i != e;
++i, ++vnum) {
const VNInfo *vni = *i;
- if (vnum) OS << " ";
- OS << vnum << "@";
+ if (vnum) OS << ' ';
+ OS << vnum << '@';
if (vni->isUnused()) {
- OS << "x";
+ OS << 'x';
} else {
OS << vni->def;
if (vni->isPHIDef())
@@ -1094,22 +864,30 @@ void LiveRange::print(raw_ostream &OS) const {
}
}
+void LiveInterval::SubRange::print(raw_ostream &OS) const {
+ OS << " L" << PrintLaneMask(LaneMask) << ' '
+ << static_cast<const LiveRange&>(*this);
+}
+
void LiveInterval::print(raw_ostream &OS) const {
OS << PrintReg(reg) << ' ';
super::print(OS);
// Print subranges
- for (const SubRange &SR : subranges()) {
- OS << " L" << PrintLaneMask(SR.LaneMask) << ' ' << SR;
- }
+ for (const SubRange &SR : subranges())
+ OS << SR;
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
-void LiveRange::dump() const {
- dbgs() << *this << "\n";
+LLVM_DUMP_METHOD void LiveRange::dump() const {
+ dbgs() << *this << '\n';
+}
+
+LLVM_DUMP_METHOD void LiveInterval::SubRange::dump() const {
+ dbgs() << *this << '\n';
}
-void LiveInterval::dump() const {
- dbgs() << *this << "\n";
+LLVM_DUMP_METHOD void LiveInterval::dump() const {
+ dbgs() << *this << '\n';
}
#endif
@@ -1206,8 +984,7 @@ void LiveRangeUpdater::print(raw_ostream &OS) const {
OS << '\n';
}
-void LiveRangeUpdater::dump() const
-{
+LLVM_DUMP_METHOD void LiveRangeUpdater::dump() const {
print(errs());
}
@@ -1405,40 +1182,6 @@ unsigned ConnectedVNInfoEqClasses::Classify(const LiveRange &LR) {
return EqClass.getNumClasses();
}
-template<typename LiveRangeT, typename EqClassesT>
-static void DistributeRange(LiveRangeT &LR, LiveRangeT *SplitLRs[],
- EqClassesT VNIClasses) {
- // Move segments to new intervals.
- LiveRange::iterator J = LR.begin(), E = LR.end();
- while (J != E && VNIClasses[J->valno->id] == 0)
- ++J;
- for (LiveRange::iterator I = J; I != E; ++I) {
- if (unsigned eq = VNIClasses[I->valno->id]) {
- assert((SplitLRs[eq-1]->empty() || SplitLRs[eq-1]->expiredAt(I->start)) &&
- "New intervals should be empty");
- SplitLRs[eq-1]->segments.push_back(*I);
- } else
- *J++ = *I;
- }
- LR.segments.erase(J, E);
-
- // Transfer VNInfos to their new owners and renumber them.
- unsigned j = 0, e = LR.getNumValNums();
- while (j != e && VNIClasses[j] == 0)
- ++j;
- for (unsigned i = j; i != e; ++i) {
- VNInfo *VNI = LR.getValNumInfo(i);
- if (unsigned eq = VNIClasses[i]) {
- VNI->id = SplitLRs[eq-1]->getNumValNums();
- SplitLRs[eq-1]->valnos.push_back(VNI);
- } else {
- VNI->id = j;
- LR.valnos[j++] = VNI;
- }
- }
- LR.valnos.resize(j);
-}
-
void ConnectedVNInfoEqClasses::Distribute(LiveInterval &LI, LiveInterval *LIV[],
MachineRegisterInfo &MRI) {
// Rewrite instructions.
@@ -1453,9 +1196,9 @@ void ConnectedVNInfoEqClasses::Distribute(LiveInterval &LI, LiveInterval *LIV[],
// called, but it is not a requirement.
SlotIndex Idx;
if (MI->isDebugValue())
- Idx = LIS.getSlotIndexes()->getIndexBefore(MI);
+ Idx = LIS.getSlotIndexes()->getIndexBefore(*MI);
else
- Idx = LIS.getInstructionIndex(MI);
+ Idx = LIS.getInstructionIndex(*MI);
LiveQueryResult LRQ = LI.Query(Idx);
const VNInfo *VNI = MO.readsReg() ? LRQ.valueIn() : LRQ.valueDefined();
// In the case of an <undef> use that isn't tied to any def, VNI will be
@@ -1482,15 +1225,20 @@ void ConnectedVNInfoEqClasses::Distribute(LiveInterval &LI, LiveInterval *LIV[],
SubRanges.resize(NumComponents-1, nullptr);
for (unsigned I = 0; I < NumValNos; ++I) {
const VNInfo &VNI = *SR.valnos[I];
- const VNInfo *MainRangeVNI = LI.getVNInfoAt(VNI.def);
- assert(MainRangeVNI != nullptr
- && "SubRange def must have corresponding main range def");
- unsigned ComponentNum = getEqClass(MainRangeVNI);
- VNIMapping.push_back(ComponentNum);
- if (ComponentNum > 0 && SubRanges[ComponentNum-1] == nullptr) {
- SubRanges[ComponentNum-1]
- = LIV[ComponentNum-1]->createSubRange(Allocator, SR.LaneMask);
+ unsigned ComponentNum;
+ if (VNI.isUnused()) {
+ ComponentNum = 0;
+ } else {
+ const VNInfo *MainRangeVNI = LI.getVNInfoAt(VNI.def);
+ assert(MainRangeVNI != nullptr
+ && "SubRange def must have corresponding main range def");
+ ComponentNum = getEqClass(MainRangeVNI);
+ if (ComponentNum > 0 && SubRanges[ComponentNum-1] == nullptr) {
+ SubRanges[ComponentNum-1]
+ = LIV[ComponentNum-1]->createSubRange(Allocator, SR.LaneMask);
+ }
}
+ VNIMapping.push_back(ComponentNum);
}
DistributeRange(SR, SubRanges.data(), VNIMapping);
}
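A recurring micro-change in this file is tagging every dump() with LLVM_DUMP_METHOD, which (in asserts builds) marks the function noinline and "used" so debuggers can call it even though no code references it, alongside swapping "x" string literals for 'x' char writes in the printers. A minimal sketch of the dump pattern on a stand-in type:

    #include "llvm/Support/Compiler.h"   // LLVM_DUMP_METHOD
    #include "llvm/Support/Debug.h"      // dbgs()
    #include "llvm/Support/raw_ostream.h"

    struct ToyRange {
      int Start = 0, End = 0;
      void print(llvm::raw_ostream &OS) const {
        OS << '[' << Start << ',' << End << ')'; // char literals, as in the patch
      }
    #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
      // Never emitted in release builds, never dead-stripped in debug builds,
      // so `p R.dump()` keeps working from gdb/lldb.
      LLVM_DUMP_METHOD void dump() const { print(llvm::dbgs()); llvm::dbgs() << '\n'; }
    #endif
    };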
diff --git a/contrib/llvm/lib/CodeGen/LiveIntervalAnalysis.cpp b/contrib/llvm/lib/CodeGen/LiveIntervalAnalysis.cpp
index a506e05..5f3281f 100644
--- a/contrib/llvm/lib/CodeGen/LiveIntervalAnalysis.cpp
+++ b/contrib/llvm/lib/CodeGen/LiveIntervalAnalysis.cpp
@@ -9,15 +9,13 @@
//
// This file implements the LiveInterval analysis pass which is used
// by the Linear Scan Register allocator. This pass linearizes the
-// basic blocks of the function in DFS order and uses the
-// LiveVariables pass to conservatively compute live intervals for
+// basic blocks of the function in DFS order and computes live intervals for
// each virtual and physical register.
//
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "LiveRangeCalc.h"
-#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/LiveVariables.h"
@@ -38,7 +36,6 @@
#include "llvm/Target/TargetSubtargetInfo.h"
#include <algorithm>
#include <cmath>
-#include <limits>
using namespace llvm;
#define DEBUG_TYPE "regalloc"
@@ -48,7 +45,6 @@ char &llvm::LiveIntervalsID = LiveIntervals::ID;
INITIALIZE_PASS_BEGIN(LiveIntervals, "liveintervals",
"Live Interval Analysis", false, false)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(LiveVariables)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
INITIALIZE_PASS_END(LiveIntervals, "liveintervals",
@@ -77,10 +73,6 @@ void LiveIntervals::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesCFG();
AU.addRequired<AAResultsWrapperPass>();
AU.addPreserved<AAResultsWrapperPass>();
- // LiveVariables isn't really required by this analysis, it is only required
- // here to make sure it is live during TwoAddressInstructionPass and
- // PHIElimination. This is temporary.
- AU.addRequired<LiveVariables>();
AU.addPreserved<LiveVariables>();
AU.addPreservedID(MachineLoopInfoID);
AU.addRequiredTransitiveID(MachineDominatorsID);
@@ -197,16 +189,9 @@ LiveInterval* LiveIntervals::createInterval(unsigned reg) {
void LiveIntervals::computeVirtRegInterval(LiveInterval &LI) {
assert(LRCalc && "LRCalc not initialized.");
assert(LI.empty() && "Should only compute empty intervals.");
- bool ShouldTrackSubRegLiveness = MRI->shouldTrackSubRegLiveness(LI.reg);
LRCalc->reset(MF, getSlotIndexes(), DomTree, &getVNInfoAllocator());
- LRCalc->calculate(LI, ShouldTrackSubRegLiveness);
- bool SeparatedComponents = computeDeadValues(LI, nullptr);
- if (SeparatedComponents) {
- assert(ShouldTrackSubRegLiveness
- && "Separated components should only occur for unused subreg defs");
- SmallVector<LiveInterval*, 8> SplitLIs;
- splitSeparateComponents(LI, SplitLIs);
- }
+ LRCalc->calculate(LI, MRI->shouldTrackSubRegLiveness(LI.reg));
+ computeDeadValues(LI, nullptr);
}
void LiveIntervals::computeVirtRegs() {
@@ -236,14 +221,18 @@ void LiveIntervals::computeRegMasks() {
for (const MachineOperand &MO : MI.operands()) {
if (!MO.isRegMask())
continue;
- RegMaskSlots.push_back(Indexes->getInstructionIndex(&MI).getRegSlot());
+ RegMaskSlots.push_back(Indexes->getInstructionIndex(MI).getRegSlot());
RegMaskBits.push_back(MO.getRegMask());
}
}
- // Some block ends, such as funclet returns, create masks.
+ // Some block ends, such as funclet returns, create masks. Put the mask on
+ // the last instruction of the block, because MBB slot index intervals are
+ // half-open.
if (const uint32_t *Mask = MBB.getEndClobberMask(TRI)) {
- RegMaskSlots.push_back(Indexes->getMBBEndIdx(&MBB));
+ assert(!MBB.empty() && "empty return block?");
+ RegMaskSlots.push_back(
+ Indexes->getInstructionIndex(MBB.back()).getRegSlot());
RegMaskBits.push_back(Mask);
}
@@ -439,7 +428,7 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li,
MachineInstr *UseMI = &*(I++);
if (UseMI->isDebugValue() || !UseMI->readsVirtualRegister(li->reg))
continue;
- SlotIndex Idx = getInstructionIndex(UseMI).getRegSlot();
+ SlotIndex Idx = getInstructionIndex(*UseMI).getRegSlot();
LiveQueryResult LRQ = li->Query(Idx);
VNInfo *VNI = LRQ.valueIn();
if (!VNI) {
@@ -485,13 +474,11 @@ bool LiveIntervals::computeDeadValues(LiveInterval &LI,
// Is the register live before? Otherwise we may have to add a read-undef
// flag for subregister defs.
- bool DeadBeforeDef = false;
unsigned VReg = LI.reg;
if (MRI->shouldTrackSubRegLiveness(VReg)) {
if ((I == LI.begin() || std::prev(I)->end < Def) && !VNI->isPHIDef()) {
MachineInstr *MI = getInstructionFromIndex(Def);
MI->setRegisterDefReadUndef(VReg);
- DeadBeforeDef = true;
}
}
@@ -507,15 +494,7 @@ bool LiveIntervals::computeDeadValues(LiveInterval &LI,
// This is a dead def. Make sure the instruction knows.
MachineInstr *MI = getInstructionFromIndex(Def);
assert(MI && "No instruction defining live value");
- MI->addRegisterDead(VReg, TRI);
-
- // If we have a dead def that is completely separate from the rest of
- // the liverange then we rewrite it to use a different VReg to not violate
- // the rule that the liveness of a virtual register forms a connected
- // component. This should only happen if subregister liveness is tracked.
- if (DeadBeforeDef)
- MayHaveSplitComponents = true;
-
+ MI->addRegisterDead(LI.reg, TRI);
if (dead && MI->allDefsAreDead()) {
DEBUG(dbgs() << "All defs dead: " << Def << '\t' << *MI);
dead->push_back(MI);
@@ -547,7 +526,7 @@ void LiveIntervals::shrinkToUses(LiveInterval::SubRange &SR, unsigned Reg)
continue;
}
// We only need to visit each instruction once.
- SlotIndex Idx = getInstructionIndex(UseMI).getRegSlot();
+ SlotIndex Idx = getInstructionIndex(*UseMI).getRegSlot();
if (Idx == LastIdx)
continue;
LastIdx = Idx;
@@ -585,9 +564,9 @@ void LiveIntervals::shrinkToUses(LiveInterval::SubRange &SR, unsigned Reg)
continue;
if (VNI->isPHIDef()) {
// This is a dead PHI. Remove it.
+ DEBUG(dbgs() << "Dead PHI at " << VNI->def << " may separate interval\n");
VNI->markUnused();
SR.removeSegment(*Segment);
- DEBUG(dbgs() << "Dead PHI at " << VNI->def << " may separate interval\n");
}
}
@@ -837,24 +816,22 @@ LiveIntervals::hasPHIKill(const LiveInterval &LI, const VNInfo *VNI) const {
return false;
}
-float
-LiveIntervals::getSpillWeight(bool isDef, bool isUse,
- const MachineBlockFrequencyInfo *MBFI,
- const MachineInstr *MI) {
- BlockFrequency Freq = MBFI->getBlockFreq(MI->getParent());
+float LiveIntervals::getSpillWeight(bool isDef, bool isUse,
+ const MachineBlockFrequencyInfo *MBFI,
+ const MachineInstr &MI) {
+ BlockFrequency Freq = MBFI->getBlockFreq(MI.getParent());
const float Scale = 1.0f / MBFI->getEntryFreq();
return (isDef + isUse) * (Freq.getFrequency() * Scale);
}
LiveRange::Segment
-LiveIntervals::addSegmentToEndOfBlock(unsigned reg, MachineInstr* startInst) {
+LiveIntervals::addSegmentToEndOfBlock(unsigned reg, MachineInstr &startInst) {
LiveInterval& Interval = createEmptyInterval(reg);
- VNInfo* VN = Interval.getNextValue(
- SlotIndex(getInstructionIndex(startInst).getRegSlot()),
- getVNInfoAllocator());
- LiveRange::Segment S(
- SlotIndex(getInstructionIndex(startInst).getRegSlot()),
- getMBBEndIdx(startInst->getParent()), VN);
+ VNInfo *VN = Interval.getNextValue(
+ SlotIndex(getInstructionIndex(startInst).getRegSlot()),
+ getVNInfoAllocator());
+ LiveRange::Segment S(SlotIndex(getInstructionIndex(startInst).getRegSlot()),
+ getMBBEndIdx(startInst.getParent()), VN);
Interval.addSegment(S);
return S;
@@ -962,10 +939,13 @@ public:
hasRegMask = true;
if (!MO.isReg())
continue;
- // Aggressively clear all kill flags.
- // They are reinserted by VirtRegRewriter.
- if (MO.isUse())
+ if (MO.isUse()) {
+ if (!MO.readsReg())
+ continue;
+ // Aggressively clear all kill flags.
+ // They are reinserted by VirtRegRewriter.
MO.setIsKill(false);
+ }
unsigned Reg = MO.getReg();
if (!Reg)
@@ -1021,172 +1001,296 @@ private:
}
/// Update LR to reflect an instruction has been moved downwards from OldIdx
- /// to NewIdx.
- ///
- /// 1. Live def at OldIdx:
- /// Move def to NewIdx, assert endpoint after NewIdx.
- ///
- /// 2. Live def at OldIdx, killed at NewIdx:
- /// Change to dead def at NewIdx.
- /// (Happens when bundling def+kill together).
- ///
- /// 3. Dead def at OldIdx:
- /// Move def to NewIdx, possibly across another live value.
- ///
- /// 4. Def at OldIdx AND at NewIdx:
- /// Remove segment [OldIdx;NewIdx) and value defined at OldIdx.
- /// (Happens when bundling multiple defs together).
- ///
- /// 5. Value read at OldIdx, killed before NewIdx:
- /// Extend kill to NewIdx.
- ///
+ /// to NewIdx (OldIdx < NewIdx).
void handleMoveDown(LiveRange &LR) {
- // First look for a kill at OldIdx.
- LiveRange::iterator I = LR.find(OldIdx.getBaseIndex());
LiveRange::iterator E = LR.end();
- // Is LR even live at OldIdx?
- if (I == E || SlotIndex::isEarlierInstr(OldIdx, I->start))
+ // Segment going into OldIdx.
+ LiveRange::iterator OldIdxIn = LR.find(OldIdx.getBaseIndex());
+
+ // No value live before or after OldIdx? Nothing to do.
+ if (OldIdxIn == E || SlotIndex::isEarlierInstr(OldIdx, OldIdxIn->start))
return;
- // Handle a live-in value.
- if (!SlotIndex::isSameInstr(I->start, OldIdx)) {
- bool isKill = SlotIndex::isSameInstr(OldIdx, I->end);
+ LiveRange::iterator OldIdxOut;
+ // Do we have a value live-in to OldIdx?
+ if (SlotIndex::isEarlierInstr(OldIdxIn->start, OldIdx)) {
// If the live-in value already extends to NewIdx, there is nothing to do.
- if (!SlotIndex::isEarlierInstr(I->end, NewIdx))
+ if (SlotIndex::isEarlierEqualInstr(NewIdx, OldIdxIn->end))
return;
// Aggressively remove all kill flags from the old kill point.
// Kill flags shouldn't be used while live intervals exist, they will be
// reinserted by VirtRegRewriter.
- if (MachineInstr *KillMI = LIS.getInstructionFromIndex(I->end))
- for (MIBundleOperands MO(KillMI); MO.isValid(); ++MO)
+ if (MachineInstr *KillMI = LIS.getInstructionFromIndex(OldIdxIn->end))
+ for (MIBundleOperands MO(*KillMI); MO.isValid(); ++MO)
if (MO->isReg() && MO->isUse())
MO->setIsKill(false);
- // Adjust I->end to reach NewIdx. This may temporarily make LR invalid by
- // overlapping ranges. Case 5 above.
- I->end = NewIdx.getRegSlot(I->end.isEarlyClobber());
- // If this was a kill, there may also be a def. Otherwise we're done.
+
+ // Is there a def before NewIdx which is not OldIdx?
+ LiveRange::iterator Next = std::next(OldIdxIn);
+ if (Next != E && !SlotIndex::isSameInstr(OldIdx, Next->start) &&
+ SlotIndex::isEarlierInstr(Next->start, NewIdx)) {
+ // If we are here then OldIdx was just a use but not a def. We only have
+ // to ensure liveness extends to NewIdx.
+ LiveRange::iterator NewIdxIn =
+ LR.advanceTo(Next, NewIdx.getBaseIndex());
+ // Extend the segment before NewIdx if necessary.
+ if (NewIdxIn == E ||
+ !SlotIndex::isEarlierInstr(NewIdxIn->start, NewIdx)) {
+ LiveRange::iterator Prev = std::prev(NewIdxIn);
+ Prev->end = NewIdx.getRegSlot();
+ }
+ return;
+ }
+
+ // Adjust OldIdxIn->end to reach NewIdx. This may temporarily make LR
+ // invalid by overlapping ranges.
+ bool isKill = SlotIndex::isSameInstr(OldIdx, OldIdxIn->end);
+ OldIdxIn->end = NewIdx.getRegSlot(OldIdxIn->end.isEarlyClobber());
+ // If this was not a kill, then there was no def and we're done.
if (!isKill)
return;
- ++I;
+
+ // Did we have a Def at OldIdx?
+ OldIdxOut = Next;
+ if (OldIdxOut == E || !SlotIndex::isSameInstr(OldIdx, OldIdxOut->start))
+ return;
+ } else {
+ OldIdxOut = OldIdxIn;
}
- // Check for a def at OldIdx.
- if (I == E || !SlotIndex::isSameInstr(OldIdx, I->start))
- return;
- // We have a def at OldIdx.
- VNInfo *DefVNI = I->valno;
- assert(DefVNI->def == I->start && "Inconsistent def");
- DefVNI->def = NewIdx.getRegSlot(I->start.isEarlyClobber());
- // If the defined value extends beyond NewIdx, just move the def down.
- // This is case 1 above.
- if (SlotIndex::isEarlierInstr(NewIdx, I->end)) {
- I->start = DefVNI->def;
+ // If we are here then there is a Definition at OldIdx. OldIdxOut points
+ // to the segment starting there.
+ assert(OldIdxOut != E && SlotIndex::isSameInstr(OldIdx, OldIdxOut->start) &&
+ "No def?");
+ VNInfo *OldIdxVNI = OldIdxOut->valno;
+ assert(OldIdxVNI->def == OldIdxOut->start && "Inconsistent def");
+
+ // If the defined value extends beyond NewIdx, just move the beginning
+ // of the segment to NewIdx.
+ SlotIndex NewIdxDef = NewIdx.getRegSlot(OldIdxOut->start.isEarlyClobber());
+ if (SlotIndex::isEarlierInstr(NewIdxDef, OldIdxOut->end)) {
+ OldIdxVNI->def = NewIdxDef;
+ OldIdxOut->start = OldIdxVNI->def;
return;
}
- // The remaining possibilities are now:
- // 2. Live def at OldIdx, killed at NewIdx: isSameInstr(I->end, NewIdx).
- // 3. Dead def at OldIdx: I->end = OldIdx.getDeadSlot().
- // In either case, it is possible that there is an existing def at NewIdx.
- assert((I->end == OldIdx.getDeadSlot() ||
- SlotIndex::isSameInstr(I->end, NewIdx)) &&
- "Cannot move def below kill");
- LiveRange::iterator NewI = LR.advanceTo(I, NewIdx.getRegSlot());
- if (NewI != E && SlotIndex::isSameInstr(NewI->start, NewIdx)) {
- // There is an existing def at NewIdx, case 4 above. The def at OldIdx is
- // coalesced into that value.
- assert(NewI->valno != DefVNI && "Multiple defs of value?");
- LR.removeValNo(DefVNI);
+
+ // If we are here then we have a Definition at OldIdx which ends before
+ // NewIdx.
+
+ // Is there an existing Def at NewIdx?
+ LiveRange::iterator AfterNewIdx
+ = LR.advanceTo(OldIdxOut, NewIdx.getRegSlot());
+ bool OldIdxDefIsDead = OldIdxOut->end.isDead();
+ if (!OldIdxDefIsDead &&
+ SlotIndex::isEarlierInstr(OldIdxOut->end, NewIdxDef)) {
+ // OldIdx is not a dead def, and NewIdxDef is inside a new interval.
+ VNInfo *DefVNI;
+ if (OldIdxOut != LR.begin() &&
+ !SlotIndex::isEarlierInstr(std::prev(OldIdxOut)->end,
+ OldIdxOut->start)) {
+ // There is no gap between OldIdxOut and its predecessor anymore,
+ // merge them.
+ LiveRange::iterator IPrev = std::prev(OldIdxOut);
+ DefVNI = OldIdxVNI;
+ IPrev->end = OldIdxOut->end;
+ } else {
+ // The value is live in to OldIdx
+ LiveRange::iterator INext = std::next(OldIdxOut);
+ assert(INext != E && "Must have following segment");
+ // We merge OldIdxOut and its successor. As we're dealing with subreg
+ // reordering, there is always a successor to OldIdxOut in the same BB.
+ // We don't need INext->valno anymore and will reuse it for the new
+ // segment we create later.
+ DefVNI = OldIdxVNI;
+ INext->start = OldIdxOut->end;
+ INext->valno->def = INext->start;
+ }
+ // If NewIdx is behind the last segment, extend that and append a new one.
+ if (AfterNewIdx == E) {
+ // OldIdxOut is undef at this point; slide (OldIdxOut;AfterNewIdx] up
+ // one position.
+ // |- ?/OldIdxOut -| |- X0 -| ... |- Xn -| end
+ // => |- X0/OldIdxOut -| ... |- Xn -| |- undef/NewS -| end
+ std::copy(std::next(OldIdxOut), E, OldIdxOut);
+ // The last segment is undefined now, reuse it for a dead def.
+ LiveRange::iterator NewSegment = std::prev(E);
+ *NewSegment = LiveRange::Segment(NewIdxDef, NewIdxDef.getDeadSlot(),
+ DefVNI);
+ DefVNI->def = NewIdxDef;
+
+ LiveRange::iterator Prev = std::prev(NewSegment);
+ Prev->end = NewIdxDef;
+ } else {
+ // OldIdxOut is undef at this point; slide (OldIdxOut;AfterNewIdx] up
+ // one position.
+ // |- ?/OldIdxOut -| |- X0 -| ... |- Xn/AfterNewIdx -| |- Next -|
+ // => |- X0/OldIdxOut -| ... |- Xn -| |- Xn/AfterNewIdx -| |- Next -|
+ std::copy(std::next(OldIdxOut), std::next(AfterNewIdx), OldIdxOut);
+ LiveRange::iterator Prev = std::prev(AfterNewIdx);
+ // We have two cases:
+ if (SlotIndex::isEarlierInstr(Prev->start, NewIdxDef)) {
+ // Case 1: NewIdx is inside a liverange. Split this liverange at
+ // NewIdxDef into the segment "Prev" followed by "NewSegment".
+ LiveRange::iterator NewSegment = AfterNewIdx;
+ *NewSegment = LiveRange::Segment(NewIdxDef, Prev->end, Prev->valno);
+ Prev->valno->def = NewIdxDef;
+
+ *Prev = LiveRange::Segment(Prev->start, NewIdxDef, DefVNI);
+ DefVNI->def = Prev->start;
+ } else {
+ // Case 2: NewIdx is in a lifetime hole. Keep AfterNewIdx as is and
+ // turn Prev into a segment from NewIdx to AfterNewIdx->start.
+ *Prev = LiveRange::Segment(NewIdxDef, AfterNewIdx->start, DefVNI);
+ DefVNI->def = NewIdxDef;
+ assert(DefVNI != AfterNewIdx->valno);
+ }
+ }
return;
}
- // There was no existing def at NewIdx. Turn *I into a dead def at NewIdx.
- // If the def at OldIdx was dead, we allow it to be moved across other LR
- // values. The new range should be placed immediately before NewI, move any
- // intermediate ranges up.
- assert(NewI != I && "Inconsistent iterators");
- std::copy(std::next(I), NewI, I);
- *std::prev(NewI)
- = LiveRange::Segment(DefVNI->def, NewIdx.getDeadSlot(), DefVNI);
+
+ if (AfterNewIdx != E &&
+ SlotIndex::isSameInstr(AfterNewIdx->start, NewIdxDef)) {
+ // There is an existing def at NewIdx. The def at OldIdx is coalesced into
+ // that value.
+ assert(AfterNewIdx->valno != OldIdxVNI && "Multiple defs of value?");
+ LR.removeValNo(OldIdxVNI);
+ } else {
+ // There was no existing def at NewIdx. We need to create a dead def
+ // at NewIdx. Shift segments over the old OldIdxOut segment, this frees
+ // a new segment at the place where we want to construct the dead def.
+ // |- OldIdxOut -| |- X0 -| ... |- Xn -| |- AfterNewIdx -|
+ // => |- X0/OldIdxOut -| ... |- Xn -| |- undef/NewS. -| |- AfterNewIdx -|
+ assert(AfterNewIdx != OldIdxOut && "Inconsistent iterators");
+ std::copy(std::next(OldIdxOut), AfterNewIdx, OldIdxOut);
+ // We can reuse OldIdxVNI now.
+ LiveRange::iterator NewSegment = std::prev(AfterNewIdx);
+ VNInfo *NewSegmentVNI = OldIdxVNI;
+ NewSegmentVNI->def = NewIdxDef;
+ *NewSegment = LiveRange::Segment(NewIdxDef, NewIdxDef.getDeadSlot(),
+ NewSegmentVNI);
+ }
}
/// Update LR to reflect an instruction has been moved upwards from OldIdx
- /// to NewIdx.
- ///
- /// 1. Live def at OldIdx:
- /// Hoist def to NewIdx.
- ///
- /// 2. Dead def at OldIdx:
- /// Hoist def+end to NewIdx, possibly move across other values.
- ///
- /// 3. Dead def at OldIdx AND existing def at NewIdx:
- /// Remove value defined at OldIdx, coalescing it with existing value.
- ///
- /// 4. Live def at OldIdx AND existing def at NewIdx:
- /// Remove value defined at NewIdx, hoist OldIdx def to NewIdx.
- /// (Happens when bundling multiple defs together).
- ///
- /// 5. Value killed at OldIdx:
- /// Hoist kill to NewIdx, then scan for last kill between NewIdx and
- /// OldIdx.
- ///
+ /// to NewIdx (NewIdx < OldIdx).
void handleMoveUp(LiveRange &LR, unsigned Reg, LaneBitmask LaneMask) {
- // First look for a kill at OldIdx.
- LiveRange::iterator I = LR.find(OldIdx.getBaseIndex());
LiveRange::iterator E = LR.end();
- // Is LR even live at OldIdx?
- if (I == E || SlotIndex::isEarlierInstr(OldIdx, I->start))
+ // Segment going into OldIdx.
+ LiveRange::iterator OldIdxIn = LR.find(OldIdx.getBaseIndex());
+
+ // No value live before or after OldIdx? Nothing to do.
+ if (OldIdxIn == E || SlotIndex::isEarlierInstr(OldIdx, OldIdxIn->start))
return;
- // Handle a live-in value.
- if (!SlotIndex::isSameInstr(I->start, OldIdx)) {
- // If the live-in value isn't killed here, there is nothing to do.
- if (!SlotIndex::isSameInstr(OldIdx, I->end))
- return;
- // Adjust I->end to end at NewIdx. If we are hoisting a kill above
- // another use, we need to search for that use. Case 5 above.
- I->end = NewIdx.getRegSlot(I->end.isEarlyClobber());
- ++I;
- // If OldIdx also defines a value, there couldn't have been another use.
- if (I == E || !SlotIndex::isSameInstr(I->start, OldIdx)) {
- // No def, search for the new kill.
- // This can never be an early clobber kill since there is no def.
- std::prev(I)->end = findLastUseBefore(Reg, LaneMask).getRegSlot();
+ LiveRange::iterator OldIdxOut;
+ // Do we have a value live-in to OldIdx?
+ if (SlotIndex::isEarlierInstr(OldIdxIn->start, OldIdx)) {
+ // If the live-in value isn't killed here, then we have no Def at
+ // OldIdx; moreover, the value must be live at NewIdx so there is
+ // nothing to do.
+ bool isKill = SlotIndex::isSameInstr(OldIdx, OldIdxIn->end);
+ if (!isKill)
return;
- }
- }
- // Now deal with the def at OldIdx.
- assert(I != E && SlotIndex::isSameInstr(I->start, OldIdx) && "No def?");
- VNInfo *DefVNI = I->valno;
- assert(DefVNI->def == I->start && "Inconsistent def");
- DefVNI->def = NewIdx.getRegSlot(I->start.isEarlyClobber());
-
- // Check for an existing def at NewIdx.
- LiveRange::iterator NewI = LR.find(NewIdx.getRegSlot());
- if (SlotIndex::isSameInstr(NewI->start, NewIdx)) {
- assert(NewI->valno != DefVNI && "Same value defined more than once?");
- // There is an existing def at NewIdx.
- if (I->end.isDead()) {
- // Case 3: Remove the dead def at OldIdx.
- LR.removeValNo(DefVNI);
+ // At this point we have to move OldIdxIn->end back to the nearest
+ // previous use or (dead-)def but no further than NewIdx.
+ SlotIndex DefBeforeOldIdx
+ = std::max(OldIdxIn->start.getDeadSlot(),
+ NewIdx.getRegSlot(OldIdxIn->end.isEarlyClobber()));
+ OldIdxIn->end = findLastUseBefore(DefBeforeOldIdx, Reg, LaneMask);
+
+ // Did we have a Def at OldIdx? If not we are done now.
+ OldIdxOut = std::next(OldIdxIn);
+ if (OldIdxOut == E || !SlotIndex::isSameInstr(OldIdx, OldIdxOut->start))
return;
- }
- // Case 4: Replace def at NewIdx with live def at OldIdx.
- I->start = DefVNI->def;
- LR.removeValNo(NewI->valno);
- return;
+ } else {
+ OldIdxOut = OldIdxIn;
+ OldIdxIn = OldIdxOut != LR.begin() ? std::prev(OldIdxOut) : E;
}
- // There is no existing def at NewIdx. Hoist DefVNI.
- if (!I->end.isDead()) {
- // Leave the end point of a live def.
- I->start = DefVNI->def;
- return;
+ // If we are here then there is a Definition at OldIdx. OldIdxOut points
+ // to the segment starting there.
+ assert(OldIdxOut != E && SlotIndex::isSameInstr(OldIdx, OldIdxOut->start) &&
+ "No def?");
+ VNInfo *OldIdxVNI = OldIdxOut->valno;
+ assert(OldIdxVNI->def == OldIdxOut->start && "Inconsistent def");
+ bool OldIdxDefIsDead = OldIdxOut->end.isDead();
+
+ // Is there an existing def at NewIdx?
+ SlotIndex NewIdxDef = NewIdx.getRegSlot(OldIdxOut->start.isEarlyClobber());
+ LiveRange::iterator NewIdxOut = LR.find(NewIdx.getRegSlot());
+ if (SlotIndex::isSameInstr(NewIdxOut->start, NewIdx)) {
+ assert(NewIdxOut->valno != OldIdxVNI &&
+ "Same value defined more than once?");
+ // If OldIdx was a dead def remove it.
+ if (!OldIdxDefIsDead) {
+ // Remove segment starting at NewIdx and move begin of OldIdxOut to
+ // NewIdx so it can take its place.
+ OldIdxVNI->def = NewIdxDef;
+ OldIdxOut->start = NewIdxDef;
+ LR.removeValNo(NewIdxOut->valno);
+ } else {
+ // Simply remove the dead def at OldIdx.
+ LR.removeValNo(OldIdxVNI);
+ }
+ } else {
+ // Previously nothing was live after NewIdx, so all we have to do now is
+ // move the begin of OldIdxOut to NewIdx.
+ if (!OldIdxDefIsDead) {
+ // Do we have any intermediate Defs between OldIdx and NewIdx?
+ if (OldIdxIn != E &&
+ SlotIndex::isEarlierInstr(NewIdxDef, OldIdxIn->start)) {
+ // OldIdx is not a dead def and NewIdx is before predecessor start.
+ LiveRange::iterator NewIdxIn = NewIdxOut;
+ assert(NewIdxIn == LR.find(NewIdx.getBaseIndex()));
+ const SlotIndex SplitPos = NewIdxDef;
+
+ // Merge the OldIdxIn and OldIdxOut segments into OldIdxOut.
+ *OldIdxOut = LiveRange::Segment(OldIdxIn->start, OldIdxOut->end,
+ OldIdxIn->valno);
+ // OldIdxIn and OldIdxVNI are now undef and can be overridden.
+ // We slide [NewIdxIn, OldIdxIn) down one position.
+ // |- X0/NewIdxIn -| ... |- Xn-1 -||- Xn/OldIdxIn -||- OldIdxOut -|
+ // => |- undef/NewIdxIn -| |- X0 -| ... |- Xn-1 -| |- Xn/OldIdxOut -|
+ std::copy_backward(NewIdxIn, OldIdxIn, OldIdxOut);
+ // NewIdxIn is now considered undef so we can reuse it for the moved
+ // value.
+ LiveRange::iterator NewSegment = NewIdxIn;
+ LiveRange::iterator Next = std::next(NewSegment);
+ if (SlotIndex::isEarlierInstr(Next->start, NewIdx)) {
+ // There is no gap between NewSegment and its predecessor.
+ *NewSegment = LiveRange::Segment(Next->start, SplitPos,
+ Next->valno);
+ *Next = LiveRange::Segment(SplitPos, Next->end, OldIdxVNI);
+ Next->valno->def = SplitPos;
+ } else {
+ // There is a gap between NewSegment and its predecessor.
+ // The value becomes live-in.
+ *NewSegment = LiveRange::Segment(SplitPos, Next->start, OldIdxVNI);
+ NewSegment->valno->def = SplitPos;
+ }
+ } else {
+ // Leave the end point of a live def.
+ OldIdxOut->start = NewIdxDef;
+ OldIdxVNI->def = NewIdxDef;
+ if (OldIdxIn != E && SlotIndex::isEarlierInstr(NewIdx, OldIdxIn->end))
+ OldIdxIn->end = NewIdx.getRegSlot();
+ }
+ } else {
+ // OldIdxVNI is a dead def. It may have been moved across other values
+ // in LR, so move OldIdxOut up to NewIdxOut. Slide [NewIdxOut;OldIdxOut)
+ // down one position.
+ // |- X0/NewIdxOut -| ... |- Xn-1 -| |- Xn/OldIdxOut -| |- next - |
+ // => |- undef/NewIdxOut -| |- X0 -| ... |- Xn-1 -| |- next -|
+ std::copy_backward(NewIdxOut, OldIdxOut, std::next(OldIdxOut));
+ // OldIdxVNI can be reused now to build a new dead def segment.
+ LiveRange::iterator NewSegment = NewIdxOut;
+ VNInfo *NewSegmentVNI = OldIdxVNI;
+ *NewSegment = LiveRange::Segment(NewIdxDef, NewIdxDef.getDeadSlot(),
+ NewSegmentVNI);
+ NewSegmentVNI->def = NewIdxDef;
+ }
}
-
- // DefVNI is a dead def. It may have been moved across other values in LR,
- // so move I up to NewI. Slide [NewI;I) down one position.
- std::copy_backward(NewI, I, std::next(I));
- *NewI = LiveRange::Segment(DefVNI->def, NewIdx.getDeadSlot(), DefVNI);
}
void updateRegMaskSlots() {
@@ -1205,29 +1309,31 @@ private:
}
// Return the last use of reg between NewIdx and OldIdx.
- SlotIndex findLastUseBefore(unsigned Reg, LaneBitmask LaneMask) {
-
+ SlotIndex findLastUseBefore(SlotIndex Before, unsigned Reg,
+ LaneBitmask LaneMask) {
if (TargetRegisterInfo::isVirtualRegister(Reg)) {
- SlotIndex LastUse = NewIdx;
+ SlotIndex LastUse = Before;
for (MachineOperand &MO : MRI.use_nodbg_operands(Reg)) {
+ if (MO.isUndef())
+ continue;
unsigned SubReg = MO.getSubReg();
if (SubReg != 0 && LaneMask != 0
&& (TRI.getSubRegIndexLaneMask(SubReg) & LaneMask) == 0)
continue;
- const MachineInstr *MI = MO.getParent();
+ const MachineInstr &MI = *MO.getParent();
SlotIndex InstSlot = LIS.getSlotIndexes()->getInstructionIndex(MI);
if (InstSlot > LastUse && InstSlot < OldIdx)
- LastUse = InstSlot;
+ LastUse = InstSlot.getRegSlot();
}
return LastUse;
}
// This is a regunit interval, so scanning the use list could be very
// expensive. Scan upwards from OldIdx instead.
- assert(NewIdx < OldIdx && "Expected upwards move");
+ assert(Before < OldIdx && "Expected upwards move");
SlotIndexes *Indexes = LIS.getSlotIndexes();
- MachineBasicBlock *MBB = Indexes->getMBBFromIndex(NewIdx);
+ MachineBasicBlock *MBB = Indexes->getMBBFromIndex(Before);
// OldIdx may not correspond to an instruction any longer, so set MII to
// point to the next instruction after OldIdx, or MBB->end().
@@ -1241,44 +1347,44 @@ private:
while (MII != Begin) {
if ((--MII)->isDebugValue())
continue;
- SlotIndex Idx = Indexes->getInstructionIndex(MII);
+ SlotIndex Idx = Indexes->getInstructionIndex(*MII);
- // Stop searching when NewIdx is reached.
- if (!SlotIndex::isEarlierInstr(NewIdx, Idx))
- return NewIdx;
+ // Stop searching when Before is reached.
+ if (!SlotIndex::isEarlierInstr(Before, Idx))
+ return Before;
// Check if MII uses Reg.
- for (MIBundleOperands MO(MII); MO.isValid(); ++MO)
- if (MO->isReg() &&
+ for (MIBundleOperands MO(*MII); MO.isValid(); ++MO)
+ if (MO->isReg() && !MO->isUndef() &&
TargetRegisterInfo::isPhysicalRegister(MO->getReg()) &&
TRI.hasRegUnit(MO->getReg(), Reg))
- return Idx;
+ return Idx.getRegSlot();
}
- // Didn't reach NewIdx. It must be the first instruction in the block.
- return NewIdx;
+ // Didn't reach Before. It must be the first instruction in the block.
+ return Before;
}
};
-void LiveIntervals::handleMove(MachineInstr* MI, bool UpdateFlags) {
- assert(!MI->isBundled() && "Can't handle bundled instructions yet.");
+void LiveIntervals::handleMove(MachineInstr &MI, bool UpdateFlags) {
+ assert(!MI.isBundled() && "Can't handle bundled instructions yet.");
SlotIndex OldIndex = Indexes->getInstructionIndex(MI);
Indexes->removeMachineInstrFromMaps(MI);
SlotIndex NewIndex = Indexes->insertMachineInstrInMaps(MI);
- assert(getMBBStartIdx(MI->getParent()) <= OldIndex &&
- OldIndex < getMBBEndIdx(MI->getParent()) &&
+ assert(getMBBStartIdx(MI.getParent()) <= OldIndex &&
+ OldIndex < getMBBEndIdx(MI.getParent()) &&
"Cannot handle moves across basic block boundaries.");
HMEditor HME(*this, *MRI, *TRI, OldIndex, NewIndex, UpdateFlags);
- HME.updateAllRanges(MI);
+ HME.updateAllRanges(&MI);
}
-void LiveIntervals::handleMoveIntoBundle(MachineInstr* MI,
- MachineInstr* BundleStart,
+void LiveIntervals::handleMoveIntoBundle(MachineInstr &MI,
+ MachineInstr &BundleStart,
bool UpdateFlags) {
SlotIndex OldIndex = Indexes->getInstructionIndex(MI);
SlotIndex NewIndex = Indexes->getInstructionIndex(BundleStart);
HMEditor HME(*this, *MRI, *TRI, OldIndex, NewIndex, UpdateFlags);
- HME.updateAllRanges(MI);
+ HME.updateAllRanges(&MI);
}
void LiveIntervals::repairOldRegInRange(const MachineBasicBlock::iterator Begin,
@@ -1295,8 +1401,8 @@ void LiveIntervals::repairOldRegInRange(const MachineBasicBlock::iterator Begin,
for (MachineBasicBlock::iterator I = End; I != Begin;) {
--I;
- MachineInstr *MI = I;
- if (MI->isDebugValue())
+ MachineInstr &MI = *I;
+ if (MI.isDebugValue())
continue;
SlotIndex instrIdx = getInstructionIndex(MI);
@@ -1305,8 +1411,9 @@ void LiveIntervals::repairOldRegInRange(const MachineBasicBlock::iterator Begin,
// FIXME: This doesn't currently handle early-clobber or multiple removed
// defs inside of the region to repair.
- for (MachineInstr::mop_iterator OI = MI->operands_begin(),
- OE = MI->operands_end(); OI != OE; ++OI) {
+ for (MachineInstr::mop_iterator OI = MI.operands_begin(),
+ OE = MI.operands_end();
+ OI != OE; ++OI) {
const MachineOperand &MO = *OI;
if (!MO.isReg() || MO.getReg() != Reg)
continue;
@@ -1376,26 +1483,27 @@ LiveIntervals::repairIntervalsInRange(MachineBasicBlock *MBB,
ArrayRef<unsigned> OrigRegs) {
// Find anchor points, which are at the beginning/end of blocks or at
// instructions that already have indexes.
- while (Begin != MBB->begin() && !Indexes->hasIndex(Begin))
+ while (Begin != MBB->begin() && !Indexes->hasIndex(*Begin))
--Begin;
- while (End != MBB->end() && !Indexes->hasIndex(End))
+ while (End != MBB->end() && !Indexes->hasIndex(*End))
++End;
SlotIndex endIdx;
if (End == MBB->end())
endIdx = getMBBEndIdx(MBB).getPrevSlot();
else
- endIdx = getInstructionIndex(End);
+ endIdx = getInstructionIndex(*End);
Indexes->repairIndexesInRange(MBB, Begin, End);
for (MachineBasicBlock::iterator I = End; I != Begin;) {
--I;
- MachineInstr *MI = I;
- if (MI->isDebugValue())
+ MachineInstr &MI = *I;
+ if (MI.isDebugValue())
continue;
- for (MachineInstr::const_mop_iterator MOI = MI->operands_begin(),
- MOE = MI->operands_end(); MOI != MOE; ++MOI) {
+ for (MachineInstr::const_mop_iterator MOI = MI.operands_begin(),
+ MOE = MI.operands_end();
+ MOI != MOE; ++MOI) {
if (MOI->isReg() &&
TargetRegisterInfo::isVirtualRegister(MOI->getReg()) &&
!hasInterval(MOI->getReg())) {
@@ -1459,3 +1567,9 @@ void LiveIntervals::splitSeparateComponents(LiveInterval &LI,
}
ConEQ.Distribute(LI, SplitLIs.data(), *MRI);
}
+
+void LiveIntervals::constructMainRangeFromSubranges(LiveInterval &LI) {
+ assert(LRCalc && "LRCalc not initialized.");
+ LRCalc->reset(MF, getSlotIndexes(), DomTree, &getVNInfoAllocator());
+ LRCalc->constructMainRangeFromSubranges(LI);
+}
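Among the mechanical MachineInstr*-to-MachineInstr& changes, getSpillWeight is a good place to restate the formula, since only its parameter type moved: the weight contributed by one instruction is the block's frequency relative to the entry block, counted once for a pure def or pure use and twice for a def+use. As plain arithmetic, with Freq and EntryFreq standing in for the MBFI queries:

    #include <cstdint>

    float spillWeight(bool IsDef, bool IsUse, uint64_t Freq, uint64_t EntryFreq) {
      const float Scale = 1.0f / EntryFreq;     // normalize: entry block == 1.0
      return (IsDef + IsUse) * (Freq * Scale);  // 0x, 1x, or 2x relative frequency
    }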
diff --git a/contrib/llvm/lib/CodeGen/LivePhysRegs.cpp b/contrib/llvm/lib/CodeGen/LivePhysRegs.cpp
index efbbcbe..4e2528f 100644
--- a/contrib/llvm/lib/CodeGen/LivePhysRegs.cpp
+++ b/contrib/llvm/lib/CodeGen/LivePhysRegs.cpp
@@ -17,6 +17,7 @@
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBundle.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -43,7 +44,7 @@ void LivePhysRegs::removeRegsInMask(const MachineOperand &MO,
/// Remove Defs, add uses. This is the recommended way of calculating liveness.
void LivePhysRegs::stepBackward(const MachineInstr &MI) {
// Remove defined registers and regmask kills from the set.
- for (ConstMIBundleOperands O(&MI); O.isValid(); ++O) {
+ for (ConstMIBundleOperands O(MI); O.isValid(); ++O) {
if (O->isReg()) {
if (!O->isDef())
continue;
@@ -56,8 +57,8 @@ void LivePhysRegs::stepBackward(const MachineInstr &MI) {
}
// Add uses to the set.
- for (ConstMIBundleOperands O(&MI); O.isValid(); ++O) {
- if (!O->isReg() || !O->readsReg() || O->isUndef())
+ for (ConstMIBundleOperands O(MI); O.isValid(); ++O) {
+ if (!O->isReg() || !O->readsReg())
continue;
unsigned Reg = O->getReg();
if (Reg == 0)
@@ -73,7 +74,7 @@ void LivePhysRegs::stepBackward(const MachineInstr &MI) {
void LivePhysRegs::stepForward(const MachineInstr &MI,
SmallVectorImpl<std::pair<unsigned, const MachineOperand*>> &Clobbers) {
// Remove killed registers from the set.
- for (ConstMIBundleOperands O(&MI); O.isValid(); ++O) {
+ for (ConstMIBundleOperands O(MI); O.isValid(); ++O) {
if (O->isReg()) {
unsigned Reg = O->getReg();
if (Reg == 0)
@@ -120,12 +121,25 @@ void LivePhysRegs::print(raw_ostream &OS) const {
}
/// Dumps the currently live registers to the debug output.
-void LivePhysRegs::dump() const {
+LLVM_DUMP_METHOD void LivePhysRegs::dump() const {
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
dbgs() << " " << *this;
#endif
}
+bool LivePhysRegs::available(const MachineRegisterInfo &MRI,
+ unsigned Reg) const {
+ if (LiveRegs.count(Reg))
+ return false;
+ if (MRI.isReserved(Reg))
+ return false;
+ for (MCRegAliasIterator R(Reg, TRI, false); R.isValid(); ++R) {
+ if (LiveRegs.count(*R))
+ return false;
+ }
+ return true;
+}
+
/// Add live-in registers of basic block \p MBB to \p LiveRegs.
static void addLiveIns(LivePhysRegs &LiveRegs, const MachineBasicBlock &MBB) {
for (const auto &LI : MBB.liveins())
@@ -135,40 +149,41 @@ static void addLiveIns(LivePhysRegs &LiveRegs, const MachineBasicBlock &MBB) {
/// Add pristine registers to the given \p LiveRegs. This function removes
/// actually saved callee save registers when \p InPrologueEpilogue is false.
static void addPristines(LivePhysRegs &LiveRegs, const MachineFunction &MF,
+ const MachineFrameInfo &MFI,
const TargetRegisterInfo &TRI) {
- const MachineFrameInfo &MFI = *MF.getFrameInfo();
- if (!MFI.isCalleeSavedInfoValid())
- return;
-
for (const MCPhysReg *CSR = TRI.getCalleeSavedRegs(&MF); CSR && *CSR; ++CSR)
LiveRegs.addReg(*CSR);
for (const CalleeSavedInfo &Info : MFI.getCalleeSavedInfo())
LiveRegs.removeReg(Info.getReg());
}
-void LivePhysRegs::addLiveOuts(const MachineBasicBlock *MBB,
- bool AddPristinesAndCSRs) {
- if (AddPristinesAndCSRs) {
- const MachineFunction &MF = *MBB->getParent();
- addPristines(*this, MF, *TRI);
- if (!MBB->isReturnBlock()) {
+void LivePhysRegs::addLiveOutsNoPristines(const MachineBasicBlock &MBB) {
+ // To get the live-outs we simply merge the live-ins of all successors.
+ for (const MachineBasicBlock *Succ : MBB.successors())
+ ::addLiveIns(*this, *Succ);
+}
+
+void LivePhysRegs::addLiveOuts(const MachineBasicBlock &MBB) {
+ const MachineFunction &MF = *MBB.getParent();
+ const MachineFrameInfo &MFI = *MF.getFrameInfo();
+ if (MFI.isCalleeSavedInfoValid()) {
+ if (MBB.isReturnBlock()) {
// The return block has no successors whose live-ins we could merge
// below. So instead we add the callee saved registers manually.
for (const MCPhysReg *I = TRI->getCalleeSavedRegs(&MF); *I; ++I)
addReg(*I);
+ } else {
+ addPristines(*this, MF, MFI, *TRI);
}
}
- // To get the live-outs we simply merge the live-ins of all successors.
- for (const MachineBasicBlock *Succ : MBB->successors())
- ::addLiveIns(*this, *Succ);
+ addLiveOutsNoPristines(MBB);
}
-void LivePhysRegs::addLiveIns(const MachineBasicBlock *MBB,
- bool AddPristines) {
- if (AddPristines) {
- const MachineFunction &MF = *MBB->getParent();
- addPristines(*this, MF, *TRI);
- }
- ::addLiveIns(*this, *MBB);
+void LivePhysRegs::addLiveIns(const MachineBasicBlock &MBB) {
+ const MachineFunction &MF = *MBB.getParent();
+ const MachineFrameInfo &MFI = *MF.getFrameInfo();
+ if (MFI.isCalleeSavedInfoValid())
+ addPristines(*this, MF, MFI, *TRI);
+ ::addLiveIns(*this, MBB);
}
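The new LivePhysRegs::available has to look past the register itself because physical registers overlap (AL, AX, EAX, RAX all name the same storage): a register is only free if it is unreserved and neither it nor any alias is in the live set. A toy version with an explicit alias table in place of MCRegAliasIterator:

    #include <set>
    #include <vector>

    // AliasesOf[R] lists every register overlapping R, excluding R itself,
    // mirroring MCRegAliasIterator with IncludeSelf = false.
    bool available(const std::set<unsigned> &Live, const std::set<unsigned> &Reserved,
                   const std::vector<std::vector<unsigned>> &AliasesOf, unsigned Reg) {
      if (Live.count(Reg) || Reserved.count(Reg))
        return false;
      for (unsigned A : AliasesOf[Reg])
        if (Live.count(A))
          return false;
      return true;
    }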
diff --git a/contrib/llvm/lib/CodeGen/LiveRangeCalc.cpp b/contrib/llvm/lib/CodeGen/LiveRangeCalc.cpp
index c408615..db91ca1 100644
--- a/contrib/llvm/lib/CodeGen/LiveRangeCalc.cpp
+++ b/contrib/llvm/lib/CodeGen/LiveRangeCalc.cpp
@@ -42,12 +42,12 @@ void LiveRangeCalc::reset(const MachineFunction *mf,
static void createDeadDef(SlotIndexes &Indexes, VNInfo::Allocator &Alloc,
LiveRange &LR, const MachineOperand &MO) {
- const MachineInstr *MI = MO.getParent();
- SlotIndex DefIdx =
- Indexes.getInstructionIndex(MI).getRegSlot(MO.isEarlyClobber());
+ const MachineInstr &MI = *MO.getParent();
+ SlotIndex DefIdx =
+ Indexes.getInstructionIndex(MI).getRegSlot(MO.isEarlyClobber());
- // Create the def in LR. This may find an existing def.
- LR.createDeadDef(DefIdx, Alloc);
+ // Create the def in LR. This may find an existing def.
+ LR.createDeadDef(DefIdx, Alloc);
}
void LiveRangeCalc::calculate(LiveInterval &LI, bool TrackSubRegs) {
@@ -120,13 +120,29 @@ void LiveRangeCalc::calculate(LiveInterval &LI, bool TrackSubRegs) {
extendToUses(S, Reg, S.LaneMask);
}
LI.clear();
- LI.constructMainRangeFromSubranges(*Indexes, *Alloc);
+ constructMainRangeFromSubranges(LI);
} else {
resetLiveOutMap();
extendToUses(LI, Reg, ~0u);
}
}
+void LiveRangeCalc::constructMainRangeFromSubranges(LiveInterval &LI) {
+ // First create dead defs at all defs found in subranges.
+ LiveRange &MainRange = LI;
+ assert(MainRange.segments.empty() && MainRange.valnos.empty() &&
+ "Expect empty main liverange");
+
+ for (const LiveInterval::SubRange &SR : LI.subranges()) {
+ for (const VNInfo *VNI : SR.valnos) {
+ if (!VNI->isUnused() && !VNI->isPHIDef())
+ MainRange.createDeadDef(VNI->def, *Alloc);
+ }
+ }
+
+ resetLiveOutMap();
+ extendToUses(MainRange, LI.reg);
+}
void LiveRangeCalc::createDeadDefs(LiveRange &LR, unsigned Reg) {
assert(MRI && Indexes && "call reset() first");
@@ -184,7 +200,7 @@ void LiveRangeCalc::extendToUses(LiveRange &LR, unsigned Reg,
// had an early-clobber flag.
isEarlyClobber = MI->getOperand(DefIdx).isEarlyClobber();
}
- UseIdx = Indexes->getInstructionIndex(MI).getRegSlot(isEarlyClobber);
+ UseIdx = Indexes->getInstructionIndex(*MI).getRegSlot(isEarlyClobber);
}
// MI is reading Reg. We may have visited MI before if it happens to be
diff --git a/contrib/llvm/lib/CodeGen/LiveRangeCalc.h b/contrib/llvm/lib/CodeGen/LiveRangeCalc.h
index ff38c68..9de48b7 100644
--- a/contrib/llvm/lib/CodeGen/LiveRangeCalc.h
+++ b/contrib/llvm/lib/CodeGen/LiveRangeCalc.h
@@ -189,6 +189,11 @@ public:
/// enabled.
void calculate(LiveInterval &LI, bool TrackSubRegs);
+ /// For live interval \p LI with correct SubRanges construct matching
+ /// information for the main live range. Expects the main live range to not
+ /// have any segments or value numbers.
+ void constructMainRangeFromSubranges(LiveInterval &LI);
+
//===--------------------------------------------------------------------===//
// Low-level interface.
//===--------------------------------------------------------------------===//
diff --git a/contrib/llvm/lib/CodeGen/LiveRangeEdit.cpp b/contrib/llvm/lib/CodeGen/LiveRangeEdit.cpp
index 5ce364a..b35c0ad 100644
--- a/contrib/llvm/lib/CodeGen/LiveRangeEdit.cpp
+++ b/contrib/llvm/lib/CodeGen/LiveRangeEdit.cpp
@@ -53,7 +53,7 @@ bool LiveRangeEdit::checkRematerializable(VNInfo *VNI,
AliasAnalysis *aa) {
assert(DefMI && "Missing instruction");
ScannedRemattable = true;
- if (!TII.isTriviallyReMaterializable(DefMI, aa))
+ if (!TII.isTriviallyReMaterializable(*DefMI, aa))
return false;
Remattable.insert(VNI);
return true;
@@ -63,10 +63,13 @@ void LiveRangeEdit::scanRemattable(AliasAnalysis *aa) {
for (VNInfo *VNI : getParent().valnos) {
if (VNI->isUnused())
continue;
- MachineInstr *DefMI = LIS.getInstructionFromIndex(VNI->def);
+ unsigned Original = VRM->getOriginal(getReg());
+ LiveInterval &OrigLI = LIS.getInterval(Original);
+ VNInfo *OrigVNI = OrigLI.getVNInfoAt(VNI->def);
+ MachineInstr *DefMI = LIS.getInstructionFromIndex(OrigVNI->def);
if (!DefMI)
continue;
- checkRematerializable(VNI, DefMI, aa);
+ checkRematerializable(OrigVNI, DefMI, aa);
}
ScannedRemattable = true;
}
@@ -113,27 +116,21 @@ bool LiveRangeEdit::allUsesAvailableAt(const MachineInstr *OrigMI,
return true;
}
-bool LiveRangeEdit::canRematerializeAt(Remat &RM,
- SlotIndex UseIdx,
- bool cheapAsAMove) {
+bool LiveRangeEdit::canRematerializeAt(Remat &RM, VNInfo *OrigVNI,
+ SlotIndex UseIdx, bool cheapAsAMove) {
assert(ScannedRemattable && "Call anyRematerializable first");
// Use scanRemattable info.
- if (!Remattable.count(RM.ParentVNI))
+ if (!Remattable.count(OrigVNI))
return false;
// No defining instruction provided.
SlotIndex DefIdx;
- if (RM.OrigMI)
- DefIdx = LIS.getInstructionIndex(RM.OrigMI);
- else {
- DefIdx = RM.ParentVNI->def;
- RM.OrigMI = LIS.getInstructionFromIndex(DefIdx);
- assert(RM.OrigMI && "No defining instruction for remattable value");
- }
+ assert(RM.OrigMI && "No defining instruction for remattable value");
+ DefIdx = LIS.getInstructionIndex(*RM.OrigMI);
// If only cheap remats were requested, bail out early.
- if (cheapAsAMove && !TII.isAsCheapAsAMove(RM.OrigMI))
+ if (cheapAsAMove && !TII.isAsCheapAsAMove(*RM.OrigMI))
return false;
// Verify that all used registers are available with the same values.
@@ -150,10 +147,13 @@ SlotIndex LiveRangeEdit::rematerializeAt(MachineBasicBlock &MBB,
const TargetRegisterInfo &tri,
bool Late) {
assert(RM.OrigMI && "Invalid remat");
- TII.reMaterialize(MBB, MI, DestReg, 0, RM.OrigMI, tri);
+ TII.reMaterialize(MBB, MI, DestReg, 0, *RM.OrigMI, tri);
+  // DestReg of the cloned instruction cannot be Dead. Clear the isDead flag
+  // of DestReg unconditionally, in case the dest register of RM.OrigMI has
+  // it set.
+ (*--MI).getOperand(0).setIsDead(false);
Rematted.insert(RM.ParentVNI);
- return LIS.getSlotIndexes()->insertMachineInstrInMaps(--MI, Late)
- .getRegSlot();
+ return LIS.getSlotIndexes()->insertMachineInstrInMaps(*MI, Late).getRegSlot();
}
void LiveRangeEdit::eraseVirtReg(unsigned Reg) {
@@ -188,9 +188,8 @@ bool LiveRangeEdit::foldAsLoad(LiveInterval *LI,
// Since we're moving the DefMI load, make sure we're not extending any live
// ranges.
- if (!allUsesAvailableAt(DefMI,
- LIS.getInstructionIndex(DefMI),
- LIS.getInstructionIndex(UseMI)))
+ if (!allUsesAvailableAt(DefMI, LIS.getInstructionIndex(*DefMI),
+ LIS.getInstructionIndex(*UseMI)))
return false;
// We also need to make sure it is safe to move the load.
@@ -206,11 +205,11 @@ bool LiveRangeEdit::foldAsLoad(LiveInterval *LI,
if (UseMI->readsWritesVirtualRegister(LI->reg, &Ops).second)
return false;
- MachineInstr *FoldMI = TII.foldMemoryOperand(UseMI, Ops, DefMI);
+ MachineInstr *FoldMI = TII.foldMemoryOperand(*UseMI, Ops, *DefMI, &LIS);
if (!FoldMI)
return false;
DEBUG(dbgs() << " folded: " << *FoldMI);
- LIS.ReplaceMachineInstrInMaps(UseMI, FoldMI);
+ LIS.ReplaceMachineInstrInMaps(*UseMI, *FoldMI);
UseMI->eraseFromParent();
DefMI->addRegisterDead(LI->reg, nullptr);
Dead.push_back(DefMI);
@@ -220,7 +219,7 @@ bool LiveRangeEdit::foldAsLoad(LiveInterval *LI,
bool LiveRangeEdit::useIsKill(const LiveInterval &LI,
const MachineOperand &MO) const {
- const MachineInstr *MI = MO.getParent();
+ const MachineInstr &MI = *MO.getParent();
SlotIndex Idx = LIS.getInstructionIndex(MI).getRegSlot();
if (LI.Query(Idx).isKill())
return true;
@@ -235,9 +234,10 @@ bool LiveRangeEdit::useIsKill(const LiveInterval &LI,
}
/// Find all live intervals that need to shrink, then remove the instruction.
-void LiveRangeEdit::eliminateDeadDef(MachineInstr *MI, ToShrinkSet &ToShrink) {
+void LiveRangeEdit::eliminateDeadDef(MachineInstr *MI, ToShrinkSet &ToShrink,
+ AliasAnalysis *AA) {
assert(MI->allDefsAreDead() && "Def isn't really dead");
- SlotIndex Idx = LIS.getInstructionIndex(MI).getRegSlot();
+ SlotIndex Idx = LIS.getInstructionIndex(*MI).getRegSlot();
// Never delete a bundled instruction.
if (MI->isBundled()) {
@@ -261,6 +261,20 @@ void LiveRangeEdit::eliminateDeadDef(MachineInstr *MI, ToShrinkSet &ToShrink) {
// Collect virtual registers to be erased after MI is gone.
SmallVector<unsigned, 8> RegsToErase;
bool ReadsPhysRegs = false;
+ bool isOrigDef = false;
+ unsigned Dest;
+ if (VRM && MI->getOperand(0).isReg()) {
+ Dest = MI->getOperand(0).getReg();
+ unsigned Original = VRM->getOriginal(Dest);
+ LiveInterval &OrigLI = LIS.getInterval(Original);
+ VNInfo *OrigVNI = OrigLI.getVNInfoAt(Idx);
+ // The original live-range may have been shrunk to
+    // an empty live-range. This happens when it is dead, but
+ // we still keep it around to be able to rematerialize
+ // other values that depend on it.
+ if (OrigVNI)
+ isOrigDef = SlotIndex::isSameInstr(OrigVNI->def, Idx);
+ }
// Check for live intervals that may shrink
for (MachineInstr::mop_iterator MOI = MI->operands_begin(),
@@ -314,11 +328,27 @@ void LiveRangeEdit::eliminateDeadDef(MachineInstr *MI, ToShrinkSet &ToShrink) {
}
DEBUG(dbgs() << "Converted physregs to:\t" << *MI);
} else {
- if (TheDelegate)
- TheDelegate->LRE_WillEraseInstruction(MI);
- LIS.RemoveMachineInstrFromMaps(MI);
- MI->eraseFromParent();
- ++NumDCEDeleted;
+ // If the dest of MI is an original reg and MI is reMaterializable,
+ // don't delete the inst. Replace the dest with a new reg, and keep
+ // the inst for remat of other siblings. The inst is saved in
+ // LiveRangeEdit::DeadRemats and will be deleted after all the
+ // allocations of the func are done.
+ if (isOrigDef && DeadRemats && TII.isTriviallyReMaterializable(*MI, AA)) {
+ LiveInterval &NewLI = createEmptyIntervalFrom(Dest);
+ VNInfo *VNI = NewLI.getNextValue(Idx, LIS.getVNInfoAllocator());
+ NewLI.addSegment(LiveInterval::Segment(Idx, Idx.getDeadSlot(), VNI));
+ pop_back();
+ markDeadRemat(MI);
+ const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();
+ MI->substituteRegister(Dest, NewLI.reg, 0, TRI);
+ MI->getOperand(0).setIsDead(true);
+ } else {
+ if (TheDelegate)
+ TheDelegate->LRE_WillEraseInstruction(MI);
+ LIS.RemoveMachineInstrFromMaps(*MI);
+ MI->eraseFromParent();
+ ++NumDCEDeleted;
+ }
}
// Erase any virtregs that are now empty and unused. There may be <undef>
@@ -332,14 +362,15 @@ void LiveRangeEdit::eliminateDeadDef(MachineInstr *MI, ToShrinkSet &ToShrink) {
}
}
-void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl<MachineInstr*> &Dead,
- ArrayRef<unsigned> RegsBeingSpilled) {
+void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl<MachineInstr *> &Dead,
+ ArrayRef<unsigned> RegsBeingSpilled,
+ AliasAnalysis *AA) {
ToShrinkSet ToShrink;
for (;;) {
// Erase all dead defs.
while (!Dead.empty())
- eliminateDeadDef(Dead.pop_back_val(), ToShrink);
+ eliminateDeadDef(Dead.pop_back_val(), ToShrink, AA);
if (ToShrink.empty())
break;
diff --git a/contrib/llvm/lib/CodeGen/LiveRangeUtils.h b/contrib/llvm/lib/CodeGen/LiveRangeUtils.h
new file mode 100644
index 0000000..bd57609
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/LiveRangeUtils.h
@@ -0,0 +1,62 @@
+//===-- LiveRangeUtils.h - Live Range modification utilities ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// This file contains helper functions to modify live ranges.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_CODEGEN_LIVERANGEUTILS_H
+#define LLVM_LIB_CODEGEN_LIVERANGEUTILS_H
+
+#include "llvm/CodeGen/LiveInterval.h"
+
+namespace llvm {
+
+/// Helper function that distributes live range value numbers and the
+/// corresponding segments of a master live range \p LR to a list of newly
+/// created live ranges \p SplitLRs. \p VNIClasses maps each value number in
+/// \p LR either to 0, meaning it stays in \p LR, or to an index 1..N, meaning
+/// it moves to the corresponding live range in the \p SplitLRs array.
+template<typename LiveRangeT, typename EqClassesT>
+static void DistributeRange(LiveRangeT &LR, LiveRangeT *SplitLRs[],
+ EqClassesT VNIClasses) {
+ // Move segments to new intervals.
+ typename LiveRangeT::iterator J = LR.begin(), E = LR.end();
+ while (J != E && VNIClasses[J->valno->id] == 0)
+ ++J;
+ for (typename LiveRangeT::iterator I = J; I != E; ++I) {
+ if (unsigned eq = VNIClasses[I->valno->id]) {
+ assert((SplitLRs[eq-1]->empty() || SplitLRs[eq-1]->expiredAt(I->start)) &&
+ "New intervals should be empty");
+ SplitLRs[eq-1]->segments.push_back(*I);
+ } else
+ *J++ = *I;
+ }
+ LR.segments.erase(J, E);
+
+ // Transfer VNInfos to their new owners and renumber them.
+ unsigned j = 0, e = LR.getNumValNums();
+ while (j != e && VNIClasses[j] == 0)
+ ++j;
+ for (unsigned i = j; i != e; ++i) {
+ VNInfo *VNI = LR.getValNumInfo(i);
+ if (unsigned eq = VNIClasses[i]) {
+ VNI->id = SplitLRs[eq-1]->getNumValNums();
+ SplitLRs[eq-1]->valnos.push_back(VNI);
+ } else {
+ VNI->id = j;
+ LR.valnos[j++] = VNI;
+ }
+ }
+ LR.valnos.resize(j);
+}
+
+} // End llvm namespace
+
+#endif
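
A hedged usage sketch for DistributeRange; NewLR and the class assignments are hypothetical, and in-tree callers typically obtain the classes from ConnectedVNInfoEqClasses:

    // Keep value numbers mapped to 0 in LR; move value number 1 into NewLR.
    SmallVector<unsigned, 8> VNIClasses(LR.getNumValNums(), 0);
    VNIClasses[1] = 1;                    // class k goes to SplitLRs[k-1]
    LiveInterval *SplitLRs[] = {&NewLR};  // freshly created, still empty
    DistributeRange(LR, SplitLRs, VNIClasses);
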
diff --git a/contrib/llvm/lib/CodeGen/LiveStackAnalysis.cpp b/contrib/llvm/lib/CodeGen/LiveStackAnalysis.cpp
index 5c9c679..dbf1f96 100644
--- a/contrib/llvm/lib/CodeGen/LiveStackAnalysis.cpp
+++ b/contrib/llvm/lib/CodeGen/LiveStackAnalysis.cpp
@@ -14,14 +14,12 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/LiveStackAnalysis.h"
-#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
-#include <limits>
using namespace llvm;
#define DEBUG_TYPE "livestacks"
diff --git a/contrib/llvm/lib/CodeGen/LiveVariables.cpp b/contrib/llvm/lib/CodeGen/LiveVariables.cpp
index 06b86d8..dd87216 100644
--- a/contrib/llvm/lib/CodeGen/LiveVariables.cpp
+++ b/contrib/llvm/lib/CodeGen/LiveVariables.cpp
@@ -64,7 +64,7 @@ LiveVariables::VarInfo::findKill(const MachineBasicBlock *MBB) const {
return nullptr;
}
-void LiveVariables::VarInfo::dump() const {
+LLVM_DUMP_METHOD void LiveVariables::VarInfo::dump() const {
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
dbgs() << " Alive in blocks: ";
for (SparseBitVector<>::iterator I = AliveBlocks.begin(),
@@ -129,7 +129,7 @@ void LiveVariables::MarkVirtRegAliveInBlock(VarInfo &VRInfo,
}
void LiveVariables::HandleVirtRegUse(unsigned reg, MachineBasicBlock *MBB,
- MachineInstr *MI) {
+ MachineInstr &MI) {
assert(MRI->getVRegDef(reg) && "Register use before def!");
unsigned BBNum = MBB->getNumber();
@@ -140,7 +140,7 @@ void LiveVariables::HandleVirtRegUse(unsigned reg, MachineBasicBlock *MBB,
if (!VRInfo.Kills.empty() && VRInfo.Kills.back()->getParent() == MBB) {
// Yes, this register is killed in this basic block already. Increase the
// live range by updating the kill instruction.
- VRInfo.Kills.back() = MI;
+ VRInfo.Kills.back() = &MI;
return;
}
@@ -171,7 +171,7 @@ void LiveVariables::HandleVirtRegUse(unsigned reg, MachineBasicBlock *MBB,
// already marked as alive in this basic block, that means it is alive in at
// least one of the successor blocks, it's not a kill.
if (!VRInfo.AliveBlocks.test(BBNum))
- VRInfo.Kills.push_back(MI);
+ VRInfo.Kills.push_back(&MI);
// Update all dominating blocks to mark them as "known live".
for (MachineBasicBlock::const_pred_iterator PI = MBB->pred_begin(),
@@ -179,12 +179,12 @@ void LiveVariables::HandleVirtRegUse(unsigned reg, MachineBasicBlock *MBB,
MarkVirtRegAliveInBlock(VRInfo, MRI->getVRegDef(reg)->getParent(), *PI);
}
-void LiveVariables::HandleVirtRegDef(unsigned Reg, MachineInstr *MI) {
+void LiveVariables::HandleVirtRegDef(unsigned Reg, MachineInstr &MI) {
VarInfo &VRInfo = getVarInfo(Reg);
if (VRInfo.AliveBlocks.empty())
// If vr is not alive in any block, then defaults to dead.
- VRInfo.Kills.push_back(MI);
+ VRInfo.Kills.push_back(&MI);
}
/// FindLastPartialDef - Return the last partial def of the specified register.
@@ -228,7 +228,7 @@ MachineInstr *LiveVariables::FindLastPartialDef(unsigned Reg,
/// HandlePhysRegUse - Turn previous partial def's into read/mod/writes. Add
/// implicit defs to a machine instruction if there was an earlier def of its
/// super-register.
-void LiveVariables::HandlePhysRegUse(unsigned Reg, MachineInstr *MI) {
+void LiveVariables::HandlePhysRegUse(unsigned Reg, MachineInstr &MI) {
MachineInstr *LastDef = PhysRegDef[Reg];
// If there was a previous use or a "full" def all is well.
if (!LastDef && !PhysRegUse[Reg]) {
@@ -273,7 +273,7 @@ void LiveVariables::HandlePhysRegUse(unsigned Reg, MachineInstr *MI) {
// Remember this use.
for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true);
SubRegs.isValid(); ++SubRegs)
- PhysRegUse[*SubRegs] = MI;
+ PhysRegUse[*SubRegs] = &MI;
}
/// FindLastRefOrPartRef - Return the last reference or partial reference of
@@ -483,7 +483,7 @@ void LiveVariables::HandlePhysRegDef(unsigned Reg, MachineInstr *MI,
Defs.push_back(Reg); // Remember this def.
}
-void LiveVariables::UpdatePhysRegDefs(MachineInstr *MI,
+void LiveVariables::UpdatePhysRegDefs(MachineInstr &MI,
SmallVectorImpl<unsigned> &Defs) {
while (!Defs.empty()) {
unsigned Reg = Defs.back();
@@ -491,21 +491,21 @@ void LiveVariables::UpdatePhysRegDefs(MachineInstr *MI,
for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true);
SubRegs.isValid(); ++SubRegs) {
unsigned SubReg = *SubRegs;
- PhysRegDef[SubReg] = MI;
+ PhysRegDef[SubReg] = &MI;
PhysRegUse[SubReg] = nullptr;
}
}
}
-void LiveVariables::runOnInstr(MachineInstr *MI,
+void LiveVariables::runOnInstr(MachineInstr &MI,
SmallVectorImpl<unsigned> &Defs) {
- assert(!MI->isDebugValue());
+ assert(!MI.isDebugValue());
// Process all of the operands of the instruction...
- unsigned NumOperandsToProcess = MI->getNumOperands();
+ unsigned NumOperandsToProcess = MI.getNumOperands();
// Unless it is a PHI node. In this case, ONLY process the DEF, not any
// of the uses. They will be handled in other basic blocks.
- if (MI->isPHI())
+ if (MI.isPHI())
NumOperandsToProcess = 1;
// Clear kill and dead markers. LV will recompute them.
@@ -513,7 +513,7 @@ void LiveVariables::runOnInstr(MachineInstr *MI,
SmallVector<unsigned, 4> DefRegs;
SmallVector<unsigned, 1> RegMasks;
for (unsigned i = 0; i != NumOperandsToProcess; ++i) {
- MachineOperand &MO = MI->getOperand(i);
+ MachineOperand &MO = MI.getOperand(i);
if (MO.isRegMask()) {
RegMasks.push_back(i);
continue;
@@ -527,15 +527,18 @@ void LiveVariables::runOnInstr(MachineInstr *MI,
MO.setIsKill(false);
if (MO.readsReg())
UseRegs.push_back(MOReg);
- } else /*MO.isDef()*/ {
- if (!(TargetRegisterInfo::isPhysicalRegister(MOReg) &&
- MRI->isReserved(MOReg)))
+ } else {
+ assert(MO.isDef());
+      // FIXME: We should not remove any dead flags. However, the MIPS RDDSP
+ // instruction needs it at the moment: http://llvm.org/PR27116.
+ if (TargetRegisterInfo::isPhysicalRegister(MOReg) &&
+ !MRI->isReserved(MOReg))
MO.setIsDead(false);
DefRegs.push_back(MOReg);
}
}
- MachineBasicBlock *MBB = MI->getParent();
+ MachineBasicBlock *MBB = MI.getParent();
// Process all uses.
for (unsigned i = 0, e = UseRegs.size(); i != e; ++i) {
unsigned MOReg = UseRegs[i];
@@ -547,7 +550,7 @@ void LiveVariables::runOnInstr(MachineInstr *MI,
// Process all masked registers. (Call clobbers).
for (unsigned i = 0, e = RegMasks.size(); i != e; ++i)
- HandleRegMask(MI->getOperand(RegMasks[i]));
+ HandleRegMask(MI.getOperand(RegMasks[i]));
// Process all defs.
for (unsigned i = 0, e = DefRegs.size(); i != e; ++i) {
@@ -555,7 +558,7 @@ void LiveVariables::runOnInstr(MachineInstr *MI,
if (TargetRegisterInfo::isVirtualRegister(MOReg))
HandleVirtRegDef(MOReg, MI);
else if (!MRI->isReserved(MOReg))
- HandlePhysRegDef(MOReg, MI, Defs);
+ HandlePhysRegDef(MOReg, &MI, Defs);
}
UpdatePhysRegDefs(MI, Defs);
}
@@ -572,12 +575,10 @@ void LiveVariables::runOnBlock(MachineBasicBlock *MBB, const unsigned NumRegs) {
// Loop over all of the instructions, processing them.
DistanceMap.clear();
unsigned Dist = 0;
- for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end();
- I != E; ++I) {
- MachineInstr *MI = I;
- if (MI->isDebugValue())
+ for (MachineInstr &MI : *MBB) {
+ if (MI.isDebugValue())
continue;
- DistanceMap.insert(std::make_pair(MI, Dist++));
+ DistanceMap.insert(std::make_pair(&MI, Dist++));
runOnInstr(MI, Defs);
}
@@ -679,17 +680,17 @@ bool LiveVariables::runOnMachineFunction(MachineFunction &mf) {
/// replaceKillInstruction - Update register kill info by replacing a kill
/// instruction with a new one.
-void LiveVariables::replaceKillInstruction(unsigned Reg, MachineInstr *OldMI,
- MachineInstr *NewMI) {
+void LiveVariables::replaceKillInstruction(unsigned Reg, MachineInstr &OldMI,
+ MachineInstr &NewMI) {
VarInfo &VI = getVarInfo(Reg);
- std::replace(VI.Kills.begin(), VI.Kills.end(), OldMI, NewMI);
+ std::replace(VI.Kills.begin(), VI.Kills.end(), &OldMI, &NewMI);
}
/// removeVirtualRegistersKilled - Remove all killed info for the specified
/// instruction.
-void LiveVariables::removeVirtualRegistersKilled(MachineInstr *MI) {
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- MachineOperand &MO = MI->getOperand(i);
+void LiveVariables::removeVirtualRegistersKilled(MachineInstr &MI) {
+ for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI.getOperand(i);
if (MO.isReg() && MO.isKill()) {
MO.setIsKill(false);
unsigned Reg = MO.getReg();
diff --git a/contrib/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp b/contrib/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp
index eb60005..af7392f 100644
--- a/contrib/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp
+++ b/contrib/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp
@@ -256,12 +256,12 @@ lookupCandidateBaseReg(unsigned BaseReg,
int64_t BaseOffset,
int64_t FrameSizeAdjust,
int64_t LocalFrameOffset,
- const MachineInstr *MI,
+ const MachineInstr &MI,
const TargetRegisterInfo *TRI) {
// Check if the relative offset from the address the base register references
// to the target address is in range for the instruction.
int64_t Offset = FrameSizeAdjust + LocalFrameOffset - BaseOffset;
- return TRI->isFrameOffsetLegal(MI, BaseReg, Offset);
+ return TRI->isFrameOffsetLegal(&MI, BaseReg, Offset);
}
bool LocalStackSlotPass::insertFrameReferenceRegisters(MachineFunction &Fn) {
@@ -285,16 +285,13 @@ bool LocalStackSlotPass::insertFrameReferenceRegisters(MachineFunction &Fn) {
// choose the first one).
SmallVector<FrameRef, 64> FrameReferenceInsns;
- for (MachineFunction::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB) {
- for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ++I) {
- MachineInstr *MI = I;
-
+ for (MachineBasicBlock &BB : Fn) {
+ for (MachineInstr &MI : BB) {
// Debug value, stackmap and patchpoint instructions can't be out of
// range, so they don't need any updates.
- if (MI->isDebugValue() ||
- MI->getOpcode() == TargetOpcode::STATEPOINT ||
- MI->getOpcode() == TargetOpcode::STACKMAP ||
- MI->getOpcode() == TargetOpcode::PATCHPOINT)
+ if (MI.isDebugValue() || MI.getOpcode() == TargetOpcode::STATEPOINT ||
+ MI.getOpcode() == TargetOpcode::STACKMAP ||
+ MI.getOpcode() == TargetOpcode::PATCHPOINT)
continue;
// For now, allocate the base register(s) within the basic block
@@ -303,19 +300,18 @@ bool LocalStackSlotPass::insertFrameReferenceRegisters(MachineFunction &Fn) {
// than that, but the increased register pressure makes that a
// tricky thing to balance. Investigate if re-materializing these
// becomes an issue.
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
// Consider replacing all frame index operands that reference
// an object allocated in the local block.
- if (MI->getOperand(i).isFI()) {
+ if (MI.getOperand(i).isFI()) {
// Don't try this with values not in the local block.
- if (!MFI->isObjectPreAllocated(MI->getOperand(i).getIndex()))
+ if (!MFI->isObjectPreAllocated(MI.getOperand(i).getIndex()))
break;
- int Idx = MI->getOperand(i).getIndex();
+ int Idx = MI.getOperand(i).getIndex();
int64_t LocalOffset = LocalOffsets[Idx];
- if (!TRI->needsFrameBaseReg(MI, LocalOffset))
+ if (!TRI->needsFrameBaseReg(&MI, LocalOffset))
break;
- FrameReferenceInsns.
- push_back(FrameRef(MI, LocalOffset, Idx));
+ FrameReferenceInsns.push_back(FrameRef(&MI, LocalOffset, Idx));
break;
}
}
@@ -333,46 +329,44 @@ bool LocalStackSlotPass::insertFrameReferenceRegisters(MachineFunction &Fn) {
// Loop through the frame references and allocate for them as necessary.
for (int ref = 0, e = FrameReferenceInsns.size(); ref < e ; ++ref) {
FrameRef &FR = FrameReferenceInsns[ref];
- MachineBasicBlock::iterator I = FR.getMachineInstr();
- MachineInstr *MI = I;
+ MachineInstr &MI = *FR.getMachineInstr();
int64_t LocalOffset = FR.getLocalOffset();
int FrameIdx = FR.getFrameIndex();
assert(MFI->isObjectPreAllocated(FrameIdx) &&
"Only pre-allocated locals expected!");
- DEBUG(dbgs() << "Considering: " << *MI);
+ DEBUG(dbgs() << "Considering: " << MI);
unsigned idx = 0;
- for (unsigned f = MI->getNumOperands(); idx != f; ++idx) {
- if (!MI->getOperand(idx).isFI())
+ for (unsigned f = MI.getNumOperands(); idx != f; ++idx) {
+ if (!MI.getOperand(idx).isFI())
continue;
- if (FrameIdx == I->getOperand(idx).getIndex())
+ if (FrameIdx == MI.getOperand(idx).getIndex())
break;
}
- assert(idx < MI->getNumOperands() && "Cannot find FI operand");
+ assert(idx < MI.getNumOperands() && "Cannot find FI operand");
int64_t Offset = 0;
int64_t FrameSizeAdjust = StackGrowsDown ? MFI->getLocalFrameSize() : 0;
- DEBUG(dbgs() << " Replacing FI in: " << *MI);
+ DEBUG(dbgs() << " Replacing FI in: " << MI);
// If we have a suitable base register available, use it; otherwise
// create a new one. Note that any offset encoded in the
// instruction itself will be taken into account by the target,
// so we don't have to adjust for it here when reusing a base
// register.
- if (UsedBaseReg && lookupCandidateBaseReg(BaseReg, BaseOffset,
- FrameSizeAdjust, LocalOffset, MI,
- TRI)) {
+ if (UsedBaseReg &&
+ lookupCandidateBaseReg(BaseReg, BaseOffset, FrameSizeAdjust,
+ LocalOffset, MI, TRI)) {
DEBUG(dbgs() << " Reusing base register " << BaseReg << "\n");
// We found a register to reuse.
Offset = FrameSizeAdjust + LocalOffset - BaseOffset;
} else {
- // No previously defined register was in range, so create a // new one.
-
- int64_t InstrOffset = TRI->getFrameIndexInstrOffset(MI, idx);
+ // No previously defined register was in range, so create a new one.
+ int64_t InstrOffset = TRI->getFrameIndexInstrOffset(&MI, idx);
int64_t PrevBaseOffset = BaseOffset;
BaseOffset = FrameSizeAdjust + LocalOffset + InstrOffset;
@@ -386,12 +380,12 @@ bool LocalStackSlotPass::insertFrameReferenceRegisters(MachineFunction &Fn) {
!lookupCandidateBaseReg(
BaseReg, BaseOffset, FrameSizeAdjust,
FrameReferenceInsns[ref + 1].getLocalOffset(),
- FrameReferenceInsns[ref + 1].getMachineInstr(), TRI)) {
+ *FrameReferenceInsns[ref + 1].getMachineInstr(), TRI)) {
BaseOffset = PrevBaseOffset;
continue;
}
- const MachineFunction *MF = MI->getParent()->getParent();
+ const MachineFunction *MF = MI.getParent()->getParent();
const TargetRegisterClass *RC = TRI->getPointerRegClass(*MF);
BaseReg = Fn.getRegInfo().createVirtualRegister(RC);
@@ -416,8 +410,8 @@ bool LocalStackSlotPass::insertFrameReferenceRegisters(MachineFunction &Fn) {
// Modify the instruction to use the new base register rather
// than the frame index operand.
- TRI->resolveFrameIndex(*I, BaseReg, Offset);
- DEBUG(dbgs() << "Resolved: " << *MI);
+ TRI->resolveFrameIndex(MI, BaseReg, Offset);
+ DEBUG(dbgs() << "Resolved: " << MI);
++NumReplacements;
}
diff --git a/contrib/llvm/lib/CodeGen/LowerEmuTLS.cpp b/contrib/llvm/lib/CodeGen/LowerEmuTLS.cpp
new file mode 100644
index 0000000..6966c8c
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/LowerEmuTLS.cpp
@@ -0,0 +1,162 @@
+//===- LowerEmuTLS.cpp - Add __emutls_[vt].* variables --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This transformation is required for targets depending on libgcc style
+// emulated thread local storage variables. For every defined TLS variable xyz,
+// an __emutls_v.xyz is generated. If xyz has a non-zero initializer, an
+// __emutls_t.xyz is also generated.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Target/TargetLowering.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "loweremutls"
+
+namespace {
+
+class LowerEmuTLS : public ModulePass {
+ const TargetMachine *TM;
+public:
+ static char ID; // Pass identification, replacement for typeid
+ explicit LowerEmuTLS() : ModulePass(ID), TM(nullptr) { }
+ explicit LowerEmuTLS(const TargetMachine *TM)
+ : ModulePass(ID), TM(TM) {
+ initializeLowerEmuTLSPass(*PassRegistry::getPassRegistry());
+ }
+ bool runOnModule(Module &M) override;
+private:
+ bool addEmuTlsVar(Module &M, const GlobalVariable *GV);
+ static void copyLinkageVisibility(Module &M,
+ const GlobalVariable *from,
+ GlobalVariable *to) {
+ to->setLinkage(from->getLinkage());
+ to->setVisibility(from->getVisibility());
+ if (from->hasComdat()) {
+ to->setComdat(M.getOrInsertComdat(to->getName()));
+ to->getComdat()->setSelectionKind(from->getComdat()->getSelectionKind());
+ }
+ }
+};
+}
+
+char LowerEmuTLS::ID = 0;
+
+INITIALIZE_PASS(LowerEmuTLS, "loweremutls",
+                "Add __emutls_[vt]. variables for emulated TLS model",
+ false, false)
+
+ModulePass *llvm::createLowerEmuTLSPass(const TargetMachine *TM) {
+ return new LowerEmuTLS(TM);
+}
+
+bool LowerEmuTLS::runOnModule(Module &M) {
+ if (skipModule(M))
+ return false;
+
+ if (!TM || !TM->Options.EmulatedTLS)
+ return false;
+
+ bool Changed = false;
+ SmallVector<const GlobalVariable*, 8> TlsVars;
+ for (const auto &G : M.globals()) {
+ if (G.isThreadLocal())
+ TlsVars.append({&G});
+ }
+ for (const auto G : TlsVars)
+ Changed |= addEmuTlsVar(M, G);
+ return Changed;
+}
+
+bool LowerEmuTLS::addEmuTlsVar(Module &M, const GlobalVariable *GV) {
+ LLVMContext &C = M.getContext();
+ PointerType *VoidPtrType = Type::getInt8PtrTy(C);
+
+ std::string EmuTlsVarName = ("__emutls_v." + GV->getName()).str();
+ GlobalVariable *EmuTlsVar = M.getNamedGlobal(EmuTlsVarName);
+ if (EmuTlsVar)
+ return false; // It has been added before.
+
+ const DataLayout &DL = M.getDataLayout();
+ Constant *NullPtr = ConstantPointerNull::get(VoidPtrType);
+
+ // Get non-zero initializer from GV's initializer.
+ const Constant *InitValue = nullptr;
+ if (GV->hasInitializer()) {
+ InitValue = GV->getInitializer();
+ const ConstantInt *InitIntValue = dyn_cast<ConstantInt>(InitValue);
+ // When GV's init value is all 0, omit the EmuTlsTmplVar and let
+    // the emutls library function reset newly allocated TLS variables.
+ if (isa<ConstantAggregateZero>(InitValue) ||
+ (InitIntValue && InitIntValue->isZero()))
+ InitValue = nullptr;
+ }
+
+ // Create the __emutls_v. symbol, whose type has 4 fields:
+ // word size; // size of GV in bytes
+ // word align; // alignment of GV
+ // void *ptr; // initialized to 0; set at run time per thread.
+  //     void *templ; // 0 or points to __emutls_t.*
+  // sizeof(word) should be the same as sizeof(void*) on the target.
+ IntegerType *WordType = DL.getIntPtrType(C);
+ PointerType *InitPtrType = InitValue ?
+ PointerType::getUnqual(InitValue->getType()) : VoidPtrType;
+ Type *ElementTypes[4] = {WordType, WordType, VoidPtrType, InitPtrType};
+ ArrayRef<Type*> ElementTypeArray(ElementTypes, 4);
+ StructType *EmuTlsVarType = StructType::create(ElementTypeArray);
+ EmuTlsVar = cast<GlobalVariable>(
+ M.getOrInsertGlobal(EmuTlsVarName, EmuTlsVarType));
+ copyLinkageVisibility(M, GV, EmuTlsVar);
+
+ // Define "__emutls_t.*" and "__emutls_v.*" only if GV is defined.
+ if (!GV->hasInitializer())
+ return true;
+
+ Type *GVType = GV->getValueType();
+ unsigned GVAlignment = GV->getAlignment();
+ if (!GVAlignment) {
+    // When LLVM IR declares a variable without alignment, use
+ // the ABI default alignment for the type.
+ GVAlignment = DL.getABITypeAlignment(GVType);
+ }
+
+ // Define "__emutls_t.*" if there is InitValue
+ GlobalVariable *EmuTlsTmplVar = nullptr;
+ if (InitValue) {
+ std::string EmuTlsTmplName = ("__emutls_t." + GV->getName()).str();
+ EmuTlsTmplVar = dyn_cast_or_null<GlobalVariable>(
+ M.getOrInsertGlobal(EmuTlsTmplName, GVType));
+    assert(EmuTlsTmplVar && "Failed to create emulated TLS initializer");
+ EmuTlsTmplVar->setConstant(true);
+ EmuTlsTmplVar->setInitializer(const_cast<Constant*>(InitValue));
+ EmuTlsTmplVar->setAlignment(GVAlignment);
+ copyLinkageVisibility(M, GV, EmuTlsTmplVar);
+ }
+
+ // Define "__emutls_v.*" with initializer and alignment.
+ Constant *ElementValues[4] = {
+ ConstantInt::get(WordType, DL.getTypeStoreSize(GVType)),
+ ConstantInt::get(WordType, GVAlignment),
+ NullPtr, EmuTlsTmplVar ? EmuTlsTmplVar : NullPtr
+ };
+ ArrayRef<Constant*> ElementValueArray(ElementValues, 4);
+ EmuTlsVar->setInitializer(
+ ConstantStruct::get(EmuTlsVarType, ElementValueArray));
+ unsigned MaxAlignment = std::max(
+ DL.getABITypeAlignment(WordType),
+ DL.getABITypeAlignment(VoidPtrType));
+ EmuTlsVar->setAlignment(MaxAlignment);
+ return true;
+}
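
For orientation, the __emutls_v.* global built above matches the libgcc emutls control structure; a rough C++ equivalent of what addEmuTlsVar() emits for `__thread int xyz = 42;` (illustrative only, not part of the pass):

    #include <cstdint>

    struct __emutls_control {   // layout of __emutls_v.xyz
      uintptr_t size;           // sizeof(xyz) in bytes
      uintptr_t align;          // alignment of xyz
      void *ptr;                // 0; set lazily per thread by the runtime
      void *templ;              // &__emutls_t.xyz, or 0 if zero-initialized
    };
    // __emutls_t.xyz would be a constant holding 42, aligned like xyz.
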
diff --git a/contrib/llvm/lib/CodeGen/MIRParser/MILexer.cpp b/contrib/llvm/lib/CodeGen/MIRParser/MILexer.cpp
index 28f9d4e..6e3de52 100644
--- a/contrib/llvm/lib/CodeGen/MIRParser/MILexer.cpp
+++ b/contrib/llvm/lib/CodeGen/MIRParser/MILexer.cpp
@@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//
#include "MILexer.h"
+#include "llvm/ADT/None.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Twine.h"
@@ -21,6 +22,9 @@ using namespace llvm;
namespace {
+typedef function_ref<void(StringRef::iterator Loc, const Twine &)>
+ ErrorCallbackType;
+
/// This class provides a way to iterate and get characters from the source
/// string.
class Cursor {
@@ -133,9 +137,7 @@ static std::string unescapeQuotedString(StringRef Value) {
}
/// Lex a string constant using the following regular expression: \"[^\"]*\"
-static Cursor lexStringConstant(
- Cursor C,
- function_ref<void(StringRef::iterator Loc, const Twine &)> ErrorCallback) {
+static Cursor lexStringConstant(Cursor C, ErrorCallbackType ErrorCallback) {
assert(C.peek() == '"');
for (C.advance(); C.peek() != '"'; C.advance()) {
if (C.isEOF() || isNewlineChar(C.peek())) {
@@ -149,9 +151,8 @@ static Cursor lexStringConstant(
return C;
}
-static Cursor lexName(
- Cursor C, MIToken &Token, MIToken::TokenKind Type, unsigned PrefixLength,
- function_ref<void(StringRef::iterator Loc, const Twine &)> ErrorCallback) {
+static Cursor lexName(Cursor C, MIToken &Token, MIToken::TokenKind Type,
+ unsigned PrefixLength, ErrorCallbackType ErrorCallback) {
auto Range = C;
C.advance(PrefixLength);
if (C.peek() == '"') {
@@ -241,9 +242,8 @@ static Cursor maybeLexIdentifier(Cursor C, MIToken &Token) {
return C;
}
-static Cursor maybeLexMachineBasicBlock(
- Cursor C, MIToken &Token,
- function_ref<void(StringRef::iterator Loc, const Twine &)> ErrorCallback) {
+static Cursor maybeLexMachineBasicBlock(Cursor C, MIToken &Token,
+ ErrorCallbackType ErrorCallback) {
bool IsReference = C.remaining().startswith("%bb.");
if (!IsReference && !C.remaining().startswith("bb."))
return None;
@@ -326,9 +326,17 @@ static Cursor maybeLexConstantPoolItem(Cursor C, MIToken &Token) {
return maybeLexIndex(C, Token, "%const.", MIToken::ConstantPoolItem);
}
-static Cursor maybeLexIRBlock(
- Cursor C, MIToken &Token,
- function_ref<void(StringRef::iterator Loc, const Twine &)> ErrorCallback) {
+static Cursor maybeLexSubRegisterIndex(Cursor C, MIToken &Token,
+ ErrorCallbackType ErrorCallback) {
+ const StringRef Rule = "%subreg.";
+ if (!C.remaining().startswith(Rule))
+ return None;
+ return lexName(C, Token, MIToken::SubRegisterIndex, Rule.size(),
+ ErrorCallback);
+}
+
+static Cursor maybeLexIRBlock(Cursor C, MIToken &Token,
+ ErrorCallbackType ErrorCallback) {
const StringRef Rule = "%ir-block.";
if (!C.remaining().startswith(Rule))
return None;
@@ -337,9 +345,8 @@ static Cursor maybeLexIRBlock(
return lexName(C, Token, MIToken::NamedIRBlock, Rule.size(), ErrorCallback);
}
-static Cursor maybeLexIRValue(
- Cursor C, MIToken &Token,
- function_ref<void(StringRef::iterator Loc, const Twine &)> ErrorCallback) {
+static Cursor maybeLexIRValue(Cursor C, MIToken &Token,
+ ErrorCallbackType ErrorCallback) {
const StringRef Rule = "%ir.";
if (!C.remaining().startswith(Rule))
return None;
@@ -373,9 +380,8 @@ static Cursor maybeLexRegister(Cursor C, MIToken &Token) {
return C;
}
-static Cursor maybeLexGlobalValue(
- Cursor C, MIToken &Token,
- function_ref<void(StringRef::iterator Loc, const Twine &)> ErrorCallback) {
+static Cursor maybeLexGlobalValue(Cursor C, MIToken &Token,
+ ErrorCallbackType ErrorCallback) {
if (C.peek() != '@')
return None;
if (!isdigit(C.peek(1)))
@@ -391,9 +397,8 @@ static Cursor maybeLexGlobalValue(
return C;
}
-static Cursor maybeLexExternalSymbol(
- Cursor C, MIToken &Token,
- function_ref<void(StringRef::iterator Loc, const Twine &)> ErrorCallback) {
+static Cursor maybeLexExternalSymbol(Cursor C, MIToken &Token,
+ ErrorCallbackType ErrorCallback) {
if (C.peek() != '$')
return None;
return lexName(C, Token, MIToken::ExternalSymbol, /*PrefixLength=*/1,
@@ -456,9 +461,8 @@ static MIToken::TokenKind getMetadataKeywordKind(StringRef Identifier) {
.Default(MIToken::Error);
}
-static Cursor maybeLexExlaim(
- Cursor C, MIToken &Token,
- function_ref<void(StringRef::iterator Loc, const Twine &)> ErrorCallback) {
+static Cursor maybeLexExlaim(Cursor C, MIToken &Token,
+ ErrorCallbackType ErrorCallback) {
if (C.peek() != '!')
return None;
auto Range = C;
@@ -497,6 +501,10 @@ static MIToken::TokenKind symbolToken(char C) {
return MIToken::plus;
case '-':
return MIToken::minus;
+ case '<':
+ return MIToken::less;
+ case '>':
+ return MIToken::greater;
default:
return MIToken::Error;
}
@@ -527,9 +535,8 @@ static Cursor maybeLexNewline(Cursor C, MIToken &Token) {
return C;
}
-static Cursor maybeLexEscapedIRValue(
- Cursor C, MIToken &Token,
- function_ref<void(StringRef::iterator Loc, const Twine &)> ErrorCallback) {
+static Cursor maybeLexEscapedIRValue(Cursor C, MIToken &Token,
+ ErrorCallbackType ErrorCallback) {
if (C.peek() != '`')
return None;
auto Range = C;
@@ -551,9 +558,8 @@ static Cursor maybeLexEscapedIRValue(
return C;
}
-StringRef llvm::lexMIToken(
- StringRef Source, MIToken &Token,
- function_ref<void(StringRef::iterator Loc, const Twine &)> ErrorCallback) {
+StringRef llvm::lexMIToken(StringRef Source, MIToken &Token,
+ ErrorCallbackType ErrorCallback) {
auto C = skipComment(skipWhitespace(Cursor(Source)));
if (C.isEOF()) {
Token.reset(MIToken::Eof, C.remaining());
@@ -574,6 +580,8 @@ StringRef llvm::lexMIToken(
return R.remaining();
if (Cursor R = maybeLexConstantPoolItem(C, Token))
return R.remaining();
+ if (Cursor R = maybeLexSubRegisterIndex(C, Token, ErrorCallback))
+ return R.remaining();
if (Cursor R = maybeLexIRBlock(C, Token, ErrorCallback))
return R.remaining();
if (Cursor R = maybeLexIRValue(C, Token, ErrorCallback))
diff --git a/contrib/llvm/lib/CodeGen/MIRParser/MILexer.h b/contrib/llvm/lib/CodeGen/MIRParser/MILexer.h
index ff54aa3..32fc8ab 100644
--- a/contrib/llvm/lib/CodeGen/MIRParser/MILexer.h
+++ b/contrib/llvm/lib/CodeGen/MIRParser/MILexer.h
@@ -45,6 +45,8 @@ struct MIToken {
rbrace,
plus,
minus,
+ less,
+ greater,
// Keywords
kw_implicit,
@@ -116,7 +118,8 @@ struct MIToken {
IRBlock,
NamedIRValue,
IRValue,
- QuotedIRValue // `<constant value>`
+ QuotedIRValue, // `<constant value>`
+ SubRegisterIndex
};
private:
diff --git a/contrib/llvm/lib/CodeGen/MIRParser/MIParser.cpp b/contrib/llvm/lib/CodeGen/MIRParser/MIParser.cpp
index f2f6584..b3fd16f 100644
--- a/contrib/llvm/lib/CodeGen/MIRParser/MIParser.cpp
+++ b/contrib/llvm/lib/CodeGen/MIRParser/MIParser.cpp
@@ -17,24 +17,30 @@
#include "llvm/AsmParser/Parser.h"
#include "llvm/AsmParser/SlotMapping.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
-#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
-#include "llvm/IR/Instructions.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/Constants.h"
+#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/ModuleSlotTracker.h"
#include "llvm/IR/ValueSymbolTable.h"
-#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/SourceMgr.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;
+PerFunctionMIParsingState::PerFunctionMIParsingState(MachineFunction &MF,
+ SourceMgr &SM, const SlotMapping &IRSlots)
+ : MF(MF), SM(&SM), IRSlots(IRSlots) {
+}
+
namespace {
/// A wrapper struct around the 'MachineOperand' struct that includes a source
@@ -55,14 +61,11 @@ struct ParsedMachineOperand {
};
class MIParser {
- SourceMgr &SM;
MachineFunction &MF;
SMDiagnostic &Error;
StringRef Source, CurrentSource;
MIToken Token;
const PerFunctionMIParsingState &PFS;
- /// Maps from indices to unnamed global values and metadata nodes.
- const SlotMapping &IRSlots;
/// Maps from instruction names to op codes.
StringMap<unsigned> Names2InstrOpCodes;
/// Maps from register names to registers.
@@ -83,11 +86,12 @@ class MIParser {
StringMap<unsigned> Names2BitmaskTargetFlags;
public:
- MIParser(SourceMgr &SM, MachineFunction &MF, SMDiagnostic &Error,
- StringRef Source, const PerFunctionMIParsingState &PFS,
- const SlotMapping &IRSlots);
+ MIParser(const PerFunctionMIParsingState &PFS, SMDiagnostic &Error,
+ StringRef Source);
- void lex();
+ /// \p SkipChar gives the number of characters to skip before looking
+ /// for the next token.
+ void lex(unsigned SkipChar = 0);
/// Report an error at the current location with the given message.
///
@@ -119,12 +123,17 @@ public:
bool parseRegisterFlag(unsigned &Flags);
bool parseSubRegisterIndex(unsigned &SubReg);
bool parseRegisterTiedDefIndex(unsigned &TiedDefIdx);
+ bool parseSize(unsigned &Size);
bool parseRegisterOperand(MachineOperand &Dest,
Optional<unsigned> &TiedDefIdx, bool IsDef = false);
bool parseImmediateOperand(MachineOperand &Dest);
bool parseIRConstant(StringRef::iterator Loc, StringRef Source,
const Constant *&C);
bool parseIRConstant(StringRef::iterator Loc, const Constant *&C);
+ bool parseIRType(StringRef::iterator Loc, StringRef Source, unsigned &Read,
+ Type *&Ty);
+  /// \p MustBeSized defines whether or not \p Ty must be sized.
+ bool parseIRType(StringRef::iterator Loc, Type *&Ty, bool MustBeSized = true);
bool parseTypedImmediateOperand(MachineOperand &Dest);
bool parseFPImmediateOperand(MachineOperand &Dest);
bool parseMBBReference(MachineBasicBlock *&MBB);
@@ -136,6 +145,7 @@ public:
bool parseGlobalValue(GlobalValue *&GV);
bool parseGlobalAddressOperand(MachineOperand &Dest);
bool parseConstantPoolIndexOperand(MachineOperand &Dest);
+ bool parseSubRegisterIndexOperand(MachineOperand &Dest);
bool parseJumpTableIndexOperand(MachineOperand &Dest);
bool parseExternalSymbolOperand(MachineOperand &Dest);
bool parseMDNode(MDNode *&Node);
@@ -155,7 +165,7 @@ public:
bool parseAlignment(unsigned &Alignment);
bool parseOperandsOffset(MachineOperand &Op);
bool parseIRValue(const Value *&V);
- bool parseMemoryOperandFlag(unsigned &Flags);
+ bool parseMemoryOperandFlag(MachineMemOperand::Flags &Flags);
bool parseMemoryPseudoSourceValue(const PseudoSourceValue *&PSV);
bool parseMachinePointerInfo(MachinePointerInfo &Dest);
bool parseMachineMemoryOperand(MachineMemOperand *&Dest);
@@ -244,21 +254,21 @@ private:
} // end anonymous namespace
-MIParser::MIParser(SourceMgr &SM, MachineFunction &MF, SMDiagnostic &Error,
- StringRef Source, const PerFunctionMIParsingState &PFS,
- const SlotMapping &IRSlots)
- : SM(SM), MF(MF), Error(Error), Source(Source), CurrentSource(Source),
- PFS(PFS), IRSlots(IRSlots) {}
+MIParser::MIParser(const PerFunctionMIParsingState &PFS, SMDiagnostic &Error,
+ StringRef Source)
+ : MF(PFS.MF), Error(Error), Source(Source), CurrentSource(Source), PFS(PFS)
+{}
-void MIParser::lex() {
+void MIParser::lex(unsigned SkipChar) {
CurrentSource = lexMIToken(
- CurrentSource, Token,
+ CurrentSource.data() + SkipChar, Token,
[this](StringRef::iterator Loc, const Twine &Msg) { error(Loc, Msg); });
}
bool MIParser::error(const Twine &Msg) { return error(Token.location(), Msg); }
bool MIParser::error(StringRef::iterator Loc, const Twine &Msg) {
+ const SourceMgr &SM = *PFS.SM;
assert(Loc >= Source.data() && Loc <= (Source.data() + Source.size()));
const MemoryBuffer &Buffer = *SM.getMemoryBuffer(SM.getMainFileID());
if (Loc >= Buffer.getBufferStart() && Loc <= Buffer.getBufferEnd()) {
@@ -587,6 +597,14 @@ bool MIParser::parse(MachineInstr *&MI) {
if (Token.isError() || parseInstruction(OpCode, Flags))
return true;
+ Type *Ty = nullptr;
+ if (isPreISelGenericOpcode(OpCode)) {
+ // For generic opcode, a type is mandatory.
+ auto Loc = Token.location();
+ if (parseIRType(Loc, Ty))
+ return true;
+ }
+
// Parse the remaining machine operands.
while (!Token.isNewlineOrEOF() && Token.isNot(MIToken::kw_debug_location) &&
Token.isNot(MIToken::coloncolon) && Token.isNot(MIToken::lbrace)) {
@@ -642,6 +660,8 @@ bool MIParser::parse(MachineInstr *&MI) {
// TODO: Check for extraneous machine operands.
MI = MF.CreateMachineInstr(MCID, DebugLocation, /*NoImplicit=*/true);
MI->setFlags(Flags);
+ if (Ty)
+ MI->setType(Ty);
for (const auto &Operand : Operands)
MI->addOperand(MF, Operand.Operand);
if (assignRegisterTies(*MI, Operands))
@@ -876,6 +896,17 @@ bool MIParser::parseRegisterTiedDefIndex(unsigned &TiedDefIdx) {
return false;
}
+bool MIParser::parseSize(unsigned &Size) {
+ if (Token.isNot(MIToken::IntegerLiteral))
+ return error("expected an integer literal for the size");
+ if (getUnsigned(Size))
+ return true;
+ lex();
+ if (expectAndConsume(MIToken::rparen))
+ return true;
+ return false;
+}
+
bool MIParser::assignRegisterTies(MachineInstr &MI,
ArrayRef<ParsedMachineOperand> Operands) {
SmallVector<std::pair<unsigned, unsigned>, 4> TiedRegisterPairs;
@@ -931,12 +962,31 @@ bool MIParser::parseRegisterOperand(MachineOperand &Dest,
if (Token.is(MIToken::colon)) {
if (parseSubRegisterIndex(SubReg))
return true;
+ if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ return error("subregister index expects a virtual register");
}
- if ((Flags & RegState::Define) == 0 && consumeIfPresent(MIToken::lparen)) {
- unsigned Idx;
- if (parseRegisterTiedDefIndex(Idx))
+ if ((Flags & RegState::Define) == 0) {
+ if (consumeIfPresent(MIToken::lparen)) {
+ unsigned Idx;
+ if (parseRegisterTiedDefIndex(Idx))
+ return true;
+ TiedDefIdx = Idx;
+ }
+ } else if (consumeIfPresent(MIToken::lparen)) {
+ // Virtual registers may have a size with GlobalISel.
+ if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ return error("unexpected size on physical register");
+ unsigned Size;
+ if (parseSize(Size))
return true;
- TiedDefIdx = Idx;
+
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ MRI.setSize(Reg, Size);
+ } else if (PFS.GenericVRegs.count(Reg)) {
+ // Generic virtual registers must have a size.
+ // If we end up here this means the size hasn't been specified and
+ // this is bad!
+ return error("generic virtual registers must have a size");
}
Dest = MachineOperand::CreateReg(
Reg, Flags & RegState::Define, Flags & RegState::Implicit,
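
The size syntax accepted above applies only to definitions of virtual registers: a generic definition might read `%0(32) = G_ADD i32 %edi, %edi` (illustrative MIR for this tree), a size on a physical register is rejected with "unexpected size on physical register", and a generic virtual register defined without a size hits the "generic virtual registers must have a size" error.
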
@@ -961,7 +1011,7 @@ bool MIParser::parseIRConstant(StringRef::iterator Loc, StringRef StringValue,
auto Source = StringValue.str(); // The source has to be null terminated.
SMDiagnostic Err;
C = parseConstantValue(Source.c_str(), Err, *MF.getFunction()->getParent(),
- &IRSlots);
+ &PFS.IRSlots);
if (!C)
return error(Loc + Err.getColumnNo(), Err.getMessage());
return false;
@@ -974,6 +1024,38 @@ bool MIParser::parseIRConstant(StringRef::iterator Loc, const Constant *&C) {
return false;
}
+bool MIParser::parseIRType(StringRef::iterator Loc, StringRef StringValue,
+ unsigned &Read, Type *&Ty) {
+ auto Source = StringValue.str(); // The source has to be null terminated.
+ SMDiagnostic Err;
+ Ty = parseTypeAtBeginning(Source.c_str(), Read, Err,
+ *MF.getFunction()->getParent(), &PFS.IRSlots);
+ if (!Ty)
+ return error(Loc + Err.getColumnNo(), Err.getMessage());
+ return false;
+}
+
+bool MIParser::parseIRType(StringRef::iterator Loc, Type *&Ty,
+ bool MustBeSized) {
+  // At this point we enter the IR world, i.e., to get the correct type,
+ // we need to hand off the whole string, not just the current token.
+ // E.g., <4 x i64> would give '<' as a token and there is not much
+ // the IR parser can do with that.
+ unsigned Read = 0;
+ if (parseIRType(Loc, StringRef(Loc), Read, Ty))
+ return true;
+ // The type must be sized, otherwise there is not much the backend
+ // can do with it.
+ if (MustBeSized && !Ty->isSized())
+ return error("expected a sized type");
+  // The next token starts Read characters after Loc.
+  // However, the current location is not Loc, but Loc + the length of Token.
+  // Therefore, subtract the length of Token (range().end() - Loc) from the
+  // number of characters to skip before the next token.
+ lex(Read - (Token.range().end() - Loc));
+ return false;
+}
+
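
Concretely (hypothetical values): when the current token is '<' at the start of "<4 x i64>", Token.range().end() - Loc is 1 and parseTypeAtBeginning() reports Read == 9, so lex(9 - 1) resumes lexing eight characters past the current cursor, immediately after the type.
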
bool MIParser::parseTypedImmediateOperand(MachineOperand &Dest) {
assert(Token.is(MIToken::IntegerType));
auto Loc = Token.location();
@@ -1100,10 +1182,10 @@ bool MIParser::parseGlobalValue(GlobalValue *&GV) {
unsigned GVIdx;
if (getUnsigned(GVIdx))
return true;
- if (GVIdx >= IRSlots.GlobalValues.size())
+ if (GVIdx >= PFS.IRSlots.GlobalValues.size())
return error(Twine("use of undefined global value '@") + Twine(GVIdx) +
"'");
- GV = IRSlots.GlobalValues[GVIdx];
+ GV = PFS.IRSlots.GlobalValues[GVIdx];
break;
}
default:
@@ -1161,6 +1243,17 @@ bool MIParser::parseExternalSymbolOperand(MachineOperand &Dest) {
return false;
}
+bool MIParser::parseSubRegisterIndexOperand(MachineOperand &Dest) {
+ assert(Token.is(MIToken::SubRegisterIndex));
+ StringRef Name = Token.stringValue();
+ unsigned SubRegIndex = getSubRegIndex(Token.stringValue());
+ if (SubRegIndex == 0)
+ return error(Twine("unknown subregister index '") + Name + "'");
+ lex();
+ Dest = MachineOperand::CreateImm(SubRegIndex);
+ return false;
+}
+
bool MIParser::parseMDNode(MDNode *&Node) {
assert(Token.is(MIToken::exclaim));
auto Loc = Token.location();
@@ -1170,8 +1263,8 @@ bool MIParser::parseMDNode(MDNode *&Node) {
unsigned ID;
if (getUnsigned(ID))
return true;
- auto NodeInfo = IRSlots.MetadataNodes.find(ID);
- if (NodeInfo == IRSlots.MetadataNodes.end())
+ auto NodeInfo = PFS.IRSlots.MetadataNodes.find(ID);
+ if (NodeInfo == PFS.IRSlots.MetadataNodes.end())
return error(Loc, "use of undefined metadata '!" + Twine(ID) + "'");
lex();
Node = NodeInfo->second.get();
@@ -1406,6 +1499,8 @@ bool MIParser::parseMachineOperand(MachineOperand &Dest,
return parseJumpTableIndexOperand(Dest);
case MIToken::ExternalSymbol:
return parseExternalSymbolOperand(Dest);
+ case MIToken::SubRegisterIndex:
+ return parseSubRegisterIndexOperand(Dest);
case MIToken::exclaim:
return parseMetadataOperand(Dest);
case MIToken::kw_cfi_same_value:
@@ -1559,8 +1654,8 @@ bool MIParser::getUint64(uint64_t &Result) {
return false;
}
-bool MIParser::parseMemoryOperandFlag(unsigned &Flags) {
- const unsigned OldFlags = Flags;
+bool MIParser::parseMemoryOperandFlag(MachineMemOperand::Flags &Flags) {
+ const auto OldFlags = Flags;
switch (Token.kind()) {
case MIToken::kw_volatile:
Flags |= MachineMemOperand::MOVolatile;
@@ -1605,6 +1700,14 @@ bool MIParser::parseMemoryPseudoSourceValue(const PseudoSourceValue *&PSV) {
// The token was already consumed, so use return here instead of break.
return false;
}
+ case MIToken::StackObject: {
+ int FI;
+ if (parseStackFrameIndex(FI))
+ return true;
+ PSV = MF.getPSVManager().getFixedStack(FI);
+ // The token was already consumed, so use return here instead of break.
+ return false;
+ }
case MIToken::kw_call_entry: {
lex();
switch (Token.kind()) {
@@ -1636,7 +1739,8 @@ bool MIParser::parseMemoryPseudoSourceValue(const PseudoSourceValue *&PSV) {
bool MIParser::parseMachinePointerInfo(MachinePointerInfo &Dest) {
if (Token.is(MIToken::kw_constant_pool) || Token.is(MIToken::kw_stack) ||
Token.is(MIToken::kw_got) || Token.is(MIToken::kw_jump_table) ||
- Token.is(MIToken::FixedStackObject) || Token.is(MIToken::kw_call_entry)) {
+ Token.is(MIToken::FixedStackObject) || Token.is(MIToken::StackObject) ||
+ Token.is(MIToken::kw_call_entry)) {
const PseudoSourceValue *PSV = nullptr;
if (parseMemoryPseudoSourceValue(PSV))
return true;
@@ -1667,7 +1771,7 @@ bool MIParser::parseMachinePointerInfo(MachinePointerInfo &Dest) {
bool MIParser::parseMachineMemoryOperand(MachineMemOperand *&Dest) {
if (expectAndConsume(MIToken::lparen))
return true;
- unsigned Flags = 0;
+ MachineMemOperand::Flags Flags = MachineMemOperand::MONone;
while (Token.isMemoryOperandFlag()) {
if (parseMemoryOperandFlag(Flags))
return true;
@@ -1688,14 +1792,16 @@ bool MIParser::parseMachineMemoryOperand(MachineMemOperand *&Dest) {
return true;
lex();
- const char *Word = Flags & MachineMemOperand::MOLoad ? "from" : "into";
- if (Token.isNot(MIToken::Identifier) || Token.stringValue() != Word)
- return error(Twine("expected '") + Word + "'");
- lex();
-
MachinePointerInfo Ptr = MachinePointerInfo();
- if (parseMachinePointerInfo(Ptr))
- return true;
+ if (Token.is(MIToken::Identifier)) {
+ const char *Word = Flags & MachineMemOperand::MOLoad ? "from" : "into";
+ if (Token.stringValue() != Word)
+ return error(Twine("expected '") + Word + "'");
+ lex();
+
+ if (parseMachinePointerInfo(Ptr))
+ return true;
+ }
unsigned BaseAlignment = Size;
AAMDNodes AAInfo;
MDNode *Range = nullptr;
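
With the pointer info now optional, a memory operand such as `:: (load 4 from %ir.p)` may also be written as plain `:: (load 4)` when no IR value is known, and the new StackObject case accepts spill-slot references like `from %stack.0` (operand text here is illustrative).
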
@@ -1947,65 +2053,42 @@ bool MIParser::getBitmaskTargetFlag(StringRef Name, unsigned &Flag) {
return false;
}
-bool llvm::parseMachineBasicBlockDefinitions(MachineFunction &MF, StringRef Src,
- PerFunctionMIParsingState &PFS,
- const SlotMapping &IRSlots,
+bool llvm::parseMachineBasicBlockDefinitions(PerFunctionMIParsingState &PFS,
+ StringRef Src,
SMDiagnostic &Error) {
- SourceMgr SM;
- SM.AddNewSourceBuffer(
- MemoryBuffer::getMemBuffer(Src, "", /*RequiresNullTerminator=*/false),
- SMLoc());
- return MIParser(SM, MF, Error, Src, PFS, IRSlots)
- .parseBasicBlockDefinitions(PFS.MBBSlots);
-}
-
-bool llvm::parseMachineInstructions(MachineFunction &MF, StringRef Src,
- const PerFunctionMIParsingState &PFS,
- const SlotMapping &IRSlots,
- SMDiagnostic &Error) {
- SourceMgr SM;
- SM.AddNewSourceBuffer(
- MemoryBuffer::getMemBuffer(Src, "", /*RequiresNullTerminator=*/false),
- SMLoc());
- return MIParser(SM, MF, Error, Src, PFS, IRSlots).parseBasicBlocks();
-}
-
-bool llvm::parseMBBReference(MachineBasicBlock *&MBB, SourceMgr &SM,
- MachineFunction &MF, StringRef Src,
- const PerFunctionMIParsingState &PFS,
- const SlotMapping &IRSlots, SMDiagnostic &Error) {
- return MIParser(SM, MF, Error, Src, PFS, IRSlots).parseStandaloneMBB(MBB);
-}
-
-bool llvm::parseNamedRegisterReference(unsigned &Reg, SourceMgr &SM,
- MachineFunction &MF, StringRef Src,
- const PerFunctionMIParsingState &PFS,
- const SlotMapping &IRSlots,
+ return MIParser(PFS, Error, Src).parseBasicBlockDefinitions(PFS.MBBSlots);
+}
+
+bool llvm::parseMachineInstructions(const PerFunctionMIParsingState &PFS,
+ StringRef Src, SMDiagnostic &Error) {
+ return MIParser(PFS, Error, Src).parseBasicBlocks();
+}
+
+bool llvm::parseMBBReference(const PerFunctionMIParsingState &PFS,
+ MachineBasicBlock *&MBB, StringRef Src,
+ SMDiagnostic &Error) {
+ return MIParser(PFS, Error, Src).parseStandaloneMBB(MBB);
+}
+
+bool llvm::parseNamedRegisterReference(const PerFunctionMIParsingState &PFS,
+ unsigned &Reg, StringRef Src,
SMDiagnostic &Error) {
- return MIParser(SM, MF, Error, Src, PFS, IRSlots)
- .parseStandaloneNamedRegister(Reg);
+ return MIParser(PFS, Error, Src).parseStandaloneNamedRegister(Reg);
}
-bool llvm::parseVirtualRegisterReference(unsigned &Reg, SourceMgr &SM,
- MachineFunction &MF, StringRef Src,
- const PerFunctionMIParsingState &PFS,
- const SlotMapping &IRSlots,
+bool llvm::parseVirtualRegisterReference(const PerFunctionMIParsingState &PFS,
+ unsigned &Reg, StringRef Src,
SMDiagnostic &Error) {
- return MIParser(SM, MF, Error, Src, PFS, IRSlots)
- .parseStandaloneVirtualRegister(Reg);
+ return MIParser(PFS, Error, Src).parseStandaloneVirtualRegister(Reg);
}
-bool llvm::parseStackObjectReference(int &FI, SourceMgr &SM,
- MachineFunction &MF, StringRef Src,
- const PerFunctionMIParsingState &PFS,
- const SlotMapping &IRSlots,
+bool llvm::parseStackObjectReference(const PerFunctionMIParsingState &PFS,
+ int &FI, StringRef Src,
SMDiagnostic &Error) {
- return MIParser(SM, MF, Error, Src, PFS, IRSlots)
- .parseStandaloneStackObject(FI);
+ return MIParser(PFS, Error, Src).parseStandaloneStackObject(FI);
}
-bool llvm::parseMDNode(MDNode *&Node, SourceMgr &SM, MachineFunction &MF,
- StringRef Src, const PerFunctionMIParsingState &PFS,
- const SlotMapping &IRSlots, SMDiagnostic &Error) {
- return MIParser(SM, MF, Error, Src, PFS, IRSlots).parseStandaloneMDNode(Node);
+bool llvm::parseMDNode(const PerFunctionMIParsingState &PFS,
+ MDNode *&Node, StringRef Src, SMDiagnostic &Error) {
+ return MIParser(PFS, Error, Src).parseStandaloneMDNode(Node);
}
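
A hedged sketch of driving the narrowed entry points after this refactoring (caller-side names are illustrative):

    // One PerFunctionMIParsingState carries MF, the SourceMgr and the IR slot
    // mapping; the parse* helpers take it instead of separate arguments.
    PerFunctionMIParsingState PFS(MF, SM, IRSlots);
    MachineBasicBlock *MBB = nullptr;
    if (parseMBBReference(PFS, MBB, Src, Error))
      return true; // Error holds the location and message
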
diff --git a/contrib/llvm/lib/CodeGen/MIRParser/MIParser.h b/contrib/llvm/lib/CodeGen/MIRParser/MIParser.h
index 8aef704..18895b9 100644
--- a/contrib/llvm/lib/CodeGen/MIRParser/MIParser.h
+++ b/contrib/llvm/lib/CodeGen/MIRParser/MIParser.h
@@ -15,26 +15,37 @@
#define LLVM_LIB_CODEGEN_MIRPARSER_MIPARSER_H
#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/SmallSet.h"
namespace llvm {
+class StringRef;
class BasicBlock;
class MachineBasicBlock;
-class MachineInstr;
class MachineFunction;
+class MachineInstr;
+class MachineRegisterInfo;
class MDNode;
struct SlotMapping;
class SMDiagnostic;
class SourceMgr;
struct PerFunctionMIParsingState {
+ MachineFunction &MF;
+ SourceMgr *SM;
+ const SlotMapping &IRSlots;
+
DenseMap<unsigned, MachineBasicBlock *> MBBSlots;
DenseMap<unsigned, unsigned> VirtualRegisterSlots;
DenseMap<unsigned, int> FixedStackObjectSlots;
DenseMap<unsigned, int> StackObjectSlots;
DenseMap<unsigned, unsigned> ConstantPoolSlots;
DenseMap<unsigned, unsigned> JumpTableSlots;
+ /// Hold the generic virtual registers.
+ SmallSet<unsigned, 8> GenericVRegs;
+
+ PerFunctionMIParsingState(MachineFunction &MF, SourceMgr &SM,
+ const SlotMapping &IRSlots);
};
/// Parse the machine basic block definitions, and skip the machine
@@ -49,10 +60,8 @@ struct PerFunctionMIParsingState {
/// resolve the machine basic block references.
///
/// Return true if an error occurred.
-bool parseMachineBasicBlockDefinitions(MachineFunction &MF, StringRef Src,
- PerFunctionMIParsingState &PFS,
- const SlotMapping &IRSlots,
- SMDiagnostic &Error);
+bool parseMachineBasicBlockDefinitions(PerFunctionMIParsingState &PFS,
+ StringRef Src, SMDiagnostic &Error);
/// Parse the machine instructions.
///
@@ -64,35 +73,26 @@ bool parseMachineBasicBlockDefinitions(MachineFunction &MF, StringRef Src,
/// on the given source string.
///
/// Return true if an error occurred.
-bool parseMachineInstructions(MachineFunction &MF, StringRef Src,
- const PerFunctionMIParsingState &PFS,
- const SlotMapping &IRSlots, SMDiagnostic &Error);
-
-bool parseMBBReference(MachineBasicBlock *&MBB, SourceMgr &SM,
- MachineFunction &MF, StringRef Src,
- const PerFunctionMIParsingState &PFS,
- const SlotMapping &IRSlots, SMDiagnostic &Error);
-
-bool parseNamedRegisterReference(unsigned &Reg, SourceMgr &SM,
- MachineFunction &MF, StringRef Src,
- const PerFunctionMIParsingState &PFS,
- const SlotMapping &IRSlots,
+bool parseMachineInstructions(const PerFunctionMIParsingState &PFS,
+ StringRef Src, SMDiagnostic &Error);
+
+bool parseMBBReference(const PerFunctionMIParsingState &PFS,
+ MachineBasicBlock *&MBB, StringRef Src,
+ SMDiagnostic &Error);
+
+bool parseNamedRegisterReference(const PerFunctionMIParsingState &PFS,
+ unsigned &Reg, StringRef Src,
SMDiagnostic &Error);
-bool parseVirtualRegisterReference(unsigned &Reg, SourceMgr &SM,
- MachineFunction &MF, StringRef Src,
- const PerFunctionMIParsingState &PFS,
- const SlotMapping &IRSlots,
+bool parseVirtualRegisterReference(const PerFunctionMIParsingState &PFS,
+ unsigned &Reg, StringRef Src,
SMDiagnostic &Error);
-bool parseStackObjectReference(int &FI, SourceMgr &SM, MachineFunction &MF,
- StringRef Src,
- const PerFunctionMIParsingState &PFS,
- const SlotMapping &IRSlots, SMDiagnostic &Error);
+bool parseStackObjectReference(const PerFunctionMIParsingState &PFS,
+ int &FI, StringRef Src, SMDiagnostic &Error);
-bool parseMDNode(MDNode *&Node, SourceMgr &SM, MachineFunction &MF,
- StringRef Src, const PerFunctionMIParsingState &PFS,
- const SlotMapping &IRSlots, SMDiagnostic &Error);
+bool parseMDNode(const PerFunctionMIParsingState &PFS, MDNode *&Node,
+ StringRef Src, SMDiagnostic &Error);
} // end namespace llvm
diff --git a/contrib/llvm/lib/CodeGen/MIRParser/MIRParser.cpp b/contrib/llvm/lib/CodeGen/MIRParser/MIRParser.cpp
index 422efbc..4aa3df6 100644
--- a/contrib/llvm/lib/CodeGen/MIRParser/MIRParser.cpp
+++ b/contrib/llvm/lib/CodeGen/MIRParser/MIRParser.cpp
@@ -15,27 +15,30 @@
#include "llvm/CodeGen/MIRParser/MIRParser.h"
#include "MIParser.h"
#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/StringRef.h"
-#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/AsmParser/Parser.h"
#include "llvm/AsmParser/SlotMapping.h"
+#include "llvm/CodeGen/GlobalISel/RegisterBank.h"
+#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
+#include "llvm/CodeGen/MIRYamlMapping.h"
#include "llvm/CodeGen/MachineConstantPool.h"
-#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/MIRYamlMapping.h"
#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/ValueSymbolTable.h"
#include "llvm/Support/LineIterator.h"
+#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/SMLoc.h"
#include "llvm/Support/SourceMgr.h"
-#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/YAMLTraits.h"
#include <memory>
@@ -53,6 +56,8 @@ class MIRParserImpl {
SlotMapping IRSlots;
/// Maps from register class names to register classes.
StringMap<const TargetRegisterClass *> Names2RegClasses;
+ /// Maps from register bank names to register banks.
+ StringMap<const RegisterBank *> Names2RegBanks;
public:
MIRParserImpl(std::unique_ptr<MemoryBuffer> Contents, StringRef Filename,
@@ -97,44 +102,38 @@ public:
/// Return true if error occurred.
bool initializeMachineFunction(MachineFunction &MF);
- bool initializeRegisterInfo(MachineFunction &MF,
- const yaml::MachineFunction &YamlMF,
- PerFunctionMIParsingState &PFS);
+ bool initializeRegisterInfo(PerFunctionMIParsingState &PFS,
+ const yaml::MachineFunction &YamlMF);
- void inferRegisterInfo(MachineFunction &MF,
+ void inferRegisterInfo(const PerFunctionMIParsingState &PFS,
const yaml::MachineFunction &YamlMF);
- bool initializeFrameInfo(MachineFunction &MF,
- const yaml::MachineFunction &YamlMF,
- PerFunctionMIParsingState &PFS);
+ bool initializeFrameInfo(PerFunctionMIParsingState &PFS,
+ const yaml::MachineFunction &YamlMF);
- bool parseCalleeSavedRegister(MachineFunction &MF,
- PerFunctionMIParsingState &PFS,
+ bool parseCalleeSavedRegister(PerFunctionMIParsingState &PFS,
std::vector<CalleeSavedInfo> &CSIInfo,
const yaml::StringValue &RegisterSource,
int FrameIdx);
- bool parseStackObjectsDebugInfo(MachineFunction &MF,
- PerFunctionMIParsingState &PFS,
+ bool parseStackObjectsDebugInfo(PerFunctionMIParsingState &PFS,
const yaml::MachineStackObject &Object,
int FrameIdx);
- bool initializeConstantPool(MachineConstantPool &ConstantPool,
- const yaml::MachineFunction &YamlMF,
- const MachineFunction &MF,
- DenseMap<unsigned, unsigned> &ConstantPoolSlots);
+ bool initializeConstantPool(PerFunctionMIParsingState &PFS,
+ MachineConstantPool &ConstantPool,
+ const yaml::MachineFunction &YamlMF);
- bool initializeJumpTableInfo(MachineFunction &MF,
- const yaml::MachineJumpTable &YamlJTI,
- PerFunctionMIParsingState &PFS);
+ bool initializeJumpTableInfo(PerFunctionMIParsingState &PFS,
+ const yaml::MachineJumpTable &YamlJTI);
private:
- bool parseMDNode(MDNode *&Node, const yaml::StringValue &Source,
- MachineFunction &MF, const PerFunctionMIParsingState &PFS);
+ bool parseMDNode(const PerFunctionMIParsingState &PFS, MDNode *&Node,
+ const yaml::StringValue &Source);
- bool parseMBBReference(MachineBasicBlock *&MBB,
- const yaml::StringValue &Source, MachineFunction &MF,
- const PerFunctionMIParsingState &PFS);
+ bool parseMBBReference(const PerFunctionMIParsingState &PFS,
+ MachineBasicBlock *&MBB,
+ const yaml::StringValue &Source);
/// Return a MIR diagnostic converted from an MI string diagnostic.
SMDiagnostic diagFromMIStringDiag(const SMDiagnostic &Error,
@@ -149,12 +148,18 @@ private:
void createDummyFunction(StringRef Name, Module &M);
void initNames2RegClasses(const MachineFunction &MF);
+ void initNames2RegBanks(const MachineFunction &MF);
/// Check if the given identifier is a name of a register class.
///
/// Return null if the name isn't a register class.
const TargetRegisterClass *getRegClass(const MachineFunction &MF,
StringRef Name);
+
+ /// Check if the given identifier is a name of a register bank.
+ ///
+ /// Return null if the name isn't a register bank.
+ const RegisterBank *getRegBank(const MachineFunction &MF, StringRef Name);
};
} // end namespace llvm
@@ -226,7 +231,7 @@ std::unique_ptr<Module> MIRParserImpl::parse() {
Context, &IRSlots);
if (!M) {
reportDiagnostic(diagFromBlockStringDiag(Error, BSN->getSourceRange()));
- return M;
+ return nullptr;
}
In.nextDocument();
if (!In.setCurrentDocument())
@@ -285,46 +290,60 @@ bool MIRParserImpl::initializeMachineFunction(MachineFunction &MF) {
MF.setAlignment(YamlMF.Alignment);
MF.setExposesReturnsTwice(YamlMF.ExposesReturnsTwice);
MF.setHasInlineAsm(YamlMF.HasInlineAsm);
- PerFunctionMIParsingState PFS;
- if (initializeRegisterInfo(MF, YamlMF, PFS))
+ if (YamlMF.AllVRegsAllocated)
+ MF.getProperties().set(MachineFunctionProperties::Property::AllVRegsAllocated);
+ PerFunctionMIParsingState PFS(MF, SM, IRSlots);
+ if (initializeRegisterInfo(PFS, YamlMF))
return true;
if (!YamlMF.Constants.empty()) {
auto *ConstantPool = MF.getConstantPool();
assert(ConstantPool && "Constant pool must be created");
- if (initializeConstantPool(*ConstantPool, YamlMF, MF,
- PFS.ConstantPoolSlots))
+ if (initializeConstantPool(PFS, *ConstantPool, YamlMF))
return true;
}
+ StringRef BlockStr = YamlMF.Body.Value.Value;
SMDiagnostic Error;
- if (parseMachineBasicBlockDefinitions(MF, YamlMF.Body.Value.Value, PFS,
- IRSlots, Error)) {
+ SourceMgr BlockSM;
+ BlockSM.AddNewSourceBuffer(
+      MemoryBuffer::getMemBuffer(BlockStr, "",
+                                 /*RequiresNullTerminator=*/false),
+ SMLoc());
+ PFS.SM = &BlockSM;
+ if (parseMachineBasicBlockDefinitions(PFS, BlockStr, Error)) {
reportDiagnostic(
diagFromBlockStringDiag(Error, YamlMF.Body.Value.SourceRange));
return true;
}
+ PFS.SM = &SM;
if (MF.empty())
return error(Twine("machine function '") + Twine(MF.getName()) +
"' requires at least one machine basic block in its body");
// Initialize the frame information after creating all the MBBs so that the
// MBB references in the frame information can be resolved.
- if (initializeFrameInfo(MF, YamlMF, PFS))
+ if (initializeFrameInfo(PFS, YamlMF))
return true;
// Initialize the jump table after creating all the MBBs so that the MBB
// references can be resolved.
if (!YamlMF.JumpTableInfo.Entries.empty() &&
- initializeJumpTableInfo(MF, YamlMF.JumpTableInfo, PFS))
+ initializeJumpTableInfo(PFS, YamlMF.JumpTableInfo))
return true;
// Parse the machine instructions after creating all of the MBBs so that the
// parser can resolve the MBB references.
- if (parseMachineInstructions(MF, YamlMF.Body.Value.Value, PFS, IRSlots,
- Error)) {
+ StringRef InsnStr = YamlMF.Body.Value.Value;
+ SourceMgr InsnSM;
+ InsnSM.AddNewSourceBuffer(
+ MemoryBuffer::getMemBuffer(InsnStr, "", /*RequiresNullTerminator=*/false),
+ SMLoc());
+ PFS.SM = &InsnSM;
+ if (parseMachineInstructions(PFS, InsnStr, Error)) {
reportDiagnostic(
diagFromBlockStringDiag(Error, YamlMF.Body.Value.SourceRange));
return true;
}
- inferRegisterInfo(MF, YamlMF);
+ PFS.SM = &SM;
+
+ inferRegisterInfo(PFS, YamlMF);
// FIXME: This is a temporary workaround until the reserved registers can be
// serialized.
MF.getRegInfo().freezeReservedRegs(MF);
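The PFS.SM swap-and-restore around each of the two parses above is, in effect, a save/restore guard written out by hand. Expressed as RAII it would look like the following (purely illustrative; no such helper exists in this change):

    // Hypothetical guard: point PFS.SM at a per-buffer SourceMgr for the
    // duration of one parse, restoring the function-level SourceMgr on exit.
    struct SourceMgrSwap {
      PerFunctionMIParsingState &PFS;
      SourceMgr *Saved;
      SourceMgrSwap(PerFunctionMIParsingState &P, SourceMgr &Tmp)
          : PFS(P), Saved(P.SM) {
        PFS.SM = &Tmp;
      }
      ~SourceMgrSwap() { PFS.SM = Saved; }
    };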
@@ -332,9 +351,9 @@ bool MIRParserImpl::initializeMachineFunction(MachineFunction &MF) {
return false;
}
-bool MIRParserImpl::initializeRegisterInfo(MachineFunction &MF,
- const yaml::MachineFunction &YamlMF,
- PerFunctionMIParsingState &PFS) {
+bool MIRParserImpl::initializeRegisterInfo(PerFunctionMIParsingState &PFS,
+ const yaml::MachineFunction &YamlMF) {
+ MachineFunction &MF = PFS.MF;
MachineRegisterInfo &RegInfo = MF.getRegInfo();
assert(RegInfo.isSSA());
if (!YamlMF.IsSSA)
@@ -347,12 +366,28 @@ bool MIRParserImpl::initializeRegisterInfo(MachineFunction &MF,
SMDiagnostic Error;
// Parse the virtual register information.
for (const auto &VReg : YamlMF.VirtualRegisters) {
- const auto *RC = getRegClass(MF, VReg.Class.Value);
- if (!RC)
- return error(VReg.Class.SourceRange.Start,
- Twine("use of undefined register class '") +
- VReg.Class.Value + "'");
- unsigned Reg = RegInfo.createVirtualRegister(RC);
+ unsigned Reg;
+ if (StringRef(VReg.Class.Value).equals("_")) {
+ // This is a generic virtual register.
+ // The size will be set appropriately when we reach the definition.
+ Reg = RegInfo.createGenericVirtualRegister(/*Size*/ 1);
+ PFS.GenericVRegs.insert(Reg);
+ } else {
+ const auto *RC = getRegClass(MF, VReg.Class.Value);
+ if (RC) {
+ Reg = RegInfo.createVirtualRegister(RC);
+ } else {
+ const auto *RegBank = getRegBank(MF, VReg.Class.Value);
+ if (!RegBank)
+ return error(
+ VReg.Class.SourceRange.Start,
+ Twine("use of undefined register class or register bank '") +
+ VReg.Class.Value + "'");
+ Reg = RegInfo.createGenericVirtualRegister(/*Size*/ 1);
+ RegInfo.setRegBank(Reg, *RegBank);
+ PFS.GenericVRegs.insert(Reg);
+ }
+ }
if (!PFS.VirtualRegisterSlots.insert(std::make_pair(VReg.ID.Value, Reg))
.second)
return error(VReg.ID.SourceRange.Start,
@@ -360,9 +395,8 @@ bool MIRParserImpl::initializeRegisterInfo(MachineFunction &MF,
Twine(VReg.ID.Value) + "'");
if (!VReg.PreferredRegister.Value.empty()) {
unsigned PreferredReg = 0;
- if (parseNamedRegisterReference(PreferredReg, SM, MF,
- VReg.PreferredRegister.Value, PFS,
- IRSlots, Error))
+ if (parseNamedRegisterReference(PFS, PreferredReg,
+ VReg.PreferredRegister.Value, Error))
return error(Error, VReg.PreferredRegister.SourceRange);
RegInfo.setSimpleHint(Reg, PreferredReg);
}
@@ -371,13 +405,12 @@ bool MIRParserImpl::initializeRegisterInfo(MachineFunction &MF,
// Parse the liveins.
for (const auto &LiveIn : YamlMF.LiveIns) {
unsigned Reg = 0;
- if (parseNamedRegisterReference(Reg, SM, MF, LiveIn.Register.Value, PFS,
- IRSlots, Error))
+ if (parseNamedRegisterReference(PFS, Reg, LiveIn.Register.Value, Error))
return error(Error, LiveIn.Register.SourceRange);
unsigned VReg = 0;
if (!LiveIn.VirtualRegister.Value.empty()) {
- if (parseVirtualRegisterReference(
- VReg, SM, MF, LiveIn.VirtualRegister.Value, PFS, IRSlots, Error))
+ if (parseVirtualRegisterReference(PFS, VReg, LiveIn.VirtualRegister.Value,
+ Error))
return error(Error, LiveIn.VirtualRegister.SourceRange);
}
RegInfo.addLiveIn(Reg, VReg);
@@ -389,8 +422,7 @@ bool MIRParserImpl::initializeRegisterInfo(MachineFunction &MF,
return false;
for (const auto &RegSource : YamlMF.CalleeSavedRegisters.getValue()) {
unsigned Reg = 0;
- if (parseNamedRegisterReference(Reg, SM, MF, RegSource.Value, PFS, IRSlots,
- Error))
+ if (parseNamedRegisterReference(PFS, Reg, RegSource.Value, Error))
return error(Error, RegSource.SourceRange);
CalleeSavedRegisterMask[Reg] = true;
}
@@ -398,24 +430,25 @@ bool MIRParserImpl::initializeRegisterInfo(MachineFunction &MF,
return false;
}
-void MIRParserImpl::inferRegisterInfo(MachineFunction &MF,
+void MIRParserImpl::inferRegisterInfo(const PerFunctionMIParsingState &PFS,
const yaml::MachineFunction &YamlMF) {
if (YamlMF.CalleeSavedRegisters)
return;
- for (const MachineBasicBlock &MBB : MF) {
+ MachineRegisterInfo &MRI = PFS.MF.getRegInfo();
+ for (const MachineBasicBlock &MBB : PFS.MF) {
for (const MachineInstr &MI : MBB) {
for (const MachineOperand &MO : MI.operands()) {
if (!MO.isRegMask())
continue;
- MF.getRegInfo().addPhysRegsUsedFromRegMask(MO.getRegMask());
+ MRI.addPhysRegsUsedFromRegMask(MO.getRegMask());
}
}
}
}
-bool MIRParserImpl::initializeFrameInfo(MachineFunction &MF,
- const yaml::MachineFunction &YamlMF,
- PerFunctionMIParsingState &PFS) {
+bool MIRParserImpl::initializeFrameInfo(PerFunctionMIParsingState &PFS,
+ const yaml::MachineFunction &YamlMF) {
+ MachineFunction &MF = PFS.MF;
MachineFrameInfo &MFI = *MF.getFrameInfo();
const Function &F = *MF.getFunction();
const yaml::MachineFrameInfo &YamlMFI = YamlMF.FrameInfo;
@@ -435,13 +468,13 @@ bool MIRParserImpl::initializeFrameInfo(MachineFunction &MF,
MFI.setHasMustTailInVarArgFunc(YamlMFI.HasMustTailInVarArgFunc);
if (!YamlMFI.SavePoint.Value.empty()) {
MachineBasicBlock *MBB = nullptr;
- if (parseMBBReference(MBB, YamlMFI.SavePoint, MF, PFS))
+ if (parseMBBReference(PFS, MBB, YamlMFI.SavePoint))
return true;
MFI.setSavePoint(MBB);
}
if (!YamlMFI.RestorePoint.Value.empty()) {
MachineBasicBlock *MBB = nullptr;
- if (parseMBBReference(MBB, YamlMFI.RestorePoint, MF, PFS))
+ if (parseMBBReference(PFS, MBB, YamlMFI.RestorePoint))
return true;
MFI.setRestorePoint(MBB);
}
@@ -462,7 +495,7 @@ bool MIRParserImpl::initializeFrameInfo(MachineFunction &MF,
return error(Object.ID.SourceRange.Start,
Twine("redefinition of fixed stack object '%fixed-stack.") +
Twine(Object.ID.Value) + "'");
- if (parseCalleeSavedRegister(MF, PFS, CSIInfo, Object.CalleeSavedRegister,
+ if (parseCalleeSavedRegister(PFS, CSIInfo, Object.CalleeSavedRegister,
ObjectIdx))
return true;
}
@@ -493,12 +526,12 @@ bool MIRParserImpl::initializeFrameInfo(MachineFunction &MF,
return error(Object.ID.SourceRange.Start,
Twine("redefinition of stack object '%stack.") +
Twine(Object.ID.Value) + "'");
- if (parseCalleeSavedRegister(MF, PFS, CSIInfo, Object.CalleeSavedRegister,
+ if (parseCalleeSavedRegister(PFS, CSIInfo, Object.CalleeSavedRegister,
ObjectIdx))
return true;
if (Object.LocalOffset)
MFI.mapLocalFrameObject(ObjectIdx, Object.LocalOffset.getValue());
- if (parseStackObjectsDebugInfo(MF, PFS, Object, ObjectIdx))
+ if (parseStackObjectsDebugInfo(PFS, Object, ObjectIdx))
return true;
}
MFI.setCalleeSavedInfo(CSIInfo);
@@ -510,24 +543,21 @@ bool MIRParserImpl::initializeFrameInfo(MachineFunction &MF,
if (!YamlMFI.StackProtector.Value.empty()) {
SMDiagnostic Error;
int FI;
- if (parseStackObjectReference(FI, SM, MF, YamlMFI.StackProtector.Value, PFS,
- IRSlots, Error))
+ if (parseStackObjectReference(PFS, FI, YamlMFI.StackProtector.Value, Error))
return error(Error, YamlMFI.StackProtector.SourceRange);
MFI.setStackProtectorIndex(FI);
}
return false;
}
-bool MIRParserImpl::parseCalleeSavedRegister(
- MachineFunction &MF, PerFunctionMIParsingState &PFS,
+bool MIRParserImpl::parseCalleeSavedRegister(PerFunctionMIParsingState &PFS,
std::vector<CalleeSavedInfo> &CSIInfo,
const yaml::StringValue &RegisterSource, int FrameIdx) {
if (RegisterSource.Value.empty())
return false;
unsigned Reg = 0;
SMDiagnostic Error;
- if (parseNamedRegisterReference(Reg, SM, MF, RegisterSource.Value, PFS,
- IRSlots, Error))
+ if (parseNamedRegisterReference(PFS, Reg, RegisterSource.Value, Error))
return error(Error, RegisterSource.SourceRange);
CSIInfo.push_back(CalleeSavedInfo(Reg, FrameIdx));
return false;
@@ -548,16 +578,15 @@ static bool typecheckMDNode(T *&Result, MDNode *Node,
return false;
}
-bool MIRParserImpl::parseStackObjectsDebugInfo(
- MachineFunction &MF, PerFunctionMIParsingState &PFS,
+bool MIRParserImpl::parseStackObjectsDebugInfo(PerFunctionMIParsingState &PFS,
const yaml::MachineStackObject &Object, int FrameIdx) {
// Debug information can only be attached to stack objects; Fixed stack
// objects aren't supported.
assert(FrameIdx >= 0 && "Expected a stack object frame index");
MDNode *Var = nullptr, *Expr = nullptr, *Loc = nullptr;
- if (parseMDNode(Var, Object.DebugVar, MF, PFS) ||
- parseMDNode(Expr, Object.DebugExpr, MF, PFS) ||
- parseMDNode(Loc, Object.DebugLoc, MF, PFS))
+ if (parseMDNode(PFS, Var, Object.DebugVar) ||
+ parseMDNode(PFS, Expr, Object.DebugExpr) ||
+ parseMDNode(PFS, Loc, Object.DebugLoc))
return true;
if (!Var && !Expr && !Loc)
return false;
@@ -568,25 +597,24 @@ bool MIRParserImpl::parseStackObjectsDebugInfo(
typecheckMDNode(DIExpr, Expr, Object.DebugExpr, "DIExpression", *this) ||
typecheckMDNode(DILoc, Loc, Object.DebugLoc, "DILocation", *this))
return true;
- MF.getMMI().setVariableDbgInfo(DIVar, DIExpr, unsigned(FrameIdx), DILoc);
+ PFS.MF.getMMI().setVariableDbgInfo(DIVar, DIExpr, unsigned(FrameIdx), DILoc);
return false;
}
-bool MIRParserImpl::parseMDNode(MDNode *&Node, const yaml::StringValue &Source,
- MachineFunction &MF,
- const PerFunctionMIParsingState &PFS) {
+bool MIRParserImpl::parseMDNode(const PerFunctionMIParsingState &PFS,
+ MDNode *&Node, const yaml::StringValue &Source) {
if (Source.Value.empty())
return false;
SMDiagnostic Error;
- if (llvm::parseMDNode(Node, SM, MF, Source.Value, PFS, IRSlots, Error))
+ if (llvm::parseMDNode(PFS, Node, Source.Value, Error))
return error(Error, Source.SourceRange);
return false;
}
-bool MIRParserImpl::initializeConstantPool(
- MachineConstantPool &ConstantPool, const yaml::MachineFunction &YamlMF,
- const MachineFunction &MF,
- DenseMap<unsigned, unsigned> &ConstantPoolSlots) {
+bool MIRParserImpl::initializeConstantPool(PerFunctionMIParsingState &PFS,
+ MachineConstantPool &ConstantPool, const yaml::MachineFunction &YamlMF) {
+ DenseMap<unsigned, unsigned> &ConstantPoolSlots = PFS.ConstantPoolSlots;
+ const MachineFunction &MF = PFS.MF;
const auto &M = *MF.getFunction()->getParent();
SMDiagnostic Error;
for (const auto &YamlConstant : YamlMF.Constants) {
@@ -608,15 +636,14 @@ bool MIRParserImpl::initializeConstantPool(
return false;
}
-bool MIRParserImpl::initializeJumpTableInfo(
- MachineFunction &MF, const yaml::MachineJumpTable &YamlJTI,
- PerFunctionMIParsingState &PFS) {
- MachineJumpTableInfo *JTI = MF.getOrCreateJumpTableInfo(YamlJTI.Kind);
+bool MIRParserImpl::initializeJumpTableInfo(PerFunctionMIParsingState &PFS,
+ const yaml::MachineJumpTable &YamlJTI) {
+ MachineJumpTableInfo *JTI = PFS.MF.getOrCreateJumpTableInfo(YamlJTI.Kind);
for (const auto &Entry : YamlJTI.Entries) {
std::vector<MachineBasicBlock *> Blocks;
for (const auto &MBBSource : Entry.Blocks) {
MachineBasicBlock *MBB = nullptr;
- if (parseMBBReference(MBB, MBBSource.Value, MF, PFS))
+ if (parseMBBReference(PFS, MBB, MBBSource.Value))
return true;
Blocks.push_back(MBB);
}
@@ -630,12 +657,11 @@ bool MIRParserImpl::initializeJumpTableInfo(
return false;
}
-bool MIRParserImpl::parseMBBReference(MachineBasicBlock *&MBB,
- const yaml::StringValue &Source,
- MachineFunction &MF,
- const PerFunctionMIParsingState &PFS) {
+bool MIRParserImpl::parseMBBReference(const PerFunctionMIParsingState &PFS,
+ MachineBasicBlock *&MBB,
+ const yaml::StringValue &Source) {
SMDiagnostic Error;
- if (llvm::parseMBBReference(MBB, SM, MF, Source.Value, PFS, IRSlots, Error))
+ if (llvm::parseMBBReference(PFS, MBB, Source.Value, Error))
return error(Error, Source.SourceRange);
return false;
}
@@ -698,6 +724,21 @@ void MIRParserImpl::initNames2RegClasses(const MachineFunction &MF) {
}
}
+void MIRParserImpl::initNames2RegBanks(const MachineFunction &MF) {
+ if (!Names2RegBanks.empty())
+ return;
+ const RegisterBankInfo *RBI = MF.getSubtarget().getRegBankInfo();
+ // If the target does not support GlobalISel, we may not have a
+ // register bank info.
+ if (!RBI)
+ return;
+ for (unsigned I = 0, E = RBI->getNumRegBanks(); I < E; ++I) {
+ const auto &RegBank = RBI->getRegBank(I);
+ Names2RegBanks.insert(
+ std::make_pair(StringRef(RegBank.getName()).lower(), &RegBank));
+ }
+}
+
const TargetRegisterClass *MIRParserImpl::getRegClass(const MachineFunction &MF,
StringRef Name) {
initNames2RegClasses(MF);
@@ -707,6 +748,15 @@ const TargetRegisterClass *MIRParserImpl::getRegClass(const MachineFunction &MF,
return RegClassInfo->getValue();
}
+const RegisterBank *MIRParserImpl::getRegBank(const MachineFunction &MF,
+ StringRef Name) {
+ initNames2RegBanks(MF);
+ auto RegBankInfo = Names2RegBanks.find(Name);
+ if (RegBankInfo == Names2RegBanks.end())
+ return nullptr;
+ return RegBankInfo->getValue();
+}
+
MIRParser::MIRParser(std::unique_ptr<MIRParserImpl> Impl)
: Impl(std::move(Impl)) {}
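Taken together, initializeRegisterInfo now resolves a virtual register's class string in three steps: the "_" placeholder yields a generic virtual register, a known register class name yields an ordinary one, and any other string is tried as a register bank name before an error is diagnosed. A condensed restatement of that fallback, assuming the class and bank have already been looked up as above (the helper itself is hypothetical, not code in this change):

    #include "llvm/CodeGen/GlobalISel/RegisterBank.h"
    #include "llvm/CodeGen/MachineRegisterInfo.h"
    using namespace llvm;

    // ClassStr, RC and Bank mirror the values computed in
    // initializeRegisterInfo; the caller diagnoses unknown names.
    static unsigned createParsedVReg(StringRef ClassStr,
                                     MachineRegisterInfo &MRI,
                                     const TargetRegisterClass *RC,
                                     const RegisterBank *Bank) {
      if (ClassStr == "_")
        return MRI.createGenericVirtualRegister(/*Size*/ 1); // sized at its def
      if (RC)
        return MRI.createVirtualRegister(RC);
      unsigned Reg = MRI.createGenericVirtualRegister(/*Size*/ 1);
      MRI.setRegBank(Reg, *Bank); // bank known now, size still deferred
      return Reg;
    }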
diff --git a/contrib/llvm/lib/CodeGen/MIRPrinter.cpp b/contrib/llvm/lib/CodeGen/MIRPrinter.cpp
index 175cb0d..703c99d 100644
--- a/contrib/llvm/lib/CodeGen/MIRPrinter.cpp
+++ b/contrib/llvm/lib/CodeGen/MIRPrinter.cpp
@@ -14,23 +14,25 @@
#include "MIRPrinter.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/CodeGen/GlobalISel/RegisterBank.h"
+#include "llvm/CodeGen/MIRYamlMapping.h"
#include "llvm/CodeGen/MachineConstantPool.h"
-#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/MIRYamlMapping.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constants.h"
-#include "llvm/IR/Instructions.h"
+#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/IRPrintingPasses.h"
+#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/ModuleSlotTracker.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/MemoryBuffer.h"
-#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/YAMLTraits.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
@@ -118,7 +120,8 @@ public:
void printOffset(int64_t Offset);
void printTargetFlags(const MachineOperand &Op);
void print(const MachineOperand &Op, const TargetRegisterInfo *TRI,
- unsigned I, bool ShouldPrintRegisterTies, bool IsDef = false);
+ unsigned I, bool ShouldPrintRegisterTies,
+ const MachineRegisterInfo *MRI = nullptr, bool IsDef = false);
void print(const MachineMemOperand &Op);
void print(const MCCFIInstruction &CFI, const TargetRegisterInfo *TRI);
@@ -170,6 +173,9 @@ void MIRPrinter::print(const MachineFunction &MF) {
YamlMF.Alignment = MF.getAlignment();
YamlMF.ExposesReturnsTwice = MF.exposesReturnsTwice();
YamlMF.HasInlineAsm = MF.hasInlineAsm();
+ YamlMF.AllVRegsAllocated = MF.getProperties().hasProperty(
+ MachineFunctionProperties::Property::AllVRegsAllocated);
+
convert(YamlMF, MF.getRegInfo(), MF.getSubtarget().getRegisterInfo());
ModuleSlotTracker MST(MF.getFunction()->getParent());
MST.incorporateFunction(*MF.getFunction());
@@ -206,8 +212,15 @@ void MIRPrinter::convert(yaml::MachineFunction &MF,
unsigned Reg = TargetRegisterInfo::index2VirtReg(I);
yaml::VirtualRegisterDefinition VReg;
VReg.ID = I;
- VReg.Class =
- StringRef(TRI->getRegClassName(RegInfo.getRegClass(Reg))).lower();
+ if (RegInfo.getRegClassOrNull(Reg))
+ VReg.Class =
+ StringRef(TRI->getRegClassName(RegInfo.getRegClass(Reg))).lower();
+ else if (RegInfo.getRegBankOrNull(Reg))
+ VReg.Class = StringRef(RegInfo.getRegBankOrNull(Reg)->getName()).lower();
+ else {
+ VReg.Class = std::string("_");
+ assert(RegInfo.getSize(Reg) && "Generic registers must have a size");
+ }
unsigned PreferredReg = RegInfo.getSimpleHint(Reg);
if (PreferredReg)
printReg(PreferredReg, VReg.PreferredRegister, TRI);
@@ -525,7 +538,9 @@ static bool hasComplexRegisterTies(const MachineInstr &MI) {
}
void MIPrinter::print(const MachineInstr &MI) {
- const auto &SubTarget = MI.getParent()->getParent()->getSubtarget();
+ const auto *MF = MI.getParent()->getParent();
+ const auto &MRI = MF->getRegInfo();
+ const auto &SubTarget = MF->getSubtarget();
const auto *TRI = SubTarget.getRegisterInfo();
assert(TRI && "Expected target register info");
const auto *TII = SubTarget.getInstrInfo();
@@ -540,7 +555,8 @@ void MIPrinter::print(const MachineInstr &MI) {
++I) {
if (I)
OS << ", ";
- print(MI.getOperand(I), TRI, I, ShouldPrintRegisterTies, /*IsDef=*/true);
+ print(MI.getOperand(I), TRI, I, ShouldPrintRegisterTies, &MRI,
+ /*IsDef=*/true);
}
if (I)
@@ -548,6 +564,11 @@ void MIPrinter::print(const MachineInstr &MI) {
if (MI.getFlag(MachineInstr::FrameSetup))
OS << "frame-setup ";
OS << TII->getName(MI.getOpcode());
+ if (isPreISelGenericOpcode(MI.getOpcode())) {
+ assert(MI.getType() && "Generic instructions must have a type");
+ OS << ' ';
+ MI.getType()->print(OS, /*IsForDebug*/ false, /*NoDetails*/ true);
+ }
if (I < E)
OS << ' ';
@@ -727,7 +748,8 @@ static const char *getTargetIndexName(const MachineFunction &MF, int Index) {
}
void MIPrinter::print(const MachineOperand &Op, const TargetRegisterInfo *TRI,
- unsigned I, bool ShouldPrintRegisterTies, bool IsDef) {
+ unsigned I, bool ShouldPrintRegisterTies,
+ const MachineRegisterInfo *MRI, bool IsDef) {
printTargetFlags(Op);
switch (Op.getType()) {
case MachineOperand::MO_Register:
@@ -754,6 +776,9 @@ void MIPrinter::print(const MachineOperand &Op, const TargetRegisterInfo *TRI,
OS << ':' << TRI->getSubRegIndexName(Op.getSubReg());
if (ShouldPrintRegisterTies && Op.isTied() && !Op.isDef())
OS << "(tied-def " << Op.getParent()->findTiedOperandIdx(I) << ")";
+ assert((!IsDef || MRI) && "for IsDef, MRI must be provided");
+ if (IsDef && MRI->getSize(Op.getReg()))
+ OS << '(' << MRI->getSize(Op.getReg()) << ')';
break;
case MachineOperand::MO_Immediate:
OS << Op.getImm();
@@ -858,11 +883,12 @@ void MIPrinter::print(const MachineMemOperand &Op) {
assert(Op.isStore() && "Non load machine operand must be a store");
OS << "store ";
}
- OS << Op.getSize() << (Op.isLoad() ? " from " : " into ");
+ OS << Op.getSize();
if (const Value *Val = Op.getValue()) {
+ OS << (Op.isLoad() ? " from " : " into ");
printIRValueReference(*Val);
- } else {
- const PseudoSourceValue *PVal = Op.getPseudoValue();
+ } else if (const PseudoSourceValue *PVal = Op.getPseudoValue()) {
+ OS << (Op.isLoad() ? " from " : " into ");
assert(PVal && "Expected a pseudo source value");
switch (PVal->kind()) {
case PseudoSourceValue::Stack:
diff --git a/contrib/llvm/lib/CodeGen/MachineBasicBlock.cpp b/contrib/llvm/lib/CodeGen/MachineBasicBlock.cpp
index 85d544d..689dd07 100644
--- a/contrib/llvm/lib/CodeGen/MachineBasicBlock.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineBasicBlock.cpp
@@ -13,7 +13,6 @@
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/SmallString.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/LiveVariables.h"
#include "llvm/CodeGen/MachineDominators.h"
@@ -199,16 +198,6 @@ MachineBasicBlock::iterator MachineBasicBlock::getLastNonDebugInstr() {
return end();
}
-const MachineBasicBlock *MachineBasicBlock::getLandingPadSuccessor() const {
- // A block with a landing pad successor only has one other successor.
- if (succ_size() > 2)
- return nullptr;
- for (const_succ_iterator I = succ_begin(), E = succ_end(); I != E; ++I)
- if ((*I)->isEHPad())
- return *I;
- return nullptr;
-}
-
bool MachineBasicBlock::hasEHPadSuccessor() const {
for (const_succ_iterator I = succ_begin(), E = succ_end(); I != E; ++I)
if ((*I)->isEHPad())
@@ -217,7 +206,7 @@ bool MachineBasicBlock::hasEHPadSuccessor() const {
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
-void MachineBasicBlock::dump() const {
+LLVM_DUMP_METHOD void MachineBasicBlock::dump() const {
print(dbgs());
}
#endif
@@ -241,7 +230,8 @@ std::string MachineBasicBlock::getFullName() const {
return Name;
}
-void MachineBasicBlock::print(raw_ostream &OS, SlotIndexes *Indexes) const {
+void MachineBasicBlock::print(raw_ostream &OS,
+                              const SlotIndexes *Indexes) const {
const MachineFunction *MF = getParent();
if (!MF) {
OS << "Can't print out MachineBasicBlock because parent MachineFunction"
@@ -255,7 +245,7 @@ void MachineBasicBlock::print(raw_ostream &OS, SlotIndexes *Indexes) const {
}
void MachineBasicBlock::print(raw_ostream &OS, ModuleSlotTracker &MST,
- SlotIndexes *Indexes) const {
+ const SlotIndexes *Indexes) const {
const MachineFunction *MF = getParent();
if (!MF) {
OS << "Can't print out MachineBasicBlock because parent MachineFunction"
@@ -302,16 +292,16 @@ void MachineBasicBlock::print(raw_ostream &OS, ModuleSlotTracker &MST,
OS << '\n';
}
- for (const_instr_iterator I = instr_begin(); I != instr_end(); ++I) {
+ for (auto &I : instrs()) {
if (Indexes) {
- if (Indexes->hasIndex(&*I))
- OS << Indexes->getInstructionIndex(&*I);
+ if (Indexes->hasIndex(I))
+ OS << Indexes->getInstructionIndex(I);
OS << '\t';
}
OS << '\t';
- if (I->isInsideBundle())
+ if (I.isInsideBundle())
OS << " * ";
- I->print(OS, MST);
+ I.print(OS, MST);
}
// Print the successors of this block according to the CFG.
@@ -414,24 +404,25 @@ void MachineBasicBlock::moveAfter(MachineBasicBlock *NewBefore) {
void MachineBasicBlock::updateTerminator() {
const TargetInstrInfo *TII = getParent()->getSubtarget().getInstrInfo();
// A block with no successors has no concerns with fall-through edges.
- if (this->succ_empty()) return;
+ if (this->succ_empty())
+ return;
MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
SmallVector<MachineOperand, 4> Cond;
DebugLoc DL; // FIXME: this is nowhere
- bool B = TII->AnalyzeBranch(*this, TBB, FBB, Cond);
+ bool B = TII->analyzeBranch(*this, TBB, FBB, Cond);
(void) B;
assert(!B && "UpdateTerminators requires analyzable predecessors!");
if (Cond.empty()) {
if (TBB) {
- // The block has an unconditional branch. If its successor is now
- // its layout successor, delete the branch.
+ // The block has an unconditional branch. If its successor is now its
+ // layout successor, delete the branch.
if (isLayoutSuccessor(TBB))
TII->RemoveBranch(*this);
} else {
- // The block has an unconditional fallthrough. If its successor is not
- // its layout successor, insert a branch. First we have to locate the
- // only non-landing-pad successor, as that is the fallthrough block.
+ // The block has an unconditional fallthrough. If its successor is not its
+ // layout successor, insert a branch. First we have to locate the only
+ // non-landing-pad successor, as that is the fallthrough block.
for (succ_iterator SI = succ_begin(), SE = succ_end(); SI != SE; ++SI) {
if ((*SI)->isEHPad())
continue;
@@ -439,8 +430,8 @@ void MachineBasicBlock::updateTerminator() {
TBB = *SI;
}
- // If there is no non-landing-pad successor, the block has no
- // fall-through edges to be concerned with.
+ // If there is no non-landing-pad successor, the block has no fall-through
+ // edges to be concerned with.
if (!TBB)
return;
@@ -449,61 +440,73 @@ void MachineBasicBlock::updateTerminator() {
if (!isLayoutSuccessor(TBB))
TII->InsertBranch(*this, TBB, nullptr, Cond, DL);
}
- } else {
- if (FBB) {
- // The block has a non-fallthrough conditional branch. If one of its
- // successors is its layout successor, rewrite it to a fallthrough
- // conditional branch.
- if (isLayoutSuccessor(TBB)) {
- if (TII->ReverseBranchCondition(Cond))
- return;
- TII->RemoveBranch(*this);
- TII->InsertBranch(*this, FBB, nullptr, Cond, DL);
- } else if (isLayoutSuccessor(FBB)) {
- TII->RemoveBranch(*this);
- TII->InsertBranch(*this, TBB, nullptr, Cond, DL);
- }
- } else {
- // Walk through the successors and find the successor which is not
- // a landing pad and is not the conditional branch destination (in TBB)
- // as the fallthrough successor.
- MachineBasicBlock *FallthroughBB = nullptr;
- for (succ_iterator SI = succ_begin(), SE = succ_end(); SI != SE; ++SI) {
- if ((*SI)->isEHPad() || *SI == TBB)
- continue;
- assert(!FallthroughBB && "Found more than one fallthrough successor.");
- FallthroughBB = *SI;
- }
- if (!FallthroughBB && canFallThrough()) {
- // We fallthrough to the same basic block as the conditional jump
- // targets. Remove the conditional jump, leaving unconditional
- // fallthrough.
- // FIXME: This does not seem like a reasonable pattern to support, but
- // it has been seen in the wild coming out of degenerate ARM test cases.
- TII->RemoveBranch(*this);
+ return;
+ }
- // Finally update the unconditional successor to be reached via a branch
- // if it would not be reached by fallthrough.
- if (!isLayoutSuccessor(TBB))
- TII->InsertBranch(*this, TBB, nullptr, Cond, DL);
+ if (FBB) {
+ // The block has a non-fallthrough conditional branch. If one of its
+ // successors is its layout successor, rewrite it to a fallthrough
+ // conditional branch.
+ if (isLayoutSuccessor(TBB)) {
+ if (TII->ReverseBranchCondition(Cond))
return;
- }
+ TII->RemoveBranch(*this);
+ TII->InsertBranch(*this, FBB, nullptr, Cond, DL);
+ } else if (isLayoutSuccessor(FBB)) {
+ TII->RemoveBranch(*this);
+ TII->InsertBranch(*this, TBB, nullptr, Cond, DL);
+ }
+ return;
+ }
- // The block has a fallthrough conditional branch.
- if (isLayoutSuccessor(TBB)) {
- if (TII->ReverseBranchCondition(Cond)) {
- // We can't reverse the condition, add an unconditional branch.
- Cond.clear();
- TII->InsertBranch(*this, FallthroughBB, nullptr, Cond, DL);
- return;
- }
- TII->RemoveBranch(*this);
- TII->InsertBranch(*this, FallthroughBB, nullptr, Cond, DL);
- } else if (!isLayoutSuccessor(FallthroughBB)) {
- TII->RemoveBranch(*this);
- TII->InsertBranch(*this, TBB, FallthroughBB, Cond, DL);
- }
+ // Walk through the successors and find the successor which is not a landing
+ // pad and is not the conditional branch destination (in TBB) as the
+ // fallthrough successor.
+ MachineBasicBlock *FallthroughBB = nullptr;
+ for (succ_iterator SI = succ_begin(), SE = succ_end(); SI != SE; ++SI) {
+ if ((*SI)->isEHPad() || *SI == TBB)
+ continue;
+ assert(!FallthroughBB && "Found more than one fallthrough successor.");
+ FallthroughBB = *SI;
+ }
+
+ if (!FallthroughBB) {
+ if (canFallThrough()) {
+      // We fall through to the same basic block that the conditional jump
+      // targets. Remove the conditional jump, leaving an unconditional
+      // fallthrough.
+ // FIXME: This does not seem like a reasonable pattern to support, but it
+ // has been seen in the wild coming out of degenerate ARM test cases.
+ TII->RemoveBranch(*this);
+
+ // Finally update the unconditional successor to be reached via a branch if
+ // it would not be reached by fallthrough.
+ if (!isLayoutSuccessor(TBB))
+ TII->InsertBranch(*this, TBB, nullptr, Cond, DL);
+ return;
+ }
+
+    // We only get here when exactly one successor, TBB, cannot be reached by
+    // fallthrough and any remaining successors are EH pads. In this case,
+    // change the conditional branch into an unconditional branch.
+ TII->RemoveBranch(*this);
+ Cond.clear();
+ TII->InsertBranch(*this, TBB, nullptr, Cond, DL);
+ return;
+ }
+
+ // The block has a fallthrough conditional branch.
+ if (isLayoutSuccessor(TBB)) {
+ if (TII->ReverseBranchCondition(Cond)) {
+ // We can't reverse the condition, add an unconditional branch.
+ Cond.clear();
+ TII->InsertBranch(*this, FallthroughBB, nullptr, Cond, DL);
+ return;
}
+ TII->RemoveBranch(*this);
+ TII->InsertBranch(*this, FallthroughBB, nullptr, Cond, DL);
+ } else if (!isLayoutSuccessor(FallthroughBB)) {
+ TII->RemoveBranch(*this);
+ TII->InsertBranch(*this, TBB, FallthroughBB, Cond, DL);
}
}
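After this restructuring the nested else-chains of updateTerminator become a sequence of early-returning cases; a comment-only skeleton of the new shape:

    void MachineBasicBlock::updateTerminator() {
      // 1. No successors: nothing to update.
      // 2. Cond.empty(): an unconditional branch or pure fallthrough; insert
      //    or remove the branch against the layout successor, then return.
      // 3. Conditional branch with an explicit FBB: rewrite so that whichever
      //    of TBB/FBB is the layout successor falls through, then return.
      // 4. Conditional branch plus fallthrough: find the single non-EH-pad,
      //    non-TBB successor (FallthroughBB) and rewrite, folding away the
      //    degenerate self-fallthrough and EH-pad-only cases first.
    }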
@@ -685,13 +688,13 @@ bool MachineBasicBlock::canFallThrough() {
MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
SmallVector<MachineOperand, 4> Cond;
const TargetInstrInfo *TII = getParent()->getSubtarget().getInstrInfo();
- if (TII->AnalyzeBranch(*this, TBB, FBB, Cond)) {
+ if (TII->analyzeBranch(*this, TBB, FBB, Cond)) {
// If we couldn't analyze the branch, examine the last instruction.
// If the block doesn't end in a known control barrier, assume fallthrough
// is possible. The isPredicated check is needed because this code can be
// called during IfConversion, where an instruction which is normally a
// Barrier is predicated and thus no longer an actual control barrier.
- return empty() || !back().isBarrier() || TII->isPredicated(&back());
+ return empty() || !back().isBarrier() || TII->isPredicated(back());
}
// If there is no branch, control always falls through.
@@ -712,39 +715,14 @@ bool MachineBasicBlock::canFallThrough() {
return FBB == nullptr;
}
-MachineBasicBlock *
-MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) {
- // Splitting the critical edge to a landing pad block is non-trivial. Don't do
- // it in this generic function.
- if (Succ->isEHPad())
+MachineBasicBlock *MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ,
+ Pass &P) {
+ if (!canSplitCriticalEdge(Succ))
return nullptr;
MachineFunction *MF = getParent();
DebugLoc DL; // FIXME: this is nowhere
- // Performance might be harmed on HW that implements branching using exec mask
- // where both sides of the branches are always executed.
- if (MF->getTarget().requiresStructuredCFG())
- return nullptr;
-
- // We may need to update this's terminator, but we can't do that if
- // AnalyzeBranch fails. If this uses a jump table, we won't touch it.
- const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
- MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
- SmallVector<MachineOperand, 4> Cond;
- if (TII->AnalyzeBranch(*this, TBB, FBB, Cond))
- return nullptr;
-
- // Avoid bugpoint weirdness: A block may end with a conditional branch but
- // jumps to the same MBB is either case. We have duplicate CFG edges in that
- // case that we can't handle. Since this never happens in properly optimized
- // code, just skip those edges.
- if (TBB && TBB == FBB) {
- DEBUG(dbgs() << "Won't split critical edge after degenerate BB#"
- << getNumber() << '\n');
- return nullptr;
- }
-
MachineBasicBlock *NMBB = MF->CreateMachineBasicBlock();
MF->insert(std::next(MachineFunction::iterator(this)), NMBB);
DEBUG(dbgs() << "Splitting critical edge:"
@@ -752,8 +730,8 @@ MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) {
<< " -- BB#" << NMBB->getNumber()
<< " -- BB#" << Succ->getNumber() << '\n');
- LiveIntervals *LIS = P->getAnalysisIfAvailable<LiveIntervals>();
- SlotIndexes *Indexes = P->getAnalysisIfAvailable<SlotIndexes>();
+ LiveIntervals *LIS = P.getAnalysisIfAvailable<LiveIntervals>();
+ SlotIndexes *Indexes = P.getAnalysisIfAvailable<SlotIndexes>();
if (LIS)
LIS->insertMBBInMaps(NMBB);
else if (Indexes)
@@ -762,7 +740,7 @@ MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) {
// On some targets like Mips, branches may kill virtual registers. Make sure
// that LiveVariables is properly updated after updateTerminator replaces the
// terminators.
- LiveVariables *LV = P->getAnalysisIfAvailable<LiveVariables>();
+ LiveVariables *LV = P.getAnalysisIfAvailable<LiveVariables>();
// Collect a list of virtual registers killed by the terminators.
SmallVector<unsigned, 4> KilledRegs;
@@ -777,7 +755,7 @@ MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) {
continue;
unsigned Reg = OI->getReg();
if (TargetRegisterInfo::isPhysicalRegister(Reg) ||
- LV->getVarInfo(Reg).removeKill(MI)) {
+ LV->getVarInfo(Reg).removeKill(*MI)) {
KilledRegs.push_back(Reg);
DEBUG(dbgs() << "Removing terminator kill: " << *MI);
OI->setIsKill(false);
@@ -826,24 +804,24 @@ MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) {
E = Terminators.end(); I != E; ++I) {
if (std::find(NewTerminators.begin(), NewTerminators.end(), *I) ==
NewTerminators.end())
- Indexes->removeMachineInstrFromMaps(*I);
+ Indexes->removeMachineInstrFromMaps(**I);
}
}
// Insert unconditional "jump Succ" instruction in NMBB if necessary.
NMBB->addSuccessor(Succ);
if (!NMBB->isLayoutSuccessor(Succ)) {
- Cond.clear();
+ SmallVector<MachineOperand, 4> Cond;
+ const TargetInstrInfo *TII = getParent()->getSubtarget().getInstrInfo();
TII->InsertBranch(*NMBB, Succ, nullptr, Cond, DL);
if (Indexes) {
- for (instr_iterator I = NMBB->instr_begin(), E = NMBB->instr_end();
- I != E; ++I) {
+ for (MachineInstr &MI : NMBB->instrs()) {
// Some instructions may have been moved to NMBB by updateTerminator(),
// so we first remove any instruction that already has an index.
- if (Indexes->hasIndex(&*I))
- Indexes->removeMachineInstrFromMaps(&*I);
- Indexes->insertMachineInstrInMaps(&*I);
+ if (Indexes->hasIndex(MI))
+ Indexes->removeMachineInstrFromMaps(MI);
+ Indexes->insertMachineInstrInMaps(MI);
}
}
}
@@ -942,10 +920,10 @@ MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) {
}
if (MachineDominatorTree *MDT =
- P->getAnalysisIfAvailable<MachineDominatorTree>())
+ P.getAnalysisIfAvailable<MachineDominatorTree>())
MDT->recordSplitCriticalEdge(this, Succ, NMBB);
- if (MachineLoopInfo *MLI = P->getAnalysisIfAvailable<MachineLoopInfo>())
+ if (MachineLoopInfo *MLI = P.getAnalysisIfAvailable<MachineLoopInfo>())
if (MachineLoop *TIL = MLI->getLoopFor(this)) {
// If one or the other blocks were not in a loop, the new block is not
// either, and thus LI doesn't need to be updated.
@@ -975,6 +953,42 @@ MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) {
return NMBB;
}
+bool MachineBasicBlock::canSplitCriticalEdge(
+ const MachineBasicBlock *Succ) const {
+ // Splitting the critical edge to a landing pad block is non-trivial. Don't do
+ // it in this generic function.
+ if (Succ->isEHPad())
+ return false;
+
+ const MachineFunction *MF = getParent();
+
+ // Performance might be harmed on HW that implements branching using exec mask
+ // where both sides of the branches are always executed.
+ if (MF->getTarget().requiresStructuredCFG())
+ return false;
+
+  // We may need to update this block's terminator, but we can't do that if
+ // AnalyzeBranch fails. If this uses a jump table, we won't touch it.
+ const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
+ MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
+ SmallVector<MachineOperand, 4> Cond;
+  // analyzeBranch should not modify this block, since we did not allow
+  // modification.
+ if (TII->analyzeBranch(*const_cast<MachineBasicBlock *>(this), TBB, FBB, Cond,
+ /*AllowModify*/ false))
+ return false;
+
+ // Avoid bugpoint weirdness: A block may end with a conditional branch but
+  // jump to the same MBB in either case. We have duplicate CFG edges in that
+ // case that we can't handle. Since this never happens in properly optimized
+ // code, just skip those edges.
+ if (TBB && TBB == FBB) {
+ DEBUG(dbgs() << "Won't split critical edge after degenerate BB#"
+ << getNumber() << '\n');
+ return false;
+ }
+ return true;
+}
+
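With the predicate split out of SplitCriticalEdge, a pass can test edges before committing to any CFG surgery. A hedged usage sketch (the enclosing MachineFunctionPass is hypothetical; note that SplitCriticalEdge now takes the pass by reference per the signature change above):

    // Inside some MachineFunctionPass::runOnMachineFunction(MF):
    SmallVector<std::pair<MachineBasicBlock *, MachineBasicBlock *>, 8> Edges;
    for (MachineBasicBlock &MBB : MF)
      for (MachineBasicBlock *Succ : MBB.successors())
        if (MBB.canSplitCriticalEdge(Succ)) // pure query, no CFG changes
          Edges.push_back(std::make_pair(&MBB, Succ));
    // Split afterwards so the iteration above is not invalidated.
    for (auto &E : Edges)
      if (MachineBasicBlock *NMBB = E.first->SplitCriticalEdge(E.second, *this))
        (void)NMBB; // NMBB now sits between the two blocks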
/// Prepare MI to be removed from its bundle. This fixes bundle flags on MI's
/// neighboring instructions so the bundle won't be broken by removing MI.
static void unbundleSingleMI(MachineInstr *MI) {
@@ -1200,7 +1214,7 @@ MachineBasicBlock::computeRegisterLiveness(const TargetRegisterInfo *TRI,
--I;
MachineOperandIteratorBase::PhysRegInfo Info =
- ConstMIOperands(I).analyzePhysReg(Reg, TRI);
+ ConstMIOperands(*I).analyzePhysReg(Reg, TRI);
// Defs happen after uses so they take precedence if both are present.
@@ -1208,8 +1222,15 @@ MachineBasicBlock::computeRegisterLiveness(const TargetRegisterInfo *TRI,
if (Info.DeadDef)
return LQR_Dead;
// Register is (at least partially) live after a def.
- if (Info.Defined)
- return LQR_Live;
+ if (Info.Defined) {
+ if (!Info.PartialDeadDef)
+ return LQR_Live;
+      // As soon as we see a partial definition (dead or not), we cannot tell
+      // whether the whole value is live without tracking lane masks; e.g., a
+      // def of only a low subregister leaves the other lanes' liveness
+      // unknown. We do not track lane masks here, so fall back on the rest of
+      // the analysis.
+ break;
+ }
// Register is dead after a full kill or clobber and no def.
if (Info.Killed || Info.Clobbered)
return LQR_Dead;
@@ -1238,7 +1259,7 @@ MachineBasicBlock::computeRegisterLiveness(const TargetRegisterInfo *TRI,
if (I != end()) {
for (++I; I != end() && N > 0; ++I, --N) {
MachineOperandIteratorBase::PhysRegInfo Info =
- ConstMIOperands(I).analyzePhysReg(Reg, TRI);
+ ConstMIOperands(*I).analyzePhysReg(Reg, TRI);
// Register is live when we read it here.
if (Info.Read)
diff --git a/contrib/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp b/contrib/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp
index 9119e31..6c0f99f 100644
--- a/contrib/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp
@@ -20,43 +20,44 @@
#include "llvm/InitializePasses.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/Format.h"
#include "llvm/Support/GraphWriter.h"
+#include "llvm/Support/raw_ostream.h"
using namespace llvm;
#define DEBUG_TYPE "block-freq"
#ifndef NDEBUG
-enum GVDAGType {
- GVDT_None,
- GVDT_Fraction,
- GVDT_Integer
-};
-static cl::opt<GVDAGType>
-ViewMachineBlockFreqPropagationDAG("view-machine-block-freq-propagation-dags",
- cl::Hidden,
- cl::desc("Pop up a window to show a dag displaying how machine block "
- "frequencies propagate through the CFG."),
- cl::values(
- clEnumValN(GVDT_None, "none",
- "do not display graphs."),
- clEnumValN(GVDT_Fraction, "fraction", "display a graph using the "
- "fractional block frequency representation."),
- clEnumValN(GVDT_Integer, "integer", "display a graph using the raw "
- "integer fractional block frequency representation."),
- clEnumValEnd));
+static cl::opt<GVDAGType> ViewMachineBlockFreqPropagationDAG(
+ "view-machine-block-freq-propagation-dags", cl::Hidden,
+ cl::desc("Pop up a window to show a dag displaying how machine block "
+ "frequencies propagate through the CFG."),
+ cl::values(clEnumValN(GVDT_None, "none", "do not display graphs."),
+ clEnumValN(GVDT_Fraction, "fraction",
+ "display a graph using the "
+ "fractional block frequency representation."),
+ clEnumValN(GVDT_Integer, "integer",
+ "display a graph using the raw "
+ "integer fractional block frequency representation."),
+ clEnumValN(GVDT_Count, "count", "display a graph using the real "
+ "profile count if available."),
+ clEnumValEnd));
+
+extern cl::opt<std::string> ViewBlockFreqFuncName;
+extern cl::opt<unsigned> ViewHotFreqPercent;
namespace llvm {
-template <>
-struct GraphTraits<MachineBlockFrequencyInfo *> {
+template <> struct GraphTraits<MachineBlockFrequencyInfo *> {
typedef const MachineBasicBlock NodeType;
typedef MachineBasicBlock::const_succ_iterator ChildIteratorType;
typedef MachineFunction::const_iterator nodes_iterator;
- static inline
- const NodeType *getEntryNode(const MachineBlockFrequencyInfo *G) {
+ static inline const NodeType *
+ getEntryNode(const MachineBlockFrequencyInfo *G) {
return &G->getFunction()->front();
}
@@ -77,38 +78,33 @@ struct GraphTraits<MachineBlockFrequencyInfo *> {
}
};
-template<>
-struct DOTGraphTraits<MachineBlockFrequencyInfo*> :
- public DefaultDOTGraphTraits {
- explicit DOTGraphTraits(bool isSimple=false) :
- DefaultDOTGraphTraits(isSimple) {}
-
- static std::string getGraphName(const MachineBlockFrequencyInfo *G) {
- return G->getFunction()->getName();
- }
+typedef BFIDOTGraphTraitsBase<MachineBlockFrequencyInfo,
+ MachineBranchProbabilityInfo>
+ MBFIDOTGraphTraitsBase;
+template <>
+struct DOTGraphTraits<MachineBlockFrequencyInfo *>
+ : public MBFIDOTGraphTraitsBase {
+ explicit DOTGraphTraits(bool isSimple = false)
+ : MBFIDOTGraphTraitsBase(isSimple) {}
std::string getNodeLabel(const MachineBasicBlock *Node,
const MachineBlockFrequencyInfo *Graph) {
- std::string Result;
- raw_string_ostream OS(Result);
-
- OS << Node->getName().str() << ":";
- switch (ViewMachineBlockFreqPropagationDAG) {
- case GVDT_Fraction:
- Graph->printBlockFreq(OS, Node);
- break;
- case GVDT_Integer:
- OS << Graph->getBlockFreq(Node).getFrequency();
- break;
- case GVDT_None:
- llvm_unreachable("If we are not supposed to render a graph we should "
- "never reach this point.");
- }
-
- return Result;
+ return MBFIDOTGraphTraitsBase::getNodeLabel(
+ Node, Graph, ViewMachineBlockFreqPropagationDAG);
}
-};
+ std::string getNodeAttributes(const MachineBasicBlock *Node,
+ const MachineBlockFrequencyInfo *Graph) {
+ return MBFIDOTGraphTraitsBase::getNodeAttributes(Node, Graph,
+ ViewHotFreqPercent);
+ }
+
+ std::string getEdgeAttributes(const MachineBasicBlock *Node, EdgeIter EI,
+ const MachineBlockFrequencyInfo *MBFI) {
+ return MBFIDOTGraphTraitsBase::getEdgeAttributes(
+ Node, EI, MBFI, MBFI->getMBPI(), ViewHotFreqPercent);
+ }
+};
} // end namespace llvm
#endif
@@ -122,9 +118,8 @@ INITIALIZE_PASS_END(MachineBlockFrequencyInfo, "machine-block-freq",
char MachineBlockFrequencyInfo::ID = 0;
-
-MachineBlockFrequencyInfo::
-MachineBlockFrequencyInfo() :MachineFunctionPass(ID) {
+MachineBlockFrequencyInfo::MachineBlockFrequencyInfo()
+ : MachineFunctionPass(ID) {
initializeMachineBlockFrequencyInfoPass(*PassRegistry::getPassRegistry());
}
@@ -145,7 +140,9 @@ bool MachineBlockFrequencyInfo::runOnMachineFunction(MachineFunction &F) {
MBFI.reset(new ImplType);
MBFI->calculate(F, MBPI, MLI);
#ifndef NDEBUG
- if (ViewMachineBlockFreqPropagationDAG != GVDT_None) {
+ if (ViewMachineBlockFreqPropagationDAG != GVDT_None &&
+ (ViewBlockFreqFuncName.empty() ||
+ F.getName().equals(ViewBlockFreqFuncName))) {
view();
}
#endif
@@ -163,19 +160,29 @@ void MachineBlockFrequencyInfo::view() const {
"MachineBlockFrequencyDAGs");
#else
errs() << "MachineBlockFrequencyInfo::view is only available in debug builds "
- "on systems with Graphviz or gv!\n";
+ "on systems with Graphviz or gv!\n";
#endif // NDEBUG
}
-BlockFrequency MachineBlockFrequencyInfo::
-getBlockFreq(const MachineBasicBlock *MBB) const {
+BlockFrequency
+MachineBlockFrequencyInfo::getBlockFreq(const MachineBasicBlock *MBB) const {
return MBFI ? MBFI->getBlockFreq(MBB) : 0;
}
+Optional<uint64_t> MachineBlockFrequencyInfo::getBlockProfileCount(
+    const MachineBasicBlock *MBB) const {
+  if (!MBFI)
+    return None;
+  const Function *F = MBFI->getFunction()->getFunction();
+  return MBFI->getBlockProfileCount(*F, MBB);
+}
+
const MachineFunction *MachineBlockFrequencyInfo::getFunction() const {
return MBFI ? MBFI->getFunction() : nullptr;
}
+const MachineBranchProbabilityInfo *MachineBlockFrequencyInfo::getMBPI() const {
+ return MBFI ? &MBFI->getBPI() : nullptr;
+}
+
raw_ostream &
MachineBlockFrequencyInfo::printBlockFreq(raw_ostream &OS,
const BlockFrequency Freq) const {
diff --git a/contrib/llvm/lib/CodeGen/MachineBlockPlacement.cpp b/contrib/llvm/lib/CodeGen/MachineBlockPlacement.cpp
index f5e3056..03dda8b 100644
--- a/contrib/llvm/lib/CodeGen/MachineBlockPlacement.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineBlockPlacement.cpp
@@ -26,6 +26,8 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
+#include "BranchFolding.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
@@ -62,10 +64,12 @@ static cl::opt<unsigned> AlignAllBlock("align-all-blocks",
"blocks in the function."),
cl::init(0), cl::Hidden);
-static cl::opt<unsigned>
- AlignAllLoops("align-all-loops",
- cl::desc("Force the alignment of all loops in the function."),
- cl::init(0), cl::Hidden);
+static cl::opt<unsigned> AlignAllNonFallThruBlocks(
+ "align-all-nofallthru-blocks",
+ cl::desc("Force the alignment of all "
+ "blocks that have no fall-through predecessors (i.e. don't add "
+ "nops that are executed)."),
+ cl::init(0), cl::Hidden);
// FIXME: Find a good default for this flag and remove the flag.
static cl::opt<unsigned> ExitBlockBias(
@@ -97,10 +101,15 @@ static cl::opt<bool>
cl::desc("Model the cost of loop rotation more "
"precisely by using profile data."),
cl::init(false), cl::Hidden);
+static cl::opt<bool>
+ ForcePreciseRotationCost("force-precise-rotation-cost",
+ cl::desc("Force the use of precise cost "
+ "loop rotation strategy."),
+ cl::init(false), cl::Hidden);
static cl::opt<unsigned> MisfetchCost(
"misfetch-cost",
- cl::desc("Cost that models the probablistic risk of an instruction "
+ cl::desc("Cost that models the probabilistic risk of an instruction "
"misfetch due to a jump comparing to falling through, whose cost "
"is zero."),
cl::init(1), cl::Hidden);
@@ -109,6 +118,15 @@ static cl::opt<unsigned> JumpInstCost("jump-inst-cost",
cl::desc("Cost of jump instructions."),
cl::init(1), cl::Hidden);
+static cl::opt<bool>
+BranchFoldPlacement("branch-fold-placement",
+ cl::desc("Perform branch folding during placement. "
+ "Reduces code size."),
+ cl::init(true), cl::Hidden);
+
+extern cl::opt<unsigned> StaticLikelyProb;
+extern cl::opt<unsigned> ProfileLikelyProb;
+
namespace {
class BlockChain;
/// \brief Type for our function-wide basic block -> block chain mapping.
@@ -149,7 +167,7 @@ public:
/// function. It also registers itself as the chain that block participates
/// in with the BlockToChain mapping.
BlockChain(BlockToChainMapType &BlockToChain, MachineBasicBlock *BB)
- : Blocks(1, BB), BlockToChain(BlockToChain), LoopPredecessors(0) {
+ : Blocks(1, BB), BlockToChain(BlockToChain), UnscheduledPredecessors(0) {
assert(BB && "Cannot create a chain with a null basic block");
BlockToChain[BB] = this;
}
@@ -201,11 +219,16 @@ public:
}
#endif // NDEBUG
- /// \brief Count of predecessors within the loop currently being processed.
+ /// \brief Count of predecessors of any block within the chain which have not
+ /// yet been scheduled. In general, we will delay scheduling this chain
+ /// until those predecessors are scheduled (or we find a sufficiently good
+  /// reason to override this heuristic). Note that when forming loop chains,
+ /// blocks outside the loop are ignored and treated as if they were already
+ /// scheduled.
///
- /// This count is updated at each loop we process to represent the number of
- /// in-loop predecessors of this chain.
- unsigned LoopPredecessors;
+ /// Note: This field is reinitialized multiple times - once for each loop,
+ /// and then once for the function as a whole.
+ unsigned UnscheduledPredecessors;
};
}
@@ -214,14 +237,21 @@ class MachineBlockPlacement : public MachineFunctionPass {
/// \brief A typedef for a block filter set.
typedef SmallPtrSet<MachineBasicBlock *, 16> BlockFilterSet;
+  /// \brief Work lists of blocks that are ready to be laid out.
+ SmallVector<MachineBasicBlock *, 16> BlockWorkList;
+ SmallVector<MachineBasicBlock *, 16> EHPadWorkList;
+
+  /// \brief The machine function we are currently laying out.
+ MachineFunction *F;
+
/// \brief A handle to the branch probability pass.
const MachineBranchProbabilityInfo *MBPI;
/// \brief A handle to the function-wide block frequency pass.
- const MachineBlockFrequencyInfo *MBFI;
+ std::unique_ptr<BranchFolder::MBFIWrapper> MBFI;
/// \brief A handle to the loop info.
- const MachineLoopInfo *MLI;
+ MachineLoopInfo *MLI;
/// \brief A handle to the target's instruction info.
const TargetInstrInfo *TII;
@@ -254,33 +284,56 @@ class MachineBlockPlacement : public MachineFunctionPass {
DenseMap<MachineBasicBlock *, BlockChain *> BlockToChain;
void markChainSuccessors(BlockChain &Chain, MachineBasicBlock *LoopHeaderBB,
- SmallVectorImpl<MachineBasicBlock *> &BlockWorkList,
const BlockFilterSet *BlockFilter = nullptr);
+ BranchProbability
+ collectViableSuccessors(MachineBasicBlock *BB, BlockChain &Chain,
+ const BlockFilterSet *BlockFilter,
+ SmallVector<MachineBasicBlock *, 4> &Successors);
+ bool shouldPredBlockBeOutlined(MachineBasicBlock *BB, MachineBasicBlock *Succ,
+ BlockChain &Chain,
+ const BlockFilterSet *BlockFilter,
+ BranchProbability SuccProb,
+ BranchProbability HotProb);
+ bool
+ hasBetterLayoutPredecessor(MachineBasicBlock *BB, MachineBasicBlock *Succ,
+ BlockChain &SuccChain, BranchProbability SuccProb,
+ BranchProbability RealSuccProb, BlockChain &Chain,
+ const BlockFilterSet *BlockFilter);
MachineBasicBlock *selectBestSuccessor(MachineBasicBlock *BB,
BlockChain &Chain,
const BlockFilterSet *BlockFilter);
MachineBasicBlock *
selectBestCandidateBlock(BlockChain &Chain,
- SmallVectorImpl<MachineBasicBlock *> &WorkList,
- const BlockFilterSet *BlockFilter);
+ SmallVectorImpl<MachineBasicBlock *> &WorkList);
MachineBasicBlock *
- getFirstUnplacedBlock(MachineFunction &F, const BlockChain &PlacedChain,
+ getFirstUnplacedBlock(const BlockChain &PlacedChain,
MachineFunction::iterator &PrevUnplacedBlockIt,
const BlockFilterSet *BlockFilter);
+
+ /// \brief Add a basic block to the work list if it is appropriate.
+ ///
+  /// If the optional parameter BlockFilter is provided, only MBBs
+ /// present in the set will be added to the worklist. If nullptr
+ /// is provided, no filtering occurs.
+ void fillWorkLists(MachineBasicBlock *MBB,
+ SmallPtrSetImpl<BlockChain *> &UpdatedPreds,
+ const BlockFilterSet *BlockFilter);
void buildChain(MachineBasicBlock *BB, BlockChain &Chain,
- SmallVectorImpl<MachineBasicBlock *> &BlockWorkList,
const BlockFilterSet *BlockFilter = nullptr);
MachineBasicBlock *findBestLoopTop(MachineLoop &L,
const BlockFilterSet &LoopBlockSet);
- MachineBasicBlock *findBestLoopExit(MachineFunction &F, MachineLoop &L,
+ MachineBasicBlock *findBestLoopExit(MachineLoop &L,
const BlockFilterSet &LoopBlockSet);
- BlockFilterSet collectLoopBlockSet(MachineFunction &F, MachineLoop &L);
- void buildLoopChains(MachineFunction &F, MachineLoop &L);
+ BlockFilterSet collectLoopBlockSet(MachineLoop &L);
+ void buildLoopChains(MachineLoop &L);
void rotateLoop(BlockChain &LoopChain, MachineBasicBlock *ExitingBB,
const BlockFilterSet &LoopBlockSet);
void rotateLoopWithProfile(BlockChain &LoopChain, MachineLoop &L,
const BlockFilterSet &LoopBlockSet);
- void buildCFGChains(MachineFunction &F);
+ void collectMustExecuteBBs();
+ void buildCFGChains();
+ void optimizeBranches();
+ void alignBlocks();
public:
static char ID; // Pass identification, replacement for typeid
@@ -295,6 +348,7 @@ public:
AU.addRequired<MachineBlockFrequencyInfo>();
AU.addRequired<MachineDominatorTree>();
AU.addRequired<MachineLoopInfo>();
+ AU.addRequired<TargetPassConfig>();
MachineFunctionPass::getAnalysisUsage(AU);
}
};
@@ -319,18 +373,7 @@ static std::string getBlockName(MachineBasicBlock *BB) {
std::string Result;
raw_string_ostream OS(Result);
OS << "BB#" << BB->getNumber();
- OS << " (derived from LLVM BB '" << BB->getName() << "')";
- OS.flush();
- return Result;
-}
-
-/// \brief Helper to print the number of a MBB.
-///
-/// Only used by debug logging.
-static std::string getBlockNum(MachineBasicBlock *BB) {
- std::string Result;
- raw_string_ostream OS(Result);
- OS << "BB#" << BB->getNumber();
+ OS << " ('" << BB->getName() << "')";
OS.flush();
return Result;
}
@@ -344,7 +387,6 @@ static std::string getBlockNum(MachineBasicBlock *BB) {
/// chain which reach the zero-predecessor state to the worklist passed in.
void MachineBlockPlacement::markChainSuccessors(
BlockChain &Chain, MachineBasicBlock *LoopHeaderBB,
- SmallVectorImpl<MachineBasicBlock *> &BlockWorkList,
const BlockFilterSet *BlockFilter) {
// Walk all the blocks in this chain, marking their successors as having
// a predecessor placed.
@@ -363,30 +405,26 @@ void MachineBlockPlacement::markChainSuccessors(
// This is a cross-chain edge that is within the loop, so decrement the
// loop predecessor count of the destination chain.
- if (SuccChain.LoopPredecessors > 0 && --SuccChain.LoopPredecessors == 0)
- BlockWorkList.push_back(*SuccChain.begin());
+ if (SuccChain.UnscheduledPredecessors == 0 ||
+ --SuccChain.UnscheduledPredecessors > 0)
+ continue;
+
+ auto *MBB = *SuccChain.begin();
+ if (MBB->isEHPad())
+ EHPadWorkList.push_back(MBB);
+ else
+ BlockWorkList.push_back(MBB);
}
}
}
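// The guarded decrement above, in isolation: a chain whose counter was
// already forced to zero (e.g. merged out of the usual CFG order) must not
// be decremented again, so the zero test comes before the decrement to
// avoid unsigned wrap-around. This is a hedged sketch, not the pass itself.
static bool becomesReady(unsigned &UnscheduledPreds) {
  if (UnscheduledPreds == 0)
    return false; // Already handled out of band; never wrap below zero.
  return --UnscheduledPreds == 0;
}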
-/// \brief Select the best successor for a block.
-///
-/// This looks across all successors of a particular block and attempts to
-/// select the "best" one to be the layout successor. It only considers direct
-/// successors which also pass the block filter. It will attempt to avoid
-/// breaking CFG structure, but cave and break such structures in the case of
-/// very hot successor edges.
-///
-/// \returns The best successor block found, or null if none are viable.
-MachineBasicBlock *
-MachineBlockPlacement::selectBestSuccessor(MachineBasicBlock *BB,
- BlockChain &Chain,
- const BlockFilterSet *BlockFilter) {
- const BranchProbability HotProb(4, 5); // 80%
-
- MachineBasicBlock *BestSucc = nullptr;
- auto BestProb = BranchProbability::getZero();
-
+/// This helper function collects the set of successors of block
+/// \p BB that are allowed to be its layout successors, and returns
+/// the total branch probability of edges from \p BB to those
+/// blocks.
+BranchProbability MachineBlockPlacement::collectViableSuccessors(
+ MachineBasicBlock *BB, BlockChain &Chain, const BlockFilterSet *BlockFilter,
+ SmallVector<MachineBasicBlock *, 4> &Successors) {
  // Adjust edge probabilities by excluding edges pointing to blocks that are
  // either not in BlockFilter or are already in the current chain. Consider the
// following CFG:
@@ -400,20 +438,17 @@ MachineBlockPlacement::selectBestSuccessor(MachineBasicBlock *BB,
// Assume A->C is very hot (>90%), and C->D has a 50% probability, then after
// A->C is chosen as a fall-through, D won't be selected as a successor of C
// due to CFG constraint (the probability of C->D is not greater than
- // HotProb). If we exclude E that is not in BlockFilter when calculating the
- // probability of C->D, D will be selected and we will get A C D B as the
- // layout of this loop.
+  // HotProb needed to break the topological order). If we exclude E, which
+  // is not in BlockFilter, when calculating the probability of C->D, D will
+  // be selected and we will get A C D B as the layout of this loop.
auto AdjustedSumProb = BranchProbability::getOne();
- SmallVector<MachineBasicBlock *, 4> Successors;
for (MachineBasicBlock *Succ : BB->successors()) {
bool SkipSucc = false;
- if (BlockFilter && !BlockFilter->count(Succ)) {
+ if (Succ->isEHPad() || (BlockFilter && !BlockFilter->count(Succ))) {
SkipSucc = true;
} else {
BlockChain *SuccChain = BlockToChain[Succ];
if (SuccChain == &Chain) {
- DEBUG(dbgs() << " " << getBlockName(Succ)
- << " -> Already merged!\n");
SkipSucc = true;
} else if (Succ != *SuccChain->begin()) {
DEBUG(dbgs() << " " << getBlockName(Succ) << " -> Mid chain!\n");
@@ -426,78 +461,267 @@ MachineBlockPlacement::selectBestSuccessor(MachineBasicBlock *BB,
Successors.push_back(Succ);
}
- DEBUG(dbgs() << "Attempting merge from: " << getBlockName(BB) << "\n");
- for (MachineBasicBlock *Succ : Successors) {
- BranchProbability SuccProb;
- uint32_t SuccProbN = MBPI->getEdgeProbability(BB, Succ).getNumerator();
- uint32_t SuccProbD = AdjustedSumProb.getNumerator();
- if (SuccProbN >= SuccProbD)
- SuccProb = BranchProbability::getOne();
- else
- SuccProb = BranchProbability(SuccProbN, SuccProbD);
-
- // If we outline optional branches, look whether Succ is unavoidable, i.e.
- // dominates all terminators of the MachineFunction. If it does, other
- // successors must be optional. Don't do this for cold branches.
- if (OutlineOptionalBranches && SuccProb > HotProb.getCompl() &&
- UnavoidableBlocks.count(Succ) > 0) {
- auto HasShortOptionalBranch = [&]() {
- for (MachineBasicBlock *Pred : Succ->predecessors()) {
- // Check whether there is an unplaced optional branch.
- if (Pred == Succ || (BlockFilter && !BlockFilter->count(Pred)) ||
- BlockToChain[Pred] == &Chain)
- continue;
- // Check whether the optional branch has exactly one BB.
- if (Pred->pred_size() > 1 || *Pred->pred_begin() != BB)
- continue;
- // Check whether the optional branch is small.
- if (Pred->size() < OutlineOptionalThreshold)
- return true;
- }
+ return AdjustedSumProb;
+}
+
+/// This helper function returns the branch probability \p OrigProb adjusted
+/// (i.e. normalized) against the new total \p AdjustedSumProb.
+static BranchProbability
+getAdjustedProbability(BranchProbability OrigProb,
+ BranchProbability AdjustedSumProb) {
+ BranchProbability SuccProb;
+ uint32_t SuccProbN = OrigProb.getNumerator();
+ uint32_t SuccProbD = AdjustedSumProb.getNumerator();
+ if (SuccProbN >= SuccProbD)
+ SuccProb = BranchProbability::getOne();
+ else
+ SuccProb = BranchProbability(SuccProbN, SuccProbD);
+
+ return SuccProb;
+}
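// A worked example of the normalization above, using a toy stand-in for
// BranchProbability (which stores a numerator over a fixed denominator, so
// comparing numerators suffices). Suppose BB's viable successors carry raw
// probabilities 0.4 and 0.2 while the remaining 0.4 points at filtered-out
// blocks: AdjustedSumProb is then 0.6 and each edge is rescaled against it.
#include <cassert>

struct Prob { unsigned N, D; }; // Toy numerator/denominator pair.

static Prob adjust(Prob Orig, Prob AdjustedSum) {
  // Clamp at one when the numerator meets or exceeds the reduced total,
  // mirroring the guard in getAdjustedProbability().
  if (Orig.N >= AdjustedSum.N)
    return {1, 1};
  return {Orig.N, AdjustedSum.N};
}

int main() {
  Prob Sum = {6, 10};            // 0.4 + 0.2 over the viable edges.
  Prob A = adjust({4, 10}, Sum); // 0.4 normalizes to 4/6 == 2/3.
  assert(A.N == 4 && A.D == 6);
  Prob B = adjust({8, 10}, Sum); // Exceeds the total: clamps to one.
  assert(B.N == 1 && B.D == 1);
  return 0;
}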
+
+/// When the option OutlineOptionalBranches is on, this method
+/// checks if the fallthrough candidate block \p Succ (of block
+/// \p BB) also has other unscheduled predecessor blocks which
+/// are also successors of \p BB (forming a triangular-shaped CFG).
+/// If none of those predecessors is small, it returns true.
+/// The caller can then choose to select \p Succ as the layout successor
+/// so that \p Succ's predecessors (optional branches) can be
+/// outlined.
+/// FIXME: fold this with more general layout cost analysis.
+bool MachineBlockPlacement::shouldPredBlockBeOutlined(
+ MachineBasicBlock *BB, MachineBasicBlock *Succ, BlockChain &Chain,
+ const BlockFilterSet *BlockFilter, BranchProbability SuccProb,
+ BranchProbability HotProb) {
+ if (!OutlineOptionalBranches)
+ return false;
+ // If we outline optional branches, look whether Succ is unavoidable, i.e.
+ // dominates all terminators of the MachineFunction. If it does, other
+ // successors must be optional. Don't do this for cold branches.
+ if (SuccProb > HotProb.getCompl() && UnavoidableBlocks.count(Succ) > 0) {
+ for (MachineBasicBlock *Pred : Succ->predecessors()) {
+ // Check whether there is an unplaced optional branch.
+ if (Pred == Succ || (BlockFilter && !BlockFilter->count(Pred)) ||
+ BlockToChain[Pred] == &Chain)
+ continue;
+ // Check whether the optional branch has exactly one BB.
+ if (Pred->pred_size() > 1 || *Pred->pred_begin() != BB)
+ continue;
+ // Check whether the optional branch is small.
+ if (Pred->size() < OutlineOptionalThreshold)
return false;
- };
- if (!HasShortOptionalBranch())
- return Succ;
}
+    return true;
+  }
+  return false;
+}
- // Only consider successors which are either "hot", or wouldn't violate
- // any CFG constraints.
- BlockChain &SuccChain = *BlockToChain[Succ];
- if (SuccChain.LoopPredecessors != 0) {
- if (SuccProb < HotProb) {
- DEBUG(dbgs() << " " << getBlockName(Succ) << " -> " << SuccProb
- << " (prob) (CFG conflict)\n");
- continue;
- }
+// When no profile is present, return StaticLikelyProb.
+// When a profile is available, we need to handle the triangle-shaped CFG.
+static BranchProbability getLayoutSuccessorProbThreshold(
+ MachineBasicBlock *BB) {
+ if (!BB->getParent()->getFunction()->getEntryCount())
+ return BranchProbability(StaticLikelyProb, 100);
+ if (BB->succ_size() == 2) {
+ const MachineBasicBlock *Succ1 = *BB->succ_begin();
+ const MachineBasicBlock *Succ2 = *(BB->succ_begin() + 1);
+ if (Succ1->isSuccessor(Succ2) || Succ2->isSuccessor(Succ1)) {
+    /* See case 1 below for the cost analysis. For BB->Succ to
+     * be taken with smaller cost, the following needs to hold:
+     *   Prob(BB->Succ) > 2 * Prob(BB->Pred).
+     * At the threshold T we have T = 2 * Prob(BB->Pred); together with
+     * T + Prob(BB->Pred) == 1 this gives T + T/2 == 1, i.e. T = 2/3.
+     * Also adding the user-specified branch bias, we have
+     *   T = (2/3) * (ProfileLikelyProb / 50)
+     *     = (2 * ProfileLikelyProb) / 150.
+     */
+ return BranchProbability(2 * ProfileLikelyProb, 150);
+ }
+ }
+ return BranchProbability(ProfileLikelyProb, 100);
+}
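// Checking the threshold arithmetic above with plain integers; the figures
// (2/3 and the default ProfileLikelyProb of 51) come straight from the code.
#include <cassert>

int main() {
  const unsigned ProfileLikelyProb = 51; // Default of the hidden option.
  // The triangle threshold is (2 * ProfileLikelyProb) / 150; with the
  // default bias that is 102/150 = 0.68, slightly above the unbiased 2/3.
  assert(3 * (2 * ProfileLikelyProb) > 2 * 150); // 306 > 300, so T > 2/3.
  // With ProfileLikelyProb == 50 (no bias) the threshold is exactly 2/3.
  assert(3 * (2 * 50) == 2 * 150);
  return 0;
}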
- // Make sure that a hot successor doesn't have a globally more
- // important predecessor.
- auto RealSuccProb = MBPI->getEdgeProbability(BB, Succ);
- BlockFrequency CandidateEdgeFreq =
- MBFI->getBlockFreq(BB) * RealSuccProb * HotProb.getCompl();
- bool BadCFGConflict = false;
- for (MachineBasicBlock *Pred : Succ->predecessors()) {
- if (Pred == Succ || (BlockFilter && !BlockFilter->count(Pred)) ||
- BlockToChain[Pred] == &Chain)
- continue;
- BlockFrequency PredEdgeFreq =
- MBFI->getBlockFreq(Pred) * MBPI->getEdgeProbability(Pred, Succ);
- if (PredEdgeFreq >= CandidateEdgeFreq) {
- BadCFGConflict = true;
- break;
- }
- }
- if (BadCFGConflict) {
- DEBUG(dbgs() << " " << getBlockName(Succ) << " -> " << SuccProb
- << " (prob) (non-cold CFG conflict)\n");
- continue;
- }
+/// Checks to see if the layout candidate block \p Succ has a better layout
+/// predecessor than \c BB. If yes, returns true.
+bool MachineBlockPlacement::hasBetterLayoutPredecessor(
+ MachineBasicBlock *BB, MachineBasicBlock *Succ, BlockChain &SuccChain,
+ BranchProbability SuccProb, BranchProbability RealSuccProb,
+ BlockChain &Chain, const BlockFilterSet *BlockFilter) {
+
+ // There isn't a better layout when there are no unscheduled predecessors.
+ if (SuccChain.UnscheduledPredecessors == 0)
+ return false;
+
+ // There are two basic scenarios here:
+ // -------------------------------------
+ // Case 1: triangular shape CFG (if-then):
+ // BB
+ // | \
+ // | \
+ // | Pred
+ // | /
+ // Succ
+  // In this case, we are evaluating whether to select the edge BB->Succ,
+  // i.e. to set Succ as the layout successor of BB. Picking Succ as BB's
+  // successor breaks the CFG constraints (FIXME: define these constraints).
+  // With this layout, the block Pred is forced to be outlined, so the
+  // overall cost will be the cost of the taken branch from BB to Pred,
+  // plus the cost of the taken back branch from Pred to Succ, as well as
+  // the additional cost of the needed unconditional jump instruction
+  // from Pred to Succ.
+
+ // The cost of the topological order layout is the taken branch cost
+ // from BB to Succ, so to make BB->Succ a viable candidate, the following
+ // must hold:
+ // 2 * freq(BB->Pred) * taken_branch_cost + unconditional_jump_cost
+ // < freq(BB->Succ) * taken_branch_cost.
+ // Ignoring unconditional jump cost, we get
+ // freq(BB->Succ) > 2 * freq(BB->Pred), i.e.,
+ // prob(BB->Succ) > 2 * prob(BB->Pred)
+ //
+ // When real profile data is available, we can precisely compute the
+ // probability threshold that is needed for edge BB->Succ to be considered.
+ // Without profile data, the heuristic requires the branch bias to be
+ // a lot larger to make sure the signal is very strong (e.g. 80% default).
+ // -----------------------------------------------------------------
+ // Case 2: diamond like CFG (if-then-else):
+ // S
+ // / \
+ // | \
+ // BB Pred
+ // \ /
+ // Succ
+ // ..
+ //
+ // The current block is BB and edge BB->Succ is now being evaluated.
+ // Note that edge S->BB was previously already selected because
+ // prob(S->BB) > prob(S->Pred).
+ // At this point, 2 blocks can be placed after BB: Pred or Succ. If we
+ // choose Pred, we will have a topological ordering as shown on the left
+ // in the picture below. If we choose Succ, we have the solution as shown
+ // on the right:
+ //
+ // topo-order:
+ //
+ // S----- ---S
+ // | | | |
+ // ---BB | | BB
+ // | | | |
+ // | pred-- | Succ--
+ // | | | |
+ // ---succ ---pred--
+ //
+ // cost = freq(S->Pred) + freq(BB->Succ) cost = 2 * freq (S->Pred)
+ // = freq(S->Pred) + freq(S->BB)
+ //
+ // If we have profile data (i.e, branch probabilities can be trusted), the
+ // cost (number of taken branches) with layout S->BB->Succ->Pred is 2 *
+ // freq(S->Pred) while the cost of topo order is freq(S->Pred) + freq(S->BB).
+ // We know Prob(S->BB) > Prob(S->Pred), so freq(S->BB) > freq(S->Pred), which
+ // means the cost of topological order is greater.
+ // When profile data is not available, however, we need to be more
+ // conservative. If the branch prediction is wrong, breaking the topo-order
+  // will actually yield a layout with a large cost. For this reason, we need
+  // a strongly biased branch at block S (a high Prob(S->BB)) in order to
+  // select BB->Succ. This is equivalent to looking at the CFG backward along
+  // the backward edge: Prob(Succ->BB) needs to be >= HotProb for the edge to
+  // be selected (without profile data).
+
+ BranchProbability HotProb = getLayoutSuccessorProbThreshold(BB);
+
+ // Forward checking. For case 2, SuccProb will be 1.
+ if (SuccProb < HotProb) {
+ DEBUG(dbgs() << " " << getBlockName(Succ) << " -> " << SuccProb
+ << " (prob) (CFG conflict)\n");
+ return true;
+ }
+
+ // Make sure that a hot successor doesn't have a globally more
+ // important predecessor.
+ BlockFrequency CandidateEdgeFreq = MBFI->getBlockFreq(BB) * RealSuccProb;
+ bool BadCFGConflict = false;
+
+ for (MachineBasicBlock *Pred : Succ->predecessors()) {
+ if (Pred == Succ || BlockToChain[Pred] == &SuccChain ||
+ (BlockFilter && !BlockFilter->count(Pred)) ||
+ BlockToChain[Pred] == &Chain)
+ continue;
+    // Do backward checking. For case 1 it is actually a redundant check. For
+    // case 2 above, we need backward checking to filter out edges that are
+ // not 'strongly' biased. With profile data available, the check is mostly
+ // redundant too (when threshold prob is set at 50%) unless S has more than
+ // two successors.
+ // BB Pred
+ // \ /
+ // Succ
+ // We select edge BB->Succ if
+ // freq(BB->Succ) > freq(Succ) * HotProb
+ // i.e. freq(BB->Succ) > freq(BB->Succ) * HotProb + freq(Pred->Succ) *
+ // HotProb
+    //      i.e. freq(BB->Succ) * (1 - HotProb) > freq(Pred->Succ) * HotProb
+ BlockFrequency PredEdgeFreq =
+ MBFI->getBlockFreq(Pred) * MBPI->getEdgeProbability(Pred, Succ);
+ if (PredEdgeFreq * HotProb >= CandidateEdgeFreq * HotProb.getCompl()) {
+ BadCFGConflict = true;
+ break;
}
+ }
+ if (BadCFGConflict) {
DEBUG(dbgs() << " " << getBlockName(Succ) << " -> " << SuccProb
- << " (prob)"
- << (SuccChain.LoopPredecessors != 0 ? " (CFG break)" : "")
- << "\n");
+ << " (prob) (non-cold CFG conflict)\n");
+ return true;
+ }
+
+ return false;
+}
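// The backward check above, reduced to scalars: BB->Succ survives only if
// no unscheduled predecessor edge Pred->Succ is at least as important, where
// importance is compared in the cross-multiplied (division-free) form
// freq(Pred->Succ) * HotProb >= freq(BB->Succ) * (1 - HotProb). HotProb is
// expressed as a percentage here; the pass uses BranchProbability instead.
#include <cstdint>

static bool predEdgeDominates(uint64_t PredEdgeFreq,
                              uint64_t CandidateEdgeFreq,
                              unsigned HotProbPercent) {
  return PredEdgeFreq * HotProbPercent >=
         CandidateEdgeFreq * (100 - HotProbPercent);
}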
+
+/// \brief Select the best successor for a block.
+///
+/// This looks across all successors of a particular block and attempts to
+/// select the "best" one to be the layout successor. It only considers direct
+/// successors which also pass the block filter. It will attempt to avoid
+/// breaking CFG structure, but cave and break such structures in the case of
+/// very hot successor edges.
+///
+/// \returns The best successor block found, or null if none are viable.
+MachineBasicBlock *
+MachineBlockPlacement::selectBestSuccessor(MachineBasicBlock *BB,
+ BlockChain &Chain,
+ const BlockFilterSet *BlockFilter) {
+ const BranchProbability HotProb(StaticLikelyProb, 100);
+
+ MachineBasicBlock *BestSucc = nullptr;
+ auto BestProb = BranchProbability::getZero();
+
+ SmallVector<MachineBasicBlock *, 4> Successors;
+ auto AdjustedSumProb =
+ collectViableSuccessors(BB, Chain, BlockFilter, Successors);
+
+ DEBUG(dbgs() << "Attempting merge from: " << getBlockName(BB) << "\n");
+ for (MachineBasicBlock *Succ : Successors) {
+ auto RealSuccProb = MBPI->getEdgeProbability(BB, Succ);
+ BranchProbability SuccProb =
+ getAdjustedProbability(RealSuccProb, AdjustedSumProb);
+
+ // This heuristic is off by default.
+ if (shouldPredBlockBeOutlined(BB, Succ, Chain, BlockFilter, SuccProb,
+ HotProb))
+ return Succ;
+
+ BlockChain &SuccChain = *BlockToChain[Succ];
+ // Skip the edge \c BB->Succ if block \c Succ has a better layout
+  // predecessor that yields a lower global cost.
+ if (hasBetterLayoutPredecessor(BB, Succ, SuccChain, SuccProb, RealSuccProb,
+ Chain, BlockFilter))
+ continue;
+
+ DEBUG(
+ dbgs() << " " << getBlockName(Succ) << " -> " << SuccProb
+ << " (prob)"
+ << (SuccChain.UnscheduledPredecessors != 0 ? " (CFG break)" : "")
+ << "\n");
if (BestSucc && BestProb >= SuccProb)
continue;
BestSucc = Succ;
@@ -513,12 +737,11 @@ MachineBlockPlacement::selectBestSuccessor(MachineBasicBlock *BB,
/// profitable only really makes sense in the context of a loop. This returns
/// the most frequently visited block in the worklist, which in the case of
/// a loop, is the one most desirable to be physically close to the rest of the
-/// loop body in order to improve icache behavior.
+/// loop body in order to improve i-cache behavior.
///
/// \returns The best block found, or null if none are viable.
MachineBasicBlock *MachineBlockPlacement::selectBestCandidateBlock(
- BlockChain &Chain, SmallVectorImpl<MachineBasicBlock *> &WorkList,
- const BlockFilterSet *BlockFilter) {
+ BlockChain &Chain, SmallVectorImpl<MachineBasicBlock *> &WorkList) {
// Once we need to walk the worklist looking for a candidate, cleanup the
// worklist of already placed entries.
// FIXME: If this shows up on profiles, it could be folded (at the cost of
@@ -529,24 +752,51 @@ MachineBasicBlock *MachineBlockPlacement::selectBestCandidateBlock(
}),
WorkList.end());
+ if (WorkList.empty())
+ return nullptr;
+
+ bool IsEHPad = WorkList[0]->isEHPad();
+
MachineBasicBlock *BestBlock = nullptr;
BlockFrequency BestFreq;
for (MachineBasicBlock *MBB : WorkList) {
+ assert(MBB->isEHPad() == IsEHPad);
+
BlockChain &SuccChain = *BlockToChain[MBB];
- if (&SuccChain == &Chain) {
- DEBUG(dbgs() << " " << getBlockName(MBB) << " -> Already merged!\n");
+ if (&SuccChain == &Chain)
continue;
- }
- assert(SuccChain.LoopPredecessors == 0 && "Found CFG-violating block");
+
+    assert(SuccChain.UnscheduledPredecessors == 0 &&
+           "Found CFG-violating block");
BlockFrequency CandidateFreq = MBFI->getBlockFreq(MBB);
DEBUG(dbgs() << " " << getBlockName(MBB) << " -> ";
MBFI->printBlockFreq(dbgs(), CandidateFreq) << " (freq)\n");
- if (BestBlock && BestFreq >= CandidateFreq)
+
+    // For EH pads, we lay out the least probable first, so as to avoid
+    // jumping back from less probable landing pads to more probable ones.
+ //
+ // FIXME: Using probability is probably (!) not the best way to achieve
+ // this. We should probably have a more principled approach to layout
+ // cleanup code.
+ //
+ // The goal is to get:
+ //
+ // +--------------------------+
+ // | V
+ // InnerLp -> InnerCleanup OuterLp -> OuterCleanup -> Resume
+ //
+ // Rather than:
+ //
+ // +-------------------------------------+
+ // V |
+ // OuterLp -> OuterCleanup -> Resume InnerLp -> InnerCleanup
+ if (BestBlock && (IsEHPad ^ (BestFreq >= CandidateFreq)))
continue;
+
BestBlock = MBB;
BestFreq = CandidateFreq;
}
+
return BestBlock;
}
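// How the XOR above flips the selection criterion, shown standalone: for
// ordinary blocks we skip a candidate that is no hotter than the best so
// far, while for EH pads we skip one that is no colder, so the least
// probable pad ends up laid out first. Names here are illustrative only.
#include <cassert>

static bool skipCandidate(bool IsEHPad, unsigned BestFreq,
                          unsigned CandidateFreq) {
  return IsEHPad ^ (BestFreq >= CandidateFreq);
}

int main() {
  assert(skipCandidate(false, 10, 5));   // Normal block: best stays hotter.
  assert(!skipCandidate(false, 10, 20)); // Normal block: hotter wins.
  assert(!skipCandidate(true, 10, 5));   // EH pad: colder candidate wins.
  assert(skipCandidate(true, 10, 20));   // EH pad: skip the hotter one.
  return 0;
}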
@@ -558,10 +808,10 @@ MachineBasicBlock *MachineBlockPlacement::selectBestCandidateBlock(
/// LastUnplacedBlockIt. We update this iterator on each call to avoid
/// re-scanning the entire sequence on repeated calls to this routine.
MachineBasicBlock *MachineBlockPlacement::getFirstUnplacedBlock(
- MachineFunction &F, const BlockChain &PlacedChain,
+ const BlockChain &PlacedChain,
MachineFunction::iterator &PrevUnplacedBlockIt,
const BlockFilterSet *BlockFilter) {
- for (MachineFunction::iterator I = PrevUnplacedBlockIt, E = F.end(); I != E;
+ for (MachineFunction::iterator I = PrevUnplacedBlockIt, E = F->end(); I != E;
++I) {
if (BlockFilter && !BlockFilter->count(&*I))
continue;
@@ -576,22 +826,51 @@ MachineBasicBlock *MachineBlockPlacement::getFirstUnplacedBlock(
return nullptr;
}
+void MachineBlockPlacement::fillWorkLists(
+ MachineBasicBlock *MBB,
+ SmallPtrSetImpl<BlockChain *> &UpdatedPreds,
+ const BlockFilterSet *BlockFilter = nullptr) {
+ BlockChain &Chain = *BlockToChain[MBB];
+ if (!UpdatedPreds.insert(&Chain).second)
+ return;
+
+ assert(Chain.UnscheduledPredecessors == 0);
+ for (MachineBasicBlock *ChainBB : Chain) {
+ assert(BlockToChain[ChainBB] == &Chain);
+ for (MachineBasicBlock *Pred : ChainBB->predecessors()) {
+ if (BlockFilter && !BlockFilter->count(Pred))
+ continue;
+ if (BlockToChain[Pred] == &Chain)
+ continue;
+ ++Chain.UnscheduledPredecessors;
+ }
+ }
+
+ if (Chain.UnscheduledPredecessors != 0)
+ return;
+
+ MBB = *Chain.begin();
+ if (MBB->isEHPad())
+ EHPadWorkList.push_back(MBB);
+ else
+ BlockWorkList.push_back(MBB);
+}
+
void MachineBlockPlacement::buildChain(
MachineBasicBlock *BB, BlockChain &Chain,
- SmallVectorImpl<MachineBasicBlock *> &BlockWorkList,
const BlockFilterSet *BlockFilter) {
- assert(BB);
- assert(BlockToChain[BB] == &Chain);
- MachineFunction &F = *BB->getParent();
- MachineFunction::iterator PrevUnplacedBlockIt = F.begin();
+ assert(BB && "BB must not be null.\n");
+ assert(BlockToChain[BB] == &Chain && "BlockToChainMap mis-match.\n");
+ MachineFunction::iterator PrevUnplacedBlockIt = F->begin();
MachineBasicBlock *LoopHeaderBB = BB;
- markChainSuccessors(Chain, LoopHeaderBB, BlockWorkList, BlockFilter);
+ markChainSuccessors(Chain, LoopHeaderBB, BlockFilter);
BB = *std::prev(Chain.end());
for (;;) {
- assert(BB);
- assert(BlockToChain[BB] == &Chain);
- assert(*std::prev(Chain.end()) == BB);
+ assert(BB && "null block found at end of chain in loop.");
+ assert(BlockToChain[BB] == &Chain && "BlockToChainMap mis-match in loop.");
+ assert(*std::prev(Chain.end()) == BB && "BB Not found at end of chain.");
+
// Look for the best viable successor if there is one to place immediately
// after this block.
@@ -601,11 +880,12 @@ void MachineBlockPlacement::buildChain(
// block among those we've identified as not violating the loop's CFG at
// this point. This won't be a fallthrough, but it will increase locality.
if (!BestSucc)
- BestSucc = selectBestCandidateBlock(Chain, BlockWorkList, BlockFilter);
+ BestSucc = selectBestCandidateBlock(Chain, BlockWorkList);
+ if (!BestSucc)
+ BestSucc = selectBestCandidateBlock(Chain, EHPadWorkList);
if (!BestSucc) {
- BestSucc =
- getFirstUnplacedBlock(F, Chain, PrevUnplacedBlockIt, BlockFilter);
+ BestSucc = getFirstUnplacedBlock(Chain, PrevUnplacedBlockIt, BlockFilter);
if (!BestSucc)
break;
@@ -615,18 +895,18 @@ void MachineBlockPlacement::buildChain(
// Place this block, updating the datastructures to reflect its placement.
BlockChain &SuccChain = *BlockToChain[BestSucc];
-    // Zero out LoopPredecessors for the successor we're about to merge in case
-    // we selected a successor that didn't fit naturally into the CFG.
+    // Zero out UnscheduledPredecessors for the successor we're about to merge
+    // in case we selected a successor that didn't fit naturally into the CFG.
- SuccChain.LoopPredecessors = 0;
- DEBUG(dbgs() << "Merging from " << getBlockNum(BB) << " to "
- << getBlockNum(BestSucc) << "\n");
- markChainSuccessors(SuccChain, LoopHeaderBB, BlockWorkList, BlockFilter);
+ SuccChain.UnscheduledPredecessors = 0;
+ DEBUG(dbgs() << "Merging from " << getBlockName(BB) << " to "
+ << getBlockName(BestSucc) << "\n");
+ markChainSuccessors(SuccChain, LoopHeaderBB, BlockFilter);
Chain.merge(BestSucc, &SuccChain);
BB = *std::prev(Chain.end());
}
DEBUG(dbgs() << "Finished forming chain for header block "
- << getBlockNum(*Chain.begin()) << "\n");
+ << getBlockName(*Chain.begin()) << "\n");
}
/// \brief Find the best loop top block for layout.
@@ -673,8 +953,10 @@ MachineBlockPlacement::findBestLoopTop(MachineLoop &L,
}
// If no direct predecessor is fine, just use the loop header.
- if (!BestPred)
+ if (!BestPred) {
+ DEBUG(dbgs() << " final top unchanged\n");
return L.getHeader();
+ }
// Walk backwards through any straight line of predecessors.
while (BestPred->pred_size() == 1 &&
@@ -692,7 +974,7 @@ MachineBlockPlacement::findBestLoopTop(MachineLoop &L,
/// block to layout at the top of the loop. Typically this is done to maximize
/// fallthrough opportunities.
MachineBasicBlock *
-MachineBlockPlacement::findBestLoopExit(MachineFunction &F, MachineLoop &L,
+MachineBlockPlacement::findBestLoopExit(MachineLoop &L,
const BlockFilterSet &LoopBlockSet) {
// We don't want to layout the loop linearly in all cases. If the loop header
// is just a normal basic block in the loop, we want to look for what block
@@ -710,7 +992,7 @@ MachineBlockPlacement::findBestLoopExit(MachineFunction &F, MachineLoop &L,
unsigned BestExitLoopDepth = 0;
MachineBasicBlock *ExitingBB = nullptr;
// If there are exits to outer loops, loop rotation can severely limit
- // fallthrough opportunites unless it selects such an exit. Keep a set of
+ // fallthrough opportunities unless it selects such an exit. Keep a set of
// blocks where rotating to exit with that block will reach an outer loop.
SmallPtrSet<MachineBasicBlock *, 4> BlocksExitingToOuterLoop;
@@ -780,7 +1062,6 @@ MachineBlockPlacement::findBestLoopExit(MachineFunction &F, MachineLoop &L,
// Restore the old exiting state, no viable looping successor was found.
ExitingBB = OldExitingBB;
BestExitEdgeFreq = OldBestExitEdgeFreq;
- continue;
}
}
// Without a candidate exiting block or with only a single block in the
@@ -973,7 +1254,7 @@ void MachineBlockPlacement::rotateLoopWithProfile(
}
}
- DEBUG(dbgs() << "The cost of loop rotation by making " << getBlockNum(*Iter)
+ DEBUG(dbgs() << "The cost of loop rotation by making " << getBlockName(*Iter)
<< " to the top: " << Cost.getFrequency() << "\n");
if (Cost < SmallestRotationCost) {
@@ -983,7 +1264,7 @@ void MachineBlockPlacement::rotateLoopWithProfile(
}
if (RotationPos != LoopChain.end()) {
- DEBUG(dbgs() << "Rotate loop by making " << getBlockNum(*RotationPos)
+ DEBUG(dbgs() << "Rotate loop by making " << getBlockName(*RotationPos)
<< " to the top\n");
std::rotate(LoopChain.begin(), RotationPos, LoopChain.end());
}
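// What std::rotate does to the loop chain above: the block at RotationPos
// becomes the new chain front while the relative order of all blocks is
// preserved. A small sketch with integers standing in for basic blocks:
#include <algorithm>
#include <cassert>
#include <vector>

int main() {
  std::vector<int> Chain = {0, 1, 2, 3, 4};
  std::rotate(Chain.begin(), Chain.begin() + 3, Chain.end());
  assert((Chain == std::vector<int>{3, 4, 0, 1, 2}));
  return 0;
}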
@@ -994,7 +1275,7 @@ void MachineBlockPlacement::rotateLoopWithProfile(
/// When profile data is available, exclude cold blocks from the returned set;
/// otherwise, collect all blocks in the loop.
MachineBlockPlacement::BlockFilterSet
-MachineBlockPlacement::collectLoopBlockSet(MachineFunction &F, MachineLoop &L) {
+MachineBlockPlacement::collectLoopBlockSet(MachineLoop &L) {
BlockFilterSet LoopBlockSet;
// Filter cold blocks off from LoopBlockSet when profile data is available.
@@ -1006,7 +1287,7 @@ MachineBlockPlacement::collectLoopBlockSet(MachineFunction &F, MachineLoop &L) {
// will be merged into the first outer loop chain for which this block is not
// cold anymore. This needs precise profile data and we only do this when
// profile data is available.
- if (F.getFunction()->getEntryCount()) {
+ if (F->getFunction()->getEntryCount()) {
BlockFrequency LoopFreq(0);
for (auto LoopPred : L.getHeader()->predecessors())
if (!L.contains(LoopPred))
@@ -1031,21 +1312,22 @@ MachineBlockPlacement::collectLoopBlockSet(MachineFunction &F, MachineLoop &L) {
/// as much as possible. We can then stitch the chains together in a way which
/// both preserves the topological structure and minimizes taken conditional
/// branches.
-void MachineBlockPlacement::buildLoopChains(MachineFunction &F,
- MachineLoop &L) {
+void MachineBlockPlacement::buildLoopChains(MachineLoop &L) {
// First recurse through any nested loops, building chains for those inner
// loops.
for (MachineLoop *InnerLoop : L)
- buildLoopChains(F, *InnerLoop);
+ buildLoopChains(*InnerLoop);
- SmallVector<MachineBasicBlock *, 16> BlockWorkList;
- BlockFilterSet LoopBlockSet = collectLoopBlockSet(F, L);
+ assert(BlockWorkList.empty());
+ assert(EHPadWorkList.empty());
+ BlockFilterSet LoopBlockSet = collectLoopBlockSet(L);
// Check if we have profile data for this function. If yes, we will rotate
// this loop by modeling costs more precisely which requires the profile data
// for better layout.
bool RotateLoopWithProfile =
- PreciseRotationCost && F.getFunction()->getEntryCount();
+ ForcePreciseRotationCost ||
+ (PreciseRotationCost && F->getFunction()->getEntryCount());
// First check to see if there is an obviously preferable top block for the
// loop. This will default to the header, but may end up as one of the
@@ -1060,7 +1342,7 @@ void MachineBlockPlacement::buildLoopChains(MachineFunction &F,
// branches by placing an exit edge at the bottom.
MachineBasicBlock *ExitingBB = nullptr;
if (!RotateLoopWithProfile && LoopTop == L.getHeader())
- ExitingBB = findBestLoopExit(F, L, LoopBlockSet);
+ ExitingBB = findBestLoopExit(L, LoopBlockSet);
BlockChain &LoopChain = *BlockToChain[LoopTop];
@@ -1068,29 +1350,13 @@ void MachineBlockPlacement::buildLoopChains(MachineFunction &F,
// walk the blocks, and use a set to prevent visiting a particular chain
// twice.
SmallPtrSet<BlockChain *, 4> UpdatedPreds;
- assert(LoopChain.LoopPredecessors == 0);
+ assert(LoopChain.UnscheduledPredecessors == 0);
UpdatedPreds.insert(&LoopChain);
- for (MachineBasicBlock *LoopBB : LoopBlockSet) {
- BlockChain &Chain = *BlockToChain[LoopBB];
- if (!UpdatedPreds.insert(&Chain).second)
- continue;
+ for (MachineBasicBlock *LoopBB : LoopBlockSet)
+ fillWorkLists(LoopBB, UpdatedPreds, &LoopBlockSet);
- assert(Chain.LoopPredecessors == 0);
- for (MachineBasicBlock *ChainBB : Chain) {
- assert(BlockToChain[ChainBB] == &Chain);
- for (MachineBasicBlock *Pred : ChainBB->predecessors()) {
- if (BlockToChain[Pred] == &Chain || !LoopBlockSet.count(Pred))
- continue;
- ++Chain.LoopPredecessors;
- }
- }
-
- if (Chain.LoopPredecessors == 0)
- BlockWorkList.push_back(*Chain.begin());
- }
-
- buildChain(LoopTop, LoopChain, BlockWorkList, &LoopBlockSet);
+ buildChain(LoopTop, LoopChain, &LoopBlockSet);
if (RotateLoopWithProfile)
rotateLoopWithProfile(LoopChain, L, LoopBlockSet);
@@ -1100,7 +1366,7 @@ void MachineBlockPlacement::buildLoopChains(MachineFunction &F,
DEBUG({
// Crash at the end so we get all of the debugging output first.
bool BadLoop = false;
- if (LoopChain.LoopPredecessors) {
+ if (LoopChain.UnscheduledPredecessors) {
BadLoop = true;
dbgs() << "Loop chain contains a block without its preds placed!\n"
<< " Loop header: " << getBlockName(*L.block_begin()) << "\n"
@@ -1129,13 +1395,42 @@ void MachineBlockPlacement::buildLoopChains(MachineFunction &F,
}
assert(!BadLoop && "Detected problems with the placement of this loop.");
});
+
+ BlockWorkList.clear();
+ EHPadWorkList.clear();
+}
+
+/// When OutlineOptionalBranches is on, this method collects BBs that
+/// dominate all terminator blocks of the function \p F.
+void MachineBlockPlacement::collectMustExecuteBBs() {
+ if (OutlineOptionalBranches) {
+ // Find the nearest common dominator of all of F's terminators.
+ MachineBasicBlock *Terminator = nullptr;
+ for (MachineBasicBlock &MBB : *F) {
+ if (MBB.succ_size() == 0) {
+ if (Terminator == nullptr)
+ Terminator = &MBB;
+ else
+ Terminator = MDT->findNearestCommonDominator(Terminator, &MBB);
+ }
+ }
+
+ // MBBs dominating this common dominator are unavoidable.
+ UnavoidableBlocks.clear();
+ for (MachineBasicBlock &MBB : *F) {
+ if (MDT->dominates(&MBB, Terminator)) {
+ UnavoidableBlocks.insert(&MBB);
+ }
+ }
+ }
}
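// The pairwise fold above in isolation: computing the nearest common
// dominator of all sink blocks is a left fold over the block list. FindNCD
// stands in for MachineDominatorTree::findNearestCommonDominator; this is a
// sketch of the shape of the computation, not of the dominator tree itself.
#include <vector>

template <typename Block, typename NCDFn>
Block *commonDominatorOfSinks(const std::vector<Block *> &Sinks,
                              NCDFn FindNCD) {
  Block *Dom = nullptr;
  for (Block *B : Sinks)
    Dom = Dom ? FindNCD(Dom, B) : B; // Fold, exactly as the loop above does.
  return Dom;
}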
-void MachineBlockPlacement::buildCFGChains(MachineFunction &F) {
+void MachineBlockPlacement::buildCFGChains() {
// Ensure that every BB in the function has an associated chain to simplify
// the assumptions of the remaining algorithm.
SmallVector<MachineOperand, 4> Cond; // For AnalyzeBranch.
- for (MachineFunction::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) {
+ for (MachineFunction::iterator FI = F->begin(), FE = F->end(); FI != FE;
+ ++FI) {
MachineBasicBlock *BB = &*FI;
BlockChain *Chain =
new (ChainAllocator.Allocate()) BlockChain(BlockToChain, BB);
@@ -1144,7 +1439,7 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) {
for (;;) {
Cond.clear();
MachineBasicBlock *TBB = nullptr, *FBB = nullptr; // For AnalyzeBranch.
- if (!TII->AnalyzeBranch(*BB, TBB, FBB, Cond) || !FI->canFallThrough())
+ if (!TII->analyzeBranch(*BB, TBB, FBB, Cond) || !FI->canFallThrough())
break;
MachineFunction::iterator NextFI = std::next(FI);
@@ -1161,55 +1456,22 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) {
}
}
- if (OutlineOptionalBranches) {
- // Find the nearest common dominator of all of F's terminators.
- MachineBasicBlock *Terminator = nullptr;
- for (MachineBasicBlock &MBB : F) {
- if (MBB.succ_size() == 0) {
- if (Terminator == nullptr)
- Terminator = &MBB;
- else
- Terminator = MDT->findNearestCommonDominator(Terminator, &MBB);
- }
- }
-
- // MBBs dominating this common dominator are unavoidable.
- UnavoidableBlocks.clear();
- for (MachineBasicBlock &MBB : F) {
- if (MDT->dominates(&MBB, Terminator)) {
- UnavoidableBlocks.insert(&MBB);
- }
- }
- }
+  // Enabled with the OutlineOptionalBranches option.
+ collectMustExecuteBBs();
// Build any loop-based chains.
for (MachineLoop *L : *MLI)
- buildLoopChains(F, *L);
+ buildLoopChains(*L);
- SmallVector<MachineBasicBlock *, 16> BlockWorkList;
+ assert(BlockWorkList.empty());
+ assert(EHPadWorkList.empty());
SmallPtrSet<BlockChain *, 4> UpdatedPreds;
- for (MachineBasicBlock &MBB : F) {
- BlockChain &Chain = *BlockToChain[&MBB];
- if (!UpdatedPreds.insert(&Chain).second)
- continue;
-
- assert(Chain.LoopPredecessors == 0);
- for (MachineBasicBlock *ChainBB : Chain) {
- assert(BlockToChain[ChainBB] == &Chain);
- for (MachineBasicBlock *Pred : ChainBB->predecessors()) {
- if (BlockToChain[Pred] == &Chain)
- continue;
- ++Chain.LoopPredecessors;
- }
- }
-
- if (Chain.LoopPredecessors == 0)
- BlockWorkList.push_back(*Chain.begin());
- }
+ for (MachineBasicBlock &MBB : *F)
+ fillWorkLists(&MBB, UpdatedPreds);
- BlockChain &FunctionChain = *BlockToChain[&F.front()];
- buildChain(&F.front(), FunctionChain, BlockWorkList);
+ BlockChain &FunctionChain = *BlockToChain[&F->front()];
+ buildChain(&F->front(), FunctionChain);
#ifndef NDEBUG
typedef SmallPtrSet<MachineBasicBlock *, 16> FunctionBlockSetType;
@@ -1218,7 +1480,7 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) {
// Crash at the end so we get all of the debugging output first.
bool BadFunc = false;
FunctionBlockSetType FunctionBlockSet;
- for (MachineBasicBlock &MBB : F)
+ for (MachineBasicBlock &MBB : *F)
FunctionBlockSet.insert(&MBB);
for (MachineBasicBlock *ChainBB : FunctionChain)
@@ -1238,13 +1500,14 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) {
});
// Splice the blocks into place.
- MachineFunction::iterator InsertPos = F.begin();
+ MachineFunction::iterator InsertPos = F->begin();
+ DEBUG(dbgs() << "[MBP] Function: "<< F->getName() << "\n");
for (MachineBasicBlock *ChainBB : FunctionChain) {
DEBUG(dbgs() << (ChainBB == *FunctionChain.begin() ? "Placing chain "
: " ... ")
<< getBlockName(ChainBB) << "\n");
if (InsertPos != MachineFunction::iterator(ChainBB))
- F.splice(InsertPos, ChainBB);
+ F->splice(InsertPos, ChainBB);
else
++InsertPos;
@@ -1258,69 +1521,90 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) {
// boiler plate.
Cond.clear();
MachineBasicBlock *TBB = nullptr, *FBB = nullptr; // For AnalyzeBranch.
- if (!TII->AnalyzeBranch(*PrevBB, TBB, FBB, Cond)) {
- // The "PrevBB" is not yet updated to reflect current code layout, so,
- // o. it may fall-through to a block without explict "goto" instruction
- // before layout, and no longer fall-through it after layout; or
- // o. just opposite.
- //
- // AnalyzeBranch() may return erroneous value for FBB when these two
- // situations take place. For the first scenario FBB is mistakenly set
- // NULL; for the 2nd scenario, the FBB, which is expected to be NULL,
- // is mistakenly pointing to "*BI".
- //
- bool needUpdateBr = true;
- if (!Cond.empty() && (!FBB || FBB == ChainBB)) {
- PrevBB->updateTerminator();
- needUpdateBr = false;
- Cond.clear();
- TBB = FBB = nullptr;
- if (TII->AnalyzeBranch(*PrevBB, TBB, FBB, Cond)) {
- // FIXME: This should never take place.
- TBB = FBB = nullptr;
- }
- }
+ // The "PrevBB" is not yet updated to reflect current code layout, so,
+ // o. it may fall-through to a block without explicit "goto" instruction
+ // before layout, and no longer fall-through it after layout; or
+ // o. just opposite.
+ //
+  // analyzeBranch() may return an erroneous value for FBB when these two
+  // situations take place. In the first scenario FBB is mistakenly set to
+  // NULL; in the second, FBB, which is expected to be NULL, mistakenly
+  // points to "*BI".
+  // Thus, if a future change needs to use FBB before the layout is settled,
+  // it has to correct FBB first with code similar to the following:
+ //
+ // if (!Cond.empty() && (!FBB || FBB == ChainBB)) {
+ // PrevBB->updateTerminator();
+ // Cond.clear();
+ // TBB = FBB = nullptr;
+ // if (TII->analyzeBranch(*PrevBB, TBB, FBB, Cond)) {
+ // // FIXME: This should never take place.
+ // TBB = FBB = nullptr;
+ // }
+ // }
+ if (!TII->analyzeBranch(*PrevBB, TBB, FBB, Cond))
+ PrevBB->updateTerminator();
+ }
+
+ // Fixup the last block.
+ Cond.clear();
+ MachineBasicBlock *TBB = nullptr, *FBB = nullptr; // For AnalyzeBranch.
+ if (!TII->analyzeBranch(F->back(), TBB, FBB, Cond))
+ F->back().updateTerminator();
+
+ BlockWorkList.clear();
+ EHPadWorkList.clear();
+}
+
+void MachineBlockPlacement::optimizeBranches() {
+ BlockChain &FunctionChain = *BlockToChain[&F->front()];
+ SmallVector<MachineOperand, 4> Cond; // For AnalyzeBranch.
+
+ // Now that all the basic blocks in the chain have the proper layout,
+  // make a final call to analyzeBranch with AllowModify set.
+ // Indeed, the target may be able to optimize the branches in a way we
+ // cannot because all branches may not be analyzable.
+ // E.g., the target may be able to remove an unconditional branch to
+ // a fallthrough when it occurs after predicated terminators.
+ for (MachineBasicBlock *ChainBB : FunctionChain) {
+ Cond.clear();
+ MachineBasicBlock *TBB = nullptr, *FBB = nullptr; // For AnalyzeBranch.
+ if (!TII->analyzeBranch(*ChainBB, TBB, FBB, Cond, /*AllowModify*/ true)) {
// If PrevBB has a two-way branch, try to re-order the branches
// such that we branch to the successor with higher probability first.
if (TBB && !Cond.empty() && FBB &&
- MBPI->getEdgeProbability(PrevBB, FBB) >
- MBPI->getEdgeProbability(PrevBB, TBB) &&
+ MBPI->getEdgeProbability(ChainBB, FBB) >
+ MBPI->getEdgeProbability(ChainBB, TBB) &&
!TII->ReverseBranchCondition(Cond)) {
DEBUG(dbgs() << "Reverse order of the two branches: "
- << getBlockName(PrevBB) << "\n");
+ << getBlockName(ChainBB) << "\n");
DEBUG(dbgs() << " Edge probability: "
- << MBPI->getEdgeProbability(PrevBB, FBB) << " vs "
- << MBPI->getEdgeProbability(PrevBB, TBB) << "\n");
+ << MBPI->getEdgeProbability(ChainBB, FBB) << " vs "
+ << MBPI->getEdgeProbability(ChainBB, TBB) << "\n");
DebugLoc dl; // FIXME: this is nowhere
- TII->RemoveBranch(*PrevBB);
- TII->InsertBranch(*PrevBB, FBB, TBB, Cond, dl);
- needUpdateBr = true;
+ TII->RemoveBranch(*ChainBB);
+ TII->InsertBranch(*ChainBB, FBB, TBB, Cond, dl);
+ ChainBB->updateTerminator();
}
- if (needUpdateBr)
- PrevBB->updateTerminator();
}
}
+}
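// A scalar sketch of the reordering rule above: with an analyzable two-way
// branch, put the more probable destination on the conditional branch so the
// likelier path is taken first. The struct and probabilities are assumptions
// for illustration; the pass calls ReverseBranchCondition() to do the swap.
#include <utility>

struct TwoWayBranch {
  unsigned TakenDest;       // Destination of the conditional branch (TBB).
  unsigned FallthroughDest; // Destination of the trailing jump (FBB).
};

static void reorderByProbability(TwoWayBranch &B, double ProbTaken,
                                 double ProbFallthrough) {
  if (ProbFallthrough > ProbTaken)
    std::swap(B.TakenDest, B.FallthroughDest); // Mirrors the branch reversal.
}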
- // Fixup the last block.
- Cond.clear();
- MachineBasicBlock *TBB = nullptr, *FBB = nullptr; // For AnalyzeBranch.
- if (!TII->AnalyzeBranch(F.back(), TBB, FBB, Cond))
- F.back().updateTerminator();
-
+void MachineBlockPlacement::alignBlocks() {
// Walk through the backedges of the function now that we have fully laid out
// the basic blocks and align the destination of each backedge. We don't rely
// exclusively on the loop info here so that we can align backedges in
// unnatural CFGs and backedges that were introduced purely because of the
// loop rotations done during this layout pass.
- // FIXME: Use Function::optForSize().
- if (F.getFunction()->hasFnAttribute(Attribute::OptimizeForSize))
+ if (F->getFunction()->optForSize())
return;
+ BlockChain &FunctionChain = *BlockToChain[&F->front()];
if (FunctionChain.begin() == FunctionChain.end())
return; // Empty chain.
const BranchProbability ColdProb(1, 5); // 20%
- BlockFrequency EntryFreq = MBFI->getBlockFreq(&F.front());
+ BlockFrequency EntryFreq = MBFI->getBlockFreq(&F->front());
BlockFrequency WeightedEntryFreq = EntryFreq * ColdProb;
for (MachineBasicBlock *ChainBB : FunctionChain) {
if (ChainBB == *FunctionChain.begin())
@@ -1334,11 +1618,6 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) {
if (!L)
continue;
- if (AlignAllLoops) {
- ChainBB->setAlignment(AlignAllLoops);
- continue;
- }
-
unsigned Align = TLI->getPrefLoopAlignment(L);
if (!Align)
continue; // Don't care about loop alignment.
@@ -1380,31 +1659,67 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) {
}
}
-bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &F) {
- // Check for single-block functions and skip them.
- if (std::next(F.begin()) == F.end())
+bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) {
+ if (skipFunction(*MF.getFunction()))
return false;
- if (skipOptnoneFunction(*F.getFunction()))
+ // Check for single-block functions and skip them.
+ if (std::next(MF.begin()) == MF.end())
return false;
+ F = &MF;
MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
- MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
+ MBFI = llvm::make_unique<BranchFolder::MBFIWrapper>(
+ getAnalysis<MachineBlockFrequencyInfo>());
MLI = &getAnalysis<MachineLoopInfo>();
- TII = F.getSubtarget().getInstrInfo();
- TLI = F.getSubtarget().getTargetLowering();
+ TII = MF.getSubtarget().getInstrInfo();
+ TLI = MF.getSubtarget().getTargetLowering();
MDT = &getAnalysis<MachineDominatorTree>();
assert(BlockToChain.empty());
- buildCFGChains(F);
+ buildCFGChains();
+
+ // Changing the layout can create new tail merging opportunities.
+ TargetPassConfig *PassConfig = &getAnalysis<TargetPassConfig>();
+ // TailMerge can create jump into if branches that make CFG irreducible for
+ // HW that requires structured CFG.
+ bool EnableTailMerge = !MF.getTarget().requiresStructuredCFG() &&
+ PassConfig->getEnableTailMerge() &&
+ BranchFoldPlacement;
+  // There is no tail merging opportunity if the function has fewer than
+  // four blocks.
+ if (MF.size() > 3 && EnableTailMerge) {
+ BranchFolder BF(/*EnableTailMerge=*/true, /*CommonHoist=*/false, *MBFI,
+ *MBPI);
+
+ if (BF.OptimizeFunction(MF, TII, MF.getSubtarget().getRegisterInfo(),
+ getAnalysisIfAvailable<MachineModuleInfo>(), MLI,
+ /*AfterBlockPlacement=*/true)) {
+ // Redo the layout if tail merging creates/removes/moves blocks.
+ BlockToChain.clear();
+ ChainAllocator.DestroyAll();
+ buildCFGChains();
+ }
+ }
+
+ optimizeBranches();
+ alignBlocks();
BlockToChain.clear();
ChainAllocator.DestroyAll();
if (AlignAllBlock)
// Align all of the blocks in the function to a specific alignment.
- for (MachineBasicBlock &MBB : F)
+ for (MachineBasicBlock &MBB : MF)
MBB.setAlignment(AlignAllBlock);
+ else if (AlignAllNonFallThruBlocks) {
+ // Align all of the blocks that have no fall-through predecessors to a
+ // specific alignment.
+ for (auto MBI = std::next(MF.begin()), MBE = MF.end(); MBI != MBE; ++MBI) {
+ auto LayoutPred = std::prev(MBI);
+ if (!LayoutPred->isSuccessor(&*MBI))
+ MBI->setAlignment(AlignAllNonFallThruBlocks);
+ }
+ }
// We always return true as we have no way to track whether the final order
// differs from the original order.
diff --git a/contrib/llvm/lib/CodeGen/MachineBranchProbabilityInfo.cpp b/contrib/llvm/lib/CodeGen/MachineBranchProbabilityInfo.cpp
index cf6d401..fe73406 100644
--- a/contrib/llvm/lib/CodeGen/MachineBranchProbabilityInfo.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineBranchProbabilityInfo.cpp
@@ -24,9 +24,21 @@ INITIALIZE_PASS_BEGIN(MachineBranchProbabilityInfo, "machine-branch-prob",
INITIALIZE_PASS_END(MachineBranchProbabilityInfo, "machine-branch-prob",
"Machine Branch Probability Analysis", false, true)
+cl::opt<unsigned>
+ StaticLikelyProb("static-likely-prob",
+ cl::desc("branch probability threshold in percentage"
+ "to be considered very likely"),
+ cl::init(80), cl::Hidden);
+
+cl::opt<unsigned> ProfileLikelyProb(
+ "profile-likely-prob",
+ cl::desc("branch probability threshold in percentage to be considered"
+ " very likely when profile is available"),
+ cl::init(51), cl::Hidden);
+
char MachineBranchProbabilityInfo::ID = 0;
-void MachineBranchProbabilityInfo::anchor() { }
+void MachineBranchProbabilityInfo::anchor() {}
BranchProbability MachineBranchProbabilityInfo::getEdgeProbability(
const MachineBasicBlock *Src,
@@ -42,11 +54,9 @@ BranchProbability MachineBranchProbabilityInfo::getEdgeProbability(
std::find(Src->succ_begin(), Src->succ_end(), Dst));
}
-bool
-MachineBranchProbabilityInfo::isEdgeHot(const MachineBasicBlock *Src,
- const MachineBasicBlock *Dst) const {
- // Hot probability is at least 4/5 = 80%
- static BranchProbability HotProb(4, 5);
+bool MachineBranchProbabilityInfo::isEdgeHot(
+ const MachineBasicBlock *Src, const MachineBasicBlock *Dst) const {
+ BranchProbability HotProb(StaticLikelyProb, 100);
return getEdgeProbability(Src, Dst) > HotProb;
}
@@ -63,7 +73,7 @@ MachineBranchProbabilityInfo::getHotSucc(MachineBasicBlock *MBB) const {
}
}
- static BranchProbability HotProb(4, 5);
+ BranchProbability HotProb(StaticLikelyProb, 100);
if (getEdgeProbability(MBB, MaxSucc) >= HotProb)
return MaxSucc;
diff --git a/contrib/llvm/lib/CodeGen/MachineCSE.cpp b/contrib/llvm/lib/CodeGen/MachineCSE.cpp
index aad376c..1209f73 100644
--- a/contrib/llvm/lib/CodeGen/MachineCSE.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineCSE.cpp
@@ -352,6 +352,12 @@ bool MachineCSE::isCSECandidate(MachineInstr *MI) {
// This is a trivial form of alias analysis.
return false;
}
+
+  // Ignore stack guard loads; otherwise the register that holds the CSEed
+  // value may be spilled and get loaded back with corrupted data.
+ if (MI->getOpcode() == TargetOpcode::LOAD_STACK_GUARD)
+ return false;
+
return true;
}
@@ -383,7 +389,7 @@ bool MachineCSE::isProfitableToCSE(unsigned CSReg, unsigned Reg,
// Heuristics #1: Don't CSE "cheap" computation if the def is not local or in
// an immediate predecessor. We don't want to increase register pressure and
// end up causing other computation to be spilled.
- if (TII->isAsCheapAsAMove(MI)) {
+ if (TII->isAsCheapAsAMove(*MI)) {
MachineBasicBlock *CSBB = CSMI->getParent();
MachineBasicBlock *BB = MI->getParent();
if (CSBB != BB && !CSBB->isSuccessor(BB))
@@ -472,8 +478,7 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) {
// Commute commutable instructions.
bool Commuted = false;
if (!FoundCSE && MI->isCommutable()) {
- MachineInstr *NewMI = TII->commuteInstruction(MI);
- if (NewMI) {
+ if (MachineInstr *NewMI = TII->commuteInstruction(*MI)) {
Commuted = true;
FoundCSE = VNT.count(NewMI);
if (NewMI != MI) {
@@ -482,7 +487,7 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) {
Changed = true;
} else if (!FoundCSE)
// MI was changed but it didn't help, commute it back!
- (void)TII->commuteInstruction(MI);
+ (void)TII->commuteInstruction(*MI);
}
}
@@ -698,7 +703,7 @@ bool MachineCSE::PerformCSE(MachineDomTreeNode *Node) {
}
bool MachineCSE::runOnMachineFunction(MachineFunction &MF) {
- if (skipOptnoneFunction(*MF.getFunction()))
+ if (skipFunction(*MF.getFunction()))
return false;
TII = MF.getSubtarget().getInstrInfo();
diff --git a/contrib/llvm/lib/CodeGen/MachineCombiner.cpp b/contrib/llvm/lib/CodeGen/MachineCombiner.cpp
index fa43c4d..6b5c6ba 100644
--- a/contrib/llvm/lib/CodeGen/MachineCombiner.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineCombiner.cpp
@@ -13,8 +13,8 @@
#define DEBUG_TYPE "machine-combiner"
-#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
@@ -24,7 +24,6 @@
#include "llvm/CodeGen/MachineTraceMetrics.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetSchedule.h"
-#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
@@ -41,6 +40,7 @@ class MachineCombiner : public MachineFunctionPass {
const TargetRegisterInfo *TRI;
MCSchedModel SchedModel;
MachineRegisterInfo *MRI;
+ MachineLoopInfo *MLI; // Current MachineLoopInfo
MachineTraceMetrics *Traces;
MachineTraceMetrics::Ensemble *MinInstr;
@@ -87,6 +87,7 @@ char &llvm::MachineCombinerID = MachineCombiner::ID;
INITIALIZE_PASS_BEGIN(MachineCombiner, "machine-combiner",
"Machine InstCombiner", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
INITIALIZE_PASS_DEPENDENCY(MachineTraceMetrics)
INITIALIZE_PASS_END(MachineCombiner, "machine-combiner", "Machine InstCombiner",
false, false)
@@ -94,6 +95,7 @@ INITIALIZE_PASS_END(MachineCombiner, "machine-combiner", "Machine InstCombiner",
void MachineCombiner::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesCFG();
AU.addPreserved<MachineDominatorTree>();
+ AU.addRequired<MachineLoopInfo>();
AU.addPreserved<MachineLoopInfo>();
AU.addRequired<MachineTraceMetrics>();
AU.addPreserved<MachineTraceMetrics>();
@@ -156,7 +158,7 @@ MachineCombiner::getDepth(SmallVectorImpl<MachineInstr *> &InsInstrs,
} else {
MachineInstr *DefInstr = getOperandDef(MO);
if (DefInstr) {
- DepthOp = BlockTrace.getInstrCycles(DefInstr).Depth;
+ DepthOp = BlockTrace.getInstrCycles(*DefInstr).Depth;
LatencyOp = TSchedModel.computeOperandLatency(
DefInstr, DefInstr->findRegisterDefOperandIdx(MO.getReg()),
InstrPtr, InstrPtr->findRegisterUseOperandIdx(MO.getReg()));
@@ -198,7 +200,7 @@ unsigned MachineCombiner::getLatency(MachineInstr *Root, MachineInstr *NewRoot,
RI++;
MachineInstr *UseMO = RI->getParent();
unsigned LatencyOp = 0;
- if (UseMO && BlockTrace.isDepInTrace(Root, UseMO)) {
+ if (UseMO && BlockTrace.isDepInTrace(*Root, *UseMO)) {
LatencyOp = TSchedModel.computeOperandLatency(
NewRoot, NewRoot->findRegisterDefOperandIdx(MO.getReg()), UseMO,
UseMO->findRegisterUseOperandIdx(MO.getReg()));
@@ -250,7 +252,7 @@ bool MachineCombiner::improvesCriticalPathLen(
// Get depth and latency of NewRoot and Root.
unsigned NewRootDepth = getDepth(InsInstrs, InstrIdxForVirtReg, BlockTrace);
- unsigned RootDepth = BlockTrace.getInstrCycles(Root).Depth;
+ unsigned RootDepth = BlockTrace.getInstrCycles(*Root).Depth;
DEBUG(dbgs() << "DEPENDENCE DATA FOR " << Root << "\n";
dbgs() << " NewRootDepth: " << NewRootDepth << "\n";
@@ -269,7 +271,7 @@ bool MachineCombiner::improvesCriticalPathLen(
// even if the instruction depths (data dependency cycles) become worse.
unsigned NewRootLatency = getLatency(Root, NewRoot, BlockTrace);
unsigned RootLatency = TSchedModel.computeInstrLatency(Root);
- unsigned RootSlack = BlockTrace.getInstrSlack(Root);
+ unsigned RootSlack = BlockTrace.getInstrSlack(*Root);
DEBUG(dbgs() << " NewRootLatency: " << NewRootLatency << "\n";
dbgs() << " RootLatency: " << RootLatency << "\n";
@@ -281,7 +283,7 @@ bool MachineCombiner::improvesCriticalPathLen(
unsigned NewCycleCount = NewRootDepth + NewRootLatency;
unsigned OldCycleCount = RootDepth + RootLatency + RootSlack;
-
+
return NewCycleCount <= OldCycleCount;
}
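// The acceptance test above, spelled out with plain integers: the combined
// sequence is kept when its root's depth plus latency does not exceed the
// old root's depth, latency, and slack put together. A hedged restatement,
// not the MachineCombiner API.
static bool improvesCriticalPath(unsigned NewDepth, unsigned NewLatency,
                                 unsigned OldDepth, unsigned OldLatency,
                                 unsigned Slack) {
  return NewDepth + NewLatency <= OldDepth + OldLatency + Slack;
}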
@@ -355,6 +357,8 @@ bool MachineCombiner::combineInstructions(MachineBasicBlock *MBB) {
DEBUG(dbgs() << "Combining MBB " << MBB->getName() << "\n");
auto BlockIter = MBB->begin();
+ // Check if the block is in a loop.
+ const MachineLoop *ML = MLI->getLoopFor(MBB);
while (BlockIter != MBB->end()) {
auto &MI = *BlockIter++;
@@ -407,11 +411,15 @@ bool MachineCombiner::combineInstructions(MachineBasicBlock *MBB) {
if (!NewInstCount)
continue;
+ bool SubstituteAlways = false;
+ if (ML && TII->isThroughputPattern(P))
+ SubstituteAlways = true;
+
// Substitute when we optimize for codesize and the new sequence has
// fewer instructions OR
// the new sequence neither lengthens the critical path nor increases
// resource pressure.
- if (doSubstitute(NewInstCount, OldInstCount) ||
+ if (SubstituteAlways || doSubstitute(NewInstCount, OldInstCount) ||
(improvesCriticalPathLen(MBB, &MI, BlockTrace, InsInstrs,
InstrIdxForVirtReg, P) &&
preservesResourceLen(MBB, BlockTrace, InsInstrs, DelInstrs))) {
@@ -448,6 +456,7 @@ bool MachineCombiner::runOnMachineFunction(MachineFunction &MF) {
SchedModel = STI.getSchedModel();
TSchedModel.init(SchedModel, &STI, TII);
MRI = &MF.getRegInfo();
+ MLI = &getAnalysis<MachineLoopInfo>();
Traces = &getAnalysis<MachineTraceMetrics>();
MinInstr = nullptr;
OptSize = MF.getFunction()->optForSize();
diff --git a/contrib/llvm/lib/CodeGen/MachineCopyPropagation.cpp b/contrib/llvm/lib/CodeGen/MachineCopyPropagation.cpp
index a686341..8fdf39d 100644
--- a/contrib/llvm/lib/CodeGen/MachineCopyPropagation.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineCopyPropagation.cpp
@@ -21,7 +21,6 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
@@ -33,27 +32,47 @@ using namespace llvm;
STATISTIC(NumDeletes, "Number of dead copies deleted");
namespace {
+ typedef SmallVector<unsigned, 4> RegList;
+ typedef DenseMap<unsigned, RegList> SourceMap;
+ typedef DenseMap<unsigned, MachineInstr*> Reg2MIMap;
+
class MachineCopyPropagation : public MachineFunctionPass {
const TargetRegisterInfo *TRI;
const TargetInstrInfo *TII;
- MachineRegisterInfo *MRI;
+ const MachineRegisterInfo *MRI;
public:
static char ID; // Pass identification, replacement for typeid
MachineCopyPropagation() : MachineFunctionPass(ID) {
- initializeMachineCopyPropagationPass(*PassRegistry::getPassRegistry());
+ initializeMachineCopyPropagationPass(*PassRegistry::getPassRegistry());
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
}
bool runOnMachineFunction(MachineFunction &MF) override;
- private:
- typedef SmallVector<unsigned, 4> DestList;
- typedef DenseMap<unsigned, DestList> SourceMap;
+ MachineFunctionProperties getRequiredProperties() const override {
+ return MachineFunctionProperties().set(
+ MachineFunctionProperties::Property::AllVRegsAllocated);
+ }
- void SourceNoLongerAvailable(unsigned Reg,
- SourceMap &SrcMap,
- DenseMap<unsigned, MachineInstr*> &AvailCopyMap);
- bool CopyPropagateBlock(MachineBasicBlock &MBB);
+ private:
+ void ClobberRegister(unsigned Reg);
+ void CopyPropagateBlock(MachineBasicBlock &MBB);
+ bool eraseIfRedundant(MachineInstr &Copy, unsigned Src, unsigned Def);
+
+ /// Candidates for deletion.
+ SmallSetVector<MachineInstr*, 8> MaybeDeadCopies;
+ /// Def -> available copies map.
+ Reg2MIMap AvailCopyMap;
+ /// Def -> copies map.
+ Reg2MIMap CopyMap;
+    /// Src -> Def map.
+ SourceMap SrcMap;
+ bool Changed;
};
}
char MachineCopyPropagation::ID = 0;
@@ -62,79 +81,105 @@ char &llvm::MachineCopyPropagationID = MachineCopyPropagation::ID;
INITIALIZE_PASS(MachineCopyPropagation, "machine-cp",
"Machine Copy Propagation Pass", false, false)
-void
-MachineCopyPropagation::SourceNoLongerAvailable(unsigned Reg,
- SourceMap &SrcMap,
- DenseMap<unsigned, MachineInstr*> &AvailCopyMap) {
+/// Remove any entry in \p Map where the register is a subregister or equal to
+/// a register contained in \p Regs.
+static void removeRegsFromMap(Reg2MIMap &Map, const RegList &Regs,
+ const TargetRegisterInfo &TRI) {
+ for (unsigned Reg : Regs) {
+ // Source of copy is no longer available for propagation.
+ for (MCSubRegIterator SR(Reg, &TRI, true); SR.isValid(); ++SR)
+ Map.erase(*SR);
+ }
+}
+
+/// Remove any entry in \p Map that is marked clobbered in \p RegMask.
+/// The map will typically have a lot fewer entries than the regmask clobbers,
+/// so this is more efficient than iterating the clobbered registers and calling
+/// ClobberRegister() on them.
+static void removeClobberedRegsFromMap(Reg2MIMap &Map,
+ const MachineOperand &RegMask) {
+ for (Reg2MIMap::iterator I = Map.begin(), E = Map.end(), Next; I != E;
+ I = Next) {
+ Next = std::next(I);
+ unsigned Reg = I->first;
+ if (RegMask.clobbersPhysReg(Reg))
+ Map.erase(I);
+ }
+}
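// Illustrative, self-contained version of the erase-while-iterating idiom
// above, written against std::unordered_map instead of llvm::DenseMap; the
// predicate stands in for RegMask.clobbersPhysReg(). Caching Next before
// the erase is what keeps the loop valid, since only the erased iterator
// is invalidated.
#include <iterator>
#include <unordered_map>

template <typename Pred>
void eraseMatching(std::unordered_map<unsigned, int> &Map, Pred Clobbered) {
  for (auto I = Map.begin(), E = Map.end(), Next = I; I != E; I = Next) {
    Next = std::next(I);
    if (Clobbered(I->first))
      Map.erase(I); // Invalidates only I; Next stays valid.
  }
}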
+
+void MachineCopyPropagation::ClobberRegister(unsigned Reg) {
for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) {
+ CopyMap.erase(*AI);
+ AvailCopyMap.erase(*AI);
+
SourceMap::iterator SI = SrcMap.find(*AI);
if (SI != SrcMap.end()) {
- const DestList& Defs = SI->second;
- for (DestList::const_iterator I = Defs.begin(), E = Defs.end();
- I != E; ++I) {
- unsigned MappedDef = *I;
- // Source of copy is no longer available for propagation.
- AvailCopyMap.erase(MappedDef);
- for (MCSubRegIterator SR(MappedDef, TRI); SR.isValid(); ++SR)
- AvailCopyMap.erase(*SR);
- }
+ removeRegsFromMap(AvailCopyMap, SI->second, *TRI);
+ SrcMap.erase(SI);
}
}
}
-static bool NoInterveningSideEffect(const MachineInstr *CopyMI,
- const MachineInstr *MI) {
- const MachineBasicBlock *MBB = CopyMI->getParent();
- if (MI->getParent() != MBB)
- return false;
- MachineBasicBlock::const_iterator I = CopyMI;
- MachineBasicBlock::const_iterator E = MBB->end();
- MachineBasicBlock::const_iterator E2 = MI;
-
- ++I;
- while (I != E && I != E2) {
- if (I->hasUnmodeledSideEffects() || I->isCall() ||
- I->isTerminator())
- return false;
- ++I;
+/// Return true if \p PreviousCopy did copy register \p Src to register \p Def.
+/// This fact may have been obscured by sub register usage or may not be true at
+/// all even though Src and Def are subregisters of the registers used in
+/// PreviousCopy. e.g.
+/// isNopCopy("ecx = COPY eax", AX, CX) == true
+/// isNopCopy("ecx = COPY eax", AH, CL) == false
+static bool isNopCopy(const MachineInstr &PreviousCopy, unsigned Src,
+ unsigned Def, const TargetRegisterInfo *TRI) {
+ unsigned PreviousSrc = PreviousCopy.getOperand(1).getReg();
+ unsigned PreviousDef = PreviousCopy.getOperand(0).getReg();
+ if (Src == PreviousSrc) {
+ assert(Def == PreviousDef);
+ return true;
}
- return true;
+ if (!TRI->isSubRegister(PreviousSrc, Src))
+ return false;
+ unsigned SubIdx = TRI->getSubRegIndex(PreviousSrc, Src);
+ return SubIdx == TRI->getSubRegIndex(PreviousDef, Def);
}
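// Worked example for the sub-register check above (register names are the
// usual x86 ones, purely illustrative): given PreviousCopy "ecx = COPY eax",
// isNopCopy(.., Src=AX, Def=CX) succeeds because getSubRegIndex(EAX, AX)
// and getSubRegIndex(ECX, CX) both name the low-16-bit sub-register index,
// while (Src=AH, Def=CL) fails because the high-8-bit and low-8-bit
// sub-register indices differ.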
-/// isNopCopy - Return true if the specified copy is really a nop. That is
-/// if the source of the copy is the same of the definition of the copy that
-/// supplied the source. If the source of the copy is a sub-register than it
-/// must check the sub-indices match. e.g.
-/// ecx = mov eax
-/// al = mov cl
-/// But not
-/// ecx = mov eax
-/// al = mov ch
-static bool isNopCopy(MachineInstr *CopyMI, unsigned Def, unsigned Src,
- const TargetRegisterInfo *TRI) {
- unsigned SrcSrc = CopyMI->getOperand(1).getReg();
- if (Def == SrcSrc)
- return true;
- if (TRI->isSubRegister(SrcSrc, Def)) {
- unsigned SrcDef = CopyMI->getOperand(0).getReg();
- unsigned SubIdx = TRI->getSubRegIndex(SrcSrc, Def);
- if (!SubIdx)
- return false;
- return SubIdx == TRI->getSubRegIndex(SrcDef, Src);
- }
+/// Remove instruction \p Copy if there exists a previous copy that copies the
+/// register \p Src to the register \p Def; this may happen indirectly by
+/// copying the super registers.
+bool MachineCopyPropagation::eraseIfRedundant(MachineInstr &Copy, unsigned Src,
+ unsigned Def) {
+  // Avoid eliminating a copy from/to a reserved register, as we cannot
+  // predict the value (example: the SPARC zero register is writable but stays zero).
+ if (MRI->isReserved(Src) || MRI->isReserved(Def))
+ return false;
- return false;
-}
+ // Search for an existing copy.
+ Reg2MIMap::iterator CI = AvailCopyMap.find(Def);
+ if (CI == AvailCopyMap.end())
+ return false;
+
+ // Check that the existing copy uses the correct sub registers.
+ MachineInstr &PrevCopy = *CI->second;
+ if (!isNopCopy(PrevCopy, Src, Def, TRI))
+ return false;
-bool MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) {
- SmallSetVector<MachineInstr*, 8> MaybeDeadCopies; // Candidates for deletion
- DenseMap<unsigned, MachineInstr*> AvailCopyMap; // Def -> available copies map
- DenseMap<unsigned, MachineInstr*> CopyMap; // Def -> copies map
- SourceMap SrcMap; // Src -> Def map
+ DEBUG(dbgs() << "MCP: copy is a NOP, removing: "; Copy.dump());
+ // Copy was redundantly redefining either Src or Def. Remove earlier kill
+ // flags between Copy and PrevCopy because the value will be reused now.
+ assert(Copy.isCopy());
+ unsigned CopyDef = Copy.getOperand(0).getReg();
+ assert(CopyDef == Src || CopyDef == Def);
+ for (MachineInstr &MI :
+ make_range(PrevCopy.getIterator(), Copy.getIterator()))
+ MI.clearRegisterKills(CopyDef, TRI);
+
+ Copy.eraseFromParent();
+ Changed = true;
+ ++NumDeletes;
+ return true;
+}
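// Why the kill flags are cleared (hedged illustration): after erasing the
// second copy in
//   %ECX = COPY %EAX<kill>   <- PrevCopy, EAX marked killed here
//   ...
//   %EAX = COPY %ECX         <- Copy, erased as a nop
// EAX keeps holding the value past the old kill point, so the kill flag on
// PrevCopy's use of EAX (and any later kill before Copy) must be dropped
// to keep the liveness information correct.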
+
+void MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) {
DEBUG(dbgs() << "MCP: CopyPropagateBlock " << MBB.getName() << "\n");
- bool Changed = false;
for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); I != E; ) {
MachineInstr *MI = &*I;
++I;
@@ -143,48 +188,32 @@ bool MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) {
unsigned Def = MI->getOperand(0).getReg();
unsigned Src = MI->getOperand(1).getReg();
- if (TargetRegisterInfo::isVirtualRegister(Def) ||
- TargetRegisterInfo::isVirtualRegister(Src))
- report_fatal_error("MachineCopyPropagation should be run after"
- " register allocation!");
-
- DenseMap<unsigned, MachineInstr*>::iterator CI = AvailCopyMap.find(Src);
- if (CI != AvailCopyMap.end()) {
- MachineInstr *CopyMI = CI->second;
- if (!MRI->isReserved(Def) &&
- (!MRI->isReserved(Src) || NoInterveningSideEffect(CopyMI, MI)) &&
- isNopCopy(CopyMI, Def, Src, TRI)) {
- // The two copies cancel out and the source of the first copy
- // hasn't been overridden, eliminate the second one. e.g.
- // %ECX<def> = COPY %EAX<kill>
- // ... nothing clobbered EAX.
- // %EAX<def> = COPY %ECX
- // =>
- // %ECX<def> = COPY %EAX
- //
- // Also avoid eliminating a copy from reserved registers unless the
- // definition is proven not clobbered. e.g.
- // %RSP<def> = COPY %RAX
- // CALL
- // %RAX<def> = COPY %RSP
-
- DEBUG(dbgs() << "MCP: copy is a NOP, removing: "; MI->dump());
-
- // Clear any kills of Def between CopyMI and MI. This extends the
- // live range.
- for (MachineBasicBlock::iterator I = CopyMI, E = MI; I != E; ++I)
- I->clearRegisterKills(Def, TRI);
-
- MI->eraseFromParent();
- Changed = true;
- ++NumDeletes;
- continue;
- }
- }
+ assert(!TargetRegisterInfo::isVirtualRegister(Def) &&
+ !TargetRegisterInfo::isVirtualRegister(Src) &&
+ "MachineCopyPropagation should be run after register allocation!");
+
+  // The two copies cancel out and the source of the first copy
+  // hasn't been overridden, so eliminate the second one, e.g.:
+ // %ECX<def> = COPY %EAX
+ // ... nothing clobbered EAX.
+ // %EAX<def> = COPY %ECX
+ // =>
+ // %ECX<def> = COPY %EAX
+ //
+ // or
+ //
+ // %ECX<def> = COPY %EAX
+ // ... nothing clobbered EAX.
+ // %ECX<def> = COPY %EAX
+ // =>
+ // %ECX<def> = COPY %EAX
+ if (eraseIfRedundant(*MI, Def, Src) || eraseIfRedundant(*MI, Src, Def))
+ continue;
- // If Src is defined by a previous copy, it cannot be eliminated.
+ // If Src is defined by a previous copy, the previous copy cannot be
+ // eliminated.
for (MCRegAliasIterator AI(Src, TRI, true); AI.isValid(); ++AI) {
- CI = CopyMap.find(*AI);
+ Reg2MIMap::iterator CI = CopyMap.find(*AI);
if (CI != CopyMap.end()) {
DEBUG(dbgs() << "MCP: Copy is no longer dead: "; CI->second->dump());
MaybeDeadCopies.remove(CI->second);
@@ -194,23 +223,19 @@ bool MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) {
DEBUG(dbgs() << "MCP: Copy is a deletion candidate: "; MI->dump());
// Copy is now a candidate for deletion.
- MaybeDeadCopies.insert(MI);
+ if (!MRI->isReserved(Def))
+ MaybeDeadCopies.insert(MI);
- // If 'Src' is previously source of another copy, then this earlier copy's
+ // If 'Def' is previously source of another copy, then this earlier copy's
// source is no longer available. e.g.
// %xmm9<def> = copy %xmm2
// ...
// %xmm2<def> = copy %xmm0
// ...
// %xmm2<def> = copy %xmm9
- SourceNoLongerAvailable(Def, SrcMap, AvailCopyMap);
+ ClobberRegister(Def);
// Remember Def is defined by the copy.
- // ... Make sure to clear the def maps of aliases first.
- for (MCRegAliasIterator AI(Def, TRI, false); AI.isValid(); ++AI) {
- CopyMap.erase(*AI);
- AvailCopyMap.erase(*AI);
- }
for (MCSubRegIterator SR(Def, TRI, /*IncludeSelf=*/true); SR.isValid();
++SR) {
CopyMap[*SR] = MI;
@@ -219,30 +244,27 @@ bool MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) {
// Remember source that's copied to Def. Once it's clobbered, then
// it's no longer available for copy propagation.
- if (std::find(SrcMap[Src].begin(), SrcMap[Src].end(), Def) ==
- SrcMap[Src].end()) {
- SrcMap[Src].push_back(Def);
- }
+ RegList &DestList = SrcMap[Src];
+ if (std::find(DestList.begin(), DestList.end(), Def) == DestList.end())
+ DestList.push_back(Def);
continue;
}
// Not a copy.
SmallVector<unsigned, 2> Defs;
- int RegMaskOpNum = -1;
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- MachineOperand &MO = MI->getOperand(i);
+ const MachineOperand *RegMask = nullptr;
+ for (const MachineOperand &MO : MI->operands()) {
if (MO.isRegMask())
- RegMaskOpNum = i;
+ RegMask = &MO;
if (!MO.isReg())
continue;
unsigned Reg = MO.getReg();
if (!Reg)
continue;
- if (TargetRegisterInfo::isVirtualRegister(Reg))
- report_fatal_error("MachineCopyPropagation should be run after"
- " register allocation!");
+ assert(!TargetRegisterInfo::isVirtualRegister(Reg) &&
+ "MachineCopyPropagation should be run after register allocation!");
if (MO.isDef()) {
Defs.push_back(Reg);
@@ -252,7 +274,7 @@ bool MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) {
// If 'Reg' is defined by a copy, the copy is no longer a candidate
// for elimination.
for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) {
- DenseMap<unsigned, MachineInstr*>::iterator CI = CopyMap.find(*AI);
+ Reg2MIMap::iterator CI = CopyMap.find(*AI);
if (CI != CopyMap.end()) {
DEBUG(dbgs() << "MCP: Copy is used - not dead: "; CI->second->dump());
MaybeDeadCopies.remove(CI->second);
@@ -269,78 +291,81 @@ bool MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) {
}
// The instruction has a register mask operand which means that it clobbers
- // a large set of registers. It is possible to use the register mask to
- // prune the available copies, but treat it like a basic block boundary for
- // now.
- if (RegMaskOpNum >= 0) {
+ // a large set of registers. Treat clobbered registers the same way as
+ // defined registers.
+ if (RegMask) {
// Erase any MaybeDeadCopies whose destination register is clobbered.
- const MachineOperand &MaskMO = MI->getOperand(RegMaskOpNum);
- for (SmallSetVector<MachineInstr*, 8>::iterator
- DI = MaybeDeadCopies.begin(), DE = MaybeDeadCopies.end();
- DI != DE; ++DI) {
- unsigned Reg = (*DI)->getOperand(0).getReg();
- if (MRI->isReserved(Reg) || !MaskMO.clobbersPhysReg(Reg))
+ for (SmallSetVector<MachineInstr *, 8>::iterator DI =
+ MaybeDeadCopies.begin();
+ DI != MaybeDeadCopies.end();) {
+ MachineInstr *MaybeDead = *DI;
+ unsigned Reg = MaybeDead->getOperand(0).getReg();
+ assert(!MRI->isReserved(Reg));
+
+ if (!RegMask->clobbersPhysReg(Reg)) {
+ ++DI;
continue;
+ }
+
DEBUG(dbgs() << "MCP: Removing copy due to regmask clobbering: ";
- (*DI)->dump());
- (*DI)->eraseFromParent();
+ MaybeDead->dump());
+
+ // erase() will return the next valid iterator pointing to the next
+ // element after the erased one.
+ DI = MaybeDeadCopies.erase(DI);
+ MaybeDead->eraseFromParent();
Changed = true;
++NumDeletes;
}
- // Clear all data structures as if we were beginning a new basic block.
- MaybeDeadCopies.clear();
- AvailCopyMap.clear();
- CopyMap.clear();
- SrcMap.clear();
- continue;
- }
-
- for (unsigned i = 0, e = Defs.size(); i != e; ++i) {
- unsigned Reg = Defs[i];
-
- // No longer defined by a copy.
- for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) {
- CopyMap.erase(*AI);
- AvailCopyMap.erase(*AI);
+ removeClobberedRegsFromMap(AvailCopyMap, *RegMask);
+ removeClobberedRegsFromMap(CopyMap, *RegMask);
+ for (SourceMap::iterator I = SrcMap.begin(), E = SrcMap.end(), Next;
+ I != E; I = Next) {
+ Next = std::next(I);
+ if (RegMask->clobbersPhysReg(I->first)) {
+ removeRegsFromMap(AvailCopyMap, I->second, *TRI);
+ SrcMap.erase(I);
+ }
}
-
- // If 'Reg' is previously source of a copy, it is no longer available for
- // copy propagation.
- SourceNoLongerAvailable(Reg, SrcMap, AvailCopyMap);
}
+
+ // Any previous copy definition or reading the Defs is no longer available.
+ for (unsigned Reg : Defs)
+ ClobberRegister(Reg);
}
// If MBB doesn't have successors, delete the copies whose defs are not used.
// If MBB does have successors, then conservatively assume the defs are live-out
// since we don't want to trust live-in lists.
if (MBB.succ_empty()) {
- for (SmallSetVector<MachineInstr*, 8>::iterator
- DI = MaybeDeadCopies.begin(), DE = MaybeDeadCopies.end();
- DI != DE; ++DI) {
- if (!MRI->isReserved((*DI)->getOperand(0).getReg())) {
- (*DI)->eraseFromParent();
- Changed = true;
- ++NumDeletes;
- }
+ for (MachineInstr *MaybeDead : MaybeDeadCopies) {
+ assert(!MRI->isReserved(MaybeDead->getOperand(0).getReg()));
+ MaybeDead->eraseFromParent();
+ Changed = true;
+ ++NumDeletes;
}
}
- return Changed;
+ MaybeDeadCopies.clear();
+ AvailCopyMap.clear();
+ CopyMap.clear();
+ SrcMap.clear();
}
bool MachineCopyPropagation::runOnMachineFunction(MachineFunction &MF) {
- if (skipOptnoneFunction(*MF.getFunction()))
+ if (skipFunction(*MF.getFunction()))
return false;
- bool Changed = false;
+ Changed = false;
TRI = MF.getSubtarget().getRegisterInfo();
TII = MF.getSubtarget().getInstrInfo();
MRI = &MF.getRegInfo();
- for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I)
- Changed |= CopyPropagateBlock(*I);
+ for (MachineBasicBlock &MBB : MF)
+ CopyPropagateBlock(MBB);
return Changed;
}
+
diff --git a/contrib/llvm/lib/CodeGen/MachineDominators.cpp b/contrib/llvm/lib/CodeGen/MachineDominators.cpp
index 3f04bb0..303a6a9 100644
--- a/contrib/llvm/lib/CodeGen/MachineDominators.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineDominators.cpp
@@ -15,9 +15,20 @@
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/ADT/SmallBitVector.h"
+#include "llvm/Support/CommandLine.h"
using namespace llvm;
+// Always verify dominfo if expensive checking is enabled.
+#ifdef EXPENSIVE_CHECKS
+static bool VerifyMachineDomInfo = true;
+#else
+static bool VerifyMachineDomInfo = false;
+#endif
+static cl::opt<bool, true> VerifyMachineDomInfoX(
+ "verify-machine-dom-info", cl::location(VerifyMachineDomInfo),
+ cl::desc("Verify machine dominator info (time consuming)"));
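// Minimal sketch of the cl::location binding used above (needs only
// llvm/Support/CommandLine.h; the flag name is hypothetical): the second
// template parameter `true` selects external storage, so parsing the
// option writes into MyFlag rather than into the cl::opt object itself,
// which lets the default depend on build configuration as done here.
static bool MyFlag = false;
static llvm::cl::opt<bool, true>
    MyFlagX("my-example-flag", llvm::cl::location(MyFlag),
            llvm::cl::desc("Hypothetical flag with external storage"));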
+
namespace llvm {
template class DomTreeNodeBase<MachineBasicBlock>;
template class DominatorTreeBase<MachineBasicBlock>;
@@ -57,6 +68,11 @@ void MachineDominatorTree::releaseMemory() {
DT->releaseMemory();
}
+void MachineDominatorTree::verifyAnalysis() const {
+ if (VerifyMachineDomInfo)
+ verifyDomTree();
+}
+
void MachineDominatorTree::print(raw_ostream &OS, const Module*) const {
DT->print(OS);
}
@@ -125,3 +141,17 @@ void MachineDominatorTree::applySplitCriticalEdges() const {
NewBBs.clear();
CriticalEdgesToSplit.clear();
}
+
+void MachineDominatorTree::verifyDomTree() const {
+ MachineFunction &F = *getRoot()->getParent();
+
+ MachineDominatorTree OtherDT;
+ OtherDT.DT->recalculate(F);
+ if (compare(OtherDT)) {
+ errs() << "MachineDominatorTree is not up to date!\nComputed:\n";
+ print(errs(), nullptr);
+ errs() << "\nActual:\n";
+ OtherDT.print(errs(), nullptr);
+ abort();
+ }
+}
diff --git a/contrib/llvm/lib/CodeGen/MachineFunction.cpp b/contrib/llvm/lib/CodeGen/MachineFunction.cpp
index f6604f3..a7c63ef 100644
--- a/contrib/llvm/lib/CodeGen/MachineFunction.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineFunction.cpp
@@ -54,6 +54,30 @@ static cl::opt<unsigned>
void MachineFunctionInitializer::anchor() {}
+void MachineFunctionProperties::print(raw_ostream &ROS, bool OnlySet) const {
+ // Leave this function even in NDEBUG as an out-of-line anchor.
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ for (BitVector::size_type i = 0; i < Properties.size(); ++i) {
+ bool HasProperty = Properties[i];
+ if (OnlySet && !HasProperty)
+ continue;
+ switch(static_cast<Property>(i)) {
+ case Property::IsSSA:
+ ROS << (HasProperty ? "SSA, " : "Post SSA, ");
+ break;
+ case Property::TracksLiveness:
+ ROS << (HasProperty ? "" : "not ") << "tracking liveness, ";
+ break;
+ case Property::AllVRegsAllocated:
+ ROS << (HasProperty ? "AllVRegsAllocated" : "HasVRegs");
+ break;
+ default:
+ break;
+ }
+ }
+#endif
+}
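// Hedged usage sketch for the properties API introduced above; only the
// names visible in this patch (getProperties, set, clear, print) are used:
//   MachineFunctionProperties &P = MF.getProperties();
//   P.set(MachineFunctionProperties::Property::AllVRegsAllocated);
//   P.clear(MachineFunctionProperties::Property::IsSSA);
//   P.print(errs(), /*OnlySet=*/true);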
+
//===----------------------------------------------------------------------===//
// MachineFunction implementation
//===----------------------------------------------------------------------===//
@@ -65,20 +89,34 @@ void ilist_traits<MachineBasicBlock>::deleteNode(MachineBasicBlock *MBB) {
MBB->getParent()->DeleteMachineBasicBlock(MBB);
}
+static inline unsigned getFnStackAlignment(const TargetSubtargetInfo *STI,
+ const Function *Fn) {
+ if (Fn->hasFnAttribute(Attribute::StackAlignment))
+ return Fn->getFnStackAlignment();
+ return STI->getFrameLowering()->getStackAlignment();
+}
+
MachineFunction::MachineFunction(const Function *F, const TargetMachine &TM,
unsigned FunctionNum, MachineModuleInfo &mmi)
: Fn(F), Target(TM), STI(TM.getSubtargetImpl(*F)), Ctx(mmi.getContext()),
MMI(mmi) {
+ // Assume the function starts in SSA form with correct liveness.
+ Properties.set(MachineFunctionProperties::Property::IsSSA);
+ Properties.set(MachineFunctionProperties::Property::TracksLiveness);
if (STI->getRegisterInfo())
RegInfo = new (Allocator) MachineRegisterInfo(this);
else
RegInfo = nullptr;
MFInfo = nullptr;
- FrameInfo = new (Allocator)
- MachineFrameInfo(STI->getFrameLowering()->getStackAlignment(),
- STI->getFrameLowering()->isStackRealignable(),
- !F->hasFnAttribute("no-realign-stack"));
+ // We can realign the stack if the target supports it and the user hasn't
+ // explicitly asked us not to.
+ bool CanRealignSP = STI->getFrameLowering()->isStackRealignable() &&
+ !F->hasFnAttribute("no-realign-stack");
+ FrameInfo = new (Allocator) MachineFrameInfo(
+ getFnStackAlignment(STI, Fn), /*StackRealignable=*/CanRealignSP,
+ /*ForceRealign=*/CanRealignSP &&
+ F->hasFnAttribute(Attribute::StackAlignment));
if (Fn->hasFnAttribute(Attribute::StackAlignment))
FrameInfo->ensureMaxAlignment(Fn->getFnStackAlignment());
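// Illustrative mapping (not from the patch): an IR function declared as
//   define void @f() alignstack(32) { ... }
// carries Attribute::StackAlignment, so getFnStackAlignment() returns 32
// and the frame is force-realigned; without the attribute the subtarget
// frame lowering's default stack alignment is used instead.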
@@ -209,9 +247,9 @@ void MachineFunction::RenumberBlocks(MachineBasicBlock *MBB) {
}
/// Allocate a new MachineInstr. Use this instead of `new MachineInstr'.
-MachineInstr *
-MachineFunction::CreateMachineInstr(const MCInstrDesc &MCID,
- DebugLoc DL, bool NoImp) {
+MachineInstr *MachineFunction::CreateMachineInstr(const MCInstrDesc &MCID,
+ const DebugLoc &DL,
+ bool NoImp) {
return new (InstructionRecycler.Allocate<MachineInstr>(Allocator))
MachineInstr(*this, MCID, DL, NoImp);
}
@@ -256,13 +294,11 @@ MachineFunction::DeleteMachineBasicBlock(MachineBasicBlock *MBB) {
BasicBlockRecycler.Deallocate(Allocator, MBB);
}
-MachineMemOperand *
-MachineFunction::getMachineMemOperand(MachinePointerInfo PtrInfo, unsigned f,
- uint64_t s, unsigned base_alignment,
- const AAMDNodes &AAInfo,
- const MDNode *Ranges) {
- return new (Allocator) MachineMemOperand(PtrInfo, f, s, base_alignment,
- AAInfo, Ranges);
+MachineMemOperand *MachineFunction::getMachineMemOperand(
+ MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, uint64_t s,
+ unsigned base_alignment, const AAMDNodes &AAInfo, const MDNode *Ranges) {
+ return new (Allocator)
+ MachineMemOperand(PtrInfo, f, s, base_alignment, AAInfo, Ranges);
}
MachineMemOperand *
@@ -358,7 +394,7 @@ const char *MachineFunction::createExternalSymbolName(StringRef Name) {
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
-void MachineFunction::dump() const {
+LLVM_DUMP_METHOD void MachineFunction::dump() const {
print(dbgs());
}
#endif
@@ -368,14 +404,11 @@ StringRef MachineFunction::getName() const {
return getFunction()->getName();
}
-void MachineFunction::print(raw_ostream &OS, SlotIndexes *Indexes) const {
+void MachineFunction::print(raw_ostream &OS, const SlotIndexes *Indexes) const {
OS << "# Machine code for function " << getName() << ": ";
- if (RegInfo) {
- OS << (RegInfo->isSSA() ? "SSA" : "Post SSA");
- if (!RegInfo->tracksLiveness())
- OS << ", not tracking liveness";
- }
- OS << '\n';
+ OS << "Properties: <";
+ getProperties().print(OS);
+ OS << ">\n";
// Print Frame Information
FrameInfo->print(*this, OS);
@@ -523,7 +556,7 @@ MCSymbol *MachineFunction::getPICBaseSymbol() const {
/// Make sure the frame is at least Align bytes aligned.
void MachineFrameInfo::ensureMaxAlignment(unsigned Align) {
- if (!StackRealignable || !RealignOption)
+ if (!StackRealignable)
assert(Align <= StackAlignment &&
"For targets without stack realignment, Align is out of limit!");
if (MaxAlignment < Align) MaxAlignment = Align;
@@ -545,8 +578,7 @@ static inline unsigned clampStackAlignment(bool ShouldClamp, unsigned Align,
int MachineFrameInfo::CreateStackObject(uint64_t Size, unsigned Alignment,
bool isSS, const AllocaInst *Alloca) {
assert(Size != 0 && "Cannot allocate zero size stack objects!");
- Alignment = clampStackAlignment(!StackRealignable || !RealignOption,
- Alignment, StackAlignment);
+ Alignment = clampStackAlignment(!StackRealignable, Alignment, StackAlignment);
Objects.push_back(StackObject(Size, Alignment, 0, false, isSS, Alloca,
!isSS));
int Index = (int)Objects.size() - NumFixedObjects - 1;
@@ -559,8 +591,7 @@ int MachineFrameInfo::CreateStackObject(uint64_t Size, unsigned Alignment,
/// returning a nonnegative identifier to represent it.
int MachineFrameInfo::CreateSpillStackObject(uint64_t Size,
unsigned Alignment) {
- Alignment = clampStackAlignment(!StackRealignable || !RealignOption,
- Alignment, StackAlignment);
+ Alignment = clampStackAlignment(!StackRealignable, Alignment, StackAlignment);
CreateStackObject(Size, Alignment, true);
int Index = (int)Objects.size() - NumFixedObjects - 1;
ensureMaxAlignment(Alignment);
@@ -573,8 +604,7 @@ int MachineFrameInfo::CreateSpillStackObject(uint64_t Size,
int MachineFrameInfo::CreateVariableSizedObject(unsigned Alignment,
const AllocaInst *Alloca) {
HasVarSizedObjects = true;
- Alignment = clampStackAlignment(!StackRealignable || !RealignOption,
- Alignment, StackAlignment);
+ Alignment = clampStackAlignment(!StackRealignable, Alignment, StackAlignment);
Objects.push_back(StackObject(0, Alignment, 0, false, false, Alloca, true));
ensureMaxAlignment(Alignment);
return (int)Objects.size()-NumFixedObjects-1;
@@ -590,10 +620,11 @@ int MachineFrameInfo::CreateFixedObject(uint64_t Size, int64_t SPOffset,
// The alignment of the frame index can be determined from its offset from
// the incoming frame position. If the frame object is at offset 32 and
// the stack is guaranteed to be 16-byte aligned, then we know that the
- // object is 16-byte aligned.
- unsigned Align = MinAlign(SPOffset, StackAlignment);
- Align = clampStackAlignment(!StackRealignable || !RealignOption, Align,
- StackAlignment);
+ // object is 16-byte aligned. Note that unlike the non-fixed case, if the
+ // stack needs realignment, we can't assume that the stack will in fact be
+ // aligned.
+ unsigned Align = MinAlign(SPOffset, ForcedRealign ? 1 : StackAlignment);
+ Align = clampStackAlignment(!StackRealignable, Align, StackAlignment);
Objects.insert(Objects.begin(), StackObject(Size, Align, SPOffset, Immutable,
/*isSS*/ false,
/*Alloca*/ nullptr, isAliased));
@@ -604,9 +635,8 @@ int MachineFrameInfo::CreateFixedObject(uint64_t Size, int64_t SPOffset,
/// Returns an index with a negative value.
int MachineFrameInfo::CreateFixedSpillStackObject(uint64_t Size,
int64_t SPOffset) {
- unsigned Align = MinAlign(SPOffset, StackAlignment);
- Align = clampStackAlignment(!StackRealignable || !RealignOption, Align,
- StackAlignment);
+ unsigned Align = MinAlign(SPOffset, ForcedRealign ? 1 : StackAlignment);
+ Align = clampStackAlignment(!StackRealignable, Align, StackAlignment);
Objects.insert(Objects.begin(), StackObject(Size, Align, SPOffset,
/*Immutable*/ true,
/*isSS*/ true,
@@ -819,7 +849,7 @@ void MachineJumpTableInfo::print(raw_ostream &OS) const {
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
-void MachineJumpTableInfo::dump() const { print(dbgs()); }
+LLVM_DUMP_METHOD void MachineJumpTableInfo::dump() const { print(dbgs()); }
#endif
@@ -852,6 +882,8 @@ MachineConstantPoolEntry::getSectionKind(const DataLayout *DL) const {
return SectionKind::getMergeableConst8();
case 16:
return SectionKind::getMergeableConst16();
+ case 32:
+ return SectionKind::getMergeableConst32();
default:
return SectionKind::getReadOnly();
}
@@ -895,17 +927,17 @@ static bool CanShareConstantPoolEntry(const Constant *A, const Constant *B,
// the constant folding APIs to do this so that we get the benefit of
// DataLayout.
if (isa<PointerType>(A->getType()))
- A = ConstantFoldInstOperands(Instruction::PtrToInt, IntTy,
- const_cast<Constant *>(A), DL);
+ A = ConstantFoldCastOperand(Instruction::PtrToInt,
+ const_cast<Constant *>(A), IntTy, DL);
else if (A->getType() != IntTy)
- A = ConstantFoldInstOperands(Instruction::BitCast, IntTy,
- const_cast<Constant *>(A), DL);
+ A = ConstantFoldCastOperand(Instruction::BitCast, const_cast<Constant *>(A),
+ IntTy, DL);
if (isa<PointerType>(B->getType()))
- B = ConstantFoldInstOperands(Instruction::PtrToInt, IntTy,
- const_cast<Constant *>(B), DL);
+ B = ConstantFoldCastOperand(Instruction::PtrToInt,
+ const_cast<Constant *>(B), IntTy, DL);
else if (B->getType() != IntTy)
- B = ConstantFoldInstOperands(Instruction::BitCast, IntTy,
- const_cast<Constant *>(B), DL);
+ B = ConstantFoldCastOperand(Instruction::BitCast, const_cast<Constant *>(B),
+ IntTy, DL);
return A == B;
}
@@ -966,5 +998,5 @@ void MachineConstantPool::print(raw_ostream &OS) const {
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
-void MachineConstantPool::dump() const { print(dbgs()); }
+LLVM_DUMP_METHOD void MachineConstantPool::dump() const { print(dbgs()); }
#endif
diff --git a/contrib/llvm/lib/CodeGen/MachineFunctionPass.cpp b/contrib/llvm/lib/CodeGen/MachineFunctionPass.cpp
index 05463fc..228fe17 100644
--- a/contrib/llvm/lib/CodeGen/MachineFunctionPass.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineFunctionPass.cpp
@@ -21,11 +21,13 @@
#include "llvm/Analysis/MemoryDependenceAnalysis.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
+#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionAnalysis.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/StackProtector.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
+
using namespace llvm;
Pass *MachineFunctionPass::createPrinterPass(raw_ostream &O,
@@ -40,7 +42,26 @@ bool MachineFunctionPass::runOnFunction(Function &F) {
return false;
MachineFunction &MF = getAnalysis<MachineFunctionAnalysis>().getMF();
- return runOnMachineFunction(MF);
+ MachineFunctionProperties &MFProps = MF.getProperties();
+
+#ifndef NDEBUG
+ if (!MFProps.verifyRequiredProperties(RequiredProperties)) {
+ errs() << "MachineFunctionProperties required by " << getPassName()
+ << " pass are not met by function " << F.getName() << ".\n"
+ << "Required properties: ";
+ RequiredProperties.print(errs(), /*OnlySet=*/true);
+ errs() << "\nCurrent properties: ";
+ MFProps.print(errs());
+ errs() << "\n";
+ llvm_unreachable("MachineFunctionProperties check failed");
+ }
+#endif
+
+ bool RV = runOnMachineFunction(MF);
+
+ MFProps.set(SetProperties);
+ MFProps.clear(ClearedProperties);
+ return RV;
}
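// Hedged sketch of a pass opting into the new property verification; the
// getRequiredProperties override mirrors the MachineCopyPropagation hunk
// earlier in this patch, while the pass itself is hypothetical.
struct ExamplePostRAPass : public llvm::MachineFunctionPass {
  static char ID;
  ExamplePostRAPass() : MachineFunctionPass(ID) {}
  llvm::MachineFunctionProperties getRequiredProperties() const override {
    return llvm::MachineFunctionProperties().set(
        llvm::MachineFunctionProperties::Property::AllVRegsAllocated);
  }
  bool runOnMachineFunction(llvm::MachineFunction &MF) override {
    return false; // Placeholder body.
  }
};
char ExamplePostRAPass::ID = 0;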
void MachineFunctionPass::getAnalysisUsage(AnalysisUsage &AU) const {
@@ -53,13 +74,13 @@ void MachineFunctionPass::getAnalysisUsage(AnalysisUsage &AU) const {
// because CodeGen overloads that to mean preserving the MachineBasicBlock
// CFG in addition to the LLVM IR CFG.
AU.addPreserved<BasicAAWrapperPass>();
- AU.addPreserved<DominanceFrontier>();
+ AU.addPreserved<DominanceFrontierWrapperPass>();
AU.addPreserved<DominatorTreeWrapperPass>();
AU.addPreserved<AAResultsWrapperPass>();
AU.addPreserved<GlobalsAAWrapperPass>();
- AU.addPreserved<IVUsers>();
+ AU.addPreserved<IVUsersWrapperPass>();
AU.addPreserved<LoopInfoWrapperPass>();
- AU.addPreserved<MemoryDependenceAnalysis>();
+ AU.addPreserved<MemoryDependenceWrapperPass>();
AU.addPreserved<ScalarEvolutionWrapperPass>();
AU.addPreserved<SCEVAAWrapperPass>();
AU.addPreserved<StackProtector>();
diff --git a/contrib/llvm/lib/CodeGen/MachineInstr.cpp b/contrib/llvm/lib/CodeGen/MachineInstr.cpp
index 6dca74d..3cdf8d2 100644
--- a/contrib/llvm/lib/CodeGen/MachineInstr.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineInstr.cpp
@@ -17,6 +17,7 @@
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
@@ -372,10 +373,16 @@ void MachineOperand::print(raw_ostream &OS, ModuleSlotTracker &MST,
getCImm()->getValue().print(OS, false);
break;
case MachineOperand::MO_FPImmediate:
- if (getFPImm()->getType()->isFloatTy())
+ if (getFPImm()->getType()->isFloatTy()) {
OS << getFPImm()->getValueAPF().convertToFloat();
- else
+ } else if (getFPImm()->getType()->isHalfTy()) {
+ APFloat APF = getFPImm()->getValueAPF();
+ bool Unused;
+ APF.convert(APFloat::IEEEsingle, APFloat::rmNearestTiesToEven, &Unused);
+ OS << "half " << APF.convertToFloat();
+ } else {
OS << getFPImm()->getValueAPF().convertToDouble();
+ }
break;
case MachineOperand::MO_MachineBasicBlock:
OS << "<BB#" << getMBB()->getNumber() << ">";
@@ -490,13 +497,12 @@ MachinePointerInfo MachinePointerInfo::getStack(MachineFunction &MF,
return MachinePointerInfo(MF.getPSVManager().getStack(), Offset);
}
-MachineMemOperand::MachineMemOperand(MachinePointerInfo ptrinfo, unsigned f,
+MachineMemOperand::MachineMemOperand(MachinePointerInfo ptrinfo, Flags f,
uint64_t s, unsigned int a,
const AAMDNodes &AAInfo,
const MDNode *Ranges)
- : PtrInfo(ptrinfo), Size(s),
- Flags((f & ((1 << MOMaxBits) - 1)) | ((Log2_32(a) + 1) << MOMaxBits)),
- AAInfo(AAInfo), Ranges(Ranges) {
+ : PtrInfo(ptrinfo), Size(s), FlagVals(f), BaseAlignLog2(Log2_32(a) + 1),
+ AAInfo(AAInfo), Ranges(Ranges) {
assert((PtrInfo.V.isNull() || PtrInfo.V.is<const PseudoSourceValue*>() ||
isa<PointerType>(PtrInfo.V.get<const Value*>()->getType())) &&
"invalid pointer value");
@@ -510,7 +516,8 @@ void MachineMemOperand::Profile(FoldingSetNodeID &ID) const {
ID.AddInteger(getOffset());
ID.AddInteger(Size);
ID.AddPointer(getOpaqueValue());
- ID.AddInteger(Flags);
+ ID.AddInteger(getFlags());
+ ID.AddInteger(getBaseAlignment());
}
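// Worked example of the alignment encoding above: the base alignment is
// stored as Log2_32(a) + 1, so a = 16 is stored as BaseAlignLog2 = 5 and
// recovered as 1u << (5 - 1) == 16; the +1 keeps 0 free to mean "no
// alignment recorded". (The decoding expression is inferred; only the
// encoding side appears in this hunk.)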
void MachineMemOperand::refineAlignment(const MachineMemOperand *MMO) {
@@ -521,8 +528,7 @@ void MachineMemOperand::refineAlignment(const MachineMemOperand *MMO) {
if (MMO->getBaseAlignment() >= getBaseAlignment()) {
// Update the alignment value.
- Flags = (Flags & ((1 << MOMaxBits) - 1)) |
- ((Log2_32(MMO->getBaseAlignment()) + 1) << MOMaxBits);
+ BaseAlignLog2 = Log2_32(MMO->getBaseAlignment()) + 1;
// Also update the base and offset, because the new alignment may
// not be applicable with the old ones.
PtrInfo = MMO->PtrInfo;
@@ -647,7 +653,12 @@ MachineInstr::MachineInstr(MachineFunction &MF, const MCInstrDesc &tid,
DebugLoc dl, bool NoImp)
: MCID(&tid), Parent(nullptr), Operands(nullptr), NumOperands(0), Flags(0),
AsmPrinterFlags(0), NumMemRefs(0), MemRefs(nullptr),
- debugLoc(std::move(dl)) {
+ debugLoc(std::move(dl))
+#ifdef LLVM_BUILD_GLOBAL_ISEL
+ ,
+ Ty(nullptr)
+#endif
+{
assert(debugLoc.hasTrivialDestructor() && "Expected trivial destructor");
// Reserve space for the expected number of operands.
@@ -664,10 +675,14 @@ MachineInstr::MachineInstr(MachineFunction &MF, const MCInstrDesc &tid,
/// MachineInstr ctor - Copies MachineInstr arg exactly
///
MachineInstr::MachineInstr(MachineFunction &MF, const MachineInstr &MI)
- : MCID(&MI.getDesc()), Parent(nullptr), Operands(nullptr), NumOperands(0),
- Flags(0), AsmPrinterFlags(0),
- NumMemRefs(MI.NumMemRefs), MemRefs(MI.MemRefs),
- debugLoc(MI.getDebugLoc()) {
+ : MCID(&MI.getDesc()), Parent(nullptr), Operands(nullptr), NumOperands(0),
+ Flags(0), AsmPrinterFlags(0), NumMemRefs(MI.NumMemRefs),
+ MemRefs(MI.MemRefs), debugLoc(MI.getDebugLoc())
+#ifdef LLVM_BUILD_GLOBAL_ISEL
+ ,
+ Ty(nullptr)
+#endif
+{
assert(debugLoc.hasTrivialDestructor() && "Expected trivial destructor");
CapOperands = OperandCapacity::get(MI.getNumOperands());
@@ -690,6 +705,25 @@ MachineRegisterInfo *MachineInstr::getRegInfo() {
return nullptr;
}
+// Implement dummy setter and getter for type when
+// global-isel is not built.
+// The proper implementation is WIP and is tracked here:
+// PR26576.
+#ifndef LLVM_BUILD_GLOBAL_ISEL
+void MachineInstr::setType(Type *Ty) {}
+
+Type *MachineInstr::getType() const { return nullptr; }
+
+#else
+void MachineInstr::setType(Type *Ty) {
+ assert((!Ty || isPreISelGenericOpcode(getOpcode())) &&
+ "Non generic instructions are not supposed to be typed");
+ this->Ty = Ty;
+}
+
+Type *MachineInstr::getType() const { return Ty; }
+#endif // LLVM_BUILD_GLOBAL_ISEL
+
/// RemoveRegOperandsFromUseLists - Unlink all of the register operands in
/// this instruction from their respective use lists. This requires that the
/// operands already be on their use lists.
@@ -867,7 +901,7 @@ void MachineInstr::addMemOperand(MachineFunction &MF,
}
/// Check to see if the MMOs pointed to by the two MemRefs arrays are
-/// identical.
+/// identical.
static bool hasIdenticalMMOs(const MachineInstr &MI1, const MachineInstr &MI2) {
auto I1 = MI1.memoperands_begin(), E1 = MI1.memoperands_end();
auto I2 = MI2.memoperands_begin(), E2 = MI2.memoperands_end();
@@ -894,7 +928,7 @@ MachineInstr::mergeMemRefsWith(const MachineInstr& Other) {
// cases in practice.
if (hasIdenticalMMOs(*this, Other))
return std::make_pair(MemRefs, NumMemRefs);
-
+
// TODO: consider uniquing elements within the operand lists to reduce
// space usage and fall back to conservative information less often.
size_t CombinedNumMemRefs = NumMemRefs + Other.NumMemRefs;
@@ -913,7 +947,7 @@ MachineInstr::mergeMemRefsWith(const MachineInstr& Other) {
MemEnd);
assert(MemEnd - MemBegin == (ptrdiff_t)CombinedNumMemRefs &&
"missing memrefs");
-
+
return std::make_pair(MemBegin, CombinedNumMemRefs);
}
@@ -933,23 +967,23 @@ bool MachineInstr::hasPropertyInBundle(unsigned Mask, QueryType Type) const {
}
}
-bool MachineInstr::isIdenticalTo(const MachineInstr *Other,
+bool MachineInstr::isIdenticalTo(const MachineInstr &Other,
MICheckType Check) const {
// If opcodes or number of operands are not the same then the two
// instructions are obviously not identical.
- if (Other->getOpcode() != getOpcode() ||
- Other->getNumOperands() != getNumOperands())
+ if (Other.getOpcode() != getOpcode() ||
+ Other.getNumOperands() != getNumOperands())
return false;
if (isBundle()) {
// Both instructions are bundles, compare MIs inside the bundle.
MachineBasicBlock::const_instr_iterator I1 = getIterator();
MachineBasicBlock::const_instr_iterator E1 = getParent()->instr_end();
- MachineBasicBlock::const_instr_iterator I2 = Other->getIterator();
- MachineBasicBlock::const_instr_iterator E2= Other->getParent()->instr_end();
+ MachineBasicBlock::const_instr_iterator I2 = Other.getIterator();
+ MachineBasicBlock::const_instr_iterator E2 = Other.getParent()->instr_end();
while (++I1 != E1 && I1->isInsideBundle()) {
++I2;
- if (I2 == E2 || !I2->isInsideBundle() || !I1->isIdenticalTo(&*I2, Check))
+ if (I2 == E2 || !I2->isInsideBundle() || !I1->isIdenticalTo(*I2, Check))
return false;
}
}
@@ -957,7 +991,7 @@ bool MachineInstr::isIdenticalTo(const MachineInstr *Other,
// Check operands to make sure they match.
for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
const MachineOperand &MO = getOperand(i);
- const MachineOperand &OMO = Other->getOperand(i);
+ const MachineOperand &OMO = Other.getOperand(i);
if (!MO.isReg()) {
if (!MO.isIdenticalTo(OMO))
return false;
@@ -990,8 +1024,8 @@ bool MachineInstr::isIdenticalTo(const MachineInstr *Other,
}
// If DebugLoc does not match then two dbg.values are not identical.
if (isDebugValue())
- if (getDebugLoc() && Other->getDebugLoc() &&
- getDebugLoc() != Other->getDebugLoc())
+ if (getDebugLoc() && Other.getDebugLoc() &&
+ getDebugLoc() != Other.getDebugLoc())
return false;
return true;
}
@@ -1130,6 +1164,16 @@ int MachineInstr::findInlineAsmFlagIdx(unsigned OpIdx,
return -1;
}
+const DILocalVariable *MachineInstr::getDebugVariable() const {
+ assert(isDebugValue() && "not a DBG_VALUE");
+ return cast<DILocalVariable>(getOperand(2).getMetadata());
+}
+
+const DIExpression *MachineInstr::getDebugExpression() const {
+ assert(isDebugValue() && "not a DBG_VALUE");
+ return cast<DIExpression>(getOperand(3).getMetadata());
+}
+
const TargetRegisterClass*
MachineInstr::getRegClassConstraint(unsigned OpIdx,
const TargetInstrInfo *TII,
@@ -1157,7 +1201,10 @@ MachineInstr::getRegClassConstraint(unsigned OpIdx,
unsigned Flag = getOperand(FlagIdx).getImm();
unsigned RCID;
- if (InlineAsm::hasRegClassConstraint(Flag, RCID))
+ if ((InlineAsm::getKind(Flag) == InlineAsm::Kind_RegUse ||
+ InlineAsm::getKind(Flag) == InlineAsm::Kind_RegDef ||
+ InlineAsm::getKind(Flag) == InlineAsm::Kind_RegDefEarlyClobber) &&
+ InlineAsm::hasRegClassConstraint(Flag, RCID))
return TRI->getRegClass(RCID);
// Assume that all registers in a memory operand are pointers.
@@ -1173,7 +1220,7 @@ const TargetRegisterClass *MachineInstr::getRegClassConstraintEffectForVReg(
// Check every operand inside the bundle if we have
// been asked to.
if (ExploreBundle)
- for (ConstMIBundleOperands OpndIt(this); OpndIt.isValid() && CurRC;
+ for (ConstMIBundleOperands OpndIt(*this); OpndIt.isValid() && CurRC;
++OpndIt)
CurRC = OpndIt->getParent()->getRegClassConstraintEffectForVRegImpl(
OpndIt.getOperandNo(), Reg, CurRC, TII, TRI);
@@ -1219,11 +1266,24 @@ const TargetRegisterClass *MachineInstr::getRegClassConstraintEffect(
unsigned MachineInstr::getBundleSize() const {
MachineBasicBlock::const_instr_iterator I = getIterator();
unsigned Size = 0;
- while (I->isBundledWithSucc())
- ++Size, ++I;
+ while (I->isBundledWithSucc()) {
+ ++Size;
+ ++I;
+ }
return Size;
}
+/// Returns true if the MachineInstr has an implicit-use operand of exactly
+/// the given register (not considering sub/super-registers).
+bool MachineInstr::hasRegisterImplicitUseOperand(unsigned Reg) const {
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = getOperand(i);
+ if (MO.isReg() && MO.isUse() && MO.isImplicit() && MO.getReg() == Reg)
+ return true;
+ }
+ return false;
+}
+
/// findRegisterUseOperandIdx() - Returns the MachineOperand that is a use of
/// the specific register or -1 if it is not found. It further tightens
/// the search criteria to a use that kills the register if isKill is true.
@@ -1498,12 +1558,10 @@ bool MachineInstr::hasOrderedMemoryRef() const {
if (memoperands_empty())
return true;
- // Check the memory reference information for ordered references.
- for (mmo_iterator I = memoperands_begin(), E = memoperands_end(); I != E; ++I)
- if (!(*I)->isUnordered())
- return true;
-
- return false;
+ // Check if any of our memory operands are ordered.
+ return any_of(memoperands(), [](const MachineMemOperand *MMO) {
+ return !MMO->isUnordered();
+ });
}
/// isInvariantLoad - Return true if this instruction is loading from a
@@ -1523,23 +1581,21 @@ bool MachineInstr::isInvariantLoad(AliasAnalysis *AA) const {
const MachineFrameInfo *MFI = getParent()->getParent()->getFrameInfo();
- for (mmo_iterator I = memoperands_begin(),
- E = memoperands_end(); I != E; ++I) {
- if ((*I)->isVolatile()) return false;
- if ((*I)->isStore()) return false;
- if ((*I)->isInvariant()) return true;
-
+ for (MachineMemOperand *MMO : memoperands()) {
+ if (MMO->isVolatile()) return false;
+ if (MMO->isStore()) return false;
+ if (MMO->isInvariant()) continue;
// A load from a constant PseudoSourceValue is invariant.
- if (const PseudoSourceValue *PSV = (*I)->getPseudoValue())
+ if (const PseudoSourceValue *PSV = MMO->getPseudoValue())
if (PSV->isConstant(MFI))
continue;
- if (const Value *V = (*I)->getValue()) {
+ if (const Value *V = MMO->getValue()) {
// If we have an AliasAnalysis, ask it whether the memory is constant.
if (AA &&
AA->pointsToConstantMemory(
- MemoryLocation(V, (*I)->getSize(), (*I)->getAAInfo())))
+ MemoryLocation(V, MMO->getSize(), MMO->getAAInfo())))
continue;
}
@@ -1598,16 +1654,16 @@ bool MachineInstr::allDefsAreDead() const {
/// copyImplicitOps - Copy implicit register operands from specified
/// instruction to this instruction.
void MachineInstr::copyImplicitOps(MachineFunction &MF,
- const MachineInstr *MI) {
- for (unsigned i = MI->getDesc().getNumOperands(), e = MI->getNumOperands();
+ const MachineInstr &MI) {
+ for (unsigned i = MI.getDesc().getNumOperands(), e = MI.getNumOperands();
i != e; ++i) {
- const MachineOperand &MO = MI->getOperand(i);
+ const MachineOperand &MO = MI.getOperand(i);
if ((MO.isReg() && MO.isImplicit()) || MO.isRegMask())
addOperand(MF, MO);
}
}
-void MachineInstr::dump() const {
+LLVM_DUMP_METHOD void MachineInstr::dump() const {
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
dbgs() << " " << *this;
#endif
@@ -1651,8 +1707,12 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST,
if (StartOp != 0) OS << ", ";
getOperand(StartOp).print(OS, MST, TRI);
unsigned Reg = getOperand(StartOp).getReg();
- if (TargetRegisterInfo::isVirtualRegister(Reg))
+ if (TargetRegisterInfo::isVirtualRegister(Reg)) {
VirtRegs.push_back(Reg);
+ unsigned Size;
+ if (MRI && (Size = MRI->getSize(Reg)))
+ OS << '(' << Size << ')';
+ }
}
if (StartOp != 0)
@@ -1664,6 +1724,12 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST,
else
OS << "UNKNOWN";
+ if (getType()) {
+ OS << ' ';
+ getType()->print(OS, /*IsForDebug*/ false, /*NoDetails*/ true);
+ OS << ' ';
+ }
+
if (SkipOpers)
return;
@@ -1686,6 +1752,8 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST,
OS << " [mayload]";
if (ExtraInfo & InlineAsm::Extra_MayStore)
OS << " [maystore]";
+ if (ExtraInfo & InlineAsm::Extra_IsConvergent)
+ OS << " [isconvergent]";
if (ExtraInfo & InlineAsm::Extra_IsAlignStack)
OS << " [alignstack]";
if (getInlineAsmDialect() == InlineAsm::AD_ATT)
@@ -1761,13 +1829,41 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST,
}
unsigned RCID = 0;
- if (InlineAsm::hasRegClassConstraint(Flag, RCID)) {
+ if (!InlineAsm::isImmKind(Flag) && !InlineAsm::isMemKind(Flag) &&
+ InlineAsm::hasRegClassConstraint(Flag, RCID)) {
if (TRI) {
OS << ':' << TRI->getRegClassName(TRI->getRegClass(RCID));
} else
OS << ":RC" << RCID;
}
+ if (InlineAsm::isMemKind(Flag)) {
+ unsigned MCID = InlineAsm::getMemoryConstraintID(Flag);
+ switch (MCID) {
+ case InlineAsm::Constraint_es: OS << ":es"; break;
+ case InlineAsm::Constraint_i: OS << ":i"; break;
+ case InlineAsm::Constraint_m: OS << ":m"; break;
+ case InlineAsm::Constraint_o: OS << ":o"; break;
+ case InlineAsm::Constraint_v: OS << ":v"; break;
+ case InlineAsm::Constraint_Q: OS << ":Q"; break;
+ case InlineAsm::Constraint_R: OS << ":R"; break;
+ case InlineAsm::Constraint_S: OS << ":S"; break;
+ case InlineAsm::Constraint_T: OS << ":T"; break;
+ case InlineAsm::Constraint_Um: OS << ":Um"; break;
+ case InlineAsm::Constraint_Un: OS << ":Un"; break;
+ case InlineAsm::Constraint_Uq: OS << ":Uq"; break;
+ case InlineAsm::Constraint_Us: OS << ":Us"; break;
+ case InlineAsm::Constraint_Ut: OS << ":Ut"; break;
+ case InlineAsm::Constraint_Uv: OS << ":Uv"; break;
+ case InlineAsm::Constraint_Uy: OS << ":Uy"; break;
+ case InlineAsm::Constraint_X: OS << ":X"; break;
+ case InlineAsm::Constraint_Z: OS << ":Z"; break;
+ case InlineAsm::Constraint_ZC: OS << ":ZC"; break;
+ case InlineAsm::Constraint_Zy: OS << ":Zy"; break;
+ default: OS << ":?"; break;
+ }
+ }
+
unsigned TiedTo = 0;
if (InlineAsm::isUseOperandTiedToDef(Flag, TiedTo))
OS << " tiedto:$" << TiedTo;
@@ -1824,11 +1920,18 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST,
HaveSemi = true;
}
for (unsigned i = 0; i != VirtRegs.size(); ++i) {
- const TargetRegisterClass *RC = MRI->getRegClass(VirtRegs[i]);
- OS << " " << TRI->getRegClassName(RC)
- << ':' << PrintReg(VirtRegs[i]);
+ const RegClassOrRegBank &RC = MRI->getRegClassOrRegBank(VirtRegs[i]);
+ if (!RC)
+ continue;
+ // Generic virtual registers do not have register classes.
+ if (RC.is<const RegisterBank *>())
+ OS << " " << RC.get<const RegisterBank *>()->getName();
+ else
+ OS << " "
+ << TRI->getRegClassName(RC.get<const TargetRegisterClass *>());
+ OS << ':' << PrintReg(VirtRegs[i]);
for (unsigned j = i+1; j != VirtRegs.size();) {
- if (MRI->getRegClass(VirtRegs[j]) != RC) {
+ if (MRI->getRegClassOrRegBank(VirtRegs[j]) != RC) {
++j;
continue;
}
@@ -1877,6 +1980,13 @@ bool MachineInstr::addRegisterKilled(unsigned IncomingReg,
MachineOperand &MO = getOperand(i);
if (!MO.isReg() || !MO.isUse() || MO.isUndef())
continue;
+
+ // DEBUG_VALUE nodes do not contribute to code generation and should
+ // always be ignored. Failure to do so may result in trying to modify
+ // KILL flags on DEBUG_VALUE nodes.
+ if (MO.isDebug())
+ continue;
+
unsigned Reg = MO.getReg();
if (!Reg)
continue;
@@ -1932,7 +2042,7 @@ void MachineInstr::clearRegisterKills(unsigned Reg,
if (!MO.isReg() || !MO.isUse() || !MO.isKill())
continue;
unsigned OpReg = MO.getReg();
- if (OpReg == Reg || (RegInfo && RegInfo->isSuperRegister(Reg, OpReg)))
+ if ((RegInfo && RegInfo->regsOverlap(Reg, OpReg)) || Reg == OpReg)
MO.setIsKill(false);
}
}
@@ -2085,3 +2195,42 @@ void MachineInstr::emitError(StringRef Msg) const {
return MF->getMMI().getModule()->getContext().emitError(LocCookie, Msg);
report_fatal_error(Msg);
}
+
+MachineInstrBuilder llvm::BuildMI(MachineFunction &MF, const DebugLoc &DL,
+ const MCInstrDesc &MCID, bool IsIndirect,
+ unsigned Reg, unsigned Offset,
+ const MDNode *Variable, const MDNode *Expr) {
+ assert(isa<DILocalVariable>(Variable) && "not a variable");
+ assert(cast<DIExpression>(Expr)->isValid() && "not an expression");
+ assert(cast<DILocalVariable>(Variable)->isValidLocationForIntrinsic(DL) &&
+ "Expected inlined-at fields to agree");
+ if (IsIndirect)
+ return BuildMI(MF, DL, MCID)
+ .addReg(Reg, RegState::Debug)
+ .addImm(Offset)
+ .addMetadata(Variable)
+ .addMetadata(Expr);
+ else {
+ assert(Offset == 0 && "A direct address cannot have an offset.");
+ return BuildMI(MF, DL, MCID)
+ .addReg(Reg, RegState::Debug)
+ .addReg(0U, RegState::Debug)
+ .addMetadata(Variable)
+ .addMetadata(Expr);
+ }
+}
+
+MachineInstrBuilder llvm::BuildMI(MachineBasicBlock &BB,
+ MachineBasicBlock::iterator I,
+ const DebugLoc &DL, const MCInstrDesc &MCID,
+ bool IsIndirect, unsigned Reg,
+ unsigned Offset, const MDNode *Variable,
+ const MDNode *Expr) {
+ assert(isa<DILocalVariable>(Variable) && "not a variable");
+ assert(cast<DIExpression>(Expr)->isValid() && "not an expression");
+ MachineFunction &MF = *BB.getParent();
+ MachineInstr *MI =
+ BuildMI(MF, DL, MCID, IsIndirect, Reg, Offset, Variable, Expr);
+ BB.insert(I, MI);
+ return MachineInstrBuilder(MF, MI);
+}
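// Hedged usage sketch for the new DBG_VALUE builders (TII, Reg, Var and
// Expr are assumed to be in scope):
//   BuildMI(MBB, InsertPt, DL, TII->get(TargetOpcode::DBG_VALUE),
//           /*IsIndirect=*/false, Reg, /*Offset=*/0, Var, Expr);
// The indirect form emits "Reg, Offset" operands, while the direct form
// emits "Reg, 0U" with a zero offset asserted, matching the two branches
// above.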
diff --git a/contrib/llvm/lib/CodeGen/MachineInstrBundle.cpp b/contrib/llvm/lib/CodeGen/MachineInstrBundle.cpp
index 4619daf..e4686b3 100644
--- a/contrib/llvm/lib/CodeGen/MachineInstrBundle.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineInstrBundle.cpp
@@ -17,6 +17,7 @@
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
+#include <utility>
using namespace llvm;
namespace {
@@ -24,7 +25,7 @@ namespace {
public:
static char ID; // Pass identification
UnpackMachineBundles(std::function<bool(const Function &)> Ftor = nullptr)
- : MachineFunctionPass(ID), PredicateFtor(Ftor) {
+ : MachineFunctionPass(ID), PredicateFtor(std::move(Ftor)) {
initializeUnpackMachineBundlesPass(*PassRegistry::getPassRegistry());
}
@@ -78,7 +79,7 @@ bool UnpackMachineBundles::runOnMachineFunction(MachineFunction &MF) {
FunctionPass *
llvm::createUnpackMachineBundles(std::function<bool(const Function &)> Ftor) {
- return new UnpackMachineBundles(Ftor);
+ return new UnpackMachineBundles(std::move(Ftor));
}
namespace {
@@ -293,7 +294,7 @@ MachineOperandIteratorBase::PhysRegInfo
MachineOperandIteratorBase::analyzePhysReg(unsigned Reg,
const TargetRegisterInfo *TRI) {
bool AllDefsDead = true;
- PhysRegInfo PRI = {false, false, false, false, false, false, false};
+ PhysRegInfo PRI = {false, false, false, false, false, false, false, false};
assert(TargetRegisterInfo::isPhysicalRegister(Reg) &&
"analyzePhysReg not given a physical register!");
@@ -332,8 +333,12 @@ MachineOperandIteratorBase::analyzePhysReg(unsigned Reg,
}
}
- if (AllDefsDead && PRI.FullyDefined)
- PRI.DeadDef = true;
+ if (AllDefsDead) {
+ if (PRI.FullyDefined || PRI.Clobbered)
+ PRI.DeadDef = true;
+ else if (PRI.Defined)
+ PRI.PartialDeadDef = true;
+ }
return PRI;
}
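// Illustration of the new classification: when every def of the register
// in the instruction is dead, a full definition (or a regmask clobber)
// still yields DeadDef, while a partial sub-register definition now sets
// the new PartialDeadDef flag instead, letting callers distinguish a
// partially dead register from a wholly dead one. (Consumption of the
// flag happens in callers outside this hunk.)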
diff --git a/contrib/llvm/lib/CodeGen/MachineLICM.cpp b/contrib/llvm/lib/CodeGen/MachineLICM.cpp
index 99a97d2..119751b 100644
--- a/contrib/llvm/lib/CodeGen/MachineLICM.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineLICM.cpp
@@ -260,7 +260,7 @@ static bool LoopIsOuterMostWithPredecessor(MachineLoop *CurLoop) {
}
bool MachineLICM::runOnMachineFunction(MachineFunction &MF) {
- if (skipOptnoneFunction(*MF.getFunction()))
+ if (skipFunction(*MF.getFunction()))
return false;
Changed = FirstInLoop = false;
@@ -428,7 +428,7 @@ void MachineLICM::ProcessMI(MachineInstr *MI,
if (Def && !RuledOut) {
int FI = INT_MIN;
if ((!HasNonInvariantUse && IsLICMCandidate(*MI)) ||
- (TII->isLoadFromStackSlot(MI, FI) && MFI->isSpillSlotObjectIndex(FI)))
+ (TII->isLoadFromStackSlot(*MI, FI) && MFI->isSpillSlotObjectIndex(FI)))
Candidates.push_back(CandidateInfo(MI, Def, FI));
}
}
@@ -581,14 +581,14 @@ bool MachineLICM::IsGuaranteedToExecute(MachineBasicBlock *BB) {
}
void MachineLICM::EnterScope(MachineBasicBlock *MBB) {
- DEBUG(dbgs() << "Entering: " << MBB->getName() << '\n');
+ DEBUG(dbgs() << "Entering BB#" << MBB->getNumber() << '\n');
// Remember livein register pressure.
BackTrace.push_back(RegPressure);
}
void MachineLICM::ExitScope(MachineBasicBlock *MBB) {
- DEBUG(dbgs() << "Exiting: " << MBB->getName() << '\n');
+ DEBUG(dbgs() << "Exiting BB#" << MBB->getNumber() << '\n');
BackTrace.pop_back();
}
@@ -764,7 +764,7 @@ void MachineLICM::InitRegPressure(MachineBasicBlock *BB) {
if (BB->pred_size() == 1) {
MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
SmallVector<MachineOperand, 4> Cond;
- if (!TII->AnalyzeBranch(*BB, TBB, FBB, Cond, false) && Cond.empty())
+ if (!TII->analyzeBranch(*BB, TBB, FBB, Cond, false) && Cond.empty())
InitRegPressure(*BB->pred_begin());
}
@@ -982,7 +982,7 @@ bool MachineLICM::HasHighOperandLatency(MachineInstr &MI,
if (MOReg != Reg)
continue;
- if (TII->hasHighOperandLatency(SchedModel, MRI, &MI, DefIdx, &UseMI, i))
+ if (TII->hasHighOperandLatency(SchedModel, MRI, MI, DefIdx, UseMI, i))
return true;
}
@@ -996,7 +996,7 @@ bool MachineLICM::HasHighOperandLatency(MachineInstr &MI,
/// Return true if the instruction is marked "cheap" or the operand latency
/// between its def and a use is one or less.
bool MachineLICM::IsCheapInstruction(MachineInstr &MI) const {
- if (TII->isAsCheapAsAMove(&MI) || MI.isCopyLike())
+ if (TII->isAsCheapAsAMove(MI) || MI.isCopyLike())
return true;
bool isCheap = false;
@@ -1010,7 +1010,7 @@ bool MachineLICM::IsCheapInstruction(MachineInstr &MI) const {
if (TargetRegisterInfo::isPhysicalRegister(Reg))
continue;
- if (!TII->hasLowDefLatency(SchedModel, &MI, i))
+ if (!TII->hasLowDefLatency(SchedModel, MI, i))
return false;
isCheap = true;
}
@@ -1086,7 +1086,7 @@ bool MachineLICM::IsProfitableToHoist(MachineInstr &MI) {
// Rematerializable instructions should always be hoisted since the register
// allocator can just pull them down again when needed.
- if (TII->isTriviallyReMaterializable(&MI, AA))
+ if (TII->isTriviallyReMaterializable(MI, AA))
return true;
// FIXME: If there are long latency loop-invariant instructions inside the
@@ -1139,8 +1139,7 @@ bool MachineLICM::IsProfitableToHoist(MachineInstr &MI) {
// High register pressure situation, only hoist if the instruction is going
// to be remat'ed.
- if (!TII->isTriviallyReMaterializable(&MI, AA) &&
- !MI.isInvariantLoad(AA)) {
+ if (!TII->isTriviallyReMaterializable(MI, AA) && !MI.isInvariantLoad(AA)) {
DEBUG(dbgs() << "Can't remat / high reg-pressure: " << MI);
return false;
}
@@ -1171,17 +1170,15 @@ MachineInstr *MachineLICM::ExtractHoistableLoad(MachineInstr *MI) {
&LoadRegIndex);
if (NewOpc == 0) return nullptr;
const MCInstrDesc &MID = TII->get(NewOpc);
- if (MID.getNumDefs() != 1) return nullptr;
MachineFunction &MF = *MI->getParent()->getParent();
const TargetRegisterClass *RC = TII->getRegClass(MID, LoadRegIndex, TRI, MF);
// Ok, we're unfolding. Create a temporary register and do the unfold.
unsigned Reg = MRI->createVirtualRegister(RC);
SmallVector<MachineInstr *, 2> NewMIs;
- bool Success =
- TII->unfoldMemoryOperand(MF, MI, Reg,
- /*UnfoldLoad=*/true, /*UnfoldStore=*/false,
- NewMIs);
+ bool Success = TII->unfoldMemoryOperand(MF, *MI, Reg,
+ /*UnfoldLoad=*/true,
+ /*UnfoldStore=*/false, NewMIs);
(void)Success;
assert(Success &&
"unfoldMemoryOperand failed when getOpcodeAfterMemoryUnfold "
@@ -1222,7 +1219,7 @@ const MachineInstr*
MachineLICM::LookForDuplicate(const MachineInstr *MI,
std::vector<const MachineInstr*> &PrevMIs) {
for (const MachineInstr *PrevMI : PrevMIs)
- if (TII->produceSameValue(MI, PrevMI, (PreRegAlloc ? MRI : nullptr)))
+ if (TII->produceSameValue(*MI, *PrevMI, (PreRegAlloc ? MRI : nullptr)))
return PrevMI;
return nullptr;
@@ -1317,12 +1314,10 @@ bool MachineLICM::Hoist(MachineInstr *MI, MachineBasicBlock *Preheader) {
// terminator instructions.
DEBUG({
dbgs() << "Hoisting " << *MI;
- if (Preheader->getBasicBlock())
- dbgs() << " to MachineBasicBlock "
- << Preheader->getName();
if (MI->getParent()->getBasicBlock())
- dbgs() << " from MachineBasicBlock "
- << MI->getParent()->getName();
+ dbgs() << " from BB#" << MI->getParent()->getNumber();
+ if (Preheader->getBasicBlock())
+ dbgs() << " to BB#" << Preheader->getNumber();
dbgs() << "\n";
});
@@ -1382,7 +1377,7 @@ MachineBasicBlock *MachineLICM::getCurPreheader() {
return nullptr;
}
- CurPreheader = Pred->SplitCriticalEdge(CurLoop->getHeader(), this);
+ CurPreheader = Pred->SplitCriticalEdge(CurLoop->getHeader(), *this);
if (!CurPreheader) {
CurPreheader = reinterpret_cast<MachineBasicBlock *>(-1);
return nullptr;
diff --git a/contrib/llvm/lib/CodeGen/MachineLoopInfo.cpp b/contrib/llvm/lib/CodeGen/MachineLoopInfo.cpp
index 2f5c9e0..376f78f 100644
--- a/contrib/llvm/lib/CodeGen/MachineLoopInfo.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineLoopInfo.cpp
@@ -50,11 +50,12 @@ void MachineLoopInfo::getAnalysisUsage(AnalysisUsage &AU) const {
MachineBasicBlock *MachineLoop::getTopBlock() {
MachineBasicBlock *TopMBB = getHeader();
MachineFunction::iterator Begin = TopMBB->getParent()->begin();
- if (TopMBB != Begin) {
+ if (TopMBB->getIterator() != Begin) {
MachineBasicBlock *PriorMBB = &*std::prev(TopMBB->getIterator());
while (contains(PriorMBB)) {
TopMBB = PriorMBB;
- if (TopMBB == Begin) break;
+ if (TopMBB->getIterator() == Begin)
+ break;
PriorMBB = &*std::prev(TopMBB->getIterator());
}
}
@@ -64,7 +65,7 @@ MachineBasicBlock *MachineLoop::getTopBlock() {
MachineBasicBlock *MachineLoop::getBottomBlock() {
MachineBasicBlock *BotMBB = getHeader();
MachineFunction::iterator End = BotMBB->getParent()->end();
- if (BotMBB != std::prev(End)) {
+ if (BotMBB->getIterator() != std::prev(End)) {
MachineBasicBlock *NextMBB = &*std::next(BotMBB->getIterator());
while (contains(NextMBB)) {
BotMBB = NextMBB;
@@ -77,7 +78,7 @@ MachineBasicBlock *MachineLoop::getBottomBlock() {
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
-void MachineLoop::dump() const {
+LLVM_DUMP_METHOD void MachineLoop::dump() const {
print(dbgs());
}
#endif
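
getTopBlock and getBottomBlock now spell out getIterator() because the ilist rework removed the implicit conversion from MachineBasicBlock* to a function iterator. A self-contained sketch of the same backward-walk idiom over a std::list (illustrative names, not LLVM's):

    #include <iterator>
    #include <list>

    struct Node { int Id; bool InLoop; };

    // Shape of MachineLoop::getTopBlock() after the change: iterators are
    // compared explicitly instead of via pointer-to-iterator conversion.
    std::list<Node>::iterator topMost(std::list<Node> &Blocks,
                                      std::list<Node>::iterator Top) {
      if (Top != Blocks.begin()) {
        auto Prior = std::prev(Top);
        while (Prior->InLoop) {
          Top = Prior;
          if (Top == Blocks.begin())
            break;
          Prior = std::prev(Top);
        }
      }
      return Top;
    }

    int main() {
      std::list<Node> Blocks{{0, false}, {1, true}, {2, true}};
      return topMost(Blocks, std::prev(Blocks.end()))->Id == 1 ? 0 : 1;
    }
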
diff --git a/contrib/llvm/lib/CodeGen/MachineModuleInfo.cpp b/contrib/llvm/lib/CodeGen/MachineModuleInfo.cpp
index 1956a70..244e3fb 100644
--- a/contrib/llvm/lib/CodeGen/MachineModuleInfo.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineModuleInfo.cpp
@@ -396,7 +396,8 @@ void MachineModuleInfo::TidyLandingPads(DenseMap<MCSymbol*, uintptr_t> *LPMap) {
LandingPad.BeginLabels.erase(LandingPad.BeginLabels.begin() + j);
LandingPad.EndLabels.erase(LandingPad.EndLabels.begin() + j);
- --j, --e;
+ --j;
+ --e;
}
// Remove landing pads with no try-ranges.
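
The TidyLandingPads hunk erases entry j from two parallel label vectors and then backs up both the index and the cached bound; splitting the old comma expression into two statements changes nothing semantically. A minimal sketch, assuming the enclosing loop re-increments j afterwards:

    #include <vector>

    // Erasing entry j from parallel vectors mid-scan: after the erase, back up
    // the index and shrink the cached bound. Two statements say this more
    // plainly than the old comma expression "--j, --e;".
    void dropLabelPair(std::vector<int> &Begins, std::vector<int> &Ends,
                       unsigned &j, unsigned &e) {
      Begins.erase(Begins.begin() + j);
      Ends.erase(Ends.begin() + j);
      --j;
      --e;
    }
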
diff --git a/contrib/llvm/lib/CodeGen/MachineRegionInfo.cpp b/contrib/llvm/lib/CodeGen/MachineRegionInfo.cpp
index 01d2c2e..fc32183 100644
--- a/contrib/llvm/lib/CodeGen/MachineRegionInfo.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineRegionInfo.cpp
@@ -104,8 +104,8 @@ void MachineRegionInfoPass::verifyAnalysis() const {
void MachineRegionInfoPass::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
AU.addRequiredTransitive<DominatorTreeWrapperPass>();
- AU.addRequired<PostDominatorTree>();
- AU.addRequired<DominanceFrontier>();
+ AU.addRequired<PostDominatorTreeWrapperPass>();
+ AU.addRequired<DominanceFrontierWrapperPass>();
}
void MachineRegionInfoPass::print(raw_ostream &OS, const Module *) const {
@@ -113,7 +113,7 @@ void MachineRegionInfoPass::print(raw_ostream &OS, const Module *) const {
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
-void MachineRegionInfoPass::dump() const {
+LLVM_DUMP_METHOD void MachineRegionInfoPass::dump() const {
RI.dump();
}
#endif
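
Both dump() definitions gain LLVM_DUMP_METHOD, which (per llvm/Support/Compiler.h) marks these debugger-only helpers noinline and "used" so they are not dead-stripped from development builds. A reduced sketch with the attribute spelled out for GCC/Clang:

    #include <cstdio>

    #if !defined(NDEBUG)
    // In LLVM proper this comes from llvm/Support/Compiler.h; spelled out
    // here so the sketch stands alone.
    #define MY_DUMP_METHOD __attribute__((noinline, used))
    #else
    #define MY_DUMP_METHOD
    #endif

    struct RegionTreeStub {
    #if !defined(NDEBUG)
      MY_DUMP_METHOD void dump() const { std::puts("region tree"); }
    #endif
    };
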
diff --git a/contrib/llvm/lib/CodeGen/MachineRegisterInfo.cpp b/contrib/llvm/lib/CodeGen/MachineRegisterInfo.cpp
index 03c82f4..613598d 100644
--- a/contrib/llvm/lib/CodeGen/MachineRegisterInfo.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineRegisterInfo.cpp
@@ -24,9 +24,8 @@ using namespace llvm;
// Pin the vtable to this file.
void MachineRegisterInfo::Delegate::anchor() {}
-MachineRegisterInfo::MachineRegisterInfo(const MachineFunction *MF)
- : MF(MF), TheDelegate(nullptr), IsSSA(true), TracksLiveness(true),
- TracksSubRegLiveness(false) {
+MachineRegisterInfo::MachineRegisterInfo(MachineFunction *MF)
+ : MF(MF), TheDelegate(nullptr), TracksSubRegLiveness(false) {
unsigned NumRegs = getTargetRegisterInfo()->getNumRegs();
VRegInfo.reserve(256);
RegAllocHints.reserve(256);
@@ -42,6 +41,11 @@ MachineRegisterInfo::setRegClass(unsigned Reg, const TargetRegisterClass *RC) {
VRegInfo[Reg].first = RC;
}
+void MachineRegisterInfo::setRegBank(unsigned Reg,
+ const RegisterBank &RegBank) {
+ VRegInfo[Reg].first = &RegBank;
+}
+
const TargetRegisterClass *
MachineRegisterInfo::constrainRegClass(unsigned Reg,
const TargetRegisterClass *RC,
@@ -103,6 +107,32 @@ MachineRegisterInfo::createVirtualRegister(const TargetRegisterClass *RegClass){
return Reg;
}
+unsigned
+MachineRegisterInfo::getSize(unsigned VReg) const {
+ VRegToSizeMap::const_iterator SizeIt = getVRegToSize().find(VReg);
+ return SizeIt != getVRegToSize().end() ? SizeIt->second : 0;
+}
+
+void MachineRegisterInfo::setSize(unsigned VReg, unsigned Size) {
+ getVRegToSize()[VReg] = Size;
+}
+
+unsigned
+MachineRegisterInfo::createGenericVirtualRegister(unsigned Size) {
+ assert(Size && "Cannot create empty virtual register");
+
+ // New virtual register number.
+ unsigned Reg = TargetRegisterInfo::index2VirtReg(getNumVirtRegs());
+ VRegInfo.grow(Reg);
+ // FIXME: Should we use a dummy register class?
+ VRegInfo[Reg].first = static_cast<TargetRegisterClass *>(nullptr);
+ getVRegToSize()[Reg] = Size;
+ RegAllocHints.grow(Reg);
+ if (TheDelegate)
+ TheDelegate->MRI_NoteNewVirtualRegister(Reg);
+ return Reg;
+}
+
/// clearVirtRegs - Remove all virtual registers (after physreg assignment).
void MachineRegisterInfo::clearVirtRegs() {
#ifndef NDEBUG
@@ -471,13 +501,14 @@ static bool isNoReturnDef(const MachineOperand &MO) {
!Called->hasFnAttribute(Attribute::NoUnwind));
}
-bool MachineRegisterInfo::isPhysRegModified(unsigned PhysReg) const {
+bool MachineRegisterInfo::isPhysRegModified(unsigned PhysReg,
+ bool SkipNoReturnDef) const {
if (UsedPhysRegMask.test(PhysReg))
return true;
const TargetRegisterInfo *TRI = getTargetRegisterInfo();
for (MCRegAliasIterator AI(PhysReg, TRI, true); AI.isValid(); ++AI) {
for (const MachineOperand &MO : make_range(def_begin(*AI), def_end())) {
- if (isNoReturnDef(MO))
+ if (!SkipNoReturnDef && isNoReturnDef(MO))
continue;
return true;
}
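
createGenericVirtualRegister and the getSize/setSize pair are early GlobalISel plumbing: a virtual register may now be created knowing only its bit width, with the register class (or bank) filled in later. A toy model of that size-first table (hypothetical types, not MRI's actual storage):

    #include <cassert>
    #include <map>
    #include <vector>

    struct RegClassStub { unsigned Id; };

    class ToyRegInfo {
      std::vector<const RegClassStub *> Classes; // index == vreg number
      std::map<unsigned, unsigned> VRegSizes;    // vreg -> size in bits
    public:
      unsigned createGeneric(unsigned SizeInBits) {
        assert(SizeInBits && "cannot create empty virtual register");
        Classes.push_back(nullptr);              // no class yet, as in the patch
        unsigned Reg = Classes.size() - 1;
        VRegSizes[Reg] = SizeInBits;
        return Reg;
      }
      unsigned getSize(unsigned Reg) const {
        auto It = VRegSizes.find(Reg);
        return It != VRegSizes.end() ? It->second : 0; // 0 == unknown, as above
      }
    };

    int main() {
      ToyRegInfo MRI;
      unsigned R = MRI.createGeneric(64);
      return MRI.getSize(R) == 64 ? 0 : 1;
    }
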
diff --git a/contrib/llvm/lib/CodeGen/MachineSSAUpdater.cpp b/contrib/llvm/lib/CodeGen/MachineSSAUpdater.cpp
index 71a6eba..47ad60c 100644
--- a/contrib/llvm/lib/CodeGen/MachineSSAUpdater.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineSSAUpdater.cpp
@@ -19,7 +19,6 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Support/AlignOf.h"
-#include "llvm/Support/Allocator.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
diff --git a/contrib/llvm/lib/CodeGen/MachineScheduler.cpp b/contrib/llvm/lib/CodeGen/MachineScheduler.cpp
index bcee15c..d921e29 100644
--- a/contrib/llvm/lib/CodeGen/MachineScheduler.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineScheduler.cpp
@@ -23,13 +23,13 @@
#include "llvm/CodeGen/RegisterClassInfo.h"
#include "llvm/CodeGen/ScheduleDFS.h"
#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/GraphWriter.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
-#include <queue>
using namespace llvm;
@@ -65,14 +65,20 @@ static cl::opt<unsigned> SchedOnlyBlock("misched-only-block", cl::Hidden,
static bool ViewMISchedDAGs = false;
#endif // NDEBUG
+/// Avoid quadratic complexity in unusually large basic blocks by limiting the
+/// size of the ready lists.
+static cl::opt<unsigned> ReadyListLimit("misched-limit", cl::Hidden,
+ cl::desc("Limit ready list to N instructions"), cl::init(256));
+
static cl::opt<bool> EnableRegPressure("misched-regpressure", cl::Hidden,
cl::desc("Enable register pressure scheduling."), cl::init(true));
static cl::opt<bool> EnableCyclicPath("misched-cyclicpath", cl::Hidden,
cl::desc("Enable cyclic critical path analysis."), cl::init(true));
-static cl::opt<bool> EnableLoadCluster("misched-cluster", cl::Hidden,
- cl::desc("Enable load clustering."), cl::init(true));
+static cl::opt<bool> EnableMemOpCluster("misched-cluster", cl::Hidden,
+ cl::desc("Enable memop clustering."),
+ cl::init(true));
// Experimental heuristics
static cl::opt<bool> EnableMacroFusion("misched-fusion", cl::Hidden,
@@ -219,6 +225,11 @@ static cl::opt<bool> EnableMachineSched(
cl::desc("Enable the machine instruction scheduling pass."), cl::init(true),
cl::Hidden);
+static cl::opt<bool> EnablePostRAMachineSched(
+ "enable-post-misched",
+ cl::desc("Enable the post-ra machine instruction scheduling pass."),
+ cl::init(true), cl::Hidden);
+
/// Forward declare the standard machine scheduler. This will be used as the
/// default scheduler if the target does not set a default.
static ScheduleDAGInstrs *createGenericSchedLive(MachineSchedContext *C);
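
ReadyListLimit and EnablePostRAMachineSched are ordinary cl::opt flags; the interesting part is the getNumOccurrences() test further down, which distinguishes "left at its default" from "set on the command line" so an explicit flag overrides the subtarget hook. A hand-rolled sketch of that gating (the real option machinery lives in llvm/Support/CommandLine.h):

    // Stand-in for cl::opt<bool>: Occurrences counts explicit uses.
    struct BoolOpt {
      bool Value;
      unsigned Occurrences = 0;            // bumped once per command-line mention
      explicit BoolOpt(bool Default) : Value(Default) {}
      unsigned getNumOccurrences() const { return Occurrences; }
      explicit operator bool() const { return Value; }
    };

    BoolOpt EnablePostRA(true);            // mirrors cl::init(true)

    bool shouldRunPostRA(bool SubtargetSaysYes) {
      if (EnablePostRA.getNumOccurrences()) // flag given: it wins either way
        return static_cast<bool>(EnablePostRA);
      return SubtargetSaysYes;              // default: defer to the subtarget
    }
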
@@ -314,6 +325,9 @@ ScheduleDAGInstrs *PostMachineScheduler::createPostMachineScheduler() {
/// design would be to split blocks at scheduling boundaries, but LLVM has a
/// general bias against block splitting purely for implementation simplicity.
bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) {
+ if (skipFunction(*mf.getFunction()))
+ return false;
+
if (EnableMachineSched.getNumOccurrences()) {
if (!EnableMachineSched)
return false;
@@ -349,10 +363,13 @@ bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) {
}
bool PostMachineScheduler::runOnMachineFunction(MachineFunction &mf) {
- if (skipOptnoneFunction(*mf.getFunction()))
+ if (skipFunction(*mf.getFunction()))
return false;
- if (!mf.getSubtarget().enablePostRAScheduler()) {
+ if (EnablePostRAMachineSched.getNumOccurrences()) {
+ if (!EnablePostRAMachineSched)
+ return false;
+ } else if (!mf.getSubtarget().enablePostRAScheduler()) {
DEBUG(dbgs() << "Subtarget disables post-MI-sched.\n");
return false;
}
@@ -389,7 +406,7 @@ static bool isSchedBoundary(MachineBasicBlock::iterator MI,
MachineBasicBlock *MBB,
MachineFunction *MF,
const TargetInstrInfo *TII) {
- return MI->isCall() || TII->isSchedulingBoundary(MI, MBB, *MF);
+ return MI->isCall() || TII->isSchedulingBoundary(*MI, MBB, *MF);
}
/// Main driver for both MachineScheduler and PostMachineScheduler.
@@ -427,7 +444,6 @@ void MachineSchedulerBase::scheduleRegions(ScheduleDAGInstrs &Scheduler,
//
// MBB::size() uses instr_iterator to count. Here we need a bundle to count
// as a single instruction.
- unsigned RemainingInstrs = std::distance(MBB->begin(), MBB->end());
for(MachineBasicBlock::iterator RegionEnd = MBB->end();
RegionEnd != MBB->begin(); RegionEnd = Scheduler.begin()) {
@@ -435,15 +451,13 @@ void MachineSchedulerBase::scheduleRegions(ScheduleDAGInstrs &Scheduler,
if (RegionEnd != MBB->end() ||
isSchedBoundary(&*std::prev(RegionEnd), &*MBB, MF, TII)) {
--RegionEnd;
- // Count the boundary instruction.
- --RemainingInstrs;
}
// The next region starts above the previous region. Look backward in the
// instruction stream until we find the nearest boundary.
unsigned NumRegionInstrs = 0;
MachineBasicBlock::iterator I = RegionEnd;
- for(;I != MBB->begin(); --I, --RemainingInstrs) {
+ for (;I != MBB->begin(); --I) {
if (isSchedBoundary(&*std::prev(I), &*MBB, MF, TII))
break;
if (!I->isDebugValue())
@@ -466,8 +480,7 @@ void MachineSchedulerBase::scheduleRegions(ScheduleDAGInstrs &Scheduler,
<< "\n From: " << *I << " To: ";
if (RegionEnd != MBB->end()) dbgs() << *RegionEnd;
else dbgs() << "End";
- dbgs() << " RegionInstrs: " << NumRegionInstrs
- << " Remaining: " << RemainingInstrs << "\n");
+ dbgs() << " RegionInstrs: " << NumRegionInstrs << '\n');
if (DumpCriticalPathLength) {
errs() << MF->getName();
errs() << ":BB# " << MBB->getNumber();
@@ -485,7 +498,6 @@ void MachineSchedulerBase::scheduleRegions(ScheduleDAGInstrs &Scheduler,
// scheduler for the top of its scheduled region.
RegionEnd = Scheduler.begin();
}
- assert(RemainingInstrs == 0 && "Instruction count mismatch!");
Scheduler.finishBlock();
// FIXME: Ideally, no further passes should rely on kill flags. However,
// thumb2 size reduction is currently an exception, so the PostMIScheduler
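
With the redundant RemainingInstrs counter gone, scheduleRegions keeps only the per-region count: walk the block bottom-up, cut at scheduling boundaries, and hand each [I, RegionEnd) slice to the scheduler. The loop's shape, reduced to a plain vector with a placeholder boundary test:

    #include <cstdio>
    #include <vector>

    bool isBoundary(int Instr) { return Instr < 0; }   // placeholder predicate

    void scheduleRegions(const std::vector<int> &MBB) {
      size_t RegionEnd = MBB.size();
      while (RegionEnd != 0) {
        if (isBoundary(MBB[RegionEnd - 1]))
          --RegionEnd;                     // boundary instrs are never scheduled
        size_t I = RegionEnd;
        unsigned NumRegionInstrs = 0;
        while (I != 0 && !isBoundary(MBB[I - 1])) {
          --I;                             // debug values would not be counted
          ++NumRegionInstrs;
        }
        std::printf("region [%zu,%zu) RegionInstrs: %u\n", I, RegionEnd,
                    NumRegionInstrs);      // Scheduler.schedule() goes here
        RegionEnd = I;
      }
    }

    int main() {
      scheduleRegions({4, -1, 7, 8, -2, 9});  // negatives mark boundaries
      return 0;
    }
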
@@ -640,7 +652,7 @@ void ScheduleDAGMI::moveInstruction(
// Update LiveIntervals
if (LIS)
- LIS->handleMove(MI, /*UpdateFlags=*/true);
+ LIS->handleMove(*MI, /*UpdateFlags=*/true);
// Recede RegionBegin if an instruction moves above the first.
if (RegionBegin == InsertPos)
@@ -704,8 +716,7 @@ void ScheduleDAGMI::schedule() {
CurrentTop = nextIfDebug(++CurrentTop, CurrentBottom);
else
moveInstruction(MI, CurrentTop);
- }
- else {
+ } else {
assert(SU->isBottomReady() && "node still has unscheduled dependencies");
MachineBasicBlock::iterator priorII =
priorNonDebug(CurrentBottom, CurrentTop);
@@ -869,13 +880,19 @@ void ScheduleDAGMILive::enterRegion(MachineBasicBlock *bb,
SUPressureDiffs.clear();
ShouldTrackPressure = SchedImpl->shouldTrackPressure();
+ ShouldTrackLaneMasks = SchedImpl->shouldTrackLaneMasks();
+
+ assert((!ShouldTrackLaneMasks || ShouldTrackPressure) &&
+ "ShouldTrackLaneMasks requires ShouldTrackPressure");
}
// Set up the register pressure trackers for the top scheduled and bottom
// scheduled regions.
void ScheduleDAGMILive::initRegPressure() {
- TopRPTracker.init(&MF, RegClassInfo, LIS, BB, RegionBegin);
- BotRPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd);
+ TopRPTracker.init(&MF, RegClassInfo, LIS, BB, RegionBegin,
+ ShouldTrackLaneMasks, false);
+ BotRPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd,
+ ShouldTrackLaneMasks, false);
// Close the RPTracker to finalize live ins.
RPTracker.closeRegion();
@@ -905,7 +922,7 @@ void ScheduleDAGMILive::initRegPressure() {
// Account for liveness generated by the region boundary.
if (LiveRegionEnd != RegionEnd) {
- SmallVector<unsigned, 8> LiveUses;
+ SmallVector<RegisterMaskPair, 8> LiveUses;
BotRPTracker.recede(&LiveUses);
updatePressureDiffs(LiveUses);
}
@@ -969,47 +986,74 @@ updateScheduledPressure(const SUnit *SU,
/// Update the PressureDiff array for liveness after scheduling this
/// instruction.
-void ScheduleDAGMILive::updatePressureDiffs(ArrayRef<unsigned> LiveUses) {
- for (unsigned LUIdx = 0, LUEnd = LiveUses.size(); LUIdx != LUEnd; ++LUIdx) {
+void ScheduleDAGMILive::updatePressureDiffs(
+ ArrayRef<RegisterMaskPair> LiveUses) {
+ for (const RegisterMaskPair &P : LiveUses) {
+ unsigned Reg = P.RegUnit;
/// FIXME: Currently assuming single-use physregs.
- unsigned Reg = LiveUses[LUIdx];
- DEBUG(dbgs() << " LiveReg: " << PrintVRegOrUnit(Reg, TRI) << "\n");
if (!TRI->isVirtualRegister(Reg))
continue;
- // This may be called before CurrentBottom has been initialized. However,
- // BotRPTracker must have a valid position. We want the value live into the
- // instruction or live out of the block, so ask for the previous
- // instruction's live-out.
- const LiveInterval &LI = LIS->getInterval(Reg);
- VNInfo *VNI;
- MachineBasicBlock::const_iterator I =
- nextIfDebug(BotRPTracker.getPos(), BB->end());
- if (I == BB->end())
- VNI = LI.getVNInfoBefore(LIS->getMBBEndIdx(BB));
- else {
- LiveQueryResult LRQ = LI.Query(LIS->getInstructionIndex(I));
- VNI = LRQ.valueIn();
- }
- // RegisterPressureTracker guarantees that readsReg is true for LiveUses.
- assert(VNI && "No live value at use.");
- for (const VReg2SUnit &V2SU
- : make_range(VRegUses.find(Reg), VRegUses.end())) {
- SUnit *SU = V2SU.SU;
- // If this use comes before the reaching def, it cannot be a last use, so
- // descrease its pressure change.
- if (!SU->isScheduled && SU != &ExitSU) {
- LiveQueryResult LRQ
- = LI.Query(LIS->getInstructionIndex(SU->getInstr()));
- if (LRQ.valueIn() == VNI) {
- PressureDiff &PDiff = getPressureDiff(SU);
- PDiff.addPressureChange(Reg, true, &MRI);
- DEBUG(
- dbgs() << " UpdateRegP: SU(" << SU->NodeNum << ") "
- << *SU->getInstr();
- dbgs() << " to ";
- PDiff.dump(*TRI);
- );
+ if (ShouldTrackLaneMasks) {
+ // If the register has just become live then other uses won't change
+ // this fact anymore => decrement pressure.
+ // If the register has just become dead then other uses make it come
+ // back to life => increment pressure.
+ bool Decrement = P.LaneMask != 0;
+
+ for (const VReg2SUnit &V2SU
+ : make_range(VRegUses.find(Reg), VRegUses.end())) {
+ SUnit &SU = *V2SU.SU;
+ if (SU.isScheduled || &SU == &ExitSU)
+ continue;
+
+ PressureDiff &PDiff = getPressureDiff(&SU);
+ PDiff.addPressureChange(Reg, Decrement, &MRI);
+ DEBUG(
+ dbgs() << " UpdateRegP: SU(" << SU.NodeNum << ") "
+ << PrintReg(Reg, TRI) << ':' << PrintLaneMask(P.LaneMask)
+ << ' ' << *SU.getInstr();
+ dbgs() << " to ";
+ PDiff.dump(*TRI);
+ );
+ }
+ } else {
+ assert(P.LaneMask != 0);
+ DEBUG(dbgs() << " LiveReg: " << PrintVRegOrUnit(Reg, TRI) << "\n");
+ // This may be called before CurrentBottom has been initialized. However,
+ // BotRPTracker must have a valid position. We want the value live into the
+ // instruction or live out of the block, so ask for the previous
+ // instruction's live-out.
+ const LiveInterval &LI = LIS->getInterval(Reg);
+ VNInfo *VNI;
+ MachineBasicBlock::const_iterator I =
+ nextIfDebug(BotRPTracker.getPos(), BB->end());
+ if (I == BB->end())
+ VNI = LI.getVNInfoBefore(LIS->getMBBEndIdx(BB));
+ else {
+ LiveQueryResult LRQ = LI.Query(LIS->getInstructionIndex(*I));
+ VNI = LRQ.valueIn();
+ }
+ // RegisterPressureTracker guarantees that readsReg is true for LiveUses.
+ assert(VNI && "No live value at use.");
+ for (const VReg2SUnit &V2SU
+ : make_range(VRegUses.find(Reg), VRegUses.end())) {
+ SUnit *SU = V2SU.SU;
+ // If this use comes before the reaching def, it cannot be a last use,
+ // so decrease its pressure change.
+ if (!SU->isScheduled && SU != &ExitSU) {
+ LiveQueryResult LRQ =
+ LI.Query(LIS->getInstructionIndex(*SU->getInstr()));
+ if (LRQ.valueIn() == VNI) {
+ PressureDiff &PDiff = getPressureDiff(SU);
+ PDiff.addPressureChange(Reg, true, &MRI);
+ DEBUG(
+ dbgs() << " UpdateRegP: SU(" << SU->NodeNum << ") "
+ << *SU->getInstr();
+ dbgs() << " to ";
+ PDiff.dump(*TRI);
+ );
+ }
}
}
}
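
The new lane-mask branch applies one signed pressure nudge per still-unscheduled user of the register: a use that just became live can no longer be anyone's last use (decrement), while one that just went dead can be revived by another use (increment). Stripped to that arithmetic (toy types; the real code updates a PressureDiff per SUnit):

    #include <cstdint>
    #include <vector>

    struct UserSU { bool Scheduled; int PressureDelta; };

    // LaneMask != 0 here means the register just became live at this point.
    void nudgePressureDiffs(std::vector<UserSU> &UsersOfReg, uint64_t LaneMask) {
      bool Decrement = LaneMask != 0;
      for (UserSU &U : UsersOfReg) {
        if (U.Scheduled)
          continue;                        // ExitSU is skipped the same way
        U.PressureDelta += Decrement ? -1 : +1;
      }
    }
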
@@ -1057,11 +1101,6 @@ void ScheduleDAGMILive::schedule() {
// Initialize ready queues now that the DAG and priority data are finalized.
initQueues(TopRoots, BotRoots);
- if (ShouldTrackPressure) {
- assert(TopRPTracker.getPos() == RegionBegin && "bad initial Top tracker");
- TopRPTracker.setPos(CurrentTop);
- }
-
bool IsTopNode = false;
while (true) {
DEBUG(dbgs() << "** ScheduleDAGMILive::schedule picking next node\n");
@@ -1111,14 +1150,14 @@ void ScheduleDAGMILive::buildDAGWithRegPressure() {
// Initialize the register pressure tracker used by buildSchedGraph.
RPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd,
- /*TrackUntiedDefs=*/true);
+ ShouldTrackLaneMasks, /*TrackUntiedDefs=*/true);
// Account for liveness generated by the region boundary.
if (LiveRegionEnd != RegionEnd)
RPTracker.recede();
// Build the DAG, and compute current register pressure.
- buildSchedGraph(AA, &RPTracker, &SUPressureDiffs);
+ buildSchedGraph(AA, &RPTracker, &SUPressureDiffs, LIS, ShouldTrackLaneMasks);
// Initialize top/bottom trackers after computing region pressure.
initRegPressure();
@@ -1167,10 +1206,8 @@ unsigned ScheduleDAGMILive::computeCyclicCriticalPath() {
unsigned MaxCyclicLatency = 0;
// Visit each live out vreg def to find def/use pairs that cross iterations.
- ArrayRef<unsigned> LiveOuts = RPTracker.getPressure().LiveOutRegs;
- for (ArrayRef<unsigned>::iterator RI = LiveOuts.begin(), RE = LiveOuts.end();
- RI != RE; ++RI) {
- unsigned Reg = *RI;
+ for (const RegisterMaskPair &P : RPTracker.getPressure().LiveOutRegs) {
+ unsigned Reg = P.RegUnit;
if (!TRI->isVirtualRegister(Reg))
continue;
const LiveInterval &LI = LIS->getInterval(Reg);
@@ -1193,8 +1230,7 @@ unsigned ScheduleDAGMILive::computeCyclicCriticalPath() {
continue;
// Only consider uses of the phi.
- LiveQueryResult LRQ =
- LI.Query(LIS->getInstructionIndex(SU->getInstr()));
+ LiveQueryResult LRQ = LI.Query(LIS->getInstructionIndex(*SU->getInstr()));
if (!LRQ.valueIn()->isPHIDef())
continue;
@@ -1209,8 +1245,7 @@ unsigned ScheduleDAGMILive::computeCyclicCriticalPath() {
if (LiveInHeight > LiveOutHeight) {
if (LiveInHeight - LiveOutHeight < CyclicLatency)
CyclicLatency = LiveInHeight - LiveOutHeight;
- }
- else
+ } else
CyclicLatency = 0;
DEBUG(dbgs() << "Cyclic Path: SU(" << DefSU->NodeNum << ") -> SU("
@@ -1223,6 +1258,17 @@ unsigned ScheduleDAGMILive::computeCyclicCriticalPath() {
return MaxCyclicLatency;
}
+/// Release ExitSU predecessors and set up scheduler queues. Re-position
+/// the Top RP tracker in case the region beginning has changed.
+void ScheduleDAGMILive::initQueues(ArrayRef<SUnit*> TopRoots,
+ ArrayRef<SUnit*> BotRoots) {
+ ScheduleDAGMI::initQueues(TopRoots, BotRoots);
+ if (ShouldTrackPressure) {
+ assert(TopRPTracker.getPos() == RegionBegin && "bad initial Top tracker");
+ TopRPTracker.setPos(CurrentTop);
+ }
+}
+
/// Move an instruction and update register pressure.
void ScheduleDAGMILive::scheduleMI(SUnit *SU, bool IsTopNode) {
// Move the instruction to its new location in the instruction stream.
@@ -1239,7 +1285,18 @@ void ScheduleDAGMILive::scheduleMI(SUnit *SU, bool IsTopNode) {
if (ShouldTrackPressure) {
// Update top scheduled pressure.
- TopRPTracker.advance();
+ RegisterOperands RegOpers;
+ RegOpers.collect(*MI, *TRI, MRI, ShouldTrackLaneMasks, false);
+ if (ShouldTrackLaneMasks) {
+ // Adjust liveness and add missing dead+read-undef flags.
+ SlotIndex SlotIdx = LIS->getInstructionIndex(*MI).getRegSlot();
+ RegOpers.adjustLaneLiveness(*LIS, MRI, SlotIdx, MI);
+ } else {
+ // Adjust for missing dead-def flags.
+ RegOpers.detectDeadDefs(*MI, *LIS);
+ }
+
+ TopRPTracker.advance(RegOpers);
assert(TopRPTracker.getPos() == CurrentTop && "out of sync");
DEBUG(
dbgs() << "Top Pressure:\n";
@@ -1248,8 +1305,7 @@ void ScheduleDAGMILive::scheduleMI(SUnit *SU, bool IsTopNode) {
updateScheduledPressure(SU, TopRPTracker.getPressure().MaxSetPressure);
}
- }
- else {
+ } else {
assert(SU->isBottomReady() && "node still has unscheduled dependencies");
MachineBasicBlock::iterator priorII =
priorNonDebug(CurrentBottom, CurrentTop);
@@ -1264,9 +1320,20 @@ void ScheduleDAGMILive::scheduleMI(SUnit *SU, bool IsTopNode) {
CurrentBottom = MI;
}
if (ShouldTrackPressure) {
- // Update bottom scheduled pressure.
- SmallVector<unsigned, 8> LiveUses;
- BotRPTracker.recede(&LiveUses);
+ RegisterOperands RegOpers;
+ RegOpers.collect(*MI, *TRI, MRI, ShouldTrackLaneMasks, false);
+ if (ShouldTrackLaneMasks) {
+ // Adjust liveness and add missing dead+read-undef flags.
+ SlotIndex SlotIdx = LIS->getInstructionIndex(*MI).getRegSlot();
+ RegOpers.adjustLaneLiveness(*LIS, MRI, SlotIdx, MI);
+ } else {
+ // Adjust for missing dead-def flags.
+ RegOpers.detectDeadDefs(*MI, *LIS);
+ }
+
+ BotRPTracker.recedeSkipDebugValues();
+ SmallVector<RegisterMaskPair, 8> LiveUses;
+ BotRPTracker.recede(RegOpers, &LiveUses);
assert(BotRPTracker.getPos() == CurrentBottom && "out of sync");
DEBUG(
dbgs() << "Bottom Pressure:\n";
@@ -1280,64 +1347,81 @@ void ScheduleDAGMILive::scheduleMI(SUnit *SU, bool IsTopNode) {
}
//===----------------------------------------------------------------------===//
-// LoadClusterMutation - DAG post-processing to cluster loads.
+// BaseMemOpClusterMutation - DAG post-processing to cluster loads or stores.
//===----------------------------------------------------------------------===//
namespace {
/// \brief Post-process the DAG to create cluster edges between neighboring
-/// loads.
-class LoadClusterMutation : public ScheduleDAGMutation {
- struct LoadInfo {
+/// loads or between neighboring stores.
+class BaseMemOpClusterMutation : public ScheduleDAGMutation {
+ struct MemOpInfo {
SUnit *SU;
unsigned BaseReg;
- unsigned Offset;
- LoadInfo(SUnit *su, unsigned reg, unsigned ofs)
- : SU(su), BaseReg(reg), Offset(ofs) {}
+ int64_t Offset;
+ MemOpInfo(SUnit *su, unsigned reg, int64_t ofs)
+ : SU(su), BaseReg(reg), Offset(ofs) {}
- bool operator<(const LoadInfo &RHS) const {
+ bool operator<(const MemOpInfo &RHS) const {
return std::tie(BaseReg, Offset) < std::tie(RHS.BaseReg, RHS.Offset);
}
};
const TargetInstrInfo *TII;
const TargetRegisterInfo *TRI;
+ bool IsLoad;
+
public:
- LoadClusterMutation(const TargetInstrInfo *tii,
- const TargetRegisterInfo *tri)
- : TII(tii), TRI(tri) {}
+ BaseMemOpClusterMutation(const TargetInstrInfo *tii,
+ const TargetRegisterInfo *tri, bool IsLoad)
+ : TII(tii), TRI(tri), IsLoad(IsLoad) {}
+
+ void apply(ScheduleDAGInstrs *DAGInstrs) override;
- void apply(ScheduleDAGMI *DAG) override;
protected:
- void clusterNeighboringLoads(ArrayRef<SUnit*> Loads, ScheduleDAGMI *DAG);
+ void clusterNeighboringMemOps(ArrayRef<SUnit *> MemOps, ScheduleDAGMI *DAG);
+};
+
+class StoreClusterMutation : public BaseMemOpClusterMutation {
+public:
+ StoreClusterMutation(const TargetInstrInfo *tii,
+ const TargetRegisterInfo *tri)
+ : BaseMemOpClusterMutation(tii, tri, false) {}
+};
+
+class LoadClusterMutation : public BaseMemOpClusterMutation {
+public:
+ LoadClusterMutation(const TargetInstrInfo *tii, const TargetRegisterInfo *tri)
+ : BaseMemOpClusterMutation(tii, tri, true) {}
};
} // anonymous
-void LoadClusterMutation::clusterNeighboringLoads(ArrayRef<SUnit*> Loads,
- ScheduleDAGMI *DAG) {
- SmallVector<LoadClusterMutation::LoadInfo,32> LoadRecords;
- for (unsigned Idx = 0, End = Loads.size(); Idx != End; ++Idx) {
- SUnit *SU = Loads[Idx];
+void BaseMemOpClusterMutation::clusterNeighboringMemOps(
+ ArrayRef<SUnit *> MemOps, ScheduleDAGMI *DAG) {
+ SmallVector<MemOpInfo, 32> MemOpRecords;
+ for (unsigned Idx = 0, End = MemOps.size(); Idx != End; ++Idx) {
+ SUnit *SU = MemOps[Idx];
unsigned BaseReg;
- unsigned Offset;
- if (TII->getMemOpBaseRegImmOfs(SU->getInstr(), BaseReg, Offset, TRI))
- LoadRecords.push_back(LoadInfo(SU, BaseReg, Offset));
+ int64_t Offset;
+ if (TII->getMemOpBaseRegImmOfs(*SU->getInstr(), BaseReg, Offset, TRI))
+ MemOpRecords.push_back(MemOpInfo(SU, BaseReg, Offset));
}
- if (LoadRecords.size() < 2)
+ if (MemOpRecords.size() < 2)
return;
- std::sort(LoadRecords.begin(), LoadRecords.end());
+
+ std::sort(MemOpRecords.begin(), MemOpRecords.end());
unsigned ClusterLength = 1;
- for (unsigned Idx = 0, End = LoadRecords.size(); Idx < (End - 1); ++Idx) {
- if (LoadRecords[Idx].BaseReg != LoadRecords[Idx+1].BaseReg) {
+ for (unsigned Idx = 0, End = MemOpRecords.size(); Idx < (End - 1); ++Idx) {
+ if (MemOpRecords[Idx].BaseReg != MemOpRecords[Idx+1].BaseReg) {
ClusterLength = 1;
continue;
}
- SUnit *SUa = LoadRecords[Idx].SU;
- SUnit *SUb = LoadRecords[Idx+1].SU;
- if (TII->shouldClusterLoads(SUa->getInstr(), SUb->getInstr(), ClusterLength)
- && DAG->addEdge(SUb, SDep(SUa, SDep::Cluster))) {
-
- DEBUG(dbgs() << "Cluster loads SU(" << SUa->NodeNum << ") - SU("
+ SUnit *SUa = MemOpRecords[Idx].SU;
+ SUnit *SUb = MemOpRecords[Idx+1].SU;
+ if (TII->shouldClusterMemOps(*SUa->getInstr(), *SUb->getInstr(),
+ ClusterLength) &&
+ DAG->addEdge(SUb, SDep(SUa, SDep::Cluster))) {
+ DEBUG(dbgs() << "Cluster ld/st SU(" << SUa->NodeNum << ") - SU("
<< SUb->NodeNum << ")\n");
// Copy successor edges from SUa to SUb. Interleaving computation
// dependent on SUa can prevent load combining due to register reuse.
@@ -1351,22 +1435,26 @@ void LoadClusterMutation::clusterNeighboringLoads(ArrayRef<SUnit*> Loads,
DAG->addEdge(SI->getSUnit(), SDep(SUb, SDep::Artificial));
}
++ClusterLength;
- }
- else
+ } else
ClusterLength = 1;
}
}
/// \brief Callback from DAG postProcessing to create cluster edges for loads.
-void LoadClusterMutation::apply(ScheduleDAGMI *DAG) {
+void BaseMemOpClusterMutation::apply(ScheduleDAGInstrs *DAGInstrs) {
+
+ ScheduleDAGMI *DAG = static_cast<ScheduleDAGMI*>(DAGInstrs);
+
// Map DAG NodeNum to store chain ID.
DenseMap<unsigned, unsigned> StoreChainIDs;
- // Map each store chain to a set of dependent loads.
+ // Map each store chain to a set of dependent MemOps.
SmallVector<SmallVector<SUnit*,4>, 32> StoreChainDependents;
for (unsigned Idx = 0, End = DAG->SUnits.size(); Idx != End; ++Idx) {
SUnit *SU = &DAG->SUnits[Idx];
- if (!SU->getInstr()->mayLoad())
+ if ((IsLoad && !SU->getInstr()->mayLoad()) ||
+ (!IsLoad && !SU->getInstr()->mayStore()))
continue;
+
unsigned ChainPredID = DAG->SUnits.size();
for (SUnit::const_pred_iterator
PI = SU->Preds.begin(), PE = SU->Preds.end(); PI != PE; ++PI) {
@@ -1376,7 +1464,7 @@ void LoadClusterMutation::apply(ScheduleDAGMI *DAG) {
}
}
// Check if this chain-like pred has been seen
- // before. ChainPredID==MaxNodeID for loads at the top of the schedule.
+ // before. ChainPredID==MaxNodeID at the top of the schedule.
unsigned NumChains = StoreChainDependents.size();
std::pair<DenseMap<unsigned, unsigned>::iterator, bool> Result =
StoreChainIDs.insert(std::make_pair(ChainPredID, NumChains));
@@ -1384,9 +1472,10 @@ void LoadClusterMutation::apply(ScheduleDAGMI *DAG) {
StoreChainDependents.resize(NumChains + 1);
StoreChainDependents[Result.first->second].push_back(SU);
}
+
// Iterate over the store chains.
for (unsigned Idx = 0, End = StoreChainDependents.size(); Idx != End; ++Idx)
- clusterNeighboringLoads(StoreChainDependents[Idx], DAG);
+ clusterNeighboringMemOps(StoreChainDependents[Idx], DAG);
}
//===----------------------------------------------------------------------===//
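
clusterNeighboringMemOps is a sort-then-scan: record (BaseReg, Offset) per memory op, sort so same-base neighbors become adjacent, then grow a cluster while the target accepts each pair. The skeleton below stubs out shouldClusterMemOps and returns the would-be cluster edges instead of mutating a DAG:

    #include <algorithm>
    #include <cstdint>
    #include <tuple>
    #include <utility>
    #include <vector>

    struct MemOpInfo {
      unsigned SU;           // node number, standing in for SUnit*
      unsigned BaseReg;
      int64_t Offset;
      bool operator<(const MemOpInfo &RHS) const {
        return std::tie(BaseReg, Offset) < std::tie(RHS.BaseReg, RHS.Offset);
      }
    };

    // Stand-in for TII->shouldClusterMemOps: accept adjacent offsets, cap at 4.
    static bool shouldCluster(const MemOpInfo &A, const MemOpInfo &B,
                              unsigned ClusterLength) {
      return ClusterLength < 4 && B.Offset == A.Offset + 1;
    }

    std::vector<std::pair<unsigned, unsigned>>
    clusterNeighboringMemOps(std::vector<MemOpInfo> Records) {
      std::vector<std::pair<unsigned, unsigned>> Edges;
      if (Records.size() < 2)
        return Edges;
      std::sort(Records.begin(), Records.end());
      unsigned ClusterLength = 1;
      for (size_t Idx = 0; Idx + 1 < Records.size(); ++Idx) {
        if (Records[Idx].BaseReg != Records[Idx + 1].BaseReg) {
          ClusterLength = 1;               // new base register: restart cluster
          continue;
        }
        if (shouldCluster(Records[Idx], Records[Idx + 1], ClusterLength)) {
          Edges.emplace_back(Records[Idx].SU, Records[Idx + 1].SU);
          ++ClusterLength;                 // DAG->addEdge(SUb, Cluster) upstream
        } else
          ClusterLength = 1;
      }
      return Edges;
    }
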
@@ -1403,7 +1492,7 @@ public:
MacroFusion(const TargetInstrInfo &TII, const TargetRegisterInfo &TRI)
: TII(TII), TRI(TRI) {}
- void apply(ScheduleDAGMI *DAG) override;
+ void apply(ScheduleDAGInstrs *DAGInstrs) override;
};
} // anonymous
@@ -1423,7 +1512,9 @@ static bool HasDataDep(const TargetRegisterInfo &TRI, const MachineInstr &MI,
/// \brief Callback from DAG postProcessing to create cluster edges to encourage
/// fused operations.
-void MacroFusion::apply(ScheduleDAGMI *DAG) {
+void MacroFusion::apply(ScheduleDAGInstrs *DAGInstrs) {
+ ScheduleDAGMI *DAG = static_cast<ScheduleDAGMI*>(DAGInstrs);
+
// For now, assume targets can only fuse with the branch.
SUnit &ExitSU = DAG->ExitSU;
MachineInstr *Branch = ExitSU.getInstr();
@@ -1439,7 +1530,7 @@ void MacroFusion::apply(ScheduleDAGMI *DAG) {
if (!HasDataDep(TRI, *Branch, *Pred))
continue;
- if (!TII.shouldScheduleAdjacent(Pred, Branch))
+ if (!TII.shouldScheduleAdjacent(*Pred, *Branch))
continue;
// Create a single weak edge from SU to ExitSU. The only effect is to cause
@@ -1474,7 +1565,7 @@ class CopyConstrain : public ScheduleDAGMutation {
public:
CopyConstrain(const TargetInstrInfo *, const TargetRegisterInfo *) {}
- void apply(ScheduleDAGMI *DAG) override;
+ void apply(ScheduleDAGInstrs *DAGInstrs) override;
protected:
void constrainLocalCopy(SUnit *CopySU, ScheduleDAGMILive *DAG);
@@ -1505,12 +1596,14 @@ void CopyConstrain::constrainLocalCopy(SUnit *CopySU, ScheduleDAGMILive *DAG) {
MachineInstr *Copy = CopySU->getInstr();
// Check for pure vreg copies.
- unsigned SrcReg = Copy->getOperand(1).getReg();
- if (!TargetRegisterInfo::isVirtualRegister(SrcReg))
+ const MachineOperand &SrcOp = Copy->getOperand(1);
+ unsigned SrcReg = SrcOp.getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(SrcReg) || !SrcOp.readsReg())
return;
- unsigned DstReg = Copy->getOperand(0).getReg();
- if (!TargetRegisterInfo::isVirtualRegister(DstReg))
+ const MachineOperand &DstOp = Copy->getOperand(0);
+ unsigned DstReg = DstOp.getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(DstReg) || DstOp.isDead())
return;
// Check if either the dest or source is local. If it's live across a back
@@ -1627,15 +1720,16 @@ void CopyConstrain::constrainLocalCopy(SUnit *CopySU, ScheduleDAGMILive *DAG) {
/// \brief Callback from DAG postProcessing to create weak edges to encourage
/// copy elimination.
-void CopyConstrain::apply(ScheduleDAGMI *DAG) {
+void CopyConstrain::apply(ScheduleDAGInstrs *DAGInstrs) {
+ ScheduleDAGMI *DAG = static_cast<ScheduleDAGMI*>(DAGInstrs);
assert(DAG->hasVRegLiveness() && "Expect VRegs with LiveIntervals");
MachineBasicBlock::iterator FirstPos = nextIfDebug(DAG->begin(), DAG->end());
if (FirstPos == DAG->end())
return;
- RegionBeginIdx = DAG->getLIS()->getInstructionIndex(&*FirstPos);
+ RegionBeginIdx = DAG->getLIS()->getInstructionIndex(*FirstPos);
RegionEndIdx = DAG->getLIS()->getInstructionIndex(
- &*priorNonDebug(DAG->end(), DAG->begin()));
+ *priorNonDebug(DAG->end(), DAG->begin()));
for (unsigned Idx = 0, End = DAG->SUnits.size(); Idx != End; ++Idx) {
SUnit *SU = &DAG->SUnits[Idx];
@@ -1862,7 +1956,8 @@ void SchedBoundary::releaseNode(SUnit *SU, unsigned ReadyCycle) {
// Check for interlocks first. For the purpose of other heuristics, an
// instruction that cannot issue appears as if it's not in the ReadyQueue.
bool IsBuffered = SchedModel->getMicroOpBufferSize() != 0;
- if ((!IsBuffered && ReadyCycle > CurrCycle) || checkHazard(SU))
+ if ((!IsBuffered && ReadyCycle > CurrCycle) || checkHazard(SU) ||
+ Available.size() >= ReadyListLimit)
Pending.push(SU);
else
Available.push(SU);
@@ -1905,8 +2000,7 @@ void SchedBoundary::bumpCycle(unsigned NextCycle) {
if (!HazardRec->isEnabled()) {
// Bypass HazardRec virtual calls.
CurrCycle = NextCycle;
- }
- else {
+ } else {
// Bypass getHazardType calls in case of long latency.
for (; CurrCycle != NextCycle; ++CurrCycle) {
if (isTop())
@@ -2074,8 +2168,7 @@ void SchedBoundary::bumpNode(SUnit *SU) {
// If we stall for any reason, bump the cycle.
if (NextCycle > CurrCycle) {
bumpCycle(NextCycle);
- }
- else {
+ } else {
// After updating ZoneCritResIdx and ExpectedLatency, check if we're
// resource limited. If a stall occurred, bumpCycle does this.
unsigned LFactor = SchedModel->getLatencyFactor();
@@ -2119,11 +2212,13 @@ void SchedBoundary::releasePending() {
if (checkHazard(SU))
continue;
+ if (Available.size() >= ReadyListLimit)
+ break;
+
Available.push(SU);
Pending.remove(Pending.begin()+i);
--i; --e;
}
- DEBUG(if (!Pending.empty()) Pending.dump());
CheckPending = false;
}
@@ -2163,6 +2258,10 @@ SUnit *SchedBoundary::pickOnlyChoice() {
bumpCycle(CurrCycle + 1);
releasePending();
}
+
+ DEBUG(Pending.dump());
+ DEBUG(Available.dump());
+
if (Available.size() == 1)
return *Available.begin();
return nullptr;
@@ -2177,8 +2276,7 @@ void SchedBoundary::dumpScheduledState() {
if (ZoneCritResIdx) {
ResFactor = SchedModel->getResourceFactor(ZoneCritResIdx);
ResCount = getResourceCount(ZoneCritResIdx);
- }
- else {
+ } else {
ResFactor = SchedModel->getMicroOpFactor();
ResCount = RetiredMOps * SchedModel->getMicroOpFactor();
}
@@ -2218,8 +2316,7 @@ initResourceDelta(const ScheduleDAGMI *DAG,
/// Set the CandPolicy given a scheduling zone given the current resources and
/// latencies inside and outside the zone.
-void GenericSchedulerBase::setPolicy(CandPolicy &Policy,
- bool IsPostRA,
+void GenericSchedulerBase::setPolicy(CandPolicy &Policy, bool IsPostRA,
SchedBoundary &CurrZone,
SchedBoundary *OtherZone) {
// Apply preemptive heuristics based on the total latency and resources
@@ -2295,7 +2392,8 @@ const char *GenericSchedulerBase::getReasonStr(
GenericSchedulerBase::CandReason Reason) {
switch (Reason) {
case NoCand: return "NOCAND ";
- case PhysRegCopy: return "PREG-COPY";
+ case Only1: return "ONLY1 ";
+ case PhysRegCopy: return "PREG-COPY ";
case RegExcess: return "REG-EXCESS";
case RegCritical: return "REG-CRIT ";
case Stall: return "STALL ";
@@ -2381,7 +2479,6 @@ static bool tryLess(int TryVal, int CandVal,
Cand.Reason = Reason;
return true;
}
- Cand.setRepeat(Reason);
return false;
}
@@ -2398,7 +2495,6 @@ static bool tryGreater(int TryVal, int CandVal,
Cand.Reason = Reason;
return true;
}
- Cand.setRepeat(Reason);
return false;
}
@@ -2414,8 +2510,7 @@ static bool tryLatency(GenericSchedulerBase::SchedCandidate &TryCand,
if (tryGreater(TryCand.SU->getHeight(), Cand.SU->getHeight(),
TryCand, Cand, GenericSchedulerBase::TopPathReduce))
return true;
- }
- else {
+ } else {
if (Cand.SU->getHeight() > Zone.getScheduledLatency()) {
if (tryLess(TryCand.SU->getHeight(), Cand.SU->getHeight(),
TryCand, Cand, GenericSchedulerBase::BotHeightReduce))
@@ -2428,10 +2523,13 @@ static bool tryLatency(GenericSchedulerBase::SchedCandidate &TryCand,
return false;
}
-static void tracePick(const GenericSchedulerBase::SchedCandidate &Cand,
- bool IsTop) {
+static void tracePick(GenericSchedulerBase::CandReason Reason, bool IsTop) {
DEBUG(dbgs() << "Pick " << (IsTop ? "Top " : "Bot ")
- << GenericSchedulerBase::getReasonStr(Cand.Reason) << '\n');
+ << GenericSchedulerBase::getReasonStr(Reason) << '\n');
+}
+
+static void tracePick(const GenericSchedulerBase::SchedCandidate &Cand) {
+ tracePick(Cand.Reason, Cand.AtTop);
}
void GenericScheduler::initialize(ScheduleDAGMI *dag) {
@@ -2460,6 +2558,8 @@ void GenericScheduler::initialize(ScheduleDAGMI *dag) {
DAG->MF.getSubtarget().getInstrInfo()->CreateTargetMIHazardRecognizer(
Itin, DAG);
}
+ TopCand.SU = nullptr;
+ BotCand.SU = nullptr;
}
/// Initialize the per-region scheduling policy.
@@ -2487,8 +2587,7 @@ void GenericScheduler::initPolicy(MachineBasicBlock::iterator Begin,
RegionPolicy.OnlyBottomUp = true;
// Allow the subtarget to override default policy.
- MF.getSubtarget().overrideSchedPolicy(RegionPolicy, Begin, End,
- NumRegionInstrs);
+ MF.getSubtarget().overrideSchedPolicy(RegionPolicy, NumRegionInstrs);
// After subtarget overrides, apply command line options.
if (!EnableRegPressure)
@@ -2582,19 +2681,25 @@ static bool tryPressure(const PressureChange &TryP,
GenericSchedulerBase::CandReason Reason,
const TargetRegisterInfo *TRI,
const MachineFunction &MF) {
- unsigned TryPSet = TryP.getPSetOrMax();
- unsigned CandPSet = CandP.getPSetOrMax();
- // If both candidates affect the same set, go with the smallest increase.
- if (TryPSet == CandPSet) {
- return tryLess(TryP.getUnitInc(), CandP.getUnitInc(), TryCand, Cand,
- Reason);
- }
// If one candidate decreases and the other increases, go with it.
// Invalid candidates have UnitInc==0.
if (tryGreater(TryP.getUnitInc() < 0, CandP.getUnitInc() < 0, TryCand, Cand,
Reason)) {
return true;
}
+ // Do not compare the magnitude of pressure changes between the top and
+ // bottom boundaries.
+ if (Cand.AtTop != TryCand.AtTop)
+ return false;
+
+ // If both candidates affect the same set in the same boundary, go with the
+ // smallest increase.
+ unsigned TryPSet = TryP.getPSetOrMax();
+ unsigned CandPSet = CandP.getPSetOrMax();
+ if (TryPSet == CandPSet) {
+ return tryLess(TryP.getUnitInc(), CandP.getUnitInc(), TryCand, Cand,
+ Reason);
+ }
int TryRank = TryP.isValid() ? TRI->getRegPressureSetScore(MF, TryPSet) :
std::numeric_limits<int>::max();
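
The reordered tryPressure now decides on sign first (a pressure decrease always beats an increase), refuses to compare magnitudes across the two boundaries, and only then compares unit increments within a single pressure set. The same decision logic in isolation (toy PressureChange):

    // Toy PressureChange: UnitInc < 0 means this candidate lowers pressure.
    struct PChange { int UnitInc = 0; unsigned PSet = ~0u; };

    // Returns +1 if Try wins, -1 if Cand wins, 0 if this heuristic is silent.
    int tryPressure(const PChange &TryP, const PChange &CandP,
                    bool TryAtTop, bool CandAtTop) {
      bool TryDec = TryP.UnitInc < 0;
      bool CandDec = CandP.UnitInc < 0;
      if (TryDec != CandDec)               // sign decides before magnitude
        return TryDec ? +1 : -1;
      if (TryAtTop != CandAtTop)           // cross-boundary magnitudes don't mix
        return 0;
      if (TryP.PSet == CandP.PSet && TryP.UnitInc != CandP.UnitInc)
        return TryP.UnitInc < CandP.UnitInc ? +1 : -1;
      return 0;                            // defer to rank / later heuristics
    }
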
@@ -2640,64 +2745,64 @@ static int biasPhysRegCopy(const SUnit *SU, bool isTop) {
return 0;
}
-/// Apply a set of heursitics to a new candidate. Heuristics are currently
-/// hierarchical. This may be more efficient than a graduated cost model because
-/// we don't need to evaluate all aspects of the model for each node in the
-/// queue. But it's really done to make the heuristics easier to debug and
-/// statistically analyze.
-///
-/// \param Cand provides the policy and current best candidate.
-/// \param TryCand refers to the next SUnit candidate, otherwise uninitialized.
-/// \param Zone describes the scheduled zone that we are extending.
-/// \param RPTracker describes reg pressure within the scheduled zone.
-/// \param TempTracker is a scratch pressure tracker to reuse in queries.
-void GenericScheduler::tryCandidate(SchedCandidate &Cand,
- SchedCandidate &TryCand,
- SchedBoundary &Zone,
- const RegPressureTracker &RPTracker,
- RegPressureTracker &TempTracker) {
-
+void GenericScheduler::initCandidate(SchedCandidate &Cand, SUnit *SU,
+ bool AtTop,
+ const RegPressureTracker &RPTracker,
+ RegPressureTracker &TempTracker) {
+ Cand.SU = SU;
+ Cand.AtTop = AtTop;
if (DAG->isTrackingPressure()) {
- // Always initialize TryCand's RPDelta.
- if (Zone.isTop()) {
+ if (AtTop) {
TempTracker.getMaxDownwardPressureDelta(
- TryCand.SU->getInstr(),
- TryCand.RPDelta,
+ Cand.SU->getInstr(),
+ Cand.RPDelta,
DAG->getRegionCriticalPSets(),
DAG->getRegPressure().MaxSetPressure);
- }
- else {
+ } else {
if (VerifyScheduling) {
TempTracker.getMaxUpwardPressureDelta(
- TryCand.SU->getInstr(),
- &DAG->getPressureDiff(TryCand.SU),
- TryCand.RPDelta,
+ Cand.SU->getInstr(),
+ &DAG->getPressureDiff(Cand.SU),
+ Cand.RPDelta,
DAG->getRegionCriticalPSets(),
DAG->getRegPressure().MaxSetPressure);
- }
- else {
+ } else {
RPTracker.getUpwardPressureDelta(
- TryCand.SU->getInstr(),
- DAG->getPressureDiff(TryCand.SU),
- TryCand.RPDelta,
+ Cand.SU->getInstr(),
+ DAG->getPressureDiff(Cand.SU),
+ Cand.RPDelta,
DAG->getRegionCriticalPSets(),
DAG->getRegPressure().MaxSetPressure);
}
}
}
- DEBUG(if (TryCand.RPDelta.Excess.isValid())
- dbgs() << " Try SU(" << TryCand.SU->NodeNum << ") "
- << TRI->getRegPressureSetName(TryCand.RPDelta.Excess.getPSet())
- << ":" << TryCand.RPDelta.Excess.getUnitInc() << "\n");
+ DEBUG(if (Cand.RPDelta.Excess.isValid())
+ dbgs() << " Try SU(" << Cand.SU->NodeNum << ") "
+ << TRI->getRegPressureSetName(Cand.RPDelta.Excess.getPSet())
+ << ":" << Cand.RPDelta.Excess.getUnitInc() << "\n");
+}
+/// Apply a set of heuristics to a new candidate. Heuristics are currently
+/// hierarchical. This may be more efficient than a graduated cost model because
+/// we don't need to evaluate all aspects of the model for each node in the
+/// queue. But it's really done to make the heuristics easier to debug and
+/// statistically analyze.
+///
+/// \param Cand provides the policy and current best candidate.
+/// \param TryCand refers to the next SUnit candidate, otherwise uninitialized.
+/// \param Zone describes the scheduled zone that we are extending, or nullptr
+/// if Cand is from a different zone than TryCand.
+void GenericScheduler::tryCandidate(SchedCandidate &Cand,
+ SchedCandidate &TryCand,
+ SchedBoundary *Zone) {
// Initialize the candidate if needed.
if (!Cand.isValid()) {
TryCand.Reason = NodeOrder;
return;
}
- if (tryGreater(biasPhysRegCopy(TryCand.SU, Zone.isTop()),
- biasPhysRegCopy(Cand.SU, Zone.isTop()),
+ if (tryGreater(biasPhysRegCopy(TryCand.SU, TryCand.AtTop),
+ biasPhysRegCopy(Cand.SU, Cand.AtTop),
TryCand, Cand, PhysRegCopy))
return;
@@ -2715,17 +2820,26 @@ void GenericScheduler::tryCandidate(SchedCandidate &Cand,
DAG->MF))
return;
- // For loops that are acyclic path limited, aggressively schedule for latency.
- // This can result in very long dependence chains scheduled in sequence, so
- // once every cycle (when CurrMOps == 0), switch to normal heuristics.
- if (Rem.IsAcyclicLatencyLimited && !Zone.getCurrMOps()
- && tryLatency(TryCand, Cand, Zone))
- return;
+ // We only compare a subset of features when comparing nodes between the
+ // top and bottom boundary. Some properties are simply incomparable; in many
+ // other instances we should only override the other boundary if something
+ // is a clearly good pick on one boundary. Skip heuristics that are more
+ // "tie-breaking" in nature.
+ bool SameBoundary = Zone != nullptr;
+ if (SameBoundary) {
+ // For loops that are acyclic path limited, aggressively schedule for
+ // latency. This can result in very long dependence chains scheduled in
+ // sequence, so once every cycle (when CurrMOps == 0), switch to normal
+ // heuristics.
+ if (Rem.IsAcyclicLatencyLimited && !Zone->getCurrMOps() &&
+ tryLatency(TryCand, Cand, *Zone))
+ return;
- // Prioritize instructions that read unbuffered resources by stall cycles.
- if (tryLess(Zone.getLatencyStallCycles(TryCand.SU),
- Zone.getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall))
- return;
+ // Prioritize instructions that read unbuffered resources by stall cycles.
+ if (tryLess(Zone->getLatencyStallCycles(TryCand.SU),
+ Zone->getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall))
+ return;
+ }
// Keep clustered nodes together to encourage downstream peephole
// optimizations which may reduce resource requirements.
@@ -2733,18 +2847,23 @@ void GenericScheduler::tryCandidate(SchedCandidate &Cand,
// This is a best effort to set things up for a post-RA pass. Optimizations
// like generating loads of multiple registers should ideally be done within
// the scheduler pass by combining the loads during DAG postprocessing.
- const SUnit *NextClusterSU =
- Zone.isTop() ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
- if (tryGreater(TryCand.SU == NextClusterSU, Cand.SU == NextClusterSU,
+ const SUnit *CandNextClusterSU =
+ Cand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
+ const SUnit *TryCandNextClusterSU =
+ TryCand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
+ if (tryGreater(TryCand.SU == TryCandNextClusterSU,
+ Cand.SU == CandNextClusterSU,
TryCand, Cand, Cluster))
return;
- // Weak edges are for clustering and other constraints.
- if (tryLess(getWeakLeft(TryCand.SU, Zone.isTop()),
- getWeakLeft(Cand.SU, Zone.isTop()),
- TryCand, Cand, Weak)) {
- return;
+ if (SameBoundary) {
+ // Weak edges are for clustering and other constraints.
+ if (tryLess(getWeakLeft(TryCand.SU, TryCand.AtTop),
+ getWeakLeft(Cand.SU, Cand.AtTop),
+ TryCand, Cand, Weak))
+ return;
}
+
// Avoid increasing the max pressure of the entire region.
if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.CurrentMax,
Cand.RPDelta.CurrentMax,
@@ -2752,34 +2871,35 @@ void GenericScheduler::tryCandidate(SchedCandidate &Cand,
DAG->MF))
return;
- // Avoid critical resource consumption and balance the schedule.
- TryCand.initResourceDelta(DAG, SchedModel);
- if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources,
- TryCand, Cand, ResourceReduce))
- return;
- if (tryGreater(TryCand.ResDelta.DemandedResources,
- Cand.ResDelta.DemandedResources,
- TryCand, Cand, ResourceDemand))
- return;
+ if (SameBoundary) {
+ // Avoid critical resource consumption and balance the schedule.
+ TryCand.initResourceDelta(DAG, SchedModel);
+ if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources,
+ TryCand, Cand, ResourceReduce))
+ return;
+ if (tryGreater(TryCand.ResDelta.DemandedResources,
+ Cand.ResDelta.DemandedResources,
+ TryCand, Cand, ResourceDemand))
+ return;
- // Avoid serializing long latency dependence chains.
- // For acyclic path limited loops, latency was already checked above.
- if (!RegionPolicy.DisableLatencyHeuristic && Cand.Policy.ReduceLatency &&
- !Rem.IsAcyclicLatencyLimited && tryLatency(TryCand, Cand, Zone)) {
- return;
- }
+ // Avoid serializing long latency dependence chains.
+ // For acyclic path limited loops, latency was already checked above.
+ if (!RegionPolicy.DisableLatencyHeuristic && TryCand.Policy.ReduceLatency &&
+ !Rem.IsAcyclicLatencyLimited && tryLatency(TryCand, Cand, *Zone))
+ return;
- // Prefer immediate defs/users of the last scheduled instruction. This is a
- // local pressure avoidance strategy that also makes the machine code
- // readable.
- if (tryGreater(Zone.isNextSU(TryCand.SU), Zone.isNextSU(Cand.SU),
- TryCand, Cand, NextDefUse))
- return;
+ // Prefer immediate defs/users of the last scheduled instruction. This is a
+ // local pressure avoidance strategy that also makes the machine code
+ // readable.
+ if (tryGreater(Zone->isNextSU(TryCand.SU), Zone->isNextSU(Cand.SU),
+ TryCand, Cand, NextDefUse))
+ return;
- // Fall through to original instruction order.
- if ((Zone.isTop() && TryCand.SU->NodeNum < Cand.SU->NodeNum)
- || (!Zone.isTop() && TryCand.SU->NodeNum > Cand.SU->NodeNum)) {
- TryCand.Reason = NodeOrder;
+ // Fall through to original instruction order.
+ if ((Zone->isTop() && TryCand.SU->NodeNum < Cand.SU->NodeNum)
+ || (!Zone->isTop() && TryCand.SU->NodeNum > Cand.SU->NodeNum)) {
+ TryCand.Reason = NodeOrder;
+ }
}
}
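
tryCandidate remains a short-circuit cascade: each heuristic either separates the two candidates and returns, or passes them to the next rung, with the new SameBoundary flag fencing off tie-breakers that are only meaningful within one zone. A miniature of that control shape (two stand-in metrics only):

    enum Reason { NoCand, PhysRegCopy, Stall, NodeOrder };

    struct Cand { Reason R = NoCand; };

    // tryLess in miniature: decide at this rung iff the metric differs; the
    // winner's Reason records which heuristic settled the comparison.
    static bool tryLess(int TryVal, int CandVal, Cand &Try, Cand &C, Reason R) {
      if (TryVal < CandVal) { Try.R = R; return true; }
      if (TryVal > CandVal) { C.R = R; return true; }
      return false;                        // tie: fall to the next rung
    }

    void tryCandidate(Cand &C, Cand &Try, int TryBias, int CandBias,
                      int TryStall, int CandStall, bool SameBoundary) {
      if (C.R == NoCand) { Try.R = NodeOrder; return; }  // first candidate wins
      if (tryLess(-TryBias, -CandBias, Try, C, PhysRegCopy))
        return;                            // bigger bias wins, hence the negate
      if (!SameBoundary)
        return;                            // cross-zone: skip the tie-breakers
      if (tryLess(TryStall, CandStall, Try, C, Stall))
        return;                            // fewer stall cycles wins
      Try.R = NodeOrder;                   // full tie: source order decides
    }
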
@@ -2789,20 +2909,20 @@ void GenericScheduler::tryCandidate(SchedCandidate &Cand,
/// DAG building. To adjust for the current scheduling location we need to
/// maintain the number of vreg uses remaining to be top-scheduled.
void GenericScheduler::pickNodeFromQueue(SchedBoundary &Zone,
+ const CandPolicy &ZonePolicy,
const RegPressureTracker &RPTracker,
SchedCandidate &Cand) {
- ReadyQueue &Q = Zone.Available;
-
- DEBUG(Q.dump());
-
// getMaxPressureDelta temporarily modifies the tracker.
RegPressureTracker &TempTracker = const_cast<RegPressureTracker&>(RPTracker);
+ ReadyQueue &Q = Zone.Available;
for (ReadyQueue::iterator I = Q.begin(), E = Q.end(); I != E; ++I) {
- SchedCandidate TryCand(Cand.Policy);
- TryCand.SU = *I;
- tryCandidate(Cand, TryCand, Zone, RPTracker, TempTracker);
+ SchedCandidate TryCand(ZonePolicy);
+ initCandidate(TryCand, *I, Zone.isTop(), RPTracker, TempTracker);
+ // Pass SchedBoundary only when comparing nodes from the same boundary.
+ SchedBoundary *ZoneArg = Cand.AtTop == TryCand.AtTop ? &Zone : nullptr;
+ tryCandidate(Cand, TryCand, ZoneArg);
if (TryCand.Reason != NoCand) {
// Initialize resource delta if needed in case future heuristics query it.
if (TryCand.ResDelta == SchedResourceDelta())
@@ -2819,57 +2939,77 @@ SUnit *GenericScheduler::pickNodeBidirectional(bool &IsTopNode) {
// efficient, but also provides the best heuristics for CriticalPSets.
if (SUnit *SU = Bot.pickOnlyChoice()) {
IsTopNode = false;
- DEBUG(dbgs() << "Pick Bot ONLY1\n");
+ tracePick(Only1, false);
return SU;
}
if (SUnit *SU = Top.pickOnlyChoice()) {
IsTopNode = true;
- DEBUG(dbgs() << "Pick Top ONLY1\n");
+ tracePick(Only1, true);
return SU;
}
- CandPolicy NoPolicy;
- SchedCandidate BotCand(NoPolicy);
- SchedCandidate TopCand(NoPolicy);
// Set the bottom-up policy based on the state of the current bottom zone and
// the instructions outside the zone, including the top zone.
- setPolicy(BotCand.Policy, /*IsPostRA=*/false, Bot, &Top);
+ CandPolicy BotPolicy;
+ setPolicy(BotPolicy, /*IsPostRA=*/false, Bot, &Top);
// Set the top-down policy based on the state of the current top zone and
// the instructions outside the zone, including the bottom zone.
- setPolicy(TopCand.Policy, /*IsPostRA=*/false, Top, &Bot);
-
- // Prefer bottom scheduling when heuristics are silent.
- pickNodeFromQueue(Bot, DAG->getBotRPTracker(), BotCand);
- assert(BotCand.Reason != NoCand && "failed to find the first candidate");
-
- // If either Q has a single candidate that provides the least increase in
- // Excess pressure, we can immediately schedule from that Q.
- //
- // RegionCriticalPSets summarizes the pressure within the scheduled region and
- // affects picking from either Q. If scheduling in one direction must
- // increase pressure for one of the excess PSets, then schedule in that
- // direction first to provide more freedom in the other direction.
- if ((BotCand.Reason == RegExcess && !BotCand.isRepeat(RegExcess))
- || (BotCand.Reason == RegCritical
- && !BotCand.isRepeat(RegCritical)))
- {
- IsTopNode = false;
- tracePick(BotCand, IsTopNode);
- return BotCand.SU;
+ CandPolicy TopPolicy;
+ setPolicy(TopPolicy, /*IsPostRA=*/false, Top, &Bot);
+
+ // See if BotCand is still valid (because we previously scheduled from Top).
+ DEBUG(dbgs() << "Picking from Bot:\n");
+ if (!BotCand.isValid() || BotCand.SU->isScheduled ||
+ BotCand.Policy != BotPolicy) {
+ BotCand.reset(CandPolicy());
+ pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), BotCand);
+ assert(BotCand.Reason != NoCand && "failed to find the first candidate");
+ } else {
+ DEBUG(traceCandidate(BotCand));
+#ifndef NDEBUG
+ if (VerifyScheduling) {
+ SchedCandidate TCand;
+ TCand.reset(CandPolicy());
+ pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), TCand);
+ assert(TCand.SU == BotCand.SU &&
+ "Last pick result should correspond to re-picking right now");
+ }
+#endif
}
+
// Check if the top Q has a better candidate.
- pickNodeFromQueue(Top, DAG->getTopRPTracker(), TopCand);
- assert(TopCand.Reason != NoCand && "failed to find the first candidate");
+ DEBUG(dbgs() << "Picking from Top:\n");
+ if (!TopCand.isValid() || TopCand.SU->isScheduled ||
+ TopCand.Policy != TopPolicy) {
+ TopCand.reset(CandPolicy());
+ pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TopCand);
+ assert(TopCand.Reason != NoCand && "failed to find the first candidate");
+ } else {
+ DEBUG(traceCandidate(TopCand));
+#ifndef NDEBUG
+ if (VerifyScheduling) {
+ SchedCandidate TCand;
+ TCand.reset(CandPolicy());
+ pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TCand);
+ assert(TCand.SU == TopCand.SU &&
+ "Last pick result should correspond to re-picking right now");
+ }
+#endif
+ }
- // Choose the queue with the most important (lowest enum) reason.
- if (TopCand.Reason < BotCand.Reason) {
- IsTopNode = true;
- tracePick(TopCand, IsTopNode);
- return TopCand.SU;
+ // Pick best from BotCand and TopCand.
+ assert(BotCand.isValid());
+ assert(TopCand.isValid());
+ SchedCandidate Cand = BotCand;
+ TopCand.Reason = NoCand;
+ tryCandidate(Cand, TopCand, nullptr);
+ if (TopCand.Reason != NoCand) {
+ Cand.setBest(TopCand);
+ DEBUG(traceCandidate(Cand));
}
- // Otherwise prefer the bottom candidate, in node order if all else failed.
- IsTopNode = false;
- tracePick(BotCand, IsTopNode);
- return BotCand.SU;
+
+ IsTopNode = Cand.AtTop;
+ tracePick(Cand);
+ return Cand.SU;
}
/// Pick the best node to balance the schedule. Implements MachineSchedStrategy.
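
pickNodeBidirectional now caches BotCand and TopCand across picks and only rescans a ready queue when the cached candidate went stale: never computed, already scheduled, or computed under a different zone policy. The staleness test on its own (illustrative fields):

    struct CachedCand {
      int SU = -1;            // node number; -1 == no cached pick
      bool Scheduled = false; // set once the node was actually emitted
      int Policy = 0;         // zone policy the pick was computed under
      bool isValid() const { return SU >= 0; }
    };

    // Rescan the queue only when the cached pick can no longer be trusted;
    // under -verify-misched the real code re-picks anyway and asserts equality.
    bool needRepick(const CachedCand &Cached, int CurrentPolicy) {
      return !Cached.isValid() || Cached.Scheduled ||
             Cached.Policy != CurrentPolicy;
    }
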
@@ -2885,27 +3025,25 @@ SUnit *GenericScheduler::pickNode(bool &IsTopNode) {
SU = Top.pickOnlyChoice();
if (!SU) {
CandPolicy NoPolicy;
- SchedCandidate TopCand(NoPolicy);
- pickNodeFromQueue(Top, DAG->getTopRPTracker(), TopCand);
+ TopCand.reset(NoPolicy);
+ pickNodeFromQueue(Top, NoPolicy, DAG->getTopRPTracker(), TopCand);
assert(TopCand.Reason != NoCand && "failed to find a candidate");
- tracePick(TopCand, true);
+ tracePick(TopCand);
SU = TopCand.SU;
}
IsTopNode = true;
- }
- else if (RegionPolicy.OnlyBottomUp) {
+ } else if (RegionPolicy.OnlyBottomUp) {
SU = Bot.pickOnlyChoice();
if (!SU) {
CandPolicy NoPolicy;
- SchedCandidate BotCand(NoPolicy);
- pickNodeFromQueue(Bot, DAG->getBotRPTracker(), BotCand);
+ BotCand.reset(NoPolicy);
+ pickNodeFromQueue(Bot, NoPolicy, DAG->getBotRPTracker(), BotCand);
assert(BotCand.Reason != NoCand && "failed to find a candidate");
- tracePick(BotCand, false);
+ tracePick(BotCand);
SU = BotCand.SU;
}
IsTopNode = false;
- }
- else {
+ } else {
SU = pickNodeBidirectional(IsTopNode);
}
} while (SU->isScheduled);
@@ -2957,8 +3095,7 @@ void GenericScheduler::schedNode(SUnit *SU, bool IsTopNode) {
Top.bumpNode(SU);
if (SU->hasPhysRegUses)
reschedulePhysRegCopies(SU, true);
- }
- else {
+ } else {
SU->BotReadyCycle = std::max(SU->BotReadyCycle, Bot.getCurrCycle());
Bot.bumpNode(SU);
if (SU->hasPhysRegDefs)
@@ -2976,8 +3113,12 @@ static ScheduleDAGInstrs *createGenericSchedLive(MachineSchedContext *C) {
// data and pass it to later mutations. Have a single mutation that gathers
// the interesting nodes in one pass.
DAG->addMutation(make_unique<CopyConstrain>(DAG->TII, DAG->TRI));
- if (EnableLoadCluster && DAG->TII->enableClusterLoads())
- DAG->addMutation(make_unique<LoadClusterMutation>(DAG->TII, DAG->TRI));
+ if (EnableMemOpCluster) {
+ if (DAG->TII->enableClusterLoads())
+ DAG->addMutation(make_unique<LoadClusterMutation>(DAG->TII, DAG->TRI));
+ if (DAG->TII->enableClusterStores())
+ DAG->addMutation(make_unique<StoreClusterMutation>(DAG->TII, DAG->TRI));
+ }
if (EnableMacroFusion)
DAG->addMutation(make_unique<MacroFusion>(*DAG->TII, *DAG->TRI));
return DAG;
@@ -3065,12 +3206,10 @@ void PostGenericScheduler::tryCandidate(SchedCandidate &Cand,
void PostGenericScheduler::pickNodeFromQueue(SchedCandidate &Cand) {
ReadyQueue &Q = Top.Available;
-
- DEBUG(Q.dump());
-
for (ReadyQueue::iterator I = Q.begin(), E = Q.end(); I != E; ++I) {
SchedCandidate TryCand(Cand.Policy);
TryCand.SU = *I;
+ TryCand.AtTop = true;
TryCand.initResourceDelta(DAG, SchedModel);
tryCandidate(Cand, TryCand);
if (TryCand.Reason != NoCand) {
@@ -3089,7 +3228,9 @@ SUnit *PostGenericScheduler::pickNode(bool &IsTopNode) {
SUnit *SU;
do {
SU = Top.pickOnlyChoice();
- if (!SU) {
+ if (SU) {
+ tracePick(Only1, true);
+ } else {
CandPolicy NoPolicy;
SchedCandidate TopCand(NoPolicy);
// Set the top-down policy based on the state of the current top zone and
@@ -3097,7 +3238,7 @@ SUnit *PostGenericScheduler::pickNode(bool &IsTopNode) {
setPolicy(TopCand.Policy, /*IsPostRA=*/true, Top, nullptr);
pickNodeFromQueue(TopCand);
assert(TopCand.Reason != NoCand && "failed to find a candidate");
- tracePick(TopCand, true);
+ tracePick(TopCand);
SU = TopCand.SU;
}
} while (SU->isScheduled);
@@ -3285,8 +3426,7 @@ public:
TopQ.pop();
} while (SU->isScheduled);
IsTopNode = true;
- }
- else {
+ } else {
do {
if (BottomQ.empty()) return nullptr;
SU = BottomQ.top();
diff --git a/contrib/llvm/lib/CodeGen/MachineSink.cpp b/contrib/llvm/lib/CodeGen/MachineSink.cpp
index 5e6d619..571a5c1 100644
--- a/contrib/llvm/lib/CodeGen/MachineSink.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineSink.cpp
@@ -27,6 +27,7 @@
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachinePostDominators.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
@@ -104,7 +105,7 @@ namespace {
private:
bool ProcessBlock(MachineBasicBlock &MBB);
- bool isWorthBreakingCriticalEdge(MachineInstr *MI,
+ bool isWorthBreakingCriticalEdge(MachineInstr &MI,
MachineBasicBlock *From,
MachineBasicBlock *To);
/// \brief Postpone the splitting of the given critical
@@ -119,27 +120,27 @@ namespace {
///
/// \return True if the edge is marked as toSplit, false otherwise.
/// False can be returned if, for instance, this is not profitable.
- bool PostponeSplitCriticalEdge(MachineInstr *MI,
+ bool PostponeSplitCriticalEdge(MachineInstr &MI,
MachineBasicBlock *From,
MachineBasicBlock *To,
bool BreakPHIEdge);
- bool SinkInstruction(MachineInstr *MI, bool &SawStore,
+ bool SinkInstruction(MachineInstr &MI, bool &SawStore,
AllSuccsCache &AllSuccessors);
bool AllUsesDominatedByBlock(unsigned Reg, MachineBasicBlock *MBB,
MachineBasicBlock *DefMBB,
bool &BreakPHIEdge, bool &LocalUse) const;
- MachineBasicBlock *FindSuccToSinkTo(MachineInstr *MI, MachineBasicBlock *MBB,
+ MachineBasicBlock *FindSuccToSinkTo(MachineInstr &MI, MachineBasicBlock *MBB,
bool &BreakPHIEdge, AllSuccsCache &AllSuccessors);
- bool isProfitableToSinkTo(unsigned Reg, MachineInstr *MI,
+ bool isProfitableToSinkTo(unsigned Reg, MachineInstr &MI,
MachineBasicBlock *MBB,
MachineBasicBlock *SuccToSinkTo,
AllSuccsCache &AllSuccessors);
- bool PerformTrivialForwardCoalescing(MachineInstr *MI,
+ bool PerformTrivialForwardCoalescing(MachineInstr &MI,
MachineBasicBlock *MBB);
SmallVector<MachineBasicBlock *, 4> &
- GetAllSortedSuccessors(MachineInstr *MI, MachineBasicBlock *MBB,
+ GetAllSortedSuccessors(MachineInstr &MI, MachineBasicBlock *MBB,
AllSuccsCache &AllSuccessors) const;
};
} // end anonymous namespace
@@ -154,13 +155,13 @@ INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_END(MachineSinking, "machine-sink",
"Machine code sinking", false, false)
-bool MachineSinking::PerformTrivialForwardCoalescing(MachineInstr *MI,
+bool MachineSinking::PerformTrivialForwardCoalescing(MachineInstr &MI,
MachineBasicBlock *MBB) {
- if (!MI->isCopy())
+ if (!MI.isCopy())
return false;
- unsigned SrcReg = MI->getOperand(1).getReg();
- unsigned DstReg = MI->getOperand(0).getReg();
+ unsigned SrcReg = MI.getOperand(1).getReg();
+ unsigned DstReg = MI.getOperand(0).getReg();
if (!TargetRegisterInfo::isVirtualRegister(SrcReg) ||
!TargetRegisterInfo::isVirtualRegister(DstReg) ||
!MRI->hasOneNonDBGUse(SrcReg))
@@ -175,9 +176,9 @@ bool MachineSinking::PerformTrivialForwardCoalescing(MachineInstr *MI,
if (DefMI->isCopyLike())
return false;
DEBUG(dbgs() << "Coalescing: " << *DefMI);
- DEBUG(dbgs() << "*** to: " << *MI);
+ DEBUG(dbgs() << "*** to: " << MI);
MRI->replaceRegWith(DstReg, SrcReg);
- MI->eraseFromParent();
+ MI.eraseFromParent();
// Conservatively, clear any kill flags, since it's possible that they are no
// longer correct.
@@ -256,7 +257,7 @@ MachineSinking::AllUsesDominatedByBlock(unsigned Reg,
}
bool MachineSinking::runOnMachineFunction(MachineFunction &MF) {
- if (skipOptnoneFunction(*MF.getFunction()))
+ if (skipFunction(*MF.getFunction()))
return false;
DEBUG(dbgs() << "******** Machine Sinking ********\n");
@@ -283,7 +284,7 @@ bool MachineSinking::runOnMachineFunction(MachineFunction &MF) {
// If we have anything we marked as toSplit, split it now.
for (auto &Pair : ToSplit) {
- auto NewSucc = Pair.first->SplitCriticalEdge(Pair.second, this);
+ auto NewSucc = Pair.first->SplitCriticalEdge(Pair.second, *this);
if (NewSucc != nullptr) {
DEBUG(dbgs() << " *** Splitting critical edge:"
" BB#" << Pair.first->getNumber()
@@ -326,7 +327,7 @@ bool MachineSinking::ProcessBlock(MachineBasicBlock &MBB) {
--I;
bool ProcessedBegin, SawStore = false;
do {
- MachineInstr *MI = I; // The instruction to sink.
+ MachineInstr &MI = *I; // The instruction to sink.
// Predecrement I (if it's not begin) so that it isn't invalidated by
// sinking.
@@ -334,7 +335,7 @@ bool MachineSinking::ProcessBlock(MachineBasicBlock &MBB) {
if (!ProcessedBegin)
--I;
- if (MI->isDebugValue())
+ if (MI.isDebugValue())
continue;
bool Joined = PerformTrivialForwardCoalescing(MI, &MBB);
@@ -343,8 +344,10 @@ bool MachineSinking::ProcessBlock(MachineBasicBlock &MBB) {
continue;
}
- if (SinkInstruction(MI, SawStore, AllSuccessors))
- ++NumSunk, MadeChange = true;
+ if (SinkInstruction(MI, SawStore, AllSuccessors)) {
+ ++NumSunk;
+ MadeChange = true;
+ }
// If we just processed the first instruction in the block, we're done.
} while (!ProcessedBegin);
@@ -352,7 +355,7 @@ bool MachineSinking::ProcessBlock(MachineBasicBlock &MBB) {
return MadeChange;
}
-bool MachineSinking::isWorthBreakingCriticalEdge(MachineInstr *MI,
+bool MachineSinking::isWorthBreakingCriticalEdge(MachineInstr &MI,
MachineBasicBlock *From,
MachineBasicBlock *To) {
// FIXME: Need much better heuristics.
@@ -363,14 +366,14 @@ bool MachineSinking::isWorthBreakingCriticalEdge(MachineInstr *MI,
if (!CEBCandidates.insert(std::make_pair(From, To)).second)
return true;
- if (!MI->isCopy() && !TII->isAsCheapAsAMove(MI))
+ if (!MI.isCopy() && !TII->isAsCheapAsAMove(MI))
return true;
  // MI is cheap, so we probably don't want to break the critical edge for it.
// However, if this would allow some definitions of its source operands
// to be sunk then it's probably worth it.
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- const MachineOperand &MO = MI->getOperand(i);
+ for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI.getOperand(i);
if (!MO.isReg() || !MO.isUse())
continue;
unsigned Reg = MO.getReg();
@@ -391,7 +394,7 @@ bool MachineSinking::isWorthBreakingCriticalEdge(MachineInstr *MI,
// If definition resides elsewhere, we aren't
// blocking it from being sunk so don't break the edge.
MachineInstr *DefMI = MRI->getVRegDef(Reg);
- if (DefMI->getParent() == MI->getParent())
+ if (DefMI->getParent() == MI.getParent())
return true;
}
}
@@ -399,7 +402,7 @@ bool MachineSinking::isWorthBreakingCriticalEdge(MachineInstr *MI,
return false;
}
-bool MachineSinking::PostponeSplitCriticalEdge(MachineInstr *MI,
+bool MachineSinking::PostponeSplitCriticalEdge(MachineInstr &MI,
MachineBasicBlock *FromBB,
MachineBasicBlock *ToBB,
bool BreakPHIEdge) {
@@ -469,35 +472,30 @@ bool MachineSinking::PostponeSplitCriticalEdge(MachineInstr *MI,
return true;
}
-static bool AvoidsSinking(MachineInstr *MI, MachineRegisterInfo *MRI) {
- return MI->isInsertSubreg() || MI->isSubregToReg() || MI->isRegSequence();
-}
-
/// collectDebugValues - Scan instructions following MI and collect any
/// matching DBG_VALUEs.
-static void collectDebugValues(MachineInstr *MI,
+static void collectDebugValues(MachineInstr &MI,
SmallVectorImpl<MachineInstr *> &DbgValues) {
DbgValues.clear();
- if (!MI->getOperand(0).isReg())
+ if (!MI.getOperand(0).isReg())
return;
MachineBasicBlock::iterator DI = MI; ++DI;
- for (MachineBasicBlock::iterator DE = MI->getParent()->end();
+ for (MachineBasicBlock::iterator DE = MI.getParent()->end();
DI != DE; ++DI) {
if (!DI->isDebugValue())
return;
if (DI->getOperand(0).isReg() &&
- DI->getOperand(0).getReg() == MI->getOperand(0).getReg())
- DbgValues.push_back(DI);
+ DI->getOperand(0).getReg() == MI.getOperand(0).getReg())
+ DbgValues.push_back(&*DI);
}
}
/// isProfitableToSinkTo - Return true if it is profitable to sink MI.
-bool MachineSinking::isProfitableToSinkTo(unsigned Reg, MachineInstr *MI,
+bool MachineSinking::isProfitableToSinkTo(unsigned Reg, MachineInstr &MI,
MachineBasicBlock *MBB,
MachineBasicBlock *SuccToSinkTo,
AllSuccsCache &AllSuccessors) {
- assert (MI && "Invalid MachineInstr!");
assert (SuccToSinkTo && "Invalid SinkTo Candidate BB");
if (MBB == SuccToSinkTo)
@@ -538,7 +536,7 @@ bool MachineSinking::isProfitableToSinkTo(unsigned Reg, MachineInstr *MI,
/// Get the sorted sequence of successors for this MachineBasicBlock, possibly
/// computing it if it was not already cached.
SmallVector<MachineBasicBlock *, 4> &
-MachineSinking::GetAllSortedSuccessors(MachineInstr *MI, MachineBasicBlock *MBB,
+MachineSinking::GetAllSortedSuccessors(MachineInstr &MI, MachineBasicBlock *MBB,
AllSuccsCache &AllSuccessors) const {
  // Do we have the sorted successors in cache?
@@ -560,7 +558,7 @@ MachineSinking::GetAllSortedSuccessors(MachineInstr *MI, MachineBasicBlock *MBB,
DT->getNode(MBB)->getChildren();
for (const auto &DTChild : Children)
// DomTree children of MBB that have MBB as immediate dominator are added.
- if (DTChild->getIDom()->getBlock() == MI->getParent() &&
+ if (DTChild->getIDom()->getBlock() == MI.getParent() &&
// Skip MBBs already added to the AllSuccs vector above.
!MBB->isSuccessor(DTChild->getBlock()))
AllSuccs.push_back(DTChild->getBlock());
@@ -582,12 +580,10 @@ MachineSinking::GetAllSortedSuccessors(MachineInstr *MI, MachineBasicBlock *MBB,
}
/// FindSuccToSinkTo - Find a successor to sink this instruction to.
-MachineBasicBlock *MachineSinking::FindSuccToSinkTo(MachineInstr *MI,
- MachineBasicBlock *MBB,
- bool &BreakPHIEdge,
- AllSuccsCache &AllSuccessors) {
-
- assert (MI && "Invalid MachineInstr!");
+MachineBasicBlock *
+MachineSinking::FindSuccToSinkTo(MachineInstr &MI, MachineBasicBlock *MBB,
+ bool &BreakPHIEdge,
+ AllSuccsCache &AllSuccessors) {
assert (MBB && "Invalid MachineBasicBlock!");
// Loop over all the operands of the specified instruction. If there is
@@ -596,8 +592,8 @@ MachineBasicBlock *MachineSinking::FindSuccToSinkTo(MachineInstr *MI,
// SuccToSinkTo - This is the successor to sink this instruction to, once we
// decide.
MachineBasicBlock *SuccToSinkTo = nullptr;
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- const MachineOperand &MO = MI->getOperand(i);
+ for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI.getOperand(i);
if (!MO.isReg()) continue; // Ignore non-register operands.
unsigned Reg = MO.getReg();
@@ -673,22 +669,70 @@ MachineBasicBlock *MachineSinking::FindSuccToSinkTo(MachineInstr *MI,
return SuccToSinkTo;
}
+/// \brief Return true if MI is likely to be usable as a memory operation by the
+/// implicit null check optimization.
+///
+/// This is a "best effort" heuristic, and should not be relied upon for
+/// correctness. A true result does not guarantee that the implicit null
+/// check optimization is legal over MI, and a false result does not
+/// guarantee that MI cannot possibly be used to do a null check.
+static bool SinkingPreventsImplicitNullCheck(MachineInstr &MI,
+ const TargetInstrInfo *TII,
+ const TargetRegisterInfo *TRI) {
+ typedef TargetInstrInfo::MachineBranchPredicate MachineBranchPredicate;
+
+ auto *MBB = MI.getParent();
+ if (MBB->pred_size() != 1)
+ return false;
+
+ auto *PredMBB = *MBB->pred_begin();
+ auto *PredBB = PredMBB->getBasicBlock();
+
+ // Frontends that don't use implicit null checks have no reason to emit
+ // branches with make.implicit metadata, and this function should always
+ // return false for them.
+ if (!PredBB ||
+ !PredBB->getTerminator()->getMetadata(LLVMContext::MD_make_implicit))
+ return false;
+
+ unsigned BaseReg;
+ int64_t Offset;
+ if (!TII->getMemOpBaseRegImmOfs(MI, BaseReg, Offset, TRI))
+ return false;
+
+ if (!(MI.mayLoad() && !MI.isPredicable()))
+ return false;
+
+ MachineBranchPredicate MBP;
+ if (TII->analyzeBranchPredicate(*PredMBB, MBP, false))
+ return false;
+
+ return MBP.LHS.isReg() && MBP.RHS.isImm() && MBP.RHS.getImm() == 0 &&
+ (MBP.Predicate == MachineBranchPredicate::PRED_NE ||
+ MBP.Predicate == MachineBranchPredicate::PRED_EQ) &&
+ MBP.LHS.getReg() == BaseReg;
+}
+
/// SinkInstruction - Determine whether it is safe to sink the specified machine
/// instruction out of its current block into a successor.
-bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore,
+bool MachineSinking::SinkInstruction(MachineInstr &MI, bool &SawStore,
AllSuccsCache &AllSuccessors) {
- // Don't sink insert_subreg, subreg_to_reg, reg_sequence. These are meant to
- // be close to the source to make it easier to coalesce.
- if (AvoidsSinking(MI, MRI))
+ // Don't sink instructions that the target prefers not to sink.
+ if (!TII->shouldSink(MI))
return false;
// Check if it's safe to move the instruction.
- if (!MI->isSafeToMove(AA, SawStore))
+ if (!MI.isSafeToMove(AA, SawStore))
return false;
// Convergent operations may not be made control-dependent on additional
// values.
- if (MI->isConvergent())
+ if (MI.isConvergent())
+ return false;
+
+ // Don't break implicit null checks. This is a performance heuristic, and not
+ // required for correctness.
+ if (SinkingPreventsImplicitNullCheck(MI, TII, TRI))
return false;
// FIXME: This should include support for sinking instructions within the
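SinkingPreventsImplicitNullCheck, added above, keys off make.implicit metadata that a frontend attaches to a null-test branch. As a hedged sketch, this is how such a branch could be tagged at IR construction time; the helper name is invented, while setMetadata and LLVMContext::MD_make_implicit are the hooks the new code reads:

#include "llvm/ADT/None.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"

// Hypothetical frontend helper: tag a conditional branch that tests a pointer
// against null, so the implicit null check optimization may fold the compare
// into a faulting memory access. Only the presence of the metadata matters,
// so an empty MDNode suffices.
static void markMakeImplicit(llvm::BranchInst *Br) {
  Br->setMetadata(llvm::LLVMContext::MD_make_implicit,
                  llvm::MDNode::get(Br->getContext(), llvm::None));
}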
@@ -700,7 +744,7 @@ bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore,
// and z and only shrink the live range of x.
bool BreakPHIEdge = false;
- MachineBasicBlock *ParentBlock = MI->getParent();
+ MachineBasicBlock *ParentBlock = MI.getParent();
MachineBasicBlock *SuccToSinkTo =
FindSuccToSinkTo(MI, ParentBlock, BreakPHIEdge, AllSuccessors);
@@ -712,8 +756,8 @@ bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore,
// If the instruction to move defines a dead physical register which is live
// when leaving the basic block, don't move it because it could turn into a
// "zombie" define of that preg. E.g., EFLAGS. (<rdar://problem/8030636>)
- for (unsigned I = 0, E = MI->getNumOperands(); I != E; ++I) {
- const MachineOperand &MO = MI->getOperand(I);
+ for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) {
+ const MachineOperand &MO = MI.getOperand(I);
if (!MO.isReg()) continue;
unsigned Reg = MO.getReg();
if (Reg == 0 || !TargetRegisterInfo::isPhysicalRegister(Reg)) continue;
@@ -721,7 +765,7 @@ bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore,
return false;
}
- DEBUG(dbgs() << "Sink instr " << *MI << "\tinto block " << *SuccToSinkTo);
+ DEBUG(dbgs() << "Sink instr " << MI << "\tinto block " << *SuccToSinkTo);
// If the block has multiple predecessors, this is a critical edge.
// Decide if we can sink along it or need to break the edge.
@@ -730,7 +774,7 @@ bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore,
// other code paths.
bool TryBreak = false;
bool store = true;
- if (!MI->isSafeToMove(AA, store)) {
+ if (!MI.isSafeToMove(AA, store)) {
DEBUG(dbgs() << " *** NOTE: Won't sink load along critical edge.\n");
TryBreak = true;
}
@@ -804,7 +848,7 @@ bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore,
// Note that we have to clear the kill flags for any register this instruction
// uses as we may sink over another instruction which currently kills the
// used registers.
- for (MachineOperand &MO : MI->operands()) {
+ for (MachineOperand &MO : MI.operands()) {
if (MO.isReg() && MO.isUse())
RegsToClearKillFlags.set(MO.getReg()); // Remember to clear kill flags.
}
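With AvoidsSinking gone, the sinking policy now comes from a TargetInstrInfo::shouldSink hook, consulted above in SinkInstruction. A hypothetical override in a target's TargetInstrInfo subclass, assuming the class declaration exists elsewhere; the policy shown simply mirrors the deleted AvoidsSinking helper:

// Hypothetical target override; MyTargetInstrInfo is assumed to derive from
// TargetInstrInfo. The body mirrors the removed AvoidsSinking logic.
bool MyTargetInstrInfo::shouldSink(const llvm::MachineInstr &MI) const {
  // Keep subregister-assembly instructions close to their sources so the
  // register coalescer can still fold them.
  return !MI.isInsertSubreg() && !MI.isSubregToReg() && !MI.isRegSequence();
}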
diff --git a/contrib/llvm/lib/CodeGen/MachineTraceMetrics.cpp b/contrib/llvm/lib/CodeGen/MachineTraceMetrics.cpp
index f7edacd..86332c8 100644
--- a/contrib/llvm/lib/CodeGen/MachineTraceMetrics.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineTraceMetrics.cpp
@@ -328,8 +328,10 @@ MinInstrCountEnsemble::pickTracePred(const MachineBasicBlock *MBB) {
continue;
// Pick the predecessor that would give this block the smallest InstrDepth.
unsigned Depth = PredTBI->InstrDepth + CurCount;
- if (!Best || Depth < BestDepth)
- Best = Pred, BestDepth = Depth;
+ if (!Best || Depth < BestDepth) {
+ Best = Pred;
+ BestDepth = Depth;
+ }
}
return Best;
}
@@ -356,8 +358,10 @@ MinInstrCountEnsemble::pickTraceSucc(const MachineBasicBlock *MBB) {
continue;
// Pick the successor that would give this block the smallest InstrHeight.
unsigned Height = SuccTBI->InstrHeight;
- if (!Best || Height < BestHeight)
- Best = Succ, BestHeight = Height;
+ if (!Best || Height < BestHeight) {
+ Best = Succ;
+ BestHeight = Height;
+ }
}
return Best;
}
@@ -621,16 +625,16 @@ struct DataDep {
// Get the input data dependencies that must be ready before UseMI can issue.
// Return true if UseMI has any physreg operands.
-static bool getDataDeps(const MachineInstr *UseMI,
+static bool getDataDeps(const MachineInstr &UseMI,
SmallVectorImpl<DataDep> &Deps,
const MachineRegisterInfo *MRI) {
// Debug values should not be included in any calculations.
- if (UseMI->isDebugValue())
+ if (UseMI.isDebugValue())
return false;
bool HasPhysRegs = false;
- for (MachineInstr::const_mop_iterator I = UseMI->operands_begin(),
- E = UseMI->operands_end(); I != E; ++I) {
+ for (MachineInstr::const_mop_iterator I = UseMI.operands_begin(),
+ E = UseMI.operands_end(); I != E; ++I) {
const MachineOperand &MO = *I;
if (!MO.isReg())
continue;
@@ -643,7 +647,7 @@ static bool getDataDeps(const MachineInstr *UseMI,
}
// Collect virtual register reads.
if (MO.readsReg())
- Deps.push_back(DataDep(MRI, Reg, UseMI->getOperandNo(I)));
+ Deps.push_back(DataDep(MRI, Reg, UseMI.getOperandNo(I)));
}
return HasPhysRegs;
}
@@ -651,17 +655,17 @@ static bool getDataDeps(const MachineInstr *UseMI,
// Get the input data dependencies of a PHI instruction, using Pred as the
// preferred predecessor.
// This will add at most one dependency to Deps.
-static void getPHIDeps(const MachineInstr *UseMI,
+static void getPHIDeps(const MachineInstr &UseMI,
SmallVectorImpl<DataDep> &Deps,
const MachineBasicBlock *Pred,
const MachineRegisterInfo *MRI) {
// No predecessor at the beginning of a trace. Ignore dependencies.
if (!Pred)
return;
- assert(UseMI->isPHI() && UseMI->getNumOperands() % 2 && "Bad PHI");
- for (unsigned i = 1; i != UseMI->getNumOperands(); i += 2) {
- if (UseMI->getOperand(i + 1).getMBB() == Pred) {
- unsigned Reg = UseMI->getOperand(i).getReg();
+ assert(UseMI.isPHI() && UseMI.getNumOperands() % 2 && "Bad PHI");
+ for (unsigned i = 1; i != UseMI.getNumOperands(); i += 2) {
+ if (UseMI.getOperand(i + 1).getMBB() == Pred) {
+ unsigned Reg = UseMI.getOperand(i).getReg();
Deps.push_back(DataDep(MRI, Reg, i));
return;
}
@@ -823,8 +827,8 @@ computeInstrDepths(const MachineBasicBlock *MBB) {
// Collect all data dependencies.
Deps.clear();
if (UseMI.isPHI())
- getPHIDeps(&UseMI, Deps, TBI.Pred, MTM.MRI);
- else if (getDataDeps(&UseMI, Deps, MTM.MRI))
+ getPHIDeps(UseMI, Deps, TBI.Pred, MTM.MRI);
+ else if (getDataDeps(UseMI, Deps, MTM.MRI))
updatePhysDepsDownwards(&UseMI, Deps, RegUnits, MTM.TRI);
// Filter and process dependencies, computing the earliest issue cycle.
@@ -861,15 +865,16 @@ computeInstrDepths(const MachineBasicBlock *MBB) {
// Identify physreg dependencies for MI when scanning instructions upwards.
// Return the issue height of MI after considering any live regunits.
// Height is the issue height computed from virtual register dependencies alone.
-static unsigned updatePhysDepsUpwards(const MachineInstr *MI, unsigned Height,
+static unsigned updatePhysDepsUpwards(const MachineInstr &MI, unsigned Height,
SparseSet<LiveRegUnit> &RegUnits,
const TargetSchedModel &SchedModel,
const TargetInstrInfo *TII,
const TargetRegisterInfo *TRI) {
SmallVector<unsigned, 8> ReadOps;
- for (MachineInstr::const_mop_iterator MOI = MI->operands_begin(),
- MOE = MI->operands_end(); MOI != MOE; ++MOI) {
+ for (MachineInstr::const_mop_iterator MOI = MI.operands_begin(),
+ MOE = MI.operands_end();
+ MOI != MOE; ++MOI) {
const MachineOperand &MO = *MOI;
if (!MO.isReg())
continue;
@@ -877,7 +882,7 @@ static unsigned updatePhysDepsUpwards(const MachineInstr *MI, unsigned Height,
if (!TargetRegisterInfo::isPhysicalRegister(Reg))
continue;
if (MO.readsReg())
- ReadOps.push_back(MI->getOperandNo(MOI));
+ ReadOps.push_back(MI.getOperandNo(MOI));
if (!MO.isDef())
continue;
// This is a def of Reg. Remove corresponding entries from RegUnits, and
@@ -887,11 +892,11 @@ static unsigned updatePhysDepsUpwards(const MachineInstr *MI, unsigned Height,
if (I == RegUnits.end())
continue;
unsigned DepHeight = I->Cycle;
- if (!MI->isTransient()) {
+ if (!MI.isTransient()) {
// We may not know the UseMI of this dependency, if it came from the
// live-in list. SchedModel can handle a NULL UseMI.
- DepHeight += SchedModel
- .computeOperandLatency(MI, MI->getOperandNo(MOI), I->MI, I->Op);
+ DepHeight += SchedModel.computeOperandLatency(&MI, MI.getOperandNo(MOI),
+ I->MI, I->Op);
}
Height = std::max(Height, DepHeight);
// This regunit is dead above MI.
@@ -901,13 +906,13 @@ static unsigned updatePhysDepsUpwards(const MachineInstr *MI, unsigned Height,
// Now we know the height of MI. Update any regunits read.
for (unsigned i = 0, e = ReadOps.size(); i != e; ++i) {
- unsigned Reg = MI->getOperand(ReadOps[i]).getReg();
+ unsigned Reg = MI.getOperand(ReadOps[i]).getReg();
for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units) {
LiveRegUnit &LRU = RegUnits[*Units];
// Set the height to the highest reader of the unit.
- if (LRU.Cycle <= Height && LRU.MI != MI) {
+ if (LRU.Cycle <= Height && LRU.MI != &MI) {
LRU.Cycle = Height;
- LRU.MI = MI;
+ LRU.MI = &MI;
LRU.Op = ReadOps[i];
}
}
@@ -921,15 +926,14 @@ typedef DenseMap<const MachineInstr *, unsigned> MIHeightMap;
// Push the height of DefMI upwards if required to match UseMI.
// Return true if this is the first time DefMI was seen.
-static bool pushDepHeight(const DataDep &Dep,
- const MachineInstr *UseMI, unsigned UseHeight,
- MIHeightMap &Heights,
+static bool pushDepHeight(const DataDep &Dep, const MachineInstr &UseMI,
+ unsigned UseHeight, MIHeightMap &Heights,
const TargetSchedModel &SchedModel,
const TargetInstrInfo *TII) {
// Adjust height by Dep.DefMI latency.
if (!Dep.DefMI->isTransient())
- UseHeight += SchedModel.computeOperandLatency(Dep.DefMI, Dep.DefOp,
- UseMI, Dep.UseOp);
+ UseHeight += SchedModel.computeOperandLatency(Dep.DefMI, Dep.DefOp, &UseMI,
+ Dep.UseOp);
// Update Heights[DefMI] to be the maximum height seen.
MIHeightMap::iterator I;
@@ -1048,13 +1052,13 @@ computeInstrHeights(const MachineBasicBlock *MBB) {
if (!PHI.isPHI())
break;
Deps.clear();
- getPHIDeps(&PHI, Deps, MBB, MTM.MRI);
+ getPHIDeps(PHI, Deps, MBB, MTM.MRI);
if (!Deps.empty()) {
// Loop header PHI heights are all 0.
unsigned Height = TBI.Succ ? Cycles.lookup(&PHI).Height : 0;
DEBUG(dbgs() << "pred\t" << Height << '\t' << PHI);
- if (pushDepHeight(Deps.front(), &PHI, Height,
- Heights, MTM.SchedModel, MTM.TII))
+ if (pushDepHeight(Deps.front(), PHI, Height, Heights, MTM.SchedModel,
+ MTM.TII))
addLiveIns(Deps.front().DefMI, Deps.front().DefOp, Stack);
}
}
@@ -1063,12 +1067,12 @@ computeInstrHeights(const MachineBasicBlock *MBB) {
// Go through the block backwards.
for (MachineBasicBlock::const_iterator BI = MBB->end(), BB = MBB->begin();
BI != BB;) {
- const MachineInstr *MI = --BI;
+ const MachineInstr &MI = *--BI;
// Find the MI height as determined by virtual register uses in the
// trace below.
unsigned Cycle = 0;
- MIHeightMap::iterator HeightI = Heights.find(MI);
+ MIHeightMap::iterator HeightI = Heights.find(&MI);
if (HeightI != Heights.end()) {
Cycle = HeightI->second;
// We won't be seeing any more MI uses.
@@ -1078,27 +1082,27 @@ computeInstrHeights(const MachineBasicBlock *MBB) {
// Don't process PHI deps. They depend on the specific predecessor, and
// we'll get them when visiting the predecessor.
Deps.clear();
- bool HasPhysRegs = !MI->isPHI() && getDataDeps(MI, Deps, MTM.MRI);
+ bool HasPhysRegs = !MI.isPHI() && getDataDeps(MI, Deps, MTM.MRI);
// There may also be regunit dependencies to include in the height.
if (HasPhysRegs)
- Cycle = updatePhysDepsUpwards(MI, Cycle, RegUnits,
- MTM.SchedModel, MTM.TII, MTM.TRI);
+ Cycle = updatePhysDepsUpwards(MI, Cycle, RegUnits, MTM.SchedModel,
+ MTM.TII, MTM.TRI);
// Update the required height of any virtual registers read by MI.
for (const DataDep &Dep : Deps)
if (pushDepHeight(Dep, MI, Cycle, Heights, MTM.SchedModel, MTM.TII))
addLiveIns(Dep.DefMI, Dep.DefOp, Stack);
- InstrCycles &MICycles = Cycles[MI];
+ InstrCycles &MICycles = Cycles[&MI];
MICycles.Height = Cycle;
if (!TBI.HasValidInstrDepths) {
- DEBUG(dbgs() << Cycle << '\t' << *MI);
+ DEBUG(dbgs() << Cycle << '\t' << MI);
continue;
}
// Update critical path length.
TBI.CriticalPath = std::max(TBI.CriticalPath, Cycle + MICycles.Depth);
- DEBUG(dbgs() << TBI.CriticalPath << '\t' << Cycle << '\t' << *MI);
+ DEBUG(dbgs() << TBI.CriticalPath << '\t' << Cycle << '\t' << MI);
}
// Update virtual live-in heights. They were added by addLiveIns() with a 0
@@ -1143,26 +1147,25 @@ MachineTraceMetrics::Ensemble::getTrace(const MachineBasicBlock *MBB) {
}
unsigned
-MachineTraceMetrics::Trace::getInstrSlack(const MachineInstr *MI) const {
- assert(MI && "Not an instruction.");
- assert(getBlockNum() == unsigned(MI->getParent()->getNumber()) &&
+MachineTraceMetrics::Trace::getInstrSlack(const MachineInstr &MI) const {
+ assert(getBlockNum() == unsigned(MI.getParent()->getNumber()) &&
"MI must be in the trace center block");
InstrCycles Cyc = getInstrCycles(MI);
return getCriticalPath() - (Cyc.Depth + Cyc.Height);
}
unsigned
-MachineTraceMetrics::Trace::getPHIDepth(const MachineInstr *PHI) const {
+MachineTraceMetrics::Trace::getPHIDepth(const MachineInstr &PHI) const {
const MachineBasicBlock *MBB = TE.MTM.MF->getBlockNumbered(getBlockNum());
SmallVector<DataDep, 1> Deps;
getPHIDeps(PHI, Deps, MBB, TE.MTM.MRI);
assert(Deps.size() == 1 && "PHI doesn't have MBB as a predecessor");
DataDep &Dep = Deps.front();
- unsigned DepCycle = getInstrCycles(Dep.DefMI).Depth;
+ unsigned DepCycle = getInstrCycles(*Dep.DefMI).Depth;
// Add latency if DefMI is a real instruction. Transients get latency 0.
if (!Dep.DefMI->isTransient())
- DepCycle += TE.MTM.SchedModel
- .computeOperandLatency(Dep.DefMI, Dep.DefOp, PHI, Dep.UseOp);
+ DepCycle += TE.MTM.SchedModel.computeOperandLatency(Dep.DefMI, Dep.DefOp,
+ &PHI, Dep.UseOp);
return DepCycle;
}
@@ -1248,13 +1251,13 @@ unsigned MachineTraceMetrics::Trace::getResourceLength(
return std::max(Instrs, PRMax);
}
-bool MachineTraceMetrics::Trace::isDepInTrace(const MachineInstr *DefMI,
- const MachineInstr *UseMI) const {
- if (DefMI->getParent() == UseMI->getParent())
+bool MachineTraceMetrics::Trace::isDepInTrace(const MachineInstr &DefMI,
+ const MachineInstr &UseMI) const {
+ if (DefMI.getParent() == UseMI.getParent())
return true;
- const TraceBlockInfo &DepTBI = TE.BlockInfo[DefMI->getParent()->getNumber()];
- const TraceBlockInfo &TBI = TE.BlockInfo[UseMI->getParent()->getNumber()];
+ const TraceBlockInfo &DepTBI = TE.BlockInfo[DefMI.getParent()->getNumber()];
+ const TraceBlockInfo &TBI = TE.BlockInfo[UseMI.getParent()->getNumber()];
return DepTBI.isUsefulDominator(TBI);
}
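The MachineTraceMetrics changes are mechanical: the Trace queries now take MachineInstr references instead of pointers, removing the null asserts. A minimal caller sketch (the wrapper function is illustrative):

#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineTraceMetrics.h"

// Hypothetical caller: with the reference-based API there is no null MI to
// assert on at the call site.
static unsigned slackOf(llvm::MachineTraceMetrics::Trace &Tr,
                        const llvm::MachineInstr &MI) {
  // Slack = CriticalPath - (Depth + Height), per getInstrSlack above.
  return Tr.getInstrSlack(MI);
}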
diff --git a/contrib/llvm/lib/CodeGen/MachineVerifier.cpp b/contrib/llvm/lib/CodeGen/MachineVerifier.cpp
index 428295e..a70adb0 100644
--- a/contrib/llvm/lib/CodeGen/MachineVerifier.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineVerifier.cpp
@@ -58,7 +58,7 @@ namespace {
Banner(b)
{}
- bool runOnMachineFunction(MachineFunction &MF);
+ unsigned verify(MachineFunction &MF);
Pass *const PASS;
const char *Banner;
@@ -217,10 +217,22 @@ namespace {
LaneBitmask LaneMask) const;
void report_context(const LiveRange::Segment &S) const;
void report_context(const VNInfo &VNI) const;
+ void report_context(SlotIndex Pos) const;
+ void report_context_liverange(const LiveRange &LR) const;
+ void report_context_lanemask(LaneBitmask LaneMask) const;
+ void report_context_vreg(unsigned VReg) const;
+ void report_context_vreg_regunit(unsigned VRegOrRegUnit) const;
void verifyInlineAsm(const MachineInstr *MI);
void checkLiveness(const MachineOperand *MO, unsigned MONum);
+ void checkLivenessAtUse(const MachineOperand *MO, unsigned MONum,
+ SlotIndex UseIdx, const LiveRange &LR, unsigned Reg,
+ LaneBitmask LaneMask = 0);
+ void checkLivenessAtDef(const MachineOperand *MO, unsigned MONum,
+ SlotIndex DefIdx, const LiveRange &LR, unsigned Reg,
+ LaneBitmask LaneMask = 0);
+
void markReachable(const MachineBasicBlock *MBB);
void calcRegsPassed();
void checkPHIOps(const MachineBasicBlock *MBB);
@@ -239,6 +251,7 @@ namespace {
void verifyStackFrame();
void verifySlotIndexes() const;
+ void verifyProperties(const MachineFunction &MF);
};
struct MachineVerifierPass : public MachineFunctionPass {
@@ -256,7 +269,9 @@ namespace {
}
bool runOnMachineFunction(MachineFunction &MF) override {
- MF.verify(this, Banner.c_str());
+ unsigned FoundErrors = MachineVerifier(this, Banner.c_str()).verify(MF);
+ if (FoundErrors)
+ report_fatal_error("Found "+Twine(FoundErrors)+" machine code errors.");
return false;
}
};
@@ -271,9 +286,13 @@ FunctionPass *llvm::createMachineVerifierPass(const std::string &Banner) {
return new MachineVerifierPass(Banner);
}
-void MachineFunction::verify(Pass *p, const char *Banner) const {
- MachineVerifier(p, Banner)
- .runOnMachineFunction(const_cast<MachineFunction&>(*this));
+bool MachineFunction::verify(Pass *p, const char *Banner, bool AbortOnErrors)
+ const {
+ MachineFunction &MF = const_cast<MachineFunction&>(*this);
+ unsigned FoundErrors = MachineVerifier(p, Banner).verify(MF);
+ if (AbortOnErrors && FoundErrors)
+ report_fatal_error("Found "+Twine(FoundErrors)+" machine code errors.");
+ return FoundErrors == 0;
}
void MachineVerifier::verifySlotIndexes() const {
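The verifier now reports an error count and leaves aborting to its callers: the pass wrapper above always aborts, while MachineFunction::verify aborts only when AbortOnErrors is set. A sketch of the resulting non-fatal call pattern; the wrapper is hypothetical, the signature is the one defined above:

#include "llvm/CodeGen/MachineFunction.h"

// Hypothetical non-fatal use: returns true iff MF verified cleanly. Passing
// AbortOnErrors=false suppresses the report_fatal_error path.
static bool verifiesCleanly(const llvm::MachineFunction &MF) {
  return MF.verify(/*p=*/nullptr, /*Banner=*/"post-pass",
                   /*AbortOnErrors=*/false);
}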
@@ -289,7 +308,20 @@ void MachineVerifier::verifySlotIndexes() const {
}
}
-bool MachineVerifier::runOnMachineFunction(MachineFunction &MF) {
+void MachineVerifier::verifyProperties(const MachineFunction &MF) {
+ // If a pass has introduced virtual registers without clearing the
+ // AllVRegsAllocated property (or set it without allocating the vregs)
+ // then report an error.
+ if (MF.getProperties().hasProperty(
+ MachineFunctionProperties::Property::AllVRegsAllocated) &&
+ MRI->getNumVirtRegs()) {
+ report(
+ "Function has AllVRegsAllocated property but there are VReg operands",
+ &MF);
+ }
+}
+
+unsigned MachineVerifier::verify(MachineFunction &MF) {
foundErrors = 0;
this->MF = &MF;
@@ -313,6 +345,8 @@ bool MachineVerifier::runOnMachineFunction(MachineFunction &MF) {
verifySlotIndexes();
+ verifyProperties(MF);
+
visitMachineFunctionBefore();
for (MachineFunction::const_iterator MFI = MF.begin(), MFE = MF.end();
MFI!=MFE; ++MFI) {
@@ -374,9 +408,6 @@ bool MachineVerifier::runOnMachineFunction(MachineFunction &MF) {
}
visitMachineFunctionAfter();
- if (foundErrors)
- report_fatal_error("Found "+Twine(foundErrors)+" machine code errors.");
-
// Clean up.
regsLive.clear();
regsDefined.clear();
@@ -386,7 +417,7 @@ bool MachineVerifier::runOnMachineFunction(MachineFunction &MF) {
regsLiveInButUnused.clear();
MBBInfoMap.clear();
- return false; // no changes
+ return foundErrors;
}
void MachineVerifier::report(const char *msg, const MachineFunction *MF) {
@@ -420,8 +451,8 @@ void MachineVerifier::report(const char *msg, const MachineInstr *MI) {
assert(MI);
report(msg, MI->getParent());
errs() << "- instruction: ";
- if (Indexes && Indexes->hasIndex(MI))
- errs() << Indexes->getInstructionIndex(MI) << '\t';
+ if (Indexes && Indexes->hasIndex(*MI))
+ errs() << Indexes->getInstructionIndex(*MI) << '\t';
MI->print(errs(), /*SkipOpers=*/true);
errs() << '\n';
}
@@ -435,16 +466,20 @@ void MachineVerifier::report(const char *msg,
errs() << "\n";
}
+void MachineVerifier::report_context(SlotIndex Pos) const {
+ errs() << "- at: " << Pos << '\n';
+}
+
void MachineVerifier::report_context(const LiveInterval &LI) const {
errs() << "- interval: " << LI << '\n';
}
void MachineVerifier::report_context(const LiveRange &LR, unsigned Reg,
LaneBitmask LaneMask) const {
+ report_context_liverange(LR);
errs() << "- register: " << PrintReg(Reg, TRI) << '\n';
if (LaneMask != 0)
- errs() << "- lanemask: " << PrintLaneMask(LaneMask) << '\n';
- errs() << "- liverange: " << LR << '\n';
+ report_context_lanemask(LaneMask);
}
void MachineVerifier::report_context(const LiveRange::Segment &S) const {
@@ -455,6 +490,26 @@ void MachineVerifier::report_context(const VNInfo &VNI) const {
errs() << "- ValNo: " << VNI.id << " (def " << VNI.def << ")\n";
}
+void MachineVerifier::report_context_liverange(const LiveRange &LR) const {
+ errs() << "- liverange: " << LR << '\n';
+}
+
+void MachineVerifier::report_context_vreg(unsigned VReg) const {
+ errs() << "- v. register: " << PrintReg(VReg, TRI) << '\n';
+}
+
+void MachineVerifier::report_context_vreg_regunit(unsigned VRegOrUnit) const {
+ if (TargetRegisterInfo::isVirtualRegister(VRegOrUnit)) {
+ report_context_vreg(VRegOrUnit);
+ } else {
+ errs() << "- regunit: " << PrintRegUnit(VRegOrUnit, TRI) << '\n';
+ }
+}
+
+void MachineVerifier::report_context_lanemask(LaneBitmask LaneMask) const {
+ errs() << "- lanemask: " << PrintLaneMask(LaneMask) << '\n';
+}
+
void MachineVerifier::markReachable(const MachineBasicBlock *MBB) {
BBInfo &MInfo = MBBInfoMap[MBB];
if (!MInfo.reachable) {
@@ -521,7 +576,7 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) {
// it is an entry block or landing pad.
for (const auto &LI : MBB->liveins()) {
if (isAllocatable(LI.PhysReg) && !MBB->isEHPad() &&
- MBB != MBB->getParent()->begin()) {
+ MBB->getIterator() != MBB->getParent()->begin()) {
report("MBB has allocable live-in, but isn't entry or landing-pad.", MBB);
}
}
@@ -567,8 +622,8 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) {
  // Call AnalyzeBranch. If it succeeds, there are several more conditions to check.
MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
SmallVector<MachineOperand, 4> Cond;
- if (!TII->AnalyzeBranch(*const_cast<MachineBasicBlock *>(MBB),
- TBB, FBB, Cond)) {
+ if (!TII->analyzeBranch(*const_cast<MachineBasicBlock *>(MBB), TBB, FBB,
+ Cond)) {
// Ok, AnalyzeBranch thinks it knows what's going on with this block. Let's
// check whether its answers match up with reality.
if (!TBB && !FBB) {
@@ -591,7 +646,7 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) {
"differs from its CFG successor!", MBB);
}
if (!MBB->empty() && MBB->back().isBarrier() &&
- !TII->isPredicated(&MBB->back())) {
+ !TII->isPredicated(MBB->back())) {
report("MBB exits via unconditional fall-through but ends with a "
"barrier instruction!", MBB);
}
@@ -721,8 +776,8 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) {
// This function gets called for all bundle headers, including normal
// stand-alone unbundled instructions.
void MachineVerifier::visitMachineBundleBefore(const MachineInstr *MI) {
- if (Indexes && Indexes->hasIndex(MI)) {
- SlotIndex idx = Indexes->getInstructionIndex(MI);
+ if (Indexes && Indexes->hasIndex(*MI)) {
+ SlotIndex idx = Indexes->getInstructionIndex(*MI);
if (!(idx > lastIndex)) {
report("Instruction index out of order", MI);
errs() << "Last instruction was at " << lastIndex << '\n';
@@ -733,7 +788,7 @@ void MachineVerifier::visitMachineBundleBefore(const MachineInstr *MI) {
// Ensure non-terminators don't follow terminators.
// Ignore predicated terminators formed by if conversion.
// FIXME: If conversion shouldn't need to violate this rule.
- if (MI->isTerminator() && !TII->isPredicated(MI)) {
+ if (MI->isTerminator() && !TII->isPredicated(*MI)) {
if (!FirstTerminator)
FirstTerminator = MI;
} else if (FirstTerminator) {
@@ -755,8 +810,9 @@ void MachineVerifier::verifyInlineAsm(const MachineInstr *MI) {
if (!MI->getOperand(1).isImm())
report("Asm flags must be an immediate", MI);
// Allowed flags are Extra_HasSideEffects = 1, Extra_IsAlignStack = 2,
- // Extra_AsmDialect = 4, Extra_MayLoad = 8, and Extra_MayStore = 16.
- if (!isUInt<5>(MI->getOperand(1).getImm()))
+  // Extra_AsmDialect = 4, Extra_MayLoad = 8, Extra_MayStore = 16, and
+  // Extra_IsConvergent = 32.
+ if (!isUInt<6>(MI->getOperand(1).getImm()))
report("Unknown asm flags", &MI->getOperand(1), 1);
static_assert(InlineAsm::MIOp_FirstOperand == 2, "Asm format changed");
@@ -810,7 +866,7 @@ void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) {
// Debug values must not have a slot index.
// Other instructions must have one, unless they are inside a bundle.
if (LiveInts) {
- bool mapped = !LiveInts->isNotInMIMap(MI);
+ bool mapped = !LiveInts->isNotInMIMap(*MI);
if (MI->isDebugValue()) {
if (mapped)
report("Debug instruction has a slot index", MI);
@@ -824,7 +880,7 @@ void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) {
}
StringRef ErrorInfo;
- if (!TII->verifyInstruction(MI, ErrorInfo))
+ if (!TII->verifyInstruction(*MI, ErrorInfo))
report(ErrorInfo.data(), MI);
}
@@ -929,7 +985,30 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {
}
} else {
// Virtual register.
- const TargetRegisterClass *RC = MRI->getRegClass(Reg);
+ const TargetRegisterClass *RC = MRI->getRegClassOrNull(Reg);
+ if (!RC) {
+ // This is a generic virtual register.
+ // It must have a size and it must not have a SubIdx.
+ unsigned Size = MRI->getSize(Reg);
+ if (!Size) {
+ report("Generic virtual register must have a size", MO, MONum);
+ return;
+ }
+ // Make sure the register fits into its register bank if any.
+ const RegisterBank *RegBank = MRI->getRegBankOrNull(Reg);
+ if (RegBank && RegBank->getSize() < Size) {
+ report("Register bank is too small for virtual register", MO,
+ MONum);
+        errs() << "Register bank " << RegBank->getName() << " too small ("
+               << RegBank->getSize() << ") to fit " << Size << " bits\n";
+ return;
+ }
+ if (SubIdx) {
+        report("Generic virtual register does not allow subregister index", MO, MONum);
+ return;
+ }
+ break;
+ }
if (SubIdx) {
const TargetRegisterClass *SRC =
TRI->getSubClassWithSubReg(RC, SubIdx);
@@ -984,10 +1063,10 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {
case MachineOperand::MO_FrameIndex:
if (LiveStks && LiveStks->hasInterval(MO->getIndex()) &&
- LiveInts && !LiveInts->isNotInMIMap(MI)) {
+ LiveInts && !LiveInts->isNotInMIMap(*MI)) {
int FI = MO->getIndex();
LiveInterval &LI = LiveStks->getInterval(FI);
- SlotIndex Idx = LiveInts->getInstructionIndex(MI);
+ SlotIndex Idx = LiveInts->getInstructionIndex(*MI);
bool stores = MI->mayStore();
bool loads = MI->mayLoad();
@@ -1028,6 +1107,83 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {
}
}
+void MachineVerifier::checkLivenessAtUse(const MachineOperand *MO,
+ unsigned MONum, SlotIndex UseIdx, const LiveRange &LR, unsigned VRegOrUnit,
+ LaneBitmask LaneMask) {
+ LiveQueryResult LRQ = LR.Query(UseIdx);
+  // Check if we have a segment at the use. Note, however, that we only need
+  // one live subregister range; the others may be dead.
+ if (!LRQ.valueIn() && LaneMask == 0) {
+ report("No live segment at use", MO, MONum);
+ report_context_liverange(LR);
+ report_context_vreg_regunit(VRegOrUnit);
+ report_context(UseIdx);
+ }
+ if (MO->isKill() && !LRQ.isKill()) {
+ report("Live range continues after kill flag", MO, MONum);
+ report_context_liverange(LR);
+ report_context_vreg_regunit(VRegOrUnit);
+ if (LaneMask != 0)
+ report_context_lanemask(LaneMask);
+ report_context(UseIdx);
+ }
+}
+
+void MachineVerifier::checkLivenessAtDef(const MachineOperand *MO,
+ unsigned MONum, SlotIndex DefIdx, const LiveRange &LR, unsigned VRegOrUnit,
+ LaneBitmask LaneMask) {
+ if (const VNInfo *VNI = LR.getVNInfoAt(DefIdx)) {
+ assert(VNI && "NULL valno is not allowed");
+ if (VNI->def != DefIdx) {
+ report("Inconsistent valno->def", MO, MONum);
+ report_context_liverange(LR);
+ report_context_vreg_regunit(VRegOrUnit);
+ if (LaneMask != 0)
+ report_context_lanemask(LaneMask);
+ report_context(*VNI);
+ report_context(DefIdx);
+ }
+ } else {
+ report("No live segment at def", MO, MONum);
+ report_context_liverange(LR);
+ report_context_vreg_regunit(VRegOrUnit);
+ if (LaneMask != 0)
+ report_context_lanemask(LaneMask);
+ report_context(DefIdx);
+ }
+  // Check that, if the dead def flag is present, LiveInts agrees.
+ if (MO->isDead()) {
+ LiveQueryResult LRQ = LR.Query(DefIdx);
+ if (!LRQ.isDeadDef()) {
+ // In case of physregs we can have a non-dead definition on another
+ // operand.
+ bool otherDef = false;
+ if (!TargetRegisterInfo::isVirtualRegister(VRegOrUnit)) {
+ const MachineInstr &MI = *MO->getParent();
+ for (const MachineOperand &MO : MI.operands()) {
+ if (!MO.isReg() || !MO.isDef() || MO.isDead())
+ continue;
+ unsigned Reg = MO.getReg();
+ for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units) {
+ if (*Units == VRegOrUnit) {
+ otherDef = true;
+ break;
+ }
+ }
+ }
+ }
+
+ if (!otherDef) {
+ report("Live range continues after dead def flag", MO, MONum);
+ report_context_liverange(LR);
+ report_context_vreg_regunit(VRegOrUnit);
+ if (LaneMask != 0)
+ report_context_lanemask(LaneMask);
+ }
+ }
+ }
+}
+
void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) {
const MachineInstr *MI = MO->getParent();
const unsigned Reg = MO->getReg();
@@ -1048,23 +1204,13 @@ void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) {
}
// Check LiveInts liveness and kill.
- if (LiveInts && !LiveInts->isNotInMIMap(MI)) {
- SlotIndex UseIdx = LiveInts->getInstructionIndex(MI);
+ if (LiveInts && !LiveInts->isNotInMIMap(*MI)) {
+ SlotIndex UseIdx = LiveInts->getInstructionIndex(*MI);
// Check the cached regunit intervals.
if (TargetRegisterInfo::isPhysicalRegister(Reg) && !isReserved(Reg)) {
for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units) {
- if (const LiveRange *LR = LiveInts->getCachedRegUnit(*Units)) {
- LiveQueryResult LRQ = LR->Query(UseIdx);
- if (!LRQ.valueIn()) {
- report("No live segment at use", MO, MONum);
- errs() << UseIdx << " is not live in " << PrintRegUnit(*Units, TRI)
- << ' ' << *LR << '\n';
- }
- if (MO->isKill() && !LRQ.isKill()) {
- report("Live range continues after kill flag", MO, MONum);
- errs() << PrintRegUnit(*Units, TRI) << ' ' << *LR << '\n';
- }
- }
+ if (const LiveRange *LR = LiveInts->getCachedRegUnit(*Units))
+ checkLivenessAtUse(MO, MONum, UseIdx, *LR, *Units);
}
}
@@ -1072,16 +1218,28 @@ void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) {
if (LiveInts->hasInterval(Reg)) {
// This is a virtual register interval.
const LiveInterval &LI = LiveInts->getInterval(Reg);
- LiveQueryResult LRQ = LI.Query(UseIdx);
- if (!LRQ.valueIn()) {
- report("No live segment at use", MO, MONum);
- errs() << UseIdx << " is not live in " << LI << '\n';
- }
- // Check for extra kill flags.
- // Note that we allow missing kill flags for now.
- if (MO->isKill() && !LRQ.isKill()) {
- report("Live range continues after kill flag", MO, MONum);
- errs() << "Live range: " << LI << '\n';
+ checkLivenessAtUse(MO, MONum, UseIdx, LI, Reg);
+
+ if (LI.hasSubRanges() && !MO->isDef()) {
+ unsigned SubRegIdx = MO->getSubReg();
+ LaneBitmask MOMask = SubRegIdx != 0
+ ? TRI->getSubRegIndexLaneMask(SubRegIdx)
+ : MRI->getMaxLaneMaskForVReg(Reg);
+ LaneBitmask LiveInMask = 0;
+ for (const LiveInterval::SubRange &SR : LI.subranges()) {
+ if ((MOMask & SR.LaneMask) == 0)
+ continue;
+ checkLivenessAtUse(MO, MONum, UseIdx, SR, Reg, SR.LaneMask);
+ LiveQueryResult LRQ = SR.Query(UseIdx);
+ if (LRQ.valueIn())
+ LiveInMask |= SR.LaneMask;
+ }
+      // At least parts of the register have to be live at the use.
+ if ((LiveInMask & MOMask) == 0) {
+ report("No live subrange at use", MO, MONum);
+ report_context(LI);
+ report_context(UseIdx);
+ }
}
} else {
report("Virtual register has no live interval", MO, MONum);
@@ -1154,33 +1312,29 @@ void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) {
report("Multiple virtual register defs in SSA form", MO, MONum);
// Check LiveInts for a live segment, but only for virtual registers.
- if (LiveInts && TargetRegisterInfo::isVirtualRegister(Reg) &&
- !LiveInts->isNotInMIMap(MI)) {
- SlotIndex DefIdx = LiveInts->getInstructionIndex(MI);
+ if (LiveInts && !LiveInts->isNotInMIMap(*MI)) {
+ SlotIndex DefIdx = LiveInts->getInstructionIndex(*MI);
DefIdx = DefIdx.getRegSlot(MO->isEarlyClobber());
- if (LiveInts->hasInterval(Reg)) {
- const LiveInterval &LI = LiveInts->getInterval(Reg);
- if (const VNInfo *VNI = LI.getVNInfoAt(DefIdx)) {
- assert(VNI && "NULL valno is not allowed");
- if (VNI->def != DefIdx) {
- report("Inconsistent valno->def", MO, MONum);
- errs() << "Valno " << VNI->id << " is not defined at "
- << DefIdx << " in " << LI << '\n';
+
+ if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ if (LiveInts->hasInterval(Reg)) {
+ const LiveInterval &LI = LiveInts->getInterval(Reg);
+ checkLivenessAtDef(MO, MONum, DefIdx, LI, Reg);
+
+ if (LI.hasSubRanges()) {
+ unsigned SubRegIdx = MO->getSubReg();
+ LaneBitmask MOMask = SubRegIdx != 0
+ ? TRI->getSubRegIndexLaneMask(SubRegIdx)
+ : MRI->getMaxLaneMaskForVReg(Reg);
+ for (const LiveInterval::SubRange &SR : LI.subranges()) {
+ if ((SR.LaneMask & MOMask) == 0)
+ continue;
+ checkLivenessAtDef(MO, MONum, DefIdx, SR, Reg, SR.LaneMask);
+ }
}
} else {
- report("No live segment at def", MO, MONum);
- errs() << DefIdx << " is not live in " << LI << '\n';
- }
- // Check that, if the dead def flag is present, LiveInts agree.
- if (MO->isDead()) {
- LiveQueryResult LRQ = LI.Query(DefIdx);
- if (!LRQ.isDeadDef()) {
- report("Live range continues after dead def flag", MO, MONum);
- errs() << "Live range: " << LI << '\n';
- }
+        report("Virtual register has no live interval", MO, MONum);
}
- } else {
- report("Virtual register has no Live interval", MO, MONum);
}
}
}
@@ -1360,9 +1514,10 @@ void MachineVerifier::visitMachineFunctionAfter() {
BBInfo &MInfo = MBBInfoMap[&MF->front()];
for (RegSet::iterator
I = MInfo.vregsRequired.begin(), E = MInfo.vregsRequired.end(); I != E;
- ++I)
- report("Virtual register def doesn't dominate all uses.",
- MRI->getVRegDef(*I));
+ ++I) {
+ report("Virtual register defs don't dominate all uses.", MF);
+ report_context_vreg(*I);
+ }
}
if (LiveVars)
@@ -1474,7 +1629,7 @@ void MachineVerifier::verifyLiveRangeValue(const LiveRange &LR,
if (Reg != 0) {
bool hasDef = false;
bool isEarlyClobber = false;
- for (ConstMIBundleOperands MOI(MI); MOI.isValid(); ++MOI) {
+ for (ConstMIBundleOperands MOI(*MI); MOI.isValid(); ++MOI) {
if (!MOI->isReg() || !MOI->isDef())
continue;
if (TargetRegisterInfo::isVirtualRegister(Reg)) {
@@ -1613,18 +1768,33 @@ void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR,
// use, or a dead flag on a def.
bool hasRead = false;
bool hasSubRegDef = false;
- for (ConstMIBundleOperands MOI(MI); MOI.isValid(); ++MOI) {
+ bool hasDeadDef = false;
+ for (ConstMIBundleOperands MOI(*MI); MOI.isValid(); ++MOI) {
if (!MOI->isReg() || MOI->getReg() != Reg)
continue;
if (LaneMask != 0 &&
(LaneMask & TRI->getSubRegIndexLaneMask(MOI->getSubReg())) == 0)
continue;
- if (MOI->isDef() && MOI->getSubReg() != 0)
- hasSubRegDef = true;
+ if (MOI->isDef()) {
+ if (MOI->getSubReg() != 0)
+ hasSubRegDef = true;
+ if (MOI->isDead())
+ hasDeadDef = true;
+ }
if (MOI->readsReg())
hasRead = true;
}
- if (!S.end.isDead()) {
+ if (S.end.isDead()) {
+ // Make sure that the corresponding machine operand for a "dead" live
+ // range has the dead flag. We cannot perform this check for subregister
+      // live ranges, as partially dead values are allowed.
+ if (LaneMask == 0 && !hasDeadDef) {
+ report("Instruction ending live segment on dead slot has no dead flag",
+ MI);
+ report_context(LR, Reg, LaneMask);
+ report_context(S);
+ }
+ } else {
if (!hasRead) {
// When tracking subregister liveness, the main range must start new
// values on partial register writes, even if there is no read.
@@ -1670,8 +1840,9 @@ void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR,
SlotIndex PEnd = LiveInts->getMBBEndIdx(*PI);
const VNInfo *PVNI = LR.getVNInfoBefore(PEnd);
- // All predecessors must have a live-out value.
- if (!PVNI) {
+ // All predecessors must have a live-out value if this is not a
+    // subregister live range.
+ if (!PVNI && LaneMask == 0) {
report("Register not marked live out of predecessor", *PI);
report_context(LR, Reg, LaneMask);
report_context(*VNI);
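Several of the new verifier checks are driven by lane masks for subregister liveness. For orientation, a small helper sketch that mirrors the MOMask computation used twice in checkLiveness above (the helper itself is illustrative):

#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"

// Mirrors the MOMask computation above: a subregister operand covers only the
// lanes of its subregister index, while a full-register operand covers the
// vreg's maximal lane mask.
static llvm::LaneBitmask operandLanes(const llvm::MachineOperand &MO,
                                      const llvm::TargetRegisterInfo &TRI,
                                      const llvm::MachineRegisterInfo &MRI) {
  unsigned SubRegIdx = MO.getSubReg();
  return SubRegIdx != 0 ? TRI.getSubRegIndexLaneMask(SubRegIdx)
                        : MRI.getMaxLaneMaskForVReg(MO.getReg());
}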
diff --git a/contrib/llvm/lib/CodeGen/OcamlGC.cpp b/contrib/llvm/lib/CodeGen/OcamlGC.cpp
deleted file mode 100644
index 17654a6..0000000
--- a/contrib/llvm/lib/CodeGen/OcamlGC.cpp
+++ /dev/null
@@ -1,36 +0,0 @@
-//===-- OcamlGC.cpp - Ocaml frametable GC strategy ------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements lowering for the llvm.gc* intrinsics compatible with
-// Objective Caml 3.10.0, which uses a liveness-accurate static stack map.
-//
-// The frametable emitter is in OcamlGCPrinter.cpp.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/CodeGen/GCs.h"
-#include "llvm/CodeGen/GCStrategy.h"
-
-using namespace llvm;
-
-namespace {
-class OcamlGC : public GCStrategy {
-public:
- OcamlGC();
-};
-}
-
-static GCRegistry::Add<OcamlGC> X("ocaml", "ocaml 3.10-compatible GC");
-
-void llvm::linkOcamlGC() {}
-
-OcamlGC::OcamlGC() {
- NeededSafePoints = 1 << GC::PostCall;
- UsesMetadata = true;
-}
diff --git a/contrib/llvm/lib/CodeGen/OptimizePHIs.cpp b/contrib/llvm/lib/CodeGen/OptimizePHIs.cpp
index a1042e7..0177e41 100644
--- a/contrib/llvm/lib/CodeGen/OptimizePHIs.cpp
+++ b/contrib/llvm/lib/CodeGen/OptimizePHIs.cpp
@@ -63,7 +63,7 @@ INITIALIZE_PASS(OptimizePHIs, "opt-phis",
"Optimize machine instruction PHIs", false, false)
bool OptimizePHIs::runOnMachineFunction(MachineFunction &Fn) {
- if (skipOptnoneFunction(*Fn.getFunction()))
+ if (skipFunction(*Fn.getFunction()))
return false;
MRI = &Fn.getRegInfo();
diff --git a/contrib/llvm/lib/CodeGen/PHIElimination.cpp b/contrib/llvm/lib/CodeGen/PHIElimination.cpp
index 2c93792..b8d5431 100644
--- a/contrib/llvm/lib/CodeGen/PHIElimination.cpp
+++ b/contrib/llvm/lib/CodeGen/PHIElimination.cpp
@@ -13,7 +13,6 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/CodeGen/Passes.h"
#include "PHIEliminationUtils.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
@@ -25,9 +24,9 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
#include "llvm/IR/Function.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
@@ -121,6 +120,7 @@ INITIALIZE_PASS_END(PHIElimination, "phi-node-elimination",
"Eliminate PHI nodes for register allocation", false, false)
void PHIElimination::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addUsedIfAvailable<LiveVariables>();
AU.addPreserved<LiveVariables>();
AU.addPreserved<SlotIndexes>();
AU.addPreserved<LiveIntervals>();
@@ -159,17 +159,16 @@ bool PHIElimination::runOnMachineFunction(MachineFunction &MF) {
unsigned DefReg = DefMI->getOperand(0).getReg();
if (MRI->use_nodbg_empty(DefReg)) {
if (LIS)
- LIS->RemoveMachineInstrFromMaps(DefMI);
+ LIS->RemoveMachineInstrFromMaps(*DefMI);
DefMI->eraseFromParent();
}
}
// Clean up the lowered PHI instructions.
- for (LoweredPHIMap::iterator I = LoweredPHIs.begin(), E = LoweredPHIs.end();
- I != E; ++I) {
+ for (auto &I : LoweredPHIs) {
if (LIS)
- LIS->RemoveMachineInstrFromMaps(I->first);
- MF.DeleteMachineInstr(I->first);
+ LIS->RemoveMachineInstrFromMaps(*I.first);
+ MF.DeleteMachineInstr(I.first);
}
LoweredPHIs.clear();
@@ -228,7 +227,7 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB,
MachineBasicBlock::iterator AfterPHIsIt = std::next(LastPHIIt);
// Unlink the PHI node from the basic block, but don't delete the PHI yet.
- MachineInstr *MPhi = MBB.remove(MBB.begin());
+ MachineInstr *MPhi = MBB.remove(&*MBB.begin());
unsigned NumSrcs = (MPhi->getNumOperands() - 1) / 2;
unsigned DestReg = MPhi->getOperand(0).getReg();
@@ -270,7 +269,7 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB,
// Update live variable information if there is any.
if (LV) {
- MachineInstr *PHICopy = std::prev(AfterPHIsIt);
+ MachineInstr &PHICopy = *std::prev(AfterPHIsIt);
if (IncomingReg) {
LiveVariables::VarInfo &VI = LV->getVarInfo(IncomingReg);
@@ -284,7 +283,7 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB,
if (reusedIncoming)
if (MachineInstr *OldKill = VI.findKill(&MBB)) {
DEBUG(dbgs() << "Remove old kill from " << *OldKill);
- LV->removeVirtualRegisterKilled(IncomingReg, OldKill);
+ LV->removeVirtualRegisterKilled(IncomingReg, *OldKill);
DEBUG(MBB.dump());
}
@@ -298,19 +297,19 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB,
// Since we are going to be deleting the PHI node, if it is the last use of
// any registers, or if the value itself is dead, we need to move this
// information over to the new copy we just inserted.
- LV->removeVirtualRegistersKilled(MPhi);
+ LV->removeVirtualRegistersKilled(*MPhi);
// If the result is dead, update LV.
if (isDead) {
LV->addVirtualRegisterDead(DestReg, PHICopy);
- LV->removeVirtualRegisterDead(DestReg, MPhi);
+ LV->removeVirtualRegisterDead(DestReg, *MPhi);
}
}
// Update LiveIntervals for the new copy or implicit def.
if (LIS) {
- MachineInstr *NewInstr = std::prev(AfterPHIsIt);
- SlotIndex DestCopyIndex = LIS->InsertMachineInstrInMaps(NewInstr);
+ SlotIndex DestCopyIndex =
+ LIS->InsertMachineInstrInMaps(*std::prev(AfterPHIsIt));
SlotIndex MBBStartIndex = LIS->getMBBStartIdx(&MBB);
if (IncomingReg) {
@@ -453,7 +452,7 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB,
assert(KillInst->readsRegister(SrcReg) && "Cannot find kill instruction");
// Finally, mark it killed.
- LV->addVirtualRegisterKilled(SrcReg, KillInst);
+ LV->addVirtualRegisterKilled(SrcReg, *KillInst);
// This vreg no longer lives all of the way through opBlock.
unsigned opBlockNum = opBlock.getNumber();
@@ -462,8 +461,8 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB,
if (LIS) {
if (NewSrcInstr) {
- LIS->InsertMachineInstrInMaps(NewSrcInstr);
- LIS->addSegmentToEndOfBlock(IncomingReg, NewSrcInstr);
+ LIS->InsertMachineInstrInMaps(*NewSrcInstr);
+ LIS->addSegmentToEndOfBlock(IncomingReg, *NewSrcInstr);
}
if (!SrcUndef &&
@@ -513,7 +512,7 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB,
assert(KillInst->readsRegister(SrcReg) &&
"Cannot find kill instruction");
- SlotIndex LastUseIndex = LIS->getInstructionIndex(KillInst);
+ SlotIndex LastUseIndex = LIS->getInstructionIndex(*KillInst);
SrcLI.removeSegment(LastUseIndex.getRegSlot(),
LIS->getMBBEndIdx(&opBlock));
}
@@ -524,7 +523,7 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB,
// Really delete the PHI instruction now, if it is not in the LoweredPHIs map.
if (reusedIncoming || !IncomingReg) {
if (LIS)
- LIS->RemoveMachineInstrFromMaps(MPhi);
+ LIS->RemoveMachineInstrFromMaps(*MPhi);
MF.DeleteMachineInstr(MPhi);
}
}
@@ -612,7 +611,7 @@ bool PHIElimination::SplitPHIEdges(MachineFunction &MF,
}
if (!ShouldSplit && !SplitAllCriticalEdges)
continue;
- if (!PreMBB->SplitCriticalEdge(&MBB, this)) {
+ if (!PreMBB->SplitCriticalEdge(&MBB, *this)) {
DEBUG(dbgs() << "Failed to split critical edge.\n");
continue;
}
diff --git a/contrib/llvm/lib/CodeGen/ParallelCG.cpp b/contrib/llvm/lib/CodeGen/ParallelCG.cpp
index e73ba02..ccdaec1 100644
--- a/contrib/llvm/lib/CodeGen/ParallelCG.cpp
+++ b/contrib/llvm/lib/CodeGen/ParallelCG.cpp
@@ -19,78 +19,81 @@
#include "llvm/Support/ErrorOr.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/TargetRegistry.h"
-#include "llvm/Support/thread.h"
+#include "llvm/Support/ThreadPool.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/Utils/SplitModule.h"
using namespace llvm;
static void codegen(Module *M, llvm::raw_pwrite_stream &OS,
- const Target *TheTarget, StringRef CPU, StringRef Features,
- const TargetOptions &Options, Reloc::Model RM,
- CodeModel::Model CM, CodeGenOpt::Level OL,
+ function_ref<std::unique_ptr<TargetMachine>()> TMFactory,
TargetMachine::CodeGenFileType FileType) {
- std::unique_ptr<TargetMachine> TM(TheTarget->createTargetMachine(
- M->getTargetTriple(), CPU, Features, Options, RM, CM, OL));
-
+ std::unique_ptr<TargetMachine> TM = TMFactory();
legacy::PassManager CodeGenPasses;
if (TM->addPassesToEmitFile(CodeGenPasses, OS, FileType))
report_fatal_error("Failed to setup codegen");
CodeGenPasses.run(*M);
}
-std::unique_ptr<Module>
-llvm::splitCodeGen(std::unique_ptr<Module> M,
- ArrayRef<llvm::raw_pwrite_stream *> OSs, StringRef CPU,
- StringRef Features, const TargetOptions &Options,
- Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL,
- TargetMachine::CodeGenFileType FileType) {
- StringRef TripleStr = M->getTargetTriple();
- std::string ErrMsg;
- const Target *TheTarget = TargetRegistry::lookupTarget(TripleStr, ErrMsg);
- if (!TheTarget)
- report_fatal_error(Twine("Target not found: ") + ErrMsg);
+std::unique_ptr<Module> llvm::splitCodeGen(
+ std::unique_ptr<Module> M, ArrayRef<llvm::raw_pwrite_stream *> OSs,
+ ArrayRef<llvm::raw_pwrite_stream *> BCOSs,
+ const std::function<std::unique_ptr<TargetMachine>()> &TMFactory,
+ TargetMachine::CodeGenFileType FileType, bool PreserveLocals) {
+ assert(BCOSs.empty() || BCOSs.size() == OSs.size());
if (OSs.size() == 1) {
- codegen(M.get(), *OSs[0], TheTarget, CPU, Features, Options, RM, CM,
- OL, FileType);
+ if (!BCOSs.empty())
+ WriteBitcodeToFile(M.get(), *BCOSs[0]);
+ codegen(M.get(), *OSs[0], TMFactory, FileType);
return M;
}
- std::vector<thread> Threads;
- SplitModule(std::move(M), OSs.size(), [&](std::unique_ptr<Module> MPart) {
- // We want to clone the module in a new context to multi-thread the codegen.
- // We do it by serializing partition modules to bitcode (while still on the
- // main thread, in order to avoid data races) and spinning up new threads
- // which deserialize the partitions into separate contexts.
- // FIXME: Provide a more direct way to do this in LLVM.
- SmallVector<char, 0> BC;
- raw_svector_ostream BCOS(BC);
- WriteBitcodeToFile(MPart.get(), BCOS);
+ // Create ThreadPool in nested scope so that threads will be joined
+ // on destruction.
+ {
+ ThreadPool CodegenThreadPool(OSs.size());
+ int ThreadCount = 0;
- llvm::raw_pwrite_stream *ThreadOS = OSs[Threads.size()];
- Threads.emplace_back(
- [TheTarget, CPU, Features, Options, RM, CM, OL, FileType,
- ThreadOS](const SmallVector<char, 0> &BC) {
- LLVMContext Ctx;
- ErrorOr<std::unique_ptr<Module>> MOrErr =
- parseBitcodeFile(MemoryBufferRef(StringRef(BC.data(), BC.size()),
- "<split-module>"),
- Ctx);
- if (!MOrErr)
- report_fatal_error("Failed to read bitcode");
- std::unique_ptr<Module> MPartInCtx = std::move(MOrErr.get());
+ SplitModule(
+ std::move(M), OSs.size(),
+ [&](std::unique_ptr<Module> MPart) {
+ // We want to clone the module in a new context to multi-thread the
+ // codegen. We do it by serializing partition modules to bitcode
+ // (while still on the main thread, in order to avoid data races) and
+ // spinning up new threads which deserialize the partitions into
+ // separate contexts.
+ // FIXME: Provide a more direct way to do this in LLVM.
+ SmallString<0> BC;
+ raw_svector_ostream BCOS(BC);
+ WriteBitcodeToFile(MPart.get(), BCOS);
- codegen(MPartInCtx.get(), *ThreadOS, TheTarget, CPU, Features,
- Options, RM, CM, OL, FileType);
- },
- // Pass BC using std::move to ensure that it get moved rather than
- // copied into the thread's context.
- std::move(BC));
- });
+ if (!BCOSs.empty()) {
+ BCOSs[ThreadCount]->write(BC.begin(), BC.size());
+ BCOSs[ThreadCount]->flush();
+ }
+
+ llvm::raw_pwrite_stream *ThreadOS = OSs[ThreadCount++];
+ // Enqueue the task
+ CodegenThreadPool.async(
+ [TMFactory, FileType, ThreadOS](const SmallString<0> &BC) {
+ LLVMContext Ctx;
+ ErrorOr<std::unique_ptr<Module>> MOrErr = parseBitcodeFile(
+ MemoryBufferRef(StringRef(BC.data(), BC.size()),
+ "<split-module>"),
+ Ctx);
+ if (!MOrErr)
+ report_fatal_error("Failed to read bitcode");
+ std::unique_ptr<Module> MPartInCtx = std::move(MOrErr.get());
- for (thread &T : Threads)
- T.join();
+ codegen(MPartInCtx.get(), *ThreadOS, TMFactory, FileType);
+ },
+          // Pass BC using std::move to ensure that it gets moved rather than
+ // copied into the thread's context.
+ std::move(BC));
+ },
+ PreserveLocals);
+ }
return {};
}
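
The rewritten splitCodeGen() above replaces the long list of per-target parameters with a TargetMachine factory and hands partition codegen to a ThreadPool. A minimal caller-side sketch, assuming the caller still owns the target lookup (TheTarget, CPU, Features, Options, RM, CM, OL) and the output streams in OSs exactly as it did with the old signature:

```cpp
// Hypothetical driver for the factory-based splitCodeGen(). The factory is
// invoked on pool worker threads, so it must not touch M, which is moved
// into SplitModule; copy anything it needs up front.
std::string Triple = M->getTargetTriple();
auto TMFactory = [=, &Options]() -> std::unique_ptr<TargetMachine> {
  return std::unique_ptr<TargetMachine>(TheTarget->createTargetMachine(
      Triple, CPU, Features, Options, RM, CM, OL));
};
// Pass an empty BCOSs list when no bitcode side-streams are wanted; the
// assertion requires BCOSs to be empty or to match OSs in size.
M = splitCodeGen(std::move(M), OSs, /*BCOSs=*/{}, TMFactory,
                 TargetMachine::CGFT_ObjectFile, /*PreserveLocals=*/false);
```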
diff --git a/contrib/llvm/lib/CodeGen/PatchableFunction.cpp b/contrib/llvm/lib/CodeGen/PatchableFunction.cpp
new file mode 100644
index 0000000..32468c9
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/PatchableFunction.cpp
@@ -0,0 +1,88 @@
+//===-- PatchableFunction.cpp - Patchable prologues for LLVM -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass edits function bodies in place to support the
+// "patchable-function" attribute.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+
+using namespace llvm;
+
+namespace {
+struct PatchableFunction : public MachineFunctionPass {
+ static char ID; // Pass identification, replacement for typeid
+ PatchableFunction() : MachineFunctionPass(ID) {
+ initializePatchableFunctionPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnMachineFunction(MachineFunction &F) override;
+ MachineFunctionProperties getRequiredProperties() const override {
+ return MachineFunctionProperties().set(
+ MachineFunctionProperties::Property::AllVRegsAllocated);
+ }
+};
+}
+
+/// Returns true if instruction \p MI will not result in actual machine code
+/// instructions.
+static bool doesNotGeneratecode(const MachineInstr &MI) {
+ // TODO: Introduce an MCInstrDesc flag for this
+ switch (MI.getOpcode()) {
+ default: return false;
+ case TargetOpcode::IMPLICIT_DEF:
+ case TargetOpcode::KILL:
+ case TargetOpcode::CFI_INSTRUCTION:
+ case TargetOpcode::EH_LABEL:
+ case TargetOpcode::GC_LABEL:
+ case TargetOpcode::DBG_VALUE:
+ return true;
+ }
+}
+
+bool PatchableFunction::runOnMachineFunction(MachineFunction &MF) {
+ if (!MF.getFunction()->hasFnAttribute("patchable-function"))
+ return false;
+
+#ifndef NDEBUG
+ Attribute PatchAttr = MF.getFunction()->getFnAttribute("patchable-function");
+ StringRef PatchType = PatchAttr.getValueAsString();
+ assert(PatchType == "prologue-short-redirect" && "Only possibility today!");
+#endif
+
+ auto &FirstMBB = *MF.begin();
+ MachineBasicBlock::iterator FirstActualI = FirstMBB.begin();
+ for (; doesNotGeneratecode(*FirstActualI); ++FirstActualI)
+ assert(FirstActualI != FirstMBB.end());
+
+ auto *TII = MF.getSubtarget().getInstrInfo();
+ auto MIB = BuildMI(FirstMBB, FirstActualI, FirstActualI->getDebugLoc(),
+ TII->get(TargetOpcode::PATCHABLE_OP))
+ .addImm(2)
+ .addImm(FirstActualI->getOpcode());
+
+ for (auto &MO : FirstActualI->operands())
+ MIB.addOperand(MO);
+
+ FirstActualI->eraseFromParent();
+ MF.ensureAlignment(4);
+ return true;
+}
+
+char PatchableFunction::ID = 0;
+char &llvm::PatchableFunctionID = PatchableFunction::ID;
+INITIALIZE_PASS(PatchableFunction, "patchable-function",
+ "Implement the 'patchable-function' attribute", false, false)
diff --git a/contrib/llvm/lib/CodeGen/PeepholeOptimizer.cpp b/contrib/llvm/lib/CodeGen/PeepholeOptimizer.cpp
index 52b42b6..60b27dd 100644
--- a/contrib/llvm/lib/CodeGen/PeepholeOptimizer.cpp
+++ b/contrib/llvm/lib/CodeGen/PeepholeOptimizer.cpp
@@ -394,10 +394,10 @@ namespace {
char PeepholeOptimizer::ID = 0;
char &llvm::PeepholeOptimizerID = PeepholeOptimizer::ID;
-INITIALIZE_PASS_BEGIN(PeepholeOptimizer, "peephole-opts",
+INITIALIZE_PASS_BEGIN(PeepholeOptimizer, DEBUG_TYPE,
"Peephole Optimizations", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
-INITIALIZE_PASS_END(PeepholeOptimizer, "peephole-opts",
+INITIALIZE_PASS_END(PeepholeOptimizer, DEBUG_TYPE,
"Peephole Optimizations", false, false)
/// If instruction is a copy-like instruction, i.e. it reads a single register
@@ -564,13 +564,13 @@ bool PeepholeOptimizer::optimizeCmpInstr(MachineInstr *MI,
// physical register, we can try to optimize it.
unsigned SrcReg, SrcReg2;
int CmpMask, CmpValue;
- if (!TII->analyzeCompare(MI, SrcReg, SrcReg2, CmpMask, CmpValue) ||
+ if (!TII->analyzeCompare(*MI, SrcReg, SrcReg2, CmpMask, CmpValue) ||
TargetRegisterInfo::isPhysicalRegister(SrcReg) ||
(SrcReg2 != 0 && TargetRegisterInfo::isPhysicalRegister(SrcReg2)))
return false;
// Attempt to optimize the comparison instruction.
- if (TII->optimizeCompareInstr(MI, SrcReg, SrcReg2, CmpMask, CmpValue, MRI)) {
+ if (TII->optimizeCompareInstr(*MI, SrcReg, SrcReg2, CmpMask, CmpValue, MRI)) {
++NumCmps;
return true;
}
@@ -585,11 +585,11 @@ bool PeepholeOptimizer::optimizeSelect(MachineInstr *MI,
unsigned FalseOp = 0;
bool Optimizable = false;
SmallVector<MachineOperand, 4> Cond;
- if (TII->analyzeSelect(MI, Cond, TrueOp, FalseOp, Optimizable))
+ if (TII->analyzeSelect(*MI, Cond, TrueOp, FalseOp, Optimizable))
return false;
if (!Optimizable)
return false;
- if (!TII->optimizeSelect(MI, LocalMIs))
+ if (!TII->optimizeSelect(*MI, LocalMIs))
return false;
MI->eraseFromParent();
++NumSelects;
@@ -599,7 +599,7 @@ bool PeepholeOptimizer::optimizeSelect(MachineInstr *MI,
/// \brief Check if a simpler conditional branch can be
/// generated.
bool PeepholeOptimizer::optimizeCondBranch(MachineInstr *MI) {
- return TII->optimizeCondBranch(MI);
+ return TII->optimizeCondBranch(*MI);
}
/// \brief Try to find the next source that shares the same register file
@@ -1351,7 +1351,7 @@ bool PeepholeOptimizer::foldImmediate(
continue;
DenseMap<unsigned, MachineInstr*>::iterator II = ImmDefMIs.find(Reg);
assert(II != ImmDefMIs.end() && "couldn't find immediate definition");
- if (TII->FoldImmediate(MI, II->second, Reg, MRI)) {
+ if (TII->FoldImmediate(*MI, *II->second, Reg, MRI)) {
++NumImmFold;
return true;
}
@@ -1471,7 +1471,7 @@ bool PeepholeOptimizer::foldRedundantNAPhysCopy(
}
bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
- if (skipOptnoneFunction(*MF.getFunction()))
+ if (skipFunction(*MF.getFunction()))
return false;
DEBUG(dbgs() << "********** PEEPHOLE OPTIMIZER **********\n");
@@ -1636,10 +1636,8 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
// we need it for markUsesInDebugValueAsUndef().
unsigned FoldedReg = FoldAsLoadDefReg;
MachineInstr *DefMI = nullptr;
- MachineInstr *FoldMI = TII->optimizeLoadInstr(MI, MRI,
- FoldAsLoadDefReg,
- DefMI);
- if (FoldMI) {
+ if (MachineInstr *FoldMI =
+ TII->optimizeLoadInstr(*MI, MRI, FoldAsLoadDefReg, DefMI)) {
// Update LocalMIs since we replaced MI with FoldMI and deleted
// DefMI.
DEBUG(dbgs() << "Replacing: " << *MI);
@@ -1888,9 +1886,11 @@ ValueTrackerResult ValueTracker::getNextSourceFromPHI() {
ValueTrackerResult ValueTracker::getNextSourceImpl() {
assert(Def && "This method needs a valid definition");
- assert(
- (DefIdx < Def->getDesc().getNumDefs() || Def->getDesc().isVariadic()) &&
- Def->getOperand(DefIdx).isDef() && "Invalid DefIdx");
+ assert(((Def->getOperand(DefIdx).isDef() &&
+ (DefIdx < Def->getDesc().getNumDefs() ||
+ Def->getDesc().isVariadic())) ||
+ Def->getOperand(DefIdx).isImplicit()) &&
+ "Invalid DefIdx");
if (Def->isCopy())
return getNextSourceFromCopy();
if (Def->isBitcast())
diff --git a/contrib/llvm/lib/CodeGen/PostRAHazardRecognizer.cpp b/contrib/llvm/lib/CodeGen/PostRAHazardRecognizer.cpp
new file mode 100644
index 0000000..5bc5f75
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/PostRAHazardRecognizer.cpp
@@ -0,0 +1,98 @@
+//===----- PostRAHazardRecognizer.cpp - hazard recognizer -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// This runs the hazard recognizer and emits noops when necessary. This
+/// gives targets a way to run the hazard recognizer without running one of
+/// the schedulers. Example use cases for this pass would be:
+///
+/// - Targets that need the hazard recognizer to be run at -O0.
+/// - Targets that want to guarantee that hazards at the beginning of
+/// scheduling regions are handled correctly. The post-RA scheduler is
+/// a top-down scheduler, but when there are multiple scheduling regions
+/// in a basic block, it visits the regions in bottom-up order. This
+///    makes it impossible for the scheduler to guarantee it can correctly
+/// handle hazards at the beginning of scheduling regions.
+///
+/// This pass traverses all the instructions in a program in top-down order.
+/// In contrast to the instruction scheduling passes, this pass never resets
+/// the hazard recognizer, to ensure it correctly handles noop hazards at
+/// the beginning of blocks.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "post-RA-hazard-rec"
+
+STATISTIC(NumNoops, "Number of noops inserted");
+
+namespace {
+ class PostRAHazardRecognizer : public MachineFunctionPass {
+
+ public:
+ static char ID;
+ PostRAHazardRecognizer() : MachineFunctionPass(ID) {}
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ bool runOnMachineFunction(MachineFunction &Fn) override;
+
+ };
+ char PostRAHazardRecognizer::ID = 0;
+
+}
+
+char &llvm::PostRAHazardRecognizerID = PostRAHazardRecognizer::ID;
+
+INITIALIZE_PASS(PostRAHazardRecognizer, DEBUG_TYPE,
+ "Post RA hazard recognizer", false, false)
+
+bool PostRAHazardRecognizer::runOnMachineFunction(MachineFunction &Fn) {
+ const TargetInstrInfo *TII = Fn.getSubtarget().getInstrInfo();
+ std::unique_ptr<ScheduleHazardRecognizer> HazardRec(
+ TII->CreateTargetPostRAHazardRecognizer(Fn));
+
+ // Return if the target has not implemented a hazard recognizer.
+ if (!HazardRec.get())
+ return false;
+
+ // Loop over all of the basic blocks
+ for (auto &MBB : Fn) {
+ // We do not call HazardRec->reset() here to make sure we are handling noop
+ // hazards at the start of basic blocks.
+ for (MachineInstr &MI : MBB) {
+ // If we need to emit noops prior to this instruction, then do so.
+ unsigned NumPreNoops = HazardRec->PreEmitNoops(&MI);
+ for (unsigned i = 0; i != NumPreNoops; ++i) {
+ HazardRec->EmitNoop();
+ TII->insertNoop(MBB, MachineBasicBlock::iterator(MI));
+ ++NumNoops;
+ }
+
+ HazardRec->EmitInstruction(&MI);
+ if (HazardRec->atIssueLimit()) {
+ HazardRec->AdvanceCycle();
+ }
+ }
+ }
+ return true;
+}
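
Nothing schedules this pass by default; a target opts in by returning a non-null recognizer from CreateTargetPostRAHazardRecognizer(const MachineFunction &) and adding the pass to its pipeline. A rough sketch, where FooPassConfig stands in for a real TargetPassConfig subclass:

```cpp
// Hypothetical target wiring: run the standalone hazard recognizer as a
// pre-emit pass, after every scheduling pass has finished, so the inserted
// noops survive into the emitted code.
void FooPassConfig::addPreEmitPass() {
  addPass(&PostRAHazardRecognizerID);
}
```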
diff --git a/contrib/llvm/lib/CodeGen/PostRASchedulerList.cpp b/contrib/llvm/lib/CodeGen/PostRASchedulerList.cpp
index b95dffd..3fce307 100644
--- a/contrib/llvm/lib/CodeGen/PostRASchedulerList.cpp
+++ b/contrib/llvm/lib/CodeGen/PostRASchedulerList.cpp
@@ -18,11 +18,9 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/CodeGen/Passes.h"
#include "AggressiveAntiDepBreaker.h"
#include "AntiDepBreaker.h"
#include "CriticalAntiDepBreaker.h"
-#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/LatencyPriorityQueue.h"
@@ -31,10 +29,12 @@
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/RegisterClassInfo.h"
#include "llvm/CodeGen/ScheduleDAGInstrs.h"
#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
#include "llvm/CodeGen/SchedulerRegistry.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
@@ -96,8 +96,14 @@ namespace {
MachineFunctionPass::getAnalysisUsage(AU);
}
+ MachineFunctionProperties getRequiredProperties() const override {
+ return MachineFunctionProperties().set(
+ MachineFunctionProperties::Property::AllVRegsAllocated);
+ }
+
bool runOnMachineFunction(MachineFunction &Fn) override;
+ private:
bool enablePostRAScheduler(
const TargetSubtargetInfo &ST, CodeGenOpt::Level OptLevel,
TargetSubtargetInfo::AntiDepBreakMode &Mode,
@@ -128,6 +134,9 @@ namespace {
/// The schedule. Null SUnit*'s represent noop instructions.
std::vector<SUnit*> Sequence;
+ /// Ordered list of DAG postprocessing steps.
+ std::vector<std::unique_ptr<ScheduleDAGMutation>> Mutations;
+
/// The index in BB of RegionEnd.
///
/// This is the instruction number from the top of the current block, not
@@ -169,13 +178,16 @@ namespace {
/// Observe - Update liveness information to account for the current
/// instruction, which will not be scheduled.
///
- void Observe(MachineInstr *MI, unsigned Count);
+ void Observe(MachineInstr &MI, unsigned Count);
/// finishBlock - Clean up register live-range state.
///
void finishBlock() override;
private:
+ /// Apply each ScheduleDAGMutation step in order.
+ void postprocessDAG();
+
void ReleaseSucc(SUnit *SU, SDep *SuccEdge);
void ReleaseSuccessors(SUnit *SU);
void ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle);
@@ -203,6 +215,7 @@ SchedulePostRATDList::SchedulePostRATDList(
HazardRec =
MF.getSubtarget().getInstrInfo()->CreateTargetPostRAHazardRecognizer(
InstrItins, this);
+ MF.getSubtarget().getPostRAMutations(Mutations);
assert((AntiDepMode == TargetSubtargetInfo::ANTIDEP_NONE ||
MRI.tracksLiveness()) &&
@@ -257,12 +270,17 @@ bool PostRAScheduler::enablePostRAScheduler(
TargetSubtargetInfo::RegClassVector &CriticalPathRCs) const {
Mode = ST.getAntiDepBreakMode();
ST.getCriticalPathRCs(CriticalPathRCs);
+
+ // Check for explicit enable/disable of post-ra scheduling.
+ if (EnablePostRAScheduler.getPosition() > 0)
+ return EnablePostRAScheduler;
+
return ST.enablePostRAScheduler() &&
OptLevel >= ST.getOptLevelToEnablePostRAScheduler();
}
bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) {
- if (skipOptnoneFunction(*Fn.getFunction()))
+ if (skipFunction(*Fn.getFunction()))
return false;
TII = Fn.getSubtarget().getInstrInfo();
@@ -272,20 +290,15 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) {
RegClassInfo.runOnMachineFunction(Fn);
- // Check for explicit enable/disable of post-ra scheduling.
TargetSubtargetInfo::AntiDepBreakMode AntiDepMode =
TargetSubtargetInfo::ANTIDEP_NONE;
SmallVector<const TargetRegisterClass*, 4> CriticalPathRCs;
- if (EnablePostRAScheduler.getPosition() > 0) {
- if (!EnablePostRAScheduler)
- return false;
- } else {
- // Check that post-RA scheduling is enabled for this target.
- // This may upgrade the AntiDepMode.
- if (!enablePostRAScheduler(Fn.getSubtarget(), PassConfig->getOptLevel(),
- AntiDepMode, CriticalPathRCs))
- return false;
- }
+
+ // Check that post-RA scheduling is enabled for this target.
+ // This may upgrade the AntiDepMode.
+ if (!enablePostRAScheduler(Fn.getSubtarget(), PassConfig->getOptLevel(),
+ AntiDepMode, CriticalPathRCs))
+ return false;
// Check for antidep breaking override...
if (EnableAntiDepBreaking.getPosition() > 0) {
@@ -322,24 +335,24 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) {
MachineBasicBlock::iterator Current = MBB.end();
unsigned Count = MBB.size(), CurrentCount = Count;
for (MachineBasicBlock::iterator I = Current; I != MBB.begin();) {
- MachineInstr *MI = std::prev(I);
+ MachineInstr &MI = *std::prev(I);
--Count;
// Calls are not scheduling boundaries before register allocation, but
// post-ra we don't gain anything by scheduling across calls since we
// don't need to worry about register pressure.
- if (MI->isCall() || TII->isSchedulingBoundary(MI, &MBB, Fn)) {
+ if (MI.isCall() || TII->isSchedulingBoundary(MI, &MBB, Fn)) {
Scheduler.enterRegion(&MBB, I, Current, CurrentCount - Count);
Scheduler.setEndIndex(CurrentCount);
Scheduler.schedule();
Scheduler.exitRegion();
Scheduler.EmitSchedule();
- Current = MI;
+ Current = &MI;
CurrentCount = Count;
Scheduler.Observe(MI, CurrentCount);
}
I = MI;
- if (MI->isBundle())
- Count -= MI->getBundleSize();
+ if (MI.isBundle())
+ Count -= MI.getBundleSize();
}
assert(Count == 0 && "Instruction count mismatch!");
assert((MBB.begin() == Current || CurrentCount != 0) &&
@@ -398,6 +411,8 @@ void SchedulePostRATDList::schedule() {
}
}
+ postprocessDAG();
+
DEBUG(dbgs() << "********** List Scheduling **********\n");
DEBUG(
for (const SUnit &SU : SUnits) {
@@ -414,7 +429,7 @@ void SchedulePostRATDList::schedule() {
/// Observe - Update liveness information to account for the current
/// instruction, which will not be scheduled.
///
-void SchedulePostRATDList::Observe(MachineInstr *MI, unsigned Count) {
+void SchedulePostRATDList::Observe(MachineInstr &MI, unsigned Count) {
if (AntiDepBreak)
AntiDepBreak->Observe(MI, Count, EndIndex);
}
@@ -429,6 +444,12 @@ void SchedulePostRATDList::finishBlock() {
ScheduleDAGInstrs::finishBlock();
}
+/// Apply each ScheduleDAGMutation step in order.
+void SchedulePostRATDList::postprocessDAG() {
+ for (auto &M : Mutations)
+ M->apply(this);
+}
+
//===----------------------------------------------------------------------===//
// Top-Down Scheduling
//===----------------------------------------------------------------------===//
diff --git a/contrib/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp b/contrib/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp
new file mode 100644
index 0000000..fbc2bc6
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp
@@ -0,0 +1,94 @@
+//===-- PreISelIntrinsicLowering.cpp - Pre-ISel intrinsic lowering pass ---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass implements IR lowering for the llvm.load.relative intrinsic.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/PreISelIntrinsicLowering.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
+
+using namespace llvm;
+
+namespace {
+
+bool lowerLoadRelative(Function &F) {
+ if (F.use_empty())
+ return false;
+
+ bool Changed = false;
+ Type *Int32Ty = Type::getInt32Ty(F.getContext());
+ Type *Int32PtrTy = Int32Ty->getPointerTo();
+ Type *Int8Ty = Type::getInt8Ty(F.getContext());
+
+ for (auto I = F.use_begin(), E = F.use_end(); I != E;) {
+ auto CI = dyn_cast<CallInst>(I->getUser());
+ ++I;
+ if (!CI || CI->getCalledValue() != &F)
+ continue;
+
+ IRBuilder<> B(CI);
+ Value *OffsetPtr =
+ B.CreateGEP(Int8Ty, CI->getArgOperand(0), CI->getArgOperand(1));
+ Value *OffsetPtrI32 = B.CreateBitCast(OffsetPtr, Int32PtrTy);
+ Value *OffsetI32 = B.CreateAlignedLoad(OffsetPtrI32, 4);
+
+ Value *ResultPtr = B.CreateGEP(Int8Ty, CI->getArgOperand(0), OffsetI32);
+
+ CI->replaceAllUsesWith(ResultPtr);
+ CI->eraseFromParent();
+ Changed = true;
+ }
+
+ return Changed;
+}
+
+bool lowerIntrinsics(Module &M) {
+ bool Changed = false;
+ for (Function &F : M) {
+ if (F.getName().startswith("llvm.load.relative."))
+ Changed |= lowerLoadRelative(F);
+ }
+ return Changed;
+}
+
+class PreISelIntrinsicLoweringLegacyPass : public ModulePass {
+public:
+ static char ID;
+ PreISelIntrinsicLoweringLegacyPass() : ModulePass(ID) {}
+
+  bool runOnModule(Module &M) override { return lowerIntrinsics(M); }
+};
+
+char PreISelIntrinsicLoweringLegacyPass::ID;
+}
+
+INITIALIZE_PASS(PreISelIntrinsicLoweringLegacyPass,
+ "pre-isel-intrinsic-lowering", "Pre-ISel Intrinsic Lowering",
+ false, false)
+
+namespace llvm {
+ModulePass *createPreISelIntrinsicLoweringPass() {
+ return new PreISelIntrinsicLoweringLegacyPass;
+}
+
+PreservedAnalyses PreISelIntrinsicLoweringPass::run(Module &M,
+ ModuleAnalysisManager &AM) {
+ if (!lowerIntrinsics(M))
+ return PreservedAnalyses::all();
+ else
+ return PreservedAnalyses::none();
+}
+} // End llvm namespace
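
Running the lowering is a one-liner in either pass manager. A minimal legacy-PM sketch (runPreISelLowering is illustrative; createPreISelIntrinsicLoweringPass is declared via llvm/CodeGen/Passes.h):

```cpp
#include "llvm/CodeGen/Passes.h"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/Module.h"

// Lower any llvm.load.relative.* calls in M before instruction selection.
static void runPreISelLowering(llvm::Module &M) {
  llvm::legacy::PassManager PM;
  PM.add(llvm::createPreISelIntrinsicLoweringPass());
  PM.run(M);
}
```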
diff --git a/contrib/llvm/lib/CodeGen/PrologEpilogInserter.cpp b/contrib/llvm/lib/CodeGen/PrologEpilogInserter.cpp
index 939c500..20a9a39 100644
--- a/contrib/llvm/lib/CodeGen/PrologEpilogInserter.cpp
+++ b/contrib/llvm/lib/CodeGen/PrologEpilogInserter.cpp
@@ -16,7 +16,6 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/ADT/IndexedMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallSet.h"
@@ -35,7 +34,6 @@
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetFrameLowering.h"
@@ -49,48 +47,83 @@ using namespace llvm;
#define DEBUG_TYPE "pei"
+typedef SmallVector<MachineBasicBlock *, 4> MBBVector;
+static void doSpillCalleeSavedRegs(MachineFunction &MF, RegScavenger *RS,
+ unsigned &MinCSFrameIndex,
+                                   unsigned &MaxCSFrameIndex,
+ const MBBVector &SaveBlocks,
+ const MBBVector &RestoreBlocks);
+
+static void doScavengeFrameVirtualRegs(MachineFunction &MF, RegScavenger *RS);
+
namespace {
class PEI : public MachineFunctionPass {
public:
static char ID;
- PEI() : MachineFunctionPass(ID) {
+ explicit PEI(const TargetMachine *TM = nullptr) : MachineFunctionPass(ID) {
initializePEIPass(*PassRegistry::getPassRegistry());
+
+ if (TM && (!TM->usesPhysRegsForPEI())) {
+ SpillCalleeSavedRegisters = [](MachineFunction &, RegScavenger *,
+ unsigned &, unsigned &, const MBBVector &,
+ const MBBVector &) {};
+ ScavengeFrameVirtualRegs = [](MachineFunction &, RegScavenger *) {};
+ } else {
+ SpillCalleeSavedRegisters = doSpillCalleeSavedRegs;
+ ScavengeFrameVirtualRegs = doScavengeFrameVirtualRegs;
+ UsesCalleeSaves = true;
+ }
}
void getAnalysisUsage(AnalysisUsage &AU) const override;
+ MachineFunctionProperties getRequiredProperties() const override {
+ MachineFunctionProperties MFP;
+ if (UsesCalleeSaves)
+ MFP.set(MachineFunctionProperties::Property::AllVRegsAllocated);
+ return MFP;
+ }
+
/// runOnMachineFunction - Insert prolog/epilog code and replace abstract
/// frame indexes with appropriate references.
///
bool runOnMachineFunction(MachineFunction &Fn) override;
private:
+ std::function<void(MachineFunction &MF, RegScavenger *RS,
+ unsigned &MinCSFrameIndex, unsigned &MaxCSFrameIndex,
+ const MBBVector &SaveBlocks,
+ const MBBVector &RestoreBlocks)>
+ SpillCalleeSavedRegisters;
+ std::function<void(MachineFunction &MF, RegScavenger *RS)>
+ ScavengeFrameVirtualRegs;
+
+ bool UsesCalleeSaves = false;
+
RegScavenger *RS;
// MinCSFrameIndex, MaxCSFrameIndex - Keeps the range of callee saved
// stack frame indexes.
- unsigned MinCSFrameIndex, MaxCSFrameIndex;
+ unsigned MinCSFrameIndex = std::numeric_limits<unsigned>::max();
+ unsigned MaxCSFrameIndex = 0;
// Save and Restore blocks of the current function. Typically there is a
// single save block, unless Windows EH funclets are involved.
- SmallVector<MachineBasicBlock *, 1> SaveBlocks;
- SmallVector<MachineBasicBlock *, 4> RestoreBlocks;
+ MBBVector SaveBlocks;
+ MBBVector RestoreBlocks;
// Flag to control whether to use the register scavenger to resolve
// frame index materialization registers. Set according to
// TRI->requiresFrameIndexScavenging() for the current function.
bool FrameIndexVirtualScavenging;
- void calculateSets(MachineFunction &Fn);
- void calculateCallsInformation(MachineFunction &Fn);
- void assignCalleeSavedSpillSlots(MachineFunction &Fn,
- const BitVector &SavedRegs);
- void insertCSRSpillsAndRestores(MachineFunction &Fn);
+ void calculateCallFrameInfo(MachineFunction &Fn);
+ void calculateSaveRestoreBlocks(MachineFunction &Fn);
+
void calculateFrameObjectOffsets(MachineFunction &Fn);
void replaceFrameIndices(MachineFunction &Fn);
void replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &Fn,
int &SPAdj);
- void scavengeFrameVirtualRegs(MachineFunction &Fn);
void insertPrologEpilogCode(MachineFunction &Fn);
};
} // namespace
@@ -103,15 +136,19 @@ WarnStackSize("warn-stack-size", cl::Hidden, cl::init((unsigned)-1),
cl::desc("Warn for stack size bigger than the given"
" number"));
-INITIALIZE_PASS_BEGIN(PEI, "prologepilog",
- "Prologue/Epilogue Insertion", false, false)
+INITIALIZE_TM_PASS_BEGIN(PEI, "prologepilog", "Prologue/Epilogue Insertion",
+ false, false)
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
INITIALIZE_PASS_DEPENDENCY(StackProtector)
-INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
-INITIALIZE_PASS_END(PEI, "prologepilog",
- "Prologue/Epilogue Insertion & Frame Finalization",
- false, false)
+INITIALIZE_TM_PASS_END(PEI, "prologepilog",
+ "Prologue/Epilogue Insertion & Frame Finalization",
+ false, false)
+
+MachineFunctionPass *
+llvm::createPrologEpilogInserterPass(const TargetMachine *TM) {
+ return new PEI(TM);
+}
STATISTIC(NumScavengedRegs, "Number of frame index regs scavenged");
STATISTIC(NumBytesStackSpace,
@@ -122,40 +159,9 @@ void PEI::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addPreserved<MachineLoopInfo>();
AU.addPreserved<MachineDominatorTree>();
AU.addRequired<StackProtector>();
- AU.addRequired<TargetPassConfig>();
MachineFunctionPass::getAnalysisUsage(AU);
}
-/// Compute the set of return blocks
-void PEI::calculateSets(MachineFunction &Fn) {
- const MachineFrameInfo *MFI = Fn.getFrameInfo();
-
- // Even when we do not change any CSR, we still want to insert the
- // prologue and epilogue of the function.
- // So set the save points for those.
-
- // Use the points found by shrink-wrapping, if any.
- if (MFI->getSavePoint()) {
- SaveBlocks.push_back(MFI->getSavePoint());
- assert(MFI->getRestorePoint() && "Both restore and save must be set");
- MachineBasicBlock *RestoreBlock = MFI->getRestorePoint();
- // If RestoreBlock does not have any successor and is not a return block
- // then the end point is unreachable and we do not need to insert any
- // epilogue.
- if (!RestoreBlock->succ_empty() || RestoreBlock->isReturnBlock())
- RestoreBlocks.push_back(RestoreBlock);
- return;
- }
-
- // Save refs to entry and return blocks.
- SaveBlocks.push_back(&Fn.front());
- for (MachineBasicBlock &MBB : Fn) {
- if (MBB.isEHFuncletEntry())
- SaveBlocks.push_back(&MBB);
- if (MBB.isReturnBlock())
- RestoreBlocks.push_back(&MBB);
- }
-}
/// StackObjSet - A set of stack object indexes
typedef SmallSetVector<int, 8> StackObjSet;
@@ -168,30 +174,21 @@ bool PEI::runOnMachineFunction(MachineFunction &Fn) {
const TargetRegisterInfo *TRI = Fn.getSubtarget().getRegisterInfo();
const TargetFrameLowering *TFI = Fn.getSubtarget().getFrameLowering();
- assert(!Fn.getRegInfo().getNumVirtRegs() && "Regalloc must assign all vregs");
-
RS = TRI->requiresRegisterScavenging(Fn) ? new RegScavenger() : nullptr;
FrameIndexVirtualScavenging = TRI->requiresFrameIndexScavenging(Fn);
// Calculate the MaxCallFrameSize and AdjustsStack variables for the
// function's frame information. Also eliminates call frame pseudo
// instructions.
- calculateCallsInformation(Fn);
+ calculateCallFrameInfo(Fn);
- // Determine which of the registers in the callee save list should be saved.
- BitVector SavedRegs;
- TFI->determineCalleeSaves(Fn, SavedRegs, RS);
-
- // Insert spill code for any callee saved registers that are modified.
- assignCalleeSavedSpillSlots(Fn, SavedRegs);
-
- // Determine placement of CSR spill/restore code:
+ // Determine placement of CSR spill/restore code and prolog/epilog code:
// place all spills in the entry block, all restores in return blocks.
- calculateSets(Fn);
+ calculateSaveRestoreBlocks(Fn);
- // Add the code to save and restore the callee saved registers.
- if (!F->hasFnAttribute(Attribute::Naked))
- insertCSRSpillsAndRestores(Fn);
+ // Handle CSR spilling and restoring, for targets that need it.
+ SpillCalleeSavedRegisters(Fn, RS, MinCSFrameIndex, MaxCSFrameIndex,
+ SaveBlocks, RestoreBlocks);
// Allow the target machine to make final modifications to the function
// before the frame layout is finalized.
@@ -216,11 +213,12 @@ bool PEI::runOnMachineFunction(MachineFunction &Fn) {
// If register scavenging is needed, as we've enabled doing it as a
// post-pass, scavenge the virtual registers that frame index elimination
// inserted.
- if (TRI->requiresRegisterScavenging(Fn) && FrameIndexVirtualScavenging)
- scavengeFrameVirtualRegs(Fn);
+ if (TRI->requiresRegisterScavenging(Fn) && FrameIndexVirtualScavenging) {
+ ScavengeFrameVirtualRegs(Fn, RS);
- // Clear any vregs created by virtual scavenging.
- Fn.getRegInfo().clearVirtRegs();
+ // Clear any vregs created by virtual scavenging.
+ Fn.getRegInfo().clearVirtRegs();
+ }
  // Warn on stack size when it exceeds the given limit.
MachineFrameInfo *MFI = Fn.getFrameInfo();
@@ -233,13 +231,15 @@ bool PEI::runOnMachineFunction(MachineFunction &Fn) {
delete RS;
SaveBlocks.clear();
RestoreBlocks.clear();
+ MFI->setSavePoint(nullptr);
+ MFI->setRestorePoint(nullptr);
return true;
}
-/// calculateCallsInformation - Calculate the MaxCallFrameSize and AdjustsStack
+/// Calculate the MaxCallFrameSize and AdjustsStack
/// variables for the function's frame information and eliminate call frame
/// pseudo instructions.
-void PEI::calculateCallsInformation(MachineFunction &Fn) {
+void PEI::calculateCallFrameInfo(MachineFunction &Fn) {
const TargetInstrInfo &TII = *Fn.getSubtarget().getInstrInfo();
const TargetFrameLowering *TFI = Fn.getSubtarget().getFrameLowering();
MachineFrameInfo *MFI = Fn.getFrameInfo();
@@ -290,12 +290,42 @@ void PEI::calculateCallsInformation(MachineFunction &Fn) {
}
}
-void PEI::assignCalleeSavedSpillSlots(MachineFunction &F,
- const BitVector &SavedRegs) {
- // These are used to keep track the callee-save area. Initialize them.
- MinCSFrameIndex = INT_MAX;
- MaxCSFrameIndex = 0;
+/// Compute the sets of entry and return blocks for saving and restoring
+/// callee-saved registers, and placing prolog and epilog code.
+void PEI::calculateSaveRestoreBlocks(MachineFunction &Fn) {
+ const MachineFrameInfo *MFI = Fn.getFrameInfo();
+
+ // Even when we do not change any CSR, we still want to insert the
+ // prologue and epilogue of the function.
+ // So set the save points for those.
+ // Use the points found by shrink-wrapping, if any.
+ if (MFI->getSavePoint()) {
+ SaveBlocks.push_back(MFI->getSavePoint());
+ assert(MFI->getRestorePoint() && "Both restore and save must be set");
+ MachineBasicBlock *RestoreBlock = MFI->getRestorePoint();
+ // If RestoreBlock does not have any successor and is not a return block
+ // then the end point is unreachable and we do not need to insert any
+ // epilogue.
+ if (!RestoreBlock->succ_empty() || RestoreBlock->isReturnBlock())
+ RestoreBlocks.push_back(RestoreBlock);
+ return;
+ }
+
+ // Save refs to entry and return blocks.
+ SaveBlocks.push_back(&Fn.front());
+ for (MachineBasicBlock &MBB : Fn) {
+ if (MBB.isEHFuncletEntry())
+ SaveBlocks.push_back(&MBB);
+ if (MBB.isReturnBlock())
+ RestoreBlocks.push_back(&MBB);
+ }
+}
+
+static void assignCalleeSavedSpillSlots(MachineFunction &F,
+ const BitVector &SavedRegs,
+ unsigned &MinCSFrameIndex,
+ unsigned &MaxCSFrameIndex) {
if (SavedRegs.empty())
return;
@@ -323,14 +353,13 @@ void PEI::assignCalleeSavedSpillSlots(MachineFunction &F,
// Now that we know which registers need to be saved and restored, allocate
// stack slots for them.
- for (std::vector<CalleeSavedInfo>::iterator I = CSI.begin(), E = CSI.end();
- I != E; ++I) {
- unsigned Reg = I->getReg();
+ for (auto &CS : CSI) {
+ unsigned Reg = CS.getReg();
const TargetRegisterClass *RC = RegInfo->getMinimalPhysRegClass(Reg);
int FrameIdx;
if (RegInfo->hasReservedSpillSlot(F, Reg, FrameIdx)) {
- I->setFrameIdx(FrameIdx);
+ CS.setFrameIdx(FrameIdx);
continue;
}
@@ -359,7 +388,7 @@ void PEI::assignCalleeSavedSpillSlots(MachineFunction &F,
MFI->CreateFixedSpillStackObject(RC->getSize(), FixedSlot->Offset);
}
- I->setFrameIdx(FrameIdx);
+ CS.setFrameIdx(FrameIdx);
}
}
@@ -427,7 +456,9 @@ static void updateLiveness(MachineFunction &MF) {
/// insertCSRSpillsAndRestores - Insert spill and restore code for
/// callee saved registers used in the function.
///
-void PEI::insertCSRSpillsAndRestores(MachineFunction &Fn) {
+static void insertCSRSpillsAndRestores(MachineFunction &Fn,
+ const MBBVector &SaveBlocks,
+ const MBBVector &RestoreBlocks) {
// Get callee saved register information.
MachineFrameInfo *MFI = Fn.getFrameInfo();
const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
@@ -496,6 +527,28 @@ void PEI::insertCSRSpillsAndRestores(MachineFunction &Fn) {
}
}
+static void doSpillCalleeSavedRegs(MachineFunction &Fn, RegScavenger *RS,
+ unsigned &MinCSFrameIndex,
+ unsigned &MaxCSFrameIndex,
+ const MBBVector &SaveBlocks,
+ const MBBVector &RestoreBlocks) {
+ const Function *F = Fn.getFunction();
+ const TargetFrameLowering *TFI = Fn.getSubtarget().getFrameLowering();
+ MinCSFrameIndex = std::numeric_limits<unsigned>::max();
+ MaxCSFrameIndex = 0;
+
+ // Determine which of the registers in the callee save list should be saved.
+ BitVector SavedRegs;
+ TFI->determineCalleeSaves(Fn, SavedRegs, RS);
+
+ // Assign stack slots for any callee-saved registers that must be spilled.
+ assignCalleeSavedSpillSlots(Fn, SavedRegs, MinCSFrameIndex, MaxCSFrameIndex);
+
+ // Add the code to save and restore the callee saved registers.
+ if (!F->hasFnAttribute(Attribute::Naked))
+ insertCSRSpillsAndRestores(Fn, SaveBlocks, RestoreBlocks);
+}
+
/// AdjustStackOffset - Helper function used to adjust the stack frame offset.
static inline void
AdjustStackOffset(MachineFrameInfo *MFI, int FrameIdx,
@@ -512,7 +565,7 @@ AdjustStackOffset(MachineFrameInfo *MFI, int FrameIdx,
MaxAlign = std::max(MaxAlign, Align);
// Adjust to alignment boundary.
- Offset = RoundUpToAlignment(Offset, Align, Skew);
+ Offset = alignTo(Offset, Align, Skew);
if (StackGrowsDown) {
DEBUG(dbgs() << "alloc FI(" << FrameIdx << ") at SP[" << -Offset << "]\n");
@@ -524,6 +577,108 @@ AdjustStackOffset(MachineFrameInfo *MFI, int FrameIdx,
}
}
+/// Compute which bytes of the fixed and callee-save stack area are unused
+/// and keep track of them in StackBytesFree.
+///
+static inline void
+computeFreeStackSlots(MachineFrameInfo *MFI, bool StackGrowsDown,
+ unsigned MinCSFrameIndex, unsigned MaxCSFrameIndex,
+ int64_t FixedCSEnd, BitVector &StackBytesFree) {
+ // Avoid undefined int64_t -> int conversion below in extreme case.
+ if (FixedCSEnd > std::numeric_limits<int>::max())
+ return;
+
+ StackBytesFree.resize(FixedCSEnd, true);
+
+ SmallVector<int, 16> AllocatedFrameSlots;
+ // Add fixed objects.
+ for (int i = MFI->getObjectIndexBegin(); i != 0; ++i)
+ AllocatedFrameSlots.push_back(i);
+ // Add callee-save objects.
+ for (int i = MinCSFrameIndex; i <= (int)MaxCSFrameIndex; ++i)
+ AllocatedFrameSlots.push_back(i);
+
+ for (int i : AllocatedFrameSlots) {
+ // These are converted from int64_t, but they should always fit in int
+ // because of the FixedCSEnd check above.
+ int ObjOffset = MFI->getObjectOffset(i);
+ int ObjSize = MFI->getObjectSize(i);
+ int ObjStart, ObjEnd;
+ if (StackGrowsDown) {
+ // ObjOffset is negative when StackGrowsDown is true.
+ ObjStart = -ObjOffset - ObjSize;
+ ObjEnd = -ObjOffset;
+ } else {
+ ObjStart = ObjOffset;
+ ObjEnd = ObjOffset + ObjSize;
+ }
+ // Ignore fixed holes that are in the previous stack frame.
+ if (ObjEnd > 0)
+ StackBytesFree.reset(ObjStart, ObjEnd);
+ }
+}
+
+/// Assign a frame object to an unused portion of the stack in the fixed stack
+/// object range. Return true if the allocation was successful.
+///
+static inline bool scavengeStackSlot(MachineFrameInfo *MFI, int FrameIdx,
+ bool StackGrowsDown, unsigned MaxAlign,
+ BitVector &StackBytesFree) {
+ if (MFI->isVariableSizedObjectIndex(FrameIdx))
+ return false;
+
+ if (StackBytesFree.none()) {
+    // Clear the BitVector so that later scavengeStackSlot calls return
+    // quickly via the StackBytesFree.none() test above.
+ StackBytesFree.clear();
+ return false;
+ }
+
+ unsigned ObjAlign = MFI->getObjectAlignment(FrameIdx);
+ if (ObjAlign > MaxAlign)
+ return false;
+
+ int64_t ObjSize = MFI->getObjectSize(FrameIdx);
+ int FreeStart;
+ for (FreeStart = StackBytesFree.find_first(); FreeStart != -1;
+ FreeStart = StackBytesFree.find_next(FreeStart)) {
+
+ // Check that free space has suitable alignment.
+ unsigned ObjStart = StackGrowsDown ? FreeStart + ObjSize : FreeStart;
+ if (alignTo(ObjStart, ObjAlign) != ObjStart)
+ continue;
+
+ if (FreeStart + ObjSize > StackBytesFree.size())
+ return false;
+
+ bool AllBytesFree = true;
+ for (unsigned Byte = 0; Byte < ObjSize; ++Byte)
+ if (!StackBytesFree.test(FreeStart + Byte)) {
+ AllBytesFree = false;
+ break;
+ }
+ if (AllBytesFree)
+ break;
+ }
+
+ if (FreeStart == -1)
+ return false;
+
+ if (StackGrowsDown) {
+ int ObjStart = -(FreeStart + ObjSize);
+ DEBUG(dbgs() << "alloc FI(" << FrameIdx << ") scavenged at SP[" << ObjStart
+ << "]\n");
+ MFI->setObjectOffset(FrameIdx, ObjStart);
+ } else {
+ DEBUG(dbgs() << "alloc FI(" << FrameIdx << ") scavenged at SP[" << FreeStart
+ << "]\n");
+ MFI->setObjectOffset(FrameIdx, FreeStart);
+ }
+
+ StackBytesFree.reset(FreeStart, FreeStart + ObjSize);
+ return true;
+}
+
/// AssignProtectedObjSet - Helper function to assign large stack objects (i.e.,
/// those required to be close to the Stack Protector) to stack offsets.
static void
@@ -568,9 +723,8 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) {
// If there are fixed sized objects that are preallocated in the local area,
// non-fixed objects can't be allocated right at the start of local area.
- // We currently don't support filling in holes in between fixed sized
- // objects, so we adjust 'Offset' to point to the end of last fixed sized
- // preallocated object.
+ // Adjust 'Offset' to point to the end of last fixed sized preallocated
+ // object.
for (int i = MFI->getObjectIndexBegin(); i != 0; ++i) {
int64_t FixedOff;
if (StackGrowsDown) {
@@ -596,22 +750,27 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) {
unsigned Align = MFI->getObjectAlignment(i);
// Adjust to alignment boundary
- Offset = RoundUpToAlignment(Offset, Align, Skew);
+ Offset = alignTo(Offset, Align, Skew);
+ DEBUG(dbgs() << "alloc FI(" << i << ") at SP[" << -Offset << "]\n");
MFI->setObjectOffset(i, -Offset); // Set the computed offset
}
- } else {
- int MaxCSFI = MaxCSFrameIndex, MinCSFI = MinCSFrameIndex;
- for (int i = MaxCSFI; i >= MinCSFI ; --i) {
+ } else if (MaxCSFrameIndex >= MinCSFrameIndex) {
+  // Be careful about underflow in comparisons against MinCSFrameIndex.
+ for (unsigned i = MaxCSFrameIndex; i != MinCSFrameIndex - 1; --i) {
unsigned Align = MFI->getObjectAlignment(i);
// Adjust to alignment boundary
- Offset = RoundUpToAlignment(Offset, Align, Skew);
+ Offset = alignTo(Offset, Align, Skew);
+ DEBUG(dbgs() << "alloc FI(" << i << ") at SP[" << Offset << "]\n");
MFI->setObjectOffset(i, Offset);
Offset += MFI->getObjectSize(i);
}
}
+ // FixedCSEnd is the stack offset to the end of the fixed and callee-save
+ // stack area.
+ int64_t FixedCSEnd = Offset;
unsigned MaxAlign = MFI->getMaxAlignment();
// Make sure the special register scavenging spill slot is closest to the
@@ -638,7 +797,7 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) {
unsigned Align = MFI->getLocalFrameMaxAlign();
// Adjust to alignment boundary.
- Offset = RoundUpToAlignment(Offset, Align, Skew);
+ Offset = alignTo(Offset, Align, Skew);
DEBUG(dbgs() << "Local frame base offset: " << Offset << "\n");
@@ -656,6 +815,11 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) {
MaxAlign = std::max(Align, MaxAlign);
}
+ // Retrieve the Exception Handler registration node.
+ int EHRegNodeFrameIndex = INT_MAX;
+ if (const WinEHFuncInfo *FuncInfo = Fn.getWinEHFuncInfo())
+ EHRegNodeFrameIndex = FuncInfo->EHRegNodeFrameIndex;
+
// Make sure that the stack protector comes before the local variables on the
// stack.
SmallSet<int, 16> ProtectedObjs;
@@ -678,7 +842,8 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) {
continue;
if (MFI->isDeadObjectIndex(i))
continue;
- if (MFI->getStackProtectorIndex() == (int)i)
+ if (MFI->getStackProtectorIndex() == (int)i ||
+ EHRegNodeFrameIndex == (int)i)
continue;
switch (SP->getSSPLayout(MFI->getObjectAllocation(i))) {
@@ -705,8 +870,10 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) {
Offset, MaxAlign, Skew);
}
- // Then assign frame offsets to stack objects that are not used to spill
- // callee saved registers.
+ SmallVector<int, 8> ObjectsToAllocate;
+
+ // Then prepare to assign frame offsets to stack objects that are not used to
+ // spill callee saved registers.
for (unsigned i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i) {
if (MFI->isObjectPreAllocated(i) &&
MFI->getUseLocalStackAllocationBlock())
@@ -717,14 +884,43 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) {
continue;
if (MFI->isDeadObjectIndex(i))
continue;
- if (MFI->getStackProtectorIndex() == (int)i)
+ if (MFI->getStackProtectorIndex() == (int)i ||
+ EHRegNodeFrameIndex == (int)i)
continue;
if (ProtectedObjs.count(i))
continue;
- AdjustStackOffset(MFI, i, StackGrowsDown, Offset, MaxAlign, Skew);
+ // Add the objects that we need to allocate to our working set.
+ ObjectsToAllocate.push_back(i);
}
+ // Allocate the EH registration node first if one is present.
+ if (EHRegNodeFrameIndex != INT_MAX)
+ AdjustStackOffset(MFI, EHRegNodeFrameIndex, StackGrowsDown, Offset,
+ MaxAlign, Skew);
+
+ // Give the targets a chance to order the objects the way they like it.
+ if (Fn.getTarget().getOptLevel() != CodeGenOpt::None &&
+ Fn.getTarget().Options.StackSymbolOrdering)
+ TFI.orderFrameObjects(Fn, ObjectsToAllocate);
+
+ // Keep track of which bytes in the fixed and callee-save range are used so we
+ // can use the holes when allocating later stack objects. Only do this if
+  // the stack protector isn't being used, the target requests it, and we're
+  // optimizing.
+ BitVector StackBytesFree;
+ if (!ObjectsToAllocate.empty() &&
+ Fn.getTarget().getOptLevel() != CodeGenOpt::None &&
+ MFI->getStackProtectorIndex() < 0 && TFI.enableStackSlotScavenging(Fn))
+ computeFreeStackSlots(MFI, StackGrowsDown, MinCSFrameIndex, MaxCSFrameIndex,
+ FixedCSEnd, StackBytesFree);
+
+ // Now walk the objects and actually assign base offsets to them.
+ for (auto &Object : ObjectsToAllocate)
+ if (!scavengeStackSlot(MFI, Object, StackGrowsDown, MaxAlign,
+ StackBytesFree))
+ AdjustStackOffset(MFI, Object, StackGrowsDown, Offset, MaxAlign, Skew);
+
// Make sure the special register scavenging spill slot is closest to the
// stack pointer.
if (RS && !EarlyScavengingSlots) {
@@ -757,7 +953,7 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) {
// If the frame pointer is eliminated, all frame offsets will be relative to
// SP not FP. Align to MaxAlign so this works.
StackAlign = std::max(StackAlign, MaxAlign);
- Offset = RoundUpToAlignment(Offset, StackAlign, Skew);
+ Offset = alignTo(Offset, StackAlign, Skew);
}
// Update frame info to pretend that this is part of the stack...
@@ -851,7 +1047,7 @@ void PEI::replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &Fn,
unsigned FrameSetupOpcode = TII.getCallFrameSetupOpcode();
unsigned FrameDestroyOpcode = TII.getCallFrameDestroyOpcode();
- if (RS && !FrameIndexVirtualScavenging) RS->enterBasicBlock(BB);
+ if (RS && !FrameIndexVirtualScavenging) RS->enterBasicBlock(*BB);
bool InsideCallSequence = false;
@@ -860,38 +1056,31 @@ void PEI::replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &Fn,
if (I->getOpcode() == FrameSetupOpcode ||
I->getOpcode() == FrameDestroyOpcode) {
InsideCallSequence = (I->getOpcode() == FrameSetupOpcode);
- SPAdj += TII.getSPAdjust(I);
-
- MachineBasicBlock::iterator PrevI = BB->end();
- if (I != BB->begin()) PrevI = std::prev(I);
- TFI->eliminateCallFramePseudoInstr(Fn, *BB, I);
+ SPAdj += TII.getSPAdjust(*I);
- // Visit the instructions created by eliminateCallFramePseudoInstr().
- if (PrevI == BB->end())
- I = BB->begin(); // The replaced instr was the first in the block.
- else
- I = std::next(PrevI);
+ I = TFI->eliminateCallFramePseudoInstr(Fn, *BB, I);
continue;
}
- MachineInstr *MI = I;
+ MachineInstr &MI = *I;
bool DoIncr = true;
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- if (!MI->getOperand(i).isFI())
+ bool DidFinishLoop = true;
+ for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+ if (!MI.getOperand(i).isFI())
continue;
// Frame indices in debug values are encoded in a target independent
// way with simply the frame index and offset rather than any
// target-specific addressing mode.
- if (MI->isDebugValue()) {
+ if (MI.isDebugValue()) {
assert(i == 0 && "Frame indices can only appear as the first "
"operand of a DBG_VALUE machine instruction");
unsigned Reg;
- MachineOperand &Offset = MI->getOperand(1);
- Offset.setImm(Offset.getImm() +
- TFI->getFrameIndexReference(
- Fn, MI->getOperand(0).getIndex(), Reg));
- MI->getOperand(0).ChangeToRegister(Reg, false /*isDef*/);
+ MachineOperand &Offset = MI.getOperand(1);
+ Offset.setImm(
+ Offset.getImm() +
+ TFI->getFrameIndexReference(Fn, MI.getOperand(0).getIndex(), Reg));
+ MI.getOperand(0).ChangeToRegister(Reg, false /*isDef*/);
continue;
}
@@ -900,18 +1089,16 @@ void PEI::replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &Fn,
// implementation other than historical accident. The only
// remaining difference is the unconditional use of the stack
// pointer as the base register.
- if (MI->getOpcode() == TargetOpcode::STATEPOINT) {
- assert((!MI->isDebugValue() || i == 0) &&
+ if (MI.getOpcode() == TargetOpcode::STATEPOINT) {
+ assert((!MI.isDebugValue() || i == 0) &&
"Frame indicies can only appear as the first operand of a "
"DBG_VALUE machine instruction");
unsigned Reg;
- MachineOperand &Offset = MI->getOperand(i + 1);
- const unsigned refOffset =
- TFI->getFrameIndexReferenceFromSP(Fn, MI->getOperand(i).getIndex(),
- Reg);
-
+ MachineOperand &Offset = MI.getOperand(i + 1);
+ int refOffset = TFI->getFrameIndexReferencePreferSP(
+ Fn, MI.getOperand(i).getIndex(), Reg, /*IgnoreSPUpdates*/ false);
Offset.setImm(Offset.getImm() + refOffset);
- MI->getOperand(i).ChangeToRegister(Reg, false /*isDef*/);
+ MI.getOperand(i).ChangeToRegister(Reg, false /*isDef*/);
continue;
}
@@ -937,7 +1124,7 @@ void PEI::replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &Fn,
DoIncr = false;
}
- MI = nullptr;
+ DidFinishLoop = false;
break;
}
@@ -948,45 +1135,46 @@ void PEI::replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &Fn,
// Note that this must come after eliminateFrameIndex, because
// if I itself referred to a frame index, we shouldn't count its own
// adjustment.
- if (MI && InsideCallSequence)
+ if (DidFinishLoop && InsideCallSequence)
SPAdj += TII.getSPAdjust(MI);
if (DoIncr && I != BB->end()) ++I;
// Update register states.
- if (RS && !FrameIndexVirtualScavenging && MI) RS->forward(MI);
+ if (RS && !FrameIndexVirtualScavenging && DidFinishLoop)
+ RS->forward(MI);
}
}
-/// scavengeFrameVirtualRegs - Replace all frame index virtual registers
+/// doScavengeFrameVirtualRegs - Replace all frame index virtual registers
/// with physical registers. Use the register scavenger to find an
/// appropriate register to use.
///
/// FIXME: Iterating over the instruction stream is unnecessary. We can simply
/// iterate over the vreg use list, which at this point only contains machine
/// operands for which eliminateFrameIndex need a new scratch reg.
-void
-PEI::scavengeFrameVirtualRegs(MachineFunction &Fn) {
+static void
+doScavengeFrameVirtualRegs(MachineFunction &MF, RegScavenger *RS) {
// Run through the instructions and find any virtual registers.
- for (MachineFunction::iterator BB = Fn.begin(),
- E = Fn.end(); BB != E; ++BB) {
- RS->enterBasicBlock(&*BB);
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ for (MachineBasicBlock &MBB : MF) {
+ RS->enterBasicBlock(MBB);
int SPAdj = 0;
- // The instruction stream may change in the loop, so check BB->end()
+ // The instruction stream may change in the loop, so check MBB.end()
// directly.
- for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ) {
+ for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ) {
// We might end up here again with a NULL iterator if we scavenged a
// register for which we inserted spill code for definition by what was
- // originally the first instruction in BB.
+ // originally the first instruction in MBB.
if (I == MachineBasicBlock::iterator(nullptr))
- I = BB->begin();
+ I = MBB.begin();
- MachineInstr *MI = I;
+ const MachineInstr &MI = *I;
MachineBasicBlock::iterator J = std::next(I);
MachineBasicBlock::iterator P =
- I == BB->begin() ? MachineBasicBlock::iterator(nullptr)
+ I == MBB.begin() ? MachineBasicBlock::iterator(nullptr)
: std::prev(I);
// RS should process this instruction before we might scavenge at this
@@ -995,35 +1183,31 @@ PEI::scavengeFrameVirtualRegs(MachineFunction &Fn) {
// instruction are available, and defined registers are not.
RS->forward(I);
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- if (MI->getOperand(i).isReg()) {
- MachineOperand &MO = MI->getOperand(i);
- unsigned Reg = MO.getReg();
- if (Reg == 0)
- continue;
- if (!TargetRegisterInfo::isVirtualRegister(Reg))
- continue;
-
- // When we first encounter a new virtual register, it
- // must be a definition.
- assert(MI->getOperand(i).isDef() &&
- "frame index virtual missing def!");
- // Scavenge a new scratch register
- const TargetRegisterClass *RC = Fn.getRegInfo().getRegClass(Reg);
- unsigned ScratchReg = RS->scavengeRegister(RC, J, SPAdj);
-
- ++NumScavengedRegs;
-
- // Replace this reference to the virtual register with the
- // scratch register.
- assert (ScratchReg && "Missing scratch register!");
- Fn.getRegInfo().replaceRegWith(Reg, ScratchReg);
-
- // Because this instruction was processed by the RS before this
- // register was allocated, make sure that the RS now records the
- // register as being used.
- RS->setRegUsed(ScratchReg);
- }
+ for (const MachineOperand &MO : MI.operands()) {
+ if (!MO.isReg())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ continue;
+
+ // When we first encounter a new virtual register, it
+ // must be a definition.
+ assert(MO.isDef() && "frame index virtual missing def!");
+ // Scavenge a new scratch register
+ const TargetRegisterClass *RC = MRI.getRegClass(Reg);
+ unsigned ScratchReg = RS->scavengeRegister(RC, J, SPAdj);
+
+ ++NumScavengedRegs;
+
+ // Replace this reference to the virtual register with the
+ // scratch register.
+ assert(ScratchReg && "Missing scratch register!");
+ MRI.replaceRegWith(Reg, ScratchReg);
+
+ // Because this instruction was processed by the RS before this
+ // register was allocated, make sure that the RS now records the
+ // register as being used.
+ RS->setRegUsed(ScratchReg);
}
// If the scavenger needed to use one of its spill slots, the
@@ -1031,7 +1215,7 @@ PEI::scavengeFrameVirtualRegs(MachineFunction &Fn) {
// problem because we need the spill code before I: Move I to just
// prior to J.
if (I != std::prev(J)) {
- BB->splice(J, &*BB, I);
+ MBB.splice(J, &MBB, I);
// Before we move I, we need to prepare the RS to visit I again.
// Specifically, RS will assert if it sees uses of registers that
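
The new TM-aware PEI constructor lets virtual-register targets skip callee-save spilling and frame-virtual-register scavenging entirely. A sketch of the opt-out, with FooTargetMachine hypothetical and its constructor boilerplate omitted:

```cpp
#include "llvm/Target/TargetMachine.h"

// Returning false here makes PEI(TM) install the empty
// SpillCalleeSavedRegisters and ScavengeFrameVirtualRegs callbacks seen in
// the constructor above, and drops the AllVRegsAllocated requirement.
class FooTargetMachine : public llvm::LLVMTargetMachine {
public:
  bool usesPhysRegsForPEI() const override { return false; }
};
```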
diff --git a/contrib/llvm/lib/CodeGen/PseudoSourceValue.cpp b/contrib/llvm/lib/CodeGen/PseudoSourceValue.cpp
index 1f46417..804a4c3 100644
--- a/contrib/llvm/lib/CodeGen/PseudoSourceValue.cpp
+++ b/contrib/llvm/lib/CodeGen/PseudoSourceValue.cpp
@@ -11,16 +11,13 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/ManagedStatic.h"
-#include "llvm/Support/Mutex.h"
#include "llvm/Support/raw_ostream.h"
-#include <map>
using namespace llvm;
static const char *const PSVNames[] = {
diff --git a/contrib/llvm/lib/CodeGen/RegAllocBase.cpp b/contrib/llvm/lib/CodeGen/RegAllocBase.cpp
index 16ff48e..93eeb9c 100644
--- a/contrib/llvm/lib/CodeGen/RegAllocBase.cpp
+++ b/contrib/llvm/lib/CodeGen/RegAllocBase.cpp
@@ -22,9 +22,6 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/VirtRegMap.h"
#include "llvm/Target/TargetRegisterInfo.h"
-#ifndef NDEBUG
-#include "llvm/ADT/SparseBitVector.h"
-#endif
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
@@ -153,3 +150,12 @@ void RegAllocBase::allocatePhysRegs() {
}
}
}
+
+void RegAllocBase::postOptimization() {
+ spiller().postOptimization();
+ for (auto DeadInst : DeadRemats) {
+ LIS->RemoveMachineInstrFromMaps(*DeadInst);
+ DeadInst->eraseFromParent();
+ }
+ DeadRemats.clear();
+}
diff --git a/contrib/llvm/lib/CodeGen/RegAllocBase.h b/contrib/llvm/lib/CodeGen/RegAllocBase.h
index 659b8f5..296ffe8 100644
--- a/contrib/llvm/lib/CodeGen/RegAllocBase.h
+++ b/contrib/llvm/lib/CodeGen/RegAllocBase.h
@@ -65,6 +65,12 @@ protected:
LiveRegMatrix *Matrix;
RegisterClassInfo RegClassInfo;
+  /// An instruction which defines an original reg, and whose defs are all
+  /// dead after remat, is recorded in DeadRemats. Deleting such instructions
+  /// is postponed until all allocations are done, so their remat expressions
+  /// stay available for rematting every sibling of the original reg.
+ SmallPtrSet<MachineInstr *, 32> DeadRemats;
+
RegAllocBase()
: TRI(nullptr), MRI(nullptr), VRM(nullptr), LIS(nullptr), Matrix(nullptr) {}
@@ -77,6 +83,10 @@ protected:
// physical register assignments.
void allocatePhysRegs();
+  // Run the spiller's post-optimization and remove the dead defs left behind
+  // by rematerialization.
+ virtual void postOptimization();
+
// Get a temporary reference to a Spiller instance.
virtual Spiller &spiller() = 0;
diff --git a/contrib/llvm/lib/CodeGen/RegAllocBasic.cpp b/contrib/llvm/lib/CodeGen/RegAllocBasic.cpp
index cfe367d..11dfda6 100644
--- a/contrib/llvm/lib/CodeGen/RegAllocBasic.cpp
+++ b/contrib/llvm/lib/CodeGen/RegAllocBasic.cpp
@@ -199,7 +199,7 @@ bool RABasic::spillInterferences(LiveInterval &VirtReg, unsigned PhysReg,
Matrix->unassign(Spill);
// Spill the extracted interval.
- LiveRangeEdit LRE(&Spill, SplitVRegs, *MF, *LIS, VRM);
+ LiveRangeEdit LRE(&Spill, SplitVRegs, *MF, *LIS, VRM, nullptr, &DeadRemats);
spiller().spill(LRE);
}
return true;
@@ -258,7 +258,7 @@ unsigned RABasic::selectOrSplit(LiveInterval &VirtReg,
DEBUG(dbgs() << "spilling: " << VirtReg << '\n');
if (!VirtReg.isSpillable())
return ~0u;
- LiveRangeEdit LRE(&VirtReg, SplitVRegs, *MF, *LIS, VRM);
+ LiveRangeEdit LRE(&VirtReg, SplitVRegs, *MF, *LIS, VRM, nullptr, &DeadRemats);
spiller().spill(LRE);
// The live virtual register requesting allocation was spilled, so tell
@@ -283,6 +283,7 @@ bool RABasic::runOnMachineFunction(MachineFunction &mf) {
SpillerInstance.reset(createInlineSpiller(*this, *MF, *VRM));
allocatePhysRegs();
+ postOptimization();
// Diagnostic output before rewriting
DEBUG(dbgs() << "Post alloc VirtRegMap:\n" << *VRM << "\n");
diff --git a/contrib/llvm/lib/CodeGen/RegAllocFast.cpp b/contrib/llvm/lib/CodeGen/RegAllocFast.cpp
index 8d7a721..55fb33e 100644
--- a/contrib/llvm/lib/CodeGen/RegAllocFast.cpp
+++ b/contrib/llvm/lib/CodeGen/RegAllocFast.cpp
@@ -12,7 +12,6 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/CodeGen/Passes.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/IndexedMap.h"
#include "llvm/ADT/STLExtras.h"
@@ -25,13 +24,12 @@
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/RegAllocRegistry.h"
#include "llvm/CodeGen/RegisterClassInfo.h"
-#include "llvm/IR/BasicBlock.h"
-#include "llvm/Support/CommandLine.h"
+#include "llvm/IR/DebugInfo.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
#include <algorithm>
@@ -52,6 +50,7 @@ namespace {
static char ID;
RAFast() : MachineFunctionPass(ID), StackSlotForVirtReg(-1),
isBulkSpilling(false) {}
+
private:
MachineFunction *MF;
MachineRegisterInfo *MRI;
@@ -159,6 +158,11 @@ namespace {
MachineFunctionPass::getAnalysisUsage(AU);
}
+ MachineFunctionProperties getSetProperties() const override {
+ return MachineFunctionProperties().set(
+ MachineFunctionProperties::Property::AllVRegsAllocated);
+ }
+
private:
bool runOnMachineFunction(MachineFunction &Fn) override;
void AllocateBasicBlock();
@@ -174,7 +178,7 @@ namespace {
void spillVirtReg(MachineBasicBlock::iterator MI, unsigned VirtReg);
void usePhysReg(MachineOperand&);
- void definePhysReg(MachineInstr *MI, unsigned PhysReg, RegState NewState);
+ void definePhysReg(MachineInstr &MI, unsigned PhysReg, RegState NewState);
unsigned calcSpillCost(unsigned PhysReg) const;
void assignVirtToPhysReg(LiveReg&, unsigned PhysReg);
LiveRegMap::iterator findLiveVirtReg(unsigned VirtReg) {
@@ -184,11 +188,11 @@ namespace {
return LiveVirtRegs.find(TargetRegisterInfo::virtReg2Index(VirtReg));
}
LiveRegMap::iterator assignVirtToPhysReg(unsigned VReg, unsigned PhysReg);
- LiveRegMap::iterator allocVirtReg(MachineInstr *MI, LiveRegMap::iterator,
+ LiveRegMap::iterator allocVirtReg(MachineInstr &MI, LiveRegMap::iterator,
unsigned Hint);
- LiveRegMap::iterator defineVirtReg(MachineInstr *MI, unsigned OpNum,
+ LiveRegMap::iterator defineVirtReg(MachineInstr &MI, unsigned OpNum,
unsigned VirtReg, unsigned Hint);
- LiveRegMap::iterator reloadVirtReg(MachineInstr *MI, unsigned OpNum,
+ LiveRegMap::iterator reloadVirtReg(MachineInstr &MI, unsigned OpNum,
unsigned VirtReg, unsigned Hint);
void spillAll(MachineBasicBlock::iterator MI);
bool setPhysReg(MachineInstr *MI, unsigned OpNum, unsigned PhysReg);
@@ -280,7 +284,7 @@ void RAFast::spillVirtReg(MachineBasicBlock::iterator MI,
if (LR.Dirty) {
// If this physreg is used by the instruction, we want to kill it on the
// instruction, not on the spill.
- bool SpillKill = LR.LastUse != MI;
+ bool SpillKill = MachineBasicBlock::iterator(LR.LastUse) != MI;
LR.Dirty = false;
DEBUG(dbgs() << "Spilling " << PrintReg(LRI->VirtReg, TRI)
<< " in " << PrintReg(LR.PhysReg, TRI));
@@ -345,6 +349,11 @@ void RAFast::usePhysReg(MachineOperand &MO) {
unsigned PhysReg = MO.getReg();
assert(TargetRegisterInfo::isPhysicalRegister(PhysReg) &&
"Bad usePhysReg operand");
+
+ // Ignore undef uses.
+ if (MO.isUndef())
+ return;
+
markRegUsedInInstr(PhysReg);
switch (PhysRegState[PhysReg]) {
case regDisabled:
@@ -404,7 +413,7 @@ void RAFast::usePhysReg(MachineOperand &MO) {
/// definePhysReg - Mark PhysReg as reserved or free after spilling any
/// virtregs. This is very similar to defineVirtReg except the physreg is
/// reserved instead of allocated.
-void RAFast::definePhysReg(MachineInstr *MI, unsigned PhysReg,
+void RAFast::definePhysReg(MachineInstr &MI, unsigned PhysReg,
RegState NewState) {
markRegUsedInInstr(PhysReg);
switch (unsigned VirtReg = PhysRegState[PhysReg]) {
@@ -512,7 +521,7 @@ RAFast::assignVirtToPhysReg(unsigned VirtReg, unsigned PhysReg) {
}
/// allocVirtReg - Allocate a physical register for VirtReg.
-RAFast::LiveRegMap::iterator RAFast::allocVirtReg(MachineInstr *MI,
+RAFast::LiveRegMap::iterator RAFast::allocVirtReg(MachineInstr &MI,
LiveRegMap::iterator LRI,
unsigned Hint) {
const unsigned VirtReg = LRI->VirtReg;
@@ -577,18 +586,19 @@ RAFast::LiveRegMap::iterator RAFast::allocVirtReg(MachineInstr *MI,
}
// Nothing we can do. Report an error and keep going with a bad allocation.
- if (MI->isInlineAsm())
- MI->emitError("inline assembly requires more registers than available");
+ if (MI.isInlineAsm())
+ MI.emitError("inline assembly requires more registers than available");
else
- MI->emitError("ran out of registers during register allocation");
+ MI.emitError("ran out of registers during register allocation");
definePhysReg(MI, *AO.begin(), regFree);
return assignVirtToPhysReg(VirtReg, *AO.begin());
}
/// defineVirtReg - Allocate a register for VirtReg and mark it as dirty.
-RAFast::LiveRegMap::iterator
-RAFast::defineVirtReg(MachineInstr *MI, unsigned OpNum,
- unsigned VirtReg, unsigned Hint) {
+RAFast::LiveRegMap::iterator RAFast::defineVirtReg(MachineInstr &MI,
+ unsigned OpNum,
+ unsigned VirtReg,
+ unsigned Hint) {
assert(TargetRegisterInfo::isVirtualRegister(VirtReg) &&
"Not a virtual register");
LiveRegMap::iterator LRI;
@@ -607,11 +617,11 @@ RAFast::defineVirtReg(MachineInstr *MI, unsigned OpNum,
} else if (LRI->LastUse) {
// Redefining a live register - kill at the last use, unless it is this
// instruction defining VirtReg multiple times.
- if (LRI->LastUse != MI || LRI->LastUse->getOperand(LRI->LastOpNum).isUse())
+ if (LRI->LastUse != &MI || LRI->LastUse->getOperand(LRI->LastOpNum).isUse())
addKillFlag(*LRI);
}
assert(LRI->PhysReg && "Register not assigned");
- LRI->LastUse = MI;
+ LRI->LastUse = &MI;
LRI->LastOpNum = OpNum;
LRI->Dirty = true;
markRegUsedInInstr(LRI->PhysReg);
@@ -619,15 +629,16 @@ RAFast::defineVirtReg(MachineInstr *MI, unsigned OpNum,
}
/// reloadVirtReg - Make sure VirtReg is available in a physreg and return it.
-RAFast::LiveRegMap::iterator
-RAFast::reloadVirtReg(MachineInstr *MI, unsigned OpNum,
- unsigned VirtReg, unsigned Hint) {
+RAFast::LiveRegMap::iterator RAFast::reloadVirtReg(MachineInstr &MI,
+ unsigned OpNum,
+ unsigned VirtReg,
+ unsigned Hint) {
assert(TargetRegisterInfo::isVirtualRegister(VirtReg) &&
"Not a virtual register");
LiveRegMap::iterator LRI;
bool New;
std::tie(LRI, New) = LiveVirtRegs.insert(LiveReg(VirtReg));
- MachineOperand &MO = MI->getOperand(OpNum);
+ MachineOperand &MO = MI.getOperand(OpNum);
if (New) {
LRI = allocVirtReg(MI, LRI, Hint);
const TargetRegisterClass *RC = MRI->getRegClass(VirtReg);
@@ -662,7 +673,7 @@ RAFast::reloadVirtReg(MachineInstr *MI, unsigned OpNum,
MO.setIsDead(false);
}
assert(LRI->PhysReg && "Register not assigned");
- LRI->LastUse = MI;
+ LRI->LastUse = &MI;
LRI->LastOpNum = OpNum;
markRegUsedInInstr(LRI->PhysReg);
return LRI;
@@ -728,7 +739,7 @@ void RAFast::handleThroughOperands(MachineInstr *MI,
markRegUsedInInstr(Reg);
for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) {
if (ThroughRegs.count(PhysRegState[*AI]))
- definePhysReg(MI, *AI, regFree);
+ definePhysReg(*MI, *AI, regFree);
}
}
@@ -744,7 +755,7 @@ void RAFast::handleThroughOperands(MachineInstr *MI,
if (!MI->isRegTiedToDefOperand(i, &DefIdx)) continue;
DEBUG(dbgs() << "Operand " << i << "("<< MO << ") is tied to operand "
<< DefIdx << ".\n");
- LiveRegMap::iterator LRI = reloadVirtReg(MI, i, Reg, 0);
+ LiveRegMap::iterator LRI = reloadVirtReg(*MI, i, Reg, 0);
unsigned PhysReg = LRI->PhysReg;
setPhysReg(MI, i, PhysReg);
// Note: we don't update the def operand yet. That would cause the normal
@@ -753,7 +764,7 @@ void RAFast::handleThroughOperands(MachineInstr *MI,
DEBUG(dbgs() << "Partial redefine: " << MO << "\n");
// Reload the register, but don't assign to the operand just yet.
// That would confuse the later phys-def processing pass.
- LiveRegMap::iterator LRI = reloadVirtReg(MI, i, Reg, 0);
+ LiveRegMap::iterator LRI = reloadVirtReg(*MI, i, Reg, 0);
PartialDefs.push_back(LRI->PhysReg);
}
}
@@ -767,7 +778,7 @@ void RAFast::handleThroughOperands(MachineInstr *MI,
if (!MO.isEarlyClobber())
continue;
// Note: defineVirtReg may invalidate MO.
- LiveRegMap::iterator LRI = defineVirtReg(MI, i, Reg, 0);
+ LiveRegMap::iterator LRI = defineVirtReg(*MI, i, Reg, 0);
unsigned PhysReg = LRI->PhysReg;
if (setPhysReg(MI, i, PhysReg))
VirtDead.push_back(Reg);
@@ -801,14 +812,14 @@ void RAFast::AllocateBasicBlock() {
// Add live-in registers as live.
for (const auto &LI : MBB->liveins())
if (MRI->isAllocatable(LI.PhysReg))
- definePhysReg(MII, LI.PhysReg, regReserved);
+ definePhysReg(*MII, LI.PhysReg, regReserved);
SmallVector<unsigned, 8> VirtDead;
SmallVector<MachineInstr*, 32> Coalesced;
// Otherwise, sequentially allocate each instruction in the MBB.
while (MII != MBB->end()) {
- MachineInstr *MI = MII++;
+ MachineInstr *MI = &*MII++;
const MCInstrDesc &MCID = MI->getDesc();
DEBUG({
dbgs() << "\n>> " << *MI << "Regs:";
@@ -943,8 +954,8 @@ void RAFast::AllocateBasicBlock() {
if (MO.isUse()) {
usePhysReg(MO);
} else if (MO.isEarlyClobber()) {
- definePhysReg(MI, Reg, (MO.isImplicit() || MO.isDead()) ?
- regFree : regReserved);
+ definePhysReg(*MI, Reg,
+ (MO.isImplicit() || MO.isDead()) ? regFree : regReserved);
hasEarlyClobbers = true;
} else
hasPhysDefs = true;
@@ -977,7 +988,7 @@ void RAFast::AllocateBasicBlock() {
unsigned Reg = MO.getReg();
if (!TargetRegisterInfo::isVirtualRegister(Reg)) continue;
if (MO.isUse()) {
- LiveRegMap::iterator LRI = reloadVirtReg(MI, i, Reg, CopyDst);
+ LiveRegMap::iterator LRI = reloadVirtReg(*MI, i, Reg, CopyDst);
unsigned PhysReg = LRI->PhysReg;
CopySrc = (CopySrc == Reg || CopySrc == PhysReg) ? PhysReg : 0;
if (setPhysReg(MI, i, PhysReg))
@@ -1027,10 +1038,10 @@ void RAFast::AllocateBasicBlock() {
if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
if (!MRI->isAllocatable(Reg)) continue;
- definePhysReg(MI, Reg, MO.isDead() ? regFree : regReserved);
+ definePhysReg(*MI, Reg, MO.isDead() ? regFree : regReserved);
continue;
}
- LiveRegMap::iterator LRI = defineVirtReg(MI, i, Reg, CopySrc);
+ LiveRegMap::iterator LRI = defineVirtReg(*MI, i, Reg, CopySrc);
unsigned PhysReg = LRI->PhysReg;
if (setPhysReg(MI, i, PhysReg)) {
VirtDead.push_back(Reg);
diff --git a/contrib/llvm/lib/CodeGen/RegAllocGreedy.cpp b/contrib/llvm/lib/CodeGen/RegAllocGreedy.cpp
index 945cb9e..c4d4b1e 100644
--- a/contrib/llvm/lib/CodeGen/RegAllocGreedy.cpp
+++ b/contrib/llvm/lib/CodeGen/RegAllocGreedy.cpp
@@ -12,7 +12,6 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/CodeGen/Passes.h"
#include "AllocationOrder.h"
#include "InterferenceCache.h"
#include "LiveDebugVariables.h"
@@ -33,6 +32,7 @@
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/RegAllocRegistry.h"
#include "llvm/CodeGen/RegisterClassInfo.h"
#include "llvm/CodeGen/VirtRegMap.h"
@@ -44,6 +44,7 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Timer.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
#include <queue>
@@ -55,14 +56,14 @@ STATISTIC(NumGlobalSplits, "Number of split global live ranges");
STATISTIC(NumLocalSplits, "Number of split local live ranges");
STATISTIC(NumEvicted, "Number of interferences evicted");
-static cl::opt<SplitEditor::ComplementSpillMode>
-SplitSpillMode("split-spill-mode", cl::Hidden,
- cl::desc("Spill mode for splitting live ranges"),
- cl::values(clEnumValN(SplitEditor::SM_Partition, "default", "Default"),
- clEnumValN(SplitEditor::SM_Size, "size", "Optimize for size"),
- clEnumValN(SplitEditor::SM_Speed, "speed", "Optimize for speed"),
- clEnumValEnd),
- cl::init(SplitEditor::SM_Partition));
+static cl::opt<SplitEditor::ComplementSpillMode> SplitSpillMode(
+ "split-spill-mode", cl::Hidden,
+ cl::desc("Spill mode for splitting live ranges"),
+ cl::values(clEnumValN(SplitEditor::SM_Partition, "default", "Default"),
+ clEnumValN(SplitEditor::SM_Size, "size", "Optimize for size"),
+ clEnumValN(SplitEditor::SM_Speed, "speed", "Optimize for speed"),
+ clEnumValEnd),
+ cl::init(SplitEditor::SM_Speed));
static cl::opt<unsigned>
LastChanceRecoloringMaxDepth("lcr-max-depth", cl::Hidden,
@@ -128,6 +129,7 @@ class RAGreedy : public MachineFunctionPass,
EdgeBundles *Bundles;
SpillPlacement *SpillPlacer;
LiveDebugVariables *DebugVars;
+ AliasAnalysis *AA;
// state
std::unique_ptr<Spiller> SpillerInstance;
@@ -954,22 +956,28 @@ bool RAGreedy::addSplitConstraints(InterferenceCache::Cursor Intf,
// Interference for the live-in value.
if (BI.LiveIn) {
- if (Intf.first() <= Indexes->getMBBStartIdx(BC.Number))
- BC.Entry = SpillPlacement::MustSpill, ++Ins;
- else if (Intf.first() < BI.FirstInstr)
- BC.Entry = SpillPlacement::PrefSpill, ++Ins;
- else if (Intf.first() < BI.LastInstr)
+ if (Intf.first() <= Indexes->getMBBStartIdx(BC.Number)) {
+ BC.Entry = SpillPlacement::MustSpill;
+ ++Ins;
+ } else if (Intf.first() < BI.FirstInstr) {
+ BC.Entry = SpillPlacement::PrefSpill;
++Ins;
+ } else if (Intf.first() < BI.LastInstr) {
+ ++Ins;
+ }
}
// Interference for the live-out value.
if (BI.LiveOut) {
- if (Intf.last() >= SA->getLastSplitPoint(BC.Number))
- BC.Exit = SpillPlacement::MustSpill, ++Ins;
- else if (Intf.last() > BI.LastInstr)
- BC.Exit = SpillPlacement::PrefSpill, ++Ins;
- else if (Intf.last() > BI.FirstInstr)
+ if (Intf.last() >= SA->getLastSplitPoint(BC.Number)) {
+ BC.Exit = SpillPlacement::MustSpill;
++Ins;
+ } else if (Intf.last() > BI.LastInstr) {
+ BC.Exit = SpillPlacement::PrefSpill;
+ ++Ins;
+ } else if (Intf.last() > BI.FirstInstr) {
+ ++Ins;
+ }
}
// Accumulate the total frequency of inserted spill code.
@@ -1392,8 +1400,10 @@ unsigned RAGreedy::calculateRegionSplitCost(LiveInterval &VirtReg,
if (i == BestCand || !GlobalCand[i].PhysReg)
continue;
unsigned Count = GlobalCand[i].LiveBundles.count();
- if (Count < WorstCount)
- Worst = i, WorstCount = Count;
+ if (Count < WorstCount) {
+ Worst = i;
+ WorstCount = Count;
+ }
}
--NumCands;
GlobalCand[Worst] = GlobalCand[NumCands];
@@ -1457,7 +1467,7 @@ unsigned RAGreedy::doRegionSplit(LiveInterval &VirtReg, unsigned BestCand,
SmallVectorImpl<unsigned> &NewVRegs) {
SmallVector<unsigned, 8> UsedCands;
// Prepare split editor.
- LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this);
+ LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this, &DeadRemats);
SE->reset(LREdit, SplitSpillMode);
// Assign all edge bundles to the preferred candidate, or NoCand.
@@ -1505,7 +1515,7 @@ unsigned RAGreedy::tryBlockSplit(LiveInterval &VirtReg, AllocationOrder &Order,
assert(&SA->getParent() == &VirtReg && "Live range wasn't analyzed");
unsigned Reg = VirtReg.reg;
bool SingleInstrs = RegClassInfo.isProperSubClass(MRI->getRegClass(Reg));
- LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this);
+ LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this, &DeadRemats);
SE->reset(LREdit, SplitSpillMode);
ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks();
for (unsigned i = 0; i != UseBlocks.size(); ++i) {
@@ -1577,7 +1587,7 @@ RAGreedy::tryInstructionSplit(LiveInterval &VirtReg, AllocationOrder &Order,
// Always enable split spill mode, since we're effectively spilling to a
// register.
- LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this);
+ LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this, &DeadRemats);
SE->reset(LREdit, SplitEditor::SM_Size);
ArrayRef<SlotIndex> Uses = SA->getUseSlots();
@@ -1900,7 +1910,7 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order,
<< '-' << Uses[BestAfter] << ", " << BestDiff
<< ", " << (BestAfter - BestBefore + 1) << " instrs\n");
- LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this);
+ LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this, &DeadRemats);
SE->reset(LREdit);
SE->openIntv();
@@ -2543,7 +2553,7 @@ unsigned RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg,
NewVRegs.push_back(VirtReg.reg);
} else {
NamedRegionTimer T("Spiller", TimerGroupName, TimePassesIsEnabled);
- LiveRangeEdit LRE(&VirtReg, NewVRegs, *MF, *LIS, VRM, this);
+ LiveRangeEdit LRE(&VirtReg, NewVRegs, *MF, *LIS, VRM, this, &DeadRemats);
spiller().spill(LRE);
setStage(NewVRegs.begin(), NewVRegs.end(), RS_Done);
@@ -2583,6 +2593,7 @@ bool RAGreedy::runOnMachineFunction(MachineFunction &mf) {
Bundles = &getAnalysis<EdgeBundles>();
SpillPlacer = &getAnalysis<SpillPlacement>();
DebugVars = &getAnalysis<LiveDebugVariables>();
+ AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
initializeCSRCost();
@@ -2591,7 +2602,7 @@ bool RAGreedy::runOnMachineFunction(MachineFunction &mf) {
DEBUG(LIS->dump());
SA.reset(new SplitAnalysis(*VRM, *LIS, *Loops));
- SE.reset(new SplitEditor(*SA, *LIS, *VRM, *DomTree, *MBFI));
+ SE.reset(new SplitEditor(*SA, *AA, *LIS, *VRM, *DomTree, *MBFI));
ExtraRegInfo.clear();
ExtraRegInfo.resize(MRI->getNumVirtRegs());
NextCascade = 1;
@@ -2601,6 +2612,8 @@ bool RAGreedy::runOnMachineFunction(MachineFunction &mf) {
allocatePhysRegs();
tryHintsRecoloring();
+ postOptimization();
+
releaseMemory();
return true;
}
diff --git a/contrib/llvm/lib/CodeGen/RegAllocPBQP.cpp b/contrib/llvm/lib/CodeGen/RegAllocPBQP.cpp
index fd28b05..d1221ec 100644
--- a/contrib/llvm/lib/CodeGen/RegAllocPBQP.cpp
+++ b/contrib/llvm/lib/CodeGen/RegAllocPBQP.cpp
@@ -123,6 +123,12 @@ private:
RegSet VRegsToAlloc, EmptyIntervalVRegs;
+  /// An instruction that defines an original register and whose defs are all
+  /// dead after rematerialization is saved in DeadRemats. Deleting such an
+  /// instruction is postponed until all allocations are done, so its remat
+  /// expression stays available for rematerializing all the siblings of the
+  /// original register.
+ SmallPtrSet<MachineInstr *, 32> DeadRemats;
+
/// \brief Finds the initial set of vreg intervals to allocate.
void findVRegIntervalsToAlloc(const MachineFunction &MF, LiveIntervals &LIS);
@@ -146,6 +152,7 @@ private:
void finalizeAlloc(MachineFunction &MF, LiveIntervals &LIS,
VirtRegMap &VRM) const;
+ void postOptimization(Spiller &VRegSpiller, LiveIntervals &LIS);
};
char RegAllocPBQP::ID = 0;
@@ -631,7 +638,8 @@ void RegAllocPBQP::spillVReg(unsigned VReg,
VirtRegMap &VRM, Spiller &VRegSpiller) {
VRegsToAlloc.erase(VReg);
- LiveRangeEdit LRE(&LIS.getInterval(VReg), NewIntervals, MF, LIS, &VRM);
+ LiveRangeEdit LRE(&LIS.getInterval(VReg), NewIntervals, MF, LIS, &VRM,
+ nullptr, &DeadRemats);
VRegSpiller.spill(LRE);
const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
@@ -713,6 +721,16 @@ void RegAllocPBQP::finalizeAlloc(MachineFunction &MF,
}
}
+void RegAllocPBQP::postOptimization(Spiller &VRegSpiller, LiveIntervals &LIS) {
+ VRegSpiller.postOptimization();
+  // Remove dead defs because of rematerialization.
+ for (auto DeadInst : DeadRemats) {
+ LIS.RemoveMachineInstrFromMaps(*DeadInst);
+ DeadInst->eraseFromParent();
+ }
+ DeadRemats.clear();
+}
+
static inline float normalizePBQPSpillWeight(float UseDefFreq, unsigned Size,
unsigned NumInstr) {
// All intervals have a spill weight that is mostly proportional to the number
@@ -798,6 +816,7 @@ bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) {
// Finalise allocation, allocate empty ranges.
finalizeAlloc(MF, LIS, VRM);
+ postOptimization(*VRegSpiller, LIS);
VRegsToAlloc.clear();
EmptyIntervalVRegs.clear();
@@ -839,7 +858,7 @@ void PBQP::RegAlloc::PBQPRAGraph::dump(raw_ostream &OS) const {
}
}
-void PBQP::RegAlloc::PBQPRAGraph::dump() const { dump(dbgs()); }
+LLVM_DUMP_METHOD void PBQP::RegAlloc::PBQPRAGraph::dump() const { dump(dbgs()); }
void PBQP::RegAlloc::PBQPRAGraph::printDot(raw_ostream &OS) const {
OS << "graph {\n";
diff --git a/contrib/llvm/lib/CodeGen/RegUsageInfoCollector.cpp b/contrib/llvm/lib/CodeGen/RegUsageInfoCollector.cpp
new file mode 100644
index 0000000..50b8854
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/RegUsageInfoCollector.cpp
@@ -0,0 +1,142 @@
+//===-- RegUsageInfoCollector.cpp - Register Usage Information Collector --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// This pass is required to take advantage of the interprocedural register
+/// allocation infrastructure.
+///
+/// This pass is a simple MachineFunction pass that collects register usage
+/// details by iterating over each physical register and checking
+/// MRI::isPhysRegModified(), then builds a RegMask from these details.
+/// The pass stores this RegMask via the PhysicalRegisterUsageInfo analysis.
+///
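+/// A RegMask is a bit vector packed into 32-bit chunks: a set bit means the
+/// register is preserved across the call and a cleared bit means it may be
+/// clobbered; for register number R the chunk is R / 32 and the bit R % 32.
+///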
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/RegisterUsageInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetFrameLowering.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "ip-regalloc"
+
+STATISTIC(NumCSROpt,
+ "Number of functions optimized for callee saved registers");
+
+namespace llvm {
+void initializeRegUsageInfoCollectorPass(PassRegistry &);
+}
+
+namespace {
+class RegUsageInfoCollector : public MachineFunctionPass {
+public:
+ RegUsageInfoCollector() : MachineFunctionPass(ID) {
+ PassRegistry &Registry = *PassRegistry::getPassRegistry();
+ initializeRegUsageInfoCollectorPass(Registry);
+ }
+
+ const char *getPassName() const override {
+ return "Register Usage Information Collector Pass";
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ static char ID;
+
+private:
+ void markRegClobbered(const TargetRegisterInfo *TRI, uint32_t *RegMask,
+ unsigned PReg);
+};
+} // end of anonymous namespace
+
+char RegUsageInfoCollector::ID = 0;
+
+INITIALIZE_PASS_BEGIN(RegUsageInfoCollector, "RegUsageInfoCollector",
+ "Register Usage Information Collector", false, false)
+INITIALIZE_PASS_DEPENDENCY(PhysicalRegisterUsageInfo)
+INITIALIZE_PASS_END(RegUsageInfoCollector, "RegUsageInfoCollector",
+ "Register Usage Information Collector", false, false)
+
+FunctionPass *llvm::createRegUsageInfoCollector() {
+ return new RegUsageInfoCollector();
+}
+
+void RegUsageInfoCollector::markRegClobbered(const TargetRegisterInfo *TRI,
+ uint32_t *RegMask, unsigned PReg) {
+  // If PReg is clobbered, then all of its aliases are also clobbered.
+ for (MCRegAliasIterator AI(PReg, TRI, true); AI.isValid(); ++AI)
+ RegMask[*AI / 32] &= ~(1u << *AI % 32);
+}
+
+void RegUsageInfoCollector::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<PhysicalRegisterUsageInfo>();
+ AU.setPreservesAll();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+bool RegUsageInfoCollector::runOnMachineFunction(MachineFunction &MF) {
+ MachineRegisterInfo *MRI = &MF.getRegInfo();
+ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
+ const TargetMachine &TM = MF.getTarget();
+
+ DEBUG(dbgs() << " -------------------- " << getPassName()
+ << " -------------------- \n");
+ DEBUG(dbgs() << "Function Name : " << MF.getName() << "\n");
+
+ std::vector<uint32_t> RegMask;
+
+ // Compute the size of the bit vector to represent all the registers.
+ // The bit vector is broken into 32-bit chunks, thus takes the ceil of
+ // the number of registers divided by 32 for the size.
+ unsigned RegMaskSize = (TRI->getNumRegs() + 31) / 32;
+ RegMask.resize(RegMaskSize, 0xFFFFFFFF);
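+  // For example, a target with 200 registers needs (200 + 31) / 32 = 7
+  // chunks; starting from all-ones marks every register as preserved until
+  // it is proven clobbered below.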
+
+ const Function *F = MF.getFunction();
+
+ PhysicalRegisterUsageInfo *PRUI = &getAnalysis<PhysicalRegisterUsageInfo>();
+
+ PRUI->setTargetMachine(&TM);
+
+ DEBUG(dbgs() << "Clobbered Registers: ");
+
+ for (unsigned PReg = 1, PRegE = TRI->getNumRegs(); PReg < PRegE; ++PReg)
+ if (MRI->isPhysRegModified(PReg, true))
+ markRegClobbered(TRI, &RegMask[0], PReg);
+
+ if (!TargetFrameLowering::isSafeForNoCSROpt(F)) {
+ const uint32_t *CallPreservedMask =
+ TRI->getCallPreservedMask(MF, F->getCallingConv());
+ // Set callee saved register as preserved.
+ for (unsigned i = 0; i < RegMaskSize; ++i)
+ RegMask[i] = RegMask[i] | CallPreservedMask[i];
+ } else {
+ ++NumCSROpt;
+ DEBUG(dbgs() << MF.getName()
+ << " function optimized for not having CSR.\n");
+ }
+
+ for (unsigned PReg = 1, PRegE = TRI->getNumRegs(); PReg < PRegE; ++PReg)
+ if (MachineOperand::clobbersPhysReg(&(RegMask[0]), PReg))
+ DEBUG(dbgs() << TRI->getName(PReg) << " ");
+
+ DEBUG(dbgs() << " \n----------------------------------------\n");
+
+ PRUI->storeUpdateRegUsageInfo(F, std::move(RegMask));
+
+ return false;
+}
diff --git a/contrib/llvm/lib/CodeGen/RegUsageInfoPropagate.cpp b/contrib/llvm/lib/CodeGen/RegUsageInfoPropagate.cpp
new file mode 100644
index 0000000..7595661
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/RegUsageInfoPropagate.cpp
@@ -0,0 +1,131 @@
+//=--- RegUsageInfoPropagate.cpp - Register Usage Information Propagation ---=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// This pass is required to take advantage of the interprocedural register
+/// allocation infrastructure.
+///
+/// This pass iterates through the MachineInstrs in a given MachineFunction,
+/// and at each callsite queries RegisterUsageInfo for the RegMask (computed
+/// from the callee's actual register allocation). If that RegMask is
+/// available, the pass updates the RegMask of the call instruction. The
+/// updated RegMask is then used by the register allocator while allocating
+/// the current MachineFunction.
+///
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/RegisterUsageInfo.h"
+#include "llvm/IR/Module.h"
+#include "llvm/PassAnalysisSupport.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
+#include <map>
+#include <string>
+
+namespace llvm {
+void initializeRegUsageInfoPropagationPassPass(PassRegistry &);
+}
+
+using namespace llvm;
+
+#define DEBUG_TYPE "ip-regalloc"
+
+#define RUIP_NAME "Register Usage Information Propagation"
+
+namespace {
+class RegUsageInfoPropagationPass : public MachineFunctionPass {
+
+public:
+ RegUsageInfoPropagationPass() : MachineFunctionPass(ID) {
+ PassRegistry &Registry = *PassRegistry::getPassRegistry();
+ initializeRegUsageInfoPropagationPassPass(Registry);
+ }
+
+ const char *getPassName() const override { return RUIP_NAME; }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+
+ static char ID;
+
+private:
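+  /// Replace every regmask operand of \p MI with \p RegMask.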
+ static void setRegMask(MachineInstr &MI, const uint32_t *RegMask) {
+ for (MachineOperand &MO : MI.operands()) {
+ if (MO.isRegMask())
+ MO.setRegMask(RegMask);
+ }
+ }
+};
+} // end of anonymous namespace
+char RegUsageInfoPropagationPass::ID = 0;
+
+INITIALIZE_PASS_BEGIN(RegUsageInfoPropagationPass, "reg-usage-propagation",
+ RUIP_NAME, false, false)
+INITIALIZE_PASS_DEPENDENCY(PhysicalRegisterUsageInfo)
+INITIALIZE_PASS_END(RegUsageInfoPropagationPass, "reg-usage-propagation",
+ RUIP_NAME, false, false)
+
+FunctionPass *llvm::createRegUsageInfoPropPass() {
+ return new RegUsageInfoPropagationPass();
+}
+
+void RegUsageInfoPropagationPass::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<PhysicalRegisterUsageInfo>();
+ AU.setPreservesAll();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+bool RegUsageInfoPropagationPass::runOnMachineFunction(MachineFunction &MF) {
+ const Module *M = MF.getFunction()->getParent();
+ PhysicalRegisterUsageInfo *PRUI = &getAnalysis<PhysicalRegisterUsageInfo>();
+
+ DEBUG(dbgs() << " ++++++++++++++++++++ " << getPassName()
+ << " ++++++++++++++++++++ \n");
+ DEBUG(dbgs() << "MachineFunction : " << MF.getName() << "\n");
+
+ bool Changed = false;
+
+ for (MachineBasicBlock &MBB : MF) {
+ for (MachineInstr &MI : MBB) {
+ if (!MI.isCall())
+ continue;
+ DEBUG(dbgs()
+ << "Call Instruction Before Register Usage Info Propagation : \n");
+ DEBUG(dbgs() << MI << "\n");
+
+ auto UpdateRegMask = [&](const Function *F) {
+ const auto *RegMask = PRUI->getRegUsageInfo(F);
+ if (!RegMask)
+ return;
+ setRegMask(MI, &(*RegMask)[0]);
+ Changed = true;
+ };
+
+ MachineOperand &Operand = MI.getOperand(0);
+ if (Operand.isGlobal())
+ UpdateRegMask(cast<Function>(Operand.getGlobal()));
+ else if (Operand.isSymbol())
+ UpdateRegMask(M->getFunction(Operand.getSymbolName()));
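+      // Otherwise the call is indirect and there is no callee function to
+      // query, so the conservative regmask is left in place.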
+
+ DEBUG(dbgs()
+ << "Call Instruction After Register Usage Info Propagation : \n");
+ DEBUG(dbgs() << MI << "\n");
+ }
+ }
+
+ DEBUG(dbgs() << " +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++"
+ "++++++ \n");
+ return Changed;
+}
diff --git a/contrib/llvm/lib/CodeGen/RegisterCoalescer.cpp b/contrib/llvm/lib/CodeGen/RegisterCoalescer.cpp
index c1ff13e..617ece9 100644
--- a/contrib/llvm/lib/CodeGen/RegisterCoalescer.cpp
+++ b/contrib/llvm/lib/CodeGen/RegisterCoalescer.cpp
@@ -203,6 +203,16 @@ namespace {
/// make sure to set it to the correct physical subregister.
void updateRegDefsUses(unsigned SrcReg, unsigned DstReg, unsigned SubIdx);
+ /// If the given machine operand reads only undefined lanes add an undef
+ /// flag.
+ /// This can happen when undef uses were previously concealed by a copy
+ /// which we coalesced. Example:
+ /// %vreg0:sub0<def,read-undef> = ...
+ /// %vreg1 = COPY %vreg0 <-- Coalescing COPY reveals undef
+ /// = use %vreg1:sub1 <-- hidden undef use
+ void addUndefFlag(const LiveInterval &Int, SlotIndex UseIdx,
+ MachineOperand &MO, unsigned SubRegIdx);
+
/// Handle copies of undef values.
/// Returns true if @p CopyMI was a copy of an undef value and eliminated.
bool eliminateUndefCopy(MachineInstr *CopyMI);
@@ -467,7 +477,7 @@ bool RegisterCoalescer::adjustCopiesBackFrom(const CoalescerPair &CP,
LIS->getInterval(CP.isFlipped() ? CP.getDstReg() : CP.getSrcReg());
LiveInterval &IntB =
LIS->getInterval(CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg());
- SlotIndex CopyIdx = LIS->getInstructionIndex(CopyMI).getRegSlot();
+ SlotIndex CopyIdx = LIS->getInstructionIndex(*CopyMI).getRegSlot();
// We have a non-trivially-coalescable copy with IntA being the source and
// IntB being the dest, thus this defines a value number in IntB. If the
@@ -642,7 +652,7 @@ bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP,
// BValNo is a value number in B that is defined by a copy from A. 'B1' in
// the example above.
- SlotIndex CopyIdx = LIS->getInstructionIndex(CopyMI).getRegSlot();
+ SlotIndex CopyIdx = LIS->getInstructionIndex(*CopyMI).getRegSlot();
VNInfo *BValNo = IntB.getVNInfoAt(CopyIdx);
assert(BValNo != nullptr && BValNo->def == CopyIdx);
@@ -674,7 +684,7 @@ bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP,
// operands then all possible variants (i.e. op#1<->op#2, op#1<->op#3,
// op#2<->op#3) of commute transformation should be considered/tried here.
unsigned NewDstIdx = TargetInstrInfo::CommuteAnyOperandIndex;
- if (!TII->findCommutedOpIndices(DefMI, UseOpIdx, NewDstIdx))
+ if (!TII->findCommutedOpIndices(*DefMI, UseOpIdx, NewDstIdx))
return false;
MachineOperand &NewDstMO = DefMI->getOperand(NewDstIdx);
@@ -692,7 +702,7 @@ bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP,
for (MachineOperand &MO : MRI->use_nodbg_operands(IntA.reg)) {
MachineInstr *UseMI = MO.getParent();
unsigned OpNo = &MO - &UseMI->getOperand(0);
- SlotIndex UseIdx = LIS->getInstructionIndex(UseMI);
+ SlotIndex UseIdx = LIS->getInstructionIndex(*UseMI);
LiveInterval::iterator US = IntA.FindSegmentContaining(UseIdx);
if (US == IntA.end() || US->valno != AValNo)
continue;
@@ -708,7 +718,7 @@ bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP,
// transformation. Start by commuting the instruction.
MachineBasicBlock *MBB = DefMI->getParent();
MachineInstr *NewMI =
- TII->commuteInstruction(DefMI, false, UseOpIdx, NewDstIdx);
+ TII->commuteInstruction(*DefMI, false, UseOpIdx, NewDstIdx);
if (!NewMI)
return false;
if (TargetRegisterInfo::isVirtualRegister(IntA.reg) &&
@@ -716,7 +726,7 @@ bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP,
!MRI->constrainRegClass(IntB.reg, MRI->getRegClass(IntA.reg)))
return false;
if (NewMI != DefMI) {
- LIS->ReplaceMachineInstrInMaps(DefMI, NewMI);
+ LIS->ReplaceMachineInstrInMaps(*DefMI, *NewMI);
MachineBasicBlock::iterator Pos = DefMI;
MBB->insert(Pos, NewMI);
MBB->erase(DefMI);
@@ -746,7 +756,7 @@ bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP,
UseMO.setReg(NewReg);
continue;
}
- SlotIndex UseIdx = LIS->getInstructionIndex(UseMI).getRegSlot(true);
+ SlotIndex UseIdx = LIS->getInstructionIndex(*UseMI).getRegSlot(true);
LiveInterval::iterator US = IntA.FindSegmentContaining(UseIdx);
assert(US != IntA.end() && "Use must be live");
if (US->valno != AValNo)
@@ -784,7 +794,7 @@ bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP,
}
ErasedInstrs.insert(UseMI);
- LIS->RemoveMachineInstrFromMaps(UseMI);
+ LIS->RemoveMachineInstrFromMaps(*UseMI);
UseMI->eraseFromParent();
}
@@ -879,7 +889,7 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,
return false;
LiveInterval &SrcInt = LIS->getInterval(SrcReg);
- SlotIndex CopyIdx = LIS->getInstructionIndex(CopyMI);
+ SlotIndex CopyIdx = LIS->getInstructionIndex(*CopyMI);
VNInfo *ValNo = SrcInt.Query(CopyIdx).valueIn();
assert(ValNo && "CopyMI input register not live");
if (ValNo->isPHIDef() || ValNo->isUnused())
@@ -891,9 +901,9 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,
IsDefCopy = true;
return false;
}
- if (!TII->isAsCheapAsAMove(DefMI))
+ if (!TII->isAsCheapAsAMove(*DefMI))
return false;
- if (!TII->isTriviallyReMaterializable(DefMI, AA))
+ if (!TII->isTriviallyReMaterializable(*DefMI, AA))
return false;
if (!definesFullReg(*DefMI, SrcReg))
return false;
@@ -939,11 +949,13 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,
}
}
+ DebugLoc DL = CopyMI->getDebugLoc();
MachineBasicBlock *MBB = CopyMI->getParent();
MachineBasicBlock::iterator MII =
std::next(MachineBasicBlock::iterator(CopyMI));
- TII->reMaterialize(*MBB, MII, DstReg, SrcIdx, DefMI, *TRI);
- MachineInstr *NewMI = std::prev(MII);
+ TII->reMaterialize(*MBB, MII, DstReg, SrcIdx, *DefMI, *TRI);
+ MachineInstr &NewMI = *std::prev(MII);
+ NewMI.setDebugLoc(DL);
// In a situation like the following:
// %vreg0:subreg = instr ; DefMI, subreg = DstIdx
@@ -952,7 +964,7 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,
// %vreg1 = instr
const TargetRegisterClass *NewRC = CP.getNewRC();
if (DstIdx != 0) {
- MachineOperand &DefMO = NewMI->getOperand(0);
+ MachineOperand &DefMO = NewMI.getOperand(0);
if (DefMO.getSubReg() == DstIdx) {
assert(SrcIdx == 0 && CP.isFlipped()
&& "Shouldn't have SrcIdx+DstIdx at this point");
@@ -967,7 +979,24 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,
}
}
- LIS->ReplaceMachineInstrInMaps(CopyMI, NewMI);
+ // CopyMI may have implicit operands, save them so that we can transfer them
+ // over to the newly materialized instruction after CopyMI is removed.
+ SmallVector<MachineOperand, 4> ImplicitOps;
+ ImplicitOps.reserve(CopyMI->getNumOperands() -
+ CopyMI->getDesc().getNumOperands());
+ for (unsigned I = CopyMI->getDesc().getNumOperands(),
+ E = CopyMI->getNumOperands();
+ I != E; ++I) {
+ MachineOperand &MO = CopyMI->getOperand(I);
+ if (MO.isReg()) {
+      assert(MO.isImplicit() && "No explicit operands after implicit operands.");
+ // Discard VReg implicit defs.
+ if (TargetRegisterInfo::isPhysicalRegister(MO.getReg()))
+ ImplicitOps.push_back(MO);
+ }
+ }
+
+ LIS->ReplaceMachineInstrInMaps(*CopyMI, NewMI);
CopyMI->eraseFromParent();
ErasedInstrs.insert(CopyMI);
@@ -975,9 +1004,10 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,
// We need to remember these so we can add intervals once we insert
// NewMI into SlotIndexes.
SmallVector<unsigned, 4> NewMIImplDefs;
- for (unsigned i = NewMI->getDesc().getNumOperands(),
- e = NewMI->getNumOperands(); i != e; ++i) {
- MachineOperand &MO = NewMI->getOperand(i);
+ for (unsigned i = NewMI.getDesc().getNumOperands(),
+ e = NewMI.getNumOperands();
+ i != e; ++i) {
+ MachineOperand &MO = NewMI.getOperand(i);
if (MO.isReg() && MO.isDef()) {
assert(MO.isImplicit() && MO.isDead() &&
TargetRegisterInfo::isPhysicalRegister(MO.getReg()));
@@ -986,7 +1016,7 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,
}
if (TargetRegisterInfo::isVirtualRegister(DstReg)) {
- unsigned NewIdx = NewMI->getOperand(0).getSubReg();
+ unsigned NewIdx = NewMI.getOperand(0).getSubReg();
if (DefRC != nullptr) {
if (NewIdx)
@@ -995,20 +1025,54 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,
NewRC = TRI->getCommonSubClass(NewRC, DefRC);
assert(NewRC && "subreg chosen for remat incompatible with instruction");
}
+ // Remap subranges to new lanemask and change register class.
+ LiveInterval &DstInt = LIS->getInterval(DstReg);
+ for (LiveInterval::SubRange &SR : DstInt.subranges()) {
+ SR.LaneMask = TRI->composeSubRegIndexLaneMask(DstIdx, SR.LaneMask);
+ }
MRI->setRegClass(DstReg, NewRC);
+ // Update machine operands and add flags.
updateRegDefsUses(DstReg, DstReg, DstIdx);
- NewMI->getOperand(0).setSubReg(NewIdx);
- } else if (NewMI->getOperand(0).getReg() != CopyDstReg) {
+ NewMI.getOperand(0).setSubReg(NewIdx);
+ // Add dead subregister definitions if we are defining the whole register
+ // but only part of it is live.
+ // This could happen if the rematerialization instruction is rematerializing
+ // more than actually is used in the register.
+ // An example would be:
+ // vreg1 = LOAD CONSTANTS 5, 8 ; Loading both 5 and 8 in different subregs
+ // ; Copying only part of the register here, but the rest is undef.
+ // vreg2:sub_16bit<def, read-undef> = COPY vreg1:sub_16bit
+ // ==>
+ // ; Materialize all the constants but only using one
+ // vreg2 = LOAD_CONSTANTS 5, 8
+ //
+      // At this point, the part that wasn't defined before could have
+      // subranges missing the definition.
+ if (NewIdx == 0 && DstInt.hasSubRanges()) {
+ SlotIndex CurrIdx = LIS->getInstructionIndex(NewMI);
+ SlotIndex DefIndex =
+ CurrIdx.getRegSlot(NewMI.getOperand(0).isEarlyClobber());
+ LaneBitmask MaxMask = MRI->getMaxLaneMaskForVReg(DstReg);
+ VNInfo::Allocator& Alloc = LIS->getVNInfoAllocator();
+ for (LiveInterval::SubRange &SR : DstInt.subranges()) {
+ if (!SR.liveAt(DefIndex))
+ SR.createDeadDef(DefIndex, Alloc);
+ MaxMask &= ~SR.LaneMask;
+ }
+ if (MaxMask != 0) {
+ LiveInterval::SubRange *SR = DstInt.createSubRange(Alloc, MaxMask);
+ SR->createDeadDef(DefIndex, Alloc);
+ }
+ }
+ } else if (NewMI.getOperand(0).getReg() != CopyDstReg) {
// The New instruction may be defining a sub-register of what's actually
// been asked for. If so it must implicitly define the whole thing.
assert(TargetRegisterInfo::isPhysicalRegister(DstReg) &&
"Only expect virtual or physical registers in remat");
- NewMI->getOperand(0).setIsDead(true);
- NewMI->addOperand(MachineOperand::CreateReg(CopyDstReg,
- true /*IsDef*/,
- true /*IsImp*/,
- false /*IsKill*/));
+ NewMI.getOperand(0).setIsDead(true);
+ NewMI.addOperand(MachineOperand::CreateReg(
+ CopyDstReg, true /*IsDef*/, true /*IsImp*/, false /*IsKill*/));
// Record small dead def live-ranges for all the subregisters
// of the destination register.
// Otherwise, variables that live through may miss some
@@ -1026,28 +1090,18 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,
// no live-ranges would have been created for ECX.
// Fix that!
SlotIndex NewMIIdx = LIS->getInstructionIndex(NewMI);
- for (MCRegUnitIterator Units(NewMI->getOperand(0).getReg(), TRI);
+ for (MCRegUnitIterator Units(NewMI.getOperand(0).getReg(), TRI);
Units.isValid(); ++Units)
if (LiveRange *LR = LIS->getCachedRegUnit(*Units))
LR->createDeadDef(NewMIIdx.getRegSlot(), LIS->getVNInfoAllocator());
}
- if (NewMI->getOperand(0).getSubReg())
- NewMI->getOperand(0).setIsUndef();
+ if (NewMI.getOperand(0).getSubReg())
+ NewMI.getOperand(0).setIsUndef();
- // CopyMI may have implicit operands, transfer them over to the newly
- // rematerialized instruction. And update implicit def interval valnos.
- for (unsigned i = CopyMI->getDesc().getNumOperands(),
- e = CopyMI->getNumOperands(); i != e; ++i) {
- MachineOperand &MO = CopyMI->getOperand(i);
- if (MO.isReg()) {
- assert(MO.isImplicit() && "No explicit operands after implict operands.");
- // Discard VReg implicit defs.
- if (TargetRegisterInfo::isPhysicalRegister(MO.getReg())) {
- NewMI->addOperand(MO);
- }
- }
- }
+ // Transfer over implicit operands to the rematerialized instruction.
+ for (MachineOperand &MO : ImplicitOps)
+ NewMI.addOperand(MO);
SlotIndex NewMIIdx = LIS->getInstructionIndex(NewMI);
for (unsigned i = 0, e = NewMIImplDefs.size(); i != e; ++i) {
@@ -1057,7 +1111,7 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,
LR->createDeadDef(NewMIIdx.getRegSlot(), LIS->getVNInfoAllocator());
}
- DEBUG(dbgs() << "Remat: " << *NewMI);
+ DEBUG(dbgs() << "Remat: " << NewMI);
++NumReMats;
// The source interval can become smaller because we removed a use.
@@ -1093,7 +1147,7 @@ bool RegisterCoalescer::eliminateUndefCopy(MachineInstr *CopyMI) {
unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx;
isMoveInstr(*TRI, CopyMI, SrcReg, DstReg, SrcSubIdx, DstSubIdx);
- SlotIndex Idx = LIS->getInstructionIndex(CopyMI);
+ SlotIndex Idx = LIS->getInstructionIndex(*CopyMI);
const LiveInterval &SrcLI = LIS->getInterval(SrcReg);
// CopyMI is undef iff SrcReg is not live before the instruction.
if (SrcSubIdx != 0 && SrcLI.hasSubRanges()) {
@@ -1136,7 +1190,7 @@ bool RegisterCoalescer::eliminateUndefCopy(MachineInstr *CopyMI) {
if (MO.isDef() /*|| MO.isUndef()*/)
continue;
const MachineInstr &MI = *MO.getParent();
- SlotIndex UseIdx = LIS->getInstructionIndex(&MI);
+ SlotIndex UseIdx = LIS->getInstructionIndex(MI);
LaneBitmask UseMask = TRI->getSubRegIndexLaneMask(MO.getSubReg());
bool isLive;
if (UseMask != ~0u && DstLI.hasSubRanges()) {
@@ -1159,12 +1213,51 @@ bool RegisterCoalescer::eliminateUndefCopy(MachineInstr *CopyMI) {
return true;
}
+void RegisterCoalescer::addUndefFlag(const LiveInterval &Int, SlotIndex UseIdx,
+ MachineOperand &MO, unsigned SubRegIdx) {
+ LaneBitmask Mask = TRI->getSubRegIndexLaneMask(SubRegIdx);
+ if (MO.isDef())
+ Mask = ~Mask;
+ bool IsUndef = true;
+ for (const LiveInterval::SubRange &S : Int.subranges()) {
+ if ((S.LaneMask & Mask) == 0)
+ continue;
+ if (S.liveAt(UseIdx)) {
+ IsUndef = false;
+ break;
+ }
+ }
+ if (IsUndef) {
+ MO.setIsUndef(true);
+ // We found out some subregister use is actually reading an undefined
+ // value. In some cases the whole vreg has become undefined at this
+ // point so we have to potentially shrink the main range if the
+ // use was ending a live segment there.
+ LiveQueryResult Q = Int.Query(UseIdx);
+ if (Q.valueOut() == nullptr)
+ ShrinkMainRange = true;
+ }
+}
+
void RegisterCoalescer::updateRegDefsUses(unsigned SrcReg,
unsigned DstReg,
unsigned SubIdx) {
bool DstIsPhys = TargetRegisterInfo::isPhysicalRegister(DstReg);
LiveInterval *DstInt = DstIsPhys ? nullptr : &LIS->getInterval(DstReg);
+ if (DstInt && DstInt->hasSubRanges() && DstReg != SrcReg) {
+ for (MachineOperand &MO : MRI->reg_operands(DstReg)) {
+ unsigned SubReg = MO.getSubReg();
+ if (SubReg == 0 || MO.isUndef())
+ continue;
+ MachineInstr &MI = *MO.getParent();
+ if (MI.isDebugValue())
+ continue;
+ SlotIndex UseIdx = LIS->getInstructionIndex(MI).getRegSlot(true);
+ addUndefFlag(*DstInt, UseIdx, MO, SubReg);
+ }
+ }
+
SmallPtrSet<MachineInstr*, 8> Visited;
for (MachineRegisterInfo::reg_instr_iterator
I = MRI->reg_instr_begin(SrcReg), E = MRI->reg_instr_end();
@@ -1186,7 +1279,7 @@ void RegisterCoalescer::updateRegDefsUses(unsigned SrcReg,
// If SrcReg wasn't read, it may still be the case that DstReg is live-in
// because SrcReg is a sub-register.
if (DstInt && !Reads && SubIdx)
- Reads = DstInt->liveAt(LIS->getInstructionIndex(UseMI));
+ Reads = DstInt->liveAt(LIS->getInstructionIndex(*UseMI));
// Replace SrcReg with DstReg in all UseMI operands.
for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
@@ -1206,30 +1299,11 @@ void RegisterCoalescer::updateRegDefsUses(unsigned SrcReg,
LaneBitmask Mask = MRI->getMaxLaneMaskForVReg(DstInt->reg);
DstInt->createSubRangeFrom(Allocator, Mask, *DstInt);
}
- LaneBitmask Mask = TRI->getSubRegIndexLaneMask(SubIdx);
- bool IsUndef = true;
SlotIndex MIIdx = UseMI->isDebugValue()
- ? LIS->getSlotIndexes()->getIndexBefore(UseMI)
- : LIS->getInstructionIndex(UseMI);
+ ? LIS->getSlotIndexes()->getIndexBefore(*UseMI)
+ : LIS->getInstructionIndex(*UseMI);
SlotIndex UseIdx = MIIdx.getRegSlot(true);
- for (LiveInterval::SubRange &S : DstInt->subranges()) {
- if ((S.LaneMask & Mask) == 0)
- continue;
- if (S.liveAt(UseIdx)) {
- IsUndef = false;
- break;
- }
- }
- if (IsUndef) {
- MO.setIsUndef(true);
- // We found out some subregister use is actually reading an undefined
- // value. In some cases the whole vreg has become undefined at this
- // point so we have to potentially shrink the main range if the
- // use was ending a live segment there.
- LiveQueryResult Q = DstInt->Query(MIIdx);
- if (Q.valueOut() == nullptr)
- ShrinkMainRange = true;
- }
+ addUndefFlag(*DstInt, UseIdx, MO, SubIdx);
}
if (DstIsPhys)
@@ -1241,7 +1315,7 @@ void RegisterCoalescer::updateRegDefsUses(unsigned SrcReg,
DEBUG({
dbgs() << "\t\tupdated: ";
if (!UseMI->isDebugValue())
- dbgs() << LIS->getInstructionIndex(UseMI) << "\t";
+ dbgs() << LIS->getInstructionIndex(*UseMI) << "\t";
dbgs() << *UseMI;
});
}
@@ -1267,7 +1341,7 @@ bool RegisterCoalescer::canJoinPhys(const CoalescerPair &CP) {
bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) {
Again = false;
- DEBUG(dbgs() << LIS->getInstructionIndex(CopyMI) << '\t' << *CopyMI);
+ DEBUG(dbgs() << LIS->getInstructionIndex(*CopyMI) << '\t' << *CopyMI);
CoalescerPair CP(*TRI);
if (!CP.setRegisters(CopyMI)) {
@@ -1303,7 +1377,7 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) {
// Eliminate undefs.
if (!CP.isPhys() && eliminateUndefCopy(CopyMI)) {
- LIS->RemoveMachineInstrFromMaps(CopyMI);
+ LIS->RemoveMachineInstrFromMaps(*CopyMI);
CopyMI->eraseFromParent();
return false; // Not coalescable.
}
@@ -1314,7 +1388,7 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) {
if (CP.getSrcReg() == CP.getDstReg()) {
LiveInterval &LI = LIS->getInterval(CP.getSrcReg());
DEBUG(dbgs() << "\tCopy already coalesced: " << LI << '\n');
- const SlotIndex CopyIdx = LIS->getInstructionIndex(CopyMI);
+ const SlotIndex CopyIdx = LIS->getInstructionIndex(*CopyMI);
LiveQueryResult LRQ = LI.Query(CopyIdx);
if (VNInfo *DefVNI = LRQ.valueDefined()) {
VNInfo *ReadVNI = LRQ.valueIn();
@@ -1332,7 +1406,7 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) {
}
DEBUG(dbgs() << "\tMerged values: " << LI << '\n');
}
- LIS->RemoveMachineInstrFromMaps(CopyMI);
+ LIS->RemoveMachineInstrFromMaps(*CopyMI);
CopyMI->eraseFromParent();
return true;
}
@@ -1393,7 +1467,7 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) {
if (!CP.isPartial() && !CP.isPhys()) {
if (adjustCopiesBackFrom(CP, CopyMI) ||
removeCopyByCommutingDef(CP, CopyMI)) {
- LIS->RemoveMachineInstrFromMaps(CopyMI);
+ LIS->RemoveMachineInstrFromMaps(*CopyMI);
CopyMI->eraseFromParent();
DEBUG(dbgs() << "\tTrivial!\n");
return true;
@@ -1507,8 +1581,8 @@ bool RegisterCoalescer::joinReservedPhysReg(CoalescerPair &CP) {
MachineInstr *DestMI = MRI->getVRegDef(RHS.reg);
CopyMI = &*MRI->use_instr_nodbg_begin(RHS.reg);
- const SlotIndex CopyRegIdx = LIS->getInstructionIndex(CopyMI).getRegSlot();
- const SlotIndex DestRegIdx = LIS->getInstructionIndex(DestMI).getRegSlot();
+ const SlotIndex CopyRegIdx = LIS->getInstructionIndex(*CopyMI).getRegSlot();
+ const SlotIndex DestRegIdx = LIS->getInstructionIndex(*DestMI).getRegSlot();
// We checked above that there are no interfering defs of the physical
  // register. However, for this case, where we intend to move up the def of
@@ -1544,7 +1618,7 @@ bool RegisterCoalescer::joinReservedPhysReg(CoalescerPair &CP) {
}
}
- LIS->RemoveMachineInstrFromMaps(CopyMI);
+ LIS->RemoveMachineInstrFromMaps(*CopyMI);
CopyMI->eraseFromParent();
// We don't track kills for reserved registers.
@@ -1775,7 +1849,7 @@ class JoinVals {
/// Return true if MI uses any of the given Lanes from Reg.
/// This does not include partial redefinitions of Reg.
- bool usesLanes(const MachineInstr *MI, unsigned, unsigned, LaneBitmask) const;
+ bool usesLanes(const MachineInstr &MI, unsigned, unsigned, LaneBitmask) const;
/// Determine if ValNo is a copy of a value number in LR or Other.LR that will
/// be pruned:
@@ -2025,7 +2099,7 @@ JoinVals::analyzeValue(unsigned ValNo, JoinVals &Other) {
// IMPLICIT_DEF instructions behind, and there is nothing wrong with it
// technically.
//
- // WHen it happens, treat that IMPLICIT_DEF as a normal value, and don't try
+ // When it happens, treat that IMPLICIT_DEF as a normal value, and don't try
// to erase the IMPLICIT_DEF instruction.
if (OtherV.ErasableImplicitDef && DefMI &&
DefMI->getParent() != Indexes->getMBBFromIndex(V.OtherVNI->def)) {
@@ -2219,11 +2293,11 @@ taintExtent(unsigned ValNo, LaneBitmask TaintedLanes, JoinVals &Other,
return true;
}
-bool JoinVals::usesLanes(const MachineInstr *MI, unsigned Reg, unsigned SubIdx,
+bool JoinVals::usesLanes(const MachineInstr &MI, unsigned Reg, unsigned SubIdx,
LaneBitmask Lanes) const {
- if (MI->isDebugValue())
+ if (MI.isDebugValue())
return false;
- for (const MachineOperand &MO : MI->operands()) {
+ for (const MachineOperand &MO : MI.operands()) {
if (!MO.isReg() || MO.isDef() || MO.getReg() != Reg)
continue;
if (!MO.readsReg())
@@ -2278,7 +2352,7 @@ bool JoinVals::resolveConflicts(JoinVals &Other) {
unsigned TaintNum = 0;
for(;;) {
assert(MI != MBB->end() && "Bad LastMI");
- if (usesLanes(MI, Other.Reg, Other.SubIdx, TaintedLanes)) {
+ if (usesLanes(*MI, Other.Reg, Other.SubIdx, TaintedLanes)) {
DEBUG(dbgs() << "\t\ttainted lanes used by: " << *MI);
return false;
}
@@ -2457,7 +2531,7 @@ void JoinVals::eraseInstrs(SmallPtrSetImpl<MachineInstr*> &ErasedInstrs,
}
ErasedInstrs.insert(MI);
DEBUG(dbgs() << "\t\terased:\t" << Def << '\t' << *MI);
- LIS->RemoveMachineInstrFromMaps(MI);
+ LIS->RemoveMachineInstrFromMaps(*MI);
MI->eraseFromParent();
break;
}
@@ -2838,16 +2912,15 @@ RegisterCoalescer::copyCoalesceInMBB(MachineBasicBlock *MBB) {
}
else {
SmallVector<MachineInstr*, 2> Terminals;
- for (MachineBasicBlock::iterator MII = MBB->begin(), E = MBB->end();
- MII != E; ++MII)
- if (MII->isCopyLike()) {
- if (applyTerminalRule(*MII))
- Terminals.push_back(&(*MII));
+ for (MachineInstr &MII : *MBB)
+ if (MII.isCopyLike()) {
+ if (applyTerminalRule(MII))
+ Terminals.push_back(&MII);
else
- WorkList.push_back(MII);
- }
- // Append the copies evicted by the terminal rule at the end of the list.
- WorkList.append(Terminals.begin(), Terminals.end());
+ WorkList.push_back(&MII);
+ }
+ // Append the copies evicted by the terminal rule at the end of the list.
+ WorkList.append(Terminals.begin(), Terminals.end());
}
// Try coalescing the collected copies immediately, and remove the nulls.
// This prevents the WorkList from getting too large since most copies are
diff --git a/contrib/llvm/lib/CodeGen/RegisterPressure.cpp b/contrib/llvm/lib/CodeGen/RegisterPressure.cpp
index f33dc3e..a21d6c1 100644
--- a/contrib/llvm/lib/CodeGen/RegisterPressure.cpp
+++ b/contrib/llvm/lib/CodeGen/RegisterPressure.cpp
@@ -24,7 +24,13 @@ using namespace llvm;
/// Increase pressure for each pressure set provided by TargetRegisterInfo.
static void increaseSetPressure(std::vector<unsigned> &CurrSetPressure,
- PSetIterator PSetI) {
+ const MachineRegisterInfo &MRI, unsigned Reg,
+ LaneBitmask PrevMask, LaneBitmask NewMask) {
+ assert((PrevMask & ~NewMask) == 0 && "Must not remove bits");
+ if (PrevMask != 0 || NewMask == 0)
+ return;
+
+ PSetIterator PSetI = MRI.getPressureSets(Reg);
unsigned Weight = PSetI.getWeight();
for (; PSetI.isValid(); ++PSetI)
CurrSetPressure[*PSetI] += Weight;
@@ -32,7 +38,13 @@ static void increaseSetPressure(std::vector<unsigned> &CurrSetPressure,
/// Decrease pressure for each pressure set provided by TargetRegisterInfo.
static void decreaseSetPressure(std::vector<unsigned> &CurrSetPressure,
- PSetIterator PSetI) {
+ const MachineRegisterInfo &MRI, unsigned Reg,
+ LaneBitmask PrevMask, LaneBitmask NewMask) {
+  assert((NewMask & ~PrevMask) == 0 && "Must not add bits");
+ if (NewMask != 0 || PrevMask == 0)
+ return;
+
+ PSetIterator PSetI = MRI.getPressureSets(Reg);
unsigned Weight = PSetI.getWeight();
for (; PSetI.isValid(); ++PSetI) {
assert(CurrSetPressure[*PSetI] >= Weight && "register pressure underflow");
@@ -59,12 +71,20 @@ void RegisterPressure::dump(const TargetRegisterInfo *TRI) const {
dbgs() << "Max Pressure: ";
dumpRegSetPressure(MaxSetPressure, TRI);
dbgs() << "Live In: ";
- for (unsigned Reg : LiveInRegs)
- dbgs() << PrintVRegOrUnit(Reg, TRI) << " ";
+ for (const RegisterMaskPair &P : LiveInRegs) {
+ dbgs() << PrintVRegOrUnit(P.RegUnit, TRI);
+ if (P.LaneMask != ~0u)
+ dbgs() << ':' << PrintLaneMask(P.LaneMask);
+ dbgs() << ' ';
+ }
dbgs() << '\n';
dbgs() << "Live Out: ";
- for (unsigned Reg : LiveOutRegs)
- dbgs() << PrintVRegOrUnit(Reg, TRI) << " ";
+ for (const RegisterMaskPair &P : LiveOutRegs) {
+ dbgs() << PrintVRegOrUnit(P.RegUnit, TRI);
+ if (P.LaneMask != ~0u)
+ dbgs() << ':' << PrintLaneMask(P.LaneMask);
+ dbgs() << ' ';
+ }
dbgs() << '\n';
}
@@ -89,24 +109,25 @@ void PressureDiff::dump(const TargetRegisterInfo &TRI) const {
dbgs() << '\n';
}
-/// Increase the current pressure as impacted by these registers and bump
-/// the high water mark if needed.
-void RegPressureTracker::increaseRegPressure(ArrayRef<unsigned> RegUnits) {
- for (unsigned RegUnit : RegUnits) {
- PSetIterator PSetI = MRI->getPressureSets(RegUnit);
- unsigned Weight = PSetI.getWeight();
- for (; PSetI.isValid(); ++PSetI) {
- CurrSetPressure[*PSetI] += Weight;
- P.MaxSetPressure[*PSetI] =
- std::max(P.MaxSetPressure[*PSetI], CurrSetPressure[*PSetI]);
- }
+void RegPressureTracker::increaseRegPressure(unsigned RegUnit,
+ LaneBitmask PreviousMask,
+ LaneBitmask NewMask) {
+ if (PreviousMask != 0 || NewMask == 0)
+ return;
+
+ PSetIterator PSetI = MRI->getPressureSets(RegUnit);
+ unsigned Weight = PSetI.getWeight();
+ for (; PSetI.isValid(); ++PSetI) {
+ CurrSetPressure[*PSetI] += Weight;
+ P.MaxSetPressure[*PSetI] =
+ std::max(P.MaxSetPressure[*PSetI], CurrSetPressure[*PSetI]);
}
}
-/// Simply decrease the current pressure as impacted by these registers.
-void RegPressureTracker::decreaseRegPressure(ArrayRef<unsigned> RegUnits) {
- for (unsigned RegUnit : RegUnits)
- decreaseSetPressure(CurrSetPressure, MRI->getPressureSets(RegUnit));
+void RegPressureTracker::decreaseRegPressure(unsigned RegUnit,
+ LaneBitmask PreviousMask,
+ LaneBitmask NewMask) {
+ decreaseSetPressure(CurrSetPressure, *MRI, RegUnit, PreviousMask, NewMask);
}
/// Clear the result so it can be used for another round of pressure tracking.
@@ -201,8 +222,7 @@ void RegPressureTracker::init(const MachineFunction *mf,
const LiveIntervals *lis,
const MachineBasicBlock *mbb,
MachineBasicBlock::const_iterator pos,
- bool ShouldTrackUntiedDefs)
-{
+ bool TrackLaneMasks, bool TrackUntiedDefs) {
reset();
MF = mf;
@@ -210,7 +230,8 @@ void RegPressureTracker::init(const MachineFunction *mf,
RCI = rci;
MRI = &MF->getRegInfo();
MBB = mbb;
- TrackUntiedDefs = ShouldTrackUntiedDefs;
+ this->TrackUntiedDefs = TrackUntiedDefs;
+ this->TrackLaneMasks = TrackLaneMasks;
if (RequireIntervals) {
assert(lis && "IntervalPressure requires LiveIntervals");
@@ -250,7 +271,7 @@ SlotIndex RegPressureTracker::getCurrSlot() const {
++IdxPos;
if (IdxPos == MBB->end())
return LIS->getMBBEndIdx(MBB);
- return LIS->getInstructionIndex(IdxPos).getRegSlot();
+ return LIS->getInstructionIndex(*IdxPos).getRegSlot();
}
/// Set the boundary for the top of the region and summarize live ins.
@@ -297,20 +318,106 @@ void RegPressureTracker::closeRegion() {
void RegPressureTracker::initLiveThru(const RegPressureTracker &RPTracker) {
LiveThruPressure.assign(TRI->getNumRegPressureSets(), 0);
assert(isBottomClosed() && "need bottom-up tracking to initialize.");
- for (unsigned Reg : P.LiveOutRegs) {
- if (TargetRegisterInfo::isVirtualRegister(Reg)
- && !RPTracker.hasUntiedDef(Reg)) {
- increaseSetPressure(LiveThruPressure, MRI->getPressureSets(Reg));
+ for (const RegisterMaskPair &Pair : P.LiveOutRegs) {
+ unsigned RegUnit = Pair.RegUnit;
+ if (TargetRegisterInfo::isVirtualRegister(RegUnit)
+ && !RPTracker.hasUntiedDef(RegUnit))
+ increaseSetPressure(LiveThruPressure, *MRI, RegUnit, 0, Pair.LaneMask);
+ }
+}
+
+static LaneBitmask getRegLanes(ArrayRef<RegisterMaskPair> RegUnits,
+ unsigned RegUnit) {
+ auto I = std::find_if(RegUnits.begin(), RegUnits.end(),
+ [RegUnit](const RegisterMaskPair Other) {
+ return Other.RegUnit == RegUnit;
+ });
+ if (I == RegUnits.end())
+ return 0;
+ return I->LaneMask;
+}
+
+static void addRegLanes(SmallVectorImpl<RegisterMaskPair> &RegUnits,
+ RegisterMaskPair Pair) {
+ unsigned RegUnit = Pair.RegUnit;
+ assert(Pair.LaneMask != 0);
+ auto I = std::find_if(RegUnits.begin(), RegUnits.end(),
+ [RegUnit](const RegisterMaskPair Other) {
+ return Other.RegUnit == RegUnit;
+ });
+ if (I == RegUnits.end()) {
+ RegUnits.push_back(Pair);
+ } else {
+ I->LaneMask |= Pair.LaneMask;
+ }
+}
+
+static void setRegZero(SmallVectorImpl<RegisterMaskPair> &RegUnits,
+ unsigned RegUnit) {
+ auto I = std::find_if(RegUnits.begin(), RegUnits.end(),
+ [RegUnit](const RegisterMaskPair Other) {
+ return Other.RegUnit == RegUnit;
+ });
+ if (I == RegUnits.end()) {
+ RegUnits.push_back(RegisterMaskPair(RegUnit, 0));
+ } else {
+ I->LaneMask = 0;
+ }
+}
+
+static void removeRegLanes(SmallVectorImpl<RegisterMaskPair> &RegUnits,
+ RegisterMaskPair Pair) {
+ unsigned RegUnit = Pair.RegUnit;
+ assert(Pair.LaneMask != 0);
+ auto I = std::find_if(RegUnits.begin(), RegUnits.end(),
+ [RegUnit](const RegisterMaskPair Other) {
+ return Other.RegUnit == RegUnit;
+ });
+ if (I != RegUnits.end()) {
+ I->LaneMask &= ~Pair.LaneMask;
+ if (I->LaneMask == 0)
+ RegUnits.erase(I);
+ }
+}
+
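+/// Returns the lanes of \p RegUnit for which \p Property holds at \p Pos.
+/// For virtual registers with subranges the lane masks of the matching
+/// subranges are accumulated; for physical register units the answer is
+/// all-or-nothing, with \p SafeDefault returned when no live range is cached.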
+static LaneBitmask getLanesWithProperty(const LiveIntervals &LIS,
+ const MachineRegisterInfo &MRI, bool TrackLaneMasks, unsigned RegUnit,
+ SlotIndex Pos, LaneBitmask SafeDefault,
+ bool(*Property)(const LiveRange &LR, SlotIndex Pos)) {
+ if (TargetRegisterInfo::isVirtualRegister(RegUnit)) {
+ const LiveInterval &LI = LIS.getInterval(RegUnit);
+ LaneBitmask Result = 0;
+ if (TrackLaneMasks && LI.hasSubRanges()) {
+ for (const LiveInterval::SubRange &SR : LI.subranges()) {
+ if (Property(SR, Pos))
+ Result |= SR.LaneMask;
+ }
+ } else if (Property(LI, Pos)) {
+ Result = TrackLaneMasks ? MRI.getMaxLaneMaskForVReg(RegUnit) : ~0u;
}
+
+ return Result;
+ } else {
+ const LiveRange *LR = LIS.getCachedRegUnit(RegUnit);
+ // Be prepared for missing live ranges: we usually do not compute live ranges
+ // for physical registers on targets with many registers (GPUs).
+ if (LR == nullptr)
+ return SafeDefault;
+ return Property(*LR, Pos) ? ~0u : 0;
}
}
-/// \brief Convenient wrapper for checking membership in RegisterOperands.
-/// (std::count() doesn't have an early exit).
-static bool containsReg(ArrayRef<unsigned> RegUnits, unsigned RegUnit) {
- return std::find(RegUnits.begin(), RegUnits.end(), RegUnit) != RegUnits.end();
+static LaneBitmask getLiveLanesAt(const LiveIntervals &LIS,
+ const MachineRegisterInfo &MRI,
+ bool TrackLaneMasks, unsigned RegUnit,
+ SlotIndex Pos) {
+ return getLanesWithProperty(LIS, MRI, TrackLaneMasks, RegUnit, Pos, ~0u,
+ [](const LiveRange &LR, SlotIndex Pos) {
+ return LR.liveAt(Pos);
+ });
}
+
namespace {
/// Collect this instruction's unique uses and defs into SmallVectors for
@@ -325,19 +432,25 @@ class RegisterOperandsCollector {
RegisterOperandsCollector(RegisterOperands &RegOpers,
const TargetRegisterInfo &TRI,
- const MachineRegisterInfo &MRI,
- bool IgnoreDead)
+ const MachineRegisterInfo &MRI, bool IgnoreDead)
: RegOpers(RegOpers), TRI(TRI), MRI(MRI), IgnoreDead(IgnoreDead) {}
void collectInstr(const MachineInstr &MI) const {
- for (ConstMIBundleOperands OperI(&MI); OperI.isValid(); ++OperI)
+ for (ConstMIBundleOperands OperI(MI); OperI.isValid(); ++OperI)
collectOperand(*OperI);
// Remove redundant physreg dead defs.
- SmallVectorImpl<unsigned>::iterator I =
- std::remove_if(RegOpers.DeadDefs.begin(), RegOpers.DeadDefs.end(),
- std::bind1st(std::ptr_fun(containsReg), RegOpers.Defs));
- RegOpers.DeadDefs.erase(I, RegOpers.DeadDefs.end());
+ for (const RegisterMaskPair &P : RegOpers.Defs)
+ removeRegLanes(RegOpers.DeadDefs, P);
+ }
+
+ void collectInstrLanes(const MachineInstr &MI) const {
+ for (ConstMIBundleOperands OperI(MI); OperI.isValid(); ++OperI)
+ collectOperandLanes(*OperI);
+
+ // Remove redundant physreg dead defs.
+ for (const RegisterMaskPair &P : RegOpers.Defs)
+ removeRegLanes(RegOpers.DeadDefs, P);
}
/// Push this operand's register onto the correct vectors.
@@ -345,28 +458,65 @@ class RegisterOperandsCollector {
if (!MO.isReg() || !MO.getReg())
return;
unsigned Reg = MO.getReg();
- if (MO.readsReg())
- pushRegUnits(Reg, RegOpers.Uses);
- if (MO.isDef()) {
+ if (MO.isUse()) {
+ if (!MO.isUndef() && !MO.isInternalRead())
+ pushReg(Reg, RegOpers.Uses);
+ } else {
+ assert(MO.isDef());
+ // Subregister definitions may imply a register read.
+ if (MO.readsReg())
+ pushReg(Reg, RegOpers.Uses);
+
if (MO.isDead()) {
if (!IgnoreDead)
- pushRegUnits(Reg, RegOpers.DeadDefs);
+ pushReg(Reg, RegOpers.DeadDefs);
} else
- pushRegUnits(Reg, RegOpers.Defs);
+ pushReg(Reg, RegOpers.Defs);
}
}
- void pushRegUnits(unsigned Reg, SmallVectorImpl<unsigned> &RegUnits) const {
+ void pushReg(unsigned Reg,
+ SmallVectorImpl<RegisterMaskPair> &RegUnits) const {
if (TargetRegisterInfo::isVirtualRegister(Reg)) {
- if (containsReg(RegUnits, Reg))
- return;
- RegUnits.push_back(Reg);
+ addRegLanes(RegUnits, RegisterMaskPair(Reg, ~0u));
} else if (MRI.isAllocatable(Reg)) {
- for (MCRegUnitIterator Units(Reg, &TRI); Units.isValid(); ++Units) {
- if (containsReg(RegUnits, *Units))
- continue;
- RegUnits.push_back(*Units);
- }
+ for (MCRegUnitIterator Units(Reg, &TRI); Units.isValid(); ++Units)
+ addRegLanes(RegUnits, RegisterMaskPair(*Units, ~0u));
+ }
+ }
+
+ void collectOperandLanes(const MachineOperand &MO) const {
+ if (!MO.isReg() || !MO.getReg())
+ return;
+ unsigned Reg = MO.getReg();
+ unsigned SubRegIdx = MO.getSubReg();
+ if (MO.isUse()) {
+ if (!MO.isUndef() && !MO.isInternalRead())
+ pushRegLanes(Reg, SubRegIdx, RegOpers.Uses);
+ } else {
+ assert(MO.isDef());
+ // Treat read-undef subreg defs as definitions of the whole register.
+ if (MO.isUndef())
+ SubRegIdx = 0;
+
+ if (MO.isDead()) {
+ if (!IgnoreDead)
+ pushRegLanes(Reg, SubRegIdx, RegOpers.DeadDefs);
+ } else
+ pushRegLanes(Reg, SubRegIdx, RegOpers.Defs);
+ }
+ }
+
+ void pushRegLanes(unsigned Reg, unsigned SubRegIdx,
+ SmallVectorImpl<RegisterMaskPair> &RegUnits) const {
+ if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ LaneBitmask LaneMask = SubRegIdx != 0
+ ? TRI.getSubRegIndexLaneMask(SubRegIdx)
+ : MRI.getMaxLaneMaskForVReg(Reg);
+ addRegLanes(RegUnits, RegisterMaskPair(Reg, LaneMask));
+ } else if (MRI.isAllocatable(Reg)) {
+ for (MCRegUnitIterator Units(Reg, &TRI); Units.isValid(); ++Units)
+ addRegLanes(RegUnits, RegisterMaskPair(*Units, ~0u));
}
}
@@ -378,24 +528,26 @@ class RegisterOperandsCollector {
void RegisterOperands::collect(const MachineInstr &MI,
const TargetRegisterInfo &TRI,
const MachineRegisterInfo &MRI,
- bool IgnoreDead) {
+ bool TrackLaneMasks, bool IgnoreDead) {
RegisterOperandsCollector Collector(*this, TRI, MRI, IgnoreDead);
- Collector.collectInstr(MI);
+ if (TrackLaneMasks)
+ Collector.collectInstrLanes(MI);
+ else
+ Collector.collectInstr(MI);
}
void RegisterOperands::detectDeadDefs(const MachineInstr &MI,
const LiveIntervals &LIS) {
- SlotIndex SlotIdx = LIS.getInstructionIndex(&MI);
- for (SmallVectorImpl<unsigned>::iterator RI = Defs.begin();
- RI != Defs.end(); /*empty*/) {
- unsigned Reg = *RI;
+ SlotIndex SlotIdx = LIS.getInstructionIndex(MI);
+ for (auto RI = Defs.begin(); RI != Defs.end(); /*empty*/) {
+ unsigned Reg = RI->RegUnit;
const LiveRange *LR = getLiveRange(LIS, Reg);
if (LR != nullptr) {
LiveQueryResult LRQ = LR->Query(SlotIdx);
if (LRQ.isDeadDef()) {
// LiveIntervals knows this is a dead def even though its MachineOperand is
// not flagged as such.
- DeadDefs.push_back(Reg);
+ DeadDefs.push_back(*RI);
RI = Defs.erase(RI);
continue;
}
@@ -404,6 +556,52 @@ void RegisterOperands::detectDeadDefs(const MachineInstr &MI,
}
}
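+/// Restrict the recorded def/use lane masks to the lanes actually live
+/// around \p Pos: defs are clipped to the lanes live after the instruction,
+/// uses to the lanes live before it, and read-undef flags are added to
+/// \p AddFlagsMI for defs that leave no other lanes of the vreg live.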
+void RegisterOperands::adjustLaneLiveness(const LiveIntervals &LIS,
+ const MachineRegisterInfo &MRI,
+ SlotIndex Pos,
+ MachineInstr *AddFlagsMI) {
+ for (auto I = Defs.begin(); I != Defs.end(); ) {
+ LaneBitmask LiveAfter = getLiveLanesAt(LIS, MRI, true, I->RegUnit,
+ Pos.getDeadSlot());
+ // If the def is all that is live after the instruction, then in case
+ // of a subregister def we need a read-undef flag.
+ unsigned RegUnit = I->RegUnit;
+ if (TargetRegisterInfo::isVirtualRegister(RegUnit) &&
+ AddFlagsMI != nullptr && (LiveAfter & ~I->LaneMask) == 0)
+ AddFlagsMI->setRegisterDefReadUndef(RegUnit);
+
+ LaneBitmask ActualDef = I->LaneMask & LiveAfter;
+ if (ActualDef == 0) {
+ I = Defs.erase(I);
+ } else {
+ I->LaneMask = ActualDef;
+ ++I;
+ }
+ }
+ for (auto I = Uses.begin(); I != Uses.end(); ) {
+ LaneBitmask LiveBefore = getLiveLanesAt(LIS, MRI, true, I->RegUnit,
+ Pos.getBaseIndex());
+ LaneBitmask LaneMask = I->LaneMask & LiveBefore;
+ if (LaneMask == 0) {
+ I = Uses.erase(I);
+ } else {
+ I->LaneMask = LaneMask;
+ ++I;
+ }
+ }
+ if (AddFlagsMI != nullptr) {
+ for (const RegisterMaskPair &P : DeadDefs) {
+ unsigned RegUnit = P.RegUnit;
+ if (!TargetRegisterInfo::isVirtualRegister(RegUnit))
+ continue;
+ LaneBitmask LiveAfter = getLiveLanesAt(LIS, MRI, true, RegUnit,
+ Pos.getDeadSlot());
+ if (LiveAfter == 0)
+ AddFlagsMI->setRegisterDefReadUndef(RegUnit);
+ }
+ }
+}
+
/// Initialize an array of N PressureDiffs.
void PressureDiffs::init(unsigned N) {
Size = N;
@@ -421,11 +619,11 @@ void PressureDiffs::addInstruction(unsigned Idx,
const MachineRegisterInfo &MRI) {
PressureDiff &PDiff = (*this)[Idx];
assert(!PDiff.begin()->isValid() && "stale PDiff");
- for (unsigned Reg : RegOpers.Defs)
- PDiff.addPressureChange(Reg, true, &MRI);
+ for (const RegisterMaskPair &P : RegOpers.Defs)
+ PDiff.addPressureChange(P.RegUnit, true, &MRI);
- for (unsigned Reg : RegOpers.Uses)
- PDiff.addPressureChange(Reg, false, &MRI);
+ for (const RegisterMaskPair &P : RegOpers.Uses)
+ PDiff.addPressureChange(P.RegUnit, false, &MRI);
}
/// Add a change in pressure to the pressure diff of a given instruction.
@@ -465,33 +663,58 @@ void PressureDiff::addPressureChange(unsigned RegUnit, bool IsDec,
}
/// Force liveness of registers.
-void RegPressureTracker::addLiveRegs(ArrayRef<unsigned> Regs) {
- for (unsigned Reg : Regs) {
- if (LiveRegs.insert(Reg))
- increaseRegPressure(Reg);
+void RegPressureTracker::addLiveRegs(ArrayRef<RegisterMaskPair> Regs) {
+ for (const RegisterMaskPair &P : Regs) {
+ LaneBitmask PrevMask = LiveRegs.insert(P);
+ LaneBitmask NewMask = PrevMask | P.LaneMask;
+ increaseRegPressure(P.RegUnit, PrevMask, NewMask);
}
}
-/// Add Reg to the live in set and increase max pressure.
-void RegPressureTracker::discoverLiveIn(unsigned Reg) {
- assert(!LiveRegs.contains(Reg) && "avoid bumping max pressure twice");
- if (containsReg(P.LiveInRegs, Reg))
- return;
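+/// Record \p Pair as live-in or live-out of the region and unconditionally
+/// bump the max pressure for any lanes that were not already recorded.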
+void RegPressureTracker::discoverLiveInOrOut(RegisterMaskPair Pair,
+ SmallVectorImpl<RegisterMaskPair> &LiveInOrOut) {
+ assert(Pair.LaneMask != 0);
+
+ unsigned RegUnit = Pair.RegUnit;
+ auto I = std::find_if(LiveInOrOut.begin(), LiveInOrOut.end(),
+ [RegUnit](const RegisterMaskPair &Other) {
+ return Other.RegUnit == RegUnit;
+ });
+ LaneBitmask PrevMask;
+ LaneBitmask NewMask;
+ if (I == LiveInOrOut.end()) {
+ PrevMask = 0;
+ NewMask = Pair.LaneMask;
+ LiveInOrOut.push_back(Pair);
+ } else {
+ PrevMask = I->LaneMask;
+ NewMask = PrevMask | Pair.LaneMask;
+ I->LaneMask = NewMask;
+ }
+ increaseSetPressure(P.MaxSetPressure, *MRI, RegUnit, PrevMask, NewMask);
+}
- // At live in discovery, unconditionally increase the high water mark.
- P.LiveInRegs.push_back(Reg);
- increaseSetPressure(P.MaxSetPressure, MRI->getPressureSets(Reg));
+void RegPressureTracker::discoverLiveIn(RegisterMaskPair Pair) {
+ discoverLiveInOrOut(Pair, P.LiveInRegs);
}
-/// Add Reg to the live out set and increase max pressure.
-void RegPressureTracker::discoverLiveOut(unsigned Reg) {
- assert(!LiveRegs.contains(Reg) && "avoid bumping max pressure twice");
- if (containsReg(P.LiveOutRegs, Reg))
- return;
+void RegPressureTracker::discoverLiveOut(RegisterMaskPair Pair) {
+ discoverLiveInOrOut(Pair, P.LiveOutRegs);
+}
- // At live out discovery, unconditionally increase the high water mark.
- P.LiveOutRegs.push_back(Reg);
- increaseSetPressure(P.MaxSetPressure, MRI->getPressureSets(Reg));
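+/// Temporarily model dead defs as live so the max pressure accounts for
+/// them, then remove them again, leaving the current pressure unchanged.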
+void RegPressureTracker::bumpDeadDefs(ArrayRef<RegisterMaskPair> DeadDefs) {
+ for (const RegisterMaskPair &P : DeadDefs) {
+ unsigned Reg = P.RegUnit;
+ LaneBitmask LiveMask = LiveRegs.contains(Reg);
+ LaneBitmask BumpedMask = LiveMask | P.LaneMask;
+ increaseRegPressure(Reg, LiveMask, BumpedMask);
+ }
+ for (const RegisterMaskPair &P : DeadDefs) {
+ unsigned Reg = P.RegUnit;
+ LaneBitmask LiveMask = LiveRegs.contains(Reg);
+ LaneBitmask BumpedMask = LiveMask | P.LaneMask;
+ decreaseRegPressure(Reg, BumpedMask, LiveMask);
+ }
}
/// Recede across the previous instruction. If LiveUses is provided, record any
@@ -500,48 +723,88 @@ void RegPressureTracker::discoverLiveOut(unsigned Reg) {
/// difference pointer is provided, record the changes in pressure caused by this
/// instruction independent of liveness.
void RegPressureTracker::recede(const RegisterOperands &RegOpers,
- SmallVectorImpl<unsigned> *LiveUses) {
+ SmallVectorImpl<RegisterMaskPair> *LiveUses) {
assert(!CurrPos->isDebugValue());
// Boost pressure for all dead defs together.
- increaseRegPressure(RegOpers.DeadDefs);
- decreaseRegPressure(RegOpers.DeadDefs);
+ bumpDeadDefs(RegOpers.DeadDefs);
// Kill liveness at live defs.
// TODO: consider earlyclobbers?
- for (unsigned Reg : RegOpers.Defs) {
- if (LiveRegs.erase(Reg))
- decreaseRegPressure(Reg);
- else
- discoverLiveOut(Reg);
+ for (const RegisterMaskPair &Def : RegOpers.Defs) {
+ unsigned Reg = Def.RegUnit;
+
+ LaneBitmask PreviousMask = LiveRegs.erase(Def);
+ LaneBitmask NewMask = PreviousMask & ~Def.LaneMask;
+
+ LaneBitmask LiveOut = Def.LaneMask & ~PreviousMask;
+ if (LiveOut != 0) {
+ discoverLiveOut(RegisterMaskPair(Reg, LiveOut));
+ // Retroactively model effects on pressure of the live out lanes.
+ increaseSetPressure(CurrSetPressure, *MRI, Reg, 0, LiveOut);
+ PreviousMask = LiveOut;
+ }
+
+ if (NewMask == 0) {
+ // Add a 0 entry to LiveUses as a marker that the complete vreg has become
+ // dead.
+ if (TrackLaneMasks && LiveUses != nullptr)
+ setRegZero(*LiveUses, Reg);
+ }
+
+ decreaseRegPressure(Reg, PreviousMask, NewMask);
}
SlotIndex SlotIdx;
if (RequireIntervals)
- SlotIdx = LIS->getInstructionIndex(CurrPos).getRegSlot();
+ SlotIdx = LIS->getInstructionIndex(*CurrPos).getRegSlot();
// Generate liveness for uses.
- for (unsigned Reg : RegOpers.Uses) {
- if (!LiveRegs.contains(Reg)) {
- // Adjust liveouts if LiveIntervals are available.
- if (RequireIntervals) {
- const LiveRange *LR = getLiveRange(*LIS, Reg);
- if (LR) {
- LiveQueryResult LRQ = LR->Query(SlotIdx);
- if (!LRQ.isKill() && !LRQ.valueDefined())
- discoverLiveOut(Reg);
+ for (const RegisterMaskPair &Use : RegOpers.Uses) {
+ unsigned Reg = Use.RegUnit;
+ assert(Use.LaneMask != 0);
+ LaneBitmask PreviousMask = LiveRegs.insert(Use);
+ LaneBitmask NewMask = PreviousMask | Use.LaneMask;
+ if (NewMask == PreviousMask)
+ continue;
+
+ // Did the register just become live?
+ if (PreviousMask == 0) {
+ if (LiveUses != nullptr) {
+ if (!TrackLaneMasks) {
+ addRegLanes(*LiveUses, RegisterMaskPair(Reg, NewMask));
+ } else {
+ auto I = std::find_if(LiveUses->begin(), LiveUses->end(),
+ [Reg](const RegisterMaskPair Other) {
+ return Other.RegUnit == Reg;
+ });
+ bool IsRedef = I != LiveUses->end();
+ if (IsRedef) {
+ // ignore re-defs here...
+ assert(I->LaneMask == 0);
+ removeRegLanes(*LiveUses, RegisterMaskPair(Reg, NewMask));
+ } else {
+ addRegLanes(*LiveUses, RegisterMaskPair(Reg, NewMask));
+ }
}
}
- increaseRegPressure(Reg);
- LiveRegs.insert(Reg);
- if (LiveUses && !containsReg(*LiveUses, Reg))
- LiveUses->push_back(Reg);
+
+ // Discover live outs if this may be the first occurrence of this register.
+ if (RequireIntervals) {
+ LaneBitmask LiveOut = getLiveThroughAt(Reg, SlotIdx);
+ if (LiveOut != 0)
+ discoverLiveOut(RegisterMaskPair(Reg, LiveOut));
+ }
}
+
+ increaseRegPressure(Reg, PreviousMask, NewMask);
}
if (TrackUntiedDefs) {
- for (unsigned Reg : RegOpers.Defs) {
- if (TargetRegisterInfo::isVirtualRegister(Reg) && !LiveRegs.contains(Reg))
- UntiedDefs.insert(Reg);
+ for (const RegisterMaskPair &Def : RegOpers.Defs) {
+ unsigned RegUnit = Def.RegUnit;
+ if (TargetRegisterInfo::isVirtualRegister(RegUnit) &&
+ (LiveRegs.contains(RegUnit) & Def.LaneMask) == 0)
+ UntiedDefs.insert(RegUnit);
}
}
}
@@ -562,29 +825,32 @@ void RegPressureTracker::recedeSkipDebugValues() {
SlotIndex SlotIdx;
if (RequireIntervals)
- SlotIdx = LIS->getInstructionIndex(CurrPos).getRegSlot();
+ SlotIdx = LIS->getInstructionIndex(*CurrPos).getRegSlot();
// Open the top of the region using slot indexes.
if (RequireIntervals && isTopClosed())
static_cast<IntervalPressure&>(P).openTop(SlotIdx);
}
-void RegPressureTracker::recede(SmallVectorImpl<unsigned> *LiveUses) {
+void RegPressureTracker::recede(SmallVectorImpl<RegisterMaskPair> *LiveUses) {
recedeSkipDebugValues();
const MachineInstr &MI = *CurrPos;
RegisterOperands RegOpers;
- RegOpers.collect(MI, *TRI, *MRI);
- if (RequireIntervals)
+ RegOpers.collect(MI, *TRI, *MRI, TrackLaneMasks, false);
+ if (TrackLaneMasks) {
+ SlotIndex SlotIdx = LIS->getInstructionIndex(*CurrPos).getRegSlot();
+ RegOpers.adjustLaneLiveness(*LIS, *MRI, SlotIdx);
+ } else if (RequireIntervals) {
RegOpers.detectDeadDefs(MI, *LIS);
+ }
recede(RegOpers, LiveUses);
}
/// Advance across the current instruction.
-void RegPressureTracker::advance() {
+void RegPressureTracker::advance(const RegisterOperands &RegOpers) {
assert(!TrackUntiedDefs && "unsupported mode");
-
assert(CurrPos != MBB->end());
if (!isTopClosed())
closeTop();
@@ -601,39 +867,34 @@ void RegPressureTracker::advance() {
static_cast<RegionPressure&>(P).openBottom(CurrPos);
}
- RegisterOperands RegOpers;
- RegOpers.collect(*CurrPos, *TRI, *MRI);
-
- for (unsigned Reg : RegOpers.Uses) {
- // Discover live-ins.
- bool isLive = LiveRegs.contains(Reg);
- if (!isLive)
- discoverLiveIn(Reg);
+ for (const RegisterMaskPair &Use : RegOpers.Uses) {
+ unsigned Reg = Use.RegUnit;
+ LaneBitmask LiveMask = LiveRegs.contains(Reg);
+ LaneBitmask LiveIn = Use.LaneMask & ~LiveMask;
+ if (LiveIn != 0) {
+ discoverLiveIn(RegisterMaskPair(Reg, LiveIn));
+ increaseRegPressure(Reg, LiveMask, LiveMask | LiveIn);
+ LiveRegs.insert(RegisterMaskPair(Reg, LiveIn));
+ }
// Kill liveness at last uses.
- bool lastUse = false;
if (RequireIntervals) {
- const LiveRange *LR = getLiveRange(*LIS, Reg);
- lastUse = LR && LR->Query(SlotIdx).isKill();
- } else {
- // Allocatable physregs are always single-use before register rewriting.
- lastUse = !TargetRegisterInfo::isVirtualRegister(Reg);
+ LaneBitmask LastUseMask = getLastUsedLanes(Reg, SlotIdx);
+ if (LastUseMask != 0) {
+ LiveRegs.erase(RegisterMaskPair(Reg, LastUseMask));
+ decreaseRegPressure(Reg, LiveMask, LiveMask & ~LastUseMask);
+ }
}
- if (lastUse && isLive) {
- LiveRegs.erase(Reg);
- decreaseRegPressure(Reg);
- } else if (!lastUse && !isLive)
- increaseRegPressure(Reg);
}
// Generate liveness for defs.
- for (unsigned Reg : RegOpers.Defs) {
- if (LiveRegs.insert(Reg))
- increaseRegPressure(Reg);
+ for (const RegisterMaskPair &Def : RegOpers.Defs) {
+ LaneBitmask PreviousMask = LiveRegs.insert(Def);
+ LaneBitmask NewMask = PreviousMask | Def.LaneMask;
+ increaseRegPressure(Def.RegUnit, PreviousMask, NewMask);
}
// Boost pressure for all dead defs together.
- increaseRegPressure(RegOpers.DeadDefs);
- decreaseRegPressure(RegOpers.DeadDefs);
+ bumpDeadDefs(RegOpers.DeadDefs);
// Find the next instruction.
do
@@ -641,6 +902,17 @@ void RegPressureTracker::advance() {
while (CurrPos != MBB->end() && CurrPos->isDebugValue());
}
+void RegPressureTracker::advance() {
+ const MachineInstr &MI = *CurrPos;
+ RegisterOperands RegOpers;
+ RegOpers.collect(MI, *TRI, *MRI, TrackLaneMasks, false);
+ if (TrackLaneMasks) {
+ SlotIndex SlotIdx = getCurrSlot();
+ RegOpers.adjustLaneLiveness(*LIS, *MRI, SlotIdx);
+ }
+ advance(RegOpers);
+}
+
/// Find the max change in excess pressure across all sets.
static void computeExcessPressureDelta(ArrayRef<unsigned> OldPressureVec,
ArrayRef<unsigned> NewPressureVec,
@@ -728,22 +1000,38 @@ static void computeMaxPressureDelta(ArrayRef<unsigned> OldMaxPressureVec,
void RegPressureTracker::bumpUpwardPressure(const MachineInstr *MI) {
assert(!MI->isDebugValue() && "Expect a nondebug instruction.");
+ SlotIndex SlotIdx;
+ if (RequireIntervals)
+ SlotIdx = LIS->getInstructionIndex(*MI).getRegSlot();
+
// Account for register pressure similar to RegPressureTracker::recede().
RegisterOperands RegOpers;
- RegOpers.collect(*MI, *TRI, *MRI, /*IgnoreDead=*/true);
+ RegOpers.collect(*MI, *TRI, *MRI, TrackLaneMasks, /*IgnoreDead=*/true);
assert(RegOpers.DeadDefs.size() == 0);
- if (RequireIntervals)
+ if (TrackLaneMasks)
+ RegOpers.adjustLaneLiveness(*LIS, *MRI, SlotIdx);
+ else if (RequireIntervals)
RegOpers.detectDeadDefs(*MI, *LIS);
+ // Boost max pressure for all dead defs together: bumpDeadDefs raises and
+ // then lowers CurrSetPressure, so only MaxSetPressure keeps the change.
+ bumpDeadDefs(RegOpers.DeadDefs);
+
// Kill liveness at live defs.
- for (unsigned Reg : RegOpers.Defs) {
- if (!containsReg(RegOpers.Uses, Reg))
- decreaseRegPressure(Reg);
+ for (const RegisterMaskPair &P : RegOpers.Defs) {
+ unsigned Reg = P.RegUnit;
+ LaneBitmask LiveLanes = LiveRegs.contains(Reg);
+ LaneBitmask UseLanes = getRegLanes(RegOpers.Uses, Reg);
+ LaneBitmask DefLanes = P.LaneMask;
+ LaneBitmask LiveAfter = (LiveLanes & ~DefLanes) | UseLanes;
+ decreaseRegPressure(Reg, LiveLanes, LiveAfter);
}
// Generate liveness for uses.
- for (unsigned Reg : RegOpers.Uses) {
- if (!LiveRegs.contains(Reg))
- increaseRegPressure(Reg);
+ for (const RegisterMaskPair &P : RegOpers.Uses) {
+ unsigned Reg = P.RegUnit;
+ LaneBitmask LiveLanes = LiveRegs.contains(Reg);
+ LaneBitmask LiveAfter = LiveLanes | P.LaneMask;
+ increaseRegPressure(Reg, LiveLanes, LiveAfter);
}
}
@@ -888,15 +1176,58 @@ getUpwardPressureDelta(const MachineInstr *MI, /*const*/ PressureDiff &PDiff,
}
/// Helper to find a vreg use between two indices [PriorUseIdx, NextUseIdx).
-static bool findUseBetween(unsigned Reg, SlotIndex PriorUseIdx,
- SlotIndex NextUseIdx, const MachineRegisterInfo &MRI,
- const LiveIntervals *LIS) {
- for (const MachineInstr &MI : MRI.use_nodbg_instructions(Reg)) {
- SlotIndex InstSlot = LIS->getInstructionIndex(&MI).getRegSlot();
- if (InstSlot >= PriorUseIdx && InstSlot < NextUseIdx)
- return true;
+/// The query starts with a lane bitmask which gets lanes/bits removed for every
+/// use we find.
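+/// For example, a query starting with LastUseMask 0b0110 that finds a use of
+/// the 0b0010 lanes continues searching with 0b0100 and stops early once all
+/// lanes have been accounted for.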
+static LaneBitmask findUseBetween(unsigned Reg, LaneBitmask LastUseMask,
+ SlotIndex PriorUseIdx, SlotIndex NextUseIdx,
+ const MachineRegisterInfo &MRI,
+ const LiveIntervals *LIS) {
+ const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();
+ for (const MachineOperand &MO : MRI.use_nodbg_operands(Reg)) {
+ if (MO.isUndef())
+ continue;
+ const MachineInstr *MI = MO.getParent();
+ SlotIndex InstSlot = LIS->getInstructionIndex(*MI).getRegSlot();
+ if (InstSlot >= PriorUseIdx && InstSlot < NextUseIdx) {
+ unsigned SubRegIdx = MO.getSubReg();
+ LaneBitmask UseMask = TRI.getSubRegIndexLaneMask(SubRegIdx);
+ LastUseMask &= ~UseMask;
+ if (LastUseMask == 0)
+ return 0;
+ }
}
- return false;
+ return LastUseMask;
+}
+
+LaneBitmask RegPressureTracker::getLiveLanesAt(unsigned RegUnit,
+ SlotIndex Pos) const {
+ assert(RequireIntervals);
+ return getLanesWithProperty(*LIS, *MRI, TrackLaneMasks, RegUnit, Pos, ~0u,
+ [](const LiveRange &LR, SlotIndex Pos) {
+ return LR.liveAt(Pos);
+ });
+}
+
+LaneBitmask RegPressureTracker::getLastUsedLanes(unsigned RegUnit,
+ SlotIndex Pos) const {
+ assert(RequireIntervals);
+ return getLanesWithProperty(*LIS, *MRI, TrackLaneMasks, RegUnit,
+ Pos.getBaseIndex(), 0,
+ [](const LiveRange &LR, SlotIndex Pos) {
+ const LiveRange::Segment *S = LR.getSegmentContaining(Pos);
+ return S != nullptr && S->end == Pos.getRegSlot();
+ });
+}
+
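+/// Return the lanes of \p RegUnit that are live across \p Pos: live
+/// immediately before the position and neither defined nor killed by it.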
+LaneBitmask RegPressureTracker::getLiveThroughAt(unsigned RegUnit,
+ SlotIndex Pos) const {
+ assert(RequireIntervals);
+ return getLanesWithProperty(*LIS, *MRI, TrackLaneMasks, RegUnit, Pos, 0u,
+ [](const LiveRange &LR, SlotIndex Pos) {
+ const LiveRange::Segment *S = LR.getSegmentContaining(Pos);
+ return S != nullptr && S->start < Pos.getRegSlot(true) &&
+ S->end != Pos.getDeadSlot();
+ });
}
/// Record the downward impact of a single instruction on current register
@@ -908,39 +1239,49 @@ static bool findUseBetween(unsigned Reg, SlotIndex PriorUseIdx,
void RegPressureTracker::bumpDownwardPressure(const MachineInstr *MI) {
assert(!MI->isDebugValue() && "Expect a nondebug instruction.");
- // Account for register pressure similar to RegPressureTracker::recede().
- RegisterOperands RegOpers;
- RegOpers.collect(*MI, *TRI, *MRI);
-
- // Kill liveness at last uses. Assume allocatable physregs are single-use
- // rather than checking LiveIntervals.
SlotIndex SlotIdx;
if (RequireIntervals)
- SlotIdx = LIS->getInstructionIndex(MI).getRegSlot();
+ SlotIdx = LIS->getInstructionIndex(*MI).getRegSlot();
- for (unsigned Reg : RegOpers.Uses) {
- if (RequireIntervals) {
+ // Account for register pressure similar to RegPressureTracker::recede().
+ RegisterOperands RegOpers;
+ RegOpers.collect(*MI, *TRI, *MRI, TrackLaneMasks, false);
+ if (TrackLaneMasks)
+ RegOpers.adjustLaneLiveness(*LIS, *MRI, SlotIdx);
+
+ if (RequireIntervals) {
+ for (const RegisterMaskPair &Use : RegOpers.Uses) {
+ unsigned Reg = Use.RegUnit;
+ LaneBitmask LastUseMask = getLastUsedLanes(Reg, SlotIdx);
+ if (LastUseMask == 0)
+ continue;
+ // The LastUseMask is queried from the liveness information of an
+ // instruction which may be further down the schedule. Some lanes may
+ // actually not be last uses for the current position.
// FIXME: allow the caller to pass in the list of vreg uses that remain
// to be bottom-scheduled to avoid searching uses at each query.
SlotIndex CurrIdx = getCurrSlot();
- const LiveRange *LR = getLiveRange(*LIS, Reg);
- if (LR) {
- LiveQueryResult LRQ = LR->Query(SlotIdx);
- if (LRQ.isKill() && !findUseBetween(Reg, CurrIdx, SlotIdx, *MRI, LIS))
- decreaseRegPressure(Reg);
- }
- } else if (!TargetRegisterInfo::isVirtualRegister(Reg)) {
- // Allocatable physregs are always single-use before register rewriting.
- decreaseRegPressure(Reg);
+ LastUseMask
+ = findUseBetween(Reg, LastUseMask, CurrIdx, SlotIdx, *MRI, LIS);
+ if (LastUseMask == 0)
+ continue;
+
+ LaneBitmask LiveMask = LiveRegs.contains(Reg);
+ LaneBitmask NewMask = LiveMask & ~LastUseMask;
+ decreaseRegPressure(Reg, LiveMask, NewMask);
}
}
// Generate liveness for defs.
- increaseRegPressure(RegOpers.Defs);
+ for (const RegisterMaskPair &Def : RegOpers.Defs) {
+ unsigned Reg = Def.RegUnit;
+ LaneBitmask LiveMask = LiveRegs.contains(Reg);
+ LaneBitmask NewMask = LiveMask | Def.LaneMask;
+ increaseRegPressure(Reg, LiveMask, NewMask);
+ }
// Boost pressure for all dead defs together.
- increaseRegPressure(RegOpers.DeadDefs);
- decreaseRegPressure(RegOpers.DeadDefs);
+ bumpDeadDefs(RegOpers.DeadDefs);
}
/// Consider the pressure increase caused by traversing this instruction
diff --git a/contrib/llvm/lib/CodeGen/RegisterScavenging.cpp b/contrib/llvm/lib/CodeGen/RegisterScavenging.cpp
index 8fa1bf7..6b80179 100644
--- a/contrib/llvm/lib/CodeGen/RegisterScavenging.cpp
+++ b/contrib/llvm/lib/CodeGen/RegisterScavenging.cpp
@@ -7,10 +7,11 @@
//
//===----------------------------------------------------------------------===//
//
-// This file implements the machine register scavenger. It can provide
-// information, such as unused registers, at any point in a machine basic block.
-// It also provides a mechanism to make registers available by evicting them to
-// spill slots.
+/// \file
+/// This file implements the machine register scavenger. It can provide
+/// information, such as unused registers, at any point in a machine basic
+/// block. It also provides a mechanism to make registers available by evicting
+/// them to spill slots.
//
//===----------------------------------------------------------------------===//
@@ -30,7 +31,6 @@ using namespace llvm;
#define DEBUG_TYPE "reg-scavenging"
-/// setUsed - Set the register units of this register as used.
void RegScavenger::setRegUsed(unsigned Reg, LaneBitmask LaneMask) {
for (MCRegUnitMaskIterator RUI(Reg, TRI); RUI.isValid(); ++RUI) {
LaneBitmask UnitMask = (*RUI).second;
@@ -49,9 +49,6 @@ void RegScavenger::initRegState() {
// All register units start out unused.
RegUnitsAvailable.set();
- if (!MBB)
- return;
-
// Live-in registers are in use.
for (const auto &LI : MBB->liveins())
setRegUsed(LI.PhysReg, LI.LaneMask);
@@ -63,8 +60,8 @@ void RegScavenger::initRegState() {
setRegUsed(I);
}
-void RegScavenger::enterBasicBlock(MachineBasicBlock *mbb) {
- MachineFunction &MF = *mbb->getParent();
+void RegScavenger::enterBasicBlock(MachineBasicBlock &MBB) {
+ MachineFunction &MF = *MBB.getParent();
TII = MF.getSubtarget().getInstrInfo();
TRI = MF.getSubtarget().getRegisterInfo();
MRI = &MF.getRegInfo();
@@ -78,15 +75,15 @@ void RegScavenger::enterBasicBlock(MachineBasicBlock *mbb) {
"Cannot use register scavenger with inaccurate liveness");
// Self-initialize.
- if (!MBB) {
+ if (!this->MBB) {
NumRegUnits = TRI->getNumRegUnits();
RegUnitsAvailable.resize(NumRegUnits);
KillRegUnits.resize(NumRegUnits);
DefRegUnits.resize(NumRegUnits);
TmpRegUnits.resize(NumRegUnits);
}
+ this->MBB = &MBB;
- MBB = mbb;
initRegState();
Tracking = false;
@@ -100,17 +97,15 @@ void RegScavenger::addRegUnits(BitVector &BV, unsigned Reg) {
void RegScavenger::determineKillsAndDefs() {
assert(Tracking && "Must be tracking to determine kills and defs");
- MachineInstr *MI = MBBI;
- assert(!MI->isDebugValue() && "Debug values have no kills or defs");
+ MachineInstr &MI = *MBBI;
+ assert(!MI.isDebugValue() && "Debug values have no kills or defs");
// Find out which registers are early clobbered, killed, defined, and marked
// def-dead in this instruction.
KillRegUnits.reset();
DefRegUnits.reset();
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- const MachineOperand &MO = MI->getOperand(i);
+ for (const MachineOperand &MO : MI.operands()) {
if (MO.isRegMask()) {
-
TmpRegUnits.clear();
for (unsigned RU = 0, RUEnd = TRI->getNumRegUnits(); RU != RUEnd; ++RU) {
for (MCRegUnitRootIterator RURI(RU, TRI); RURI.isValid(); ++RURI) {
@@ -120,14 +115,14 @@ void RegScavenger::determineKillsAndDefs() {
}
}
}
-
+
// Apply the mask.
KillRegUnits |= TmpRegUnits;
}
if (!MO.isReg())
continue;
unsigned Reg = MO.getReg();
- if (!Reg || TargetRegisterInfo::isVirtualRegister(Reg) || isReserved(Reg))
+ if (!TargetRegisterInfo::isPhysicalRegister(Reg) || isReserved(Reg))
continue;
if (MO.isUse()) {
@@ -149,8 +144,8 @@ void RegScavenger::determineKillsAndDefs() {
void RegScavenger::unprocess() {
assert(Tracking && "Cannot unprocess because we're not tracking");
- MachineInstr *MI = MBBI;
- if (!MI->isDebugValue()) {
+ MachineInstr &MI = *MBBI;
+ if (!MI.isDebugValue()) {
determineKillsAndDefs();
// Commit the changes.
@@ -176,30 +171,29 @@ void RegScavenger::forward() {
}
assert(MBBI != MBB->end() && "Already at the end of the basic block!");
- MachineInstr *MI = MBBI;
+ MachineInstr &MI = *MBBI;
for (SmallVectorImpl<ScavengedInfo>::iterator I = Scavenged.begin(),
IE = Scavenged.end(); I != IE; ++I) {
- if (I->Restore != MI)
+ if (I->Restore != &MI)
continue;
I->Reg = 0;
I->Restore = nullptr;
}
- if (MI->isDebugValue())
+ if (MI.isDebugValue())
return;
determineKillsAndDefs();
// Verify uses and defs.
#ifndef NDEBUG
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- const MachineOperand &MO = MI->getOperand(i);
+ for (const MachineOperand &MO : MI.operands()) {
if (!MO.isReg())
continue;
unsigned Reg = MO.getReg();
- if (!Reg || TargetRegisterInfo::isVirtualRegister(Reg) || isReserved(Reg))
+ if (!TargetRegisterInfo::isPhysicalRegister(Reg) || isReserved(Reg))
continue;
if (MO.isUse()) {
if (MO.isUndef())
@@ -261,33 +255,24 @@ bool RegScavenger::isRegUsed(unsigned Reg, bool includeReserved) const {
}
unsigned RegScavenger::FindUnusedReg(const TargetRegisterClass *RC) const {
- for (TargetRegisterClass::iterator I = RC->begin(), E = RC->end();
- I != E; ++I)
- if (!isRegUsed(*I)) {
- DEBUG(dbgs() << "Scavenger found unused reg: " << TRI->getName(*I) <<
+ for (unsigned Reg : *RC) {
+ if (!isRegUsed(Reg)) {
+ DEBUG(dbgs() << "Scavenger found unused reg: " << TRI->getName(Reg) <<
"\n");
- return *I;
+ return Reg;
}
+ }
return 0;
}
-/// getRegsAvailable - Return all available registers in the register class
-/// in Mask.
BitVector RegScavenger::getRegsAvailable(const TargetRegisterClass *RC) {
BitVector Mask(TRI->getNumRegs());
- for (TargetRegisterClass::iterator I = RC->begin(), E = RC->end();
- I != E; ++I)
- if (!isRegUsed(*I))
- Mask.set(*I);
+ for (unsigned Reg : *RC)
+ if (!isRegUsed(Reg))
+ Mask.set(Reg);
return Mask;
}
-/// findSurvivorReg - Return the candidate register that is unused for the
-/// longest after StartMII. UseMI is set to the instruction where the search
-/// stopped.
-///
-/// No more than InstrLimit instructions are inspected.
-///
unsigned RegScavenger::findSurvivorReg(MachineBasicBlock::iterator StartMI,
BitVector &Candidates,
unsigned InstrLimit,
@@ -309,8 +294,7 @@ unsigned RegScavenger::findSurvivorReg(MachineBasicBlock::iterator StartMI,
bool isVirtKillInsn = false;
bool isVirtDefInsn = false;
// Remove any candidates touched by instruction.
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- const MachineOperand &MO = MI->getOperand(i);
+ for (const MachineOperand &MO : MI->operands()) {
if (MO.isRegMask())
Candidates.clearBitsNotInMask(MO.getRegMask());
if (!MO.isReg() || MO.isUndef() || !MO.getReg())
@@ -345,20 +329,19 @@ unsigned RegScavenger::findSurvivorReg(MachineBasicBlock::iterator StartMI,
}
// If we ran off the end, that's where we want to restore.
if (MI == ME) RestorePointMI = ME;
- assert (RestorePointMI != StartMI &&
- "No available scavenger restore location!");
+ assert(RestorePointMI != StartMI &&
+ "No available scavenger restore location!");
// We ran out of candidates, so stop the search.
UseMI = RestorePointMI;
return Survivor;
}
-static unsigned getFrameIndexOperandNum(MachineInstr *MI) {
+static unsigned getFrameIndexOperandNum(MachineInstr &MI) {
unsigned i = 0;
- while (!MI->getOperand(i).isFI()) {
+ while (!MI.getOperand(i).isFI()) {
++i;
- assert(i < MI->getNumOperands() &&
- "Instr doesn't have FrameIndex operand!");
+ assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!");
}
return i;
}
@@ -366,13 +349,13 @@ static unsigned getFrameIndexOperandNum(MachineInstr *MI) {
unsigned RegScavenger::scavengeRegister(const TargetRegisterClass *RC,
MachineBasicBlock::iterator I,
int SPAdj) {
+ MachineInstr &MI = *I;
+ const MachineFunction &MF = *MI.getParent()->getParent();
// Consider all allocatable registers in the register class initially
- BitVector Candidates =
- TRI->getAllocatableSet(*I->getParent()->getParent(), RC);
+ BitVector Candidates = TRI->getAllocatableSet(MF, RC);
// Exclude all the registers being used by the instruction.
- for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
- MachineOperand &MO = I->getOperand(i);
+ for (const MachineOperand &MO : MI.operands()) {
if (MO.isReg() && MO.getReg() != 0 && !(MO.isUse() && MO.isUndef()) &&
!TargetRegisterInfo::isVirtualRegister(MO.getReg()))
Candidates.reset(MO.getReg());
@@ -395,16 +378,42 @@ unsigned RegScavenger::scavengeRegister(const TargetRegisterClass *RC,
return SReg;
}
- // Find an available scavenging slot.
- unsigned SI;
- for (SI = 0; SI < Scavenged.size(); ++SI)
- if (Scavenged[SI].Reg == 0)
- break;
+ // Find an available scavenging slot with size and alignment matching
+ // the requirements of the class RC.
+ const MachineFrameInfo &MFI = *MF.getFrameInfo();
+ unsigned NeedSize = RC->getSize();
+ unsigned NeedAlign = RC->getAlignment();
+
+ unsigned SI = Scavenged.size(), Diff = UINT_MAX;
+ int FIB = MFI.getObjectIndexBegin(), FIE = MFI.getObjectIndexEnd();
+ for (unsigned I = 0; I < Scavenged.size(); ++I) {
+ if (Scavenged[I].Reg != 0)
+ continue;
+ // Verify that this slot is valid for this register.
+ int FI = Scavenged[I].FrameIndex;
+ if (FI < FIB || FI >= FIE)
+ continue;
+ unsigned S = MFI.getObjectSize(FI);
+ unsigned A = MFI.getObjectAlignment(FI);
+ if (NeedSize > S || NeedAlign > A)
+ continue;
+ // Avoid wasting slots with large size and/or large alignment. Pick one
+ // that is the best fit for this register class in street metric, i.e. the
+ // sum of the size and alignment slack.
+ // Picking a larger slot than necessary could happen if a slot for a
+ // larger register is reserved before a slot for a smaller one. When
+ // trying to spill a smaller register, the large slot would be found
+ // first, thus making it impossible to spill the larger register later.
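+ // For example, with NeedSize=4 and NeedAlign=4, a free (size 8, align 8)
+ // slot scores D = 8 while a free (size 4, align 8) slot scores D = 4, so
+ // the tighter-fitting slot is chosen.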
+ unsigned D = (S-NeedSize) + (A-NeedAlign);
+ if (D < Diff) {
+ SI = I;
+ Diff = D;
+ }
+ }
if (SI == Scavenged.size()) {
// We need to scavenge a register but have no spill slot, the target
// must know how to do it (if not, we'll assert below).
- Scavenged.push_back(ScavengedInfo());
+ Scavenged.push_back(ScavengedInfo(FIE));
}
// Avoid infinite regress
@@ -414,13 +423,18 @@ unsigned RegScavenger::scavengeRegister(const TargetRegisterClass *RC,
// otherwise, use the emergency stack spill slot.
if (!TRI->saveScavengerRegister(*MBB, I, UseMI, RC, SReg)) {
// Spill the scavenged register before I.
- assert(Scavenged[SI].FrameIndex >= 0 &&
- "Cannot scavenge register without an emergency spill slot!");
+ int FI = Scavenged[SI].FrameIndex;
+ if (FI < FIB || FI >= FIE) {
+ std::string Msg = std::string("Error while trying to spill ") +
+ TRI->getName(SReg) + " from class " + TRI->getRegClassName(RC) +
+ ": Cannot scavenge register without an emergency spill slot!";
+ report_fatal_error(Msg.c_str());
+ }
TII->storeRegToStackSlot(*MBB, I, SReg, true, Scavenged[SI].FrameIndex,
RC, TRI);
MachineBasicBlock::iterator II = std::prev(I);
- unsigned FIOperandNum = getFrameIndexOperandNum(II);
+ unsigned FIOperandNum = getFrameIndexOperandNum(*II);
TRI->eliminateFrameIndex(II, SPAdj, FIOperandNum, this);
// Restore the scavenged register before its use (or first terminator).
@@ -428,11 +442,11 @@ unsigned RegScavenger::scavengeRegister(const TargetRegisterClass *RC,
RC, TRI);
II = std::prev(UseMI);
- FIOperandNum = getFrameIndexOperandNum(II);
+ FIOperandNum = getFrameIndexOperandNum(*II);
TRI->eliminateFrameIndex(II, SPAdj, FIOperandNum, this);
}
- Scavenged[SI].Restore = std::prev(UseMI);
+ Scavenged[SI].Restore = &*std::prev(UseMI);
// Doing this here leads to infinite regress.
// Scavenged[SI].Reg = SReg;
diff --git a/contrib/llvm/lib/CodeGen/RegisterUsageInfo.cpp b/contrib/llvm/lib/CodeGen/RegisterUsageInfo.cpp
new file mode 100644
index 0000000..5cf3e57
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/RegisterUsageInfo.cpp
@@ -0,0 +1,93 @@
+//===- RegisterUsageInfo.cpp - Register Usage Information Storage ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// This pass is required to take advantage of the interprocedural register
+/// allocation infrastructure.
+///
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/RegisterUsageInfo.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "ip-regalloc"
+
+static cl::opt<bool> DumpRegUsage(
+ "print-regusage", cl::init(false), cl::Hidden,
+ cl::desc("print register usage details collected for analysis."));
+
+INITIALIZE_PASS(PhysicalRegisterUsageInfo, "reg-usage-info",
+ "Register Usage Informartion Stroage", false, true)
+
+char PhysicalRegisterUsageInfo::ID = 0;
+
+void PhysicalRegisterUsageInfo::anchor() {}
+
+bool PhysicalRegisterUsageInfo::doInitialization(Module &M) {
+ RegMasks.grow(M.size());
+ return false;
+}
+
+bool PhysicalRegisterUsageInfo::doFinalization(Module &M) {
+ if (DumpRegUsage)
+ print(errs());
+
+ RegMasks.shrink_and_clear();
+ return false;
+}
+
+void PhysicalRegisterUsageInfo::storeUpdateRegUsageInfo(
+ const Function *FP, std::vector<uint32_t> RegMask) {
+ assert(FP != nullptr && "Function * can't be nullptr.");
+ RegMasks[FP] = std::move(RegMask);
+}
+
+const std::vector<uint32_t> *
+PhysicalRegisterUsageInfo::getRegUsageInfo(const Function *FP) {
+ auto It = RegMasks.find(FP);
+ if (It != RegMasks.end())
+ return &(It->second);
+ return nullptr;
+}
+
+void PhysicalRegisterUsageInfo::print(raw_ostream &OS, const Module *M) const {
+ const TargetRegisterInfo *TRI;
+
+ typedef std::pair<const Function *, std::vector<uint32_t>> FuncPtrRegMaskPair;
+
+ SmallVector<const FuncPtrRegMaskPair *, 64> FPRMPairVector;
+
+ // Create a vector of pointers to RegMasks entries.
+ for (const auto &RegMask : RegMasks)
+ FPRMPairVector.push_back(&RegMask);
+
+ // Sort the vector to print the analysis in alphabetical order of function name.
+ std::sort(
+ FPRMPairVector.begin(), FPRMPairVector.end(),
+ [](const FuncPtrRegMaskPair *A, const FuncPtrRegMaskPair *B) -> bool {
+ return A->first->getName() < B->first->getName();
+ });
+
+ for (const FuncPtrRegMaskPair *FPRMPair : FPRMPairVector) {
+ OS << FPRMPair->first->getName() << " "
+ << "Clobbered Registers: ";
+ TRI = TM->getSubtarget<TargetSubtargetInfo>(*(FPRMPair->first))
+ .getRegisterInfo();
+
+ for (unsigned PReg = 1, PRegE = TRI->getNumRegs(); PReg < PRegE; ++PReg) {
+ if (MachineOperand::clobbersPhysReg(&(FPRMPair->second[0]), PReg))
+ OS << TRI->getName(PReg) << " ";
+ }
+ OS << "\n";
+ }
+}
diff --git a/contrib/llvm/lib/CodeGen/RenameIndependentSubregs.cpp b/contrib/llvm/lib/CodeGen/RenameIndependentSubregs.cpp
new file mode 100644
index 0000000..ea952d9
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/RenameIndependentSubregs.cpp
@@ -0,0 +1,388 @@
+//===-- RenameIndependentSubregs.cpp - Live Interval Analysis -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// Rename independent subregisters looks for virtual registers with
+/// independently used subregisters and renames them to new virtual registers.
+/// Example: In the following:
+/// %vreg0:sub0<read-undef> = ...
+/// %vreg0:sub1 = ...
+/// use %vreg0:sub1
+/// %vreg0:sub1 = ...
+/// use %vreg0:sub1
+/// use %vreg0:sub0
+/// sub0 and sub1 are never used together, and we have two independent sub1
+/// definitions. This pass will rename to:
+/// %vreg0:sub0<read-undef> = ...
+/// %vreg1:sub1<read-undef> = ...
+/// use %vreg1:sub1
+/// %vreg2:sub1<read-undef> = ...
+/// use %vreg2:sub1
+/// use %vreg0:sub0
+//
+//===----------------------------------------------------------------------===//
+
+#include "LiveRangeUtils.h"
+#include "PHIEliminationUtils.h"
+#include "llvm/CodeGen/LiveInterval.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "rename-independent-subregs"
+
+namespace {
+
+class RenameIndependentSubregs : public MachineFunctionPass {
+public:
+ static char ID;
+ RenameIndependentSubregs() : MachineFunctionPass(ID) {}
+
+ const char *getPassName() const override {
+ return "Rename Disconnected Subregister Components";
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ AU.addRequired<LiveIntervals>();
+ AU.addPreserved<LiveIntervals>();
+ AU.addRequired<SlotIndexes>();
+ AU.addPreserved<SlotIndexes>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+private:
+ struct SubRangeInfo {
+ ConnectedVNInfoEqClasses ConEQ;
+ LiveInterval::SubRange *SR;
+ unsigned Index;
+
+ SubRangeInfo(LiveIntervals &LIS, LiveInterval::SubRange &SR,
+ unsigned Index)
+ : ConEQ(LIS), SR(&SR), Index(Index) {}
+ };
+
+ /// Split unrelated subregister components and rename them to new vregs.
+ bool renameComponents(LiveInterval &LI) const;
+
+ /// \brief Build a vector of SubRange infos and a union find set of
+ /// equivalence classes.
+ /// Returns true if more than 1 equivalence class was found.
+ bool findComponents(IntEqClasses &Classes,
+ SmallVectorImpl<SubRangeInfo> &SubRangeInfos,
+ LiveInterval &LI) const;
+
+ /// \brief Distribute the LiveInterval segments into the new LiveIntervals
+ /// belonging to their class.
+ void distribute(const IntEqClasses &Classes,
+ const SmallVectorImpl<SubRangeInfo> &SubRangeInfos,
+ const SmallVectorImpl<LiveInterval*> &Intervals) const;
+
+ /// \brief Constructs the main live range and adds missing undef+dead flags.
+ void computeMainRangesFixFlags(const IntEqClasses &Classes,
+ const SmallVectorImpl<SubRangeInfo> &SubRangeInfos,
+ const SmallVectorImpl<LiveInterval*> &Intervals) const;
+
+ /// Rewrite Machine Operands to use the new vreg belonging to their class.
+ void rewriteOperands(const IntEqClasses &Classes,
+ const SmallVectorImpl<SubRangeInfo> &SubRangeInfos,
+ const SmallVectorImpl<LiveInterval*> &Intervals) const;
+
+
+ LiveIntervals *LIS;
+ MachineRegisterInfo *MRI;
+ const TargetInstrInfo *TII;
+};
+
+} // end anonymous namespace
+
+char RenameIndependentSubregs::ID;
+
+char &llvm::RenameIndependentSubregsID = RenameIndependentSubregs::ID;
+
+INITIALIZE_PASS_BEGIN(RenameIndependentSubregs, "rename-independent-subregs",
+ "Rename Independent Subregisters", false, false)
+INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
+INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
+INITIALIZE_PASS_END(RenameIndependentSubregs, "rename-independent-subregs",
+ "Rename Independent Subregisters", false, false)
+
+bool RenameIndependentSubregs::renameComponents(LiveInterval &LI) const {
+ // Shortcut: We cannot have split components with a single definition.
+ if (LI.valnos.size() < 2)
+ return false;
+
+ SmallVector<SubRangeInfo, 4> SubRangeInfos;
+ IntEqClasses Classes;
+ if (!findComponents(Classes, SubRangeInfos, LI))
+ return false;
+
+ // Create a new VReg for each class.
+ unsigned Reg = LI.reg;
+ const TargetRegisterClass *RegClass = MRI->getRegClass(Reg);
+ SmallVector<LiveInterval*, 4> Intervals;
+ Intervals.push_back(&LI);
+ DEBUG(dbgs() << PrintReg(Reg) << ": Found " << Classes.getNumClasses()
+ << " equivalence classes.\n");
+ DEBUG(dbgs() << PrintReg(Reg) << ": Splitting into newly created:");
+ for (unsigned I = 1, NumClasses = Classes.getNumClasses(); I < NumClasses;
+ ++I) {
+ unsigned NewVReg = MRI->createVirtualRegister(RegClass);
+ LiveInterval &NewLI = LIS->createEmptyInterval(NewVReg);
+ Intervals.push_back(&NewLI);
+ DEBUG(dbgs() << ' ' << PrintReg(NewVReg));
+ }
+ DEBUG(dbgs() << '\n');
+
+ rewriteOperands(Classes, SubRangeInfos, Intervals);
+ distribute(Classes, SubRangeInfos, Intervals);
+ computeMainRangesFixFlags(Classes, SubRangeInfos, Intervals);
+ return true;
+}
+
+bool RenameIndependentSubregs::findComponents(IntEqClasses &Classes,
+ SmallVectorImpl<RenameIndependentSubregs::SubRangeInfo> &SubRangeInfos,
+ LiveInterval &LI) const {
+ // First step: Create connected components for the VNInfos inside the
+ // subranges and count the global number of such components.
+ unsigned NumComponents = 0;
+ for (LiveInterval::SubRange &SR : LI.subranges()) {
+ SubRangeInfos.push_back(SubRangeInfo(*LIS, SR, NumComponents));
+ ConnectedVNInfoEqClasses &ConEQ = SubRangeInfos.back().ConEQ;
+
+ unsigned NumSubComponents = ConEQ.Classify(SR);
+ NumComponents += NumSubComponents;
+ }
+ // Shortcut: With only 1 subrange, the normal separate component tests are
+ // enough and we do not need to perform the union-find on the subregister
+ // segments.
+ if (SubRangeInfos.size() < 2)
+ return false;
+
+ // Next step: Build union-find structure over all subranges and merge classes
+ // across subranges when they are affected by the same MachineOperand.
+ const TargetRegisterInfo &TRI = *MRI->getTargetRegisterInfo();
+ Classes.grow(NumComponents);
+ unsigned Reg = LI.reg;
+ for (const MachineOperand &MO : MRI->reg_nodbg_operands(Reg)) {
+ if (!MO.isDef() && !MO.readsReg())
+ continue;
+ unsigned SubRegIdx = MO.getSubReg();
+ LaneBitmask LaneMask = TRI.getSubRegIndexLaneMask(SubRegIdx);
+ unsigned MergedID = ~0u;
+ for (RenameIndependentSubregs::SubRangeInfo &SRInfo : SubRangeInfos) {
+ const LiveInterval::SubRange &SR = *SRInfo.SR;
+ if ((SR.LaneMask & LaneMask) == 0)
+ continue;
+ SlotIndex Pos = LIS->getInstructionIndex(*MO.getParent());
+ Pos = MO.isDef() ? Pos.getRegSlot(MO.isEarlyClobber())
+ : Pos.getBaseIndex();
+ const VNInfo *VNI = SR.getVNInfoAt(Pos);
+ if (VNI == nullptr)
+ continue;
+
+ // Map to the local representative ID.
+ unsigned LocalID = SRInfo.ConEQ.getEqClass(VNI);
+ // Global ID
+ unsigned ID = LocalID + SRInfo.Index;
+ // Merge other sets
+ MergedID = MergedID == ~0u ? ID : Classes.join(MergedID, ID);
+ }
+ }
+
+ // Early exit if we ended up with a single equivalence class.
+ Classes.compress();
+ unsigned NumClasses = Classes.getNumClasses();
+ return NumClasses > 1;
+}
+
+void RenameIndependentSubregs::rewriteOperands(const IntEqClasses &Classes,
+ const SmallVectorImpl<SubRangeInfo> &SubRangeInfos,
+ const SmallVectorImpl<LiveInterval*> &Intervals) const {
+ const TargetRegisterInfo &TRI = *MRI->getTargetRegisterInfo();
+ unsigned Reg = Intervals[0]->reg;
+ for (MachineRegisterInfo::reg_nodbg_iterator I = MRI->reg_nodbg_begin(Reg),
+ E = MRI->reg_nodbg_end(); I != E; ) {
+ MachineOperand &MO = *I++;
+ if (!MO.isDef() && !MO.readsReg())
+ continue;
+
+ MachineInstr &MI = *MO.getParent();
+
+ SlotIndex Pos = LIS->getInstructionIndex(MI);
+ unsigned SubRegIdx = MO.getSubReg();
+ LaneBitmask LaneMask = TRI.getSubRegIndexLaneMask(SubRegIdx);
+
+ unsigned ID = ~0u;
+ for (const SubRangeInfo &SRInfo : SubRangeInfos) {
+ const LiveInterval::SubRange &SR = *SRInfo.SR;
+ if ((SR.LaneMask & LaneMask) == 0)
+ continue;
+ LiveRange::const_iterator I = SR.find(Pos);
+ if (I == SR.end())
+ continue;
+
+ const VNInfo &VNI = *I->valno;
+ // Map to the local representative ID.
+ unsigned LocalID = SRInfo.ConEQ.getEqClass(&VNI);
+ // Global ID
+ ID = Classes[LocalID + SRInfo.Index];
+ break;
+ }
+
+ unsigned VReg = Intervals[ID]->reg;
+ MO.setReg(VReg);
+ }
+ // TODO: We could attempt to recompute new register classes while visiting
+ // the operands: some of the split registers may be fine with less
+ // constrained classes than the original vreg.
+}
+
+void RenameIndependentSubregs::distribute(const IntEqClasses &Classes,
+ const SmallVectorImpl<SubRangeInfo> &SubRangeInfos,
+ const SmallVectorImpl<LiveInterval*> &Intervals) const {
+ unsigned NumClasses = Classes.getNumClasses();
+ SmallVector<unsigned, 8> VNIMapping;
+ SmallVector<LiveInterval::SubRange*, 8> SubRanges;
+ BumpPtrAllocator &Allocator = LIS->getVNInfoAllocator();
+ for (const SubRangeInfo &SRInfo : SubRangeInfos) {
+ LiveInterval::SubRange &SR = *SRInfo.SR;
+ unsigned NumValNos = SR.valnos.size();
+ VNIMapping.clear();
+ VNIMapping.reserve(NumValNos);
+ SubRanges.clear();
+ SubRanges.resize(NumClasses-1, nullptr);
+ for (unsigned I = 0; I < NumValNos; ++I) {
+ const VNInfo &VNI = *SR.valnos[I];
+ unsigned LocalID = SRInfo.ConEQ.getEqClass(&VNI);
+ unsigned ID = Classes[LocalID + SRInfo.Index];
+ VNIMapping.push_back(ID);
+ if (ID > 0 && SubRanges[ID-1] == nullptr)
+ SubRanges[ID-1] = Intervals[ID]->createSubRange(Allocator, SR.LaneMask);
+ }
+ DistributeRange(SR, SubRanges.data(), VNIMapping);
+ }
+}
+
+static bool subRangeLiveAt(const LiveInterval &LI, SlotIndex Pos) {
+ for (const LiveInterval::SubRange &SR : LI.subranges()) {
+ if (SR.liveAt(Pos))
+ return true;
+ }
+ return false;
+}
+
+void RenameIndependentSubregs::computeMainRangesFixFlags(
+ const IntEqClasses &Classes,
+ const SmallVectorImpl<SubRangeInfo> &SubRangeInfos,
+ const SmallVectorImpl<LiveInterval*> &Intervals) const {
+ BumpPtrAllocator &Allocator = LIS->getVNInfoAllocator();
+ const SlotIndexes &Indexes = *LIS->getSlotIndexes();
+ for (size_t I = 0, E = Intervals.size(); I < E; ++I) {
+ LiveInterval &LI = *Intervals[I];
+ unsigned Reg = LI.reg;
+
+ LI.removeEmptySubRanges();
+
+ // There must be a def (or live-in) before every use. Splitting vregs may
+ // violate this principle as the split vreg may not have a definition on
+ // every path. Fix this by inserting IMPLICIT_DEF instructions as necessary.
+ for (const LiveInterval::SubRange &SR : LI.subranges()) {
+ // Search for "PHI" value numbers in the subranges. We must find a live
+ // value in each predecessor block, and add an IMPLICIT_DEF where one is
+ // missing.
+ for (unsigned I = 0; I < SR.valnos.size(); ++I) {
+ const VNInfo &VNI = *SR.valnos[I];
+ if (VNI.isUnused() || !VNI.isPHIDef())
+ continue;
+
+ SlotIndex Def = VNI.def;
+ MachineBasicBlock &MBB = *Indexes.getMBBFromIndex(Def);
+ for (MachineBasicBlock *PredMBB : MBB.predecessors()) {
+ SlotIndex PredEnd = Indexes.getMBBEndIdx(PredMBB);
+ if (subRangeLiveAt(LI, PredEnd.getPrevSlot()))
+ continue;
+
+ MachineBasicBlock::iterator InsertPos =
+ llvm::findPHICopyInsertPoint(PredMBB, &MBB, Reg);
+ const MCInstrDesc &MCDesc = TII->get(TargetOpcode::IMPLICIT_DEF);
+ MachineInstrBuilder ImpDef = BuildMI(*PredMBB, InsertPos,
+ DebugLoc(), MCDesc, Reg);
+ SlotIndex DefIdx = LIS->InsertMachineInstrInMaps(*ImpDef);
+ SlotIndex RegDefIdx = DefIdx.getRegSlot();
+ for (LiveInterval::SubRange &SR : LI.subranges()) {
+ VNInfo *SRVNI = SR.getNextValue(RegDefIdx, Allocator);
+ SR.addSegment(LiveRange::Segment(RegDefIdx, PredEnd, SRVNI));
+ }
+ }
+ }
+ }
+
+ for (MachineOperand &MO : MRI->reg_nodbg_operands(Reg)) {
+ if (!MO.isDef())
+ continue;
+ unsigned SubRegIdx = MO.getSubReg();
+ if (SubRegIdx == 0)
+ continue;
+ // After assigning the new vreg there may be no other sublanes live in and
+ // out of the instruction anymore, so we need to add new dead and undef
+ // flags in these cases.
+ if (!MO.isUndef()) {
+ SlotIndex Pos = LIS->getInstructionIndex(*MO.getParent());
+ if (!subRangeLiveAt(LI, Pos))
+ MO.setIsUndef();
+ }
+ if (!MO.isDead()) {
+ SlotIndex Pos = LIS->getInstructionIndex(*MO.getParent()).getDeadSlot();
+ if (!subRangeLiveAt(LI, Pos))
+ MO.setIsDead();
+ }
+ }
+
+ if (I == 0)
+ LI.clear();
+ LIS->constructMainRangeFromSubranges(LI);
+ }
+}
+
+bool RenameIndependentSubregs::runOnMachineFunction(MachineFunction &MF) {
+ // Skip renaming if subregister liveness is not tracked.
+ if (!MF.getSubtarget().enableSubRegLiveness())
+ return false;
+
+ DEBUG(dbgs() << "Renaming independent subregister live ranges in "
+ << MF.getName() << '\n');
+
+ LIS = &getAnalysis<LiveIntervals>();
+ MRI = &MF.getRegInfo();
+ TII = MF.getSubtarget().getInstrInfo();
+
+ // Iterate over all vregs. Note that we only query getNumVirtRegs() once:
+ // the newly created vregs end up with higher numbers but do not need to be
+ // visited, as there can't be any further splitting.
+ bool Changed = false;
+ for (size_t I = 0, E = MRI->getNumVirtRegs(); I < E; ++I) {
+ unsigned Reg = TargetRegisterInfo::index2VirtReg(I);
+ if (!LIS->hasInterval(Reg))
+ continue;
+ LiveInterval &LI = LIS->getInterval(Reg);
+ if (!LI.hasSubRanges())
+ continue;
+
+ Changed |= renameComponents(LI);
+ }
+
+ return Changed;
+}
diff --git a/contrib/llvm/lib/CodeGen/SafeStack.cpp b/contrib/llvm/lib/CodeGen/SafeStack.cpp
new file mode 100644
index 0000000..4a1b995
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/SafeStack.cpp
@@ -0,0 +1,851 @@
+//===-- SafeStack.cpp - Safe Stack Insertion ------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass splits the stack into the safe stack (kept as-is for LLVM backend)
+// and the unsafe stack (explicitly allocated and managed through the runtime
+// support library).
+//
+// http://clang.llvm.org/docs/SafeStack.html
+//
+//===----------------------------------------------------------------------===//
+
+#include "SafeStackColoring.h"
+#include "SafeStackLayout.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/Analysis/BranchProbabilityInfo.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DIBuilder.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/MDBuilder.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_os_ostream.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/ModuleUtils.h"
+
+using namespace llvm;
+using namespace llvm::safestack;
+
+#define DEBUG_TYPE "safestack"
+
+enum UnsafeStackPtrStorageVal { ThreadLocalUSP, SingleThreadUSP };
+
+static cl::opt<UnsafeStackPtrStorageVal> USPStorage("safe-stack-usp-storage",
+ cl::Hidden, cl::init(ThreadLocalUSP),
+ cl::desc("Type of storage for the unsafe stack pointer"),
+ cl::values(clEnumValN(ThreadLocalUSP, "thread-local",
+ "Thread-local storage"),
+ clEnumValN(SingleThreadUSP, "single-thread",
+ "Non-thread-local storage"),
+ clEnumValEnd));
+
+namespace llvm {
+
+STATISTIC(NumFunctions, "Total number of functions");
+STATISTIC(NumUnsafeStackFunctions, "Number of functions with unsafe stack");
+STATISTIC(NumUnsafeStackRestorePointsFunctions,
+ "Number of functions that use setjmp or exceptions");
+
+STATISTIC(NumAllocas, "Total number of allocas");
+STATISTIC(NumUnsafeStaticAllocas, "Number of unsafe static allocas");
+STATISTIC(NumUnsafeDynamicAllocas, "Number of unsafe dynamic allocas");
+STATISTIC(NumUnsafeByValArguments, "Number of unsafe byval arguments");
+STATISTIC(NumUnsafeStackRestorePoints, "Number of setjmps and landingpads");
+
+} // namespace llvm
+
+namespace {
+
+/// Rewrite an SCEV expression for a memory access address to an expression that
+/// represents offset from the given alloca.
+///
+/// The implementation simply replaces all mentions of the alloca with zero.
+class AllocaOffsetRewriter : public SCEVRewriteVisitor<AllocaOffsetRewriter> {
+ const Value *AllocaPtr;
+
+public:
+ AllocaOffsetRewriter(ScalarEvolution &SE, const Value *AllocaPtr)
+ : SCEVRewriteVisitor(SE), AllocaPtr(AllocaPtr) {}
+
+ const SCEV *visitUnknown(const SCEVUnknown *Expr) {
+ if (Expr->getValue() == AllocaPtr)
+ return SE.getZero(Expr->getType());
+ return Expr;
+ }
+};
+
+/// The SafeStack pass splits the stack of each function into the safe
+/// stack, which is only accessed through memory safe dereferences (as
+/// determined statically), and the unsafe stack, which contains all
+/// local variables that are accessed in ways that we can't prove to
+/// be safe.
+class SafeStack : public FunctionPass {
+ const TargetMachine *TM;
+ const TargetLoweringBase *TL;
+ const DataLayout *DL;
+ ScalarEvolution *SE;
+
+ Type *StackPtrTy;
+ Type *IntPtrTy;
+ Type *Int32Ty;
+ Type *Int8Ty;
+
+ Value *UnsafeStackPtr = nullptr;
+
+ /// Unsafe stack alignment. Each stack frame must ensure that the stack is
+ /// aligned to this value. We need to re-align the unsafe stack if the
+ /// alignment of any object on the stack exceeds this value.
+ ///
+ /// 16 seems like a reasonable upper bound on the alignment of objects that we
+ /// might expect to appear on the stack on most common targets.
+ enum { StackAlignment = 16 };
+
+ /// \brief Build a value representing a pointer to the unsafe stack pointer.
+ Value *getOrCreateUnsafeStackPtr(IRBuilder<> &IRB, Function &F);
+
+ /// \brief Return the value of the stack canary.
+ Value *getStackGuard(IRBuilder<> &IRB, Function &F);
+
+ /// \brief Load stack guard from the frame and check if it has changed.
+ void checkStackGuard(IRBuilder<> &IRB, Function &F, ReturnInst &RI,
+ AllocaInst *StackGuardSlot, Value *StackGuard);
+
+ /// \brief Find all static allocas, dynamic allocas, return instructions and
+ /// stack restore points (exception unwind blocks and setjmp calls) in the
+ /// given function and append them to the respective vectors.
+ void findInsts(Function &F, SmallVectorImpl<AllocaInst *> &StaticAllocas,
+ SmallVectorImpl<AllocaInst *> &DynamicAllocas,
+ SmallVectorImpl<Argument *> &ByValArguments,
+ SmallVectorImpl<ReturnInst *> &Returns,
+ SmallVectorImpl<Instruction *> &StackRestorePoints);
+
+ /// \brief Calculate the allocation size of a given alloca. Returns 0 if the
+ /// size cannot be statically determined.
+ uint64_t getStaticAllocaAllocationSize(const AllocaInst* AI);
+
+ /// \brief Allocate space for all static allocas in \p StaticAllocas,
+ /// replace allocas with pointers into the unsafe stack and generate code to
+ /// restore the stack pointer before all return instructions in \p Returns.
+ ///
+ /// \returns A pointer to the top of the unsafe stack after all unsafe static
+ /// allocas are allocated.
+ Value *moveStaticAllocasToUnsafeStack(IRBuilder<> &IRB, Function &F,
+ ArrayRef<AllocaInst *> StaticAllocas,
+ ArrayRef<Argument *> ByValArguments,
+ ArrayRef<ReturnInst *> Returns,
+ Instruction *BasePointer,
+ AllocaInst *StackGuardSlot);
+
+ /// \brief Generate code to restore the stack after all stack restore points
+ /// in \p StackRestorePoints.
+ ///
+ /// \returns A local variable in which to maintain the dynamic top of the
+ /// unsafe stack if needed.
+ AllocaInst *
+ createStackRestorePoints(IRBuilder<> &IRB, Function &F,
+ ArrayRef<Instruction *> StackRestorePoints,
+ Value *StaticTop, bool NeedDynamicTop);
+
+ /// \brief Replace all allocas in \p DynamicAllocas with code to allocate
+ /// space dynamically on the unsafe stack and store the dynamic unsafe stack
+ /// top to \p DynamicTop if non-null.
+ void moveDynamicAllocasToUnsafeStack(Function &F, Value *UnsafeStackPtr,
+ AllocaInst *DynamicTop,
+ ArrayRef<AllocaInst *> DynamicAllocas);
+
+ bool IsSafeStackAlloca(const Value *AllocaPtr, uint64_t AllocaSize);
+
+ bool IsMemIntrinsicSafe(const MemIntrinsic *MI, const Use &U,
+ const Value *AllocaPtr, uint64_t AllocaSize);
+ bool IsAccessSafe(Value *Addr, uint64_t Size, const Value *AllocaPtr,
+ uint64_t AllocaSize);
+
+public:
+ static char ID; // Pass identification, replacement for typeid.
+ SafeStack(const TargetMachine *TM)
+ : FunctionPass(ID), TM(TM), TL(nullptr), DL(nullptr) {
+ initializeSafeStackPass(*PassRegistry::getPassRegistry());
+ }
+ SafeStack() : SafeStack(nullptr) {}
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<ScalarEvolutionWrapperPass>();
+ }
+
+ bool doInitialization(Module &M) override {
+ DL = &M.getDataLayout();
+
+ StackPtrTy = Type::getInt8PtrTy(M.getContext());
+ IntPtrTy = DL->getIntPtrType(M.getContext());
+ Int32Ty = Type::getInt32Ty(M.getContext());
+ Int8Ty = Type::getInt8Ty(M.getContext());
+
+ return false;
+ }
+
+ bool runOnFunction(Function &F) override;
+}; // class SafeStack
+
+uint64_t SafeStack::getStaticAllocaAllocationSize(const AllocaInst* AI) {
+ uint64_t Size = DL->getTypeAllocSize(AI->getAllocatedType());
+ if (AI->isArrayAllocation()) {
+ auto C = dyn_cast<ConstantInt>(AI->getArraySize());
+ if (!C)
+ return 0;
+ Size *= C->getZExtValue();
+ }
+ return Size;
+}
+
+bool SafeStack::IsAccessSafe(Value *Addr, uint64_t AccessSize,
+ const Value *AllocaPtr, uint64_t AllocaSize) {
+ AllocaOffsetRewriter Rewriter(*SE, AllocaPtr);
+ const SCEV *Expr = Rewriter.visit(SE->getSCEV(Addr));
+
+ uint64_t BitWidth = SE->getTypeSizeInBits(Expr->getType());
+ ConstantRange AccessStartRange = SE->getUnsignedRange(Expr);
+ ConstantRange SizeRange =
+ ConstantRange(APInt(BitWidth, 0), APInt(BitWidth, AccessSize));
+ ConstantRange AccessRange = AccessStartRange.add(SizeRange);
+ ConstantRange AllocaRange =
+ ConstantRange(APInt(BitWidth, 0), APInt(BitWidth, AllocaSize));
+ bool Safe = AllocaRange.contains(AccessRange);
+
+ DEBUG(dbgs() << "[SafeStack] "
+ << (isa<AllocaInst>(AllocaPtr) ? "Alloca " : "ByValArgument ")
+ << *AllocaPtr << "\n"
+ << " Access " << *Addr << "\n"
+ << " SCEV " << *Expr
+ << " U: " << SE->getUnsignedRange(Expr)
+ << ", S: " << SE->getSignedRange(Expr) << "\n"
+ << " Range " << AccessRange << "\n"
+ << " AllocaRange " << AllocaRange << "\n"
+ << " " << (Safe ? "safe" : "unsafe") << "\n");
+
+ return Safe;
+}
+
+bool SafeStack::IsMemIntrinsicSafe(const MemIntrinsic *MI, const Use &U,
+ const Value *AllocaPtr,
+ uint64_t AllocaSize) {
+ // All MemIntrinsics have destination address in Arg0 and size in Arg2.
+ if (MI->getRawDest() != U) return true;
+ const auto *Len = dyn_cast<ConstantInt>(MI->getLength());
+ // Non-constant size => unsafe. FIXME: try SCEV getRange.
+ if (!Len) return false;
+ return IsAccessSafe(U, Len->getZExtValue(), AllocaPtr, AllocaSize);
+}
+
+/// Check whether a given allocation can be kept on the safe
+/// stack or must be moved. The function analyzes all uses of AllocaPtr and
+/// checks whether it is only accessed in a memory-safe way (as decided
+/// statically).
+bool SafeStack::IsSafeStackAlloca(const Value *AllocaPtr, uint64_t AllocaSize) {
+ // Go through all uses of this alloca and check whether all accesses to the
+ // allocated object are statically known to be memory safe and, hence, the
+ // object can be placed on the safe stack.
+ SmallPtrSet<const Value *, 16> Visited;
+ SmallVector<const Value *, 8> WorkList;
+ WorkList.push_back(AllocaPtr);
+
+ // A DFS search through all uses of the alloca in bitcasts/PHI/GEPs/etc.
+ while (!WorkList.empty()) {
+ const Value *V = WorkList.pop_back_val();
+ for (const Use &UI : V->uses()) {
+ auto I = cast<const Instruction>(UI.getUser());
+ assert(V == UI.get());
+
+ switch (I->getOpcode()) {
+ case Instruction::Load: {
+ if (!IsAccessSafe(UI, DL->getTypeStoreSize(I->getType()), AllocaPtr,
+ AllocaSize))
+ return false;
+ break;
+ }
+ case Instruction::VAArg:
+ // "va-arg" from a pointer is safe.
+ break;
+ case Instruction::Store: {
+ if (V == I->getOperand(0)) {
+ // Stored the pointer - conservatively assume it may be unsafe.
+ DEBUG(dbgs() << "[SafeStack] Unsafe alloca: " << *AllocaPtr
+ << "\n store of address: " << *I << "\n");
+ return false;
+ }
+
+ if (!IsAccessSafe(UI, DL->getTypeStoreSize(I->getOperand(0)->getType()),
+ AllocaPtr, AllocaSize))
+ return false;
+ break;
+ }
+ case Instruction::Ret: {
+ // Information leak.
+ return false;
+ }
+
+ case Instruction::Call:
+ case Instruction::Invoke: {
+ ImmutableCallSite CS(I);
+
+ if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
+ if (II->getIntrinsicID() == Intrinsic::lifetime_start ||
+ II->getIntrinsicID() == Intrinsic::lifetime_end)
+ continue;
+ }
+
+ if (const MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I)) {
+ if (!IsMemIntrinsicSafe(MI, UI, AllocaPtr, AllocaSize)) {
+ DEBUG(dbgs() << "[SafeStack] Unsafe alloca: " << *AllocaPtr
+ << "\n unsafe memintrinsic: " << *I
+ << "\n");
+ return false;
+ }
+ continue;
+ }
+
+ // LLVM 'nocapture' attribute is only set for arguments whose address
+ // is not stored, passed around, or used in any other non-trivial way.
+ // We assume that passing a pointer to an object as a 'nocapture
+ // readnone' argument is safe.
+ // FIXME: a more precise solution would require an interprocedural
+ // analysis here, which would look at all uses of an argument inside
+ // the function being called.
+ ImmutableCallSite::arg_iterator B = CS.arg_begin(), E = CS.arg_end();
+ for (ImmutableCallSite::arg_iterator A = B; A != E; ++A)
+ if (A->get() == V)
+ if (!(CS.doesNotCapture(A - B) && (CS.doesNotAccessMemory(A - B) ||
+ CS.doesNotAccessMemory()))) {
+ DEBUG(dbgs() << "[SafeStack] Unsafe alloca: " << *AllocaPtr
+ << "\n unsafe call: " << *I << "\n");
+ return false;
+ }
+ continue;
+ }
+
+ default:
+ if (Visited.insert(I).second)
+ WorkList.push_back(cast<const Instruction>(I));
+ }
+ }
+ }
+
+ // All uses of the alloca are safe, we can place it on the safe stack.
+ return true;
+}
+
+Value *SafeStack::getOrCreateUnsafeStackPtr(IRBuilder<> &IRB, Function &F) {
+ // Check if there is a target-specific location for the unsafe stack pointer.
+ if (TL)
+ if (Value *V = TL->getSafeStackPointerLocation(IRB))
+ return V;
+
+ // Otherwise, assume the target links with compiler-rt, which provides a
+ // thread-local variable with a magic name.
+ Module &M = *F.getParent();
+ const char *UnsafeStackPtrVar = "__safestack_unsafe_stack_ptr";
+ auto UnsafeStackPtr =
+ dyn_cast_or_null<GlobalVariable>(M.getNamedValue(UnsafeStackPtrVar));
+
+ bool UseTLS = USPStorage == ThreadLocalUSP;
+
+ if (!UnsafeStackPtr) {
+ auto TLSModel = UseTLS ?
+ GlobalValue::InitialExecTLSModel :
+ GlobalValue::NotThreadLocal;
+ // The global variable is not defined yet, define it ourselves.
+ // We use the initial-exec TLS model because we do not support the
+ // variable living anywhere other than in the main executable.
+ UnsafeStackPtr = new GlobalVariable(
+ M, StackPtrTy, false, GlobalValue::ExternalLinkage, nullptr,
+ UnsafeStackPtrVar, nullptr, TLSModel);
+ } else {
+ // The variable exists, check its type and attributes.
+ if (UnsafeStackPtr->getValueType() != StackPtrTy)
+ report_fatal_error(Twine(UnsafeStackPtrVar) + " must have void* type");
+ if (UseTLS != UnsafeStackPtr->isThreadLocal())
+ report_fatal_error(Twine(UnsafeStackPtrVar) + " must " +
+ (UseTLS ? "" : "not ") + "be thread-local");
+ }
+ return UnsafeStackPtr;
+}
+
+Value *SafeStack::getStackGuard(IRBuilder<> &IRB, Function &F) {
+ Value *StackGuardVar = nullptr;
+ if (TL)
+ StackGuardVar = TL->getIRStackGuard(IRB);
+ if (!StackGuardVar)
+ StackGuardVar =
+ F.getParent()->getOrInsertGlobal("__stack_chk_guard", StackPtrTy);
+ return IRB.CreateLoad(StackGuardVar, "StackGuard");
+}
+
+void SafeStack::findInsts(Function &F,
+ SmallVectorImpl<AllocaInst *> &StaticAllocas,
+ SmallVectorImpl<AllocaInst *> &DynamicAllocas,
+ SmallVectorImpl<Argument *> &ByValArguments,
+ SmallVectorImpl<ReturnInst *> &Returns,
+ SmallVectorImpl<Instruction *> &StackRestorePoints) {
+ for (Instruction &I : instructions(&F)) {
+ if (auto AI = dyn_cast<AllocaInst>(&I)) {
+ ++NumAllocas;
+
+ uint64_t Size = getStaticAllocaAllocationSize(AI);
+ if (IsSafeStackAlloca(AI, Size))
+ continue;
+
+ if (AI->isStaticAlloca()) {
+ ++NumUnsafeStaticAllocas;
+ StaticAllocas.push_back(AI);
+ } else {
+ ++NumUnsafeDynamicAllocas;
+ DynamicAllocas.push_back(AI);
+ }
+ } else if (auto RI = dyn_cast<ReturnInst>(&I)) {
+ Returns.push_back(RI);
+ } else if (auto CI = dyn_cast<CallInst>(&I)) {
+ // setjmps require stack restore.
+ if (CI->getCalledFunction() && CI->canReturnTwice())
+ StackRestorePoints.push_back(CI);
+ } else if (auto LP = dyn_cast<LandingPadInst>(&I)) {
+ // Exception landing pads require stack restore.
+ StackRestorePoints.push_back(LP);
+ } else if (auto II = dyn_cast<IntrinsicInst>(&I)) {
+ if (II->getIntrinsicID() == Intrinsic::gcroot)
+ llvm::report_fatal_error(
+ "gcroot intrinsic not compatible with safestack attribute");
+ }
+ }
+ for (Argument &Arg : F.args()) {
+ if (!Arg.hasByValAttr())
+ continue;
+ uint64_t Size =
+ DL->getTypeStoreSize(Arg.getType()->getPointerElementType());
+ if (IsSafeStackAlloca(&Arg, Size))
+ continue;
+
+ ++NumUnsafeByValArguments;
+ ByValArguments.push_back(&Arg);
+ }
+}
+
+AllocaInst *
+SafeStack::createStackRestorePoints(IRBuilder<> &IRB, Function &F,
+ ArrayRef<Instruction *> StackRestorePoints,
+ Value *StaticTop, bool NeedDynamicTop) {
+ assert(StaticTop && "The stack top isn't set.");
+
+ if (StackRestorePoints.empty())
+ return nullptr;
+
+ // We need the current value of the shadow stack pointer to restore
+ // after longjmp or exception catching.
+
+ // FIXME: On some platforms this could be handled by the longjmp/exception
+ // runtime itself.
+
+ AllocaInst *DynamicTop = nullptr;
+ if (NeedDynamicTop) {
+ // If we also have dynamic allocas, the stack pointer value changes
+ // throughout the function. For now we store it in an alloca.
+ DynamicTop = IRB.CreateAlloca(StackPtrTy, /*ArraySize=*/nullptr,
+ "unsafe_stack_dynamic_ptr");
+ IRB.CreateStore(StaticTop, DynamicTop);
+ }
+
+ // Restore current stack pointer after longjmp/exception catch.
+ for (Instruction *I : StackRestorePoints) {
+ ++NumUnsafeStackRestorePoints;
+
+ IRB.SetInsertPoint(I->getNextNode());
+ Value *CurrentTop = DynamicTop ? IRB.CreateLoad(DynamicTop) : StaticTop;
+ IRB.CreateStore(CurrentTop, UnsafeStackPtr);
+ }
+
+ return DynamicTop;
+}
+
+void SafeStack::checkStackGuard(IRBuilder<> &IRB, Function &F, ReturnInst &RI,
+ AllocaInst *StackGuardSlot, Value *StackGuard) {
+ Value *V = IRB.CreateLoad(StackGuardSlot);
+ Value *Cmp = IRB.CreateICmpNE(StackGuard, V);
+
+ auto SuccessProb = BranchProbabilityInfo::getBranchProbStackProtector(true);
+ auto FailureProb = BranchProbabilityInfo::getBranchProbStackProtector(false);
+ MDNode *Weights = MDBuilder(F.getContext())
+ .createBranchWeights(SuccessProb.getNumerator(),
+ FailureProb.getNumerator());
+ Instruction *CheckTerm =
+ SplitBlockAndInsertIfThen(Cmp, &RI,
+ /* Unreachable */ true, Weights);
+ IRBuilder<> IRBFail(CheckTerm);
+ // FIXME: respect -fsanitize-trap / -ftrap-function here?
+ Constant *StackChkFail = F.getParent()->getOrInsertFunction(
+ "__stack_chk_fail", IRB.getVoidTy(), nullptr);
+ IRBFail.CreateCall(StackChkFail, {});
+}
+
+/// We explicitly compute and set the unsafe stack layout for all unsafe
+/// static alloca instructions. We save the unsafe "base pointer" in the
+/// prologue into a local variable and restore it in the epilogue.
+Value *SafeStack::moveStaticAllocasToUnsafeStack(
+ IRBuilder<> &IRB, Function &F, ArrayRef<AllocaInst *> StaticAllocas,
+ ArrayRef<Argument *> ByValArguments, ArrayRef<ReturnInst *> Returns,
+ Instruction *BasePointer, AllocaInst *StackGuardSlot) {
+ if (StaticAllocas.empty() && ByValArguments.empty())
+ return BasePointer;
+
+ DIBuilder DIB(*F.getParent());
+
+ StackColoring SSC(F, StaticAllocas);
+ SSC.run();
+ SSC.removeAllMarkers();
+
+ // Unsafe stack always grows down.
+ StackLayout SSL(StackAlignment);
+ if (StackGuardSlot) {
+ Type *Ty = StackGuardSlot->getAllocatedType();
+ unsigned Align =
+ std::max(DL->getPrefTypeAlignment(Ty), StackGuardSlot->getAlignment());
+ SSL.addObject(StackGuardSlot, getStaticAllocaAllocationSize(StackGuardSlot),
+ Align, SSC.getFullLiveRange());
+ }
+
+ for (Argument *Arg : ByValArguments) {
+ Type *Ty = Arg->getType()->getPointerElementType();
+ uint64_t Size = DL->getTypeStoreSize(Ty);
+ if (Size == 0)
+ Size = 1; // Don't create zero-sized stack objects.
+
+ // Ensure the object is properly aligned.
+ unsigned Align = std::max((unsigned)DL->getPrefTypeAlignment(Ty),
+ Arg->getParamAlignment());
+ SSL.addObject(Arg, Size, Align, SSC.getFullLiveRange());
+ }
+
+ for (AllocaInst *AI : StaticAllocas) {
+ Type *Ty = AI->getAllocatedType();
+ uint64_t Size = getStaticAllocaAllocationSize(AI);
+ if (Size == 0)
+ Size = 1; // Don't create zero-sized stack objects.
+
+ // Ensure the object is properly aligned.
+ unsigned Align =
+ std::max((unsigned)DL->getPrefTypeAlignment(Ty), AI->getAlignment());
+
+ SSL.addObject(AI, Size, Align, SSC.getLiveRange(AI));
+ }
+
+ SSL.computeLayout();
+ unsigned FrameAlignment = SSL.getFrameAlignment();
+
+ // FIXME: tell SSL that we start at a less-than-MaxAlignment aligned location
+ // (AlignmentSkew).
+ if (FrameAlignment > StackAlignment) {
+ // Re-align the base pointer according to the max requested alignment.
+ assert(isPowerOf2_32(FrameAlignment));
+ IRB.SetInsertPoint(BasePointer->getNextNode());
+ BasePointer = cast<Instruction>(IRB.CreateIntToPtr(
+ IRB.CreateAnd(IRB.CreatePtrToInt(BasePointer, IntPtrTy),
+ ConstantInt::get(IntPtrTy, ~uint64_t(FrameAlignment - 1))),
+ StackPtrTy));
+ }
+
+ IRB.SetInsertPoint(BasePointer->getNextNode());
+
+ if (StackGuardSlot) {
+ unsigned Offset = SSL.getObjectOffset(StackGuardSlot);
+ Value *Off = IRB.CreateGEP(BasePointer, // BasePointer is i8*
+ ConstantInt::get(Int32Ty, -Offset));
+ Value *NewAI =
+ IRB.CreateBitCast(Off, StackGuardSlot->getType(), "StackGuardSlot");
+
+ // Replace the alloca with the new location.
+ StackGuardSlot->replaceAllUsesWith(NewAI);
+ StackGuardSlot->eraseFromParent();
+ }
+
+ for (Argument *Arg : ByValArguments) {
+ unsigned Offset = SSL.getObjectOffset(Arg);
+ Type *Ty = Arg->getType()->getPointerElementType();
+
+ uint64_t Size = DL->getTypeStoreSize(Ty);
+ if (Size == 0)
+ Size = 1; // Don't create zero-sized stack objects.
+
+ Value *Off = IRB.CreateGEP(BasePointer, // BasePointer is i8*
+ ConstantInt::get(Int32Ty, -Offset));
+ Value *NewArg = IRB.CreateBitCast(Off, Arg->getType(),
+ Arg->getName() + ".unsafe-byval");
+
+ // Replace the argument with the new location.
+ replaceDbgDeclare(Arg, BasePointer, BasePointer->getNextNode(), DIB,
+ /*Deref=*/true, -Offset);
+ Arg->replaceAllUsesWith(NewArg);
+ IRB.SetInsertPoint(cast<Instruction>(NewArg)->getNextNode());
+ IRB.CreateMemCpy(Off, Arg, Size, Arg->getParamAlignment());
+ }
+
+ // Allocate space for every unsafe static AllocaInst on the unsafe stack.
+ for (AllocaInst *AI : StaticAllocas) {
+ IRB.SetInsertPoint(AI);
+ unsigned Offset = SSL.getObjectOffset(AI);
+
+ uint64_t Size = getStaticAllocaAllocationSize(AI);
+ if (Size == 0)
+ Size = 1; // Don't create zero-sized stack objects.
+
+ replaceDbgDeclareForAlloca(AI, BasePointer, DIB, /*Deref=*/true, -Offset);
+ replaceDbgValueForAlloca(AI, BasePointer, DIB, -Offset);
+
+ // Replace uses of the alloca with the new location.
+ // Insert address calculation close to each use to work around PR27844.
+ std::string Name = std::string(AI->getName()) + ".unsafe";
+ while (!AI->use_empty()) {
+ Use &U = *AI->use_begin();
+ Instruction *User = cast<Instruction>(U.getUser());
+
+ Instruction *InsertBefore;
+ if (auto *PHI = dyn_cast<PHINode>(User))
+ InsertBefore = PHI->getIncomingBlock(U)->getTerminator();
+ else
+ InsertBefore = User;
+
+ IRBuilder<> IRBUser(InsertBefore);
+ Value *Off = IRBUser.CreateGEP(BasePointer, // BasePointer is i8*
+ ConstantInt::get(Int32Ty, -Offset));
+ Value *Replacement = IRBUser.CreateBitCast(Off, AI->getType(), Name);
+
+ if (auto *PHI = dyn_cast<PHINode>(User)) {
+ // PHI nodes may have multiple incoming edges from the same BB;
+ // all of them must be updated at once with the same incoming value.
+ auto *BB = PHI->getIncomingBlock(U);
+ for (unsigned I = 0; I < PHI->getNumIncomingValues(); ++I)
+ if (PHI->getIncomingBlock(I) == BB)
+ PHI->setIncomingValue(I, Replacement);
+ } else {
+ U.set(Replacement);
+ }
+ }
+
+ AI->eraseFromParent();
+ }
+
+ // Re-align BasePointer so that our callees would see it aligned as
+ // expected.
+ // FIXME: no need to update BasePointer in leaf functions.
+ unsigned FrameSize = alignTo(SSL.getFrameSize(), StackAlignment);
+
+ // Update shadow stack pointer in the function epilogue.
+ IRB.SetInsertPoint(BasePointer->getNextNode());
+
+ Value *StaticTop =
+ IRB.CreateGEP(BasePointer, ConstantInt::get(Int32Ty, -FrameSize),
+ "unsafe_stack_static_top");
+ IRB.CreateStore(StaticTop, UnsafeStackPtr);
+ return StaticTop;
+}
+
+void SafeStack::moveDynamicAllocasToUnsafeStack(
+ Function &F, Value *UnsafeStackPtr, AllocaInst *DynamicTop,
+ ArrayRef<AllocaInst *> DynamicAllocas) {
+ DIBuilder DIB(*F.getParent());
+
+ for (AllocaInst *AI : DynamicAllocas) {
+ IRBuilder<> IRB(AI);
+
+ // Compute the new SP value (after AI).
+ Value *ArraySize = AI->getArraySize();
+ if (ArraySize->getType() != IntPtrTy)
+ ArraySize = IRB.CreateIntCast(ArraySize, IntPtrTy, false);
+
+ Type *Ty = AI->getAllocatedType();
+ uint64_t TySize = DL->getTypeAllocSize(Ty);
+ Value *Size = IRB.CreateMul(ArraySize, ConstantInt::get(IntPtrTy, TySize));
+
+ Value *SP = IRB.CreatePtrToInt(IRB.CreateLoad(UnsafeStackPtr), IntPtrTy);
+ SP = IRB.CreateSub(SP, Size);
+
+ // Align the SP value to satisfy the AllocaInst, type and stack alignments.
+ unsigned Align = std::max(
+ std::max((unsigned)DL->getPrefTypeAlignment(Ty), AI->getAlignment()),
+ (unsigned)StackAlignment);
+
+ assert(isPowerOf2_32(Align));
+ Value *NewTop = IRB.CreateIntToPtr(
+ IRB.CreateAnd(SP, ConstantInt::get(IntPtrTy, ~uint64_t(Align - 1))),
+ StackPtrTy);
+
+ // Save the stack pointer.
+ IRB.CreateStore(NewTop, UnsafeStackPtr);
+ if (DynamicTop)
+ IRB.CreateStore(NewTop, DynamicTop);
+
+ Value *NewAI = IRB.CreatePointerCast(NewTop, AI->getType());
+ if (AI->hasName() && isa<Instruction>(NewAI))
+ NewAI->takeName(AI);
+
+ replaceDbgDeclareForAlloca(AI, NewAI, DIB, /*Deref=*/true);
+ AI->replaceAllUsesWith(NewAI);
+ AI->eraseFromParent();
+ }
+
+ if (!DynamicAllocas.empty()) {
+ // Now go through the instructions again, replacing stacksave/stackrestore.
+ for (inst_iterator It = inst_begin(&F), Ie = inst_end(&F); It != Ie;) {
+ Instruction *I = &*(It++);
+ auto II = dyn_cast<IntrinsicInst>(I);
+ if (!II)
+ continue;
+
+ if (II->getIntrinsicID() == Intrinsic::stacksave) {
+ IRBuilder<> IRB(II);
+ Instruction *LI = IRB.CreateLoad(UnsafeStackPtr);
+ LI->takeName(II);
+ II->replaceAllUsesWith(LI);
+ II->eraseFromParent();
+ } else if (II->getIntrinsicID() == Intrinsic::stackrestore) {
+ IRBuilder<> IRB(II);
+ Instruction *SI = IRB.CreateStore(II->getArgOperand(0), UnsafeStackPtr);
+ SI->takeName(II);
+ assert(II->use_empty());
+ II->eraseFromParent();
+ }
+ }
+ }
+}
+
+bool SafeStack::runOnFunction(Function &F) {
+ DEBUG(dbgs() << "[SafeStack] Function: " << F.getName() << "\n");
+
+ if (!F.hasFnAttribute(Attribute::SafeStack)) {
+ DEBUG(dbgs() << "[SafeStack] safestack is not requested"
+ " for this function\n");
+ return false;
+ }
+
+ if (F.isDeclaration()) {
+ DEBUG(dbgs() << "[SafeStack] function definition"
+ " is not available\n");
+ return false;
+ }
+
+ TL = TM ? TM->getSubtargetImpl(F)->getTargetLowering() : nullptr;
+ SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
+
+ ++NumFunctions;
+
+ SmallVector<AllocaInst *, 16> StaticAllocas;
+ SmallVector<AllocaInst *, 4> DynamicAllocas;
+ SmallVector<Argument *, 4> ByValArguments;
+ SmallVector<ReturnInst *, 4> Returns;
+
+ // Collect all points where the stack gets unwound and needs to be restored.
+ // This is only necessary because the runtime (setjmp and unwind code) is
+ // not aware of the unsafe stack and won't unwind/restore it properly.
+ // To work around this problem without changing the runtime, we insert
+ // instrumentation to restore the unsafe stack pointer when necessary.
+ SmallVector<Instruction *, 4> StackRestorePoints;
+
+ // Find all static and dynamic alloca instructions that must be moved to the
+ // unsafe stack, all return instructions and stack restore points.
+ findInsts(F, StaticAllocas, DynamicAllocas, ByValArguments, Returns,
+ StackRestorePoints);
+
+ if (StaticAllocas.empty() && DynamicAllocas.empty() &&
+ ByValArguments.empty() && StackRestorePoints.empty())
+ return false; // Nothing to do in this function.
+
+ if (!StaticAllocas.empty() || !DynamicAllocas.empty() ||
+ !ByValArguments.empty())
+ ++NumUnsafeStackFunctions; // This function has the unsafe stack.
+
+ if (!StackRestorePoints.empty())
+ ++NumUnsafeStackRestorePointsFunctions;
+
+ IRBuilder<> IRB(&F.front(), F.begin()->getFirstInsertionPt());
+ UnsafeStackPtr = getOrCreateUnsafeStackPtr(IRB, F);
+
+ // Load the current stack pointer (we'll also use it as a base pointer).
+ // FIXME: use a dedicated register for it?
+ Instruction *BasePointer =
+ IRB.CreateLoad(UnsafeStackPtr, false, "unsafe_stack_ptr");
+ assert(BasePointer->getType() == StackPtrTy);
+
+ AllocaInst *StackGuardSlot = nullptr;
+ // FIXME: implement weaker forms of stack protector.
+ if (F.hasFnAttribute(Attribute::StackProtect) ||
+ F.hasFnAttribute(Attribute::StackProtectStrong) ||
+ F.hasFnAttribute(Attribute::StackProtectReq)) {
+ Value *StackGuard = getStackGuard(IRB, F);
+ StackGuardSlot = IRB.CreateAlloca(StackPtrTy, nullptr);
+ IRB.CreateStore(StackGuard, StackGuardSlot);
+
+ for (ReturnInst *RI : Returns) {
+ IRBuilder<> IRBRet(RI);
+ checkStackGuard(IRBRet, F, *RI, StackGuardSlot, StackGuard);
+ }
+ }
+
+ // The top of the unsafe stack after all unsafe static allocas are
+ // allocated.
+ Value *StaticTop =
+ moveStaticAllocasToUnsafeStack(IRB, F, StaticAllocas, ByValArguments,
+ Returns, BasePointer, StackGuardSlot);
+
+ // Safe stack object that stores the current unsafe stack top. It is updated
+ // as unsafe dynamic (non-constant-sized) allocas are allocated and freed.
+ // This is only needed if we need to restore stack pointer after longjmp
+ // or exceptions, and we have dynamic allocations.
+ // FIXME: a better alternative might be to store the unsafe stack pointer
+ // before setjmp / invoke instructions.
+ AllocaInst *DynamicTop = createStackRestorePoints(
+ IRB, F, StackRestorePoints, StaticTop, !DynamicAllocas.empty());
+
+ // Handle dynamic allocas.
+ moveDynamicAllocasToUnsafeStack(F, UnsafeStackPtr, DynamicTop,
+ DynamicAllocas);
+
+ // Restore the unsafe stack pointer before each return.
+ for (ReturnInst *RI : Returns) {
+ IRB.SetInsertPoint(RI);
+ IRB.CreateStore(BasePointer, UnsafeStackPtr);
+ }
+
+ DEBUG(dbgs() << "[SafeStack] safestack applied\n");
+ return true;
+}
+
+} // anonymous namespace
+
+char SafeStack::ID = 0;
+INITIALIZE_TM_PASS_BEGIN(SafeStack, "safe-stack",
+ "Safe Stack instrumentation pass", false, false)
+INITIALIZE_TM_PASS_END(SafeStack, "safe-stack",
+ "Safe Stack instrumentation pass", false, false)
+
+FunctionPass *llvm::createSafeStackPass(const llvm::TargetMachine *TM) {
+ return new SafeStack(TM);
+}
diff --git a/contrib/llvm/lib/CodeGen/SafeStackColoring.cpp b/contrib/llvm/lib/CodeGen/SafeStackColoring.cpp
new file mode 100644
index 0000000..795eb8d
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/SafeStackColoring.cpp
@@ -0,0 +1,291 @@
+//===-- SafeStackColoring.cpp - SafeStack frame coloring -------*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SafeStackColoring.h"
+
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/IR/CFG.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/Support/Debug.h"
+
+using namespace llvm;
+using namespace llvm::safestack;
+
+#define DEBUG_TYPE "safestackcoloring"
+
+static cl::opt<bool> ClColoring("safe-stack-coloring",
+ cl::desc("enable safe stack coloring"),
+ cl::Hidden, cl::init(true));
+
+const StackColoring::LiveRange &StackColoring::getLiveRange(AllocaInst *AI) {
+ const auto IT = AllocaNumbering.find(AI);
+ assert(IT != AllocaNumbering.end());
+ return LiveRanges[IT->second];
+}
+
+bool StackColoring::readMarker(Instruction *I, bool *IsStart) {
+ auto *II = dyn_cast<IntrinsicInst>(I);
+ if (!II || (II->getIntrinsicID() != Intrinsic::lifetime_start &&
+ II->getIntrinsicID() != Intrinsic::lifetime_end))
+ return false;
+
+ *IsStart = II->getIntrinsicID() == Intrinsic::lifetime_start;
+ return true;
+}
+
+void StackColoring::removeAllMarkers() {
+ for (auto *I : Markers) {
+ auto *Op = dyn_cast<Instruction>(I->getOperand(1));
+ I->eraseFromParent();
+ // Remove the operand bitcast, too, if it has no more uses left.
+ if (Op && Op->use_empty())
+ Op->eraseFromParent();
+ }
+}
+
+void StackColoring::collectMarkers() {
+ InterestingAllocas.resize(NumAllocas);
+ DenseMap<BasicBlock *, SmallDenseMap<Instruction *, Marker>> BBMarkerSet;
+
+ // Compute the set of start/end markers per basic block.
+ for (unsigned AllocaNo = 0; AllocaNo < NumAllocas; ++AllocaNo) {
+ AllocaInst *AI = Allocas[AllocaNo];
+ SmallVector<Instruction *, 8> WorkList;
+ WorkList.push_back(AI);
+ while (!WorkList.empty()) {
+ Instruction *I = WorkList.pop_back_val();
+ for (User *U : I->users()) {
+ if (auto *BI = dyn_cast<BitCastInst>(U)) {
+ WorkList.push_back(BI);
+ continue;
+ }
+ auto *UI = dyn_cast<Instruction>(U);
+ if (!UI)
+ continue;
+ bool IsStart;
+ if (!readMarker(UI, &IsStart))
+ continue;
+ if (IsStart)
+ InterestingAllocas.set(AllocaNo);
+ BBMarkerSet[UI->getParent()][UI] = {AllocaNo, IsStart};
+ Markers.push_back(UI);
+ }
+ }
+ }
+
+ // Compute instruction numbering. Only the following instructions are
+ // considered:
+ // * Basic block entries
+ // * Lifetime markers
+ // For each basic block, compute
+ // * the list of markers in the instruction order
+ // * the sets of allocas whose lifetime starts or ends in this BB
+ DEBUG(dbgs() << "Instructions:\n");
+ unsigned InstNo = 0;
+ for (BasicBlock *BB : depth_first(&F)) {
+ DEBUG(dbgs() << " " << InstNo << ": BB " << BB->getName() << "\n");
+ unsigned BBStart = InstNo++;
+
+ BlockLifetimeInfo &BlockInfo = BlockLiveness[BB];
+ BlockInfo.Begin.resize(NumAllocas);
+ BlockInfo.End.resize(NumAllocas);
+ BlockInfo.LiveIn.resize(NumAllocas);
+ BlockInfo.LiveOut.resize(NumAllocas);
+
+ auto &BlockMarkerSet = BBMarkerSet[BB];
+ if (BlockMarkerSet.empty()) {
+ unsigned BBEnd = InstNo;
+ BlockInstRange[BB] = std::make_pair(BBStart, BBEnd);
+ continue;
+ }
+
+ auto ProcessMarker = [&](Instruction *I, const Marker &M) {
+ DEBUG(dbgs() << " " << InstNo << ": "
+ << (M.IsStart ? "start " : "end ") << M.AllocaNo << ", "
+ << *I << "\n");
+
+ BBMarkers[BB].push_back({InstNo, M});
+
+ InstructionNumbering[I] = InstNo++;
+
+ if (M.IsStart) {
+ if (BlockInfo.End.test(M.AllocaNo))
+ BlockInfo.End.reset(M.AllocaNo);
+ BlockInfo.Begin.set(M.AllocaNo);
+ } else {
+ if (BlockInfo.Begin.test(M.AllocaNo))
+ BlockInfo.Begin.reset(M.AllocaNo);
+ BlockInfo.End.set(M.AllocaNo);
+ }
+ };
+
+ if (BlockMarkerSet.size() == 1) {
+ ProcessMarker(BlockMarkerSet.begin()->getFirst(),
+ BlockMarkerSet.begin()->getSecond());
+ } else {
+ // Scan the BB to determine the marker order.
+ for (Instruction &I : *BB) {
+ auto It = BlockMarkerSet.find(&I);
+ if (It == BlockMarkerSet.end())
+ continue;
+ ProcessMarker(&I, It->getSecond());
+ }
+ }
+
+ unsigned BBEnd = InstNo;
+ BlockInstRange[BB] = std::make_pair(BBStart, BBEnd);
+ }
+ NumInst = InstNo;
+}
+
+void StackColoring::calculateLocalLiveness() {
+ bool changed = true;
+ while (changed) {
+ changed = false;
+
+ for (BasicBlock *BB : depth_first(&F)) {
+ BlockLifetimeInfo &BlockInfo = BlockLiveness[BB];
+
+ // Compute LiveIn by unioning together the LiveOut sets of all preds.
+ BitVector LocalLiveIn;
+ for (auto *PredBB : predecessors(BB)) {
+ LivenessMap::const_iterator I = BlockLiveness.find(PredBB);
+ assert(I != BlockLiveness.end() && "Predecessor not found");
+ LocalLiveIn |= I->second.LiveOut;
+ }
+
+ // Compute LiveOut by subtracting out lifetimes that end in this
+ // block, then adding in lifetimes that begin in this block. If
+ // we have both BEGIN and END markers in the same basic block
+ // then we know that the BEGIN marker comes after the END,
+ // because we already handle the case where the BEGIN comes
+ // before the END when collecting the markers (and building the
+ // BEGIN/END vectors).
+ BitVector LocalLiveOut = LocalLiveIn;
+ LocalLiveOut.reset(BlockInfo.End);
+ LocalLiveOut |= BlockInfo.Begin;
+
+ // Update block LiveIn set, noting whether it has changed.
+ if (LocalLiveIn.test(BlockInfo.LiveIn)) {
+ changed = true;
+ BlockInfo.LiveIn |= LocalLiveIn;
+ }
+
+ // Update block LiveOut set, noting whether it has changed.
+ if (LocalLiveOut.test(BlockInfo.LiveOut)) {
+ changed = true;
+ BlockInfo.LiveOut |= LocalLiveOut;
+ }
+ }
+ } // while changed.
+}
+
+void StackColoring::calculateLiveIntervals() {
+ for (auto IT : BlockLiveness) {
+ BasicBlock *BB = IT.getFirst();
+ BlockLifetimeInfo &BlockInfo = IT.getSecond();
+ unsigned BBStart, BBEnd;
+ std::tie(BBStart, BBEnd) = BlockInstRange[BB];
+
+ BitVector Started, Ended;
+ Started.resize(NumAllocas);
+ Ended.resize(NumAllocas);
+ SmallVector<unsigned, 8> Start;
+ Start.resize(NumAllocas);
+
+ // LiveIn ranges start at the first instruction.
+ for (unsigned AllocaNo = 0; AllocaNo < NumAllocas; ++AllocaNo) {
+ if (BlockInfo.LiveIn.test(AllocaNo)) {
+ Started.set(AllocaNo);
+ Start[AllocaNo] = BBStart;
+ }
+ }
+
+ for (auto &It : BBMarkers[BB]) {
+ unsigned InstNo = It.first;
+ bool IsStart = It.second.IsStart;
+ unsigned AllocaNo = It.second.AllocaNo;
+
+ if (IsStart) {
+ assert(!Started.test(AllocaNo));
+ Started.set(AllocaNo);
+ Ended.reset(AllocaNo);
+ Start[AllocaNo] = InstNo;
+ } else {
+ assert(!Ended.test(AllocaNo));
+ if (Started.test(AllocaNo)) {
+ LiveRanges[AllocaNo].AddRange(Start[AllocaNo], InstNo);
+ Started.reset(AllocaNo);
+ }
+ Ended.set(AllocaNo);
+ }
+ }
+
+ for (unsigned AllocaNo = 0; AllocaNo < NumAllocas; ++AllocaNo)
+ if (Started.test(AllocaNo))
+ LiveRanges[AllocaNo].AddRange(Start[AllocaNo], BBEnd);
+ }
+}
+
+LLVM_DUMP_METHOD void StackColoring::dumpAllocas() {
+ dbgs() << "Allocas:\n";
+ for (unsigned AllocaNo = 0; AllocaNo < NumAllocas; ++AllocaNo)
+ dbgs() << " " << AllocaNo << ": " << *Allocas[AllocaNo] << "\n";
+}
+
+LLVM_DUMP_METHOD void StackColoring::dumpBlockLiveness() {
+ dbgs() << "Block liveness:\n";
+ for (auto IT : BlockLiveness) {
+ BasicBlock *BB = IT.getFirst();
+ BlockLifetimeInfo &BlockInfo = BlockLiveness[BB];
+ auto BlockRange = BlockInstRange[BB];
+ dbgs() << " BB [" << BlockRange.first << ", " << BlockRange.second
+ << "): begin " << BlockInfo.Begin << ", end " << BlockInfo.End
+ << ", livein " << BlockInfo.LiveIn << ", liveout "
+ << BlockInfo.LiveOut << "\n";
+ }
+}
+
+LLVM_DUMP_METHOD void StackColoring::dumpLiveRanges() {
+ dbgs() << "Alloca liveness:\n";
+ for (unsigned AllocaNo = 0; AllocaNo < NumAllocas; ++AllocaNo) {
+ LiveRange &Range = LiveRanges[AllocaNo];
+ dbgs() << " " << AllocaNo << ": " << Range << "\n";
+ }
+}
+
+void StackColoring::run() {
+ DEBUG(dumpAllocas());
+
+ for (unsigned I = 0; I < NumAllocas; ++I)
+ AllocaNumbering[Allocas[I]] = I;
+ LiveRanges.resize(NumAllocas);
+
+ collectMarkers();
+
+ if (!ClColoring) {
+ for (auto &R : LiveRanges) {
+ R.SetMaximum(1);
+ R.AddRange(0, 1);
+ }
+ return;
+ }
+
+ for (auto &R : LiveRanges)
+ R.SetMaximum(NumInst);
+ for (unsigned I = 0; I < NumAllocas; ++I)
+ if (!InterestingAllocas.test(I))
+ LiveRanges[I] = getFullLiveRange();
+
+ calculateLocalLiveness();
+ DEBUG(dumpBlockLiveness());
+ calculateLiveIntervals();
+ DEBUG(dumpLiveRanges());
+}
diff --git a/contrib/llvm/lib/CodeGen/SafeStackColoring.h b/contrib/llvm/lib/CodeGen/SafeStackColoring.h
new file mode 100644
index 0000000..08b179c
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/SafeStackColoring.h
@@ -0,0 +1,149 @@
+//===-- SafeStackColoring.h - SafeStack frame coloring ---------*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_CODEGEN_SAFESTACKCOLORING_H
+#define LLVM_LIB_CODEGEN_SAFESTACKCOLORING_H
+
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/IR/Function.h"
+#include "llvm/Support/raw_os_ostream.h"
+
+namespace llvm {
+class AllocaInst;
+
+namespace safestack {
+/// Compute live ranges of allocas.
+/// Live ranges are represented as sets of "interesting" instructions, which are
+/// defined as instructions that may start or end an alloca's lifetime. These
+/// are:
+/// * lifetime.start and lifetime.end intrinsics
+/// * first instruction of any basic block
+/// Interesting instructions are numbered in the depth-first walk of the CFG,
+/// and in the program order inside each basic block.
+class StackColoring {
+ /// A class representing liveness information for a single basic block.
+ /// Each bit in the BitVector represents the liveness property
+ /// for a different stack slot.
+ struct BlockLifetimeInfo {
+ /// Which slots BEGIN in each basic block.
+ BitVector Begin;
+ /// Which slots END in each basic block.
+ BitVector End;
+ /// Which slots are marked as LIVE_IN, coming into each basic block.
+ BitVector LiveIn;
+ /// Which slots are marked as LIVE_OUT, coming out of each basic block.
+ BitVector LiveOut;
+ };
+
+public:
+ /// This class represents a set of interesting instructions where an alloca is
+ /// live.
+ struct LiveRange {
+ BitVector bv;
+ void SetMaximum(int size) { bv.resize(size); }
+ void AddRange(unsigned start, unsigned end) { bv.set(start, end); }
+ bool Overlaps(const LiveRange &Other) const {
+ return bv.anyCommon(Other.bv);
+ }
+ void Join(const LiveRange &Other) { bv |= Other.bv; }
+ };
+
+private:
+ Function &F;
+
+ /// Maps active slots (per bit) for each basic block.
+ typedef DenseMap<BasicBlock *, BlockLifetimeInfo> LivenessMap;
+ LivenessMap BlockLiveness;
+
+ /// Number of interesting instructions.
+ int NumInst;
+ /// Numeric ids for interesting instructions.
+ DenseMap<Instruction *, unsigned> InstructionNumbering;
+ /// A range [Start, End) of instruction ids for each basic block.
+ /// Instructions inside each BB have monotonic and consecutive ids.
+ DenseMap<const BasicBlock *, std::pair<unsigned, unsigned>> BlockInstRange;
+
+ ArrayRef<AllocaInst *> Allocas;
+ unsigned NumAllocas;
+ DenseMap<AllocaInst *, unsigned> AllocaNumbering;
+ /// LiveRange for allocas.
+ SmallVector<LiveRange, 8> LiveRanges;
+
+ /// The set of allocas that have at least one lifetime.start. All other
+ /// allocas get a LiveRange that corresponds to the entire function.
+ BitVector InterestingAllocas;
+ SmallVector<Instruction *, 8> Markers;
+
+ struct Marker {
+ unsigned AllocaNo;
+ bool IsStart;
+ };
+
+ /// List of {InstNo, {AllocaNo, IsStart}} for each BB, ordered by InstNo.
+ DenseMap<BasicBlock *, SmallVector<std::pair<unsigned, Marker>, 4>> BBMarkers;
+
+ void dumpAllocas();
+ void dumpBlockLiveness();
+ void dumpLiveRanges();
+
+ bool readMarker(Instruction *I, bool *IsStart);
+ void collectMarkers();
+ void calculateLocalLiveness();
+ void calculateLiveIntervals();
+
+public:
+ StackColoring(Function &F, ArrayRef<AllocaInst *> Allocas)
+ : F(F), NumInst(-1), Allocas(Allocas), NumAllocas(Allocas.size()) {}
+
+ void run();
+ void removeAllMarkers();
+
+ /// Returns a set of "interesting" instructions where the given alloca is
+ /// live. Not all instructions in a function are interesting: we pick a set
+ /// that is large enough for LiveRange::Overlaps to be correct.
+ const LiveRange &getLiveRange(AllocaInst *AI);
+
+ /// Returns a live range that represents an alloca that is live throughout the
+ /// entire function.
+ LiveRange getFullLiveRange() {
+ assert(NumInst >= 0);
+ LiveRange R;
+ R.SetMaximum(NumInst);
+ R.AddRange(0, NumInst);
+ return R;
+ }
+};
+
+static inline raw_ostream &operator<<(raw_ostream &OS, const BitVector &V) {
+ OS << "{";
+ int idx = V.find_first();
+ bool first = true;
+ while (idx >= 0) {
+ if (!first) {
+ OS << ", ";
+ }
+ first = false;
+ OS << idx;
+ idx = V.find_next(idx);
+ }
+ OS << "}";
+ return OS;
+}
+
+static inline raw_ostream &operator<<(raw_ostream &OS,
+ const StackColoring::LiveRange &R) {
+ return OS << R.bv;
+}
+
+} // namespace safestack
+} // namespace llvm
+
+#endif // LLVM_LIB_CODEGEN_SAFESTACKCOLORING_H
diff --git a/contrib/llvm/lib/CodeGen/SafeStackLayout.cpp b/contrib/llvm/lib/CodeGen/SafeStackLayout.cpp
new file mode 100644
index 0000000..fb433c1
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/SafeStackLayout.cpp
@@ -0,0 +1,139 @@
+//===-- SafeStackLayout.cpp - SafeStack frame layout -----------*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SafeStackLayout.h"
+
+#include "llvm/IR/Instructions.h"
+#include "llvm/Support/Debug.h"
+
+using namespace llvm;
+using namespace llvm::safestack;
+
+#define DEBUG_TYPE "safestacklayout"
+
+static cl::opt<bool> ClLayout("safe-stack-layout",
+ cl::desc("enable safe stack layout"), cl::Hidden,
+ cl::init(true));
+
+LLVM_DUMP_METHOD void StackLayout::print(raw_ostream &OS) {
+ OS << "Stack regions:\n";
+ for (unsigned i = 0; i < Regions.size(); ++i) {
+ OS << " " << i << ": [" << Regions[i].Start << ", " << Regions[i].End
+ << "), range " << Regions[i].Range << "\n";
+ }
+ OS << "Stack objects:\n";
+ for (auto &IT : ObjectOffsets) {
+ OS << " at " << IT.getSecond() << ": " << *IT.getFirst() << "\n";
+ }
+}
+
+void StackLayout::addObject(const Value *V, unsigned Size, unsigned Alignment,
+ const StackColoring::LiveRange &Range) {
+ StackObjects.push_back({V, Size, Alignment, Range});
+ MaxAlignment = std::max(MaxAlignment, Alignment);
+}
+
+static unsigned AdjustStackOffset(unsigned Offset, unsigned Size,
+ unsigned Alignment) {
+ return alignTo(Offset + Size, Alignment) - Size;
+}
+
+void StackLayout::layoutObject(StackObject &Obj) {
+ if (!ClLayout) {
+ // If layout is disabled, just grab the next aligned address.
+ // This effectively disables stack coloring as well.
+ unsigned LastRegionEnd = Regions.empty() ? 0 : Regions.back().End;
+ unsigned Start = AdjustStackOffset(LastRegionEnd, Obj.Size, Obj.Alignment);
+ unsigned End = Start + Obj.Size;
+ Regions.emplace_back(Start, End, Obj.Range);
+ ObjectOffsets[Obj.Handle] = End;
+ return;
+ }
+
+ DEBUG(dbgs() << "Layout: size " << Obj.Size << ", align " << Obj.Alignment
+ << ", range " << Obj.Range << "\n");
+ assert(Obj.Alignment <= MaxAlignment);
+ unsigned Start = AdjustStackOffset(0, Obj.Size, Obj.Alignment);
+ unsigned End = Start + Obj.Size;
+ DEBUG(dbgs() << " First candidate: " << Start << " .. " << End << "\n");
+ for (const StackRegion &R : Regions) {
+ DEBUG(dbgs() << " Examining region: " << R.Start << " .. " << R.End
+ << ", range " << R.Range << "\n");
+ assert(End >= R.Start);
+ if (Start >= R.End) {
+ DEBUG(dbgs() << " Does not intersect, skip.\n");
+ continue;
+ }
+ if (Obj.Range.Overlaps(R.Range)) {
+ // Find the next appropriate location.
+ Start = AdjustStackOffset(R.End, Obj.Size, Obj.Alignment);
+ End = Start + Obj.Size;
+ DEBUG(dbgs() << " Overlaps. Next candidate: " << Start << " .. " << End
+ << "\n");
+ continue;
+ }
+ if (End <= R.End) {
+ DEBUG(dbgs() << " Reusing region(s).\n");
+ break;
+ }
+ }
+
+ unsigned LastRegionEnd = Regions.empty() ? 0 : Regions.back().End;
+ if (End > LastRegionEnd) {
+ // Insert a new region at the end. Maybe two.
+ if (Start > LastRegionEnd) {
+ DEBUG(dbgs() << " Creating gap region: " << LastRegionEnd << " .. "
+ << Start << "\n");
+ Regions.emplace_back(LastRegionEnd, Start, StackColoring::LiveRange());
+ LastRegionEnd = Start;
+ }
+ DEBUG(dbgs() << " Creating new region: " << LastRegionEnd << " .. " << End
+ << ", range " << Obj.Range << "\n");
+ Regions.emplace_back(LastRegionEnd, End, Obj.Range);
+ LastRegionEnd = End;
+ }
+
+ // Split starting and ending regions if necessary.
+ for (unsigned i = 0; i < Regions.size(); ++i) {
+ StackRegion &R = Regions[i];
+ if (Start > R.Start && Start < R.End) {
+ StackRegion R0 = R;
+ R.Start = R0.End = Start;
+ Regions.insert(&R, R0);
+ continue;
+ }
+ if (End > R.Start && End < R.End) {
+ StackRegion R0 = R;
+ R0.End = R.Start = End;
+ Regions.insert(&R, R0);
+ break;
+ }
+ }
+
+ // Update live ranges for all affected regions.
+ for (StackRegion &R : Regions) {
+ if (Start < R.End && End > R.Start)
+ R.Range.Join(Obj.Range);
+ if (End <= R.End)
+ break;
+ }
+
+ ObjectOffsets[Obj.Handle] = End;
+}
+
+void StackLayout::computeLayout() {
+ // Simple greedy algorithm.
+ // If this is replaced with something smarter, it must preserve the property
+ // that the first object is always at offset 0 in the stack frame (for
+ // StackProtectorSlot), or handle stack protector in some other way.
+ for (auto &Obj : StackObjects)
+ layoutObject(Obj);
+
+ DEBUG(print(dbgs()));
+}
diff --git a/contrib/llvm/lib/CodeGen/SafeStackLayout.h b/contrib/llvm/lib/CodeGen/SafeStackLayout.h
new file mode 100644
index 0000000..313ed21
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/SafeStackLayout.h
@@ -0,0 +1,68 @@
+//===-- SafeStackLayout.h - SafeStack frame layout -------------*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_CODEGEN_SAFESTACKLAYOUT_H
+#define LLVM_LIB_CODEGEN_SAFESTACKLAYOUT_H
+
+#include "SafeStackColoring.h"
+
+namespace llvm {
+namespace safestack {
+
+/// Compute the layout of an unsafe stack frame.
+class StackLayout {
+ unsigned MaxAlignment;
+
+ struct StackRegion {
+ unsigned Start;
+ unsigned End;
+ StackColoring::LiveRange Range;
+ StackRegion(unsigned Start, unsigned End,
+ const StackColoring::LiveRange &Range)
+ : Start(Start), End(End), Range(Range) {}
+ };
+ /// The list of current stack regions, sorted by StackRegion::Start.
+ SmallVector<StackRegion, 16> Regions;
+
+ struct StackObject {
+ const Value *Handle;
+ unsigned Size, Alignment;
+ StackColoring::LiveRange Range;
+ };
+ SmallVector<StackObject, 8> StackObjects;
+
+ DenseMap<const Value *, unsigned> ObjectOffsets;
+
+ void layoutObject(StackObject &Obj);
+
+public:
+ StackLayout(unsigned StackAlignment) : MaxAlignment(StackAlignment) {}
+ /// Add an object to the stack frame. The Value pointer is opaque and is
+ /// used as a handle to retrieve the object's offset in the frame later.
+ void addObject(const Value *V, unsigned Size, unsigned Alignment,
+ const StackColoring::LiveRange &Range);
+
+ /// Run the layout computation for all previously added objects.
+ void computeLayout();
+
+ /// Returns the offset to the object start in the stack frame.
+ unsigned getObjectOffset(const Value *V) { return ObjectOffsets[V]; }
+
+ /// Returns the size of the entire frame.
+ unsigned getFrameSize() { return Regions.empty() ? 0 : Regions.back().End; }
+
+ /// Returns the alignment of the frame.
+ unsigned getFrameAlignment() { return MaxAlignment; }
+ void print(raw_ostream &OS);
+};
+
+} // namespace safestack
+} // namespace llvm
+
+#endif // LLVM_LIB_CODEGEN_SAFESTACKLAYOUT_H
diff --git a/contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp b/contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp
index 11b246a..22bfd4d 100644
--- a/contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp
+++ b/contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp
@@ -14,11 +14,11 @@
#include "llvm/CodeGen/ScheduleDAGInstrs.h"
#include "llvm/ADT/IntEqClasses.h"
-#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -27,6 +27,8 @@
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/CodeGen/RegisterPressure.h"
#include "llvm/CodeGen/ScheduleDFS.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Type.h"
#include "llvm/IR/Operator.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
@@ -36,7 +38,6 @@
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
-#include <queue>
using namespace llvm;
@@ -49,12 +50,51 @@ static cl::opt<bool> EnableAASchedMI("enable-aa-sched-mi", cl::Hidden,
static cl::opt<bool> UseTBAA("use-tbaa-in-sched-mi", cl::Hidden,
cl::init(true), cl::desc("Enable use of TBAA during MI DAG construction"));
+// Note: the two options below might be used in tuning compile time vs
+// output quality. Setting HugeRegion so large that it will never be
+// reached means best-effort, but may be slow.
+
+// When Stores and Loads maps (or NonAliasStores and NonAliasLoads)
+// together hold this many SUs, a reduction of maps will be done.
+static cl::opt<unsigned> HugeRegion("dag-maps-huge-region", cl::Hidden,
+ cl::init(1000), cl::desc("The limit to use while constructing the DAG "
+ "prior to scheduling, at which point a trade-off "
+ "is made to avoid excessive compile time."));
+
+static cl::opt<unsigned> ReductionSize(
+ "dag-maps-reduction-size", cl::Hidden,
+ cl::desc("A huge scheduling region will have maps reduced by this many "
+ "nodes at a time. Defaults to HugeRegion / 2."));
+
+static unsigned getReductionSize() {
+ // Always reduce a huge region with half of the elements, except
+ // when user sets this number explicitly.
+ if (ReductionSize.getNumOccurrences() == 0)
+ return HugeRegion / 2;
+ return ReductionSize;
+}
+
+static void dumpSUList(ScheduleDAGInstrs::SUList &L) {
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ dbgs() << "{ ";
+ for (auto *su : L) {
+ dbgs() << "SU(" << su->NodeNum << ")";
+ if (su != L.back())
+ dbgs() << ", ";
+ }
+ dbgs() << "}\n";
+#endif
+}
+
ScheduleDAGInstrs::ScheduleDAGInstrs(MachineFunction &mf,
const MachineLoopInfo *mli,
bool RemoveKillFlags)
: ScheduleDAG(mf), MLI(mli), MFI(mf.getFrameInfo()),
RemoveKillFlags(RemoveKillFlags), CanHandleTerminators(false),
- TrackLaneMasks(false), FirstDbgValue(nullptr) {
+ TrackLaneMasks(false), AAForDep(nullptr), BarrierChain(nullptr),
+ UnknownValue(UndefValue::get(
+ Type::getVoidTy(mf.getFunction()->getContext()))),
+ FirstDbgValue(nullptr) {
DbgValues.clear();
const TargetSubtargetInfo &ST = mf.getSubtarget();
@@ -120,10 +160,6 @@ static void getUnderlyingObjects(const Value *V,
} while (!Working.empty());
}
-typedef PointerUnion<const Value *, const PseudoSourceValue *> ValueType;
-typedef SmallVector<PointerIntPair<ValueType, 1, bool>, 4>
-UnderlyingObjectsVector;
-
/// getUnderlyingObjectsForInstr - If this machine instr has memory reference
/// information and it can be tracked to a normal reference to a known
/// object, return the Value for that object.
@@ -131,46 +167,46 @@ static void getUnderlyingObjectsForInstr(const MachineInstr *MI,
const MachineFrameInfo *MFI,
UnderlyingObjectsVector &Objects,
const DataLayout &DL) {
- if (!MI->hasOneMemOperand() ||
- (!(*MI->memoperands_begin())->getValue() &&
- !(*MI->memoperands_begin())->getPseudoValue()) ||
- (*MI->memoperands_begin())->isVolatile())
- return;
-
- if (const PseudoSourceValue *PSV =
- (*MI->memoperands_begin())->getPseudoValue()) {
- // Function that contain tail calls don't have unique PseudoSourceValue
- // objects. Two PseudoSourceValues might refer to the same or overlapping
- // locations. The client code calling this function assumes this is not the
- // case. So return a conservative answer of no known object.
- if (MFI->hasTailCall())
- return;
+ auto allMMOsOkay = [&]() {
+ for (const MachineMemOperand *MMO : MI->memoperands()) {
+ if (MMO->isVolatile())
+ return false;
+
+ if (const PseudoSourceValue *PSV = MMO->getPseudoValue()) {
+        // Functions that contain tail calls don't have unique PseudoSourceValue
+ // objects. Two PseudoSourceValues might refer to the same or
+ // overlapping locations. The client code calling this function assumes
+ // this is not the case. So return a conservative answer of no known
+ // object.
+ if (MFI->hasTailCall())
+ return false;
- // For now, ignore PseudoSourceValues which may alias LLVM IR values
- // because the code that uses this function has no way to cope with
- // such aliases.
- if (!PSV->isAliased(MFI)) {
- bool MayAlias = PSV->mayAlias(MFI);
- Objects.push_back(UnderlyingObjectsVector::value_type(PSV, MayAlias));
- }
- return;
- }
+ // For now, ignore PseudoSourceValues which may alias LLVM IR values
+ // because the code that uses this function has no way to cope with
+ // such aliases.
+ if (PSV->isAliased(MFI))
+ return false;
- const Value *V = (*MI->memoperands_begin())->getValue();
- if (!V)
- return;
+ bool MayAlias = PSV->mayAlias(MFI);
+ Objects.push_back(UnderlyingObjectsVector::value_type(PSV, MayAlias));
+ } else if (const Value *V = MMO->getValue()) {
+ SmallVector<Value *, 4> Objs;
+ getUnderlyingObjects(V, Objs, DL);
- SmallVector<Value *, 4> Objs;
- getUnderlyingObjects(V, Objs, DL);
+ for (Value *V : Objs) {
+ if (!isIdentifiedObject(V))
+ return false;
- for (Value *V : Objs) {
- if (!isIdentifiedObject(V)) {
- Objects.clear();
- return;
+ Objects.push_back(UnderlyingObjectsVector::value_type(V, true));
+ }
+ } else
+ return false;
}
+ return true;
+ };
- Objects.push_back(UnderlyingObjectsVector::value_type(V, true));
- }
+ if (!allMMOsOkay())
+ Objects.clear();
}
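+
+// A minimal sketch of the all-or-nothing pattern used above (standalone
+// illustration with hypothetical names In/Out/isOk, not part of this
+// function): keep every result only if every element passes, otherwise
+// roll back.
+//
+//   auto allOkay = [&]() {
+//     for (const auto &E : In) {
+//       if (!isOk(E))
+//         return false;   // One bad element poisons the whole set.
+//       Out.push_back(E);
+//     }
+//     return true;
+//   };
+//   if (!allOkay())
+//     Out.clear();        // Roll back partial results, as Objects.clear() does.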
void ScheduleDAGInstrs::startBlock(MachineBasicBlock *bb) {
@@ -475,10 +511,10 @@ void ScheduleDAGInstrs::addVRegDefDeps(SUnit *SU, unsigned OperIdx) {
// VReg2SUnit for the non-overlapping part.
LaneBitmask OverlapMask = V2SU.LaneMask & LaneMask;
LaneBitmask NonOverlapMask = V2SU.LaneMask & ~LaneMask;
- if (NonOverlapMask != 0)
- CurrentVRegDefs.insert(VReg2SUnit(Reg, NonOverlapMask, V2SU.SU));
V2SU.SU = SU;
V2SU.LaneMask = OverlapMask;
+ if (NonOverlapMask != 0)
+ CurrentVRegDefs.insert(VReg2SUnit(Reg, NonOverlapMask, DefSU));
}
// If there was no CurrentVRegDefs entry for some lanes yet, create one.
if (LaneMask != 0)
@@ -518,84 +554,32 @@ void ScheduleDAGInstrs::addVRegUseDeps(SUnit *SU, unsigned OperIdx) {
/// (like a call or something with unmodeled side effects).
static inline bool isGlobalMemoryObject(AliasAnalysis *AA, MachineInstr *MI) {
return MI->isCall() || MI->hasUnmodeledSideEffects() ||
- (MI->hasOrderedMemoryRef() &&
- (!MI->mayLoad() || !MI->isInvariantLoad(AA)));
-}
-
-// This MI might have either incomplete info, or known to be unsafe
-// to deal with (i.e. volatile object).
-static inline bool isUnsafeMemoryObject(MachineInstr *MI,
- const MachineFrameInfo *MFI,
- const DataLayout &DL) {
- if (!MI || MI->memoperands_empty())
- return true;
- // We purposefully do no check for hasOneMemOperand() here
- // in hope to trigger an assert downstream in order to
- // finish implementation.
- if ((*MI->memoperands_begin())->isVolatile() ||
- MI->hasUnmodeledSideEffects())
- return true;
-
- if ((*MI->memoperands_begin())->getPseudoValue()) {
- // Similarly to getUnderlyingObjectForInstr:
- // For now, ignore PseudoSourceValues which may alias LLVM IR values
- // because the code that uses this function has no way to cope with
- // such aliases.
- return true;
- }
-
- const Value *V = (*MI->memoperands_begin())->getValue();
- if (!V)
- return true;
-
- SmallVector<Value *, 4> Objs;
- getUnderlyingObjects(V, Objs, DL);
- for (Value *V : Objs) {
- // Does this pointer refer to a distinct and identifiable object?
- if (!isIdentifiedObject(V))
- return true;
- }
-
- return false;
+ (MI->hasOrderedMemoryRef() && !MI->isInvariantLoad(AA));
}
/// This returns true if the two MIs need a chain edge between them.
-/// If these are not even memory operations, we still may need
-/// chain deps between them. The question really is - could
-/// these two MIs be reordered during scheduling from memory dependency
-/// point of view.
+/// This is called on normal stores and loads.
static bool MIsNeedChainEdge(AliasAnalysis *AA, const MachineFrameInfo *MFI,
const DataLayout &DL, MachineInstr *MIa,
MachineInstr *MIb) {
const MachineFunction *MF = MIa->getParent()->getParent();
const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
- // Cover a trivial case - no edge is need to itself.
- if (MIa == MIb)
- return false;
-
- // Let the target decide if memory accesses cannot possibly overlap.
- if ((MIa->mayLoad() || MIa->mayStore()) &&
- (MIb->mayLoad() || MIb->mayStore()))
- if (TII->areMemAccessesTriviallyDisjoint(MIa, MIb, AA))
- return false;
+ assert ((MIa->mayStore() || MIb->mayStore()) &&
+ "Dependency checked between two loads");
- // FIXME: Need to handle multiple memory operands to support all targets.
- if (!MIa->hasOneMemOperand() || !MIb->hasOneMemOperand())
- return true;
-
- if (isUnsafeMemoryObject(MIa, MFI, DL) || isUnsafeMemoryObject(MIb, MFI, DL))
- return true;
-
- // If we are dealing with two "normal" loads, we do not need an edge
- // between them - they could be reordered.
- if (!MIa->mayStore() && !MIb->mayStore())
+ // Let the target decide if memory accesses cannot possibly overlap.
+ if (TII->areMemAccessesTriviallyDisjoint(*MIa, *MIb, AA))
return false;
// To this point analysis is generic. From here on we do need AA.
if (!AA)
return true;
+ // FIXME: Need to handle multiple memory operands to support all targets.
+ if (!MIa->hasOneMemOperand() || !MIb->hasOneMemOperand())
+ return true;
+
MachineMemOperand *MMOa = *MIa->memoperands_begin();
MachineMemOperand *MMOb = *MIb->memoperands_begin();
@@ -634,106 +618,15 @@ static bool MIsNeedChainEdge(AliasAnalysis *AA, const MachineFrameInfo *MFI,
return (AAResult != NoAlias);
}
-/// This recursive function iterates over chain deps of SUb looking for
-/// "latest" node that needs a chain edge to SUa.
-static unsigned iterateChainSucc(AliasAnalysis *AA, const MachineFrameInfo *MFI,
- const DataLayout &DL, SUnit *SUa, SUnit *SUb,
- SUnit *ExitSU, unsigned *Depth,
- SmallPtrSetImpl<const SUnit *> &Visited) {
- if (!SUa || !SUb || SUb == ExitSU)
- return *Depth;
-
- // Remember visited nodes.
- if (!Visited.insert(SUb).second)
- return *Depth;
- // If there is _some_ dependency already in place, do not
- // descend any further.
- // TODO: Need to make sure that if that dependency got eliminated or ignored
- // for any reason in the future, we would not violate DAG topology.
- // Currently it does not happen, but makes an implicit assumption about
- // future implementation.
- //
- // Independently, if we encounter node that is some sort of global
- // object (like a call) we already have full set of dependencies to it
- // and we can stop descending.
- if (SUa->isSucc(SUb) ||
- isGlobalMemoryObject(AA, SUb->getInstr()))
- return *Depth;
-
- // If we do need an edge, or we have exceeded depth budget,
- // add that edge to the predecessors chain of SUb,
- // and stop descending.
- if (*Depth > 200 ||
- MIsNeedChainEdge(AA, MFI, DL, SUa->getInstr(), SUb->getInstr())) {
- SUb->addPred(SDep(SUa, SDep::MayAliasMem));
- return *Depth;
- }
- // Track current depth.
- (*Depth)++;
- // Iterate over memory dependencies only.
- for (SUnit::const_succ_iterator I = SUb->Succs.begin(), E = SUb->Succs.end();
- I != E; ++I)
- if (I->isNormalMemoryOrBarrier())
- iterateChainSucc(AA, MFI, DL, SUa, I->getSUnit(), ExitSU, Depth, Visited);
- return *Depth;
-}
-
-/// This function assumes that "downward" from SU there exist
-/// tail/leaf of already constructed DAG. It iterates downward and
-/// checks whether SU can be aliasing any node dominated
-/// by it.
-static void adjustChainDeps(AliasAnalysis *AA, const MachineFrameInfo *MFI,
- const DataLayout &DL, SUnit *SU, SUnit *ExitSU,
- std::set<SUnit *> &CheckList,
- unsigned LatencyToLoad) {
- if (!SU)
- return;
-
- SmallPtrSet<const SUnit*, 16> Visited;
- unsigned Depth = 0;
-
- for (std::set<SUnit *>::iterator I = CheckList.begin(), IE = CheckList.end();
- I != IE; ++I) {
- if (SU == *I)
- continue;
- if (MIsNeedChainEdge(AA, MFI, DL, SU->getInstr(), (*I)->getInstr())) {
- SDep Dep(SU, SDep::MayAliasMem);
- Dep.setLatency(((*I)->getInstr()->mayLoad()) ? LatencyToLoad : 0);
- (*I)->addPred(Dep);
- }
-
- // Iterate recursively over all previously added memory chain
- // successors. Keep track of visited nodes.
- for (SUnit::const_succ_iterator J = (*I)->Succs.begin(),
- JE = (*I)->Succs.end(); J != JE; ++J)
- if (J->isNormalMemoryOrBarrier())
- iterateChainSucc(AA, MFI, DL, SU, J->getSUnit(), ExitSU, &Depth,
- Visited);
- }
-}
-
-/// Check whether two objects need a chain edge, if so, add it
-/// otherwise remember the rejected SU.
-static inline void addChainDependency(AliasAnalysis *AA,
- const MachineFrameInfo *MFI,
- const DataLayout &DL, SUnit *SUa,
- SUnit *SUb, std::set<SUnit *> &RejectList,
- unsigned TrueMemOrderLatency = 0,
- bool isNormalMemory = false) {
- // If this is a false dependency,
- // do not add the edge, but remember the rejected node.
- if (MIsNeedChainEdge(AA, MFI, DL, SUa->getInstr(), SUb->getInstr())) {
- SDep Dep(SUa, isNormalMemory ? SDep::MayAliasMem : SDep::Barrier);
- Dep.setLatency(TrueMemOrderLatency);
+/// Check whether two objects need a chain edge and add it if needed.
+void ScheduleDAGInstrs::addChainDependency(SUnit *SUa, SUnit *SUb,
+ unsigned Latency) {
+ if (MIsNeedChainEdge(AAForDep, MFI, MF.getDataLayout(), SUa->getInstr(),
+ SUb->getInstr())) {
+ SDep Dep(SUa, SDep::MayAliasMem);
+ Dep.setLatency(Latency);
SUb->addPred(Dep);
}
- else {
- // Duplicate entries should be ignored.
- RejectList.insert(SUb);
- DEBUG(dbgs() << "\tReject chain dep between SU("
- << SUa->NodeNum << ") and SU("
- << SUb->NodeNum << ")\n");
- }
}
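+
+// Usage note (illustrative; hypothetical SUnit names): buildSchedGraph()
+// walks the region bottom-up, so SUa is always the SU currently being
+// visited (earlier in program order) and SUb an already-seen SU from one
+// of the maps below it, keeping SUa ordered before SUb, e.g.:
+//
+//   addChainDependency(CurSU, SeenSU, Loads.getTrueMemOrderLatency());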
/// Create an SUnit for each real instruction, numbered in top-down topological
@@ -752,16 +645,15 @@ void ScheduleDAGInstrs::initSUnits() {
// which is contained within a basic block.
SUnits.reserve(NumRegionInstrs);
- for (MachineBasicBlock::iterator I = RegionBegin; I != RegionEnd; ++I) {
- MachineInstr *MI = I;
- if (MI->isDebugValue())
+ for (MachineInstr &MI : llvm::make_range(RegionBegin, RegionEnd)) {
+ if (MI.isDebugValue())
continue;
- SUnit *SU = newSUnit(MI);
- MISUnitMap[MI] = SU;
+ SUnit *SU = newSUnit(&MI);
+ MISUnitMap[&MI] = SU;
- SU->isCall = MI->isCall();
- SU->isCommutable = MI->isCommutable();
+ SU->isCall = MI.isCall();
+ SU->isCommutable = MI.isCommutable();
// Assign the Latency field of SU using target-provided information.
SU->Latency = SchedModel.computeInstrLatency(SU->getInstr());
@@ -808,6 +700,19 @@ void ScheduleDAGInstrs::collectVRegUses(SUnit *SU) {
if (!TargetRegisterInfo::isVirtualRegister(Reg))
continue;
+ // Ignore re-defs.
+ if (TrackLaneMasks) {
+ bool FoundDef = false;
+ for (const MachineOperand &MO2 : MI->operands()) {
+ if (MO2.isReg() && MO2.isDef() && MO2.getReg() == Reg && !MO2.isDead()) {
+ FoundDef = true;
+ break;
+ }
+ }
+ if (FoundDef)
+ continue;
+ }
+
// Record this local VReg use.
VReg2SUnitMultiMap::iterator UI = VRegUses.find(Reg);
for (; UI != VRegUses.end(); ++UI) {
@@ -819,17 +724,136 @@ void ScheduleDAGInstrs::collectVRegUses(SUnit *SU) {
}
}
+class ScheduleDAGInstrs::Value2SUsMap : public MapVector<ValueType, SUList> {
+
+ /// Current total number of SUs in map.
+ unsigned NumNodes;
+
+ /// 1 for loads, 0 for stores. (see comment in SUList)
+ unsigned TrueMemOrderLatency;
+public:
+
+ Value2SUsMap(unsigned lat = 0) : NumNodes(0), TrueMemOrderLatency(lat) {}
+
+  /// To keep NumNodes up to date, insert() should be used instead of
+  /// this operator w/ push_back().
+  ValueType &operator[](const SUList &Key) {
+    llvm_unreachable("Don't use. Use insert() instead."); }
+
+  /// Add SU to the SUList of V. If the map grows huge, the scheduler
+  /// reduces its size via reduceHugeMemNodeMaps().
+  inline void insert(SUnit *SU, ValueType V) {
+ MapVector::operator[](V).push_back(SU);
+ NumNodes++;
+ }
+
+ /// Clears the list of SUs mapped to V.
+  inline void clearList(ValueType V) {
+ iterator Itr = find(V);
+ if (Itr != end()) {
+ assert (NumNodes >= Itr->second.size());
+ NumNodes -= Itr->second.size();
+
+ Itr->second.clear();
+ }
+ }
+
+ /// Clears map from all contents.
+ void clear() {
+ MapVector<ValueType, SUList>::clear();
+ NumNodes = 0;
+ }
+
+  inline unsigned size() const { return NumNodes; }
+
+  /// Recompute the number of SUs in this map after a reduction.
+  void reComputeSize() {
+ NumNodes = 0;
+ for (auto &I : *this)
+ NumNodes += I.second.size();
+ }
+
+  inline unsigned getTrueMemOrderLatency() const {
+ return TrueMemOrderLatency;
+ }
+
+ void dump();
+};
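+
+// A minimal usage sketch (hypothetical SUnits; illustration only):
+//
+//   Value2SUsMap Stores;          // TrueMemOrderLatency defaults to 0.
+//   Stores.insert(SU0, V);        // SUList for V: { SU0 }
+//   Stores.insert(SU1, V);        // SUList for V: { SU0, SU1 }
+//   assert(Stores.size() == 2);   // size() counts SUs, not Values.
+//   Stores.clearList(V);          // Empties V's list, updates NumNodes.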
+
+void ScheduleDAGInstrs::addChainDependencies(SUnit *SU,
+ Value2SUsMap &Val2SUsMap) {
+ for (auto &I : Val2SUsMap)
+ addChainDependencies(SU, I.second,
+ Val2SUsMap.getTrueMemOrderLatency());
+}
+
+void ScheduleDAGInstrs::addChainDependencies(SUnit *SU,
+ Value2SUsMap &Val2SUsMap,
+ ValueType V) {
+ Value2SUsMap::iterator Itr = Val2SUsMap.find(V);
+ if (Itr != Val2SUsMap.end())
+ addChainDependencies(SU, Itr->second,
+ Val2SUsMap.getTrueMemOrderLatency());
+}
+
+void ScheduleDAGInstrs::addBarrierChain(Value2SUsMap &map) {
+ assert (BarrierChain != nullptr);
+
+ for (auto &I : map) {
+ SUList &sus = I.second;
+ for (auto *SU : sus)
+ SU->addPredBarrier(BarrierChain);
+ }
+ map.clear();
+}
+
+void ScheduleDAGInstrs::insertBarrierChain(Value2SUsMap &map) {
+ assert (BarrierChain != nullptr);
+
+ // Go through all lists of SUs.
+ for (Value2SUsMap::iterator I = map.begin(), EE = map.end(); I != EE;) {
+ Value2SUsMap::iterator CurrItr = I++;
+ SUList &sus = CurrItr->second;
+ SUList::iterator SUItr = sus.begin(), SUEE = sus.end();
+ for (; SUItr != SUEE; ++SUItr) {
+ // Stop on BarrierChain or any instruction above it.
+ if ((*SUItr)->NodeNum <= BarrierChain->NodeNum)
+ break;
+
+ (*SUItr)->addPredBarrier(BarrierChain);
+ }
+
+    // Also remove the BarrierChain itself from the list if present.
+ if (SUItr != SUEE && *SUItr == BarrierChain)
+ SUItr++;
+
+ // Remove all SUs that are now successors of BarrierChain.
+ if (SUItr != sus.begin())
+ sus.erase(sus.begin(), SUItr);
+ }
+
+  // Remove all entries with empty SU lists.
+ map.remove_if([&](std::pair<ValueType, SUList> &mapEntry) {
+ return (mapEntry.second.empty()); });
+
+ // Recompute the size of the map (NumNodes).
+ map.reComputeSize();
+}
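+
+// Worked example (hypothetical NodeNums): with BarrierChain = SU(5) and a
+// list { SU(9), SU(7), SU(5), SU(2) } (highest NodeNum first, since SUs
+// are inserted during the bottom-up walk), SU(9) and SU(7) get SU(5) as a
+// barrier predecessor, SU(5) itself is skipped past, and the first three
+// entries are erased, leaving { SU(2) }; the erased SUs are now reachable
+// transitively through the barrier.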
+
/// If RegPressure is non-null, compute register pressure as a side effect. The
/// DAG builder is an efficient place to do it because it already visits
/// operands.
void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
RegPressureTracker *RPTracker,
PressureDiffs *PDiffs,
+ LiveIntervals *LIS,
bool TrackLaneMasks) {
const TargetSubtargetInfo &ST = MF.getSubtarget();
bool UseAA = EnableAASchedMI.getNumOccurrences() > 0 ? EnableAASchedMI
: ST.useAA();
- AliasAnalysis *AAForDep = UseAA ? AA : nullptr;
+ AAForDep = UseAA ? AA : nullptr;
+
+ BarrierChain = nullptr;
this->TrackLaneMasks = TrackLaneMasks;
MISUnitMap.clear();
@@ -841,19 +865,25 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
if (PDiffs)
PDiffs->init(SUnits.size());
- // We build scheduling units by walking a block's instruction list from bottom
- // to top.
-
- // Remember where a generic side-effecting instruction is as we proceed.
- SUnit *BarrierChain = nullptr, *AliasChain = nullptr;
-
- // Memory references to specific known memory locations are tracked
- // so that they can be given more precise dependencies. We track
- // separately the known memory locations that may alias and those
- // that are known not to alias
- MapVector<ValueType, std::vector<SUnit *> > AliasMemDefs, NonAliasMemDefs;
- MapVector<ValueType, std::vector<SUnit *> > AliasMemUses, NonAliasMemUses;
- std::set<SUnit*> RejectMemNodes;
+ // We build scheduling units by walking a block's instruction list
+ // from bottom to top.
+
+  // Each MI's memory operand(s) is analyzed into a list of underlying
+  // objects. The SU is then inserted into the SUList(s) mapped from the
+ // Value(s). Each Value thus gets mapped to lists of SUs depending
+ // on it, stores and loads kept separately. Two SUs are trivially
+ // non-aliasing if they both depend on only identified Values and do
+ // not share any common Value.
+ Value2SUsMap Stores, Loads(1 /*TrueMemOrderLatency*/);
+
+ // Certain memory accesses are known to not alias any SU in Stores
+  // or Loads, and therefore have their own 'NonAlias'
+  // domain. E.g. spill / reload instructions never alias LLVM IR
+ // Values. It would be nice to assume that this type of memory
+ // accesses always have a proper memory operand modelling, and are
+ // therefore never unanalyzable, but this is conservatively not
+ // done.
+ Value2SUsMap NonAliasStores, NonAliasLoads(1 /*TrueMemOrderLatency*/);
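+
+  // Illustration (hypothetical IR): for
+  //   store i32 %x, i32* %a      ; %a, %b distinct identified allocas
+  //   %v = load i32, i32* %b
+  // the store lands in Stores[%a] and the load in Loads[%b]; sharing no
+  // Value, they get no chain edge and no AA query. A spill to a fixed
+  // stack slot instead lands in NonAliasStores, keyed by its
+  // PseudoSourceValue.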
// Remove any stale debug info; sometimes BuildSchedGraph is called again
// without emitting the info from the previous call.
@@ -882,283 +912,201 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
MachineInstr *DbgMI = nullptr;
for (MachineBasicBlock::iterator MII = RegionEnd, MIE = RegionBegin;
MII != MIE; --MII) {
- MachineInstr *MI = std::prev(MII);
- if (MI && DbgMI) {
- DbgValues.push_back(std::make_pair(DbgMI, MI));
+ MachineInstr &MI = *std::prev(MII);
+ if (DbgMI) {
+ DbgValues.push_back(std::make_pair(DbgMI, &MI));
DbgMI = nullptr;
}
- if (MI->isDebugValue()) {
- DbgMI = MI;
+ if (MI.isDebugValue()) {
+ DbgMI = &MI;
continue;
}
- SUnit *SU = MISUnitMap[MI];
+ SUnit *SU = MISUnitMap[&MI];
assert(SU && "No SUnit mapped to this MI");
if (RPTracker) {
collectVRegUses(SU);
RegisterOperands RegOpers;
- RegOpers.collect(*MI, *TRI, MRI);
+ RegOpers.collect(MI, *TRI, MRI, TrackLaneMasks, false);
+ if (TrackLaneMasks) {
+ SlotIndex SlotIdx = LIS->getInstructionIndex(MI);
+ RegOpers.adjustLaneLiveness(*LIS, MRI, SlotIdx);
+ }
if (PDiffs != nullptr)
PDiffs->addInstruction(SU->NodeNum, RegOpers, MRI);
RPTracker->recedeSkipDebugValues();
- assert(&*RPTracker->getPos() == MI && "RPTracker in sync");
+ assert(&*RPTracker->getPos() == &MI && "RPTracker in sync");
RPTracker->recede(RegOpers);
}
assert(
- (CanHandleTerminators || (!MI->isTerminator() && !MI->isPosition())) &&
+ (CanHandleTerminators || (!MI.isTerminator() && !MI.isPosition())) &&
"Cannot schedule terminators or labels!");
// Add register-based dependencies (data, anti, and output).
+ // For some instructions (calls, returns, inline-asm, etc.) there can
+ // be explicit uses and implicit defs, in which case the use will appear
+ // on the operand list before the def. Do two passes over the operand
+ // list to make sure that defs are processed before any uses.
bool HasVRegDef = false;
- for (unsigned j = 0, n = MI->getNumOperands(); j != n; ++j) {
- const MachineOperand &MO = MI->getOperand(j);
- if (!MO.isReg()) continue;
+ for (unsigned j = 0, n = MI.getNumOperands(); j != n; ++j) {
+ const MachineOperand &MO = MI.getOperand(j);
+ if (!MO.isReg() || !MO.isDef())
+ continue;
unsigned Reg = MO.getReg();
- if (Reg == 0) continue;
+ if (Reg == 0)
+ continue;
if (TRI->isPhysicalRegister(Reg))
addPhysRegDeps(SU, j);
else {
- if (MO.isDef()) {
- HasVRegDef = true;
- addVRegDefDeps(SU, j);
- }
- else if (MO.readsReg()) // ignore undef operands
- addVRegUseDeps(SU, j);
+ HasVRegDef = true;
+ addVRegDefDeps(SU, j);
}
}
+ // Now process all uses.
+ for (unsigned j = 0, n = MI.getNumOperands(); j != n; ++j) {
+ const MachineOperand &MO = MI.getOperand(j);
+ // Only look at use operands.
+ // We do not need to check for MO.readsReg() here because subsequent
+ // subregister defs will get output dependence edges and need no
+ // additional use dependencies.
+ if (!MO.isReg() || !MO.isUse())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (Reg == 0)
+ continue;
+
+ if (TRI->isPhysicalRegister(Reg))
+ addPhysRegDeps(SU, j);
+ else if (MO.readsReg()) // ignore undef operands
+ addVRegUseDeps(SU, j);
+ }
+
// If we haven't seen any uses in this scheduling region, create a
// dependence edge to ExitSU to model the live-out latency. This is required
// for vreg defs with no in-region use, and prefetches with no vreg def.
//
// FIXME: NumDataSuccs would be more precise than NumSuccs here. This
// check currently relies on being called before adding chain deps.
- if (SU->NumSuccs == 0 && SU->Latency > 1
- && (HasVRegDef || MI->mayLoad())) {
+ if (SU->NumSuccs == 0 && SU->Latency > 1 && (HasVRegDef || MI.mayLoad())) {
SDep Dep(SU, SDep::Artificial);
Dep.setLatency(SU->Latency - 1);
ExitSU.addPred(Dep);
}
- // Add chain dependencies.
- // Chain dependencies used to enforce memory order should have
- // latency of 0 (except for true dependency of Store followed by
- // aliased Load... we estimate that with a single cycle of latency
- // assuming the hardware will bypass)
- // Note that isStoreToStackSlot and isLoadFromStackSLot are not usable
- // after stack slots are lowered to actual addresses.
- // TODO: Use an AliasAnalysis and do real alias-analysis queries, and
- // produce more precise dependence information.
- unsigned TrueMemOrderLatency = MI->mayStore() ? 1 : 0;
- if (isGlobalMemoryObject(AA, MI)) {
- // Be conservative with these and add dependencies on all memory
- // references, even those that are known to not alias.
- for (MapVector<ValueType, std::vector<SUnit *> >::iterator I =
- NonAliasMemDefs.begin(), E = NonAliasMemDefs.end(); I != E; ++I) {
- for (unsigned i = 0, e = I->second.size(); i != e; ++i) {
- I->second[i]->addPred(SDep(SU, SDep::Barrier));
- }
- }
- for (MapVector<ValueType, std::vector<SUnit *> >::iterator I =
- NonAliasMemUses.begin(), E = NonAliasMemUses.end(); I != E; ++I) {
- for (unsigned i = 0, e = I->second.size(); i != e; ++i) {
- SDep Dep(SU, SDep::Barrier);
- Dep.setLatency(TrueMemOrderLatency);
- I->second[i]->addPred(Dep);
- }
- }
- // Add SU to the barrier chain.
+ // Add memory dependencies (Note: isStoreToStackSlot and
+    // isLoadFromStackSlot are not usable after stack slots are lowered to
+ // actual addresses).
+
+ // This is a barrier event that acts as a pivotal node in the DAG.
+ if (isGlobalMemoryObject(AA, &MI)) {
+
+ // Become the barrier chain.
if (BarrierChain)
- BarrierChain->addPred(SDep(SU, SDep::Barrier));
+ BarrierChain->addPredBarrier(SU);
BarrierChain = SU;
- // This is a barrier event that acts as a pivotal node in the DAG,
- // so it is safe to clear list of exposed nodes.
- adjustChainDeps(AA, MFI, MF.getDataLayout(), SU, &ExitSU, RejectMemNodes,
- TrueMemOrderLatency);
- RejectMemNodes.clear();
- NonAliasMemDefs.clear();
- NonAliasMemUses.clear();
-
- // fall-through
- new_alias_chain:
- // Chain all possibly aliasing memory references through SU.
- if (AliasChain) {
- unsigned ChainLatency = 0;
- if (AliasChain->getInstr()->mayLoad())
- ChainLatency = TrueMemOrderLatency;
- addChainDependency(AAForDep, MFI, MF.getDataLayout(), SU, AliasChain,
- RejectMemNodes, ChainLatency);
- }
- AliasChain = SU;
- for (unsigned k = 0, m = PendingLoads.size(); k != m; ++k)
- addChainDependency(AAForDep, MFI, MF.getDataLayout(), SU,
- PendingLoads[k], RejectMemNodes,
- TrueMemOrderLatency);
- for (MapVector<ValueType, std::vector<SUnit *> >::iterator I =
- AliasMemDefs.begin(), E = AliasMemDefs.end(); I != E; ++I) {
- for (unsigned i = 0, e = I->second.size(); i != e; ++i)
- addChainDependency(AAForDep, MFI, MF.getDataLayout(), SU,
- I->second[i], RejectMemNodes);
- }
- for (MapVector<ValueType, std::vector<SUnit *> >::iterator I =
- AliasMemUses.begin(), E = AliasMemUses.end(); I != E; ++I) {
- for (unsigned i = 0, e = I->second.size(); i != e; ++i)
- addChainDependency(AAForDep, MFI, MF.getDataLayout(), SU,
- I->second[i], RejectMemNodes, TrueMemOrderLatency);
- }
- // This call must come after calls to addChainDependency() since it
- // consumes the 'RejectMemNodes' list that addChainDependency() possibly
- // adds to.
- adjustChainDeps(AA, MFI, MF.getDataLayout(), SU, &ExitSU, RejectMemNodes,
- TrueMemOrderLatency);
- PendingLoads.clear();
- AliasMemDefs.clear();
- AliasMemUses.clear();
- } else if (MI->mayStore()) {
- // Add dependence on barrier chain, if needed.
- // There is no point to check aliasing on barrier event. Even if
- // SU and barrier _could_ be reordered, they should not. In addition,
- // we have lost all RejectMemNodes below barrier.
- if (BarrierChain)
- BarrierChain->addPred(SDep(SU, SDep::Barrier));
- UnderlyingObjectsVector Objs;
- getUnderlyingObjectsForInstr(MI, MFI, Objs, MF.getDataLayout());
+ DEBUG(dbgs() << "Global memory object and new barrier chain: SU("
+ << BarrierChain->NodeNum << ").\n";);
- if (Objs.empty()) {
- // Treat all other stores conservatively.
- goto new_alias_chain;
- }
+ // Add dependencies against everything below it and clear maps.
+ addBarrierChain(Stores);
+ addBarrierChain(Loads);
+ addBarrierChain(NonAliasStores);
+ addBarrierChain(NonAliasLoads);
- bool MayAlias = false;
- for (UnderlyingObjectsVector::iterator K = Objs.begin(), KE = Objs.end();
- K != KE; ++K) {
- ValueType V = K->getPointer();
- bool ThisMayAlias = K->getInt();
- if (ThisMayAlias)
- MayAlias = true;
-
- // A store to a specific PseudoSourceValue. Add precise dependencies.
- // Record the def in MemDefs, first adding a dep if there is
- // an existing def.
- MapVector<ValueType, std::vector<SUnit *> >::iterator I =
- ((ThisMayAlias) ? AliasMemDefs.find(V) : NonAliasMemDefs.find(V));
- MapVector<ValueType, std::vector<SUnit *> >::iterator IE =
- ((ThisMayAlias) ? AliasMemDefs.end() : NonAliasMemDefs.end());
- if (I != IE) {
- for (unsigned i = 0, e = I->second.size(); i != e; ++i)
- addChainDependency(AAForDep, MFI, MF.getDataLayout(), SU,
- I->second[i], RejectMemNodes, 0, true);
-
- // If we're not using AA, then we only need one store per object.
- if (!AAForDep)
- I->second.clear();
- I->second.push_back(SU);
- } else {
- if (ThisMayAlias) {
- if (!AAForDep)
- AliasMemDefs[V].clear();
- AliasMemDefs[V].push_back(SU);
- } else {
- if (!AAForDep)
- NonAliasMemDefs[V].clear();
- NonAliasMemDefs[V].push_back(SU);
- }
+ continue;
+ }
+
+    // If it's not a store or a non-invariant load, we're done.
+ if (!MI.mayStore() && !(MI.mayLoad() && !MI.isInvariantLoad(AA)))
+ continue;
+
+    // Always add a dependency edge to BarrierChain if present.
+ if (BarrierChain)
+ BarrierChain->addPredBarrier(SU);
+
+ // Find the underlying objects for MI. The Objs vector is either
+ // empty, or filled with the Values of memory locations which this
+ // SU depends on. An empty vector means the memory location is
+ // unknown, and may alias anything.
+ UnderlyingObjectsVector Objs;
+ getUnderlyingObjectsForInstr(&MI, MFI, Objs, MF.getDataLayout());
+
+ if (MI.mayStore()) {
+ if (Objs.empty()) {
+ // An unknown store depends on all stores and loads.
+ addChainDependencies(SU, Stores);
+ addChainDependencies(SU, NonAliasStores);
+ addChainDependencies(SU, Loads);
+ addChainDependencies(SU, NonAliasLoads);
+
+ // Map this store to 'UnknownValue'.
+ Stores.insert(SU, UnknownValue);
+ } else {
+ // Add precise dependencies against all previously seen memory
+ // accesses mapped to the same Value(s).
+ for (const UnderlyingObject &UnderlObj : Objs) {
+ ValueType V = UnderlObj.getValue();
+ bool ThisMayAlias = UnderlObj.mayAlias();
+
+ // Add dependencies to previous stores and loads mapped to V.
+ addChainDependencies(SU, (ThisMayAlias ? Stores : NonAliasStores), V);
+ addChainDependencies(SU, (ThisMayAlias ? Loads : NonAliasLoads), V);
}
- // Handle the uses in MemUses, if there are any.
- MapVector<ValueType, std::vector<SUnit *> >::iterator J =
- ((ThisMayAlias) ? AliasMemUses.find(V) : NonAliasMemUses.find(V));
- MapVector<ValueType, std::vector<SUnit *> >::iterator JE =
- ((ThisMayAlias) ? AliasMemUses.end() : NonAliasMemUses.end());
- if (J != JE) {
- for (unsigned i = 0, e = J->second.size(); i != e; ++i)
- addChainDependency(AAForDep, MFI, MF.getDataLayout(), SU,
- J->second[i], RejectMemNodes,
- TrueMemOrderLatency, true);
- J->second.clear();
+ // Update the store map after all chains have been added to avoid adding
+ // self-loop edge if multiple underlying objects are present.
+ for (const UnderlyingObject &UnderlObj : Objs) {
+ ValueType V = UnderlObj.getValue();
+ bool ThisMayAlias = UnderlObj.mayAlias();
+
+ // Map this store to V.
+ (ThisMayAlias ? Stores : NonAliasStores).insert(SU, V);
}
+        // The store may have dependencies on unanalyzable loads and
+        // stores.
+ addChainDependencies(SU, Loads, UnknownValue);
+ addChainDependencies(SU, Stores, UnknownValue);
}
- if (MayAlias) {
- // Add dependencies from all the PendingLoads, i.e. loads
- // with no underlying object.
- for (unsigned k = 0, m = PendingLoads.size(); k != m; ++k)
- addChainDependency(AAForDep, MFI, MF.getDataLayout(), SU,
- PendingLoads[k], RejectMemNodes,
- TrueMemOrderLatency);
- // Add dependence on alias chain, if needed.
- if (AliasChain)
- addChainDependency(AAForDep, MFI, MF.getDataLayout(), SU, AliasChain,
- RejectMemNodes);
- }
- // This call must come after calls to addChainDependency() since it
- // consumes the 'RejectMemNodes' list that addChainDependency() possibly
- // adds to.
- adjustChainDeps(AA, MFI, MF.getDataLayout(), SU, &ExitSU, RejectMemNodes,
- TrueMemOrderLatency);
- } else if (MI->mayLoad()) {
- bool MayAlias = true;
- if (MI->isInvariantLoad(AA)) {
- // Invariant load, no chain dependencies needed!
+ } else { // SU is a load.
+ if (Objs.empty()) {
+ // An unknown load depends on all stores.
+ addChainDependencies(SU, Stores);
+ addChainDependencies(SU, NonAliasStores);
+
+ Loads.insert(SU, UnknownValue);
} else {
- UnderlyingObjectsVector Objs;
- getUnderlyingObjectsForInstr(MI, MFI, Objs, MF.getDataLayout());
-
- if (Objs.empty()) {
- // A load with no underlying object. Depend on all
- // potentially aliasing stores.
- for (MapVector<ValueType, std::vector<SUnit *> >::iterator I =
- AliasMemDefs.begin(), E = AliasMemDefs.end(); I != E; ++I)
- for (unsigned i = 0, e = I->second.size(); i != e; ++i)
- addChainDependency(AAForDep, MFI, MF.getDataLayout(), SU,
- I->second[i], RejectMemNodes);
-
- PendingLoads.push_back(SU);
- MayAlias = true;
- } else {
- MayAlias = false;
- }
+ for (const UnderlyingObject &UnderlObj : Objs) {
+ ValueType V = UnderlObj.getValue();
+ bool ThisMayAlias = UnderlObj.mayAlias();
+
+ // Add precise dependencies against all previously seen stores
+ // mapping to the same Value(s).
+ addChainDependencies(SU, (ThisMayAlias ? Stores : NonAliasStores), V);
- for (UnderlyingObjectsVector::iterator
- J = Objs.begin(), JE = Objs.end(); J != JE; ++J) {
- ValueType V = J->getPointer();
- bool ThisMayAlias = J->getInt();
-
- if (ThisMayAlias)
- MayAlias = true;
-
- // A load from a specific PseudoSourceValue. Add precise dependencies.
- MapVector<ValueType, std::vector<SUnit *> >::iterator I =
- ((ThisMayAlias) ? AliasMemDefs.find(V) : NonAliasMemDefs.find(V));
- MapVector<ValueType, std::vector<SUnit *> >::iterator IE =
- ((ThisMayAlias) ? AliasMemDefs.end() : NonAliasMemDefs.end());
- if (I != IE)
- for (unsigned i = 0, e = I->second.size(); i != e; ++i)
- addChainDependency(AAForDep, MFI, MF.getDataLayout(), SU,
- I->second[i], RejectMemNodes, 0, true);
- if (ThisMayAlias)
- AliasMemUses[V].push_back(SU);
- else
- NonAliasMemUses[V].push_back(SU);
+ // Map this load to V.
+ (ThisMayAlias ? Loads : NonAliasLoads).insert(SU, V);
}
- // Add dependencies on alias and barrier chains, if needed.
- if (MayAlias && AliasChain)
- addChainDependency(AAForDep, MFI, MF.getDataLayout(), SU, AliasChain,
- RejectMemNodes);
- if (MayAlias)
- // This call must come after calls to addChainDependency() since it
- // consumes the 'RejectMemNodes' list that addChainDependency()
- // possibly adds to.
- adjustChainDeps(AA, MFI, MF.getDataLayout(), SU, &ExitSU,
- RejectMemNodes, /*Latency=*/0);
- if (BarrierChain)
- BarrierChain->addPred(SDep(SU, SDep::Barrier));
+        // The load may have dependencies on unanalyzable stores.
+ addChainDependencies(SU, Stores, UnknownValue);
}
}
+
+ // Reduce maps if they grow huge.
+ if (Stores.size() + Loads.size() >= HugeRegion) {
+ DEBUG(dbgs() << "Reducing Stores and Loads maps.\n";);
+ reduceHugeMemNodeMaps(Stores, Loads, getReductionSize());
+ }
+ if (NonAliasStores.size() + NonAliasLoads.size() >= HugeRegion) {
+ DEBUG(dbgs() << "Reducing NonAliasStores and NonAliasLoads maps.\n";);
+ reduceHugeMemNodeMaps(NonAliasStores, NonAliasLoads, getReductionSize());
+ }
}
+
if (DbgMI)
FirstDbgValue = DbgMI;
@@ -1166,7 +1114,84 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
Uses.clear();
CurrentVRegDefs.clear();
CurrentVRegUses.clear();
- PendingLoads.clear();
+}
+
+raw_ostream &llvm::operator<<(raw_ostream &OS, const PseudoSourceValue* PSV) {
+ PSV->printCustom(OS);
+ return OS;
+}
+
+void ScheduleDAGInstrs::Value2SUsMap::dump() {
+ for (auto &Itr : *this) {
+ if (Itr.first.is<const Value*>()) {
+ const Value *V = Itr.first.get<const Value*>();
+ if (isa<UndefValue>(V))
+ dbgs() << "Unknown";
+ else
+ V->printAsOperand(dbgs());
+ }
+ else if (Itr.first.is<const PseudoSourceValue*>())
+ dbgs() << Itr.first.get<const PseudoSourceValue*>();
+ else
+ llvm_unreachable("Unknown Value type.");
+
+ dbgs() << " : ";
+ dumpSUList(Itr.second);
+ }
+}
+
+/// Reduce maps in FIFO order, by N SUs. This is better than turning
+/// every Nth memory SU into BarrierChain in buildSchedGraph(), since
+/// it avoids unnecessary edges between seen SUs above the new
+/// BarrierChain, and those below it.
+void ScheduleDAGInstrs::reduceHugeMemNodeMaps(Value2SUsMap &stores,
+ Value2SUsMap &loads, unsigned N) {
+ DEBUG(dbgs() << "Before reduction:\nStoring SUnits:\n";
+ stores.dump();
+ dbgs() << "Loading SUnits:\n";
+ loads.dump());
+
+  // Insert all SUs' NodeNums into a vector and sort it.
+ std::vector<unsigned> NodeNums;
+ NodeNums.reserve(stores.size() + loads.size());
+ for (auto &I : stores)
+ for (auto *SU : I.second)
+ NodeNums.push_back(SU->NodeNum);
+ for (auto &I : loads)
+ for (auto *SU : I.second)
+ NodeNums.push_back(SU->NodeNum);
+ std::sort(NodeNums.begin(), NodeNums.end());
+
+  // The last N elements in NodeNums will be removed, and the SU with
+  // the lowest NodeNum among them becomes the new BarrierChain, so that
+  // the not-yet-seen SUs still get a dependency on the removed SUs.
+ assert (N <= NodeNums.size());
+ SUnit *newBarrierChain = &SUnits[*(NodeNums.end() - N)];
+ if (BarrierChain) {
+ // The aliasing and non-aliasing maps reduce independently of each
+ // other, but share a common BarrierChain. Check if the
+ // newBarrierChain is above the former one. If it is not, it may
+ // introduce a loop to use newBarrierChain, so keep the old one.
+ if (newBarrierChain->NodeNum < BarrierChain->NodeNum) {
+ BarrierChain->addPredBarrier(newBarrierChain);
+ BarrierChain = newBarrierChain;
+ DEBUG(dbgs() << "Inserting new barrier chain: SU("
+ << BarrierChain->NodeNum << ").\n";);
+ }
+ else
+ DEBUG(dbgs() << "Keeping old barrier chain: SU("
+ << BarrierChain->NodeNum << ").\n";);
+ }
+ else
+ BarrierChain = newBarrierChain;
+
+ insertBarrierChain(stores);
+ insertBarrierChain(loads);
+
+ DEBUG(dbgs() << "After reduction:\nStoring SUnits:\n";
+ stores.dump();
+ dbgs() << "Loading SUnits:\n";
+ loads.dump());
}
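+
+// Worked example (hypothetical): with N = 3 and sorted NodeNums
+// { 2, 4, 7, 9, 12 }, the three highest -- 7, 9 and 12 -- leave the maps
+// and SU(7), the lowest of them, becomes the new BarrierChain; SUs seen
+// later chain to SU(7) instead of to each of the evicted nodes.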
/// \brief Initialize register live-range state for updating kills.
@@ -1190,7 +1215,8 @@ void ScheduleDAGInstrs::startBlockForKills(MachineBasicBlock *BB) {
/// operands, then we also need to propagate that to any instructions inside
/// the bundle which had the same kill state.
static void toggleBundleKillFlag(MachineInstr *MI, unsigned Reg,
- bool NewKillState) {
+ bool NewKillState,
+ const TargetRegisterInfo *TRI) {
if (MI->getOpcode() != TargetOpcode::BUNDLE)
return;
@@ -1199,30 +1225,13 @@ static void toggleBundleKillFlag(MachineInstr *MI, unsigned Reg,
// might set it on too many operands. We will clear as many flags as we
// can though.
MachineBasicBlock::instr_iterator Begin = MI->getIterator();
- MachineBasicBlock::instr_iterator End = getBundleEnd(MI);
+ MachineBasicBlock::instr_iterator End = getBundleEnd(*MI);
while (Begin != End) {
- for (MachineOperand &MO : (--End)->operands()) {
- if (!MO.isReg() || MO.isDef() || Reg != MO.getReg())
- continue;
-
- // DEBUG_VALUE nodes do not contribute to code generation and should
- // always be ignored. Failure to do so may result in trying to modify
- // KILL flags on DEBUG_VALUE nodes, which is distressing.
- if (MO.isDebug())
- continue;
-
- // If the register has the internal flag then it could be killing an
- // internal def of the register. In this case, just skip. We only want
- // to toggle the flag on operands visible outside the bundle.
- if (MO.isInternalRead())
- continue;
-
- if (MO.isKill() == NewKillState)
- continue;
- MO.setIsKill(NewKillState);
- if (NewKillState)
- return;
- }
+ if (NewKillState) {
+ if ((--End)->addRegisterKilled(Reg, TRI, /* addIfNotFound= */ false))
+ return;
+ } else
+ (--End)->clearRegisterKills(Reg, TRI);
}
}
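+
+// Behavioral sketch (assuming the MachineInstr helpers used above):
+// setting a kill walks the bundle bottom-up and stops at the first
+// instruction where addRegisterKilled() finds a matching use, while
+// clearing visits every instruction so no stale kill flag survives.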
@@ -1230,21 +1239,21 @@ bool ScheduleDAGInstrs::toggleKillFlag(MachineInstr *MI, MachineOperand &MO) {
// Setting kill flag...
if (!MO.isKill()) {
MO.setIsKill(true);
- toggleBundleKillFlag(MI, MO.getReg(), true);
+ toggleBundleKillFlag(MI, MO.getReg(), true, TRI);
return false;
}
// If MO itself is live, clear the kill flag...
if (LiveRegs.test(MO.getReg())) {
MO.setIsKill(false);
- toggleBundleKillFlag(MI, MO.getReg(), false);
+ toggleBundleKillFlag(MI, MO.getReg(), false, TRI);
return false;
}
// If any subreg of MO is live, then create an imp-def for that
// subreg and keep MO marked as killed.
MO.setIsKill(false);
- toggleBundleKillFlag(MI, MO.getReg(), false);
+ toggleBundleKillFlag(MI, MO.getReg(), false, TRI);
bool AllDead = true;
const unsigned SuperReg = MO.getReg();
MachineInstrBuilder MIB(MF, MI);
@@ -1257,7 +1266,7 @@ bool ScheduleDAGInstrs::toggleKillFlag(MachineInstr *MI, MachineOperand &MO) {
if(AllDead) {
MO.setIsKill(true);
- toggleBundleKillFlag(MI, MO.getReg(), true);
+ toggleBundleKillFlag(MI, MO.getReg(), true, TRI);
}
return false;
}
@@ -1275,15 +1284,15 @@ void ScheduleDAGInstrs::fixupKills(MachineBasicBlock *MBB) {
unsigned Count = MBB->size();
for (MachineBasicBlock::iterator I = MBB->end(), E = MBB->begin();
I != E; --Count) {
- MachineInstr *MI = --I;
- if (MI->isDebugValue())
+ MachineInstr &MI = *--I;
+ if (MI.isDebugValue())
continue;
// Update liveness. Registers that are defed but not used in this
// instruction are now dead. Mark register and all subregs as they
// are completely defined.
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- MachineOperand &MO = MI->getOperand(i);
+ for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI.getOperand(i);
if (MO.isRegMask())
LiveRegs.clearBitsNotInMask(MO.getRegMask());
if (!MO.isReg()) continue;
@@ -1291,7 +1300,7 @@ void ScheduleDAGInstrs::fixupKills(MachineBasicBlock *MBB) {
if (Reg == 0) continue;
if (!MO.isDef()) continue;
// Ignore two-addr defs.
- if (MI->isRegTiedToUseOperand(i)) continue;
+ if (MI.isRegTiedToUseOperand(i)) continue;
// Repeat for reg and all subregs.
for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true);
@@ -1303,8 +1312,8 @@ void ScheduleDAGInstrs::fixupKills(MachineBasicBlock *MBB) {
// register is used multiple times we only set the kill flag on
// the first use. Don't set kill flags on undef operands.
killedRegs.reset();
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- MachineOperand &MO = MI->getOperand(i);
+ for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI.getOperand(i);
if (!MO.isReg() || !MO.isUse() || MO.isUndef()) continue;
unsigned Reg = MO.getReg();
if ((Reg == 0) || MRI.isReserved(Reg)) continue;
@@ -1329,13 +1338,15 @@ void ScheduleDAGInstrs::fixupKills(MachineBasicBlock *MBB) {
if (MO.isKill() != kill) {
DEBUG(dbgs() << "Fixing " << MO << " in ");
// Warning: toggleKillFlag may invalidate MO.
- toggleKillFlag(MI, MO);
- DEBUG(MI->dump());
- DEBUG(if (MI->getOpcode() == TargetOpcode::BUNDLE) {
- MachineBasicBlock::instr_iterator Begin = MI->getIterator();
- MachineBasicBlock::instr_iterator End = getBundleEnd(MI);
- while (++Begin != End)
- DEBUG(Begin->dump());
+ toggleKillFlag(&MI, MO);
+ DEBUG(MI.dump());
+ DEBUG({
+ if (MI.getOpcode() == TargetOpcode::BUNDLE) {
+ MachineBasicBlock::instr_iterator Begin = MI.getIterator();
+ MachineBasicBlock::instr_iterator End = getBundleEnd(MI);
+ while (++Begin != End)
+ DEBUG(Begin->dump());
+ }
});
}
@@ -1344,8 +1355,8 @@ void ScheduleDAGInstrs::fixupKills(MachineBasicBlock *MBB) {
// Mark any used register (that is not using undef) and subregs as
// now live...
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- MachineOperand &MO = MI->getOperand(i);
+ for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI.getOperand(i);
if (!MO.isReg() || !MO.isUse() || MO.isUndef()) continue;
unsigned Reg = MO.getReg();
if ((Reg == 0) || MRI.isReserved(Reg)) continue;
diff --git a/contrib/llvm/lib/CodeGen/ScheduleDAGPrinter.cpp b/contrib/llvm/lib/CodeGen/ScheduleDAGPrinter.cpp
index 1150d26..ca2881c 100644
--- a/contrib/llvm/lib/CodeGen/ScheduleDAGPrinter.cpp
+++ b/contrib/llvm/lib/CodeGen/ScheduleDAGPrinter.cpp
@@ -12,7 +12,6 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/ScheduleDAG.h"
-#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFunction.h"
diff --git a/contrib/llvm/lib/CodeGen/ScoreboardHazardRecognizer.cpp b/contrib/llvm/lib/CodeGen/ScoreboardHazardRecognizer.cpp
index 38833a4..69c4870 100644
--- a/contrib/llvm/lib/CodeGen/ScoreboardHazardRecognizer.cpp
+++ b/contrib/llvm/lib/CodeGen/ScoreboardHazardRecognizer.cpp
@@ -23,22 +23,13 @@
using namespace llvm;
-#define DEBUG_TYPE ::llvm::ScoreboardHazardRecognizer::DebugType
+#define DEBUG_TYPE DebugType
-#ifndef NDEBUG
-const char *ScoreboardHazardRecognizer::DebugType = "";
-#endif
-
-ScoreboardHazardRecognizer::
-ScoreboardHazardRecognizer(const InstrItineraryData *II,
- const ScheduleDAG *SchedDAG,
- const char *ParentDebugType) :
- ScheduleHazardRecognizer(), ItinData(II), DAG(SchedDAG), IssueWidth(0),
- IssueCount(0) {
-
-#ifndef NDEBUG
- DebugType = ParentDebugType;
-#endif
+ScoreboardHazardRecognizer::ScoreboardHazardRecognizer(
+ const InstrItineraryData *II, const ScheduleDAG *SchedDAG,
+ const char *ParentDebugType)
+ : ScheduleHazardRecognizer(), DebugType(ParentDebugType), ItinData(II),
+ DAG(SchedDAG), IssueWidth(0), IssueCount(0) {
// Determine the maximum depth of any itinerary. This determines the depth of
// the scoreboard. We always make the scoreboard at least 1 cycle deep to
@@ -91,7 +82,7 @@ void ScoreboardHazardRecognizer::Reset() {
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
-void ScoreboardHazardRecognizer::Scoreboard::dump() const {
+LLVM_DUMP_METHOD void ScoreboardHazardRecognizer::Scoreboard::dump() const {
dbgs() << "Scoreboard:\n";
unsigned last = Depth - 1;
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index c741982..5ecc6da 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -24,6 +24,7 @@
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/SelectionDAGTargetInfo.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
@@ -112,7 +113,7 @@ namespace {
///
/// This is used to allow us to reliably add any operands of a DAG node
/// which have not yet been combined to the worklist.
- SmallPtrSet<SDNode *, 64> CombinedNodes;
+ SmallPtrSet<SDNode *, 32> CombinedNodes;
// AA - Used for DAG load/store alias analysis.
AliasAnalysis &AA;
@@ -211,8 +212,8 @@ namespace {
SDValue PromoteExtend(SDValue Op);
bool PromoteLoad(SDValue Op);
- void ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
- SDValue Trunc, SDValue ExtLoad, SDLoc DL,
+ void ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs, SDValue Trunc,
+ SDValue ExtLoad, const SDLoc &DL,
ISD::NodeType ExtType);
/// Call the node-specific routine that knows how to fold each
@@ -258,6 +259,7 @@ namespace {
SDValue visitSRL(SDNode *N);
SDValue visitRotate(SDNode *N);
SDValue visitBSWAP(SDNode *N);
+ SDValue visitBITREVERSE(SDNode *N);
SDValue visitCTLZ(SDNode *N);
SDValue visitCTLZ_ZERO_UNDEF(SDNode *N);
SDValue visitCTTZ(SDNode *N);
@@ -273,6 +275,7 @@ namespace {
SDValue visitANY_EXTEND(SDNode *N);
SDValue visitSIGN_EXTEND_INREG(SDNode *N);
SDValue visitSIGN_EXTEND_VECTOR_INREG(SDNode *N);
+ SDValue visitZERO_EXTEND_VECTOR_INREG(SDNode *N);
SDValue visitTRUNCATE(SDNode *N);
SDValue visitBITCAST(SDNode *N);
SDValue visitBUILD_PAIR(SDNode *N);
@@ -326,18 +329,19 @@ namespace {
SDValue visitFMULForFMACombine(SDNode *N);
SDValue XformToShuffleWithZero(SDNode *N);
- SDValue ReassociateOps(unsigned Opc, SDLoc DL, SDValue LHS, SDValue RHS);
+ SDValue ReassociateOps(unsigned Opc, const SDLoc &DL, SDValue LHS,
+ SDValue RHS);
SDValue visitShiftByConstant(SDNode *N, ConstantSDNode *Amt);
bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS);
SDValue SimplifyBinOpWithSameOpcodeHands(SDNode *N);
- SDValue SimplifySelect(SDLoc DL, SDValue N0, SDValue N1, SDValue N2);
- SDValue SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1, SDValue N2,
- SDValue N3, ISD::CondCode CC,
+ SDValue SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2);
+ SDValue SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
+ SDValue N2, SDValue N3, ISD::CondCode CC,
bool NotExtCompare = false);
SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
- SDLoc DL, bool foldBooleans = true);
+ const SDLoc &DL, bool foldBooleans = true);
bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
SDValue &CC) const;
@@ -353,19 +357,21 @@ namespace {
SDValue BuildSDIVPow2(SDNode *N);
SDValue BuildUDIV(SDNode *N);
SDValue BuildReciprocalEstimate(SDValue Op, SDNodeFlags *Flags);
- SDValue BuildRsqrtEstimate(SDValue Op, SDNodeFlags *Flags);
- SDValue BuildRsqrtNROneConst(SDValue Op, SDValue Est, unsigned Iterations,
- SDNodeFlags *Flags);
- SDValue BuildRsqrtNRTwoConst(SDValue Op, SDValue Est, unsigned Iterations,
- SDNodeFlags *Flags);
+ SDValue buildRsqrtEstimate(SDValue Op, SDNodeFlags *Flags);
+ SDValue buildSqrtEstimate(SDValue Op, SDNodeFlags *Flags);
+ SDValue buildSqrtEstimateImpl(SDValue Op, SDNodeFlags *Flags, bool Recip);
+ SDValue buildSqrtNROneConst(SDValue Op, SDValue Est, unsigned Iterations,
+ SDNodeFlags *Flags, bool Reciprocal);
+ SDValue buildSqrtNRTwoConst(SDValue Op, SDValue Est, unsigned Iterations,
+ SDNodeFlags *Flags, bool Reciprocal);
SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
bool DemandHighBits = true);
SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1);
SDNode *MatchRotatePosNeg(SDValue Shifted, SDValue Pos, SDValue Neg,
SDValue InnerPos, SDValue InnerNeg,
unsigned PosOpcode, unsigned NegOpcode,
- SDLoc DL);
- SDNode *MatchRotate(SDValue LHS, SDValue RHS, SDLoc DL);
+ const SDLoc &DL);
+ SDNode *MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL);
SDValue ReduceLoadWidth(SDNode *N);
SDValue ReduceLoadOpStoreWidth(SDNode *N);
SDValue TransformFPLoadStorePair(SDNode *N);
@@ -386,10 +392,17 @@ namespace {
/// chain (aliasing node.)
SDValue FindBetterChain(SDNode *N, SDValue Chain);
- /// Do FindBetterChain for a store and any possibly adjacent stores on
- /// consecutive chains.
+ /// Try to replace a store and any possibly adjacent stores on
+ /// consecutive chains with better chains. Return true only if St is
+ /// replaced.
+ ///
+ /// Notice that other chains may still be replaced even if the function
+ /// returns false.
bool findBetterNeighborChains(StoreSDNode *St);
+ /// Match "(X shl/srl V1) & V2" where V2 may not be present.
+ bool MatchRotateHalf(SDValue Op, SDValue &Shift, SDValue &Mask);
+
/// Holds a pointer to an LSBaseSDNode as well as information on where it
/// is located in a sequence of memory operations connected by a chain.
struct MemOpLink {
@@ -414,8 +427,7 @@ namespace {
/// This is a helper function for MergeStoresOfConstantsOrVecElts. Returns a
/// constant build_vector of the stored constant values in Stores.
- SDValue getMergedConstantVectorStore(SelectionDAG &DAG,
- SDLoc SL,
+ SDValue getMergedConstantVectorStore(SelectionDAG &DAG, const SDLoc &SL,
ArrayRef<MemOpLink> Stores,
SmallVectorImpl<SDValue> &Chains,
EVT Ty) const;
@@ -444,6 +456,12 @@ namespace {
StoreSDNode* St, SmallVectorImpl<MemOpLink> &StoreNodes,
SmallVectorImpl<LSBaseSDNode*> &AliasLoadNodes);
+    /// Helper function for MergeConsecutiveStores. Checks if the
+    /// candidate stores have an indirect dependency through their
+    /// operands. \return True if it is safe to merge.
+ bool checkMergeStoreCandidatesForDependencies(
+ SmallVectorImpl<MemOpLink> &StoreNodes);
+
/// Merge consecutive store operations into a wide store.
/// This optimization uses wide integers or vectors when possible.
/// \return True if some memory operations were changed.
@@ -747,32 +765,6 @@ bool DAGCombiner::isOneUseSetCC(SDValue N) const {
return false;
}
-/// Returns true if N is a BUILD_VECTOR node whose
-/// elements are all the same constant or undefined.
-static bool isConstantSplatVector(SDNode *N, APInt& SplatValue) {
- BuildVectorSDNode *C = dyn_cast<BuildVectorSDNode>(N);
- if (!C)
- return false;
-
- APInt SplatUndef;
- unsigned SplatBitSize;
- bool HasAnyUndefs;
- EVT EltVT = N->getValueType(0).getVectorElementType();
- return (C->isConstantSplat(SplatValue, SplatUndef, SplatBitSize,
- HasAnyUndefs) &&
- EltVT.getSizeInBits() >= SplatBitSize);
-}
-
-// \brief Returns the SDNode if it is a constant integer BuildVector
-// or constant integer.
-static SDNode *isConstantIntBuildVectorOrConstantInt(SDValue N) {
- if (isa<ConstantSDNode>(N))
- return N.getNode();
- if (ISD::isBuildVectorOfConstantSDNodes(N.getNode()))
- return N.getNode();
- return nullptr;
-}
-
// \brief Returns the SDNode if it is a constant float BuildVector
// or constant float.
static SDNode *isConstantFPBuildVectorOrConstantFP(SDValue N) {
@@ -821,12 +813,12 @@ static ConstantFPSDNode *isConstOrConstSplatFP(SDValue N) {
return nullptr;
}
-SDValue DAGCombiner::ReassociateOps(unsigned Opc, SDLoc DL,
- SDValue N0, SDValue N1) {
+SDValue DAGCombiner::ReassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
+ SDValue N1) {
EVT VT = N0.getValueType();
if (N0.getOpcode() == Opc) {
- if (SDNode *L = isConstantIntBuildVectorOrConstantInt(N0.getOperand(1))) {
- if (SDNode *R = isConstantIntBuildVectorOrConstantInt(N1)) {
+ if (SDNode *L = DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1))) {
+ if (SDNode *R = DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
// reassoc. (op (op x, c1), c2) -> (op x, (op c1, c2))
if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, L, R))
return DAG.getNode(Opc, DL, VT, N0.getOperand(0), OpNode);
@@ -845,17 +837,17 @@ SDValue DAGCombiner::ReassociateOps(unsigned Opc, SDLoc DL,
}
if (N1.getOpcode() == Opc) {
- if (SDNode *R = isConstantIntBuildVectorOrConstantInt(N1.getOperand(1))) {
- if (SDNode *L = isConstantIntBuildVectorOrConstantInt(N0)) {
+ if (SDNode *R = DAG.isConstantIntBuildVectorOrConstantInt(N1.getOperand(1))) {
+ if (SDNode *L = DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
// reassoc. (op c2, (op x, c1)) -> (op x, (op c1, c2))
if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, R, L))
return DAG.getNode(Opc, DL, VT, N1.getOperand(0), OpNode);
return SDValue();
}
if (N1.hasOneUse()) {
- // reassoc. (op y, (op x, c1)) -> (op (op x, y), c1) iff x+c1 has one
+ // reassoc. (op x, (op y, c1)) -> (op (op x, y), c1) iff x+c1 has one
// use
- SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N1.getOperand(0), N0);
+ SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0, N1.getOperand(0));
if (!OpNode.getNode())
return SDValue();
AddToWorklist(OpNode.getNode());
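+      // Concrete instance (illustrative): (add x, (add y, 4))
+      // --> (add (add x, y), 4), keeping the constant outermost so a
+      // later fold can merge it with other constants.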
@@ -962,7 +954,8 @@ void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) {
SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) {
Replace = false;
SDLoc dl(Op);
- if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Op)) {
+ if (ISD::isUNINDEXEDLoad(Op.getNode())) {
+ LoadSDNode *LD = cast<LoadSDNode>(Op);
EVT MemVT = LD->getMemoryVT();
ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD)
? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, PVT, MemVT) ? ISD::ZEXTLOAD
@@ -1166,6 +1159,9 @@ bool DAGCombiner::PromoteLoad(SDValue Op) {
if (!LegalOperations)
return false;
+ if (!ISD::isUNINDEXEDLoad(Op.getNode()))
+ return false;
+
EVT VT = Op.getValueType();
if (VT.isVector() || !VT.isInteger())
return false;
@@ -1259,8 +1255,7 @@ void DAGCombiner::Run(CombineLevel AtLevel) {
// changes of the root.
HandleSDNode Dummy(DAG.getRoot());
- // while the worklist isn't empty, find a node and
- // try and combine it.
+ // While the worklist isn't empty, find a node and try to combine it.
while (!WorklistMap.empty()) {
SDNode *N;
// The Worklist holds the SDNodes in order, but it may contain null entries.
@@ -1326,8 +1321,6 @@ void DAGCombiner::Run(CombineLevel AtLevel) {
DEBUG(dbgs() << " ... into: ";
RV.getNode()->dump(&DAG));
- // Transfer debug value.
- DAG.TransferDbgValues(SDValue(N, 0), RV);
if (N->getNumValues() == RV.getNode()->getNumValues())
DAG.ReplaceAllUsesWith(N, RV.getNode());
else {
@@ -1388,6 +1381,7 @@ SDValue DAGCombiner::visit(SDNode *N) {
case ISD::ROTR:
case ISD::ROTL: return visitRotate(N);
case ISD::BSWAP: return visitBSWAP(N);
+ case ISD::BITREVERSE: return visitBITREVERSE(N);
case ISD::CTLZ: return visitCTLZ(N);
case ISD::CTLZ_ZERO_UNDEF: return visitCTLZ_ZERO_UNDEF(N);
case ISD::CTTZ: return visitCTTZ(N);
@@ -1403,6 +1397,7 @@ SDValue DAGCombiner::visit(SDNode *N) {
case ISD::ANY_EXTEND: return visitANY_EXTEND(N);
case ISD::SIGN_EXTEND_INREG: return visitSIGN_EXTEND_INREG(N);
case ISD::SIGN_EXTEND_VECTOR_INREG: return visitSIGN_EXTEND_VECTOR_INREG(N);
+ case ISD::ZERO_EXTEND_VECTOR_INREG: return visitZERO_EXTEND_VECTOR_INREG(N);
case ISD::TRUNCATE: return visitTRUNCATE(N);
case ISD::BITCAST: return visitBITCAST(N);
case ISD::BUILD_PAIR: return visitBUILD_PAIR(N);
@@ -1628,8 +1623,8 @@ SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) {
return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
-/// If \p N is a ContantSDNode with isOpaque() == false return it casted to a
-/// ContantSDNode pointer else nullptr.
+/// If \p N is a ConstantSDNode with isOpaque() == false return it casted to a
+/// ConstantSDNode pointer else nullptr.
static ConstantSDNode *getAsNonOpaqueConstant(SDValue N) {
ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N);
return Const != nullptr && !Const->isOpaque() ? Const : nullptr;
@@ -1653,38 +1648,32 @@ SDValue DAGCombiner::visitADD(SDNode *N) {
}
// fold (add x, undef) -> undef
- if (N0.getOpcode() == ISD::UNDEF)
+ if (N0.isUndef())
return N0;
- if (N1.getOpcode() == ISD::UNDEF)
+ if (N1.isUndef())
return N1;
- // fold (add c1, c2) -> c1+c2
- ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
- ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
- if (N0C && N1C)
- return DAG.FoldConstantArithmetic(ISD::ADD, SDLoc(N), VT, N0C, N1C);
- // canonicalize constant to RHS
- if (isConstantIntBuildVectorOrConstantInt(N0) &&
- !isConstantIntBuildVectorOrConstantInt(N1))
- return DAG.getNode(ISD::ADD, SDLoc(N), VT, N1, N0);
+ if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
+ // canonicalize constant to RHS
+ if (!DAG.isConstantIntBuildVectorOrConstantInt(N1))
+ return DAG.getNode(ISD::ADD, SDLoc(N), VT, N1, N0);
+ // fold (add c1, c2) -> c1+c2
+ return DAG.FoldConstantArithmetic(ISD::ADD, SDLoc(N), VT,
+ N0.getNode(), N1.getNode());
+ }
// fold (add x, 0) -> x
if (isNullConstant(N1))
return N0;
- // fold (add Sym, c) -> Sym+c
- if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
- if (!LegalOperations && TLI.isOffsetFoldingLegal(GA) && N1C &&
- GA->getOpcode() == ISD::GlobalAddress)
- return DAG.getGlobalAddress(GA->getGlobal(), SDLoc(N1C), VT,
- GA->getOffset() +
- (uint64_t)N1C->getSExtValue());
// fold ((c1-A)+c2) -> (c1+c2)-A
- if (N1C && N0.getOpcode() == ISD::SUB)
- if (ConstantSDNode *N0C = getAsNonOpaqueConstant(N0.getOperand(0))) {
- SDLoc DL(N);
- return DAG.getNode(ISD::SUB, DL, VT,
- DAG.getConstant(N1C->getAPIntValue()+
- N0C->getAPIntValue(), DL, VT),
- N0.getOperand(1));
- }
+ if (ConstantSDNode *N1C = getAsNonOpaqueConstant(N1)) {
+ if (N0.getOpcode() == ISD::SUB)
+ if (ConstantSDNode *N0C = getAsNonOpaqueConstant(N0.getOperand(0))) {
+ SDLoc DL(N);
+ return DAG.getNode(ISD::SUB, DL, VT,
+ DAG.getConstant(N1C->getAPIntValue()+
+ N0C->getAPIntValue(), DL, VT),
+ N0.getOperand(1));
+ }
+ }
// reassociate add
if (SDValue RADD = ReassociateOps(ISD::ADD, SDLoc(N), N0, N1))
return RADD;
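
The ((c1 - A) + c2) -> (c1 + c2) - A rewrite above is plain two's-complement algebra. A minimal standalone check of the identity (ordinary C++ over uint32_t, not the SelectionDAG API; the constants are arbitrary):

    #include <cassert>
    #include <cstdint>

    // ((c1 - A) + c2) == (c1 + c2) - A under wrapping unsigned arithmetic,
    // which matches the two's-complement behavior the combiner assumes.
    int main() {
      const uint32_t C1 = 100, C2 = 7;
      for (uint32_t A = 0; A < 100000; ++A)
        assert((C1 - A) + C2 == (C1 + C2) - A);
      return 0;
    }
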
@@ -1850,9 +1839,9 @@ SDValue DAGCombiner::visitADDE(SDNode *N) {
// Since it may not be valid to emit a fold to zero for vector initializers
// check if we can before folding.
-static SDValue tryFoldToZero(SDLoc DL, const TargetLowering &TLI, EVT VT,
- SelectionDAG &DAG,
- bool LegalOperations, bool LegalTypes) {
+static SDValue tryFoldToZero(const SDLoc &DL, const TargetLowering &TLI, EVT VT,
+ SelectionDAG &DAG, bool LegalOperations,
+ bool LegalTypes) {
if (!VT.isVector())
return DAG.getConstant(0, DL, VT);
if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
@@ -1879,11 +1868,14 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
// FIXME: Refactor this and xor and other similar operations together.
if (N0 == N1)
return tryFoldToZero(SDLoc(N), TLI, VT, DAG, LegalOperations, LegalTypes);
- // fold (sub c1, c2) -> c1-c2
+ if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
+ DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
+ // fold (sub c1, c2) -> c1-c2
+ return DAG.FoldConstantArithmetic(ISD::SUB, SDLoc(N), VT,
+ N0.getNode(), N1.getNode());
+ }
ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
- if (N0C && N1C)
- return DAG.FoldConstantArithmetic(ISD::SUB, SDLoc(N), VT, N0C, N1C);
// fold (sub x, c) -> (add x, -c)
if (N1C) {
SDLoc DL(N);
@@ -1933,9 +1925,9 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
N0.getOperand(0), N0.getOperand(1).getOperand(0));
// If either operand of a sub is undef, the result is undef
- if (N0.getOpcode() == ISD::UNDEF)
+ if (N0.isUndef())
return N0;
- if (N1.getOpcode() == ISD::UNDEF)
+ if (N1.isUndef())
return N1;
// If the relocation model supports it, consider symbol offsets.
@@ -2013,7 +2005,7 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
EVT VT = N0.getValueType();
// fold (mul x, undef) -> 0
- if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)
+ if (N0.isUndef() || N1.isUndef())
return DAG.getConstant(0, SDLoc(N), VT);
bool N0IsConst = false;
@@ -2026,8 +2018,8 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
if (SDValue FoldedVOp = SimplifyVBinOp(N))
return FoldedVOp;
- N0IsConst = isConstantSplatVector(N0.getNode(), ConstValue0);
- N1IsConst = isConstantSplatVector(N1.getNode(), ConstValue1);
+ N0IsConst = ISD::isConstantSplatVector(N0.getNode(), ConstValue0);
+ N1IsConst = ISD::isConstantSplatVector(N1.getNode(), ConstValue1);
} else {
N0IsConst = isa<ConstantSDNode>(N0);
if (N0IsConst) {
@@ -2047,8 +2039,8 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
N0.getNode(), N1.getNode());
// canonicalize constant to RHS (vector doesn't have to splat)
- if (isConstantIntBuildVectorOrConstantInt(N0) &&
- !isConstantIntBuildVectorOrConstantInt(N1))
+ if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
+ !DAG.isConstantIntBuildVectorOrConstantInt(N1))
return DAG.getNode(ISD::MUL, SDLoc(N), VT, N1, N0);
// fold (mul x, 0) -> 0
if (N1IsConst && ConstValue1 == 0)
@@ -2091,23 +2083,21 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
APInt Val;
// (mul (shl X, c1), c2) -> (mul X, c2 << c1)
if (N1IsConst && N0.getOpcode() == ISD::SHL &&
- (isConstantSplatVector(N0.getOperand(1).getNode(), Val) ||
- isa<ConstantSDNode>(N0.getOperand(1)))) {
- SDValue C3 = DAG.getNode(ISD::SHL, SDLoc(N), VT,
- N1, N0.getOperand(1));
+ (ISD::isConstantSplatVector(N0.getOperand(1).getNode(), Val) ||
+ isa<ConstantSDNode>(N0.getOperand(1)))) {
+ SDValue C3 = DAG.getNode(ISD::SHL, SDLoc(N), VT, N1, N0.getOperand(1));
AddToWorklist(C3.getNode());
- return DAG.getNode(ISD::MUL, SDLoc(N), VT,
- N0.getOperand(0), C3);
+ return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), C3);
}
// Change (mul (shl X, C), Y) -> (shl (mul X, Y), C) when the shift has one
// use.
{
- SDValue Sh(nullptr,0), Y(nullptr,0);
+ SDValue Sh(nullptr, 0), Y(nullptr, 0);
// Check for both (mul (shl X, C), Y) and (mul Y, (shl X, C)).
if (N0.getOpcode() == ISD::SHL &&
- (isConstantSplatVector(N0.getOperand(1).getNode(), Val) ||
- isa<ConstantSDNode>(N0.getOperand(1))) &&
+ (ISD::isConstantSplatVector(N0.getOperand(1).getNode(), Val) ||
+ isa<ConstantSDNode>(N0.getOperand(1))) &&
N0.getNode()->hasOneUse()) {
Sh = N0; Y = N1;
} else if (N1.getOpcode() == ISD::SHL &&
@@ -2117,17 +2107,15 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
}
if (Sh.getNode()) {
- SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N), VT,
- Sh.getOperand(0), Y);
- return DAG.getNode(ISD::SHL, SDLoc(N), VT,
- Mul, Sh.getOperand(1));
+ SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N), VT, Sh.getOperand(0), Y);
+ return DAG.getNode(ISD::SHL, SDLoc(N), VT, Mul, Sh.getOperand(1));
}
}
// fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2)
- if (isConstantIntBuildVectorOrConstantInt(N1) &&
+ if (DAG.isConstantIntBuildVectorOrConstantInt(N1) &&
N0.getOpcode() == ISD::ADD &&
- isConstantIntBuildVectorOrConstantInt(N0.getOperand(1)) &&
+ DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1)) &&
isMulAddWithConstProfitable(N, N0, N1))
return DAG.getNode(ISD::ADD, SDLoc(N), VT,
DAG.getNode(ISD::MUL, SDLoc(N0), VT,
@@ -2146,7 +2134,10 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned,
const TargetLowering &TLI) {
RTLIB::Libcall LC;
- switch (Node->getSimpleValueType(0).SimpleTy) {
+ EVT NodeType = Node->getValueType(0);
+ if (!NodeType.isSimple())
+ return false;
+ switch (NodeType.getSimpleVT().SimpleTy) {
default: return false; // No libcall for vector types.
case MVT::i8: LC= isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break;
case MVT::i16: LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
@@ -2163,14 +2154,18 @@ SDValue DAGCombiner::useDivRem(SDNode *Node) {
if (Node->use_empty())
return SDValue(); // This is a dead node, leave it alone.
+ unsigned Opcode = Node->getOpcode();
+ bool isSigned = (Opcode == ISD::SDIV) || (Opcode == ISD::SREM);
+ unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
+
+  // DivRem lib calls can still handle types that are not legal natively.
EVT VT = Node->getValueType(0);
- if (!TLI.isTypeLegal(VT))
+ if (VT.isVector() || !VT.isInteger())
return SDValue();
- unsigned Opcode = Node->getOpcode();
- bool isSigned = (Opcode == ISD::SDIV) || (Opcode == ISD::SREM);
+ if (!TLI.isTypeLegal(VT) && !TLI.isOperationCustom(DivRemOpc, VT))
+ return SDValue();
- unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
// If DIVREM is going to get expanded into a libcall,
// but there is no libcall available, then don't combine.
if (!TLI.isOperationLegalOrCustom(DivRemOpc, VT) &&
@@ -2314,10 +2309,10 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) {
return DivRem;
// undef / X -> 0
- if (N0.getOpcode() == ISD::UNDEF)
+ if (N0.isUndef())
return DAG.getConstant(0, DL, VT);
// X / undef -> undef
- if (N1.getOpcode() == ISD::UNDEF)
+ if (N1.isUndef())
return N1;
return SDValue();
@@ -2378,10 +2373,10 @@ SDValue DAGCombiner::visitUDIV(SDNode *N) {
return DivRem;
// undef / X -> 0
- if (N0.getOpcode() == ISD::UNDEF)
+ if (N0.isUndef())
return DAG.getConstant(0, DL, VT);
// X / undef -> undef
- if (N1.getOpcode() == ISD::UNDEF)
+ if (N1.isUndef())
return N1;
return SDValue();
@@ -2419,15 +2414,13 @@ SDValue DAGCombiner::visitREM(SDNode *N) {
}
// fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1))
if (N1.getOpcode() == ISD::SHL) {
- if (ConstantSDNode *SHC = getAsNonOpaqueConstant(N1.getOperand(0))) {
- if (SHC->getAPIntValue().isPowerOf2()) {
- SDValue Add =
- DAG.getNode(ISD::ADD, DL, VT, N1,
- DAG.getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), DL,
- VT));
- AddToWorklist(Add.getNode());
- return DAG.getNode(ISD::AND, DL, VT, N0, Add);
- }
+ ConstantSDNode *SHC = getAsNonOpaqueConstant(N1.getOperand(0));
+ if (SHC && SHC->getAPIntValue().isPowerOf2()) {
+ APInt NegOne = APInt::getAllOnesValue(VT.getSizeInBits());
+ SDValue Add =
+ DAG.getNode(ISD::ADD, DL, VT, N1, DAG.getConstant(NegOne, DL, VT));
+ AddToWorklist(Add.getNode());
+ return DAG.getNode(ISD::AND, DL, VT, N0, Add);
}
}
}
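
The urem fold rests on the usual power-of-two identity: x % d == x & (d - 1) when d is a power of two, and adding the all-ones constant is the same as subtracting one. A small self-contained sketch (plain C++; the divisor base and test range are arbitrary):

    #include <cassert>
    #include <cstdint>

    // x % (pow2 << y) == x & ((pow2 << y) - 1) for power-of-two divisors.
    int main() {
      for (uint32_t y = 0; y < 8; ++y) {
        const uint32_t Divisor = 16u << y; // pow2 = 16
        for (uint32_t x = 0; x < 4096; ++x)
          assert(x % Divisor == (x & (Divisor - 1)));
      }
      return 0;
    }
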
@@ -2462,10 +2455,10 @@ SDValue DAGCombiner::visitREM(SDNode *N) {
return DivRem.getValue(1);
// undef % X -> 0
- if (N0.getOpcode() == ISD::UNDEF)
+ if (N0.isUndef())
return DAG.getConstant(0, DL, VT);
// X % undef -> undef
- if (N1.getOpcode() == ISD::UNDEF)
+ if (N1.isUndef())
return N1;
return SDValue();
@@ -2489,7 +2482,7 @@ SDValue DAGCombiner::visitMULHS(SDNode *N) {
getShiftAmountTy(N0.getValueType())));
}
// fold (mulhs x, undef) -> 0
- if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)
+ if (N0.isUndef() || N1.isUndef())
return DAG.getConstant(0, SDLoc(N), VT);
// If the type twice as wide is legal, transform the mulhs to a wider multiply
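
The widening the comment above refers to computes mulhs as the high half of a doubled-width product. A scalar sketch for i32 (the helper name mulhs32 is illustrative only, not an LLVM API):

    #include <cassert>
    #include <cstdint>

    // High half of a signed 32x32 multiply via a widened 64-bit multiply.
    int32_t mulhs32(int32_t a, int32_t b) {
      return (int32_t)(((int64_t)a * (int64_t)b) >> 32);
    }

    int main() {
      assert(mulhs32(INT32_MIN, 2) == -1); // product -2^32, high half all-ones
      assert(mulhs32(0x40000000, 4) == 1); // product 2^32, high half 1
      return 0;
    }
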
@@ -2525,7 +2518,7 @@ SDValue DAGCombiner::visitMULHU(SDNode *N) {
if (isOneConstant(N1))
return DAG.getConstant(0, DL, N0.getValueType());
// fold (mulhu x, undef) -> 0
- if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)
+ if (N0.isUndef() || N1.isUndef())
return DAG.getConstant(0, DL, VT);
// If the type twice as wide is legal, transform the mulhu to a wider multiply
@@ -2698,8 +2691,8 @@ SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
return DAG.FoldConstantArithmetic(N->getOpcode(), SDLoc(N), VT, N0C, N1C);
// canonicalize constant to RHS
- if (isConstantIntBuildVectorOrConstantInt(N0) &&
- !isConstantIntBuildVectorOrConstantInt(N1))
+ if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
+ !DAG.isConstantIntBuildVectorOrConstantInt(N1))
return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0);
return SDValue();
@@ -2761,7 +2754,7 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) {
}
// Simplify xor/and/or (bitcast(A), bitcast(B)) -> bitcast(op (A,B))
- // Only perform this optimization after type legalization and before
+ // Only perform this optimization up until type legalization, before
// LegalizeVectorOps. LegalizeVectorOps promotes vector operations by
// adding bitcasts. For example (xor v4i32) is promoted to (v2i64), and
// we don't want to undo this promotion.
@@ -2769,7 +2762,7 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) {
// on scalars.
if ((N0.getOpcode() == ISD::BITCAST ||
N0.getOpcode() == ISD::SCALAR_TO_VECTOR) &&
- Level == AfterLegalizeTypes) {
+ Level <= AfterLegalizeTypes) {
SDValue In0 = N0.getOperand(0);
SDValue In1 = N1.getOperand(0);
EVT In0Ty = In0.getValueType();
@@ -2814,7 +2807,7 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) {
// Don't try to fold this node if it requires introducing a
// build vector of all zeros that might be illegal at this stage.
- if (N->getOpcode() == ISD::XOR && ShOp.getOpcode() != ISD::UNDEF) {
+ if (N->getOpcode() == ISD::XOR && !ShOp.isUndef()) {
if (!LegalTypes)
ShOp = DAG.getConstant(0, SDLoc(N), VT);
else
@@ -2829,13 +2822,13 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) {
N0->getOperand(0), N1->getOperand(0));
AddToWorklist(NewNode.getNode());
return DAG.getVectorShuffle(VT, SDLoc(N), NewNode, ShOp,
- &SVN0->getMask()[0]);
+ SVN0->getMask());
}
// Don't try to fold this node if it requires introducing a
// build vector of all zeros that might be illegal at this stage.
ShOp = N0->getOperand(0);
- if (N->getOpcode() == ISD::XOR && ShOp.getOpcode() != ISD::UNDEF) {
+ if (N->getOpcode() == ISD::XOR && !ShOp.isUndef()) {
if (!LegalTypes)
ShOp = DAG.getConstant(0, SDLoc(N), VT);
else
@@ -2850,7 +2843,7 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) {
N0->getOperand(1), N1->getOperand(1));
AddToWorklist(NewNode.getNode());
return DAG.getVectorShuffle(VT, SDLoc(N), ShOp, NewNode,
- &SVN0->getMask()[0]);
+ SVN0->getMask());
}
}
}
@@ -2867,7 +2860,7 @@ SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1,
EVT VT = N1.getValueType();
// fold (and x, undef) -> 0
- if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)
+ if (N0.isUndef() || N1.isUndef())
return DAG.getConstant(0, SDLoc(LocReference), VT);
// fold (and (setcc x), (setcc y)) -> (setcc (and x, y))
SDValue LL, LR, RL, RR, CC0, CC1;
@@ -2965,6 +2958,50 @@ SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1,
}
}
+ // Reduce bit extract of low half of an integer to the narrower type.
+ // (and (srl i64:x, K), KMask) ->
+  //   (i64 zero_extend (and (srl (i32 (trunc i64:x)), K), KMask))
+ if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
+ if (ConstantSDNode *CAnd = dyn_cast<ConstantSDNode>(N1)) {
+ if (ConstantSDNode *CShift = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
+ unsigned Size = VT.getSizeInBits();
+ const APInt &AndMask = CAnd->getAPIntValue();
+ unsigned ShiftBits = CShift->getZExtValue();
+ unsigned MaskBits = AndMask.countTrailingOnes();
+ EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), Size / 2);
+
+ if (APIntOps::isMask(AndMask) &&
+ // Required bits must not span the two halves of the integer and
+ // must fit in the half size type.
+ (ShiftBits + MaskBits <= Size / 2) &&
+ TLI.isNarrowingProfitable(VT, HalfVT) &&
+ TLI.isTypeDesirableForOp(ISD::AND, HalfVT) &&
+ TLI.isTypeDesirableForOp(ISD::SRL, HalfVT) &&
+ TLI.isTruncateFree(VT, HalfVT) &&
+ TLI.isZExtFree(HalfVT, VT)) {
+        // The isNarrowingProfitable check is to avoid regressions on PPC and
+        // AArch64, which match a few 64-bit bit insert / bit extract patterns
+        // on downstream users of this. Those patterns could probably be
+        // extended to handle extensions mixed in.
+
+ SDValue SL(N0);
+ assert(ShiftBits != 0 && MaskBits <= Size);
+
+ // Extracting the highest bit of the low half.
+ EVT ShiftVT = TLI.getShiftAmountTy(HalfVT, DAG.getDataLayout());
+ SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, HalfVT,
+ N0.getOperand(0));
+
+ SDValue NewMask = DAG.getConstant(AndMask.trunc(Size / 2), SL, HalfVT);
+ SDValue ShiftK = DAG.getConstant(ShiftBits, SL, ShiftVT);
+ SDValue Shift = DAG.getNode(ISD::SRL, SL, HalfVT, Trunc, ShiftK);
+ SDValue And = DAG.getNode(ISD::AND, SL, HalfVT, Shift, NewMask);
+ return DAG.getNode(ISD::ZERO_EXTEND, SL, VT, And);
+ }
+ }
+ }
+ }
+
return SDValue();
}
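
The narrowing fold added above is correct because a shift-and-mask whose bits satisfy ShiftBits + MaskBits <= Size / 2 reads only bits that survive truncation to the half-width type. A standalone check under that assumption (plain C++; the shift, mask, and test values are arbitrary):

    #include <cassert>
    #include <cstdint>

    // When ShiftBits + MaskBits <= 32, the 64-bit shift-and-mask touches only
    // the low half of x, so the same extract on the truncated i32 value,
    // zero-extended back, gives an identical result.
    int main() {
      const unsigned K = 5;        // ShiftBits
      const uint64_t Mask = 0xFF;  // MaskBits = 8, and 5 + 8 <= 32
      for (int i = 0; i < 1000; ++i) {
        uint64_t x = (uint64_t)i * 0x9E3779B97F4A7C15ull; // arbitrary values
        uint64_t Wide = (x >> K) & Mask;
        uint64_t Narrow = (uint64_t)(((uint32_t)x >> K) & (uint32_t)Mask);
        assert(Wide == Narrow);
      }
      return 0;
    }
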
@@ -3045,8 +3082,8 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
if (N0C && N1C && !N1C->isOpaque())
return DAG.FoldConstantArithmetic(ISD::AND, SDLoc(N), VT, N0C, N1C);
// canonicalize constant to RHS
- if (isConstantIntBuildVectorOrConstantInt(N0) &&
- !isConstantIntBuildVectorOrConstantInt(N1))
+ if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
+ !DAG.isConstantIntBuildVectorOrConstantInt(N1))
return DAG.getNode(ISD::AND, SDLoc(N), VT, N1, N0);
// fold (and x, -1) -> x
if (isAllOnesConstant(N1))
@@ -3090,8 +3127,10 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
// the 'X' node here can either be nothing or an extract_vector_elt to catch
// more cases.
if ((N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
- N0.getOperand(0).getOpcode() == ISD::LOAD) ||
- N0.getOpcode() == ISD::LOAD) {
+ N0.getValueSizeInBits() == N0.getOperand(0).getScalarValueSizeInBits() &&
+ N0.getOperand(0).getOpcode() == ISD::LOAD &&
+ N0.getOperand(0).getResNo() == 0) ||
+ (N0.getOpcode() == ISD::LOAD && N0.getResNo() == 0)) {
LoadSDNode *Load = cast<LoadSDNode>( (N0.getOpcode() == ISD::LOAD) ?
N0 : N0.getOperand(0) );
@@ -3234,12 +3273,10 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
AddToWorklist(NewPtr.getNode());
- SDValue Load =
- DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN0), LoadResultTy,
- LN0->getChain(), NewPtr,
- LN0->getPointerInfo(),
- ExtVT, LN0->isVolatile(), LN0->isNonTemporal(),
- LN0->isInvariant(), Alignment, LN0->getAAInfo());
+ SDValue Load = DAG.getExtLoad(
+ ISD::ZEXTLOAD, SDLoc(LN0), LoadResultTy, LN0->getChain(), NewPtr,
+ LN0->getPointerInfo(), ExtVT, Alignment,
+ LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
AddToWorklist(N);
CombineTo(LN0, Load, Load.getValue(1));
return SDValue(N, 0); // Return N so it doesn't get rechecked!
@@ -3303,9 +3340,8 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
}
// fold (and (or (srl N, 8), (shl N, 8)), 0xffff) -> (srl (bswap N), const)
if (N1C && N1C->getAPIntValue() == 0xffff && N0.getOpcode() == ISD::OR) {
- SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
- N0.getOperand(1), false);
- if (BSwap.getNode())
+ if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
+ N0.getOperand(1), false))
return BSwap;
}
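
The masked or-of-shifts pattern MatchBSwapHWordLow recognizes is a byte swap of the low 16 bits. A scalar illustration (assumes the GCC/Clang __builtin_bswap32 builtin; the input value is arbitrary):

    #include <cassert>
    #include <cstdint>

    // ((N >> 8) | (N << 8)) & 0xffff swaps the two low bytes of N, which is
    // the low 16 bits of (bswap N) shifted down -- what the fold produces.
    int main() {
      const uint32_t N = 0x12345678u;
      const uint32_t Folded = ((N >> 8) | (N << 8)) & 0xffff;
      assert(Folded == 0x7856u);
      assert(Folded == (__builtin_bswap32(N) >> 16));
      return 0;
    }
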
@@ -3576,7 +3612,7 @@ SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, SDNode *LocReference) {
EVT VT = N1.getValueType();
// fold (or x, undef) -> -1
if (!LegalOperations &&
- (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)) {
+ (N0.isUndef() || N1.isUndef())) {
EVT EltVT = VT.isVector() ? VT.getVectorElementType() : VT;
return DAG.getConstant(APInt::getAllOnesValue(EltVT.getSizeInBits()),
SDLoc(LocReference), VT);
@@ -3697,59 +3733,70 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
N1.getValueType().getScalarType().getSizeInBits()),
SDLoc(N), N1.getValueType());
- // fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf A, B, Mask1)
- // fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf B, A, Mask2)
+ // fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf A, B, Mask)
// Do this only if the resulting shuffle is legal.
if (isa<ShuffleVectorSDNode>(N0) &&
isa<ShuffleVectorSDNode>(N1) &&
// Avoid folding a node with illegal type.
- TLI.isTypeLegal(VT) &&
- N0->getOperand(1) == N1->getOperand(1) &&
- ISD::isBuildVectorAllZeros(N0.getOperand(1).getNode())) {
- bool CanFold = true;
- unsigned NumElts = VT.getVectorNumElements();
- const ShuffleVectorSDNode *SV0 = cast<ShuffleVectorSDNode>(N0);
- const ShuffleVectorSDNode *SV1 = cast<ShuffleVectorSDNode>(N1);
- // We construct two shuffle masks:
- // - Mask1 is a shuffle mask for a shuffle with N0 as the first operand
- // and N1 as the second operand.
- // - Mask2 is a shuffle mask for a shuffle with N1 as the first operand
- // and N0 as the second operand.
- // We do this because OR is commutable and therefore there might be
- // two ways to fold this node into a shuffle.
- SmallVector<int,4> Mask1;
- SmallVector<int,4> Mask2;
-
- for (unsigned i = 0; i != NumElts && CanFold; ++i) {
- int M0 = SV0->getMaskElt(i);
- int M1 = SV1->getMaskElt(i);
-
- // Both shuffle indexes are undef. Propagate Undef.
- if (M0 < 0 && M1 < 0) {
- Mask1.push_back(M0);
- Mask2.push_back(M0);
- continue;
- }
+ TLI.isTypeLegal(VT)) {
+ bool ZeroN00 = ISD::isBuildVectorAllZeros(N0.getOperand(0).getNode());
+ bool ZeroN01 = ISD::isBuildVectorAllZeros(N0.getOperand(1).getNode());
+ bool ZeroN10 = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());
+ bool ZeroN11 = ISD::isBuildVectorAllZeros(N1.getOperand(1).getNode());
+ // Ensure both shuffles have a zero input.
+ if ((ZeroN00 || ZeroN01) && (ZeroN10 || ZeroN11)) {
+ assert((!ZeroN00 || !ZeroN01) && "Both inputs zero!");
+ assert((!ZeroN10 || !ZeroN11) && "Both inputs zero!");
+ const ShuffleVectorSDNode *SV0 = cast<ShuffleVectorSDNode>(N0);
+ const ShuffleVectorSDNode *SV1 = cast<ShuffleVectorSDNode>(N1);
+ bool CanFold = true;
+ int NumElts = VT.getVectorNumElements();
+ SmallVector<int, 4> Mask(NumElts);
+
+ for (int i = 0; i != NumElts; ++i) {
+ int M0 = SV0->getMaskElt(i);
+ int M1 = SV1->getMaskElt(i);
+
+ // Determine if either index is pointing to a zero vector.
+ bool M0Zero = M0 < 0 || (ZeroN00 == (M0 < NumElts));
+ bool M1Zero = M1 < 0 || (ZeroN10 == (M1 < NumElts));
+
+        // If one element is zero and the other side is undef, keep undef.
+ // This also handles the case that both are undef.
+ if ((M0Zero && M1 < 0) || (M1Zero && M0 < 0)) {
+ Mask[i] = -1;
+ continue;
+ }
- if (M0 < 0 || M1 < 0 ||
- (M0 < (int)NumElts && M1 < (int)NumElts) ||
- (M0 >= (int)NumElts && M1 >= (int)NumElts)) {
- CanFold = false;
- break;
+ // Make sure only one of the elements is zero.
+ if (M0Zero == M1Zero) {
+ CanFold = false;
+ break;
+ }
+
+ assert((M0 >= 0 || M1 >= 0) && "Undef index!");
+
+ // We have a zero and non-zero element. If the non-zero came from
+ // SV0 make the index a LHS index. If it came from SV1, make it
+ // a RHS index. We need to mod by NumElts because we don't care
+ // which operand it came from in the original shuffles.
+ Mask[i] = M1Zero ? M0 % NumElts : (M1 % NumElts) + NumElts;
}
- Mask1.push_back(M0 < (int)NumElts ? M0 : M1 + NumElts);
- Mask2.push_back(M1 < (int)NumElts ? M1 : M0 + NumElts);
- }
+ if (CanFold) {
+ SDValue NewLHS = ZeroN00 ? N0.getOperand(1) : N0.getOperand(0);
+ SDValue NewRHS = ZeroN10 ? N1.getOperand(1) : N1.getOperand(0);
+
+ bool LegalMask = TLI.isShuffleMaskLegal(Mask, VT);
+ if (!LegalMask) {
+ std::swap(NewLHS, NewRHS);
+ ShuffleVectorSDNode::commuteMask(Mask);
+ LegalMask = TLI.isShuffleMaskLegal(Mask, VT);
+ }
- if (CanFold) {
- // Fold this sequence only if the resulting shuffle is 'legal'.
- if (TLI.isShuffleMaskLegal(Mask1, VT))
- return DAG.getVectorShuffle(VT, SDLoc(N), N0->getOperand(0),
- N1->getOperand(0), &Mask1[0]);
- if (TLI.isShuffleMaskLegal(Mask2, VT))
- return DAG.getVectorShuffle(VT, SDLoc(N), N1->getOperand(0),
- N0->getOperand(0), &Mask2[0]);
+ if (LegalMask)
+ return DAG.getVectorShuffle(VT, SDLoc(N), NewLHS, NewRHS, Mask);
+ }
}
}
}
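
The lane-by-lane mask merge above can be modeled with plain arrays, since every lane of the OR takes its value from exactly one of the two shuffles (the other contributes zero). A scalar sketch of that reasoning (not the SelectionDAG code; the mask values are chosen arbitrarily):

    #include <cassert>

    // Model of (or (shuffle A, 0, MA), (shuffle 0, B, MB)) -> (shuffle A, B, Mask).
    int main() {
      const int N = 4;
      int A[4] = {1, 2, 3, 4};
      int B[4] = {5, 6, 7, 8};
      int Zero[4] = {0, 0, 0, 0};
      // Indices 0..3 select from the first operand, 4..7 from the second.
      int MA[4] = {0, 1, 4, 4};   // N0 = shuffle(A, Zero, MA) = {1, 2, 0, 0}
      int MB[4] = {0, 0, 4, 5};   // N1 = shuffle(Zero, B, MB) = {0, 0, 5, 6}
      int Mask[4] = {0, 1, 4, 5}; // combined: lanes 0-1 from A, 2-3 from B

      for (int i = 0; i < N; ++i) {
        int N0i = MA[i] < N ? A[MA[i]] : Zero[MA[i] - N];
        int N1i = MB[i] < N ? Zero[MB[i]] : B[MB[i] - N];
        int Shuf = Mask[i] < N ? A[Mask[i]] : B[Mask[i] - N];
        assert((N0i | N1i) == Shuf); // one shuffle reproduces the OR
      }
      return 0;
    }
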
@@ -3760,8 +3807,8 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
if (N0C && N1C && !N1C->isOpaque())
return DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N), VT, N0C, N1C);
// canonicalize constant to RHS
- if (isConstantIntBuildVectorOrConstantInt(N0) &&
- !isConstantIntBuildVectorOrConstantInt(N1))
+ if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
+ !DAG.isConstantIntBuildVectorOrConstantInt(N1))
return DAG.getNode(ISD::OR, SDLoc(N), VT, N1, N0);
// fold (or x, 0) -> x
if (isNullConstant(N1))
@@ -3817,9 +3864,9 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
}
/// Match "(X shl/srl V1) & V2" where V2 may not be present.
-static bool MatchRotateHalf(SDValue Op, SDValue &Shift, SDValue &Mask) {
+bool DAGCombiner::MatchRotateHalf(SDValue Op, SDValue &Shift, SDValue &Mask) {
if (Op.getOpcode() == ISD::AND) {
- if (isConstantIntBuildVectorOrConstantInt(Op.getOperand(1))) {
+ if (DAG.isConstantIntBuildVectorOrConstantInt(Op.getOperand(1))) {
Mask = Op.getOperand(1);
Op = Op.getOperand(0);
} else {
@@ -3946,7 +3993,7 @@ static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize) {
SDNode *DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos,
SDValue Neg, SDValue InnerPos,
SDValue InnerNeg, unsigned PosOpcode,
- unsigned NegOpcode, SDLoc DL) {
+ unsigned NegOpcode, const SDLoc &DL) {
// fold (or (shl x, (*ext y)),
// (srl x, (*ext (sub 32, y)))) ->
// (rotl x, y) or (rotr x, (sub 32, y))
@@ -3967,7 +4014,7 @@ SDNode *DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos,
// MatchRotate - Handle an 'or' of two operands. If this is one of the many
// idioms for rotate, and if the target supports rotation instructions, generate
// a rot[lr].
-SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, SDLoc DL) {
+SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
// Must be a legal type. Expanded 'n promoted things won't work with rotates.
EVT VT = LHS.getValueType();
if (!TLI.isTypeLegal(VT)) return nullptr;
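
MatchRotate looks for an OR of a left shift and the complementary right shift. A minimal check of that identity for i32 (rotl32 is an illustrative helper; y must stay in 1..31 so neither shift amount is out of range):

    #include <cassert>
    #include <cstdint>

    // (x << y) | (x >> (32 - y)) is rotl(x, y) for 1 <= y <= 31.
    uint32_t rotl32(uint32_t x, unsigned y) {
      return (x << y) | (x >> (32 - y));
    }

    int main() {
      const uint32_t x = 0x80000001u;
      assert(rotl32(x, 1) == 0x00000003u); // the top bit wraps around
      assert(rotl32(x, 4) == 0x00000018u);
      return 0;
    }
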
@@ -4093,12 +4140,12 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
}
// fold (xor undef, undef) -> 0. This is a common idiom (misuse).
- if (N0.getOpcode() == ISD::UNDEF && N1.getOpcode() == ISD::UNDEF)
+ if (N0.isUndef() && N1.isUndef())
return DAG.getConstant(0, SDLoc(N), VT);
// fold (xor x, undef) -> undef
- if (N0.getOpcode() == ISD::UNDEF)
+ if (N0.isUndef())
return N0;
- if (N1.getOpcode() == ISD::UNDEF)
+ if (N1.isUndef())
return N1;
// fold (xor c1, c2) -> c1^c2
ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
@@ -4106,8 +4153,8 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
if (N0C && N1C)
return DAG.FoldConstantArithmetic(ISD::XOR, SDLoc(N), VT, N0C, N1C);
// canonicalize constant to RHS
- if (isConstantIntBuildVectorOrConstantInt(N0) &&
- !isConstantIntBuildVectorOrConstantInt(N1))
+ if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
+ !DAG.isConstantIntBuildVectorOrConstantInt(N1))
return DAG.getNode(ISD::XOR, SDLoc(N), VT, N1, N0);
// fold (xor x, 0) -> x
if (isNullConstant(N1))
@@ -4342,8 +4389,8 @@ SDValue DAGCombiner::visitRotate(SDNode *N) {
// fold (rot* x, (trunc (and y, c))) -> (rot* x, (and (trunc y), (trunc c))).
if (N->getOperand(1).getOpcode() == ISD::TRUNCATE &&
N->getOperand(1).getOperand(0).getOpcode() == ISD::AND) {
- SDValue NewOp1 = distributeTruncateThroughAnd(N->getOperand(1).getNode());
- if (NewOp1.getNode())
+ if (SDValue NewOp1 =
+ distributeTruncateThroughAnd(N->getOperand(1).getNode()))
return DAG.getNode(N->getOpcode(), SDLoc(N), N->getValueType(0),
N->getOperand(0), NewOp1);
}
@@ -4398,7 +4445,7 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
if (N1C && N1C->isNullValue())
return N0;
// fold (shl undef, x) -> 0
- if (N0.getOpcode() == ISD::UNDEF)
+ if (N0.isUndef())
return DAG.getConstant(0, SDLoc(N), VT);
// if (shl x, c) is known to be zero, return 0
if (DAG.MaskedValueIsZero(SDValue(N, 0),
@@ -4407,8 +4454,7 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
// fold (shl x, (trunc (and y, c))) -> (shl x, (and (trunc y), (trunc c))).
if (N1.getOpcode() == ISD::TRUNCATE &&
N1.getOperand(0).getOpcode() == ISD::AND) {
- SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode());
- if (NewOp1.getNode())
+ if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, NewOp1);
}
@@ -4541,7 +4587,7 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
APInt Val;
if (N1C && N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse() &&
(isa<ConstantSDNode>(N0.getOperand(1)) ||
- isConstantSplatVector(N0.getOperand(1).getNode(), Val))) {
+ ISD::isConstantSplatVector(N0.getOperand(1).getNode(), Val))) {
SDValue Shl0 = DAG.getNode(ISD::SHL, SDLoc(N0), VT, N0.getOperand(0), N1);
SDValue Shl1 = DAG.getNode(ISD::SHL, SDLoc(N1), VT, N0.getOperand(1), N1);
return DAG.getNode(ISD::ADD, SDLoc(N), VT, Shl0, Shl1);
@@ -4637,7 +4683,7 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorNumElements());
// Determine the residual right-shift amount.
- signed ShiftAmt = N1C->getZExtValue() - N01C->getZExtValue();
+ int ShiftAmt = N1C->getZExtValue() - N01C->getZExtValue();
// If the shift is not a no-op (in which case this should be just a sign
// extend already), the truncated to type is legal, sign_extend is legal
@@ -4664,8 +4710,7 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
// fold (sra x, (trunc (and y, c))) -> (sra x, (and (trunc y), (trunc c))).
if (N1.getOpcode() == ISD::TRUNCATE &&
N1.getOperand(0).getOpcode() == ISD::AND) {
- SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode());
- if (NewOp1.getNode())
+ if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0, NewOp1);
}
@@ -4916,7 +4961,7 @@ SDValue DAGCombiner::visitBSWAP(SDNode *N) {
EVT VT = N->getValueType(0);
// fold (bswap c1) -> c2
- if (isConstantIntBuildVectorOrConstantInt(N0))
+ if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
return DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N0);
// fold (bswap (bswap x)) -> x
if (N0.getOpcode() == ISD::BSWAP)
@@ -4924,12 +4969,21 @@ SDValue DAGCombiner::visitBSWAP(SDNode *N) {
return SDValue();
}
+SDValue DAGCombiner::visitBITREVERSE(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+
+ // fold (bitreverse (bitreverse x)) -> x
+ if (N0.getOpcode() == ISD::BITREVERSE)
+ return N0.getOperand(0);
+ return SDValue();
+}
+
SDValue DAGCombiner::visitCTLZ(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
// fold (ctlz c1) -> c2
- if (isConstantIntBuildVectorOrConstantInt(N0))
+ if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
return DAG.getNode(ISD::CTLZ, SDLoc(N), VT, N0);
return SDValue();
}
@@ -4939,7 +4993,7 @@ SDValue DAGCombiner::visitCTLZ_ZERO_UNDEF(SDNode *N) {
EVT VT = N->getValueType(0);
// fold (ctlz_zero_undef c1) -> c2
- if (isConstantIntBuildVectorOrConstantInt(N0))
+ if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, SDLoc(N), VT, N0);
return SDValue();
}
@@ -4949,7 +5003,7 @@ SDValue DAGCombiner::visitCTTZ(SDNode *N) {
EVT VT = N->getValueType(0);
// fold (cttz c1) -> c2
- if (isConstantIntBuildVectorOrConstantInt(N0))
+ if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
return DAG.getNode(ISD::CTTZ, SDLoc(N), VT, N0);
return SDValue();
}
@@ -4959,7 +5013,7 @@ SDValue DAGCombiner::visitCTTZ_ZERO_UNDEF(SDNode *N) {
EVT VT = N->getValueType(0);
// fold (cttz_zero_undef c1) -> c2
- if (isConstantIntBuildVectorOrConstantInt(N0))
+ if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, SDLoc(N), VT, N0);
return SDValue();
}
@@ -4969,15 +5023,15 @@ SDValue DAGCombiner::visitCTPOP(SDNode *N) {
EVT VT = N->getValueType(0);
// fold (ctpop c1) -> c2
- if (isConstantIntBuildVectorOrConstantInt(N0))
+ if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
return DAG.getNode(ISD::CTPOP, SDLoc(N), VT, N0);
return SDValue();
}
/// \brief Generate Min/Max node
-static SDValue combineMinNumMaxNum(SDLoc DL, EVT VT, SDValue LHS, SDValue RHS,
- SDValue True, SDValue False,
+static SDValue combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS,
+ SDValue RHS, SDValue True, SDValue False,
ISD::CondCode CC, const TargetLowering &TLI,
SelectionDAG &DAG) {
if (!(LHS == True && RHS == False) && !(LHS == False && RHS == True))
@@ -5237,7 +5291,7 @@ static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) {
// length of the BV and see if all the non-undef nodes are the same.
ConstantSDNode *BottomHalf = nullptr;
for (int i = 0; i < NumElems / 2; ++i) {
- if (Cond->getOperand(i)->getOpcode() == ISD::UNDEF)
+ if (Cond->getOperand(i)->isUndef())
continue;
if (BottomHalf == nullptr)
@@ -5249,7 +5303,7 @@ static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) {
// Do the same for the second half of the BuildVector
ConstantSDNode *TopHalf = nullptr;
for (int i = NumElems / 2; i < NumElems; ++i) {
- if (Cond->getOperand(i)->getOpcode() == ISD::UNDEF)
+ if (Cond->getOperand(i)->isUndef())
continue;
if (TopHalf == nullptr)
@@ -5666,9 +5720,8 @@ SDValue DAGCombiner::visitSELECT_CC(SDNode *N) {
return N2;
// Determine if the condition we're dealing with is constant
- SDValue SCC = SimplifySetCC(getSetCCResultType(N0.getValueType()),
- N0, N1, CC, SDLoc(N), false);
- if (SCC.getNode()) {
+ if (SDValue SCC = SimplifySetCC(getSetCCResultType(N0.getValueType()), N0, N1,
+ CC, SDLoc(N), false)) {
AddToWorklist(SCC.getNode());
if (ConstantSDNode *SCCC = dyn_cast<ConstantSDNode>(SCC.getNode())) {
@@ -5676,7 +5729,7 @@ SDValue DAGCombiner::visitSELECT_CC(SDNode *N) {
return N2; // cond always true -> true val
else
return N3; // cond always false -> false val
- } else if (SCC->getOpcode() == ISD::UNDEF) {
+ } else if (SCC->isUndef()) {
// When the condition is UNDEF, just return the first operand. This is
// coherent with DAG creation: no setcc node is created in this case.
return N2;
@@ -5729,7 +5782,8 @@ static SDNode *tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI,
EVT VT = N->getValueType(0);
assert((Opcode == ISD::SIGN_EXTEND || Opcode == ISD::ZERO_EXTEND ||
- Opcode == ISD::ANY_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG)
+ Opcode == ISD::ANY_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG ||
+ Opcode == ISD::ZERO_EXTEND_VECTOR_INREG)
&& "Expected EXTEND dag node in input!");
// fold (sext c1) -> c1
@@ -5756,7 +5810,7 @@ static SDNode *tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI,
for (unsigned i=0; i != NumElts; ++i) {
SDValue Op = N0->getOperand(i);
- if (Op->getOpcode() == ISD::UNDEF) {
+ if (Op->isUndef()) {
Elts.push_back(DAG.getUNDEF(SVT));
continue;
}
@@ -5771,7 +5825,7 @@ static SDNode *tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI,
Elts.push_back(DAG.getConstant(C.zext(VTBits), DL, SVT));
}
- return DAG.getNode(ISD::BUILD_VECTOR, DL, VT, Elts).getNode();
+ return DAG.getBuildVector(VT, DL, Elts).getNode();
}
// ExtendUsesToFormExtLoad - Trying to extend uses of a load to enable this:
@@ -5839,8 +5893,8 @@ static bool ExtendUsesToFormExtLoad(SDNode *N, SDValue N0,
}
void DAGCombiner::ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
- SDValue Trunc, SDValue ExtLoad, SDLoc DL,
- ISD::NodeType ExtType) {
+ SDValue Trunc, SDValue ExtLoad,
+ const SDLoc &DL, ISD::NodeType ExtType) {
// Extend SetCC uses if necessary.
for (unsigned i = 0, e = SetCCs.size(); i != e; ++i) {
SDNode *SetCC = SetCCs[i];
@@ -5929,9 +5983,8 @@ SDValue DAGCombiner::CombineExtLoad(SDNode *N) {
SDValue SplitLoad = DAG.getExtLoad(
ExtType, DL, SplitDstVT, LN0->getChain(), BasePtr,
- LN0->getPointerInfo().getWithOffset(Offset), SplitSrcVT,
- LN0->isVolatile(), LN0->isNonTemporal(), LN0->isInvariant(),
- Align, LN0->getAAInfo());
+ LN0->getPointerInfo().getWithOffset(Offset), SplitSrcVT, Align,
+ LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
BasePtr = DAG.getNode(ISD::ADD, DL, BasePtr.getValueType(), BasePtr,
DAG.getConstant(Stride, DL, BasePtr.getValueType()));
@@ -6145,16 +6198,30 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
}
}
- // sext(setcc x, y, cc) -> (select (setcc x, y, cc), -1, 0)
- unsigned ElementWidth = VT.getScalarType().getSizeInBits();
+ // sext(setcc x, y, cc) -> (select (setcc x, y, cc), T, 0)
+ // Here, T can be 1 or -1, depending on the type of the setcc and
+ // getBooleanContents().
+ unsigned SetCCWidth = N0.getValueType().getScalarSizeInBits();
+
SDLoc DL(N);
- SDValue NegOne =
- DAG.getConstant(APInt::getAllOnesValue(ElementWidth), DL, VT);
- SDValue SCC =
- SimplifySelectCC(DL, N0.getOperand(0), N0.getOperand(1),
- NegOne, DAG.getConstant(0, DL, VT),
- cast<CondCodeSDNode>(N0.getOperand(2))->get(), true);
- if (SCC.getNode()) return SCC;
+ // To determine the "true" side of the select, we need to know the high bit
+ // of the value returned by the setcc if it evaluates to true.
+ // If the type of the setcc is i1, then the true case of the select is just
+ // sext(i1 1), that is, -1.
+ // If the type of the setcc is larger (say, i8) then the value of the high
+ // bit depends on getBooleanContents(). So, ask TLI for a real "true" value
+ // of the appropriate width.
+ SDValue ExtTrueVal =
+ (SetCCWidth == 1)
+ ? DAG.getConstant(APInt::getAllOnesValue(VT.getScalarSizeInBits()),
+ DL, VT)
+ : TLI.getConstTrueVal(DAG, VT, DL);
+
+ if (SDValue SCC = SimplifySelectCC(
+ DL, N0.getOperand(0), N0.getOperand(1), ExtTrueVal,
+ DAG.getConstant(0, DL, VT),
+ cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
+ return SCC;
if (!VT.isVector()) {
EVT SetCCVT = getSetCCResultType(N0.getOperand(0).getValueType());
@@ -6162,10 +6229,10 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
TLI.isOperationLegal(ISD::SETCC, N0.getOperand(0).getValueType())) {
SDLoc DL(N);
ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
- SDValue SetCC = DAG.getSetCC(DL, SetCCVT,
- N0.getOperand(0), N0.getOperand(1), CC);
- return DAG.getSelect(DL, VT, SetCC,
- NegOne, DAG.getConstant(0, DL, VT));
+ SDValue SetCC =
+ DAG.getSetCC(DL, SetCCVT, N0.getOperand(0), N0.getOperand(1), CC);
+ return DAG.getSelect(DL, VT, SetCC, ExtTrueVal,
+ DAG.getConstant(0, DL, VT));
}
}
}
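
The choice of ExtTrueVal comes down to what sign-extending a true comparison yields; for an i1 boolean that is all-ones. A scalar analogue (plain C++, not the DAG form):

    #include <cassert>
    #include <cstdint>

    // sext of an i1 comparison result: true becomes all-ones (-1), false 0.
    int main() {
      int32_t a = 3, b = 7;
      int32_t Mask = -(int32_t)(a < b); // scalar stand-in for sext(setcc)
      assert(Mask == -1);
      Mask = -(int32_t)(b < a);
      assert(Mask == 0);
      return 0;
    }
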
@@ -6436,56 +6503,48 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
}
if (N0.getOpcode() == ISD::SETCC) {
+ // Only do this before legalize for now.
if (!LegalOperations && VT.isVector() &&
N0.getValueType().getVectorElementType() == MVT::i1) {
- EVT N0VT = N0.getOperand(0).getValueType();
- if (getSetCCResultType(N0VT) == N0.getValueType())
+ EVT N00VT = N0.getOperand(0).getValueType();
+ if (getSetCCResultType(N00VT) == N0.getValueType())
return SDValue();
- // zext(setcc) -> (and (vsetcc), (1, 1, ...) for vectors.
- // Only do this before legalize for now.
- EVT EltVT = VT.getVectorElementType();
+    // We know that the # elements of the result is the same as the #
+ // elements of the compare (and the # elements of the compare result for
+ // that matter). Check to see that they are the same size. If so, we know
+ // that the element size of the sext'd result matches the element size of
+ // the compare operands.
SDLoc DL(N);
- SmallVector<SDValue,8> OneOps(VT.getVectorNumElements(),
- DAG.getConstant(1, DL, EltVT));
- if (VT.getSizeInBits() == N0VT.getSizeInBits())
- // We know that the # elements of the results is the same as the
- // # elements of the compare (and the # elements of the compare result
- // for that matter). Check to see that they are the same size. If so,
- // we know that the element size of the sext'd result matches the
- // element size of the compare operands.
- return DAG.getNode(ISD::AND, DL, VT,
- DAG.getSetCC(DL, VT, N0.getOperand(0),
- N0.getOperand(1),
- cast<CondCodeSDNode>(N0.getOperand(2))->get()),
- DAG.getNode(ISD::BUILD_VECTOR, DL, VT,
- OneOps));
+ SDValue VecOnes = DAG.getConstant(1, DL, VT);
+ if (VT.getSizeInBits() == N00VT.getSizeInBits()) {
+      // zext(setcc) -> (and (vsetcc), (1, 1, ...)) for vectors.
+ SDValue VSetCC = DAG.getNode(ISD::SETCC, DL, VT, N0.getOperand(0),
+ N0.getOperand(1), N0.getOperand(2));
+ return DAG.getNode(ISD::AND, DL, VT, VSetCC, VecOnes);
+ }
// If the desired elements are smaller or larger than the source
// elements we can use a matching integer vector type and then
- // truncate/sign extend
- EVT MatchingElementType =
- EVT::getIntegerVT(*DAG.getContext(),
- N0VT.getScalarType().getSizeInBits());
- EVT MatchingVectorType =
- EVT::getVectorVT(*DAG.getContext(), MatchingElementType,
- N0VT.getVectorNumElements());
+ // truncate/sign extend.
+ EVT MatchingElementType = EVT::getIntegerVT(
+ *DAG.getContext(), N00VT.getScalarType().getSizeInBits());
+ EVT MatchingVectorType = EVT::getVectorVT(
+ *DAG.getContext(), MatchingElementType, N00VT.getVectorNumElements());
SDValue VsetCC =
- DAG.getSetCC(DL, MatchingVectorType, N0.getOperand(0),
- N0.getOperand(1),
- cast<CondCodeSDNode>(N0.getOperand(2))->get());
- return DAG.getNode(ISD::AND, DL, VT,
- DAG.getSExtOrTrunc(VsetCC, DL, VT),
- DAG.getNode(ISD::BUILD_VECTOR, DL, VT, OneOps));
+ DAG.getNode(ISD::SETCC, DL, MatchingVectorType, N0.getOperand(0),
+ N0.getOperand(1), N0.getOperand(2));
+ return DAG.getNode(ISD::AND, DL, VT, DAG.getSExtOrTrunc(VsetCC, DL, VT),
+ VecOnes);
}
// zext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
SDLoc DL(N);
- SDValue SCC =
- SimplifySelectCC(DL, N0.getOperand(0), N0.getOperand(1),
- DAG.getConstant(1, DL, VT), DAG.getConstant(0, DL, VT),
- cast<CondCodeSDNode>(N0.getOperand(2))->get(), true);
- if (SCC.getNode()) return SCC;
+ if (SDValue SCC = SimplifySelectCC(
+ DL, N0.getOperand(0), N0.getOperand(1), DAG.getConstant(1, DL, VT),
+ DAG.getConstant(0, DL, VT),
+ cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
+ return SCC;
}
// (zext (shl (zext x), cst)) -> (shl (zext x), cst)
@@ -6660,11 +6719,10 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
// aext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
SDLoc DL(N);
- SDValue SCC =
- SimplifySelectCC(DL, N0.getOperand(0), N0.getOperand(1),
- DAG.getConstant(1, DL, VT), DAG.getConstant(0, DL, VT),
- cast<CondCodeSDNode>(N0.getOperand(2))->get(), true);
- if (SCC.getNode())
+ if (SDValue SCC = SimplifySelectCC(
+ DL, N0.getOperand(0), N0.getOperand(1), DAG.getConstant(1, DL, VT),
+ DAG.getConstant(0, DL, VT),
+ cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
return SCC;
}
@@ -6854,15 +6912,14 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
SDValue Load;
if (ExtType == ISD::NON_EXTLOAD)
- Load = DAG.getLoad(VT, SDLoc(N0), LN0->getChain(), NewPtr,
- LN0->getPointerInfo().getWithOffset(PtrOff),
- LN0->isVolatile(), LN0->isNonTemporal(),
- LN0->isInvariant(), NewAlign, LN0->getAAInfo());
+ Load = DAG.getLoad(VT, SDLoc(N0), LN0->getChain(), NewPtr,
+ LN0->getPointerInfo().getWithOffset(PtrOff), NewAlign,
+ LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
else
- Load = DAG.getExtLoad(ExtType, SDLoc(N0), VT, LN0->getChain(),NewPtr,
- LN0->getPointerInfo().getWithOffset(PtrOff),
- ExtVT, LN0->isVolatile(), LN0->isNonTemporal(),
- LN0->isInvariant(), NewAlign, LN0->getAAInfo());
+ Load = DAG.getExtLoad(ExtType, SDLoc(N0), VT, LN0->getChain(), NewPtr,
+ LN0->getPointerInfo().getWithOffset(PtrOff), ExtVT,
+ NewAlign, LN0->getMemOperand()->getFlags(),
+ LN0->getAAInfo());
// Replace the old load's chain with the new load's chain.
WorklistRemover DeadNodes(*this);
@@ -6902,7 +6959,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
return DAG.getUNDEF(VT);
// fold (sext_in_reg c1) -> c1
- if (isConstantIntBuildVectorOrConstantInt(N0))
+ if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, N0, N1);
// If the input is already sign extended, just drop the extension.
@@ -6988,9 +7045,8 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
// Form (sext_inreg (bswap >> 16)) or (sext_inreg (rotl (bswap) 16))
if (EVTBits <= 16 && N0.getOpcode() == ISD::OR) {
- SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
- N0.getOperand(1), false);
- if (BSwap.getNode())
+ if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
+ N0.getOperand(1), false))
return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
BSwap, N1);
}
@@ -7002,7 +7058,21 @@ SDValue DAGCombiner::visitSIGN_EXTEND_VECTOR_INREG(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
- if (N0.getOpcode() == ISD::UNDEF)
+ if (N0.isUndef())
+ return DAG.getUNDEF(VT);
+
+ if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes,
+ LegalOperations))
+ return SDValue(Res, 0);
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitZERO_EXTEND_VECTOR_INREG(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+
+ if (N0.isUndef())
return DAG.getUNDEF(VT);
if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes,
@@ -7021,7 +7091,7 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
if (N0.getValueType() == N->getValueType(0))
return N0;
// fold (truncate c1) -> c1
- if (isConstantIntBuildVectorOrConstantInt(N0))
+ if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0);
// fold (truncate (truncate x)) -> (truncate x)
if (N0.getOpcode() == ISD::TRUNCATE)
@@ -7030,12 +7100,11 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
if (N0.getOpcode() == ISD::ZERO_EXTEND ||
N0.getOpcode() == ISD::SIGN_EXTEND ||
N0.getOpcode() == ISD::ANY_EXTEND) {
+ // if the source is smaller than the dest, we still need an extend.
if (N0.getOperand(0).getValueType().bitsLT(VT))
- // if the source is smaller than the dest, we still need an extend
- return DAG.getNode(N0.getOpcode(), SDLoc(N), VT,
- N0.getOperand(0));
+ return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, N0.getOperand(0));
+    // if the source is larger than the dest, then we just need the truncate.
if (N0.getOperand(0).getValueType().bitsGT(VT))
- // if the source is larger than the dest, than we just need the truncate
return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0));
// if the source and dest are the same type, we can drop both the extend
// and the truncate.
@@ -7071,12 +7140,9 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
EVT IndexTy = TLI.getVectorIdxTy(DAG.getDataLayout());
int Index = isLE ? (Elt*SizeRatio) : (Elt*SizeRatio + (SizeRatio-1));
- SDValue V = DAG.getNode(ISD::BITCAST, SDLoc(N),
- NVT, N0.getOperand(0));
-
SDLoc DL(N);
- return DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
- DL, TrTy, V,
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, TrTy,
+ DAG.getBitcast(NVT, N0.getOperand(0)),
DAG.getConstant(Index, DL, IndexTy));
}
}
@@ -7094,6 +7160,25 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
}
}
+  // trunc (shl x, K) -> shl (trunc x), K, when K < vt.size / 2
+ if (N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
+ (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::SHL, VT)) &&
+ TLI.isTypeDesirableForOp(ISD::SHL, VT)) {
+ if (const ConstantSDNode *CAmt = isConstOrConstSplat(N0.getOperand(1))) {
+ uint64_t Amt = CAmt->getZExtValue();
+ unsigned Size = VT.getSizeInBits();
+
+ if (Amt < Size / 2) {
+ SDLoc SL(N);
+ EVT AmtVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
+
+ SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(0));
+ return DAG.getNode(ISD::SHL, SL, VT, Trunc,
+ DAG.getConstant(Amt, SL, AmtVT));
+ }
+ }
+ }
+
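
The trunc-of-shl identity holds because truncation discards the same high bits whether it runs before or after the shift; the K < vt.size / 2 bound above is a profitability restriction, not a correctness one. A sketch under those assumptions (arbitrary test values):

    #include <cassert>
    #include <cstdint>

    // (uint32_t)(x << K) == ((uint32_t)x << K): the truncate discards the
    // same high bits on either side of the shift.
    int main() {
      const unsigned K = 11; // the combiner requires K < 16 for i64 -> i32
      for (int i = 0; i < 1000; ++i) {
        uint64_t x = (uint64_t)i * 0x9E3779B97F4A7C15ull; // arbitrary values
        assert((uint32_t)(x << K) == ((uint32_t)x << K));
      }
      return 0;
    }
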
// Fold a series of buildvector, bitcast, and truncate if possible.
// For example fold
// (2xi32 trunc (bitcast ((4xi32)buildvector x, x, y, y) 2xi64)) to
@@ -7121,7 +7206,7 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
for (unsigned i = 0, e = BuildVecNumElts; i != e; i += TruncEltOffset)
Opnds.push_back(BuildVect.getOperand(i));
- return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, Opnds);
+ return DAG.getBuildVector(VT, SDLoc(N), Opnds);
}
}
@@ -7131,10 +7216,9 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
// Currently we only perform this optimization on scalars because vectors
// may have different active low bits.
if (!VT.isVector()) {
- SDValue Shorter =
- GetDemandedBits(N0, APInt::getLowBitsSet(N0.getValueSizeInBits(),
- VT.getSizeInBits()));
- if (Shorter.getNode())
+ if (SDValue Shorter =
+ GetDemandedBits(N0, APInt::getLowBitsSet(N0.getValueSizeInBits(),
+ VT.getSizeInBits())))
return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Shorter);
}
// fold (truncate (load x)) -> (smaller load x)
@@ -7168,7 +7252,7 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
for (unsigned i = 0, e = N0.getNumOperands(); i != e; ++i) {
SDValue X = N0.getOperand(i);
- if (X.getOpcode() != ISD::UNDEF) {
+ if (!X.isUndef()) {
V = X;
Idx = i;
NumDefs++;
@@ -7200,6 +7284,24 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
}
}
+ // Fold truncate of a bitcast of a vector to an extract of the low vector
+ // element.
+ //
+ // e.g. trunc (i64 (bitcast v2i32:x)) -> extract_vector_elt v2i32:x, 0
+ if (N0.getOpcode() == ISD::BITCAST && !VT.isVector()) {
+ SDValue VecSrc = N0.getOperand(0);
+ EVT SrcVT = VecSrc.getValueType();
+ if (SrcVT.isVector() && SrcVT.getScalarType() == VT &&
+ (!LegalOperations ||
+ TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, SrcVT))) {
+ SDLoc SL(N);
+
+ EVT IdxVT = TLI.getVectorIdxTy(DAG.getDataLayout());
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, VT,
+ VecSrc, DAG.getConstant(0, SL, IdxVT));
+ }
+ }
+
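
On a little-endian target the low bits of the bitcast integer are vector element 0, which is what makes this extract valid. A host-side illustration (assumes a little-endian host; memcpy stands in for the bitcast):

    #include <cassert>
    #include <cstdint>
    #include <cstring>

    int main() {
      uint32_t Vec[2] = {0xAABBCCDDu, 0x11223344u}; // a v2i32 value
      uint64_t Bits;
      std::memcpy(&Bits, Vec, sizeof(Bits)); // the i64 bitcast
      uint32_t Trunc = (uint32_t)Bits;       // the truncate
      assert(Trunc == Vec[0]); // element 0, on a little-endian host
      return 0;
    }
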
// Simplify the operands using demanded-bits information.
if (!VT.isVector() &&
SimplifyDemandedBits(SDValue(N, 0)))
@@ -7226,23 +7328,17 @@ SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) {
LD1->getAddressSpace() != LD2->getAddressSpace())
return SDValue();
EVT LD1VT = LD1->getValueType(0);
-
- if (ISD::isNON_EXTLoad(LD2) &&
- LD2->hasOneUse() &&
- // If both are volatile this would reduce the number of volatile loads.
- // If one is volatile it might be ok, but play conservative and bail out.
- !LD1->isVolatile() &&
- !LD2->isVolatile() &&
- DAG.isConsecutiveLoad(LD2, LD1, LD1VT.getSizeInBits()/8, 1)) {
+ unsigned LD1Bytes = LD1VT.getSizeInBits() / 8;
+ if (ISD::isNON_EXTLoad(LD2) && LD2->hasOneUse() &&
+ DAG.areNonVolatileConsecutiveLoads(LD2, LD1, LD1Bytes, 1)) {
unsigned Align = LD1->getAlignment();
unsigned NewAlign = DAG.getDataLayout().getABITypeAlignment(
VT.getTypeForEVT(*DAG.getContext()));
if (NewAlign <= Align &&
(!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)))
- return DAG.getLoad(VT, SDLoc(N), LD1->getChain(),
- LD1->getBasePtr(), LD1->getPointerInfo(),
- false, false, false, Align);
+ return DAG.getLoad(VT, SDLoc(N), LD1->getChain(), LD1->getBasePtr(),
+ LD1->getPointerInfo(), Align);
}
return SDValue();
@@ -7254,6 +7350,49 @@ static unsigned getPPCf128HiElementSelector(const SelectionDAG &DAG) {
return DAG.getDataLayout().isBigEndian() ? 1 : 0;
}
+static SDValue foldBitcastedFPLogic(SDNode *N, SelectionDAG &DAG,
+ const TargetLowering &TLI) {
+ // If this is not a bitcast to an FP type or if the target doesn't have
+ // IEEE754-compliant FP logic, we're done.
+ EVT VT = N->getValueType(0);
+ if (!VT.isFloatingPoint() || !TLI.hasBitPreservingFPLogic(VT))
+ return SDValue();
+
+ // TODO: Use splat values for the constant-checking below and remove this
+ // restriction.
+ SDValue N0 = N->getOperand(0);
+ EVT SourceVT = N0.getValueType();
+ if (SourceVT.isVector())
+ return SDValue();
+
+ unsigned FPOpcode;
+ APInt SignMask;
+ switch (N0.getOpcode()) {
+ case ISD::AND:
+ FPOpcode = ISD::FABS;
+ SignMask = ~APInt::getSignBit(SourceVT.getSizeInBits());
+ break;
+ case ISD::XOR:
+ FPOpcode = ISD::FNEG;
+ SignMask = APInt::getSignBit(SourceVT.getSizeInBits());
+ break;
+ // TODO: ISD::OR --> ISD::FNABS?
+ default:
+ return SDValue();
+ }
+
+ // Fold (bitcast int (and (bitcast fp X to int), 0x7fff...) to fp) -> fabs X
+ // Fold (bitcast int (xor (bitcast fp X to int), 0x8000...) to fp) -> fneg X
+ SDValue LogicOp0 = N0.getOperand(0);
+ ConstantSDNode *LogicOp1 = dyn_cast<ConstantSDNode>(N0.getOperand(1));
+ if (LogicOp1 && LogicOp1->getAPIntValue() == SignMask &&
+ LogicOp0.getOpcode() == ISD::BITCAST &&
+ LogicOp0->getOperand(0).getValueType() == VT)
+ return DAG.getNode(FPOpcode, SDLoc(N), VT, LogicOp0->getOperand(0));
+
+ return SDValue();
+}
+
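
foldBitcastedFPLogic works because IEEE754 fabs and fneg are pure sign-bit operations on the value's bit pattern. A scalar analogue for f32 (memcpy stands in for the bitcasts; the input value is arbitrary):

    #include <cassert>
    #include <cstdint>
    #include <cstring>

    int main() {
      const float F = -2.5f;
      uint32_t Bits;
      std::memcpy(&Bits, &F, sizeof(Bits)); // bitcast fp -> int

      uint32_t AbsBits = Bits & ~0x80000000u; // (and x, 0x7fff...) -> fabs
      uint32_t NegBits = Bits ^ 0x80000000u;  // (xor x, 0x8000...) -> fneg
      float Abs, Neg;
      std::memcpy(&Abs, &AbsBits, sizeof(Abs)); // bitcast int -> fp
      std::memcpy(&Neg, &NegBits, sizeof(Neg));
      assert(Abs == 2.5f && Neg == 2.5f);
      return 0;
    }
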
SDValue DAGCombiner::visitBITCAST(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
@@ -7284,13 +7423,12 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) {
TLI.isOperationLegal(ISD::ConstantFP, VT)) ||
(isa<ConstantFPSDNode>(N0) && VT.isInteger() && !VT.isVector() &&
TLI.isOperationLegal(ISD::Constant, VT)))
- return DAG.getNode(ISD::BITCAST, SDLoc(N), VT, N0);
+ return DAG.getBitcast(VT, N0);
}
// (conv (conv x, t1), t2) -> (conv x, t2)
if (N0.getOpcode() == ISD::BITCAST)
- return DAG.getNode(ISD::BITCAST, SDLoc(N), VT,
- N0.getOperand(0));
+ return DAG.getBitcast(VT, N0.getOperand(0));
// fold (conv (load x)) -> (load (conv*)x)
// If the resultant load doesn't need a higher alignment than the original!
@@ -7303,21 +7441,24 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) {
(!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)) &&
TLI.isLoadBitCastBeneficial(N0.getValueType(), VT)) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
- unsigned Align = DAG.getDataLayout().getABITypeAlignment(
- VT.getTypeForEVT(*DAG.getContext()));
unsigned OrigAlign = LN0->getAlignment();
- if (Align <= OrigAlign) {
- SDValue Load = DAG.getLoad(VT, SDLoc(N), LN0->getChain(),
- LN0->getBasePtr(), LN0->getPointerInfo(),
- LN0->isVolatile(), LN0->isNonTemporal(),
- LN0->isInvariant(), OrigAlign,
- LN0->getAAInfo());
+ bool Fast = false;
+ if (TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
+ LN0->getAddressSpace(), OrigAlign, &Fast) &&
+ Fast) {
+ SDValue Load =
+ DAG.getLoad(VT, SDLoc(N), LN0->getChain(), LN0->getBasePtr(),
+ LN0->getPointerInfo(), OrigAlign,
+ LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
return Load;
}
}
+ if (SDValue V = foldBitcastedFPLogic(N, DAG, TLI))
+ return V;
+
// fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
// fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
//
@@ -7334,8 +7475,7 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) {
(N0.getOpcode() == ISD::FABS && !TLI.isFAbsFree(N0.getValueType()))) &&
N0.getNode()->hasOneUse() && VT.isInteger() &&
!VT.isVector() && !N0.getValueType().isVector()) {
- SDValue NewConv = DAG.getNode(ISD::BITCAST, SDLoc(N0), VT,
- N0.getOperand(0));
+ SDValue NewConv = DAG.getBitcast(VT, N0.getOperand(0));
AddToWorklist(NewConv.getNode());
SDLoc DL(N);
@@ -7388,8 +7528,7 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) {
unsigned OrigXWidth = N0.getOperand(1).getValueType().getSizeInBits();
EVT IntXVT = EVT::getIntegerVT(*DAG.getContext(), OrigXWidth);
if (isTypeLegal(IntXVT)) {
- SDValue X = DAG.getNode(ISD::BITCAST, SDLoc(N0),
- IntXVT, N0.getOperand(1));
+ SDValue X = DAG.getBitcast(IntXVT, N0.getOperand(1));
AddToWorklist(X.getNode());
// If X has a different width than the result/lhs, sext it or truncate it.
@@ -7412,11 +7551,9 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) {
if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
APInt SignBit = APInt::getSignBit(VT.getSizeInBits() / 2);
- SDValue Cst = DAG.getNode(ISD::BITCAST, SDLoc(N0.getOperand(0)), VT,
- N0.getOperand(0));
+ SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0));
AddToWorklist(Cst.getNode());
- SDValue X = DAG.getNode(ISD::BITCAST, SDLoc(N0.getOperand(1)), VT,
- N0.getOperand(1));
+ SDValue X = DAG.getBitcast(VT, N0.getOperand(1));
AddToWorklist(X.getNode());
SDValue XorResult = DAG.getNode(ISD::XOR, SDLoc(N0), VT, Cst, X);
AddToWorklist(XorResult.getNode());
@@ -7439,8 +7576,7 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) {
X, DAG.getConstant(SignBit, SDLoc(X), VT));
AddToWorklist(X.getNode());
- SDValue Cst = DAG.getNode(ISD::BITCAST, SDLoc(N0),
- VT, N0.getOperand(0));
+ SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0));
Cst = DAG.getNode(ISD::AND, SDLoc(Cst), VT,
Cst, DAG.getConstant(~SignBit, SDLoc(Cst), VT));
AddToWorklist(Cst.getNode());
@@ -7472,7 +7608,7 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) {
return SDValue(Op.getOperand(0));
if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) ||
ISD::isBuildVectorOfConstantFPSDNodes(Op.getNode()))
- return DAG.getNode(ISD::BITCAST, SDLoc(N), VT, Op);
+ return DAG.getBitcast(VT, Op);
return SDValue();
};
@@ -7529,8 +7665,7 @@ ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
// we can end up with a scalar-to-vector node here.
if (BV->getOpcode() == ISD::SCALAR_TO_VECTOR)
return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(BV), VT,
- DAG.getNode(ISD::BITCAST, SDLoc(BV),
- DstEltVT, BV->getOperand(0)));
+ DAG.getBitcast(DstEltVT, BV->getOperand(0)));
SmallVector<SDValue, 8> Ops;
for (SDValue Op : BV->op_values()) {
@@ -7538,11 +7673,10 @@ ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
// are promoted and implicitly truncated. Make that explicit here.
if (Op.getValueType() != SrcEltVT)
Op = DAG.getNode(ISD::TRUNCATE, SDLoc(BV), SrcEltVT, Op);
- Ops.push_back(DAG.getNode(ISD::BITCAST, SDLoc(BV),
- DstEltVT, Op));
+ Ops.push_back(DAG.getBitcast(DstEltVT, Op));
AddToWorklist(Ops.back().getNode());
}
- return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(BV), VT, Ops);
+ return DAG.getBuildVector(VT, SDLoc(BV), Ops);
}
// Otherwise, we're growing or shrinking the elements. To avoid having to
@@ -7584,7 +7718,7 @@ ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
// Shift the previously computed bits over.
NewBits <<= SrcBitSize;
SDValue Op = BV->getOperand(i+ (isLE ? (NumInputsPerOutput-j-1) : j));
- if (Op.getOpcode() == ISD::UNDEF) continue;
+ if (Op.isUndef()) continue;
EltIsUndef = false;
NewBits |= cast<ConstantSDNode>(Op)->getAPIntValue().
@@ -7598,7 +7732,7 @@ ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
}
EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, Ops.size());
- return DAG.getNode(ISD::BUILD_VECTOR, DL, VT, Ops);
+ return DAG.getBuildVector(VT, DL, Ops);
}
// Finally, this must be the case where we are shrinking elements: each input
@@ -7609,7 +7743,7 @@ ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
SmallVector<SDValue, 8> Ops;
for (const SDValue &Op : BV->op_values()) {
- if (Op.getOpcode() == ISD::UNDEF) {
+ if (Op.isUndef()) {
Ops.append(NumOutputsPerInput, DAG.getUNDEF(DstEltVT));
continue;
}
@@ -7628,7 +7762,7 @@ ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
std::reverse(Ops.end()-NumOutputsPerInput, Ops.end());
}
- return DAG.getNode(ISD::BUILD_VECTOR, DL, VT, Ops);
+ return DAG.getBuildVector(VT, DL, Ops);
}
/// Try to perform FMA combining on a given FADD node.
@@ -7654,6 +7788,11 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
if (!HasFMAD && !HasFMA)
return SDValue();
+ const SelectionDAGTargetInfo *STI = DAG.getSubtarget().getSelectionDAGInfo();
+ if (AllowFusion && STI && STI->generateFMAsInMachineCombiner(OptLevel))
+ return SDValue();
+
// Always prefer FMAD to FMA for precision.
unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
@@ -7837,6 +7976,10 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
if (!HasFMAD && !HasFMA)
return SDValue();
+ const SelectionDAGTargetInfo *STI = DAG.getSubtarget().getSelectionDAGInfo();
+ if (AllowFusion && STI && STI->generateFMAsInMachineCombiner(OptLevel))
+ return SDValue();
+
// Always prefer FMAD to FMA for precision.
unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
@@ -8305,7 +8448,6 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
AddToWorklist(Fused.getNode());
return Fused;
}
-
return SDValue();
}
@@ -8662,7 +8804,7 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) {
// fold (fdiv X, c2) -> fmul X, 1/c2 if losing precision is acceptable.
if (N1CFP) {
// Compute the reciprocal 1.0 / c2.
- APFloat N1APF = N1CFP->getValueAPF();
+ const APFloat &N1APF = N1CFP->getValueAPF();
APFloat Recip(N1APF.getSemantics(), 1); // 1.0
APFloat::opStatus st = Recip.divide(N1APF, APFloat::rmNearestTiesToEven);
// Only do the transform if the reciprocal is a legal fp immediate that
@@ -8681,12 +8823,12 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) {
// If this FDIV is part of a reciprocal square root, it may be folded
// into a target-specific square root estimate instruction.
if (N1.getOpcode() == ISD::FSQRT) {
- if (SDValue RV = BuildRsqrtEstimate(N1.getOperand(0), Flags)) {
+ if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0), Flags)) {
return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
}
} else if (N1.getOpcode() == ISD::FP_EXTEND &&
N1.getOperand(0).getOpcode() == ISD::FSQRT) {
- if (SDValue RV = BuildRsqrtEstimate(N1.getOperand(0).getOperand(0),
+ if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0).getOperand(0),
Flags)) {
RV = DAG.getNode(ISD::FP_EXTEND, SDLoc(N1), VT, RV);
AddToWorklist(RV.getNode());
@@ -8694,7 +8836,7 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) {
}
} else if (N1.getOpcode() == ISD::FP_ROUND &&
N1.getOperand(0).getOpcode() == ISD::FSQRT) {
- if (SDValue RV = BuildRsqrtEstimate(N1.getOperand(0).getOperand(0),
+ if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0).getOperand(0),
Flags)) {
RV = DAG.getNode(ISD::FP_ROUND, SDLoc(N1), VT, RV, N1.getOperand(1));
AddToWorklist(RV.getNode());
@@ -8715,7 +8857,7 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) {
if (SqrtOp.getNode()) {
// We found a FSQRT, so try to make this fold:
// x / (y * sqrt(z)) -> x * (rsqrt(z) / y)
- if (SDValue RV = BuildRsqrtEstimate(SqrtOp.getOperand(0), Flags)) {
+ if (SDValue RV = buildRsqrtEstimate(SqrtOp.getOperand(0), Flags)) {
RV = DAG.getNode(ISD::FDIV, SDLoc(N1), VT, RV, OtherOp, Flags);
AddToWorklist(RV.getNode());
return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
@@ -8772,27 +8914,7 @@ SDValue DAGCombiner::visitFSQRT(SDNode *N) {
// For now, create a Flags object for use with all unsafe math transforms.
SDNodeFlags Flags;
Flags.setUnsafeAlgebra(true);
-
- // Compute this as X * (1/sqrt(X)) = X * (X ** -0.5)
- SDValue RV = BuildRsqrtEstimate(N->getOperand(0), &Flags);
- if (!RV)
- return SDValue();
-
- EVT VT = RV.getValueType();
- SDLoc DL(N);
- RV = DAG.getNode(ISD::FMUL, DL, VT, N->getOperand(0), RV, &Flags);
- AddToWorklist(RV.getNode());
-
- // Unfortunately, RV is now NaN if the input was exactly 0.
- // Select out this case and force the answer to 0.
- SDValue Zero = DAG.getConstantFP(0.0, DL, VT);
- EVT CCVT = getSetCCResultType(VT);
- SDValue ZeroCmp = DAG.getSetCC(DL, CCVT, N->getOperand(0), Zero, ISD::SETEQ);
- AddToWorklist(ZeroCmp.getNode());
- AddToWorklist(RV.getNode());
-
- return DAG.getNode(VT.isVector() ? ISD::VSELECT : ISD::SELECT, DL, VT,
- ZeroCmp, Zero, RV);
+ return buildSqrtEstimate(N->getOperand(0), &Flags);
}
/// copysign(x, fp_extend(y)) -> copysign(x, y)
@@ -8868,7 +8990,7 @@ SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
EVT OpVT = N0.getValueType();
// fold (sint_to_fp c1) -> c1fp
- if (isConstantIntBuildVectorOrConstantInt(N0) &&
+ if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
// ...but only if the target supports immediate floating-point values
(!LegalOperations ||
TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT)))
@@ -8922,7 +9044,7 @@ SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) {
EVT OpVT = N0.getValueType();
// fold (uint_to_fp c1) -> c1fp
- if (isConstantIntBuildVectorOrConstantInt(N0) &&
+ if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
// ...but only if the target supports immediate floating-point values
(!LegalOperations ||
TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT)))
@@ -8993,9 +9115,7 @@ static SDValue FoldIntToFPToInt(SDNode *N, SelectionDAG &DAG) {
}
if (VT.getScalarSizeInBits() < SrcVT.getScalarSizeInBits())
return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Src);
- if (SrcVT == VT)
- return Src;
- return DAG.getNode(ISD::BITCAST, SDLoc(N), VT, Src);
+ return DAG.getBitcast(VT, Src);
}
return SDValue();
}
@@ -9040,6 +9160,17 @@ SDValue DAGCombiner::visitFP_ROUND(SDNode *N) {
if (N0.getOpcode() == ISD::FP_ROUND) {
const bool NIsTrunc = N->getConstantOperandVal(1) == 1;
const bool N0IsTrunc = N0.getNode()->getConstantOperandVal(1) == 1;
+
+ // Skip this folding if it results in an fp_round from f80 to f16.
+ //
+ // f80 to f16 always generates an expensive (and, as yet, unimplemented)
+ // libcall to __truncxfhf2 instead of selecting native f16 conversion
+ // instructions from f32 or f64. Moreover, the first (value-preserving)
+ // fp_round from f80 to either f32 or f64 may become a NOP on platforms
+ // like x86.
+ if (N0.getOperand(0).getValueType() == MVT::f80 && VT == MVT::f16)
+ return SDValue();
+
// If the first fp_round isn't a value preserving truncation, it might
// introduce a tie in the second fp_round, that wouldn't occur in the
// single-step fp_round we want to fold to.
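A worked decimal analogue of the tie problem (illustrative only; the binary case behaves the same way):

    9.46 rounded directly to one digit  -> 9     (9.46 < 9.5)
    9.46 rounded first to two digits    -> 9.5   (not value-preserving)
    9.5  rounded to one digit           -> 10    (tie, round-half-even)

The intermediate step manufactures a tie that the single-step rounding never sees, which is exactly why the fold is restricted to value-preserving first truncations.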
@@ -9198,7 +9329,7 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) {
Int = DAG.getNode(ISD::XOR, DL0, IntVT, Int,
DAG.getConstant(SignMask, DL0, IntVT));
AddToWorklist(Int.getNode());
- return DAG.getNode(ISD::BITCAST, SDLoc(N), VT, Int);
+ return DAG.getBitcast(VT, Int);
}
}
@@ -9303,7 +9434,7 @@ SDValue DAGCombiner::visitFABS(SDNode *N) {
Int = DAG.getNode(ISD::AND, DL, IntVT, Int,
DAG.getConstant(SignMask, DL, IntVT));
AddToWorklist(Int.getNode());
- return DAG.getNode(ISD::BITCAST, SDLoc(N), N->getValueType(0), Int);
+ return DAG.getBitcast(N->getValueType(0), Int);
}
}
@@ -9607,6 +9738,11 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
return false;
}
+ // Caches for hasPredecessorHelper.
+ SmallPtrSet<const SDNode *, 32> Visited;
+ SmallVector<const SDNode *, 16> Worklist;
+ Worklist.push_back(N);
+
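Hoisting the caches to the top of the function lets every later predecessor query share one DAG walk. A hedged fragment (in the surrounding DAGCombiner context, not standalone) of the access pattern, assuming only the helper's documented behavior of accumulating search state in the caller-owned containers:

    SmallPtrSet<const SDNode *, 32> Visited;
    SmallVector<const SDNode *, 16> Worklist;
    Worklist.push_back(N);                       // root of all queries
    for (SDNode *Use : Ptr.getNode()->uses())
      if (SDNode::hasPredecessorHelper(Use, Visited, Worklist))
        /* Use feeds N; k such queries cost roughly one walk, not k */;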
// If the offset is a constant, there may be other adds of constants that
// can be folded with this one. We should do this to avoid having to keep
// a copy of the original base pointer.
@@ -9621,7 +9757,7 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
if (Use.getUser() == Ptr.getNode() || Use != BasePtr)
continue;
- if (Use.getUser()->isPredecessorOf(N))
+ if (SDNode::hasPredecessorHelper(Use.getUser(), Visited, Worklist))
continue;
if (Use.getUser()->getOpcode() != ISD::ADD &&
@@ -9651,14 +9787,10 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
// Now check for #3 and #4.
bool RealUse = false;
- // Caches for hasPredecessorHelper
- SmallPtrSet<const SDNode *, 32> Visited;
- SmallVector<const SDNode *, 16> Worklist;
-
for (SDNode *Use : Ptr.getNode()->uses()) {
if (Use == N)
continue;
- if (N->hasPredecessorHelper(Use, Visited, Worklist))
+ if (SDNode::hasPredecessorHelper(Use, Visited, Worklist))
return false;
// If Ptr may be folded in addressing mode of other use, then it's
@@ -9720,7 +9852,7 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
ConstantSDNode *CN =
cast<ConstantSDNode>(OtherUses[i]->getOperand(OffsetIdx));
int X0, X1, Y0, Y1;
- APInt Offset0 = CN->getAPIntValue();
+ const APInt &Offset0 = CN->getAPIntValue();
APInt Offset1 = cast<ConstantSDNode>(Offset)->getAPIntValue();
X0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 1) ? -1 : 1;
@@ -9984,13 +10116,10 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) {
if (OptLevel != CodeGenOpt::None && LD->isUnindexed()) {
if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
if (Align > LD->getMemOperand()->getBaseAlignment()) {
- SDValue NewLoad =
- DAG.getExtLoad(LD->getExtensionType(), SDLoc(N),
- LD->getValueType(0),
- Chain, Ptr, LD->getPointerInfo(),
- LD->getMemoryVT(),
- LD->isVolatile(), LD->isNonTemporal(),
- LD->isInvariant(), Align, LD->getAAInfo());
+ SDValue NewLoad = DAG.getExtLoad(
+ LD->getExtensionType(), SDLoc(N), LD->getValueType(0), Chain, Ptr,
+ LD->getPointerInfo(), LD->getMemoryVT(), Align,
+ LD->getMemOperand()->getFlags(), LD->getAAInfo());
if (NewLoad.getNode() != N)
return CombineTo(N, NewLoad, SDValue(NewLoad.getNode(), 1), true);
}
@@ -10208,7 +10337,7 @@ struct LoadedSlice {
return false;
// Offsets are for indexed load only, we do not handle that.
- if (Origin->getOffset().getOpcode() != ISD::UNDEF)
+ if (!Origin->getOffset().isUndef())
return false;
const TargetLowering &TLI = DAG->getTargetLoweringInfo();
@@ -10291,10 +10420,10 @@ struct LoadedSlice {
EVT SliceType = getLoadedType();
// Create the load for the slice.
- SDValue LastInst = DAG->getLoad(
- SliceType, SDLoc(Origin), Origin->getChain(), BaseAddr,
- Origin->getPointerInfo().getWithOffset(Offset), Origin->isVolatile(),
- Origin->isNonTemporal(), Origin->isInvariant(), getAlignment());
+ SDValue LastInst =
+ DAG->getLoad(SliceType, SDLoc(Origin), Origin->getChain(), BaseAddr,
+ Origin->getPointerInfo().getWithOffset(Offset),
+ getAlignment(), Origin->getMemOperand()->getFlags());
// If the final type is not the same as the loaded type, this means that
// we have to pad with zero. Create a zero extend for that.
EVT FinalType = Inst->getValueType(0);
@@ -10718,9 +10847,10 @@ ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo,
IVal = DAG.getNode(ISD::TRUNCATE, SDLoc(IVal), VT, IVal);
++OpsNarrowed;
- return DAG.getStore(St->getChain(), SDLoc(St), IVal, Ptr,
- St->getPointerInfo().getWithOffset(StOffset),
- false, false, NewAlign).getNode();
+ return DAG
+ .getStore(St->getChain(), SDLoc(St), IVal, Ptr,
+ St->getPointerInfo().getWithOffset(StOffset), NewAlign)
+ .getNode();
}
@@ -10826,19 +10956,16 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
Ptr.getValueType(), Ptr,
DAG.getConstant(PtrOff, SDLoc(LD),
Ptr.getValueType()));
- SDValue NewLD = DAG.getLoad(NewVT, SDLoc(N0),
- LD->getChain(), NewPtr,
- LD->getPointerInfo().getWithOffset(PtrOff),
- LD->isVolatile(), LD->isNonTemporal(),
- LD->isInvariant(), NewAlign,
- LD->getAAInfo());
+ SDValue NewLD =
+ DAG.getLoad(NewVT, SDLoc(N0), LD->getChain(), NewPtr,
+ LD->getPointerInfo().getWithOffset(PtrOff), NewAlign,
+ LD->getMemOperand()->getFlags(), LD->getAAInfo());
SDValue NewVal = DAG.getNode(Opc, SDLoc(Value), NewVT, NewLD,
DAG.getConstant(NewImm, SDLoc(Value),
NewVT));
- SDValue NewST = DAG.getStore(Chain, SDLoc(N),
- NewVal, NewPtr,
- ST->getPointerInfo().getWithOffset(PtrOff),
- false, false, NewAlign);
+ SDValue NewST =
+ DAG.getStore(Chain, SDLoc(N), NewVal, NewPtr,
+ ST->getPointerInfo().getWithOffset(PtrOff), NewAlign);
AddToWorklist(NewPtr.getNode());
AddToWorklist(NewLD.getNode());
@@ -10887,15 +11014,13 @@ SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) {
if (LDAlign < ABIAlign || STAlign < ABIAlign)
return SDValue();
- SDValue NewLD = DAG.getLoad(IntVT, SDLoc(Value),
- LD->getChain(), LD->getBasePtr(),
- LD->getPointerInfo(),
- false, false, false, LDAlign);
+ SDValue NewLD =
+ DAG.getLoad(IntVT, SDLoc(Value), LD->getChain(), LD->getBasePtr(),
+ LD->getPointerInfo(), LDAlign);
- SDValue NewST = DAG.getStore(NewLD.getValue(1), SDLoc(N),
- NewLD, ST->getBasePtr(),
- ST->getPointerInfo(),
- false, false, STAlign);
+ SDValue NewST =
+ DAG.getStore(NewLD.getValue(1), SDLoc(N), NewLD, ST->getBasePtr(),
+ ST->getPointerInfo(), STAlign);
AddToWorklist(NewLD.getNode());
AddToWorklist(NewST.getNode());
@@ -10940,9 +11065,23 @@ struct BaseIndexOffset {
}
/// Parses tree in Ptr for base, index, offset addresses.
- static BaseIndexOffset match(SDValue Ptr) {
+ static BaseIndexOffset match(SDValue Ptr, SelectionDAG &DAG) {
bool IsIndexSignExt = false;
+ // Split up a folded GlobalAddress+Offset into its component parts.
+ if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Ptr))
+ if (GA->getOpcode() == ISD::GlobalAddress && GA->getOffset() != 0) {
+ return BaseIndexOffset(DAG.getGlobalAddress(GA->getGlobal(),
+ SDLoc(GA),
+ GA->getValueType(0),
+ /*Offset=*/0,
+ /*isTargetGA=*/false,
+ GA->getTargetFlags()),
+ SDValue(),
+ GA->getOffset(),
+ IsIndexSignExt);
+ }
+
// We only can pattern match BASE + INDEX + OFFSET. If Ptr is not an ADD
// instruction, then it could be just the BASE or everything else we don't
// know how to handle. Just use Ptr as BASE and give up.
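Illustrative effect of the new GlobalAddress case (DAG notation approximate): two pointers folded as global-plus-offset previously parsed as distinct bases and could never satisfy equalBaseIndex(); now both report the bare global as Base, so the offsets become directly comparable.

    t5: i64 = GlobalAddress<@g> + 8   ->  Base = @g, Offset = 8
    t7: i64 = GlobalAddress<@g> + 12  ->  Base = @g, Offset = 12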
@@ -11063,7 +11202,7 @@ bool DAGCombiner::isMulAddWithConstProfitable(SDNode *MulNode,
// multiply (CONST * A) after we also do the same transformation
// to the "t2" instruction.
if (OtherOp->getOpcode() == ISD::ADD &&
- isConstantIntBuildVectorOrConstantInt(OtherOp->getOperand(1)) &&
+ DAG.isConstantIntBuildVectorOrConstantInt(OtherOp->getOperand(1)) &&
OtherOp->getOperand(0).getNode() == MulVar)
return true;
}
@@ -11073,11 +11212,9 @@ bool DAGCombiner::isMulAddWithConstProfitable(SDNode *MulNode,
return false;
}
-SDValue DAGCombiner::getMergedConstantVectorStore(SelectionDAG &DAG,
- SDLoc SL,
- ArrayRef<MemOpLink> Stores,
- SmallVectorImpl<SDValue> &Chains,
- EVT Ty) const {
+SDValue DAGCombiner::getMergedConstantVectorStore(
+ SelectionDAG &DAG, const SDLoc &SL, ArrayRef<MemOpLink> Stores,
+ SmallVectorImpl<SDValue> &Chains, EVT Ty) const {
SmallVector<SDValue, 8> BuildVector;
for (unsigned I = 0, E = Ty.getVectorNumElements(); I != E; ++I) {
@@ -11086,7 +11223,7 @@ SDValue DAGCombiner::getMergedConstantVectorStore(SelectionDAG &DAG,
BuildVector.push_back(St->getValue());
}
- return DAG.getNode(ISD::BUILD_VECTOR, SL, Ty, BuildVector);
+ return DAG.getBuildVector(Ty, SL, BuildVector);
}
bool DAGCombiner::MergeStoresOfConstantsOrVecElts(
@@ -11182,29 +11319,36 @@ bool DAGCombiner::MergeStoresOfConstantsOrVecElts(
SDValue NewStore = DAG.getStore(NewChain, DL, StoredVal,
FirstInChain->getBasePtr(),
FirstInChain->getPointerInfo(),
- false, false,
FirstInChain->getAlignment());
- // Replace the last store with the new store
- CombineTo(LatestOp, NewStore);
- // Erase all other stores.
- for (unsigned i = 0; i < NumStores; ++i) {
- if (StoreNodes[i].MemNode == LatestOp)
- continue;
- StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
- // ReplaceAllUsesWith will replace all uses that existed when it was
- // called, but graph optimizations may cause new ones to appear. For
- // example, the case in pr14333 looks like
- //
- // St's chain -> St -> another store -> X
- //
- // And the only difference from St to the other store is the chain.
- // When we change it's chain to be St's chain they become identical,
- // get CSEed and the net result is that X is now a use of St.
- // Since we know that St is redundant, just iterate.
- while (!St->use_empty())
- DAG.ReplaceAllUsesWith(SDValue(St, 0), St->getChain());
- deleteAndRecombine(St);
+ bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA
+ : DAG.getSubtarget().useAA();
+ if (UseAA) {
+ // Replace all merged stores with the new store.
+ for (unsigned i = 0; i < NumStores; ++i)
+ CombineTo(StoreNodes[i].MemNode, NewStore);
+ } else {
+ // Replace the last store with the new store.
+ CombineTo(LatestOp, NewStore);
+ // Erase all other stores.
+ for (unsigned i = 0; i < NumStores; ++i) {
+ if (StoreNodes[i].MemNode == LatestOp)
+ continue;
+ StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
+ // ReplaceAllUsesWith will replace all uses that existed when it was
+ // called, but graph optimizations may cause new ones to appear. For
+ // example, the case in pr14333 looks like
+ //
+ // St's chain -> St -> another store -> X
+ //
+ // And the only difference from St to the other store is the chain.
+ // When we change its chain to be St's chain they become identical,
+ // get CSEed and the net result is that X is now a use of St.
+ // Since we know that St is redundant, just iterate.
+ while (!St->use_empty())
+ DAG.ReplaceAllUsesWith(SDValue(St, 0), St->getChain());
+ deleteAndRecombine(St);
+ }
}
return true;
@@ -11215,14 +11359,14 @@ void DAGCombiner::getStoreMergeAndAliasCandidates(
SmallVectorImpl<LSBaseSDNode*> &AliasLoadNodes) {
// This holds the base pointer, index, and the offset in bytes from the base
// pointer.
- BaseIndexOffset BasePtr = BaseIndexOffset::match(St->getBasePtr());
+ BaseIndexOffset BasePtr = BaseIndexOffset::match(St->getBasePtr(), DAG);
// We must have a base and an offset.
if (!BasePtr.Base.getNode())
return;
// Do not handle stores to undef base pointers.
- if (BasePtr.Base.getOpcode() == ISD::UNDEF)
+ if (BasePtr.Base.isUndef())
return;
// Walk up the chain and look for nodes with offsets from the same
@@ -11253,7 +11397,7 @@ void DAGCombiner::getStoreMergeAndAliasCandidates(
if (OtherST->getMemoryVT() != MemVT)
continue;
- BaseIndexOffset Ptr = BaseIndexOffset::match(OtherST->getBasePtr());
+ BaseIndexOffset Ptr = BaseIndexOffset::match(OtherST->getBasePtr(), DAG);
if (Ptr.equalBaseIndex(BasePtr))
StoreNodes.push_back(MemOpLink(OtherST, Ptr.Offset, Seq++));
@@ -11269,7 +11413,7 @@ void DAGCombiner::getStoreMergeAndAliasCandidates(
break;
// Find the base pointer and offset for this memory node.
- BaseIndexOffset Ptr = BaseIndexOffset::match(Index->getBasePtr());
+ BaseIndexOffset Ptr = BaseIndexOffset::match(Index->getBasePtr(), DAG);
// Check that the base pointer is the same as the original one.
if (!Ptr.equalBaseIndex(BasePtr))
@@ -11280,9 +11424,8 @@ void DAGCombiner::getStoreMergeAndAliasCandidates(
break;
// No truncation.
- if (StoreSDNode *St = dyn_cast<StoreSDNode>(Index))
- if (St->isTruncatingStore())
- break;
+ if (Index->isTruncatingStore())
+ break;
// The stored memory type must be the same.
if (Index->getMemoryVT() != MemVT)
@@ -11326,6 +11469,30 @@ void DAGCombiner::getStoreMergeAndAliasCandidates(
}
}
+// We need to check that merging these stores does not cause a loop in
+// the DAG. Any store candidate may depend on another candidate
+// indirectly through its operands (we already consider dependencies
+// through the chain). Check in parallel by searching up from the
+// non-chain operands of the candidates.
+bool DAGCombiner::checkMergeStoreCandidatesForDependencies(
+ SmallVectorImpl<MemOpLink> &StoreNodes) {
+ SmallPtrSet<const SDNode *, 16> Visited;
+ SmallVector<const SDNode *, 8> Worklist;
+ // Search the operands of the store candidates.
+ for (unsigned i = 0; i < StoreNodes.size(); ++i) {
+ SDNode *n = StoreNodes[i].MemNode;
+ // Potential loops may happen only through non-chain operands.
+ for (unsigned j = 1; j < n->getNumOperands(); ++j)
+ Worklist.push_back(n->getOperand(j).getNode());
+ }
+ // Search through the DAG. We can stop early if we find a store node.
+ for (unsigned i = 0; i < StoreNodes.size(); ++i) {
+ if (SDNode::hasPredecessorHelper(StoreNodes[i].MemNode, Visited, Worklist))
+ return false;
+ }
+ return true;
+}
+
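An illustrative shape of the cycle this guards against (pseudo nodes, not a real DAG dump): if one candidate's value operand reaches another candidate, the single merged store would become its own predecessor.

    s1: store %a to @p        ; merge candidate
    v:  load from @q          ; chained after s1
    s2: store %v to @p + 4    ; merge candidate, value reaches s1
    ; a merged (s1,s2) store would use %v, which depends on s1 = itself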
bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
if (OptLevel == CodeGenOpt::None)
return false;
@@ -11379,6 +11546,12 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
if (StoreNodes.size() < 2)
return false;
+ // Only do the dependence check in the AA case.
+ bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA
+ : DAG.getSubtarget().useAA();
+ if (UseAA && !checkMergeStoreCandidatesForDependencies(StoreNodes))
+ return false;
+
// Sort the memory operands according to their distance from the
// base pointer. As a secondary criteria: make sure stores coming
// later in the code come first in the list. This is important for
@@ -11557,7 +11730,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
if (Ld->getMemoryVT() != MemVT)
break;
- BaseIndexOffset LdPtr = BaseIndexOffset::match(Ld->getBasePtr());
+ BaseIndexOffset LdPtr = BaseIndexOffset::match(Ld->getBasePtr(), DAG);
// If this is not the first ptr that we check.
if (LdBasePtr.Base.getNode()) {
// The base ptr must be the same.
@@ -11690,16 +11863,16 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
// The merged loads are required to have the same incoming chain, so
// using the first's chain is acceptable.
- SDValue NewLoad = DAG.getLoad(
- JointMemOpVT, LoadDL, FirstLoad->getChain(), FirstLoad->getBasePtr(),
- FirstLoad->getPointerInfo(), false, false, false, FirstLoadAlign);
+ SDValue NewLoad = DAG.getLoad(JointMemOpVT, LoadDL, FirstLoad->getChain(),
+ FirstLoad->getBasePtr(),
+ FirstLoad->getPointerInfo(), FirstLoadAlign);
SDValue NewStoreChain =
DAG.getNode(ISD::TokenFactor, StoreDL, MVT::Other, MergeStoreChains);
- SDValue NewStore = DAG.getStore(
- NewStoreChain, StoreDL, NewLoad, FirstInChain->getBasePtr(),
- FirstInChain->getPointerInfo(), false, false, FirstStoreAlign);
+ SDValue NewStore =
+ DAG.getStore(NewStoreChain, StoreDL, NewLoad, FirstInChain->getBasePtr(),
+ FirstInChain->getPointerInfo(), FirstStoreAlign);
// Transfer chain users from old loads to the new load.
for (unsigned i = 0; i < NumElem; ++i) {
@@ -11708,16 +11881,22 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
SDValue(NewLoad.getNode(), 1));
}
- // Replace the last store with the new store.
- CombineTo(LatestOp, NewStore);
- // Erase all other stores.
- for (unsigned i = 0; i < NumElem ; ++i) {
- // Remove all Store nodes.
- if (StoreNodes[i].MemNode == LatestOp)
- continue;
- StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
- DAG.ReplaceAllUsesOfValueWith(SDValue(St, 0), St->getChain());
- deleteAndRecombine(St);
+ if (UseAA) {
+ // Replace all merged stores with the new store.
+ for (unsigned i = 0; i < NumElem; ++i)
+ CombineTo(StoreNodes[i].MemNode, NewStore);
+ } else {
+ // Replace the last store with the new store.
+ CombineTo(LatestOp, NewStore);
+ // Erase all other stores.
+ for (unsigned i = 0; i < NumElem; ++i) {
+ // Remove all Store nodes.
+ if (StoreNodes[i].MemNode == LatestOp)
+ continue;
+ StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(St, 0), St->getChain());
+ deleteAndRecombine(St);
+ }
}
return true;
@@ -11808,21 +11987,17 @@ SDValue DAGCombiner::replaceStoreOfFPConstant(StoreSDNode *ST) {
std::swap(Lo, Hi);
unsigned Alignment = ST->getAlignment();
- bool isVolatile = ST->isVolatile();
- bool isNonTemporal = ST->isNonTemporal();
+ MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
AAMDNodes AAInfo = ST->getAAInfo();
- SDValue St0 = DAG.getStore(Chain, DL, Lo,
- Ptr, ST->getPointerInfo(),
- isVolatile, isNonTemporal,
- ST->getAlignment(), AAInfo);
+ SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
+ ST->getAlignment(), MMOFlags, AAInfo);
Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
DAG.getConstant(4, DL, Ptr.getValueType()));
Alignment = MinAlign(Alignment, 4U);
- SDValue St1 = DAG.getStore(Chain, DL, Hi,
- Ptr, ST->getPointerInfo().getWithOffset(4),
- isVolatile, isNonTemporal,
- Alignment, AAInfo);
+ SDValue St1 = DAG.getStore(Chain, DL, Hi, Ptr,
+ ST->getPointerInfo().getWithOffset(4),
+ Alignment, MMOFlags, AAInfo);
return DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
St0, St1);
}
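Memory-level shape of the transform above, as a hedged little-endian sketch (the bit patterns shown are for the constant 1.0; the combiner emits the equivalent two integer stores, the second at offset 4 with MinAlign(Alignment, 4)):

    #include <cstdint>
    void store_one(uint32_t *p) {   // p aliases the f64 slot (sketch only)
      p[0] = 0x00000000u;           // Lo word of 1.0
      p[1] = 0x3ff00000u;           // Hi word of 1.0 (words swap on big-endian)
    }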
@@ -11841,21 +12016,24 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
// resultant store does not need a higher alignment than the original.
if (Value.getOpcode() == ISD::BITCAST && !ST->isTruncatingStore() &&
ST->isUnindexed()) {
- unsigned OrigAlign = ST->getAlignment();
EVT SVT = Value.getOperand(0).getValueType();
- unsigned Align = DAG.getDataLayout().getABITypeAlignment(
- SVT.getTypeForEVT(*DAG.getContext()));
- if (Align <= OrigAlign &&
- ((!LegalOperations && !ST->isVolatile()) ||
- TLI.isOperationLegalOrCustom(ISD::STORE, SVT)))
- return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0),
- Ptr, ST->getPointerInfo(), ST->isVolatile(),
- ST->isNonTemporal(), OrigAlign,
- ST->getAAInfo());
+ if (((!LegalOperations && !ST->isVolatile()) ||
+ TLI.isOperationLegalOrCustom(ISD::STORE, SVT)) &&
+ TLI.isStoreBitCastBeneficial(Value.getValueType(), SVT)) {
+ unsigned OrigAlign = ST->getAlignment();
+ bool Fast = false;
+ if (TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), SVT,
+ ST->getAddressSpace(), OrigAlign, &Fast) &&
+ Fast) {
+ return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0), Ptr,
+ ST->getPointerInfo(), OrigAlign,
+ ST->getMemOperand()->getFlags(), ST->getAAInfo());
+ }
+ }
}
// Turn 'store undef, Ptr' -> nothing.
- if (Value.getOpcode() == ISD::UNDEF && ST->isUnindexed())
+ if (Value.isUndef() && ST->isUnindexed())
return Chain;
// Try to infer better alignment information than the store already has.
@@ -11863,10 +12041,9 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
if (Align > ST->getAlignment()) {
SDValue NewStore =
- DAG.getTruncStore(Chain, SDLoc(N), Value,
- Ptr, ST->getPointerInfo(), ST->getMemoryVT(),
- ST->isVolatile(), ST->isNonTemporal(), Align,
- ST->getAAInfo());
+ DAG.getTruncStore(Chain, SDLoc(N), Value, Ptr, ST->getPointerInfo(),
+ ST->getMemoryVT(), Align,
+ ST->getMemOperand()->getFlags(), ST->getAAInfo());
if (NewStore.getNode() != N)
return CombineTo(ST, NewStore, true);
}
@@ -11898,6 +12075,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
// manipulation. Return the original node to not do anything else.
return SDValue(ST, 0);
}
+ Chain = ST->getChain();
}
// Try transforming N to an indexed store.
@@ -12001,7 +12179,7 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
SDLoc dl(N);
// If the inserted element is an UNDEF, just use the input vector.
- if (InVal.getOpcode() == ISD::UNDEF)
+ if (InVal.isUndef())
return InVec;
EVT VT = InVec.getValueType();
@@ -12045,7 +12223,7 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
if (InVec.getOpcode() == ISD::BUILD_VECTOR && InVec.hasOneUse()) {
Ops.append(InVec.getNode()->op_begin(),
InVec.getNode()->op_end());
- } else if (InVec.getOpcode() == ISD::UNDEF) {
+ } else if (InVec.isUndef()) {
unsigned NElts = VT.getVectorNumElements();
Ops.append(NElts, DAG.getUNDEF(InVal.getValueType()));
} else {
@@ -12065,11 +12243,13 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
}
// Return the new vector
- return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops);
+ return DAG.getBuildVector(VT, dl, Ops);
}
SDValue DAGCombiner::ReplaceExtractVectorEltOfLoadWithNarrowedLoad(
SDNode *EVE, EVT InVecVT, SDValue EltNo, LoadSDNode *OriginalLoad) {
+ assert(!OriginalLoad->isVolatile());
+
EVT ResultVT = EVE->getValueType(0);
EVT VecEltVT = InVecVT.getVectorElementType();
unsigned Align = OriginalLoad->getAlignment();
@@ -12115,21 +12295,20 @@ SDValue DAGCombiner::ReplaceExtractVectorEltOfLoadWithNarrowedLoad(
VecEltVT)
? ISD::ZEXTLOAD
: ISD::EXTLOAD;
- Load = DAG.getExtLoad(
- ExtType, SDLoc(EVE), ResultVT, OriginalLoad->getChain(), NewPtr, MPI,
- VecEltVT, OriginalLoad->isVolatile(), OriginalLoad->isNonTemporal(),
- OriginalLoad->isInvariant(), Align, OriginalLoad->getAAInfo());
+ Load = DAG.getExtLoad(ExtType, SDLoc(EVE), ResultVT,
+ OriginalLoad->getChain(), NewPtr, MPI, VecEltVT,
+ Align, OriginalLoad->getMemOperand()->getFlags(),
+ OriginalLoad->getAAInfo());
Chain = Load.getValue(1);
} else {
- Load = DAG.getLoad(
- VecEltVT, SDLoc(EVE), OriginalLoad->getChain(), NewPtr, MPI,
- OriginalLoad->isVolatile(), OriginalLoad->isNonTemporal(),
- OriginalLoad->isInvariant(), Align, OriginalLoad->getAAInfo());
+ Load = DAG.getLoad(VecEltVT, SDLoc(EVE), OriginalLoad->getChain(), NewPtr,
+ MPI, Align, OriginalLoad->getMemOperand()->getFlags(),
+ OriginalLoad->getAAInfo());
Chain = Load.getValue(1);
if (ResultVT.bitsLT(VecEltVT))
Load = DAG.getNode(ISD::TRUNCATE, SDLoc(EVE), ResultVT, Load);
else
- Load = DAG.getNode(ISD::BITCAST, SDLoc(EVE), ResultVT, Load);
+ Load = DAG.getBitcast(ResultVT, Load);
}
WorklistRemover DeadNodes(*this);
SDValue From[] = { SDValue(EVE, 0), SDValue(OriginalLoad, 1) };
@@ -12183,6 +12362,24 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
// converts.
}
+ // extract_vector_elt (v2i32 (bitcast i64:x)), 0 -> i32 (trunc i64:x)
+ if (ConstEltNo && InVec.getOpcode() == ISD::BITCAST && InVec.hasOneUse() &&
+ ConstEltNo->isNullValue() && VT.isInteger()) {
+ SDValue BCSrc = InVec.getOperand(0);
+ if (BCSrc.getValueType().isScalarInteger())
+ return DAG.getNode(ISD::TRUNCATE, SDLoc(N), NVT, BCSrc);
+ }
+
+ // extract_vector_elt (insert_vector_elt vec, val, idx), idx -> val
+ //
+ // This only really matters if the index is non-constant since other combines
+ // on the constant elements already work.
+ if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT &&
+ EltNo == InVec.getOperand(2)) {
+ SDValue Elt = InVec.getOperand(1);
+ return VT.isInteger() ? DAG.getAnyExtOrTrunc(Elt, SDLoc(N), NVT) : Elt;
+ }
+
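Source-level shape of the first new fold, as an illustrative sketch (it assumes the little-endian lane numbering where lane 0 holds the low bits; the function name is made up):

    #include <cstdint>
    #include <cstring>
    uint32_t low_lane(uint64_t x) {
      uint32_t v[2];
      std::memcpy(v, &x, sizeof v);  // the "bitcast" v2i32 <- i64
      return v[0];                   // lane 0 == (uint32_t)x on little-endian
    }

The second fold is the complementary read-back case: extracting the lane that was just inserted yields the inserted value directly, even when the index is not a constant.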
// Transform: (EXTRACT_VECTOR_ELT( VECTOR_SHUFFLE )) -> EXTRACT_VECTOR_ELT.
// We only perform this optimization before the op legalization phase because
// we may introduce new vector instructions which are not backed by TD
@@ -12256,9 +12453,12 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
ISD::isNormalLoad(InVec.getNode()) &&
!N->getOperand(1)->hasPredecessor(InVec.getNode())) {
SDValue Index = N->getOperand(1);
- if (LoadSDNode *OrigLoad = dyn_cast<LoadSDNode>(InVec))
- return ReplaceExtractVectorEltOfLoadWithNarrowedLoad(N, VT, Index,
- OrigLoad);
+ if (LoadSDNode *OrigLoad = dyn_cast<LoadSDNode>(InVec)) {
+ if (!OrigLoad->isVolatile()) {
+ return ReplaceExtractVectorEltOfLoadWithNarrowedLoad(N, VT, Index,
+ OrigLoad);
+ }
+ }
}
// Perform only after legalization to ensure build_vector / vector_shuffle
@@ -12358,7 +12558,7 @@ SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) {
for (unsigned i = 0; i != NumInScalars; ++i) {
SDValue In = N->getOperand(i);
// Ignore undef inputs.
- if (In.getOpcode() == ISD::UNDEF) continue;
+ if (In.isUndef()) continue;
bool AnyExt = In.getOpcode() == ISD::ANY_EXTEND;
bool ZeroExt = In.getOpcode() == ISD::ZERO_EXTEND;
@@ -12413,9 +12613,9 @@ SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) {
SDValue Cast = N->getOperand(i);
assert((Cast.getOpcode() == ISD::ANY_EXTEND ||
Cast.getOpcode() == ISD::ZERO_EXTEND ||
- Cast.getOpcode() == ISD::UNDEF) && "Invalid cast opcode");
+ Cast.isUndef()) && "Invalid cast opcode");
SDValue In;
- if (Cast.getOpcode() == ISD::UNDEF)
+ if (Cast.isUndef())
In = DAG.getUNDEF(SourceType);
else
In = Cast->getOperand(0);
@@ -12434,12 +12634,12 @@ SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) {
if (!isTypeLegal(VecVT)) return SDValue();
// Make the new BUILD_VECTOR.
- SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, VecVT, Ops);
+ SDValue BV = DAG.getBuildVector(VecVT, dl, Ops);
// The new BUILD_VECTOR node has the potential to be further optimized.
AddToWorklist(BV.getNode());
// Bitcast to the desired type.
- return DAG.getNode(ISD::BITCAST, dl, VT, BV);
+ return DAG.getBitcast(VT, BV);
}
SDValue DAGCombiner::reduceBuildVecConvertToConvertBuildVec(SDNode *N) {
@@ -12502,12 +12702,12 @@ SDValue DAGCombiner::reduceBuildVecConvertToConvertBuildVec(SDNode *N) {
for (unsigned i = 0; i != NumInScalars; ++i) {
SDValue In = N->getOperand(i);
- if (In.getOpcode() == ISD::UNDEF)
+ if (In.isUndef())
Opnds.push_back(DAG.getUNDEF(SrcVT));
else
Opnds.push_back(In.getOperand(0));
}
- SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, NVT, Opnds);
+ SDValue BV = DAG.getBuildVector(NVT, dl, Opnds);
AddToWorklist(BV.getNode());
return DAG.getNode(Opcode, dl, VT, BV);
@@ -12545,7 +12745,7 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
for (unsigned i = 0; i != NumInScalars; ++i) {
SDValue Op = N->getOperand(i);
// Ignore undef inputs.
- if (Op.getOpcode() == ISD::UNDEF) continue;
+ if (Op.isUndef()) continue;
// See if we can combine this build_vector into a blend with a zero vector.
if (!VecIn2.getNode() && (isNullConstant(Op) || isNullFPConstant(Op))) {
@@ -12681,7 +12881,7 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
SDValue Ops[2];
Ops[0] = VecIn1;
Ops[1] = VecIn2;
- return DAG.getVectorShuffle(VT, dl, Ops[0], Ops[1], &Mask[0]);
+ return DAG.getVectorShuffle(VT, dl, Ops[0], Ops[1], Mask);
}
return SDValue();
@@ -12735,18 +12935,17 @@ static SDValue combineConcatVectorOfScalars(SDNode *N, SelectionDAG &DAG) {
for (SDValue &Op : Ops) {
if (Op.getValueType() == SVT)
continue;
- if (Op.getOpcode() == ISD::UNDEF)
+ if (Op.isUndef())
Op = ScalarUndef;
else
- Op = DAG.getNode(ISD::BITCAST, DL, SVT, Op);
+ Op = DAG.getBitcast(SVT, Op);
}
}
}
EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SVT,
VT.getSizeInBits() / SVT.getSizeInBits());
- return DAG.getNode(ISD::BITCAST, DL, VT,
- DAG.getNode(ISD::BUILD_VECTOR, DL, VecVT, Ops));
+ return DAG.getBitcast(VT, DAG.getBuildVector(VecVT, DL, Ops));
}
// Check to see if this is a CONCAT_VECTORS of a bunch of EXTRACT_SUBVECTOR
@@ -12768,7 +12967,7 @@ static SDValue combineConcatVectorOfExtracts(SDNode *N, SelectionDAG &DAG) {
Op = Op.getOperand(0);
// UNDEF nodes convert to UNDEF shuffle mask values.
- if (Op.getOpcode() == ISD::UNDEF) {
+ if (Op.isUndef()) {
Mask.append((unsigned)NumOpElts, -1);
continue;
}
@@ -12788,7 +12987,7 @@ static SDValue combineConcatVectorOfExtracts(SDNode *N, SelectionDAG &DAG) {
ExtVec = ExtVec.getOperand(0);
// UNDEF nodes convert to UNDEF shuffle mask values.
- if (ExtVec.getOpcode() == ISD::UNDEF) {
+ if (ExtVec.isUndef()) {
Mask.append((unsigned)NumOpElts, -1);
continue;
}
@@ -12812,11 +13011,11 @@ static SDValue combineConcatVectorOfExtracts(SDNode *N, SelectionDAG &DAG) {
return SDValue();
// At most we can reference 2 inputs in the final shuffle.
- if (SV0.getOpcode() == ISD::UNDEF || SV0 == ExtVec) {
+ if (SV0.isUndef() || SV0 == ExtVec) {
SV0 = ExtVec;
for (int i = 0; i != NumOpElts; ++i)
Mask.push_back(i + ExtIdx);
- } else if (SV1.getOpcode() == ISD::UNDEF || SV1 == ExtVec) {
+ } else if (SV1.isUndef() || SV1 == ExtVec) {
SV1 = ExtVec;
for (int i = 0; i != NumOpElts; ++i)
Mask.push_back(i + ExtIdx + NumElts);
@@ -12844,7 +13043,7 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
// Optimize concat_vectors where all but the first of the vectors are undef.
if (std::all_of(std::next(N->op_begin()), N->op_end(), [](const SDValue &Op) {
- return Op.getOpcode() == ISD::UNDEF;
+ return Op.isUndef();
})) {
SDValue In = N->getOperand(0);
assert(In.getValueType().isVector() && "Must concat vectors");
@@ -12874,7 +13073,7 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
SDLoc dl = SDLoc(N);
SDValue Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NVT, Scalar);
- return DAG.getNode(ISD::BITCAST, dl, VT, Res);
+ return DAG.getBitcast(VT, Res);
}
}
@@ -12885,9 +13084,7 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
auto IsBuildVectorOrUndef = [](const SDValue &Op) {
return ISD::UNDEF == Op.getOpcode() || ISD::BUILD_VECTOR == Op.getOpcode();
};
- bool AllBuildVectorsOrUndefs =
- std::all_of(N->op_begin(), N->op_end(), IsBuildVectorOrUndef);
- if (AllBuildVectorsOrUndefs) {
+ if (llvm::all_of(N->ops(), IsBuildVectorOrUndef)) {
SmallVector<SDValue, 8> Opnds;
EVT SVT = VT.getScalarType();
@@ -12926,7 +13123,7 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
assert(VT.getVectorNumElements() == Opnds.size() &&
"Concat vector type mismatch");
- return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, Opnds);
+ return DAG.getBuildVector(VT, SDLoc(N), Opnds);
}
// Fold CONCAT_VECTORS of only bitcast scalars (or undef) to BUILD_VECTOR.
@@ -12948,7 +13145,7 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
SDValue Op = N->getOperand(i);
- if (Op.getOpcode() == ISD::UNDEF)
+ if (Op.isUndef())
continue;
// Check if this is the identity extract:
@@ -13033,11 +13230,11 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) {
// otherwise => (extract_subvec V1, ExtIdx)
if (InsIdx->getZExtValue() * SmallVT.getScalarType().getSizeInBits() ==
ExtIdx->getZExtValue() * NVT.getScalarType().getSizeInBits())
- return DAG.getNode(ISD::BITCAST, dl, NVT, V->getOperand(1));
- return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, NVT,
- DAG.getNode(ISD::BITCAST, dl,
- N->getOperand(0).getValueType(),
- V->getOperand(0)), N->getOperand(1));
+ return DAG.getBitcast(NVT, V->getOperand(1));
+ return DAG.getNode(
+ ISD::EXTRACT_SUBVECTOR, dl, NVT,
+ DAG.getBitcast(N->getOperand(0).getValueType(), V->getOperand(0)),
+ N->getOperand(1));
}
}
@@ -13148,7 +13345,7 @@ static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) {
// Special case: shuffle(concat(A,B)) can be more efficiently represented
// as concat(shuffle(A,B),UNDEF) if the shuffle doesn't set any of the high
// half vector elements.
- if (NumElemsPerConcat * 2 == NumElts && N1.getOpcode() == ISD::UNDEF &&
+ if (NumElemsPerConcat * 2 == NumElts && N1.isUndef() &&
std::all_of(SVN->getMask().begin() + NumElemsPerConcat,
SVN->getMask().end(), [](int i) { return i == -1; })) {
N0 = DAG.getVectorShuffle(ConcatVT, SDLoc(N), N0.getOperand(0), N0.getOperand(1),
@@ -13204,7 +13401,7 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
assert(N0.getValueType() == VT && "Vector shuffle must be normalized in DAG");
// Canonicalize shuffle undef, undef -> undef
- if (N0.getOpcode() == ISD::UNDEF && N1.getOpcode() == ISD::UNDEF)
+ if (N0.isUndef() && N1.isUndef())
return DAG.getUNDEF(VT);
ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
@@ -13217,29 +13414,15 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
if (Idx >= (int)NumElts) Idx -= NumElts;
NewMask.push_back(Idx);
}
- return DAG.getVectorShuffle(VT, SDLoc(N), N0, DAG.getUNDEF(VT),
- &NewMask[0]);
+ return DAG.getVectorShuffle(VT, SDLoc(N), N0, DAG.getUNDEF(VT), NewMask);
}
// Canonicalize shuffle undef, v -> v, undef. Commute the shuffle mask.
- if (N0.getOpcode() == ISD::UNDEF) {
- SmallVector<int, 8> NewMask;
- for (unsigned i = 0; i != NumElts; ++i) {
- int Idx = SVN->getMaskElt(i);
- if (Idx >= 0) {
- if (Idx >= (int)NumElts)
- Idx -= NumElts;
- else
- Idx = -1; // remove reference to lhs
- }
- NewMask.push_back(Idx);
- }
- return DAG.getVectorShuffle(VT, SDLoc(N), N1, DAG.getUNDEF(VT),
- &NewMask[0]);
- }
+ if (N0.isUndef())
+ return DAG.getCommutedVectorShuffle(*SVN);
// Remove references to rhs if it is undef
- if (N1.getOpcode() == ISD::UNDEF) {
+ if (N1.isUndef()) {
bool Changed = false;
SmallVector<int, 8> NewMask;
for (unsigned i = 0; i != NumElts; ++i) {
@@ -13251,7 +13434,7 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
NewMask.push_back(Idx);
}
if (Changed)
- return DAG.getVectorShuffle(VT, SDLoc(N), N0, N1, &NewMask[0]);
+ return DAG.getVectorShuffle(VT, SDLoc(N), N0, N1, NewMask);
}
// If it is a splat, check if the argument vector is another splat or a
@@ -13275,7 +13458,7 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
SDValue Base;
bool AllSame = true;
for (unsigned i = 0; i != NumElts; ++i) {
- if (V->getOperand(i).getOpcode() != ISD::UNDEF) {
+ if (!V->getOperand(i).isUndef()) {
Base = V->getOperand(i);
break;
}
@@ -13296,13 +13479,12 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
// Canonicalize any other splat as a build_vector.
const SDValue &Splatted = V->getOperand(SVN->getSplatIndex());
SmallVector<SDValue, 8> Ops(NumElts, Splatted);
- SDValue NewBV = DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N),
- V->getValueType(0), Ops);
+ SDValue NewBV = DAG.getBuildVector(V->getValueType(0), SDLoc(N), Ops);
// We may have jumped through bitcasts, so the type of the
// BUILD_VECTOR may not match the type of the shuffle.
if (V->getValueType(0) != VT)
- NewBV = DAG.getNode(ISD::BITCAST, SDLoc(N), VT, NewBV);
+ NewBV = DAG.getBitcast(VT, NewBV);
return NewBV;
}
}
@@ -13315,12 +13497,10 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
if (N0.getOpcode() == ISD::CONCAT_VECTORS &&
Level < AfterLegalizeVectorOps &&
- (N1.getOpcode() == ISD::UNDEF ||
+ (N1.isUndef() ||
(N1.getOpcode() == ISD::CONCAT_VECTORS &&
N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType()))) {
- SDValue V = partitionShuffleOfConcats(N, DAG);
-
- if (V.getNode())
+ if (SDValue V = partitionShuffleOfConcats(N, DAG))
return V;
}
@@ -13357,7 +13537,7 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
Op = TLI.isZExtFree(Op.getValueType(), SVT)
? DAG.getZExtOrTrunc(Op, SDLoc(N), SVT)
: DAG.getSExtOrTrunc(Op, SDLoc(N), SVT);
- return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, Ops);
+ return DAG.getBuildVector(VT, SDLoc(N), Ops);
}
}
@@ -13365,7 +13545,7 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
// attempt to merge the 2 shuffles and suitably bitcast the inputs/output
// back to their original types.
if (N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
- N1.getOpcode() == ISD::UNDEF && Level < AfterLegalizeVectorOps &&
+ N1.isUndef() && Level < AfterLegalizeVectorOps &&
TLI.isTypeLegal(VT)) {
// Peek through the bitcast only if there is one user.
@@ -13426,11 +13606,10 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
}
if (LegalMask) {
- SV0 = DAG.getNode(ISD::BITCAST, SDLoc(N), ScaleVT, SV0);
- SV1 = DAG.getNode(ISD::BITCAST, SDLoc(N), ScaleVT, SV1);
- return DAG.getNode(
- ISD::BITCAST, SDLoc(N), VT,
- DAG.getVectorShuffle(ScaleVT, SDLoc(N), SV0, SV1, NewMask));
+ SV0 = DAG.getBitcast(ScaleVT, SV0);
+ SV1 = DAG.getBitcast(ScaleVT, SV1);
+ return DAG.getBitcast(
+ VT, DAG.getVectorShuffle(ScaleVT, SDLoc(N), SV0, SV1, NewMask));
}
}
}
@@ -13451,7 +13630,7 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
SDValue SV0 = N1->getOperand(0);
SDValue SV1 = N1->getOperand(1);
bool HasSameOp0 = N0 == SV0;
- bool IsSV1Undef = SV1.getOpcode() == ISD::UNDEF;
+ bool IsSV1Undef = SV1.isUndef();
if (HasSameOp0 || IsSV1Undef || N0 == SV1)
// Commute the operands of this shuffle so that next rule
// will trigger.
@@ -13504,7 +13683,7 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
}
// Simple case where 'CurrentVec' is UNDEF.
- if (CurrentVec.getOpcode() == ISD::UNDEF) {
+ if (CurrentVec.isUndef()) {
Mask.push_back(-1);
continue;
}
@@ -13559,7 +13738,7 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
// shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
// shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
// shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
- return DAG.getVectorShuffle(VT, SDLoc(N), SV0, SV1, &Mask[0]);
+ return DAG.getVectorShuffle(VT, SDLoc(N), SV0, SV1, Mask);
}
return SDValue();
@@ -13595,26 +13774,30 @@ SDValue DAGCombiner::visitSCALAR_TO_VECTOR(SDNode *N) {
SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
SDValue N2 = N->getOperand(2);
+ if (N0.getValueType() != N1.getValueType())
+ return SDValue();
+
// If the input vector is a concatenation, and the insert replaces
// one of the halves, we can optimize into a single concat_vectors.
- if (N0.getOpcode() == ISD::CONCAT_VECTORS &&
- N0->getNumOperands() == 2 && N2.getOpcode() == ISD::Constant) {
+ if (N0.getOpcode() == ISD::CONCAT_VECTORS && N0->getNumOperands() == 2 &&
+ N2.getOpcode() == ISD::Constant) {
APInt InsIdx = cast<ConstantSDNode>(N2)->getAPIntValue();
EVT VT = N->getValueType(0);
// Lower half: fold (insert_subvector (concat_vectors X, Y), Z) ->
// (concat_vectors Z, Y)
if (InsIdx == 0)
- return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT,
- N->getOperand(1), N0.getOperand(1));
+ return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, N1,
+ N0.getOperand(1));
// Upper half: fold (insert_subvector (concat_vectors X, Y), Z) ->
// (concat_vectors X, Z)
- if (InsIdx == VT.getVectorNumElements()/2)
- return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT,
- N0.getOperand(0), N->getOperand(1));
+ if (InsIdx == VT.getVectorNumElements() / 2)
+ return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, N0.getOperand(0),
+ N1);
}
return SDValue();
@@ -13684,7 +13867,7 @@ SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
int EltIdx = i / Split;
int SubIdx = i % Split;
SDValue Elt = RHS.getOperand(EltIdx);
- if (Elt.getOpcode() == ISD::UNDEF) {
+ if (Elt.isUndef()) {
Indices.push_back(-1);
continue;
}
@@ -13724,7 +13907,7 @@ SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
SDValue Zero = DAG.getConstant(0, dl, ClearVT);
return DAG.getBitcast(VT, DAG.getVectorShuffle(ClearVT, dl,
DAG.getBitcast(ClearVT, LHS),
- Zero, &Indices[0]));
+ Zero, Indices));
};
// Determine maximum split level (byte level masking).
@@ -13763,8 +13946,8 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
// -> (shuffle (VBinOp (A, B)), Undef, Mask).
if (LegalTypes && isa<ShuffleVectorSDNode>(LHS) &&
isa<ShuffleVectorSDNode>(RHS) && LHS.hasOneUse() && RHS.hasOneUse() &&
- LHS.getOperand(1).getOpcode() == ISD::UNDEF &&
- RHS.getOperand(1).getOpcode() == ISD::UNDEF) {
+ LHS.getOperand(1).isUndef() &&
+ RHS.getOperand(1).isUndef()) {
ShuffleVectorSDNode *SVN0 = cast<ShuffleVectorSDNode>(LHS);
ShuffleVectorSDNode *SVN1 = cast<ShuffleVectorSDNode>(RHS);
@@ -13776,15 +13959,15 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
N->getFlags());
AddUsersToWorklist(N);
return DAG.getVectorShuffle(VT, SDLoc(N), NewBinOp, UndefVector,
- &SVN0->getMask()[0]);
+ SVN0->getMask());
}
}
return SDValue();
}
-SDValue DAGCombiner::SimplifySelect(SDLoc DL, SDValue N0,
- SDValue N1, SDValue N2){
+SDValue DAGCombiner::SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1,
+ SDValue N2) {
assert(N0.getOpcode() ==ISD::SETCC && "First argument must be a SetCC node!");
SDValue SCC = SimplifySelectCC(DL, N0.getOperand(0), N0.getOperand(1), N1, N2,
@@ -13819,33 +14002,33 @@ SDValue DAGCombiner::SimplifySelect(SDLoc DL, SDValue N0,
bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
SDValue RHS) {
- // fold (select (setcc x, -0.0, *lt), NaN, (fsqrt x))
- // The select + setcc is redundant, because fsqrt returns NaN for X < -0.
+ // fold (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x))
+ // The select + setcc is redundant, because fsqrt returns NaN for X < 0.
if (const ConstantFPSDNode *NaN = isConstOrConstSplatFP(LHS)) {
if (NaN->isNaN() && RHS.getOpcode() == ISD::FSQRT) {
// We have: (select (setcc ?, ?, ?), NaN, (fsqrt ?))
SDValue Sqrt = RHS;
ISD::CondCode CC;
SDValue CmpLHS;
- const ConstantFPSDNode *NegZero = nullptr;
+ const ConstantFPSDNode *Zero = nullptr;
if (TheSelect->getOpcode() == ISD::SELECT_CC) {
CC = dyn_cast<CondCodeSDNode>(TheSelect->getOperand(4))->get();
CmpLHS = TheSelect->getOperand(0);
- NegZero = isConstOrConstSplatFP(TheSelect->getOperand(1));
+ Zero = isConstOrConstSplatFP(TheSelect->getOperand(1));
} else {
// SELECT or VSELECT
SDValue Cmp = TheSelect->getOperand(0);
if (Cmp.getOpcode() == ISD::SETCC) {
CC = dyn_cast<CondCodeSDNode>(Cmp.getOperand(2))->get();
CmpLHS = Cmp.getOperand(0);
- NegZero = isConstOrConstSplatFP(Cmp.getOperand(1));
+ Zero = isConstOrConstSplatFP(Cmp.getOperand(1));
}
}
- if (NegZero && NegZero->isNegative() && NegZero->isZero() &&
+ if (Zero && Zero->isZero() &&
Sqrt.getOperand(0) == CmpLHS && (CC == ISD::SETOLT ||
CC == ISD::SETULT || CC == ISD::SETLT)) {
- // We have: (select (setcc x, -0.0, *lt), NaN, (fsqrt x))
+ // We have: (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x))
CombineTo(TheSelect, Sqrt);
return true;
}
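Why widening the constant from -0.0 to [+-]0.0 is safe, as a small sketch: IEEE-754 comparisons treat the two zeros as equal, so the *lt predicates select the NaN arm for exactly the same inputs either way, and fsqrt already returns NaN for those inputs.

    #include <cmath>
    // Holds for every float x, including NaN (both sides false then):
    bool same_selection(float x) { return (x < 0.0f) == (x < -0.0f); }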
@@ -13932,24 +14115,22 @@ bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
// It is safe to replace the two loads if they have different alignments,
// but the new load must be the minimum (most restrictive) alignment of the
// inputs.
- bool isInvariant = LLD->isInvariant() & RLD->isInvariant();
unsigned Alignment = std::min(LLD->getAlignment(), RLD->getAlignment());
+ MachineMemOperand::Flags MMOFlags = LLD->getMemOperand()->getFlags();
+ if (!RLD->isInvariant())
+ MMOFlags &= ~MachineMemOperand::MOInvariant;
if (LLD->getExtensionType() == ISD::NON_EXTLOAD) {
- Load = DAG.getLoad(TheSelect->getValueType(0),
- SDLoc(TheSelect),
- // FIXME: Discards pointer and AA info.
- LLD->getChain(), Addr, MachinePointerInfo(),
- LLD->isVolatile(), LLD->isNonTemporal(),
- isInvariant, Alignment);
+ // FIXME: Discards pointer and AA info.
+ Load = DAG.getLoad(TheSelect->getValueType(0), SDLoc(TheSelect),
+ LLD->getChain(), Addr, MachinePointerInfo(), Alignment,
+ MMOFlags);
} else {
- Load = DAG.getExtLoad(LLD->getExtensionType() == ISD::EXTLOAD ?
- RLD->getExtensionType() : LLD->getExtensionType(),
- SDLoc(TheSelect),
- TheSelect->getValueType(0),
- // FIXME: Discards pointer and AA info.
- LLD->getChain(), Addr, MachinePointerInfo(),
- LLD->getMemoryVT(), LLD->isVolatile(),
- LLD->isNonTemporal(), isInvariant, Alignment);
+ // FIXME: Discards pointer and AA info.
+ Load = DAG.getExtLoad(
+ LLD->getExtensionType() == ISD::EXTLOAD ? RLD->getExtensionType()
+ : LLD->getExtensionType(),
+ SDLoc(TheSelect), TheSelect->getValueType(0), LLD->getChain(), Addr,
+ MachinePointerInfo(), LLD->getMemoryVT(), Alignment, MMOFlags);
}
// Users of the select now use the result of the load.
@@ -13967,9 +14148,9 @@ bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
/// Simplify an expression of the form (N0 cond N1) ? N2 : N3
/// where 'cond' is the comparison specified by CC.
-SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1,
- SDValue N2, SDValue N3,
- ISD::CondCode CC, bool NotExtCompare) {
+SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
+ SDValue N2, SDValue N3, ISD::CondCode CC,
+ bool NotExtCompare) {
// (x ? y : y) -> y.
if (N2 == N3) return N2;
@@ -14057,7 +14238,7 @@ SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1,
return DAG.getLoad(
TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx,
MachinePointerInfo::getConstantPool(DAG.getMachineFunction()),
- false, false, false, Alignment);
+ Alignment);
}
}
@@ -14116,7 +14297,7 @@ SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1,
ConstantSDNode *ConstAndRHS = dyn_cast<ConstantSDNode>(N0->getOperand(1));
if (ConstAndRHS && ConstAndRHS->getAPIntValue().countPopulation() == 1) {
// Shift the tested bit over the sign bit.
- APInt AndMask = ConstAndRHS->getAPIntValue();
+ const APInt &AndMask = ConstAndRHS->getAPIntValue();
SDValue ShlAmt =
DAG.getConstant(AndMask.countLeadingZeros(), SDLoc(AndLHS),
getShiftAmountTy(AndLHS.getValueType()));
@@ -14210,13 +14391,48 @@ SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1,
}
}
+ // select_cc seteq X, 0, sizeof(X), ctlz(X) -> ctlz(X)
+ // select_cc seteq X, 0, sizeof(X), ctlz_zero_undef(X) -> ctlz(X)
+ // select_cc seteq X, 0, sizeof(X), cttz(X) -> cttz(X)
+ // select_cc seteq X, 0, sizeof(X), cttz_zero_undef(X) -> cttz(X)
+ // select_cc setne X, 0, ctlz(X), sizeof(X) -> ctlz(X)
+ // select_cc setne X, 0, ctlz_zero_undef(X), sizeof(X) -> ctlz(X)
+ // select_cc setne X, 0, cttz(X), sizeof(X) -> cttz(X)
+ // select_cc setne X, 0, cttz_zero_undef(X), sizeof(X) -> cttz(X)
+ if (N1C && N1C->isNullValue() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
+ SDValue ValueOnZero = N2;
+ SDValue Count = N3;
+ // If the condition is NE instead of EQ, swap the operands.
+ if (CC == ISD::SETNE)
+ std::swap(ValueOnZero, Count);
+ // Check if the value on zero is a constant equal to the bit width of the type.
+ if (auto *ValueOnZeroC = dyn_cast<ConstantSDNode>(ValueOnZero)) {
+ if (ValueOnZeroC->getAPIntValue() == VT.getSizeInBits()) {
+ // If the other operand is cttz/cttz_zero_undef of N0, and cttz is
+ // legal, combine to just cttz.
+ if ((Count.getOpcode() == ISD::CTTZ ||
+ Count.getOpcode() == ISD::CTTZ_ZERO_UNDEF) &&
+ N0 == Count.getOperand(0) &&
+ (!LegalOperations || TLI.isOperationLegal(ISD::CTTZ, VT)))
+ return DAG.getNode(ISD::CTTZ, DL, VT, N0);
+ // If the other operand is ctlz/ctlz_zero_undef of N0, and ctlz is
+ // legal, combine to just ctlz.
+ if ((Count.getOpcode() == ISD::CTLZ ||
+ Count.getOpcode() == ISD::CTLZ_ZERO_UNDEF) &&
+ N0 == Count.getOperand(0) &&
+ (!LegalOperations || TLI.isOperationLegal(ISD::CTLZ, VT)))
+ return DAG.getNode(ISD::CTLZ, DL, VT, N0);
+ }
+ }
+ }
+
return SDValue();
}
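Source-level idiom the new select_cc folds target, sketched with a GCC/Clang builtin: front ends guard the count intrinsics with a zero test, and when the target's cttz/ctlz is legal (defined to return the bit width at zero) the guard folds away.

    unsigned count_trailing(unsigned x) {
      return x == 0 ? 32 : __builtin_ctz(x);  // -> a single cttz when legal
    }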
/// This is a stub for TargetLowering::SimplifySetCC.
-SDValue DAGCombiner::SimplifySetCC(EVT VT, SDValue N0,
- SDValue N1, ISD::CondCode Cond,
- SDLoc DL, bool foldBooleans) {
+SDValue DAGCombiner::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
+ ISD::CondCode Cond, const SDLoc &DL,
+ bool foldBooleans) {
TargetLowering::DAGCombinerInfo
DagCombineInfo(DAG, Level, false, this);
return TLI.SimplifySetCC(VT, N0, N1, Cond, foldBooleans, DagCombineInfo, DL);
@@ -14227,6 +14443,11 @@ SDValue DAGCombiner::SimplifySetCC(EVT VT, SDValue N0,
/// by a magic number.
/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
SDValue DAGCombiner::BuildSDIV(SDNode *N) {
+ // When optimizing for minimum size, we don't want to expand a div into a
+ // mul and a shift.
+ if (DAG.getMachineFunction().getFunction()->optForMinSize())
+ return SDValue();
+
ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
if (!C)
return SDValue();
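What the suppressed expansion looks like, as a hedged sketch for a 32-bit signed divide by 10 (magic constant per Hacker's Delight; it assumes arithmetic right shift, as codegen does). At minsize the single div instruction is smaller than this sequence, hence the new early-out:

    int div10(int n) {
      long long p = (long long)n * 0x66666667LL;  // signed widening multiply
      int q = (int)(p >> 34);                     // high word, then >> 2
      return q + ((unsigned)q >> 31);             // round toward zero for n < 0
    }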
@@ -14268,6 +14489,11 @@ SDValue DAGCombiner::BuildSDIVPow2(SDNode *N) {
/// number.
/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
SDValue DAGCombiner::BuildUDIV(SDNode *N) {
+ // When optimizing for minimum size, we don't want to expand a div into a
+ // mul and a shift.
+ if (DAG.getMachineFunction().getFunction()->optForMinSize())
+ return SDValue();
+
ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
if (!C)
return SDValue();
@@ -14334,9 +14560,9 @@ SDValue DAGCombiner::BuildReciprocalEstimate(SDValue Op, SDNodeFlags *Flags) {
/// =>
/// X_{i+1} = X_i (1.5 - A X_i^2 / 2)
/// As a result, we precompute A/2 prior to the iteration loop.
-SDValue DAGCombiner::BuildRsqrtNROneConst(SDValue Arg, SDValue Est,
- unsigned Iterations,
- SDNodeFlags *Flags) {
+SDValue DAGCombiner::buildSqrtNROneConst(SDValue Arg, SDValue Est,
+ unsigned Iterations,
+ SDNodeFlags *Flags, bool Reciprocal) {
EVT VT = Arg.getValueType();
SDLoc DL(Arg);
SDValue ThreeHalves = DAG.getConstantFP(1.5, DL, VT);
@@ -14363,6 +14589,13 @@ SDValue DAGCombiner::BuildRsqrtNROneConst(SDValue Arg, SDValue Est,
Est = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
AddToWorklist(Est.getNode());
}
+
+ // If non-reciprocal square root is requested, multiply the result by Arg.
+ if (!Reciprocal) {
+ Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Arg, Flags);
+ AddToWorklist(Est.getNode());
+ }
+
return Est;
}
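A standalone numeric sketch of the one-constant iteration above (the initial estimate is faked here; in the combiner it comes from the target's estimate node):

    #include <cmath>
    #include <cstdio>
    int main() {
      float A = 2.0f, HalfA = 0.5f * A;  // A/2 precomputed, as the code does
      float Est = 0.7f;                  // stand-in for the hardware estimate
      for (int i = 0; i < 3; ++i)
        Est = Est * (1.5f - HalfA * Est * Est);  // X_{i+1} = X_i(1.5 - A/2 X_i^2)
      std::printf("%.7f vs %.7f\n", Est, 1.0f / std::sqrt(A));
      return 0;
    }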
@@ -14371,35 +14604,55 @@ SDValue DAGCombiner::BuildRsqrtNROneConst(SDValue Arg, SDValue Est,
/// F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
/// =>
/// X_{i+1} = (-0.5 * X_i) * (A * X_i * X_i + (-3.0))
-SDValue DAGCombiner::BuildRsqrtNRTwoConst(SDValue Arg, SDValue Est,
- unsigned Iterations,
- SDNodeFlags *Flags) {
+SDValue DAGCombiner::buildSqrtNRTwoConst(SDValue Arg, SDValue Est,
+ unsigned Iterations,
+ SDNodeFlags *Flags, bool Reciprocal) {
EVT VT = Arg.getValueType();
SDLoc DL(Arg);
SDValue MinusThree = DAG.getConstantFP(-3.0, DL, VT);
SDValue MinusHalf = DAG.getConstantFP(-0.5, DL, VT);
- // Newton iterations: Est = -0.5 * Est * (-3.0 + Arg * Est * Est)
- for (unsigned i = 0; i < Iterations; ++i) {
- SDValue HalfEst = DAG.getNode(ISD::FMUL, DL, VT, Est, MinusHalf, Flags);
- AddToWorklist(HalfEst.getNode());
-
- Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Est, Flags);
- AddToWorklist(Est.getNode());
+ // This routine must enter the loop below to work correctly
+ // when (Reciprocal == false).
+ assert(Iterations > 0);
- Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Arg, Flags);
- AddToWorklist(Est.getNode());
-
- Est = DAG.getNode(ISD::FADD, DL, VT, Est, MinusThree, Flags);
- AddToWorklist(Est.getNode());
+ // Newton iterations for reciprocal square root:
+ // E = (E * -0.5) * ((A * E) * E + -3.0)
+ for (unsigned i = 0; i < Iterations; ++i) {
+ SDValue AE = DAG.getNode(ISD::FMUL, DL, VT, Arg, Est, Flags);
+ AddToWorklist(AE.getNode());
+
+ SDValue AEE = DAG.getNode(ISD::FMUL, DL, VT, AE, Est, Flags);
+ AddToWorklist(AEE.getNode());
+
+ SDValue RHS = DAG.getNode(ISD::FADD, DL, VT, AEE, MinusThree, Flags);
+ AddToWorklist(RHS.getNode());
+
+ // When calculating a square root, at the last iteration build:
+ // S = ((A * E) * -0.5) * ((A * E) * E + -3.0)
+ // (notice a common subexpression)
+ SDValue LHS;
+ if (Reciprocal || (i + 1) < Iterations) {
+ // RSQRT: LHS = (E * -0.5)
+ LHS = DAG.getNode(ISD::FMUL, DL, VT, Est, MinusHalf, Flags);
+ } else {
+ // SQRT: LHS = (A * E) * -0.5
+ LHS = DAG.getNode(ISD::FMUL, DL, VT, AE, MinusHalf, Flags);
+ }
+ AddToWorklist(LHS.getNode());
- Est = DAG.getNode(ISD::FMUL, DL, VT, Est, HalfEst, Flags);
+ Est = DAG.getNode(ISD::FMUL, DL, VT, LHS, RHS, Flags);
AddToWorklist(Est.getNode());
}
+
return Est;
}
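The control flow above, condensed into a scalar sketch; note how the final iteration of the sqrt flavor reuses A*E, so sqrt(A) = A*rsqrt(A) costs no extra multiply:

    float nr_two_const(float A, float Est, unsigned Iterations, bool Reciprocal) {
      for (unsigned i = 0; i < Iterations; ++i) {
        float AE  = A * Est;
        float RHS = AE * Est - 3.0f;
        // Last sqrt iteration folds the A* factor into the -0.5 term.
        float LHS = (Reciprocal || i + 1 < Iterations) ? Est * -0.5f
                                                       : AE * -0.5f;
        Est = LHS * RHS;
      }
      return Est;
    }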
-SDValue DAGCombiner::BuildRsqrtEstimate(SDValue Op, SDNodeFlags *Flags) {
+/// Build code to calculate either rsqrt(Op) or sqrt(Op). In the latter case
+/// Op*rsqrt(Op) is actually computed, so additional postprocessing is needed if
+/// Op can be zero.
+SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags *Flags,
+ bool Reciprocal) {
if (Level >= AfterLegalizeDAG)
return SDValue();
@@ -14410,9 +14663,9 @@ SDValue DAGCombiner::BuildRsqrtEstimate(SDValue Op, SDNodeFlags *Flags) {
if (SDValue Est = TLI.getRsqrtEstimate(Op, DCI, Iterations, UseOneConstNR)) {
AddToWorklist(Est.getNode());
if (Iterations) {
- Est = UseOneConstNR ?
- BuildRsqrtNROneConst(Op, Est, Iterations, Flags) :
- BuildRsqrtNRTwoConst(Op, Est, Iterations, Flags);
+ Est = UseOneConstNR
+ ? buildSqrtNROneConst(Op, Est, Iterations, Flags, Reciprocal)
+ : buildSqrtNRTwoConst(Op, Est, Iterations, Flags, Reciprocal);
}
return Est;
}
@@ -14420,6 +14673,30 @@ SDValue DAGCombiner::BuildRsqrtEstimate(SDValue Op, SDNodeFlags *Flags) {
return SDValue();
}
+SDValue DAGCombiner::buildRsqrtEstimate(SDValue Op, SDNodeFlags *Flags) {
+ return buildSqrtEstimateImpl(Op, Flags, true);
+}
+
+SDValue DAGCombiner::buildSqrtEstimate(SDValue Op, SDNodeFlags *Flags) {
+ SDValue Est = buildSqrtEstimateImpl(Op, Flags, false);
+ if (!Est)
+ return SDValue();
+
+ // Unfortunately, Est is now NaN if the input was exactly 0.
+ // Select out this case and force the answer to 0.
+ EVT VT = Est.getValueType();
+ SDLoc DL(Op);
+ SDValue Zero = DAG.getConstantFP(0.0, DL, VT);
+ EVT CCVT = getSetCCResultType(VT);
+ SDValue ZeroCmp = DAG.getSetCC(DL, CCVT, Op, Zero, ISD::SETEQ);
+ AddToWorklist(ZeroCmp.getNode());
+
+ Est = DAG.getNode(VT.isVector() ? ISD::VSELECT : ISD::SELECT, DL, VT, ZeroCmp,
+ Zero, Est);
+ AddToWorklist(Est.getNode());
+ return Est;
+}
+
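
Note: a standalone sketch of the failure mode the select guards against, assuming IEEE-754 semantics: the estimate path computes sqrt(Op) as Op * rsqrt(Op), which is 0 * inf == NaN when Op is exactly 0:

    #include <cmath>
    #include <cstdio>

    int main() {
      float Op = 0.0f;
      float Rsqrt = 1.0f / std::sqrt(Op);       // +inf for Op == 0
      float Sqrt = Op * Rsqrt;                  // 0 * inf -> NaN
      float Fixed = (Op == 0.0f) ? 0.0f : Sqrt; // the SELECT built above
      std::printf("%f %f\n", Sqrt, Fixed);      // prints a NaN, then 0.000000
    }
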
/// Return true if base is a frame index, which is known not to alias with
/// anything but itself. Provides base object and offset as results.
static bool FindBaseOffset(SDValue Ptr, SDValue &Base, int64_t &Offset,
@@ -14514,7 +14791,7 @@ bool DAGCombiner::isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const {
(Op0->getSrcValueOffset() != Op1->getSrcValueOffset()) &&
(Op0->getMemoryVT().getSizeInBits() >> 3 ==
Op1->getMemoryVT().getSizeInBits() >> 3) &&
- (Op0->getOriginalAlignment() > Op0->getMemoryVT().getSizeInBits()) >> 3) {
+ (Op0->getOriginalAlignment() > (Op0->getMemoryVT().getSizeInBits() >> 3))) {
int64_t OffAlign1 = Op0->getSrcValueOffset() % Op0->getOriginalAlignment();
int64_t OffAlign2 = Op1->getSrcValueOffset() % Op1->getOriginalAlignment();
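
Note: the hunk above fixes an operator-precedence bug. A standalone demonstration of why the old form was dead code (the values are made up):

    #include <cstdio>

    int main() {
      unsigned Align = 16, SizeInBits = 64;
      unsigned Old = (Align > SizeInBits) >> 3; // bool -> 0 or 1, >> 3 == 0
      bool New = Align > (SizeInBits >> 3);     // 16 > 8 bytes -> true
      std::printf("%u %d\n", Old, New);         // prints "0 1"
    }
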
@@ -14634,63 +14911,6 @@ void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
break;
}
}
-
- // We need to be careful here to also search for aliases through the
- // value operand of a store, etc. Consider the following situation:
- // Token1 = ...
- // L1 = load Token1, %52
- // S1 = store Token1, L1, %51
- // L2 = load Token1, %52+8
- // S2 = store Token1, L2, %51+8
- // Token2 = Token(S1, S2)
- // L3 = load Token2, %53
- // S3 = store Token2, L3, %52
- // L4 = load Token2, %53+8
- // S4 = store Token2, L4, %52+8
- // If we search for aliases of S3 (which loads address %52), and we look
- // only through the chain, then we'll miss the trivial dependence on L1
- // (which also loads from %52). We then might change all loads and
- // stores to use Token1 as their chain operand, which could result in
- // copying %53 into %52 before copying %52 into %51 (which should
- // happen first).
- //
- // The problem is, however, that searching for such data dependencies
- // can become expensive, and the cost is not directly related to the
- // chain depth. Instead, we'll rule out such configurations here by
- // insisting that we've visited all chain users (except for users
- // of the original chain, which is not necessary). When doing this,
- // we need to look through nodes we don't care about (otherwise, things
- // like register copies will interfere with trivial cases).
-
- SmallVector<const SDNode *, 16> Worklist;
- for (const SDNode *N : Visited)
- if (N != OriginalChain.getNode())
- Worklist.push_back(N);
-
- while (!Worklist.empty()) {
- const SDNode *M = Worklist.pop_back_val();
-
- // We have already visited M, and want to make sure we've visited any uses
- // of M that we care about. For uses that we've not visisted, and don't
- // care about, queue them to the worklist.
-
- for (SDNode::use_iterator UI = M->use_begin(),
- UIE = M->use_end(); UI != UIE; ++UI)
- if (UI.getUse().getValueType() == MVT::Other &&
- Visited.insert(*UI).second) {
- if (isa<MemSDNode>(*UI)) {
- // We've not visited this use, and we care about it (it could have an
- // ordering dependency with the original node).
- Aliases.clear();
- Aliases.push_back(OriginalChain);
- return;
- }
-
- // We've not visited this use, but we don't care about it. Mark it as
- // visited and enqueue it to the worklist.
- Worklist.push_back(*UI);
- }
- }
}
/// Walk up chain skipping non-aliasing memory nodes, looking for a better chain
@@ -14713,17 +14933,17 @@ SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) {
return DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Aliases);
}
-bool DAGCombiner::findBetterNeighborChains(StoreSDNode* St) {
+bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) {
// This holds the base pointer, index, and the offset in bytes from the base
// pointer.
- BaseIndexOffset BasePtr = BaseIndexOffset::match(St->getBasePtr());
+ BaseIndexOffset BasePtr = BaseIndexOffset::match(St->getBasePtr(), DAG);
// We must have a base and an offset.
if (!BasePtr.Base.getNode())
return false;
// Do not handle stores to undef base pointers.
- if (BasePtr.Base.getOpcode() == ISD::UNDEF)
+ if (BasePtr.Base.isUndef())
return false;
SmallVector<StoreSDNode *, 8> ChainedStores;
@@ -14742,7 +14962,7 @@ bool DAGCombiner::findBetterNeighborChains(StoreSDNode* St) {
break;
// Find the base pointer and offset for this memory node.
- BaseIndexOffset Ptr = BaseIndexOffset::match(Index->getBasePtr());
+ BaseIndexOffset Ptr = BaseIndexOffset::match(Index->getBasePtr(), DAG);
// Check that the base pointer is the same as the original one.
if (!Ptr.equalBaseIndex(BasePtr))
@@ -14756,6 +14976,10 @@ bool DAGCombiner::findBetterNeighborChains(StoreSDNode* St) {
while (true) {
if (StoreSDNode *STn = dyn_cast<StoreSDNode>(NextInChain)) {
// We found a store node. Use it for the next iteration.
+ if (STn->isVolatile() || STn->isIndexed()) {
+ Index = nullptr;
+ break;
+ }
ChainedStores.push_back(STn);
Index = STn;
break;
@@ -14769,7 +14993,7 @@ bool DAGCombiner::findBetterNeighborChains(StoreSDNode* St) {
}
}
- bool MadeChange = false;
+ bool MadeChangeToSt = false;
SmallVector<std::pair<StoreSDNode *, SDValue>, 8> BetterChains;
for (StoreSDNode *ChainedStore : ChainedStores) {
@@ -14777,7 +15001,8 @@ bool DAGCombiner::findBetterNeighborChains(StoreSDNode* St) {
SDValue BetterChain = FindBetterChain(ChainedStore, Chain);
if (Chain != BetterChain) {
- MadeChange = true;
+ if (ChainedStore == St)
+ MadeChangeToSt = true;
BetterChains.push_back(std::make_pair(ChainedStore, BetterChain));
}
}
@@ -14787,7 +15012,7 @@ bool DAGCombiner::findBetterNeighborChains(StoreSDNode* St) {
for (auto Replacement : BetterChains)
replaceStoreChain(Replacement.first, Replacement.second);
- return MadeChange;
+ return MadeChangeToSt;
}
/// This is the entry point for the file.
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
index cfbb209..b10da00 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -39,7 +39,6 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/CodeGen/Analysis.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
@@ -56,6 +55,7 @@
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/Function.h"
+#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
@@ -88,6 +88,8 @@ void FastISel::ArgListEntry::setAttributes(ImmutableCallSite *CS,
IsByVal = CS->paramHasAttr(AttrIdx, Attribute::ByVal);
IsInAlloca = CS->paramHasAttr(AttrIdx, Attribute::InAlloca);
IsReturned = CS->paramHasAttr(AttrIdx, Attribute::Returned);
+ IsSwiftSelf = CS->paramHasAttr(AttrIdx, Attribute::SwiftSelf);
+ IsSwiftError = CS->paramHasAttr(AttrIdx, Attribute::SwiftError);
Alignment = CS->getParamAlignment(AttrIdx);
}
@@ -351,7 +353,8 @@ void FastISel::recomputeInsertPt() {
void FastISel::removeDeadCode(MachineBasicBlock::iterator I,
MachineBasicBlock::iterator E) {
- assert(I && E && std::distance(I, E) > 0 && "Invalid iterator!");
+ assert(static_cast<MachineInstr *>(I) && static_cast<MachineInstr *>(E) &&
+ std::distance(I, E) > 0 && "Invalid iterator!");
while (I != E) {
MachineInstr *Dead = &*I;
++I;
@@ -372,7 +375,7 @@ FastISel::SavePoint FastISel::enterLocalValueArea() {
void FastISel::leaveLocalValueArea(SavePoint OldInsertPt) {
if (FuncInfo.InsertPt != FuncInfo.MBB->begin())
- LastLocalValue = std::prev(FuncInfo.InsertPt);
+ LastLocalValue = &*std::prev(FuncInfo.InsertPt);
// Restore the previous insert position.
FuncInfo.InsertPt = OldInsertPt.InsertPt;
@@ -492,13 +495,11 @@ bool FastISel::selectGetElementPtr(const User *I) {
uint64_t TotalOffs = 0;
// FIXME: What's a good SWAG number for MaxOffs?
uint64_t MaxOffs = 2048;
- Type *Ty = I->getOperand(0)->getType();
MVT VT = TLI.getPointerTy(DL);
- for (GetElementPtrInst::const_op_iterator OI = I->op_begin() + 1,
- E = I->op_end();
- OI != E; ++OI) {
- const Value *Idx = *OI;
- if (auto *StTy = dyn_cast<StructType>(Ty)) {
+ for (gep_type_iterator GTI = gep_type_begin(I), E = gep_type_end(I);
+ GTI != E; ++GTI) {
+ const Value *Idx = GTI.getOperand();
+ if (auto *StTy = dyn_cast<StructType>(*GTI)) {
uint64_t Field = cast<ConstantInt>(Idx)->getZExtValue();
if (Field) {
// N = N + Offset
@@ -511,9 +512,8 @@ bool FastISel::selectGetElementPtr(const User *I) {
TotalOffs = 0;
}
}
- Ty = StTy->getElementType(Field);
} else {
- Ty = cast<SequentialType>(Ty)->getElementType();
+ Type *Ty = GTI.getIndexedType();
// If this is a constant subscript, handle it quickly.
if (const auto *CI = dyn_cast<ConstantInt>(Idx)) {
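
Note: a compile-level sketch of the gep_type_iterator pattern adopted above, assuming LLVM-3.9-era headers and all-constant, non-negative indices; constantGEPOffset is an illustrative helper, not FastISel code:

    #include <cstdint>
    #include "llvm/IR/Constants.h"
    #include "llvm/IR/DataLayout.h"
    #include "llvm/IR/GetElementPtrTypeIterator.h"
    #include "llvm/IR/Instructions.h"
    using namespace llvm;

    static uint64_t constantGEPOffset(const DataLayout &DL, const User *GEP) {
      uint64_t TotalOffs = 0;
      for (gep_type_iterator GTI = gep_type_begin(GEP), E = gep_type_end(GEP);
           GTI != E; ++GTI) {
        const Value *Idx = GTI.getOperand();
        if (auto *StTy = dyn_cast<StructType>(*GTI)) {
          // Struct index: a field number, resolved via the struct layout.
          unsigned Field = cast<ConstantInt>(Idx)->getZExtValue();
          TotalOffs += DL.getStructLayout(StTy)->getElementOffset(Field);
        } else {
          // Sequential index: scale by the size of the indexed element type.
          Type *Ty = GTI.getIndexedType();
          TotalOffs +=
              DL.getTypeAllocSize(Ty) * cast<ConstantInt>(Idx)->getSExtValue();
        }
      }
      return TotalOffs;
    }
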
@@ -880,9 +880,8 @@ bool FastISel::lowerCallTo(const CallInst *CI, MCSymbol *Symbol,
unsigned NumArgs) {
ImmutableCallSite CS(CI);
- PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType());
- FunctionType *FTy = cast<FunctionType>(PT->getElementType());
- Type *RetTy = FTy->getReturnType();
+ FunctionType *FTy = CS.getFunctionType();
+ Type *RetTy = CS.getType();
ArgListTy Args;
Args.reserve(NumArgs);
@@ -960,6 +959,10 @@ bool FastISel::lowerCallTo(CallLoweringInfo &CLI) {
Flags.setInReg();
if (Arg.IsSRet)
Flags.setSRet();
+ if (Arg.IsSwiftSelf)
+ Flags.setSwiftSelf();
+ if (Arg.IsSwiftError)
+ Flags.setSwiftError();
if (Arg.IsByVal)
Flags.setByVal();
if (Arg.IsInAlloca) {
@@ -1010,9 +1013,8 @@ bool FastISel::lowerCallTo(CallLoweringInfo &CLI) {
bool FastISel::lowerCall(const CallInst *CI) {
ImmutableCallSite CS(CI);
- PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType());
- FunctionType *FuncTy = cast<FunctionType>(PT->getElementType());
- Type *RetTy = FuncTy->getReturnType();
+ FunctionType *FuncTy = CS.getFunctionType();
+ Type *RetTy = CS.getType();
ArgListTy Args;
ArgListEntry Entry;
@@ -1322,6 +1324,15 @@ bool FastISel::selectBitCast(const User *I) {
return true;
}
+// Return true if we should copy from swift error to the final vreg as specified
+// by SwiftErrorWorklist.
+static bool shouldCopySwiftErrorsToFinalVRegs(const TargetLowering &TLI,
+ FunctionLoweringInfo &FuncInfo) {
+ if (!TLI.supportSwiftError())
+ return false;
+ return FuncInfo.SwiftErrorWorklist.count(FuncInfo.MBB);
+}
+
// Remove local value instructions starting from the instruction after
// SavedLastLocalValue to the current function insert point.
void FastISel::removeDeadLocalValueCode(MachineInstr *SavedLastLocalValue)
@@ -1345,7 +1356,11 @@ bool FastISel::selectInstruction(const Instruction *I) {
MachineInstr *SavedLastLocalValue = getLastLocalValue();
// Just before the terminator instruction, insert instructions to
// feed PHI nodes in successor blocks.
- if (isa<TerminatorInst>(I))
+ if (isa<TerminatorInst>(I)) {
+    // If we need to materialize any vreg from the worklist, we bail out of
+ // FastISel.
+ if (shouldCopySwiftErrorsToFinalVRegs(TLI, FuncInfo))
+ return false;
if (!handlePHINodesInSuccessorBlocks(I->getParent())) {
// PHI node handling may have generated local value instructions,
// even though it failed to handle all PHI nodes.
@@ -1354,6 +1369,13 @@ bool FastISel::selectInstruction(const Instruction *I) {
removeDeadLocalValueCode(SavedLastLocalValue);
return false;
}
+ }
+
+ // FastISel does not handle any operand bundles except OB_funclet.
+ if (ImmutableCallSite CS = ImmutableCallSite(I))
+ for (unsigned i = 0, e = CS.getNumOperandBundles(); i != e; ++i)
+ if (CS.getOperandBundleAt(i).getTagID() != LLVMContext::OB_funclet)
+ return false;
DbgLoc = I->getDebugLoc();
@@ -1413,7 +1435,8 @@ bool FastISel::selectInstruction(const Instruction *I) {
/// Emit an unconditional branch to the given block, unless it is the immediate
/// (fall-through) successor, and update the CFG.
-void FastISel::fastEmitBranch(MachineBasicBlock *MSucc, DebugLoc DbgLoc) {
+void FastISel::fastEmitBranch(MachineBasicBlock *MSucc,
+ const DebugLoc &DbgLoc) {
if (FuncInfo.MBB->getBasicBlock()->size() > 1 &&
FuncInfo.MBB->isLayoutSuccessor(MSucc)) {
// For more accurate line information if this is the only instruction
@@ -2053,7 +2076,7 @@ bool FastISel::handlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) {
FuncInfo.PHINodesToUpdate.resize(FuncInfo.OrigNumPHINodesToUpdate);
return false;
}
- FuncInfo.PHINodesToUpdate.push_back(std::make_pair(MBBI++, Reg));
+ FuncInfo.PHINodesToUpdate.push_back(std::make_pair(&*MBBI++, Reg));
DbgLoc = DebugLoc();
}
}
@@ -2138,7 +2161,7 @@ FastISel::createMachineMemOperandFor(const Instruction *I) const {
const Value *Ptr;
Type *ValTy;
unsigned Alignment;
- unsigned Flags;
+ MachineMemOperand::Flags Flags;
bool IsVolatile;
if (const auto *LI = dyn_cast<LoadInst>(I)) {
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
index 08815ed..e669ffc 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
@@ -88,6 +88,7 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf,
RegInfo = &MF->getRegInfo();
MachineModuleInfo &MMI = MF->getMMI();
const TargetFrameLowering *TFI = MF->getSubtarget().getFrameLowering();
+ unsigned StackAlign = TFI->getStackAlignment();
// Check whether the function can return without sret-demotion.
SmallVector<ISD::OutputArg, 4> Outs;
@@ -96,6 +97,31 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf,
CanLowerReturn = TLI->CanLowerReturn(Fn->getCallingConv(), *MF,
Fn->isVarArg(), Outs, Fn->getContext());
+ // If this personality uses funclets, we need to do a bit more work.
+ DenseMap<const AllocaInst *, int *> CatchObjects;
+ EHPersonality Personality = classifyEHPersonality(
+ Fn->hasPersonalityFn() ? Fn->getPersonalityFn() : nullptr);
+ if (isFuncletEHPersonality(Personality)) {
+ // Calculate state numbers if we haven't already.
+ WinEHFuncInfo &EHInfo = *MF->getWinEHFuncInfo();
+ if (Personality == EHPersonality::MSVC_CXX)
+ calculateWinCXXEHStateNumbers(&fn, EHInfo);
+ else if (isAsynchronousEHPersonality(Personality))
+ calculateSEHStateNumbers(&fn, EHInfo);
+ else if (Personality == EHPersonality::CoreCLR)
+ calculateClrEHStateNumbers(&fn, EHInfo);
+
+ // Map all BB references in the WinEH data to MBBs.
+ for (WinEHTryBlockMapEntry &TBME : EHInfo.TryBlockMap) {
+ for (WinEHHandlerType &H : TBME.HandlerArray) {
+ if (const AllocaInst *AI = H.CatchObj.Alloca)
+ CatchObjects.insert({AI, &H.CatchObj.FrameIndex});
+ else
+ H.CatchObj.FrameIndex = INT_MAX;
+ }
+ }
+ }
+
// Initialize the mapping of values to registers. This is only set up for
// instruction values that are used outside of the block that defines
// them.
@@ -108,7 +134,6 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf,
unsigned Align =
std::max((unsigned)MF->getDataLayout().getPrefTypeAlignment(Ty),
AI->getAlignment());
- unsigned StackAlign = TFI->getStackAlignment();
// Static allocas can be folded into the initial stack frame
// adjustment. For targets that don't realign the stack, don't
@@ -120,9 +145,21 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf,
TySize *= CUI->getZExtValue(); // Get total allocated size.
if (TySize == 0) TySize = 1; // Don't create zero-sized stack objects.
+ int FrameIndex = INT_MAX;
+ auto Iter = CatchObjects.find(AI);
+ if (Iter != CatchObjects.end() && TLI->needsFixedCatchObjects()) {
+ FrameIndex = MF->getFrameInfo()->CreateFixedObject(
+ TySize, 0, /*Immutable=*/false, /*isAliased=*/true);
+ MF->getFrameInfo()->setObjectAlignment(FrameIndex, Align);
+ } else {
+ FrameIndex =
+ MF->getFrameInfo()->CreateStackObject(TySize, Align, false, AI);
+ }
- StaticAllocaMap[AI] =
- MF->getFrameInfo()->CreateStackObject(TySize, Align, false, AI);
+ StaticAllocaMap[AI] = FrameIndex;
+ // Update the catch handler information.
+ if (Iter != CatchObjects.end())
+ *Iter->second = FrameIndex;
} else {
// FIXME: Overaligned static allocas should be grouped into
// a single dynamic allocation instead of using a separate
@@ -281,31 +318,14 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf,
LPads.push_back(LPI);
}
- // If this personality uses funclets, we need to do a bit more work.
- if (!Fn->hasPersonalityFn())
- return;
- EHPersonality Personality = classifyEHPersonality(Fn->getPersonalityFn());
if (!isFuncletEHPersonality(Personality))
return;
- // Calculate state numbers if we haven't already.
WinEHFuncInfo &EHInfo = *MF->getWinEHFuncInfo();
- if (Personality == EHPersonality::MSVC_CXX)
- calculateWinCXXEHStateNumbers(&fn, EHInfo);
- else if (isAsynchronousEHPersonality(Personality))
- calculateSEHStateNumbers(&fn, EHInfo);
- else if (Personality == EHPersonality::CoreCLR)
- calculateClrEHStateNumbers(&fn, EHInfo);
// Map all BB references in the WinEH data to MBBs.
for (WinEHTryBlockMapEntry &TBME : EHInfo.TryBlockMap) {
for (WinEHHandlerType &H : TBME.HandlerArray) {
- if (H.CatchObj.Alloca) {
- assert(StaticAllocaMap.count(H.CatchObj.Alloca));
- H.CatchObj.FrameIndex = StaticAllocaMap[H.CatchObj.Alloca];
- } else {
- H.CatchObj.FrameIndex = INT_MAX;
- }
if (H.Handler)
H.Handler = MBBMap[H.Handler.get<const BasicBlock *>()];
}
@@ -336,7 +356,7 @@ void FunctionLoweringInfo::clear() {
ByValArgFrameIndexMap.clear();
RegFixups.clear();
StatepointStackSlots.clear();
- StatepointRelocatedValues.clear();
+ StatepointSpillMaps.clear();
PreferredExtendType.clear();
}
@@ -575,3 +595,21 @@ void llvm::AddLandingPadInfo(const LandingPadInst &I, MachineModuleInfo &MMI,
}
}
}
+
+unsigned FunctionLoweringInfo::findSwiftErrorVReg(const MachineBasicBlock *MBB,
+ const Value* Val) const {
+ // Find the index in SwiftErrorVals.
+ SwiftErrorValues::const_iterator I =
+ std::find(SwiftErrorVals.begin(), SwiftErrorVals.end(), Val);
+ assert(I != SwiftErrorVals.end() && "Can't find value in SwiftErrorVals");
+ return SwiftErrorMap.lookup(MBB)[I - SwiftErrorVals.begin()];
+}
+
+void FunctionLoweringInfo::setSwiftErrorVReg(const MachineBasicBlock *MBB,
+ const Value* Val, unsigned VReg) {
+ // Find the index in SwiftErrorVals.
+ SwiftErrorValues::iterator I =
+ std::find(SwiftErrorVals.begin(), SwiftErrorVals.end(), Val);
+ assert(I != SwiftErrorVals.end() && "Can't find value in SwiftErrorVals");
+ SwiftErrorMap[MBB][I - SwiftErrorVals.begin()] = VReg;
+}
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
index a1e2d41..c8af73a 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
@@ -22,6 +22,7 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/StackMaps.h"
#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DebugInfo.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
@@ -319,7 +320,6 @@ InstrEmitter::AddRegisterOperand(MachineInstrBuilder &MIB,
"Chain and glue operands should occur at end of operand list!");
// Get/emit the operand.
unsigned VReg = getVR(Op, VRBaseMap);
- assert(TargetRegisterInfo::isVirtualRegister(VReg) && "Not a vreg?");
const MCInstrDesc &MCID = MIB->getDesc();
bool isOptDef = IIOpNum < MCID.getNumOperands() &&
@@ -333,6 +333,8 @@ InstrEmitter::AddRegisterOperand(MachineInstrBuilder &MIB,
const TargetRegisterClass *DstRC = nullptr;
if (IIOpNum < II->getNumOperands())
DstRC = TRI->getAllocatableClass(TII->getRegClass(*II,IIOpNum,TRI,*MF));
+ assert((!DstRC || TargetRegisterInfo::isVirtualRegister(VReg)) &&
+ "Expected VReg");
if (DstRC && !MRI->constrainRegClass(VReg, DstRC, MinRCSize)) {
unsigned NewVReg = MRI->createVirtualRegister(DstRC);
BuildMI(*MBB, InsertPos, Op.getNode()->getDebugLoc(),
@@ -440,7 +442,7 @@ void InstrEmitter::AddOperand(MachineInstrBuilder &MIB,
}
unsigned InstrEmitter::ConstrainForSubReg(unsigned VReg, unsigned SubIdx,
- MVT VT, DebugLoc DL) {
+ MVT VT, const DebugLoc &DL) {
const TargetRegisterClass *VRC = MRI->getRegClass(VReg);
const TargetRegisterClass *RC = TRI->getSubClassWithSubReg(VRC, SubIdx);
@@ -873,7 +875,7 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned,
// Run post-isel target hook to adjust this instruction if needed.
if (II.hasPostISelHook())
- TLI->AdjustInstrPostInstrSelection(MIB, Node);
+ TLI->AdjustInstrPostInstrSelection(*MIB, Node);
}
/// EmitSpecialNode - Generate machine code for a target-independent node and
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h b/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h
index 3b24d93..8a8a1bb 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h
@@ -83,8 +83,8 @@ class LLVM_LIBRARY_VISIBILITY InstrEmitter {
/// ConstrainForSubReg - Try to constrain VReg to a register class that
/// supports SubIdx sub-registers. Emit a copy if that isn't possible.
/// Return the virtual register to use.
- unsigned ConstrainForSubReg(unsigned VReg, unsigned SubIdx,
- MVT VT, DebugLoc DL);
+ unsigned ConstrainForSubReg(unsigned VReg, unsigned SubIdx, MVT VT,
+ const DebugLoc &DL);
/// EmitSubregNode - Generate machine code for subreg nodes.
///
@@ -132,7 +132,7 @@ public:
/// InstrEmitter - Construct an InstrEmitter and set it to start inserting
/// at the given position in the given block.
InstrEmitter(MachineBasicBlock *mbb, MachineBasicBlock::iterator insertpos);
-
+
private:
void EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned,
DenseMap<SDValue, unsigned> &VRBaseMap);
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index f783634..18ad910 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -11,15 +11,15 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Triple.h"
-#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
@@ -93,25 +93,25 @@ private:
/// insertion index for the INSERT_VECTOR_ELT instruction. In this case, it
/// is necessary to spill to memory the vector being inserted into, perform
/// the insert there, and then read the result back.
- SDValue PerformInsertVectorEltInMemory(SDValue Vec, SDValue Val,
- SDValue Idx, SDLoc dl);
- SDValue ExpandINSERT_VECTOR_ELT(SDValue Vec, SDValue Val,
- SDValue Idx, SDLoc dl);
+ SDValue PerformInsertVectorEltInMemory(SDValue Vec, SDValue Val, SDValue Idx,
+ const SDLoc &dl);
+ SDValue ExpandINSERT_VECTOR_ELT(SDValue Vec, SDValue Val, SDValue Idx,
+ const SDLoc &dl);
/// Return a vector shuffle operation which
/// performs the same shuffle in terms of order or result bytes, but on a type
/// whose vector element type is narrower than the original shuffle type.
/// e.g. <v4i32> <0, 1, 0, 1> -> v8i16 <0, 1, 2, 3, 0, 1, 2, 3>
- SDValue ShuffleWithNarrowerEltType(EVT NVT, EVT VT, SDLoc dl,
+ SDValue ShuffleWithNarrowerEltType(EVT NVT, EVT VT, const SDLoc &dl,
SDValue N1, SDValue N2,
ArrayRef<int> Mask) const;
bool LegalizeSetCCCondCode(EVT VT, SDValue &LHS, SDValue &RHS, SDValue &CC,
- bool &NeedInvert, SDLoc dl);
+ bool &NeedInvert, const SDLoc &dl);
SDValue ExpandLibCall(RTLIB::Libcall LC, SDNode *Node, bool isSigned);
SDValue ExpandLibCall(RTLIB::Libcall LC, EVT RetVT, const SDValue *Ops,
- unsigned NumOps, bool isSigned, SDLoc dl);
+ unsigned NumOps, bool isSigned, const SDLoc &dl);
std::pair<SDValue, SDValue> ExpandChainLibCall(RTLIB::Libcall LC,
SDNode *Node, bool isSigned);
@@ -128,26 +128,28 @@ private:
void ExpandDivRemLibCall(SDNode *Node, SmallVectorImpl<SDValue> &Results);
void ExpandSinCosLibCall(SDNode *Node, SmallVectorImpl<SDValue> &Results);
- SDValue EmitStackConvert(SDValue SrcOp, EVT SlotVT, EVT DestVT, SDLoc dl);
+ SDValue EmitStackConvert(SDValue SrcOp, EVT SlotVT, EVT DestVT,
+ const SDLoc &dl);
SDValue ExpandBUILD_VECTOR(SDNode *Node);
SDValue ExpandSCALAR_TO_VECTOR(SDNode *Node);
void ExpandDYNAMIC_STACKALLOC(SDNode *Node,
SmallVectorImpl<SDValue> &Results);
- void getSignAsIntValue(FloatSignAsInt &State, SDLoc DL, SDValue Value) const;
- SDValue modifySignAsInt(const FloatSignAsInt &State, SDLoc DL,
+ void getSignAsIntValue(FloatSignAsInt &State, const SDLoc &DL,
+ SDValue Value) const;
+ SDValue modifySignAsInt(const FloatSignAsInt &State, const SDLoc &DL,
SDValue NewIntValue) const;
SDValue ExpandFCOPYSIGN(SDNode *Node) const;
SDValue ExpandFABS(SDNode *Node) const;
SDValue ExpandLegalINT_TO_FP(bool isSigned, SDValue LegalOp, EVT DestVT,
- SDLoc dl);
+ const SDLoc &dl);
SDValue PromoteLegalINT_TO_FP(SDValue LegalOp, EVT DestVT, bool isSigned,
- SDLoc dl);
+ const SDLoc &dl);
SDValue PromoteLegalFP_TO_INT(SDValue LegalOp, EVT DestVT, bool isSigned,
- SDLoc dl);
+ const SDLoc &dl);
- SDValue ExpandBITREVERSE(SDValue Op, SDLoc dl);
- SDValue ExpandBSWAP(SDValue Op, SDLoc dl);
- SDValue ExpandBitCount(unsigned Opc, SDValue Op, SDLoc dl);
+ SDValue ExpandBITREVERSE(SDValue Op, const SDLoc &dl);
+ SDValue ExpandBSWAP(SDValue Op, const SDLoc &dl);
+ SDValue ExpandBitCount(unsigned Opc, SDValue Op, const SDLoc &dl);
SDValue ExpandExtractFromVectorThroughStack(SDValue Op);
SDValue ExpandInsertToVectorThroughStack(SDValue Op);
@@ -176,8 +178,6 @@ public:
"Replacing one node with another that produces a different number "
"of values!");
DAG.ReplaceAllUsesWith(Old, New);
- for (unsigned i = 0, e = Old->getNumValues(); i != e; ++i)
- DAG.TransferDbgValues(SDValue(Old, i), SDValue(New, i));
if (UpdatedNodes)
UpdatedNodes->insert(New);
ReplacedNode(Old);
@@ -187,7 +187,6 @@ public:
dbgs() << " with: "; New->dump(&DAG));
DAG.ReplaceAllUsesWith(Old, New);
- DAG.TransferDbgValues(Old, New);
if (UpdatedNodes)
UpdatedNodes->insert(New.getNode());
ReplacedNode(Old.getNode());
@@ -200,7 +199,6 @@ public:
DEBUG(dbgs() << (i == 0 ? " with: "
: " and: ");
New[i]->dump(&DAG));
- DAG.TransferDbgValues(SDValue(Old, i), New[i]);
if (UpdatedNodes)
UpdatedNodes->insert(New[i].getNode());
}
@@ -213,10 +211,9 @@ public:
/// performs the same shuffle in terms of order or result bytes, but on a type
/// whose vector element type is narrower than the original shuffle type.
/// e.g. <v4i32> <0, 1, 0, 1> -> v8i16 <0, 1, 2, 3, 0, 1, 2, 3>
-SDValue
-SelectionDAGLegalize::ShuffleWithNarrowerEltType(EVT NVT, EVT VT, SDLoc dl,
- SDValue N1, SDValue N2,
- ArrayRef<int> Mask) const {
+SDValue SelectionDAGLegalize::ShuffleWithNarrowerEltType(
+ EVT NVT, EVT VT, const SDLoc &dl, SDValue N1, SDValue N2,
+ ArrayRef<int> Mask) const {
unsigned NumMaskElts = VT.getVectorNumElements();
unsigned NumDestElts = NVT.getVectorNumElements();
unsigned NumEltsGrowth = NumDestElts / NumMaskElts;
@@ -224,7 +221,7 @@ SelectionDAGLegalize::ShuffleWithNarrowerEltType(EVT NVT, EVT VT, SDLoc dl,
assert(NumEltsGrowth && "Cannot promote to vector type with fewer elts!");
if (NumEltsGrowth == 1)
- return DAG.getVectorShuffle(NVT, dl, N1, N2, &Mask[0]);
+ return DAG.getVectorShuffle(NVT, dl, N1, N2, Mask);
SmallVector<int, 8> NewMask;
for (unsigned i = 0; i != NumMaskElts; ++i) {
@@ -238,7 +235,7 @@ SelectionDAGLegalize::ShuffleWithNarrowerEltType(EVT NVT, EVT VT, SDLoc dl,
}
assert(NewMask.size() == NumDestElts && "Non-integer NumEltsGrowth?");
assert(TLI.isShuffleMaskLegal(NewMask, NVT) && "Shuffle not legal?");
- return DAG.getVectorShuffle(NVT, dl, N1, N2, &NewMask[0]);
+ return DAG.getVectorShuffle(NVT, dl, N1, N2, NewMask);
}
/// Expands the ConstantFP node to an integer constant or
@@ -285,13 +282,12 @@ SelectionDAGLegalize::ExpandConstantFP(ConstantFPSDNode *CFP, bool UseCP) {
SDValue Result = DAG.getExtLoad(
ISD::EXTLOAD, dl, OrigVT, DAG.getEntryNode(), CPIdx,
MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), VT,
- false, false, false, Alignment);
+ Alignment);
return Result;
}
- SDValue Result =
- DAG.getLoad(OrigVT, dl, DAG.getEntryNode(), CPIdx,
- MachinePointerInfo::getConstantPool(DAG.getMachineFunction()),
- false, false, false, Alignment);
+ SDValue Result = DAG.getLoad(
+ OrigVT, dl, DAG.getEntryNode(), CPIdx,
+ MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), Alignment);
return Result;
}
@@ -302,301 +298,20 @@ SDValue SelectionDAGLegalize::ExpandConstant(ConstantSDNode *CP) {
SDValue CPIdx = DAG.getConstantPool(CP->getConstantIntValue(),
TLI.getPointerTy(DAG.getDataLayout()));
unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
- SDValue Result =
- DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx,
- MachinePointerInfo::getConstantPool(DAG.getMachineFunction()),
- false, false, false, Alignment);
+ SDValue Result = DAG.getLoad(
+ VT, dl, DAG.getEntryNode(), CPIdx,
+ MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), Alignment);
return Result;
}
-/// Expands an unaligned store to 2 half-size stores.
-static void ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG,
- const TargetLowering &TLI,
- SelectionDAGLegalize *DAGLegalize) {
- assert(ST->getAddressingMode() == ISD::UNINDEXED &&
- "unaligned indexed stores not implemented!");
- SDValue Chain = ST->getChain();
- SDValue Ptr = ST->getBasePtr();
- SDValue Val = ST->getValue();
- EVT VT = Val.getValueType();
- int Alignment = ST->getAlignment();
- unsigned AS = ST->getAddressSpace();
-
- SDLoc dl(ST);
- if (ST->getMemoryVT().isFloatingPoint() ||
- ST->getMemoryVT().isVector()) {
- EVT intVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
- if (TLI.isTypeLegal(intVT)) {
- // Expand to a bitconvert of the value to the integer type of the
- // same size, then a (misaligned) int store.
- // FIXME: Does not handle truncating floating point stores!
- SDValue Result = DAG.getNode(ISD::BITCAST, dl, intVT, Val);
- Result = DAG.getStore(Chain, dl, Result, Ptr, ST->getPointerInfo(),
- ST->isVolatile(), ST->isNonTemporal(), Alignment);
- DAGLegalize->ReplaceNode(SDValue(ST, 0), Result);
- return;
- }
- // Do a (aligned) store to a stack slot, then copy from the stack slot
- // to the final destination using (unaligned) integer loads and stores.
- EVT StoredVT = ST->getMemoryVT();
- MVT RegVT =
- TLI.getRegisterType(*DAG.getContext(),
- EVT::getIntegerVT(*DAG.getContext(),
- StoredVT.getSizeInBits()));
- unsigned StoredBytes = StoredVT.getSizeInBits() / 8;
- unsigned RegBytes = RegVT.getSizeInBits() / 8;
- unsigned NumRegs = (StoredBytes + RegBytes - 1) / RegBytes;
-
- // Make sure the stack slot is also aligned for the register type.
- SDValue StackPtr = DAG.CreateStackTemporary(StoredVT, RegVT);
-
- // Perform the original store, only redirected to the stack slot.
- SDValue Store = DAG.getTruncStore(Chain, dl,
- Val, StackPtr, MachinePointerInfo(),
- StoredVT, false, false, 0);
- SDValue Increment = DAG.getConstant(
- RegBytes, dl, TLI.getPointerTy(DAG.getDataLayout(), AS));
- SmallVector<SDValue, 8> Stores;
- unsigned Offset = 0;
-
- // Do all but one copies using the full register width.
- for (unsigned i = 1; i < NumRegs; i++) {
- // Load one integer register's worth from the stack slot.
- SDValue Load = DAG.getLoad(RegVT, dl, Store, StackPtr,
- MachinePointerInfo(),
- false, false, false, 0);
- // Store it to the final location. Remember the store.
- Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, Ptr,
- ST->getPointerInfo().getWithOffset(Offset),
- ST->isVolatile(), ST->isNonTemporal(),
- MinAlign(ST->getAlignment(), Offset)));
- // Increment the pointers.
- Offset += RegBytes;
- StackPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr,
- Increment);
- Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment);
- }
-
- // The last store may be partial. Do a truncating store. On big-endian
- // machines this requires an extending load from the stack slot to ensure
- // that the bits are in the right place.
- EVT MemVT = EVT::getIntegerVT(*DAG.getContext(),
- 8 * (StoredBytes - Offset));
-
- // Load from the stack slot.
- SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, RegVT, Store, StackPtr,
- MachinePointerInfo(),
- MemVT, false, false, false, 0);
-
- Stores.push_back(DAG.getTruncStore(Load.getValue(1), dl, Load, Ptr,
- ST->getPointerInfo()
- .getWithOffset(Offset),
- MemVT, ST->isVolatile(),
- ST->isNonTemporal(),
- MinAlign(ST->getAlignment(), Offset),
- ST->getAAInfo()));
- // The order of the stores doesn't matter - say it with a TokenFactor.
- SDValue Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
- DAGLegalize->ReplaceNode(SDValue(ST, 0), Result);
- return;
- }
- assert(ST->getMemoryVT().isInteger() &&
- !ST->getMemoryVT().isVector() &&
- "Unaligned store of unknown type.");
- // Get the half-size VT
- EVT NewStoredVT = ST->getMemoryVT().getHalfSizedIntegerVT(*DAG.getContext());
- int NumBits = NewStoredVT.getSizeInBits();
- int IncrementSize = NumBits / 8;
-
- // Divide the stored value in two parts.
- SDValue ShiftAmount =
- DAG.getConstant(NumBits, dl, TLI.getShiftAmountTy(Val.getValueType(),
- DAG.getDataLayout()));
- SDValue Lo = Val;
- SDValue Hi = DAG.getNode(ISD::SRL, dl, VT, Val, ShiftAmount);
-
- // Store the two parts
- SDValue Store1, Store2;
- Store1 = DAG.getTruncStore(Chain, dl,
- DAG.getDataLayout().isLittleEndian() ? Lo : Hi,
- Ptr, ST->getPointerInfo(), NewStoredVT,
- ST->isVolatile(), ST->isNonTemporal(), Alignment);
-
- Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
- DAG.getConstant(IncrementSize, dl,
- TLI.getPointerTy(DAG.getDataLayout(), AS)));
- Alignment = MinAlign(Alignment, IncrementSize);
- Store2 = DAG.getTruncStore(
- Chain, dl, DAG.getDataLayout().isLittleEndian() ? Hi : Lo, Ptr,
- ST->getPointerInfo().getWithOffset(IncrementSize), NewStoredVT,
- ST->isVolatile(), ST->isNonTemporal(), Alignment, ST->getAAInfo());
-
- SDValue Result =
- DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Store1, Store2);
- DAGLegalize->ReplaceNode(SDValue(ST, 0), Result);
-}
-
-/// Expands an unaligned load to 2 half-size loads.
-static void
-ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG,
- const TargetLowering &TLI,
- SDValue &ValResult, SDValue &ChainResult) {
- assert(LD->getAddressingMode() == ISD::UNINDEXED &&
- "unaligned indexed loads not implemented!");
- SDValue Chain = LD->getChain();
- SDValue Ptr = LD->getBasePtr();
- EVT VT = LD->getValueType(0);
- EVT LoadedVT = LD->getMemoryVT();
- SDLoc dl(LD);
- if (VT.isFloatingPoint() || VT.isVector()) {
- EVT intVT = EVT::getIntegerVT(*DAG.getContext(), LoadedVT.getSizeInBits());
- if (TLI.isTypeLegal(intVT) && TLI.isTypeLegal(LoadedVT)) {
- // Expand to a (misaligned) integer load of the same size,
- // then bitconvert to floating point or vector.
- SDValue newLoad = DAG.getLoad(intVT, dl, Chain, Ptr,
- LD->getMemOperand());
- SDValue Result = DAG.getNode(ISD::BITCAST, dl, LoadedVT, newLoad);
- if (LoadedVT != VT)
- Result = DAG.getNode(VT.isFloatingPoint() ? ISD::FP_EXTEND :
- ISD::ANY_EXTEND, dl, VT, Result);
-
- ValResult = Result;
- ChainResult = newLoad.getValue(1);
- return;
- }
-
- // Copy the value to a (aligned) stack slot using (unaligned) integer
- // loads and stores, then do a (aligned) load from the stack slot.
- MVT RegVT = TLI.getRegisterType(*DAG.getContext(), intVT);
- unsigned LoadedBytes = LoadedVT.getSizeInBits() / 8;
- unsigned RegBytes = RegVT.getSizeInBits() / 8;
- unsigned NumRegs = (LoadedBytes + RegBytes - 1) / RegBytes;
-
- // Make sure the stack slot is also aligned for the register type.
- SDValue StackBase = DAG.CreateStackTemporary(LoadedVT, RegVT);
-
- SDValue Increment =
- DAG.getConstant(RegBytes, dl, TLI.getPointerTy(DAG.getDataLayout()));
- SmallVector<SDValue, 8> Stores;
- SDValue StackPtr = StackBase;
- unsigned Offset = 0;
-
- // Do all but one copies using the full register width.
- for (unsigned i = 1; i < NumRegs; i++) {
- // Load one integer register's worth from the original location.
- SDValue Load = DAG.getLoad(RegVT, dl, Chain, Ptr,
- LD->getPointerInfo().getWithOffset(Offset),
- LD->isVolatile(), LD->isNonTemporal(),
- LD->isInvariant(),
- MinAlign(LD->getAlignment(), Offset),
- LD->getAAInfo());
- // Follow the load with a store to the stack slot. Remember the store.
- Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, StackPtr,
- MachinePointerInfo(), false, false, 0));
- // Increment the pointers.
- Offset += RegBytes;
- Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment);
- StackPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr,
- Increment);
- }
-
- // The last copy may be partial. Do an extending load.
- EVT MemVT = EVT::getIntegerVT(*DAG.getContext(),
- 8 * (LoadedBytes - Offset));
- SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, RegVT, Chain, Ptr,
- LD->getPointerInfo().getWithOffset(Offset),
- MemVT, LD->isVolatile(),
- LD->isNonTemporal(),
- LD->isInvariant(),
- MinAlign(LD->getAlignment(), Offset),
- LD->getAAInfo());
- // Follow the load with a store to the stack slot. Remember the store.
- // On big-endian machines this requires a truncating store to ensure
- // that the bits end up in the right place.
- Stores.push_back(DAG.getTruncStore(Load.getValue(1), dl, Load, StackPtr,
- MachinePointerInfo(), MemVT,
- false, false, 0));
-
- // The order of the stores doesn't matter - say it with a TokenFactor.
- SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
-
- // Finally, perform the original load only redirected to the stack slot.
- Load = DAG.getExtLoad(LD->getExtensionType(), dl, VT, TF, StackBase,
- MachinePointerInfo(), LoadedVT, false,false, false,
- 0);
-
- // Callers expect a MERGE_VALUES node.
- ValResult = Load;
- ChainResult = TF;
- return;
- }
- assert(LoadedVT.isInteger() && !LoadedVT.isVector() &&
- "Unaligned load of unsupported type.");
-
- // Compute the new VT that is half the size of the old one. This is an
- // integer MVT.
- unsigned NumBits = LoadedVT.getSizeInBits();
- EVT NewLoadedVT;
- NewLoadedVT = EVT::getIntegerVT(*DAG.getContext(), NumBits/2);
- NumBits >>= 1;
-
- unsigned Alignment = LD->getAlignment();
- unsigned IncrementSize = NumBits / 8;
- ISD::LoadExtType HiExtType = LD->getExtensionType();
-
- // If the original load is NON_EXTLOAD, the hi part load must be ZEXTLOAD.
- if (HiExtType == ISD::NON_EXTLOAD)
- HiExtType = ISD::ZEXTLOAD;
-
- // Load the value in two parts
- SDValue Lo, Hi;
- if (DAG.getDataLayout().isLittleEndian()) {
- Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr, LD->getPointerInfo(),
- NewLoadedVT, LD->isVolatile(),
- LD->isNonTemporal(), LD->isInvariant(), Alignment,
- LD->getAAInfo());
- Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
- DAG.getConstant(IncrementSize, dl, Ptr.getValueType()));
- Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr,
- LD->getPointerInfo().getWithOffset(IncrementSize),
- NewLoadedVT, LD->isVolatile(),
- LD->isNonTemporal(),LD->isInvariant(),
- MinAlign(Alignment, IncrementSize), LD->getAAInfo());
- } else {
- Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr, LD->getPointerInfo(),
- NewLoadedVT, LD->isVolatile(),
- LD->isNonTemporal(), LD->isInvariant(), Alignment,
- LD->getAAInfo());
- Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
- DAG.getConstant(IncrementSize, dl, Ptr.getValueType()));
- Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr,
- LD->getPointerInfo().getWithOffset(IncrementSize),
- NewLoadedVT, LD->isVolatile(),
- LD->isNonTemporal(), LD->isInvariant(),
- MinAlign(Alignment, IncrementSize), LD->getAAInfo());
- }
-
- // aggregate the two parts
- SDValue ShiftAmount =
- DAG.getConstant(NumBits, dl, TLI.getShiftAmountTy(Hi.getValueType(),
- DAG.getDataLayout()));
- SDValue Result = DAG.getNode(ISD::SHL, dl, VT, Hi, ShiftAmount);
- Result = DAG.getNode(ISD::OR, dl, VT, Result, Lo);
-
- SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
- Hi.getValue(1));
-
- ValResult = Result;
- ChainResult = TF;
-}
-
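
Note: the two helpers removed above (their logic now lives behind TLI.expandUnalignedStore and TLI.expandUnalignedLoad, used later in this file) split an unaligned integer access into two half-width accesses. A byte-level model of the little-endian round trip; memcpy stands in for the half-width memory operations:

    #include <cstdint>
    #include <cstdio>
    #include <cstring>

    int main() {
      uint8_t Mem[8] = {0};
      uint8_t *Ptr = Mem + 1;                   // misaligned address
      uint32_t Val = 0xAABBCCDDu;

      // Store: low half at Ptr, high half (Val srl 16) at Ptr + 2.
      uint16_t Lo = static_cast<uint16_t>(Val);
      uint16_t Hi = static_cast<uint16_t>(Val >> 16);
      std::memcpy(Ptr, &Lo, 2);
      std::memcpy(Ptr + 2, &Hi, 2);

      // Load: read both halves and recombine with shift-and-or.
      uint16_t L2, H2;
      std::memcpy(&L2, Ptr, 2);
      std::memcpy(&H2, Ptr + 2, 2);
      uint32_t Re = (static_cast<uint32_t>(H2) << 16) | L2;
      std::printf("0x%08x\n", Re);              // prints 0xaabbccdd
    }
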
/// Some targets cannot handle a variable insertion index for the
/// INSERT_VECTOR_ELT instruction. In this case, it
/// is necessary to spill to memory the vector being inserted into, perform
/// the insert there, and then read the result back.
-SDValue SelectionDAGLegalize::
-PerformInsertVectorEltInMemory(SDValue Vec, SDValue Val, SDValue Idx,
- SDLoc dl) {
+SDValue SelectionDAGLegalize::PerformInsertVectorEltInMemory(SDValue Vec,
+ SDValue Val,
+ SDValue Idx,
+ const SDLoc &dl) {
SDValue Tmp1 = Vec;
SDValue Tmp2 = Val;
SDValue Tmp3 = Idx;
@@ -618,8 +333,7 @@ PerformInsertVectorEltInMemory(SDValue Vec, SDValue Val, SDValue Idx,
// Store the vector.
SDValue Ch = DAG.getStore(
DAG.getEntryNode(), dl, Tmp1, StackPtr,
- MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI), false,
- false, 0);
+ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI));
// Truncate or zero extend offset to target pointer type.
Tmp3 = DAG.getZExtOrTrunc(Tmp3, dl, PtrVT);
@@ -629,17 +343,15 @@ PerformInsertVectorEltInMemory(SDValue Vec, SDValue Val, SDValue Idx,
DAG.getConstant(EltSize, dl, IdxVT));
SDValue StackPtr2 = DAG.getNode(ISD::ADD, dl, IdxVT, Tmp3, StackPtr);
// Store the scalar value.
- Ch = DAG.getTruncStore(Ch, dl, Tmp2, StackPtr2, MachinePointerInfo(), EltVT,
- false, false, 0);
+ Ch = DAG.getTruncStore(Ch, dl, Tmp2, StackPtr2, MachinePointerInfo(), EltVT);
// Load the updated vector.
return DAG.getLoad(VT, dl, Ch, StackPtr, MachinePointerInfo::getFixedStack(
- DAG.getMachineFunction(), SPFI),
- false, false, false, 0);
+ DAG.getMachineFunction(), SPFI));
}
-
-SDValue SelectionDAGLegalize::
-ExpandINSERT_VECTOR_ELT(SDValue Vec, SDValue Val, SDValue Idx, SDLoc dl) {
+SDValue SelectionDAGLegalize::ExpandINSERT_VECTOR_ELT(SDValue Vec, SDValue Val,
+ SDValue Idx,
+ const SDLoc &dl) {
if (ConstantSDNode *InsertPos = dyn_cast<ConstantSDNode>(Idx)) {
// SCALAR_TO_VECTOR requires that the type of the value being inserted
// match the element type of the vector being created, except for
@@ -658,8 +370,7 @@ ExpandINSERT_VECTOR_ELT(SDValue Vec, SDValue Val, SDValue Idx, SDLoc dl) {
for (unsigned i = 0; i != NumElts; ++i)
ShufOps.push_back(i != InsertPos->getZExtValue() ? i : NumElts);
- return DAG.getVectorShuffle(Vec.getValueType(), dl, Vec, ScVec,
- &ShufOps[0]);
+ return DAG.getVectorShuffle(Vec.getValueType(), dl, Vec, ScVec, ShufOps);
}
}
return PerformInsertVectorEltInMemory(Vec, Val, Idx, dl);
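
Note: a scalar model of the spill-insert-reload dataflow that PerformInsertVectorEltInMemory builds; plain arrays stand in for the stack temporary and the DAG store/load nodes:

    #include <cstdio>

    int main() {
      float Vec[4] = {0.f, 1.f, 2.f, 3.f};
      unsigned Idx = 2;                 // variable index, unknown statically
      float Val = 9.f;

      float Slot[4];                    // the stack temporary
      for (unsigned i = 0; i != 4; ++i) // "store the vector"
        Slot[i] = Vec[i];
      Slot[Idx] = Val;                  // "store the scalar" at EltSize * Idx
      for (unsigned i = 0; i != 4; ++i) // "load the updated vector"
        std::printf("%g ", Slot[i]);    // prints "0 1 9 3"
      std::printf("\n");
    }
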
@@ -676,8 +387,7 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) {
SDValue Chain = ST->getChain();
SDValue Ptr = ST->getBasePtr();
unsigned Alignment = ST->getAlignment();
- bool isVolatile = ST->isVolatile();
- bool isNonTemporal = ST->isNonTemporal();
+ MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
AAMDNodes AAInfo = ST->getAAInfo();
SDLoc dl(ST);
if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(ST->getValue())) {
@@ -686,8 +396,8 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) {
SDValue Con = DAG.getConstant(CFP->getValueAPF().
bitcastToAPInt().zextOrTrunc(32),
SDLoc(CFP), MVT::i32);
- return DAG.getStore(Chain, dl, Con, Ptr, ST->getPointerInfo(),
- isVolatile, isNonTemporal, Alignment, AAInfo);
+ return DAG.getStore(Chain, dl, Con, Ptr, ST->getPointerInfo(), Alignment,
+ MMOFlags, AAInfo);
}
if (CFP->getValueType(0) == MVT::f64) {
@@ -696,7 +406,7 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) {
SDValue Con = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt().
zextOrTrunc(64), SDLoc(CFP), MVT::i64);
return DAG.getStore(Chain, dl, Con, Ptr, ST->getPointerInfo(),
- isVolatile, isNonTemporal, Alignment, AAInfo);
+ Alignment, MMOFlags, AAInfo);
}
if (TLI.isTypeLegal(MVT::i32) && !ST->isVolatile()) {
@@ -709,14 +419,13 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) {
if (DAG.getDataLayout().isBigEndian())
std::swap(Lo, Hi);
- Lo = DAG.getStore(Chain, dl, Lo, Ptr, ST->getPointerInfo(), isVolatile,
- isNonTemporal, Alignment, AAInfo);
+ Lo = DAG.getStore(Chain, dl, Lo, Ptr, ST->getPointerInfo(), Alignment,
+ MMOFlags, AAInfo);
Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
DAG.getConstant(4, dl, Ptr.getValueType()));
Hi = DAG.getStore(Chain, dl, Hi, Ptr,
ST->getPointerInfo().getWithOffset(4),
- isVolatile, isNonTemporal, MinAlign(Alignment, 4U),
- AAInfo);
+ MinAlign(Alignment, 4U), MMOFlags, AAInfo);
return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi);
}
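
Note: a byte-level model of the f32 case of OptimizeFloatStore shown above: the float store is rewritten as an i32 store of the identical bit pattern; memcpy stands in for the bitcastToAPInt step:

    #include <cstdint>
    #include <cstdio>
    #include <cstring>

    int main() {
      float F = 1.5f;
      uint32_t Bits;
      std::memcpy(&Bits, &F, sizeof(Bits)); // same bits, integer type
      std::printf("0x%08x\n", Bits);        // prints 0x3fc00000
    }
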
@@ -732,8 +441,7 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) {
SDLoc dl(Node);
unsigned Alignment = ST->getAlignment();
- bool isVolatile = ST->isVolatile();
- bool isNonTemporal = ST->isNonTemporal();
+ MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
AAMDNodes AAInfo = ST->getAAInfo();
if (!ST->isTruncatingStore()) {
@@ -754,8 +462,10 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) {
unsigned AS = ST->getAddressSpace();
unsigned Align = ST->getAlignment();
const DataLayout &DL = DAG.getDataLayout();
- if (!TLI.allowsMemoryAccess(*DAG.getContext(), DL, MemVT, AS, Align))
- ExpandUnalignedStore(cast<StoreSDNode>(Node), DAG, TLI, this);
+ if (!TLI.allowsMemoryAccess(*DAG.getContext(), DL, MemVT, AS, Align)) {
+ SDValue Result = TLI.expandUnalignedStore(ST, DAG);
+ ReplaceNode(SDValue(ST, 0), Result);
+ }
break;
}
case TargetLowering::Custom: {
@@ -770,9 +480,8 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) {
"Can only promote stores to same size type");
Value = DAG.getNode(ISD::BITCAST, dl, NVT, Value);
SDValue Result =
- DAG.getStore(Chain, dl, Value, Ptr,
- ST->getPointerInfo(), isVolatile,
- isNonTemporal, Alignment, AAInfo);
+ DAG.getStore(Chain, dl, Value, Ptr, ST->getPointerInfo(),
+ Alignment, MMOFlags, AAInfo);
ReplaceNode(SDValue(Node, 0), Result);
break;
}
@@ -794,8 +503,8 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) {
StVT.getStoreSizeInBits());
Value = DAG.getZeroExtendInReg(Value, dl, StVT);
SDValue Result =
- DAG.getTruncStore(Chain, dl, Value, Ptr, ST->getPointerInfo(),
- NVT, isVolatile, isNonTemporal, Alignment, AAInfo);
+ DAG.getTruncStore(Chain, dl, Value, Ptr, ST->getPointerInfo(), NVT,
+ Alignment, MMOFlags, AAInfo);
ReplaceNode(SDValue(Node, 0), Result);
} else if (StWidth & (StWidth - 1)) {
// If not storing a power-of-2 number of bits, expand as two stores.
@@ -815,9 +524,7 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) {
// TRUNCSTORE:i24 X -> TRUNCSTORE:i16 X, TRUNCSTORE@+2:i8 (srl X, 16)
// Store the bottom RoundWidth bits.
Lo = DAG.getTruncStore(Chain, dl, Value, Ptr, ST->getPointerInfo(),
- RoundVT,
- isVolatile, isNonTemporal, Alignment,
- AAInfo);
+ RoundVT, Alignment, MMOFlags, AAInfo);
// Store the remaining ExtraWidth bits.
IncrementSize = RoundWidth / 8;
@@ -828,10 +535,10 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) {
ISD::SRL, dl, Value.getValueType(), Value,
DAG.getConstant(RoundWidth, dl,
TLI.getShiftAmountTy(Value.getValueType(), DL)));
- Hi = DAG.getTruncStore(Chain, dl, Hi, Ptr,
- ST->getPointerInfo().getWithOffset(IncrementSize),
- ExtraVT, isVolatile, isNonTemporal,
- MinAlign(Alignment, IncrementSize), AAInfo);
+ Hi = DAG.getTruncStore(
+ Chain, dl, Hi, Ptr,
+ ST->getPointerInfo().getWithOffset(IncrementSize), ExtraVT,
+ MinAlign(Alignment, IncrementSize), MMOFlags, AAInfo);
} else {
// Big endian - avoid unaligned stores.
// TRUNCSTORE:i24 X -> TRUNCSTORE:i16 (srl X, 8), TRUNCSTORE@+2:i8 X
@@ -841,18 +548,17 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) {
DAG.getConstant(ExtraWidth, dl,
TLI.getShiftAmountTy(Value.getValueType(), DL)));
Hi = DAG.getTruncStore(Chain, dl, Hi, Ptr, ST->getPointerInfo(),
- RoundVT, isVolatile, isNonTemporal, Alignment,
- AAInfo);
+ RoundVT, Alignment, MMOFlags, AAInfo);
// Store the remaining ExtraWidth bits.
IncrementSize = RoundWidth / 8;
Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
DAG.getConstant(IncrementSize, dl,
Ptr.getValueType()));
- Lo = DAG.getTruncStore(Chain, dl, Value, Ptr,
- ST->getPointerInfo().getWithOffset(IncrementSize),
- ExtraVT, isVolatile, isNonTemporal,
- MinAlign(Alignment, IncrementSize), AAInfo);
+ Lo = DAG.getTruncStore(
+ Chain, dl, Value, Ptr,
+ ST->getPointerInfo().getWithOffset(IncrementSize), ExtraVT,
+ MinAlign(Alignment, IncrementSize), MMOFlags, AAInfo);
}
// The order of the stores doesn't matter.
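
Note: the little-endian i24 split named in the comment above, modeled on plain integers (the value is made up):

    #include <cstdint>
    #include <cstdio>

    int main() {
      uint32_t X = 0x123456;             // value stored as i24
      uint16_t LoPart = X & 0xFFFF;      // TRUNCSTORE:i16 X
      uint8_t HiPart = (X >> 16) & 0xFF; // TRUNCSTORE@+2:i8 (srl X, 16)
      std::printf("%04x %02x\n", LoPart, HiPart); // prints "3456 12"
    }
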
@@ -867,8 +573,10 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) {
unsigned Align = ST->getAlignment();
// If this is an unaligned store and the target doesn't support it,
// expand it.
- if (!TLI.allowsMemoryAccess(*DAG.getContext(), DL, MemVT, AS, Align))
- ExpandUnalignedStore(cast<StoreSDNode>(Node), DAG, TLI, this);
+ if (!TLI.allowsMemoryAccess(*DAG.getContext(), DL, MemVT, AS, Align)) {
+ SDValue Result = TLI.expandUnalignedStore(ST, DAG);
+ ReplaceNode(SDValue(ST, 0), Result);
+ }
break;
}
case TargetLowering::Custom: {
@@ -886,8 +594,8 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) {
"Do not know how to expand this store!");
Value = DAG.getNode(ISD::TRUNCATE, dl, StVT, Value);
SDValue Result =
- DAG.getStore(Chain, dl, Value, Ptr, ST->getPointerInfo(),
- isVolatile, isNonTemporal, Alignment, AAInfo);
+ DAG.getStore(Chain, dl, Value, Ptr, ST->getPointerInfo(),
+ Alignment, MMOFlags, AAInfo);
ReplaceNode(SDValue(Node, 0), Result);
break;
}
@@ -917,13 +625,13 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) {
const DataLayout &DL = DAG.getDataLayout();
// If this is an unaligned load and the target doesn't support it,
// expand it.
- if (!TLI.allowsMemoryAccess(*DAG.getContext(), DL, MemVT, AS, Align))
- ExpandUnalignedLoad(cast<LoadSDNode>(Node), DAG, TLI, RVal, RChain);
+ if (!TLI.allowsMemoryAccess(*DAG.getContext(), DL, MemVT, AS, Align)) {
+ std::tie(RVal, RChain) = TLI.expandUnalignedLoad(LD, DAG);
+ }
break;
}
case TargetLowering::Custom: {
- SDValue Res = TLI.LowerOperation(RVal, DAG);
- if (Res.getNode()) {
+ if (SDValue Res = TLI.LowerOperation(RVal, DAG)) {
RVal = Res;
RChain = Res.getValue(1);
}
@@ -956,9 +664,7 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) {
EVT SrcVT = LD->getMemoryVT();
unsigned SrcWidth = SrcVT.getSizeInBits();
unsigned Alignment = LD->getAlignment();
- bool isVolatile = LD->isVolatile();
- bool isNonTemporal = LD->isNonTemporal();
- bool isInvariant = LD->isInvariant();
+ MachineMemOperand::Flags MMOFlags = LD->getMemOperand()->getFlags();
AAMDNodes AAInfo = LD->getAAInfo();
if (SrcWidth != SrcVT.getStoreSizeInBits() &&
@@ -985,10 +691,8 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) {
ExtType == ISD::ZEXTLOAD ? ISD::ZEXTLOAD : ISD::EXTLOAD;
SDValue Result =
- DAG.getExtLoad(NewExtType, dl, Node->getValueType(0),
- Chain, Ptr, LD->getPointerInfo(),
- NVT, isVolatile, isNonTemporal, isInvariant, Alignment,
- AAInfo);
+ DAG.getExtLoad(NewExtType, dl, Node->getValueType(0), Chain, Ptr,
+ LD->getPointerInfo(), NVT, Alignment, MMOFlags, AAInfo);
Ch = Result.getValue(1); // The chain.
@@ -1023,10 +727,9 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) {
if (DL.isLittleEndian()) {
// EXTLOAD:i24 -> ZEXTLOAD:i16 | (shl EXTLOAD@+2:i8, 16)
// Load the bottom RoundWidth bits.
- Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, Node->getValueType(0),
- Chain, Ptr,
- LD->getPointerInfo(), RoundVT, isVolatile,
- isNonTemporal, isInvariant, Alignment, AAInfo);
+ Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, Node->getValueType(0), Chain, Ptr,
+ LD->getPointerInfo(), RoundVT, Alignment, MMOFlags,
+ AAInfo);
// Load the remaining ExtraWidth bits.
IncrementSize = RoundWidth / 8;
@@ -1035,8 +738,8 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) {
Ptr.getValueType()));
Hi = DAG.getExtLoad(ExtType, dl, Node->getValueType(0), Chain, Ptr,
LD->getPointerInfo().getWithOffset(IncrementSize),
- ExtraVT, isVolatile, isNonTemporal, isInvariant,
- MinAlign(Alignment, IncrementSize), AAInfo);
+ ExtraVT, MinAlign(Alignment, IncrementSize), MMOFlags,
+ AAInfo);
// Build a factor node to remember that this load is independent of
// the other one.
@@ -1056,19 +759,18 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) {
// EXTLOAD:i24 -> (shl EXTLOAD:i16, 8) | ZEXTLOAD@+2:i8
// Load the top RoundWidth bits.
Hi = DAG.getExtLoad(ExtType, dl, Node->getValueType(0), Chain, Ptr,
- LD->getPointerInfo(), RoundVT, isVolatile,
- isNonTemporal, isInvariant, Alignment, AAInfo);
+ LD->getPointerInfo(), RoundVT, Alignment, MMOFlags,
+ AAInfo);
// Load the remaining ExtraWidth bits.
IncrementSize = RoundWidth / 8;
Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
DAG.getConstant(IncrementSize, dl,
Ptr.getValueType()));
- Lo = DAG.getExtLoad(ISD::ZEXTLOAD,
- dl, Node->getValueType(0), Chain, Ptr,
+ Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, Node->getValueType(0), Chain, Ptr,
LD->getPointerInfo().getWithOffset(IncrementSize),
- ExtraVT, isVolatile, isNonTemporal, isInvariant,
- MinAlign(Alignment, IncrementSize), AAInfo);
+ ExtraVT, MinAlign(Alignment, IncrementSize), MMOFlags,
+ AAInfo);
// Build a factor node to remember that this load is independent of
// the other one.
@@ -1099,8 +801,7 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) {
Chain = SDValue(Node, 1);
if (isCustom) {
- SDValue Res = TLI.LowerOperation(SDValue(Node, 0), DAG);
- if (Res.getNode()) {
+ if (SDValue Res = TLI.LowerOperation(SDValue(Node, 0), DAG)) {
Value = Res;
Chain = Res.getValue(1);
}
@@ -1111,8 +812,9 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) {
unsigned AS = LD->getAddressSpace();
unsigned Align = LD->getAlignment();
const DataLayout &DL = DAG.getDataLayout();
- if (!TLI.allowsMemoryAccess(*DAG.getContext(), DL, MemVT, AS, Align))
- ExpandUnalignedLoad(cast<LoadSDNode>(Node), DAG, TLI, Value, Chain);
+ if (!TLI.allowsMemoryAccess(*DAG.getContext(), DL, MemVT, AS, Align)) {
+ std::tie(Value, Chain) = TLI.expandUnalignedLoad(LD, DAG);
+ }
}
break;
}
@@ -1297,6 +999,7 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
case ISD::MERGE_VALUES:
case ISD::EH_RETURN:
case ISD::FRAME_TO_ARGS_OFFSET:
+ case ISD::EH_DWARF_CFA:
case ISD::EH_SJLJ_SETJMP:
case ISD::EH_SJLJ_LONGJMP:
case ISD::EH_SJLJ_SETUP_DISPATCH:
@@ -1399,8 +1102,7 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
case TargetLowering::Custom: {
// FIXME: The handling for custom lowering with multiple results is
// a complete mess.
- SDValue Res = TLI.LowerOperation(SDValue(Node, 0), DAG);
- if (Res.getNode()) {
+ if (SDValue Res = TLI.LowerOperation(SDValue(Node, 0), DAG)) {
if (!(Res.getNode() != Node || Res.getResNo() != 0))
return;
@@ -1467,7 +1169,7 @@ SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) {
// Caches for hasPredecessorHelper
SmallPtrSet<const SDNode *, 32> Visited;
SmallVector<const SDNode *, 16> Worklist;
-
+ Worklist.push_back(Idx.getNode());
SDValue StackPtr, Ch;
for (SDNode::use_iterator UI = Vec.getNode()->use_begin(),
UE = Vec.getNode()->use_end(); UI != UE; ++UI) {
@@ -1485,7 +1187,7 @@ SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) {
// If the index is dependent on the store we will introduce a cycle when
// creating the load (the load uses the index, and by replacing the chain
// we will make the index dependent on the load).
- if (Idx.getNode()->hasPredecessorHelper(ST, Visited, Worklist))
+ if (SDNode::hasPredecessorHelper(ST, Visited, Worklist))
continue;
StackPtr = ST->getBasePtr();
@@ -1498,7 +1200,7 @@ SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) {
// Store the value to a temporary stack slot, then LOAD the returned part.
StackPtr = DAG.CreateStackTemporary(Vec.getValueType());
Ch = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr,
- MachinePointerInfo(), false, false, 0);
+ MachinePointerInfo());
}
// Add the offset to the index.
@@ -1513,12 +1215,12 @@ SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) {
SDValue NewLoad;
if (Op.getValueType().isVector())
- NewLoad = DAG.getLoad(Op.getValueType(), dl, Ch, StackPtr,
- MachinePointerInfo(), false, false, false, 0);
+ NewLoad =
+ DAG.getLoad(Op.getValueType(), dl, Ch, StackPtr, MachinePointerInfo());
else
- NewLoad = DAG.getExtLoad(
- ISD::EXTLOAD, dl, Op.getValueType(), Ch, StackPtr, MachinePointerInfo(),
- Vec.getValueType().getVectorElementType(), false, false, false, 0);
+ NewLoad = DAG.getExtLoad(ISD::EXTLOAD, dl, Op.getValueType(), Ch, StackPtr,
+ MachinePointerInfo(),
+ Vec.getValueType().getVectorElementType());
// Replace the chain going out of the store, by the one out of the load.
DAG.ReplaceAllUsesOfValueWith(Ch, SDValue(NewLoad.getNode(), 1));
@@ -1549,8 +1251,7 @@ SDValue SelectionDAGLegalize::ExpandInsertToVectorThroughStack(SDValue Op) {
MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI);
// First store the whole vector.
- SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, PtrInfo,
- false, false, 0);
+ SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, PtrInfo);
// Then store the inserted part.
@@ -1566,12 +1267,10 @@ SDValue SelectionDAGLegalize::ExpandInsertToVectorThroughStack(SDValue Op) {
StackPtr);
// Store the subvector.
- Ch = DAG.getStore(Ch, dl, Part, SubStackPtr,
- MachinePointerInfo(), false, false, 0);
+ Ch = DAG.getStore(Ch, dl, Part, SubStackPtr, MachinePointerInfo());
// Finally, load the updated vector.
- return DAG.getLoad(Op.getValueType(), dl, Ch, StackPtr, PtrInfo,
- false, false, false, 0);
+ return DAG.getLoad(Op.getValueType(), dl, Ch, StackPtr, PtrInfo);
}
SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) {
@@ -1593,7 +1292,7 @@ SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) {
// Store (in the right endianness) the elements to memory.
for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i) {
// Ignore undef elements.
- if (Node->getOperand(i).getOpcode() == ISD::UNDEF) continue;
+ if (Node->getOperand(i).isUndef()) continue;
unsigned Offset = TypeByteSize*i;
@@ -1605,13 +1304,10 @@ SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) {
if (EltVT.bitsLT(Node->getOperand(i).getValueType().getScalarType())) {
Stores.push_back(DAG.getTruncStore(DAG.getEntryNode(), dl,
Node->getOperand(i), Idx,
- PtrInfo.getWithOffset(Offset),
- EltVT, false, false, 0));
+ PtrInfo.getWithOffset(Offset), EltVT));
} else
- Stores.push_back(DAG.getStore(DAG.getEntryNode(), dl,
- Node->getOperand(i), Idx,
- PtrInfo.getWithOffset(Offset),
- false, false, 0));
+ Stores.push_back(DAG.getStore(DAG.getEntryNode(), dl, Node->getOperand(i),
+ Idx, PtrInfo.getWithOffset(Offset)));
}
SDValue StoreChain;
@@ -1621,8 +1317,7 @@ SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) {
StoreChain = DAG.getEntryNode();
// Result is a load from the stack slot.
- return DAG.getLoad(VT, dl, StoreChain, FIPtr, PtrInfo,
- false, false, false, 0);
+ return DAG.getLoad(VT, dl, StoreChain, FIPtr, PtrInfo);
}
namespace {
@@ -1645,7 +1340,8 @@ struct FloatSignAsInt {
/// containing the sign bit if the target has no integer value capable of
/// holding all bits of the floating-point value.
void SelectionDAGLegalize::getSignAsIntValue(FloatSignAsInt &State,
- SDLoc DL, SDValue Value) const {
+ const SDLoc &DL,
+ SDValue Value) const {
EVT FloatVT = Value.getValueType();
unsigned NumBits = FloatVT.getSizeInBits();
State.FloatVT = FloatVT;
@@ -1669,7 +1365,7 @@ void SelectionDAGLegalize::getSignAsIntValue(FloatSignAsInt &State,
MachineFunction &MF = DAG.getMachineFunction();
State.FloatPointerInfo = MachinePointerInfo::getFixedStack(MF, FI);
State.Chain = DAG.getStore(DAG.getEntryNode(), DL, Value, State.FloatPtr,
- State.FloatPointerInfo, false, false, 0);
+ State.FloatPointerInfo);
SDValue IntPtr;
if (DataLayout.isBigEndian()) {
@@ -1687,9 +1383,8 @@ void SelectionDAGLegalize::getSignAsIntValue(FloatSignAsInt &State,
}
State.IntPtr = IntPtr;
- State.IntValue = DAG.getExtLoad(ISD::EXTLOAD, DL, LoadTy, State.Chain,
- IntPtr, State.IntPointerInfo, MVT::i8,
- false, false, false, 0);
+ State.IntValue = DAG.getExtLoad(ISD::EXTLOAD, DL, LoadTy, State.Chain, IntPtr,
+ State.IntPointerInfo, MVT::i8);
State.SignMask = APInt::getOneBitSet(LoadTy.getSizeInBits(), 7);
State.SignBit = 7;
}
@@ -1697,16 +1392,16 @@ void SelectionDAGLegalize::getSignAsIntValue(FloatSignAsInt &State,
/// Replace the integer value produced by getSignAsIntValue() with a new value
/// and cast the result back to a floating-point type.
SDValue SelectionDAGLegalize::modifySignAsInt(const FloatSignAsInt &State,
- SDLoc DL, SDValue NewIntValue) const {
+ const SDLoc &DL,
+ SDValue NewIntValue) const {
if (!State.Chain)
return DAG.getNode(ISD::BITCAST, DL, State.FloatVT, NewIntValue);
// Override the part containing the sign bit in the value stored on the stack.
SDValue Chain = DAG.getTruncStore(State.Chain, DL, NewIntValue, State.IntPtr,
- State.IntPointerInfo, MVT::i8, false, false,
- 0);
+ State.IntPointerInfo, MVT::i8);
return DAG.getLoad(State.FloatVT, DL, Chain, State.FloatPtr,
- State.FloatPointerInfo, false, false, false, 0);
+ State.FloatPointerInfo);
}
SDValue SelectionDAGLegalize::ExpandFCOPYSIGN(SDNode *Node) const {
@@ -1843,11 +1538,10 @@ void SelectionDAGLegalize::ExpandDYNAMIC_STACKALLOC(SDNode* Node,
/// of a true/false result.
///
/// \returns true if the SetCC has been legalized, false if it hasn't.
-bool SelectionDAGLegalize::LegalizeSetCCCondCode(EVT VT,
- SDValue &LHS, SDValue &RHS,
- SDValue &CC,
+bool SelectionDAGLegalize::LegalizeSetCCCondCode(EVT VT, SDValue &LHS,
+ SDValue &RHS, SDValue &CC,
bool &NeedInvert,
- SDLoc dl) {
+ const SDLoc &dl) {
MVT OpVT = LHS.getSimpleValueType();
ISD::CondCode CCCode = cast<CondCodeSDNode>(CC)->get();
NeedInvert = false;
@@ -1944,10 +1638,8 @@ bool SelectionDAGLegalize::LegalizeSetCCCondCode(EVT VT,
/// SrcOp to a stack slot of type SlotVT, truncating it if needed. It then does
/// a load from the stack slot to DestVT, extending it if needed.
/// The resultant code need not be legal.
-SDValue SelectionDAGLegalize::EmitStackConvert(SDValue SrcOp,
- EVT SlotVT,
- EVT DestVT,
- SDLoc dl) {
+SDValue SelectionDAGLegalize::EmitStackConvert(SDValue SrcOp, EVT SlotVT,
+ EVT DestVT, const SDLoc &dl) {
// Create the stack frame object.
unsigned SrcAlign = DAG.getDataLayout().getPrefTypeAlignment(
SrcOp.getValueType().getTypeForEVT(*DAG.getContext()));
@@ -1969,22 +1661,21 @@ SDValue SelectionDAGLegalize::EmitStackConvert(SDValue SrcOp,
SDValue Store;
if (SrcSize > SlotSize)
- Store = DAG.getTruncStore(DAG.getEntryNode(), dl, SrcOp, FIPtr,
- PtrInfo, SlotVT, false, false, SrcAlign);
+ Store = DAG.getTruncStore(DAG.getEntryNode(), dl, SrcOp, FIPtr, PtrInfo,
+ SlotVT, SrcAlign);
else {
assert(SrcSize == SlotSize && "Invalid store");
- Store = DAG.getStore(DAG.getEntryNode(), dl, SrcOp, FIPtr,
- PtrInfo, false, false, SrcAlign);
+ Store =
+ DAG.getStore(DAG.getEntryNode(), dl, SrcOp, FIPtr, PtrInfo, SrcAlign);
}
// Result is a load from the stack slot.
if (SlotSize == DestSize)
- return DAG.getLoad(DestVT, dl, Store, FIPtr, PtrInfo,
- false, false, false, DestAlign);
+ return DAG.getLoad(DestVT, dl, Store, FIPtr, PtrInfo, DestAlign);
assert(SlotSize < DestSize && "Unknown extension!");
- return DAG.getExtLoad(ISD::EXTLOAD, dl, DestVT, Store, FIPtr,
- PtrInfo, SlotVT, false, false, false, DestAlign);
+ return DAG.getExtLoad(ISD::EXTLOAD, dl, DestVT, Store, FIPtr, PtrInfo, SlotVT,
+ DestAlign);
}
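
EmitStackConvert's three cases reduce to: truncate on the way into the slot when the source is wider, plain copy when the sizes match, extend on the way out when the destination is wider. A sketch on concrete widths (zero extension is assumed here for readability; the EXTLOAD actually leaves the high bits unspecified, and the helper name is ours):

    #include <cstdint>

    uint64_t stackConvert64via32(uint64_t Src) {
      uint32_t Slot = static_cast<uint32_t>(Src); // truncating store: SrcSize > SlotSize
      uint64_t Dest = Slot;                       // extending load: SlotSize < DestSize
      return Dest;
    }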
SDValue SelectionDAGLegalize::ExpandSCALAR_TO_VECTOR(SDNode *Node) {
@@ -1999,11 +1690,10 @@ SDValue SelectionDAGLegalize::ExpandSCALAR_TO_VECTOR(SDNode *Node) {
SDValue Ch = DAG.getTruncStore(
DAG.getEntryNode(), dl, Node->getOperand(0), StackPtr,
MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI),
- Node->getValueType(0).getVectorElementType(), false, false, 0);
+ Node->getValueType(0).getVectorElementType());
return DAG.getLoad(
Node->getValueType(0), dl, Ch, StackPtr,
- MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI), false,
- false, false, 0);
+ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI));
}
static bool
@@ -2025,7 +1715,7 @@ ExpandBVWithShuffles(SDNode *Node, SelectionDAG &DAG,
NewIntermedVals;
for (unsigned i = 0; i < NumElems; ++i) {
SDValue V = Node->getOperand(i);
- if (V.getOpcode() == ISD::UNDEF)
+ if (V.isUndef())
continue;
SDValue Vec;
@@ -2044,7 +1734,7 @@ ExpandBVWithShuffles(SDNode *Node, SelectionDAG &DAG,
SmallVector<int, 16> FinalIndices;
FinalIndices.reserve(IntermedVals[i].second.size() +
IntermedVals[i+1].second.size());
-
+
int k = 0;
for (unsigned j = 0, f = IntermedVals[i].second.size(); j != f;
++j, ++k) {
@@ -2061,7 +1751,7 @@ ExpandBVWithShuffles(SDNode *Node, SelectionDAG &DAG,
if (Phase)
Shuffle = DAG.getVectorShuffle(VT, dl, IntermedVals[i].first,
IntermedVals[i+1].first,
- ShuffleVec.data());
+ ShuffleVec);
else if (!TLI.isShuffleMaskLegal(ShuffleVec, VT))
return false;
NewIntermedVals.push_back(
@@ -2092,7 +1782,7 @@ ExpandBVWithShuffles(SDNode *Node, SelectionDAG &DAG,
ShuffleVec[IntermedVals[1].second[i]] = NumElems + i;
if (Phase)
- Res = DAG.getVectorShuffle(VT, dl, Vec1, Vec2, ShuffleVec.data());
+ Res = DAG.getVectorShuffle(VT, dl, Vec1, Vec2, ShuffleVec);
else if (!TLI.isShuffleMaskLegal(ShuffleVec, VT))
return false;
}
@@ -2117,7 +1807,7 @@ SDValue SelectionDAGLegalize::ExpandBUILD_VECTOR(SDNode *Node) {
bool isConstant = true;
for (unsigned i = 0; i < NumElems; ++i) {
SDValue V = Node->getOperand(i);
- if (V.getOpcode() == ISD::UNDEF)
+ if (V.isUndef())
continue;
if (i > 0)
isOnlyLowElement = false;
@@ -2160,7 +1850,7 @@ SDValue SelectionDAGLegalize::ExpandBUILD_VECTOR(SDNode *Node) {
CI->getZExtValue()));
}
} else {
- assert(Node->getOperand(i).getOpcode() == ISD::UNDEF);
+ assert(Node->getOperand(i).isUndef());
Type *OpNTy = EltVT.getTypeForEVT(*DAG.getContext());
CV.push_back(UndefValue::get(OpNTy));
}
@@ -2171,13 +1861,13 @@ SDValue SelectionDAGLegalize::ExpandBUILD_VECTOR(SDNode *Node) {
unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
return DAG.getLoad(
VT, dl, DAG.getEntryNode(), CPIdx,
- MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), false,
- false, false, Alignment);
+ MachinePointerInfo::getConstantPool(DAG.getMachineFunction()),
+ Alignment);
}
SmallSet<SDValue, 16> DefinedValues;
for (unsigned i = 0; i < NumElems; ++i) {
- if (Node->getOperand(i).getOpcode() == ISD::UNDEF)
+ if (Node->getOperand(i).isUndef())
continue;
DefinedValues.insert(Node->getOperand(i));
}
@@ -2187,7 +1877,7 @@ SDValue SelectionDAGLegalize::ExpandBUILD_VECTOR(SDNode *Node) {
SmallVector<int, 8> ShuffleVec(NumElems, -1);
for (unsigned i = 0; i < NumElems; ++i) {
SDValue V = Node->getOperand(i);
- if (V.getOpcode() == ISD::UNDEF)
+ if (V.isUndef())
continue;
ShuffleVec[i] = V == Value1 ? 0 : NumElems;
}
@@ -2201,7 +1891,7 @@ SDValue SelectionDAGLegalize::ExpandBUILD_VECTOR(SDNode *Node) {
Vec2 = DAG.getUNDEF(VT);
// Return shuffle(LowValVec, undef, <0,0,0,0>)
- return DAG.getVectorShuffle(VT, dl, Vec1, Vec2, ShuffleVec.data());
+ return DAG.getVectorShuffle(VT, dl, Vec1, Vec2, ShuffleVec);
}
} else {
SDValue Res;
@@ -2243,15 +1933,18 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node,
SDValue InChain = DAG.getEntryNode();
  // isTailCall may be true since the callee does not reference the caller's stack
- // frame. Check if it's in the right position.
+ // frame. Check if it's in the right position and that the return types match.
SDValue TCChain = InChain;
- bool isTailCall = TLI.isInTailCallPosition(DAG, Node, TCChain);
+ const Function *F = DAG.getMachineFunction().getFunction();
+ bool isTailCall =
+ TLI.isInTailCallPosition(DAG, Node, TCChain) &&
+ (RetTy == F->getReturnType() || F->getReturnType()->isVoidTy());
if (isTailCall)
InChain = TCChain;
TargetLowering::CallLoweringInfo CLI(DAG);
CLI.setDebugLoc(SDLoc(Node)).setChain(InChain)
- .setCallee(TLI.getLibcallCallingConv(LC), RetTy, Callee, std::move(Args), 0)
+ .setCallee(TLI.getLibcallCallingConv(LC), RetTy, Callee, std::move(Args))
.setTailCall(isTailCall).setSExtResult(isSigned).setZExtResult(!isSigned);
std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI);
@@ -2267,7 +1960,7 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node,
/// and returning a result of type RetVT.
SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, EVT RetVT,
const SDValue *Ops, unsigned NumOps,
- bool isSigned, SDLoc dl) {
+ bool isSigned, const SDLoc &dl) {
TargetLowering::ArgListTy Args;
Args.reserve(NumOps);
@@ -2286,7 +1979,7 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, EVT RetVT,
TargetLowering::CallLoweringInfo CLI(DAG);
CLI.setDebugLoc(dl).setChain(DAG.getEntryNode())
- .setCallee(TLI.getLibcallCallingConv(LC), RetTy, Callee, std::move(Args), 0)
+ .setCallee(TLI.getLibcallCallingConv(LC), RetTy, Callee, std::move(Args))
.setSExtResult(isSigned).setZExtResult(!isSigned);
std::pair<SDValue,SDValue> CallInfo = TLI.LowerCallTo(CLI);
@@ -2320,7 +2013,7 @@ SelectionDAGLegalize::ExpandChainLibCall(RTLIB::Libcall LC,
TargetLowering::CallLoweringInfo CLI(DAG);
CLI.setDebugLoc(SDLoc(Node)).setChain(InChain)
- .setCallee(TLI.getLibcallCallingConv(LC), RetTy, Callee, std::move(Args), 0)
+ .setCallee(TLI.getLibcallCallingConv(LC), RetTy, Callee, std::move(Args))
.setSExtResult(isSigned).setZExtResult(!isSigned);
std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI);
@@ -2415,14 +2108,14 @@ SelectionDAGLegalize::ExpandDivRemLibCall(SDNode *Node,
SDLoc dl(Node);
TargetLowering::CallLoweringInfo CLI(DAG);
CLI.setDebugLoc(dl).setChain(InChain)
- .setCallee(TLI.getLibcallCallingConv(LC), RetTy, Callee, std::move(Args), 0)
+ .setCallee(TLI.getLibcallCallingConv(LC), RetTy, Callee, std::move(Args))
.setSExtResult(isSigned).setZExtResult(!isSigned);
std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI);
// Remainder is loaded back from the stack frame.
- SDValue Rem = DAG.getLoad(RetVT, dl, CallInfo.second, FIPtr,
- MachinePointerInfo(), false, false, false, 0);
+ SDValue Rem =
+ DAG.getLoad(RetVT, dl, CallInfo.second, FIPtr, MachinePointerInfo());
Results.push_back(CallInfo.first);
Results.push_back(Rem);
}
@@ -2449,8 +2142,7 @@ static bool canCombineSinCosLibcall(SDNode *Node, const TargetLowering &TLI,
return false;
// GNU sin/cos functions set errno while sincos does not. Therefore
// combining sin and cos is only safe if unsafe-fpmath is enabled.
- bool isGNU = Triple(TM.getTargetTriple()).getEnvironment() == Triple::GNU;
- if (isGNU && !TM.Options.UnsafeFPMath)
+ if (TM.getTargetTriple().isGNUEnvironment() && !TM.Options.UnsafeFPMath)
return false;
return true;
}
@@ -2528,26 +2220,25 @@ SelectionDAGLegalize::ExpandSinCosLibCall(SDNode *Node,
TargetLowering::CallLoweringInfo CLI(DAG);
CLI.setDebugLoc(dl).setChain(InChain)
.setCallee(TLI.getLibcallCallingConv(LC),
- Type::getVoidTy(*DAG.getContext()), Callee, std::move(Args), 0);
+ Type::getVoidTy(*DAG.getContext()), Callee, std::move(Args));
std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI);
- Results.push_back(DAG.getLoad(RetVT, dl, CallInfo.second, SinPtr,
- MachinePointerInfo(), false, false, false, 0));
- Results.push_back(DAG.getLoad(RetVT, dl, CallInfo.second, CosPtr,
- MachinePointerInfo(), false, false, false, 0));
+ Results.push_back(
+ DAG.getLoad(RetVT, dl, CallInfo.second, SinPtr, MachinePointerInfo()));
+ Results.push_back(
+ DAG.getLoad(RetVT, dl, CallInfo.second, CosPtr, MachinePointerInfo()));
}
/// This function is responsible for legalizing an
/// INT_TO_FP operation of the specified operand when the target requests that
/// we expand it. At this point, we know that the result and operand types are
/// legal for the target.
-SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned,
- SDValue Op0,
+SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, SDValue Op0,
EVT DestVT,
- SDLoc dl) {
+ const SDLoc &dl) {
// TODO: Should any fast-math-flags be set for the created nodes?
-
+
if (Op0.getValueType() == MVT::i32 && TLI.isTypeLegal(MVT::f64)) {
// simple 32-bit [signed|unsigned] integer to float/double expansion
@@ -2574,18 +2265,16 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned,
Op0Mapped = Op0;
}
// store the lo of the constructed double - based on integer input
- SDValue Store1 = DAG.getStore(DAG.getEntryNode(), dl,
- Op0Mapped, Lo, MachinePointerInfo(),
- false, false, 0);
+ SDValue Store1 = DAG.getStore(DAG.getEntryNode(), dl, Op0Mapped, Lo,
+ MachinePointerInfo());
// initial hi portion of constructed double
SDValue InitialHi = DAG.getConstant(0x43300000u, dl, MVT::i32);
// store the hi of the constructed double - biased exponent
- SDValue Store2 = DAG.getStore(Store1, dl, InitialHi, Hi,
- MachinePointerInfo(),
- false, false, 0);
+ SDValue Store2 =
+ DAG.getStore(Store1, dl, InitialHi, Hi, MachinePointerInfo());
// load the constructed double
- SDValue Load = DAG.getLoad(MVT::f64, dl, Store2, StackSlot,
- MachinePointerInfo(), false, false, false, 0);
+ SDValue Load =
+ DAG.getLoad(MVT::f64, dl, Store2, StackSlot, MachinePointerInfo());
// FP constant to bias correct the final result
SDValue Bias = DAG.getConstantFP(isSigned ?
BitsToDouble(0x4330000080000000ULL) :
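
For readers unfamiliar with the 0x43300000 constant: the expansion builds a double whose high word is a fixed exponent pattern and whose low word is the integer, then subtracts the bias. A minimal sketch of the unsigned i32 case, assuming a little-endian word layout and a hypothetical helper name:

    #include <cstdint>
    #include <cstring>

    // With high word 0x43300000 the constructed double equals 2^52 + X
    // exactly, so subtracting the 2^52 bias leaves X.
    double uintToDouble(uint32_t X) {
      uint64_t Bits = (uint64_t(0x43300000) << 32) | X; // hi word | lo word
      double D;
      std::memcpy(&D, &Bits, sizeof(D));                // "load the constructed double"
      double Bias;
      uint64_t BiasBits = uint64_t(0x43300000) << 32;   // the unsigned bias, 2^52
      std::memcpy(&Bias, &BiasBits, sizeof(Bias));
      return D - Bias;                                  // FSUB bias-corrects the result
    }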
@@ -2733,13 +2422,13 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned,
if (DestVT == MVT::f32)
FudgeInReg = DAG.getLoad(
MVT::f32, dl, DAG.getEntryNode(), CPIdx,
- MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), false,
- false, false, Alignment);
+ MachinePointerInfo::getConstantPool(DAG.getMachineFunction()),
+ Alignment);
else {
SDValue Load = DAG.getExtLoad(
ISD::EXTLOAD, dl, DestVT, DAG.getEntryNode(), CPIdx,
MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), MVT::f32,
- false, false, false, Alignment);
+ Alignment);
HandleSDNode Handle(Load);
LegalizeOp(Load.getNode());
FudgeInReg = Handle.getValue();
@@ -2753,10 +2442,9 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned,
/// we promote it. At this point, we know that the result and operand types are
/// legal for the target, and that there is a legal UINT_TO_FP or SINT_TO_FP
/// operation that takes a larger input.
-SDValue SelectionDAGLegalize::PromoteLegalINT_TO_FP(SDValue LegalOp,
- EVT DestVT,
+SDValue SelectionDAGLegalize::PromoteLegalINT_TO_FP(SDValue LegalOp, EVT DestVT,
bool isSigned,
- SDLoc dl) {
+ const SDLoc &dl) {
// First step, figure out the appropriate *INT_TO_FP operation to use.
EVT NewInTy = LegalOp.getValueType();
@@ -2795,10 +2483,9 @@ SDValue SelectionDAGLegalize::PromoteLegalINT_TO_FP(SDValue LegalOp,
/// we promote it. At this point, we know that the result and operand types are
/// legal for the target, and that there is a legal FP_TO_UINT or FP_TO_SINT
/// operation that returns a larger result.
-SDValue SelectionDAGLegalize::PromoteLegalFP_TO_INT(SDValue LegalOp,
- EVT DestVT,
+SDValue SelectionDAGLegalize::PromoteLegalFP_TO_INT(SDValue LegalOp, EVT DestVT,
bool isSigned,
- SDLoc dl) {
+ const SDLoc &dl) {
// First step, figure out the appropriate FP_TO*INT operation to use.
EVT NewOutTy = DestVT;
@@ -2835,11 +2522,11 @@ SDValue SelectionDAGLegalize::PromoteLegalFP_TO_INT(SDValue LegalOp,
}
/// Open code the operations for BITREVERSE.
-SDValue SelectionDAGLegalize::ExpandBITREVERSE(SDValue Op, SDLoc dl) {
+SDValue SelectionDAGLegalize::ExpandBITREVERSE(SDValue Op, const SDLoc &dl) {
EVT VT = Op.getValueType();
EVT SHVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
unsigned Sz = VT.getScalarSizeInBits();
-
+
SDValue Tmp, Tmp2;
Tmp = DAG.getConstant(0, dl, VT);
for (unsigned I = 0, J = Sz-1; I < Sz; ++I, --J) {
@@ -2849,7 +2536,7 @@ SDValue SelectionDAGLegalize::ExpandBITREVERSE(SDValue Op, SDLoc dl) {
else
Tmp2 =
DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(I - J, dl, SHVT));
-
+
APInt Shift(Sz, 1);
Shift = Shift.shl(J);
Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Shift, dl, VT));
@@ -2860,7 +2547,7 @@ SDValue SelectionDAGLegalize::ExpandBITREVERSE(SDValue Op, SDLoc dl) {
}
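
The loop above moves each bit I into its mirrored position J = Sz-1-I. A standalone scalar version of the same expansion (function name is ours):

    #include <cstdint>

    uint32_t bitReverse32(uint32_t Op) {
      const unsigned Sz = 32;
      uint32_t Tmp = 0;
      for (unsigned I = 0, J = Sz - 1; I < Sz; ++I, --J) {
        // Shift bit I into position J, in whichever direction is needed.
        uint32_t Tmp2 = (I < J) ? (Op << (J - I)) : (Op >> (I - J));
        Tmp2 &= uint32_t(1) << J; // keep only the destination bit
        Tmp |= Tmp2;              // accumulate into the result
      }
      return Tmp;
    }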
/// Open code the operations for BSWAP of the specified operation.
-SDValue SelectionDAGLegalize::ExpandBSWAP(SDValue Op, SDLoc dl) {
+SDValue SelectionDAGLegalize::ExpandBSWAP(SDValue Op, const SDLoc &dl) {
EVT VT = Op.getValueType();
EVT SHVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5, Tmp6, Tmp7, Tmp8;
@@ -2914,7 +2601,7 @@ SDValue SelectionDAGLegalize::ExpandBSWAP(SDValue Op, SDLoc dl) {
/// Expand the specified bitcount instruction into operations.
SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op,
- SDLoc dl) {
+ const SDLoc &dl) {
switch (Opc) {
default: llvm_unreachable("Cannot expand this yet!");
case ISD::CTPOP: {
@@ -3046,6 +2733,21 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
case ISD::FRAME_TO_ARGS_OFFSET:
Results.push_back(DAG.getConstant(0, dl, Node->getValueType(0)));
break;
+ case ISD::EH_DWARF_CFA: {
+ SDValue CfaArg = DAG.getSExtOrTrunc(Node->getOperand(0), dl,
+ TLI.getPointerTy(DAG.getDataLayout()));
+ SDValue Offset = DAG.getNode(ISD::ADD, dl,
+ CfaArg.getValueType(),
+ DAG.getNode(ISD::FRAME_TO_ARGS_OFFSET, dl,
+ CfaArg.getValueType()),
+ CfaArg);
+ SDValue FA = DAG.getNode(
+ ISD::FRAMEADDR, dl, TLI.getPointerTy(DAG.getDataLayout()),
+ DAG.getConstant(0, dl, TLI.getPointerTy(DAG.getDataLayout())));
+ Results.push_back(DAG.getNode(ISD::ADD, dl, FA.getValueType(),
+ FA, Offset));
+ break;
+ }
case ISD::FLT_ROUNDS_:
Results.push_back(DAG.getConstant(1, dl, Node->getValueType(0)));
break;
@@ -3111,10 +2813,38 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
cast<AtomicSDNode>(Node)->getFailureOrdering(),
cast<AtomicSDNode>(Node)->getSynchScope());
- SDValue Success = DAG.getSetCC(SDLoc(Node), Node->getValueType(1),
- Res, Node->getOperand(2), ISD::SETEQ);
+ SDValue ExtRes = Res;
+ SDValue LHS = Res;
+ SDValue RHS = Node->getOperand(1);
- Results.push_back(Res.getValue(0));
+ EVT AtomicType = cast<AtomicSDNode>(Node)->getMemoryVT();
+ EVT OuterType = Node->getValueType(0);
+ switch (TLI.getExtendForAtomicOps()) {
+ case ISD::SIGN_EXTEND:
+ LHS = DAG.getNode(ISD::AssertSext, dl, OuterType, Res,
+ DAG.getValueType(AtomicType));
+ RHS = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, OuterType,
+ Node->getOperand(2), DAG.getValueType(AtomicType));
+ ExtRes = LHS;
+ break;
+ case ISD::ZERO_EXTEND:
+ LHS = DAG.getNode(ISD::AssertZext, dl, OuterType, Res,
+ DAG.getValueType(AtomicType));
+ RHS = DAG.getNode(ISD::ZERO_EXTEND, dl, OuterType, Node->getOperand(2));
+ ExtRes = LHS;
+ break;
+ case ISD::ANY_EXTEND:
+ LHS = DAG.getZeroExtendInReg(Res, dl, AtomicType);
+ RHS = DAG.getNode(ISD::ZERO_EXTEND, dl, OuterType, Node->getOperand(2));
+ break;
+ default:
+ llvm_unreachable("Invalid atomic op extension");
+ }
+
+ SDValue Success =
+ DAG.getSetCC(dl, Node->getValueType(1), LHS, RHS, ISD::SETEQ);
+
+ Results.push_back(ExtRes.getValue(0));
Results.push_back(Success);
Results.push_back(Res.getValue(1));
break;
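
The point of the new switch is that a narrow cmpxchg result arrives in a wider register whose upper bits depend on the target's extension convention, so both sides of the SETEQ must be normalized first. A rough model of the ANY_EXTEND arm for an i8 atomic in an i32 register (all names ours):

    #include <cstdint>

    // Under ANY_EXTEND the upper 24 bits of the i8 result are unspecified,
    // so reduce both the loaded value and the expected value to 8 bits
    // before comparing -- otherwise stale high bits could fake a failure.
    bool cmpxchgSucceeded(uint32_t RawResult, uint32_t Expected) {
      uint32_t LHS = RawResult & 0xFF; // getZeroExtendInReg(Res, AtomicType)
      uint32_t RHS = Expected & 0xFF;  // ZERO_EXTEND of the compare operand
      return LHS == RHS;               // SETEQ
    }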
@@ -3400,7 +3130,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
Results.push_back(Tmp1);
break;
}
-
+
case ISD::FSIN:
case ISD::FCOS: {
EVT VT = Node->getValueType(0);
@@ -3442,7 +3172,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
SDValue FloatVal = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, Op,
DAG.getIntPtrConstant(0, dl));
Results.push_back(
- DAG.getNode(ISD::FP_TO_FP16, dl, MVT::i16, FloatVal));
+ DAG.getNode(ISD::FP_TO_FP16, dl, Node->getValueType(0), FloatVal));
}
}
break;
@@ -3760,10 +3490,9 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
EVT MemVT = EVT::getIntegerVT(*DAG.getContext(), EntrySize * 8);
SDValue LD = DAG.getExtLoad(
ISD::SEXTLOAD, dl, PTy, Chain, Addr,
- MachinePointerInfo::getJumpTable(DAG.getMachineFunction()), MemVT,
- false, false, false, 0);
+ MachinePointerInfo::getJumpTable(DAG.getMachineFunction()), MemVT);
Addr = LD;
- if (TM.getRelocationModel() == Reloc::PIC_) {
+ if (TM.isPositionIndependent()) {
// For PIC, the sequence is:
// BRIND(load(Jumptable + index) + RelocBase)
// RelocBase can be JumpTable, GOT or some sort of global base.
@@ -3786,7 +3515,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
Node->getOperand(2));
} else {
// We test only the i1 bit. Skip the AND if UNDEF.
- Tmp3 = (Tmp2.getOpcode() == ISD::UNDEF) ? Tmp2 :
+        Tmp3 = Tmp2.isUndef() ? Tmp2 :
DAG.getNode(ISD::AND, dl, Tmp2.getValueType(), Tmp2,
DAG.getConstant(1, dl, Tmp2.getValueType()));
Tmp1 = DAG.getNode(ISD::BR_CC, dl, MVT::Other, Tmp1,
@@ -4008,7 +3737,7 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
.setCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()),
DAG.getExternalSymbol("__sync_synchronize",
TLI.getPointerTy(DAG.getDataLayout())),
- std::move(Args), 0);
+ std::move(Args));
std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI);
@@ -4031,7 +3760,7 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
case ISD::ATOMIC_LOAD_UMAX:
case ISD::ATOMIC_CMP_SWAP: {
MVT VT = cast<AtomicSDNode>(Node)->getMemoryVT().getSimpleVT();
- RTLIB::Libcall LC = RTLIB::getATOMIC(Opc, VT);
+ RTLIB::Libcall LC = RTLIB::getSYNC(Opc, VT);
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected atomic op or value type!");
std::pair<SDValue, SDValue> Tmp = ExpandChainLibCall(LC, Node, false);
@@ -4048,7 +3777,7 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
.setCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()),
DAG.getExternalSymbol("abort",
TLI.getPointerTy(DAG.getDataLayout())),
- std::move(Args), 0);
+ std::move(Args));
std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI);
Results.push_back(CallResult.second);
@@ -4269,18 +3998,20 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
case ISD::CTPOP:
// Zero extend the argument.
Tmp1 = DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, Node->getOperand(0));
+ if (Node->getOpcode() == ISD::CTTZ) {
+ // The count is the same in the promoted type except if the original
+ // value was zero. This can be handled by setting the bit just off
+ // the top of the original type.
+ auto TopBit = APInt::getOneBitSet(NVT.getSizeInBits(),
+ OVT.getSizeInBits());
+ Tmp1 = DAG.getNode(ISD::OR, dl, NVT, Tmp1,
+ DAG.getConstant(TopBit, dl, NVT));
+ }
// Perform the larger operation. For CTPOP and CTTZ_ZERO_UNDEF, this is
// already the correct result.
Tmp1 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1);
- if (Node->getOpcode() == ISD::CTTZ) {
- // FIXME: This should set a bit in the zero extended value instead.
- Tmp2 = DAG.getSetCC(dl, getSetCCResultType(NVT),
- Tmp1, DAG.getConstant(NVT.getSizeInBits(), dl, NVT),
- ISD::SETEQ);
- Tmp1 = DAG.getSelect(dl, NVT, Tmp2,
- DAG.getConstant(OVT.getSizeInBits(), dl, NVT), Tmp1);
- } else if (Node->getOpcode() == ISD::CTLZ ||
- Node->getOpcode() == ISD::CTLZ_ZERO_UNDEF) {
+ if (Node->getOpcode() == ISD::CTLZ ||
+ Node->getOpcode() == ISD::CTLZ_ZERO_UNDEF) {
// Tmp1 = Tmp1 - (sizeinbits(NVT) - sizeinbits(Old VT))
Tmp1 = DAG.getNode(ISD::SUB, dl, NVT, Tmp1,
DAG.getConstant(NVT.getSizeInBits() -
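
The CTTZ change above replaces a compare-and-select with a single OR: setting the bit just past the original width guarantees the promoted count never exceeds the old bit width. Concretely, for i8 promoted to i32 (helper name ours):

    #include <cstdint>

    unsigned cttz8_via32(uint8_t X) {
      // Set bit 8, just off the top of the original i8 type, so a zero
      // input yields a count of 8 instead of 32.
      uint32_t Promoted = uint32_t(X) | 0x100;
      unsigned N = 0; // portable trailing-zero count on the promoted value
      while (!(Promoted & 1)) {
        Promoted >>= 1;
        ++N;
      }
      return N; // 8 when X == 0, cttz(X) otherwise
    }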
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
index 6c0193a..31ebf7b 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
@@ -149,9 +149,26 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_ConstantFP(SDNode *N, unsigned ResNo) {
if (isLegalInHWReg(N->getValueType(ResNo)))
return SDValue(N, ResNo);
ConstantFPSDNode *CN = cast<ConstantFPSDNode>(N);
- return DAG.getConstant(CN->getValueAPF().bitcastToAPInt(), SDLoc(CN),
- TLI.getTypeToTransformTo(*DAG.getContext(),
- CN->getValueType(0)));
+ // In ppcf128, the high 64 bits are always first in memory regardless
+ // of Endianness. LLVM's APFloat representation is not Endian sensitive,
+ // and so always converts into a 128-bit APInt in a non-Endian-sensitive
+  // way. However, APInts are serialized in an Endian-sensitive fashion,
+ // so on big-Endian targets, the two doubles are output in the wrong
+ // order. Fix this by manually flipping the order of the high 64 bits
+ // and the low 64 bits here.
+ if (DAG.getDataLayout().isBigEndian() &&
+ CN->getValueType(0).getSimpleVT() == llvm::MVT::ppcf128) {
+ uint64_t words[2] = { CN->getValueAPF().bitcastToAPInt().getRawData()[1],
+ CN->getValueAPF().bitcastToAPInt().getRawData()[0] };
+ APInt Val(128, words);
+ return DAG.getConstant(Val, SDLoc(CN),
+ TLI.getTypeToTransformTo(*DAG.getContext(),
+ CN->getValueType(0)));
+ } else {
+ return DAG.getConstant(CN->getValueAPF().bitcastToAPInt(), SDLoc(CN),
+ TLI.getTypeToTransformTo(*DAG.getContext(),
+ CN->getValueType(0)));
+ }
}
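
In other words: getRawData() always hands back the least-significant 64-bit word first, while a big-endian ppcf128 image must start with the high double, hence the manual swap. The flip itself is just (helper name ours):

    #include <cstdint>

    // Word 0 of the emitted constant gets the high 64 bits,
    // word 1 the low 64 bits.
    void flipPPCF128Words(const uint64_t Raw[2], uint64_t Words[2]) {
      Words[0] = Raw[1];
      Words[1] = Raw[0];
    }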
SDValue DAGTypeLegalizer::SoftenFloatRes_EXTRACT_VECTOR_ELT(SDNode *N) {
@@ -614,12 +631,13 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N, unsigned ResNo) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
SDLoc dl(N);
+ auto MMOFlags =
+ L->getMemOperand()->getFlags() & ~MachineMemOperand::MOInvariant;
SDValue NewL;
if (L->getExtensionType() == ISD::NON_EXTLOAD) {
- NewL = DAG.getLoad(L->getAddressingMode(), L->getExtensionType(),
- NVT, dl, L->getChain(), L->getBasePtr(), L->getOffset(),
- L->getPointerInfo(), NVT, L->isVolatile(),
- L->isNonTemporal(), false, L->getAlignment(),
+ NewL = DAG.getLoad(L->getAddressingMode(), L->getExtensionType(), NVT, dl,
+ L->getChain(), L->getBasePtr(), L->getOffset(),
+ L->getPointerInfo(), NVT, L->getAlignment(), MMOFlags,
L->getAAInfo());
// Legalized the chain result - switch anything that used the old chain to
// use the new one.
@@ -629,12 +647,10 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N, unsigned ResNo) {
}
// Do a non-extending load followed by FP_EXTEND.
- NewL = DAG.getLoad(L->getAddressingMode(), ISD::NON_EXTLOAD,
- L->getMemoryVT(), dl, L->getChain(),
- L->getBasePtr(), L->getOffset(), L->getPointerInfo(),
- L->getMemoryVT(), L->isVolatile(),
- L->isNonTemporal(), false, L->getAlignment(),
- L->getAAInfo());
+ NewL = DAG.getLoad(L->getAddressingMode(), ISD::NON_EXTLOAD, L->getMemoryVT(),
+ dl, L->getChain(), L->getBasePtr(), L->getOffset(),
+ L->getPointerInfo(), L->getMemoryVT(), L->getAlignment(),
+ MMOFlags, L->getAAInfo());
// Legalized the chain result - switch anything that used the old chain to
// use the new one.
ReplaceValueWith(SDValue(N, 1), NewL.getValue(1));
@@ -800,6 +816,7 @@ bool DAGTypeLegalizer::CanSkipSoftenFloatOperand(SDNode *N, unsigned OpNo) {
case ISD::FCOPYSIGN:
case ISD::FNEG:
case ISD::Register:
+ case ISD::SELECT:
return true;
}
return false;
@@ -1516,7 +1533,7 @@ bool DAGTypeLegalizer::ExpandFloatOperand(SDNode *N, unsigned OpNo) {
void DAGTypeLegalizer::FloatExpandSetCCOperands(SDValue &NewLHS,
SDValue &NewRHS,
ISD::CondCode &CCCode,
- SDLoc dl) {
+ const SDLoc &dl) {
SDValue LHSLo, LHSHi, RHSLo, RHSHi;
GetExpandedFloat(NewLHS, LHSLo, LHSHi);
GetExpandedFloat(NewRHS, RHSLo, RHSHi);
@@ -1868,6 +1885,8 @@ void DAGTypeLegalizer::PromoteFloatResult(SDNode *N, unsigned ResNo) {
// Binary FP Operations
case ISD::FADD:
case ISD::FDIV:
+ case ISD::FMAXNAN:
+ case ISD::FMINNAN:
case ISD::FMAXNUM:
case ISD::FMINNUM:
case ISD::FMUL:
@@ -2063,13 +2082,14 @@ SDValue DAGTypeLegalizer::PromoteFloatRes_LOAD(SDNode *N) {
LoadSDNode *L = cast<LoadSDNode>(N);
EVT VT = N->getValueType(0);
- // Load the value as an integer value with the same number of bits
+ // Load the value as an integer value with the same number of bits.
EVT IVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
- SDValue newL = DAG.getLoad(L->getAddressingMode(), L->getExtensionType(),
- IVT, SDLoc(N), L->getChain(), L->getBasePtr(),
- L->getOffset(), L->getPointerInfo(), IVT, L->isVolatile(),
- L->isNonTemporal(), false, L->getAlignment(),
- L->getAAInfo());
+ auto MMOFlags =
+ L->getMemOperand()->getFlags() & ~MachineMemOperand::MOInvariant;
+ SDValue newL = DAG.getLoad(L->getAddressingMode(), L->getExtensionType(), IVT,
+ SDLoc(N), L->getChain(), L->getBasePtr(),
+ L->getOffset(), L->getPointerInfo(), IVT,
+ L->getAlignment(), MMOFlags, L->getAAInfo());
// Legalize the chain result by replacing uses of the old value chain with the
// new one
ReplaceValueWith(SDValue(N, 1), newL.getValue(1));
@@ -2102,9 +2122,14 @@ SDValue DAGTypeLegalizer::PromoteFloatRes_SELECT_CC(SDNode *N) {
// Construct a SDNode that transforms the SINT or UINT operand to the promoted
// float type.
SDValue DAGTypeLegalizer::PromoteFloatRes_XINT_TO_FP(SDNode *N) {
+ SDLoc DL(N);
EVT VT = N->getValueType(0);
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
- return DAG.getNode(N->getOpcode(), SDLoc(N), NVT, N->getOperand(0));
+ SDValue NV = DAG.getNode(N->getOpcode(), DL, NVT, N->getOperand(0));
+ // Round the value to the desired precision (that of the source type).
+ return DAG.getNode(
+ ISD::FP_EXTEND, DL, NVT,
+ DAG.getNode(ISD::FP_ROUND, DL, VT, NV, DAG.getIntPtrConstant(0, DL)));
}
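
The FP_ROUND/FP_EXTEND pair added here keeps the promoted conversion from being more precise than the original operation. A sketch with double standing in for the promoted type and float for the original; it models the round-then-extend pairing, not LLVM's exact f16 semantics:

    #include <cstdint>

    double intToFpPromoted(int64_t X) {
      double Wide = static_cast<double>(X);     // XINT_TO_FP in the promoted type
      float Rounded = static_cast<float>(Wide); // FP_ROUND to the original precision
      return static_cast<double>(Rounded);      // FP_EXTEND back for later uses
    }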
SDValue DAGTypeLegalizer::PromoteFloatRes_UNDEF(SDNode *N) {
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index 74f80db..9a18943 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -436,10 +436,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_FP_TO_FP16(SDNode *N) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDLoc dl(N);
- SDValue Res = DAG.getNode(N->getOpcode(), dl, NVT, N->getOperand(0));
-
- return DAG.getNode(ISD::AssertZext, dl,
- NVT, Res, DAG.getValueType(N->getValueType(0)));
+ return DAG.getNode(N->getOpcode(), dl, NVT, N->getOperand(0));
}
SDValue DAGTypeLegalizer::PromoteIntRes_INT_EXTEND(SDNode *N) {
@@ -1374,6 +1371,11 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::OR:
case ISD::XOR: ExpandIntRes_Logical(N, Lo, Hi); break;
+ case ISD::UMAX:
+ case ISD::SMAX:
+ case ISD::UMIN:
+ case ISD::SMIN: ExpandIntRes_MINMAX(N, Lo, Hi); break;
+
case ISD::ADD:
case ISD::SUB: ExpandIntRes_ADDSUB(N, Lo, Hi); break;
@@ -1404,7 +1406,7 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) {
std::pair <SDValue, SDValue> DAGTypeLegalizer::ExpandAtomic(SDNode *Node) {
unsigned Opc = Node->getOpcode();
MVT VT = cast<AtomicSDNode>(Node)->getMemoryVT().getSimpleVT();
- RTLIB::Libcall LC = RTLIB::getATOMIC(Opc, VT);
+ RTLIB::Libcall LC = RTLIB::getSYNC(Opc, VT);
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected atomic op or value type!");
return ExpandChainLibCall(LC, Node, false);
@@ -1442,15 +1444,6 @@ void DAGTypeLegalizer::ExpandShiftByConstant(SDNode *N, const APInt &Amt,
} else if (Amt == NVTBits) {
Lo = DAG.getConstant(0, DL, NVT);
Hi = InL;
- } else if (Amt == 1 &&
- TLI.isOperationLegalOrCustom(ISD::ADDC,
- TLI.getTypeToExpandTo(*DAG.getContext(), NVT))) {
- // Emit this X << 1 as X+X.
- SDVTList VTList = DAG.getVTList(NVT, MVT::Glue);
- SDValue LoOps[2] = { InL, InL };
- Lo = DAG.getNode(ISD::ADDC, DL, VTList, LoOps);
- SDValue HiOps[3] = { InH, InH, Lo.getValue(1) };
- Hi = DAG.getNode(ISD::ADDE, DL, VTList, HiOps);
} else {
Lo = DAG.getNode(ISD::SHL, DL, NVT, InL, DAG.getConstant(Amt, DL, ShTy));
Hi = DAG.getNode(ISD::OR, DL, NVT,
@@ -1675,6 +1668,54 @@ ExpandShiftWithUnknownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) {
}
}
+static std::pair<ISD::CondCode, ISD::NodeType> getExpandedMinMaxOps(int Op) {
+  switch (Op) {
+ default: llvm_unreachable("invalid min/max opcode");
+ case ISD::SMAX:
+ return std::make_pair(ISD::SETGT, ISD::UMAX);
+ case ISD::UMAX:
+ return std::make_pair(ISD::SETUGT, ISD::UMAX);
+ case ISD::SMIN:
+ return std::make_pair(ISD::SETLT, ISD::UMIN);
+ case ISD::UMIN:
+ return std::make_pair(ISD::SETULT, ISD::UMIN);
+ }
+}
+
+void DAGTypeLegalizer::ExpandIntRes_MINMAX(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ SDLoc DL(N);
+ ISD::NodeType LoOpc;
+ ISD::CondCode CondC;
+ std::tie(CondC, LoOpc) = getExpandedMinMaxOps(N->getOpcode());
+
+ // Expand the subcomponents.
+ SDValue LHSL, LHSH, RHSL, RHSH;
+ GetExpandedInteger(N->getOperand(0), LHSL, LHSH);
+ GetExpandedInteger(N->getOperand(1), RHSL, RHSH);
+
+ // Value types
+ EVT NVT = LHSL.getValueType();
+ EVT CCT = getSetCCResultType(NVT);
+
+ // Hi part is always the same op
+ Hi = DAG.getNode(N->getOpcode(), DL, {NVT, NVT}, {LHSH, RHSH});
+
+  // We need to know whether to select the Lo part that corresponds to the
+  // 'winning' Hi part, or whether the Hi parts are equal.
+ SDValue IsHiLeft = DAG.getSetCC(DL, CCT, LHSH, RHSH, CondC);
+ SDValue IsHiEq = DAG.getSetCC(DL, CCT, LHSH, RHSH, ISD::SETEQ);
+
+ // Lo part corresponding to the 'winning' Hi part
+ SDValue LoCmp = DAG.getSelect(DL, NVT, IsHiLeft, LHSL, RHSL);
+
+  // Lo part to use if the Hi parts are equal; this uses the unsigned version.
+ SDValue LoMinMax = DAG.getNode(LoOpc, DL, {NVT, NVT}, {LHSL, RHSL});
+
+ Lo = DAG.getSelect(DL, NVT, IsHiEq, LoMinMax, LoCmp);
+}
+
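
Instantiated for a 128-bit SMAX split into 64-bit halves, the new expansion reads as follows (struct and function names are ours):

    #include <cstdint>

    struct I128 { uint64_t Lo; int64_t Hi; };

    I128 smax128(I128 A, I128 B) {
      bool IsHiLeft = A.Hi > B.Hi;                   // signed SETGT on the Hi parts
      bool IsHiEq   = A.Hi == B.Hi;                  // SETEQ on the Hi parts
      uint64_t LoCmp    = IsHiLeft ? A.Lo : B.Lo;    // Lo of the 'winning' Hi part
      uint64_t LoMinMax = A.Lo > B.Lo ? A.Lo : B.Lo; // unsigned UMAX on the Lo parts
      I128 R;
      R.Hi = A.Hi > B.Hi ? A.Hi : B.Hi;              // Hi part is always the same op
      R.Lo = IsHiEq ? LoMinMax : LoCmp;
      return R;
    }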
void DAGTypeLegalizer::ExpandIntRes_ADDSUB(SDNode *N,
SDValue &Lo, SDValue &Hi) {
SDLoc dl(N);
@@ -2006,9 +2047,7 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N,
SDValue Ptr = N->getBasePtr();
ISD::LoadExtType ExtType = N->getExtensionType();
unsigned Alignment = N->getAlignment();
- bool isVolatile = N->isVolatile();
- bool isNonTemporal = N->isNonTemporal();
- bool isInvariant = N->isInvariant();
+ MachineMemOperand::Flags MMOFlags = N->getMemOperand()->getFlags();
AAMDNodes AAInfo = N->getAAInfo();
SDLoc dl(N);
@@ -2017,9 +2056,8 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N,
if (N->getMemoryVT().bitsLE(NVT)) {
EVT MemVT = N->getMemoryVT();
- Lo = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr, N->getPointerInfo(),
- MemVT, isVolatile, isNonTemporal, isInvariant,
- Alignment, AAInfo);
+ Lo = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr, N->getPointerInfo(), MemVT,
+ Alignment, MMOFlags, AAInfo);
// Remember the chain.
Ch = Lo.getValue(1);
@@ -2041,8 +2079,7 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N,
}
} else if (DAG.getDataLayout().isLittleEndian()) {
// Little-endian - low bits are at low addresses.
- Lo = DAG.getLoad(NVT, dl, Ch, Ptr, N->getPointerInfo(),
- isVolatile, isNonTemporal, isInvariant, Alignment,
+ Lo = DAG.getLoad(NVT, dl, Ch, Ptr, N->getPointerInfo(), Alignment, MMOFlags,
AAInfo);
unsigned ExcessBits =
@@ -2055,8 +2092,7 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N,
DAG.getConstant(IncrementSize, dl, Ptr.getValueType()));
Hi = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr,
N->getPointerInfo().getWithOffset(IncrementSize), NEVT,
- isVolatile, isNonTemporal, isInvariant,
- MinAlign(Alignment, IncrementSize), AAInfo);
+ MinAlign(Alignment, IncrementSize), MMOFlags, AAInfo);
// Build a factor node to remember that this load is independent of the
// other one.
@@ -2074,8 +2110,7 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N,
Hi = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr, N->getPointerInfo(),
EVT::getIntegerVT(*DAG.getContext(),
MemVT.getSizeInBits() - ExcessBits),
- isVolatile, isNonTemporal, isInvariant, Alignment,
- AAInfo);
+ Alignment, MMOFlags, AAInfo);
// Increment the pointer to the other half.
Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
@@ -2084,8 +2119,7 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N,
Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, NVT, Ch, Ptr,
N->getPointerInfo().getWithOffset(IncrementSize),
EVT::getIntegerVT(*DAG.getContext(), ExcessBits),
- isVolatile, isNonTemporal, isInvariant,
- MinAlign(Alignment, IncrementSize), AAInfo);
+ MinAlign(Alignment, IncrementSize), MMOFlags, AAInfo);
// Build a factor node to remember that this load is independent of the
// other one.
@@ -2145,7 +2179,54 @@ void DAGTypeLegalizer::ExpandIntRes_MUL(SDNode *N,
LC = RTLIB::MUL_I64;
else if (VT == MVT::i128)
LC = RTLIB::MUL_I128;
- assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported MUL!");
+
+ if (LC == RTLIB::UNKNOWN_LIBCALL) {
+ // We'll expand the multiplication by brute force because we have no other
+ // options. This is a trivially-generalized version of the code from
+ // Hacker's Delight (itself derived from Knuth's Algorithm M from section
+ // 4.3.1).
+ unsigned Bits = NVT.getSizeInBits();
+ unsigned HalfBits = Bits >> 1;
+ SDValue Mask = DAG.getConstant(APInt::getLowBitsSet(Bits, HalfBits), dl,
+ NVT);
+ SDValue LLL = DAG.getNode(ISD::AND, dl, NVT, LL, Mask);
+ SDValue RLL = DAG.getNode(ISD::AND, dl, NVT, RL, Mask);
+
+ SDValue T = DAG.getNode(ISD::MUL, dl, NVT, LLL, RLL);
+ SDValue TL = DAG.getNode(ISD::AND, dl, NVT, T, Mask);
+
+ EVT ShiftAmtTy = TLI.getShiftAmountTy(NVT, DAG.getDataLayout());
+ if (APInt::getMaxValue(ShiftAmtTy.getSizeInBits()).ult(HalfBits)) {
+ // The type from TLI is too small to fit the shift amount we want.
+ // Override it with i32. The shift will have to be legalized.
+ ShiftAmtTy = MVT::i32;
+ }
+ SDValue Shift = DAG.getConstant(HalfBits, dl, ShiftAmtTy);
+ SDValue TH = DAG.getNode(ISD::SRL, dl, NVT, T, Shift);
+ SDValue LLH = DAG.getNode(ISD::SRL, dl, NVT, LL, Shift);
+ SDValue RLH = DAG.getNode(ISD::SRL, dl, NVT, RL, Shift);
+
+ SDValue U = DAG.getNode(ISD::ADD, dl, NVT,
+ DAG.getNode(ISD::MUL, dl, NVT, LLH, RLL), TH);
+ SDValue UL = DAG.getNode(ISD::AND, dl, NVT, U, Mask);
+ SDValue UH = DAG.getNode(ISD::SRL, dl, NVT, U, Shift);
+
+ SDValue V = DAG.getNode(ISD::ADD, dl, NVT,
+ DAG.getNode(ISD::MUL, dl, NVT, LLL, RLH), UL);
+ SDValue VH = DAG.getNode(ISD::SRL, dl, NVT, V, Shift);
+
+ SDValue W = DAG.getNode(ISD::ADD, dl, NVT,
+ DAG.getNode(ISD::MUL, dl, NVT, LLH, RLH),
+ DAG.getNode(ISD::ADD, dl, NVT, UH, VH));
+ Lo = DAG.getNode(ISD::ADD, dl, NVT, TL,
+ DAG.getNode(ISD::SHL, dl, NVT, V, Shift));
+
+ Hi = DAG.getNode(ISD::ADD, dl, NVT, W,
+ DAG.getNode(ISD::ADD, dl, NVT,
+ DAG.getNode(ISD::MUL, dl, NVT, RH, LL),
+ DAG.getNode(ISD::MUL, dl, NVT, RL, LH)));
+ return;
+ }
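
Run on concrete integers, the brute-force path above is the textbook double-word multiply. This sketch computes the low 128 bits of (LH:LL) * (RH:RL) from 32-bit half-words, keeping the T/U/V/W names of the DAG nodes (the function itself is ours):

    #include <cstdint>

    void mul128(uint64_t LL, uint64_t LH, uint64_t RL, uint64_t RH,
                uint64_t &Lo, uint64_t &Hi) {
      const unsigned HalfBits = 32;
      const uint64_t Mask = 0xFFFFFFFFu;

      uint64_t LLL = LL & Mask, RLL = RL & Mask;
      uint64_t T = LLL * RLL;                    // low half x low half
      uint64_t TL = T & Mask, TH = T >> HalfBits;

      uint64_t LLH = LL >> HalfBits, RLH = RL >> HalfBits;
      uint64_t U = LLH * RLL + TH;               // first cross term plus carry
      uint64_t UL = U & Mask, UH = U >> HalfBits;

      uint64_t V = LLL * RLH + UL;               // second cross term plus carry
      uint64_t VH = V >> HalfBits;

      uint64_t W = LLH * RLH + (UH + VH);        // high half x high half plus carries

      Lo = TL + (V << HalfBits);                 // low word of the product
      Hi = W + (RH * LL + RL * LH);              // fold in the upper-half products
    }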
SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, true/*irrelevant*/, dl).first,
@@ -2495,9 +2576,9 @@ void DAGTypeLegalizer::ExpandIntRes_XMULO(SDNode *N,
SDValue Temp = DAG.CreateStackTemporary(PtrVT);
  // Temporary for the overflow value; default it to zero.
- SDValue Chain = DAG.getStore(DAG.getEntryNode(), dl,
- DAG.getConstant(0, dl, PtrVT), Temp,
- MachinePointerInfo(), false, false, 0);
+ SDValue Chain =
+ DAG.getStore(DAG.getEntryNode(), dl, DAG.getConstant(0, dl, PtrVT), Temp,
+ MachinePointerInfo());
TargetLowering::ArgListTy Args;
TargetLowering::ArgListEntry Entry;
@@ -2522,14 +2603,14 @@ void DAGTypeLegalizer::ExpandIntRes_XMULO(SDNode *N,
TargetLowering::CallLoweringInfo CLI(DAG);
CLI.setDebugLoc(dl).setChain(Chain)
- .setCallee(TLI.getLibcallCallingConv(LC), RetTy, Func, std::move(Args), 0)
+ .setCallee(TLI.getLibcallCallingConv(LC), RetTy, Func, std::move(Args))
.setSExtResult();
std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI);
SplitInteger(CallInfo.first, Lo, Hi);
- SDValue Temp2 = DAG.getLoad(PtrVT, dl, CallInfo.second, Temp,
- MachinePointerInfo(), false, false, false, 0);
+ SDValue Temp2 =
+ DAG.getLoad(PtrVT, dl, CallInfo.second, Temp, MachinePointerInfo());
SDValue Ofl = DAG.getSetCC(dl, N->getValueType(1), Temp2,
DAG.getConstant(0, dl, PtrVT),
ISD::SETNE);
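
The surrounding code implements the libcall protocol for multiply-with-overflow: the overflow flag travels through a stack slot whose address is passed as an extra argument, seeded with zero beforehand and tested with SETNE afterwards. A sketch of that protocol, with __builtin_mul_overflow (a GCC/Clang builtin) standing in for the libcall body; all names here are ours:

    #include <cstdint>

    // Stand-in for the runtime's mulo-style helper: writes a nonzero value
    // through OflPtr on overflow; the product otherwise simply wraps.
    static int64_t mulo64(int64_t A, int64_t B, int *OflPtr) {
      int64_t R;
      *OflPtr = __builtin_mul_overflow(A, B, &R) ? 1 : 0;
      return R;
    }

    bool mulWouldOverflow(int64_t A, int64_t B, int64_t &Product) {
      int Overflow = 0;                  // store 0 to the stack temporary
      Product = mulo64(A, B, &Overflow); // libcall with &Overflow appended
      return Overflow != 0;              // SETNE against the reloaded value
    }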
@@ -2703,7 +2784,7 @@ bool DAGTypeLegalizer::ExpandIntegerOperand(SDNode *N, unsigned OpNo) {
void DAGTypeLegalizer::IntegerExpandSetCCOperands(SDValue &NewLHS,
SDValue &NewRHS,
ISD::CondCode &CCCode,
- SDLoc dl) {
+ const SDLoc &dl) {
SDValue LHSLo, LHSHi, RHSLo, RHSHi;
GetExpandedInteger(NewLHS, LHSLo, LHSHi);
GetExpandedInteger(NewRHS, RHSLo, RHSHi);
@@ -2956,8 +3037,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) {
SDValue Ch = N->getChain();
SDValue Ptr = N->getBasePtr();
unsigned Alignment = N->getAlignment();
- bool isVolatile = N->isVolatile();
- bool isNonTemporal = N->isNonTemporal();
+ MachineMemOperand::Flags MMOFlags = N->getMemOperand()->getFlags();
AAMDNodes AAInfo = N->getAAInfo();
SDLoc dl(N);
SDValue Lo, Hi;
@@ -2967,16 +3047,15 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) {
if (N->getMemoryVT().bitsLE(NVT)) {
GetExpandedInteger(N->getValue(), Lo, Hi);
return DAG.getTruncStore(Ch, dl, Lo, Ptr, N->getPointerInfo(),
- N->getMemoryVT(), isVolatile, isNonTemporal,
- Alignment, AAInfo);
+ N->getMemoryVT(), Alignment, MMOFlags, AAInfo);
}
if (DAG.getDataLayout().isLittleEndian()) {
// Little-endian - low bits are at low addresses.
GetExpandedInteger(N->getValue(), Lo, Hi);
- Lo = DAG.getStore(Ch, dl, Lo, Ptr, N->getPointerInfo(),
- isVolatile, isNonTemporal, Alignment, AAInfo);
+ Lo = DAG.getStore(Ch, dl, Lo, Ptr, N->getPointerInfo(), Alignment, MMOFlags,
+ AAInfo);
unsigned ExcessBits =
N->getMemoryVT().getSizeInBits() - NVT.getSizeInBits();
@@ -2986,10 +3065,9 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) {
unsigned IncrementSize = NVT.getSizeInBits()/8;
Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
DAG.getConstant(IncrementSize, dl, Ptr.getValueType()));
- Hi = DAG.getTruncStore(Ch, dl, Hi, Ptr,
- N->getPointerInfo().getWithOffset(IncrementSize),
- NEVT, isVolatile, isNonTemporal,
- MinAlign(Alignment, IncrementSize), AAInfo);
+ Hi = DAG.getTruncStore(
+ Ch, dl, Hi, Ptr, N->getPointerInfo().getWithOffset(IncrementSize), NEVT,
+ MinAlign(Alignment, IncrementSize), MMOFlags, AAInfo);
return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi);
}
@@ -3017,8 +3095,8 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) {
}
// Store both the high bits and maybe some of the low bits.
- Hi = DAG.getTruncStore(Ch, dl, Hi, Ptr, N->getPointerInfo(),
- HiVT, isVolatile, isNonTemporal, Alignment, AAInfo);
+ Hi = DAG.getTruncStore(Ch, dl, Hi, Ptr, N->getPointerInfo(), HiVT, Alignment,
+ MMOFlags, AAInfo);
// Increment the pointer to the other half.
Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
@@ -3027,8 +3105,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) {
Lo = DAG.getTruncStore(Ch, dl, Lo, Ptr,
N->getPointerInfo().getWithOffset(IncrementSize),
EVT::getIntegerVT(*DAG.getContext(), ExcessBits),
- isVolatile, isNonTemporal,
- MinAlign(Alignment, IncrementSize), AAInfo);
+ MinAlign(Alignment, IncrementSize), MMOFlags, AAInfo);
return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi);
}
@@ -3104,7 +3181,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_UINT_TO_FP(SDNode *N) {
SDValue Fudge = DAG.getExtLoad(
ISD::EXTLOAD, dl, DstVT, DAG.getEntryNode(), FudgePtr,
MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), MVT::f32,
- false, false, false, Alignment);
+ Alignment);
return DAG.getNode(ISD::FADD, dl, DstVT, SignedConv, Fudge);
}
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
index 2a0b0aa..144bed2 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
@@ -27,7 +27,7 @@ using namespace llvm;
static cl::opt<bool>
EnableExpensiveChecks("enable-legalize-types-checking", cl::Hidden);
-/// PerformExpensiveChecks - Do extensive, expensive, sanity checking.
+/// Do extensive, expensive sanity checking.
void DAGTypeLegalizer::PerformExpensiveChecks() {
// If a node is not processed, then none of its values should be mapped by any
// of PromotedIntegers, ExpandedIntegers, ..., ReplacedValues.
@@ -174,9 +174,9 @@ void DAGTypeLegalizer::PerformExpensiveChecks() {
}
}
-/// run - This is the main entry point for the type legalizer. This does a
-/// top-down traversal of the dag, legalizing types as it goes. Returns "true"
-/// if it made any changes.
+/// This is the main entry point for the type legalizer. This does a top-down
+/// traversal of the dag, legalizing types as it goes. Returns "true" if it made
+/// any changes.
bool DAGTypeLegalizer::run() {
bool Changed = false;
@@ -204,7 +204,7 @@ bool DAGTypeLegalizer::run() {
// Now that we have a set of nodes to process, handle them all.
while (!Worklist.empty()) {
-#ifndef XDEBUG
+#ifndef EXPENSIVE_CHECKS
if (EnableExpensiveChecks)
#endif
PerformExpensiveChecks();
@@ -394,7 +394,7 @@ NodeDone:
}
}
-#ifndef XDEBUG
+#ifndef EXPENSIVE_CHECKS
if (EnableExpensiveChecks)
#endif
PerformExpensiveChecks();
@@ -461,11 +461,10 @@ NodeDone:
return Changed;
}
-/// AnalyzeNewNode - The specified node is the root of a subtree of potentially
-/// new nodes. Correct any processed operands (this may change the node) and
-/// calculate the NodeId. If the node itself changes to a processed node, it
-/// is not remapped - the caller needs to take care of this.
-/// Returns the potentially changed node.
+/// The specified node is the root of a subtree of potentially new nodes.
+/// Correct any processed operands (this may change the node) and calculate the
+/// NodeId. If the node itself changes to a processed node, it is not remapped -
+/// the caller needs to take care of this. Returns the potentially changed node.
SDNode *DAGTypeLegalizer::AnalyzeNewNode(SDNode *N) {
// If this was an existing node that is already done, we're done.
if (N->getNodeId() != NewNode && N->getNodeId() != Unanalyzed)
@@ -536,7 +535,7 @@ SDNode *DAGTypeLegalizer::AnalyzeNewNode(SDNode *N) {
return N;
}
-/// AnalyzeNewValue - Call AnalyzeNewNode, updating the node in Val if needed.
+/// Call AnalyzeNewNode, updating the node in Val if needed.
/// If the node changes to a processed node, then remap it.
void DAGTypeLegalizer::AnalyzeNewValue(SDValue &Val) {
Val.setNode(AnalyzeNewNode(Val.getNode()));
@@ -545,7 +544,7 @@ void DAGTypeLegalizer::AnalyzeNewValue(SDValue &Val) {
RemapValue(Val);
}
-/// ExpungeNode - If N has a bogus mapping in ReplacedValues, eliminate it.
+/// If N has a bogus mapping in ReplacedValues, eliminate it.
/// This can occur when a node is deleted then reallocated as a new node -
/// the mapping in ReplacedValues applies to the deleted node, not the new
/// one.
@@ -626,7 +625,7 @@ void DAGTypeLegalizer::ExpungeNode(SDNode *N) {
ReplacedValues.erase(SDValue(N, i));
}
-/// RemapValue - If the specified value was already legalized to another value,
+/// If the specified value was already legalized to another value,
/// replace it by that value.
void DAGTypeLegalizer::RemapValue(SDValue &N) {
DenseMap<SDValue, SDValue>::iterator I = ReplacedValues.find(N);
@@ -643,8 +642,8 @@ void DAGTypeLegalizer::RemapValue(SDValue &N) {
}
namespace {
- /// NodeUpdateListener - This class is a DAGUpdateListener that listens for
- /// updates to nodes and recomputes their ready state.
+ /// This class is a DAGUpdateListener that listens for updates to nodes and
+ /// recomputes their ready state.
class NodeUpdateListener : public SelectionDAG::DAGUpdateListener {
DAGTypeLegalizer &DTL;
SmallSetVector<SDNode*, 16> &NodesToAnalyze;
@@ -689,9 +688,8 @@ namespace {
}
-/// ReplaceValueWith - The specified value was legalized to the specified other
-/// value. Update the DAG and NodeIds replacing any uses of From to use To
-/// instead.
+/// The specified value was legalized to the specified other value.
+/// Update the DAG and NodeIds replacing any uses of From to use To instead.
void DAGTypeLegalizer::ReplaceValueWith(SDValue From, SDValue To) {
assert(From.getNode() != To.getNode() && "Potential legalization loop!");
@@ -905,15 +903,14 @@ void DAGTypeLegalizer::SetWidenedVector(SDValue Op, SDValue Result) {
// Utilities.
//===----------------------------------------------------------------------===//
-/// BitConvertToInteger - Convert to an integer of the same size.
+/// Convert to an integer of the same size.
SDValue DAGTypeLegalizer::BitConvertToInteger(SDValue Op) {
unsigned BitWidth = Op.getValueType().getSizeInBits();
return DAG.getNode(ISD::BITCAST, SDLoc(Op),
EVT::getIntegerVT(*DAG.getContext(), BitWidth), Op);
}
-/// BitConvertVectorToIntegerVector - Convert to a vector of integers of the
-/// same size.
+/// Convert to a vector of integers of the same size.
SDValue DAGTypeLegalizer::BitConvertVectorToIntegerVector(SDValue Op) {
assert(Op.getValueType().isVector() && "Only applies to vectors!");
unsigned EltWidth = Op.getValueType().getVectorElementType().getSizeInBits();
@@ -930,15 +927,14 @@ SDValue DAGTypeLegalizer::CreateStackStoreLoad(SDValue Op,
// the source and destination types.
SDValue StackPtr = DAG.CreateStackTemporary(Op.getValueType(), DestVT);
// Emit a store to the stack slot.
- SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Op, StackPtr,
- MachinePointerInfo(), false, false, 0);
+ SDValue Store =
+ DAG.getStore(DAG.getEntryNode(), dl, Op, StackPtr, MachinePointerInfo());
// Result is a load from the stack slot.
- return DAG.getLoad(DestVT, dl, Store, StackPtr, MachinePointerInfo(),
- false, false, false, 0);
+ return DAG.getLoad(DestVT, dl, Store, StackPtr, MachinePointerInfo());
}
-/// CustomLowerNode - Replace the node's results with custom code provided
-/// by the target and return "true", or do nothing and return "false".
+/// Replace the node's results with custom code provided by the target and
+/// return "true", or do nothing and return "false".
/// The last parameter is FALSE if we are dealing with a node with legal
/// result types and an illegal operand. The second parameter denotes the type of
/// illegal OperandNo in that case.
@@ -981,8 +977,8 @@ bool DAGTypeLegalizer::CustomLowerNode(SDNode *N, EVT VT, bool LegalizeResult) {
}
-/// CustomWidenLowerNode - Widen the node's results with custom code provided
-/// by the target and return "true", or do nothing and return "false".
+/// Widen the node's results with custom code provided by the target and return
+/// "true", or do nothing and return "false".
bool DAGTypeLegalizer::CustomWidenLowerNode(SDNode *N, EVT VT) {
// See if the target wants to custom lower this node.
if (TLI.getOperationAction(N->getOpcode(), VT) != TargetLowering::Custom)
@@ -992,7 +988,7 @@ bool DAGTypeLegalizer::CustomWidenLowerNode(SDNode *N, EVT VT) {
TLI.ReplaceNodeResults(N, Results, DAG);
if (Results.empty())
- // The target didn't want to custom widen lower its result after all.
+ // The target didn't want to custom widen lower its result after all.
return false;
// Update the widening map.
@@ -1010,8 +1006,8 @@ SDValue DAGTypeLegalizer::DisintegrateMERGE_VALUES(SDNode *N, unsigned ResNo) {
return SDValue(N->getOperand(ResNo));
}
-/// GetPairElements - Use ISD::EXTRACT_ELEMENT nodes to extract the low and
-/// high parts of the given value.
+/// Use ISD::EXTRACT_ELEMENT nodes to extract the low and high parts of the
+/// given value.
void DAGTypeLegalizer::GetPairElements(SDValue Pair,
SDValue &Lo, SDValue &Hi) {
SDLoc dl(Pair);
@@ -1038,7 +1034,7 @@ SDValue DAGTypeLegalizer::GetVectorElementPointer(SDValue VecPtr, EVT EltVT,
return DAG.getNode(ISD::ADD, dl, Index.getValueType(), Index, VecPtr);
}
-/// JoinIntegers - Build an integer with low bits Lo and high bits Hi.
+/// Build an integer with low bits Lo and high bits Hi.
SDValue DAGTypeLegalizer::JoinIntegers(SDValue Lo, SDValue Hi) {
// Arbitrarily use dlHi for result SDLoc
SDLoc dlHi(Hi);
@@ -1056,7 +1052,7 @@ SDValue DAGTypeLegalizer::JoinIntegers(SDValue Lo, SDValue Hi) {
return DAG.getNode(ISD::OR, dlHi, NVT, Lo, Hi);
}
-/// LibCallify - Convert the node into a libcall with the same prototype.
+/// Convert the node into a libcall with the same prototype.
SDValue DAGTypeLegalizer::LibCallify(RTLIB::Libcall LC, SDNode *N,
bool isSigned) {
unsigned NumOps = N->getNumOperands();
@@ -1080,12 +1076,11 @@ SDValue DAGTypeLegalizer::LibCallify(RTLIB::Libcall LC, SDNode *N,
return TLI.makeLibCall(DAG, LC, N->getValueType(0), Ops, isSigned, dl).first;
}
-// ExpandChainLibCall - Expand a node into a call to a libcall. Similar to
-// ExpandLibCall except that the first operand is the in-chain.
+/// Expand a node into a call to a libcall. Similar to ExpandLibCall except that
+/// the first operand is the in-chain.
std::pair<SDValue, SDValue>
-DAGTypeLegalizer::ExpandChainLibCall(RTLIB::Libcall LC,
- SDNode *Node,
- bool isSigned) {
+DAGTypeLegalizer::ExpandChainLibCall(RTLIB::Libcall LC, SDNode *Node,
+ bool isSigned) {
SDValue InChain = Node->getOperand(0);
TargetLowering::ArgListTy Args;
@@ -1106,7 +1101,7 @@ DAGTypeLegalizer::ExpandChainLibCall(RTLIB::Libcall LC,
TargetLowering::CallLoweringInfo CLI(DAG);
CLI.setDebugLoc(SDLoc(Node)).setChain(InChain)
- .setCallee(TLI.getLibcallCallingConv(LC), RetTy, Callee, std::move(Args), 0)
+ .setCallee(TLI.getLibcallCallingConv(LC), RetTy, Callee, std::move(Args))
.setSExtResult(isSigned).setZExtResult(!isSigned);
std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI);
@@ -1114,9 +1109,9 @@ DAGTypeLegalizer::ExpandChainLibCall(RTLIB::Libcall LC,
return CallInfo;
}
-/// PromoteTargetBoolean - Promote the given target boolean to a target boolean
-/// of the given type. A target boolean is an integer value, not necessarily of
-/// type i1, the bits of which conform to getBooleanContents.
+/// Promote the given target boolean to a target boolean of the given type.
+/// A target boolean is an integer value, not necessarily of type i1, the bits
+/// of which conform to getBooleanContents.
///
/// ValVT is the type of values that produced the boolean.
SDValue DAGTypeLegalizer::PromoteTargetBoolean(SDValue Bool, EVT ValVT) {
@@ -1127,9 +1122,9 @@ SDValue DAGTypeLegalizer::PromoteTargetBoolean(SDValue Bool, EVT ValVT) {
return DAG.getNode(ExtendCode, dl, BoolVT, Bool);
}
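
The ExtendCode chosen here follows the target's getBooleanContents. A toy
mapping with stand-in enums for the real ISD/TargetLowering types, for
illustration only (in-tree, TargetLowering::getExtendForContent performs this
mapping):

    // Abridged stand-in names, not the real LLVM enumerators.
    enum BoolContent { UndefinedBoolean, ZeroOrOne, ZeroOrNegativeOne };
    enum ExtOpcode { AnyExtend, ZeroExtend, SignExtend };

    static ExtOpcode extendForContent(BoolContent BC) {
      switch (BC) {
      case UndefinedBoolean:  return AnyExtend;   // high bits are rubbish
      case ZeroOrOne:         return ZeroExtend;  // booleans are 0 or 1
      case ZeroOrNegativeOne: return SignExtend;  // booleans are 0 or -1
      }
      return AnyExtend;
    }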
-/// WidenTargetBoolean - Widen the given target boolean to a target boolean
-/// of the given type. The boolean vector is widened and then promoted to match
-/// the target boolean type of the given ValVT.
+/// Widen the given target boolean to a target boolean of the given type.
+/// The boolean vector is widened and then promoted to match the target boolean
+/// type of the given ValVT.
SDValue DAGTypeLegalizer::WidenTargetBoolean(SDValue Bool, EVT ValVT,
bool WithZeroes) {
SDLoc dl(Bool);
@@ -1144,8 +1139,7 @@ SDValue DAGTypeLegalizer::WidenTargetBoolean(SDValue Bool, EVT ValVT,
return PromoteTargetBoolean(Bool, ValVT);
}
-/// SplitInteger - Return the lower LoVT bits of Op in Lo and the upper HiVT
-/// bits in Hi.
+/// Return the lower LoVT bits of Op in Lo and the upper HiVT bits in Hi.
void DAGTypeLegalizer::SplitInteger(SDValue Op,
EVT LoVT, EVT HiVT,
SDValue &Lo, SDValue &Hi) {
@@ -1159,8 +1153,8 @@ void DAGTypeLegalizer::SplitInteger(SDValue Op,
Hi = DAG.getNode(ISD::TRUNCATE, dl, HiVT, Hi);
}
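
A scalar sketch of this split, assuming a 64-bit Op and 32-bit LoVT/HiVT; the
legalizer emits SRL and TRUNCATE nodes to the same effect:

    #include <cstdint>

    // Illustrative only: Lo gets the low 32 bits, Hi the high 32 bits.
    static void splitInteger(uint64_t Op, uint32_t &Lo, uint32_t &Hi) {
      Lo = static_cast<uint32_t>(Op);       // TRUNCATE
      Hi = static_cast<uint32_t>(Op >> 32); // SRL + TRUNCATE
    }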
-/// SplitInteger - Return the lower and upper halves of Op's bits in a value
-/// type half the size of Op's.
+/// Return the lower and upper halves of Op's bits in a value type half the
+/// size of Op's.
void DAGTypeLegalizer::SplitInteger(SDValue Op,
SDValue &Lo, SDValue &Hi) {
EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(),
@@ -1173,9 +1167,8 @@ void DAGTypeLegalizer::SplitInteger(SDValue Op,
// Entry Point
//===----------------------------------------------------------------------===//
-/// LegalizeTypes - This transforms the SelectionDAG into a SelectionDAG that
-/// only uses types natively supported by the target. Returns "true" if it made
-/// any changes.
+/// This transforms the SelectionDAG into a SelectionDAG that only uses types
+/// natively supported by the target. Returns "true" if it made any changes.
///
/// Note that this is an involved process that may invalidate pointers into
/// the graph.
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index 8ba19f7..84ad8f8 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -17,7 +17,6 @@
#define LLVM_LIB_CODEGEN_SELECTIONDAG_LEGALIZETYPES_H
#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/DenseSet.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
@@ -26,58 +25,56 @@
namespace llvm {
//===----------------------------------------------------------------------===//
-/// DAGTypeLegalizer - This takes an arbitrary SelectionDAG as input and hacks
-/// on it until only value types the target machine can handle are left. This
-/// involves promoting small sizes to large sizes or splitting up large values
-/// into small values.
+/// This takes an arbitrary SelectionDAG as input and hacks on it until only
+/// value types the target machine can handle are left. This involves promoting
+/// small sizes to large sizes or splitting up large values into small values.
///
class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
const TargetLowering &TLI;
SelectionDAG &DAG;
public:
- // NodeIdFlags - This pass uses the NodeId on the SDNodes to hold information
- // about the state of the node. The enum has all the values.
+ /// This pass uses the NodeId on the SDNodes to hold information about the
+ /// state of the node. The enum has all the values.
enum NodeIdFlags {
- /// ReadyToProcess - All operands have been processed, so this node is ready
- /// to be handled.
+ /// All operands have been processed, so this node is ready to be handled.
ReadyToProcess = 0,
- /// NewNode - This is a new node, not before seen, that was created in the
- /// process of legalizing some other node.
+ /// This is a new node, not before seen, that was created in the process of
+ /// legalizing some other node.
NewNode = -1,
- /// Unanalyzed - This node's ID needs to be set to the number of its
- /// unprocessed operands.
+ /// This node's ID needs to be set to the number of its unprocessed
+ /// operands.
Unanalyzed = -2,
- /// Processed - This is a node that has already been processed.
+ /// This is a node that has already been processed.
Processed = -3
// 1+ - This is a node which has this many unprocessed operands.
};
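
A toy model of the "1+" state described above, for illustration only; the real
legalizer keeps this count in the SDNode's NodeId field:

    #include <vector>

    struct ToyNode { int Id; }; // Id counts unprocessed operands

    // When one of User's operands finishes, decrement the count; the node
    // becomes ReadyToProcess (Id == 0) exactly when the last one is done.
    static void markOperandProcessed(ToyNode *User,
                                     std::vector<ToyNode *> &Worklist) {
      if (User->Id > 0 && --User->Id == 0)
        Worklist.push_back(User);
    }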
private:
- /// ValueTypeActions - This is a bitvector that contains two bits for each
- /// simple value type, where the two bits correspond to the LegalizeAction
- /// enum from TargetLowering. This can be queried with "getTypeAction(VT)".
+ /// This is a bitvector that contains two bits for each simple value type,
+ /// where the two bits correspond to the LegalizeAction enum from
+ /// TargetLowering. This can be queried with "getTypeAction(VT)".
TargetLowering::ValueTypeActionImpl ValueTypeActions;
- /// getTypeAction - Return how we should legalize values of this type.
+ /// Return how we should legalize values of this type.
TargetLowering::LegalizeTypeAction getTypeAction(EVT VT) const {
return TLI.getTypeAction(*DAG.getContext(), VT);
}
- /// isTypeLegal - Return true if this type is legal on this target.
+ /// Return true if this type is legal on this target.
bool isTypeLegal(EVT VT) const {
return TLI.getTypeAction(*DAG.getContext(), VT) == TargetLowering::TypeLegal;
}
- /// isSimpleLegalType - Return true if this is a simple legal type.
+ /// Return true if this is a simple legal type.
bool isSimpleLegalType(EVT VT) const {
return VT.isSimple() && TLI.isTypeLegal(VT);
}
- /// isLegalInHWReg - Return true if this type can be passed in registers.
+ /// Return true if this type can be passed in registers.
/// For example, x86_64's f128 should be legal in registers, with only some
/// operations converted to library calls or integer bitwise operations.
@@ -90,51 +87,49 @@ private:
return TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
}
- /// IgnoreNodeResults - Pretend all of this node's results are legal.
+ /// Pretend all of this node's results are legal.
bool IgnoreNodeResults(SDNode *N) const {
return N->getOpcode() == ISD::TargetConstant;
}
- /// PromotedIntegers - For integer nodes that are below legal width, this map
- /// indicates what promoted value to use.
+ /// For integer nodes that are below legal width, this map indicates what
+ /// promoted value to use.
SmallDenseMap<SDValue, SDValue, 8> PromotedIntegers;
- /// ExpandedIntegers - For integer nodes that need to be expanded this map
- /// indicates which operands are the expanded version of the input.
+  /// For integer nodes that need to be expanded, this map indicates which
+  /// operands are the expanded version of the input.
SmallDenseMap<SDValue, std::pair<SDValue, SDValue>, 8> ExpandedIntegers;
- /// SoftenedFloats - For floating point nodes converted to integers of
- /// the same size, this map indicates the converted value to use.
+ /// For floating-point nodes converted to integers of the same size, this map
+ /// indicates the converted value to use.
SmallDenseMap<SDValue, SDValue, 8> SoftenedFloats;
- /// PromotedFloats - For floating point nodes that have a smaller precision
- /// than the smallest supported precision, this map indicates what promoted
- /// value to use.
+ /// For floating-point nodes that have a smaller precision than the smallest
+ /// supported precision, this map indicates what promoted value to use.
SmallDenseMap<SDValue, SDValue, 8> PromotedFloats;
- /// ExpandedFloats - For float nodes that need to be expanded this map
- /// indicates which operands are the expanded version of the input.
+  /// For float nodes that need to be expanded, this map indicates which
+  /// operands are the expanded version of the input.
SmallDenseMap<SDValue, std::pair<SDValue, SDValue>, 8> ExpandedFloats;
- /// ScalarizedVectors - For nodes that are <1 x ty>, this map indicates the
- /// scalar value of type 'ty' to use.
+ /// For nodes that are <1 x ty>, this map indicates the scalar value of type
+ /// 'ty' to use.
SmallDenseMap<SDValue, SDValue, 8> ScalarizedVectors;
- /// SplitVectors - For nodes that need to be split this map indicates
- /// which operands are the expanded version of the input.
+  /// For nodes that need to be split, this map indicates which operands are
+  /// the expanded version of the input.
SmallDenseMap<SDValue, std::pair<SDValue, SDValue>, 8> SplitVectors;
- /// WidenedVectors - For vector nodes that need to be widened, indicates
- /// the widened value to use.
+ /// For vector nodes that need to be widened, indicates the widened value to
+ /// use.
SmallDenseMap<SDValue, SDValue, 8> WidenedVectors;
- /// ReplacedValues - For values that have been replaced with another,
- /// indicates the replacement value to use.
+  /// For values that have been replaced with another value, this map
+  /// indicates the replacement value to use.
SmallDenseMap<SDValue, SDValue, 8> ReplacedValues;
- /// Worklist - This defines a worklist of nodes to process. In order to be
- /// pushed onto this worklist, all operands of a node must have already been
- /// processed.
+ /// This defines a worklist of nodes to process. In order to be pushed onto
+ /// this worklist, all operands of a node must have already been processed.
SmallVector<SDNode*, 128> Worklist;
public:
@@ -145,7 +140,7 @@ public:
"Too many value types for ValueTypeActions to hold!");
}
- /// run - This is the main entry point for the type legalizer. This does a
+ /// This is the main entry point for the type legalizer. This does a
/// top-down traversal of the dag, legalizing types as it goes. Returns
/// "true" if it made any changes.
bool run();
@@ -173,9 +168,9 @@ private:
bool CustomLowerNode(SDNode *N, EVT VT, bool LegalizeResult);
bool CustomWidenLowerNode(SDNode *N, EVT VT);
- /// DisintegrateMERGE_VALUES - Replace each result of the given MERGE_VALUES
- /// node with the corresponding input operand, except for the result 'ResNo',
- /// for which the corresponding input operand is returned.
+ /// Replace each result of the given MERGE_VALUES node with the corresponding
+ /// input operand, except for the result 'ResNo', for which the corresponding
+ /// input operand is returned.
SDValue DisintegrateMERGE_VALUES(SDNode *N, unsigned ResNo);
SDValue GetVectorElementPointer(SDValue VecPtr, EVT EltVT, SDValue Index);
@@ -201,9 +196,9 @@ private:
// Integer Promotion Support: LegalizeIntegerTypes.cpp
//===--------------------------------------------------------------------===//
- /// GetPromotedInteger - Given a processed operand Op which was promoted to a
- /// larger integer type, this returns the promoted value. The low bits of the
- /// promoted value corresponding to the original type are exactly equal to Op.
+ /// Given a processed operand Op which was promoted to a larger integer type,
+ /// this returns the promoted value. The low bits of the promoted value
+ /// corresponding to the original type are exactly equal to Op.
/// The extra bits contain rubbish, so the promoted value may need to be zero-
/// or sign-extended from the original type before it is usable (the helpers
/// SExtPromotedInteger and ZExtPromotedInteger can do this for you).
@@ -218,8 +213,7 @@ private:
}
void SetPromotedInteger(SDValue Op, SDValue Result);
- /// SExtPromotedInteger - Get a promoted operand and sign extend it to the
- /// final size.
+ /// Get a promoted operand and sign extend it to the final size.
SDValue SExtPromotedInteger(SDValue Op) {
EVT OldVT = Op.getValueType();
SDLoc dl(Op);
@@ -228,8 +222,7 @@ private:
DAG.getValueType(OldVT));
}
- /// ZExtPromotedInteger - Get a promoted operand and zero extend it to the
- /// final size.
+ /// Get a promoted operand and zero extend it to the final size.
SDValue ZExtPromotedInteger(SDValue Op) {
EVT OldVT = Op.getValueType();
SDLoc dl(Op);
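
A scalar sketch of these two helpers, assuming an i8 value promoted into an
i32 register whose upper 24 bits hold rubbish; widths and names are
illustrative, not LLVM API:

    #include <cstdint>

    static int32_t sextPromotedFromI8(int32_t Promoted) {
      // SIGN_EXTEND_INREG: reinterpret the low 8 bits as signed i8.
      return static_cast<int8_t>(Promoted);
    }
    static uint32_t zextPromotedFromI8(uint32_t Promoted) {
      // Zero out the rubbish bits above the original i8.
      return Promoted & 0xFFu;
    }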
@@ -322,9 +315,9 @@ private:
// Integer Expansion Support: LegalizeIntegerTypes.cpp
//===--------------------------------------------------------------------===//
- /// GetExpandedInteger - Given a processed operand Op which was expanded into
- /// two integers of half the size, this returns the two halves. The low bits
- /// of Op are exactly equal to the bits of Lo; the high bits exactly equal Hi.
+ /// Given a processed operand Op which was expanded into two integers of half
+ /// the size, this returns the two halves. The low bits of Op are exactly
+ /// equal to the bits of Lo; the high bits exactly equal Hi.
/// For example, if Op is an i64 which was expanded into two i32's, then this
/// method returns the two i32's, with Lo being equal to the lower 32 bits of
/// Op, and Hi being equal to the upper 32 bits.
@@ -362,6 +355,8 @@ private:
void ExpandIntRes_UREM (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_Shift (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_MINMAX (SDNode *N, SDValue &Lo, SDValue &Hi);
+
void ExpandIntRes_SADDSUBO (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_UADDSUBO (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_XMULO (SDNode *N, SDValue &Lo, SDValue &Hi);
@@ -388,14 +383,14 @@ private:
SDValue ExpandIntOp_ATOMIC_STORE(SDNode *N);
void IntegerExpandSetCCOperands(SDValue &NewLHS, SDValue &NewRHS,
- ISD::CondCode &CCCode, SDLoc dl);
+ ISD::CondCode &CCCode, const SDLoc &dl);
//===--------------------------------------------------------------------===//
// Float to Integer Conversion Support: LegalizeFloatTypes.cpp
//===--------------------------------------------------------------------===//
- /// GetSoftenedFloat - Given an operand Op of Float type, returns the integer
- /// if the Op is not supported in target HW and converted to the integer.
+  /// Given an operand Op of Float type that is not supported in target HW,
+  /// returns the integer value it was converted to.
/// The integer contains exactly the same bits as Op - only the type changed.
/// For example, if Op is an f32 which was softened to an i32, then this method
/// returns an i32, the bits of which coincide with those of Op.
@@ -487,8 +482,8 @@ private:
// Float Expansion Support: LegalizeFloatTypes.cpp
//===--------------------------------------------------------------------===//
- /// GetExpandedFloat - Given a processed operand Op which was expanded into
- /// two floating point values of half the size, this returns the two halves.
+ /// Given a processed operand Op which was expanded into two floating-point
+ /// values of half the size, this returns the two halves.
/// The low bits of Op are exactly equal to the bits of Lo; the high bits
/// exactly equal Hi. For example, if Op is a ppcf128 which was expanded
/// into two f64's, then this method returns the two f64's, with Lo being
@@ -542,8 +537,7 @@ private:
SDValue ExpandFloatOp_STORE(SDNode *N, unsigned OpNo);
void FloatExpandSetCCOperands(SDValue &NewLHS, SDValue &NewRHS,
- ISD::CondCode &CCCode, SDLoc dl);
-
+ ISD::CondCode &CCCode, const SDLoc &dl);
//===--------------------------------------------------------------------===//
// Float promotion support: LegalizeFloatTypes.cpp
@@ -586,9 +580,9 @@ private:
// Scalarization Support: LegalizeVectorTypes.cpp
//===--------------------------------------------------------------------===//
- /// GetScalarizedVector - Given a processed one-element vector Op which was
- /// scalarized to its element type, this returns the element. For example,
- /// if Op is a v1i32, Op = < i32 val >, this method returns val, an i32.
+ /// Given a processed one-element vector Op which was scalarized to its
+ /// element type, this returns the element. For example, if Op is a v1i32,
+ /// Op = < i32 val >, this method returns val, an i32.
SDValue GetScalarizedVector(SDValue Op) {
SDValue &ScalarizedOp = ScalarizedVectors[Op];
RemapValue(ScalarizedOp);
@@ -636,12 +630,12 @@ private:
// Vector Splitting Support: LegalizeVectorTypes.cpp
//===--------------------------------------------------------------------===//
- /// GetSplitVector - Given a processed vector Op which was split into vectors
- /// of half the size, this method returns the halves. The first elements of
- /// Op coincide with the elements of Lo; the remaining elements of Op coincide
- /// with the elements of Hi: Op is what you would get by concatenating Lo and
- /// Hi. For example, if Op is a v8i32 that was split into two v4i32's, then
- /// this method returns the two v4i32's, with Lo corresponding to the first 4
+ /// Given a processed vector Op which was split into vectors of half the size,
+ /// this method returns the halves. The first elements of Op coincide with the
+ /// elements of Lo; the remaining elements of Op coincide with the elements of
+ /// Hi: Op is what you would get by concatenating Lo and Hi.
+ /// For example, if Op is a v8i32 that was split into two v4i32's, then this
+ /// method returns the two v4i32's, with Lo corresponding to the first 4
/// elements of Op, and Hi to the last 4 elements.
void GetSplitVector(SDValue Op, SDValue &Lo, SDValue &Hi);
void SetSplitVector(SDValue Op, SDValue Lo, SDValue Hi);
@@ -653,6 +647,7 @@ private:
void SplitVecRes_UnaryOp(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_ExtendOp(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_InregOp(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_ExtVecInRegOp(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_BUILD_VECTOR(SDNode *N, SDValue &Lo, SDValue &Hi);
@@ -692,12 +687,12 @@ private:
// Vector Widening Support: LegalizeVectorTypes.cpp
//===--------------------------------------------------------------------===//
- /// GetWidenedVector - Given a processed vector Op which was widened into a
- /// larger vector, this method returns the larger vector. The elements of
- /// the returned vector consist of the elements of Op followed by elements
- /// containing rubbish. For example, if Op is a v2i32 that was widened to a
- /// v4i32, then this method returns a v4i32 for which the first two elements
- /// are the same as those of Op, while the last two elements contain rubbish.
+ /// Given a processed vector Op which was widened into a larger vector, this
+ /// method returns the larger vector. The elements of the returned vector
+ /// consist of the elements of Op followed by elements containing rubbish.
+ /// For example, if Op is a v2i32 that was widened to a v4i32, then this
+ /// method returns a v4i32 for which the first two elements are the same as
+ /// those of Op, while the last two elements contain rubbish.
SDValue GetWidenedVector(SDValue Op) {
SDValue &WidenedOp = WidenedVectors[Op];
RemapValue(WidenedOp);
@@ -713,6 +708,7 @@ private:
SDValue WidenVecRes_BUILD_VECTOR(SDNode* N);
SDValue WidenVecRes_CONCAT_VECTORS(SDNode* N);
SDValue WidenVecRes_CONVERT_RNDSAT(SDNode* N);
+ SDValue WidenVecRes_EXTEND_VECTOR_INREG(SDNode* N);
SDValue WidenVecRes_EXTRACT_SUBVECTOR(SDNode* N);
SDValue WidenVecRes_INSERT_VECTOR_ELT(SDNode* N);
SDValue WidenVecRes_LOAD(SDNode* N);
@@ -755,29 +751,29 @@ private:
// Vector Widening Utilities Support: LegalizeVectorTypes.cpp
//===--------------------------------------------------------------------===//
- /// Helper GenWidenVectorLoads - Helper function to generate a set of
- /// loads to load a vector with a resulting wider type. It takes
+ /// Helper function to generate a set of loads to load a vector with a
+ /// resulting wider type. It takes:
/// LdChain: list of chains for the loads to be generated.
/// Ld: load to widen
SDValue GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain,
LoadSDNode *LD);
- /// GenWidenVectorExtLoads - Helper function to generate a set of extension
- /// loads to load a ector with a resulting wider type. It takes
+ /// Helper function to generate a set of extension loads to load a vector with
+ /// a resulting wider type. It takes:
/// LdChain: list of chains for the loads to be generated.
/// Ld: load to widen
/// ExtType: extension element type
SDValue GenWidenVectorExtLoads(SmallVectorImpl<SDValue> &LdChain,
LoadSDNode *LD, ISD::LoadExtType ExtType);
- /// Helper genWidenVectorStores - Helper function to generate a set of
- /// stores to store a widen vector into non-widen memory
+  /// Helper function to generate a set of stores to store a widened vector
+  /// into non-widened memory.
/// StChain: list of chains for the stores we have generated
/// ST: store of a widen value
void GenWidenVectorStores(SmallVectorImpl<SDValue> &StChain, StoreSDNode *ST);
- /// Helper genWidenVectorTruncStores - Helper function to generate a set of
- /// stores to store a truncate widen vector into non-widen memory
+  /// Helper function to generate a set of stores to store a truncated widened
+  /// vector into non-widened memory.
/// StChain: list of chains for the stores we have generated
/// ST: store of a widen value
void GenWidenVectorTruncStores(SmallVectorImpl<SDValue> &StChain,
@@ -785,8 +781,7 @@ private:
/// Modifies a vector input (widens or narrows) to a vector of NVT. The
/// input vector must have the same element type as NVT.
- /// When FillWithZeroes is "on" the vector will be widened with
- /// zeroes.
+ /// When FillWithZeroes is "on" the vector will be widened with zeroes.
/// By default, the vector will be widened with undefined values.
SDValue ModifyToType(SDValue InOp, EVT NVT, bool FillWithZeroes = false);
@@ -807,8 +802,8 @@ private:
GetExpandedFloat(Op, Lo, Hi);
}
- /// GetPairElements - Use ISD::EXTRACT_ELEMENT nodes to extract the low and
- /// high parts of the given value.
+ /// Use ISD::EXTRACT_ELEMENT nodes to extract the low and high parts of the
+ /// given value.
void GetPairElements(SDValue Pair, SDValue &Lo, SDValue &Hi);
// Generic Result Splitting.
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
index 593c346..665180e 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
@@ -170,12 +170,10 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) {
MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI);
// Emit a store to the stack slot.
- SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, InOp, StackPtr, PtrInfo,
- false, false, 0);
+ SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, InOp, StackPtr, PtrInfo);
// Load the first half from the stack slot.
- Lo = DAG.getLoad(NOutVT, dl, Store, StackPtr, PtrInfo,
- false, false, false, 0);
+ Lo = DAG.getLoad(NOutVT, dl, Store, StackPtr, PtrInfo);
// Increment the pointer to the other half.
unsigned IncrementSize = NOutVT.getSizeInBits() / 8;
@@ -185,8 +183,8 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) {
// Load the second half from the stack slot.
Hi = DAG.getLoad(NOutVT, dl, Store, StackPtr,
- PtrInfo.getWithOffset(IncrementSize), false,
- false, false, MinAlign(Alignment, IncrementSize));
+ PtrInfo.getWithOffset(IncrementSize),
+ MinAlign(Alignment, IncrementSize));
// Handle endianness of the load.
if (TLI.hasBigEndianPartOrdering(OutVT, DAG.getDataLayout()))
@@ -263,16 +261,12 @@ void DAGTypeLegalizer::ExpandRes_NormalLoad(SDNode *N, SDValue &Lo,
SDValue Chain = LD->getChain();
SDValue Ptr = LD->getBasePtr();
unsigned Alignment = LD->getAlignment();
- bool isVolatile = LD->isVolatile();
- bool isNonTemporal = LD->isNonTemporal();
- bool isInvariant = LD->isInvariant();
AAMDNodes AAInfo = LD->getAAInfo();
assert(NVT.isByteSized() && "Expanded type not byte sized!");
- Lo = DAG.getLoad(NVT, dl, Chain, Ptr, LD->getPointerInfo(),
- isVolatile, isNonTemporal, isInvariant, Alignment,
- AAInfo);
+ Lo = DAG.getLoad(NVT, dl, Chain, Ptr, LD->getPointerInfo(), Alignment,
+ LD->getMemOperand()->getFlags(), AAInfo);
// Increment the pointer to the other half.
unsigned IncrementSize = NVT.getSizeInBits() / 8;
@@ -280,8 +274,8 @@ void DAGTypeLegalizer::ExpandRes_NormalLoad(SDNode *N, SDValue &Lo,
DAG.getConstant(IncrementSize, dl, Ptr.getValueType()));
Hi = DAG.getLoad(NVT, dl, Chain, Ptr,
LD->getPointerInfo().getWithOffset(IncrementSize),
- isVolatile, isNonTemporal, isInvariant,
- MinAlign(Alignment, IncrementSize), AAInfo);
+ MinAlign(Alignment, IncrementSize),
+ LD->getMemOperand()->getFlags(), AAInfo);
// Build a factor node to remember that this load is independent of the
// other one.
@@ -478,8 +472,6 @@ SDValue DAGTypeLegalizer::ExpandOp_NormalStore(SDNode *N, unsigned OpNo) {
SDValue Chain = St->getChain();
SDValue Ptr = St->getBasePtr();
unsigned Alignment = St->getAlignment();
- bool isVolatile = St->isVolatile();
- bool isNonTemporal = St->isNonTemporal();
AAMDNodes AAInfo = St->getAAInfo();
assert(NVT.isByteSized() && "Expanded type not byte sized!");
@@ -491,15 +483,15 @@ SDValue DAGTypeLegalizer::ExpandOp_NormalStore(SDNode *N, unsigned OpNo) {
if (TLI.hasBigEndianPartOrdering(ValueVT, DAG.getDataLayout()))
std::swap(Lo, Hi);
- Lo = DAG.getStore(Chain, dl, Lo, Ptr, St->getPointerInfo(),
- isVolatile, isNonTemporal, Alignment, AAInfo);
+ Lo = DAG.getStore(Chain, dl, Lo, Ptr, St->getPointerInfo(), Alignment,
+ St->getMemOperand()->getFlags(), AAInfo);
Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
DAG.getConstant(IncrementSize, dl, Ptr.getValueType()));
Hi = DAG.getStore(Chain, dl, Hi, Ptr,
St->getPointerInfo().getWithOffset(IncrementSize),
- isVolatile, isNonTemporal,
- MinAlign(Alignment, IncrementSize), AAInfo);
+ MinAlign(Alignment, IncrementSize),
+ St->getMemOperand()->getFlags(), AAInfo);
return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi);
}
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index f61f631..3c9cb17 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -358,8 +358,7 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
case TargetLowering::Legal:
break;
case TargetLowering::Custom: {
- SDValue Tmp1 = TLI.LowerOperation(Op, DAG);
- if (Tmp1.getNode()) {
+ if (SDValue Tmp1 = TLI.LowerOperation(Op, DAG)) {
Result = Tmp1;
break;
}
@@ -493,21 +492,26 @@ SDValue VectorLegalizer::PromoteFP_TO_INT(SDValue Op, bool isSigned) {
SDValue VectorLegalizer::ExpandLoad(SDValue Op) {
- SDLoc dl(Op);
LoadSDNode *LD = cast<LoadSDNode>(Op.getNode());
- SDValue Chain = LD->getChain();
- SDValue BasePTR = LD->getBasePtr();
- EVT SrcVT = LD->getMemoryVT();
- ISD::LoadExtType ExtType = LD->getExtensionType();
- SmallVector<SDValue, 8> Vals;
- SmallVector<SDValue, 8> LoadChains;
+ EVT SrcVT = LD->getMemoryVT();
+ EVT SrcEltVT = SrcVT.getScalarType();
unsigned NumElem = SrcVT.getVectorNumElements();
- EVT SrcEltVT = SrcVT.getScalarType();
- EVT DstEltVT = Op.getNode()->getValueType(0).getScalarType();
+ SDValue NewChain;
+ SDValue Value;
if (SrcVT.getVectorNumElements() > 1 && !SrcEltVT.isByteSized()) {
+ SDLoc dl(Op);
+
+ SmallVector<SDValue, 8> Vals;
+ SmallVector<SDValue, 8> LoadChains;
+
+ EVT DstEltVT = LD->getValueType(0).getScalarType();
+ SDValue Chain = LD->getChain();
+ SDValue BasePTR = LD->getBasePtr();
+ ISD::LoadExtType ExtType = LD->getExtensionType();
+
// When the elements in a vector are not byte-addressable, we cannot directly
// load each element by advancing a pointer, which can only address bytes.
// Instead, we load all significant words, mask bits off, and concatenate
@@ -531,24 +535,22 @@ SDValue VectorLegalizer::ExpandLoad(SDValue Op) {
unsigned LoadBytes = WideBytes;
if (RemainingBytes >= LoadBytes) {
- ScalarLoad = DAG.getLoad(WideVT, dl, Chain, BasePTR,
- LD->getPointerInfo().getWithOffset(Offset),
- LD->isVolatile(), LD->isNonTemporal(),
- LD->isInvariant(),
- MinAlign(LD->getAlignment(), Offset),
- LD->getAAInfo());
+ ScalarLoad =
+ DAG.getLoad(WideVT, dl, Chain, BasePTR,
+ LD->getPointerInfo().getWithOffset(Offset),
+ MinAlign(LD->getAlignment(), Offset),
+ LD->getMemOperand()->getFlags(), LD->getAAInfo());
} else {
EVT LoadVT = WideVT;
while (RemainingBytes < LoadBytes) {
LoadBytes >>= 1; // Reduce the load size by half.
LoadVT = EVT::getIntegerVT(*DAG.getContext(), LoadBytes << 3);
}
- ScalarLoad = DAG.getExtLoad(ISD::EXTLOAD, dl, WideVT, Chain, BasePTR,
- LD->getPointerInfo().getWithOffset(Offset),
- LoadVT, LD->isVolatile(),
- LD->isNonTemporal(), LD->isInvariant(),
- MinAlign(LD->getAlignment(), Offset),
- LD->getAAInfo());
+ ScalarLoad =
+ DAG.getExtLoad(ISD::EXTLOAD, dl, WideVT, Chain, BasePTR,
+ LD->getPointerInfo().getWithOffset(Offset), LoadVT,
+ MinAlign(LD->getAlignment(), Offset),
+ LD->getMemOperand()->getFlags(), LD->getAAInfo());
}
RemainingBytes -= LoadBytes;
@@ -614,29 +616,17 @@ SDValue VectorLegalizer::ExpandLoad(SDValue Op) {
}
Vals.push_back(Lo);
}
- } else {
- unsigned Stride = SrcVT.getScalarType().getSizeInBits()/8;
- for (unsigned Idx=0; Idx<NumElem; Idx++) {
- SDValue ScalarLoad = DAG.getExtLoad(ExtType, dl,
- Op.getNode()->getValueType(0).getScalarType(),
- Chain, BasePTR, LD->getPointerInfo().getWithOffset(Idx * Stride),
- SrcVT.getScalarType(),
- LD->isVolatile(), LD->isNonTemporal(), LD->isInvariant(),
- MinAlign(LD->getAlignment(), Idx * Stride), LD->getAAInfo());
-
- BasePTR = DAG.getNode(ISD::ADD, dl, BasePTR.getValueType(), BasePTR,
- DAG.getConstant(Stride, dl, BasePTR.getValueType()));
+ NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains);
+ Value = DAG.getNode(ISD::BUILD_VECTOR, dl,
+ Op.getNode()->getValueType(0), Vals);
+ } else {
+ SDValue Scalarized = TLI.scalarizeVectorLoad(LD, DAG);
- Vals.push_back(ScalarLoad.getValue(0));
- LoadChains.push_back(ScalarLoad.getValue(1));
- }
+ NewChain = Scalarized.getValue(1);
+ Value = Scalarized.getValue(0);
}
- SDValue NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains);
- SDValue Value = DAG.getNode(ISD::BUILD_VECTOR, dl,
- Op.getNode()->getValueType(0), Vals);
-
AddLegalizedOperand(Op.getValue(0), Value);
AddLegalizedOperand(Op.getValue(1), NewChain);
@@ -644,54 +634,37 @@ SDValue VectorLegalizer::ExpandLoad(SDValue Op) {
}
SDValue VectorLegalizer::ExpandStore(SDValue Op) {
- SDLoc dl(Op);
StoreSDNode *ST = cast<StoreSDNode>(Op.getNode());
- SDValue Chain = ST->getChain();
- SDValue BasePTR = ST->getBasePtr();
- SDValue Value = ST->getValue();
- EVT StVT = ST->getMemoryVT();
-
- unsigned Alignment = ST->getAlignment();
- bool isVolatile = ST->isVolatile();
- bool isNonTemporal = ST->isNonTemporal();
- AAMDNodes AAInfo = ST->getAAInfo();
- unsigned NumElem = StVT.getVectorNumElements();
- // The type of the data we want to save
- EVT RegVT = Value.getValueType();
- EVT RegSclVT = RegVT.getScalarType();
- // The type of data as saved in memory.
+ EVT StVT = ST->getMemoryVT();
EVT MemSclVT = StVT.getScalarType();
-
- // Cast floats into integers
unsigned ScalarSize = MemSclVT.getSizeInBits();
// Round odd types to the next pow of two.
- if (!isPowerOf2_32(ScalarSize))
- ScalarSize = NextPowerOf2(ScalarSize);
-
- // Store Stride in bytes
- unsigned Stride = ScalarSize/8;
- // Extract each of the elements from the original vector
- // and save them into memory individually.
- SmallVector<SDValue, 8> Stores;
- for (unsigned Idx = 0; Idx < NumElem; Idx++) {
- SDValue Ex = DAG.getNode(
- ISD::EXTRACT_VECTOR_ELT, dl, RegSclVT, Value,
- DAG.getConstant(Idx, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
-
- // This scalar TruncStore may be illegal, but we legalize it later.
- SDValue Store = DAG.getTruncStore(Chain, dl, Ex, BasePTR,
- ST->getPointerInfo().getWithOffset(Idx*Stride), MemSclVT,
- isVolatile, isNonTemporal, MinAlign(Alignment, Idx*Stride),
- AAInfo);
-
- BasePTR = DAG.getNode(ISD::ADD, dl, BasePTR.getValueType(), BasePTR,
- DAG.getConstant(Stride, dl, BasePTR.getValueType()));
-
- Stores.push_back(Store);
+ if (!isPowerOf2_32(ScalarSize)) {
+ // FIXME: This is completely broken and inconsistent with ExpandLoad
+ // handling.
+
+    // For sub-byte element sizes, this ends up with 0 stride between
+    // elements, so the same element just gets re-written to the same
+    // location. There seem to be tests explicitly testing for this broken
+    // behavior though.
+
+ LLVMContext &Ctx = *DAG.getContext();
+
+ EVT NewMemVT
+ = EVT::getVectorVT(Ctx,
+ MemSclVT.getIntegerVT(Ctx, NextPowerOf2(ScalarSize)),
+ StVT.getVectorNumElements());
+
+ SDValue NewVectorStore = DAG.getTruncStore(
+ ST->getChain(), SDLoc(Op), ST->getValue(), ST->getBasePtr(),
+ ST->getPointerInfo(), NewMemVT, ST->getAlignment(),
+ ST->getMemOperand()->getFlags(), ST->getAAInfo());
+ ST = cast<StoreSDNode>(NewVectorStore.getNode());
}
- SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
+
+ SDValue TF = TLI.scalarizeVectorStore(ST, DAG);
AddLegalizedOperand(Op, TF);
return TF;
}
@@ -864,10 +837,7 @@ SDValue VectorLegalizer::ExpandZERO_EXTEND_VECTOR_INREG(SDValue Op) {
int NumSrcElements = SrcVT.getVectorNumElements();
// Build up a zero vector to blend into this one.
- EVT SrcScalarVT = SrcVT.getScalarType();
- SDValue ScalarZero = DAG.getTargetConstant(0, DL, SrcScalarVT);
- SmallVector<SDValue, 4> BuildVectorOperands(NumSrcElements, ScalarZero);
- SDValue Zero = DAG.getNode(ISD::BUILD_VECTOR, DL, SrcVT, BuildVectorOperands);
+ SDValue Zero = DAG.getConstant(0, DL, SrcVT);
// Shuffle the incoming lanes into the correct position, and pull all other
// lanes from the zero vector.
@@ -885,16 +855,19 @@ SDValue VectorLegalizer::ExpandZERO_EXTEND_VECTOR_INREG(SDValue Op) {
DAG.getVectorShuffle(SrcVT, DL, Zero, Src, ShuffleMask));
}
-SDValue VectorLegalizer::ExpandBSWAP(SDValue Op) {
- EVT VT = Op.getValueType();
-
- // Generate a byte wise shuffle mask for the BSWAP.
- SmallVector<int, 16> ShuffleMask;
+static void createBSWAPShuffleMask(EVT VT, SmallVectorImpl<int> &ShuffleMask) {
int ScalarSizeInBytes = VT.getScalarSizeInBits() / 8;
for (int I = 0, E = VT.getVectorNumElements(); I != E; ++I)
for (int J = ScalarSizeInBytes - 1; J >= 0; --J)
ShuffleMask.push_back((I * ScalarSizeInBytes) + J);
+}
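
A standalone re-creation of this mask logic, for illustration: for two 4-byte
elements (a v2i32 vector) it prints 3 2 1 0 7 6 5 4, i.e. each i32 lane
becomes four byte lanes in reversed order:

    #include <cstdio>
    #include <vector>

    int main() {
      const int ScalarSizeInBytes = 4, NumElts = 2; // a v2i32 vector
      std::vector<int> Mask;
      for (int I = 0; I != NumElts; ++I)
        for (int J = ScalarSizeInBytes - 1; J >= 0; --J)
          Mask.push_back(I * ScalarSizeInBytes + J);
      for (int M : Mask)
        std::printf("%d ", M);
      std::printf("\n");
    }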
+SDValue VectorLegalizer::ExpandBSWAP(SDValue Op) {
+ EVT VT = Op.getValueType();
+
+ // Generate a byte wise shuffle mask for the BSWAP.
+ SmallVector<int, 16> ShuffleMask;
+ createBSWAPShuffleMask(VT, ShuffleMask);
EVT ByteVT = EVT::getVectorVT(*DAG.getContext(), MVT::i8, ShuffleMask.size());
// Only emit a shuffle if the mask is legal.
@@ -903,8 +876,7 @@ SDValue VectorLegalizer::ExpandBSWAP(SDValue Op) {
SDLoc DL(Op);
Op = DAG.getNode(ISD::BITCAST, DL, ByteVT, Op.getOperand(0));
- Op = DAG.getVectorShuffle(ByteVT, DL, Op, DAG.getUNDEF(ByteVT),
- ShuffleMask.data());
+ Op = DAG.getVectorShuffle(ByteVT, DL, Op, DAG.getUNDEF(ByteVT), ShuffleMask);
return DAG.getNode(ISD::BITCAST, DL, VT, Op);
}
@@ -915,12 +887,36 @@ SDValue VectorLegalizer::ExpandBITREVERSE(SDValue Op) {
if (TLI.isOperationLegalOrCustom(ISD::BITREVERSE, VT.getScalarType()))
return DAG.UnrollVectorOp(Op.getNode());
+  // If the vector element width is a whole number of bytes, test if it's legal
+ // to BSWAP shuffle the bytes and then perform the BITREVERSE on the byte
+ // vector. This greatly reduces the number of bit shifts necessary.
+ unsigned ScalarSizeInBits = VT.getScalarSizeInBits();
+ if (ScalarSizeInBits > 8 && (ScalarSizeInBits % 8) == 0) {
+ SmallVector<int, 16> BSWAPMask;
+ createBSWAPShuffleMask(VT, BSWAPMask);
+
+ EVT ByteVT = EVT::getVectorVT(*DAG.getContext(), MVT::i8, BSWAPMask.size());
+ if (TLI.isShuffleMaskLegal(BSWAPMask, ByteVT) &&
+ (TLI.isOperationLegalOrCustom(ISD::BITREVERSE, ByteVT) ||
+ (TLI.isOperationLegalOrCustom(ISD::SHL, ByteVT) &&
+ TLI.isOperationLegalOrCustom(ISD::SRL, ByteVT) &&
+ TLI.isOperationLegalOrCustomOrPromote(ISD::AND, ByteVT) &&
+ TLI.isOperationLegalOrCustomOrPromote(ISD::OR, ByteVT)))) {
+ SDLoc DL(Op);
+ Op = DAG.getNode(ISD::BITCAST, DL, ByteVT, Op.getOperand(0));
+ Op = DAG.getVectorShuffle(ByteVT, DL, Op, DAG.getUNDEF(ByteVT),
+ BSWAPMask);
+ Op = DAG.getNode(ISD::BITREVERSE, DL, ByteVT, Op);
+ return DAG.getNode(ISD::BITCAST, DL, VT, Op);
+ }
+ }
+
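A scalar sketch of the identity this block exploits, assuming 32-bit lanes and
GCC/Clang builtins: reversing all 32 bits is the same as byte-swapping (the
BSWAP shuffle step) and then reversing the bits inside each byte independently
(the byte-vector BITREVERSE step):

    #include <cstdint>

    static uint8_t revByte(uint8_t B) {
      uint8_t R = 0;
      for (int I = 0; I < 8; ++I)
        R = (R << 1) | ((B >> I) & 1);
      return R;
    }

    static uint32_t bitreverse32(uint32_t X) {
      uint32_t Swapped = __builtin_bswap32(X); // the BSWAP shuffle step
      uint32_t Out = 0;
      for (int I = 0; I < 4; ++I)              // per-byte BITREVERSE step
        Out |= uint32_t(revByte((Swapped >> (8 * I)) & 0xFF)) << (8 * I);
      return Out;
    }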
// If we have the appropriate vector bit operations, it is better to use them
// than unrolling and expanding each component.
if (!TLI.isOperationLegalOrCustom(ISD::SHL, VT) ||
!TLI.isOperationLegalOrCustom(ISD::SRL, VT) ||
- !TLI.isOperationLegalOrCustom(ISD::AND, VT) ||
- !TLI.isOperationLegalOrCustom(ISD::OR, VT))
+ !TLI.isOperationLegalOrCustomOrPromote(ISD::AND, VT) ||
+ !TLI.isOperationLegalOrCustomOrPromote(ISD::OR, VT))
return DAG.UnrollVectorOp(Op.getNode());
// Let LegalizeDAG handle this later.
@@ -1027,10 +1023,12 @@ SDValue VectorLegalizer::ExpandFNEG(SDValue Op) {
}
SDValue VectorLegalizer::ExpandCTLZ_CTTZ_ZERO_UNDEF(SDValue Op) {
- // If the non-ZERO_UNDEF version is supported we can let LegalizeDAG handle.
+  // If the non-ZERO_UNDEF version is supported, we can use that instead.
unsigned Opc = Op.getOpcode() == ISD::CTLZ_ZERO_UNDEF ? ISD::CTLZ : ISD::CTTZ;
- if (TLI.isOperationLegalOrCustom(Opc, Op.getValueType()))
- return Op;
+ if (TLI.isOperationLegalOrCustom(Opc, Op.getValueType())) {
+ SDLoc DL(Op);
+ return DAG.getNode(Opc, DL, Op.getValueType(), Op.getOperand(0));
+ }
// Otherwise go ahead and unroll.
return DAG.UnrollVectorOp(Op.getNode());
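
In scalar terms, this rewrite is sound because the plain CTLZ/CTTZ nodes are
defined for every input, so they can always stand in for the _ZERO_UNDEF
variants, whose result is unspecified at zero. A hedged sketch using a
GCC/Clang builtin:

    #include <cstdint>

    // Defined-at-zero count-leading-zeros; __builtin_clz alone is undefined
    // for 0, which is exactly the freedom CTLZ_ZERO_UNDEF grants.
    static unsigned ctlz32(uint32_t X) {
      return X ? static_cast<unsigned>(__builtin_clz(X)) : 32u;
    }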
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index d0187d3..f3adca4 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -223,17 +223,13 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_INSERT_VECTOR_ELT(SDNode *N) {
SDValue DAGTypeLegalizer::ScalarizeVecRes_LOAD(LoadSDNode *N) {
assert(N->isUnindexed() && "Indexed vector load?");
- SDValue Result = DAG.getLoad(ISD::UNINDEXED,
- N->getExtensionType(),
- N->getValueType(0).getVectorElementType(),
- SDLoc(N),
- N->getChain(), N->getBasePtr(),
- DAG.getUNDEF(N->getBasePtr().getValueType()),
- N->getPointerInfo(),
- N->getMemoryVT().getVectorElementType(),
- N->isVolatile(), N->isNonTemporal(),
- N->isInvariant(), N->getOriginalAlignment(),
- N->getAAInfo());
+ SDValue Result = DAG.getLoad(
+ ISD::UNINDEXED, N->getExtensionType(),
+ N->getValueType(0).getVectorElementType(), SDLoc(N), N->getChain(),
+ N->getBasePtr(), DAG.getUNDEF(N->getBasePtr().getValueType()),
+ N->getPointerInfo(), N->getMemoryVT().getVectorElementType(),
+ N->getOriginalAlignment(), N->getMemOperand()->getFlags(),
+ N->getAAInfo());
// Legalize the chain result - switch anything that used the old chain to
// use the new one.
@@ -370,7 +366,7 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_UNDEF(SDNode *N) {
SDValue DAGTypeLegalizer::ScalarizeVecRes_VECTOR_SHUFFLE(SDNode *N) {
// Figure out if the scalar is the LHS or RHS and return it.
SDValue Arg = N->getOperand(2).getOperand(0);
- if (Arg.getOpcode() == ISD::UNDEF)
+ if (Arg.isUndef())
return DAG.getUNDEF(N->getValueType(0).getVectorElementType());
unsigned Op = !cast<ConstantSDNode>(Arg)->isNullValue();
return GetScalarizedVector(N->getOperand(Op));
@@ -476,16 +472,16 @@ bool DAGTypeLegalizer::ScalarizeVectorOperand(SDNode *N, unsigned OpNo) {
return false;
}
-/// ScalarizeVecOp_BITCAST - If the value to convert is a vector that needs
-/// to be scalarized, it must be <1 x ty>. Convert the element instead.
+/// If the value to convert is a vector that needs to be scalarized, it must be
+/// <1 x ty>. Convert the element instead.
SDValue DAGTypeLegalizer::ScalarizeVecOp_BITCAST(SDNode *N) {
SDValue Elt = GetScalarizedVector(N->getOperand(0));
return DAG.getNode(ISD::BITCAST, SDLoc(N),
N->getValueType(0), Elt);
}
-/// ScalarizeVecOp_UnaryOp - If the input is a vector that needs to be
-/// scalarized, it must be <1 x ty>. Do the operation on the element instead.
+/// If the input is a vector that needs to be scalarized, it must be <1 x ty>.
+/// Do the operation on the element instead.
SDValue DAGTypeLegalizer::ScalarizeVecOp_UnaryOp(SDNode *N) {
assert(N->getValueType(0).getVectorNumElements() == 1 &&
"Unexpected vector type!");
@@ -497,8 +493,7 @@ SDValue DAGTypeLegalizer::ScalarizeVecOp_UnaryOp(SDNode *N) {
return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), N->getValueType(0), Op);
}
-/// ScalarizeVecOp_CONCAT_VECTORS - The vectors to concatenate have length one -
-/// use a BUILD_VECTOR instead.
+/// The vectors to concatenate have length one - use a BUILD_VECTOR instead.
SDValue DAGTypeLegalizer::ScalarizeVecOp_CONCAT_VECTORS(SDNode *N) {
SmallVector<SDValue, 8> Ops(N->getNumOperands());
for (unsigned i = 0, e = N->getNumOperands(); i < e; ++i)
@@ -506,9 +501,8 @@ SDValue DAGTypeLegalizer::ScalarizeVecOp_CONCAT_VECTORS(SDNode *N) {
return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), N->getValueType(0), Ops);
}
-/// ScalarizeVecOp_EXTRACT_VECTOR_ELT - If the input is a vector that needs to
-/// be scalarized, it must be <1 x ty>, so just return the element, ignoring the
-/// index.
+/// If the input is a vector that needs to be scalarized, it must be <1 x ty>,
+/// so just return the element, ignoring the index.
SDValue DAGTypeLegalizer::ScalarizeVecOp_EXTRACT_VECTOR_ELT(SDNode *N) {
SDValue Res = GetScalarizedVector(N->getOperand(0));
if (Res.getValueType() != N->getValueType(0))
@@ -518,8 +512,8 @@ SDValue DAGTypeLegalizer::ScalarizeVecOp_EXTRACT_VECTOR_ELT(SDNode *N) {
}
-/// ScalarizeVecOp_VSELECT - If the input condition is a vector that needs to be
-/// scalarized, it must be <1 x i1>, so just convert to a normal ISD::SELECT
+/// If the input condition is a vector that needs to be scalarized, it must be
+/// <1 x i1>, so just convert to a normal ISD::SELECT
/// (still with vector output type since that was acceptable if we got here).
SDValue DAGTypeLegalizer::ScalarizeVecOp_VSELECT(SDNode *N) {
SDValue ScalarCond = GetScalarizedVector(N->getOperand(0));
@@ -529,29 +523,28 @@ SDValue DAGTypeLegalizer::ScalarizeVecOp_VSELECT(SDNode *N) {
N->getOperand(2));
}
-/// ScalarizeVecOp_STORE - If the value to store is a vector that needs to be
-/// scalarized, it must be <1 x ty>. Just store the element.
+/// If the value to store is a vector that needs to be scalarized, it must be
+/// <1 x ty>. Just store the element.
SDValue DAGTypeLegalizer::ScalarizeVecOp_STORE(StoreSDNode *N, unsigned OpNo){
assert(N->isUnindexed() && "Indexed store of one-element vector?");
assert(OpNo == 1 && "Do not know how to scalarize this operand!");
SDLoc dl(N);
if (N->isTruncatingStore())
- return DAG.getTruncStore(N->getChain(), dl,
- GetScalarizedVector(N->getOperand(1)),
- N->getBasePtr(), N->getPointerInfo(),
- N->getMemoryVT().getVectorElementType(),
- N->isVolatile(), N->isNonTemporal(),
- N->getAlignment(), N->getAAInfo());
+ return DAG.getTruncStore(
+ N->getChain(), dl, GetScalarizedVector(N->getOperand(1)),
+ N->getBasePtr(), N->getPointerInfo(),
+ N->getMemoryVT().getVectorElementType(), N->getAlignment(),
+ N->getMemOperand()->getFlags(), N->getAAInfo());
return DAG.getStore(N->getChain(), dl, GetScalarizedVector(N->getOperand(1)),
N->getBasePtr(), N->getPointerInfo(),
- N->isVolatile(), N->isNonTemporal(),
- N->getOriginalAlignment(), N->getAAInfo());
+ N->getOriginalAlignment(), N->getMemOperand()->getFlags(),
+ N->getAAInfo());
}
-/// ScalarizeVecOp_FP_ROUND - If the value to round is a vector that needs
-/// to be scalarized, it must be <1 x ty>. Convert the element instead.
+/// If the value to round is a vector that needs to be scalarized, it must be
+/// <1 x ty>. Convert the element instead.
SDValue DAGTypeLegalizer::ScalarizeVecOp_FP_ROUND(SDNode *N, unsigned OpNo) {
SDValue Elt = GetScalarizedVector(N->getOperand(0));
SDValue Res = DAG.getNode(ISD::FP_ROUND, SDLoc(N),
@@ -564,11 +557,10 @@ SDValue DAGTypeLegalizer::ScalarizeVecOp_FP_ROUND(SDNode *N, unsigned OpNo) {
// Result Vector Splitting
//===----------------------------------------------------------------------===//
-/// SplitVectorResult - This method is called when the specified result of the
-/// specified node is found to need vector splitting. At this point, the node
-/// may also have invalid operands or may have other results that need
-/// legalization, we just know that (at least) one result needs vector
-/// splitting.
+/// This method is called when the specified result of the specified node is
+/// found to need vector splitting. At this point, the node may also have
+/// invalid operands or may have other results that need legalization; we just
+/// know that (at least) one result needs vector splitting.
void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
DEBUG(dbgs() << "Split node result: ";
N->dump(&DAG);
@@ -621,6 +613,12 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
SplitVecRes_VECTOR_SHUFFLE(cast<ShuffleVectorSDNode>(N), Lo, Hi);
break;
+ case ISD::ANY_EXTEND_VECTOR_INREG:
+ case ISD::SIGN_EXTEND_VECTOR_INREG:
+ case ISD::ZERO_EXTEND_VECTOR_INREG:
+ SplitVecRes_ExtVecInRegOp(N, Lo, Hi);
+ break;
+
case ISD::BITREVERSE:
case ISD::BSWAP:
case ISD::CONVERT_RNDSAT:
@@ -664,6 +662,8 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
case ISD::ADD:
case ISD::SUB:
case ISD::MUL:
+ case ISD::MULHS:
+ case ISD::MULHU:
case ISD::FADD:
case ISD::FSUB:
case ISD::FMUL:
@@ -845,23 +845,41 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_SUBVECTOR(SDNode *N, SDValue &Lo,
SDLoc dl(N);
GetSplitVector(Vec, Lo, Hi);
- // Spill the vector to the stack.
EVT VecVT = Vec.getValueType();
- EVT SubVecVT = VecVT.getVectorElementType();
+ EVT VecElemVT = VecVT.getVectorElementType();
+ unsigned VecElems = VecVT.getVectorNumElements();
+ unsigned SubElems = SubVec.getValueType().getVectorNumElements();
+
+ // If we know the index is 0, and we know the subvector doesn't cross the
+ // boundary between the halves, we can avoid spilling the vector, and insert
+ // into the lower half of the split vector directly.
+ // TODO: The IdxVal == 0 constraint is artificial, we could do this whenever
+ // the index is constant and there is no boundary crossing. But those cases
+ // don't seem to get hit in practice.
+ if (ConstantSDNode *ConstIdx = dyn_cast<ConstantSDNode>(Idx)) {
+ unsigned IdxVal = ConstIdx->getZExtValue();
+ if ((IdxVal == 0) && (IdxVal + SubElems <= VecElems / 2)) {
+ EVT LoVT, HiVT;
+ std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
+ Lo = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, LoVT, Lo, SubVec, Idx);
+ return;
+ }
+ }
+
+ // Spill the vector to the stack.
SDValue StackPtr = DAG.CreateStackTemporary(VecVT);
- SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr,
- MachinePointerInfo(), false, false, 0);
+ SDValue Store =
+ DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, MachinePointerInfo());
// Store the new subvector into the specified index.
- SDValue SubVecPtr = GetVectorElementPointer(StackPtr, SubVecVT, Idx);
+ SDValue SubVecPtr = GetVectorElementPointer(StackPtr, VecElemVT, Idx);
Type *VecType = VecVT.getTypeForEVT(*DAG.getContext());
unsigned Alignment = DAG.getDataLayout().getPrefTypeAlignment(VecType);
- Store = DAG.getStore(Store, dl, SubVec, SubVecPtr, MachinePointerInfo(),
- false, false, 0);
+ Store = DAG.getStore(Store, dl, SubVec, SubVecPtr, MachinePointerInfo());
// Load the Lo part from the stack slot.
- Lo = DAG.getLoad(Lo.getValueType(), dl, Store, StackPtr, MachinePointerInfo(),
- false, false, false, 0);
+ Lo =
+ DAG.getLoad(Lo.getValueType(), dl, Store, StackPtr, MachinePointerInfo());
// Increment the pointer to the other part.
unsigned IncrementSize = Lo.getValueType().getSizeInBits() / 8;
@@ -871,7 +889,7 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_SUBVECTOR(SDNode *N, SDValue &Lo,
// Load the Hi part from the stack slot.
Hi = DAG.getLoad(Hi.getValueType(), dl, Store, StackPtr, MachinePointerInfo(),
- false, false, false, MinAlign(Alignment, IncrementSize));
+ MinAlign(Alignment, IncrementSize));
}
void DAGTypeLegalizer::SplitVecRes_FPOWI(SDNode *N, SDValue &Lo,
@@ -917,6 +935,39 @@ void DAGTypeLegalizer::SplitVecRes_InregOp(SDNode *N, SDValue &Lo,
DAG.getValueType(HiVT));
}
+void DAGTypeLegalizer::SplitVecRes_ExtVecInRegOp(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ unsigned Opcode = N->getOpcode();
+ SDValue N0 = N->getOperand(0);
+
+ SDLoc dl(N);
+ SDValue InLo, InHi;
+ GetSplitVector(N0, InLo, InHi);
+ EVT InLoVT = InLo.getValueType();
+ unsigned InNumElements = InLoVT.getVectorNumElements();
+
+ EVT OutLoVT, OutHiVT;
+ std::tie(OutLoVT, OutHiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
+ unsigned OutNumElements = OutLoVT.getVectorNumElements();
+ assert((2 * OutNumElements) <= InNumElements &&
+ "Illegal extend vector in reg split");
+
+ // *_EXTEND_VECTOR_INREG instructions extend the lowest elements of the
+ // input vector (i.e. we only use InLo):
+ // OutLo will extend the first OutNumElements from InLo.
+ // OutHi will extend the next OutNumElements from InLo.
+
+ // Shuffle the elements from InLo for OutHi into the bottom elements to
+ // create a 'fake' InHi.
+ SmallVector<int, 8> SplitHi(InNumElements, -1);
+ for (unsigned i = 0; i != OutNumElements; ++i)
+ SplitHi[i] = i + OutNumElements;
+ InHi = DAG.getVectorShuffle(InLoVT, dl, InLo, DAG.getUNDEF(InLoVT), SplitHi);
+
+ Lo = DAG.getNode(Opcode, dl, OutLoVT, InLo);
+ Hi = DAG.getNode(Opcode, dl, OutHiVT, InHi);
+}
+
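An illustrative standalone version of the SplitHi mask construction above:
with InNumElements == 8 and OutNumElements == 4 it yields
{4, 5, 6, 7, -1, -1, -1, -1}, moving the next four input lanes to the bottom
and leaving the rest undef:

    #include <vector>

    static std::vector<int> makeSplitHiMask(unsigned InNumElements,
                                            unsigned OutNumElements) {
      std::vector<int> SplitHi(InNumElements, -1); // -1 == undef lane
      for (unsigned i = 0; i != OutNumElements; ++i)
        SplitHi[i] = static_cast<int>(i + OutNumElements);
      return SplitHi;
    }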
void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo,
SDValue &Hi) {
SDValue Vec = N->getOperand(0);
@@ -947,20 +998,20 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo,
EVT VecVT = Vec.getValueType();
EVT EltVT = VecVT.getVectorElementType();
SDValue StackPtr = DAG.CreateStackTemporary(VecVT);
- SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr,
- MachinePointerInfo(), false, false, 0);
+ SDValue Store =
+ DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, MachinePointerInfo());
// Store the new element. This may be larger than the vector element type,
// so use a truncating store.
SDValue EltPtr = GetVectorElementPointer(StackPtr, EltVT, Idx);
Type *VecType = VecVT.getTypeForEVT(*DAG.getContext());
unsigned Alignment = DAG.getDataLayout().getPrefTypeAlignment(VecType);
- Store = DAG.getTruncStore(Store, dl, Elt, EltPtr, MachinePointerInfo(), EltVT,
- false, false, 0);
+ Store =
+ DAG.getTruncStore(Store, dl, Elt, EltPtr, MachinePointerInfo(), EltVT);
// Load the Lo part from the stack slot.
- Lo = DAG.getLoad(Lo.getValueType(), dl, Store, StackPtr, MachinePointerInfo(),
- false, false, false, 0);
+ Lo =
+ DAG.getLoad(Lo.getValueType(), dl, Store, StackPtr, MachinePointerInfo());
// Increment the pointer to the other part.
unsigned IncrementSize = Lo.getValueType().getSizeInBits() / 8;
@@ -970,7 +1021,7 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo,
// Load the Hi part from the stack slot.
Hi = DAG.getLoad(Hi.getValueType(), dl, Store, StackPtr, MachinePointerInfo(),
- false, false, false, MinAlign(Alignment, IncrementSize));
+ MinAlign(Alignment, IncrementSize));
}
void DAGTypeLegalizer::SplitVecRes_SCALAR_TO_VECTOR(SDNode *N, SDValue &Lo,
@@ -995,25 +1046,21 @@ void DAGTypeLegalizer::SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo,
SDValue Offset = DAG.getUNDEF(Ptr.getValueType());
EVT MemoryVT = LD->getMemoryVT();
unsigned Alignment = LD->getOriginalAlignment();
- bool isVolatile = LD->isVolatile();
- bool isNonTemporal = LD->isNonTemporal();
- bool isInvariant = LD->isInvariant();
+ MachineMemOperand::Flags MMOFlags = LD->getMemOperand()->getFlags();
AAMDNodes AAInfo = LD->getAAInfo();
EVT LoMemVT, HiMemVT;
std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
Lo = DAG.getLoad(ISD::UNINDEXED, ExtType, LoVT, dl, Ch, Ptr, Offset,
- LD->getPointerInfo(), LoMemVT, isVolatile, isNonTemporal,
- isInvariant, Alignment, AAInfo);
+ LD->getPointerInfo(), LoMemVT, Alignment, MMOFlags, AAInfo);
unsigned IncrementSize = LoMemVT.getSizeInBits()/8;
Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
DAG.getConstant(IncrementSize, dl, Ptr.getValueType()));
Hi = DAG.getLoad(ISD::UNINDEXED, ExtType, HiVT, dl, Ch, Ptr, Offset,
- LD->getPointerInfo().getWithOffset(IncrementSize),
- HiMemVT, isVolatile, isNonTemporal, isInvariant, Alignment,
- AAInfo);
+ LD->getPointerInfo().getWithOffset(IncrementSize), HiMemVT,
+ Alignment, MMOFlags, AAInfo);
// Build a factor node to remember that this load is independent of the
// other one.
@@ -1062,7 +1109,7 @@ void DAGTypeLegalizer::SplitVecRes_MLOAD(MaskedLoadSDNode *MLD,
std::tie(Src0Lo, Src0Hi) = DAG.SplitVector(Src0, dl);
MachineMemOperand *MMO = DAG.getMachineFunction().
- getMachineMemOperand(MLD->getPointerInfo(),
+ getMachineMemOperand(MLD->getPointerInfo(),
MachineMemOperand::MOLoad, LoMemVT.getStoreSize(),
Alignment, MLD->getAAInfo(), MLD->getRanges());
@@ -1074,7 +1121,7 @@ void DAGTypeLegalizer::SplitVecRes_MLOAD(MaskedLoadSDNode *MLD,
DAG.getConstant(IncrementSize, dl, Ptr.getValueType()));
MMO = DAG.getMachineFunction().
- getMachineMemOperand(MLD->getPointerInfo(),
+ getMachineMemOperand(MLD->getPointerInfo(),
MachineMemOperand::MOLoad, HiMemVT.getStoreSize(),
SecondHalfAlignment, MLD->getAAInfo(), MLD->getRanges());
@@ -1131,7 +1178,7 @@ void DAGTypeLegalizer::SplitVecRes_MGATHER(MaskedGatherSDNode *MGT,
std::tie(IndexLo, IndexHi) = DAG.SplitVector(Index, dl);
MachineMemOperand *MMO = DAG.getMachineFunction().
- getMachineMemOperand(MGT->getPointerInfo(),
+ getMachineMemOperand(MGT->getPointerInfo(),
MachineMemOperand::MOLoad, LoMemVT.getStoreSize(),
Alignment, MGT->getAAInfo(), MGT->getRanges());
@@ -1362,7 +1409,7 @@ void DAGTypeLegalizer::SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N,
SDValue Op1 = InputUsed[1] == -1U ?
DAG.getUNDEF(NewVT) : Inputs[InputUsed[1]];
// At least one input vector was used. Create a new shuffle vector.
- Output = DAG.getVectorShuffle(NewVT, dl, Op0, Op1, &Ops[0]);
+ Output = DAG.getVectorShuffle(NewVT, dl, Op0, Op1, Ops);
}
Ops.clear();
@@ -1374,10 +1421,10 @@ void DAGTypeLegalizer::SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N,
// Operand Vector Splitting
//===----------------------------------------------------------------------===//
-/// SplitVectorOperand - This method is called when the specified operand of the
-/// specified node is found to need vector splitting. At this point, all of the
-/// result types of the node are known to be legal, but other operands of the
-/// node may need legalization as well as the specified one.
+/// This method is called when the specified operand of the specified node is
+/// found to need vector splitting. At this point, all of the result types of
+/// the node are known to be legal, but other operands of the node may need
+/// legalization as well as the specified one.
bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) {
DEBUG(dbgs() << "Split node operand: ";
N->dump(&DAG);
@@ -1600,13 +1647,13 @@ SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N) {
// Store the vector to the stack.
SDValue StackPtr = DAG.CreateStackTemporary(VecVT);
- SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr,
- MachinePointerInfo(), false, false, 0);
+ SDValue Store =
+ DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, MachinePointerInfo());
// Load back the required element.
StackPtr = GetVectorElementPointer(StackPtr, EltVT, Idx);
return DAG.getExtLoad(ISD::EXTLOAD, dl, N->getValueType(0), Store, StackPtr,
- MachinePointerInfo(), EltVT, false, false, false, 0);
+ MachinePointerInfo(), EltVT);
}
SDValue DAGTypeLegalizer::SplitVecOp_MGATHER(MaskedGatherSDNode *MGT,
@@ -1646,7 +1693,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_MGATHER(MaskedGatherSDNode *MGT,
std::tie(IndexLo, IndexHi) = DAG.SplitVector(Index, dl);
MachineMemOperand *MMO = DAG.getMachineFunction().
- getMachineMemOperand(MGT->getPointerInfo(),
+ getMachineMemOperand(MGT->getPointerInfo(),
MachineMemOperand::MOLoad, LoMemVT.getStoreSize(),
Alignment, MGT->getAAInfo(), MGT->getRanges());
@@ -1655,7 +1702,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_MGATHER(MaskedGatherSDNode *MGT,
OpsLo, MMO);
MMO = DAG.getMachineFunction().
- getMachineMemOperand(MGT->getPointerInfo(),
+ getMachineMemOperand(MGT->getPointerInfo(),
MachineMemOperand::MOLoad, HiMemVT.getStoreSize(),
Alignment, MGT->getAAInfo(),
MGT->getRanges());
@@ -1688,7 +1735,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSTORE(MaskedStoreSDNode *N,
EVT MemoryVT = N->getMemoryVT();
unsigned Alignment = N->getOriginalAlignment();
SDLoc DL(N);
-
+
EVT LoMemVT, HiMemVT;
std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
@@ -1717,7 +1764,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSTORE(MaskedStoreSDNode *N,
SDValue Lo, Hi;
MachineMemOperand *MMO = DAG.getMachineFunction().
- getMachineMemOperand(N->getPointerInfo(),
+ getMachineMemOperand(N->getPointerInfo(),
MachineMemOperand::MOStore, LoMemVT.getStoreSize(),
Alignment, N->getAAInfo(), N->getRanges());
@@ -1729,7 +1776,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSTORE(MaskedStoreSDNode *N,
DAG.getConstant(IncrementSize, DL, Ptr.getValueType()));
MMO = DAG.getMachineFunction().
- getMachineMemOperand(N->getPointerInfo(),
+ getMachineMemOperand(N->getPointerInfo(),
MachineMemOperand::MOStore, HiMemVT.getStoreSize(),
SecondHalfAlignment, N->getAAInfo(), N->getRanges());
@@ -1778,7 +1825,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSCATTER(MaskedScatterSDNode *N,
SDValue Lo, Hi;
MachineMemOperand *MMO = DAG.getMachineFunction().
- getMachineMemOperand(N->getPointerInfo(),
+ getMachineMemOperand(N->getPointerInfo(),
MachineMemOperand::MOStore, LoMemVT.getStoreSize(),
Alignment, N->getAAInfo(), N->getRanges());
@@ -1787,7 +1834,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSCATTER(MaskedScatterSDNode *N,
DL, OpsLo, MMO);
MMO = DAG.getMachineFunction().
- getMachineMemOperand(N->getPointerInfo(),
+ getMachineMemOperand(N->getPointerInfo(),
MachineMemOperand::MOStore, HiMemVT.getStoreSize(),
Alignment, N->getAAInfo(), N->getRanges());
@@ -1810,8 +1857,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) {
SDValue Ptr = N->getBasePtr();
EVT MemoryVT = N->getMemoryVT();
unsigned Alignment = N->getOriginalAlignment();
- bool isVol = N->isVolatile();
- bool isNT = N->isNonTemporal();
+ MachineMemOperand::Flags MMOFlags = N->getMemOperand()->getFlags();
AAMDNodes AAInfo = N->getAAInfo();
SDValue Lo, Hi;
GetSplitVector(N->getOperand(1), Lo, Hi);
@@ -1822,11 +1868,11 @@ SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) {
unsigned IncrementSize = LoMemVT.getSizeInBits()/8;
if (isTruncating)
- Lo = DAG.getTruncStore(Ch, DL, Lo, Ptr, N->getPointerInfo(),
- LoMemVT, isVol, isNT, Alignment, AAInfo);
+ Lo = DAG.getTruncStore(Ch, DL, Lo, Ptr, N->getPointerInfo(), LoMemVT,
+ Alignment, MMOFlags, AAInfo);
else
- Lo = DAG.getStore(Ch, DL, Lo, Ptr, N->getPointerInfo(),
- isVol, isNT, Alignment, AAInfo);
+ Lo = DAG.getStore(Ch, DL, Lo, Ptr, N->getPointerInfo(), Alignment, MMOFlags,
+ AAInfo);
// Increment the pointer to the other half.
Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
@@ -1835,11 +1881,11 @@ SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) {
if (isTruncating)
Hi = DAG.getTruncStore(Ch, DL, Hi, Ptr,
N->getPointerInfo().getWithOffset(IncrementSize),
- HiMemVT, isVol, isNT, Alignment, AAInfo);
+ HiMemVT, Alignment, MMOFlags, AAInfo);
else
Hi = DAG.getStore(Ch, DL, Hi, Ptr,
N->getPointerInfo().getWithOffset(IncrementSize),
- isVol, isNT, Alignment, AAInfo);
+ Alignment, MMOFlags, AAInfo);
return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
}
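The SplitVecOp_STORE hunk collapses the separate isVolatile/isNonTemporal booleans into one MachineMemOperand::Flags value read off the node's memory operand, so the split stores forward a single bitmask instead of a trail of bools. A minimal standalone sketch of that flag-bitmask idiom (the enum and function names are illustrative stand-ins, not LLVM's):

    #include <cstdio>

    // Illustrative flag set modeled on MachineMemOperand::Flags.
    enum MemFlags : unsigned {
      MONone        = 0,
      MOVolatile    = 1u << 0,
      MONonTemporal = 1u << 1,
    };

    // One parameter carries every property; call sites no longer pass a
    // brittle positional sequence of bools (isVol, isNT, ...).
    void emitStore(unsigned flags) {
      if (flags & MOVolatile)
        std::printf("volatile ");
      if (flags & MONonTemporal)
        std::printf("non-temporal ");
      std::printf("store\n");
    }

    int main() {
      unsigned flags = MOVolatile | MONonTemporal; // read once from the node
      emitStore(flags);                            // forwarded to both halves
      emitStore(flags);
    }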
@@ -1889,7 +1935,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_TruncateHelper(SDNode *N) {
EVT OutVT = N->getValueType(0);
unsigned NumElements = OutVT.getVectorNumElements();
bool IsFloat = OutVT.isFloatingPoint();
-
+
// Widening should have already made sure this is a power-of-two vector
// if we're trying to split it at all. assert() that's true, just in case.
assert(!(NumElements & 1) && "Splitting vector, but not in half!");
@@ -2069,6 +2115,12 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
Res = WidenVecRes_Shift(N);
break;
+ case ISD::ANY_EXTEND_VECTOR_INREG:
+ case ISD::SIGN_EXTEND_VECTOR_INREG:
+ case ISD::ZERO_EXTEND_VECTOR_INREG:
+ Res = WidenVecRes_EXTEND_VECTOR_INREG(N);
+ break;
+
case ISD::ANY_EXTEND:
case ISD::FP_EXTEND:
case ISD::FP_ROUND:
@@ -2355,6 +2407,61 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) {
return DAG.getNode(ISD::BUILD_VECTOR, DL, WidenVT, Ops);
}
+SDValue DAGTypeLegalizer::WidenVecRes_EXTEND_VECTOR_INREG(SDNode *N) {
+ unsigned Opcode = N->getOpcode();
+ SDValue InOp = N->getOperand(0);
+ SDLoc DL(N);
+
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ EVT WidenSVT = WidenVT.getVectorElementType();
+ unsigned WidenNumElts = WidenVT.getVectorNumElements();
+
+ EVT InVT = InOp.getValueType();
+ EVT InSVT = InVT.getVectorElementType();
+ unsigned InVTNumElts = InVT.getVectorNumElements();
+
+ if (getTypeAction(InVT) == TargetLowering::TypeWidenVector) {
+ InOp = GetWidenedVector(InOp);
+ InVT = InOp.getValueType();
+ if (InVT.getSizeInBits() == WidenVT.getSizeInBits()) {
+ switch (Opcode) {
+ case ISD::ANY_EXTEND_VECTOR_INREG:
+ return DAG.getAnyExtendVectorInReg(InOp, DL, WidenVT);
+ case ISD::SIGN_EXTEND_VECTOR_INREG:
+ return DAG.getSignExtendVectorInReg(InOp, DL, WidenVT);
+ case ISD::ZERO_EXTEND_VECTOR_INREG:
+ return DAG.getZeroExtendVectorInReg(InOp, DL, WidenVT);
+ }
+ }
+ }
+
+ // Unroll, extend the scalars and rebuild the vector.
+ SmallVector<SDValue, 16> Ops;
+ for (unsigned i = 0, e = std::min(InVTNumElts, WidenNumElts); i != e; ++i) {
+ SDValue Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, InSVT, InOp,
+ DAG.getConstant(i, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
+ switch (Opcode) {
+ case ISD::ANY_EXTEND_VECTOR_INREG:
+ Val = DAG.getNode(ISD::ANY_EXTEND, DL, WidenSVT, Val);
+ break;
+ case ISD::SIGN_EXTEND_VECTOR_INREG:
+ Val = DAG.getNode(ISD::SIGN_EXTEND, DL, WidenSVT, Val);
+ break;
+ case ISD::ZERO_EXTEND_VECTOR_INREG:
+ Val = DAG.getNode(ISD::ZERO_EXTEND, DL, WidenSVT, Val);
+ break;
+ default:
+ llvm_unreachable("A *_EXTEND_VECTOR_INREG node was expected");
+ }
+ Ops.push_back(Val);
+ }
+
+ while (Ops.size() != WidenNumElts)
+ Ops.push_back(DAG.getUNDEF(WidenSVT));
+
+ return DAG.getNode(ISD::BUILD_VECTOR, DL, WidenVT, Ops);
+}
+
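When the widened input is not the same size as the widened result, the new WidenVecRes_EXTEND_VECTOR_INREG unrolls: extract each source element, extend it to the wider scalar type, then pad with undef up to the widened element count. A hypothetical scalar model of that unroll-and-pad loop, with plain integers in place of SDValues:

    #include <algorithm>
    #include <cstddef>
    #include <cstdint>
    #include <vector>

    // Sign-extend each 8-bit lane to 32 bits, then pad the result out to
    // widenNumElts lanes; 0 stands in for DAG.getUNDEF(WidenSVT).
    std::vector<int32_t> widenSExtInReg(const std::vector<int8_t> &in,
                                        std::size_t widenNumElts) {
      std::vector<int32_t> out;
      std::size_t n = std::min(in.size(), widenNumElts);
      for (std::size_t i = 0; i != n; ++i)
        out.push_back(static_cast<int32_t>(in[i])); // per-lane SIGN_EXTEND
      while (out.size() != widenNumElts)
        out.push_back(0);                           // undef padding
      return out;
    }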
SDValue DAGTypeLegalizer::WidenVecRes_FCOPYSIGN(SDNode *N) {
// If this is an FCOPYSIGN with same input types, we can treat it as a
// normal (can trap) binary op.
@@ -2546,7 +2653,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_CONCAT_VECTORS(SDNode *N) {
// The inputs and the result are widened to the same value.
unsigned i;
for (i=1; i < NumOperands; ++i)
- if (N->getOperand(i).getOpcode() != ISD::UNDEF)
+ if (!N->getOperand(i).isUndef())
break;
if (i == NumOperands)
@@ -2564,7 +2671,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_CONCAT_VECTORS(SDNode *N) {
return DAG.getVectorShuffle(WidenVT, dl,
GetWidenedVector(N->getOperand(0)),
GetWidenedVector(N->getOperand(1)),
- &MaskOps[0]);
+ MaskOps);
}
}
}
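The `&MaskOps[0]` → `MaskOps` rewrites in this file lean on getVectorShuffle now taking an ArrayRef-style mask parameter, which binds to any contiguous container. A reduced sketch of that API shape (IntSpan is a stand-in, not LLVM's ArrayRef):

    #include <cstddef>
    #include <vector>

    // Minimal ArrayRef-like view: pointer plus length, implicitly
    // constructible from std::vector so callers can pass the container.
    struct IntSpan {
      const int *Data;
      std::size_t Size;
      IntSpan(const std::vector<int> &v) : Data(v.data()), Size(v.size()) {}
    };

    int sumMask(IntSpan mask) {
      int s = 0;
      for (std::size_t i = 0; i != mask.Size; ++i)
        s += mask.Data[i];
      return s;
    }

    int demo() {
      std::vector<int> maskOps{0, 2, 1, 3};
      return sumMask(maskOps); // container passed whole, no &maskOps[0]
    }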
@@ -2744,7 +2851,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_LOAD(SDNode *N) {
}
SDValue DAGTypeLegalizer::WidenVecRes_MLOAD(MaskedLoadSDNode *N) {
-
+
EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(),N->getValueType(0));
SDValue Mask = N->getMask();
EVT MaskVT = Mask.getValueType();
@@ -2898,7 +3005,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N) {
}
for (unsigned i = NumElts; i != WidenNumElts; ++i)
NewMask.push_back(-1);
- return DAG.getVectorShuffle(WidenVT, dl, InOp1, InOp2, &NewMask[0]);
+ return DAG.getVectorShuffle(WidenVT, dl, InOp1, InOp2, NewMask);
}
SDValue DAGTypeLegalizer::WidenVecRes_VSETCC(SDNode *N) {
@@ -3072,9 +3179,9 @@ SDValue DAGTypeLegalizer::WidenVecOp_FCOPYSIGN(SDNode *N) {
}
SDValue DAGTypeLegalizer::WidenVecOp_Convert(SDNode *N) {
- // Since the result is legal and the input is illegal, it is unlikely
- // that we can fix the input to a legal type so unroll the convert
- // into some scalar code and create a nasty build vector.
+ // Since the result is legal and the input is illegal, it is unlikely that we
+ // can fix the input to a legal type so unroll the convert into some scalar
+ // code and create a nasty build vector.
EVT VT = N->getValueType(0);
EVT EltVT = VT.getVectorElementType();
SDLoc dl(N);
@@ -3161,7 +3268,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_EXTRACT_VECTOR_ELT(SDNode *N) {
}
SDValue DAGTypeLegalizer::WidenVecOp_STORE(SDNode *N) {
- // We have to widen the value but we want only to store the original
+ // We have to widen the value, but we want only to store the original
// vector type.
StoreSDNode *ST = cast<StoreSDNode>(N);
@@ -3189,10 +3296,10 @@ SDValue DAGTypeLegalizer::WidenVecOp_MSTORE(SDNode *N, unsigned OpNo) {
if (OpNo == 2 || getTypeAction(MaskVT) == TargetLowering::TypeWidenVector)
Mask = GetWidenedVector(Mask);
else {
- // The mask should be widened as well
+ // The mask should be widened as well.
EVT BoolVT = getSetCCResultType(WideVal.getValueType());
// We can't use ModifyToType() because we should fill the mask with
- // zeroes
+ // zeroes.
unsigned WidenNumElts = BoolVT.getVectorNumElements();
unsigned MaskNumElts = MaskVT.getVectorNumElements();
@@ -3219,16 +3326,16 @@ SDValue DAGTypeLegalizer::WidenVecOp_MSCATTER(SDNode *N, unsigned OpNo) {
SDValue DataOp = MSC->getValue();
SDValue Mask = MSC->getMask();
- // Widen the value
+ // Widen the value.
SDValue WideVal = GetWidenedVector(DataOp);
EVT WideVT = WideVal.getValueType();
unsigned NumElts = WideVal.getValueType().getVectorNumElements();
SDLoc dl(N);
- // The mask should be widened as well
+ // The mask should be widened as well.
Mask = WidenTargetBoolean(Mask, WideVT, true);
- // Widen index
+ // Widen index.
SDValue Index = MSC->getIndex();
EVT WideIndexVT = EVT::getVectorVT(*DAG.getContext(),
Index.getValueType().getScalarType(),
@@ -3293,7 +3400,7 @@ static EVT FindMemType(SelectionDAG& DAG, const TargetLowering &TLI,
if (Width == WidenEltWidth)
return RetVT;
- // See if there is larger legal integer than the element type to load/store
+ // See if there is a larger legal integer than the element type to load/store.
unsigned VT;
for (VT = (unsigned)MVT::LAST_INTEGER_VALUETYPE;
VT >= (unsigned)MVT::FIRST_INTEGER_VALUETYPE; --VT) {
@@ -3355,7 +3462,7 @@ static SDValue BuildVectorFromScalar(SelectionDAG& DAG, EVT VecTy,
NumElts = Width / NewLdTy.getSizeInBits();
NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewLdTy, NumElts);
VecOp = DAG.getNode(ISD::BITCAST, dl, NewVecVT, VecOp);
- // Readjust position and vector position based on new load type
+ // Readjust position and vector position based on new load type.
Idx = Idx * LdTy.getSizeInBits() / NewLdTy.getSizeInBits();
LdTy = NewLdTy;
}
@@ -3368,8 +3475,8 @@ static SDValue BuildVectorFromScalar(SelectionDAG& DAG, EVT VecTy,
SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain,
LoadSDNode *LD) {
- // The strategy assumes that we can efficiently load powers of two widths.
- // The routines chops the vector into the largest vector loads with the same
+ // The strategy assumes that we can efficiently load power-of-two widths.
+ // The routine chops the vector into the largest vector loads with the same
// element type or scalar loads, and then recombines them into the widened
// vector type.
EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(),LD->getValueType(0));
@@ -3380,27 +3487,24 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain,
assert(LdVT.getVectorElementType() == WidenVT.getVectorElementType());
// Load information
- SDValue Chain = LD->getChain();
- SDValue BasePtr = LD->getBasePtr();
- unsigned Align = LD->getAlignment();
- bool isVolatile = LD->isVolatile();
- bool isNonTemporal = LD->isNonTemporal();
- bool isInvariant = LD->isInvariant();
+ SDValue Chain = LD->getChain();
+ SDValue BasePtr = LD->getBasePtr();
+ unsigned Align = LD->getAlignment();
+ MachineMemOperand::Flags MMOFlags = LD->getMemOperand()->getFlags();
AAMDNodes AAInfo = LD->getAAInfo();
int LdWidth = LdVT.getSizeInBits();
- int WidthDiff = WidenWidth - LdWidth; // Difference
- unsigned LdAlign = (isVolatile) ? 0 : Align; // Allow wider loads
+ int WidthDiff = WidenWidth - LdWidth;
+ unsigned LdAlign = LD->isVolatile() ? 0 : Align; // Allow wider loads.
// Find the vector type that we can load from.
EVT NewVT = FindMemType(DAG, TLI, LdWidth, WidenVT, LdAlign, WidthDiff);
int NewVTWidth = NewVT.getSizeInBits();
SDValue LdOp = DAG.getLoad(NewVT, dl, Chain, BasePtr, LD->getPointerInfo(),
- isVolatile, isNonTemporal, isInvariant, Align,
- AAInfo);
+ Align, MMOFlags, AAInfo);
LdChain.push_back(LdOp.getValue(1));
- // Check if we can load the element with one instruction
+ // Check if we can load the element with one instruction.
if (LdWidth <= NewVTWidth) {
if (!NewVT.isVector()) {
unsigned NumElts = WidenWidth / NewVTWidth;
@@ -3421,7 +3525,7 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain,
return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, ConcatOps);
}
- // Load vector by using multiple loads from largest vector to scalar
+ // Load vector by using multiple loads from largest vector to scalar.
SmallVector<SDValue, 16> LdOps;
LdOps.push_back(LdOp);
@@ -3436,13 +3540,12 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain,
SDValue L;
if (LdWidth < NewVTWidth) {
- // Our current type we are using is too large, find a better size
+ // The current type we are using is too large. Find a better size.
NewVT = FindMemType(DAG, TLI, LdWidth, WidenVT, LdAlign, WidthDiff);
NewVTWidth = NewVT.getSizeInBits();
L = DAG.getLoad(NewVT, dl, Chain, BasePtr,
- LD->getPointerInfo().getWithOffset(Offset), isVolatile,
- isNonTemporal, isInvariant, MinAlign(Align, Increment),
- AAInfo);
+ LD->getPointerInfo().getWithOffset(Offset),
+ MinAlign(Align, Increment), MMOFlags, AAInfo);
LdChain.push_back(L.getValue(1));
if (L->getValueType(0).isVector()) {
SmallVector<SDValue, 16> Loads;
@@ -3456,9 +3559,8 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain,
}
} else {
L = DAG.getLoad(NewVT, dl, Chain, BasePtr,
- LD->getPointerInfo().getWithOffset(Offset), isVolatile,
- isNonTemporal, isInvariant, MinAlign(Align, Increment),
- AAInfo);
+ LD->getPointerInfo().getWithOffset(Offset),
+ MinAlign(Align, Increment), MMOFlags, AAInfo);
LdChain.push_back(L.getValue(1));
}
@@ -3468,33 +3570,33 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain,
LdWidth -= NewVTWidth;
}
- // Build the vector from the loads operations
+ // Build the vector from the load operations.
unsigned End = LdOps.size();
if (!LdOps[0].getValueType().isVector())
// All the loads are scalar loads.
return BuildVectorFromScalar(DAG, WidenVT, LdOps, 0, End);
// If the load contains vectors, build the vector using concat vector.
- // All of the vectors used to loads are power of 2 and the scalars load
- // can be combined to make a power of 2 vector.
+ // All of the vectors used to load are power-of-2, and the scalar loads can be
+ // combined to make a power-of-2 vector.
SmallVector<SDValue, 16> ConcatOps(End);
int i = End - 1;
int Idx = End;
EVT LdTy = LdOps[i].getValueType();
- // First combine the scalar loads to a vector
+ // First, combine the scalar loads to a vector.
if (!LdTy.isVector()) {
for (--i; i >= 0; --i) {
LdTy = LdOps[i].getValueType();
if (LdTy.isVector())
break;
}
- ConcatOps[--Idx] = BuildVectorFromScalar(DAG, LdTy, LdOps, i+1, End);
+ ConcatOps[--Idx] = BuildVectorFromScalar(DAG, LdTy, LdOps, i + 1, End);
}
ConcatOps[--Idx] = LdOps[i];
for (--i; i >= 0; --i) {
EVT NewLdTy = LdOps[i].getValueType();
if (NewLdTy != LdTy) {
- // Create a larger vector
+ // Create a larger vector.
ConcatOps[End-1] = DAG.getNode(ISD::CONCAT_VECTORS, dl, NewLdTy,
makeArrayRef(&ConcatOps[Idx], End - Idx));
Idx = End - 1;
@@ -3503,11 +3605,11 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain,
ConcatOps[--Idx] = LdOps[i];
}
- if (WidenWidth == LdTy.getSizeInBits()*(End - Idx))
+ if (WidenWidth == LdTy.getSizeInBits() * (End - Idx))
return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT,
makeArrayRef(&ConcatOps[Idx], End - Idx));
- // We need to fill the rest with undefs to build the vector
+ // We need to fill the rest with undefs to build the vector.
unsigned NumOps = WidenWidth / LdTy.getSizeInBits();
SmallVector<SDValue, 16> WidenOps(NumOps);
SDValue UndefVal = DAG.getUNDEF(LdTy);
@@ -3526,33 +3628,30 @@ DAGTypeLegalizer::GenWidenVectorExtLoads(SmallVectorImpl<SDValue> &LdChain,
LoadSDNode *LD,
ISD::LoadExtType ExtType) {
// For extension loads, it may not be more efficient to chop up the vector
- // and then extended it. Instead, we unroll the load and build a new vector.
+ // and then extend it. Instead, we unroll the load and build a new vector.
EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(),LD->getValueType(0));
EVT LdVT = LD->getMemoryVT();
SDLoc dl(LD);
assert(LdVT.isVector() && WidenVT.isVector());
// Load information
- SDValue Chain = LD->getChain();
- SDValue BasePtr = LD->getBasePtr();
- unsigned Align = LD->getAlignment();
- bool isVolatile = LD->isVolatile();
- bool isNonTemporal = LD->isNonTemporal();
- bool isInvariant = LD->isInvariant();
+ SDValue Chain = LD->getChain();
+ SDValue BasePtr = LD->getBasePtr();
+ unsigned Align = LD->getAlignment();
+ MachineMemOperand::Flags MMOFlags = LD->getMemOperand()->getFlags();
AAMDNodes AAInfo = LD->getAAInfo();
EVT EltVT = WidenVT.getVectorElementType();
EVT LdEltVT = LdVT.getVectorElementType();
unsigned NumElts = LdVT.getVectorNumElements();
- // Load each element and widen
+ // Load each element and widen.
unsigned WidenNumElts = WidenVT.getVectorNumElements();
SmallVector<SDValue, 16> Ops(WidenNumElts);
unsigned Increment = LdEltVT.getSizeInBits() / 8;
- Ops[0] = DAG.getExtLoad(ExtType, dl, EltVT, Chain, BasePtr,
- LD->getPointerInfo(),
- LdEltVT, isVolatile, isNonTemporal, isInvariant,
- Align, AAInfo);
+ Ops[0] =
+ DAG.getExtLoad(ExtType, dl, EltVT, Chain, BasePtr, LD->getPointerInfo(),
+ LdEltVT, Align, MMOFlags, AAInfo);
LdChain.push_back(Ops[0].getValue(1));
unsigned i = 0, Offset = Increment;
for (i=1; i < NumElts; ++i, Offset += Increment) {
@@ -3562,12 +3661,11 @@ DAGTypeLegalizer::GenWidenVectorExtLoads(SmallVectorImpl<SDValue> &LdChain,
BasePtr.getValueType()));
Ops[i] = DAG.getExtLoad(ExtType, dl, EltVT, Chain, NewBasePtr,
LD->getPointerInfo().getWithOffset(Offset), LdEltVT,
- isVolatile, isNonTemporal, isInvariant, Align,
- AAInfo);
+ Align, MMOFlags, AAInfo);
LdChain.push_back(Ops[i].getValue(1));
}
- // Fill the rest with undefs
+ // Fill the rest with undefs.
SDValue UndefVal = DAG.getUNDEF(EltVT);
for (; i != WidenNumElts; ++i)
Ops[i] = UndefVal;
@@ -3578,14 +3676,13 @@ DAGTypeLegalizer::GenWidenVectorExtLoads(SmallVectorImpl<SDValue> &LdChain,
void DAGTypeLegalizer::GenWidenVectorStores(SmallVectorImpl<SDValue> &StChain,
StoreSDNode *ST) {
- // The strategy assumes that we can efficiently store powers of two widths.
- // The routines chops the vector into the largest vector stores with the same
+ // The strategy assumes that we can efficiently store power-of-two widths.
+ // The routine chops the vector into the largest vector stores with the same
// element type or scalar stores.
SDValue Chain = ST->getChain();
SDValue BasePtr = ST->getBasePtr();
unsigned Align = ST->getAlignment();
- bool isVolatile = ST->isVolatile();
- bool isNonTemporal = ST->isNonTemporal();
+ MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
AAMDNodes AAInfo = ST->getAAInfo();
SDValue ValOp = GetWidenedVector(ST->getValue());
SDLoc dl(ST);
@@ -3601,7 +3698,7 @@ void DAGTypeLegalizer::GenWidenVectorStores(SmallVectorImpl<SDValue> &StChain,
int Idx = 0; // current index to store
unsigned Offset = 0; // offset from base to store
while (StWidth != 0) {
- // Find the largest vector type we can store with
+ // Find the largest vector type we can store with.
EVT NewVT = FindMemType(DAG, TLI, StWidth, ValVT);
unsigned NewVTWidth = NewVT.getSizeInBits();
unsigned Increment = NewVTWidth / 8;
@@ -3611,10 +3708,9 @@ void DAGTypeLegalizer::GenWidenVectorStores(SmallVectorImpl<SDValue> &StChain,
SDValue EOp = DAG.getNode(
ISD::EXTRACT_SUBVECTOR, dl, NewVT, ValOp,
DAG.getConstant(Idx, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
- StChain.push_back(DAG.getStore(Chain, dl, EOp, BasePtr,
- ST->getPointerInfo().getWithOffset(Offset),
- isVolatile, isNonTemporal,
- MinAlign(Align, Offset), AAInfo));
+ StChain.push_back(DAG.getStore(
+ Chain, dl, EOp, BasePtr, ST->getPointerInfo().getWithOffset(Offset),
+ MinAlign(Align, Offset), MMOFlags, AAInfo));
StWidth -= NewVTWidth;
Offset += Increment;
Idx += NumVTElts;
@@ -3623,28 +3719,27 @@ void DAGTypeLegalizer::GenWidenVectorStores(SmallVectorImpl<SDValue> &StChain,
BasePtr.getValueType()));
} while (StWidth != 0 && StWidth >= NewVTWidth);
} else {
- // Cast the vector to the scalar type we can store
+ // Cast the vector to the scalar type we can store.
unsigned NumElts = ValWidth / NewVTWidth;
EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewVT, NumElts);
SDValue VecOp = DAG.getNode(ISD::BITCAST, dl, NewVecVT, ValOp);
- // Readjust index position based on new vector type
+ // Readjust index position based on new vector type.
Idx = Idx * ValEltWidth / NewVTWidth;
do {
SDValue EOp = DAG.getNode(
ISD::EXTRACT_VECTOR_ELT, dl, NewVT, VecOp,
DAG.getConstant(Idx++, dl,
TLI.getVectorIdxTy(DAG.getDataLayout())));
- StChain.push_back(DAG.getStore(Chain, dl, EOp, BasePtr,
- ST->getPointerInfo().getWithOffset(Offset),
- isVolatile, isNonTemporal,
- MinAlign(Align, Offset), AAInfo));
+ StChain.push_back(DAG.getStore(
+ Chain, dl, EOp, BasePtr, ST->getPointerInfo().getWithOffset(Offset),
+ MinAlign(Align, Offset), MMOFlags, AAInfo));
StWidth -= NewVTWidth;
Offset += Increment;
BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
DAG.getConstant(Increment, dl,
BasePtr.getValueType()));
} while (StWidth != 0 && StWidth >= NewVTWidth);
- // Restore index back to be relative to the original widen element type
+ // Restore index back to be relative to the original widen element type.
Idx = Idx * NewVTWidth / ValEltWidth;
}
}
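Both branches of GenWidenVectorStores run the same greedy chop: find the widest legal piece that still fits the remaining width, emit one store, then advance the width and offset bookkeeping. The control flow in isolation, with bare powers of two standing in for FindMemType's legal memory types:

    #include <cstdio>

    // Greedy chop mirroring the StWidth/Offset loop above: each iteration
    // stores the widest power-of-two piece that still fits.
    void chopStores(unsigned stWidthBits) {
      unsigned offsetBits = 0;
      while (stWidthBits != 0) {
        unsigned piece = 1;
        while (piece * 2 <= stWidthBits) // widest power of two that fits
          piece *= 2;
        std::printf("store %u bits at byte offset %u\n", piece, offsetBits / 8);
        stWidthBits -= piece;
        offsetBits += piece;
      }
    }

    int main() { chopStores(96); } // emits a 64-bit then a 32-bit store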
@@ -3654,27 +3749,25 @@ void
DAGTypeLegalizer::GenWidenVectorTruncStores(SmallVectorImpl<SDValue> &StChain,
StoreSDNode *ST) {
// For truncating stores, it may not be more efficient to truncate the vector
- // and then store it. Instead, we extract each element and then store it.
- SDValue Chain = ST->getChain();
- SDValue BasePtr = ST->getBasePtr();
+ // and then store it. Instead, we extract each element and then store it.
+ SDValue Chain = ST->getChain();
+ SDValue BasePtr = ST->getBasePtr();
unsigned Align = ST->getAlignment();
- bool isVolatile = ST->isVolatile();
- bool isNonTemporal = ST->isNonTemporal();
+ MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
AAMDNodes AAInfo = ST->getAAInfo();
- SDValue ValOp = GetWidenedVector(ST->getValue());
+ SDValue ValOp = GetWidenedVector(ST->getValue());
SDLoc dl(ST);
EVT StVT = ST->getMemoryVT();
EVT ValVT = ValOp.getValueType();
- // It must be true that we the widen vector type is bigger than where
- // we need to store.
+ // It must be true that the wide vector type is bigger than where we need to
+ // store.
assert(StVT.isVector() && ValOp.getValueType().isVector());
assert(StVT.bitsLT(ValOp.getValueType()));
- // For truncating stores, we can not play the tricks of chopping legal
- // vector types and bit cast it to the right type. Instead, we unroll
- // the store.
+ // For truncating stores, we can not play the tricks of chopping legal vector
+ // types and bitcast it to the right type. Instead, we unroll the store.
EVT StEltVT = StVT.getVectorElementType();
EVT ValEltVT = ValVT.getVectorElementType();
unsigned Increment = ValEltVT.getSizeInBits() / 8;
@@ -3683,9 +3776,8 @@ DAGTypeLegalizer::GenWidenVectorTruncStores(SmallVectorImpl<SDValue> &StChain,
ISD::EXTRACT_VECTOR_ELT, dl, ValEltVT, ValOp,
DAG.getConstant(0, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
StChain.push_back(DAG.getTruncStore(Chain, dl, EOp, BasePtr,
- ST->getPointerInfo(), StEltVT,
- isVolatile, isNonTemporal, Align,
- AAInfo));
+ ST->getPointerInfo(), StEltVT, Align,
+ MMOFlags, AAInfo));
unsigned Offset = Increment;
for (unsigned i=1; i < NumElts; ++i, Offset += Increment) {
SDValue NewBasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(),
@@ -3695,10 +3787,9 @@ DAGTypeLegalizer::GenWidenVectorTruncStores(SmallVectorImpl<SDValue> &StChain,
SDValue EOp = DAG.getNode(
ISD::EXTRACT_VECTOR_ELT, dl, ValEltVT, ValOp,
DAG.getConstant(0, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
- StChain.push_back(DAG.getTruncStore(Chain, dl, EOp, NewBasePtr,
- ST->getPointerInfo().getWithOffset(Offset),
- StEltVT, isVolatile, isNonTemporal,
- MinAlign(Align, Offset), AAInfo));
+ StChain.push_back(DAG.getTruncStore(
+ Chain, dl, EOp, NewBasePtr, ST->getPointerInfo().getWithOffset(Offset),
+ StEltVT, MinAlign(Align, Offset), MMOFlags, AAInfo));
}
}
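Throughout these widening routines, each split access takes its alignment from MinAlign(Align, Offset): a piece at a byte offset from an aligned base is only as aligned as base and offset jointly allow. The helper's arithmetic reproduced standalone (the formula matches LLVM's MinAlign; the checks below are illustrative):

    #include <cstdint>

    // Greatest power of two dividing both A and B: A | B merges the set
    // bits, and x & -x (written 1 + ~x) isolates the lowest one.
    constexpr uint64_t minAlign(uint64_t a, uint64_t b) {
      return (a | b) & (1 + ~(a | b));
    }

    static_assert(minAlign(16, 8) == 8, "offset 8 from a 16-byte base");
    static_assert(minAlign(16, 4) == 4, "offset 4 limits alignment to 4");
    static_assert(minAlign(16, 0) == 16, "offset 0 keeps base alignment");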
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp
index 622e06f..1e5c4a7 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp
@@ -37,7 +37,7 @@ static cl::opt<bool> DisableDFASched("disable-dfa-sched", cl::Hidden,
cl::ZeroOrMore, cl::init(false),
cl::desc("Disable use of DFA during scheduling"));
-static cl::opt<signed> RegPressureThreshold(
+static cl::opt<int> RegPressureThreshold(
"dfa-sched-reg-pressure-threshold", cl::Hidden, cl::ZeroOrMore, cl::init(5),
cl::desc("Track reg pressure and switch priority to in-depth"));
@@ -323,8 +323,8 @@ void ResourcePriorityQueue::reserveResources(SUnit *SU) {
}
}
-signed ResourcePriorityQueue::rawRegPressureDelta(SUnit *SU, unsigned RCId) {
- signed RegBalance = 0;
+int ResourcePriorityQueue::rawRegPressureDelta(SUnit *SU, unsigned RCId) {
+ int RegBalance = 0;
if (!SU || !SU->getNode() || !SU->getNode()->isMachineOpcode())
return RegBalance;
@@ -357,8 +357,8 @@ signed ResourcePriorityQueue::rawRegPressureDelta(SUnit *SU, unsigned RCId) {
/// The RawPressure flag makes this function ignore
/// existing reg file sizes and report raw def/use
/// balance.
-signed ResourcePriorityQueue::regPressureDelta(SUnit *SU, bool RawPressure) {
- signed RegBalance = 0;
+int ResourcePriorityQueue::regPressureDelta(SUnit *SU, bool RawPressure) {
+ int RegBalance = 0;
if (!SU || !SU->getNode() || !SU->getNode()->isMachineOpcode())
return RegBalance;
@@ -398,9 +398,9 @@ static const unsigned FactorOne = 2;
/// Returns a single number reflecting the benefit of scheduling SU
/// in the current cycle.
-signed ResourcePriorityQueue::SUSchedulingCost(SUnit *SU) {
+int ResourcePriorityQueue::SUSchedulingCost(SUnit *SU) {
// Initial trivial priority.
- signed ResCount = 1;
+ int ResCount = 1;
// Do not waste time on a node that is already scheduled.
if (SU->isScheduled)
@@ -601,7 +601,7 @@ SUnit *ResourcePriorityQueue::pop() {
std::vector<SUnit *>::iterator Best = Queue.begin();
if (!DisableDFASched) {
- signed BestCost = SUSchedulingCost(*Best);
+ int BestCost = SUSchedulingCost(*Best);
for (std::vector<SUnit *>::iterator I = std::next(Queue.begin()),
E = Queue.end(); I != E; ++I) {
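The ResourcePriorityQueue rewrites are pure spelling: `signed` with no base type means `signed int`, the very same type as `int`, so nothing about overloads, ABI, or behavior changes. The equivalence is checkable at compile time:

    #include <type_traits>

    static_assert(std::is_same<signed, int>::value,
                  "bare 'signed' denotes 'signed int', i.e. int");
    static_assert(std::is_same<signed int, int>::value,
                  "the explicit spelling names the same type");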
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h b/contrib/llvm/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h
index c27f8de..237d541 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h
@@ -14,9 +14,9 @@
#ifndef LLVM_LIB_CODEGEN_SELECTIONDAG_SDNODEDBGVALUE_H
#define LLVM_LIB_CODEGEN_SELECTIONDAG_SDNODEDBGVALUE_H
-#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/Support/DataTypes.h"
+#include <utility>
namespace llvm {
@@ -56,7 +56,8 @@ public:
// Constructor for non-constants.
SDDbgValue(MDNode *Var, MDNode *Expr, SDNode *N, unsigned R, bool indir,
uint64_t off, DebugLoc dl, unsigned O)
- : Var(Var), Expr(Expr), Offset(off), DL(dl), Order(O), IsIndirect(indir) {
+ : Var(Var), Expr(Expr), Offset(off), DL(std::move(dl)), Order(O),
+ IsIndirect(indir) {
kind = SDNODE;
u.s.Node = N;
u.s.ResNo = R;
@@ -65,7 +66,8 @@ public:
// Constructor for constants.
SDDbgValue(MDNode *Var, MDNode *Expr, const Value *C, uint64_t off,
DebugLoc dl, unsigned O)
- : Var(Var), Expr(Expr), Offset(off), DL(dl), Order(O), IsIndirect(false) {
+ : Var(Var), Expr(Expr), Offset(off), DL(std::move(dl)), Order(O),
+ IsIndirect(false) {
kind = CONST;
u.Const = C;
}
@@ -73,7 +75,8 @@ public:
// Constructor for frame indices.
SDDbgValue(MDNode *Var, MDNode *Expr, unsigned FI, uint64_t off, DebugLoc dl,
unsigned O)
- : Var(Var), Expr(Expr), Offset(off), DL(dl), Order(O), IsIndirect(false) {
+ : Var(Var), Expr(Expr), Offset(off), DL(std::move(dl)), Order(O),
+ IsIndirect(false) {
kind = FRAMEIX;
u.FrameIx = FI;
}
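All three SDDbgValue constructors keep taking DebugLoc by value but now move it into the member. Pass-by-value plus std::move lets rvalue callers transfer ownership with no copy at all, while lvalue callers pay exactly the one copy they always did. The idiom in miniature (Entry and its string member are hypothetical stand-ins for SDDbgValue and DebugLoc):

    #include <string>
    #include <utility>

    struct Entry {
      std::string Tag;
      // 'tag' arrives by value: copied from lvalues, moved from rvalues.
      // Moving it into the member avoids a second copy either way.
      explicit Entry(std::string tag) : Tag(std::move(tag)) {}
    };

    Entry make() {
      std::string s = "line 42";
      Entry a(s);            // one copy: s is still needed after this
      Entry b(std::move(s)); // no copies: s's buffer is stolen
      return b;
    }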
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
index 91024e6..802c459 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
@@ -336,8 +336,8 @@ void ScheduleDAGRRList::Schedule() {
// Build the scheduling graph.
BuildSchedGraph(nullptr);
- DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
- SUnits[su].dumpAll(this));
+ DEBUG(for (SUnit &SU : SUnits)
+ SU.dumpAll(this));
Topo.InitDAGTopologicalSorting();
AvailableQueue->initNodes(SUnits);
@@ -1027,43 +1027,37 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
SmallVector<SDep, 4> LoadPreds;
SmallVector<SDep, 4> NodePreds;
SmallVector<SDep, 4> NodeSuccs;
- for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
- I != E; ++I) {
- if (I->isCtrl())
- ChainPreds.push_back(*I);
- else if (isOperandOf(I->getSUnit(), LoadNode))
- LoadPreds.push_back(*I);
+ for (SDep &Pred : SU->Preds) {
+ if (Pred.isCtrl())
+ ChainPreds.push_back(Pred);
+ else if (isOperandOf(Pred.getSUnit(), LoadNode))
+ LoadPreds.push_back(Pred);
else
- NodePreds.push_back(*I);
+ NodePreds.push_back(Pred);
}
- for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
- I != E; ++I) {
- if (I->isCtrl())
- ChainSuccs.push_back(*I);
+ for (SDep &Succ : SU->Succs) {
+ if (Succ.isCtrl())
+ ChainSuccs.push_back(Succ);
else
- NodeSuccs.push_back(*I);
+ NodeSuccs.push_back(Succ);
}
// Now assign edges to the newly-created nodes.
- for (unsigned i = 0, e = ChainPreds.size(); i != e; ++i) {
- const SDep &Pred = ChainPreds[i];
+ for (const SDep &Pred : ChainPreds) {
RemovePred(SU, Pred);
if (isNewLoad)
AddPred(LoadSU, Pred);
}
- for (unsigned i = 0, e = LoadPreds.size(); i != e; ++i) {
- const SDep &Pred = LoadPreds[i];
+ for (const SDep &Pred : LoadPreds) {
RemovePred(SU, Pred);
if (isNewLoad)
AddPred(LoadSU, Pred);
}
- for (unsigned i = 0, e = NodePreds.size(); i != e; ++i) {
- const SDep &Pred = NodePreds[i];
+ for (const SDep &Pred : NodePreds) {
RemovePred(SU, Pred);
AddPred(NewSU, Pred);
}
- for (unsigned i = 0, e = NodeSuccs.size(); i != e; ++i) {
- SDep D = NodeSuccs[i];
+ for (SDep D : NodeSuccs) {
SUnit *SuccDep = D.getSUnit();
D.setSUnit(SU);
RemovePred(SuccDep, D);
@@ -1074,8 +1068,7 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
&& !D.isCtrl() && NewSU->NumRegDefsLeft > 0)
--NewSU->NumRegDefsLeft;
}
- for (unsigned i = 0, e = ChainSuccs.size(); i != e; ++i) {
- SDep D = ChainSuccs[i];
+ for (SDep D : ChainSuccs) {
SUnit *SuccDep = D.getSUnit();
D.setSUnit(SU);
RemovePred(SuccDep, D);
@@ -1108,29 +1101,27 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
NewSU = CreateClone(SU);
// New SUnit has the exact same predecessors.
- for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
- I != E; ++I)
- if (!I->isArtificial())
- AddPred(NewSU, *I);
+ for (SDep &Pred : SU->Preds)
+ if (!Pred.isArtificial())
+ AddPred(NewSU, Pred);
// Only copy scheduled successors. Cut them from old node's successor
// list and move them over.
SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
- for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
- I != E; ++I) {
- if (I->isArtificial())
+ for (SDep &Succ : SU->Succs) {
+ if (Succ.isArtificial())
continue;
- SUnit *SuccSU = I->getSUnit();
+ SUnit *SuccSU = Succ.getSUnit();
if (SuccSU->isScheduled) {
- SDep D = *I;
+ SDep D = Succ;
D.setSUnit(NewSU);
AddPred(SuccSU, D);
D.setSUnit(SU);
DelDeps.push_back(std::make_pair(SuccSU, D));
}
}
- for (unsigned i = 0, e = DelDeps.size(); i != e; ++i)
- RemovePred(DelDeps[i].first, DelDeps[i].second);
+ for (auto &DelDep : DelDeps)
+ RemovePred(DelDep.first, DelDep.second);
AvailableQueue->updateNode(SU);
AvailableQueue->addNode(NewSU);
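Most of the ScheduleDAGRRList churn is mechanical: iterator-pair loops over SU->Preds and SU->Succs become range-based for loops, and index loops over the DelDeps vector become loops over references. A minimal before/after of the transformation (Dep and the container are simplified stand-ins for SDep and SmallVector):

    #include <vector>

    struct Dep { bool Ctrl; };

    int countDataDeps(const std::vector<Dep> &preds) {
      int n = 0;
      // Before:
      //   for (std::vector<Dep>::const_iterator I = preds.begin(),
      //        E = preds.end(); I != E; ++I)
      //     if (!I->Ctrl) ++n;
      for (const Dep &P : preds) // same traversal, no iterator bookkeeping
        if (!P.Ctrl)
          ++n;
      return n;
    }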
@@ -1156,16 +1147,15 @@ void ScheduleDAGRRList::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg,
// Only copy scheduled successors. Cut them from old node's successor
// list and move them over.
SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
- for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
- I != E; ++I) {
- if (I->isArtificial())
+ for (SDep &Succ : SU->Succs) {
+ if (Succ.isArtificial())
continue;
- SUnit *SuccSU = I->getSUnit();
+ SUnit *SuccSU = Succ.getSUnit();
if (SuccSU->isScheduled) {
- SDep D = *I;
+ SDep D = Succ;
D.setSUnit(CopyToSU);
AddPred(SuccSU, D);
- DelDeps.push_back(std::make_pair(SuccSU, *I));
+ DelDeps.push_back(std::make_pair(SuccSU, Succ));
}
else {
// Avoid scheduling the def-side copy before other successors. Otherwise
@@ -1174,8 +1164,8 @@ void ScheduleDAGRRList::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg,
AddPred(SuccSU, SDep(CopyFromSU, SDep::Artificial));
}
}
- for (unsigned i = 0, e = DelDeps.size(); i != e; ++i)
- RemovePred(DelDeps[i].first, DelDeps[i].second);
+ for (auto &DelDep : DelDeps)
+ RemovePred(DelDep.first, DelDep.second);
SDep FromDep(SU, SDep::Data, Reg);
FromDep.setLatency(SU->Latency);
@@ -1400,16 +1390,14 @@ SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() {
// All candidates are delayed due to live physical reg dependencies.
// Try backtracking, code duplication, or inserting cross class copies
// to resolve it.
- for (unsigned i = 0, e = Interferences.size(); i != e; ++i) {
- SUnit *TrySU = Interferences[i];
+ for (SUnit *TrySU : Interferences) {
SmallVectorImpl<unsigned> &LRegs = LRegsMap[TrySU];
// Try unscheduling up to the point where it's safe to schedule
// this node.
SUnit *BtSU = nullptr;
unsigned LiveCycle = UINT_MAX;
- for (unsigned j = 0, ee = LRegs.size(); j != ee; ++j) {
- unsigned Reg = LRegs[j];
+ for (unsigned Reg : LRegs) {
if (LiveRegGens[Reg]->getHeight() < LiveCycle) {
BtSU = LiveRegGens[Reg];
LiveCycle = BtSU->getHeight();
@@ -1854,10 +1842,9 @@ CalcNodeSethiUllmanNumber(const SUnit *SU, std::vector<unsigned> &SUNumbers) {
return SethiUllmanNumber;
unsigned Extra = 0;
- for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
- I != E; ++I) {
- if (I->isCtrl()) continue; // ignore chain preds
- SUnit *PredSU = I->getSUnit();
+ for (const SDep &Pred : SU->Preds) {
+ if (Pred.isCtrl()) continue; // ignore chain preds
+ SUnit *PredSU = Pred.getSUnit();
unsigned PredSethiUllman = CalcNodeSethiUllmanNumber(PredSU, SUNumbers);
if (PredSethiUllman > SethiUllmanNumber) {
SethiUllmanNumber = PredSethiUllman;
@@ -1879,8 +1866,8 @@ CalcNodeSethiUllmanNumber(const SUnit *SU, std::vector<unsigned> &SUNumbers) {
void RegReductionPQBase::CalculateSethiUllmanNumbers() {
SethiUllmanNumbers.assign(SUnits->size(), 0);
- for (unsigned i = 0, e = SUnits->size(); i != e; ++i)
- CalcNodeSethiUllmanNumber(&(*SUnits)[i], SethiUllmanNumbers);
+ for (const SUnit &SU : *SUnits)
+ CalcNodeSethiUllmanNumber(&SU, SethiUllmanNumbers);
}
void RegReductionPQBase::addNode(const SUnit *SU) {
@@ -1956,11 +1943,10 @@ bool RegReductionPQBase::HighRegPressure(const SUnit *SU) const {
if (!TLI)
return false;
- for (SUnit::const_pred_iterator I = SU->Preds.begin(),E = SU->Preds.end();
- I != E; ++I) {
- if (I->isCtrl())
+ for (const SDep &Pred : SU->Preds) {
+ if (Pred.isCtrl())
continue;
- SUnit *PredSU = I->getSUnit();
+ SUnit *PredSU = Pred.getSUnit();
// NumRegDefsLeft is zero when enough uses of this node have been scheduled
// to cover the number of registers defined (they are all live).
if (PredSU->NumRegDefsLeft == 0) {
@@ -2006,11 +1992,10 @@ bool RegReductionPQBase::MayReduceRegPressure(SUnit *SU) const {
int RegReductionPQBase::RegPressureDiff(SUnit *SU, unsigned &LiveUses) const {
LiveUses = 0;
int PDiff = 0;
- for (SUnit::const_pred_iterator I = SU->Preds.begin(),E = SU->Preds.end();
- I != E; ++I) {
- if (I->isCtrl())
+ for (const SDep &Pred : SU->Preds) {
+ if (Pred.isCtrl())
continue;
- SUnit *PredSU = I->getSUnit();
+ SUnit *PredSU = Pred.getSUnit();
// NumRegDefsLeft is zero when enough uses of this node have been scheduled
// to cover the number of registers defined (they are all live).
if (PredSU->NumRegDefsLeft == 0) {
@@ -2050,11 +2035,10 @@ void RegReductionPQBase::scheduledNode(SUnit *SU) {
if (!SU->getNode())
return;
- for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
- I != E; ++I) {
- if (I->isCtrl())
+ for (const SDep &Pred : SU->Preds) {
+ if (Pred.isCtrl())
continue;
- SUnit *PredSU = I->getSUnit();
+ SUnit *PredSU = Pred.getSUnit();
// NumRegDefsLeft is zero when enough uses of this node have been scheduled
// to cover the number of registers defined (they are all live).
if (PredSU->NumRegDefsLeft == 0) {
@@ -2132,11 +2116,10 @@ void RegReductionPQBase::unscheduledNode(SUnit *SU) {
return;
}
- for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
- I != E; ++I) {
- if (I->isCtrl())
+ for (const SDep &Pred : SU->Preds) {
+ if (Pred.isCtrl())
continue;
- SUnit *PredSU = I->getSUnit();
+ SUnit *PredSU = Pred.getSUnit();
// NumSuccsLeft counts all deps. Don't compare it with NumSuccs which only
// counts data deps.
if (PredSU->NumSuccsLeft != PredSU->Succs.size())
@@ -2201,15 +2184,14 @@ void RegReductionPQBase::unscheduledNode(SUnit *SU) {
/// closest to the current cycle.
static unsigned closestSucc(const SUnit *SU) {
unsigned MaxHeight = 0;
- for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
- I != E; ++I) {
- if (I->isCtrl()) continue; // ignore chain succs
- unsigned Height = I->getSUnit()->getHeight();
+ for (const SDep &Succ : SU->Succs) {
+ if (Succ.isCtrl()) continue; // ignore chain succs
+ unsigned Height = Succ.getSUnit()->getHeight();
// If there are a bunch of CopyToRegs stacked up, they should be considered
// to be at the same position.
- if (I->getSUnit()->getNode() &&
- I->getSUnit()->getNode()->getOpcode() == ISD::CopyToReg)
- Height = closestSucc(I->getSUnit())+1;
+ if (Succ.getSUnit()->getNode() &&
+ Succ.getSUnit()->getNode()->getOpcode() == ISD::CopyToReg)
+ Height = closestSucc(Succ.getSUnit())+1;
if (Height > MaxHeight)
MaxHeight = Height;
}
@@ -2220,9 +2202,8 @@ static unsigned closestSucc(const SUnit *SU) {
/// for scratch registers, i.e. number of data dependencies.
static unsigned calcMaxScratches(const SUnit *SU) {
unsigned Scratches = 0;
- for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
- I != E; ++I) {
- if (I->isCtrl()) continue; // ignore chain preds
+ for (const SDep &Pred : SU->Preds) {
+ if (Pred.isCtrl()) continue; // ignore chain preds
Scratches++;
}
return Scratches;
@@ -2232,10 +2213,9 @@ static unsigned calcMaxScratches(const SUnit *SU) {
/// CopyFromReg from a virtual register.
static bool hasOnlyLiveInOpers(const SUnit *SU) {
bool RetVal = false;
- for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
- I != E; ++I) {
- if (I->isCtrl()) continue;
- const SUnit *PredSU = I->getSUnit();
+ for (const SDep &Pred : SU->Preds) {
+ if (Pred.isCtrl()) continue;
+ const SUnit *PredSU = Pred.getSUnit();
if (PredSU->getNode() &&
PredSU->getNode()->getOpcode() == ISD::CopyFromReg) {
unsigned Reg =
@@ -2255,10 +2235,9 @@ static bool hasOnlyLiveInOpers(const SUnit *SU) {
/// it has no other use. It should be scheduled closer to the terminator.
static bool hasOnlyLiveOutUses(const SUnit *SU) {
bool RetVal = false;
- for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
- I != E; ++I) {
- if (I->isCtrl()) continue;
- const SUnit *SuccSU = I->getSUnit();
+ for (const SDep &Succ : SU->Succs) {
+ if (Succ.isCtrl()) continue;
+ const SUnit *SuccSU = Succ.getSUnit();
if (SuccSU->getNode() && SuccSU->getNode()->getOpcode() == ISD::CopyToReg) {
unsigned Reg =
cast<RegisterSDNode>(SuccSU->getNode()->getOperand(1))->getReg();
@@ -2293,10 +2272,9 @@ static void initVRegCycle(SUnit *SU) {
SU->isVRegCycle = true;
- for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
- I != E; ++I) {
- if (I->isCtrl()) continue;
- I->getSUnit()->isVRegCycle = true;
+ for (const SDep &Pred : SU->Preds) {
+ if (Pred.isCtrl()) continue;
+ Pred.getSUnit()->isVRegCycle = true;
}
}
@@ -2306,14 +2284,13 @@ static void resetVRegCycle(SUnit *SU) {
if (!SU->isVRegCycle)
return;
- for (SUnit::const_pred_iterator I = SU->Preds.begin(),E = SU->Preds.end();
- I != E; ++I) {
- if (I->isCtrl()) continue; // ignore chain preds
- SUnit *PredSU = I->getSUnit();
+ for (const SDep &Pred : SU->Preds) {
+ if (Pred.isCtrl()) continue; // ignore chain preds
+ SUnit *PredSU = Pred.getSUnit();
if (PredSU->isVRegCycle) {
assert(PredSU->getNode()->getOpcode() == ISD::CopyFromReg &&
"VRegCycle def must be CopyFromReg");
- I->getSUnit()->isVRegCycle = 0;
+ Pred.getSUnit()->isVRegCycle = false;
}
}
}
@@ -2325,11 +2302,10 @@ static bool hasVRegCycleUse(const SUnit *SU) {
if (SU->isVRegCycle)
return false;
- for (SUnit::const_pred_iterator I = SU->Preds.begin(),E = SU->Preds.end();
- I != E; ++I) {
- if (I->isCtrl()) continue; // ignore chain preds
- if (I->getSUnit()->isVRegCycle &&
- I->getSUnit()->getNode()->getOpcode() == ISD::CopyFromReg) {
+ for (const SDep &Pred : SU->Preds) {
+ if (Pred.isCtrl()) continue; // ignore chain preds
+ if (Pred.getSUnit()->isVRegCycle &&
+ Pred.getSUnit()->getNode()->getOpcode() == ISD::CopyFromReg) {
DEBUG(dbgs() << " VReg cycle use: SU (" << SU->NodeNum << ")\n");
return true;
}
@@ -2684,11 +2660,9 @@ void RegReductionPQBase::initNodes(std::vector<SUnit> &sunits) {
CalculateSethiUllmanNumbers();
// For single block loops, mark nodes that look like canonical IV increments.
- if (scheduleDAG->BB->isSuccessor(scheduleDAG->BB)) {
- for (unsigned i = 0, e = sunits.size(); i != e; ++i) {
- initVRegCycle(&sunits[i]);
- }
- }
+ if (scheduleDAG->BB->isSuccessor(scheduleDAG->BB))
+ for (SUnit &SU : sunits)
+ initVRegCycle(&SU);
}
//===----------------------------------------------------------------------===//
@@ -2726,16 +2700,15 @@ static bool canClobberReachingPhysRegUse(const SUnit *DepSU, const SUnit *SU,
if (!ImpDefs && !RegMask)
return false;
- for (SUnit::const_succ_iterator SI = SU->Succs.begin(), SE = SU->Succs.end();
- SI != SE; ++SI) {
- SUnit *SuccSU = SI->getSUnit();
- for (SUnit::const_pred_iterator PI = SuccSU->Preds.begin(),
- PE = SuccSU->Preds.end(); PI != PE; ++PI) {
- if (!PI->isAssignedRegDep())
+ for (const SDep &Succ : SU->Succs) {
+ SUnit *SuccSU = Succ.getSUnit();
+ for (const SDep &SuccPred : SuccSU->Preds) {
+ if (!SuccPred.isAssignedRegDep())
continue;
- if (RegMask && MachineOperand::clobbersPhysReg(RegMask, PI->getReg()) &&
- scheduleDAG->IsReachable(DepSU, PI->getSUnit()))
+ if (RegMask &&
+ MachineOperand::clobbersPhysReg(RegMask, SuccPred.getReg()) &&
+ scheduleDAG->IsReachable(DepSU, SuccPred.getSUnit()))
return true;
if (ImpDefs)
@@ -2743,8 +2716,8 @@ static bool canClobberReachingPhysRegUse(const SUnit *DepSU, const SUnit *SU,
// Return true if SU clobbers this physical register use and the
// definition of the register reaches from DepSU. IsReachable queries
// a topological forward sort of the DAG (following the successors).
- if (TRI->regsOverlap(*ImpDef, PI->getReg()) &&
- scheduleDAG->IsReachable(DepSU, PI->getSUnit()))
+ if (TRI->regsOverlap(*ImpDef, SuccPred.getReg()) &&
+ scheduleDAG->IsReachable(DepSU, SuccPred.getSUnit()))
return true;
}
}
@@ -2823,19 +2796,18 @@ static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU,
///
void RegReductionPQBase::PrescheduleNodesWithMultipleUses() {
// Visit all the nodes in topological order, working top-down.
- for (unsigned i = 0, e = SUnits->size(); i != e; ++i) {
- SUnit *SU = &(*SUnits)[i];
+ for (SUnit &SU : *SUnits) {
// For now, only look at nodes with no data successors, such as stores.
// These are especially important, due to the heuristics in
// getNodePriority for nodes with no data successors.
- if (SU->NumSuccs != 0)
+ if (SU.NumSuccs != 0)
continue;
// For now, only look at nodes with exactly one data predecessor.
- if (SU->NumPreds != 1)
+ if (SU.NumPreds != 1)
continue;
// Avoid prescheduling copies to virtual registers, which don't behave
// like other nodes from the perspective of scheduling heuristics.
- if (SDNode *N = SU->getNode())
+ if (SDNode *N = SU.getNode())
if (N->getOpcode() == ISD::CopyToReg &&
TargetRegisterInfo::isVirtualRegister
(cast<RegisterSDNode>(N->getOperand(1))->getReg()))
@@ -2843,10 +2815,9 @@ void RegReductionPQBase::PrescheduleNodesWithMultipleUses() {
// Locate the single data predecessor.
SUnit *PredSU = nullptr;
- for (SUnit::const_pred_iterator II = SU->Preds.begin(),
- EE = SU->Preds.end(); II != EE; ++II)
- if (!II->isCtrl()) {
- PredSU = II->getSUnit();
+ for (const SDep &Pred : SU.Preds)
+ if (!Pred.isCtrl()) {
+ PredSU = Pred.getSUnit();
break;
}
assert(PredSU);
@@ -2860,44 +2831,43 @@ void RegReductionPQBase::PrescheduleNodesWithMultipleUses() {
continue;
// Avoid prescheduling to copies from virtual registers, which don't behave
// like other nodes from the perspective of scheduling heuristics.
- if (SDNode *N = SU->getNode())
+ if (SDNode *N = SU.getNode())
if (N->getOpcode() == ISD::CopyFromReg &&
TargetRegisterInfo::isVirtualRegister
(cast<RegisterSDNode>(N->getOperand(1))->getReg()))
continue;
// Perform checks on the successors of PredSU.
- for (SUnit::const_succ_iterator II = PredSU->Succs.begin(),
- EE = PredSU->Succs.end(); II != EE; ++II) {
- SUnit *PredSuccSU = II->getSUnit();
- if (PredSuccSU == SU) continue;
+ for (const SDep &PredSucc : PredSU->Succs) {
+ SUnit *PredSuccSU = PredSucc.getSUnit();
+ if (PredSuccSU == &SU) continue;
// If PredSU has another successor with no data successors, for
// now don't attempt to choose either over the other.
if (PredSuccSU->NumSuccs == 0)
goto outer_loop_continue;
// Don't break physical register dependencies.
- if (SU->hasPhysRegClobbers && PredSuccSU->hasPhysRegDefs)
- if (canClobberPhysRegDefs(PredSuccSU, SU, TII, TRI))
+ if (SU.hasPhysRegClobbers && PredSuccSU->hasPhysRegDefs)
+ if (canClobberPhysRegDefs(PredSuccSU, &SU, TII, TRI))
goto outer_loop_continue;
// Don't introduce graph cycles.
- if (scheduleDAG->IsReachable(SU, PredSuccSU))
+ if (scheduleDAG->IsReachable(&SU, PredSuccSU))
goto outer_loop_continue;
}
// Ok, the transformation is safe and the heuristics suggest it is
// profitable. Update the graph.
- DEBUG(dbgs() << " Prescheduling SU #" << SU->NodeNum
+ DEBUG(dbgs() << " Prescheduling SU #" << SU.NodeNum
<< " next to PredSU #" << PredSU->NodeNum
<< " to guide scheduling in the presence of multiple uses\n");
for (unsigned i = 0; i != PredSU->Succs.size(); ++i) {
SDep Edge = PredSU->Succs[i];
assert(!Edge.isAssignedRegDep());
SUnit *SuccSU = Edge.getSUnit();
- if (SuccSU != SU) {
+ if (SuccSU != &SU) {
Edge.setSUnit(PredSU);
scheduleDAG->RemovePred(SuccSU, Edge);
- scheduleDAG->AddPred(SU, Edge);
- Edge.setSUnit(SU);
+ scheduleDAG->AddPred(&SU, Edge);
+ Edge.setSUnit(&SU);
scheduleDAG->AddPred(SuccSU, Edge);
--i;
}
@@ -2914,16 +2884,15 @@ void RegReductionPQBase::PrescheduleNodesWithMultipleUses() {
/// If both are two-address, but one is commutable while the other is not
/// commutable, favor the one that's not commutable.
void RegReductionPQBase::AddPseudoTwoAddrDeps() {
- for (unsigned i = 0, e = SUnits->size(); i != e; ++i) {
- SUnit *SU = &(*SUnits)[i];
- if (!SU->isTwoAddress)
+ for (SUnit &SU : *SUnits) {
+ if (!SU.isTwoAddress)
continue;
- SDNode *Node = SU->getNode();
- if (!Node || !Node->isMachineOpcode() || SU->getNode()->getGluedNode())
+ SDNode *Node = SU.getNode();
+ if (!Node || !Node->isMachineOpcode() || SU.getNode()->getGluedNode())
continue;
- bool isLiveOut = hasOnlyLiveOutUses(SU);
+ bool isLiveOut = hasOnlyLiveOutUses(&SU);
unsigned Opc = Node->getMachineOpcode();
const MCInstrDesc &MCID = TII->get(Opc);
unsigned NumRes = MCID.getNumDefs();
@@ -2931,21 +2900,22 @@ void RegReductionPQBase::AddPseudoTwoAddrDeps() {
for (unsigned j = 0; j != NumOps; ++j) {
if (MCID.getOperandConstraint(j+NumRes, MCOI::TIED_TO) == -1)
continue;
- SDNode *DU = SU->getNode()->getOperand(j).getNode();
+ SDNode *DU = SU.getNode()->getOperand(j).getNode();
if (DU->getNodeId() == -1)
continue;
const SUnit *DUSU = &(*SUnits)[DU->getNodeId()];
- if (!DUSU) continue;
- for (SUnit::const_succ_iterator I = DUSU->Succs.begin(),
- E = DUSU->Succs.end(); I != E; ++I) {
- if (I->isCtrl()) continue;
- SUnit *SuccSU = I->getSUnit();
- if (SuccSU == SU)
+ if (!DUSU)
+ continue;
+ for (const SDep &Succ : DUSU->Succs) {
+ if (Succ.isCtrl())
+ continue;
+ SUnit *SuccSU = Succ.getSUnit();
+ if (SuccSU == &SU)
continue;
// Be conservative. Ignore if nodes aren't at roughly the same
// depth and height.
- if (SuccSU->getHeight() < SU->getHeight() &&
- (SU->getHeight() - SuccSU->getHeight()) > 1)
+ if (SuccSU->getHeight() < SU.getHeight() &&
+ (SU.getHeight() - SuccSU->getHeight()) > 1)
continue;
// Skip past COPY_TO_REGCLASS nodes, so that the pseudo edge
// constrains whatever is using the copy, instead of the copy
@@ -2961,8 +2931,8 @@ void RegReductionPQBase::AddPseudoTwoAddrDeps() {
continue;
// Don't constrain nodes with physical register defs if the
// predecessor can clobber them.
- if (SuccSU->hasPhysRegDefs && SU->hasPhysRegClobbers) {
- if (canClobberPhysRegDefs(SuccSU, SU, TII, TRI))
+ if (SuccSU->hasPhysRegDefs && SU.hasPhysRegClobbers) {
+ if (canClobberPhysRegDefs(SuccSU, &SU, TII, TRI))
continue;
}
// Don't constrain EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG;
@@ -2972,14 +2942,14 @@ void RegReductionPQBase::AddPseudoTwoAddrDeps() {
SuccOpc == TargetOpcode::INSERT_SUBREG ||
SuccOpc == TargetOpcode::SUBREG_TO_REG)
continue;
- if (!canClobberReachingPhysRegUse(SuccSU, SU, scheduleDAG, TII, TRI) &&
+ if (!canClobberReachingPhysRegUse(SuccSU, &SU, scheduleDAG, TII, TRI) &&
(!canClobber(SuccSU, DUSU) ||
(isLiveOut && !hasOnlyLiveOutUses(SuccSU)) ||
- (!SU->isCommutable && SuccSU->isCommutable)) &&
- !scheduleDAG->IsReachable(SuccSU, SU)) {
+ (!SU.isCommutable && SuccSU->isCommutable)) &&
+ !scheduleDAG->IsReachable(SuccSU, &SU)) {
DEBUG(dbgs() << " Adding a pseudo-two-addr edge from SU #"
- << SU->NodeNum << " to SU #" << SuccSU->NodeNum << "\n");
- scheduleDAG->AddPred(SU, SDep(SuccSU, SDep::Artificial));
+ << SU.NodeNum << " to SU #" << SuccSU->NodeNum << "\n");
+ scheduleDAG->AddPred(&SU, SDep(SuccSU, SDep::Artificial));
}
}
}
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
index 2a6c853..3be622f 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
@@ -321,7 +321,7 @@ void ScheduleDAGSDNodes::BuildSchedUnits() {
// Add all nodes in depth first order.
SmallVector<SDNode*, 64> Worklist;
- SmallPtrSet<SDNode*, 64> Visited;
+ SmallPtrSet<SDNode*, 32> Visited;
Worklist.push_back(DAG->getRoot().getNode());
Visited.insert(DAG->getRoot().getNode());
@@ -750,7 +750,7 @@ ProcessSourceNode(SDNode *N, SelectionDAG *DAG, InstrEmitter &Emitter,
return;
}
- Orders.push_back(std::make_pair(Order, std::prev(Emitter.getInsertPos())));
+ Orders.push_back(std::make_pair(Order, &*std::prev(Emitter.getInsertPos())));
ProcessSDDbgValues(N, DAG, Emitter, Orders, VRBaseMap, Order);
}
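The ProcessSourceNode fix is the standard `&*` idiom: std::prev returns an iterator, and dereferencing it then taking the address yields the raw element pointer the pair now stores. In isolation:

    #include <iterator>
    #include <list>

    // Convert an iterator into the address of its element. Assumes the
    // list is non-empty, so std::prev(l.end()) is dereferenceable.
    int *lastElementPtr(std::list<int> &l) {
      return &*std::prev(l.end());
    }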
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 893871f..29d11c7 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -24,6 +24,7 @@
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/SelectionDAGTargetInfo.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
@@ -33,7 +34,6 @@
#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Intrinsics.h"
-#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/ManagedStatic.h"
@@ -46,7 +46,6 @@
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetSelectionDAGInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
#include <algorithm>
#include <cmath>
@@ -94,8 +93,22 @@ bool ConstantFPSDNode::isValueValidForType(EVT VT,
// ISD Namespace
//===----------------------------------------------------------------------===//
-/// isBuildVectorAllOnes - Return true if the specified node is a
-/// BUILD_VECTOR where all of the elements are ~0 or undef.
+bool ISD::isConstantSplatVector(const SDNode *N, APInt &SplatVal) {
+ auto *BV = dyn_cast<BuildVectorSDNode>(N);
+ if (!BV)
+ return false;
+
+ APInt SplatUndef;
+ unsigned SplatBitSize;
+ bool HasUndefs;
+ EVT EltVT = N->getValueType(0).getVectorElementType();
+ return BV->isConstantSplat(SplatVal, SplatUndef, SplatBitSize, HasUndefs) &&
+ EltVT.getSizeInBits() >= SplatBitSize;
+}
+
+// FIXME: AllOnes and AllZeros duplicate a lot of code. Could these be
+// specializations of the more general isConstantSplatVector()?
+
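The new ISD::isConstantSplatVector asks whether a build vector is one constant broadcast across every lane, with undef lanes tolerated. Reduced to its logical core (Lane is a hypothetical stand-in; the all-undef policy below is this sketch's choice, mirroring the "do not accept an all-undef vector" rule nearby):

    #include <vector>

    struct Lane { int Val; bool Undef; };

    // True when every defined lane holds the same value; undef lanes are
    // skipped, and a fully-undef vector is rejected.
    bool isConstantSplat(const std::vector<Lane> &lanes, int &splatVal) {
      bool found = false;
      for (const Lane &l : lanes) {
        if (l.Undef)
          continue;
        if (!found) {
          splatVal = l.Val;
          found = true;
        } else if (l.Val != splatVal) {
          return false;
        }
      }
      return found;
    }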
bool ISD::isBuildVectorAllOnes(const SDNode *N) {
// Look through a bit convert.
while (N->getOpcode() == ISD::BITCAST)
@@ -106,7 +119,7 @@ bool ISD::isBuildVectorAllOnes(const SDNode *N) {
unsigned i = 0, e = N->getNumOperands();
// Skip over all of the undef values.
- while (i != e && N->getOperand(i).getOpcode() == ISD::UNDEF)
+ while (i != e && N->getOperand(i).isUndef())
++i;
// Do not accept an all-undef vector.
@@ -135,15 +148,11 @@ bool ISD::isBuildVectorAllOnes(const SDNode *N) {
// undefs. Even with the above element type twiddling, this should be OK, as
// the same type legalization should have applied to all the elements.
for (++i; i != e; ++i)
- if (N->getOperand(i) != NotZero &&
- N->getOperand(i).getOpcode() != ISD::UNDEF)
+ if (N->getOperand(i) != NotZero && !N->getOperand(i).isUndef())
return false;
return true;
}
-
-/// isBuildVectorAllZeros - Return true if the specified node is a
-/// BUILD_VECTOR where all of the elements are 0 or undef.
bool ISD::isBuildVectorAllZeros(const SDNode *N) {
// Look through a bit convert.
while (N->getOpcode() == ISD::BITCAST)
@@ -153,7 +162,7 @@ bool ISD::isBuildVectorAllZeros(const SDNode *N) {
bool IsAllUndef = true;
for (const SDValue &Op : N->op_values()) {
- if (Op.getOpcode() == ISD::UNDEF)
+ if (Op.isUndef())
continue;
IsAllUndef = false;
// Do not accept build_vectors that aren't all constants or which have non-0
@@ -181,14 +190,12 @@ bool ISD::isBuildVectorAllZeros(const SDNode *N) {
return true;
}
-/// \brief Return true if the specified node is a BUILD_VECTOR node of
-/// all ConstantSDNode or undef.
bool ISD::isBuildVectorOfConstantSDNodes(const SDNode *N) {
if (N->getOpcode() != ISD::BUILD_VECTOR)
return false;
for (const SDValue &Op : N->op_values()) {
- if (Op.getOpcode() == ISD::UNDEF)
+ if (Op.isUndef())
continue;
if (!isa<ConstantSDNode>(Op))
return false;
@@ -196,14 +203,12 @@ bool ISD::isBuildVectorOfConstantSDNodes(const SDNode *N) {
return true;
}
-/// \brief Return true if the specified node is a BUILD_VECTOR node of
-/// all ConstantFPSDNode or undef.
bool ISD::isBuildVectorOfConstantFPSDNodes(const SDNode *N) {
if (N->getOpcode() != ISD::BUILD_VECTOR)
return false;
for (const SDValue &Op : N->op_values()) {
- if (Op.getOpcode() == ISD::UNDEF)
+ if (Op.isUndef())
continue;
if (!isa<ConstantFPSDNode>(Op))
return false;
@@ -211,8 +216,6 @@ bool ISD::isBuildVectorOfConstantFPSDNodes(const SDNode *N) {
return true;
}
-/// allOperandsUndef - Return true if the node has at least one operand
-/// and all operands of the specified node are ISD::UNDEF.
bool ISD::allOperandsUndef(const SDNode *N) {
// Return false if the node has no operands.
// This is "logically inconsistent" with the definition of "all" but
@@ -221,7 +224,7 @@ bool ISD::allOperandsUndef(const SDNode *N) {
return false;
for (const SDValue &Op : N->op_values())
- if (Op.getOpcode() != ISD::UNDEF)
+ if (!Op.isUndef())
return false;
return true;
@@ -242,8 +245,6 @@ ISD::NodeType ISD::getExtForLoadExtType(bool IsFP, ISD::LoadExtType ExtType) {
llvm_unreachable("Invalid LoadExtType");
}
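
For reference, the mapping this function implements, written as checks
(assuming the standard ISD extension opcodes):

    assert(ISD::getExtForLoadExtType(false, ISD::ZEXTLOAD) == ISD::ZERO_EXTEND);
    assert(ISD::getExtForLoadExtType(false, ISD::SEXTLOAD) == ISD::SIGN_EXTEND);
    assert(ISD::getExtForLoadExtType(true, ISD::EXTLOAD) == ISD::FP_EXTEND);
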
-/// getSetCCSwappedOperands - Return the operation corresponding to (Y op X)
-/// when given the operation for (X op Y).
ISD::CondCode ISD::getSetCCSwappedOperands(ISD::CondCode Operation) {
// To perform this operation, we just need to swap the L and G bits of the
// operation.
@@ -254,8 +255,6 @@ ISD::CondCode ISD::getSetCCSwappedOperands(ISD::CondCode Operation) {
(OldG << 2)); // New L bit.
}
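
Swapping the L and G bits turns the code for (X op Y) into the code for
(Y op X); a few concrete cases:

    assert(ISD::getSetCCSwappedOperands(ISD::SETLT) == ISD::SETGT);
    assert(ISD::getSetCCSwappedOperands(ISD::SETULE) == ISD::SETUGE);
    assert(ISD::getSetCCSwappedOperands(ISD::SETEQ) == ISD::SETEQ); // symmetric
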
-/// getSetCCInverse - Return the operation corresponding to !(X op Y), where
-/// 'op' is a valid SetCC operation.
ISD::CondCode ISD::getSetCCInverse(ISD::CondCode Op, bool isInteger) {
unsigned Operation = Op;
if (isInteger)
@@ -270,9 +269,9 @@ ISD::CondCode ISD::getSetCCInverse(ISD::CondCode Op, bool isInteger) {
}
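
A couple of the inverses this produces for integer comparisons (the FP path
additionally flips the ordered/unordered bit):

    assert(ISD::getSetCCInverse(ISD::SETEQ, /*isInteger=*/true) == ISD::SETNE);
    assert(ISD::getSetCCInverse(ISD::SETLT, /*isInteger=*/true) == ISD::SETGE);
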
-/// isSignedOp - For an integer comparison, return 1 if the comparison is a
-/// signed operation and 2 if the result is an unsigned comparison. Return zero
-/// if the operation does not depend on the sign of the input (setne and seteq).
+/// For an integer comparison, return 1 if the comparison is a signed operation
+/// and 2 if it is an unsigned comparison. Return zero if the operation does
+/// not depend on the sign of the input (setne and seteq).
static int isSignedOp(ISD::CondCode Opcode) {
switch (Opcode) {
default: llvm_unreachable("Illegal integer setcc operation!");
@@ -289,10 +288,6 @@ static int isSignedOp(ISD::CondCode Opcode) {
}
}
-/// getSetCCOrOperation - Return the result of a logical OR between different
-/// comparisons of identical values: ((X op1 Y) | (X op2 Y)). This function
-/// returns SETCC_INVALID if it is not possible to represent the resultant
-/// comparison.
ISD::CondCode ISD::getSetCCOrOperation(ISD::CondCode Op1, ISD::CondCode Op2,
bool isInteger) {
if (isInteger && (isSignedOp(Op1) | isSignedOp(Op2)) == 3)
@@ -313,10 +308,6 @@ ISD::CondCode ISD::getSetCCOrOperation(ISD::CondCode Op1, ISD::CondCode Op2,
return ISD::CondCode(Op);
}
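
Worked examples of the OR combination; mixing a signed with an unsigned
comparison is rejected through isSignedOp above:

    // (X setlt Y) | (X setgt Y) == (X setne Y)
    assert(ISD::getSetCCOrOperation(ISD::SETLT, ISD::SETGT, true) == ISD::SETNE);
    // A signed/unsigned mix has no single-setcc representation.
    assert(ISD::getSetCCOrOperation(ISD::SETLT, ISD::SETUGT, true) ==
           ISD::SETCC_INVALID);
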
-/// getSetCCAndOperation - Return the result of a logical AND between different
-/// comparisons of identical values: ((X op1 Y) & (X op2 Y)). This
-/// function returns zero if it is not possible to represent the resultant
-/// comparison.
ISD::CondCode ISD::getSetCCAndOperation(ISD::CondCode Op1, ISD::CondCode Op2,
bool isInteger) {
if (isInteger && (isSignedOp(Op1) | isSignedOp(Op2)) == 3)
@@ -644,7 +635,8 @@ void SelectionDAG::DeleteNode(SDNode *N) {
}
void SelectionDAG::DeleteNodeNotInCSEMaps(SDNode *N) {
- assert(N != AllNodes.begin() && "Cannot delete the entry node!");
+ assert(N->getIterator() != AllNodes.begin() &&
+ "Cannot delete the entry node!");
assert(N->use_empty() && "Cannot delete a node that is not dead!");
// Drop all of the operands and decrement used node's use counts.
@@ -663,8 +655,8 @@ void SDDbgInfo::erase(const SDNode *Node) {
}
void SelectionDAG::DeallocateNode(SDNode *N) {
- if (N->OperandsNeedDelete)
- delete[] N->OperandList;
+ // If we have operands, deallocate them.
+ removeOperands(N);
// Set the opcode to DELETED_NODE to help catch bugs when node
// memory is reallocated.
@@ -832,7 +824,7 @@ SDNode *SelectionDAG::FindModifiedNodeSlot(SDNode *N, SDValue Op,
FoldingSetNodeID ID;
AddNodeIDNode(ID, N->getOpcode(), N->getVTList(), Ops);
AddNodeIDCustom(ID, N);
- SDNode *Node = FindNodeOrInsertPos(ID, N->getDebugLoc(), InsertPos);
+ SDNode *Node = FindNodeOrInsertPos(ID, SDLoc(N), InsertPos);
if (Node)
if (const SDNodeFlags *Flags = N->getFlags())
Node->intersectFlagsWith(Flags);
@@ -853,7 +845,7 @@ SDNode *SelectionDAG::FindModifiedNodeSlot(SDNode *N,
FoldingSetNodeID ID;
AddNodeIDNode(ID, N->getOpcode(), N->getVTList(), Ops);
AddNodeIDCustom(ID, N);
- SDNode *Node = FindNodeOrInsertPos(ID, N->getDebugLoc(), InsertPos);
+ SDNode *Node = FindNodeOrInsertPos(ID, SDLoc(N), InsertPos);
if (Node)
if (const SDNodeFlags *Flags = N->getFlags())
Node->intersectFlagsWith(Flags);
@@ -873,16 +865,13 @@ SDNode *SelectionDAG::FindModifiedNodeSlot(SDNode *N, ArrayRef<SDValue> Ops,
FoldingSetNodeID ID;
AddNodeIDNode(ID, N->getOpcode(), N->getVTList(), Ops);
AddNodeIDCustom(ID, N);
- SDNode *Node = FindNodeOrInsertPos(ID, N->getDebugLoc(), InsertPos);
+ SDNode *Node = FindNodeOrInsertPos(ID, SDLoc(N), InsertPos);
if (Node)
if (const SDNodeFlags *Flags = N->getFlags())
Node->intersectFlagsWith(Flags);
return Node;
}
-/// getEVTAlignment - Compute the default alignment value for the
-/// given type.
-///
unsigned SelectionDAG::getEVTAlignment(EVT VT) const {
Type *Ty = VT == MVT::iPTR ?
PointerType::get(Type::getInt8Ty(*getContext()), 0) :
@@ -911,6 +900,7 @@ void SelectionDAG::init(MachineFunction &mf) {
SelectionDAG::~SelectionDAG() {
assert(!UpdateListeners && "Dangling registered DAGUpdateListeners");
allnodes_clear();
+ OperandRecycler.clear(OperandAllocator);
delete DbgInfo;
}
@@ -924,24 +914,26 @@ void SelectionDAG::allnodes_clear() {
#endif
}
-BinarySDNode *SelectionDAG::GetBinarySDNode(unsigned Opcode, SDLoc DL,
- SDVTList VTs, SDValue N1,
- SDValue N2,
- const SDNodeFlags *Flags) {
+SDNode *SelectionDAG::GetBinarySDNode(unsigned Opcode, const SDLoc &DL,
+ SDVTList VTs, SDValue N1, SDValue N2,
+ const SDNodeFlags *Flags) {
+ SDValue Ops[] = {N1, N2};
+
if (isBinOpWithFlags(Opcode)) {
// If no flags were passed in, use a default flags object.
SDNodeFlags F;
if (Flags == nullptr)
Flags = &F;
- BinaryWithFlagsSDNode *FN = new (NodeAllocator) BinaryWithFlagsSDNode(
- Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs, N1, N2, *Flags);
+ auto *FN = newSDNode<BinaryWithFlagsSDNode>(Opcode, DL.getIROrder(),
+ DL.getDebugLoc(), VTs, *Flags);
+ createOperands(FN, Ops);
return FN;
}
- BinarySDNode *N = new (NodeAllocator)
- BinarySDNode(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs, N1, N2);
+ auto *N = newSDNode<SDNode>(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs);
+ createOperands(N, Ops);
return N;
}
@@ -961,19 +953,25 @@ SDNode *SelectionDAG::FindNodeOrInsertPos(const FoldingSetNodeID &ID,
}
SDNode *SelectionDAG::FindNodeOrInsertPos(const FoldingSetNodeID &ID,
- DebugLoc DL, void *&InsertPos) {
+ const SDLoc &DL, void *&InsertPos) {
SDNode *N = CSEMap.FindNodeOrInsertPos(ID, InsertPos);
if (N) {
switch (N->getOpcode()) {
- default: break; // Process only regular (non-target) constant nodes.
case ISD::Constant:
case ISD::ConstantFP:
// Erase debug location from the node if the node is used at several
- // different places to do not propagate one location to all uses as it
- // leads to incorrect debug info.
- if (N->getDebugLoc() != DL)
+ // different places. Do not propagate one location to all uses as it
+ // will cause a worse single stepping debugging experience.
+ if (N->getDebugLoc() != DL.getDebugLoc())
N->setDebugLoc(DebugLoc());
break;
+ default:
+ // When the node's point of use is located earlier in the instruction
+ // sequence than its prior point of use, update its debug info to the
+ // earlier location.
+ if (DL.getIROrder() && DL.getIROrder() < N->getIROrder())
+ N->setDebugLoc(DL.getDebugLoc());
+ break;
}
}
return N;
@@ -981,6 +979,7 @@ SDNode *SelectionDAG::FindNodeOrInsertPos(const FoldingSetNodeID &ID,
void SelectionDAG::clear() {
allnodes_clear();
+ OperandRecycler.clear(OperandAllocator);
OperandAllocator.Reset();
CSEMap.clear();
@@ -999,25 +998,25 @@ void SelectionDAG::clear() {
DbgInfo->clear();
}
-SDValue SelectionDAG::getAnyExtOrTrunc(SDValue Op, SDLoc DL, EVT VT) {
+SDValue SelectionDAG::getAnyExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT) {
return VT.bitsGT(Op.getValueType()) ?
getNode(ISD::ANY_EXTEND, DL, VT, Op) :
getNode(ISD::TRUNCATE, DL, VT, Op);
}
-SDValue SelectionDAG::getSExtOrTrunc(SDValue Op, SDLoc DL, EVT VT) {
+SDValue SelectionDAG::getSExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT) {
return VT.bitsGT(Op.getValueType()) ?
getNode(ISD::SIGN_EXTEND, DL, VT, Op) :
getNode(ISD::TRUNCATE, DL, VT, Op);
}
-SDValue SelectionDAG::getZExtOrTrunc(SDValue Op, SDLoc DL, EVT VT) {
+SDValue SelectionDAG::getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT) {
return VT.bitsGT(Op.getValueType()) ?
getNode(ISD::ZERO_EXTEND, DL, VT, Op) :
getNode(ISD::TRUNCATE, DL, VT, Op);
}
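
These helpers pick the opcode from the relative type widths; a sketch assuming
DAG, DL, and i8/i64 values in scope:

    SDValue Wide   = DAG.getZExtOrTrunc(I8Val, DL, MVT::i32);  // ZERO_EXTEND
    SDValue Narrow = DAG.getZExtOrTrunc(I64Val, DL, MVT::i32); // TRUNCATE
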
-SDValue SelectionDAG::getBoolExtOrTrunc(SDValue Op, SDLoc SL, EVT VT,
+SDValue SelectionDAG::getBoolExtOrTrunc(SDValue Op, const SDLoc &SL, EVT VT,
EVT OpVT) {
if (VT.bitsLE(Op.getValueType()))
return getNode(ISD::TRUNCATE, SL, VT, Op);
@@ -1026,7 +1025,7 @@ SDValue SelectionDAG::getBoolExtOrTrunc(SDValue Op, SDLoc SL, EVT VT,
return getNode(TLI->getExtendForContent(BType), SL, VT, Op);
}
-SDValue SelectionDAG::getZeroExtendInReg(SDValue Op, SDLoc DL, EVT VT) {
+SDValue SelectionDAG::getZeroExtendInReg(SDValue Op, const SDLoc &DL, EVT VT) {
assert(!VT.isVector() &&
"getZeroExtendInReg should use the vector element type instead of "
"the vector type!");
@@ -1038,7 +1037,8 @@ SDValue SelectionDAG::getZeroExtendInReg(SDValue Op, SDLoc DL, EVT VT) {
getConstant(Imm, DL, Op.getValueType()));
}
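
A sketch of the emitted pattern, assuming an i32 SDValue Op in scope:

    // Zero-extend the low 8 bits "in register": lowers to (and Op, 0xff).
    SDValue Masked = DAG.getZeroExtendInReg(Op, DL, MVT::i8);
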
-SDValue SelectionDAG::getAnyExtendVectorInReg(SDValue Op, SDLoc DL, EVT VT) {
+SDValue SelectionDAG::getAnyExtendVectorInReg(SDValue Op, const SDLoc &DL,
+ EVT VT) {
assert(VT.isVector() && "This DAG node is restricted to vector types.");
assert(VT.getSizeInBits() == Op.getValueType().getSizeInBits() &&
"The sizes of the input and result must match in order to perform the "
@@ -1048,7 +1048,8 @@ SDValue SelectionDAG::getAnyExtendVectorInReg(SDValue Op, SDLoc DL, EVT VT) {
return getNode(ISD::ANY_EXTEND_VECTOR_INREG, DL, VT, Op);
}
-SDValue SelectionDAG::getSignExtendVectorInReg(SDValue Op, SDLoc DL, EVT VT) {
+SDValue SelectionDAG::getSignExtendVectorInReg(SDValue Op, const SDLoc &DL,
+ EVT VT) {
assert(VT.isVector() && "This DAG node is restricted to vector types.");
assert(VT.getSizeInBits() == Op.getValueType().getSizeInBits() &&
"The sizes of the input and result must match in order to perform the "
@@ -1058,7 +1059,8 @@ SDValue SelectionDAG::getSignExtendVectorInReg(SDValue Op, SDLoc DL, EVT VT) {
return getNode(ISD::SIGN_EXTEND_VECTOR_INREG, DL, VT, Op);
}
-SDValue SelectionDAG::getZeroExtendVectorInReg(SDValue Op, SDLoc DL, EVT VT) {
+SDValue SelectionDAG::getZeroExtendVectorInReg(SDValue Op, const SDLoc &DL,
+ EVT VT) {
assert(VT.isVector() && "This DAG node is restricted to vector types.");
assert(VT.getSizeInBits() == Op.getValueType().getSizeInBits() &&
"The sizes of the input and result must match in order to perform the "
@@ -1070,14 +1072,14 @@ SDValue SelectionDAG::getZeroExtendVectorInReg(SDValue Op, SDLoc DL, EVT VT) {
/// getNOT - Create a bitwise NOT operation as (XOR Val, -1).
///
-SDValue SelectionDAG::getNOT(SDLoc DL, SDValue Val, EVT VT) {
+SDValue SelectionDAG::getNOT(const SDLoc &DL, SDValue Val, EVT VT) {
EVT EltVT = VT.getScalarType();
SDValue NegOne =
getConstant(APInt::getAllOnesValue(EltVT.getSizeInBits()), DL, VT);
return getNode(ISD::XOR, DL, VT, Val, NegOne);
}
-SDValue SelectionDAG::getLogicalNOT(SDLoc DL, SDValue Val, EVT VT) {
+SDValue SelectionDAG::getLogicalNOT(const SDLoc &DL, SDValue Val, EVT VT) {
EVT EltVT = VT.getScalarType();
SDValue TrueValue;
switch (TLI->getBooleanContents(VT)) {
@@ -1093,8 +1095,8 @@ SDValue SelectionDAG::getLogicalNOT(SDLoc DL, SDValue Val, EVT VT) {
return getNode(ISD::XOR, DL, VT, Val, TrueValue);
}
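
The two NOT flavours differ only in the constant they XOR against; a sketch
assuming DAG, DL, and an SDValue X of type VT:

    SDValue BitNot  = DAG.getNOT(DL, X, VT);        // (xor X, all-ones)
    SDValue BoolNot = DAG.getLogicalNOT(DL, X, VT); // (xor X, boolean "true")
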
-SDValue SelectionDAG::getConstant(uint64_t Val, SDLoc DL, EVT VT, bool isT,
- bool isO) {
+SDValue SelectionDAG::getConstant(uint64_t Val, const SDLoc &DL, EVT VT,
+ bool isT, bool isO) {
EVT EltVT = VT.getScalarType();
assert((EltVT.getSizeInBits() >= 64 ||
(uint64_t)((int64_t)Val >> EltVT.getSizeInBits()) + 1 < 2) &&
@@ -1102,14 +1104,13 @@ SDValue SelectionDAG::getConstant(uint64_t Val, SDLoc DL, EVT VT, bool isT,
return getConstant(APInt(EltVT.getSizeInBits(), Val), DL, VT, isT, isO);
}
-SDValue SelectionDAG::getConstant(const APInt &Val, SDLoc DL, EVT VT, bool isT,
- bool isO)
-{
+SDValue SelectionDAG::getConstant(const APInt &Val, const SDLoc &DL, EVT VT,
+ bool isT, bool isO) {
return getConstant(*ConstantInt::get(*Context, Val), DL, VT, isT, isO);
}
-SDValue SelectionDAG::getConstant(const ConstantInt &Val, SDLoc DL, EVT VT,
- bool isT, bool isO) {
+SDValue SelectionDAG::getConstant(const ConstantInt &Val, const SDLoc &DL,
+ EVT VT, bool isT, bool isO) {
assert(VT.isInteger() && "Cannot create FP integer constant!");
EVT EltVT = VT.getScalarType();
@@ -1134,7 +1135,7 @@ SDValue SelectionDAG::getConstant(const ConstantInt &Val, SDLoc DL, EVT VT,
else if (NewNodesMustHaveLegalTypes && VT.isVector() &&
TLI->getTypeAction(*getContext(), EltVT) ==
TargetLowering::TypeExpandInteger) {
- APInt NewVal = Elt->getValue();
+ const APInt &NewVal = Elt->getValue();
EVT ViaEltVT = TLI->getTypeToTransformTo(*getContext(), EltVT);
unsigned ViaEltSizeInBits = ViaEltVT.getSizeInBits();
unsigned ViaVecNumElts = VT.getSizeInBits() / ViaEltSizeInBits;
@@ -1168,9 +1169,8 @@ SDValue SelectionDAG::getConstant(const ConstantInt &Val, SDLoc DL, EVT VT,
for (unsigned i = 0; i < VT.getVectorNumElements(); ++i)
Ops.insert(Ops.end(), EltParts.begin(), EltParts.end());
- SDValue Result = getNode(ISD::BITCAST, SDLoc(), VT,
- getNode(ISD::BUILD_VECTOR, SDLoc(), ViaVecVT,
- Ops));
+ SDValue Result = getNode(ISD::BITCAST, DL, VT,
+ getNode(ISD::BUILD_VECTOR, DL, ViaVecVT, Ops));
return Result;
}
@@ -1183,37 +1183,34 @@ SDValue SelectionDAG::getConstant(const ConstantInt &Val, SDLoc DL, EVT VT,
ID.AddBoolean(isO);
void *IP = nullptr;
SDNode *N = nullptr;
- if ((N = FindNodeOrInsertPos(ID, DL.getDebugLoc(), IP)))
+ if ((N = FindNodeOrInsertPos(ID, DL, IP)))
if (!VT.isVector())
return SDValue(N, 0);
if (!N) {
- N = new (NodeAllocator) ConstantSDNode(isT, isO, Elt, DL.getDebugLoc(),
- EltVT);
+ N = newSDNode<ConstantSDNode>(isT, isO, Elt, DL.getDebugLoc(), EltVT);
CSEMap.InsertNode(N, IP);
InsertNode(N);
}
SDValue Result(N, 0);
- if (VT.isVector()) {
- SmallVector<SDValue, 8> Ops;
- Ops.assign(VT.getVectorNumElements(), Result);
- Result = getNode(ISD::BUILD_VECTOR, SDLoc(), VT, Ops);
- }
+ if (VT.isVector())
+ Result = getSplatBuildVector(VT, DL, Result);
return Result;
}
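
With a vector VT, getConstant now returns a splat built through
getSplatBuildVector at the requested location instead of an unlocated
BUILD_VECTOR; a sketch:

    // Yields (BUILD_VECTOR 42, 42, 42, 42) of type v4i32, carrying DL.
    SDValue Splat = DAG.getConstant(42, DL, MVT::v4i32);
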
-SDValue SelectionDAG::getIntPtrConstant(uint64_t Val, SDLoc DL, bool isTarget) {
+SDValue SelectionDAG::getIntPtrConstant(uint64_t Val, const SDLoc &DL,
+ bool isTarget) {
return getConstant(Val, DL, TLI->getPointerTy(getDataLayout()), isTarget);
}
-SDValue SelectionDAG::getConstantFP(const APFloat& V, SDLoc DL, EVT VT,
+SDValue SelectionDAG::getConstantFP(const APFloat &V, const SDLoc &DL, EVT VT,
bool isTarget) {
return getConstantFP(*ConstantFP::get(*getContext(), V), DL, VT, isTarget);
}
-SDValue SelectionDAG::getConstantFP(const ConstantFP& V, SDLoc DL, EVT VT,
- bool isTarget){
+SDValue SelectionDAG::getConstantFP(const ConstantFP &V, const SDLoc &DL,
+ EVT VT, bool isTarget) {
assert(VT.isFloatingPoint() && "Cannot create integer FP constant!");
EVT EltVT = VT.getScalarType();
@@ -1227,47 +1224,42 @@ SDValue SelectionDAG::getConstantFP(const ConstantFP& V, SDLoc DL, EVT VT,
ID.AddPointer(&V);
void *IP = nullptr;
SDNode *N = nullptr;
- if ((N = FindNodeOrInsertPos(ID, DL.getDebugLoc(), IP)))
+ if ((N = FindNodeOrInsertPos(ID, DL, IP)))
if (!VT.isVector())
return SDValue(N, 0);
if (!N) {
- N = new (NodeAllocator) ConstantFPSDNode(isTarget, &V, DL.getDebugLoc(),
- EltVT);
+ N = newSDNode<ConstantFPSDNode>(isTarget, &V, DL.getDebugLoc(), EltVT);
CSEMap.InsertNode(N, IP);
InsertNode(N);
}
SDValue Result(N, 0);
- if (VT.isVector()) {
- SmallVector<SDValue, 8> Ops;
- Ops.assign(VT.getVectorNumElements(), Result);
- Result = getNode(ISD::BUILD_VECTOR, SDLoc(), VT, Ops);
- }
+ if (VT.isVector())
+ Result = getSplatBuildVector(VT, DL, Result);
return Result;
}
-SDValue SelectionDAG::getConstantFP(double Val, SDLoc DL, EVT VT,
+SDValue SelectionDAG::getConstantFP(double Val, const SDLoc &DL, EVT VT,
bool isTarget) {
EVT EltVT = VT.getScalarType();
- if (EltVT==MVT::f32)
+ if (EltVT == MVT::f32)
return getConstantFP(APFloat((float)Val), DL, VT, isTarget);
- else if (EltVT==MVT::f64)
+ else if (EltVT == MVT::f64)
return getConstantFP(APFloat(Val), DL, VT, isTarget);
- else if (EltVT==MVT::f80 || EltVT==MVT::f128 || EltVT==MVT::ppcf128 ||
- EltVT==MVT::f16) {
- bool ignored;
- APFloat apf = APFloat(Val);
- apf.convert(EVTToAPFloatSemantics(EltVT), APFloat::rmNearestTiesToEven,
- &ignored);
- return getConstantFP(apf, DL, VT, isTarget);
+ else if (EltVT == MVT::f80 || EltVT == MVT::f128 || EltVT == MVT::ppcf128 ||
+ EltVT == MVT::f16) {
+ bool Ignored;
+ APFloat APF = APFloat(Val);
+ APF.convert(EVTToAPFloatSemantics(EltVT), APFloat::rmNearestTiesToEven,
+ &Ignored);
+ return getConstantFP(APF, DL, VT, isTarget);
} else
llvm_unreachable("Unsupported type in getConstantFP");
}
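
f32 and f64 map directly onto APFloat; the remaining FP types are converted
first. For example, assuming DAG and DL in scope:

    // 1.5 is exactly representable in half precision, so no rounding occurs.
    SDValue Half = DAG.getConstantFP(1.5, DL, MVT::f16);
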
-SDValue SelectionDAG::getGlobalAddress(const GlobalValue *GV, SDLoc DL,
- EVT VT, int64_t Offset,
- bool isTargetGA,
+SDValue SelectionDAG::getGlobalAddress(const GlobalValue *GV, const SDLoc &DL,
+ EVT VT, int64_t Offset, bool isTargetGA,
unsigned char TargetFlags) {
assert((TargetFlags == 0 || isTargetGA) &&
"Cannot set target flags on target-independent globals");
@@ -1290,12 +1282,11 @@ SDValue SelectionDAG::getGlobalAddress(const GlobalValue *GV, SDLoc DL,
ID.AddInteger(TargetFlags);
ID.AddInteger(GV->getType()->getAddressSpace());
void *IP = nullptr;
- if (SDNode *E = FindNodeOrInsertPos(ID, DL.getDebugLoc(), IP))
+ if (SDNode *E = FindNodeOrInsertPos(ID, DL, IP))
return SDValue(E, 0);
- SDNode *N = new (NodeAllocator) GlobalAddressSDNode(Opc, DL.getIROrder(),
- DL.getDebugLoc(), GV, VT,
- Offset, TargetFlags);
+ auto *N = newSDNode<GlobalAddressSDNode>(
+ Opc, DL.getIROrder(), DL.getDebugLoc(), GV, VT, Offset, TargetFlags);
CSEMap.InsertNode(N, IP);
InsertNode(N);
return SDValue(N, 0);
@@ -1310,7 +1301,7 @@ SDValue SelectionDAG::getFrameIndex(int FI, EVT VT, bool isTarget) {
if (SDNode *E = FindNodeOrInsertPos(ID, IP))
return SDValue(E, 0);
- SDNode *N = new (NodeAllocator) FrameIndexSDNode(FI, VT, isTarget);
+ auto *N = newSDNode<FrameIndexSDNode>(FI, VT, isTarget);
CSEMap.InsertNode(N, IP);
InsertNode(N);
return SDValue(N, 0);
@@ -1329,8 +1320,7 @@ SDValue SelectionDAG::getJumpTable(int JTI, EVT VT, bool isTarget,
if (SDNode *E = FindNodeOrInsertPos(ID, IP))
return SDValue(E, 0);
- SDNode *N = new (NodeAllocator) JumpTableSDNode(JTI, VT, isTarget,
- TargetFlags);
+ auto *N = newSDNode<JumpTableSDNode>(JTI, VT, isTarget, TargetFlags);
CSEMap.InsertNode(N, IP);
InsertNode(N);
return SDValue(N, 0);
@@ -1355,8 +1345,8 @@ SDValue SelectionDAG::getConstantPool(const Constant *C, EVT VT,
if (SDNode *E = FindNodeOrInsertPos(ID, IP))
return SDValue(E, 0);
- SDNode *N = new (NodeAllocator) ConstantPoolSDNode(isTarget, C, VT, Offset,
- Alignment, TargetFlags);
+ auto *N = newSDNode<ConstantPoolSDNode>(isTarget, C, VT, Offset, Alignment,
+ TargetFlags);
CSEMap.InsertNode(N, IP);
InsertNode(N);
return SDValue(N, 0);
@@ -1382,8 +1372,8 @@ SDValue SelectionDAG::getConstantPool(MachineConstantPoolValue *C, EVT VT,
if (SDNode *E = FindNodeOrInsertPos(ID, IP))
return SDValue(E, 0);
- SDNode *N = new (NodeAllocator) ConstantPoolSDNode(isTarget, C, VT, Offset,
- Alignment, TargetFlags);
+ auto *N = newSDNode<ConstantPoolSDNode>(isTarget, C, VT, Offset, Alignment,
+ TargetFlags);
CSEMap.InsertNode(N, IP);
InsertNode(N);
return SDValue(N, 0);
@@ -1400,8 +1390,7 @@ SDValue SelectionDAG::getTargetIndex(int Index, EVT VT, int64_t Offset,
if (SDNode *E = FindNodeOrInsertPos(ID, IP))
return SDValue(E, 0);
- SDNode *N =
- new (NodeAllocator) TargetIndexSDNode(Index, VT, Offset, TargetFlags);
+ auto *N = newSDNode<TargetIndexSDNode>(Index, VT, Offset, TargetFlags);
CSEMap.InsertNode(N, IP);
InsertNode(N);
return SDValue(N, 0);
@@ -1415,7 +1404,7 @@ SDValue SelectionDAG::getBasicBlock(MachineBasicBlock *MBB) {
if (SDNode *E = FindNodeOrInsertPos(ID, IP))
return SDValue(E, 0);
- SDNode *N = new (NodeAllocator) BasicBlockSDNode(MBB);
+ auto *N = newSDNode<BasicBlockSDNode>(MBB);
CSEMap.InsertNode(N, IP);
InsertNode(N);
return SDValue(N, 0);
@@ -1430,7 +1419,7 @@ SDValue SelectionDAG::getValueType(EVT VT) {
ExtendedValueTypeNodes[VT] : ValueTypeNodes[VT.getSimpleVT().SimpleTy];
if (N) return SDValue(N, 0);
- N = new (NodeAllocator) VTSDNode(VT);
+ N = newSDNode<VTSDNode>(VT);
InsertNode(N);
return SDValue(N, 0);
}
@@ -1438,7 +1427,7 @@ SDValue SelectionDAG::getValueType(EVT VT) {
SDValue SelectionDAG::getExternalSymbol(const char *Sym, EVT VT) {
SDNode *&N = ExternalSymbols[Sym];
if (N) return SDValue(N, 0);
- N = new (NodeAllocator) ExternalSymbolSDNode(false, Sym, 0, VT);
+ N = newSDNode<ExternalSymbolSDNode>(false, Sym, 0, VT);
InsertNode(N);
return SDValue(N, 0);
}
@@ -1447,7 +1436,7 @@ SDValue SelectionDAG::getMCSymbol(MCSymbol *Sym, EVT VT) {
SDNode *&N = MCSymbols[Sym];
if (N)
return SDValue(N, 0);
- N = new (NodeAllocator) MCSymbolSDNode(Sym, VT);
+ N = newSDNode<MCSymbolSDNode>(Sym, VT);
InsertNode(N);
return SDValue(N, 0);
}
@@ -1458,7 +1447,7 @@ SDValue SelectionDAG::getTargetExternalSymbol(const char *Sym, EVT VT,
TargetExternalSymbols[std::pair<std::string,unsigned char>(Sym,
TargetFlags)];
if (N) return SDValue(N, 0);
- N = new (NodeAllocator) ExternalSymbolSDNode(true, Sym, TargetFlags, VT);
+ N = newSDNode<ExternalSymbolSDNode>(true, Sym, TargetFlags, VT);
InsertNode(N);
return SDValue(N, 0);
}
@@ -1468,7 +1457,7 @@ SDValue SelectionDAG::getCondCode(ISD::CondCode Cond) {
CondCodeNodes.resize(Cond+1);
if (!CondCodeNodes[Cond]) {
- CondCodeSDNode *N = new (NodeAllocator) CondCodeSDNode(Cond);
+ auto *N = newSDNode<CondCodeSDNode>(Cond);
CondCodeNodes[Cond] = N;
InsertNode(N);
}
@@ -1476,41 +1465,42 @@ SDValue SelectionDAG::getCondCode(ISD::CondCode Cond) {
return SDValue(CondCodeNodes[Cond], 0);
}
-// commuteShuffle - swaps the values of N1 and N2, and swaps all indices in
-// the shuffle mask M that point at N1 to point at N2, and indices that point
-// N2 to point at N1.
-static void commuteShuffle(SDValue &N1, SDValue &N2, SmallVectorImpl<int> &M) {
+/// Swaps the values of N1 and N2. Swaps all indices in the shuffle mask M that
+/// point at N1 to point at N2 and indices that point at N2 to point at N1.
+static void commuteShuffle(SDValue &N1, SDValue &N2, MutableArrayRef<int> M) {
std::swap(N1, N2);
ShuffleVectorSDNode::commuteMask(M);
}
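
A worked example of the mask commutation for four-element inputs:

    int Mask[] = {0, 5, 2, 7};              // N1[0], N2[1], N1[2], N2[3]
    ShuffleVectorSDNode::commuteMask(Mask); // now {4, 1, 6, 3}, sources swapped
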
-SDValue SelectionDAG::getVectorShuffle(EVT VT, SDLoc dl, SDValue N1,
- SDValue N2, const int *Mask) {
+SDValue SelectionDAG::getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1,
+ SDValue N2, ArrayRef<int> Mask) {
+ assert(VT.getVectorNumElements() == Mask.size() &&
+ "Must have the same number of vector elements as mask elements!");
assert(VT == N1.getValueType() && VT == N2.getValueType() &&
"Invalid VECTOR_SHUFFLE");
// Canonicalize shuffle undef, undef -> undef
- if (N1.getOpcode() == ISD::UNDEF && N2.getOpcode() == ISD::UNDEF)
+ if (N1.isUndef() && N2.isUndef())
return getUNDEF(VT);
// Validate that all indices in Mask are within the range of the elements
// input to the shuffle.
- unsigned NElts = VT.getVectorNumElements();
- SmallVector<int, 8> MaskVec;
- for (unsigned i = 0; i != NElts; ++i) {
- assert(Mask[i] < (int)(NElts * 2) && "Index out of range");
- MaskVec.push_back(Mask[i]);
- }
+ int NElts = Mask.size();
+ assert(all_of(Mask, [&](int M) { return M < (NElts * 2); }) &&
+ "Index out of range");
+
+ // Copy the mask so we can do any needed cleanup.
+ SmallVector<int, 8> MaskVec(Mask.begin(), Mask.end());
// Canonicalize shuffle v, v -> v, undef
if (N1 == N2) {
N2 = getUNDEF(VT);
- for (unsigned i = 0; i != NElts; ++i)
- if (MaskVec[i] >= (int)NElts) MaskVec[i] -= NElts;
+ for (int i = 0; i != NElts; ++i)
+ if (MaskVec[i] >= NElts) MaskVec[i] -= NElts;
}
// Canonicalize shuffle undef, v -> v, undef. Commute the shuffle mask.
- if (N1.getOpcode() == ISD::UNDEF)
+ if (N1.isUndef())
commuteShuffle(N1, N2, MaskVec);
// If shuffling a splat, try to blend the splat instead. We do this here so
@@ -1521,8 +1511,8 @@ SDValue SelectionDAG::getVectorShuffle(EVT VT, SDLoc dl, SDValue N1,
if (!Splat)
return;
- for (int i = 0; i < (int)NElts; ++i) {
- if (MaskVec[i] < Offset || MaskVec[i] >= (Offset + (int)NElts))
+ for (int i = 0; i < NElts; ++i) {
+ if (MaskVec[i] < Offset || MaskVec[i] >= (Offset + NElts))
continue;
// If this input comes from undef, mark it as such.
@@ -1544,9 +1534,9 @@ SDValue SelectionDAG::getVectorShuffle(EVT VT, SDLoc dl, SDValue N1,
// Canonicalize all index into lhs, -> shuffle lhs, undef
// Canonicalize all index into rhs, -> shuffle rhs, undef
bool AllLHS = true, AllRHS = true;
- bool N2Undef = N2.getOpcode() == ISD::UNDEF;
- for (unsigned i = 0; i != NElts; ++i) {
- if (MaskVec[i] >= (int)NElts) {
+ bool N2Undef = N2.isUndef();
+ for (int i = 0; i != NElts; ++i) {
+ if (MaskVec[i] >= NElts) {
if (N2Undef)
MaskVec[i] = -1;
else
@@ -1564,15 +1554,15 @@ SDValue SelectionDAG::getVectorShuffle(EVT VT, SDLoc dl, SDValue N1,
commuteShuffle(N1, N2, MaskVec);
}
// Reset our undef status after accounting for the mask.
- N2Undef = N2.getOpcode() == ISD::UNDEF;
+ N2Undef = N2.isUndef();
// Re-check whether both sides ended up undef.
- if (N1.getOpcode() == ISD::UNDEF && N2Undef)
+ if (N1.isUndef() && N2Undef)
return getUNDEF(VT);
// If Identity shuffle return that node.
bool Identity = true, AllSame = true;
- for (unsigned i = 0; i != NElts; ++i) {
- if (MaskVec[i] >= 0 && MaskVec[i] != (int)i) Identity = false;
+ for (int i = 0; i != NElts; ++i) {
+ if (MaskVec[i] >= 0 && MaskVec[i] != i) Identity = false;
if (MaskVec[i] != MaskVec[0]) AllSame = false;
}
if (Identity && NElts)
@@ -1592,7 +1582,7 @@ SDValue SelectionDAG::getVectorShuffle(EVT VT, SDLoc dl, SDValue N1,
BitVector UndefElements;
SDValue Splat = BV->getSplatValue(&UndefElements);
// If this is a splat of an undef, shuffling it is also undef.
- if (Splat && Splat.getOpcode() == ISD::UNDEF)
+ if (Splat && Splat.isUndef())
return getUNDEF(VT);
bool SameNumElts =
@@ -1612,11 +1602,9 @@ SDValue SelectionDAG::getVectorShuffle(EVT VT, SDLoc dl, SDValue N1,
// If the shuffle itself creates a splat, build the vector directly.
if (AllSame && SameNumElts) {
- const SDValue &Splatted = BV->getOperand(MaskVec[0]);
- SmallVector<SDValue, 8> Ops(NElts, Splatted);
-
EVT BuildVT = BV->getValueType(0);
- SDValue NewBV = getNode(ISD::BUILD_VECTOR, dl, BuildVT, Ops);
+ const SDValue &Splatted = BV->getOperand(MaskVec[0]);
+ SDValue NewBV = getSplatBuildVector(BuildVT, dl, Splatted);
// We may have jumped through bitcasts, so the type of the
// BUILD_VECTOR may not match the type of the shuffle.
@@ -1630,23 +1618,23 @@ SDValue SelectionDAG::getVectorShuffle(EVT VT, SDLoc dl, SDValue N1,
FoldingSetNodeID ID;
SDValue Ops[2] = { N1, N2 };
AddNodeIDNode(ID, ISD::VECTOR_SHUFFLE, getVTList(VT), Ops);
- for (unsigned i = 0; i != NElts; ++i)
+ for (int i = 0; i != NElts; ++i)
ID.AddInteger(MaskVec[i]);
void* IP = nullptr;
- if (SDNode *E = FindNodeOrInsertPos(ID, dl.getDebugLoc(), IP))
+ if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP))
return SDValue(E, 0);
// Allocate the mask array for the node out of the BumpPtrAllocator, since
// SDNode doesn't have access to it. This memory will be "leaked" when
// the node is deallocated, but recovered when the OperandAllocator is released.
int *MaskAlloc = OperandAllocator.Allocate<int>(NElts);
- memcpy(MaskAlloc, &MaskVec[0], NElts * sizeof(int));
+ std::copy(MaskVec.begin(), MaskVec.end(), MaskAlloc);
+
+ auto *N = newSDNode<ShuffleVectorSDNode>(VT, dl.getIROrder(),
+ dl.getDebugLoc(), MaskAlloc);
+ createOperands(N, Ops);
- ShuffleVectorSDNode *N =
- new (NodeAllocator) ShuffleVectorSDNode(VT, dl.getIROrder(),
- dl.getDebugLoc(), N1, N2,
- MaskAlloc);
CSEMap.InsertNode(N, IP);
InsertNode(N);
return SDValue(N, 0);
@@ -1659,13 +1647,12 @@ SDValue SelectionDAG::getCommutedVectorShuffle(const ShuffleVectorSDNode &SV) {
SDValue Op0 = SV.getOperand(0);
SDValue Op1 = SV.getOperand(1);
- return getVectorShuffle(VT, SDLoc(&SV), Op1, Op0, &MaskVec[0]);
+ return getVectorShuffle(VT, SDLoc(&SV), Op1, Op0, MaskVec);
}
-SDValue SelectionDAG::getConvertRndSat(EVT VT, SDLoc dl,
- SDValue Val, SDValue DTy,
- SDValue STy, SDValue Rnd, SDValue Sat,
- ISD::CvtCode Code) {
+SDValue SelectionDAG::getConvertRndSat(EVT VT, const SDLoc &dl, SDValue Val,
+ SDValue DTy, SDValue STy, SDValue Rnd,
+ SDValue Sat, ISD::CvtCode Code) {
// If the src and dest types are the same and the conversion is between
// integer types of the same sign or two floats, no conversion is necessary.
if (DTy == STy &&
@@ -1676,12 +1663,13 @@ SDValue SelectionDAG::getConvertRndSat(EVT VT, SDLoc dl,
SDValue Ops[] = { Val, DTy, STy, Rnd, Sat };
AddNodeIDNode(ID, ISD::CONVERT_RNDSAT, getVTList(VT), Ops);
void* IP = nullptr;
- if (SDNode *E = FindNodeOrInsertPos(ID, dl.getDebugLoc(), IP))
+ if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP))
return SDValue(E, 0);
- CvtRndSatSDNode *N = new (NodeAllocator) CvtRndSatSDNode(VT, dl.getIROrder(),
- dl.getDebugLoc(),
- Ops, Code);
+ auto *N =
+ newSDNode<CvtRndSatSDNode>(VT, dl.getIROrder(), dl.getDebugLoc(), Code);
+ createOperands(N, Ops);
+
CSEMap.InsertNode(N, IP);
InsertNode(N);
return SDValue(N, 0);
@@ -1695,7 +1683,7 @@ SDValue SelectionDAG::getRegister(unsigned RegNo, EVT VT) {
if (SDNode *E = FindNodeOrInsertPos(ID, IP))
return SDValue(E, 0);
- SDNode *N = new (NodeAllocator) RegisterSDNode(RegNo, VT);
+ auto *N = newSDNode<RegisterSDNode>(RegNo, VT);
CSEMap.InsertNode(N, IP);
InsertNode(N);
return SDValue(N, 0);
@@ -1709,13 +1697,14 @@ SDValue SelectionDAG::getRegisterMask(const uint32_t *RegMask) {
if (SDNode *E = FindNodeOrInsertPos(ID, IP))
return SDValue(E, 0);
- SDNode *N = new (NodeAllocator) RegisterMaskSDNode(RegMask);
+ auto *N = newSDNode<RegisterMaskSDNode>(RegMask);
CSEMap.InsertNode(N, IP);
InsertNode(N);
return SDValue(N, 0);
}
-SDValue SelectionDAG::getEHLabel(SDLoc dl, SDValue Root, MCSymbol *Label) {
+SDValue SelectionDAG::getEHLabel(const SDLoc &dl, SDValue Root,
+ MCSymbol *Label) {
FoldingSetNodeID ID;
SDValue Ops[] = { Root };
AddNodeIDNode(ID, ISD::EH_LABEL, getVTList(MVT::Other), Ops);
@@ -1724,14 +1713,14 @@ SDValue SelectionDAG::getEHLabel(SDLoc dl, SDValue Root, MCSymbol *Label) {
if (SDNode *E = FindNodeOrInsertPos(ID, IP))
return SDValue(E, 0);
- SDNode *N = new (NodeAllocator) EHLabelSDNode(dl.getIROrder(),
- dl.getDebugLoc(), Root, Label);
+ auto *N = newSDNode<EHLabelSDNode>(dl.getIROrder(), dl.getDebugLoc(), Label);
+ createOperands(N, Ops);
+
CSEMap.InsertNode(N, IP);
InsertNode(N);
return SDValue(N, 0);
}
-
SDValue SelectionDAG::getBlockAddress(const BlockAddress *BA, EVT VT,
int64_t Offset,
bool isTarget,
@@ -1747,8 +1736,7 @@ SDValue SelectionDAG::getBlockAddress(const BlockAddress *BA, EVT VT,
if (SDNode *E = FindNodeOrInsertPos(ID, IP))
return SDValue(E, 0);
- SDNode *N = new (NodeAllocator) BlockAddressSDNode(Opc, VT, BA, Offset,
- TargetFlags);
+ auto *N = newSDNode<BlockAddressSDNode>(Opc, VT, BA, Offset, TargetFlags);
CSEMap.InsertNode(N, IP);
InsertNode(N);
return SDValue(N, 0);
@@ -1766,13 +1754,12 @@ SDValue SelectionDAG::getSrcValue(const Value *V) {
if (SDNode *E = FindNodeOrInsertPos(ID, IP))
return SDValue(E, 0);
- SDNode *N = new (NodeAllocator) SrcValueSDNode(V);
+ auto *N = newSDNode<SrcValueSDNode>(V);
CSEMap.InsertNode(N, IP);
InsertNode(N);
return SDValue(N, 0);
}
-/// getMDNode - Return an MDNodeSDNode which holds an MDNode.
SDValue SelectionDAG::getMDNode(const MDNode *MD) {
FoldingSetNodeID ID;
AddNodeIDNode(ID, ISD::MDNODE_SDNODE, getVTList(MVT::Other), None);
@@ -1782,7 +1769,7 @@ SDValue SelectionDAG::getMDNode(const MDNode *MD) {
if (SDNode *E = FindNodeOrInsertPos(ID, IP))
return SDValue(E, 0);
- SDNode *N = new (NodeAllocator) MDNodeSDNode(MD);
+ auto *N = newSDNode<MDNodeSDNode>(MD);
CSEMap.InsertNode(N, IP);
InsertNode(N);
return SDValue(N, 0);
@@ -1795,8 +1782,7 @@ SDValue SelectionDAG::getBitcast(EVT VT, SDValue V) {
return getNode(ISD::BITCAST, SDLoc(V), VT, V);
}
-/// getAddrSpaceCast - Return an AddrSpaceCastSDNode.
-SDValue SelectionDAG::getAddrSpaceCast(SDLoc dl, EVT VT, SDValue Ptr,
+SDValue SelectionDAG::getAddrSpaceCast(const SDLoc &dl, EVT VT, SDValue Ptr,
unsigned SrcAS, unsigned DestAS) {
SDValue Ops[] = {Ptr};
FoldingSetNodeID ID;
@@ -1805,12 +1791,13 @@ SDValue SelectionDAG::getAddrSpaceCast(SDLoc dl, EVT VT, SDValue Ptr,
ID.AddInteger(DestAS);
void *IP = nullptr;
- if (SDNode *E = FindNodeOrInsertPos(ID, dl.getDebugLoc(), IP))
+ if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP))
return SDValue(E, 0);
- SDNode *N = new (NodeAllocator) AddrSpaceCastSDNode(dl.getIROrder(),
- dl.getDebugLoc(),
- VT, Ptr, SrcAS, DestAS);
+ auto *N = newSDNode<AddrSpaceCastSDNode>(dl.getIROrder(), dl.getDebugLoc(),
+ VT, SrcAS, DestAS);
+ createOperands(N, Ops);
+
CSEMap.InsertNode(N, IP);
InsertNode(N);
return SDValue(N, 0);
@@ -1835,9 +1822,8 @@ SDValue SelectionDAG::expandVAArg(SDNode *Node) {
SDValue Tmp2 = Node->getOperand(1);
unsigned Align = Node->getConstantOperandVal(3);
- SDValue VAListLoad =
- getLoad(TLI.getPointerTy(getDataLayout()), dl, Tmp1, Tmp2,
- MachinePointerInfo(V), false, false, false, 0);
+ SDValue VAListLoad = getLoad(TLI.getPointerTy(getDataLayout()), dl, Tmp1,
+ Tmp2, MachinePointerInfo(V));
SDValue VAList = VAListLoad;
if (Align > TLI.getMinStackArgumentAlignment()) {
@@ -1856,11 +1842,10 @@ SDValue SelectionDAG::expandVAArg(SDNode *Node) {
VT.getTypeForEVT(*getContext())),
dl, VAList.getValueType()));
// Store the incremented VAList to the legalized pointer
- Tmp1 = getStore(VAListLoad.getValue(1), dl, Tmp1, Tmp2,
- MachinePointerInfo(V), false, false, 0);
+ Tmp1 =
+ getStore(VAListLoad.getValue(1), dl, Tmp1, Tmp2, MachinePointerInfo(V));
// Load the actual argument out of the pointer VAList
- return getLoad(VT, dl, Tmp1, VAList, MachinePointerInfo(),
- false, false, false, 0);
+ return getLoad(VT, dl, Tmp1, VAList, MachinePointerInfo());
}
SDValue SelectionDAG::expandVACopy(SDNode *Node) {
@@ -1870,15 +1855,13 @@ SDValue SelectionDAG::expandVACopy(SDNode *Node) {
// output, returning the chain.
const Value *VD = cast<SrcValueSDNode>(Node->getOperand(3))->getValue();
const Value *VS = cast<SrcValueSDNode>(Node->getOperand(4))->getValue();
- SDValue Tmp1 = getLoad(TLI.getPointerTy(getDataLayout()), dl,
- Node->getOperand(0), Node->getOperand(2),
- MachinePointerInfo(VS), false, false, false, 0);
+ SDValue Tmp1 =
+ getLoad(TLI.getPointerTy(getDataLayout()), dl, Node->getOperand(0),
+ Node->getOperand(2), MachinePointerInfo(VS));
return getStore(Tmp1.getValue(1), dl, Tmp1, Node->getOperand(1),
- MachinePointerInfo(VD), false, false, 0);
+ MachinePointerInfo(VD));
}
-/// CreateStackTemporary - Create a stack temporary, suitable for holding the
-/// specified value type.
SDValue SelectionDAG::CreateStackTemporary(EVT VT, unsigned minAlign) {
MachineFrameInfo *FrameInfo = getMachineFunction().getFrameInfo();
unsigned ByteSize = VT.getStoreSize();
@@ -1890,8 +1873,6 @@ SDValue SelectionDAG::CreateStackTemporary(EVT VT, unsigned minAlign) {
return getFrameIndex(FrameIdx, TLI->getPointerTy(getDataLayout()));
}
-/// CreateStackTemporary - Create a stack temporary suitable for holding
-/// either of the specified value types.
SDValue SelectionDAG::CreateStackTemporary(EVT VT1, EVT VT2) {
unsigned Bytes = std::max(VT1.getStoreSize(), VT2.getStoreSize());
Type *Ty1 = VT1.getTypeForEVT(*getContext());
@@ -1905,8 +1886,8 @@ SDValue SelectionDAG::CreateStackTemporary(EVT VT1, EVT VT2) {
return getFrameIndex(FrameIdx, TLI->getPointerTy(getDataLayout()));
}
-SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1,
- SDValue N2, ISD::CondCode Cond, SDLoc dl) {
+SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1, SDValue N2,
+ ISD::CondCode Cond, const SDLoc &dl) {
// These setcc operations always fold.
switch (Cond) {
default: break;
@@ -2469,6 +2450,12 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero,
KnownOne = KnownOne.trunc(BitWidth);
break;
}
+ case ISD::BSWAP: {
+ computeKnownBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth+1);
+ KnownZero = KnownZero2.byteSwap();
+ KnownOne = KnownOne2.byteSwap();
+ break;
+ }
case ISD::SMIN:
case ISD::SMAX:
case ISD::UMIN:
@@ -2506,12 +2493,36 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero,
assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
}
-/// ComputeNumSignBits - Return the number of times the sign bit of the
-/// register is replicated into the other bits. We know that at least 1 bit
-/// is always equal to the sign bit (itself), but other cases can give us
-/// information. For example, immediately after an "SRA X, 2", we know that
-/// the top 3 bits are all equal to each other, so we return 3.
-unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const{
+bool SelectionDAG::isKnownToBeAPowerOfTwo(SDValue Val) const {
+ // A left-shift of a constant one will have exactly one bit set because
+ // shifting the bit off the end is undefined.
+ if (Val.getOpcode() == ISD::SHL) {
+ auto *C = dyn_cast<ConstantSDNode>(Val.getOperand(0));
+ if (C && C->getAPIntValue() == 1)
+ return true;
+ }
+
+ // Similarly, a logical right-shift of a constant sign-bit will have exactly
+ // one bit set.
+ if (Val.getOpcode() == ISD::SRL) {
+ auto *C = dyn_cast<ConstantSDNode>(Val.getOperand(0));
+ if (C && C->getAPIntValue().isSignBit())
+ return true;
+ }
+
+ // More could be done here, though the above checks are enough
+ // to handle some common cases.
+
+ // Fall back to computeKnownBits to catch other known cases.
+ EVT OpVT = Val.getValueType();
+ unsigned BitWidth = OpVT.getScalarType().getSizeInBits();
+ APInt KnownZero, KnownOne;
+ computeKnownBits(Val, KnownZero, KnownOne);
+ return (KnownZero.countPopulation() == BitWidth - 1) &&
+ (KnownOne.countPopulation() == 1);
+}
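
A sketch of what the new query recognizes, assuming DAG, DL, and an i32
SDValue X in scope:

    SDValue One  = DAG.getConstant(1, DL, MVT::i32);
    SDValue Pow2 = DAG.getNode(ISD::SHL, DL, MVT::i32, One, X);
    bool Known   = DAG.isKnownToBeAPowerOfTwo(Pow2); // true: (shl 1, x)
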
+
+unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const {
EVT VT = Op.getValueType();
assert(VT.isInteger() && "Invalid VT!");
unsigned VTBits = VT.getScalarType().getSizeInBits();
@@ -2761,11 +2772,6 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const{
return std::max(FirstAnswer, std::min(VTBits, Mask.countLeadingZeros()));
}
-/// isBaseWithConstantOffset - Return true if the specified operand is an
-/// ISD::ADD with a ConstantSDNode on the right-hand side, or if it is an
-/// ISD::OR with a ConstantSDNode that is guaranteed to have the same
-/// semantics as an ADD. This handles the equivalence:
-/// X|Cst == X+Cst iff X&Cst = 0.
bool SelectionDAG::isBaseWithConstantOffset(SDValue Op) const {
if ((Op.getOpcode() != ISD::ADD && Op.getOpcode() != ISD::OR) ||
!isa<ConstantSDNode>(Op.getOperand(1)))
@@ -2779,7 +2785,6 @@ bool SelectionDAG::isBaseWithConstantOffset(SDValue Op) const {
return true;
}
-
bool SelectionDAG::isKnownNeverNaN(SDValue Op) const {
// If we're told that NaNs won't happen, assume they won't.
if (getTarget().Options.NoNaNsFPMath)
@@ -2834,28 +2839,30 @@ bool SelectionDAG::haveNoCommonBitsSet(SDValue A, SDValue B) const {
return (AZero | BZero).isAllOnesValue();
}
-static SDValue FoldCONCAT_VECTORS(SDLoc DL, EVT VT, ArrayRef<SDValue> Ops,
+static SDValue FoldCONCAT_VECTORS(const SDLoc &DL, EVT VT,
+ ArrayRef<SDValue> Ops,
llvm::SelectionDAG &DAG) {
if (Ops.size() == 1)
return Ops[0];
// Concat of UNDEFs is UNDEF.
- if (std::all_of(Ops.begin(), Ops.end(),
- [](SDValue Op) { return Op.isUndef(); }))
+ if (llvm::all_of(Ops, [](SDValue Op) { return Op.isUndef(); }))
return DAG.getUNDEF(VT);
- // A CONCAT_VECTOR with all operands BUILD_VECTOR can be simplified
- // to one big BUILD_VECTOR.
- // FIXME: Add support for UNDEF and SCALAR_TO_VECTOR as well.
- if (!std::all_of(Ops.begin(), Ops.end(), [](SDValue Op) {
- return Op.getOpcode() == ISD::BUILD_VECTOR;
- }))
- return SDValue();
-
+ // A CONCAT_VECTOR with all UNDEF/BUILD_VECTOR operands can be
+ // simplified to one big BUILD_VECTOR.
+ // FIXME: Add support for SCALAR_TO_VECTOR as well.
EVT SVT = VT.getScalarType();
SmallVector<SDValue, 16> Elts;
- for (SDValue Op : Ops)
- Elts.append(Op->op_begin(), Op->op_end());
+ for (SDValue Op : Ops) {
+ EVT OpVT = Op.getValueType();
+ if (Op.isUndef())
+ Elts.append(OpVT.getVectorNumElements(), DAG.getUNDEF(SVT));
+ else if (Op.getOpcode() == ISD::BUILD_VECTOR)
+ Elts.append(Op->op_begin(), Op->op_end());
+ else
+ return SDValue();
+ }
// BUILD_VECTOR requires all inputs to be of the same type, find the
// maximum type and extend them all.
@@ -2871,25 +2878,24 @@ static SDValue FoldCONCAT_VECTORS(SDLoc DL, EVT VT, ArrayRef<SDValue> Ops,
return DAG.getNode(ISD::BUILD_VECTOR, DL, VT, Elts);
}
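
With the UNDEF handling added above, a concat of BUILD_VECTOR and UNDEF
operands folds to one wide BUILD_VECTOR padded with per-element UNDEFs; a
sketch assuming i32 SDValues A and B:

    SDValue Lo = DAG.getBuildVector(MVT::v2i32, DL, {A, B});
    SDValue Hi = DAG.getUNDEF(MVT::v2i32);
    // Folds to BUILD_VECTOR A, B, undef, undef of type v4i32.
    SDValue Cat = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v4i32, Lo, Hi);
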
-/// getNode - Gets or creates the specified node.
-///
-SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT) {
+/// Gets or creates the specified node.
+SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT) {
FoldingSetNodeID ID;
AddNodeIDNode(ID, Opcode, getVTList(VT), None);
void *IP = nullptr;
- if (SDNode *E = FindNodeOrInsertPos(ID, DL.getDebugLoc(), IP))
+ if (SDNode *E = FindNodeOrInsertPos(ID, DL, IP))
return SDValue(E, 0);
- SDNode *N = new (NodeAllocator) SDNode(Opcode, DL.getIROrder(),
- DL.getDebugLoc(), getVTList(VT));
+ auto *N = newSDNode<SDNode>(Opcode, DL.getIROrder(), DL.getDebugLoc(),
+ getVTList(VT));
CSEMap.InsertNode(N, IP);
InsertNode(N);
return SDValue(N, 0);
}
-SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL,
- EVT VT, SDValue Operand) {
+SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
+ SDValue Operand) {
// Constant fold unary operations with an integer constant operand. Even
// opaque constant will be folded, because the folding of unary operations
// doesn't create new constants with different values. Nevertheless, the
@@ -3054,7 +3060,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL,
"Vector element count mismatch!");
assert(Operand.getValueType().bitsLT(VT) &&
"Invalid fpext node, dst < src!");
- if (Operand.getOpcode() == ISD::UNDEF)
+ if (Operand.isUndef())
return getUNDEF(VT);
break;
case ISD::SIGN_EXTEND:
@@ -3148,6 +3154,12 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL,
if (OpOpcode == ISD::UNDEF)
return getUNDEF(VT);
break;
+ case ISD::BITREVERSE:
+ assert(VT.isInteger() && VT == Operand.getValueType() &&
+ "Invalid BITREVERSE!");
+ if (OpOpcode == ISD::UNDEF)
+ return getUNDEF(VT);
+ break;
case ISD::BITCAST:
// Basic sanity checking.
assert(VT.getSizeInBits() == Operand.getValueType().getSizeInBits()
@@ -3192,20 +3204,20 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL,
SDNode *N;
SDVTList VTs = getVTList(VT);
+ SDValue Ops[] = {Operand};
if (VT != MVT::Glue) { // Don't CSE flag producing nodes
FoldingSetNodeID ID;
- SDValue Ops[1] = { Operand };
AddNodeIDNode(ID, Opcode, VTs, Ops);
void *IP = nullptr;
- if (SDNode *E = FindNodeOrInsertPos(ID, DL.getDebugLoc(), IP))
+ if (SDNode *E = FindNodeOrInsertPos(ID, DL, IP))
return SDValue(E, 0);
- N = new (NodeAllocator) UnarySDNode(Opcode, DL.getIROrder(),
- DL.getDebugLoc(), VTs, Operand);
+ N = newSDNode<SDNode>(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs);
+ createOperands(N, Ops);
CSEMap.InsertNode(N, IP);
} else {
- N = new (NodeAllocator) UnarySDNode(Opcode, DL.getIROrder(),
- DL.getDebugLoc(), VTs, Operand);
+ N = newSDNode<SDNode>(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs);
+ createOperands(N, Ops);
}
InsertNode(N);
@@ -3250,8 +3262,8 @@ static std::pair<APInt, bool> FoldValue(unsigned Opcode, const APInt &C1,
return std::make_pair(APInt(1, 0), false);
}
-SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, SDLoc DL, EVT VT,
- const ConstantSDNode *Cst1,
+SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL,
+ EVT VT, const ConstantSDNode *Cst1,
const ConstantSDNode *Cst2) {
if (Cst1->isOpaque() || Cst2->isOpaque())
return SDValue();
@@ -3263,8 +3275,29 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, SDLoc DL, EVT VT,
return getConstant(Folded.first, DL, VT);
}
-SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, SDLoc DL, EVT VT,
- SDNode *Cst1, SDNode *Cst2) {
+SDValue SelectionDAG::FoldSymbolOffset(unsigned Opcode, EVT VT,
+ const GlobalAddressSDNode *GA,
+ const SDNode *N2) {
+ if (GA->getOpcode() != ISD::GlobalAddress)
+ return SDValue();
+ if (!TLI->isOffsetFoldingLegal(GA))
+ return SDValue();
+ const ConstantSDNode *Cst2 = dyn_cast<ConstantSDNode>(N2);
+ if (!Cst2)
+ return SDValue();
+ int64_t Offset = Cst2->getSExtValue();
+ switch (Opcode) {
+ case ISD::ADD: break;
+ case ISD::SUB: Offset = -uint64_t(Offset); break;
+ default: return SDValue();
+ }
+ return getGlobalAddress(GA->getGlobal(), SDLoc(Cst2), VT,
+ GA->getOffset() + uint64_t(Offset));
+}
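
A sketch of the new symbol-offset fold, assuming GV, DL, and a pointer-sized
PtrVT in scope; it only fires when TLI->isOffsetFoldingLegal(GA) holds:

    SDValue Sym  = DAG.getGlobalAddress(GV, DL, PtrVT, /*Offset=*/8);
    SDValue Four = DAG.getConstant(4, DL, PtrVT);
    // (add @g+8, 4) -> @g+12; with ISD::SUB it would give @g+4.
    SDValue Folded = DAG.FoldConstantArithmetic(ISD::ADD, DL, PtrVT,
                                                Sym.getNode(), Four.getNode());
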
+
+SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL,
+ EVT VT, SDNode *Cst1,
+ SDNode *Cst2) {
// If the opcode is a target-specific ISD node, there's nothing we can
// do here and the operand rules may not line up with the below, so
// bail early.
@@ -3274,21 +3307,20 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, SDLoc DL, EVT VT,
// Handle the case of two scalars.
if (const ConstantSDNode *Scalar1 = dyn_cast<ConstantSDNode>(Cst1)) {
if (const ConstantSDNode *Scalar2 = dyn_cast<ConstantSDNode>(Cst2)) {
- if (SDValue Folded =
- FoldConstantArithmetic(Opcode, DL, VT, Scalar1, Scalar2)) {
- if (!VT.isVector())
- return Folded;
- SmallVector<SDValue, 4> Outputs;
- // We may have a vector type but a scalar result. Create a splat.
- Outputs.resize(VT.getVectorNumElements(), Outputs.back());
- // Build a big vector out of the scalar elements we generated.
- return getNode(ISD::BUILD_VECTOR, SDLoc(), VT, Outputs);
- } else {
- return SDValue();
- }
+ SDValue Folded = FoldConstantArithmetic(Opcode, DL, VT, Scalar1, Scalar2);
+ assert((!Folded || !VT.isVector()) &&
+ "Can't fold vectors ops with scalar operands");
+ return Folded;
}
}
+ // fold (add Sym, c) -> Sym+c
+ if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Cst1))
+ return FoldSymbolOffset(Opcode, VT, GA, Cst2);
+ if (isCommutativeBinOp(Opcode))
+ if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Cst2))
+ return FoldSymbolOffset(Opcode, VT, GA, Cst1);
+
// For vectors extract each constant element into Inputs so we can constant
// fold them individually.
BuildVectorSDNode *BV1 = dyn_cast<BuildVectorSDNode>(Cst1);
@@ -3329,11 +3361,11 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, SDLoc DL, EVT VT,
Outputs.resize(VT.getVectorNumElements(), Outputs.back());
// Build a big vector out of the scalar elements we generated.
- return getNode(ISD::BUILD_VECTOR, SDLoc(), VT, Outputs);
+ return getBuildVector(VT, SDLoc(), Outputs);
}
-SDValue SelectionDAG::FoldConstantVectorArithmetic(unsigned Opcode, SDLoc DL,
- EVT VT,
+SDValue SelectionDAG::FoldConstantVectorArithmetic(unsigned Opcode,
+ const SDLoc &DL, EVT VT,
ArrayRef<SDValue> Ops,
const SDNodeFlags *Flags) {
// If the opcode is a target-specific ISD node, there's nothing we can
@@ -3355,8 +3387,8 @@ SDValue SelectionDAG::FoldConstantVectorArithmetic(unsigned Opcode, SDLoc DL,
auto IsConstantBuildVectorOrUndef = [&](const SDValue &Op) {
BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(Op);
- return (Op.getOpcode() == ISD::UNDEF) ||
- (Op.getOpcode() == ISD::CONDCODE) || (BV && BV->isConstant());
+ return (Op.isUndef()) || (Op.getOpcode() == ISD::CONDCODE) ||
+ (BV && BV->isConstant());
};
// All operands must be vector types with the same number of elements as
@@ -3375,7 +3407,7 @@ SDValue SelectionDAG::FoldConstantVectorArithmetic(unsigned Opcode, SDLoc DL,
EVT LegalSVT = VT.getScalarType();
if (LegalSVT.isInteger()) {
LegalSVT = TLI->getTypeToTransformTo(*getContext(), LegalSVT);
- if (LegalSVT.bitsLT(SVT))
+ if (LegalSVT.bitsLT(VT.getScalarType()))
return SDValue();
}
@@ -3414,20 +3446,18 @@ SDValue SelectionDAG::FoldConstantVectorArithmetic(unsigned Opcode, SDLoc DL,
ScalarResult = getNode(ISD::SIGN_EXTEND, DL, LegalSVT, ScalarResult);
// Scalar folding only succeeded if the result is a constant or UNDEF.
- if (ScalarResult.getOpcode() != ISD::UNDEF &&
- ScalarResult.getOpcode() != ISD::Constant &&
+ if (!ScalarResult.isUndef() && ScalarResult.getOpcode() != ISD::Constant &&
ScalarResult.getOpcode() != ISD::ConstantFP)
return SDValue();
ScalarResults.push_back(ScalarResult);
}
- assert(ScalarResults.size() == NumElts &&
- "Unexpected number of scalar results for BUILD_VECTOR");
- return getNode(ISD::BUILD_VECTOR, DL, VT, ScalarResults);
+ return getBuildVector(VT, DL, ScalarResults);
}
-SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1,
- SDValue N2, const SDNodeFlags *Flags) {
+SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
+ SDValue N1, SDValue N2,
+ const SDNodeFlags *Flags) {
ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2);
ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
@@ -3617,14 +3647,14 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1,
};
if (N1C) {
- APInt Val = N1C->getAPIntValue();
+ const APInt &Val = N1C->getAPIntValue();
return SignExtendInReg(Val);
}
if (ISD::isBuildVectorOfConstantSDNodes(N1.getNode())) {
SmallVector<SDValue, 8> Ops;
for (int i = 0, e = VT.getVectorNumElements(); i != e; ++i) {
SDValue Op = N1.getOperand(i);
- if (Op.getOpcode() == ISD::UNDEF) {
+ if (Op.isUndef()) {
Ops.push_back(getUNDEF(VT.getScalarType()));
continue;
}
@@ -3637,13 +3667,13 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1,
break;
}
if (Ops.size() == VT.getVectorNumElements())
- return getNode(ISD::BUILD_VECTOR, DL, VT, Ops);
+ return getBuildVector(VT, DL, Ops);
}
break;
}
case ISD::EXTRACT_VECTOR_ELT:
// EXTRACT_VECTOR_ELT of an UNDEF is an UNDEF.
- if (N1.getOpcode() == ISD::UNDEF)
+ if (N1.isUndef())
return getUNDEF(VT);
// EXTRACT_VECTOR_ELT of out-of-bounds element is an UNDEF
@@ -3802,7 +3832,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1,
}
// Canonicalize an UNDEF to the RHS, even over a constant.
- if (N1.getOpcode() == ISD::UNDEF) {
+ if (N1.isUndef()) {
if (isCommutativeBinOp(Opcode)) {
std::swap(N1, N2);
} else {
@@ -3831,10 +3861,10 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1,
}
// Fold a bunch of operators when the RHS is undef.
- if (N2.getOpcode() == ISD::UNDEF) {
+ if (N2.isUndef()) {
switch (Opcode) {
case ISD::XOR:
- if (N1.getOpcode() == ISD::UNDEF)
+ if (N1.isUndef())
// Handle undef ^ undef -> 0 special case. This is a common
// idiom (misuse).
return getConstant(0, DL, VT);
@@ -3877,21 +3907,20 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1,
}
// Memoize this node if possible.
- BinarySDNode *N;
+ SDNode *N;
SDVTList VTs = getVTList(VT);
if (VT != MVT::Glue) {
SDValue Ops[] = {N1, N2};
FoldingSetNodeID ID;
AddNodeIDNode(ID, Opcode, VTs, Ops);
void *IP = nullptr;
- if (SDNode *E = FindNodeOrInsertPos(ID, DL.getDebugLoc(), IP)) {
+ if (SDNode *E = FindNodeOrInsertPos(ID, DL, IP)) {
if (Flags)
E->intersectFlagsWith(Flags);
return SDValue(E, 0);
}
N = GetBinarySDNode(Opcode, DL, VTs, N1, N2, Flags);
-
CSEMap.InsertNode(N, IP);
} else {
N = GetBinarySDNode(Opcode, DL, VTs, N1, N2, Flags);
@@ -3901,7 +3930,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1,
return SDValue(N, 0);
}
-SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT,
+SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
SDValue N1, SDValue N2, SDValue N3) {
// Perform various simplifications.
switch (Opcode) {
@@ -3982,36 +4011,35 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT,
// Memoize node if it doesn't produce a flag.
SDNode *N;
SDVTList VTs = getVTList(VT);
+ SDValue Ops[] = {N1, N2, N3};
if (VT != MVT::Glue) {
- SDValue Ops[] = { N1, N2, N3 };
FoldingSetNodeID ID;
AddNodeIDNode(ID, Opcode, VTs, Ops);
void *IP = nullptr;
- if (SDNode *E = FindNodeOrInsertPos(ID, DL.getDebugLoc(), IP))
+ if (SDNode *E = FindNodeOrInsertPos(ID, DL, IP))
return SDValue(E, 0);
- N = new (NodeAllocator) TernarySDNode(Opcode, DL.getIROrder(),
- DL.getDebugLoc(), VTs, N1, N2, N3);
+ N = newSDNode<SDNode>(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs);
+ createOperands(N, Ops);
CSEMap.InsertNode(N, IP);
} else {
- N = new (NodeAllocator) TernarySDNode(Opcode, DL.getIROrder(),
- DL.getDebugLoc(), VTs, N1, N2, N3);
+ N = newSDNode<SDNode>(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs);
+ createOperands(N, Ops);
}
InsertNode(N);
return SDValue(N, 0);
}
-SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT,
- SDValue N1, SDValue N2, SDValue N3,
- SDValue N4) {
+SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
+ SDValue N1, SDValue N2, SDValue N3, SDValue N4) {
SDValue Ops[] = { N1, N2, N3, N4 };
return getNode(Opcode, DL, VT, Ops);
}
-SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT,
- SDValue N1, SDValue N2, SDValue N3,
- SDValue N4, SDValue N5) {
+SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
+ SDValue N1, SDValue N2, SDValue N3, SDValue N4,
+ SDValue N5) {
SDValue Ops[] = { N1, N2, N3, N4, N5 };
return getNode(Opcode, DL, VT, Ops);
}
@@ -4041,8 +4069,8 @@ SDValue SelectionDAG::getStackArgumentTokenFactor(SDValue Chain) {
/// getMemsetValue - Vectorized representation of the memset value
/// operand.
static SDValue getMemsetValue(SDValue Value, EVT VT, SelectionDAG &DAG,
- SDLoc dl) {
- assert(Value.getOpcode() != ISD::UNDEF);
+ const SDLoc &dl) {
+ assert(!Value.isUndef());
unsigned NumBits = VT.getScalarType().getSizeInBits();
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Value)) {
@@ -4069,13 +4097,9 @@ static SDValue getMemsetValue(SDValue Value, EVT VT, SelectionDAG &DAG,
}
if (VT != Value.getValueType() && !VT.isInteger())
- Value = DAG.getNode(ISD::BITCAST, dl, VT.getScalarType(), Value);
- if (VT != Value.getValueType()) {
- assert(VT.getVectorElementType() == Value.getValueType() &&
- "value type should be one vector element here");
- SmallVector<SDValue, 8> BVOps(VT.getVectorNumElements(), Value);
- Value = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, BVOps);
- }
+ Value = DAG.getBitcast(VT.getScalarType(), Value);
+ if (VT != Value.getValueType())
+ Value = DAG.getSplatBuildVector(VT, dl, Value);
return Value;
}
@@ -4083,7 +4107,7 @@ static SDValue getMemsetValue(SDValue Value, EVT VT, SelectionDAG &DAG,
/// getMemsetStringVal - Similar to getMemsetValue. Except this is only
/// used when a memcpy is turned into a memset when the source is a constant
/// string ptr.
-static SDValue getMemsetStringVal(EVT VT, SDLoc dl, SelectionDAG &DAG,
+static SDValue getMemsetStringVal(EVT VT, const SDLoc &dl, SelectionDAG &DAG,
const TargetLowering &TLI, StringRef Str) {
// Handle vector with all elements zero.
if (Str.empty()) {
@@ -4124,19 +4148,16 @@ static SDValue getMemsetStringVal(EVT VT, SDLoc dl, SelectionDAG &DAG,
return SDValue(nullptr, 0);
}
-/// getMemBasePlusOffset - Returns base and offset node for the
-///
-static SDValue getMemBasePlusOffset(SDValue Base, unsigned Offset, SDLoc dl,
- SelectionDAG &DAG) {
+SDValue SelectionDAG::getMemBasePlusOffset(SDValue Base, unsigned Offset,
+ const SDLoc &DL) {
EVT VT = Base.getValueType();
- return DAG.getNode(ISD::ADD, dl,
- VT, Base, DAG.getConstant(Offset, dl, VT));
+ return getNode(ISD::ADD, DL, VT, Base, getConstant(Offset, DL, VT));
}
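
Now a SelectionDAG member rather than a file-static helper; a sketch assuming
DAG, DL, and a pointer-typed Base in scope:

    // Address of the byte at Base + 16: emits (add Base, 16).
    SDValue Addr = DAG.getMemBasePlusOffset(Base, 16, DL);
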
/// isMemSrcFromString - Returns true if memcpy source is a string constant.
///
static bool isMemSrcFromString(SDValue Src, StringRef &Str) {
- unsigned SrcDelta = 0;
+ uint64_t SrcDelta = 0;
GlobalAddressSDNode *G = nullptr;
if (Src.getOpcode() == ISD::GlobalAddress)
G = cast<GlobalAddressSDNode>(Src);
@@ -4149,7 +4170,8 @@ static bool isMemSrcFromString(SDValue Src, StringRef &Str) {
if (!G)
return false;
- return getConstantStringInfo(G->getGlobal(), Str, SrcDelta, false);
+ return getConstantStringInfo(G->getGlobal(), Str,
+ SrcDelta + G->getOffset(), false);
}
/// Determines the optimal series of memory ops to replace the memset / memcpy.
@@ -4163,6 +4185,7 @@ static bool FindOptimalMemOpLowering(std::vector<EVT> &MemOps,
bool ZeroMemset,
bool MemcpyStrSrc,
bool AllowOverlap,
+ unsigned DstAS, unsigned SrcAS,
SelectionDAG &DAG,
const TargetLowering &TLI) {
assert((SrcAlign == 0 || SrcAlign >= DstAlign) &&
@@ -4179,10 +4202,9 @@ static bool FindOptimalMemOpLowering(std::vector<EVT> &MemOps,
DAG.getMachineFunction());
if (VT == MVT::Other) {
- unsigned AS = 0;
- if (DstAlign >= DAG.getDataLayout().getPointerPrefAlignment(AS) ||
- TLI.allowsMisalignedMemoryAccesses(VT, AS, DstAlign)) {
- VT = TLI.getPointerTy(DAG.getDataLayout());
+ if (DstAlign >= DAG.getDataLayout().getPointerPrefAlignment(DstAS) ||
+ TLI.allowsMisalignedMemoryAccesses(VT, DstAS, DstAlign)) {
+ VT = TLI.getPointerTy(DAG.getDataLayout(), DstAS);
} else {
switch (DstAlign & 7) {
case 0: VT = MVT::i64; break;
@@ -4238,10 +4260,9 @@ static bool FindOptimalMemOpLowering(std::vector<EVT> &MemOps,
// FIXME: Only does this for 64-bit or more since we don't have proper
// cost model for unaligned load / store.
bool Fast;
- unsigned AS = 0;
if (NumMemOps && AllowOverlap &&
VTSize >= 8 && NewVTSize < Size &&
- TLI.allowsMisalignedMemoryAccesses(VT, AS, DstAlign, &Fast) && Fast)
+ TLI.allowsMisalignedMemoryAccesses(VT, DstAS, DstAlign, &Fast) && Fast)
VTSize = Size;
else {
VT = NewVT;
@@ -4267,15 +4288,14 @@ static bool shouldLowerMemFuncForSize(const MachineFunction &MF) {
return MF.getFunction()->optForSize();
}
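
The two hunks above thread the real destination and source address spaces
into FindOptimalMemOpLowering instead of hard-coding address space 0, so the
pointer-sized fallback type honors per-address-space pointer widths; the
callers below pass DstPtrInfo.getAddrSpace() and SrcPtrInfo.getAddrSpace().
A sketch of the effect under an assumed data layout:

    // Under a layout such as "p:32:32:32-p1:64:64:64", AS 0 pointers
    // are 32 bits wide but AS 1 pointers are 64 bits wide; the old
    // code always queried AS 0, the new code queries the real DstAS.
    unsigned DstAS = DstPtrInfo.getAddrSpace();
    if (DstAlign >= DAG.getDataLayout().getPointerPrefAlignment(DstAS) ||
        TLI.allowsMisalignedMemoryAccesses(VT, DstAS, DstAlign))
      VT = TLI.getPointerTy(DAG.getDataLayout(), DstAS); // i32 vs. i64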
-static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, SDLoc dl,
- SDValue Chain, SDValue Dst,
- SDValue Src, uint64_t Size,
- unsigned Align, bool isVol,
- bool AlwaysInline,
+static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
+ SDValue Chain, SDValue Dst, SDValue Src,
+ uint64_t Size, unsigned Align,
+ bool isVol, bool AlwaysInline,
MachinePointerInfo DstPtrInfo,
MachinePointerInfo SrcPtrInfo) {
// Turn a memcpy of undef to nop.
- if (Src.getOpcode() == ISD::UNDEF)
+ if (Src.isUndef())
return Chain;
// Expand memcpy to a series of load and store ops if the size operand falls
@@ -4302,7 +4322,10 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, SDLoc dl,
if (!FindOptimalMemOpLowering(MemOps, Limit, Size,
(DstAlignCanChange ? 0 : Align),
(isZeroStr ? 0 : SrcAlign),
- false, false, CopyFromStr, true, DAG, TLI))
+ false, false, CopyFromStr, true,
+ DstPtrInfo.getAddrSpace(),
+ SrcPtrInfo.getAddrSpace(),
+ DAG, TLI))
return SDValue();
if (DstAlignCanChange) {
@@ -4325,6 +4348,8 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, SDLoc dl,
}
}
+ MachineMemOperand::Flags MMOFlags =
+ isVol ? MachineMemOperand::MOVolatile : MachineMemOperand::MONone;
SmallVector<SDValue, 8> OutChains;
unsigned NumMemOps = MemOps.size();
uint64_t SrcOff = 0, DstOff = 0;
@@ -4351,9 +4376,8 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, SDLoc dl,
Value = getMemsetStringVal(VT, dl, DAG, TLI, Str.substr(SrcOff));
if (Value.getNode())
Store = DAG.getStore(Chain, dl, Value,
- getMemBasePlusOffset(Dst, DstOff, dl, DAG),
- DstPtrInfo.getWithOffset(DstOff), isVol,
- false, Align);
+ DAG.getMemBasePlusOffset(Dst, DstOff, dl),
+ DstPtrInfo.getWithOffset(DstOff), Align, MMOFlags);
}
if (!Store.getNode()) {
@@ -4365,13 +4389,13 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, SDLoc dl,
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
assert(NVT.bitsGE(VT));
Value = DAG.getExtLoad(ISD::EXTLOAD, dl, NVT, Chain,
- getMemBasePlusOffset(Src, SrcOff, dl, DAG),
- SrcPtrInfo.getWithOffset(SrcOff), VT, isVol, false,
- false, MinAlign(SrcAlign, SrcOff));
- Store = DAG.getTruncStore(Chain, dl, Value,
- getMemBasePlusOffset(Dst, DstOff, dl, DAG),
- DstPtrInfo.getWithOffset(DstOff), VT, isVol,
- false, Align);
+ DAG.getMemBasePlusOffset(Src, SrcOff, dl),
+ SrcPtrInfo.getWithOffset(SrcOff), VT,
+ MinAlign(SrcAlign, SrcOff), MMOFlags);
+ OutChains.push_back(Value.getValue(1));
+ Store = DAG.getTruncStore(
+ Chain, dl, Value, DAG.getMemBasePlusOffset(Dst, DstOff, dl),
+ DstPtrInfo.getWithOffset(DstOff), VT, Align, MMOFlags);
}
OutChains.push_back(Store);
SrcOff += VTSize;
@@ -4382,15 +4406,14 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, SDLoc dl,
return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains);
}
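
Besides the flag migration, this hunk fixes a chain bug: the extending load's
chain result, Value.getValue(1), is now pushed onto OutChains, so the
terminating TokenFactor orders later memory operations after the loads as
well as the stores. A sketch of one expanded piece, with assumed operands:

    SmallVector<SDValue, 8> OutChains;
    // Each piece is a (value, chain) load feeding a store.
    SDValue Ld = DAG.getLoad(VT, dl, Chain, SrcPtr, SrcPtrInfo,
                             SrcAlign, MMOFlags);
    OutChains.push_back(Ld.getValue(1)); // the load's chain, now recorded
    SDValue St = DAG.getStore(Chain, dl, Ld, DstPtr, DstPtrInfo,
                              Align, MMOFlags);
    OutChains.push_back(St);
    // A single token orders everything that follows the expansion.
    SDValue NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains);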
-static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, SDLoc dl,
- SDValue Chain, SDValue Dst,
- SDValue Src, uint64_t Size,
- unsigned Align, bool isVol,
- bool AlwaysInline,
+static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
+ SDValue Chain, SDValue Dst, SDValue Src,
+ uint64_t Size, unsigned Align,
+ bool isVol, bool AlwaysInline,
MachinePointerInfo DstPtrInfo,
MachinePointerInfo SrcPtrInfo) {
// Turn a memmove of undef to nop.
- if (Src.getOpcode() == ISD::UNDEF)
+ if (Src.isUndef())
return Chain;
// Expand memmove to a series of load and store ops if the size operand falls
@@ -4411,7 +4434,10 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, SDLoc dl,
if (!FindOptimalMemOpLowering(MemOps, Limit, Size,
(DstAlignCanChange ? 0 : Align), SrcAlign,
- false, false, false, false, DAG, TLI))
+ false, false, false, false,
+ DstPtrInfo.getAddrSpace(),
+ SrcPtrInfo.getAddrSpace(),
+ DAG, TLI))
return SDValue();
if (DstAlignCanChange) {
@@ -4425,6 +4451,8 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, SDLoc dl,
}
}
+ MachineMemOperand::Flags MMOFlags =
+ isVol ? MachineMemOperand::MOVolatile : MachineMemOperand::MONone;
uint64_t SrcOff = 0, DstOff = 0;
SmallVector<SDValue, 8> LoadValues;
SmallVector<SDValue, 8> LoadChains;
@@ -4435,10 +4463,9 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, SDLoc dl,
unsigned VTSize = VT.getSizeInBits() / 8;
SDValue Value;
- Value = DAG.getLoad(VT, dl, Chain,
- getMemBasePlusOffset(Src, SrcOff, dl, DAG),
- SrcPtrInfo.getWithOffset(SrcOff), isVol,
- false, false, SrcAlign);
+ Value =
+ DAG.getLoad(VT, dl, Chain, DAG.getMemBasePlusOffset(Src, SrcOff, dl),
+ SrcPtrInfo.getWithOffset(SrcOff), SrcAlign, MMOFlags);
LoadValues.push_back(Value);
LoadChains.push_back(Value.getValue(1));
SrcOff += VTSize;
@@ -4451,8 +4478,8 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, SDLoc dl,
SDValue Store;
Store = DAG.getStore(Chain, dl, LoadValues[i],
- getMemBasePlusOffset(Dst, DstOff, dl, DAG),
- DstPtrInfo.getWithOffset(DstOff), isVol, false, Align);
+ DAG.getMemBasePlusOffset(Dst, DstOff, dl),
+ DstPtrInfo.getWithOffset(DstOff), Align, MMOFlags);
OutChains.push_back(Store);
DstOff += VTSize;
}
@@ -4478,13 +4505,12 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, SDLoc dl,
/// The function tries to replace 'llvm.memset' intrinsic with several store
/// operations and value calculation code. This is usually profitable for small
/// memory size.
-static SDValue getMemsetStores(SelectionDAG &DAG, SDLoc dl,
- SDValue Chain, SDValue Dst,
- SDValue Src, uint64_t Size,
- unsigned Align, bool isVol,
+static SDValue getMemsetStores(SelectionDAG &DAG, const SDLoc &dl,
+ SDValue Chain, SDValue Dst, SDValue Src,
+ uint64_t Size, unsigned Align, bool isVol,
MachinePointerInfo DstPtrInfo) {
// Turn a memset of undef to nop.
- if (Src.getOpcode() == ISD::UNDEF)
+ if (Src.isUndef())
return Chain;
// Expand memset to a series of load/store ops if the size operand
@@ -4502,7 +4528,9 @@ static SDValue getMemsetStores(SelectionDAG &DAG, SDLoc dl,
isa<ConstantSDNode>(Src) && cast<ConstantSDNode>(Src)->isNullValue();
if (!FindOptimalMemOpLowering(MemOps, TLI.getMaxStoresPerMemset(OptSize),
Size, (DstAlignCanChange ? 0 : Align), 0,
- true, IsZeroVal, false, true, DAG, TLI))
+ true, IsZeroVal, false, true,
+ DstPtrInfo.getAddrSpace(), ~0u,
+ DAG, TLI))
return SDValue();
if (DstAlignCanChange) {
@@ -4548,10 +4576,10 @@ static SDValue getMemsetStores(SelectionDAG &DAG, SDLoc dl,
Value = getMemsetValue(Src, VT, DAG, dl);
}
assert(Value.getValueType() == VT && "Value with wrong type.");
- SDValue Store = DAG.getStore(Chain, dl, Value,
- getMemBasePlusOffset(Dst, DstOff, dl, DAG),
- DstPtrInfo.getWithOffset(DstOff),
- isVol, false, Align);
+ SDValue Store = DAG.getStore(
+ Chain, dl, Value, DAG.getMemBasePlusOffset(Dst, DstOff, dl),
+ DstPtrInfo.getWithOffset(DstOff), Align,
+ isVol ? MachineMemOperand::MOVolatile : MachineMemOperand::MONone);
OutChains.push_back(Store);
DstOff += VT.getSizeInBits() / 8;
Size -= VTSize;
@@ -4570,10 +4598,10 @@ static void checkAddrSpaceIsValidForLibcall(const TargetLowering *TLI,
}
}
-SDValue SelectionDAG::getMemcpy(SDValue Chain, SDLoc dl, SDValue Dst,
- SDValue Src, SDValue Size,
- unsigned Align, bool isVol, bool AlwaysInline,
- bool isTailCall, MachinePointerInfo DstPtrInfo,
+SDValue SelectionDAG::getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst,
+ SDValue Src, SDValue Size, unsigned Align,
+ bool isVol, bool AlwaysInline, bool isTailCall,
+ MachinePointerInfo DstPtrInfo,
MachinePointerInfo SrcPtrInfo) {
assert(Align && "The SDAG layer expects explicit alignment and reserves 0");
@@ -4632,10 +4660,10 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, SDLoc dl, SDValue Dst,
CLI.setDebugLoc(dl)
.setChain(Chain)
.setCallee(TLI->getLibcallCallingConv(RTLIB::MEMCPY),
- Type::getVoidTy(*getContext()),
+ Dst.getValueType().getTypeForEVT(*getContext()),
getExternalSymbol(TLI->getLibcallName(RTLIB::MEMCPY),
TLI->getPointerTy(getDataLayout())),
- std::move(Args), 0)
+ std::move(Args))
.setDiscardResult()
.setTailCall(isTailCall);
@@ -4643,9 +4671,9 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, SDLoc dl, SDValue Dst,
return CallResult.second;
}
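
When the inline expansion bails out, getMemcpy falls back to a real memcpy
libcall. The hunk above types that call with memcpy's actual return value,
the destination pointer, instead of void, and drops the trailing argument
removed from setCallee's signature. A consolidated sketch of the fallback,
mirroring the code above (Chain, dl, Dst, Src, Size, and TLI assumed in scope):

    TargetLowering::ArgListTy Args;
    TargetLowering::ArgListEntry Entry;
    Entry.Ty = DAG.getDataLayout().getIntPtrType(*DAG.getContext());
    Entry.Node = Dst; Args.push_back(Entry);
    Entry.Node = Src; Args.push_back(Entry);
    Entry.Node = Size; Args.push_back(Entry);

    TargetLowering::CallLoweringInfo CLI(DAG);
    CLI.setDebugLoc(dl)
        .setChain(Chain)
        .setCallee(TLI->getLibcallCallingConv(RTLIB::MEMCPY),
                   Dst.getValueType().getTypeForEVT(*DAG.getContext()),
                   DAG.getExternalSymbol(TLI->getLibcallName(RTLIB::MEMCPY),
                                         TLI->getPointerTy(DAG.getDataLayout())),
                   std::move(Args))
        .setDiscardResult(); // the returned pointer is still unused
    std::pair<SDValue, SDValue> CallResult = TLI->LowerCallTo(CLI);
    SDValue OutChain = CallResult.second;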
-SDValue SelectionDAG::getMemmove(SDValue Chain, SDLoc dl, SDValue Dst,
- SDValue Src, SDValue Size,
- unsigned Align, bool isVol, bool isTailCall,
+SDValue SelectionDAG::getMemmove(SDValue Chain, const SDLoc &dl, SDValue Dst,
+ SDValue Src, SDValue Size, unsigned Align,
+ bool isVol, bool isTailCall,
MachinePointerInfo DstPtrInfo,
MachinePointerInfo SrcPtrInfo) {
assert(Align && "The SDAG layer expects explicit alignment and reserves 0");
@@ -4693,10 +4721,10 @@ SDValue SelectionDAG::getMemmove(SDValue Chain, SDLoc dl, SDValue Dst,
CLI.setDebugLoc(dl)
.setChain(Chain)
.setCallee(TLI->getLibcallCallingConv(RTLIB::MEMMOVE),
- Type::getVoidTy(*getContext()),
+ Dst.getValueType().getTypeForEVT(*getContext()),
getExternalSymbol(TLI->getLibcallName(RTLIB::MEMMOVE),
TLI->getPointerTy(getDataLayout())),
- std::move(Args), 0)
+ std::move(Args))
.setDiscardResult()
.setTailCall(isTailCall);
@@ -4704,9 +4732,9 @@ SDValue SelectionDAG::getMemmove(SDValue Chain, SDLoc dl, SDValue Dst,
return CallResult.second;
}
-SDValue SelectionDAG::getMemset(SDValue Chain, SDLoc dl, SDValue Dst,
- SDValue Src, SDValue Size,
- unsigned Align, bool isVol, bool isTailCall,
+SDValue SelectionDAG::getMemset(SDValue Chain, const SDLoc &dl, SDValue Dst,
+ SDValue Src, SDValue Size, unsigned Align,
+ bool isVol, bool isTailCall,
MachinePointerInfo DstPtrInfo) {
assert(Align && "The SDAG layer expects explicit alignment and reserves 0");
@@ -4755,10 +4783,10 @@ SDValue SelectionDAG::getMemset(SDValue Chain, SDLoc dl, SDValue Dst,
CLI.setDebugLoc(dl)
.setChain(Chain)
.setCallee(TLI->getLibcallCallingConv(RTLIB::MEMSET),
- Type::getVoidTy(*getContext()),
+ Dst.getValueType().getTypeForEVT(*getContext()),
getExternalSymbol(TLI->getLibcallName(RTLIB::MEMSET),
TLI->getPointerTy(getDataLayout())),
- std::move(Args), 0)
+ std::move(Args))
.setDiscardResult()
.setTailCall(isTailCall);
@@ -4766,7 +4794,7 @@ SDValue SelectionDAG::getMemset(SDValue Chain, SDLoc dl, SDValue Dst,
return CallResult.second;
}
-SDValue SelectionDAG::getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT,
+SDValue SelectionDAG::getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT,
SDVTList VTList, ArrayRef<SDValue> Ops,
MachineMemOperand *MMO,
AtomicOrdering SuccessOrdering,
@@ -4777,41 +4805,31 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT,
AddNodeIDNode(ID, Opcode, VTList, Ops);
ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
void* IP = nullptr;
- if (SDNode *E = FindNodeOrInsertPos(ID, dl.getDebugLoc(), IP)) {
+ if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) {
cast<AtomicSDNode>(E)->refineAlignment(MMO);
return SDValue(E, 0);
}
- // Allocate the operands array for the node out of the BumpPtrAllocator, since
- // SDNode doesn't have access to it. This memory will be "leaked" when
- // the node is deallocated, but recovered when the allocator is released.
- // If the number of operands is less than 5 we use AtomicSDNode's internal
- // storage.
- unsigned NumOps = Ops.size();
- SDUse *DynOps = NumOps > 4 ? OperandAllocator.Allocate<SDUse>(NumOps)
- : nullptr;
-
- SDNode *N = new (NodeAllocator) AtomicSDNode(Opcode, dl.getIROrder(),
- dl.getDebugLoc(), VTList, MemVT,
- Ops.data(), DynOps, NumOps, MMO,
- SuccessOrdering, FailureOrdering,
- SynchScope);
+ auto *N = newSDNode<AtomicSDNode>(Opcode, dl.getIROrder(), dl.getDebugLoc(),
+ VTList, MemVT, MMO, SuccessOrdering,
+ FailureOrdering, SynchScope);
+ createOperands(N, Ops);
+
CSEMap.InsertNode(N, IP);
InsertNode(N);
return SDValue(N, 0);
}
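
Every "new (NodeAllocator) FooSDNode(...)" site in this file collapses into
the newSDNode<T>/createOperands pair: the first placement-allocates the node,
the second attaches a right-sized operand array from a recycling pool,
retiring AtomicSDNode's bespoke inline-operand handling deleted above. The
allocation half has roughly this shape, judging from its call sites (a
sketch, not necessarily the verbatim private implementation):

    // Perfect-forwarding placement construction out of the DAG's
    // recycling node allocator; operand wiring happens separately in
    // createOperands so every node type shares one code path.
    template <typename SDNodeT, typename... ArgTypes>
    SDNodeT *SelectionDAG::newSDNode(ArgTypes &&... Args) {
      return new (NodeAllocator.template Allocate<SDNodeT>())
          SDNodeT(std::forward<ArgTypes>(Args)...);
    }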
-SDValue SelectionDAG::getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT,
+SDValue SelectionDAG::getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT,
SDVTList VTList, ArrayRef<SDValue> Ops,
- MachineMemOperand *MMO,
- AtomicOrdering Ordering,
+ MachineMemOperand *MMO, AtomicOrdering Ordering,
SynchronizationScope SynchScope) {
return getAtomic(Opcode, dl, MemVT, VTList, Ops, MMO, Ordering,
Ordering, SynchScope);
}
SDValue SelectionDAG::getAtomicCmpSwap(
- unsigned Opcode, SDLoc dl, EVT MemVT, SDVTList VTs, SDValue Chain,
+ unsigned Opcode, const SDLoc &dl, EVT MemVT, SDVTList VTs, SDValue Chain,
SDValue Ptr, SDValue Cmp, SDValue Swp, MachinePointerInfo PtrInfo,
unsigned Alignment, AtomicOrdering SuccessOrdering,
AtomicOrdering FailureOrdering, SynchronizationScope SynchScope) {
@@ -4826,10 +4844,8 @@ SDValue SelectionDAG::getAtomicCmpSwap(
// FIXME: Volatile isn't really correct; we should keep track of atomic
// orderings in the memoperand.
- unsigned Flags = MachineMemOperand::MOVolatile;
- Flags |= MachineMemOperand::MOLoad;
- Flags |= MachineMemOperand::MOStore;
-
+ auto Flags = MachineMemOperand::MOVolatile | MachineMemOperand::MOLoad |
+ MachineMemOperand::MOStore;
MachineMemOperand *MMO =
MF.getMachineMemOperand(PtrInfo, Flags, MemVT.getStoreSize(), Alignment);
@@ -4837,9 +4853,9 @@ SDValue SelectionDAG::getAtomicCmpSwap(
SuccessOrdering, FailureOrdering, SynchScope);
}
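
The single-expression OR above type-checks because MachineMemOperand::Flags
is now a true bitmask enum rather than a bag of unsigned constants. A
self-contained sketch of the idiom; llvm/ADT/BitmaskEnum.h is the real
header, while the demo namespace and its enumerators are made up:

    #include "llvm/ADT/BitmaskEnum.h"

    namespace demo {
    enum Flags : unsigned {
      None = 0,
      Load = 1u << 0,
      Store = 1u << 1,
      Volatile = 1u << 2,
      // Enables ~, |, &, ^ (and compound forms) that return Flags.
      LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/Volatile)
    };
    // Pulls the operator overloads into this namespace.
    LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();

    inline Flags rmwFlags() {
      return Volatile | Load | Store; // stays of type Flags, no casts
    }
    } // namespace demo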
-SDValue SelectionDAG::getAtomicCmpSwap(unsigned Opcode, SDLoc dl, EVT MemVT,
- SDVTList VTs, SDValue Chain, SDValue Ptr,
- SDValue Cmp, SDValue Swp,
+SDValue SelectionDAG::getAtomicCmpSwap(unsigned Opcode, const SDLoc &dl,
+ EVT MemVT, SDVTList VTs, SDValue Chain,
+ SDValue Ptr, SDValue Cmp, SDValue Swp,
MachineMemOperand *MMO,
AtomicOrdering SuccessOrdering,
AtomicOrdering FailureOrdering,
@@ -4853,11 +4869,9 @@ SDValue SelectionDAG::getAtomicCmpSwap(unsigned Opcode, SDLoc dl, EVT MemVT,
SuccessOrdering, FailureOrdering, SynchScope);
}
-SDValue SelectionDAG::getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT,
- SDValue Chain,
- SDValue Ptr, SDValue Val,
- const Value* PtrVal,
- unsigned Alignment,
+SDValue SelectionDAG::getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT,
+ SDValue Chain, SDValue Ptr, SDValue Val,
+ const Value *PtrVal, unsigned Alignment,
AtomicOrdering Ordering,
SynchronizationScope SynchScope) {
if (Alignment == 0) // Ensure that codegen never sees alignment 0
@@ -4870,7 +4884,7 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT,
// chained as such.
// FIXME: Volatile isn't really correct; we should keep track of atomic
// orderings in the memoperand.
- unsigned Flags = MachineMemOperand::MOVolatile;
+ auto Flags = MachineMemOperand::MOVolatile;
if (Opcode != ISD::ATOMIC_STORE)
Flags |= MachineMemOperand::MOLoad;
if (Opcode != ISD::ATOMIC_LOAD)
@@ -4884,11 +4898,9 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT,
Ordering, SynchScope);
}
-SDValue SelectionDAG::getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT,
- SDValue Chain,
- SDValue Ptr, SDValue Val,
- MachineMemOperand *MMO,
- AtomicOrdering Ordering,
+SDValue SelectionDAG::getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT,
+ SDValue Chain, SDValue Ptr, SDValue Val,
+ MachineMemOperand *MMO, AtomicOrdering Ordering,
SynchronizationScope SynchScope) {
assert((Opcode == ISD::ATOMIC_LOAD_ADD ||
Opcode == ISD::ATOMIC_LOAD_SUB ||
@@ -4912,11 +4924,9 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT,
return getAtomic(Opcode, dl, MemVT, VTs, Ops, MMO, Ordering, SynchScope);
}
-SDValue SelectionDAG::getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT,
- EVT VT, SDValue Chain,
- SDValue Ptr,
- MachineMemOperand *MMO,
- AtomicOrdering Ordering,
+SDValue SelectionDAG::getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT,
+ EVT VT, SDValue Chain, SDValue Ptr,
+ MachineMemOperand *MMO, AtomicOrdering Ordering,
SynchronizationScope SynchScope) {
assert(Opcode == ISD::ATOMIC_LOAD && "Invalid Atomic Op");
@@ -4926,7 +4936,7 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT,
}
/// getMergeValues - Create a MERGE_VALUES node from the given operands.
-SDValue SelectionDAG::getMergeValues(ArrayRef<SDValue> Ops, SDLoc dl) {
+SDValue SelectionDAG::getMergeValues(ArrayRef<SDValue> Ops, const SDLoc &dl) {
if (Ops.size() == 1)
return Ops[0];
@@ -4937,17 +4947,15 @@ SDValue SelectionDAG::getMergeValues(ArrayRef<SDValue> Ops, SDLoc dl) {
return getNode(ISD::MERGE_VALUES, dl, getVTList(VTs), Ops);
}
-SDValue
-SelectionDAG::getMemIntrinsicNode(unsigned Opcode, SDLoc dl, SDVTList VTList,
- ArrayRef<SDValue> Ops,
- EVT MemVT, MachinePointerInfo PtrInfo,
- unsigned Align, bool Vol,
- bool ReadMem, bool WriteMem, unsigned Size) {
+SDValue SelectionDAG::getMemIntrinsicNode(
+ unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef<SDValue> Ops,
+ EVT MemVT, MachinePointerInfo PtrInfo, unsigned Align, bool Vol,
+ bool ReadMem, bool WriteMem, unsigned Size) {
if (Align == 0) // Ensure that codegen never sees alignment 0
Align = getEVTAlignment(MemVT);
MachineFunction &MF = getMachineFunction();
- unsigned Flags = 0;
+ auto Flags = MachineMemOperand::MONone;
if (WriteMem)
Flags |= MachineMemOperand::MOStore;
if (ReadMem)
@@ -4962,10 +4970,10 @@ SelectionDAG::getMemIntrinsicNode(unsigned Opcode, SDLoc dl, SDVTList VTList,
return getMemIntrinsicNode(Opcode, dl, VTList, Ops, MemVT, MMO);
}
-SDValue
-SelectionDAG::getMemIntrinsicNode(unsigned Opcode, SDLoc dl, SDVTList VTList,
- ArrayRef<SDValue> Ops, EVT MemVT,
- MachineMemOperand *MMO) {
+SDValue SelectionDAG::getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl,
+ SDVTList VTList,
+ ArrayRef<SDValue> Ops, EVT MemVT,
+ MachineMemOperand *MMO) {
assert((Opcode == ISD::INTRINSIC_VOID ||
Opcode == ISD::INTRINSIC_W_CHAIN ||
Opcode == ISD::PREFETCH ||
@@ -4982,19 +4990,20 @@ SelectionDAG::getMemIntrinsicNode(unsigned Opcode, SDLoc dl, SDVTList VTList,
AddNodeIDNode(ID, Opcode, VTList, Ops);
ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
void *IP = nullptr;
- if (SDNode *E = FindNodeOrInsertPos(ID, dl.getDebugLoc(), IP)) {
+ if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) {
cast<MemIntrinsicSDNode>(E)->refineAlignment(MMO);
return SDValue(E, 0);
}
- N = new (NodeAllocator) MemIntrinsicSDNode(Opcode, dl.getIROrder(),
- dl.getDebugLoc(), VTList, Ops,
- MemVT, MMO);
- CSEMap.InsertNode(N, IP);
+ N = newSDNode<MemIntrinsicSDNode>(Opcode, dl.getIROrder(), dl.getDebugLoc(),
+ VTList, MemVT, MMO);
+ createOperands(N, Ops);
+
+ CSEMap.InsertNode(N, IP);
} else {
- N = new (NodeAllocator) MemIntrinsicSDNode(Opcode, dl.getIROrder(),
- dl.getDebugLoc(), VTList, Ops,
- MemVT, MMO);
+ N = newSDNode<MemIntrinsicSDNode>(Opcode, dl.getIROrder(), dl.getDebugLoc(),
+ VTList, MemVT, MMO);
+ createOperands(N, Ops);
}
InsertNode(N);
return SDValue(N, 0);
@@ -5032,50 +5041,40 @@ static MachinePointerInfo InferPointerInfo(SelectionDAG &DAG, SDValue Ptr,
// If the 'Offset' value isn't a constant, we can't handle this.
if (ConstantSDNode *OffsetNode = dyn_cast<ConstantSDNode>(OffsetOp))
return InferPointerInfo(DAG, Ptr, OffsetNode->getSExtValue());
- if (OffsetOp.getOpcode() == ISD::UNDEF)
+ if (OffsetOp.isUndef())
return InferPointerInfo(DAG, Ptr);
return MachinePointerInfo();
}
-
-SDValue
-SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType,
- EVT VT, SDLoc dl, SDValue Chain,
- SDValue Ptr, SDValue Offset,
- MachinePointerInfo PtrInfo, EVT MemVT,
- bool isVolatile, bool isNonTemporal, bool isInvariant,
- unsigned Alignment, const AAMDNodes &AAInfo,
- const MDNode *Ranges) {
+SDValue SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType,
+ EVT VT, const SDLoc &dl, SDValue Chain,
+ SDValue Ptr, SDValue Offset,
+ MachinePointerInfo PtrInfo, EVT MemVT,
+ unsigned Alignment,
+ MachineMemOperand::Flags MMOFlags,
+ const AAMDNodes &AAInfo, const MDNode *Ranges) {
assert(Chain.getValueType() == MVT::Other &&
"Invalid chain type");
if (Alignment == 0) // Ensure that codegen never sees alignment 0
Alignment = getEVTAlignment(VT);
- unsigned Flags = MachineMemOperand::MOLoad;
- if (isVolatile)
- Flags |= MachineMemOperand::MOVolatile;
- if (isNonTemporal)
- Flags |= MachineMemOperand::MONonTemporal;
- if (isInvariant)
- Flags |= MachineMemOperand::MOInvariant;
-
+ MMOFlags |= MachineMemOperand::MOLoad;
+ assert((MMOFlags & MachineMemOperand::MOStore) == 0);
// If we don't have a PtrInfo, infer the trivial frame index case to simplify
// clients.
if (PtrInfo.V.isNull())
PtrInfo = InferPointerInfo(*this, Ptr, Offset);
MachineFunction &MF = getMachineFunction();
- MachineMemOperand *MMO =
- MF.getMachineMemOperand(PtrInfo, Flags, MemVT.getStoreSize(), Alignment,
- AAInfo, Ranges);
+ MachineMemOperand *MMO = MF.getMachineMemOperand(
+ PtrInfo, MMOFlags, MemVT.getStoreSize(), Alignment, AAInfo, Ranges);
return getLoad(AM, ExtType, VT, dl, Chain, Ptr, Offset, MemVT, MMO);
}
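
getLoad's three positional bools (volatile, nontemporal, invariant) become a
single MachineMemOperand::Flags parameter, with an assert that no caller
smuggles MOStore into a load. Call sites turn self-describing; a hedged
before/after with assumed operands:

    // Before: opaque bool soup; which 'true' was which?
    //   DAG.getLoad(VT, dl, Chain, Ptr, PtrInfo,
    //               /*isVolatile*/ true, /*isNonTemporal*/ false,
    //               /*isInvariant*/ true, Align);
    // After: named, OR-able flags.
    SDValue L = DAG.getLoad(VT, dl, Chain, Ptr, PtrInfo, Align,
                            MachineMemOperand::MOVolatile |
                                MachineMemOperand::MOInvariant);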
-SDValue
-SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType,
- EVT VT, SDLoc dl, SDValue Chain,
- SDValue Ptr, SDValue Offset, EVT MemVT,
- MachineMemOperand *MMO) {
+SDValue SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType,
+ EVT VT, const SDLoc &dl, SDValue Chain,
+ SDValue Ptr, SDValue Offset, EVT MemVT,
+ MachineMemOperand *MMO) {
if (VT == MemVT) {
ExtType = ISD::NON_EXTLOAD;
} else if (ExtType == ISD::NON_EXTLOAD) {
@@ -5094,8 +5093,7 @@ SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType,
}
bool Indexed = AM != ISD::UNINDEXED;
- assert((Indexed || Offset.getOpcode() == ISD::UNDEF) &&
- "Unindexed load with an offset!");
+ assert((Indexed || Offset.isUndef()) && "Unindexed load with an offset!");
SDVTList VTs = Indexed ?
getVTList(VT, Ptr.getValueType(), MVT::Other) : getVTList(VT, MVT::Other);
@@ -5108,100 +5106,90 @@ SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType,
MMO->isInvariant()));
ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
void *IP = nullptr;
- if (SDNode *E = FindNodeOrInsertPos(ID, dl.getDebugLoc(), IP)) {
+ if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) {
cast<LoadSDNode>(E)->refineAlignment(MMO);
return SDValue(E, 0);
}
- SDNode *N = new (NodeAllocator) LoadSDNode(Ops, dl.getIROrder(),
- dl.getDebugLoc(), VTs, AM, ExtType,
- MemVT, MMO);
+ auto *N = newSDNode<LoadSDNode>(dl.getIROrder(), dl.getDebugLoc(), VTs, AM,
+ ExtType, MemVT, MMO);
+ createOperands(N, Ops);
+
CSEMap.InsertNode(N, IP);
InsertNode(N);
return SDValue(N, 0);
}
-SDValue SelectionDAG::getLoad(EVT VT, SDLoc dl,
- SDValue Chain, SDValue Ptr,
- MachinePointerInfo PtrInfo,
- bool isVolatile, bool isNonTemporal,
- bool isInvariant, unsigned Alignment,
- const AAMDNodes &AAInfo,
- const MDNode *Ranges) {
+SDValue SelectionDAG::getLoad(EVT VT, const SDLoc &dl, SDValue Chain,
+ SDValue Ptr, MachinePointerInfo PtrInfo,
+ unsigned Alignment,
+ MachineMemOperand::Flags MMOFlags,
+ const AAMDNodes &AAInfo, const MDNode *Ranges) {
SDValue Undef = getUNDEF(Ptr.getValueType());
return getLoad(ISD::UNINDEXED, ISD::NON_EXTLOAD, VT, dl, Chain, Ptr, Undef,
- PtrInfo, VT, isVolatile, isNonTemporal, isInvariant, Alignment,
- AAInfo, Ranges);
+ PtrInfo, VT, Alignment, MMOFlags, AAInfo, Ranges);
}
-SDValue SelectionDAG::getLoad(EVT VT, SDLoc dl,
- SDValue Chain, SDValue Ptr,
- MachineMemOperand *MMO) {
+SDValue SelectionDAG::getLoad(EVT VT, const SDLoc &dl, SDValue Chain,
+ SDValue Ptr, MachineMemOperand *MMO) {
SDValue Undef = getUNDEF(Ptr.getValueType());
return getLoad(ISD::UNINDEXED, ISD::NON_EXTLOAD, VT, dl, Chain, Ptr, Undef,
VT, MMO);
}
-SDValue SelectionDAG::getExtLoad(ISD::LoadExtType ExtType, SDLoc dl, EVT VT,
- SDValue Chain, SDValue Ptr,
+SDValue SelectionDAG::getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl,
+ EVT VT, SDValue Chain, SDValue Ptr,
MachinePointerInfo PtrInfo, EVT MemVT,
- bool isVolatile, bool isNonTemporal,
- bool isInvariant, unsigned Alignment,
+ unsigned Alignment,
+ MachineMemOperand::Flags MMOFlags,
const AAMDNodes &AAInfo) {
SDValue Undef = getUNDEF(Ptr.getValueType());
- return getLoad(ISD::UNINDEXED, ExtType, VT, dl, Chain, Ptr, Undef,
- PtrInfo, MemVT, isVolatile, isNonTemporal, isInvariant,
- Alignment, AAInfo);
+ return getLoad(ISD::UNINDEXED, ExtType, VT, dl, Chain, Ptr, Undef, PtrInfo,
+ MemVT, Alignment, MMOFlags, AAInfo);
}
-
-SDValue SelectionDAG::getExtLoad(ISD::LoadExtType ExtType, SDLoc dl, EVT VT,
- SDValue Chain, SDValue Ptr, EVT MemVT,
+SDValue SelectionDAG::getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl,
+ EVT VT, SDValue Chain, SDValue Ptr, EVT MemVT,
MachineMemOperand *MMO) {
SDValue Undef = getUNDEF(Ptr.getValueType());
return getLoad(ISD::UNINDEXED, ExtType, VT, dl, Chain, Ptr, Undef,
MemVT, MMO);
}
-SDValue
-SelectionDAG::getIndexedLoad(SDValue OrigLoad, SDLoc dl, SDValue Base,
- SDValue Offset, ISD::MemIndexedMode AM) {
+SDValue SelectionDAG::getIndexedLoad(SDValue OrigLoad, const SDLoc &dl,
+ SDValue Base, SDValue Offset,
+ ISD::MemIndexedMode AM) {
LoadSDNode *LD = cast<LoadSDNode>(OrigLoad);
- assert(LD->getOffset().getOpcode() == ISD::UNDEF &&
- "Load is already a indexed load!");
+ assert(LD->getOffset().isUndef() && "Load is already an indexed load!");
+ // Don't propagate the invariant flag.
+ auto MMOFlags =
+ LD->getMemOperand()->getFlags() & ~MachineMemOperand::MOInvariant;
return getLoad(AM, LD->getExtensionType(), OrigLoad.getValueType(), dl,
LD->getChain(), Base, Offset, LD->getPointerInfo(),
- LD->getMemoryVT(), LD->isVolatile(), LD->isNonTemporal(),
- false, LD->getAlignment());
+ LD->getMemoryVT(), LD->getAlignment(), MMOFlags);
}
-SDValue SelectionDAG::getStore(SDValue Chain, SDLoc dl, SDValue Val,
+SDValue SelectionDAG::getStore(SDValue Chain, const SDLoc &dl, SDValue Val,
SDValue Ptr, MachinePointerInfo PtrInfo,
- bool isVolatile, bool isNonTemporal,
- unsigned Alignment, const AAMDNodes &AAInfo) {
- assert(Chain.getValueType() == MVT::Other &&
- "Invalid chain type");
+ unsigned Alignment,
+ MachineMemOperand::Flags MMOFlags,
+ const AAMDNodes &AAInfo) {
+ assert(Chain.getValueType() == MVT::Other && "Invalid chain type");
if (Alignment == 0) // Ensure that codegen never sees alignment 0
Alignment = getEVTAlignment(Val.getValueType());
- unsigned Flags = MachineMemOperand::MOStore;
- if (isVolatile)
- Flags |= MachineMemOperand::MOVolatile;
- if (isNonTemporal)
- Flags |= MachineMemOperand::MONonTemporal;
+ MMOFlags |= MachineMemOperand::MOStore;
+ assert((MMOFlags & MachineMemOperand::MOLoad) == 0);
if (PtrInfo.V.isNull())
PtrInfo = InferPointerInfo(*this, Ptr);
MachineFunction &MF = getMachineFunction();
- MachineMemOperand *MMO =
- MF.getMachineMemOperand(PtrInfo, Flags,
- Val.getValueType().getStoreSize(), Alignment,
- AAInfo);
-
+ MachineMemOperand *MMO = MF.getMachineMemOperand(
+ PtrInfo, MMOFlags, Val.getValueType().getStoreSize(), Alignment, AAInfo);
return getStore(Chain, dl, Val, Ptr, MMO);
}
-SDValue SelectionDAG::getStore(SDValue Chain, SDLoc dl, SDValue Val,
+SDValue SelectionDAG::getStore(SDValue Chain, const SDLoc &dl, SDValue Val,
SDValue Ptr, MachineMemOperand *MMO) {
assert(Chain.getValueType() == MVT::Other &&
"Invalid chain type");
@@ -5216,46 +5204,42 @@ SDValue SelectionDAG::getStore(SDValue Chain, SDLoc dl, SDValue Val,
MMO->isNonTemporal(), MMO->isInvariant()));
ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
void *IP = nullptr;
- if (SDNode *E = FindNodeOrInsertPos(ID, dl.getDebugLoc(), IP)) {
+ if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) {
cast<StoreSDNode>(E)->refineAlignment(MMO);
return SDValue(E, 0);
}
- SDNode *N = new (NodeAllocator) StoreSDNode(Ops, dl.getIROrder(),
- dl.getDebugLoc(), VTs,
- ISD::UNINDEXED, false, VT, MMO);
+ auto *N = newSDNode<StoreSDNode>(dl.getIROrder(), dl.getDebugLoc(), VTs,
+ ISD::UNINDEXED, false, VT, MMO);
+ createOperands(N, Ops);
+
CSEMap.InsertNode(N, IP);
InsertNode(N);
return SDValue(N, 0);
}
-SDValue SelectionDAG::getTruncStore(SDValue Chain, SDLoc dl, SDValue Val,
+SDValue SelectionDAG::getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val,
SDValue Ptr, MachinePointerInfo PtrInfo,
- EVT SVT,bool isVolatile, bool isNonTemporal,
- unsigned Alignment,
+ EVT SVT, unsigned Alignment,
+ MachineMemOperand::Flags MMOFlags,
const AAMDNodes &AAInfo) {
assert(Chain.getValueType() == MVT::Other &&
"Invalid chain type");
if (Alignment == 0) // Ensure that codegen never sees alignment 0
Alignment = getEVTAlignment(SVT);
- unsigned Flags = MachineMemOperand::MOStore;
- if (isVolatile)
- Flags |= MachineMemOperand::MOVolatile;
- if (isNonTemporal)
- Flags |= MachineMemOperand::MONonTemporal;
+ MMOFlags |= MachineMemOperand::MOStore;
+ assert((MMOFlags & MachineMemOperand::MOLoad) == 0);
if (PtrInfo.V.isNull())
PtrInfo = InferPointerInfo(*this, Ptr);
MachineFunction &MF = getMachineFunction();
- MachineMemOperand *MMO =
- MF.getMachineMemOperand(PtrInfo, Flags, SVT.getStoreSize(), Alignment,
- AAInfo);
-
+ MachineMemOperand *MMO = MF.getMachineMemOperand(
+ PtrInfo, MMOFlags, SVT.getStoreSize(), Alignment, AAInfo);
return getTruncStore(Chain, dl, Val, Ptr, SVT, MMO);
}
-SDValue SelectionDAG::getTruncStore(SDValue Chain, SDLoc dl, SDValue Val,
+SDValue SelectionDAG::getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val,
SDValue Ptr, EVT SVT,
MachineMemOperand *MMO) {
EVT VT = Val.getValueType();
@@ -5285,24 +5269,24 @@ SDValue SelectionDAG::getTruncStore(SDValue Chain, SDLoc dl, SDValue Val,
MMO->isNonTemporal(), MMO->isInvariant()));
ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
void *IP = nullptr;
- if (SDNode *E = FindNodeOrInsertPos(ID, dl.getDebugLoc(), IP)) {
+ if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) {
cast<StoreSDNode>(E)->refineAlignment(MMO);
return SDValue(E, 0);
}
- SDNode *N = new (NodeAllocator) StoreSDNode(Ops, dl.getIROrder(),
- dl.getDebugLoc(), VTs,
- ISD::UNINDEXED, true, SVT, MMO);
+ auto *N = newSDNode<StoreSDNode>(dl.getIROrder(), dl.getDebugLoc(), VTs,
+ ISD::UNINDEXED, true, SVT, MMO);
+ createOperands(N, Ops);
+
CSEMap.InsertNode(N, IP);
InsertNode(N);
return SDValue(N, 0);
}
-SDValue
-SelectionDAG::getIndexedStore(SDValue OrigStore, SDLoc dl, SDValue Base,
- SDValue Offset, ISD::MemIndexedMode AM) {
+SDValue SelectionDAG::getIndexedStore(SDValue OrigStore, const SDLoc &dl,
+ SDValue Base, SDValue Offset,
+ ISD::MemIndexedMode AM) {
StoreSDNode *ST = cast<StoreSDNode>(OrigStore);
- assert(ST->getOffset().getOpcode() == ISD::UNDEF &&
- "Store is already a indexed store!");
+ assert(ST->getOffset().isUndef() && "Store is already an indexed store!");
SDVTList VTs = getVTList(Base.getValueType(), MVT::Other);
SDValue Ops[] = { ST->getChain(), ST->getValue(), Base, Offset };
FoldingSetNodeID ID;
@@ -5311,23 +5295,23 @@ SelectionDAG::getIndexedStore(SDValue OrigStore, SDLoc dl, SDValue Base,
ID.AddInteger(ST->getRawSubclassData());
ID.AddInteger(ST->getPointerInfo().getAddrSpace());
void *IP = nullptr;
- if (SDNode *E = FindNodeOrInsertPos(ID, dl.getDebugLoc(), IP))
+ if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP))
return SDValue(E, 0);
- SDNode *N = new (NodeAllocator) StoreSDNode(Ops, dl.getIROrder(),
- dl.getDebugLoc(), VTs, AM,
- ST->isTruncatingStore(),
- ST->getMemoryVT(),
- ST->getMemOperand());
+ auto *N = newSDNode<StoreSDNode>(dl.getIROrder(), dl.getDebugLoc(), VTs, AM,
+ ST->isTruncatingStore(), ST->getMemoryVT(),
+ ST->getMemOperand());
+ createOperands(N, Ops);
+
CSEMap.InsertNode(N, IP);
InsertNode(N);
return SDValue(N, 0);
}
-SDValue
-SelectionDAG::getMaskedLoad(EVT VT, SDLoc dl, SDValue Chain,
- SDValue Ptr, SDValue Mask, SDValue Src0, EVT MemVT,
- MachineMemOperand *MMO, ISD::LoadExtType ExtTy) {
+SDValue SelectionDAG::getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain,
+ SDValue Ptr, SDValue Mask, SDValue Src0,
+ EVT MemVT, MachineMemOperand *MMO,
+ ISD::LoadExtType ExtTy) {
SDVTList VTs = getVTList(VT, MVT::Other);
SDValue Ops[] = { Chain, Ptr, Mask, Src0 };
@@ -5340,21 +5324,23 @@ SelectionDAG::getMaskedLoad(EVT VT, SDLoc dl, SDValue Chain,
MMO->isInvariant()));
ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
void *IP = nullptr;
- if (SDNode *E = FindNodeOrInsertPos(ID, dl.getDebugLoc(), IP)) {
+ if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) {
cast<MaskedLoadSDNode>(E)->refineAlignment(MMO);
return SDValue(E, 0);
}
- SDNode *N = new (NodeAllocator) MaskedLoadSDNode(dl.getIROrder(),
- dl.getDebugLoc(), Ops, 4, VTs,
- ExtTy, MemVT, MMO);
+ auto *N = newSDNode<MaskedLoadSDNode>(dl.getIROrder(), dl.getDebugLoc(), VTs,
+ ExtTy, MemVT, MMO);
+ createOperands(N, Ops);
+
CSEMap.InsertNode(N, IP);
InsertNode(N);
return SDValue(N, 0);
}
-SDValue SelectionDAG::getMaskedStore(SDValue Chain, SDLoc dl, SDValue Val,
- SDValue Ptr, SDValue Mask, EVT MemVT,
- MachineMemOperand *MMO, bool isTrunc) {
+SDValue SelectionDAG::getMaskedStore(SDValue Chain, const SDLoc &dl,
+ SDValue Val, SDValue Ptr, SDValue Mask,
+ EVT MemVT, MachineMemOperand *MMO,
+ bool isTrunc) {
assert(Chain.getValueType() == MVT::Other &&
"Invalid chain type");
EVT VT = Val.getValueType();
@@ -5367,22 +5353,23 @@ SDValue SelectionDAG::getMaskedStore(SDValue Chain, SDLoc dl, SDValue Val,
MMO->isNonTemporal(), MMO->isInvariant()));
ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
void *IP = nullptr;
- if (SDNode *E = FindNodeOrInsertPos(ID, dl.getDebugLoc(), IP)) {
+ if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) {
cast<MaskedStoreSDNode>(E)->refineAlignment(MMO);
return SDValue(E, 0);
}
- SDNode *N = new (NodeAllocator) MaskedStoreSDNode(dl.getIROrder(),
- dl.getDebugLoc(), Ops, 4,
- VTs, isTrunc, MemVT, MMO);
+ auto *N = newSDNode<MaskedStoreSDNode>(dl.getIROrder(), dl.getDebugLoc(), VTs,
+ isTrunc, MemVT, MMO);
+ createOperands(N, Ops);
+
CSEMap.InsertNode(N, IP);
InsertNode(N);
return SDValue(N, 0);
}
-SDValue
-SelectionDAG::getMaskedGather(SDVTList VTs, EVT VT, SDLoc dl,
- ArrayRef<SDValue> Ops,
- MachineMemOperand *MMO) {
+SDValue SelectionDAG::getMaskedGather(SDVTList VTs, EVT VT, const SDLoc &dl,
+ ArrayRef<SDValue> Ops,
+ MachineMemOperand *MMO) {
+ assert(Ops.size() == 5 && "Incompatible number of operands");
FoldingSetNodeID ID;
AddNodeIDNode(ID, ISD::MGATHER, VTs, Ops);
@@ -5393,21 +5380,34 @@ SelectionDAG::getMaskedGather(SDVTList VTs, EVT VT, SDLoc dl,
MMO->isInvariant()));
ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
void *IP = nullptr;
- if (SDNode *E = FindNodeOrInsertPos(ID, dl.getDebugLoc(), IP)) {
+ if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) {
cast<MaskedGatherSDNode>(E)->refineAlignment(MMO);
return SDValue(E, 0);
}
- MaskedGatherSDNode *N =
- new (NodeAllocator) MaskedGatherSDNode(dl.getIROrder(), dl.getDebugLoc(),
- Ops, VTs, VT, MMO);
+
+ auto *N = newSDNode<MaskedGatherSDNode>(dl.getIROrder(), dl.getDebugLoc(),
+ VTs, VT, MMO);
+ createOperands(N, Ops);
+
+ assert(N->getValue().getValueType() == N->getValueType(0) &&
+ "Incompatible type of the PassThru value in MaskedGatherSDNode");
+ assert(N->getMask().getValueType().getVectorNumElements() ==
+ N->getValueType(0).getVectorNumElements() &&
+ "Vector width mismatch between mask and data");
+ assert(N->getIndex().getValueType().getVectorNumElements() ==
+ N->getValueType(0).getVectorNumElements() &&
+ "Vector width mismatch between index and data");
+
CSEMap.InsertNode(N, IP);
InsertNode(N);
return SDValue(N, 0);
}
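
getMaskedGather now asserts its operand contract up front: exactly five
operands, with the pass-through value matching the result type and the mask
and index vectors matching the result's element count. A sketch of a
conforming call, shaped like the llvm.masked.gather lowering (Chain,
PassThru, Mask, Base, Index, and MMO assumed in scope with the stated types):

    // Result and PassThru: v4i32; Mask: v4i1; Index: a 4-element
    // vector. Any width mismatch now trips the new asserts.
    SDVTList VTs = DAG.getVTList(MVT::v4i32, MVT::Other);
    SDValue Ops[] = {Chain, PassThru, Mask, Base, Index};
    SDValue Gather = DAG.getMaskedGather(VTs, MVT::v4i32, dl, Ops, MMO);
    SDValue Loaded = Gather.getValue(0);   // gathered data
    SDValue OutChain = Gather.getValue(1); // chain result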
-SDValue SelectionDAG::getMaskedScatter(SDVTList VTs, EVT VT, SDLoc dl,
+SDValue SelectionDAG::getMaskedScatter(SDVTList VTs, EVT VT, const SDLoc &dl,
ArrayRef<SDValue> Ops,
MachineMemOperand *MMO) {
+ assert(Ops.size() == 5 && "Incompatible number of operands");
+
FoldingSetNodeID ID;
AddNodeIDNode(ID, ISD::MSCATTER, VTs, Ops);
ID.AddInteger(VT.getRawBits());
@@ -5416,27 +5416,33 @@ SDValue SelectionDAG::getMaskedScatter(SDVTList VTs, EVT VT, SDLoc dl,
MMO->isInvariant()));
ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
void *IP = nullptr;
- if (SDNode *E = FindNodeOrInsertPos(ID, dl.getDebugLoc(), IP)) {
+ if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) {
cast<MaskedScatterSDNode>(E)->refineAlignment(MMO);
return SDValue(E, 0);
}
- SDNode *N =
- new (NodeAllocator) MaskedScatterSDNode(dl.getIROrder(), dl.getDebugLoc(),
- Ops, VTs, VT, MMO);
+ auto *N = newSDNode<MaskedScatterSDNode>(dl.getIROrder(), dl.getDebugLoc(),
+ VTs, VT, MMO);
+ createOperands(N, Ops);
+
+ assert(N->getMask().getValueType().getVectorNumElements() ==
+ N->getValue().getValueType().getVectorNumElements() &&
+ "Vector width mismatch between mask and data");
+ assert(N->getIndex().getValueType().getVectorNumElements() ==
+ N->getValue().getValueType().getVectorNumElements() &&
+ "Vector width mismatch between index and data");
+
CSEMap.InsertNode(N, IP);
InsertNode(N);
return SDValue(N, 0);
}
-SDValue SelectionDAG::getVAArg(EVT VT, SDLoc dl,
- SDValue Chain, SDValue Ptr,
- SDValue SV,
- unsigned Align) {
+SDValue SelectionDAG::getVAArg(EVT VT, const SDLoc &dl, SDValue Chain,
+ SDValue Ptr, SDValue SV, unsigned Align) {
SDValue Ops[] = { Chain, Ptr, SV, getTargetConstant(Align, dl, MVT::i32) };
return getNode(ISD::VAARG, dl, getVTList(VT, MVT::Other), Ops);
}
-SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT,
+SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
ArrayRef<SDUse> Ops) {
switch (Ops.size()) {
case 0: return getNode(Opcode, DL, VT);
@@ -5452,7 +5458,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT,
return getNode(Opcode, DL, VT, NewOps);
}
-SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT,
+SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
ArrayRef<SDValue> Ops, const SDNodeFlags *Flags) {
unsigned NumOps = Ops.size();
switch (NumOps) {
@@ -5498,27 +5504,28 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT,
AddNodeIDNode(ID, Opcode, VTs, Ops);
void *IP = nullptr;
- if (SDNode *E = FindNodeOrInsertPos(ID, DL.getDebugLoc(), IP))
+ if (SDNode *E = FindNodeOrInsertPos(ID, DL, IP))
return SDValue(E, 0);
- N = new (NodeAllocator) SDNode(Opcode, DL.getIROrder(), DL.getDebugLoc(),
- VTs, Ops);
+ N = newSDNode<SDNode>(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs);
+ createOperands(N, Ops);
+
CSEMap.InsertNode(N, IP);
} else {
- N = new (NodeAllocator) SDNode(Opcode, DL.getIROrder(), DL.getDebugLoc(),
- VTs, Ops);
+ N = newSDNode<SDNode>(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs);
+ createOperands(N, Ops);
}
InsertNode(N);
return SDValue(N, 0);
}
-SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL,
+SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL,
ArrayRef<EVT> ResultTys, ArrayRef<SDValue> Ops) {
return getNode(Opcode, DL, getVTList(ResultTys), Ops);
}
-SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, SDVTList VTList,
+SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList,
ArrayRef<SDValue> Ops) {
if (VTList.NumVTs == 1)
return getNode(Opcode, DL, VTList.VTs[0], Ops);
@@ -5548,83 +5555,56 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, SDVTList VTList,
// Memoize the node unless it returns a flag.
SDNode *N;
- unsigned NumOps = Ops.size();
if (VTList.VTs[VTList.NumVTs-1] != MVT::Glue) {
FoldingSetNodeID ID;
AddNodeIDNode(ID, Opcode, VTList, Ops);
void *IP = nullptr;
- if (SDNode *E = FindNodeOrInsertPos(ID, DL.getDebugLoc(), IP))
+ if (SDNode *E = FindNodeOrInsertPos(ID, DL, IP))
return SDValue(E, 0);
- if (NumOps == 1) {
- N = new (NodeAllocator) UnarySDNode(Opcode, DL.getIROrder(),
- DL.getDebugLoc(), VTList, Ops[0]);
- } else if (NumOps == 2) {
- N = new (NodeAllocator) BinarySDNode(Opcode, DL.getIROrder(),
- DL.getDebugLoc(), VTList, Ops[0],
- Ops[1]);
- } else if (NumOps == 3) {
- N = new (NodeAllocator) TernarySDNode(Opcode, DL.getIROrder(),
- DL.getDebugLoc(), VTList, Ops[0],
- Ops[1], Ops[2]);
- } else {
- N = new (NodeAllocator) SDNode(Opcode, DL.getIROrder(), DL.getDebugLoc(),
- VTList, Ops);
- }
+ N = newSDNode<SDNode>(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTList);
+ createOperands(N, Ops);
CSEMap.InsertNode(N, IP);
} else {
- if (NumOps == 1) {
- N = new (NodeAllocator) UnarySDNode(Opcode, DL.getIROrder(),
- DL.getDebugLoc(), VTList, Ops[0]);
- } else if (NumOps == 2) {
- N = new (NodeAllocator) BinarySDNode(Opcode, DL.getIROrder(),
- DL.getDebugLoc(), VTList, Ops[0],
- Ops[1]);
- } else if (NumOps == 3) {
- N = new (NodeAllocator) TernarySDNode(Opcode, DL.getIROrder(),
- DL.getDebugLoc(), VTList, Ops[0],
- Ops[1], Ops[2]);
- } else {
- N = new (NodeAllocator) SDNode(Opcode, DL.getIROrder(), DL.getDebugLoc(),
- VTList, Ops);
- }
+ N = newSDNode<SDNode>(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTList);
+ createOperands(N, Ops);
}
InsertNode(N);
return SDValue(N, 0);
}
-SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, SDVTList VTList) {
+SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL,
+ SDVTList VTList) {
return getNode(Opcode, DL, VTList, None);
}
-SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, SDVTList VTList,
+SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList,
SDValue N1) {
SDValue Ops[] = { N1 };
return getNode(Opcode, DL, VTList, Ops);
}
-SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, SDVTList VTList,
+SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList,
SDValue N1, SDValue N2) {
SDValue Ops[] = { N1, N2 };
return getNode(Opcode, DL, VTList, Ops);
}
-SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, SDVTList VTList,
+SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList,
SDValue N1, SDValue N2, SDValue N3) {
SDValue Ops[] = { N1, N2, N3 };
return getNode(Opcode, DL, VTList, Ops);
}
-SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, SDVTList VTList,
- SDValue N1, SDValue N2, SDValue N3,
- SDValue N4) {
+SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList,
+ SDValue N1, SDValue N2, SDValue N3, SDValue N4) {
SDValue Ops[] = { N1, N2, N3, N4 };
return getNode(Opcode, DL, VTList, Ops);
}
-SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, SDVTList VTList,
- SDValue N1, SDValue N2, SDValue N3,
- SDValue N4, SDValue N5) {
+SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList,
+ SDValue N1, SDValue N2, SDValue N3, SDValue N4,
+ SDValue N5) {
SDValue Ops[] = { N1, N2, N3, N4, N5 };
return getNode(Opcode, DL, VTList, Ops);
}
@@ -5932,10 +5912,14 @@ SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
SDVTList VTs,ArrayRef<SDValue> Ops) {
- N = MorphNodeTo(N, ~MachineOpc, VTs, Ops);
+ SDNode *New = MorphNodeTo(N, ~MachineOpc, VTs, Ops);
// Reset the NodeID to -1.
- N->setNodeId(-1);
- return N;
+ New->setNodeId(-1);
+ if (New != N) {
+ ReplaceAllUsesWith(N, New);
+ RemoveDeadNode(N);
+ }
+ return New;
}
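
SelectNodeTo may now hand back a different node than the one it morphed: when
an identical node already exists, the CSE map wins, the original's uses are
redirected, and the dead original is removed. Callers must use the returned
pointer rather than assuming in-place mutation. A hedged sketch from an
instruction selector's point of view, with a placeholder target opcode:

    // TARGET::ADDrr is hypothetical; CurDAG is the SelectionDAG* that
    // DAG ISel classes carry. N may be dead after this call.
    SDNode *New = CurDAG->SelectNodeTo(N, TARGET::ADDrr, MVT::i32,
                                       N->getOperand(0), N->getOperand(1));
    return New; // only New is guaranteed valid from here on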
/// UpdadeSDLocOnMergedSDNode - If the opt level is -O0 then it throws away
@@ -5945,7 +5929,7 @@ SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
/// probability having other instructions associated with that line.
///
/// For IROrder, we keep the smaller of the two
-SDNode *SelectionDAG::UpdadeSDLocOnMergedSDNode(SDNode *N, SDLoc OLoc) {
+SDNode *SelectionDAG::UpdadeSDLocOnMergedSDNode(SDNode *N, const SDLoc &OLoc) {
DebugLoc NLoc = N->getDebugLoc();
if (NLoc && OptLevel == CodeGenOpt::None && OLoc.getDebugLoc() != NLoc) {
N->setDebugLoc(DebugLoc());
@@ -5973,13 +5957,12 @@ SDNode *SelectionDAG::UpdadeSDLocOnMergedSDNode(SDNode *N, SDLoc OLoc) {
/// deleting things.
SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc,
SDVTList VTs, ArrayRef<SDValue> Ops) {
- unsigned NumOps = Ops.size();
// If an identical node already exists, use it.
void *IP = nullptr;
if (VTs.VTs[VTs.NumVTs-1] != MVT::Glue) {
FoldingSetNodeID ID;
AddNodeIDNode(ID, Opc, VTs, Ops);
- if (SDNode *ON = FindNodeOrInsertPos(ID, N->getDebugLoc(), IP))
+ if (SDNode *ON = FindNodeOrInsertPos(ID, SDLoc(N), IP))
return UpdadeSDLocOnMergedSDNode(ON, SDLoc(N));
}
@@ -6002,36 +5985,13 @@ SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc,
DeadNodeSet.insert(Used);
}
- if (MachineSDNode *MN = dyn_cast<MachineSDNode>(N)) {
- // Initialize the memory references information.
+ // For MachineNode, initialize the memory references information.
+ if (MachineSDNode *MN = dyn_cast<MachineSDNode>(N))
MN->setMemRefs(nullptr, nullptr);
- // If NumOps is larger than the # of operands we can have in a
- // MachineSDNode, reallocate the operand list.
- if (NumOps > MN->NumOperands || !MN->OperandsNeedDelete) {
- if (MN->OperandsNeedDelete)
- delete[] MN->OperandList;
- if (NumOps > array_lengthof(MN->LocalOperands))
- // We're creating a final node that will live unmorphed for the
- // remainder of the current SelectionDAG iteration, so we can allocate
- // the operands directly out of a pool with no recycling metadata.
- MN->InitOperands(OperandAllocator.Allocate<SDUse>(NumOps),
- Ops.data(), NumOps);
- else
- MN->InitOperands(MN->LocalOperands, Ops.data(), NumOps);
- MN->OperandsNeedDelete = false;
- } else
- MN->InitOperands(MN->OperandList, Ops.data(), NumOps);
- } else {
- // If NumOps is larger than the # of operands we currently have, reallocate
- // the operand list.
- if (NumOps > N->NumOperands) {
- if (N->OperandsNeedDelete)
- delete[] N->OperandList;
- N->InitOperands(new SDUse[NumOps], Ops.data(), NumOps);
- N->OperandsNeedDelete = true;
- } else
- N->InitOperands(N->OperandList, Ops.data(), NumOps);
- }
+
+ // Swap for an appropriately sized array from the recycler.
+ removeOperands(N);
+ createOperands(N, Ops);
// Delete any nodes that are still dead after adding the uses for the
// new operands.
@@ -6055,155 +6015,133 @@ SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc,
/// Note that getMachineNode returns the resultant node. If there is already a
/// node of the specified opcode and operands, it returns that node instead of
/// the current one.
-MachineSDNode *
-SelectionDAG::getMachineNode(unsigned Opcode, SDLoc dl, EVT VT) {
+MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl,
+ EVT VT) {
SDVTList VTs = getVTList(VT);
return getMachineNode(Opcode, dl, VTs, None);
}
-MachineSDNode *
-SelectionDAG::getMachineNode(unsigned Opcode, SDLoc dl, EVT VT, SDValue Op1) {
+MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl,
+ EVT VT, SDValue Op1) {
SDVTList VTs = getVTList(VT);
SDValue Ops[] = { Op1 };
return getMachineNode(Opcode, dl, VTs, Ops);
}
-MachineSDNode *
-SelectionDAG::getMachineNode(unsigned Opcode, SDLoc dl, EVT VT,
- SDValue Op1, SDValue Op2) {
+MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl,
+ EVT VT, SDValue Op1, SDValue Op2) {
SDVTList VTs = getVTList(VT);
SDValue Ops[] = { Op1, Op2 };
return getMachineNode(Opcode, dl, VTs, Ops);
}
-MachineSDNode *
-SelectionDAG::getMachineNode(unsigned Opcode, SDLoc dl, EVT VT,
- SDValue Op1, SDValue Op2, SDValue Op3) {
+MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl,
+ EVT VT, SDValue Op1, SDValue Op2,
+ SDValue Op3) {
SDVTList VTs = getVTList(VT);
SDValue Ops[] = { Op1, Op2, Op3 };
return getMachineNode(Opcode, dl, VTs, Ops);
}
-MachineSDNode *
-SelectionDAG::getMachineNode(unsigned Opcode, SDLoc dl, EVT VT,
- ArrayRef<SDValue> Ops) {
+MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl,
+ EVT VT, ArrayRef<SDValue> Ops) {
SDVTList VTs = getVTList(VT);
return getMachineNode(Opcode, dl, VTs, Ops);
}
-MachineSDNode *
-SelectionDAG::getMachineNode(unsigned Opcode, SDLoc dl, EVT VT1, EVT VT2) {
+MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl,
+ EVT VT1, EVT VT2) {
SDVTList VTs = getVTList(VT1, VT2);
return getMachineNode(Opcode, dl, VTs, None);
}
-MachineSDNode *
-SelectionDAG::getMachineNode(unsigned Opcode, SDLoc dl,
- EVT VT1, EVT VT2, SDValue Op1) {
+MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl,
+ EVT VT1, EVT VT2, SDValue Op1) {
SDVTList VTs = getVTList(VT1, VT2);
SDValue Ops[] = { Op1 };
return getMachineNode(Opcode, dl, VTs, Ops);
}
-MachineSDNode *
-SelectionDAG::getMachineNode(unsigned Opcode, SDLoc dl,
- EVT VT1, EVT VT2, SDValue Op1, SDValue Op2) {
+MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl,
+ EVT VT1, EVT VT2, SDValue Op1,
+ SDValue Op2) {
SDVTList VTs = getVTList(VT1, VT2);
SDValue Ops[] = { Op1, Op2 };
return getMachineNode(Opcode, dl, VTs, Ops);
}
-MachineSDNode *
-SelectionDAG::getMachineNode(unsigned Opcode, SDLoc dl,
- EVT VT1, EVT VT2, SDValue Op1,
- SDValue Op2, SDValue Op3) {
+MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl,
+ EVT VT1, EVT VT2, SDValue Op1,
+ SDValue Op2, SDValue Op3) {
SDVTList VTs = getVTList(VT1, VT2);
SDValue Ops[] = { Op1, Op2, Op3 };
return getMachineNode(Opcode, dl, VTs, Ops);
}
-MachineSDNode *
-SelectionDAG::getMachineNode(unsigned Opcode, SDLoc dl,
- EVT VT1, EVT VT2,
- ArrayRef<SDValue> Ops) {
+MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl,
+ EVT VT1, EVT VT2,
+ ArrayRef<SDValue> Ops) {
SDVTList VTs = getVTList(VT1, VT2);
return getMachineNode(Opcode, dl, VTs, Ops);
}
-MachineSDNode *
-SelectionDAG::getMachineNode(unsigned Opcode, SDLoc dl,
- EVT VT1, EVT VT2, EVT VT3,
- SDValue Op1, SDValue Op2) {
+MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl,
+ EVT VT1, EVT VT2, EVT VT3,
+ SDValue Op1, SDValue Op2) {
SDVTList VTs = getVTList(VT1, VT2, VT3);
SDValue Ops[] = { Op1, Op2 };
return getMachineNode(Opcode, dl, VTs, Ops);
}
-MachineSDNode *
-SelectionDAG::getMachineNode(unsigned Opcode, SDLoc dl,
- EVT VT1, EVT VT2, EVT VT3,
- SDValue Op1, SDValue Op2, SDValue Op3) {
+MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl,
+ EVT VT1, EVT VT2, EVT VT3,
+ SDValue Op1, SDValue Op2,
+ SDValue Op3) {
SDVTList VTs = getVTList(VT1, VT2, VT3);
SDValue Ops[] = { Op1, Op2, Op3 };
return getMachineNode(Opcode, dl, VTs, Ops);
}
-MachineSDNode *
-SelectionDAG::getMachineNode(unsigned Opcode, SDLoc dl,
- EVT VT1, EVT VT2, EVT VT3,
- ArrayRef<SDValue> Ops) {
+MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl,
+ EVT VT1, EVT VT2, EVT VT3,
+ ArrayRef<SDValue> Ops) {
SDVTList VTs = getVTList(VT1, VT2, VT3);
return getMachineNode(Opcode, dl, VTs, Ops);
}
-MachineSDNode *
-SelectionDAG::getMachineNode(unsigned Opcode, SDLoc dl, EVT VT1,
- EVT VT2, EVT VT3, EVT VT4,
- ArrayRef<SDValue> Ops) {
+MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl,
+ EVT VT1, EVT VT2, EVT VT3, EVT VT4,
+ ArrayRef<SDValue> Ops) {
SDVTList VTs = getVTList(VT1, VT2, VT3, VT4);
return getMachineNode(Opcode, dl, VTs, Ops);
}
-MachineSDNode *
-SelectionDAG::getMachineNode(unsigned Opcode, SDLoc dl,
- ArrayRef<EVT> ResultTys,
- ArrayRef<SDValue> Ops) {
+MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl,
+ ArrayRef<EVT> ResultTys,
+ ArrayRef<SDValue> Ops) {
SDVTList VTs = getVTList(ResultTys);
return getMachineNode(Opcode, dl, VTs, Ops);
}
-MachineSDNode *
-SelectionDAG::getMachineNode(unsigned Opcode, SDLoc DL, SDVTList VTs,
- ArrayRef<SDValue> OpsArray) {
+MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &DL,
+ SDVTList VTs,
+ ArrayRef<SDValue> Ops) {
bool DoCSE = VTs.VTs[VTs.NumVTs-1] != MVT::Glue;
MachineSDNode *N;
void *IP = nullptr;
- const SDValue *Ops = OpsArray.data();
- unsigned NumOps = OpsArray.size();
if (DoCSE) {
FoldingSetNodeID ID;
- AddNodeIDNode(ID, ~Opcode, VTs, OpsArray);
+ AddNodeIDNode(ID, ~Opcode, VTs, Ops);
IP = nullptr;
- if (SDNode *E = FindNodeOrInsertPos(ID, DL.getDebugLoc(), IP)) {
+ if (SDNode *E = FindNodeOrInsertPos(ID, DL, IP)) {
return cast<MachineSDNode>(UpdadeSDLocOnMergedSDNode(E, DL));
}
}
// Allocate a new MachineSDNode.
- N = new (NodeAllocator) MachineSDNode(~Opcode, DL.getIROrder(),
- DL.getDebugLoc(), VTs);
-
- // Initialize the operands list.
- if (NumOps > array_lengthof(N->LocalOperands))
- // We're creating a final node that will live unmorphed for the
- // remainder of the current SelectionDAG iteration, so we can allocate
- // the operands directly out of a pool with no recycling metadata.
- N->InitOperands(OperandAllocator.Allocate<SDUse>(NumOps),
- Ops, NumOps);
- else
- N->InitOperands(N->LocalOperands, Ops, NumOps);
- N->OperandsNeedDelete = false;
+ N = newSDNode<MachineSDNode>(~Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs);
+ createOperands(N, Ops);
if (DoCSE)
CSEMap.InsertNode(N, IP);
@@ -6214,9 +6152,8 @@ SelectionDAG::getMachineNode(unsigned Opcode, SDLoc DL, SDVTList VTs,
/// getTargetExtractSubreg - A convenience function for creating
/// TargetOpcode::EXTRACT_SUBREG nodes.
-SDValue
-SelectionDAG::getTargetExtractSubreg(int SRIdx, SDLoc DL, EVT VT,
- SDValue Operand) {
+SDValue SelectionDAG::getTargetExtractSubreg(int SRIdx, const SDLoc &DL, EVT VT,
+ SDValue Operand) {
SDValue SRIdxVal = getTargetConstant(SRIdx, DL, MVT::i32);
SDNode *Subreg = getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL,
VT, Operand, SRIdxVal);
@@ -6225,9 +6162,8 @@ SelectionDAG::getTargetExtractSubreg(int SRIdx, SDLoc DL, EVT VT,
/// getTargetInsertSubreg - A convenience function for creating
/// TargetOpcode::INSERT_SUBREG nodes.
-SDValue
-SelectionDAG::getTargetInsertSubreg(int SRIdx, SDLoc DL, EVT VT,
- SDValue Operand, SDValue Subreg) {
+SDValue SelectionDAG::getTargetInsertSubreg(int SRIdx, const SDLoc &DL, EVT VT,
+ SDValue Operand, SDValue Subreg) {
SDValue SRIdxVal = getTargetConstant(SRIdx, DL, MVT::i32);
SDNode *Result = getMachineNode(TargetOpcode::INSERT_SUBREG, DL,
VT, Operand, Subreg, SRIdxVal);
@@ -6243,7 +6179,7 @@ SDNode *SelectionDAG::getNodeIfExists(unsigned Opcode, SDVTList VTList,
FoldingSetNodeID ID;
AddNodeIDNode(ID, Opcode, VTList, Ops);
void *IP = nullptr;
- if (SDNode *E = FindNodeOrInsertPos(ID, DebugLoc(), IP)) {
+ if (SDNode *E = FindNodeOrInsertPos(ID, SDLoc(), IP)) {
if (Flags)
E->intersectFlagsWith(Flags);
return E;
@@ -6257,7 +6193,7 @@ SDNode *SelectionDAG::getNodeIfExists(unsigned Opcode, SDVTList VTList,
/// SDNode
SDDbgValue *SelectionDAG::getDbgValue(MDNode *Var, MDNode *Expr, SDNode *N,
unsigned R, bool IsIndirect, uint64_t Off,
- DebugLoc DL, unsigned O) {
+ const DebugLoc &DL, unsigned O) {
assert(cast<DILocalVariable>(Var)->isValidLocationForIntrinsic(DL) &&
"Expected inlined-at fields to agree");
return new (DbgInfo->getAlloc())
@@ -6267,7 +6203,7 @@ SDDbgValue *SelectionDAG::getDbgValue(MDNode *Var, MDNode *Expr, SDNode *N,
/// Constant
SDDbgValue *SelectionDAG::getConstantDbgValue(MDNode *Var, MDNode *Expr,
const Value *C, uint64_t Off,
- DebugLoc DL, unsigned O) {
+ const DebugLoc &DL, unsigned O) {
assert(cast<DILocalVariable>(Var)->isValidLocationForIntrinsic(DL) &&
"Expected inlined-at fields to agree");
return new (DbgInfo->getAlloc()) SDDbgValue(Var, Expr, C, Off, DL, O);
@@ -6276,7 +6212,8 @@ SDDbgValue *SelectionDAG::getConstantDbgValue(MDNode *Var, MDNode *Expr,
/// FrameIndex
SDDbgValue *SelectionDAG::getFrameIndexDbgValue(MDNode *Var, MDNode *Expr,
unsigned FI, uint64_t Off,
- DebugLoc DL, unsigned O) {
+ const DebugLoc &DL,
+ unsigned O) {
assert(cast<DILocalVariable>(Var)->isValidLocationForIntrinsic(DL) &&
"Expected inlined-at fields to agree");
return new (DbgInfo->getAlloc()) SDDbgValue(Var, Expr, FI, Off, DL, O);
@@ -6348,6 +6285,9 @@ void SelectionDAG::ReplaceAllUsesWith(SDValue FromN, SDValue To) {
AddModifiedNodeToCSEMaps(User);
}
+ // Preserve Debug Values
+ TransferDbgValues(FromN, To);
+
// If we just RAUW'd the root, take note.
if (FromN == getRoot())
setRoot(To);
@@ -6371,6 +6311,13 @@ void SelectionDAG::ReplaceAllUsesWith(SDNode *From, SDNode *To) {
if (From == To)
return;
+ // Preserve Debug Info. Only do this if there's a use.
+ for (unsigned i = 0, e = From->getNumValues(); i != e; ++i)
+ if (From->hasAnyUseOfValue(i)) {
+ assert((i < To->getNumValues()) && "Invalid To location");
+ TransferDbgValues(SDValue(From, i), SDValue(To, i));
+ }
+
// Iterate over just the existing users of From. See the comments in
// the ReplaceAllUsesWith above.
SDNode::use_iterator UI = From->use_begin(), UE = From->use_end();
@@ -6410,6 +6357,10 @@ void SelectionDAG::ReplaceAllUsesWith(SDNode *From, const SDValue *To) {
if (From->getNumValues() == 1) // Handle the simple case efficiently.
return ReplaceAllUsesWith(SDValue(From, 0), To[0]);
+ // Preserve Debug Info.
+ for (unsigned i = 0, e = From->getNumValues(); i != e; ++i)
+ TransferDbgValues(SDValue(From, i), *To);
+
// Iterate over just the existing users of From. See the comments in
// the ReplaceAllUsesWith above.
SDNode::use_iterator UI = From->use_begin(), UE = From->use_end();
@@ -6454,6 +6405,9 @@ void SelectionDAG::ReplaceAllUsesOfValueWith(SDValue From, SDValue To){
return;
}
+ // Preserve Debug Info.
+ TransferDbgValues(From, To);
+
// Iterate over just the existing users of From. See the comments in
// the ReplaceAllUsesWith above.
SDNode::use_iterator UI = From.getNode()->use_begin(),
@@ -6528,6 +6482,8 @@ void SelectionDAG::ReplaceAllUsesOfValuesWith(const SDValue *From,
if (Num == 1)
return ReplaceAllUsesOfValueWith(*From, *To);
+ TransferDbgValues(*From, *To);
+
// Read up all the uses and make records of them. This helps
// processing new uses that are introduced during the
// replacement process.
@@ -6628,7 +6584,7 @@ unsigned SelectionDAG::AssignTopologicalOrder() {
if (Degree == 0) {
       // All of P's operands are sorted, so P may be sorted now.
P->setNodeId(DAGSize++);
- if (P != SortedPos)
+ if (P->getIterator() != SortedPos)
SortedPos = AllNodes.insert(SortedPos, AllNodes.remove(P));
assert(SortedPos != AllNodes.end() && "Overran node list");
++SortedPos;
@@ -6637,7 +6593,7 @@ unsigned SelectionDAG::AssignTopologicalOrder() {
P->setNodeId(Degree);
}
}
- if (&Node == SortedPos) {
+ if (Node.getIterator() == SortedPos) {
#ifndef NDEBUG
allnodes_iterator I(N);
SDNode *S = &*++I;
@@ -6676,7 +6632,7 @@ void SelectionDAG::AddDbgValue(SDDbgValue *DB, SDNode *SD, bool isParameter) {
DbgInfo->add(DB, SD, isParameter);
}
-/// TransferDbgValues - Transfer SDDbgValues.
+/// TransferDbgValues - Transfer SDDbgValues. Called when nodes are replaced.
void SelectionDAG::TransferDbgValues(SDValue From, SDValue To) {
if (From == To || !From.getNode()->getHasDebugValue())
return;
@@ -6687,17 +6643,22 @@ void SelectionDAG::TransferDbgValues(SDValue From, SDValue To) {
for (ArrayRef<SDDbgValue *>::iterator I = DVs.begin(), E = DVs.end();
I != E; ++I) {
SDDbgValue *Dbg = *I;
- if (Dbg->getKind() == SDDbgValue::SDNODE) {
+    // Only add DbgValues attached to the same ResNo.
+ if (Dbg->getKind() == SDDbgValue::SDNODE &&
+ Dbg->getSDNode() == From.getNode() &&
+ Dbg->getResNo() == From.getResNo() && !Dbg->isInvalidated()) {
+ assert(FromNode != ToNode &&
+ "Should not transfer Debug Values intranode");
SDDbgValue *Clone =
getDbgValue(Dbg->getVariable(), Dbg->getExpression(), ToNode,
To.getResNo(), Dbg->isIndirect(), Dbg->getOffset(),
Dbg->getDebugLoc(), Dbg->getOrder());
ClonedDVs.push_back(Clone);
+ Dbg->setIsInvalidated();
}
}
- for (SmallVectorImpl<SDDbgValue *>::iterator I = ClonedDVs.begin(),
- E = ClonedDVs.end(); I != E; ++I)
- AddDbgValue(*I, ToNode, false);
+ for (SDDbgValue *I : ClonedDVs)
+ AddDbgValue(I, ToNode, false);
}
//===----------------------------------------------------------------------===//
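Taken together, the hunks above wire TransferDbgValues into every ReplaceAllUses* entry point, and the function itself now clones only the SDDbgValues attached to the exact (node, result number) pair being replaced, invalidating the originals so they are not emitted twice. A minimal sketch of that filter, clone, and invalidate pattern over hypothetical record types:

    #include <vector>

    struct DbgRecord {
      int Node, ResNo;
      bool Invalidated;
    };

    // Clone every record attached to (FromNode, FromRes) onto (ToNode, ToRes)
    // and invalidate the original so it is not emitted twice.
    void transferDbgRecords(std::vector<DbgRecord> &Records, int FromNode,
                            int FromRes, int ToNode, int ToRes) {
      std::vector<DbgRecord> Cloned;
      for (DbgRecord &R : Records)
        if (R.Node == FromNode && R.ResNo == FromRes && !R.Invalidated) {
          Cloned.push_back({ToNode, ToRes, false});
          R.Invalidated = true;
        }
      Records.insert(Records.end(), Cloned.begin(), Cloned.end());
    }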
@@ -6724,26 +6685,31 @@ bool llvm::isOneConstant(SDValue V) {
return Const != nullptr && Const->isOne();
}
+bool llvm::isBitwiseNot(SDValue V) {
+ return V.getOpcode() == ISD::XOR && isAllOnesConstant(V.getOperand(1));
+}
+
HandleSDNode::~HandleSDNode() {
DropOperands();
}
GlobalAddressSDNode::GlobalAddressSDNode(unsigned Opc, unsigned Order,
- DebugLoc DL, const GlobalValue *GA,
- EVT VT, int64_t o, unsigned char TF)
- : SDNode(Opc, Order, DL, getSDVTList(VT)), Offset(o), TargetFlags(TF) {
+ const DebugLoc &DL,
+ const GlobalValue *GA, EVT VT,
+ int64_t o, unsigned char TF)
+ : SDNode(Opc, Order, DL, getSDVTList(VT)), Offset(o), TargetFlags(TF) {
TheGlobal = GA;
}
-AddrSpaceCastSDNode::AddrSpaceCastSDNode(unsigned Order, DebugLoc dl, EVT VT,
- SDValue X, unsigned SrcAS,
+AddrSpaceCastSDNode::AddrSpaceCastSDNode(unsigned Order, const DebugLoc &dl,
+ EVT VT, unsigned SrcAS,
unsigned DestAS)
- : UnarySDNode(ISD::ADDRSPACECAST, Order, dl, getSDVTList(VT), X),
- SrcAddrSpace(SrcAS), DestAddrSpace(DestAS) {}
+ : SDNode(ISD::ADDRSPACECAST, Order, dl, getSDVTList(VT)),
+ SrcAddrSpace(SrcAS), DestAddrSpace(DestAS) {}
-MemSDNode::MemSDNode(unsigned Opc, unsigned Order, DebugLoc dl, SDVTList VTs,
- EVT memvt, MachineMemOperand *mmo)
- : SDNode(Opc, Order, dl, VTs), MemoryVT(memvt), MMO(mmo) {
+MemSDNode::MemSDNode(unsigned Opc, unsigned Order, const DebugLoc &dl,
+ SDVTList VTs, EVT memvt, MachineMemOperand *mmo)
+ : SDNode(Opc, Order, dl, VTs), MemoryVT(memvt), MMO(mmo) {
SubclassData = encodeMemSDNodeFlags(0, ISD::UNINDEXED, MMO->isVolatile(),
MMO->isNonTemporal(), MMO->isInvariant());
assert(isVolatile() == MMO->isVolatile() && "Volatile encoding error!");
@@ -6755,16 +6721,6 @@ MemSDNode::MemSDNode(unsigned Opc, unsigned Order, DebugLoc dl, SDVTList VTs,
assert(memvt.getStoreSize() <= MMO->getSize() && "Size mismatch!");
}
-MemSDNode::MemSDNode(unsigned Opc, unsigned Order, DebugLoc dl, SDVTList VTs,
- ArrayRef<SDValue> Ops, EVT memvt, MachineMemOperand *mmo)
- : SDNode(Opc, Order, dl, VTs, Ops),
- MemoryVT(memvt), MMO(mmo) {
- SubclassData = encodeMemSDNodeFlags(0, ISD::UNINDEXED, MMO->isVolatile(),
- MMO->isNonTemporal(), MMO->isInvariant());
- assert(isVolatile() == MMO->isVolatile() && "Volatile encoding error!");
- assert(memvt.getStoreSize() <= MMO->getSize() && "Size mismatch!");
-}
-
/// Profile - Gather unique data for the node.
///
void SDNode::Profile(FoldingSetNodeID &ID) const {
@@ -6894,44 +6850,13 @@ bool SDValue::reachesChainWithoutSideEffects(SDValue Dest,
return false;
}
-/// hasPredecessor - Return true if N is a predecessor of this node.
-/// N is either an operand of this node, or can be reached by recursively
-/// traversing up the operands.
-/// NOTE: This is an expensive method. Use it carefully.
bool SDNode::hasPredecessor(const SDNode *N) const {
SmallPtrSet<const SDNode *, 32> Visited;
SmallVector<const SDNode *, 16> Worklist;
+ Worklist.push_back(this);
return hasPredecessorHelper(N, Visited, Worklist);
}
-bool
-SDNode::hasPredecessorHelper(const SDNode *N,
- SmallPtrSetImpl<const SDNode *> &Visited,
- SmallVectorImpl<const SDNode *> &Worklist) const {
- if (Visited.empty()) {
- Worklist.push_back(this);
- } else {
- // Take a look in the visited set. If we've already encountered this node
- // we needn't search further.
- if (Visited.count(N))
- return true;
- }
-
- // Haven't visited N yet. Continue the search.
- while (!Worklist.empty()) {
- const SDNode *M = Worklist.pop_back_val();
- for (const SDValue &OpV : M->op_values()) {
- SDNode *Op = OpV.getNode();
- if (Visited.insert(Op).second)
- Worklist.push_back(Op);
- if (Op == N)
- return true;
- }
- }
-
- return false;
-}
-
uint64_t SDNode::getConstantOperandVal(unsigned Num) const {
assert(Num < NumOperands && "Invalid child # of SDNode!");
return cast<ConstantSDNode>(OperandList[Num])->getZExtValue();
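hasPredecessor now simply seeds the worklist with the node itself and defers to hasPredecessorHelper (moved out of this file), rather than keeping a second copy of the traversal. The underlying search is a plain iterative DFS over operand edges; a standalone sketch over an adjacency-list graph (a hypothetical stand-in for the SDNode operand lists):

    #include <vector>

    // Return true if Target is reachable from Start by walking operand edges
    // (Operands[n] lists the nodes n uses); the graph is assumed acyclic.
    bool hasPredecessor(const std::vector<std::vector<int>> &Operands,
                        int Start, int Target) {
      std::vector<bool> Visited(Operands.size(), false);
      std::vector<int> Worklist{Start};        // seed with the node itself
      while (!Worklist.empty()) {
        int N = Worklist.back();
        Worklist.pop_back();
        for (int Op : Operands[N]) {
          if (Op == Target)
            return true;
          if (!Visited[Op]) {
            Visited[Op] = true;
            Worklist.push_back(Op);
          }
        }
      }
      return false;
    }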
@@ -7018,12 +6943,14 @@ SDValue SelectionDAG::UnrollVectorOp(SDNode *N, unsigned ResNE) {
EVT::getVectorVT(*getContext(), EltVT, ResNE), Scalars);
}
-
-/// isConsecutiveLoad - Return true if LD is loading 'Bytes' bytes from a
-/// location that is 'Dist' units away from the location that the 'Base' load
-/// is loading from.
-bool SelectionDAG::isConsecutiveLoad(LoadSDNode *LD, LoadSDNode *Base,
- unsigned Bytes, int Dist) const {
+bool SelectionDAG::areNonVolatileConsecutiveLoads(LoadSDNode *LD,
+ LoadSDNode *Base,
+ unsigned Bytes,
+ int Dist) const {
+ if (LD->isVolatile() || Base->isVolatile())
+ return false;
+ if (LD->isIndexed() || Base->isIndexed())
+ return false;
if (LD->getChain() != Base->getChain())
return false;
EVT VT = LD->getValueType(0);
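The renamed areNonVolatileConsecutiveLoads makes the precondition explicit: volatile and indexed loads are rejected up front before the address comparison. The comparison itself boils down to checking that the two offsets differ by exactly Dist * Bytes; a standalone sketch, assuming both addresses have been resolved to (base, offset) pairs:

    #include <cstdint>

    struct ResolvedAddr { int Base; int64_t Offset; }; // hypothetical (base, offset)

    // LD reads Bytes bytes; return true if its address is exactly Dist units
    // of Bytes away from Base's address (Dist may be negative).
    bool areConsecutive(ResolvedAddr LD, ResolvedAddr Base,
                        unsigned Bytes, int Dist) {
      return LD.Base == Base.Base &&
             LD.Offset - Base.Offset == int64_t(Dist) * int64_t(Bytes);
    }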
@@ -7204,7 +7131,7 @@ bool BuildVectorSDNode::isConstantSplat(APInt &SplatValue,
SDValue OpVal = getOperand(i);
unsigned BitPos = j * EltBitSize;
- if (OpVal.getOpcode() == ISD::UNDEF)
+ if (OpVal.isUndef())
SplatUndef |= APInt::getBitsSet(sz, BitPos, BitPos + EltBitSize);
else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal))
SplatValue |= CN->getAPIntValue().zextOrTrunc(EltBitSize).
@@ -7250,7 +7177,7 @@ SDValue BuildVectorSDNode::getSplatValue(BitVector *UndefElements) const {
SDValue Splatted;
for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
SDValue Op = getOperand(i);
- if (Op.getOpcode() == ISD::UNDEF) {
+ if (Op.isUndef()) {
if (UndefElements)
(*UndefElements)[i] = true;
} else if (!Splatted) {
@@ -7261,7 +7188,7 @@ SDValue BuildVectorSDNode::getSplatValue(BitVector *UndefElements) const {
}
if (!Splatted) {
- assert(getOperand(0).getOpcode() == ISD::UNDEF &&
+ assert(getOperand(0).isUndef() &&
"Can only have a splat without a constant for all undefs.");
return getOperand(0);
}
@@ -7286,7 +7213,7 @@ BuildVectorSDNode::getConstantFPSplatPow2ToLog2Int(BitVector *UndefElements,
dyn_cast_or_null<ConstantFPSDNode>(getSplatValue(UndefElements))) {
bool IsExact;
APSInt IntVal(BitWidth);
- APFloat APF = CN->getValueAPF();
+ const APFloat &APF = CN->getValueAPF();
if (APF.convertToInteger(IntVal, APFloat::rmTowardZero, &IsExact) !=
APFloat::opOK ||
!IsExact)
@@ -7322,6 +7249,22 @@ bool ShuffleVectorSDNode::isSplatMask(const int *Mask, EVT VT) {
return true;
}
+// \brief Returns the SDNode if it is a constant integer BuildVector
+// or a constant integer.
+SDNode *SelectionDAG::isConstantIntBuildVectorOrConstantInt(SDValue N) {
+ if (isa<ConstantSDNode>(N))
+ return N.getNode();
+ if (ISD::isBuildVectorOfConstantSDNodes(N.getNode()))
+ return N.getNode();
+ // Treat a GlobalAddress supporting constant offset folding as a
+ // constant integer.
+ if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N))
+ if (GA->getOpcode() == ISD::GlobalAddress &&
+ TLI->isOffsetFoldingLegal(GA))
+ return GA;
+ return nullptr;
+}
+
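isConstantIntBuildVectorOrConstantInt lets DAG combines treat a scalar constant, a build_vector of constants, and now an offset-foldable global address uniformly. A sketch of the same several-shapes-count-as-constant dispatch over a hypothetical tagged node type:

    enum class NodeKind { ConstantInt, BuildVectorOfConstants, GlobalAddress, Other };

    struct Node {
      NodeKind Kind;
      bool OffsetFoldingLegal = false;  // only meaningful for GlobalAddress
    };

    // Mirror of the predicate: return the node if it may be folded as a
    // constant integer, otherwise null.
    const Node *asConstantInt(const Node &N) {
      switch (N.Kind) {
      case NodeKind::ConstantInt:
      case NodeKind::BuildVectorOfConstants:
        return &N;
      case NodeKind::GlobalAddress:
        return N.OffsetFoldingLegal ? &N : nullptr;
      default:
        return nullptr;
      }
    }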
#ifndef NDEBUG
static void checkForCyclesHelper(const SDNode *N,
SmallPtrSetImpl<const SDNode*> &Visited,
@@ -7353,9 +7296,9 @@ void llvm::checkForCycles(const llvm::SDNode *N,
bool force) {
#ifndef NDEBUG
bool check = force;
-#ifdef XDEBUG
+#ifdef EXPENSIVE_CHECKS
check = true;
-#endif // XDEBUG
+#endif // EXPENSIVE_CHECKS
if (check) {
assert(N && "Checking nonexistent SDNode");
SmallPtrSet<const SDNode*, 32> visited;
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 45ae39a..e1fc37d 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -20,9 +20,11 @@
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/VectorUtils.h"
+#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/FastISel.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/GCMetadata.h"
@@ -34,6 +36,7 @@
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/SelectionDAGTargetInfo.h"
#include "llvm/CodeGen/StackMaps.h"
#include "llvm/CodeGen/WinEHFuncInfo.h"
#include "llvm/IR/CallingConv.h"
@@ -42,6 +45,7 @@
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
+#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/Instructions.h"
@@ -61,7 +65,6 @@
#include "llvm/Target/TargetIntrinsicInfo.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetOptions.h"
-#include "llvm/Target/TargetSelectionDAGInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
#include <algorithm>
#include <utility>
@@ -84,6 +87,19 @@ static cl::opt<bool>
EnableFMFInDAG("enable-fmf-dag", cl::init(true), cl::Hidden,
cl::desc("Enable fast-math-flags for DAG nodes"));
+/// Minimum jump table density for normal functions.
+static cl::opt<unsigned>
+JumpTableDensity("jump-table-density", cl::init(10), cl::Hidden,
+ cl::desc("Minimum density for building a jump table in "
+ "a normal function"));
+
+/// Minimum jump table density for -Os or -Oz functions.
+static cl::opt<unsigned>
+OptsizeJumpTableDensity("optsize-jump-table-density", cl::init(40), cl::Hidden,
+ cl::desc("Minimum density for building a jump table in "
+ "an optsize function"));
+
+
// Limit the width of DAG chains. This is important in general to prevent
// DAG-based analysis from blowing up. For example, alias analysis and
// load clustering may not complete in reasonable time. It is difficult to
@@ -94,26 +110,25 @@ EnableFMFInDAG("enable-fmf-dag", cl::init(true), cl::Hidden,
// MaxParallelChains default is arbitrarily high to avoid affecting
// optimization, but could be lowered to improve compile time. Any ld-ld-st-st
// sequence over this should have been converted to llvm.memcpy by the
-// frontend. It easy to induce this behavior with .ll code such as:
+// frontend. It is easy to induce this behavior with .ll code such as:
// %buffer = alloca [4096 x i8]
// %data = load [4096 x i8]* %argPtr
// store [4096 x i8] %data, [4096 x i8]* %buffer
static const unsigned MaxParallelChains = 64;
-static SDValue getCopyFromPartsVector(SelectionDAG &DAG, SDLoc DL,
+static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL,
const SDValue *Parts, unsigned NumParts,
MVT PartVT, EVT ValueVT, const Value *V);
/// getCopyFromParts - Create a value that contains the specified legal parts
/// combined into the value they represent. If the parts combine to a type
-/// larger then ValueVT then AssertOp can be used to specify whether the extra
+/// larger than ValueVT then AssertOp can be used to specify whether the extra
/// bits are known to be zero (ISD::AssertZext) or sign extended from ValueVT
/// (ISD::AssertSext).
-static SDValue getCopyFromParts(SelectionDAG &DAG, SDLoc DL,
- const SDValue *Parts,
- unsigned NumParts, MVT PartVT, EVT ValueVT,
- const Value *V,
- ISD::NodeType AssertOp = ISD::DELETED_NODE) {
+static SDValue getCopyFromParts(SelectionDAG &DAG, const SDLoc &DL,
+ const SDValue *Parts, unsigned NumParts,
+ MVT PartVT, EVT ValueVT, const Value *V,
+ Optional<ISD::NodeType> AssertOp = None) {
if (ValueVT.isVector())
return getCopyFromPartsVector(DAG, DL, Parts, NumParts,
PartVT, ValueVT, V);
@@ -193,6 +208,8 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, SDLoc DL,
}
// There is now one part, held in Val. Correct it to match ValueVT.
+ // PartEVT is the type of the register class that holds the value.
+ // ValueVT is the type of the inline asm operation.
EVT PartEVT = Val.getValueType();
if (PartEVT == ValueVT)
@@ -206,13 +223,18 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, SDLoc DL,
Val = DAG.getNode(ISD::TRUNCATE, DL, PartEVT, Val);
}
+ // Handle types that have the same size.
+ if (PartEVT.getSizeInBits() == ValueVT.getSizeInBits())
+ return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
+
+ // Handle types with different sizes.
if (PartEVT.isInteger() && ValueVT.isInteger()) {
if (ValueVT.bitsLT(PartEVT)) {
// For a truncate, see if we have any information to
// indicate whether the truncated bits will always be
// zero or sign-extension.
- if (AssertOp != ISD::DELETED_NODE)
- Val = DAG.getNode(AssertOp, DL, PartEVT, Val,
+ if (AssertOp.hasValue())
+ Val = DAG.getNode(*AssertOp, DL, PartEVT, Val,
DAG.getValueType(ValueVT));
return DAG.getNode(ISD::TRUNCATE, DL, ValueVT, Val);
}
@@ -229,9 +251,6 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, SDLoc DL,
return DAG.getNode(ISD::FP_EXTEND, DL, ValueVT, Val);
}
- if (PartEVT.getSizeInBits() == ValueVT.getSizeInBits())
- return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
-
llvm_unreachable("Unknown mismatch!");
}
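getCopyFromParts replaces the ISD::DELETED_NODE sentinel with an Optional<ISD::NodeType>, so "no assert requested" is expressed in the type instead of by a magic enumerator. The same idiom in standard C++, with std::optional standing in for llvm::Optional:

    #include <cstdio>
    #include <optional>

    enum class AssertKind { Sext, Zext };

    void copyFromParts(std::optional<AssertKind> Assert = std::nullopt) {
      if (Assert) {                        // was: AssertOp != ISD::DELETED_NODE
        if (*Assert == AssertKind::Sext)
          std::puts("emit AssertSext");
        else
          std::puts("emit AssertZext");
      }
    }

    int main() {
      copyFromParts();                     // no assertion requested
      copyFromParts(AssertKind::Zext);     // upper bits known zero
    }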
@@ -251,10 +270,10 @@ static void diagnosePossiblyInvalidConstraint(LLVMContext &Ctx, const Value *V,
/// getCopyFromPartsVector - Create a value that contains the specified legal
/// parts combined into the value they represent. If the parts combine to a
-/// type larger then ValueVT then AssertOp can be used to specify whether the
+/// type larger than ValueVT then AssertOp can be used to specify whether the
/// extra bits are known to be zero (ISD::AssertZext) or sign extended from
/// ValueVT (ISD::AssertSext).
-static SDValue getCopyFromPartsVector(SelectionDAG &DAG, SDLoc DL,
+static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL,
const SDValue *Parts, unsigned NumParts,
MVT PartVT, EVT ValueVT, const Value *V) {
assert(ValueVT.isVector() && "Not a vector value");
@@ -353,16 +372,16 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, SDLoc DL,
return DAG.getNode(ISD::BUILD_VECTOR, DL, ValueVT, Val);
}
-static void getCopyToPartsVector(SelectionDAG &DAG, SDLoc dl,
+static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &dl,
SDValue Val, SDValue *Parts, unsigned NumParts,
MVT PartVT, const Value *V);
/// getCopyToParts - Create a series of nodes that contain the specified value
/// split into legal parts. If the parts contain more bits than Val, then, for
/// integers, ExtendKind can be used to specify how to generate the extra bits.
-static void getCopyToParts(SelectionDAG &DAG, SDLoc DL,
- SDValue Val, SDValue *Parts, unsigned NumParts,
- MVT PartVT, const Value *V,
+static void getCopyToParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val,
+ SDValue *Parts, unsigned NumParts, MVT PartVT,
+ const Value *V,
ISD::NodeType ExtendKind = ISD::ANY_EXTEND) {
EVT ValueVT = Val.getValueType();
@@ -427,9 +446,11 @@ static void getCopyToParts(SelectionDAG &DAG, SDLoc DL,
"Failed to tile the value with PartVT!");
if (NumParts == 1) {
- if (PartEVT != ValueVT)
+ if (PartEVT != ValueVT) {
diagnosePossiblyInvalidConstraint(*DAG.getContext(), V,
"scalar-to-vector conversion failed");
+ Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
+ }
Parts[0] = Val;
return;
@@ -489,7 +510,7 @@ static void getCopyToParts(SelectionDAG &DAG, SDLoc DL,
/// getCopyToPartsVector - Create a series of nodes that contain the specified
/// value split into legal parts.
-static void getCopyToPartsVector(SelectionDAG &DAG, SDLoc DL,
+static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &DL,
SDValue Val, SDValue *Parts, unsigned NumParts,
MVT PartVT, const Value *V) {
EVT ValueVT = Val.getValueType();
@@ -618,9 +639,8 @@ RegsForValue::RegsForValue(LLVMContext &Context, const TargetLowering &TLI,
/// If the Flag pointer is NULL, no flag is used.
SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG,
FunctionLoweringInfo &FuncInfo,
- SDLoc dl,
- SDValue &Chain, SDValue *Flag,
- const Value *V) const {
+ const SDLoc &dl, SDValue &Chain,
+ SDValue *Flag, const Value *V) const {
// A Value with type {} or [0 x %t] needs no registers.
if (ValueVTs.empty())
return SDValue();
@@ -676,25 +696,33 @@ SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG,
// now, just use the tightest assertzext/assertsext possible.
bool isSExt = true;
EVT FromVT(MVT::Other);
- if (NumSignBits == RegSize)
- isSExt = true, FromVT = MVT::i1; // ASSERT SEXT 1
- else if (NumZeroBits >= RegSize-1)
- isSExt = false, FromVT = MVT::i1; // ASSERT ZEXT 1
- else if (NumSignBits > RegSize-8)
- isSExt = true, FromVT = MVT::i8; // ASSERT SEXT 8
- else if (NumZeroBits >= RegSize-8)
- isSExt = false, FromVT = MVT::i8; // ASSERT ZEXT 8
- else if (NumSignBits > RegSize-16)
- isSExt = true, FromVT = MVT::i16; // ASSERT SEXT 16
- else if (NumZeroBits >= RegSize-16)
- isSExt = false, FromVT = MVT::i16; // ASSERT ZEXT 16
- else if (NumSignBits > RegSize-32)
- isSExt = true, FromVT = MVT::i32; // ASSERT SEXT 32
- else if (NumZeroBits >= RegSize-32)
- isSExt = false, FromVT = MVT::i32; // ASSERT ZEXT 32
- else
+ if (NumSignBits == RegSize) {
+ isSExt = true; // ASSERT SEXT 1
+ FromVT = MVT::i1;
+ } else if (NumZeroBits >= RegSize - 1) {
+ isSExt = false; // ASSERT ZEXT 1
+ FromVT = MVT::i1;
+ } else if (NumSignBits > RegSize - 8) {
+ isSExt = true; // ASSERT SEXT 8
+ FromVT = MVT::i8;
+ } else if (NumZeroBits >= RegSize - 8) {
+ isSExt = false; // ASSERT ZEXT 8
+ FromVT = MVT::i8;
+ } else if (NumSignBits > RegSize - 16) {
+ isSExt = true; // ASSERT SEXT 16
+ FromVT = MVT::i16;
+ } else if (NumZeroBits >= RegSize - 16) {
+ isSExt = false; // ASSERT ZEXT 16
+ FromVT = MVT::i16;
+ } else if (NumSignBits > RegSize - 32) {
+ isSExt = true; // ASSERT SEXT 32
+ FromVT = MVT::i32;
+ } else if (NumZeroBits >= RegSize - 32) {
+ isSExt = false; // ASSERT ZEXT 32
+ FromVT = MVT::i32;
+ } else {
continue;
-
+ }
// Add an assertion node.
assert(FromVT != MVT::Other);
Parts[i] = DAG.getNode(isSExt ? ISD::AssertSext : ISD::AssertZext, dl,
@@ -714,8 +742,9 @@ SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG,
/// specified value into the registers specified by this object. This uses
/// Chain/Flag as the input and updates them for the output Chain/Flag.
/// If the Flag pointer is NULL, no flag is used.
-void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG, SDLoc dl,
- SDValue &Chain, SDValue *Flag, const Value *V,
+void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG,
+ const SDLoc &dl, SDValue &Chain, SDValue *Flag,
+ const Value *V,
ISD::NodeType PreferredExtendType) const {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
ISD::NodeType ExtendKind = PreferredExtendType;
@@ -770,7 +799,7 @@ void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG, SDLoc dl,
/// operand list. This adds the code marker and includes the number of
/// values added into it.
void RegsForValue::AddInlineAsmOperands(unsigned Code, bool HasMatching,
- unsigned MatchingIdx, SDLoc dl,
+ unsigned MatchingIdx, const SDLoc &dl,
SelectionDAG &DAG,
std::vector<SDValue> &Ops) const {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
@@ -902,10 +931,48 @@ SDValue SelectionDAGBuilder::getControlRoot() {
return Root;
}
+/// Copy swifterror values to their final virtual registers at the end of a
+/// basic block, as specified by SwiftErrorWorklist, if necessary.
+static void copySwiftErrorsToFinalVRegs(SelectionDAGBuilder &SDB) {
+ const TargetLowering &TLI = SDB.DAG.getTargetLoweringInfo();
+ if (!TLI.supportSwiftError())
+ return;
+
+ if (!SDB.FuncInfo.SwiftErrorWorklist.count(SDB.FuncInfo.MBB))
+ return;
+
+  // Go through the entries in SwiftErrorWorklist, and create copies as needed.
+ FunctionLoweringInfo::SwiftErrorVRegs &WorklistEntry =
+ SDB.FuncInfo.SwiftErrorWorklist[SDB.FuncInfo.MBB];
+ FunctionLoweringInfo::SwiftErrorVRegs &MapEntry =
+ SDB.FuncInfo.SwiftErrorMap[SDB.FuncInfo.MBB];
+ for (unsigned I = 0, E = WorklistEntry.size(); I < E; I++) {
+ unsigned WorkReg = WorklistEntry[I];
+
+ // Find the swifterror virtual register for the value in SwiftErrorMap.
+ unsigned MapReg = MapEntry[I];
+ assert(TargetRegisterInfo::isVirtualRegister(MapReg) &&
+ "Entries in SwiftErrorMap should be virtual registers");
+
+ if (WorkReg == MapReg)
+ continue;
+
+    // Create a copy from SwiftErrorMap to SwiftErrorWorklist.
+ auto &DL = SDB.DAG.getDataLayout();
+ SDValue CopyNode = SDB.DAG.getCopyToReg(
+ SDB.getRoot(), SDB.getCurSDLoc(), WorkReg,
+ SDB.DAG.getRegister(MapReg, EVT(TLI.getPointerTy(DL))));
+ MapEntry[I] = WorkReg;
+ SDB.DAG.setRoot(CopyNode);
+ }
+}
+
void SelectionDAGBuilder::visit(const Instruction &I) {
// Set up outgoing PHI node register values before emitting the terminator.
- if (isa<TerminatorInst>(&I))
+ if (isa<TerminatorInst>(&I)) {
+ copySwiftErrorsToFinalVRegs(*this);
HandlePHINodesInSuccessorBlocks(I.getParent());
+ }
++SDNodeOrder;
@@ -992,10 +1059,8 @@ SDValue SelectionDAGBuilder::getValue(const Value *V) {
// If there's a virtual register allocated and initialized for this
// value, use it.
- SDValue copyFromReg = getCopyFromRegs(V, V->getType());
- if (copyFromReg.getNode()) {
+ if (SDValue copyFromReg = getCopyFromRegs(V, V->getType()))
return copyFromReg;
- }
// Otherwise create a new SDValue and remember it.
SDValue Val = getValueImpl(V);
@@ -1206,7 +1271,7 @@ void SelectionDAGBuilder::visitCatchRet(const CatchReturnInst &I) {
// This will be used by the FuncletLayout pass to determine how to order the
// BB's.
// A 'catchret' returns to the outer scope's color.
- Value *ParentPad = I.getParentPad();
+ Value *ParentPad = I.getCatchSwitchParentPad();
const BasicBlock *SuccessorColor;
if (isa<ConstantTokenNone>(ParentPad))
SuccessorColor = &FuncInfo.Fn->getEntryBlock();
@@ -1314,6 +1379,18 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
SmallVector<ISD::OutputArg, 8> Outs;
SmallVector<SDValue, 8> OutVals;
+ // Calls to @llvm.experimental.deoptimize don't generate a return value, so
+ // lower
+ //
+ // %val = call <ty> @llvm.experimental.deoptimize()
+ // ret <ty> %val
+ //
+ // differently.
+ if (I.getParent()->getTerminatingDeoptimizeCall()) {
+ LowerDeoptimizingReturn();
+ return;
+ }
+
if (!FuncInfo.CanLowerReturn) {
unsigned DemoteReg = FuncInfo.DemoteRegister;
const Function *F = I.getParent()->getParent();
@@ -1346,11 +1423,10 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
DAG.getIntPtrConstant(Offsets[i],
getCurSDLoc()),
&Flags);
- Chains[i] =
- DAG.getStore(Chain, getCurSDLoc(),
- SDValue(RetOp.getNode(), RetOp.getResNo() + i),
- // FIXME: better loc info would be nice.
- Add, MachinePointerInfo(), false, false, 0);
+ Chains[i] = DAG.getStore(Chain, getCurSDLoc(),
+ SDValue(RetOp.getNode(), RetOp.getResNo() + i),
+ // FIXME: better loc info would be nice.
+ Add, MachinePointerInfo());
}
Chain = DAG.getNode(ISD::TokenFactor, getCurSDLoc(),
@@ -1380,7 +1456,7 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
EVT VT = ValueVTs[j];
if (ExtendKind != ISD::ANY_EXTEND && VT.isInteger())
- VT = TLI.getTypeForExtArgOrReturn(Context, VT, ExtendKind);
+ VT = TLI.getTypeForExtReturn(Context, VT, ExtendKind);
unsigned NumParts = TLI.getNumRegisters(Context, VT);
MVT PartVT = TLI.getRegisterType(Context, VT);
@@ -1409,6 +1485,23 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
}
}
+  // Push the swifterror virtual register as the last element of Outs. This
+  // makes sure the swifterror virtual register will be returned in the
+  // swifterror physical register.
+ const Function *F = I.getParent()->getParent();
+ if (TLI.supportSwiftError() &&
+ F->getAttributes().hasAttrSomewhere(Attribute::SwiftError)) {
+ ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy();
+ Flags.setSwiftError();
+ Outs.push_back(ISD::OutputArg(Flags, EVT(TLI.getPointerTy(DL)) /*vt*/,
+ EVT(TLI.getPointerTy(DL)) /*argvt*/,
+ true /*isfixed*/, 1 /*origidx*/,
+ 0 /*partOffs*/));
+ // Create SDNode for the swifterror virtual register.
+ OutVals.push_back(DAG.getRegister(FuncInfo.SwiftErrorMap[FuncInfo.MBB][0],
+ EVT(TLI.getPointerTy(DL))));
+ }
+
bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg();
CallingConv::ID CallConv =
DAG.getMachineFunction().getFunction()->getCallingConv();
@@ -1906,6 +1999,27 @@ void SelectionDAGBuilder::visitJumpTableHeader(JumpTable &JT,
DAG.setRoot(BrCond);
}
+/// Create a LOAD_STACK_GUARD node, and let it carry the target specific global
+/// variable if there exists one.
+static SDValue getLoadStackGuard(SelectionDAG &DAG, const SDLoc &DL,
+ SDValue &Chain) {
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ EVT PtrTy = TLI.getPointerTy(DAG.getDataLayout());
+ MachineFunction &MF = DAG.getMachineFunction();
+ Value *Global = TLI.getSDagStackGuard(*MF.getFunction()->getParent());
+ MachineSDNode *Node =
+ DAG.getMachineNode(TargetOpcode::LOAD_STACK_GUARD, DL, PtrTy, Chain);
+ if (Global) {
+ MachinePointerInfo MPInfo(Global);
+ MachineInstr::mmo_iterator MemRefs = MF.allocateMemRefsArray(1);
+ auto Flags = MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant;
+ *MemRefs = MF.getMachineMemOperand(MPInfo, Flags, PtrTy.getSizeInBits() / 8,
+ DAG.getEVTAlignment(PtrTy));
+ Node->setMemRefs(MemRefs, MemRefs + 1);
+ }
+ return SDValue(Node, 0);
+}
+
/// Codegen a new tail for a stack protector check ParentMBB which has had its
/// tail spliced into a stack protector check success bb.
///
@@ -1922,32 +2036,59 @@ void SelectionDAGBuilder::visitSPDescriptorParent(StackProtectorDescriptor &SPD,
MachineFrameInfo *MFI = ParentBB->getParent()->getFrameInfo();
int FI = MFI->getStackProtectorIndex();
- const Value *IRGuard = SPD.getGuard();
- SDValue GuardPtr = getValue(IRGuard);
+ SDValue Guard;
+ SDLoc dl = getCurSDLoc();
SDValue StackSlotPtr = DAG.getFrameIndex(FI, PtrTy);
+ const Module &M = *ParentBB->getParent()->getFunction()->getParent();
+ unsigned Align = DL->getPrefTypeAlignment(Type::getInt8PtrTy(M.getContext()));
- unsigned Align = DL->getPrefTypeAlignment(IRGuard->getType());
+ // Generate code to load the content of the guard slot.
+ SDValue StackSlot = DAG.getLoad(
+ PtrTy, dl, DAG.getEntryNode(), StackSlotPtr,
+ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), Align,
+ MachineMemOperand::MOVolatile);
+
+ // Retrieve guard check function, nullptr if instrumentation is inlined.
+ if (const Value *GuardCheck = TLI.getSSPStackGuardCheck(M)) {
+ // The target provides a guard check function to validate the guard value.
+ // Generate a call to that function with the content of the guard slot as
+ // argument.
+ auto *Fn = cast<Function>(GuardCheck);
+ FunctionType *FnTy = Fn->getFunctionType();
+ assert(FnTy->getNumParams() == 1 && "Invalid function signature");
- SDValue Guard;
- SDLoc dl = getCurSDLoc();
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+ Entry.Node = StackSlot;
+ Entry.Ty = FnTy->getParamType(0);
+ if (Fn->hasAttribute(1, Attribute::AttrKind::InReg))
+ Entry.isInReg = true;
+ Args.push_back(Entry);
- // If GuardReg is set and useLoadStackGuardNode returns true, retrieve the
- // guard value from the virtual register holding the value. Otherwise, emit a
- // volatile load to retrieve the stack guard value.
- unsigned GuardReg = SPD.getGuardReg();
+ TargetLowering::CallLoweringInfo CLI(DAG);
+ CLI.setDebugLoc(getCurSDLoc())
+ .setChain(DAG.getEntryNode())
+ .setCallee(Fn->getCallingConv(), FnTy->getReturnType(),
+ getValue(GuardCheck), std::move(Args));
- if (GuardReg && TLI.useLoadStackGuardNode())
- Guard = DAG.getCopyFromReg(DAG.getEntryNode(), dl, GuardReg,
- PtrTy);
- else
- Guard = DAG.getLoad(PtrTy, dl, DAG.getEntryNode(),
- GuardPtr, MachinePointerInfo(IRGuard, 0),
- true, false, false, Align);
+ std::pair<SDValue, SDValue> Result = TLI.LowerCallTo(CLI);
+ DAG.setRoot(Result.second);
+ return;
+ }
- SDValue StackSlot = DAG.getLoad(
- PtrTy, dl, DAG.getEntryNode(), StackSlotPtr,
- MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), true,
- false, false, Align);
+ // If useLoadStackGuardNode returns true, generate LOAD_STACK_GUARD.
+ // Otherwise, emit a volatile load to retrieve the stack guard value.
+ SDValue Chain = DAG.getEntryNode();
+ if (TLI.useLoadStackGuardNode()) {
+ Guard = getLoadStackGuard(DAG, dl, Chain);
+ } else {
+ const Value *IRGuard = TLI.getSDagStackGuard(M);
+ SDValue GuardPtr = getValue(IRGuard);
+
+ Guard =
+ DAG.getLoad(PtrTy, dl, Chain, GuardPtr, MachinePointerInfo(IRGuard, 0),
+ Align, MachineMemOperand::MOVolatile);
+ }
// Perform the comparison via a subtract/getsetcc.
EVT VT = Guard.getValueType();
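The rewritten stack-protector tail now has three strategies: call a target-provided guard-check function on the loaded slot value, emit the LOAD_STACK_GUARD pseudo, or fall back to a volatile load of the guard variable and an inline comparison. A sketch of that dispatch order (names are illustrative, not LLVM API):

    enum class GuardStrategy { CheckFunction, LoadStackGuardNode, VolatileLoad };

    // Mirror of the order in visitSPDescriptorParent: a target-provided check
    // function wins, then the LOAD_STACK_GUARD pseudo, then a plain volatile
    // load of the guard variable.
    GuardStrategy pickGuardStrategy(bool HasGuardCheckFn, bool UseLoadStackGuard) {
      if (HasGuardCheckFn)
        return GuardStrategy::CheckFunction;
      if (UseLoadStackGuard)
        return GuardStrategy::LoadStackGuardNode;
      return GuardStrategy::VolatileLoad;
    }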
@@ -2115,6 +2256,12 @@ void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) {
MachineBasicBlock *Return = FuncInfo.MBBMap[I.getSuccessor(0)];
const BasicBlock *EHPadBB = I.getSuccessor(1);
+ // Deopt bundles are lowered in LowerCallSiteWithDeoptBundle, and we don't
+ // have to do anything here to lower funclet bundles.
+ assert(!I.hasOperandBundlesOtherThan(
+ {LLVMContext::OB_deopt, LLVMContext::OB_funclet}) &&
+ "Cannot lower invokes with arbitrary operand bundles yet!");
+
const Value *Callee(I.getCalledValue());
const Function *Fn = dyn_cast<Function>(Callee);
if (isa<InlineAsm>(Callee))
@@ -2134,8 +2281,15 @@ void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) {
LowerStatepoint(ImmutableStatepoint(&I), EHPadBB);
break;
}
- } else
+ } else if (I.countOperandBundlesOfType(LLVMContext::OB_deopt)) {
+ // Currently we do not lower any intrinsic calls with deopt operand bundles.
+ // Eventually we will support lowering the @llvm.experimental.deoptimize
+ // intrinsic, and right now there are no plans to support other intrinsics
+ // with deopt state.
+ LowerCallSiteWithDeoptBundle(&I, getValue(Callee), EHPadBB);
+ } else {
LowerCallTo(&I, getValue(Callee), false, EHPadBB);
+ }
// If the value of the invoke is used outside of its defining block, make it
// available as a virtual register.
@@ -2309,6 +2463,129 @@ void SelectionDAGBuilder::visitFSub(const User &I) {
visitBinary(I, ISD::FSUB);
}
+/// Checks if the given instruction performs a vector reduction, in which case
+/// we have the freedom to alter the elements in the result as long as their
+/// reduction stays unchanged.
+static bool isVectorReductionOp(const User *I) {
+ const Instruction *Inst = dyn_cast<Instruction>(I);
+ if (!Inst || !Inst->getType()->isVectorTy())
+ return false;
+
+ auto OpCode = Inst->getOpcode();
+ switch (OpCode) {
+ case Instruction::Add:
+ case Instruction::Mul:
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor:
+ break;
+ case Instruction::FAdd:
+ case Instruction::FMul:
+ if (const FPMathOperator *FPOp = dyn_cast<const FPMathOperator>(Inst))
+ if (FPOp->getFastMathFlags().unsafeAlgebra())
+ break;
+ // Fall through.
+ default:
+ return false;
+ }
+
+ unsigned ElemNum = Inst->getType()->getVectorNumElements();
+ unsigned ElemNumToReduce = ElemNum;
+
+ // Do DFS search on the def-use chain from the given instruction. We only
+ // allow four kinds of operations during the search until we reach the
+ // instruction that extracts the first element from the vector:
+ //
+ // 1. The reduction operation of the same opcode as the given instruction.
+ //
+ // 2. PHI node.
+ //
+ // 3. ShuffleVector instruction together with a reduction operation that
+ // does a partial reduction.
+ //
+ // 4. ExtractElement that extracts the first element from the vector, and we
+ // stop searching the def-use chain here.
+ //
+  // 3 & 4 above perform a reduction on all elements of the vector. We push
+  // defs from 1-3 onto the stack to continue the DFS. The given instruction
+  // is not a reduction operation if we meet any instructions other than those
+  // listed above.
+
+ SmallVector<const User *, 16> UsersToVisit{Inst};
+ SmallPtrSet<const User *, 16> Visited;
+ bool ReduxExtracted = false;
+
+ while (!UsersToVisit.empty()) {
+ auto User = UsersToVisit.back();
+ UsersToVisit.pop_back();
+ if (!Visited.insert(User).second)
+ continue;
+
+ for (const auto &U : User->users()) {
+ auto Inst = dyn_cast<Instruction>(U);
+ if (!Inst)
+ return false;
+
+ if (Inst->getOpcode() == OpCode || isa<PHINode>(U)) {
+ if (const FPMathOperator *FPOp = dyn_cast<const FPMathOperator>(Inst))
+ if (!isa<PHINode>(FPOp) && !FPOp->getFastMathFlags().unsafeAlgebra())
+ return false;
+ UsersToVisit.push_back(U);
+ } else if (const ShuffleVectorInst *ShufInst =
+ dyn_cast<ShuffleVectorInst>(U)) {
+        // Detect the following pattern: a ShuffleVector instruction together
+        // with a reduction that does a partial reduction on the first and
+        // second ElemNumToReduce / 2 elements, storing the result in the
+        // first ElemNumToReduce / 2 elements of another vector.
+
+ unsigned ResultElements = ShufInst->getType()->getVectorNumElements();
+ if (ResultElements < ElemNum)
+ return false;
+
+ if (ElemNumToReduce == 1)
+ return false;
+ if (!isa<UndefValue>(U->getOperand(1)))
+ return false;
+ for (unsigned i = 0; i < ElemNumToReduce / 2; ++i)
+ if (ShufInst->getMaskValue(i) != int(i + ElemNumToReduce / 2))
+ return false;
+ for (unsigned i = ElemNumToReduce / 2; i < ElemNum; ++i)
+ if (ShufInst->getMaskValue(i) != -1)
+ return false;
+
+ // There is only one user of this ShuffleVector instruction, which
+ // must be a reduction operation.
+ if (!U->hasOneUse())
+ return false;
+
+ auto U2 = dyn_cast<Instruction>(*U->user_begin());
+ if (!U2 || U2->getOpcode() != OpCode)
+ return false;
+
+ // Check operands of the reduction operation.
+ if ((U2->getOperand(0) == U->getOperand(0) && U2->getOperand(1) == U) ||
+ (U2->getOperand(1) == U->getOperand(0) && U2->getOperand(0) == U)) {
+ UsersToVisit.push_back(U2);
+ ElemNumToReduce /= 2;
+ } else
+ return false;
+ } else if (isa<ExtractElementInst>(U)) {
+ // At this moment we should have reduced all elements in the vector.
+ if (ElemNumToReduce != 1)
+ return false;
+
+ const ConstantInt *Val = dyn_cast<ConstantInt>(U->getOperand(1));
+ if (!Val || Val->getZExtValue() != 0)
+ return false;
+
+ ReduxExtracted = true;
+ } else
+ return false;
+ }
+ }
+ return ReduxExtracted;
+}
+
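The shape isVectorReductionOp accepts is the classic log2 shuffle reduction: each step adds the upper half of the vector into the lower half, and the scalar result is extracted from element 0 at the end. The same computation written over a plain array, as a scalar model of the vector pattern:

    #include <cstdio>

    // Log2 shuffle reduction over a plain array: each step folds the upper
    // half into the lower half; the answer ends up in element 0.
    int reduceAdd(int *V, unsigned N) {    // N must be a power of two
      for (unsigned Half = N / 2; Half >= 1; Half /= 2)
        for (unsigned i = 0; i != Half; ++i)
          V[i] += V[i + Half];             // one shufflevector + add per step
      return V[0];                         // the final extractelement of lane 0
    }

    int main() {
      int V[8] = {1, 2, 3, 4, 5, 6, 7, 8};
      std::printf("%d\n", reduceAdd(V, 8)); // prints 36
    }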
void SelectionDAGBuilder::visitBinary(const User &I, unsigned OpCode) {
SDValue Op1 = getValue(I.getOperand(0));
SDValue Op2 = getValue(I.getOperand(1));
@@ -2316,6 +2593,7 @@ void SelectionDAGBuilder::visitBinary(const User &I, unsigned OpCode) {
bool nuw = false;
bool nsw = false;
bool exact = false;
+ bool vec_redux = false;
FastMathFlags FMF;
if (const OverflowingBinaryOperator *OFBinOp =
@@ -2329,10 +2607,16 @@ void SelectionDAGBuilder::visitBinary(const User &I, unsigned OpCode) {
if (const FPMathOperator *FPOp = dyn_cast<const FPMathOperator>(&I))
FMF = FPOp->getFastMathFlags();
+ if (isVectorReductionOp(&I)) {
+ vec_redux = true;
+ DEBUG(dbgs() << "Detected a reduction operation:" << I << "\n");
+ }
+
SDNodeFlags Flags;
Flags.setExact(exact);
Flags.setNoSignedWrap(nsw);
Flags.setNoUnsignedWrap(nuw);
+ Flags.setVectorReduction(vec_redux);
if (EnableFMFInDAG) {
Flags.setAllowReciprocal(FMF.allowReciprocal());
Flags.setNoInfs(FMF.noInfs());
@@ -2433,7 +2717,7 @@ void SelectionDAGBuilder::visitFCmp(const User &I) {
SDValue Op1 = getValue(I.getOperand(0));
SDValue Op2 = getValue(I.getOperand(1));
ISD::CondCode Condition = getFCmpCondCode(predicate);
-
+
// FIXME: Fcmp instructions have fast-math-flags in IR, so we should use them.
// FIXME: We should propagate the fast-math-flags to the DAG node itself for
// further optimization, but currently FMF is only applicable to binary nodes.
@@ -2444,6 +2728,14 @@ void SelectionDAGBuilder::visitFCmp(const User &I) {
setValue(&I, DAG.getSetCC(getCurSDLoc(), DestVT, Op1, Op2, Condition));
}
+// Check if the condition of the select is used only by selects, i.e. it has a
+// single use or several users that are all selects sharing that condition.
+static bool hasOnlySelectUsers(const Value *Cond) {
+ return std::all_of(Cond->user_begin(), Cond->user_end(), [](const Value *V) {
+ return isa<SelectInst>(V);
+ });
+}
+
void SelectionDAGBuilder::visitSelect(const User &I) {
SmallVector<EVT, 4> ValueVTs;
ComputeValueVTs(DAG.getTargetLoweringInfo(), DAG.getDataLayout(), I.getType(),
@@ -2529,7 +2821,7 @@ void SelectionDAGBuilder::visitSelect(const User &I) {
// If the underlying comparison instruction is used by any other
// instruction, the consumed instructions won't be destroyed, so it is
// not profitable to convert to a min/max.
- cast<SelectInst>(&I)->getCondition()->hasOneUse()) {
+ hasOnlySelectUsers(cast<SelectInst>(I).getCondition())) {
OpCode = Opc;
LHSVal = getValue(LHS);
RHSVal = getValue(RHS);
@@ -2703,17 +2995,6 @@ void SelectionDAGBuilder::visitExtractElement(const User &I) {
InVec, InIdx));
}
-// Utility for visitShuffleVector - Return true if every element in Mask,
-// beginning from position Pos and ending in Pos+Size, falls within the
-// specified sequential range [L, L+Pos). or is undef.
-static bool isSequentialInRange(const SmallVectorImpl<int> &Mask,
- unsigned Pos, unsigned Size, int Low) {
- for (unsigned i = Pos, e = Pos+Size; i != e; ++i, ++Low)
- if (Mask[i] >= 0 && Mask[i] != Low)
- return false;
- return true;
-}
-
void SelectionDAGBuilder::visitShuffleVector(const User &I) {
SDValue Src1 = getValue(I.getOperand(0));
SDValue Src2 = getValue(I.getOperand(1));
@@ -2728,8 +3009,7 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) {
unsigned SrcNumElts = SrcVT.getVectorNumElements();
if (SrcNumElts == MaskNumElts) {
- setValue(&I, DAG.getVectorShuffle(VT, getCurSDLoc(), Src1, Src2,
- &Mask[0]));
+ setValue(&I, DAG.getVectorShuffle(VT, getCurSDLoc(), Src1, Src2, Mask));
return;
}
@@ -2738,29 +3018,46 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) {
// Mask is longer than the source vectors and is a multiple of the source
// vectors. We can use concatenate vector to make the mask and vectors
// lengths match.
- if (SrcNumElts*2 == MaskNumElts) {
- // First check for Src1 in low and Src2 in high
- if (isSequentialInRange(Mask, 0, SrcNumElts, 0) &&
- isSequentialInRange(Mask, SrcNumElts, SrcNumElts, SrcNumElts)) {
- // The shuffle is concatenating two vectors together.
- setValue(&I, DAG.getNode(ISD::CONCAT_VECTORS, getCurSDLoc(),
- VT, Src1, Src2));
- return;
+
+ unsigned NumConcat = MaskNumElts / SrcNumElts;
+
+ // Check if the shuffle is some kind of concatenation of the input vectors.
+ bool IsConcat = true;
+ SmallVector<int, 8> ConcatSrcs(NumConcat, -1);
+ for (unsigned i = 0; i != MaskNumElts; ++i) {
+ int Idx = Mask[i];
+ if (Idx < 0)
+ continue;
+ // Ensure the indices in each SrcVT sized piece are sequential and that
+ // the same source is used for the whole piece.
+ if ((Idx % SrcNumElts != (i % SrcNumElts)) ||
+ (ConcatSrcs[i / SrcNumElts] >= 0 &&
+ ConcatSrcs[i / SrcNumElts] != (int)(Idx / SrcNumElts))) {
+ IsConcat = false;
+ break;
}
- // Then check for Src2 in low and Src1 in high
- if (isSequentialInRange(Mask, 0, SrcNumElts, SrcNumElts) &&
- isSequentialInRange(Mask, SrcNumElts, SrcNumElts, 0)) {
- // The shuffle is concatenating two vectors together.
- setValue(&I, DAG.getNode(ISD::CONCAT_VECTORS, getCurSDLoc(),
- VT, Src2, Src1));
- return;
+ // Remember which source this index came from.
+ ConcatSrcs[i / SrcNumElts] = Idx / SrcNumElts;
+ }
+
+ // The shuffle is concatenating multiple vectors together. Just emit
+ // a CONCAT_VECTORS operation.
+ if (IsConcat) {
+ SmallVector<SDValue, 8> ConcatOps;
+ for (auto Src : ConcatSrcs) {
+ if (Src < 0)
+ ConcatOps.push_back(DAG.getUNDEF(SrcVT));
+ else if (Src == 0)
+ ConcatOps.push_back(Src1);
+ else
+ ConcatOps.push_back(Src2);
}
+ setValue(&I, DAG.getNode(ISD::CONCAT_VECTORS, getCurSDLoc(),
+ VT, ConcatOps));
+ return;
}
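The IsConcat scan above generalizes the deleted isSequentialInRange logic: any mask that concatenates whole source vectors is accepted, not just the two orderings <Src1,Src2> and <Src2,Src1>. For 4-element sources, <0,1,2,3,4,5,6,7> selects Src1 then Src2, <4,5,6,7,0,1,2,3> the reverse, and <4,5,6,7,-1,-1,-1,-1> Src2 followed by an all-undef piece. A standalone sketch of the scan:

    #include <vector>

    // On success, ConcatSrcs[k] names the source of piece k (0 = Src1,
    // 1 = Src2) or stays -1 for an all-undef piece.
    bool isConcatMask(const std::vector<int> &Mask, int SrcNumElts,
                      std::vector<int> &ConcatSrcs) {
      int NumConcat = int(Mask.size()) / SrcNumElts;
      ConcatSrcs.assign(NumConcat, -1);
      for (int i = 0, e = int(Mask.size()); i != e; ++i) {
        int Idx = Mask[i];
        if (Idx < 0)
          continue;                        // undef lane matches any source
        // Lane must be sequential within its piece, and the whole piece must
        // come from a single source vector.
        if (Idx % SrcNumElts != i % SrcNumElts ||
            (ConcatSrcs[i / SrcNumElts] >= 0 &&
             ConcatSrcs[i / SrcNumElts] != Idx / SrcNumElts))
          return false;
        ConcatSrcs[i / SrcNumElts] = Idx / SrcNumElts;
      }
      return true;
    }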
// Pad both vectors with undefs to make them the same length as the mask.
- unsigned NumConcat = MaskNumElts / SrcNumElts;
- bool Src1U = Src1.getOpcode() == ISD::UNDEF;
- bool Src2U = Src2.getOpcode() == ISD::UNDEF;
SDValue UndefVal = DAG.getUNDEF(SrcVT);
SmallVector<SDValue, 8> MOps1(NumConcat, UndefVal);
@@ -2768,10 +3065,12 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) {
MOps1[0] = Src1;
MOps2[0] = Src2;
- Src1 = Src1U ? DAG.getUNDEF(VT) : DAG.getNode(ISD::CONCAT_VECTORS,
- getCurSDLoc(), VT, MOps1);
- Src2 = Src2U ? DAG.getUNDEF(VT) : DAG.getNode(ISD::CONCAT_VECTORS,
- getCurSDLoc(), VT, MOps2);
+ Src1 = Src1.isUndef() ? DAG.getUNDEF(VT)
+ : DAG.getNode(ISD::CONCAT_VECTORS,
+ getCurSDLoc(), VT, MOps1);
+ Src2 = Src2.isUndef() ? DAG.getUNDEF(VT)
+ : DAG.getNode(ISD::CONCAT_VECTORS,
+ getCurSDLoc(), VT, MOps2);
// Readjust mask for new input vector length.
SmallVector<int, 8> MappedOps;
@@ -2783,7 +3082,7 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) {
}
setValue(&I, DAG.getVectorShuffle(VT, getCurSDLoc(), Src1, Src2,
- &MappedOps[0]));
+ MappedOps));
return;
}
@@ -2864,7 +3163,7 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) {
}
setValue(&I, DAG.getVectorShuffle(VT, getCurSDLoc(), Src1, Src2,
- &MappedOps[0]));
+ MappedOps));
return;
}
}
@@ -2982,8 +3281,7 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
Value *Op0 = I.getOperand(0);
// Note that the pointer operand may be a vector of pointers. Take the scalar
// element which holds a pointer.
- Type *Ty = Op0->getType()->getScalarType();
- unsigned AS = Ty->getPointerAddressSpace();
+ unsigned AS = Op0->getType()->getScalarType()->getPointerAddressSpace();
SDValue N = getValue(Op0);
SDLoc dl = getCurSDLoc();
@@ -2993,14 +3291,15 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
cast<VectorType>(I.getType())->getVectorNumElements() : 0;
if (VectorWidth && !N.getValueType().isVector()) {
- MVT VT = MVT::getVectorVT(N.getValueType().getSimpleVT(), VectorWidth);
+ LLVMContext &Context = *DAG.getContext();
+ EVT VT = EVT::getVectorVT(Context, N.getValueType(), VectorWidth);
SmallVector<SDValue, 16> Ops(VectorWidth, N);
N = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops);
}
- for (GetElementPtrInst::const_op_iterator OI = I.op_begin()+1, E = I.op_end();
- OI != E; ++OI) {
- const Value *Idx = *OI;
- if (StructType *StTy = dyn_cast<StructType>(Ty)) {
+ for (gep_type_iterator GTI = gep_type_begin(&I), E = gep_type_end(&I);
+ GTI != E; ++GTI) {
+ const Value *Idx = GTI.getOperand();
+ if (StructType *StTy = dyn_cast<StructType>(*GTI)) {
unsigned Field = cast<Constant>(Idx)->getUniqueInteger().getZExtValue();
if (Field) {
// N = N + Offset
@@ -3015,14 +3314,11 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
N = DAG.getNode(ISD::ADD, dl, N.getValueType(), N,
DAG.getConstant(Offset, dl, N.getValueType()), &Flags);
}
-
- Ty = StTy->getElementType(Field);
} else {
- Ty = cast<SequentialType>(Ty)->getElementType();
MVT PtrTy =
DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout(), AS);
unsigned PtrSize = PtrTy.getSizeInBits();
- APInt ElementSize(PtrSize, DL->getTypeAllocSize(Ty));
+ APInt ElementSize(PtrSize, DL->getTypeAllocSize(GTI.getIndexedType()));
// If this is a scalar constant or a splat vector of constants,
// handle it quickly.
@@ -3055,7 +3351,7 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
if (!IdxN.getValueType().isVector() && VectorWidth) {
MVT VT = MVT::getVectorVT(IdxN.getValueType().getSimpleVT(), VectorWidth);
SmallVector<SDValue, 16> Ops(VectorWidth, IdxN);
- IdxN = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops);
+ IdxN = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops);
}
// If the index is smaller or larger than intptr_t, truncate or extend
// it.
@@ -3144,7 +3440,22 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) {
if (I.isAtomic())
return visitAtomicLoad(I);
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
const Value *SV = I.getOperand(0);
+ if (TLI.supportSwiftError()) {
+ // Swifterror values can come from either a function parameter with
+ // swifterror attribute or an alloca with swifterror attribute.
+ if (const Argument *Arg = dyn_cast<Argument>(SV)) {
+ if (Arg->hasSwiftErrorAttr())
+ return visitLoadFromSwiftError(I);
+ }
+
+ if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(SV)) {
+ if (Alloca->isSwiftError())
+ return visitLoadFromSwiftError(I);
+ }
+ }
+
SDValue Ptr = getValue(SV);
Type *Ty = I.getType();
@@ -3168,7 +3479,6 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) {
I.getAAMetadata(AAInfo);
const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range);
- const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SmallVector<EVT, 4> ValueVTs;
SmallVector<uint64_t, 4> Offsets;
ComputeValueVTs(TLI, DAG.getDataLayout(), Ty, ValueVTs, &Offsets);
@@ -3223,10 +3533,17 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) {
PtrVT, Ptr,
DAG.getConstant(Offsets[i], dl, PtrVT),
&Flags);
- SDValue L = DAG.getLoad(ValueVTs[i], dl, Root,
- A, MachinePointerInfo(SV, Offsets[i]), isVolatile,
- isNonTemporal, isInvariant, Alignment, AAInfo,
- Ranges);
+ auto MMOFlags = MachineMemOperand::MONone;
+ if (isVolatile)
+ MMOFlags |= MachineMemOperand::MOVolatile;
+ if (isNonTemporal)
+ MMOFlags |= MachineMemOperand::MONonTemporal;
+ if (isInvariant)
+ MMOFlags |= MachineMemOperand::MOInvariant;
+
+ SDValue L = DAG.getLoad(ValueVTs[i], dl, Root, A,
+ MachinePointerInfo(SV, Offsets[i]), Alignment,
+ MMOFlags, AAInfo, Ranges);
Values[i] = L;
Chains[ChainI] = L.getValue(1);
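The load path now folds the volatile / non-temporal / invariant booleans into one MachineMemOperand flags word instead of threading three separate bool parameters through getLoad. The bit-flag accumulation idiom, standalone (flag values here are illustrative, not the LLVM encoding):

    #include <cstdint>

    enum MemFlag : uint32_t {              // illustrative values only
      MONone        = 0,
      MOVolatile    = 1u << 0,
      MONonTemporal = 1u << 1,
      MOInvariant   = 1u << 2,
    };

    uint32_t buildMMOFlags(bool IsVolatile, bool IsNonTemporal, bool IsInvariant) {
      uint32_t Flags = MONone;
      if (IsVolatile)
        Flags |= MOVolatile;
      if (IsNonTemporal)
        Flags |= MONonTemporal;
      if (IsInvariant)
        Flags |= MOInvariant;
      return Flags;
    }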
@@ -3245,6 +3562,64 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) {
DAG.getVTList(ValueVTs), Values));
}
+void SelectionDAGBuilder::visitStoreToSwiftError(const StoreInst &I) {
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ assert(TLI.supportSwiftError() &&
+ "call visitStoreToSwiftError when backend supports swifterror");
+
+ SmallVector<EVT, 4> ValueVTs;
+ SmallVector<uint64_t, 4> Offsets;
+ const Value *SrcV = I.getOperand(0);
+ ComputeValueVTs(DAG.getTargetLoweringInfo(), DAG.getDataLayout(),
+ SrcV->getType(), ValueVTs, &Offsets);
+ assert(ValueVTs.size() == 1 && Offsets[0] == 0 &&
+ "expect a single EVT for swifterror");
+
+ SDValue Src = getValue(SrcV);
+ // Create a virtual register, then update the virtual register.
+ auto &DL = DAG.getDataLayout();
+ const TargetRegisterClass *RC = TLI.getRegClassFor(TLI.getPointerTy(DL));
+ unsigned VReg = FuncInfo.MF->getRegInfo().createVirtualRegister(RC);
+ // Chain, DL, Reg, N or Chain, DL, Reg, N, Glue
+ // Chain can be getRoot or getControlRoot.
+ SDValue CopyNode = DAG.getCopyToReg(getRoot(), getCurSDLoc(), VReg,
+ SDValue(Src.getNode(), Src.getResNo()));
+ DAG.setRoot(CopyNode);
+ FuncInfo.setSwiftErrorVReg(FuncInfo.MBB, I.getOperand(1), VReg);
+}
+
+void SelectionDAGBuilder::visitLoadFromSwiftError(const LoadInst &I) {
+ assert(DAG.getTargetLoweringInfo().supportSwiftError() &&
+ "call visitLoadFromSwiftError when backend supports swifterror");
+
+ assert(!I.isVolatile() &&
+ I.getMetadata(LLVMContext::MD_nontemporal) == nullptr &&
+ I.getMetadata(LLVMContext::MD_invariant_load) == nullptr &&
+ "Support volatile, non temporal, invariant for load_from_swift_error");
+
+ const Value *SV = I.getOperand(0);
+ Type *Ty = I.getType();
+ AAMDNodes AAInfo;
+ I.getAAMetadata(AAInfo);
+ assert(!AA->pointsToConstantMemory(MemoryLocation(
+ SV, DAG.getDataLayout().getTypeStoreSize(Ty), AAInfo)) &&
+ "load_from_swift_error should not be constant memory");
+
+ SmallVector<EVT, 4> ValueVTs;
+ SmallVector<uint64_t, 4> Offsets;
+ ComputeValueVTs(DAG.getTargetLoweringInfo(), DAG.getDataLayout(), Ty,
+ ValueVTs, &Offsets);
+ assert(ValueVTs.size() == 1 && Offsets[0] == 0 &&
+ "expect a single EVT for swifterror");
+
+ // Chain, DL, Reg, VT, Glue or Chain, DL, Reg, VT
+ SDValue L = DAG.getCopyFromReg(getRoot(), getCurSDLoc(),
+ FuncInfo.findSwiftErrorVReg(FuncInfo.MBB, SV),
+ ValueVTs[0]);
+
+ setValue(&I, L);
+}
+
void SelectionDAGBuilder::visitStore(const StoreInst &I) {
if (I.isAtomic())
return visitAtomicStore(I);
@@ -3252,6 +3627,21 @@ void SelectionDAGBuilder::visitStore(const StoreInst &I) {
const Value *SrcV = I.getOperand(0);
const Value *PtrV = I.getOperand(1);
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ if (TLI.supportSwiftError()) {
+ // Swifterror values can come from either a function parameter with
+ // swifterror attribute or an alloca with swifterror attribute.
+ if (const Argument *Arg = dyn_cast<Argument>(PtrV)) {
+ if (Arg->hasSwiftErrorAttr())
+ return visitStoreToSwiftError(I);
+ }
+
+ if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(PtrV)) {
+ if (Alloca->isSwiftError())
+ return visitStoreToSwiftError(I);
+ }
+ }
+
SmallVector<EVT, 4> ValueVTs;
SmallVector<uint64_t, 4> Offsets;
ComputeValueVTs(DAG.getTargetLoweringInfo(), DAG.getDataLayout(),
@@ -3268,15 +3658,18 @@ void SelectionDAGBuilder::visitStore(const StoreInst &I) {
SDValue Root = getRoot();
SmallVector<SDValue, 4> Chains(std::min(MaxParallelChains, NumValues));
+ SDLoc dl = getCurSDLoc();
EVT PtrVT = Ptr.getValueType();
- bool isVolatile = I.isVolatile();
- bool isNonTemporal = I.getMetadata(LLVMContext::MD_nontemporal) != nullptr;
unsigned Alignment = I.getAlignment();
- SDLoc dl = getCurSDLoc();
-
AAMDNodes AAInfo;
I.getAAMetadata(AAInfo);
+ auto MMOFlags = MachineMemOperand::MONone;
+ if (I.isVolatile())
+ MMOFlags |= MachineMemOperand::MOVolatile;
+ if (I.getMetadata(LLVMContext::MD_nontemporal) != nullptr)
+ MMOFlags |= MachineMemOperand::MONonTemporal;
+
// An aggregate load cannot wrap around the address space, so offsets to its
// parts don't wrap either.
SDNodeFlags Flags;
@@ -3293,10 +3686,9 @@ void SelectionDAGBuilder::visitStore(const StoreInst &I) {
}
SDValue Add = DAG.getNode(ISD::ADD, dl, PtrVT, Ptr,
DAG.getConstant(Offsets[i], dl, PtrVT), &Flags);
- SDValue St = DAG.getStore(Root, dl,
- SDValue(Src.getNode(), Src.getResNo() + i),
- Add, MachinePointerInfo(PtrV, Offsets[i]),
- isVolatile, isNonTemporal, Alignment, AAInfo);
+ SDValue St = DAG.getStore(
+ Root, dl, SDValue(Src.getNode(), Src.getResNo() + i), Add,
+ MachinePointerInfo(PtrV, Offsets[i]), Alignment, MMOFlags, AAInfo);
Chains[ChainI] = St;
}
@@ -3447,13 +3839,10 @@ void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I) {
I.getAAMetadata(AAInfo);
const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range);
- SDValue InChain = DAG.getRoot();
- if (AA->pointsToConstantMemory(MemoryLocation(
- PtrOperand, DAG.getDataLayout().getTypeStoreSize(I.getType()),
- AAInfo))) {
- // Do not serialize (non-volatile) loads of constant memory with anything.
- InChain = DAG.getEntryNode();
- }
+ // Do not serialize masked loads of constant memory with anything.
+ bool AddToChain = !AA->pointsToConstantMemory(MemoryLocation(
+ PtrOperand, DAG.getDataLayout().getTypeStoreSize(I.getType()), AAInfo));
+ SDValue InChain = AddToChain ? DAG.getRoot() : DAG.getEntryNode();
MachineMemOperand *MMO =
DAG.getMachineFunction().
@@ -3463,8 +3852,10 @@ void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I) {
SDValue Load = DAG.getMaskedLoad(VT, sdl, InChain, Ptr, Mask, Src0, VT, MMO,
ISD::NON_EXTLOAD);
- SDValue OutChain = Load.getValue(1);
- DAG.setRoot(OutChain);
+ if (AddToChain) {
+ SDValue OutChain = Load.getValue(1);
+ DAG.setRoot(OutChain);
+ }
setValue(&I, Load);
}
@@ -3585,7 +3976,7 @@ void SelectionDAGBuilder::visitFence(const FenceInst &I) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SDValue Ops[3];
Ops[0] = getRoot();
- Ops[1] = DAG.getConstant(I.getOrdering(), dl,
+ Ops[1] = DAG.getConstant((unsigned)I.getOrdering(), dl,
TLI.getPointerTy(DAG.getDataLayout()));
Ops[2] = DAG.getConstant(I.getSynchScope(), dl,
TLI.getPointerTy(DAG.getDataLayout()));
@@ -3724,7 +4115,8 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,
if (VectorType *PTy = dyn_cast<VectorType>(I.getType())) {
EVT VT = TLI.getValueType(DAG.getDataLayout(), PTy);
Result = DAG.getNode(ISD::BITCAST, getCurSDLoc(), VT, Result);
- }
+ } else
+ Result = lowerRangeToAssertZExt(DAG, I, Result);
setValue(&I, Result);
}
@@ -3736,8 +4128,7 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,
/// Op = (Op & 0x007fffff) | 0x3f800000;
///
/// where Op is the hexadecimal representation of the floating-point value.
-static SDValue
-GetSignificand(SelectionDAG &DAG, SDValue Op, SDLoc dl) {
+static SDValue GetSignificand(SelectionDAG &DAG, SDValue Op, const SDLoc &dl) {
SDValue t1 = DAG.getNode(ISD::AND, dl, MVT::i32, Op,
DAG.getConstant(0x007fffff, dl, MVT::i32));
SDValue t2 = DAG.getNode(ISD::OR, dl, MVT::i32, t1,
@@ -3750,9 +4141,8 @@ GetSignificand(SelectionDAG &DAG, SDValue Op, SDLoc dl) {
/// (float)(int)(((Op & 0x7f800000) >> 23) - 127);
///
/// where Op is the hexadecimal representation of the floating-point value.
-static SDValue
-GetExponent(SelectionDAG &DAG, SDValue Op, const TargetLowering &TLI,
- SDLoc dl) {
+static SDValue GetExponent(SelectionDAG &DAG, SDValue Op,
+ const TargetLowering &TLI, const SDLoc &dl) {
SDValue t0 = DAG.getNode(ISD::AND, dl, MVT::i32, Op,
DAG.getConstant(0x7f800000, dl, MVT::i32));
SDValue t1 = DAG.getNode(
@@ -3764,13 +4154,13 @@ GetExponent(SelectionDAG &DAG, SDValue Op, const TargetLowering &TLI,
}
/// getF32Constant - Get 32-bit floating point constant.
-static SDValue
-getF32Constant(SelectionDAG &DAG, unsigned Flt, SDLoc dl) {
+static SDValue getF32Constant(SelectionDAG &DAG, unsigned Flt,
+ const SDLoc &dl) {
return DAG.getConstantFP(APFloat(APFloat::IEEEsingle, APInt(32, Flt)), dl,
MVT::f32);
}
-static SDValue getLimitedPrecisionExp2(SDValue t0, SDLoc dl,
+static SDValue getLimitedPrecisionExp2(SDValue t0, const SDLoc &dl,
SelectionDAG &DAG) {
// TODO: What fast-math-flags should be set on the floating-point nodes?
@@ -3862,7 +4252,7 @@ static SDValue getLimitedPrecisionExp2(SDValue t0, SDLoc dl,
/// expandExp - Lower an exp intrinsic. Handles the special sequences for
/// limited-precision mode.
-static SDValue expandExp(SDLoc dl, SDValue Op, SelectionDAG &DAG,
+static SDValue expandExp(const SDLoc &dl, SDValue Op, SelectionDAG &DAG,
const TargetLowering &TLI) {
if (Op.getValueType() == MVT::f32 &&
LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
@@ -3885,9 +4275,9 @@ static SDValue expandExp(SDLoc dl, SDValue Op, SelectionDAG &DAG,
/// expandLog - Lower a log intrinsic. Handles the special sequences for
/// limited-precision mode.
-static SDValue expandLog(SDLoc dl, SDValue Op, SelectionDAG &DAG,
+static SDValue expandLog(const SDLoc &dl, SDValue Op, SelectionDAG &DAG,
const TargetLowering &TLI) {
-
+
// TODO: What fast-math-flags should be set on the floating-point nodes?
if (Op.getValueType() == MVT::f32 &&
@@ -3984,9 +4374,9 @@ static SDValue expandLog(SDLoc dl, SDValue Op, SelectionDAG &DAG,
/// expandLog2 - Lower a log2 intrinsic. Handles the special sequences for
/// limited-precision mode.
-static SDValue expandLog2(SDLoc dl, SDValue Op, SelectionDAG &DAG,
+static SDValue expandLog2(const SDLoc &dl, SDValue Op, SelectionDAG &DAG,
const TargetLowering &TLI) {
-
+
// TODO: What fast-math-flags should be set on the floating-point nodes?
if (Op.getValueType() == MVT::f32 &&
@@ -4082,7 +4472,7 @@ static SDValue expandLog2(SDLoc dl, SDValue Op, SelectionDAG &DAG,
/// expandLog10 - Lower a log10 intrinsic. Handles the special sequences for
/// limited-precision mode.
-static SDValue expandLog10(SDLoc dl, SDValue Op, SelectionDAG &DAG,
+static SDValue expandLog10(const SDLoc &dl, SDValue Op, SelectionDAG &DAG,
const TargetLowering &TLI) {
// TODO: What fast-math-flags should be set on the floating-point nodes?
@@ -4173,7 +4563,7 @@ static SDValue expandLog10(SDLoc dl, SDValue Op, SelectionDAG &DAG,
/// expandExp2 - Lower an exp2 intrinsic. Handles the special sequences for
/// limited-precision mode.
-static SDValue expandExp2(SDLoc dl, SDValue Op, SelectionDAG &DAG,
+static SDValue expandExp2(const SDLoc &dl, SDValue Op, SelectionDAG &DAG,
const TargetLowering &TLI) {
if (Op.getValueType() == MVT::f32 &&
LimitFloatPrecision > 0 && LimitFloatPrecision <= 18)
@@ -4185,7 +4575,7 @@ static SDValue expandExp2(SDLoc dl, SDValue Op, SelectionDAG &DAG,
/// visitPow - Lower a pow intrinsic. Handles the special sequences for
/// limited-precision mode with x == 10.0f.
-static SDValue expandPow(SDLoc dl, SDValue LHS, SDValue RHS,
+static SDValue expandPow(const SDLoc &dl, SDValue LHS, SDValue RHS,
SelectionDAG &DAG, const TargetLowering &TLI) {
bool IsExp10 = false;
if (LHS.getValueType() == MVT::f32 && RHS.getValueType() == MVT::f32 &&
@@ -4214,7 +4604,7 @@ static SDValue expandPow(SDLoc dl, SDValue LHS, SDValue RHS,
/// ExpandPowI - Expand a llvm.powi intrinsic.
-static SDValue ExpandPowI(SDLoc DL, SDValue LHS, SDValue RHS,
+static SDValue ExpandPowI(const SDLoc &DL, SDValue LHS, SDValue RHS,
SelectionDAG &DAG) {
// If RHS is a constant, we can expand this out to a multiplication tree,
// otherwise we end up lowering to a call to __powidf2 (for example). When
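The multiplication tree mentioned in the comment above is the classic square-and-multiply expansion; a minimal standalone sketch of the scalar logic (not the in-tree SDValue-building code):

    #include <cstdint>
    // powi(x, 13) == x^8 * x^4 * x, using O(log n) multiplies.
    double powi_expand(double Val, uint32_t Power) {
      double Result = 1.0;
      double CurSquare = Val;      // holds Val^(2^k) on iteration k
      while (Power) {
        if (Power & 1)
          Result *= CurSquare;     // this bit of the exponent is set
        CurSquare *= CurSquare;
        Power >>= 1;
      }
      return Result;
    }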
@@ -4609,18 +4999,9 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
DAG.getMachineFunction().getMMI().setCallsUnwindInit(true);
return nullptr;
case Intrinsic::eh_dwarf_cfa: {
- SDValue CfaArg = DAG.getSExtOrTrunc(getValue(I.getArgOperand(0)), sdl,
- TLI.getPointerTy(DAG.getDataLayout()));
- SDValue Offset = DAG.getNode(ISD::ADD, sdl,
- CfaArg.getValueType(),
- DAG.getNode(ISD::FRAME_TO_ARGS_OFFSET, sdl,
- CfaArg.getValueType()),
- CfaArg);
- SDValue FA = DAG.getNode(
- ISD::FRAMEADDR, sdl, TLI.getPointerTy(DAG.getDataLayout()),
- DAG.getConstant(0, sdl, TLI.getPointerTy(DAG.getDataLayout())));
- setValue(&I, DAG.getNode(ISD::ADD, sdl, FA.getValueType(),
- FA, Offset));
+ setValue(&I, DAG.getNode(ISD::EH_DWARF_CFA, sdl,
+ TLI.getPointerTy(DAG.getDataLayout()),
+ getValue(I.getArgOperand(0))));
return nullptr;
}
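The hunk above folds the generic CFA computation into a single ISD::EH_DWARF_CFA node, leaving the expansion to the target. A hypothetical target legalization could reproduce the deleted sequence like this (a sketch under that assumption, not in-tree code):

    SDValue lowerEH_DWARF_CFA(SDValue Op, SelectionDAG &DAG) {
      SDLoc dl(Op);
      EVT PtrVT = Op.getValueType();
      // FRAMEADDR(0) + FRAME_TO_ARGS_OFFSET + offset, as the removed code did.
      SDValue FA = DAG.getNode(ISD::FRAMEADDR, dl, PtrVT,
                               DAG.getConstant(0, dl, PtrVT));
      SDValue Off = DAG.getNode(ISD::FRAME_TO_ARGS_OFFSET, dl, PtrVT);
      return DAG.getNode(ISD::ADD, dl, PtrVT, FA,
                         DAG.getNode(ISD::ADD, dl, PtrVT, Off, Op.getOperand(0)));
    }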
case Intrinsic::eh_sjlj_callsite: {
@@ -4798,7 +5179,8 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
case Intrinsic::trunc:
case Intrinsic::rint:
case Intrinsic::nearbyint:
- case Intrinsic::round: {
+ case Intrinsic::round:
+ case Intrinsic::canonicalize: {
unsigned Opcode;
switch (Intrinsic) {
default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
@@ -4812,6 +5194,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
case Intrinsic::rint: Opcode = ISD::FRINT; break;
case Intrinsic::nearbyint: Opcode = ISD::FNEARBYINT; break;
case Intrinsic::round: Opcode = ISD::FROUND; break;
+ case Intrinsic::canonicalize: Opcode = ISD::FCANONICALIZE; break;
}
setValue(&I, DAG.getNode(Opcode, sdl,
@@ -4819,18 +5202,28 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
getValue(I.getArgOperand(0))));
return nullptr;
}
- case Intrinsic::minnum:
- setValue(&I, DAG.getNode(ISD::FMINNUM, sdl,
- getValue(I.getArgOperand(0)).getValueType(),
+ case Intrinsic::minnum: {
+ auto VT = getValue(I.getArgOperand(0)).getValueType();
+ unsigned Opc =
+ I.hasNoNaNs() && TLI.isOperationLegalOrCustom(ISD::FMINNAN, VT)
+ ? ISD::FMINNAN
+ : ISD::FMINNUM;
+ setValue(&I, DAG.getNode(Opc, sdl, VT,
getValue(I.getArgOperand(0)),
getValue(I.getArgOperand(1))));
return nullptr;
- case Intrinsic::maxnum:
- setValue(&I, DAG.getNode(ISD::FMAXNUM, sdl,
- getValue(I.getArgOperand(0)).getValueType(),
+ }
+ case Intrinsic::maxnum: {
+ auto VT = getValue(I.getArgOperand(0)).getValueType();
+ unsigned Opc =
+ I.hasNoNaNs() && TLI.isOperationLegalOrCustom(ISD::FMAXNAN, VT)
+ ? ISD::FMAXNAN
+ : ISD::FMAXNUM;
+ setValue(&I, DAG.getNode(Opc, sdl, VT,
getValue(I.getArgOperand(0)),
getValue(I.getArgOperand(1))));
return nullptr;
+ }
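For reference, the semantic difference the nnan check above exploits, as a standalone C++ illustration (not in-tree code): when the call carries the nnan fast-math flag the two behaviors agree on all reachable inputs, which is what makes substituting the NaN-propagating node safe.

    #include <cmath>
    #include <limits>
    // ISD::FMINNUM models IEEE-754 minNum: a quiet NaN loses to a number.
    float minnum_like(float A, float B) {
      if (std::isnan(A)) return B;
      if (std::isnan(B)) return A;
      return A < B ? A : B;
    }
    // ISD::FMINNAN propagates NaN if either input is NaN.
    float minnan_like(float A, float B) {
      if (std::isnan(A) || std::isnan(B))
        return std::numeric_limits<float>::quiet_NaN();
      return A < B ? A : B;
    }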
case Intrinsic::copysign:
setValue(&I, DAG.getNode(ISD::FCOPYSIGN, sdl,
getValue(I.getArgOperand(0)).getValueType(),
@@ -4954,47 +5347,35 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
setValue(&I, Res);
return nullptr;
}
+ case Intrinsic::stackguard: {
+ EVT PtrTy = TLI.getPointerTy(DAG.getDataLayout());
+ MachineFunction &MF = DAG.getMachineFunction();
+ const Module &M = *MF.getFunction()->getParent();
+ SDValue Chain = getRoot();
+ if (TLI.useLoadStackGuardNode()) {
+ Res = getLoadStackGuard(DAG, sdl, Chain);
+ } else {
+ const Value *Global = TLI.getSDagStackGuard(M);
+ unsigned Align = DL->getPrefTypeAlignment(Global->getType());
+ Res = DAG.getLoad(PtrTy, sdl, Chain, getValue(Global),
+ MachinePointerInfo(Global, 0), Align,
+ MachineMemOperand::MOVolatile);
+ }
+ DAG.setRoot(Chain);
+ setValue(&I, Res);
+ return nullptr;
+ }
case Intrinsic::stackprotector: {
// Emit code into the DAG to store the stack guard onto the stack.
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo *MFI = MF.getFrameInfo();
EVT PtrTy = TLI.getPointerTy(DAG.getDataLayout());
SDValue Src, Chain = getRoot();
- const Value *Ptr = cast<LoadInst>(I.getArgOperand(0))->getPointerOperand();
- const GlobalVariable *GV = dyn_cast<GlobalVariable>(Ptr);
-
- // See if Ptr is a bitcast. If it is, look through it and see if we can get
- // global variable __stack_chk_guard.
- if (!GV)
- if (const Operator *BC = dyn_cast<Operator>(Ptr))
- if (BC->getOpcode() == Instruction::BitCast)
- GV = dyn_cast<GlobalVariable>(BC->getOperand(0));
-
- if (GV && TLI.useLoadStackGuardNode()) {
- // Emit a LOAD_STACK_GUARD node.
- MachineSDNode *Node = DAG.getMachineNode(TargetOpcode::LOAD_STACK_GUARD,
- sdl, PtrTy, Chain);
- MachinePointerInfo MPInfo(GV);
- MachineInstr::mmo_iterator MemRefs = MF.allocateMemRefsArray(1);
- unsigned Flags = MachineMemOperand::MOLoad |
- MachineMemOperand::MOInvariant;
- *MemRefs = MF.getMachineMemOperand(MPInfo, Flags,
- PtrTy.getSizeInBits() / 8,
- DAG.getEVTAlignment(PtrTy));
- Node->setMemRefs(MemRefs, MemRefs + 1);
-
- // Copy the guard value to a virtual register so that it can be
- // retrieved in the epilogue.
- Src = SDValue(Node, 0);
- const TargetRegisterClass *RC =
- TLI.getRegClassFor(Src.getSimpleValueType());
- unsigned Reg = MF.getRegInfo().createVirtualRegister(RC);
-
- SPDescriptor.setGuardReg(Reg);
- Chain = DAG.getCopyToReg(Chain, sdl, Reg, Src);
- } else {
+
+ if (TLI.useLoadStackGuardNode())
+ Src = getLoadStackGuard(DAG, sdl, Chain);
+ else
Src = getValue(I.getArgOperand(0)); // The guard's value.
- }
AllocaInst *Slot = cast<AllocaInst>(I.getArgOperand(1));
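Both intrinsic paths above key off the same TargetLowering hook; a target opts in to the pseudo-instruction form roughly like this (sketch only; the class name is hypothetical):

    // Returning true selects the LOAD_STACK_GUARD pseudo in the code above;
    // returning false falls back to a volatile load of the guard variable.
    bool MyTargetLowering::useLoadStackGuardNode() const { return true; }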
@@ -5006,7 +5387,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
// Store the stack protector onto the stack.
Res = DAG.getStore(Chain, sdl, Src, FIN, MachinePointerInfo::getFixedStack(
DAG.getMachineFunction(), FI),
- true, false, 0);
+ /* Alignment = */ 0, MachineMemOperand::MOVolatile);
setValue(&I, Res);
DAG.setRoot(Res);
return nullptr;
@@ -5060,15 +5441,20 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
getValue(I.getArgOperand(0))));
return nullptr;
}
- case Intrinsic::gcroot:
- if (GFI) {
- const Value *Alloca = I.getArgOperand(0)->stripPointerCasts();
- const Constant *TypeMap = cast<Constant>(I.getArgOperand(1));
-
- FrameIndexSDNode *FI = cast<FrameIndexSDNode>(getValue(Alloca).getNode());
- GFI->addStackRoot(FI->getIndex(), TypeMap);
- }
+ case Intrinsic::gcroot: {
+ MachineFunction &MF = DAG.getMachineFunction();
+ const Function *F = MF.getFunction();
+ (void)F;
+ assert(F->hasGC() &&
+ "only valid in functions with gc specified, enforced by Verifier");
+ assert(GFI && "implied by previous");
+ const Value *Alloca = I.getArgOperand(0)->stripPointerCasts();
+ const Constant *TypeMap = cast<Constant>(I.getArgOperand(1));
+
+ FrameIndexSDNode *FI = cast<FrameIndexSDNode>(getValue(Alloca).getNode());
+ GFI->addStackRoot(FI->getIndex(), TypeMap);
return nullptr;
+ }
case Intrinsic::gcread:
case Intrinsic::gcwrite:
llvm_unreachable("GC failed to lower gcread/gcwrite intrinsics!");
@@ -5101,7 +5487,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
CallingConv::C, I.getType(),
DAG.getExternalSymbol(TrapFuncName.data(),
TLI.getPointerTy(DAG.getDataLayout())),
- std::move(Args), 0);
+ std::move(Args));
std::pair<SDValue, SDValue> Result = TLI.LowerCallTo(CLI);
DAG.setRoot(Result.second);
@@ -5193,18 +5579,6 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
case Intrinsic::invariant_end:
// Discard region information.
return nullptr;
- case Intrinsic::stackprotectorcheck: {
- // Do not actually emit anything for this basic block. Instead we initialize
- // the stack protector descriptor and export the guard variable so we can
- // access it in FinishBasicBlock.
- const BasicBlock *BB = I.getParent();
- SPDescriptor.initialize(BB, FuncInfo.MBBMap[BB], I);
- ExportFromCurrentBlock(SPDescriptor.getGuard());
-
- // Flush our exports since we are going to process a terminator.
- (void)getControlRoot();
- return nullptr;
- }
case Intrinsic::clear_cache:
return TLI.getClearCacheBuiltinName();
case Intrinsic::donothing:
@@ -5220,11 +5594,11 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
return nullptr;
}
case Intrinsic::experimental_gc_statepoint: {
- visitStatepoint(I);
+ LowerStatepoint(ImmutableStatepoint(&I));
return nullptr;
}
case Intrinsic::experimental_gc_result: {
- visitGCResult(I);
+ visitGCResult(cast<GCResultInst>(I));
return nullptr;
}
case Intrinsic::experimental_gc_relocate: {
@@ -5303,6 +5677,10 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
setValue(&I, N);
return nullptr;
}
+
+ case Intrinsic::experimental_deoptimize:
+ LowerDeoptimizeCall(&I);
+ return nullptr;
}
}
@@ -5378,14 +5756,16 @@ SelectionDAGBuilder::lowerInvokable(TargetLowering::CallLoweringInfo &CLI,
void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
bool isTailCall,
const BasicBlock *EHPadBB) {
- PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType());
- FunctionType *FTy = cast<FunctionType>(PT->getElementType());
- Type *RetTy = FTy->getReturnType();
+ auto &DL = DAG.getDataLayout();
+ FunctionType *FTy = CS.getFunctionType();
+ Type *RetTy = CS.getType();
TargetLowering::ArgListTy Args;
TargetLowering::ArgListEntry Entry;
Args.reserve(CS.arg_size());
+ const Value *SwiftErrorVal = nullptr;
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
for (ImmutableCallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end();
i != e; ++i) {
const Value *V = *i;
@@ -5399,6 +5779,17 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
// Skip the first return-type Attribute to get to params.
Entry.setAttributes(&CS, i - CS.arg_begin() + 1);
+
+      // Use the swifterror virtual register as an input to the call.
+ if (Entry.isSwiftError && TLI.supportSwiftError()) {
+ SwiftErrorVal = V;
+      // Find the virtual register for the actual swifterror argument and use
+      // it, rather than the Value itself, as the call operand.
+ Entry.Node = DAG.getRegister(
+ FuncInfo.findSwiftErrorVReg(FuncInfo.MBB, V),
+ EVT(TLI.getPointerTy(DL)));
+ }
+
Args.push_back(Entry);
// If we have an explicit sret argument that is an Instruction, (i.e., it
@@ -5413,13 +5804,32 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
isTailCall = false;
TargetLowering::CallLoweringInfo CLI(DAG);
- CLI.setDebugLoc(getCurSDLoc()).setChain(getRoot())
- .setCallee(RetTy, FTy, Callee, std::move(Args), CS)
- .setTailCall(isTailCall);
+ CLI.setDebugLoc(getCurSDLoc())
+ .setChain(getRoot())
+ .setCallee(RetTy, FTy, Callee, std::move(Args), CS)
+ .setTailCall(isTailCall)
+ .setConvergent(CS.isConvergent());
std::pair<SDValue, SDValue> Result = lowerInvokable(CLI, EHPadBB);
- if (Result.first.getNode())
- setValue(CS.getInstruction(), Result.first);
+ if (Result.first.getNode()) {
+ const Instruction *Inst = CS.getInstruction();
+ Result.first = lowerRangeToAssertZExt(DAG, *Inst, Result.first);
+ setValue(Inst, Result.first);
+ }
+
+ // The last element of CLI.InVals has the SDValue for swifterror return.
+ // Here we copy it to a virtual register and update SwiftErrorMap for
+ // book-keeping.
+ if (SwiftErrorVal && TLI.supportSwiftError()) {
+ // Get the last element of InVals.
+ SDValue Src = CLI.InVals.back();
+ const TargetRegisterClass *RC = TLI.getRegClassFor(TLI.getPointerTy(DL));
+ unsigned VReg = FuncInfo.MF->getRegInfo().createVirtualRegister(RC);
+ SDValue CopyNode = CLI.DAG.getCopyToReg(Result.second, CLI.DL, VReg, Src);
+ // We update the virtual register for the actual swifterror argument.
+ FuncInfo.setSwiftErrorVReg(FuncInfo.MBB, SwiftErrorVal, VReg);
+ DAG.setRoot(CopyNode);
+ }
}
/// IsOnlyUsedInZeroEqualityComparison - Return true if it only matters that the
@@ -5449,7 +5859,7 @@ static SDValue getMemCmpLoad(const Value *PtrVal, MVT LoadVT,
PointerType::getUnqual(LoadTy));
if (const Constant *LoadCst = ConstantFoldLoadFromConstPtr(
- const_cast<Constant *>(LoadInput), *Builder.DL))
+ const_cast<Constant *>(LoadInput), LoadTy, *Builder.DL))
return Builder.getValue(LoadCst);
}
@@ -5470,9 +5880,7 @@ static SDValue getMemCmpLoad(const Value *PtrVal, MVT LoadVT,
SDValue Ptr = Builder.getValue(PtrVal);
SDValue LoadVal = Builder.DAG.getLoad(LoadVT, Builder.getCurSDLoc(), Root,
Ptr, MachinePointerInfo(PtrVal),
- false /*volatile*/,
- false /*nontemporal*/,
- false /*isinvariant*/, 1 /* align=1 */);
+ /* Alignment = */ 1);
if (!ConstantMemory)
Builder.PendingLoads.push_back(LoadVal.getValue(1));
@@ -5516,7 +5924,7 @@ bool SelectionDAGBuilder::visitMemCmpCall(const CallInst &I) {
return true;
}
- const TargetSelectionDAGInfo &TSI = DAG.getSelectionDAGInfo();
+ const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo();
std::pair<SDValue, SDValue> Res =
TSI.EmitTargetCodeForMemcmp(DAG, getCurSDLoc(), DAG.getRoot(),
getValue(LHS), getValue(RHS), getValue(Size),
@@ -5613,7 +6021,7 @@ bool SelectionDAGBuilder::visitMemChrCall(const CallInst &I) {
!I.getType()->isPointerTy())
return false;
- const TargetSelectionDAGInfo &TSI = DAG.getSelectionDAGInfo();
+ const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo();
std::pair<SDValue, SDValue> Res =
TSI.EmitTargetCodeForMemchr(DAG, getCurSDLoc(), DAG.getRoot(),
getValue(Src), getValue(Char), getValue(Length),
@@ -5641,7 +6049,7 @@ bool SelectionDAGBuilder::visitStrCpyCall(const CallInst &I, bool isStpcpy) {
!I.getType()->isPointerTy())
return false;
- const TargetSelectionDAGInfo &TSI = DAG.getSelectionDAGInfo();
+ const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo();
std::pair<SDValue, SDValue> Res =
TSI.EmitTargetCodeForStrcpy(DAG, getCurSDLoc(), getRoot(),
getValue(Arg0), getValue(Arg1),
@@ -5670,7 +6078,7 @@ bool SelectionDAGBuilder::visitStrCmpCall(const CallInst &I) {
!I.getType()->isIntegerTy())
return false;
- const TargetSelectionDAGInfo &TSI = DAG.getSelectionDAGInfo();
+ const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo();
std::pair<SDValue, SDValue> Res =
TSI.EmitTargetCodeForStrcmp(DAG, getCurSDLoc(), DAG.getRoot(),
getValue(Arg0), getValue(Arg1),
@@ -5697,7 +6105,7 @@ bool SelectionDAGBuilder::visitStrLenCall(const CallInst &I) {
if (!Arg0->getType()->isPointerTy() || !I.getType()->isIntegerTy())
return false;
- const TargetSelectionDAGInfo &TSI = DAG.getSelectionDAGInfo();
+ const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo();
std::pair<SDValue, SDValue> Res =
TSI.EmitTargetCodeForStrlen(DAG, getCurSDLoc(), DAG.getRoot(),
getValue(Arg0), MachinePointerInfo(Arg0));
@@ -5724,7 +6132,7 @@ bool SelectionDAGBuilder::visitStrNLenCall(const CallInst &I) {
!I.getType()->isIntegerTy())
return false;
- const TargetSelectionDAGInfo &TSI = DAG.getSelectionDAGInfo();
+ const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo();
std::pair<SDValue, SDValue> Res =
TSI.EmitTargetCodeForStrnlen(DAG, getCurSDLoc(), DAG.getRoot(),
getValue(Arg0), getValue(Arg1),
@@ -5803,9 +6211,10 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) {
}
// Check for well-known libc/libm calls. If the function is internal, it
- // can't be a library call.
+ // can't be a library call. Don't do the check if marked as nobuiltin for
+ // some reason.
LibFunc::Func Func;
- if (!F->hasLocalLinkage() && F->hasName() &&
+ if (!I.isNoBuiltin() && !F->hasLocalLinkage() && F->hasName() &&
LibInfo->getLibFunc(F->getName(), Func) &&
LibInfo->hasOptimizedCodeGen(Func)) {
switch (Func) {
@@ -5952,9 +6361,19 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) {
RenameFn,
DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()));
- // Check if we can potentially perform a tail call. More detailed checking is
- // be done within LowerCallTo, after more information about the call is known.
- LowerCallTo(&I, Callee, I.isTailCall());
+ // Deopt bundles are lowered in LowerCallSiteWithDeoptBundle, and we don't
+ // have to do anything here to lower funclet bundles.
+ assert(!I.hasOperandBundlesOtherThan(
+ {LLVMContext::OB_deopt, LLVMContext::OB_funclet}) &&
+ "Cannot lower calls with arbitrary operand bundles!");
+
+ if (I.countOperandBundlesOfType(LLVMContext::OB_deopt))
+ LowerCallSiteWithDeoptBundle(&I, Callee, nullptr);
+ else
+ // Check if we can potentially perform a tail call. More detailed checking
+    // is done within LowerCallTo, after more information about the call is
+ // known.
+ LowerCallTo(&I, Callee, I.isTailCall());
}
namespace {
@@ -6036,9 +6455,8 @@ typedef SmallVector<SDISelAsmOperandInfo,16> SDISelAsmOperandInfoVector;
///
/// OpInfo describes the operand.
///
-static void GetRegistersForValue(SelectionDAG &DAG,
- const TargetLowering &TLI,
- SDLoc DL,
+static void GetRegistersForValue(SelectionDAG &DAG, const TargetLowering &TLI,
+ const SDLoc &DL,
SDISelAsmOperandInfo &OpInfo) {
LLVMContext &Context = *DAG.getContext();
@@ -6301,8 +6719,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
DAG.getFrameIndex(SSFI, TLI.getPointerTy(DAG.getDataLayout()));
Chain = DAG.getStore(
Chain, getCurSDLoc(), OpInfo.CallOperand, StackSlot,
- MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SSFI),
- false, false, 0);
+ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SSFI));
OpInfo.CallOperand = StackSlot;
}
@@ -6349,6 +6766,8 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
ExtraInfo |= InlineAsm::Extra_HasSideEffects;
if (IA->isAlignStack())
ExtraInfo |= InlineAsm::Extra_IsAlignStack;
+ if (CS.isConvergent())
+ ExtraInfo |= InlineAsm::Extra_IsConvergent;
// Set the asm dialect.
ExtraInfo |= IA->getDialect() * InlineAsm::Extra_AsmDialect;
@@ -6413,10 +6832,9 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
// Copy the output from the appropriate register. Find a register that
// we can use.
if (OpInfo.AssignedRegs.Regs.empty()) {
- LLVMContext &Ctx = *DAG.getContext();
- Ctx.emitError(CS.getInstruction(),
- "couldn't allocate output register for constraint '" +
- Twine(OpInfo.ConstraintCode) + "'");
+ emitInlineAsmError(
+ CS, "couldn't allocate output register for constraint '" +
+ Twine(OpInfo.ConstraintCode) + "'");
return;
}
@@ -6469,10 +6887,9 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
// Add (OpFlag&0xffff)>>3 registers to MatchedRegs.
if (OpInfo.isIndirect) {
// This happens on gcc/testsuite/gcc.dg/pr8788-1.c
- LLVMContext &Ctx = *DAG.getContext();
- Ctx.emitError(CS.getInstruction(), "inline asm not supported yet:"
- " don't know how to handle tied "
- "indirect register inputs");
+ emitInlineAsmError(CS, "inline asm not supported yet:"
+ " don't know how to handle tied "
+ "indirect register inputs");
return;
}
@@ -6486,10 +6903,9 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
if (const TargetRegisterClass *RC = TLI.getRegClassFor(RegVT))
MatchedRegs.Regs.push_back(RegInfo.createVirtualRegister(RC));
else {
- LLVMContext &Ctx = *DAG.getContext();
- Ctx.emitError(CS.getInstruction(),
- "inline asm error: This value"
- " type register class is not natively supported!");
+ emitInlineAsmError(
+ CS, "inline asm error: This value"
+ " type register class is not natively supported!");
return;
}
}
@@ -6527,10 +6943,8 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
TLI.LowerAsmOperandForConstraint(InOperandVal, OpInfo.ConstraintCode,
Ops, DAG);
if (Ops.empty()) {
- LLVMContext &Ctx = *DAG.getContext();
- Ctx.emitError(CS.getInstruction(),
- "invalid operand for inline asm constraint '" +
- Twine(OpInfo.ConstraintCode) + "'");
+ emitInlineAsmError(CS, "invalid operand for inline asm constraint '" +
+ Twine(OpInfo.ConstraintCode) + "'");
return;
}
@@ -6570,20 +6984,17 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
// TODO: Support this.
if (OpInfo.isIndirect) {
- LLVMContext &Ctx = *DAG.getContext();
- Ctx.emitError(CS.getInstruction(),
- "Don't know how to handle indirect register inputs yet "
- "for constraint '" +
- Twine(OpInfo.ConstraintCode) + "'");
+ emitInlineAsmError(
+ CS, "Don't know how to handle indirect register inputs yet "
+ "for constraint '" +
+ Twine(OpInfo.ConstraintCode) + "'");
return;
}
// Copy the input into the appropriate registers.
if (OpInfo.AssignedRegs.Regs.empty()) {
- LLVMContext &Ctx = *DAG.getContext();
- Ctx.emitError(CS.getInstruction(),
- "couldn't allocate input reg for constraint '" +
- Twine(OpInfo.ConstraintCode) + "'");
+ emitInlineAsmError(CS, "couldn't allocate input reg for constraint '" +
+ Twine(OpInfo.ConstraintCode) + "'");
return;
}
@@ -6667,11 +7078,9 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
// Emit the non-flagged stores from the physregs.
SmallVector<SDValue, 8> OutChains;
for (unsigned i = 0, e = StoresToEmit.size(); i != e; ++i) {
- SDValue Val = DAG.getStore(Chain, getCurSDLoc(),
- StoresToEmit[i].first,
+ SDValue Val = DAG.getStore(Chain, getCurSDLoc(), StoresToEmit[i].first,
getValue(StoresToEmit[i].second),
- MachinePointerInfo(StoresToEmit[i].second),
- false, false, 0);
+ MachinePointerInfo(StoresToEmit[i].second));
OutChains.push_back(Val);
}
@@ -6681,6 +7090,17 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
DAG.setRoot(Chain);
}
+void SelectionDAGBuilder::emitInlineAsmError(ImmutableCallSite CS,
+ const Twine &Message) {
+ LLVMContext &Ctx = *DAG.getContext();
+ Ctx.emitError(CS.getInstruction(), Message);
+
+ // Make sure we leave the DAG in a valid state
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ auto VT = TLI.getValueType(DAG.getDataLayout(), CS.getType());
+ setValue(CS.getInstruction(), DAG.getUNDEF(VT));
+}
+
void SelectionDAGBuilder::visitVAStart(const CallInst &I) {
DAG.setRoot(DAG.getNode(ISD::VASTART, getCurSDLoc(),
MVT::Other, getRoot(),
@@ -6715,16 +7135,49 @@ void SelectionDAGBuilder::visitVACopy(const CallInst &I) {
DAG.getSrcValue(I.getArgOperand(1))));
}
-/// \brief Lower an argument list according to the target calling convention.
-///
-/// \return A tuple of <return-value, token-chain>
+SDValue SelectionDAGBuilder::lowerRangeToAssertZExt(SelectionDAG &DAG,
+ const Instruction &I,
+ SDValue Op) {
+ const MDNode *Range = I.getMetadata(LLVMContext::MD_range);
+ if (!Range)
+ return Op;
+
+ Constant *Lo = cast<ConstantAsMetadata>(Range->getOperand(0))->getValue();
+ if (!Lo->isNullValue())
+ return Op;
+
+ Constant *Hi = cast<ConstantAsMetadata>(Range->getOperand(1))->getValue();
+ unsigned Bits = cast<ConstantInt>(Hi)->getValue().logBase2();
+
+ EVT SmallVT = EVT::getIntegerVT(*DAG.getContext(), Bits);
+
+ SDLoc SL = getCurSDLoc();
+
+ SDValue ZExt = DAG.getNode(ISD::AssertZext, SL, Op.getValueType(),
+ Op, DAG.getValueType(SmallVT));
+ unsigned NumVals = Op.getNode()->getNumValues();
+ if (NumVals == 1)
+ return ZExt;
+
+ SmallVector<SDValue, 4> Ops;
+
+ Ops.push_back(ZExt);
+ for (unsigned I = 1; I != NumVals; ++I)
+ Ops.push_back(Op.getValue(I));
+
+ return DAG.getMergeValues(Ops, SL);
+}
+
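A worked example for lowerRangeToAssertZExt above, assuming a call annotated with !range !{i64 0, i64 8}: values lie in [0, 8), so they fit in three bits ("Context" here abbreviates the LLVMContext):

    // Hypothetical values; mirrors the computation in the helper above.
    unsigned Bits = APInt(64, 8).logBase2();          // == 3
    EVT SmallVT = EVT::getIntegerVT(Context, Bits);   // i3
    // Op is then wrapped as AssertZext(Op, i3): all bits above bit 2 are zero.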
+/// \brief Populate a CallLoweringInfo (into \p CLI) based on the properties of
+/// the call being lowered.
///
/// This is a helper for lowering intrinsics that follow a target calling
/// convention or require stack pointer adjustment. Only a subset of the
/// intrinsic's operands need to participate in the calling convention.
-std::pair<SDValue, SDValue> SelectionDAGBuilder::lowerCallOperands(
- ImmutableCallSite CS, unsigned ArgIdx, unsigned NumArgs, SDValue Callee,
- Type *ReturnTy, const BasicBlock *EHPadBB, bool IsPatchPoint) {
+void SelectionDAGBuilder::populateCallLoweringInfo(
+ TargetLowering::CallLoweringInfo &CLI, ImmutableCallSite CS,
+ unsigned ArgIdx, unsigned NumArgs, SDValue Callee, Type *ReturnTy,
+ bool IsPatchPoint) {
TargetLowering::ArgListTy Args;
Args.reserve(NumArgs);
@@ -6743,12 +7196,11 @@ std::pair<SDValue, SDValue> SelectionDAGBuilder::lowerCallOperands(
Args.push_back(Entry);
}
- TargetLowering::CallLoweringInfo CLI(DAG);
- CLI.setDebugLoc(getCurSDLoc()).setChain(getRoot())
- .setCallee(CS.getCallingConv(), ReturnTy, Callee, std::move(Args), NumArgs)
- .setDiscardResult(CS->use_empty()).setIsPatchPoint(IsPatchPoint);
-
- return lowerInvokable(CLI, EHPadBB);
+ CLI.setDebugLoc(getCurSDLoc())
+ .setChain(getRoot())
+ .setCallee(CS.getCallingConv(), ReturnTy, Callee, std::move(Args))
+ .setDiscardResult(CS->use_empty())
+ .setIsPatchPoint(IsPatchPoint);
}
/// \brief Add a stack map intrinsic call's live variable operands to a stackmap
@@ -6769,7 +7221,7 @@ std::pair<SDValue, SDValue> SelectionDAGBuilder::lowerCallOperands(
/// only available in a register, then the runtime would need to trap when
/// execution reaches the StackMap in order to read the alloca's location.
static void addStackMapLiveVars(ImmutableCallSite CS, unsigned StartIdx,
- SDLoc DL, SmallVectorImpl<SDValue> &Ops,
+ const SDLoc &DL, SmallVectorImpl<SDValue> &Ops,
SelectionDAGBuilder &Builder) {
for (unsigned i = StartIdx, e = CS.arg_size(); i != e; ++i) {
SDValue OpVal = Builder.getValue(CS.getArgument(i));
@@ -6889,8 +7341,11 @@ void SelectionDAGBuilder::visitPatchpoint(ImmutableCallSite CS,
unsigned NumCallArgs = IsAnyRegCC ? 0 : NumArgs;
Type *ReturnTy =
IsAnyRegCC ? Type::getVoidTy(*DAG.getContext()) : CS->getType();
- std::pair<SDValue, SDValue> Result = lowerCallOperands(
- CS, NumMetaOpers, NumCallArgs, Callee, ReturnTy, EHPadBB, true);
+
+ TargetLowering::CallLoweringInfo CLI(DAG);
+ populateCallLoweringInfo(CLI, CS, NumMetaOpers, NumCallArgs, Callee, ReturnTy,
+ true);
+ std::pair<SDValue, SDValue> Result = lowerInvokable(CLI, EHPadBB);
SDNode *CallEnd = Result.second.getNode();
if (HasDef && (CallEnd->getOpcode() == ISD::CopyFromReg))
@@ -7057,6 +7512,8 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
Entry.isNest = false;
Entry.isByVal = false;
Entry.isReturned = false;
+ Entry.isSwiftSelf = false;
+ Entry.isSwiftError = false;
Entry.Alignment = Align;
CLI.getArgs().insert(CLI.getArgs().begin(), Entry);
CLI.RetTy = Type::getVoidTy(CLI.RetTy->getContext());
@@ -7085,10 +7542,23 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
}
}
+  // Push the swifterror return as the last element of CLI.Ins.
+ ArgListTy &Args = CLI.getArgs();
+ if (supportSwiftError()) {
+ for (unsigned i = 0, e = Args.size(); i != e; ++i) {
+ if (Args[i].isSwiftError) {
+ ISD::InputArg MyFlags;
+ MyFlags.VT = getPointerTy(DL);
+ MyFlags.ArgVT = EVT(getPointerTy(DL));
+ MyFlags.Flags.setSwiftError();
+ CLI.Ins.push_back(MyFlags);
+ }
+ }
+ }
+
// Handle all of the outgoing arguments.
CLI.Outs.clear();
CLI.OutVals.clear();
- ArgListTy &Args = CLI.getArgs();
for (unsigned i = 0, e = Args.size(); i != e; ++i) {
SmallVector<EVT, 4> ValueVTs;
ComputeValueVTs(*this, DL, Args[i].Ty, ValueVTs);
@@ -7114,6 +7584,10 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
Flags.setInReg();
if (Args[i].isSRet)
Flags.setSRet();
+ if (Args[i].isSwiftSelf)
+ Flags.setSwiftSelf();
+ if (Args[i].isSwiftError)
+ Flags.setSwiftError();
if (Args[i].isByVal)
Flags.setByVal();
if (Args[i].isInAlloca) {
@@ -7202,6 +7676,9 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
SmallVector<SDValue, 4> InVals;
CLI.Chain = LowerCall(CLI, InVals);
+ // Update CLI.InVals to use outside of this function.
+ CLI.InVals = InVals;
+
// Verify that the target's LowerCall behaved as expected.
assert(CLI.Chain.getNode() && CLI.Chain.getValueType() == MVT::Other &&
"LowerCall didn't return a valid chain!");
@@ -7219,12 +7696,13 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
return std::make_pair(SDValue(), SDValue());
}
- DEBUG(for (unsigned i = 0, e = CLI.Ins.size(); i != e; ++i) {
- assert(InVals[i].getNode() &&
- "LowerCall emitted a null value!");
- assert(EVT(CLI.Ins[i].VT) == InVals[i].getValueType() &&
- "LowerCall emitted a value with the wrong type!");
- });
+#ifndef NDEBUG
+ for (unsigned i = 0, e = CLI.Ins.size(); i != e; ++i) {
+ assert(InVals[i].getNode() && "LowerCall emitted a null value!");
+ assert(EVT(CLI.Ins[i].VT) == InVals[i].getValueType() &&
+ "LowerCall emitted a value with the wrong type!");
+ }
+#endif
SmallVector<SDValue, 4> ReturnValues;
if (!CanLowerReturn) {
@@ -7254,7 +7732,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
RetTys[i], CLI.DL, CLI.Chain, Add,
MachinePointerInfo::getFixedStack(CLI.DAG.getMachineFunction(),
DemoteStackIdx, Offsets[i]),
- false, false, false, 1);
+ /* Alignment = */ 1);
ReturnValues[i] = L;
Chains[i] = L.getValue(1);
}
@@ -7263,7 +7741,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
} else {
// Collect the legal value parts into potentially illegal values
// that correspond to the original function's return values.
- ISD::NodeType AssertOp = ISD::DELETED_NODE;
+ Optional<ISD::NodeType> AssertOp;
if (CLI.RetSExt)
AssertOp = ISD::AssertSext;
else if (CLI.RetZExt)
@@ -7295,8 +7773,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
void TargetLowering::LowerOperationWrapper(SDNode *N,
SmallVectorImpl<SDValue> &Results,
SelectionDAG &DAG) const {
- SDValue Res = LowerOperation(SDValue(N, 0), DAG);
- if (Res.getNode())
+ if (SDValue Res = LowerOperation(SDValue(N, 0), DAG))
Results.push_back(Res);
}
@@ -7394,6 +7871,10 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
Flags.setInReg();
if (F.getAttributes().hasAttribute(Idx, Attribute::StructRet))
Flags.setSRet();
+ if (F.getAttributes().hasAttribute(Idx, Attribute::SwiftSelf))
+ Flags.setSwiftSelf();
+ if (F.getAttributes().hasAttribute(Idx, Attribute::SwiftError))
+ Flags.setSwiftError();
if (F.getAttributes().hasAttribute(Idx, Attribute::ByVal))
Flags.setByVal();
if (F.getAttributes().hasAttribute(Idx, Attribute::InAlloca)) {
@@ -7483,7 +7964,7 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
PointerType::getUnqual(F.getReturnType()), ValueVTs);
MVT VT = ValueVTs[0].getSimpleVT();
MVT RegVT = TLI->getRegisterType(*CurDAG->getContext(), VT);
- ISD::NodeType AssertOp = ISD::DELETED_NODE;
+ Optional<ISD::NodeType> AssertOp = None;
SDValue ArgValue = getCopyFromParts(DAG, dl, &InVals[0], 1,
RegVT, VT, nullptr, AssertOp);
@@ -7524,7 +8005,7 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
unsigned NumParts = TLI->getNumRegisters(*CurDAG->getContext(), VT);
if (!I->use_empty()) {
- ISD::NodeType AssertOp = ISD::DELETED_NODE;
+ Optional<ISD::NodeType> AssertOp;
if (F.getAttributes().hasAttribute(Idx, Attribute::SExt))
AssertOp = ISD::AssertSext;
else if (F.getAttributes().hasAttribute(Idx, Attribute::ZExt))
@@ -7559,6 +8040,14 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
FuncInfo->setArgumentFrameIndex(&*I, FI->getIndex());
}
+ // Update SwiftErrorMap.
+ if (Res.getOpcode() == ISD::CopyFromReg && TLI->supportSwiftError() &&
+ F.getAttributes().hasAttribute(Idx, Attribute::SwiftError)) {
+ unsigned Reg = cast<RegisterSDNode>(Res.getOperand(1))->getReg();
+ if (TargetRegisterInfo::isVirtualRegister(Reg))
+ FuncInfo->SwiftErrorMap[FuncInfo->MBB][0] = Reg;
+ }
+
// If this argument is live outside of the entry block, insert a copy from
// wherever we got it to the vreg that other BB's will reference it as.
if (!TM.Options.EnableFastISel && Res.getOpcode() == ISD::CopyFromReg) {
@@ -7656,7 +8145,8 @@ SelectionDAGBuilder::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) {
EVT VT = ValueVTs[vti];
unsigned NumRegisters = TLI.getNumRegisters(*DAG.getContext(), VT);
for (unsigned i = 0, e = NumRegisters; i != e; ++i)
- FuncInfo.PHINodesToUpdate.push_back(std::make_pair(MBBI++, Reg+i));
+ FuncInfo.PHINodesToUpdate.push_back(
+ std::make_pair(&*MBBI++, Reg + i));
Reg += NumRegisters;
}
}
@@ -7708,7 +8198,8 @@ void SelectionDAGBuilder::updateDAGForMaybeTailCall(SDValue MaybeTC) {
bool SelectionDAGBuilder::isDense(const CaseClusterVector &Clusters,
unsigned *TotalCases, unsigned First,
- unsigned Last) {
+ unsigned Last,
+ unsigned Density) {
assert(Last >= First);
assert(TotalCases[Last] >= TotalCases[First]);
@@ -7729,10 +8220,15 @@ bool SelectionDAGBuilder::isDense(const CaseClusterVector &Clusters,
assert(NumCases < UINT64_MAX / 100);
assert(Range >= NumCases);
- return NumCases * 100 >= Range * MinJumpTableDensity;
+ return NumCases * 100 >= Range * Density;
}
-static inline bool areJTsAllowed(const TargetLowering &TLI) {
+static inline bool areJTsAllowed(const TargetLowering &TLI,
+ const SwitchInst *SI) {
+ const Function *Fn = SI->getParent()->getParent();
+ if (Fn->getFnAttribute("no-jump-tables").getValueAsString() == "true")
+ return false;
+
return TLI.isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) ||
TLI.isOperationLegalOrCustom(ISD::BRIND, MVT::Other);
}
@@ -7826,7 +8322,7 @@ void SelectionDAGBuilder::findJumpTables(CaseClusterVector &Clusters,
#endif
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- if (!areJTsAllowed(TLI))
+ if (!areJTsAllowed(TLI, SI))
return;
const int64_t N = Clusters.size();
@@ -7843,7 +8339,11 @@ void SelectionDAGBuilder::findJumpTables(CaseClusterVector &Clusters,
TotalCases[i] += TotalCases[i - 1];
}
- if (N >= MinJumpTableSize && isDense(Clusters, &TotalCases[0], 0, N - 1)) {
+ unsigned MinDensity = JumpTableDensity;
+ if (DefaultMBB->getParent()->getFunction()->optForSize())
+ MinDensity = OptsizeJumpTableDensity;
+ if (N >= MinJumpTableSize
+ && isDense(Clusters, &TotalCases[0], 0, N - 1, MinDensity)) {
// Cheap case: the whole range might be suitable for jump table.
CaseCluster JTCluster;
if (buildJumpTable(Clusters, 0, N - 1, SI, DefaultMBB, JTCluster)) {
@@ -7888,7 +8388,7 @@ void SelectionDAGBuilder::findJumpTables(CaseClusterVector &Clusters,
// Search for a solution that results in fewer partitions.
for (int64_t j = N - 1; j > i; j--) {
// Try building a partition from Clusters[i..j].
- if (isDense(Clusters, &TotalCases[0], i, j)) {
+ if (isDense(Clusters, &TotalCases[0], i, j, MinDensity)) {
unsigned NumPartitions = 1 + (j == N - 1 ? 0 : MinPartitions[j + 1]);
bool IsTable = j - i + 1 >= MinJumpTableSize;
unsigned Tables = IsTable + (j == N - 1 ? 0 : NumTables[j + 1]);
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
index 8fb85ff..b9888ae 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
@@ -18,14 +18,14 @@
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/IR/CallSite.h"
-#include "llvm/IR/Statepoint.h"
#include "llvm/IR/Constants.h"
+#include "llvm/IR/Statepoint.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/TargetLowering.h"
+#include <utility>
#include <vector>
namespace llvm {
@@ -101,8 +101,8 @@ class SelectionDAGBuilder {
unsigned SDNodeOrder;
public:
DanglingDebugInfo() : DI(nullptr), dl(DebugLoc()), SDNodeOrder(0) { }
- DanglingDebugInfo(const DbgValueInst *di, DebugLoc DL, unsigned SDNO) :
- DI(di), dl(DL), SDNodeOrder(SDNO) { }
+ DanglingDebugInfo(const DbgValueInst *di, DebugLoc DL, unsigned SDNO)
+ : DI(di), dl(std::move(DL)), SDNodeOrder(SDNO) {}
const DbgValueInst* getDI() { return DI; }
DebugLoc getdl() { return dl; }
unsigned getSDNodeOrder() { return SDNodeOrder; }
@@ -260,8 +260,9 @@ private:
};
struct JumpTableHeader {
JumpTableHeader(APInt F, APInt L, const Value *SV, MachineBasicBlock *H,
- bool E = false):
- First(F), Last(L), SValue(SV), HeaderBB(H), Emitted(E) {}
+ bool E = false)
+ : First(std::move(F)), Last(std::move(L)), SValue(SV), HeaderBB(H),
+ Emitted(E) {}
APInt First;
APInt Last;
const Value *SValue;
@@ -286,9 +287,9 @@ private:
BitTestBlock(APInt F, APInt R, const Value *SV, unsigned Rg, MVT RgVT,
bool E, bool CR, MachineBasicBlock *P, MachineBasicBlock *D,
BitTestInfo C, BranchProbability Pr)
- : First(F), Range(R), SValue(SV), Reg(Rg), RegVT(RgVT), Emitted(E),
- ContiguousRange(CR), Parent(P), Default(D), Cases(std::move(C)),
- Prob(Pr) {}
+ : First(std::move(F)), Range(std::move(R)), SValue(SV), Reg(Rg),
+ RegVT(RgVT), Emitted(E), ContiguousRange(CR), Parent(P), Default(D),
+ Cases(std::move(C)), Prob(Pr) {}
APInt First;
APInt Range;
const Value *SValue;
@@ -303,12 +304,9 @@ private:
BranchProbability DefaultProb;
};
- /// Minimum jump table density, in percent.
- enum { MinJumpTableDensity = 40 };
-
/// Check whether a range of clusters is dense enough for a jump table.
bool isDense(const CaseClusterVector &Clusters, unsigned *TotalCases,
- unsigned First, unsigned Last);
+ unsigned First, unsigned Last, unsigned MinDensity);
/// Build a jump table cluster from Clusters[First..Last]. Returns false if it
/// decides it's not a good idea.
@@ -457,7 +455,14 @@ private:
///
/// c. After we finish selecting the basic block, in FinishBasicBlock if
/// the StackProtectorDescriptor attached to the SelectionDAGBuilder is
- /// initialized, we first find a splice point in the parent basic block
+ /// initialized, we produce the validation code with one of these
+ /// techniques:
+ /// 1) with a call to a guard check function
+ /// 2) with inlined instrumentation
+ ///
+ /// 1) We insert a call to the check function before the terminator.
+ ///
+ /// 2) We first find a splice point in the parent basic block
/// before the terminator and then splice the terminator of said basic
/// block into the success basic block. Then we code-gen a new tail for
/// the parent basic block consisting of the two loads, the comparison,
@@ -467,29 +472,31 @@ private:
/// the same function, use the same failure basic block).
class StackProtectorDescriptor {
public:
- StackProtectorDescriptor() : ParentMBB(nullptr), SuccessMBB(nullptr),
- FailureMBB(nullptr), Guard(nullptr),
- GuardReg(0) { }
+ StackProtectorDescriptor()
+ : ParentMBB(nullptr), SuccessMBB(nullptr), FailureMBB(nullptr) {}
/// Returns true if all fields of the stack protector descriptor are
/// initialized implying that we should/are ready to emit a stack protector.
bool shouldEmitStackProtector() const {
- return ParentMBB && SuccessMBB && FailureMBB && Guard;
+ return ParentMBB && SuccessMBB && FailureMBB;
+ }
+
+ bool shouldEmitFunctionBasedCheckStackProtector() const {
+ return ParentMBB && !SuccessMBB && !FailureMBB;
}
/// Initialize the stack protector descriptor structure for a new basic
/// block.
- void initialize(const BasicBlock *BB,
- MachineBasicBlock *MBB,
- const CallInst &StackProtCheckCall) {
+ void initialize(const BasicBlock *BB, MachineBasicBlock *MBB,
+ bool FunctionBasedInstrumentation) {
// Make sure we are not initialized yet.
assert(!shouldEmitStackProtector() && "Stack Protector Descriptor is "
"already initialized!");
ParentMBB = MBB;
- SuccessMBB = AddSuccessorMBB(BB, MBB, /* IsLikely */ true);
- FailureMBB = AddSuccessorMBB(BB, MBB, /* IsLikely */ false, FailureMBB);
- if (!Guard)
- Guard = StackProtCheckCall.getArgOperand(0);
+ if (!FunctionBasedInstrumentation) {
+ SuccessMBB = AddSuccessorMBB(BB, MBB, /* IsLikely */ true);
+ FailureMBB = AddSuccessorMBB(BB, MBB, /* IsLikely */ false, FailureMBB);
+ }
}
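Technique 1) above relies on the target publishing a guard check function (MSVC's __security_check_cookie is the typical example); a rough sketch of how the caller can dispatch on it, assuming the TargetLowering::getSSPStackGuardCheck hook from the same stack-protector rework:

    // Sketch only: emit a call to the guard check function instead of the
    // inline load/compare/branch sequence.
    if (auto *GuardCheckFn = TLI.getSSPStackGuardCheck(M)) {
      // ... build a call to GuardCheckFn, passing the guard stack slot ...
    }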
/// Reset state that changes when we handle different basic blocks.
@@ -518,17 +525,11 @@ private:
/// always the same.
void resetPerFunctionState() {
FailureMBB = nullptr;
- Guard = nullptr;
- GuardReg = 0;
}
MachineBasicBlock *getParentMBB() { return ParentMBB; }
MachineBasicBlock *getSuccessMBB() { return SuccessMBB; }
MachineBasicBlock *getFailureMBB() { return FailureMBB; }
- const Value *getGuard() { return Guard; }
-
- unsigned getGuardReg() const { return GuardReg; }
- void setGuardReg(unsigned R) { GuardReg = R; }
private:
/// The basic block for which we are generating the stack protector.
@@ -548,13 +549,6 @@ private:
/// contain a call to __stack_chk_fail().
MachineBasicBlock *FailureMBB;
- /// The guard variable which we will compare against the stored value in the
- /// stack protector stack slot.
- const Value *Guard;
-
- /// The virtual register holding the stack guard value.
- unsigned GuardReg;
-
/// Add a successor machine basic block to ParentMBB. If the successor mbb
/// has not been created yet (i.e. if SuccMBB = 0), then the machine basic
/// block will be created. Assign a large weight if IsLikely is true.
@@ -708,28 +702,88 @@ public:
void LowerCallTo(ImmutableCallSite CS, SDValue Callee, bool IsTailCall,
const BasicBlock *EHPadBB = nullptr);
- std::pair<SDValue, SDValue> lowerCallOperands(
- ImmutableCallSite CS,
- unsigned ArgIdx,
- unsigned NumArgs,
- SDValue Callee,
- Type *ReturnTy,
- const BasicBlock *EHPadBB = nullptr,
- bool IsPatchPoint = false);
+  // Lower range metadata from [0, N) to an AssertZext of the integer type
+  // whose width is floor(log2(N)).
+ SDValue lowerRangeToAssertZExt(SelectionDAG &DAG, const Instruction &I,
+ SDValue Op);
+
+ void populateCallLoweringInfo(TargetLowering::CallLoweringInfo &CLI,
+ ImmutableCallSite CS, unsigned ArgIdx,
+ unsigned NumArgs, SDValue Callee,
+ Type *ReturnTy, bool IsPatchPoint);
+
+ std::pair<SDValue, SDValue>
+ lowerInvokable(TargetLowering::CallLoweringInfo &CLI,
+ const BasicBlock *EHPadBB = nullptr);
/// UpdateSplitBlock - When an MBB was split during scheduling, update the
/// references that need to refer to the last resulting block.
void UpdateSplitBlock(MachineBasicBlock *First, MachineBasicBlock *Last);
+  /// Describes a gc.statepoint or a gc.statepoint-like construct for the
+  /// purposes
+ /// of lowering into a STATEPOINT node.
+ struct StatepointLoweringInfo {
+ /// Bases[i] is the base pointer for Ptrs[i]. Together they denote the set
+ /// of gc pointers this STATEPOINT has to relocate.
+ SmallVector<const Value *, 16> Bases;
+ SmallVector<const Value *, 16> Ptrs;
+
+ /// The set of gc.relocate calls associated with this gc.statepoint.
+ SmallVector<const GCRelocateInst *, 16> GCRelocates;
+
+ /// The full list of gc arguments to the gc.statepoint being lowered.
+ ArrayRef<const Use> GCArgs;
+
+ /// The gc.statepoint instruction.
+ const Instruction *StatepointInstr = nullptr;
+
+ /// The list of gc transition arguments present in the gc.statepoint being
+ /// lowered.
+ ArrayRef<const Use> GCTransitionArgs;
+
+ /// The ID that the resulting STATEPOINT instruction has to report.
+ unsigned ID = -1;
+
+ /// Information regarding the underlying call instruction.
+ TargetLowering::CallLoweringInfo CLI;
+
+ /// The deoptimization state associated with this gc.statepoint call, if
+ /// any.
+ ArrayRef<const Use> DeoptState;
+
+ /// Flags associated with the meta arguments being lowered.
+ uint64_t StatepointFlags = -1;
+
+ /// The number of patchable bytes the call needs to get lowered into.
+ unsigned NumPatchBytes = -1;
+
+ /// The exception handling unwind destination, in case this represents an
+ /// invoke of gc.statepoint.
+ const BasicBlock *EHPadBB = nullptr;
+
+ explicit StatepointLoweringInfo(SelectionDAG &DAG) : CLI(DAG) {}
+ };
+
+ /// Lower \p SLI into a STATEPOINT instruction.
+ SDValue LowerAsSTATEPOINT(StatepointLoweringInfo &SLI);
+
// This function is responsible for the whole statepoint lowering process.
// It uniformly handles invoke and call statepoints.
void LowerStatepoint(ImmutableStatepoint Statepoint,
const BasicBlock *EHPadBB = nullptr);
-private:
- std::pair<SDValue, SDValue>
- lowerInvokable(TargetLowering::CallLoweringInfo &CLI,
- const BasicBlock *EHPadBB = nullptr);
+ void LowerCallSiteWithDeoptBundle(ImmutableCallSite CS, SDValue Callee,
+ const BasicBlock *EHPadBB);
+
+ void LowerDeoptimizeCall(const CallInst *CI);
+ void LowerDeoptimizingReturn();
+
+ void LowerCallSiteWithDeoptBundleImpl(ImmutableCallSite CS, SDValue Callee,
+ const BasicBlock *EHPadBB,
+ bool VarArgDisallowed,
+ bool ForceVoidReturnTy);
+
+private:
// Terminator instructions.
void visitRet(const ReturnInst &I);
void visitBr(const BranchInst &I);
@@ -840,6 +894,8 @@ private:
bool visitBinaryFloatCall(const CallInst &I, unsigned Opcode);
void visitAtomicLoad(const LoadInst &I);
void visitAtomicStore(const StoreInst &I);
+ void visitLoadFromSwiftError(const LoadInst &I);
+ void visitStoreToSwiftError(const StoreInst &I);
void visitInlineAsm(ImmutableCallSite CS);
const char *visitIntrinsicCall(const CallInst &I, unsigned Intrinsic);
@@ -853,10 +909,9 @@ private:
void visitPatchpoint(ImmutableCallSite CS,
const BasicBlock *EHPadBB = nullptr);
- // These three are implemented in StatepointLowering.cpp
- void visitStatepoint(const CallInst &I);
+ // These two are implemented in StatepointLowering.cpp
void visitGCRelocate(const GCRelocateInst &I);
- void visitGCResult(const CallInst &I);
+ void visitGCResult(const GCResultInst &I);
void visitUserOp1(const Instruction &I) {
llvm_unreachable("UserOp1 should not exist at instruction selection time!");
@@ -870,6 +925,8 @@ private:
void HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB);
+ void emitInlineAsmError(ImmutableCallSite CS, const Twine &Message);
+
/// EmitFuncArgumentDbgValue - If V is an function argument then create
/// corresponding DBG_VALUE machine instruction for it now. At the end of
/// instruction selection, they will be inserted to the entry BB.
@@ -937,8 +994,7 @@ struct RegsForValue {
/// Chain/Flag as the input and updates them for the output Chain/Flag.
/// If the Flag pointer is NULL, no flag is used.
SDValue getCopyFromRegs(SelectionDAG &DAG, FunctionLoweringInfo &FuncInfo,
- SDLoc dl,
- SDValue &Chain, SDValue *Flag,
+ const SDLoc &dl, SDValue &Chain, SDValue *Flag,
const Value *V = nullptr) const;
/// getCopyToRegs - Emit a series of CopyToReg nodes that copies the specified
@@ -946,18 +1002,16 @@ struct RegsForValue {
/// as the input and updates them for the output Chain/Flag. If the Flag
/// pointer is nullptr, no flag is used. If V is not nullptr, then it is used
/// in printing better diagnostic messages on error.
- void
- getCopyToRegs(SDValue Val, SelectionDAG &DAG, SDLoc dl, SDValue &Chain,
- SDValue *Flag, const Value *V = nullptr,
- ISD::NodeType PreferredExtendType = ISD::ANY_EXTEND) const;
+ void getCopyToRegs(SDValue Val, SelectionDAG &DAG, const SDLoc &dl,
+ SDValue &Chain, SDValue *Flag, const Value *V = nullptr,
+ ISD::NodeType PreferredExtendType = ISD::ANY_EXTEND) const;
/// AddInlineAsmOperands - Add this value to the specified inlineasm node
/// operand list. This adds the code marker, matching input operand index
/// (if applicable), and includes the number of values added into it.
- void AddInlineAsmOperands(unsigned Kind,
- bool HasMatching, unsigned MatchingIdx, SDLoc dl,
- SelectionDAG &DAG,
- std::vector<SDValue> &Ops) const;
+ void AddInlineAsmOperands(unsigned Kind, bool HasMatching,
+ unsigned MatchingIdx, const SDLoc &dl,
+ SelectionDAG &DAG, std::vector<SDValue> &Ops) const;
};
} // end namespace llvm
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
index a1c6c4c..93ac6d6 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
@@ -101,10 +101,11 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::GLOBAL_OFFSET_TABLE: return "GLOBAL_OFFSET_TABLE";
case ISD::RETURNADDR: return "RETURNADDR";
case ISD::FRAMEADDR: return "FRAMEADDR";
- case ISD::LOCAL_RECOVER: return "LOCAL_RECOVER";
+ case ISD::LOCAL_RECOVER: return "LOCAL_RECOVER";
case ISD::READ_REGISTER: return "READ_REGISTER";
case ISD::WRITE_REGISTER: return "WRITE_REGISTER";
case ISD::FRAME_TO_ARGS_OFFSET: return "FRAME_TO_ARGS_OFFSET";
+ case ISD::EH_DWARF_CFA: return "EH_DWARF_CFA";
case ISD::EH_RETURN: return "EH_RETURN";
case ISD::EH_SJLJ_SETJMP: return "EH_SJLJ_SETJMP";
case ISD::EH_SJLJ_LONGJMP: return "EH_SJLJ_LONGJMP";
@@ -202,6 +203,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::FREM: return "frem";
case ISD::FCOPYSIGN: return "fcopysign";
case ISD::FGETSIGN: return "fgetsign";
+ case ISD::FCANONICALIZE: return "fcanonicalize";
case ISD::FPOW: return "fpow";
case ISD::SMIN: return "smin";
case ISD::SMAX: return "smax";
@@ -378,7 +380,7 @@ static Printable PrintNodeId(const SDNode &Node) {
});
}
-void SDNode::dump() const { dump(nullptr); }
+LLVM_DUMP_METHOD void SDNode::dump() const { dump(nullptr); }
void SDNode::dump(const SelectionDAG *G) const {
print(dbgs(), G);
dbgs() << '\n';
@@ -590,7 +592,7 @@ static void DumpNodes(const SDNode *N, unsigned indent, const SelectionDAG *G) {
N->dump(G);
}
-void SelectionDAG::dump() const {
+LLVM_DUMP_METHOD void SelectionDAG::dump() const {
dbgs() << "SelectionDAG has " << AllNodes.size() << " nodes:\n";
for (allnodes_const_iterator I = allnodes_begin(), E = allnodes_end();
@@ -630,7 +632,7 @@ static bool printOperand(raw_ostream &OS, const SelectionDAG *G,
}
}
-typedef SmallPtrSet<const SDNode *, 128> VisitedSDNodeSet;
+typedef SmallPtrSet<const SDNode *, 32> VisitedSDNodeSet;
static void DumpNodesr(raw_ostream &OS, const SDNode *N, unsigned indent,
const SelectionDAG *G, VisitedSDNodeSet &once) {
if (!once.insert(N).second) // If we've been here before, return now.
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index c075da4..1d61657 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -11,7 +11,7 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/CodeGen/GCStrategy.h"
+#include "llvm/CodeGen/SelectionDAG.h"
#include "ScheduleDAGSDNodes.h"
#include "SelectionDAGBuilder.h"
#include "llvm/ADT/PostOrderIterator.h"
@@ -21,10 +21,10 @@
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/EHPersonalities.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
-#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/FastISel.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/GCMetadata.h"
+#include "llvm/CodeGen/GCStrategy.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -32,8 +32,8 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
#include "llvm/CodeGen/SchedulerRegistry.h"
-#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/CodeGen/StackProtector.h"
#include "llvm/CodeGen/WinEHFuncInfo.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DebugInfo.h"
@@ -59,6 +59,7 @@
#include "llvm/Target/TargetSubtargetInfo.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include <algorithm>
+
using namespace llvm;
#define DEBUG_TYPE "isel"
@@ -317,7 +318,7 @@ namespace llvm {
"Unknown sched type!");
return createILPListDAGScheduler(IS, OptLevel);
}
-}
+} // end namespace llvm
// EmitInstrWithCustomInserter - This method should be implemented by targets
// that mark instructions with the 'usesCustomInserter' flag. These
@@ -329,7 +330,7 @@ namespace llvm {
// are modified, the method should insert pairs of <OldSucc, NewSucc> into the
// DenseMap.
MachineBasicBlock *
-TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
+TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
MachineBasicBlock *MBB) const {
#ifndef NDEBUG
dbgs() << "If a target marks an instruction with "
@@ -339,9 +340,9 @@ TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
llvm_unreachable(nullptr);
}
-void TargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI,
+void TargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI,
SDNode *Node) const {
- assert(!MI->hasPostISelHook() &&
+ assert(!MI.hasPostISelHook() &&
"If a target marks an instruction with 'hasPostISelHook', "
"it must implement TargetLowering::AdjustInstrPostInstrSelection!");
}
@@ -376,6 +377,8 @@ SelectionDAGISel::~SelectionDAGISel() {
void SelectionDAGISel::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<AAResultsWrapperPass>();
AU.addRequired<GCModuleInfo>();
+ AU.addRequired<StackProtector>();
+ AU.addPreserved<StackProtector>();
AU.addPreserved<GCModuleInfo>();
AU.addRequired<TargetLibraryInfoWrapperPass>();
if (UseMBPI && OptLevel != CodeGenOpt::None)
@@ -440,7 +443,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
TM.resetTargetOptions(Fn);
// Reset OptLevel to None for optnone functions.
CodeGenOpt::Level NewOptLevel = OptLevel;
- if (Fn.hasFnAttribute(Attribute::OptimizeNone))
+ if (OptLevel != CodeGenOpt::None && skipFunction(Fn))
NewOptLevel = CodeGenOpt::None;
OptLevelChanger OLC(*this, NewOptLevel);
@@ -468,11 +471,10 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
MF->setHasInlineAsm(false);
FuncInfo->SplitCSR = false;
- SmallVector<MachineBasicBlock*, 4> Returns;
// We split CSR if the target supports it for the given function
// and the function has only return exits.
- if (TLI->supportSplitCSR(MF)) {
+ if (OptLevel != CodeGenOpt::None && TLI->supportSplitCSR(MF)) {
FuncInfo->SplitCSR = true;
// Collect all the return blocks.
@@ -481,12 +483,8 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
continue;
const TerminatorInst *Term = BB.getTerminator();
- if (isa<UnreachableInst>(Term))
+ if (isa<UnreachableInst>(Term) || isa<ReturnInst>(Term))
continue;
- if (isa<ReturnInst>(Term)) {
- Returns.push_back(FuncInfo->MBBMap[&BB]);
- continue;
- }
// Bail out if the exit block is not Return nor Unreachable.
FuncInfo->SplitCSR = false;
@@ -508,8 +506,21 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
RegInfo->EmitLiveInCopies(EntryMBB, TRI, *TII);
// Insert copies in the entry block and the return blocks.
- if (FuncInfo->SplitCSR)
+ if (FuncInfo->SplitCSR) {
+ SmallVector<MachineBasicBlock*, 4> Returns;
+ // Collect all the return blocks.
+ for (MachineBasicBlock &MBB : mf) {
+ if (!MBB.succ_empty())
+ continue;
+
+ MachineBasicBlock::iterator Term = MBB.getFirstTerminator();
+ if (Term != MBB.end() && Term->isReturn()) {
+ Returns.push_back(&MBB);
+ continue;
+ }
+ }
TLI->insertCopiesSplitCSR(EntryMBB, Returns);
+ }
DenseMap<unsigned, unsigned> LiveInMap;
if (!FuncInfo->ArgDbgValues.empty())
@@ -669,7 +680,7 @@ void SelectionDAGISel::SelectBasicBlock(BasicBlock::const_iterator Begin,
}
void SelectionDAGISel::ComputeLiveOutVRegInfo() {
- SmallPtrSet<SDNode*, 128> VisitedNodes;
+ SmallPtrSet<SDNode*, 16> VisitedNodes;
SmallVector<SDNode*, 128> Worklist;
Worklist.push_back(CurDAG->getRoot().getNode());
@@ -854,7 +865,8 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
Scheduler->Run(CurDAG, FuncInfo->MBB);
}
- if (ViewSUnitDAGs && MatchFilterBB) Scheduler->viewGraph();
+ if (ViewSUnitDAGs && MatchFilterBB)
+ Scheduler->viewGraph();
// Emit machine code to BB. This can change 'BB' to the last block being
// inserted into.
@@ -937,23 +949,7 @@ void SelectionDAGISel::DoInstructionSelection() {
if (Node->use_empty())
continue;
- SDNode *ResNode = Select(Node);
-
- // FIXME: This is pretty gross. 'Select' should be changed to not return
- // anything at all and this code should be nuked with a tactical strike.
-
- // If node should not be replaced, continue with the next one.
- if (ResNode == Node || Node->getOpcode() == ISD::DELETED_NODE)
- continue;
- // Replace node.
- if (ResNode) {
- ReplaceUses(Node, ResNode);
- }
-
- // If after the replacement this node is not used any more,
- // remove this dead node.
- if (Node->use_empty()) // Don't delete EntryToken, etc.
- CurDAG->RemoveDeadNode(Node);
+ Select(Node);
}
CurDAG->setRoot(Dummy.getValue());
@@ -1147,7 +1143,125 @@ static void collectFailStats(const Instruction *I) {
case Instruction::LandingPad: NumFastIselFailLandingPad++; return;
}
}
-#endif
+#endif // NDEBUG
+
+/// Set up SwiftErrorVals by going through the function. If the function has
+/// a swifterror argument, it will be the first entry.
+static void setupSwiftErrorVals(const Function &Fn, const TargetLowering *TLI,
+ FunctionLoweringInfo *FuncInfo) {
+ if (!TLI->supportSwiftError())
+ return;
+
+ FuncInfo->SwiftErrorVals.clear();
+ FuncInfo->SwiftErrorMap.clear();
+ FuncInfo->SwiftErrorWorklist.clear();
+
+ // Check if function has a swifterror argument.
+ for (Function::const_arg_iterator AI = Fn.arg_begin(), AE = Fn.arg_end();
+ AI != AE; ++AI)
+ if (AI->hasSwiftErrorAttr())
+ FuncInfo->SwiftErrorVals.push_back(&*AI);
+
+ for (const auto &LLVMBB : Fn)
+ for (const auto &Inst : LLVMBB) {
+ if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(&Inst))
+ if (Alloca->isSwiftError())
+ FuncInfo->SwiftErrorVals.push_back(Alloca);
+ }
+}
+
+/// For each basic block, merge incoming swifterror values or simply propagate
+/// them. The merged results will be saved in SwiftErrorMap. For predecessors
+/// that are not yet visited, we create virtual registers to hold the swifterror
+/// values and save them in SwiftErrorWorklist.
+static void mergeIncomingSwiftErrors(FunctionLoweringInfo *FuncInfo,
+ const TargetLowering *TLI,
+ const TargetInstrInfo *TII,
+ const BasicBlock *LLVMBB,
+ SelectionDAGBuilder *SDB) {
+ if (!TLI->supportSwiftError())
+ return;
+
+ // We should only do this when we have a swifterror parameter or a
+ // swifterror alloca.
+ if (FuncInfo->SwiftErrorVals.empty())
+ return;
+
+ // At the beginning of a basic block, insert PHI nodes or get the virtual
+ // register from the only predecessor, and update SwiftErrorMap; if one
+ // of the predecessors is not visited, update SwiftErrorWorklist.
+ // At the end of a basic block, if a block is in SwiftErrorWorklist, insert
+ // a copy to sync up the virtual register assignment.
+
+ // Always create a virtual register for each swifterror value in entry block.
+ auto &DL = SDB->DAG.getDataLayout();
+ const TargetRegisterClass *RC = TLI->getRegClassFor(TLI->getPointerTy(DL));
+ if (pred_begin(LLVMBB) == pred_end(LLVMBB)) {
+ for (unsigned I = 0, E = FuncInfo->SwiftErrorVals.size(); I < E; I++) {
+ unsigned VReg = FuncInfo->MF->getRegInfo().createVirtualRegister(RC);
+ // Assign Undef to Vreg. We construct MI directly to make sure it works
+ // with FastISel.
+ BuildMI(*FuncInfo->MBB, FuncInfo->InsertPt, SDB->getCurDebugLoc(),
+ TII->get(TargetOpcode::IMPLICIT_DEF), VReg);
+ FuncInfo->SwiftErrorMap[FuncInfo->MBB].push_back(VReg);
+ }
+ return;
+ }
+
+ if (auto *UniquePred = LLVMBB->getUniquePredecessor()) {
+ auto *UniquePredMBB = FuncInfo->MBBMap[UniquePred];
+ if (!FuncInfo->SwiftErrorMap.count(UniquePredMBB)) {
+ // Update SwiftErrorWorklist with a new virtual register.
+ for (unsigned I = 0, E = FuncInfo->SwiftErrorVals.size(); I < E; I++) {
+ unsigned VReg = FuncInfo->MF->getRegInfo().createVirtualRegister(RC);
+ FuncInfo->SwiftErrorWorklist[UniquePredMBB].push_back(VReg);
+ // Propagate the information from the single predecessor.
+ FuncInfo->SwiftErrorMap[FuncInfo->MBB].push_back(VReg);
+ }
+ return;
+ }
+ // Propagate the information from the single predecessor.
+ FuncInfo->SwiftErrorMap[FuncInfo->MBB] =
+ FuncInfo->SwiftErrorMap[UniquePredMBB];
+ return;
+ }
+
+ // For the case of multiple predecessors, update SwiftErrorWorklist.
+ // Handle the case where we have two or more predecessors being the same.
+ for (const_pred_iterator PI = pred_begin(LLVMBB), PE = pred_end(LLVMBB);
+ PI != PE; ++PI) {
+ auto *PredMBB = FuncInfo->MBBMap[*PI];
+ if (!FuncInfo->SwiftErrorMap.count(PredMBB) &&
+ !FuncInfo->SwiftErrorWorklist.count(PredMBB)) {
+ for (unsigned I = 0, E = FuncInfo->SwiftErrorVals.size(); I < E; I++) {
+ unsigned VReg = FuncInfo->MF->getRegInfo().createVirtualRegister(RC);
+ // When we actually visit the basic block PredMBB, we will materialize
+ // the virtual register assignment in copySwiftErrorsToFinalVRegs.
+ FuncInfo->SwiftErrorWorklist[PredMBB].push_back(VReg);
+ }
+ }
+ }
+
+ // For the case of multiple predecessors, create a virtual register for
+ // each swifterror value and generate Phi node.
+ for (unsigned I = 0, E = FuncInfo->SwiftErrorVals.size(); I < E; I++) {
+ unsigned VReg = FuncInfo->MF->getRegInfo().createVirtualRegister(RC);
+ FuncInfo->SwiftErrorMap[FuncInfo->MBB].push_back(VReg);
+
+ MachineInstrBuilder SwiftErrorPHI = BuildMI(*FuncInfo->MBB,
+ FuncInfo->MBB->begin(), SDB->getCurDebugLoc(),
+ TII->get(TargetOpcode::PHI), VReg);
+ for (const_pred_iterator PI = pred_begin(LLVMBB), PE = pred_end(LLVMBB);
+ PI != PE; ++PI) {
+ auto *PredMBB = FuncInfo->MBBMap[*PI];
+ unsigned SwiftErrorReg = FuncInfo->SwiftErrorMap.count(PredMBB) ?
+ FuncInfo->SwiftErrorMap[PredMBB][I] :
+ FuncInfo->SwiftErrorWorklist[PredMBB][I];
+ SwiftErrorPHI.addReg(SwiftErrorReg)
+ .addMBB(PredMBB);
+ }
+ }
+}
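// A minimal, self-contained sketch (plain C++, not the LLVM API; all names
// below are hypothetical) of the case analysis mergeIncomingSwiftErrors
// performs for one swifterror value: entry blocks get a fresh register,
// blocks with a unique visited predecessor propagate its register, and
// joins get a fresh register that a real implementation would define with a
// PHI over the predecessors' registers (or worklist placeholders, as above).
#include <map>
#include <vector>

struct Block { std::vector<Block *> Preds; };

static int mergeIncoming(Block *B, std::map<Block *, int> &ValueMap,
                         int &NextVReg) {
  if (B->Preds.empty())
    return ValueMap[B] = NextVReg++;            // entry: IMPLICIT_DEF a vreg
  if (B->Preds.size() == 1 && ValueMap.count(B->Preds[0]))
    return ValueMap[B] = ValueMap[B->Preds[0]]; // unique pred: propagate
  return ValueMap[B] = NextVReg++;              // join: becomes a PHI result
}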
void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
// Initialize the Fast-ISel state, if needed.
@@ -1155,6 +1269,8 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
if (TM.Options.EnableFastISel)
FastIS = TLI->createFastISel(*FuncInfo, LibInfo);
+ setupSwiftErrorVals(Fn, TLI, FuncInfo);
+
// Iterate over all basic blocks in the function.
ReversePostOrderTraversal<const Function*> RPOT(&Fn);
for (ReversePostOrderTraversal<const Function*>::rpo_iterator
@@ -1193,6 +1309,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
if (!FuncInfo->MBB)
continue; // Some blocks like catchpads have no code or MBB.
FuncInfo->InsertPt = FuncInfo->MBB->getFirstNonPHI();
+ mergeIncomingSwiftErrors(FuncInfo, TLI, TII, LLVMBB, SDB);
// Setup an EH landing-pad block.
FuncInfo->ExceptionPointerVirtReg = 0;
@@ -1228,7 +1345,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
// where they are, so we can be sure to emit subsequent instructions
// after them.
if (FuncInfo->InsertPt != FuncInfo->MBB->begin())
- FastIS->setLastLocalValue(std::prev(FuncInfo->InsertPt));
+ FastIS->setLastLocalValue(&*std::prev(FuncInfo->InsertPt));
else
FastIS->setLastLocalValue(nullptr);
}
@@ -1345,6 +1462,12 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
LowerArguments(Fn);
}
}
+ if (getAnalysis<StackProtector>().shouldEmitSDCheck(*LLVMBB)) {
+ bool FunctionBasedInstrumentation =
+ TLI->getSSPStackGuardCheck(*Fn.getParent());
+ SDB->SPDescriptor.initialize(LLVMBB, FuncInfo->MBBMap[LLVMBB],
+ FunctionBasedInstrumentation);
+ }
if (Begin != BI)
++NumDAGBlocks;
@@ -1376,15 +1499,15 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
/// terminator instructions so we can satisfy ABI constraints. A partial
/// terminator sequence is an improper subset of a terminator sequence (i.e. it
/// may be the whole terminator sequence).
-static bool MIIsInTerminatorSequence(const MachineInstr *MI) {
+static bool MIIsInTerminatorSequence(const MachineInstr &MI) {
// If we do not have a copy or an implicit def, we return true if and only if
// MI is a debug value.
- if (!MI->isCopy() && !MI->isImplicitDef())
+ if (!MI.isCopy() && !MI.isImplicitDef())
// Sometimes DBG_VALUE MI sneak in between the copies from the vregs to the
// physical registers if there is debug info associated with the terminator
// of our mbb. We want to include said debug info in our terminator
// sequence, so we return true in that case.
- return MI->isDebugValue();
+ return MI.isDebugValue();
// We have left the terminator sequence if we are not doing one of the
// following:
@@ -1394,18 +1517,18 @@ static bool MIIsInTerminatorSequence(const MachineInstr *MI) {
// 3. Defining a register via an implicit def.
// OPI should always be a register definition...
- MachineInstr::const_mop_iterator OPI = MI->operands_begin();
+ MachineInstr::const_mop_iterator OPI = MI.operands_begin();
if (!OPI->isReg() || !OPI->isDef())
return false;
// Defining any register via an implicit def is always ok.
- if (MI->isImplicitDef())
+ if (MI.isImplicitDef())
return true;
// Grab the copy source...
MachineInstr::const_mop_iterator OPI2 = OPI;
++OPI2;
- assert(OPI2 != MI->operands_end()
+ assert(OPI2 != MI.operands_end()
&& "Should have a copy implying we should have 2 arguments.");
// Make sure that the copy dest is not a vreg when the copy source is a
@@ -1432,7 +1555,7 @@ static bool MIIsInTerminatorSequence(const MachineInstr *MI) {
/// terminator, but additionally the copies that move the vregs into the
/// physical registers.
static MachineBasicBlock::iterator
-FindSplitPointForStackProtector(MachineBasicBlock *BB, DebugLoc DL) {
+FindSplitPointForStackProtector(MachineBasicBlock *BB) {
MachineBasicBlock::iterator SplitPoint = BB->getFirstTerminator();
//
if (SplitPoint == BB->begin())
@@ -1442,7 +1565,7 @@ FindSplitPointForStackProtector(MachineBasicBlock *BB, DebugLoc DL) {
MachineBasicBlock::iterator Previous = SplitPoint;
--Previous;
- while (MIIsInTerminatorSequence(Previous)) {
+ while (MIIsInTerminatorSequence(*Previous)) {
SplitPoint = Previous;
if (Previous == Start)
break;
@@ -1454,7 +1577,6 @@ FindSplitPointForStackProtector(MachineBasicBlock *BB, DebugLoc DL) {
void
SelectionDAGISel::FinishBasicBlock() {
-
DEBUG(dbgs() << "Total amount of phi nodes to update: "
<< FuncInfo->PHINodesToUpdate.size() << "\n";
for (unsigned i = 0, e = FuncInfo->PHINodesToUpdate.size(); i != e; ++i)
@@ -1474,7 +1596,23 @@ SelectionDAGISel::FinishBasicBlock() {
}
// Handle stack protector.
- if (SDB->SPDescriptor.shouldEmitStackProtector()) {
+ if (SDB->SPDescriptor.shouldEmitFunctionBasedCheckStackProtector()) {
+ // The target provides a guard check function. There is no need to
+ // generate error handling code or to split current basic block.
+ MachineBasicBlock *ParentMBB = SDB->SPDescriptor.getParentMBB();
+
+ // Add the guard load and check to the basic block.
+ FuncInfo->MBB = ParentMBB;
+ FuncInfo->InsertPt =
+ FindSplitPointForStackProtector(ParentMBB);
+ SDB->visitSPDescriptorParent(SDB->SPDescriptor, ParentMBB);
+ CurDAG->setRoot(SDB->getRoot());
+ SDB->clear();
+ CodeGenAndEmitDAG();
+
+ // Clear the Per-BB State.
+ SDB->SPDescriptor.resetPerBBState();
+ } else if (SDB->SPDescriptor.shouldEmitStackProtector()) {
MachineBasicBlock *ParentMBB = SDB->SPDescriptor.getParentMBB();
MachineBasicBlock *SuccessMBB = SDB->SPDescriptor.getSuccessMBB();
@@ -1485,7 +1623,7 @@ SelectionDAGISel::FinishBasicBlock() {
// register allocation issues caused by us splitting the parent mbb. The
// register allocator will clean up said virtual copies later on.
MachineBasicBlock::iterator SplitPoint =
- FindSplitPointForStackProtector(ParentMBB, SDB->getCurDebugLoc());
+ FindSplitPointForStackProtector(ParentMBB);
// Splice the terminator of ParentMBB into SuccessMBB.
SuccessMBB->splice(SuccessMBB->end(), ParentMBB,
@@ -1502,7 +1640,7 @@ SelectionDAGISel::FinishBasicBlock() {
// CodeGen Failure MBB if we have not codegened it yet.
MachineBasicBlock *FailureMBB = SDB->SPDescriptor.getFailureMBB();
- if (!FailureMBB->size()) {
+ if (FailureMBB->empty()) {
FuncInfo->MBB = FailureMBB;
FuncInfo->InsertPt = FailureMBB->end();
SDB->visitSPDescriptorFailure(SDB->SPDescriptor);
@@ -1515,52 +1653,61 @@ SelectionDAGISel::FinishBasicBlock() {
SDB->SPDescriptor.resetPerBBState();
}
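// Note on the two guard flavors handled above: when the target provides a
// guard check function (queried via getSSPStackGuardCheck; e.g.
// __security_check_cookie on MSVC targets), the load-and-check becomes a
// call emitted in the parent block before its terminator sequence, so no
// block splitting or failure block is required. The inline flavor instead
// splits the parent block and emits a compare that branches to a failure
// block on mismatch.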
- for (unsigned i = 0, e = SDB->BitTestCases.size(); i != e; ++i) {
+ // Lower each BitTestBlock.
+ for (auto &BTB : SDB->BitTestCases) {
// Lower header first, if it wasn't already lowered
- if (!SDB->BitTestCases[i].Emitted) {
+ if (!BTB.Emitted) {
// Set the current basic block to the mbb we wish to insert the code into
- FuncInfo->MBB = SDB->BitTestCases[i].Parent;
+ FuncInfo->MBB = BTB.Parent;
FuncInfo->InsertPt = FuncInfo->MBB->end();
// Emit the code
- SDB->visitBitTestHeader(SDB->BitTestCases[i], FuncInfo->MBB);
+ SDB->visitBitTestHeader(BTB, FuncInfo->MBB);
CurDAG->setRoot(SDB->getRoot());
SDB->clear();
CodeGenAndEmitDAG();
}
- BranchProbability UnhandledProb = SDB->BitTestCases[i].Prob;
- for (unsigned j = 0, ej = SDB->BitTestCases[i].Cases.size(); j != ej; ++j) {
- UnhandledProb -= SDB->BitTestCases[i].Cases[j].ExtraProb;
+ BranchProbability UnhandledProb = BTB.Prob;
+ for (unsigned j = 0, ej = BTB.Cases.size(); j != ej; ++j) {
+ UnhandledProb -= BTB.Cases[j].ExtraProb;
// Set the current basic block to the mbb we wish to insert the code into
- FuncInfo->MBB = SDB->BitTestCases[i].Cases[j].ThisBB;
+ FuncInfo->MBB = BTB.Cases[j].ThisBB;
FuncInfo->InsertPt = FuncInfo->MBB->end();
// Emit the code
// If all cases cover a contiguous range, it is not necessary to jump to
// the default block after the last bit test fails. This is because the
// range check during bit test header creation has guaranteed that every
- // case here doesn't go outside the range.
+ // case here doesn't go outside the range. In this case, there is no need
+ // to perform the last bit test, as it will always be true. Instead, make
+ // the second-to-last bit-test fall through to the target of the last bit
+ // test, and delete the last bit test.
+
MachineBasicBlock *NextMBB;
- if (SDB->BitTestCases[i].ContiguousRange && j + 2 == ej)
- NextMBB = SDB->BitTestCases[i].Cases[j + 1].TargetBB;
- else if (j + 1 != ej)
- NextMBB = SDB->BitTestCases[i].Cases[j + 1].ThisBB;
- else
- NextMBB = SDB->BitTestCases[i].Default;
+ if (BTB.ContiguousRange && j + 2 == ej) {
+ // Second-to-last bit-test with contiguous range: fall through to the
+ // target of the final bit test.
+ NextMBB = BTB.Cases[j + 1].TargetBB;
+ } else if (j + 1 == ej) {
+ // For the last bit test, fall through to Default.
+ NextMBB = BTB.Default;
+ } else {
+ // Otherwise, fall through to the next bit test.
+ NextMBB = BTB.Cases[j + 1].ThisBB;
+ }
- SDB->visitBitTestCase(SDB->BitTestCases[i],
- NextMBB,
- UnhandledProb,
- SDB->BitTestCases[i].Reg,
- SDB->BitTestCases[i].Cases[j],
+ SDB->visitBitTestCase(BTB, NextMBB, UnhandledProb, BTB.Reg, BTB.Cases[j],
FuncInfo->MBB);
CurDAG->setRoot(SDB->getRoot());
SDB->clear();
CodeGenAndEmitDAG();
- if (SDB->BitTestCases[i].ContiguousRange && j + 2 == ej)
+ if (BTB.ContiguousRange && j + 2 == ej) {
+ // Since we're not going to use the final bit test, remove it.
+ BTB.Cases.pop_back();
break;
+ }
}
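// Illustrative example (hand-picked masks, not taken from a test) of why the
// last bit test is removable for a contiguous range: the header's range
// check already guarantees the value lies in the covered range, so once all
// other masks have failed, membership in the final mask is implied.
#include <cassert>

static int classify(unsigned X) { // header guarantees X is in [0, 3]
  if ((1u << X) & 0x5u)           // bit test for cases {0, 2} (mask 0b0101)
    return 0;
  return 1;                       // {1, 3} is implied; no test needed
}

int main() {
  assert(classify(2) == 0 && classify(3) == 1);
  return 0;
}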
// Update PHI Nodes
@@ -1571,16 +1718,18 @@ SelectionDAGISel::FinishBasicBlock() {
assert(PHI->isPHI() &&
"This is not a machine PHI node that we are updating!");
// This is "default" BB. We have two jumps to it. From "header" BB and
- // from last "case" BB.
- if (PHIBB == SDB->BitTestCases[i].Default)
- PHI.addReg(FuncInfo->PHINodesToUpdate[pi].second)
- .addMBB(SDB->BitTestCases[i].Parent)
- .addReg(FuncInfo->PHINodesToUpdate[pi].second)
- .addMBB(SDB->BitTestCases[i].Cases.back().ThisBB);
+ // from last "case" BB, unless the latter was skipped.
+ if (PHIBB == BTB.Default) {
+ PHI.addReg(FuncInfo->PHINodesToUpdate[pi].second).addMBB(BTB.Parent);
+ if (!BTB.ContiguousRange) {
+ PHI.addReg(FuncInfo->PHINodesToUpdate[pi].second)
+ .addMBB(BTB.Cases.back().ThisBB);
+ }
+ }
// One of "cases" BB.
- for (unsigned j = 0, ej = SDB->BitTestCases[i].Cases.size();
+ for (unsigned j = 0, ej = BTB.Cases.size();
j != ej; ++j) {
- MachineBasicBlock* cBB = SDB->BitTestCases[i].Cases[j].ThisBB;
+ MachineBasicBlock* cBB = BTB.Cases[j].ThisBB;
if (cBB->isSuccessor(PHIBB))
PHI.addReg(FuncInfo->PHINodesToUpdate[pi].second).addMBB(cBB);
}
@@ -1685,7 +1834,6 @@ SelectionDAGISel::FinishBasicBlock() {
SDB->SwitchCases.clear();
}
-
/// Create the scheduler. If a specific scheduler was specified
/// via the SchedulerRegistry, use it, otherwise select the
/// one preferred by the target.
@@ -1764,8 +1912,8 @@ bool SelectionDAGISel::CheckOrMask(SDValue LHS, ConstantSDNode *RHS,
/// SelectInlineAsmMemoryOperands - Calls to this are automatically generated
/// by tblgen. Others should not call it.
-void SelectionDAGISel::
-SelectInlineAsmMemoryOperands(std::vector<SDValue> &Ops, SDLoc DL) {
+void SelectionDAGISel::SelectInlineAsmMemoryOperands(std::vector<SDValue> &Ops,
+ const SDLoc &DL) {
std::vector<SDValue> InOps;
std::swap(InOps, Ops);
@@ -1802,15 +1950,15 @@ SelectInlineAsmMemoryOperands(std::vector<SDValue> &Ops, SDLoc DL) {
// Otherwise, this is a memory operand. Ask the target to select it.
std::vector<SDValue> SelOps;
- if (SelectInlineAsmMemoryOperand(InOps[i+1],
- InlineAsm::getMemoryConstraintID(Flags),
- SelOps))
+ unsigned ConstraintID = InlineAsm::getMemoryConstraintID(Flags);
+ if (SelectInlineAsmMemoryOperand(InOps[i+1], ConstraintID, SelOps))
report_fatal_error("Could not match memory address. Inline asm"
" failure!");
// Add this to the output node.
unsigned NewFlags =
InlineAsm::getFlagWord(InlineAsm::Kind_Mem, SelOps.size());
+ NewFlags = InlineAsm::getFlagWordForMem(NewFlags, ConstraintID);
Ops.push_back(CurDAG->getTargetConstant(NewFlags, DL, MVT::i32));
Ops.insert(Ops.end(), SelOps.begin(), SelOps.end());
i += 2;
@@ -1956,7 +2104,7 @@ bool SelectionDAGISel::IsLegalToFold(SDValue N, SDNode *U, SDNode *Root,
return !findNonImmUse(Root, N.getNode(), U, Root, Visited, IgnoreChains);
}
-SDNode *SelectionDAGISel::Select_INLINEASM(SDNode *N) {
+void SelectionDAGISel::Select_INLINEASM(SDNode *N) {
SDLoc DL(N);
std::vector<SDValue> Ops(N->op_begin(), N->op_end());
@@ -1965,11 +2113,11 @@ SDNode *SelectionDAGISel::Select_INLINEASM(SDNode *N) {
const EVT VTs[] = {MVT::Other, MVT::Glue};
SDValue New = CurDAG->getNode(ISD::INLINEASM, DL, VTs, Ops);
New->setNodeId(-1);
- return New.getNode();
+ ReplaceUses(N, New.getNode());
+ CurDAG->RemoveDeadNode(N);
}
-SDNode
-*SelectionDAGISel::Select_READ_REGISTER(SDNode *Op) {
+void SelectionDAGISel::Select_READ_REGISTER(SDNode *Op) {
SDLoc dl(Op);
MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(Op->getOperand(1));
const MDString *RegStr = dyn_cast<MDString>(MD->getMD()->getOperand(0));
@@ -1979,11 +2127,11 @@ SDNode
SDValue New = CurDAG->getCopyFromReg(
Op->getOperand(0), dl, Reg, Op->getValueType(0));
New->setNodeId(-1);
- return New.getNode();
+ ReplaceUses(Op, New.getNode());
+ CurDAG->RemoveDeadNode(Op);
}
-SDNode
-*SelectionDAGISel::Select_WRITE_REGISTER(SDNode *Op) {
+void SelectionDAGISel::Select_WRITE_REGISTER(SDNode *Op) {
SDLoc dl(Op);
MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(Op->getOperand(1));
const MDString *RegStr = dyn_cast<MDString>(MD->getMD()->getOperand(0));
@@ -1993,13 +2141,12 @@ SDNode
SDValue New = CurDAG->getCopyToReg(
Op->getOperand(0), dl, Reg, Op->getOperand(2));
New->setNodeId(-1);
- return New.getNode();
+ ReplaceUses(Op, New.getNode());
+ CurDAG->RemoveDeadNode(Op);
}
-
-
-SDNode *SelectionDAGISel::Select_UNDEF(SDNode *N) {
- return CurDAG->SelectNodeTo(N, TargetOpcode::IMPLICIT_DEF,N->getValueType(0));
+void SelectionDAGISel::Select_UNDEF(SDNode *N) {
+ CurDAG->SelectNodeTo(N, TargetOpcode::IMPLICIT_DEF, N->getValueType(0));
}
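// The hunks above and below migrate the Select_* helpers from returning a
// replacement node to rewriting the DAG in place. The recurring idiom,
// sketched with the names used in this file:
//
//   SDValue New = CurDAG->getNode(...);  // build the replacement
//   ReplaceUses(N, New.getNode());       // redirect all users of N
//   CurDAG->RemoveDeadNode(N);           // N is now unused; delete it
//
// SelectNodeTo (as in Select_UNDEF) morphs N itself, so no explicit
// replacement or deletion is needed there.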
/// GetVBR - decode a vbr encoding whose top bit is set.
@@ -2019,15 +2166,11 @@ GetVBR(uint64_t Val, const unsigned char *MatcherTable, unsigned &Idx) {
return Val;
}
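// Self-contained sketch of the VBR scheme GetVBR decodes: each matcher-table
// byte carries 7 payload bits, and a set top bit means another byte follows.
// (Unlike GetVBR, which receives the first byte already read, this variant
// reads it itself; decodeVBR is a hypothetical name.)
#include <cassert>
#include <cstdint>

static uint64_t decodeVBR(const unsigned char *Table, unsigned &Idx) {
  unsigned char B = Table[Idx++];
  uint64_t Val = B & 127;
  for (unsigned Shift = 7; B & 128; Shift += 7) {
    B = Table[Idx++];                    // top bit set: consume another byte
    Val |= uint64_t(B & 127) << Shift;
  }
  return Val;
}

int main() {
  unsigned char Table[] = {0x83, 0x01};  // encodes 3 | (1 << 7) == 131
  unsigned Idx = 0;
  assert(decodeVBR(Table, Idx) == 131 && Idx == 2);
  return 0;
}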
-
-/// UpdateChainsAndGlue - When a match is complete, this method updates uses of
-/// interior glue and chain results to use the new glue and chain results.
-void SelectionDAGISel::
-UpdateChainsAndGlue(SDNode *NodeToMatch, SDValue InputChain,
- const SmallVectorImpl<SDNode*> &ChainNodesMatched,
- SDValue InputGlue,
- const SmallVectorImpl<SDNode*> &GlueResultNodesMatched,
- bool isMorphNodeTo) {
+/// When a match is complete, this method updates uses of interior chain results
+/// to use the new results.
+void SelectionDAGISel::UpdateChains(
+ SDNode *NodeToMatch, SDValue InputChain,
+ const SmallVectorImpl<SDNode *> &ChainNodesMatched, bool isMorphNodeTo) {
SmallVector<SDNode*, 4> NowDeadNodes;
// Now that all the normal results are replaced, we replace the chain and
@@ -2039,10 +2182,8 @@ UpdateChainsAndGlue(SDNode *NodeToMatch, SDValue InputChain,
// Replace all the chain results with the final chain we ended up with.
for (unsigned i = 0, e = ChainNodesMatched.size(); i != e; ++i) {
SDNode *ChainNode = ChainNodesMatched[i];
-
- // If this node was already deleted, don't look at it.
- if (ChainNode->getOpcode() == ISD::DELETED_NODE)
- continue;
+ assert(ChainNode->getOpcode() != ISD::DELETED_NODE &&
+ "Deleted node left in chain");
// Don't replace the results of the root node if we're doing a
// MorphNodeTo.
@@ -2056,35 +2197,12 @@ UpdateChainsAndGlue(SDNode *NodeToMatch, SDValue InputChain,
CurDAG->ReplaceAllUsesOfValueWith(ChainVal, InputChain);
// If the node became dead and we haven't already seen it, delete it.
- if (ChainNode->use_empty() &&
+ if (ChainNode != NodeToMatch && ChainNode->use_empty() &&
!std::count(NowDeadNodes.begin(), NowDeadNodes.end(), ChainNode))
NowDeadNodes.push_back(ChainNode);
}
}
- // If the result produces glue, update any glue results in the matched
- // pattern with the glue result.
- if (InputGlue.getNode()) {
- // Handle any interior nodes explicitly marked.
- for (unsigned i = 0, e = GlueResultNodesMatched.size(); i != e; ++i) {
- SDNode *FRN = GlueResultNodesMatched[i];
-
- // If this node was already deleted, don't look at it.
- if (FRN->getOpcode() == ISD::DELETED_NODE)
- continue;
-
- assert(FRN->getValueType(FRN->getNumValues()-1) == MVT::Glue &&
- "Doesn't have a glue result");
- CurDAG->ReplaceAllUsesOfValueWith(SDValue(FRN, FRN->getNumValues()-1),
- InputGlue);
-
- // If the node became dead and we haven't already seen it, delete it.
- if (FRN->use_empty() &&
- !std::count(NowDeadNodes.begin(), NowDeadNodes.end(), FRN))
- NowDeadNodes.push_back(FRN);
- }
- }
-
if (!NowDeadNodes.empty())
CurDAG->RemoveDeadNodes(NowDeadNodes);
@@ -2108,8 +2226,9 @@ enum ChainResult {
/// already selected nodes "below" us.
static ChainResult
WalkChainUsers(const SDNode *ChainedNode,
- SmallVectorImpl<SDNode*> &ChainedNodesInPattern,
- SmallVectorImpl<SDNode*> &InteriorChainedNodes) {
+ SmallVectorImpl<SDNode *> &ChainedNodesInPattern,
+ DenseMap<const SDNode *, ChainResult> &TokenFactorResult,
+ SmallVectorImpl<SDNode *> &InteriorChainedNodes) {
ChainResult Result = CR_Simple;
for (SDNode::use_iterator UI = ChainedNode->use_begin(),
@@ -2190,7 +2309,15 @@ WalkChainUsers(const SDNode *ChainedNode,
// as a new TokenFactor.
//
// To distinguish these two cases, do a recursive walk down the uses.
- switch (WalkChainUsers(User, ChainedNodesInPattern, InteriorChainedNodes)) {
+ auto MemoizeResult = TokenFactorResult.find(User);
+ bool Visited = MemoizeResult != TokenFactorResult.end();
+ // Recursively walk chain users only if the result is not memoized.
+ if (!Visited) {
+ auto Res = WalkChainUsers(User, ChainedNodesInPattern, TokenFactorResult,
+ InteriorChainedNodes);
+ MemoizeResult = TokenFactorResult.insert(std::make_pair(User, Res)).first;
+ }
+ switch (MemoizeResult->second) {
case CR_Simple:
// If the uses of the TokenFactor are just already-selected nodes, ignore
// it, it is "below" our pattern.
@@ -2210,9 +2337,10 @@ WalkChainUsers(const SDNode *ChainedNode,
// ultimate chain result of the generated code. We will also add its chain
// inputs as inputs to the ultimate TokenFactor we create.
Result = CR_LeadsToInteriorNode;
- ChainedNodesInPattern.push_back(User);
- InteriorChainedNodes.push_back(User);
- continue;
+ if (!Visited) {
+ ChainedNodesInPattern.push_back(User);
+ InteriorChainedNodes.push_back(User);
+ }
}
return Result;
@@ -2227,12 +2355,16 @@ WalkChainUsers(const SDNode *ChainedNode,
static SDValue
HandleMergeInputChains(SmallVectorImpl<SDNode*> &ChainNodesMatched,
SelectionDAG *CurDAG) {
+ // Used for memoization. Without it WalkChainUsers could take exponential
+ // time to run.
+ DenseMap<const SDNode *, ChainResult> TokenFactorResult;
// Walk all of the chained nodes we've matched, recursively scanning down the
// users of the chain result. This adds any TokenFactor nodes that are caught
// in between chained nodes to the chained and interior nodes list.
SmallVector<SDNode*, 3> InteriorChainedNodes;
for (unsigned i = 0, e = ChainNodesMatched.size(); i != e; ++i) {
if (WalkChainUsers(ChainNodesMatched[i], ChainNodesMatched,
+ TokenFactorResult,
InteriorChainedNodes) == CR_InducesCycle)
return SDValue(); // Would induce a cycle.
}
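// Plain-C++ sketch (hypothetical names) of the memoization introduced above:
// caching each node's result means every node is analyzed once, so a walk
// that revisits shared nodes of a DAG drops from exponential to linear time.
#include <algorithm>
#include <unordered_map>
#include <vector>

struct Node { std::vector<const Node *> Users; };

static int walkUsers(const Node *N,
                     std::unordered_map<const Node *, int> &Memo) {
  auto It = Memo.find(N);
  if (It != Memo.end())
    return It->second;            // memoized: reuse the earlier result
  int Result = 0;                 // stand-in for the ChainResult computation
  for (const Node *U : N->Users)
    Result = std::max(Result, walkUsers(U, Memo));
  return Memo[N] = Result;
}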
@@ -2322,8 +2454,10 @@ MorphNode(SDNode *Node, unsigned TargetOpc, SDVTList VTList,
// Otherwise, no replacement happened because the node already exists. Replace
// Uses of the old node with the new one.
- if (Res != Node)
+ if (Res != Node) {
CurDAG->ReplaceAllUsesWith(Node, Res);
+ CurDAG->RemoveDeadNode(Node);
+ }
return Res;
}
@@ -2534,7 +2668,6 @@ static unsigned IsPredicateKnownToFail(const unsigned char *Table,
}
namespace {
-
struct MatchScope {
/// FailIndex - If this match fails, this is the index to continue with.
unsigned FailIndex;
@@ -2552,7 +2685,7 @@ struct MatchScope {
SDValue InputChain, InputGlue;
/// HasChainNodesMatched - True if the ChainNodesMatched list is non-empty.
- bool HasChainNodesMatched, HasGlueResultNodesMatched;
+ bool HasChainNodesMatched;
};
/// \brief A DAG update listener to keep the matching state
@@ -2591,11 +2724,11 @@ public:
J.setNode(E);
}
};
-}
+} // end anonymous namespace
-SDNode *SelectionDAGISel::
-SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
- unsigned TableSize) {
+void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch,
+ const unsigned char *MatcherTable,
+ unsigned TableSize) {
// FIXME: Should these even be selected? Handle these cases in the caller?
switch (NodeToMatch->getOpcode()) {
default:
@@ -2623,16 +2756,25 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
case ISD::LIFETIME_START:
case ISD::LIFETIME_END:
NodeToMatch->setNodeId(-1); // Mark selected.
- return nullptr;
+ return;
case ISD::AssertSext:
case ISD::AssertZext:
CurDAG->ReplaceAllUsesOfValueWith(SDValue(NodeToMatch, 0),
NodeToMatch->getOperand(0));
- return nullptr;
- case ISD::INLINEASM: return Select_INLINEASM(NodeToMatch);
- case ISD::READ_REGISTER: return Select_READ_REGISTER(NodeToMatch);
- case ISD::WRITE_REGISTER: return Select_WRITE_REGISTER(NodeToMatch);
- case ISD::UNDEF: return Select_UNDEF(NodeToMatch);
+ CurDAG->RemoveDeadNode(NodeToMatch);
+ return;
+ case ISD::INLINEASM:
+ Select_INLINEASM(NodeToMatch);
+ return;
+ case ISD::READ_REGISTER:
+ Select_READ_REGISTER(NodeToMatch);
+ return;
+ case ISD::WRITE_REGISTER:
+ Select_WRITE_REGISTER(NodeToMatch);
+ return;
+ case ISD::UNDEF:
+ Select_UNDEF(NodeToMatch);
+ return;
}
assert(!NodeToMatch->isMachineOpcode() && "Node already selected!");
@@ -2665,7 +2807,6 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
// which ones they are. The result is captured into this list so that we can
// update the chain results when the pattern is complete.
SmallVector<SDNode*, 3> ChainNodesMatched;
- SmallVector<SDNode*, 3> GlueResultNodesMatched;
DEBUG(dbgs() << "ISEL: Starting pattern match on root node: ";
NodeToMatch->dump(CurDAG);
@@ -2771,7 +2912,6 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
NewEntry.InputChain = InputChain;
NewEntry.InputGlue = InputGlue;
NewEntry.HasChainNodesMatched = !ChainNodesMatched.empty();
- NewEntry.HasGlueResultNodesMatched = !GlueResultNodesMatched.empty();
MatchScopes.push_back(NewEntry);
continue;
}
@@ -2816,6 +2956,18 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
continue;
}
+ case OPC_MoveChild0: case OPC_MoveChild1:
+ case OPC_MoveChild2: case OPC_MoveChild3:
+ case OPC_MoveChild4: case OPC_MoveChild5:
+ case OPC_MoveChild6: case OPC_MoveChild7: {
+ unsigned ChildNo = Opcode-OPC_MoveChild0;
+ if (ChildNo >= N.getNumOperands())
+ break; // Match fails if out of range child #.
+ N = N.getOperand(ChildNo);
+ NodeStack.push_back(N);
+ continue;
+ }
+
case OPC_MoveParent:
// Pop the current node off the NodeStack.
NodeStack.pop_back();
@@ -3028,12 +3180,12 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
if (Imm->getOpcode() == ISD::Constant) {
const ConstantInt *Val=cast<ConstantSDNode>(Imm)->getConstantIntValue();
- Imm = CurDAG->getConstant(*Val, SDLoc(NodeToMatch), Imm.getValueType(),
- true);
+ Imm = CurDAG->getTargetConstant(*Val, SDLoc(NodeToMatch),
+ Imm.getValueType());
} else if (Imm->getOpcode() == ISD::ConstantFP) {
const ConstantFP *Val=cast<ConstantFPSDNode>(Imm)->getConstantFPValue();
- Imm = CurDAG->getConstantFP(*Val, SDLoc(NodeToMatch),
- Imm.getValueType(), true);
+ Imm = CurDAG->getTargetConstantFP(*Val, SDLoc(NodeToMatch),
+ Imm.getValueType());
}
RecordedNodes.push_back(std::make_pair(Imm, RecordedNodes[RecNo].second));
@@ -3041,7 +3193,8 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
}
case OPC_EmitMergeInputChains1_0: // OPC_EmitMergeInputChains, 1, 0
- case OPC_EmitMergeInputChains1_1: { // OPC_EmitMergeInputChains, 1, 1
+ case OPC_EmitMergeInputChains1_1: // OPC_EmitMergeInputChains, 1, 1
+ case OPC_EmitMergeInputChains1_2: { // OPC_EmitMergeInputChains, 1, 2
// These are space-optimized forms of OPC_EmitMergeInputChains.
assert(!InputChain.getNode() &&
"EmitMergeInputChains should be the first chain producing node");
@@ -3049,7 +3202,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
"Should only have one EmitMergeInputChains per match");
// Read all of the chained nodes.
- unsigned RecNo = Opcode == OPC_EmitMergeInputChains1_1;
+ unsigned RecNo = Opcode - OPC_EmitMergeInputChains1_0;
assert(RecNo < RecordedNodes.size() && "Invalid EmitMergeInputChains");
ChainNodesMatched.push_back(RecordedNodes[RecNo].first.getNode());
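// Sketch of the space optimization above: the record number (and, further
// below, the VT count for OPC_EmitNode0..2 / OPC_MorphNodeTo0..2) is folded
// into the opcode itself, so no extra matcher-table byte is consumed. The
// enumerator value here is hypothetical; the real ones are defined in
// SelectionDAGISel.h.
#include <cassert>

enum { OPC_EmitMergeInputChains1_0 = 40 }; // hypothetical numbering

static unsigned decodeRecNo(unsigned Opcode) {
  return Opcode - OPC_EmitMergeInputChains1_0; // _0 -> 0, _1 -> 1, _2 -> 2
}

int main() {
  assert(decodeRecNo(OPC_EmitMergeInputChains1_0 + 2) == 2);
  return 0;
}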
@@ -3137,13 +3290,22 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
continue;
}
- case OPC_EmitNode:
- case OPC_MorphNodeTo: {
+ case OPC_EmitNode: case OPC_MorphNodeTo:
+ case OPC_EmitNode0: case OPC_EmitNode1: case OPC_EmitNode2:
+ case OPC_MorphNodeTo0: case OPC_MorphNodeTo1: case OPC_MorphNodeTo2: {
uint16_t TargetOpc = MatcherTable[MatcherIndex++];
TargetOpc |= (unsigned short)MatcherTable[MatcherIndex++] << 8;
unsigned EmitNodeInfo = MatcherTable[MatcherIndex++];
// Get the result VT list.
- unsigned NumVTs = MatcherTable[MatcherIndex++];
+ unsigned NumVTs;
+ // If this is one of the compressed forms, get the number of VTs based
+ // on the Opcode. Otherwise read the next byte from the table.
+ if (Opcode >= OPC_MorphNodeTo0 && Opcode <= OPC_MorphNodeTo2)
+ NumVTs = Opcode - OPC_MorphNodeTo0;
+ else if (Opcode >= OPC_EmitNode0 && Opcode <= OPC_EmitNode2)
+ NumVTs = Opcode - OPC_EmitNode0;
+ else
+ NumVTs = MatcherTable[MatcherIndex++];
SmallVector<EVT, 4> VTs;
for (unsigned i = 0; i != NumVTs; ++i) {
MVT::SimpleValueType VT =
@@ -3205,7 +3367,9 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
// Create the node.
SDNode *Res = nullptr;
- if (Opcode != OPC_MorphNodeTo) {
+ bool IsMorphNodeTo = Opcode == OPC_MorphNodeTo ||
+ (Opcode >= OPC_MorphNodeTo0 && Opcode <= OPC_MorphNodeTo2);
+ if (!IsMorphNodeTo) {
// If this is a normal EmitNode command, just create the new node and
// add the results to the RecordedNodes list.
Res = CurDAG->getMachineNode(TargetOpc, SDLoc(NodeToMatch),
@@ -3218,13 +3382,17 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
nullptr));
}
- } else if (NodeToMatch->getOpcode() != ISD::DELETED_NODE) {
- Res = MorphNode(NodeToMatch, TargetOpc, VTList, Ops, EmitNodeInfo);
} else {
- // NodeToMatch was eliminated by CSE when the target changed the DAG.
- // We will visit the equivalent node later.
- DEBUG(dbgs() << "Node was eliminated by CSE\n");
- return nullptr;
+ assert(NodeToMatch->getOpcode() != ISD::DELETED_NODE &&
+ "NodeToMatch was removed partway through selection");
+ SelectionDAG::DAGNodeDeletedListener NDL(*CurDAG, [&](SDNode *N,
+ SDNode *E) {
+ auto &Chain = ChainNodesMatched;
+ assert((!E || llvm::find(Chain, N) == Chain.end()) &&
+ "Chain node replaced during MorphNode");
+ Chain.erase(std::remove(Chain.begin(), Chain.end(), N), Chain.end());
+ });
+ Res = MorphNode(NodeToMatch, TargetOpc, VTList, Ops, EmitNodeInfo);
}
// If the node had chain/glue results, update our notion of the current
@@ -3285,31 +3453,14 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
}
DEBUG(dbgs() << " "
- << (Opcode == OPC_MorphNodeTo ? "Morphed" : "Created")
+ << (IsMorphNodeTo ? "Morphed" : "Created")
<< " node: "; Res->dump(CurDAG); dbgs() << "\n");
// If this was a MorphNodeTo then we're completely done!
- if (Opcode == OPC_MorphNodeTo) {
- // Update chain and glue uses.
- UpdateChainsAndGlue(NodeToMatch, InputChain, ChainNodesMatched,
- InputGlue, GlueResultNodesMatched, true);
- return Res;
- }
-
- continue;
- }
-
- case OPC_MarkGlueResults: {
- unsigned NumNodes = MatcherTable[MatcherIndex++];
-
- // Read and remember all the glue-result nodes.
- for (unsigned i = 0; i != NumNodes; ++i) {
- unsigned RecNo = MatcherTable[MatcherIndex++];
- if (RecNo & 128)
- RecNo = GetVBR(RecNo, MatcherTable, MatcherIndex);
-
- assert(RecNo < RecordedNodes.size() && "Invalid MarkGlueResults");
- GlueResultNodesMatched.push_back(RecordedNodes[RecNo].first.getNode());
+ if (IsMorphNodeTo) {
+ // Update chain uses.
+ UpdateChains(Res, InputChain, ChainNodesMatched, true);
+ return;
}
continue;
}
@@ -3341,20 +3492,24 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
CurDAG->ReplaceAllUsesOfValueWith(SDValue(NodeToMatch, i), Res);
}
- // If the root node defines glue, add it to the glue nodes to update list.
- if (NodeToMatch->getValueType(NodeToMatch->getNumValues()-1) == MVT::Glue)
- GlueResultNodesMatched.push_back(NodeToMatch);
+ // Update chain uses.
+ UpdateChains(NodeToMatch, InputChain, ChainNodesMatched, false);
- // Update chain and glue uses.
- UpdateChainsAndGlue(NodeToMatch, InputChain, ChainNodesMatched,
- InputGlue, GlueResultNodesMatched, false);
+ // If the root node defines glue, we need to update it to the glue result.
+ // TODO: This never happens in our tests and I think it can be removed /
+ // replaced with an assert, but if we do it this way the change is
+ // NFC.
+ if (NodeToMatch->getValueType(NodeToMatch->getNumValues() - 1) ==
+ MVT::Glue &&
+ InputGlue.getNode())
+ CurDAG->ReplaceAllUsesOfValueWith(
+ SDValue(NodeToMatch, NodeToMatch->getNumValues() - 1), InputGlue);
assert(NodeToMatch->use_empty() &&
"Didn't replace all uses of the node?");
+ CurDAG->RemoveDeadNode(NodeToMatch);
- // FIXME: We just return here, which interacts correctly with SelectRoot
- // above. We should fix this to not return an SDNode* anymore.
- return nullptr;
+ return;
}
}
@@ -3366,7 +3521,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
while (1) {
if (MatchScopes.empty()) {
CannotYetSelect(NodeToMatch);
- return nullptr;
+ return;
}
// Restore the interpreter state back to the point where the scope was
@@ -3387,8 +3542,6 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
InputGlue = LastScope.InputGlue;
if (!LastScope.HasChainNodesMatched)
ChainNodesMatched.clear();
- if (!LastScope.HasGlueResultNodesMatched)
- GlueResultNodesMatched.clear();
// Check to see what the offset is at the new MatcherIndex. If it is zero
// we have reached the end of this scope, otherwise we have another child
@@ -3411,8 +3564,6 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
}
}
-
-
void SelectionDAGISel::CannotYetSelect(SDNode *N) {
std::string msg;
raw_string_ostream Msg(msg);
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/TargetSelectionDAGInfo.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGTargetInfo.cpp
index 00db942..55f70f7 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/TargetSelectionDAGInfo.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGTargetInfo.cpp
@@ -1,4 +1,4 @@
-//===-- TargetSelectionDAGInfo.cpp - SelectionDAG Info --------------------===//
+//===-- SelectionDAGTargetInfo.cpp - SelectionDAG Info --------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,13 +7,11 @@
//
//===----------------------------------------------------------------------===//
//
-// This implements the TargetSelectionDAGInfo class.
+// This implements the SelectionDAGTargetInfo class.
//
//===----------------------------------------------------------------------===//
-#include "llvm/Target/TargetSelectionDAGInfo.h"
-#include "llvm/Target/TargetMachine.h"
+#include "llvm/CodeGen/SelectionDAGTargetInfo.h"
using namespace llvm;
-TargetSelectionDAGInfo::~TargetSelectionDAGInfo() {
-}
+SelectionDAGTargetInfo::~SelectionDAGTargetInfo() {}
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
index 02545a7..90aaba2 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
@@ -53,13 +53,10 @@ void StatepointLoweringState::startNewStatepoint(SelectionDAGBuilder &Builder) {
"Trying to visit statepoint before finished processing previous one");
Locations.clear();
NextSlotToAllocate = 0;
- // Need to resize this on each safepoint - we need the two to stay in
- // sync and the clear patterns of a SelectionDAGBuilder have no relation
- // to FunctionLoweringInfo.
+ // Need to resize this on each safepoint - we need the two to stay in sync and
+ // the clear patterns of a SelectionDAGBuilder have no relation to
+ // FunctionLoweringInfo. SmallBitVector::reset initializes all bits to false.
AllocatedStackSlots.resize(Builder.FuncInfo.StatepointStackSlots.size());
- for (size_t i = 0; i < AllocatedStackSlots.size(); i++) {
- AllocatedStackSlots[i] = false;
- }
}
void StatepointLoweringState::clear() {
@@ -72,49 +69,46 @@ void StatepointLoweringState::clear() {
SDValue
StatepointLoweringState::allocateStackSlot(EVT ValueType,
SelectionDAGBuilder &Builder) {
-
NumSlotsAllocatedForStatepoints++;
+ auto *MFI = Builder.DAG.getMachineFunction().getFrameInfo();
- // The basic scheme here is to first look for a previously created stack slot
- // which is not in use (accounting for the fact arbitrary slots may already
- // be reserved), or to create a new stack slot and use it.
-
- // If this doesn't succeed in 40000 iterations, something is seriously wrong
- for (int i = 0; i < 40000; i++) {
- assert(Builder.FuncInfo.StatepointStackSlots.size() ==
- AllocatedStackSlots.size() &&
- "broken invariant");
- const size_t NumSlots = AllocatedStackSlots.size();
- assert(NextSlotToAllocate <= NumSlots && "broken invariant");
-
- if (NextSlotToAllocate >= NumSlots) {
- assert(NextSlotToAllocate == NumSlots);
- // record stats
- if (NumSlots + 1 > StatepointMaxSlotsRequired) {
- StatepointMaxSlotsRequired = NumSlots + 1;
- }
+ unsigned SpillSize = ValueType.getSizeInBits() / 8;
+ assert((SpillSize * 8) == ValueType.getSizeInBits() && "Size not in bytes?");
- SDValue SpillSlot = Builder.DAG.CreateStackTemporary(ValueType);
- const unsigned FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
- auto *MFI = Builder.DAG.getMachineFunction().getFrameInfo();
- MFI->markAsStatepointSpillSlotObjectIndex(FI);
+ // First look for a previously created stack slot which is not in
+ // use (accounting for the fact arbitrary slots may already be
+ // reserved); failing that, create a new stack slot and use it.
- Builder.FuncInfo.StatepointStackSlots.push_back(FI);
- AllocatedStackSlots.push_back(true);
- return SpillSlot;
- }
- if (!AllocatedStackSlots[NextSlotToAllocate]) {
+ const size_t NumSlots = AllocatedStackSlots.size();
+ assert(NextSlotToAllocate <= NumSlots && "Broken invariant");
+
+ // The stack slots in StatepointStackSlots beyond the first NumSlots were
+ // added in this instance of StatepointLoweringState, and cannot be re-used.
+ assert(NumSlots <= Builder.FuncInfo.StatepointStackSlots.size() &&
+ "Broken invariant");
+
+ for (; NextSlotToAllocate < NumSlots; NextSlotToAllocate++) {
+ if (!AllocatedStackSlots.test(NextSlotToAllocate)) {
const int FI = Builder.FuncInfo.StatepointStackSlots[NextSlotToAllocate];
- AllocatedStackSlots[NextSlotToAllocate] = true;
- return Builder.DAG.getFrameIndex(FI, ValueType);
+ if (MFI->getObjectSize(FI) == SpillSize) {
+ AllocatedStackSlots.set(NextSlotToAllocate);
+ return Builder.DAG.getFrameIndex(FI, ValueType);
+ }
}
- // Note: We deliberately choose to advance this only on the failing path.
- // Doing so on the succeeding path involves a bit of complexity that caused
- // a minor bug previously. Unless performance shows this matters, please
- // keep this code as simple as possible.
- NextSlotToAllocate++;
}
- llvm_unreachable("infinite loop?");
+
+ // Couldn't find a free slot, so create a new one:
+
+ SDValue SpillSlot = Builder.DAG.CreateStackTemporary(ValueType);
+ const unsigned FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
+ MFI->markAsStatepointSpillSlotObjectIndex(FI);
+
+ Builder.FuncInfo.StatepointStackSlots.push_back(FI);
+
+ StatepointMaxSlotsRequired = std::max<unsigned long>(
+ StatepointMaxSlotsRequired, Builder.FuncInfo.StatepointStackSlots.size());
+
+ return SpillSlot;
}
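// Self-contained sketch (hypothetical names; std::vector<bool> standing in
// for SmallBitVector, plain ints for frame indices) of the reuse policy
// implemented above: scan monotonically for a free slot of matching size,
// and only create a fresh slot when none is found.
#include <cstddef>
#include <vector>

static int allocateSlot(std::vector<bool> &Used, std::vector<int> &SlotSize,
                        size_t &NextSlot, int Size) {
  for (; NextSlot < Used.size(); ++NextSlot) {
    if (!Used[NextSlot] && SlotSize[NextSlot] == Size) {
      Used[NextSlot] = true;        // reuse a previously created slot
      return int(NextSlot);
    }
  }
  SlotSize.push_back(Size);         // no free slot of this size: grow
  Used.push_back(true);
  return int(SlotSize.size() - 1);
}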
/// Utility function for reservePreviousStackSlotForValue. Tries to find
@@ -125,24 +119,23 @@ static Optional<int> findPreviousSpillSlot(const Value *Val,
int LookUpDepth) {
// Can not look any further - give up now
if (LookUpDepth <= 0)
- return Optional<int>();
+ return None;
// Spill location is known for gc relocates
if (const auto *Relocate = dyn_cast<GCRelocateInst>(Val)) {
- FunctionLoweringInfo::StatepointSpilledValueMapTy &SpillMap =
- Builder.FuncInfo.StatepointRelocatedValues[Relocate->getStatepoint()];
+ const auto &SpillMap =
+ Builder.FuncInfo.StatepointSpillMaps[Relocate->getStatepoint()];
auto It = SpillMap.find(Relocate->getDerivedPtr());
if (It == SpillMap.end())
- return Optional<int>();
+ return None;
return It->second;
}
// Look through bitcast instructions.
- if (const BitCastInst *Cast = dyn_cast<BitCastInst>(Val)) {
+ if (const BitCastInst *Cast = dyn_cast<BitCastInst>(Val))
return findPreviousSpillSlot(Cast->getOperand(0), Builder, LookUpDepth - 1);
- }
// Look through phi nodes
// All incoming values should have same known stack slot, otherwise result
@@ -154,10 +147,10 @@ static Optional<int> findPreviousSpillSlot(const Value *Val,
Optional<int> SpillSlot =
findPreviousSpillSlot(IncomingValue, Builder, LookUpDepth - 1);
if (!SpillSlot.hasValue())
- return Optional<int>();
+ return None;
if (MergedResult.hasValue() && *MergedResult != *SpillSlot)
- return Optional<int>();
+ return None;
MergedResult = SpillSlot;
}
@@ -192,7 +185,7 @@ static Optional<int> findPreviousSpillSlot(const Value *Val,
// which we visit values is unspecified.
// Don't know any information about this instruction
- return Optional<int>();
+ return None;
}
/// Try to find existing copies of the incoming values in stack slots used for
@@ -213,7 +206,7 @@ static void reservePreviousStackSlotForValue(const Value *IncomingValue,
SDValue OldLocation = Builder.StatepointLowering.getLocation(Incoming);
if (OldLocation.getNode())
- // duplicates in input
+ // Duplicates in input
return;
const int LookUpDepth = 6;
@@ -222,14 +215,14 @@ static void reservePreviousStackSlotForValue(const Value *IncomingValue,
if (!Index.hasValue())
return;
- auto Itr = std::find(Builder.FuncInfo.StatepointStackSlots.begin(),
- Builder.FuncInfo.StatepointStackSlots.end(), *Index);
- assert(Itr != Builder.FuncInfo.StatepointStackSlots.end() &&
- "value spilled to the unknown stack slot");
+ const auto &StatepointSlots = Builder.FuncInfo.StatepointStackSlots;
+
+ auto SlotIt = find(StatepointSlots, *Index);
+ assert(SlotIt != StatepointSlots.end() &&
+ "Value spilled to the unknown stack slot");
// This is one of our dedicated lowering slots
- const int Offset =
- std::distance(Builder.FuncInfo.StatepointStackSlots.begin(), Itr);
+ const int Offset = std::distance(StatepointSlots.begin(), SlotIt);
if (Builder.StatepointLowering.isStackSlotAllocated(Offset)) {
// stack slot already assigned to someone else, can't use it!
// TODO: currently we reserve space for gc arguments after doing
@@ -252,24 +245,30 @@ static void reservePreviousStackSlotForValue(const Value *IncomingValue,
/// is not required for correctness. Its purpose is to reduce the size of
/// StackMap section. It has no effect on the number of spill slots required
/// or the actual lowering.
-static void removeDuplicatesGCPtrs(SmallVectorImpl<const Value *> &Bases,
- SmallVectorImpl<const Value *> &Ptrs,
- SmallVectorImpl<const Value *> &Relocs,
- SelectionDAGBuilder &Builder) {
-
- // This is horribly inefficient, but I don't care right now
- SmallSet<SDValue, 64> Seen;
-
- SmallVector<const Value *, 64> NewBases, NewPtrs, NewRelocs;
- for (size_t i = 0; i < Ptrs.size(); i++) {
+static void
+removeDuplicateGCPtrs(SmallVectorImpl<const Value *> &Bases,
+ SmallVectorImpl<const Value *> &Ptrs,
+ SmallVectorImpl<const GCRelocateInst *> &Relocs,
+ SelectionDAGBuilder &Builder,
+ FunctionLoweringInfo::StatepointSpillMap &SSM) {
+ DenseMap<SDValue, const Value *> Seen;
+
+ SmallVector<const Value *, 64> NewBases, NewPtrs;
+ SmallVector<const GCRelocateInst *, 64> NewRelocs;
+ for (size_t i = 0, e = Ptrs.size(); i < e; i++) {
SDValue SD = Builder.getValue(Ptrs[i]);
- // Only add non-duplicates
- if (Seen.count(SD) == 0) {
+ auto SeenIt = Seen.find(SD);
+
+ if (SeenIt == Seen.end()) {
+ // Only add non-duplicates
NewBases.push_back(Bases[i]);
NewPtrs.push_back(Ptrs[i]);
NewRelocs.push_back(Relocs[i]);
+ Seen[SD] = Ptrs[i];
+ } else {
+ // Duplicate pointer found, note in SSM and move on:
+ SSM.DuplicateMap[Ptrs[i]] = SeenIt->second;
}
- Seen.insert(SD);
}
assert(Bases.size() >= NewBases.size());
assert(Ptrs.size() >= NewPtrs.size());
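// Generic sketch of the dedup-with-provenance pattern above (std::map in
// place of DenseMap; names hypothetical): the first occurrence of each key
// survives, and every later duplicate is recorded against its surviving
// representative, mirroring SSM.DuplicateMap.
#include <map>
#include <utility>
#include <vector>

template <typename K, typename V>
static void dedupKeepFirst(std::vector<std::pair<K, V>> &Items,
                           std::map<V, V> &DuplicateMap) {
  std::map<K, V> Seen;
  std::vector<std::pair<K, V>> Kept;
  for (const auto &Item : Items) {
    auto It = Seen.find(Item.first);
    if (It == Seen.end()) {
      Seen.emplace(Item.first, Item.second);  // first sighting: keep it
      Kept.push_back(Item);
    } else {
      DuplicateMap[Item.second] = It->second; // duplicate: record mapping
    }
  }
  Items.swap(Kept);
}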
@@ -284,43 +283,13 @@ static void removeDuplicatesGCPtrs(SmallVectorImpl<const Value *> &Bases,
/// Extract call from statepoint, lower it and return pointer to the
/// call node. Also update NodeMap so that getValue(statepoint) will
/// reference lowered call result
-static SDNode *
-lowerCallFromStatepoint(ImmutableStatepoint ISP, const BasicBlock *EHPadBB,
- SelectionDAGBuilder &Builder,
- SmallVectorImpl<SDValue> &PendingExports) {
-
- ImmutableCallSite CS(ISP.getCallSite());
-
- SDValue ActualCallee;
-
- if (ISP.getNumPatchBytes() > 0) {
- // If we've been asked to emit a nop sequence instead of a call instruction
- // for this statepoint then don't lower the call target, but use a constant
- // `null` instead. Not lowering the call target lets statepoint clients get
- // away without providing a physical address for the symbolic call target at
- // link time.
-
- const auto &TLI = Builder.DAG.getTargetLoweringInfo();
- const auto &DL = Builder.DAG.getDataLayout();
-
- unsigned AS = ISP.getCalledValue()->getType()->getPointerAddressSpace();
- ActualCallee = Builder.DAG.getConstant(0, Builder.getCurSDLoc(),
- TLI.getPointerTy(DL, AS));
- } else
- ActualCallee = Builder.getValue(ISP.getCalledValue());
-
- assert(CS.getCallingConv() != CallingConv::AnyReg &&
- "anyregcc is not supported on statepoints!");
-
- Type *DefTy = ISP.getActualReturnType();
- bool HasDef = !DefTy->isVoidTy();
+static std::pair<SDValue, SDNode *> lowerCallFromStatepointLoweringInfo(
+ SelectionDAGBuilder::StatepointLoweringInfo &SI,
+ SelectionDAGBuilder &Builder, SmallVectorImpl<SDValue> &PendingExports) {
SDValue ReturnValue, CallEndVal;
- std::tie(ReturnValue, CallEndVal) = Builder.lowerCallOperands(
- ISP.getCallSite(), ImmutableStatepoint::CallArgsBeginPos,
- ISP.getNumCallArgs(), ActualCallee, DefTy, EHPadBB,
- false /* IsPatchPoint */);
-
+ std::tie(ReturnValue, CallEndVal) =
+ Builder.lowerInvokable(SI.CLI, SI.EHPadBB);
SDNode *CallEnd = CallEndVal.getNode();
// Get a call instruction from the call sequence chain. Tail calls are not
@@ -339,6 +308,7 @@ lowerCallFromStatepoint(ImmutableStatepoint ISP, const BasicBlock *EHPadBB,
// to grab the return value from the return register(s), or it can be a LOAD
// to load a value returned by reference via a stack slot.
+ bool HasDef = !SI.CLI.RetTy->isVoidTy();
if (HasDef) {
if (CallEnd->getOpcode() == ISD::LOAD)
CallEnd = CallEnd->getOperand(0).getNode();
@@ -348,70 +318,7 @@ lowerCallFromStatepoint(ImmutableStatepoint ISP, const BasicBlock *EHPadBB,
}
assert(CallEnd->getOpcode() == ISD::CALLSEQ_END && "expected!");
-
- // Export the result value if needed
- const Instruction *GCResult = ISP.getGCResult();
- if (HasDef && GCResult) {
- if (GCResult->getParent() != CS.getParent()) {
- // Result value will be used in a different basic block so we need to
- // export it now.
- // Default exporting mechanism will not work here because statepoint call
- // has a different type than the actual call. It means that by default
- // llvm will create export register of the wrong type (always i32 in our
- // case). So instead we need to create export register with correct type
- // manually.
- // TODO: To eliminate this problem we can remove gc.result intrinsics
- // completely and make statepoint call to return a tuple.
- unsigned Reg = Builder.FuncInfo.CreateRegs(ISP.getActualReturnType());
- RegsForValue RFV(
- *Builder.DAG.getContext(), Builder.DAG.getTargetLoweringInfo(),
- Builder.DAG.getDataLayout(), Reg, ISP.getActualReturnType());
- SDValue Chain = Builder.DAG.getEntryNode();
-
- RFV.getCopyToRegs(ReturnValue, Builder.DAG, Builder.getCurSDLoc(), Chain,
- nullptr);
- PendingExports.push_back(Chain);
- Builder.FuncInfo.ValueMap[CS.getInstruction()] = Reg;
- } else {
- // Result value will be used in a same basic block. Don't export it or
- // perform any explicit register copies.
- // We'll replace the actual call node shortly. gc_result will grab
- // this value.
- Builder.setValue(CS.getInstruction(), ReturnValue);
- }
- } else {
- // The token value is never used from here on, just generate a poison value
- Builder.setValue(CS.getInstruction(),
- Builder.DAG.getIntPtrConstant(-1, Builder.getCurSDLoc()));
- }
-
- return CallEnd->getOperand(0).getNode();
-}
-
-/// Collect all gc pointers coming into a statepoint intrinsic, clean them up,
-/// and return two arrays:
-/// Bases - base pointers incoming to this statepoint
-/// Ptrs - derived pointers incoming to this statepoint
-/// Relocs - the gc_relocate corresponding to each base/ptr pair
-/// Elements of these arrays should be in one-to-one correspondence with each
-/// other i.e Bases[i], Ptrs[i] are from the same gcrelocate call
-static void getIncomingStatepointGCValues(
- SmallVectorImpl<const Value *> &Bases, SmallVectorImpl<const Value *> &Ptrs,
- SmallVectorImpl<const Value *> &Relocs, ImmutableStatepoint StatepointSite,
- SelectionDAGBuilder &Builder) {
- for (const GCRelocateInst *Relocate : StatepointSite.getRelocates()) {
- Relocs.push_back(Relocate);
- Bases.push_back(Relocate->getBasePtr());
- Ptrs.push_back(Relocate->getDerivedPtr());
- }
-
- // Remove any redundant llvm::Values which map to the same SDValue as another
- // input. Also has the effect of removing duplicates in the original
- // llvm::Value input list as well. This is a useful optimization for
- // reducing the size of the StackMap section. It has no other impact.
- removeDuplicatesGCPtrs(Bases, Ptrs, Relocs, Builder);
-
- assert(Bases.size() == Ptrs.size() && Ptrs.size() == Relocs.size());
+ return std::make_pair(ReturnValue, CallEnd->getOperand(0).getNode());
}
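// Sketch of the walk above: starting from the node that ends the lowered
// call sequence, step back through an optional LOAD (for results returned
// by reference via a stack slot) and, as in the surrounding code, any
// copy nodes that materialize register results, until CALLSEQ_END is
// reached; its operand 0 then leads to the call node this helper returns.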
/// Spill a value incoming to the statepoint. It might be either part of
@@ -429,7 +336,6 @@ spillIncomingStatepointValue(SDValue Incoming, SDValue Chain,
if (!Loc.getNode()) {
Loc = Builder.StatepointLowering.allocateStackSlot(Incoming.getValueType(),
Builder);
- assert(isa<FrameIndexSDNode>(Loc));
int Index = cast<FrameIndexSDNode>(Loc)->getIndex();
// We use TargetFrameIndex so that isel will not select it into LEA
Loc = Builder.DAG.getTargetFrameIndex(Index, Incoming.getValueType());
@@ -437,10 +343,22 @@ spillIncomingStatepointValue(SDValue Incoming, SDValue Chain,
// TODO: We can create TokenFactor node instead of
// chaining stores one after another, this may allow
// a bit more optimal scheduling for them
+
+#ifndef NDEBUG
+ // Right now we always allocate spill slots that are of the same
+ // size as the value we're about to spill (the size of spillee can
+ // vary since we spill vectors of pointers too). At some point we
+ // can consider allowing spills of smaller values to larger slots
+ // (i.e. change the '==' in the assert below to a '>=').
+ auto *MFI = Builder.DAG.getMachineFunction().getFrameInfo();
+ assert((MFI->getObjectSize(Index) * 8) ==
+ Incoming.getValueType().getSizeInBits() &&
+ "Bad spill: stack slot does not match!");
+#endif
+
Chain = Builder.DAG.getStore(Chain, Builder.getCurSDLoc(), Incoming, Loc,
MachinePointerInfo::getFixedStack(
- Builder.DAG.getMachineFunction(), Index),
- false, false, 0);
+ Builder.DAG.getMachineFunction(), Index));
Builder.StatepointLowering.setLocation(Incoming, Loc);
}
@@ -478,8 +396,7 @@ static void lowerIncomingStatepointValue(SDValue Incoming,
// spill location. This would be a useful optimization, but would
// need to be optional since it requires a lot of complexity on the
// runtime side which not all would support.
- std::pair<SDValue, SDValue> Res =
- spillIncomingStatepointValue(Incoming, Chain, Builder);
+ auto Res = spillIncomingStatepointValue(Incoming, Chain, Builder);
Ops.push_back(Res.first);
Chain = Res.second;
}
@@ -494,43 +411,37 @@ static void lowerIncomingStatepointValue(SDValue Incoming,
/// completion, 'Ops' will contain ready to use operands for machine code
/// statepoint. The chain nodes will have already been created and the DAG root
/// will be set to the last value spilled (if any were).
-static void lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops,
- ImmutableStatepoint StatepointSite,
- SelectionDAGBuilder &Builder) {
-
- // Lower the deopt and gc arguments for this statepoint. Layout will
- // be: deopt argument length, deopt arguments.., gc arguments...
-
- SmallVector<const Value *, 64> Bases, Ptrs, Relocations;
- getIncomingStatepointGCValues(Bases, Ptrs, Relocations, StatepointSite,
- Builder);
-
+static void
+lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops,
+ SelectionDAGBuilder::StatepointLoweringInfo &SI,
+ SelectionDAGBuilder &Builder) {
+ // Lower the deopt and gc arguments for this statepoint. Layout will be:
+ // deopt argument length, deopt arguments.., gc arguments...
#ifndef NDEBUG
- // Check that each of the gc pointer and bases we've gotten out of the
- // safepoint is something the strategy thinks might be a pointer (or vector
- // of pointers) into the GC heap. This is basically just here to help catch
- // errors during statepoint insertion. TODO: This should actually be in the
- // Verifier, but we can't get to the GCStrategy from there (yet).
- GCStrategy &S = Builder.GFI->getStrategy();
- for (const Value *V : Bases) {
- auto Opt = S.isGCManagedPointer(V->getType()->getScalarType());
- if (Opt.hasValue()) {
- assert(Opt.getValue() &&
- "non gc managed base pointer found in statepoint");
- }
- }
- for (const Value *V : Ptrs) {
- auto Opt = S.isGCManagedPointer(V->getType()->getScalarType());
- if (Opt.hasValue()) {
- assert(Opt.getValue() &&
- "non gc managed derived pointer found in statepoint");
+ if (auto *GFI = Builder.GFI) {
+ // Check that each of the gc pointer and bases we've gotten out of the
+ // safepoint is something the strategy thinks might be a pointer (or vector
+ // of pointers) into the GC heap. This is basically just here to help catch
+ // errors during statepoint insertion. TODO: This should actually be in the
+ // Verifier, but we can't get to the GCStrategy from there (yet).
+ GCStrategy &S = GFI->getStrategy();
+ for (const Value *V : SI.Bases) {
+ auto Opt = S.isGCManagedPointer(V->getType()->getScalarType());
+ if (Opt.hasValue()) {
+ assert(Opt.getValue() &&
+ "non gc managed base pointer found in statepoint");
+ }
}
- }
- for (const Value *V : Relocations) {
- auto Opt = S.isGCManagedPointer(V->getType()->getScalarType());
- if (Opt.hasValue()) {
- assert(Opt.getValue() && "non gc managed pointer relocated");
+ for (const Value *V : SI.Ptrs) {
+ auto Opt = S.isGCManagedPointer(V->getType()->getScalarType());
+ if (Opt.hasValue()) {
+ assert(Opt.getValue() &&
+ "non gc managed derived pointer found in statepoint");
+ }
}
+ } else {
+ assert(SI.Bases.empty() && "No gc specified, so cannot relocate pointers!");
+ assert(SI.Ptrs.empty() && "No gc specified, so cannot relocate pointers!");
}
#endif
@@ -539,30 +450,23 @@ static void lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops,
// particular value. This is purely an optimization over the code below and
// doesn't change semantics at all. It is important for performance that we
// reserve slots for both deopt and gc values before lowering either.
- for (const Value *V : StatepointSite.vm_state_args()) {
+ for (const Value *V : SI.DeoptState) {
reservePreviousStackSlotForValue(V, Builder);
}
- for (unsigned i = 0; i < Bases.size(); ++i) {
- reservePreviousStackSlotForValue(Bases[i], Builder);
- reservePreviousStackSlotForValue(Ptrs[i], Builder);
+ for (unsigned i = 0; i < SI.Bases.size(); ++i) {
+ reservePreviousStackSlotForValue(SI.Bases[i], Builder);
+ reservePreviousStackSlotForValue(SI.Ptrs[i], Builder);
}
// First, prefix the list with the number of unique values to be
// lowered. Note that this is the number of *Values*, not the
// number of SDValues required to lower them.
- const int NumVMSArgs = StatepointSite.getNumTotalVMSArgs();
+ const int NumVMSArgs = SI.DeoptState.size();
pushStackMapConstant(Ops, Builder, NumVMSArgs);
- assert(NumVMSArgs == std::distance(StatepointSite.vm_state_begin(),
- StatepointSite.vm_state_end()));
-
- // The vm state arguments are lowered in an opaque manner. We do
- // not know what type of values are contained within. We skip the
- // first one since that happens to be the total number we lowered
- // explicitly just above. We could have left it in the loop and
- // not done it explicitly, but it's far easier to understand this
- // way.
- for (const Value *V : StatepointSite.vm_state_args()) {
+ // The vm state arguments are lowered in an opaque manner. We do not know
+ // what type of values are contained within.
+ for (const Value *V : SI.DeoptState) {
SDValue Incoming = Builder.getValue(V);
lowerIncomingStatepointValue(Incoming, Ops, Builder);
}
@@ -572,11 +476,11 @@ static void lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops,
// arrays interwoven with each (lowered) base pointer immediately followed by
// its (lowered) derived pointer, i.e.
// (base[0], ptr[0], base[1], ptr[1], ...)
- for (unsigned i = 0; i < Bases.size(); ++i) {
- const Value *Base = Bases[i];
+ for (unsigned i = 0; i < SI.Bases.size(); ++i) {
+ const Value *Base = SI.Bases[i];
lowerIncomingStatepointValue(Builder.getValue(Base), Ops, Builder);
- const Value *Ptr = Ptrs[i];
+ const Value *Ptr = SI.Ptrs[i];
lowerIncomingStatepointValue(Builder.getValue(Ptr), Ops, Builder);
}
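
To make the resulting operand layout concrete: the list built above holds the deopt-argument count, then the deopt arguments, then bases and derived pointers interleaved pairwise. A small illustrative sketch of the interleaving step (plain C++, not LLVM code):

    #include <cstddef>
    #include <vector>

    // Interleave lowered base/derived pointers the way the statepoint
    // meta-argument list is laid out: (base[0], ptr[0], base[1], ptr[1], ...).
    template <typename T>
    std::vector<T> interleaveBasesAndPtrs(const std::vector<T> &Bases,
                                          const std::vector<T> &Ptrs) {
      std::vector<T> Ops;
      for (std::size_t I = 0; I < Bases.size(); ++I) {
        Ops.push_back(Bases[I]);
        Ops.push_back(Ptrs[I]);
      }
      return Ops;
    }
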
@@ -585,7 +489,7 @@ static void lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops,
// allocas and give control over placement to the consumer. In this case,
// it is the contents of the slot which may get updated, not the pointer to
// the alloca
- for (Value *V : StatepointSite.gc_args()) {
+ for (Value *V : SI.GCArgs) {
SDValue Incoming = Builder.getValue(V);
if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Incoming)) {
// This handles allocas as arguments to the statepoint
@@ -597,18 +501,16 @@ static void lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops,
// Record computed locations for all lowered values.
// This cannot be embedded in the lowering loops as we need to record *all*
// values, while the previous loops account only for values with unique
// SDValues.
- const Instruction *StatepointInstr =
- StatepointSite.getCallSite().getInstruction();
- FunctionLoweringInfo::StatepointSpilledValueMapTy &SpillMap =
- Builder.FuncInfo.StatepointRelocatedValues[StatepointInstr];
+ const Instruction *StatepointInstr = SI.StatepointInstr;
+ auto &SpillMap = Builder.FuncInfo.StatepointSpillMaps[StatepointInstr];
- for (const GCRelocateInst *Relocate : StatepointSite.getRelocates()) {
+ for (const GCRelocateInst *Relocate : SI.GCRelocates) {
const Value *V = Relocate->getDerivedPtr();
SDValue SDV = Builder.getValue(V);
SDValue Loc = Builder.StatepointLowering.getLocation(SDV);
if (Loc.getNode()) {
- SpillMap[V] = cast<FrameIndexSDNode>(Loc)->getIndex();
+ SpillMap.SlotMap[V] = cast<FrameIndexSDNode>(Loc)->getIndex();
} else {
// Record the value as visited, but not spilled. This is the case for allocas
// and constants. For these values we can avoid emitting a spill load while
@@ -616,7 +518,7 @@ static void lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops,
// Actually we do not need to record them in this map at all.
// We do this only to check that we are not relocating any unvisited
// value.
- SpillMap[V] = None;
+ SpillMap.SlotMap[V] = None;
// Default llvm mechanisms for exporting values which are used in
// different basic blocks do not work for gc relocates.
@@ -630,16 +532,8 @@ static void lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops,
}
}
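
The spill map populated above distinguishes two outcomes per value: spilled (a concrete frame index) and visited-but-not-spilled (None, for allocas and constants). A hedged sketch of that contract with std::optional, using strings as stand-ins for llvm::Value pointers:

    #include <map>
    #include <optional>
    #include <string>

    using SlotMap = std::map<std::string, std::optional<int>>;

    // Present in the map at all => the value was visited during lowering.
    bool wasVisited(const SlotMap &M, const std::string &V) {
      return M.count(V) != 0;
    }

    // Present with a frame index => the value was actually spilled.
    bool wasSpilled(const SlotMap &M, const std::string &V) {
      auto It = M.find(V);
      return It != M.end() && It->second.has_value();
    }
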
-void SelectionDAGBuilder::visitStatepoint(const CallInst &CI) {
- // Check some preconditions for sanity
- assert(isStatepoint(&CI) &&
- "function called must be the statepoint function");
-
- LowerStatepoint(ImmutableStatepoint(&CI));
-}
-
-void SelectionDAGBuilder::LowerStatepoint(
- ImmutableStatepoint ISP, const BasicBlock *EHPadBB /*= nullptr*/) {
+SDValue SelectionDAGBuilder::LowerAsSTATEPOINT(
+ SelectionDAGBuilder::StatepointLoweringInfo &SI) {
// The basic scheme here is that information about both the original call and
// the safepoint is encoded in the CallInst. We create a temporary call and
// lower it, then reverse engineer the calling sequence.
@@ -648,36 +542,36 @@ void SelectionDAGBuilder::LowerStatepoint(
// Clear state
StatepointLowering.startNewStatepoint(*this);
- ImmutableCallSite CS(ISP.getCallSite());
-
#ifndef NDEBUG
- // Consistency check. Check only relocates in the same basic block as thier
- // statepoint.
- for (const User *U : CS->users()) {
- const CallInst *Call = cast<CallInst>(U);
- if (isa<GCRelocateInst>(Call) && Call->getParent() == CS.getParent())
- StatepointLowering.scheduleRelocCall(*Call);
- }
+ // We schedule gc relocates before removeDuplicateGCPtrs since we _will_
+ // encounter the duplicate gc relocates we elide in removeDuplicateGCPtrs.
+ for (auto *Reloc : SI.GCRelocates)
+ if (Reloc->getParent() == SI.StatepointInstr->getParent())
+ StatepointLowering.scheduleRelocCall(*Reloc);
#endif
-#ifndef NDEBUG
- // If this is a malformed statepoint, report it early to simplify debugging.
- // This should catch any IR level mistake that's made when constructing or
- // transforming statepoints.
- ISP.verify();
-
- // Check that the associated GCStrategy expects to encounter statepoints.
- assert(GFI->getStrategy().useStatepoints() &&
- "GCStrategy does not expect to encounter statepoints");
-#endif
+ // Remove any redundant llvm::Values which map to the same SDValue as another
+ // input. This also has the effect of removing duplicates in the original
+ // llvm::Value input list. It is a useful optimization for reducing the
+ // size of the StackMap section, and has no other impact.
+ removeDuplicateGCPtrs(SI.Bases, SI.Ptrs, SI.GCRelocates, *this,
+ FuncInfo.StatepointSpillMaps[SI.StatepointInstr]);
+ assert(SI.Bases.size() == SI.Ptrs.size() &&
+ SI.Ptrs.size() == SI.GCRelocates.size());
// Lower statepoint vmstate and gcstate arguments
SmallVector<SDValue, 10> LoweredMetaArgs;
- lowerStatepointMetaArgs(LoweredMetaArgs, ISP, *this);
+ lowerStatepointMetaArgs(LoweredMetaArgs, SI, *this);
+
+ // Now that we've emitted the spills, we need to update the root so that the
+ // call sequence is ordered correctly.
+ SI.CLI.setChain(getRoot());
// Get call node, we will replace it later with statepoint
- SDNode *CallNode =
- lowerCallFromStatepoint(ISP, EHPadBB, *this, PendingExports);
+ SDValue ReturnVal;
+ SDNode *CallNode;
+ std::tie(ReturnVal, CallNode) =
+ lowerCallFromStatepointLoweringInfo(SI, *this, PendingExports);
// Construct the actual GC_TRANSITION_START, STATEPOINT, and GC_TRANSITION_END
// nodes with all the appropriate arguments and return values.
@@ -700,8 +594,8 @@ void SelectionDAGBuilder::LowerStatepoint(
// followed by a SRCVALUE for the pointer that may be used during lowering
// (e.g. to form MachinePointerInfo values for loads/stores).
const bool IsGCTransition =
- (ISP.getFlags() & (uint64_t)StatepointFlags::GCTransition) ==
- (uint64_t)StatepointFlags::GCTransition;
+ (SI.StatepointFlags & (uint64_t)StatepointFlags::GCTransition) ==
+ (uint64_t)StatepointFlags::GCTransition;
if (IsGCTransition) {
SmallVector<SDValue, 8> TSOps;
@@ -709,7 +603,7 @@ void SelectionDAGBuilder::LowerStatepoint(
TSOps.push_back(Chain);
// Add GC transition arguments
- for (const Value *V : ISP.gc_transition_args()) {
+ for (const Value *V : SI.GCTransitionArgs) {
TSOps.push_back(getValue(V));
if (V->getType()->isPointerTy())
TSOps.push_back(DAG.getSrcValue(V));
@@ -734,9 +628,9 @@ void SelectionDAGBuilder::LowerStatepoint(
SmallVector<SDValue, 40> Ops;
// Add the <id> and <numBytes> constants.
- Ops.push_back(DAG.getTargetConstant(ISP.getID(), getCurSDLoc(), MVT::i64));
+ Ops.push_back(DAG.getTargetConstant(SI.ID, getCurSDLoc(), MVT::i64));
Ops.push_back(
- DAG.getTargetConstant(ISP.getNumPatchBytes(), getCurSDLoc(), MVT::i32));
+ DAG.getTargetConstant(SI.NumPatchBytes, getCurSDLoc(), MVT::i32));
// Calculate and push starting position of vmstate arguments
// Get number of arguments incoming directly into call node
@@ -758,13 +652,12 @@ void SelectionDAGBuilder::LowerStatepoint(
Ops.insert(Ops.end(), CallNode->op_begin() + 2, RegMaskIt);
// Add a constant argument for the calling convention
- pushStackMapConstant(Ops, *this, CS.getCallingConv());
+ pushStackMapConstant(Ops, *this, SI.CLI.CallConv);
// Add a constant argument for the flags
- uint64_t Flags = ISP.getFlags();
- assert(
- ((Flags & ~(uint64_t)StatepointFlags::MaskAll) == 0)
- && "unknown flag used");
+ uint64_t Flags = SI.StatepointFlags;
+ assert(((Flags & ~(uint64_t)StatepointFlags::MaskAll) == 0) &&
+ "Unknown flag used");
pushStackMapConstant(Ops, *this, Flags);
// Insert all vmstate and gcstate arguments
@@ -800,7 +693,7 @@ void SelectionDAGBuilder::LowerStatepoint(
TEOps.push_back(SDValue(StatepointMCNode, 0));
// Add GC transition arguments
- for (const Value *V : ISP.gc_transition_args()) {
+ for (const Value *V : SI.GCTransitionArgs) {
TEOps.push_back(getValue(V));
if (V->getType()->isPointerTy())
TEOps.push_back(DAG.getSrcValue(V));
@@ -830,19 +723,154 @@ void SelectionDAGBuilder::LowerStatepoint(
// return value of each gc.relocate to the respective output of the
// previously emitted STATEPOINT value. Unfortunately, this doesn't appear
// to actually be possible today.
+
+ return ReturnVal;
+}
+
+void
+SelectionDAGBuilder::LowerStatepoint(ImmutableStatepoint ISP,
+ const BasicBlock *EHPadBB /*= nullptr*/) {
+ assert(ISP.getCallSite().getCallingConv() != CallingConv::AnyReg &&
+ "anyregcc is not supported on statepoints!");
+
+#ifndef NDEBUG
+ // If this is a malformed statepoint, report it early to simplify debugging.
+ // This should catch any IR level mistake that's made when constructing or
+ // transforming statepoints.
+ ISP.verify();
+
+ // Check that the associated GCStrategy expects to encounter statepoints.
+ assert(GFI->getStrategy().useStatepoints() &&
+ "GCStrategy does not expect to encounter statepoints");
+#endif
+
+ SDValue ActualCallee;
+
+ if (ISP.getNumPatchBytes() > 0) {
+ // If we've been asked to emit a nop sequence instead of a call instruction
+ // for this statepoint then don't lower the call target, but use a constant
+ // `null` instead. Not lowering the call target lets statepoint clients get
+ // away without providing a physical address for the symbolic call target at
+ // link time.
+
+ const auto &TLI = DAG.getTargetLoweringInfo();
+ const auto &DL = DAG.getDataLayout();
+
+ unsigned AS = ISP.getCalledValue()->getType()->getPointerAddressSpace();
+ ActualCallee = DAG.getConstant(0, getCurSDLoc(), TLI.getPointerTy(DL, AS));
+ } else {
+ ActualCallee = getValue(ISP.getCalledValue());
+ }
+
+ StatepointLoweringInfo SI(DAG);
+ populateCallLoweringInfo(SI.CLI, ISP.getCallSite(),
+ ImmutableStatepoint::CallArgsBeginPos,
+ ISP.getNumCallArgs(), ActualCallee,
+ ISP.getActualReturnType(), false /* IsPatchPoint */);
+
+ for (const GCRelocateInst *Relocate : ISP.getRelocates()) {
+ SI.GCRelocates.push_back(Relocate);
+ SI.Bases.push_back(Relocate->getBasePtr());
+ SI.Ptrs.push_back(Relocate->getDerivedPtr());
+ }
+
+ SI.GCArgs = ArrayRef<const Use>(ISP.gc_args_begin(), ISP.gc_args_end());
+ SI.StatepointInstr = ISP.getInstruction();
+ SI.GCTransitionArgs =
+ ArrayRef<const Use>(ISP.gc_args_begin(), ISP.gc_args_end());
+ SI.ID = ISP.getID();
+ SI.DeoptState = ArrayRef<const Use>(ISP.vm_state_begin(), ISP.vm_state_end());
+ SI.StatepointFlags = ISP.getFlags();
+ SI.NumPatchBytes = ISP.getNumPatchBytes();
+ SI.EHPadBB = EHPadBB;
+
+ SDValue ReturnValue = LowerAsSTATEPOINT(SI);
+
+ // Export the result value if needed
+ const GCResultInst *GCResult = ISP.getGCResult();
+ Type *RetTy = ISP.getActualReturnType();
+ if (!RetTy->isVoidTy() && GCResult) {
+ if (GCResult->getParent() != ISP.getCallSite().getParent()) {
+ // The result value will be used in a different basic block, so we need to
+ // export it now. The default exporting mechanism will not work here because
+ // the statepoint call has a different type than the actual call. That means
+ // by default llvm will create an export register of the wrong type (always
+ // i32 in our case), so instead we need to create an export register with
+ // the correct type manually.
+ // TODO: To eliminate this problem we can remove gc.result intrinsics
+ // completely and make statepoint call to return a tuple.
+ unsigned Reg = FuncInfo.CreateRegs(RetTy);
+ RegsForValue RFV(*DAG.getContext(), DAG.getTargetLoweringInfo(),
+ DAG.getDataLayout(), Reg, RetTy);
+ SDValue Chain = DAG.getEntryNode();
+
+ RFV.getCopyToRegs(ReturnValue, DAG, getCurSDLoc(), Chain, nullptr);
+ PendingExports.push_back(Chain);
+ FuncInfo.ValueMap[ISP.getInstruction()] = Reg;
+ } else {
+ // The result value will be used in the same basic block. Don't export it
+ // or perform any explicit register copies.
+ // We'll replace the actual call node shortly. gc_result will grab
+ // this value.
+ setValue(ISP.getInstruction(), ReturnValue);
+ }
+ } else {
+ // The token value is never used from here on; just generate a poison value.
+ setValue(ISP.getInstruction(), DAG.getIntPtrConstant(-1, getCurSDLoc()));
+ }
+}
+
+void SelectionDAGBuilder::LowerCallSiteWithDeoptBundleImpl(
+ ImmutableCallSite CS, SDValue Callee, const BasicBlock *EHPadBB,
+ bool VarArgDisallowed, bool ForceVoidReturnTy) {
+ StatepointLoweringInfo SI(DAG);
+ unsigned ArgBeginIndex = CS.arg_begin() - CS.getInstruction()->op_begin();
+ populateCallLoweringInfo(
+ SI.CLI, CS, ArgBeginIndex, CS.getNumArgOperands(), Callee,
+ ForceVoidReturnTy ? Type::getVoidTy(*DAG.getContext()) : CS.getType(),
+ false);
+ if (!VarArgDisallowed)
+ SI.CLI.IsVarArg = CS.getFunctionType()->isVarArg();
+
+ auto DeoptBundle = *CS.getOperandBundle(LLVMContext::OB_deopt);
+
+ unsigned DefaultID = StatepointDirectives::DeoptBundleStatepointID;
+
+ auto SD = parseStatepointDirectivesFromAttrs(CS.getAttributes());
+ SI.ID = SD.StatepointID.getValueOr(DefaultID);
+ SI.NumPatchBytes = SD.NumPatchBytes.getValueOr(0);
+
+ SI.DeoptState =
+ ArrayRef<const Use>(DeoptBundle.Inputs.begin(), DeoptBundle.Inputs.end());
+ SI.StatepointFlags = static_cast<uint64_t>(StatepointFlags::None);
+ SI.EHPadBB = EHPadBB;
+
+ // NB! The GC arguments are deliberately left empty.
+
+ if (SDValue ReturnVal = LowerAsSTATEPOINT(SI)) {
+ const Instruction *Inst = CS.getInstruction();
+ ReturnVal = lowerRangeToAssertZExt(DAG, *Inst, ReturnVal);
+ setValue(Inst, ReturnVal);
+ }
}
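
The statepoint ID and patch-byte count above fall back to fixed defaults when the call site's attributes don't specify them. A tiny sketch of that defaulting idiom with std::optional, whose value_or is the std analogue of llvm::Optional::getValueOr:

    #include <cstdint>
    #include <optional>

    uint64_t pickStatepointID(std::optional<uint64_t> FromAttrs,
                              uint64_t DefaultID) {
      // Use the attribute-provided ID when present, else the fixed default.
      return FromAttrs.value_or(DefaultID);
    }
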
-void SelectionDAGBuilder::visitGCResult(const CallInst &CI) {
+void SelectionDAGBuilder::LowerCallSiteWithDeoptBundle(
+ ImmutableCallSite CS, SDValue Callee, const BasicBlock *EHPadBB) {
+ LowerCallSiteWithDeoptBundleImpl(CS, Callee, EHPadBB,
+ /* VarArgDisallowed = */ false,
+ /* ForceVoidReturnTy = */ false);
+}
+
+void SelectionDAGBuilder::visitGCResult(const GCResultInst &CI) {
// The result value of the gc_result is simply the result of the actual
// call. We've already emitted this, so just grab the value.
- Instruction *I = cast<Instruction>(CI.getArgOperand(0));
- assert(isStatepoint(I) && "first argument must be a statepoint token");
+ const Instruction *I = CI.getStatepoint();
if (I->getParent() != CI.getParent()) {
// The statepoint is in a different basic block, so we should have stored
// the call result in a virtual register.
// We cannot use the default getValue() functionality to copy the value from this
- // register because statepoint and actuall call return types can be
+ // register because statepoint and actual call return types can be
// different, and getValue() will use CopyFromReg of the wrong type,
// which is always i32 in our case.
PointerType *CalleeType = cast<PointerType>(
@@ -864,20 +892,21 @@ void SelectionDAGBuilder::visitGCRelocate(const GCRelocateInst &Relocate) {
// We skip this check for relocates not in the same basic block as their
// statepoint. It would be too expensive to preserve validation info through
// different basic blocks.
- if (Relocate.getStatepoint()->getParent() == Relocate.getParent()) {
+ if (Relocate.getStatepoint()->getParent() == Relocate.getParent())
StatepointLowering.relocCallVisited(Relocate);
- }
+
+ auto *Ty = Relocate.getType()->getScalarType();
+ if (auto IsManaged = GFI->getStrategy().isGCManagedPointer(Ty))
+ assert(*IsManaged && "Non gc managed pointer relocated!");
#endif
const Value *DerivedPtr = Relocate.getDerivedPtr();
SDValue SD = getValue(DerivedPtr);
- FunctionLoweringInfo::StatepointSpilledValueMapTy &SpillMap =
- FuncInfo.StatepointRelocatedValues[Relocate.getStatepoint()];
-
- // We should have recorded location for this pointer
- assert(SpillMap.count(DerivedPtr) && "Relocating not lowered gc value");
- Optional<int> DerivedPtrLocation = SpillMap[DerivedPtr];
+ auto &SpillMap = FuncInfo.StatepointSpillMaps[Relocate.getStatepoint()];
+ auto SlotIt = SpillMap.find(DerivedPtr);
+ assert(SlotIt != SpillMap.end() && "Relocating not lowered gc value");
+ Optional<int> DerivedPtrLocation = SlotIt->second;
// We didn't need to spill these special cases (constants and allocas).
// See the handling in spillIncomingStatepointValue for details.
@@ -897,8 +926,7 @@ void SelectionDAGBuilder::visitGCRelocate(const GCRelocateInst &Relocate) {
SDValue SpillLoad =
DAG.getLoad(SpillSlot.getValueType(), getCurSDLoc(), Chain, SpillSlot,
MachinePointerInfo::getFixedStack(DAG.getMachineFunction(),
- *DerivedPtrLocation),
- false, false, false, 0);
+ *DerivedPtrLocation));
// Again, be conservative, don't emit pending loads
DAG.setRoot(SpillLoad.getValue(1));
@@ -906,3 +934,25 @@ void SelectionDAGBuilder::visitGCRelocate(const GCRelocateInst &Relocate) {
assert(SpillLoad.getNode());
setValue(&Relocate, SpillLoad);
}
+
+void SelectionDAGBuilder::LowerDeoptimizeCall(const CallInst *CI) {
+ const auto &TLI = DAG.getTargetLoweringInfo();
+ SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(RTLIB::DEOPTIMIZE),
+ TLI.getPointerTy(DAG.getDataLayout()));
+
+ // We don't lower calls to __llvm_deoptimize as varargs, but as a regular
+ // call. We also do not lower the return value to any virtual register, and
+ // change the immediately following return to a trap instruction.
+ LowerCallSiteWithDeoptBundleImpl(CI, Callee, /* EHPadBB = */ nullptr,
+ /* VarArgDisallowed = */ true,
+ /* ForceVoidReturnTy = */ true);
+}
+
+void SelectionDAGBuilder::LowerDeoptimizingReturn() {
+ // We do not lower the return value from llvm.deoptimize to any virtual
+ // register, and change the immediately following return to a trap
+ // instruction.
+ if (DAG.getTarget().Options.TrapUnreachable)
+ DAG.setRoot(
+ DAG.getNode(ISD::TRAP, getCurSDLoc(), MVT::Other, DAG.getRoot()));
+}
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.h b/contrib/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.h
index 82d0c62..b043184 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.h
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.h
@@ -16,9 +16,9 @@
#define LLVM_LIB_CODEGEN_SELECTIONDAG_STATEPOINTLOWERING_H
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallBitVector.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
-#include <vector>
namespace llvm {
class SelectionDAGBuilder;
@@ -45,15 +45,17 @@ public:
/// statepoint. Will return SDValue() if this value hasn't been
/// spilled. Otherwise, the value has already been spilled and no
/// further action is required by the caller.
- SDValue getLocation(SDValue val) {
- if (!Locations.count(val))
+ SDValue getLocation(SDValue Val) {
+ auto I = Locations.find(Val);
+ if (I == Locations.end())
return SDValue();
- return Locations[val];
+ return I->second;
}
- void setLocation(SDValue val, SDValue Location) {
- assert(!Locations.count(val) &&
+
+ void setLocation(SDValue Val, SDValue Location) {
+ assert(!Locations.count(Val) &&
"Trying to allocate already allocated location");
- Locations[val] = Location;
+ Locations[Val] = Location;
}
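
The rewritten getLocation above replaces a count() followed by operator[] with a single find(), so the key is hashed once instead of twice. The same idiom in standalone form, assuming an ordinary hash map:

    #include <unordered_map>

    // One lookup instead of two: find() both tests membership and yields the
    // mapped value when the key is present.
    int getOrDefault(const std::unordered_map<int, int> &M, int Key) {
      auto It = M.find(Key);
      return It == M.end() ? 0 : It->second;
    }
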
/// Record the fact that we expect to encounter a given gc_relocate
@@ -62,16 +64,15 @@ public:
void scheduleRelocCall(const CallInst &RelocCall) {
PendingGCRelocateCalls.push_back(&RelocCall);
}
+
/// Remove this gc_relocate from the list we're expecting to see
/// before the next statepoint. If we weren't expecting to see
/// it, we'll report an assertion.
void relocCallVisited(const CallInst &RelocCall) {
- SmallVectorImpl<const CallInst *>::iterator itr =
- std::find(PendingGCRelocateCalls.begin(), PendingGCRelocateCalls.end(),
- &RelocCall);
- assert(itr != PendingGCRelocateCalls.end() &&
+ auto I = find(PendingGCRelocateCalls, &RelocCall);
+ assert(I != PendingGCRelocateCalls.end() &&
"Visited unexpected gcrelocate call");
- PendingGCRelocateCalls.erase(itr);
+ PendingGCRelocateCalls.erase(I);
}
// TODO: Should add consistency tracking to ensure we encounter
@@ -84,14 +85,15 @@ public:
void reserveStackSlot(int Offset) {
assert(Offset >= 0 && Offset < (int)AllocatedStackSlots.size() &&
"out of bounds");
- assert(!AllocatedStackSlots[Offset] && "already reserved!");
+ assert(!AllocatedStackSlots.test(Offset) && "already reserved!");
assert(NextSlotToAllocate <= (unsigned)Offset && "consistency!");
- AllocatedStackSlots[Offset] = true;
+ AllocatedStackSlots.set(Offset);
}
+
bool isStackSlotAllocated(int Offset) {
assert(Offset >= 0 && Offset < (int)AllocatedStackSlots.size() &&
"out of bounds");
- return AllocatedStackSlots[Offset];
+ return AllocatedStackSlots.test(Offset);
}
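
Switching from SmallVector<bool> to SmallBitVector keeps the same reserve/test protocol while packing the flags into machine words. A hedged sketch of the protocol, with std::vector<bool> standing in for the bit vector:

    #include <cassert>
    #include <vector>

    struct SlotTracker {
      std::vector<bool> Allocated; // one flag per stack slot

      void reserveStackSlot(int Offset) {
        assert(!Allocated[Offset] && "already reserved!");
        Allocated[Offset] = true; // SmallBitVector::set(Offset) in the real code
      }
      bool isStackSlotAllocated(int Offset) const {
        return Allocated[Offset]; // SmallBitVector::test(Offset)
      }
    };
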
private:
@@ -103,7 +105,7 @@ private:
/// whether it has been used in the current statepoint. Since we try to
/// preserve stack slots across safepoints, there can be gaps in which
/// slots have been allocated.
- SmallVector<bool, 50> AllocatedStackSlots;
+ SmallBitVector AllocatedStackSlots;
/// Points just beyond the last slot known to have been allocated
unsigned NextSlotToAllocate;
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index c64d882..806646f 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -14,10 +14,11 @@
#include "llvm/Target/TargetLowering.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/STLExtras.h"
-#include "llvm/CodeGen/Analysis.h"
+#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
@@ -25,7 +26,6 @@
#include "llvm/IR/LLVMContext.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCExpr.h"
-#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
@@ -43,6 +43,10 @@ const char *TargetLowering::getTargetNodeName(unsigned Opcode) const {
return nullptr;
}
+bool TargetLowering::isPositionIndependent() const {
+ return getTargetMachine().isPositionIndependent();
+}
+
/// Check whether a given call node is in tail position within its function. If
/// so, it sets Chain to the input chain of the tail call.
bool TargetLowering::isInTailCallPosition(SelectionDAG &DAG, SDNode *Node,
@@ -65,6 +69,31 @@ bool TargetLowering::isInTailCallPosition(SelectionDAG &DAG, SDNode *Node,
return isUsedByReturnOnly(Node, Chain);
}
+bool TargetLowering::parametersInCSRMatch(const MachineRegisterInfo &MRI,
+ const uint32_t *CallerPreservedMask,
+ const SmallVectorImpl<CCValAssign> &ArgLocs,
+ const SmallVectorImpl<SDValue> &OutVals) const {
+ for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
+ const CCValAssign &ArgLoc = ArgLocs[I];
+ if (!ArgLoc.isRegLoc())
+ continue;
+ unsigned Reg = ArgLoc.getLocReg();
+ // Only look at callee saved registers.
+ if (MachineOperand::clobbersPhysReg(CallerPreservedMask, Reg))
+ continue;
+ // Check that we pass the value used for the caller.
+ // (We look for a CopyFromReg reading a virtual register that is used
+ // for the function live-in value of register Reg)
+ SDValue Value = OutVals[I];
+ if (Value->getOpcode() != ISD::CopyFromReg)
+ return false;
+ unsigned ArgReg = cast<RegisterSDNode>(Value->getOperand(1))->getReg();
+ if (MRI.getLiveInPhysReg(ArgReg) != Reg)
+ return false;
+ }
+ return true;
+}
+
/// \brief Set CallLoweringInfo attribute flags based on a call instruction
/// and called function attributes.
void TargetLowering::ArgListEntry::setAttributes(ImmutableCallSite *CS,
@@ -77,17 +106,17 @@ void TargetLowering::ArgListEntry::setAttributes(ImmutableCallSite *CS,
isByVal = CS->paramHasAttr(AttrIdx, Attribute::ByVal);
isInAlloca = CS->paramHasAttr(AttrIdx, Attribute::InAlloca);
isReturned = CS->paramHasAttr(AttrIdx, Attribute::Returned);
+ isSwiftSelf = CS->paramHasAttr(AttrIdx, Attribute::SwiftSelf);
+ isSwiftError = CS->paramHasAttr(AttrIdx, Attribute::SwiftError);
Alignment = CS->getParamAlignment(AttrIdx);
}
/// Generate a libcall taking the given operands as arguments and returning a
/// result of type RetVT.
std::pair<SDValue, SDValue>
-TargetLowering::makeLibCall(SelectionDAG &DAG,
- RTLIB::Libcall LC, EVT RetVT,
- ArrayRef<SDValue> Ops,
- bool isSigned, SDLoc dl,
- bool doesNotReturn,
+TargetLowering::makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT,
+ ArrayRef<SDValue> Ops, bool isSigned,
+ const SDLoc &dl, bool doesNotReturn,
bool isReturnValueUsed) const {
TargetLowering::ArgListTy Args;
Args.reserve(Ops.size());
@@ -110,7 +139,7 @@ TargetLowering::makeLibCall(SelectionDAG &DAG,
TargetLowering::CallLoweringInfo CLI(DAG);
bool signExtend = shouldSignExtendTypeInLibCall(RetVT, isSigned);
CLI.setDebugLoc(dl).setChain(DAG.getEntryNode())
- .setCallee(getLibcallCallingConv(LC), RetTy, Callee, std::move(Args), 0)
+ .setCallee(getLibcallCallingConv(LC), RetTy, Callee, std::move(Args))
.setNoReturn(doesNotReturn).setDiscardResult(!isReturnValueUsed)
.setSExtResult(signExtend).setZExtResult(!signExtend);
return LowerCallTo(CLI);
@@ -121,8 +150,8 @@ TargetLowering::makeLibCall(SelectionDAG &DAG,
void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT,
SDValue &NewLHS, SDValue &NewRHS,
ISD::CondCode &CCCode,
- SDLoc dl) const {
- assert((VT == MVT::f32 || VT == MVT::f64 || VT == MVT::f128)
+ const SDLoc &dl) const {
+ assert((VT == MVT::f32 || VT == MVT::f64 || VT == MVT::f128 || VT == MVT::ppcf128)
&& "Unsupported setcc type!");
// Expand into one or more soft-fp libcall(s).
@@ -132,53 +161,65 @@ void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT,
case ISD::SETEQ:
case ISD::SETOEQ:
LC1 = (VT == MVT::f32) ? RTLIB::OEQ_F32 :
- (VT == MVT::f64) ? RTLIB::OEQ_F64 : RTLIB::OEQ_F128;
+ (VT == MVT::f64) ? RTLIB::OEQ_F64 :
+ (VT == MVT::f128) ? RTLIB::OEQ_F128 : RTLIB::OEQ_PPCF128;
break;
case ISD::SETNE:
case ISD::SETUNE:
LC1 = (VT == MVT::f32) ? RTLIB::UNE_F32 :
- (VT == MVT::f64) ? RTLIB::UNE_F64 : RTLIB::UNE_F128;
+ (VT == MVT::f64) ? RTLIB::UNE_F64 :
+ (VT == MVT::f128) ? RTLIB::UNE_F128 : RTLIB::UNE_PPCF128;
break;
case ISD::SETGE:
case ISD::SETOGE:
LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 :
- (VT == MVT::f64) ? RTLIB::OGE_F64 : RTLIB::OGE_F128;
+ (VT == MVT::f64) ? RTLIB::OGE_F64 :
+ (VT == MVT::f128) ? RTLIB::OGE_F128 : RTLIB::OGE_PPCF128;
break;
case ISD::SETLT:
case ISD::SETOLT:
LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
- (VT == MVT::f64) ? RTLIB::OLT_F64 : RTLIB::OLT_F128;
+ (VT == MVT::f64) ? RTLIB::OLT_F64 :
+ (VT == MVT::f128) ? RTLIB::OLT_F128 : RTLIB::OLT_PPCF128;
break;
case ISD::SETLE:
case ISD::SETOLE:
LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 :
- (VT == MVT::f64) ? RTLIB::OLE_F64 : RTLIB::OLE_F128;
+ (VT == MVT::f64) ? RTLIB::OLE_F64 :
+ (VT == MVT::f128) ? RTLIB::OLE_F128 : RTLIB::OLE_PPCF128;
break;
case ISD::SETGT:
case ISD::SETOGT:
LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
- (VT == MVT::f64) ? RTLIB::OGT_F64 : RTLIB::OGT_F128;
+ (VT == MVT::f64) ? RTLIB::OGT_F64 :
+ (VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128;
break;
case ISD::SETUO:
LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 :
- (VT == MVT::f64) ? RTLIB::UO_F64 : RTLIB::UO_F128;
+ (VT == MVT::f64) ? RTLIB::UO_F64 :
+ (VT == MVT::f128) ? RTLIB::UO_F128 : RTLIB::UO_PPCF128;
break;
case ISD::SETO:
LC1 = (VT == MVT::f32) ? RTLIB::O_F32 :
- (VT == MVT::f64) ? RTLIB::O_F64 : RTLIB::O_F128;
+ (VT == MVT::f64) ? RTLIB::O_F64 :
+ (VT == MVT::f128) ? RTLIB::O_F128 : RTLIB::O_PPCF128;
break;
case ISD::SETONE:
// SETONE = SETOLT | SETOGT
LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
- (VT == MVT::f64) ? RTLIB::OLT_F64 : RTLIB::OLT_F128;
+ (VT == MVT::f64) ? RTLIB::OLT_F64 :
+ (VT == MVT::f128) ? RTLIB::OLT_F128 : RTLIB::OLT_PPCF128;
LC2 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
- (VT == MVT::f64) ? RTLIB::OGT_F64 : RTLIB::OGT_F128;
+ (VT == MVT::f64) ? RTLIB::OGT_F64 :
+ (VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128;
break;
case ISD::SETUEQ:
LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 :
- (VT == MVT::f64) ? RTLIB::UO_F64 : RTLIB::UO_F128;
+ (VT == MVT::f64) ? RTLIB::UO_F64 :
+ (VT == MVT::f128) ? RTLIB::UO_F128 : RTLIB::UO_PPCF128;
LC2 = (VT == MVT::f32) ? RTLIB::OEQ_F32 :
- (VT == MVT::f64) ? RTLIB::OEQ_F64 : RTLIB::OEQ_F128;
+ (VT == MVT::f64) ? RTLIB::OEQ_F64 :
+ (VT == MVT::f128) ? RTLIB::OEQ_F128 : RTLIB::OEQ_PPCF128;
break;
default:
// Invert CC for unordered comparisons
@@ -186,19 +227,23 @@ void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT,
switch (CCCode) {
case ISD::SETULT:
LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 :
- (VT == MVT::f64) ? RTLIB::OGE_F64 : RTLIB::OGE_F128;
+ (VT == MVT::f64) ? RTLIB::OGE_F64 :
+ (VT == MVT::f128) ? RTLIB::OGE_F128 : RTLIB::OGE_PPCF128;
break;
case ISD::SETULE:
LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
- (VT == MVT::f64) ? RTLIB::OGT_F64 : RTLIB::OGT_F128;
+ (VT == MVT::f64) ? RTLIB::OGT_F64 :
+ (VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128;
break;
case ISD::SETUGT:
LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 :
- (VT == MVT::f64) ? RTLIB::OLE_F64 : RTLIB::OLE_F128;
+ (VT == MVT::f64) ? RTLIB::OLE_F64 :
+ (VT == MVT::f128) ? RTLIB::OLE_F128 : RTLIB::OLE_PPCF128;
break;
case ISD::SETUGE:
LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
- (VT == MVT::f64) ? RTLIB::OLT_F64 : RTLIB::OLT_F128;
+ (VT == MVT::f64) ? RTLIB::OLT_F64 :
+ (VT == MVT::f128) ? RTLIB::OLT_F128 : RTLIB::OLT_PPCF128;
break;
default: llvm_unreachable("Do not know how to soften this setcc!");
}
@@ -235,7 +280,7 @@ void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT,
/// returned value is a member of the MachineJumpTableInfo::JTEntryKind enum.
unsigned TargetLowering::getJumpTableEncoding() const {
// In non-pic modes, just use the address of a block.
- if (getTargetMachine().getRelocationModel() != Reloc::PIC_)
+ if (!isPositionIndependent())
return MachineJumpTableInfo::EK_BlockAddress;
// In PIC mode, if the target supports a GPRel32 directive, use it.
@@ -269,17 +314,20 @@ TargetLowering::getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
bool
TargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
- // Assume that everything is safe in static mode.
- if (getTargetMachine().getRelocationModel() == Reloc::Static)
- return true;
+ const TargetMachine &TM = getTargetMachine();
+ const GlobalValue *GV = GA->getGlobal();
- // In dynamic-no-pic mode, assume that known defined values are safe.
- if (getTargetMachine().getRelocationModel() == Reloc::DynamicNoPIC &&
- GA && GA->getGlobal()->isStrongDefinitionForLinker())
- return true;
+ // If the address is not even local to this DSO, we will have to load it
+ // from a GOT and then add the offset.
+ if (!TM.shouldAssumeDSOLocal(*GV->getParent(), GV))
+ return false;
- // Otherwise assume nothing is safe.
- return false;
+ // If the code is position independent we will have to add a base register.
+ if (isPositionIndependent())
+ return false;
+
+ // Otherwise we can do it.
+ return true;
}
//===----------------------------------------------------------------------===//
@@ -326,11 +374,10 @@ bool TargetLowering::TargetLoweringOpt::ShrinkDemandedConstant(SDValue Op,
/// Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the casts are free.
/// This uses isZExtFree and ZERO_EXTEND for the widening cast, but it could be
/// generalized for targets with other types of implicit widening casts.
-bool
-TargetLowering::TargetLoweringOpt::ShrinkDemandedOp(SDValue Op,
- unsigned BitWidth,
- const APInt &Demanded,
- SDLoc dl) {
+bool TargetLowering::TargetLoweringOpt::ShrinkDemandedOp(SDValue Op,
+ unsigned BitWidth,
+ const APInt &Demanded,
+ const SDLoc &dl) {
assert(Op.getNumOperands() == 2 &&
"ShrinkDemandedOp only supports binary operators!");
assert(Op.getNode()->getNumValues() == 1 &&
@@ -407,7 +454,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
NewMask = APInt::getAllOnesValue(BitWidth);
} else if (DemandedMask == 0) {
// Not demanding any bits from Op.
- if (Op.getOpcode() != ISD::UNDEF)
+ if (!Op.isUndef())
return TLO.CombineTo(Op, TLO.DAG.getUNDEF(Op.getValueType()));
return false;
} else if (Depth == 6) { // Limit search depth.
@@ -1157,37 +1204,6 @@ unsigned TargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op,
return 1;
}
-/// Test if the given value is known to have exactly one bit set. This differs
-/// from computeKnownBits in that it doesn't need to determine which bit is set.
-static bool ValueHasExactlyOneBitSet(SDValue Val, const SelectionDAG &DAG) {
- // A left-shift of a constant one will have exactly one bit set, because
- // shifting the bit off the end is undefined.
- if (Val.getOpcode() == ISD::SHL)
- if (ConstantSDNode *C =
- dyn_cast<ConstantSDNode>(Val.getNode()->getOperand(0)))
- if (C->getAPIntValue() == 1)
- return true;
-
- // Similarly, a right-shift of a constant sign-bit will have exactly
- // one bit set.
- if (Val.getOpcode() == ISD::SRL)
- if (ConstantSDNode *C =
- dyn_cast<ConstantSDNode>(Val.getNode()->getOperand(0)))
- if (C->getAPIntValue().isSignBit())
- return true;
-
- // More could be done here, though the above checks are enough
- // to handle some common cases.
-
- // Fall back to computeKnownBits to catch other known cases.
- EVT OpVT = Val.getValueType();
- unsigned BitWidth = OpVT.getScalarType().getSizeInBits();
- APInt KnownZero, KnownOne;
- DAG.computeKnownBits(Val, KnownZero, KnownOne);
- return (KnownZero.countPopulation() == BitWidth - 1) &&
- (KnownOne.countPopulation() == 1);
-}
-
bool TargetLowering::isConstTrueVal(const SDNode *N) const {
if (!N)
return false;
@@ -1218,6 +1234,16 @@ bool TargetLowering::isConstTrueVal(const SDNode *N) const {
llvm_unreachable("Invalid boolean contents");
}
+SDValue TargetLowering::getConstTrueVal(SelectionDAG &DAG, EVT VT,
+ const SDLoc &DL) const {
+ unsigned ElementWidth = VT.getScalarSizeInBits();
+ APInt TrueInt =
+ getBooleanContents(VT) == TargetLowering::ZeroOrOneBooleanContent
+ ? APInt(ElementWidth, 1)
+ : APInt::getAllOnesValue(ElementWidth);
+ return DAG.getConstant(TrueInt, DL, VT);
+}
+
bool TargetLowering::isConstFalseVal(const SDNode *N) const {
if (!N)
return false;
@@ -1242,12 +1268,91 @@ bool TargetLowering::isConstFalseVal(const SDNode *N) const {
return CN->isNullValue();
}
+bool TargetLowering::isExtendedTrueVal(const ConstantSDNode *N, EVT VT,
+ bool SExt) const {
+ if (VT == MVT::i1)
+ return N->isOne();
+
+ TargetLowering::BooleanContent Cnt = getBooleanContents(VT);
+ switch (Cnt) {
+ case TargetLowering::ZeroOrOneBooleanContent:
+ // An extended value of 1 is always true, unless its original type is i1,
+ // in which case it will be sign extended to -1.
+ return (N->isOne() && !SExt) || (SExt && (N->getValueType(0) != MVT::i1));
+ case TargetLowering::UndefinedBooleanContent:
+ case TargetLowering::ZeroOrNegativeOneBooleanContent:
+ return N->isAllOnesValue() && SExt;
+ }
+ llvm_unreachable("Unexpected enumeration.");
+}
+
+/// This helper function of SimplifySetCC tries to optimize the comparison when
+/// either operand of the SetCC node is a bitwise-and instruction.
+SDValue TargetLowering::simplifySetCCWithAnd(EVT VT, SDValue N0, SDValue N1,
+ ISD::CondCode Cond,
+ DAGCombinerInfo &DCI,
+ const SDLoc &DL) const {
+ // Match these patterns in any of their permutations:
+ // (X & Y) == Y
+ // (X & Y) != Y
+ if (N1.getOpcode() == ISD::AND && N0.getOpcode() != ISD::AND)
+ std::swap(N0, N1);
+
+ EVT OpVT = N0.getValueType();
+ if (N0.getOpcode() != ISD::AND || !OpVT.isInteger() ||
+ (Cond != ISD::SETEQ && Cond != ISD::SETNE))
+ return SDValue();
+
+ SDValue X, Y;
+ if (N0.getOperand(0) == N1) {
+ X = N0.getOperand(1);
+ Y = N0.getOperand(0);
+ } else if (N0.getOperand(1) == N1) {
+ X = N0.getOperand(0);
+ Y = N0.getOperand(1);
+ } else {
+ return SDValue();
+ }
+
+ SelectionDAG &DAG = DCI.DAG;
+ SDValue Zero = DAG.getConstant(0, DL, OpVT);
+ if (DAG.isKnownToBeAPowerOfTwo(Y)) {
+ // Simplify X & Y == Y to X & Y != 0 if Y has exactly one bit set.
+ // Note that where Y is variable and is known to have at most one bit set
+ // (for example, if it is Z & 1) we cannot do this; the expressions are not
+ // equivalent when Y == 0.
+ Cond = ISD::getSetCCInverse(Cond, /*isInteger=*/true);
+ if (DCI.isBeforeLegalizeOps() ||
+ isCondCodeLegal(Cond, N0.getSimpleValueType()))
+ return DAG.getSetCC(DL, VT, N0, Zero, Cond);
+ } else if (N0.hasOneUse() && hasAndNotCompare(Y)) {
+ // If the target supports an 'and-not' or 'and-complement' logic operation,
+ // try to use that to make a comparison operation more efficient.
+ // But don't do this transform if the mask is a single bit because there are
+ // more efficient ways to deal with that case (for example, 'bt' on x86 or
+ // 'rlwinm' on PPC).
+
+ // Bail out if the compare operand that we want to turn into a zero is
+ // already a zero (otherwise, infinite loop).
+ auto *YConst = dyn_cast<ConstantSDNode>(Y);
+ if (YConst && YConst->isNullValue())
+ return SDValue();
+
+ // Transform this into: ~X & Y == 0.
+ SDValue NotX = DAG.getNOT(SDLoc(X), X, OpVT);
+ SDValue NewAnd = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, NotX, Y);
+ return DAG.getSetCC(DL, VT, NewAnd, Zero, Cond);
+ }
+
+ return SDValue();
+}
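
Both rewrites in simplifySetCCWithAnd rest on bit-level identities: for a power-of-two Y, (X & Y) == Y collapses to (X & Y) != 0, and for arbitrary Y, (X & Y) == Y is equivalent to (~X & Y) == 0. A self-contained check of both, illustrative only and exhaustive only over a few sample values:

    #include <cassert>
    #include <cstdint>

    int main() {
      for (uint32_t X : {0u, 1u, 4u, 5u, 0xFFFFFFFFu}) {
        uint32_t Y = 4; // exactly one bit set
        assert(((X & Y) == Y) == ((X & Y) != 0));
      }
      for (uint32_t X : {0u, 3u, 12u, 0xF0u})
        for (uint32_t Y : {0u, 5u, 12u, 0xFFu})
          assert(((X & Y) == Y) == ((~X & Y) == 0));
      return 0;
    }
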
+
/// Try to simplify a setcc built with the specified operands and cc. If it is
/// unable to simplify it, return a null SDValue.
-SDValue
-TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
- ISD::CondCode Cond, bool foldBooleans,
- DAGCombinerInfo &DCI, SDLoc dl) const {
+SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
+ ISD::CondCode Cond, bool foldBooleans,
+ DAGCombinerInfo &DCI,
+ const SDLoc &dl) const {
SelectionDAG &DAG = DCI.DAG;
// These setcc operations always fold.
@@ -1376,6 +1481,38 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
SDValue C = DAG.getConstant(C1.trunc(MinBits), dl, MinVT);
return DAG.getSetCC(dl, VT, Trunc, C, Cond);
}
+
+ // If truncating the setcc operands is not desirable, we can still
+ // simplify the expression in some cases:
+ // setcc ([sz]ext (setcc x, y, cc)), 0, setne) -> setcc (x, y, cc)
+ // setcc ([sz]ext (setcc x, y, cc)), 0, seteq) -> setcc (x, y, inv(cc))
+ // setcc (zext (setcc x, y, cc)), 1, setne) -> setcc (x, y, inv(cc))
+ // setcc (zext (setcc x, y, cc)), 1, seteq) -> setcc (x, y, cc)
+ // setcc (sext (setcc x, y, cc)), -1, setne) -> setcc (x, y, inv(cc))
+ // setcc (sext (setcc x, y, cc)), -1, seteq) -> setcc (x, y, cc)
+ SDValue TopSetCC = N0->getOperand(0);
+ unsigned N0Opc = N0->getOpcode();
+ bool SExt = (N0Opc == ISD::SIGN_EXTEND);
+ if (TopSetCC.getValueType() == MVT::i1 && VT == MVT::i1 &&
+ TopSetCC.getOpcode() == ISD::SETCC &&
+ (N0Opc == ISD::ZERO_EXTEND || N0Opc == ISD::SIGN_EXTEND) &&
+ (isConstFalseVal(N1C) ||
+ isExtendedTrueVal(N1C, N0->getValueType(0), SExt))) {
+
+ bool Inverse = (N1C->isNullValue() && Cond == ISD::SETEQ) ||
+ (!N1C->isNullValue() && Cond == ISD::SETNE);
+
+ if (!Inverse)
+ return TopSetCC;
+
+ ISD::CondCode InvCond = ISD::getSetCCInverse(
+ cast<CondCodeSDNode>(TopSetCC.getOperand(2))->get(),
+ TopSetCC.getOperand(0).getValueType().isInteger());
+ return DAG.getSetCC(dl, VT, TopSetCC.getOperand(0),
+ TopSetCC.getOperand(1),
+ InvCond);
+
+ }
}
}
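
The fold table above can be sanity-checked on concrete i1 values: zext maps true to 1, sext maps it to -1, and comparing against 0, 1, or -1 recovers (or inverts) the original setcc. A small check, illustrative only:

    #include <cassert>
    #include <cstdint>

    int main() {
      for (bool B : {false, true}) {
        uint32_t Z = B ? 1u : 0u; // zext i1 -> i32
        int32_t S = B ? -1 : 0;   // sext i1 -> i32
        assert((Z != 0) == B && (Z == 0) == !B); // setne/seteq against 0
        assert((Z == 1) == B);                   // seteq against 1 (zext)
        assert((S == -1) == B);                  // seteq against -1 (sext)
      }
      return 0;
    }
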
@@ -1426,9 +1563,9 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
Ptr = DAG.getNode(ISD::ADD, dl, PtrType, Lod->getBasePtr(),
DAG.getConstant(bestOffset, dl, PtrType));
unsigned NewAlign = MinAlign(Lod->getAlignment(), bestOffset);
- SDValue NewLoad = DAG.getLoad(newVT, dl, Lod->getChain(), Ptr,
- Lod->getPointerInfo().getWithOffset(bestOffset),
- false, false, false, NewAlign);
+ SDValue NewLoad = DAG.getLoad(
+ newVT, dl, Lod->getChain(), Ptr,
+ Lod->getPointerInfo().getWithOffset(bestOffset), NewAlign);
return DAG.getSetCC(dl, VT,
DAG.getNode(ISD::AND, dl, newVT, NewLoad,
DAG.getConstant(bestMask.trunc(bestWidth),
@@ -1994,32 +2131,8 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
}
}
- // Simplify x&y == y to x&y != 0 if y has exactly one bit set.
- // Note that where y is variable and is known to have at most
- // one bit set (for example, if it is z&1) we cannot do this;
- // the expressions are not equivalent when y==0.
- if (N0.getOpcode() == ISD::AND)
- if (N0.getOperand(0) == N1 || N0.getOperand(1) == N1) {
- if (ValueHasExactlyOneBitSet(N1, DAG)) {
- Cond = ISD::getSetCCInverse(Cond, /*isInteger=*/true);
- if (DCI.isBeforeLegalizeOps() ||
- isCondCodeLegal(Cond, N0.getSimpleValueType())) {
- SDValue Zero = DAG.getConstant(0, dl, N1.getValueType());
- return DAG.getSetCC(dl, VT, N0, Zero, Cond);
- }
- }
- }
- if (N1.getOpcode() == ISD::AND)
- if (N1.getOperand(0) == N0 || N1.getOperand(1) == N0) {
- if (ValueHasExactlyOneBitSet(N0, DAG)) {
- Cond = ISD::getSetCCInverse(Cond, /*isInteger=*/true);
- if (DCI.isBeforeLegalizeOps() ||
- isCondCodeLegal(Cond, N1.getSimpleValueType())) {
- SDValue Zero = DAG.getConstant(0, dl, N0.getValueType());
- return DAG.getSetCC(dl, VT, N1, Zero, Cond);
- }
- }
- }
+ if (SDValue V = simplifySetCCWithAnd(VT, N0, N1, Cond, DCI, dl))
+ return V;
}
// Fold away ALL boolean setcc's.
@@ -2202,8 +2315,10 @@ void TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
C = dyn_cast<ConstantSDNode>(Op.getOperand(0));
GA = dyn_cast<GlobalAddressSDNode>(Op.getOperand(1));
}
- if (!C || !GA)
- C = nullptr, GA = nullptr;
+ if (!C || !GA) {
+ C = nullptr;
+ GA = nullptr;
+ }
}
// If we find a valid operand, map to the TargetXXX version so that the
@@ -2260,7 +2375,7 @@ TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *RI,
for (TargetRegisterClass::iterator I = RC->begin(), E = RC->end();
I != E; ++I) {
- if (RegName.equals_lower(RI->getName(*I))) {
+ if (RegName.equals_lower(RI->getRegAsmName(*I))) {
std::pair<unsigned, const TargetRegisterClass*> S =
std::make_pair(*I, RC);
@@ -2680,7 +2795,7 @@ void TargetLowering::ComputeConstraintToUse(AsmOperandInfo &OpInfo,
/// \brief Given an exact SDIV by a constant, create a multiplication
/// with the multiplicative inverse of the constant.
static SDValue BuildExactSDIV(const TargetLowering &TLI, SDValue Op1, APInt d,
- SDLoc dl, SelectionDAG &DAG,
+ const SDLoc &dl, SelectionDAG &DAG,
std::vector<SDNode *> &Created) {
assert(d != 0 && "Division by zero!");
@@ -3039,6 +3154,370 @@ bool TargetLowering::expandFP_TO_SINT(SDNode *Node, SDValue &Result,
return true;
}
+SDValue TargetLowering::scalarizeVectorLoad(LoadSDNode *LD,
+ SelectionDAG &DAG) const {
+ SDLoc SL(LD);
+ SDValue Chain = LD->getChain();
+ SDValue BasePTR = LD->getBasePtr();
+ EVT SrcVT = LD->getMemoryVT();
+ ISD::LoadExtType ExtType = LD->getExtensionType();
+
+ unsigned NumElem = SrcVT.getVectorNumElements();
+
+ EVT SrcEltVT = SrcVT.getScalarType();
+ EVT DstEltVT = LD->getValueType(0).getScalarType();
+
+ unsigned Stride = SrcEltVT.getSizeInBits() / 8;
+ assert(SrcEltVT.isByteSized());
+
+ EVT PtrVT = BasePTR.getValueType();
+
+ SmallVector<SDValue, 8> Vals;
+ SmallVector<SDValue, 8> LoadChains;
+
+ for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
+ SDValue ScalarLoad =
+ DAG.getExtLoad(ExtType, SL, DstEltVT, Chain, BasePTR,
+ LD->getPointerInfo().getWithOffset(Idx * Stride),
+ SrcEltVT, MinAlign(LD->getAlignment(), Idx * Stride),
+ LD->getMemOperand()->getFlags(), LD->getAAInfo());
+
+ BasePTR = DAG.getNode(ISD::ADD, SL, PtrVT, BasePTR,
+ DAG.getConstant(Stride, SL, PtrVT));
+
+ Vals.push_back(ScalarLoad.getValue(0));
+ LoadChains.push_back(ScalarLoad.getValue(1));
+ }
+
+ SDValue NewChain = DAG.getNode(ISD::TokenFactor, SL, MVT::Other, LoadChains);
+ SDValue Value = DAG.getNode(ISD::BUILD_VECTOR, SL, LD->getValueType(0), Vals);
+
+ return DAG.getMergeValues({ Value, NewChain }, SL);
+}
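
The expansion above performs, in DAG form, an ordinary strided element-by-element copy. A standalone sketch of the same access pattern for 16-bit elements (plain C++, not DAG code; element type chosen for illustration):

    #include <cstdint>
    #include <cstring>
    #include <vector>

    // One scalar load per element at BasePtr + Idx * Stride, then reassemble;
    // BUILD_VECTOR in the DAG corresponds to the std::vector here.
    std::vector<uint16_t> scalarizedLoad(const uint8_t *BasePtr,
                                         unsigned NumElem) {
      const unsigned Stride = sizeof(uint16_t); // element size in bytes
      std::vector<uint16_t> Vals;
      for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
        uint16_t Elt;
        std::memcpy(&Elt, BasePtr + Idx * Stride, sizeof(Elt));
        Vals.push_back(Elt);
      }
      return Vals;
    }
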
+
+// FIXME: This relies on each element having a byte size, otherwise the stride
+// is 0 and just overwrites the same location. ExpandStore currently expects
+// this broken behavior.
+SDValue TargetLowering::scalarizeVectorStore(StoreSDNode *ST,
+ SelectionDAG &DAG) const {
+ SDLoc SL(ST);
+
+ SDValue Chain = ST->getChain();
+ SDValue BasePtr = ST->getBasePtr();
+ SDValue Value = ST->getValue();
+ EVT StVT = ST->getMemoryVT();
+
+ // The type of the data we want to save
+ EVT RegVT = Value.getValueType();
+ EVT RegSclVT = RegVT.getScalarType();
+
+ // The type of data as saved in memory.
+ EVT MemSclVT = StVT.getScalarType();
+
+ EVT PtrVT = BasePtr.getValueType();
+
+ // Store Stride in bytes
+ unsigned Stride = MemSclVT.getSizeInBits() / 8;
+ EVT IdxVT = getVectorIdxTy(DAG.getDataLayout());
+ unsigned NumElem = StVT.getVectorNumElements();
+
+ // Extract each of the elements from the original vector and save them into
+ // memory individually.
+ SmallVector<SDValue, 8> Stores;
+ for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
+ SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, RegSclVT, Value,
+ DAG.getConstant(Idx, SL, IdxVT));
+
+ SDValue Ptr = DAG.getNode(ISD::ADD, SL, PtrVT, BasePtr,
+ DAG.getConstant(Idx * Stride, SL, PtrVT));
+
+ // This scalar TruncStore may be illegal, but we legalize it later.
+ SDValue Store = DAG.getTruncStore(
+ Chain, SL, Elt, Ptr, ST->getPointerInfo().getWithOffset(Idx * Stride),
+ MemSclVT, MinAlign(ST->getAlignment(), Idx * Stride),
+ ST->getMemOperand()->getFlags(), ST->getAAInfo());
+
+ Stores.push_back(Store);
+ }
+
+ return DAG.getNode(ISD::TokenFactor, SL, MVT::Other, Stores);
+}
+
+std::pair<SDValue, SDValue>
+TargetLowering::expandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG) const {
+ assert(LD->getAddressingMode() == ISD::UNINDEXED &&
+ "unaligned indexed loads not implemented!");
+ SDValue Chain = LD->getChain();
+ SDValue Ptr = LD->getBasePtr();
+ EVT VT = LD->getValueType(0);
+ EVT LoadedVT = LD->getMemoryVT();
+ SDLoc dl(LD);
+ if (VT.isFloatingPoint() || VT.isVector()) {
+ EVT intVT = EVT::getIntegerVT(*DAG.getContext(), LoadedVT.getSizeInBits());
+ if (isTypeLegal(intVT) && isTypeLegal(LoadedVT)) {
+ if (!isOperationLegalOrCustom(ISD::LOAD, intVT)) {
+ // Scalarize the load and let the individual components be handled.
+ SDValue Scalarized = scalarizeVectorLoad(LD, DAG);
+ return std::make_pair(Scalarized.getValue(0), Scalarized.getValue(1));
+ }
+
+ // Expand to a (misaligned) integer load of the same size,
+ // then bitconvert to floating point or vector.
+ SDValue newLoad = DAG.getLoad(intVT, dl, Chain, Ptr,
+ LD->getMemOperand());
+ SDValue Result = DAG.getNode(ISD::BITCAST, dl, LoadedVT, newLoad);
+ if (LoadedVT != VT)
+ Result = DAG.getNode(VT.isFloatingPoint() ? ISD::FP_EXTEND :
+ ISD::ANY_EXTEND, dl, VT, Result);
+
+ return std::make_pair(Result, newLoad.getValue(1));
+ }
+
+ // Copy the value to an (aligned) stack slot using (unaligned) integer
+ // loads and stores, then do an (aligned) load from the stack slot.
+ MVT RegVT = getRegisterType(*DAG.getContext(), intVT);
+ unsigned LoadedBytes = LoadedVT.getSizeInBits() / 8;
+ unsigned RegBytes = RegVT.getSizeInBits() / 8;
+ unsigned NumRegs = (LoadedBytes + RegBytes - 1) / RegBytes;
+
+ // Make sure the stack slot is also aligned for the register type.
+ SDValue StackBase = DAG.CreateStackTemporary(LoadedVT, RegVT);
+
+ SmallVector<SDValue, 8> Stores;
+ SDValue StackPtr = StackBase;
+ unsigned Offset = 0;
+
+ EVT PtrVT = Ptr.getValueType();
+ EVT StackPtrVT = StackPtr.getValueType();
+
+ SDValue PtrIncrement = DAG.getConstant(RegBytes, dl, PtrVT);
+ SDValue StackPtrIncrement = DAG.getConstant(RegBytes, dl, StackPtrVT);
+
+ // Do all but one of the copies using the full register width.
+ for (unsigned i = 1; i < NumRegs; i++) {
+ // Load one integer register's worth from the original location.
+ SDValue Load = DAG.getLoad(
+ RegVT, dl, Chain, Ptr, LD->getPointerInfo().getWithOffset(Offset),
+ MinAlign(LD->getAlignment(), Offset), LD->getMemOperand()->getFlags(),
+ LD->getAAInfo());
+ // Follow the load with a store to the stack slot. Remember the store.
+ Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, StackPtr,
+ MachinePointerInfo()));
+ // Increment the pointers.
+ Offset += RegBytes;
+ Ptr = DAG.getNode(ISD::ADD, dl, PtrVT, Ptr, PtrIncrement);
+ StackPtr = DAG.getNode(ISD::ADD, dl, StackPtrVT, StackPtr,
+ StackPtrIncrement);
+ }
+
+ // The last copy may be partial. Do an extending load.
+ EVT MemVT = EVT::getIntegerVT(*DAG.getContext(),
+ 8 * (LoadedBytes - Offset));
+ SDValue Load =
+ DAG.getExtLoad(ISD::EXTLOAD, dl, RegVT, Chain, Ptr,
+ LD->getPointerInfo().getWithOffset(Offset), MemVT,
+ MinAlign(LD->getAlignment(), Offset),
+ LD->getMemOperand()->getFlags(), LD->getAAInfo());
+ // Follow the load with a store to the stack slot. Remember the store.
+ // On big-endian machines this requires a truncating store to ensure
+ // that the bits end up in the right place.
+ Stores.push_back(DAG.getTruncStore(Load.getValue(1), dl, Load, StackPtr,
+ MachinePointerInfo(), MemVT));
+
+ // The order of the stores doesn't matter - say it with a TokenFactor.
+ SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
+
+ // Finally, perform the original load only redirected to the stack slot.
+ Load = DAG.getExtLoad(LD->getExtensionType(), dl, VT, TF, StackBase,
+ MachinePointerInfo(), LoadedVT);
+
+ // Callers expect a MERGE_VALUES node.
+ return std::make_pair(Load, TF);
+ }
+
+ assert(LoadedVT.isInteger() && !LoadedVT.isVector() &&
+ "Unaligned load of unsupported type.");
+
+ // Compute the new VT that is half the size of the old one. This is an
+ // integer MVT.
+ unsigned NumBits = LoadedVT.getSizeInBits();
+ EVT NewLoadedVT;
+ NewLoadedVT = EVT::getIntegerVT(*DAG.getContext(), NumBits/2);
+ NumBits >>= 1;
+
+ unsigned Alignment = LD->getAlignment();
+ unsigned IncrementSize = NumBits / 8;
+ ISD::LoadExtType HiExtType = LD->getExtensionType();
+
+ // If the original load is NON_EXTLOAD, the hi part load must be ZEXTLOAD.
+ if (HiExtType == ISD::NON_EXTLOAD)
+ HiExtType = ISD::ZEXTLOAD;
+
+ // Load the value in two parts
+ SDValue Lo, Hi;
+ if (DAG.getDataLayout().isLittleEndian()) {
+ Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr, LD->getPointerInfo(),
+ NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
+ LD->getAAInfo());
+ Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
+ DAG.getConstant(IncrementSize, dl, Ptr.getValueType()));
+ Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr,
+ LD->getPointerInfo().getWithOffset(IncrementSize),
+ NewLoadedVT, MinAlign(Alignment, IncrementSize),
+ LD->getMemOperand()->getFlags(), LD->getAAInfo());
+ } else {
+ Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr, LD->getPointerInfo(),
+ NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
+ LD->getAAInfo());
+ Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
+ DAG.getConstant(IncrementSize, dl, Ptr.getValueType()));
+ Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr,
+ LD->getPointerInfo().getWithOffset(IncrementSize),
+ NewLoadedVT, MinAlign(Alignment, IncrementSize),
+ LD->getMemOperand()->getFlags(), LD->getAAInfo());
+ }
+
+ // Aggregate the two parts.
+ SDValue ShiftAmount =
+ DAG.getConstant(NumBits, dl, getShiftAmountTy(Hi.getValueType(),
+ DAG.getDataLayout()));
+ SDValue Result = DAG.getNode(ISD::SHL, dl, VT, Hi, ShiftAmount);
+ Result = DAG.getNode(ISD::OR, dl, VT, Result, Lo);
+
+ SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
+ Hi.getValue(1));
+
+ return std::make_pair(Result, TF);
+}
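
The final integer path splits the load into two half-width loads combined with a shift and an or. The little-endian case, written as ordinary C++ over bytes (a sketch of the semantics, not of the DAG nodes):

    #include <cstdint>
    #include <cstring>

    uint32_t unalignedLoad32LE(const uint8_t *Ptr) {
      uint16_t LoHalf, HiHalf;
      std::memcpy(&LoHalf, Ptr, 2);     // ZEXTLOAD of the low half
      std::memcpy(&HiHalf, Ptr + 2, 2); // hi part, IncrementSize bytes later
      return (uint32_t(HiHalf) << 16) | uint32_t(LoHalf); // SHL + OR
    }
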
+
+SDValue TargetLowering::expandUnalignedStore(StoreSDNode *ST,
+ SelectionDAG &DAG) const {
+ assert(ST->getAddressingMode() == ISD::UNINDEXED &&
+ "unaligned indexed stores not implemented!");
+ SDValue Chain = ST->getChain();
+ SDValue Ptr = ST->getBasePtr();
+ SDValue Val = ST->getValue();
+ EVT VT = Val.getValueType();
+ int Alignment = ST->getAlignment();
+
+ SDLoc dl(ST);
+ if (ST->getMemoryVT().isFloatingPoint() ||
+ ST->getMemoryVT().isVector()) {
+ EVT intVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
+ if (isTypeLegal(intVT)) {
+ if (!isOperationLegalOrCustom(ISD::STORE, intVT)) {
+ // Scalarize the store and let the individual components be handled.
+ SDValue Result = scalarizeVectorStore(ST, DAG);
+
+ return Result;
+ }
+ // Expand to a bitconvert of the value to the integer type of the
+ // same size, then a (misaligned) int store.
+ // FIXME: Does not handle truncating floating point stores!
+ SDValue Result = DAG.getNode(ISD::BITCAST, dl, intVT, Val);
+ Result = DAG.getStore(Chain, dl, Result, Ptr, ST->getPointerInfo(),
+ Alignment, ST->getMemOperand()->getFlags());
+ return Result;
+ }
+ // Do an (aligned) store to a stack slot, then copy from the stack slot
+ // to the final destination using (unaligned) integer loads and stores.
+ EVT StoredVT = ST->getMemoryVT();
+ MVT RegVT =
+ getRegisterType(*DAG.getContext(),
+ EVT::getIntegerVT(*DAG.getContext(),
+ StoredVT.getSizeInBits()));
+ EVT PtrVT = Ptr.getValueType();
+ unsigned StoredBytes = StoredVT.getSizeInBits() / 8;
+ unsigned RegBytes = RegVT.getSizeInBits() / 8;
+ unsigned NumRegs = (StoredBytes + RegBytes - 1) / RegBytes;
+
+ // Make sure the stack slot is also aligned for the register type.
+ SDValue StackPtr = DAG.CreateStackTemporary(StoredVT, RegVT);
+
+ // Perform the original store, only redirected to the stack slot.
+ SDValue Store = DAG.getTruncStore(Chain, dl, Val, StackPtr,
+ MachinePointerInfo(), StoredVT);
+
+ EVT StackPtrVT = StackPtr.getValueType();
+
+ SDValue PtrIncrement = DAG.getConstant(RegBytes, dl, PtrVT);
+ SDValue StackPtrIncrement = DAG.getConstant(RegBytes, dl, StackPtrVT);
+ SmallVector<SDValue, 8> Stores;
+ unsigned Offset = 0;
+
+ // Do all but one of the copies using the full register width.
+ for (unsigned i = 1; i < NumRegs; i++) {
+ // Load one integer register's worth from the stack slot.
+ SDValue Load =
+ DAG.getLoad(RegVT, dl, Store, StackPtr, MachinePointerInfo());
+ // Store it to the final location. Remember the store.
+ Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, Ptr,
+ ST->getPointerInfo().getWithOffset(Offset),
+ MinAlign(ST->getAlignment(), Offset),
+ ST->getMemOperand()->getFlags()));
+ // Increment the pointers.
+ Offset += RegBytes;
+ StackPtr = DAG.getNode(ISD::ADD, dl, StackPtrVT,
+ StackPtr, StackPtrIncrement);
+ Ptr = DAG.getNode(ISD::ADD, dl, PtrVT, Ptr, PtrIncrement);
+ }
+
+ // The last store may be partial. Do a truncating store. On big-endian
+ // machines this requires an extending load from the stack slot to ensure
+ // that the bits are in the right place.
+ EVT MemVT = EVT::getIntegerVT(*DAG.getContext(),
+ 8 * (StoredBytes - Offset));
+
+ // Load from the stack slot.
+ SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, RegVT, Store, StackPtr,
+ MachinePointerInfo(), MemVT);
+
+ Stores.push_back(
+ DAG.getTruncStore(Load.getValue(1), dl, Load, Ptr,
+ ST->getPointerInfo().getWithOffset(Offset), MemVT,
+ MinAlign(ST->getAlignment(), Offset),
+ ST->getMemOperand()->getFlags(), ST->getAAInfo()));
+ // The order of the stores doesn't matter - say it with a TokenFactor.
+ SDValue Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
+ return Result;
+ }
+
+ assert(ST->getMemoryVT().isInteger() &&
+ !ST->getMemoryVT().isVector() &&
+ "Unaligned store of unknown type.");
+ // Get the half-size VT
+ EVT NewStoredVT = ST->getMemoryVT().getHalfSizedIntegerVT(*DAG.getContext());
+ int NumBits = NewStoredVT.getSizeInBits();
+ int IncrementSize = NumBits / 8;
+
+ // Divide the stored value in two parts.
+ SDValue ShiftAmount =
+ DAG.getConstant(NumBits, dl, getShiftAmountTy(Val.getValueType(),
+ DAG.getDataLayout()));
+ SDValue Lo = Val;
+ SDValue Hi = DAG.getNode(ISD::SRL, dl, VT, Val, ShiftAmount);
+
+ // Store the two parts
+ SDValue Store1, Store2;
+ Store1 = DAG.getTruncStore(Chain, dl,
+ DAG.getDataLayout().isLittleEndian() ? Lo : Hi,
+ Ptr, ST->getPointerInfo(), NewStoredVT, Alignment,
+ ST->getMemOperand()->getFlags());
+
+ EVT PtrVT = Ptr.getValueType();
+ Ptr = DAG.getNode(ISD::ADD, dl, PtrVT, Ptr,
+ DAG.getConstant(IncrementSize, dl, PtrVT));
+ Alignment = MinAlign(Alignment, IncrementSize);
+ Store2 = DAG.getTruncStore(
+ Chain, dl, DAG.getDataLayout().isLittleEndian() ? Hi : Lo, Ptr,
+ ST->getPointerInfo().getWithOffset(IncrementSize), NewStoredVT, Alignment,
+ ST->getMemOperand()->getFlags(), ST->getAAInfo());
+
+ SDValue Result =
+ DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Store1, Store2);
+ return Result;
+}
+
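The function above has two paths: bounce FP/vector values through an aligned stack temporary when their same-sized integer type is not usable, or split an integer store into two half-width stores. A minimal host-side sketch of the second path's byte-level effect, assuming a little-endian target and a 32-bit value stored with only 16-bit alignment (the function name and the use of memcpy are illustrative, not part of TargetLowering):

    #include <cstdint>
    #include <cstring>

    void storeUnaligned32(uint8_t *Ptr, uint32_t Val) {
      uint16_t Lo = static_cast<uint16_t>(Val);        // truncating store of the low half
      uint16_t Hi = static_cast<uint16_t>(Val >> 16);  // the ISD::SRL by NumBits above
      std::memcpy(Ptr, &Lo, sizeof(Lo));               // Store1 at the original pointer
      std::memcpy(Ptr + sizeof(Lo), &Hi, sizeof(Hi));  // Store2 at Ptr + IncrementSize
      // A big-endian target stores Hi first, matching the isLittleEndian()
      // selects in the DAG code above.
    }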
//===----------------------------------------------------------------------===//
// Implementation of Emulated TLS Model
//===----------------------------------------------------------------------===//
@@ -3057,9 +3536,7 @@ SDValue TargetLowering::LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA,
Module *VariableModule = const_cast<Module*>(GA->getGlobal()->getParent());
StringRef EmuTlsVarName(NameString);
GlobalVariable *EmuTlsVar = VariableModule->getNamedGlobal(EmuTlsVarName);
- if (!EmuTlsVar)
- EmuTlsVar = dyn_cast_or_null<GlobalVariable>(
- VariableModule->getOrInsertGlobal(EmuTlsVarName, VoidPtrType));
+ assert(EmuTlsVar && "Cannot find EmuTlsVar ");
Entry.Node = DAG.getGlobalAddress(EmuTlsVar, dl, PtrVT);
Entry.Ty = VoidPtrType;
Args.push_back(Entry);
@@ -3068,7 +3545,7 @@ SDValue TargetLowering::LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA,
TargetLowering::CallLoweringInfo CLI(DAG);
CLI.setDebugLoc(dl).setChain(DAG.getEntryNode());
- CLI.setCallee(CallingConv::C, VoidPtrType, EmuTlsGetAddr, std::move(Args), 0);
+ CLI.setCallee(CallingConv::C, VoidPtrType, EmuTlsGetAddr, std::move(Args));
std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
// TLSADDR will be codegen'ed as call. Inform MFI that function has calls.
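For context on this hunk: under the emulated TLS model every variable access lowers to a runtime call, and the change above asserts that the control variable already exists rather than creating it on demand. A hedged C-level sketch of the runtime contract the lowered call relies on; the control-block layout follows the usual compiler-rt convention and is an assumption here, not something this diff defines:

    #include <cstddef>

    // Per-variable control block emitted as "__emutls_v.<name>" (the
    // EmuTlsVar looked up above). Field order is an assumption.
    struct __emutls_control {
      std::size_t size;   // size of the TLS variable
      std::size_t align;  // its alignment
      std::size_t index;  // slot index, assigned by the runtime on first use
      void *templ;        // initial-value template, or nullptr
    };

    // The callee configured by CLI.setCallee(...): returns the address of
    // the calling thread's copy, allocating it on first access.
    extern "C" void *__emutls_get_address(__emutls_control *ctrl);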
diff --git a/contrib/llvm/lib/CodeGen/ShadowStackGC.cpp b/contrib/llvm/lib/CodeGen/ShadowStackGC.cpp
deleted file mode 100644
index b12e943..0000000
--- a/contrib/llvm/lib/CodeGen/ShadowStackGC.cpp
+++ /dev/null
@@ -1,55 +0,0 @@
-//===-- ShadowStackGC.cpp - GC support for uncooperative targets ----------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements lowering for the llvm.gc* intrinsics for targets that do
-// not natively support them (which includes the C backend). Note that the code
-// generated is not quite as efficient as algorithms which generate stack maps
-// to identify roots.
-//
-// This pass implements the code transformation described in this paper:
-// "Accurate Garbage Collection in an Uncooperative Environment"
-// Fergus Henderson, ISMM, 2002
-//
-// In runtime/GC/SemiSpace.cpp is a prototype runtime which is compatible with
-// ShadowStackGC.
-//
-// In order to support this particular transformation, all stack roots are
-// coallocated in the stack. This allows a fully target-independent stack map
-// while introducing only minor runtime overhead.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/CodeGen/GCs.h"
-#include "llvm/ADT/StringExtras.h"
-#include "llvm/CodeGen/GCStrategy.h"
-#include "llvm/IR/CallSite.h"
-#include "llvm/IR/IRBuilder.h"
-#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/IR/Module.h"
-
-using namespace llvm;
-
-#define DEBUG_TYPE "shadowstackgc"
-
-namespace {
-class ShadowStackGC : public GCStrategy {
-public:
- ShadowStackGC();
-};
-}
-
-static GCRegistry::Add<ShadowStackGC>
- X("shadow-stack", "Very portable GC for uncooperative code generators");
-
-void llvm::linkShadowStackGC() {}
-
-ShadowStackGC::ShadowStackGC() {
- InitRoots = true;
- CustomRoots = true;
-}
diff --git a/contrib/llvm/lib/CodeGen/ShadowStackGCLowering.cpp b/contrib/llvm/lib/CodeGen/ShadowStackGCLowering.cpp
index 878eeee..1efc440 100644
--- a/contrib/llvm/lib/CodeGen/ShadowStackGCLowering.cpp
+++ b/contrib/llvm/lib/CodeGen/ShadowStackGCLowering.cpp
@@ -8,7 +8,11 @@
//===----------------------------------------------------------------------===//
//
// This file contains the custom lowering code required by the shadow-stack GC
-// strategy.
+// strategy.
+//
+// This pass implements the code transformation described in this paper:
+// "Accurate Garbage Collection in an Uncooperative Environment"
+// Fergus Henderson, ISMM, 2002
//
//===----------------------------------------------------------------------===//
diff --git a/contrib/llvm/lib/CodeGen/SjLjEHPrepare.cpp b/contrib/llvm/lib/CodeGen/SjLjEHPrepare.cpp
index e1f242a..ce01c5f 100644
--- a/contrib/llvm/lib/CodeGen/SjLjEHPrepare.cpp
+++ b/contrib/llvm/lib/CodeGen/SjLjEHPrepare.cpp
@@ -13,7 +13,6 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/Passes.h"
-#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
@@ -24,18 +23,11 @@
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
-#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/Pass.h"
-#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetLowering.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
-#include "llvm/Transforms/Scalar.h"
-#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
-#include <set>
using namespace llvm;
#define DEBUG_TYPE "sjljehprepare"
@@ -55,7 +47,6 @@ class SjLjEHPrepare : public FunctionPass {
Constant *StackAddrFn;
Constant *StackRestoreFn;
Constant *LSDAAddrFn;
- Value *PersonalityFn;
Constant *CallSiteFn;
Constant *FuncCtxFn;
AllocaInst *FuncCtx;
@@ -103,21 +94,6 @@ bool SjLjEHPrepare::doInitialization(Module &M) {
VoidPtrTy, // __lsda
doubleUnderJBufTy, // __jbuf
nullptr);
- RegisterFn = M.getOrInsertFunction(
- "_Unwind_SjLj_Register", Type::getVoidTy(M.getContext()),
- PointerType::getUnqual(FunctionContextTy), (Type *)nullptr);
- UnregisterFn = M.getOrInsertFunction(
- "_Unwind_SjLj_Unregister", Type::getVoidTy(M.getContext()),
- PointerType::getUnqual(FunctionContextTy), (Type *)nullptr);
- FrameAddrFn = Intrinsic::getDeclaration(&M, Intrinsic::frameaddress);
- StackAddrFn = Intrinsic::getDeclaration(&M, Intrinsic::stacksave);
- StackRestoreFn = Intrinsic::getDeclaration(&M, Intrinsic::stackrestore);
- BuiltinSetupDispatchFn =
- Intrinsic::getDeclaration(&M, Intrinsic::eh_sjlj_setup_dispatch);
- LSDAAddrFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_sjlj_lsda);
- CallSiteFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_sjlj_callsite);
- FuncCtxFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_sjlj_functioncontext);
- PersonalityFn = nullptr;
return true;
}
@@ -141,15 +117,15 @@ void SjLjEHPrepare::insertCallSiteStore(Instruction *I, int Number) {
Builder.CreateStore(CallSiteNoC, CallSite, true /*volatile*/);
}
-/// MarkBlocksLiveIn - Insert BB and all of its predescessors into LiveBBs until
+/// MarkBlocksLiveIn - Insert BB and all of its predecessors into LiveBBs until
/// we reach blocks we've already seen.
static void MarkBlocksLiveIn(BasicBlock *BB,
SmallPtrSetImpl<BasicBlock *> &LiveBBs) {
if (!LiveBBs.insert(BB).second)
return; // already been here.
- for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
- MarkBlocksLiveIn(*PI, LiveBBs);
+ for (BasicBlock *PredBB : predecessors(BB))
+ MarkBlocksLiveIn(PredBB, LiveBBs);
}
/// substituteLPadValues - Substitute the values returned by the landingpad
@@ -159,7 +135,7 @@ void SjLjEHPrepare::substituteLPadValues(LandingPadInst *LPI, Value *ExnVal,
SmallVector<Value *, 8> UseWorkList(LPI->user_begin(), LPI->user_end());
while (!UseWorkList.empty()) {
Value *Val = UseWorkList.pop_back_val();
- ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(Val);
+ auto *EVI = dyn_cast<ExtractValueInst>(Val);
if (!EVI)
continue;
if (EVI->getNumIndices() != 1)
@@ -168,11 +144,11 @@ void SjLjEHPrepare::substituteLPadValues(LandingPadInst *LPI, Value *ExnVal,
EVI->replaceAllUsesWith(ExnVal);
else if (*EVI->idx_begin() == 1)
EVI->replaceAllUsesWith(SelVal);
- if (EVI->getNumUses() == 0)
+ if (EVI->use_empty())
EVI->eraseFromParent();
}
- if (LPI->getNumUses() == 0)
+ if (LPI->use_empty())
return;
// There are still some uses of LPI. Construct an aggregate with the exception
@@ -202,8 +178,7 @@ Value *SjLjEHPrepare::setupFunctionContext(Function &F,
&EntryBB->front());
// Fill in the function context structure.
- for (unsigned I = 0, E = LPads.size(); I != E; ++I) {
- LandingPadInst *LPI = LPads[I];
+ for (LandingPadInst *LPI : LPads) {
IRBuilder<> Builder(LPI->getParent(),
LPI->getParent()->getFirstInsertionPt());
@@ -226,8 +201,7 @@ Value *SjLjEHPrepare::setupFunctionContext(Function &F,
// Personality function
IRBuilder<> Builder(EntryBB->getTerminator());
- if (!PersonalityFn)
- PersonalityFn = F.getPersonalityFn();
+ Value *PersonalityFn = F.getPersonalityFn();
Value *PersonalityFieldPtr = Builder.CreateConstGEP2_32(
FunctionContextTy, FuncCtx, 0, 3, "pers_fn_gep");
Builder.CreateStore(
@@ -250,7 +224,7 @@ Value *SjLjEHPrepare::setupFunctionContext(Function &F,
void SjLjEHPrepare::lowerIncomingArguments(Function &F) {
BasicBlock::iterator AfterAllocaInsPt = F.begin()->begin();
while (isa<AllocaInst>(AfterAllocaInsPt) &&
- isa<ConstantInt>(cast<AllocaInst>(AfterAllocaInsPt)->getArraySize()))
+ cast<AllocaInst>(AfterAllocaInsPt)->isStaticAlloca())
++AfterAllocaInsPt;
assert(AfterAllocaInsPt != F.front().end());
@@ -274,40 +248,37 @@ void SjLjEHPrepare::lowerIncomingArguments(Function &F) {
void SjLjEHPrepare::lowerAcrossUnwindEdges(Function &F,
ArrayRef<InvokeInst *> Invokes) {
// Finally, scan the code looking for instructions with bad live ranges.
- for (Function::iterator BB = F.begin(), BBE = F.end(); BB != BBE; ++BB) {
- for (BasicBlock::iterator II = BB->begin(), IIE = BB->end(); II != IIE;
- ++II) {
+ for (BasicBlock &BB : F) {
+ for (Instruction &Inst : BB) {
// Ignore obvious cases we don't have to handle. In particular, most
// instructions either have no uses or only have a single use inside the
// current block. Ignore them quickly.
- Instruction *Inst = &*II;
- if (Inst->use_empty())
+ if (Inst.use_empty())
continue;
- if (Inst->hasOneUse() &&
- cast<Instruction>(Inst->user_back())->getParent() == BB &&
- !isa<PHINode>(Inst->user_back()))
+ if (Inst.hasOneUse() &&
+ cast<Instruction>(Inst.user_back())->getParent() == &BB &&
+ !isa<PHINode>(Inst.user_back()))
continue;
// If this is an alloca in the entry block, it's not a real register
// value.
- if (AllocaInst *AI = dyn_cast<AllocaInst>(Inst))
- if (isa<ConstantInt>(AI->getArraySize()) && BB == F.begin())
+ if (auto *AI = dyn_cast<AllocaInst>(&Inst))
+ if (AI->isStaticAlloca())
continue;
// Avoid iterator invalidation by copying users to a temporary vector.
SmallVector<Instruction *, 16> Users;
- for (User *U : Inst->users()) {
+ for (User *U : Inst.users()) {
Instruction *UI = cast<Instruction>(U);
- if (UI->getParent() != BB || isa<PHINode>(UI))
+ if (UI->getParent() != &BB || isa<PHINode>(UI))
Users.push_back(UI);
}
// Find all of the blocks that this value is live in.
- SmallPtrSet<BasicBlock *, 64> LiveBBs;
- LiveBBs.insert(Inst->getParent());
+ SmallPtrSet<BasicBlock *, 32> LiveBBs;
+ LiveBBs.insert(&BB);
while (!Users.empty()) {
- Instruction *U = Users.back();
- Users.pop_back();
+ Instruction *U = Users.pop_back_val();
if (!isa<PHINode>(U)) {
MarkBlocksLiveIn(U->getParent(), LiveBBs);
@@ -315,7 +286,7 @@ void SjLjEHPrepare::lowerAcrossUnwindEdges(Function &F,
// Uses for a PHI node occur in their predecessor block.
PHINode *PN = cast<PHINode>(U);
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
- if (PN->getIncomingValue(i) == Inst)
+ if (PN->getIncomingValue(i) == &Inst)
MarkBlocksLiveIn(PN->getIncomingBlock(i), LiveBBs);
}
}
@@ -323,10 +294,10 @@ void SjLjEHPrepare::lowerAcrossUnwindEdges(Function &F,
// Now that we know all of the blocks that this thing is live in, see if
// it includes any of the unwind locations.
bool NeedsSpill = false;
- for (unsigned i = 0, e = Invokes.size(); i != e; ++i) {
- BasicBlock *UnwindBlock = Invokes[i]->getUnwindDest();
- if (UnwindBlock != BB && LiveBBs.count(UnwindBlock)) {
- DEBUG(dbgs() << "SJLJ Spill: " << *Inst << " around "
+ for (InvokeInst *Invoke : Invokes) {
+ BasicBlock *UnwindBlock = Invoke->getUnwindDest();
+ if (UnwindBlock != &BB && LiveBBs.count(UnwindBlock)) {
+ DEBUG(dbgs() << "SJLJ Spill: " << Inst << " around "
<< UnwindBlock->getName() << "\n");
NeedsSpill = true;
break;
@@ -338,15 +309,15 @@ void SjLjEHPrepare::lowerAcrossUnwindEdges(Function &F,
// the value to be reloaded from the stack slot, even those that aren't
// in the unwind blocks. We should be more selective.
if (NeedsSpill) {
- DemoteRegToStack(*Inst, true);
+ DemoteRegToStack(Inst, true);
++NumSpilled;
}
}
}
// Go through the landing pads and remove any PHIs there.
- for (unsigned i = 0, e = Invokes.size(); i != e; ++i) {
- BasicBlock *UnwindBlock = Invokes[i]->getUnwindDest();
+ for (InvokeInst *Invoke : Invokes) {
+ BasicBlock *UnwindBlock = Invoke->getUnwindDest();
LandingPadInst *LPI = UnwindBlock->getLandingPadInst();
// Place PHIs into a set to avoid invalidating the iterator.
@@ -374,11 +345,10 @@ bool SjLjEHPrepare::setupEntryBlockAndCallSites(Function &F) {
SmallSetVector<LandingPadInst *, 16> LPads;
// Look through the terminators of the basic blocks to find invokes.
- for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
- if (InvokeInst *II = dyn_cast<InvokeInst>(BB->getTerminator())) {
+ for (BasicBlock &BB : F)
+ if (auto *II = dyn_cast<InvokeInst>(BB.getTerminator())) {
if (Function *Callee = II->getCalledFunction())
- if (Callee->isIntrinsic() &&
- Callee->getIntrinsicID() == Intrinsic::donothing) {
+ if (Callee->getIntrinsicID() == Intrinsic::donothing) {
// Remove the NOP invoke.
BranchInst::Create(II->getNormalDest(), II);
II->eraseFromParent();
@@ -387,7 +357,7 @@ bool SjLjEHPrepare::setupEntryBlockAndCallSites(Function &F) {
Invokes.push_back(II);
LPads.insert(II->getUnwindDest()->getLandingPadInst());
- } else if (ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator())) {
+ } else if (auto *RI = dyn_cast<ReturnInst>(BB.getTerminator())) {
Returns.push_back(RI);
}
@@ -448,14 +418,13 @@ bool SjLjEHPrepare::setupEntryBlockAndCallSites(Function &F) {
// created for this function and any unexpected exceptions thrown will go
// directly to the caller's context, which is what we want anyway, so no need
// to do anything here.
- for (Function::iterator BB = F.begin(), E = F.end(); ++BB != E;)
- for (BasicBlock::iterator I = BB->begin(), end = BB->end(); I != end; ++I)
- if (CallInst *CI = dyn_cast<CallInst>(I)) {
- if (!CI->doesNotThrow())
- insertCallSiteStore(CI, -1);
- } else if (ResumeInst *RI = dyn_cast<ResumeInst>(I)) {
- insertCallSiteStore(RI, -1);
- }
+ for (BasicBlock &BB : F) {
+ if (&BB == &F.front())
+ continue;
+ for (Instruction &I : BB)
+ if (I.mayThrow())
+ insertCallSiteStore(&I, -1);
+ }
// Register the function context and make sure it's known to not throw
CallInst *Register =
@@ -464,18 +433,18 @@ bool SjLjEHPrepare::setupEntryBlockAndCallSites(Function &F) {
// Following any allocas not in the entry block, update the saved SP in the
// jmpbuf to the new value.
- for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
- if (BB == F.begin())
+ for (BasicBlock &BB : F) {
+ if (&BB == &F.front())
continue;
- for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
- if (CallInst *CI = dyn_cast<CallInst>(I)) {
+ for (Instruction &I : BB) {
+ if (auto *CI = dyn_cast<CallInst>(&I)) {
if (CI->getCalledFunction() != StackRestoreFn)
continue;
- } else if (!isa<AllocaInst>(I)) {
+ } else if (!isa<AllocaInst>(&I)) {
continue;
}
Instruction *StackAddr = CallInst::Create(StackAddrFn, "sp");
- StackAddr->insertAfter(&*I);
+ StackAddr->insertAfter(&I);
Instruction *StoreStackAddr = new StoreInst(StackAddr, StackPtr, true);
StoreStackAddr->insertAfter(StackAddr);
}
@@ -483,13 +452,29 @@ bool SjLjEHPrepare::setupEntryBlockAndCallSites(Function &F) {
// Finally, for any returns from this function, if this function contains an
// invoke, add a call to unregister the function context.
- for (unsigned I = 0, E = Returns.size(); I != E; ++I)
- CallInst::Create(UnregisterFn, FuncCtx, "", Returns[I]);
+ for (ReturnInst *Return : Returns)
+ CallInst::Create(UnregisterFn, FuncCtx, "", Return);
return true;
}
bool SjLjEHPrepare::runOnFunction(Function &F) {
+ Module &M = *F.getParent();
+ RegisterFn = M.getOrInsertFunction(
+ "_Unwind_SjLj_Register", Type::getVoidTy(M.getContext()),
+ PointerType::getUnqual(FunctionContextTy), nullptr);
+ UnregisterFn = M.getOrInsertFunction(
+ "_Unwind_SjLj_Unregister", Type::getVoidTy(M.getContext()),
+ PointerType::getUnqual(FunctionContextTy), nullptr);
+ FrameAddrFn = Intrinsic::getDeclaration(&M, Intrinsic::frameaddress);
+ StackAddrFn = Intrinsic::getDeclaration(&M, Intrinsic::stacksave);
+ StackRestoreFn = Intrinsic::getDeclaration(&M, Intrinsic::stackrestore);
+ BuiltinSetupDispatchFn =
+ Intrinsic::getDeclaration(&M, Intrinsic::eh_sjlj_setup_dispatch);
+ LSDAAddrFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_sjlj_lsda);
+ CallSiteFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_sjlj_callsite);
+ FuncCtxFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_sjlj_functioncontext);
+
bool Res = setupEntryBlockAndCallSites(F);
return Res;
}
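Moving the declarations from doInitialization into runOnFunction does not change what gets emitted per function. For readers new to the pass, a rough sketch of what a rewritten function does at runtime; everything except the _Unwind_SjLj_* entry points is illustrative, and the real fields of the function context are elided:

    struct SjLjFunctionContext;  // stands in for FunctionContextTy
    extern "C" void _Unwind_SjLj_Register(SjLjFunctionContext *);
    extern "C" void _Unwind_SjLj_Unregister(SjLjFunctionContext *);

    int loweredFunction() {
      alignas(8) char fcStorage[256];  // the FuncCtx alloca; size illustrative
      auto *fc = reinterpret_cast<SjLjFunctionContext *>(fcStorage);
      _Unwind_SjLj_Register(fc);       // added by setupEntryBlockAndCallSites
      // ... body: insertCallSiteStore writes a call-site number into the
      // context before each instruction that may throw (-1 for non-invokes).
      _Unwind_SjLj_Unregister(fc);     // added before every return
      return 0;
    }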
diff --git a/contrib/llvm/lib/CodeGen/SlotIndexes.cpp b/contrib/llvm/lib/CodeGen/SlotIndexes.cpp
index c9d23f6..dba103e9 100644
--- a/contrib/llvm/lib/CodeGen/SlotIndexes.cpp
+++ b/contrib/llvm/lib/CodeGen/SlotIndexes.cpp
@@ -69,34 +69,29 @@ bool SlotIndexes::runOnMachineFunction(MachineFunction &fn) {
indexList.push_back(createEntry(nullptr, index));
// Iterate over the function.
- for (MachineFunction::iterator mbbItr = mf->begin(), mbbEnd = mf->end();
- mbbItr != mbbEnd; ++mbbItr) {
- MachineBasicBlock *mbb = &*mbbItr;
-
+ for (MachineBasicBlock &MBB : *mf) {
// Insert an index for the MBB start.
SlotIndex blockStartIndex(&indexList.back(), SlotIndex::Slot_Block);
- for (MachineBasicBlock::iterator miItr = mbb->begin(), miEnd = mbb->end();
- miItr != miEnd; ++miItr) {
- MachineInstr *mi = miItr;
- if (mi->isDebugValue())
+ for (MachineInstr &MI : MBB) {
+ if (MI.isDebugValue())
continue;
// Insert a store index for the instr.
- indexList.push_back(createEntry(mi, index += SlotIndex::InstrDist));
+ indexList.push_back(createEntry(&MI, index += SlotIndex::InstrDist));
// Save this base index in the maps.
- mi2iMap.insert(std::make_pair(mi, SlotIndex(&indexList.back(),
- SlotIndex::Slot_Block)));
+ mi2iMap.insert(std::make_pair(
+ &MI, SlotIndex(&indexList.back(), SlotIndex::Slot_Block)));
}
// We insert one blank instruction between basic blocks.
indexList.push_back(createEntry(nullptr, index += SlotIndex::InstrDist));
- MBBRanges[mbb->getNumber()].first = blockStartIndex;
- MBBRanges[mbb->getNumber()].second = SlotIndex(&indexList.back(),
+ MBBRanges[MBB.getNumber()].first = blockStartIndex;
+ MBBRanges[MBB.getNumber()].second = SlotIndex(&indexList.back(),
SlotIndex::Slot_Block);
- idx2MBBMap.push_back(IdxMBBPair(blockStartIndex, mbb));
+ idx2MBBMap.push_back(IdxMBBPair(blockStartIndex, &MBB));
}
// Sort the Idx2MBBMap
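A toy re-creation of the numbering the loop above produces; InstrDist = 16 mirrors the spirit of SlotIndex::InstrDist, but the concrete spacing is an implementation detail and the block shapes are made up:

    #include <cstdio>
    #include <vector>

    int main() {
      const unsigned InstrDist = 16;  // assumption: per-instruction spacing
      std::vector<unsigned> instrsPerBlock = {3, 2};
      unsigned index = 0;             // the initial sentinel entry
      for (unsigned bb = 0; bb < instrsPerBlock.size(); ++bb) {
        std::printf("bb%u starts at %u\n", bb, index);
        for (unsigned i = 0; i < instrsPerBlock[bb]; ++i)
          std::printf("  instr @ %u\n", index += InstrDist);
        // One blank entry between basic blocks, as the comment above notes;
        // debug values are skipped and never own an index.
        index += InstrDist;
      }
    }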
@@ -150,9 +145,9 @@ void SlotIndexes::repairIndexesInRange(MachineBasicBlock *MBB,
// does the same thing.
// Find anchor points, which are at the beginning/end of blocks or at
// instructions that already have indexes.
- while (Begin != MBB->begin() && !hasIndex(Begin))
+ while (Begin != MBB->begin() && !hasIndex(*Begin))
--Begin;
- while (End != MBB->end() && !hasIndex(End))
+ while (End != MBB->end() && !hasIndex(*End))
++End;
bool includeStart = (Begin == MBB->begin());
@@ -160,13 +155,13 @@ void SlotIndexes::repairIndexesInRange(MachineBasicBlock *MBB,
if (includeStart)
startIdx = getMBBStartIdx(MBB);
else
- startIdx = getInstructionIndex(Begin);
+ startIdx = getInstructionIndex(*Begin);
SlotIndex endIdx;
if (End == MBB->end())
endIdx = getMBBEndIdx(MBB);
else
- endIdx = getInstructionIndex(End);
+ endIdx = getInstructionIndex(*End);
// FIXME: Conceptually, this code is implementing an iterator on MBB that
// optionally includes an additional position prior to MBB->begin(), indicated
@@ -182,7 +177,7 @@ void SlotIndexes::repairIndexesInRange(MachineBasicBlock *MBB,
"Decremented past the beginning of region to repair.");
MachineInstr *SlotMI = ListI->getInstr();
- MachineInstr *MI = (MBBI != MBB->end() && !pastStart) ? MBBI : nullptr;
+ MachineInstr *MI = (MBBI != MBB->end() && !pastStart) ? &*MBBI : nullptr;
bool MBBIAtBegin = MBBI == Begin && (!includeStart || pastStart);
if (SlotMI == MI && !MBBIAtBegin) {
@@ -199,7 +194,7 @@ void SlotIndexes::repairIndexesInRange(MachineBasicBlock *MBB,
} else {
--ListI;
if (SlotMI)
- removeMachineInstrFromMaps(SlotMI);
+ removeMachineInstrFromMaps(*SlotMI);
}
}
@@ -207,14 +202,14 @@ void SlotIndexes::repairIndexesInRange(MachineBasicBlock *MBB,
// to update the IndexList while we are iterating it.
for (MachineBasicBlock::iterator I = End; I != Begin;) {
--I;
- MachineInstr *MI = I;
- if (!MI->isDebugValue() && mi2iMap.find(MI) == mi2iMap.end())
+ MachineInstr &MI = *I;
+ if (!MI.isDebugValue() && mi2iMap.find(&MI) == mi2iMap.end())
insertMachineInstrInMaps(MI);
}
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
-void SlotIndexes::dump() const {
+LLVM_DUMP_METHOD void SlotIndexes::dump() const {
for (IndexList::const_iterator itr = indexList.begin();
itr != indexList.end(); ++itr) {
dbgs() << itr->getIndex() << " ";
@@ -242,7 +237,7 @@ void SlotIndex::print(raw_ostream &os) const {
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
// Dump a SlotIndex to stderr.
-void SlotIndex::dump() const {
+LLVM_DUMP_METHOD void SlotIndex::dump() const {
print(dbgs());
dbgs() << "\n";
}
diff --git a/contrib/llvm/lib/CodeGen/SpillPlacement.cpp b/contrib/llvm/lib/CodeGen/SpillPlacement.cpp
index d30cfc2..f10c98e 100644
--- a/contrib/llvm/lib/CodeGen/SpillPlacement.cpp
+++ b/contrib/llvm/lib/CodeGen/SpillPlacement.cpp
@@ -173,6 +173,17 @@ struct SpillPlacement::Node {
Value = 0;
return Before != preferReg();
}
+
+ void getDissentingNeighbors(SparseSet<unsigned> &List,
+ const Node nodes[]) const {
+ for (const auto &Elt : Links) {
+ unsigned n = Elt.second;
+ // Neighbors that already have the same value are not going to
+ // change because of this node changing.
+ if (Value != nodes[n].Value)
+ List.insert(n);
+ }
+ }
};
bool SpillPlacement::runOnMachineFunction(MachineFunction &mf) {
@@ -182,6 +193,8 @@ bool SpillPlacement::runOnMachineFunction(MachineFunction &mf) {
assert(!nodes && "Leaking node array");
nodes = new Node[bundles->getNumBundles()];
+ TodoList.clear();
+ TodoList.setUniverse(bundles->getNumBundles());
// Compute total ingoing and outgoing block frequencies for all bundles.
BlockFrequencies.resize(mf.getNumBlockIDs());
@@ -199,10 +212,12 @@ bool SpillPlacement::runOnMachineFunction(MachineFunction &mf) {
void SpillPlacement::releaseMemory() {
delete[] nodes;
nodes = nullptr;
+ TodoList.clear();
}
/// activate - mark node n as active if it wasn't already.
void SpillPlacement::activate(unsigned n) {
+ TodoList.insert(n);
if (ActiveNodes->test(n))
return;
ActiveNodes->set(n);
@@ -287,10 +302,6 @@ void SpillPlacement::addLinks(ArrayRef<unsigned> Links) {
continue;
activate(ib);
activate(ob);
- if (nodes[ib].Links.empty() && !nodes[ib].mustSpill())
- Linked.push_back(ib);
- if (nodes[ob].Links.empty() && !nodes[ob].mustSpill())
- Linked.push_back(ob);
BlockFrequency Freq = BlockFrequencies[Number];
nodes[ib].addLink(ob, Freq);
nodes[ob].addLink(ib, Freq);
@@ -298,76 +309,50 @@ void SpillPlacement::addLinks(ArrayRef<unsigned> Links) {
}
bool SpillPlacement::scanActiveBundles() {
- Linked.clear();
RecentPositive.clear();
for (int n = ActiveNodes->find_first(); n>=0; n = ActiveNodes->find_next(n)) {
- nodes[n].update(nodes, Threshold);
+ update(n);
// A node that must spill, or a node without any links, is not going to
// change its value ever again, so exclude it from iterations.
if (nodes[n].mustSpill())
continue;
- if (!nodes[n].Links.empty())
- Linked.push_back(n);
if (nodes[n].preferReg())
RecentPositive.push_back(n);
}
return !RecentPositive.empty();
}
+bool SpillPlacement::update(unsigned n) {
+ if (!nodes[n].update(nodes, Threshold))
+ return false;
+ nodes[n].getDissentingNeighbors(TodoList, nodes);
+ return true;
+}
+
/// iterate - Repeatedly update the Hopfield nodes until stability or the
/// maximum number of iterations is reached.
-/// @param Linked - Numbers of linked nodes that need updating.
void SpillPlacement::iterate() {
- // First update the recently positive nodes. They have likely received new
- // negative bias that will turn them off.
- while (!RecentPositive.empty())
- nodes[RecentPositive.pop_back_val()].update(nodes, Threshold);
-
- if (Linked.empty())
- return;
+ // We do not need to push those nodes into the todolist.
+ // They have already been processed as part of the previous iteration.
+ RecentPositive.clear();
- // Run up to 10 iterations. The edge bundle numbering is closely related to
- // basic block numbering, so there is a strong tendency towards chains of
- // linked nodes with sequential numbers. By scanning the linked nodes
- // backwards and forwards, we make it very likely that a single node can
- // affect the entire network in a single iteration. That means very fast
- // convergence, usually in a single iteration.
- for (unsigned iteration = 0; iteration != 10; ++iteration) {
- // Scan backwards, skipping the last node when iteration is not zero. When
- // iteration is not zero, the last node was just updated.
- bool Changed = false;
- for (SmallVectorImpl<unsigned>::const_reverse_iterator I =
- iteration == 0 ? Linked.rbegin() : std::next(Linked.rbegin()),
- E = Linked.rend(); I != E; ++I) {
- unsigned n = *I;
- if (nodes[n].update(nodes, Threshold)) {
- Changed = true;
- if (nodes[n].preferReg())
- RecentPositive.push_back(n);
- }
- }
- if (!Changed || !RecentPositive.empty())
- return;
-
- // Scan forwards, skipping the first node which was just updated.
- Changed = false;
- for (SmallVectorImpl<unsigned>::const_iterator I =
- std::next(Linked.begin()), E = Linked.end(); I != E; ++I) {
- unsigned n = *I;
- if (nodes[n].update(nodes, Threshold)) {
- Changed = true;
- if (nodes[n].preferReg())
- RecentPositive.push_back(n);
- }
- }
- if (!Changed || !RecentPositive.empty())
- return;
+ // Since the last iteration, the todolist has been augmented by calls
+ // to addConstraints, addLinks, and co.
+ // Update the network energy starting at this new frontier.
+ // The call to ::update will add the nodes that changed into the todolist.
+ unsigned Limit = bundles->getNumBundles() * 10;
+ while (Limit-- > 0 && !TodoList.empty()) {
+ unsigned n = TodoList.pop_back_val();
+ if (!update(n))
+ continue;
+ if (nodes[n].preferReg())
+ RecentPositive.push_back(n);
}
}
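The new scheme is a standard worklist relaxation: pop a node, recompute it, and requeue only the neighbors that now disagree with it, with a linear cap instead of the old fixed ten sweeps. A self-contained sketch; std::set and int stand in for SparseSet and the Hopfield node state, and update() is stubbed:

    #include <set>
    #include <vector>

    struct Net {
      std::vector<int> value;                    // per-node output
      std::vector<std::vector<unsigned>> links;  // adjacency
      bool update(unsigned n) {                  // stub: recompute value[n]
        (void)n;                                 // from its links; return
        return false;                            // true if it changed
      }
    };

    void iterate(Net &net, std::set<unsigned> &todo) {
      unsigned limit = net.value.size() * 10;    // same linear cap as above
      while (limit-- > 0 && !todo.empty()) {
        unsigned n = *todo.begin();
        todo.erase(todo.begin());
        if (!net.update(n))
          continue;                              // stable: neighbors unaffected
        for (unsigned m : net.links[n])          // getDissentingNeighbors
          if (net.value[m] != net.value[n])
            todo.insert(m);
      }
    }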
void SpillPlacement::prepare(BitVector &RegBundles) {
- Linked.clear();
RecentPositive.clear();
+ TodoList.clear();
// Reuse RegBundles as our ActiveNodes vector.
ActiveNodes = &RegBundles;
ActiveNodes->clear();
diff --git a/contrib/llvm/lib/CodeGen/SpillPlacement.h b/contrib/llvm/lib/CodeGen/SpillPlacement.h
index 03dd58d..9b9eccc 100644
--- a/contrib/llvm/lib/CodeGen/SpillPlacement.h
+++ b/contrib/llvm/lib/CodeGen/SpillPlacement.h
@@ -29,6 +29,7 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/SparseSet.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/Support/BlockFrequency.h"
@@ -66,6 +67,9 @@ class SpillPlacement : public MachineFunctionPass {
/// its inputs falls in the open interval (-Threshold;Threshold).
BlockFrequency Threshold;
+ /// List of nodes that need to be updated in ::iterate.
+ SparseSet<unsigned> TodoList;
+
public:
static char ID; // Pass identification, replacement for typeid.
@@ -157,6 +161,8 @@ private:
void activate(unsigned);
void setThreshold(const BlockFrequency &Entry);
+
+ bool update(unsigned);
};
} // end namespace llvm
diff --git a/contrib/llvm/lib/CodeGen/Spiller.h b/contrib/llvm/lib/CodeGen/Spiller.h
index 08f99ec..61ee508 100644
--- a/contrib/llvm/lib/CodeGen/Spiller.h
+++ b/contrib/llvm/lib/CodeGen/Spiller.h
@@ -16,6 +16,7 @@ namespace llvm {
class MachineFunction;
class MachineFunctionPass;
class VirtRegMap;
+ class LiveIntervals;
/// Spiller interface.
///
@@ -28,7 +29,7 @@ namespace llvm {
/// spill - Spill the LRE.getParent() live interval.
virtual void spill(LiveRangeEdit &LRE) = 0;
-
+ virtual void postOptimization() {}
};
/// Create and return a spiller that will insert spill code directly instead
@@ -36,7 +37,6 @@ namespace llvm {
Spiller *createInlineSpiller(MachineFunctionPass &pass,
MachineFunction &mf,
VirtRegMap &vrm);
-
}
#endif
diff --git a/contrib/llvm/lib/CodeGen/SplitKit.cpp b/contrib/llvm/lib/CodeGen/SplitKit.cpp
index 51dddab..07be24b 100644
--- a/contrib/llvm/lib/CodeGen/SplitKit.cpp
+++ b/contrib/llvm/lib/CodeGen/SplitKit.cpp
@@ -16,6 +16,7 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/LiveRangeEdit.h"
+#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
@@ -37,82 +38,101 @@ STATISTIC(NumRemats, "Number of rematerialized defs for splitting");
STATISTIC(NumRepairs, "Number of invalid live ranges repaired");
//===----------------------------------------------------------------------===//
-// Split Analysis
+// Last Insert Point Analysis
//===----------------------------------------------------------------------===//
-SplitAnalysis::SplitAnalysis(const VirtRegMap &vrm, const LiveIntervals &lis,
- const MachineLoopInfo &mli)
- : MF(vrm.getMachineFunction()), VRM(vrm), LIS(lis), Loops(mli),
- TII(*MF.getSubtarget().getInstrInfo()), CurLI(nullptr),
- LastSplitPoint(MF.getNumBlockIDs()) {}
+InsertPointAnalysis::InsertPointAnalysis(const LiveIntervals &lis,
+ unsigned BBNum)
+ : LIS(lis), LastInsertPoint(BBNum) {}
-void SplitAnalysis::clear() {
- UseSlots.clear();
- UseBlocks.clear();
- ThroughBlocks.clear();
- CurLI = nullptr;
- DidRepairRange = false;
-}
+SlotIndex
+InsertPointAnalysis::computeLastInsertPoint(const LiveInterval &CurLI,
+ const MachineBasicBlock &MBB) {
+ unsigned Num = MBB.getNumber();
+ std::pair<SlotIndex, SlotIndex> &LIP = LastInsertPoint[Num];
+ SlotIndex MBBEnd = LIS.getMBBEndIdx(&MBB);
-SlotIndex SplitAnalysis::computeLastSplitPoint(unsigned Num) {
- const MachineBasicBlock *MBB = MF.getBlockNumbered(Num);
- // FIXME: Handle multiple EH pad successors.
- const MachineBasicBlock *LPad = MBB->getLandingPadSuccessor();
- std::pair<SlotIndex, SlotIndex> &LSP = LastSplitPoint[Num];
- SlotIndex MBBEnd = LIS.getMBBEndIdx(MBB);
+ SmallVector<const MachineBasicBlock *, 1> EHPadSucessors;
+ for (const MachineBasicBlock *SMBB : MBB.successors())
+ if (SMBB->isEHPad())
+ EHPadSucessors.push_back(SMBB);
- // Compute split points on the first call. The pair is independent of the
+ // Compute insert points on the first call. The pair is independent of the
// current live interval.
- if (!LSP.first.isValid()) {
- MachineBasicBlock::const_iterator FirstTerm = MBB->getFirstTerminator();
- if (FirstTerm == MBB->end())
- LSP.first = MBBEnd;
+ if (!LIP.first.isValid()) {
+ MachineBasicBlock::const_iterator FirstTerm = MBB.getFirstTerminator();
+ if (FirstTerm == MBB.end())
+ LIP.first = MBBEnd;
else
- LSP.first = LIS.getInstructionIndex(FirstTerm);
+ LIP.first = LIS.getInstructionIndex(*FirstTerm);
// If there is a landing pad successor, also find the call instruction.
- if (!LPad)
- return LSP.first;
+ if (EHPadSucessors.empty())
+ return LIP.first;
// There may not be a call instruction (?), in which case we ignore LPad.
- LSP.second = LSP.first;
- for (MachineBasicBlock::const_iterator I = MBB->end(), E = MBB->begin();
+ LIP.second = LIP.first;
+ for (MachineBasicBlock::const_iterator I = MBB.end(), E = MBB.begin();
I != E;) {
--I;
if (I->isCall()) {
- LSP.second = LIS.getInstructionIndex(I);
+ LIP.second = LIS.getInstructionIndex(*I);
break;
}
}
}
- // If CurLI is live into a landing pad successor, move the last split point
+ // If CurLI is live into a landing pad successor, move the last insert point
// back to the call that may throw.
- if (!LPad || !LSP.second || !LIS.isLiveInToMBB(*CurLI, LPad))
- return LSP.first;
+ if (!LIP.second)
+ return LIP.first;
+
+ if (none_of(EHPadSucessors, [&](const MachineBasicBlock *EHPad) {
+ return LIS.isLiveInToMBB(CurLI, EHPad);
+ }))
+ return LIP.first;
// Find the value leaving MBB.
- const VNInfo *VNI = CurLI->getVNInfoBefore(MBBEnd);
+ const VNInfo *VNI = CurLI.getVNInfoBefore(MBBEnd);
if (!VNI)
- return LSP.first;
+ return LIP.first;
// If the value leaving MBB was defined after the call in MBB, it can't
// really be live-in to the landing pad. This can happen if the landing pad
// has a PHI, and this register is undef on the exceptional edge.
// <rdar://problem/10664933>
- if (!SlotIndex::isEarlierInstr(VNI->def, LSP.second) && VNI->def < MBBEnd)
- return LSP.first;
+ if (!SlotIndex::isEarlierInstr(VNI->def, LIP.second) && VNI->def < MBBEnd)
+ return LIP.first;
// Value is properly live-in to the landing pad.
- // Only allow splits before the call.
- return LSP.second;
+ // Only allow inserts before the call.
+ return LIP.second;
}
MachineBasicBlock::iterator
-SplitAnalysis::getLastSplitPointIter(MachineBasicBlock *MBB) {
- SlotIndex LSP = getLastSplitPoint(MBB->getNumber());
- if (LSP == LIS.getMBBEndIdx(MBB))
- return MBB->end();
- return LIS.getInstructionFromIndex(LSP);
+InsertPointAnalysis::getLastInsertPointIter(const LiveInterval &CurLI,
+ MachineBasicBlock &MBB) {
+ SlotIndex LIP = getLastInsertPoint(CurLI, MBB);
+ if (LIP == LIS.getMBBEndIdx(&MBB))
+ return MBB.end();
+ return LIS.getInstructionFromIndex(LIP);
+}
+
+//===----------------------------------------------------------------------===//
+// Split Analysis
+//===----------------------------------------------------------------------===//
+
+SplitAnalysis::SplitAnalysis(const VirtRegMap &vrm, const LiveIntervals &lis,
+ const MachineLoopInfo &mli)
+ : MF(vrm.getMachineFunction()), VRM(vrm), LIS(lis), Loops(mli),
+ TII(*MF.getSubtarget().getInstrInfo()), CurLI(nullptr),
+ IPA(lis, MF.getNumBlockIDs()) {}
+
+void SplitAnalysis::clear() {
+ UseSlots.clear();
+ UseBlocks.clear();
+ ThroughBlocks.clear();
+ CurLI = nullptr;
+ DidRepairRange = false;
}
/// analyzeUses - Count instructions, basic blocks, and loops using CurLI.
@@ -129,7 +149,7 @@ void SplitAnalysis::analyzeUses() {
const MachineRegisterInfo &MRI = MF.getRegInfo();
for (MachineOperand &MO : MRI.use_nodbg_operands(CurLI->reg))
if (!MO.isUndef())
- UseSlots.push_back(LIS.getInstructionIndex(MO.getParent()).getRegSlot());
+ UseSlots.push_back(LIS.getInstructionIndex(*MO.getParent()).getRegSlot());
array_pod_sort(UseSlots.begin(), UseSlots.end());
@@ -318,11 +338,13 @@ void SplitAnalysis::analyze(const LiveInterval *li) {
//===----------------------------------------------------------------------===//
/// Create a new SplitEditor for editing the LiveInterval analyzed by SA.
-SplitEditor::SplitEditor(SplitAnalysis &sa, LiveIntervals &lis, VirtRegMap &vrm,
+SplitEditor::SplitEditor(SplitAnalysis &sa, AliasAnalysis &aa,
+ LiveIntervals &lis, VirtRegMap &vrm,
MachineDominatorTree &mdt,
MachineBlockFrequencyInfo &mbfi)
- : SA(sa), LIS(lis), VRM(vrm), MRI(vrm.getMachineFunction().getRegInfo()),
- MDT(mdt), TII(*vrm.getMachineFunction().getSubtarget().getInstrInfo()),
+ : SA(sa), AA(aa), LIS(lis), VRM(vrm),
+ MRI(vrm.getMachineFunction().getRegInfo()), MDT(mdt),
+ TII(*vrm.getMachineFunction().getSubtarget().getInstrInfo()),
TRI(*vrm.getMachineFunction().getSubtarget().getRegisterInfo()),
MBFI(mbfi), Edit(nullptr), OpenIdx(0), SpillMode(SM_Partition),
RegAssign(Allocator) {}
@@ -347,7 +369,7 @@ void SplitEditor::reset(LiveRangeEdit &LRE, ComplementSpillMode SM) {
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
-void SplitEditor::dump() const {
+LLVM_DUMP_METHOD void SplitEditor::dump() const {
if (RegAssign.empty()) {
dbgs() << " empty\n";
return;
@@ -430,16 +452,22 @@ VNInfo *SplitEditor::defFromParent(unsigned RegIdx,
bool Late = RegIdx != 0;
// Attempt cheap-as-a-copy rematerialization.
+ unsigned Original = VRM.getOriginal(Edit->get(RegIdx));
+ LiveInterval &OrigLI = LIS.getInterval(Original);
+ VNInfo *OrigVNI = OrigLI.getVNInfoAt(UseIdx);
LiveRangeEdit::Remat RM(ParentVNI);
- if (Edit->canRematerializeAt(RM, UseIdx, true)) {
+ RM.OrigMI = LIS.getInstructionFromIndex(OrigVNI->def);
+
+ if (Edit->canRematerializeAt(RM, OrigVNI, UseIdx, true)) {
Def = Edit->rematerializeAt(MBB, I, LI->reg, RM, TRI, Late);
++NumRemats;
} else {
// Can't remat, just insert a copy from parent.
CopyMI = BuildMI(MBB, I, DebugLoc(), TII.get(TargetOpcode::COPY), LI->reg)
.addReg(Edit->getReg());
- Def = LIS.getSlotIndexes()->insertMachineInstrInMaps(CopyMI, Late)
- .getRegSlot();
+ Def = LIS.getSlotIndexes()
+ ->insertMachineInstrInMaps(*CopyMI, Late)
+ .getRegSlot();
++NumCopies;
}
@@ -638,7 +666,7 @@ void SplitEditor::removeBackCopies(SmallVectorImpl<VNInfo*> &Copies) {
DEBUG(dbgs() << "Removing " << Def << '\t' << *MI);
LIS.removeVRegDefAt(*LI, Def);
- LIS.RemoveMachineInstrFromMaps(MI);
+ LIS.RemoveMachineInstrFromMaps(*MI);
MI->eraseFromParent();
// Adjust RegAssign if a register assignment is killed at Def. We want to
@@ -654,7 +682,7 @@ void SplitEditor::removeBackCopies(SmallVectorImpl<VNInfo*> &Copies) {
DEBUG(dbgs() << " cannot find simple kill of RegIdx " << RegIdx << '\n');
forceRecompute(RegIdx, Edit->getParent().getVNInfoAt(Def));
} else {
- SlotIndex Kill = LIS.getInstructionIndex(MBBI).getRegSlot();
+ SlotIndex Kill = LIS.getInstructionIndex(*MBBI).getRegSlot();
DEBUG(dbgs() << " move kill to " << Kill << '\t' << *MBBI);
AssignI.setStop(Kill);
}
@@ -715,7 +743,62 @@ SplitEditor::findShallowDominator(MachineBasicBlock *MBB,
}
}
-void SplitEditor::hoistCopiesForSize() {
+void SplitEditor::computeRedundantBackCopies(
+ DenseSet<unsigned> &NotToHoistSet, SmallVectorImpl<VNInfo *> &BackCopies) {
+ LiveInterval *LI = &LIS.getInterval(Edit->get(0));
+ LiveInterval *Parent = &Edit->getParent();
+ SmallVector<SmallPtrSet<VNInfo *, 8>, 8> EqualVNs(Parent->getNumValNums());
+ SmallPtrSet<VNInfo *, 8> DominatedVNIs;
+
+ // Aggregate VNIs having the same value as ParentVNI.
+ for (VNInfo *VNI : LI->valnos) {
+ if (VNI->isUnused())
+ continue;
+ VNInfo *ParentVNI = Edit->getParent().getVNInfoAt(VNI->def);
+ EqualVNs[ParentVNI->id].insert(VNI);
+ }
+
+ // For VNI aggregation of each ParentVNI, collect dominated, i.e.,
+ // redundant VNIs to BackCopies.
+ for (unsigned i = 0, e = Parent->getNumValNums(); i != e; ++i) {
+ VNInfo *ParentVNI = Parent->getValNumInfo(i);
+ if (!NotToHoistSet.count(ParentVNI->id))
+ continue;
+ SmallPtrSetIterator<VNInfo *> It1 = EqualVNs[ParentVNI->id].begin();
+ SmallPtrSetIterator<VNInfo *> It2 = It1;
+ for (; It1 != EqualVNs[ParentVNI->id].end(); ++It1) {
+ It2 = It1;
+ for (++It2; It2 != EqualVNs[ParentVNI->id].end(); ++It2) {
+ if (DominatedVNIs.count(*It1) || DominatedVNIs.count(*It2))
+ continue;
+
+ MachineBasicBlock *MBB1 = LIS.getMBBFromIndex((*It1)->def);
+ MachineBasicBlock *MBB2 = LIS.getMBBFromIndex((*It2)->def);
+ if (MBB1 == MBB2) {
+ DominatedVNIs.insert((*It1)->def < (*It2)->def ? (*It2) : (*It1));
+ } else if (MDT.dominates(MBB1, MBB2)) {
+ DominatedVNIs.insert(*It2);
+ } else if (MDT.dominates(MBB2, MBB1)) {
+ DominatedVNIs.insert(*It1);
+ }
+ }
+ }
+ if (!DominatedVNIs.empty()) {
+ forceRecompute(0, ParentVNI);
+ for (auto VNI : DominatedVNIs) {
+ BackCopies.push_back(VNI);
+ }
+ DominatedVNIs.clear();
+ }
+ }
+}
+
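A toy version of the pairwise pruning above; integers stand in for VNInfo defs, and program order approximates the same-block arm of the dominance test (MachineDominatorTree handles the cross-block cases in the real code):

    #include <cstddef>
    #include <set>
    #include <vector>

    // Assumption: within one block, an earlier def dominates a later one.
    static bool dominates(int a, int b) { return a < b; }

    std::set<int> redundantCopies(const std::vector<int> &equalValuedDefs) {
      std::set<int> dominated;
      for (std::size_t i = 0; i < equalValuedDefs.size(); ++i)
        for (std::size_t j = i + 1; j < equalValuedDefs.size(); ++j) {
          int A = equalValuedDefs[i], B = equalValuedDefs[j];
          if (dominated.count(A) || dominated.count(B))
            continue;                  // already known redundant
          if (dominates(A, B))
            dominated.insert(B);       // B can be removed
          else if (dominates(B, A))
            dominated.insert(A);       // A can be removed
        }
      return dominated;
    }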
+/// For SM_Size mode, find a common dominator for all the back-copies for
+/// the same ParentVNI and hoist the back-copies to the dominator BB.
+/// For SM_Speed mode, if the common dominator is hot and it is not beneficial
+/// to do the hoisting, simply remove the dominated back-copies for the same
+/// ParentVNI.
+void SplitEditor::hoistCopies() {
// Get the complement interval, always RegIdx 0.
LiveInterval *LI = &LIS.getInterval(Edit->get(0));
LiveInterval *Parent = &Edit->getParent();
@@ -724,6 +807,11 @@ void SplitEditor::hoistCopiesForSize() {
// indexed by ParentVNI->id.
typedef std::pair<MachineBasicBlock*, SlotIndex> DomPair;
SmallVector<DomPair, 8> NearestDom(Parent->getNumValNums());
+ // The total cost of all the back-copies for each ParentVNI.
+ SmallVector<BlockFrequency, 8> Costs(Parent->getNumValNums());
+ // The ParentVNI->id set for which hoisting back-copies are not beneficial
+ // for Speed.
+ DenseSet<unsigned> NotToHoistSet;
// Find the nearest common dominator for parent values with multiple
// back-copies. If a single back-copy dominates, put it in DomPair.second.
@@ -739,6 +827,7 @@ void SplitEditor::hoistCopiesForSize() {
continue;
MachineBasicBlock *ValMBB = LIS.getMBBFromIndex(VNI->def);
+
DomPair &Dom = NearestDom[ParentVNI->id];
// Keep directly defined parent values. This is either a PHI or an
@@ -773,6 +862,7 @@ void SplitEditor::hoistCopiesForSize() {
else if (Near != Dom.first)
// None dominate. Hoist to common dominator, need new def.
Dom = DomPair(Near, SlotIndex());
+ Costs[ParentVNI->id] += MBFI.getBlockFreq(ValMBB);
}
DEBUG(dbgs() << "Multi-mapped complement " << VNI->id << '@' << VNI->def
@@ -791,6 +881,11 @@ void SplitEditor::hoistCopiesForSize() {
MachineBasicBlock *DefMBB = LIS.getMBBFromIndex(ParentVNI->def);
// Get a less loopy dominator than Dom.first.
Dom.first = findShallowDominator(Dom.first, DefMBB);
+ if (SpillMode == SM_Speed &&
+ MBFI.getBlockFreq(Dom.first) > Costs[ParentVNI->id]) {
+ NotToHoistSet.insert(ParentVNI->id);
+ continue;
+ }
SlotIndex Last = LIS.getMBBEndIdx(Dom.first).getPrevSlot();
Dom.second =
defFromParent(0, ParentVNI, Last, *Dom.first,
@@ -805,11 +900,18 @@ void SplitEditor::hoistCopiesForSize() {
continue;
VNInfo *ParentVNI = Edit->getParent().getVNInfoAt(VNI->def);
const DomPair &Dom = NearestDom[ParentVNI->id];
- if (!Dom.first || Dom.second == VNI->def)
+ if (!Dom.first || Dom.second == VNI->def ||
+ NotToHoistSet.count(ParentVNI->id))
continue;
BackCopies.push_back(VNI);
forceRecompute(0, ParentVNI);
}
+
+ // If it is not beneficial to hoist all the BackCopies, simply remove
+ // redundant BackCopies in speed mode.
+ if (SpillMode == SM_Speed && !NotToHoistSet.empty())
+ computeRedundantBackCopies(NotToHoistSet, BackCopies);
+
removeBackCopies(BackCopies);
}
@@ -924,12 +1026,22 @@ bool SplitEditor::transferValues() {
}
void SplitEditor::extendPHIKillRanges() {
- // Extend live ranges to be live-out for successor PHI values.
+ // Extend live ranges to be live-out for successor PHI values.
for (const VNInfo *PHIVNI : Edit->getParent().valnos) {
if (PHIVNI->isUnused() || !PHIVNI->isPHIDef())
continue;
unsigned RegIdx = RegAssign.lookup(PHIVNI->def);
LiveRange &LR = LIS.getInterval(Edit->get(RegIdx));
+
+ // Check whether PHI is dead.
+ const LiveRange::Segment *Segment = LR.getSegmentContaining(PHIVNI->def);
+ assert(Segment != nullptr && "Missing segment for VNI");
+ if (Segment->end == PHIVNI->def.getDeadSlot()) {
+ // This is a dead PHI. Remove it.
+ LR.removeSegment(*Segment, true);
+ continue;
+ }
+
LiveRangeCalc &LRC = getLRCalc(RegIdx);
MachineBasicBlock *MBB = LIS.getMBBFromIndex(PHIVNI->def);
for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(),
@@ -964,7 +1076,7 @@ void SplitEditor::rewriteAssigned(bool ExtendRanges) {
// <undef> operands don't really read the register, so it doesn't matter
// which register we choose. When the use operand is tied to a def, we must
// use the same register as the def, so just do that always.
- SlotIndex Idx = LIS.getInstructionIndex(MI);
+ SlotIndex Idx = LIS.getInstructionIndex(*MI);
if (MO.isDef() || MO.isUndef())
Idx = Idx.getRegSlot(MO.isEarlyClobber());
@@ -1003,6 +1115,8 @@ void SplitEditor::deleteRematVictims() {
// Dead defs end at the dead slot.
if (S.end != S.valno->def.getDeadSlot())
continue;
+ if (S.valno->isPHIDef())
+ continue;
MachineInstr *MI = LIS.getInstructionFromIndex(S.valno->def);
assert(MI && "Missing instruction for dead def");
MI->addRegisterDead(LI->reg, &TRI);
@@ -1018,7 +1132,7 @@ void SplitEditor::deleteRematVictims() {
if (Dead.empty())
return;
- Edit->eliminateDeadDefs(Dead);
+ Edit->eliminateDeadDefs(Dead, None, &AA);
}
void SplitEditor::finish(SmallVectorImpl<unsigned> *LRMap) {
@@ -1047,22 +1161,22 @@ void SplitEditor::finish(SmallVectorImpl<unsigned> *LRMap) {
// Leave all back-copies as is.
break;
case SM_Size:
- hoistCopiesForSize();
- break;
case SM_Speed:
- llvm_unreachable("Spill mode 'speed' not implemented yet");
+ // hoistCopies will behave differently between size and speed.
+ hoistCopies();
}
// Transfer the simply mapped values, check if any are skipped.
bool Skipped = transferValues();
+
+ // Rewrite virtual registers, possibly extending ranges.
+ rewriteAssigned(Skipped);
+
if (Skipped)
extendPHIKillRanges();
else
++NumSimple;
- // Rewrite virtual registers, possibly extending ranges.
- rewriteAssigned(Skipped);
-
// Delete defs that were rematted everywhere.
if (Skipped)
deleteRematVictims();
diff --git a/contrib/llvm/lib/CodeGen/SplitKit.h b/contrib/llvm/lib/CodeGen/SplitKit.h
index 69c65ff..a968494 100644
--- a/contrib/llvm/lib/CodeGen/SplitKit.h
+++ b/contrib/llvm/lib/CodeGen/SplitKit.h
@@ -18,6 +18,7 @@
#include "LiveRangeCalc.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/IntervalMap.h"
#include "llvm/ADT/SmallPtrSet.h"
@@ -37,6 +38,40 @@ class VirtRegMap;
class VNInfo;
class raw_ostream;
+/// Determines the latest safe point in a block at which we can insert a
+/// split, spill, or other instruction related to CurLI.
+class LLVM_LIBRARY_VISIBILITY InsertPointAnalysis {
+private:
+ const LiveIntervals &LIS;
+
+ /// Last legal insert point in each basic block in the current function.
+ /// The first entry is the first terminator, the second entry is the
+ /// last valid point to insert a split or spill for a variable that is
+ /// live into a landing pad successor.
+ SmallVector<std::pair<SlotIndex, SlotIndex>, 8> LastInsertPoint;
+
+ SlotIndex computeLastInsertPoint(const LiveInterval &CurLI,
+ const MachineBasicBlock &MBB);
+
+public:
+ InsertPointAnalysis(const LiveIntervals &lis, unsigned BBNum);
+
+ /// Return the base index of the last valid insert point for \p CurLI in \p MBB.
+ SlotIndex getLastInsertPoint(const LiveInterval &CurLI,
+ const MachineBasicBlock &MBB) {
+ unsigned Num = MBB.getNumber();
+ // Inline the common simple case.
+ if (LastInsertPoint[Num].first.isValid() &&
+ !LastInsertPoint[Num].second.isValid())
+ return LastInsertPoint[Num].first;
+ return computeLastInsertPoint(CurLI, MBB);
+ }
+
+ /// Returns the last insert point as an iterator for \p CurLI in \p MBB.
+ MachineBasicBlock::iterator getLastInsertPointIter(const LiveInterval &CurLI,
+ MachineBasicBlock &MBB);
+};
+
/// SplitAnalysis - Analyze a LiveInterval, looking for live range splitting
/// opportunities.
class LLVM_LIBRARY_VISIBILITY SplitAnalysis {
@@ -83,15 +118,12 @@ private:
// Current live interval.
const LiveInterval *CurLI;
+ /// Insert Point Analysis.
+ InsertPointAnalysis IPA;
+
// Sorted slot indexes of using instructions.
SmallVector<SlotIndex, 8> UseSlots;
- /// LastSplitPoint - Last legal split point in each basic block in the current
- /// function. The first entry is the first terminator, the second entry is the
- /// last valid split point for a variable that is live in to a landing pad
- /// successor.
- SmallVector<std::pair<SlotIndex, SlotIndex>, 8> LastSplitPoint;
-
/// UseBlocks - Blocks where CurLI has uses.
SmallVector<BlockInfo, 8> UseBlocks;
@@ -108,8 +140,6 @@ private:
/// DidRepairRange - analyze was forced to shrinkToUses().
bool DidRepairRange;
- SlotIndex computeLastSplitPoint(unsigned Num);
-
// Summarize statistics by counting instructions using CurLI.
void analyzeUses();
@@ -136,19 +166,6 @@ public:
/// getParent - Return the last analyzed interval.
const LiveInterval &getParent() const { return *CurLI; }
- /// getLastSplitPoint - Return the base index of the last valid split point
- /// in the basic block numbered Num.
- SlotIndex getLastSplitPoint(unsigned Num) {
- // Inline the common simple case.
- if (LastSplitPoint[Num].first.isValid() &&
- !LastSplitPoint[Num].second.isValid())
- return LastSplitPoint[Num].first;
- return computeLastSplitPoint(Num);
- }
-
- /// getLastSplitPointIter - Returns the last split point as an iterator.
- MachineBasicBlock::iterator getLastSplitPointIter(MachineBasicBlock*);
-
/// isOriginalEndpoint - Return true if the original live range was killed or
/// (re-)defined at Idx. Idx should be the 'def' slot for a normal kill/def,
/// and 'use' for an early-clobber def.
@@ -194,6 +211,14 @@ public:
/// @param BI The block to be isolated.
/// @param SingleInstrs True when single instructions should be isolated.
bool shouldSplitSingleBlock(const BlockInfo &BI, bool SingleInstrs) const;
+
+ SlotIndex getLastSplitPoint(unsigned Num) {
+ return IPA.getLastInsertPoint(*CurLI, *MF.getBlockNumbered(Num));
+ }
+
+ MachineBasicBlock::iterator getLastSplitPointIter(MachineBasicBlock *BB) {
+ return IPA.getLastInsertPointIter(*CurLI, *BB);
+ }
};
@@ -210,6 +235,7 @@ public:
///
class LLVM_LIBRARY_VISIBILITY SplitEditor {
SplitAnalysis &SA;
+ AliasAnalysis &AA;
LiveIntervals &LIS;
VirtRegMap &VRM;
MachineRegisterInfo &MRI;
@@ -329,9 +355,14 @@ private:
MachineBasicBlock *findShallowDominator(MachineBasicBlock *MBB,
MachineBasicBlock *DefMBB);
- /// hoistCopiesForSize - Hoist back-copies to the complement interval in a
- /// way that minimizes code size. This implements the SM_Size spill mode.
- void hoistCopiesForSize();
+ /// Find all the back-copies dominated by others.
+ void computeRedundantBackCopies(DenseSet<unsigned> &NotToHoistSet,
+ SmallVectorImpl<VNInfo *> &BackCopies);
+
+ /// Hoist back-copies to the complement interval. It tries to hoist all
+ /// the back-copies to one BB if it is beneficial, or else simply remove
+ /// redundant back-copies dominated by others.
+ void hoistCopies();
/// transferValues - Transfer values to the new ranges.
/// Return true if any ranges were skipped.
@@ -350,8 +381,9 @@ private:
public:
/// Create a new SplitEditor for editing the LiveInterval analyzed by SA.
/// Newly created intervals will be appended to newIntervals.
- SplitEditor(SplitAnalysis &SA, LiveIntervals&, VirtRegMap&,
- MachineDominatorTree&, MachineBlockFrequencyInfo &);
+ SplitEditor(SplitAnalysis &SA, AliasAnalysis &AA, LiveIntervals&,
+ VirtRegMap&, MachineDominatorTree&,
+ MachineBlockFrequencyInfo &);
/// reset - Prepare for a new split.
void reset(LiveRangeEdit&, ComplementSpillMode = SM_Partition);
diff --git a/contrib/llvm/lib/CodeGen/StackColoring.cpp b/contrib/llvm/lib/CodeGen/StackColoring.cpp
index 7b52038..87cd470 100644
--- a/contrib/llvm/lib/CodeGen/StackColoring.cpp
+++ b/contrib/llvm/lib/CodeGen/StackColoring.cpp
@@ -21,33 +21,30 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/CodeGen/Passes.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/SparseSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/LiveInterval.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
-#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
-#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/CodeGen/SlotIndexes.h"
#include "llvm/CodeGen/StackProtector.h"
#include "llvm/CodeGen/WinEHFuncInfo.h"
#include "llvm/IR/DebugInfo.h"
-#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
@@ -67,18 +64,180 @@ DisableColoring("no-stack-coloring",
/// The user may write code that uses allocas outside of the declared lifetime
/// zone. This can happen when the user returns a reference to a local
/// data-structure. We can detect these cases and decide not to optimize the
-/// code. If this flag is enabled, we try to save the user.
+/// code. If this flag is enabled, we try to save the user. This option
+/// is treated as overriding LifetimeStartOnFirstUse below.
static cl::opt<bool>
ProtectFromEscapedAllocas("protect-from-escaped-allocas",
cl::init(false), cl::Hidden,
cl::desc("Do not optimize lifetime zones that "
"are broken"));
+/// Enable enhanced dataflow scheme for lifetime analysis (treat first
+/// use of stack slot as start of slot lifetime, as opposed to looking
+/// for LIFETIME_START marker). See "Implementation notes" below for
+/// more info.
+static cl::opt<bool>
+LifetimeStartOnFirstUse("stackcoloring-lifetime-start-on-first-use",
+ cl::init(true), cl::Hidden,
+ cl::desc("Treat stack lifetimes as starting on first use, not on START marker."));
+
+
STATISTIC(NumMarkerSeen, "Number of lifetime markers found.");
STATISTIC(StackSpaceSaved, "Number of bytes saved due to merging slots.");
STATISTIC(StackSlotMerged, "Number of stack slot merged.");
STATISTIC(EscapedAllocas, "Number of allocas that escaped the lifetime region");
+//
+// Implementation Notes:
+// ---------------------
+//
+// Consider the following motivating example:
+//
+// int foo() {
+// char b1[1024], b2[1024];
+// if (...) {
+// char b3[1024];
+// <uses of b1, b3>;
+// return x;
+// } else {
+// char b4[1024], b5[1024];
+// <uses of b2, b4, b5>;
+// return y;
+// }
+// }
+//
+// In the code above, "b3" and "b4" are declared in distinct lexical
+// scopes, meaning that it is easy to prove that they can share the
+// same stack slot. Variables "b1" and "b2" are declared in the same
+// scope, meaning that from a lexical point of view, their lifetimes
+// overlap. From a control-flow point of view, however, the two
+// variables are accessed in disjoint regions of the CFG, thus it
+// should be possible for them to share the same stack slot. An ideal
+// stack allocation for the function above would look like:
+//
+// slot 0: b1, b2
+// slot 1: b3, b4
+// slot 2: b5
+//
+// Achieving this allocation is tricky, however, due to the way
+// lifetime markers are inserted. Here is a simplified view of the
+// control flow graph for the code above:
+//
+// +------ block 0 -------+
+// 0| LIFETIME_START b1, b2 |
+// 1| <test 'if' condition> |
+// +-----------------------+
+// ./ \.
+// +------ block 1 -------+ +------ block 2 -------+
+// 2| LIFETIME_START b3 | 5| LIFETIME_START b4, b5 |
+// 3| <uses of b1, b3> | 6| <uses of b2, b4, b5> |
+// 4| LIFETIME_END b3 | 7| LIFETIME_END b4, b5 |
+// +-----------------------+ +-----------------------+
+// \. /.
+// +------ block 3 -------+
+// 8| <cleanupcode> |
+// 9| LIFETIME_END b1, b2 |
+// 10| return |
+// +-----------------------+
+//
+// If we create live intervals for the variables above strictly based
+// on the lifetime markers, we'll get the set of intervals on the
+// left. If we ignore the lifetime start markers and instead treat a
+// variable's lifetime as beginning with the first reference to the
+// var, then we get the intervals on the right.
+//
+// LIFETIME_START First Use
+// b1: [0,9] [3,4] [8,9]
+// b2: [0,9] [6,9]
+// b3: [2,4] [3,4]
+// b4: [5,7] [6,7]
+// b5: [5,7] [6,7]
+//
+// For the intervals on the left, the best we can do is overlap two
+// variables (b3 and b4, for example); this gives us a stack size of
+// 4*1024 bytes, not ideal. When treating first-use as the start of a
+// lifetime, we can additionally overlap b1 and b5, giving us a 3*1024
+// byte stack (better).
+//
+// Relying entirely on first-use of stack slots is problematic,
+// however, due to the fact that optimizations can sometimes migrate
+// uses of a variable outside of its lifetime start/end region. Here
+// is an example:
+//
+// int bar() {
+// char b1[1024], b2[1024];
+// if (...) {
+// <uses of b2>
+// return y;
+// } else {
+// <uses of b1>
+// while (...) {
+// char b3[1024];
+// <uses of b3>
+// }
+// }
+// }
+//
+// Before optimization, the control flow graph for the code above
+// might look like the following:
+//
+// +------ block 0 -------+
+// 0| LIFETIME_START b1, b2 |
+// 1| <test 'if' condition> |
+// +-----------------------+
+// ./ \.
+// +------ block 1 -------+ +------- block 2 -------+
+// 2| <uses of b2> | 3| <uses of b1> |
+// +-----------------------+ +-----------------------+
+// | |
+// | +------- block 3 -------+ <-\.
+// | 4| <while condition> | |
+// | +-----------------------+ |
+// | / | |
+// | / +------- block 4 -------+
+// \ / 5| LIFETIME_START b3 | |
+// \ / 6| <uses of b3> | |
+// \ / 7| LIFETIME_END b3 | |
+// \ | +------------------------+ |
+// \ | \ /
+// +------ block 5 -----+ \---------------
+// 8| <cleanupcode> |
+// 9| LIFETIME_END b1, b2 |
+// 10| return |
+// +---------------------+
+//
+// During optimization, however, it can happen that an instruction
+// computing an address in "b3" (for example, a loop-invariant GEP) is
+// hoisted up out of the loop from block 4 to block 2. [Note that
+// this is not an actual load from the stack, only an instruction that
+// computes the address to be loaded]. If this happens, there is now a
+// path leading from the first use of b3 to the return instruction
+// that does not encounter the b3 LIFETIME_END, hence b3's lifetime is
+// now larger than if we were computing live intervals strictly based
+// on lifetime markers. In the example above, this lengthened lifetime
+// would mean that it would appear illegal to overlap b3 with b2.
+//
+// To deal with such cases, the code in ::collectMarkers() below
+// tries to identify "degenerate" slots -- those slots where on a single
+// forward pass through the CFG we encounter a first reference to slot
+// K before we hit the slot K lifetime start marker. For such slots,
+// we fall back on using the lifetime start marker as the beginning of
+// the variable's lifetime. NB: with this implementation, slots can
+// appear degenerate in cases where there is unstructured control flow:
+//
+// if (q) goto mid;
+// if (x > 9) {
+// int b[100];
+// memcpy(&b[0], ...);
+// mid: b[k] = ...;
+// abc(&b);
+// }
+//
+// If the RPO ordering chosen to walk the CFG happens to visit the b[k]
+// block before visiting the memcpy block (which will contain the lifetime
+// start for "b"), then it will appear that 'b' has a degenerate lifetime.
+//
+
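As a supplement to the notes above, here is a compilable sketch (not code from the pass; Block, Ref, and findConservativeSlots are invented names) of the degenerate-slot test that ::collectMarkers() performs on its forward walk:

// Supplementary sketch: the degenerate-slot test described above,
// distilled to a standalone function over a toy CFG.
#include <map>
#include <set>
#include <vector>

struct Block {
  std::vector<Block *> Preds;
  struct Ref {
    int Slot;
    enum Kind { Start, End, Use } K;
  };
  std::vector<Ref> Refs; // markers and uses, in instruction order
};

// Visit blocks in a fixed forward (e.g. RPO) order; a slot becomes
// conservative if some use of it appears before any START has been seen
// on the walk -- exactly the "use before marker" case discussed above.
std::set<int> findConservativeSlots(const std::vector<Block *> &Order) {
  std::map<const Block *, std::set<int>> SeenStart;
  std::set<int> Conservative;
  for (Block *B : Order) {
    std::set<int> Between; // slots with an unmatched START on entry to B
    for (Block *P : B->Preds) {
      const std::set<int> &S = SeenStart[P];
      Between.insert(S.begin(), S.end());
    }
    for (const Block::Ref &R : B->Refs) {
      if (R.K == Block::Ref::Start)
        Between.insert(R.Slot);
      else if (R.K == Block::Ref::End)
        Between.erase(R.Slot);
      else if (!Between.count(R.Slot))
        Conservative.insert(R.Slot); // first use seen outside START..END
    }
    SeenStart[B] = Between;
  }
  return Conservative;
}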
//===----------------------------------------------------------------------===//
// StackColoring Pass
//===----------------------------------------------------------------------===//
@@ -126,6 +285,17 @@ class StackColoring : public MachineFunctionPass {
/// once the coloring is done.
SmallVector<MachineInstr*, 8> Markers;
+ /// Record the FI slots for which we have seen some sort of
+ /// lifetime marker (either start or end).
+ BitVector InterestingSlots;
+
+ /// FI slots that need to be handled conservatively (for these
+ /// slots lifetime-start-on-first-use is disabled).
+ BitVector ConservativeSlots;
+
+ /// Number of iterations taken during data flow analysis.
+ unsigned NumIterations;
+
public:
static char ID;
StackColoring() : MachineFunctionPass(ID) {
@@ -137,6 +307,9 @@ public:
private:
/// Debug.
void dump() const;
+ void dumpIntervals() const;
+ void dumpBB(MachineBasicBlock *MBB) const;
+ void dumpBV(const char *tag, const BitVector &BV) const;
/// Removes all of the lifetime marker instructions from the function.
/// \returns true if any markers were removed.
@@ -153,6 +326,25 @@ private:
/// in and out blocks.
void calculateLocalLiveness();
+ /// Returns TRUE if we're using the first-use-begins-lifetime method for
+ /// this slot (if FALSE, then the start marker is treated as start of lifetime).
+ bool applyFirstUse(int Slot) {
+ if (!LifetimeStartOnFirstUse || ProtectFromEscapedAllocas)
+ return false;
+ if (ConservativeSlots.test(Slot))
+ return false;
+ return true;
+ }
+
+ /// Examines the specified instruction and returns TRUE if the instruction
+ /// represents the start or end of an interesting lifetime. The slot or slots
+ /// starting or ending are added to the vector "slots" and "isStart" is set
+ /// accordingly.
+ /// \returns True if inst contains a lifetime start or end
+ bool isLifetimeStartOrEnd(const MachineInstr &MI,
+ SmallVector<int, 4> &slots,
+ bool &isStart);
+
/// Construct the LiveIntervals for the slots.
void calculateLiveIntervals(unsigned NumSlots);
@@ -170,7 +362,10 @@ private:
/// Map entries which point to other entries to their destination.
/// A->B->C becomes A->C.
- void expungeSlotMap(DenseMap<int, int> &SlotRemap, unsigned NumSlots);
+ void expungeSlotMap(DenseMap<int, int> &SlotRemap, unsigned NumSlots);
+
+ /// Used in collectMarkers
+ typedef DenseMap<const MachineBasicBlock*, BitVector> BlockBitVecMap;
};
} // end anonymous namespace
@@ -179,55 +374,202 @@ char &llvm::StackColoringID = StackColoring::ID;
INITIALIZE_PASS_BEGIN(StackColoring,
"stack-coloring", "Merge disjoint stack slots", false, false)
-INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
INITIALIZE_PASS_DEPENDENCY(StackProtector)
INITIALIZE_PASS_END(StackColoring,
"stack-coloring", "Merge disjoint stack slots", false, false)
void StackColoring::getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addRequired<MachineDominatorTree>();
- AU.addPreserved<MachineDominatorTree>();
AU.addRequired<SlotIndexes>();
AU.addRequired<StackProtector>();
MachineFunctionPass::getAnalysisUsage(AU);
}
-void StackColoring::dump() const {
- for (MachineBasicBlock *MBB : depth_first(MF)) {
- DEBUG(dbgs() << "Inspecting block #" << BasicBlocks.lookup(MBB) << " ["
- << MBB->getName() << "]\n");
+#ifndef NDEBUG
- LivenessMap::const_iterator BI = BlockLiveness.find(MBB);
- assert(BI != BlockLiveness.end() && "Block not found");
- const BlockLifetimeInfo &BlockInfo = BI->second;
+LLVM_DUMP_METHOD void StackColoring::dumpBV(const char *tag,
+ const BitVector &BV) const {
+ DEBUG(dbgs() << tag << " : { ");
+ for (unsigned I = 0, E = BV.size(); I != E; ++I)
+ DEBUG(dbgs() << BV.test(I) << " ");
+ DEBUG(dbgs() << "}\n");
+}
+
+LLVM_DUMP_METHOD void StackColoring::dumpBB(MachineBasicBlock *MBB) const {
+ LivenessMap::const_iterator BI = BlockLiveness.find(MBB);
+ assert(BI != BlockLiveness.end() && "Block not found");
+ const BlockLifetimeInfo &BlockInfo = BI->second;
- DEBUG(dbgs()<<"BEGIN : {");
- for (unsigned i=0; i < BlockInfo.Begin.size(); ++i)
- DEBUG(dbgs()<<BlockInfo.Begin.test(i)<<" ");
- DEBUG(dbgs()<<"}\n");
+ dumpBV("BEGIN", BlockInfo.Begin);
+ dumpBV("END", BlockInfo.End);
+ dumpBV("LIVE_IN", BlockInfo.LiveIn);
+ dumpBV("LIVE_OUT", BlockInfo.LiveOut);
+}
- DEBUG(dbgs()<<"END : {");
- for (unsigned i=0; i < BlockInfo.End.size(); ++i)
- DEBUG(dbgs()<<BlockInfo.End.test(i)<<" ");
+LLVM_DUMP_METHOD void StackColoring::dump() const {
+ for (MachineBasicBlock *MBB : depth_first(MF)) {
+ DEBUG(dbgs() << "Inspecting block #" << MBB->getNumber() << " ["
+ << MBB->getName() << "]\n");
+ DEBUG(dumpBB(MBB));
+ }
+}
- DEBUG(dbgs()<<"}\n");
+LLVM_DUMP_METHOD void StackColoring::dumpIntervals() const {
+ for (unsigned I = 0, E = Intervals.size(); I != E; ++I) {
+ DEBUG(dbgs() << "Interval[" << I << "]:\n");
+ DEBUG(Intervals[I]->dump());
+ }
+}
- DEBUG(dbgs()<<"LIVE_IN: {");
- for (unsigned i=0; i < BlockInfo.LiveIn.size(); ++i)
- DEBUG(dbgs()<<BlockInfo.LiveIn.test(i)<<" ");
+#endif // not NDEBUG
+
+static inline int getStartOrEndSlot(const MachineInstr &MI)
+{
+ assert((MI.getOpcode() == TargetOpcode::LIFETIME_START ||
+ MI.getOpcode() == TargetOpcode::LIFETIME_END) &&
+ "Expected LIFETIME_START or LIFETIME_END op");
+ const MachineOperand &MO = MI.getOperand(0);
+ int Slot = MO.getIndex();
+ if (Slot >= 0)
+ return Slot;
+ return -1;
+}
- DEBUG(dbgs()<<"}\n");
- DEBUG(dbgs()<<"LIVEOUT: {");
- for (unsigned i=0; i < BlockInfo.LiveOut.size(); ++i)
- DEBUG(dbgs()<<BlockInfo.LiveOut.test(i)<<" ");
- DEBUG(dbgs()<<"}\n");
+//
+// At the moment the only way to end a variable lifetime is with
+// a LIFETIME_END op (which can't also contain a start). If things
+// change and the IR allows for a single inst that both begins
+// and ends lifetime(s), this interface will need to be reworked.
+//
+bool StackColoring::isLifetimeStartOrEnd(const MachineInstr &MI,
+ SmallVector<int, 4> &slots,
+ bool &isStart)
+{
+ if (MI.getOpcode() == TargetOpcode::LIFETIME_START ||
+ MI.getOpcode() == TargetOpcode::LIFETIME_END) {
+ int Slot = getStartOrEndSlot(MI);
+ if (Slot < 0)
+ return false;
+ if (!InterestingSlots.test(Slot))
+ return false;
+ slots.push_back(Slot);
+ if (MI.getOpcode() == TargetOpcode::LIFETIME_END) {
+ isStart = false;
+ return true;
+ }
+ if (! applyFirstUse(Slot)) {
+ isStart = true;
+ return true;
+ }
+ } else if (LifetimeStartOnFirstUse && !ProtectFromEscapedAllocas) {
+ if (! MI.isDebugValue()) {
+ bool found = false;
+ for (const MachineOperand &MO : MI.operands()) {
+ if (!MO.isFI())
+ continue;
+ int Slot = MO.getIndex();
+ if (Slot<0)
+ continue;
+ if (InterestingSlots.test(Slot) && applyFirstUse(Slot)) {
+ slots.push_back(Slot);
+ found = true;
+ }
+ }
+ if (found) {
+ isStart = true;
+ return true;
+ }
+ }
}
+ return false;
}
-unsigned StackColoring::collectMarkers(unsigned NumSlot) {
+unsigned StackColoring::collectMarkers(unsigned NumSlot)
+{
unsigned MarkersFound = 0;
- // Scan the function to find all lifetime markers.
+ BlockBitVecMap SeenStartMap;
+ InterestingSlots.clear();
+ InterestingSlots.resize(NumSlot);
+ ConservativeSlots.clear();
+ ConservativeSlots.resize(NumSlot);
+
+ // number of start and end lifetime ops for each slot
+ SmallVector<int, 8> NumStartLifetimes(NumSlot, 0);
+ SmallVector<int, 8> NumEndLifetimes(NumSlot, 0);
+
+ // Step 1: collect markers and populate the "InterestingSlots"
+ // and "ConservativeSlots" sets.
+ for (MachineBasicBlock *MBB : depth_first(MF)) {
+
+ // Compute the set of slots for which we've seen a START marker but have
+ // not yet seen an END marker at this point in the walk (i.e., on entry
+ // to this block).
+ BitVector BetweenStartEnd;
+ BetweenStartEnd.resize(NumSlot);
+ for (MachineBasicBlock::const_pred_iterator PI = MBB->pred_begin(),
+ PE = MBB->pred_end(); PI != PE; ++PI) {
+ BlockBitVecMap::const_iterator I = SeenStartMap.find(*PI);
+ if (I != SeenStartMap.end()) {
+ BetweenStartEnd |= I->second;
+ }
+ }
+
+ // Walk the instructions in the block to look for start/end ops.
+ for (MachineInstr &MI : *MBB) {
+ if (MI.getOpcode() == TargetOpcode::LIFETIME_START ||
+ MI.getOpcode() == TargetOpcode::LIFETIME_END) {
+ int Slot = getStartOrEndSlot(MI);
+ if (Slot < 0)
+ continue;
+ InterestingSlots.set(Slot);
+ if (MI.getOpcode() == TargetOpcode::LIFETIME_START) {
+ BetweenStartEnd.set(Slot);
+ NumStartLifetimes[Slot] += 1;
+ } else {
+ BetweenStartEnd.reset(Slot);
+ NumEndLifetimes[Slot] += 1;
+ }
+ const AllocaInst *Allocation = MFI->getObjectAllocation(Slot);
+ if (Allocation) {
+ DEBUG(dbgs() << "Found a lifetime ");
+ DEBUG(dbgs() << (MI.getOpcode() == TargetOpcode::LIFETIME_START
+ ? "start"
+ : "end"));
+ DEBUG(dbgs() << " marker for slot #" << Slot);
+ DEBUG(dbgs() << " with allocation: " << Allocation->getName()
+ << "\n");
+ }
+ Markers.push_back(&MI);
+ MarkersFound += 1;
+ } else {
+ for (const MachineOperand &MO : MI.operands()) {
+ if (!MO.isFI())
+ continue;
+ int Slot = MO.getIndex();
+ if (Slot < 0)
+ continue;
+ if (! BetweenStartEnd.test(Slot)) {
+ ConservativeSlots.set(Slot);
+ }
+ }
+ }
+ }
+ BitVector &SeenStart = SeenStartMap[MBB];
+ SeenStart |= BetweenStartEnd;
+ }
+ if (!MarkersFound) {
+ return 0;
+ }
+
+ // PR27903: slots with multiple start or end lifetime ops are not
+ // safe to enable for "lifetime-start-on-first-use".
+ for (unsigned slot = 0; slot < NumSlot; ++slot)
+ if (NumStartLifetimes[slot] > 1 || NumEndLifetimes[slot] > 1)
+ ConservativeSlots.set(slot);
+ DEBUG(dumpBV("Conservative slots", ConservativeSlots));
+
+ // Step 2: compute begin/end sets for each block
+
// NOTE: We use a reverse-post-order iteration to ensure that we obtain a
// deterministic numbering, and because we'll need a post-order iteration
// later for solving the liveness dataflow problem.
@@ -243,35 +585,33 @@ unsigned StackColoring::collectMarkers(unsigned NumSlot) {
BlockInfo.Begin.resize(NumSlot);
BlockInfo.End.resize(NumSlot);
+ SmallVector<int, 4> slots;
for (MachineInstr &MI : *MBB) {
- if (MI.getOpcode() != TargetOpcode::LIFETIME_START &&
- MI.getOpcode() != TargetOpcode::LIFETIME_END)
- continue;
-
- Markers.push_back(&MI);
-
- bool IsStart = MI.getOpcode() == TargetOpcode::LIFETIME_START;
- const MachineOperand &MO = MI.getOperand(0);
- unsigned Slot = MO.getIndex();
-
- MarkersFound++;
-
- const AllocaInst *Allocation = MFI->getObjectAllocation(Slot);
- if (Allocation) {
- DEBUG(dbgs()<<"Found a lifetime marker for slot #"<<Slot<<
- " with allocation: "<< Allocation->getName()<<"\n");
- }
-
- if (IsStart) {
- BlockInfo.Begin.set(Slot);
- } else {
- if (BlockInfo.Begin.test(Slot)) {
- // Allocas that start and end within a single block are handled
- // specially when computing the LiveIntervals to avoid pessimizing
- // the liveness propagation.
- BlockInfo.Begin.reset(Slot);
- } else {
+ bool isStart = false;
+ slots.clear();
+ if (isLifetimeStartOrEnd(MI, slots, isStart)) {
+ if (!isStart) {
+ assert(slots.size() == 1 && "unexpected: MI ends multiple slots");
+ int Slot = slots[0];
+ if (BlockInfo.Begin.test(Slot)) {
+ BlockInfo.Begin.reset(Slot);
+ }
BlockInfo.End.set(Slot);
+ } else {
+ for (auto Slot : slots) {
+ DEBUG(dbgs() << "Found a use of slot #" << Slot);
+ DEBUG(dbgs() << " at BB#" << MBB->getNumber() << " index ");
+ DEBUG(Indexes->getInstructionIndex(MI).print(dbgs()));
+ const AllocaInst *Allocation = MFI->getObjectAllocation(Slot);
+ if (Allocation) {
+ DEBUG(dbgs() << " with allocation: "<< Allocation->getName());
+ }
+ DEBUG(dbgs() << "\n");
+ if (BlockInfo.End.test(Slot)) {
+ BlockInfo.End.reset(Slot);
+ }
+ BlockInfo.Begin.set(Slot);
+ }
}
}
}
@@ -282,90 +622,56 @@ unsigned StackColoring::collectMarkers(unsigned NumSlot) {
return MarkersFound;
}
-void StackColoring::calculateLocalLiveness() {
- // Perform a standard reverse dataflow computation to solve for
- // global liveness. The BEGIN set here is equivalent to KILL in the standard
- // formulation, and END is equivalent to GEN. The result of this computation
- // is a map from blocks to bitvectors where the bitvectors represent which
- // allocas are live in/out of that block.
- SmallPtrSet<const MachineBasicBlock*, 8> BBSet(BasicBlockNumbering.begin(),
- BasicBlockNumbering.end());
- unsigned NumSSMIters = 0;
+void StackColoring::calculateLocalLiveness()
+{
+ unsigned NumIters = 0;
bool changed = true;
while (changed) {
changed = false;
- ++NumSSMIters;
-
- SmallPtrSet<const MachineBasicBlock*, 8> NextBBSet;
+ ++NumIters;
for (const MachineBasicBlock *BB : BasicBlockNumbering) {
- if (!BBSet.count(BB)) continue;
// Use an iterator to avoid repeated lookups.
LivenessMap::iterator BI = BlockLiveness.find(BB);
assert(BI != BlockLiveness.end() && "Block not found");
BlockLifetimeInfo &BlockInfo = BI->second;
+ // Compute LiveIn by unioning together the LiveOut sets of all preds.
BitVector LocalLiveIn;
- BitVector LocalLiveOut;
-
- // Forward propagation from begins to ends.
for (MachineBasicBlock::const_pred_iterator PI = BB->pred_begin(),
PE = BB->pred_end(); PI != PE; ++PI) {
LivenessMap::const_iterator I = BlockLiveness.find(*PI);
assert(I != BlockLiveness.end() && "Predecessor not found");
LocalLiveIn |= I->second.LiveOut;
}
- LocalLiveIn |= BlockInfo.End;
- LocalLiveIn.reset(BlockInfo.Begin);
-
- // Reverse propagation from ends to begins.
- for (MachineBasicBlock::const_succ_iterator SI = BB->succ_begin(),
- SE = BB->succ_end(); SI != SE; ++SI) {
- LivenessMap::const_iterator I = BlockLiveness.find(*SI);
- assert(I != BlockLiveness.end() && "Successor not found");
- LocalLiveOut |= I->second.LiveIn;
- }
- LocalLiveOut |= BlockInfo.Begin;
- LocalLiveOut.reset(BlockInfo.End);
-
- LocalLiveIn |= LocalLiveOut;
- LocalLiveOut |= LocalLiveIn;
- // After adopting the live bits, we need to turn-off the bits which
- // are de-activated in this block.
+ // Compute LiveOut by subtracting out lifetimes that end in this
+ // block, then adding in lifetimes that begin in this block. If
+ // we have both BEGIN and END markers in the same basic block
+ // then we know that the BEGIN marker comes after the END,
+ // because we already handle the case where the BEGIN comes
+ // before the END when collecting the markers (and building the
+ // BEGIN/END vectors).
+ BitVector LocalLiveOut = LocalLiveIn;
LocalLiveOut.reset(BlockInfo.End);
- LocalLiveIn.reset(BlockInfo.Begin);
-
- // If we have both BEGIN and END markers in the same basic block then
- // we know that the BEGIN marker comes after the END, because we already
- // handle the case where the BEGIN comes before the END when collecting
- // the markers (and building the BEGIN/END vectore).
- // Want to enable the LIVE_IN and LIVE_OUT of slots that have both
- // BEGIN and END because it means that the value lives before and after
- // this basic block.
- BitVector LocalEndBegin = BlockInfo.End;
- LocalEndBegin &= BlockInfo.Begin;
- LocalLiveIn |= LocalEndBegin;
- LocalLiveOut |= LocalEndBegin;
+ LocalLiveOut |= BlockInfo.Begin;
+ // Update block LiveIn set, noting whether it has changed.
if (LocalLiveIn.test(BlockInfo.LiveIn)) {
changed = true;
BlockInfo.LiveIn |= LocalLiveIn;
-
- NextBBSet.insert(BB->pred_begin(), BB->pred_end());
}
+ // Update block LiveOut set, noting whether it has changed.
if (LocalLiveOut.test(BlockInfo.LiveOut)) {
changed = true;
BlockInfo.LiveOut |= LocalLiveOut;
-
- NextBBSet.insert(BB->succ_begin(), BB->succ_end());
}
}
-
- BBSet = std::move(NextBBSet);
}// while changed.
+
+ NumIterations = NumIters;
}
void StackColoring::calculateLiveIntervals(unsigned NumSlots) {
@@ -380,28 +686,22 @@ void StackColoring::calculateLiveIntervals(unsigned NumSlots) {
Finishes.clear();
Finishes.resize(NumSlots);
- // Create the interval for the basic blocks with lifetime markers in them.
- for (const MachineInstr *MI : Markers) {
- if (MI->getParent() != &MBB)
- continue;
-
- assert((MI->getOpcode() == TargetOpcode::LIFETIME_START ||
- MI->getOpcode() == TargetOpcode::LIFETIME_END) &&
- "Invalid Lifetime marker");
-
- bool IsStart = MI->getOpcode() == TargetOpcode::LIFETIME_START;
- const MachineOperand &Mo = MI->getOperand(0);
- int Slot = Mo.getIndex();
- assert(Slot >= 0 && "Invalid slot");
+ // Create the interval for the basic blocks containing lifetime begin/end.
+ for (const MachineInstr &MI : MBB) {
+ SmallVector<int, 4> slots;
+ bool IsStart = false;
+ if (!isLifetimeStartOrEnd(MI, slots, IsStart))
+ continue;
SlotIndex ThisIndex = Indexes->getInstructionIndex(MI);
-
- if (IsStart) {
- if (!Starts[Slot].isValid() || Starts[Slot] > ThisIndex)
- Starts[Slot] = ThisIndex;
- } else {
- if (!Finishes[Slot].isValid() || Finishes[Slot] < ThisIndex)
- Finishes[Slot] = ThisIndex;
+ for (auto Slot : slots) {
+ if (IsStart) {
+ if (!Starts[Slot].isValid() || Starts[Slot] > ThisIndex)
+ Starts[Slot] = ThisIndex;
+ } else {
+ if (!Finishes[Slot].isValid() || Finishes[Slot] < ThisIndex)
+ Finishes[Slot] = ThisIndex;
+ }
}
}
@@ -417,7 +717,29 @@ void StackColoring::calculateLiveIntervals(unsigned NumSlots) {
}
for (unsigned i = 0; i < NumSlots; ++i) {
- assert(Starts[i].isValid() == Finishes[i].isValid() && "Unmatched range");
+ //
+ // When LifetimeStartOnFirstUse is turned on, data flow analysis
+ // is forward (from starts to ends), not bidirectional. A
+ // consequence of this is that we can wind up in situations
+ // where Starts[i] is invalid but Finishes[i] is valid and vice
+ // versa. Example:
+ //
+ // LIFETIME_START x
+ // if (...) {
+ // <use of x>
+ // throw ...;
+ // }
+ // LIFETIME_END x
+ // return 2;
+ //
+ // Here the slot for "x" will not be live into the block
+ // containing the "return 2" (since lifetimes start with first
+ // use, not at the dominating LIFETIME_START marker).
+ //
+ if (Starts[i].isValid() && !Finishes[i].isValid()) {
+ Finishes[i] = Indexes->getMBBEndIdx(&MBB);
+ }
if (!Starts[i].isValid())
continue;
@@ -495,10 +817,21 @@ void StackColoring::remapInstructions(DenseMap<int, int> &SlotRemap) {
// upcoming replacement.
SP->adjustForColoring(From, To);
+ // The new alloca might not be valid in a llvm.dbg.declare for this
+ // variable, so undef out the use to make the verifier happy.
+ AllocaInst *FromAI = const_cast<AllocaInst *>(From);
+ if (FromAI->isUsedByMetadata())
+ ValueAsMetadata::handleRAUW(FromAI, UndefValue::get(FromAI->getType()));
+ for (auto &Use : FromAI->uses()) {
+ if (BitCastInst *BCI = dyn_cast<BitCastInst>(Use.getUser()))
+ if (BCI->isUsedByMetadata())
+ ValueAsMetadata::handleRAUW(BCI, UndefValue::get(BCI->getType()));
+ }
+
// Note that this will not replace uses in MMOs (which we'll update below),
// or anywhere else (which is why we won't delete the original
// instruction).
- const_cast<AllocaInst *>(From)->replaceAllUsesWith(Inst);
+ FromAI->replaceAllUsesWith(Inst);
}
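Condensed, the debug-metadata handling added above looks like the following sketch (undefDebugUses is an invented name; note that it inspects each use's user, since a bitcast of the alloca may itself be referenced by metadata):

// Before RAUW'ing a merged alloca, redirect llvm.dbg.* references to it --
// direct, or through a bitcast user -- to undef so the verifier stays happy.
#include "llvm/IR/Constants.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Metadata.h"
using namespace llvm;

static void undefDebugUses(AllocaInst *FromAI) {
  if (FromAI->isUsedByMetadata())
    ValueAsMetadata::handleRAUW(FromAI, UndefValue::get(FromAI->getType()));
  for (Use &U : FromAI->uses())
    if (auto *BCI = dyn_cast<BitCastInst>(U.getUser()))
      if (BCI->isUsedByMetadata())
        ValueAsMetadata::handleRAUW(BCI, UndefValue::get(BCI->getType()));
}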
// Remap all instructions to the new stack slots.
@@ -557,7 +890,7 @@ void StackColoring::remapInstructions(DenseMap<int, int> &SlotRemap) {
// If we *don't* protect the user from escaped allocas, don't bother
// validating the instructions.
if (!I.isDebugValue() && TouchesMemory && ProtectFromEscapedAllocas) {
- SlotIndex Index = Indexes->getInstructionIndex(&I);
+ SlotIndex Index = Indexes->getInstructionIndex(I);
const LiveInterval *Interval = &*Intervals[FromSlot];
assert(Interval->find(Index) != Interval->end() &&
"Found instruction usage outside of live range.");
@@ -616,7 +949,7 @@ void StackColoring::removeInvalidSlotRanges() {
// Check that the used slot is inside the calculated lifetime range.
// If it is not, warn about it and invalidate the range.
LiveInterval *Interval = &*Intervals[Slot];
- SlotIndex Index = Indexes->getInstructionIndex(&I);
+ SlotIndex Index = Indexes->getInstructionIndex(I);
if (Interval->find(Index) == Interval->end()) {
Interval->clear();
DEBUG(dbgs()<<"Invalidating range #"<<Slot<<"\n");
@@ -643,9 +976,6 @@ void StackColoring::expungeSlotMap(DenseMap<int, int> &SlotRemap,
}
bool StackColoring::runOnMachineFunction(MachineFunction &Func) {
- if (skipOptnoneFunction(*Func.getFunction()))
- return false;
-
DEBUG(dbgs() << "********** Stack Coloring **********\n"
<< "********** Function: "
<< ((const Value*)Func.getFunction())->getName() << '\n');
@@ -667,7 +997,6 @@ bool StackColoring::runOnMachineFunction(MachineFunction &Func) {
return false;
SmallVector<int, 8> SortedSlots;
-
SortedSlots.reserve(NumSlots);
Intervals.reserve(NumSlots);
@@ -686,7 +1015,8 @@ bool StackColoring::runOnMachineFunction(MachineFunction &Func) {
// Don't continue because there are not enough lifetime markers, or the
// stack is too small, or we are told not to optimize the slots.
- if (NumMarkers < 2 || TotalSize < 16 || DisableColoring) {
+ if (NumMarkers < 2 || TotalSize < 16 || DisableColoring ||
+ skipFunction(*Func.getFunction())) {
DEBUG(dbgs()<<"Will not try to merge slots.\n");
return removeAllMarkers();
}
@@ -700,9 +1030,12 @@ bool StackColoring::runOnMachineFunction(MachineFunction &Func) {
// Calculate the liveness of each block.
calculateLocalLiveness();
+ DEBUG(dbgs() << "Dataflow iterations: " << NumIterations << "\n");
+ DEBUG(dump());
// Propagate the liveness information.
calculateLiveIntervals(NumSlots);
+ DEBUG(dumpIntervals());
// Search for allocas which are used outside of the declared lifetime
// markers.
diff --git a/contrib/llvm/lib/CodeGen/StackMapLivenessAnalysis.cpp b/contrib/llvm/lib/CodeGen/StackMapLivenessAnalysis.cpp
index 8550583..87e4eb6 100644
--- a/contrib/llvm/lib/CodeGen/StackMapLivenessAnalysis.cpp
+++ b/contrib/llvm/lib/CodeGen/StackMapLivenessAnalysis.cpp
@@ -62,6 +62,11 @@ public:
/// information we preserve.
void getAnalysisUsage(AnalysisUsage &AU) const override;
+ MachineFunctionProperties getRequiredProperties() const override {
+ return MachineFunctionProperties().set(
+ MachineFunctionProperties::Property::AllVRegsAllocated);
+ }
+
/// \brief Calculate the liveness information for the given machine function.
bool runOnMachineFunction(MachineFunction &MF) override;
@@ -122,7 +127,8 @@ bool StackMapLiveness::calculateLiveness(MachineFunction &MF) {
for (auto &MBB : MF) {
DEBUG(dbgs() << "****** BB " << MBB.getName() << " ******\n");
LiveRegs.init(TRI);
- LiveRegs.addLiveOuts(&MBB);
+ // FIXME: This should probably be addLiveOuts().
+ LiveRegs.addLiveOutsNoPristines(MBB);
bool HasStackMap = false;
// Reverse iterate over all instructions and add the current live register
// set to an instruction if we encounter a patchpoint instruction.
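For context, the surrounding loop follows the usual bottom-up LivePhysRegs pattern; a hedged sketch (scanBlock is an invented name; the calls match those used here as of this import):

#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/Target/TargetRegisterInfo.h"
using namespace llvm;

static void scanBlock(MachineBasicBlock &MBB, const TargetRegisterInfo *TRI) {
  LivePhysRegs LiveRegs;
  LiveRegs.init(TRI);
  LiveRegs.addLiveOutsNoPristines(MBB); // pristine CSRs excluded; see FIXME
  // Walk instructions in reverse; after each step, LiveRegs holds the set
  // of registers live immediately before the instruction just visited.
  for (auto I = MBB.rbegin(), E = MBB.rend(); I != E; ++I)
    LiveRegs.stepBackward(*I);
}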
diff --git a/contrib/llvm/lib/CodeGen/StackMaps.cpp b/contrib/llvm/lib/CodeGen/StackMaps.cpp
index b3cd8b3..d91bb80 100644
--- a/contrib/llvm/lib/CodeGen/StackMaps.cpp
+++ b/contrib/llvm/lib/CodeGen/StackMaps.cpp
@@ -520,9 +520,9 @@ void StackMaps::emitCallsiteEntries(MCStreamer &OS) {
void StackMaps::serializeToStackMapSection() {
(void)WSMP;
// Bail out if there's no stack map data.
- assert((!CSInfos.empty() || (CSInfos.empty() && ConstPool.empty())) &&
+ assert((!CSInfos.empty() || ConstPool.empty()) &&
"Expected empty constant pool too!");
- assert((!CSInfos.empty() || (CSInfos.empty() && FnStackSize.empty())) &&
+ assert((!CSInfos.empty() || FnStackSize.empty()) &&
"Expected empty function record too!");
if (CSInfos.empty())
return;
diff --git a/contrib/llvm/lib/CodeGen/StackProtector.cpp b/contrib/llvm/lib/CodeGen/StackProtector.cpp
index db3fef5..89868e4 100644
--- a/contrib/llvm/lib/CodeGen/StackProtector.cpp
+++ b/contrib/llvm/lib/CodeGen/StackProtector.cpp
@@ -18,12 +18,13 @@
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
+#include "llvm/Analysis/EHPersonalities.h"
#include "llvm/Analysis/ValueTracking.h"
-#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
@@ -89,15 +90,25 @@ bool StackProtector::runOnFunction(Function &Fn) {
getAnalysisIfAvailable<DominatorTreeWrapperPass>();
DT = DTWP ? &DTWP->getDomTree() : nullptr;
TLI = TM->getSubtargetImpl(Fn)->getTargetLowering();
+ HasPrologue = false;
+ HasIRCheck = false;
Attribute Attr = Fn.getFnAttribute("stack-protector-buffer-size");
if (Attr.isStringAttribute() &&
Attr.getValueAsString().getAsInteger(10, SSPBufferSize))
- return false; // Invalid integer string
+ return false; // Invalid integer string
if (!RequiresStackProtector())
return false;
+ // TODO(etienneb): Functions with funclets are not correctly supported now.
+ // Do nothing if this is a funclet-based personality.
+ if (Fn.hasPersonalityFn()) {
+ EHPersonality Personality = classifyEHPersonality(Fn.getPersonalityFn());
+ if (isFuncletEHPersonality(Personality))
+ return false;
+ }
+
++NumFunProtected;
return InsertStackProtectors();
}
@@ -200,11 +211,24 @@ bool StackProtector::HasAddressTaken(const Instruction *AI) {
bool StackProtector::RequiresStackProtector() {
bool Strong = false;
bool NeedsProtector = false;
+ for (const BasicBlock &BB : *F)
+ for (const Instruction &I : BB)
+ if (const CallInst *CI = dyn_cast<CallInst>(&I))
+ if (CI->getCalledFunction() ==
+ Intrinsic::getDeclaration(F->getParent(),
+ Intrinsic::stackprotector))
+ HasPrologue = true;
+
+ if (F->hasFnAttribute(Attribute::SafeStack))
+ return false;
+
if (F->hasFnAttribute(Attribute::StackProtectReq)) {
NeedsProtector = true;
Strong = true; // Use the same heuristic as strong to determine SSPLayout
} else if (F->hasFnAttribute(Attribute::StackProtectStrong))
Strong = true;
+ else if (HasPrologue)
+ NeedsProtector = true;
else if (!F->hasFnAttribute(Attribute::StackProtect))
return false;
@@ -256,106 +280,51 @@ bool StackProtector::RequiresStackProtector() {
return NeedsProtector;
}
-static bool InstructionWillNotHaveChain(const Instruction *I) {
- return !I->mayHaveSideEffects() && !I->mayReadFromMemory() &&
- isSafeToSpeculativelyExecute(I);
-}
-
-/// Identify if RI has a previous instruction in the "Tail Position" and return
-/// it. Otherwise return 0.
-///
-/// This is based off of the code in llvm::isInTailCallPosition. The difference
-/// is that it inverts the first part of llvm::isInTailCallPosition since
-/// isInTailCallPosition is checking if a call is in a tail call position, and
-/// we are searching for an unknown tail call that might be in the tail call
-/// position. Once we find the call though, the code uses the same refactored
-/// code, returnTypeIsEligibleForTailCall.
-static CallInst *FindPotentialTailCall(BasicBlock *BB, ReturnInst *RI,
- const TargetLoweringBase *TLI) {
- // Establish a reasonable upper bound on the maximum amount of instructions we
- // will look through to find a tail call.
- unsigned SearchCounter = 0;
- const unsigned MaxSearch = 4;
- bool NoInterposingChain = true;
-
- for (BasicBlock::reverse_iterator I = std::next(BB->rbegin()), E = BB->rend();
- I != E && SearchCounter < MaxSearch; ++I) {
- Instruction *Inst = &*I;
-
- // Skip over debug intrinsics and do not allow them to affect our MaxSearch
- // counter.
- if (isa<DbgInfoIntrinsic>(Inst))
- continue;
-
- // If we find a call and the following conditions are satisifed, then we
- // have found a tail call that satisfies at least the target independent
- // requirements of a tail call:
- //
- // 1. The call site has the tail marker.
- //
- // 2. The call site either will not cause the creation of a chain or if a
- // chain is necessary there are no instructions in between the callsite and
- // the call which would create an interposing chain.
- //
- // 3. The return type of the function does not impede tail call
- // optimization.
- if (CallInst *CI = dyn_cast<CallInst>(Inst)) {
- if (CI->isTailCall() &&
- (InstructionWillNotHaveChain(CI) || NoInterposingChain) &&
- returnTypeIsEligibleForTailCall(BB->getParent(), CI, RI, *TLI))
- return CI;
- }
-
- // If we did not find a call see if we have an instruction that may create
- // an interposing chain.
- NoInterposingChain =
- NoInterposingChain && InstructionWillNotHaveChain(Inst);
-
- // Increment max search.
- SearchCounter++;
- }
-
- return nullptr;
+/// Create a stack guard loading and populate whether SelectionDAG SSP is
+/// supported.
+static Value *getStackGuard(const TargetLoweringBase *TLI, Module *M,
+ IRBuilder<> &B,
+ bool *SupportsSelectionDAGSP = nullptr) {
+ if (Value *Guard = TLI->getIRStackGuard(B))
+ return B.CreateLoad(Guard, true, "StackGuard");
+
+ // Use SelectionDAG SSP handling, since there isn't an IR guard.
+ //
+ // This is slightly awkward: we optionally report here whether SelectionDAG
+ // SSP should be performed. The reason is that the bit is strictly defined
+ // as !TLI->getIRStackGuard(B), and getIRStackGuard is also mutating; there
+ // is no way to obtain the bit without mutating the IR, so it has to be
+ // computed at this point.
+ //
+ // We could have defined a new function TLI::supportsSelectionDAGSP(), but
+ // that would put more burden on the backends' overriding work, especially
+ // since it would convey the same information getIRStackGuard() already
+ // gives.
+ if (SupportsSelectionDAGSP)
+ *SupportsSelectionDAGSP = true;
+ TLI->insertSSPDeclarations(*M);
+ return B.CreateCall(Intrinsic::getDeclaration(M, Intrinsic::stackguard));
}
-/// Insert code into the entry block that stores the __stack_chk_guard
+/// Insert code into the entry block that stores the stack guard
/// variable onto the stack:
///
/// entry:
/// StackGuardSlot = alloca i8*
-/// StackGuard = load __stack_chk_guard
-/// call void @llvm.stackprotect.create(StackGuard, StackGuardSlot)
+/// StackGuard = <stack guard>
+/// call void @llvm.stackprotector(StackGuard, StackGuardSlot)
///
/// Returns true if the platform/triple supports the stackprotectorcreate pseudo
/// node.
static bool CreatePrologue(Function *F, Module *M, ReturnInst *RI,
- const TargetLoweringBase *TLI, const Triple &TT,
- AllocaInst *&AI, Value *&StackGuardVar) {
+ const TargetLoweringBase *TLI, AllocaInst *&AI) {
bool SupportsSelectionDAGSP = false;
- PointerType *PtrTy = Type::getInt8PtrTy(RI->getContext());
- unsigned AddressSpace, Offset;
- if (TLI->getStackCookieLocation(AddressSpace, Offset)) {
- Constant *OffsetVal =
- ConstantInt::get(Type::getInt32Ty(RI->getContext()), Offset);
-
- StackGuardVar =
- ConstantExpr::getIntToPtr(OffsetVal, PointerType::get(PtrTy,
- AddressSpace));
- } else if (TT.isOSOpenBSD()) {
- StackGuardVar = M->getOrInsertGlobal("__guard_local", PtrTy);
- cast<GlobalValue>(StackGuardVar)
- ->setVisibility(GlobalValue::HiddenVisibility);
- } else {
- SupportsSelectionDAGSP = true;
- StackGuardVar = M->getOrInsertGlobal("__stack_chk_guard", PtrTy);
- }
-
IRBuilder<> B(&F->getEntryBlock().front());
+ PointerType *PtrTy = Type::getInt8PtrTy(RI->getContext());
AI = B.CreateAlloca(PtrTy, nullptr, "StackGuardSlot");
- LoadInst *LI = B.CreateLoad(StackGuardVar, "StackGuard");
- B.CreateCall(Intrinsic::getDeclaration(M, Intrinsic::stackprotector),
- {LI, AI});
+ Value *GuardSlot = getStackGuard(TLI, M, B, &SupportsSelectionDAGSP);
+ B.CreateCall(Intrinsic::getDeclaration(M, Intrinsic::stackprotector),
+ {GuardSlot, AI});
return SupportsSelectionDAGSP;
}
@@ -366,11 +335,9 @@ static bool CreatePrologue(Function *F, Module *M, ReturnInst *RI,
/// - The epilogue checks the value stored in the prologue against the original
/// value. It calls __stack_chk_fail if they differ.
bool StackProtector::InsertStackProtectors() {
- bool HasPrologue = false;
bool SupportsSelectionDAGSP =
EnableSelectionDAGSP && !TM->Options.EnableFastISel;
AllocaInst *AI = nullptr; // Place on stack that stores the stack guard.
- Value *StackGuardVar = nullptr; // The stack guard variable.
for (Function::iterator I = F->begin(), E = F->end(); I != E;) {
BasicBlock *BB = &*I++;
@@ -378,30 +345,36 @@ bool StackProtector::InsertStackProtectors() {
if (!RI)
continue;
+ // Generate prologue instrumentation if not already generated.
if (!HasPrologue) {
HasPrologue = true;
- SupportsSelectionDAGSP &=
- CreatePrologue(F, M, RI, TLI, Trip, AI, StackGuardVar);
+ SupportsSelectionDAGSP &= CreatePrologue(F, M, RI, TLI, AI);
}
- if (SupportsSelectionDAGSP) {
- // Since we have a potential tail call, insert the special stack check
- // intrinsic.
- Instruction *InsertionPt = nullptr;
- if (CallInst *CI = FindPotentialTailCall(BB, RI, TLI)) {
- InsertionPt = CI;
- } else {
- InsertionPt = RI;
- // At this point we know that BB has a return statement so it *DOES*
- // have a terminator.
- assert(InsertionPt != nullptr &&
- "BB must have a terminator instruction at this point.");
- }
-
- Function *Intrinsic =
- Intrinsic::getDeclaration(M, Intrinsic::stackprotectorcheck);
- CallInst::Create(Intrinsic, StackGuardVar, "", InsertionPt);
+ // SelectionDAG based code generation. Nothing else needs to be done here.
+ // The epilogue instrumentation is postponed to SelectionDAG.
+ if (SupportsSelectionDAGSP)
+ break;
+
+ // Set HasIRCheck to true, so that SelectionDAG will not generate its own
+ // version. SelectionDAG calls 'shouldEmitSDCheck' to check whether the
+ // instrumentation has already been generated.
+ HasIRCheck = true;
+
+ // Generate epilogue instrumentation. The epilogue instrumentation can be
+ // function-based or inlined depending on which mechanism the target is
+ // providing.
+ if (Value* GuardCheck = TLI->getSSPStackGuardCheck(*M)) {
+ // Generate the function-based epilogue instrumentation.
+ // The target provides a guard check function, generate a call to it.
+ IRBuilder<> B(RI);
+ LoadInst *Guard = B.CreateLoad(AI, true, "Guard");
+ CallInst *Call = B.CreateCall(GuardCheck, {Guard});
+ llvm::Function *Function = cast<llvm::Function>(GuardCheck);
+ Call->setAttributes(Function->getAttributes());
+ Call->setCallingConv(Function->getCallingConv());
} else {
+ // Generate the epilogue with inline instrumentation.
// If we do not support SelectionDAG based tail calls, generate IR level
// tail calls.
//
@@ -415,7 +388,7 @@ bool StackProtector::InsertStackProtectors() {
//
// return:
// ...
- // %1 = load __stack_chk_guard
+ // %1 = <stack guard>
// %2 = load StackGuardSlot
// %3 = cmp i1 %1, %2
// br i1 %3, label %SP_return, label %CallStackCheckFailBlk
@@ -450,9 +423,9 @@ bool StackProtector::InsertStackProtectors() {
// Generate the stack protector instructions in the old basic block.
IRBuilder<> B(BB);
- LoadInst *LI1 = B.CreateLoad(StackGuardVar);
- LoadInst *LI2 = B.CreateLoad(AI);
- Value *Cmp = B.CreateICmpEQ(LI1, LI2);
+ Value *Guard = getStackGuard(TLI, M, B);
+ LoadInst *LI2 = B.CreateLoad(AI, true);
+ Value *Cmp = B.CreateICmpEQ(Guard, LI2);
auto SuccessProb =
BranchProbabilityInfo::getBranchProbStackProtector(true);
auto FailureProb =
@@ -475,6 +448,7 @@ BasicBlock *StackProtector::CreateFailBB() {
LLVMContext &Context = F->getContext();
BasicBlock *FailBB = BasicBlock::Create(Context, "CallStackCheckFailBlk", F);
IRBuilder<> B(FailBB);
+ B.SetCurrentDebugLocation(DebugLoc::get(0, 0, F->getSubprogram()));
if (Trip.isOSOpenBSD()) {
Constant *StackChkFail =
M->getOrInsertFunction("__stack_smash_handler",
@@ -491,3 +465,7 @@ BasicBlock *StackProtector::CreateFailBB() {
B.CreateUnreachable();
return FailBB;
}
+
+bool StackProtector::shouldEmitSDCheck(const BasicBlock &BB) const {
+ return HasPrologue && !HasIRCheck && isa<ReturnInst>(BB.getTerminator());
+}
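Taken together, the epilogue logic added above reduces to the following sketch (illustrative only; the real code also attaches branch weights and splits the return block; emitEpilogueCheck is an invented name):

// If the target provides a guard-check function, call it with the stashed
// guard; otherwise compare the freshly loaded guard against the stashed
// copy and branch to the fail block on mismatch.
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
using namespace llvm;

static void emitEpilogueCheck(IRBuilder<> &B, Value *GuardCheckFn,
                              Value *Guard, AllocaInst *GuardSlot,
                              BasicBlock *RetBB, BasicBlock *FailBB) {
  LoadInst *Stashed = B.CreateLoad(GuardSlot, /*isVolatile=*/true, "Guard");
  if (GuardCheckFn) {
    // Function-based check (e.g. an MSVC-style __security_check_cookie hook).
    B.CreateCall(GuardCheckFn, {Stashed});
  } else {
    // Inline check: guard == stashed ? return : fail.
    Value *Cmp = B.CreateICmpEQ(Guard, Stashed);
    B.CreateCondBr(Cmp, RetBB, FailBB);
  }
}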
diff --git a/contrib/llvm/lib/CodeGen/StackSlotColoring.cpp b/contrib/llvm/lib/CodeGen/StackSlotColoring.cpp
index 51f4d0e..d996714 100644
--- a/contrib/llvm/lib/CodeGen/StackSlotColoring.cpp
+++ b/contrib/llvm/lib/CodeGen/StackSlotColoring.cpp
@@ -107,7 +107,7 @@ namespace {
bool OverlapWithAssignments(LiveInterval *li, int Color) const;
int ColorSlot(LiveInterval *li);
bool ColorSlots(MachineFunction &MF);
- void RewriteInstruction(MachineInstr *MI, SmallVectorImpl<int> &SlotMapping,
+ void RewriteInstruction(MachineInstr &MI, SmallVectorImpl<int> &SlotMapping,
MachineFunction &MF);
bool RemoveDeadStores(MachineBasicBlock* MBB);
};
@@ -145,9 +145,9 @@ void StackSlotColoring::ScanForSpillSlotRefs(MachineFunction &MF) {
MachineBasicBlock *MBB = &*MBBI;
for (MachineBasicBlock::iterator MII = MBB->begin(), EE = MBB->end();
MII != EE; ++MII) {
- MachineInstr *MI = &*MII;
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- MachineOperand &MO = MI->getOperand(i);
+ MachineInstr &MI = *MII;
+ for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI.getOperand(i);
if (!MO.isFI())
continue;
int FI = MO.getIndex();
@@ -156,11 +156,12 @@ void StackSlotColoring::ScanForSpillSlotRefs(MachineFunction &MF) {
if (!LS->hasInterval(FI))
continue;
LiveInterval &li = LS->getInterval(FI);
- if (!MI->isDebugValue())
+ if (!MI.isDebugValue())
li.weight += LiveIntervals::getSpillWeight(false, true, MBFI, MI);
}
- for (MachineInstr::mmo_iterator MMOI = MI->memoperands_begin(),
- EE = MI->memoperands_end(); MMOI != EE; ++MMOI) {
+ for (MachineInstr::mmo_iterator MMOI = MI.memoperands_begin(),
+ EE = MI.memoperands_end();
+ MMOI != EE; ++MMOI) {
MachineMemOperand *MMO = *MMOI;
if (const FixedStackPseudoSourceValue *FSV =
dyn_cast_or_null<FixedStackPseudoSourceValue>(
@@ -325,13 +326,10 @@ bool StackSlotColoring::ColorSlots(MachineFunction &MF) {
}
// Rewrite all MO_FrameIndex operands. Look for dead stores.
- for (MachineFunction::iterator MBBI = MF.begin(), E = MF.end();
- MBBI != E; ++MBBI) {
- MachineBasicBlock *MBB = &*MBBI;
- for (MachineBasicBlock::iterator MII = MBB->begin(), EE = MBB->end();
- MII != EE; ++MII)
- RewriteInstruction(MII, SlotMapping, MF);
- RemoveDeadStores(MBB);
+ for (MachineBasicBlock &MBB : MF) {
+ for (MachineInstr &MI : MBB)
+ RewriteInstruction(MI, SlotMapping, MF);
+ RemoveDeadStores(&MBB);
}
// Delete unused stack slots.
@@ -346,12 +344,12 @@ bool StackSlotColoring::ColorSlots(MachineFunction &MF) {
/// RewriteInstruction - Rewrite specified instruction by replacing references
/// to old frame index with new one.
-void StackSlotColoring::RewriteInstruction(MachineInstr *MI,
+void StackSlotColoring::RewriteInstruction(MachineInstr &MI,
SmallVectorImpl<int> &SlotMapping,
MachineFunction &MF) {
// Update the operands.
- for (unsigned i = 0, ee = MI->getNumOperands(); i != ee; ++i) {
- MachineOperand &MO = MI->getOperand(i);
+ for (unsigned i = 0, ee = MI.getNumOperands(); i != ee; ++i) {
+ MachineOperand &MO = MI.getOperand(i);
if (!MO.isFI())
continue;
int OldFI = MO.getIndex();
@@ -385,12 +383,11 @@ bool StackSlotColoring::RemoveDeadStores(MachineBasicBlock* MBB) {
break;
int FirstSS, SecondSS;
- if (TII->isStackSlotCopy(I, FirstSS, SecondSS) &&
- FirstSS == SecondSS &&
+ if (TII->isStackSlotCopy(*I, FirstSS, SecondSS) && FirstSS == SecondSS &&
FirstSS != -1) {
++NumDead;
changed = true;
- toErase.push_back(I);
+ toErase.push_back(&*I);
continue;
}
@@ -399,8 +396,10 @@ bool StackSlotColoring::RemoveDeadStores(MachineBasicBlock* MBB) {
unsigned LoadReg = 0;
unsigned StoreReg = 0;
- if (!(LoadReg = TII->isLoadFromStackSlot(I, FirstSS))) continue;
- if (!(StoreReg = TII->isStoreToStackSlot(NextMI, SecondSS))) continue;
+ if (!(LoadReg = TII->isLoadFromStackSlot(*I, FirstSS)))
+ continue;
+ if (!(StoreReg = TII->isStoreToStackSlot(*NextMI, SecondSS)))
+ continue;
if (FirstSS != SecondSS || LoadReg != StoreReg || FirstSS == -1) continue;
++NumDead;
@@ -408,10 +407,10 @@ bool StackSlotColoring::RemoveDeadStores(MachineBasicBlock* MBB) {
if (NextMI->findRegisterUseOperandIdx(LoadReg, true, nullptr) != -1) {
++NumDead;
- toErase.push_back(I);
+ toErase.push_back(&*I);
}
- toErase.push_back(NextMI);
+ toErase.push_back(&*NextMI);
++I;
}
diff --git a/contrib/llvm/lib/CodeGen/StatepointExampleGC.cpp b/contrib/llvm/lib/CodeGen/StatepointExampleGC.cpp
deleted file mode 100644
index 3f60e18..0000000
--- a/contrib/llvm/lib/CodeGen/StatepointExampleGC.cpp
+++ /dev/null
@@ -1,55 +0,0 @@
-//===-- StatepointDefaultGC.cpp - The default statepoint GC strategy ------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains a GCStrategy which serves as an example for the usage
-// of a statepoint based lowering strategy. This GCStrategy is intended to
-// suitable as a default implementation usable with any collector which can
-// consume the standard stackmap format generated by statepoints, uses the
-// default addrespace to distinguish between gc managed and non-gc managed
-// pointers, and has reasonable relocation semantics.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/CodeGen/GCStrategy.h"
-#include "llvm/IR/DerivedTypes.h"
-#include "llvm/IR/Value.h"
-
-using namespace llvm;
-
-namespace {
-class StatepointGC : public GCStrategy {
-public:
- StatepointGC() {
- UseStatepoints = true;
- // These options are all gc.root specific, we specify them so that the
- // gc.root lowering code doesn't run.
- InitRoots = false;
- NeededSafePoints = 0;
- UsesMetadata = false;
- CustomRoots = false;
- }
- Optional<bool> isGCManagedPointer(const Type *Ty) const override {
- // Method is only valid on pointer typed values.
- const PointerType *PT = cast<PointerType>(Ty);
- // For the sake of this example GC, we arbitrarily pick addrspace(1) as our
- // GC managed heap. We know that a pointer into this heap needs to be
- // updated and that no other pointer does. Note that addrspace(1) is used
- // only as an example, it has no special meaning, and is not reserved for
- // GC usage.
- return (1 == PT->getAddressSpace());
- }
-};
-}
-
-static GCRegistry::Add<StatepointGC> X("statepoint-example",
- "an example strategy for statepoint");
-
-namespace llvm {
-void linkStatepointExampleGC() {}
-}
diff --git a/contrib/llvm/lib/CodeGen/TailDuplication.cpp b/contrib/llvm/lib/CodeGen/TailDuplication.cpp
index d2fbf53..2b1fb12 100644
--- a/contrib/llvm/lib/CodeGen/TailDuplication.cpp
+++ b/contrib/llvm/lib/CodeGen/TailDuplication.cpp
@@ -8,147 +8,52 @@
//===----------------------------------------------------------------------===//
//
// This pass duplicates basic blocks ending in unconditional branches into
-// the tails of their predecessors.
+// the tails of their predecessors, using the TailDuplicator utility class.
//
//===----------------------------------------------------------------------===//
-#include "llvm/CodeGen/Passes.h"
-#include "llvm/ADT/DenseSet.h"
-#include "llvm/ADT/SetVector.h"
-#include "llvm/ADT/SmallSet.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineModuleInfo.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/MachineSSAUpdater.h"
-#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TailDuplicator.h"
#include "llvm/IR/Function.h"
-#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;
#define DEBUG_TYPE "tailduplication"
-STATISTIC(NumTails , "Number of tails duplicated");
-STATISTIC(NumTailDups , "Number of tail duplicated blocks");
-STATISTIC(NumInstrDups , "Additional instructions due to tail duplication");
-STATISTIC(NumDeadBlocks, "Number of dead blocks removed");
-STATISTIC(NumAddedPHIs , "Number of phis added");
-
-// Heuristic for tail duplication.
-static cl::opt<unsigned>
-TailDuplicateSize("tail-dup-size",
- cl::desc("Maximum instructions to consider tail duplicating"),
- cl::init(2), cl::Hidden);
-
-static cl::opt<bool>
-TailDupVerify("tail-dup-verify",
- cl::desc("Verify sanity of PHI instructions during taildup"),
- cl::init(false), cl::Hidden);
-
-static cl::opt<unsigned>
-TailDupLimit("tail-dup-limit", cl::init(~0U), cl::Hidden);
-
-typedef std::vector<std::pair<MachineBasicBlock*,unsigned> > AvailableValsTy;
-
namespace {
- /// Perform tail duplication.
- class TailDuplicatePass : public MachineFunctionPass {
- const TargetInstrInfo *TII;
- const TargetRegisterInfo *TRI;
- const MachineBranchProbabilityInfo *MBPI;
- MachineModuleInfo *MMI;
- MachineRegisterInfo *MRI;
- std::unique_ptr<RegScavenger> RS;
- bool PreRegAlloc;
-
- // A list of virtual registers for which to update SSA form.
- SmallVector<unsigned, 16> SSAUpdateVRs;
-
- // For each virtual register in SSAUpdateVals keep a list of source virtual
- // registers.
- DenseMap<unsigned, AvailableValsTy> SSAUpdateVals;
+/// Perform tail duplication. Delegates to TailDuplicator
+class TailDuplicatePass : public MachineFunctionPass {
+ TailDuplicator Duplicator;
- public:
- static char ID;
- explicit TailDuplicatePass() :
- MachineFunctionPass(ID), PreRegAlloc(false) {}
+public:
+ static char ID;
+ explicit TailDuplicatePass() : MachineFunctionPass(ID) {}
- bool runOnMachineFunction(MachineFunction &MF) override;
+ bool runOnMachineFunction(MachineFunction &MF) override;
- void getAnalysisUsage(AnalysisUsage &AU) const override;
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+};
- private:
- void AddSSAUpdateEntry(unsigned OrigReg, unsigned NewReg,
- MachineBasicBlock *BB);
- void ProcessPHI(MachineInstr *MI, MachineBasicBlock *TailBB,
- MachineBasicBlock *PredBB,
- DenseMap<unsigned, unsigned> &LocalVRMap,
- SmallVectorImpl<std::pair<unsigned,unsigned> > &Copies,
- const DenseSet<unsigned> &UsedByPhi,
- bool Remove);
- void DuplicateInstruction(MachineInstr *MI,
- MachineBasicBlock *TailBB,
- MachineBasicBlock *PredBB,
- MachineFunction &MF,
- DenseMap<unsigned, unsigned> &LocalVRMap,
- const DenseSet<unsigned> &UsedByPhi);
- void UpdateSuccessorsPHIs(MachineBasicBlock *FromBB, bool isDead,
- SmallVectorImpl<MachineBasicBlock *> &TDBBs,
- SmallSetVector<MachineBasicBlock*, 8> &Succs);
- bool TailDuplicateBlocks(MachineFunction &MF);
- bool shouldTailDuplicate(const MachineFunction &MF,
- bool IsSimple, MachineBasicBlock &TailBB);
- bool isSimpleBB(MachineBasicBlock *TailBB);
- bool canCompletelyDuplicateBB(MachineBasicBlock &BB);
- bool duplicateSimpleBB(MachineBasicBlock *TailBB,
- SmallVectorImpl<MachineBasicBlock *> &TDBBs,
- const DenseSet<unsigned> &RegsUsedByPhi,
- SmallVectorImpl<MachineInstr *> &Copies);
- bool TailDuplicate(MachineBasicBlock *TailBB,
- bool IsSimple,
- MachineFunction &MF,
- SmallVectorImpl<MachineBasicBlock *> &TDBBs,
- SmallVectorImpl<MachineInstr *> &Copies);
- bool TailDuplicateAndUpdate(MachineBasicBlock *MBB,
- bool IsSimple,
- MachineFunction &MF);
-
- void RemoveDeadBlock(MachineBasicBlock *MBB);
- };
-
- char TailDuplicatePass::ID = 0;
+char TailDuplicatePass::ID = 0;
}
char &llvm::TailDuplicateID = TailDuplicatePass::ID;
-INITIALIZE_PASS(TailDuplicatePass, "tailduplication", "Tail Duplication",
- false, false)
+INITIALIZE_PASS(TailDuplicatePass, "tailduplication", "Tail Duplication", false,
+ false)
bool TailDuplicatePass::runOnMachineFunction(MachineFunction &MF) {
- if (skipOptnoneFunction(*MF.getFunction()))
+ if (skipFunction(*MF.getFunction()))
return false;
- TII = MF.getSubtarget().getInstrInfo();
- TRI = MF.getSubtarget().getRegisterInfo();
- MRI = &MF.getRegInfo();
- MMI = getAnalysisIfAvailable<MachineModuleInfo>();
- MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
+ auto MMI = getAnalysisIfAvailable<MachineModuleInfo>();
+ auto MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
- PreRegAlloc = MRI->isSSA();
- RS.reset();
- if (MRI->tracksLiveness() && TRI->trackLivenessAfterRegAlloc(MF))
- RS.reset(new RegScavenger());
+ Duplicator.initMF(MF, MMI, MBPI);
bool MadeChange = false;
- while (TailDuplicateBlocks(MF))
+ while (Duplicator.tailDuplicateBlocks(MF))
MadeChange = true;
return MadeChange;
@@ -158,831 +63,3 @@ void TailDuplicatePass::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<MachineBranchProbabilityInfo>();
MachineFunctionPass::getAnalysisUsage(AU);
}
-
-static void VerifyPHIs(MachineFunction &MF, bool CheckExtra) {
- for (MachineFunction::iterator I = ++MF.begin(), E = MF.end(); I != E; ++I) {
- MachineBasicBlock *MBB = &*I;
- SmallSetVector<MachineBasicBlock*, 8> Preds(MBB->pred_begin(),
- MBB->pred_end());
- MachineBasicBlock::iterator MI = MBB->begin();
- while (MI != MBB->end()) {
- if (!MI->isPHI())
- break;
- for (SmallSetVector<MachineBasicBlock *, 8>::iterator PI = Preds.begin(),
- PE = Preds.end(); PI != PE; ++PI) {
- MachineBasicBlock *PredBB = *PI;
- bool Found = false;
- for (unsigned i = 1, e = MI->getNumOperands(); i != e; i += 2) {
- MachineBasicBlock *PHIBB = MI->getOperand(i+1).getMBB();
- if (PHIBB == PredBB) {
- Found = true;
- break;
- }
- }
- if (!Found) {
- dbgs() << "Malformed PHI in BB#" << MBB->getNumber() << ": " << *MI;
- dbgs() << " missing input from predecessor BB#"
- << PredBB->getNumber() << '\n';
- llvm_unreachable(nullptr);
- }
- }
-
- for (unsigned i = 1, e = MI->getNumOperands(); i != e; i += 2) {
- MachineBasicBlock *PHIBB = MI->getOperand(i+1).getMBB();
- if (CheckExtra && !Preds.count(PHIBB)) {
- dbgs() << "Warning: malformed PHI in BB#" << MBB->getNumber()
- << ": " << *MI;
- dbgs() << " extra input from predecessor BB#"
- << PHIBB->getNumber() << '\n';
- llvm_unreachable(nullptr);
- }
- if (PHIBB->getNumber() < 0) {
- dbgs() << "Malformed PHI in BB#" << MBB->getNumber() << ": " << *MI;
- dbgs() << " non-existing BB#" << PHIBB->getNumber() << '\n';
- llvm_unreachable(nullptr);
- }
- }
- ++MI;
- }
- }
-}
-
-/// Tail duplicate the block and cleanup.
-bool
-TailDuplicatePass::TailDuplicateAndUpdate(MachineBasicBlock *MBB,
- bool IsSimple,
- MachineFunction &MF) {
- // Save the successors list.
- SmallSetVector<MachineBasicBlock*, 8> Succs(MBB->succ_begin(),
- MBB->succ_end());
-
- SmallVector<MachineBasicBlock*, 8> TDBBs;
- SmallVector<MachineInstr*, 16> Copies;
- if (!TailDuplicate(MBB, IsSimple, MF, TDBBs, Copies))
- return false;
-
- ++NumTails;
-
- SmallVector<MachineInstr*, 8> NewPHIs;
- MachineSSAUpdater SSAUpdate(MF, &NewPHIs);
-
- // TailBB's immediate successors are now successors of those predecessors
- // which duplicated TailBB. Add the predecessors as sources to the PHI
- // instructions.
- bool isDead = MBB->pred_empty() && !MBB->hasAddressTaken();
- if (PreRegAlloc)
- UpdateSuccessorsPHIs(MBB, isDead, TDBBs, Succs);
-
- // If it is dead, remove it.
- if (isDead) {
- NumInstrDups -= MBB->size();
- RemoveDeadBlock(MBB);
- ++NumDeadBlocks;
- }
-
- // Update SSA form.
- if (!SSAUpdateVRs.empty()) {
- for (unsigned i = 0, e = SSAUpdateVRs.size(); i != e; ++i) {
- unsigned VReg = SSAUpdateVRs[i];
- SSAUpdate.Initialize(VReg);
-
- // If the original definition is still around, add it as an available
- // value.
- MachineInstr *DefMI = MRI->getVRegDef(VReg);
- MachineBasicBlock *DefBB = nullptr;
- if (DefMI) {
- DefBB = DefMI->getParent();
- SSAUpdate.AddAvailableValue(DefBB, VReg);
- }
-
- // Add the new vregs as available values.
- DenseMap<unsigned, AvailableValsTy>::iterator LI =
- SSAUpdateVals.find(VReg);
- for (unsigned j = 0, ee = LI->second.size(); j != ee; ++j) {
- MachineBasicBlock *SrcBB = LI->second[j].first;
- unsigned SrcReg = LI->second[j].second;
- SSAUpdate.AddAvailableValue(SrcBB, SrcReg);
- }
-
- // Rewrite uses that are outside of the original def's block.
- MachineRegisterInfo::use_iterator UI = MRI->use_begin(VReg);
- while (UI != MRI->use_end()) {
- MachineOperand &UseMO = *UI;
- MachineInstr *UseMI = UseMO.getParent();
- ++UI;
- if (UseMI->isDebugValue()) {
- // SSAUpdate can replace the use with an undef. That creates
- // a debug instruction that is a kill.
- // FIXME: Should it SSAUpdate job to delete debug instructions
- // instead of replacing the use with undef?
- UseMI->eraseFromParent();
- continue;
- }
- if (UseMI->getParent() == DefBB && !UseMI->isPHI())
- continue;
- SSAUpdate.RewriteUse(UseMO);
- }
- }
-
- SSAUpdateVRs.clear();
- SSAUpdateVals.clear();
- }
-
- // Eliminate some of the copies inserted by tail duplication to maintain
- // SSA form.
- for (unsigned i = 0, e = Copies.size(); i != e; ++i) {
- MachineInstr *Copy = Copies[i];
- if (!Copy->isCopy())
- continue;
- unsigned Dst = Copy->getOperand(0).getReg();
- unsigned Src = Copy->getOperand(1).getReg();
- if (MRI->hasOneNonDBGUse(Src) &&
- MRI->constrainRegClass(Src, MRI->getRegClass(Dst))) {
- // Copy is the only use. Do trivial copy propagation here.
- MRI->replaceRegWith(Dst, Src);
- Copy->eraseFromParent();
- }
- }
-
- if (NewPHIs.size())
- NumAddedPHIs += NewPHIs.size();
-
- return true;
-}
-
-/// Look for small blocks that are unconditionally branched to and do not fall
-/// through. Tail-duplicate their instructions into their predecessors to
-/// eliminate (dynamic) branches.
-bool TailDuplicatePass::TailDuplicateBlocks(MachineFunction &MF) {
- bool MadeChange = false;
-
- if (PreRegAlloc && TailDupVerify) {
- DEBUG(dbgs() << "\n*** Before tail-duplicating\n");
- VerifyPHIs(MF, true);
- }
-
- for (MachineFunction::iterator I = ++MF.begin(), E = MF.end(); I != E; ) {
- MachineBasicBlock *MBB = &*I++;
-
- if (NumTails == TailDupLimit)
- break;
-
- bool IsSimple = isSimpleBB(MBB);
-
- if (!shouldTailDuplicate(MF, IsSimple, *MBB))
- continue;
-
- MadeChange |= TailDuplicateAndUpdate(MBB, IsSimple, MF);
- }
-
- if (PreRegAlloc && TailDupVerify)
- VerifyPHIs(MF, false);
-
- return MadeChange;
-}
-
-static bool isDefLiveOut(unsigned Reg, MachineBasicBlock *BB,
- const MachineRegisterInfo *MRI) {
- for (MachineInstr &UseMI : MRI->use_instructions(Reg)) {
- if (UseMI.isDebugValue())
- continue;
- if (UseMI.getParent() != BB)
- return true;
- }
- return false;
-}
-
-static unsigned getPHISrcRegOpIdx(MachineInstr *MI, MachineBasicBlock *SrcBB) {
- for (unsigned i = 1, e = MI->getNumOperands(); i != e; i += 2)
- if (MI->getOperand(i+1).getMBB() == SrcBB)
- return i;
- return 0;
-}
-
-
-// Remember which registers are used by phis in this block. This is
-// used to determine which registers are liveout while modifying the
-// block (which is why we need to copy the information).
-static void getRegsUsedByPHIs(const MachineBasicBlock &BB,
- DenseSet<unsigned> *UsedByPhi) {
- for (const auto &MI : BB) {
- if (!MI.isPHI())
- break;
- for (unsigned i = 1, e = MI.getNumOperands(); i != e; i += 2) {
- unsigned SrcReg = MI.getOperand(i).getReg();
- UsedByPhi->insert(SrcReg);
- }
- }
-}
-
-/// Record a new virtual register that provides OrigReg's value in a given
-/// block, for later SSA update.
-void TailDuplicatePass::AddSSAUpdateEntry(unsigned OrigReg, unsigned NewReg,
- MachineBasicBlock *BB) {
-  DenseMap<unsigned, AvailableValsTy>::iterator LI =
-      SSAUpdateVals.find(OrigReg);
- if (LI != SSAUpdateVals.end())
- LI->second.push_back(std::make_pair(BB, NewReg));
- else {
- AvailableValsTy Vals;
- Vals.push_back(std::make_pair(BB, NewReg));
- SSAUpdateVals.insert(std::make_pair(OrigReg, Vals));
- SSAUpdateVRs.push_back(OrigReg);
- }
-}
-
-/// Process PHI node in TailBB by turning it into a copy in PredBB. Remember the
-/// source register that's contributed by PredBB and update the SSA update
-/// map.
-void TailDuplicatePass::ProcessPHI(
- MachineInstr *MI, MachineBasicBlock *TailBB, MachineBasicBlock *PredBB,
- DenseMap<unsigned, unsigned> &LocalVRMap,
- SmallVectorImpl<std::pair<unsigned, unsigned> > &Copies,
- const DenseSet<unsigned> &RegsUsedByPhi, bool Remove) {
- unsigned DefReg = MI->getOperand(0).getReg();
- unsigned SrcOpIdx = getPHISrcRegOpIdx(MI, PredBB);
- assert(SrcOpIdx && "Unable to find matching PHI source?");
- unsigned SrcReg = MI->getOperand(SrcOpIdx).getReg();
- const TargetRegisterClass *RC = MRI->getRegClass(DefReg);
- LocalVRMap.insert(std::make_pair(DefReg, SrcReg));
-
-  // Insert a copy of the source register at the end of the block. The new
-  // def register becomes the value that is live out of the block.
- unsigned NewDef = MRI->createVirtualRegister(RC);
- Copies.push_back(std::make_pair(NewDef, SrcReg));
- if (isDefLiveOut(DefReg, TailBB, MRI) || RegsUsedByPhi.count(DefReg))
- AddSSAUpdateEntry(DefReg, NewDef, PredBB);
-
- if (!Remove)
- return;
-
- // Remove PredBB from the PHI node.
- MI->RemoveOperand(SrcOpIdx+1);
- MI->RemoveOperand(SrcOpIdx);
- if (MI->getNumOperands() == 1)
- MI->eraseFromParent();
-}
-
-/// Duplicate a TailBB instruction to PredBB and update
-/// the source operands according to the earlier PHI translation.
-void TailDuplicatePass::DuplicateInstruction(MachineInstr *MI,
- MachineBasicBlock *TailBB,
- MachineBasicBlock *PredBB,
- MachineFunction &MF,
- DenseMap<unsigned, unsigned> &LocalVRMap,
- const DenseSet<unsigned> &UsedByPhi) {
- MachineInstr *NewMI = TII->duplicate(MI, MF);
- for (unsigned i = 0, e = NewMI->getNumOperands(); i != e; ++i) {
- MachineOperand &MO = NewMI->getOperand(i);
- if (!MO.isReg())
- continue;
- unsigned Reg = MO.getReg();
- if (!TargetRegisterInfo::isVirtualRegister(Reg))
- continue;
- if (MO.isDef()) {
- const TargetRegisterClass *RC = MRI->getRegClass(Reg);
- unsigned NewReg = MRI->createVirtualRegister(RC);
- MO.setReg(NewReg);
- LocalVRMap.insert(std::make_pair(Reg, NewReg));
- if (isDefLiveOut(Reg, TailBB, MRI) || UsedByPhi.count(Reg))
- AddSSAUpdateEntry(Reg, NewReg, PredBB);
- } else {
- DenseMap<unsigned, unsigned>::iterator VI = LocalVRMap.find(Reg);
- if (VI != LocalVRMap.end()) {
- MO.setReg(VI->second);
- // Clear any kill flags from this operand. The new register could have
- // uses after this one, so kills are not valid here.
- MO.setIsKill(false);
- MRI->constrainRegClass(VI->second, MRI->getRegClass(Reg));
- }
- }
- }
- PredBB->insert(PredBB->instr_end(), NewMI);
-}
-
-/// After FromBB is tail duplicated into its predecessor blocks, the successors
-/// have gained new predecessors. Update the PHI instructions in them
-/// accordingly.
-void
-TailDuplicatePass::UpdateSuccessorsPHIs(MachineBasicBlock *FromBB, bool isDead,
- SmallVectorImpl<MachineBasicBlock *> &TDBBs,
- SmallSetVector<MachineBasicBlock*,8> &Succs) {
- for (SmallSetVector<MachineBasicBlock*, 8>::iterator SI = Succs.begin(),
- SE = Succs.end(); SI != SE; ++SI) {
- MachineBasicBlock *SuccBB = *SI;
- for (MachineBasicBlock::iterator II = SuccBB->begin(), EE = SuccBB->end();
- II != EE; ++II) {
- if (!II->isPHI())
- break;
- MachineInstrBuilder MIB(*FromBB->getParent(), II);
- unsigned Idx = 0;
- for (unsigned i = 1, e = II->getNumOperands(); i != e; i += 2) {
- MachineOperand &MO = II->getOperand(i+1);
- if (MO.getMBB() == FromBB) {
- Idx = i;
- break;
- }
- }
-
- assert(Idx != 0);
- MachineOperand &MO0 = II->getOperand(Idx);
- unsigned Reg = MO0.getReg();
- if (isDead) {
- // Folded into the previous BB.
-        // There could be duplicate phi source entries. FIXME: Should sdisel
-        // or an earlier pass have fixed this?
- for (unsigned i = II->getNumOperands()-2; i != Idx; i -= 2) {
- MachineOperand &MO = II->getOperand(i+1);
- if (MO.getMBB() == FromBB) {
- II->RemoveOperand(i+1);
- II->RemoveOperand(i);
- }
- }
- } else
- Idx = 0;
-
- // If Idx is set, the operands at Idx and Idx+1 must be removed.
- // We reuse the location to avoid expensive RemoveOperand calls.
-
-      DenseMap<unsigned, AvailableValsTy>::iterator LI =
-          SSAUpdateVals.find(Reg);
- if (LI != SSAUpdateVals.end()) {
- // This register is defined in the tail block.
- for (unsigned j = 0, ee = LI->second.size(); j != ee; ++j) {
- MachineBasicBlock *SrcBB = LI->second[j].first;
- // If we didn't duplicate a bb into a particular predecessor, we
-          // might still have added an entry to SSAUpdateVals to correctly
-          // recompute SSA. In that case, avoid adding a dummy extra argument
-          // to this PHI.
- if (!SrcBB->isSuccessor(SuccBB))
- continue;
-
- unsigned SrcReg = LI->second[j].second;
- if (Idx != 0) {
- II->getOperand(Idx).setReg(SrcReg);
- II->getOperand(Idx+1).setMBB(SrcBB);
- Idx = 0;
- } else {
- MIB.addReg(SrcReg).addMBB(SrcBB);
- }
- }
- } else {
- // Live in tail block, must also be live in predecessors.
- for (unsigned j = 0, ee = TDBBs.size(); j != ee; ++j) {
- MachineBasicBlock *SrcBB = TDBBs[j];
- if (Idx != 0) {
- II->getOperand(Idx).setReg(Reg);
- II->getOperand(Idx+1).setMBB(SrcBB);
- Idx = 0;
- } else {
- MIB.addReg(Reg).addMBB(SrcBB);
- }
- }
- }
- if (Idx != 0) {
- II->RemoveOperand(Idx+1);
- II->RemoveOperand(Idx);
- }
- }
- }
-}
-
-/// Determine if it is profitable to duplicate this block.
-bool
-TailDuplicatePass::shouldTailDuplicate(const MachineFunction &MF,
- bool IsSimple,
- MachineBasicBlock &TailBB) {
- // Only duplicate blocks that end with unconditional branches.
- if (TailBB.canFallThrough())
- return false;
-
- // Don't try to tail-duplicate single-block loops.
- if (TailBB.isSuccessor(&TailBB))
- return false;
-
- // Set the limit on the cost to duplicate. When optimizing for size,
-  // duplicate only one instruction, because one branch can be eliminated to
- // compensate for the duplication.
- unsigned MaxDuplicateCount;
- if (TailDuplicateSize.getNumOccurrences() == 0 &&
- // FIXME: Use Function::optForSize().
- MF.getFunction()->hasFnAttribute(Attribute::OptimizeForSize))
- MaxDuplicateCount = 1;
- else
- MaxDuplicateCount = TailDuplicateSize;
-
- // If the target has hardware branch prediction that can handle indirect
- // branches, duplicating them can often make them predictable when there
- // are common paths through the code. The limit needs to be high enough
- // to allow undoing the effects of tail merging and other optimizations
- // that rearrange the predecessors of the indirect branch.
-
- bool HasIndirectbr = false;
- if (!TailBB.empty())
- HasIndirectbr = TailBB.back().isIndirectBranch();
-
- if (HasIndirectbr && PreRegAlloc)
- MaxDuplicateCount = 20;
-
- // Check the instructions in the block to determine whether tail-duplication
- // is invalid or unlikely to be profitable.
- unsigned InstrCount = 0;
- for (MachineInstr &MI : TailBB) {
- // Non-duplicable things shouldn't be tail-duplicated.
- if (MI.isNotDuplicable())
- return false;
-
- // Do not duplicate 'return' instructions if this is a pre-regalloc run.
- // A return may expand into a lot more instructions (e.g. reload of callee
- // saved registers) after PEI.
- if (PreRegAlloc && MI.isReturn())
- return false;
-
-    // Avoid duplicating calls before register allocation. Calls present a
-    // barrier to register allocation, so duplicating them may end up increasing
- // spills.
- if (PreRegAlloc && MI.isCall())
- return false;
-
- if (!MI.isPHI() && !MI.isDebugValue())
- InstrCount += 1;
-
- if (InstrCount > MaxDuplicateCount)
- return false;
- }
-
- // Check if any of the successors of TailBB has a PHI node in which the
- // value corresponding to TailBB uses a subregister.
- // If a phi node uses a register paired with a subregister, the actual
- // "value type" of the phi may differ from the type of the register without
- // any subregisters. Due to a bug, tail duplication may add a new operand
-  // without a necessary subregister, producing invalid code. This is
- // demonstrated by test/CodeGen/Hexagon/tail-dup-subreg-abort.ll.
- // Disable tail duplication for this case for now, until the problem is
- // fixed.
- for (auto SB : TailBB.successors()) {
- for (auto &I : *SB) {
- if (!I.isPHI())
- break;
- unsigned Idx = getPHISrcRegOpIdx(&I, &TailBB);
- assert(Idx != 0);
- MachineOperand &PU = I.getOperand(Idx);
- if (PU.getSubReg() != 0)
- return false;
- }
- }
-
- if (HasIndirectbr && PreRegAlloc)
- return true;
-
- if (IsSimple)
- return true;
-
- if (!PreRegAlloc)
- return true;
-
- return canCompletelyDuplicateBB(TailBB);
-}
-
-/// True if this BB has only one unconditional jump.
-bool
-TailDuplicatePass::isSimpleBB(MachineBasicBlock *TailBB) {
- if (TailBB->succ_size() != 1)
- return false;
- if (TailBB->pred_empty())
- return false;
- MachineBasicBlock::iterator I = TailBB->getFirstNonDebugInstr();
- if (I == TailBB->end())
- return true;
- return I->isUnconditionalBranch();
-}
-
-static bool
-bothUsedInPHI(const MachineBasicBlock &A,
- SmallPtrSet<MachineBasicBlock*, 8> SuccsB) {
- for (MachineBasicBlock *BB : A.successors())
- if (SuccsB.count(BB) && !BB->empty() && BB->begin()->isPHI())
- return true;
-
- return false;
-}
-
-bool
-TailDuplicatePass::canCompletelyDuplicateBB(MachineBasicBlock &BB) {
- for (MachineBasicBlock *PredBB : BB.predecessors()) {
- if (PredBB->succ_size() > 1)
- return false;
-
- MachineBasicBlock *PredTBB = nullptr, *PredFBB = nullptr;
- SmallVector<MachineOperand, 4> PredCond;
- if (TII->AnalyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true))
- return false;
-
- if (!PredCond.empty())
- return false;
- }
- return true;
-}
-
-bool
-TailDuplicatePass::duplicateSimpleBB(MachineBasicBlock *TailBB,
- SmallVectorImpl<MachineBasicBlock *> &TDBBs,
- const DenseSet<unsigned> &UsedByPhi,
- SmallVectorImpl<MachineInstr *> &Copies) {
- SmallPtrSet<MachineBasicBlock*, 8> Succs(TailBB->succ_begin(),
- TailBB->succ_end());
- SmallVector<MachineBasicBlock*, 8> Preds(TailBB->pred_begin(),
- TailBB->pred_end());
- bool Changed = false;
- for (SmallSetVector<MachineBasicBlock *, 8>::iterator PI = Preds.begin(),
- PE = Preds.end(); PI != PE; ++PI) {
- MachineBasicBlock *PredBB = *PI;
-
- if (PredBB->hasEHPadSuccessor())
- continue;
-
- if (bothUsedInPHI(*PredBB, Succs))
- continue;
-
- MachineBasicBlock *PredTBB = nullptr, *PredFBB = nullptr;
- SmallVector<MachineOperand, 4> PredCond;
- if (TII->AnalyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true))
- continue;
-
- Changed = true;
- DEBUG(dbgs() << "\nTail-duplicating into PredBB: " << *PredBB
- << "From simple Succ: " << *TailBB);
-
- MachineBasicBlock *NewTarget = *TailBB->succ_begin();
- MachineBasicBlock *NextBB = &*std::next(PredBB->getIterator());
-
- // Make PredFBB explicit.
- if (PredCond.empty())
- PredFBB = PredTBB;
-
- // Make fall through explicit.
- if (!PredTBB)
- PredTBB = NextBB;
- if (!PredFBB)
- PredFBB = NextBB;
-
-    // Redirect branches targeting TailBB to the new target.
- if (PredFBB == TailBB)
- PredFBB = NewTarget;
- if (PredTBB == TailBB)
- PredTBB = NewTarget;
-
- // Make the branch unconditional if possible
- if (PredTBB == PredFBB) {
- PredCond.clear();
- PredFBB = nullptr;
- }
-
- // Avoid adding fall through branches.
- if (PredFBB == NextBB)
- PredFBB = nullptr;
- if (PredTBB == NextBB && PredFBB == nullptr)
- PredTBB = nullptr;
-
- TII->RemoveBranch(*PredBB);
-
- if (PredTBB)
- TII->InsertBranch(*PredBB, PredTBB, PredFBB, PredCond, DebugLoc());
-
- if (!PredBB->isSuccessor(NewTarget))
- PredBB->replaceSuccessor(TailBB, NewTarget);
- else {
- PredBB->removeSuccessor(TailBB, true);
- assert(PredBB->succ_size() <= 1);
- }
-
- TDBBs.push_back(PredBB);
- }
- return Changed;
-}
-
-/// If it is profitable, duplicate TailBB's contents in each
-/// of its predecessors.
-bool
-TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB,
- bool IsSimple,
- MachineFunction &MF,
- SmallVectorImpl<MachineBasicBlock *> &TDBBs,
- SmallVectorImpl<MachineInstr *> &Copies) {
- DEBUG(dbgs() << "\n*** Tail-duplicating BB#" << TailBB->getNumber() << '\n');
-
- DenseSet<unsigned> UsedByPhi;
- getRegsUsedByPHIs(*TailBB, &UsedByPhi);
-
- if (IsSimple)
- return duplicateSimpleBB(TailBB, TDBBs, UsedByPhi, Copies);
-
- // Iterate through all the unique predecessors and tail-duplicate this
- // block into them, if possible. Copying the list ahead of time also
- // avoids trouble with the predecessor list reallocating.
- bool Changed = false;
- SmallSetVector<MachineBasicBlock*, 8> Preds(TailBB->pred_begin(),
- TailBB->pred_end());
- for (SmallSetVector<MachineBasicBlock *, 8>::iterator PI = Preds.begin(),
- PE = Preds.end(); PI != PE; ++PI) {
- MachineBasicBlock *PredBB = *PI;
-
- assert(TailBB != PredBB &&
- "Single-block loop should have been rejected earlier!");
- // EH edges are ignored by AnalyzeBranch.
- if (PredBB->succ_size() > 1)
- continue;
-
- MachineBasicBlock *PredTBB, *PredFBB;
- SmallVector<MachineOperand, 4> PredCond;
- if (TII->AnalyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true))
- continue;
- if (!PredCond.empty())
- continue;
- // Don't duplicate into a fall-through predecessor (at least for now).
- if (PredBB->isLayoutSuccessor(TailBB) && PredBB->canFallThrough())
- continue;
-
- DEBUG(dbgs() << "\nTail-duplicating into PredBB: " << *PredBB
- << "From Succ: " << *TailBB);
-
- TDBBs.push_back(PredBB);
-
- // Remove PredBB's unconditional branch.
- TII->RemoveBranch(*PredBB);
-
- if (RS && !TailBB->livein_empty()) {
- // Update PredBB livein.
- RS->enterBasicBlock(PredBB);
- if (!PredBB->empty())
- RS->forward(std::prev(PredBB->end()));
- for (const auto &LI : TailBB->liveins()) {
- if (!RS->isRegUsed(LI.PhysReg, false))
-          // If a register is live in to the tail block but not live at the
-          // end of the predecessor BB, then it should be added to the
-          // predecessor's livein list.
- PredBB->addLiveIn(LI);
- }
- }
-
- // Clone the contents of TailBB into PredBB.
- DenseMap<unsigned, unsigned> LocalVRMap;
- SmallVector<std::pair<unsigned,unsigned>, 4> CopyInfos;
- // Use instr_iterator here to properly handle bundles, e.g.
- // ARM Thumb2 IT block.
- MachineBasicBlock::instr_iterator I = TailBB->instr_begin();
- while (I != TailBB->instr_end()) {
- MachineInstr *MI = &*I;
- ++I;
- if (MI->isPHI()) {
- // Replace the uses of the def of the PHI with the register coming
- // from PredBB.
- ProcessPHI(MI, TailBB, PredBB, LocalVRMap, CopyInfos, UsedByPhi, true);
- } else {
- // Replace def of virtual registers with new registers, and update
- // uses with PHI source register or the new registers.
- DuplicateInstruction(MI, TailBB, PredBB, MF, LocalVRMap, UsedByPhi);
- }
- }
- MachineBasicBlock::iterator Loc = PredBB->getFirstTerminator();
- for (unsigned i = 0, e = CopyInfos.size(); i != e; ++i) {
- Copies.push_back(BuildMI(*PredBB, Loc, DebugLoc(),
- TII->get(TargetOpcode::COPY),
- CopyInfos[i].first).addReg(CopyInfos[i].second));
- }
-
-    // Simplify the branch: re-analyze PredBB with AllowModify set.
- TII->AnalyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true);
-
- NumInstrDups += TailBB->size() - 1; // subtract one for removed branch
-
- // Update the CFG.
- PredBB->removeSuccessor(PredBB->succ_begin());
- assert(PredBB->succ_empty() &&
- "TailDuplicate called on block with multiple successors!");
- for (MachineBasicBlock::succ_iterator I = TailBB->succ_begin(),
- E = TailBB->succ_end(); I != E; ++I)
- PredBB->addSuccessor(*I, MBPI->getEdgeProbability(TailBB, I));
-
- Changed = true;
- ++NumTailDups;
- }
-
- // If TailBB was duplicated into all its predecessors except for the prior
- // block, which falls through unconditionally, move the contents of this
- // block into the prior block.
- MachineBasicBlock *PrevBB = &*std::prev(TailBB->getIterator());
- MachineBasicBlock *PriorTBB = nullptr, *PriorFBB = nullptr;
- SmallVector<MachineOperand, 4> PriorCond;
- // This has to check PrevBB->succ_size() because EH edges are ignored by
- // AnalyzeBranch.
- if (PrevBB->succ_size() == 1 &&
- !TII->AnalyzeBranch(*PrevBB, PriorTBB, PriorFBB, PriorCond, true) &&
- PriorCond.empty() && !PriorTBB && TailBB->pred_size() == 1 &&
- !TailBB->hasAddressTaken()) {
- DEBUG(dbgs() << "\nMerging into block: " << *PrevBB
- << "From MBB: " << *TailBB);
- if (PreRegAlloc) {
- DenseMap<unsigned, unsigned> LocalVRMap;
- SmallVector<std::pair<unsigned,unsigned>, 4> CopyInfos;
- MachineBasicBlock::iterator I = TailBB->begin();
- // Process PHI instructions first.
- while (I != TailBB->end() && I->isPHI()) {
- // Replace the uses of the def of the PHI with the register coming
- // from PredBB.
- MachineInstr *MI = &*I++;
- ProcessPHI(MI, TailBB, PrevBB, LocalVRMap, CopyInfos, UsedByPhi, true);
- if (MI->getParent())
- MI->eraseFromParent();
- }
-
- // Now copy the non-PHI instructions.
- while (I != TailBB->end()) {
- // Replace def of virtual registers with new registers, and update
- // uses with PHI source register or the new registers.
- MachineInstr *MI = &*I++;
- assert(!MI->isBundle() && "Not expecting bundles before regalloc!");
- DuplicateInstruction(MI, TailBB, PrevBB, MF, LocalVRMap, UsedByPhi);
- MI->eraseFromParent();
- }
- MachineBasicBlock::iterator Loc = PrevBB->getFirstTerminator();
- for (unsigned i = 0, e = CopyInfos.size(); i != e; ++i) {
- Copies.push_back(BuildMI(*PrevBB, Loc, DebugLoc(),
- TII->get(TargetOpcode::COPY),
- CopyInfos[i].first)
- .addReg(CopyInfos[i].second));
- }
- } else {
- // No PHIs to worry about, just splice the instructions over.
- PrevBB->splice(PrevBB->end(), TailBB, TailBB->begin(), TailBB->end());
- }
- PrevBB->removeSuccessor(PrevBB->succ_begin());
- assert(PrevBB->succ_empty());
- PrevBB->transferSuccessors(TailBB);
- TDBBs.push_back(PrevBB);
- Changed = true;
- }
-
- // If this is after register allocation, there are no phis to fix.
- if (!PreRegAlloc)
- return Changed;
-
- // If we made no changes so far, we are safe.
- if (!Changed)
- return Changed;
-
-
-  // Handle the nasty case in which we duplicated a block that is part of a loop
- // into some but not all of its predecessors. For example:
- // 1 -> 2 <-> 3 |
- // \ |
- // \---> rest |
- // if we duplicate 2 into 1 but not into 3, we end up with
- // 12 -> 3 <-> 2 -> rest |
- // \ / |
- // \----->-----/ |
- // If there was a "var = phi(1, 3)" in 2, it has to be ultimately replaced
- // with a phi in 3 (which now dominates 2).
- // What we do here is introduce a copy in 3 of the register defined by the
- // phi, just like when we are duplicating 2 into 3, but we don't copy any
- // real instructions or remove the 3 -> 2 edge from the phi in 2.
- for (SmallSetVector<MachineBasicBlock *, 8>::iterator PI = Preds.begin(),
- PE = Preds.end(); PI != PE; ++PI) {
- MachineBasicBlock *PredBB = *PI;
- if (std::find(TDBBs.begin(), TDBBs.end(), PredBB) != TDBBs.end())
- continue;
-
- // EH edges
- if (PredBB->succ_size() != 1)
- continue;
-
- DenseMap<unsigned, unsigned> LocalVRMap;
- SmallVector<std::pair<unsigned,unsigned>, 4> CopyInfos;
- MachineBasicBlock::iterator I = TailBB->begin();
- // Process PHI instructions first.
- while (I != TailBB->end() && I->isPHI()) {
- // Replace the uses of the def of the PHI with the register coming
- // from PredBB.
- MachineInstr *MI = &*I++;
- ProcessPHI(MI, TailBB, PredBB, LocalVRMap, CopyInfos, UsedByPhi, false);
- }
- MachineBasicBlock::iterator Loc = PredBB->getFirstTerminator();
- for (unsigned i = 0, e = CopyInfos.size(); i != e; ++i) {
- Copies.push_back(BuildMI(*PredBB, Loc, DebugLoc(),
- TII->get(TargetOpcode::COPY),
- CopyInfos[i].first).addReg(CopyInfos[i].second));
- }
- }
-
- return Changed;
-}
-
-/// Remove the specified dead machine basic block from the function, updating
-/// the CFG.
-void TailDuplicatePass::RemoveDeadBlock(MachineBasicBlock *MBB) {
- assert(MBB->pred_empty() && "MBB must be dead!");
- DEBUG(dbgs() << "\nRemoving MBB: " << *MBB);
-
- // Remove all successors.
- while (!MBB->succ_empty())
- MBB->removeSuccessor(MBB->succ_end()-1);
-
- // Remove the block.
- MBB->eraseFromParent();
-}
diff --git a/contrib/llvm/lib/CodeGen/TailDuplicator.cpp b/contrib/llvm/lib/CodeGen/TailDuplicator.cpp
new file mode 100644
index 0000000..847a093
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/TailDuplicator.cpp
@@ -0,0 +1,932 @@
+//===-- TailDuplicator.cpp - Duplicate blocks into predecessors' tails ---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This utility class duplicates basic blocks ending in unconditional branches
+// into the tails of their predecessors.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/TailDuplicator.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "tailduplication"
+
+STATISTIC(NumTails, "Number of tails duplicated");
+STATISTIC(NumTailDups, "Number of tail duplicated blocks");
+STATISTIC(NumTailDupAdded,
+ "Number of instructions added due to tail duplication");
+STATISTIC(NumTailDupRemoved,
+ "Number of instructions removed due to tail duplication");
+STATISTIC(NumDeadBlocks, "Number of dead blocks removed");
+STATISTIC(NumAddedPHIs, "Number of phis added");
+
+// Heuristic for tail duplication.
+static cl::opt<unsigned> TailDuplicateSize(
+ "tail-dup-size",
+ cl::desc("Maximum instructions to consider tail duplicating"), cl::init(2),
+ cl::Hidden);
+
+static cl::opt<bool>
+ TailDupVerify("tail-dup-verify",
+ cl::desc("Verify sanity of PHI instructions during taildup"),
+ cl::init(false), cl::Hidden);
+
+static cl::opt<unsigned> TailDupLimit("tail-dup-limit", cl::init(~0U),
+ cl::Hidden);
+
+namespace llvm {
+
+void TailDuplicator::initMF(MachineFunction &MF, const MachineModuleInfo *MMIin,
+ const MachineBranchProbabilityInfo *MBPIin) {
+ TII = MF.getSubtarget().getInstrInfo();
+ TRI = MF.getSubtarget().getRegisterInfo();
+ MRI = &MF.getRegInfo();
+ MMI = MMIin;
+ MBPI = MBPIin;
+
+ assert(MBPI != nullptr && "Machine Branch Probability Info required");
+
+ PreRegAlloc = MRI->isSSA();
+}
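+
+// A minimal usage sketch (illustrative only; assumes a pass that has
+// required MachineBranchProbabilityInfo and can reach MachineModuleInfo):
+//
+//   TailDuplicator TD;
+//   TD.initMF(MF, MMI, MBPI);
+//   bool Changed = TD.tailDuplicateBlocks(MF);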
+
+static void VerifyPHIs(MachineFunction &MF, bool CheckExtra) {
+ for (MachineFunction::iterator I = ++MF.begin(), E = MF.end(); I != E; ++I) {
+ MachineBasicBlock *MBB = &*I;
+ SmallSetVector<MachineBasicBlock *, 8> Preds(MBB->pred_begin(),
+ MBB->pred_end());
+ MachineBasicBlock::iterator MI = MBB->begin();
+ while (MI != MBB->end()) {
+ if (!MI->isPHI())
+ break;
+ for (SmallSetVector<MachineBasicBlock *, 8>::iterator PI = Preds.begin(),
+ PE = Preds.end();
+ PI != PE; ++PI) {
+ MachineBasicBlock *PredBB = *PI;
+ bool Found = false;
+ for (unsigned i = 1, e = MI->getNumOperands(); i != e; i += 2) {
+ MachineBasicBlock *PHIBB = MI->getOperand(i + 1).getMBB();
+ if (PHIBB == PredBB) {
+ Found = true;
+ break;
+ }
+ }
+ if (!Found) {
+ dbgs() << "Malformed PHI in BB#" << MBB->getNumber() << ": " << *MI;
+ dbgs() << " missing input from predecessor BB#"
+ << PredBB->getNumber() << '\n';
+ llvm_unreachable(nullptr);
+ }
+ }
+
+ for (unsigned i = 1, e = MI->getNumOperands(); i != e; i += 2) {
+ MachineBasicBlock *PHIBB = MI->getOperand(i + 1).getMBB();
+ if (CheckExtra && !Preds.count(PHIBB)) {
+ dbgs() << "Warning: malformed PHI in BB#" << MBB->getNumber() << ": "
+ << *MI;
+ dbgs() << " extra input from predecessor BB#" << PHIBB->getNumber()
+ << '\n';
+ llvm_unreachable(nullptr);
+ }
+ if (PHIBB->getNumber() < 0) {
+ dbgs() << "Malformed PHI in BB#" << MBB->getNumber() << ": " << *MI;
+ dbgs() << " non-existing BB#" << PHIBB->getNumber() << '\n';
+ llvm_unreachable(nullptr);
+ }
+ }
+ ++MI;
+ }
+ }
+}
+
+/// Tail duplicate the block and clean up.
+bool TailDuplicator::tailDuplicateAndUpdate(MachineFunction &MF, bool IsSimple,
+ MachineBasicBlock *MBB) {
+ // Save the successors list.
+ SmallSetVector<MachineBasicBlock *, 8> Succs(MBB->succ_begin(),
+ MBB->succ_end());
+
+ SmallVector<MachineBasicBlock *, 8> TDBBs;
+ SmallVector<MachineInstr *, 16> Copies;
+ if (!tailDuplicate(MF, IsSimple, MBB, TDBBs, Copies))
+ return false;
+
+ ++NumTails;
+
+ SmallVector<MachineInstr *, 8> NewPHIs;
+ MachineSSAUpdater SSAUpdate(MF, &NewPHIs);
+
+ // TailBB's immediate successors are now successors of those predecessors
+ // which duplicated TailBB. Add the predecessors as sources to the PHI
+ // instructions.
+ bool isDead = MBB->pred_empty() && !MBB->hasAddressTaken();
+ if (PreRegAlloc)
+ updateSuccessorsPHIs(MBB, isDead, TDBBs, Succs);
+
+ // If it is dead, remove it.
+ if (isDead) {
+ NumTailDupRemoved += MBB->size();
+ removeDeadBlock(MBB);
+ ++NumDeadBlocks;
+ }
+
+ // Update SSA form.
+ if (!SSAUpdateVRs.empty()) {
+ for (unsigned i = 0, e = SSAUpdateVRs.size(); i != e; ++i) {
+ unsigned VReg = SSAUpdateVRs[i];
+ SSAUpdate.Initialize(VReg);
+
+ // If the original definition is still around, add it as an available
+ // value.
+ MachineInstr *DefMI = MRI->getVRegDef(VReg);
+ MachineBasicBlock *DefBB = nullptr;
+ if (DefMI) {
+ DefBB = DefMI->getParent();
+ SSAUpdate.AddAvailableValue(DefBB, VReg);
+ }
+
+ // Add the new vregs as available values.
+ DenseMap<unsigned, AvailableValsTy>::iterator LI =
+ SSAUpdateVals.find(VReg);
+ for (unsigned j = 0, ee = LI->second.size(); j != ee; ++j) {
+ MachineBasicBlock *SrcBB = LI->second[j].first;
+ unsigned SrcReg = LI->second[j].second;
+ SSAUpdate.AddAvailableValue(SrcBB, SrcReg);
+ }
+
+ // Rewrite uses that are outside of the original def's block.
+ MachineRegisterInfo::use_iterator UI = MRI->use_begin(VReg);
+ while (UI != MRI->use_end()) {
+ MachineOperand &UseMO = *UI;
+ MachineInstr *UseMI = UseMO.getParent();
+ ++UI;
+ if (UseMI->isDebugValue()) {
+ // SSAUpdate can replace the use with an undef. That creates
+ // a debug instruction that is a kill.
+          // FIXME: Should it be SSAUpdate's job to delete debug instructions
+          // instead of replacing the use with undef?
+ UseMI->eraseFromParent();
+ continue;
+ }
+ if (UseMI->getParent() == DefBB && !UseMI->isPHI())
+ continue;
+ SSAUpdate.RewriteUse(UseMO);
+ }
+ }
+
+ SSAUpdateVRs.clear();
+ SSAUpdateVals.clear();
+ }
+
+ // Eliminate some of the copies inserted by tail duplication to maintain
+ // SSA form.
+ for (unsigned i = 0, e = Copies.size(); i != e; ++i) {
+ MachineInstr *Copy = Copies[i];
+ if (!Copy->isCopy())
+ continue;
+ unsigned Dst = Copy->getOperand(0).getReg();
+ unsigned Src = Copy->getOperand(1).getReg();
+ if (MRI->hasOneNonDBGUse(Src) &&
+ MRI->constrainRegClass(Src, MRI->getRegClass(Dst))) {
+ // Copy is the only use. Do trivial copy propagation here.
+ MRI->replaceRegWith(Dst, Src);
+ Copy->eraseFromParent();
+ }
+ }
+
+ if (NewPHIs.size())
+ NumAddedPHIs += NewPHIs.size();
+
+ return true;
+}
+
+/// Look for small blocks that are unconditionally branched to and do not fall
+/// through. Tail-duplicate their instructions into their predecessors to
+/// eliminate (dynamic) branches.
+bool TailDuplicator::tailDuplicateBlocks(MachineFunction &MF) {
+ bool MadeChange = false;
+
+ if (PreRegAlloc && TailDupVerify) {
+ DEBUG(dbgs() << "\n*** Before tail-duplicating\n");
+ VerifyPHIs(MF, true);
+ }
+
+ for (MachineFunction::iterator I = ++MF.begin(), E = MF.end(); I != E;) {
+ MachineBasicBlock *MBB = &*I++;
+
+ if (NumTails == TailDupLimit)
+ break;
+
+ bool IsSimple = isSimpleBB(MBB);
+
+ if (!shouldTailDuplicate(MF, IsSimple, *MBB))
+ continue;
+
+ MadeChange |= tailDuplicateAndUpdate(MF, IsSimple, MBB);
+ }
+
+ if (PreRegAlloc && TailDupVerify)
+ VerifyPHIs(MF, false);
+
+ return MadeChange;
+}
+
+static bool isDefLiveOut(unsigned Reg, MachineBasicBlock *BB,
+ const MachineRegisterInfo *MRI) {
+ for (MachineInstr &UseMI : MRI->use_instructions(Reg)) {
+ if (UseMI.isDebugValue())
+ continue;
+ if (UseMI.getParent() != BB)
+ return true;
+ }
+ return false;
+}
+
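+// Machine-level PHIs lay out their operands as
+//   %vreg3<def> = PHI %vreg1, <BB#1>, %vreg2, <BB#2>
+// i.e. operand 0 is the def, each source register sits at an odd index, and
+// its incoming block follows it; hence the scans below start at 1 and step
+// by 2. (Register and block names here are illustrative.)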
+static unsigned getPHISrcRegOpIdx(MachineInstr *MI, MachineBasicBlock *SrcBB) {
+ for (unsigned i = 1, e = MI->getNumOperands(); i != e; i += 2)
+ if (MI->getOperand(i + 1).getMBB() == SrcBB)
+ return i;
+ return 0;
+}
+
+// Remember which registers are used by phis in this block. This is
+// used to determine which registers are liveout while modifying the
+// block (which is why we need to copy the information).
+static void getRegsUsedByPHIs(const MachineBasicBlock &BB,
+ DenseSet<unsigned> *UsedByPhi) {
+ for (const auto &MI : BB) {
+ if (!MI.isPHI())
+ break;
+ for (unsigned i = 1, e = MI.getNumOperands(); i != e; i += 2) {
+ unsigned SrcReg = MI.getOperand(i).getReg();
+ UsedByPhi->insert(SrcReg);
+ }
+ }
+}
+
+/// Record a new virtual register that provides OrigReg's value in a given
+/// block, for later SSA update.
+void TailDuplicator::addSSAUpdateEntry(unsigned OrigReg, unsigned NewReg,
+ MachineBasicBlock *BB) {
+ DenseMap<unsigned, AvailableValsTy>::iterator LI =
+ SSAUpdateVals.find(OrigReg);
+ if (LI != SSAUpdateVals.end())
+ LI->second.push_back(std::make_pair(BB, NewReg));
+ else {
+ AvailableValsTy Vals;
+ Vals.push_back(std::make_pair(BB, NewReg));
+ SSAUpdateVals.insert(std::make_pair(OrigReg, Vals));
+ SSAUpdateVRs.push_back(OrigReg);
+ }
+}
+
+/// Process PHI node in TailBB by turning it into a copy in PredBB. Remember the
+/// source register that's contributed by PredBB and update the SSA update
+/// map.
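+///
+/// A sketch of the effect (register names illustrative): given
+///   %vregV = PHI %vregA, <PredBB>, %vregB, <OtherBB>
+/// being duplicated into PredBB, LocalVRMap maps %vregV to %vregA so the
+/// cloned body uses %vregA directly, and a "%vregNew = COPY %vregA" is
+/// queued at the end of PredBB so a liveout value stays available for SSA
+/// repair.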
+void TailDuplicator::processPHI(
+ MachineInstr *MI, MachineBasicBlock *TailBB, MachineBasicBlock *PredBB,
+ DenseMap<unsigned, RegSubRegPair> &LocalVRMap,
+ SmallVectorImpl<std::pair<unsigned, RegSubRegPair>> &Copies,
+ const DenseSet<unsigned> &RegsUsedByPhi, bool Remove) {
+ unsigned DefReg = MI->getOperand(0).getReg();
+ unsigned SrcOpIdx = getPHISrcRegOpIdx(MI, PredBB);
+ assert(SrcOpIdx && "Unable to find matching PHI source?");
+ unsigned SrcReg = MI->getOperand(SrcOpIdx).getReg();
+ unsigned SrcSubReg = MI->getOperand(SrcOpIdx).getSubReg();
+ const TargetRegisterClass *RC = MRI->getRegClass(DefReg);
+ LocalVRMap.insert(std::make_pair(DefReg, RegSubRegPair(SrcReg, SrcSubReg)));
+
+  // Insert a copy of the source register at the end of the block. The new
+  // def register becomes the value that is live out of the block.
+ unsigned NewDef = MRI->createVirtualRegister(RC);
+ Copies.push_back(std::make_pair(NewDef, RegSubRegPair(SrcReg, SrcSubReg)));
+ if (isDefLiveOut(DefReg, TailBB, MRI) || RegsUsedByPhi.count(DefReg))
+ addSSAUpdateEntry(DefReg, NewDef, PredBB);
+
+ if (!Remove)
+ return;
+
+ // Remove PredBB from the PHI node.
+ MI->RemoveOperand(SrcOpIdx + 1);
+ MI->RemoveOperand(SrcOpIdx);
+ if (MI->getNumOperands() == 1)
+ MI->eraseFromParent();
+}
+
+/// Duplicate a TailBB instruction to PredBB and update
+/// the source operands according to the earlier PHI translation.
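+///
+/// Sub-register handling, sketched (names illustrative): if LocalVRMap maps
+/// %vregA to %vregB:sub_lo and the cloned instruction reads %vregA:sub_hi,
+/// the rewritten operand composes the two indices via composeSubRegIndices;
+/// when the register classes cannot be constrained to agree, an explicit
+/// COPY is emitted instead.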
+void TailDuplicator::duplicateInstruction(
+ MachineInstr *MI, MachineBasicBlock *TailBB, MachineBasicBlock *PredBB,
+ MachineFunction &MF,
+ DenseMap<unsigned, RegSubRegPair> &LocalVRMap,
+ const DenseSet<unsigned> &UsedByPhi) {
+ MachineInstr *NewMI = TII->duplicate(*MI, MF);
+ if (PreRegAlloc) {
+ for (unsigned i = 0, e = NewMI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = NewMI->getOperand(i);
+ if (!MO.isReg())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ continue;
+ if (MO.isDef()) {
+ const TargetRegisterClass *RC = MRI->getRegClass(Reg);
+ unsigned NewReg = MRI->createVirtualRegister(RC);
+ MO.setReg(NewReg);
+ LocalVRMap.insert(std::make_pair(Reg, RegSubRegPair(NewReg, 0)));
+ if (isDefLiveOut(Reg, TailBB, MRI) || UsedByPhi.count(Reg))
+ addSSAUpdateEntry(Reg, NewReg, PredBB);
+ } else {
+ auto VI = LocalVRMap.find(Reg);
+ if (VI != LocalVRMap.end()) {
+ // Need to make sure that the register class of the mapped register
+ // will satisfy the constraints of the class of the register being
+ // replaced.
+ auto *OrigRC = MRI->getRegClass(Reg);
+ auto *MappedRC = MRI->getRegClass(VI->second.Reg);
+ const TargetRegisterClass *ConstrRC;
+ if (VI->second.SubReg != 0) {
+ ConstrRC = TRI->getMatchingSuperRegClass(MappedRC, OrigRC,
+ VI->second.SubReg);
+ if (ConstrRC) {
+ // The actual constraining (as in "find appropriate new class")
+ // is done by getMatchingSuperRegClass, so now we only need to
+ // change the class of the mapped register.
+ MRI->setRegClass(VI->second.Reg, ConstrRC);
+ }
+ } else {
+ // For mapped registers that do not have sub-registers, simply
+ // restrict their class to match the original one.
+ ConstrRC = MRI->constrainRegClass(VI->second.Reg, OrigRC);
+ }
+
+ if (ConstrRC) {
+ // If the class constraining succeeded, we can simply replace
+ // the old register with the mapped one.
+ MO.setReg(VI->second.Reg);
+ // We have Reg -> VI.Reg:VI.SubReg, so if Reg is used with a
+ // sub-register, we need to compose the sub-register indices.
+ MO.setSubReg(TRI->composeSubRegIndices(MO.getSubReg(),
+ VI->second.SubReg));
+ } else {
+            // The direct replacement is not possible due to failing register
+            // class constraints. An explicit COPY is necessary. Create one
+            // that can be reused.
+ auto *NewRC = MI->getRegClassConstraint(i, TII, TRI);
+ if (NewRC == nullptr)
+ NewRC = OrigRC;
+ unsigned NewReg = MRI->createVirtualRegister(NewRC);
+ BuildMI(*PredBB, MI, MI->getDebugLoc(),
+ TII->get(TargetOpcode::COPY), NewReg)
+ .addReg(VI->second.Reg, 0, VI->second.SubReg);
+ LocalVRMap.erase(VI);
+ LocalVRMap.insert(std::make_pair(Reg, RegSubRegPair(NewReg, 0)));
+ MO.setReg(NewReg);
+ // The composed VI.Reg:VI.SubReg is replaced with NewReg, which
+ // is equivalent to the whole register Reg. Hence, Reg:subreg
+            // is the same as NewReg:subreg, so keep the sub-register index
+ // unchanged.
+ }
+ // Clear any kill flags from this operand. The new register could
+ // have uses after this one, so kills are not valid here.
+ MO.setIsKill(false);
+ }
+ }
+ }
+ }
+ PredBB->insert(PredBB->instr_end(), NewMI);
+}
+
+/// After FromBB is tail duplicated into its predecessor blocks, the successors
+/// have gained new predecessors. Update the PHI instructions in them
+/// accordingly.
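+///
+/// For instance (illustrative), if FromBB was duplicated into Pred1 and
+/// Pred2, a successor PHI entry "%vregX, <FromBB>" becomes one entry per
+/// duplicate, "%vregX1, <Pred1>, %vregX2, <Pred2>", using the new registers
+/// recorded in SSAUpdateVals (or the original register if it was merely
+/// live through the tail block).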
+void TailDuplicator::updateSuccessorsPHIs(
+ MachineBasicBlock *FromBB, bool isDead,
+ SmallVectorImpl<MachineBasicBlock *> &TDBBs,
+ SmallSetVector<MachineBasicBlock *, 8> &Succs) {
+ for (SmallSetVector<MachineBasicBlock *, 8>::iterator SI = Succs.begin(),
+ SE = Succs.end();
+ SI != SE; ++SI) {
+ MachineBasicBlock *SuccBB = *SI;
+ for (MachineBasicBlock::iterator II = SuccBB->begin(), EE = SuccBB->end();
+ II != EE; ++II) {
+ if (!II->isPHI())
+ break;
+ MachineInstrBuilder MIB(*FromBB->getParent(), II);
+ unsigned Idx = 0;
+ for (unsigned i = 1, e = II->getNumOperands(); i != e; i += 2) {
+ MachineOperand &MO = II->getOperand(i + 1);
+ if (MO.getMBB() == FromBB) {
+ Idx = i;
+ break;
+ }
+ }
+
+ assert(Idx != 0);
+ MachineOperand &MO0 = II->getOperand(Idx);
+ unsigned Reg = MO0.getReg();
+ if (isDead) {
+ // Folded into the previous BB.
+          // There could be duplicate phi source entries. FIXME: Should sdisel
+          // or an earlier pass have fixed this?
+ for (unsigned i = II->getNumOperands() - 2; i != Idx; i -= 2) {
+ MachineOperand &MO = II->getOperand(i + 1);
+ if (MO.getMBB() == FromBB) {
+ II->RemoveOperand(i + 1);
+ II->RemoveOperand(i);
+ }
+ }
+ } else
+ Idx = 0;
+
+ // If Idx is set, the operands at Idx and Idx+1 must be removed.
+ // We reuse the location to avoid expensive RemoveOperand calls.
+
+ DenseMap<unsigned, AvailableValsTy>::iterator LI =
+ SSAUpdateVals.find(Reg);
+ if (LI != SSAUpdateVals.end()) {
+ // This register is defined in the tail block.
+ for (unsigned j = 0, ee = LI->second.size(); j != ee; ++j) {
+ MachineBasicBlock *SrcBB = LI->second[j].first;
+ // If we didn't duplicate a bb into a particular predecessor, we
+          // might still have added an entry to SSAUpdateVals to correctly
+          // recompute SSA. In that case, avoid adding a dummy extra argument
+          // to this PHI.
+ if (!SrcBB->isSuccessor(SuccBB))
+ continue;
+
+ unsigned SrcReg = LI->second[j].second;
+ if (Idx != 0) {
+ II->getOperand(Idx).setReg(SrcReg);
+ II->getOperand(Idx + 1).setMBB(SrcBB);
+ Idx = 0;
+ } else {
+ MIB.addReg(SrcReg).addMBB(SrcBB);
+ }
+ }
+ } else {
+ // Live in tail block, must also be live in predecessors.
+ for (unsigned j = 0, ee = TDBBs.size(); j != ee; ++j) {
+ MachineBasicBlock *SrcBB = TDBBs[j];
+ if (Idx != 0) {
+ II->getOperand(Idx).setReg(Reg);
+ II->getOperand(Idx + 1).setMBB(SrcBB);
+ Idx = 0;
+ } else {
+ MIB.addReg(Reg).addMBB(SrcBB);
+ }
+ }
+ }
+ if (Idx != 0) {
+ II->RemoveOperand(Idx + 1);
+ II->RemoveOperand(Idx);
+ }
+ }
+ }
+}
+
+/// Determine if it is profitable to duplicate this block.
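+///
+/// In summary of the checks below: at most tail-dup-size (default 2) real
+/// instructions, only 1 when optimizing for size, and a cap of 20 pre-RA
+/// for blocks ending in an indirect branch.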
+bool TailDuplicator::shouldTailDuplicate(const MachineFunction &MF,
+ bool IsSimple,
+ MachineBasicBlock &TailBB) {
+ // Only duplicate blocks that end with unconditional branches.
+ if (TailBB.canFallThrough())
+ return false;
+
+ // Don't try to tail-duplicate single-block loops.
+ if (TailBB.isSuccessor(&TailBB))
+ return false;
+
+ // Set the limit on the cost to duplicate. When optimizing for size,
+  // duplicate only one instruction, because one branch can be eliminated to
+ // compensate for the duplication.
+ unsigned MaxDuplicateCount;
+ if (TailDuplicateSize.getNumOccurrences() == 0 &&
+ // FIXME: Use Function::optForSize().
+ MF.getFunction()->hasFnAttribute(Attribute::OptimizeForSize))
+ MaxDuplicateCount = 1;
+ else
+ MaxDuplicateCount = TailDuplicateSize;
+
+ // If the target has hardware branch prediction that can handle indirect
+ // branches, duplicating them can often make them predictable when there
+ // are common paths through the code. The limit needs to be high enough
+ // to allow undoing the effects of tail merging and other optimizations
+ // that rearrange the predecessors of the indirect branch.
+
+ bool HasIndirectbr = false;
+ if (!TailBB.empty())
+ HasIndirectbr = TailBB.back().isIndirectBranch();
+
+ if (HasIndirectbr && PreRegAlloc)
+ MaxDuplicateCount = 20;
+
+ // Check the instructions in the block to determine whether tail-duplication
+ // is invalid or unlikely to be profitable.
+ unsigned InstrCount = 0;
+ for (MachineInstr &MI : TailBB) {
+ // Non-duplicable things shouldn't be tail-duplicated.
+ if (MI.isNotDuplicable())
+ return false;
+
+ // Convergent instructions can be duplicated only if doing so doesn't add
+    // new control dependencies, and duplicating this block would do exactly
+    // that, so reject it.
+ if (MI.isConvergent())
+ return false;
+
+ // Do not duplicate 'return' instructions if this is a pre-regalloc run.
+ // A return may expand into a lot more instructions (e.g. reload of callee
+ // saved registers) after PEI.
+ if (PreRegAlloc && MI.isReturn())
+ return false;
+
+    // Avoid duplicating calls before register allocation. Calls present a
+    // barrier to register allocation, so duplicating them may end up increasing
+ // spills.
+ if (PreRegAlloc && MI.isCall())
+ return false;
+
+ if (!MI.isPHI() && !MI.isDebugValue())
+ InstrCount += 1;
+
+ if (InstrCount > MaxDuplicateCount)
+ return false;
+ }
+
+ // Check if any of the successors of TailBB has a PHI node in which the
+ // value corresponding to TailBB uses a subregister.
+ // If a phi node uses a register paired with a subregister, the actual
+ // "value type" of the phi may differ from the type of the register without
+ // any subregisters. Due to a bug, tail duplication may add a new operand
+  // without a necessary subregister, producing invalid code. This is
+ // demonstrated by test/CodeGen/Hexagon/tail-dup-subreg-abort.ll.
+ // Disable tail duplication for this case for now, until the problem is
+ // fixed.
+ for (auto SB : TailBB.successors()) {
+ for (auto &I : *SB) {
+ if (!I.isPHI())
+ break;
+ unsigned Idx = getPHISrcRegOpIdx(&I, &TailBB);
+ assert(Idx != 0);
+ MachineOperand &PU = I.getOperand(Idx);
+ if (PU.getSubReg() != 0)
+ return false;
+ }
+ }
+
+ if (HasIndirectbr && PreRegAlloc)
+ return true;
+
+ if (IsSimple)
+ return true;
+
+ if (!PreRegAlloc)
+ return true;
+
+ return canCompletelyDuplicateBB(TailBB);
+}
+
+/// True if this BB has only one unconditional jump.
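+/// For example, a block whose only non-debug instruction is an unconditional
+/// branch, or an empty block, provided it has exactly one successor and at
+/// least one predecessor.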
+bool TailDuplicator::isSimpleBB(MachineBasicBlock *TailBB) {
+ if (TailBB->succ_size() != 1)
+ return false;
+ if (TailBB->pred_empty())
+ return false;
+ MachineBasicBlock::iterator I = TailBB->getFirstNonDebugInstr();
+ if (I == TailBB->end())
+ return true;
+ return I->isUnconditionalBranch();
+}
+
+static bool bothUsedInPHI(const MachineBasicBlock &A,
+ const SmallPtrSet<MachineBasicBlock *, 8> &SuccsB) {
+ for (MachineBasicBlock *BB : A.successors())
+ if (SuccsB.count(BB) && !BB->empty() && BB->begin()->isPHI())
+ return true;
+
+ return false;
+}
+
+bool TailDuplicator::canCompletelyDuplicateBB(MachineBasicBlock &BB) {
+ for (MachineBasicBlock *PredBB : BB.predecessors()) {
+ if (PredBB->succ_size() > 1)
+ return false;
+
+ MachineBasicBlock *PredTBB = nullptr, *PredFBB = nullptr;
+ SmallVector<MachineOperand, 4> PredCond;
+ if (TII->analyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true))
+ return false;
+
+ if (!PredCond.empty())
+ return false;
+ }
+ return true;
+}
+
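+/// Duplicate a "simple" tail block by rewriting each predecessor's terminator
+/// to branch directly to TailBB's single successor; since a simple block holds
+/// at most an unconditional branch, no instructions need to be cloned.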
+bool TailDuplicator::duplicateSimpleBB(
+ MachineBasicBlock *TailBB, SmallVectorImpl<MachineBasicBlock *> &TDBBs,
+ const DenseSet<unsigned> &UsedByPhi,
+ SmallVectorImpl<MachineInstr *> &Copies) {
+ SmallPtrSet<MachineBasicBlock *, 8> Succs(TailBB->succ_begin(),
+ TailBB->succ_end());
+ SmallVector<MachineBasicBlock *, 8> Preds(TailBB->pred_begin(),
+ TailBB->pred_end());
+ bool Changed = false;
+ for (SmallSetVector<MachineBasicBlock *, 8>::iterator PI = Preds.begin(),
+ PE = Preds.end();
+ PI != PE; ++PI) {
+ MachineBasicBlock *PredBB = *PI;
+
+ if (PredBB->hasEHPadSuccessor())
+ continue;
+
+ if (bothUsedInPHI(*PredBB, Succs))
+ continue;
+
+ MachineBasicBlock *PredTBB = nullptr, *PredFBB = nullptr;
+ SmallVector<MachineOperand, 4> PredCond;
+ if (TII->analyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true))
+ continue;
+
+ Changed = true;
+ DEBUG(dbgs() << "\nTail-duplicating into PredBB: " << *PredBB
+ << "From simple Succ: " << *TailBB);
+
+ MachineBasicBlock *NewTarget = *TailBB->succ_begin();
+ MachineBasicBlock *NextBB = &*std::next(PredBB->getIterator());
+
+ // Make PredFBB explicit.
+ if (PredCond.empty())
+ PredFBB = PredTBB;
+
+ // Make fall through explicit.
+ if (!PredTBB)
+ PredTBB = NextBB;
+ if (!PredFBB)
+ PredFBB = NextBB;
+
+    // Redirect branches targeting TailBB to the new target.
+ if (PredFBB == TailBB)
+ PredFBB = NewTarget;
+ if (PredTBB == TailBB)
+ PredTBB = NewTarget;
+
+ // Make the branch unconditional if possible
+ if (PredTBB == PredFBB) {
+ PredCond.clear();
+ PredFBB = nullptr;
+ }
+
+ // Avoid adding fall through branches.
+ if (PredFBB == NextBB)
+ PredFBB = nullptr;
+ if (PredTBB == NextBB && PredFBB == nullptr)
+ PredTBB = nullptr;
+
+ TII->RemoveBranch(*PredBB);
+
+ if (!PredBB->isSuccessor(NewTarget))
+ PredBB->replaceSuccessor(TailBB, NewTarget);
+ else {
+ PredBB->removeSuccessor(TailBB, true);
+ assert(PredBB->succ_size() <= 1);
+ }
+
+ if (PredTBB)
+ TII->InsertBranch(*PredBB, PredTBB, PredFBB, PredCond, DebugLoc());
+
+ TDBBs.push_back(PredBB);
+ }
+ return Changed;
+}
+
+/// If it is profitable, duplicate TailBB's contents in each
+/// of its predecessors.
+bool TailDuplicator::tailDuplicate(MachineFunction &MF, bool IsSimple,
+ MachineBasicBlock *TailBB,
+ SmallVectorImpl<MachineBasicBlock *> &TDBBs,
+ SmallVectorImpl<MachineInstr *> &Copies) {
+ DEBUG(dbgs() << "\n*** Tail-duplicating BB#" << TailBB->getNumber() << '\n');
+
+ DenseSet<unsigned> UsedByPhi;
+ getRegsUsedByPHIs(*TailBB, &UsedByPhi);
+
+ if (IsSimple)
+ return duplicateSimpleBB(TailBB, TDBBs, UsedByPhi, Copies);
+
+ // Iterate through all the unique predecessors and tail-duplicate this
+ // block into them, if possible. Copying the list ahead of time also
+ // avoids trouble with the predecessor list reallocating.
+ bool Changed = false;
+ SmallSetVector<MachineBasicBlock *, 8> Preds(TailBB->pred_begin(),
+ TailBB->pred_end());
+ for (SmallSetVector<MachineBasicBlock *, 8>::iterator PI = Preds.begin(),
+ PE = Preds.end();
+ PI != PE; ++PI) {
+ MachineBasicBlock *PredBB = *PI;
+
+ assert(TailBB != PredBB &&
+ "Single-block loop should have been rejected earlier!");
+ // EH edges are ignored by AnalyzeBranch.
+ if (PredBB->succ_size() > 1)
+ continue;
+
+ MachineBasicBlock *PredTBB, *PredFBB;
+ SmallVector<MachineOperand, 4> PredCond;
+ if (TII->analyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true))
+ continue;
+ if (!PredCond.empty())
+ continue;
+ // Don't duplicate into a fall-through predecessor (at least for now).
+ if (PredBB->isLayoutSuccessor(TailBB) && PredBB->canFallThrough())
+ continue;
+
+ DEBUG(dbgs() << "\nTail-duplicating into PredBB: " << *PredBB
+ << "From Succ: " << *TailBB);
+
+ TDBBs.push_back(PredBB);
+
+ // Remove PredBB's unconditional branch.
+ TII->RemoveBranch(*PredBB);
+
+ // Clone the contents of TailBB into PredBB.
+ DenseMap<unsigned, RegSubRegPair> LocalVRMap;
+ SmallVector<std::pair<unsigned, RegSubRegPair>, 4> CopyInfos;
+ // Use instr_iterator here to properly handle bundles, e.g.
+ // ARM Thumb2 IT block.
+ MachineBasicBlock::instr_iterator I = TailBB->instr_begin();
+ while (I != TailBB->instr_end()) {
+ MachineInstr *MI = &*I;
+ ++I;
+ if (MI->isPHI()) {
+ // Replace the uses of the def of the PHI with the register coming
+ // from PredBB.
+ processPHI(MI, TailBB, PredBB, LocalVRMap, CopyInfos, UsedByPhi, true);
+ } else {
+ // Replace def of virtual registers with new registers, and update
+ // uses with PHI source register or the new registers.
+ duplicateInstruction(MI, TailBB, PredBB, MF, LocalVRMap, UsedByPhi);
+ }
+ }
+ appendCopies(PredBB, CopyInfos, Copies);
+
+    // Simplify the branch: re-analyze PredBB with AllowModify set.
+ TII->analyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true);
+
+ NumTailDupAdded += TailBB->size() - 1; // subtract one for removed branch
+
+ // Update the CFG.
+ PredBB->removeSuccessor(PredBB->succ_begin());
+ assert(PredBB->succ_empty() &&
+ "TailDuplicate called on block with multiple successors!");
+ for (MachineBasicBlock::succ_iterator I = TailBB->succ_begin(),
+ E = TailBB->succ_end();
+ I != E; ++I)
+ PredBB->addSuccessor(*I, MBPI->getEdgeProbability(TailBB, I));
+
+ Changed = true;
+ ++NumTailDups;
+ }
+
+ // If TailBB was duplicated into all its predecessors except for the prior
+ // block, which falls through unconditionally, move the contents of this
+ // block into the prior block.
+ MachineBasicBlock *PrevBB = &*std::prev(TailBB->getIterator());
+ MachineBasicBlock *PriorTBB = nullptr, *PriorFBB = nullptr;
+ SmallVector<MachineOperand, 4> PriorCond;
+ // This has to check PrevBB->succ_size() because EH edges are ignored by
+ // AnalyzeBranch.
+ if (PrevBB->succ_size() == 1 &&
+ !TII->analyzeBranch(*PrevBB, PriorTBB, PriorFBB, PriorCond, true) &&
+ PriorCond.empty() && !PriorTBB && TailBB->pred_size() == 1 &&
+ !TailBB->hasAddressTaken()) {
+ DEBUG(dbgs() << "\nMerging into block: " << *PrevBB
+ << "From MBB: " << *TailBB);
+ if (PreRegAlloc) {
+ DenseMap<unsigned, RegSubRegPair> LocalVRMap;
+ SmallVector<std::pair<unsigned, RegSubRegPair>, 4> CopyInfos;
+ MachineBasicBlock::iterator I = TailBB->begin();
+ // Process PHI instructions first.
+ while (I != TailBB->end() && I->isPHI()) {
+ // Replace the uses of the def of the PHI with the register coming
+ // from PredBB.
+ MachineInstr *MI = &*I++;
+ processPHI(MI, TailBB, PrevBB, LocalVRMap, CopyInfos, UsedByPhi, true);
+ }
+
+ // Now copy the non-PHI instructions.
+ while (I != TailBB->end()) {
+ // Replace def of virtual registers with new registers, and update
+ // uses with PHI source register or the new registers.
+ MachineInstr *MI = &*I++;
+ assert(!MI->isBundle() && "Not expecting bundles before regalloc!");
+ duplicateInstruction(MI, TailBB, PrevBB, MF, LocalVRMap, UsedByPhi);
+ MI->eraseFromParent();
+ }
+ appendCopies(PrevBB, CopyInfos, Copies);
+ } else {
+ // No PHIs to worry about, just splice the instructions over.
+ PrevBB->splice(PrevBB->end(), TailBB, TailBB->begin(), TailBB->end());
+ }
+ PrevBB->removeSuccessor(PrevBB->succ_begin());
+ assert(PrevBB->succ_empty());
+ PrevBB->transferSuccessors(TailBB);
+ TDBBs.push_back(PrevBB);
+ Changed = true;
+ }
+
+ // If this is after register allocation, there are no phis to fix.
+ if (!PreRegAlloc)
+ return Changed;
+
+ // If we made no changes so far, we are safe.
+ if (!Changed)
+ return Changed;
+
+  // Handle the nasty case in which we duplicated a block that is part of a loop
+ // into some but not all of its predecessors. For example:
+ // 1 -> 2 <-> 3 |
+ // \ |
+ // \---> rest |
+ // if we duplicate 2 into 1 but not into 3, we end up with
+ // 12 -> 3 <-> 2 -> rest |
+ // \ / |
+ // \----->-----/ |
+ // If there was a "var = phi(1, 3)" in 2, it has to be ultimately replaced
+ // with a phi in 3 (which now dominates 2).
+ // What we do here is introduce a copy in 3 of the register defined by the
+ // phi, just like when we are duplicating 2 into 3, but we don't copy any
+ // real instructions or remove the 3 -> 2 edge from the phi in 2.
+ for (SmallSetVector<MachineBasicBlock *, 8>::iterator PI = Preds.begin(),
+ PE = Preds.end();
+ PI != PE; ++PI) {
+ MachineBasicBlock *PredBB = *PI;
+ if (std::find(TDBBs.begin(), TDBBs.end(), PredBB) != TDBBs.end())
+ continue;
+
+ // EH edges
+ if (PredBB->succ_size() != 1)
+ continue;
+
+ DenseMap<unsigned, RegSubRegPair> LocalVRMap;
+ SmallVector<std::pair<unsigned, RegSubRegPair>, 4> CopyInfos;
+ MachineBasicBlock::iterator I = TailBB->begin();
+ // Process PHI instructions first.
+ while (I != TailBB->end() && I->isPHI()) {
+ // Replace the uses of the def of the PHI with the register coming
+ // from PredBB.
+ MachineInstr *MI = &*I++;
+ processPHI(MI, TailBB, PredBB, LocalVRMap, CopyInfos, UsedByPhi, false);
+ }
+ appendCopies(PredBB, CopyInfos, Copies);
+ }
+
+ return Changed;
+}
+
+/// At the end of the block \p MBB generate COPY instructions between registers
+/// described by \p CopyInfos. Append resulting instructions to \p Copies.
+void TailDuplicator::appendCopies(
+    MachineBasicBlock *MBB,
+    SmallVectorImpl<std::pair<unsigned, RegSubRegPair>> &CopyInfos,
+    SmallVectorImpl<MachineInstr *> &Copies) {
+ MachineBasicBlock::iterator Loc = MBB->getFirstTerminator();
+ const MCInstrDesc &CopyD = TII->get(TargetOpcode::COPY);
+ for (auto &CI : CopyInfos) {
+ auto C = BuildMI(*MBB, Loc, DebugLoc(), CopyD, CI.first)
+ .addReg(CI.second.Reg, 0, CI.second.SubReg);
+ Copies.push_back(C);
+ }
+}
+
+/// Remove the specified dead machine basic block from the function, updating
+/// the CFG.
+void TailDuplicator::removeDeadBlock(MachineBasicBlock *MBB) {
+ assert(MBB->pred_empty() && "MBB must be dead!");
+ DEBUG(dbgs() << "\nRemoving MBB: " << *MBB);
+
+ // Remove all successors.
+ while (!MBB->succ_empty())
+ MBB->removeSuccessor(MBB->succ_end() - 1);
+
+ // Remove the block.
+ MBB->eraseFromParent();
+}
+
+} // End llvm namespace
diff --git a/contrib/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp b/contrib/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp
index 679ade1..cac7e63 100644
--- a/contrib/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp
+++ b/contrib/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp
@@ -12,13 +12,14 @@
//===----------------------------------------------------------------------===//
#include "llvm/ADT/BitVector.h"
-#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Function.h"
+#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
#include <cstdlib>
@@ -59,16 +60,25 @@ bool TargetFrameLowering::needsFrameIndexResolution(
void TargetFrameLowering::determineCalleeSaves(MachineFunction &MF,
BitVector &SavedRegs,
RegScavenger *RS) const {
- // Get the callee saved register list...
const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
+
+ // Resize before the early returns. Some backends expect that
+ // SavedRegs.size() == TRI.getNumRegs() after this call even if there are no
+ // saved registers.
+ SavedRegs.resize(TRI.getNumRegs());
+
+  // When interprocedural register allocation is enabled, caller-saved
+  // registers are preferred over callee-saved registers.
+ if (MF.getTarget().Options.EnableIPRA && isSafeForNoCSROpt(MF.getFunction()))
+ return;
+
+ // Get the callee saved register list...
const MCPhysReg *CSRegs = TRI.getCalleeSavedRegs(&MF);
// Early exit if there are no callee saved registers.
if (!CSRegs || CSRegs[0] == 0)
return;
- SavedRegs.resize(TRI.getNumRegs());
-
// In Naked functions we aren't going to save any registers.
if (MF.getFunction()->hasFnAttribute(Attribute::Naked))
return;
diff --git a/contrib/llvm/lib/CodeGen/TargetInstrInfo.cpp b/contrib/llvm/lib/CodeGen/TargetInstrInfo.cpp
index 6eaf991..e7330c6 100644
--- a/contrib/llvm/lib/CodeGen/TargetInstrInfo.cpp
+++ b/contrib/llvm/lib/CodeGen/TargetInstrInfo.cpp
@@ -31,6 +31,7 @@
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include <cctype>
+
using namespace llvm;
static cl::opt<bool> DisableHazardRecognizer(
@@ -76,25 +77,27 @@ void TargetInstrInfo::insertNoop(MachineBasicBlock &MBB,
/// may be overloaded in the target code to do that.
unsigned TargetInstrInfo::getInlineAsmLength(const char *Str,
const MCAsmInfo &MAI) const {
-
-
// Count the number of instructions in the asm.
bool atInsnStart = true;
- unsigned Length = 0;
+ unsigned InstCount = 0;
for (; *Str; ++Str) {
if (*Str == '\n' || strncmp(Str, MAI.getSeparatorString(),
- strlen(MAI.getSeparatorString())) == 0)
+ strlen(MAI.getSeparatorString())) == 0) {
atInsnStart = true;
- if (atInsnStart && !std::isspace(static_cast<unsigned char>(*Str))) {
- Length += MAI.getMaxInstLength();
+ } else if (strncmp(Str, MAI.getCommentString(),
+ strlen(MAI.getCommentString())) == 0) {
+ // Stop counting as an instruction after a comment until the next
+ // separator.
atInsnStart = false;
}
- if (atInsnStart && strncmp(Str, MAI.getCommentString(),
- strlen(MAI.getCommentString())) == 0)
+
+ if (atInsnStart && !std::isspace(static_cast<unsigned char>(*Str))) {
+ ++InstCount;
atInsnStart = false;
+ }
}
- return Length;
+ return InstCount * MAI.getMaxInstLength();
}
/// ReplaceTailWithBranchTo - Delete the instruction OldInst and everything
@@ -108,23 +111,24 @@ TargetInstrInfo::ReplaceTailWithBranchTo(MachineBasicBlock::iterator Tail,
while (!MBB->succ_empty())
MBB->removeSuccessor(MBB->succ_begin());
+ // Save off the debug loc before erasing the instruction.
+ DebugLoc DL = Tail->getDebugLoc();
+
// Remove all the dead instructions from the end of MBB.
MBB->erase(Tail, MBB->end());
// If MBB isn't immediately before MBB, insert a branch to it.
if (++MachineFunction::iterator(MBB) != MachineFunction::iterator(NewDest))
- InsertBranch(*MBB, NewDest, nullptr, SmallVector<MachineOperand, 0>(),
- Tail->getDebugLoc());
+ InsertBranch(*MBB, NewDest, nullptr, SmallVector<MachineOperand, 0>(), DL);
MBB->addSuccessor(NewDest);
}
-MachineInstr *TargetInstrInfo::commuteInstructionImpl(MachineInstr *MI,
- bool NewMI,
- unsigned Idx1,
+MachineInstr *TargetInstrInfo::commuteInstructionImpl(MachineInstr &MI,
+ bool NewMI, unsigned Idx1,
unsigned Idx2) const {
- const MCInstrDesc &MCID = MI->getDesc();
+ const MCInstrDesc &MCID = MI.getDesc();
bool HasDef = MCID.getNumDefs();
- if (HasDef && !MI->getOperand(0).isReg())
+ if (HasDef && !MI.getOperand(0).isReg())
    // No idea how to commute this instruction; the target should implement its own logic.
return nullptr;
@@ -133,60 +137,62 @@ MachineInstr *TargetInstrInfo::commuteInstructionImpl(MachineInstr *MI,
assert(findCommutedOpIndices(MI, CommutableOpIdx1, CommutableOpIdx2) &&
CommutableOpIdx1 == Idx1 && CommutableOpIdx2 == Idx2 &&
"TargetInstrInfo::CommuteInstructionImpl(): not commutable operands.");
- assert(MI->getOperand(Idx1).isReg() && MI->getOperand(Idx2).isReg() &&
+ assert(MI.getOperand(Idx1).isReg() && MI.getOperand(Idx2).isReg() &&
"This only knows how to commute register operands so far");
- unsigned Reg0 = HasDef ? MI->getOperand(0).getReg() : 0;
- unsigned Reg1 = MI->getOperand(Idx1).getReg();
- unsigned Reg2 = MI->getOperand(Idx2).getReg();
- unsigned SubReg0 = HasDef ? MI->getOperand(0).getSubReg() : 0;
- unsigned SubReg1 = MI->getOperand(Idx1).getSubReg();
- unsigned SubReg2 = MI->getOperand(Idx2).getSubReg();
- bool Reg1IsKill = MI->getOperand(Idx1).isKill();
- bool Reg2IsKill = MI->getOperand(Idx2).isKill();
- bool Reg1IsUndef = MI->getOperand(Idx1).isUndef();
- bool Reg2IsUndef = MI->getOperand(Idx2).isUndef();
- bool Reg1IsInternal = MI->getOperand(Idx1).isInternalRead();
- bool Reg2IsInternal = MI->getOperand(Idx2).isInternalRead();
+ unsigned Reg0 = HasDef ? MI.getOperand(0).getReg() : 0;
+ unsigned Reg1 = MI.getOperand(Idx1).getReg();
+ unsigned Reg2 = MI.getOperand(Idx2).getReg();
+ unsigned SubReg0 = HasDef ? MI.getOperand(0).getSubReg() : 0;
+ unsigned SubReg1 = MI.getOperand(Idx1).getSubReg();
+ unsigned SubReg2 = MI.getOperand(Idx2).getSubReg();
+ bool Reg1IsKill = MI.getOperand(Idx1).isKill();
+ bool Reg2IsKill = MI.getOperand(Idx2).isKill();
+ bool Reg1IsUndef = MI.getOperand(Idx1).isUndef();
+ bool Reg2IsUndef = MI.getOperand(Idx2).isUndef();
+ bool Reg1IsInternal = MI.getOperand(Idx1).isInternalRead();
+ bool Reg2IsInternal = MI.getOperand(Idx2).isInternalRead();
// If destination is tied to either of the commuted source register, then
// it must be updated.
if (HasDef && Reg0 == Reg1 &&
- MI->getDesc().getOperandConstraint(Idx1, MCOI::TIED_TO) == 0) {
+ MI.getDesc().getOperandConstraint(Idx1, MCOI::TIED_TO) == 0) {
Reg2IsKill = false;
Reg0 = Reg2;
SubReg0 = SubReg2;
} else if (HasDef && Reg0 == Reg2 &&
- MI->getDesc().getOperandConstraint(Idx2, MCOI::TIED_TO) == 0) {
+ MI.getDesc().getOperandConstraint(Idx2, MCOI::TIED_TO) == 0) {
Reg1IsKill = false;
Reg0 = Reg1;
SubReg0 = SubReg1;
}
+ MachineInstr *CommutedMI = nullptr;
if (NewMI) {
// Create a new instruction.
- MachineFunction &MF = *MI->getParent()->getParent();
- MI = MF.CloneMachineInstr(MI);
+ MachineFunction &MF = *MI.getParent()->getParent();
+ CommutedMI = MF.CloneMachineInstr(&MI);
+ } else {
+ CommutedMI = &MI;
}
if (HasDef) {
- MI->getOperand(0).setReg(Reg0);
- MI->getOperand(0).setSubReg(SubReg0);
+ CommutedMI->getOperand(0).setReg(Reg0);
+ CommutedMI->getOperand(0).setSubReg(SubReg0);
}
- MI->getOperand(Idx2).setReg(Reg1);
- MI->getOperand(Idx1).setReg(Reg2);
- MI->getOperand(Idx2).setSubReg(SubReg1);
- MI->getOperand(Idx1).setSubReg(SubReg2);
- MI->getOperand(Idx2).setIsKill(Reg1IsKill);
- MI->getOperand(Idx1).setIsKill(Reg2IsKill);
- MI->getOperand(Idx2).setIsUndef(Reg1IsUndef);
- MI->getOperand(Idx1).setIsUndef(Reg2IsUndef);
- MI->getOperand(Idx2).setIsInternalRead(Reg1IsInternal);
- MI->getOperand(Idx1).setIsInternalRead(Reg2IsInternal);
- return MI;
+ CommutedMI->getOperand(Idx2).setReg(Reg1);
+ CommutedMI->getOperand(Idx1).setReg(Reg2);
+ CommutedMI->getOperand(Idx2).setSubReg(SubReg1);
+ CommutedMI->getOperand(Idx1).setSubReg(SubReg2);
+ CommutedMI->getOperand(Idx2).setIsKill(Reg1IsKill);
+ CommutedMI->getOperand(Idx1).setIsKill(Reg2IsKill);
+ CommutedMI->getOperand(Idx2).setIsUndef(Reg1IsUndef);
+ CommutedMI->getOperand(Idx1).setIsUndef(Reg2IsUndef);
+ CommutedMI->getOperand(Idx2).setIsInternalRead(Reg1IsInternal);
+ CommutedMI->getOperand(Idx1).setIsInternalRead(Reg2IsInternal);
+ return CommutedMI;
}
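
A hedged usage sketch of the new reference-based API (TII, MI, Idx1, and Idx2
assumed in scope): with NewMI == false the instruction is commuted in place and
the same instruction comes back; with NewMI == true a cloned, commuted
instruction is returned and the caller must insert it.

if (MachineInstr *Commuted =
        TII->commuteInstruction(MI, /*NewMI=*/false, Idx1, Idx2)) {
  assert(Commuted == &MI && "in-place commute returns the original");
  // Registers, subregisters, and kill/undef/internal-read flags of Idx1 and
  // Idx2 have been swapped, and a tied def was rewritten if necessary.
}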
-MachineInstr *TargetInstrInfo::commuteInstruction(MachineInstr *MI,
- bool NewMI,
+MachineInstr *TargetInstrInfo::commuteInstruction(MachineInstr &MI, bool NewMI,
unsigned OpIdx1,
unsigned OpIdx2) const {
// If OpIdx1 or OpIdx2 is not specified, then this method is free to choose
@@ -194,7 +200,7 @@ MachineInstr *TargetInstrInfo::commuteInstruction(MachineInstr *MI,
// called below.
if ((OpIdx1 == CommuteAnyOperandIndex || OpIdx2 == CommuteAnyOperandIndex) &&
!findCommutedOpIndices(MI, OpIdx1, OpIdx2)) {
- assert(MI->isCommutable() &&
+ assert(MI.isCommutable() &&
"Precondition violation: MI must be commutable.");
return nullptr;
}
@@ -232,13 +238,13 @@ bool TargetInstrInfo::fixCommutedOpIndices(unsigned &ResultIdx1,
return true;
}
-bool TargetInstrInfo::findCommutedOpIndices(MachineInstr *MI,
+bool TargetInstrInfo::findCommutedOpIndices(MachineInstr &MI,
unsigned &SrcOpIdx1,
unsigned &SrcOpIdx2) const {
- assert(!MI->isBundle() &&
+ assert(!MI.isBundle() &&
"TargetInstrInfo::findCommutedOpIndices() can't handle bundles");
- const MCInstrDesc &MCID = MI->getDesc();
+ const MCInstrDesc &MCID = MI.getDesc();
if (!MCID.isCommutable())
return false;
@@ -250,39 +256,37 @@ bool TargetInstrInfo::findCommutedOpIndices(MachineInstr *MI,
CommutableOpIdx1, CommutableOpIdx2))
return false;
- if (!MI->getOperand(SrcOpIdx1).isReg() ||
- !MI->getOperand(SrcOpIdx2).isReg())
+ if (!MI.getOperand(SrcOpIdx1).isReg() || !MI.getOperand(SrcOpIdx2).isReg())
// No idea.
return false;
return true;
}
-bool
-TargetInstrInfo::isUnpredicatedTerminator(const MachineInstr *MI) const {
- if (!MI->isTerminator()) return false;
+bool TargetInstrInfo::isUnpredicatedTerminator(const MachineInstr &MI) const {
+ if (!MI.isTerminator()) return false;
// Conditional branch is a special case.
- if (MI->isBranch() && !MI->isBarrier())
+ if (MI.isBranch() && !MI.isBarrier())
return true;
- if (!MI->isPredicable())
+ if (!MI.isPredicable())
return true;
return !isPredicated(MI);
}
bool TargetInstrInfo::PredicateInstruction(
- MachineInstr *MI, ArrayRef<MachineOperand> Pred) const {
+ MachineInstr &MI, ArrayRef<MachineOperand> Pred) const {
bool MadeChange = false;
- assert(!MI->isBundle() &&
+ assert(!MI.isBundle() &&
"TargetInstrInfo::PredicateInstruction() can't handle bundles");
- const MCInstrDesc &MCID = MI->getDesc();
- if (!MI->isPredicable())
+ const MCInstrDesc &MCID = MI.getDesc();
+ if (!MI.isPredicable())
return false;
- for (unsigned j = 0, i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ for (unsigned j = 0, i = 0, e = MI.getNumOperands(); i != e; ++i) {
if (MCID.OpInfo[i].isPredicate()) {
- MachineOperand &MO = MI->getOperand(i);
+ MachineOperand &MO = MI.getOperand(i);
if (MO.isReg()) {
MO.setReg(Pred[j].getReg());
MadeChange = true;
@@ -299,13 +303,12 @@ bool TargetInstrInfo::PredicateInstruction(
return MadeChange;
}
-bool TargetInstrInfo::hasLoadFromStackSlot(const MachineInstr *MI,
+bool TargetInstrInfo::hasLoadFromStackSlot(const MachineInstr &MI,
const MachineMemOperand *&MMO,
int &FrameIndex) const {
- for (MachineInstr::mmo_iterator o = MI->memoperands_begin(),
- oe = MI->memoperands_end();
- o != oe;
- ++o) {
+ for (MachineInstr::mmo_iterator o = MI.memoperands_begin(),
+ oe = MI.memoperands_end();
+ o != oe; ++o) {
if ((*o)->isLoad()) {
if (const FixedStackPseudoSourceValue *Value =
dyn_cast_or_null<FixedStackPseudoSourceValue>(
@@ -319,13 +322,12 @@ bool TargetInstrInfo::hasLoadFromStackSlot(const MachineInstr *MI,
return false;
}
-bool TargetInstrInfo::hasStoreToStackSlot(const MachineInstr *MI,
+bool TargetInstrInfo::hasStoreToStackSlot(const MachineInstr &MI,
const MachineMemOperand *&MMO,
int &FrameIndex) const {
- for (MachineInstr::mmo_iterator o = MI->memoperands_begin(),
- oe = MI->memoperands_end();
- o != oe;
- ++o) {
+ for (MachineInstr::mmo_iterator o = MI.memoperands_begin(),
+ oe = MI.memoperands_end();
+ o != oe; ++o) {
if ((*o)->isStore()) {
if (const FixedStackPseudoSourceValue *Value =
dyn_cast_or_null<FixedStackPseudoSourceValue>(
@@ -372,40 +374,37 @@ bool TargetInstrInfo::getStackSlotRange(const TargetRegisterClass *RC,
void TargetInstrInfo::reMaterialize(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
- unsigned DestReg,
- unsigned SubIdx,
- const MachineInstr *Orig,
+ unsigned DestReg, unsigned SubIdx,
+ const MachineInstr &Orig,
const TargetRegisterInfo &TRI) const {
- MachineInstr *MI = MBB.getParent()->CloneMachineInstr(Orig);
+ MachineInstr *MI = MBB.getParent()->CloneMachineInstr(&Orig);
MI->substituteRegister(MI->getOperand(0).getReg(), DestReg, SubIdx, TRI);
MBB.insert(I, MI);
}
-bool
-TargetInstrInfo::produceSameValue(const MachineInstr *MI0,
- const MachineInstr *MI1,
- const MachineRegisterInfo *MRI) const {
- return MI0->isIdenticalTo(MI1, MachineInstr::IgnoreVRegDefs);
+bool TargetInstrInfo::produceSameValue(const MachineInstr &MI0,
+ const MachineInstr &MI1,
+ const MachineRegisterInfo *MRI) const {
+ return MI0.isIdenticalTo(MI1, MachineInstr::IgnoreVRegDefs);
}
-MachineInstr *TargetInstrInfo::duplicate(MachineInstr *Orig,
+MachineInstr *TargetInstrInfo::duplicate(MachineInstr &Orig,
MachineFunction &MF) const {
- assert(!Orig->isNotDuplicable() &&
- "Instruction cannot be duplicated");
- return MF.CloneMachineInstr(Orig);
+ assert(!Orig.isNotDuplicable() && "Instruction cannot be duplicated");
+ return MF.CloneMachineInstr(&Orig);
}
// If the COPY instruction in MI can be folded to a stack operation, return
// the register class to use.
-static const TargetRegisterClass *canFoldCopy(const MachineInstr *MI,
+static const TargetRegisterClass *canFoldCopy(const MachineInstr &MI,
unsigned FoldIdx) {
- assert(MI->isCopy() && "MI must be a COPY instruction");
- if (MI->getNumOperands() != 2)
+ assert(MI.isCopy() && "MI must be a COPY instruction");
+ if (MI.getNumOperands() != 2)
return nullptr;
  assert(FoldIdx < 2 && "FoldIdx refers to a nonexistent operand");
- const MachineOperand &FoldOp = MI->getOperand(FoldIdx);
- const MachineOperand &LiveOp = MI->getOperand(1-FoldIdx);
+ const MachineOperand &FoldOp = MI.getOperand(FoldIdx);
+ const MachineOperand &LiveOp = MI.getOperand(1 - FoldIdx);
if (FoldOp.getSubReg() || LiveOp.getSubReg())
return nullptr;
@@ -416,7 +415,7 @@ static const TargetRegisterClass *canFoldCopy(const MachineInstr *MI,
assert(TargetRegisterInfo::isVirtualRegister(FoldReg) &&
"Cannot fold physregs");
- const MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
+ const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
const TargetRegisterClass *RC = MRI.getRegClass(FoldReg);
if (TargetRegisterInfo::isPhysicalRegister(LiveOp.getReg()))
@@ -433,17 +432,17 @@ void TargetInstrInfo::getNoopForMachoTarget(MCInst &NopInst) const {
llvm_unreachable("Not a MachO target");
}
-static MachineInstr *foldPatchpoint(MachineFunction &MF, MachineInstr *MI,
+static MachineInstr *foldPatchpoint(MachineFunction &MF, MachineInstr &MI,
ArrayRef<unsigned> Ops, int FrameIndex,
const TargetInstrInfo &TII) {
unsigned StartIdx = 0;
- switch (MI->getOpcode()) {
+ switch (MI.getOpcode()) {
case TargetOpcode::STACKMAP:
StartIdx = 2; // Skip ID, nShadowBytes.
break;
case TargetOpcode::PATCHPOINT: {
// For PatchPoint, the call args are not foldable.
- PatchPointOpers opers(MI);
+ PatchPointOpers opers(&MI);
StartIdx = opers.getVarIdx();
break;
}
@@ -459,15 +458,15 @@ static MachineInstr *foldPatchpoint(MachineFunction &MF, MachineInstr *MI,
}
MachineInstr *NewMI =
- MF.CreateMachineInstr(TII.get(MI->getOpcode()), MI->getDebugLoc(), true);
+ MF.CreateMachineInstr(TII.get(MI.getOpcode()), MI.getDebugLoc(), true);
MachineInstrBuilder MIB(MF, NewMI);
  // No need to fold the return value, the metadata, or the function arguments.
for (unsigned i = 0; i < StartIdx; ++i)
- MIB.addOperand(MI->getOperand(i));
+ MIB.addOperand(MI.getOperand(i));
- for (unsigned i = StartIdx; i < MI->getNumOperands(); ++i) {
- MachineOperand &MO = MI->getOperand(i);
+ for (unsigned i = StartIdx; i < MI.getNumOperands(); ++i) {
+ MachineOperand &MO = MI.getOperand(i);
if (std::find(Ops.begin(), Ops.end(), i) != Ops.end()) {
unsigned SpillSize;
unsigned SpillOffset;
@@ -495,35 +494,35 @@ static MachineInstr *foldPatchpoint(MachineFunction &MF, MachineInstr *MI,
/// operand folded; otherwise NULL is returned. The client is responsible for
/// removing the old instruction and adding the new one in the instruction
/// stream.
-MachineInstr *TargetInstrInfo::foldMemoryOperand(MachineBasicBlock::iterator MI,
- ArrayRef<unsigned> Ops,
- int FI) const {
- unsigned Flags = 0;
+MachineInstr *TargetInstrInfo::foldMemoryOperand(MachineInstr &MI,
+ ArrayRef<unsigned> Ops, int FI,
+ LiveIntervals *LIS) const {
+ auto Flags = MachineMemOperand::MONone;
for (unsigned i = 0, e = Ops.size(); i != e; ++i)
- if (MI->getOperand(Ops[i]).isDef())
+ if (MI.getOperand(Ops[i]).isDef())
Flags |= MachineMemOperand::MOStore;
else
Flags |= MachineMemOperand::MOLoad;
- MachineBasicBlock *MBB = MI->getParent();
+ MachineBasicBlock *MBB = MI.getParent();
assert(MBB && "foldMemoryOperand needs an inserted instruction");
MachineFunction &MF = *MBB->getParent();
MachineInstr *NewMI = nullptr;
- if (MI->getOpcode() == TargetOpcode::STACKMAP ||
- MI->getOpcode() == TargetOpcode::PATCHPOINT) {
+ if (MI.getOpcode() == TargetOpcode::STACKMAP ||
+ MI.getOpcode() == TargetOpcode::PATCHPOINT) {
// Fold stackmap/patchpoint.
NewMI = foldPatchpoint(MF, MI, Ops, FI, *this);
if (NewMI)
MBB->insert(MI, NewMI);
} else {
// Ask the target to do the actual folding.
- NewMI = foldMemoryOperandImpl(MF, MI, Ops, MI, FI);
+ NewMI = foldMemoryOperandImpl(MF, MI, Ops, MI, FI, LIS);
}
if (NewMI) {
- NewMI->setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
+ NewMI->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
// Add a memory operand, foldMemoryOperandImpl doesn't do that.
assert((!(Flags & MachineMemOperand::MOStore) ||
NewMI->mayStore()) &&
@@ -542,14 +541,14 @@ MachineInstr *TargetInstrInfo::foldMemoryOperand(MachineBasicBlock::iterator MI,
}
// Straight COPY may fold as load/store.
- if (!MI->isCopy() || Ops.size() != 1)
+ if (!MI.isCopy() || Ops.size() != 1)
return nullptr;
const TargetRegisterClass *RC = canFoldCopy(MI, Ops[0]);
if (!RC)
return nullptr;
- const MachineOperand &MO = MI->getOperand(1-Ops[0]);
+ const MachineOperand &MO = MI.getOperand(1 - Ops[0]);
MachineBasicBlock::iterator Pos = MI;
const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
@@ -557,7 +556,7 @@ MachineInstr *TargetInstrInfo::foldMemoryOperand(MachineBasicBlock::iterator MI,
storeRegToStackSlot(*MBB, Pos, MO.getReg(), MO.isKill(), FI, RC, TRI);
else
loadRegFromStackSlot(*MBB, Pos, MO.getReg(), FI, RC, TRI);
- return --Pos;
+ return &*--Pos;
}
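
A sketch of the COPY fast path above, assuming MI, FI, LIS, and TII are in
scope: folding the def operand of a COPY produces a store to the slot, folding
the use produces a load, and the caller still owns the original instruction.

if (MI.isCopy()) {
  // Fold operand 0 (the def): the COPY becomes a store of operand 1 to FI.
  if (MachineInstr *Folded = TII->foldMemoryOperand(MI, {0u}, FI, LIS))
    MI.eraseFromParent(); // Folded replaced the COPY in the block
}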
bool TargetInstrInfo::hasReassociableOperands(
@@ -637,7 +636,6 @@ bool TargetInstrInfo::isReassociationCandidate(const MachineInstr &Inst,
bool TargetInstrInfo::getMachineCombinerPatterns(
MachineInstr &Root,
SmallVectorImpl<MachineCombinerPattern> &Patterns) const {
-
bool Commute;
if (isReassociationCandidate(Root, Commute)) {
// We found a sequence of instructions that may be suitable for a
@@ -656,7 +654,11 @@ bool TargetInstrInfo::getMachineCombinerPatterns(
return false;
}
-
+/// Return true when a code sequence can improve loop throughput.
+bool
+TargetInstrInfo::isThroughputPattern(MachineCombinerPattern Pattern) const {
+ return false;
+}
/// Attempt the reassociation transformation to reduce critical path length.
/// See the above comments before getMachineCombinerPatterns().
void TargetInstrInfo::reassociateOps(
@@ -768,75 +770,73 @@ void TargetInstrInfo::genAlternativeCodeSequence(
assert(Prev && "Unknown pattern for machine combiner");
reassociateOps(Root, *Prev, Pattern, InsInstrs, DelInstrs, InstIdxForVirtReg);
- return;
}
/// foldMemoryOperand - Same as the previous version except it allows folding
/// of any load and store from / to any address, not just from a specific
/// stack slot.
-MachineInstr *TargetInstrInfo::foldMemoryOperand(MachineBasicBlock::iterator MI,
+MachineInstr *TargetInstrInfo::foldMemoryOperand(MachineInstr &MI,
ArrayRef<unsigned> Ops,
- MachineInstr *LoadMI) const {
- assert(LoadMI->canFoldAsLoad() && "LoadMI isn't foldable!");
+ MachineInstr &LoadMI,
+ LiveIntervals *LIS) const {
+ assert(LoadMI.canFoldAsLoad() && "LoadMI isn't foldable!");
#ifndef NDEBUG
for (unsigned i = 0, e = Ops.size(); i != e; ++i)
- assert(MI->getOperand(Ops[i]).isUse() && "Folding load into def!");
+ assert(MI.getOperand(Ops[i]).isUse() && "Folding load into def!");
#endif
- MachineBasicBlock &MBB = *MI->getParent();
+ MachineBasicBlock &MBB = *MI.getParent();
MachineFunction &MF = *MBB.getParent();
// Ask the target to do the actual folding.
MachineInstr *NewMI = nullptr;
int FrameIndex = 0;
- if ((MI->getOpcode() == TargetOpcode::STACKMAP ||
- MI->getOpcode() == TargetOpcode::PATCHPOINT) &&
+ if ((MI.getOpcode() == TargetOpcode::STACKMAP ||
+ MI.getOpcode() == TargetOpcode::PATCHPOINT) &&
isLoadFromStackSlot(LoadMI, FrameIndex)) {
// Fold stackmap/patchpoint.
NewMI = foldPatchpoint(MF, MI, Ops, FrameIndex, *this);
if (NewMI)
- NewMI = MBB.insert(MI, NewMI);
+ NewMI = &*MBB.insert(MI, NewMI);
} else {
// Ask the target to do the actual folding.
- NewMI = foldMemoryOperandImpl(MF, MI, Ops, MI, LoadMI);
+ NewMI = foldMemoryOperandImpl(MF, MI, Ops, MI, LoadMI, LIS);
}
if (!NewMI) return nullptr;
// Copy the memoperands from the load to the folded instruction.
- if (MI->memoperands_empty()) {
- NewMI->setMemRefs(LoadMI->memoperands_begin(),
- LoadMI->memoperands_end());
+ if (MI.memoperands_empty()) {
+ NewMI->setMemRefs(LoadMI.memoperands_begin(), LoadMI.memoperands_end());
}
else {
// Handle the rare case of folding multiple loads.
- NewMI->setMemRefs(MI->memoperands_begin(),
- MI->memoperands_end());
- for (MachineInstr::mmo_iterator I = LoadMI->memoperands_begin(),
- E = LoadMI->memoperands_end(); I != E; ++I) {
+ NewMI->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+ for (MachineInstr::mmo_iterator I = LoadMI.memoperands_begin(),
+ E = LoadMI.memoperands_end();
+ I != E; ++I) {
NewMI->addMemOperand(MF, *I);
}
}
return NewMI;
}
-bool TargetInstrInfo::
-isReallyTriviallyReMaterializableGeneric(const MachineInstr *MI,
- AliasAnalysis *AA) const {
- const MachineFunction &MF = *MI->getParent()->getParent();
+bool TargetInstrInfo::isReallyTriviallyReMaterializableGeneric(
+ const MachineInstr &MI, AliasAnalysis *AA) const {
+ const MachineFunction &MF = *MI.getParent()->getParent();
const MachineRegisterInfo &MRI = MF.getRegInfo();
// Remat clients assume operand 0 is the defined register.
- if (!MI->getNumOperands() || !MI->getOperand(0).isReg())
+ if (!MI.getNumOperands() || !MI.getOperand(0).isReg())
return false;
- unsigned DefReg = MI->getOperand(0).getReg();
+ unsigned DefReg = MI.getOperand(0).getReg();
// A sub-register definition can only be rematerialized if the instruction
// doesn't read the other parts of the register. Otherwise it is really a
// read-modify-write operation on the full virtual register which cannot be
// moved safely.
if (TargetRegisterInfo::isVirtualRegister(DefReg) &&
- MI->getOperand(0).getSubReg() && MI->readsVirtualRegister(DefReg))
+ MI.getOperand(0).getSubReg() && MI.readsVirtualRegister(DefReg))
return false;
// A load from a fixed stack slot can be rematerialized. This may be
@@ -848,23 +848,22 @@ isReallyTriviallyReMaterializableGeneric(const MachineInstr *MI,
return true;
// Avoid instructions obviously unsafe for remat.
- if (MI->isNotDuplicable() || MI->mayStore() ||
- MI->hasUnmodeledSideEffects())
+ if (MI.isNotDuplicable() || MI.mayStore() || MI.hasUnmodeledSideEffects())
return false;
// Don't remat inline asm. We have no idea how expensive it is
// even if it's side effect free.
- if (MI->isInlineAsm())
+ if (MI.isInlineAsm())
return false;
// Avoid instructions which load from potentially varying memory.
- if (MI->mayLoad() && !MI->isInvariantLoad(AA))
+ if (MI.mayLoad() && !MI.isInvariantLoad(AA))
return false;
// If any of the registers accessed are non-constant, conservatively assume
// the instruction is not rematerializable.
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- const MachineOperand &MO = MI->getOperand(i);
+ for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI.getOperand(i);
if (!MO.isReg()) continue;
unsigned Reg = MO.getReg();
if (Reg == 0)
@@ -901,8 +900,8 @@ isReallyTriviallyReMaterializableGeneric(const MachineInstr *MI,
return true;
}
-int TargetInstrInfo::getSPAdjust(const MachineInstr *MI) const {
- const MachineFunction *MF = MI->getParent()->getParent();
+int TargetInstrInfo::getSPAdjust(const MachineInstr &MI) const {
+ const MachineFunction *MF = MI.getParent()->getParent();
const TargetFrameLowering *TFI = MF->getSubtarget().getFrameLowering();
bool StackGrowsDown =
TFI->getStackGrowthDirection() == TargetFrameLowering::StackGrowsDown;
@@ -910,15 +909,15 @@ int TargetInstrInfo::getSPAdjust(const MachineInstr *MI) const {
unsigned FrameSetupOpcode = getCallFrameSetupOpcode();
unsigned FrameDestroyOpcode = getCallFrameDestroyOpcode();
- if (MI->getOpcode() != FrameSetupOpcode &&
- MI->getOpcode() != FrameDestroyOpcode)
+ if (MI.getOpcode() != FrameSetupOpcode &&
+ MI.getOpcode() != FrameDestroyOpcode)
return 0;
-
- int SPAdj = MI->getOperand(0).getImm();
+
+ int SPAdj = MI.getOperand(0).getImm();
SPAdj = TFI->alignSPAdjust(SPAdj);
- if ((!StackGrowsDown && MI->getOpcode() == FrameSetupOpcode) ||
- (StackGrowsDown && MI->getOpcode() == FrameDestroyOpcode))
+ if ((!StackGrowsDown && MI.getOpcode() == FrameSetupOpcode) ||
+ (StackGrowsDown && MI.getOpcode() == FrameDestroyOpcode))
SPAdj = -SPAdj;
return SPAdj;
@@ -927,11 +926,11 @@ int TargetInstrInfo::getSPAdjust(const MachineInstr *MI) const {
/// isSchedulingBoundary - Test if the given instruction should be
/// considered a scheduling boundary. This primarily includes labels
/// and terminators.
-bool TargetInstrInfo::isSchedulingBoundary(const MachineInstr *MI,
+bool TargetInstrInfo::isSchedulingBoundary(const MachineInstr &MI,
const MachineBasicBlock *MBB,
const MachineFunction &MF) const {
// Terminators and labels can't be scheduled around.
- if (MI->isTerminator() || MI->isPosition())
+ if (MI.isTerminator() || MI.isPosition())
return true;
// Don't attempt to schedule around any instruction that defines
@@ -941,7 +940,7 @@ bool TargetInstrInfo::isSchedulingBoundary(const MachineInstr *MI,
// modification.
const TargetLowering &TLI = *MF.getSubtarget().getTargetLowering();
const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
- return MI->modifiesRegister(TLI.getStackPointerRegisterToSaveRestore(), TRI);
+ return MI.modifiesRegister(TLI.getStackPointerRegisterToSaveRestore(), TRI);
}
// Provide a global flag for disabling the PreRA hazard recognizer that targets
@@ -1010,13 +1009,12 @@ int TargetInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
// MachineInstr latency interface.
//===----------------------------------------------------------------------===//
-unsigned
-TargetInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData,
- const MachineInstr *MI) const {
+unsigned TargetInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData,
+ const MachineInstr &MI) const {
if (!ItinData || ItinData->isEmpty())
return 1;
- unsigned Class = MI->getDesc().getSchedClass();
+ unsigned Class = MI.getDesc().getSchedClass();
int UOps = ItinData->Itineraries[Class].NumMicroOps;
if (UOps >= 0)
return UOps;
@@ -1028,60 +1026,59 @@ TargetInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData,
/// Return the default expected latency for a def based on its opcode.
unsigned TargetInstrInfo::defaultDefLatency(const MCSchedModel &SchedModel,
- const MachineInstr *DefMI) const {
- if (DefMI->isTransient())
+ const MachineInstr &DefMI) const {
+ if (DefMI.isTransient())
return 0;
- if (DefMI->mayLoad())
+ if (DefMI.mayLoad())
return SchedModel.LoadLatency;
- if (isHighLatencyDef(DefMI->getOpcode()))
+ if (isHighLatencyDef(DefMI.getOpcode()))
return SchedModel.HighLatency;
return 1;
}
-unsigned TargetInstrInfo::getPredicationCost(const MachineInstr *) const {
+unsigned TargetInstrInfo::getPredicationCost(const MachineInstr &) const {
return 0;
}
-unsigned TargetInstrInfo::
-getInstrLatency(const InstrItineraryData *ItinData,
- const MachineInstr *MI,
- unsigned *PredCost) const {
+unsigned TargetInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
+ const MachineInstr &MI,
+ unsigned *PredCost) const {
// Default to one cycle for no itinerary. However, an "empty" itinerary may
// still have a MinLatency property, which getStageLatency checks.
if (!ItinData)
- return MI->mayLoad() ? 2 : 1;
+ return MI.mayLoad() ? 2 : 1;
- return ItinData->getStageLatency(MI->getDesc().getSchedClass());
+ return ItinData->getStageLatency(MI.getDesc().getSchedClass());
}
bool TargetInstrInfo::hasLowDefLatency(const TargetSchedModel &SchedModel,
- const MachineInstr *DefMI,
+ const MachineInstr &DefMI,
unsigned DefIdx) const {
const InstrItineraryData *ItinData = SchedModel.getInstrItineraries();
if (!ItinData || ItinData->isEmpty())
return false;
- unsigned DefClass = DefMI->getDesc().getSchedClass();
+ unsigned DefClass = DefMI.getDesc().getSchedClass();
int DefCycle = ItinData->getOperandCycle(DefClass, DefIdx);
return (DefCycle != -1 && DefCycle <= 1);
}
/// Both DefMI and UseMI must be valid. By default, call directly to the
/// itinerary. This may be overridden by the target.
-int TargetInstrInfo::
-getOperandLatency(const InstrItineraryData *ItinData,
- const MachineInstr *DefMI, unsigned DefIdx,
- const MachineInstr *UseMI, unsigned UseIdx) const {
- unsigned DefClass = DefMI->getDesc().getSchedClass();
- unsigned UseClass = UseMI->getDesc().getSchedClass();
+int TargetInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
+ const MachineInstr &DefMI,
+ unsigned DefIdx,
+ const MachineInstr &UseMI,
+ unsigned UseIdx) const {
+ unsigned DefClass = DefMI.getDesc().getSchedClass();
+ unsigned UseClass = UseMI.getDesc().getSchedClass();
return ItinData->getOperandLatency(DefClass, DefIdx, UseClass, UseIdx);
}
/// If we can determine the operand latency from the def only, without itinerary
/// lookup, do so. Otherwise return -1.
int TargetInstrInfo::computeDefOperandLatency(
- const InstrItineraryData *ItinData,
- const MachineInstr *DefMI) const {
+ const InstrItineraryData *ItinData, const MachineInstr &DefMI) const {
// Let the target hook getInstrLatency handle missing itineraries.
if (!ItinData)
@@ -1094,21 +1091,9 @@ int TargetInstrInfo::computeDefOperandLatency(
return -1;
}
-/// computeOperandLatency - Compute and return the latency of the given data
-/// dependent def and use when the operand indices are already known. UseMI may
-/// be NULL for an unknown use.
-///
-/// FindMin may be set to get the minimum vs. expected latency. Minimum
-/// latency is used for scheduling groups, while expected latency is for
-/// instruction cost and critical path.
-///
-/// Depending on the subtarget's itinerary properties, this may or may not need
-/// to call getOperandLatency(). For most subtargets, we don't need DefIdx or
-/// UseIdx to compute min latency.
-unsigned TargetInstrInfo::
-computeOperandLatency(const InstrItineraryData *ItinData,
- const MachineInstr *DefMI, unsigned DefIdx,
- const MachineInstr *UseMI, unsigned UseIdx) const {
+unsigned TargetInstrInfo::computeOperandLatency(
+ const InstrItineraryData *ItinData, const MachineInstr &DefMI,
+ unsigned DefIdx, const MachineInstr *UseMI, unsigned UseIdx) const {
int DefLatency = computeDefOperandLatency(ItinData, DefMI);
if (DefLatency >= 0)
@@ -1118,9 +1103,9 @@ computeOperandLatency(const InstrItineraryData *ItinData,
int OperLatency = 0;
if (UseMI)
- OperLatency = getOperandLatency(ItinData, DefMI, DefIdx, UseMI, UseIdx);
+ OperLatency = getOperandLatency(ItinData, DefMI, DefIdx, *UseMI, UseIdx);
else {
- unsigned DefClass = DefMI->getDesc().getSchedClass();
+ unsigned DefClass = DefMI.getDesc().getSchedClass();
OperLatency = ItinData->getOperandCycle(DefClass, DefIdx);
}
if (OperLatency >= 0)
diff --git a/contrib/llvm/lib/CodeGen/TargetLoweringBase.cpp b/contrib/llvm/lib/CodeGen/TargetLoweringBase.cpp
index 36a31c9..6d3fe8c 100644
--- a/contrib/llvm/lib/CodeGen/TargetLoweringBase.cpp
+++ b/contrib/llvm/lib/CodeGen/TargetLoweringBase.cpp
@@ -28,6 +28,7 @@
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
+#include "llvm/Support/BranchProbability.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
@@ -43,6 +44,17 @@ static cl::opt<bool> JumpIsExpensiveOverride(
cl::desc("Do not create extra branches to split comparison logic."),
cl::Hidden);
+// Although this default value is arbitrary, it is not random. It is assumed
+// that a condition that evaluates the same way more often than this percentage
+// is best represented as control flow. Therefore, the default value N should be
+// set such that the win from N% correct executions is greater than the loss
+// from (100 - N)% mispredicted executions for the majority of intended targets.
+static cl::opt<int> MinPercentageForPredictableBranch(
+ "min-predictable-branch", cl::init(99),
+ cl::desc("Minimum percentage (0-100) that a condition must be either true "
+ "or false to assume that the condition is predictable"),
+ cl::Hidden);
+
/// InitLibcallNames - Set default libcall names.
///
static void InitLibcallNames(const char **Names, const Triple &TT) {
@@ -87,18 +99,6 @@ static void InitLibcallNames(const char **Names, const Triple &TT) {
Names[RTLIB::UREM_I64] = "__umoddi3";
Names[RTLIB::UREM_I128] = "__umodti3";
- // These are generally not available.
- Names[RTLIB::SDIVREM_I8] = nullptr;
- Names[RTLIB::SDIVREM_I16] = nullptr;
- Names[RTLIB::SDIVREM_I32] = nullptr;
- Names[RTLIB::SDIVREM_I64] = nullptr;
- Names[RTLIB::SDIVREM_I128] = nullptr;
- Names[RTLIB::UDIVREM_I8] = nullptr;
- Names[RTLIB::UDIVREM_I16] = nullptr;
- Names[RTLIB::UDIVREM_I32] = nullptr;
- Names[RTLIB::UDIVREM_I64] = nullptr;
- Names[RTLIB::UDIVREM_I128] = nullptr;
-
Names[RTLIB::NEG_I32] = "__negsi2";
Names[RTLIB::NEG_I64] = "__negdi2";
Names[RTLIB::ADD_F32] = "__addsf3";
@@ -231,11 +231,21 @@ static void InitLibcallNames(const char **Names, const Triple &TT) {
Names[RTLIB::COPYSIGN_F80] = "copysignl";
Names[RTLIB::COPYSIGN_F128] = "copysignl";
Names[RTLIB::COPYSIGN_PPCF128] = "copysignl";
+ Names[RTLIB::FPEXT_F32_PPCF128] = "__gcc_stoq";
+ Names[RTLIB::FPEXT_F64_PPCF128] = "__gcc_dtoq";
Names[RTLIB::FPEXT_F64_F128] = "__extenddftf2";
Names[RTLIB::FPEXT_F32_F128] = "__extendsftf2";
Names[RTLIB::FPEXT_F32_F64] = "__extendsfdf2";
- Names[RTLIB::FPEXT_F16_F32] = "__gnu_h2f_ieee";
- Names[RTLIB::FPROUND_F32_F16] = "__gnu_f2h_ieee";
+ if (TT.isOSDarwin()) {
+ // For f16/f32 conversions, Darwin uses the standard naming scheme, instead
+ // of the gnueabi-style __gnu_*_ieee.
+ // FIXME: What about other targets?
+ Names[RTLIB::FPEXT_F16_F32] = "__extendhfsf2";
+ Names[RTLIB::FPROUND_F32_F16] = "__truncsfhf2";
+ } else {
+ Names[RTLIB::FPEXT_F16_F32] = "__gnu_h2f_ieee";
+ Names[RTLIB::FPROUND_F32_F16] = "__gnu_f2h_ieee";
+ }
Names[RTLIB::FPROUND_F64_F16] = "__truncdfhf2";
Names[RTLIB::FPROUND_F80_F16] = "__truncxfhf2";
Names[RTLIB::FPROUND_F128_F16] = "__trunctfhf2";
@@ -243,10 +253,10 @@ static void InitLibcallNames(const char **Names, const Triple &TT) {
Names[RTLIB::FPROUND_F64_F32] = "__truncdfsf2";
Names[RTLIB::FPROUND_F80_F32] = "__truncxfsf2";
Names[RTLIB::FPROUND_F128_F32] = "__trunctfsf2";
- Names[RTLIB::FPROUND_PPCF128_F32] = "__trunctfsf2";
+ Names[RTLIB::FPROUND_PPCF128_F32] = "__gcc_qtos";
Names[RTLIB::FPROUND_F80_F64] = "__truncxfdf2";
Names[RTLIB::FPROUND_F128_F64] = "__trunctfdf2";
- Names[RTLIB::FPROUND_PPCF128_F64] = "__trunctfdf2";
+ Names[RTLIB::FPROUND_PPCF128_F64] = "__gcc_qtod";
Names[RTLIB::FPTOSINT_F32_I32] = "__fixsfsi";
Names[RTLIB::FPTOSINT_F32_I64] = "__fixsfdi";
Names[RTLIB::FPTOSINT_F32_I128] = "__fixsfti";
@@ -259,7 +269,7 @@ static void InitLibcallNames(const char **Names, const Triple &TT) {
Names[RTLIB::FPTOSINT_F128_I32] = "__fixtfsi";
Names[RTLIB::FPTOSINT_F128_I64] = "__fixtfdi";
Names[RTLIB::FPTOSINT_F128_I128] = "__fixtfti";
- Names[RTLIB::FPTOSINT_PPCF128_I32] = "__fixtfsi";
+ Names[RTLIB::FPTOSINT_PPCF128_I32] = "__gcc_qtou";
Names[RTLIB::FPTOSINT_PPCF128_I64] = "__fixtfdi";
Names[RTLIB::FPTOSINT_PPCF128_I128] = "__fixtfti";
Names[RTLIB::FPTOUINT_F32_I32] = "__fixunssfsi";
@@ -281,7 +291,7 @@ static void InitLibcallNames(const char **Names, const Triple &TT) {
Names[RTLIB::SINTTOFP_I32_F64] = "__floatsidf";
Names[RTLIB::SINTTOFP_I32_F80] = "__floatsixf";
Names[RTLIB::SINTTOFP_I32_F128] = "__floatsitf";
- Names[RTLIB::SINTTOFP_I32_PPCF128] = "__floatsitf";
+ Names[RTLIB::SINTTOFP_I32_PPCF128] = "__gcc_itoq";
Names[RTLIB::SINTTOFP_I64_F32] = "__floatdisf";
Names[RTLIB::SINTTOFP_I64_F64] = "__floatdidf";
Names[RTLIB::SINTTOFP_I64_F80] = "__floatdixf";
@@ -296,7 +306,7 @@ static void InitLibcallNames(const char **Names, const Triple &TT) {
Names[RTLIB::UINTTOFP_I32_F64] = "__floatunsidf";
Names[RTLIB::UINTTOFP_I32_F80] = "__floatunsixf";
Names[RTLIB::UINTTOFP_I32_F128] = "__floatunsitf";
- Names[RTLIB::UINTTOFP_I32_PPCF128] = "__floatunsitf";
+ Names[RTLIB::UINTTOFP_I32_PPCF128] = "__gcc_utoq";
Names[RTLIB::UINTTOFP_I64_F32] = "__floatundisf";
Names[RTLIB::UINTTOFP_I64_F64] = "__floatundidf";
Names[RTLIB::UINTTOFP_I64_F80] = "__floatundixf";
@@ -310,27 +320,35 @@ static void InitLibcallNames(const char **Names, const Triple &TT) {
Names[RTLIB::OEQ_F32] = "__eqsf2";
Names[RTLIB::OEQ_F64] = "__eqdf2";
Names[RTLIB::OEQ_F128] = "__eqtf2";
+ Names[RTLIB::OEQ_PPCF128] = "__gcc_qeq";
Names[RTLIB::UNE_F32] = "__nesf2";
Names[RTLIB::UNE_F64] = "__nedf2";
Names[RTLIB::UNE_F128] = "__netf2";
+ Names[RTLIB::UNE_PPCF128] = "__gcc_qne";
Names[RTLIB::OGE_F32] = "__gesf2";
Names[RTLIB::OGE_F64] = "__gedf2";
Names[RTLIB::OGE_F128] = "__getf2";
+ Names[RTLIB::OGE_PPCF128] = "__gcc_qge";
Names[RTLIB::OLT_F32] = "__ltsf2";
Names[RTLIB::OLT_F64] = "__ltdf2";
Names[RTLIB::OLT_F128] = "__lttf2";
+ Names[RTLIB::OLT_PPCF128] = "__gcc_qlt";
Names[RTLIB::OLE_F32] = "__lesf2";
Names[RTLIB::OLE_F64] = "__ledf2";
Names[RTLIB::OLE_F128] = "__letf2";
+ Names[RTLIB::OLE_PPCF128] = "__gcc_qle";
Names[RTLIB::OGT_F32] = "__gtsf2";
Names[RTLIB::OGT_F64] = "__gtdf2";
Names[RTLIB::OGT_F128] = "__gttf2";
+ Names[RTLIB::OGT_PPCF128] = "__gcc_qgt";
Names[RTLIB::UO_F32] = "__unordsf2";
Names[RTLIB::UO_F64] = "__unorddf2";
Names[RTLIB::UO_F128] = "__unordtf2";
+ Names[RTLIB::UO_PPCF128] = "__gcc_qunord";
Names[RTLIB::O_F32] = "__unordsf2";
Names[RTLIB::O_F64] = "__unorddf2";
Names[RTLIB::O_F128] = "__unordtf2";
+ Names[RTLIB::O_PPCF128] = "__gcc_qunord";
Names[RTLIB::MEMCPY] = "memcpy";
Names[RTLIB::MEMMOVE] = "memmove";
Names[RTLIB::MEMSET] = "memset";
@@ -395,36 +413,79 @@ static void InitLibcallNames(const char **Names, const Triple &TT) {
Names[RTLIB::SYNC_FETCH_AND_UMIN_4] = "__sync_fetch_and_umin_4";
Names[RTLIB::SYNC_FETCH_AND_UMIN_8] = "__sync_fetch_and_umin_8";
Names[RTLIB::SYNC_FETCH_AND_UMIN_16] = "__sync_fetch_and_umin_16";
-
- if (TT.getEnvironment() == Triple::GNU) {
+
+ Names[RTLIB::ATOMIC_LOAD] = "__atomic_load";
+ Names[RTLIB::ATOMIC_LOAD_1] = "__atomic_load_1";
+ Names[RTLIB::ATOMIC_LOAD_2] = "__atomic_load_2";
+ Names[RTLIB::ATOMIC_LOAD_4] = "__atomic_load_4";
+ Names[RTLIB::ATOMIC_LOAD_8] = "__atomic_load_8";
+ Names[RTLIB::ATOMIC_LOAD_16] = "__atomic_load_16";
+
+ Names[RTLIB::ATOMIC_STORE] = "__atomic_store";
+ Names[RTLIB::ATOMIC_STORE_1] = "__atomic_store_1";
+ Names[RTLIB::ATOMIC_STORE_2] = "__atomic_store_2";
+ Names[RTLIB::ATOMIC_STORE_4] = "__atomic_store_4";
+ Names[RTLIB::ATOMIC_STORE_8] = "__atomic_store_8";
+ Names[RTLIB::ATOMIC_STORE_16] = "__atomic_store_16";
+
+ Names[RTLIB::ATOMIC_EXCHANGE] = "__atomic_exchange";
+ Names[RTLIB::ATOMIC_EXCHANGE_1] = "__atomic_exchange_1";
+ Names[RTLIB::ATOMIC_EXCHANGE_2] = "__atomic_exchange_2";
+ Names[RTLIB::ATOMIC_EXCHANGE_4] = "__atomic_exchange_4";
+ Names[RTLIB::ATOMIC_EXCHANGE_8] = "__atomic_exchange_8";
+ Names[RTLIB::ATOMIC_EXCHANGE_16] = "__atomic_exchange_16";
+
+ Names[RTLIB::ATOMIC_COMPARE_EXCHANGE] = "__atomic_compare_exchange";
+ Names[RTLIB::ATOMIC_COMPARE_EXCHANGE_1] = "__atomic_compare_exchange_1";
+ Names[RTLIB::ATOMIC_COMPARE_EXCHANGE_2] = "__atomic_compare_exchange_2";
+ Names[RTLIB::ATOMIC_COMPARE_EXCHANGE_4] = "__atomic_compare_exchange_4";
+ Names[RTLIB::ATOMIC_COMPARE_EXCHANGE_8] = "__atomic_compare_exchange_8";
+ Names[RTLIB::ATOMIC_COMPARE_EXCHANGE_16] = "__atomic_compare_exchange_16";
+
+ Names[RTLIB::ATOMIC_FETCH_ADD_1] = "__atomic_fetch_add_1";
+ Names[RTLIB::ATOMIC_FETCH_ADD_2] = "__atomic_fetch_add_2";
+ Names[RTLIB::ATOMIC_FETCH_ADD_4] = "__atomic_fetch_add_4";
+ Names[RTLIB::ATOMIC_FETCH_ADD_8] = "__atomic_fetch_add_8";
+ Names[RTLIB::ATOMIC_FETCH_ADD_16] = "__atomic_fetch_add_16";
+ Names[RTLIB::ATOMIC_FETCH_SUB_1] = "__atomic_fetch_sub_1";
+ Names[RTLIB::ATOMIC_FETCH_SUB_2] = "__atomic_fetch_sub_2";
+ Names[RTLIB::ATOMIC_FETCH_SUB_4] = "__atomic_fetch_sub_4";
+ Names[RTLIB::ATOMIC_FETCH_SUB_8] = "__atomic_fetch_sub_8";
+ Names[RTLIB::ATOMIC_FETCH_SUB_16] = "__atomic_fetch_sub_16";
+ Names[RTLIB::ATOMIC_FETCH_AND_1] = "__atomic_fetch_and_1";
+ Names[RTLIB::ATOMIC_FETCH_AND_2] = "__atomic_fetch_and_2";
+ Names[RTLIB::ATOMIC_FETCH_AND_4] = "__atomic_fetch_and_4";
+ Names[RTLIB::ATOMIC_FETCH_AND_8] = "__atomic_fetch_and_8";
+ Names[RTLIB::ATOMIC_FETCH_AND_16] = "__atomic_fetch_and_16";
+ Names[RTLIB::ATOMIC_FETCH_OR_1] = "__atomic_fetch_or_1";
+ Names[RTLIB::ATOMIC_FETCH_OR_2] = "__atomic_fetch_or_2";
+ Names[RTLIB::ATOMIC_FETCH_OR_4] = "__atomic_fetch_or_4";
+ Names[RTLIB::ATOMIC_FETCH_OR_8] = "__atomic_fetch_or_8";
+ Names[RTLIB::ATOMIC_FETCH_OR_16] = "__atomic_fetch_or_16";
+ Names[RTLIB::ATOMIC_FETCH_XOR_1] = "__atomic_fetch_xor_1";
+ Names[RTLIB::ATOMIC_FETCH_XOR_2] = "__atomic_fetch_xor_2";
+ Names[RTLIB::ATOMIC_FETCH_XOR_4] = "__atomic_fetch_xor_4";
+ Names[RTLIB::ATOMIC_FETCH_XOR_8] = "__atomic_fetch_xor_8";
+ Names[RTLIB::ATOMIC_FETCH_XOR_16] = "__atomic_fetch_xor_16";
+ Names[RTLIB::ATOMIC_FETCH_NAND_1] = "__atomic_fetch_nand_1";
+ Names[RTLIB::ATOMIC_FETCH_NAND_2] = "__atomic_fetch_nand_2";
+ Names[RTLIB::ATOMIC_FETCH_NAND_4] = "__atomic_fetch_nand_4";
+ Names[RTLIB::ATOMIC_FETCH_NAND_8] = "__atomic_fetch_nand_8";
+ Names[RTLIB::ATOMIC_FETCH_NAND_16] = "__atomic_fetch_nand_16";
+
+ if (TT.isGNUEnvironment()) {
Names[RTLIB::SINCOS_F32] = "sincosf";
Names[RTLIB::SINCOS_F64] = "sincos";
Names[RTLIB::SINCOS_F80] = "sincosl";
Names[RTLIB::SINCOS_F128] = "sincosl";
Names[RTLIB::SINCOS_PPCF128] = "sincosl";
- } else {
- // These are generally not available.
- Names[RTLIB::SINCOS_F32] = nullptr;
- Names[RTLIB::SINCOS_F64] = nullptr;
- Names[RTLIB::SINCOS_F80] = nullptr;
- Names[RTLIB::SINCOS_F128] = nullptr;
- Names[RTLIB::SINCOS_PPCF128] = nullptr;
}
if (!TT.isOSOpenBSD()) {
Names[RTLIB::STACKPROTECTOR_CHECK_FAIL] = "__stack_chk_fail";
- } else {
- // These are generally not available.
- Names[RTLIB::STACKPROTECTOR_CHECK_FAIL] = nullptr;
}
- // For f16/f32 conversions, Darwin uses the standard naming scheme, instead
- // of the gnueabi-style __gnu_*_ieee.
- // FIXME: What about other targets?
- if (TT.isOSDarwin()) {
- Names[RTLIB::FPEXT_F16_F32] = "__extendhfsf2";
- Names[RTLIB::FPROUND_F32_F16] = "__truncsfhf2";
- }
+ Names[RTLIB::DEOPTIMIZE] = "__llvm_deoptimize";
}
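
The _1/_2/_4/_8/_16 suffixes above encode the access size in bytes. A hedged
helper (hypothetical name, not part of this commit) showing how a legalizer
might select among them:

static RTLIB::Libcall getAtomicLoadLibcall(unsigned SizeInBytes) {
  switch (SizeInBytes) {
  case 1:  return RTLIB::ATOMIC_LOAD_1;
  case 2:  return RTLIB::ATOMIC_LOAD_2;
  case 4:  return RTLIB::ATOMIC_LOAD_4;
  case 8:  return RTLIB::ATOMIC_LOAD_8;
  case 16: return RTLIB::ATOMIC_LOAD_16;
  default: return RTLIB::ATOMIC_LOAD; // generic form, size passed at runtime
  }
}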
/// InitLibcallCallingConvs - Set default libcall CallingConvs.
@@ -446,9 +507,13 @@ RTLIB::Libcall RTLIB::getFPEXT(EVT OpVT, EVT RetVT) {
return FPEXT_F32_F64;
if (RetVT == MVT::f128)
return FPEXT_F32_F128;
+ if (RetVT == MVT::ppcf128)
+ return FPEXT_F32_PPCF128;
} else if (OpVT == MVT::f64) {
if (RetVT == MVT::f128)
return FPEXT_F64_F128;
+ else if (RetVT == MVT::ppcf128)
+ return FPEXT_F64_PPCF128;
}
return UNKNOWN_LIBCALL;
@@ -653,7 +718,7 @@ RTLIB::Libcall RTLIB::getUINTTOFP(EVT OpVT, EVT RetVT) {
return UNKNOWN_LIBCALL;
}
-RTLIB::Libcall RTLIB::getATOMIC(unsigned Opc, MVT VT) {
+RTLIB::Libcall RTLIB::getSYNC(unsigned Opc, MVT VT) {
#define OP_TO_LIBCALL(Name, Enum) \
case Name: \
switch (VT.SimpleTy) { \
@@ -698,27 +763,35 @@ static void InitCmpLibcallCCs(ISD::CondCode *CCs) {
CCs[RTLIB::OEQ_F32] = ISD::SETEQ;
CCs[RTLIB::OEQ_F64] = ISD::SETEQ;
CCs[RTLIB::OEQ_F128] = ISD::SETEQ;
+ CCs[RTLIB::OEQ_PPCF128] = ISD::SETEQ;
CCs[RTLIB::UNE_F32] = ISD::SETNE;
CCs[RTLIB::UNE_F64] = ISD::SETNE;
CCs[RTLIB::UNE_F128] = ISD::SETNE;
+ CCs[RTLIB::UNE_PPCF128] = ISD::SETNE;
CCs[RTLIB::OGE_F32] = ISD::SETGE;
CCs[RTLIB::OGE_F64] = ISD::SETGE;
CCs[RTLIB::OGE_F128] = ISD::SETGE;
+ CCs[RTLIB::OGE_PPCF128] = ISD::SETGE;
CCs[RTLIB::OLT_F32] = ISD::SETLT;
CCs[RTLIB::OLT_F64] = ISD::SETLT;
CCs[RTLIB::OLT_F128] = ISD::SETLT;
+ CCs[RTLIB::OLT_PPCF128] = ISD::SETLT;
CCs[RTLIB::OLE_F32] = ISD::SETLE;
CCs[RTLIB::OLE_F64] = ISD::SETLE;
CCs[RTLIB::OLE_F128] = ISD::SETLE;
+ CCs[RTLIB::OLE_PPCF128] = ISD::SETLE;
CCs[RTLIB::OGT_F32] = ISD::SETGT;
CCs[RTLIB::OGT_F64] = ISD::SETGT;
CCs[RTLIB::OGT_F128] = ISD::SETGT;
+ CCs[RTLIB::OGT_PPCF128] = ISD::SETGT;
CCs[RTLIB::UO_F32] = ISD::SETNE;
CCs[RTLIB::UO_F64] = ISD::SETNE;
CCs[RTLIB::UO_F128] = ISD::SETNE;
+ CCs[RTLIB::UO_PPCF128] = ISD::SETNE;
CCs[RTLIB::O_F32] = ISD::SETEQ;
CCs[RTLIB::O_F64] = ISD::SETEQ;
CCs[RTLIB::O_F128] = ISD::SETEQ;
+ CCs[RTLIB::O_PPCF128] = ISD::SETEQ;
}
/// NOTE: The TargetMachine owns TLOF.
@@ -752,8 +825,14 @@ TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm) : TM(tm) {
PrefLoopAlignment = 0;
GatherAllAliasesMaxDepth = 6;
MinStackArgumentAlignment = 1;
- InsertFencesForAtomic = false;
MinimumJumpTableEntries = 4;
+  // TODO: the default will be switched to 0 in the next commit, along
+  // with the necessary target-specific changes.
+ MaxAtomicSizeInBitsSupported = 1024;
+
+ MinCmpXchgSizeInBits = 0;
+
+  std::fill(std::begin(LibcallRoutineNames), std::end(LibcallRoutineNames),
+            nullptr);
InitLibcallNames(LibcallRoutineNames, TM.getTargetTriple());
InitCmpLibcallCCs(CmpLibcallCCs);
@@ -767,8 +846,9 @@ void TargetLoweringBase::initActions() {
memset(TruncStoreActions, 0, sizeof(TruncStoreActions));
memset(IndexedModeActions, 0, sizeof(IndexedModeActions));
memset(CondCodeActions, 0, sizeof(CondCodeActions));
- memset(RegClassForVT, 0,MVT::LAST_VALUETYPE*sizeof(TargetRegisterClass*));
- memset(TargetDAGCombineArray, 0, array_lengthof(TargetDAGCombineArray));
+ std::fill(std::begin(RegClassForVT), std::end(RegClassForVT), nullptr);
+ std::fill(std::begin(TargetDAGCombineArray),
+ std::end(TargetDAGCombineArray), 0);
// Set default actions for various operations.
for (MVT VT : MVT::all_valuetypes()) {
@@ -803,6 +883,10 @@ void TargetLoweringBase::initActions() {
setOperationAction(ISD::SMULO, VT, Expand);
setOperationAction(ISD::UMULO, VT, Expand);
+ // These default to Expand so they will be expanded to CTLZ/CTTZ by default.
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Expand);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Expand);
+
setOperationAction(ISD::BITREVERSE, VT, Expand);
// These library functions default to expand.
@@ -816,7 +900,7 @@ void TargetLoweringBase::initActions() {
setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Expand);
}
- // For most targets @llvm.get.dynamic.area.offest just returns 0.
+ // For most targets @llvm.get.dynamic.area.offset just returns 0.
setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, VT, Expand);
}
@@ -843,8 +927,6 @@ void TargetLoweringBase::initActions() {
setOperationAction(ISD::FEXP , VT, Expand);
setOperationAction(ISD::FEXP2, VT, Expand);
setOperationAction(ISD::FFLOOR, VT, Expand);
- setOperationAction(ISD::FMINNUM, VT, Expand);
- setOperationAction(ISD::FMAXNUM, VT, Expand);
setOperationAction(ISD::FNEARBYINT, VT, Expand);
setOperationAction(ISD::FCEIL, VT, Expand);
setOperationAction(ISD::FRINT, VT, Expand);
@@ -1090,9 +1172,10 @@ bool TargetLoweringBase::isLegalRC(const TargetRegisterClass *RC) const {
/// Replace/modify any TargetFrameIndex operands with a target-dependent
/// sequence of memory operands that is recognized by PrologEpilogInserter.
-MachineBasicBlock*
-TargetLoweringBase::emitPatchPoint(MachineInstr *MI,
+MachineBasicBlock *
+TargetLoweringBase::emitPatchPoint(MachineInstr &InitialMI,
MachineBasicBlock *MBB) const {
+ MachineInstr *MI = &InitialMI;
MachineFunction &MF = *MI->getParent()->getParent();
MachineFrameInfo &MFI = *MF.getFrameInfo();
@@ -1151,7 +1234,7 @@ TargetLoweringBase::emitPatchPoint(MachineInstr *MI,
// Add a new memory operand for this FI.
assert(MFI.getObjectOffset(FI) != -1);
- unsigned Flags = MachineMemOperand::MOLoad;
+ auto Flags = MachineMemOperand::MOLoad;
if (MI->getOpcode() == TargetOpcode::STATEPOINT) {
Flags |= MachineMemOperand::MOStore;
Flags |= MachineMemOperand::MOVolatile;
@@ -1250,10 +1333,17 @@ void TargetLoweringBase::computeRegisterProperties(
// ppcf128 type is really two f64's.
if (!isTypeLegal(MVT::ppcf128)) {
- NumRegistersForVT[MVT::ppcf128] = 2*NumRegistersForVT[MVT::f64];
- RegisterTypeForVT[MVT::ppcf128] = MVT::f64;
- TransformToType[MVT::ppcf128] = MVT::f64;
- ValueTypeActions.setTypeAction(MVT::ppcf128, TypeExpandFloat);
+ if (isTypeLegal(MVT::f64)) {
+ NumRegistersForVT[MVT::ppcf128] = 2*NumRegistersForVT[MVT::f64];
+ RegisterTypeForVT[MVT::ppcf128] = MVT::f64;
+ TransformToType[MVT::ppcf128] = MVT::f64;
+ ValueTypeActions.setTypeAction(MVT::ppcf128, TypeExpandFloat);
+ } else {
+ NumRegistersForVT[MVT::ppcf128] = NumRegistersForVT[MVT::i128];
+ RegisterTypeForVT[MVT::ppcf128] = RegisterTypeForVT[MVT::i128];
+ TransformToType[MVT::ppcf128] = MVT::i128;
+ ValueTypeActions.setTypeAction(MVT::ppcf128, TypeSoftenFloat);
+ }
}
// Decide how to handle f128. If the target does not have native f128 support,
@@ -1308,13 +1398,12 @@ void TargetLoweringBase::computeRegisterProperties(
case TypePromoteInteger: {
// Try to promote the elements of integer vectors. If no legal
// promotion was found, fall through to the widen-vector method.
- for (unsigned nVT = i + 1; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) {
+ for (unsigned nVT = i + 1; nVT <= MVT::LAST_INTEGER_VECTOR_VALUETYPE; ++nVT) {
MVT SVT = (MVT::SimpleValueType) nVT;
// Promote vectors of integers to vectors with the same number
// of elements, with a wider element type.
- if (SVT.getVectorElementType().getSizeInBits() > EltVT.getSizeInBits()
- && SVT.getVectorNumElements() == NElts && isTypeLegal(SVT)
- && SVT.getScalarType().isInteger()) {
+ if (SVT.getVectorElementType().getSizeInBits() > EltVT.getSizeInBits() &&
+ SVT.getVectorNumElements() == NElts && isTypeLegal(SVT)) {
TransformToType[i] = SVT;
RegisterTypeForVT[i] = SVT;
NumRegistersForVT[i] = 1;
@@ -1553,6 +1642,9 @@ bool TargetLoweringBase::allowsMemoryAccess(LLVMContext &Context,
return allowsMisalignedMemoryAccesses(VT, AddrSpace, Alignment, Fast);
}
+BranchProbability TargetLoweringBase::getPredictableBranchThreshold() const {
+ return BranchProbability(MinPercentageForPredictableBranch, 100);
+}
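
A sketch of how a client might consult the new hook (Prob and TLI assumed in
scope): with the default of 99, a branch counts as predictable only when it
goes the same way at least 99% of the time.

BranchProbability Threshold = TLI->getPredictableBranchThreshold();
bool IsPredictable = Prob > Threshold || Prob < Threshold.getCompl();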
//===----------------------------------------------------------------------===//
// TargetTransformInfo Helpers
@@ -1715,3 +1807,36 @@ bool TargetLoweringBase::isLegalAddressingMode(const DataLayout &DL,
return true;
}
+
+//===----------------------------------------------------------------------===//
+// Stack Protector
+//===----------------------------------------------------------------------===//
+
+// For OpenBSD, return its special guard variable. Otherwise return nullptr,
+// so that SelectionDAG handles SSP.
+Value *TargetLoweringBase::getIRStackGuard(IRBuilder<> &IRB) const {
+ if (getTargetMachine().getTargetTriple().isOSOpenBSD()) {
+ Module &M = *IRB.GetInsertBlock()->getParent()->getParent();
+ PointerType *PtrTy = Type::getInt8PtrTy(M.getContext());
+ auto Guard = cast<GlobalValue>(M.getOrInsertGlobal("__guard_local", PtrTy));
+ Guard->setVisibility(GlobalValue::HiddenVisibility);
+ return Guard;
+ }
+ return nullptr;
+}
+
+// Currently only support "standard" __stack_chk_guard.
+// TODO: add LOAD_STACK_GUARD support.
+void TargetLoweringBase::insertSSPDeclarations(Module &M) const {
+ M.getOrInsertGlobal("__stack_chk_guard", Type::getInt8PtrTy(M.getContext()));
+}
+
+// Currently only support "standard" __stack_chk_guard.
+// TODO: add LOAD_STACK_GUARD support.
+Value *TargetLoweringBase::getSDagStackGuard(const Module &M) const {
+ return M.getGlobalVariable("__stack_chk_guard", true);
+}
+
+Value *TargetLoweringBase::getSSPStackGuardCheck(const Module &M) const {
+ return nullptr;
+}
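
A hedged sketch of how the three new SSP hooks compose; the preference order
(IR-level guard first, module global as the fallback) is an illustration, not
code from this commit.

static Value *resolveStackGuard(const TargetLoweringBase &TLI, Module &M,
                                IRBuilder<> &B) {
  // OpenBSD gets its hidden __guard_local; other targets return null here.
  if (Value *Guard = TLI.getIRStackGuard(B))
    return Guard;
  // Fall back to the "standard" global, creating it if necessary.
  TLI.insertSSPDeclarations(M);
  return TLI.getSDagStackGuard(M);
}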
diff --git a/contrib/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/contrib/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
index 58ae9cc..5f814c9 100644
--- a/contrib/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
+++ b/contrib/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
@@ -33,6 +33,7 @@
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbolELF.h"
#include "llvm/MC/MCValue.h"
+#include "llvm/ProfileData/InstrProf.h"
#include "llvm/Support/COFF.h"
#include "llvm/Support/Dwarf.h"
#include "llvm/Support/ELF.h"
@@ -68,11 +69,9 @@ void TargetLoweringObjectFileELF::emitPersonalityValue(
cast<MCSymbolELF>(getContext().getOrCreateSymbol(NameData));
Streamer.EmitSymbolAttribute(Label, MCSA_Hidden);
Streamer.EmitSymbolAttribute(Label, MCSA_Weak);
- StringRef Prefix = ".data.";
- NameData.insert(NameData.begin(), Prefix.begin(), Prefix.end());
unsigned Flags = ELF::SHF_ALLOC | ELF::SHF_WRITE | ELF::SHF_GROUP;
- MCSection *Sec = getContext().getELFSection(NameData, ELF::SHT_PROGBITS,
- Flags, 0, Label->getName());
+ MCSection *Sec = getContext().getELFNamedSection(".data", Label->getName(),
+ ELF::SHT_PROGBITS, Flags, 0);
unsigned Size = DL.getPointerSize();
Streamer.SwitchSection(Sec);
Streamer.EmitValueToAlignment(DL.getPointerABIAlignment());
@@ -119,6 +118,10 @@ getELFKindForNamedSection(StringRef Name, SectionKind K) {
// section(".eh_frame") gcc will produce:
//
// .section .eh_frame,"a",@progbits
+
+ if (Name == getInstrProfCoverageSectionName(false))
+ return SectionKind::getMetadata();
+
if (Name.empty() || Name[0] != '.') return K;
// Some lame default implementation based on some magic section names.
@@ -259,9 +262,11 @@ selectELFSectionForGlobal(MCContext &Ctx, const GlobalValue *GV,
EntrySize = 4;
} else if (Kind.isMergeableConst8()) {
EntrySize = 8;
- } else {
- assert(Kind.isMergeableConst16() && "unknown data width");
+ } else if (Kind.isMergeableConst16()) {
EntrySize = 16;
+ } else {
+ assert(Kind.isMergeableConst32() && "unknown data width");
+ EntrySize = 32;
}
}
@@ -288,12 +293,14 @@ selectELFSectionForGlobal(MCContext &Ctx, const GlobalValue *GV,
} else {
Name = getSectionPrefixForGlobal(Kind);
}
+  // FIXME: Extend the section prefix to include hotness categories such as .hot
+ // or .unlikely for functions.
if (EmitUniqueSection && UniqueSectionNames) {
Name.push_back('.');
TM.getNameWithPrefix(Name, GV, Mang, true);
}
- unsigned UniqueID = ~0;
+ unsigned UniqueID = MCContext::GenericSectionID;
if (EmitUniqueSection && !UniqueSectionNames) {
UniqueID = *NextUniqueID;
(*NextUniqueID)++;
@@ -346,13 +353,16 @@ bool TargetLoweringObjectFileELF::shouldPutJumpTableInFunctionSection(
/// Given a mergeable constant with the specified size and relocation
/// information, return a section that it should be placed in.
MCSection *TargetLoweringObjectFileELF::getSectionForConstant(
- const DataLayout &DL, SectionKind Kind, const Constant *C) const {
+ const DataLayout &DL, SectionKind Kind, const Constant *C,
+ unsigned &Align) const {
if (Kind.isMergeableConst4() && MergeableConst4Section)
return MergeableConst4Section;
if (Kind.isMergeableConst8() && MergeableConst8Section)
return MergeableConst8Section;
if (Kind.isMergeableConst16() && MergeableConst16Section)
return MergeableConst16Section;
+ if (Kind.isMergeableConst32() && MergeableConst32Section)
+ return MergeableConst32Section;
if (Kind.isReadOnly())
return ReadOnlySection;
@@ -412,6 +422,27 @@ MCSection *TargetLoweringObjectFileELF::getStaticDtorSection(
KeySym);
}
+const MCExpr *TargetLoweringObjectFileELF::lowerRelativeReference(
+ const GlobalValue *LHS, const GlobalValue *RHS, Mangler &Mang,
+ const TargetMachine &TM) const {
+ // We may only use a PLT-relative relocation to refer to unnamed_addr
+ // functions.
+ if (!LHS->hasGlobalUnnamedAddr() || !LHS->getValueType()->isFunctionTy())
+ return nullptr;
+
+ // Basic sanity checks.
+ if (LHS->getType()->getPointerAddressSpace() != 0 ||
+ RHS->getType()->getPointerAddressSpace() != 0 || LHS->isThreadLocal() ||
+ RHS->isThreadLocal())
+ return nullptr;
+
+ return MCBinaryExpr::createSub(
+ MCSymbolRefExpr::create(TM.getSymbol(LHS, Mang), PLTRelativeVariantKind,
+ getContext()),
+ MCSymbolRefExpr::create(TM.getSymbol(RHS, Mang), getContext()),
+ getContext());
+}
+
void
TargetLoweringObjectFileELF::InitializeELF(bool UseInitArray_) {
UseInitArray = UseInitArray_;
@@ -443,10 +474,7 @@ emitModuleFlags(MCStreamer &Streamer,
MDNode *LinkerOptions = nullptr;
StringRef SectionVal;
- for (ArrayRef<Module::ModuleFlagEntry>::iterator
- i = ModuleFlags.begin(), e = ModuleFlags.end(); i != e; ++i) {
- const Module::ModuleFlagEntry &MFE = *i;
-
+ for (const auto &MFE : ModuleFlags) {
// Ignore flags with 'Require' behavior.
if (MFE.Behavior == Module::Require)
continue;
@@ -459,6 +487,7 @@ emitModuleFlags(MCStreamer &Streamer,
} else if (Key == "Objective-C Garbage Collection" ||
Key == "Objective-C GC Only" ||
Key == "Objective-C Is Simulated" ||
+ Key == "Objective-C Class Properties" ||
Key == "Objective-C Image Swift Version") {
ImageInfoFlags |= mdconst::extract<ConstantInt>(Val)->getZExtValue();
} else if (Key == "Objective-C Image Info Section") {
@@ -470,16 +499,10 @@ emitModuleFlags(MCStreamer &Streamer,
// Emit the linker options if present.
if (LinkerOptions) {
- for (unsigned i = 0, e = LinkerOptions->getNumOperands(); i != e; ++i) {
- MDNode *MDOptions = cast<MDNode>(LinkerOptions->getOperand(i));
+ for (const auto &Option : LinkerOptions->operands()) {
SmallVector<std::string, 4> StrOptions;
-
- // Convert to strings.
- for (unsigned ii = 0, ie = MDOptions->getNumOperands(); ii != ie; ++ii) {
- MDString *MDOption = cast<MDString>(MDOptions->getOperand(ii));
- StrOptions.push_back(MDOption->getString());
- }
-
+ for (const auto &Piece : cast<MDNode>(Option)->operands())
+ StrOptions.push_back(cast<MDString>(Piece)->getString());
Streamer.EmitLinkerOptions(StrOptions);
}
}
@@ -630,7 +653,8 @@ MCSection *TargetLoweringObjectFileMachO::SelectSectionForGlobal(
}
MCSection *TargetLoweringObjectFileMachO::getSectionForConstant(
- const DataLayout &DL, SectionKind Kind, const Constant *C) const {
+ const DataLayout &DL, SectionKind Kind, const Constant *C,
+ unsigned &Align) const {
// If this constant requires a relocation, we have to put it in the data
// segment, not in the text segment.
if (Kind.isData() || Kind.isReadOnlyWithRel())
@@ -660,9 +684,7 @@ const MCExpr *TargetLoweringObjectFileMachO::getTTypeGlobalReference(
// Add information about the stub reference to MachOMMI so that the stub
// gets emitted by the asmprinter.
- MachineModuleInfoImpl::StubValueTy &StubSym =
- GV->hasHiddenVisibility() ? MachOMMI.getHiddenGVStubEntry(SSym) :
- MachOMMI.getGVStubEntry(SSym);
+ MachineModuleInfoImpl::StubValueTy &StubSym = MachOMMI.getGVStubEntry(SSym);
if (!StubSym.getPointer()) {
MCSymbol *Sym = TM.getSymbol(GV, Mang);
StubSym = MachineModuleInfoImpl::StubValueTy(Sym, !GV->hasLocalLinkage());
@@ -785,8 +807,9 @@ void TargetLoweringObjectFileMachO::getNameWithPrefix(
//===----------------------------------------------------------------------===//
static unsigned
-getCOFFSectionFlags(SectionKind K) {
+getCOFFSectionFlags(SectionKind K, const TargetMachine &TM) {
unsigned Flags = 0;
+ bool isThumb = TM.getTargetTriple().getArch() == Triple::thumb;
if (K.isMetadata())
Flags |=
@@ -795,7 +818,8 @@ getCOFFSectionFlags(SectionKind K) {
Flags |=
COFF::IMAGE_SCN_MEM_EXECUTE |
COFF::IMAGE_SCN_MEM_READ |
- COFF::IMAGE_SCN_CNT_CODE;
+ COFF::IMAGE_SCN_CNT_CODE |
+ (isThumb ? COFF::IMAGE_SCN_MEM_16BIT : (COFF::SectionCharacteristics)0);
else if (K.isBSS())
Flags |=
COFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA |
@@ -865,7 +889,7 @@ MCSection *TargetLoweringObjectFileCOFF::getExplicitSectionGlobal(
const GlobalValue *GV, SectionKind Kind, Mangler &Mang,
const TargetMachine &TM) const {
int Selection = 0;
- unsigned Characteristics = getCOFFSectionFlags(Kind);
+ unsigned Characteristics = getCOFFSectionFlags(Kind, TM);
StringRef Name = GV->getSection();
StringRef COMDATSymName = "";
if (GV->hasComdat()) {
@@ -884,10 +908,8 @@ MCSection *TargetLoweringObjectFileCOFF::getExplicitSectionGlobal(
Selection = 0;
}
}
- return getContext().getCOFFSection(Name,
- Characteristics,
- Kind,
- COMDATSymName,
+
+ return getContext().getCOFFSection(Name, Characteristics, Kind, COMDATSymName,
Selection);
}
@@ -916,7 +938,7 @@ MCSection *TargetLoweringObjectFileCOFF::SelectSectionForGlobal(
if ((EmitUniquedSection && !Kind.isCommon()) || GV->hasComdat()) {
const char *Name = getCOFFSectionNameForUniqueGlobal(Kind);
- unsigned Characteristics = getCOFFSectionFlags(Kind);
+ unsigned Characteristics = getCOFFSectionFlags(Kind, TM);
Characteristics |= COFF::IMAGE_SCN_LNK_COMDAT;
int Selection = getSelectionForCOFF(GV);
@@ -928,16 +950,20 @@ MCSection *TargetLoweringObjectFileCOFF::SelectSectionForGlobal(
else
ComdatGV = GV;
+ unsigned UniqueID = MCContext::GenericSectionID;
+ if (EmitUniquedSection)
+ UniqueID = NextUniqueID++;
+
if (!ComdatGV->hasPrivateLinkage()) {
MCSymbol *Sym = TM.getSymbol(ComdatGV, Mang);
StringRef COMDATSymName = Sym->getName();
return getContext().getCOFFSection(Name, Characteristics, Kind,
- COMDATSymName, Selection);
+ COMDATSymName, Selection, UniqueID);
} else {
SmallString<256> TmpData;
Mang.getNameWithPrefix(TmpData, GV, /*CannotUsePrivateLabel=*/true);
return getContext().getCOFFSection(Name, Characteristics, Kind, TmpData,
- Selection);
+ Selection, UniqueID);
}
}
@@ -989,11 +1015,12 @@ MCSection *TargetLoweringObjectFileCOFF::getSectionForJumpTable(
SectionKind Kind = SectionKind::getReadOnly();
const char *Name = getCOFFSectionNameForUniqueGlobal(Kind);
- unsigned Characteristics = getCOFFSectionFlags(Kind);
+ unsigned Characteristics = getCOFFSectionFlags(Kind, TM);
Characteristics |= COFF::IMAGE_SCN_LNK_COMDAT;
+ unsigned UniqueID = NextUniqueID++;
return getContext().getCOFFSection(Name, Characteristics, Kind, COMDATSymName,
- COFF::IMAGE_COMDAT_SELECT_ASSOCIATIVE);
+ COFF::IMAGE_COMDAT_SELECT_ASSOCIATIVE, UniqueID);
}
void TargetLoweringObjectFileCOFF::
@@ -1002,32 +1029,25 @@ emitModuleFlags(MCStreamer &Streamer,
Mangler &Mang, const TargetMachine &TM) const {
MDNode *LinkerOptions = nullptr;
- // Look for the "Linker Options" flag, since it's the only one we support.
- for (ArrayRef<Module::ModuleFlagEntry>::iterator
- i = ModuleFlags.begin(), e = ModuleFlags.end(); i != e; ++i) {
- const Module::ModuleFlagEntry &MFE = *i;
+ for (const auto &MFE : ModuleFlags) {
StringRef Key = MFE.Key->getString();
- Metadata *Val = MFE.Val;
- if (Key == "Linker Options") {
- LinkerOptions = cast<MDNode>(Val);
- break;
- }
+ if (Key == "Linker Options")
+ LinkerOptions = cast<MDNode>(MFE.Val);
}
- if (!LinkerOptions)
- return;
- // Emit the linker options to the linker .drectve section. According to the
- // spec, this section is a space-separated string containing flags for linker.
- MCSection *Sec = getDrectveSection();
- Streamer.SwitchSection(Sec);
- for (unsigned i = 0, e = LinkerOptions->getNumOperands(); i != e; ++i) {
- MDNode *MDOptions = cast<MDNode>(LinkerOptions->getOperand(i));
- for (unsigned ii = 0, ie = MDOptions->getNumOperands(); ii != ie; ++ii) {
- MDString *MDOption = cast<MDString>(MDOptions->getOperand(ii));
- // Lead with a space for consistency with our dllexport implementation.
- std::string Directive(" ");
- Directive.append(MDOption->getString());
- Streamer.EmitBytes(Directive);
+ if (LinkerOptions) {
+ // Emit the linker options to the linker .drectve section. According to the
+ // spec, this section is a space-separated string containing flags for the
+ // linker.
+ MCSection *Sec = getDrectveSection();
+ Streamer.SwitchSection(Sec);
+ for (const auto &Option : LinkerOptions->operands()) {
+ for (const auto &Piece : cast<MDNode>(Option)->operands()) {
+ // Lead with a space for consistency with our dllexport implementation.
+ std::string Directive(" ");
+ Directive.append(cast<MDString>(Piece)->getString());
+ Streamer.EmitBytes(Directive);
+ }
}
}
}
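
For context, the "Linker Options" module flag consumed above is a two-level
metadata list: each operand is itself an MDNode whose MDString pieces form one
linker directive. A minimal sketch of building such a flag through the C++ API
(module and library names invented, not part of this change):

#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"

int main() {
  llvm::LLVMContext Ctx;
  llvm::Module M("example", Ctx);
  // One directive, made of a single MDString piece.
  llvm::Metadata *Pieces[] = {llvm::MDString::get(Ctx, "/DEFAULTLIB:msvcrt.lib")};
  llvm::Metadata *Groups[] = {llvm::MDNode::get(Ctx, Pieces)};
  M.addModuleFlag(llvm::Module::AppendUnique, "Linker Options",
                  llvm::MDNode::get(Ctx, Groups));
  return 0;
}

Emitted through the loop above, this would appear as " /DEFAULTLIB:msvcrt.lib"
in the .drectve section.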
@@ -1035,13 +1055,13 @@ emitModuleFlags(MCStreamer &Streamer,
MCSection *TargetLoweringObjectFileCOFF::getStaticCtorSection(
unsigned Priority, const MCSymbol *KeySym) const {
return getContext().getAssociativeCOFFSection(
- cast<MCSectionCOFF>(StaticCtorSection), KeySym);
+ cast<MCSectionCOFF>(StaticCtorSection), KeySym, 0);
}
MCSection *TargetLoweringObjectFileCOFF::getStaticDtorSection(
unsigned Priority, const MCSymbol *KeySym) const {
return getContext().getAssociativeCOFFSection(
- cast<MCSectionCOFF>(StaticDtorSection), KeySym);
+ cast<MCSectionCOFF>(StaticDtorSection), KeySym, 0);
}
void TargetLoweringObjectFileCOFF::emitLinkerFlagsForGlobal(
diff --git a/contrib/llvm/lib/CodeGen/Passes.cpp b/contrib/llvm/lib/CodeGen/TargetPassConfig.cpp
index 873f712..b8c8209 100644
--- a/contrib/llvm/lib/CodeGen/Passes.cpp
+++ b/contrib/llvm/lib/CodeGen/TargetPassConfig.cpp
@@ -1,4 +1,4 @@
-//===-- Passes.cpp - Target independent code generation passes ------------===//
+//===-- TargetPassConfig.cpp - Target independent code generation passes --===//
//
// The LLVM Compiler Infrastructure
//
@@ -12,22 +12,26 @@
//
//===---------------------------------------------------------------------===//
-#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
+
#include "llvm/Analysis/BasicAliasAnalysis.h"
-#include "llvm/Analysis/CFLAliasAnalysis.h"
+#include "llvm/Analysis/CFLAndersAliasAnalysis.h"
+#include "llvm/Analysis/CFLSteensAliasAnalysis.h"
+#include "llvm/Analysis/CallGraphSCCPass.h"
#include "llvm/Analysis/Passes.h"
#include "llvm/Analysis/ScopedNoAliasAA.h"
#include "llvm/Analysis/TypeBasedAliasAnalysis.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/RegAllocRegistry.h"
+#include "llvm/CodeGen/RegisterUsageInfo.h"
#include "llvm/IR/IRPrintingPasses.h"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/Verifier.h"
#include "llvm/MC/MCAsmInfo.h"
-#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/Instrumentation.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/SymbolRewriter.h"
@@ -106,9 +110,19 @@ cl::opt<bool> MISchedPostRA("misched-postra", cl::Hidden,
static cl::opt<bool> EarlyLiveIntervals("early-live-intervals", cl::Hidden,
cl::desc("Run live interval analysis earlier in the pipeline"));
-static cl::opt<bool> UseCFLAA("use-cfl-aa-in-codegen",
- cl::init(false), cl::Hidden,
- cl::desc("Enable the new, experimental CFL alias analysis in CodeGen"));
+// Experimental option to use CFL-AA in codegen
+enum class CFLAAType { None, Steensgaard, Andersen, Both };
+static cl::opt<CFLAAType> UseCFLAA(
+ "use-cfl-aa-in-codegen", cl::init(CFLAAType::None), cl::Hidden,
+ cl::desc("Enable the new, experimental CFL alias analysis in CodeGen"),
+ cl::values(clEnumValN(CFLAAType::None, "none", "Disable CFL-AA"),
+ clEnumValN(CFLAAType::Steensgaard, "steens",
+ "Enable unification-based CFL-AA"),
+ clEnumValN(CFLAAType::Andersen, "anders",
+ "Enable inclusion-based CFL-AA"),
+ clEnumValN(CFLAAType::Both, "both",
+ "Enable both variants of CFL-AA"),
+ clEnumValEnd));
/// Allow standard passes to be disabled by command line options. This supports
/// simple binary flags that either suppress the pass or do nothing.
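
Usage note: the former boolean -use-cfl-aa-in-codegen flag above is now an
enumerated choice. For example (invocation illustrative), llc
-use-cfl-aa-in-codegen=steens selects the unification-based variant, =anders
the inclusion-based one, and =both registers both wrapper passes, as wired up
in addIRPasses() below.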
@@ -241,7 +255,7 @@ TargetPassConfig::TargetPassConfig(TargetMachine *tm, PassManagerBase &pm)
: ImmutablePass(ID), PM(&pm), StartBefore(nullptr), StartAfter(nullptr),
StopAfter(nullptr), Started(true), Stopped(false),
AddingMachinePasses(false), TM(tm), Impl(nullptr), Initialized(false),
- DisableVerify(false), EnableTailMerge(true) {
+ DisableVerify(false), EnableTailMerge(true) {
Impl = new PassConfigImpl();
@@ -256,6 +270,13 @@ TargetPassConfig::TargetPassConfig(TargetMachine *tm, PassManagerBase &pm)
// Substitute Pseudo Pass IDs for real ones.
substitutePass(&EarlyTailDuplicateID, &TailDuplicateID);
substitutePass(&PostRAMachineLICMID, &MachineLICMID);
+
+ if (StringRef(PrintMachineInstrs.getValue()).equals(""))
+ TM->Options.PrintMachineCode = true;
+}
+
+CodeGenOpt::Level TargetPassConfig::getOptLevel() const {
+ return TM->getOptLevel();
}
/// Insert InsertedPassID pass after TargetPassID.
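
Note the relocated -print-machineinstrs handling: an empty value (a bare
-print-machineinstrs, invocation illustrative) now sets
TM->Options.PrintMachineCode in the constructor, while a value naming a pass
still makes addMachinePasses() insert the machineinstr-printer pass after that
pass, as a later hunk shows.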
@@ -303,6 +324,13 @@ IdentifyingPassPtr TargetPassConfig::getPassSubstitution(AnalysisID ID) const {
return I->second;
}
+bool TargetPassConfig::isPassSubstitutedOrOverridden(AnalysisID ID) const {
+ IdentifyingPassPtr TargetID = getPassSubstitution(ID);
+ IdentifyingPassPtr FinalPtr = overridePass(ID, TargetID);
+ return !FinalPtr.isValid() || FinalPtr.isInstance() ||
+ FinalPtr.getID() != ID;
+}
+
/// Add a pass to the PassManager if that pass is supposed to be run. If the
/// Started/Stopped flags indicate either that the compilation should start at
/// a later pass or that it should stop after an earlier pass, then do not add
@@ -392,12 +420,25 @@ void TargetPassConfig::addVerifyPass(const std::string &Banner) {
/// Add common target configurable passes that perform LLVM IR to IR transforms
/// following machine independent optimization.
void TargetPassConfig::addIRPasses() {
+ switch (UseCFLAA) {
+ case CFLAAType::Steensgaard:
+ addPass(createCFLSteensAAWrapperPass());
+ break;
+ case CFLAAType::Andersen:
+ addPass(createCFLAndersAAWrapperPass());
+ break;
+ case CFLAAType::Both:
+ addPass(createCFLAndersAAWrapperPass());
+ addPass(createCFLSteensAAWrapperPass());
+ break;
+ default:
+ break;
+ }
+
// Basic AliasAnalysis support.
// Add TypeBasedAliasAnalysis before BasicAliasAnalysis so that
// BasicAliasAnalysis wins if they disagree. This is intended to help
// support "obvious" type-punning idioms.
- if (UseCFLAA)
- addPass(createCFLAAWrapperPass());
addPass(createTypeBasedAAWrapperPass());
addPass(createScopedNoAliasAAWrapperPass());
addPass(createBasicAAWrapperPass());
@@ -476,6 +517,10 @@ void TargetPassConfig::addCodeGenPrepare() {
void TargetPassConfig::addISelPrepare() {
addPreISel();
+ // Force codegen to run according to the callgraph.
+ if (TM->Options.EnableIPRA)
+ addPass(new DummyCGSCCPass);
+
// Add both the safe stack and the stack protection passes: each of them will
// only protect functions that have corresponding attributes.
addPass(createSafeStackPass(TM));
@@ -512,12 +557,12 @@ void TargetPassConfig::addISelPrepare() {
void TargetPassConfig::addMachinePasses() {
AddingMachinePasses = true;
+ if (TM->Options.EnableIPRA)
+ addPass(createRegUsageInfoPropPass());
+
// Insert a machine instr printer pass after the specified pass.
- // If -print-machineinstrs specified, print machineinstrs after all passes.
- if (StringRef(PrintMachineInstrs.getValue()).equals(""))
- TM->Options.PrintMachineCode = true;
- else if (!StringRef(PrintMachineInstrs.getValue())
- .equals("option-unspecified")) {
+ if (!StringRef(PrintMachineInstrs.getValue()).equals("") &&
+ !StringRef(PrintMachineInstrs.getValue()).equals("option-unspecified")) {
const PassRegistry *PR = PassRegistry::getPassRegistry();
const PassInfo *TPI = PR->getPassInfo(PrintMachineInstrs.getValue());
const PassInfo *IPI = PR->getPassInfo(StringRef("machineinstr-printer"));
@@ -556,10 +601,13 @@ void TargetPassConfig::addMachinePasses() {
addPostRegAlloc();
// Insert prolog/epilog code. Eliminate abstract frame index references...
- if (getOptLevel() != CodeGenOpt::None)
+ if (getOptLevel() != CodeGenOpt::None)
addPass(&ShrinkWrapID);
- addPass(&PrologEpilogCodeInserterID);
+ // The Prolog/Epilog inserter needs a TargetMachine to instantiate, so only
+ // add it if it hasn't been disabled, substituted, or overridden.
+ if (!isPassSubstitutedOrOverridden(&PrologEpilogCodeInserterID))
+ addPass(createPrologEpilogInserterPass(TM));
/// Add passes that optimize machine instructions after register allocation.
if (getOptLevel() != CodeGenOpt::None)
@@ -597,11 +645,19 @@ void TargetPassConfig::addMachinePasses() {
addPreEmitPass();
+ if (TM->Options.EnableIPRA)
+ // Collect register usage information and produce a register mask of
+ // clobbered registers, to be used to optimize call sites.
+ addPass(createRegUsageInfoCollector());
+
addPass(&FuncletLayoutID, false);
addPass(&StackMapLivenessID, false);
addPass(&LiveDebugValuesID, false);
+ addPass(&XRayInstrumentationID, false);
+ addPass(&PatchableFunctionID, false);
+
AddingMachinePasses = false;
}
@@ -661,6 +717,7 @@ MachinePassRegistry RegisterRegAlloc::Registry;
/// A dummy default pass factory indicates whether the register allocator is
/// overridden on the command line.
+LLVM_DEFINE_ONCE_FLAG(InitializeDefaultRegisterAllocatorFlag);
static FunctionPass *useDefaultRegisterAllocator() { return nullptr; }
static RegisterRegAlloc
defaultRegAlloc("default",
@@ -674,6 +731,15 @@ RegAlloc("regalloc",
cl::init(&useDefaultRegisterAllocator),
cl::desc("Register allocator to use"));
+static void initializeDefaultRegisterAllocatorOnce() {
+ RegisterRegAlloc::FunctionPassCtor Ctor = RegisterRegAlloc::getDefault();
+
+ if (!Ctor) {
+ Ctor = RegAlloc;
+ RegisterRegAlloc::setDefault(RegAlloc);
+ }
+}
+
/// Instantiate the default register allocator pass for this target for either
/// the optimized or unoptimized allocation path. This will be added to the pass
@@ -700,13 +766,11 @@ FunctionPass *TargetPassConfig::createTargetRegisterAllocator(bool Optimized) {
/// FIXME: When MachinePassRegistry register pass IDs instead of function ptrs,
/// this can be folded into addPass.
FunctionPass *TargetPassConfig::createRegAllocPass(bool Optimized) {
- RegisterRegAlloc::FunctionPassCtor Ctor = RegisterRegAlloc::getDefault();
-
// Initialize the global default.
- if (!Ctor) {
- Ctor = RegAlloc;
- RegisterRegAlloc::setDefault(RegAlloc);
- }
+ llvm::call_once(InitializeDefaultRegisterAllocatorFlag,
+ initializeDefaultRegisterAllocatorOnce);
+
+ RegisterRegAlloc::FunctionPassCtor Ctor = RegisterRegAlloc::getDefault();
if (Ctor != useDefaultRegisterAllocator)
return Ctor();
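
The rewrite above replaces a racy check-then-set of the RegisterRegAlloc
default with llvm::call_once. A standalone sketch of the same once-init
pattern using the standard-library equivalent (names invented):

#include <mutex>

static std::once_flag InitDefaultFlag;
static int TheDefault;

static void initTheDefaultOnce() {
  // Runs exactly once, even if getTheDefault() is entered concurrently.
  TheDefault = 42;
}

int getTheDefault() {
  std::call_once(InitDefaultFlag, initTheDefaultOnce);
  return TheDefault;
}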
@@ -734,6 +798,8 @@ void TargetPassConfig::addFastRegAlloc(FunctionPass *RegAllocPass) {
/// optimized register allocation, including coalescing, machine instruction
/// scheduling, and register allocation itself.
void TargetPassConfig::addOptimizedRegAlloc(FunctionPass *RegAllocPass) {
+ addPass(&DetectDeadLanesID, false);
+
addPass(&ProcessImplicitDefsID, false);
// LiveVariables currently requires pure SSA form.
@@ -755,6 +821,11 @@ void TargetPassConfig::addOptimizedRegAlloc(FunctionPass *RegAllocPass) {
addPass(&TwoAddressInstructionPassID, false);
addPass(&RegisterCoalescerID);
+ // The machine scheduler may accidentally create disconnected components
+ // when moving subregister definitions around; avoid this by splitting them
+ // into separate vregs beforehand. Splitting can also improve register
+ // allocation quality.
+ addPass(&RenameIndependentSubregsID);
+
// PreRA instruction scheduling.
addPass(&MachineSchedulerID);
@@ -809,7 +880,7 @@ bool TargetPassConfig::addGCPasses() {
/// Add standard basic block placement passes.
void TargetPassConfig::addBlockPlacement() {
- if (addPass(&MachineBlockPlacementID, false)) {
+ if (addPass(&MachineBlockPlacementID)) {
// Run a separate pass to collect block placement statistics.
if (EnableBlockPlacementStats)
addPass(&MachineBlockPlacementStatsID);
diff --git a/contrib/llvm/lib/CodeGen/TargetRegisterInfo.cpp b/contrib/llvm/lib/CodeGen/TargetRegisterInfo.cpp
index 0a7042a..e1d90cb 100644
--- a/contrib/llvm/lib/CodeGen/TargetRegisterInfo.cpp
+++ b/contrib/llvm/lib/CodeGen/TargetRegisterInfo.cpp
@@ -112,18 +112,11 @@ TargetRegisterInfo::getAllocatableClass(const TargetRegisterClass *RC) const {
if (!RC || RC->isAllocatable())
return RC;
- const unsigned *SubClass = RC->getSubClassMask();
- for (unsigned Base = 0, BaseE = getNumRegClasses();
- Base < BaseE; Base += 32) {
- unsigned Idx = Base;
- for (unsigned Mask = *SubClass++; Mask; Mask >>= 1) {
- unsigned Offset = countTrailingZeros(Mask);
- const TargetRegisterClass *SubRC = getRegClass(Idx + Offset);
- if (SubRC->isAllocatable())
- return SubRC;
- Mask >>= Offset;
- Idx += Offset + 1;
- }
+ for (BitMaskClassIterator It(RC->getSubClassMask(), *this); It.isValid();
+ ++It) {
+ const TargetRegisterClass *SubRC = getRegClass(It.getID());
+ if (SubRC->isAllocatable())
+ return SubRC;
}
return nullptr;
}
@@ -388,6 +381,15 @@ bool TargetRegisterInfo::needsStackRealignment(
return false;
}
+bool TargetRegisterInfo::regmaskSubsetEqual(const uint32_t *mask0,
+ const uint32_t *mask1) const {
+ unsigned N = (getNumRegs()+31) / 32;
+ for (unsigned I = 0; I < N; ++I)
+ if ((mask0[I] & mask1[I]) != mask0[I])
+ return false;
+ return true;
+}
+
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void
TargetRegisterInfo::dumpReg(unsigned Reg, unsigned SubRegIndex,
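
The new regmaskSubsetEqual tests register-mask inclusion one 32-bit word at a
time: mask0 is a subset of mask1 exactly when (mask0[I] & mask1[I]) == mask0[I]
holds for every word. A toy standalone illustration (values invented):

#include <cassert>
#include <cstdint>

static bool isSubset(const uint32_t *A, const uint32_t *B, unsigned Words) {
  for (unsigned I = 0; I < Words; ++I)
    if ((A[I] & B[I]) != A[I])
      return false; // A sets a bit that B lacks.
  return true;
}

int main() {
  uint32_t A[] = {0x5, 0x0}; // bits {0, 2}
  uint32_t B[] = {0x7, 0x1}; // bits {0, 1, 2, 32}
  assert(isSubset(A, B, 2) && !isSubset(B, A, 2));
  return 0;
}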
diff --git a/contrib/llvm/lib/CodeGen/TargetSchedule.cpp b/contrib/llvm/lib/CodeGen/TargetSchedule.cpp
index 1c4558c..022e912 100644
--- a/contrib/llvm/lib/CodeGen/TargetSchedule.cpp
+++ b/contrib/llvm/lib/CodeGen/TargetSchedule.cpp
@@ -77,7 +77,7 @@ unsigned TargetSchedModel::getNumMicroOps(const MachineInstr *MI,
const MCSchedClassDesc *SC) const {
if (hasInstrItineraries()) {
int UOps = InstrItins.getNumMicroOps(MI->getDesc().getSchedClass());
- return (UOps >= 0) ? UOps : TII->getNumMicroOps(&InstrItins, MI);
+ return (UOps >= 0) ? UOps : TII->getNumMicroOps(&InstrItins, *MI);
}
if (hasInstrSchedModel()) {
if (!SC)
@@ -156,13 +156,13 @@ unsigned TargetSchedModel::computeOperandLatency(
const MachineInstr *UseMI, unsigned UseOperIdx) const {
if (!hasInstrSchedModel() && !hasInstrItineraries())
- return TII->defaultDefLatency(SchedModel, DefMI);
+ return TII->defaultDefLatency(SchedModel, *DefMI);
if (hasInstrItineraries()) {
int OperLatency = 0;
if (UseMI) {
- OperLatency = TII->getOperandLatency(&InstrItins, DefMI, DefOperIdx,
- UseMI, UseOperIdx);
+ OperLatency = TII->getOperandLatency(&InstrItins, *DefMI, DefOperIdx,
+ *UseMI, UseOperIdx);
}
else {
unsigned DefClass = DefMI->getDesc().getSchedClass();
@@ -172,15 +172,15 @@ unsigned TargetSchedModel::computeOperandLatency(
return OperLatency;
// No operand latency was found.
- unsigned InstrLatency = TII->getInstrLatency(&InstrItins, DefMI);
+ unsigned InstrLatency = TII->getInstrLatency(&InstrItins, *DefMI);
// Expected latency is the max of the stage latency and itinerary props.
// Rather than directly querying InstrItins stage latency, we call a TII
// hook to allow subtargets to specialize latency. This hook is only
// applicable to the InstrItins model. InstrSchedModel should model all
// special cases without TII hooks.
- InstrLatency = std::max(InstrLatency,
- TII->defaultDefLatency(SchedModel, DefMI));
+ InstrLatency =
+ std::max(InstrLatency, TII->defaultDefLatency(SchedModel, *DefMI));
return InstrLatency;
}
// hasInstrSchedModel()
@@ -219,7 +219,7 @@ unsigned TargetSchedModel::computeOperandLatency(
// FIXME: Automatically giving all implicit defs defaultDefLatency is
// undesirable. We should only do it for defs that are known to the MC
// desc like flags. Truly implicit defs should get 1 cycle latency.
- return DefMI->isTransient() ? 0 : TII->defaultDefLatency(SchedModel, DefMI);
+ return DefMI->isTransient() ? 0 : TII->defaultDefLatency(SchedModel, *DefMI);
}
unsigned
@@ -254,24 +254,23 @@ TargetSchedModel::computeInstrLatency(const MachineInstr *MI,
// Allow subtargets to compute Bundle latencies outside the machine model.
if (hasInstrItineraries() || MI->isBundle() ||
(!hasInstrSchedModel() && !UseDefaultDefLatency))
- return TII->getInstrLatency(&InstrItins, MI);
+ return TII->getInstrLatency(&InstrItins, *MI);
if (hasInstrSchedModel()) {
const MCSchedClassDesc *SCDesc = resolveSchedClass(MI);
if (SCDesc->isValid())
return computeInstrLatency(*SCDesc);
}
- return TII->defaultDefLatency(SchedModel, MI);
+ return TII->defaultDefLatency(SchedModel, *MI);
}
unsigned TargetSchedModel::
computeOutputLatency(const MachineInstr *DefMI, unsigned DefOperIdx,
const MachineInstr *DepMI) const {
- if (SchedModel.MicroOpBufferSize <= 1)
+ if (!SchedModel.isOutOfOrder())
return 1;
- // MicroOpBufferSize > 1 indicates an out-of-order processor that can dispatch
- // WAW dependencies in the same cycle.
+ // An out-of-order processor can dispatch WAW dependencies in the same cycle.
// Treat predication as a data dependency for out-of-order cpus. In-order
// cpus do not need to treat predicated writes specially.
@@ -282,7 +281,7 @@ computeOutputLatency(const MachineInstr *DefMI, unsigned DefOperIdx,
unsigned Reg = DefMI->getOperand(DefOperIdx).getReg();
const MachineFunction &MF = *DefMI->getParent()->getParent();
const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
- if (!DepMI->readsRegister(Reg, TRI) && TII->isPredicated(DepMI))
+ if (!DepMI->readsRegister(Reg, TRI) && TII->isPredicated(*DepMI))
return computeInstrLatency(DefMI);
// If we have a per operand scheduling model, check if this def is writing
diff --git a/contrib/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp b/contrib/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp
index c6bae24..8feb18b 100644
--- a/contrib/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp
+++ b/contrib/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp
@@ -27,11 +27,9 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/CodeGen/Passes.h"
-#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
@@ -40,6 +38,7 @@
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
#include "llvm/IR/Function.h"
#include "llvm/MC/MCInstrItineraries.h"
#include "llvm/Support/CommandLine.h"
@@ -50,6 +49,7 @@
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
+
using namespace llvm;
#define DEBUG_TYPE "twoaddrinstr"
@@ -156,6 +156,7 @@ public:
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
AU.addRequired<AAResultsWrapperPass>();
+ AU.addUsedIfAvailable<LiveVariables>();
AU.addPreserved<LiveVariables>();
AU.addPreserved<SlotIndexes>();
AU.addPreserved<LiveIntervals>();
@@ -245,7 +246,7 @@ sink3AddrInstruction(MachineInstr *MI, unsigned SavedReg,
// appropriate location, we can try to sink the current instruction
// past it.
if (!KillMI || KillMI->getParent() != MBB || KillMI == MI ||
- KillMI == OldPos || KillMI->isTerminator())
+ MachineBasicBlock::iterator(KillMI) == OldPos || KillMI->isTerminator())
return false;
// If any of the definitions are used by another instruction between the
@@ -259,16 +260,15 @@ sink3AddrInstruction(MachineInstr *MI, unsigned SavedReg,
++KillPos;
unsigned NumVisited = 0;
- for (MachineBasicBlock::iterator I = std::next(OldPos); I != KillPos; ++I) {
- MachineInstr *OtherMI = I;
+ for (MachineInstr &OtherMI : llvm::make_range(std::next(OldPos), KillPos)) {
// DBG_VALUE cannot be counted against the limit.
- if (OtherMI->isDebugValue())
+ if (OtherMI.isDebugValue())
continue;
if (NumVisited > 30) // FIXME: Arbitrary limit to reduce compile time cost.
return false;
++NumVisited;
- for (unsigned i = 0, e = OtherMI->getNumOperands(); i != e; ++i) {
- MachineOperand &MO = OtherMI->getOperand(i);
+ for (unsigned i = 0, e = OtherMI.getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = OtherMI.getOperand(i);
if (!MO.isReg())
continue;
unsigned MOReg = MO.getReg();
@@ -277,8 +277,8 @@ sink3AddrInstruction(MachineInstr *MI, unsigned SavedReg,
if (DefReg == MOReg)
return false;
- if (MO.isKill() || (LIS && isPlainlyKilled(OtherMI, MOReg, LIS))) {
- if (OtherMI == KillMI && MOReg == SavedReg)
+ if (MO.isKill() || (LIS && isPlainlyKilled(&OtherMI, MOReg, LIS))) {
+ if (&OtherMI == KillMI && MOReg == SavedReg)
// Save the operand that kills the register. We want to unset the kill
// marker if we can sink MI past it.
KillMO = &MO;
@@ -297,7 +297,7 @@ sink3AddrInstruction(MachineInstr *MI, unsigned SavedReg,
KillMO->setIsKill(true);
if (LV)
- LV->replaceKillInstruction(SavedReg, KillMI, MI);
+ LV->replaceKillInstruction(SavedReg, *KillMI, *MI);
}
// Move instruction to its destination.
@@ -305,7 +305,7 @@ sink3AddrInstruction(MachineInstr *MI, unsigned SavedReg,
MBB->insert(KillPos, MI);
if (LIS)
- LIS->handleMove(MI);
+ LIS->handleMove(*MI);
++Num3AddrSunk;
return true;
@@ -400,7 +400,7 @@ static bool isCopyToReg(MachineInstr &MI, const TargetInstrInfo *TII,
static bool isPlainlyKilled(MachineInstr *MI, unsigned Reg,
LiveIntervals *LIS) {
if (LIS && TargetRegisterInfo::isVirtualRegister(Reg) &&
- !LIS->isNotInMIMap(MI)) {
+ !LIS->isNotInMIMap(*MI)) {
// FIXME: Sometimes tryInstructionTransform() will add instructions and
// test whether they can be folded before keeping them. In this case it
// sets a kill before recursively calling tryInstructionTransform() again.
@@ -413,7 +413,7 @@ static bool isPlainlyKilled(MachineInstr *MI, unsigned Reg,
if (!LI.hasAtLeastOneValue())
return false;
- SlotIndex useIdx = LIS->getInstructionIndex(MI);
+ SlotIndex useIdx = LIS->getInstructionIndex(*MI);
LiveInterval::const_iterator I = LI.find(useIdx);
assert(I != LI.end() && "Reg must be live-in to use.");
return !I->end.isBlock() && SlotIndex::isSameInstr(I->end, useIdx);
@@ -539,6 +539,15 @@ regsAreCompatible(unsigned RegA, unsigned RegB, const TargetRegisterInfo *TRI) {
return TRI->regsOverlap(RegA, RegB);
}
+// Returns true if Reg equals or aliases at least one register in Set.
+static bool regOverlapsSet(const SmallVectorImpl<unsigned> &Set, unsigned Reg,
+ const TargetRegisterInfo *TRI) {
+ for (unsigned R : Set)
+ if (TRI->regsOverlap(R, Reg))
+ return true;
+
+ return false;
+}
/// Return true if it's potentially profitable to commute the two-address
/// instruction that's being processed.
@@ -647,7 +656,7 @@ bool TwoAddressInstructionPass::commuteInstruction(MachineInstr *MI,
unsigned Dist) {
unsigned RegC = MI->getOperand(RegCIdx).getReg();
DEBUG(dbgs() << "2addr: COMMUTING : " << *MI);
- MachineInstr *NewMI = TII->commuteInstruction(MI, false, RegBIdx, RegCIdx);
+ MachineInstr *NewMI = TII->commuteInstruction(*MI, false, RegBIdx, RegCIdx);
if (NewMI == nullptr) {
DEBUG(dbgs() << "2addr: COMMUTING FAILED!\n");
@@ -695,7 +704,7 @@ TwoAddressInstructionPass::convertInstTo3Addr(MachineBasicBlock::iterator &mi,
unsigned Dist) {
// FIXME: Why does convertToThreeAddress() need an iterator reference?
MachineFunction::iterator MFI = MBB->getIterator();
- MachineInstr *NewMI = TII->convertToThreeAddress(MFI, mi, LV);
+ MachineInstr *NewMI = TII->convertToThreeAddress(MFI, *mi, LV);
assert(MBB->getIterator() == MFI &&
"convertToThreeAddress changed iterator reference");
if (!NewMI)
@@ -706,7 +715,7 @@ TwoAddressInstructionPass::convertInstTo3Addr(MachineBasicBlock::iterator &mi,
bool Sunk = false;
if (LIS)
- LIS->ReplaceMachineInstrInMaps(mi, NewMI);
+ LIS->ReplaceMachineInstrInMaps(*mi, *NewMI);
if (NewMI->findRegisterUseOperand(RegB, false, TRI))
// FIXME: Temporary workaround. If the new instruction doesn't
@@ -808,7 +817,6 @@ void TwoAddressInstructionPass::processCopy(MachineInstr *MI) {
}
Processed.insert(MI);
- return;
}
/// If there is one more local instruction that reads 'Reg' and it kills 'Reg',
@@ -862,13 +870,13 @@ rescheduleMIBelowKill(MachineBasicBlock::iterator &mi,
if (!MI->isSafeToMove(AA, SeenStore))
return false;
- if (TII->getInstrLatency(InstrItins, MI) > 1)
+ if (TII->getInstrLatency(InstrItins, *MI) > 1)
// FIXME: Needs more sophisticated heuristics.
return false;
- SmallSet<unsigned, 2> Uses;
- SmallSet<unsigned, 2> Kills;
- SmallSet<unsigned, 2> Defs;
+ SmallVector<unsigned, 2> Uses;
+ SmallVector<unsigned, 2> Kills;
+ SmallVector<unsigned, 2> Defs;
for (const MachineOperand &MO : MI->operands()) {
if (!MO.isReg())
continue;
@@ -876,12 +884,12 @@ rescheduleMIBelowKill(MachineBasicBlock::iterator &mi,
if (!MOReg)
continue;
if (MO.isDef())
- Defs.insert(MOReg);
+ Defs.push_back(MOReg);
else {
- Uses.insert(MOReg);
+ Uses.push_back(MOReg);
if (MOReg != Reg && (MO.isKill() ||
(LIS && isPlainlyKilled(MI, MOReg, LIS))))
- Kills.insert(MOReg);
+ Kills.push_back(MOReg);
}
}
@@ -890,8 +898,9 @@ rescheduleMIBelowKill(MachineBasicBlock::iterator &mi,
MachineBasicBlock::iterator AfterMI = std::next(Begin);
MachineBasicBlock::iterator End = AfterMI;
- while (End->isCopy() && Defs.count(End->getOperand(1).getReg())) {
- Defs.insert(End->getOperand(0).getReg());
+ while (End->isCopy() &&
+ regOverlapsSet(Defs, End->getOperand(1).getReg(), TRI)) {
+ Defs.push_back(End->getOperand(0).getReg());
++End;
}
@@ -899,47 +908,46 @@ rescheduleMIBelowKill(MachineBasicBlock::iterator &mi,
unsigned NumVisited = 0;
MachineBasicBlock::iterator KillPos = KillMI;
++KillPos;
- for (MachineBasicBlock::iterator I = End; I != KillPos; ++I) {
- MachineInstr *OtherMI = I;
+ for (MachineInstr &OtherMI : llvm::make_range(End, KillPos)) {
// DBG_VALUE cannot be counted against the limit.
- if (OtherMI->isDebugValue())
+ if (OtherMI.isDebugValue())
continue;
if (NumVisited > 10) // FIXME: Arbitrary limit to reduce compile time cost.
return false;
++NumVisited;
- if (OtherMI->hasUnmodeledSideEffects() || OtherMI->isCall() ||
- OtherMI->isBranch() || OtherMI->isTerminator())
+ if (OtherMI.hasUnmodeledSideEffects() || OtherMI.isCall() ||
+ OtherMI.isBranch() || OtherMI.isTerminator())
// Don't move past calls, etc.
return false;
- for (const MachineOperand &MO : OtherMI->operands()) {
+ for (const MachineOperand &MO : OtherMI.operands()) {
if (!MO.isReg())
continue;
unsigned MOReg = MO.getReg();
if (!MOReg)
continue;
if (MO.isDef()) {
- if (Uses.count(MOReg))
+ if (regOverlapsSet(Uses, MOReg, TRI))
// Physical register use would be clobbered.
return false;
- if (!MO.isDead() && Defs.count(MOReg))
+ if (!MO.isDead() && regOverlapsSet(Defs, MOReg, TRI))
// May clobber a physical register def.
// FIXME: This may be too conservative. It's ok if the instruction
// is sunk completely below the use.
return false;
} else {
- if (Defs.count(MOReg))
+ if (regOverlapsSet(Defs, MOReg, TRI))
return false;
- bool isKill = MO.isKill() ||
- (LIS && isPlainlyKilled(OtherMI, MOReg, LIS));
- if (MOReg != Reg &&
- ((isKill && Uses.count(MOReg)) || Kills.count(MOReg)))
+ bool isKill =
+ MO.isKill() || (LIS && isPlainlyKilled(&OtherMI, MOReg, LIS));
+ if (MOReg != Reg && ((isKill && regOverlapsSet(Uses, MOReg, TRI)) ||
+ regOverlapsSet(Kills, MOReg, TRI)))
// Don't want to extend other live ranges and update kills.
return false;
if (MOReg == Reg && !isKill)
// We can't schedule across a use of the register in question.
return false;
// Ensure that if this is the register in question, it's the kill we expect.
- assert((MOReg != Reg || OtherMI == KillMI) &&
+ assert((MOReg != Reg || &OtherMI == KillMI) &&
"Found multiple kills of a register in a basic block");
}
}
@@ -955,10 +963,9 @@ rescheduleMIBelowKill(MachineBasicBlock::iterator &mi,
// We have to move the copies first so that the MBB is still well-formed
// when calling handleMove().
for (MachineBasicBlock::iterator MBBI = AfterMI; MBBI != End;) {
- MachineInstr *CopyMI = MBBI;
- ++MBBI;
+ auto CopyMI = MBBI++;
MBB->splice(InsertPos, MBB, CopyMI);
- LIS->handleMove(CopyMI);
+ LIS->handleMove(*CopyMI);
InsertPos = CopyMI;
}
End = std::next(MachineBasicBlock::iterator(MI));
@@ -970,10 +977,10 @@ rescheduleMIBelowKill(MachineBasicBlock::iterator &mi,
// Update live variables
if (LIS) {
- LIS->handleMove(MI);
+ LIS->handleMove(*MI);
} else {
- LV->removeVirtualRegisterKilled(Reg, KillMI);
- LV->addVirtualRegisterKilled(Reg, MI);
+ LV->removeVirtualRegisterKilled(Reg, *KillMI);
+ LV->addVirtualRegisterKilled(Reg, *MI);
}
DEBUG(dbgs() << "\trescheduled below kill: " << *KillMI);
@@ -994,7 +1001,7 @@ bool TwoAddressInstructionPass::isDefTooClose(unsigned Reg, unsigned Dist,
return true; // Below MI
unsigned DefDist = DDI->second;
assert(Dist > DefDist && "Visited def already?");
- if (TII->getInstrLatency(InstrItins, &DefMI) > (Dist - DefDist))
+ if (TII->getInstrLatency(InstrItins, DefMI) > (Dist - DefDist))
return true;
}
return false;
@@ -1074,21 +1081,20 @@ rescheduleKillAboveMI(MachineBasicBlock::iterator &mi,
// Check that the reschedule will not break dependencies.
unsigned NumVisited = 0;
- MachineBasicBlock::iterator KillPos = KillMI;
- for (MachineBasicBlock::iterator I = mi; I != KillPos; ++I) {
- MachineInstr *OtherMI = I;
+ for (MachineInstr &OtherMI :
+ llvm::make_range(mi, MachineBasicBlock::iterator(KillMI))) {
// DBG_VALUE cannot be counted against the limit.
- if (OtherMI->isDebugValue())
+ if (OtherMI.isDebugValue())
continue;
if (NumVisited > 10) // FIXME: Arbitrary limit to reduce compile time cost.
return false;
++NumVisited;
- if (OtherMI->hasUnmodeledSideEffects() || OtherMI->isCall() ||
- OtherMI->isBranch() || OtherMI->isTerminator())
+ if (OtherMI.hasUnmodeledSideEffects() || OtherMI.isCall() ||
+ OtherMI.isBranch() || OtherMI.isTerminator())
// Don't move past calls, etc.
return false;
SmallVector<unsigned, 2> OtherDefs;
- for (const MachineOperand &MO : OtherMI->operands()) {
+ for (const MachineOperand &MO : OtherMI.operands()) {
if (!MO.isReg())
continue;
unsigned MOReg = MO.getReg();
@@ -1102,8 +1108,8 @@ rescheduleKillAboveMI(MachineBasicBlock::iterator &mi,
if (Kills.count(MOReg))
// Don't want to extend other live ranges and update kills.
return false;
- if (OtherMI != MI && MOReg == Reg &&
- !(MO.isKill() || (LIS && isPlainlyKilled(OtherMI, MOReg, LIS))))
+ if (&OtherMI != MI && MOReg == Reg &&
+ !(MO.isKill() || (LIS && isPlainlyKilled(&OtherMI, MOReg, LIS))))
// We can't schedule across a use of the register in question.
return false;
} else {
@@ -1138,10 +1144,10 @@ rescheduleKillAboveMI(MachineBasicBlock::iterator &mi,
// Update live variables
if (LIS) {
- LIS->handleMove(KillMI);
+ LIS->handleMove(*KillMI);
} else {
- LV->removeVirtualRegisterKilled(Reg, KillMI);
- LV->addVirtualRegisterKilled(Reg, MI);
+ LV->removeVirtualRegisterKilled(Reg, *KillMI);
+ LV->addVirtualRegisterKilled(Reg, *MI);
}
DEBUG(dbgs() << "\trescheduled kill: " << *KillMI);
@@ -1175,7 +1181,7 @@ bool TwoAddressInstructionPass::tryInstructionCommute(MachineInstr *MI,
// other commutable operands and does not change the values of passed
// variables.
if (OtherOpIdx == BaseOpIdx ||
- !TII->findCommutedOpIndices(MI, BaseOpIdx, OtherOpIdx))
+ !TII->findCommutedOpIndices(*MI, BaseOpIdx, OtherOpIdx))
continue;
unsigned OtherOpReg = MI->getOperand(OtherOpIdx).getReg();
@@ -1308,9 +1314,9 @@ tryInstructionTransform(MachineBasicBlock::iterator &mi,
TII->getRegClass(UnfoldMCID, LoadRegIndex, TRI, *MF));
unsigned Reg = MRI->createVirtualRegister(RC);
SmallVector<MachineInstr *, 2> NewMIs;
- if (!TII->unfoldMemoryOperand(*MF, &MI, Reg,
- /*UnfoldLoad=*/true,/*UnfoldStore=*/false,
- NewMIs)) {
+ if (!TII->unfoldMemoryOperand(*MF, MI, Reg,
+ /*UnfoldLoad=*/true,
+ /*UnfoldStore=*/false, NewMIs)) {
DEBUG(dbgs() << "2addr: ABANDONING UNFOLD\n");
return false;
}
@@ -1347,25 +1353,25 @@ tryInstructionTransform(MachineBasicBlock::iterator &mi,
if (MO.isUse()) {
if (MO.isKill()) {
if (NewMIs[0]->killsRegister(MO.getReg()))
- LV->replaceKillInstruction(MO.getReg(), &MI, NewMIs[0]);
+ LV->replaceKillInstruction(MO.getReg(), MI, *NewMIs[0]);
else {
assert(NewMIs[1]->killsRegister(MO.getReg()) &&
"Kill missing after load unfold!");
- LV->replaceKillInstruction(MO.getReg(), &MI, NewMIs[1]);
+ LV->replaceKillInstruction(MO.getReg(), MI, *NewMIs[1]);
}
}
- } else if (LV->removeVirtualRegisterDead(MO.getReg(), &MI)) {
+ } else if (LV->removeVirtualRegisterDead(MO.getReg(), MI)) {
if (NewMIs[1]->registerDefIsDead(MO.getReg()))
- LV->addVirtualRegisterDead(MO.getReg(), NewMIs[1]);
+ LV->addVirtualRegisterDead(MO.getReg(), *NewMIs[1]);
else {
assert(NewMIs[0]->registerDefIsDead(MO.getReg()) &&
"Dead flag missing after load unfold!");
- LV->addVirtualRegisterDead(MO.getReg(), NewMIs[0]);
+ LV->addVirtualRegisterDead(MO.getReg(), *NewMIs[0]);
}
}
}
}
- LV->addVirtualRegisterKilled(Reg, NewMIs[1]);
+ LV->addVirtualRegisterKilled(Reg, *NewMIs[1]);
}
SmallVector<unsigned, 4> OrigRegs;
@@ -1518,17 +1524,17 @@ TwoAddressInstructionPass::processTiedPairs(MachineInstr *MI,
// Update DistanceMap.
MachineBasicBlock::iterator PrevMI = MI;
--PrevMI;
- DistanceMap.insert(std::make_pair(PrevMI, Dist));
+ DistanceMap.insert(std::make_pair(&*PrevMI, Dist));
DistanceMap[MI] = ++Dist;
if (LIS) {
- LastCopyIdx = LIS->InsertMachineInstrInMaps(PrevMI).getRegSlot();
+ LastCopyIdx = LIS->InsertMachineInstrInMaps(*PrevMI).getRegSlot();
if (TargetRegisterInfo::isVirtualRegister(RegA)) {
LiveInterval &LI = LIS->getInterval(RegA);
VNInfo *VNI = LI.getNextValue(LastCopyIdx, LIS->getVNInfoAllocator());
SlotIndex endIdx =
- LIS->getInstructionIndex(MI).getRegSlot(IsEarlyClobber);
+ LIS->getInstructionIndex(*MI).getRegSlot(IsEarlyClobber);
LI.addSegment(LiveInterval::Segment(LastCopyIdx, endIdx, VNI));
}
}
@@ -1574,16 +1580,16 @@ TwoAddressInstructionPass::processTiedPairs(MachineInstr *MI,
}
// Update live variables for regB.
- if (RemovedKillFlag && LV && LV->getVarInfo(RegB).removeKill(MI)) {
+ if (RemovedKillFlag && LV && LV->getVarInfo(RegB).removeKill(*MI)) {
MachineBasicBlock::iterator PrevMI = MI;
--PrevMI;
- LV->addVirtualRegisterKilled(RegB, PrevMI);
+ LV->addVirtualRegisterKilled(RegB, *PrevMI);
}
// Update LiveIntervals.
if (LIS) {
LiveInterval &LI = LIS->getInterval(RegB);
- SlotIndex MIIdx = LIS->getInstructionIndex(MI);
+ SlotIndex MIIdx = LIS->getInstructionIndex(*MI);
LiveInterval::const_iterator I = LI.find(MIIdx);
assert(I != LI.end() && "RegB must be live-in to use.");
@@ -1650,13 +1656,13 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &Func) {
if (mi->isRegSequence())
eliminateRegSequence(mi);
- DistanceMap.insert(std::make_pair(mi, ++Dist));
+ DistanceMap.insert(std::make_pair(&*mi, ++Dist));
processCopy(&*mi);
// First scan through all the tied register uses in this instruction
// and record a list of pairs of tied operands for each register.
- if (!collectTiedOperands(mi, TiedOperands)) {
+ if (!collectTiedOperands(&*mi, TiedOperands)) {
mi = nmi;
continue;
}
@@ -1689,7 +1695,7 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &Func) {
// Now iterate over the information collected above.
for (auto &TO : TiedOperands) {
- processTiedPairs(mi, TO.second, Dist);
+ processTiedPairs(&*mi, TO.second, Dist);
DEBUG(dbgs() << "\t\trewrite to:\t" << *mi);
}
@@ -1733,27 +1739,27 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &Func) {
///
void TwoAddressInstructionPass::
eliminateRegSequence(MachineBasicBlock::iterator &MBBI) {
- MachineInstr *MI = MBBI;
- unsigned DstReg = MI->getOperand(0).getReg();
- if (MI->getOperand(0).getSubReg() ||
+ MachineInstr &MI = *MBBI;
+ unsigned DstReg = MI.getOperand(0).getReg();
+ if (MI.getOperand(0).getSubReg() ||
TargetRegisterInfo::isPhysicalRegister(DstReg) ||
- !(MI->getNumOperands() & 1)) {
- DEBUG(dbgs() << "Illegal REG_SEQUENCE instruction:" << *MI);
+ !(MI.getNumOperands() & 1)) {
+ DEBUG(dbgs() << "Illegal REG_SEQUENCE instruction:" << MI);
llvm_unreachable(nullptr);
}
SmallVector<unsigned, 4> OrigRegs;
if (LIS) {
- OrigRegs.push_back(MI->getOperand(0).getReg());
- for (unsigned i = 1, e = MI->getNumOperands(); i < e; i += 2)
- OrigRegs.push_back(MI->getOperand(i).getReg());
+ OrigRegs.push_back(MI.getOperand(0).getReg());
+ for (unsigned i = 1, e = MI.getNumOperands(); i < e; i += 2)
+ OrigRegs.push_back(MI.getOperand(i).getReg());
}
bool DefEmitted = false;
- for (unsigned i = 1, e = MI->getNumOperands(); i < e; i += 2) {
- MachineOperand &UseMO = MI->getOperand(i);
+ for (unsigned i = 1, e = MI.getNumOperands(); i < e; i += 2) {
+ MachineOperand &UseMO = MI.getOperand(i);
unsigned SrcReg = UseMO.getReg();
- unsigned SubIdx = MI->getOperand(i+1).getImm();
+ unsigned SubIdx = MI.getOperand(i+1).getImm();
// Nothing needs to be inserted for <undef> operands.
if (UseMO.isUndef())
continue;
@@ -1763,18 +1769,18 @@ eliminateRegSequence(MachineBasicBlock::iterator &MBBI) {
bool isKill = UseMO.isKill();
if (isKill)
for (unsigned j = i + 2; j < e; j += 2)
- if (MI->getOperand(j).getReg() == SrcReg) {
- MI->getOperand(j).setIsKill();
+ if (MI.getOperand(j).getReg() == SrcReg) {
+ MI.getOperand(j).setIsKill();
UseMO.setIsKill(false);
isKill = false;
break;
}
// Insert the sub-register copy.
- MachineInstr *CopyMI = BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
+ MachineInstr *CopyMI = BuildMI(*MI.getParent(), MI, MI.getDebugLoc(),
TII->get(TargetOpcode::COPY))
- .addReg(DstReg, RegState::Define, SubIdx)
- .addOperand(UseMO);
+ .addReg(DstReg, RegState::Define, SubIdx)
+ .addOperand(UseMO);
// The first def needs an <undef> flag because there is no live register
// before it.
@@ -1787,7 +1793,7 @@ eliminateRegSequence(MachineBasicBlock::iterator &MBBI) {
// Update LiveVariables' kill info.
if (LV && isKill && !TargetRegisterInfo::isPhysicalRegister(SrcReg))
- LV->replaceKillInstruction(SrcReg, MI, CopyMI);
+ LV->replaceKillInstruction(SrcReg, MI, *CopyMI);
DEBUG(dbgs() << "Inserted: " << *CopyMI);
}
@@ -1796,13 +1802,13 @@ eliminateRegSequence(MachineBasicBlock::iterator &MBBI) {
std::next(MachineBasicBlock::iterator(MI));
if (!DefEmitted) {
- DEBUG(dbgs() << "Turned: " << *MI << " into an IMPLICIT_DEF");
- MI->setDesc(TII->get(TargetOpcode::IMPLICIT_DEF));
- for (int j = MI->getNumOperands() - 1, ee = 0; j > ee; --j)
- MI->RemoveOperand(j);
+ DEBUG(dbgs() << "Turned: " << MI << " into an IMPLICIT_DEF");
+ MI.setDesc(TII->get(TargetOpcode::IMPLICIT_DEF));
+ for (int j = MI.getNumOperands() - 1, ee = 0; j > ee; --j)
+ MI.RemoveOperand(j);
} else {
- DEBUG(dbgs() << "Eliminated: " << *MI);
- MI->eraseFromParent();
+ DEBUG(dbgs() << "Eliminated: " << MI);
+ MI.eraseFromParent();
}
// Update LiveIntervals.
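
For orientation, eliminateRegSequence() rewrites an instruction of the form
DstReg = REG_SEQUENCE Src0, SubIdx0, Src1, SubIdx1, ... into one subregister
COPY per (source, subindex) pair, transferring kill flags and marking the
first def <undef> because nothing is live before it; if every source operand
is <undef> and no COPY is emitted, the REG_SEQUENCE is downgraded to an
IMPLICIT_DEF rather than erased.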
diff --git a/contrib/llvm/lib/CodeGen/UnreachableBlockElim.cpp b/contrib/llvm/lib/CodeGen/UnreachableBlockElim.cpp
index 8c9631e..501e01c 100644
--- a/contrib/llvm/lib/CodeGen/UnreachableBlockElim.cpp
+++ b/contrib/llvm/lib/CodeGen/UnreachableBlockElim.cpp
@@ -20,7 +20,7 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/UnreachableBlockElim.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/CodeGen/MachineDominators.h"
@@ -28,6 +28,7 @@
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Dominators.h"
@@ -38,29 +39,7 @@
#include "llvm/Target/TargetInstrInfo.h"
using namespace llvm;
-namespace {
- class UnreachableBlockElim : public FunctionPass {
- bool runOnFunction(Function &F) override;
- public:
- static char ID; // Pass identification, replacement for typeid
- UnreachableBlockElim() : FunctionPass(ID) {
- initializeUnreachableBlockElimPass(*PassRegistry::getPassRegistry());
- }
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addPreserved<DominatorTreeWrapperPass>();
- }
- };
-}
-char UnreachableBlockElim::ID = 0;
-INITIALIZE_PASS(UnreachableBlockElim, "unreachableblockelim",
- "Remove unreachable blocks from the CFG", false, false)
-
-FunctionPass *llvm::createUnreachableBlockEliminationPass() {
- return new UnreachableBlockElim();
-}
-
-bool UnreachableBlockElim::runOnFunction(Function &F) {
+static bool eliminateUnreachableBlock(Function &F) {
SmallPtrSet<BasicBlock*, 8> Reachable;
// Mark all reachable blocks.
@@ -91,6 +70,41 @@ bool UnreachableBlockElim::runOnFunction(Function &F) {
return !DeadBlocks.empty();
}
+namespace {
+class UnreachableBlockElimLegacyPass : public FunctionPass {
+ bool runOnFunction(Function &F) override {
+ return eliminateUnreachableBlock(F);
+ }
+
+public:
+ static char ID; // Pass identification, replacement for typeid
+ UnreachableBlockElimLegacyPass() : FunctionPass(ID) {
+ initializeUnreachableBlockElimLegacyPassPass(
+ *PassRegistry::getPassRegistry());
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addPreserved<DominatorTreeWrapperPass>();
+ }
+};
+}
+char UnreachableBlockElimLegacyPass::ID = 0;
+INITIALIZE_PASS(UnreachableBlockElimLegacyPass, "unreachableblockelim",
+ "Remove unreachable blocks from the CFG", false, false)
+
+FunctionPass *llvm::createUnreachableBlockEliminationPass() {
+ return new UnreachableBlockElimLegacyPass();
+}
+
+PreservedAnalyses UnreachableBlockElimPass::run(Function &F,
+ FunctionAnalysisManager &AM) {
+ bool Changed = eliminateUnreachableBlock(F);
+ if (!Changed)
+ return PreservedAnalyses::all();
+ PreservedAnalyses PA;
+ PA.preserve<DominatorTreeAnalysis>();
+ return PA;
+}
namespace {
class UnreachableMachineBlockElim : public MachineFunctionPass {
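
With this split, eliminateUnreachableBlock() now backs both pass managers. A
hedged usage sketch for the new-style pass; the Function and the configured
FunctionAnalysisManager are assumed to exist and are not part of this change:

#include "llvm/CodeGen/UnreachableBlockElim.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/PassManager.h"

void runUnreachableElim(llvm::Function &F,
                        llvm::FunctionAnalysisManager &FAM) {
  llvm::FunctionPassManager FPM;
  FPM.addPass(llvm::UnreachableBlockElimPass());
  FPM.run(F, FAM); // Returns the PreservedAnalyses computed by run() above.
}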
@@ -184,9 +198,7 @@ bool UnreachableMachineBlockElim::runOnMachineFunction(MachineFunction &F) {
unsigned Input = phi->getOperand(1).getReg();
unsigned Output = phi->getOperand(0).getReg();
- MachineInstr* temp = phi;
- ++phi;
- temp->eraseFromParent();
+ phi++->eraseFromParent();
ModifiedPHI = true;
if (Input != Output) {
diff --git a/contrib/llvm/lib/CodeGen/VirtRegMap.cpp b/contrib/llvm/lib/CodeGen/VirtRegMap.cpp
index bf1c0dc..8a3a032 100644
--- a/contrib/llvm/lib/CodeGen/VirtRegMap.cpp
+++ b/contrib/llvm/lib/CodeGen/VirtRegMap.cpp
@@ -19,7 +19,6 @@
#include "llvm/CodeGen/VirtRegMap.h"
#include "LiveDebugVariables.h"
#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/SparseSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/LiveStackAnalysis.h"
@@ -29,7 +28,6 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/IR/Function.h"
-#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
@@ -84,7 +82,7 @@ unsigned VirtRegMap::createSpillSlot(const TargetRegisterClass *RC) {
bool VirtRegMap::hasPreferredPhys(unsigned VirtReg) {
unsigned Hint = MRI->getSimpleHint(VirtReg);
if (!Hint)
- return 0;
+ return false;
if (TargetRegisterInfo::isVirtualRegister(Hint))
Hint = getPhys(Hint);
return getPhys(VirtReg) == Hint;
@@ -139,7 +137,7 @@ void VirtRegMap::print(raw_ostream &OS, const Module*) const {
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
-void VirtRegMap::dump() const {
+LLVM_DUMP_METHOD void VirtRegMap::dump() const {
print(dbgs());
}
#endif
@@ -168,6 +166,7 @@ class VirtRegRewriter : public MachineFunctionPass {
void addMBBLiveIns();
bool readsUndefSubreg(const MachineOperand &MO) const;
void addLiveInsForSubRanges(const LiveInterval &LI, unsigned PhysReg) const;
+ void handleIdentityCopy(MachineInstr &MI) const;
public:
static char ID;
@@ -176,6 +175,10 @@ public:
void getAnalysisUsage(AnalysisUsage &AU) const override;
bool runOnMachineFunction(MachineFunction&) override;
+ MachineFunctionProperties getSetProperties() const override {
+ return MachineFunctionProperties().set(
+ MachineFunctionProperties::Property::AllVRegsAllocated);
+ }
};
} // end anonymous namespace
@@ -329,7 +332,7 @@ bool VirtRegRewriter::readsUndefSubreg(const MachineOperand &MO) const {
unsigned Reg = MO.getReg();
const LiveInterval &LI = LIS->getInterval(Reg);
const MachineInstr &MI = *MO.getParent();
- SlotIndex BaseIndex = LIS->getInstructionIndex(&MI);
+ SlotIndex BaseIndex = LIS->getInstructionIndex(MI);
// This code is only meant to handle reading undefined subregisters which
// we couldn't properly detect before.
assert(LI.liveAt(BaseIndex) &&
@@ -344,6 +347,30 @@ bool VirtRegRewriter::readsUndefSubreg(const MachineOperand &MO) const {
return true;
}
+void VirtRegRewriter::handleIdentityCopy(MachineInstr &MI) const {
+ if (!MI.isIdentityCopy())
+ return;
+ DEBUG(dbgs() << "Identity copy: " << MI);
+ ++NumIdCopies;
+
+ // Copies like:
+ // %R0 = COPY %R0<undef>
+ // %AL = COPY %AL, %EAX<imp-def>
+ // give us additional liveness information: The target (super-)register
+ // must not be valid before this point. Replace the COPY with a KILL
+ // instruction to maintain this information.
+ if (MI.getOperand(0).isUndef() || MI.getNumOperands() > 2) {
+ MI.setDesc(TII->get(TargetOpcode::KILL));
+ DEBUG(dbgs() << " replace by: " << MI);
+ return;
+ }
+
+ if (Indexes)
+ Indexes->removeMachineInstrFromMaps(MI);
+ MI.eraseFromParent();
+ DEBUG(dbgs() << " deleted.\n");
+}
+
void VirtRegRewriter::rewrite() {
bool NoSubRegLiveness = !MRI->subRegLivenessEnabled();
SmallVector<unsigned, 8> SuperDeads;
@@ -433,16 +460,8 @@ void VirtRegRewriter::rewrite() {
DEBUG(dbgs() << "> " << *MI);
- // Finally, remove any identity copies.
- if (MI->isIdentityCopy()) {
- ++NumIdCopies;
- DEBUG(dbgs() << "Deleting identity copy.\n");
- if (Indexes)
- Indexes->removeMachineInstrFromMaps(MI);
- // It's safe to erase MI because MII has already been incremented.
- MI->eraseFromParent();
- }
+ // We can remove identity copies right now.
+ handleIdentityCopy(*MI);
}
}
}
-
diff --git a/contrib/llvm/lib/CodeGen/WinEHPrepare.cpp b/contrib/llvm/lib/CodeGen/WinEHPrepare.cpp
index 14ec911..041fb7b 100644
--- a/contrib/llvm/lib/CodeGen/WinEHPrepare.cpp
+++ b/contrib/llvm/lib/CodeGen/WinEHPrepare.cpp
@@ -254,9 +254,11 @@ static void calculateCXXStateNumbers(WinEHFuncInfo &FuncInfo,
FuncInfo.FuncletBaseStateMap[CatchPad] = CatchLow;
for (const User *U : CatchPad->users()) {
const auto *UserI = cast<Instruction>(U);
- if (auto *InnerCatchSwitch = dyn_cast<CatchSwitchInst>(UserI))
- if (InnerCatchSwitch->getUnwindDest() == CatchSwitch->getUnwindDest())
+ if (auto *InnerCatchSwitch = dyn_cast<CatchSwitchInst>(UserI)) {
+ BasicBlock *UnwindDest = InnerCatchSwitch->getUnwindDest();
+ if (!UnwindDest || UnwindDest == CatchSwitch->getUnwindDest())
calculateCXXStateNumbers(FuncInfo, UserI, CatchLow);
+ }
if (auto *InnerCleanupPad = dyn_cast<CleanupPadInst>(UserI)) {
BasicBlock *UnwindDest = getCleanupRetUnwindDest(InnerCleanupPad);
// If a nested cleanup pad reports a null unwind destination and the
@@ -361,9 +363,11 @@ static void calculateSEHStateNumbers(WinEHFuncInfo &FuncInfo,
// outside the __try.
for (const User *U : CatchPad->users()) {
const auto *UserI = cast<Instruction>(U);
- if (auto *InnerCatchSwitch = dyn_cast<CatchSwitchInst>(UserI))
- if (InnerCatchSwitch->getUnwindDest() == CatchSwitch->getUnwindDest())
+ if (auto *InnerCatchSwitch = dyn_cast<CatchSwitchInst>(UserI)) {
+ BasicBlock *UnwindDest = InnerCatchSwitch->getUnwindDest();
+ if (!UnwindDest || UnwindDest == CatchSwitch->getUnwindDest())
calculateSEHStateNumbers(FuncInfo, UserI, ParentState);
+ }
if (auto *InnerCleanupPad = dyn_cast<CleanupPadInst>(UserI)) {
BasicBlock *UnwindDest = getCleanupRetUnwindDest(InnerCleanupPad);
// If a nested cleanup pad reports a null unwind destination and the
@@ -783,7 +787,7 @@ void WinEHPrepare::cloneCommonBlocks(Function &F) {
// Loop over all instructions, fixing each one as we find it...
for (Instruction &I : *BB)
RemapInstruction(&I, VMap,
- RF_IgnoreMissingEntries | RF_NoModuleLevelChanges);
+ RF_IgnoreMissingLocals | RF_NoModuleLevelChanges);
// Catchrets targeting cloned blocks need to be updated separately from
// the loop above because they are not in the current funclet.
@@ -795,7 +799,7 @@ void WinEHPrepare::cloneCommonBlocks(Function &F) {
FixupCatchrets.clear();
for (BasicBlock *Pred : predecessors(OldBlock))
if (auto *CatchRet = dyn_cast<CatchReturnInst>(Pred->getTerminator()))
- if (CatchRet->getParentPad() == FuncletToken)
+ if (CatchRet->getCatchSwitchParentPad() == FuncletToken)
FixupCatchrets.push_back(CatchRet);
for (CatchReturnInst *CatchRet : FixupCatchrets)
@@ -810,7 +814,7 @@ void WinEHPrepare::cloneCommonBlocks(Function &F) {
bool EdgeTargetsFunclet;
if (auto *CRI =
dyn_cast<CatchReturnInst>(IncomingBlock->getTerminator())) {
- EdgeTargetsFunclet = (CRI->getParentPad() == FuncletToken);
+ EdgeTargetsFunclet = (CRI->getCatchSwitchParentPad() == FuncletToken);
} else {
ColorVector &IncomingColors = BlockColors[IncomingBlock];
assert(!IncomingColors.empty() && "Block not colored!");
@@ -944,10 +948,11 @@ void WinEHPrepare::removeImplausibleInstructions(Function &F) {
if (FuncletBundleOperand == FuncletPad)
continue;
- // Skip call sites which are nounwind intrinsics.
+ // Skip call sites which are nounwind intrinsics or inline asm.
auto *CalledFn =
dyn_cast<Function>(CS.getCalledValue()->stripPointerCasts());
- if (CalledFn && CalledFn->isIntrinsic() && CS.doesNotThrow())
+ if (CalledFn && ((CalledFn->isIntrinsic() && CS.doesNotThrow()) ||
+ CS.isInlineAsm()))
continue;
// This call site was not part of this funclet, remove it.
diff --git a/contrib/llvm/lib/CodeGen/XRayInstrumentation.cpp b/contrib/llvm/lib/CodeGen/XRayInstrumentation.cpp
new file mode 100644
index 0000000..1f95708
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/XRayInstrumentation.cpp
@@ -0,0 +1,96 @@
+//===-- XRayInstrumentation.cpp - Adds XRay instrumentation to functions. -===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a MachineFunctionPass that inserts the appropriate
+// XRay instrumentation instructions. We look for XRay-specific attributes
+// on the function to determine whether we should insert the replacement
+// operations.
+//
+//===---------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/Analysis.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+
+using namespace llvm;
+
+namespace {
+struct XRayInstrumentation : public MachineFunctionPass {
+ static char ID;
+
+ XRayInstrumentation() : MachineFunctionPass(ID) {
+ initializeXRayInstrumentationPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+};
+}
+
+bool XRayInstrumentation::runOnMachineFunction(MachineFunction &MF) {
+ auto &F = *MF.getFunction();
+ auto InstrAttr = F.getFnAttribute("function-instrument");
+ bool AlwaysInstrument = !InstrAttr.hasAttribute(Attribute::None) &&
+ InstrAttr.isStringAttribute() &&
+ InstrAttr.getValueAsString() == "xray-always";
+ Attribute Attr = F.getFnAttribute("xray-instruction-threshold");
+ unsigned XRayThreshold = 0;
+ if (!AlwaysInstrument) {
+ if (Attr.hasAttribute(Attribute::None) || !Attr.isStringAttribute())
+ return false; // XRay threshold attribute not found.
+ if (Attr.getValueAsString().getAsInteger(10, XRayThreshold))
+ return false; // Invalid value for threshold.
+ if (F.size() < XRayThreshold)
+ return false; // Function is too small.
+ }
+
+ // FIXME: Do the loop triviality analysis here or in an earlier pass.
+
+ // First, insert a PATCHABLE_FUNCTION_ENTER as the first instruction of the
+ // MachineFunction.
+ auto &FirstMBB = *MF.begin();
+ auto &FirstMI = *FirstMBB.begin();
+ auto *TII = MF.getSubtarget().getInstrInfo();
+ BuildMI(FirstMBB, FirstMI, FirstMI.getDebugLoc(),
+ TII->get(TargetOpcode::PATCHABLE_FUNCTION_ENTER));
+
+ // Then we look through all terminators for returns and replace them with
+ // PATCHABLE_RET instructions.
+ SmallVector<MachineInstr *, 4> Terminators;
+ for (auto &MBB : MF) {
+ for (auto &T : MBB.terminators()) {
+ // FIXME: Handle tail calls here too?
+ if (T.isReturn() && T.getOpcode() == TII->getReturnOpcode()) {
+ // Replace return instructions with:
+ // PATCHABLE_RET <Opcode>, <Operand>...
+ auto MIB = BuildMI(MBB, T, T.getDebugLoc(),
+ TII->get(TargetOpcode::PATCHABLE_RET))
+ .addImm(T.getOpcode());
+ for (auto &MO : T.operands())
+ MIB.addOperand(MO);
+ Terminators.push_back(&T);
+ break;
+ }
+ }
+ }
+
+ for (auto &I : Terminators)
+ I->eraseFromParent();
+
+ return true;
+}
+
+char XRayInstrumentation::ID = 0;
+char &llvm::XRayInstrumentationID = XRayInstrumentation::ID;
+INITIALIZE_PASS(XRayInstrumentation, "xray-instrumentation", "Insert XRay ops",
+ false, false)
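
The attributes this pass keys on are string function attributes in IR. With a
clang frontend of the same vintage they can be requested from C++ roughly as
follows (attribute spellings assumed from clang's XRay support, not part of
this diff):

// Lowers to "function-instrument"="xray-always": always patched.
[[clang::xray_always_instrument]] void AlwaysTraced();

// No attribute: patched only when the function meets the
// "xray-instruction-threshold" (cf. clang's -fxray-instruction-threshold=N).
void MaybeTraced();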
diff --git a/contrib/llvm/lib/CodeGen/module.modulemap b/contrib/llvm/lib/CodeGen/module.modulemap
deleted file mode 100644
index d4f68bc..0000000
--- a/contrib/llvm/lib/CodeGen/module.modulemap
+++ /dev/null
@@ -1 +0,0 @@
-module CodeGen { requires cplusplus umbrella "." module * { export * } }