Diffstat (limited to 'contrib/llvm/lib/CodeGen')
-rw-r--r--  contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp | 17
-rw-r--r--  contrib/llvm/lib/CodeGen/Analysis.cpp | 26
-rw-r--r--  contrib/llvm/lib/CodeGen/AntiDepBreaker.h | 19
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp | 2
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 367
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp | 2
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp | 25
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp | 254
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h | 67
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/DIE.cpp | 152
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp | 136
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/DIEHash.h | 55
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/DIEHashAttributes.def | 55
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp | 7
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp | 34
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.h | 4
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/DebugLocEntry.h | 4
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/DebugLocStream.h | 2
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.h | 2
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp | 2
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp | 288
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h | 47
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp | 415
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h | 53
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp | 342
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h | 144
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/DwarfFile.h | 4
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp | 238
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h | 65
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp | 2
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp | 19
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp | 23
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/WinException.cpp | 20
-rw-r--r--  contrib/llvm/lib/CodeGen/AtomicExpandPass.cpp | 65
-rw-r--r--  contrib/llvm/lib/CodeGen/BasicTargetTransformInfo.cpp | 4
-rw-r--r--  contrib/llvm/lib/CodeGen/BranchCoalescing.cpp | 758
-rw-r--r--  contrib/llvm/lib/CodeGen/BranchFolding.cpp | 245
-rw-r--r--  contrib/llvm/lib/CodeGen/BranchFolding.h | 44
-rw-r--r--  contrib/llvm/lib/CodeGen/BranchRelaxation.cpp | 16
-rw-r--r--  contrib/llvm/lib/CodeGen/BuiltinGCs.cpp | 11
-rw-r--r--  contrib/llvm/lib/CodeGen/CalcSpillWeights.cpp | 2
-rw-r--r--  contrib/llvm/lib/CodeGen/CallingConvLower.cpp | 3
-rw-r--r--  contrib/llvm/lib/CodeGen/CodeGen.cpp | 21
-rw-r--r--  contrib/llvm/lib/CodeGen/CodeGenPrepare.cpp | 2346
-rw-r--r--  contrib/llvm/lib/CodeGen/CountingFunctionInserter.cpp | 2
-rw-r--r--  contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp | 13
-rw-r--r--  contrib/llvm/lib/CodeGen/DFAPacketizer.cpp | 89
-rw-r--r--  contrib/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp | 8
-rw-r--r--  contrib/llvm/lib/CodeGen/DetectDeadLanes.cpp | 5
-rw-r--r--  contrib/llvm/lib/CodeGen/DwarfEHPrepare.cpp | 34
-rw-r--r--  contrib/llvm/lib/CodeGen/EarlyIfConversion.cpp | 8
-rw-r--r--  contrib/llvm/lib/CodeGen/ExecutionDepsFix.cpp | 472
-rw-r--r--  contrib/llvm/lib/CodeGen/ExpandISelPseudos.cpp | 4
-rw-r--r--  contrib/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp | 9
-rw-r--r--  contrib/llvm/lib/CodeGen/ExpandReductions.cpp | 167
-rw-r--r--  contrib/llvm/lib/CodeGen/FEntryInserter.cpp | 55
-rw-r--r--  contrib/llvm/lib/CodeGen/FaultMaps.cpp | 13
-rw-r--r--  contrib/llvm/lib/CodeGen/FuncletLayout.cpp | 4
-rw-r--r--  contrib/llvm/lib/CodeGen/GCMetadata.cpp | 16
-rw-r--r--  contrib/llvm/lib/CodeGen/GCMetadataPrinter.cpp | 7
-rw-r--r--  contrib/llvm/lib/CodeGen/GCStrategy.cpp | 7
-rw-r--r--  contrib/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp | 41
-rw-r--r--  contrib/llvm/lib/CodeGen/GlobalISel/GlobalISel.cpp | 1
-rw-r--r--  contrib/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp | 899
-rw-r--r--  contrib/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp | 107
-rw-r--r--  contrib/llvm/lib/CodeGen/GlobalISel/InstructionSelector.cpp | 52
-rw-r--r--  contrib/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp | 191
-rw-r--r--  contrib/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp | 549
-rw-r--r--  contrib/llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp | 67
-rw-r--r--  contrib/llvm/lib/CodeGen/GlobalISel/Localizer.cpp | 123
-rw-r--r--  contrib/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp | 357
-rw-r--r--  contrib/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp | 117
-rw-r--r--  contrib/llvm/lib/CodeGen/GlobalISel/RegisterBank.cpp | 11
-rw-r--r--  contrib/llvm/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp | 165
-rw-r--r--  contrib/llvm/lib/CodeGen/GlobalISel/Utils.cpp | 97
-rw-r--r--  contrib/llvm/lib/CodeGen/GlobalMerge.cpp | 12
-rw-r--r--  contrib/llvm/lib/CodeGen/IfConversion.cpp | 135
-rw-r--r--  contrib/llvm/lib/CodeGen/ImplicitNullChecks.cpp | 229
-rw-r--r--  contrib/llvm/lib/CodeGen/InlineSpiller.cpp | 59
-rw-r--r--  contrib/llvm/lib/CodeGen/InterleavedAccessPass.cpp | 21
-rw-r--r--  contrib/llvm/lib/CodeGen/IntrinsicLowering.cpp | 6
-rw-r--r--  contrib/llvm/lib/CodeGen/LLVMTargetMachine.cpp | 142
-rw-r--r--  contrib/llvm/lib/CodeGen/LazyMachineBlockFrequencyInfo.cpp | 97
-rw-r--r--  contrib/llvm/lib/CodeGen/LexicalScopes.cpp | 46
-rw-r--r--  contrib/llvm/lib/CodeGen/LiveDebugValues.cpp | 172
-rw-r--r--  contrib/llvm/lib/CodeGen/LiveDebugVariables.cpp | 12
-rw-r--r--  contrib/llvm/lib/CodeGen/LiveDebugVariables.h | 2
-rw-r--r--  contrib/llvm/lib/CodeGen/LiveInterval.cpp | 33
-rw-r--r--  contrib/llvm/lib/CodeGen/LiveIntervalAnalysis.cpp | 209
-rw-r--r--  contrib/llvm/lib/CodeGen/LiveIntervalUnion.cpp | 37
-rw-r--r--  contrib/llvm/lib/CodeGen/LivePhysRegs.cpp | 90
-rw-r--r--  contrib/llvm/lib/CodeGen/LiveRangeCalc.cpp | 109
-rw-r--r--  contrib/llvm/lib/CodeGen/LiveRangeCalc.h | 4
-rw-r--r--  contrib/llvm/lib/CodeGen/LiveRangeEdit.cpp | 13
-rw-r--r--  contrib/llvm/lib/CodeGen/LiveRangeShrink.cpp | 231
-rw-r--r--  contrib/llvm/lib/CodeGen/LiveRegMatrix.cpp | 25
-rw-r--r--  contrib/llvm/lib/CodeGen/LiveRegUnits.cpp | 132
-rw-r--r--  contrib/llvm/lib/CodeGen/LiveStackAnalysis.cpp | 4
-rw-r--r--  contrib/llvm/lib/CodeGen/LiveVariables.cpp | 6
-rw-r--r--  contrib/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp | 6
-rw-r--r--  contrib/llvm/lib/CodeGen/LowLevelType.cpp | 55
-rw-r--r--  contrib/llvm/lib/CodeGen/LowerEmuTLS.cpp | 24
-rw-r--r--  contrib/llvm/lib/CodeGen/MIRParser/MILexer.cpp | 10
-rw-r--r--  contrib/llvm/lib/CodeGen/MIRParser/MILexer.h | 8
-rw-r--r--  contrib/llvm/lib/CodeGen/MIRParser/MIParser.cpp | 306
-rw-r--r--  contrib/llvm/lib/CodeGen/MIRParser/MIParser.h | 21
-rw-r--r--  contrib/llvm/lib/CodeGen/MIRParser/MIRParser.cpp | 191
-rw-r--r--  contrib/llvm/lib/CodeGen/MIRPrinter.cpp | 231
-rw-r--r--  contrib/llvm/lib/CodeGen/MIRPrinter.h | 33
-rw-r--r--  contrib/llvm/lib/CodeGen/MIRPrintingPass.cpp | 7
-rw-r--r--  contrib/llvm/lib/CodeGen/MachineBasicBlock.cpp | 70
-rw-r--r--  contrib/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp | 91
-rw-r--r--  contrib/llvm/lib/CodeGen/MachineBlockPlacement.cpp | 1167
-rw-r--r--  contrib/llvm/lib/CodeGen/MachineCSE.cpp | 14
-rw-r--r--  contrib/llvm/lib/CodeGen/MachineCombiner.cpp | 56
-rw-r--r--  contrib/llvm/lib/CodeGen/MachineCopyPropagation.cpp | 18
-rw-r--r--  contrib/llvm/lib/CodeGen/MachineDominanceFrontier.cpp | 4
-rw-r--r--  contrib/llvm/lib/CodeGen/MachineDominators.cpp | 32
-rw-r--r--  contrib/llvm/lib/CodeGen/MachineFrameInfo.cpp | 244
-rw-r--r--  contrib/llvm/lib/CodeGen/MachineFunction.cpp | 236
-rw-r--r--  contrib/llvm/lib/CodeGen/MachineFunctionPass.cpp | 2
-rw-r--r--  contrib/llvm/lib/CodeGen/MachineFunctionPrinterPass.cpp | 2
-rw-r--r--  contrib/llvm/lib/CodeGen/MachineInstr.cpp | 236
-rw-r--r--  contrib/llvm/lib/CodeGen/MachineLICM.cpp | 29
-rw-r--r--  contrib/llvm/lib/CodeGen/MachineLoopInfo.cpp | 16
-rw-r--r--  contrib/llvm/lib/CodeGen/MachineModuleInfo.cpp | 85
-rw-r--r--  contrib/llvm/lib/CodeGen/MachineOptimizationRemarkEmitter.cpp | 108
-rw-r--r--  contrib/llvm/lib/CodeGen/MachineOutliner.cpp | 1251
-rw-r--r--  contrib/llvm/lib/CodeGen/MachinePipeliner.cpp | 31
-rw-r--r--  contrib/llvm/lib/CodeGen/MachinePostDominators.cpp | 7
-rw-r--r--  contrib/llvm/lib/CodeGen/MachineRegionInfo.cpp | 64
-rw-r--r--  contrib/llvm/lib/CodeGen/MachineRegisterInfo.cpp | 74
-rw-r--r--  contrib/llvm/lib/CodeGen/MachineScheduler.cpp | 436
-rw-r--r--  contrib/llvm/lib/CodeGen/MachineSink.cpp | 10
-rw-r--r--  contrib/llvm/lib/CodeGen/MachineTraceMetrics.cpp | 72
-rw-r--r--  contrib/llvm/lib/CodeGen/MachineVerifier.cpp | 94
-rw-r--r--  contrib/llvm/lib/CodeGen/MacroFusion.cpp | 153
-rw-r--r--  contrib/llvm/lib/CodeGen/OptimizePHIs.cpp | 6
-rw-r--r--  contrib/llvm/lib/CodeGen/PHIElimination.cpp | 6
-rw-r--r--  contrib/llvm/lib/CodeGen/PatchableFunction.cpp | 4
-rw-r--r--  contrib/llvm/lib/CodeGen/PeepholeOptimizer.cpp | 172
-rw-r--r--  contrib/llvm/lib/CodeGen/PostRAHazardRecognizer.cpp | 4
-rw-r--r--  contrib/llvm/lib/CodeGen/PostRASchedulerList.cpp | 6
-rw-r--r--  contrib/llvm/lib/CodeGen/ProcessImplicitDefs.cpp | 6
-rw-r--r--  contrib/llvm/lib/CodeGen/PrologEpilogInserter.cpp | 172
-rw-r--r--  contrib/llvm/lib/CodeGen/PseudoSourceValue.cpp | 5
-rw-r--r--  contrib/llvm/lib/CodeGen/RegAllocBase.cpp | 14
-rw-r--r--  contrib/llvm/lib/CodeGen/RegAllocBasic.cpp | 65
-rw-r--r--  contrib/llvm/lib/CodeGen/RegAllocFast.cpp | 32
-rw-r--r--  contrib/llvm/lib/CodeGen/RegAllocGreedy.cpp | 203
-rw-r--r--  contrib/llvm/lib/CodeGen/RegAllocPBQP.cpp | 110
-rw-r--r--  contrib/llvm/lib/CodeGen/RegUsageInfoCollector.cpp | 24
-rw-r--r--  contrib/llvm/lib/CodeGen/RegisterClassInfo.cpp | 48
-rw-r--r--  contrib/llvm/lib/CodeGen/RegisterCoalescer.cpp | 336
-rw-r--r--  contrib/llvm/lib/CodeGen/RegisterPressure.cpp | 55
-rw-r--r--  contrib/llvm/lib/CodeGen/RegisterScavenging.cpp | 505
-rw-r--r--  contrib/llvm/lib/CodeGen/RegisterUsageInfo.cpp | 19
-rw-r--r--  contrib/llvm/lib/CodeGen/RenameIndependentSubregs.cpp | 17
-rw-r--r--  contrib/llvm/lib/CodeGen/ResetMachineFunctionPass.cpp | 17
-rw-r--r--  contrib/llvm/lib/CodeGen/SafeStack.cpp | 188
-rw-r--r--  contrib/llvm/lib/CodeGen/SafeStackColoring.cpp | 5
-rw-r--r--  contrib/llvm/lib/CodeGen/ScalarizeMaskedMemIntrin.cpp | 656
-rw-r--r--  contrib/llvm/lib/CodeGen/ScheduleDAG.cpp | 501
-rw-r--r--  contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp | 518
-rw-r--r--  contrib/llvm/lib/CodeGen/ScheduleDAGPrinter.cpp | 2
-rw-r--r--  contrib/llvm/lib/CodeGen/ScoreboardHazardRecognizer.cpp | 9
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 4421
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp | 151
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp | 88
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp | 6
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 263
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp | 110
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp | 314
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp | 30
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h | 43
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp | 43
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp | 26
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp | 444
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp | 82
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp | 91
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp | 467
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp | 10
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp | 2
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 1651
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp | 115
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 1997
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h | 139
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp | 38
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp | 813
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp | 2
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp | 31
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp | 738
-rw-r--r--  contrib/llvm/lib/CodeGen/ShadowStackGCLowering.cpp | 8
-rw-r--r--  contrib/llvm/lib/CodeGen/ShrinkWrap.cpp | 17
-rw-r--r--  contrib/llvm/lib/CodeGen/SjLjEHPrepare.cpp | 45
-rw-r--r--  contrib/llvm/lib/CodeGen/SlotIndexes.cpp | 44
-rw-r--r--  contrib/llvm/lib/CodeGen/SpillPlacement.cpp | 10
-rw-r--r--  contrib/llvm/lib/CodeGen/SplitKit.cpp | 143
-rw-r--r--  contrib/llvm/lib/CodeGen/SplitKit.h | 11
-rw-r--r--  contrib/llvm/lib/CodeGen/StackColoring.cpp | 335
-rw-r--r--  contrib/llvm/lib/CodeGen/StackMaps.cpp | 45
-rw-r--r--  contrib/llvm/lib/CodeGen/StackProtector.cpp | 77
-rw-r--r--  contrib/llvm/lib/CodeGen/StackSlotColoring.cpp | 8
-rw-r--r--  contrib/llvm/lib/CodeGen/TailDuplication.cpp | 17
-rw-r--r--  contrib/llvm/lib/CodeGen/TailDuplicator.cpp | 45
-rw-r--r--  contrib/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp | 16
-rw-r--r--  contrib/llvm/lib/CodeGen/TargetInstrInfo.cpp | 22
-rw-r--r--  contrib/llvm/lib/CodeGen/TargetLoweringBase.cpp | 169
-rw-r--r--  contrib/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp | 378
-rw-r--r--  contrib/llvm/lib/CodeGen/TargetOptionsImpl.cpp | 12
-rw-r--r--  contrib/llvm/lib/CodeGen/TargetPassConfig.cpp | 194
-rw-r--r--  contrib/llvm/lib/CodeGen/TargetRegisterInfo.cpp | 53
-rw-r--r--  contrib/llvm/lib/CodeGen/TargetSchedule.cpp | 110
-rw-r--r--  contrib/llvm/lib/CodeGen/TargetSubtargetInfo.cpp | 58
-rw-r--r--  contrib/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp | 28
-rw-r--r--  contrib/llvm/lib/CodeGen/UnreachableBlockElim.cpp | 30
-rw-r--r--  contrib/llvm/lib/CodeGen/VirtRegMap.cpp | 95
-rw-r--r--  contrib/llvm/lib/CodeGen/WinEHPrepare.cpp | 18
-rw-r--r--  contrib/llvm/lib/CodeGen/XRayInstrumentation.cpp | 79
219 files changed, 24301 insertions, 11003 deletions
diff --git a/contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp b/contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp
index bb90861..5abf50e 100644
--- a/contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp
+++ b/contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp
@@ -128,8 +128,7 @@ AggressiveAntiDepBreaker::AggressiveAntiDepBreaker(
}
DEBUG(dbgs() << "AntiDep Critical-Path Registers:");
- DEBUG(for (int r = CriticalPathSet.find_first(); r != -1;
- r = CriticalPathSet.find_next(r))
+ DEBUG(for (unsigned r : CriticalPathSet.set_bits())
dbgs() << " " << TRI->getName(r));
DEBUG(dbgs() << '\n');
}
@@ -163,9 +162,11 @@ void AggressiveAntiDepBreaker::StartBlock(MachineBasicBlock *BB) {
// callee-saved register that is not saved in the prolog.
const MachineFrameInfo &MFI = MF.getFrameInfo();
BitVector Pristine = MFI.getPristineRegs(MF);
- for (const MCPhysReg *I = TRI->getCalleeSavedRegs(&MF); *I; ++I) {
+ for (const MCPhysReg *I = MF.getRegInfo().getCalleeSavedRegs(); *I;
+ ++I) {
unsigned Reg = *I;
- if (!IsReturnBlock && !Pristine.test(Reg)) continue;
+ if (!IsReturnBlock && !Pristine.test(Reg))
+ continue;
for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) {
unsigned AliasReg = *AI;
State->UnionGroups(AliasReg, 0);
@@ -569,7 +570,7 @@ bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters(
DEBUG({
dbgs() << " ::";
- for (int r = BV.find_first(); r != -1; r = BV.find_next(r))
+ for (unsigned r : BV.set_bits())
dbgs() << " " << TRI->getName(r);
dbgs() << "\n";
});
@@ -962,10 +963,8 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies(
// sure to update that as well.
const SUnit *SU = MISUnitMap[Q.second.Operand->getParent()];
if (!SU) continue;
- for (DbgValueVector::iterator DVI = DbgValues.begin(),
- DVE = DbgValues.end(); DVI != DVE; ++DVI)
- if (DVI->second == Q.second.Operand->getParent())
- UpdateDbgValue(*DVI->first, AntiDepReg, NewReg);
+ UpdateDbgValues(DbgValues, Q.second.Operand->getParent(),
+ AntiDepReg, NewReg);
}
// We just went back in time and modified history; the
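[Aside, not part of the commit] The two DEBUG hunks above swap explicit find_first()/find_next() cursor loops for range-based iteration over llvm::BitVector::set_bits(). A minimal self-contained sketch of the two equivalent styles, assuming an LLVM build that provides llvm/ADT/BitVector.h:

    // set_bits_sketch.cpp -- both loops visit exactly bits 3, 7, and 12.
    #include "llvm/ADT/BitVector.h"
    #include <cstdio>

    int main() {
      llvm::BitVector BV(16);
      BV.set(3);
      BV.set(7);
      BV.set(12);

      // Old style: int cursor, -1 signals "no more set bits".
      for (int r = BV.find_first(); r != -1; r = BV.find_next(r))
        std::printf("bit %d\n", r);

      // New style: set_bits() yields each set index as an unsigned.
      for (unsigned r : BV.set_bits())
        std::printf("bit %u\n", r);
      return 0;
    }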
diff --git a/contrib/llvm/lib/CodeGen/Analysis.cpp b/contrib/llvm/lib/CodeGen/Analysis.cpp
index 79ecc43..c2aecc6 100644
--- a/contrib/llvm/lib/CodeGen/Analysis.cpp
+++ b/contrib/llvm/lib/CodeGen/Analysis.cpp
@@ -24,8 +24,8 @@
#include "llvm/IR/Module.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
-#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetSubtargetInfo.h"
#include "llvm/Transforms/Utils/GlobalStatus.h"
@@ -516,10 +516,9 @@ bool llvm::attributesPermitTailCall(const Function *F, const Instruction *I,
bool &ADS = AllowDifferingSizes ? *AllowDifferingSizes : DummyADS;
ADS = true;
- AttrBuilder CallerAttrs(F->getAttributes(),
- AttributeSet::ReturnIndex);
+ AttrBuilder CallerAttrs(F->getAttributes(), AttributeList::ReturnIndex);
AttrBuilder CalleeAttrs(cast<CallInst>(I)->getAttributes(),
- AttributeSet::ReturnIndex);
+ AttributeList::ReturnIndex);
// Noalias is completely benign as far as calling convention goes, it
// shouldn't affect whether the call is a tail call.
@@ -613,25 +612,6 @@ bool llvm::returnTypeIsEligibleForTailCall(const Function *F,
return true;
}
-bool llvm::canBeOmittedFromSymbolTable(const GlobalValue *GV) {
- if (!GV->hasLinkOnceODRLinkage())
- return false;
-
- // We assume that anyone who sets global unnamed_addr on a non-constant knows
- // what they're doing.
- if (GV->hasGlobalUnnamedAddr())
- return true;
-
- // If it is a non constant variable, it needs to be uniqued across shared
- // objects.
- if (const GlobalVariable *Var = dyn_cast<GlobalVariable>(GV)) {
- if (!Var->isConstant())
- return false;
- }
-
- return GV->hasAtLeastLocalUnnamedAddr();
-}
-
static void collectFuncletMembers(
DenseMap<const MachineBasicBlock *, int> &FuncletMembership, int Funclet,
const MachineBasicBlock *MBB) {
diff --git a/contrib/llvm/lib/CodeGen/AntiDepBreaker.h b/contrib/llvm/lib/CodeGen/AntiDepBreaker.h
index 04f7f41..d14d931 100644
--- a/contrib/llvm/lib/CodeGen/AntiDepBreaker.h
+++ b/contrib/llvm/lib/CodeGen/AntiDepBreaker.h
@@ -60,6 +60,25 @@ public:
if (MI.getOperand(0).isReg() && MI.getOperand(0).getReg() == OldReg)
MI.getOperand(0).setReg(NewReg);
}
+
+ /// Update all DBG_VALUE instructions that may be affected by the dependency
+ /// breaker's update of ParentMI to use NewReg.
+ void UpdateDbgValues(const DbgValueVector &DbgValues, MachineInstr *ParentMI,
+ unsigned OldReg, unsigned NewReg) {
+ // The following code is dependent on the order in which the DbgValues are
+ // constructed in ScheduleDAGInstrs::buildSchedGraph.
+ MachineInstr *PrevDbgMI = nullptr;
+ for (const auto &DV : make_range(DbgValues.crbegin(), DbgValues.crend())) {
+ MachineInstr *PrevMI = DV.second;
+ if ((PrevMI == ParentMI) || (PrevMI == PrevDbgMI)) {
+ MachineInstr *DbgMI = DV.first;
+ UpdateDbgValue(*DbgMI, OldReg, NewReg);
+ PrevDbgMI = DbgMI;
+ } else if (PrevDbgMI) {
+ break; // If no match and already found a DBG_VALUE, we're done.
+ }
+ }
+ }
};
}
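[Aside, not part of the commit] UpdateDbgValues works because DbgValues keeps the entries for a given parent instruction contiguous (the order ScheduleDAGInstrs::buildSchedGraph emits them), so a reverse scan can stop at the first non-match once a match has been seen. A simplified, self-contained sketch of that early-exit idiom, with std::string standing in for MachineInstr* and the chained PrevDbgMI case left out:

    #include <string>
    #include <utility>
    #include <vector>

    // Each entry pairs a debug value with the instruction it describes.
    using Entry = std::pair<std::string /*DbgMI*/, std::string /*ParentMI*/>;

    // Entries for one parent are contiguous, so once a match has been
    // seen, the first non-match ends the run and the scan can stop.
    std::vector<std::string> valuesFor(const std::vector<Entry> &DbgValues,
                                       const std::string &Parent) {
      std::vector<std::string> Out;
      bool Found = false;
      for (auto It = DbgValues.rbegin(); It != DbgValues.rend(); ++It) {
        if (It->second == Parent) {
          Out.push_back(It->first);
          Found = true;
        } else if (Found) {
          break; // End of the contiguous run; nothing later can match.
        }
      }
      return Out;
    }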
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp
index 61149d9..8b1376a 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp
@@ -14,6 +14,7 @@
#include "DwarfException.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/Twine.h"
+#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -27,7 +28,6 @@
#include "llvm/MC/MCSection.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
-#include "llvm/Support/Dwarf.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetOptions.h"
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 24fdbfc..ff427c9 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -12,47 +12,101 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/AsmPrinter.h"
+#include "AsmPrinterHandler.h"
#include "CodeViewDebug.h"
#include "DwarfDebug.h"
#include "DwarfException.h"
#include "WinException.h"
+#include "llvm/ADT/APFloat.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/ObjectUtils.h"
+#include "llvm/BinaryFormat/Dwarf.h"
+#include "llvm/BinaryFormat/ELF.h"
#include "llvm/CodeGen/Analysis.h"
+#include "llvm/CodeGen/GCMetadata.h"
#include "llvm/CodeGen/GCMetadataPrinter.h"
+#include "llvm/CodeGen/GCStrategy.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBundle.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineModuleInfoImpls.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Constant.h"
+#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
-#include "llvm/IR/DebugInfo.h"
+#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalAlias.h"
+#include "llvm/IR/GlobalIFunc.h"
+#include "llvm/IR/GlobalIndirectSymbol.h"
+#include "llvm/IR/GlobalObject.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Mangler.h"
+#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
+#include "llvm/IR/Value.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCDirectives.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCSection.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/MC/MCStreamer.h"
-#include "llvm/MC/MCSymbolELF.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCTargetOptions.h"
#include "llvm/MC/MCValue.h"
+#include "llvm/MC/SectionKind.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/Timer.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
+#include <algorithm>
+#include <cassert>
+#include <cinttypes>
+#include <cstdint>
+#include <limits>
+#include <memory>
+#include <string>
+#include <utility>
+#include <vector>
+
using namespace llvm;
#define DEBUG_TYPE "asm-printer"
@@ -69,6 +123,10 @@ static const char *const CodeViewLineTablesGroupDescription =
STATISTIC(EmittedInsts, "Number of machine instrs printed");
+static cl::opt<bool>
+ PrintSchedule("print-schedule", cl::Hidden, cl::init(false),
+ cl::desc("Print 'sched: [latency:throughput]' in .s output"));
+
char AsmPrinter::ID = 0;
typedef DenseMap<GCStrategy*, std::unique_ptr<GCMetadataPrinter>> gcp_map_type;
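[Aside, not part of the commit] The -print-schedule flag above defaults to off, and a later hunk in SetupMachineFunction consults getNumOccurrences() so that an explicit command-line setting always overrides the subtarget's default. A sketch of that idiom; shouldPrintSchedInfo is an illustrative helper, not a function from the diff:

    #include "llvm/Support/CommandLine.h"

    static llvm::cl::opt<bool> PrintSchedule(
        "print-schedule", llvm::cl::Hidden, llvm::cl::init(false),
        llvm::cl::desc("Print 'sched: [latency:throughput]' in .s output"));

    // Use the subtarget's preference unless the user passed the flag
    // explicitly (in either polarity) on the command line.
    static bool shouldPrintSchedInfo(bool SubtargetDefault) {
      return PrintSchedule.getNumOccurrences() ? PrintSchedule
                                               : SubtargetDefault;
    }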
@@ -78,7 +136,6 @@ static gcp_map_type &getGCMap(void *&P) {
return *(gcp_map_type*)P;
}
-
/// getGVAlignmentLog2 - Return the alignment to use for the specified global
/// value in log2 form. This rounds up to the preferred alignment if possible
/// and legal.
@@ -107,16 +164,7 @@ static unsigned getGVAlignmentLog2(const GlobalValue *GV, const DataLayout &DL,
AsmPrinter::AsmPrinter(TargetMachine &tm, std::unique_ptr<MCStreamer> Streamer)
: MachineFunctionPass(ID), TM(tm), MAI(tm.getMCAsmInfo()),
- OutContext(Streamer->getContext()), OutStreamer(std::move(Streamer)),
- isCFIMoveForDebugging(false), LastMI(nullptr), LastFn(0), Counter(~0U) {
- DD = nullptr;
- MMI = nullptr;
- LI = nullptr;
- MF = nullptr;
- CurExceptionSym = CurrentFnSym = CurrentFnSymForSize = nullptr;
- CurrentFnBegin = nullptr;
- CurrentFnEnd = nullptr;
- GCMetadataPrinters = nullptr;
+ OutContext(Streamer->getContext()), OutStreamer(std::move(Streamer)) {
VerboseAsm = OutStreamer->isVerboseAsm();
}
@@ -171,6 +219,7 @@ void AsmPrinter::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
MachineFunctionPass::getAnalysisUsage(AU);
AU.addRequired<MachineModuleInfo>();
+ AU.addRequired<MachineOptimizationRemarkEmitterPass>();
AU.addRequired<GCModuleInfo>();
if (isVerbose())
AU.addRequired<MachineLoopInfo>();
@@ -223,7 +272,7 @@ bool AsmPrinter::doInitialization(Module &M) {
// don't, this at least helps the user find where a global came from.
if (MAI->hasSingleParameterDotFile()) {
// .file "foo.c"
- OutStreamer->EmitFileDirective(M.getModuleIdentifier());
+ OutStreamer->EmitFileDirective(M.getSourceFileName());
}
GCModuleInfo *MI = getAnalysisIfAvailable<GCModuleInfo>();
@@ -571,7 +620,7 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
///
/// \p Value - The value to emit.
/// \p Size - The size of the integer (in bytes) to emit.
-void AsmPrinter::EmitDebugValue(const MCExpr *Value,
+void AsmPrinter::EmitDebugThreadLocal(const MCExpr *Value,
unsigned Size) const {
OutStreamer->EmitValue(Value, Size);
}
@@ -579,12 +628,17 @@ void AsmPrinter::EmitDebugValue(const MCExpr *Value,
/// EmitFunctionHeader - This method emits the header for the current
/// function.
void AsmPrinter::EmitFunctionHeader() {
+ const Function *F = MF->getFunction();
+
+ if (isVerbose())
+ OutStreamer->GetCommentOS()
+ << "-- Begin function "
+ << GlobalValue::dropLLVMManglingEscape(F->getName()) << '\n';
+
// Print out constants referenced by the function
EmitConstantPool();
// Print the 'header' of function.
- const Function *F = MF->getFunction();
-
OutStreamer->SwitchSection(getObjFileLowering().SectionForGlobal(F, TM));
EmitVisibility(CurrentFnSym, F->getVisibility());
@@ -602,8 +656,23 @@ void AsmPrinter::EmitFunctionHeader() {
}
// Emit the prefix data.
- if (F->hasPrefixData())
- EmitGlobalConstant(F->getParent()->getDataLayout(), F->getPrefixData());
+ if (F->hasPrefixData()) {
+ if (MAI->hasSubsectionsViaSymbols()) {
+ // Preserving prefix data on platforms which use subsections-via-symbols
+ // is a bit tricky. Here we introduce a symbol for the prefix data
+ // and use the .alt_entry attribute to mark the function's real entry point
+ // as an alternative entry point to the prefix-data symbol.
+ MCSymbol *PrefixSym = OutContext.createLinkerPrivateTempSymbol();
+ OutStreamer->EmitLabel(PrefixSym);
+
+ EmitGlobalConstant(F->getParent()->getDataLayout(), F->getPrefixData());
+
+ // Emit an .alt_entry directive for the actual function symbol.
+ OutStreamer->EmitSymbolAttribute(CurrentFnSym, MCSA_AltEntry);
+ } else {
+ EmitGlobalConstant(F->getParent()->getDataLayout(), F->getPrefixData());
+ }
+ }
// Emit the CurrentFnSym. This is a virtual function to allow targets to
// do their wild and crazy things as required.
@@ -660,7 +729,8 @@ void AsmPrinter::EmitFunctionEntryLabel() {
}
/// emitComments - Pretty-print comments for instructions.
-static void emitComments(const MachineInstr &MI, raw_ostream &CommentOS) {
+static void emitComments(const MachineInstr &MI, raw_ostream &CommentOS,
+ AsmPrinter *AP) {
const MachineFunction *MF = MI.getParent()->getParent();
const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
@@ -668,6 +738,7 @@ static void emitComments(const MachineInstr &MI, raw_ostream &CommentOS) {
int FI;
const MachineFrameInfo &MFI = MF->getFrameInfo();
+ bool Commented = false;
// We assume a single instruction only has a spill or reload, not
// both.
@@ -675,24 +746,39 @@ static void emitComments(const MachineInstr &MI, raw_ostream &CommentOS) {
if (TII->isLoadFromStackSlotPostFE(MI, FI)) {
if (MFI.isSpillSlotObjectIndex(FI)) {
MMO = *MI.memoperands_begin();
- CommentOS << MMO->getSize() << "-byte Reload\n";
+ CommentOS << MMO->getSize() << "-byte Reload";
+ Commented = true;
}
} else if (TII->hasLoadFromStackSlot(MI, MMO, FI)) {
- if (MFI.isSpillSlotObjectIndex(FI))
- CommentOS << MMO->getSize() << "-byte Folded Reload\n";
+ if (MFI.isSpillSlotObjectIndex(FI)) {
+ CommentOS << MMO->getSize() << "-byte Folded Reload";
+ Commented = true;
+ }
} else if (TII->isStoreToStackSlotPostFE(MI, FI)) {
if (MFI.isSpillSlotObjectIndex(FI)) {
MMO = *MI.memoperands_begin();
- CommentOS << MMO->getSize() << "-byte Spill\n";
+ CommentOS << MMO->getSize() << "-byte Spill";
+ Commented = true;
}
} else if (TII->hasStoreToStackSlot(MI, MMO, FI)) {
- if (MFI.isSpillSlotObjectIndex(FI))
- CommentOS << MMO->getSize() << "-byte Folded Spill\n";
+ if (MFI.isSpillSlotObjectIndex(FI)) {
+ CommentOS << MMO->getSize() << "-byte Folded Spill";
+ Commented = true;
+ }
}
// Check for spill-induced copies
- if (MI.getAsmPrinterFlag(MachineInstr::ReloadReuse))
- CommentOS << " Reload Reuse\n";
+ if (MI.getAsmPrinterFlag(MachineInstr::ReloadReuse)) {
+ Commented = true;
+ CommentOS << " Reload Reuse";
+ }
+
+ if (Commented && AP->EnablePrintSchedInfo)
+ // If any comment was added above and we need sched info comment then
+ // add this new comment just after the above comment w/o "\n" between them.
+ CommentOS << " " << MF->getSubtarget().getSchedInfoStr(MI) << "\n";
+ else if (Commented)
+ CommentOS << "\n";
}
/// emitImplicitDef - This method emits the specified machine instruction
@@ -739,46 +825,30 @@ static bool emitDebugValueComment(const MachineInstr *MI, AsmPrinter &AP) {
const DILocalVariable *V = MI->getDebugVariable();
if (auto *SP = dyn_cast<DISubprogram>(V->getScope())) {
- StringRef Name = SP->getDisplayName();
+ StringRef Name = SP->getName();
if (!Name.empty())
OS << Name << ":";
}
OS << V->getName();
-
- const DIExpression *Expr = MI->getDebugExpression();
- auto Fragment = Expr->getFragmentInfo();
- if (Fragment)
- OS << " [fragment offset=" << Fragment->OffsetInBits
- << " size=" << Fragment->SizeInBits << "]";
OS << " <- ";
// The second operand is only an offset if it's an immediate.
- bool Deref = MI->getOperand(0).isReg() && MI->getOperand(1).isImm();
- int64_t Offset = Deref ? MI->getOperand(1).getImm() : 0;
-
- for (unsigned i = 0; i < Expr->getNumElements(); ++i) {
- uint64_t Op = Expr->getElement(i);
- if (Op == dwarf::DW_OP_LLVM_fragment) {
- // There can't be any operands after this in a valid expression
- break;
- } else if (Deref) {
- // We currently don't support extra Offsets or derefs after the first
- // one. Bail out early instead of emitting an incorrect comment
- OS << " [complex expression]";
- AP.OutStreamer->emitRawComment(OS.str());
- return true;
- } else if (Op == dwarf::DW_OP_deref) {
- Deref = true;
- continue;
- }
-
- uint64_t ExtraOffset = Expr->getElement(i++);
- if (Op == dwarf::DW_OP_plus)
- Offset += ExtraOffset;
- else {
- assert(Op == dwarf::DW_OP_minus);
- Offset -= ExtraOffset;
+ bool MemLoc = MI->getOperand(0).isReg() && MI->getOperand(1).isImm();
+ int64_t Offset = MemLoc ? MI->getOperand(1).getImm() : 0;
+ const DIExpression *Expr = MI->getDebugExpression();
+ if (Expr->getNumElements()) {
+ OS << '[';
+ bool NeedSep = false;
+ for (auto Op : Expr->expr_ops()) {
+ if (NeedSep)
+ OS << ", ";
+ else
+ NeedSep = true;
+ OS << dwarf::OperationEncodingString(Op.getOp());
+ for (unsigned I = 0; I < Op.getNumArgs(); ++I)
+ OS << ' ' << Op.getArg(I);
}
+ OS << "] ";
}
// Register or immediate value. Register 0 means undef.
@@ -809,7 +879,7 @@ static bool emitDebugValueComment(const MachineInstr *MI, AsmPrinter &AP) {
const TargetFrameLowering *TFI = AP.MF->getSubtarget().getFrameLowering();
Offset += TFI->getFrameIndexReference(*AP.MF,
MI->getOperand(0).getIndex(), Reg);
- Deref = true;
+ MemLoc = true;
}
if (Reg == 0) {
// Suppress offset, it is not meaningful here.
@@ -818,12 +888,12 @@ static bool emitDebugValueComment(const MachineInstr *MI, AsmPrinter &AP) {
AP.OutStreamer->emitRawComment(OS.str());
return true;
}
- if (Deref)
+ if (MemLoc)
OS << '[';
OS << PrintReg(Reg, AP.MF->getSubtarget().getRegisterInfo());
}
- if (Deref)
+ if (MemLoc)
OS << '+' << Offset << ']';
// NOTE: Want this comment at start of line, don't emit with AddComment.
@@ -855,6 +925,16 @@ void AsmPrinter::emitCFIInstruction(const MachineInstr &MI) {
if (needsCFIMoves() == CFI_M_None)
return;
+ // If there is no "real" instruction following this CFI instruction, skip
+ // emitting it; it would be beyond the end of the function's FDE range.
+ auto *MBB = MI.getParent();
+ auto I = std::next(MI.getIterator());
+ while (I != MBB->end() && I->isTransient())
+ ++I;
+ if (I == MBB->instr_end() &&
+ MBB->getReverseIterator() == MBB->getParent()->rbegin())
+ return;
+
const std::vector<MCCFIInstruction> &Instrs = MF->getFrameInstructions();
unsigned CFIIndex = MI.getOperand(0).getCFIIndex();
const MCCFIInstruction &CFI = Instrs[CFIIndex];
@@ -871,6 +951,19 @@ void AsmPrinter::emitFrameAlloc(const MachineInstr &MI) {
MCConstantExpr::create(FrameOffset, OutContext));
}
+static bool needFuncLabelsForEHOrDebugInfo(const MachineFunction &MF,
+ MachineModuleInfo *MMI) {
+ if (!MF.getLandingPads().empty() || MF.hasEHFunclets() || MMI->hasDebugInfo())
+ return true;
+
+ // We might emit an EH table that uses function begin and end labels even if
+ // we don't have any landingpads.
+ if (!MF.getFunction()->hasPersonalityFn())
+ return false;
+ return !isNoOpWithoutInvoke(
+ classifyEHPersonality(MF.getFunction()->getPersonalityFn()));
+}
+
/// EmitFunctionBody - This method emits the body and trailer for a
/// function.
void AsmPrinter::EmitFunctionBody() {
@@ -883,6 +976,7 @@ void AsmPrinter::EmitFunctionBody() {
// Print out code for the function.
bool HasAnyRealCode = false;
+ int NumInstsInFunction = 0;
for (auto &MBB : *MF) {
// Print a label for the basic block.
EmitBasicBlockStart(MBB);
@@ -892,7 +986,7 @@ void AsmPrinter::EmitFunctionBody() {
if (!MI.isPosition() && !MI.isImplicitDef() && !MI.isKill() &&
!MI.isDebugValue()) {
HasAnyRealCode = true;
- ++EmittedInsts;
+ ++NumInstsInFunction;
}
if (ShouldPrintDebugScopes) {
@@ -905,7 +999,7 @@ void AsmPrinter::EmitFunctionBody() {
}
if (isVerbose())
- emitComments(MI, OutStreamer->GetCommentOS());
+ emitComments(MI, OutStreamer->GetCommentOS(), this);
switch (MI.getOpcode()) {
case TargetOpcode::CFI_INSTRUCTION:
@@ -953,18 +1047,34 @@ void AsmPrinter::EmitFunctionBody() {
EmitBasicBlockEnd(MBB);
}
+ EmittedInsts += NumInstsInFunction;
+ MachineOptimizationRemarkAnalysis R(DEBUG_TYPE, "InstructionCount",
+ MF->getFunction()->getSubprogram(),
+ &MF->front());
+ R << ore::NV("NumInstructions", NumInstsInFunction)
+ << " instructions in function";
+ ORE->emit(R);
+
// If the function is empty and the object file uses .subsections_via_symbols,
// then we need to emit *something* to the function body to prevent the
// labels from collapsing together. Just emit a noop.
- if ((MAI->hasSubsectionsViaSymbols() && !HasAnyRealCode)) {
+ // Similarly, don't emit empty functions on Windows either. It can lead to
+ // duplicate entries (two functions with the same RVA) in the Guard CF Table
+ // after linking, causing the kernel not to load the binary:
+ // https://developercommunity.visualstudio.com/content/problem/45366/vc-linker-creates-invalid-dll-with-clang-cl.html
+ // FIXME: Hide this behind some API in e.g. MCAsmInfo or MCTargetStreamer.
+ const Triple &TT = TM.getTargetTriple();
+ if (!HasAnyRealCode && (MAI->hasSubsectionsViaSymbols() ||
+ (TT.isOSWindows() && TT.isOSBinFormatCOFF()))) {
MCInst Noop;
- MF->getSubtarget().getInstrInfo()->getNoopForMachoTarget(Noop);
- OutStreamer->AddComment("avoids zero-length function");
+ MF->getSubtarget().getInstrInfo()->getNoop(Noop);
// Targets can opt-out of emitting the noop here by leaving the opcode
// unspecified.
- if (Noop.getOpcode())
+ if (Noop.getOpcode()) {
+ OutStreamer->AddComment("avoids zero-length function");
OutStreamer->EmitInstruction(Noop, getSubtargetInfo());
+ }
}
const Function *F = MF->getFunction();
@@ -981,8 +1091,8 @@ void AsmPrinter::EmitFunctionBody() {
// Emit target-specific gunk after the function body.
EmitFunctionBodyEnd();
- if (!MF->getLandingPads().empty() || MMI->hasDebugInfo() ||
- MF->hasEHFunclets() || MAI->hasDotTypeDotSizeDirective()) {
+ if (needFuncLabelsForEHOrDebugInfo(*MF, MMI) ||
+ MAI->hasDotTypeDotSizeDirective()) {
// Create a symbol for the end of function.
CurrentFnEnd = createTempSymbol("func_end");
OutStreamer->EmitLabel(CurrentFnEnd);
@@ -1015,6 +1125,9 @@ void AsmPrinter::EmitFunctionBody() {
HI.Handler->endFunction(MF);
}
+ if (isVerbose())
+ OutStreamer->GetCommentOS() << "-- End function\n";
+
OutStreamer->AddBlankLine();
}
@@ -1175,11 +1288,7 @@ bool AsmPrinter::doFinalization(Module &M) {
const TargetLoweringObjectFile &TLOF = getObjFileLowering();
- // Emit module flags.
- SmallVector<Module::ModuleFlagEntry, 8> ModuleFlags;
- M.getModuleFlagsMetadata(ModuleFlags);
- if (!ModuleFlags.empty())
- TLOF.emitModuleFlags(*OutStreamer, ModuleFlags, TM);
+ TLOF.emitModuleMetadata(*OutStreamer, M, TM);
if (TM.getTargetTriple().isOSBinFormatELF()) {
MachineModuleInfoELF &MMIELF = MMI->getObjFileInfo<MachineModuleInfoELF>();
@@ -1238,7 +1347,7 @@ bool AsmPrinter::doFinalization(Module &M) {
break;
AliasStack.push_back(Cur);
}
- for (const GlobalAlias *AncestorAlias : reverse(AliasStack))
+ for (const GlobalAlias *AncestorAlias : llvm::reverse(AliasStack))
emitGlobalIndirectSymbol(M, *AncestorAlias);
AliasStack.clear();
}
@@ -1266,7 +1375,7 @@ bool AsmPrinter::doFinalization(Module &M) {
OutContext.getOrCreateSymbol(StringRef("__morestack_addr"));
OutStreamer->EmitLabel(AddrSymbol);
- unsigned PtrSize = M.getDataLayout().getPointerSize(0);
+ unsigned PtrSize = MAI->getCodePointerSize();
OutStreamer->EmitSymbolValue(GetExternalSymbolSymbol("__morestack"),
PtrSize);
}
@@ -1304,26 +1413,34 @@ void AsmPrinter::SetupMachineFunction(MachineFunction &MF) {
CurrentFnBegin = nullptr;
CurExceptionSym = nullptr;
bool NeedsLocalForSize = MAI->needsLocalForSize();
- if (!MF.getLandingPads().empty() || MMI->hasDebugInfo() ||
- MF.hasEHFunclets() || NeedsLocalForSize) {
+ if (needFuncLabelsForEHOrDebugInfo(MF, MMI) || NeedsLocalForSize) {
CurrentFnBegin = createTempSymbol("func_begin");
if (NeedsLocalForSize)
CurrentFnSymForSize = CurrentFnBegin;
}
+ ORE = &getAnalysis<MachineOptimizationRemarkEmitterPass>().getORE();
if (isVerbose())
LI = &getAnalysis<MachineLoopInfo>();
+
+ const TargetSubtargetInfo &STI = MF.getSubtarget();
+ EnablePrintSchedInfo = PrintSchedule.getNumOccurrences()
+ ? PrintSchedule
+ : STI.supportPrintSchedInfo();
}
namespace {
+
// Keep track the alignment, constpool entries per Section.
struct SectionCPs {
MCSection *S;
unsigned Alignment;
SmallVector<unsigned, 4> CPEs;
+
SectionCPs(MCSection *s, unsigned a) : S(s), Alignment(a) {}
};
-}
+
+} // end anonymous namespace
/// EmitConstantPool - Print to the current output stream assembly
/// representations of the constants in the constant pool MCP. This is
@@ -1547,7 +1664,6 @@ void AsmPrinter::EmitJumpTableEntry(const MachineJumpTableInfo *MJTI,
OutStreamer->EmitValue(Value, EntrySize);
}
-
/// EmitSpecialLLVMGlobal - Check to see if the specified global is a
/// special global used by LLVM. If so, emit it and return true, otherwise
/// do nothing and return false.
@@ -1598,13 +1714,16 @@ void AsmPrinter::EmitLLVMUsedList(const ConstantArray *InitList) {
}
namespace {
+
struct Structor {
- Structor() : Priority(0), Func(nullptr), ComdatKey(nullptr) {}
- int Priority;
- llvm::Constant *Func;
- llvm::GlobalValue *ComdatKey;
+ int Priority = 0;
+ Constant *Func = nullptr;
+ GlobalValue *ComdatKey = nullptr;
+
+ Structor() = default;
};
-} // end namespace
+
+} // end anonymous namespace
/// EmitXXStructorList - Emit the ctor or dtor list taking into account the init
/// priority.
@@ -1653,8 +1772,11 @@ void AsmPrinter::EmitXXStructorList(const DataLayout &DL, const Constant *List,
const TargetLoweringObjectFile &Obj = getObjFileLowering();
const MCSymbol *KeySym = nullptr;
if (GlobalValue *GV = S.ComdatKey) {
- if (GV->hasAvailableExternallyLinkage())
- // If the associated variable is available_externally, some other TU
+ if (GV->isDeclarationForLinker())
+ // If the associated variable is not defined in this module
+ // (it might be available_externally, or have been an
+ // available_externally definition that was dropped by the
+ // EliminateAvailableExternally pass), some other TU
// will provide its dynamic initializer.
continue;
@@ -1931,7 +2053,6 @@ static int isRepeatedByteSequence(const ConstantDataSequential *V) {
return static_cast<uint8_t>(C); // Ensure 255 is not returned as -1.
}
-
/// isRepeatedByteSequence - Determine whether the given value is
/// composed of a repeated sequence of identical bytes and return the
/// byte value. If it is not a repeated sequence, return -1.
@@ -1972,7 +2093,6 @@ static int isRepeatedByteSequence(const Value *V, const DataLayout &DL) {
static void emitGlobalConstantDataSequential(const DataLayout &DL,
const ConstantDataSequential *CDS,
AsmPrinter &AP) {
-
// See if we can aggregate this into a .fill, if so, emit it as such.
int Value = isRepeatedByteSequence(CDS, DL);
if (Value != -1) {
@@ -2006,7 +2126,6 @@ static void emitGlobalConstantDataSequential(const DataLayout &DL,
CDS->getNumElements();
if (unsigned Padding = Size - EmittedSize)
AP.OutStreamer->EmitZeros(Padding);
-
}
static void emitGlobalConstantArray(const DataLayout &DL,
@@ -2145,7 +2264,7 @@ static void emitGlobalConstantLargeInt(const ConstantInt *CI, AsmPrinter &AP) {
// chu[nk1 chu][nk2 chu] ... [nkN-1 chunkN]
ExtraBits = Realigned.getRawData()[0] &
(((uint64_t)-1) >> (64 - ExtraBitsSize));
- Realigned = Realigned.lshr(ExtraBitsSize);
+ Realigned.lshrInPlace(ExtraBitsSize);
} else
ExtraBits = Realigned.getRawData()[BitWidth / 64];
}
@@ -2420,8 +2539,6 @@ MCSymbol *AsmPrinter::GetExternalSymbolSymbol(StringRef Sym) const {
return OutContext.getOrCreateSymbol(NameStr);
}
-
-
/// PrintParentLoopComment - Print comments about parent loops of this one.
static void PrintParentLoopComment(raw_ostream &OS, const MachineLoop *Loop,
unsigned FunctionNumber) {
@@ -2486,7 +2603,6 @@ static void emitBasicBlockLoopComments(const MachineBasicBlock &MBB,
PrintChildLoopComment(OS, Loop, AP.getFunctionNumber());
}
-
/// EmitBasicBlockStart - This method prints the label for the specified
/// MachineBasicBlock, an alignment (if present) and a comment describing
/// it if appropriate.
@@ -2607,8 +2723,6 @@ isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const {
return true;
}
-
-
GCMetadataPrinter *AsmPrinter::GetOrCreateGCPrinter(GCStrategy &S) {
if (!S.usesMetadata())
return nullptr;
@@ -2639,7 +2753,7 @@ GCMetadataPrinter *AsmPrinter::GetOrCreateGCPrinter(GCStrategy &S) {
}
/// Pin vtable to this file.
-AsmPrinterHandler::~AsmPrinterHandler() {}
+AsmPrinterHandler::~AsmPrinterHandler() = default;
void AsmPrinterHandler::markFunctionEnd() {}
@@ -2663,37 +2777,61 @@ void AsmPrinter::emitXRayTable() {
auto PrevSection = OutStreamer->getCurrentSectionOnly();
auto Fn = MF->getFunction();
- MCSection *Section = nullptr;
+ MCSection *InstMap = nullptr;
+ MCSection *FnSledIndex = nullptr;
if (MF->getSubtarget().getTargetTriple().isOSBinFormatELF()) {
if (Fn->hasComdat()) {
- Section = OutContext.getELFSection("xray_instr_map", ELF::SHT_PROGBITS,
+ InstMap = OutContext.getELFSection("xray_instr_map", ELF::SHT_PROGBITS,
ELF::SHF_ALLOC | ELF::SHF_GROUP, 0,
Fn->getComdat()->getName());
+ FnSledIndex = OutContext.getELFSection("xray_fn_idx", ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC | ELF::SHF_GROUP, 0,
+ Fn->getComdat()->getName());
} else {
- Section = OutContext.getELFSection("xray_instr_map", ELF::SHT_PROGBITS,
+ InstMap = OutContext.getELFSection("xray_instr_map", ELF::SHT_PROGBITS,
ELF::SHF_ALLOC);
+ FnSledIndex = OutContext.getELFSection("xray_fn_idx", ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC);
}
} else if (MF->getSubtarget().getTargetTriple().isOSBinFormatMachO()) {
- Section = OutContext.getMachOSection("__DATA", "xray_instr_map", 0,
+ InstMap = OutContext.getMachOSection("__DATA", "xray_instr_map", 0,
SectionKind::getReadOnlyWithRel());
+ FnSledIndex = OutContext.getMachOSection("__DATA", "xray_fn_idx", 0,
+ SectionKind::getReadOnlyWithRel());
} else {
llvm_unreachable("Unsupported target");
}
// Before we switch over, we force a reference to a label inside the
- // xray_instr_map section. Since this function is always called just
- // before the function's end, we assume that this is happening after
- // the last return instruction.
-
- auto WordSizeBytes = TM.getPointerSize();
- MCSymbol *Tmp = OutContext.createTempSymbol("xray_synthetic_", true);
+ // xray_fn_idx sections. This makes sure that the xray_fn_idx section is kept
+ // live by the linker if the function is not garbage-collected. Since this
+ // function is always called just before the function's end, we assume that
+ // this is happening after the last return instruction.
+ auto WordSizeBytes = MAI->getCodePointerSize();
+ MCSymbol *IdxRef = OutContext.createTempSymbol("xray_fn_idx_synth_", true);
OutStreamer->EmitCodeAlignment(16);
- OutStreamer->EmitSymbolValue(Tmp, WordSizeBytes, false);
- OutStreamer->SwitchSection(Section);
- OutStreamer->EmitLabel(Tmp);
+ OutStreamer->EmitSymbolValue(IdxRef, WordSizeBytes, false);
+
+ // Now we switch to the instrumentation map section. Because this is done
+ // per-function, we are able to create an index entry that will represent the
+ // range of sleds associated with a function.
+ MCSymbol *SledsStart = OutContext.createTempSymbol("xray_sleds_start", true);
+ OutStreamer->SwitchSection(InstMap);
+ OutStreamer->EmitLabel(SledsStart);
for (const auto &Sled : Sleds)
Sled.emit(WordSizeBytes, OutStreamer.get(), CurrentFnSym);
-
+ MCSymbol *SledsEnd = OutContext.createTempSymbol("xray_sleds_end", true);
+ OutStreamer->EmitLabel(SledsEnd);
+
+ // We then emit a single entry in the index per function. We use the symbols
+ // that bound the instrumentation map as the range for a specific function.
+ // Each entry here will be 2 * word size aligned, as we're writing down two
+ // pointers. This should work for both 32-bit and 64-bit platforms.
+ OutStreamer->SwitchSection(FnSledIndex);
+ OutStreamer->EmitCodeAlignment(2 * WordSizeBytes);
+ OutStreamer->EmitLabel(IdxRef);
+ OutStreamer->EmitSymbolValue(SledsStart, WordSizeBytes);
+ OutStreamer->EmitSymbolValue(SledsEnd, WordSizeBytes);
OutStreamer->SwitchSection(PrevSection);
Sleds.clear();
}
@@ -2702,8 +2840,11 @@ void AsmPrinter::recordSled(MCSymbol *Sled, const MachineInstr &MI,
SledKind Kind) {
auto Fn = MI.getParent()->getParent()->getFunction();
auto Attr = Fn->getFnAttribute("function-instrument");
+ bool LogArgs = Fn->hasFnAttribute("xray-log-args");
bool AlwaysInstrument =
Attr.isStringAttribute() && Attr.getValueAsString() == "xray-always";
+ if (Kind == SledKind::FUNCTION_ENTER && LogArgs)
+ Kind = SledKind::LOG_ARGS_ENTER;
Sleds.emplace_back(
XRayFunctionEntry{ Sled, CurrentFnSym, Kind, AlwaysInstrument, Fn });
}
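[Aside, not part of the commit] The xray_fn_idx entries emitted in the emitXRayTable hunk above are two pointer-sized words per function, bounding that function's sled records in xray_instr_map and aligned to twice the word size (the EmitCodeAlignment(2 * WordSizeBytes) call). A conceptual C++ rendering of one entry; the struct and field names are illustrative, not LLVM's:

    // One index entry: [SledsStart, SledsEnd) brackets the function's
    // sleds in xray_instr_map, so a runtime can patch or enumerate one
    // function's sleds without scanning the whole instrumentation map.
    struct alignas(2 * sizeof(void *)) XRayFnIdxEntry {
      const void *SledsStart; // the "xray_sleds_start" label
      const void *SledsEnd;   // the "xray_sleds_end" label (one past the end)
    };

    static_assert(sizeof(XRayFnIdxEntry) == 2 * sizeof(void *),
                  "two pointer-sized words per function");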
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
index 0185c38..0edf905 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
@@ -15,6 +15,7 @@
#include "DwarfDebug.h"
#include "DwarfExpression.h"
#include "llvm/ADT/Twine.h"
+#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/DIE.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -26,7 +27,6 @@
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MachineLocation.h"
-#include "llvm/Support/Dwarf.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetMachine.h"
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
index 165b8ee..eae79ad 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
@@ -11,9 +11,9 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/Twine.h"
+#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
@@ -48,10 +48,16 @@ static void srcMgrDiagHandler(const SMDiagnostic &Diag, void *diagInfo) {
static_cast<AsmPrinter::SrcMgrDiagInfo *>(diagInfo);
assert(DiagInfo && "Diagnostic context not passed down?");
+ // Look up a LocInfo for the buffer this diagnostic is coming from.
+ unsigned BufNum = DiagInfo->SrcMgr.FindBufferContainingLoc(Diag.getLoc());
+ const MDNode *LocInfo = nullptr;
+ if (BufNum > 0 && BufNum <= DiagInfo->LocInfos.size())
+ LocInfo = DiagInfo->LocInfos[BufNum-1];
+
// If the inline asm had metadata associated with it, pull out a location
// cookie corresponding to which line the error occurred on.
unsigned LocCookie = 0;
- if (const MDNode *LocInfo = DiagInfo->LocInfo) {
+ if (LocInfo) {
unsigned ErrorLine = Diag.getLineNo()-1;
if (ErrorLine >= LocInfo->getNumOperands())
ErrorLine = 0;
@@ -108,7 +114,6 @@ void AsmPrinter::EmitInlineAsm(StringRef Str, const MCSubtargetInfo &STI,
SourceMgr &SrcMgr = DiagInfo->SrcMgr;
SrcMgr.setIncludeDirs(MCOptions.IASSearchPaths);
- DiagInfo->LocInfo = LocMDNode;
std::unique_ptr<MemoryBuffer> Buffer;
// The inline asm source manager will outlive Str, so make a copy of the
@@ -118,6 +123,12 @@ void AsmPrinter::EmitInlineAsm(StringRef Str, const MCSubtargetInfo &STI,
// Tell SrcMgr about this buffer, it takes ownership of the buffer.
unsigned BufNum = SrcMgr.AddNewSourceBuffer(std::move(Buffer), SMLoc());
+ // Store LocMDNode in DiagInfo, using BufNum as an identifier.
+ if (LocMDNode) {
+ DiagInfo->LocInfos.resize(BufNum);
+ DiagInfo->LocInfos[BufNum-1] = LocMDNode;
+ }
+
std::unique_ptr<MCAsmParser> Parser(
createMCAsmParser(SrcMgr, OutContext, *OutStreamer, *MAI, BufNum));
@@ -133,6 +144,9 @@ void AsmPrinter::EmitInlineAsm(StringRef Str, const MCSubtargetInfo &STI,
" we don't have an asm parser for this target\n");
Parser->setAssemblerDialect(Dialect);
Parser->setTargetParser(*TAP.get());
+ if (Dialect == InlineAsm::AD_Intel)
+ // We need this flag to be able to parse numbers like "0bH"
+ Parser->setParsingInlineAsm(true);
if (MF) {
const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
TAP->SetFrameRegister(TRI->getFrameRegister(*MF));
@@ -144,11 +158,6 @@ void AsmPrinter::EmitInlineAsm(StringRef Str, const MCSubtargetInfo &STI,
/*NoFinalize*/ true);
emitInlineAsmEnd(STI, &TAP->getSTI());
- // LocInfo cannot be used for error generation from the backend.
- // FIXME: associate LocInfo with the SourceBuffer to improve backend
- // messages.
- DiagInfo->LocInfo = nullptr;
-
if (Res && !DiagInfo->DiagHandler)
report_fatal_error("Error parsing inline asm\n");
}
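[Aside, not part of the commit] The LocInfos bookkeeping above keys inline-asm location metadata by the SourceMgr's 1-based buffer id, replacing the single shared LocInfo field that made diagnostics ambiguous across buffers. A self-contained sketch of the same scheme, with an opaque Metadata type standing in for llvm::MDNode:

    #include <cassert>
    #include <vector>

    struct Metadata; // stand-in for llvm::MDNode

    struct DiagInfoSketch {
      std::vector<const Metadata *> LocInfos;

      // Buffer ids are 1-based, so grow the table to BufNum entries and
      // store at index BufNum-1; resize() null-fills the gap.
      void recordLocInfo(unsigned BufNum, const Metadata *Loc) {
        assert(BufNum > 0 && "buffer ids are 1-based");
        if (LocInfos.size() < BufNum)
          LocInfos.resize(BufNum);
        LocInfos[BufNum - 1] = Loc;
      }

      // Returns null when no metadata was recorded for this buffer.
      const Metadata *lookup(unsigned BufNum) const {
        return (BufNum > 0 && BufNum <= LocInfos.size())
                   ? LocInfos[BufNum - 1]
                   : nullptr;
      }
    };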
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp
index 8344051..a81d56e 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp
@@ -1,4 +1,4 @@
-//===-- llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp --*- C++ -*--===//
+//===- llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp ----------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -12,37 +12,82 @@
//===----------------------------------------------------------------------===//
#include "CodeViewDebug.h"
+#include "llvm/ADT/APSInt.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/None.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/TinyPtrVector.h"
-#include "llvm/DebugInfo/CodeView/CVTypeDumper.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/BinaryFormat/COFF.h"
+#include "llvm/BinaryFormat/Dwarf.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/LexicalScopes.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/Config/llvm-config.h"
#include "llvm/DebugInfo/CodeView/CVTypeVisitor.h"
#include "llvm/DebugInfo/CodeView/CodeView.h"
+#include "llvm/DebugInfo/CodeView/DebugInlineeLinesSubsection.h"
#include "llvm/DebugInfo/CodeView/Line.h"
#include "llvm/DebugInfo/CodeView/SymbolRecord.h"
-#include "llvm/DebugInfo/CodeView/TypeDatabase.h"
#include "llvm/DebugInfo/CodeView/TypeDumpVisitor.h"
#include "llvm/DebugInfo/CodeView/TypeIndex.h"
#include "llvm/DebugInfo/CodeView/TypeRecord.h"
-#include "llvm/DebugInfo/CodeView/TypeVisitorCallbacks.h"
-#include "llvm/DebugInfo/MSF/ByteStream.h"
-#include "llvm/DebugInfo/MSF/StreamReader.h"
+#include "llvm/DebugInfo/CodeView/TypeTableCollection.h"
#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/IR/Module.h"
#include "llvm/MC/MCAsmInfo.h"
-#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCSectionCOFF.h"
+#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
-#include "llvm/Support/COFF.h"
+#include "llvm/Support/BinaryByteStream.h"
+#include "llvm/Support/BinaryStreamReader.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Endian.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/ScopedPrinter.h"
+#include "llvm/Support/SMLoc.h"
#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
+#include <algorithm>
+#include <cassert>
+#include <cctype>
+#include <cstddef>
+#include <cstdint>
+#include <iterator>
+#include <limits>
+#include <string>
+#include <utility>
+#include <vector>
using namespace llvm;
using namespace llvm::codeview;
-using namespace llvm::msf;
CodeViewDebug::CodeViewDebug(AsmPrinter *AP)
- : DebugHandlerBase(AP), OS(*Asm->OutStreamer), Allocator(),
- TypeTable(Allocator), CurFn(nullptr) {
+ : DebugHandlerBase(AP), OS(*Asm->OutStreamer), TypeTable(Allocator) {
// If module doesn't have named metadata anchors or COFF debug section
// is not available, skip any debug info related stuff.
if (!MMI->getModule()->getNamedMetadata("llvm.dbg.cu") ||
@@ -178,7 +223,8 @@ static const DISubprogram *getQualifiedNameComponents(
static std::string getQualifiedName(ArrayRef<StringRef> QualifiedNameComponents,
StringRef TypeName) {
std::string FullyQualifiedName;
- for (StringRef QualifiedNameComponent : reverse(QualifiedNameComponents)) {
+ for (StringRef QualifiedNameComponent :
+ llvm::reverse(QualifiedNameComponents)) {
FullyQualifiedName.append(QualifiedNameComponent);
FullyQualifiedName.append("::");
}
@@ -238,7 +284,7 @@ TypeIndex CodeViewDebug::getFuncIdForSubprogram(const DISubprogram *SP) {
// The display name includes function template arguments. Drop them to match
// MSVC.
- StringRef DisplayName = SP->getDisplayName().split('<').first;
+ StringRef DisplayName = SP->getName().split('<').first;
const DIScope *Scope = SP->getScope().resolve();
TypeIndex TI;
@@ -319,7 +365,7 @@ static void addLocIfNotPresent(SmallVectorImpl<const DILocation *> &Locs,
void CodeViewDebug::maybeRecordLocation(const DebugLoc &DL,
const MachineFunction *MF) {
// Skip this instruction if it has the same location as the previous one.
- if (DL == CurFn->LastLoc)
+ if (!DL || DL == PrevInstLoc)
return;
const DIScope *Scope = DL.get()->getScope();
@@ -339,11 +385,11 @@ void CodeViewDebug::maybeRecordLocation(const DebugLoc &DL,
if (!CurFn->HaveLineInfo)
CurFn->HaveLineInfo = true;
unsigned FileId = 0;
- if (CurFn->LastLoc.get() && CurFn->LastLoc->getFile() == DL->getFile())
+ if (PrevInstLoc.get() && PrevInstLoc->getFile() == DL->getFile())
FileId = CurFn->LastFileId;
else
FileId = CurFn->LastFileId = maybeRecordFile(DL->getFile());
- CurFn->LastLoc = DL;
+ PrevInstLoc = DL;
unsigned FuncId = CurFn->FuncId;
if (const DILocation *SiteLoc = DL->getInlinedAt()) {
@@ -393,7 +439,7 @@ void CodeViewDebug::endModule() {
// subprograms.
switchToDebugSectionForSymbol(nullptr);
- MCSymbol *CompilerInfo = beginCVSubsection(ModuleSubstreamKind::Symbols);
+ MCSymbol *CompilerInfo = beginCVSubsection(DebugSubsectionKind::Symbols);
emitCompilerInformation();
endCVSubsection(CompilerInfo);
@@ -417,7 +463,7 @@ void CodeViewDebug::endModule() {
// Emit UDT records for any types used by global variables.
if (!GlobalUDTs.empty()) {
- MCSymbol *SymbolsEnd = beginCVSubsection(ModuleSubstreamKind::Symbols);
+ MCSymbol *SymbolsEnd = beginCVSubsection(DebugSubsectionKind::Symbols);
emitDebugInfoForUDTs(GlobalUDTs);
endCVSubsection(SymbolsEnd);
}
@@ -469,17 +515,21 @@ void CodeViewDebug::emitTypeInformation() {
CommentPrefix += ' ';
}
- TypeDatabase TypeDB;
- CVTypeDumper CVTD(TypeDB);
- TypeTable.ForEachRecord([&](TypeIndex Index, ArrayRef<uint8_t> Record) {
+ TypeTableCollection Table(TypeTable.records());
+ Optional<TypeIndex> B = Table.getFirst();
+ while (B) {
+ // This will fail if the record data is invalid.
+ CVType Record = Table.getType(*B);
+
if (OS.isVerboseAsm()) {
// Emit a block comment describing the type record for readability.
SmallString<512> CommentBlock;
raw_svector_ostream CommentOS(CommentBlock);
ScopedPrinter SP(CommentOS);
SP.setPrefix(CommentPrefix);
- TypeDumpVisitor TDV(TypeDB, &SP, false);
- Error E = CVTD.dump(Record, TDV);
+ TypeDumpVisitor TDV(Table, &SP, false);
+
+ Error E = codeview::visitTypeRecord(Record, *B, TDV);
if (E) {
logAllUnhandledErrors(std::move(E), errs(), "error: ");
llvm_unreachable("produced malformed type record");
@@ -489,29 +539,10 @@ void CodeViewDebug::emitTypeInformation() {
// newline.
OS.emitRawComment(
CommentOS.str().drop_front(CommentPrefix.size() - 1).rtrim());
- } else {
-#ifndef NDEBUG
- // Assert that the type data is valid even if we aren't dumping
- // comments. The MSVC linker doesn't do much type record validation,
- // so the first link of an invalid type record can succeed while
- // subsequent links will fail with LNK1285.
- ByteStream Stream(Record);
- CVTypeArray Types;
- StreamReader Reader(Stream);
- Error E = Reader.readArray(Types, Reader.getLength());
- if (!E) {
- TypeVisitorCallbacks C;
- E = CVTypeVisitor(C).visitTypeStream(Types);
- }
- if (E) {
- logAllUnhandledErrors(std::move(E), errs(), "error: ");
- llvm_unreachable("produced malformed type record");
- }
-#endif
}
- StringRef S(reinterpret_cast<const char *>(Record.data()), Record.size());
- OS.EmitBinaryData(S);
- });
+ OS.EmitBinaryData(Record.str_data());
+ B = Table.getNext(*B);
+ }
}
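
The loop above walks the new TypeTableCollection with an optional cursor instead of the old ForEachRecord callback. A minimal standalone sketch of the same getFirst()/getNext() idiom, with std::optional standing in for llvm::Optional and all types hypothetical (this is the shape of the walk, not the LLVM API):

#include <cstddef>
#include <optional>
#include <vector>

// Hypothetical forward-only table; std::optional plays the role of the
// Optional<TypeIndex> cursor returned by getFirst()/getNext() above.
struct Table {
  std::vector<int> Records;
  std::optional<std::size_t> getFirst() const {
    return Records.empty() ? std::nullopt
                           : std::optional<std::size_t>(0);
  }
  std::optional<std::size_t> getNext(std::size_t I) const {
    return I + 1 < Records.size() ? std::optional<std::size_t>(I + 1)
                                  : std::nullopt;
  }
};

int sumRecords(const Table &T) {
  int Total = 0;
  // Same shape as the emitTypeInformation() loop: advance the optional
  // cursor until it comes back empty.
  for (auto B = T.getFirst(); B; B = T.getNext(*B))
    Total += T.Records[*B];
  return Total;
}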
namespace {
@@ -586,7 +617,7 @@ static CPUType mapArchToCVCPUType(Triple::ArchType Type) {
}
}
-} // anonymous namespace
+} // end anonymous namespace
void CodeViewDebug::emitCompilerInformation() {
MCContext &Context = MMI->getContext();
@@ -645,7 +676,7 @@ void CodeViewDebug::emitInlineeLinesSubsection() {
return;
OS.AddComment("Inlinee lines subsection");
- MCSymbol *InlineEnd = beginCVSubsection(ModuleSubstreamKind::InlineeLines);
+ MCSymbol *InlineEnd = beginCVSubsection(DebugSubsectionKind::InlineeLines);
// We don't provide any extra file info.
// FIXME: Find out if debuggers use this info.
@@ -658,7 +689,7 @@ void CodeViewDebug::emitInlineeLinesSubsection() {
OS.AddBlankLine();
unsigned FileId = maybeRecordFile(SP->getFile());
- OS.AddComment("Inlined function " + SP->getDisplayName() + " starts at " +
+ OS.AddComment("Inlined function " + SP->getName() + " starts at " +
SP->getFilename() + Twine(':') + Twine(SP->getLine()));
OS.AddBlankLine();
// The filechecksum table uses 8 byte entries for now, and file ids start at
@@ -760,17 +791,17 @@ void CodeViewDebug::emitDebugInfoForFunction(const Function *GV,
// If we have a display name, build the fully qualified name by walking the
// chain of scopes.
- if (!SP->getDisplayName().empty())
+ if (!SP->getName().empty())
FuncName =
- getFullyQualifiedName(SP->getScope().resolve(), SP->getDisplayName());
+ getFullyQualifiedName(SP->getScope().resolve(), SP->getName());
// If our DISubprogram name is empty, use the mangled name.
if (FuncName.empty())
- FuncName = GlobalValue::getRealLinkageName(GV->getName());
+ FuncName = GlobalValue::dropLLVMManglingEscape(GV->getName());
// Emit a symbol subsection, required by VS2012+ to find function boundaries.
OS.AddComment("Symbol subsection for " + Twine(FuncName));
- MCSymbol *SymbolsEnd = beginCVSubsection(ModuleSubstreamKind::Symbols);
+ MCSymbol *SymbolsEnd = beginCVSubsection(DebugSubsectionKind::Symbols);
{
MCSymbol *ProcRecordBegin = MMI->getContext().createTempSymbol(),
*ProcRecordEnd = MMI->getContext().createTempSymbol();
@@ -887,13 +918,21 @@ void CodeViewDebug::collectVariableInfoFromMFTable(
if (!Scope)
continue;
+ // If the variable has an attached offset expression, extract it.
+ // FIXME: Try to handle DW_OP_deref as well.
+ int64_t ExprOffset = 0;
+ if (VI.Expr)
+ if (!VI.Expr->extractIfOffset(ExprOffset))
+ continue;
+
// Get the frame register used and the offset.
unsigned FrameReg = 0;
int FrameOffset = TFI->getFrameIndexReference(*Asm->MF, VI.Slot, FrameReg);
uint16_t CVReg = TRI->getCodeViewRegNum(FrameReg);
// Calculate the label ranges.
- LocalVarDefRange DefRange = createDefRangeMem(CVReg, FrameOffset);
+ LocalVarDefRange DefRange =
+ createDefRangeMem(CVReg, FrameOffset + ExprOffset);
for (const InsnRange &Range : Scope->getRanges()) {
const MCSymbol *Begin = getLabelBeforeInsn(Range.first);
const MCSymbol *End = getLabelAfterInsn(Range.second);
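
extractIfOffset() lets the MF-table path above fold a constant offset out of the variable's expression before building the def range; anything it cannot fold (such as DW_OP_deref, per the FIXME) makes the variable be skipped. A simplified sketch of that offset-only check over a flat op stream; the opcode constant is real DWARF, the rest is hypothetical and omits the DW_OP_plus/DW_OP_minus forms the real DIExpression helper also accepts:

#include <cstdint>
#include <vector>

constexpr std::uint64_t DwOpPlusUconst = 0x23; // DW_OP_plus_uconst
bool extractIfOffset(const std::vector<std::uint64_t> &Ops,
                     std::int64_t &Offset) {
  if (Ops.empty()) { // empty expression: plain offset of zero
    Offset = 0;
    return true;
  }
  if (Ops.size() == 2 && Ops[0] == DwOpPlusUconst) {
    Offset = static_cast<std::int64_t>(Ops[1]); // [plus_uconst, N] => N
    return true;
  }
  return false; // anything else (e.g. DW_OP_deref): caller skips the var
}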
@@ -948,10 +987,10 @@ void CodeViewDebug::collectVariableInfo(const DISubprogram *SP) {
// Handle fragments.
auto Fragment = DIExpr->getFragmentInfo();
- if (DIExpr && Fragment) {
+ if (Fragment) {
IsSubfield = true;
StructOffset = Fragment->OffsetInBits / 8;
- } else if (DIExpr && DIExpr->getNumElements() > 0) {
+ } else if (DIExpr->getNumElements() > 0) {
continue; // Ignore unrecognized exprs.
}
@@ -1014,14 +1053,7 @@ void CodeViewDebug::collectVariableInfo(const DISubprogram *SP) {
}
}
-void CodeViewDebug::beginFunction(const MachineFunction *MF) {
- assert(!CurFn && "Can't process two functions at once!");
-
- if (!Asm || !MMI->hasDebugInfo() || !MF->getFunction()->getSubprogram())
- return;
-
- DebugHandlerBase::beginFunction(MF);
-
+void CodeViewDebug::beginFunctionImpl(const MachineFunction *MF) {
const Function *GV = MF->getFunction();
assert(FnDebugInfo.count(GV) == false);
CurFn = &FnDebugInfo[GV];
@@ -1038,11 +1070,11 @@ void CodeViewDebug::beginFunction(const MachineFunction *MF) {
bool EmptyPrologue = true;
for (const auto &MBB : *MF) {
for (const auto &MI : MBB) {
- if (!MI.isDebugValue() && !MI.getFlag(MachineInstr::FrameSetup) &&
+ if (!MI.isMetaInstruction() && !MI.getFlag(MachineInstr::FrameSetup) &&
MI.getDebugLoc()) {
PrologEndLoc = MI.getDebugLoc();
break;
- } else if (!MI.isDebugValue()) {
+ } else if (!MI.isMetaInstruction()) {
EmptyPrologue = false;
}
}
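
The scan above ends the prologue at the first real (non-meta, non-frame-setup) instruction that carries a location, and marks the prologue non-empty if a real instruction without one comes first. A compact sketch of that scan over a single block, with stand-in types (hypothetical, not MachineInstr):

#include <optional>
#include <vector>

struct MI {                // stand-in for MachineInstr
  bool Meta = false;       // isMetaInstruction()
  bool FrameSetup = false; // MachineInstr::FrameSetup flag
  std::optional<int> Loc;  // getDebugLoc()
};

// The prologue ends at the first real instruction past frame setup that
// has a location; a real instruction without one makes it non-empty.
std::optional<int> prologEnd(const std::vector<MI> &Block, bool &Empty) {
  Empty = true;
  for (const MI &I : Block) {
    if (!I.Meta && !I.FrameSetup && I.Loc)
      return I.Loc;
    if (!I.Meta)
      Empty = false;
  }
  return std::nullopt;
}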
@@ -1144,33 +1176,12 @@ TypeIndex CodeViewDebug::lowerTypeArray(const DICompositeType *Ty) {
DITypeRef ElementTypeRef = Ty->getBaseType();
TypeIndex ElementTypeIndex = getTypeIndex(ElementTypeRef);
// IndexType is size_t, which depends on the bitness of the target.
- TypeIndex IndexType = Asm->MAI->getPointerSize() == 8
+ TypeIndex IndexType = Asm->TM.getPointerSize() == 8
? TypeIndex(SimpleTypeKind::UInt64Quad)
: TypeIndex(SimpleTypeKind::UInt32Long);
uint64_t ElementSize = getBaseTypeSize(ElementTypeRef) / 8;
-
- // We want to assert that the element type multiplied by the array lengths
- // match the size of the overall array. However, if we don't have complete
- // type information for the base type, we can't make this assertion. This
- // happens if limited debug info is enabled in this case:
- // struct VTableOptzn { VTableOptzn(); virtual ~VTableOptzn(); };
- // VTableOptzn array[3];
- // The DICompositeType of VTableOptzn will have size zero, and the array will
- // have size 3 * sizeof(void*), and we should avoid asserting.
- //
- // There is a related bug in the front-end where an array of a structure,
- // which was declared as incomplete structure first, ends up not getting a
- // size assigned to it. (PR28303)
- // Example:
- // struct A(*p)[3];
- // struct A { int f; } a[3];
- bool PartiallyIncomplete = false;
- if (Ty->getSizeInBits() == 0 || ElementSize == 0) {
- PartiallyIncomplete = true;
- }
-
// Add subranges to array type.
DINodeArray Elements = Ty->getElements();
for (int i = Elements.size() - 1; i >= 0; --i) {
@@ -1185,16 +1196,14 @@ TypeIndex CodeViewDebug::lowerTypeArray(const DICompositeType *Ty) {
// Variable Length Array (VLA) has Count equal to '-1'.
// Replace with Count '1', assume it is the minimum VLA length.
// FIXME: Make front-end support VLA subrange and emit LF_DIMVARLU.
- if (Count == -1) {
+ if (Count == -1)
Count = 1;
- PartiallyIncomplete = true;
- }
// Update the element size and element type index for subsequent subranges.
ElementSize *= Count;
// If this is the outermost array, use the size from the array. It will be
- // more accurate if PartiallyIncomplete is true.
+ // more accurate if we had a VLA or an incomplete element type size.
uint64_t ArraySize =
(i == 0 && ElementSize == 0) ? Ty->getSizeInBits() / 8 : ElementSize;
@@ -1203,9 +1212,6 @@ TypeIndex CodeViewDebug::lowerTypeArray(const DICompositeType *Ty) {
ElementTypeIndex = TypeTable.writeKnownType(AR);
}
- (void)PartiallyIncomplete;
- assert(PartiallyIncomplete || ElementSize == (Ty->getSizeInBits() / 8));
-
return ElementTypeIndex;
}
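
The subrange loop above multiplies counts from the innermost dimension outward, clamps a VLA count of -1 to 1, and falls back to the composite type's own size for the outermost array when the accumulated size is zero. A small worked sketch of just the size accumulation (hypothetical helper; counts are listed outermost-first, as in the DI node array):

#include <cstdint>
#include <vector>

std::uint64_t arrayByteSize(std::uint64_t ElementSize,
                            const std::vector<std::int64_t> &Counts) {
  // Walk innermost-first, mirroring the reverse loop in lowerTypeArray().
  for (auto It = Counts.rbegin(); It != Counts.rend(); ++It) {
    std::int64_t Count = (*It == -1) ? 1 : *It; // clamp VLA count
    ElementSize *= static_cast<std::uint64_t>(Count);
  }
  return ElementSize; // e.g. int a[2][3]: 4 * 3 * 2 = 24 bytes
}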
@@ -1376,8 +1382,8 @@ TypeIndex CodeViewDebug::lowerTypeMemberPointer(const DIDerivedType *Ty) {
assert(Ty->getTag() == dwarf::DW_TAG_ptr_to_member_type);
TypeIndex ClassTI = getTypeIndex(Ty->getClassType());
TypeIndex PointeeTI = getTypeIndex(Ty->getBaseType(), Ty->getClassType());
- PointerKind PK = Asm->MAI->getPointerSize() == 8 ? PointerKind::Near64
- : PointerKind::Near32;
+ PointerKind PK = Asm->TM.getPointerSize() == 8 ? PointerKind::Near64
+ : PointerKind::Near32;
bool IsPMF = isa<DISubroutineType>(Ty->getBaseType());
PointerMode PM = IsPMF ? PointerMode::PointerToMemberFunction
: PointerMode::PointerToDataMember;
@@ -1492,7 +1498,8 @@ TypeIndex CodeViewDebug::lowerTypeMemberFunction(const DISubroutineType *Ty,
}
TypeIndex CodeViewDebug::lowerTypeVFTableShape(const DIDerivedType *Ty) {
- unsigned VSlotCount = Ty->getSizeInBits() / (8 * Asm->MAI->getPointerSize());
+ unsigned VSlotCount =
+ Ty->getSizeInBits() / (8 * Asm->MAI->getCodePointerSize());
SmallVector<VFTableSlotKind, 4> Slots(VSlotCount, VFTableSlotKind::Near);
VFTableShapeRecord VFTSR(Slots);
@@ -1600,7 +1607,7 @@ TypeIndex CodeViewDebug::lowerTypeEnum(const DICompositeType *Ty) {
EnumeratorCount++;
}
}
- FTI = FLRB.end();
+ FTI = FLRB.end(true);
}
std::string FullName = getFullyQualifiedName(Ty);
@@ -1620,11 +1627,11 @@ struct llvm::ClassInfo {
uint64_t BaseOffset;
};
// [MemberInfo]
- typedef std::vector<MemberInfo> MemberList;
+ using MemberList = std::vector<MemberInfo>;
- typedef TinyPtrVector<const DISubprogram *> MethodsList;
+ using MethodsList = TinyPtrVector<const DISubprogram *>;
// MethodName -> MethodsList
- typedef MapVector<MDString *, MethodsList> MethodsMap;
+ using MethodsMap = MapVector<MDString *, MethodsList>;
/// Base classes.
std::vector<const DIDerivedType *> Inheritance;
@@ -1736,10 +1743,12 @@ TypeIndex CodeViewDebug::lowerCompleteTypeClass(const DICompositeType *Ty) {
SizeInBytes, FullName, Ty->getIdentifier());
TypeIndex ClassTI = TypeTable.writeKnownType(CR);
- StringIdRecord SIDR(TypeIndex(0x0), getFullFilepath(Ty->getFile()));
- TypeIndex SIDI = TypeTable.writeKnownType(SIDR);
- UdtSourceLineRecord USLR(ClassTI, SIDI, Ty->getLine());
- TypeTable.writeKnownType(USLR);
+ if (const auto *File = Ty->getFile()) {
+ StringIdRecord SIDR(TypeIndex(0x0), getFullFilepath(File));
+ TypeIndex SIDI = TypeTable.writeKnownType(SIDR);
+ UdtSourceLineRecord USLR(ClassTI, SIDI, Ty->getLine());
+ TypeTable.writeKnownType(USLR);
+ }
addToUDTs(Ty, ClassTI);
@@ -1887,7 +1896,7 @@ CodeViewDebug::lowerRecordFieldList(const DICompositeType *Ty) {
translateMethodOptionFlags(SP), VFTableOffset, Name));
MemberCount++;
}
- assert(Methods.size() > 0 && "Empty methods map entry");
+ assert(!Methods.empty() && "Empty methods map entry");
if (Methods.size() == 1)
FLBR.writeMemberType(Methods[0]);
else {
@@ -1905,7 +1914,7 @@ CodeViewDebug::lowerRecordFieldList(const DICompositeType *Ty) {
MemberCount++;
}
- TypeIndex FieldTI = FLBR.end();
+ TypeIndex FieldTI = FLBR.end(true);
return std::make_tuple(FieldTI, Info.VShapeTI, MemberCount,
!Info.NestedClasses.empty());
}
@@ -2115,18 +2124,13 @@ void CodeViewDebug::emitLocalVariable(const LocalVariable &Var) {
}
}
-void CodeViewDebug::endFunction(const MachineFunction *MF) {
- if (!Asm || !CurFn) // We haven't created any debug info for this function.
- return;
-
+void CodeViewDebug::endFunctionImpl(const MachineFunction *MF) {
const Function *GV = MF->getFunction();
assert(FnDebugInfo.count(GV));
assert(CurFn == &FnDebugInfo[GV]);
collectVariableInfo(GV->getSubprogram());
- DebugHandlerBase::endFunction(MF);
-
// Don't emit anything if we don't have any line tables.
if (!CurFn->HaveLineInfo) {
FnDebugInfo.erase(GV);
@@ -2146,13 +2150,27 @@ void CodeViewDebug::beginInstruction(const MachineInstr *MI) {
if (!Asm || !CurFn || MI->isDebugValue() ||
MI->getFlag(MachineInstr::FrameSetup))
return;
+
+ // If the first instruction of a new MBB has no location, find the first
+ // instruction with a location and use that.
DebugLoc DL = MI->getDebugLoc();
- if (DL == PrevInstLoc || !DL)
+ if (!DL && MI->getParent() != PrevInstBB) {
+ for (const auto &NextMI : *MI->getParent()) {
+ DL = NextMI.getDebugLoc();
+ if (DL)
+ break;
+ }
+ }
+ PrevInstBB = MI->getParent();
+
+ // If we still don't have a debug location, don't record a location.
+ if (!DL)
return;
+
maybeRecordLocation(DL, Asm->MF);
}
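
The lookahead above only fires at a block boundary: when the first instruction of a new MBB has no location, the first located instruction in that block supplies one. A minimal sketch of that fallback over plain containers (types hypothetical):

#include <optional>
#include <vector>

struct Inst { std::optional<int> Loc; }; // stand-in for MachineInstr

// Mirrors the loop above: on entering a new block with an unlocated
// instruction, borrow the first location found anywhere in the block.
std::optional<int> locationFor(const std::vector<Inst> &Block,
                               const Inst &MI, bool EnteredNewBlock) {
  if (MI.Loc || !EnteredNewBlock)
    return MI.Loc;
  for (const Inst &NextMI : Block)
    if (NextMI.Loc)
      return NextMI.Loc;
  return std::nullopt; // still nothing: record no location at all
}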
-MCSymbol *CodeViewDebug::beginCVSubsection(ModuleSubstreamKind Kind) {
+MCSymbol *CodeViewDebug::beginCVSubsection(DebugSubsectionKind Kind) {
MCSymbol *BeginLabel = MMI->getContext().createTempSymbol(),
*EndLabel = MMI->getContext().createTempSymbol();
OS.EmitIntValue(unsigned(Kind), 4);
@@ -2212,7 +2230,7 @@ void CodeViewDebug::emitDebugInfoForGlobals() {
if (!GV->hasComdat() && !GV->isDeclarationForLinker()) {
if (!EndLabel) {
OS.AddComment("Symbol subsection for globals");
- EndLabel = beginCVSubsection(ModuleSubstreamKind::Symbols);
+ EndLabel = beginCVSubsection(DebugSubsectionKind::Symbols);
}
// FIXME: emitDebugInfoForGlobal() doesn't handle DIExpressions.
emitDebugInfoForGlobal(GVE->getVariable(), GV, Asm->getSymbol(GV));
@@ -2228,9 +2246,9 @@ void CodeViewDebug::emitDebugInfoForGlobals() {
if (GV->hasComdat()) {
MCSymbol *GVSym = Asm->getSymbol(GV);
OS.AddComment("Symbol subsection for " +
- Twine(GlobalValue::getRealLinkageName(GV->getName())));
+ Twine(GlobalValue::dropLLVMManglingEscape(GV->getName())));
switchToDebugSectionForSymbol(GVSym);
- EndLabel = beginCVSubsection(ModuleSubstreamKind::Symbols);
+ EndLabel = beginCVSubsection(DebugSubsectionKind::Symbols);
// FIXME: emitDebugInfoForGlobal() doesn't handle DIExpressions.
emitDebugInfoForGlobal(GVE->getVariable(), GV, GVSym);
endCVSubsection(EndLabel);
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h b/contrib/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h
index 3dd4315..fd8f604 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h
@@ -1,4 +1,4 @@
-//===-- llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h ----*- C++ -*--===//
+//===- llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h --------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -14,29 +14,44 @@
#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_CODEVIEWDEBUG_H
#define LLVM_LIB_CODEGEN_ASMPRINTER_CODEVIEWDEBUG_H
+#include "DbgValueHistoryCalculator.h"
#include "DebugHandlerBase.h"
+#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/StringMap.h"
-#include "llvm/CodeGen/AsmPrinter.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/DebugInfo/CodeView/CodeView.h"
#include "llvm/DebugInfo/CodeView/TypeIndex.h"
#include "llvm/DebugInfo/CodeView/TypeTableBuilder.h"
-#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DebugLoc.h"
-#include "llvm/MC/MCStreamer.h"
-#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Support/Allocator.h"
+#include "llvm/Support/Compiler.h"
+#include <cstdint>
+#include <map>
+#include <string>
+#include <tuple>
+#include <unordered_map>
+#include <utility>
+#include <vector>
namespace llvm {
-class StringRef;
-class LexicalScope;
struct ClassInfo;
+class StringRef;
+class AsmPrinter;
+class Function;
+class GlobalVariable;
+class MCSectionCOFF;
+class MCStreamer;
+class MCSymbol;
+class MachineFunction;
/// \brief Collects and handles line tables information in a CodeView format.
class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase {
MCStreamer &OS;
- llvm::BumpPtrAllocator Allocator;
+ BumpPtrAllocator Allocator;
codeview::TypeTableBuilder TypeTable;
/// Represents the most general definition range.
@@ -103,14 +118,13 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase {
SmallVector<LocalVariable, 1> Locals;
- DebugLoc LastLoc;
const MCSymbol *Begin = nullptr;
const MCSymbol *End = nullptr;
unsigned FuncId = 0;
unsigned LastFileId = 0;
bool HaveLineInfo = false;
};
- FunctionInfo *CurFn;
+ FunctionInfo *CurFn = nullptr;
/// The set of comdat .debug$S sections that we've seen so far. Each section
/// must start with a magic version number that must only be emitted once.
@@ -176,8 +190,9 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase {
std::vector<std::pair<std::string, codeview::TypeIndex>> LocalUDTs,
GlobalUDTs;
- typedef std::map<const DIFile *, std::string> FileToFilepathMapTy;
+ using FileToFilepathMapTy = std::map<const DIFile *, std::string>;
FileToFilepathMapTy FileToFilepathMap;
+
StringRef getFullFilepath(const DIFile *S);
unsigned maybeRecordFile(const DIFile *F);
@@ -216,14 +231,14 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase {
/// Opens a subsection of the given kind in a .debug$S codeview section.
/// Returns an end label for use with endCVSubsection when the subsection is
/// finished.
- MCSymbol *beginCVSubsection(codeview::ModuleSubstreamKind Kind);
+ MCSymbol *beginCVSubsection(codeview::DebugSubsectionKind Kind);
void endCVSubsection(MCSymbol *EndLabel);
void emitInlinedCallSite(const FunctionInfo &FI, const DILocation *InlinedAt,
const InlineSite &Site);
- typedef DbgValueHistoryMap::InlinedVariable InlinedVariable;
+ using InlinedVariable = DbgValueHistoryMap::InlinedVariable;
void collectVariableInfo(const DISubprogram *SP);
@@ -299,23 +314,25 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase {
unsigned getPointerSizeInBytes();
+protected:
+ /// \brief Gather pre-function debug information.
+ void beginFunctionImpl(const MachineFunction *MF) override;
+
+ /// \brief Gather post-function debug information.
+ void endFunctionImpl(const MachineFunction *) override;
+
public:
CodeViewDebug(AsmPrinter *Asm);
- void setSymbolSize(const llvm::MCSymbol *, uint64_t) override {}
+ void setSymbolSize(const MCSymbol *, uint64_t) override {}
/// \brief Emit the COFF section that holds the line table information.
void endModule() override;
- /// \brief Gather pre-function debug information.
- void beginFunction(const MachineFunction *MF) override;
-
- /// \brief Gather post-function debug information.
- void endFunction(const MachineFunction *) override;
-
/// \brief Process beginning of an instruction.
void beginInstruction(const MachineInstr *MI) override;
};
-} // End of namespace llvm
-#endif
+} // end namespace llvm
+
+#endif // LLVM_LIB_CODEGEN_ASMPRINTER_CODEVIEWDEBUG_H
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.cpp
index 8799189..886e6e2 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.cpp
@@ -31,6 +31,8 @@
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
+#define DEBUG_TYPE "dwarfdebug"
+
//===----------------------------------------------------------------------===//
// DIEAbbrevData Implementation
//===----------------------------------------------------------------------===//
@@ -42,6 +44,8 @@ void DIEAbbrevData::Profile(FoldingSetNodeID &ID) const {
// overloads. Otherwise MSVC 2010 thinks this call is ambiguous.
ID.AddInteger(unsigned(Attribute));
ID.AddInteger(unsigned(Form));
+ if (Form == dwarf::DW_FORM_implicit_const)
+ ID.AddInteger(Value);
}
//===----------------------------------------------------------------------===//
@@ -77,15 +81,22 @@ void DIEAbbrev::Emit(const AsmPrinter *AP) const {
dwarf::AttributeString(AttrData.getAttribute()).data());
// Emit form type.
+#ifndef NDEBUG
+ // Could be an assertion, but this way we can see the failing form code
+ // easily, which helps track down where it came from.
+ if (!dwarf::isValidFormForVersion(AttrData.getForm(),
+ AP->getDwarfVersion())) {
+ DEBUG(dbgs() << "Invalid form " << format("0x%x", AttrData.getForm())
+ << " for DWARF version " << AP->getDwarfVersion() << "\n");
+ llvm_unreachable("Invalid form for specified DWARF version");
+ }
+#endif
AP->EmitULEB128(AttrData.getForm(),
dwarf::FormEncodingString(AttrData.getForm()).data());
// Emit value for DW_FORM_implicit_const.
- if (AttrData.getForm() == dwarf::DW_FORM_implicit_const) {
- assert(AP->getDwarfVersion() >= 5 &&
- "DW_FORM_implicit_const is supported starting from DWARFv5");
+ if (AttrData.getForm() == dwarf::DW_FORM_implicit_const)
AP->EmitSLEB128(AttrData.getValue());
- }
}
// Mark end of abbreviation.
@@ -94,7 +105,7 @@ void DIEAbbrev::Emit(const AsmPrinter *AP) const {
}
LLVM_DUMP_METHOD
-void DIEAbbrev::print(raw_ostream &O) {
+void DIEAbbrev::print(raw_ostream &O) const {
O << "Abbreviation @"
<< format("0x%lx", (long)(intptr_t)this)
<< " "
@@ -107,13 +118,20 @@ void DIEAbbrev::print(raw_ostream &O) {
O << " "
<< dwarf::AttributeString(Data[i].getAttribute())
<< " "
- << dwarf::FormEncodingString(Data[i].getForm())
- << '\n';
+ << dwarf::FormEncodingString(Data[i].getForm());
+
+ if (Data[i].getForm() == dwarf::DW_FORM_implicit_const)
+ O << " " << Data[i].getValue();
+
+ O << '\n';
}
}
-LLVM_DUMP_METHOD
-void DIEAbbrev::dump() { print(dbgs()); }
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD void DIEAbbrev::dump() const {
+ print(dbgs());
+}
+#endif
//===----------------------------------------------------------------------===//
// DIEAbbrevSet Implementation
@@ -249,10 +267,11 @@ void DIE::print(raw_ostream &O, unsigned IndentCount) const {
O << "\n";
}
-LLVM_DUMP_METHOD
-void DIE::dump() {
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD void DIE::dump() const {
print(dbgs());
}
+#endif
unsigned DIE::computeOffsetsAndAbbrevs(const AsmPrinter *AP,
DIEAbbrevSet &AbbrevSet,
@@ -340,10 +359,11 @@ void DIEValue::print(raw_ostream &O) const {
}
}
-LLVM_DUMP_METHOD
-void DIEValue::dump() const {
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD void DIEValue::dump() const {
print(dbgs());
}
+#endif
//===----------------------------------------------------------------------===//
// DIEInteger Implementation
@@ -354,57 +374,42 @@ void DIEValue::dump() const {
void DIEInteger::EmitValue(const AsmPrinter *Asm, dwarf::Form Form) const {
switch (Form) {
case dwarf::DW_FORM_implicit_const:
- LLVM_FALLTHROUGH;
case dwarf::DW_FORM_flag_present:
// Emit something to keep the lines and comments in sync.
// FIXME: Is there a better way to do this?
Asm->OutStreamer->AddBlankLine();
return;
case dwarf::DW_FORM_flag:
- LLVM_FALLTHROUGH;
case dwarf::DW_FORM_ref1:
- LLVM_FALLTHROUGH;
case dwarf::DW_FORM_data1:
- LLVM_FALLTHROUGH;
+ case dwarf::DW_FORM_strx1:
+ case dwarf::DW_FORM_addrx1:
case dwarf::DW_FORM_ref2:
- LLVM_FALLTHROUGH;
case dwarf::DW_FORM_data2:
- LLVM_FALLTHROUGH;
+ case dwarf::DW_FORM_strx2:
+ case dwarf::DW_FORM_addrx2:
case dwarf::DW_FORM_strp:
- LLVM_FALLTHROUGH;
case dwarf::DW_FORM_ref4:
- LLVM_FALLTHROUGH;
case dwarf::DW_FORM_data4:
- LLVM_FALLTHROUGH;
+ case dwarf::DW_FORM_ref_sup4:
+ case dwarf::DW_FORM_strx4:
+ case dwarf::DW_FORM_addrx4:
case dwarf::DW_FORM_ref8:
- LLVM_FALLTHROUGH;
case dwarf::DW_FORM_ref_sig8:
- LLVM_FALLTHROUGH;
case dwarf::DW_FORM_data8:
- LLVM_FALLTHROUGH;
+ case dwarf::DW_FORM_ref_sup8:
case dwarf::DW_FORM_GNU_ref_alt:
- LLVM_FALLTHROUGH;
case dwarf::DW_FORM_GNU_strp_alt:
- LLVM_FALLTHROUGH;
case dwarf::DW_FORM_line_strp:
- LLVM_FALLTHROUGH;
case dwarf::DW_FORM_sec_offset:
- LLVM_FALLTHROUGH;
case dwarf::DW_FORM_strp_sup:
- LLVM_FALLTHROUGH;
- case dwarf::DW_FORM_ref_sup:
- LLVM_FALLTHROUGH;
case dwarf::DW_FORM_addr:
- LLVM_FALLTHROUGH;
case dwarf::DW_FORM_ref_addr:
Asm->OutStreamer->EmitIntValue(Integer, SizeOf(Asm, Form));
return;
case dwarf::DW_FORM_GNU_str_index:
- LLVM_FALLTHROUGH;
case dwarf::DW_FORM_GNU_addr_index:
- LLVM_FALLTHROUGH;
case dwarf::DW_FORM_ref_udata:
- LLVM_FALLTHROUGH;
case dwarf::DW_FORM_udata:
Asm->EmitULEB128(Integer);
return;
@@ -419,35 +424,41 @@ void DIEInteger::EmitValue(const AsmPrinter *Asm, dwarf::Form Form) const {
///
unsigned DIEInteger::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const {
switch (Form) {
- case dwarf::DW_FORM_implicit_const: LLVM_FALLTHROUGH;
- case dwarf::DW_FORM_flag_present: return 0;
- case dwarf::DW_FORM_flag: LLVM_FALLTHROUGH;
- case dwarf::DW_FORM_ref1: LLVM_FALLTHROUGH;
- case dwarf::DW_FORM_data1: return sizeof(int8_t);
- case dwarf::DW_FORM_ref2: LLVM_FALLTHROUGH;
- case dwarf::DW_FORM_data2: return sizeof(int16_t);
- case dwarf::DW_FORM_ref4: LLVM_FALLTHROUGH;
- case dwarf::DW_FORM_data4: return sizeof(int32_t);
- case dwarf::DW_FORM_ref8: LLVM_FALLTHROUGH;
- case dwarf::DW_FORM_ref_sig8: LLVM_FALLTHROUGH;
- case dwarf::DW_FORM_data8: return sizeof(int64_t);
+ case dwarf::DW_FORM_implicit_const:
+ case dwarf::DW_FORM_flag_present:
+ return 0;
+ case dwarf::DW_FORM_flag:
+ case dwarf::DW_FORM_ref1:
+ case dwarf::DW_FORM_data1:
+ case dwarf::DW_FORM_strx1:
+ case dwarf::DW_FORM_addrx1:
+ return sizeof(int8_t);
+ case dwarf::DW_FORM_ref2:
+ case dwarf::DW_FORM_data2:
+ case dwarf::DW_FORM_strx2:
+ case dwarf::DW_FORM_addrx2:
+ return sizeof(int16_t);
+ case dwarf::DW_FORM_ref4:
+ case dwarf::DW_FORM_data4:
+ case dwarf::DW_FORM_ref_sup4:
+ case dwarf::DW_FORM_strx4:
+ case dwarf::DW_FORM_addrx4:
+ return sizeof(int32_t);
+ case dwarf::DW_FORM_ref8:
+ case dwarf::DW_FORM_ref_sig8:
+ case dwarf::DW_FORM_data8:
+ case dwarf::DW_FORM_ref_sup8:
+ return sizeof(int64_t);
case dwarf::DW_FORM_ref_addr:
if (AP->getDwarfVersion() == 2)
return AP->getPointerSize();
LLVM_FALLTHROUGH;
case dwarf::DW_FORM_strp:
- LLVM_FALLTHROUGH;
case dwarf::DW_FORM_GNU_ref_alt:
- LLVM_FALLTHROUGH;
case dwarf::DW_FORM_GNU_strp_alt:
- LLVM_FALLTHROUGH;
case dwarf::DW_FORM_line_strp:
- LLVM_FALLTHROUGH;
case dwarf::DW_FORM_sec_offset:
- LLVM_FALLTHROUGH;
case dwarf::DW_FORM_strp_sup:
- LLVM_FALLTHROUGH;
- case dwarf::DW_FORM_ref_sup:
switch (AP->OutStreamer->getContext().getDwarfFormat()) {
case dwarf::DWARF32:
return 4;
@@ -456,11 +467,8 @@ unsigned DIEInteger::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const {
}
llvm_unreachable("Invalid DWARF format");
case dwarf::DW_FORM_GNU_str_index:
- LLVM_FALLTHROUGH;
case dwarf::DW_FORM_GNU_addr_index:
- LLVM_FALLTHROUGH;
case dwarf::DW_FORM_ref_udata:
- LLVM_FALLTHROUGH;
case dwarf::DW_FORM_udata:
return getULEB128Size(Integer);
case dwarf::DW_FORM_sdata:
@@ -484,7 +492,7 @@ void DIEInteger::print(raw_ostream &O) const {
/// EmitValue - Emit expression value.
///
void DIEExpr::EmitValue(const AsmPrinter *AP, dwarf::Form Form) const {
- AP->EmitDebugValue(Expr, SizeOf(AP, Form));
+ AP->EmitDebugThreadLocal(Expr, SizeOf(AP, Form));
}
/// SizeOf - Determine size of expression value in bytes.
@@ -519,7 +527,7 @@ unsigned DIELabel::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const {
if (Form == dwarf::DW_FORM_data4) return 4;
if (Form == dwarf::DW_FORM_sec_offset) return 4;
if (Form == dwarf::DW_FORM_strp) return 4;
- return AP->getPointerSize();
+ return AP->MAI->getCodePointerSize();
}
LLVM_DUMP_METHOD
@@ -541,7 +549,7 @@ unsigned DIEDelta::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const {
if (Form == dwarf::DW_FORM_data4) return 4;
if (Form == dwarf::DW_FORM_sec_offset) return 4;
if (Form == dwarf::DW_FORM_strp) return 4;
- return AP->getPointerSize();
+ return AP->MAI->getCodePointerSize();
}
LLVM_DUMP_METHOD
@@ -647,20 +655,12 @@ void DIEEntry::EmitValue(const AsmPrinter *AP, dwarf::Form Form) const {
case dwarf::DW_FORM_ref_addr: {
// Get the absolute offset for this DIE within the debug info/types section.
unsigned Addr = Entry->getDebugSectionOffset();
- if (AP->MAI->doesDwarfUseRelocationsAcrossSections()) {
- const DwarfDebug *DD = AP->getDwarfDebug();
- if (DD)
- assert(!DD->useSplitDwarf() &&
- "TODO: dwo files can't have relocations.");
- const DIEUnit *Unit = Entry->getUnit();
- assert(Unit && "CUDie should belong to a CU.");
- MCSection *Section = Unit->getSection();
- if (Section) {
- const MCSymbol *SectionSym = Section->getBeginSymbol();
- AP->EmitLabelPlusOffset(SectionSym, Addr, SizeOf(AP, Form), true);
- return;
- }
+ if (const MCSymbol *SectionSym =
+ Entry->getUnit()->getCrossSectionRelativeBaseAddress()) {
+ AP->EmitLabelPlusOffset(SectionSym, Addr, SizeOf(AP, Form), true);
+ return;
}
+
AP->OutStreamer->EmitIntValue(Addr, SizeOf(AP, Form));
return;
}
@@ -683,7 +683,7 @@ unsigned DIEEntry::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const {
return getULEB128Size(Entry->getOffset());
case dwarf::DW_FORM_ref_addr:
if (AP->getDwarfVersion() == 2)
- return AP->getPointerSize();
+ return AP->MAI->getCodePointerSize();
switch (AP->OutStreamer->getContext().getDwarfFormat()) {
case dwarf::DWARF32:
return 4;
@@ -809,7 +809,7 @@ unsigned DIELocList::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const {
return 4;
if (Form == dwarf::DW_FORM_sec_offset)
return 4;
- return AP->getPointerSize();
+ return AP->MAI->getCodePointerSize();
}
/// EmitValue - Emit label value.
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp
index d8ecc7c..15ade3c 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp
@@ -11,15 +11,15 @@
//
//===----------------------------------------------------------------------===//
-#include "ByteStreamer.h"
#include "DIEHash.h"
+#include "ByteStreamer.h"
#include "DwarfDebug.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/StringRef.h"
+#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/DIE.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/Dwarf.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/MD5.h"
#include "llvm/Support/raw_ostream.h"
@@ -116,65 +116,17 @@ void DIEHash::addParentContext(const DIE &Parent) {
// Collect all of the attributes for a particular DIE in single structure.
void DIEHash::collectAttributes(const DIE &Die, DIEAttrs &Attrs) {
-#define COLLECT_ATTR(NAME) \
- case dwarf::NAME: \
- Attrs.NAME = V; \
- break
for (const auto &V : Die.values()) {
DEBUG(dbgs() << "Attribute: "
<< dwarf::AttributeString(V.getAttribute())
<< " added.\n");
switch (V.getAttribute()) {
- COLLECT_ATTR(DW_AT_name);
- COLLECT_ATTR(DW_AT_accessibility);
- COLLECT_ATTR(DW_AT_address_class);
- COLLECT_ATTR(DW_AT_allocated);
- COLLECT_ATTR(DW_AT_artificial);
- COLLECT_ATTR(DW_AT_associated);
- COLLECT_ATTR(DW_AT_binary_scale);
- COLLECT_ATTR(DW_AT_bit_offset);
- COLLECT_ATTR(DW_AT_bit_size);
- COLLECT_ATTR(DW_AT_bit_stride);
- COLLECT_ATTR(DW_AT_byte_size);
- COLLECT_ATTR(DW_AT_byte_stride);
- COLLECT_ATTR(DW_AT_const_expr);
- COLLECT_ATTR(DW_AT_const_value);
- COLLECT_ATTR(DW_AT_containing_type);
- COLLECT_ATTR(DW_AT_count);
- COLLECT_ATTR(DW_AT_data_bit_offset);
- COLLECT_ATTR(DW_AT_data_location);
- COLLECT_ATTR(DW_AT_data_member_location);
- COLLECT_ATTR(DW_AT_decimal_scale);
- COLLECT_ATTR(DW_AT_decimal_sign);
- COLLECT_ATTR(DW_AT_default_value);
- COLLECT_ATTR(DW_AT_digit_count);
- COLLECT_ATTR(DW_AT_discr);
- COLLECT_ATTR(DW_AT_discr_list);
- COLLECT_ATTR(DW_AT_discr_value);
- COLLECT_ATTR(DW_AT_encoding);
- COLLECT_ATTR(DW_AT_enum_class);
- COLLECT_ATTR(DW_AT_endianity);
- COLLECT_ATTR(DW_AT_explicit);
- COLLECT_ATTR(DW_AT_is_optional);
- COLLECT_ATTR(DW_AT_location);
- COLLECT_ATTR(DW_AT_lower_bound);
- COLLECT_ATTR(DW_AT_mutable);
- COLLECT_ATTR(DW_AT_ordering);
- COLLECT_ATTR(DW_AT_picture_string);
- COLLECT_ATTR(DW_AT_prototyped);
- COLLECT_ATTR(DW_AT_small);
- COLLECT_ATTR(DW_AT_segment);
- COLLECT_ATTR(DW_AT_string_length);
- COLLECT_ATTR(DW_AT_threads_scaled);
- COLLECT_ATTR(DW_AT_upper_bound);
- COLLECT_ATTR(DW_AT_use_location);
- COLLECT_ATTR(DW_AT_use_UTF8);
- COLLECT_ATTR(DW_AT_variable_parameter);
- COLLECT_ATTR(DW_AT_virtuality);
- COLLECT_ATTR(DW_AT_visibility);
- COLLECT_ATTR(DW_AT_vtable_elem_location);
- COLLECT_ATTR(DW_AT_type);
+#define HANDLE_DIE_HASH_ATTR(NAME) \
+ case dwarf::NAME: \
+ Attrs.NAME = V; \
+ break;
+#include "DIEHashAttributes.def"
default:
break;
}
@@ -366,62 +318,12 @@ void DIEHash::hashAttribute(const DIEValue &Value, dwarf::Tag Tag) {
// Go through the attributes from \param Attrs in the order specified in 7.27.4
// and hash them.
void DIEHash::hashAttributes(const DIEAttrs &Attrs, dwarf::Tag Tag) {
-#define ADD_ATTR(ATTR) \
+#define HANDLE_DIE_HASH_ATTR(NAME) \
{ \
- if (ATTR) \
- hashAttribute(ATTR, Tag); \
+ if (Attrs.NAME) \
+ hashAttribute(Attrs.NAME, Tag); \
}
-
- ADD_ATTR(Attrs.DW_AT_name);
- ADD_ATTR(Attrs.DW_AT_accessibility);
- ADD_ATTR(Attrs.DW_AT_address_class);
- ADD_ATTR(Attrs.DW_AT_allocated);
- ADD_ATTR(Attrs.DW_AT_artificial);
- ADD_ATTR(Attrs.DW_AT_associated);
- ADD_ATTR(Attrs.DW_AT_binary_scale);
- ADD_ATTR(Attrs.DW_AT_bit_offset);
- ADD_ATTR(Attrs.DW_AT_bit_size);
- ADD_ATTR(Attrs.DW_AT_bit_stride);
- ADD_ATTR(Attrs.DW_AT_byte_size);
- ADD_ATTR(Attrs.DW_AT_byte_stride);
- ADD_ATTR(Attrs.DW_AT_const_expr);
- ADD_ATTR(Attrs.DW_AT_const_value);
- ADD_ATTR(Attrs.DW_AT_containing_type);
- ADD_ATTR(Attrs.DW_AT_count);
- ADD_ATTR(Attrs.DW_AT_data_bit_offset);
- ADD_ATTR(Attrs.DW_AT_data_location);
- ADD_ATTR(Attrs.DW_AT_data_member_location);
- ADD_ATTR(Attrs.DW_AT_decimal_scale);
- ADD_ATTR(Attrs.DW_AT_decimal_sign);
- ADD_ATTR(Attrs.DW_AT_default_value);
- ADD_ATTR(Attrs.DW_AT_digit_count);
- ADD_ATTR(Attrs.DW_AT_discr);
- ADD_ATTR(Attrs.DW_AT_discr_list);
- ADD_ATTR(Attrs.DW_AT_discr_value);
- ADD_ATTR(Attrs.DW_AT_encoding);
- ADD_ATTR(Attrs.DW_AT_enum_class);
- ADD_ATTR(Attrs.DW_AT_endianity);
- ADD_ATTR(Attrs.DW_AT_explicit);
- ADD_ATTR(Attrs.DW_AT_is_optional);
- ADD_ATTR(Attrs.DW_AT_location);
- ADD_ATTR(Attrs.DW_AT_lower_bound);
- ADD_ATTR(Attrs.DW_AT_mutable);
- ADD_ATTR(Attrs.DW_AT_ordering);
- ADD_ATTR(Attrs.DW_AT_picture_string);
- ADD_ATTR(Attrs.DW_AT_prototyped);
- ADD_ATTR(Attrs.DW_AT_small);
- ADD_ATTR(Attrs.DW_AT_segment);
- ADD_ATTR(Attrs.DW_AT_string_length);
- ADD_ATTR(Attrs.DW_AT_threads_scaled);
- ADD_ATTR(Attrs.DW_AT_upper_bound);
- ADD_ATTR(Attrs.DW_AT_use_location);
- ADD_ATTR(Attrs.DW_AT_use_UTF8);
- ADD_ATTR(Attrs.DW_AT_variable_parameter);
- ADD_ATTR(Attrs.DW_AT_virtuality);
- ADD_ATTR(Attrs.DW_AT_visibility);
- ADD_ATTR(Attrs.DW_AT_vtable_elem_location);
- ADD_ATTR(Attrs.DW_AT_type);
-
+#include "DIEHashAttributes.def"
// FIXME: Add the extended attributes.
}
@@ -478,10 +380,12 @@ void DIEHash::computeHash(const DIE &Die) {
/// DWARF4 standard. It is an md5 hash of the flattened description of the DIE
/// with the inclusion of the full CU and all top level CU entities.
// TODO: Initialize the type chain at 0 instead of 1 for CU signatures.
-uint64_t DIEHash::computeCUSignature(const DIE &Die) {
+uint64_t DIEHash::computeCUSignature(StringRef DWOName, const DIE &Die) {
Numbering.clear();
Numbering[&Die] = 1;
+ if (!DWOName.empty())
+ Hash.update(DWOName);
// Hash the DIE.
computeHash(Die);
@@ -490,9 +394,9 @@ uint64_t DIEHash::computeCUSignature(const DIE &Die) {
Hash.final(Result);
// ... take the least significant 8 bytes and return those. Our MD5
- // implementation always returns its results in little endian, swap bytes
- // appropriately.
- return support::endian::read64le(Result + 8);
+ // implementation always returns its results in little endian, so we actually
+ // need the "high" word.
+ return Result.high();
}
/// This is based on the type signature computation given in section 7.27 of the
@@ -514,7 +418,7 @@ uint64_t DIEHash::computeTypeSignature(const DIE &Die) {
Hash.final(Result);
// ... take the least significant 8 bytes and return those. Our MD5
- // implementation always returns its results in little endian, swap bytes
- // appropriately.
- return support::endian::read64le(Result + 8);
+ // implementation always returns its results in little endian, so we actually
+ // need the "high" word.
+ return Result.high();
}
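
Both signature functions now take bytes 8..15 of the 16-byte MD5 digest as a little-endian 64-bit value, which MD5Result::high() packages up. A standalone sketch of that extraction over a raw digest (assumes a little-endian host; the real code uses an endian-aware read):

#include <cstdint>
#include <cstring>

// Bytes 8..15 of the digest, read little-endian: the "high" word that
// computeCUSignature()/computeTypeSignature() return above.
std::uint64_t signatureFromDigest(const std::uint8_t (&Digest)[16]) {
  std::uint64_t High = 0;
  std::memcpy(&High, Digest + 8, sizeof(High)); // LE host assumed
  return High;
}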
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DIEHash.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DIEHash.h
index 996cd7e..29337ae 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DIEHash.h
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DIEHash.h
@@ -28,64 +28,15 @@ class CompileUnit;
class DIEHash {
// Collection of all attributes used in hashing a particular DIE.
struct DIEAttrs {
- DIEValue DW_AT_name;
- DIEValue DW_AT_accessibility;
- DIEValue DW_AT_address_class;
- DIEValue DW_AT_allocated;
- DIEValue DW_AT_artificial;
- DIEValue DW_AT_associated;
- DIEValue DW_AT_binary_scale;
- DIEValue DW_AT_bit_offset;
- DIEValue DW_AT_bit_size;
- DIEValue DW_AT_bit_stride;
- DIEValue DW_AT_byte_size;
- DIEValue DW_AT_byte_stride;
- DIEValue DW_AT_const_expr;
- DIEValue DW_AT_const_value;
- DIEValue DW_AT_containing_type;
- DIEValue DW_AT_count;
- DIEValue DW_AT_data_bit_offset;
- DIEValue DW_AT_data_location;
- DIEValue DW_AT_data_member_location;
- DIEValue DW_AT_decimal_scale;
- DIEValue DW_AT_decimal_sign;
- DIEValue DW_AT_default_value;
- DIEValue DW_AT_digit_count;
- DIEValue DW_AT_discr;
- DIEValue DW_AT_discr_list;
- DIEValue DW_AT_discr_value;
- DIEValue DW_AT_encoding;
- DIEValue DW_AT_enum_class;
- DIEValue DW_AT_endianity;
- DIEValue DW_AT_explicit;
- DIEValue DW_AT_is_optional;
- DIEValue DW_AT_location;
- DIEValue DW_AT_lower_bound;
- DIEValue DW_AT_mutable;
- DIEValue DW_AT_ordering;
- DIEValue DW_AT_picture_string;
- DIEValue DW_AT_prototyped;
- DIEValue DW_AT_small;
- DIEValue DW_AT_segment;
- DIEValue DW_AT_string_length;
- DIEValue DW_AT_threads_scaled;
- DIEValue DW_AT_upper_bound;
- DIEValue DW_AT_use_location;
- DIEValue DW_AT_use_UTF8;
- DIEValue DW_AT_variable_parameter;
- DIEValue DW_AT_virtuality;
- DIEValue DW_AT_visibility;
- DIEValue DW_AT_vtable_elem_location;
- DIEValue DW_AT_type;
-
- // Insert any additional ones here...
+#define HANDLE_DIE_HASH_ATTR(NAME) DIEValue NAME;
+#include "DIEHashAttributes.def"
};
public:
DIEHash(AsmPrinter *A = nullptr) : AP(A) {}
/// \brief Computes the CU signature.
- uint64_t computeCUSignature(const DIE &Die);
+ uint64_t computeCUSignature(StringRef DWOName, const DIE &Die);
/// \brief Computes the type signature.
uint64_t computeTypeSignature(const DIE &Die);
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DIEHashAttributes.def b/contrib/llvm/lib/CodeGen/AsmPrinter/DIEHashAttributes.def
new file mode 100644
index 0000000..28a0239
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DIEHashAttributes.def
@@ -0,0 +1,55 @@
+#ifndef HANDLE_DIE_HASH_ATTR
+#error "Missing macro definition of HANDLE_DIE_HASH_ATTR"
+#endif
+
+HANDLE_DIE_HASH_ATTR(DW_AT_name)
+HANDLE_DIE_HASH_ATTR(DW_AT_accessibility)
+HANDLE_DIE_HASH_ATTR(DW_AT_address_class)
+HANDLE_DIE_HASH_ATTR(DW_AT_allocated)
+HANDLE_DIE_HASH_ATTR(DW_AT_artificial)
+HANDLE_DIE_HASH_ATTR(DW_AT_associated)
+HANDLE_DIE_HASH_ATTR(DW_AT_binary_scale)
+HANDLE_DIE_HASH_ATTR(DW_AT_bit_offset)
+HANDLE_DIE_HASH_ATTR(DW_AT_bit_size)
+HANDLE_DIE_HASH_ATTR(DW_AT_bit_stride)
+HANDLE_DIE_HASH_ATTR(DW_AT_byte_size)
+HANDLE_DIE_HASH_ATTR(DW_AT_byte_stride)
+HANDLE_DIE_HASH_ATTR(DW_AT_const_expr)
+HANDLE_DIE_HASH_ATTR(DW_AT_const_value)
+HANDLE_DIE_HASH_ATTR(DW_AT_containing_type)
+HANDLE_DIE_HASH_ATTR(DW_AT_count)
+HANDLE_DIE_HASH_ATTR(DW_AT_data_bit_offset)
+HANDLE_DIE_HASH_ATTR(DW_AT_data_location)
+HANDLE_DIE_HASH_ATTR(DW_AT_data_member_location)
+HANDLE_DIE_HASH_ATTR(DW_AT_decimal_scale)
+HANDLE_DIE_HASH_ATTR(DW_AT_decimal_sign)
+HANDLE_DIE_HASH_ATTR(DW_AT_default_value)
+HANDLE_DIE_HASH_ATTR(DW_AT_digit_count)
+HANDLE_DIE_HASH_ATTR(DW_AT_discr)
+HANDLE_DIE_HASH_ATTR(DW_AT_discr_list)
+HANDLE_DIE_HASH_ATTR(DW_AT_discr_value)
+HANDLE_DIE_HASH_ATTR(DW_AT_encoding)
+HANDLE_DIE_HASH_ATTR(DW_AT_enum_class)
+HANDLE_DIE_HASH_ATTR(DW_AT_endianity)
+HANDLE_DIE_HASH_ATTR(DW_AT_explicit)
+HANDLE_DIE_HASH_ATTR(DW_AT_is_optional)
+HANDLE_DIE_HASH_ATTR(DW_AT_location)
+HANDLE_DIE_HASH_ATTR(DW_AT_lower_bound)
+HANDLE_DIE_HASH_ATTR(DW_AT_mutable)
+HANDLE_DIE_HASH_ATTR(DW_AT_ordering)
+HANDLE_DIE_HASH_ATTR(DW_AT_picture_string)
+HANDLE_DIE_HASH_ATTR(DW_AT_prototyped)
+HANDLE_DIE_HASH_ATTR(DW_AT_small)
+HANDLE_DIE_HASH_ATTR(DW_AT_segment)
+HANDLE_DIE_HASH_ATTR(DW_AT_string_length)
+HANDLE_DIE_HASH_ATTR(DW_AT_threads_scaled)
+HANDLE_DIE_HASH_ATTR(DW_AT_upper_bound)
+HANDLE_DIE_HASH_ATTR(DW_AT_use_location)
+HANDLE_DIE_HASH_ATTR(DW_AT_use_UTF8)
+HANDLE_DIE_HASH_ATTR(DW_AT_variable_parameter)
+HANDLE_DIE_HASH_ATTR(DW_AT_virtuality)
+HANDLE_DIE_HASH_ATTR(DW_AT_visibility)
+HANDLE_DIE_HASH_ATTR(DW_AT_vtable_elem_location)
+HANDLE_DIE_HASH_ATTR(DW_AT_type)
+
+#undef HANDLE_DIE_HASH_ATTR
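
The new .def file is a classic X-macro list: each includer defines HANDLE_DIE_HASH_ATTR to stamp out its own expansion (struct fields in DIEHash.h, switch cases in collectAttributes, hash calls in hashAttributes), so the attribute list lives in exactly one place. A self-contained sketch of the idiom with the list held in a macro rather than a file (names hypothetical):

#define FOR_EACH_COLOR(X) X(Red) X(Green) X(Blue)

struct Palette {
#define DECLARE_FIELD(NAME) int NAME = 0;
  FOR_EACH_COLOR(DECLARE_FIELD) // expands to: int Red = 0; int Green = 0; ...
#undef DECLARE_FIELD
};

int total(const Palette &P) {
  int Sum = 0;
#define ADD_FIELD(NAME) Sum += P.NAME;
  FOR_EACH_COLOR(ADD_FIELD)     // one list, a second expansion
#undef ADD_FIELD
  return Sum;
}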
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp
index 22fd7bb..c2ad9db 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp
@@ -194,6 +194,10 @@ void llvm::calculateDbgValueHistory(const MachineFunction *MF,
// some variables.
for (const MachineOperand &MO : MI.operands()) {
if (MO.isReg() && MO.isDef() && MO.getReg()) {
+ // Ignore call instructions that claim to clobber SP. The AArch64
+ // backend does this for aggregate function arguments.
+ if (MI.isCall() && MO.getReg() == SP)
+ continue;
// If this is a virtual register, only clobber it since it doesn't
// have aliases.
if (TRI->isVirtualRegister(MO.getReg()))
@@ -209,8 +213,7 @@ void llvm::calculateDbgValueHistory(const MachineFunction *MF,
} else if (MO.isRegMask()) {
// If this is a register mask operand, clobber all debug values in
// non-CSRs.
- for (int I = ChangingRegs.find_first(); I != -1;
- I = ChangingRegs.find_next(I)) {
+ for (unsigned I : ChangingRegs.set_bits()) {
// Don't consider SP to be clobbered by register masks.
if (unsigned(I) != SP && TRI->isPhysicalRegister(I) &&
MO.clobbersPhysReg(I)) {
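
The set_bits() change above replaces the manual find_first()/find_next() cursor with a range over the indices of set bits. A sketch of what that range yields, phrased over std::vector<bool> (BitVector::set_bits() is the real API; this helper is hypothetical):

#include <cstddef>
#include <vector>

// Equivalent of `for (unsigned I : ChangingRegs.set_bits())`: visit the
// index of every set bit, in increasing order.
std::vector<std::size_t> setBitIndices(const std::vector<bool> &BV) {
  std::vector<std::size_t> Idx;
  for (std::size_t I = 0; I != BV.size(); ++I)
    if (BV[I])
      Idx.push_back(I);
  return Idx;
}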
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp
index 9419098..0971c59 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp
@@ -115,12 +115,35 @@ uint64_t DebugHandlerBase::getBaseTypeSize(const DITypeRef TyRef) {
return getBaseTypeSize(BaseType);
}
+static bool hasDebugInfo(const MachineModuleInfo *MMI,
+ const MachineFunction *MF) {
+ if (!MMI->hasDebugInfo())
+ return false;
+ auto *SP = MF->getFunction()->getSubprogram();
+ if (!SP)
+ return false;
+ assert(SP->getUnit());
+ auto EK = SP->getUnit()->getEmissionKind();
+ if (EK == DICompileUnit::NoDebug)
+ return false;
+ return true;
+}
+
void DebugHandlerBase::beginFunction(const MachineFunction *MF) {
+ PrevInstBB = nullptr;
+
+ if (!Asm || !hasDebugInfo(MMI, MF)) {
+ skippedNonDebugFunction();
+ return;
+ }
+
// Grab the lexical scopes for the function, if we don't have any of those
// then we're not going to be able to do anything.
LScopes.initialize(*MF);
- if (LScopes.empty())
+ if (LScopes.empty()) {
+ beginFunctionImpl(MF);
return;
+ }
// Make sure that each lexical scope will have a begin/end label.
identifyScopeMarkers();
@@ -167,6 +190,7 @@ void DebugHandlerBase::beginFunction(const MachineFunction *MF) {
PrevInstLoc = DebugLoc();
PrevLabel = Asm->getFunctionBegin();
+ beginFunctionImpl(MF);
}
void DebugHandlerBase::beginInstruction(const MachineInstr *MI) {
@@ -200,9 +224,9 @@ void DebugHandlerBase::endInstruction() {
return;
assert(CurMI != nullptr);
- // Don't create a new label after DBG_VALUE instructions.
- // They don't generate code.
- if (!CurMI->isDebugValue()) {
+ // Don't create a new label after DBG_VALUE and other instructions that don't
+ // generate code.
+ if (!CurMI->isMetaInstruction()) {
PrevLabel = nullptr;
PrevInstBB = CurMI->getParent();
}
@@ -228,6 +252,8 @@ void DebugHandlerBase::endInstruction() {
}
void DebugHandlerBase::endFunction(const MachineFunction *MF) {
+ if (hasDebugInfo(MMI, MF))
+ endFunctionImpl(MF);
DbgValues.clear();
LabelsBeforeInsn.clear();
LabelsAfterInsn.clear();
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.h
index c00fa18..659a921 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.h
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.h
@@ -80,6 +80,10 @@ protected:
LabelsAfterInsn.insert(std::make_pair(MI, nullptr));
}
+ virtual void beginFunctionImpl(const MachineFunction *MF) = 0;
+ virtual void endFunctionImpl(const MachineFunction *MF) = 0;
+ virtual void skippedNonDebugFunction() {}
+
// AsmPrinterHandler overrides.
public:
void beginInstruction(const MachineInstr *MI) override;
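
This header change completes the template-method split: beginFunction()/endFunction() stay in DebugHandlerBase, gate on hasDebugInfo(), and call down into the pure-virtual *Impl hooks that CodeViewDebug and DwarfDebug now override. A minimal sketch of the pattern (stand-in class, not the LLVM interface):

// The base owns the gating and shared setup; subclasses fill in hooks.
class HandlerBase {
public:
  void beginFunction(bool HasDebugInfo) {
    if (!HasDebugInfo) {
      skippedNonDebugFunction(); // subclass may drop per-function state
      return;
    }
    // ...shared setup (lexical scopes, labels) would happen here...
    beginFunctionImpl();
  }
  virtual ~HandlerBase() = default;

protected:
  virtual void beginFunctionImpl() = 0;
  virtual void skippedNonDebugFunction() {}
};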
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DebugLocEntry.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DebugLocEntry.h
index 36fb150..a68e8cc 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DebugLocEntry.h
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DebugLocEntry.h
@@ -76,7 +76,8 @@ public:
const DIExpression *getExpression() const { return Expression; }
friend bool operator==(const Value &, const Value &);
friend bool operator<(const Value &, const Value &);
- void dump() const {
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ LLVM_DUMP_METHOD void dump() const {
if (isLocation()) {
llvm::dbgs() << "Loc = { reg=" << Loc.getReg() << " ";
if (Loc.isIndirect())
@@ -90,6 +91,7 @@ public:
if (Expression)
Expression->dump();
}
+#endif
};
private:
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DebugLocStream.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DebugLocStream.h
index 3656e9d..0c551df 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DebugLocStream.h
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DebugLocStream.h
@@ -10,9 +10,9 @@
#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_DEBUGLOCSTREAM_H
#define LLVM_LIB_CODEGEN_ASMPRINTER_DEBUGLOCSTREAM_H
+#include "ByteStreamer.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallVector.h"
-#include "ByteStreamer.h"
namespace llvm {
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.h
index 05ac1cb..b1ef8cf 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.h
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.h
@@ -16,12 +16,12 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/StringMap.h"
+#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/CodeGen/DIE.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/DataTypes.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/Dwarf.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/FormattedStream.h"
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp
index e08306b..dd7f793 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp
@@ -14,6 +14,7 @@
#include "DwarfException.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/Twine.h"
+#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -28,7 +29,6 @@
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MachineLocation.h"
-#include "llvm/Support/Dwarf.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/Target/TargetFrameLowering.h"
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
index d904372..676c48f 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
@@ -1,3 +1,16 @@
+//===-- llvm/CodeGen/DwarfCompileUnit.cpp - Dwarf Compile Units -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for constructing a dwarf compile unit.
+//
+//===----------------------------------------------------------------------===//
+
#include "DwarfCompileUnit.h"
#include "DwarfExpression.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -129,67 +142,72 @@ DIE *DwarfCompileUnit::getOrCreateGlobalVariableDIE(
bool addToAccelTable = false;
DIELoc *Loc = nullptr;
std::unique_ptr<DIEDwarfExpression> DwarfExpr;
- bool AllConstant = std::all_of(
- GlobalExprs.begin(), GlobalExprs.end(),
- [&](const GlobalExpr GE) {
- return GE.Expr && GE.Expr->isConstant();
- });
-
for (const auto &GE : GlobalExprs) {
const GlobalVariable *Global = GE.Var;
const DIExpression *Expr = GE.Expr;
+
// For compatibility with DWARF 3 and earlier,
// DW_AT_location(DW_OP_constu, X, DW_OP_stack_value) becomes
// DW_AT_const_value(X).
if (GlobalExprs.size() == 1 && Expr && Expr->isConstant()) {
+ addToAccelTable = true;
addConstantValue(*VariableDIE, /*Unsigned=*/true, Expr->getElement(1));
- // We cannot describe the location of dllimport'd variables: the
- // computation of their address requires loads from the IAT.
- } else if ((Global && !Global->hasDLLImportStorageClass()) || AllConstant) {
- if (!Loc) {
- Loc = new (DIEValueAllocator) DIELoc;
- DwarfExpr = llvm::make_unique<DIEDwarfExpression>(*Asm, *this, *Loc);
- }
+ break;
+ }
+
+ // We cannot describe the location of dllimport'd variables: the
+ // computation of their address requires loads from the IAT.
+ if (Global && Global->hasDLLImportStorageClass())
+ continue;
+
+ // Nothing to describe without an address or a constant.
+ if (!Global && (!Expr || !Expr->isConstant()))
+ continue;
+
+ if (!Loc) {
addToAccelTable = true;
- if (Global) {
- const MCSymbol *Sym = Asm->getSymbol(Global);
- if (Global->isThreadLocal()) {
- if (Asm->TM.Options.EmulatedTLS) {
- // TODO: add debug info for emulated thread local mode.
- } else {
- // FIXME: Make this work with -gsplit-dwarf.
- unsigned PointerSize = Asm->getDataLayout().getPointerSize();
- assert((PointerSize == 4 || PointerSize == 8) &&
- "Add support for other sizes if necessary");
- // Based on GCC's support for TLS:
- if (!DD->useSplitDwarf()) {
- // 1) Start with a constNu of the appropriate pointer size
- addUInt(*Loc, dwarf::DW_FORM_data1,
- PointerSize == 4 ? dwarf::DW_OP_const4u
- : dwarf::DW_OP_const8u);
- // 2) containing the (relocated) offset of the TLS variable
- // within the module's TLS block.
- addExpr(*Loc, dwarf::DW_FORM_udata,
- Asm->getObjFileLowering().getDebugThreadLocalSymbol(Sym));
- } else {
- addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_GNU_const_index);
- addUInt(*Loc, dwarf::DW_FORM_udata,
- DD->getAddressPool().getIndex(Sym, /* TLS */ true));
- }
- // 3) followed by an OP to make the debugger do a TLS lookup.
+ Loc = new (DIEValueAllocator) DIELoc;
+ DwarfExpr = llvm::make_unique<DIEDwarfExpression>(*Asm, *this, *Loc);
+ }
+
+ if (Global) {
+ const MCSymbol *Sym = Asm->getSymbol(Global);
+ if (Global->isThreadLocal()) {
+ if (Asm->TM.Options.EmulatedTLS) {
+ // TODO: add debug info for emulated thread local mode.
+ } else {
+ // FIXME: Make this work with -gsplit-dwarf.
+ unsigned PointerSize = Asm->getDataLayout().getPointerSize();
+ assert((PointerSize == 4 || PointerSize == 8) &&
+ "Add support for other sizes if necessary");
+ // Based on GCC's support for TLS:
+ if (!DD->useSplitDwarf()) {
+ // 1) Start with a constNu of the appropriate pointer size
addUInt(*Loc, dwarf::DW_FORM_data1,
- DD->useGNUTLSOpcode() ? dwarf::DW_OP_GNU_push_tls_address
- : dwarf::DW_OP_form_tls_address);
+ PointerSize == 4 ? dwarf::DW_OP_const4u
+ : dwarf::DW_OP_const8u);
+ // 2) containing the (relocated) offset of the TLS variable
+ // within the module's TLS block.
+ addExpr(*Loc, dwarf::DW_FORM_udata,
+ Asm->getObjFileLowering().getDebugThreadLocalSymbol(Sym));
+ } else {
+ addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_GNU_const_index);
+ addUInt(*Loc, dwarf::DW_FORM_udata,
+ DD->getAddressPool().getIndex(Sym, /* TLS */ true));
}
- } else {
- DD->addArangeLabel(SymbolCU(this, Sym));
- addOpAddress(*Loc, Sym);
+ // 3) followed by an OP to make the debugger do a TLS lookup.
+ addUInt(*Loc, dwarf::DW_FORM_data1,
+ DD->useGNUTLSOpcode() ? dwarf::DW_OP_GNU_push_tls_address
+ : dwarf::DW_OP_form_tls_address);
}
+ } else {
+ DD->addArangeLabel(SymbolCU(this, Sym));
+ addOpAddress(*Loc, Sym);
}
- if (Expr) {
- DwarfExpr->addFragmentOffset(Expr);
- DwarfExpr->AddExpression(Expr);
- }
+ }
+ if (Expr) {
+ DwarfExpr->addFragmentOffset(Expr);
+ DwarfExpr->addExpression(Expr);
}
}
if (Loc)
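
For the non-split-DWARF TLS path above, the resulting location expression is a constNu of the variable's offset within the TLS block followed by a TLS-lookup opcode. An illustrative encoding for 8-byte pointers (opcode values are from the DWARF spec; the helper itself is hypothetical):

#include <cstdint>
#include <vector>

// DW_OP_const8u <offset of Sym in the module's TLS block>
// DW_OP_form_tls_address (or DW_OP_GNU_push_tls_address)
std::vector<std::uint64_t> tlsLocation(std::uint64_t TlsBlockOffset,
                                       bool UseGNUOpcode) {
  return {0x0e /*DW_OP_const8u*/, TlsBlockOffset,
          UseGNUOpcode ? 0xe0ull /*DW_OP_GNU_push_tls_address*/
                       : 0x9bull /*DW_OP_form_tls_address*/};
}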
@@ -227,17 +245,6 @@ void DwarfCompileUnit::addRange(RangeSpan Range) {
CURanges.back().setEnd(Range.getEnd());
}
-DIE::value_iterator
-DwarfCompileUnit::addSectionLabel(DIE &Die, dwarf::Attribute Attribute,
- const MCSymbol *Label, const MCSymbol *Sec) {
- if (Asm->MAI->doesDwarfUseRelocationsAcrossSections())
- return addLabel(Die, Attribute,
- DD->getDwarfVersion() >= 4 ? dwarf::DW_FORM_sec_offset
- : dwarf::DW_FORM_data4,
- Label);
- return addSectionDelta(Die, Attribute, Label, Sec);
-}
-
void DwarfCompileUnit::initStmtList() {
// Define start line table label for each Compile Unit.
MCSymbol *LineTableStartSym =
@@ -362,15 +369,6 @@ void DwarfCompileUnit::constructScopeDIE(
FinalChildren.push_back(std::move(ScopeDIE));
}
-DIE::value_iterator
-DwarfCompileUnit::addSectionDelta(DIE &Die, dwarf::Attribute Attribute,
- const MCSymbol *Hi, const MCSymbol *Lo) {
- return Die.addValue(DIEValueAllocator, Attribute,
- DD->getDwarfVersion() >= 4 ? dwarf::DW_FORM_sec_offset
- : dwarf::DW_FORM_data4,
- new (DIEValueAllocator) DIEDelta(Hi, Lo));
-}
-
void DwarfCompileUnit::addScopeRangeList(DIE &ScopeDIE,
SmallVector<RangeSpan, 2> Range) {
const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
@@ -422,7 +420,7 @@ DIE *DwarfCompileUnit::constructInlinedScopeDIE(LexicalScope *Scope) {
auto *InlinedSP = getDISubprogram(DS);
// Find the subprogram's DwarfCompileUnit in the SPMap in case the subprogram
// was inlined from another compile unit.
- DIE *OriginDIE = DU->getAbstractSPDies()[InlinedSP];
+ DIE *OriginDIE = getAbstractSPDies()[InlinedSP];
assert(OriginDIE && "Unable to find original DIE for an inlined subprogram.");
auto ScopeDIE = DIE::get(DIEValueAllocator, dwarf::DW_TAG_inlined_subroutine);
@@ -507,8 +505,8 @@ DIE *DwarfCompileUnit::constructVariableDIEImpl(const DbgVariable &DV,
DIEDwarfExpression DwarfExpr(*Asm, *this, *Loc);
// If there is an expression, emit raw unsigned bytes.
DwarfExpr.addFragmentOffset(Expr);
- DwarfExpr.AddUnsignedConstant(DVInsn->getOperand(0).getImm());
- DwarfExpr.AddExpression(Expr);
+ DwarfExpr.addUnsignedConstant(DVInsn->getOperand(0).getImm());
+ DwarfExpr.addExpression(Expr);
addBlock(*VariableDie, dwarf::DW_AT_location, DwarfExpr.finalize());
} else
addConstantValue(*VariableDie, DVInsn->getOperand(0), DV.getType());
@@ -529,12 +527,19 @@ DIE *DwarfCompileUnit::constructVariableDIEImpl(const DbgVariable &DV,
DIEDwarfExpression DwarfExpr(*Asm, *this, *Loc);
for (auto &Fragment : DV.getFrameIndexExprs()) {
unsigned FrameReg = 0;
+ const DIExpression *Expr = Fragment.Expr;
const TargetFrameLowering *TFI = Asm->MF->getSubtarget().getFrameLowering();
int Offset = TFI->getFrameIndexReference(*Asm->MF, Fragment.FI, FrameReg);
- DwarfExpr.addFragmentOffset(Fragment.Expr);
- DwarfExpr.AddMachineRegIndirect(*Asm->MF->getSubtarget().getRegisterInfo(),
- FrameReg, Offset);
- DwarfExpr.AddExpression(Fragment.Expr);
+ DwarfExpr.addFragmentOffset(Expr);
+ SmallVector<uint64_t, 8> Ops;
+ Ops.push_back(dwarf::DW_OP_plus_uconst);
+ Ops.push_back(Offset);
+ Ops.append(Expr->elements_begin(), Expr->elements_end());
+ DIExpressionCursor Cursor(Ops);
+ DwarfExpr.setMemoryLocationKind();
+ DwarfExpr.addMachineRegExpression(
+ *Asm->MF->getSubtarget().getRegisterInfo(), Cursor, FrameReg);
+ DwarfExpr.addExpression(std::move(Cursor));
}
addBlock(*VariableDie, dwarf::DW_AT_location, DwarfExpr.finalize());
@@ -609,7 +614,7 @@ DIE *DwarfCompileUnit::createAndAddScopeChildren(LexicalScope *Scope,
void DwarfCompileUnit::constructAbstractSubprogramScopeDIE(
LexicalScope *Scope) {
- DIE *&AbsDef = DU->getAbstractSPDies()[Scope->getScopeNode()];
+ DIE *&AbsDef = getAbstractSPDies()[Scope->getScopeNode()];
if (AbsDef)
return;
@@ -659,8 +664,9 @@ DIE *DwarfCompileUnit::constructImportedEntityDIE(
else
EntityDie = getDIE(Entity);
assert(EntityDie);
- addSourceLine(*IMDie, Module->getLine(), Module->getScope()->getFilename(),
- Module->getScope()->getDirectory());
+ auto *File = Module->getFile();
+ addSourceLine(*IMDie, Module->getLine(), File ? File->getFilename() : "",
+ File ? File->getDirectory() : "");
addDIEEntry(*IMDie, dwarf::DW_AT_import, *EntityDie);
StringRef Name = Module->getName();
if (!Name.empty())
@@ -671,7 +677,7 @@ DIE *DwarfCompileUnit::constructImportedEntityDIE(
void DwarfCompileUnit::finishSubprogramDefinition(const DISubprogram *SP) {
DIE *D = getDIE(SP);
- if (DIE *AbsSPDIE = DU->getAbstractSPDies().lookup(SP)) {
+ if (DIE *AbsSPDIE = getAbstractSPDies().lookup(SP)) {
if (D)
// If this subprogram has an abstract definition, reference that
addDIEEntry(*D, dwarf::DW_AT_abstract_origin, *AbsSPDIE);
@@ -683,6 +689,42 @@ void DwarfCompileUnit::finishSubprogramDefinition(const DISubprogram *SP) {
}
}
+void DwarfCompileUnit::finishVariableDefinition(const DbgVariable &Var) {
+ DbgVariable *AbsVar = getExistingAbstractVariable(
+ InlinedVariable(Var.getVariable(), Var.getInlinedAt()));
+ auto *VariableDie = Var.getDIE();
+ if (AbsVar && AbsVar->getDIE()) {
+ addDIEEntry(*VariableDie, dwarf::DW_AT_abstract_origin,
+ *AbsVar->getDIE());
+ } else
+ applyVariableAttributes(Var, *VariableDie);
+}
+
+DbgVariable *DwarfCompileUnit::getExistingAbstractVariable(InlinedVariable IV) {
+ const DILocalVariable *Cleansed;
+ return getExistingAbstractVariable(IV, Cleansed);
+}
+
+// Find abstract variable, if any, associated with Var.
+DbgVariable *DwarfCompileUnit::getExistingAbstractVariable(
+ InlinedVariable IV, const DILocalVariable *&Cleansed) {
+ // More than one inlined variable corresponds to one abstract variable.
+ Cleansed = IV.first;
+ auto &AbstractVariables = getAbstractVariables();
+ auto I = AbstractVariables.find(Cleansed);
+ if (I != AbstractVariables.end())
+ return I->second.get();
+ return nullptr;
+}
+
+void DwarfCompileUnit::createAbstractVariable(const DILocalVariable *Var,
+ LexicalScope *Scope) {
+ assert(Scope && Scope->isAbstractScope());
+ auto AbsDbgVariable = make_unique<DbgVariable>(Var, /* IA */ nullptr);
+ DU->addScopeVariable(Scope, AbsDbgVariable.get());
+ getAbstractVariables()[Var] = std::move(AbsDbgVariable);
+}
+
void DwarfCompileUnit::emitHeader(bool UseOffsets) {
// Don't bother labeling the .dwo unit, as its offset isn't used.
if (!Skeleton) {
@@ -690,27 +732,54 @@ void DwarfCompileUnit::emitHeader(bool UseOffsets) {
Asm->OutStreamer->EmitLabel(LabelBegin);
}
- DwarfUnit::emitHeader(UseOffsets);
+ dwarf::UnitType UT = Skeleton ? dwarf::DW_UT_split_compile
+ : DD->useSplitDwarf() ? dwarf::DW_UT_skeleton
+ : dwarf::DW_UT_compile;
+ DwarfUnit::emitCommonHeader(UseOffsets, UT);
}
/// addGlobalName - Add a new global name to the compile unit.
-void DwarfCompileUnit::addGlobalName(StringRef Name, DIE &Die,
+void DwarfCompileUnit::addGlobalName(StringRef Name, const DIE &Die,
const DIScope *Context) {
- if (includeMinimalInlineScopes())
+ if (!DD->hasDwarfPubSections(includeMinimalInlineScopes()))
return;
std::string FullName = getParentContextString(Context) + Name.str();
GlobalNames[FullName] = &Die;
}
+void DwarfCompileUnit::addGlobalNameForTypeUnit(StringRef Name,
+ const DIScope *Context) {
+ if (!DD->hasDwarfPubSections(includeMinimalInlineScopes()))
+ return;
+ std::string FullName = getParentContextString(Context) + Name.str();
+ // Insert, allowing the entry to remain as-is if it's already present.
+ // This way the CU-level type DIE is preferred over the type-unit entry,
+ // which can't be described as a unit offset because it isn't in the CU at
+ // all, only in a type unit.
+ GlobalNames.insert(std::make_pair(std::move(FullName), &getUnitDie()));
+}
+
/// Add a new global type to the unit.
void DwarfCompileUnit::addGlobalType(const DIType *Ty, const DIE &Die,
const DIScope *Context) {
- if (includeMinimalInlineScopes())
+ if (!DD->hasDwarfPubSections(includeMinimalInlineScopes()))
return;
std::string FullName = getParentContextString(Context) + Ty->getName().str();
GlobalTypes[FullName] = &Die;
}
+void DwarfCompileUnit::addGlobalTypeUnitType(const DIType *Ty,
+ const DIScope *Context) {
+ if (!DD->hasDwarfPubSections(includeMinimalInlineScopes()))
+ return;
+ std::string FullName = getParentContextString(Context) + Ty->getName().str();
+ // Insert, allowing the entry to remain as-is if it's already present.
+ // This way the CU-level type DIE is preferred over the type-unit entry,
+ // which can't be described as a unit offset because it isn't in the CU at
+ // all, only in a type unit.
+ GlobalTypes.insert(std::make_pair(std::move(FullName), &getUnitDie()));
+}
+
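// A minimal illustration of the map semantics the type-unit helpers above
// rely on: StringMap::insert keeps an existing value, while operator[]
// (used by addGlobalName/addGlobalType) overwrites, so the CU-level DIE wins
// regardless of insertion order. Names and values below are hypothetical.

#include "llvm/ADT/StringMap.h"

static void example() {
  llvm::StringMap<const char *> M;
  M.insert(std::make_pair("ns::T", "unit DIE stub")); // keep-existing insert
  M["ns::T"] = "CU-level DIE";                        // overwrites the stub
  M.insert(std::make_pair("ns::T", "unit DIE stub")); // no-op: entry exists
  // M.lookup("ns::T") == "CU-level DIE" either way.
}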
/// addVariableAddress - Add DW_AT_location attribute for a
/// DbgVariable based on provided MachineLocation.
void DwarfCompileUnit::addVariableAddress(const DbgVariable &DV, DIE &Die,
@@ -727,22 +796,23 @@ void DwarfCompileUnit::addVariableAddress(const DbgVariable &DV, DIE &Die,
void DwarfCompileUnit::addAddress(DIE &Die, dwarf::Attribute Attribute,
const MachineLocation &Location) {
DIELoc *Loc = new (DIEValueAllocator) DIELoc;
- DIEDwarfExpression Expr(*Asm, *this, *Loc);
-
- bool validReg;
- if (Location.isReg())
- validReg = Expr.AddMachineReg(*Asm->MF->getSubtarget().getRegisterInfo(),
- Location.getReg());
- else
- validReg =
- Expr.AddMachineRegIndirect(*Asm->MF->getSubtarget().getRegisterInfo(),
- Location.getReg(), Location.getOffset());
+ DIEDwarfExpression DwarfExpr(*Asm, *this, *Loc);
+ if (Location.isIndirect())
+ DwarfExpr.setMemoryLocationKind();
- if (!validReg)
+ SmallVector<uint64_t, 8> Ops;
+ if (Location.isIndirect() && Location.getOffset()) {
+ Ops.push_back(dwarf::DW_OP_plus_uconst);
+ Ops.push_back(Location.getOffset());
+ }
+ DIExpressionCursor Cursor(Ops);
+ const TargetRegisterInfo &TRI = *Asm->MF->getSubtarget().getRegisterInfo();
+ if (!DwarfExpr.addMachineRegExpression(TRI, Cursor, Location.getReg()))
return;
+ DwarfExpr.addExpression(std::move(Cursor));
// Now attach the location information to the DIE.
- addBlock(Die, Attribute, Expr.finalize());
+ addBlock(Die, Attribute, DwarfExpr.finalize());
}
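// Hedged illustration of the rewrite above (register and offset values
// hypothetical): for an indirect location at register R with offset 8, the
// prepended DW_OP_plus_uconst folds into a base-register form when
// addMachineRegExpression pattern-matches it, yielding roughly
//   DW_OP_breg<R> +8
// instead of the longer DW_OP_reg<R>, DW_OP_plus_uconst 8 sequence.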
/// Start with the address based on the location provided, and generate the
@@ -754,23 +824,25 @@ void DwarfCompileUnit::addComplexAddress(const DbgVariable &DV, DIE &Die,
const MachineLocation &Location) {
DIELoc *Loc = new (DIEValueAllocator) DIELoc;
DIEDwarfExpression DwarfExpr(*Asm, *this, *Loc);
- const DIExpression *Expr = DV.getSingleExpression();
- DIExpressionCursor ExprCursor(Expr);
+ const DIExpression *DIExpr = DV.getSingleExpression();
+ DwarfExpr.addFragmentOffset(DIExpr);
+ if (Location.isIndirect())
+ DwarfExpr.setMemoryLocationKind();
+
+ SmallVector<uint64_t, 8> Ops;
+ if (Location.isIndirect() && Location.getOffset()) {
+ Ops.push_back(dwarf::DW_OP_plus_uconst);
+ Ops.push_back(Location.getOffset());
+ }
+ Ops.append(DIExpr->elements_begin(), DIExpr->elements_end());
+ DIExpressionCursor Cursor(Ops);
const TargetRegisterInfo &TRI = *Asm->MF->getSubtarget().getRegisterInfo();
- auto Reg = Location.getReg();
- DwarfExpr.addFragmentOffset(Expr);
- bool ValidReg =
- Location.getOffset()
- ? DwarfExpr.AddMachineRegIndirect(TRI, Reg, Location.getOffset())
- : DwarfExpr.AddMachineRegExpression(TRI, ExprCursor, Reg);
-
- if (!ValidReg)
+ if (!DwarfExpr.addMachineRegExpression(TRI, Cursor, Location.getReg()))
return;
-
- DwarfExpr.AddExpression(std::move(ExprCursor));
+ DwarfExpr.addExpression(std::move(Cursor));
// Now attach the location information to the DIE.
- addBlock(Die, Attribute, Loc);
+ addBlock(Die, Attribute, DwarfExpr.finalize());
}
/// Add a Dwarf loclistptr attribute data and value.
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
index a8025f1..e386727 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
@@ -15,8 +15,8 @@
#define LLVM_LIB_CODEGEN_ASMPRINTER_DWARFCOMPILEUNIT_H
#include "DwarfUnit.h"
+#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/IR/DebugInfo.h"
-#include "llvm/Support/Dwarf.h"
namespace llvm {
@@ -28,7 +28,7 @@ class DwarfFile;
class MCSymbol;
class LexicalScope;
-class DwarfCompileUnit : public DwarfUnit {
+class DwarfCompileUnit final : public DwarfUnit {
/// A numeric ID unique among all CUs in the module
unsigned UniqueID;
@@ -68,13 +68,26 @@ class DwarfCompileUnit : public DwarfUnit {
// ranges/locs.
const MCSymbol *BaseAddress;
+ DenseMap<const MDNode *, DIE *> AbstractSPDies;
+ DenseMap<const MDNode *, std::unique_ptr<DbgVariable>> AbstractVariables;
+
/// \brief Construct a DIE for the given DbgVariable without initializing the
/// DbgVariable's DIE reference.
DIE *constructVariableDIEImpl(const DbgVariable &DV, bool Abstract);
bool isDwoUnit() const override;
- bool includeMinimalInlineScopes() const;
+ DenseMap<const MDNode *, DIE *> &getAbstractSPDies() {
+ if (isDwoUnit() && !DD->shareAcrossDWOCUs())
+ return AbstractSPDies;
+ return DU->getAbstractSPDies();
+ }
+
+ DenseMap<const MDNode *, std::unique_ptr<DbgVariable>> &getAbstractVariables() {
+ if (isDwoUnit() && !DD->shareAcrossDWOCUs())
+ return AbstractVariables;
+ return DU->getAbstractVariables();
+ }
public:
DwarfCompileUnit(unsigned UID, const DICompileUnit *Node, AsmPrinter *A,
@@ -86,6 +99,8 @@ public:
return Skeleton;
}
+ bool includeMinimalInlineScopes() const;
+
void initStmtList();
/// Apply the DW_AT_stmt_list from this compile unit to the specified DIE.
@@ -112,10 +127,6 @@ public:
void addLocalLabelAddress(DIE &Die, dwarf::Attribute Attribute,
const MCSymbol *Label);
- /// addSectionDelta - Add a label delta attribute data and value.
- DIE::value_iterator addSectionDelta(DIE &Die, dwarf::Attribute Attribute,
- const MCSymbol *Hi, const MCSymbol *Lo);
-
DwarfCompileUnit &getCU() override { return *this; }
unsigned getOrCreateSourceID(StringRef FileName, StringRef DirName) override;
@@ -136,12 +147,6 @@ public:
void attachLowHighPC(DIE &D, const MCSymbol *Begin, const MCSymbol *End);
- /// addSectionLabel - Add a Dwarf section label attribute data and value.
- ///
- DIE::value_iterator addSectionLabel(DIE &Die, dwarf::Attribute Attribute,
- const MCSymbol *Label,
- const MCSymbol *Sec);
-
/// \brief Find DIE for the given subprogram and attach appropriate
/// DW_AT_low_pc and DW_AT_high_pc attributes. If there are global
/// variables in this scope then create and insert DIEs for these
@@ -189,6 +194,13 @@ public:
DIE *constructImportedEntityDIE(const DIImportedEntity *Module);
void finishSubprogramDefinition(const DISubprogram *SP);
+ void finishVariableDefinition(const DbgVariable &Var);
+ /// Find abstract variable associated with Var.
+ typedef DbgValueHistoryMap::InlinedVariable InlinedVariable;
+ DbgVariable *getExistingAbstractVariable(InlinedVariable IV,
+ const DILocalVariable *&Cleansed);
+ DbgVariable *getExistingAbstractVariable(InlinedVariable IV);
+ void createAbstractVariable(const DILocalVariable *DV, LexicalScope *Scope);
/// Set the skeleton unit associated with this unit.
void setSkeleton(DwarfCompileUnit &Skel) { Skeleton = &Skel; }
@@ -210,12 +222,19 @@ public:
}
/// Add a new global name to the compile unit.
- void addGlobalName(StringRef Name, DIE &Die, const DIScope *Context) override;
+ void addGlobalName(StringRef Name, const DIE &Die,
+ const DIScope *Context) override;
+
+ /// Add a new global name present in a type unit to this compile unit.
+ void addGlobalNameForTypeUnit(StringRef Name, const DIScope *Context);
/// Add a new global type to the compile unit.
void addGlobalType(const DIType *Ty, const DIE &Die,
const DIScope *Context) override;
+ /// Add a new global type present in a type unit to this compile unit.
+ void addGlobalTypeUnitType(const DIType *Ty, const DIScope *Context);
+
const StringMap<const DIE *> &getGlobalNames() const { return GlobalNames; }
const StringMap<const DIE *> &getGlobalTypes() const { return GlobalTypes; }
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
index 91a3d09..f1b4d9f 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -22,6 +22,7 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/Triple.h"
+#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/CodeGen/DIE.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
@@ -38,8 +39,6 @@
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/Dwarf.h"
-#include "llvm/Support/Endian.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/LEB128.h"
@@ -72,6 +71,10 @@ static cl::opt<bool> GenerateARangeSection("generate-arange-section",
cl::desc("Generate dwarf aranges"),
cl::init(false));
+static cl::opt<bool> SplitDwarfCrossCuReferences(
+ "split-dwarf-cross-cu-references", cl::Hidden,
+ cl::desc("Enable cross-cu references in DWO files"), cl::init(false));
+
namespace {
enum DefaultOnOff { Default, Enable, Disable };
}
@@ -92,14 +95,6 @@ DwarfAccelTables("dwarf-accel-tables", cl::Hidden,
cl::init(Default));
static cl::opt<DefaultOnOff>
-SplitDwarf("split-dwarf", cl::Hidden,
- cl::desc("Output DWARF5 split debug info."),
- cl::values(clEnumVal(Default, "Default for platform"),
- clEnumVal(Enable, "Enabled"),
- clEnumVal(Disable, "Disabled")),
- cl::init(Default));
-
-static cl::opt<DefaultOnOff>
DwarfPubSections("generate-dwarf-pub-sections", cl::Hidden,
cl::desc("Generate DWARF pubnames and pubtypes sections"),
cl::values(clEnumVal(Default, "Default for platform"),
@@ -127,17 +122,17 @@ static const char *const DWARFGroupDescription = "DWARF Emission";
static const char *const DbgTimerName = "writer";
static const char *const DbgTimerDescription = "DWARF Debug Writer";
-void DebugLocDwarfExpression::EmitOp(uint8_t Op, const char *Comment) {
+void DebugLocDwarfExpression::emitOp(uint8_t Op, const char *Comment) {
BS.EmitInt8(
Op, Comment ? Twine(Comment) + " " + dwarf::OperationEncodingString(Op)
: dwarf::OperationEncodingString(Op));
}
-void DebugLocDwarfExpression::EmitSigned(int64_t Value) {
+void DebugLocDwarfExpression::emitSigned(int64_t Value) {
BS.EmitSLEB128(Value, Twine(Value));
}
-void DebugLocDwarfExpression::EmitUnsigned(uint64_t Value) {
+void DebugLocDwarfExpression::emitUnsigned(uint64_t Value) {
BS.EmitULEB128(Value, Twine(Value));
}
@@ -200,6 +195,12 @@ const DIType *DbgVariable::getType() const {
}
ArrayRef<DbgVariable::FrameIndexExpr> DbgVariable::getFrameIndexExprs() const {
+ if (FrameIndexExprs.size() == 1)
+ return FrameIndexExprs;
+
+ assert(all_of(FrameIndexExprs,
+ [](const FrameIndexExpr &A) { return A.Expr->isFragment(); }) &&
+ "multiple FI expressions without DW_OP_LLVM_fragment");
std::sort(FrameIndexExprs.begin(), FrameIndexExprs.end(),
[](const FrameIndexExpr &A, const FrameIndexExpr &B) -> bool {
return A.Expr->getFragmentInfo()->OffsetInBits <
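// A self-contained sketch of the ordering contract above, using a plain
// struct in place of FrameIndexExpr (names hypothetical): fragments of one
// variable must be emitted in ascending fragment-offset order.

#include <algorithm>
#include <vector>

struct Frag { unsigned OffsetInBits; }; // stand-in for FrameIndexExpr

static void sortFragments(std::vector<Frag> &Frags) {
  std::sort(Frags.begin(), Frags.end(), [](const Frag &A, const Frag &B) {
    return A.OffsetInBits < B.OffsetInBits; // ascending fragment offset
  });
}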
@@ -248,17 +249,8 @@ DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M)
HasAppleExtensionAttributes = tuneForLLDB();
- // Handle split DWARF. Off by default for now.
- if (SplitDwarf == Default)
- HasSplitDwarf = false;
- else
- HasSplitDwarf = SplitDwarf == Enable;
-
- // Pubnames/pubtypes on by default for GDB.
- if (DwarfPubSections == Default)
- HasDwarfPubSections = tuneForGDB();
- else
- HasDwarfPubSections = DwarfPubSections == Enable;
+ // Handle split DWARF.
+ HasSplitDwarf = !Asm->TM.Options.MCOptions.SplitDwarfFile.empty();
// SCE defaults to linkage names only for abstract subprograms.
if (DwarfLinkageNames == DefaultLinkageNames)
@@ -368,25 +360,49 @@ template <typename Func> static void forBothCUs(DwarfCompileUnit &CU, Func F) {
F(*SkelCU);
}
-void DwarfDebug::constructAbstractSubprogramScopeDIE(LexicalScope *Scope) {
+bool DwarfDebug::shareAcrossDWOCUs() const {
+ return SplitDwarfCrossCuReferences;
+}
+
+void DwarfDebug::constructAbstractSubprogramScopeDIE(DwarfCompileUnit &SrcCU,
+ LexicalScope *Scope) {
assert(Scope && Scope->getScopeNode());
assert(Scope->isAbstractScope());
assert(!Scope->getInlinedAt());
auto *SP = cast<DISubprogram>(Scope->getScopeNode());
- ProcessedSPNodes.insert(SP);
-
// Find the subprogram's DwarfCompileUnit in the SPMap in case the subprogram
// was inlined from another compile unit.
- auto &CU = *CUMap.lookup(SP->getUnit());
- forBothCUs(CU, [&](DwarfCompileUnit &CU) {
- CU.constructAbstractSubprogramScopeDIE(Scope);
- });
+ if (useSplitDwarf() && !shareAcrossDWOCUs() && !SP->getUnit()->getSplitDebugInlining())
+ // Avoid building the original CU if it won't be used
+ SrcCU.constructAbstractSubprogramScopeDIE(Scope);
+ else {
+ auto &CU = getOrCreateDwarfCompileUnit(SP->getUnit());
+ if (auto *SkelCU = CU.getSkeleton()) {
+ (shareAcrossDWOCUs() ? CU : SrcCU)
+ .constructAbstractSubprogramScopeDIE(Scope);
+ if (CU.getCUNode()->getSplitDebugInlining())
+ SkelCU->constructAbstractSubprogramScopeDIE(Scope);
+ } else
+ CU.constructAbstractSubprogramScopeDIE(Scope);
+ }
+}
+
+bool DwarfDebug::hasDwarfPubSections(bool includeMinimalInlineScopes) const {
+ // Opting in to GNU Pubnames/types overrides the default to ensure these are
+ // generated for things like Gold's gdb_index generation.
+ if (GenerateGnuPubSections)
+ return true;
+
+ if (DwarfPubSections == Default)
+ return tuneForGDB() && !includeMinimalInlineScopes;
+
+ return DwarfPubSections == Enable;
}
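// In short: opting in to GNU pub sections always wins; an explicit
// -generate-dwarf-pub-sections setting is honored next; otherwise pub
// sections are emitted only when tuning for GDB and the unit is not a
// minimal-scope (-gmlt style) one.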
-void DwarfDebug::addGnuPubAttributes(DwarfUnit &U, DIE &D) const {
- if (!GenerateGnuPubSections)
+void DwarfDebug::addGnuPubAttributes(DwarfCompileUnit &U, DIE &D) const {
+ if (!hasDwarfPubSections(U.includeMinimalInlineScopes()))
return;
U.addFlag(D, dwarf::DW_AT_GNU_pubnames);
@@ -395,7 +411,9 @@ void DwarfDebug::addGnuPubAttributes(DwarfUnit &U, DIE &D) const {
// Create new DwarfCompileUnit for the given metadata node with tag
// DW_TAG_compile_unit.
DwarfCompileUnit &
-DwarfDebug::constructDwarfCompileUnit(const DICompileUnit *DIUnit) {
+DwarfDebug::getOrCreateDwarfCompileUnit(const DICompileUnit *DIUnit) {
+ if (auto *CU = CUMap.lookup(DIUnit))
+ return *CU;
StringRef FN = DIUnit->getFilename();
CompilationDir = DIUnit->getDirectory();
@@ -407,7 +425,7 @@ DwarfDebug::constructDwarfCompileUnit(const DICompileUnit *DIUnit) {
if (useSplitDwarf()) {
NewCU.setSkeleton(constructSkeletonCU(NewCU));
NewCU.addString(Die, dwarf::DW_AT_GNU_dwo_name,
- DIUnit->getSplitDebugFilename());
+ Asm->TM.Options.MCOptions.SplitDwarfFile);
}
// LTO with assembly output shares a single line table amongst multiple CUs.
@@ -418,7 +436,14 @@ DwarfDebug::constructDwarfCompileUnit(const DICompileUnit *DIUnit) {
Asm->OutStreamer->getContext().setMCLineTableCompilationDir(
NewCU.getUniqueID(), CompilationDir);
- NewCU.addString(Die, dwarf::DW_AT_producer, DIUnit->getProducer());
+ StringRef Producer = DIUnit->getProducer();
+ StringRef Flags = DIUnit->getFlags();
+ if (!Flags.empty()) {
+ std::string ProducerWithFlags = Producer.str() + " " + Flags.str();
+ NewCU.addString(Die, dwarf::DW_AT_producer, ProducerWithFlags);
+ } else
+ NewCU.addString(Die, dwarf::DW_AT_producer, Producer);
+
NewCU.addUInt(Die, dwarf::DW_AT_language, dwarf::DW_FORM_data2,
DIUnit->getSourceLanguage());
NewCU.addString(Die, dwarf::DW_AT_name, FN);
@@ -521,7 +546,12 @@ void DwarfDebug::beginModule() {
}
for (DICompileUnit *CUNode : M->debug_compile_units()) {
- DwarfCompileUnit &CU = constructDwarfCompileUnit(CUNode);
+ if (CUNode->getEnumTypes().empty() && CUNode->getRetainedTypes().empty() &&
+ CUNode->getGlobalVariables().empty() &&
+ CUNode->getImportedEntities().empty() && CUNode->getMacros().empty())
+ continue;
+
+ DwarfCompileUnit &CU = getOrCreateDwarfCompileUnit(CUNode);
for (auto *IE : CUNode->getImportedEntities())
CU.addImportedEntity(IE);
@@ -544,7 +574,6 @@ void DwarfDebug::beginModule() {
// The retained types array by design contains pointers to
// MDNodes rather than DIRefs. Unique them here.
if (DIType *RT = dyn_cast<DIType>(Ty))
- if (!RT->isExternalTypeRef())
// There is no point in force-emitting a forward declaration.
CU.getOrCreateTypeDIE(RT);
}
@@ -564,22 +593,17 @@ void DwarfDebug::finishVariableDefinitions() {
// DIE::getUnit isn't simple - it walks parent pointers, etc.
DwarfCompileUnit *Unit = CUDieMap.lookup(VariableDie->getUnitDie());
assert(Unit);
- DbgVariable *AbsVar = getExistingAbstractVariable(
- InlinedVariable(Var->getVariable(), Var->getInlinedAt()));
- if (AbsVar && AbsVar->getDIE()) {
- Unit->addDIEEntry(*VariableDie, dwarf::DW_AT_abstract_origin,
- *AbsVar->getDIE());
- } else
- Unit->applyVariableAttributes(*Var, *VariableDie);
+ Unit->finishVariableDefinition(*Var);
}
}
void DwarfDebug::finishSubprogramDefinitions() {
- for (const DISubprogram *SP : ProcessedSPNodes)
- if (SP->getUnit()->getEmissionKind() != DICompileUnit::NoDebug)
- forBothCUs(*CUMap.lookup(SP->getUnit()), [&](DwarfCompileUnit &CU) {
- CU.finishSubprogramDefinition(SP);
- });
+ for (const DISubprogram *SP : ProcessedSPNodes) {
+ assert(SP->getUnit()->getEmissionKind() != DICompileUnit::NoDebug);
+ forBothCUs(
+ getOrCreateDwarfCompileUnit(SP->getUnit()),
+ [&](DwarfCompileUnit &CU) { CU.finishSubprogramDefinition(SP); });
+ }
}
void DwarfDebug::finalizeModuleInfo() {
@@ -589,6 +613,13 @@ void DwarfDebug::finalizeModuleInfo() {
finishVariableDefinitions();
+ // Include the DWO file name in the hash if there's more than one CU.
+ // This handles ThinLTO's situation, where imported CUs can easily be
+ // duplicated: the same CU may be partially imported into another ThinLTO
+ // unit.
+ StringRef DWOName;
+ if (CUMap.size() > 1)
+ DWOName = Asm->TM.Options.MCOptions.SplitDwarfFile;
+
// Handle anything that needs to be done on a per-unit basis after
// all other generation.
for (const auto &P : CUMap) {
@@ -603,7 +634,8 @@ void DwarfDebug::finalizeModuleInfo() {
auto *SkCU = TheCU.getSkeleton();
if (useSplitDwarf()) {
// Emit a unique identifier for this CU.
- uint64_t ID = DIEHash(Asm).computeCUSignature(TheCU.getUnitDie());
+ uint64_t ID =
+ DIEHash(Asm).computeCUSignature(DWOName, TheCU.getUnitDie());
TheCU.addUInt(TheCU.getUnitDie(), dwarf::DW_AT_GNU_dwo_id,
dwarf::DW_FORM_data8, ID);
SkCU->addUInt(SkCU->getUnitDie(), dwarf::DW_AT_GNU_dwo_id,
@@ -712,63 +744,40 @@ void DwarfDebug::endModule() {
}
// Emit the pubnames and pubtypes sections if requested.
- if (HasDwarfPubSections) {
+ // The condition is optimistically correct: any CU not using GMLT (and in
+ // the implicit/default pubnames state) might still have pubnames.
+ if (hasDwarfPubSections(/* gmlt */ false)) {
emitDebugPubNames(GenerateGnuPubSections);
emitDebugPubTypes(GenerateGnuPubSections);
}
// clean up.
- AbstractVariables.clear();
-}
-
-// Find abstract variable, if any, associated with Var.
-DbgVariable *
-DwarfDebug::getExistingAbstractVariable(InlinedVariable IV,
- const DILocalVariable *&Cleansed) {
- // More then one inlined variable corresponds to one abstract variable.
- Cleansed = IV.first;
- auto I = AbstractVariables.find(Cleansed);
- if (I != AbstractVariables.end())
- return I->second.get();
- return nullptr;
+ // FIXME: AbstractVariables.clear();
}
-DbgVariable *DwarfDebug::getExistingAbstractVariable(InlinedVariable IV) {
- const DILocalVariable *Cleansed;
- return getExistingAbstractVariable(IV, Cleansed);
-}
-
-void DwarfDebug::createAbstractVariable(const DILocalVariable *Var,
- LexicalScope *Scope) {
- auto AbsDbgVariable = make_unique<DbgVariable>(Var, /* IA */ nullptr);
- InfoHolder.addScopeVariable(Scope, AbsDbgVariable.get());
- AbstractVariables[Var] = std::move(AbsDbgVariable);
-}
-
-void DwarfDebug::ensureAbstractVariableIsCreated(InlinedVariable IV,
+void DwarfDebug::ensureAbstractVariableIsCreated(DwarfCompileUnit &CU, InlinedVariable IV,
const MDNode *ScopeNode) {
const DILocalVariable *Cleansed = nullptr;
- if (getExistingAbstractVariable(IV, Cleansed))
+ if (CU.getExistingAbstractVariable(IV, Cleansed))
return;
- createAbstractVariable(Cleansed, LScopes.getOrCreateAbstractScope(
+ CU.createAbstractVariable(Cleansed, LScopes.getOrCreateAbstractScope(
cast<DILocalScope>(ScopeNode)));
}
-void DwarfDebug::ensureAbstractVariableIsCreatedIfScoped(
+void DwarfDebug::ensureAbstractVariableIsCreatedIfScoped(DwarfCompileUnit &CU,
InlinedVariable IV, const MDNode *ScopeNode) {
const DILocalVariable *Cleansed = nullptr;
- if (getExistingAbstractVariable(IV, Cleansed))
+ if (CU.getExistingAbstractVariable(IV, Cleansed))
return;
if (LexicalScope *Scope =
LScopes.findAbstractScope(cast_or_null<DILocalScope>(ScopeNode)))
- createAbstractVariable(Cleansed, Scope);
+ CU.createAbstractVariable(Cleansed, Scope);
}
-
// Collect variable information from side table maintained by MF.
void DwarfDebug::collectVariableInfoFromMFTable(
- DenseSet<InlinedVariable> &Processed) {
+ DwarfCompileUnit &TheCU, DenseSet<InlinedVariable> &Processed) {
for (const auto &VI : Asm->MF->getVariableDbgInfo()) {
if (!VI.Var)
continue;
@@ -783,7 +792,7 @@ void DwarfDebug::collectVariableInfoFromMFTable(
if (!Scope)
continue;
- ensureAbstractVariableIsCreatedIfScoped(Var, Scope->getScopeNode());
+ ensureAbstractVariableIsCreatedIfScoped(TheCU, Var, Scope->getScopeNode());
auto RegVar = make_unique<DbgVariable>(Var.first, Var.second);
RegVar->initializeMMI(VI.Expr, VI.Slot);
if (InfoHolder.addScopeVariable(Scope, RegVar.get()))
@@ -954,24 +963,71 @@ DwarfDebug::buildLocationList(SmallVectorImpl<DebugLocEntry> &DebugLoc,
}
}
-DbgVariable *DwarfDebug::createConcreteVariable(LexicalScope &Scope,
+DbgVariable *DwarfDebug::createConcreteVariable(DwarfCompileUnit &TheCU,
+ LexicalScope &Scope,
InlinedVariable IV) {
- ensureAbstractVariableIsCreatedIfScoped(IV, Scope.getScopeNode());
+ ensureAbstractVariableIsCreatedIfScoped(TheCU, IV, Scope.getScopeNode());
ConcreteVariables.push_back(make_unique<DbgVariable>(IV.first, IV.second));
InfoHolder.addScopeVariable(&Scope, ConcreteVariables.back().get());
return ConcreteVariables.back().get();
}
-// Determine whether this DBG_VALUE is valid at the beginning of the function.
-static bool validAtEntry(const MachineInstr *MInsn) {
- auto MBB = MInsn->getParent();
- // Is it in the entry basic block?
- if (!MBB->pred_empty())
+/// Determine whether a *singular* DBG_VALUE is valid for the entirety of its
+/// enclosing lexical scope. The check ensures there are no other instructions
+/// in the same lexical scope preceding the DBG_VALUE and that its range is
+/// either open or otherwise rolls off the end of the scope.
+static bool validThroughout(LexicalScopes &LScopes,
+ const MachineInstr *DbgValue,
+ const MachineInstr *RangeEnd) {
+ assert(DbgValue->getDebugLoc() && "DBG_VALUE without a debug location");
+ auto MBB = DbgValue->getParent();
+ auto DL = DbgValue->getDebugLoc();
+ auto *LScope = LScopes.findLexicalScope(DL);
+ // Scope doesn't exist; this is a dead DBG_VALUE.
+ if (!LScope)
return false;
- for (MachineBasicBlock::const_reverse_iterator I(MInsn); I != MBB->rend(); ++I)
- if (!(I->isDebugValue() || I->getFlag(MachineInstr::FrameSetup)))
+ auto &LSRange = LScope->getRanges();
+ if (LSRange.size() == 0)
+ return false;
+
+ // Determine if the DBG_VALUE is valid at the beginning of its lexical block.
+ const MachineInstr *LScopeBegin = LSRange.front().first;
+ // Early exit if the lexical scope begins outside of the current block.
+ if (LScopeBegin->getParent() != MBB)
+ return false;
+ MachineBasicBlock::const_reverse_iterator Pred(DbgValue);
+ for (++Pred; Pred != MBB->rend(); ++Pred) {
+ if (Pred->getFlag(MachineInstr::FrameSetup))
+ break;
+ auto PredDL = Pred->getDebugLoc();
+ if (!PredDL || Pred->isMetaInstruction())
+ continue;
+ // Check whether the instruction preceding the DBG_VALUE is in the same
+ // (sub)scope as the DBG_VALUE.
+ if (DL->getScope() == PredDL->getScope())
return false;
- return true;
+ auto *PredScope = LScopes.findLexicalScope(PredDL);
+ if (!PredScope || LScope->dominates(PredScope))
+ return false;
+ }
+
+ // If the range of the DBG_VALUE is open-ended, report success.
+ if (!RangeEnd)
+ return true;
+
+ // Fail if there are instructions belonging to our scope in another block.
+ const MachineInstr *LScopeEnd = LSRange.back().second;
+ if (LScopeEnd->getParent() != MBB)
+ return false;
+
+ // Single, constant DBG_VALUEs in the prologue are promoted to be live
+ // throughout the function. This is a hack, presumably for DWARF v2 and not
+ // necessarily correct. It would be much better to use a dbg.declare instead
+ // if we know the constant is live throughout the scope.
+ if (DbgValue->getOperand(0).isImm() && MBB->pred_empty())
+ return true;
+
+ return false;
}
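// A hypothetical scenario accepted by validThroughout: a function whose
// entry block contains only frame-setup instructions followed by a single
// DBG_VALUE with an open-ended range (RangeEnd == nullptr); the backward
// scan finds nothing from the same scope before the DBG_VALUE, so the
// location is treated as live for the whole scope. Conversely, any real
// instruction from the same (sub)scope ahead of the DBG_VALUE rejects it.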
// Find variables for each lexical scope.
@@ -979,7 +1035,7 @@ void DwarfDebug::collectVariableInfo(DwarfCompileUnit &TheCU,
const DISubprogram *SP,
DenseSet<InlinedVariable> &Processed) {
// Grab the variable info that was squirreled away in the MMI side-table.
- collectVariableInfoFromMFTable(Processed);
+ collectVariableInfoFromMFTable(TheCU, Processed);
for (const auto &I : DbgValues) {
InlinedVariable IV = I.first;
@@ -1001,16 +1057,14 @@ void DwarfDebug::collectVariableInfo(DwarfCompileUnit &TheCU,
continue;
Processed.insert(IV);
- DbgVariable *RegVar = createConcreteVariable(*Scope, IV);
+ DbgVariable *RegVar = createConcreteVariable(TheCU, *Scope, IV);
const MachineInstr *MInsn = Ranges.front().first;
assert(MInsn->isDebugValue() && "History must begin with debug value");
- // Check if there is a single DBG_VALUE, valid throughout the function.
- // A single constant is also considered valid for the entire function.
+ // Check if there is a single DBG_VALUE, valid throughout the var's scope.
if (Ranges.size() == 1 &&
- (MInsn->getOperand(0).isImm() ||
- (validAtEntry(MInsn) && Ranges.front().second == nullptr))) {
+ validThroughout(LScopes, MInsn, Ranges.front().second)) {
RegVar->initializeDbgValue(MInsn);
continue;
}
@@ -1037,7 +1091,7 @@ void DwarfDebug::collectVariableInfo(DwarfCompileUnit &TheCU,
for (const DILocalVariable *DV : SP->getVariables()) {
if (Processed.insert(InlinedVariable(DV, nullptr)).second)
if (LexicalScope *Scope = LScopes.findLexicalScope(DV->getScope()))
- createConcreteVariable(*Scope, InlinedVariable(DV, nullptr));
+ createConcreteVariable(TheCU, *Scope, InlinedVariable(DV, nullptr));
}
}
@@ -1046,8 +1100,12 @@ void DwarfDebug::beginInstruction(const MachineInstr *MI) {
DebugHandlerBase::beginInstruction(MI);
assert(CurMI);
+ const auto *SP = MI->getParent()->getParent()->getFunction()->getSubprogram();
+ if (!SP || SP->getUnit()->getEmissionKind() == DICompileUnit::NoDebug)
+ return;
+
// Check if source location changes, but ignore DBG_VALUE and CFI locations.
- if (MI->isDebugValue() || MI->isCFIInstruction())
+ if (MI->isMetaInstruction())
return;
const DebugLoc &DL = MI->getDebugLoc();
// When we emit a line-0 record, we don't update PrevInstLoc; so look at
@@ -1129,7 +1187,7 @@ static DebugLoc findPrologueEndLoc(const MachineFunction *MF) {
// the beginning of the function body.
for (const auto &MBB : *MF)
for (const auto &MI : MBB)
- if (!MI.isDebugValue() && !MI.getFlag(MachineInstr::FrameSetup) &&
+ if (!MI.isMetaInstruction() && !MI.getFlag(MachineInstr::FrameSetup) &&
MI.getDebugLoc())
return MI.getDebugLoc();
return DebugLoc();
@@ -1137,75 +1195,50 @@ static DebugLoc findPrologueEndLoc(const MachineFunction *MF) {
// Gather pre-function debug information. Assumed to be called immediately
// after the function entry point has been emitted.
-void DwarfDebug::beginFunction(const MachineFunction *MF) {
+void DwarfDebug::beginFunctionImpl(const MachineFunction *MF) {
CurFn = MF;
- // If there's no debug info for the function we're not going to do anything.
- if (!MMI->hasDebugInfo())
+ auto *SP = MF->getFunction()->getSubprogram();
+ assert(LScopes.empty() || SP == LScopes.getCurrentFunctionScope()->getScopeNode());
+ if (SP->getUnit()->getEmissionKind() == DICompileUnit::NoDebug)
return;
- auto DI = MF->getFunction()->getSubprogram();
- if (!DI)
- return;
-
- // Grab the lexical scopes for the function, if we don't have any of those
- // then we're not going to be able to do anything.
- DebugHandlerBase::beginFunction(MF);
- if (LScopes.empty())
- return;
+ DwarfCompileUnit &CU = getOrCreateDwarfCompileUnit(SP->getUnit());
// Set DwarfDwarfCompileUnitID in MCContext to the Compile Unit this function
// belongs to so that we add to the correct per-cu line table in the
// non-asm case.
- LexicalScope *FnScope = LScopes.getCurrentFunctionScope();
- // FnScope->getScopeNode() and DI->second should represent the same function,
- // though they may not be the same MDNode due to inline functions merged in
- // LTO where the debug info metadata still differs (either due to distinct
- // written differences - two versions of a linkonce_odr function
- // written/copied into two separate files, or some sub-optimal metadata that
- // isn't structurally identical (see: file path/name info from clang, which
- // includes the directory of the cpp file being built, even when the file name
- // is absolute (such as an <> lookup header)))
- auto *SP = cast<DISubprogram>(FnScope->getScopeNode());
- DwarfCompileUnit *TheCU = CUMap.lookup(SP->getUnit());
- if (!TheCU) {
- assert(SP->getUnit()->getEmissionKind() == DICompileUnit::NoDebug &&
- "DICompileUnit missing from llvm.dbg.cu?");
- return;
- }
if (Asm->OutStreamer->hasRawTextSupport())
// Use a single line table if we are generating assembly.
Asm->OutStreamer->getContext().setDwarfCompileUnitID(0);
else
- Asm->OutStreamer->getContext().setDwarfCompileUnitID(TheCU->getUniqueID());
+ Asm->OutStreamer->getContext().setDwarfCompileUnitID(CU.getUniqueID());
// Record beginning of function.
PrologEndLoc = findPrologueEndLoc(MF);
- if (DILocation *L = PrologEndLoc) {
+ if (PrologEndLoc) {
// We'd like to list the prologue as "not statements" but GDB behaves
// poorly if we do that. Revisit this with caution/GDB (7.5+) testing.
- auto *SP = L->getInlinedAtScope()->getSubprogram();
+ auto *SP = PrologEndLoc->getInlinedAtScope()->getSubprogram();
recordSourceLine(SP->getScopeLine(), 0, SP, DWARF2_FLAG_IS_STMT);
}
}
+void DwarfDebug::skippedNonDebugFunction() {
+ // If we don't have a subprogram for this function then there will be a hole
+ // in the range information. Keep note of this by setting the previously used
+ // section to nullptr.
+ PrevCU = nullptr;
+ CurFn = nullptr;
+}
+
// Gather and emit post-function debug information.
-void DwarfDebug::endFunction(const MachineFunction *MF) {
+void DwarfDebug::endFunctionImpl(const MachineFunction *MF) {
+ const DISubprogram *SP = MF->getFunction()->getSubprogram();
+
assert(CurFn == MF &&
"endFunction should be called with the same function as beginFunction");
- const DISubprogram *SP = MF->getFunction()->getSubprogram();
- if (!MMI->hasDebugInfo() || !SP ||
- SP->getUnit()->getEmissionKind() == DICompileUnit::NoDebug) {
- // If we don't have a subprogram for this function then there will be a hole
- // in the range information. Keep note of this by setting the previously
- // used section to nullptr.
- PrevCU = nullptr;
- CurFn = nullptr;
- DebugHandlerBase::endFunction(MF);
- return;
- }
-
// Set DwarfDwarfCompileUnitID in MCContext to default value.
Asm->OutStreamer->getContext().setDwarfCompileUnitID(0);
@@ -1220,17 +1253,14 @@ void DwarfDebug::endFunction(const MachineFunction *MF) {
TheCU.addRange(RangeSpan(Asm->getFunctionBegin(), Asm->getFunctionEnd()));
// Under -gmlt, skip building the subprogram if there are no inlined
- // subroutines inside it.
- if (TheCU.getCUNode()->getEmissionKind() == DICompileUnit::LineTablesOnly &&
+ // subroutines inside it. But with -fdebug-info-for-profiling, the subprogram
+ // is still needed because its source location is required.
+ if (!TheCU.getCUNode()->getDebugInfoForProfiling() &&
+ TheCU.getCUNode()->getEmissionKind() == DICompileUnit::LineTablesOnly &&
LScopes.getAbstractScopesList().empty() && !IsDarwin) {
assert(InfoHolder.getScopeVariables().empty());
- assert(DbgValues.empty());
- // FIXME: This wouldn't be true in LTO with a -g (with inlining) CU followed
- // by a -gmlt CU. Add a test and remove this assertion.
- assert(AbstractVariables.empty());
PrevLabel = nullptr;
CurFn = nullptr;
- DebugHandlerBase::endFunction(MF);
return;
}
@@ -1244,12 +1274,12 @@ void DwarfDebug::endFunction(const MachineFunction *MF) {
for (const DILocalVariable *DV : SP->getVariables()) {
if (!ProcessedVars.insert(InlinedVariable(DV, nullptr)).second)
continue;
- ensureAbstractVariableIsCreated(InlinedVariable(DV, nullptr),
+ ensureAbstractVariableIsCreated(TheCU, InlinedVariable(DV, nullptr),
DV->getScope());
assert(LScopes.getAbstractScopesList().size() == NumAbstractScopes
&& "ensureAbstractVariableIsCreated inserted abstract scopes");
}
- constructAbstractSubprogramScopeDIE(AScope);
+ constructAbstractSubprogramScopeDIE(TheCU, AScope);
}
ProcessedSPNodes.insert(SP);
@@ -1266,7 +1296,6 @@ void DwarfDebug::endFunction(const MachineFunction *MF) {
InfoHolder.getScopeVariables().clear();
PrevLabel = nullptr;
CurFn = nullptr;
- DebugHandlerBase::endFunction(MF);
}
// Register a source line with debug info. Returns the unique label that was
@@ -1361,6 +1390,18 @@ void DwarfDebug::emitAccelTypes() {
/// computeIndexValue - Compute the gdb index value for the DIE and CU.
static dwarf::PubIndexEntryDescriptor computeIndexValue(DwarfUnit *CU,
const DIE *Die) {
+ // Entities that ended up only in a Type Unit reference the CU instead (since
+ // the pub entry holds offsets within the CU, there's no real offset that can
+ // be provided anyway). As it happens, all such entities (namespaces and
+ // types, and types only in C++ at that) are rendered as TYPE+EXTERNAL. If
+ // this turns out not to be true, it would be necessary to persist this
+ // information from the point at which the entry is added to the index data
+ // structure, since by the time the index is built the original
+ // type/namespace DIE in a type unit has already been destroyed and can't be
+ // queried for properties like tag, etc.
+ if (Die->getTag() == dwarf::DW_TAG_compile_unit)
+ return dwarf::PubIndexEntryDescriptor(dwarf::GIEK_TYPE,
+ dwarf::GIEL_EXTERNAL);
dwarf::GDBIndexEntryLinkage Linkage = dwarf::GIEL_STATIC;
// We could have a specification DIE that has most of our knowledge,
@@ -1418,7 +1459,7 @@ void DwarfDebug::emitDebugPubSection(
const auto &Globals = (TheU->*Accessor)();
- if (Globals.empty())
+ if (!hasDwarfPubSections(TheU->includeMinimalInlineScopes()))
continue;
if (auto *Skeleton = TheU->getSkeleton())
@@ -1498,27 +1539,36 @@ static void emitDebugLocValue(const AsmPrinter &AP, const DIBasicType *BT,
ByteStreamer &Streamer,
const DebugLocEntry::Value &Value,
DwarfExpression &DwarfExpr) {
- DIExpressionCursor ExprCursor(Value.getExpression());
- DwarfExpr.addFragmentOffset(Value.getExpression());
+ auto *DIExpr = Value.getExpression();
+ DIExpressionCursor ExprCursor(DIExpr);
+ DwarfExpr.addFragmentOffset(DIExpr);
// Regular entry.
if (Value.isInt()) {
if (BT && (BT->getEncoding() == dwarf::DW_ATE_signed ||
BT->getEncoding() == dwarf::DW_ATE_signed_char))
- DwarfExpr.AddSignedConstant(Value.getInt());
+ DwarfExpr.addSignedConstant(Value.getInt());
else
- DwarfExpr.AddUnsignedConstant(Value.getInt());
+ DwarfExpr.addUnsignedConstant(Value.getInt());
} else if (Value.isLocation()) {
- MachineLocation Loc = Value.getLoc();
+ MachineLocation Location = Value.getLoc();
+ if (Location.isIndirect())
+ DwarfExpr.setMemoryLocationKind();
+ SmallVector<uint64_t, 8> Ops;
+ if (Location.isIndirect() && Location.getOffset()) {
+ Ops.push_back(dwarf::DW_OP_plus_uconst);
+ Ops.push_back(Location.getOffset());
+ }
+ Ops.append(DIExpr->elements_begin(), DIExpr->elements_end());
+ DIExpressionCursor Cursor(Ops);
const TargetRegisterInfo &TRI = *AP.MF->getSubtarget().getRegisterInfo();
- if (Loc.getOffset())
- DwarfExpr.AddMachineRegIndirect(TRI, Loc.getReg(), Loc.getOffset());
- else
- DwarfExpr.AddMachineRegExpression(TRI, ExprCursor, Loc.getReg());
+ if (!DwarfExpr.addMachineRegExpression(TRI, Cursor, Location.getReg()))
+ return;
+ return DwarfExpr.addExpression(std::move(Cursor));
} else if (Value.isConstantFP()) {
APInt RawBytes = Value.getConstantFP()->getValueAPF().bitcastToAPInt();
- DwarfExpr.AddUnsignedConstant(RawBytes);
+ DwarfExpr.addUnsignedConstant(RawBytes);
}
- DwarfExpr.AddExpression(std::move(ExprCursor));
+ DwarfExpr.addExpression(std::move(ExprCursor));
}
void DebugLocEntry::finalize(const AsmPrinter &AP,
@@ -1558,10 +1608,13 @@ void DwarfDebug::emitDebugLocEntryLocation(const DebugLocStream::Entry &Entry) {
// Emit locations into the debug loc section.
void DwarfDebug::emitDebugLoc() {
+ if (DebugLocs.getLists().empty())
+ return;
+
// Start the dwarf loc section.
Asm->OutStreamer->SwitchSection(
Asm->getObjFileLowering().getDwarfLocSection());
- unsigned char Size = Asm->getDataLayout().getPointerSize();
+ unsigned char Size = Asm->MAI->getCodePointerSize();
for (const auto &List : DebugLocs.getLists()) {
Asm->OutStreamer->EmitLabel(List.Label);
const DwarfCompileUnit *CU = List.CU;
@@ -1691,7 +1744,7 @@ void DwarfDebug::emitDebugARanges() {
Asm->OutStreamer->SwitchSection(
Asm->getObjFileLowering().getDwarfARangesSection());
- unsigned PtrSize = Asm->getDataLayout().getPointerSize();
+ unsigned PtrSize = Asm->MAI->getCodePointerSize();
// Build a list of CUs used.
std::vector<DwarfCompileUnit *> CUs;
@@ -1769,12 +1822,15 @@ void DwarfDebug::emitDebugARanges() {
/// Emit address ranges into a debug ranges section.
void DwarfDebug::emitDebugRanges() {
+ if (CUMap.empty())
+ return;
+
// Start the dwarf ranges section.
Asm->OutStreamer->SwitchSection(
Asm->getObjFileLowering().getDwarfRangesSection());
// Size for our labels.
- unsigned char Size = Asm->getDataLayout().getPointerSize();
+ unsigned char Size = Asm->MAI->getCodePointerSize();
// Grab the specific ranges for the compile units in the module.
for (const auto &I : CUMap) {
@@ -1848,6 +1904,9 @@ void DwarfDebug::emitMacroFile(DIMacroFile &F, DwarfCompileUnit &U) {
/// Emit macros into a debug macinfo section.
void DwarfDebug::emitDebugMacinfo() {
+ if (CUMap.empty())
+ return;
+
// Start the dwarf macinfo section.
Asm->OutStreamer->SwitchSection(
Asm->getObjFileLowering().getDwarfMacinfoSection());
@@ -1869,7 +1928,7 @@ void DwarfDebug::emitDebugMacinfo() {
void DwarfDebug::initSkeletonUnit(const DwarfUnit &U, DIE &Die,
std::unique_ptr<DwarfCompileUnit> NewU) {
NewU->addString(Die, dwarf::DW_AT_GNU_dwo_name,
- U.getCUNode()->getSplitDebugFilename());
+ Asm->TM.Options.MCOptions.SplitDwarfFile);
if (!CompilationDir.empty())
NewU->addString(Die, dwarf::DW_AT_comp_dir, CompilationDir);
@@ -1940,11 +1999,11 @@ uint64_t DwarfDebug::makeTypeSignature(StringRef Identifier) {
MD5 Hash;
Hash.update(Identifier);
// ... take the least significant 8 bytes and return those. Our MD5
- // implementation always returns its results in little endian, swap bytes
- // appropriately.
+ // implementation always returns its results in little endian, so we actually
+ // need the "high" word.
MD5::MD5Result Result;
Hash.final(Result);
- return support::endian::read64le(Result + 8);
+ return Result.high();
}
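// A hedged sketch of the equivalence behind the change above, assuming a
// little-endian host: the "high" 64-bit word of the 16-byte MD5 result is
// the one stored at bytes 8..15, which is exactly what the old
// read64le(Result + 8) extracted.

#include <cstdint>
#include <cstring>

static uint64_t high64(const uint8_t Bytes[16]) {
  uint64_t V;
  std::memcpy(&V, Bytes + 8, sizeof(V)); // matches read64le on LE hosts
  return V;
}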
void DwarfDebug::addDwarfTypeUnitType(DwarfCompileUnit &CU,
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h
index 253e3f0..5dfe06c 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h
@@ -89,7 +89,7 @@ public:
assert(!MInsn && "Already initialized?");
assert((!E || E->isValid()) && "Expected valid expression");
- assert(~FI && "Expected valid index");
+ assert(FI != INT_MAX && "Expected valid index");
FrameIndexExprs.push_back({FI, E});
}
@@ -134,6 +134,13 @@ public:
assert(!FrameIndexExprs.empty() && "Expected an MMI entry");
assert(!V.FrameIndexExprs.empty() && "Expected an MMI entry");
+ if (FrameIndexExprs.size()) {
+ auto *Expr = FrameIndexExprs.back().Expr;
+ // Get rid of duplicate non-fragment entries. More than one non-fragment
+ // dbg.declare makes no sense, so ignore all but the first.
+ if (!Expr || !Expr->isFragment())
+ return;
+ }
FrameIndexExprs.append(V.FrameIndexExprs.begin(), V.FrameIndexExprs.end());
assert(all_of(FrameIndexExprs,
[](FrameIndexExpr &FIE) {
@@ -210,7 +217,6 @@ class DwarfDebug : public DebugHandlerBase {
DenseMap<const MCSymbol *, uint64_t> SymSize;
/// Collection of abstract variables.
- DenseMap<const MDNode *, std::unique_ptr<DbgVariable>> AbstractVariables;
SmallVector<std::unique_ptr<DbgVariable>, 64> ConcreteVariables;
/// Collection of DebugLocEntry. Stored in a linked list so that DIELocLists
@@ -247,9 +253,6 @@ class DwarfDebug : public DebugHandlerBase {
std::pair<std::unique_ptr<DwarfTypeUnit>, const DICompositeType *>, 1>
TypeUnitsUnderConstruction;
- /// Whether to emit the pubnames/pubtypes sections.
- bool HasDwarfPubSections;
-
/// Whether to use the GNU TLS opcode (instead of the standard opcode).
bool UseGNUTLSOpcode;
@@ -313,20 +316,16 @@ class DwarfDebug : public DebugHandlerBase {
typedef DbgValueHistoryMap::InlinedVariable InlinedVariable;
- /// Find abstract variable associated with Var.
- DbgVariable *getExistingAbstractVariable(InlinedVariable IV,
- const DILocalVariable *&Cleansed);
- DbgVariable *getExistingAbstractVariable(InlinedVariable IV);
- void createAbstractVariable(const DILocalVariable *DV, LexicalScope *Scope);
- void ensureAbstractVariableIsCreated(InlinedVariable Var,
+ void ensureAbstractVariableIsCreated(DwarfCompileUnit &CU, InlinedVariable Var,
const MDNode *Scope);
- void ensureAbstractVariableIsCreatedIfScoped(InlinedVariable Var,
+ void ensureAbstractVariableIsCreatedIfScoped(DwarfCompileUnit &CU, InlinedVariable Var,
const MDNode *Scope);
- DbgVariable *createConcreteVariable(LexicalScope &Scope, InlinedVariable IV);
+ DbgVariable *createConcreteVariable(DwarfCompileUnit &TheCU,
+ LexicalScope &Scope, InlinedVariable IV);
/// Construct a DIE for this abstract scope.
- void constructAbstractSubprogramScopeDIE(LexicalScope *Scope);
+ void constructAbstractSubprogramScopeDIE(DwarfCompileUnit &SrcCU, LexicalScope *Scope);
void finishVariableDefinitions();
@@ -420,11 +419,11 @@ class DwarfDebug : public DebugHandlerBase {
/// Flags to let the linker know we have emitted new style pubnames. Only
/// emit it here if we don't have a skeleton CU for split dwarf.
- void addGnuPubAttributes(DwarfUnit &U, DIE &D) const;
+ void addGnuPubAttributes(DwarfCompileUnit &U, DIE &D) const;
/// Create new DwarfCompileUnit for the given metadata node with tag
/// DW_TAG_compile_unit.
- DwarfCompileUnit &constructDwarfCompileUnit(const DICompileUnit *DIUnit);
+ DwarfCompileUnit &getOrCreateDwarfCompileUnit(const DICompileUnit *DIUnit);
/// Construct imported_module or imported_declaration DIE.
void constructAndAddImportedEntityDIE(DwarfCompileUnit &TheCU,
@@ -446,7 +445,17 @@ class DwarfDebug : public DebugHandlerBase {
const DbgValueHistoryMap::InstrRanges &Ranges);
/// Collect variable information from the side table maintained by MF.
- void collectVariableInfoFromMFTable(DenseSet<InlinedVariable> &P);
+ void collectVariableInfoFromMFTable(DwarfCompileUnit &TheCU,
+ DenseSet<InlinedVariable> &P);
+
+protected:
+ /// Gather pre-function debug information.
+ void beginFunctionImpl(const MachineFunction *MF) override;
+
+ /// Gather and emit post-function debug information.
+ void endFunctionImpl(const MachineFunction *MF) override;
+
+ void skippedNonDebugFunction() override;
public:
//===--------------------------------------------------------------------===//
@@ -463,12 +472,6 @@ public:
/// Emit all Dwarf sections that should come after the content.
void endModule() override;
- /// Gather pre-function debug information.
- void beginFunction(const MachineFunction *MF) override;
-
- /// Gather and emit post-function debug information.
- void endFunction(const MachineFunction *MF) override;
-
/// Process beginning of an instruction.
void beginInstruction(const MachineInstr *MI) override;
@@ -515,6 +518,8 @@ public:
/// split dwarf proposal support.
bool useSplitDwarf() const { return HasSplitDwarf; }
+ bool shareAcrossDWOCUs() const;
+
/// Returns the Dwarf Version.
uint16_t getDwarfVersion() const;
@@ -555,6 +560,8 @@ public:
/// A helper function to check whether the DIE for a given Scope is
/// going to be null.
bool isLexicalScopeDIENull(LexicalScope *Scope);
+
+ bool hasDwarfPubSections(bool includeMinimalInlineScopes) const;
};
} // End of namespace llvm
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp
index 61b2c7e6..fe38ee8 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp
@@ -14,87 +14,88 @@
#include "DwarfExpression.h"
#include "DwarfDebug.h"
#include "llvm/ADT/SmallBitVector.h"
+#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/CodeGen/AsmPrinter.h"
-#include "llvm/Support/Dwarf.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;
-void DwarfExpression::AddReg(int DwarfReg, const char *Comment) {
- assert(DwarfReg >= 0 && "invalid negative dwarf register number");
- if (DwarfReg < 32) {
- EmitOp(dwarf::DW_OP_reg0 + DwarfReg, Comment);
+void DwarfExpression::addReg(int DwarfReg, const char *Comment) {
+ assert(DwarfReg >= 0 && "invalid negative dwarf register number");
+ assert((LocationKind == Unknown || LocationKind == Register) &&
+ "location description already locked down");
+ LocationKind = Register;
+ if (DwarfReg < 32) {
+ emitOp(dwarf::DW_OP_reg0 + DwarfReg, Comment);
} else {
- EmitOp(dwarf::DW_OP_regx, Comment);
- EmitUnsigned(DwarfReg);
+ emitOp(dwarf::DW_OP_regx, Comment);
+ emitUnsigned(DwarfReg);
}
}
-void DwarfExpression::AddRegIndirect(int DwarfReg, int Offset, bool Deref) {
+void DwarfExpression::addBReg(int DwarfReg, int Offset) {
assert(DwarfReg >= 0 && "invalid negative dwarf register number");
+ assert(LocationKind != Register && "location description already locked down");
if (DwarfReg < 32) {
- EmitOp(dwarf::DW_OP_breg0 + DwarfReg);
+ emitOp(dwarf::DW_OP_breg0 + DwarfReg);
} else {
- EmitOp(dwarf::DW_OP_bregx);
- EmitUnsigned(DwarfReg);
+ emitOp(dwarf::DW_OP_bregx);
+ emitUnsigned(DwarfReg);
}
- EmitSigned(Offset);
- if (Deref)
- EmitOp(dwarf::DW_OP_deref);
+ emitSigned(Offset);
+}
+
+void DwarfExpression::addFBReg(int Offset) {
+ emitOp(dwarf::DW_OP_fbreg);
+ emitSigned(Offset);
}
-void DwarfExpression::AddOpPiece(unsigned SizeInBits, unsigned OffsetInBits) {
+void DwarfExpression::addOpPiece(unsigned SizeInBits, unsigned OffsetInBits) {
if (!SizeInBits)
return;
const unsigned SizeOfByte = 8;
if (OffsetInBits > 0 || SizeInBits % SizeOfByte) {
- EmitOp(dwarf::DW_OP_bit_piece);
- EmitUnsigned(SizeInBits);
- EmitUnsigned(OffsetInBits);
+ emitOp(dwarf::DW_OP_bit_piece);
+ emitUnsigned(SizeInBits);
+ emitUnsigned(OffsetInBits);
} else {
- EmitOp(dwarf::DW_OP_piece);
+ emitOp(dwarf::DW_OP_piece);
unsigned ByteSize = SizeInBits / SizeOfByte;
- EmitUnsigned(ByteSize);
+ emitUnsigned(ByteSize);
}
this->OffsetInBits += SizeInBits;
}
-void DwarfExpression::AddShr(unsigned ShiftBy) {
- EmitOp(dwarf::DW_OP_constu);
- EmitUnsigned(ShiftBy);
- EmitOp(dwarf::DW_OP_shr);
+void DwarfExpression::addShr(unsigned ShiftBy) {
+ emitOp(dwarf::DW_OP_constu);
+ emitUnsigned(ShiftBy);
+ emitOp(dwarf::DW_OP_shr);
}
-bool DwarfExpression::AddMachineRegIndirect(const TargetRegisterInfo &TRI,
- unsigned MachineReg, int Offset) {
- if (isFrameRegister(TRI, MachineReg)) {
- // If variable offset is based in frame register then use fbreg.
- EmitOp(dwarf::DW_OP_fbreg);
- EmitSigned(Offset);
- return true;
- }
-
- int DwarfReg = TRI.getDwarfRegNum(MachineReg, false);
- if (DwarfReg < 0)
- return false;
-
- AddRegIndirect(DwarfReg, Offset);
- return true;
+void DwarfExpression::addAnd(unsigned Mask) {
+ emitOp(dwarf::DW_OP_constu);
+ emitUnsigned(Mask);
+ emitOp(dwarf::DW_OP_and);
}
-bool DwarfExpression::AddMachineReg(const TargetRegisterInfo &TRI,
+bool DwarfExpression::addMachineReg(const TargetRegisterInfo &TRI,
unsigned MachineReg, unsigned MaxSize) {
- if (!TRI.isPhysicalRegister(MachineReg))
+ if (!TRI.isPhysicalRegister(MachineReg)) {
+ if (isFrameRegister(TRI, MachineReg)) {
+ DwarfRegs.push_back({-1, 0, nullptr});
+ return true;
+ }
return false;
+ }
int Reg = TRI.getDwarfRegNum(MachineReg, false);
// If this is a valid register number, emit it.
if (Reg >= 0) {
- AddReg(Reg);
+ DwarfRegs.push_back({Reg, 0, nullptr});
return true;
}
@@ -106,7 +107,7 @@ bool DwarfExpression::AddMachineReg(const TargetRegisterInfo &TRI,
unsigned Idx = TRI.getSubRegIndex(*SR, MachineReg);
unsigned Size = TRI.getSubRegIdxSize(Idx);
unsigned RegOffset = TRI.getSubRegIdxOffset(Idx);
- AddReg(Reg, "super-register");
+ DwarfRegs.push_back({Reg, 0, "super-register"});
// Use a DW_OP_bit_piece to describe the sub-register.
setSubRegisterPiece(Size, RegOffset);
return true;
@@ -116,8 +117,9 @@ bool DwarfExpression::AddMachineReg(const TargetRegisterInfo &TRI,
// Otherwise, attempt to find a covering set of sub-register numbers.
// For example, Q0 on ARM is a composition of D0+D1.
unsigned CurPos = 0;
- // The size of the register in bits, assuming 8 bits per byte.
- unsigned RegSize = TRI.getMinimalPhysRegClass(MachineReg)->getSize() * 8;
+ // The size of the register in bits.
+ const TargetRegisterClass *RC = TRI.getMinimalPhysRegClass(MachineReg);
+ unsigned RegSize = TRI.getRegSizeInBits(*RC);
// Keep track of the bits in the register we already emitted, so we
// can avoid emitting redundant aliasing subregs.
SmallBitVector Coverage(RegSize, false);
@@ -136,100 +138,166 @@ bool DwarfExpression::AddMachineReg(const TargetRegisterInfo &TRI,
// If this sub-register has a DWARF number and we haven't covered
// its range, emit a DWARF piece for it.
if (Reg >= 0 && Intersection.any()) {
- AddReg(Reg, "sub-register");
+ // Emit a piece for any gap in the coverage.
+ if (Offset > CurPos)
+ DwarfRegs.push_back({-1, Offset - CurPos, nullptr});
+ DwarfRegs.push_back(
+ {Reg, std::min<unsigned>(Size, MaxSize - Offset), "sub-register"});
if (Offset >= MaxSize)
break;
- // Emit a piece for the any gap in the coverage.
- if (Offset > CurPos)
- AddOpPiece(Offset - CurPos);
- AddOpPiece(std::min<unsigned>(Size, MaxSize - Offset));
- CurPos = Offset + Size;
// Mark it as emitted.
Coverage.set(Offset, Offset + Size);
+ CurPos = Offset + Size;
}
}
return CurPos;
}
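// Schematic result of the composition above for ARM's Q0 = D0 + D1 (DWARF
// register numbers hypothetical): DwarfRegs ends up holding two 64-bit
// "sub-register" entries, which the simple register-location path later
// emits roughly as
//   DW_OP_regx <D0>, DW_OP_piece 8, DW_OP_regx <D1>, DW_OP_piece 8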
-void DwarfExpression::AddStackValue() {
+void DwarfExpression::addStackValue() {
if (DwarfVersion >= 4)
- EmitOp(dwarf::DW_OP_stack_value);
+ emitOp(dwarf::DW_OP_stack_value);
}
-void DwarfExpression::AddSignedConstant(int64_t Value) {
- EmitOp(dwarf::DW_OP_consts);
- EmitSigned(Value);
- AddStackValue();
+void DwarfExpression::addSignedConstant(int64_t Value) {
+ assert(LocationKind == Implicit || LocationKind == Unknown);
+ LocationKind = Implicit;
+ emitOp(dwarf::DW_OP_consts);
+ emitSigned(Value);
}
-void DwarfExpression::AddUnsignedConstant(uint64_t Value) {
- EmitOp(dwarf::DW_OP_constu);
- EmitUnsigned(Value);
- AddStackValue();
+void DwarfExpression::addUnsignedConstant(uint64_t Value) {
+ assert(LocationKind == Implicit || LocationKind == Unknown);
+ LocationKind = Implicit;
+ emitOp(dwarf::DW_OP_constu);
+ emitUnsigned(Value);
}
-void DwarfExpression::AddUnsignedConstant(const APInt &Value) {
+void DwarfExpression::addUnsignedConstant(const APInt &Value) {
+ assert(LocationKind == Implicit || LocationKind == Unknown);
+ LocationKind = Implicit;
+
unsigned Size = Value.getBitWidth();
const uint64_t *Data = Value.getRawData();
// Chop it up into 64-bit pieces, because that's the maximum that
- // AddUnsignedConstant takes.
+ // addUnsignedConstant takes.
unsigned Offset = 0;
while (Offset < Size) {
- AddUnsignedConstant(*Data++);
+ addUnsignedConstant(*Data++);
if (Offset == 0 && Size <= 64)
break;
- AddOpPiece(std::min(Size-Offset, 64u), Offset);
+ addStackValue();
+ addOpPiece(std::min(Size - Offset, 64u), Offset);
Offset += 64;
}
}
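For constants wider than 64 bits, the loop above emits one constu/stack-value/piece group per 64-bit chunk. A sketch for a hypothetical 128-bit value, assuming addOpPiece selects DW_OP_piece for byte-sized, zero-offset pieces and DW_OP_bit_piece otherwise:

    // A 128-bit APInt with raw words {Lo, Hi} emits, roughly:
    //   DW_OP_constu <Lo>, DW_OP_stack_value, DW_OP_piece 8
    //   DW_OP_constu <Hi>, DW_OP_stack_value, DW_OP_bit_piece 64, 64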
-bool DwarfExpression::AddMachineRegExpression(const TargetRegisterInfo &TRI,
+bool DwarfExpression::addMachineRegExpression(const TargetRegisterInfo &TRI,
DIExpressionCursor &ExprCursor,
unsigned MachineReg,
unsigned FragmentOffsetInBits) {
- if (!ExprCursor)
- return AddMachineReg(TRI, MachineReg);
+ auto Fragment = ExprCursor.getFragmentInfo();
+ if (!addMachineReg(TRI, MachineReg, Fragment ? Fragment->SizeInBits : ~1U)) {
+ LocationKind = Unknown;
+ return false;
+ }
- // Pattern-match combinations for which more efficient representations exist
- // first.
- bool ValidReg = false;
+ bool HasComplexExpression = false;
auto Op = ExprCursor.peek();
- switch (Op->getOp()) {
- default: {
- auto Fragment = ExprCursor.getFragmentInfo();
- ValidReg = AddMachineReg(TRI, MachineReg,
- Fragment ? Fragment->SizeInBits : ~1U);
- break;
+ if (Op && Op->getOp() != dwarf::DW_OP_LLVM_fragment)
+ HasComplexExpression = true;
+
+ // If the register can only be described by a complex expression (i.e.,
+ // multiple subregisters) it doesn't safely compose with another complex
+ // expression. For example, it is not possible to apply a DW_OP_deref
+ // operation to multiple DW_OP_pieces.
+ if (HasComplexExpression && DwarfRegs.size() > 1) {
+ DwarfRegs.clear();
+ LocationKind = Unknown;
+ return false;
}
- case dwarf::DW_OP_plus:
- case dwarf::DW_OP_minus: {
- // [DW_OP_reg,Offset,DW_OP_plus, DW_OP_deref] --> [DW_OP_breg, Offset].
- // [DW_OP_reg,Offset,DW_OP_minus,DW_OP_deref] --> [DW_OP_breg,-Offset].
- auto N = ExprCursor.peekNext();
- if (N && N->getOp() == dwarf::DW_OP_deref) {
- unsigned Offset = Op->getArg(0);
- ValidReg = AddMachineRegIndirect(
- TRI, MachineReg, Op->getOp() == dwarf::DW_OP_plus ? Offset : -Offset);
- ExprCursor.consume(2);
- } else
- ValidReg = AddMachineReg(TRI, MachineReg);
- break;
+
+ // Handle simple register locations.
+ if (LocationKind != Memory && !HasComplexExpression) {
+ for (auto &Reg : DwarfRegs) {
+ if (Reg.DwarfRegNo >= 0)
+ addReg(Reg.DwarfRegNo, Reg.Comment);
+ addOpPiece(Reg.Size);
+ }
+ DwarfRegs.clear();
+ return true;
}
- case dwarf::DW_OP_deref:
- // [DW_OP_reg,DW_OP_deref] --> [DW_OP_breg].
- ValidReg = AddMachineRegIndirect(TRI, MachineReg);
+
+ // Don't emit locations that cannot be expressed without DW_OP_stack_value.
+ if (DwarfVersion < 4)
+ if (std::any_of(ExprCursor.begin(), ExprCursor.end(),
+ [](DIExpression::ExprOperand Op) -> bool {
+ return Op.getOp() == dwarf::DW_OP_stack_value;
+ })) {
+ DwarfRegs.clear();
+ LocationKind = Unknown;
+ return false;
+ }
+
+ assert(DwarfRegs.size() == 1);
+ auto Reg = DwarfRegs[0];
+ bool FBReg = isFrameRegister(TRI, MachineReg);
+ int SignedOffset = 0;
+ assert(Reg.Size == 0 && "subregister has same size as superregister");
+
+ // Pattern-match combinations for which more efficient representations exist.
+ // [Reg, DW_OP_plus_uconst, Offset] --> [DW_OP_breg, Offset].
+ if (Op && (Op->getOp() == dwarf::DW_OP_plus_uconst)) {
+ SignedOffset = Op->getArg(0);
ExprCursor.take();
- break;
}
- return ValidReg;
+ // [Reg, DW_OP_constu, Offset, DW_OP_plus] --> [DW_OP_breg, Offset]
+ // [Reg, DW_OP_constu, Offset, DW_OP_minus] --> [DW_OP_breg,-Offset]
+ // If Reg is a subregister we need to mask it out before subtracting.
+ if (Op && Op->getOp() == dwarf::DW_OP_constu) {
+ auto N = ExprCursor.peekNext();
+ if (N && (N->getOp() == dwarf::DW_OP_plus ||
+ (N->getOp() == dwarf::DW_OP_minus && !SubRegisterSizeInBits))) {
+ int Offset = Op->getArg(0);
+ SignedOffset = (N->getOp() == dwarf::DW_OP_minus) ? -Offset : Offset;
+ ExprCursor.consume(2);
+ }
+ }
+
+ if (FBReg)
+ addFBReg(SignedOffset);
+ else
+ addBReg(Reg.DwarfRegNo, SignedOffset);
+ DwarfRegs.clear();
+ return true;
+}
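The pattern matching above folds a leading offset computation into the base-register opcode. A sketch of the effect, assuming the caller selected a memory location via setMemoryLocationKind and the register maps to DWARF number 5 (not the frame register):

    // DIExpression prefix              emitted location opcode
    // {}                               DW_OP_breg5 0
    // {DW_OP_plus_uconst, 8}           DW_OP_breg5 8
    // {DW_OP_constu, 8, DW_OP_plus}    DW_OP_breg5 8
    // {DW_OP_constu, 8, DW_OP_minus}   DW_OP_breg5 -8  (only if no subregister
    //                                                    mask is pending)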
+
+/// Assuming a well-formed expression, match "DW_OP_deref* DW_OP_LLVM_fragment?".
+static bool isMemoryLocation(DIExpressionCursor ExprCursor) {
+ while (ExprCursor) {
+ auto Op = ExprCursor.take();
+ switch (Op->getOp()) {
+ case dwarf::DW_OP_deref:
+ case dwarf::DW_OP_LLVM_fragment:
+ break;
+ default:
+ return false;
+ }
+ }
+ return true;
}
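This helper lets addExpression turn an explicit DW_OP_deref into an implicit one: when everything after the deref is more derefs or a fragment marker, the whole expression can be reinterpreted as a memory location description. A sketch, assuming a variable whose address lives in DWARF register 5:

    // As a value description:  DW_OP_reg5, DW_OP_deref  (invalid: DW_OP_reg*
    //                                                    must stand alone)
    // As a memory description: DW_OP_breg5 0            (the deref is implicit)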
-void DwarfExpression::AddExpression(DIExpressionCursor &&ExprCursor,
+void DwarfExpression::addExpression(DIExpressionCursor &&ExprCursor,
unsigned FragmentOffsetInBits) {
+ // If we need to mask out a subregister, do it now, unless the next
+ // operation would emit an OpPiece anyway.
+ auto N = ExprCursor.peek();
+ if (SubRegisterSizeInBits && N && (N->getOp() != dwarf::DW_OP_LLVM_fragment))
+ maskSubRegister();
+
while (ExprCursor) {
auto Op = ExprCursor.take();
switch (Op->getOp()) {
@@ -241,49 +309,91 @@ void DwarfExpression::AddExpression(DIExpressionCursor &&ExprCursor,
// location.
assert(OffsetInBits >= FragmentOffset && "fragment offset not added?");
- // If \a AddMachineReg already emitted DW_OP_piece operations to represent
+ // If addMachineReg already emitted DW_OP_piece operations to represent
// a super-register by splicing together sub-registers, subtract the size
// of the pieces that was already emitted.
SizeInBits -= OffsetInBits - FragmentOffset;
- // If \a AddMachineReg requested a DW_OP_bit_piece to stencil out a
+ // If addMachineReg requested a DW_OP_bit_piece to stencil out a
// sub-register that is smaller than the current fragment's size, use it.
if (SubRegisterSizeInBits)
SizeInBits = std::min<unsigned>(SizeInBits, SubRegisterSizeInBits);
-
- AddOpPiece(SizeInBits, SubRegisterOffsetInBits);
+
+ // Emit a DW_OP_stack_value for implicit location descriptions.
+ if (LocationKind == Implicit)
+ addStackValue();
+
+ // Emit the DW_OP_piece.
+ addOpPiece(SizeInBits, SubRegisterOffsetInBits);
setSubRegisterPiece(0, 0);
- break;
+ // Reset the location description kind.
+ LocationKind = Unknown;
+ return;
}
- case dwarf::DW_OP_plus:
- EmitOp(dwarf::DW_OP_plus_uconst);
- EmitUnsigned(Op->getArg(0));
+ case dwarf::DW_OP_plus_uconst:
+ assert(LocationKind != Register);
+ emitOp(dwarf::DW_OP_plus_uconst);
+ emitUnsigned(Op->getArg(0));
break;
+ case dwarf::DW_OP_plus:
case dwarf::DW_OP_minus:
- // There is no OP_minus_uconst.
- EmitOp(dwarf::DW_OP_constu);
- EmitUnsigned(Op->getArg(0));
- EmitOp(dwarf::DW_OP_minus);
+ emitOp(Op->getOp());
break;
- case dwarf::DW_OP_deref:
- EmitOp(dwarf::DW_OP_deref);
+ case dwarf::DW_OP_deref: {
+ assert(LocationKind != Register);
+ if (LocationKind != Memory && isMemoryLocation(ExprCursor))
+ // Turning this into a memory location description makes the deref
+ // implicit.
+ LocationKind = Memory;
+ else
+ emitOp(dwarf::DW_OP_deref);
break;
+ }
case dwarf::DW_OP_constu:
- EmitOp(dwarf::DW_OP_constu);
- EmitUnsigned(Op->getArg(0));
+ assert(LocationKind != Register);
+ emitOp(dwarf::DW_OP_constu);
+ emitUnsigned(Op->getArg(0));
break;
case dwarf::DW_OP_stack_value:
- AddStackValue();
+ LocationKind = Implicit;
+ break;
+ case dwarf::DW_OP_swap:
+ assert(LocationKind != Register);
+ emitOp(dwarf::DW_OP_swap);
+ break;
+ case dwarf::DW_OP_xderef:
+ assert(LocationKind != Register);
+ emitOp(dwarf::DW_OP_xderef);
break;
default:
llvm_unreachable("unhandled opcode found in expression");
}
}
+
+ if (LocationKind == Implicit)
+ // Turn this into an implicit location description.
+ addStackValue();
+}
+
+/// Add masking operations to stencil out a subregister.
+void DwarfExpression::maskSubRegister() {
+ assert(SubRegisterSizeInBits && "no subregister was registered");
+ if (SubRegisterOffsetInBits > 0)
+ addShr(SubRegisterOffsetInBits);
+ uint64_t Mask = (1ULL << (uint64_t)SubRegisterSizeInBits) - 1ULL;
+ addAnd(Mask);
}
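A worked example, assuming a sub-register occupying bits [8, 16) of its super-register (the x86 AH-within-EAX layout) and assuming addShr/addAnd push their operand with DW_OP_constu:

    // SubRegisterOffsetInBits = 8, SubRegisterSizeInBits = 8:
    //   addShr(8)    -> DW_OP_constu 8,    DW_OP_shr
    //   addAnd(0xff) -> DW_OP_constu 0xff, DW_OP_and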
+
void DwarfExpression::finalize() {
- if (SubRegisterSizeInBits)
- AddOpPiece(SubRegisterSizeInBits, SubRegisterOffsetInBits);
+ assert(DwarfRegs.size() == 0 && "dwarf registers not emitted");
+ // Emit any outstanding DW_OP_piece operations to mask out subregisters.
+ if (SubRegisterSizeInBits == 0)
+ return;
+ // Don't emit a DW_OP_piece for a subregister at offset 0.
+ if (SubRegisterOffsetInBits == 0)
+ return;
+ addOpPiece(SubRegisterSizeInBits, SubRegisterOffsetInBits);
}
void DwarfExpression::addFragmentOffset(const DIExpression *Expr) {
@@ -294,6 +404,6 @@ void DwarfExpression::addFragmentOffset(const DIExpression *Expr) {
assert(FragmentOffset >= OffsetInBits &&
"overlapping or duplicate fragments");
if (FragmentOffset > OffsetInBits)
- AddOpPiece(FragmentOffset - OffsetInBits);
+ addOpPiece(FragmentOffset - OffsetInBits);
OffsetInBits = FragmentOffset;
}
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h
index fd90fa0..728f8ad 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h
@@ -42,6 +42,9 @@ public:
DIExpressionCursor(ArrayRef<uint64_t> Expr)
: Start(Expr.begin()), End(Expr.end()) {}
+ DIExpressionCursor(const DIExpressionCursor &C)
+ : Start(C.Start), End(C.End) {}
+
/// Consume one operation.
Optional<DIExpression::ExprOperand> take() {
if (Start == End)
@@ -72,6 +75,8 @@ public:
}
/// Determine whether there are any operations left in this expression.
operator bool() const { return Start != End; }
+ DIExpression::expr_op_iterator begin() const { return Start; }
+ DIExpression::expr_op_iterator end() const { return End; }
/// Retrieve the fragment information, if any.
Optional<DIExpression::FragmentInfo> getFragmentInfo() const {
@@ -84,14 +89,27 @@ public:
/// entry.
class DwarfExpression {
protected:
- unsigned DwarfVersion;
+ /// Holds information about all subregisters comprising a register location.
+ struct Register {
+ int DwarfRegNo;
+ unsigned Size;
+ const char *Comment;
+ };
+
+ /// The register location, if any.
+ SmallVector<Register, 2> DwarfRegs;
+
/// Current Fragment Offset in Bits.
uint64_t OffsetInBits = 0;
+ unsigned DwarfVersion;
/// Sometimes we need to add a DW_OP_bit_piece to describe a subregister.
unsigned SubRegisterSizeInBits = 0;
unsigned SubRegisterOffsetInBits = 0;
+ /// The kind of location description being produced.
+ enum { Unknown = 0, Register, Memory, Implicit } LocationKind = Unknown;
+
/// Push a DW_OP_piece / DW_OP_bit_piece for emitting later, if one is needed
/// to represent a subregister.
void setSubRegisterPiece(unsigned SizeInBits, unsigned OffsetInBits) {
@@ -99,35 +117,55 @@ protected:
SubRegisterOffsetInBits = OffsetInBits;
}
-public:
- DwarfExpression(unsigned DwarfVersion) : DwarfVersion(DwarfVersion) {}
- virtual ~DwarfExpression() {};
-
- /// This needs to be called last to commit any pending changes.
- void finalize();
+ /// Add masking operations to stencil out a subregister.
+ void maskSubRegister();
/// Output a dwarf operand and an optional assembler comment.
- virtual void EmitOp(uint8_t Op, const char *Comment = nullptr) = 0;
+ virtual void emitOp(uint8_t Op, const char *Comment = nullptr) = 0;
/// Emit a raw signed value.
- virtual void EmitSigned(int64_t Value) = 0;
+ virtual void emitSigned(int64_t Value) = 0;
/// Emit a raw unsigned value.
- virtual void EmitUnsigned(uint64_t Value) = 0;
+ virtual void emitUnsigned(uint64_t Value) = 0;
/// Return whether the given machine register is the frame register in the
/// current function.
virtual bool isFrameRegister(const TargetRegisterInfo &TRI, unsigned MachineReg) = 0;
- /// Emit a dwarf register operation.
- void AddReg(int DwarfReg, const char *Comment = nullptr);
- /// Emit an (double-)indirect dwarf register operation.
- void AddRegIndirect(int DwarfReg, int Offset, bool Deref = false);
+ /// Emit a DW_OP_reg operation. Note that this is only legal inside a DWARF
+ /// register location description.
+ void addReg(int DwarfReg, const char *Comment = nullptr);
+ /// Emit a DW_OP_breg operation.
+ void addBReg(int DwarfReg, int Offset);
+ /// Emit DW_OP_fbreg <Offset>.
+ void addFBReg(int Offset);
+
+ /// Emit a partial DWARF register operation.
+ ///
+ /// \param MachineReg The register number.
+ /// \param MaxSize If the register must be composed from
+ /// sub-registers this is an upper bound
+ /// for how many bits the emitted DW_OP_piece
+ /// may cover.
+ ///
+  /// If size and offset are zero, an operation for the entire register is
+ /// emitted: Some targets do not provide a DWARF register number for every
+ /// register. If this is the case, this function will attempt to emit a DWARF
+ /// register by emitting a fragment of a super-register or by piecing together
+ /// multiple subregisters that alias the register.
+ ///
+ /// \return false if no DWARF register exists for MachineReg.
+ bool addMachineReg(const TargetRegisterInfo &TRI, unsigned MachineReg,
+ unsigned MaxSize = ~1U);
+
/// Emit a DW_OP_piece or DW_OP_bit_piece operation for a variable fragment.
/// \param OffsetInBits This is an optional offset into the location that
/// is at the top of the DWARF stack.
- void AddOpPiece(unsigned SizeInBits, unsigned OffsetInBits = 0);
+ void addOpPiece(unsigned SizeInBits, unsigned OffsetInBits = 0);
- /// Emit a shift-right dwarf expression.
- void AddShr(unsigned ShiftBy);
+ /// Emit a shift-right dwarf operation.
+ void addShr(unsigned ShiftBy);
+ /// Emit a bitwise and dwarf operation.
+ void addAnd(unsigned Mask);
/// Emit a DW_OP_stack_value, if supported.
///
@@ -140,48 +178,39 @@ public:
/// constant value, so the producers and consumers started to rely on
/// heuristics to disambiguate the value vs. location status of the
/// expression. See PR21176 for more details.
- void AddStackValue();
+ void addStackValue();
- /// Emit an indirect dwarf register operation for the given machine register.
- /// \return false if no DWARF register exists for MachineReg.
- bool AddMachineRegIndirect(const TargetRegisterInfo &TRI, unsigned MachineReg,
- int Offset = 0);
+ ~DwarfExpression() = default;
+public:
+ DwarfExpression(unsigned DwarfVersion) : DwarfVersion(DwarfVersion) {}
- /// Emit a partial DWARF register operation.
- ///
- /// \param MachineReg The register number.
- /// \param MaxSize If the register must be composed from
- /// sub-registers this is an upper bound
- /// for how many bits the emitted DW_OP_piece
- /// may cover.
- ///
- /// If size and offset is zero an operation for the entire register is
- /// emitted: Some targets do not provide a DWARF register number for every
- /// register. If this is the case, this function will attempt to emit a DWARF
- /// register by emitting a fragment of a super-register or by piecing together
- /// multiple subregisters that alias the register.
- ///
- /// \return false if no DWARF register exists for MachineReg.
- bool AddMachineReg(const TargetRegisterInfo &TRI, unsigned MachineReg,
- unsigned MaxSize = ~1U);
+ /// This needs to be called last to commit any pending changes.
+ void finalize();
/// Emit a signed constant.
- void AddSignedConstant(int64_t Value);
+ void addSignedConstant(int64_t Value);
/// Emit an unsigned constant.
- void AddUnsignedConstant(uint64_t Value);
+ void addUnsignedConstant(uint64_t Value);
/// Emit an unsigned constant.
- void AddUnsignedConstant(const APInt &Value);
+ void addUnsignedConstant(const APInt &Value);
+
+ /// Lock this down to become a memory location description.
+ void setMemoryLocationKind() {
+ assert(LocationKind == Unknown);
+ LocationKind = Memory;
+ }
/// Emit a machine register location. As an optimization this may also consume
/// the prefix of a DwarfExpression if a more efficient representation for
/// combining the register location and the first operation exists.
///
- /// \param FragmentOffsetInBits If this is one fragment out of a fragmented
+ /// \param FragmentOffsetInBits If this is one fragment out of a
+ /// fragmented
/// location, this is the offset of the
/// fragment inside the entire variable.
/// \return false if no DWARF register exists
/// for MachineReg.
- bool AddMachineRegExpression(const TargetRegisterInfo &TRI,
+ bool addMachineRegExpression(const TargetRegisterInfo &TRI,
DIExpressionCursor &Expr, unsigned MachineReg,
unsigned FragmentOffsetInBits = 0);
/// Emit all remaining operations in the DIExpressionCursor.
@@ -189,7 +218,7 @@ public:
/// \param FragmentOffsetInBits If this is one fragment out of multiple
/// locations, this is the offset of the
/// fragment inside the entire variable.
- void AddExpression(DIExpressionCursor &&Expr,
+ void addExpression(DIExpressionCursor &&Expr,
unsigned FragmentOffsetInBits = 0);
/// If applicable, emit an empty DW_OP_piece / DW_OP_bit_piece to advance to
@@ -198,33 +227,32 @@ public:
};
/// DwarfExpression implementation for .debug_loc entries.
-class DebugLocDwarfExpression : public DwarfExpression {
+class DebugLocDwarfExpression final : public DwarfExpression {
ByteStreamer &BS;
+ void emitOp(uint8_t Op, const char *Comment = nullptr) override;
+ void emitSigned(int64_t Value) override;
+ void emitUnsigned(uint64_t Value) override;
+ bool isFrameRegister(const TargetRegisterInfo &TRI,
+ unsigned MachineReg) override;
public:
DebugLocDwarfExpression(unsigned DwarfVersion, ByteStreamer &BS)
: DwarfExpression(DwarfVersion), BS(BS) {}
-
- void EmitOp(uint8_t Op, const char *Comment = nullptr) override;
- void EmitSigned(int64_t Value) override;
- void EmitUnsigned(uint64_t Value) override;
- bool isFrameRegister(const TargetRegisterInfo &TRI,
- unsigned MachineReg) override;
};
/// DwarfExpression implementation for singular DW_AT_location.
-class DIEDwarfExpression : public DwarfExpression {
+class DIEDwarfExpression final : public DwarfExpression {
const AsmPrinter &AP;
DwarfUnit &DU;
DIELoc &DIE;
-public:
- DIEDwarfExpression(const AsmPrinter &AP, DwarfUnit &DU, DIELoc &DIE);
- void EmitOp(uint8_t Op, const char *Comment = nullptr) override;
- void EmitSigned(int64_t Value) override;
- void EmitUnsigned(uint64_t Value) override;
+ void emitOp(uint8_t Op, const char *Comment = nullptr) override;
+ void emitSigned(int64_t Value) override;
+ void emitUnsigned(uint64_t Value) override;
bool isFrameRegister(const TargetRegisterInfo &TRI,
unsigned MachineReg) override;
+public:
+ DIEDwarfExpression(const AsmPrinter &AP, DwarfUnit &DU, DIELoc &DIE);
DIELoc *finalize() {
DwarfExpression::finalize();
return &DIE;
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfFile.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfFile.h
index d4d2ed2..54924e9 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfFile.h
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfFile.h
@@ -53,6 +53,7 @@ class DwarfFile {
// Collection of abstract subprogram DIEs.
DenseMap<const MDNode *, DIE *> AbstractSPDies;
+ DenseMap<const MDNode *, std::unique_ptr<DbgVariable>> AbstractVariables;
/// Maps MDNodes for type system with the corresponding DIEs. These DIEs can
/// be shared across CUs, that is why we keep the map here instead
@@ -105,6 +106,9 @@ public:
DenseMap<const MDNode *, DIE *> &getAbstractSPDies() {
return AbstractSPDies;
}
+ DenseMap<const MDNode *, std::unique_ptr<DbgVariable>> &getAbstractVariables() {
+ return AbstractVariables;
+ }
void insertDIE(const MDNode *TypeMD, DIE *Die) {
DITypeNodeToDieMap.insert(std::make_pair(TypeMD, Die));
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
index 2a866c0..4f4ebfc 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
@@ -18,18 +18,19 @@
#include "DwarfExpression.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
-#include "llvm/ADT/iterator_range.h"
#include "llvm/ADT/None.h"
+#include "llvm/ADT/iterator_range.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Metadata.h"
-#include "llvm/MC/MachineLocation.h"
+#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCDwarf.h"
#include "llvm/MC/MCSection.h"
#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MachineLocation.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
@@ -54,15 +55,15 @@ DIEDwarfExpression::DIEDwarfExpression(const AsmPrinter &AP, DwarfUnit &DU,
: DwarfExpression(AP.getDwarfVersion()), AP(AP), DU(DU),
DIE(DIE) {}
-void DIEDwarfExpression::EmitOp(uint8_t Op, const char* Comment) {
+void DIEDwarfExpression::emitOp(uint8_t Op, const char* Comment) {
DU.addUInt(DIE, dwarf::DW_FORM_data1, Op);
}
-void DIEDwarfExpression::EmitSigned(int64_t Value) {
+void DIEDwarfExpression::emitSigned(int64_t Value) {
DU.addSInt(DIE, dwarf::DW_FORM_sdata, Value);
}
-void DIEDwarfExpression::EmitUnsigned(uint64_t Value) {
+void DIEDwarfExpression::emitUnsigned(uint64_t Value) {
DU.addUInt(DIE, dwarf::DW_FORM_udata, Value);
}
@@ -73,8 +74,8 @@ bool DIEDwarfExpression::isFrameRegister(const TargetRegisterInfo &TRI,
DwarfUnit::DwarfUnit(dwarf::Tag UnitTag, const DICompileUnit *Node,
AsmPrinter *A, DwarfDebug *DW, DwarfFile *DWU)
- : DIEUnit(A->getDwarfVersion(), A->getPointerSize(), UnitTag), CUNode(Node),
- Asm(A), DD(DW), DU(DWU), IndexTyDie(nullptr) {
+ : DIEUnit(A->getDwarfVersion(), A->MAI->getCodePointerSize(), UnitTag),
+ CUNode(Node), Asm(A), DD(DW), DU(DWU), IndexTyDie(nullptr) {
}
DwarfTypeUnit::DwarfTypeUnit(DwarfCompileUnit &CU, AsmPrinter *A,
@@ -98,25 +99,35 @@ int64_t DwarfUnit::getDefaultLowerBound() const {
default:
break;
- case dwarf::DW_LANG_C89:
- case dwarf::DW_LANG_C99:
+ // The languages below have valid values in all DWARF versions.
case dwarf::DW_LANG_C:
+ case dwarf::DW_LANG_C89:
case dwarf::DW_LANG_C_plus_plus:
- case dwarf::DW_LANG_ObjC:
- case dwarf::DW_LANG_ObjC_plus_plus:
return 0;
case dwarf::DW_LANG_Fortran77:
case dwarf::DW_LANG_Fortran90:
- case dwarf::DW_LANG_Fortran95:
return 1;
- // The languages below have valid values only if the DWARF version >= 4.
+ // The languages below have valid values only if the DWARF version >= 3.
+ case dwarf::DW_LANG_C99:
+ case dwarf::DW_LANG_ObjC:
+ case dwarf::DW_LANG_ObjC_plus_plus:
+ if (DD->getDwarfVersion() >= 3)
+ return 0;
+ break;
+
+ case dwarf::DW_LANG_Fortran95:
+ if (DD->getDwarfVersion() >= 3)
+ return 1;
+ break;
+
+ // Starting with DWARF v4, all defined languages have valid values.
+ case dwarf::DW_LANG_D:
case dwarf::DW_LANG_Java:
case dwarf::DW_LANG_Python:
case dwarf::DW_LANG_UPC:
- case dwarf::DW_LANG_D:
- if (dwarf::DWARF_VERSION >= 4)
+ if (DD->getDwarfVersion() >= 4)
return 0;
break;
@@ -127,31 +138,33 @@ int64_t DwarfUnit::getDefaultLowerBound() const {
case dwarf::DW_LANG_Modula2:
case dwarf::DW_LANG_Pascal83:
case dwarf::DW_LANG_PLI:
- if (dwarf::DWARF_VERSION >= 4)
+ if (DD->getDwarfVersion() >= 4)
return 1;
break;
- // The languages below have valid values only if the DWARF version >= 5.
- case dwarf::DW_LANG_OpenCL:
- case dwarf::DW_LANG_Go:
- case dwarf::DW_LANG_Haskell:
+ // The languages below are new in DWARF v5.
+ case dwarf::DW_LANG_BLISS:
+ case dwarf::DW_LANG_C11:
case dwarf::DW_LANG_C_plus_plus_03:
case dwarf::DW_LANG_C_plus_plus_11:
+ case dwarf::DW_LANG_C_plus_plus_14:
+ case dwarf::DW_LANG_Dylan:
+ case dwarf::DW_LANG_Go:
+ case dwarf::DW_LANG_Haskell:
case dwarf::DW_LANG_OCaml:
+ case dwarf::DW_LANG_OpenCL:
+ case dwarf::DW_LANG_RenderScript:
case dwarf::DW_LANG_Rust:
- case dwarf::DW_LANG_C11:
case dwarf::DW_LANG_Swift:
- case dwarf::DW_LANG_Dylan:
- case dwarf::DW_LANG_C_plus_plus_14:
- if (dwarf::DWARF_VERSION >= 5)
+ if (DD->getDwarfVersion() >= 5)
return 0;
break;
- case dwarf::DW_LANG_Modula3:
- case dwarf::DW_LANG_Julia:
case dwarf::DW_LANG_Fortran03:
case dwarf::DW_LANG_Fortran08:
- if (dwarf::DWARF_VERSION >= 5)
+ case dwarf::DW_LANG_Julia:
+ case dwarf::DW_LANG_Modula3:
+ if (DD->getDwarfVersion() >= 5)
return 1;
break;
}
@@ -160,7 +173,7 @@ int64_t DwarfUnit::getDefaultLowerBound() const {
}
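The default lower bound is what a consumer assumes when DW_AT_lower_bound is absent, so the producer may omit the attribute whenever the array's actual lower bound matches it; the value only counts as a default once the emitted DWARF version defines it for that language.

    // e.g. a Fortran 90 array declared dimension(1:5): the lower bound 1
    // equals the default, so the subrange DIE can omit DW_AT_lower_bound.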
/// Check whether the DIE for this MDNode can be shared across CUs.
-static bool isShareableAcrossCUs(const DINode *D) {
+bool DwarfUnit::isShareableAcrossCUs(const DINode *D) const {
// When the MDNode can be part of the type system, the DIE can be shared
// across CUs.
// Combining type units and cross-CU DIE sharing is lower value (since
@@ -168,6 +181,8 @@ static bool isShareableAcrossCUs(const DINode *D) {
// level already) but may be implementable for some value in projects
// building multiple independent libraries with LTO and then linking those
// together.
+ if (isDwoUnit() && !DD->shareAcrossDWOCUs())
+ return false;
return (isa<DIType>(D) ||
(isa<DISubprogram>(D) && !cast<DISubprogram>(D)->isDefinition())) &&
!GenerateDwarfTypeUnits;
@@ -285,13 +300,6 @@ void DwarfUnit::addDIETypeSignature(DIE &Die, uint64_t Signature) {
dwarf::DW_FORM_ref_sig8, DIEInteger(Signature));
}
-void DwarfUnit::addDIETypeSignature(DIE &Die, dwarf::Attribute Attribute,
- StringRef Identifier) {
- uint64_t Signature = DD->makeTypeSignature(Identifier);
- Die.addValue(DIEValueAllocator, Attribute, dwarf::DW_FORM_ref_sig8,
- DIEInteger(Signature));
-}
-
void DwarfUnit::addDIEEntry(DIE &Die, dwarf::Attribute Attribute,
DIEEntry Entry) {
const DIEUnit *CU = Die.getUnit();
@@ -369,10 +377,6 @@ void DwarfUnit::addSourceLine(DIE &Die, const DIObjCProperty *Ty) {
addSourceLine(Die, Ty->getLine(), Ty->getFilename(), Ty->getDirectory());
}
-void DwarfUnit::addSourceLine(DIE &Die, const DINamespace *NS) {
- addSourceLine(Die, NS->getLine(), NS->getFilename(), NS->getDirectory());
-}
-
/* Byref variables, in Blocks, are declared by the programmer as "SomeType
VarName;", but the compiler creates a __Block_byref_x_VarName struct, and
gives the variable VarName either the struct, or a pointer to the struct, as
@@ -465,50 +469,48 @@ void DwarfUnit::addBlockByrefAddress(const DbgVariable &DV, DIE &Die,
// Decode the original location, and use that as the start of the byref
// variable's location.
DIELoc *Loc = new (DIEValueAllocator) DIELoc;
- SmallVector<uint64_t, 6> DIExpr;
- DIEDwarfExpression Expr(*Asm, *this, *Loc);
-
- bool validReg;
- if (Location.isReg())
- validReg = Expr.AddMachineReg(*Asm->MF->getSubtarget().getRegisterInfo(),
- Location.getReg());
- else
- validReg =
- Expr.AddMachineRegIndirect(*Asm->MF->getSubtarget().getRegisterInfo(),
- Location.getReg(), Location.getOffset());
-
- if (!validReg)
- return;
-
+ DIEDwarfExpression DwarfExpr(*Asm, *this, *Loc);
+ if (Location.isIndirect())
+ DwarfExpr.setMemoryLocationKind();
+
+ SmallVector<uint64_t, 9> Ops;
+ if (Location.isIndirect() && Location.getOffset()) {
+ Ops.push_back(dwarf::DW_OP_plus_uconst);
+ Ops.push_back(Location.getOffset());
+ }
// If we started with a pointer to the __Block_byref... struct, then
// the first thing we need to do is dereference the pointer (DW_OP_deref).
if (isPointer)
- DIExpr.push_back(dwarf::DW_OP_deref);
+ Ops.push_back(dwarf::DW_OP_deref);
// Next add the offset for the '__forwarding' field:
// DW_OP_plus_uconst ForwardingFieldOffset. Note there's no point in
// adding the offset if it's 0.
if (forwardingFieldOffset > 0) {
- DIExpr.push_back(dwarf::DW_OP_plus);
- DIExpr.push_back(forwardingFieldOffset);
+ Ops.push_back(dwarf::DW_OP_plus_uconst);
+ Ops.push_back(forwardingFieldOffset);
}
// Now dereference the __forwarding field to get to the real __Block_byref
// struct: DW_OP_deref.
- DIExpr.push_back(dwarf::DW_OP_deref);
+ Ops.push_back(dwarf::DW_OP_deref);
// Now that we've got the real __Block_byref... struct, add the offset
// for the variable's field to get to the location of the actual variable:
// DW_OP_plus_uconst varFieldOffset. Again, don't add if it's 0.
if (varFieldOffset > 0) {
- DIExpr.push_back(dwarf::DW_OP_plus);
- DIExpr.push_back(varFieldOffset);
+ Ops.push_back(dwarf::DW_OP_plus_uconst);
+ Ops.push_back(varFieldOffset);
}
- Expr.AddExpression(makeArrayRef(DIExpr));
- Expr.finalize();
+
+ DIExpressionCursor Cursor(Ops);
+ const TargetRegisterInfo &TRI = *Asm->MF->getSubtarget().getRegisterInfo();
+ if (!DwarfExpr.addMachineRegExpression(TRI, Cursor, Location.getReg()))
+ return;
+ DwarfExpr.addExpression(std::move(Cursor));
// Now attach the location information to the DIE.
- addBlock(Die, Attribute, Loc);
+ addBlock(Die, Attribute, DwarfExpr.finalize());
}
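Putting the pieces together: for a __block variable reached through a pointer, with forwarding field offset F and variable field offset V both non-zero, the Ops vector above lowers to roughly (register number illustrative):

    //   DW_OP_breg<N> 0        // address of the __Block_byref struct
    //   DW_OP_deref            // isPointer: strip the extra indirection
    //   DW_OP_plus_uconst F    // advance to the __forwarding field
    //   DW_OP_deref            // follow it to the live copy of the struct
    //   DW_OP_plus_uconst V    // land on the variable's own field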
/// Return true if type encoding is unsigned.
@@ -645,7 +647,7 @@ void DwarfUnit::addLinkageName(DIE &Die, StringRef LinkageName) {
addString(Die,
DD->getDwarfVersion() >= 4 ? dwarf::DW_AT_linkage_name
: dwarf::DW_AT_MIPS_linkage_name,
- GlobalValue::getRealLinkageName(LinkageName));
+ GlobalValue::dropLLVMManglingEscape(LinkageName));
}
void DwarfUnit::addTemplateParams(DIE &Buffer, DINodeArray TParams) {
@@ -658,6 +660,14 @@ void DwarfUnit::addTemplateParams(DIE &Buffer, DINodeArray TParams) {
}
}
+/// Add thrown types.
+void DwarfUnit::addThrownTypes(DIE &Die, DINodeArray ThrownTypes) {
+ for (const auto *Ty : ThrownTypes) {
+ DIE &TT = createAndAddDIE(dwarf::DW_TAG_thrown_type, Die);
+ addType(TT, cast<DIType>(Ty));
+ }
+}
+
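DW_TAG_thrown_type children record a subprogram's declared exception types (used by languages such as Swift whose functions declare what they throw). A sketch of the resulting DIE shape, names illustrative:

    // DW_TAG_subprogram "f"
    //   DW_TAG_thrown_type
    //     DW_AT_type -> DIE of the thrown exception type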
DIE *DwarfUnit::getOrCreateContextDIE(const DIScope *Context) {
if (!Context || isa<DIFile>(Context))
return &getUnitDie();
@@ -672,7 +682,7 @@ DIE *DwarfUnit::getOrCreateContextDIE(const DIScope *Context) {
return getDIE(Context);
}
-DIE *DwarfUnit::createTypeDIE(const DICompositeType *Ty) {
+DIE *DwarfTypeUnit::createTypeDIE(const DICompositeType *Ty) {
auto *Context = resolve(Ty->getScope());
DIE *ContextDIE = getOrCreateContextDIE(Context);
@@ -684,8 +694,7 @@ DIE *DwarfUnit::createTypeDIE(const DICompositeType *Ty) {
constructTypeDIE(TyDIE, cast<DICompositeType>(Ty));
- if (!Ty->isExternalTypeRef())
- updateAcceleratorTables(Context, Ty, TyDIE);
+ updateAcceleratorTables(Context, Ty, TyDIE);
return &TyDIE;
}
@@ -841,6 +850,13 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, const DIDerivedType *DTy) {
// Add source line info if available and TyDesc is not a forward declaration.
if (!DTy->isForwardDecl())
addSourceLine(Buffer, DTy);
+
+ // If DWARF address space value is other than None, add it for pointer and
+ // reference types as DW_AT_address_class.
+ if (DTy->getDWARFAddressSpace() && (Tag == dwarf::DW_TAG_pointer_type ||
+ Tag == dwarf::DW_TAG_reference_type))
+ addUInt(Buffer, dwarf::DW_AT_address_class, dwarf::DW_FORM_data4,
+ DTy->getDWARFAddressSpace().getValue());
}
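A sketch of the effect, assuming a pointer type whose DIDerivedType carries dwarfAddressSpace: 1 (the numeric value itself is target-defined, e.g. a GPU address space):

    // DW_TAG_pointer_type
    //   DW_AT_type           -> pointee type DIE
    //   DW_AT_address_class  1    (emitted as DW_FORM_data4)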
void DwarfUnit::constructSubprogramArguments(DIE &Buffer, DITypeRefArray Args) {
@@ -892,13 +908,6 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, const DISubroutineType *CTy) {
}
void DwarfUnit::constructTypeDIE(DIE &Buffer, const DICompositeType *CTy) {
- if (CTy->isExternalTypeRef()) {
- StringRef Identifier = CTy->getIdentifier();
- assert(!Identifier.empty() && "external type ref without identifier");
- addFlag(Buffer, dwarf::DW_AT_declaration);
- return addDIETypeSignature(Buffer, dwarf::DW_AT_signature, Identifier);
- }
-
// Add name if not anonymous or intermediate type.
StringRef Name = CTy->getName();
@@ -1074,7 +1083,6 @@ DIE *DwarfUnit::getOrCreateNameSpace(const DINamespace *NS) {
Name = "(anonymous namespace)";
DD->addAccelNamespace(Name, NDie);
addGlobalName(Name, NDie, NS->getScope());
- addSourceLine(NDie, NS);
if (NS->getExportSymbols())
addFlag(NDie, dwarf::DW_AT_export_symbols);
return &NDie;
@@ -1180,8 +1188,12 @@ bool DwarfUnit::applySubprogramDefinitionAttributes(const DISubprogram *SP,
}
void DwarfUnit::applySubprogramAttributes(const DISubprogram *SP, DIE &SPDie,
- bool Minimal) {
- if (!Minimal)
+ bool SkipSPAttributes) {
+  // If -fdebug-info-for-profiling is enabled, we need to emit the subprogram
+  // and its source location.
+ bool SkipSPSourceLocation = SkipSPAttributes &&
+ !CUNode->getDebugInfoForProfiling();
+ if (!SkipSPSourceLocation)
if (applySubprogramDefinitionAttributes(SP, SPDie))
return;
@@ -1189,12 +1201,13 @@ void DwarfUnit::applySubprogramAttributes(const DISubprogram *SP, DIE &SPDie,
if (!SP->getName().empty())
addString(SPDie, dwarf::DW_AT_name, SP->getName());
+ if (!SkipSPSourceLocation)
+ addSourceLine(SPDie, SP);
+
// Skip the rest of the attributes under -gmlt to save space.
- if (Minimal)
+ if (SkipSPAttributes)
return;
- addSourceLine(SPDie, SP);
-
// Add the prototype if we have a prototype and we have a C like
// language.
uint16_t Language = getLanguage();
@@ -1241,6 +1254,8 @@ void DwarfUnit::applySubprogramAttributes(const DISubprogram *SP, DIE &SPDie,
constructSubprogramArguments(SPDie, Args);
}
+ addThrownTypes(SPDie, SP->getThrownTypes());
+
if (SP->isArtificial())
addFlag(SPDie, dwarf::DW_AT_artificial);
@@ -1526,18 +1541,27 @@ DIE *DwarfUnit::getOrCreateStaticMemberDIE(const DIDerivedType *DT) {
return &StaticMemberDIE;
}
-void DwarfUnit::emitHeader(bool UseOffsets) {
+void DwarfUnit::emitCommonHeader(bool UseOffsets, dwarf::UnitType UT) {
// Emit size of content not including length itself
Asm->OutStreamer->AddComment("Length of Unit");
Asm->EmitInt32(getHeaderSize() + getUnitDie().getSize());
Asm->OutStreamer->AddComment("DWARF version number");
- Asm->EmitInt16(DD->getDwarfVersion());
- Asm->OutStreamer->AddComment("Offset Into Abbrev. Section");
+ unsigned Version = DD->getDwarfVersion();
+ Asm->EmitInt16(Version);
+
+ // DWARF v5 reorders the address size and adds a unit type.
+ if (Version >= 5) {
+ Asm->OutStreamer->AddComment("DWARF Unit Type");
+ Asm->EmitInt8(UT);
+ Asm->OutStreamer->AddComment("Address Size (in bytes)");
+ Asm->EmitInt8(Asm->MAI->getCodePointerSize());
+ }
// We share one abbreviations table across all units so it's always at the
// start of the section. Use a relocatable offset where needed to ensure
// linking doesn't invalidate that offset.
+ Asm->OutStreamer->AddComment("Offset Into Abbrev. Section");
const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
if (UseOffsets)
Asm->EmitInt32(0);
@@ -1545,12 +1569,16 @@ void DwarfUnit::emitHeader(bool UseOffsets) {
Asm->emitDwarfSymbolReference(
TLOF.getDwarfAbbrevSection()->getBeginSymbol(), false);
- Asm->OutStreamer->AddComment("Address Size (in bytes)");
- Asm->EmitInt8(Asm->getDataLayout().getPointerSize());
+ if (Version <= 4) {
+ Asm->OutStreamer->AddComment("Address Size (in bytes)");
+ Asm->EmitInt8(Asm->MAI->getCodePointerSize());
+ }
}
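The resulting unit header layouts, for reference: DWARF v5 inserts a unit-type byte and moves the address size ahead of the abbreviation offset:

    // DWARF v2-v4 unit header         DWARF v5 unit header
    //   uint32 unit_length              uint32 unit_length
    //   uint16 version                  uint16 version
    //   uint32 debug_abbrev_offset      uint8  unit_type (DW_UT_*)
    //   uint8  address_size             uint8  address_size
    //                                   uint32 debug_abbrev_offset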
void DwarfTypeUnit::emitHeader(bool UseOffsets) {
- DwarfUnit::emitHeader(UseOffsets);
+ DwarfUnit::emitCommonHeader(UseOffsets,
+ DD->useSplitDwarf() ? dwarf::DW_UT_split_type
+ : dwarf::DW_UT_type);
Asm->OutStreamer->AddComment("Type Signature");
Asm->OutStreamer->EmitIntValue(TypeSignature, sizeof(TypeSignature));
Asm->OutStreamer->AddComment("Type DIE Offset");
@@ -1559,8 +1587,46 @@ void DwarfTypeUnit::emitHeader(bool UseOffsets) {
sizeof(Ty->getOffset()));
}
+DIE::value_iterator
+DwarfUnit::addSectionDelta(DIE &Die, dwarf::Attribute Attribute,
+ const MCSymbol *Hi, const MCSymbol *Lo) {
+ return Die.addValue(DIEValueAllocator, Attribute,
+ DD->getDwarfVersion() >= 4 ? dwarf::DW_FORM_sec_offset
+ : dwarf::DW_FORM_data4,
+ new (DIEValueAllocator) DIEDelta(Hi, Lo));
+}
+
+DIE::value_iterator
+DwarfUnit::addSectionLabel(DIE &Die, dwarf::Attribute Attribute,
+ const MCSymbol *Label, const MCSymbol *Sec) {
+ if (Asm->MAI->doesDwarfUseRelocationsAcrossSections())
+ return addLabel(Die, Attribute,
+ DD->getDwarfVersion() >= 4 ? dwarf::DW_FORM_sec_offset
+ : dwarf::DW_FORM_data4,
+ Label);
+ return addSectionDelta(Die, Attribute, Label, Sec);
+}
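A hypothetical use, pointing a unit's DW_AT_stmt_list at the start of the line table; on assemblers that support cross-section relocations this emits the label directly, otherwise it falls back to the label-minus-section-start delta:

    // Symbol names illustrative:
    // addSectionLabel(UnitDie, dwarf::DW_AT_stmt_list,
    //                 LineTableStartSym, LineSectionBeginSym);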
+
bool DwarfTypeUnit::isDwoUnit() const {
// Since there are no skeleton type units, all type units are dwo type units
// when split DWARF is being used.
return DD->useSplitDwarf();
}
+
+void DwarfTypeUnit::addGlobalName(StringRef Name, const DIE &Die,
+ const DIScope *Context) {
+ getCU().addGlobalNameForTypeUnit(Name, Context);
+}
+
+void DwarfTypeUnit::addGlobalType(const DIType *Ty, const DIE &Die,
+ const DIScope *Context) {
+ getCU().addGlobalTypeUnitType(Ty, Context);
+}
+
+const MCSymbol *DwarfUnit::getCrossSectionRelativeBaseAddress() const {
+ if (!Asm->MAI->doesDwarfUseRelocationsAcrossSections())
+ return nullptr;
+ if (isDwoUnit())
+ return nullptr;
+ return getSection()->getBeginSymbol();
+}
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h
index 8654d6f..4cc01b3 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h
@@ -65,7 +65,7 @@ public:
//===----------------------------------------------------------------------===//
/// This dwarf writer support class manages information associated with a
/// source file.
- class DwarfUnit : public DIEUnit {
+class DwarfUnit : public DIEUnit {
protected:
/// MDNode for the compile unit.
const DICompileUnit *CUNode;
@@ -103,9 +103,10 @@ protected:
bool applySubprogramDefinitionAttributes(const DISubprogram *SP, DIE &SPDie);
-public:
- virtual ~DwarfUnit();
+ bool shareAcrossDWOCUs() const;
+ bool isShareableAcrossCUs(const DINode *D) const;
+public:
// Accessors.
AsmPrinter* getAsmPrinter() const { return Asm; }
uint16_t getLanguage() const { return CUNode->getSourceLanguage(); }
@@ -124,12 +125,12 @@ public:
std::string getParentContextString(const DIScope *Context) const;
/// Add a new global name to the compile unit.
- virtual void addGlobalName(StringRef Name, DIE &Die, const DIScope *Context) {
- }
+ virtual void addGlobalName(StringRef Name, const DIE &Die,
+ const DIScope *Context) = 0;
/// Add a new global type to the compile unit.
virtual void addGlobalType(const DIType *Ty, const DIE &Die,
- const DIScope *Context) {}
+ const DIScope *Context) = 0;
/// Returns the DIE map slot for the specified debug variable.
///
@@ -198,9 +199,6 @@ public:
/// Add a type's DW_AT_signature and set the declaration flag.
void addDIETypeSignature(DIE &Die, uint64_t Signature);
- /// Add an attribute containing the type signature for a unique identifier.
- void addDIETypeSignature(DIE &Die, dwarf::Attribute Attribute,
- StringRef Identifier);
/// Add block data.
void addBlock(DIE &Die, dwarf::Attribute Attribute, DIELoc *Block);
@@ -215,7 +213,6 @@ public:
void addSourceLine(DIE &Die, const DIGlobalVariable *G);
void addSourceLine(DIE &Die, const DISubprogram *SP);
void addSourceLine(DIE &Die, const DIType *Ty);
- void addSourceLine(DIE &Die, const DINamespace *NS);
void addSourceLine(DIE &Die, const DIObjCProperty *Ty);
/// Add constant value entry in variable DIE.
@@ -235,6 +232,9 @@ public:
/// Add template parameters in buffer.
void addTemplateParams(DIE &Buffer, DINodeArray TParams);
+ /// Add thrown types.
+ void addThrownTypes(DIE &Die, DINodeArray ThrownTypes);
+
// FIXME: Should be reformulated in terms of addComplexAddress.
/// Start with the address based on the location provided, and generate the
/// DWARF information necessary to find the actual Block variable (navigating
@@ -256,15 +256,12 @@ public:
DIE *getOrCreateSubprogramDIE(const DISubprogram *SP, bool Minimal = false);
void applySubprogramAttributes(const DISubprogram *SP, DIE &SPDie,
- bool Minimal = false);
+ bool SkipSPAttributes = false);
/// Find existing DIE or create new DIE for the given type.
DIE *getOrCreateTypeDIE(const MDNode *N);
/// Get context owner's DIE.
- DIE *createTypeDIE(const DICompositeType *Ty);
-
- /// Get context owner's DIE.
DIE *getOrCreateContextDIE(const DIScope *Context);
/// Construct DIEs for types that contain vtables.
@@ -282,17 +279,30 @@ public:
virtual unsigned getHeaderSize() const {
return sizeof(int16_t) + // DWARF version number
sizeof(int32_t) + // Offset Into Abbrev. Section
- sizeof(int8_t); // Pointer Size (in bytes)
+ sizeof(int8_t) + // Pointer Size (in bytes)
+ (DD->getDwarfVersion() >= 5 ? sizeof(int8_t)
+ : 0); // DWARF v5 unit type
}
/// Emit the header for this unit, not including the initial length field.
- virtual void emitHeader(bool UseOffsets);
+ virtual void emitHeader(bool UseOffsets) = 0;
virtual DwarfCompileUnit &getCU() = 0;
void constructTypeDIE(DIE &Buffer, const DICompositeType *CTy);
+  /// Add a label delta attribute data and value.
+ DIE::value_iterator addSectionDelta(DIE &Die, dwarf::Attribute Attribute,
+ const MCSymbol *Hi, const MCSymbol *Lo);
+
+ /// Add a Dwarf section label attribute data and value.
+ DIE::value_iterator addSectionLabel(DIE &Die, dwarf::Attribute Attribute,
+ const MCSymbol *Label,
+ const MCSymbol *Sec);
+
protected:
+ ~DwarfUnit();
+
/// Create new static data member DIE.
DIE *getOrCreateStaticMemberDIE(const DIDerivedType *DT);
@@ -306,6 +316,14 @@ protected:
return Ref.resolve();
}
+ /// If this is a named finished type then include it in the list of types for
+ /// the accelerator tables.
+ void updateAcceleratorTables(const DIScope *Context, const DIType *Ty,
+ const DIE &TyDIE);
+
+ /// Emit the common part of the header for this unit.
+ void emitCommonHeader(bool UseOffsets, dwarf::UnitType UT);
+
private:
void constructTypeDIE(DIE &Buffer, const DIBasicType *BTy);
void constructTypeDIE(DIE &Buffer, const DIDerivedType *DTy);
@@ -330,15 +348,11 @@ private:
/// Set D as anonymous type for index which can be reused later.
void setIndexTyDie(DIE *D) { IndexTyDie = D; }
- /// If this is a named finished type then include it in the list of types for
- /// the accelerator tables.
- void updateAcceleratorTables(const DIScope *Context, const DIType *Ty,
- const DIE &TyDIE);
-
virtual bool isDwoUnit() const = 0;
+ const MCSymbol *getCrossSectionRelativeBaseAddress() const override;
};
-class DwarfTypeUnit : public DwarfUnit {
+class DwarfTypeUnit final : public DwarfUnit {
uint64_t TypeSignature;
const DIE *Ty;
DwarfCompileUnit &CU;
@@ -354,12 +368,19 @@ public:
void setTypeSignature(uint64_t Signature) { TypeSignature = Signature; }
void setType(const DIE *Ty) { this->Ty = Ty; }
+ /// Get context owner's DIE.
+ DIE *createTypeDIE(const DICompositeType *Ty);
+
/// Emit the header for this unit, not including the initial length field.
void emitHeader(bool UseOffsets) override;
unsigned getHeaderSize() const override {
return DwarfUnit::getHeaderSize() + sizeof(uint64_t) + // Type Signature
sizeof(uint32_t); // Type DIE Offset
}
+ void addGlobalName(StringRef Name, const DIE &Die,
+ const DIScope *Context) override;
+ void addGlobalType(const DIType *Ty, const DIE &Die,
+ const DIScope *Context) override;
DwarfCompileUnit &getCU() override { return CU; }
};
} // end llvm namespace
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp
index 0a4a7a0..e14d5be 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp
@@ -309,7 +309,7 @@ computeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites,
// If some instruction between the previous try-range and the end of the
// function may throw, create a call-site entry with no landing pad for the
// region following the try-range.
- if (SawPotentiallyThrowing && !IsSJLJ && LastLabel != nullptr) {
+ if (SawPotentiallyThrowing && !IsSJLJ) {
CallSiteEntry Site = { LastLabel, nullptr, nullptr, 0 };
CallSites.push_back(Site);
}
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp
index 6a023b9..c579555 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp
@@ -1,4 +1,4 @@
-//===-- ErlangGCPrinter.cpp - Erlang/OTP frametable emitter -----*- C++ -*-===//
+//===- ErlangGCPrinter.cpp - Erlang/OTP frametable emitter ----------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -13,22 +13,20 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/BinaryFormat/ELF.h"
#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/GCMetadata.h"
#include "llvm/CodeGen/GCMetadataPrinter.h"
+#include "llvm/CodeGen/GCStrategy.h"
#include "llvm/CodeGen/GCs.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Function.h"
-#include "llvm/IR/Instruction.h"
-#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/IR/Metadata.h"
-#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/IR/Module.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;
@@ -38,13 +36,12 @@ class ErlangGCPrinter : public GCMetadataPrinter {
public:
void finishAssembly(Module &M, GCModuleInfo &Info, AsmPrinter &AP) override;
};
-}
+
+} // end anonymous namespace
static GCMetadataPrinterRegistry::Add<ErlangGCPrinter>
X("erlang", "erlang-compatible garbage collector");
-void llvm::linkErlangGCPrinter() {}
-
void ErlangGCPrinter::finishAssembly(Module &M, GCModuleInfo &Info,
AsmPrinter &AP) {
MCStreamer &OS = *AP.OutStreamer;
@@ -121,3 +118,5 @@ void ErlangGCPrinter::finishAssembly(Module &M, GCModuleInfo &Info,
}
}
}
+
+void llvm::linkErlangGCPrinter() {}
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp
index 8baee4d..035f1a0 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp
@@ -1,4 +1,4 @@
-//===-- OcamlGCPrinter.cpp - Ocaml frametable emitter ---------------------===//
+//===- OcamlGCPrinter.cpp - Ocaml frametable emitter ----------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -11,23 +11,27 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/CodeGen/GCs.h"
#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/Twine.h"
#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/GCMetadata.h"
#include "llvm/CodeGen/GCMetadataPrinter.h"
+#include "llvm/CodeGen/GCs.h"
#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Function.h"
#include "llvm/IR/Mangler.h"
#include "llvm/IR/Module.h"
-#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCDirectives.h"
#include "llvm/MC/MCStreamer.h"
-#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/FormattedStream.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
#include <cctype>
+#include <cstddef>
+#include <cstdint>
+#include <string>
+
using namespace llvm;
namespace {
@@ -37,7 +41,8 @@ public:
void beginAssembly(Module &M, GCModuleInfo &Info, AsmPrinter &AP) override;
void finishAssembly(Module &M, GCModuleInfo &Info, AsmPrinter &AP) override;
};
-}
+
+} // end anonymous namespace
static GCMetadataPrinterRegistry::Add<OcamlGCMetadataPrinter>
Y("ocaml", "ocaml 3.10-compatible collector");
@@ -50,7 +55,7 @@ static void EmitCamlGlobal(const Module &M, AsmPrinter &AP, const char *Id) {
std::string SymName;
SymName += "caml";
size_t Letter = SymName.size();
- SymName.append(MId.begin(), find(MId, '.'));
+ SymName.append(MId.begin(), llvm::find(MId, '.'));
SymName += "__";
SymName += Id;
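A worked example of the symbol construction, assuming MId is "main.ml" and Id is "frametable"; the module identifier is truncated at the first '.', and (in code below this hunk, not shown) the byte at index Letter is uppercased:

    //   "caml" + "main" + "__" + "frametable" -> "camlmain__frametable"
    //   after the uppercasing step            -> "camlMain__frametable"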
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/WinException.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/WinException.cpp
index 9d7c96a..5d485f2 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/WinException.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/WinException.cpp
@@ -14,6 +14,8 @@
#include "WinException.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/Twine.h"
+#include "llvm/BinaryFormat/COFF.h"
+#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -29,8 +31,6 @@
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCWin64EH.h"
-#include "llvm/Support/COFF.h"
-#include "llvm/Support/Dwarf.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/Target/TargetFrameLowering.h"
@@ -68,7 +68,7 @@ void WinException::beginFunction(const MachineFunction *MF) {
const Function *F = MF->getFunction();
- shouldEmitMoves = Asm->needsSEHMoves();
+ shouldEmitMoves = Asm->needsSEHMoves() && MF->hasWinCFI();
const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
unsigned PerEncoding = TLOF.getPersonalityEncoding();
@@ -94,14 +94,14 @@ void WinException::beginFunction(const MachineFunction *MF) {
// If we're not using CFI, we don't want the CFI or the personality, but we
// might want EH tables if we had EH pads.
- if (!Asm->MAI->usesWindowsCFI() || (!MF->hasWinCFI() && !PerFn)) {
+ if (!Asm->MAI->usesWindowsCFI()) {
if (Per == EHPersonality::MSVC_X86SEH && !hasEHFunclets) {
// If this is 32-bit SEH and we don't have any funclets (really invokes),
// make sure we emit the parent offset label. Some unreferenced filter
// functions may still refer to it.
const WinEHFuncInfo &FuncInfo = *MF->getWinEHFuncInfo();
StringRef FLinkageName =
- GlobalValue::getRealLinkageName(MF->getFunction()->getName());
+ GlobalValue::dropLLVMManglingEscape(MF->getFunction()->getName());
emitEHRegistrationOffsetLabel(FuncInfo, FLinkageName);
}
shouldEmitLSDA = hasEHFunclets;
@@ -174,7 +174,7 @@ static MCSymbol *getMCSymbolForMBB(AsmPrinter *Asm,
// their funclet entry block's number.
const MachineFunction *MF = MBB->getParent();
const Function *F = MF->getFunction();
- StringRef FuncLinkageName = GlobalValue::getRealLinkageName(F->getName());
+ StringRef FuncLinkageName = GlobalValue::dropLLVMManglingEscape(F->getName());
MCContext &Ctx = MF->getContext();
StringRef HandlerPrefix = MBB->isCleanupFuncletEntry() ? "dtor" : "catch";
return Ctx.getOrCreateSymbol("?" + HandlerPrefix + "$" +
@@ -252,7 +252,7 @@ void WinException::endFunclet() {
!CurrentFuncletEntry->isCleanupFuncletEntry()) {
// If this is a C++ catch funclet (or the parent function),
// emit a reference to the LSDA for the parent function.
- StringRef FuncLinkageName = GlobalValue::getRealLinkageName(F->getName());
+ StringRef FuncLinkageName = GlobalValue::dropLLVMManglingEscape(F->getName());
MCSymbol *FuncInfoXData = Asm->OutContext.getOrCreateSymbol(
Twine("$cppxdata$", FuncLinkageName));
Asm->OutStreamer->EmitValue(create32bitRef(FuncInfoXData), 4);
@@ -536,7 +536,7 @@ void WinException::emitCSpecificHandlerTable(const MachineFunction *MF) {
// Emit a label assignment with the SEH frame offset so we can use it for
// llvm.x86.seh.recoverfp.
StringRef FLinkageName =
- GlobalValue::getRealLinkageName(MF->getFunction()->getName());
+ GlobalValue::dropLLVMManglingEscape(MF->getFunction()->getName());
MCSymbol *ParentFrameOffset =
Ctx.getOrCreateParentFrameOffsetSymbol(FLinkageName);
const MCExpr *MCOffset =
@@ -635,7 +635,7 @@ void WinException::emitCXXFrameHandler3Table(const MachineFunction *MF) {
auto &OS = *Asm->OutStreamer;
const WinEHFuncInfo &FuncInfo = *MF->getWinEHFuncInfo();
- StringRef FuncLinkageName = GlobalValue::getRealLinkageName(F->getName());
+ StringRef FuncLinkageName = GlobalValue::dropLLVMManglingEscape(F->getName());
SmallVector<std::pair<const MCExpr *, int>, 4> IPToStateTable;
MCSymbol *FuncInfoXData = nullptr;
@@ -942,7 +942,7 @@ void WinException::emitEHRegistrationOffsetLabel(const WinEHFuncInfo &FuncInfo,
void WinException::emitExceptHandlerTable(const MachineFunction *MF) {
MCStreamer &OS = *Asm->OutStreamer;
const Function *F = MF->getFunction();
- StringRef FLinkageName = GlobalValue::getRealLinkageName(F->getName());
+ StringRef FLinkageName = GlobalValue::dropLLVMManglingEscape(F->getName());
bool VerboseAsm = OS.isVerboseAsm();
auto AddComment = [&](const Twine &Comment) {
diff --git a/contrib/llvm/lib/CodeGen/AtomicExpandPass.cpp b/contrib/llvm/lib/CodeGen/AtomicExpandPass.cpp
index bf5cf10..aa9c8e9 100644
--- a/contrib/llvm/lib/CodeGen/AtomicExpandPass.cpp
+++ b/contrib/llvm/lib/CodeGen/AtomicExpandPass.cpp
@@ -17,6 +17,7 @@
#include "llvm/CodeGen/AtomicExpandUtils.h"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstIterator.h"
@@ -35,20 +36,17 @@ using namespace llvm;
namespace {
class AtomicExpand: public FunctionPass {
- const TargetMachine *TM;
const TargetLowering *TLI;
public:
static char ID; // Pass identification, replacement for typeid
- explicit AtomicExpand(const TargetMachine *TM = nullptr)
- : FunctionPass(ID), TM(TM), TLI(nullptr) {
+ AtomicExpand() : FunctionPass(ID), TLI(nullptr) {
initializeAtomicExpandPass(*PassRegistry::getPassRegistry());
}
bool runOnFunction(Function &F) override;
private:
- bool bracketInstWithFences(Instruction *I, AtomicOrdering Order,
- bool IsStore, bool IsLoad);
+ bool bracketInstWithFences(Instruction *I, AtomicOrdering Order);
IntegerType *getCorrespondingIntegerType(Type *T, const DataLayout &DL);
LoadInst *convertAtomicLoadToIntegerType(LoadInst *LI);
bool tryExpandAtomicLoad(LoadInst *LI);
@@ -98,12 +96,10 @@ namespace {
char AtomicExpand::ID = 0;
char &llvm::AtomicExpandID = AtomicExpand::ID;
-INITIALIZE_TM_PASS(AtomicExpand, "atomic-expand", "Expand Atomic instructions",
- false, false)
+INITIALIZE_PASS(AtomicExpand, DEBUG_TYPE, "Expand Atomic instructions",
+ false, false)
-FunctionPass *llvm::createAtomicExpandPass(const TargetMachine *TM) {
- return new AtomicExpand(TM);
-}
+FunctionPass *llvm::createAtomicExpandPass() { return new AtomicExpand(); }
namespace {
// Helper functions to retrieve the size of atomic instructions.
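Callers of the constructor change accordingly: the pass now discovers the TargetMachine through TargetPassConfig at runtime, so it only does work when scheduled inside a codegen pipeline. A sketch:

    // Before: PM.add(createAtomicExpandPass(TM));
    // After:  PM.add(createAtomicExpandPass());
    // (TM is now fetched via getAnalysisIfAvailable<TargetPassConfig>().)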
@@ -173,9 +169,14 @@ bool atomicSizeSupported(const TargetLowering *TLI, Inst *I) {
} // end anonymous namespace
bool AtomicExpand::runOnFunction(Function &F) {
- if (!TM || !TM->getSubtargetImpl(F)->enableAtomicExpand())
+ auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
+ if (!TPC)
+ return false;
+
+ auto &TM = TPC->getTM<TargetMachine>();
+ if (!TM.getSubtargetImpl(F)->enableAtomicExpand())
return false;
- TLI = TM->getSubtargetImpl(F)->getTargetLowering();
+ TLI = TM.getSubtargetImpl(F)->getTargetLowering();
SmallVector<Instruction *, 1> AtomicInsts;
@@ -224,22 +225,16 @@ bool AtomicExpand::runOnFunction(Function &F) {
if (TLI->shouldInsertFencesForAtomic(I)) {
auto FenceOrdering = AtomicOrdering::Monotonic;
- bool IsStore, IsLoad;
if (LI && isAcquireOrStronger(LI->getOrdering())) {
FenceOrdering = LI->getOrdering();
LI->setOrdering(AtomicOrdering::Monotonic);
- IsStore = false;
- IsLoad = true;
} else if (SI && isReleaseOrStronger(SI->getOrdering())) {
FenceOrdering = SI->getOrdering();
SI->setOrdering(AtomicOrdering::Monotonic);
- IsStore = true;
- IsLoad = false;
} else if (RMWI && (isReleaseOrStronger(RMWI->getOrdering()) ||
isAcquireOrStronger(RMWI->getOrdering()))) {
FenceOrdering = RMWI->getOrdering();
RMWI->setOrdering(AtomicOrdering::Monotonic);
- IsStore = IsLoad = true;
} else if (CASI && !TLI->shouldExpandAtomicCmpXchgInIR(CASI) &&
(isReleaseOrStronger(CASI->getSuccessOrdering()) ||
isAcquireOrStronger(CASI->getSuccessOrdering()))) {
@@ -250,11 +245,10 @@ bool AtomicExpand::runOnFunction(Function &F) {
FenceOrdering = CASI->getSuccessOrdering();
CASI->setSuccessOrdering(AtomicOrdering::Monotonic);
CASI->setFailureOrdering(AtomicOrdering::Monotonic);
- IsStore = IsLoad = true;
}
if (FenceOrdering != AtomicOrdering::Monotonic) {
- MadeChange |= bracketInstWithFences(I, FenceOrdering, IsStore, IsLoad);
+ MadeChange |= bracketInstWithFences(I, FenceOrdering);
}
}
@@ -320,13 +314,12 @@ bool AtomicExpand::runOnFunction(Function &F) {
return MadeChange;
}
-bool AtomicExpand::bracketInstWithFences(Instruction *I, AtomicOrdering Order,
- bool IsStore, bool IsLoad) {
+bool AtomicExpand::bracketInstWithFences(Instruction *I, AtomicOrdering Order) {
IRBuilder<> Builder(I);
- auto LeadingFence = TLI->emitLeadingFence(Builder, Order, IsStore, IsLoad);
+ auto LeadingFence = TLI->emitLeadingFence(Builder, I, Order);
- auto TrailingFence = TLI->emitTrailingFence(Builder, Order, IsStore, IsLoad);
+ auto TrailingFence = TLI->emitTrailingFence(Builder, I, Order);
// The trailing fence is emitted before the instruction instead of after
// because there is no easy way of setting Builder insertion point after
// an instruction. So we must erase it from the BB, and insert it back
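At the IR level the bracketing rewrites a strongly-ordered access into a monotonic one surrounded by target-selected fences. A sketch for an acquire load; targets typically materialize only the trailing fence in this case:

    //   %v = load atomic i32, i32* %p acquire, align 4
    // becomes, roughly:
    //   %v = load atomic i32, i32* %p monotonic, align 4
    //   fence acquire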
@@ -368,7 +361,7 @@ LoadInst *AtomicExpand::convertAtomicLoadToIntegerType(LoadInst *LI) {
auto *NewLI = Builder.CreateLoad(NewAddr);
NewLI->setAlignment(LI->getAlignment());
NewLI->setVolatile(LI->isVolatile());
- NewLI->setAtomic(LI->getOrdering(), LI->getSynchScope());
+ NewLI->setAtomic(LI->getOrdering(), LI->getSyncScopeID());
DEBUG(dbgs() << "Replaced " << *LI << " with " << *NewLI << "\n");
Value *NewVal = Builder.CreateBitCast(NewLI, LI->getType());
@@ -451,7 +444,7 @@ StoreInst *AtomicExpand::convertAtomicStoreToIntegerType(StoreInst *SI) {
StoreInst *NewSI = Builder.CreateStore(NewVal, NewAddr);
NewSI->setAlignment(SI->getAlignment());
NewSI->setVolatile(SI->isVolatile());
- NewSI->setAtomic(SI->getOrdering(), SI->getSynchScope());
+ NewSI->setAtomic(SI->getOrdering(), SI->getSyncScopeID());
DEBUG(dbgs() << "Replaced " << *SI << " with " << *NewSI << "\n");
SI->eraseFromParent();
return NewSI;
@@ -808,7 +801,7 @@ void AtomicExpand::expandPartwordCmpXchg(AtomicCmpXchgInst *CI) {
Value *FullWord_Cmp = Builder.CreateOr(Loaded_MaskOut, Cmp_Shifted);
AtomicCmpXchgInst *NewCI = Builder.CreateAtomicCmpXchg(
PMV.AlignedAddr, FullWord_Cmp, FullWord_NewVal, CI->getSuccessOrdering(),
- CI->getFailureOrdering(), CI->getSynchScope());
+ CI->getFailureOrdering(), CI->getSyncScopeID());
NewCI->setVolatile(CI->isVolatile());
// When we're building a strong cmpxchg, we need a loop, so you
// might think we could use a weak cmpxchg inside. But, using strong
@@ -931,7 +924,7 @@ AtomicCmpXchgInst *AtomicExpand::convertCmpXchgToIntegerType(AtomicCmpXchgInst *
auto *NewCI = Builder.CreateAtomicCmpXchg(NewAddr, NewCmp, NewNewVal,
CI->getSuccessOrdering(),
CI->getFailureOrdering(),
- CI->getSynchScope());
+ CI->getSyncScopeID());
NewCI->setVolatile(CI->isVolatile());
NewCI->setWeak(CI->isWeak());
DEBUG(dbgs() << "Replaced " << *CI << " with " << *NewCI << "\n");
@@ -1048,8 +1041,7 @@ bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
std::prev(BB->end())->eraseFromParent();
Builder.SetInsertPoint(BB);
if (ShouldInsertFencesForAtomic && UseUnconditionalReleaseBarrier)
- TLI->emitLeadingFence(Builder, SuccessOrder, /*IsStore=*/true,
- /*IsLoad=*/true);
+ TLI->emitLeadingFence(Builder, CI, SuccessOrder);
Builder.CreateBr(StartBB);
// Start the main loop block now that we've taken care of the preliminaries.
@@ -1064,8 +1056,7 @@ bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
Builder.SetInsertPoint(ReleasingStoreBB);
if (ShouldInsertFencesForAtomic && !UseUnconditionalReleaseBarrier)
- TLI->emitLeadingFence(Builder, SuccessOrder, /*IsStore=*/true,
- /*IsLoad=*/true);
+ TLI->emitLeadingFence(Builder, CI, SuccessOrder);
Builder.CreateBr(TryStoreBB);
Builder.SetInsertPoint(TryStoreBB);
@@ -1094,8 +1085,7 @@ bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
// necessary.
Builder.SetInsertPoint(SuccessBB);
if (ShouldInsertFencesForAtomic)
- TLI->emitTrailingFence(Builder, SuccessOrder, /*IsStore=*/true,
- /*IsLoad=*/true);
+ TLI->emitTrailingFence(Builder, CI, SuccessOrder);
Builder.CreateBr(ExitBB);
Builder.SetInsertPoint(NoStoreBB);
@@ -1107,8 +1097,7 @@ bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
Builder.SetInsertPoint(FailureBB);
if (ShouldInsertFencesForAtomic)
- TLI->emitTrailingFence(Builder, FailureOrder, /*IsStore=*/true,
- /*IsLoad=*/true);
+ TLI->emitTrailingFence(Builder, CI, FailureOrder);
Builder.CreateBr(ExitBB);
// Finally, we have control-flow based knowledge of whether the cmpxchg
@@ -1532,7 +1521,7 @@ bool AtomicExpand::expandAtomicOpToLibcall(
Type *ResultTy;
SmallVector<Value *, 6> Args;
- AttributeSet Attr;
+ AttributeList Attr;
// 'size' argument.
if (!UseSizedLibcall) {
@@ -1593,7 +1582,7 @@ bool AtomicExpand::expandAtomicOpToLibcall(
// Now, the return type.
if (CASExpected) {
ResultTy = Type::getInt1Ty(Ctx);
- Attr = Attr.addAttribute(Ctx, AttributeSet::ReturnIndex, Attribute::ZExt);
+ Attr = Attr.addAttribute(Ctx, AttributeList::ReturnIndex, Attribute::ZExt);
} else if (HasResult && UseSizedLibcall)
ResultTy = SizedIntTy;
else
diff --git a/contrib/llvm/lib/CodeGen/BasicTargetTransformInfo.cpp b/contrib/llvm/lib/CodeGen/BasicTargetTransformInfo.cpp
index a67e194..be93ff0 100644
--- a/contrib/llvm/lib/CodeGen/BasicTargetTransformInfo.cpp
+++ b/contrib/llvm/lib/CodeGen/BasicTargetTransformInfo.cpp
@@ -15,17 +15,15 @@
///
//===----------------------------------------------------------------------===//
-#include "llvm/CodeGen/BasicTTIImpl.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/TargetTransformInfoImpl.h"
+#include "llvm/CodeGen/BasicTTIImpl.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/Support/CommandLine.h"
#include <utility>
using namespace llvm;
-#define DEBUG_TYPE "basictti"
-
// This flag is used by the template base class for BasicTTIImpl, and here to
// provide a definition.
cl::opt<unsigned>
diff --git a/contrib/llvm/lib/CodeGen/BranchCoalescing.cpp b/contrib/llvm/lib/CodeGen/BranchCoalescing.cpp
new file mode 100644
index 0000000..2c41b59
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/BranchCoalescing.cpp
@@ -0,0 +1,758 @@
+//===-- CoalesceBranches.cpp - Coalesce blocks with the same condition ---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// Coalesce basic blocks guarded by the same branch condition into a single
+/// basic block.
+///
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachinePostDominators.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "branch-coalescing"
+
+static cl::opt<cl::boolOrDefault>
+ EnableBranchCoalescing("enable-branch-coalesce", cl::Hidden,
+ cl::desc("enable coalescing of duplicate branches"));
+
+STATISTIC(NumBlocksCoalesced, "Number of blocks coalesced");
+STATISTIC(NumPHINotMoved, "Number of PHI Nodes that cannot be merged");
+STATISTIC(NumBlocksNotCoalesced, "Number of blocks not coalesced");
+
+//===----------------------------------------------------------------------===//
+// BranchCoalescing
+//===----------------------------------------------------------------------===//
+///
+/// Improve scheduling by coalescing branches that depend on the same condition.
+/// This pass looks for blocks that are guarded by the same branch condition
+/// and attempts to merge the blocks together. Such opportunities arise from
+/// the expansion of select statements in the IR.
+///
+/// For example, consider the following LLVM IR:
+///
+/// %test = icmp eq i32 %x, 0
+/// %tmp1 = select i1 %test, double %a, double 2.000000e-03
+/// %tmp2 = select i1 %test, double %b, double 5.000000e-03
+///
+/// This IR expands to the following machine code on PowerPC:
+///
+/// BB#0: derived from LLVM BB %entry
+/// Live Ins: %F1 %F3 %X6
+/// <SNIP1>
+/// %vreg0<def> = COPY %F1; F8RC:%vreg0
+/// %vreg5<def> = CMPLWI %vreg4<kill>, 0; CRRC:%vreg5 GPRC:%vreg4
+/// %vreg8<def> = LXSDX %ZERO8, %vreg7<kill>, %RM<imp-use>;
+/// mem:LD8[ConstantPool] F8RC:%vreg8 G8RC:%vreg7
+/// BCC 76, %vreg5, <BB#2>; CRRC:%vreg5
+/// Successors according to CFG: BB#1(?%) BB#2(?%)
+///
+/// BB#1: derived from LLVM BB %entry
+/// Predecessors according to CFG: BB#0
+/// Successors according to CFG: BB#2(?%)
+///
+/// BB#2: derived from LLVM BB %entry
+/// Predecessors according to CFG: BB#0 BB#1
+/// %vreg9<def> = PHI %vreg8, <BB#1>, %vreg0, <BB#0>;
+/// F8RC:%vreg9,%vreg8,%vreg0
+/// <SNIP2>
+/// BCC 76, %vreg5, <BB#4>; CRRC:%vreg5
+/// Successors according to CFG: BB#3(?%) BB#4(?%)
+///
+/// BB#3: derived from LLVM BB %entry
+/// Predecessors according to CFG: BB#2
+/// Successors according to CFG: BB#4(?%)
+///
+/// BB#4: derived from LLVM BB %entry
+/// Predecessors according to CFG: BB#2 BB#3
+/// %vreg13<def> = PHI %vreg12, <BB#3>, %vreg2, <BB#2>;
+/// F8RC:%vreg13,%vreg12,%vreg2
+/// <SNIP3>
+/// BLR8 %LR8<imp-use>, %RM<imp-use>, %F1<imp-use>
+///
+/// When this pattern is detected, branch coalescing will try to collapse
+/// it by moving code in BB#2 to BB#0 and/or BB#4 and removing BB#3.
+///
+/// If all conditions are met, the IR should collapse to:
+///
+/// BB#0: derived from LLVM BB %entry
+/// Live Ins: %F1 %F3 %X6
+/// <SNIP1>
+/// %vreg0<def> = COPY %F1; F8RC:%vreg0
+/// %vreg5<def> = CMPLWI %vreg4<kill>, 0; CRRC:%vreg5 GPRC:%vreg4
+/// %vreg8<def> = LXSDX %ZERO8, %vreg7<kill>, %RM<imp-use>;
+/// mem:LD8[ConstantPool] F8RC:%vreg8 G8RC:%vreg7
+/// <SNIP2>
+/// BCC 76, %vreg5, <BB#4>; CRRC:%vreg5
+/// Successors according to CFG: BB#1(0x2aaaaaaa / 0x80000000 = 33.33%)
+/// BB#4(0x55555554 / 0x80000000 = 66.67%)
+///
+/// BB#1: derived from LLVM BB %entry
+/// Predecessors according to CFG: BB#0
+/// Successors according to CFG: BB#4(0x40000000 / 0x80000000 = 50.00%)
+///
+/// BB#4: derived from LLVM BB %entry
+/// Predecessors according to CFG: BB#0 BB#1
+/// %vreg9<def> = PHI %vreg8, <BB#1>, %vreg0, <BB#0>;
+/// F8RC:%vreg9,%vreg8,%vreg0
+/// %vreg13<def> = PHI %vreg12, <BB#1>, %vreg2, <BB#0>;
+/// F8RC:%vreg13,%vreg12,%vreg2
+/// <SNIP3>
+/// BLR8 %LR8<imp-use>, %RM<imp-use>, %F1<imp-use>
+///
+/// Branch Coalescing does not split blocks; it moves everything in the same
+/// direction, ensuring it does not break use/definition semantics.
+///
+/// PHI nodes and their corresponding use instructions are moved to the
+/// successor block if there are no uses of them within the successor block's
+/// PHI nodes. PHI node ordering cannot be assumed.
+///
+/// Non-PHI instructions can be moved up to the predecessor basic block, or
+/// down to the successor basic block following any PHI instructions. Whether
+/// an instruction moves up or down depends on whether the register(s) it
+/// defines are used in the current block or in any PHI instructions at the
+/// beginning of the successor block.
+
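+// An illustrative way to exercise this pass from the command line, assuming an
+// assertions-enabled build for the -debug-only output (flag and DEBUG_TYPE
+// names are taken from the definitions above):
+//
+//   llc -enable-branch-coalesce=true -debug-only=branch-coalescing foo.ll
+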
+namespace {
+
+class BranchCoalescing : public MachineFunctionPass {
+ struct CoalescingCandidateInfo {
+ MachineBasicBlock *BranchBlock; // Block containing the branch
+ MachineBasicBlock *BranchTargetBlock; // Block branched to
+ MachineBasicBlock *FallThroughBlock; // Fall-through if branch not taken
+ SmallVector<MachineOperand, 4> Cond;
+ bool MustMoveDown;
+ bool MustMoveUp;
+
+ CoalescingCandidateInfo();
+ void clear();
+ };
+
+ MachineDominatorTree *MDT;
+ MachinePostDominatorTree *MPDT;
+ const TargetInstrInfo *TII;
+ MachineRegisterInfo *MRI;
+
+ void initialize(MachineFunction &F);
+ bool canCoalesceBranch(CoalescingCandidateInfo &Cand);
+ bool identicalOperands(ArrayRef<MachineOperand> OperandList1,
+ ArrayRef<MachineOperand> OperandList2) const;
+ bool validateCandidates(CoalescingCandidateInfo &SourceRegion,
+ CoalescingCandidateInfo &TargetRegion) const;
+
+ static bool isBranchCoalescingEnabled() {
+ return EnableBranchCoalescing == cl::BOU_TRUE;
+ }
+
+public:
+ static char ID;
+
+ BranchCoalescing() : MachineFunctionPass(ID) {
+ initializeBranchCoalescingPass(*PassRegistry::getPassRegistry());
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<MachineDominatorTree>();
+ AU.addRequired<MachinePostDominatorTree>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ StringRef getPassName() const override { return "Branch Coalescing"; }
+
+ bool mergeCandidates(CoalescingCandidateInfo &SourceRegion,
+ CoalescingCandidateInfo &TargetRegion);
+ bool canMoveToBeginning(const MachineInstr &MI,
+ const MachineBasicBlock &MBB) const;
+ bool canMoveToEnd(const MachineInstr &MI,
+ const MachineBasicBlock &MBB) const;
+ bool canMerge(CoalescingCandidateInfo &SourceRegion,
+ CoalescingCandidateInfo &TargetRegion) const;
+ void moveAndUpdatePHIs(MachineBasicBlock *SourceRegionMBB,
+ MachineBasicBlock *TargetRegionMBB);
+ bool runOnMachineFunction(MachineFunction &MF) override;
+};
+} // End anonymous namespace.
+
+char BranchCoalescing::ID = 0;
+char &llvm::BranchCoalescingID = BranchCoalescing::ID;
+
+INITIALIZE_PASS_BEGIN(BranchCoalescing, DEBUG_TYPE,
+ "Branch Coalescing", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTree)
+INITIALIZE_PASS_END(BranchCoalescing, DEBUG_TYPE, "Branch Coalescing",
+ false, false)
+
+BranchCoalescing::CoalescingCandidateInfo::CoalescingCandidateInfo()
+ : BranchBlock(nullptr), BranchTargetBlock(nullptr),
+ FallThroughBlock(nullptr), MustMoveDown(false), MustMoveUp(false) {}
+
+void BranchCoalescing::CoalescingCandidateInfo::clear() {
+ BranchBlock = nullptr;
+ BranchTargetBlock = nullptr;
+ FallThroughBlock = nullptr;
+ Cond.clear();
+ MustMoveDown = false;
+ MustMoveUp = false;
+}
+
+void BranchCoalescing::initialize(MachineFunction &MF) {
+ MDT = &getAnalysis<MachineDominatorTree>();
+ MPDT = &getAnalysis<MachinePostDominatorTree>();
+ TII = MF.getSubtarget().getInstrInfo();
+ MRI = &MF.getRegInfo();
+}
+
+///
+/// Analyze the branch statement for the given candidate to determine if it
+/// can be coalesced. If the branch can be coalesced, the BranchTargetBlock
+/// and the FallThroughBlock are recorded in the specified Candidate.
+///
+///\param[in,out] Cand The coalescing candidate to analyze
+///\return true if and only if the branch can be coalesced, false otherwise
+///
+bool BranchCoalescing::canCoalesceBranch(CoalescingCandidateInfo &Cand) {
+ DEBUG(dbgs() << "Determine if branch block " << Cand.BranchBlock->getNumber()
+ << " can be coalesced:");
+ MachineBasicBlock *FalseMBB = nullptr;
+
+ if (TII->analyzeBranch(*Cand.BranchBlock, Cand.BranchTargetBlock, FalseMBB,
+ Cand.Cond)) {
+ DEBUG(dbgs() << "TII unable to Analyze Branch - skip\n");
+ return false;
+ }
+
+ for (auto &I : Cand.BranchBlock->terminators()) {
+ DEBUG(dbgs() << "Looking at terminator : " << I << "\n");
+ if (!I.isBranch())
+ continue;
+
+ if (I.getNumOperands() != I.getNumExplicitOperands()) {
+ DEBUG(dbgs() << "Terminator contains implicit operands - skip : " << I
+ << "\n");
+ return false;
+ }
+ }
+
+ if (Cand.BranchBlock->isEHPad() || Cand.BranchBlock->hasEHPadSuccessor()) {
+ DEBUG(dbgs() << "EH Pad - skip\n");
+ return false;
+ }
+
+  // For now, only consider triangles (i.e., BranchTargetBlock is set,
+ // FalseMBB is null, and BranchTargetBlock is a successor to BranchBlock)
+ if (!Cand.BranchTargetBlock || FalseMBB ||
+ !Cand.BranchBlock->isSuccessor(Cand.BranchTargetBlock)) {
+ DEBUG(dbgs() << "Does not form a triangle - skip\n");
+ return false;
+ }
+
+ // Ensure there are only two successors
+ if (Cand.BranchBlock->succ_size() != 2) {
+ DEBUG(dbgs() << "Does not have 2 successors - skip\n");
+ return false;
+ }
+
+ // Sanity check - the block must be able to fall through
+ assert(Cand.BranchBlock->canFallThrough() &&
+ "Expecting the block to fall through!");
+
+ // We have already ensured there are exactly two successors to
+ // BranchBlock and that BranchTargetBlock is a successor to BranchBlock.
+  // Ensure the single fall-through block is empty.
+ MachineBasicBlock *Succ =
+ (*Cand.BranchBlock->succ_begin() == Cand.BranchTargetBlock)
+ ? *Cand.BranchBlock->succ_rbegin()
+ : *Cand.BranchBlock->succ_begin();
+
+ assert(Succ && "Expecting a valid fall-through block\n");
+
+ if (!Succ->empty()) {
+ DEBUG(dbgs() << "Fall-through block contains code -- skip\n");
+ return false;
+ }
+
+ if (!Succ->isSuccessor(Cand.BranchTargetBlock)) {
+ DEBUG(dbgs()
+ << "Successor of fall through block is not branch taken block\n");
+ return false;
+ }
+
+ Cand.FallThroughBlock = Succ;
+ DEBUG(dbgs() << "Valid Candidate\n");
+ return true;
+}
+
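+// The control-flow shape accepted by canCoalesceBranch above is a triangle
+// with an empty fall-through block; an illustrative sketch:
+//
+//        BranchBlock
+//         |        \
+//         |    FallThroughBlock (empty)
+//         |        /
+//      BranchTargetBlock
+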
+///
+/// Determine if the two operand lists are identical
+///
+/// \param[in] OpList1 operand list
+/// \param[in] OpList2 operand list
+/// \return true if and only if the operand lists are identical
+///
+bool BranchCoalescing::identicalOperands(
+ ArrayRef<MachineOperand> OpList1, ArrayRef<MachineOperand> OpList2) const {
+
+ if (OpList1.size() != OpList2.size()) {
+ DEBUG(dbgs() << "Operand list is different size\n");
+ return false;
+ }
+
+ for (unsigned i = 0; i < OpList1.size(); ++i) {
+ const MachineOperand &Op1 = OpList1[i];
+ const MachineOperand &Op2 = OpList2[i];
+
+ DEBUG(dbgs() << "Op1: " << Op1 << "\n"
+ << "Op2: " << Op2 << "\n");
+
+ if (Op1.isIdenticalTo(Op2)) {
+ DEBUG(dbgs() << "Op1 and Op2 are identical!\n");
+ continue;
+ }
+
+    // If the operands are not identical but are both virtual registers, check
+    // whether their defining instructions produce the same value. If they do,
+    // consider the operands identical.
+ if (Op1.isReg() && Op2.isReg() &&
+ TargetRegisterInfo::isVirtualRegister(Op1.getReg()) &&
+ TargetRegisterInfo::isVirtualRegister(Op2.getReg())) {
+ MachineInstr *Op1Def = MRI->getVRegDef(Op1.getReg());
+ MachineInstr *Op2Def = MRI->getVRegDef(Op2.getReg());
+ if (TII->produceSameValue(*Op1Def, *Op2Def, MRI)) {
+ DEBUG(dbgs() << "Op1Def: " << *Op1Def << " and " << *Op2Def
+ << " produce the same value!\n");
+ } else {
+ DEBUG(dbgs() << "Operands produce different values\n");
+ return false;
+ }
+ } else {
+ DEBUG(dbgs() << "The operands are not provably identical.\n");
+ return false;
+ }
+ }
+ return true;
+}
+
+///
+/// Moves ALL PHI instructions in SourceMBB to the beginning of TargetMBB
+/// and updates them to refer to the new block. PHI node ordering cannot be
+/// assumed, so it does not matter where in TargetMBB the PHI instructions
+/// are placed.
+///
+/// \param[in] SourceMBB block to move PHI instructions from
+/// \param[in] TargetMBB block to move PHI instructions to
+///
+void BranchCoalescing::moveAndUpdatePHIs(MachineBasicBlock *SourceMBB,
+ MachineBasicBlock *TargetMBB) {
+
+ MachineBasicBlock::iterator MI = SourceMBB->begin();
+ MachineBasicBlock::iterator ME = SourceMBB->getFirstNonPHI();
+
+ if (MI == ME) {
+ DEBUG(dbgs() << "SourceMBB contains no PHI instructions.\n");
+ return;
+ }
+
+ // Update all PHI instructions in SourceMBB and move to top of TargetMBB
+  for (MachineBasicBlock::iterator Iter = MI; Iter != ME; ++Iter) {
+ MachineInstr &PHIInst = *Iter;
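+    // A machine-level PHI has the form:
+    //   %def = PHI %val0, <BB#0>, %val1, <BB#1>, ...
+    // so the basic-block operands sit at indices 2, 4, 6, ...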
+ for (unsigned i = 2, e = PHIInst.getNumOperands() + 1; i != e; i += 2) {
+ MachineOperand &MO = PHIInst.getOperand(i);
+ if (MO.getMBB() == SourceMBB)
+ MO.setMBB(TargetMBB);
+ }
+ }
+ TargetMBB->splice(TargetMBB->begin(), SourceMBB, MI, ME);
+}
+
+///
+/// This function checks if MI can be moved to the beginning of TargetMBB,
+/// following any PHI instructions. An instruction can be moved to the
+/// beginning of TargetMBB if none of its definitions are used within
+/// TargetMBB's PHI nodes.
+///
+/// \param[in] MI the machine instruction to move.
+/// \param[in] TargetMBB the machine basic block to move to
+/// \return true if it is safe to move MI to beginning of TargetMBB,
+/// false otherwise.
+///
+bool BranchCoalescing::canMoveToBeginning(const MachineInstr &MI,
+ const MachineBasicBlock &TargetMBB
+ ) const {
+
+ DEBUG(dbgs() << "Checking if " << MI << " can move to beginning of "
+ << TargetMBB.getNumber() << "\n");
+
+ for (auto &Def : MI.defs()) { // Looking at Def
+ for (auto &Use : MRI->use_instructions(Def.getReg())) {
+ if (Use.isPHI() && Use.getParent() == &TargetMBB) {
+ DEBUG(dbgs() << " *** used in a PHI -- cannot move ***\n");
+ return false;
+ }
+ }
+ }
+
+ DEBUG(dbgs() << " Safe to move to the beginning.\n");
+ return true;
+}
+
+///
+/// This function checks if MI can be moved to the end of TargetMBB,
+/// immediately before the first terminator. An instruction can be moved to
+/// the end of TargetMBB if none of the values it uses are defined by a PHI
+/// node within its own MBB.
+///
+/// \param[in] MI the machine instruction to move.
+/// \param[in] TargetMBB the machine basic block to move to
+/// \return true if it is safe to move MI to end of TargetMBB,
+/// false otherwise.
+///
+bool BranchCoalescing::canMoveToEnd(const MachineInstr &MI,
+ const MachineBasicBlock &TargetMBB
+ ) const {
+
+ DEBUG(dbgs() << "Checking if " << MI << " can move to end of "
+ << TargetMBB.getNumber() << "\n");
+
+ for (auto &Use : MI.uses()) {
+ if (Use.isReg() && TargetRegisterInfo::isVirtualRegister(Use.getReg())) {
+ MachineInstr *DefInst = MRI->getVRegDef(Use.getReg());
+ if (DefInst->isPHI() && DefInst->getParent() == MI.getParent()) {
+ DEBUG(dbgs() << " *** Cannot move this instruction ***\n");
+ return false;
+ } else {
+ DEBUG(dbgs() << " *** def is in another block -- safe to move!\n");
+ }
+ }
+ }
+
+ DEBUG(dbgs() << " Safe to move to the end.\n");
+ return true;
+}
+
+///
+/// This method checks that the two coalescing candidates follow the
+/// expected pattern required for coalescing.
+///
+/// \param[in] SourceRegion The candidate to move statements from
+/// \param[in] TargetRegion The candidate to move statements to
+/// \return true if all instructions in SourceRegion.BranchBlock can be merged
+/// into a block in TargetRegion; false otherwise.
+///
+bool BranchCoalescing::validateCandidates(
+ CoalescingCandidateInfo &SourceRegion,
+ CoalescingCandidateInfo &TargetRegion) const {
+
+ if (TargetRegion.BranchTargetBlock != SourceRegion.BranchBlock)
+ llvm_unreachable("Expecting SourceRegion to immediately follow TargetRegion");
+ else if (!MDT->dominates(TargetRegion.BranchBlock, SourceRegion.BranchBlock))
+ llvm_unreachable("Expecting TargetRegion to dominate SourceRegion");
+ else if (!MPDT->dominates(SourceRegion.BranchBlock, TargetRegion.BranchBlock))
+ llvm_unreachable("Expecting SourceRegion to post-dominate TargetRegion");
+ else if (!TargetRegion.FallThroughBlock->empty() ||
+ !SourceRegion.FallThroughBlock->empty())
+ llvm_unreachable("Expecting fall-through blocks to be empty");
+
+ return true;
+}
+
+///
+/// This method determines whether the two coalescing candidates can be merged.
+/// In order to be merged, every instruction must be able to either:
+///   1. move to the beginning of the SourceRegion.BranchTargetBlock, or
+///   2. move to the end of the TargetRegion.BranchBlock.
+/// Merging involves moving the instructions in the
+/// TargetRegion.BranchTargetBlock (also SourceRegion.BranchBlock).
+///
+/// This function first tries to move instructions from the
+/// TargetRegion.BranchTargetBlock down to the beginning of the
+/// SourceRegion.BranchTargetBlock. This is not possible if any register defined
+/// in TargetRegion.BranchTargetBlock is used in a PHI node in the
+/// SourceRegion.BranchTargetBlock. In this case, check whether the statement
+/// can be moved up, to the end of the TargetRegion.BranchBlock (immediately
+/// before the branch statement). If it cannot move, then these blocks cannot
+/// be merged.
+///
+/// Note that there is no analysis for moving instructions past the fall-through
+/// blocks because they are confirmed to be empty. An assertion fires if they
+/// are not.
+///
+/// \param[in] SourceRegion The candidate to move statements from
+/// \param[in] TargetRegion The candidate to move statements to
+/// \return true if all instructions in SourceRegion.BranchBlock can be merged
+/// into a block in TargetRegion, false otherwise.
+///
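+/// For example (illustrative pseudo-MIR), inside SourceRegion.BranchBlock:
+///   %a = ADD %x, 1   ; %a feeds a PHI in the branch-target block, so it
+///                    ; cannot move down -> MustMoveUp
+///   %b = ADD %p, 1   ; %p is defined by a PHI in this block, so %b cannot
+///                    ; move up -> MustMoveDown
+/// With both flags set, the constraints conflict and the blocks are not
+/// merged.
+///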
+bool BranchCoalescing::canMerge(CoalescingCandidateInfo &SourceRegion,
+ CoalescingCandidateInfo &TargetRegion) const {
+ if (!validateCandidates(SourceRegion, TargetRegion))
+ return false;
+
+ // Walk through PHI nodes first and see if they force the merge into the
+ // SourceRegion.BranchTargetBlock.
+ for (MachineBasicBlock::iterator
+ I = SourceRegion.BranchBlock->instr_begin(),
+ E = SourceRegion.BranchBlock->getFirstNonPHI();
+ I != E; ++I) {
+ for (auto &Def : I->defs())
+ for (auto &Use : MRI->use_instructions(Def.getReg())) {
+ if (Use.isPHI() && Use.getParent() == SourceRegion.BranchTargetBlock) {
+ DEBUG(dbgs() << "PHI " << *I << " defines register used in another "
+ "PHI within branch target block -- can't merge\n");
+ NumPHINotMoved++;
+ return false;
+ }
+ if (Use.getParent() == SourceRegion.BranchBlock) {
+ DEBUG(dbgs() << "PHI " << *I
+ << " defines register used in this "
+ "block -- all must move down\n");
+ SourceRegion.MustMoveDown = true;
+ }
+ }
+ }
+
+ // Walk through the MI to see if they should be merged into
+ // TargetRegion.BranchBlock (up) or SourceRegion.BranchTargetBlock (down)
+ for (MachineBasicBlock::iterator
+ I = SourceRegion.BranchBlock->getFirstNonPHI(),
+ E = SourceRegion.BranchBlock->end();
+ I != E; ++I) {
+ if (!canMoveToBeginning(*I, *SourceRegion.BranchTargetBlock)) {
+ DEBUG(dbgs() << "Instruction " << *I
+ << " cannot move down - must move up!\n");
+ SourceRegion.MustMoveUp = true;
+ }
+ if (!canMoveToEnd(*I, *TargetRegion.BranchBlock)) {
+ DEBUG(dbgs() << "Instruction " << *I
+ << " cannot move up - must move down!\n");
+ SourceRegion.MustMoveDown = true;
+ }
+ }
+
+  return !(SourceRegion.MustMoveUp && SourceRegion.MustMoveDown);
+}
+
+/// Merge the instructions from SourceRegion.BranchBlock,
+/// SourceRegion.BranchTargetBlock, and SourceRegion.FallThroughBlock into
+/// TargetRegion.BranchBlock, TargetRegion.BranchTargetBlock and
+/// TargetRegion.FallThroughBlock respectively.
+///
+/// The successors for blocks in TargetRegion will be updated to use the
+/// successors from blocks in SourceRegion. Finally, the blocks in SourceRegion
+/// will be removed from the function.
+///
+/// A region consists of a BranchBlock, a FallThroughBlock, and a
+/// BranchTargetBlock. Branch coalescing works on patterns where the
+/// TargetRegion's BranchTargetBlock is also the SourceRegion's
+/// BranchBlock.
+///
+/// Before mergeCandidates:
+///
+/// +---------------------------+
+/// | TargetRegion.BranchBlock |
+/// +---------------------------+
+/// / |
+/// / +--------------------------------+
+/// | | TargetRegion.FallThroughBlock |
+/// \ +--------------------------------+
+/// \ |
+/// +----------------------------------+
+/// | TargetRegion.BranchTargetBlock |
+/// | SourceRegion.BranchBlock |
+/// +----------------------------------+
+/// / |
+/// / +--------------------------------+
+/// | | SourceRegion.FallThroughBlock |
+/// \ +--------------------------------+
+/// \ |
+/// +----------------------------------+
+/// | SourceRegion.BranchTargetBlock |
+/// +----------------------------------+
+///
+/// After mergeCandidates:
+///
+/// +-----------------------------+
+/// | TargetRegion.BranchBlock |
+/// | SourceRegion.BranchBlock |
+/// +-----------------------------+
+/// / |
+/// / +---------------------------------+
+/// | | TargetRegion.FallThroughBlock |
+/// | | SourceRegion.FallThroughBlock |
+/// \ +---------------------------------+
+/// \ |
+/// +----------------------------------+
+/// | SourceRegion.BranchTargetBlock |
+/// +----------------------------------+
+///
+/// \param[in] SourceRegion The candidate to move blocks from
+/// \param[in] TargetRegion The candidate to move blocks to
+///
+bool BranchCoalescing::mergeCandidates(CoalescingCandidateInfo &SourceRegion,
+ CoalescingCandidateInfo &TargetRegion) {
+
+ if (SourceRegion.MustMoveUp && SourceRegion.MustMoveDown) {
+ llvm_unreachable("Cannot have both MustMoveDown and MustMoveUp set!");
+ return false;
+ }
+
+ if (!validateCandidates(SourceRegion, TargetRegion))
+ return false;
+
+ // Start the merging process by first handling the BranchBlock.
+ // Move any PHIs in SourceRegion.BranchBlock down to the branch-taken block
+ moveAndUpdatePHIs(SourceRegion.BranchBlock, SourceRegion.BranchTargetBlock);
+
+ // Move remaining instructions in SourceRegion.BranchBlock into
+ // TargetRegion.BranchBlock
+ MachineBasicBlock::iterator firstInstr =
+ SourceRegion.BranchBlock->getFirstNonPHI();
+ MachineBasicBlock::iterator lastInstr =
+ SourceRegion.BranchBlock->getFirstTerminator();
+
+ MachineBasicBlock *Source = SourceRegion.MustMoveDown
+ ? SourceRegion.BranchTargetBlock
+ : TargetRegion.BranchBlock;
+
+ MachineBasicBlock::iterator Target =
+ SourceRegion.MustMoveDown
+ ? SourceRegion.BranchTargetBlock->getFirstNonPHI()
+ : TargetRegion.BranchBlock->getFirstTerminator();
+
+ Source->splice(Target, SourceRegion.BranchBlock, firstInstr, lastInstr);
+
+  // Once the PHIs and instructions have been moved, we need to clean up the
+ // control flow.
+
+ // Remove SourceRegion.FallThroughBlock before transferring successors of
+ // SourceRegion.BranchBlock to TargetRegion.BranchBlock.
+ SourceRegion.BranchBlock->removeSuccessor(SourceRegion.FallThroughBlock);
+ TargetRegion.BranchBlock->transferSuccessorsAndUpdatePHIs(
+ SourceRegion.BranchBlock);
+ // Update branch in TargetRegion.BranchBlock to jump to
+ // SourceRegion.BranchTargetBlock
+ // In this case, TargetRegion.BranchTargetBlock == SourceRegion.BranchBlock.
+ TargetRegion.BranchBlock->ReplaceUsesOfBlockWith(
+ SourceRegion.BranchBlock, SourceRegion.BranchTargetBlock);
+ // Remove the branch statement(s) in SourceRegion.BranchBlock
+ MachineBasicBlock::iterator I =
+ SourceRegion.BranchBlock->terminators().begin();
+ while (I != SourceRegion.BranchBlock->terminators().end()) {
+ MachineInstr &CurrInst = *I;
+ ++I;
+ if (CurrInst.isBranch())
+ CurrInst.eraseFromParent();
+ }
+
+ // Fall-through block should be empty since this is part of the condition
+ // to coalesce the branches.
+ assert(TargetRegion.FallThroughBlock->empty() &&
+ "FallThroughBlocks should be empty!");
+
+ // Transfer successor information and move PHIs down to the
+ // branch-taken block.
+ TargetRegion.FallThroughBlock->transferSuccessorsAndUpdatePHIs(
+ SourceRegion.FallThroughBlock);
+ TargetRegion.FallThroughBlock->removeSuccessor(SourceRegion.BranchBlock);
+
+ // Remove the blocks from the function.
+ assert(SourceRegion.BranchBlock->empty() &&
+ "Expecting branch block to be empty!");
+ SourceRegion.BranchBlock->eraseFromParent();
+
+ assert(SourceRegion.FallThroughBlock->empty() &&
+ "Expecting fall-through block to be empty!\n");
+ SourceRegion.FallThroughBlock->eraseFromParent();
+
+ NumBlocksCoalesced++;
+ return true;
+}
+
+bool BranchCoalescing::runOnMachineFunction(MachineFunction &MF) {
+
+ if (skipFunction(*MF.getFunction()) || MF.empty() ||
+ !isBranchCoalescingEnabled())
+ return false;
+
+ bool didSomething = false;
+
+ DEBUG(dbgs() << "******** Branch Coalescing ********\n");
+ initialize(MF);
+
+ DEBUG(dbgs() << "Function: "; MF.dump(); dbgs() << "\n");
+
+ CoalescingCandidateInfo Cand1, Cand2;
+ // Walk over blocks and find candidates to merge
+ // Continue trying to merge with the first candidate found, as long as merging
+  // is successful.
+ for (MachineBasicBlock &MBB : MF) {
+ bool MergedCandidates = false;
+ do {
+ MergedCandidates = false;
+ Cand1.clear();
+ Cand2.clear();
+
+ Cand1.BranchBlock = &MBB;
+
+ // If unable to coalesce the branch, then continue to next block
+ if (!canCoalesceBranch(Cand1))
+ break;
+
+ Cand2.BranchBlock = Cand1.BranchTargetBlock;
+ if (!canCoalesceBranch(Cand2))
+ break;
+
+ // Sanity check
+ // The branch-taken block of the second candidate should post-dominate the
+ // first candidate
+ assert(MPDT->dominates(Cand2.BranchTargetBlock, Cand1.BranchBlock) &&
+ "Branch-taken block should post-dominate first candidate");
+
+ if (!identicalOperands(Cand1.Cond, Cand2.Cond)) {
+ DEBUG(dbgs() << "Blocks " << Cand1.BranchBlock->getNumber() << " and "
+ << Cand2.BranchBlock->getNumber()
+ << " have different branches\n");
+ break;
+ }
+ if (!canMerge(Cand2, Cand1)) {
+ DEBUG(dbgs() << "Cannot merge blocks " << Cand1.BranchBlock->getNumber()
+ << " and " << Cand2.BranchBlock->getNumber() << "\n");
+ NumBlocksNotCoalesced++;
+ continue;
+ }
+ DEBUG(dbgs() << "Merging blocks " << Cand1.BranchBlock->getNumber()
+ << " and " << Cand1.BranchTargetBlock->getNumber() << "\n");
+ MergedCandidates = mergeCandidates(Cand2, Cand1);
+ if (MergedCandidates)
+ didSomething = true;
+
+ DEBUG(dbgs() << "Function after merging: "; MF.dump(); dbgs() << "\n");
+ } while (MergedCandidates);
+ }
+
+#ifndef NDEBUG
+ // Verify MF is still valid after branch coalescing
+ if (didSomething)
+ MF.verify(nullptr, "Error in code produced by branch coalescing");
+#endif // NDEBUG
+
+ DEBUG(dbgs() << "Finished Branch Coalescing\n");
+ return didSomething;
+}
diff --git a/contrib/llvm/lib/CodeGen/BranchFolding.cpp b/contrib/llvm/lib/CodeGen/BranchFolding.cpp
index 6fba161..3c439e6 100644
--- a/contrib/llvm/lib/CodeGen/BranchFolding.cpp
+++ b/contrib/llvm/lib/CodeGen/BranchFolding.cpp
@@ -1,4 +1,4 @@
-//===-- BranchFolding.cpp - Fold machine code branch instructions ---------===//
+//===- BranchFolding.cpp - Fold machine code branch instructions ----------===//
//
// The LLVM Compiler Infrastructure
//
@@ -18,37 +18,55 @@
//===----------------------------------------------------------------------===//
#include "BranchFolding.h"
-#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/Analysis.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
-#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/Function.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/BlockFrequency.h"
+#include "llvm/Support/BranchProbability.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
-#include <algorithm>
+#include <cassert>
+#include <cstddef>
+#include <iterator>
+#include <numeric>
+#include <vector>
+
using namespace llvm;
-#define DEBUG_TYPE "branchfolding"
+#define DEBUG_TYPE "branch-folder"
STATISTIC(NumDeadBlocks, "Number of dead blocks removed");
STATISTIC(NumBranchOpts, "Number of branches optimized");
STATISTIC(NumTailMerge , "Number of block tails merged");
STATISTIC(NumHoist , "Number of times common instructions are hoisted");
+STATISTIC(NumTailCalls, "Number of tail calls optimized");
static cl::opt<cl::boolOrDefault> FlagEnableTailMerge("enable-tail-merge",
cl::init(cl::BOU_UNSET), cl::Hidden);
@@ -67,10 +85,12 @@ TailMergeSize("tail-merge-size",
cl::init(3), cl::Hidden);
namespace {
+
/// BranchFolderPass - Wrap branch folder in a machine function pass.
class BranchFolderPass : public MachineFunctionPass {
public:
static char ID;
+
explicit BranchFolderPass(): MachineFunctionPass(ID) {}
bool runOnMachineFunction(MachineFunction &MF) override;
@@ -82,12 +102,13 @@ namespace {
MachineFunctionPass::getAnalysisUsage(AU);
}
};
-}
+
+} // end anonymous namespace
char BranchFolderPass::ID = 0;
char &llvm::BranchFolderPassID = BranchFolderPass::ID;
-INITIALIZE_PASS(BranchFolderPass, "branch-folder",
+INITIALIZE_PASS(BranchFolderPass, DEBUG_TYPE,
"Control Flow Optimizer", false, false)
bool BranchFolderPass::runOnMachineFunction(MachineFunction &MF) {
@@ -123,8 +144,6 @@ BranchFolder::BranchFolder(bool defaultEnableTailMerge, bool CommonHoist,
}
}
-/// RemoveDeadBlock - Remove the specified dead machine basic block from the
-/// function, updating the CFG.
void BranchFolder::RemoveDeadBlock(MachineBasicBlock *MBB) {
assert(MBB->pred_empty() && "MBB must be dead!");
DEBUG(dbgs() << "\nRemoving MBB: " << *MBB);
@@ -144,9 +163,6 @@ void BranchFolder::RemoveDeadBlock(MachineBasicBlock *MBB) {
MLI->removeBlock(MBB);
}
-/// OptimizeFunction - Perhaps branch folding, tail merging and other
-/// CFG optimizations on the given function. Block placement changes the layout
-/// and may create new tail merging opportunities.
bool BranchFolder::OptimizeFunction(MachineFunction &MF,
const TargetInstrInfo *tii,
const TargetRegisterInfo *tri,
@@ -156,13 +172,14 @@ bool BranchFolder::OptimizeFunction(MachineFunction &MF,
TriedMerging.clear();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
AfterBlockPlacement = AfterPlacement;
TII = tii;
TRI = tri;
MMI = mmi;
MLI = mli;
+ this->MRI = &MRI;
- MachineRegisterInfo &MRI = MF.getRegInfo();
UpdateLiveIns = MRI.tracksLiveness() && TRI->trackLivenessAfterRegAlloc(MF);
if (!UpdateLiveIns)
MRI.invalidateLiveness();
@@ -348,23 +365,18 @@ static unsigned ComputeCommonTailLength(MachineBasicBlock *MBB1,
return TailLen;
}
-/// ReplaceTailWithBranchTo - Delete the instruction OldInst and everything
-/// after it, replacing it with an unconditional branch to NewDest.
void BranchFolder::ReplaceTailWithBranchTo(MachineBasicBlock::iterator OldInst,
MachineBasicBlock *NewDest) {
TII->ReplaceTailWithBranchTo(OldInst, NewDest);
if (UpdateLiveIns) {
NewDest->clearLiveIns();
- computeLiveIns(LiveRegs, *TRI, *NewDest);
+ computeLiveIns(LiveRegs, *MRI, *NewDest);
}
++NumTailMerge;
}
-/// SplitMBBAt - Given a machine basic block and an iterator into it, split the
-/// MBB so that the part before the iterator falls into the part starting at the
-/// iterator. This returns the new MBB.
MachineBasicBlock *BranchFolder::SplitMBBAt(MachineBasicBlock &CurMBB,
MachineBasicBlock::iterator BBI1,
const BasicBlock *BB) {
@@ -375,7 +387,7 @@ MachineBasicBlock *BranchFolder::SplitMBBAt(MachineBasicBlock &CurMBB,
// Create the fall-through block.
MachineFunction::iterator MBBI = CurMBB.getIterator();
- MachineBasicBlock *NewMBB =MF.CreateMachineBasicBlock(BB);
+ MachineBasicBlock *NewMBB = MF.CreateMachineBasicBlock(BB);
CurMBB.getParent()->insert(++MBBI, NewMBB);
// Move all the successors of this block to the specified block.
@@ -388,7 +400,7 @@ MachineBasicBlock *BranchFolder::SplitMBBAt(MachineBasicBlock &CurMBB,
NewMBB->splice(NewMBB->end(), &CurMBB, BBI1, CurMBB.end());
// NewMBB belongs to the same loop as CurMBB.
- if (MLI)
+ if (MLI)
if (MachineLoop *ML = MLI->getLoopFor(&CurMBB))
ML->addBasicBlockToLoop(NewMBB, MLI->getBase());
@@ -396,7 +408,7 @@ MachineBasicBlock *BranchFolder::SplitMBBAt(MachineBasicBlock &CurMBB,
MBBFreqInfo.setBlockFreq(NewMBB, MBBFreqInfo.getBlockFreq(&CurMBB));
if (UpdateLiveIns)
- computeLiveIns(LiveRegs, *TRI, *NewMBB);
+ computeLiveIns(LiveRegs, *MRI, *NewMBB);
// Add the new block to the funclet.
const auto &FuncletI = FuncletMembership.find(&CurMBB);
@@ -436,7 +448,7 @@ static void FixTail(MachineBasicBlock *CurMBB, MachineBasicBlock *SuccBB,
MachineFunction::iterator I = std::next(MachineFunction::iterator(CurMBB));
MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
SmallVector<MachineOperand, 4> Cond;
- DebugLoc dl; // FIXME: this is nowhere
+ DebugLoc dl = CurMBB->findBranchDebugLoc();
if (I != MF->end() && !TII->analyzeBranch(*CurMBB, TBB, FBB, Cond, true)) {
MachineBasicBlock *NextBB = &*I;
if (TBB == NextBB && !Cond.empty() && !FBB) {
@@ -497,6 +509,15 @@ BranchFolder::MBFIWrapper::printBlockFreq(raw_ostream &OS,
return MBFI.printBlockFreq(OS, Freq);
}
+void BranchFolder::MBFIWrapper::view(const Twine &Name, bool isSimple) {
+ MBFI.view(Name, isSimple);
+}
+
+uint64_t
+BranchFolder::MBFIWrapper::getEntryFreq() const {
+ return MBFI.getEntryFreq();
+}
+
/// CountTerminators - Count the number of terminators in the given
/// block and set I to the position of the first non-terminator, if there
/// is one, or MBB->end() otherwise.
@@ -504,7 +525,7 @@ static unsigned CountTerminators(MachineBasicBlock *MBB,
MachineBasicBlock::iterator &I) {
I = MBB->end();
unsigned NumTerms = 0;
- for (;;) {
+ while (true) {
if (I == MBB->begin()) {
I = MBB->end();
break;
@@ -516,6 +537,17 @@ static unsigned CountTerminators(MachineBasicBlock *MBB,
return NumTerms;
}
+/// A no-successor, non-return block probably ends in unreachable and is cold.
+/// Also consider a block that ends in an indirect branch to be a return block,
+/// since many targets use plain indirect branches to return.
+static bool blockEndsInUnreachable(const MachineBasicBlock *MBB) {
+ if (!MBB->succ_empty())
+ return false;
+ if (MBB->empty())
+ return true;
+ return !(MBB->back().isReturn() || MBB->back().isIndirectBranch());
+}
+
/// ProfitableToMerge - Check if two machine basic blocks have a common tail
/// and decide if it would be profitable to merge those tails. Return the
/// length of the common tail and iterators to the first common instruction
@@ -570,6 +602,15 @@ ProfitableToMerge(MachineBasicBlock *MBB1, MachineBasicBlock *MBB2,
return true;
}
+ // If these are identical non-return blocks with no successors, merge them.
+ // Such blocks are typically cold calls to noreturn functions like abort, and
+ // are unlikely to become a fallthrough target after machine block placement.
+ // Tail merging these blocks is unlikely to create additional unconditional
+ // branches, and will reduce the size of this cold code.
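+  // For instance, two blocks that both just set up arguments for and call a
+  // noreturn function such as abort() match this case: neither returns nor
+  // falls through, so merging them introduces no new branches.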
+ if (I1 == MBB1->begin() && I2 == MBB2->begin() &&
+ blockEndsInUnreachable(MBB1) && blockEndsInUnreachable(MBB2))
+ return true;
+
// If one of the blocks can be completely merged and happens to be in
// a position where the other could fall through into it, merge any number
// of instructions, because it can be done without a branch.
@@ -579,6 +620,22 @@ ProfitableToMerge(MachineBasicBlock *MBB1, MachineBasicBlock *MBB2,
if (MBB2->isLayoutSuccessor(MBB1) && I1 == MBB1->begin())
return true;
+ // If both blocks are identical and end in a branch, merge them unless they
+ // both have a fallthrough predecessor and successor.
+ // We can only do this after block placement because it depends on whether
+ // there are fallthroughs, and we don't know until after layout.
+ if (AfterPlacement && I1 == MBB1->begin() && I2 == MBB2->begin()) {
+ auto BothFallThrough = [](MachineBasicBlock *MBB) {
+ if (MBB->succ_size() != 0 && !MBB->canFallThrough())
+ return false;
+ MachineFunction::iterator I(MBB);
+ MachineFunction *MF = MBB->getParent();
+ return (MBB != &*MF->begin()) && std::prev(I)->canFallThrough();
+ };
+ if (!BothFallThrough(MBB1) || !BothFallThrough(MBB2))
+ return true;
+ }
+
// If both blocks have an unconditional branch temporarily stripped out,
// count that as an additional common instruction for the following
// heuristics. This heuristic is only accurate for single-succ blocks, so to
@@ -604,16 +661,6 @@ ProfitableToMerge(MachineBasicBlock *MBB1, MachineBasicBlock *MBB2,
(I1 == MBB1->begin() || I2 == MBB2->begin());
}
-/// ComputeSameTails - Look through all the blocks in MergePotentials that have
-/// hash CurHash (guaranteed to match the last element). Build the vector
-/// SameTails of all those that have the (same) largest number of instructions
-/// in common of any pair of these blocks. SameTails entries contain an
-/// iterator into MergePotentials (from which the MachineBasicBlock can be
-/// found) and a MachineBasicBlock::iterator into that MBB indicating the
-/// instruction where the matching code sequence begins.
-/// Order of elements in SameTails is the reverse of the order in which
-/// those blocks appear in MergePotentials (where they are not necessarily
-/// consecutive).
unsigned BranchFolder::ComputeSameTails(unsigned CurHash,
unsigned MinCommonTailLength,
MachineBasicBlock *SuccBB,
@@ -650,8 +697,6 @@ unsigned BranchFolder::ComputeSameTails(unsigned CurHash,
return maxCommonTailLength;
}
-/// RemoveBlocksWithHash - Remove all blocks with hash CurHash from
-/// MergePotentials, restoring branches at ends of blocks as appropriate.
void BranchFolder::RemoveBlocksWithHash(unsigned CurHash,
MachineBasicBlock *SuccBB,
MachineBasicBlock *PredBB) {
@@ -671,8 +716,6 @@ void BranchFolder::RemoveBlocksWithHash(unsigned CurHash,
MergePotentials.erase(CurMPIter, MergePotentials.end());
}
-/// CreateCommonTailOnlyBlock - None of the blocks to be tail-merged consist
-/// only of the common tail. Create a block that does by splitting one.
bool BranchFolder::CreateCommonTailOnlyBlock(MachineBasicBlock *&PredBB,
MachineBasicBlock *SuccBB,
unsigned maxCommonTailLength,
@@ -723,6 +766,43 @@ bool BranchFolder::CreateCommonTailOnlyBlock(MachineBasicBlock *&PredBB,
return true;
}
+void BranchFolder::MergeCommonTailDebugLocs(unsigned commonTailIndex) {
+ MachineBasicBlock *MBB = SameTails[commonTailIndex].getBlock();
+
+ std::vector<MachineBasicBlock::iterator> NextCommonInsts(SameTails.size());
+  for (unsigned i = 0; i != SameTails.size(); ++i) {
+ if (i != commonTailIndex)
+ NextCommonInsts[i] = SameTails[i].getTailStartPos();
+ else {
+ assert(SameTails[i].getTailStartPos() == MBB->begin() &&
+ "MBB is not a common tail only block");
+ }
+ }
+
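+  // Walk the other tails in lockstep with the common tail, merging each
+  // instruction's DebugLoc with those of its identical counterparts; where
+  // the locations disagree, the merged location conservatively drops detail.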
+ for (auto &MI : *MBB) {
+ if (MI.isDebugValue())
+ continue;
+ DebugLoc DL = MI.getDebugLoc();
+    for (unsigned i = 0; i < NextCommonInsts.size(); ++i) {
+ if (i == commonTailIndex)
+ continue;
+
+ auto &Pos = NextCommonInsts[i];
+ assert(Pos != SameTails[i].getBlock()->end() &&
+ "Reached BB end within common tail");
+ while (Pos->isDebugValue()) {
+ ++Pos;
+ assert(Pos != SameTails[i].getBlock()->end() &&
+ "Reached BB end within common tail");
+ }
+ assert(MI.isIdenticalTo(*Pos) && "Expected matching MIIs!");
+ DL = DILocation::getMergedLocation(DL, Pos->getDebugLoc());
+ NextCommonInsts[i] = ++Pos;
+ }
+ MI.setDebugLoc(DL);
+ }
+}
+
static void
mergeOperations(MachineBasicBlock::iterator MBBIStartPos,
MachineBasicBlock &MBBCommon) {
@@ -875,10 +955,8 @@ bool BranchFolder::TryTailMergeBlocks(MachineBasicBlock *SuccBB,
// Recompute common tail MBB's edge weights and block frequency.
setCommonTailEdgeWeights(*MBB);
- // Remove the original debug location from the common tail.
- for (auto &MI : *MBB)
- if (!MI.isDebugValue())
- MI.setDebugLoc(DebugLoc());
+ // Merge debug locations across identical instructions for common tail.
+ MergeCommonTailDebugLocs(commonTailIndex);
// MBB is common tail. Adjust all other BB's to jump to this one.
// Traversal must be forwards so erases work.
@@ -1043,7 +1121,7 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) {
// Remove the unconditional branch at the end, if any.
if (TBB && (Cond.empty() || FBB)) {
- DebugLoc dl; // FIXME: this is nowhere
+ DebugLoc dl = PBB->findBranchDebugLoc();
TII->removeBranch(*PBB);
if (!Cond.empty())
// reinsert conditional branch only, for now
@@ -1193,8 +1271,6 @@ static DebugLoc getBranchDebugLoc(MachineBasicBlock &MBB) {
return DebugLoc();
}
-/// OptimizeBlock - Analyze and optimize control flow related to the specified
-/// block. This is never called on the entry block.
bool BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) {
bool MadeChange = false;
MachineFunction &MF = *MBB->getParent();
@@ -1386,6 +1462,43 @@ ReoptimizeBlock:
}
}
+ if (!IsEmptyBlock(MBB) && MBB->pred_size() == 1 &&
+ MF.getFunction()->optForSize()) {
+ // Changing "Jcc foo; foo: jmp bar;" into "Jcc bar;" might change the branch
+ // direction, thereby defeating careful block placement and regressing
+ // performance. Therefore, only consider this for optsize functions.
+ MachineInstr &TailCall = *MBB->getFirstNonDebugInstr();
+ if (TII->isUnconditionalTailCall(TailCall)) {
+ MachineBasicBlock *Pred = *MBB->pred_begin();
+ MachineBasicBlock *PredTBB = nullptr, *PredFBB = nullptr;
+ SmallVector<MachineOperand, 4> PredCond;
+ bool PredAnalyzable =
+ !TII->analyzeBranch(*Pred, PredTBB, PredFBB, PredCond, true);
+
+ if (PredAnalyzable && !PredCond.empty() && PredTBB == MBB &&
+ PredTBB != PredFBB) {
+ // The predecessor has a conditional branch to this block which consists
+ // of only a tail call. Try to fold the tail call into the conditional
+ // branch.
+ if (TII->canMakeTailCallConditional(PredCond, TailCall)) {
+ // TODO: It would be nice if analyzeBranch() could provide a pointer
+ // to the branch instruction so replaceBranchWithTailCall() doesn't
+ // have to search for it.
+ TII->replaceBranchWithTailCall(*Pred, PredCond, TailCall);
+ ++NumTailCalls;
+ Pred->removeSuccessor(MBB);
+ MadeChange = true;
+ return MadeChange;
+ }
+ }
+ // If the predecessor is falling through to this block, we could reverse
+ // the branch condition and fold the tail call into that. However, after
+ // that we might have to re-arrange the CFG to fall through to the other
+ // block and there is a high risk of regressing code size rather than
+ // improving it.
+ }
+ }
+
// Analyze the branch in the current block.
MachineBasicBlock *CurTBB = nullptr, *CurFBB = nullptr;
SmallVector<MachineOperand, 4> CurCond;
@@ -1508,7 +1621,6 @@ ReoptimizeBlock:
// block doesn't fall through into some other block, see if we can find a
// place to move this block where a fall-through will happen.
if (!PrevBB.canFallThrough()) {
-
// Now we know that there was no fall-through into this block, check to
// see if it has a fall-through into its successor.
bool CurFallsThru = MBB->canFallThrough();
@@ -1599,8 +1711,6 @@ ReoptimizeBlock:
// Hoist Common Code
//===----------------------------------------------------------------------===//
-/// HoistCommonCode - Hoist common instruction sequences at the start of basic
-/// blocks to their common predecessor.
bool BranchFolder::HoistCommonCode(MachineFunction &MF) {
bool MadeChange = false;
for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ) {
@@ -1734,9 +1844,6 @@ MachineBasicBlock::iterator findHoistingInsertPosAndDeps(MachineBasicBlock *MBB,
return PI;
}
-/// HoistCommonCodeInSuccs - If the successors of MBB has common instruction
-/// sequence at the start of the function, move the instructions before MBB
-/// terminator if it's legal.
bool BranchFolder::HoistCommonCodeInSuccs(MachineBasicBlock *MBB) {
MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
SmallVector<MachineOperand, 4> Cond;
@@ -1763,8 +1870,8 @@ bool BranchFolder::HoistCommonCodeInSuccs(MachineBasicBlock *MBB) {
return false;
bool HasDups = false;
- SmallVector<unsigned, 4> LocalDefs;
- SmallSet<unsigned, 4> LocalDefsSet;
+ SmallVector<unsigned, 4> LocalDefs, LocalKills;
+ SmallSet<unsigned, 4> ActiveDefsSet, AllDefsSet;
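+  // ActiveDefsSet tracks hoisted definitions that are still live at the
+  // insertion point, AllDefsSet remembers every definition ever hoisted, and
+  // LocalKills collects registers whose final use was hoisted, so that their
+  // now-stale live-ins can be removed from TBB and FBB below.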
MachineBasicBlock::iterator TIB = TBB->begin();
MachineBasicBlock::iterator FIB = FBB->begin();
MachineBasicBlock::iterator TIE = TBB->end();
@@ -1818,7 +1925,7 @@ bool BranchFolder::HoistCommonCodeInSuccs(MachineBasicBlock *MBB) {
IsSafe = false;
break;
}
- } else if (!LocalDefsSet.count(Reg)) {
+ } else if (!ActiveDefsSet.count(Reg)) {
if (Defs.count(Reg)) {
// Use is defined by the instruction at the point of insertion.
IsSafe = false;
@@ -1838,18 +1945,22 @@ bool BranchFolder::HoistCommonCodeInSuccs(MachineBasicBlock *MBB) {
if (!TIB->isSafeToMove(nullptr, DontMoveAcrossStore))
break;
- // Remove kills from LocalDefsSet, these registers had short live ranges.
+ // Remove kills from ActiveDefsSet, these registers had short live ranges.
for (const MachineOperand &MO : TIB->operands()) {
if (!MO.isReg() || !MO.isUse() || !MO.isKill())
continue;
unsigned Reg = MO.getReg();
- if (!Reg || !LocalDefsSet.count(Reg))
+ if (!Reg)
continue;
+ if (!AllDefsSet.count(Reg)) {
+ LocalKills.push_back(Reg);
+ continue;
+ }
if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
- LocalDefsSet.erase(*AI);
+ ActiveDefsSet.erase(*AI);
} else {
- LocalDefsSet.erase(Reg);
+ ActiveDefsSet.erase(Reg);
}
}
@@ -1861,7 +1972,8 @@ bool BranchFolder::HoistCommonCodeInSuccs(MachineBasicBlock *MBB) {
if (!Reg || TargetRegisterInfo::isVirtualRegister(Reg))
continue;
LocalDefs.push_back(Reg);
- addRegAndItsAliases(Reg, TRI, LocalDefsSet);
+ addRegAndItsAliases(Reg, TRI, ActiveDefsSet);
+ addRegAndItsAliases(Reg, TRI, AllDefsSet);
}
HasDups = true;
@@ -1876,17 +1988,22 @@ bool BranchFolder::HoistCommonCodeInSuccs(MachineBasicBlock *MBB) {
FBB->erase(FBB->begin(), FIB);
// Update livein's.
- bool AddedLiveIns = false;
+ bool ChangedLiveIns = false;
for (unsigned i = 0, e = LocalDefs.size(); i != e; ++i) {
unsigned Def = LocalDefs[i];
- if (LocalDefsSet.count(Def)) {
+ if (ActiveDefsSet.count(Def)) {
TBB->addLiveIn(Def);
FBB->addLiveIn(Def);
- AddedLiveIns = true;
+ ChangedLiveIns = true;
}
}
+ for (unsigned K : LocalKills) {
+ TBB->removeLiveIn(K);
+ FBB->removeLiveIn(K);
+ ChangedLiveIns = true;
+ }
- if (AddedLiveIns) {
+ if (ChangedLiveIns) {
TBB->sortUniqueLiveIns();
FBB->sortUniqueLiveIns();
}
diff --git a/contrib/llvm/lib/CodeGen/BranchFolding.h b/contrib/llvm/lib/CodeGen/BranchFolding.h
index fc48e48..9268113 100644
--- a/contrib/llvm/lib/CodeGen/BranchFolding.h
+++ b/contrib/llvm/lib/CodeGen/BranchFolding.h
@@ -37,6 +37,9 @@ namespace llvm {
// flag. Ignored for optsize.
unsigned MinCommonTailLength = 0);
+    /// Perform branch folding, tail merging and other CFG optimizations on
+    /// the given function. Block placement changes the layout and may create
+    /// new tail merging opportunities.
bool OptimizeFunction(MachineFunction &MF, const TargetInstrInfo *tii,
const TargetRegisterInfo *tri, MachineModuleInfo *mmi,
MachineLoopInfo *mli = nullptr,
@@ -105,6 +108,7 @@ namespace llvm {
bool UpdateLiveIns;
unsigned MinCommonTailLength;
const TargetInstrInfo *TII;
+ const MachineRegisterInfo *MRI;
const TargetRegisterInfo *TRI;
MachineModuleInfo *MMI;
MachineLoopInfo *MLI;
@@ -122,6 +126,8 @@ namespace llvm {
const MachineBasicBlock *MBB) const;
raw_ostream &printBlockFreq(raw_ostream &OS,
const BlockFrequency Freq) const;
+ void view(const Twine &Name, bool isSimple = true);
+ uint64_t getEntryFreq() const;
private:
const MachineBlockFrequencyInfo &MBFI;
@@ -137,26 +143,64 @@ namespace llvm {
MachineBasicBlock* PredBB,
unsigned MinCommonTailLength);
void setCommonTailEdgeWeights(MachineBasicBlock &TailMBB);
+
+ /// Delete the instruction OldInst and everything after it, replacing it
+ /// with an unconditional branch to NewDest.
void ReplaceTailWithBranchTo(MachineBasicBlock::iterator OldInst,
MachineBasicBlock *NewDest);
+
+ /// Given a machine basic block and an iterator into it, split the MBB so
+ /// that the part before the iterator falls into the part starting at the
+ /// iterator. This returns the new MBB.
MachineBasicBlock *SplitMBBAt(MachineBasicBlock &CurMBB,
MachineBasicBlock::iterator BBI1,
const BasicBlock *BB);
+
+ /// Look through all the blocks in MergePotentials that have hash CurHash
+ /// (guaranteed to match the last element). Build the vector SameTails of
+ /// all those that have the (same) largest number of instructions in common
+ /// of any pair of these blocks. SameTails entries contain an iterator into
+ /// MergePotentials (from which the MachineBasicBlock can be found) and a
+ /// MachineBasicBlock::iterator into that MBB indicating the instruction
+ /// where the matching code sequence begins. Order of elements in SameTails
+ /// is the reverse of the order in which those blocks appear in
+ /// MergePotentials (where they are not necessarily consecutive).
unsigned ComputeSameTails(unsigned CurHash, unsigned minCommonTailLength,
MachineBasicBlock *SuccBB,
MachineBasicBlock *PredBB);
+
+ /// Remove all blocks with hash CurHash from MergePotentials, restoring
+ /// branches at ends of blocks as appropriate.
void RemoveBlocksWithHash(unsigned CurHash, MachineBasicBlock* SuccBB,
MachineBasicBlock* PredBB);
+
+ /// None of the blocks to be tail-merged consist only of the common tail.
+ /// Create a block that does by splitting one.
bool CreateCommonTailOnlyBlock(MachineBasicBlock *&PredBB,
MachineBasicBlock *SuccBB,
unsigned maxCommonTailLength,
unsigned &commonTailIndex);
+    /// Create merged DebugLocs for identical instructions across SameTails
+    /// and assign them to the instructions in the common tail.
+ void MergeCommonTailDebugLocs(unsigned commonTailIndex);
+
bool OptimizeBranches(MachineFunction &MF);
+
+ /// Analyze and optimize control flow related to the specified block. This
+ /// is never called on the entry block.
bool OptimizeBlock(MachineBasicBlock *MBB);
+
+ /// Remove the specified dead machine basic block from the function,
+ /// updating the CFG.
void RemoveDeadBlock(MachineBasicBlock *MBB);
+ /// Hoist common instruction sequences at the start of basic blocks to their
+ /// common predecessor.
bool HoistCommonCode(MachineFunction &MF);
+
+    /// If the successors of MBB have a common instruction sequence at their
+    /// start, move those instructions to just before the MBB terminator if
+    /// it is legal.
bool HoistCommonCodeInSuccs(MachineBasicBlock *MBB);
};
}
diff --git a/contrib/llvm/lib/CodeGen/BranchRelaxation.cpp b/contrib/llvm/lib/CodeGen/BranchRelaxation.cpp
index 8b27570..27ee12c 100644
--- a/contrib/llvm/lib/CodeGen/BranchRelaxation.cpp
+++ b/contrib/llvm/lib/CodeGen/BranchRelaxation.cpp
@@ -7,17 +7,17 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/CodeGen/Passes.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/RegisterScavenging.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;
@@ -126,14 +126,16 @@ void BranchRelaxation::verify() {
#endif
}
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// print block size and offset information - debugging
-void BranchRelaxation::dumpBBs() {
+LLVM_DUMP_METHOD void BranchRelaxation::dumpBBs() {
for (auto &MBB : *MF) {
const BasicBlockInfo &BBI = BlockInfo[MBB.getNumber()];
dbgs() << format("BB#%u\toffset=%08x\t", MBB.getNumber(), BBI.Offset)
<< format("size=%#x\n", BBI.Size);
}
}
+#endif
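The guard added around dumpBBs is the standard LLVM idiom for debug-only dump methods; a minimal sketch of the same pattern on a hypothetical class:

    class Foo {
    public:
    #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
      // Compiled out of release builds; LLVM_DUMP_METHOD marks the symbol as
      // used so it stays available to a debugger.
      LLVM_DUMP_METHOD void dump() const;
    #endif
    };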
/// scanFunction - Do the initial scan of the function, building up
/// information about each block.
@@ -257,7 +259,7 @@ MachineBasicBlock *BranchRelaxation::splitBlockBeforeInstr(MachineInstr &MI,
// Need to fix live-in lists if we track liveness.
if (TRI->trackLivenessAfterRegAlloc(*MF))
- computeLiveIns(LiveRegs, *TRI, *NewBB);
+ computeLiveIns(LiveRegs, MF->getRegInfo(), *NewBB);
++NumSplit;
@@ -343,6 +345,10 @@ bool BranchRelaxation::fixupConditionalBranch(MachineInstr &MI) {
// Do it here since if there's no split, no update is needed.
MBB->replaceSuccessor(FBB, &NewBB);
NewBB.addSuccessor(FBB);
+
+ // Need to fix live-in lists if we track liveness.
+ if (TRI->trackLivenessAfterRegAlloc(*MF))
+ computeLiveIns(LiveRegs, MF->getRegInfo(), NewBB);
}
// We now have an appropriate fall-through block in place (either naturally or
diff --git a/contrib/llvm/lib/CodeGen/BuiltinGCs.cpp b/contrib/llvm/lib/CodeGen/BuiltinGCs.cpp
index ff7c99d..abac555 100644
--- a/contrib/llvm/lib/CodeGen/BuiltinGCs.cpp
+++ b/contrib/llvm/lib/CodeGen/BuiltinGCs.cpp
@@ -1,4 +1,4 @@
-//===-- BuiltinGCs.cpp - Boilerplate for our built in GC types --*- C++ -*-===//
+//===- BuiltinGCs.cpp - Boilerplate for our built in GC types -------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -12,8 +12,10 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/CodeGen/GCs.h"
#include "llvm/CodeGen/GCStrategy.h"
+#include "llvm/CodeGen/GCs.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/Support/Casting.h"
using namespace llvm;
@@ -77,6 +79,7 @@ public:
UsesMetadata = false;
CustomRoots = false;
}
+
Optional<bool> isGCManagedPointer(const Type *Ty) const override {
// Method is only valid on pointer typed values.
const PointerType *PT = cast<PointerType>(Ty);
@@ -110,6 +113,7 @@ public:
UsesMetadata = false;
CustomRoots = false;
}
+
Optional<bool> isGCManagedPointer(const Type *Ty) const override {
// Method is only valid on pointer typed values.
const PointerType *PT = cast<PointerType>(Ty);
@@ -117,7 +121,8 @@ public:
return (1 == PT->getAddressSpace());
}
};
-}
+
+} // end anonymous namespace
// Register all the above so that they can be found at runtime. Note that
// these static initializers are important since the registration list is
diff --git a/contrib/llvm/lib/CodeGen/CalcSpillWeights.cpp b/contrib/llvm/lib/CodeGen/CalcSpillWeights.cpp
index dc2d38a..c2ced19 100644
--- a/contrib/llvm/lib/CodeGen/CalcSpillWeights.cpp
+++ b/contrib/llvm/lib/CodeGen/CalcSpillWeights.cpp
@@ -7,13 +7,13 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/CodeGen/VirtRegMap.h"
#include "llvm/CodeGen/CalcSpillWeights.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/VirtRegMap.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
diff --git a/contrib/llvm/lib/CodeGen/CallingConvLower.cpp b/contrib/llvm/lib/CodeGen/CallingConvLower.cpp
index 2e33f14..7cad4d0 100644
--- a/contrib/llvm/lib/CodeGen/CallingConvLower.cpp
+++ b/contrib/llvm/lib/CodeGen/CallingConvLower.cpp
@@ -30,8 +30,7 @@ using namespace llvm;
CCState::CCState(CallingConv::ID CC, bool isVarArg, MachineFunction &mf,
SmallVectorImpl<CCValAssign> &locs, LLVMContext &C)
: CallingConv(CC), IsVarArg(isVarArg), MF(mf),
- TRI(*MF.getSubtarget().getRegisterInfo()), Locs(locs), Context(C),
- CallOrPrologue(Unknown) {
+ TRI(*MF.getSubtarget().getRegisterInfo()), Locs(locs), Context(C) {
// No stack is used.
StackOffset = 0;
MaxStackArgAlign = 1;
diff --git a/contrib/llvm/lib/CodeGen/CodeGen.cpp b/contrib/llvm/lib/CodeGen/CodeGen.cpp
index 4cf9b13..b7fd45a 100644
--- a/contrib/llvm/lib/CodeGen/CodeGen.cpp
+++ b/contrib/llvm/lib/CodeGen/CodeGen.cpp
@@ -12,8 +12,8 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/InitializePasses.h"
#include "llvm-c/Initialization.h"
+#include "llvm/InitializePasses.h"
#include "llvm/PassRegistry.h"
using namespace llvm;
@@ -21,6 +21,7 @@ using namespace llvm;
/// initializeCodeGen - Initialize all passes linked into the CodeGen library.
void llvm::initializeCodeGen(PassRegistry &Registry) {
initializeAtomicExpandPass(Registry);
+ initializeBranchCoalescingPass(Registry);
initializeBranchFolderPassPass(Registry);
initializeBranchRelaxationPass(Registry);
initializeCodeGenPreparePass(Registry);
@@ -31,14 +32,18 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
initializeEarlyIfConverterPass(Registry);
initializeExpandISelPseudosPass(Registry);
initializeExpandPostRAPass(Registry);
+ initializeFEntryInserterPass(Registry);
initializeFinalizeMachineBundlesPass(Registry);
initializeFuncletLayoutPass(Registry);
initializeGCMachineCodeAnalysisPass(Registry);
initializeGCModuleInfoPass(Registry);
initializeIfConverterPass(Registry);
+ initializeImplicitNullChecksPass(Registry);
initializeInterleavedAccessPass(Registry);
+ initializeLiveDebugValuesPass(Registry);
initializeLiveDebugVariablesPass(Registry);
initializeLiveIntervalsPass(Registry);
+ initializeLiveRangeShrinkPass(Registry);
initializeLiveStacksPass(Registry);
initializeLiveVariablesPass(Registry);
initializeLocalStackSlotPassPass(Registry);
@@ -47,7 +52,6 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
initializeMachineBlockPlacementPass(Registry);
initializeMachineBlockPlacementStatsPass(Registry);
initializeMachineCSEPass(Registry);
- initializeImplicitNullChecksPass(Registry);
initializeMachineCombinerPass(Registry);
initializeMachineCopyPropagationPass(Registry);
initializeMachineDominatorTreePass(Registry);
@@ -55,31 +59,35 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
initializeMachineLICMPass(Registry);
initializeMachineLoopInfoPass(Registry);
initializeMachineModuleInfoPass(Registry);
+ initializeMachineOptimizationRemarkEmitterPassPass(Registry);
+ initializeMachineOutlinerPass(Registry);
initializeMachinePipelinerPass(Registry);
initializeMachinePostDominatorTreePass(Registry);
+ initializeMachineRegionInfoPassPass(Registry);
initializeMachineSchedulerPass(Registry);
initializeMachineSinkingPass(Registry);
initializeMachineVerifierPassPass(Registry);
- initializeXRayInstrumentationPass(Registry);
- initializePatchableFunctionPass(Registry);
initializeOptimizePHIsPass(Registry);
initializePEIPass(Registry);
initializePHIEliminationPass(Registry);
+ initializePatchableFunctionPass(Registry);
initializePeepholeOptimizerPass(Registry);
initializePostMachineSchedulerPass(Registry);
initializePostRAHazardRecognizerPass(Registry);
initializePostRASchedulerPass(Registry);
initializePreISelIntrinsicLoweringLegacyPassPass(Registry);
initializeProcessImplicitDefsPass(Registry);
+ initializeRABasicPass(Registry);
+ initializeRAFastPass(Registry);
initializeRAGreedyPass(Registry);
initializeRegisterCoalescerPass(Registry);
initializeRenameIndependentSubregsPass(Registry);
+ initializeSafeStackLegacyPassPass(Registry);
+ initializeScalarizeMaskedMemIntrinPass(Registry);
initializeShrinkWrapPass(Registry);
initializeSlotIndexesPass(Registry);
initializeStackColoringPass(Registry);
initializeStackMapLivenessPass(Registry);
- initializeLiveDebugValuesPass(Registry);
- initializeSafeStackPass(Registry);
initializeStackProtectorPass(Registry);
initializeStackSlotColoringPass(Registry);
initializeTailDuplicatePassPass(Registry);
@@ -91,6 +99,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
initializeVirtRegMapPass(Registry);
initializeVirtRegRewriterPass(Registry);
initializeWinEHPreparePass(Registry);
+ initializeXRayInstrumentationPass(Registry);
}
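Each initializer called above is generated by the INITIALIZE_PASS macro family in the pass's own translation unit; a minimal sketch with a hypothetical pass:

    // Hypothetical pass. The macro emits the initializeMyPassPass(PassRegistry&)
    // function that a registry list like initializeCodeGen would call.
    char MyPass::ID = 0;
    INITIALIZE_PASS(MyPass, "my-pass", "My example pass",
                    false /*Only looks at CFG*/, false /*Is analysis*/)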
void LLVMInitializeCodeGen(LLVMPassRegistryRef R) {
diff --git a/contrib/llvm/lib/CodeGen/CodeGenPrepare.cpp b/contrib/llvm/lib/CodeGen/CodeGenPrepare.cpp
index 934b470..dc02a00 100644
--- a/contrib/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/contrib/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -13,20 +13,23 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/CodeGen/Passes.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
+#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
-#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/CodeGen/Analysis.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
@@ -53,8 +56,11 @@
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/BuildLibCalls.h"
#include "llvm/Transforms/Utils/BypassSlowDivision.h"
+#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/SimplifyLibCalls.h"
+#include "llvm/Transforms/Utils/ValueMapper.h"
+
using namespace llvm;
using namespace llvm::PatternMatch;
@@ -77,9 +83,14 @@ STATISTIC(NumAndUses, "Number of uses of and mask instructions optimized");
STATISTIC(NumRetsDup, "Number of return instructions duplicated");
STATISTIC(NumDbgValueMoved, "Number of debug value instructions moved");
STATISTIC(NumSelectsExpanded, "Number of selects turned into branches");
-STATISTIC(NumAndCmpsMoved, "Number of and/cmp's pushed into branches");
STATISTIC(NumStoreExtractExposed, "Number of store(extractelement) exposed");
+STATISTIC(NumMemCmpCalls, "Number of memcmp calls");
+STATISTIC(NumMemCmpNotConstant, "Number of memcmp calls without constant size");
+STATISTIC(NumMemCmpGreaterThanMax,
+ "Number of memcmp calls with size greater than max size");
+STATISTIC(NumMemCmpInlined, "Number of inlined memcmp calls");
+
static cl::opt<bool> DisableBranchOpts(
"disable-cgp-branch-opts", cl::Hidden, cl::init(false),
cl::desc("Disable branch optimizations in CodeGenPrepare"));
@@ -93,7 +104,7 @@ static cl::opt<bool> DisableSelectToBranch(
cl::desc("Disable select to branch conversion."));
static cl::opt<bool> AddrSinkUsingGEPs(
- "addr-sink-using-gep", cl::Hidden, cl::init(false),
+ "addr-sink-using-gep", cl::Hidden, cl::init(true),
cl::desc("Address sinking in CGP using GEPs."));
static cl::opt<bool> EnableAndCmpSinking(
@@ -123,7 +134,7 @@ static cl::opt<bool> DisablePreheaderProtect(
cl::desc("Disable protection against removing loop preheaders"));
static cl::opt<bool> ProfileGuidedSectionPrefix(
- "profile-guided-section-prefix", cl::Hidden, cl::init(true),
+ "profile-guided-section-prefix", cl::Hidden, cl::init(true), cl::ZeroOrMore,
cl::desc("Use profile info to add section prefix for hot/cold functions"));
static cl::opt<unsigned> FreqRatioToSkipMerge(
@@ -135,15 +146,29 @@ static cl::opt<bool> ForceSplitStore(
"force-split-store", cl::Hidden, cl::init(false),
cl::desc("Force store splitting no matter what the target query says."));
+static cl::opt<bool>
+EnableTypePromotionMerge("cgp-type-promotion-merge", cl::Hidden,
+ cl::desc("Enable merging of redundant sexts when one is dominating"
+ " the other."), cl::init(true));
+
+static cl::opt<unsigned> MemCmpNumLoadsPerBlock(
+ "memcmp-num-loads-per-block", cl::Hidden, cl::init(1),
+ cl::desc("The number of loads per basic block for inline expansion of "
+ "memcmp that is only being compared against zero."));
+
namespace {
typedef SmallPtrSet<Instruction *, 16> SetOfInstrs;
typedef PointerIntPair<Type *, 1, bool> TypeIsSExt;
typedef DenseMap<Instruction *, TypeIsSExt> InstrToOrigTy;
+typedef SmallVector<Instruction *, 16> SExts;
+typedef DenseMap<Value *, SExts> ValueToSExts;
class TypePromotionTransaction;
class CodeGenPrepare : public FunctionPass {
const TargetMachine *TM;
+ const TargetSubtargetInfo *SubtargetInfo;
const TargetLowering *TLI;
+ const TargetRegisterInfo *TRI;
const TargetTransformInfo *TTI;
const TargetLibraryInfo *TLInfo;
const LoopInfo *LI;
@@ -165,6 +190,15 @@ class TypePromotionTransaction;
/// promotion for the current function.
InstrToOrigTy PromotedInsts;
+ /// Keep track of instructions removed during promotion.
+ SetOfInstrs RemovedInsts;
+
+ /// Keep track of sext chains based on their initial value.
+ DenseMap<Value *, Instruction *> SeenChainsForSExt;
+
+ /// Keep track of SExt promoted.
+ ValueToSExts ValToSExtendedUses;
+
/// True if CFG is modified in any way.
bool ModifiedDT;
@@ -176,10 +210,11 @@ class TypePromotionTransaction;
public:
static char ID; // Pass identification, replacement for typeid
- explicit CodeGenPrepare(const TargetMachine *TM = nullptr)
- : FunctionPass(ID), TM(TM), TLI(nullptr), TTI(nullptr), DL(nullptr) {
- initializeCodeGenPreparePass(*PassRegistry::getPassRegistry());
- }
+ CodeGenPrepare()
+ : FunctionPass(ID), TM(nullptr), TLI(nullptr), TTI(nullptr),
+ DL(nullptr) {
+ initializeCodeGenPreparePass(*PassRegistry::getPassRegistry());
+ }
bool runOnFunction(Function &F) override;
StringRef getPassName() const override { return "CodeGen Prepare"; }
@@ -200,13 +235,13 @@ class TypePromotionTransaction;
void eliminateMostlyEmptyBlock(BasicBlock *BB);
bool isMergingEmptyBlockProfitable(BasicBlock *BB, BasicBlock *DestBB,
bool isPreheader);
- bool optimizeBlock(BasicBlock &BB, bool& ModifiedDT);
- bool optimizeInst(Instruction *I, bool& ModifiedDT);
+ bool optimizeBlock(BasicBlock &BB, bool &ModifiedDT);
+ bool optimizeInst(Instruction *I, bool &ModifiedDT);
bool optimizeMemoryInst(Instruction *I, Value *Addr,
Type *AccessTy, unsigned AS);
bool optimizeInlineAsmInst(CallInst *CS);
- bool optimizeCallInst(CallInst *CI, bool& ModifiedDT);
- bool moveExtToFormExtLoad(Instruction *&I);
+ bool optimizeCallInst(CallInst *CI, bool &ModifiedDT);
+ bool optimizeExt(Instruction *&I);
bool optimizeExtUses(Instruction *I);
bool optimizeLoadExt(LoadInst *I);
bool optimizeSelectInst(SelectInst *SI);
@@ -215,26 +250,32 @@ class TypePromotionTransaction;
bool optimizeExtractElementInst(Instruction *Inst);
bool dupRetToEnableTailCallOpts(BasicBlock *BB);
bool placeDbgValues(Function &F);
- bool sinkAndCmp(Function &F);
- bool extLdPromotion(TypePromotionTransaction &TPT, LoadInst *&LI,
- Instruction *&Inst,
- const SmallVectorImpl<Instruction *> &Exts,
- unsigned CreatedInstCost);
+ bool canFormExtLd(const SmallVectorImpl<Instruction *> &MovedExts,
+ LoadInst *&LI, Instruction *&Inst, bool HasPromoted);
+ bool tryToPromoteExts(TypePromotionTransaction &TPT,
+ const SmallVectorImpl<Instruction *> &Exts,
+ SmallVectorImpl<Instruction *> &ProfitablyMovedExts,
+ unsigned CreatedInstsCost = 0);
+ bool mergeSExts(Function &F);
+ bool performAddressTypePromotion(
+ Instruction *&Inst,
+ bool AllowPromotionWithoutCommonHeader,
+ bool HasPromoted, TypePromotionTransaction &TPT,
+ SmallVectorImpl<Instruction *> &SpeculativelyMovedExts);
bool splitBranchCondition(Function &F);
bool simplifyOffsetableRelocate(Instruction &I);
+ bool splitIndirectCriticalEdges(Function &F);
};
}
char CodeGenPrepare::ID = 0;
-INITIALIZE_TM_PASS_BEGIN(CodeGenPrepare, "codegenprepare",
- "Optimize for code generation", false, false)
+INITIALIZE_PASS_BEGIN(CodeGenPrepare, DEBUG_TYPE,
+ "Optimize for code generation", false, false)
INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
-INITIALIZE_TM_PASS_END(CodeGenPrepare, "codegenprepare",
- "Optimize for code generation", false, false)
+INITIALIZE_PASS_END(CodeGenPrepare, DEBUG_TYPE,
+ "Optimize for code generation", false, false)
-FunctionPass *llvm::createCodeGenPreparePass(const TargetMachine *TM) {
- return new CodeGenPrepare(TM);
-}
+FunctionPass *llvm::createCodeGenPreparePass() { return new CodeGenPrepare(); }
bool CodeGenPrepare::runOnFunction(Function &F) {
if (skipFunction(F))
@@ -250,8 +291,12 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
BPI.reset();
ModifiedDT = false;
- if (TM)
- TLI = TM->getSubtargetImpl(F)->getTargetLowering();
+ if (auto *TPC = getAnalysisIfAvailable<TargetPassConfig>()) {
+ TM = &TPC->getTM<TargetMachine>();
+ SubtargetInfo = TM->getSubtargetImpl(F);
+ TLI = SubtargetInfo->getTargetLowering();
+ TRI = SubtargetInfo->getRegisterInfo();
+ }
TLInfo = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
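A hedged sketch of the construction idiom this change introduces: the pass is now built without a TargetMachine and recovers it from TargetPassConfig at run time when one is registered in the pipeline:

    FunctionPass *P = llvm::createCodeGenPreparePass(); // no TM argument now
    // Inside runOnFunction, as in the hunk above:
    //   if (auto *TPC = getAnalysisIfAvailable<TargetPassConfig>())
    //     TM = &TPC->getTM<TargetMachine>();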
@@ -260,10 +305,10 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
if (ProfileGuidedSectionPrefix) {
ProfileSummaryInfo *PSI =
getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
- if (PSI->isFunctionEntryHot(&F))
+ if (PSI->isFunctionHotInCallGraph(&F))
F.setSectionPrefix(".hot");
- else if (PSI->isFunctionEntryCold(&F))
- F.setSectionPrefix(".cold");
+ else if (PSI->isFunctionColdInCallGraph(&F))
+ F.setSectionPrefix(".unlikely");
}
/// This optimization identifies DIV instructions that can be
@@ -290,18 +335,19 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
// find a node corresponding to the value.
EverMadeChange |= placeDbgValues(F);
- // If there is a mask, compare against zero, and branch that can be combined
- // into a single target instruction, push the mask and compare into branch
- // users. Do this before OptimizeBlock -> OptimizeInst ->
- // OptimizeCmpExpression, which perturbs the pattern being searched for.
- if (!DisableBranchOpts) {
- EverMadeChange |= sinkAndCmp(F);
+ if (!DisableBranchOpts)
EverMadeChange |= splitBranchCondition(F);
- }
+
+ // Split some critical edges where one of the sources is an indirect branch,
+ // to help generate sane code for PHIs involving such edges.
+ EverMadeChange |= splitIndirectCriticalEdges(F);
bool MadeChange = true;
while (MadeChange) {
MadeChange = false;
+ SeenChainsForSExt.clear();
+ ValToSExtendedUses.clear();
+ RemovedInsts.clear();
for (Function::iterator I = F.begin(); I != F.end(); ) {
BasicBlock *BB = &*I++;
bool ModifiedDTOnIteration = false;
@@ -311,6 +357,13 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
if (ModifiedDTOnIteration)
break;
}
+ if (EnableTypePromotionMerge && !ValToSExtendedUses.empty())
+ MadeChange |= mergeSExts(F);
+
+ // Really free removed instructions during promotion.
+ for (Instruction *I : RemovedInsts)
+ I->deleteValue();
+
EverMadeChange |= MadeChange;
}
@@ -432,6 +485,160 @@ BasicBlock *CodeGenPrepare::findDestBlockOfMergeableEmptyBlock(BasicBlock *BB) {
return DestBB;
}
+// Return the unique indirectbr predecessor of a block. This may return null
+// even if such a predecessor exists, if it's not useful for splitting.
+// If a predecessor is found, OtherPreds will contain all other (non-indirectbr)
+// predecessors of BB.
+static BasicBlock *
+findIBRPredecessor(BasicBlock *BB, SmallVectorImpl<BasicBlock *> &OtherPreds) {
+ // If the block doesn't have any PHIs, we don't care about it, since there's
+ // no point in splitting it.
+ PHINode *PN = dyn_cast<PHINode>(BB->begin());
+ if (!PN)
+ return nullptr;
+
+ // Verify we have exactly one IBR predecessor.
+ // Conservatively bail out if one of the other predecessors is not a "regular"
+ // terminator (that is, not a switch or a br).
+ BasicBlock *IBB = nullptr;
+ for (unsigned Pred = 0, E = PN->getNumIncomingValues(); Pred != E; ++Pred) {
+ BasicBlock *PredBB = PN->getIncomingBlock(Pred);
+ TerminatorInst *PredTerm = PredBB->getTerminator();
+ switch (PredTerm->getOpcode()) {
+ case Instruction::IndirectBr:
+ if (IBB)
+ return nullptr;
+ IBB = PredBB;
+ break;
+ case Instruction::Br:
+ case Instruction::Switch:
+ OtherPreds.push_back(PredBB);
+ continue;
+ default:
+ return nullptr;
+ }
+ }
+
+ return IBB;
+}
+
+// Split critical edges where the source of the edge is an indirectbr
+// instruction. This isn't always possible, but we can handle some easy cases.
+// This is useful because MI is unable to split such critical edges,
+// which means it will not be able to sink instructions along those edges.
+// This is especially painful for indirect branches with many successors, where
+// we end up having to prepare all outgoing values in the origin block.
+//
+// Our normal algorithm for splitting critical edges requires us to update
+// the outgoing edges of the edge origin block, but for an indirectbr this
+// is hard, since it would require finding and updating the block addresses
+// the indirect branch uses. But if a block only has a single indirectbr
+// predecessor, with the others being regular branches, we can do it in a
+// different way.
+// Say we have A -> D, B -> D, I -> D where only I -> D is an indirectbr.
+// We can split D into D0 and D1, where D0 contains only the PHIs from D,
+// and D1 is the D block body. We can then duplicate D0 as D0A and D0B, and
+// create the following structure:
+// A -> D0A, B -> D0A, I -> D0B, D0A -> D1, D0B -> D1
+bool CodeGenPrepare::splitIndirectCriticalEdges(Function &F) {
+ // Check whether the function has any indirectbrs, and collect which blocks
+ // they may jump to. Since most functions don't have indirect branches,
+ // this lowers the common case's overhead to O(Blocks) instead of O(Edges).
+ SmallSetVector<BasicBlock *, 16> Targets;
+ for (auto &BB : F) {
+ auto *IBI = dyn_cast<IndirectBrInst>(BB.getTerminator());
+ if (!IBI)
+ continue;
+
+ for (unsigned Succ = 0, E = IBI->getNumSuccessors(); Succ != E; ++Succ)
+ Targets.insert(IBI->getSuccessor(Succ));
+ }
+
+ if (Targets.empty())
+ return false;
+
+ bool Changed = false;
+ for (BasicBlock *Target : Targets) {
+ SmallVector<BasicBlock *, 16> OtherPreds;
+ BasicBlock *IBRPred = findIBRPredecessor(Target, OtherPreds);
+    // If we did not find an indirectbr, or the indirectbr is the only
+    // incoming edge, this isn't the kind of edge we're looking for.
+ if (!IBRPred || OtherPreds.empty())
+ continue;
+
+ // Don't even think about ehpads/landingpads.
+ Instruction *FirstNonPHI = Target->getFirstNonPHI();
+ if (FirstNonPHI->isEHPad() || Target->isLandingPad())
+ continue;
+
+ BasicBlock *BodyBlock = Target->splitBasicBlock(FirstNonPHI, ".split");
+ // It's possible Target was its own successor through an indirectbr.
+ // In this case, the indirectbr now comes from BodyBlock.
+ if (IBRPred == Target)
+ IBRPred = BodyBlock;
+
+ // At this point Target only has PHIs, and BodyBlock has the rest of the
+ // block's body. Create a copy of Target that will be used by the "direct"
+ // preds.
+ ValueToValueMapTy VMap;
+ BasicBlock *DirectSucc = CloneBasicBlock(Target, VMap, ".clone", &F);
+
+ for (BasicBlock *Pred : OtherPreds) {
+      // If the target loops back to itself, then the terminator of the split
+      // body block needs to be updated.
+ if (Pred == Target)
+ BodyBlock->getTerminator()->replaceUsesOfWith(Target, DirectSucc);
+ else
+ Pred->getTerminator()->replaceUsesOfWith(Target, DirectSucc);
+ }
+
+    // Ok, now fix up the PHIs. We know the two blocks only have PHIs, and
+    // that they are clones, so the number of PHIs is the same.
+ // (a) Remove the edge coming from IBRPred from the "Direct" PHI
+ // (b) Leave that as the only edge in the "Indirect" PHI.
+ // (c) Merge the two in the body block.
+ BasicBlock::iterator Indirect = Target->begin(),
+ End = Target->getFirstNonPHI()->getIterator();
+ BasicBlock::iterator Direct = DirectSucc->begin();
+ BasicBlock::iterator MergeInsert = BodyBlock->getFirstInsertionPt();
+
+ assert(&*End == Target->getTerminator() &&
+ "Block was expected to only contain PHIs");
+
+ while (Indirect != End) {
+ PHINode *DirPHI = cast<PHINode>(Direct);
+ PHINode *IndPHI = cast<PHINode>(Indirect);
+
+ // Now, clean up - the direct block shouldn't get the indirect value,
+ // and vice versa.
+ DirPHI->removeIncomingValue(IBRPred);
+ Direct++;
+
+ // Advance the pointer here, to avoid invalidation issues when the old
+ // PHI is erased.
+ Indirect++;
+
+ PHINode *NewIndPHI = PHINode::Create(IndPHI->getType(), 1, "ind", IndPHI);
+ NewIndPHI->addIncoming(IndPHI->getIncomingValueForBlock(IBRPred),
+ IBRPred);
+
+ // Create a PHI in the body block, to merge the direct and indirect
+ // predecessors.
+ PHINode *MergePHI =
+ PHINode::Create(IndPHI->getType(), 2, "merge", &*MergeInsert);
+ MergePHI->addIncoming(NewIndPHI, Target);
+ MergePHI->addIncoming(DirPHI, DirectSucc);
+
+ IndPHI->replaceAllUsesWith(MergePHI);
+ IndPHI->eraseFromParent();
+ }
+
+ Changed = true;
+ }
+
+ return Changed;
+}
+
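A hand-written IR illustration (invented values) of the structure described in the comment above, for a block %d with one direct predecessor %a and one indirectbr predecessor %i:

    ; before: the %i -> %d edge is critical and cannot be split directly
    d:
      %p = phi i32 [ 1, %a ], [ 2, %i ]
      ; ...body...

    ; after: direct preds target a clone of the PHI-only block, %d keeps only
    ; the indirect edge, and the split body block merges the two
    d.clone:                          ; preds: %a
      %p.dir = phi i32 [ 1, %a ]
      br label %d.split
    d:                                ; preds: %i (via indirectbr)
      %p.ind = phi i32 [ 2, %i ]
      br label %d.split
    d.split:
      %p = phi i32 [ %p.dir, %d.clone ], [ %p.ind, %d ]
      ; ...body...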
/// Eliminate blocks that contain only PHI nodes, debug info directives, and an
/// unconditional branch. Passes before isel (e.g. LSR/loopsimplify) often split
/// edges in ways that are non-optimal for isel. Start by eliminating these
@@ -1090,6 +1297,83 @@ static bool OptimizeCmpExpression(CmpInst *CI, const TargetLowering *TLI) {
return false;
}
+/// Duplicate and sink the given 'and' instruction into user blocks where it is
+/// used in a compare to allow isel to generate better code for targets where
+/// this operation can be combined.
+///
+/// Return true if any changes are made.
+static bool sinkAndCmp0Expression(Instruction *AndI,
+ const TargetLowering &TLI,
+ SetOfInstrs &InsertedInsts) {
+ // Double-check that we're not trying to optimize an instruction that was
+ // already optimized by some other part of this pass.
+ assert(!InsertedInsts.count(AndI) &&
+ "Attempting to optimize already optimized and instruction");
+ (void) InsertedInsts;
+
+ // Nothing to do for single use in same basic block.
+ if (AndI->hasOneUse() &&
+ AndI->getParent() == cast<Instruction>(*AndI->user_begin())->getParent())
+ return false;
+
+ // Try to avoid cases where sinking/duplicating is likely to increase register
+ // pressure.
+ if (!isa<ConstantInt>(AndI->getOperand(0)) &&
+ !isa<ConstantInt>(AndI->getOperand(1)) &&
+ AndI->getOperand(0)->hasOneUse() && AndI->getOperand(1)->hasOneUse())
+ return false;
+
+ for (auto *U : AndI->users()) {
+ Instruction *User = cast<Instruction>(U);
+
+ // Only sink for and mask feeding icmp with 0.
+ if (!isa<ICmpInst>(User))
+ return false;
+
+ auto *CmpC = dyn_cast<ConstantInt>(User->getOperand(1));
+ if (!CmpC || !CmpC->isZero())
+ return false;
+ }
+
+ if (!TLI.isMaskAndCmp0FoldingBeneficial(*AndI))
+ return false;
+
+ DEBUG(dbgs() << "found 'and' feeding only icmp 0;\n");
+ DEBUG(AndI->getParent()->dump());
+
+ // Push the 'and' into the same block as the icmp 0. There should only be
+ // one (icmp (and, 0)) in each block, since CSE/GVN should have removed any
+ // others, so we don't need to keep track of which BBs we insert into.
+ for (Value::user_iterator UI = AndI->user_begin(), E = AndI->user_end();
+ UI != E; ) {
+ Use &TheUse = UI.getUse();
+ Instruction *User = cast<Instruction>(*UI);
+
+ // Preincrement use iterator so we don't invalidate it.
+ ++UI;
+
+ DEBUG(dbgs() << "sinking 'and' use: " << *User << "\n");
+
+ // Keep the 'and' in the same place if the use is already in the same block.
+ Instruction *InsertPt =
+ User->getParent() == AndI->getParent() ? AndI : User;
+ Instruction *InsertedAnd =
+ BinaryOperator::Create(Instruction::And, AndI->getOperand(0),
+ AndI->getOperand(1), "", InsertPt);
+ // Propagate the debug info.
+ InsertedAnd->setDebugLoc(AndI->getDebugLoc());
+
+ // Replace a use of the 'and' with a use of the new 'and'.
+ TheUse = InsertedAnd;
+ ++NumAndUses;
+ DEBUG(User->getParent()->dump());
+ }
+
+ // We removed all uses, nuke the and.
+ AndI->eraseFromParent();
+ return true;
+}
+
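A hand-written before/after illustration (invented values) of the sinking this function performs:

    ; before: one 'and' feeds icmp-eq-0 users in two other blocks
    entry:
      %m = and i32 %x, 255
      br i1 %c, label %bb1, label %bb2
    bb1:
      %t1 = icmp eq i32 %m, 0
    bb2:
      %t2 = icmp eq i32 %m, 0

    ; after: the 'and' is duplicated next to each compare, so isel can fold
    ; each mask-and-compare into a single test-and-branch
    bb1:
      %m1 = and i32 %x, 255
      %t1 = icmp eq i32 %m1, 0
    bb2:
      %m2 = and i32 %x, 255
      %t2 = icmp eq i32 %m2, 0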
/// Check if the candidates could be combined with a shift instruction, which
/// includes:
/// 1. Truncate instruction
@@ -1278,519 +1562,6 @@ static bool OptimizeExtractBits(BinaryOperator *ShiftI, ConstantInt *CI,
return MadeChange;
}
-// Translate a masked load intrinsic like
-// <16 x i32 > @llvm.masked.load( <16 x i32>* %addr, i32 align,
-// <16 x i1> %mask, <16 x i32> %passthru)
-// to a chain of basic blocks, with loading element one-by-one if
-// the appropriate mask bit is set
-//
-// %1 = bitcast i8* %addr to i32*
-// %2 = extractelement <16 x i1> %mask, i32 0
-// %3 = icmp eq i1 %2, true
-// br i1 %3, label %cond.load, label %else
-//
-//cond.load: ; preds = %0
-// %4 = getelementptr i32* %1, i32 0
-// %5 = load i32* %4
-// %6 = insertelement <16 x i32> undef, i32 %5, i32 0
-// br label %else
-//
-//else: ; preds = %0, %cond.load
-// %res.phi.else = phi <16 x i32> [ %6, %cond.load ], [ undef, %0 ]
-// %7 = extractelement <16 x i1> %mask, i32 1
-// %8 = icmp eq i1 %7, true
-// br i1 %8, label %cond.load1, label %else2
-//
-//cond.load1: ; preds = %else
-// %9 = getelementptr i32* %1, i32 1
-// %10 = load i32* %9
-// %11 = insertelement <16 x i32> %res.phi.else, i32 %10, i32 1
-// br label %else2
-//
-//else2: ; preds = %else, %cond.load1
-// %res.phi.else3 = phi <16 x i32> [ %11, %cond.load1 ], [ %res.phi.else, %else ]
-// %12 = extractelement <16 x i1> %mask, i32 2
-// %13 = icmp eq i1 %12, true
-// br i1 %13, label %cond.load4, label %else5
-//
-static void scalarizeMaskedLoad(CallInst *CI) {
- Value *Ptr = CI->getArgOperand(0);
- Value *Alignment = CI->getArgOperand(1);
- Value *Mask = CI->getArgOperand(2);
- Value *Src0 = CI->getArgOperand(3);
-
- unsigned AlignVal = cast<ConstantInt>(Alignment)->getZExtValue();
- VectorType *VecType = dyn_cast<VectorType>(CI->getType());
- assert(VecType && "Unexpected return type of masked load intrinsic");
-
- Type *EltTy = CI->getType()->getVectorElementType();
-
- IRBuilder<> Builder(CI->getContext());
- Instruction *InsertPt = CI;
- BasicBlock *IfBlock = CI->getParent();
- BasicBlock *CondBlock = nullptr;
- BasicBlock *PrevIfBlock = CI->getParent();
-
- Builder.SetInsertPoint(InsertPt);
- Builder.SetCurrentDebugLocation(CI->getDebugLoc());
-
- // Short-cut if the mask is all-true.
- bool IsAllOnesMask = isa<Constant>(Mask) &&
- cast<Constant>(Mask)->isAllOnesValue();
-
- if (IsAllOnesMask) {
- Value *NewI = Builder.CreateAlignedLoad(Ptr, AlignVal);
- CI->replaceAllUsesWith(NewI);
- CI->eraseFromParent();
- return;
- }
-
- // Adjust alignment for the scalar instruction.
- AlignVal = std::min(AlignVal, VecType->getScalarSizeInBits()/8);
- // Bitcast %addr fron i8* to EltTy*
- Type *NewPtrType =
- EltTy->getPointerTo(cast<PointerType>(Ptr->getType())->getAddressSpace());
- Value *FirstEltPtr = Builder.CreateBitCast(Ptr, NewPtrType);
- unsigned VectorWidth = VecType->getNumElements();
-
- Value *UndefVal = UndefValue::get(VecType);
-
- // The result vector
- Value *VResult = UndefVal;
-
- if (isa<ConstantVector>(Mask)) {
- for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
- if (cast<ConstantVector>(Mask)->getOperand(Idx)->isNullValue())
- continue;
- Value *Gep =
- Builder.CreateInBoundsGEP(EltTy, FirstEltPtr, Builder.getInt32(Idx));
- LoadInst* Load = Builder.CreateAlignedLoad(Gep, AlignVal);
- VResult = Builder.CreateInsertElement(VResult, Load,
- Builder.getInt32(Idx));
- }
- Value *NewI = Builder.CreateSelect(Mask, VResult, Src0);
- CI->replaceAllUsesWith(NewI);
- CI->eraseFromParent();
- return;
- }
-
- PHINode *Phi = nullptr;
- Value *PrevPhi = UndefVal;
-
- for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
-
- // Fill the "else" block, created in the previous iteration
- //
- // %res.phi.else3 = phi <16 x i32> [ %11, %cond.load1 ], [ %res.phi.else, %else ]
- // %mask_1 = extractelement <16 x i1> %mask, i32 Idx
- // %to_load = icmp eq i1 %mask_1, true
- // br i1 %to_load, label %cond.load, label %else
- //
- if (Idx > 0) {
- Phi = Builder.CreatePHI(VecType, 2, "res.phi.else");
- Phi->addIncoming(VResult, CondBlock);
- Phi->addIncoming(PrevPhi, PrevIfBlock);
- PrevPhi = Phi;
- VResult = Phi;
- }
-
- Value *Predicate = Builder.CreateExtractElement(Mask, Builder.getInt32(Idx));
- Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_EQ, Predicate,
- ConstantInt::get(Predicate->getType(), 1));
-
- // Create "cond" block
- //
- // %EltAddr = getelementptr i32* %1, i32 0
- // %Elt = load i32* %EltAddr
- // VResult = insertelement <16 x i32> VResult, i32 %Elt, i32 Idx
- //
- CondBlock = IfBlock->splitBasicBlock(InsertPt->getIterator(), "cond.load");
- Builder.SetInsertPoint(InsertPt);
-
- Value *Gep =
- Builder.CreateInBoundsGEP(EltTy, FirstEltPtr, Builder.getInt32(Idx));
- LoadInst *Load = Builder.CreateAlignedLoad(Gep, AlignVal);
- VResult = Builder.CreateInsertElement(VResult, Load, Builder.getInt32(Idx));
-
- // Create "else" block, fill it in the next iteration
- BasicBlock *NewIfBlock =
- CondBlock->splitBasicBlock(InsertPt->getIterator(), "else");
- Builder.SetInsertPoint(InsertPt);
- Instruction *OldBr = IfBlock->getTerminator();
- BranchInst::Create(CondBlock, NewIfBlock, Cmp, OldBr);
- OldBr->eraseFromParent();
- PrevIfBlock = IfBlock;
- IfBlock = NewIfBlock;
- }
-
- Phi = Builder.CreatePHI(VecType, 2, "res.phi.select");
- Phi->addIncoming(VResult, CondBlock);
- Phi->addIncoming(PrevPhi, PrevIfBlock);
- Value *NewI = Builder.CreateSelect(Mask, Phi, Src0);
- CI->replaceAllUsesWith(NewI);
- CI->eraseFromParent();
-}
-
-// Translate a masked store intrinsic, like
-// void @llvm.masked.store(<16 x i32> %src, <16 x i32>* %addr, i32 align,
-// <16 x i1> %mask)
-// to a chain of basic blocks, that stores element one-by-one if
-// the appropriate mask bit is set
-//
-// %1 = bitcast i8* %addr to i32*
-// %2 = extractelement <16 x i1> %mask, i32 0
-// %3 = icmp eq i1 %2, true
-// br i1 %3, label %cond.store, label %else
-//
-// cond.store: ; preds = %0
-// %4 = extractelement <16 x i32> %val, i32 0
-// %5 = getelementptr i32* %1, i32 0
-// store i32 %4, i32* %5
-// br label %else
-//
-// else: ; preds = %0, %cond.store
-// %6 = extractelement <16 x i1> %mask, i32 1
-// %7 = icmp eq i1 %6, true
-// br i1 %7, label %cond.store1, label %else2
-//
-// cond.store1: ; preds = %else
-// %8 = extractelement <16 x i32> %val, i32 1
-// %9 = getelementptr i32* %1, i32 1
-// store i32 %8, i32* %9
-// br label %else2
-// . . .
-static void scalarizeMaskedStore(CallInst *CI) {
- Value *Src = CI->getArgOperand(0);
- Value *Ptr = CI->getArgOperand(1);
- Value *Alignment = CI->getArgOperand(2);
- Value *Mask = CI->getArgOperand(3);
-
- unsigned AlignVal = cast<ConstantInt>(Alignment)->getZExtValue();
- VectorType *VecType = dyn_cast<VectorType>(Src->getType());
- assert(VecType && "Unexpected data type in masked store intrinsic");
-
- Type *EltTy = VecType->getElementType();
-
- IRBuilder<> Builder(CI->getContext());
- Instruction *InsertPt = CI;
- BasicBlock *IfBlock = CI->getParent();
- Builder.SetInsertPoint(InsertPt);
- Builder.SetCurrentDebugLocation(CI->getDebugLoc());
-
- // Short-cut if the mask is all-true.
- bool IsAllOnesMask = isa<Constant>(Mask) &&
- cast<Constant>(Mask)->isAllOnesValue();
-
- if (IsAllOnesMask) {
- Builder.CreateAlignedStore(Src, Ptr, AlignVal);
- CI->eraseFromParent();
- return;
- }
-
- // Adjust alignment for the scalar instruction.
- AlignVal = std::max(AlignVal, VecType->getScalarSizeInBits()/8);
- // Bitcast %addr fron i8* to EltTy*
- Type *NewPtrType =
- EltTy->getPointerTo(cast<PointerType>(Ptr->getType())->getAddressSpace());
- Value *FirstEltPtr = Builder.CreateBitCast(Ptr, NewPtrType);
- unsigned VectorWidth = VecType->getNumElements();
-
- if (isa<ConstantVector>(Mask)) {
- for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
- if (cast<ConstantVector>(Mask)->getOperand(Idx)->isNullValue())
- continue;
- Value *OneElt = Builder.CreateExtractElement(Src, Builder.getInt32(Idx));
- Value *Gep =
- Builder.CreateInBoundsGEP(EltTy, FirstEltPtr, Builder.getInt32(Idx));
- Builder.CreateAlignedStore(OneElt, Gep, AlignVal);
- }
- CI->eraseFromParent();
- return;
- }
-
- for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
-
- // Fill the "else" block, created in the previous iteration
- //
- // %mask_1 = extractelement <16 x i1> %mask, i32 Idx
- // %to_store = icmp eq i1 %mask_1, true
- // br i1 %to_store, label %cond.store, label %else
- //
- Value *Predicate = Builder.CreateExtractElement(Mask, Builder.getInt32(Idx));
- Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_EQ, Predicate,
- ConstantInt::get(Predicate->getType(), 1));
-
- // Create "cond" block
- //
- // %OneElt = extractelement <16 x i32> %Src, i32 Idx
- // %EltAddr = getelementptr i32* %1, i32 0
- // %store i32 %OneElt, i32* %EltAddr
- //
- BasicBlock *CondBlock =
- IfBlock->splitBasicBlock(InsertPt->getIterator(), "cond.store");
- Builder.SetInsertPoint(InsertPt);
-
- Value *OneElt = Builder.CreateExtractElement(Src, Builder.getInt32(Idx));
- Value *Gep =
- Builder.CreateInBoundsGEP(EltTy, FirstEltPtr, Builder.getInt32(Idx));
- Builder.CreateAlignedStore(OneElt, Gep, AlignVal);
-
- // Create "else" block, fill it in the next iteration
- BasicBlock *NewIfBlock =
- CondBlock->splitBasicBlock(InsertPt->getIterator(), "else");
- Builder.SetInsertPoint(InsertPt);
- Instruction *OldBr = IfBlock->getTerminator();
- BranchInst::Create(CondBlock, NewIfBlock, Cmp, OldBr);
- OldBr->eraseFromParent();
- IfBlock = NewIfBlock;
- }
- CI->eraseFromParent();
-}
-
-// Translate a masked gather intrinsic like
-// <16 x i32 > @llvm.masked.gather.v16i32( <16 x i32*> %Ptrs, i32 4,
-// <16 x i1> %Mask, <16 x i32> %Src)
-// to a chain of basic blocks, with loading element one-by-one if
-// the appropriate mask bit is set
-//
-// % Ptrs = getelementptr i32, i32* %base, <16 x i64> %ind
-// % Mask0 = extractelement <16 x i1> %Mask, i32 0
-// % ToLoad0 = icmp eq i1 % Mask0, true
-// br i1 % ToLoad0, label %cond.load, label %else
-//
-// cond.load:
-// % Ptr0 = extractelement <16 x i32*> %Ptrs, i32 0
-// % Load0 = load i32, i32* % Ptr0, align 4
-// % Res0 = insertelement <16 x i32> undef, i32 % Load0, i32 0
-// br label %else
-//
-// else:
-// %res.phi.else = phi <16 x i32>[% Res0, %cond.load], [undef, % 0]
-// % Mask1 = extractelement <16 x i1> %Mask, i32 1
-// % ToLoad1 = icmp eq i1 % Mask1, true
-// br i1 % ToLoad1, label %cond.load1, label %else2
-//
-// cond.load1:
-// % Ptr1 = extractelement <16 x i32*> %Ptrs, i32 1
-// % Load1 = load i32, i32* % Ptr1, align 4
-// % Res1 = insertelement <16 x i32> %res.phi.else, i32 % Load1, i32 1
-// br label %else2
-// . . .
-// % Result = select <16 x i1> %Mask, <16 x i32> %res.phi.select, <16 x i32> %Src
-// ret <16 x i32> %Result
-static void scalarizeMaskedGather(CallInst *CI) {
- Value *Ptrs = CI->getArgOperand(0);
- Value *Alignment = CI->getArgOperand(1);
- Value *Mask = CI->getArgOperand(2);
- Value *Src0 = CI->getArgOperand(3);
-
- VectorType *VecType = dyn_cast<VectorType>(CI->getType());
-
- assert(VecType && "Unexpected return type of masked load intrinsic");
-
- IRBuilder<> Builder(CI->getContext());
- Instruction *InsertPt = CI;
- BasicBlock *IfBlock = CI->getParent();
- BasicBlock *CondBlock = nullptr;
- BasicBlock *PrevIfBlock = CI->getParent();
- Builder.SetInsertPoint(InsertPt);
- unsigned AlignVal = cast<ConstantInt>(Alignment)->getZExtValue();
-
- Builder.SetCurrentDebugLocation(CI->getDebugLoc());
-
- Value *UndefVal = UndefValue::get(VecType);
-
- // The result vector
- Value *VResult = UndefVal;
- unsigned VectorWidth = VecType->getNumElements();
-
- // Shorten the way if the mask is a vector of constants.
- bool IsConstMask = isa<ConstantVector>(Mask);
-
- if (IsConstMask) {
- for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
- if (cast<ConstantVector>(Mask)->getOperand(Idx)->isNullValue())
- continue;
- Value *Ptr = Builder.CreateExtractElement(Ptrs, Builder.getInt32(Idx),
- "Ptr" + Twine(Idx));
- LoadInst *Load = Builder.CreateAlignedLoad(Ptr, AlignVal,
- "Load" + Twine(Idx));
- VResult = Builder.CreateInsertElement(VResult, Load,
- Builder.getInt32(Idx),
- "Res" + Twine(Idx));
- }
- Value *NewI = Builder.CreateSelect(Mask, VResult, Src0);
- CI->replaceAllUsesWith(NewI);
- CI->eraseFromParent();
- return;
- }
-
- PHINode *Phi = nullptr;
- Value *PrevPhi = UndefVal;
-
- for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
-
- // Fill the "else" block, created in the previous iteration
- //
- // %Mask1 = extractelement <16 x i1> %Mask, i32 1
- // %ToLoad1 = icmp eq i1 %Mask1, true
- // br i1 %ToLoad1, label %cond.load, label %else
- //
- if (Idx > 0) {
- Phi = Builder.CreatePHI(VecType, 2, "res.phi.else");
- Phi->addIncoming(VResult, CondBlock);
- Phi->addIncoming(PrevPhi, PrevIfBlock);
- PrevPhi = Phi;
- VResult = Phi;
- }
-
- Value *Predicate = Builder.CreateExtractElement(Mask,
- Builder.getInt32(Idx),
- "Mask" + Twine(Idx));
- Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_EQ, Predicate,
- ConstantInt::get(Predicate->getType(), 1),
- "ToLoad" + Twine(Idx));
-
- // Create "cond" block
- //
- // %EltAddr = getelementptr i32* %1, i32 0
- // %Elt = load i32* %EltAddr
- // VResult = insertelement <16 x i32> VResult, i32 %Elt, i32 Idx
- //
- CondBlock = IfBlock->splitBasicBlock(InsertPt, "cond.load");
- Builder.SetInsertPoint(InsertPt);
-
- Value *Ptr = Builder.CreateExtractElement(Ptrs, Builder.getInt32(Idx),
- "Ptr" + Twine(Idx));
- LoadInst *Load = Builder.CreateAlignedLoad(Ptr, AlignVal,
- "Load" + Twine(Idx));
- VResult = Builder.CreateInsertElement(VResult, Load, Builder.getInt32(Idx),
- "Res" + Twine(Idx));
-
- // Create "else" block, fill it in the next iteration
- BasicBlock *NewIfBlock = CondBlock->splitBasicBlock(InsertPt, "else");
- Builder.SetInsertPoint(InsertPt);
- Instruction *OldBr = IfBlock->getTerminator();
- BranchInst::Create(CondBlock, NewIfBlock, Cmp, OldBr);
- OldBr->eraseFromParent();
- PrevIfBlock = IfBlock;
- IfBlock = NewIfBlock;
- }
-
- Phi = Builder.CreatePHI(VecType, 2, "res.phi.select");
- Phi->addIncoming(VResult, CondBlock);
- Phi->addIncoming(PrevPhi, PrevIfBlock);
- Value *NewI = Builder.CreateSelect(Mask, Phi, Src0);
- CI->replaceAllUsesWith(NewI);
- CI->eraseFromParent();
-}
-
-// Translate a masked scatter intrinsic, like
-// void @llvm.masked.scatter.v16i32(<16 x i32> %Src, <16 x i32*>* %Ptrs, i32 4,
-// <16 x i1> %Mask)
-// to a chain of basic blocks, that stores element one-by-one if
-// the appropriate mask bit is set.
-//
-// % Ptrs = getelementptr i32, i32* %ptr, <16 x i64> %ind
-// % Mask0 = extractelement <16 x i1> % Mask, i32 0
-// % ToStore0 = icmp eq i1 % Mask0, true
-// br i1 %ToStore0, label %cond.store, label %else
-//
-// cond.store:
-// % Elt0 = extractelement <16 x i32> %Src, i32 0
-// % Ptr0 = extractelement <16 x i32*> %Ptrs, i32 0
-// store i32 %Elt0, i32* % Ptr0, align 4
-// br label %else
-//
-// else:
-// % Mask1 = extractelement <16 x i1> % Mask, i32 1
-// % ToStore1 = icmp eq i1 % Mask1, true
-// br i1 % ToStore1, label %cond.store1, label %else2
-//
-// cond.store1:
-// % Elt1 = extractelement <16 x i32> %Src, i32 1
-// % Ptr1 = extractelement <16 x i32*> %Ptrs, i32 1
-// store i32 % Elt1, i32* % Ptr1, align 4
-// br label %else2
-// . . .
-static void scalarizeMaskedScatter(CallInst *CI) {
- Value *Src = CI->getArgOperand(0);
- Value *Ptrs = CI->getArgOperand(1);
- Value *Alignment = CI->getArgOperand(2);
- Value *Mask = CI->getArgOperand(3);
-
- assert(isa<VectorType>(Src->getType()) &&
- "Unexpected data type in masked scatter intrinsic");
- assert(isa<VectorType>(Ptrs->getType()) &&
- isa<PointerType>(Ptrs->getType()->getVectorElementType()) &&
- "Vector of pointers is expected in masked scatter intrinsic");
-
- IRBuilder<> Builder(CI->getContext());
- Instruction *InsertPt = CI;
- BasicBlock *IfBlock = CI->getParent();
- Builder.SetInsertPoint(InsertPt);
- Builder.SetCurrentDebugLocation(CI->getDebugLoc());
-
- unsigned AlignVal = cast<ConstantInt>(Alignment)->getZExtValue();
- unsigned VectorWidth = Src->getType()->getVectorNumElements();
-
- // Shorten the way if the mask is a vector of constants.
- bool IsConstMask = isa<ConstantVector>(Mask);
-
- if (IsConstMask) {
- for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
- if (cast<ConstantVector>(Mask)->getOperand(Idx)->isNullValue())
- continue;
- Value *OneElt = Builder.CreateExtractElement(Src, Builder.getInt32(Idx),
- "Elt" + Twine(Idx));
- Value *Ptr = Builder.CreateExtractElement(Ptrs, Builder.getInt32(Idx),
- "Ptr" + Twine(Idx));
- Builder.CreateAlignedStore(OneElt, Ptr, AlignVal);
- }
- CI->eraseFromParent();
- return;
- }
- for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
- // Fill the "else" block, created in the previous iteration
- //
- // % Mask1 = extractelement <16 x i1> % Mask, i32 Idx
- // % ToStore = icmp eq i1 % Mask1, true
- // br i1 % ToStore, label %cond.store, label %else
- //
- Value *Predicate = Builder.CreateExtractElement(Mask,
- Builder.getInt32(Idx),
- "Mask" + Twine(Idx));
- Value *Cmp =
- Builder.CreateICmp(ICmpInst::ICMP_EQ, Predicate,
- ConstantInt::get(Predicate->getType(), 1),
- "ToStore" + Twine(Idx));
-
- // Create "cond" block
- //
- // % Elt1 = extractelement <16 x i32> %Src, i32 1
- // % Ptr1 = extractelement <16 x i32*> %Ptrs, i32 1
- // %store i32 % Elt1, i32* % Ptr1
- //
- BasicBlock *CondBlock = IfBlock->splitBasicBlock(InsertPt, "cond.store");
- Builder.SetInsertPoint(InsertPt);
-
- Value *OneElt = Builder.CreateExtractElement(Src, Builder.getInt32(Idx),
- "Elt" + Twine(Idx));
- Value *Ptr = Builder.CreateExtractElement(Ptrs, Builder.getInt32(Idx),
- "Ptr" + Twine(Idx));
- Builder.CreateAlignedStore(OneElt, Ptr, AlignVal);
-
- // Create "else" block, fill it in the next iteration
- BasicBlock *NewIfBlock = CondBlock->splitBasicBlock(InsertPt, "else");
- Builder.SetInsertPoint(InsertPt);
- Instruction *OldBr = IfBlock->getTerminator();
- BranchInst::Create(CondBlock, NewIfBlock, Cmp, OldBr);
- OldBr->eraseFromParent();
- IfBlock = NewIfBlock;
- }
- CI->eraseFromParent();
-}
-
/// If counting leading or trailing zeros is an expensive operation and a zero
/// input is defined, add a check for zero to avoid calling the intrinsic.
///
@@ -1870,7 +1641,657 @@ static bool despeculateCountZeros(IntrinsicInst *CountZeros,
return true;
}
-bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool& ModifiedDT) {
+// This class provides helper functions to expand a memcmp library call into an
+// inline expansion.
+class MemCmpExpansion {
+ struct ResultBlock {
+ BasicBlock *BB;
+ PHINode *PhiSrc1;
+ PHINode *PhiSrc2;
+ ResultBlock();
+ };
+
+ CallInst *CI;
+ ResultBlock ResBlock;
+ unsigned MaxLoadSize;
+ unsigned NumBlocks;
+ unsigned NumBlocksNonOneByte;
+ unsigned NumLoadsPerBlock;
+ std::vector<BasicBlock *> LoadCmpBlocks;
+ BasicBlock *EndBlock;
+ PHINode *PhiRes;
+ bool IsUsedForZeroCmp;
+ const DataLayout &DL;
+ IRBuilder<> Builder;
+
+ unsigned calculateNumBlocks(unsigned Size);
+ void createLoadCmpBlocks();
+ void createResultBlock();
+ void setupResultBlockPHINodes();
+ void setupEndBlockPHINodes();
+ void emitLoadCompareBlock(unsigned Index, unsigned LoadSize,
+ unsigned GEPIndex);
+ Value *getCompareLoadPairs(unsigned Index, unsigned Size,
+ unsigned &NumBytesProcessed);
+ void emitLoadCompareBlockMultipleLoads(unsigned Index, unsigned Size,
+ unsigned &NumBytesProcessed);
+ void emitLoadCompareByteBlock(unsigned Index, unsigned GEPIndex);
+ void emitMemCmpResultBlock();
+ Value *getMemCmpExpansionZeroCase(unsigned Size);
+ Value *getMemCmpEqZeroOneBlock(unsigned Size);
+ Value *getMemCmpOneBlock(unsigned Size);
+ unsigned getLoadSize(unsigned Size);
+ unsigned getNumLoads(unsigned Size);
+
+public:
+ MemCmpExpansion(CallInst *CI, uint64_t Size, unsigned MaxLoadSize,
+ unsigned NumLoadsPerBlock, const DataLayout &DL);
+ Value *getMemCmpExpansion(uint64_t Size);
+};
+
+MemCmpExpansion::ResultBlock::ResultBlock()
+ : BB(nullptr), PhiSrc1(nullptr), PhiSrc2(nullptr) {}
+
+// Initialize the basic block structure required for expansion of a memcmp
+// call with the given maximum load size and memcmp size parameter.
+// This structure includes:
+// 1. A list of load compare blocks - LoadCmpBlocks.
+// 2. An EndBlock, split off at the original call point, which is the block
+// to return from.
+// 3. A ResultBlock, the block to branch to for an early exit when a
+// LoadCmpBlock finds a difference.
+MemCmpExpansion::MemCmpExpansion(CallInst *CI, uint64_t Size,
+ unsigned MaxLoadSize, unsigned LoadsPerBlock,
+ const DataLayout &TheDataLayout)
+ : CI(CI), MaxLoadSize(MaxLoadSize), NumLoadsPerBlock(LoadsPerBlock),
+ DL(TheDataLayout), Builder(CI) {
+
+  // A memcmp that is only compared against zero and needs just one block of
+  // loads and compares does not require any extra blocks. This case could be
+  // handled in the DAG, but since we have all of the machinery to flexibly
+  // expand any memcmp here, we choose to handle this case too to avoid
+  // fragmented lowering.
+ IsUsedForZeroCmp = isOnlyUsedInZeroEqualityComparison(CI);
+ NumBlocks = calculateNumBlocks(Size);
+ if ((!IsUsedForZeroCmp && NumLoadsPerBlock != 1) || NumBlocks != 1) {
+ BasicBlock *StartBlock = CI->getParent();
+ EndBlock = StartBlock->splitBasicBlock(CI, "endblock");
+ setupEndBlockPHINodes();
+ createResultBlock();
+
+  // If the return value of memcmp is not used in a zero-equality comparison,
+  // we need to calculate which source was larger. The calculation requires the
+ // two loaded source values of each load compare block.
+ // These will be saved in the phi nodes created by setupResultBlockPHINodes.
+ if (!IsUsedForZeroCmp)
+ setupResultBlockPHINodes();
+
+ // Create the number of required load compare basic blocks.
+ createLoadCmpBlocks();
+
+ // Update the terminator added by splitBasicBlock to branch to the first
+ // LoadCmpBlock.
+ StartBlock->getTerminator()->setSuccessor(0, LoadCmpBlocks[0]);
+ }
+
+ Builder.SetCurrentDebugLocation(CI->getDebugLoc());
+}
+
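As a hedged illustration of the structure this constructor sets up, a memcmp(a, b, 16) with MaxLoadSize = 8 and one load per block might expand to (hand-written, simplified):

    loadbb:                     ; bytes 0..7 (loads bswapped on little-endian)
      %eq0 = icmp eq i64 %a0, %b0
      br i1 %eq0, label %loadbb1, label %res_block
    loadbb1:                    ; bytes 8..15
      %eq1 = icmp eq i64 %a1, %b1
      br i1 %eq1, label %endblock, label %res_block
    res_block:                  ; a difference was found; compute its sign
      %lt = icmp ult i64 %phi.src1, %phi.src2
      %r  = select i1 %lt, i32 -1, i32 1
      br label %endblock
    endblock:
      %res = phi i32 [ 0, %loadbb1 ], [ %r, %res_block ]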
+void MemCmpExpansion::createLoadCmpBlocks() {
+ for (unsigned i = 0; i < NumBlocks; i++) {
+ BasicBlock *BB = BasicBlock::Create(CI->getContext(), "loadbb",
+ EndBlock->getParent(), EndBlock);
+ LoadCmpBlocks.push_back(BB);
+ }
+}
+
+void MemCmpExpansion::createResultBlock() {
+ ResBlock.BB = BasicBlock::Create(CI->getContext(), "res_block",
+ EndBlock->getParent(), EndBlock);
+}
+
+// This function creates the IR instructions for loading and comparing 1 byte.
+// It loads 1 byte from each source of the memcmp parameters with the given
+// GEPIndex. It then subtracts the two loaded values and adds this result to the
+// final phi node for selecting the memcmp result.
+void MemCmpExpansion::emitLoadCompareByteBlock(unsigned Index,
+ unsigned GEPIndex) {
+ Value *Source1 = CI->getArgOperand(0);
+ Value *Source2 = CI->getArgOperand(1);
+
+ Builder.SetInsertPoint(LoadCmpBlocks[Index]);
+ Type *LoadSizeType = Type::getInt8Ty(CI->getContext());
+ // Cast source to LoadSizeType*.
+ if (Source1->getType() != LoadSizeType)
+ Source1 = Builder.CreateBitCast(Source1, LoadSizeType->getPointerTo());
+ if (Source2->getType() != LoadSizeType)
+ Source2 = Builder.CreateBitCast(Source2, LoadSizeType->getPointerTo());
+
+ // Get the base address using the GEPIndex.
+ if (GEPIndex != 0) {
+ Source1 = Builder.CreateGEP(LoadSizeType, Source1,
+ ConstantInt::get(LoadSizeType, GEPIndex));
+ Source2 = Builder.CreateGEP(LoadSizeType, Source2,
+ ConstantInt::get(LoadSizeType, GEPIndex));
+ }
+
+ Value *LoadSrc1 = Builder.CreateLoad(LoadSizeType, Source1);
+ Value *LoadSrc2 = Builder.CreateLoad(LoadSizeType, Source2);
+
+ LoadSrc1 = Builder.CreateZExt(LoadSrc1, Type::getInt32Ty(CI->getContext()));
+ LoadSrc2 = Builder.CreateZExt(LoadSrc2, Type::getInt32Ty(CI->getContext()));
+ Value *Diff = Builder.CreateSub(LoadSrc1, LoadSrc2);
+
+ PhiRes->addIncoming(Diff, LoadCmpBlocks[Index]);
+
+ if (Index < (LoadCmpBlocks.size() - 1)) {
+    // Early exit branch to EndBlock if a difference was found; otherwise,
+    // continue to the next LoadCmpBlock.
+ Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_NE, Diff,
+ ConstantInt::get(Diff->getType(), 0));
+ BranchInst *CmpBr =
+ BranchInst::Create(EndBlock, LoadCmpBlocks[Index + 1], Cmp);
+ Builder.Insert(CmpBr);
+ } else {
+ // The last block has an unconditional branch to EndBlock.
+ BranchInst *CmpBr = BranchInst::Create(EndBlock);
+ Builder.Insert(CmpBr);
+ }
+}
+
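For the byte case above, the zero-extension to i32 before the subtract is what makes the difference directly usable as the memcmp return value; a minimal sketch:

    %a = zext i8 %loadA to i32
    %b = zext i8 %loadB to i32
    %d = sub i32 %a, %b        ; negative, zero or positive, as memcmp requires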
+unsigned MemCmpExpansion::getNumLoads(unsigned Size) {
+ return (Size / MaxLoadSize) + countPopulation(Size % MaxLoadSize);
+}
+
+unsigned MemCmpExpansion::getLoadSize(unsigned Size) {
+ return MinAlign(PowerOf2Floor(Size), MaxLoadSize);
+}
+
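A hand-worked example of these two helpers: for Size = 15 and MaxLoadSize = 8,

    getNumLoads(15) = 15/8 + popcount(15 % 8) = 1 + 3 = 4
    getLoadSize over the shrinking remainder: 15 -> 8, 7 -> 4, 3 -> 2, 1 -> 1

that is, one load per set bit of the size, capped at MaxLoadSize.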
+/// Generate an equality comparison for one or more pairs of loaded values.
+/// This is used in the case where the memcmp() call is compared equal or not
+/// equal to zero.
+Value *MemCmpExpansion::getCompareLoadPairs(unsigned Index, unsigned Size,
+ unsigned &NumBytesProcessed) {
+ std::vector<Value *> XorList, OrList;
+ Value *Diff;
+
+ unsigned RemainingBytes = Size - NumBytesProcessed;
+ unsigned NumLoadsRemaining = getNumLoads(RemainingBytes);
+ unsigned NumLoads = std::min(NumLoadsRemaining, NumLoadsPerBlock);
+
+ // For a single-block expansion, start inserting before the memcmp call.
+ if (LoadCmpBlocks.empty())
+ Builder.SetInsertPoint(CI);
+ else
+ Builder.SetInsertPoint(LoadCmpBlocks[Index]);
+
+ Value *Cmp = nullptr;
+ for (unsigned i = 0; i < NumLoads; ++i) {
+ unsigned LoadSize = getLoadSize(RemainingBytes);
+ unsigned GEPIndex = NumBytesProcessed / LoadSize;
+ NumBytesProcessed += LoadSize;
+ RemainingBytes -= LoadSize;
+
+ Type *LoadSizeType = IntegerType::get(CI->getContext(), LoadSize * 8);
+ Type *MaxLoadType = IntegerType::get(CI->getContext(), MaxLoadSize * 8);
+ assert(LoadSize <= MaxLoadSize && "Unexpected load type");
+
+ Value *Source1 = CI->getArgOperand(0);
+ Value *Source2 = CI->getArgOperand(1);
+
+ // Cast source to LoadSizeType*.
+ if (Source1->getType() != LoadSizeType)
+ Source1 = Builder.CreateBitCast(Source1, LoadSizeType->getPointerTo());
+ if (Source2->getType() != LoadSizeType)
+ Source2 = Builder.CreateBitCast(Source2, LoadSizeType->getPointerTo());
+
+ // Get the base address using the GEPIndex.
+ if (GEPIndex != 0) {
+ Source1 = Builder.CreateGEP(LoadSizeType, Source1,
+ ConstantInt::get(LoadSizeType, GEPIndex));
+ Source2 = Builder.CreateGEP(LoadSizeType, Source2,
+ ConstantInt::get(LoadSizeType, GEPIndex));
+ }
+
+ // Get a constant or load a value for each source address.
+ Value *LoadSrc1 = nullptr;
+ if (auto *Source1C = dyn_cast<Constant>(Source1))
+ LoadSrc1 = ConstantFoldLoadFromConstPtr(Source1C, LoadSizeType, DL);
+ if (!LoadSrc1)
+ LoadSrc1 = Builder.CreateLoad(LoadSizeType, Source1);
+
+ Value *LoadSrc2 = nullptr;
+ if (auto *Source2C = dyn_cast<Constant>(Source2))
+ LoadSrc2 = ConstantFoldLoadFromConstPtr(Source2C, LoadSizeType, DL);
+ if (!LoadSrc2)
+ LoadSrc2 = Builder.CreateLoad(LoadSizeType, Source2);
+
+ if (NumLoads != 1) {
+ if (LoadSizeType != MaxLoadType) {
+ LoadSrc1 = Builder.CreateZExt(LoadSrc1, MaxLoadType);
+ LoadSrc2 = Builder.CreateZExt(LoadSrc2, MaxLoadType);
+ }
+ // If we have multiple loads per block, we need to generate a composite
+ // comparison using xor+or.
+ Diff = Builder.CreateXor(LoadSrc1, LoadSrc2);
+ Diff = Builder.CreateZExt(Diff, MaxLoadType);
+ XorList.push_back(Diff);
+ } else {
+ // If there's only one load per block, we just compare the loaded values.
+ Cmp = Builder.CreateICmpNE(LoadSrc1, LoadSrc2);
+ }
+ }
+
+ auto pairWiseOr = [&](std::vector<Value *> &InList) -> std::vector<Value *> {
+ std::vector<Value *> OutList;
+ for (unsigned i = 0; i < InList.size() - 1; i = i + 2) {
+ Value *Or = Builder.CreateOr(InList[i], InList[i + 1]);
+ OutList.push_back(Or);
+ }
+ if (InList.size() % 2 != 0)
+ OutList.push_back(InList.back());
+ return OutList;
+ };
+
+ if (!Cmp) {
+ // Pairwise OR the XOR results.
+ OrList = pairWiseOr(XorList);
+
+ // Pairwise OR the OR results until one result left.
+ while (OrList.size() != 1) {
+ OrList = pairWiseOr(OrList);
+ }
+ Cmp = Builder.CreateICmpNE(OrList[0], ConstantInt::get(Diff->getType(), 0));
+ }
+
+ return Cmp;
+}
+
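For the multi-load zero-equality case, the xor/or tree built above collapses to a single inequality test; a hedged two-load illustration:

    %d0 = xor i64 %a0, %b0
    %d1 = xor i64 %a1, %b1
    %or = or i64 %d0, %d1
    %ne = icmp ne i64 %or, 0   ; true iff any compared byte differs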
+void MemCmpExpansion::emitLoadCompareBlockMultipleLoads(
+ unsigned Index, unsigned Size, unsigned &NumBytesProcessed) {
+ Value *Cmp = getCompareLoadPairs(Index, Size, NumBytesProcessed);
+
+ BasicBlock *NextBB = (Index == (LoadCmpBlocks.size() - 1))
+ ? EndBlock
+ : LoadCmpBlocks[Index + 1];
+  // Early exit branch to ResultBlock if a difference was found; otherwise,
+  // continue to the next LoadCmpBlock or to EndBlock.
+ BranchInst *CmpBr = BranchInst::Create(ResBlock.BB, NextBB, Cmp);
+ Builder.Insert(CmpBr);
+
+ // Add a phi edge for the last LoadCmpBlock to EndBlock with a value of 0
+ // since early exit to ResultBlock was not taken (no difference was found in
+ // any of the bytes).
+ if (Index == LoadCmpBlocks.size() - 1) {
+ Value *Zero = ConstantInt::get(Type::getInt32Ty(CI->getContext()), 0);
+ PhiRes->addIncoming(Zero, LoadCmpBlocks[Index]);
+ }
+}
+
+// This function creates the IR instructions for loading and comparing using
+// the given LoadSize. It loads the number of bytes specified by LoadSize from
+// each source of the memcmp parameters. It then subtracts the loaded values
+// to see if there was a difference. If a difference is found, it branches
+// with an early exit to the ResultBlock for calculating which source was
+// larger. Otherwise, it falls through to either the next LoadCmpBlock or, if
+// this is the last LoadCmpBlock, the EndBlock. Loading 1 byte is handled as a
+// special case through emitLoadCompareByteBlock, which can simply subtract
+// the loaded values and feed the difference to the result phi node.
+void MemCmpExpansion::emitLoadCompareBlock(unsigned Index, unsigned LoadSize,
+ unsigned GEPIndex) {
+ if (LoadSize == 1) {
+ MemCmpExpansion::emitLoadCompareByteBlock(Index, GEPIndex);
+ return;
+ }
+
+ Type *LoadSizeType = IntegerType::get(CI->getContext(), LoadSize * 8);
+ Type *MaxLoadType = IntegerType::get(CI->getContext(), MaxLoadSize * 8);
+ assert(LoadSize <= MaxLoadSize && "Unexpected load type");
+
+ Value *Source1 = CI->getArgOperand(0);
+ Value *Source2 = CI->getArgOperand(1);
+
+ Builder.SetInsertPoint(LoadCmpBlocks[Index]);
+ // Cast source to LoadSizeType*.
+ if (Source1->getType() != LoadSizeType)
+ Source1 = Builder.CreateBitCast(Source1, LoadSizeType->getPointerTo());
+ if (Source2->getType() != LoadSizeType)
+ Source2 = Builder.CreateBitCast(Source2, LoadSizeType->getPointerTo());
+
+ // Compute the address for this load using GEPIndex.
+ if (GEPIndex != 0) {
+ Source1 = Builder.CreateGEP(LoadSizeType, Source1,
+ ConstantInt::get(LoadSizeType, GEPIndex));
+ Source2 = Builder.CreateGEP(LoadSizeType, Source2,
+ ConstantInt::get(LoadSizeType, GEPIndex));
+ }
+
+ // Load LoadSizeType from the base address.
+ Value *LoadSrc1 = Builder.CreateLoad(LoadSizeType, Source1);
+ Value *LoadSrc2 = Builder.CreateLoad(LoadSizeType, Source2);
+
+ if (DL.isLittleEndian()) {
+ Function *Bswap = Intrinsic::getDeclaration(CI->getModule(),
+ Intrinsic::bswap, LoadSizeType);
+ LoadSrc1 = Builder.CreateCall(Bswap, LoadSrc1);
+ LoadSrc2 = Builder.CreateCall(Bswap, LoadSrc2);
+ }
+
+ if (LoadSizeType != MaxLoadType) {
+ LoadSrc1 = Builder.CreateZExt(LoadSrc1, MaxLoadType);
+ LoadSrc2 = Builder.CreateZExt(LoadSrc2, MaxLoadType);
+ }
+
+ // Add the loaded values to the phi nodes for calculating the memcmp result,
+ // but only if the result is not used in a zero-equality comparison.
+ if (!IsUsedForZeroCmp) {
+ ResBlock.PhiSrc1->addIncoming(LoadSrc1, LoadCmpBlocks[Index]);
+ ResBlock.PhiSrc2->addIncoming(LoadSrc2, LoadCmpBlocks[Index]);
+ }
+
+ Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_EQ, LoadSrc1, LoadSrc2);
+ BasicBlock *NextBB = (Index == (LoadCmpBlocks.size() - 1))
+ ? EndBlock
+ : LoadCmpBlocks[Index + 1];
+ // Branch to ResultBlock for an early exit if a difference was found.
+ // Otherwise, continue to the next LoadCmpBlock or to EndBlock.
+ BranchInst *CmpBr = BranchInst::Create(NextBB, ResBlock.BB, Cmp);
+ Builder.Insert(CmpBr);
+
+ // Add a phi edge for the last LoadCmpBlock to EndBlock with a value of 0
+ // since early exit to ResultBlock was not taken (no difference was found in
+ // any of the bytes).
+ if (Index == LoadCmpBlocks.size() - 1) {
+ Value *Zero = ConstantInt::get(Type::getInt32Ty(CI->getContext()), 0);
+ PhiRes->addIncoming(Zero, LoadCmpBlocks[Index]);
+ }
+}
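
The bswap calls above are needed because memcmp compares buffers as sequences of unsigned bytes: on a little-endian target, a multi-byte load reverses the byte order relative to memory, so the loaded words must be byte-swapped before an unsigned integer comparison can stand in for the byte-wise one. A minimal C++ sketch of the same trick, assuming a little-endian host and the GCC/Clang __builtin_bswap32 builtin (illustrative, not from the patch):

    #include <cstdint>
    #include <cstring>

    // Compare the first 4 bytes of two buffers with memcmp semantics using
    // one 32-bit load per side. Assumes a little-endian host, so the loads
    // must be byte-swapped for unsigned order to match byte-wise order.
    static int memcmp4(const void *P1, const void *P2) {
      uint32_t A, B;
      std::memcpy(&A, P1, 4);
      std::memcpy(&B, P2, 4);
      A = __builtin_bswap32(A);
      B = __builtin_bswap32(B);
      if (A == B)
        return 0;
      return A < B ? -1 : 1; // mirrors the ICMP_ULT + select in the IR
    }
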
+
+// This function populates the ResultBlock with a sequence to calculate the
+// memcmp result. It compares the two loaded source values and returns -1 if
+// src1 < src2 and 1 if src1 > src2.
+void MemCmpExpansion::emitMemCmpResultBlock() {
+ // Special case: if the memcmp result is only used in a zero-equality
+ // comparison, the exact value does not need to be calculated and we can
+ // simply return 1.
+ if (IsUsedForZeroCmp) {
+ BasicBlock::iterator InsertPt = ResBlock.BB->getFirstInsertionPt();
+ Builder.SetInsertPoint(ResBlock.BB, InsertPt);
+ Value *Res = ConstantInt::get(Type::getInt32Ty(CI->getContext()), 1);
+ PhiRes->addIncoming(Res, ResBlock.BB);
+ BranchInst *NewBr = BranchInst::Create(EndBlock);
+ Builder.Insert(NewBr);
+ return;
+ }
+ BasicBlock::iterator InsertPt = ResBlock.BB->getFirstInsertionPt();
+ Builder.SetInsertPoint(ResBlock.BB, InsertPt);
+
+ Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_ULT, ResBlock.PhiSrc1,
+ ResBlock.PhiSrc2);
+
+ Value *Res =
+ Builder.CreateSelect(Cmp, ConstantInt::get(Builder.getInt32Ty(), -1),
+ ConstantInt::get(Builder.getInt32Ty(), 1));
+
+ BranchInst *NewBr = BranchInst::Create(EndBlock);
+ Builder.Insert(NewBr);
+ PhiRes->addIncoming(Res, ResBlock.BB);
+}
+
+unsigned MemCmpExpansion::calculateNumBlocks(unsigned Size) {
+ unsigned NumBlocks = 0;
+ bool HaveOneByteLoad = false;
+ unsigned RemainingSize = Size;
+ unsigned LoadSize = MaxLoadSize;
+ while (RemainingSize) {
+ if (LoadSize == 1)
+ HaveOneByteLoad = true;
+ NumBlocks += RemainingSize / LoadSize;
+ RemainingSize = RemainingSize % LoadSize;
+ LoadSize = LoadSize / 2;
+ }
+ NumBlocksNonOneByte = HaveOneByteLoad ? (NumBlocks - 1) : NumBlocks;
+
+ if (IsUsedForZeroCmp)
+ NumBlocks = NumBlocks / NumLoadsPerBlock +
+ (NumBlocks % NumLoadsPerBlock != 0 ? 1 : 0);
+
+ return NumBlocks;
+}
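
As a worked example of the counting loop above: with Size = 15 and MaxLoadSize = 8, the greedy decomposition selects one load each of 8, 4, 2 and 1 bytes, giving NumBlocks = 4 and NumBlocksNonOneByte = 3. A self-contained re-implementation of just the counting (illustrative names, not from the patch):

    // Mirrors the greedy loop in calculateNumBlocks: cover Size bytes with
    // power-of-2 loads, starting from the largest size the target allows.
    static unsigned numBlocks(unsigned Size, unsigned MaxLoadSize) {
      unsigned Blocks = 0;
      for (unsigned LoadSize = MaxLoadSize; Size; LoadSize /= 2) {
        Blocks += Size / LoadSize;   // how many loads of this size fit
        Size %= LoadSize;            // bytes still uncovered
      }
      return Blocks;                 // numBlocks(15, 8) == 4
    }
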
+
+void MemCmpExpansion::setupResultBlockPHINodes() {
+ Type *MaxLoadType = IntegerType::get(CI->getContext(), MaxLoadSize * 8);
+ Builder.SetInsertPoint(ResBlock.BB);
+ ResBlock.PhiSrc1 =
+ Builder.CreatePHI(MaxLoadType, NumBlocksNonOneByte, "phi.src1");
+ ResBlock.PhiSrc2 =
+ Builder.CreatePHI(MaxLoadType, NumBlocksNonOneByte, "phi.src2");
+}
+
+void MemCmpExpansion::setupEndBlockPHINodes() {
+ Builder.SetInsertPoint(&EndBlock->front());
+ PhiRes = Builder.CreatePHI(Type::getInt32Ty(CI->getContext()), 2, "phi.res");
+}
+
+Value *MemCmpExpansion::getMemCmpExpansionZeroCase(unsigned Size) {
+ unsigned NumBytesProcessed = 0;
+ // This loop populates each of the LoadCmpBlocks with the IR sequence to
+ // handle multiple loads per block.
+ for (unsigned i = 0; i < NumBlocks; ++i)
+ emitLoadCompareBlockMultipleLoads(i, Size, NumBytesProcessed);
+
+ emitMemCmpResultBlock();
+ return PhiRes;
+}
+
+/// A memcmp expansion whose result is only compared for equality with zero,
+/// and which has a single load-and-compare block, can bypass the compare,
+/// branch, and phi IR that is required in the general case.
+Value *MemCmpExpansion::getMemCmpEqZeroOneBlock(unsigned Size) {
+ unsigned NumBytesProcessed = 0;
+ Value *Cmp = getCompareLoadPairs(0, Size, NumBytesProcessed);
+ return Builder.CreateZExt(Cmp, Type::getInt32Ty(CI->getContext()));
+}
+
+/// A memcmp expansion that only has one block of load and compare can bypass
+/// the compare, branch, and phi IR that is required in the general case.
+Value *MemCmpExpansion::getMemCmpOneBlock(unsigned Size) {
+ assert(NumLoadsPerBlock == 1 && "Only handles one load pair per block");
+
+ Type *LoadSizeType = IntegerType::get(CI->getContext(), Size * 8);
+ Value *Source1 = CI->getArgOperand(0);
+ Value *Source2 = CI->getArgOperand(1);
+
+ // Cast source to LoadSizeType*.
+ if (Source1->getType() != LoadSizeType)
+ Source1 = Builder.CreateBitCast(Source1, LoadSizeType->getPointerTo());
+ if (Source2->getType() != LoadSizeType)
+ Source2 = Builder.CreateBitCast(Source2, LoadSizeType->getPointerTo());
+
+ // Load LoadSizeType from the base address.
+ Value *LoadSrc1 = Builder.CreateLoad(LoadSizeType, Source1);
+ Value *LoadSrc2 = Builder.CreateLoad(LoadSizeType, Source2);
+
+ if (DL.isLittleEndian() && Size != 1) {
+ Function *Bswap = Intrinsic::getDeclaration(CI->getModule(),
+ Intrinsic::bswap, LoadSizeType);
+ LoadSrc1 = Builder.CreateCall(Bswap, LoadSrc1);
+ LoadSrc2 = Builder.CreateCall(Bswap, LoadSrc2);
+ }
+
+ // TODO: Instead of comparing ULT, just subtract and return the difference?
+ Value *CmpNE = Builder.CreateICmpNE(LoadSrc1, LoadSrc2);
+ Value *CmpULT = Builder.CreateICmpULT(LoadSrc1, LoadSrc2);
+ Type *I32 = Builder.getInt32Ty();
+ Value *Sel1 = Builder.CreateSelect(CmpULT, ConstantInt::get(I32, -1),
+ ConstantInt::get(I32, 1));
+ return Builder.CreateSelect(CmpNE, Sel1, ConstantInt::get(I32, 0));
+}
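
The icmp/select chain emitted above is the scalar memcmp contract: 0 when equal, -1 when the first (byte-swapped) word is smaller, +1 otherwise. Its direct C++ equivalent (illustrative only):

    #include <cstdint>

    // Scalar equivalent of getMemCmpOneBlock's CmpNE/CmpULT/select chain;
    // A and B are the byte-swapped loads, so unsigned order is byte order.
    static int cmpResult(uint64_t A, uint64_t B) {
      return A != B ? (A < B ? -1 : 1) : 0;
    }
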
+
+// This function expands the memcmp call into an inline sequence of loads and
+// compares and returns the memcmp result.
+Value *MemCmpExpansion::getMemCmpExpansion(uint64_t Size) {
+ if (IsUsedForZeroCmp)
+ return NumBlocks == 1 ? getMemCmpEqZeroOneBlock(Size) :
+ getMemCmpExpansionZeroCase(Size);
+
+ // TODO: Handle more than one load pair per block in getMemCmpOneBlock().
+ if (NumBlocks == 1 && NumLoadsPerBlock == 1)
+ return getMemCmpOneBlock(Size);
+
+ // This loop calls emitLoadCompareBlock for comparing Size bytes of the two
+ // memcmp sources. It starts by loading with the maximum load size set by the
+ // target, then processes any remaining bytes using load sizes that step down
+ // through the next smaller powers of 2.
+ unsigned LoadSize = MaxLoadSize;
+ unsigned NumBytesToBeProcessed = Size;
+ unsigned Index = 0;
+ while (NumBytesToBeProcessed) {
+ // Calculate how many blocks we can create with the current load size.
+ unsigned NumBlocks = NumBytesToBeProcessed / LoadSize;
+ unsigned GEPIndex = (Size - NumBytesToBeProcessed) / LoadSize;
+ NumBytesToBeProcessed = NumBytesToBeProcessed % LoadSize;
+
+ // For each of these blocks, populate the instruction sequence for loading
+ // and comparing LoadSize bytes.
+ while (NumBlocks--) {
+ emitLoadCompareBlock(Index, LoadSize, GEPIndex);
+ Index++;
+ GEPIndex++;
+ }
+ // Get the next LoadSize to use.
+ LoadSize = LoadSize / 2;
+ }
+
+ emitMemCmpResultBlock();
+ return PhiRes;
+}
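
The outer loop above produces the (LoadSize, GEPIndex) schedule that reappears in the worked IR example further down: for Size = 15 and MaxLoadSize = 8 it emits an 8-byte load at element 0, a 4-byte load at element 2, a 2-byte load at element 6, and a 1-byte load at element 14. A small sketch that reproduces just the schedule (illustrative, not from the patch):

    #include <cstdio>

    // Reproduce the (LoadSize, GEPIndex) schedule of getMemCmpExpansion's
    // outer loop for Size = 15, MaxLoadSize = 8.
    int main() {
      unsigned Size = 15, MaxLoadSize = 8, Remaining = Size;
      for (unsigned LoadSize = MaxLoadSize; Remaining; LoadSize /= 2) {
        unsigned NumBlocks = Remaining / LoadSize;
        unsigned GEPIndex = (Size - Remaining) / LoadSize;
        Remaining %= LoadSize;
        while (NumBlocks--)
          std::printf("load %u bytes at element %u\n", LoadSize, GEPIndex++);
      }
      // Prints: 8 at 0, 4 at 2, 2 at 6, 1 at 14 -- matching the
      // loadbb..loadbb3 blocks in the example below.
    }
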
+
+// This function checks whether an inline expansion of memcmp can be
+// generated. It requires a constant compare size whose expansion stays within
+// the target's limit on the number of loads. If an expansion cannot occur, it
+// returns false so the call is left as a library call. Otherwise, the library
+// call is replaced with a new IR instruction sequence.
+/// We want to transform:
+/// %call = call signext i32 @memcmp(i8* %0, i8* %1, i64 15)
+/// To:
+/// loadbb:
+/// %0 = bitcast i32* %buffer2 to i8*
+/// %1 = bitcast i32* %buffer1 to i8*
+/// %2 = bitcast i8* %1 to i64*
+/// %3 = bitcast i8* %0 to i64*
+/// %4 = load i64, i64* %2
+/// %5 = load i64, i64* %3
+/// %6 = call i64 @llvm.bswap.i64(i64 %4)
+/// %7 = call i64 @llvm.bswap.i64(i64 %5)
+/// %8 = sub i64 %6, %7
+/// %9 = icmp ne i64 %8, 0
+/// br i1 %9, label %res_block, label %loadbb1
+/// res_block: ; preds = %loadbb2,
+/// %loadbb1, %loadbb
+/// %phi.src1 = phi i64 [ %6, %loadbb ], [ %22, %loadbb1 ], [ %36, %loadbb2 ]
+/// %phi.src2 = phi i64 [ %7, %loadbb ], [ %23, %loadbb1 ], [ %37, %loadbb2 ]
+/// %10 = icmp ult i64 %phi.src1, %phi.src2
+/// %11 = select i1 %10, i32 -1, i32 1
+/// br label %endblock
+/// loadbb1: ; preds = %loadbb
+/// %12 = bitcast i32* %buffer2 to i8*
+/// %13 = bitcast i32* %buffer1 to i8*
+/// %14 = bitcast i8* %13 to i32*
+/// %15 = bitcast i8* %12 to i32*
+/// %16 = getelementptr i32, i32* %14, i32 2
+/// %17 = getelementptr i32, i32* %15, i32 2
+/// %18 = load i32, i32* %16
+/// %19 = load i32, i32* %17
+/// %20 = call i32 @llvm.bswap.i32(i32 %18)
+/// %21 = call i32 @llvm.bswap.i32(i32 %19)
+/// %22 = zext i32 %20 to i64
+/// %23 = zext i32 %21 to i64
+/// %24 = sub i64 %22, %23
+/// %25 = icmp ne i64 %24, 0
+/// br i1 %25, label %res_block, label %loadbb2
+/// loadbb2: ; preds = %loadbb1
+/// %26 = bitcast i32* %buffer2 to i8*
+/// %27 = bitcast i32* %buffer1 to i8*
+/// %28 = bitcast i8* %27 to i16*
+/// %29 = bitcast i8* %26 to i16*
+/// %30 = getelementptr i16, i16* %28, i16 6
+/// %31 = getelementptr i16, i16* %29, i16 6
+/// %32 = load i16, i16* %30
+/// %33 = load i16, i16* %31
+/// %34 = call i16 @llvm.bswap.i16(i16 %32)
+/// %35 = call i16 @llvm.bswap.i16(i16 %33)
+/// %36 = zext i16 %34 to i64
+/// %37 = zext i16 %35 to i64
+/// %38 = sub i64 %36, %37
+/// %39 = icmp ne i64 %38, 0
+/// br i1 %39, label %res_block, label %loadbb3
+/// loadbb3: ; preds = %loadbb2
+/// %40 = bitcast i32* %buffer2 to i8*
+/// %41 = bitcast i32* %buffer1 to i8*
+/// %42 = getelementptr i8, i8* %41, i8 14
+/// %43 = getelementptr i8, i8* %40, i8 14
+/// %44 = load i8, i8* %42
+/// %45 = load i8, i8* %43
+/// %46 = zext i8 %44 to i32
+/// %47 = zext i8 %45 to i32
+/// %48 = sub i32 %46, %47
+/// br label %endblock
+/// endblock: ; preds = %res_block,
+/// %loadbb3
+/// %phi.res = phi i32 [ %48, %loadbb3 ], [ %11, %res_block ]
+/// ret i32 %phi.res
+static bool expandMemCmp(CallInst *CI, const TargetTransformInfo *TTI,
+ const TargetLowering *TLI, const DataLayout *DL) {
+ NumMemCmpCalls++;
+
+ // Ask TTI whether the target would like to expand this memcmp; this also
+ // provides the MaxLoadSize.
+ unsigned MaxLoadSize;
+ if (!TTI->expandMemCmp(CI, MaxLoadSize))
+ return false;
+
+ // Early exit from expansion if -Oz.
+ if (CI->getFunction()->optForMinSize())
+ return false;
+
+ // Early exit from expansion if size is not a constant.
+ ConstantInt *SizeCast = dyn_cast<ConstantInt>(CI->getArgOperand(2));
+ if (!SizeCast) {
+ NumMemCmpNotConstant++;
+ return false;
+ }
+
+ // Early exit from expansion if the size requires more loads than the target
+ // allows.
+ uint64_t SizeVal = SizeCast->getZExtValue();
+ unsigned NumLoads = 0;
+ unsigned RemainingSize = SizeVal;
+ unsigned LoadSize = MaxLoadSize;
+ while (RemainingSize) {
+ NumLoads += RemainingSize / LoadSize;
+ RemainingSize = RemainingSize % LoadSize;
+ LoadSize = LoadSize / 2;
+ }
+
+ if (NumLoads > TLI->getMaxExpandSizeMemcmp(CI->getFunction()->optForSize())) {
+ NumMemCmpGreaterThanMax++;
+ return false;
+ }
+
+ NumMemCmpInlined++;
+
+ // The MemCmpExpansion helper creates and sets up the basic blocks required
+ // for expanding memcmp with size SizeVal.
+ unsigned NumLoadsPerBlock = MemCmpNumLoadsPerBlock;
+ MemCmpExpansion MemCmpHelper(CI, SizeVal, MaxLoadSize, NumLoadsPerBlock, *DL);
+
+ Value *Res = MemCmpHelper.getMemCmpExpansion(SizeVal);
+
+ // Replace call with result of expansion and erase call.
+ CI->replaceAllUsesWith(Res);
+ CI->eraseFromParent();
+
+ return true;
+}
+
+bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool &ModifiedDT) {
BasicBlock *BB = CI->getParent();
// Lower inline assembly if we can.
@@ -1955,10 +2376,11 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool& ModifiedDT) {
ConstantInt *RetVal =
lowerObjectSizeCall(II, *DL, TLInfo, /*MustSucceed=*/true);
// Substituting this can cause recursive simplifications, which can
- // invalidate our iterator. Use a WeakVH to hold onto it in case this
+ // invalidate our iterator. Use a WeakTrackingVH to hold onto it in case
+ // this
// happens.
Value *CurValue = &*CurInstIterator;
- WeakVH IterHandle(CurValue);
+ WeakTrackingVH IterHandle(CurValue);
replaceAndRecursivelySimplify(CI, RetVal, TLInfo, nullptr);
@@ -1970,39 +2392,6 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool& ModifiedDT) {
}
return true;
}
- case Intrinsic::masked_load: {
- // Scalarize unsupported vector masked load
- if (!TTI->isLegalMaskedLoad(CI->getType())) {
- scalarizeMaskedLoad(CI);
- ModifiedDT = true;
- return true;
- }
- return false;
- }
- case Intrinsic::masked_store: {
- if (!TTI->isLegalMaskedStore(CI->getArgOperand(0)->getType())) {
- scalarizeMaskedStore(CI);
- ModifiedDT = true;
- return true;
- }
- return false;
- }
- case Intrinsic::masked_gather: {
- if (!TTI->isLegalMaskedGather(CI->getType())) {
- scalarizeMaskedGather(CI);
- ModifiedDT = true;
- return true;
- }
- return false;
- }
- case Intrinsic::masked_scatter: {
- if (!TTI->isLegalMaskedScatter(CI->getArgOperand(0)->getType())) {
- scalarizeMaskedScatter(CI);
- ModifiedDT = true;
- return true;
- }
- return false;
- }
case Intrinsic::aarch64_stlxr:
case Intrinsic::aarch64_stxr: {
ZExtInst *ExtVal = dyn_cast<ZExtInst>(CI->getArgOperand(0));
@@ -2028,16 +2417,15 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool& ModifiedDT) {
}
if (TLI) {
- // Unknown address space.
- // TODO: Target hook to pick which address space the intrinsic cares
- // about?
- unsigned AddrSpace = ~0u;
SmallVector<Value*, 2> PtrOps;
Type *AccessTy;
- if (TLI->GetAddrModeArguments(II, PtrOps, AccessTy, AddrSpace))
- while (!PtrOps.empty())
- if (optimizeMemoryInst(II, PtrOps.pop_back_val(), AccessTy, AddrSpace))
+ if (TLI->getAddrModeArguments(II, PtrOps, AccessTy))
+ while (!PtrOps.empty()) {
+ Value *PtrVal = PtrOps.pop_back_val();
+ unsigned AS = PtrVal->getType()->getPointerAddressSpace();
+ if (optimizeMemoryInst(II, PtrVal, AccessTy, AS))
return true;
+ }
}
}
@@ -2054,6 +2442,13 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool& ModifiedDT) {
CI->eraseFromParent();
return true;
}
+
+ LibFunc Func;
+ if (TLInfo->getLibFunc(ImmutableCallSite(CI), Func) &&
+ Func == LibFunc_memcmp && expandMemCmp(CI, TTI, TLI, DL)) {
+ ModifiedDT = true;
+ return true;
+ }
return false;
}
@@ -2168,11 +2563,11 @@ bool CodeGenPrepare::dupRetToEnableTailCallOpts(BasicBlock *BB) {
// Conservatively require the attributes of the call to match those of the
// return. Ignore noalias because it doesn't affect the call sequence.
- AttributeSet CalleeAttrs = CS.getAttributes();
- if (AttrBuilder(CalleeAttrs, AttributeSet::ReturnIndex).
- removeAttribute(Attribute::NoAlias) !=
- AttrBuilder(CalleeAttrs, AttributeSet::ReturnIndex).
- removeAttribute(Attribute::NoAlias))
+ AttributeList CalleeAttrs = CS.getAttributes();
+ if (AttrBuilder(CalleeAttrs, AttributeList::ReturnIndex)
+ .removeAttribute(Attribute::NoAlias) !=
+ AttrBuilder(CalleeAttrs, AttributeList::ReturnIndex)
+ .removeAttribute(Attribute::NoAlias))
continue;
// Make sure the call instruction is followed by an unconditional branch to
@@ -2561,25 +2956,30 @@ class TypePromotionTransaction {
OperandsHider Hider;
/// Keep track of the uses replaced, if any.
UsesReplacer *Replacer;
+ /// Keep track of instructions removed.
+ SetOfInstrs &RemovedInsts;
public:
/// \brief Remove all references of \p Inst and optionally replace all its
/// uses with New.
+ /// \p RemovedInsts Keep track of the instructions removed by this Action.
/// \pre If !Inst->use_empty(), then New != nullptr
- InstructionRemover(Instruction *Inst, Value *New = nullptr)
+ InstructionRemover(Instruction *Inst, SetOfInstrs &RemovedInsts,
+ Value *New = nullptr)
: TypePromotionAction(Inst), Inserter(Inst), Hider(Inst),
- Replacer(nullptr) {
+ Replacer(nullptr), RemovedInsts(RemovedInsts) {
if (New)
Replacer = new UsesReplacer(Inst, New);
DEBUG(dbgs() << "Do: InstructionRemover: " << *Inst << "\n");
+ RemovedInsts.insert(Inst);
+ /// The instructions removed here will be freed after completing
+ /// optimizeBlock() for all blocks as we need to keep track of the
+ /// removed instructions during promotion.
Inst->removeFromParent();
}
~InstructionRemover() override { delete Replacer; }
- /// \brief Really remove the instruction.
- void commit() override { delete Inst; }
-
/// \brief Resurrect the instruction and reassign it to the proper uses if
/// new value was provided when build this action.
void undo() override {
@@ -2588,6 +2988,7 @@ class TypePromotionTransaction {
if (Replacer)
Replacer->undo();
Hider.undo();
+ RemovedInsts.erase(Inst);
}
};
@@ -2596,6 +2997,10 @@ public:
/// The restoration point is a pointer to an action instead of an iterator
/// because the iterator may be invalidated but not the pointer.
typedef const TypePromotionAction *ConstRestorationPt;
+
+ TypePromotionTransaction(SetOfInstrs &RemovedInsts)
+ : RemovedInsts(RemovedInsts) {}
+
/// Advocate every change made in that transaction.
void commit();
/// Undo all the changes made after the given point.
@@ -2627,6 +3032,7 @@ private:
/// The ordered list of actions made so far.
SmallVector<std::unique_ptr<TypePromotionAction>, 16> Actions;
typedef SmallVectorImpl<std::unique_ptr<TypePromotionAction>>::iterator CommitPt;
+ SetOfInstrs &RemovedInsts;
};
void TypePromotionTransaction::setOperand(Instruction *Inst, unsigned Idx,
@@ -2638,7 +3044,8 @@ void TypePromotionTransaction::setOperand(Instruction *Inst, unsigned Idx,
void TypePromotionTransaction::eraseInstruction(Instruction *Inst,
Value *NewVal) {
Actions.push_back(
- make_unique<TypePromotionTransaction::InstructionRemover>(Inst, NewVal));
+ make_unique<TypePromotionTransaction::InstructionRemover>(Inst,
+ RemovedInsts, NewVal));
}
void TypePromotionTransaction::replaceAllUsesWith(Instruction *Inst,
@@ -2705,8 +3112,8 @@ void TypePromotionTransaction::rollback(
/// This encapsulates the logic for matching the target-legal addressing modes.
class AddressingModeMatcher {
SmallVectorImpl<Instruction*> &AddrModeInsts;
- const TargetMachine &TM;
const TargetLowering &TLI;
+ const TargetRegisterInfo &TRI;
const DataLayout &DL;
/// AccessTy/MemoryInst - This is the type for the access (e.g. double) and
@@ -2731,14 +3138,14 @@ class AddressingModeMatcher {
bool IgnoreProfitability;
AddressingModeMatcher(SmallVectorImpl<Instruction *> &AMI,
- const TargetMachine &TM, Type *AT, unsigned AS,
+ const TargetLowering &TLI,
+ const TargetRegisterInfo &TRI,
+ Type *AT, unsigned AS,
Instruction *MI, ExtAddrMode &AM,
const SetOfInstrs &InsertedInsts,
InstrToOrigTy &PromotedInsts,
TypePromotionTransaction &TPT)
- : AddrModeInsts(AMI), TM(TM),
- TLI(*TM.getSubtargetImpl(*MI->getParent()->getParent())
- ->getTargetLowering()),
+ : AddrModeInsts(AMI), TLI(TLI), TRI(TRI),
DL(MI->getModule()->getDataLayout()), AccessTy(AT), AddrSpace(AS),
MemoryInst(MI), AddrMode(AM), InsertedInsts(InsertedInsts),
PromotedInsts(PromotedInsts), TPT(TPT) {
@@ -2756,13 +3163,15 @@ public:
static ExtAddrMode Match(Value *V, Type *AccessTy, unsigned AS,
Instruction *MemoryInst,
SmallVectorImpl<Instruction*> &AddrModeInsts,
- const TargetMachine &TM,
+ const TargetLowering &TLI,
+ const TargetRegisterInfo &TRI,
const SetOfInstrs &InsertedInsts,
InstrToOrigTy &PromotedInsts,
TypePromotionTransaction &TPT) {
ExtAddrMode Result;
- bool Success = AddressingModeMatcher(AddrModeInsts, TM, AccessTy, AS,
+ bool Success = AddressingModeMatcher(AddrModeInsts, TLI, TRI,
+ AccessTy, AS,
MemoryInst, Result, InsertedInsts,
PromotedInsts, TPT).matchAddr(V, 0);
(void)Success; assert(Success && "Couldn't select *anything*?");
@@ -3583,18 +3992,18 @@ bool AddressingModeMatcher::matchAddr(Value *Addr, unsigned Depth) {
/// Check to see if all uses of OpVal by the specified inline asm call are due
/// to memory operands. If so, return true, otherwise return false.
static bool IsOperandAMemoryOperand(CallInst *CI, InlineAsm *IA, Value *OpVal,
- const TargetMachine &TM) {
- const Function *F = CI->getParent()->getParent();
- const TargetLowering *TLI = TM.getSubtargetImpl(*F)->getTargetLowering();
- const TargetRegisterInfo *TRI = TM.getSubtargetImpl(*F)->getRegisterInfo();
+ const TargetLowering &TLI,
+ const TargetRegisterInfo &TRI) {
+ const Function *F = CI->getFunction();
TargetLowering::AsmOperandInfoVector TargetConstraints =
- TLI->ParseConstraints(F->getParent()->getDataLayout(), TRI,
+ TLI.ParseConstraints(F->getParent()->getDataLayout(), &TRI,
ImmutableCallSite(CI));
+
for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) {
TargetLowering::AsmOperandInfo &OpInfo = TargetConstraints[i];
// Compute the constraint code and ConstraintType to use.
- TLI->ComputeConstraintToUse(OpInfo, SDValue());
+ TLI.ComputeConstraintToUse(OpInfo, SDValue());
// If this asm operand is our Value*, and if it isn't an indirect memory
// operand, we can't fold it!
@@ -3607,13 +4016,18 @@ static bool IsOperandAMemoryOperand(CallInst *CI, InlineAsm *IA, Value *OpVal,
return true;
}
+// Max number of memory uses to look at before aborting the search to conserve
+// compile time.
+static constexpr int MaxMemoryUsesToScan = 20;
+
/// Recursively walk all the uses of I until we find a memory use.
/// If we find an obviously non-foldable instruction, return true.
/// Add the ultimately found memory instructions to MemoryUses.
static bool FindAllMemoryUses(
Instruction *I,
SmallVectorImpl<std::pair<Instruction *, unsigned>> &MemoryUses,
- SmallPtrSetImpl<Instruction *> &ConsideredInsts, const TargetMachine &TM) {
+ SmallPtrSetImpl<Instruction *> &ConsideredInsts, const TargetLowering &TLI,
+ const TargetRegisterInfo &TRI, int SeenInsts = 0) {
// If we already considered this instruction, we're done.
if (!ConsideredInsts.insert(I).second)
return false;
@@ -3626,8 +4040,12 @@ static bool FindAllMemoryUses(
// Loop over all the uses, recursively processing them.
for (Use &U : I->uses()) {
- Instruction *UserI = cast<Instruction>(U.getUser());
+ // Conservatively return true if we're seeing a large number or a deep chain
+ // of users. This avoids excessive compilation times in pathological cases.
+ if (SeenInsts++ >= MaxMemoryUsesToScan)
+ return true;
+ Instruction *UserI = cast<Instruction>(U.getUser());
if (LoadInst *LI = dyn_cast<LoadInst>(UserI)) {
MemoryUses.push_back(std::make_pair(LI, U.getOperandNo()));
continue;
@@ -3635,11 +4053,28 @@ static bool FindAllMemoryUses(
if (StoreInst *SI = dyn_cast<StoreInst>(UserI)) {
unsigned opNo = U.getOperandNo();
- if (opNo == 0) return true; // Storing addr, not into addr.
+ if (opNo != StoreInst::getPointerOperandIndex())
+ return true; // Storing addr, not into addr.
MemoryUses.push_back(std::make_pair(SI, opNo));
continue;
}
+ if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(UserI)) {
+ unsigned opNo = U.getOperandNo();
+ if (opNo != AtomicRMWInst::getPointerOperandIndex())
+ return true; // Storing addr, not into addr.
+ MemoryUses.push_back(std::make_pair(RMW, opNo));
+ continue;
+ }
+
+ if (AtomicCmpXchgInst *CmpX = dyn_cast<AtomicCmpXchgInst>(UserI)) {
+ unsigned opNo = U.getOperandNo();
+ if (opNo != AtomicCmpXchgInst::getPointerOperandIndex())
+ return true; // Storing addr, not into addr.
+ MemoryUses.push_back(std::make_pair(CmpX, opNo));
+ continue;
+ }
+
if (CallInst *CI = dyn_cast<CallInst>(UserI)) {
// If this is a cold call, we can sink the addressing calculation into
// the cold path. See optimizeCallInst
@@ -3650,12 +4085,13 @@ static bool FindAllMemoryUses(
if (!IA) return true;
// If this is a memory operand, we're cool, otherwise bail out.
- if (!IsOperandAMemoryOperand(CI, IA, I, TM))
+ if (!IsOperandAMemoryOperand(CI, IA, I, TLI, TRI))
return true;
continue;
}
- if (FindAllMemoryUses(UserI, MemoryUses, ConsideredInsts, TM))
+ if (FindAllMemoryUses(UserI, MemoryUses, ConsideredInsts, TLI, TRI,
+ SeenInsts))
return true;
}
@@ -3743,7 +4179,7 @@ isProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore,
// the use is just a particularly nice way of sinking it.
SmallVector<std::pair<Instruction*,unsigned>, 16> MemoryUses;
SmallPtrSet<Instruction*, 16> ConsideredInsts;
- if (FindAllMemoryUses(I, MemoryUses, ConsideredInsts, TM))
+ if (FindAllMemoryUses(I, MemoryUses, ConsideredInsts, TLI, TRI))
return false; // Has a non-memory, non-foldable use!
// Now that we know that all uses of this instruction are part of a chain of
@@ -3775,7 +4211,8 @@ isProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore,
ExtAddrMode Result;
TypePromotionTransaction::ConstRestorationPt LastKnownGood =
TPT.getRestorationPoint();
- AddressingModeMatcher Matcher(MatchedAddrModeInsts, TM, AddressAccessTy, AS,
+ AddressingModeMatcher Matcher(MatchedAddrModeInsts, TLI, TRI,
+ AddressAccessTy, AS,
MemoryInst, Result, InsertedInsts,
PromotedInsts, TPT);
Matcher.IgnoreProfitability = true;
@@ -3839,84 +4276,70 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
// Use a worklist to iteratively look through PHI nodes, and ensure that
// the addressing mode obtained from the non-PHI roots of the graph
// are equivalent.
- Value *Consensus = nullptr;
- unsigned NumUsesConsensus = 0;
- bool IsNumUsesConsensusValid = false;
+ bool AddrModeFound = false;
+ bool PhiSeen = false;
SmallVector<Instruction*, 16> AddrModeInsts;
ExtAddrMode AddrMode;
- TypePromotionTransaction TPT;
+ TypePromotionTransaction TPT(RemovedInsts);
TypePromotionTransaction::ConstRestorationPt LastKnownGood =
TPT.getRestorationPoint();
while (!worklist.empty()) {
Value *V = worklist.back();
worklist.pop_back();
- // Break use-def graph loops.
- if (!Visited.insert(V).second) {
- Consensus = nullptr;
- break;
- }
+ // We allow traversing cyclic Phi nodes.
+ // In case of success after this loop, we ensure that traversing through
+ // the Phi nodes ends up with all cases computing an address of the form
+ //     BaseGV + Base + Scale * Index + Offset
+ // where Scale and Offset are constants and BaseGV, Base and Index are
+ // exactly the same Values in all cases.
+ // This means that BaseGV, Base and Index dominate our memory instruction
+ // and have the same values as they had in the address computation
+ // represented as a Phi, so we can safely sink the address computation to
+ // the memory instruction.
+ if (!Visited.insert(V).second)
+ continue;
// For a PHI node, push all of its incoming values.
if (PHINode *P = dyn_cast<PHINode>(V)) {
for (Value *IncValue : P->incoming_values())
worklist.push_back(IncValue);
+ PhiSeen = true;
continue;
}
// For non-PHIs, determine the addressing mode being computed. Note that
// the result may differ depending on what other uses our candidate
// addressing instructions might have.
- SmallVector<Instruction*, 16> NewAddrModeInsts;
+ AddrModeInsts.clear();
ExtAddrMode NewAddrMode = AddressingModeMatcher::Match(
- V, AccessTy, AddrSpace, MemoryInst, NewAddrModeInsts, *TM,
- InsertedInsts, PromotedInsts, TPT);
-
- // This check is broken into two cases with very similar code to avoid using
- // getNumUses() as much as possible. Some values have a lot of uses, so
- // calling getNumUses() unconditionally caused a significant compile-time
- // regression.
- if (!Consensus) {
- Consensus = V;
- AddrMode = NewAddrMode;
- AddrModeInsts = NewAddrModeInsts;
- continue;
- } else if (NewAddrMode == AddrMode) {
- if (!IsNumUsesConsensusValid) {
- NumUsesConsensus = Consensus->getNumUses();
- IsNumUsesConsensusValid = true;
- }
+ V, AccessTy, AddrSpace, MemoryInst, AddrModeInsts, *TLI, *TRI,
+ InsertedInsts, PromotedInsts, TPT);
- // Ensure that the obtained addressing mode is equivalent to that obtained
- // for all other roots of the PHI traversal. Also, when choosing one
- // such root as representative, select the one with the most uses in order
- // to keep the cost modeling heuristics in AddressingModeMatcher
- // applicable.
- unsigned NumUses = V->getNumUses();
- if (NumUses > NumUsesConsensus) {
- Consensus = V;
- NumUsesConsensus = NumUses;
- AddrModeInsts = NewAddrModeInsts;
- }
+ if (!AddrModeFound) {
+ AddrModeFound = true;
+ AddrMode = NewAddrMode;
continue;
}
+ if (NewAddrMode == AddrMode)
+ continue;
- Consensus = nullptr;
+ AddrModeFound = false;
break;
}
// If the addressing mode couldn't be determined, or if multiple different
// ones were determined, bail out now.
- if (!Consensus) {
+ if (!AddrModeFound) {
TPT.rollback(LastKnownGood);
return false;
}
TPT.commit();
// If all the instructions matched are already in this BB, don't do anything.
- if (none_of(AddrModeInsts, [&](Value *V) {
+ // If we saw a Phi node then the address is definitely not local.
+ if (!PhiSeen && none_of(AddrModeInsts, [&](Value *V) {
return IsNonLocalValue(V, MemoryInst->getParent());
- })) {
+ })) {
DEBUG(dbgs() << "CGP: Found local addrmode: " << AddrMode << "\n");
return false;
}
@@ -3935,11 +4358,10 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
DEBUG(dbgs() << "CGP: Reusing nonlocal addrmode: " << AddrMode << " for "
<< *MemoryInst << "\n");
if (SunkAddr->getType() != Addr->getType())
- SunkAddr = Builder.CreateBitCast(SunkAddr, Addr->getType());
+ SunkAddr = Builder.CreatePointerCast(SunkAddr, Addr->getType());
} else if (AddrSinkUsingGEPs ||
(!AddrSinkUsingGEPs.getNumOccurrences() && TM &&
- TM->getSubtargetImpl(*MemoryInst->getParent()->getParent())
- ->useAA())) {
+ SubtargetInfo->useAA())) {
// By default, we use the GEP-based method when AA is used later. This
// prevents new inttoptr/ptrtoint pairs from degrading AA capabilities.
DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode << " for "
@@ -3963,6 +4385,20 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
AddrMode.Scale = 0;
}
+ // It is only safe to sign extend the BaseReg if we know that the math
+ // required to create it did not overflow before we extend it. Since
+ // the original IR value was tossed in favor of a constant back when
+ // the AddrMode was created we need to bail out gracefully if widths
+ // do not match instead of extending it.
+ //
+ // (See below for code to add the scale.)
+ if (AddrMode.Scale) {
+ Type *ScaledRegTy = AddrMode.ScaledReg->getType();
+ if (cast<IntegerType>(IntPtrTy)->getBitWidth() >
+ cast<IntegerType>(ScaledRegTy)->getBitWidth())
+ return false;
+ }
+
if (AddrMode.BaseGV) {
if (ResultPtr)
return false;
@@ -3973,14 +4409,16 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
// If the real base value actually came from an inttoptr, then the matcher
// will look through it and provide only the integer value. In that case,
// use it here.
- if (!ResultPtr && AddrMode.BaseReg) {
- ResultPtr =
- Builder.CreateIntToPtr(AddrMode.BaseReg, Addr->getType(), "sunkaddr");
- AddrMode.BaseReg = nullptr;
- } else if (!ResultPtr && AddrMode.Scale == 1) {
- ResultPtr =
- Builder.CreateIntToPtr(AddrMode.ScaledReg, Addr->getType(), "sunkaddr");
- AddrMode.Scale = 0;
+ if (!DL->isNonIntegralPointerType(Addr->getType())) {
+ if (!ResultPtr && AddrMode.BaseReg) {
+ ResultPtr = Builder.CreateIntToPtr(AddrMode.BaseReg, Addr->getType(),
+ "sunkaddr");
+ AddrMode.BaseReg = nullptr;
+ } else if (!ResultPtr && AddrMode.Scale == 1) {
+ ResultPtr = Builder.CreateIntToPtr(AddrMode.ScaledReg, Addr->getType(),
+ "sunkaddr");
+ AddrMode.Scale = 0;
+ }
}
if (!ResultPtr &&
@@ -4011,19 +4449,11 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
Value *V = AddrMode.ScaledReg;
if (V->getType() == IntPtrTy) {
// done.
- } else if (cast<IntegerType>(IntPtrTy)->getBitWidth() <
- cast<IntegerType>(V->getType())->getBitWidth()) {
- V = Builder.CreateTrunc(V, IntPtrTy, "sunkaddr");
} else {
- // It is only safe to sign extend the BaseReg if we know that the math
- // required to create it did not overflow before we extend it. Since
- // the original IR value was tossed in favor of a constant back when
- // the AddrMode was created we need to bail out gracefully if widths
- // do not match instead of extending it.
- Instruction *I = dyn_cast_or_null<Instruction>(ResultIndex);
- if (I && (ResultIndex != AddrMode.BaseReg))
- I->eraseFromParent();
- return false;
+ assert(cast<IntegerType>(IntPtrTy)->getBitWidth() <
+ cast<IntegerType>(V->getType())->getBitWidth() &&
+ "We can't transform if ScaledReg is too narrow");
+ V = Builder.CreateTrunc(V, IntPtrTy, "sunkaddr");
}
if (AddrMode.Scale != 1)
@@ -4042,7 +4472,7 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
// We need to add this separately from the scale above to help with
// SDAG consecutive load/store merging.
if (ResultPtr->getType() != I8PtrTy)
- ResultPtr = Builder.CreateBitCast(ResultPtr, I8PtrTy);
+ ResultPtr = Builder.CreatePointerCast(ResultPtr, I8PtrTy);
ResultPtr = Builder.CreateGEP(I8Ty, ResultPtr, ResultIndex, "sunkaddr");
}
@@ -4053,14 +4483,27 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
SunkAddr = ResultPtr;
} else {
if (ResultPtr->getType() != I8PtrTy)
- ResultPtr = Builder.CreateBitCast(ResultPtr, I8PtrTy);
+ ResultPtr = Builder.CreatePointerCast(ResultPtr, I8PtrTy);
SunkAddr = Builder.CreateGEP(I8Ty, ResultPtr, ResultIndex, "sunkaddr");
}
if (SunkAddr->getType() != Addr->getType())
- SunkAddr = Builder.CreateBitCast(SunkAddr, Addr->getType());
+ SunkAddr = Builder.CreatePointerCast(SunkAddr, Addr->getType());
}
} else {
+ // We'd require a ptrtoint/inttoptr down the line, which we can't do for
+ // non-integral pointers, so in that case bail out now.
+ Type *BaseTy = AddrMode.BaseReg ? AddrMode.BaseReg->getType() : nullptr;
+ Type *ScaleTy = AddrMode.Scale ? AddrMode.ScaledReg->getType() : nullptr;
+ PointerType *BasePtrTy = dyn_cast_or_null<PointerType>(BaseTy);
+ PointerType *ScalePtrTy = dyn_cast_or_null<PointerType>(ScaleTy);
+ if (DL->isNonIntegralPointerType(Addr->getType()) ||
+ (BasePtrTy && DL->isNonIntegralPointerType(BasePtrTy)) ||
+ (ScalePtrTy && DL->isNonIntegralPointerType(ScalePtrTy)) ||
+ (AddrMode.BaseGV &&
+ DL->isNonIntegralPointerType(AddrMode.BaseGV->getType())))
+ return false;
+
DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode << " for "
<< *MemoryInst << "\n");
Type *IntPtrTy = DL->getIntPtrType(Addr->getType());
@@ -4140,9 +4583,9 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
// using it.
if (Repl->use_empty()) {
// This can cause recursive deletion, which can invalidate our iterator.
- // Use a WeakVH to hold onto it in case this happens.
+ // Use a WeakTrackingVH to hold onto it in case this happens.
Value *CurValue = &*CurInstIterator;
- WeakVH IterHandle(CurValue);
+ WeakTrackingVH IterHandle(CurValue);
BasicBlock *BB = CurInstIterator->getParent();
RecursivelyDeleteTriviallyDeadInstructions(Repl, TLInfo);
@@ -4164,7 +4607,7 @@ bool CodeGenPrepare::optimizeInlineAsmInst(CallInst *CS) {
bool MadeChange = false;
const TargetRegisterInfo *TRI =
- TM->getSubtargetImpl(*CS->getParent()->getParent())->getRegisterInfo();
+ TM->getSubtargetImpl(*CS->getFunction())->getRegisterInfo();
TargetLowering::AsmOperandInfoVector TargetConstraints =
TLI->ParseConstraints(*DL, TRI, CS);
unsigned ArgNo = 0;
@@ -4185,14 +4628,14 @@ bool CodeGenPrepare::optimizeInlineAsmInst(CallInst *CS) {
return MadeChange;
}
-/// \brief Check if all the uses of \p Inst are equivalent (or free) zero or
+/// \brief Check if all the uses of \p Val are equivalent (or free) zero or
/// sign extensions.
-static bool hasSameExtUse(Instruction *Inst, const TargetLowering &TLI) {
- assert(!Inst->use_empty() && "Input must have at least one use");
- const Instruction *FirstUser = cast<Instruction>(*Inst->user_begin());
+static bool hasSameExtUse(Value *Val, const TargetLowering &TLI) {
+ assert(!Val->use_empty() && "Input must have at least one use");
+ const Instruction *FirstUser = cast<Instruction>(*Val->user_begin());
bool IsSExt = isa<SExtInst>(FirstUser);
Type *ExtTy = FirstUser->getType();
- for (const User *U : Inst->users()) {
+ for (const User *U : Val->users()) {
const Instruction *UI = cast<Instruction>(U);
if ((IsSExt && !isa<SExtInst>(UI)) || (!IsSExt && !isa<ZExtInst>(UI)))
return false;
@@ -4202,11 +4645,11 @@ static bool hasSameExtUse(Instruction *Inst, const TargetLowering &TLI) {
continue;
// If IsSExt is true, we are in this situation:
- // a = Inst
+ // a = Val
// b = sext ty1 a to ty2
// c = sext ty1 a to ty3
// Assuming ty2 is shorter than ty3, this could be turned into:
- // a = Inst
+ // a = Val
// b = sext ty1 a to ty2
// c = sext ty2 b to ty3
// However, the last sext is not free.
@@ -4233,51 +4676,44 @@ static bool hasSameExtUse(Instruction *Inst, const TargetLowering &TLI) {
return true;
}
-/// \brief Try to form ExtLd by promoting \p Exts until they reach a
-/// load instruction.
-/// If an ext(load) can be formed, it is returned via \p LI for the load
-/// and \p Inst for the extension.
-/// Otherwise LI == nullptr and Inst == nullptr.
-/// When some promotion happened, \p TPT contains the proper state to
-/// revert them.
-///
-/// \return true when promoting was necessary to expose the ext(load)
-/// opportunity, false otherwise.
+/// \brief Try to speculatively promote extensions in \p Exts and continue
+/// promoting through newly promoted operands recursively as far as doing so is
+/// profitable. Save the extensions profitably moved up in \p ProfitablyMovedExts.
+/// When some promotion happened, \p TPT contains the proper state to revert
+/// them.
///
-/// Example:
-/// \code
-/// %ld = load i32* %addr
-/// %add = add nuw i32 %ld, 4
-/// %zext = zext i32 %add to i64
-/// \endcode
-/// =>
-/// \code
-/// %ld = load i32* %addr
-/// %zext = zext i32 %ld to i64
-/// %add = add nuw i64 %zext, 4
-/// \encode
-/// Thanks to the promotion, we can match zext(load i32*) to i64.
-bool CodeGenPrepare::extLdPromotion(TypePromotionTransaction &TPT,
- LoadInst *&LI, Instruction *&Inst,
- const SmallVectorImpl<Instruction *> &Exts,
- unsigned CreatedInstsCost = 0) {
- // Iterate over all the extensions to see if one form an ext(load).
+/// \return true if some promotion happened, false otherwise.
+bool CodeGenPrepare::tryToPromoteExts(
+ TypePromotionTransaction &TPT, const SmallVectorImpl<Instruction *> &Exts,
+ SmallVectorImpl<Instruction *> &ProfitablyMovedExts,
+ unsigned CreatedInstsCost) {
+ bool Promoted = false;
+
+ // Iterate over all the extensions to try to promote them.
for (auto I : Exts) {
- // Check if we directly have ext(load).
- if ((LI = dyn_cast<LoadInst>(I->getOperand(0)))) {
- Inst = I;
- // No promotion happened here.
- return false;
+ // Early check if we directly have ext(load).
+ if (isa<LoadInst>(I->getOperand(0))) {
+ ProfitablyMovedExts.push_back(I);
+ continue;
}
- // Check whether or not we want to do any promotion.
+
+ // Check whether or not we want to do any promotion. The reason we have
+ // this check inside the for loop is to catch the case where an extension
+ // is directly fed by a load, because in such a case the extension can be
+ // moved up without any promotion of its operands.
if (!TLI || !TLI->enableExtLdPromotion() || DisableExtLdPromotion)
- continue;
+ return false;
+
// Get the action to perform the promotion.
- TypePromotionHelper::Action TPH = TypePromotionHelper::getAction(
- I, InsertedInsts, *TLI, PromotedInsts);
+ TypePromotionHelper::Action TPH =
+ TypePromotionHelper::getAction(I, InsertedInsts, *TLI, PromotedInsts);
// Check if we can promote.
- if (!TPH)
+ if (!TPH) {
+ // Save the current extension as we cannot move up through its operand.
+ ProfitablyMovedExts.push_back(I);
continue;
+ }
+
// Save the current state.
TypePromotionTransaction::ConstRestorationPt LastKnownGood =
TPT.getRestorationPoint();
@@ -4297,110 +4733,275 @@ bool CodeGenPrepare::extLdPromotion(TypePromotionTransaction &TPT,
// one extension but leave one. However, we optimistically keep going,
// because the new extension may be removed too.
long long TotalCreatedInstsCost = CreatedInstsCost + NewCreatedInstsCost;
- TotalCreatedInstsCost -= ExtCost;
+ // FIXME: It would be possible to propagate a negative value instead of
+ // conservatively clamping it to 0.
+ TotalCreatedInstsCost =
+ std::max((long long)0, (TotalCreatedInstsCost - ExtCost));
if (!StressExtLdPromotion &&
(TotalCreatedInstsCost > 1 ||
!isPromotedInstructionLegal(*TLI, *DL, PromotedVal))) {
- // The promotion is not profitable, rollback to the previous state.
+ // This promotion is not profitable: roll back to the previous state, and
+ // save the current extension in ProfitablyMovedExts, as the latest
+ // speculative promotion turned out to be unprofitable.
+ TPT.rollback(LastKnownGood);
+ ProfitablyMovedExts.push_back(I);
+ continue;
+ }
+ // Continue promoting NewExts as far as doing so is profitable.
+ SmallVector<Instruction *, 2> NewlyMovedExts;
+ (void)tryToPromoteExts(TPT, NewExts, NewlyMovedExts, TotalCreatedInstsCost);
+ bool NewPromoted = false;
+ for (auto ExtInst : NewlyMovedExts) {
+ Instruction *MovedExt = cast<Instruction>(ExtInst);
+ Value *ExtOperand = MovedExt->getOperand(0);
+ // If we have reached a load, we need this extra profitability check
+ // as it could potentially be merged into an ext(load).
+ if (isa<LoadInst>(ExtOperand) &&
+ !(StressExtLdPromotion || NewCreatedInstsCost <= ExtCost ||
+ (ExtOperand->hasOneUse() || hasSameExtUse(ExtOperand, *TLI))))
+ continue;
+
+ ProfitablyMovedExts.push_back(MovedExt);
+ NewPromoted = true;
+ }
+
+ // If none of the speculative promotions for NewExts is profitable, roll back
+ // and save the current extension (I) as the last profitable extension.
+ if (!NewPromoted) {
TPT.rollback(LastKnownGood);
+ ProfitablyMovedExts.push_back(I);
continue;
}
// The promotion is profitable.
- // Check if it exposes an ext(load).
- (void)extLdPromotion(TPT, LI, Inst, NewExts, TotalCreatedInstsCost);
- if (LI && (StressExtLdPromotion || NewCreatedInstsCost <= ExtCost ||
- // If we have created a new extension, i.e., now we have two
- // extensions. We must make sure one of them is merged with
- // the load, otherwise we may degrade the code quality.
- (LI->hasOneUse() || hasSameExtUse(LI, *TLI))))
- // Promotion happened.
- return true;
- // If this does not help to expose an ext(load) then, rollback.
- TPT.rollback(LastKnownGood);
+ Promoted = true;
}
- // None of the extension can form an ext(load).
- LI = nullptr;
- Inst = nullptr;
- return false;
+ return Promoted;
+}
+
+/// Merge redundant sexts when one dominates the other.
+bool CodeGenPrepare::mergeSExts(Function &F) {
+ DominatorTree DT(F);
+ bool Changed = false;
+ for (auto &Entry : ValToSExtendedUses) {
+ SExts &Insts = Entry.second;
+ SExts CurPts;
+ for (Instruction *Inst : Insts) {
+ if (RemovedInsts.count(Inst) || !isa<SExtInst>(Inst) ||
+ Inst->getOperand(0) != Entry.first)
+ continue;
+ bool inserted = false;
+ for (auto &Pt : CurPts) {
+ if (DT.dominates(Inst, Pt)) {
+ Pt->replaceAllUsesWith(Inst);
+ RemovedInsts.insert(Pt);
+ Pt->removeFromParent();
+ Pt = Inst;
+ inserted = true;
+ Changed = true;
+ break;
+ }
+ if (!DT.dominates(Pt, Inst))
+ // Give up if we would need to merge at a common dominator, as
+ // experiments show it is not profitable.
+ continue;
+ Inst->replaceAllUsesWith(Pt);
+ RemovedInsts.insert(Inst);
+ Inst->removeFromParent();
+ inserted = true;
+ Changed = true;
+ break;
+ }
+ if (!inserted)
+ CurPts.push_back(Inst);
+ }
+ }
+ return Changed;
+}
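
The loop above maintains, per extended value, a set CurPts of representative sexts, no element of which dominates another: a new sext replaces any representative it dominates, is folded into a representative that dominates it, and otherwise becomes a new representative. A toy model of that maintenance with a stubbed dominance relation (not LLVM API; purely illustrative):

    #include <vector>

    struct Sext { int Block; };       // stand-in for a SExtInst

    // Stub: in this toy model, a lower block number dominates a higher one.
    static bool dominates(const Sext *A, const Sext *B) {
      return A->Block < B->Block;
    }

    // Model of mergeSExts' CurPts maintenance: keep one representative sext
    // per dominance chain; the dominating one subsumes the dominated one.
    static void insertSext(std::vector<Sext *> &CurPts, Sext *Inst) {
      for (Sext *&Pt : CurPts) {
        if (dominates(Inst, Pt)) {    // Inst subsumes Pt (RAUW Pt -> Inst)
          Pt = Inst;
          return;
        }
        if (dominates(Pt, Inst))      // Pt subsumes Inst (RAUW Inst -> Pt)
          return;
      }
      CurPts.push_back(Inst);         // unrelated: a new representative
    }
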
+
+/// Return true if an ext(load) can be formed from an extension in
+/// \p MovedExts.
+bool CodeGenPrepare::canFormExtLd(
+ const SmallVectorImpl<Instruction *> &MovedExts, LoadInst *&LI,
+ Instruction *&Inst, bool HasPromoted) {
+ for (auto *MovedExtInst : MovedExts) {
+ if (isa<LoadInst>(MovedExtInst->getOperand(0))) {
+ LI = cast<LoadInst>(MovedExtInst->getOperand(0));
+ Inst = MovedExtInst;
+ break;
+ }
+ }
+ if (!LI)
+ return false;
+
+ // If they're already in the same block, there's nothing to do.
+ // Make the cheap checks first if we did not promote.
+ // If we promoted, we need to check if it is indeed profitable.
+ if (!HasPromoted && LI->getParent() == Inst->getParent())
+ return false;
+
+ return TLI->isExtLoad(LI, Inst, *DL);
}
/// Move a zext or sext fed by a load into the same basic block as the load,
/// unless conditions are unfavorable. This allows SelectionDAG to fold the
/// extend into the load.
-/// \p I[in/out] the extension may be modified during the process if some
-/// promotions apply.
///
-bool CodeGenPrepare::moveExtToFormExtLoad(Instruction *&I) {
- // ExtLoad formation infrastructure requires TLI to be effective.
+/// E.g.,
+/// \code
+/// %ld = load i32* %addr
+/// %add = add nuw i32 %ld, 4
+/// %zext = zext i32 %add to i64
+/// \endcode
+/// =>
+/// \code
+/// %ld = load i32* %addr
+/// %zext = zext i32 %ld to i64
+/// %add = add nuw i64 %zext, 4
+/// \endcode
+/// Note that the promotion of %add to i64 is done in tryToPromoteExts(), which
+/// allows us to match zext(load i32*) to i64.
+///
+/// Also, try to promote the computations used to obtain a sign extended
+/// value used into memory accesses.
+/// E.g.,
+/// \code
+/// a = add nsw i32 b, 3
+/// d = sext i32 a to i64
+/// e = getelementptr ..., i64 d
+/// \endcode
+/// =>
+/// \code
+/// f = sext i32 b to i64
+/// a = add nsw i64 f, 3
+/// e = getelementptr ..., i64 a
+/// \endcode
+///
+/// \p Inst[in/out] the extension may be modified during the process if some
+/// promotions apply.
+bool CodeGenPrepare::optimizeExt(Instruction *&Inst) {
+ // ExtLoad formation and address type promotion infrastructure requires TLI to
+ // be effective.
if (!TLI)
return false;
- // Try to promote a chain of computation if it allows to form
- // an extended load.
- TypePromotionTransaction TPT;
+ bool AllowPromotionWithoutCommonHeader = false;
+ /// See if this is an interesting sext operation for address type promotion
+ /// before trying to promote it, e.g., one with the right type that is used
+ /// in memory accesses.
+ bool ATPConsiderable = TTI->shouldConsiderAddressTypePromotion(
+ *Inst, AllowPromotionWithoutCommonHeader);
+ TypePromotionTransaction TPT(RemovedInsts);
TypePromotionTransaction::ConstRestorationPt LastKnownGood =
- TPT.getRestorationPoint();
+ TPT.getRestorationPoint();
SmallVector<Instruction *, 1> Exts;
- Exts.push_back(I);
+ SmallVector<Instruction *, 2> SpeculativelyMovedExts;
+ Exts.push_back(Inst);
+
+ bool HasPromoted = tryToPromoteExts(TPT, Exts, SpeculativelyMovedExts);
+
// Look for a load being extended.
LoadInst *LI = nullptr;
- Instruction *OldExt = I;
- bool HasPromoted = extLdPromotion(TPT, LI, I, Exts);
- if (!LI || !I) {
- assert(!HasPromoted && !LI && "If we did not match any load instruction "
- "the code must remain the same");
- I = OldExt;
- return false;
+ Instruction *ExtFedByLoad;
+
+ // Try to promote a chain of computation if it allows to form an extended
+ // load.
+ if (canFormExtLd(SpeculativelyMovedExts, LI, ExtFedByLoad, HasPromoted)) {
+ assert(LI && ExtFedByLoad && "Expect a valid load and extension");
+ TPT.commit();
+ // Move the extend into the same block as the load
+ ExtFedByLoad->removeFromParent();
+ ExtFedByLoad->insertAfter(LI);
+ // CGP does not check if the zext would be speculatively executed when moved
+ // to the same basic block as the load. Preserving its original location
+ // would pessimize the debugging experience, as well as negatively impact
+ // the quality of sample pgo. We don't want to use "line 0" as that has a
+ // size cost in the line-table section and logically the zext can be seen as
+ // part of the load. Therefore we conservatively reuse the same debug
+ // location for the load and the zext.
+ ExtFedByLoad->setDebugLoc(LI->getDebugLoc());
+ ++NumExtsMoved;
+ Inst = ExtFedByLoad;
+ return true;
}
- // If they're already in the same block, there's nothing to do.
- // Make the cheap checks first if we did not promote.
- // If we promoted, we need to check if it is indeed profitable.
- if (!HasPromoted && LI->getParent() == I->getParent())
- return false;
-
- EVT VT = TLI->getValueType(*DL, I->getType());
- EVT LoadVT = TLI->getValueType(*DL, LI->getType());
+ // Continue promoting SExts if the target reports them as worth considering.
+ if (ATPConsiderable &&
+ performAddressTypePromotion(Inst, AllowPromotionWithoutCommonHeader,
+ HasPromoted, TPT, SpeculativelyMovedExts))
+ return true;
- // If the load has other users and the truncate is not free, this probably
- // isn't worthwhile.
- if (!LI->hasOneUse() &&
- (TLI->isTypeLegal(LoadVT) || !TLI->isTypeLegal(VT)) &&
- !TLI->isTruncateFree(I->getType(), LI->getType())) {
- I = OldExt;
- TPT.rollback(LastKnownGood);
- return false;
- }
+ TPT.rollback(LastKnownGood);
+ return false;
+}
- // Check whether the target supports casts folded into loads.
- unsigned LType;
- if (isa<ZExtInst>(I))
- LType = ISD::ZEXTLOAD;
- else {
- assert(isa<SExtInst>(I) && "Unexpected ext type!");
- LType = ISD::SEXTLOAD;
- }
- if (!TLI->isLoadExtLegal(LType, VT, LoadVT)) {
- I = OldExt;
- TPT.rollback(LastKnownGood);
+// Perform address type promotion if doing so is profitable.
+// If AllowPromotionWithoutCommonHeader == false, we should find other sext
+// instructions that sign extended the same initial value. However, if
+// AllowPromotionWithoutCommonHeader == true, we assume that promoting the
+// extension is profitable on its own.
+bool CodeGenPrepare::performAddressTypePromotion(
+ Instruction *&Inst, bool AllowPromotionWithoutCommonHeader,
+ bool HasPromoted, TypePromotionTransaction &TPT,
+ SmallVectorImpl<Instruction *> &SpeculativelyMovedExts) {
+ bool Promoted = false;
+ SmallPtrSet<Instruction *, 1> UnhandledExts;
+ bool AllSeenFirst = true;
+ for (auto I : SpeculativelyMovedExts) {
+ Value *HeadOfChain = I->getOperand(0);
+ DenseMap<Value *, Instruction *>::iterator AlreadySeen =
+ SeenChainsForSExt.find(HeadOfChain);
+ // If there is an unhandled SExt which has the same header, try to promote
+ // it as well.
+ if (AlreadySeen != SeenChainsForSExt.end()) {
+ if (AlreadySeen->second != nullptr)
+ UnhandledExts.insert(AlreadySeen->second);
+ AllSeenFirst = false;
+ }
+ }
+
+ if (!AllSeenFirst || (AllowPromotionWithoutCommonHeader &&
+ SpeculativelyMovedExts.size() == 1)) {
+ TPT.commit();
+ if (HasPromoted)
+ Promoted = true;
+ for (auto I : SpeculativelyMovedExts) {
+ Value *HeadOfChain = I->getOperand(0);
+ SeenChainsForSExt[HeadOfChain] = nullptr;
+ ValToSExtendedUses[HeadOfChain].push_back(I);
+ }
+ // Update Inst as the promotion happened.
+ Inst = SpeculativelyMovedExts.pop_back_val();
+ } else {
+ // This is the first chain visited from this header; keep the current chain
+ // as unhandled. Defer promoting it until we encounter another SExt chain
+ // derived from the same header.
+ for (auto I : SpeculativelyMovedExts) {
+ Value *HeadOfChain = I->getOperand(0);
+ SeenChainsForSExt[HeadOfChain] = Inst;
+ }
return false;
}
- // Move the extend into the same block as the load, so that SelectionDAG
- // can fold it.
- TPT.commit();
- I->removeFromParent();
- I->insertAfter(LI);
- // CGP does not check if the zext would be speculatively executed when moved
- // to the same basic block as the load. Preserving its original location would
- // pessimize the debugging experience, as well as negatively impact the
- // quality of sample pgo. We don't want to use "line 0" as that has a
- // size cost in the line-table section and logically the zext can be seen as
- // part of the load. Therefore we conservatively reuse the same debug location
- // for the load and the zext.
- I->setDebugLoc(LI->getDebugLoc());
- ++NumExtsMoved;
- return true;
+ if (!AllSeenFirst && !UnhandledExts.empty())
+ for (auto VisitedSExt : UnhandledExts) {
+ if (RemovedInsts.count(VisitedSExt))
+ continue;
+ TypePromotionTransaction TPT(RemovedInsts);
+ SmallVector<Instruction *, 1> Exts;
+ SmallVector<Instruction *, 2> Chains;
+ Exts.push_back(VisitedSExt);
+ bool HasPromoted = tryToPromoteExts(TPT, Exts, Chains);
+ TPT.commit();
+ if (HasPromoted)
+ Promoted = true;
+ for (auto I : Chains) {
+ Value *HeadOfChain = I->getOperand(0);
+ // Mark this as handled.
+ SeenChainsForSExt[HeadOfChain] = nullptr;
+ ValToSExtendedUses[HeadOfChain].push_back(I);
+ }
+ }
+ return Promoted;
}
bool CodeGenPrepare::optimizeExtUses(Instruction *I) {
@@ -4534,13 +5135,10 @@ bool CodeGenPrepare::optimizeLoadExt(LoadInst *Load) {
!(Load->getType()->isIntegerTy() || Load->getType()->isPointerTy()))
return false;
- // Skip loads we've already transformed or have no reason to transform.
- if (Load->hasOneUse()) {
- User *LoadUser = *Load->user_begin();
- if (cast<Instruction>(LoadUser)->getParent() == Load->getParent() &&
- !dyn_cast<PHINode>(LoadUser))
- return false;
- }
+ // Skip loads we've already transformed.
+ if (Load->hasOneUse() &&
+ InsertedInsts.count(cast<Instruction>(*Load->user_begin())))
+ return false;
// Look at all uses of Load, looking through phis, to determine how many bits
// of the loaded value are needed.
@@ -4590,16 +5188,14 @@ bool CodeGenPrepare::optimizeLoadExt(LoadInst *Load) {
if (!ShlC)
return false;
uint64_t ShiftAmt = ShlC->getLimitedValue(BitWidth - 1);
- auto ShlDemandBits = APInt::getAllOnesValue(BitWidth).lshr(ShiftAmt);
- DemandBits |= ShlDemandBits;
+ DemandBits.setLowBits(BitWidth - ShiftAmt);
break;
}
case llvm::Instruction::Trunc: {
EVT TruncVT = TLI->getValueType(*DL, I->getType());
unsigned TruncBitWidth = TruncVT.getSizeInBits();
- auto TruncBits = APInt::getAllOnesValue(TruncBitWidth).zext(BitWidth);
- DemandBits |= TruncBits;
+ DemandBits.setLowBits(TruncBitWidth);
break;
}
@@ -4620,7 +5216,7 @@ bool CodeGenPrepare::optimizeLoadExt(LoadInst *Load) {
//
// Also avoid hoisting if we didn't see any ands with the exact DemandBits
// mask, since these are the only ands that will be removed by isel.
- if (ActiveBits <= 1 || !APIntOps::isMask(ActiveBits, DemandBits) ||
+ if (ActiveBits <= 1 || !DemandBits.isMask(ActiveBits) ||
WidestAndBits != DemandBits)
return false;
@@ -4636,6 +5232,9 @@ bool CodeGenPrepare::optimizeLoadExt(LoadInst *Load) {
IRBuilder<> Builder(Load->getNextNode());
auto *NewAnd = dyn_cast<Instruction>(
Builder.CreateAnd(Load, ConstantInt::get(Ctx, DemandBits)));
+ // Mark this instruction as "inserted by CGP", so that other
+ // optimizations don't touch it.
+ InsertedInsts.insert(NewAnd);
// Replace all uses of load with new and (except for the use of load in the
// new and itself).
@@ -4985,7 +5584,7 @@ bool CodeGenPrepare::optimizeSwitchInst(SwitchInst *SI) {
auto *ExtInst = CastInst::Create(ExtType, Cond, NewType);
ExtInst->insertBefore(SI);
SI->setCondition(ExtInst);
- for (SwitchInst::CaseIt Case : SI->cases()) {
+ for (auto Case : SI->cases()) {
APInt NarrowConst = Case.getCaseValue()->getValue();
APInt WideConst = (ExtType == Instruction::ZExt) ?
NarrowConst.zext(RegWidth) : NarrowConst.sext(RegWidth);
@@ -4995,6 +5594,7 @@ bool CodeGenPrepare::optimizeSwitchInst(SwitchInst *SI) {
return true;
}
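The loop above rewrites each narrow case constant to the promoted register width, choosing zext or sext to match the extension applied to the condition. A small hedged illustration of why the extension kind matters, assuming an i8 condition widened to i32:

#include <cstdint>

// An i8 case value widened to a 32-bit register width.
uint32_t widenCaseZExt(uint8_t C) { return static_cast<uint32_t>(C); }
int32_t widenCaseSExt(int8_t C) { return static_cast<int32_t>(C); }
// e.g. 0xFF zero-extends to 0x000000FF but sign-extends to 0xFFFFFFFF,
// so the case constants must be widened the same way as the condition.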
+
namespace {
/// \brief Helper class to promote a scalar operation to a vector one.
/// This class is used to move downward extractelement transition.
@@ -5473,7 +6073,7 @@ static bool splitMergedValStore(StoreInst &SI, const DataLayout &DL,
return true;
}
-bool CodeGenPrepare::optimizeInst(Instruction *I, bool& ModifiedDT) {
+bool CodeGenPrepare::optimizeInst(Instruction *I, bool &ModifiedDT) {
// Bail out if we inserted the instruction to prevent optimizations from
// stepping on each other's toes.
if (InsertedInsts.count(I))
@@ -5483,7 +6083,7 @@ bool CodeGenPrepare::optimizeInst(Instruction *I, bool& ModifiedDT) {
// It is possible for very late stage optimizations (such as SimplifyCFG)
// to introduce PHI nodes too late to be cleaned up. If we detect such a
// trivial PHI, go ahead and zap it here.
- if (Value *V = SimplifyInstruction(P, *DL, TLInfo, nullptr)) {
+ if (Value *V = SimplifyInstruction(P, {*DL, TLInfo})) {
P->replaceAllUsesWith(V);
P->eraseFromParent();
++NumPHIsElim;
@@ -5514,7 +6114,7 @@ bool CodeGenPrepare::optimizeInst(Instruction *I, bool& ModifiedDT) {
TargetLowering::TypeExpandInteger) {
return SinkCast(CI);
} else {
- bool MadeChange = moveExtToFormExtLoad(I);
+ bool MadeChange = optimizeExt(I);
return MadeChange | optimizeExtUses(I);
}
}
@@ -5548,8 +6148,24 @@ bool CodeGenPrepare::optimizeInst(Instruction *I, bool& ModifiedDT) {
return false;
}
+ if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(I)) {
+ unsigned AS = RMW->getPointerAddressSpace();
+ return optimizeMemoryInst(I, RMW->getPointerOperand(),
+ RMW->getType(), AS);
+ }
+
+ if (AtomicCmpXchgInst *CmpX = dyn_cast<AtomicCmpXchgInst>(I)) {
+ unsigned AS = CmpX->getPointerAddressSpace();
+ return optimizeMemoryInst(I, CmpX->getPointerOperand(),
+ CmpX->getCompareOperand()->getType(), AS);
+ }
+
BinaryOperator *BinOp = dyn_cast<BinaryOperator>(I);
+ if (BinOp && (BinOp->getOpcode() == Instruction::And) &&
+ EnableAndCmpSinking && TLI)
+ return sinkAndCmp0Expression(BinOp, *TLI, InsertedInsts);
+
if (BinOp && (BinOp->getOpcode() == Instruction::AShr ||
BinOp->getOpcode() == Instruction::LShr)) {
ConstantInt *CI = dyn_cast<ConstantInt>(BinOp->getOperand(1));
@@ -5612,7 +6228,7 @@ static bool makeBitReverse(Instruction &I, const DataLayout &DL,
// In this pass we look for GEP and cast instructions that are used
// across basic blocks and rewrite them to improve basic-block-at-a-time
// selection.
-bool CodeGenPrepare::optimizeBlock(BasicBlock &BB, bool& ModifiedDT) {
+bool CodeGenPrepare::optimizeBlock(BasicBlock &BB, bool &ModifiedDT) {
SunkAddrs.clear();
bool MadeChange = false;
@@ -5679,68 +6295,6 @@ bool CodeGenPrepare::placeDbgValues(Function &F) {
return MadeChange;
}
-// If there is a sequence that branches based on comparing a single bit
-// against zero that can be combined into a single instruction, and the
-// target supports folding these into a single instruction, sink the
-// mask and compare into the branch uses. Do this before OptimizeBlock ->
-// OptimizeInst -> OptimizeCmpExpression, which perturbs the pattern being
-// searched for.
-bool CodeGenPrepare::sinkAndCmp(Function &F) {
- if (!EnableAndCmpSinking)
- return false;
- if (!TLI || !TLI->isMaskAndBranchFoldingLegal())
- return false;
- bool MadeChange = false;
- for (BasicBlock &BB : F) {
- // Does this BB end with the following?
- // %andVal = and %val, #single-bit-set
- // %icmpVal = icmp %andResult, 0
- // br i1 %cmpVal label %dest1, label %dest2"
- BranchInst *Brcc = dyn_cast<BranchInst>(BB.getTerminator());
- if (!Brcc || !Brcc->isConditional())
- continue;
- ICmpInst *Cmp = dyn_cast<ICmpInst>(Brcc->getOperand(0));
- if (!Cmp || Cmp->getParent() != &BB)
- continue;
- ConstantInt *Zero = dyn_cast<ConstantInt>(Cmp->getOperand(1));
- if (!Zero || !Zero->isZero())
- continue;
- Instruction *And = dyn_cast<Instruction>(Cmp->getOperand(0));
- if (!And || And->getOpcode() != Instruction::And || And->getParent() != &BB)
- continue;
- ConstantInt* Mask = dyn_cast<ConstantInt>(And->getOperand(1));
- if (!Mask || !Mask->getUniqueInteger().isPowerOf2())
- continue;
- DEBUG(dbgs() << "found and; icmp ?,0; brcc\n"); DEBUG(BB.dump());
-
- // Push the "and; icmp" for any users that are conditional branches.
- // Since there can only be one branch use per BB, we don't need to keep
- // track of which BBs we insert into.
- for (Use &TheUse : Cmp->uses()) {
- // Find brcc use.
- BranchInst *BrccUser = dyn_cast<BranchInst>(TheUse);
- if (!BrccUser || !BrccUser->isConditional())
- continue;
- BasicBlock *UserBB = BrccUser->getParent();
- if (UserBB == &BB) continue;
- DEBUG(dbgs() << "found Brcc use\n");
-
- // Sink the "and; icmp" to use.
- MadeChange = true;
- BinaryOperator *NewAnd =
- BinaryOperator::CreateAnd(And->getOperand(0), And->getOperand(1), "",
- BrccUser);
- CmpInst *NewCmp =
- CmpInst::Create(Cmp->getOpcode(), Cmp->getPredicate(), NewAnd, Zero,
- "", BrccUser);
- TheUse = NewCmp;
- ++NumAndCmpsMoved;
- DEBUG(BrccUser->getParent()->dump());
- }
- }
- return MadeChange;
-}
-
/// \brief Scale down both weights to fit into uint32_t.
static void scaleWeights(uint64_t &NewTrue, uint64_t &NewFalse) {
uint64_t NewMax = (NewTrue > NewFalse) ? NewTrue : NewFalse;
@@ -5833,7 +6387,7 @@ bool CodeGenPrepare::splitBranchCondition(Function &F) {
}
// Update PHI nodes in both successors. The original BB needs to be
- // replaced in one succesor's PHI nodes, because the branch comes now from
+ // replaced in one successor's PHI nodes, because the branch comes now from
// the newly generated BB (NewBB). In the other successor we need to add one
// incoming edge to the PHI nodes, because both branch instructions target
// now the same successor. Depending on the original branch condition
diff --git a/contrib/llvm/lib/CodeGen/CountingFunctionInserter.cpp b/contrib/llvm/lib/CodeGen/CountingFunctionInserter.cpp
index 1e46a7a..7f7350f 100644
--- a/contrib/llvm/lib/CodeGen/CountingFunctionInserter.cpp
+++ b/contrib/llvm/lib/CodeGen/CountingFunctionInserter.cpp
@@ -41,7 +41,7 @@ namespace {
Type *VoidTy = Type::getVoidTy(F.getContext());
Constant *CountingFn =
F.getParent()->getOrInsertFunction(CountingFunctionName,
- VoidTy, nullptr);
+ VoidTy);
CallInst::Create(CountingFn, "", &*F.begin()->getFirstInsertionPt());
return true;
}
diff --git a/contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp b/contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp
index 5d60c30..a3cf284 100644
--- a/contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp
+++ b/contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp
@@ -71,8 +71,11 @@ void CriticalAntiDepBreaker::StartBlock(MachineBasicBlock *BB) {
// callee-saved register that is not saved in the prolog.
const MachineFrameInfo &MFI = MF.getFrameInfo();
BitVector Pristine = MFI.getPristineRegs(MF);
- for (const MCPhysReg *I = TRI->getCalleeSavedRegs(&MF); *I; ++I) {
- if (!IsReturnBlock && !Pristine.test(*I)) continue;
+ for (const MCPhysReg *I = MF.getRegInfo().getCalleeSavedRegs(); *I;
+ ++I) {
+ unsigned Reg = *I;
+ if (!IsReturnBlock && !Pristine.test(Reg))
+ continue;
for (MCRegAliasIterator AI(*I, TRI, true); AI.isValid(); ++AI) {
unsigned Reg = *AI;
Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
@@ -645,10 +648,8 @@ BreakAntiDependencies(const std::vector<SUnit>& SUnits,
// as well.
const SUnit *SU = MISUnitMap[Q->second->getParent()];
if (!SU) continue;
- for (DbgValueVector::iterator DVI = DbgValues.begin(),
- DVE = DbgValues.end(); DVI != DVE; ++DVI)
- if (DVI->second == Q->second->getParent())
- UpdateDbgValue(*DVI->first, AntiDepReg, NewReg);
+ UpdateDbgValues(DbgValues, Q->second->getParent(),
+ AntiDepReg, NewReg);
}
// We just went back in time and modified history; the
diff --git a/contrib/llvm/lib/CodeGen/DFAPacketizer.cpp b/contrib/llvm/lib/CodeGen/DFAPacketizer.cpp
index 7b1b2d6..853b9af 100644
--- a/contrib/llvm/lib/CodeGen/DFAPacketizer.cpp
+++ b/contrib/llvm/lib/CodeGen/DFAPacketizer.cpp
@@ -23,49 +23,59 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "packets"
-
#include "llvm/CodeGen/DFAPacketizer.h"
+#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBundle.h"
+#include "llvm/CodeGen/ScheduleDAG.h"
#include "llvm/CodeGen/ScheduleDAGInstrs.h"
+#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCInstrItineraries.h"
-#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include <algorithm>
+#include <cassert>
+#include <iterator>
+#include <memory>
+#include <vector>
using namespace llvm;
+#define DEBUG_TYPE "packets"
+
static cl::opt<unsigned> InstrLimit("dfa-instr-limit", cl::Hidden,
cl::init(0), cl::desc("If present, stops packetizing after N instructions"));
+
static unsigned InstrCount = 0;
// --------------------------------------------------------------------
// Definitions shared between DFAPacketizer.cpp and DFAPacketizerEmitter.cpp
-namespace {
- DFAInput addDFAFuncUnits(DFAInput Inp, unsigned FuncUnits) {
- return (Inp << DFA_MAX_RESOURCES) | FuncUnits;
- }
+static DFAInput addDFAFuncUnits(DFAInput Inp, unsigned FuncUnits) {
+ return (Inp << DFA_MAX_RESOURCES) | FuncUnits;
+}
- /// Return the DFAInput for an instruction class input vector.
- /// This function is used in both DFAPacketizer.cpp and in
- /// DFAPacketizerEmitter.cpp.
- DFAInput getDFAInsnInput(const std::vector<unsigned> &InsnClass) {
- DFAInput InsnInput = 0;
- assert((InsnClass.size() <= DFA_MAX_RESTERMS) &&
- "Exceeded maximum number of DFA terms");
- for (auto U : InsnClass)
- InsnInput = addDFAFuncUnits(InsnInput, U);
- return InsnInput;
- }
+/// Return the DFAInput for an instruction class input vector.
+/// This function is used in both DFAPacketizer.cpp and in
+/// DFAPacketizerEmitter.cpp.
+static DFAInput getDFAInsnInput(const std::vector<unsigned> &InsnClass) {
+ DFAInput InsnInput = 0;
+ assert((InsnClass.size() <= DFA_MAX_RESTERMS) &&
+ "Exceeded maximum number of DFA terms");
+ for (auto U : InsnClass)
+ InsnInput = addDFAFuncUnits(InsnInput, U);
+ return InsnInput;
}
+
// --------------------------------------------------------------------
DFAPacketizer::DFAPacketizer(const InstrItineraryData *I,
const DFAStateInput (*SIT)[2],
const unsigned *SET):
- InstrItins(I), CurrentState(0), DFAStateInputTable(SIT),
- DFAStateEntryTable(SET) {
+ InstrItins(I), DFAStateInputTable(SIT), DFAStateEntryTable(SET) {
// Make sure DFA types are large enough for the number of terms & resources.
static_assert((DFA_MAX_RESTERMS * DFA_MAX_RESOURCES) <=
(8 * sizeof(DFAInput)),
@@ -75,7 +85,6 @@ DFAPacketizer::DFAPacketizer(const InstrItineraryData *I,
"(DFA_MAX_RESTERMS * DFA_MAX_RESOURCES) too big for DFAStateInput");
}
-
// Read the DFA transition table and update CachedTable.
//
// Format of the transition tables:
@@ -97,7 +106,6 @@ void DFAPacketizer::ReadTable(unsigned int state) {
DFAStateInputTable[i][1];
}
-
// Return the DFAInput for an instruction class.
DFAInput DFAPacketizer::getInsnInput(unsigned InsnClass) {
// Note: this logic must match that in DFAPacketizerDefs.h for input vectors.
@@ -112,16 +120,14 @@ DFAInput DFAPacketizer::getInsnInput(unsigned InsnClass) {
return InsnInput;
}
-
// Return the DFAInput for an instruction class input vector.
DFAInput DFAPacketizer::getInsnInput(const std::vector<unsigned> &InsnClass) {
return getDFAInsnInput(InsnClass);
}
-
// Check if the resources occupied by a MCInstrDesc are available in the
// current state.
-bool DFAPacketizer::canReserveResources(const llvm::MCInstrDesc *MID) {
+bool DFAPacketizer::canReserveResources(const MCInstrDesc *MID) {
unsigned InsnClass = MID->getSchedClass();
DFAInput InsnInput = getInsnInput(InsnClass);
UnsignPair StateTrans = UnsignPair(CurrentState, InsnInput);
@@ -129,10 +135,9 @@ bool DFAPacketizer::canReserveResources(const llvm::MCInstrDesc *MID) {
return CachedTable.count(StateTrans) != 0;
}
-
// Reserve the resources occupied by a MCInstrDesc and change the current
// state to reflect that change.
-void DFAPacketizer::reserveResources(const llvm::MCInstrDesc *MID) {
+void DFAPacketizer::reserveResources(const MCInstrDesc *MID) {
unsigned InsnClass = MID->getSchedClass();
DFAInput InsnInput = getInsnInput(InsnClass);
UnsignPair StateTrans = UnsignPair(CurrentState, InsnInput);
@@ -141,24 +146,22 @@ void DFAPacketizer::reserveResources(const llvm::MCInstrDesc *MID) {
CurrentState = CachedTable[StateTrans];
}
-
// Check if the resources occupied by a machine instruction are available
// in the current state.
-bool DFAPacketizer::canReserveResources(llvm::MachineInstr &MI) {
- const llvm::MCInstrDesc &MID = MI.getDesc();
+bool DFAPacketizer::canReserveResources(MachineInstr &MI) {
+ const MCInstrDesc &MID = MI.getDesc();
return canReserveResources(&MID);
}
-
// Reserve the resources occupied by a machine instruction and change the
// current state to reflect that change.
-void DFAPacketizer::reserveResources(llvm::MachineInstr &MI) {
- const llvm::MCInstrDesc &MID = MI.getDesc();
+void DFAPacketizer::reserveResources(MachineInstr &MI) {
+ const MCInstrDesc &MID = MI.getDesc();
reserveResources(&MID);
}
-
namespace llvm {
+
// This class extends ScheduleDAGInstrs and overrides the schedule method
// to build the dependence graph.
class DefaultVLIWScheduler : public ScheduleDAGInstrs {
@@ -166,9 +169,11 @@ private:
AliasAnalysis *AA;
/// Ordered list of DAG postprocessing steps.
std::vector<std::unique_ptr<ScheduleDAGMutation>> Mutations;
+
public:
DefaultVLIWScheduler(MachineFunction &MF, MachineLoopInfo &MLI,
AliasAnalysis *AA);
+
// Actual scheduling work.
void schedule() override;
@@ -176,11 +181,12 @@ public:
void addMutation(std::unique_ptr<ScheduleDAGMutation> Mutation) {
Mutations.push_back(std::move(Mutation));
}
+
protected:
void postprocessDAG();
};
-}
+} // end namespace llvm
DefaultVLIWScheduler::DefaultVLIWScheduler(MachineFunction &MF,
MachineLoopInfo &MLI,
@@ -189,21 +195,18 @@ DefaultVLIWScheduler::DefaultVLIWScheduler(MachineFunction &MF,
CanHandleTerminators = true;
}
-
/// Apply each ScheduleDAGMutation step in order.
void DefaultVLIWScheduler::postprocessDAG() {
for (auto &M : Mutations)
M->apply(this);
}
-
void DefaultVLIWScheduler::schedule() {
// Build the scheduling graph.
buildSchedGraph(AA);
postprocessDAG();
}
-
VLIWPacketizerList::VLIWPacketizerList(MachineFunction &mf,
MachineLoopInfo &mli, AliasAnalysis *aa)
: MF(mf), TII(mf.getSubtarget().getInstrInfo()), AA(aa) {
@@ -211,15 +214,11 @@ VLIWPacketizerList::VLIWPacketizerList(MachineFunction &mf,
VLIWScheduler = new DefaultVLIWScheduler(MF, mli, AA);
}
-
VLIWPacketizerList::~VLIWPacketizerList() {
- if (VLIWScheduler)
- delete VLIWScheduler;
- if (ResourceTracker)
- delete ResourceTracker;
+ delete VLIWScheduler;
+ delete ResourceTracker;
}
-
// End the current packet, bundle packet instructions and reset DFA state.
void VLIWPacketizerList::endPacket(MachineBasicBlock *MBB,
MachineBasicBlock::iterator MI) {
@@ -239,7 +238,6 @@ void VLIWPacketizerList::endPacket(MachineBasicBlock *MBB,
DEBUG(dbgs() << "End packet\n");
}
-
// Bundle machine instructions into packets.
void VLIWPacketizerList::PacketizeMIs(MachineBasicBlock *MBB,
MachineBasicBlock::iterator BeginItr,
@@ -338,7 +336,6 @@ void VLIWPacketizerList::PacketizeMIs(MachineBasicBlock *MBB,
VLIWScheduler->finishBlock();
}
-
// Add a DAG mutation object to the ordered list.
void VLIWPacketizerList::addMutation(
std::unique_ptr<ScheduleDAGMutation> Mutation) {
diff --git a/contrib/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp b/contrib/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp
index 17c229a..91d18e2 100644
--- a/contrib/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp
+++ b/contrib/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp
@@ -11,10 +11,10 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/CodeGen/Passes.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
@@ -23,7 +23,7 @@
using namespace llvm;
-#define DEBUG_TYPE "codegen-dce"
+#define DEBUG_TYPE "dead-mi-elimination"
STATISTIC(NumDeletes, "Number of dead instructions deleted");
@@ -54,7 +54,7 @@ namespace {
char DeadMachineInstructionElim::ID = 0;
char &llvm::DeadMachineInstructionElimID = DeadMachineInstructionElim::ID;
-INITIALIZE_PASS(DeadMachineInstructionElim, "dead-mi-elimination",
+INITIALIZE_PASS(DeadMachineInstructionElim, DEBUG_TYPE,
"Remove dead machine instructions", false, false)
bool DeadMachineInstructionElim::isDead(const MachineInstr *MI) const {
@@ -110,7 +110,7 @@ bool DeadMachineInstructionElim::runOnMachineFunction(MachineFunction &MF) {
// Start out assuming that reserved registers are live out of this block.
LivePhysRegs = MRI->getReservedRegs();
- // Add live-ins from sucessors to LivePhysRegs. Normally, physregs are not
+ // Add live-ins from successors to LivePhysRegs. Normally, physregs are not
// live across blocks, but some targets (x86) can have flags live out of a
// block.
for (MachineBasicBlock::succ_iterator S = MBB.succ_begin(),
diff --git a/contrib/llvm/lib/CodeGen/DetectDeadLanes.cpp b/contrib/llvm/lib/CodeGen/DetectDeadLanes.cpp
index a7ba694..ab9a059 100644
--- a/contrib/llvm/lib/CodeGen/DetectDeadLanes.cpp
+++ b/contrib/llvm/lib/CodeGen/DetectDeadLanes.cpp
@@ -132,8 +132,7 @@ private:
char DetectDeadLanes::ID = 0;
char &llvm::DetectDeadLanesID = DetectDeadLanes::ID;
-INITIALIZE_PASS(DetectDeadLanes, "detect-dead-lanes", "Detect Dead Lanes",
- false, false)
+INITIALIZE_PASS(DetectDeadLanes, DEBUG_TYPE, "Detect Dead Lanes", false, false)
/// Returns true if \p MI will get lowered to a series of COPY instructions.
/// We call this a COPY-like instruction.
@@ -441,7 +440,7 @@ LaneBitmask DetectDeadLanes::determineInitialUsedLanes(unsigned Reg) {
const TargetRegisterClass *DstRC = MRI->getRegClass(DefReg);
CrossCopy = isCrossCopy(*MRI, UseMI, DstRC, MO);
if (CrossCopy)
- DEBUG(dbgs() << "Copy accross incompatible classes: " << UseMI);
+ DEBUG(dbgs() << "Copy across incompatible classes: " << UseMI);
}
if (!CrossCopy)
diff --git a/contrib/llvm/lib/CodeGen/DwarfEHPrepare.cpp b/contrib/llvm/lib/CodeGen/DwarfEHPrepare.cpp
index 38af19a..2f83326 100644
--- a/contrib/llvm/lib/CodeGen/DwarfEHPrepare.cpp
+++ b/contrib/llvm/lib/CodeGen/DwarfEHPrepare.cpp
@@ -12,12 +12,13 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/CodeGen/Passes.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/EHPersonalities.h"
#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
@@ -34,8 +35,6 @@ STATISTIC(NumResumesLowered, "Number of resume calls lowered");
namespace {
class DwarfEHPrepare : public FunctionPass {
- const TargetMachine *TM;
-
// RewindFunction - _Unwind_Resume or the target equivalent.
Constant *RewindFunction;
@@ -52,15 +51,9 @@ namespace {
public:
static char ID; // Pass identification, replacement for typeid.
- // INITIALIZE_TM_PASS requires a default constructor, but it isn't used in
- // practice.
DwarfEHPrepare()
- : FunctionPass(ID), TM(nullptr), RewindFunction(nullptr), DT(nullptr),
- TLI(nullptr) {}
-
- DwarfEHPrepare(const TargetMachine *TM)
- : FunctionPass(ID), TM(TM), RewindFunction(nullptr), DT(nullptr),
- TLI(nullptr) {}
+ : FunctionPass(ID), RewindFunction(nullptr), DT(nullptr), TLI(nullptr) {
+ }
bool runOnFunction(Function &Fn) override;
@@ -78,18 +71,18 @@ namespace {
} // end anonymous namespace
char DwarfEHPrepare::ID = 0;
-INITIALIZE_TM_PASS_BEGIN(DwarfEHPrepare, "dwarfehprepare",
- "Prepare DWARF exceptions", false, false)
+INITIALIZE_PASS_BEGIN(DwarfEHPrepare, DEBUG_TYPE,
+ "Prepare DWARF exceptions", false, false)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
-INITIALIZE_TM_PASS_END(DwarfEHPrepare, "dwarfehprepare",
- "Prepare DWARF exceptions", false, false)
+INITIALIZE_PASS_END(DwarfEHPrepare, DEBUG_TYPE,
+ "Prepare DWARF exceptions", false, false)
-FunctionPass *llvm::createDwarfEHPass(const TargetMachine *TM) {
- return new DwarfEHPrepare(TM);
-}
+FunctionPass *llvm::createDwarfEHPass() { return new DwarfEHPrepare(); }
void DwarfEHPrepare::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<TargetPassConfig>();
AU.addRequired<TargetTransformInfoWrapperPass>();
AU.addRequired<DominatorTreeWrapperPass>();
}
@@ -254,9 +247,10 @@ bool DwarfEHPrepare::InsertUnwindResumeCalls(Function &Fn) {
}
bool DwarfEHPrepare::runOnFunction(Function &Fn) {
- assert(TM && "DWARF EH preparation requires a target machine");
+ const TargetMachine &TM =
+ getAnalysis<TargetPassConfig>().getTM<TargetMachine>();
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- TLI = TM->getSubtargetImpl(Fn)->getTargetLowering();
+ TLI = TM.getSubtargetImpl(Fn)->getTargetLowering();
bool Changed = InsertUnwindResumeCalls(Fn);
DT = nullptr;
TLI = nullptr;
diff --git a/contrib/llvm/lib/CodeGen/EarlyIfConversion.cpp b/contrib/llvm/lib/CodeGen/EarlyIfConversion.cpp
index 7291727..402afe7 100644
--- a/contrib/llvm/lib/CodeGen/EarlyIfConversion.cpp
+++ b/contrib/llvm/lib/CodeGen/EarlyIfConversion.cpp
@@ -616,13 +616,13 @@ private:
char EarlyIfConverter::ID = 0;
char &llvm::EarlyIfConverterID = EarlyIfConverter::ID;
-INITIALIZE_PASS_BEGIN(EarlyIfConverter,
- "early-ifcvt", "Early If Converter", false, false)
+INITIALIZE_PASS_BEGIN(EarlyIfConverter, DEBUG_TYPE,
+ "Early If Converter", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
INITIALIZE_PASS_DEPENDENCY(MachineTraceMetrics)
-INITIALIZE_PASS_END(EarlyIfConverter,
- "early-ifcvt", "Early If Converter", false, false)
+INITIALIZE_PASS_END(EarlyIfConverter, DEBUG_TYPE,
+ "Early If Converter", false, false)
void EarlyIfConverter::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<MachineBranchProbabilityInfo>();
diff --git a/contrib/llvm/lib/CodeGen/ExecutionDepsFix.cpp b/contrib/llvm/lib/CodeGen/ExecutionDepsFix.cpp
index 32c57e3..e272d25 100644
--- a/contrib/llvm/lib/CodeGen/ExecutionDepsFix.cpp
+++ b/contrib/llvm/lib/CodeGen/ExecutionDepsFix.cpp
@@ -6,21 +6,9 @@
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
-//
-// This file contains the execution dependency fix pass.
-//
-// Some X86 SSE instructions like mov, and, or, xor are available in different
-// variants for different operand types. These variant instructions are
-// equivalent, but on Nehalem and newer cpus there is extra latency
-// transferring data between integer and floating point domains. ARM cores
-// have similar issues when they are configured with both VFP and NEON
-// pipelines.
-//
-// This pass changes the variant instructions to minimize domain crossings.
-//
-//===----------------------------------------------------------------------===//
-#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/ExecutionDepsFix.h"
+
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/CodeGen/LivePhysRegs.h"
@@ -35,193 +23,18 @@
using namespace llvm;
-#define DEBUG_TYPE "execution-fix"
-
-/// A DomainValue is a bit like LiveIntervals' ValNo, but it also keeps track
-/// of execution domains.
-///
-/// An open DomainValue represents a set of instructions that can still switch
-/// execution domain. Multiple registers may refer to the same open
-/// DomainValue - they will eventually be collapsed to the same execution
-/// domain.
-///
-/// A collapsed DomainValue represents a single register that has been forced
-/// into one of more execution domains. There is a separate collapsed
-/// DomainValue for each register, but it may contain multiple execution
-/// domains. A register value is initially created in a single execution
-/// domain, but if we were forced to pay the penalty of a domain crossing, we
-/// keep track of the fact that the register is now available in multiple
-/// domains.
-namespace {
-struct DomainValue {
- // Basic reference counting.
- unsigned Refs;
-
- // Bitmask of available domains. For an open DomainValue, it is the still
- // possible domains for collapsing. For a collapsed DomainValue it is the
- // domains where the register is available for free.
- unsigned AvailableDomains;
-
- // Pointer to the next DomainValue in a chain. When two DomainValues are
- // merged, Victim.Next is set to point to Victor, so old DomainValue
- // references can be updated by following the chain.
- DomainValue *Next;
-
- // Twiddleable instructions using or defining these registers.
- SmallVector<MachineInstr*, 8> Instrs;
-
- // A collapsed DomainValue has no instructions to twiddle - it simply keeps
- // track of the domains where the registers are already available.
- bool isCollapsed() const { return Instrs.empty(); }
-
- // Is domain available?
- bool hasDomain(unsigned domain) const {
- assert(domain <
- static_cast<unsigned>(std::numeric_limits<unsigned>::digits) &&
- "undefined behavior");
- return AvailableDomains & (1u << domain);
- }
-
- // Mark domain as available.
- void addDomain(unsigned domain) {
- AvailableDomains |= 1u << domain;
- }
-
- // Restrict to a single domain available.
- void setSingleDomain(unsigned domain) {
- AvailableDomains = 1u << domain;
- }
-
- // Return bitmask of domains that are available and in mask.
- unsigned getCommonDomains(unsigned mask) const {
- return AvailableDomains & mask;
- }
-
- // First domain available.
- unsigned getFirstDomain() const {
- return countTrailingZeros(AvailableDomains);
- }
-
- DomainValue() : Refs(0) { clear(); }
-
- // Clear this DomainValue and point to next which has all its data.
- void clear() {
- AvailableDomains = 0;
- Next = nullptr;
- Instrs.clear();
- }
-};
-}
-
-namespace {
-/// Information about a live register.
-struct LiveReg {
- /// Value currently in this register, or NULL when no value is being tracked.
- /// This counts as a DomainValue reference.
- DomainValue *Value;
-
- /// Instruction that defined this register, relative to the beginning of the
- /// current basic block. When a LiveReg is used to represent a live-out
- /// register, this value is relative to the end of the basic block, so it
- /// will be a negative number.
- int Def;
-};
-} // anonymous namespace
-
-namespace {
-class ExeDepsFix : public MachineFunctionPass {
- static char ID;
- SpecificBumpPtrAllocator<DomainValue> Allocator;
- SmallVector<DomainValue*,16> Avail;
-
- const TargetRegisterClass *const RC;
- MachineFunction *MF;
- const TargetInstrInfo *TII;
- const TargetRegisterInfo *TRI;
- RegisterClassInfo RegClassInfo;
- std::vector<SmallVector<int, 1>> AliasMap;
- const unsigned NumRegs;
- LiveReg *LiveRegs;
- typedef DenseMap<MachineBasicBlock*, LiveReg*> LiveOutMap;
- LiveOutMap LiveOuts;
-
- /// List of undefined register reads in this block in forward order.
- std::vector<std::pair<MachineInstr*, unsigned> > UndefReads;
-
- /// Storage for register unit liveness.
- LivePhysRegs LiveRegSet;
-
- /// Current instruction number.
- /// The first instruction in each basic block is 0.
- int CurInstr;
-
- /// True when the current block has a predecessor that hasn't been visited
- /// yet.
- bool SeenUnknownBackEdge;
-
-public:
- ExeDepsFix(const TargetRegisterClass *rc)
- : MachineFunctionPass(ID), RC(rc), NumRegs(RC->getNumRegs()) {}
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.setPreservesAll();
- MachineFunctionPass::getAnalysisUsage(AU);
- }
-
- bool runOnMachineFunction(MachineFunction &MF) override;
-
- MachineFunctionProperties getRequiredProperties() const override {
- return MachineFunctionProperties().set(
- MachineFunctionProperties::Property::NoVRegs);
- }
-
- StringRef getPassName() const override { return "Execution dependency fix"; }
-
-private:
- iterator_range<SmallVectorImpl<int>::const_iterator>
- regIndices(unsigned Reg) const;
-
- // DomainValue allocation.
- DomainValue *alloc(int domain = -1);
- DomainValue *retain(DomainValue *DV) {
- if (DV) ++DV->Refs;
- return DV;
- }
- void release(DomainValue*);
- DomainValue *resolve(DomainValue*&);
-
- // LiveRegs manipulations.
- void setLiveReg(int rx, DomainValue *DV);
- void kill(int rx);
- void force(int rx, unsigned domain);
- void collapse(DomainValue *dv, unsigned domain);
- bool merge(DomainValue *A, DomainValue *B);
-
- void enterBasicBlock(MachineBasicBlock*);
- void leaveBasicBlock(MachineBasicBlock*);
- void visitInstr(MachineInstr*);
- void processDefs(MachineInstr*, bool Kill);
- void visitSoftInstr(MachineInstr*, unsigned mask);
- void visitHardInstr(MachineInstr*, unsigned domain);
- void pickBestRegisterForUndef(MachineInstr *MI, unsigned OpIdx,
- unsigned Pref);
- bool shouldBreakDependence(MachineInstr*, unsigned OpIdx, unsigned Pref);
- void processUndefReads(MachineBasicBlock*);
-};
-}
-
-char ExeDepsFix::ID = 0;
+#define DEBUG_TYPE "execution-deps-fix"
/// Translate TRI register number to a list of indices into our smaller tables
/// of interesting registers.
iterator_range<SmallVectorImpl<int>::const_iterator>
-ExeDepsFix::regIndices(unsigned Reg) const {
+ExecutionDepsFix::regIndices(unsigned Reg) const {
assert(Reg < AliasMap.size() && "Invalid register");
const auto &Entry = AliasMap[Reg];
return make_range(Entry.begin(), Entry.end());
}
-DomainValue *ExeDepsFix::alloc(int domain) {
+DomainValue *ExecutionDepsFix::alloc(int domain) {
DomainValue *dv = Avail.empty() ?
new(Allocator.Allocate()) DomainValue :
Avail.pop_back_val();
@@ -234,7 +47,7 @@ DomainValue *ExeDepsFix::alloc(int domain) {
/// Release a reference to DV. When the last reference is released,
/// collapse if needed.
-void ExeDepsFix::release(DomainValue *DV) {
+void ExecutionDepsFix::release(DomainValue *DV) {
while (DV) {
assert(DV->Refs && "Bad DomainValue");
if (--DV->Refs)
@@ -254,7 +67,7 @@ void ExeDepsFix::release(DomainValue *DV) {
/// Follow the chain of dead DomainValues until a live DomainValue is reached.
/// Update the referenced pointer when necessary.
-DomainValue *ExeDepsFix::resolve(DomainValue *&DVRef) {
+DomainValue *ExecutionDepsFix::resolve(DomainValue *&DVRef) {
DomainValue *DV = DVRef;
if (!DV || !DV->Next)
return DV;
@@ -271,7 +84,7 @@ DomainValue *ExeDepsFix::resolve(DomainValue *&DVRef) {
}
/// Set LiveRegs[rx] = dv, updating reference counts.
-void ExeDepsFix::setLiveReg(int rx, DomainValue *dv) {
+void ExecutionDepsFix::setLiveReg(int rx, DomainValue *dv) {
assert(unsigned(rx) < NumRegs && "Invalid index");
assert(LiveRegs && "Must enter basic block first.");
@@ -283,7 +96,7 @@ void ExeDepsFix::setLiveReg(int rx, DomainValue *dv) {
}
// Kill register rx, recycle or collapse any DomainValue.
-void ExeDepsFix::kill(int rx) {
+void ExecutionDepsFix::kill(int rx) {
assert(unsigned(rx) < NumRegs && "Invalid index");
assert(LiveRegs && "Must enter basic block first.");
if (!LiveRegs[rx].Value)
@@ -294,7 +107,7 @@ void ExeDepsFix::kill(int rx) {
}
/// Force register rx into domain.
-void ExeDepsFix::force(int rx, unsigned domain) {
+void ExecutionDepsFix::force(int rx, unsigned domain) {
assert(unsigned(rx) < NumRegs && "Invalid index");
assert(LiveRegs && "Must enter basic block first.");
if (DomainValue *dv = LiveRegs[rx].Value) {
@@ -317,7 +130,7 @@ void ExeDepsFix::force(int rx, unsigned domain) {
/// Collapse open DomainValue into given domain. If there are multiple
/// registers using dv, they each get a unique collapsed DomainValue.
-void ExeDepsFix::collapse(DomainValue *dv, unsigned domain) {
+void ExecutionDepsFix::collapse(DomainValue *dv, unsigned domain) {
assert(dv->hasDomain(domain) && "Cannot collapse");
// Collapse all the instructions.
@@ -333,7 +146,7 @@ void ExeDepsFix::collapse(DomainValue *dv, unsigned domain) {
}
/// All instructions and registers in B are moved to A, and B is released.
-bool ExeDepsFix::merge(DomainValue *A, DomainValue *B) {
+bool ExecutionDepsFix::merge(DomainValue *A, DomainValue *B) {
assert(!A->isCollapsed() && "Cannot merge into collapsed");
assert(!B->isCollapsed() && "Cannot merge from collapsed");
if (A == B)
@@ -359,10 +172,7 @@ bool ExeDepsFix::merge(DomainValue *A, DomainValue *B) {
}
/// Set up LiveRegs by merging predecessor live-out values.
-void ExeDepsFix::enterBasicBlock(MachineBasicBlock *MBB) {
- // Detect back-edges from predecessors we haven't processed yet.
- SeenUnknownBackEdge = false;
-
+void ExecutionDepsFix::enterBasicBlock(MachineBasicBlock *MBB) {
// Reset instruction counter in each basic block.
CurInstr = 0;
@@ -397,18 +207,21 @@ void ExeDepsFix::enterBasicBlock(MachineBasicBlock *MBB) {
// Try to coalesce live-out registers from predecessors.
for (MachineBasicBlock::const_pred_iterator pi = MBB->pred_begin(),
pe = MBB->pred_end(); pi != pe; ++pi) {
- LiveOutMap::const_iterator fi = LiveOuts.find(*pi);
- if (fi == LiveOuts.end()) {
- SeenUnknownBackEdge = true;
+ auto fi = MBBInfos.find(*pi);
+ assert(fi != MBBInfos.end() &&
+ "Should have pre-allocated MBBInfos for all MBBs");
+ LiveReg *Incoming = fi->second.OutRegs;
+ // Incoming is null if this is a backedge from a BB
+ // we haven't processed yet
+ if (Incoming == nullptr) {
continue;
}
- assert(fi->second && "Can't have NULL entries");
for (unsigned rx = 0; rx != NumRegs; ++rx) {
// Use the most recent predecessor def for each register.
- LiveRegs[rx].Def = std::max(LiveRegs[rx].Def, fi->second[rx].Def);
+ LiveRegs[rx].Def = std::max(LiveRegs[rx].Def, Incoming[rx].Def);
- DomainValue *pdv = resolve(fi->second[rx].Value);
+ DomainValue *pdv = resolve(Incoming[rx].Value);
if (!pdv)
continue;
if (!LiveRegs[rx].Value) {
@@ -432,35 +245,34 @@ void ExeDepsFix::enterBasicBlock(MachineBasicBlock *MBB) {
force(rx, pdv->getFirstDomain());
}
}
- DEBUG(dbgs() << "BB#" << MBB->getNumber()
- << (SeenUnknownBackEdge ? ": incomplete\n" : ": all preds known\n"));
+ DEBUG(
+ dbgs() << "BB#" << MBB->getNumber()
+ << (!isBlockDone(MBB) ? ": incomplete\n" : ": all preds known\n"));
}
-void ExeDepsFix::leaveBasicBlock(MachineBasicBlock *MBB) {
+void ExecutionDepsFix::leaveBasicBlock(MachineBasicBlock *MBB) {
assert(LiveRegs && "Must enter basic block first.");
- // Save live registers at end of MBB - used by enterBasicBlock().
- // Also use LiveOuts as a visited set to detect back-edges.
- bool First = LiveOuts.insert(std::make_pair(MBB, LiveRegs)).second;
-
- if (First) {
- // LiveRegs was inserted in LiveOuts. Adjust all defs to be relative to
- // the end of this block instead of the beginning.
- for (unsigned i = 0, e = NumRegs; i != e; ++i)
- LiveRegs[i].Def -= CurInstr;
- } else {
- // Insertion failed, this must be the second pass.
+ LiveReg *OldOutRegs = MBBInfos[MBB].OutRegs;
+ // Save register clearances at end of MBB - used by enterBasicBlock().
+ MBBInfos[MBB].OutRegs = LiveRegs;
+
+ // While processing the basic block, we kept `Def` relative to the start
+ // of the basic block for convenience. However, future use of this information
+ // only cares about the clearance from the end of the block, so adjust
+ // everything to be relative to the end of the basic block.
+ for (unsigned i = 0, e = NumRegs; i != e; ++i)
+ LiveRegs[i].Def -= CurInstr;
+ if (OldOutRegs) {
+ // This must be the second pass.
// Release all the DomainValues instead of keeping them.
for (unsigned i = 0, e = NumRegs; i != e; ++i)
- release(LiveRegs[i].Value);
- delete[] LiveRegs;
+ release(OldOutRegs[i].Value);
+ delete[] OldOutRegs;
}
LiveRegs = nullptr;
}
-void ExeDepsFix::visitInstr(MachineInstr *MI) {
- if (MI->isDebugValue())
- return;
-
+bool ExecutionDepsFix::visitInstr(MachineInstr *MI) {
// Update instructions with explicit execution domains.
std::pair<uint16_t, uint16_t> DomP = TII->getExecutionDomain(*MI);
if (DomP.first) {
@@ -470,16 +282,16 @@ void ExeDepsFix::visitInstr(MachineInstr *MI) {
visitHardInstr(MI, DomP.first);
}
- // Process defs to track register ages, and kill values clobbered by generic
- // instructions.
- processDefs(MI, !DomP.first);
+ return !DomP.first;
}
/// \brief Helps avoid false dependencies on undef registers by updating the
/// machine instructions' undef operand to use a register that the instruction
/// is truly dependent on, or use a register with clearance higher than Pref.
-void ExeDepsFix::pickBestRegisterForUndef(MachineInstr *MI, unsigned OpIdx,
- unsigned Pref) {
+/// Returns true if it was able to find a true dependency, thus not requiring
+/// a dependency breaking instruction regardless of clearance.
+bool ExecutionDepsFix::pickBestRegisterForUndef(MachineInstr *MI,
+ unsigned OpIdx, unsigned Pref) {
MachineOperand &MO = MI->getOperand(OpIdx);
assert(MO.isUndef() && "Expected undef machine operand");
@@ -487,7 +299,7 @@ void ExeDepsFix::pickBestRegisterForUndef(MachineInstr *MI, unsigned OpIdx,
// Update only undef operands that are mapped to one register.
if (AliasMap[OriginalReg].size() != 1)
- return;
+ return false;
// Get the undef operand's register class
const TargetRegisterClass *OpRC =
@@ -502,7 +314,7 @@ void ExeDepsFix::pickBestRegisterForUndef(MachineInstr *MI, unsigned OpIdx,
// We found a true dependency - replace the undef register with the true
// dependency.
MO.setReg(CurrMO.getReg());
- return;
+ return true;
}
// Go over all registers in the register class and find the register with
@@ -527,12 +339,14 @@ void ExeDepsFix::pickBestRegisterForUndef(MachineInstr *MI, unsigned OpIdx,
// Update the operand if we found a register with better clearance.
if (MaxClearanceReg != OriginalReg)
MO.setReg(MaxClearanceReg);
+
+ return false;
}
/// \brief Return true to if it makes sense to break dependence on a partial def
/// or undef use.
-bool ExeDepsFix::shouldBreakDependence(MachineInstr *MI, unsigned OpIdx,
- unsigned Pref) {
+bool ExecutionDepsFix::shouldBreakDependence(MachineInstr *MI, unsigned OpIdx,
+ unsigned Pref) {
unsigned reg = MI->getOperand(OpIdx).getReg();
for (int rx : regIndices(reg)) {
unsigned Clearance = CurInstr - LiveRegs[rx].Def;
@@ -542,14 +356,7 @@ bool ExeDepsFix::shouldBreakDependence(MachineInstr *MI, unsigned OpIdx,
DEBUG(dbgs() << ": Break dependency.\n");
continue;
}
- // The current clearance seems OK, but we may be ignoring a def from a
- // back-edge.
- if (!SeenUnknownBackEdge || Pref <= unsigned(CurInstr)) {
- DEBUG(dbgs() << ": OK .\n");
- return false;
- }
- // A def from an unprocessed back-edge may make us break this dependency.
- DEBUG(dbgs() << ": Wait for back-edge to resolve.\n");
+ DEBUG(dbgs() << ": OK .\n");
return false;
}
return true;
@@ -559,16 +366,22 @@ bool ExeDepsFix::shouldBreakDependence(MachineInstr *MI, unsigned OpIdx,
// If Kill is set, also kill off DomainValues clobbered by the defs.
//
// Also break dependencies on partial defs and undef uses.
-void ExeDepsFix::processDefs(MachineInstr *MI, bool Kill) {
+void ExecutionDepsFix::processDefs(MachineInstr *MI, bool breakDependency,
+ bool Kill) {
assert(!MI->isDebugValue() && "Won't process debug values");
// Break dependence on undef uses. Do this before updating LiveRegs below.
unsigned OpNum;
- unsigned Pref = TII->getUndefRegClearance(*MI, OpNum, TRI);
- if (Pref) {
- pickBestRegisterForUndef(MI, OpNum, Pref);
- if (shouldBreakDependence(MI, OpNum, Pref))
- UndefReads.push_back(std::make_pair(MI, OpNum));
+ if (breakDependency) {
+ unsigned Pref = TII->getUndefRegClearance(*MI, OpNum, TRI);
+ if (Pref) {
+ bool HadTrueDependency = pickBestRegisterForUndef(MI, OpNum, Pref);
+ // We don't need to bother trying to break a dependency if this
+ // instruction has a true dependency on that register through another
+ // operand - we'll have to wait for it to be available regardless.
+ if (!HadTrueDependency && shouldBreakDependence(MI, OpNum, Pref))
+ UndefReads.push_back(std::make_pair(MI, OpNum));
+ }
}
const MCInstrDesc &MCID = MI->getDesc();
for (unsigned i = 0,
@@ -584,11 +397,13 @@ void ExeDepsFix::processDefs(MachineInstr *MI, bool Kill) {
DEBUG(dbgs() << TRI->getName(RC->getRegister(rx)) << ":\t" << CurInstr
<< '\t' << *MI);
- // Check clearance before partial register updates.
- // Call breakDependence before setting LiveRegs[rx].Def.
- unsigned Pref = TII->getPartialRegUpdateClearance(*MI, i, TRI);
- if (Pref && shouldBreakDependence(MI, i, Pref))
- TII->breakPartialRegDependency(*MI, i, TRI);
+ if (breakDependency) {
+ // Check clearance before partial register updates.
+ // Call breakDependence before setting LiveRegs[rx].Def.
+ unsigned Pref = TII->getPartialRegUpdateClearance(*MI, i, TRI);
+ if (Pref && shouldBreakDependence(MI, i, Pref))
+ TII->breakPartialRegDependency(*MI, i, TRI);
+ }
// How many instructions since rx was last written?
LiveRegs[rx].Def = CurInstr;
@@ -607,7 +422,7 @@ void ExeDepsFix::processDefs(MachineInstr *MI, bool Kill) {
/// only do it on demand. Note that the occurrence of undefined register reads
/// that should be broken is very rare, but when they occur we may have many in
/// a single block.
-void ExeDepsFix::processUndefReads(MachineBasicBlock *MBB) {
+void ExecutionDepsFix::processUndefReads(MachineBasicBlock *MBB) {
if (UndefReads.empty())
return;
@@ -640,7 +455,7 @@ void ExeDepsFix::processUndefReads(MachineBasicBlock *MBB) {
// A hard instruction only works in one domain. All input registers will be
// forced into that domain.
-void ExeDepsFix::visitHardInstr(MachineInstr *mi, unsigned domain) {
+void ExecutionDepsFix::visitHardInstr(MachineInstr *mi, unsigned domain) {
// Collapse all uses.
for (unsigned i = mi->getDesc().getNumDefs(),
e = mi->getDesc().getNumOperands(); i != e; ++i) {
@@ -663,7 +478,7 @@ void ExeDepsFix::visitHardInstr(MachineInstr *mi, unsigned domain) {
}
// A soft instruction can be changed to work in other domains given by mask.
-void ExeDepsFix::visitSoftInstr(MachineInstr *mi, unsigned mask) {
+void ExecutionDepsFix::visitSoftInstr(MachineInstr *mi, unsigned mask) {
// Bitmask of available domains for this instruction after taking collapsed
// operands into account.
unsigned available = mask;
@@ -774,7 +589,34 @@ void ExeDepsFix::visitSoftInstr(MachineInstr *mi, unsigned mask) {
}
}
-bool ExeDepsFix::runOnMachineFunction(MachineFunction &mf) {
+void ExecutionDepsFix::processBasicBlock(MachineBasicBlock *MBB,
+ bool PrimaryPass) {
+ enterBasicBlock(MBB);
+ // If this block is not done, it makes little sense to make any decisions
+ // based on clearance information. We need to make a second pass anyway,
+ // and by then we'll have better information, so we can avoid doing the work
+ // to try and break dependencies now.
+ bool breakDependency = isBlockDone(MBB);
+ for (MachineInstr &MI : *MBB) {
+ if (!MI.isDebugValue()) {
+ bool Kill = false;
+ if (PrimaryPass)
+ Kill = visitInstr(&MI);
+ processDefs(&MI, breakDependency, Kill);
+ }
+ }
+ if (breakDependency)
+ processUndefReads(MBB);
+ leaveBasicBlock(MBB);
+}
+
+bool ExecutionDepsFix::isBlockDone(MachineBasicBlock *MBB) {
+ return MBBInfos[MBB].PrimaryCompleted &&
+ MBBInfos[MBB].IncomingCompleted == MBBInfos[MBB].PrimaryIncoming &&
+ MBBInfos[MBB].IncomingProcessed == MBB->pred_size();
+}
+
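For readers following the new two-counter scheme, here is a hedged, self-contained model of the bookkeeping isBlockDone consults; the field names mirror MBBInfo, but this struct layout is an assumption for illustration only:

struct MBBInfoModel {
  bool PrimaryCompleted = false;  // primary (RPO) visit has run on this block
  unsigned IncomingProcessed = 0; // preds whose primary visit has run
  unsigned IncomingCompleted = 0; // preds that are fully done
  unsigned PrimaryIncoming = 0;   // IncomingProcessed snapshot at primary visit
};

// A block is done once no predecessor can still change its incoming state.
bool isDoneModel(const MBBInfoModel &I, unsigned NumPreds) {
  return I.PrimaryCompleted && I.IncomingCompleted == I.PrimaryIncoming &&
         I.IncomingProcessed == NumPreds;
}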
+bool ExecutionDepsFix::runOnMachineFunction(MachineFunction &mf) {
if (skipFunction(*mf.getFunction()))
return false;
MF = &mf;
@@ -810,52 +652,104 @@ bool ExeDepsFix::runOnMachineFunction(MachineFunction &mf) {
AliasMap[*AI].push_back(i);
}
+ // Initialize the MBBInfos
+ for (auto &MBB : mf) {
+ MBBInfo InitialInfo;
+ MBBInfos.insert(std::make_pair(&MBB, InitialInfo));
+ }
+
+ /*
+ * We want to visit every instruction in every basic block in order to update
+ * its execution domain or break any false dependencies. However, for the
+ * dependency breaking, we need to know clearances from all predecessors
+ * (including any backedges). One way to do so would be to do two complete
+ * passes over all basic blocks/instructions, the first for recording
+ * clearances, the second to break the dependencies. However, for functions
+ * without backedges, or functions with a lot of straight-line code, and
+ * a small loop, that would be a lot of unnecessary work (since only the
+ * BBs that are part of the loop require two passes). As an example,
+ * consider the following loop.
+ *
+ *
+ * PH -> A -> B (xmm<Undef> -> xmm<Def>) -> C -> D -> EXIT
+ * ^ |
+ * +----------------------------------+
+ *
+ * The iteration order is as follows:
+ * Naive: PH A B C D A' B' C' D'
+ * Optimized: PH A B C A' B' C' D
+ *
+ * Note that we avoid processing D twice, because we can entirely process
+ * the predecessors before getting to D. We call a block that is ready
+ * for its second round of processing `done` (isBlockDone). Once we finish
+ * processing some block, we update the counters in MBBInfos and re-process
+ * any successors that are now done.
+ */
+
MachineBasicBlock *Entry = &*MF->begin();
ReversePostOrderTraversal<MachineBasicBlock*> RPOT(Entry);
- SmallVector<MachineBasicBlock*, 16> Loops;
+ SmallVector<MachineBasicBlock *, 4> Workqueue;
for (ReversePostOrderTraversal<MachineBasicBlock*>::rpo_iterator
MBBI = RPOT.begin(), MBBE = RPOT.end(); MBBI != MBBE; ++MBBI) {
MachineBasicBlock *MBB = *MBBI;
- enterBasicBlock(MBB);
- if (SeenUnknownBackEdge)
- Loops.push_back(MBB);
- for (MachineInstr &MI : *MBB)
- visitInstr(&MI);
- processUndefReads(MBB);
- leaveBasicBlock(MBB);
+ // N.B.: IncomingProcessed and IncomingCompleted were already updated while
+ // processing this block's predecessors.
+ MBBInfos[MBB].PrimaryCompleted = true;
+ MBBInfos[MBB].PrimaryIncoming = MBBInfos[MBB].IncomingProcessed;
+ bool Primary = true;
+ Workqueue.push_back(MBB);
+ while (!Workqueue.empty()) {
+ MachineBasicBlock *ActiveMBB = &*Workqueue.back();
+ Workqueue.pop_back();
+ processBasicBlock(ActiveMBB, Primary);
+ bool Done = isBlockDone(ActiveMBB);
+ for (auto *Succ : ActiveMBB->successors()) {
+ if (!isBlockDone(Succ)) {
+ if (Primary) {
+ MBBInfos[Succ].IncomingProcessed++;
+ }
+ if (Done) {
+ MBBInfos[Succ].IncomingCompleted++;
+ }
+ if (isBlockDone(Succ)) {
+ Workqueue.push_back(Succ);
+ }
+ }
+ }
+ Primary = false;
+ }
}
- // Visit all the loop blocks again in order to merge DomainValues from
- // back-edges.
- for (MachineBasicBlock *MBB : Loops) {
- enterBasicBlock(MBB);
- for (MachineInstr &MI : *MBB)
- if (!MI.isDebugValue())
- processDefs(&MI, false);
- processUndefReads(MBB);
- leaveBasicBlock(MBB);
+ // We need to go through again and finalize any blocks that are not done yet.
+ // This is possible if blocks have dead predecessors, so we didn't visit them
+ // above.
+ for (ReversePostOrderTraversal<MachineBasicBlock *>::rpo_iterator
+ MBBI = RPOT.begin(),
+ MBBE = RPOT.end();
+ MBBI != MBBE; ++MBBI) {
+ MachineBasicBlock *MBB = *MBBI;
+ if (!isBlockDone(MBB)) {
+ processBasicBlock(MBB, false);
+ // Don't update successors here. We'll get to them anyway through this
+ // loop.
+ }
}
// Clear the LiveOuts vectors and collapse any remaining DomainValues.
for (ReversePostOrderTraversal<MachineBasicBlock*>::rpo_iterator
MBBI = RPOT.begin(), MBBE = RPOT.end(); MBBI != MBBE; ++MBBI) {
- LiveOutMap::const_iterator FI = LiveOuts.find(*MBBI);
- if (FI == LiveOuts.end() || !FI->second)
+ auto FI = MBBInfos.find(*MBBI);
+ if (FI == MBBInfos.end() || !FI->second.OutRegs)
continue;
for (unsigned i = 0, e = NumRegs; i != e; ++i)
- if (FI->second[i].Value)
- release(FI->second[i].Value);
- delete[] FI->second;
+ if (FI->second.OutRegs[i].Value)
+ release(FI->second.OutRegs[i].Value);
+ delete[] FI->second.OutRegs;
}
- LiveOuts.clear();
+ MBBInfos.clear();
UndefReads.clear();
Avail.clear();
Allocator.DestroyAll();
return false;
}
-
-FunctionPass *
-llvm::createExecutionDependencyFixPass(const TargetRegisterClass *RC) {
- return new ExeDepsFix(RC);
-}
diff --git a/contrib/llvm/lib/CodeGen/ExpandISelPseudos.cpp b/contrib/llvm/lib/CodeGen/ExpandISelPseudos.cpp
index 0ec79c2..324ea17 100644
--- a/contrib/llvm/lib/CodeGen/ExpandISelPseudos.cpp
+++ b/contrib/llvm/lib/CodeGen/ExpandISelPseudos.cpp
@@ -14,9 +14,9 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/Passes.h"
#include "llvm/Support/Debug.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetSubtargetInfo.h"
@@ -41,7 +41,7 @@ namespace {
char ExpandISelPseudos::ID = 0;
char &llvm::ExpandISelPseudosID = ExpandISelPseudos::ID;
-INITIALIZE_PASS(ExpandISelPseudos, "expand-isel-pseudos",
+INITIALIZE_PASS(ExpandISelPseudos, DEBUG_TYPE,
"Expand ISel Pseudo-instructions", false, false)
bool ExpandISelPseudos::runOnMachineFunction(MachineFunction &MF) {
diff --git a/contrib/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp b/contrib/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp
index ab2382e..4ce86f2 100644
--- a/contrib/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp
+++ b/contrib/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp
@@ -12,11 +12,11 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
@@ -58,7 +58,7 @@ private:
char ExpandPostRA::ID = 0;
char &llvm::ExpandPostRAPseudosID = ExpandPostRA::ID;
-INITIALIZE_PASS(ExpandPostRA, "postrapseudos",
+INITIALIZE_PASS(ExpandPostRA, DEBUG_TYPE,
"Post-RA pseudo instruction expansion pass", false, false)
/// TransferImplicitOperands - MI is a pseudo-instruction, and the lowered
@@ -142,8 +142,9 @@ bool ExpandPostRA::LowerCopy(MachineInstr *MI) {
MachineOperand &DstMO = MI->getOperand(0);
MachineOperand &SrcMO = MI->getOperand(1);
- if (SrcMO.getReg() == DstMO.getReg()) {
- DEBUG(dbgs() << "identity copy: " << *MI);
+ bool IdentityCopy = (SrcMO.getReg() == DstMO.getReg());
+ if (IdentityCopy || SrcMO.isUndef()) {
+ DEBUG(dbgs() << (IdentityCopy ? "identity copy: " : "undef copy: ") << *MI);
// No need to insert an identity copy instruction, but replace with a KILL
// if liveness is changed.
if (SrcMO.isUndef() || MI->getNumOperands() > 2) {
diff --git a/contrib/llvm/lib/CodeGen/ExpandReductions.cpp b/contrib/llvm/lib/CodeGen/ExpandReductions.cpp
new file mode 100644
index 0000000..70dca3b
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/ExpandReductions.cpp
@@ -0,0 +1,167 @@
+//===--- ExpandReductions.cpp - Expand experimental reduction intrinsics --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass implements IR expansion for reduction intrinsics, allowing targets
+// to enable the experimental intrinsics until just before codegen.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/ExpandReductions.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Transforms/Utils/LoopUtils.h"
+
+using namespace llvm;
+
+namespace {
+
+unsigned getOpcode(Intrinsic::ID ID) {
+ switch (ID) {
+ case Intrinsic::experimental_vector_reduce_fadd:
+ return Instruction::FAdd;
+ case Intrinsic::experimental_vector_reduce_fmul:
+ return Instruction::FMul;
+ case Intrinsic::experimental_vector_reduce_add:
+ return Instruction::Add;
+ case Intrinsic::experimental_vector_reduce_mul:
+ return Instruction::Mul;
+ case Intrinsic::experimental_vector_reduce_and:
+ return Instruction::And;
+ case Intrinsic::experimental_vector_reduce_or:
+ return Instruction::Or;
+ case Intrinsic::experimental_vector_reduce_xor:
+ return Instruction::Xor;
+ case Intrinsic::experimental_vector_reduce_smax:
+ case Intrinsic::experimental_vector_reduce_smin:
+ case Intrinsic::experimental_vector_reduce_umax:
+ case Intrinsic::experimental_vector_reduce_umin:
+ return Instruction::ICmp;
+ case Intrinsic::experimental_vector_reduce_fmax:
+ case Intrinsic::experimental_vector_reduce_fmin:
+ return Instruction::FCmp;
+ default:
+ llvm_unreachable("Unexpected ID");
+ }
+}
+
+RecurrenceDescriptor::MinMaxRecurrenceKind getMRK(Intrinsic::ID ID) {
+ switch (ID) {
+ case Intrinsic::experimental_vector_reduce_smax:
+ return RecurrenceDescriptor::MRK_SIntMax;
+ case Intrinsic::experimental_vector_reduce_smin:
+ return RecurrenceDescriptor::MRK_SIntMin;
+ case Intrinsic::experimental_vector_reduce_umax:
+ return RecurrenceDescriptor::MRK_UIntMax;
+ case Intrinsic::experimental_vector_reduce_umin:
+ return RecurrenceDescriptor::MRK_UIntMin;
+ case Intrinsic::experimental_vector_reduce_fmax:
+ return RecurrenceDescriptor::MRK_FloatMax;
+ case Intrinsic::experimental_vector_reduce_fmin:
+ return RecurrenceDescriptor::MRK_FloatMin;
+ default:
+ return RecurrenceDescriptor::MRK_Invalid;
+ }
+}
+
+bool expandReductions(Function &F, const TargetTransformInfo *TTI) {
+ bool Changed = false;
+ SmallVector<IntrinsicInst*, 4> Worklist;
+ for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I)
+ if (auto II = dyn_cast<IntrinsicInst>(&*I))
+ Worklist.push_back(II);
+
+ for (auto *II : Worklist) {
+ IRBuilder<> Builder(II);
+ Value *Vec = nullptr;
+ auto ID = II->getIntrinsicID();
+ auto MRK = RecurrenceDescriptor::MRK_Invalid;
+ switch (ID) {
+ case Intrinsic::experimental_vector_reduce_fadd:
+ case Intrinsic::experimental_vector_reduce_fmul:
+ // FMFs must be attached to the call; otherwise it's an ordered reduction
+ // and it can't be handled by generating this shuffle sequence.
+ // TODO: Implement scalarization of ordered reductions here for targets
+ // without native support.
+ if (!II->getFastMathFlags().unsafeAlgebra())
+ continue;
+ Vec = II->getArgOperand(1);
+ break;
+ case Intrinsic::experimental_vector_reduce_add:
+ case Intrinsic::experimental_vector_reduce_mul:
+ case Intrinsic::experimental_vector_reduce_and:
+ case Intrinsic::experimental_vector_reduce_or:
+ case Intrinsic::experimental_vector_reduce_xor:
+ case Intrinsic::experimental_vector_reduce_smax:
+ case Intrinsic::experimental_vector_reduce_smin:
+ case Intrinsic::experimental_vector_reduce_umax:
+ case Intrinsic::experimental_vector_reduce_umin:
+ case Intrinsic::experimental_vector_reduce_fmax:
+ case Intrinsic::experimental_vector_reduce_fmin:
+ Vec = II->getArgOperand(0);
+ MRK = getMRK(ID);
+ break;
+ default:
+ continue;
+ }
+ if (!TTI->shouldExpandReduction(II))
+ continue;
+ auto Rdx = getShuffleReduction(Builder, Vec, getOpcode(ID), MRK);
+ II->replaceAllUsesWith(Rdx);
+ II->eraseFromParent();
+ Changed = true;
+ }
+ return Changed;
+}
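For intuition, getShuffleReduction emits a log2(N) sequence of shuffles that repeatedly combines lane i with lane i+Half. A hedged scalar model of that strategy for an add reduction, assuming a power-of-two lane count:

#include <cassert>
#include <cstddef>

// Scalar model of the log2(N) halving the emitted shuffles perform.
// Mutates Lanes in place, as each shuffle round overwrites the low half.
int shuffleReduceAdd(int *Lanes, size_t N) {
  assert(N && (N & (N - 1)) == 0 && "model assumes a power-of-two width");
  for (size_t Half = N / 2; Half >= 1; Half /= 2)
    for (size_t i = 0; i < Half; ++i)
      Lanes[i] += Lanes[i + Half]; // combine lane i with lane i+Half
  return Lanes[0];                 // lane 0 holds the final reduction
}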
+
+class ExpandReductions : public FunctionPass {
+public:
+ static char ID;
+ ExpandReductions() : FunctionPass(ID) {
+ initializeExpandReductionsPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnFunction(Function &F) override {
+ const auto *TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
+ return expandReductions(F, TTI);
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<TargetTransformInfoWrapperPass>();
+ AU.setPreservesCFG();
+ }
+};
+}
+
+char ExpandReductions::ID;
+INITIALIZE_PASS_BEGIN(ExpandReductions, "expand-reductions",
+ "Expand reduction intrinsics", false, false)
+INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
+INITIALIZE_PASS_END(ExpandReductions, "expand-reductions",
+ "Expand reduction intrinsics", false, false)
+
+FunctionPass *llvm::createExpandReductionsPass() {
+ return new ExpandReductions();
+}
+
+PreservedAnalyses ExpandReductionsPass::run(Function &F,
+ FunctionAnalysisManager &AM) {
+ const auto &TTI = AM.getResult<TargetIRAnalysis>(F);
+ if (!expandReductions(F, &TTI))
+ return PreservedAnalyses::all();
+ PreservedAnalyses PA;
+ PA.preserveSet<CFGAnalyses>();
+ return PA;
+}
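
Note on the expansion above: getShuffleReduction builds a log2(VF) ladder of
shuffles and binary operations. A minimal sketch of that technique (not the
actual helper), assuming a power-of-two element count and ignoring the min/max
recurrence kinds:

#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/IRBuilder.h"
using namespace llvm;

static Value *shuffleReductionSketch(IRBuilder<> &Builder, Value *Vec,
                                     unsigned Opcode) {
  auto *VTy = cast<VectorType>(Vec->getType());
  unsigned NumElts = VTy->getNumElements();
  for (unsigned Width = NumElts / 2; Width != 0; Width /= 2) {
    // Bring the upper Width lanes down; lanes past Width are don't-care, so
    // an identity index keeps the sketch simple.
    SmallVector<uint32_t, 16> Mask(NumElts);
    for (unsigned i = 0; i != NumElts; ++i)
      Mask[i] = i < Width ? Width + i : i;
    Value *Upper = Builder.CreateShuffleVector(Vec, UndefValue::get(VTy),
                                               Mask, "rdx.shuf");
    // Combine the halves; the live result narrows by half each round.
    Vec = Builder.CreateBinOp((Instruction::BinaryOps)Opcode, Vec, Upper,
                              "rdx");
  }
  // The reduced value ends up in lane 0.
  return Builder.CreateExtractElement(Vec, Builder.getInt32(0));
}

For example, @llvm.experimental.vector.reduce.add on <4 x i32> becomes two
shuffle+add rounds followed by a single extractelement.
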
diff --git a/contrib/llvm/lib/CodeGen/FEntryInserter.cpp b/contrib/llvm/lib/CodeGen/FEntryInserter.cpp
new file mode 100644
index 0000000..0759bf6
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/FEntryInserter.cpp
@@ -0,0 +1,55 @@
+//===-- FEntryInserter.cpp - Patchable prologues for LLVM -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file edits function bodies to insert fentry calls.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+
+using namespace llvm;
+
+namespace {
+struct FEntryInserter : public MachineFunctionPass {
+ static char ID; // Pass identification, replacement for typeid
+ FEntryInserter() : MachineFunctionPass(ID) {
+ initializeFEntryInserterPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnMachineFunction(MachineFunction &F) override;
+};
+}
+
+bool FEntryInserter::runOnMachineFunction(MachineFunction &MF) {
+ const std::string FEntryName =
+ MF.getFunction()->getFnAttribute("fentry-call").getValueAsString();
+ if (FEntryName != "true")
+ return false;
+
+ auto &FirstMBB = *MF.begin();
+ auto &FirstMI = *FirstMBB.begin();
+
+ auto *TII = MF.getSubtarget().getInstrInfo();
+ BuildMI(FirstMBB, FirstMI, FirstMI.getDebugLoc(),
+ TII->get(TargetOpcode::FENTRY_CALL));
+ return true;
+}
+
+char FEntryInserter::ID = 0;
+char &llvm::FEntryInserterID = FEntryInserter::ID;
+INITIALIZE_PASS(FEntryInserter, "fentry-insert", "Insert fentry calls", false,
+ false)
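
The pass is gated entirely on a string function attribute, which front ends
set per function (Clang does so under -mfentry). A minimal sketch of opting a
function in; the helper name is made up, Function::addFnAttr is the real API:

#include "llvm/IR/Function.h"

static void markForFEntry(llvm::Function &F) {
  // FEntryInserter keys off this exact attribute key/value pair.
  F.addFnAttr("fentry-call", "true");
}
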
diff --git a/contrib/llvm/lib/CodeGen/FaultMaps.cpp b/contrib/llvm/lib/CodeGen/FaultMaps.cpp
index 2acafaf..2924b01 100644
--- a/contrib/llvm/lib/CodeGen/FaultMaps.cpp
+++ b/contrib/llvm/lib/CodeGen/FaultMaps.cpp
@@ -1,4 +1,4 @@
-//===---------------------------- FaultMaps.cpp ---------------------------===//
+//===- FaultMaps.cpp ------------------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -8,13 +8,16 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/FaultMaps.h"
-
+#include "llvm/ADT/Twine.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCObjectFileInfo.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -102,14 +105,16 @@ void FaultMaps::emitFunctionInfo(const MCSymbol *FnLabel,
}
}
-
const char *FaultMaps::faultTypeToString(FaultMaps::FaultKind FT) {
switch (FT) {
default:
llvm_unreachable("unhandled fault type!");
-
case FaultMaps::FaultingLoad:
return "FaultingLoad";
+ case FaultMaps::FaultingLoadStore:
+ return "FaultingLoadStore";
+ case FaultMaps::FaultingStore:
+ return "FaultingStore";
}
}
diff --git a/contrib/llvm/lib/CodeGen/FuncletLayout.cpp b/contrib/llvm/lib/CodeGen/FuncletLayout.cpp
index d61afad..9c71b18 100644
--- a/contrib/llvm/lib/CodeGen/FuncletLayout.cpp
+++ b/contrib/llvm/lib/CodeGen/FuncletLayout.cpp
@@ -11,10 +11,10 @@
// funclets being contiguous.
//
//===----------------------------------------------------------------------===//
-#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/Passes.h"
using namespace llvm;
#define DEBUG_TYPE "funclet-layout"
@@ -37,7 +37,7 @@ public:
char FuncletLayout::ID = 0;
char &llvm::FuncletLayoutID = FuncletLayout::ID;
-INITIALIZE_PASS(FuncletLayout, "funclet-layout",
+INITIALIZE_PASS(FuncletLayout, DEBUG_TYPE,
"Contiguously Lay Out Funclets", false, false)
bool FuncletLayout::runOnMachineFunction(MachineFunction &F) {
diff --git a/contrib/llvm/lib/CodeGen/GCMetadata.cpp b/contrib/llvm/lib/CodeGen/GCMetadata.cpp
index be21c73..456fa79 100644
--- a/contrib/llvm/lib/CodeGen/GCMetadata.cpp
+++ b/contrib/llvm/lib/CodeGen/GCMetadata.cpp
@@ -11,22 +11,27 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/GCMetadata.h"
#include "llvm/CodeGen/GCStrategy.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/IR/Function.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Pass.h"
-#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <cassert>
+#include <memory>
+#include <string>
+
using namespace llvm;
namespace {
class Printer : public FunctionPass {
static char ID;
+
raw_ostream &OS;
public:
@@ -38,7 +43,8 @@ public:
bool runOnFunction(Function &F) override;
bool doFinalization(Module &M) override;
};
-}
+
+} // end anonymous namespace
INITIALIZE_PASS(GCModuleInfo, "collector-metadata",
"Create Garbage Collector Module Metadata", false, false)
@@ -48,7 +54,7 @@ INITIALIZE_PASS(GCModuleInfo, "collector-metadata",
GCFunctionInfo::GCFunctionInfo(const Function &F, GCStrategy &S)
: F(F), S(S), FrameSize(~0LL) {}
-GCFunctionInfo::~GCFunctionInfo() {}
+GCFunctionInfo::~GCFunctionInfo() = default;
// -----------------------------------------------------------------------------
@@ -67,7 +73,7 @@ GCFunctionInfo &GCModuleInfo::getFunctionInfo(const Function &F) {
return *I->second;
GCStrategy *S = getGCStrategy(F.getGC());
- Functions.push_back(make_unique<GCFunctionInfo>(F, *S));
+ Functions.push_back(llvm::make_unique<GCFunctionInfo>(F, *S));
GCFunctionInfo *GFI = Functions.back().get();
FInfoMap[&F] = GFI;
return *GFI;
diff --git a/contrib/llvm/lib/CodeGen/GCMetadataPrinter.cpp b/contrib/llvm/lib/CodeGen/GCMetadataPrinter.cpp
index d183c7f..bc7beb6 100644
--- a/contrib/llvm/lib/CodeGen/GCMetadataPrinter.cpp
+++ b/contrib/llvm/lib/CodeGen/GCMetadataPrinter.cpp
@@ -1,4 +1,4 @@
-//===-- GCMetadataPrinter.cpp - Garbage collection infrastructure ---------===//
+//===- GCMetadataPrinter.cpp - Garbage collection infrastructure ----------===//
//
// The LLVM Compiler Infrastructure
//
@@ -12,10 +12,11 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/GCMetadataPrinter.h"
+
using namespace llvm;
LLVM_INSTANTIATE_REGISTRY(GCMetadataPrinterRegistry)
-GCMetadataPrinter::GCMetadataPrinter() {}
+GCMetadataPrinter::GCMetadataPrinter() = default;
-GCMetadataPrinter::~GCMetadataPrinter() {}
+GCMetadataPrinter::~GCMetadataPrinter() = default;
diff --git a/contrib/llvm/lib/CodeGen/GCStrategy.cpp b/contrib/llvm/lib/CodeGen/GCStrategy.cpp
index 31ab86f..6be4c16 100644
--- a/contrib/llvm/lib/CodeGen/GCStrategy.cpp
+++ b/contrib/llvm/lib/CodeGen/GCStrategy.cpp
@@ -1,4 +1,4 @@
-//===-- GCStrategy.cpp - Garbage Collector Description --------------------===//
+//===- GCStrategy.cpp - Garbage Collector Description ---------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -18,7 +18,4 @@ using namespace llvm;
LLVM_INSTANTIATE_REGISTRY(GCRegistry)
-GCStrategy::GCStrategy()
- : UseStatepoints(false), NeededSafePoints(0), CustomReadBarriers(false),
- CustomWriteBarriers(false), CustomRoots(false), InitRoots(true),
- UsesMetadata(false) {}
+GCStrategy::GCStrategy() = default;
diff --git a/contrib/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp b/contrib/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
index 1321221..be0c5c2 100644
--- a/contrib/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
+++ b/contrib/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
@@ -24,40 +24,42 @@
using namespace llvm;
bool CallLowering::lowerCall(
- MachineIRBuilder &MIRBuilder, const CallInst &CI, unsigned ResReg,
+ MachineIRBuilder &MIRBuilder, ImmutableCallSite CS, unsigned ResReg,
ArrayRef<unsigned> ArgRegs, std::function<unsigned()> GetCalleeReg) const {
- auto &DL = CI.getParent()->getParent()->getParent()->getDataLayout();
+ auto &DL = CS.getParent()->getParent()->getParent()->getDataLayout();
// First step is to marshall all the function's parameters into the correct
// physregs and memory locations. Gather the sequence of argument types that
// we'll pass to the assigner function.
SmallVector<ArgInfo, 8> OrigArgs;
unsigned i = 0;
- for (auto &Arg : CI.arg_operands()) {
- ArgInfo OrigArg{ArgRegs[i], Arg->getType(), ISD::ArgFlagsTy{}};
- setArgFlags(OrigArg, i + 1, DL, CI);
+ unsigned NumFixedArgs = CS.getFunctionType()->getNumParams();
+ for (auto &Arg : CS.args()) {
+ ArgInfo OrigArg{ArgRegs[i], Arg->getType(), ISD::ArgFlagsTy{},
+ i < NumFixedArgs};
+ setArgFlags(OrigArg, i + AttributeList::FirstArgIndex, DL, CS);
OrigArgs.push_back(OrigArg);
++i;
}
MachineOperand Callee = MachineOperand::CreateImm(0);
- if (Function *F = CI.getCalledFunction())
+ if (const Function *F = CS.getCalledFunction())
Callee = MachineOperand::CreateGA(F, 0);
else
Callee = MachineOperand::CreateReg(GetCalleeReg(), false);
- ArgInfo OrigRet{ResReg, CI.getType(), ISD::ArgFlagsTy{}};
+ ArgInfo OrigRet{ResReg, CS.getType(), ISD::ArgFlagsTy{}};
if (!OrigRet.Ty->isVoidTy())
- setArgFlags(OrigRet, AttributeSet::ReturnIndex, DL, CI);
+ setArgFlags(OrigRet, AttributeList::ReturnIndex, DL, CS);
- return lowerCall(MIRBuilder, Callee, OrigRet, OrigArgs);
+ return lowerCall(MIRBuilder, CS.getCallingConv(), Callee, OrigRet, OrigArgs);
}
template <typename FuncInfoTy>
void CallLowering::setArgFlags(CallLowering::ArgInfo &Arg, unsigned OpIdx,
const DataLayout &DL,
const FuncInfoTy &FuncInfo) const {
- const AttributeSet &Attrs = FuncInfo.getAttributes();
+ const AttributeList &Attrs = FuncInfo.getAttributes();
if (Attrs.hasAttribute(OpIdx, Attribute::ZExt))
Arg.Flags.setZExt();
if (Attrs.hasAttribute(OpIdx, Attribute::SExt))
@@ -81,8 +83,8 @@ void CallLowering::setArgFlags(CallLowering::ArgInfo &Arg, unsigned OpIdx,
// For ByVal, alignment should be passed from FE. BE will guess if
// this info is not there but there are cases it cannot get right.
unsigned FrameAlign;
- if (FuncInfo.getParamAlignment(OpIdx))
- FrameAlign = FuncInfo.getParamAlignment(OpIdx);
+ if (FuncInfo.getParamAlignment(OpIdx - 2))
+ FrameAlign = FuncInfo.getParamAlignment(OpIdx - 2);
else
FrameAlign = getTLI()->getByValTypeAlignment(ElementTy, DL);
Arg.Flags.setByValAlign(FrameAlign);
@@ -103,7 +105,6 @@ CallLowering::setArgFlags<CallInst>(CallLowering::ArgInfo &Arg, unsigned OpIdx,
const CallInst &FuncInfo) const;
bool CallLowering::handleAssignments(MachineIRBuilder &MIRBuilder,
- CCAssignFn *AssignFn,
ArrayRef<ArgInfo> Args,
ValueHandler &Handler) const {
MachineFunction &MF = MIRBuilder.getMF();
@@ -116,12 +117,20 @@ bool CallLowering::handleAssignments(MachineIRBuilder &MIRBuilder,
unsigned NumArgs = Args.size();
for (unsigned i = 0; i != NumArgs; ++i) {
MVT CurVT = MVT::getVT(Args[i].Ty);
- if (AssignFn(i, CurVT, CurVT, CCValAssign::Full, Args[i].Flags, CCInfo))
+ if (Handler.assignArg(i, CurVT, CurVT, CCValAssign::Full, Args[i], CCInfo))
return false;
}
- for (unsigned i = 0, e = Args.size(); i != e; ++i) {
- CCValAssign &VA = ArgLocs[i];
+ for (unsigned i = 0, e = Args.size(), j = 0; i != e; ++i, ++j) {
+ assert(j < ArgLocs.size() && "Skipped too many arg locs");
+
+ CCValAssign &VA = ArgLocs[j];
+ assert(VA.getValNo() == i && "Location doesn't correspond to current arg");
+
+ if (VA.needsCustom()) {
+ j += Handler.assignCustomValue(Args[i], makeArrayRef(ArgLocs).slice(j));
+ continue;
+ }
if (VA.isRegLoc())
Handler.assignValueToReg(Args[i].Reg, VA.getLocReg(), VA);
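
The i/j bookkeeping in the hunk above is the subtle part: i walks IR-level
arguments while j walks calling-convention locations, and one "custom"
argument may occupy several consecutive locations (e.g. a 64-bit value split
across two 32-bit registers). A self-contained model of that walk, using
stand-in types rather than LLVM's:

#include <cstddef>
#include <vector>

struct Loc { bool Custom; unsigned Extra; }; // Extra = additional locs used

static void walkLocations(const std::vector<int> &Args,
                          const std::vector<Loc> &Locs) {
  for (std::size_t i = 0, j = 0; i != Args.size(); ++i, ++j) {
    if (Locs[j].Custom) {
      j += Locs[j].Extra; // assignCustomValue reports how far to skip
      continue;           // the ++j in the loop header consumes one more
    }
    // An ordinary register/stack assignment of Args[i] at Locs[j] goes here.
  }
}
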
diff --git a/contrib/llvm/lib/CodeGen/GlobalISel/GlobalISel.cpp b/contrib/llvm/lib/CodeGen/GlobalISel/GlobalISel.cpp
index fcd2722..29d1209 100644
--- a/contrib/llvm/lib/CodeGen/GlobalISel/GlobalISel.cpp
+++ b/contrib/llvm/lib/CodeGen/GlobalISel/GlobalISel.cpp
@@ -26,6 +26,7 @@ void llvm::initializeGlobalISel(PassRegistry &Registry) {
void llvm::initializeGlobalISel(PassRegistry &Registry) {
initializeIRTranslatorPass(Registry);
initializeLegalizerPass(Registry);
+ initializeLocalizerPass(Registry);
initializeRegBankSelectPass(Registry);
initializeInstructionSelectPass(Registry);
}
diff --git a/contrib/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/contrib/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
index 89a042f..ed1bd99 100644
--- a/contrib/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
+++ b/contrib/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
@@ -1,4 +1,4 @@
-//===-- llvm/CodeGen/GlobalISel/IRTranslator.cpp - IRTranslator --*- C++ -*-==//
+//===- llvm/CodeGen/GlobalISel/IRTranslator.cpp - IRTranslator ---*- C++ -*-==//
//
// The LLVM Compiler Infrastructure
//
@@ -11,43 +11,93 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/GlobalISel/IRTranslator.h"
-
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/ScopeExit.h"
+#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
-#include "llvm/CodeGen/GlobalISel/CallLowering.h"
+#include "llvm/Analysis/OptimizationDiagnosticInfo.h"
#include "llvm/CodeGen/Analysis.h"
-#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/GlobalISel/CallLowering.h"
+#include "llvm/CodeGen/LowLevelType.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DebugInfo.h"
+#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
+#include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Metadata.h"
#include "llvm/IR/Type.h"
+#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/CodeGen.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/LowLevelTypeImpl.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetIntrinsicInfo.h"
#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <iterator>
+#include <string>
+#include <utility>
+#include <vector>
#define DEBUG_TYPE "irtranslator"
using namespace llvm;
char IRTranslator::ID = 0;
+
INITIALIZE_PASS_BEGIN(IRTranslator, DEBUG_TYPE, "IRTranslator LLVM IR -> MI",
false, false)
INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
INITIALIZE_PASS_END(IRTranslator, DEBUG_TYPE, "IRTranslator LLVM IR -> MI",
false, false)
-static void reportTranslationError(const Value &V, const Twine &Message) {
- std::string ErrStorage;
- raw_string_ostream Err(ErrStorage);
- Err << Message << ": " << V << '\n';
- report_fatal_error(Err.str());
+static void reportTranslationError(MachineFunction &MF,
+ const TargetPassConfig &TPC,
+ OptimizationRemarkEmitter &ORE,
+ OptimizationRemarkMissed &R) {
+ MF.getProperties().set(MachineFunctionProperties::Property::FailedISel);
+
+ // Print the function name explicitly if we don't have a debug location (which
+ // makes the diagnostic less useful) or if we're going to emit a raw error.
+ if (!R.getLocation().isValid() || TPC.isGlobalISelAbortEnabled())
+ R << (" (in function: " + MF.getName() + ")").str();
+
+ if (TPC.isGlobalISelAbortEnabled())
+ report_fatal_error(R.getMsg());
+ else
+ ORE.emit(R);
}
-IRTranslator::IRTranslator() : MachineFunctionPass(ID), MRI(nullptr) {
+IRTranslator::IRTranslator() : MachineFunctionPass(ID) {
initializeIRTranslatorPass(*PassRegistry::getPassRegistry());
}
@@ -56,31 +106,33 @@ void IRTranslator::getAnalysisUsage(AnalysisUsage &AU) const {
MachineFunctionPass::getAnalysisUsage(AU);
}
-
unsigned IRTranslator::getOrCreateVReg(const Value &Val) {
unsigned &ValReg = ValToVReg[&Val];
- // Check if this is the first time we see Val.
- if (!ValReg) {
- // Fill ValRegsSequence with the sequence of registers
- // we need to concat together to produce the value.
- assert(Val.getType()->isSized() &&
- "Don't know how to create an empty vreg");
- unsigned VReg = MRI->createGenericVirtualRegister(LLT{*Val.getType(), *DL});
- ValReg = VReg;
-
- if (auto CV = dyn_cast<Constant>(&Val)) {
- bool Success = translate(*CV, VReg);
- if (!Success) {
- if (!TPC->isGlobalISelAbortEnabled()) {
- MF->getProperties().set(
- MachineFunctionProperties::Property::FailedISel);
- return VReg;
- }
- reportTranslationError(Val, "unable to translate constant");
- }
+
+ if (ValReg)
+ return ValReg;
+
+ // Fill ValRegsSequence with the sequence of registers
+ // we need to concat together to produce the value.
+ assert(Val.getType()->isSized() &&
+ "Don't know how to create an empty vreg");
+ unsigned VReg =
+ MRI->createGenericVirtualRegister(getLLTForType(*Val.getType(), *DL));
+ ValReg = VReg;
+
+ if (auto CV = dyn_cast<Constant>(&Val)) {
+ bool Success = translate(*CV, VReg);
+ if (!Success) {
+ OptimizationRemarkMissed R("gisel-irtranslator", "GISelFailure",
+ MF->getFunction()->getSubprogram(),
+ &MF->getFunction()->getEntryBlock());
+ R << "unable to translate constant: " << ore::NV("Type", Val.getType());
+ reportTranslationError(*MF, *TPC, *ORE, R);
+ return VReg;
}
}
- return ValReg;
+
+ return VReg;
}
int IRTranslator::getOrCreateFrameIndex(const AllocaInst &AI) {
@@ -112,28 +164,27 @@ unsigned IRTranslator::getMemOpAlignment(const Instruction &I) {
} else if (const LoadInst *LI = dyn_cast<LoadInst>(&I)) {
Alignment = LI->getAlignment();
ValTy = LI->getType();
- } else if (!TPC->isGlobalISelAbortEnabled()) {
- MF->getProperties().set(
- MachineFunctionProperties::Property::FailedISel);
+ } else {
+ OptimizationRemarkMissed R("gisel-irtranslator", "", &I);
+ R << "unable to translate memop: " << ore::NV("Opcode", &I);
+ reportTranslationError(*MF, *TPC, *ORE, R);
return 1;
- } else
- llvm_unreachable("unhandled memory instruction");
+ }
return Alignment ? Alignment : DL->getABITypeAlignment(ValTy);
}
-MachineBasicBlock &IRTranslator::getOrCreateBB(const BasicBlock &BB) {
+MachineBasicBlock &IRTranslator::getMBB(const BasicBlock &BB) {
MachineBasicBlock *&MBB = BBToMBB[&BB];
- if (!MBB) {
- MBB = MF->CreateMachineBasicBlock(&BB);
- MF->push_back(MBB);
-
- if (BB.hasAddressTaken())
- MBB->setHasAddressTaken();
- }
+ assert(MBB && "BasicBlock was not encountered before");
return *MBB;
}
+void IRTranslator::addMachineCFGPred(CFGEdge Edge, MachineBasicBlock *NewPred) {
+ assert(NewPred && "new predecessor must be a real MachineBasicBlock");
+ MachinePreds[Edge].push_back(NewPred);
+}
+
bool IRTranslator::translateBinaryOp(unsigned Opcode, const User &U,
MachineIRBuilder &MIRBuilder) {
// FIXME: handle signed/unsigned wrapping flags.
@@ -149,6 +200,18 @@ bool IRTranslator::translateBinaryOp(unsigned Opcode, const User &U,
return true;
}
+bool IRTranslator::translateFSub(const User &U, MachineIRBuilder &MIRBuilder) {
+ // -0.0 - X --> G_FNEG
+ if (isa<Constant>(U.getOperand(0)) &&
+ U.getOperand(0) == ConstantFP::getZeroValueForNegation(U.getType())) {
+ MIRBuilder.buildInstr(TargetOpcode::G_FNEG)
+ .addDef(getOrCreateVReg(U))
+ .addUse(getOrCreateVReg(*U.getOperand(1)));
+ return true;
+ }
+ return translateBinaryOp(TargetOpcode::G_FSUB, U, MIRBuilder);
+}
+
bool IRTranslator::translateCompare(const User &U,
MachineIRBuilder &MIRBuilder) {
const CmpInst *CI = dyn_cast<CmpInst>(&U);
@@ -158,9 +221,14 @@ bool IRTranslator::translateCompare(const User &U,
CmpInst::Predicate Pred =
CI ? CI->getPredicate() : static_cast<CmpInst::Predicate>(
cast<ConstantExpr>(U).getPredicate());
-
if (CmpInst::isIntPredicate(Pred))
MIRBuilder.buildICmp(Pred, Res, Op0, Op1);
+ else if (Pred == CmpInst::FCMP_FALSE)
+ MIRBuilder.buildCopy(
+ Res, getOrCreateVReg(*Constant::getNullValue(CI->getType())));
+ else if (Pred == CmpInst::FCMP_TRUE)
+ MIRBuilder.buildCopy(
+ Res, getOrCreateVReg(*Constant::getAllOnesValue(CI->getType())));
else
MIRBuilder.buildFCmp(Pred, Res, Op0, Op1);
@@ -183,18 +251,21 @@ bool IRTranslator::translateBr(const User &U, MachineIRBuilder &MIRBuilder) {
// We want a G_BRCOND to the true BB followed by an unconditional branch.
unsigned Tst = getOrCreateVReg(*BrInst.getCondition());
const BasicBlock &TrueTgt = *cast<BasicBlock>(BrInst.getSuccessor(Succ++));
- MachineBasicBlock &TrueBB = getOrCreateBB(TrueTgt);
+ MachineBasicBlock &TrueBB = getMBB(TrueTgt);
MIRBuilder.buildBrCond(Tst, TrueBB);
}
const BasicBlock &BrTgt = *cast<BasicBlock>(BrInst.getSuccessor(Succ));
- MachineBasicBlock &TgtBB = getOrCreateBB(BrTgt);
- MIRBuilder.buildBr(TgtBB);
+ MachineBasicBlock &TgtBB = getMBB(BrTgt);
+ MachineBasicBlock &CurBB = MIRBuilder.getMBB();
+
+  // If the unconditional target is the layout successor, fall through.
+ if (!CurBB.isLayoutSuccessor(&TgtBB))
+ MIRBuilder.buildBr(TgtBB);
// Link successors.
- MachineBasicBlock &CurBB = MIRBuilder.getMBB();
for (const BasicBlock *Succ : BrInst.successors())
- CurBB.addSuccessor(&getOrCreateBB(*Succ));
+ CurBB.addSuccessor(&getMBB(*Succ));
return true;
}
@@ -209,30 +280,52 @@ bool IRTranslator::translateSwitch(const User &U,
const SwitchInst &SwInst = cast<SwitchInst>(U);
const unsigned SwCondValue = getOrCreateVReg(*SwInst.getCondition());
+ const BasicBlock *OrigBB = SwInst.getParent();
- LLT LLTi1 = LLT(*Type::getInt1Ty(U.getContext()), *DL);
+ LLT LLTi1 = getLLTForType(*Type::getInt1Ty(U.getContext()), *DL);
for (auto &CaseIt : SwInst.cases()) {
const unsigned CaseValueReg = getOrCreateVReg(*CaseIt.getCaseValue());
const unsigned Tst = MRI->createGenericVirtualRegister(LLTi1);
MIRBuilder.buildICmp(CmpInst::ICMP_EQ, Tst, CaseValueReg, SwCondValue);
- MachineBasicBlock &CurBB = MIRBuilder.getMBB();
- MachineBasicBlock &TrueBB = getOrCreateBB(*CaseIt.getCaseSuccessor());
+ MachineBasicBlock &CurMBB = MIRBuilder.getMBB();
+ const BasicBlock *TrueBB = CaseIt.getCaseSuccessor();
+ MachineBasicBlock &TrueMBB = getMBB(*TrueBB);
- MIRBuilder.buildBrCond(Tst, TrueBB);
- CurBB.addSuccessor(&TrueBB);
+ MIRBuilder.buildBrCond(Tst, TrueMBB);
+ CurMBB.addSuccessor(&TrueMBB);
+ addMachineCFGPred({OrigBB, TrueBB}, &CurMBB);
- MachineBasicBlock *FalseBB =
+ MachineBasicBlock *FalseMBB =
MF->CreateMachineBasicBlock(SwInst.getParent());
- MF->push_back(FalseBB);
- MIRBuilder.buildBr(*FalseBB);
- CurBB.addSuccessor(FalseBB);
+ // Insert the comparison blocks one after the other.
+ MF->insert(std::next(CurMBB.getIterator()), FalseMBB);
+ MIRBuilder.buildBr(*FalseMBB);
+ CurMBB.addSuccessor(FalseMBB);
- MIRBuilder.setMBB(*FalseBB);
+ MIRBuilder.setMBB(*FalseMBB);
}
// handle default case
- MachineBasicBlock &DefaultBB = getOrCreateBB(*SwInst.getDefaultDest());
- MIRBuilder.buildBr(DefaultBB);
- MIRBuilder.getMBB().addSuccessor(&DefaultBB);
+ const BasicBlock *DefaultBB = SwInst.getDefaultDest();
+ MachineBasicBlock &DefaultMBB = getMBB(*DefaultBB);
+ MIRBuilder.buildBr(DefaultMBB);
+ MachineBasicBlock &CurMBB = MIRBuilder.getMBB();
+ CurMBB.addSuccessor(&DefaultMBB);
+ addMachineCFGPred({OrigBB, DefaultBB}, &CurMBB);
+
+ return true;
+}
+
+bool IRTranslator::translateIndirectBr(const User &U,
+ MachineIRBuilder &MIRBuilder) {
+ const IndirectBrInst &BrInst = cast<IndirectBrInst>(U);
+
+ const unsigned Tgt = getOrCreateVReg(*BrInst.getAddress());
+ MIRBuilder.buildBrIndirect(Tgt);
+
+ // Link successors.
+ MachineBasicBlock &CurBB = MIRBuilder.getMBB();
+ for (const BasicBlock *Succ : BrInst.successors())
+ CurBB.addSuccessor(&getMBB(*Succ));
return true;
}
@@ -240,47 +333,38 @@ bool IRTranslator::translateSwitch(const User &U,
bool IRTranslator::translateLoad(const User &U, MachineIRBuilder &MIRBuilder) {
const LoadInst &LI = cast<LoadInst>(U);
- if (!TPC->isGlobalISelAbortEnabled() && LI.isAtomic())
- return false;
-
- assert(!LI.isAtomic() && "only non-atomic loads are supported at the moment");
auto Flags = LI.isVolatile() ? MachineMemOperand::MOVolatile
: MachineMemOperand::MONone;
Flags |= MachineMemOperand::MOLoad;
unsigned Res = getOrCreateVReg(LI);
unsigned Addr = getOrCreateVReg(*LI.getPointerOperand());
- LLT VTy{*LI.getType(), *DL}, PTy{*LI.getPointerOperand()->getType(), *DL};
+
MIRBuilder.buildLoad(
Res, Addr,
*MF->getMachineMemOperand(MachinePointerInfo(LI.getPointerOperand()),
Flags, DL->getTypeStoreSize(LI.getType()),
- getMemOpAlignment(LI)));
+ getMemOpAlignment(LI), AAMDNodes(), nullptr,
+ LI.getSyncScopeID(), LI.getOrdering()));
return true;
}
bool IRTranslator::translateStore(const User &U, MachineIRBuilder &MIRBuilder) {
const StoreInst &SI = cast<StoreInst>(U);
-
- if (!TPC->isGlobalISelAbortEnabled() && SI.isAtomic())
- return false;
-
- assert(!SI.isAtomic() && "only non-atomic stores supported at the moment");
auto Flags = SI.isVolatile() ? MachineMemOperand::MOVolatile
: MachineMemOperand::MONone;
Flags |= MachineMemOperand::MOStore;
unsigned Val = getOrCreateVReg(*SI.getValueOperand());
unsigned Addr = getOrCreateVReg(*SI.getPointerOperand());
- LLT VTy{*SI.getValueOperand()->getType(), *DL},
- PTy{*SI.getPointerOperand()->getType(), *DL};
MIRBuilder.buildStore(
Val, Addr,
*MF->getMachineMemOperand(
MachinePointerInfo(SI.getPointerOperand()), Flags,
DL->getTypeStoreSize(SI.getValueOperand()->getType()),
- getMemOpAlignment(SI)));
+ getMemOpAlignment(SI), AAMDNodes(), nullptr, SI.getSyncScopeID(),
+ SI.getOrdering()));
return true;
}
@@ -290,6 +374,15 @@ bool IRTranslator::translateExtractValue(const User &U,
Type *Int32Ty = Type::getInt32Ty(U.getContext());
SmallVector<Value *, 1> Indices;
+ // If Src is a single element ConstantStruct, translate extractvalue
+ // to that element to avoid inserting a cast instruction.
+ if (auto CS = dyn_cast<ConstantStruct>(Src))
+ if (CS->getNumOperands() == 1) {
+ unsigned Res = getOrCreateVReg(*CS->getOperand(0));
+ ValToVReg[&U] = Res;
+ return true;
+ }
+
// getIndexedOffsetInType is designed for GEPs, so the first index is the
// usual array element rather than looking into the actual aggregate.
Indices.push_back(ConstantInt::get(Int32Ty, 0));
@@ -305,7 +398,7 @@ bool IRTranslator::translateExtractValue(const User &U,
uint64_t Offset = 8 * DL->getIndexedOffsetInType(Src->getType(), Indices);
unsigned Res = getOrCreateVReg(U);
- MIRBuilder.buildExtract(Res, Offset, getOrCreateVReg(*Src));
+ MIRBuilder.buildExtract(Res, getOrCreateVReg(*Src), Offset);
return true;
}
@@ -331,29 +424,36 @@ bool IRTranslator::translateInsertValue(const User &U,
uint64_t Offset = 8 * DL->getIndexedOffsetInType(Src->getType(), Indices);
unsigned Res = getOrCreateVReg(U);
- const Value &Inserted = *U.getOperand(1);
- MIRBuilder.buildInsert(Res, getOrCreateVReg(*Src), getOrCreateVReg(Inserted),
- Offset);
+ unsigned Inserted = getOrCreateVReg(*U.getOperand(1));
+ MIRBuilder.buildInsert(Res, getOrCreateVReg(*Src), Inserted, Offset);
return true;
}
bool IRTranslator::translateSelect(const User &U,
MachineIRBuilder &MIRBuilder) {
- MIRBuilder.buildSelect(getOrCreateVReg(U), getOrCreateVReg(*U.getOperand(0)),
- getOrCreateVReg(*U.getOperand(1)),
- getOrCreateVReg(*U.getOperand(2)));
+ unsigned Res = getOrCreateVReg(U);
+ unsigned Tst = getOrCreateVReg(*U.getOperand(0));
+ unsigned Op0 = getOrCreateVReg(*U.getOperand(1));
+ unsigned Op1 = getOrCreateVReg(*U.getOperand(2));
+ MIRBuilder.buildSelect(Res, Tst, Op0, Op1);
return true;
}
bool IRTranslator::translateBitCast(const User &U,
MachineIRBuilder &MIRBuilder) {
- if (LLT{*U.getOperand(0)->getType(), *DL} == LLT{*U.getType(), *DL}) {
+ // If we're bitcasting to the source type, we can reuse the source vreg.
+ if (getLLTForType(*U.getOperand(0)->getType(), *DL) ==
+ getLLTForType(*U.getType(), *DL)) {
+ // Get the source vreg now, to avoid invalidating ValToVReg.
+ unsigned SrcReg = getOrCreateVReg(*U.getOperand(0));
unsigned &Reg = ValToVReg[&U];
+ // If we already assigned a vreg for this bitcast, we can't change that.
+ // Emit a copy to satisfy the users we already emitted.
if (Reg)
- MIRBuilder.buildCopy(Reg, getOrCreateVReg(*U.getOperand(0)));
+ MIRBuilder.buildCopy(Reg, SrcReg);
else
- Reg = getOrCreateVReg(*U.getOperand(0));
+ Reg = SrcReg;
return true;
}
return translateCast(TargetOpcode::G_BITCAST, U, MIRBuilder);
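
The ordering constraint spelled out in the comments above is the standard
DenseMap hazard, shown here in isolation: any insertion may rehash the table
and invalidate outstanding references into it.

#include "llvm/ADT/DenseMap.h"

static unsigned hazard(llvm::DenseMap<int, unsigned> &Map) {
  unsigned &Slot = Map[1]; // reference into the map's storage
  Map[2] = 42;             // may grow/rehash; Slot can now dangle
  return Slot;             // use-after-free if a rehash happened
}

Hence the safe order used above: create the source vreg (which may insert
into ValToVReg) first, and only then take the reference for the destination.
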
@@ -375,9 +475,10 @@ bool IRTranslator::translateGetElementPtr(const User &U,
Value &Op0 = *U.getOperand(0);
unsigned BaseReg = getOrCreateVReg(Op0);
- LLT PtrTy{*Op0.getType(), *DL};
- unsigned PtrSize = DL->getPointerSizeInBits(PtrTy.getAddressSpace());
- LLT OffsetTy = LLT::scalar(PtrSize);
+ Type *PtrIRTy = Op0.getType();
+ LLT PtrTy = getLLTForType(*PtrIRTy, *DL);
+ Type *OffsetIRTy = DL->getIntPtrType(PtrIRTy);
+ LLT OffsetTy = getLLTForType(*OffsetIRTy, *DL);
int64_t Offset = 0;
for (gep_type_iterator GTI = gep_type_begin(&U), E = gep_type_end(&U);
@@ -399,8 +500,8 @@ bool IRTranslator::translateGetElementPtr(const User &U,
if (Offset != 0) {
unsigned NewBaseReg = MRI->createGenericVirtualRegister(PtrTy);
- unsigned OffsetReg = MRI->createGenericVirtualRegister(OffsetTy);
- MIRBuilder.buildConstant(OffsetReg, Offset);
+ unsigned OffsetReg =
+ getOrCreateVReg(*ConstantInt::get(OffsetIRTy, Offset));
MIRBuilder.buildGEP(NewBaseReg, BaseReg, OffsetReg);
BaseReg = NewBaseReg;
@@ -408,8 +509,8 @@ bool IRTranslator::translateGetElementPtr(const User &U,
}
// N = N + Idx * ElementSize;
- unsigned ElementSizeReg = MRI->createGenericVirtualRegister(OffsetTy);
- MIRBuilder.buildConstant(ElementSizeReg, ElementSize);
+ unsigned ElementSizeReg =
+ getOrCreateVReg(*ConstantInt::get(OffsetIRTy, ElementSize));
unsigned IdxReg = getOrCreateVReg(*Idx);
if (MRI->getType(IdxReg) != OffsetTy) {
@@ -428,8 +529,7 @@ bool IRTranslator::translateGetElementPtr(const User &U,
}
if (Offset != 0) {
- unsigned OffsetReg = MRI->createGenericVirtualRegister(OffsetTy);
- MIRBuilder.buildConstant(OffsetReg, Offset);
+ unsigned OffsetReg = getOrCreateVReg(*ConstantInt::get(OffsetIRTy, Offset));
MIRBuilder.buildGEP(getOrCreateVReg(U), BaseReg, OffsetReg);
return true;
}
@@ -438,13 +538,12 @@ bool IRTranslator::translateGetElementPtr(const User &U,
return true;
}
-bool IRTranslator::translateMemcpy(const CallInst &CI,
- MachineIRBuilder &MIRBuilder) {
- LLT SizeTy{*CI.getArgOperand(2)->getType(), *DL};
- if (cast<PointerType>(CI.getArgOperand(0)->getType())->getAddressSpace() !=
- 0 ||
- cast<PointerType>(CI.getArgOperand(1)->getType())->getAddressSpace() !=
- 0 ||
+bool IRTranslator::translateMemfunc(const CallInst &CI,
+ MachineIRBuilder &MIRBuilder,
+ unsigned ID) {
+ LLT SizeTy = getLLTForType(*CI.getArgOperand(2)->getType(), *DL);
+ Type *DstTy = CI.getArgOperand(0)->getType();
+ if (cast<PointerType>(DstTy)->getAddressSpace() != 0 ||
SizeTy.getSizeInBits() != DL->getPointerSizeInBits(0))
return false;
@@ -454,14 +553,32 @@ bool IRTranslator::translateMemcpy(const CallInst &CI,
Args.emplace_back(getOrCreateVReg(*Arg), Arg->getType());
}
- MachineOperand Callee = MachineOperand::CreateES("memcpy");
+ const char *Callee;
+ switch (ID) {
+ case Intrinsic::memmove:
+ case Intrinsic::memcpy: {
+ Type *SrcTy = CI.getArgOperand(1)->getType();
+    if (cast<PointerType>(SrcTy)->getAddressSpace() != 0)
+ return false;
+ Callee = ID == Intrinsic::memcpy ? "memcpy" : "memmove";
+ break;
+ }
+ case Intrinsic::memset:
+ Callee = "memset";
+ break;
+ default:
+ return false;
+ }
- return CLI->lowerCall(MIRBuilder, Callee,
+ return CLI->lowerCall(MIRBuilder, CI.getCallingConv(),
+ MachineOperand::CreateES(Callee),
CallLowering::ArgInfo(0, CI.getType()), Args);
}
void IRTranslator::getStackGuard(unsigned DstReg,
MachineIRBuilder &MIRBuilder) {
+ const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
+ MRI->setRegClass(DstReg, TRI->getPointerRegClass(*MF));
auto MIB = MIRBuilder.buildInstr(TargetOpcode::LOAD_STACK_GUARD);
MIB.addDef(DstReg);
@@ -482,7 +599,7 @@ void IRTranslator::getStackGuard(unsigned DstReg,
bool IRTranslator::translateOverflowIntrinsic(const CallInst &CI, unsigned Op,
MachineIRBuilder &MIRBuilder) {
- LLT Ty{*CI.getOperand(0)->getType(), *DL};
+ LLT Ty = getLLTForType(*CI.getOperand(0)->getType(), *DL);
LLT s1 = LLT::scalar(1);
unsigned Width = Ty.getSizeInBits();
unsigned Res = MRI->createGenericVirtualRegister(Ty);
@@ -494,12 +611,12 @@ bool IRTranslator::translateOverflowIntrinsic(const CallInst &CI, unsigned Op,
.addUse(getOrCreateVReg(*CI.getOperand(1)));
if (Op == TargetOpcode::G_UADDE || Op == TargetOpcode::G_USUBE) {
- unsigned Zero = MRI->createGenericVirtualRegister(s1);
- EntryBuilder.buildConstant(Zero, 0);
+ unsigned Zero = getOrCreateVReg(
+ *Constant::getNullValue(Type::getInt1Ty(CI.getContext())));
MIB.addUse(Zero);
}
- MIRBuilder.buildSequence(getOrCreateVReg(CI), Res, 0, Overflow, Width);
+ MIRBuilder.buildSequence(getOrCreateVReg(CI), {Res, Overflow}, {0, Width});
return true;
}
@@ -508,12 +625,83 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
switch (ID) {
default:
break;
- case Intrinsic::dbg_declare:
- case Intrinsic::dbg_value:
- // FIXME: these obviously need to be supported properly.
- MF->getProperties().set(
- MachineFunctionProperties::Property::FailedISel);
+ case Intrinsic::lifetime_start:
+ case Intrinsic::lifetime_end:
+ // Stack coloring is not enabled in O0 (which we care about now) so we can
+ // drop these. Make sure someone notices when we start compiling at higher
+ // opts though.
+ if (MF->getTarget().getOptLevel() != CodeGenOpt::None)
+ return false;
+ return true;
+ case Intrinsic::dbg_declare: {
+ const DbgDeclareInst &DI = cast<DbgDeclareInst>(CI);
+ assert(DI.getVariable() && "Missing variable");
+
+ const Value *Address = DI.getAddress();
+ if (!Address || isa<UndefValue>(Address)) {
+ DEBUG(dbgs() << "Dropping debug info for " << DI << "\n");
+ return true;
+ }
+
+ assert(DI.getVariable()->isValidLocationForIntrinsic(
+ MIRBuilder.getDebugLoc()) &&
+ "Expected inlined-at fields to agree");
+ auto AI = dyn_cast<AllocaInst>(Address);
+ if (AI && AI->isStaticAlloca()) {
+ // Static allocas are tracked at the MF level, no need for DBG_VALUE
+ // instructions (in fact, they get ignored if they *do* exist).
+ MF->setVariableDbgInfo(DI.getVariable(), DI.getExpression(),
+ getOrCreateFrameIndex(*AI), DI.getDebugLoc());
+ } else
+ MIRBuilder.buildDirectDbgValue(getOrCreateVReg(*Address),
+ DI.getVariable(), DI.getExpression());
+ return true;
+ }
+ case Intrinsic::vaend:
+ // No target I know of cares about va_end. Certainly no in-tree target
+ // does. Simplest intrinsic ever!
+ return true;
+ case Intrinsic::vastart: {
+ auto &TLI = *MF->getSubtarget().getTargetLowering();
+ Value *Ptr = CI.getArgOperand(0);
+ unsigned ListSize = TLI.getVaListSizeInBits(*DL) / 8;
+
+ MIRBuilder.buildInstr(TargetOpcode::G_VASTART)
+ .addUse(getOrCreateVReg(*Ptr))
+ .addMemOperand(MF->getMachineMemOperand(
+ MachinePointerInfo(Ptr), MachineMemOperand::MOStore, ListSize, 0));
+ return true;
+ }
+ case Intrinsic::dbg_value: {
+ // This form of DBG_VALUE is target-independent.
+ const DbgValueInst &DI = cast<DbgValueInst>(CI);
+ const Value *V = DI.getValue();
+ assert(DI.getVariable()->isValidLocationForIntrinsic(
+ MIRBuilder.getDebugLoc()) &&
+ "Expected inlined-at fields to agree");
+ if (!V) {
+ // Currently the optimizer can produce this; insert an undef to
+ // help debugging. Probably the optimizer should not do this.
+ MIRBuilder.buildIndirectDbgValue(0, DI.getOffset(), DI.getVariable(),
+ DI.getExpression());
+ } else if (const auto *CI = dyn_cast<Constant>(V)) {
+ MIRBuilder.buildConstDbgValue(*CI, DI.getOffset(), DI.getVariable(),
+ DI.getExpression());
+ } else {
+ unsigned Reg = getOrCreateVReg(*V);
+ // FIXME: This does not handle register-indirect values at offset 0. The
+ // direct/indirect thing shouldn't really be handled by something as
+      // implicit as reg+noreg vs reg+imm in the first place, but it seems
+ // pretty baked in right now.
+ if (DI.getOffset() != 0)
+ MIRBuilder.buildIndirectDbgValue(Reg, DI.getOffset(), DI.getVariable(),
+ DI.getExpression());
+ else
+ MIRBuilder.buildDirectDbgValue(Reg, DI.getVariable(),
+ DI.getExpression());
+ }
return true;
+ }
case Intrinsic::uadd_with_overflow:
return translateOverflowIntrinsic(CI, TargetOpcode::G_UADDE, MIRBuilder);
case Intrinsic::sadd_with_overflow:
@@ -526,8 +714,43 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
return translateOverflowIntrinsic(CI, TargetOpcode::G_UMULO, MIRBuilder);
case Intrinsic::smul_with_overflow:
return translateOverflowIntrinsic(CI, TargetOpcode::G_SMULO, MIRBuilder);
+ case Intrinsic::pow:
+ MIRBuilder.buildInstr(TargetOpcode::G_FPOW)
+ .addDef(getOrCreateVReg(CI))
+ .addUse(getOrCreateVReg(*CI.getArgOperand(0)))
+ .addUse(getOrCreateVReg(*CI.getArgOperand(1)));
+ return true;
+ case Intrinsic::exp:
+ MIRBuilder.buildInstr(TargetOpcode::G_FEXP)
+ .addDef(getOrCreateVReg(CI))
+ .addUse(getOrCreateVReg(*CI.getArgOperand(0)));
+ return true;
+ case Intrinsic::exp2:
+ MIRBuilder.buildInstr(TargetOpcode::G_FEXP2)
+ .addDef(getOrCreateVReg(CI))
+ .addUse(getOrCreateVReg(*CI.getArgOperand(0)));
+ return true;
+ case Intrinsic::log:
+ MIRBuilder.buildInstr(TargetOpcode::G_FLOG)
+ .addDef(getOrCreateVReg(CI))
+ .addUse(getOrCreateVReg(*CI.getArgOperand(0)));
+ return true;
+ case Intrinsic::log2:
+ MIRBuilder.buildInstr(TargetOpcode::G_FLOG2)
+ .addDef(getOrCreateVReg(CI))
+ .addUse(getOrCreateVReg(*CI.getArgOperand(0)));
+ return true;
+ case Intrinsic::fma:
+ MIRBuilder.buildInstr(TargetOpcode::G_FMA)
+ .addDef(getOrCreateVReg(CI))
+ .addUse(getOrCreateVReg(*CI.getArgOperand(0)))
+ .addUse(getOrCreateVReg(*CI.getArgOperand(1)))
+ .addUse(getOrCreateVReg(*CI.getArgOperand(2)));
+ return true;
case Intrinsic::memcpy:
- return translateMemcpy(CI, MIRBuilder);
+ case Intrinsic::memmove:
+ case Intrinsic::memset:
+ return translateMemfunc(CI, MIRBuilder, ID);
case Intrinsic::eh_typeid_for: {
GlobalValue *GV = ExtractTypeInfo(CI.getArgOperand(0));
unsigned Reg = getOrCreateVReg(CI);
@@ -546,7 +769,7 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
getStackGuard(getOrCreateVReg(CI), MIRBuilder);
return true;
case Intrinsic::stackprotector: {
- LLT PtrTy{*CI.getArgOperand(0)->getType(), *DL};
+ LLT PtrTy = getLLTForType(*CI.getArgOperand(0)->getType(), *DL);
unsigned GuardVal = MRI->createGenericVirtualRegister(PtrTy);
getStackGuard(GuardVal, MIRBuilder);
@@ -564,18 +787,41 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
return false;
}
+bool IRTranslator::translateInlineAsm(const CallInst &CI,
+ MachineIRBuilder &MIRBuilder) {
+ const InlineAsm &IA = cast<InlineAsm>(*CI.getCalledValue());
+ if (!IA.getConstraintString().empty())
+ return false;
+
+ unsigned ExtraInfo = 0;
+ if (IA.hasSideEffects())
+ ExtraInfo |= InlineAsm::Extra_HasSideEffects;
+ if (IA.getDialect() == InlineAsm::AD_Intel)
+ ExtraInfo |= InlineAsm::Extra_AsmDialect;
+
+ MIRBuilder.buildInstr(TargetOpcode::INLINEASM)
+ .addExternalSymbol(IA.getAsmString().c_str())
+ .addImm(ExtraInfo);
+
+ return true;
+}
+
bool IRTranslator::translateCall(const User &U, MachineIRBuilder &MIRBuilder) {
const CallInst &CI = cast<CallInst>(U);
auto TII = MF->getTarget().getIntrinsicInfo();
const Function *F = CI.getCalledFunction();
+ if (CI.isInlineAsm())
+ return translateInlineAsm(CI, MIRBuilder);
+
if (!F || !F->isIntrinsic()) {
unsigned Res = CI.getType()->isVoidTy() ? 0 : getOrCreateVReg(CI);
SmallVector<unsigned, 8> Args;
for (auto &Arg: CI.arg_operands())
Args.push_back(getOrCreateVReg(*Arg));
- return CLI->lowerCall(MIRBuilder, CI, Res, Args, [&]() {
+ MF->getFrameInfo().setHasCalls(true);
+ return CLI->lowerCall(MIRBuilder, &CI, Res, Args, [&]() {
return getOrCreateVReg(*CI.getCalledValue());
});
}
@@ -594,11 +840,26 @@ bool IRTranslator::translateCall(const User &U, MachineIRBuilder &MIRBuilder) {
MIRBuilder.buildIntrinsic(ID, Res, !CI.doesNotAccessMemory());
for (auto &Arg : CI.arg_operands()) {
- if (ConstantInt *CI = dyn_cast<ConstantInt>(Arg))
- MIB.addImm(CI->getSExtValue());
- else
- MIB.addUse(getOrCreateVReg(*Arg));
+ // Some intrinsics take metadata parameters. Reject them.
+ if (isa<MetadataAsValue>(Arg))
+ return false;
+ MIB.addUse(getOrCreateVReg(*Arg));
+ }
+
+ // Add a MachineMemOperand if it is a target mem intrinsic.
+ const TargetLowering &TLI = *MF->getSubtarget().getTargetLowering();
+ TargetLowering::IntrinsicInfo Info;
+ // TODO: Add a GlobalISel version of getTgtMemIntrinsic.
+ if (TLI.getTgtMemIntrinsic(Info, CI, ID)) {
+ MachineMemOperand::Flags Flags =
+ Info.vol ? MachineMemOperand::MOVolatile : MachineMemOperand::MONone;
+ Flags |=
+ Info.readMem ? MachineMemOperand::MOLoad : MachineMemOperand::MOStore;
+ uint64_t Size = Info.memVT.getSizeInBits() >> 3;
+ MIB.addMemOperand(MF->getMachineMemOperand(MachinePointerInfo(Info.ptrVal),
+ Flags, Size, Info.align));
}
+
return true;
}
@@ -610,7 +871,7 @@ bool IRTranslator::translateInvoke(const User &U,
const BasicBlock *ReturnBB = I.getSuccessor(0);
const BasicBlock *EHPadBB = I.getSuccessor(1);
- const Value *Callee(I.getCalledValue());
+ const Value *Callee = I.getCalledValue();
const Function *Fn = dyn_cast<Function>(Callee);
if (isa<InlineAsm>(Callee))
return false;
@@ -627,30 +888,30 @@ bool IRTranslator::translateInvoke(const User &U,
if (!isa<LandingPadInst>(EHPadBB->front()))
return false;
-
// Emit the actual call, bracketed by EH_LABELs so that the MF knows about
// the region covered by the try.
MCSymbol *BeginSymbol = Context.createTempSymbol();
MIRBuilder.buildInstr(TargetOpcode::EH_LABEL).addSym(BeginSymbol);
unsigned Res = I.getType()->isVoidTy() ? 0 : getOrCreateVReg(I);
- SmallVector<CallLowering::ArgInfo, 8> Args;
+ SmallVector<unsigned, 8> Args;
for (auto &Arg: I.arg_operands())
- Args.emplace_back(getOrCreateVReg(*Arg), Arg->getType());
+ Args.push_back(getOrCreateVReg(*Arg));
- if (!CLI->lowerCall(MIRBuilder, MachineOperand::CreateGA(Fn, 0),
- CallLowering::ArgInfo(Res, I.getType()), Args))
+ if (!CLI->lowerCall(MIRBuilder, &I, Res, Args,
+ [&]() { return getOrCreateVReg(*I.getCalledValue()); }))
return false;
MCSymbol *EndSymbol = Context.createTempSymbol();
MIRBuilder.buildInstr(TargetOpcode::EH_LABEL).addSym(EndSymbol);
// FIXME: track probabilities.
- MachineBasicBlock &EHPadMBB = getOrCreateBB(*EHPadBB),
- &ReturnMBB = getOrCreateBB(*ReturnBB);
+ MachineBasicBlock &EHPadMBB = getMBB(*EHPadBB),
+ &ReturnMBB = getMBB(*ReturnBB);
MF->addInvoke(&EHPadMBB, BeginSymbol, EndSymbol);
MIRBuilder.getMBB().addSuccessor(&ReturnMBB);
MIRBuilder.getMBB().addSuccessor(&EHPadMBB);
+ MIRBuilder.buildBr(ReturnMBB);
return true;
}
@@ -684,37 +945,161 @@ bool IRTranslator::translateLandingPad(const User &U,
MIRBuilder.buildInstr(TargetOpcode::EH_LABEL)
.addSym(MF->addLandingPad(&MBB));
+ LLT Ty = getLLTForType(*LP.getType(), *DL);
+ unsigned Undef = MRI->createGenericVirtualRegister(Ty);
+ MIRBuilder.buildUndef(Undef);
+
+ SmallVector<LLT, 2> Tys;
+ for (Type *Ty : cast<StructType>(LP.getType())->elements())
+ Tys.push_back(getLLTForType(*Ty, *DL));
+ assert(Tys.size() == 2 && "Only two-valued landingpads are supported");
+
// Mark exception register as live in.
- SmallVector<unsigned, 2> Regs;
- SmallVector<uint64_t, 2> Offsets;
- LLT p0 = LLT::pointer(0, DL->getPointerSizeInBits());
- if (unsigned Reg = TLI.getExceptionPointerRegister(PersonalityFn)) {
- unsigned VReg = MRI->createGenericVirtualRegister(p0);
- MIRBuilder.buildCopy(VReg, Reg);
- Regs.push_back(VReg);
- Offsets.push_back(0);
+ unsigned ExceptionReg = TLI.getExceptionPointerRegister(PersonalityFn);
+ if (!ExceptionReg)
+ return false;
+
+ MBB.addLiveIn(ExceptionReg);
+ unsigned VReg = MRI->createGenericVirtualRegister(Tys[0]),
+ Tmp = MRI->createGenericVirtualRegister(Ty);
+ MIRBuilder.buildCopy(VReg, ExceptionReg);
+ MIRBuilder.buildInsert(Tmp, Undef, VReg, 0);
+
+ unsigned SelectorReg = TLI.getExceptionSelectorRegister(PersonalityFn);
+ if (!SelectorReg)
+ return false;
+
+ MBB.addLiveIn(SelectorReg);
+
+ // N.b. the exception selector register always has pointer type and may not
+ // match the actual IR-level type in the landingpad so an extra cast is
+ // needed.
+ unsigned PtrVReg = MRI->createGenericVirtualRegister(Tys[0]);
+ MIRBuilder.buildCopy(PtrVReg, SelectorReg);
+
+ VReg = MRI->createGenericVirtualRegister(Tys[1]);
+ MIRBuilder.buildInstr(TargetOpcode::G_PTRTOINT).addDef(VReg).addUse(PtrVReg);
+ MIRBuilder.buildInsert(getOrCreateVReg(LP), Tmp, VReg,
+ Tys[0].getSizeInBits());
+ return true;
+}
+
+bool IRTranslator::translateAlloca(const User &U,
+ MachineIRBuilder &MIRBuilder) {
+ auto &AI = cast<AllocaInst>(U);
+
+ if (AI.isStaticAlloca()) {
+ unsigned Res = getOrCreateVReg(AI);
+ int FI = getOrCreateFrameIndex(AI);
+ MIRBuilder.buildFrameIndex(Res, FI);
+ return true;
}
- if (unsigned Reg = TLI.getExceptionSelectorRegister(PersonalityFn)) {
- unsigned VReg = MRI->createGenericVirtualRegister(p0);
- MIRBuilder.buildCopy(VReg, Reg);
- Regs.push_back(VReg);
- Offsets.push_back(p0.getSizeInBits());
+ // Now we're in the harder dynamic case.
+ Type *Ty = AI.getAllocatedType();
+ unsigned Align =
+ std::max((unsigned)DL->getPrefTypeAlignment(Ty), AI.getAlignment());
+
+ unsigned NumElts = getOrCreateVReg(*AI.getArraySize());
+
+ Type *IntPtrIRTy = DL->getIntPtrType(AI.getType());
+ LLT IntPtrTy = getLLTForType(*IntPtrIRTy, *DL);
+ if (MRI->getType(NumElts) != IntPtrTy) {
+ unsigned ExtElts = MRI->createGenericVirtualRegister(IntPtrTy);
+ MIRBuilder.buildZExtOrTrunc(ExtElts, NumElts);
+ NumElts = ExtElts;
+ }
+
+ unsigned AllocSize = MRI->createGenericVirtualRegister(IntPtrTy);
+ unsigned TySize =
+ getOrCreateVReg(*ConstantInt::get(IntPtrIRTy, -DL->getTypeAllocSize(Ty)));
+ MIRBuilder.buildMul(AllocSize, NumElts, TySize);
+
+ LLT PtrTy = getLLTForType(*AI.getType(), *DL);
+ auto &TLI = *MF->getSubtarget().getTargetLowering();
+ unsigned SPReg = TLI.getStackPointerRegisterToSaveRestore();
+
+ unsigned SPTmp = MRI->createGenericVirtualRegister(PtrTy);
+ MIRBuilder.buildCopy(SPTmp, SPReg);
+
+ unsigned AllocTmp = MRI->createGenericVirtualRegister(PtrTy);
+ MIRBuilder.buildGEP(AllocTmp, SPTmp, AllocSize);
+
+ // Handle alignment. We have to realign if the allocation granule was smaller
+ // than stack alignment, or the specific alloca requires more than stack
+ // alignment.
+ unsigned StackAlign =
+ MF->getSubtarget().getFrameLowering()->getStackAlignment();
+ Align = std::max(Align, StackAlign);
+ if (Align > StackAlign || DL->getTypeAllocSize(Ty) % StackAlign != 0) {
+ // Round the size of the allocation up to the stack alignment size
+    // by adding SA-1 to the size. This doesn't overflow because we're computing
+ // an address inside an alloca.
+ unsigned AlignedAlloc = MRI->createGenericVirtualRegister(PtrTy);
+ MIRBuilder.buildPtrMask(AlignedAlloc, AllocTmp, Log2_32(Align));
+ AllocTmp = AlignedAlloc;
}
- MIRBuilder.buildSequence(getOrCreateVReg(LP), Regs, Offsets);
+ MIRBuilder.buildCopy(SPReg, AllocTmp);
+ MIRBuilder.buildCopy(getOrCreateVReg(AI), AllocTmp);
+
+ MF->getFrameInfo().CreateVariableSizedObject(Align ? Align : 1, &AI);
+ assert(MF->getFrameInfo().hasVarSizedObjects());
return true;
}
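
Stripped of vregs and opcodes, the dynamic case above computes the classic
downward-growing-stack allocation. A plain-arithmetic sketch of what the
G_MUL / G_GEP / buildPtrMask sequence produces (the standalone function and
its names are illustrative only; Align is assumed to be a power of two):

#include <cstdint>

static uint64_t dynAllocSketch(uint64_t SP, uint64_t NumElts, uint64_t EltSize,
                               uint64_t Align) {
  SP -= NumElts * EltSize; // G_MUL by -EltSize, then G_GEP off the old SP
  SP &= ~(Align - 1);      // the mask clears the low log2(Align) bits
  return SP;               // copied back into the stack pointer register
}
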
-bool IRTranslator::translateStaticAlloca(const AllocaInst &AI,
- MachineIRBuilder &MIRBuilder) {
- if (!TPC->isGlobalISelAbortEnabled() && !AI.isStaticAlloca())
- return false;
+bool IRTranslator::translateVAArg(const User &U, MachineIRBuilder &MIRBuilder) {
+ // FIXME: We may need more info about the type. Because of how LLT works,
+ // we're completely discarding the i64/double distinction here (amongst
+ // others). Fortunately the ABIs I know of where that matters don't use va_arg
+ // anyway but that's not guaranteed.
+ MIRBuilder.buildInstr(TargetOpcode::G_VAARG)
+ .addDef(getOrCreateVReg(U))
+ .addUse(getOrCreateVReg(*U.getOperand(0)))
+ .addImm(DL->getABITypeAlignment(U.getType()));
+ return true;
+}
- assert(AI.isStaticAlloca() && "only handle static allocas now");
- unsigned Res = getOrCreateVReg(AI);
- int FI = getOrCreateFrameIndex(AI);
- MIRBuilder.buildFrameIndex(Res, FI);
+bool IRTranslator::translateInsertElement(const User &U,
+ MachineIRBuilder &MIRBuilder) {
+ // If it is a <1 x Ty> vector, use the scalar as it is
+ // not a legal vector type in LLT.
+ if (U.getType()->getVectorNumElements() == 1) {
+ unsigned Elt = getOrCreateVReg(*U.getOperand(1));
+ ValToVReg[&U] = Elt;
+ return true;
+ }
+ unsigned Res = getOrCreateVReg(U);
+ unsigned Val = getOrCreateVReg(*U.getOperand(0));
+ unsigned Elt = getOrCreateVReg(*U.getOperand(1));
+ unsigned Idx = getOrCreateVReg(*U.getOperand(2));
+ MIRBuilder.buildInsertVectorElement(Res, Val, Elt, Idx);
+ return true;
+}
+
+bool IRTranslator::translateExtractElement(const User &U,
+ MachineIRBuilder &MIRBuilder) {
+ // If it is a <1 x Ty> vector, use the scalar as it is
+ // not a legal vector type in LLT.
+ if (U.getOperand(0)->getType()->getVectorNumElements() == 1) {
+ unsigned Elt = getOrCreateVReg(*U.getOperand(0));
+ ValToVReg[&U] = Elt;
+ return true;
+ }
+ unsigned Res = getOrCreateVReg(U);
+ unsigned Val = getOrCreateVReg(*U.getOperand(0));
+ unsigned Idx = getOrCreateVReg(*U.getOperand(1));
+ MIRBuilder.buildExtractVectorElement(Res, Val, Idx);
+ return true;
+}
+
+bool IRTranslator::translateShuffleVector(const User &U,
+ MachineIRBuilder &MIRBuilder) {
+ MIRBuilder.buildInstr(TargetOpcode::G_SHUFFLE_VECTOR)
+ .addDef(getOrCreateVReg(U))
+ .addUse(getOrCreateVReg(*U.getOperand(0)))
+ .addUse(getOrCreateVReg(*U.getOperand(1)))
+ .addUse(getOrCreateVReg(*U.getOperand(2)));
return true;
}
@@ -736,11 +1121,21 @@ void IRTranslator::finishPendingPhis() {
// won't create extra control flow here, otherwise we need to find the
// dominating predecessor here (or perhaps force the weirder IRTranslators
// to provide a simple boundary).
+ SmallSet<const BasicBlock *, 4> HandledPreds;
+
for (unsigned i = 0; i < PI->getNumIncomingValues(); ++i) {
- assert(BBToMBB[PI->getIncomingBlock(i)]->isSuccessor(MIB->getParent()) &&
- "I appear to have misunderstood Machine PHIs");
- MIB.addUse(getOrCreateVReg(*PI->getIncomingValue(i)));
- MIB.addMBB(BBToMBB[PI->getIncomingBlock(i)]);
+ auto IRPred = PI->getIncomingBlock(i);
+ if (HandledPreds.count(IRPred))
+ continue;
+
+ HandledPreds.insert(IRPred);
+ unsigned ValReg = getOrCreateVReg(*PI->getIncomingValue(i));
+ for (auto Pred : getMachinePredBBs({IRPred, PI->getParent()})) {
+ assert(Pred->isSuccessor(MIB->getParent()) &&
+ "incorrect CFG at MachineBasicBlock level");
+ MIB.addUse(ValReg);
+ MIB.addMBB(Pred);
+ }
}
}
}
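
One IR edge can need several machine predecessors because translateSwitch
above lowers each case into its own compare block: the single IR-level edge
from the switch's block to a successor fans out into a chain of machine
blocks. A sketch of the bookkeeping finishPendingPhis consumes, mirroring the
CFGEdge/MachinePreds members used in this file:

#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
#include <utility>

namespace llvm {
class BasicBlock;
class MachineBasicBlock;
} // end namespace llvm

using CFGEdge = std::pair<const llvm::BasicBlock *, const llvm::BasicBlock *>;
using PredMap =
    llvm::DenseMap<CFGEdge, llvm::SmallVector<llvm::MachineBasicBlock *, 1>>;

// translateSwitch records every compare block that can branch to a case:
//   Preds[{OrigBB, CaseBB}].push_back(&CurMBB);
// finishPendingPhis then adds one (value, predecessor) operand pair per
// recorded machine block instead of assuming a 1:1 IR/machine CFG.
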
@@ -752,9 +1147,7 @@ bool IRTranslator::translate(const Instruction &Inst) {
case Instruction::OPCODE: return translate##OPCODE(Inst, CurBuilder);
#include "llvm/IR/Instruction.def"
default:
- if (!TPC->isGlobalISelAbortEnabled())
- return false;
- llvm_unreachable("unknown opcode");
+ return false;
}
}
@@ -764,25 +1157,68 @@ bool IRTranslator::translate(const Constant &C, unsigned Reg) {
else if (auto CF = dyn_cast<ConstantFP>(&C))
EntryBuilder.buildFConstant(Reg, *CF);
else if (isa<UndefValue>(C))
- EntryBuilder.buildInstr(TargetOpcode::IMPLICIT_DEF).addDef(Reg);
+ EntryBuilder.buildUndef(Reg);
else if (isa<ConstantPointerNull>(C))
EntryBuilder.buildConstant(Reg, 0);
else if (auto GV = dyn_cast<GlobalValue>(&C))
EntryBuilder.buildGlobalValue(Reg, GV);
- else if (auto CE = dyn_cast<ConstantExpr>(&C)) {
+ else if (auto CAZ = dyn_cast<ConstantAggregateZero>(&C)) {
+ if (!CAZ->getType()->isVectorTy())
+ return false;
+ // Return the scalar if it is a <1 x Ty> vector.
+ if (CAZ->getNumElements() == 1)
+ return translate(*CAZ->getElementValue(0u), Reg);
+ std::vector<unsigned> Ops;
+ for (unsigned i = 0; i < CAZ->getNumElements(); ++i) {
+ Constant &Elt = *CAZ->getElementValue(i);
+ Ops.push_back(getOrCreateVReg(Elt));
+ }
+ EntryBuilder.buildMerge(Reg, Ops);
+ } else if (auto CV = dyn_cast<ConstantDataVector>(&C)) {
+ // Return the scalar if it is a <1 x Ty> vector.
+ if (CV->getNumElements() == 1)
+ return translate(*CV->getElementAsConstant(0), Reg);
+ std::vector<unsigned> Ops;
+ for (unsigned i = 0; i < CV->getNumElements(); ++i) {
+ Constant &Elt = *CV->getElementAsConstant(i);
+ Ops.push_back(getOrCreateVReg(Elt));
+ }
+ EntryBuilder.buildMerge(Reg, Ops);
+ } else if (auto CE = dyn_cast<ConstantExpr>(&C)) {
switch(CE->getOpcode()) {
#define HANDLE_INST(NUM, OPCODE, CLASS) \
case Instruction::OPCODE: return translate##OPCODE(*CE, EntryBuilder);
#include "llvm/IR/Instruction.def"
default:
- if (!TPC->isGlobalISelAbortEnabled())
- return false;
- llvm_unreachable("unknown opcode");
+ return false;
+ }
+ } else if (auto CS = dyn_cast<ConstantStruct>(&C)) {
+ // Return the element if it is a single element ConstantStruct.
+ if (CS->getNumOperands() == 1) {
+ unsigned EltReg = getOrCreateVReg(*CS->getOperand(0));
+ EntryBuilder.buildCast(Reg, EltReg);
+ return true;
+ }
+ SmallVector<unsigned, 4> Ops;
+ SmallVector<uint64_t, 4> Indices;
+ uint64_t Offset = 0;
+ for (unsigned i = 0; i < CS->getNumOperands(); ++i) {
+ unsigned OpReg = getOrCreateVReg(*CS->getOperand(i));
+ Ops.push_back(OpReg);
+ Indices.push_back(Offset);
+ Offset += MRI->getType(OpReg).getSizeInBits();
+ }
+ EntryBuilder.buildSequence(Reg, Ops, Indices);
+ } else if (auto CV = dyn_cast<ConstantVector>(&C)) {
+ if (CV->getNumOperands() == 1)
+ return translate(*CV->getOperand(0), Reg);
+ SmallVector<unsigned, 4> Ops;
+ for (unsigned i = 0; i < CV->getNumOperands(); ++i) {
+ Ops.push_back(getOrCreateVReg(*CV->getOperand(i)));
}
- } else if (!TPC->isGlobalISelAbortEnabled())
+ EntryBuilder.buildMerge(Reg, Ops);
+ } else
return false;
- else
- llvm_unreachable("unhandled constant kind");
return true;
}
@@ -793,7 +1229,12 @@ void IRTranslator::finalizeFunction() {
PendingPHIs.clear();
ValToVReg.clear();
FrameIndices.clear();
- Constants.clear();
+ MachinePreds.clear();
+ // MachineIRBuilder::DebugLoc can outlive the DILocation it holds. Clear it
+  // to avoid accessing freed memory (in runOnMachineFunction) and to avoid
+  // destroying it twice (in ~IRTranslator() and ~LLVMContext()).
+ EntryBuilder = MachineIRBuilder();
+ CurBuilder = MachineIRBuilder();
}
bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) {
@@ -807,85 +1248,97 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) {
MRI = &MF->getRegInfo();
DL = &F.getParent()->getDataLayout();
TPC = &getAnalysis<TargetPassConfig>();
+ ORE = llvm::make_unique<OptimizationRemarkEmitter>(&F);
assert(PendingPHIs.empty() && "stale PHIs");
- // Setup a separate basic-block for the arguments and constants, falling
- // through to the IR-level Function's entry block.
+ // Release the per-function state when we return, whether we succeeded or not.
+ auto FinalizeOnReturn = make_scope_exit([this]() { finalizeFunction(); });
+
+ // Setup a separate basic-block for the arguments and constants
MachineBasicBlock *EntryBB = MF->CreateMachineBasicBlock();
MF->push_back(EntryBB);
- EntryBB->addSuccessor(&getOrCreateBB(F.front()));
EntryBuilder.setMBB(*EntryBB);
+ // Create all blocks, in IR order, to preserve the layout.
+ for (const BasicBlock &BB: F) {
+ auto *&MBB = BBToMBB[&BB];
+
+ MBB = MF->CreateMachineBasicBlock(&BB);
+ MF->push_back(MBB);
+
+ if (BB.hasAddressTaken())
+ MBB->setHasAddressTaken();
+ }
+
+  // Make our arguments/constants entry block fall through to the IR entry block.
+ EntryBB->addSuccessor(&getMBB(F.front()));
+
// Lower the actual args into this basic block.
SmallVector<unsigned, 8> VRegArgs;
for (const Argument &Arg: F.args())
VRegArgs.push_back(getOrCreateVReg(Arg));
- bool Succeeded = CLI->lowerFormalArguments(EntryBuilder, F, VRegArgs);
- if (!Succeeded) {
- if (!TPC->isGlobalISelAbortEnabled()) {
- MF->getProperties().set(
- MachineFunctionProperties::Property::FailedISel);
- finalizeFunction();
- return false;
- }
- report_fatal_error("Unable to lower arguments");
+ if (!CLI->lowerFormalArguments(EntryBuilder, F, VRegArgs)) {
+ OptimizationRemarkMissed R("gisel-irtranslator", "GISelFailure",
+ MF->getFunction()->getSubprogram(),
+ &MF->getFunction()->getEntryBlock());
+ R << "unable to lower arguments: " << ore::NV("Prototype", F.getType());
+ reportTranslationError(*MF, *TPC, *ORE, R);
+ return false;
}
// And translate the function!
for (const BasicBlock &BB: F) {
- MachineBasicBlock &MBB = getOrCreateBB(BB);
+ MachineBasicBlock &MBB = getMBB(BB);
// Set the insertion point of all the following translations to
// the end of this basic block.
CurBuilder.setMBB(MBB);
for (const Instruction &Inst: BB) {
- Succeeded &= translate(Inst);
- if (!Succeeded) {
- if (TPC->isGlobalISelAbortEnabled())
- reportTranslationError(Inst, "unable to translate instruction");
- MF->getProperties().set(
- MachineFunctionProperties::Property::FailedISel);
- break;
- }
- }
- }
-
- if (Succeeded) {
- finishPendingPhis();
-
- // Now that the MachineFrameInfo has been configured, no further changes to
- // the reserved registers are possible.
- MRI->freezeReservedRegs(*MF);
-
- // Merge the argument lowering and constants block with its single
- // successor, the LLVM-IR entry block. We want the basic block to
- // be maximal.
- assert(EntryBB->succ_size() == 1 &&
- "Custom BB used for lowering should have only one successor");
- // Get the successor of the current entry block.
- MachineBasicBlock &NewEntryBB = **EntryBB->succ_begin();
- assert(NewEntryBB.pred_size() == 1 &&
- "LLVM-IR entry block has a predecessor!?");
- // Move all the instruction from the current entry block to the
- // new entry block.
- NewEntryBB.splice(NewEntryBB.begin(), EntryBB, EntryBB->begin(),
- EntryBB->end());
-
- // Update the live-in information for the new entry block.
- for (const MachineBasicBlock::RegisterMaskPair &LiveIn : EntryBB->liveins())
- NewEntryBB.addLiveIn(LiveIn);
- NewEntryBB.sortUniqueLiveIns();
+ if (translate(Inst))
+ continue;
- // Get rid of the now empty basic block.
- EntryBB->removeSuccessor(&NewEntryBB);
- MF->remove(EntryBB);
+ std::string InstStrStorage;
+ raw_string_ostream InstStr(InstStrStorage);
+ InstStr << Inst;
- assert(&MF->front() == &NewEntryBB &&
- "New entry wasn't next in the list of basic block!");
+ OptimizationRemarkMissed R("gisel-irtranslator", "GISelFailure",
+ Inst.getDebugLoc(), &BB);
+ R << "unable to translate instruction: " << ore::NV("Opcode", &Inst)
+ << ": '" << InstStr.str() << "'";
+ reportTranslationError(*MF, *TPC, *ORE, R);
+ return false;
+ }
}
- finalizeFunction();
+ finishPendingPhis();
+
+ // Merge the argument lowering and constants block with its single
+ // successor, the LLVM-IR entry block. We want the basic block to
+ // be maximal.
+ assert(EntryBB->succ_size() == 1 &&
+ "Custom BB used for lowering should have only one successor");
+ // Get the successor of the current entry block.
+ MachineBasicBlock &NewEntryBB = **EntryBB->succ_begin();
+ assert(NewEntryBB.pred_size() == 1 &&
+ "LLVM-IR entry block has a predecessor!?");
+  // Move all the instructions from the current entry block to the
+  // new entry block.
+ NewEntryBB.splice(NewEntryBB.begin(), EntryBB, EntryBB->begin(),
+ EntryBB->end());
+
+ // Update the live-in information for the new entry block.
+ for (const MachineBasicBlock::RegisterMaskPair &LiveIn : EntryBB->liveins())
+ NewEntryBB.addLiveIn(LiveIn);
+ NewEntryBB.sortUniqueLiveIns();
+
+ // Get rid of the now empty basic block.
+ EntryBB->removeSuccessor(&NewEntryBB);
+ MF->remove(EntryBB);
+ MF->DeleteMachineBasicBlock(EntryBB);
+
+ assert(&MF->front() == &NewEntryBB &&
+ "New entry wasn't next in the list of basic block!");
return false;
}
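
Aside: make_scope_exit above guarantees finalizeFunction() runs on every
return path, including the early failure returns. A self-contained sketch of
the RAII idiom, assuming nothing from LLVM's ScopeExit.h:

    #include <cstdio>
    #include <utility>

    template <typename F> class ScopeExit {
      F Fn;
    public:
      explicit ScopeExit(F fn) : Fn(std::move(fn)) {}
      ScopeExit(ScopeExit &&) = default;
      ScopeExit(const ScopeExit &) = delete;
      ~ScopeExit() { Fn(); } // runs on every exit path
    };

    template <typename F> ScopeExit<F> makeScopeExit(F fn) {
      return ScopeExit<F>(std::move(fn));
    }

    bool translateFunction(bool FailEarly) {
      auto Cleanup = makeScopeExit([] { std::puts("finalizeFunction()"); });
      if (FailEarly)
        return false; // cleanup still runs
      return true;    // ...and here too
    }

    int main() {
      translateFunction(true);
      translateFunction(false);
    }
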
diff --git a/contrib/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp b/contrib/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp
index 1d205cd..a16e14f 100644
--- a/contrib/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp
+++ b/contrib/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp
@@ -12,14 +12,19 @@
#include "llvm/CodeGen/GlobalISel/InstructionSelect.h"
#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/ScopeExit.h"
#include "llvm/ADT/Twine.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
+#include "llvm/CodeGen/GlobalISel/Utils.h"
+#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetSubtargetInfo.h"
#define DEBUG_TYPE "instruction-select"
@@ -44,17 +49,14 @@ void InstructionSelect::getAnalysisUsage(AnalysisUsage &AU) const {
MachineFunctionPass::getAnalysisUsage(AU);
}
-static void reportSelectionError(const MachineInstr *MI, const Twine &Message) {
- const MachineFunction &MF = *MI->getParent()->getParent();
- std::string ErrStorage;
- raw_string_ostream Err(ErrStorage);
- Err << Message << ":\nIn function: " << MF.getName() << '\n';
- if (MI)
- Err << *MI << '\n';
- report_fatal_error(Err.str());
-}
-
bool InstructionSelect::runOnMachineFunction(MachineFunction &MF) {
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+
+ // No matter what happens, whether we successfully select the function or not,
+ // nothing is going to use the vreg types after us. Make sure they disappear.
+ auto ClearVRegTypesOnReturn =
+ make_scope_exit([&]() { MRI.getVRegToType().clear(); });
+
// If the ISel pipeline failed, do not bother running that pass.
if (MF.getProperties().hasProperty(
MachineFunctionProperties::Property::FailedISel))
@@ -66,10 +68,10 @@ bool InstructionSelect::runOnMachineFunction(MachineFunction &MF) {
const InstructionSelector *ISel = MF.getSubtarget().getInstructionSelector();
assert(ISel && "Cannot work without InstructionSelector");
- // FIXME: freezeReservedRegs is now done in IRTranslator, but there are many
- // other MF/MFI fields we need to initialize.
+ // An optimization remark emitter. Used to report failures.
+ MachineOptimizationRemarkEmitter MORE(MF, /*MBFI=*/nullptr);
- const MachineRegisterInfo &MRI = MF.getRegInfo();
+ // FIXME: There are many other MF/MFI fields we need to initialize.
#ifndef NDEBUG
// Check that our input is fully legal: we require the function to have the
@@ -80,17 +82,19 @@ bool InstructionSelect::runOnMachineFunction(MachineFunction &MF) {
// that it has the same layering problem, but we only use inline methods so
// end up not needing to link against the GlobalISel library.
if (const LegalizerInfo *MLI = MF.getSubtarget().getLegalizerInfo())
- for (const MachineBasicBlock &MBB : MF)
- for (const MachineInstr &MI : MBB)
- if (isPreISelGenericOpcode(MI.getOpcode()) && !MLI->isLegal(MI, MRI))
- reportSelectionError(&MI, "Instruction is not legal");
+ for (MachineBasicBlock &MBB : MF)
+ for (MachineInstr &MI : MBB)
+ if (isPreISelGenericOpcode(MI.getOpcode()) && !MLI->isLegal(MI, MRI)) {
+ reportGISelFailure(MF, TPC, MORE, "gisel-select",
+ "instruction is not legal", MI);
+ return false;
+ }
#endif
// FIXME: We could introduce new blocks and will need to fix the outer loop.
// Until then, keep track of the number of blocks to assert that we don't.
const size_t NumBlocks = MF.size();
- bool Failed = false;
for (MachineBasicBlock *MBB : post_order(&MF)) {
if (MBB->empty())
continue;
@@ -115,14 +119,19 @@ bool InstructionSelect::runOnMachineFunction(MachineFunction &MF) {
DEBUG(dbgs() << "Selecting: \n " << MI);
+ // We could have folded this instruction away already, making it dead.
+ // If so, erase it.
+ if (isTriviallyDead(MI, MRI)) {
+ DEBUG(dbgs() << "Is dead; erasing.\n");
+ MI.eraseFromParentAndMarkDBGValuesForRemoval();
+ continue;
+ }
+
if (!ISel->select(MI)) {
- if (TPC.isGlobalISelAbortEnabled())
- // FIXME: It would be nice to dump all inserted instructions. It's
- // not
- // obvious how, esp. considering select() can insert after MI.
- reportSelectionError(&MI, "Cannot select");
- Failed = true;
- break;
+ // FIXME: It would be nice to dump all inserted instructions. It's
+ // not obvious how, esp. considering select() can insert after MI.
+ reportGISelFailure(MF, TPC, MORE, "gisel-select", "cannot select", MI);
+ return false;
}
// Dump the range of instructions that MI expanded into.
@@ -136,39 +145,47 @@ bool InstructionSelect::runOnMachineFunction(MachineFunction &MF) {
}
}
+ const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
+
// Now that selection is complete, there are no more generic vregs. Verify
// that the size of the now-constrained vreg is unchanged and that it has a
// register class.
for (auto &VRegToType : MRI.getVRegToType()) {
unsigned VReg = VRegToType.first;
auto *RC = MRI.getRegClassOrNull(VReg);
- auto *MI = MRI.def_instr_begin(VReg) == MRI.def_instr_end()
- ? nullptr
- : &*MRI.def_instr_begin(VReg);
- if (!RC) {
- if (TPC.isGlobalISelAbortEnabled())
- reportSelectionError(MI, "VReg as no regclass after selection");
- Failed = true;
- break;
- }
+ MachineInstr *MI = nullptr;
+ if (!MRI.def_empty(VReg))
+ MI = &*MRI.def_instr_begin(VReg);
+ else if (!MRI.use_empty(VReg))
+ MI = &*MRI.use_instr_begin(VReg);
+
+ if (MI && !RC) {
+ reportGISelFailure(MF, TPC, MORE, "gisel-select",
+ "VReg has no regclass after selection", *MI);
+ return false;
+ } else if (!RC)
+ continue;
if (VRegToType.second.isValid() &&
- VRegToType.second.getSizeInBits() > (RC->getSize() * 8)) {
- if (TPC.isGlobalISelAbortEnabled())
- reportSelectionError(
- MI, "VReg has explicit size different from class size");
- Failed = true;
- break;
+ VRegToType.second.getSizeInBits() > TRI.getRegSizeInBits(*RC)) {
+ reportGISelFailure(MF, TPC, MORE, "gisel-select",
+ "VReg has explicit size different from class size",
+ *MI);
+ return false;
}
}
- MRI.getVRegToType().clear();
-
- if (!TPC.isGlobalISelAbortEnabled() && (Failed || MF.size() != NumBlocks)) {
- MF.getProperties().set(MachineFunctionProperties::Property::FailedISel);
+ if (MF.size() != NumBlocks) {
+ MachineOptimizationRemarkMissed R("gisel-select", "GISelFailure",
+ MF.getFunction()->getSubprogram(),
+ /*MBB=*/nullptr);
+ R << "inserting blocks is not supported yet";
+ reportGISelFailure(MF, TPC, MORE, R);
return false;
}
- assert(MF.size() == NumBlocks && "Inserting blocks is not supported yet");
+
+ auto &TLI = *MF.getSubtarget().getTargetLowering();
+ TLI.finalizeLowering(MF);
// FIXME: Should we accurately track changes?
return true;
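
Aside: the post-selection loop verifies that every vreg with a recorded type
was constrained to a register class at least as wide. A toy version of that
invariant check, with assumed sizes instead of real target data:

    #include <cstdio>

    struct VRegInfo {
      unsigned DeclaredBits; // type size recorded during GlobalISel
      unsigned ClassBits;    // size of the assigned register class
    };

    // A vreg may not be wider than the class it was constrained to.
    bool checkVReg(const VRegInfo &V) {
      return V.DeclaredBits <= V.ClassBits;
    }

    int main() {
      VRegInfo Ok{32, 64}, Bad{128, 64};
      std::printf("ok:  %s\n", checkVReg(Ok) ? "legal" : "FailedISel");
      std::printf("bad: %s\n", checkVReg(Bad) ? "legal" : "FailedISel");
    }
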
diff --git a/contrib/llvm/lib/CodeGen/GlobalISel/InstructionSelector.cpp b/contrib/llvm/lib/CodeGen/GlobalISel/InstructionSelector.cpp
index 5c34da0..bf42722 100644
--- a/contrib/llvm/lib/CodeGen/GlobalISel/InstructionSelector.cpp
+++ b/contrib/llvm/lib/CodeGen/GlobalISel/InstructionSelector.cpp
@@ -1,4 +1,4 @@
-//===- llvm/CodeGen/GlobalISel/InstructionSelector.cpp -----------*- C++ -*-==//
+//===- llvm/CodeGen/GlobalISel/InstructionSelector.cpp --------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -11,17 +11,41 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
-#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/IR/Constants.h"
#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetRegisterInfo.h"
+#include <cassert>
#define DEBUG_TYPE "instructionselector"
using namespace llvm;
-InstructionSelector::InstructionSelector() {}
+InstructionSelector::MatcherState::MatcherState(unsigned MaxRenderers)
+ : Renderers(MaxRenderers, nullptr), MIs() {}
+
+InstructionSelector::InstructionSelector() = default;
+
+bool InstructionSelector::constrainOperandRegToRegClass(
+ MachineInstr &I, unsigned OpIdx, const TargetRegisterClass &RC,
+ const TargetInstrInfo &TII, const TargetRegisterInfo &TRI,
+ const RegisterBankInfo &RBI) const {
+ MachineBasicBlock &MBB = *I.getParent();
+ MachineFunction &MF = *MBB.getParent();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+
+ return
+ constrainRegToClass(MRI, TII, RBI, I, I.getOperand(OpIdx).getReg(), RC);
+}
bool InstructionSelector::constrainSelectedInstRegOperands(
MachineInstr &I, const TargetInstrInfo &TII, const TargetRegisterInfo &TRI,
@@ -55,6 +79,28 @@ bool InstructionSelector::constrainSelectedInstRegOperands(
// constrainOperandRegClass does that for us.
MO.setReg(constrainOperandRegClass(MF, TRI, MRI, TII, RBI, I, I.getDesc(),
Reg, OpI));
+
+ // Tie uses to defs as indicated in MCInstrDesc if this hasn't already been
+ // done.
+ if (MO.isUse()) {
+ int DefIdx = I.getDesc().getOperandConstraint(OpI, MCOI::TIED_TO);
+ if (DefIdx != -1 && !I.isRegTiedToUseOperand(DefIdx))
+ I.tieOperands(DefIdx, OpI);
+ }
}
return true;
}
+
+bool InstructionSelector::isOperandImmEqual(
+ const MachineOperand &MO, int64_t Value,
+ const MachineRegisterInfo &MRI) const {
+ if (MO.isReg() && MO.getReg())
+ if (auto VRegVal = getConstantVRegVal(MO.getReg(), MRI))
+ return *VRegVal == Value;
+ return false;
+}
+
+bool InstructionSelector::isObviouslySafeToFold(MachineInstr &MI) const {
+ return !MI.mayLoadOrStore() && !MI.hasUnmodeledSideEffects() &&
+ MI.implicit_operands().begin() == MI.implicit_operands().end();
+}
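
Aside: the new tied-operand handling walks each use operand's TIED_TO
constraint and ties it to the matching def. A schematic, self-contained model
of that lookup; OperandDesc is a stand-in, not the real MCOI structures:

    #include <cstdio>
    #include <vector>

    // -1 means "not tied"; otherwise the index of the def this use
    // operand must share a register with.
    struct OperandDesc { int TiedToDef; };

    int main() {
      // Toy descriptor: operand 0 is the def, operand 1 is a use tied
      // to it (the classic two-address constraint), operand 2 is free.
      std::vector<OperandDesc> Desc = {{-1}, {0}, {-1}};
      for (unsigned OpI = 1; OpI < Desc.size(); ++OpI) {
        int DefIdx = Desc[OpI].TiedToDef;
        if (DefIdx != -1)
          std::printf("tie use %u to def %d\n", OpI, DefIdx);
      }
    }
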
diff --git a/contrib/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp b/contrib/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp
index e863568..b699156 100644
--- a/contrib/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp
+++ b/contrib/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp
@@ -15,13 +15,16 @@
#include "llvm/CodeGen/GlobalISel/Legalizer.h"
#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
-#include "llvm/CodeGen/GlobalISel/Legalizer.h"
+#include "llvm/CodeGen/GlobalISel/Utils.h"
+#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/Support/Debug.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
+#include <iterator>
+
#define DEBUG_TYPE "legalizer"
using namespace llvm;
@@ -47,71 +50,79 @@ void Legalizer::getAnalysisUsage(AnalysisUsage &AU) const {
void Legalizer::init(MachineFunction &MF) {
}
-bool Legalizer::combineExtracts(MachineInstr &MI, MachineRegisterInfo &MRI,
- const TargetInstrInfo &TII) {
- bool Changed = false;
- if (MI.getOpcode() != TargetOpcode::G_EXTRACT)
- return Changed;
+bool Legalizer::combineMerges(MachineInstr &MI, MachineRegisterInfo &MRI,
+ const TargetInstrInfo &TII,
+ MachineIRBuilder &MIRBuilder) {
+ if (MI.getOpcode() != TargetOpcode::G_UNMERGE_VALUES)
+ return false;
- unsigned NumDefs = (MI.getNumOperands() - 1) / 2;
+ unsigned NumDefs = MI.getNumOperands() - 1;
unsigned SrcReg = MI.getOperand(NumDefs).getReg();
- MachineInstr &SeqI = *MRI.def_instr_begin(SrcReg);
- if (SeqI.getOpcode() != TargetOpcode::G_SEQUENCE)
- return Changed;
-
- unsigned NumSeqSrcs = (SeqI.getNumOperands() - 1) / 2;
- bool AllDefsReplaced = true;
-
- // Try to match each register extracted with a corresponding insertion formed
- // by the G_SEQUENCE.
- for (unsigned Idx = 0, SeqIdx = 0; Idx < NumDefs; ++Idx) {
- MachineOperand &ExtractMO = MI.getOperand(Idx);
- assert(ExtractMO.isReg() && ExtractMO.isDef() &&
- "unexpected extract operand");
-
- unsigned ExtractReg = ExtractMO.getReg();
- unsigned ExtractPos = MI.getOperand(NumDefs + Idx + 1).getImm();
-
- while (SeqIdx < NumSeqSrcs &&
- SeqI.getOperand(2 * SeqIdx + 2).getImm() < ExtractPos)
- ++SeqIdx;
-
- if (SeqIdx == NumSeqSrcs) {
- AllDefsReplaced = false;
- continue;
- }
+ MachineInstr &MergeI = *MRI.def_instr_begin(SrcReg);
+ if (MergeI.getOpcode() != TargetOpcode::G_MERGE_VALUES)
+ return false;
- unsigned OrigReg = SeqI.getOperand(2 * SeqIdx + 1).getReg();
- if (SeqI.getOperand(2 * SeqIdx + 2).getImm() != ExtractPos ||
- MRI.getType(OrigReg) != MRI.getType(ExtractReg)) {
- AllDefsReplaced = false;
- continue;
- }
+ const unsigned NumMergeRegs = MergeI.getNumOperands() - 1;
+
+ if (NumMergeRegs < NumDefs) {
+ if (NumDefs % NumMergeRegs != 0)
+ return false;
+
+ MIRBuilder.setInstr(MI);
+ // Transform to UNMERGEs, for example
+ // %1 = G_MERGE_VALUES %4, %5
+ // %9, %10, %11, %12 = G_UNMERGE_VALUES %1
+ // to
+ // %9, %10 = G_UNMERGE_VALUES %4
+ // %11, %12 = G_UNMERGE_VALUES %5
- assert(!TargetRegisterInfo::isPhysicalRegister(OrigReg) &&
- "unexpected physical register in G_SEQUENCE");
+ const unsigned NewNumDefs = NumDefs / NumMergeRegs;
+ for (unsigned Idx = 0; Idx < NumMergeRegs; ++Idx) {
+ SmallVector<unsigned, 2> DstRegs;
+ for (unsigned j = 0, DefIdx = Idx * NewNumDefs; j < NewNumDefs;
+ ++j, ++DefIdx)
+ DstRegs.push_back(MI.getOperand(DefIdx).getReg());
- // Finally we can replace the uses.
- for (auto &Use : MRI.use_operands(ExtractReg)) {
- Changed = true;
- Use.setReg(OrigReg);
+ MIRBuilder.buildUnmerge(DstRegs, MergeI.getOperand(Idx + 1).getReg());
}
- }
- if (AllDefsReplaced) {
- // If SeqI was the next instruction in the BB and we removed it, we'd break
- // the outer iteration.
- assert(std::next(MachineBasicBlock::iterator(MI)) != SeqI &&
- "G_SEQUENCE does not dominate G_EXTRACT");
+ } else if (NumMergeRegs > NumDefs) {
+ if (NumMergeRegs % NumDefs != 0)
+ return false;
+
+ MIRBuilder.setInstr(MI);
+    // Transform to MERGEs, for example
+ // %6 = G_MERGE_VALUES %17, %18, %19, %20
+ // %7, %8 = G_UNMERGE_VALUES %6
+ // to
+ // %7 = G_MERGE_VALUES %17, %18
+ // %8 = G_MERGE_VALUES %19, %20
+
+ const unsigned NumRegs = NumMergeRegs / NumDefs;
+ for (unsigned DefIdx = 0; DefIdx < NumDefs; ++DefIdx) {
+ SmallVector<unsigned, 2> Regs;
+ for (unsigned j = 0, Idx = NumRegs * DefIdx + 1; j < NumRegs; ++j, ++Idx)
+ Regs.push_back(MergeI.getOperand(Idx).getReg());
+
+ MIRBuilder.buildMerge(MI.getOperand(DefIdx).getReg(), Regs);
+ }
- MI.eraseFromParent();
+ } else {
+ // FIXME: is a COPY appropriate if the types mismatch? We know both
+ // registers are allocatable by now.
+ if (MRI.getType(MI.getOperand(0).getReg()) !=
+ MRI.getType(MergeI.getOperand(1).getReg()))
+ return false;
- if (MRI.use_empty(SrcReg))
- SeqI.eraseFromParent();
- Changed = true;
+ for (unsigned Idx = 0; Idx < NumDefs; ++Idx)
+ MRI.replaceRegWith(MI.getOperand(Idx).getReg(),
+ MergeI.getOperand(Idx + 1).getReg());
}
- return Changed;
+ MI.eraseFromParent();
+ if (MRI.use_empty(MergeI.getOperand(0).getReg()))
+ MergeI.eraseFromParent();
+ return true;
}
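
Aside: combineMerges distinguishes three shapes by comparing NumDefs with
NumMergeRegs. The regrouping arithmetic of the first two cases, modeled on
plain integers (operand 0 of the merge is its def, hence the 1-based source
indices):

    #include <cstdio>

    int main() {
      // NumMergeRegs < NumDefs: one smaller UNMERGE per merge source,
      // e.g. %1 = MERGE %4,%5 ; %9,%10,%11,%12 = UNMERGE %1.
      unsigned NumDefs = 4, NumMergeRegs = 2;
      unsigned NewNumDefs = NumDefs / NumMergeRegs; // 2 defs per source
      for (unsigned Idx = 0; Idx < NumMergeRegs; ++Idx) {
        std::printf("UNMERGE of source %u -> defs", Idx);
        for (unsigned j = 0, DefIdx = Idx * NewNumDefs; j < NewNumDefs;
             ++j, ++DefIdx)
          std::printf(" %u", DefIdx);
        std::printf("\n");
      }
      // NumMergeRegs > NumDefs: one smaller MERGE per def.
      NumDefs = 2; NumMergeRegs = 4;
      unsigned NumRegs = NumMergeRegs / NumDefs; // 2 sources per def
      for (unsigned DefIdx = 0; DefIdx < NumDefs; ++DefIdx) {
        std::printf("MERGE for def %u <- sources", DefIdx);
        for (unsigned j = 0, Idx = NumRegs * DefIdx + 1; j < NumRegs;
             ++j, ++Idx)
          std::printf(" %u", Idx);
        std::printf("\n");
      }
    }
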
bool Legalizer::runOnMachineFunction(MachineFunction &MF) {
@@ -122,7 +133,7 @@ bool Legalizer::runOnMachineFunction(MachineFunction &MF) {
DEBUG(dbgs() << "Legalize Machine IR for: " << MF.getName() << '\n');
init(MF);
const TargetPassConfig &TPC = getAnalysis<TargetPassConfig>();
- const LegalizerInfo &LegalizerInfo = *MF.getSubtarget().getLegalizerInfo();
+ MachineOptimizationRemarkEmitter MORE(MF, /*MBFI=*/nullptr);
LegalizerHelper Helper(MF);
// FIXME: an instruction may need more than one pass before it is legal. For
@@ -132,7 +143,7 @@ bool Legalizer::runOnMachineFunction(MachineFunction &MF) {
// convergence for performance reasons.
bool Changed = false;
MachineBasicBlock::iterator NextMI;
- for (auto &MBB : MF)
+ for (auto &MBB : MF) {
for (auto MI = MBB.begin(); MI != MBB.end(); MI = NextMI) {
// Get the next Instruction before we try to legalize, because there's a
// good chance MI will be deleted.
@@ -142,27 +153,52 @@ bool Legalizer::runOnMachineFunction(MachineFunction &MF) {
// and are assumed to be legal.
if (!isPreISelGenericOpcode(MI->getOpcode()))
continue;
-
- auto Res = Helper.legalizeInstr(*MI, LegalizerInfo);
-
- // Error out if we couldn't legalize this instruction. We may want to fall
- // back to DAG ISel instead in the future.
- if (Res == LegalizerHelper::UnableToLegalize) {
- if (!TPC.isGlobalISelAbortEnabled()) {
- MF.getProperties().set(
- MachineFunctionProperties::Property::FailedISel);
- return false;
+ unsigned NumNewInsns = 0;
+ SmallVector<MachineInstr *, 4> WorkList;
+ Helper.MIRBuilder.recordInsertions([&](MachineInstr *MI) {
+ // Only legalize pre-isel generic instructions.
+ // Legalization process could generate Target specific pseudo
+        // instructions with generic types. Don't record them.
+ if (isPreISelGenericOpcode(MI->getOpcode())) {
+ ++NumNewInsns;
+ WorkList.push_back(MI);
}
- std::string Msg;
- raw_string_ostream OS(Msg);
- OS << "unable to legalize instruction: ";
- MI->print(OS);
- report_fatal_error(OS.str());
- }
-
- Changed |= Res == LegalizerHelper::Legalized;
- }
+ });
+ WorkList.push_back(&*MI);
+
+ LegalizerHelper::LegalizeResult Res;
+ unsigned Idx = 0;
+ do {
+ Res = Helper.legalizeInstrStep(*WorkList[Idx]);
+ // Error out if we couldn't legalize this instruction. We may want to
+ // fall back to DAG ISel instead in the future.
+        if (Res == LegalizerHelper::UnableToLegalize) {
+          Helper.MIRBuilder.stopRecordingInsertions();
+          reportGISelFailure(MF, TPC, MORE, "gisel-legalize",
+                             "unable to legalize instruction",
+                             *WorkList[Idx]);
+          return false;
+        }
+ Changed |= Res == LegalizerHelper::Legalized;
+ ++Idx;
+
+#ifndef NDEBUG
+ if (NumNewInsns) {
+ DEBUG(dbgs() << ".. .. Emitted " << NumNewInsns << " insns\n");
+ for (auto I = WorkList.end() - NumNewInsns, E = WorkList.end();
+ I != E; ++I)
+ DEBUG(dbgs() << ".. .. New MI: "; (*I)->print(dbgs()));
+ NumNewInsns = 0;
+ }
+#endif
+ } while (Idx < WorkList.size());
+ Helper.MIRBuilder.stopRecordingInsertions();
+ }
+ }
MachineRegisterInfo &MRI = MF.getRegInfo();
const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
@@ -171,8 +207,7 @@ bool Legalizer::runOnMachineFunction(MachineFunction &MF) {
// Get the next Instruction before we try to legalize, because there's a
// good chance MI will be deleted.
NextMI = std::next(MI);
-
- Changed |= combineExtracts(*MI, MRI, TII);
+ Changed |= combineMerges(*MI, MRI, TII, Helper.MIRBuilder);
}
}
diff --git a/contrib/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/contrib/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index eb25b6c..5258370 100644
--- a/contrib/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/contrib/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -24,120 +24,174 @@
#include <sstream>
-#define DEBUG_TYPE "legalize-mir"
+#define DEBUG_TYPE "legalizer"
using namespace llvm;
LegalizerHelper::LegalizerHelper(MachineFunction &MF)
- : MRI(MF.getRegInfo()) {
+ : MRI(MF.getRegInfo()), LI(*MF.getSubtarget().getLegalizerInfo()) {
MIRBuilder.setMF(MF);
}
LegalizerHelper::LegalizeResult
-LegalizerHelper::legalizeInstrStep(MachineInstr &MI,
- const LegalizerInfo &LegalizerInfo) {
- auto Action = LegalizerInfo.getAction(MI, MRI);
+LegalizerHelper::legalizeInstrStep(MachineInstr &MI) {
+ DEBUG(dbgs() << "Legalizing: "; MI.print(dbgs()));
+
+ auto Action = LI.getAction(MI, MRI);
switch (std::get<0>(Action)) {
case LegalizerInfo::Legal:
+ DEBUG(dbgs() << ".. Already legal\n");
return AlreadyLegal;
case LegalizerInfo::Libcall:
+ DEBUG(dbgs() << ".. Convert to libcall\n");
return libcall(MI);
case LegalizerInfo::NarrowScalar:
+ DEBUG(dbgs() << ".. Narrow scalar\n");
return narrowScalar(MI, std::get<1>(Action), std::get<2>(Action));
case LegalizerInfo::WidenScalar:
+ DEBUG(dbgs() << ".. Widen scalar\n");
return widenScalar(MI, std::get<1>(Action), std::get<2>(Action));
case LegalizerInfo::Lower:
+ DEBUG(dbgs() << ".. Lower\n");
return lower(MI, std::get<1>(Action), std::get<2>(Action));
case LegalizerInfo::FewerElements:
+ DEBUG(dbgs() << ".. Reduce number of elements\n");
return fewerElementsVector(MI, std::get<1>(Action), std::get<2>(Action));
+ case LegalizerInfo::Custom:
+ DEBUG(dbgs() << ".. Custom legalization\n");
+ return LI.legalizeCustom(MI, MRI, MIRBuilder) ? Legalized
+ : UnableToLegalize;
default:
+ DEBUG(dbgs() << ".. Unable to legalize\n");
return UnableToLegalize;
}
}
-LegalizerHelper::LegalizeResult
-LegalizerHelper::legalizeInstr(MachineInstr &MI,
- const LegalizerInfo &LegalizerInfo) {
- SmallVector<MachineInstr *, 4> WorkList;
- MIRBuilder.recordInsertions(
- [&](MachineInstr *MI) { WorkList.push_back(MI); });
- WorkList.push_back(&MI);
-
- bool Changed = false;
- LegalizeResult Res;
- unsigned Idx = 0;
- do {
- Res = legalizeInstrStep(*WorkList[Idx], LegalizerInfo);
- if (Res == UnableToLegalize) {
- MIRBuilder.stopRecordingInsertions();
- return UnableToLegalize;
- }
- Changed |= Res == Legalized;
- ++Idx;
- } while (Idx < WorkList.size());
-
- MIRBuilder.stopRecordingInsertions();
-
- return Changed ? Legalized : AlreadyLegal;
-}
-
void LegalizerHelper::extractParts(unsigned Reg, LLT Ty, int NumParts,
SmallVectorImpl<unsigned> &VRegs) {
- unsigned Size = Ty.getSizeInBits();
- SmallVector<uint64_t, 4> Indexes;
- for (int i = 0; i < NumParts; ++i) {
+ for (int i = 0; i < NumParts; ++i)
VRegs.push_back(MRI.createGenericVirtualRegister(Ty));
- Indexes.push_back(i * Size);
+ MIRBuilder.buildUnmerge(VRegs, Reg);
+}
+
+static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) {
+ switch (Opcode) {
+ case TargetOpcode::G_SDIV:
+ assert(Size == 32 && "Unsupported size");
+ return RTLIB::SDIV_I32;
+ case TargetOpcode::G_UDIV:
+ assert(Size == 32 && "Unsupported size");
+ return RTLIB::UDIV_I32;
+ case TargetOpcode::G_SREM:
+ assert(Size == 32 && "Unsupported size");
+ return RTLIB::SREM_I32;
+ case TargetOpcode::G_UREM:
+ assert(Size == 32 && "Unsupported size");
+ return RTLIB::UREM_I32;
+ case TargetOpcode::G_FADD:
+ assert((Size == 32 || Size == 64) && "Unsupported size");
+ return Size == 64 ? RTLIB::ADD_F64 : RTLIB::ADD_F32;
+ case TargetOpcode::G_FREM:
+ return Size == 64 ? RTLIB::REM_F64 : RTLIB::REM_F32;
+ case TargetOpcode::G_FPOW:
+ return Size == 64 ? RTLIB::POW_F64 : RTLIB::POW_F32;
}
- MIRBuilder.buildExtract(VRegs, Indexes, Reg);
+ llvm_unreachable("Unknown libcall function");
+}
+
+LegalizerHelper::LegalizeResult
+llvm::createLibcall(MachineIRBuilder &MIRBuilder, RTLIB::Libcall Libcall,
+ const CallLowering::ArgInfo &Result,
+ ArrayRef<CallLowering::ArgInfo> Args) {
+ auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
+ auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
+ const char *Name = TLI.getLibcallName(Libcall);
+
+ MIRBuilder.getMF().getFrameInfo().setHasCalls(true);
+ if (!CLI.lowerCall(MIRBuilder, TLI.getLibcallCallingConv(Libcall),
+ MachineOperand::CreateES(Name), Result, Args))
+ return LegalizerHelper::UnableToLegalize;
+
+ return LegalizerHelper::Legalized;
+}
+
+static LegalizerHelper::LegalizeResult
+simpleLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size,
+ Type *OpType) {
+ auto Libcall = getRTLibDesc(MI.getOpcode(), Size);
+ return createLibcall(MIRBuilder, Libcall, {MI.getOperand(0).getReg(), OpType},
+ {{MI.getOperand(1).getReg(), OpType},
+ {MI.getOperand(2).getReg(), OpType}});
}
LegalizerHelper::LegalizeResult
LegalizerHelper::libcall(MachineInstr &MI) {
- LLT Ty = MRI.getType(MI.getOperand(0).getReg());
- unsigned Size = Ty.getSizeInBits();
+ LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
+ unsigned Size = LLTy.getSizeInBits();
+ auto &Ctx = MIRBuilder.getMF().getFunction()->getContext();
+
MIRBuilder.setInstr(MI);
switch (MI.getOpcode()) {
default:
return UnableToLegalize;
+ case TargetOpcode::G_SDIV:
+ case TargetOpcode::G_UDIV:
+ case TargetOpcode::G_SREM:
+ case TargetOpcode::G_UREM: {
+ Type *HLTy = Type::getInt32Ty(Ctx);
+ auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy);
+ if (Status != Legalized)
+ return Status;
+ break;
+ }
+ case TargetOpcode::G_FADD:
+ case TargetOpcode::G_FPOW:
case TargetOpcode::G_FREM: {
- auto &Ctx = MIRBuilder.getMF().getFunction()->getContext();
- Type *Ty = Size == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx);
- auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
- auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
- const char *Name =
- TLI.getLibcallName(Size == 64 ? RTLIB::REM_F64 : RTLIB::REM_F32);
-
- CLI.lowerCall(
- MIRBuilder, MachineOperand::CreateES(Name),
- {MI.getOperand(0).getReg(), Ty},
- {{MI.getOperand(1).getReg(), Ty}, {MI.getOperand(2).getReg(), Ty}});
- MI.eraseFromParent();
- return Legalized;
+ Type *HLTy = Size == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx);
+ auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy);
+ if (Status != Legalized)
+ return Status;
+ break;
}
}
+
+ MI.eraseFromParent();
+ return Legalized;
}
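
Aside: libcall() now routes integer division/remainder through i32 helpers
and FP ops through size-keyed RTLIB entries. A condensed sketch of the
opcode-to-symbol mapping; the names below are the conventional
compiler-rt/libm ones and only illustrative, since the real lookup goes
through TargetLowering::getLibcallName:

    #include <cstdio>
    #include <string>

    std::string libcallName(const std::string &Op, unsigned Bits) {
      if (Op == "G_SDIV") return "__divsi3";  // 32-bit only, as asserted
      if (Op == "G_UREM") return "__umodsi3";
      if (Op == "G_FREM") return Bits == 64 ? "fmod" : "fmodf";
      if (Op == "G_FPOW") return Bits == 64 ? "pow" : "powf";
      return "<no libcall>";
    }

    int main() {
      std::printf("%s\n", libcallName("G_FREM", 32).c_str()); // fmodf
      std::printf("%s\n", libcallName("G_FPOW", 64).c_str()); // pow
    }
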
LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
unsigned TypeIdx,
LLT NarrowTy) {
// FIXME: Don't know how to handle secondary types yet.
- if (TypeIdx != 0)
+ if (TypeIdx != 0 && MI.getOpcode() != TargetOpcode::G_EXTRACT)
return UnableToLegalize;
+
+ MIRBuilder.setInstr(MI);
+
switch (MI.getOpcode()) {
default:
return UnableToLegalize;
+ case TargetOpcode::G_IMPLICIT_DEF: {
+ int NumParts = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits() /
+ NarrowTy.getSizeInBits();
+
+ SmallVector<unsigned, 2> DstRegs;
+ for (int i = 0; i < NumParts; ++i) {
+ unsigned Dst = MRI.createGenericVirtualRegister(NarrowTy);
+ MIRBuilder.buildUndef(Dst);
+ DstRegs.push_back(Dst);
+ }
+ MIRBuilder.buildMerge(MI.getOperand(0).getReg(), DstRegs);
+ MI.eraseFromParent();
+ return Legalized;
+ }
case TargetOpcode::G_ADD: {
// Expand in terms of carry-setting/consuming G_ADDE instructions.
- unsigned NarrowSize = NarrowTy.getSizeInBits();
int NumParts = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits() /
NarrowTy.getSizeInBits();
- MIRBuilder.setInstr(MI);
-
SmallVector<unsigned, 2> Src1Regs, Src2Regs, DstRegs;
- SmallVector<uint64_t, 2> Indexes;
extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, Src1Regs);
extractParts(MI.getOperand(2).getReg(), NarrowTy, NumParts, Src2Regs);
@@ -152,11 +206,193 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
Src2Regs[i], CarryIn);
DstRegs.push_back(DstReg);
- Indexes.push_back(i * NarrowSize);
CarryIn = CarryOut;
}
unsigned DstReg = MI.getOperand(0).getReg();
- MIRBuilder.buildSequence(DstReg, DstRegs, Indexes);
+ MIRBuilder.buildMerge(DstReg, DstRegs);
+ MI.eraseFromParent();
+ return Legalized;
+ }
+ case TargetOpcode::G_EXTRACT: {
+ if (TypeIdx != 1)
+ return UnableToLegalize;
+
+ int64_t NarrowSize = NarrowTy.getSizeInBits();
+ int NumParts =
+ MRI.getType(MI.getOperand(1).getReg()).getSizeInBits() / NarrowSize;
+
+ SmallVector<unsigned, 2> SrcRegs, DstRegs;
+ SmallVector<uint64_t, 2> Indexes;
+ extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs);
+
+ unsigned OpReg = MI.getOperand(0).getReg();
+ int64_t OpStart = MI.getOperand(2).getImm();
+ int64_t OpSize = MRI.getType(OpReg).getSizeInBits();
+ for (int i = 0; i < NumParts; ++i) {
+ unsigned SrcStart = i * NarrowSize;
+
+ if (SrcStart + NarrowSize <= OpStart || SrcStart >= OpStart + OpSize) {
+ // No part of the extract uses this subregister, ignore it.
+ continue;
+ } else if (SrcStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
+ // The entire subregister is extracted, forward the value.
+ DstRegs.push_back(SrcRegs[i]);
+ continue;
+ }
+
+      // Compute where the overlap with the extracted range begins within
+      // this narrow source part, and how many bits it covers.
+ int64_t ExtractOffset, SegSize;
+ if (OpStart < SrcStart) {
+ ExtractOffset = 0;
+ SegSize = std::min(NarrowSize, OpStart + OpSize - SrcStart);
+ } else {
+ ExtractOffset = OpStart - SrcStart;
+ SegSize = std::min(SrcStart + NarrowSize - OpStart, OpSize);
+ }
+
+ unsigned SegReg = SrcRegs[i];
+ if (ExtractOffset != 0 || SegSize != NarrowSize) {
+ // A genuine extract is needed.
+ SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize));
+ MIRBuilder.buildExtract(SegReg, SrcRegs[i], ExtractOffset);
+ }
+
+ DstRegs.push_back(SegReg);
+ }
+
+ MIRBuilder.buildMerge(MI.getOperand(0).getReg(), DstRegs);
+ MI.eraseFromParent();
+ return Legalized;
+ }
+ case TargetOpcode::G_INSERT: {
+ if (TypeIdx != 0)
+ return UnableToLegalize;
+
+ int64_t NarrowSize = NarrowTy.getSizeInBits();
+ int NumParts =
+ MRI.getType(MI.getOperand(0).getReg()).getSizeInBits() / NarrowSize;
+
+ SmallVector<unsigned, 2> SrcRegs, DstRegs;
+ SmallVector<uint64_t, 2> Indexes;
+ extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs);
+
+ unsigned OpReg = MI.getOperand(2).getReg();
+ int64_t OpStart = MI.getOperand(3).getImm();
+ int64_t OpSize = MRI.getType(OpReg).getSizeInBits();
+ for (int i = 0; i < NumParts; ++i) {
+ unsigned DstStart = i * NarrowSize;
+
+ if (DstStart + NarrowSize <= OpStart || DstStart >= OpStart + OpSize) {
+ // No part of the insert affects this subregister, forward the original.
+ DstRegs.push_back(SrcRegs[i]);
+ continue;
+ } else if (DstStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
+ // The entire subregister is defined by this insert, forward the new
+ // value.
+ DstRegs.push_back(OpReg);
+ continue;
+ }
+
+      // Compute which bits of OpReg land in this destination part, and
+      // where within the part they must be inserted.
+ int64_t ExtractOffset, InsertOffset, SegSize;
+ if (OpStart < DstStart) {
+ InsertOffset = 0;
+ ExtractOffset = DstStart - OpStart;
+ SegSize = std::min(NarrowSize, OpStart + OpSize - DstStart);
+ } else {
+ InsertOffset = OpStart - DstStart;
+ ExtractOffset = 0;
+ SegSize =
+ std::min(NarrowSize - InsertOffset, OpStart + OpSize - DstStart);
+ }
+
+ unsigned SegReg = OpReg;
+ if (ExtractOffset != 0 || SegSize != OpSize) {
+ // A genuine extract is needed.
+ SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize));
+ MIRBuilder.buildExtract(SegReg, OpReg, ExtractOffset);
+ }
+
+ unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy);
+ MIRBuilder.buildInsert(DstReg, SrcRegs[i], SegReg, InsertOffset);
+ DstRegs.push_back(DstReg);
+ }
+
+ assert(DstRegs.size() == (unsigned)NumParts && "not all parts covered");
+ MIRBuilder.buildMerge(MI.getOperand(0).getReg(), DstRegs);
+ MI.eraseFromParent();
+ return Legalized;
+ }
+ case TargetOpcode::G_LOAD: {
+ unsigned NarrowSize = NarrowTy.getSizeInBits();
+ int NumParts =
+ MRI.getType(MI.getOperand(0).getReg()).getSizeInBits() / NarrowSize;
+ LLT OffsetTy = LLT::scalar(
+ MRI.getType(MI.getOperand(1).getReg()).getScalarSizeInBits());
+
+ SmallVector<unsigned, 2> DstRegs;
+ for (int i = 0; i < NumParts; ++i) {
+ unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy);
+ unsigned SrcReg = 0;
+ unsigned Adjustment = i * NarrowSize / 8;
+
+ MIRBuilder.materializeGEP(SrcReg, MI.getOperand(1).getReg(), OffsetTy,
+ Adjustment);
+
+ // TODO: This is conservatively correct, but we probably want to split the
+ // memory operands in the future.
+ MIRBuilder.buildLoad(DstReg, SrcReg, **MI.memoperands_begin());
+
+ DstRegs.push_back(DstReg);
+ }
+ unsigned DstReg = MI.getOperand(0).getReg();
+ MIRBuilder.buildMerge(DstReg, DstRegs);
+ MI.eraseFromParent();
+ return Legalized;
+ }
+ case TargetOpcode::G_STORE: {
+ unsigned NarrowSize = NarrowTy.getSizeInBits();
+ int NumParts =
+ MRI.getType(MI.getOperand(0).getReg()).getSizeInBits() / NarrowSize;
+ LLT OffsetTy = LLT::scalar(
+ MRI.getType(MI.getOperand(1).getReg()).getScalarSizeInBits());
+
+ SmallVector<unsigned, 2> SrcRegs;
+ extractParts(MI.getOperand(0).getReg(), NarrowTy, NumParts, SrcRegs);
+
+ for (int i = 0; i < NumParts; ++i) {
+ unsigned DstReg = 0;
+ unsigned Adjustment = i * NarrowSize / 8;
+
+ MIRBuilder.materializeGEP(DstReg, MI.getOperand(1).getReg(), OffsetTy,
+ Adjustment);
+
+ // TODO: This is conservatively correct, but we probably want to split the
+ // memory operands in the future.
+ MIRBuilder.buildStore(SrcRegs[i], DstReg, **MI.memoperands_begin());
+ }
+ MI.eraseFromParent();
+ return Legalized;
+ }
+ case TargetOpcode::G_CONSTANT: {
+ unsigned NarrowSize = NarrowTy.getSizeInBits();
+ int NumParts =
+ MRI.getType(MI.getOperand(0).getReg()).getSizeInBits() / NarrowSize;
+ const APInt &Cst = MI.getOperand(1).getCImm()->getValue();
+ LLVMContext &Ctx = MIRBuilder.getMF().getFunction()->getContext();
+
+ SmallVector<unsigned, 2> DstRegs;
+ for (int i = 0; i < NumParts; ++i) {
+ unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy);
+ ConstantInt *CI =
+ ConstantInt::get(Ctx, Cst.lshr(NarrowSize * i).trunc(NarrowSize));
+ MIRBuilder.buildConstant(DstReg, *CI);
+ DstRegs.push_back(DstReg);
+ }
+ unsigned DstReg = MI.getOperand(0).getReg();
+ MIRBuilder.buildMerge(DstReg, DstRegs);
MI.eraseFromParent();
return Legalized;
}
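
Aside: the G_CONSTANT case splits a wide immediate into narrow parts with
APInt::lshr followed by trunc. The same arithmetic on a plain 64-bit value,
split into 32-bit halves:

    #include <cstdint>
    #include <cstdio>

    int main() {
      uint64_t Cst = 0x0123456789ABCDEFULL;
      const unsigned NarrowSize = 32, NumParts = 64 / NarrowSize;
      for (unsigned i = 0; i < NumParts; ++i) {
        // Part i = (Cst >> (NarrowSize * i)) truncated to NarrowSize bits.
        uint32_t Part = (uint32_t)(Cst >> (NarrowSize * i));
        std::printf("part %u = 0x%08X\n", i, Part); // 89ABCDEF, then 01234567
      }
    }
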
@@ -175,7 +411,8 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
case TargetOpcode::G_MUL:
case TargetOpcode::G_OR:
case TargetOpcode::G_XOR:
- case TargetOpcode::G_SUB: {
+ case TargetOpcode::G_SUB:
+ case TargetOpcode::G_SHL: {
// Perform operation at larger width (any extension is fine here, high bits
// don't affect the result) and then truncate the result back to the
// original type.
@@ -195,10 +432,16 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
return Legalized;
}
case TargetOpcode::G_SDIV:
- case TargetOpcode::G_UDIV: {
- unsigned ExtOp = MI.getOpcode() == TargetOpcode::G_SDIV
- ? TargetOpcode::G_SEXT
- : TargetOpcode::G_ZEXT;
+ case TargetOpcode::G_UDIV:
+ case TargetOpcode::G_SREM:
+ case TargetOpcode::G_UREM:
+ case TargetOpcode::G_ASHR:
+ case TargetOpcode::G_LSHR: {
+ unsigned ExtOp = MI.getOpcode() == TargetOpcode::G_SDIV ||
+ MI.getOpcode() == TargetOpcode::G_SREM ||
+ MI.getOpcode() == TargetOpcode::G_ASHR
+ ? TargetOpcode::G_SEXT
+ : TargetOpcode::G_ZEXT;
unsigned LHSExt = MRI.createGenericVirtualRegister(WideTy);
MIRBuilder.buildInstr(ExtOp).addDef(LHSExt).addUse(
@@ -218,6 +461,85 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
MI.eraseFromParent();
return Legalized;
}
+ case TargetOpcode::G_SELECT: {
+ if (TypeIdx != 0)
+ return UnableToLegalize;
+
+ // Perform operation at larger width (any extension is fine here, high bits
+ // don't affect the result) and then truncate the result back to the
+ // original type.
+ unsigned Src1Ext = MRI.createGenericVirtualRegister(WideTy);
+ unsigned Src2Ext = MRI.createGenericVirtualRegister(WideTy);
+ MIRBuilder.buildAnyExt(Src1Ext, MI.getOperand(2).getReg());
+ MIRBuilder.buildAnyExt(Src2Ext, MI.getOperand(3).getReg());
+
+ unsigned DstExt = MRI.createGenericVirtualRegister(WideTy);
+ MIRBuilder.buildInstr(TargetOpcode::G_SELECT)
+ .addDef(DstExt)
+ .addReg(MI.getOperand(1).getReg())
+ .addUse(Src1Ext)
+ .addUse(Src2Ext);
+
+ MIRBuilder.buildTrunc(MI.getOperand(0).getReg(), DstExt);
+ MI.eraseFromParent();
+ return Legalized;
+ }
+ case TargetOpcode::G_FPTOSI:
+ case TargetOpcode::G_FPTOUI: {
+ if (TypeIdx != 0)
+ return UnableToLegalize;
+
+ unsigned DstExt = MRI.createGenericVirtualRegister(WideTy);
+ MIRBuilder.buildInstr(MI.getOpcode())
+ .addDef(DstExt)
+ .addUse(MI.getOperand(1).getReg());
+
+ MIRBuilder.buildTrunc(MI.getOperand(0).getReg(), DstExt);
+ MI.eraseFromParent();
+ return Legalized;
+ }
+ case TargetOpcode::G_SITOFP:
+ case TargetOpcode::G_UITOFP: {
+ if (TypeIdx != 1)
+ return UnableToLegalize;
+
+ unsigned Src = MI.getOperand(1).getReg();
+ unsigned SrcExt = MRI.createGenericVirtualRegister(WideTy);
+
+ if (MI.getOpcode() == TargetOpcode::G_SITOFP) {
+ MIRBuilder.buildSExt(SrcExt, Src);
+ } else {
+ assert(MI.getOpcode() == TargetOpcode::G_UITOFP && "Unexpected conv op");
+ MIRBuilder.buildZExt(SrcExt, Src);
+ }
+
+ MIRBuilder.buildInstr(MI.getOpcode())
+ .addDef(MI.getOperand(0).getReg())
+ .addUse(SrcExt);
+
+ MI.eraseFromParent();
+ return Legalized;
+ }
+ case TargetOpcode::G_INSERT: {
+ if (TypeIdx != 0)
+ return UnableToLegalize;
+
+ unsigned Src = MI.getOperand(1).getReg();
+ unsigned SrcExt = MRI.createGenericVirtualRegister(WideTy);
+ MIRBuilder.buildAnyExt(SrcExt, Src);
+
+ unsigned DstExt = MRI.createGenericVirtualRegister(WideTy);
+ auto MIB = MIRBuilder.buildInsert(DstExt, SrcExt, MI.getOperand(2).getReg(),
+ MI.getOperand(3).getImm());
+ for (unsigned OpNum = 4; OpNum < MI.getNumOperands(); OpNum += 2) {
+ MIB.addReg(MI.getOperand(OpNum).getReg());
+ MIB.addImm(MI.getOperand(OpNum + 1).getImm());
+ }
+
+ MIRBuilder.buildTrunc(MI.getOperand(0).getReg(), DstExt);
+ MI.eraseFromParent();
+ return Legalized;
+ }
case TargetOpcode::G_LOAD: {
assert(alignTo(MRI.getType(MI.getOperand(0).getReg()).getSizeInBits(), 8) ==
WideTy.getSizeInBits() &&
@@ -231,12 +553,24 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
return Legalized;
}
case TargetOpcode::G_STORE: {
- assert(alignTo(MRI.getType(MI.getOperand(0).getReg()).getSizeInBits(), 8) ==
- WideTy.getSizeInBits() &&
- "illegal to increase number of bytes modified by a store");
+ if (MRI.getType(MI.getOperand(0).getReg()) != LLT::scalar(1) ||
+ WideTy != LLT::scalar(8))
+ return UnableToLegalize;
+
+ auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
+ auto Content = TLI.getBooleanContents(false, false);
+
+    unsigned ExtOp = TargetOpcode::G_ANYEXT;
+    if (Content == TargetLoweringBase::ZeroOrOneBooleanContent)
+      ExtOp = TargetOpcode::G_ZEXT;
+    else if (Content == TargetLoweringBase::ZeroOrNegativeOneBooleanContent)
+      ExtOp = TargetOpcode::G_SEXT;
unsigned SrcExt = MRI.createGenericVirtualRegister(WideTy);
- MIRBuilder.buildAnyExt(SrcExt, MI.getOperand(0).getReg());
+ MIRBuilder.buildInstr(ExtOp).addDef(SrcExt).addUse(
+ MI.getOperand(0).getReg());
MIRBuilder.buildStore(SrcExt, MI.getOperand(1).getReg(),
**MI.memoperands_begin());
MI.eraseFromParent();
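
Aside: widening an i1 store now extends according to the target's boolean
contents, since a stored true must round-trip as 1 under zero-or-one
semantics and as -1 under zero-or-negative-one. A toy dispatcher over an
assumed enum:

    #include <cstdio>

    enum class BooleanContent { ZeroOrOne, ZeroOrNegOne, Undefined };

    const char *extForBool(BooleanContent C) {
      switch (C) {
      case BooleanContent::ZeroOrOne:    return "G_ZEXT";
      case BooleanContent::ZeroOrNegOne: return "G_SEXT";
      default:                           return "G_ANYEXT";
      }
    }

    int main() {
      std::printf("%s\n", extForBool(BooleanContent::ZeroOrOne));    // G_ZEXT
      std::printf("%s\n", extForBool(BooleanContent::ZeroOrNegOne)); // G_SEXT
    }
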
@@ -315,6 +649,83 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
MI.eraseFromParent();
return Legalized;
}
+ case TargetOpcode::G_SMULO:
+ case TargetOpcode::G_UMULO: {
+ // Generate G_UMULH/G_SMULH to check for overflow and a normal G_MUL for the
+ // result.
+ unsigned Res = MI.getOperand(0).getReg();
+ unsigned Overflow = MI.getOperand(1).getReg();
+ unsigned LHS = MI.getOperand(2).getReg();
+ unsigned RHS = MI.getOperand(3).getReg();
+
+ MIRBuilder.buildMul(Res, LHS, RHS);
+
+ unsigned Opcode = MI.getOpcode() == TargetOpcode::G_SMULO
+ ? TargetOpcode::G_SMULH
+ : TargetOpcode::G_UMULH;
+
+ unsigned HiPart = MRI.createGenericVirtualRegister(Ty);
+ MIRBuilder.buildInstr(Opcode)
+ .addDef(HiPart)
+ .addUse(LHS)
+ .addUse(RHS);
+
+ unsigned Zero = MRI.createGenericVirtualRegister(Ty);
+ MIRBuilder.buildConstant(Zero, 0);
+ MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Zero);
+ MI.eraseFromParent();
+ return Legalized;
+ }
+ case TargetOpcode::G_FNEG: {
+ // TODO: Handle vector types once we are able to
+ // represent them.
+ if (Ty.isVector())
+ return UnableToLegalize;
+ unsigned Res = MI.getOperand(0).getReg();
+ Type *ZeroTy;
+ LLVMContext &Ctx = MIRBuilder.getMF().getFunction()->getContext();
+ switch (Ty.getSizeInBits()) {
+ case 16:
+ ZeroTy = Type::getHalfTy(Ctx);
+ break;
+ case 32:
+ ZeroTy = Type::getFloatTy(Ctx);
+ break;
+ case 64:
+ ZeroTy = Type::getDoubleTy(Ctx);
+ break;
+ default:
+ llvm_unreachable("unexpected floating-point type");
+ }
+ ConstantFP &ZeroForNegation =
+ *cast<ConstantFP>(ConstantFP::getZeroValueForNegation(ZeroTy));
+ unsigned Zero = MRI.createGenericVirtualRegister(Ty);
+ MIRBuilder.buildFConstant(Zero, ZeroForNegation);
+ MIRBuilder.buildInstr(TargetOpcode::G_FSUB)
+ .addDef(Res)
+ .addUse(Zero)
+ .addUse(MI.getOperand(1).getReg());
+ MI.eraseFromParent();
+ return Legalized;
+ }
+ case TargetOpcode::G_FSUB: {
+ // Lower (G_FSUB LHS, RHS) to (G_FADD LHS, (G_FNEG RHS)).
+ // First, check if G_FNEG is marked as Lower. If so, we may
+ // end up with an infinite loop as G_FSUB is used to legalize G_FNEG.
+ if (LI.getAction({G_FNEG, Ty}).first == LegalizerInfo::Lower)
+ return UnableToLegalize;
+ unsigned Res = MI.getOperand(0).getReg();
+ unsigned LHS = MI.getOperand(1).getReg();
+ unsigned RHS = MI.getOperand(2).getReg();
+ unsigned Neg = MRI.createGenericVirtualRegister(Ty);
+ MIRBuilder.buildInstr(TargetOpcode::G_FNEG).addDef(Neg).addUse(RHS);
+ MIRBuilder.buildInstr(TargetOpcode::G_FADD)
+ .addDef(Res)
+ .addUse(LHS)
+ .addUse(Neg);
+ MI.eraseFromParent();
+ return Legalized;
+ }
}
}
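
Aside: the G_UMULO lowering checks for overflow by testing whether the high
half of the double-width product is nonzero, which is exactly what the
emitted G_UMULH plus ICMP_NE computes. The same test with plain integers:

    #include <cstdint>
    #include <cstdio>

    // Unsigned 32x32 multiply: overflow iff the high 32 bits of the
    // 64-bit product are nonzero.
    bool umulo32(uint32_t a, uint32_t b, uint32_t &lo) {
      uint64_t p = (uint64_t)a * b;
      lo = (uint32_t)p;
      return (uint32_t)(p >> 32) != 0;
    }

    int main() {
      uint32_t lo;
      std::printf("overflow: %d\n", umulo32(0xFFFFFFFFu, 2, lo)); // 1
      std::printf("overflow: %d\n", umulo32(1000, 1000, lo));     // 0
    }
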
@@ -335,7 +746,6 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
MIRBuilder.setInstr(MI);
SmallVector<unsigned, 2> Src1Regs, Src2Regs, DstRegs;
- SmallVector<uint64_t, 2> Indexes;
extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, Src1Regs);
extractParts(MI.getOperand(2).getReg(), NarrowTy, NumParts, Src2Regs);
@@ -343,10 +753,9 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy);
MIRBuilder.buildAdd(DstReg, Src1Regs[i], Src2Regs[i]);
DstRegs.push_back(DstReg);
- Indexes.push_back(i * NarrowSize);
}
- MIRBuilder.buildSequence(DstReg, DstRegs, Indexes);
+ MIRBuilder.buildMerge(DstReg, DstRegs);
MI.eraseFromParent();
return Legalized;
}
diff --git a/contrib/llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp b/contrib/llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp
index e496620..76917aa 100644
--- a/contrib/llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp
+++ b/contrib/llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp
@@ -1,4 +1,4 @@
-//===---- lib/CodeGen/GlobalISel/LegalizerInfo.cpp - Legalizer -------==//
+//===- lib/CodeGen/GlobalISel/LegalizerInfo.cpp - Legalizer ---------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -18,16 +18,25 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
-
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/ValueTypes.h"
-#include "llvm/IR/Type.h"
+#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/LowLevelTypeImpl.h"
+#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetOpcodes.h"
+#include <algorithm>
+#include <cassert>
+#include <tuple>
+#include <utility>
+
using namespace llvm;
-LegalizerInfo::LegalizerInfo() : TablesInitialized(false) {
+LegalizerInfo::LegalizerInfo() {
+ DefaultActions[TargetOpcode::G_IMPLICIT_DEF] = NarrowScalar;
+
// FIXME: these two can be legalized to the fundamental load/store Jakob
// proposed. Once loads & stores are supported.
DefaultActions[TargetOpcode::G_ANYEXT] = Legal;
@@ -41,6 +50,9 @@ LegalizerInfo::LegalizerInfo() : TablesInitialized(false) {
DefaultActions[TargetOpcode::G_STORE] = NarrowScalar;
DefaultActions[TargetOpcode::G_BRCOND] = WidenScalar;
+ DefaultActions[TargetOpcode::G_INSERT] = NarrowScalar;
+ DefaultActions[TargetOpcode::G_EXTRACT] = NarrowScalar;
+ DefaultActions[TargetOpcode::G_FNEG] = Lower;
}
void LegalizerInfo::computeTables() {
@@ -71,28 +83,34 @@ LegalizerInfo::getAction(const InstrAspect &Aspect) const {
// These *have* to be implemented for now, they're the fundamental basis of
// how everything else is transformed.
- // Nothing is going to go well with types that aren't a power of 2 yet, so
- // don't even try because we might make things worse.
- if (!isPowerOf2_64(Aspect.Type.getSizeInBits()))
- return std::make_pair(Unsupported, LLT());
-
// FIXME: the long-term plan calls for expansion in terms of load/store (if
// they're not legal).
- if (Aspect.Opcode == TargetOpcode::G_SEQUENCE ||
- Aspect.Opcode == TargetOpcode::G_EXTRACT)
+ if (Aspect.Opcode == TargetOpcode::G_MERGE_VALUES ||
+ Aspect.Opcode == TargetOpcode::G_UNMERGE_VALUES)
return std::make_pair(Legal, Aspect.Type);
+ LLT Ty = Aspect.Type;
LegalizeAction Action = findInActions(Aspect);
+ // LegalizerHelper is not able to handle non-power-of-2 types right now, so do
+ // not try to legalize them unless they are marked as Legal or Custom.
+ // FIXME: This is a temporary hack until the general non-power-of-2
+ // legalization works.
+ if (!isPowerOf2_64(Ty.getSizeInBits()) &&
+ !(Action == Legal || Action == Custom))
+ return std::make_pair(Unsupported, LLT());
+
if (Action != NotFound)
return findLegalAction(Aspect, Action);
unsigned Opcode = Aspect.Opcode;
- LLT Ty = Aspect.Type;
if (!Ty.isVector()) {
auto DefaultAction = DefaultActions.find(Aspect.Opcode);
if (DefaultAction != DefaultActions.end() && DefaultAction->second == Legal)
return std::make_pair(Legal, Ty);
+ if (DefaultAction != DefaultActions.end() && DefaultAction->second == Lower)
+ return std::make_pair(Lower, Ty);
+
if (DefaultAction == DefaultActions.end() ||
DefaultAction->second != NarrowScalar)
return std::make_pair(Unsupported, LLT());
@@ -152,7 +170,7 @@ bool LegalizerInfo::isLegal(const MachineInstr &MI,
return std::get<0>(getAction(MI, MRI)) == Legal;
}
-LLT LegalizerInfo::findLegalType(const InstrAspect &Aspect,
+Optional<LLT> LegalizerInfo::findLegalType(const InstrAspect &Aspect,
LegalizeAction Action) const {
switch(Action) {
default:
@@ -160,23 +178,30 @@ LLT LegalizerInfo::findLegalType(const InstrAspect &Aspect,
case Legal:
case Lower:
case Libcall:
+ case Custom:
return Aspect.Type;
case NarrowScalar: {
- return findLegalType(Aspect,
- [&](LLT Ty) -> LLT { return Ty.halfScalarSize(); });
+ return findLegalizableSize(
+ Aspect, [&](LLT Ty) -> LLT { return Ty.halfScalarSize(); });
}
case WidenScalar: {
- return findLegalType(Aspect, [&](LLT Ty) -> LLT {
+ return findLegalizableSize(Aspect, [&](LLT Ty) -> LLT {
return Ty.getSizeInBits() < 8 ? LLT::scalar(8) : Ty.doubleScalarSize();
});
}
case FewerElements: {
- return findLegalType(Aspect,
- [&](LLT Ty) -> LLT { return Ty.halfElements(); });
+ return findLegalizableSize(
+ Aspect, [&](LLT Ty) -> LLT { return Ty.halfElements(); });
}
case MoreElements: {
- return findLegalType(Aspect,
- [&](LLT Ty) -> LLT { return Ty.doubleElements(); });
+ return findLegalizableSize(
+ Aspect, [&](LLT Ty) -> LLT { return Ty.doubleElements(); });
}
}
}
+
+bool LegalizerInfo::legalizeCustom(MachineInstr &MI,
+ MachineRegisterInfo &MRI,
+ MachineIRBuilder &MIRBuilder) const {
+ return false;
+}
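
Aside: legalizeCustom is a default hook that always refuses; a target
overrides it to implement the new Custom action. A schematic override with
toy types (MyTargetLegalizerInfo and Instr are hypothetical, not a real
backend):

    #include <cstdio>

    struct Instr { unsigned Opcode; };

    struct MyLegalizerInfo {
      // Base behavior: refuse, which the Legalizer reports as
      // UnableToLegalize.
      virtual bool legalizeCustom(Instr &I) const { return false; }
      virtual ~MyLegalizerInfo() = default;
    };

    struct MyTargetLegalizerInfo : MyLegalizerInfo {
      bool legalizeCustom(Instr &I) const override {
        if (I.Opcode == 42) { // some target-specific rewrite
          std::puts("custom-legalized opcode 42");
          return true;
        }
        return false;
      }
    };

    int main() {
      Instr I{42};
      MyTargetLegalizerInfo TLI;
      std::printf("handled: %d\n", TLI.legalizeCustom(I));
    }
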
diff --git a/contrib/llvm/lib/CodeGen/GlobalISel/Localizer.cpp b/contrib/llvm/lib/CodeGen/GlobalISel/Localizer.cpp
new file mode 100644
index 0000000..c5d0999
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/GlobalISel/Localizer.cpp
@@ -0,0 +1,123 @@
+//===- Localizer.cpp ---------------------- Localize some instrs -*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file implements the Localizer class.
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/GlobalISel/Localizer.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/Debug.h"
+
+#define DEBUG_TYPE "localizer"
+
+using namespace llvm;
+
+char Localizer::ID = 0;
+INITIALIZE_PASS(Localizer, DEBUG_TYPE,
+ "Move/duplicate certain instructions close to their use", false,
+ false)
+
+Localizer::Localizer() : MachineFunctionPass(ID) {
+ initializeLocalizerPass(*PassRegistry::getPassRegistry());
+}
+
+void Localizer::init(MachineFunction &MF) { MRI = &MF.getRegInfo(); }
+
+bool Localizer::shouldLocalize(const MachineInstr &MI) {
+ switch (MI.getOpcode()) {
+ default:
+ return false;
+ // Constants-like instructions should be close to their users.
+ // We don't want long live-ranges for them.
+ case TargetOpcode::G_CONSTANT:
+ case TargetOpcode::G_FCONSTANT:
+ case TargetOpcode::G_FRAME_INDEX:
+ return true;
+ }
+}
+
+bool Localizer::isLocalUse(MachineOperand &MOUse, const MachineInstr &Def,
+ MachineBasicBlock *&InsertMBB) {
+ MachineInstr &MIUse = *MOUse.getParent();
+ InsertMBB = MIUse.getParent();
+ if (MIUse.isPHI())
+ InsertMBB = MIUse.getOperand(MIUse.getOperandNo(&MOUse) + 1).getMBB();
+ return InsertMBB == Def.getParent();
+}
+
+bool Localizer::runOnMachineFunction(MachineFunction &MF) {
+ // If the ISel pipeline failed, do not bother running that pass.
+ if (MF.getProperties().hasProperty(
+ MachineFunctionProperties::Property::FailedISel))
+ return false;
+
+ DEBUG(dbgs() << "Localize instructions for: " << MF.getName() << '\n');
+
+ init(MF);
+
+ bool Changed = false;
+ // Keep track of the instructions we localized.
+ // We won't need to process them if we see them later in the CFG.
+ SmallPtrSet<MachineInstr *, 16> LocalizedInstrs;
+ DenseMap<std::pair<MachineBasicBlock *, unsigned>, unsigned> MBBWithLocalDef;
+ // TODO: Do bottom up traversal.
+ for (MachineBasicBlock &MBB : MF) {
+ for (MachineInstr &MI : MBB) {
+ if (LocalizedInstrs.count(&MI) || !shouldLocalize(MI))
+ continue;
+ DEBUG(dbgs() << "Should localize: " << MI);
+ assert(MI.getDesc().getNumDefs() == 1 &&
+ "More than one definition not supported yet");
+ unsigned Reg = MI.getOperand(0).getReg();
+ // Check if all the users of MI are local.
+      // We are going to invalidate the list of use operands, so we
+      // can't use a range iterator.
+ for (auto MOIt = MRI->use_begin(Reg), MOItEnd = MRI->use_end();
+ MOIt != MOItEnd;) {
+ MachineOperand &MOUse = *MOIt++;
+ // Check if the use is already local.
+ MachineBasicBlock *InsertMBB;
+ DEBUG(MachineInstr &MIUse = *MOUse.getParent();
+ dbgs() << "Checking use: " << MIUse
+ << " #Opd: " << MIUse.getOperandNo(&MOUse) << '\n');
+ if (isLocalUse(MOUse, MI, InsertMBB))
+ continue;
+ DEBUG(dbgs() << "Fixing non-local use\n");
+ Changed = true;
+ auto MBBAndReg = std::make_pair(InsertMBB, Reg);
+ auto NewVRegIt = MBBWithLocalDef.find(MBBAndReg);
+ if (NewVRegIt == MBBWithLocalDef.end()) {
+ // Create the localized instruction.
+ MachineInstr *LocalizedMI = MF.CloneMachineInstr(&MI);
+ LocalizedInstrs.insert(LocalizedMI);
+ // Don't try to be smart for the insertion point.
+ // There is no guarantee that the first seen use is the first
+ // use in the block.
+ InsertMBB->insert(InsertMBB->getFirstNonPHI(), LocalizedMI);
+
+ // Set a new register for the definition.
+ unsigned NewReg =
+ MRI->createGenericVirtualRegister(MRI->getType(Reg));
+ MRI->setRegClassOrRegBank(NewReg, MRI->getRegClassOrRegBank(Reg));
+ LocalizedMI->getOperand(0).setReg(NewReg);
+ NewVRegIt =
+ MBBWithLocalDef.insert(std::make_pair(MBBAndReg, NewReg)).first;
+ DEBUG(dbgs() << "Inserted: " << *LocalizedMI);
+ }
+ DEBUG(dbgs() << "Update use with: " << PrintReg(NewVRegIt->second)
+ << '\n');
+ // Update the user reg.
+ MOUse.setReg(NewVRegIt->second);
+ }
+ }
+ }
+ return Changed;
+}
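
Aside: the Localizer keys cloned constants by (use block, original vreg), so
each block gets at most one local copy no matter how many uses it contains. A
miniature of that dedup map:

    #include <cstdio>
    #include <map>
    #include <utility>

    int main() {
      // (block id, original vreg) -> localized vreg
      std::map<std::pair<int, unsigned>, unsigned> LocalDef;
      unsigned NextVReg = 100;
      // Uses of vreg 7 in blocks 1, 2, 2: block 2 reuses its clone.
      int UseBlocks[] = {1, 2, 2};
      for (int BB : UseBlocks) {
        auto Key = std::make_pair(BB, 7u);
        auto It = LocalDef.find(Key);
        if (It == LocalDef.end())
          It = LocalDef.insert({Key, NextVReg++}).first; // clone once
        std::printf("use in bb%d -> vreg %u\n", BB, It->second);
      }
    }
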
diff --git a/contrib/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp b/contrib/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
index c04f6e4..4636806 100644
--- a/contrib/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
+++ b/contrib/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
@@ -15,6 +15,7 @@
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/DebugInfo.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetOpcodes.h"
#include "llvm/Target/TargetSubtargetInfo.h"
@@ -54,7 +55,7 @@ void MachineIRBuilder::setInsertPt(MachineBasicBlock &MBB,
void MachineIRBuilder::recordInsertions(
std::function<void(MachineInstr *)> Inserted) {
- InsertedInstr = Inserted;
+ InsertedInstr = std::move(Inserted);
}
void MachineIRBuilder::stopRecordingInsertions() {
@@ -82,6 +83,70 @@ MachineInstrBuilder MachineIRBuilder::insertInstr(MachineInstrBuilder MIB) {
return MIB;
}
+MachineInstrBuilder MachineIRBuilder::buildDirectDbgValue(
+ unsigned Reg, const MDNode *Variable, const MDNode *Expr) {
+ assert(isa<DILocalVariable>(Variable) && "not a variable");
+ assert(cast<DIExpression>(Expr)->isValid() && "not an expression");
+ assert(cast<DILocalVariable>(Variable)->isValidLocationForIntrinsic(DL) &&
+ "Expected inlined-at fields to agree");
+ return buildInstr(TargetOpcode::DBG_VALUE)
+ .addReg(Reg, RegState::Debug)
+ .addReg(0, RegState::Debug)
+ .addMetadata(Variable)
+ .addMetadata(Expr);
+}
+
+MachineInstrBuilder MachineIRBuilder::buildIndirectDbgValue(
+ unsigned Reg, unsigned Offset, const MDNode *Variable, const MDNode *Expr) {
+ assert(isa<DILocalVariable>(Variable) && "not a variable");
+ assert(cast<DIExpression>(Expr)->isValid() && "not an expression");
+ assert(cast<DILocalVariable>(Variable)->isValidLocationForIntrinsic(DL) &&
+ "Expected inlined-at fields to agree");
+ return buildInstr(TargetOpcode::DBG_VALUE)
+ .addReg(Reg, RegState::Debug)
+ .addImm(Offset)
+ .addMetadata(Variable)
+ .addMetadata(Expr);
+}
+
+MachineInstrBuilder MachineIRBuilder::buildFIDbgValue(int FI,
+ const MDNode *Variable,
+ const MDNode *Expr) {
+ assert(isa<DILocalVariable>(Variable) && "not a variable");
+ assert(cast<DIExpression>(Expr)->isValid() && "not an expression");
+ assert(cast<DILocalVariable>(Variable)->isValidLocationForIntrinsic(DL) &&
+ "Expected inlined-at fields to agree");
+ return buildInstr(TargetOpcode::DBG_VALUE)
+ .addFrameIndex(FI)
+ .addImm(0)
+ .addMetadata(Variable)
+ .addMetadata(Expr);
+}
+
+MachineInstrBuilder MachineIRBuilder::buildConstDbgValue(const Constant &C,
+ unsigned Offset,
+ const MDNode *Variable,
+ const MDNode *Expr) {
+ assert(isa<DILocalVariable>(Variable) && "not a variable");
+ assert(cast<DIExpression>(Expr)->isValid() && "not an expression");
+ assert(cast<DILocalVariable>(Variable)->isValidLocationForIntrinsic(DL) &&
+ "Expected inlined-at fields to agree");
+ auto MIB = buildInstr(TargetOpcode::DBG_VALUE);
+ if (auto *CI = dyn_cast<ConstantInt>(&C)) {
+ if (CI->getBitWidth() > 64)
+ MIB.addCImm(CI);
+ else
+ MIB.addImm(CI->getZExtValue());
+ } else if (auto *CFP = dyn_cast<ConstantFP>(&C)) {
+ MIB.addFPImm(CFP);
+ } else {
+ // Insert %noreg if we didn't find a usable constant and had to drop it.
+ MIB.addReg(0U);
+ }
+
+ return MIB.addImm(Offset).addMetadata(Variable).addMetadata(Expr);
+}
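A minimal usage sketch for the DBG_VALUE builders above (not from the patch; the variable and expression are assumed to come from lowering llvm.dbg.value/llvm.dbg.declare intrinsics, with the builder's debug location already set so the isValidLocationForIntrinsic asserts hold):

    static void emitDbgValues(MachineIRBuilder &MIRBuilder, unsigned VReg,
                              int FrameIdx, const DILocalVariable *Var,
                              const DIExpression *Expr) {
      // The variable currently lives in a virtual register.
      MIRBuilder.buildDirectDbgValue(VReg, Var, Expr);
      // The same variable is also described by a stack slot.
      MIRBuilder.buildFIDbgValue(FrameIdx, Var, Expr);
    }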
+
MachineInstrBuilder MachineIRBuilder::buildFrameIndex(unsigned Res, int Idx) {
assert(MRI->getType(Res).isPointer() && "invalid operand type");
return buildInstr(TargetOpcode::G_FRAME_INDEX)
@@ -101,19 +166,24 @@ MachineInstrBuilder MachineIRBuilder::buildGlobalValue(unsigned Res,
.addGlobalAddress(GV);
}
-MachineInstrBuilder MachineIRBuilder::buildAdd(unsigned Res, unsigned Op0,
+MachineInstrBuilder MachineIRBuilder::buildBinaryOp(unsigned Opcode, unsigned Res, unsigned Op0,
unsigned Op1) {
assert((MRI->getType(Res).isScalar() || MRI->getType(Res).isVector()) &&
"invalid operand type");
assert(MRI->getType(Res) == MRI->getType(Op0) &&
MRI->getType(Res) == MRI->getType(Op1) && "type mismatch");
- return buildInstr(TargetOpcode::G_ADD)
+ return buildInstr(Opcode)
.addDef(Res)
.addUse(Op0)
.addUse(Op1);
}
+MachineInstrBuilder MachineIRBuilder::buildAdd(unsigned Res, unsigned Op0,
+ unsigned Op1) {
+ return buildBinaryOp(TargetOpcode::G_ADD, Res, Op0, Op1);
+}
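Illustrative only: with buildBinaryOp factored out, each arithmetic builder is a one-line wrapper, so client code is identical for add/sub/mul/and/or. A sketch computing A*X + Y, assuming all registers already share one scalar type:

    static unsigned emitMulAdd(MachineIRBuilder &B, MachineRegisterInfo &MRI,
                               unsigned A, unsigned X, unsigned Y) {
      // Both calls funnel into buildBinaryOp, which asserts that the result
      // and both sources have matching scalar/vector types.
      unsigned Tmp = MRI.createGenericVirtualRegister(MRI.getType(A));
      B.buildMul(Tmp, A, X);
      unsigned Res = MRI.createGenericVirtualRegister(MRI.getType(A));
      B.buildAdd(Res, Tmp, Y);
      return Res;
    }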
+
MachineInstrBuilder MachineIRBuilder::buildGEP(unsigned Res, unsigned Op0,
unsigned Op1) {
assert(MRI->getType(Res).isPointer() &&
@@ -126,37 +196,67 @@ MachineInstrBuilder MachineIRBuilder::buildGEP(unsigned Res, unsigned Op0,
.addUse(Op1);
}
-MachineInstrBuilder MachineIRBuilder::buildSub(unsigned Res, unsigned Op0,
- unsigned Op1) {
- assert((MRI->getType(Res).isScalar() || MRI->getType(Res).isVector()) &&
- "invalid operand type");
- assert(MRI->getType(Res) == MRI->getType(Op0) &&
- MRI->getType(Res) == MRI->getType(Op1) && "type mismatch");
+Optional<MachineInstrBuilder>
+MachineIRBuilder::materializeGEP(unsigned &Res, unsigned Op0,
+ const LLT &ValueTy, uint64_t Value) {
+ assert(Res == 0 && "Res is a result argument");
+ assert(ValueTy.isScalar() && "invalid offset type");
- return buildInstr(TargetOpcode::G_SUB)
+ if (Value == 0) {
+ Res = Op0;
+ return None;
+ }
+
+ Res = MRI->createGenericVirtualRegister(MRI->getType(Op0));
+ unsigned TmpReg = MRI->createGenericVirtualRegister(ValueTy);
+
+ buildConstant(TmpReg, Value);
+ return buildGEP(Res, Op0, TmpReg);
+}
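How a caller is meant to consume the Optional result (hypothetical snippet; Base and Offset are assumed to be in scope): when the offset is zero nothing is emitted and Res simply aliases the base pointer.

    unsigned Addr = 0;
    if (Optional<MachineInstrBuilder> GEP =
            MIRBuilder.materializeGEP(Addr, Base, LLT::scalar(64), Offset))
      (void)GEP; // a G_CONSTANT + G_GEP pair was emitted; Addr is the sum
    // Otherwise Offset was 0, no instruction was built, and Addr == Base.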
+
+MachineInstrBuilder MachineIRBuilder::buildPtrMask(unsigned Res, unsigned Op0,
+ uint32_t NumBits) {
+ assert(MRI->getType(Res).isPointer() &&
+ MRI->getType(Res) == MRI->getType(Op0) && "type mismatch");
+
+ return buildInstr(TargetOpcode::G_PTR_MASK)
.addDef(Res)
.addUse(Op0)
- .addUse(Op1);
+ .addImm(NumBits);
+}
+
+MachineInstrBuilder MachineIRBuilder::buildSub(unsigned Res, unsigned Op0,
+ unsigned Op1) {
+ return buildBinaryOp(TargetOpcode::G_SUB, Res, Op0, Op1);
}
MachineInstrBuilder MachineIRBuilder::buildMul(unsigned Res, unsigned Op0,
unsigned Op1) {
- assert((MRI->getType(Res).isScalar() || MRI->getType(Res).isVector()) &&
- "invalid operand type");
- assert(MRI->getType(Res) == MRI->getType(Op0) &&
- MRI->getType(Res) == MRI->getType(Op1) && "type mismatch");
+ return buildBinaryOp(TargetOpcode::G_MUL, Res, Op0, Op1);
+}
- return buildInstr(TargetOpcode::G_MUL)
- .addDef(Res)
- .addUse(Op0)
- .addUse(Op1);
+MachineInstrBuilder MachineIRBuilder::buildAnd(unsigned Res, unsigned Op0,
+ unsigned Op1) {
+ return buildBinaryOp(TargetOpcode::G_AND, Res, Op0, Op1);
+}
+
+MachineInstrBuilder MachineIRBuilder::buildOr(unsigned Res, unsigned Op0,
+ unsigned Op1) {
+ return buildBinaryOp(TargetOpcode::G_OR, Res, Op0, Op1);
}
MachineInstrBuilder MachineIRBuilder::buildBr(MachineBasicBlock &Dest) {
return buildInstr(TargetOpcode::G_BR).addMBB(&Dest);
}
+MachineInstrBuilder MachineIRBuilder::buildBrIndirect(unsigned Tgt) {
+ assert(MRI->getType(Tgt).isPointer() && "invalid branch destination");
+ return buildInstr(TargetOpcode::G_BRINDIRECT).addUse(Tgt);
+}
+
MachineInstrBuilder MachineIRBuilder::buildCopy(unsigned Res, unsigned Op) {
+ assert(MRI->getType(Res) == LLT() || MRI->getType(Op) == LLT() ||
+ MRI->getType(Res) == MRI->getType(Op));
return buildInstr(TargetOpcode::COPY).addDef(Res).addUse(Op);
}
@@ -253,49 +353,78 @@ MachineInstrBuilder MachineIRBuilder::buildZExt(unsigned Res, unsigned Op) {
MachineInstrBuilder MachineIRBuilder::buildSExtOrTrunc(unsigned Res,
unsigned Op) {
+ assert(MRI->getType(Res).isScalar() || MRI->getType(Res).isVector());
+ assert(MRI->getType(Res).isScalar() == MRI->getType(Op).isScalar());
+
unsigned Opcode = TargetOpcode::COPY;
if (MRI->getType(Res).getSizeInBits() > MRI->getType(Op).getSizeInBits())
Opcode = TargetOpcode::G_SEXT;
else if (MRI->getType(Res).getSizeInBits() < MRI->getType(Op).getSizeInBits())
Opcode = TargetOpcode::G_TRUNC;
+ else
+ assert(MRI->getType(Res) == MRI->getType(Op));
return buildInstr(Opcode).addDef(Res).addUse(Op);
}
-MachineInstrBuilder MachineIRBuilder::buildExtract(ArrayRef<unsigned> Results,
- ArrayRef<uint64_t> Indices,
- unsigned Src) {
-#ifndef NDEBUG
- assert(Results.size() == Indices.size() && "inconsistent number of regs");
- assert(!Results.empty() && "invalid trivial extract");
- assert(std::is_sorted(Indices.begin(), Indices.end()) &&
- "extract offsets must be in ascending order");
+MachineInstrBuilder MachineIRBuilder::buildZExtOrTrunc(unsigned Res,
+ unsigned Op) {
+ assert(MRI->getType(Res).isScalar() || MRI->getType(Res).isVector());
+ assert(MRI->getType(Res).isScalar() == MRI->getType(Op).isScalar());
- assert(MRI->getType(Src).isValid() && "invalid operand type");
- for (auto Res : Results)
- assert(MRI->getType(Res).isValid() && "invalid operand type");
-#endif
+ unsigned Opcode = TargetOpcode::COPY;
+ if (MRI->getType(Res).getSizeInBits() > MRI->getType(Op).getSizeInBits())
+ Opcode = TargetOpcode::G_ZEXT;
+ else if (MRI->getType(Res).getSizeInBits() < MRI->getType(Op).getSizeInBits())
+ Opcode = TargetOpcode::G_TRUNC;
+ else
+ assert(MRI->getType(Res) == MRI->getType(Op));
- auto MIB = BuildMI(getMF(), DL, getTII().get(TargetOpcode::G_EXTRACT));
- for (auto Res : Results)
- MIB.addDef(Res);
+ return buildInstr(Opcode).addDef(Res).addUse(Op);
+}
- MIB.addUse(Src);
+MachineInstrBuilder MachineIRBuilder::buildCast(unsigned Dst, unsigned Src) {
+ LLT SrcTy = MRI->getType(Src);
+ LLT DstTy = MRI->getType(Dst);
+ if (SrcTy == DstTy)
+ return buildCopy(Dst, Src);
+
+ unsigned Opcode;
+ if (SrcTy.isPointer() && DstTy.isScalar())
+ Opcode = TargetOpcode::G_PTRTOINT;
+ else if (DstTy.isPointer() && SrcTy.isScalar())
+ Opcode = TargetOpcode::G_INTTOPTR;
+ else {
+ assert(!SrcTy.isPointer() && !DstTy.isPointer() && "no G_ADDRCAST yet");
+ Opcode = TargetOpcode::G_BITCAST;
+ }
- for (auto Idx : Indices)
- MIB.addImm(Idx);
+ return buildInstr(Opcode).addDef(Dst).addUse(Src);
+}
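The dispatch above, exercised concretely (a hedged sketch with MIRBuilder and MachineRegisterInfo &MRI assumed in scope; address space 0 and 64-bit widths are arbitrary choices):

    unsigned P  = MRI.createGenericVirtualRegister(LLT::pointer(0, 64));
    unsigned S  = MRI.createGenericVirtualRegister(LLT::scalar(64));
    unsigned P2 = MRI.createGenericVirtualRegister(LLT::pointer(0, 64));
    MIRBuilder.buildCast(S, P);  // pointer -> scalar: emits G_PTRTOINT
    MIRBuilder.buildCast(P2, S); // scalar -> pointer: emits G_INTTOPTR
    // Equal types fold to a COPY; remaining combinations use G_BITCAST.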
- getMBB().insert(getInsertPt(), MIB);
- if (InsertedInstr)
- InsertedInstr(MIB);
+MachineInstrBuilder MachineIRBuilder::buildExtract(unsigned Res, unsigned Src,
+ uint64_t Index) {
+#ifndef NDEBUG
+ assert(MRI->getType(Src).isValid() && "invalid operand type");
+ assert(MRI->getType(Res).isValid() && "invalid operand type");
+ assert(Index + MRI->getType(Res).getSizeInBits() <=
+ MRI->getType(Src).getSizeInBits() &&
+ "extracting off end of register");
+#endif
- return MIB;
+ if (MRI->getType(Res).getSizeInBits() == MRI->getType(Src).getSizeInBits()) {
+ assert(Index == 0 && "extraction past the end of a register");
+ return buildCast(Res, Src);
+ }
+
+ return buildInstr(TargetOpcode::G_EXTRACT)
+ .addDef(Res)
+ .addUse(Src)
+ .addImm(Index);
}
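A worked example (MIRBuilder and MRI assumed in scope): extracting the high half of an s64 emits G_EXTRACT with Index = 32, while a same-size "extract" at Index 0 degenerates to buildCast, i.e. a plain COPY here.

    unsigned Src64 = MRI.createGenericVirtualRegister(LLT::scalar(64));
    unsigned Hi32  = MRI.createGenericVirtualRegister(LLT::scalar(32));
    MIRBuilder.buildExtract(Hi32, Src64, 32); // bits [32, 64) of Src64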
-MachineInstrBuilder
-MachineIRBuilder::buildSequence(unsigned Res,
- ArrayRef<unsigned> Ops,
- ArrayRef<uint64_t> Indices) {
+void MachineIRBuilder::buildSequence(unsigned Res, ArrayRef<unsigned> Ops,
+ ArrayRef<uint64_t> Indices) {
#ifndef NDEBUG
assert(Ops.size() == Indices.size() && "incompatible args");
assert(!Ops.empty() && "invalid trivial sequence");
@@ -307,15 +436,97 @@ MachineIRBuilder::buildSequence(unsigned Res,
assert(MRI->getType(Op).isValid() && "invalid operand type");
#endif
- MachineInstrBuilder MIB = buildInstr(TargetOpcode::G_SEQUENCE);
- MIB.addDef(Res);
+ LLT ResTy = MRI->getType(Res);
+ LLT OpTy = MRI->getType(Ops[0]);
+ unsigned OpSize = OpTy.getSizeInBits();
+ bool MaybeMerge = true;
for (unsigned i = 0; i < Ops.size(); ++i) {
- MIB.addUse(Ops[i]);
- MIB.addImm(Indices[i]);
+ if (MRI->getType(Ops[i]) != OpTy || Indices[i] != i * OpSize) {
+ MaybeMerge = false;
+ break;
+ }
+ }
+
+ if (MaybeMerge && Ops.size() * OpSize == ResTy.getSizeInBits()) {
+ buildMerge(Res, Ops);
+ return;
}
+
+ unsigned ResIn = MRI->createGenericVirtualRegister(ResTy);
+ buildUndef(ResIn);
+
+ for (unsigned i = 0; i < Ops.size(); ++i) {
+ unsigned ResOut =
+ i + 1 == Ops.size() ? Res : MRI->createGenericVirtualRegister(ResTy);
+ buildInsert(ResOut, ResIn, Ops[i], Indices[i]);
+ ResIn = ResOut;
+ }
+}
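A worked example of the fast path (names invented, MIRBuilder and MRI assumed in scope): two s32 parts at bit indices 0 and 32 exactly tile an s64, so buildSequence emits a single G_MERGE_VALUES; a gap or type mismatch would instead produce G_IMPLICIT_DEF followed by a chain of G_INSERTs.

    unsigned Lo  = MRI.createGenericVirtualRegister(LLT::scalar(32));
    unsigned Hi  = MRI.createGenericVirtualRegister(LLT::scalar(32));
    unsigned Res = MRI.createGenericVirtualRegister(LLT::scalar(64));
    unsigned Ops[] = {Lo, Hi};
    uint64_t Idx[] = {0, 32};
    MIRBuilder.buildSequence(Res, Ops, Idx); // -> one G_MERGE_VALUES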
+
+MachineInstrBuilder MachineIRBuilder::buildUndef(unsigned Res) {
+ return buildInstr(TargetOpcode::G_IMPLICIT_DEF).addDef(Res);
+}
+
+MachineInstrBuilder MachineIRBuilder::buildMerge(unsigned Res,
+ ArrayRef<unsigned> Ops) {
+
+#ifndef NDEBUG
+ assert(!Ops.empty() && "invalid trivial sequence");
+ LLT Ty = MRI->getType(Ops[0]);
+ for (auto Reg : Ops)
+ assert(MRI->getType(Reg) == Ty && "type mismatch in input list");
+ assert(Ops.size() * MRI->getType(Ops[0]).getSizeInBits() ==
+ MRI->getType(Res).getSizeInBits() &&
+ "input operands do not cover output register");
+#endif
+
+ if (Ops.size() == 1)
+ return buildCast(Res, Ops[0]);
+
+ MachineInstrBuilder MIB = buildInstr(TargetOpcode::G_MERGE_VALUES);
+ MIB.addDef(Res);
+ for (unsigned i = 0; i < Ops.size(); ++i)
+ MIB.addUse(Ops[i]);
return MIB;
}
+MachineInstrBuilder MachineIRBuilder::buildUnmerge(ArrayRef<unsigned> Res,
+ unsigned Op) {
+
+#ifndef NDEBUG
+ assert(!Res.empty() && "invalid trivial sequence");
+ LLT Ty = MRI->getType(Res[0]);
+ for (auto Reg : Res)
+ assert(MRI->getType(Reg) == Ty && "type mismatch in input list");
+ assert(Res.size() * MRI->getType(Res[0]).getSizeInBits() ==
+ MRI->getType(Op).getSizeInBits() &&
+ "input operands do not cover output register");
+#endif
+
+ MachineInstrBuilder MIB = buildInstr(TargetOpcode::G_UNMERGE_VALUES);
+ for (unsigned i = 0; i < Res.size(); ++i)
+ MIB.addDef(Res[i]);
+ MIB.addUse(Op);
+ return MIB;
+}
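Continuing the sketch above: G_UNMERGE_VALUES is the inverse pairing, splitting a wide register back into equal-typed pieces that must exactly cover it.

    unsigned OutLo = MRI.createGenericVirtualRegister(LLT::scalar(32));
    unsigned OutHi = MRI.createGenericVirtualRegister(LLT::scalar(32));
    unsigned Out[] = {OutLo, OutHi};
    MIRBuilder.buildUnmerge(Out, Res); // s64 -> two s32 definitions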
+
+MachineInstrBuilder MachineIRBuilder::buildInsert(unsigned Res, unsigned Src,
+ unsigned Op, unsigned Index) {
+ assert(Index + MRI->getType(Op).getSizeInBits() <=
+ MRI->getType(Res).getSizeInBits() &&
+ "insertion past the end of a register");
+
+ if (MRI->getType(Res).getSizeInBits() == MRI->getType(Op).getSizeInBits()) {
+ return buildCast(Res, Op);
+ }
+
+ return buildInstr(TargetOpcode::G_INSERT)
+ .addDef(Res)
+ .addUse(Src)
+ .addUse(Op)
+ .addImm(Index);
+}
+
MachineInstrBuilder MachineIRBuilder::buildIntrinsic(Intrinsic::ID ID,
unsigned Res,
bool HasSideEffects) {
@@ -395,9 +606,10 @@ MachineInstrBuilder MachineIRBuilder::buildSelect(unsigned Res, unsigned Tst,
if (ResTy.isScalar() || ResTy.isPointer())
assert(MRI->getType(Tst).isScalar() && "type mismatch");
else
- assert(MRI->getType(Tst).isVector() &&
- MRI->getType(Tst).getNumElements() ==
- MRI->getType(Op0).getNumElements() &&
+ assert((MRI->getType(Tst).isScalar() ||
+ (MRI->getType(Tst).isVector() &&
+ MRI->getType(Tst).getNumElements() ==
+ MRI->getType(Op0).getNumElements())) &&
"type mismatch");
#endif
@@ -408,6 +620,47 @@ MachineInstrBuilder MachineIRBuilder::buildSelect(unsigned Res, unsigned Tst,
.addUse(Op1);
}
+MachineInstrBuilder MachineIRBuilder::buildInsertVectorElement(unsigned Res,
+ unsigned Val,
+ unsigned Elt,
+ unsigned Idx) {
+#ifndef NDEBUG
+ LLT ResTy = MRI->getType(Res);
+ LLT ValTy = MRI->getType(Val);
+ LLT EltTy = MRI->getType(Elt);
+ LLT IdxTy = MRI->getType(Idx);
+ assert(ResTy.isVector() && ValTy.isVector() && "invalid operand type");
+ assert(IdxTy.isScalar() && "invalid operand type");
+ assert(ResTy.getNumElements() == ValTy.getNumElements() && "type mismatch");
+ assert(ResTy.getElementType() == EltTy && "type mismatch");
+#endif
+
+ return buildInstr(TargetOpcode::G_INSERT_VECTOR_ELT)
+ .addDef(Res)
+ .addUse(Val)
+ .addUse(Elt)
+ .addUse(Idx);
+}
+
+MachineInstrBuilder MachineIRBuilder::buildExtractVectorElement(unsigned Res,
+ unsigned Val,
+ unsigned Idx) {
+#ifndef NDEBUG
+ LLT ResTy = MRI->getType(Res);
+ LLT ValTy = MRI->getType(Val);
+ LLT IdxTy = MRI->getType(Idx);
+ assert(ValTy.isVector() && "invalid operand type");
+ assert((ResTy.isScalar() || ResTy.isPointer()) && "invalid operand type");
+ assert(IdxTy.isScalar() && "invalid operand type");
+ assert(ValTy.getElementType() == ResTy && "type mismatch");
+#endif
+
+ return buildInstr(TargetOpcode::G_EXTRACT_VECTOR_ELT)
+ .addDef(Res)
+ .addUse(Val)
+ .addUse(Idx);
+}
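A combined usage sketch for the two vector-element builders (a <4 x s32> vector and an s64 index are arbitrary choices; MIRBuilder and MRI assumed in scope):

    unsigned Vec    = MRI.createGenericVirtualRegister(LLT::vector(4, 32));
    unsigned Idx    = MRI.createGenericVirtualRegister(LLT::scalar(64));
    unsigned Elt    = MRI.createGenericVirtualRegister(LLT::scalar(32));
    unsigned NewVec = MRI.createGenericVirtualRegister(LLT::vector(4, 32));
    MIRBuilder.buildExtractVectorElement(Elt, Vec, Idx);
    MIRBuilder.buildInsertVectorElement(NewVec, Vec, Elt, Idx);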
+
void MachineIRBuilder::validateTruncExt(unsigned Dst, unsigned Src,
bool IsExtend) {
#ifndef NDEBUG
diff --git a/contrib/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp b/contrib/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp
index cc026ef..677941d 100644
--- a/contrib/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp
+++ b/contrib/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp
@@ -1,4 +1,4 @@
-//===- llvm/CodeGen/GlobalISel/RegBankSelect.cpp - RegBankSelect -*- C++ -*-==//
+//==- llvm/CodeGen/GlobalISel/RegBankSelect.cpp - RegBankSelect --*- C++ -*-==//
//
// The LLVM Compiler Infrastructure
//
@@ -12,17 +12,39 @@
#include "llvm/CodeGen/GlobalISel/RegBankSelect.h"
#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
#include "llvm/CodeGen/GlobalISel/RegisterBank.h"
+#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
+#include "llvm/CodeGen/GlobalISel/Utils.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/Function.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/Pass.h"
#include "llvm/Support/BlockFrequency.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetOpcodes.h"
+#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <limits>
+#include <memory>
+#include <utility>
#define DEBUG_TYPE "regbankselect"
@@ -36,6 +58,7 @@ static cl::opt<RegBankSelect::Mode> RegBankSelectMode(
"Use the Greedy mode (best local mapping)")));
char RegBankSelect::ID = 0;
+
INITIALIZE_PASS_BEGIN(RegBankSelect, DEBUG_TYPE,
"Assign register bank of generic virtual registers",
false, false);
@@ -47,8 +70,7 @@ INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE,
false)
RegBankSelect::RegBankSelect(Mode RunningMode)
- : MachineFunctionPass(ID), RBI(nullptr), MRI(nullptr), TRI(nullptr),
- MBFI(nullptr), MBPI(nullptr), OptMode(RunningMode) {
+ : MachineFunctionPass(ID), OptMode(RunningMode) {
initializeRegBankSelectPass(*PassRegistry::getPassRegistry());
if (RegBankSelectMode.getNumOccurrences() != 0) {
OptMode = RegBankSelectMode;
@@ -71,6 +93,7 @@ void RegBankSelect::init(MachineFunction &MF) {
MBPI = nullptr;
}
MIRBuilder.setMF(MF);
+ MORE = llvm::make_unique<MachineOptimizationRemarkEmitter>(MF, MBFI);
}
void RegBankSelect::getAnalysisUsage(AnalysisUsage &AU) const {
@@ -131,9 +154,11 @@ bool RegBankSelect::repairReg(
TargetRegisterInfo::isPhysicalRegister(Dst)) &&
"We are about to create several defs for Dst");
- // Build the instruction used to repair, then clone it at the right places.
- MachineInstr *MI = MIRBuilder.buildCopy(Dst, Src);
- MI->removeFromParent();
+ // Build the instruction used to repair, then clone it at the right
+ // places. Avoiding buildCopy bypasses the check that Src and Dst have the
+ // same types because the type is a placeholder when this function is called.
+ MachineInstr *MI =
+ MIRBuilder.buildInstrNoInsert(TargetOpcode::COPY).addDef(Dst).addUse(Src);
DEBUG(dbgs() << "Copy: " << PrintReg(Src) << " to: " << PrintReg(Dst)
<< '\n');
// TODO:
@@ -200,32 +225,30 @@ uint64_t RegBankSelect::getRepairCost(
RBI->copyCost(*DesiredRegBrank, *CurRegBank,
RegisterBankInfo::getSizeInBits(MO.getReg(), *MRI, *TRI));
// TODO: use a dedicated constant for ImpossibleCost.
- if (Cost != UINT_MAX)
+ if (Cost != std::numeric_limits<unsigned>::max())
return Cost;
- assert(!TPC->isGlobalISelAbortEnabled() &&
- "Legalization not available yet");
// Return the legalization cost of that repairing.
}
- assert(!TPC->isGlobalISelAbortEnabled() &&
- "Complex repairing not implemented yet");
- return UINT_MAX;
+ return std::numeric_limits<unsigned>::max();
}
-RegisterBankInfo::InstructionMapping &RegBankSelect::findBestMapping(
+const RegisterBankInfo::InstructionMapping &RegBankSelect::findBestMapping(
MachineInstr &MI, RegisterBankInfo::InstructionMappings &PossibleMappings,
SmallVectorImpl<RepairingPlacement> &RepairPts) {
assert(!PossibleMappings.empty() &&
"Do not know how to map this instruction");
- RegisterBankInfo::InstructionMapping *BestMapping = nullptr;
+ const RegisterBankInfo::InstructionMapping *BestMapping = nullptr;
MappingCost Cost = MappingCost::ImpossibleCost();
SmallVector<RepairingPlacement, 4> LocalRepairPts;
- for (RegisterBankInfo::InstructionMapping &CurMapping : PossibleMappings) {
- MappingCost CurCost = computeMapping(MI, CurMapping, LocalRepairPts, &Cost);
+ for (const RegisterBankInfo::InstructionMapping *CurMapping :
+ PossibleMappings) {
+ MappingCost CurCost =
+ computeMapping(MI, *CurMapping, LocalRepairPts, &Cost);
if (CurCost < Cost) {
DEBUG(dbgs() << "New best: " << CurCost << '\n');
Cost = CurCost;
- BestMapping = &CurMapping;
+ BestMapping = CurMapping;
RepairPts.clear();
for (RepairingPlacement &RepairPt : LocalRepairPts)
RepairPts.emplace_back(std::move(RepairPt));
@@ -235,7 +258,7 @@ RegisterBankInfo::InstructionMapping &RegBankSelect::findBestMapping(
// If none of the mapping worked that means they are all impossible.
// Thus, pick the first one and set an impossible repairing point.
// It will trigger the failed isel mode.
- BestMapping = &(*PossibleMappings.begin());
+ BestMapping = *PossibleMappings.begin();
RepairPts.emplace_back(
RepairingPlacement(MI, 0, *TRI, *this, RepairingPlacement::Impossible));
} else
@@ -352,7 +375,7 @@ void RegBankSelect::tryAvoidingSplit(
// the repairing cost because of the PHIs already proceeded
// as already stated.
// Though the code will be correct.
- assert(0 && "Repairing cost may not be accurate");
+ assert(false && "Repairing cost may not be accurate");
} else {
// We need to do non-local repairing. Basically, patch all
// the uses (i.e., phis) that we already proceeded.
@@ -448,6 +471,11 @@ RegBankSelect::MappingCost RegBankSelect::computeMapping(
// Sums up the repairing cost of MO at each insertion point.
uint64_t RepairCost = getRepairCost(MO, ValMapping);
+
+ // This is an impossible to repair cost.
+ if (RepairCost == std::numeric_limits<unsigned>::max())
+ continue;
+
// Bias used for splitting: 5%.
const uint64_t PercentageForBias = 5;
uint64_t Bias = (RepairCost * PercentageForBias + 99) / 100;
@@ -530,9 +558,11 @@ bool RegBankSelect::applyMapping(
llvm_unreachable("Other kind should not happen");
}
}
+
// Second, rewrite the instruction.
DEBUG(dbgs() << "Actual mapping of the operands: " << OpdMapper << '\n');
RBI->applyMapping(OpdMapper);
+
return true;
}
@@ -541,10 +571,10 @@ bool RegBankSelect::assignInstr(MachineInstr &MI) {
// Remember the repairing placement for all the operands.
SmallVector<RepairingPlacement, 4> RepairPts;
- RegisterBankInfo::InstructionMapping BestMapping;
+ const RegisterBankInfo::InstructionMapping *BestMapping;
if (OptMode == RegBankSelect::Mode::Fast) {
- BestMapping = RBI->getInstrMapping(MI);
- MappingCost DefaultCost = computeMapping(MI, BestMapping, RepairPts);
+ BestMapping = &RBI->getInstrMapping(MI);
+ MappingCost DefaultCost = computeMapping(MI, *BestMapping, RepairPts);
(void)DefaultCost;
if (DefaultCost == MappingCost::ImpossibleCost())
return false;
@@ -553,16 +583,16 @@ bool RegBankSelect::assignInstr(MachineInstr &MI) {
RBI->getInstrPossibleMappings(MI);
if (PossibleMappings.empty())
return false;
- BestMapping = std::move(findBestMapping(MI, PossibleMappings, RepairPts));
+ BestMapping = &findBestMapping(MI, PossibleMappings, RepairPts);
}
// Make sure the mapping is valid for MI.
- assert(BestMapping.verify(MI) && "Invalid instruction mapping");
+ assert(BestMapping->verify(MI) && "Invalid instruction mapping");
- DEBUG(dbgs() << "Best Mapping: " << BestMapping << '\n');
+ DEBUG(dbgs() << "Best Mapping: " << *BestMapping << '\n');
// After this call, MI may not be valid anymore.
// Do not use it.
- return applyMapping(MI, BestMapping, RepairPts);
+ return applyMapping(MI, *BestMapping, RepairPts);
}
bool RegBankSelect::runOnMachineFunction(MachineFunction &MF) {
@@ -585,18 +615,12 @@ bool RegBankSelect::runOnMachineFunction(MachineFunction &MF) {
// LegalizerInfo as it's currently in the separate GlobalISel library.
const MachineRegisterInfo &MRI = MF.getRegInfo();
if (const LegalizerInfo *MLI = MF.getSubtarget().getLegalizerInfo()) {
- for (const MachineBasicBlock &MBB : MF) {
- for (const MachineInstr &MI : MBB) {
+ for (MachineBasicBlock &MBB : MF) {
+ for (MachineInstr &MI : MBB) {
if (isPreISelGenericOpcode(MI.getOpcode()) && !MLI->isLegal(MI, MRI)) {
- if (!TPC->isGlobalISelAbortEnabled()) {
- MF.getProperties().set(
- MachineFunctionProperties::Property::FailedISel);
- return false;
- }
- std::string ErrStorage;
- raw_string_ostream Err(ErrStorage);
- Err << "Instruction is not legal: " << MI << '\n';
- report_fatal_error(Err.str());
+ reportGISelFailure(MF, *TPC, *MORE, "gisel-regbankselect",
+ "instruction is not legal", MI);
+ return false;
}
}
}
@@ -622,9 +646,8 @@ bool RegBankSelect::runOnMachineFunction(MachineFunction &MF) {
continue;
if (!assignInstr(MI)) {
- if (TPC->isGlobalISelAbortEnabled())
- report_fatal_error("Unable to map instruction");
- MF.getProperties().set(MachineFunctionProperties::Property::FailedISel);
+ reportGISelFailure(MF, *TPC, *MORE, "gisel-regbankselect",
+ "unable to map instruction", MI);
return false;
}
}
@@ -640,11 +663,8 @@ RegBankSelect::RepairingPlacement::RepairingPlacement(
MachineInstr &MI, unsigned OpIdx, const TargetRegisterInfo &TRI, Pass &P,
RepairingPlacement::RepairingKind Kind)
// Default is, we are going to insert code to repair OpIdx.
- : Kind(Kind),
- OpIdx(OpIdx),
- CanMaterialize(Kind != RepairingKind::Impossible),
- HasSplit(false),
- P(P) {
+ : Kind(Kind), OpIdx(OpIdx),
+ CanMaterialize(Kind != RepairingKind::Impossible), P(P) {
const MachineOperand &MO = MI.getOperand(OpIdx);
assert(MO.isReg() && "Trying to repair a non-reg operand");
@@ -849,7 +869,7 @@ bool RegBankSelect::EdgeInsertPoint::canMaterialize() const {
}
RegBankSelect::MappingCost::MappingCost(const BlockFrequency &LocalFreq)
- : LocalCost(0), NonLocalCost(0), LocalFreq(LocalFreq.getFrequency()) {}
+ : LocalFreq(LocalFreq.getFrequency()) {}
bool RegBankSelect::MappingCost::addLocalCost(uint64_t Cost) {
// Check if this overflows.
@@ -922,7 +942,6 @@ bool RegBankSelect::MappingCost::operator<(const MappingCost &Cost) const {
OtherLocalAdjust = Cost.LocalCost - LocalCost;
else
ThisLocalAdjust = LocalCost - Cost.LocalCost;
-
} else {
ThisLocalAdjust = LocalCost;
OtherLocalAdjust = Cost.LocalCost;
@@ -968,10 +987,12 @@ bool RegBankSelect::MappingCost::operator==(const MappingCost &Cost) const {
LocalFreq == Cost.LocalFreq;
}
-void RegBankSelect::MappingCost::dump() const {
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD void RegBankSelect::MappingCost::dump() const {
print(dbgs());
dbgs() << '\n';
}
+#endif
void RegBankSelect::MappingCost::print(raw_ostream &OS) const {
if (*this == ImpossibleCost()) {
diff --git a/contrib/llvm/lib/CodeGen/GlobalISel/RegisterBank.cpp b/contrib/llvm/lib/CodeGen/GlobalISel/RegisterBank.cpp
index 49d676f..83b21e6 100644
--- a/contrib/llvm/lib/CodeGen/GlobalISel/RegisterBank.cpp
+++ b/contrib/llvm/lib/CodeGen/GlobalISel/RegisterBank.cpp
@@ -19,10 +19,11 @@ using namespace llvm;
const unsigned RegisterBank::InvalidID = UINT_MAX;
-RegisterBank::RegisterBank(unsigned ID, const char *Name, unsigned Size,
- const uint32_t *CoveredClasses)
+RegisterBank::RegisterBank(
+ unsigned ID, const char *Name, unsigned Size,
+ const uint32_t *CoveredClasses, unsigned NumRegClasses)
: ID(ID), Name(Name), Size(Size) {
- ContainedRegClasses.resize(200);
+ ContainedRegClasses.resize(NumRegClasses);
ContainedRegClasses.setBitsInMask(CoveredClasses);
}
@@ -47,7 +48,7 @@ bool RegisterBank::verify(const TargetRegisterInfo &TRI) const {
// Verify that the Size of the register bank is big enough to cover
// all the register classes it covers.
- assert((getSize() >= SubRC.getSize() * 8) &&
+ assert(getSize() >= TRI.getRegSizeInBits(SubRC) &&
"Size is not big enough for all the subclasses!");
assert(covers(SubRC) && "Not all subclasses are covered");
}
@@ -75,9 +76,11 @@ bool RegisterBank::operator==(const RegisterBank &OtherRB) const {
return &OtherRB == this;
}
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD void RegisterBank::dump(const TargetRegisterInfo *TRI) const {
print(dbgs(), /* IsForDebug */ true, TRI);
}
+#endif
void RegisterBank::print(raw_ostream &OS, bool IsForDebug,
const TargetRegisterInfo *TRI) const {
diff --git a/contrib/llvm/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp b/contrib/llvm/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp
index da5ab0b..a841902 100644
--- a/contrib/llvm/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp
+++ b/contrib/llvm/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp
@@ -45,6 +45,10 @@ STATISTIC(NumOperandsMappingsCreated,
"Number of operands mappings dynamically created");
STATISTIC(NumOperandsMappingsAccessed,
"Number of operands mappings dynamically accessed");
+STATISTIC(NumInstructionMappingsCreated,
+ "Number of instruction mappings dynamically created");
+STATISTIC(NumInstructionMappingsAccessed,
+ "Number of instruction mappings dynamically accessed");
const unsigned RegisterBankInfo::DefaultMappingID = UINT_MAX;
const unsigned RegisterBankInfo::InvalidMappingID = UINT_MAX - 1;
@@ -63,13 +67,6 @@ RegisterBankInfo::RegisterBankInfo(RegisterBank **RegBanks,
#endif // NDEBUG
}
-RegisterBankInfo::~RegisterBankInfo() {
- for (auto It : MapOfPartialMappings)
- delete It.second;
- for (auto It : MapOfValueMappings)
- delete It.second;
-}
-
bool RegisterBankInfo::verify(const TargetRegisterInfo &TRI) const {
#ifndef NDEBUG
for (unsigned Idx = 0, End = getNumRegBanks(); Idx != End; ++Idx) {
@@ -133,19 +130,27 @@ const TargetRegisterClass *RegisterBankInfo::constrainGenericRegister(
return &RC;
}
-RegisterBankInfo::InstructionMapping
+/// Check whether or not \p MI should be treated like a copy
+/// for the mappings.
+/// Copy-like instructions are special for mapping because
+/// they don't have actual register constraints. Moreover,
+/// they sometimes have register classes assigned and we can
+/// just use that instead of failing to provide a generic mapping.
+static bool isCopyLike(const MachineInstr &MI) {
+ return MI.isCopy() || MI.isPHI() ||
+ MI.getOpcode() == TargetOpcode::REG_SEQUENCE;
+}
+
+const RegisterBankInfo::InstructionMapping &
RegisterBankInfo::getInstrMappingImpl(const MachineInstr &MI) const {
// For copies we want to walk over the operands and try to find one
// that has a register bank since the instruction itself will not get
// us any constraint.
- bool isCopyLike = MI.isCopy() || MI.isPHI();
+ bool IsCopyLike = isCopyLike(MI);
// For copy like instruction, only the mapping of the definition
// is important. The rest is not constrained.
- unsigned NumOperandsForMapping = isCopyLike ? 1 : MI.getNumOperands();
+ unsigned NumOperandsForMapping = IsCopyLike ? 1 : MI.getNumOperands();
- RegisterBankInfo::InstructionMapping Mapping(DefaultMappingID, /*Cost*/ 1,
- /*OperandsMapping*/ nullptr,
- NumOperandsForMapping);
const MachineFunction &MF = *MI.getParent()->getParent();
const TargetSubtargetInfo &STI = MF.getSubtarget();
const TargetRegisterInfo &TRI = *STI.getRegisterInfo();
@@ -175,7 +180,7 @@ RegisterBankInfo::getInstrMappingImpl(const MachineInstr &MI) const {
// For copy-like instruction, we want to reuse the register bank
// that is already set on Reg, if any, since those instructions do
// not have any constraints.
- const RegisterBank *CurRegBank = isCopyLike ? AltRegBank : nullptr;
+ const RegisterBank *CurRegBank = IsCopyLike ? AltRegBank : nullptr;
if (!CurRegBank) {
// If this is a target specific instruction, we can deduce
// the register bank from the encoding constraints.
@@ -184,15 +189,15 @@ RegisterBankInfo::getInstrMappingImpl(const MachineInstr &MI) const {
// All our attempts failed, give up.
CompleteMapping = false;
- if (!isCopyLike)
+ if (!IsCopyLike)
// MI does not carry enough information to guess the mapping.
- return InstructionMapping();
+ return getInvalidInstructionMapping();
continue;
}
}
const ValueMapping *ValMapping =
&getValueMapping(0, getSizeInBits(Reg, MRI, TRI), *CurRegBank);
- if (isCopyLike) {
+ if (IsCopyLike) {
OperandsMapping[0] = ValMapping;
CompleteMapping = true;
break;
@@ -200,13 +205,15 @@ RegisterBankInfo::getInstrMappingImpl(const MachineInstr &MI) const {
OperandsMapping[OpIdx] = ValMapping;
}
- if (isCopyLike && !CompleteMapping)
+ if (IsCopyLike && !CompleteMapping)
// No way to deduce the type from what we have.
- return InstructionMapping();
+ return getInvalidInstructionMapping();
assert(CompleteMapping && "Setting an incomplete mapping");
- Mapping.setOperandsMapping(getOperandsMapping(OperandsMapping));
- return Mapping;
+ return getInstructionMapping(
+ DefaultMappingID, /*Cost*/ 1,
+ /*OperandsMapping*/ getOperandsMapping(OperandsMapping),
+ NumOperandsForMapping);
}
/// Hashing function for PartialMapping.
@@ -234,8 +241,8 @@ RegisterBankInfo::getPartialMapping(unsigned StartIdx, unsigned Length,
++NumPartialMappingsCreated;
- const PartialMapping *&PartMapping = MapOfPartialMappings[Hash];
- PartMapping = new PartialMapping{StartIdx, Length, RegBank};
+ auto &PartMapping = MapOfPartialMappings[Hash];
+ PartMapping = llvm::make_unique<PartialMapping>(StartIdx, Length, RegBank);
return *PartMapping;
}
@@ -268,8 +275,8 @@ RegisterBankInfo::getValueMapping(const PartialMapping *BreakDown,
++NumValueMappingsCreated;
- const ValueMapping *&ValMapping = MapOfValueMappings[Hash];
- ValMapping = new ValueMapping{BreakDown, NumBreakDowns};
+ auto &ValMapping = MapOfValueMappings[Hash];
+ ValMapping = llvm::make_unique<ValueMapping>(BreakDown, NumBreakDowns);
return *ValMapping;
}
@@ -282,9 +289,9 @@ RegisterBankInfo::getOperandsMapping(Iterator Begin, Iterator End) const {
// The addresses of the value mapping are unique.
// Therefore, we can use them directly to hash the operand mapping.
hash_code Hash = hash_combine_range(Begin, End);
- const auto &It = MapOfOperandsMappings.find(Hash);
- if (It != MapOfOperandsMappings.end())
- return It->second;
+ auto &Res = MapOfOperandsMappings[Hash];
+ if (Res)
+ return Res.get();
++NumOperandsMappingsCreated;
@@ -293,8 +300,7 @@ RegisterBankInfo::getOperandsMapping(Iterator Begin, Iterator End) const {
// mapping, because we use the pointer of the ValueMapping
// to hash and we expect them to uniquely identify an instance
// of value mapping.
- ValueMapping *&Res = MapOfOperandsMappings[Hash];
- Res = new ValueMapping[std::distance(Begin, End)];
+ Res = llvm::make_unique<ValueMapping[]>(std::distance(Begin, End));
unsigned Idx = 0;
for (Iterator It = Begin; It != End; ++It, ++Idx) {
const ValueMapping *ValMap = *It;
@@ -302,7 +308,7 @@ RegisterBankInfo::getOperandsMapping(Iterator Begin, Iterator End) const {
continue;
Res[Idx] = *ValMap;
}
- return Res;
+ return Res.get();
}
const RegisterBankInfo::ValueMapping *RegisterBankInfo::getOperandsMapping(
@@ -317,9 +323,44 @@ const RegisterBankInfo::ValueMapping *RegisterBankInfo::getOperandsMapping(
return getOperandsMapping(OpdsMapping.begin(), OpdsMapping.end());
}
-RegisterBankInfo::InstructionMapping
+static hash_code
+hashInstructionMapping(unsigned ID, unsigned Cost,
+ const RegisterBankInfo::ValueMapping *OperandsMapping,
+ unsigned NumOperands) {
+ return hash_combine(ID, Cost, OperandsMapping, NumOperands);
+}
+
+const RegisterBankInfo::InstructionMapping &
+RegisterBankInfo::getInstructionMappingImpl(
+ bool IsInvalid, unsigned ID, unsigned Cost,
+ const RegisterBankInfo::ValueMapping *OperandsMapping,
+ unsigned NumOperands) const {
+ assert(((IsInvalid && ID == InvalidMappingID && Cost == 0 &&
+ OperandsMapping == nullptr && NumOperands == 0) ||
+ !IsInvalid) &&
+ "Mismatch argument for invalid input");
+ ++NumInstructionMappingsAccessed;
+
+ hash_code Hash =
+ hashInstructionMapping(ID, Cost, OperandsMapping, NumOperands);
+ const auto &It = MapOfInstructionMappings.find(Hash);
+ if (It != MapOfInstructionMappings.end())
+ return *It->second;
+
+ ++NumInstructionMappingsCreated;
+
+ auto &InstrMapping = MapOfInstructionMappings[Hash];
+ if (IsInvalid)
+ InstrMapping = llvm::make_unique<InstructionMapping>();
+ else
+ InstrMapping = llvm::make_unique<InstructionMapping>(
+ ID, Cost, OperandsMapping, NumOperands);
+ return *InstrMapping;
+}
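The caching above is plain hash-consing; stripped of the RegisterBankInfo types it reduces to the pattern below (simplified, illustrative only):

    struct Mapping {
      unsigned ID, Cost;
      Mapping(unsigned ID, unsigned Cost) : ID(ID), Cost(Cost) {}
    };

    static const Mapping &
    getUniqued(DenseMap<unsigned, std::unique_ptr<Mapping>> &Map,
               unsigned ID, unsigned Cost) {
      unsigned H = hash_combine(ID, Cost); // same hash the lookup used
      auto &Slot = Map[H];                 // empty slot created on a miss
      if (!Slot)
        Slot = llvm::make_unique<Mapping>(ID, Cost);
      return *Slot; // stable reference: the map owns the object
    }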
+
+const RegisterBankInfo::InstructionMapping &
RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
- RegisterBankInfo::InstructionMapping Mapping = getInstrMappingImpl(MI);
+ const RegisterBankInfo::InstructionMapping &Mapping = getInstrMappingImpl(MI);
if (Mapping.isValid())
return Mapping;
llvm_unreachable("The target must implement this");
@@ -329,14 +370,14 @@ RegisterBankInfo::InstructionMappings
RegisterBankInfo::getInstrPossibleMappings(const MachineInstr &MI) const {
InstructionMappings PossibleMappings;
// Put the default mapping first.
- PossibleMappings.push_back(getInstrMapping(MI));
+ PossibleMappings.push_back(&getInstrMapping(MI));
// Then the alternative mapping, if any.
InstructionMappings AltMappings = getInstrAlternativeMappings(MI);
- for (InstructionMapping &AltMapping : AltMappings)
- PossibleMappings.emplace_back(std::move(AltMapping));
+ for (const InstructionMapping *AltMapping : AltMappings)
+ PossibleMappings.push_back(AltMapping);
#ifndef NDEBUG
- for (const InstructionMapping &Mapping : PossibleMappings)
- assert(Mapping.verify(MI) && "Mapping is invalid");
+ for (const InstructionMapping *Mapping : PossibleMappings)
+ assert(Mapping->verify(MI) && "Mapping is invalid");
#endif
return PossibleMappings;
}
@@ -349,6 +390,7 @@ RegisterBankInfo::getInstrAlternativeMappings(const MachineInstr &MI) const {
void RegisterBankInfo::applyDefaultMapping(const OperandsMapper &OpdMapper) {
MachineInstr &MI = OpdMapper.getMI();
+ MachineRegisterInfo &MRI = OpdMapper.getMRI();
DEBUG(dbgs() << "Applying default-like mapping\n");
for (unsigned OpIdx = 0,
EndIdx = OpdMapper.getInstrMapping().getNumOperands();
@@ -359,6 +401,13 @@ void RegisterBankInfo::applyDefaultMapping(const OperandsMapper &OpdMapper) {
DEBUG(dbgs() << " is not a register, nothing to be done\n");
continue;
}
+ if (!MO.getReg()) {
+ DEBUG(dbgs() << " is %%noreg, nothing to be done\n");
+ continue;
+ }
+ assert(OpdMapper.getInstrMapping().getOperandMapping(OpIdx).NumBreakDowns !=
+ 0 &&
+ "Invalid mapping");
assert(OpdMapper.getInstrMapping().getOperandMapping(OpIdx).NumBreakDowns ==
1 &&
"This mapping is too complex for this function");
@@ -368,9 +417,25 @@ void RegisterBankInfo::applyDefaultMapping(const OperandsMapper &OpdMapper) {
DEBUG(dbgs() << " has not been repaired, nothing to be done\n");
continue;
}
- DEBUG(dbgs() << " changed, replace " << MO.getReg());
- MO.setReg(*NewRegs.begin());
- DEBUG(dbgs() << " with " << MO.getReg());
+ unsigned OrigReg = MO.getReg();
+ unsigned NewReg = *NewRegs.begin();
+ DEBUG(dbgs() << " changed, replace " << PrintReg(OrigReg, nullptr));
+ MO.setReg(NewReg);
+ DEBUG(dbgs() << " with " << PrintReg(NewReg, nullptr));
+
+ // The OperandsMapper creates plain scalars; we may have to fix that.
+ // Check if the types match and if not, fix that.
+ LLT OrigTy = MRI.getType(OrigReg);
+ LLT NewTy = MRI.getType(NewReg);
+ if (OrigTy != NewTy) {
+ assert(OrigTy.getSizeInBits() == NewTy.getSizeInBits() &&
+ "Types with difference size cannot be handled by the default "
+ "mapping");
+ DEBUG(dbgs() << "\nChange type of new opd from " << NewTy << " to "
+ << OrigTy);
+ MRI.setType(NewReg, OrigTy);
+ }
+ DEBUG(dbgs() << '\n');
}
}
@@ -394,16 +459,18 @@ unsigned RegisterBankInfo::getSizeInBits(unsigned Reg,
RC = MRI.getRegClass(Reg);
}
assert(RC && "Unable to deduce the register class");
- return RC->getSize() * 8;
+ return TRI.getRegSizeInBits(*RC);
}
//------------------------------------------------------------------------------
// Helper classes implementation.
//------------------------------------------------------------------------------
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD void RegisterBankInfo::PartialMapping::dump() const {
print(dbgs());
dbgs() << '\n';
}
+#endif
bool RegisterBankInfo::PartialMapping::verify() const {
assert(RegBank && "Register bank not set");
@@ -451,10 +518,12 @@ bool RegisterBankInfo::ValueMapping::verify(unsigned MeaningfulBitWidth) const {
return true;
}
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD void RegisterBankInfo::ValueMapping::dump() const {
print(dbgs());
dbgs() << '\n';
}
+#endif
void RegisterBankInfo::ValueMapping::print(raw_ostream &OS) const {
OS << "#BreakDown: " << NumBreakDowns << " ";
@@ -472,8 +541,7 @@ bool RegisterBankInfo::InstructionMapping::verify(
// Check that all the register operands are properly mapped.
// Check the constructor invariant.
// For PHI, we only care about mapping the definition.
- assert(NumOperands ==
- ((MI.isCopy() || MI.isPHI()) ? 1 : MI.getNumOperands()) &&
+ assert(NumOperands == (isCopyLike(MI) ? 1 : MI.getNumOperands()) &&
"NumOperands must match, see constructor");
assert(MI.getParent() && MI.getParent()->getParent() &&
"MI must be connected to a MachineFunction");
@@ -503,10 +571,12 @@ bool RegisterBankInfo::InstructionMapping::verify(
return true;
}
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD void RegisterBankInfo::InstructionMapping::dump() const {
print(dbgs());
dbgs() << '\n';
}
+#endif
void RegisterBankInfo::InstructionMapping::print(raw_ostream &OS) const {
OS << "ID: " << getID() << " Cost: " << getCost() << " Mapping: ";
@@ -576,6 +646,11 @@ void RegisterBankInfo::OperandsMapper::createVRegs(unsigned OpIdx) {
for (unsigned &NewVReg : NewVRegsForOpIdx) {
assert(PartMap != ValMapping.end() && "Out-of-bound access");
assert(NewVReg == 0 && "Register has already been created");
+ // The new registers are always bound to scalars of the right size.
+ // The actual type has to be set when the target does the mapping
+ // of the instruction.
+ // The rationale is that this generic code cannot guess how the
+ // target plans to split the input type.
NewVReg = MRI.createGenericVirtualRegister(LLT::scalar(PartMap->Length));
MRI.setRegBank(NewVReg, *PartMap->RegBank);
++PartMap;
@@ -619,10 +694,12 @@ RegisterBankInfo::OperandsMapper::getVRegs(unsigned OpIdx,
return Res;
}
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD void RegisterBankInfo::OperandsMapper::dump() const {
print(dbgs(), true);
dbgs() << '\n';
}
+#endif
void RegisterBankInfo::OperandsMapper::print(raw_ostream &OS,
bool ForDebug) const {
diff --git a/contrib/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/contrib/llvm/lib/CodeGen/GlobalISel/Utils.cpp
index e500918..5ecaf5c 100644
--- a/contrib/llvm/lib/CodeGen/GlobalISel/Utils.cpp
+++ b/contrib/llvm/lib/CodeGen/GlobalISel/Utils.cpp
@@ -11,10 +11,14 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/GlobalISel/Utils.h"
+#include "llvm/ADT/Twine.h"
#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/IR/Constants.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
@@ -22,6 +26,23 @@
using namespace llvm;
+unsigned llvm::constrainRegToClass(MachineRegisterInfo &MRI,
+ const TargetInstrInfo &TII,
+ const RegisterBankInfo &RBI,
+ MachineInstr &InsertPt, unsigned Reg,
+ const TargetRegisterClass &RegClass) {
+ if (!RBI.constrainGenericRegister(Reg, RegClass, MRI)) {
+ unsigned NewReg = MRI.createVirtualRegister(&RegClass);
+ BuildMI(*InsertPt.getParent(), InsertPt, InsertPt.getDebugLoc(),
+ TII.get(TargetOpcode::COPY), NewReg)
+ .addReg(Reg);
+ return NewReg;
+ }
+
+ return Reg;
+}
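A hypothetical call site from an instruction selector (the AArch64 register class is purely illustrative): either the generic vreg is constrained in place, or a COPY into a fresh vreg of that class is inserted before MI and the new register is returned.

    unsigned Fixed = constrainRegToClass(MRI, TII, RBI, MI, Reg,
                                         AArch64::GPR32RegClass);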
+
unsigned llvm::constrainOperandRegClass(
const MachineFunction &MF, const TargetRegisterInfo &TRI,
MachineRegisterInfo &MRI, const TargetInstrInfo &TII,
@@ -32,14 +53,76 @@ unsigned llvm::constrainOperandRegClass(
"PhysReg not implemented");
const TargetRegisterClass *RegClass = TII.getRegClass(II, OpIdx, &TRI, MF);
+ return constrainRegToClass(MRI, TII, RBI, InsertPt, Reg, *RegClass);
+}
- if (!RBI.constrainGenericRegister(Reg, *RegClass, MRI)) {
- unsigned NewReg = MRI.createVirtualRegister(RegClass);
- BuildMI(*InsertPt.getParent(), InsertPt, InsertPt.getDebugLoc(),
- TII.get(TargetOpcode::COPY), NewReg)
- .addReg(Reg);
- return NewReg;
+bool llvm::isTriviallyDead(const MachineInstr &MI,
+ const MachineRegisterInfo &MRI) {
+ // If we can move an instruction, we can remove it. Otherwise, it has
+ // a side-effect of some sort.
+ bool SawStore = false;
+ if (!MI.isSafeToMove(/*AA=*/nullptr, SawStore))
+ return false;
+
+ // Instructions without side-effects are dead iff they only define dead vregs.
+ for (auto &MO : MI.operands()) {
+ if (!MO.isReg() || !MO.isDef())
+ continue;
+
+ unsigned Reg = MO.getReg();
+ if (TargetRegisterInfo::isPhysicalRegister(Reg) ||
+ !MRI.use_nodbg_empty(Reg))
+ return false;
}
+ return true;
+}
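Sketch of the intended use (a minimal dead-code sweep, not from this patch): collect first, then erase, so iterators stay valid.

    SmallVector<MachineInstr *, 8> Dead;
    for (MachineInstr &MI : MBB)
      if (isTriviallyDead(MI, MRI))
        Dead.push_back(&MI);
    for (MachineInstr *MI : Dead)
      MI->eraseFromParent();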
- return Reg;
+void llvm::reportGISelFailure(MachineFunction &MF, const TargetPassConfig &TPC,
+ MachineOptimizationRemarkEmitter &MORE,
+ MachineOptimizationRemarkMissed &R) {
+ MF.getProperties().set(MachineFunctionProperties::Property::FailedISel);
+
+ // Print the function name explicitly if we don't have a debug location (which
+ // makes the diagnostic less useful) or if we're going to emit a raw error.
+ if (!R.getLocation().isValid() || TPC.isGlobalISelAbortEnabled())
+ R << (" (in function: " + MF.getName() + ")").str();
+
+ if (TPC.isGlobalISelAbortEnabled())
+ report_fatal_error(R.getMsg());
+ else
+ MORE.emit(R);
+}
+
+void llvm::reportGISelFailure(MachineFunction &MF, const TargetPassConfig &TPC,
+ MachineOptimizationRemarkEmitter &MORE,
+ const char *PassName, StringRef Msg,
+ const MachineInstr &MI) {
+ MachineOptimizationRemarkMissed R(PassName, "GISelFailure: ",
+ MI.getDebugLoc(), MI.getParent());
+ R << Msg << ": " << ore::MNV("Inst", MI);
+ reportGISelFailure(MF, TPC, MORE, R);
+}
+
+Optional<int64_t> llvm::getConstantVRegVal(unsigned VReg,
+ const MachineRegisterInfo &MRI) {
+ MachineInstr *MI = MRI.getVRegDef(VReg);
+ if (MI->getOpcode() != TargetOpcode::G_CONSTANT)
+ return None;
+
+ if (MI->getOperand(1).isImm())
+ return MI->getOperand(1).getImm();
+
+ if (MI->getOperand(1).isCImm() &&
+ MI->getOperand(1).getCImm()->getBitWidth() <= 64)
+ return MI->getOperand(1).getCImm()->getSExtValue();
+
+ return None;
+}
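A typical consumer, as a hedged sketch (Reg and MRI assumed in scope):

    if (Optional<int64_t> Cst = getConstantVRegVal(Reg, MRI)) {
      // Reg is defined by G_CONSTANT and its value fits in 64 bits;
      // e.g. select an immediate form of the instruction using *Cst.
    }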
+
+const llvm::ConstantFP* llvm::getConstantFPVRegVal(unsigned VReg,
+ const MachineRegisterInfo &MRI) {
+ MachineInstr *MI = MRI.getVRegDef(VReg);
+ if (TargetOpcode::G_FCONSTANT != MI->getOpcode())
+ return nullptr;
+ return MI->getOperand(1).getFPImm();
}
diff --git a/contrib/llvm/lib/CodeGen/GlobalMerge.cpp b/contrib/llvm/lib/CodeGen/GlobalMerge.cpp
index 1ea5349..c6ca49c 100644
--- a/contrib/llvm/lib/CodeGen/GlobalMerge.cpp
+++ b/contrib/llvm/lib/CodeGen/GlobalMerge.cpp
@@ -192,10 +192,7 @@ namespace {
} // end anonymous namespace
char GlobalMerge::ID = 0;
-INITIALIZE_PASS_BEGIN(GlobalMerge, "global-merge", "Merge global variables",
- false, false)
-INITIALIZE_PASS_END(GlobalMerge, "global-merge", "Merge global variables",
- false, false)
+INITIALIZE_PASS(GlobalMerge, DEBUG_TYPE, "Merge global variables", false, false)
bool GlobalMerge::doMerge(SmallVectorImpl<GlobalVariable*> &Globals,
Module &M, bool isConst, unsigned AddrSpace) const {
@@ -556,7 +553,12 @@ bool GlobalMerge::doInitialization(Module &M) {
// Grab all non-const globals.
for (auto &GV : M.globals()) {
// Merge is safe for "normal" internal or external globals only
- if (GV.isDeclaration() || GV.isThreadLocal() || GV.hasSection())
+ if (GV.isDeclaration() || GV.isThreadLocal() ||
+ GV.hasSection() || GV.hasImplicitSection())
+ continue;
+
+ // It's not safe to merge globals that may be preempted
+ if (TM && !TM->shouldAssumeDSOLocal(M, &GV))
continue;
if (!(MergeExternalGlobals && GV.hasExternalLinkage()) &&
diff --git a/contrib/llvm/lib/CodeGen/IfConversion.cpp b/contrib/llvm/lib/CodeGen/IfConversion.cpp
index b9f3d86..ff84053 100644
--- a/contrib/llvm/lib/CodeGen/IfConversion.cpp
+++ b/contrib/llvm/lib/CodeGen/IfConversion.cpp
@@ -12,7 +12,6 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/CodeGen/Passes.h"
#include "BranchFolding.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/ScopeExit.h"
@@ -25,6 +24,7 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetSchedule.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
@@ -39,7 +39,7 @@
using namespace llvm;
-#define DEBUG_TYPE "ifcvt"
+#define DEBUG_TYPE "if-converter"
// Hidden options for help debugging.
static cl::opt<int> IfCvtFnStart("ifcvt-fn-start", cl::init(-1), cl::Hidden);
@@ -316,9 +316,9 @@ namespace {
char &llvm::IfConverterID = IfConverter::ID;
-INITIALIZE_PASS_BEGIN(IfConverter, "if-converter", "If Converter", false, false)
+INITIALIZE_PASS_BEGIN(IfConverter, DEBUG_TYPE, "If Converter", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
-INITIALIZE_PASS_END(IfConverter, "if-converter", "If Converter", false, false)
+INITIALIZE_PASS_END(IfConverter, DEBUG_TYPE, "If Converter", false, false)
bool IfConverter::runOnMachineFunction(MachineFunction &MF) {
if (skipFunction(*MF.getFunction()) || (PredicateFtor && !PredicateFtor(MF)))
@@ -588,19 +588,6 @@ bool IfConverter::ValidTriangle(BBInfo &TrueBBI, BBInfo &FalseBBI,
return TExit && TExit == FalseBBI.BB;
}
-/// Shrink the provided inclusive range by one instruction.
-/// If the range was one instruction (\p It == \p Begin), It is not modified,
-/// but \p Empty is set to true.
-static inline void shrinkInclusiveRange(
- MachineBasicBlock::iterator &Begin,
- MachineBasicBlock::iterator &It,
- bool &Empty) {
- if (It == Begin)
- Empty = true;
- else
- It--;
-}
-
/// Count duplicated instructions and move the iterators to show where they
/// are.
/// @param TIB True Iterator Begin
@@ -633,10 +620,8 @@ bool IfConverter::CountDuplicatedInstructions(
while (TIB != TIE && FIB != FIE) {
// Skip dbg_value instructions. These do not count.
TIB = skipDebugInstructionsForward(TIB, TIE);
- if(TIB == TIE)
- break;
FIB = skipDebugInstructionsForward(FIB, FIE);
- if(FIB == FIE)
+ if (TIB == TIE || FIB == FIE)
break;
if (!TIB->isIdenticalTo(*FIB))
break;
@@ -656,58 +641,42 @@ bool IfConverter::CountDuplicatedInstructions(
if (TIB == TIE || FIB == FIE)
return true;
// Now, in preparation for counting duplicate instructions at the ends of the
- // blocks, move the end iterators up past any branch instructions.
- --TIE;
- --FIE;
-
- // After this point TIB and TIE define an inclusive range, which means that
- // TIB == TIE is true when there is one more instruction to consider, not at
- // the end. Because we may not be able to go before TIB, we need a flag to
- // indicate a completely empty range.
- bool TEmpty = false, FEmpty = false;
-
- // Upon exit TIE and FIE will both point at the last non-shared instruction.
- // They need to be moved forward to point past the last non-shared
- // instruction if the range they delimit is non-empty.
- auto IncrementEndIteratorsOnExit = make_scope_exit([&]() {
- if (!TEmpty)
- ++TIE;
- if (!FEmpty)
- ++FIE;
- });
+ // blocks, switch to reverse_iterators. Note that getReverse() returns an
+ // iterator that points to the same instruction, unlike std::reverse_iterator.
+ // We have to do our own shifting so that we get the same range.
+ MachineBasicBlock::reverse_iterator RTIE = std::next(TIE.getReverse());
+ MachineBasicBlock::reverse_iterator RFIE = std::next(FIE.getReverse());
+ const MachineBasicBlock::reverse_iterator RTIB = std::next(TIB.getReverse());
+ const MachineBasicBlock::reverse_iterator RFIB = std::next(FIB.getReverse());
if (!TBB.succ_empty() || !FBB.succ_empty()) {
if (SkipUnconditionalBranches) {
- while (!TEmpty && TIE->isUnconditionalBranch())
- shrinkInclusiveRange(TIB, TIE, TEmpty);
- while (!FEmpty && FIE->isUnconditionalBranch())
- shrinkInclusiveRange(FIB, FIE, FEmpty);
+ while (RTIE != RTIB && RTIE->isUnconditionalBranch())
+ ++RTIE;
+ while (RFIE != RFIB && RFIE->isUnconditionalBranch())
+ ++RFIE;
}
}
- // If Dups1 includes all of a block, then don't count duplicate
- // instructions at the end of the blocks.
- if (TEmpty || FEmpty)
- return true;
-
// Count duplicate instructions at the ends of the blocks.
- while (!TEmpty && !FEmpty) {
+ while (RTIE != RTIB && RFIE != RFIB) {
// Skip dbg_value instructions. These do not count.
- TIE = skipDebugInstructionsBackward(TIE, TIB);
- FIE = skipDebugInstructionsBackward(FIE, FIB);
- TEmpty = TIE == TIB && TIE->isDebugValue();
- FEmpty = FIE == FIB && FIE->isDebugValue();
- if (TEmpty || FEmpty)
+ // Note that these are reverse iterators going forward.
+ RTIE = skipDebugInstructionsForward(RTIE, RTIB);
+ RFIE = skipDebugInstructionsForward(RFIE, RFIB);
+ if (RTIE == RTIB || RFIE == RFIB)
break;
- if (!TIE->isIdenticalTo(*FIE))
+ if (!RTIE->isIdenticalTo(*RFIE))
break;
// We have to verify that any branch instructions are the same, and then we
// don't count them toward the # of duplicate instructions.
- if (!TIE->isBranch())
+ if (!RTIE->isBranch())
++Dups2;
- shrinkInclusiveRange(TIB, TIE, TEmpty);
- shrinkInclusiveRange(FIB, FIE, FEmpty);
+ ++RTIE;
+ ++RFIE;
}
+ TIE = std::next(RTIE.getReverse());
+ FIE = std::next(RFIE.getReverse());
return true;
}
@@ -741,25 +710,21 @@ bool IfConverter::RescanInstructions(
static void verifySameBranchInstructions(
MachineBasicBlock *MBB1,
MachineBasicBlock *MBB2) {
- MachineBasicBlock::iterator B1 = MBB1->begin();
- MachineBasicBlock::iterator B2 = MBB2->begin();
- MachineBasicBlock::iterator E1 = std::prev(MBB1->end());
- MachineBasicBlock::iterator E2 = std::prev(MBB2->end());
- bool Empty1 = false, Empty2 = false;
- while (!Empty1 && !Empty2) {
- E1 = skipDebugInstructionsBackward(E1, B1);
- E2 = skipDebugInstructionsBackward(E2, B2);
- Empty1 = E1 == B1 && E1->isDebugValue();
- Empty2 = E2 == B2 && E2->isDebugValue();
-
- if (Empty1 && Empty2)
+ const MachineBasicBlock::reverse_iterator B1 = MBB1->rend();
+ const MachineBasicBlock::reverse_iterator B2 = MBB2->rend();
+ MachineBasicBlock::reverse_iterator E1 = MBB1->rbegin();
+ MachineBasicBlock::reverse_iterator E2 = MBB2->rbegin();
+ while (E1 != B1 && E2 != B2) {
+ E1 = skipDebugInstructionsForward(E1, B1);
+ E2 = skipDebugInstructionsForward(E2, B2);
+ if (E1 == B1 && E2 == B2)
break;
- if (Empty1) {
+ if (E1 == B1) {
assert(!E2->isBranch() && "Branch mis-match, one block is empty.");
break;
}
- if (Empty2) {
+ if (E2 == B2) {
assert(!E1->isBranch() && "Branch mis-match, one block is empty.");
break;
}
@@ -769,8 +734,8 @@ static void verifySameBranchInstructions(
"Branch mis-match, branch instructions don't match.");
else
break;
- shrinkInclusiveRange(B1, E1, Empty1);
- shrinkInclusiveRange(B2, E2, Empty2);
+ ++E1;
+ ++E2;
}
}
#endif
@@ -1353,7 +1318,8 @@ static bool canFallThroughTo(MachineBasicBlock &MBB, MachineBasicBlock &ToMBB) {
return false;
PI = I++;
}
- return true;
+ // Finally see if the last I is indeed a successor to PI.
+ return PI->isSuccessor(&*I);
}
/// Invalidate predecessor BB info so it would be re-analyzed to determine if it
@@ -1508,8 +1474,11 @@ bool IfConverter::IfConvertSimple(BBInfo &BBI, IfcvtKind Kind) {
DontKill.addLiveIns(NextMBB);
}
+ // Remove the branches from the entry so we can add the contents of the true
+ // block to it.
+ BBI.NonPredSize -= TII->removeBranch(*BBI.BB);
+
if (CvtMBB.pred_size() > 1) {
- BBI.NonPredSize -= TII->removeBranch(*BBI.BB);
// Copy instructions in the true block, predicate them, and add them to
// the entry block.
CopyAndPredicateBlock(BBI, *CvtBBI, Cond);
@@ -1518,11 +1487,11 @@ bool IfConverter::IfConvertSimple(BBInfo &BBI, IfcvtKind Kind) {
// explicitly remove CvtBBI as a successor.
BBI.BB->removeSuccessor(&CvtMBB, true);
} else {
+ // Predicate the instructions in the true block.
RemoveKills(CvtMBB.begin(), CvtMBB.end(), DontKill, *TRI);
PredicateBlock(*CvtBBI, CvtMBB.end(), Cond);
// Merge converted block into entry block.
- BBI.NonPredSize -= TII->removeBranch(*BBI.BB);
MergeBlocks(BBI, *CvtBBI);
}
@@ -1622,8 +1591,11 @@ bool IfConverter::IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind) {
BBCvt = MBPI->getEdgeProbability(BBI.BB, &CvtMBB);
}
+ // Remove the branches from the entry so we can add the contents of the true
+ // block to it.
+ BBI.NonPredSize -= TII->removeBranch(*BBI.BB);
+
if (CvtMBB.pred_size() > 1) {
- BBI.NonPredSize -= TII->removeBranch(*BBI.BB);
// Copy instructions in the true block, predicate them, and add them to
// the entry block.
CopyAndPredicateBlock(BBI, *CvtBBI, Cond, true);
@@ -1637,7 +1609,6 @@ bool IfConverter::IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind) {
PredicateBlock(*CvtBBI, CvtMBB.end(), Cond);
// Now merge the entry of the triangle with the true block.
- BBI.NonPredSize -= TII->removeBranch(*BBI.BB);
MergeBlocks(BBI, *CvtBBI, false);
}
@@ -2183,7 +2154,8 @@ void IfConverter::MergeBlocks(BBInfo &ToBBI, BBInfo &FromBBI, bool AddEdges) {
// unknown probabilities into known ones.
// FIXME: This usage is too tricky and in the future we would like to
// eliminate all unknown probabilities in MBB.
- ToBBI.BB->normalizeSuccProbs();
+ if (ToBBI.IsBrAnalyzable)
+ ToBBI.BB->normalizeSuccProbs();
SmallVector<MachineBasicBlock *, 4> FromSuccs(FromMBB.succ_begin(),
FromMBB.succ_end());
@@ -2263,7 +2235,8 @@ void IfConverter::MergeBlocks(BBInfo &ToBBI, BBInfo &FromBBI, bool AddEdges) {
// Normalize the probabilities of ToBBI.BB's successors with all adjustment
// we've done above.
- ToBBI.BB->normalizeSuccProbs();
+ if (ToBBI.IsBrAnalyzable && FromBBI.IsBrAnalyzable)
+ ToBBI.BB->normalizeSuccProbs();
ToBBI.Predicate.append(FromBBI.Predicate.begin(), FromBBI.Predicate.end());
FromBBI.Predicate.clear();
diff --git a/contrib/llvm/lib/CodeGen/ImplicitNullChecks.cpp b/contrib/llvm/lib/CodeGen/ImplicitNullChecks.cpp
index 9588dfb..e308f49 100644
--- a/contrib/llvm/lib/CodeGen/ImplicitNullChecks.cpp
+++ b/contrib/llvm/lib/CodeGen/ImplicitNullChecks.cpp
@@ -22,6 +22,7 @@
// With the help of a runtime that understands the .fault_maps section,
// faulting_load_op branches to throw_npe if executing movl (%r10), %esi incurs
// a page fault.
+// Store and LoadStore are also supported.
//
//===----------------------------------------------------------------------===//
@@ -29,21 +30,22 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/FaultMaps.h"
#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineMemOperand.h"
-#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;
@@ -151,25 +153,44 @@ class ImplicitNullChecks : public MachineFunctionPass {
const TargetRegisterInfo *TRI = nullptr;
AliasAnalysis *AA = nullptr;
MachineModuleInfo *MMI = nullptr;
+ MachineFrameInfo *MFI = nullptr;
bool analyzeBlockForNullChecks(MachineBasicBlock &MBB,
SmallVectorImpl<NullCheck> &NullCheckList);
- MachineInstr *insertFaultingLoad(MachineInstr *LoadMI, MachineBasicBlock *MBB,
- MachineBasicBlock *HandlerMBB);
+ MachineInstr *insertFaultingInstr(MachineInstr *MI, MachineBasicBlock *MBB,
+ MachineBasicBlock *HandlerMBB);
void rewriteNullChecks(ArrayRef<NullCheck> NullCheckList);
- /// Is \p MI a memory operation that can be used to implicitly null check the
- /// value in \p PointerReg? \p PrevInsts is the set of instruction seen since
+ enum AliasResult {
+ AR_NoAlias,
+ AR_MayAlias,
+ AR_WillAliasEverything
+ };
+ /// Returns AR_NoAlias if the memory operation in \p MI does not alias with
+ /// \p PrevMI, AR_MayAlias if they may alias, and AR_WillAliasEverything if
+ /// they may alias and, in addition, any further memory operation may alias
+ /// with \p PrevMI.
+ AliasResult areMemoryOpsAliased(MachineInstr &MI, MachineInstr *PrevMI);
+
+ enum SuitabilityResult {
+ SR_Suitable,
+ SR_Unsuitable,
+ SR_Impossible
+ };
+ /// Return SR_Suitable if \p MI is a memory operation that can be used to
+ /// implicitly null check the value in \p PointerReg, SR_Unsuitable if
+ /// \p MI cannot be used to null check, and SR_Impossible if there is no
+ /// point in continuing the search because no later instruction could be
+ /// used either. \p PrevInsts is the set of instructions seen since
/// the explicit null check on \p PointerReg.
- bool isSuitableMemoryOp(MachineInstr &MI, unsigned PointerReg,
- ArrayRef<MachineInstr *> PrevInsts);
+ SuitabilityResult isSuitableMemoryOp(MachineInstr &MI, unsigned PointerReg,
+ ArrayRef<MachineInstr *> PrevInsts);
/// Return true if \p FaultingMI can be hoisted from after the
/// instructions in \p InstsSeenSoFar to before them. Set \p Dependence to a
/// non-null value if we also need to (and legally can) hoist a dependency.
- bool canHoistLoadInst(MachineInstr *FaultingMI, unsigned PointerReg,
- ArrayRef<MachineInstr *> InstsSeenSoFar,
- MachineBasicBlock *NullSucc, MachineInstr *&Dependence);
+ bool canHoistInst(MachineInstr *FaultingMI, unsigned PointerReg,
+ ArrayRef<MachineInstr *> InstsSeenSoFar,
+ MachineBasicBlock *NullSucc, MachineInstr *&Dependence);
public:
static char ID;
@@ -193,7 +214,7 @@ public:
}
bool ImplicitNullChecks::canHandle(const MachineInstr *MI) {
- if (MI->isCall() || MI->mayStore() || MI->hasUnmodeledSideEffects())
+ if (MI->isCall() || MI->hasUnmodeledSideEffects())
return false;
auto IsRegMask = [](const MachineOperand &MO) { return MO.isRegMask(); };
(void)IsRegMask;
@@ -248,7 +269,7 @@ bool ImplicitNullChecks::canReorder(const MachineInstr *A,
unsigned RegB = MOB.getReg();
- if (TRI->regsOverlap(RegA, RegB))
+ if (TRI->regsOverlap(RegA, RegB) && (MOA.isDef() || MOB.isDef()))
return false;
}
}
@@ -260,6 +281,7 @@ bool ImplicitNullChecks::runOnMachineFunction(MachineFunction &MF) {
TII = MF.getSubtarget().getInstrInfo();
TRI = MF.getRegInfo().getTargetRegisterInfo();
MMI = &MF.getMMI();
+ MFI = &MF.getFrameInfo();
AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
SmallVector<NullCheck, 16> NullCheckList;
@@ -283,36 +305,76 @@ static bool AnyAliasLiveIn(const TargetRegisterInfo *TRI,
return false;
}
-bool ImplicitNullChecks::isSuitableMemoryOp(
- MachineInstr &MI, unsigned PointerReg, ArrayRef<MachineInstr *> PrevInsts) {
+ImplicitNullChecks::AliasResult
+ImplicitNullChecks::areMemoryOpsAliased(MachineInstr &MI,
+ MachineInstr *PrevMI) {
+ // If PrevMI is not a memory access, there is nothing to check against.
+ if (!(PrevMI->mayStore() || PrevMI->mayLoad()))
+ return AR_NoAlias;
+ // Two loads may touch the same address, but neither writes, so no conflict.
+ if (!(MI.mayStore() || PrevMI->mayStore()))
+ return AR_NoAlias;
+ // We lost the memory operand info, so conservatively assume aliasing. If the
+ // operand-less access is a store, stop: nothing later can be checked against it.
+ if (MI.memoperands_empty())
+ return MI.mayStore() ? AR_WillAliasEverything : AR_MayAlias;
+ if (PrevMI->memoperands_empty())
+ return PrevMI->mayStore() ? AR_WillAliasEverything : AR_MayAlias;
+
+ for (MachineMemOperand *MMO1 : MI.memoperands()) {
+ // MMO1 should have a Value, because it comes from the operation we would
+ // like to use as the implicit null check.
+ assert(MMO1->getValue() && "MMO1 should have a Value!");
+ for (MachineMemOperand *MMO2 : PrevMI->memoperands()) {
+ if (const PseudoSourceValue *PSV = MMO2->getPseudoValue()) {
+ if (PSV->mayAlias(MFI))
+ return AR_MayAlias;
+ continue;
+ }
+ llvm::AliasResult AAResult = AA->alias(
+ MemoryLocation(MMO1->getValue(), MemoryLocation::UnknownSize,
+ MMO1->getAAInfo()),
+ MemoryLocation(MMO2->getValue(), MemoryLocation::UnknownSize,
+ MMO2->getAAInfo()));
+ if (AAResult != NoAlias)
+ return AR_MayAlias;
+ }
+ }
+ return AR_NoAlias;
+}
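
The three-valued result above distinguishes "this pair is fine", "this pair blocks the candidate", and "stop scanning entirely". A self-contained model of the decision, with stand-in flags in place of the MachineMemOperand and AliasAnalysis queries:

enum AliasResult { AR_NoAlias, AR_MayAlias, AR_WillAliasEverything };

struct MemOp {
  bool MayLoad = false;
  bool MayStore = false;
  bool HasMemOperands = true; // do we still know what memory it touches?
};

static AliasResult classify(const MemOp &MI, const MemOp &Prev) {
  if (!Prev.MayLoad && !Prev.MayStore)
    return AR_NoAlias;                 // Prev does not touch memory at all
  if (!MI.MayStore && !Prev.MayStore)
    return AR_NoAlias;                 // two loads cannot conflict
  if (!MI.HasMemOperands)              // lost track of what MI accesses
    return MI.MayStore ? AR_WillAliasEverything : AR_MayAlias;
  if (!Prev.HasMemOperands)
    return Prev.MayStore ? AR_WillAliasEverything : AR_MayAlias;
  return AR_NoAlias; // a real implementation would query alias analysis here
}
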
+
+ImplicitNullChecks::SuitabilityResult
+ImplicitNullChecks::isSuitableMemoryOp(MachineInstr &MI, unsigned PointerReg,
+ ArrayRef<MachineInstr *> PrevInsts) {
int64_t Offset;
unsigned BaseReg;
if (!TII->getMemOpBaseRegImmOfs(MI, BaseReg, Offset, TRI) ||
BaseReg != PointerReg)
- return false;
-
- // We want the load to be issued at a sane offset from PointerReg, so that
- // if PointerReg is null then the load reliably page faults.
- if (!(MI.mayLoad() && !MI.isPredicable() && Offset < PageSize))
- return false;
-
- // Finally, we need to make sure that the load instruction actually is
- // loading from PointerReg, and there isn't some re-definition of PointerReg
- // between the compare and the load.
- for (auto *PrevMI : PrevInsts)
- for (auto &PrevMO : PrevMI->operands())
- if (PrevMO.isReg() && PrevMO.getReg() &&
- TRI->regsOverlap(PrevMO.getReg(), PointerReg))
- return false;
-
- return true;
+ return SR_Unsuitable;
+
+ // We want the mem access to be issued at a sane offset from PointerReg,
+ // so that if PointerReg is null then the access reliably page faults.
+ if (!((MI.mayLoad() || MI.mayStore()) && !MI.isPredicable() &&
+ Offset < PageSize))
+ return SR_Unsuitable;
+
+ // Finally, check whether the current memory access aliases with a previous one.
+ for (auto *PrevMI : PrevInsts) {
+ AliasResult AR = areMemoryOpsAliased(MI, PrevMI);
+ if (AR == AR_WillAliasEverything)
+ return SR_Impossible;
+ if (AR == AR_MayAlias)
+ return SR_Unsuitable;
+ }
+ return SR_Suitable;
}
-bool ImplicitNullChecks::canHoistLoadInst(
- MachineInstr *FaultingMI, unsigned PointerReg,
- ArrayRef<MachineInstr *> InstsSeenSoFar, MachineBasicBlock *NullSucc,
- MachineInstr *&Dependence) {
+bool ImplicitNullChecks::canHoistInst(MachineInstr *FaultingMI,
+ unsigned PointerReg,
+ ArrayRef<MachineInstr *> InstsSeenSoFar,
+ MachineBasicBlock *NullSucc,
+ MachineInstr *&Dependence) {
auto DepResult = computeDependence(FaultingMI, InstsSeenSoFar);
if (!DepResult.CanReorder)
return false;
@@ -359,7 +421,8 @@ bool ImplicitNullChecks::canHoistLoadInst(
// The Dependency can't be re-defining the base register -- then we won't
// get the memory operation on the address we want. This is already
// checked in \c IsSuitableMemoryOp.
- assert(!TRI->regsOverlap(DependenceMO.getReg(), PointerReg) &&
+ assert(!(DependenceMO.isDef() &&
+ TRI->regsOverlap(DependenceMO.getReg(), PointerReg)) &&
"Should have been checked before!");
}
@@ -481,50 +544,76 @@ bool ImplicitNullChecks::analyzeBlockForNullChecks(
return false;
MachineInstr *Dependence;
- if (isSuitableMemoryOp(MI, PointerReg, InstsSeenSoFar) &&
- canHoistLoadInst(&MI, PointerReg, InstsSeenSoFar, NullSucc,
- Dependence)) {
+ SuitabilityResult SR = isSuitableMemoryOp(MI, PointerReg, InstsSeenSoFar);
+ if (SR == SR_Impossible)
+ return false;
+ if (SR == SR_Suitable &&
+ canHoistInst(&MI, PointerReg, InstsSeenSoFar, NullSucc, Dependence)) {
NullCheckList.emplace_back(&MI, MBP.ConditionDef, &MBB, NotNullSucc,
NullSucc, Dependence);
return true;
}
+ // If MI re-defines the PointerReg then we cannot move further.
+ if (any_of(MI.operands(), [&](MachineOperand &MO) {
+ return MO.isReg() && MO.getReg() && MO.isDef() &&
+ TRI->regsOverlap(MO.getReg(), PointerReg);
+ }))
+ return false;
InstsSeenSoFar.push_back(&MI);
}
return false;
}
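
The scan loop consumes that result as a small state machine: the first suitable access wins, SR_Impossible aborts the whole block, and a redefinition of the pointer register also ends the search. A sketch with stand-in types:

#include <vector>

enum SuitabilityResult { SR_Suitable, SR_Unsuitable, SR_Impossible };

struct Inst {
  SuitabilityResult Suitability;
  bool RedefinesPointer;
};

// Return the index of the first usable access, or -1 if the block has none.
static int findNullCheckCandidate(const std::vector<Inst> &Block) {
  for (size_t I = 0; I != Block.size(); ++I) {
    if (Block[I].Suitability == SR_Impossible)
      return -1;
    if (Block[I].Suitability == SR_Suitable)
      return static_cast<int>(I);
    if (Block[I].RedefinesPointer)
      return -1; // PointerReg no longer holds the checked value
  }
  return -1;
}
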
-/// Wrap a machine load instruction, LoadMI, into a FAULTING_LOAD_OP machine
-/// instruction. The FAULTING_LOAD_OP instruction does the same load as LoadMI
-/// (defining the same register), and branches to HandlerMBB if the load
-/// faults. The FAULTING_LOAD_OP instruction is inserted at the end of MBB.
-MachineInstr *
-ImplicitNullChecks::insertFaultingLoad(MachineInstr *LoadMI,
- MachineBasicBlock *MBB,
- MachineBasicBlock *HandlerMBB) {
+/// Wrap a machine instruction, MI, into a FAULTING machine instruction.
+/// The FAULTING instruction does the same load/store as MI
+/// (defining the same register), and branches to HandlerMBB if the mem access
+/// faults. The FAULTING instruction is inserted at the end of MBB.
+MachineInstr *ImplicitNullChecks::insertFaultingInstr(
+ MachineInstr *MI, MachineBasicBlock *MBB, MachineBasicBlock *HandlerMBB) {
const unsigned NoRegister = 0; // Guaranteed to be the NoRegister value for
// all targets.
DebugLoc DL;
- unsigned NumDefs = LoadMI->getDesc().getNumDefs();
+ unsigned NumDefs = MI->getDesc().getNumDefs();
assert(NumDefs <= 1 && "other cases unhandled!");
unsigned DefReg = NoRegister;
if (NumDefs != 0) {
- DefReg = LoadMI->defs().begin()->getReg();
- assert(std::distance(LoadMI->defs().begin(), LoadMI->defs().end()) == 1 &&
+ DefReg = MI->defs().begin()->getReg();
+ assert(std::distance(MI->defs().begin(), MI->defs().end()) == 1 &&
"expected exactly one def!");
}
- auto MIB = BuildMI(MBB, DL, TII->get(TargetOpcode::FAULTING_LOAD_OP), DefReg)
- .addMBB(HandlerMBB)
- .addImm(LoadMI->getOpcode());
+ FaultMaps::FaultKind FK;
+ if (MI->mayLoad())
+ FK =
+ MI->mayStore() ? FaultMaps::FaultingLoadStore : FaultMaps::FaultingLoad;
+ else
+ FK = FaultMaps::FaultingStore;
- for (auto &MO : LoadMI->uses())
- MIB.addOperand(MO);
+ auto MIB = BuildMI(MBB, DL, TII->get(TargetOpcode::FAULTING_OP), DefReg)
+ .addImm(FK)
+ .addMBB(HandlerMBB)
+ .addImm(MI->getOpcode());
+
+ for (auto &MO : MI->uses()) {
+ if (MO.isReg()) {
+ MachineOperand NewMO = MO;
+ if (MO.isUse()) {
+ NewMO.setIsKill(false);
+ } else {
+ assert(MO.isDef() && "Expected def or use");
+ NewMO.setIsDead(false);
+ }
+ MIB.add(NewMO);
+ } else {
+ MIB.add(MO);
+ }
+ }
- MIB.setMemRefs(LoadMI->memoperands_begin(), LoadMI->memoperands_end());
+ MIB.setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
return MIB;
}
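
The fault kind encoded into the FAULTING_OP operand is derived purely from the mayLoad/mayStore flags, which is why the pass can now wrap stores and load-stores as well as loads. The selection logic in isolation:

enum FaultKind { FaultingLoad, FaultingStore, FaultingLoadStore };

// Assumes the instruction is known to access memory (the pass checks this).
static FaultKind selectFaultKind(bool MayLoad, bool MayStore) {
  if (MayLoad)
    return MayStore ? FaultingLoadStore : FaultingLoad;
  return FaultingStore;
}
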
@@ -545,18 +634,18 @@ void ImplicitNullChecks::rewriteNullChecks(
NC.getCheckBlock()->insert(NC.getCheckBlock()->end(), DepMI);
}
- // Insert a faulting load where the conditional branch was originally. We
- // check earlier ensures that this bit of code motion is legal. We do not
- // touch the successors list for any basic block since we haven't changed
- // control flow, we've just made it implicit.
- MachineInstr *FaultingLoad = insertFaultingLoad(
+ // Insert a faulting instruction where the conditional branch was
+ // originally. The check performed earlier ensures that this bit of code
+ // motion is legal. We do not touch the successors list for any basic block
+ // since we haven't changed control flow, we've just made it implicit.
+ MachineInstr *FaultingInstr = insertFaultingInstr(
NC.getMemOperation(), NC.getCheckBlock(), NC.getNullSucc());
// Now the values defined by MemOperation, if any, are live-in of
// the block of MemOperation.
- // The original load operation may define implicit-defs alongside
- // the loaded value.
+ // The original operation may define implicit-defs alongside
+ // the value.
MachineBasicBlock *MBB = NC.getMemOperation()->getParent();
- for (const MachineOperand &MO : FaultingLoad->operands()) {
+ for (const MachineOperand &MO : FaultingInstr->operands()) {
if (!MO.isReg() || !MO.isDef())
continue;
unsigned Reg = MO.getReg();
@@ -588,8 +677,8 @@ void ImplicitNullChecks::rewriteNullChecks(
char ImplicitNullChecks::ID = 0;
char &llvm::ImplicitNullChecksID = ImplicitNullChecks::ID;
-INITIALIZE_PASS_BEGIN(ImplicitNullChecks, "implicit-null-checks",
+INITIALIZE_PASS_BEGIN(ImplicitNullChecks, DEBUG_TYPE,
"Implicit null checks", false, false)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
-INITIALIZE_PASS_END(ImplicitNullChecks, "implicit-null-checks",
+INITIALIZE_PASS_END(ImplicitNullChecks, DEBUG_TYPE,
"Implicit null checks", false, false)
diff --git a/contrib/llvm/lib/CodeGen/InlineSpiller.cpp b/contrib/llvm/lib/CodeGen/InlineSpiller.cpp
index 3d81184..eda4f74 100644
--- a/contrib/llvm/lib/CodeGen/InlineSpiller.cpp
+++ b/contrib/llvm/lib/CodeGen/InlineSpiller.cpp
@@ -558,7 +558,7 @@ bool InlineSpiller::reMaterializeFor(LiveInterval &VirtReg, MachineInstr &MI) {
Edit->rematerializeAt(*MI.getParent(), MI, NewVReg, RM, TRI);
// We take the DebugLoc from MI, since OrigMI may be attributed to a
- // different source location.
+ // different source location.
auto *NewMI = LIS.getInstructionFromIndex(DefIdx);
NewMI->setDebugLoc(MI.getDebugLoc());
@@ -643,8 +643,11 @@ void InlineSpiller::reMaterializeAll() {
Edit->eraseVirtReg(Reg);
continue;
}
- assert((LIS.hasInterval(Reg) && !LIS.getInterval(Reg).empty()) &&
- "Reg with empty interval has reference");
+
+ assert(LIS.hasInterval(Reg) &&
+ (!LIS.getInterval(Reg).empty() || !MRI.reg_nodbg_empty(Reg)) &&
+ "Empty and not used live-range?!");
+
RegsToSpill[ResultPos++] = Reg;
}
RegsToSpill.erase(RegsToSpill.begin() + ResultPos, RegsToSpill.end());
@@ -686,7 +689,8 @@ bool InlineSpiller::coalesceStackAccess(MachineInstr *MI, unsigned Reg) {
return true;
}
-#if !defined(NDEBUG)
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD
// Dump the range of instructions from B to E with their slot indexes.
static void dumpMachineInstrRangeWithSlotIndex(MachineBasicBlock::iterator B,
MachineBasicBlock::iterator E,
@@ -856,21 +860,46 @@ void InlineSpiller::insertReload(unsigned NewVReg,
++NumReloads;
}
+/// Check if \p Def fully defines a VReg with an undefined value.
+/// If that's the case, that means the value of VReg is actually
+/// not relevant.
+static bool isFullUndefDef(const MachineInstr &Def) {
+ if (!Def.isImplicitDef())
+ return false;
+ assert(Def.getNumOperands() == 1 &&
+ "Implicit def with more than one definition");
+ // We can say that the VReg defined by Def is undef, only if it is
+ // fully defined by Def. Otherwise, some of the lanes may not be
+ // undef and the value of the VReg matters.
+ return !Def.getOperand(0).getSubReg();
+}
+
/// insertSpill - Insert a spill of NewVReg after MI.
void InlineSpiller::insertSpill(unsigned NewVReg, bool isKill,
MachineBasicBlock::iterator MI) {
MachineBasicBlock &MBB = *MI->getParent();
MachineInstrSpan MIS(MI);
- TII.storeRegToStackSlot(MBB, std::next(MI), NewVReg, isKill, StackSlot,
- MRI.getRegClass(NewVReg), &TRI);
+ bool IsRealSpill = true;
+ if (isFullUndefDef(*MI)) {
+ // Don't spill undef value.
+ // Anything works for undef, in particular keeping the memory
+ // uninitialized is a viable option and it saves code size and
+ // run time.
+ BuildMI(MBB, std::next(MI), MI->getDebugLoc(), TII.get(TargetOpcode::KILL))
+ .addReg(NewVReg, getKillRegState(isKill));
+ IsRealSpill = false;
+ } else
+ TII.storeRegToStackSlot(MBB, std::next(MI), NewVReg, isKill, StackSlot,
+ MRI.getRegClass(NewVReg), &TRI);
LIS.InsertMachineInstrRangeInMaps(std::next(MI), MIS.end());
DEBUG(dumpMachineInstrRangeWithSlotIndex(std::next(MI), MIS.end(), LIS,
"spill"));
++NumSpills;
- HSpiller.addToMergeableSpills(*std::next(MI), StackSlot, Original);
+ if (IsRealSpill)
+ HSpiller.addToMergeableSpills(*std::next(MI), StackSlot, Original);
}
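
The undef-spill shortcut hinges on one observation: a register fully defined by IMPLICIT_DEF carries no meaningful value, so emitting a KILL marker instead of a store is sound and saves both code size and a memory write. The decision reduces to the following (stand-in struct, not MachineInstr):

struct DefInfo {
  bool IsImplicitDef;
  unsigned SubRegIndex; // 0 means the whole register is defined
};

// A spill must really hit the stack unless the value is a full undef.
static bool needsRealSpill(const DefInfo &Def) {
  bool FullUndef = Def.IsImplicitDef && Def.SubRegIndex == 0;
  return !FullUndef; // partial defs may still carry live lanes
}
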
/// spillAroundUses - insert spill code around each use of Reg.
@@ -887,20 +916,10 @@ void InlineSpiller::spillAroundUses(unsigned Reg) {
// Debug values are not allowed to affect codegen.
if (MI->isDebugValue()) {
// Modify DBG_VALUE now that the value is in a spill slot.
- bool IsIndirect = MI->isIndirectDebugValue();
- uint64_t Offset = IsIndirect ? MI->getOperand(1).getImm() : 0;
- const MDNode *Var = MI->getDebugVariable();
- const MDNode *Expr = MI->getDebugExpression();
- DebugLoc DL = MI->getDebugLoc();
- DEBUG(dbgs() << "Modifying debug info due to spill:" << "\t" << *MI);
MachineBasicBlock *MBB = MI->getParent();
- assert(cast<DILocalVariable>(Var)->isValidLocationForIntrinsic(DL) &&
- "Expected inlined-at fields to agree");
- BuildMI(*MBB, MBB->erase(MI), DL, TII.get(TargetOpcode::DBG_VALUE))
- .addFrameIndex(StackSlot)
- .addImm(Offset)
- .addMetadata(Var)
- .addMetadata(Expr);
+ DEBUG(dbgs() << "Modifying debug info due to spill:\t" << *MI);
+ buildDbgValueForSpill(*MBB, MI, *MI, StackSlot);
+ MBB->erase(MI);
continue;
}
diff --git a/contrib/llvm/lib/CodeGen/InterleavedAccessPass.cpp b/contrib/llvm/lib/CodeGen/InterleavedAccessPass.cpp
index ec35b3f..ee4929c 100644
--- a/contrib/llvm/lib/CodeGen/InterleavedAccessPass.cpp
+++ b/contrib/llvm/lib/CodeGen/InterleavedAccessPass.cpp
@@ -45,6 +45,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/Support/Debug.h"
@@ -68,8 +69,7 @@ class InterleavedAccess : public FunctionPass {
public:
static char ID;
- InterleavedAccess(const TargetMachine *TM = nullptr)
- : FunctionPass(ID), DT(nullptr), TM(TM), TLI(nullptr) {
+ InterleavedAccess() : FunctionPass(ID), DT(nullptr), TLI(nullptr) {
initializeInterleavedAccessPass(*PassRegistry::getPassRegistry());
}
@@ -84,7 +84,6 @@ public:
private:
DominatorTree *DT;
- const TargetMachine *TM;
const TargetLowering *TLI;
/// The maximum supported interleave factor.
@@ -108,18 +107,16 @@ private:
} // end anonymous namespace.
char InterleavedAccess::ID = 0;
-INITIALIZE_TM_PASS_BEGIN(
- InterleavedAccess, "interleaved-access",
+INITIALIZE_PASS_BEGIN(InterleavedAccess, DEBUG_TYPE,
"Lower interleaved memory accesses to target specific intrinsics", false,
false)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_TM_PASS_END(
- InterleavedAccess, "interleaved-access",
+INITIALIZE_PASS_END(InterleavedAccess, DEBUG_TYPE,
"Lower interleaved memory accesses to target specific intrinsics", false,
false)
-FunctionPass *llvm::createInterleavedAccessPass(const TargetMachine *TM) {
- return new InterleavedAccess(TM);
+FunctionPass *llvm::createInterleavedAccessPass() {
+ return new InterleavedAccess();
}
/// \brief Check if the mask is a DE-interleave mask of the given factor
@@ -426,13 +423,15 @@ bool InterleavedAccess::lowerInterleavedStore(
}
bool InterleavedAccess::runOnFunction(Function &F) {
- if (!TM || !LowerInterleavedAccesses)
+ auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
+ if (!TPC || !LowerInterleavedAccesses)
return false;
DEBUG(dbgs() << "*** " << getPassName() << ": " << F.getName() << "\n");
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- TLI = TM->getSubtargetImpl(F)->getTargetLowering();
+ auto &TM = TPC->getTM<TargetMachine>();
+ TLI = TM.getSubtargetImpl(F)->getTargetLowering();
MaxFactor = TLI->getMaxSupportedInterleaveFactor();
// Holds dead instructions that will be erased later.
diff --git a/contrib/llvm/lib/CodeGen/IntrinsicLowering.cpp b/contrib/llvm/lib/CodeGen/IntrinsicLowering.cpp
index afd2406..c6cc909 100644
--- a/contrib/llvm/lib/CodeGen/IntrinsicLowering.cpp
+++ b/contrib/llvm/lib/CodeGen/IntrinsicLowering.cpp
@@ -115,21 +115,21 @@ void IntrinsicLowering::AddPrototypes(Module &M) {
Type::getInt8PtrTy(Context),
Type::getInt8PtrTy(Context),
Type::getInt8PtrTy(Context),
- DL.getIntPtrType(Context), nullptr);
+ DL.getIntPtrType(Context));
break;
case Intrinsic::memmove:
M.getOrInsertFunction("memmove",
Type::getInt8PtrTy(Context),
Type::getInt8PtrTy(Context),
Type::getInt8PtrTy(Context),
- DL.getIntPtrType(Context), nullptr);
+ DL.getIntPtrType(Context));
break;
case Intrinsic::memset:
M.getOrInsertFunction("memset",
Type::getInt8PtrTy(Context),
Type::getInt8PtrTy(Context),
Type::getInt32Ty(M.getContext()),
- DL.getIntPtrType(Context), nullptr);
+ DL.getIntPtrType(Context));
break;
case Intrinsic::sqrt:
EnsureFPIntrinsicsExist(M, F, "sqrtf", "sqrt", "sqrtl");
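
The dropped trailing nullptr reflects an API change: getOrInsertFunction stopped taking a sentinel-terminated varargs list of parameter types. A generic before/after sketch of that style of migration, with hypothetical helper names rather than LLVM's signatures:

#include <cstdarg>
#include <string>
#include <vector>

// Old style: nullptr-terminated varargs, easy to misuse.
static std::vector<std::string> collectOld(const char *First, ...) {
  std::vector<std::string> Types;
  va_list Args;
  va_start(Args, First);
  for (const char *T = First; T; T = va_arg(Args, const char *))
    Types.push_back(T);
  va_end(Args);
  return Types;
}

// New style: variadic template; no sentinel, arity checked at compile time.
template <typename... Ts>
static std::vector<std::string> collectNew(Ts... TypeNames) {
  return {std::string(TypeNames)...};
}

// collectOld("i8*", "i64", (const char *)nullptr) needs the caller to
// remember the sentinel; collectNew("i8*", "i64") cannot get it wrong.
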
diff --git a/contrib/llvm/lib/CodeGen/LLVMTargetMachine.cpp b/contrib/llvm/lib/CodeGen/LLVMTargetMachine.cpp
index 26794e2..f2defb4 100644
--- a/contrib/llvm/lib/CodeGen/LLVMTargetMachine.cpp
+++ b/contrib/llvm/lib/CodeGen/LLVMTargetMachine.cpp
@@ -11,7 +11,6 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Target/TargetMachine.h"
#include "llvm/Analysis/Passes.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/BasicTTIImpl.h"
@@ -31,21 +30,11 @@
#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Transforms/Scalar.h"
using namespace llvm;
-// Enable or disable FastISel. Both options are needed, because
-// FastISel is enabled by default with -fast, and we wish to be
-// able to enable or disable fast-isel independently from -O0.
-static cl::opt<cl::boolOrDefault>
-EnableFastISelOption("fast-isel", cl::Hidden,
- cl::desc("Enable the \"fast\" instruction selector"));
-
-static cl::opt<bool>
- EnableGlobalISel("global-isel", cl::Hidden, cl::init(false),
- cl::desc("Enable the \"global\" instruction selector"));
-
void LLVMTargetMachine::initAsmInfo() {
MRI = TheTarget.createMCRegInfo(getTargetTriple().str());
MII = TheTarget.createMCInstrInfo();
@@ -71,8 +60,7 @@ void LLVMTargetMachine::initAsmInfo() {
TmpAsmInfo->setPreserveAsmComments(Options.MCOptions.PreserveAsmComments);
- if (Options.CompressDebugSections)
- TmpAsmInfo->setCompressDebugSections(DebugCompressionType::DCT_ZlibGnu);
+ TmpAsmInfo->setCompressDebugSections(Options.CompressDebugSections);
TmpAsmInfo->setRelaxELFRelocations(Options.RelaxELFRelocations);
@@ -85,7 +73,7 @@ void LLVMTargetMachine::initAsmInfo() {
LLVMTargetMachine::LLVMTargetMachine(const Target &T,
StringRef DataLayoutString,
const Triple &TT, StringRef CPU,
- StringRef FS, TargetOptions Options,
+ StringRef FS, const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL)
: TargetMachine(T, DataLayoutString, TT, CPU, FS, Options) {
@@ -106,109 +94,31 @@ static MCContext *
addPassesToGenerateCode(LLVMTargetMachine *TM, PassManagerBase &PM,
bool DisableVerify, AnalysisID StartBefore,
AnalysisID StartAfter, AnalysisID StopBefore,
- AnalysisID StopAfter,
- MachineFunctionInitializer *MFInitializer = nullptr) {
-
- // When in emulated TLS mode, add the LowerEmuTLS pass.
- if (TM->Options.EmulatedTLS)
- PM.add(createLowerEmuTLSPass(TM));
-
- PM.add(createPreISelIntrinsicLoweringPass());
-
- // Add internal analysis passes from the target machine.
- PM.add(createTargetTransformInfoWrapperPass(TM->getTargetIRAnalysis()));
-
+ AnalysisID StopAfter) {
// Targets may override createPassConfig to provide a target-specific
// subclass.
TargetPassConfig *PassConfig = TM->createPassConfig(PM);
PassConfig->setStartStopPasses(StartBefore, StartAfter, StopBefore,
StopAfter);
-
// Set PassConfig options provided by TargetMachine.
PassConfig->setDisableVerify(DisableVerify);
-
PM.add(PassConfig);
-
- PassConfig->addIRPasses();
-
- PassConfig->addCodeGenPrepare();
-
- PassConfig->addPassesToHandleExceptions();
-
- PassConfig->addISelPrepare();
-
MachineModuleInfo *MMI = new MachineModuleInfo(TM);
- MMI->setMachineFunctionInitializer(MFInitializer);
PM.add(MMI);
- // Enable FastISel with -fast, but allow that to be overridden.
- TM->setO0WantsFastISel(EnableFastISelOption != cl::BOU_FALSE);
- if (EnableFastISelOption == cl::BOU_TRUE ||
- (TM->getOptLevel() == CodeGenOpt::None &&
- TM->getO0WantsFastISel()))
- TM->setFastISel(true);
-
- // Ask the target for an isel.
- if (LLVM_UNLIKELY(EnableGlobalISel)) {
- if (PassConfig->addIRTranslator())
- return nullptr;
-
- PassConfig->addPreLegalizeMachineIR();
-
- if (PassConfig->addLegalizeMachineIR())
- return nullptr;
-
- // Before running the register bank selector, ask the target if it
- // wants to run some passes.
- PassConfig->addPreRegBankSelect();
-
- if (PassConfig->addRegBankSelect())
- return nullptr;
-
- PassConfig->addPreGlobalInstructionSelect();
-
- if (PassConfig->addGlobalInstructionSelect())
- return nullptr;
-
- // Pass to reset the MachineFunction if the ISel failed.
- PM.add(createResetMachineFunctionPass(
- PassConfig->reportDiagnosticWhenGlobalISelFallback()));
-
- // Provide a fallback path when we do not want to abort on
- // not-yet-supported input.
- if (LLVM_UNLIKELY(!PassConfig->isGlobalISelAbortEnabled()) &&
- PassConfig->addInstSelector())
- return nullptr;
-
- } else if (PassConfig->addInstSelector())
+ if (PassConfig->addISelPasses())
return nullptr;
-
PassConfig->addMachinePasses();
-
PassConfig->setInitialized();
return &MMI->getContext();
}
-bool LLVMTargetMachine::addPassesToEmitFile(
- PassManagerBase &PM, raw_pwrite_stream &Out, CodeGenFileType FileType,
- bool DisableVerify, AnalysisID StartBefore, AnalysisID StartAfter,
- AnalysisID StopBefore, AnalysisID StopAfter,
- MachineFunctionInitializer *MFInitializer) {
- // Add common CodeGen passes.
- MCContext *Context =
- addPassesToGenerateCode(this, PM, DisableVerify, StartBefore, StartAfter,
- StopBefore, StopAfter, MFInitializer);
- if (!Context)
- return true;
-
- if (StopBefore || StopAfter) {
- PM.add(createPrintMIRPass(Out));
- return false;
- }
-
+bool LLVMTargetMachine::addAsmPrinter(PassManagerBase &PM,
+ raw_pwrite_stream &Out, CodeGenFileType FileType,
+ MCContext &Context) {
if (Options.MCOptions.MCSaveTempLabels)
- Context->setAllowTemporaryLabels(false);
+ Context.setAllowTemporaryLabels(false);
const MCSubtargetInfo &STI = *getMCSubtargetInfo();
const MCAsmInfo &MAI = *getMCAsmInfo();
@@ -225,14 +135,14 @@ bool LLVMTargetMachine::addPassesToEmitFile(
// Create a code emitter if asked to show the encoding.
MCCodeEmitter *MCE = nullptr;
if (Options.MCOptions.ShowMCEncoding)
- MCE = getTarget().createMCCodeEmitter(MII, MRI, *Context);
+ MCE = getTarget().createMCCodeEmitter(MII, MRI, Context);
MCAsmBackend *MAB =
getTarget().createMCAsmBackend(MRI, getTargetTriple().str(), TargetCPU,
Options.MCOptions);
auto FOut = llvm::make_unique<formatted_raw_ostream>(Out);
MCStreamer *S = getTarget().createAsmStreamer(
- *Context, std::move(FOut), Options.MCOptions.AsmVerbose,
+ Context, std::move(FOut), Options.MCOptions.AsmVerbose,
Options.MCOptions.MCUseDwarfDirectory, InstPrinter, MCE, MAB,
Options.MCOptions.ShowMCInst);
AsmStreamer.reset(S);
@@ -241,7 +151,7 @@ bool LLVMTargetMachine::addPassesToEmitFile(
case CGFT_ObjectFile: {
// Create the code emitter for the target if it exists. If not, .o file
// emission fails.
- MCCodeEmitter *MCE = getTarget().createMCCodeEmitter(MII, MRI, *Context);
+ MCCodeEmitter *MCE = getTarget().createMCCodeEmitter(MII, MRI, Context);
MCAsmBackend *MAB =
getTarget().createMCAsmBackend(MRI, getTargetTriple().str(), TargetCPU,
Options.MCOptions);
@@ -249,11 +159,11 @@ bool LLVMTargetMachine::addPassesToEmitFile(
return true;
// Don't waste memory on names of temp labels.
- Context->setUseNamesOnTempLabels(false);
+ Context.setUseNamesOnTempLabels(false);
Triple T(getTargetTriple().str());
AsmStreamer.reset(getTarget().createMCObjectStreamer(
- T, *Context, *MAB, Out, MCE, STI, Options.MCOptions.MCRelaxAll,
+ T, Context, *MAB, Out, MCE, STI, Options.MCOptions.MCRelaxAll,
Options.MCOptions.MCIncrementalLinkerCompatible,
/*DWARFMustBeAtTheEnd*/ true));
break;
@@ -261,7 +171,7 @@ bool LLVMTargetMachine::addPassesToEmitFile(
case CGFT_Null:
// The Null output is intended for use for performance analysis and testing,
// not real users.
- AsmStreamer.reset(getTarget().createNullStreamer(*Context));
+ AsmStreamer.reset(getTarget().createNullStreamer(Context));
break;
}
@@ -272,8 +182,28 @@ bool LLVMTargetMachine::addPassesToEmitFile(
return true;
PM.add(Printer);
- PM.add(createFreeMachineFunctionPass());
+ return false;
+}
+bool LLVMTargetMachine::addPassesToEmitFile(
+ PassManagerBase &PM, raw_pwrite_stream &Out, CodeGenFileType FileType,
+ bool DisableVerify, AnalysisID StartBefore, AnalysisID StartAfter,
+ AnalysisID StopBefore, AnalysisID StopAfter) {
+ // Add common CodeGen passes.
+ MCContext *Context =
+ addPassesToGenerateCode(this, PM, DisableVerify, StartBefore, StartAfter,
+ StopBefore, StopAfter);
+ if (!Context)
+ return true;
+
+ if (StopBefore || StopAfter) {
+ PM.add(createPrintMIRPass(Out));
+ } else {
+ if (addAsmPrinter(PM, Out, FileType, *Context))
+ return true;
+ }
+
+ PM.add(createFreeMachineFunctionPass());
return false;
}
diff --git a/contrib/llvm/lib/CodeGen/LazyMachineBlockFrequencyInfo.cpp b/contrib/llvm/lib/CodeGen/LazyMachineBlockFrequencyInfo.cpp
new file mode 100644
index 0000000..996d40c
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/LazyMachineBlockFrequencyInfo.cpp
@@ -0,0 +1,97 @@
+///===- LazyMachineBlockFrequencyInfo.cpp - Lazy Machine Block Frequency --===//
+///
+/// The LLVM Compiler Infrastructure
+///
+/// This file is distributed under the University of Illinois Open Source
+/// License. See LICENSE.TXT for details.
+///
+///===---------------------------------------------------------------------===//
+/// \file
+/// This is an alternative analysis pass to MachineBlockFrequencyInfo. The
+/// difference is that with this pass the block frequencies are not computed
+/// when the analysis pass is executed but rather when the BFI result is
+/// explicitly requested by the analysis client.
+///
+///===---------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/LazyMachineBlockFrequencyInfo.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "lazy-machine-block-freq"
+
+INITIALIZE_PASS_BEGIN(LazyMachineBlockFrequencyInfoPass, DEBUG_TYPE,
+ "Lazy Machine Block Frequency Analysis", true, true)
+INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_END(LazyMachineBlockFrequencyInfoPass, DEBUG_TYPE,
+ "Lazy Machine Block Frequency Analysis", true, true)
+
+char LazyMachineBlockFrequencyInfoPass::ID = 0;
+
+LazyMachineBlockFrequencyInfoPass::LazyMachineBlockFrequencyInfoPass()
+ : MachineFunctionPass(ID) {
+ initializeLazyMachineBlockFrequencyInfoPassPass(
+ *PassRegistry::getPassRegistry());
+}
+
+void LazyMachineBlockFrequencyInfoPass::print(raw_ostream &OS,
+ const Module *M) const {
+ getBFI().print(OS, M);
+}
+
+void LazyMachineBlockFrequencyInfoPass::getAnalysisUsage(
+ AnalysisUsage &AU) const {
+ AU.addRequired<MachineBranchProbabilityInfo>();
+ AU.setPreservesAll();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+void LazyMachineBlockFrequencyInfoPass::releaseMemory() {
+ OwnedMBFI.reset();
+ OwnedMLI.reset();
+ OwnedMDT.reset();
+}
+
+MachineBlockFrequencyInfo &
+LazyMachineBlockFrequencyInfoPass::calculateIfNotAvailable() const {
+ auto *MBFI = getAnalysisIfAvailable<MachineBlockFrequencyInfo>();
+ if (MBFI) {
+ DEBUG(dbgs() << "MachineBlockFrequencyInfo is available\n");
+ return *MBFI;
+ }
+
+ auto &MBPI = getAnalysis<MachineBranchProbabilityInfo>();
+ auto *MLI = getAnalysisIfAvailable<MachineLoopInfo>();
+ auto *MDT = getAnalysisIfAvailable<MachineDominatorTree>();
+ DEBUG(dbgs() << "Building MachineBlockFrequencyInfo on the fly\n");
+ DEBUG(if (MLI) dbgs() << "LoopInfo is available\n");
+
+ if (!MLI) {
+ DEBUG(dbgs() << "Building LoopInfo on the fly\n");
+ // First create a dominator tree.
+ DEBUG(if (MDT) dbgs() << "DominatorTree is available\n");
+
+ if (!MDT) {
+ DEBUG(dbgs() << "Building DominatorTree on the fly\n");
+ OwnedMDT = make_unique<MachineDominatorTree>();
+ OwnedMDT->getBase().recalculate(*MF);
+ MDT = OwnedMDT.get();
+ }
+
+ // Generate LoopInfo from it.
+ OwnedMLI = make_unique<MachineLoopInfo>();
+ OwnedMLI->getBase().analyze(MDT->getBase());
+ MLI = OwnedMLI.get();
+ }
+
+ OwnedMBFI = make_unique<MachineBlockFrequencyInfo>();
+ OwnedMBFI->calculate(*MF, MBPI, *MLI);
+ return *OwnedMBFI.get();
+}
+
+bool LazyMachineBlockFrequencyInfoPass::runOnMachineFunction(
+ MachineFunction &F) {
+ MF = &F;
+ return false;
+}
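
The pass works by deferring the expensive block-frequency computation until a client calls calculateIfNotAvailable(), then caching the result until releaseMemory(). The compute-on-first-use pattern in miniature (illustrative types only, not the LLVM class):

#include <memory>

struct Expensive {
  int Value;
  explicit Expensive(int Seed) : Value(Seed * 42) {} // stands in for the BFI calculation
};

class LazyResult {
  mutable std::unique_ptr<Expensive> Cached;
  int Seed;

public:
  explicit LazyResult(int Seed) : Seed(Seed) {}

  const Expensive &get() const {
    if (!Cached)
      Cached = std::make_unique<Expensive>(Seed); // computed on first request
    return *Cached;
  }

  void release() { Cached.reset(); } // mirrors releaseMemory()
};
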
diff --git a/contrib/llvm/lib/CodeGen/LexicalScopes.cpp b/contrib/llvm/lib/CodeGen/LexicalScopes.cpp
index 834ed5f..995c58a 100644
--- a/contrib/llvm/lib/CodeGen/LexicalScopes.cpp
+++ b/contrib/llvm/lib/CodeGen/LexicalScopes.cpp
@@ -15,13 +15,22 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/LexicalScopes.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
-#include "llvm/IR/DebugInfo.h"
-#include "llvm/IR/Function.h"
+#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/FormattedStream.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cassert>
+#include <string>
+#include <tuple>
+#include <utility>
+
using namespace llvm;
#define DEBUG_TYPE "lexicalscopes"
@@ -38,6 +47,10 @@ void LexicalScopes::reset() {
/// initialize - Scan machine function and construct lexical scope nest.
void LexicalScopes::initialize(const MachineFunction &Fn) {
+ // Don't attempt any lexical scope creation for a NoDebug compile unit.
+ if (Fn.getFunction()->getSubprogram()->getUnit()->getEmissionKind() ==
+ DICompileUnit::NoDebug)
+ return;
reset();
MF = &Fn;
SmallVector<InsnRange, 4> MIRanges;
@@ -54,7 +67,6 @@ void LexicalScopes::initialize(const MachineFunction &Fn) {
void LexicalScopes::extractLexicalScopes(
SmallVectorImpl<InsnRange> &MIRanges,
DenseMap<const MachineInstr *, LexicalScope *> &MI2ScopeMap) {
-
// Scan each instruction and create scopes. First build working set of scopes.
for (const auto &MBB : *MF) {
const MachineInstr *RangeBeginMI = nullptr;
@@ -74,8 +86,9 @@ void LexicalScopes::extractLexicalScopes(
continue;
}
- // Ignore DBG_VALUE. It does not contribute to any instruction in output.
- if (MInsn.isDebugValue())
+ // Ignore DBG_VALUE and similar instructions that do not contribute to any
+ // instruction in the output.
+ if (MInsn.isMetaInstruction())
continue;
if (RangeBeginMI) {
@@ -127,6 +140,10 @@ LexicalScope *LexicalScopes::findLexicalScope(const DILocation *DL) {
LexicalScope *LexicalScopes::getOrCreateLexicalScope(const DILocalScope *Scope,
const DILocation *IA) {
if (IA) {
+ // Skip scopes inlined from a NoDebug compile unit.
+ if (Scope->getSubprogram()->getUnit()->getEmissionKind() ==
+ DICompileUnit::NoDebug)
+ return getOrCreateLexicalScope(IA);
// Create an abstract scope for inlined function.
getOrCreateAbstractScope(Scope);
// Create an inlined scope for inlined function.
@@ -181,10 +198,9 @@ LexicalScopes::getOrCreateInlinedScope(const DILocalScope *Scope,
else
Parent = getOrCreateLexicalScope(InlinedAt);
- I = InlinedLexicalScopeMap.emplace(std::piecewise_construct,
- std::forward_as_tuple(P),
- std::forward_as_tuple(Parent, Scope,
- InlinedAt, false))
+ I = InlinedLexicalScopeMap
+ .emplace(std::piecewise_construct, std::forward_as_tuple(P),
+ std::forward_as_tuple(Parent, Scope, InlinedAt, false))
.first;
return &I->second;
}
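
The reindented emplace call is worth reading closely: piecewise_construct builds the map's value in place from the argument tuple, which is what you need when the mapped type (here a LexicalScope) has no default constructor and should not be copied. A standalone example of the same idiom:

#include <map>
#include <tuple>
#include <utility>

struct Scope {
  int Parent, Node; // stand-ins for the real pointer members
  Scope(int P, int N) : Parent(P), Node(N) {}
};

int main() {
  std::map<std::pair<int, int>, Scope> Scopes;
  // Construct key and value in place from their respective argument tuples.
  auto I = Scopes.emplace(std::piecewise_construct,
                          std::forward_as_tuple(std::make_pair(1, 2)),
                          std::forward_as_tuple(7, 9))
               .first;
  return I->second.Parent; // 7
}
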
@@ -241,7 +257,6 @@ void LexicalScopes::constructScopeNest(LexicalScope *Scope) {
void LexicalScopes::assignInstructionRanges(
SmallVectorImpl<InsnRange> &MIRanges,
DenseMap<const MachineInstr *, LexicalScope *> &MI2ScopeMap) {
-
LexicalScope *PrevLexicalScope = nullptr;
for (const auto &R : MIRanges) {
LexicalScope *S = MI2ScopeMap.lookup(R.first);
@@ -299,9 +314,8 @@ bool LexicalScopes::dominates(const DILocation *DL, MachineBasicBlock *MBB) {
return Result;
}
-/// dump - Print data structures.
-void LexicalScope::dump(unsigned Indent) const {
-#ifndef NDEBUG
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD void LexicalScope::dump(unsigned Indent) const {
raw_ostream &err = dbgs();
err.indent(Indent);
err << "DFSIn: " << DFSIn << " DFSOut: " << DFSOut << "\n";
@@ -316,5 +330,5 @@ void LexicalScope::dump(unsigned Indent) const {
for (unsigned i = 0, e = Children.size(); i != e; ++i)
if (Children[i] != this)
Children[i]->dump(Indent + 2);
-#endif
}
+#endif
diff --git a/contrib/llvm/lib/CodeGen/LiveDebugValues.cpp b/contrib/llvm/lib/CodeGen/LiveDebugValues.cpp
index c945376..b5e705f 100644
--- a/contrib/llvm/lib/CodeGen/LiveDebugValues.cpp
+++ b/contrib/llvm/lib/CodeGen/LiveDebugValues.cpp
@@ -24,13 +24,16 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/UniqueVector.h"
#include "llvm/CodeGen/LexicalScopes.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetRegisterInfo.h"
@@ -40,7 +43,7 @@
using namespace llvm;
-#define DEBUG_TYPE "live-debug-values"
+#define DEBUG_TYPE "livedebugvalues"
STATISTIC(NumInserted, "Number of DBG_VALUE instructions inserted");
@@ -61,6 +64,7 @@ class LiveDebugValues : public MachineFunctionPass {
private:
const TargetRegisterInfo *TRI;
const TargetInstrInfo *TII;
+ const TargetFrameLowering *TFI;
LexicalScopes LS;
/// Keeps track of lexical scopes associated with a user value's source
@@ -127,11 +131,13 @@ private:
if (int RegNo = isDbgValueDescribedByReg(MI)) {
Kind = RegisterKind;
Loc.RegisterLoc.RegNo = RegNo;
- uint64_t Offset =
+ int64_t Offset =
MI.isIndirectDebugValue() ? MI.getOperand(1).getImm() : 0;
// We don't support offsets larger than 4GiB here. They are
// slated to be replaced with DIExpressions anyway.
- if (Offset >= (1ULL << 32))
+ // With indirect debug values used for spill locations, Offset
+ // can be negative.
+ if (Offset == INT64_MIN || std::abs(Offset) >= (1LL << 32))
Kind = InvalidKind;
else
Loc.RegisterLoc.Offset = Offset;
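
The INT64_MIN test is not pedantry: taking the absolute value of INT64_MIN overflows and is undefined behavior, so the sentinel must be rejected before the std::abs call. The guard in isolation:

#include <cstdint>
#include <cstdlib>

// Reject INT64_MIN first; std::llabs(INT64_MIN) is undefined behavior.
static bool fitsIn32BitMagnitude(int64_t Offset) {
  if (Offset == INT64_MIN)
    return false;
  return std::llabs(Offset) < (1LL << 32);
}
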
@@ -150,7 +156,9 @@ private:
/// dominates MBB.
bool dominates(MachineBasicBlock &MBB) const { return UVS.dominates(&MBB); }
- void dump() const { MI.dump(); }
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ LLVM_DUMP_METHOD void dump() const { MI.dump(); }
+#endif
bool operator==(const VarLoc &Other) const {
return Var == Other.Var && Loc.Hash == Other.Loc.Hash;
@@ -167,6 +175,11 @@ private:
typedef UniqueVector<VarLoc> VarLocMap;
typedef SparseBitVector<> VarLocSet;
typedef SmallDenseMap<const MachineBasicBlock *, VarLocSet> VarLocInMBB;
+ struct SpillDebugPair {
+ MachineInstr *SpillInst;
+ MachineInstr *DebugInst;
+ };
+ typedef SmallVector<SpillDebugPair, 4> SpillMap;
/// This holds the working set of currently open ranges. For fast
/// access, this is done both as a set of VarLocIDs, and a map of
@@ -216,14 +229,21 @@ private:
}
};
+ bool isSpillInstruction(const MachineInstr &MI, MachineFunction *MF,
+ unsigned &Reg);
+ int extractSpillBaseRegAndOffset(const MachineInstr &MI, unsigned &Reg);
+
void transferDebugValue(const MachineInstr &MI, OpenRangesSet &OpenRanges,
VarLocMap &VarLocIDs);
+ void transferSpillInst(MachineInstr &MI, OpenRangesSet &OpenRanges,
+ VarLocMap &VarLocIDs, SpillMap &Spills);
void transferRegisterDef(MachineInstr &MI, OpenRangesSet &OpenRanges,
const VarLocMap &VarLocIDs);
bool transferTerminatorInst(MachineInstr &MI, OpenRangesSet &OpenRanges,
VarLocInMBB &OutLocs, const VarLocMap &VarLocIDs);
bool transfer(MachineInstr &MI, OpenRangesSet &OpenRanges,
- VarLocInMBB &OutLocs, VarLocMap &VarLocIDs);
+ VarLocInMBB &OutLocs, VarLocMap &VarLocIDs, SpillMap &Spills,
+ bool transferSpills);
bool join(MachineBasicBlock &MBB, VarLocInMBB &OutLocs, VarLocInMBB &InLocs,
const VarLocMap &VarLocIDs,
@@ -263,7 +283,7 @@ public:
char LiveDebugValues::ID = 0;
char &llvm::LiveDebugValuesID = LiveDebugValues::ID;
-INITIALIZE_PASS(LiveDebugValues, "livedebugvalues", "Live DEBUG_VALUE analysis",
+INITIALIZE_PASS(LiveDebugValues, DEBUG_TYPE, "Live DEBUG_VALUE analysis",
false, false)
/// Default construct and initialize the pass.
@@ -282,6 +302,7 @@ void LiveDebugValues::getAnalysisUsage(AnalysisUsage &AU) const {
// Debug Range Extension Implementation
//===----------------------------------------------------------------------===//
+#ifndef NDEBUG
void LiveDebugValues::printVarLocInMBB(const MachineFunction &MF,
const VarLocInMBB &V,
const VarLocMap &VarLocIDs,
@@ -300,6 +321,22 @@ void LiveDebugValues::printVarLocInMBB(const MachineFunction &MF,
}
Out << "\n";
}
+#endif
+
+/// Given a spill instruction, extract the register and offset used to
+/// address the spill location in a target-independent way.
+int LiveDebugValues::extractSpillBaseRegAndOffset(const MachineInstr &MI,
+ unsigned &Reg) {
+ assert(MI.hasOneMemOperand() &&
+ "Spill instruction does not have exactly one memory operand?");
+ auto MMOI = MI.memoperands_begin();
+ const PseudoSourceValue *PVal = (*MMOI)->getPseudoValue();
+ assert(PVal->kind() == PseudoSourceValue::FixedStack &&
+ "Inconsistent memory operand in spill instruction");
+ int FI = cast<FixedStackPseudoSourceValue>(PVal)->getFrameIndex();
+ const MachineBasicBlock *MBB = MI.getParent();
+ return TFI->getFrameIndexReference(*MBB->getParent(), FI, Reg);
+}
/// End all previous ranges related to @MI and start a new range from @MI
/// if it is a DBG_VALUE instr.
@@ -336,8 +373,12 @@ void LiveDebugValues::transferRegisterDef(MachineInstr &MI,
unsigned SP = TLI->getStackPointerRegisterToSaveRestore();
SparseBitVector<> KillSet;
for (const MachineOperand &MO : MI.operands()) {
+ // Determine whether the operand is a register def. Assume that call
+ // instructions never clobber SP, because some backends (e.g., AArch64)
+ // never list SP in the regmask.
if (MO.isReg() && MO.isDef() && MO.getReg() &&
- TRI->isPhysicalRegister(MO.getReg())) {
+ TRI->isPhysicalRegister(MO.getReg()) &&
+ !(MI.isCall() && MO.getReg() == SP)) {
// Remove ranges of all aliased registers.
for (MCRegAliasIterator RAI(MO.getReg(), TRI, true); RAI.isValid(); ++RAI)
for (unsigned ID : OpenRanges.getVarLocs())
@@ -358,6 +399,91 @@ void LiveDebugValues::transferRegisterDef(MachineInstr &MI,
OpenRanges.erase(KillSet, VarLocIDs);
}
+/// Decide if @MI is a spill instruction and return true if it is. We use 2
+/// criteria to make this decision:
+/// - Is this instruction a store to a spill slot?
+/// - Is there a register operand that is both used and killed?
+/// TODO: Store optimization can fold spills into other stores (including
+/// other spills). We do not handle this yet (more than one memory operand).
+bool LiveDebugValues::isSpillInstruction(const MachineInstr &MI,
+ MachineFunction *MF, unsigned &Reg) {
+ const MachineFrameInfo &FrameInfo = MF->getFrameInfo();
+ int FI;
+ const MachineMemOperand *MMO;
+
+ // TODO: Handle multiple stores folded into one.
+ if (!MI.hasOneMemOperand())
+ return false;
+
+ // To identify a spill instruction, use the same criteria as in AsmPrinter.
+ if (!((TII->isStoreToStackSlotPostFE(MI, FI) ||
+ TII->hasStoreToStackSlot(MI, MMO, FI)) &&
+ FrameInfo.isSpillSlotObjectIndex(FI)))
+ return false;
+
+ // In a spill instruction generated by the InlineSpiller the spilled register
+ // has its kill flag set. Return false if we don't find such a register.
+ Reg = 0;
+ for (const MachineOperand &MO : MI.operands()) {
+ if (MO.isReg() && MO.isUse() && MO.isKill()) {
+ Reg = MO.getReg();
+ break;
+ }
+ }
+ return Reg != 0;
+}
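
The kill-flag criterion works because the InlineSpiller emits the spill store with the spilled register marked as a killed use, so scanning the operands for that pattern recovers the register. Modeled with a plain operand struct:

#include <vector>

struct Operand {
  bool IsReg, IsUse, IsKill;
  unsigned Reg;
};

// Return the first register operand that is both used and killed, or 0.
static unsigned findKilledUse(const std::vector<Operand> &Ops) {
  for (const Operand &MO : Ops)
    if (MO.IsReg && MO.IsUse && MO.IsKill)
      return MO.Reg;
  return 0; // 0 plays the role of "no register"
}
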
+
+/// A spilled register may indicate that we have to end the current range of
+/// a variable and create a new one for the spill location.
+/// We don't want to insert any instructions in transfer(), so we just create
+/// the DBG_VALUE without inserting it and keep track of it in @Spills.
+/// It will be inserted into the BB when we're done iterating over the
+/// instructions.
+void LiveDebugValues::transferSpillInst(MachineInstr &MI,
+ OpenRangesSet &OpenRanges,
+ VarLocMap &VarLocIDs,
+ SpillMap &Spills) {
+ unsigned Reg;
+ MachineFunction *MF = MI.getParent()->getParent();
+ if (!isSpillInstruction(MI, MF, Reg))
+ return;
+
+ // Check if the register is the location of a debug value.
+ for (unsigned ID : OpenRanges.getVarLocs()) {
+ if (VarLocIDs[ID].isDescribedByReg() == Reg) {
+ DEBUG(dbgs() << "Spilling Register " << PrintReg(Reg, TRI) << '('
+ << VarLocIDs[ID].Var.getVar()->getName() << ")\n");
+
+ // Create a DBG_VALUE instruction to describe the Var in its spilled
+ // location, but don't insert it yet to avoid invalidating the
+ // iterator in our caller.
+ unsigned SpillBase;
+ int SpillOffset = extractSpillBaseRegAndOffset(MI, SpillBase);
+ const MachineInstr *DMI = &VarLocIDs[ID].MI;
+ MachineInstr *SpDMI =
+ BuildMI(*MF, DMI->getDebugLoc(), DMI->getDesc(), true, SpillBase, 0,
+ DMI->getDebugVariable(), DMI->getDebugExpression());
+ SpDMI->getOperand(1).setImm(SpillOffset);
+ DEBUG(dbgs() << "Creating DBG_VALUE inst for spill: ";
+ SpDMI->print(dbgs(), false, TII));
+
+ // The newly created DBG_VALUE instruction SpDMI must be inserted after
+ // MI. Keep track of the pairing.
+ SpillDebugPair MIP = {&MI, SpDMI};
+ Spills.push_back(MIP);
+
+ // End all previous ranges of Var.
+ OpenRanges.erase(VarLocIDs[ID].Var);
+
+ // Add the VarLoc to OpenRanges.
+ VarLoc VL(*SpDMI, LS);
+ unsigned SpillLocID = VarLocIDs.insert(VL);
+ OpenRanges.insert(SpillLocID, VL.Var);
+ return;
+ }
+ }
+}
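
Deferring the DBG_VALUE insertion through the SpillMap is the standard collect-then-apply trick: recording (spill, debug-inst) pairs during the walk keeps the caller's iteration over the block valid, and the pairs are spliced in afterwards. A generic sketch of the pattern:

#include <iterator>
#include <list>
#include <utility>
#include <vector>

int main() {
  std::list<int> Instrs = {1, 2, 3};
  // Record (anchor, payload) pairs instead of mutating during the walk.
  std::vector<std::pair<std::list<int>::iterator, int>> Pending;
  for (auto It = Instrs.begin(); It != Instrs.end(); ++It)
    if (*It == 2)
      Pending.push_back({It, 20});
  // Apply after the traversal; each payload lands right after its anchor.
  for (auto &P : Pending)
    Instrs.insert(std::next(P.first), P.second);
  return static_cast<int>(Instrs.size()); // 4
}
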
+
/// Terminate all open ranges at the end of the current basic block.
bool LiveDebugValues::transferTerminatorInst(MachineInstr &MI,
OpenRangesSet &OpenRanges,
@@ -383,10 +509,13 @@ bool LiveDebugValues::transferTerminatorInst(MachineInstr &MI,
/// This routine creates OpenRanges and OutLocs.
bool LiveDebugValues::transfer(MachineInstr &MI, OpenRangesSet &OpenRanges,
- VarLocInMBB &OutLocs, VarLocMap &VarLocIDs) {
+ VarLocInMBB &OutLocs, VarLocMap &VarLocIDs,
+ SpillMap &Spills, bool transferSpills) {
bool Changed = false;
transferDebugValue(MI, OpenRanges, VarLocIDs);
transferRegisterDef(MI, OpenRanges, VarLocIDs);
+ if (transferSpills)
+ transferSpillInst(MI, OpenRanges, VarLocIDs, Spills);
Changed = transferTerminatorInst(MI, OpenRanges, OutLocs, VarLocIDs);
return Changed;
}
@@ -475,10 +604,11 @@ bool LiveDebugValues::ExtendRanges(MachineFunction &MF) {
bool OLChanged = false;
bool MBBJoined = false;
- VarLocMap VarLocIDs; // Map VarLoc<>unique ID for use in bitvectors.
+ VarLocMap VarLocIDs; // Map VarLoc<>unique ID for use in bitvectors.
OpenRangesSet OpenRanges; // Ranges that are open until end of bb.
- VarLocInMBB OutLocs; // Ranges that exist beyond bb.
- VarLocInMBB InLocs; // Ranges that are incoming after joining.
+ VarLocInMBB OutLocs; // Ranges that exist beyond bb.
+ VarLocInMBB InLocs; // Ranges that are incoming after joining.
+ SpillMap Spills; // DBG_VALUEs associated with spills.
DenseMap<unsigned int, MachineBasicBlock *> OrderToBB;
DenseMap<MachineBasicBlock *, unsigned int> BBToOrder;
@@ -490,9 +620,14 @@ bool LiveDebugValues::ExtendRanges(MachineFunction &MF) {
Pending;
// Initialize every mbb with OutLocs.
+ // We are not looking at any spill instructions during the initial pass
+ // over the BBs. The LiveDebugVariables pass has already created DBG_VALUE
+ // instructions for spills of registers that are known to be user variables
+ // within the BB in which the spill occurs.
for (auto &MBB : MF)
for (auto &MI : MBB)
- transfer(MI, OpenRanges, OutLocs, VarLocIDs);
+ transfer(MI, OpenRanges, OutLocs, VarLocIDs, Spills,
+ /*transferSpills=*/false);
DEBUG(printVarLocInMBB(MF, OutLocs, VarLocIDs, "OutLocs after initialization",
dbgs()));
@@ -524,8 +659,18 @@ bool LiveDebugValues::ExtendRanges(MachineFunction &MF) {
if (MBBJoined) {
MBBJoined = false;
Changed = true;
+ // Now that we have started to extend ranges across BBs we need to
+ // examine spill instructions to see whether they spill registers that
+ // correspond to user variables.
for (auto &MI : *MBB)
- OLChanged |= transfer(MI, OpenRanges, OutLocs, VarLocIDs);
+ OLChanged |= transfer(MI, OpenRanges, OutLocs, VarLocIDs, Spills,
+ /*transferSpills=*/true);
+
+ // Add any DBG_VALUE instructions necessitated by spills.
+ for (auto &SP : Spills)
+ MBB->insertAfter(MachineBasicBlock::iterator(*SP.SpillInst),
+ SP.DebugInst);
+ Spills.clear();
DEBUG(printVarLocInMBB(MF, OutLocs, VarLocIDs,
"OutLocs after propagating", dbgs()));
@@ -559,6 +704,7 @@ bool LiveDebugValues::runOnMachineFunction(MachineFunction &MF) {
TRI = MF.getSubtarget().getRegisterInfo();
TII = MF.getSubtarget().getInstrInfo();
+ TFI = MF.getSubtarget().getFrameLowering();
LS.initialize(MF);
bool Changed = ExtendRanges(MF);
diff --git a/contrib/llvm/lib/CodeGen/LiveDebugVariables.cpp b/contrib/llvm/lib/CodeGen/LiveDebugVariables.cpp
index 0934d8c..0c76478 100644
--- a/contrib/llvm/lib/CodeGen/LiveDebugVariables.cpp
+++ b/contrib/llvm/lib/CodeGen/LiveDebugVariables.cpp
@@ -45,7 +45,7 @@
using namespace llvm;
-#define DEBUG_TYPE "livedebug"
+#define DEBUG_TYPE "livedebugvars"
static cl::opt<bool>
EnableLDV("live-debug-variables", cl::init(true),
@@ -54,11 +54,11 @@ EnableLDV("live-debug-variables", cl::init(true),
STATISTIC(NumInsertedDebugValues, "Number of DBG_VALUEs inserted");
char LiveDebugVariables::ID = 0;
-INITIALIZE_PASS_BEGIN(LiveDebugVariables, "livedebugvars",
+INITIALIZE_PASS_BEGIN(LiveDebugVariables, DEBUG_TYPE,
"Debug Variable Analysis", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
-INITIALIZE_PASS_END(LiveDebugVariables, "livedebugvars",
+INITIALIZE_PASS_END(LiveDebugVariables, DEBUG_TYPE,
"Debug Variable Analysis", false, false)
void LiveDebugVariables::getAnalysisUsage(AnalysisUsage &AU) const {
@@ -944,7 +944,7 @@ void UserValue::insertDebugValue(MachineBasicBlock *MBB, SlotIndex Idx,
IsIndirect, Loc.getReg(), offset, Variable, Expression);
else
BuildMI(*MBB, I, getDebugLoc(), TII.get(TargetOpcode::DBG_VALUE))
- .addOperand(Loc)
+ .add(Loc)
.addImm(offset)
.addMetadata(Variable)
.addMetadata(Expression);
@@ -1005,8 +1005,8 @@ bool LiveDebugVariables::doInitialization(Module &M) {
return Pass::doInitialization(M);
}
-#ifndef NDEBUG
-LLVM_DUMP_METHOD void LiveDebugVariables::dump() {
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD void LiveDebugVariables::dump() const {
if (pImpl)
static_cast<LDVImpl*>(pImpl)->print(dbgs());
}
diff --git a/contrib/llvm/lib/CodeGen/LiveDebugVariables.h b/contrib/llvm/lib/CodeGen/LiveDebugVariables.h
index afe87a5..1d7e3d4 100644
--- a/contrib/llvm/lib/CodeGen/LiveDebugVariables.h
+++ b/contrib/llvm/lib/CodeGen/LiveDebugVariables.h
@@ -59,7 +59,7 @@ public:
void emitDebugValues(VirtRegMap *VRM);
/// dump - Print data structures to dbgs().
- void dump();
+ void dump() const;
private:
diff --git a/contrib/llvm/lib/CodeGen/LiveInterval.cpp b/contrib/llvm/lib/CodeGen/LiveInterval.cpp
index 623af49..9ef9f23 100644
--- a/contrib/llvm/lib/CodeGen/LiveInterval.cpp
+++ b/contrib/llvm/lib/CodeGen/LiveInterval.cpp
@@ -863,6 +863,37 @@ void LiveInterval::clearSubRanges() {
SubRanges = nullptr;
}
+void LiveInterval::refineSubRanges(BumpPtrAllocator &Allocator,
+ LaneBitmask LaneMask, std::function<void(LiveInterval::SubRange&)> Apply) {
+
+ LaneBitmask ToApply = LaneMask;
+ for (SubRange &SR : subranges()) {
+ LaneBitmask SRMask = SR.LaneMask;
+ LaneBitmask Matching = SRMask & LaneMask;
+ if (Matching.none())
+ continue;
+
+ SubRange *MatchingRange;
+ if (SRMask == Matching) {
+ // The subrange fits (it does not cover bits outside \p LaneMask).
+ MatchingRange = &SR;
+ } else {
+ // We have to split the subrange into a matching and non-matching part.
+ // Reduce lanemask of existing lane to non-matching part.
+ SR.LaneMask = SRMask & ~Matching;
+ // Create a new subrange for the matching part
+ MatchingRange = createSubRangeFrom(Allocator, Matching, SR);
+ }
+ Apply(*MatchingRange);
+ ToApply &= ~Matching;
+ }
+ // Create a new subrange if there are uncovered bits left.
+ if (ToApply.any()) {
+ SubRange *NewRange = createSubRange(Allocator, ToApply);
+ Apply(*NewRange);
+ }
+}
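
refineSubRanges has to handle three cases per existing subrange: fully matching, partially matching (a split is required), and lanes left uncovered at the end. A simplified model with a plain 32-bit mask in place of LaneBitmask:

#include <cstdint>
#include <functional>
#include <vector>

struct SubRange {
  uint32_t Mask; // stand-in for LaneBitmask
};

// Split the existing subranges so Apply sees exactly the requested lanes.
static void refine(std::vector<SubRange> &Ranges, uint32_t LaneMask,
                   const std::function<void(SubRange &)> &Apply) {
  uint32_t ToApply = LaneMask;
  std::vector<SubRange> NewRanges; // deferred to keep the loop iterators valid
  for (SubRange &SR : Ranges) {
    uint32_t Matching = SR.Mask & LaneMask;
    if (!Matching)
      continue;
    if (SR.Mask == Matching) {
      Apply(SR); // the subrange lies entirely inside LaneMask
    } else {
      SR.Mask &= ~Matching;            // keep the non-matching lanes here
      NewRanges.push_back({Matching}); // split off the matching lanes
      Apply(NewRanges.back());
    }
    ToApply &= ~Matching;
  }
  if (ToApply) { // lanes that no existing subrange covered
    NewRanges.push_back({ToApply});
    Apply(NewRanges.back());
  }
  Ranges.insert(Ranges.end(), NewRanges.begin(), NewRanges.end());
}
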
+
unsigned LiveInterval::getSize() const {
unsigned Sum = 0;
for (const Segment &S : segments)
@@ -1032,6 +1063,7 @@ void LiveInterval::verify(const MachineRegisterInfo *MRI) const {
// When they exist, Spills.back().start <= LastStart,
// and WriteI[-1].start <= LastStart.
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void LiveRangeUpdater::print(raw_ostream &OS) const {
if (!isDirty()) {
if (LR)
@@ -1058,6 +1090,7 @@ void LiveRangeUpdater::print(raw_ostream &OS) const {
LLVM_DUMP_METHOD void LiveRangeUpdater::dump() const {
print(errs());
}
+#endif
// Determine if A and B should be coalesced.
static inline bool coalescable(const LiveRange::Segment &A,
diff --git a/contrib/llvm/lib/CodeGen/LiveIntervalAnalysis.cpp b/contrib/llvm/lib/CodeGen/LiveIntervalAnalysis.cpp
index 70d3483..471dcea 100644
--- a/contrib/llvm/lib/CodeGen/LiveIntervalAnalysis.cpp
+++ b/contrib/llvm/lib/CodeGen/LiveIntervalAnalysis.cpp
@@ -1,4 +1,4 @@
-//===-- LiveIntervalAnalysis.cpp - Live Interval Analysis -----------------===//
+//===- LiveIntervalAnalysis.cpp - Live Interval Analysis ------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,35 +7,52 @@
//
//===----------------------------------------------------------------------===//
//
-// This file implements the LiveInterval analysis pass which is used
-// by the Linear Scan Register allocator. This pass linearizes the
-// basic blocks of the function in DFS order and computes live intervals for
-// each virtual and physical register.
+/// \file This file implements the LiveInterval analysis pass which is used
+/// by the Linear Scan Register allocator. This pass linearizes the
+/// basic blocks of the function in DFS order and computes live intervals for
+/// each virtual and physical register.
//
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "LiveRangeCalc.h"
-#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/CodeGen/LiveInterval.h"
#include "llvm/CodeGen/LiveVariables.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBundle.h"
+#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/SlotIndexes.h"
#include "llvm/CodeGen/VirtRegMap.h"
-#include "llvm/IR/Value.h"
+#include "llvm/MC/LaneBitmask.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/Pass.h"
#include "llvm/Support/BlockFrequency.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
#include <algorithm>
-#include <cmath>
+#include <cassert>
+#include <cstdint>
+#include <iterator>
+#include <tuple>
+#include <utility>
+
using namespace llvm;
#define DEBUG_TYPE "regalloc"
@@ -59,11 +76,13 @@ static bool EnablePrecomputePhysRegs = false;
#endif // NDEBUG
namespace llvm {
+
cl::opt<bool> UseSegmentSetForPhysRegs(
"use-segment-set-for-physregs", cl::Hidden, cl::init(true),
cl::desc(
"Use segment set for the computation of the live ranges of physregs."));
-}
+
+} // end namespace llvm
void LiveIntervals::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesCFG();
@@ -78,8 +97,7 @@ void LiveIntervals::getAnalysisUsage(AnalysisUsage &AU) const {
MachineFunctionPass::getAnalysisUsage(AU);
}
-LiveIntervals::LiveIntervals() : MachineFunctionPass(ID),
- DomTree(nullptr), LRCalc(nullptr) {
+LiveIntervals::LiveIntervals() : MachineFunctionPass(ID) {
initializeLiveIntervalsPass(*PassRegistry::getPassRegistry());
}
@@ -96,16 +114,14 @@ void LiveIntervals::releaseMemory() {
RegMaskBits.clear();
RegMaskBlocks.clear();
- for (unsigned i = 0, e = RegUnitRanges.size(); i != e; ++i)
- delete RegUnitRanges[i];
+ for (LiveRange *LR : RegUnitRanges)
+ delete LR;
RegUnitRanges.clear();
// Release VNInfo memory regions, VNInfo objects don't need to be dtor'd.
VNInfoAllocator.Reset();
}
-/// runOnMachineFunction - calculates LiveIntervals
-///
bool LiveIntervals::runOnMachineFunction(MachineFunction &fn) {
MF = &fn;
MRI = &MF->getRegInfo();
@@ -135,14 +151,13 @@ bool LiveIntervals::runOnMachineFunction(MachineFunction &fn) {
return true;
}
-/// print - Implement the dump method.
void LiveIntervals::print(raw_ostream &OS, const Module* ) const {
OS << "********** INTERVALS **********\n";
// Dump the regunits.
- for (unsigned i = 0, e = RegUnitRanges.size(); i != e; ++i)
- if (LiveRange *LR = RegUnitRanges[i])
- OS << PrintRegUnit(i, TRI) << ' ' << *LR << '\n';
+ for (unsigned Unit = 0, UnitE = RegUnitRanges.size(); Unit != UnitE; ++Unit)
+ if (LiveRange *LR = RegUnitRanges[Unit])
+ OS << PrintRegUnit(Unit, TRI) << ' ' << *LR << '\n';
// Dump the virtregs.
for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) {
@@ -152,8 +167,8 @@ void LiveIntervals::print(raw_ostream &OS, const Module* ) const {
}
OS << "RegMasks:";
- for (unsigned i = 0, e = RegMaskSlots.size(); i != e; ++i)
- OS << ' ' << RegMaskSlots[i];
+ for (SlotIndex Idx : RegMaskSlots)
+ OS << ' ' << Idx;
OS << '\n';
printInstrs(OS);
@@ -165,20 +180,17 @@ void LiveIntervals::printInstrs(raw_ostream &OS) const {
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
-void LiveIntervals::dumpInstrs() const {
+LLVM_DUMP_METHOD void LiveIntervals::dumpInstrs() const {
printInstrs(dbgs());
}
#endif
LiveInterval* LiveIntervals::createInterval(unsigned reg) {
- float Weight = TargetRegisterInfo::isPhysicalRegister(reg) ?
- llvm::huge_valf : 0.0F;
+ float Weight = TargetRegisterInfo::isPhysicalRegister(reg) ? huge_valf : 0.0F;
return new LiveInterval(reg, Weight);
}
-
-/// computeVirtRegInterval - Compute the live interval of a virtual register,
-/// based on defs and uses.
+/// Compute the live interval of a virtual register, based on defs and uses.
void LiveIntervals::computeVirtRegInterval(LiveInterval &LI) {
assert(LRCalc && "LRCalc not initialized.");
assert(LI.empty() && "Should only compute empty intervals.");
@@ -200,7 +212,7 @@ void LiveIntervals::computeRegMasks() {
RegMaskBlocks.resize(MF->getNumBlockIDs());
// Find all instructions with regmask operands.
- for (MachineBasicBlock &MBB : *MF) {
+ for (const MachineBasicBlock &MBB : *MF) {
std::pair<unsigned, unsigned> &RMB = RegMaskBlocks[MBB.getNumber()];
RMB.first = RegMaskSlots.size();
@@ -210,7 +222,7 @@ void LiveIntervals::computeRegMasks() {
RegMaskBits.push_back(Mask);
}
- for (MachineInstr &MI : MBB) {
+ for (const MachineInstr &MI : MBB) {
for (const MachineOperand &MO : MI.operands()) {
if (!MO.isRegMask())
continue;
@@ -245,9 +257,9 @@ void LiveIntervals::computeRegMasks() {
// interference.
//
-/// computeRegUnitInterval - Compute the live range of a register unit, based
-/// on the uses and defs of aliasing registers. The range should be empty,
-/// or contain only dead phi-defs from ABI blocks.
+/// Compute the live range of a register unit, based on the uses and defs of
+/// aliasing registers. The range should be empty, or contain only dead
+/// phi-defs from ABI blocks.
void LiveIntervals::computeRegUnitRange(LiveRange &LR, unsigned Unit) {
assert(LRCalc && "LRCalc not initialized.");
LRCalc->reset(MF, getSlotIndexes(), DomTree, &getVNInfoAllocator());
@@ -257,22 +269,30 @@ void LiveIntervals::computeRegUnitRange(LiveRange &LR, unsigned Unit) {
// may share super-registers. That's OK because createDeadDefs() is
// idempotent. It is very rare for a register unit to have multiple roots, so
// uniquing super-registers is probably not worthwhile.
- for (MCRegUnitRootIterator Roots(Unit, TRI); Roots.isValid(); ++Roots) {
- for (MCSuperRegIterator Supers(*Roots, TRI, /*IncludeSelf=*/true);
- Supers.isValid(); ++Supers) {
- if (!MRI->reg_empty(*Supers))
- LRCalc->createDeadDefs(LR, *Supers);
+ bool IsReserved = true;
+ for (MCRegUnitRootIterator Root(Unit, TRI); Root.isValid(); ++Root) {
+ for (MCSuperRegIterator Super(*Root, TRI, /*IncludeSelf=*/true);
+ Super.isValid(); ++Super) {
+ unsigned Reg = *Super;
+ if (!MRI->reg_empty(Reg))
+ LRCalc->createDeadDefs(LR, Reg);
+ // A register unit is considered reserved if all its roots and all their
+ // super registers are reserved.
+ if (!MRI->isReserved(Reg))
+ IsReserved = false;
}
}
// Now extend LR to reach all uses.
// Ignore uses of reserved registers. We only track defs of those.
- for (MCRegUnitRootIterator Roots(Unit, TRI); Roots.isValid(); ++Roots) {
- for (MCSuperRegIterator Supers(*Roots, TRI, /*IncludeSelf=*/true);
- Supers.isValid(); ++Supers) {
- unsigned Reg = *Supers;
- if (!MRI->isReserved(Reg) && !MRI->reg_empty(Reg))
- LRCalc->extendToUses(LR, Reg);
+ if (!IsReserved) {
+ for (MCRegUnitRootIterator Root(Unit, TRI); Root.isValid(); ++Root) {
+ for (MCSuperRegIterator Super(*Root, TRI, /*IncludeSelf=*/true);
+ Super.isValid(); ++Super) {
+ unsigned Reg = *Super;
+ if (!MRI->reg_empty(Reg))
+ LRCalc->extendToUses(LR, Reg);
+ }
}
}
@@ -281,11 +301,9 @@ void LiveIntervals::computeRegUnitRange(LiveRange &LR, unsigned Unit) {
LR.flushSegmentSet();
}
-
-/// computeLiveInRegUnits - Precompute the live ranges of any register units
-/// that are live-in to an ABI block somewhere. Register values can appear
-/// without a corresponding def when entering the entry block or a landing pad.
-///
+/// Precompute the live ranges of any register units that are live-in to an ABI
+/// block somewhere. Register values can appear without a corresponding def when
+/// entering the entry block or a landing pad.
void LiveIntervals::computeLiveInRegUnits() {
RegUnitRanges.resize(TRI->getNumRegUnits());
DEBUG(dbgs() << "Computing live-in reg-units in ABI blocks.\n");
@@ -294,18 +312,15 @@ void LiveIntervals::computeLiveInRegUnits() {
SmallVector<unsigned, 8> NewRanges;
// Check all basic blocks for live-ins.
- for (MachineFunction::const_iterator MFI = MF->begin(), MFE = MF->end();
- MFI != MFE; ++MFI) {
- const MachineBasicBlock *MBB = &*MFI;
-
+ for (const MachineBasicBlock &MBB : *MF) {
// We only care about ABI blocks: Entry + landing pads.
- if ((MFI != MF->begin() && !MBB->isEHPad()) || MBB->livein_empty())
+ if ((&MBB != &MF->front() && !MBB.isEHPad()) || MBB.livein_empty())
continue;
// Create phi-defs at Begin for all live-in registers.
- SlotIndex Begin = Indexes->getMBBStartIdx(MBB);
- DEBUG(dbgs() << Begin << "\tBB#" << MBB->getNumber());
- for (const auto &LI : MBB->liveins()) {
+ SlotIndex Begin = Indexes->getMBBStartIdx(&MBB);
+ DEBUG(dbgs() << Begin << "\tBB#" << MBB.getNumber());
+ for (const auto &LI : MBB.liveins()) {
for (MCRegUnitIterator Units(LI.PhysReg, TRI); Units.isValid(); ++Units) {
unsigned Unit = *Units;
LiveRange *LR = RegUnitRanges[Unit];
@@ -324,16 +339,13 @@ void LiveIntervals::computeLiveInRegUnits() {
DEBUG(dbgs() << "Created " << NewRanges.size() << " new intervals.\n");
// Compute the 'normal' part of the ranges.
- for (unsigned i = 0, e = NewRanges.size(); i != e; ++i) {
- unsigned Unit = NewRanges[i];
+ for (unsigned Unit : NewRanges)
computeRegUnitRange(*RegUnitRanges[Unit], Unit);
- }
}
-
static void createSegmentsForValues(LiveRange &LR,
- iterator_range<LiveInterval::vni_iterator> VNIs) {
- for (auto VNI : VNIs) {
+ iterator_range<LiveInterval::vni_iterator> VNIs) {
+ for (VNInfo *VNI : VNIs) {
if (VNI->isUnused())
continue;
SlotIndex Def = VNI->def;
@@ -341,7 +353,7 @@ static void createSegmentsForValues(LiveRange &LR,
}
}
-typedef SmallVector<std::pair<SlotIndex, VNInfo*>, 16> ShrinkToUsesWorkList;
+using ShrinkToUsesWorkList = SmallVector<std::pair<SlotIndex, VNInfo*>, 16>;
static void extendSegmentsToUses(LiveRange &LR, const SlotIndexes &Indexes,
ShrinkToUsesWorkList &WorkList,
@@ -349,7 +361,7 @@ static void extendSegmentsToUses(LiveRange &LR, const SlotIndexes &Indexes,
// Keep track of the PHIs that are in use.
SmallPtrSet<VNInfo*, 8> UsedPHIs;
// Blocks that have already been added to WorkList as live-out.
- SmallPtrSet<MachineBasicBlock*, 16> LiveOut;
+ SmallPtrSet<const MachineBasicBlock*, 16> LiveOut;
// Extend intervals to reach all uses in WorkList.
while (!WorkList.empty()) {
@@ -368,7 +380,7 @@ static void extendSegmentsToUses(LiveRange &LR, const SlotIndexes &Indexes,
!UsedPHIs.insert(VNI).second)
continue;
// The PHI is live, make sure the predecessors are live-out.
- for (auto &Pred : MBB->predecessors()) {
+ for (const MachineBasicBlock *Pred : MBB->predecessors()) {
if (!LiveOut.insert(Pred).second)
continue;
SlotIndex Stop = Indexes.getMBBEndIdx(Pred);
@@ -384,7 +396,7 @@ static void extendSegmentsToUses(LiveRange &LR, const SlotIndexes &Indexes,
LR.addSegment(LiveRange::Segment(BlockStart, Idx, VNI));
// Make sure VNI is live-out from the predecessors.
- for (auto &Pred : MBB->predecessors()) {
+ for (const MachineBasicBlock *Pred : MBB->predecessors()) {
if (!LiveOut.insert(Pred).second)
continue;
SlotIndex Stop = Indexes.getMBBEndIdx(Pred);
@@ -415,22 +427,20 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li,
ShrinkToUsesWorkList WorkList;
// Visit all instructions reading li->reg.
- for (MachineRegisterInfo::reg_instr_iterator
- I = MRI->reg_instr_begin(li->reg), E = MRI->reg_instr_end();
- I != E; ) {
- MachineInstr *UseMI = &*(I++);
- if (UseMI->isDebugValue() || !UseMI->readsVirtualRegister(li->reg))
+ unsigned Reg = li->reg;
+ for (MachineInstr &UseMI : MRI->reg_instructions(Reg)) {
+ if (UseMI.isDebugValue() || !UseMI.readsVirtualRegister(Reg))
continue;
- SlotIndex Idx = getInstructionIndex(*UseMI).getRegSlot();
+ SlotIndex Idx = getInstructionIndex(UseMI).getRegSlot();
LiveQueryResult LRQ = li->Query(Idx);
VNInfo *VNI = LRQ.valueIn();
if (!VNI) {
// This shouldn't happen: readsVirtualRegister returns true, but there is
// no live value. It is likely caused by a target getting <undef> flags
// wrong.
- DEBUG(dbgs() << Idx << '\t' << *UseMI
+ DEBUG(dbgs() << Idx << '\t' << UseMI
<< "Warning: Instr claims to read non-existent value in "
- << *li << '\n');
+ << *li << '\n');
continue;
}
// Special case: An early-clobber tied operand reads and writes the
@@ -458,7 +468,7 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li,
bool LiveIntervals::computeDeadValues(LiveInterval &LI,
SmallVectorImpl<MachineInstr*> *dead) {
bool MayHaveSplitComponents = false;
- for (auto VNI : LI.valnos) {
+ for (VNInfo *VNI : LI.valnos) {
if (VNI->isUnused())
continue;
SlotIndex Def = VNI->def;
@@ -548,7 +558,7 @@ void LiveIntervals::shrinkToUses(LiveInterval::SubRange &SR, unsigned Reg) {
SR.segments.swap(NewLR.segments);
// Remove dead PHI value numbers
- for (auto VNI : SR.valnos) {
+ for (VNInfo *VNI : SR.valnos) {
if (VNI->isUnused())
continue;
const LiveRange::Segment *Segment = SR.getSegmentContaining(VNI->def);
@@ -571,8 +581,8 @@ void LiveIntervals::extendToIndices(LiveRange &LR,
ArrayRef<SlotIndex> Undefs) {
assert(LRCalc && "LRCalc not initialized.");
LRCalc->reset(MF, getSlotIndexes(), DomTree, &getVNInfoAllocator());
- for (unsigned i = 0, e = Indices.size(); i != e; ++i)
- LRCalc->extend(LR, Indices[i], /*PhysReg=*/0, Undefs);
+ for (SlotIndex Idx : Indices)
+ LRCalc->extend(LR, Idx, /*PhysReg=*/0, Undefs);
}
void LiveIntervals::pruneValue(LiveRange &LR, SlotIndex Kill,
@@ -599,13 +609,11 @@ void LiveIntervals::pruneValue(LiveRange &LR, SlotIndex Kill,
// Find all blocks that are reachable from KillMBB without leaving VNI's live
// range. It is possible that KillMBB itself is reachable, so start a DFS
// from each successor.
- typedef df_iterator_default_set<MachineBasicBlock*,9> VisitedTy;
+ using VisitedTy = df_iterator_default_set<MachineBasicBlock*,9>;
VisitedTy Visited;
- for (MachineBasicBlock::succ_iterator
- SuccI = KillMBB->succ_begin(), SuccE = KillMBB->succ_end();
- SuccI != SuccE; ++SuccI) {
+ for (MachineBasicBlock *Succ : KillMBB->successors()) {
for (df_ext_iterator<MachineBasicBlock*, VisitedTy>
- I = df_ext_begin(*SuccI, Visited), E = df_ext_end(*SuccI, Visited);
+ I = df_ext_begin(Succ, Visited), E = df_ext_end(Succ, Visited);
I != E;) {
MachineBasicBlock *MBB = *I;
@@ -657,9 +665,9 @@ void LiveIntervals::addKillFlags(const VirtRegMap *VRM) {
// Find the regunit intervals for the assigned register. They may overlap
// the virtual register live range, cancelling any kills.
RU.clear();
- for (MCRegUnitIterator Units(VRM->getPhys(Reg), TRI); Units.isValid();
- ++Units) {
- const LiveRange &RURange = getRegUnit(*Units);
+ for (MCRegUnitIterator Unit(VRM->getPhys(Reg), TRI); Unit.isValid();
+ ++Unit) {
+ const LiveRange &RURange = getRegUnit(*Unit);
if (RURange.empty())
continue;
RU.push_back(std::make_pair(&RURange, RURange.find(LI.begin()->end)));
@@ -802,9 +810,8 @@ LiveIntervals::hasPHIKill(const LiveInterval &LI, const VNInfo *VNI) const {
// Conservatively return true instead of scanning huge predecessor lists.
if (PHIMBB->pred_size() > 100)
return true;
- for (MachineBasicBlock::const_pred_iterator
- PI = PHIMBB->pred_begin(), PE = PHIMBB->pred_end(); PI != PE; ++PI)
- if (VNI == LI.getVNInfoBefore(Indexes->getMBBEndIdx(*PI)))
+ for (const MachineBasicBlock *Pred : PHIMBB->predecessors())
+ if (VNI == LI.getVNInfoBefore(Indexes->getMBBEndIdx(Pred)))
return true;
}
return false;
@@ -831,7 +838,6 @@ LiveIntervals::addSegmentToEndOfBlock(unsigned reg, MachineInstr &startInst) {
return S;
}
-
//===----------------------------------------------------------------------===//
// Register mask functions
//===----------------------------------------------------------------------===//
@@ -864,7 +870,7 @@ bool LiveIntervals::checkRegMaskInterference(LiveInterval &LI,
return false;
bool Found = false;
- for (;;) {
+ while (true) {
assert(*SlotI >= LiveI->start);
// Loop over all slots overlapping this segment.
while (*SlotI < LiveI->end) {
@@ -895,7 +901,7 @@ bool LiveIntervals::checkRegMaskInterference(LiveInterval &LI,
// IntervalUpdate class.
//===----------------------------------------------------------------------===//
-// HMEditor is a toolkit used by handleMove to trim or extend live intervals.
+/// Toolkit used by handleMove to trim or extend live intervals.
class LiveIntervals::HMEditor {
private:
LiveIntervals& LIS;
@@ -1241,10 +1247,12 @@ private:
LiveRange::iterator NewIdxIn = NewIdxOut;
assert(NewIdxIn == LR.find(NewIdx.getBaseIndex()));
const SlotIndex SplitPos = NewIdxDef;
+ OldIdxVNI = OldIdxIn->valno;
// Merge the OldIdxIn and OldIdxOut segments into OldIdxOut.
+ OldIdxOut->valno->def = OldIdxIn->start;
*OldIdxOut = LiveRange::Segment(OldIdxIn->start, OldIdxOut->end,
- OldIdxIn->valno);
+ OldIdxOut->valno);
// OldIdxIn and OldIdxVNI are now undef and can be overridden.
  // We slide [NewIdxIn, OldIdxIn) down one position.
// |- X0/NewIdxIn -| ... |- Xn-1 -||- Xn/OldIdxIn -||- OldIdxOut -|
@@ -1514,8 +1522,7 @@ LiveIntervals::repairIntervalsInRange(MachineBasicBlock *MBB,
}
}
- for (unsigned i = 0, e = OrigRegs.size(); i != e; ++i) {
- unsigned Reg = OrigRegs[i];
+ for (unsigned Reg : OrigRegs) {
if (!TargetRegisterInfo::isVirtualRegister(Reg))
continue;
@@ -1524,16 +1531,16 @@ LiveIntervals::repairIntervalsInRange(MachineBasicBlock *MBB,
if (!LI.hasAtLeastOneValue())
continue;
- for (LiveInterval::SubRange &S : LI.subranges()) {
+ for (LiveInterval::SubRange &S : LI.subranges())
repairOldRegInRange(Begin, End, endIdx, S, Reg, S.LaneMask);
- }
+
repairOldRegInRange(Begin, End, endIdx, LI, Reg);
}
}
void LiveIntervals::removePhysRegDefAt(unsigned Reg, SlotIndex Pos) {
- for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units) {
- if (LiveRange *LR = getCachedRegUnit(*Units))
+ for (MCRegUnitIterator Unit(Reg, TRI); Unit.isValid(); ++Unit) {
+ if (LiveRange *LR = getCachedRegUnit(*Unit))
if (VNInfo *VNI = LR->getVNInfoAt(Pos))
LR->removeValNo(VNI);
}
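The computeRegUnitRange change above makes "reserved" a property of the whole
register unit: uses are ignored only when every root of the unit and all of
their super-registers are reserved. A toy restatement of just that predicate,
over stand-in data structures rather than LLVM's MCRegUnitRootIterator/MRI
API:

    #include <vector>

    // Regs[Unit] lists every register overlapping the unit (its roots plus
    // their super-registers); Reserved[Reg] is the per-register flag.
    bool unitIsReserved(unsigned Unit,
                        const std::vector<std::vector<unsigned>> &Regs,
                        const std::vector<bool> &Reserved) {
      for (unsigned Reg : Regs[Unit])
        if (!Reserved[Reg])
          return false; // one live root/super-reg keeps the unit tracked
      return true;
    }

Only when this returns true does the pass skip the extendToUses() phase; dead
defs are still created either way.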
diff --git a/contrib/llvm/lib/CodeGen/LiveIntervalUnion.cpp b/contrib/llvm/lib/CodeGen/LiveIntervalUnion.cpp
index fc2f233..b3248e5 100644
--- a/contrib/llvm/lib/CodeGen/LiveIntervalUnion.cpp
+++ b/contrib/llvm/lib/CodeGen/LiveIntervalUnion.cpp
@@ -1,4 +1,4 @@
-//===-- LiveIntervalUnion.cpp - Live interval union data structure --------===//
+//===- LiveIntervalUnion.cpp - Live interval union data structure ---------===//
//
// The LLVM Compiler Infrastructure
//
@@ -16,16 +16,16 @@
#include "llvm/CodeGen/LiveIntervalUnion.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SparseBitVector.h"
-#include "llvm/Support/Debug.h"
+#include "llvm/CodeGen/LiveInterval.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetRegisterInfo.h"
-#include <algorithm>
+#include <cassert>
+#include <cstdlib>
using namespace llvm;
#define DEBUG_TYPE "regalloc"
-
// Merge a LiveInterval's segments. Guarantee no overlaps.
void LiveIntervalUnion::unify(LiveInterval &VirtReg, const LiveRange &Range) {
if (Range.empty())
@@ -64,7 +64,7 @@ void LiveIntervalUnion::extract(LiveInterval &VirtReg, const LiveRange &Range) {
LiveRange::const_iterator RegEnd = Range.end();
SegmentIter SegPos = Segments.find(RegPos->start);
- for (;;) {
+ while (true) {
assert(SegPos.value() == &VirtReg && "Inconsistent LiveInterval");
SegPos.erase();
if (!SegPos.valid())
@@ -126,25 +126,24 @@ collectInterferingVRegs(unsigned MaxInterferingRegs) {
CheckedFirstInterference = true;
// Quickly skip interference check for empty sets.
- if (VirtReg->empty() || LiveUnion->empty()) {
+ if (LR->empty() || LiveUnion->empty()) {
SeenAllInterferences = true;
return 0;
}
- // In most cases, the union will start before VirtReg.
- VirtRegI = VirtReg->begin();
+ // In most cases, the union will start before LR.
+ LRI = LR->begin();
LiveUnionI.setMap(LiveUnion->getMap());
- LiveUnionI.find(VirtRegI->start);
+ LiveUnionI.find(LRI->start);
}
- LiveInterval::iterator VirtRegEnd = VirtReg->end();
+ LiveRange::const_iterator LREnd = LR->end();
LiveInterval *RecentReg = nullptr;
while (LiveUnionI.valid()) {
- assert(VirtRegI != VirtRegEnd && "Reached end of VirtReg");
+ assert(LRI != LREnd && "Reached end of LR");
// Check for overlapping interference.
- while (VirtRegI->start < LiveUnionI.stop() &&
- VirtRegI->end > LiveUnionI.start()) {
+ while (LRI->start < LiveUnionI.stop() && LRI->end > LiveUnionI.start()) {
// This is an overlap, record the interfering register.
LiveInterval *VReg = LiveUnionI.value();
if (VReg != RecentReg && !isSeenInterference(VReg)) {
@@ -161,20 +160,20 @@ collectInterferingVRegs(unsigned MaxInterferingRegs) {
}
// The iterators are now not overlapping, LiveUnionI has been advanced
- // beyond VirtRegI.
- assert(VirtRegI->end <= LiveUnionI.start() && "Expected non-overlap");
+ // beyond LRI.
+ assert(LRI->end <= LiveUnionI.start() && "Expected non-overlap");
// Advance the iterator that ends first.
- VirtRegI = VirtReg->advanceTo(VirtRegI, LiveUnionI.start());
- if (VirtRegI == VirtRegEnd)
+ LRI = LR->advanceTo(LRI, LiveUnionI.start());
+ if (LRI == LREnd)
break;
// Detect overlap, handle above.
- if (VirtRegI->start < LiveUnionI.stop())
+ if (LRI->start < LiveUnionI.stop())
continue;
// Still not overlapping. Catch up LiveUnionI.
- LiveUnionI.advanceTo(VirtRegI->start);
+ LiveUnionI.advanceTo(LRI->start);
}
SeenAllInterferences = true;
return InterferingVRegs.size();
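The LiveIntervalUnion rewrite above keeps the same two-iterator sweep, now
over a generic LiveRange instead of a LiveInterval. The underlying pattern is
a standard ordered-interval intersection; a self-contained sketch on plain
sorted segment vectors (illustrative only, not LLVM's IntervalMap iterators):

    #include <utility>
    #include <vector>

    struct Seg { int Start, End; }; // half-open [Start, End), sorted, disjoint

    std::vector<std::pair<size_t, size_t>>
    findOverlaps(const std::vector<Seg> &A, const std::vector<Seg> &B) {
      std::vector<std::pair<size_t, size_t>> Overlaps;
      size_t I = 0, J = 0;
      while (I < A.size() && J < B.size()) {
        if (A[I].Start < B[J].End && B[J].Start < A[I].End) {
          Overlaps.push_back({I, J});
          // Advance whichever segment ends first; the other may still
          // overlap the next segment on the opposite side.
          if (A[I].End < B[J].End) ++I; else ++J;
        } else if (A[I].End <= B[J].Start) {
          ++I; // A[I] lies entirely before B[J]
        } else {
          ++J; // B[J] lies entirely before A[I]
        }
      }
      return Overlaps;
    }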
diff --git a/contrib/llvm/lib/CodeGen/LivePhysRegs.cpp b/contrib/llvm/lib/CodeGen/LivePhysRegs.cpp
index dcc41c1..cde6ccd 100644
--- a/contrib/llvm/lib/CodeGen/LivePhysRegs.cpp
+++ b/contrib/llvm/lib/CodeGen/LivePhysRegs.cpp
@@ -53,7 +53,7 @@ void LivePhysRegs::stepBackward(const MachineInstr &MI) {
continue;
removeReg(Reg);
} else if (O->isRegMask())
- removeRegsInMask(*O, nullptr);
+ removeRegsInMask(*O);
}
// Add uses to the set.
@@ -120,12 +120,11 @@ void LivePhysRegs::print(raw_ostream &OS) const {
OS << "\n";
}
-/// Dumps the currently live registers to the debug output.
-LLVM_DUMP_METHOD void LivePhysRegs::dump() const {
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD void LivePhysRegs::dump() const {
dbgs() << " " << *this;
-#endif
}
+#endif
bool LivePhysRegs::available(const MachineRegisterInfo &MRI,
unsigned Reg) const {
@@ -143,63 +142,84 @@ bool LivePhysRegs::available(const MachineRegisterInfo &MRI,
/// Add live-in registers of basic block \p MBB to \p LiveRegs.
void LivePhysRegs::addBlockLiveIns(const MachineBasicBlock &MBB) {
for (const auto &LI : MBB.liveins()) {
- MCSubRegIndexIterator S(LI.PhysReg, TRI);
- if (LI.LaneMask.all() || (LI.LaneMask.any() && !S.isValid())) {
- addReg(LI.PhysReg);
+ unsigned Reg = LI.PhysReg;
+ LaneBitmask Mask = LI.LaneMask;
+ MCSubRegIndexIterator S(Reg, TRI);
+ assert(Mask.any() && "Invalid livein mask");
+ if (Mask.all() || !S.isValid()) {
+ addReg(Reg);
continue;
}
for (; S.isValid(); ++S) {
unsigned SI = S.getSubRegIndex();
- if ((LI.LaneMask & TRI->getSubRegIndexLaneMask(SI)).any())
+ if ((Mask & TRI->getSubRegIndexLaneMask(SI)).any())
addReg(S.getSubReg());
}
}
}
-/// Add pristine registers to the given \p LiveRegs. This function removes
-/// actually saved callee save registers when \p InPrologueEpilogue is false.
-static void addPristines(LivePhysRegs &LiveRegs, const MachineFunction &MF,
- const MachineFrameInfo &MFI,
- const TargetRegisterInfo &TRI) {
- for (const MCPhysReg *CSR = TRI.getCalleeSavedRegs(&MF); CSR && *CSR; ++CSR)
+/// Adds all callee saved registers to \p LiveRegs.
+static void addCalleeSavedRegs(LivePhysRegs &LiveRegs,
+ const MachineFunction &MF) {
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+ for (const MCPhysReg *CSR = MRI.getCalleeSavedRegs(); CSR && *CSR; ++CSR)
LiveRegs.addReg(*CSR);
+}
+
+/// Adds pristine registers to the given \p LiveRegs. Pristine registers are
+/// callee saved registers that are unused in the function.
+static void addPristines(LivePhysRegs &LiveRegs, const MachineFunction &MF) {
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
+ if (!MFI.isCalleeSavedInfoValid())
+ return;
+  // Add all callee saved regs, then remove the ones that are saved+restored.
+  addCalleeSavedRegs(LiveRegs, MF);
+  // Remove the ones that are saved and restored; the remainder is pristine.
for (const CalleeSavedInfo &Info : MFI.getCalleeSavedInfo())
LiveRegs.removeReg(Info.getReg());
}
void LivePhysRegs::addLiveOutsNoPristines(const MachineBasicBlock &MBB) {
- // To get the live-outs we simply merge the live-ins of all successors.
- for (const MachineBasicBlock *Succ : MBB.successors())
- addBlockLiveIns(*Succ);
+ if (!MBB.succ_empty()) {
+ // To get the live-outs we simply merge the live-ins of all successors.
+ for (const MachineBasicBlock *Succ : MBB.successors())
+ addBlockLiveIns(*Succ);
+ } else if (MBB.isReturnBlock()) {
+ // For the return block: Add all callee saved registers that are saved and
+    // restored (somewhere). This does not include callee saved registers that
+    // are unused and hence not saved and restored; those are called pristine.
+ const MachineFunction &MF = *MBB.getParent();
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
+ if (MFI.isCalleeSavedInfoValid()) {
+ for (const CalleeSavedInfo &Info : MFI.getCalleeSavedInfo())
+ addReg(Info.getReg());
+ }
+ }
}
void LivePhysRegs::addLiveOuts(const MachineBasicBlock &MBB) {
const MachineFunction &MF = *MBB.getParent();
- const MachineFrameInfo &MFI = MF.getFrameInfo();
- if (MFI.isCalleeSavedInfoValid()) {
- if (MBB.isReturnBlock()) {
- // The return block has no successors whose live-ins we could merge
- // below. So instead we add the callee saved registers manually.
- for (const MCPhysReg *I = TRI->getCalleeSavedRegs(&MF); *I; ++I)
- addReg(*I);
- } else {
- addPristines(*this, MF, MFI, *TRI);
- }
+ if (!MBB.succ_empty()) {
+ addPristines(*this, MF);
+ addLiveOutsNoPristines(MBB);
+ } else if (MBB.isReturnBlock()) {
+ // For the return block: Add all callee saved registers.
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
+ if (MFI.isCalleeSavedInfoValid())
+ addCalleeSavedRegs(*this, MF);
}
-
- addLiveOutsNoPristines(MBB);
}
void LivePhysRegs::addLiveIns(const MachineBasicBlock &MBB) {
const MachineFunction &MF = *MBB.getParent();
- const MachineFrameInfo &MFI = MF.getFrameInfo();
- if (MFI.isCalleeSavedInfoValid())
- addPristines(*this, MF, MFI, *TRI);
+ addPristines(*this, MF);
addBlockLiveIns(MBB);
}
-void llvm::computeLiveIns(LivePhysRegs &LiveRegs, const TargetRegisterInfo &TRI,
+void llvm::computeLiveIns(LivePhysRegs &LiveRegs,
+ const MachineRegisterInfo &MRI,
MachineBasicBlock &MBB) {
+ const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();
assert(MBB.livein_empty());
LiveRegs.init(TRI);
LiveRegs.addLiveOutsNoPristines(MBB);
@@ -207,10 +227,12 @@ void llvm::computeLiveIns(LivePhysRegs &LiveRegs, const TargetRegisterInfo &TRI,
LiveRegs.stepBackward(MI);
for (unsigned Reg : LiveRegs) {
+ if (MRI.isReserved(Reg))
+ continue;
// Skip the register if we are about to add one of its super registers.
bool ContainsSuperReg = false;
for (MCSuperRegIterator SReg(Reg, &TRI); SReg.isValid(); ++SReg) {
- if (LiveRegs.contains(*SReg)) {
+ if (LiveRegs.contains(*SReg) && !MRI.isReserved(*SReg)) {
ContainsSuperReg = true;
break;
}
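The addPristines helper above (and its LiveRegUnits twin later in this patch)
computes the same set both times: start from every callee saved register and
subtract the ones the function actually saves and restores. A hedged sketch in
plain C++, where CSRs and SavedRegs stand in for MRI.getCalleeSavedRegs() and
MFI.getCalleeSavedInfo():

    #include <set>
    #include <vector>

    std::set<unsigned> pristineRegs(const std::vector<unsigned> &CSRs,
                                    const std::vector<unsigned> &SavedRegs) {
      std::set<unsigned> Pristine(CSRs.begin(), CSRs.end());
      for (unsigned Reg : SavedRegs)
        Pristine.erase(Reg); // saved+restored registers are not pristine
      return Pristine;       // callee saved registers untouched by the body
    }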
diff --git a/contrib/llvm/lib/CodeGen/LiveRangeCalc.cpp b/contrib/llvm/lib/CodeGen/LiveRangeCalc.cpp
index 0128376..8c43c9f 100644
--- a/contrib/llvm/lib/CodeGen/LiveRangeCalc.cpp
+++ b/contrib/llvm/lib/CodeGen/LiveRangeCalc.cpp
@@ -20,11 +20,14 @@ using namespace llvm;
#define DEBUG_TYPE "regalloc"
+// Reserve an address that indicates a value that is known to be "undef".
+static VNInfo UndefVNI(0xbad, SlotIndex());
+
void LiveRangeCalc::resetLiveOutMap() {
unsigned NumBlocks = MF->getNumBlockIDs();
Seen.clear();
Seen.resize(NumBlocks);
- EntryInfoMap.clear();
+ EntryInfos.clear();
Map.resize(NumBlocks);
}
@@ -75,34 +78,11 @@ void LiveRangeCalc::calculate(LiveInterval &LI, bool TrackSubRegs) {
LI.createSubRangeFrom(*Alloc, ClassMask, LI);
}
- LaneBitmask Mask = SubMask;
- for (LiveInterval::SubRange &S : LI.subranges()) {
- // A Mask for subregs common to the existing subrange and current def.
- LaneBitmask Common = S.LaneMask & Mask;
- if (Common.none())
- continue;
- LiveInterval::SubRange *CommonRange;
- // A Mask for subregs covered by the subrange but not the current def.
- LaneBitmask RM = S.LaneMask & ~Mask;
- if (RM.any()) {
- // Split the subrange S into two parts: one covered by the current
- // def (CommonRange), and the one not affected by it (updated S).
- S.LaneMask = RM;
- CommonRange = LI.createSubRangeFrom(*Alloc, Common, S);
- } else {
- assert(Common == S.LaneMask);
- CommonRange = &S;
- }
+ LI.refineSubRanges(*Alloc, SubMask,
+ [&MO, this](LiveInterval::SubRange &SR) {
if (MO.isDef())
- createDeadDef(*Indexes, *Alloc, *CommonRange, MO);
- Mask &= ~Common;
- }
- // Create a new SubRange for subregs we did not cover yet.
- if (Mask.any()) {
- LiveInterval::SubRange *NewRange = LI.createSubRange(*Alloc, Mask);
- if (MO.isDef())
- createDeadDef(*Indexes, *Alloc, *NewRange, MO);
- }
+ createDeadDef(*Indexes, *Alloc, SR, MO);
+ });
}
// Create the def in the main liverange. We do not have to do this if
@@ -289,8 +269,7 @@ bool LiveRangeCalc::isDefOnEntry(LiveRange &LR, ArrayRef<SlotIndex> Undefs,
if (UndefOnEntry[BN])
return false;
- auto MarkDefined =
- [this,BN,&DefOnEntry,&UndefOnEntry] (MachineBasicBlock &B) -> bool {
+ auto MarkDefined = [BN, &DefOnEntry](MachineBasicBlock &B) -> bool {
for (MachineBasicBlock *S : B.successors())
DefOnEntry[S->getNumber()] = true;
DefOnEntry[BN] = true;
@@ -307,11 +286,19 @@ bool LiveRangeCalc::isDefOnEntry(LiveRange &LR, ArrayRef<SlotIndex> Undefs,
// Determine if the exit from the block is reached by some def.
unsigned N = WorkList[i];
MachineBasicBlock &B = *MF->getBlockNumbered(N);
- if (Seen[N] && Map[&B].first != nullptr)
- return MarkDefined(B);
+ if (Seen[N]) {
+ const LiveOutPair &LOB = Map[&B];
+ if (LOB.first != nullptr && LOB.first != &UndefVNI)
+ return MarkDefined(B);
+ }
SlotIndex Begin, End;
std::tie(Begin, End) = Indexes->getMBBRange(&B);
- LiveRange::iterator UB = std::upper_bound(LR.begin(), LR.end(), End);
+ // Treat End as not belonging to B.
+ // If LR has a segment S that starts at the next block, i.e. [End, ...),
+ // std::upper_bound will return the segment following S. Instead,
+ // S should be treated as the first segment that does not overlap B.
+ LiveRange::iterator UB = std::upper_bound(LR.begin(), LR.end(),
+ End.getPrevSlot());
if (UB != LR.begin()) {
LiveRange::Segment &Seg = *std::prev(UB);
if (Seg.end > Begin) {
@@ -384,10 +371,7 @@ bool LiveRangeCalc::findReachingDefs(LiveRange &LR, MachineBasicBlock &UseMBB,
#endif
FoundUndef |= MBB->pred_empty();
- for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(),
- PE = MBB->pred_end(); PI != PE; ++PI) {
- MachineBasicBlock *Pred = *PI;
-
+ for (MachineBasicBlock *Pred : MBB->predecessors()) {
// Is this a known live-out block?
if (Seen.test(Pred->getNumber())) {
if (VNInfo *VNI = Map[Pred].first) {
@@ -406,7 +390,7 @@ bool LiveRangeCalc::findReachingDefs(LiveRange &LR, MachineBasicBlock &UseMBB,
auto EP = LR.extendInBlock(Undefs, Start, End);
VNInfo *VNI = EP.first;
FoundUndef |= EP.second;
- setLiveOutValue(Pred, VNI);
+ setLiveOutValue(Pred, EP.second ? &UndefVNI : VNI);
if (VNI) {
if (TheVNI && TheVNI != VNI)
UniqueVNI = false;
@@ -425,7 +409,7 @@ bool LiveRangeCalc::findReachingDefs(LiveRange &LR, MachineBasicBlock &UseMBB,
}
LiveIn.clear();
- FoundUndef |= (TheVNI == nullptr);
+ FoundUndef |= (TheVNI == nullptr || TheVNI == &UndefVNI);
if (Undefs.size() > 0 && FoundUndef)
UniqueVNI = false;
@@ -436,7 +420,7 @@ bool LiveRangeCalc::findReachingDefs(LiveRange &LR, MachineBasicBlock &UseMBB,
// If a unique reaching def was found, blit in the live ranges immediately.
if (UniqueVNI) {
- assert(TheVNI != nullptr);
+ assert(TheVNI != nullptr && TheVNI != &UndefVNI);
LiveRangeUpdater Updater(&LR);
for (unsigned BN : WorkList) {
SlotIndex Start, End;
@@ -452,22 +436,26 @@ bool LiveRangeCalc::findReachingDefs(LiveRange &LR, MachineBasicBlock &UseMBB,
}
// Prepare the defined/undefined bit vectors.
- auto EF = EntryInfoMap.find(&LR);
- if (EF == EntryInfoMap.end()) {
+ EntryInfoMap::iterator Entry;
+ bool DidInsert;
+ std::tie(Entry, DidInsert) = EntryInfos.insert(
+ std::make_pair(&LR, std::make_pair(BitVector(), BitVector())));
+ if (DidInsert) {
+ // Initialize newly inserted entries.
unsigned N = MF->getNumBlockIDs();
- EF = EntryInfoMap.insert({&LR, {BitVector(), BitVector()}}).first;
- EF->second.first.resize(N);
- EF->second.second.resize(N);
+ Entry->second.first.resize(N);
+ Entry->second.second.resize(N);
}
- BitVector &DefOnEntry = EF->second.first;
- BitVector &UndefOnEntry = EF->second.second;
+ BitVector &DefOnEntry = Entry->second.first;
+ BitVector &UndefOnEntry = Entry->second.second;
// Multiple values were found, so transfer the work list to the LiveIn array
// where UpdateSSA will use it as a work list.
LiveIn.reserve(WorkList.size());
for (unsigned BN : WorkList) {
MachineBasicBlock *MBB = MF->getBlockNumbered(BN);
- if (Undefs.size() > 0 && !isDefOnEntry(LR, Undefs, *MBB, DefOnEntry, UndefOnEntry))
+ if (Undefs.size() > 0 &&
+ !isDefOnEntry(LR, Undefs, *MBB, DefOnEntry, UndefOnEntry))
continue;
addLiveInBlock(LR, DomTree->getNode(MBB));
if (MBB == &UseMBB)
@@ -485,9 +473,9 @@ void LiveRangeCalc::updateSSA() {
assert(DomTree && "Missing dominator tree");
  // Iterate until convergence.
- unsigned Changes;
+ bool Changed;
do {
- Changes = 0;
+ Changed = false;
// Propagate live-out values down the dominator tree, inserting phi-defs
// when necessary.
for (LiveInBlock &I : LiveIn) {
@@ -510,15 +498,20 @@ void LiveRangeCalc::updateSSA() {
IDomValue = Map[IDom->getBlock()];
// Cache the DomTree node that defined the value.
- if (IDomValue.first && !IDomValue.second)
+ if (IDomValue.first && IDomValue.first != &UndefVNI &&
+ !IDomValue.second) {
Map[IDom->getBlock()].second = IDomValue.second =
DomTree->getNode(Indexes->getMBBFromIndex(IDomValue.first->def));
+ }
- for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(),
- PE = MBB->pred_end(); PI != PE; ++PI) {
- LiveOutPair &Value = Map[*PI];
+ for (MachineBasicBlock *Pred : MBB->predecessors()) {
+ LiveOutPair &Value = Map[Pred];
if (!Value.first || Value.first == IDomValue.first)
continue;
+ if (Value.first == &UndefVNI) {
+ needPHI = true;
+ break;
+ }
// Cache the DomTree node that defined the value.
if (!Value.second)
@@ -542,7 +535,7 @@ void LiveRangeCalc::updateSSA() {
// Create a phi-def if required.
if (needPHI) {
- ++Changes;
+ Changed = true;
assert(Alloc && "Need VNInfo allocator to create PHI-defs");
SlotIndex Start, End;
std::tie(Start, End) = Indexes->getMBBRange(MBB);
@@ -561,7 +554,7 @@ void LiveRangeCalc::updateSSA() {
LR.addSegment(LiveInterval::Segment(Start, End, VNI));
LOP = LiveOutPair(VNI, Node);
}
- } else if (IDomValue.first) {
+ } else if (IDomValue.first && IDomValue.first != &UndefVNI) {
// No phi-def here. Remember incoming value.
I.Value = IDomValue.first;
@@ -573,9 +566,9 @@ void LiveRangeCalc::updateSSA() {
// MBB is live-out and doesn't define its own value.
if (LOP.first == IDomValue.first)
continue;
- ++Changes;
+ Changed = true;
LOP = IDomValue;
}
}
- } while (Changes);
+ } while (Changed);
}
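The UndefVNI sentinel introduced above distinguishes three live-out states in
the Map: nullptr ("nothing recorded"), a real VNInfo, and "known undef". The
idiom is a static dummy object used purely for its unique address; a minimal
sketch with illustrative names, not LLVM's types:

    struct ValueInfo { unsigned Id; };

    // Address reserved to mean "known undef"; never dereferenced as a
    // real value.
    static ValueInfo UndefSentinel{0xbad};

    const char *classify(const ValueInfo *LiveOut) {
      if (LiveOut == nullptr)
        return "no live-out value recorded yet";
      if (LiveOut == &UndefSentinel)
        return "live-out, but known to be undef";
      return "live-out with a real defining value";
    }

updateSSA() and findReachingDefs() must then compare against the sentinel
explicitly, which is why the checks above grew the extra `!= &UndefVNI`
guards.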
diff --git a/contrib/llvm/lib/CodeGen/LiveRangeCalc.h b/contrib/llvm/lib/CodeGen/LiveRangeCalc.h
index 1a7598f..d41b782 100644
--- a/contrib/llvm/lib/CodeGen/LiveRangeCalc.h
+++ b/contrib/llvm/lib/CodeGen/LiveRangeCalc.h
@@ -24,6 +24,7 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/IndexedMap.h"
#include "llvm/CodeGen/LiveInterval.h"
@@ -65,7 +66,8 @@ class LiveRangeCalc {
/// registers do not overlap), but the defined/undefined information must
/// be kept separate for each individual range.
/// By convention, EntryInfoMap[&LR] = { Defined, Undefined }.
- std::map<LiveRange*,std::pair<BitVector,BitVector>> EntryInfoMap;
+ typedef DenseMap<LiveRange*,std::pair<BitVector,BitVector>> EntryInfoMap;
+ EntryInfoMap EntryInfos;
/// Map each basic block where a live range is live out to the live-out value
/// and its defining block.
diff --git a/contrib/llvm/lib/CodeGen/LiveRangeEdit.cpp b/contrib/llvm/lib/CodeGen/LiveRangeEdit.cpp
index 7f1c69c..92cca1a 100644
--- a/contrib/llvm/lib/CodeGen/LiveRangeEdit.cpp
+++ b/contrib/llvm/lib/CodeGen/LiveRangeEdit.cpp
@@ -37,6 +37,8 @@ LiveInterval &LiveRangeEdit::createEmptyIntervalFrom(unsigned OldReg) {
VRM->setIsSplitFromReg(VReg, VRM->getOriginal(OldReg));
}
LiveInterval &LI = LIS.createEmptyInterval(VReg);
+ if (Parent && !Parent->isSpillable())
+ LI.markNotSpillable();
// Create empty subranges if the OldReg's interval has them. Do not create
// the main range here---it will be constructed later after the subranges
// have been finalized.
@@ -52,6 +54,14 @@ unsigned LiveRangeEdit::createFrom(unsigned OldReg) {
if (VRM) {
VRM->setIsSplitFromReg(VReg, VRM->getOriginal(OldReg));
}
+ // FIXME: Getting the interval here actually computes it.
+ // In theory, this may not be what we want, but in practice
+ // the createEmptyIntervalFrom API is used when this is not
+ // the case. Generally speaking we just want to annotate the
+ // LiveInterval when it gets created but we cannot do that at
+ // the moment.
+ if (Parent && !Parent->isSpillable())
+ LIS.getInterval(VReg).markNotSpillable();
return VReg;
}
@@ -442,9 +452,6 @@ LiveRangeEdit::MRI_NoteNewVirtualRegister(unsigned VReg)
if (VRM)
VRM->grow();
- if (Parent && !Parent->isSpillable())
- LIS.getInterval(VReg).markNotSpillable();
-
NewRegs.push_back(VReg);
}
diff --git a/contrib/llvm/lib/CodeGen/LiveRangeShrink.cpp b/contrib/llvm/lib/CodeGen/LiveRangeShrink.cpp
new file mode 100644
index 0000000..552f4b5
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/LiveRangeShrink.cpp
@@ -0,0 +1,231 @@
+//===-- LiveRangeShrink.cpp - Move instructions to shrink live range ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This pass moves instructions close to the definitions of their operands in
+/// order to shrink the live ranges of those defs. The code motion is limited
+/// to within the basic block. The moved instruction should have exactly one
+/// def and more than one use, each of which is the only use of its defining
+/// instruction.
+///
+//===----------------------------------------------------------------------===//
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/Debug.h"
+
+#define DEBUG_TYPE "lrshrink"
+
+STATISTIC(NumInstrsHoistedToShrinkLiveRange,
+          "Number of instructions hoisted to shrink live range.");
+
+using namespace llvm;
+
+namespace {
+class LiveRangeShrink : public MachineFunctionPass {
+public:
+ static char ID;
+
+ LiveRangeShrink() : MachineFunctionPass(ID) {
+ initializeLiveRangeShrinkPass(*PassRegistry::getPassRegistry());
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ StringRef getPassName() const override { return "Live Range Shrink"; }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+};
+} // End anonymous namespace.
+
+char LiveRangeShrink::ID = 0;
+char &llvm::LiveRangeShrinkID = LiveRangeShrink::ID;
+
+INITIALIZE_PASS(LiveRangeShrink, "lrshrink", "Live Range Shrink Pass", false,
+ false)
+namespace {
+typedef DenseMap<MachineInstr *, unsigned> InstOrderMap;
+
+/// Returns \p New if it is dominated by \p Old, otherwise returns \p Old.
+/// \p M maps each instruction to its dominating order, such that
+/// M[A] > M[B] guarantees that A is dominated by B.
+/// If \p New is not in \p M, returns \p Old. Otherwise, if \p Old is null,
+/// returns \p New.
+MachineInstr *FindDominatedInstruction(MachineInstr &New, MachineInstr *Old,
+ const InstOrderMap &M) {
+ auto NewIter = M.find(&New);
+ if (NewIter == M.end())
+ return Old;
+ if (Old == nullptr)
+ return &New;
+ unsigned OrderOld = M.find(Old)->second;
+ unsigned OrderNew = NewIter->second;
+ if (OrderOld != OrderNew)
+ return OrderOld < OrderNew ? &New : Old;
+  // OrderOld == OrderNew: iterate forward from Old to see whether it can
+  // reach New; if it does, New is dominated by Old.
+ for (MachineInstr *I = Old->getNextNode(); M.find(I)->second == OrderNew;
+ I = I->getNextNode())
+ if (I == &New)
+ return &New;
+ return Old;
+}
+
+/// Builds the instruction-to-dominating-order map \p M by traversing from
+/// instruction \p Start.
+void BuildInstOrderMap(MachineBasicBlock::iterator Start, InstOrderMap &M) {
+ M.clear();
+ unsigned i = 0;
+ for (MachineInstr &I : make_range(Start, Start->getParent()->end()))
+ M[&I] = i++;
+}
+} // end anonymous namespace
+
+bool LiveRangeShrink::runOnMachineFunction(MachineFunction &MF) {
+ if (skipFunction(*MF.getFunction()))
+ return false;
+
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+
+ DEBUG(dbgs() << "**** Analysing " << MF.getName() << '\n');
+
+ InstOrderMap IOM;
+  // Map from register to the instruction order (value of IOM) at which the
+  // register is last used. When moving an instruction up, we must make sure
+  // that none of its defs (including dead defs) cross the last use of that
+  // register.
+ DenseMap<unsigned, std::pair<unsigned, MachineInstr *>> UseMap;
+
+ for (MachineBasicBlock &MBB : MF) {
+ if (MBB.empty())
+ continue;
+ bool SawStore = false;
+ BuildInstOrderMap(MBB.begin(), IOM);
+ UseMap.clear();
+
+ for (MachineBasicBlock::iterator Next = MBB.begin(); Next != MBB.end();) {
+ MachineInstr &MI = *Next;
+ ++Next;
+ if (MI.isPHI() || MI.isDebugValue())
+ continue;
+ if (MI.mayStore())
+ SawStore = true;
+
+ unsigned CurrentOrder = IOM[&MI];
+ unsigned Barrier = 0;
+ MachineInstr *BarrierMI = nullptr;
+ for (const MachineOperand &MO : MI.operands()) {
+ if (!MO.isReg() || MO.isDebug())
+ continue;
+ if (MO.isUse())
+ UseMap[MO.getReg()] = std::make_pair(CurrentOrder, &MI);
+ else if (MO.isDead() && UseMap.count(MO.getReg()))
+          // Barrier is the last instruction where MO gets used. MI should
+          // not be moved above Barrier.
+ if (Barrier < UseMap[MO.getReg()].first) {
+ Barrier = UseMap[MO.getReg()].first;
+ BarrierMI = UseMap[MO.getReg()].second;
+ }
+ }
+
+ if (!MI.isSafeToMove(nullptr, SawStore)) {
+ // If MI has side effects, it should become a barrier for code motion.
+        // IOM is rebuilt from the next instruction to prevent later
+ // instructions from being moved before this MI.
+ if (MI.hasUnmodeledSideEffects() && Next != MBB.end()) {
+ BuildInstOrderMap(Next, IOM);
+ SawStore = false;
+ }
+ continue;
+ }
+
+ const MachineOperand *DefMO = nullptr;
+ MachineInstr *Insert = nullptr;
+
+ // Number of live-ranges that will be shortened. We do not count
+      // live-ranges that are defined by a COPY, as they may be coalesced
+      // away later.
+ unsigned NumEligibleUse = 0;
+
+ for (const MachineOperand &MO : MI.operands()) {
+ if (!MO.isReg() || MO.isDead() || MO.isDebug())
+ continue;
+ unsigned Reg = MO.getReg();
+ // Do not move the instruction if it def/uses a physical register,
+ // unless it is a constant physical register or a noreg.
+ if (!TargetRegisterInfo::isVirtualRegister(Reg)) {
+ if (!Reg || MRI.isConstantPhysReg(Reg))
+ continue;
+ Insert = nullptr;
+ break;
+ }
+ if (MO.isDef()) {
+ // Do not move if there is more than one def.
+ if (DefMO) {
+ Insert = nullptr;
+ break;
+ }
+ DefMO = &MO;
+ } else if (MRI.hasOneNonDBGUse(Reg) && MRI.hasOneDef(Reg) && DefMO &&
+ MRI.getRegClass(DefMO->getReg()) ==
+ MRI.getRegClass(MO.getReg())) {
+ // The heuristic does not handle different register classes yet
+ // (registers of different sizes, looser/tighter constraints). This
+ // is because it needs more accurate model to handle register
+ // pressure correctly.
+ MachineInstr &DefInstr = *MRI.def_instr_begin(Reg);
+ if (!DefInstr.isCopy())
+ NumEligibleUse++;
+ Insert = FindDominatedInstruction(DefInstr, Insert, IOM);
+ } else {
+ Insert = nullptr;
+ break;
+ }
+ }
+
+      // If Barrier equals IOM[I], traverse forward to check whether BarrierMI
+      // is after Insert; if it is, we should not hoist.
+ for (MachineInstr *I = Insert; I && IOM[I] == Barrier;
+ I = I->getNextNode())
+ if (I == BarrierMI) {
+ Insert = nullptr;
+ break;
+ }
+      // Move the instruction only when more than one live range is shrunk.
+ if (DefMO && Insert && NumEligibleUse > 1 && Barrier <= IOM[Insert]) {
+ MachineBasicBlock::iterator I = std::next(Insert->getIterator());
+ // Skip all the PHI and debug instructions.
+ while (I != MBB.end() && (I->isPHI() || I->isDebugValue()))
+ I = std::next(I);
+ if (I == MI.getIterator())
+ continue;
+
+ // Update the dominator order to be the same as the insertion point.
+        // We do this to maintain a non-decreasing order without needing to
+        // update all instruction orders after the insertion point.
+ unsigned NewOrder = IOM[&*I];
+ IOM[&MI] = NewOrder;
+ NumInstrsHoistedToShrinkLiveRange++;
+
+      // Find the debug values immediately following MI that use its def.
+ MachineBasicBlock::iterator EndIter = std::next(MI.getIterator());
+ if (MI.getOperand(0).isReg())
+ for (; EndIter != MBB.end() && EndIter->isDebugValue() &&
+ EndIter->getOperand(0).isReg() &&
+ EndIter->getOperand(0).getReg() == MI.getOperand(0).getReg();
+ ++EndIter, ++Next)
+ IOM[&*EndIter] = NewOrder;
+ MBB.splice(I, &MBB, MI.getIterator(), EndIter);
+ }
+ }
+ }
+ return false;
+}
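The ordering trick in FindDominatedInstruction above deserves a closer look:
order numbers in IOM are only non-decreasing (BuildInstOrderMap may run from
a mid-block instruction), so ties are broken by walking forward through the
region that shares a number. A toy restatement over a plain order vector,
where instructions are block positions; an assumption-laden sketch, not the
pass itself:

    #include <vector>

    // Order[I] is the dominating-order number of the instruction at block
    // position I; lower means earlier. Returns the dominated (later) of
    // New and Old, or New when Old is -1 (no candidate yet).
    int pickDominated(int New, int Old, const std::vector<unsigned> &Order) {
      if (Old < 0)
        return New;
      if (Order[New] != Order[Old])
        return Order[New] > Order[Old] ? New : Old;
      // Tie: numbers repeat after a partial rebuild. Walk forward from Old
      // through the equal-order region; reaching New proves Old comes first.
      for (int I = Old + 1;
           I < (int)Order.size() && Order[I] == Order[New]; ++I)
        if (I == New)
          return New;
      return Old;
    }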
diff --git a/contrib/llvm/lib/CodeGen/LiveRegMatrix.cpp b/contrib/llvm/lib/CodeGen/LiveRegMatrix.cpp
index 7a51386..60033db 100644
--- a/contrib/llvm/lib/CodeGen/LiveRegMatrix.cpp
+++ b/contrib/llvm/lib/CodeGen/LiveRegMatrix.cpp
@@ -1,4 +1,4 @@
-//===-- LiveRegMatrix.cpp - Track register interference -------------------===//
+//===- LiveRegMatrix.cpp - Track register interference --------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -14,12 +14,19 @@
#include "llvm/CodeGen/LiveRegMatrix.h"
#include "RegisterCoalescer.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/LiveInterval.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/LiveIntervalUnion.h"
+#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/VirtRegMap.h"
+#include "llvm/MC/LaneBitmask.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
+#include <cassert>
using namespace llvm;
@@ -36,8 +43,7 @@ INITIALIZE_PASS_DEPENDENCY(VirtRegMap)
INITIALIZE_PASS_END(LiveRegMatrix, "liveregmatrix",
"Live Register Matrix", false, false)
-LiveRegMatrix::LiveRegMatrix() : MachineFunctionPass(ID),
- UserTag(0), RegMaskTag(0), RegMaskVirtReg(0) {}
+LiveRegMatrix::LiveRegMatrix() : MachineFunctionPass(ID) {}
void LiveRegMatrix::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
@@ -169,10 +175,10 @@ bool LiveRegMatrix::checkRegUnitInterference(LiveInterval &VirtReg,
return Result;
}
-LiveIntervalUnion::Query &LiveRegMatrix::query(LiveInterval &VirtReg,
+LiveIntervalUnion::Query &LiveRegMatrix::query(const LiveRange &LR,
unsigned RegUnit) {
LiveIntervalUnion::Query &Q = Queries[RegUnit];
- Q.init(UserTag, &VirtReg, &Matrix[RegUnit]);
+ Q.init(UserTag, LR, Matrix[RegUnit]);
return Q;
}
@@ -190,9 +196,12 @@ LiveRegMatrix::checkInterference(LiveInterval &VirtReg, unsigned PhysReg) {
return IK_RegUnit;
// Check the matrix for virtual register interference.
- for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units)
- if (query(VirtReg, *Units).checkInterference())
- return IK_VirtReg;
+ bool Interference = foreachUnit(TRI, VirtReg, PhysReg,
+ [&](unsigned Unit, const LiveRange &LR) {
+ return query(LR, Unit).checkInterference();
+ });
+ if (Interference)
+ return IK_VirtReg;
return IK_Free;
}
diff --git a/contrib/llvm/lib/CodeGen/LiveRegUnits.cpp b/contrib/llvm/lib/CodeGen/LiveRegUnits.cpp
new file mode 100644
index 0000000..f9ba4ff
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/LiveRegUnits.cpp
@@ -0,0 +1,132 @@
+//===- LiveRegUnits.cpp - Register Unit Set -------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file This file implements the LiveRegUnits set.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/LiveRegUnits.h"
+
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBundle.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+
+using namespace llvm;
+
+void LiveRegUnits::removeRegsNotPreserved(const uint32_t *RegMask) {
+ for (unsigned U = 0, E = TRI->getNumRegUnits(); U != E; ++U) {
+ for (MCRegUnitRootIterator RootReg(U, TRI); RootReg.isValid(); ++RootReg) {
+ if (MachineOperand::clobbersPhysReg(RegMask, *RootReg))
+ Units.reset(U);
+ }
+ }
+}
+
+void LiveRegUnits::addRegsInMask(const uint32_t *RegMask) {
+ for (unsigned U = 0, E = TRI->getNumRegUnits(); U != E; ++U) {
+ for (MCRegUnitRootIterator RootReg(U, TRI); RootReg.isValid(); ++RootReg) {
+ if (MachineOperand::clobbersPhysReg(RegMask, *RootReg))
+ Units.set(U);
+ }
+ }
+}
+
+void LiveRegUnits::stepBackward(const MachineInstr &MI) {
+ // Remove defined registers and regmask kills from the set.
+ for (ConstMIBundleOperands O(MI); O.isValid(); ++O) {
+ if (O->isReg()) {
+ if (!O->isDef())
+ continue;
+ unsigned Reg = O->getReg();
+ if (!TargetRegisterInfo::isPhysicalRegister(Reg))
+ continue;
+ removeReg(Reg);
+ } else if (O->isRegMask())
+ removeRegsNotPreserved(O->getRegMask());
+ }
+
+ // Add uses to the set.
+ for (ConstMIBundleOperands O(MI); O.isValid(); ++O) {
+ if (!O->isReg() || !O->readsReg())
+ continue;
+ unsigned Reg = O->getReg();
+ if (!TargetRegisterInfo::isPhysicalRegister(Reg))
+ continue;
+ addReg(Reg);
+ }
+}
+
+void LiveRegUnits::accumulate(const MachineInstr &MI) {
+ // Add defs, uses and regmask clobbers to the set.
+ for (ConstMIBundleOperands O(MI); O.isValid(); ++O) {
+ if (O->isReg()) {
+ unsigned Reg = O->getReg();
+ if (!TargetRegisterInfo::isPhysicalRegister(Reg))
+ continue;
+ if (!O->isDef() && !O->readsReg())
+ continue;
+ addReg(Reg);
+ } else if (O->isRegMask())
+ addRegsInMask(O->getRegMask());
+ }
+}
+
+/// Add live-in registers of basic block \p MBB to \p LiveUnits.
+static void addBlockLiveIns(LiveRegUnits &LiveUnits,
+ const MachineBasicBlock &MBB) {
+ for (const auto &LI : MBB.liveins())
+ LiveUnits.addRegMasked(LI.PhysReg, LI.LaneMask);
+}
+
+/// Adds all callee saved registers to \p LiveUnits.
+static void addCalleeSavedRegs(LiveRegUnits &LiveUnits,
+ const MachineFunction &MF) {
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+ for (const MCPhysReg *CSR = MRI.getCalleeSavedRegs(); CSR && *CSR; ++CSR)
+ LiveUnits.addReg(*CSR);
+}
+
+/// Adds pristine registers to the given \p LiveUnits. Pristine registers are
+/// callee saved registers that are unused in the function.
+static void addPristines(LiveRegUnits &LiveUnits, const MachineFunction &MF) {
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
+ if (!MFI.isCalleeSavedInfoValid())
+ return;
+  // Add all callee saved regs, then remove the ones that are saved+restored.
+  addCalleeSavedRegs(LiveUnits, MF);
+  // Remove the ones that are saved and restored; the remainder is pristine.
+ for (const CalleeSavedInfo &Info : MFI.getCalleeSavedInfo())
+ LiveUnits.removeReg(Info.getReg());
+}
+
+void LiveRegUnits::addLiveOuts(const MachineBasicBlock &MBB) {
+ const MachineFunction &MF = *MBB.getParent();
+ if (!MBB.succ_empty()) {
+ addPristines(*this, MF);
+ // To get the live-outs we simply merge the live-ins of all successors.
+ for (const MachineBasicBlock *Succ : MBB.successors())
+ addBlockLiveIns(*this, *Succ);
+ } else if (MBB.isReturnBlock()) {
+ // For the return block: Add all callee saved registers.
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
+ if (MFI.isCalleeSavedInfoValid())
+ addCalleeSavedRegs(*this, MF);
+ }
+}
+
+void LiveRegUnits::addLiveIns(const MachineBasicBlock &MBB) {
+ const MachineFunction &MF = *MBB.getParent();
+ addPristines(*this, MF);
+ addBlockLiveIns(*this, MBB);
+}
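LiveRegUnits::stepBackward above is the classic backward liveness transfer
function, restated over register units. Reduced to bare sets (ignoring
bundles, regmasks, and the register/unit distinction), the per-instruction
rule looks like this sketch:

    #include <set>

    struct Inst { std::set<unsigned> Defs, Uses; };

    // Walking bottom-up: a def kills liveness above the instruction,
    // a use requires liveness on entry to it.
    void stepBackward(std::set<unsigned> &Live, const Inst &I) {
      for (unsigned R : I.Defs)
        Live.erase(R);
      for (unsigned R : I.Uses)
        Live.insert(R);
    }

accumulate(), by contrast, only ever adds (defs, uses, and regmask clobbers)
and never removes, which is what makes it suitable for modified-register
scans.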
diff --git a/contrib/llvm/lib/CodeGen/LiveStackAnalysis.cpp b/contrib/llvm/lib/CodeGen/LiveStackAnalysis.cpp
index dbf1f96..b51f8b0 100644
--- a/contrib/llvm/lib/CodeGen/LiveStackAnalysis.cpp
+++ b/contrib/llvm/lib/CodeGen/LiveStackAnalysis.cpp
@@ -25,10 +25,10 @@ using namespace llvm;
#define DEBUG_TYPE "livestacks"
char LiveStacks::ID = 0;
-INITIALIZE_PASS_BEGIN(LiveStacks, "livestacks",
+INITIALIZE_PASS_BEGIN(LiveStacks, DEBUG_TYPE,
"Live Stack Slot Analysis", false, false)
INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
-INITIALIZE_PASS_END(LiveStacks, "livestacks",
+INITIALIZE_PASS_END(LiveStacks, DEBUG_TYPE,
"Live Stack Slot Analysis", false, false)
char &llvm::LiveStacksID = LiveStacks::ID;
diff --git a/contrib/llvm/lib/CodeGen/LiveVariables.cpp b/contrib/llvm/lib/CodeGen/LiveVariables.cpp
index 269b990a31..a9aec92 100644
--- a/contrib/llvm/lib/CodeGen/LiveVariables.cpp
+++ b/contrib/llvm/lib/CodeGen/LiveVariables.cpp
@@ -64,8 +64,8 @@ LiveVariables::VarInfo::findKill(const MachineBasicBlock *MBB) const {
return nullptr;
}
-LLVM_DUMP_METHOD void LiveVariables::VarInfo::dump() const {
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD void LiveVariables::VarInfo::dump() const {
dbgs() << " Alive in blocks: ";
for (SparseBitVector<>::iterator I = AliveBlocks.begin(),
E = AliveBlocks.end(); I != E; ++I)
@@ -78,8 +78,8 @@ LLVM_DUMP_METHOD void LiveVariables::VarInfo::dump() const {
dbgs() << "\n #" << i << ": " << *Kills[i];
dbgs() << "\n";
}
-#endif
}
+#endif
/// getVarInfo - Get (possibly creating) a VarInfo object for the given vreg.
LiveVariables::VarInfo &LiveVariables::getVarInfo(unsigned RegIdx) {
@@ -767,7 +767,7 @@ void LiveVariables::addNewBlock(MachineBasicBlock *BB,
MachineBasicBlock *SuccBB) {
const unsigned NumNew = BB->getNumber();
- SmallSet<unsigned, 16> Defs, Kills;
+ DenseSet<unsigned> Defs, Kills;
MachineBasicBlock::iterator BBI = SuccBB->begin(), BBE = SuccBB->end();
for (; BBI != BBE && BBI->isPHI(); ++BBI) {
diff --git a/contrib/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp b/contrib/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp
index e189fb0..b109f19 100644
--- a/contrib/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp
+++ b/contrib/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp
@@ -14,7 +14,6 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/CodeGen/Passes.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallSet.h"
@@ -23,6 +22,7 @@
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/StackProtector.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
@@ -103,10 +103,10 @@ namespace {
char LocalStackSlotPass::ID = 0;
char &llvm::LocalStackSlotAllocationID = LocalStackSlotPass::ID;
-INITIALIZE_PASS_BEGIN(LocalStackSlotPass, "localstackalloc",
+INITIALIZE_PASS_BEGIN(LocalStackSlotPass, DEBUG_TYPE,
"Local Stack Slot Allocation", false, false)
INITIALIZE_PASS_DEPENDENCY(StackProtector)
-INITIALIZE_PASS_END(LocalStackSlotPass, "localstackalloc",
+INITIALIZE_PASS_END(LocalStackSlotPass, DEBUG_TYPE,
"Local Stack Slot Allocation", false, false)
diff --git a/contrib/llvm/lib/CodeGen/LowLevelType.cpp b/contrib/llvm/lib/CodeGen/LowLevelType.cpp
index d74b730..1c682e7 100644
--- a/contrib/llvm/lib/CodeGen/LowLevelType.cpp
+++ b/contrib/llvm/lib/CodeGen/LowLevelType.cpp
@@ -1,4 +1,4 @@
-//===-- llvm/CodeGen/GlobalISel/LowLevelType.cpp --------------------------===//
+//===-- llvm/CodeGen/LowLevelType.cpp -------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -18,54 +18,21 @@
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
-LLT::LLT(Type &Ty, const DataLayout &DL) {
+LLT llvm::getLLTForType(Type &Ty, const DataLayout &DL) {
if (auto VTy = dyn_cast<VectorType>(&Ty)) {
- SizeInBits = VTy->getElementType()->getPrimitiveSizeInBits();
- ElementsOrAddrSpace = VTy->getNumElements();
- Kind = ElementsOrAddrSpace == 1 ? Scalar : Vector;
+ auto NumElements = VTy->getNumElements();
+ LLT ScalarTy = getLLTForType(*VTy->getElementType(), DL);
+ if (NumElements == 1)
+ return ScalarTy;
+ return LLT::vector(NumElements, ScalarTy);
} else if (auto PTy = dyn_cast<PointerType>(&Ty)) {
- Kind = Pointer;
- SizeInBits = DL.getTypeSizeInBits(&Ty);
- ElementsOrAddrSpace = PTy->getAddressSpace();
+ return LLT::pointer(PTy->getAddressSpace(), DL.getTypeSizeInBits(&Ty));
} else if (Ty.isSized()) {
// Aggregates are no different from real scalars as far as GlobalISel is
// concerned.
- Kind = Scalar;
- SizeInBits = DL.getTypeSizeInBits(&Ty);
- ElementsOrAddrSpace = 1;
+ auto SizeInBits = DL.getTypeSizeInBits(&Ty);
assert(SizeInBits != 0 && "invalid zero-sized type");
- } else {
- Kind = Invalid;
- SizeInBits = ElementsOrAddrSpace = 0;
+ return LLT::scalar(SizeInBits);
}
-}
-
-LLT::LLT(MVT VT) {
- if (VT.isVector()) {
- SizeInBits = VT.getVectorElementType().getSizeInBits();
- ElementsOrAddrSpace = VT.getVectorNumElements();
- Kind = ElementsOrAddrSpace == 1 ? Scalar : Vector;
- } else if (VT.isValid()) {
- // Aggregates are no different from real scalars as far as GlobalISel is
- // concerned.
- Kind = Scalar;
- SizeInBits = VT.getSizeInBits();
- ElementsOrAddrSpace = 1;
- assert(SizeInBits != 0 && "invalid zero-sized type");
- } else {
- Kind = Invalid;
- SizeInBits = ElementsOrAddrSpace = 0;
- }
-}
-
-void LLT::print(raw_ostream &OS) const {
- if (isVector())
- OS << "<" << ElementsOrAddrSpace << " x s" << SizeInBits << ">";
- else if (isPointer())
- OS << "p" << getAddressSpace();
- else if (isValid()) {
- assert(isScalar() && "unexpected type");
- OS << "s" << getScalarSizeInBits();
- } else
- llvm_unreachable("trying to print an invalid type");
+ return LLT();
}
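With the constructors gone, every LLT is built through the static factories, and the IR-type mapping lives in the free function getLLTForType shown above. A small sketch of what the factories produce, assuming the llvm/Support/LowLevelTypeImpl.h header this import uses:

    #include "llvm/Support/LowLevelTypeImpl.h"

    using namespace llvm;

    void lltDemo() {
      LLT S32   = LLT::scalar(32);       // s32
      LLT V4S32 = LLT::vector(4, S32);   // <4 x s32>
      LLT P0    = LLT::pointer(0, 64);   // p0: 64-bit pointer, addrspace 0

      bool A = S32.isScalar();                                  // true
      bool B = V4S32.isVector() && V4S32.getNumElements() == 4; // true
      bool C = P0.isPointer() && P0.getAddressSpace() == 0;     // true
      (void)A; (void)B; (void)C;
    }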
diff --git a/contrib/llvm/lib/CodeGen/LowerEmuTLS.cpp b/contrib/llvm/lib/CodeGen/LowerEmuTLS.cpp
index 6966c8c..0fc48d4 100644
--- a/contrib/llvm/lib/CodeGen/LowerEmuTLS.cpp
+++ b/contrib/llvm/lib/CodeGen/LowerEmuTLS.cpp
@@ -16,6 +16,7 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/Pass.h"
@@ -28,14 +29,12 @@ using namespace llvm;
namespace {
class LowerEmuTLS : public ModulePass {
- const TargetMachine *TM;
public:
static char ID; // Pass identification, replacement for typeid
- explicit LowerEmuTLS() : ModulePass(ID), TM(nullptr) { }
- explicit LowerEmuTLS(const TargetMachine *TM)
- : ModulePass(ID), TM(TM) {
+ LowerEmuTLS() : ModulePass(ID) {
initializeLowerEmuTLSPass(*PassRegistry::getPassRegistry());
}
+
bool runOnModule(Module &M) override;
private:
bool addEmuTlsVar(Module &M, const GlobalVariable *GV);
@@ -54,19 +53,22 @@ private:
char LowerEmuTLS::ID = 0;
-INITIALIZE_PASS(LowerEmuTLS, "loweremutls",
- "Add __emutls_[vt]. variables for emultated TLS model",
- false, false)
+INITIALIZE_PASS(LowerEmuTLS, DEBUG_TYPE,
+                "Add __emutls_[vt]. variables for emulated TLS model", false,
+ false)
-ModulePass *llvm::createLowerEmuTLSPass(const TargetMachine *TM) {
- return new LowerEmuTLS(TM);
-}
+ModulePass *llvm::createLowerEmuTLSPass() { return new LowerEmuTLS(); }
bool LowerEmuTLS::runOnModule(Module &M) {
if (skipModule(M))
return false;
- if (!TM || !TM->Options.EmulatedTLS)
+ auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
+ if (!TPC)
+ return false;
+
+ auto &TM = TPC->getTM<TargetMachine>();
+ if (!TM.Options.EmulatedTLS)
return false;
bool Changed = false;
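Dropping the TargetMachine constructor argument is part of a broader cleanup: passes recover the TargetMachine from TargetPassConfig when they run inside a codegen pipeline, and bail out cleanly when they don't (e.g. under plain opt). A sketch of the retrieval idiom in a hypothetical module pass (NeedsTMPass is illustrative):

    #include "llvm/CodeGen/TargetPassConfig.h"
    #include "llvm/IR/Module.h"
    #include "llvm/Pass.h"
    #include "llvm/Target/TargetMachine.h"

    using namespace llvm;

    namespace {
    struct NeedsTMPass : public ModulePass {
      static char ID;
      NeedsTMPass() : ModulePass(ID) {}
      bool runOnModule(Module &M) override {
        // TargetPassConfig is only registered by codegen pipelines.
        auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
        if (!TPC)
          return false;                           // not running under llc
        const TargetMachine &TM = TPC->getTM<TargetMachine>();
        bool UseEmuTLS = TM.Options.EmulatedTLS;  // illustrative query
        (void)UseEmuTLS;
        return false;
      }
    };
    } // end anonymous namespace

    char NeedsTMPass::ID = 0;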
diff --git a/contrib/llvm/lib/CodeGen/MIRParser/MILexer.cpp b/contrib/llvm/lib/CodeGen/MIRParser/MILexer.cpp
index 1f1ce6e..58a655a 100644
--- a/contrib/llvm/lib/CodeGen/MIRParser/MILexer.cpp
+++ b/contrib/llvm/lib/CodeGen/MIRParser/MILexer.cpp
@@ -365,6 +365,14 @@ static Cursor maybeLexIRValue(Cursor C, MIToken &Token,
return lexName(C, Token, MIToken::NamedIRValue, Rule.size(), ErrorCallback);
}
+static Cursor maybeLexStringConstant(Cursor C, MIToken &Token,
+ ErrorCallbackType ErrorCallback) {
+ if (C.peek() != '"')
+ return None;
+ return lexName(C, Token, MIToken::StringConstant, /*PrefixLength=*/0,
+ ErrorCallback);
+}
+
static Cursor lexVirtualRegister(Cursor C, MIToken &Token) {
auto Range = C;
C.advance(); // Skip '%'
@@ -630,6 +638,8 @@ StringRef llvm::lexMIToken(StringRef Source, MIToken &Token,
return R.remaining();
if (Cursor R = maybeLexEscapedIRValue(C, Token, ErrorCallback))
return R.remaining();
+ if (Cursor R = maybeLexStringConstant(C, Token, ErrorCallback))
+ return R.remaining();
Token.reset(MIToken::Error, C.remaining());
ErrorCallback(C.location(),
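Each maybeLex* rule either lexes a token and returns the advanced cursor or returns None so the dispatcher falls through to the next rule; maybeLexStringConstant slots into that chain just before the error path. The shape of the pattern, sketched with std::optional instead of the lexer's own Cursor type (C++17, purely illustrative):

    #include <optional>
    #include <string>

    // One rule: consume a quoted token and return the remaining input,
    // or decline with nullopt so the next rule gets a chance.
    static std::optional<std::string> maybeLexQuoted(const std::string &S) {
      if (S.empty() || S[0] != '"')
        return std::nullopt;
      std::string::size_type End = S.find('"', 1);
      if (End == std::string::npos)
        return std::nullopt;                 // unterminated: not our token
      return S.substr(End + 1);
    }

    static std::string lexOne(const std::string &S) {
      if (auto R = maybeLexQuoted(S))        // first matching rule wins
        return *R;
      // ... further maybeLex* rules would be tried here ...
      return S;                              // nothing matched: error path
    }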
diff --git a/contrib/llvm/lib/CodeGen/MIRParser/MILexer.h b/contrib/llvm/lib/CodeGen/MIRParser/MILexer.h
index edba749..08b82e5 100644
--- a/contrib/llvm/lib/CodeGen/MIRParser/MILexer.h
+++ b/contrib/llvm/lib/CodeGen/MIRParser/MILexer.h
@@ -16,8 +16,8 @@
#define LLVM_LIB_CODEGEN_MIRPARSER_MILEXER_H
#include "llvm/ADT/APSInt.h"
-#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringRef.h"
#include <functional>
namespace llvm {
@@ -127,7 +127,8 @@ struct MIToken {
NamedIRValue,
IRValue,
QuotedIRValue, // `<constant value>`
- SubRegisterIndex
+ SubRegisterIndex,
+ StringConstant
};
private:
@@ -168,7 +169,8 @@ public:
bool isMemoryOperandFlag() const {
return Kind == kw_volatile || Kind == kw_non_temporal ||
- Kind == kw_dereferenceable || Kind == kw_invariant;
+ Kind == kw_dereferenceable || Kind == kw_invariant ||
+ Kind == StringConstant;
}
bool is(TokenKind K) const { return Kind == K; }
diff --git a/contrib/llvm/lib/CodeGen/MIRParser/MIParser.cpp b/contrib/llvm/lib/CodeGen/MIRParser/MIParser.cpp
index c8bed08..c68d87b 100644
--- a/contrib/llvm/lib/CodeGen/MIRParser/MIParser.cpp
+++ b/contrib/llvm/lib/CodeGen/MIRParser/MIParser.cpp
@@ -11,12 +11,22 @@
//
//===----------------------------------------------------------------------===//
-#include "MIParser.h"
#include "MILexer.h"
+#include "MIParser.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/APSInt.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/None.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringSwitch.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Twine.h"
#include "llvm/AsmParser/Parser.h"
#include "llvm/AsmParser/SlotMapping.h"
+#include "llvm/CodeGen/MIRPrinter.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -24,25 +34,57 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/ModuleSlotTracker.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/Value.h"
#include "llvm/IR/ValueSymbolTable.h"
+#include "llvm/MC/LaneBitmask.h"
+#include "llvm/MC/MCDwarf.h"
+#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/Support/AtomicOrdering.h"
+#include "llvm/Support/BranchProbability.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/LowLevelTypeImpl.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/SMLoc.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetIntrinsicInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
+#include <algorithm>
+#include <cassert>
#include <cctype>
+#include <cstddef>
+#include <cstdint>
+#include <limits>
+#include <string>
+#include <utility>
using namespace llvm;
PerFunctionMIParsingState::PerFunctionMIParsingState(MachineFunction &MF,
- SourceMgr &SM, const SlotMapping &IRSlots)
- : MF(MF), SM(&SM), IRSlots(IRSlots) {
+ SourceMgr &SM, const SlotMapping &IRSlots,
+ const Name2RegClassMap &Names2RegClasses,
+ const Name2RegBankMap &Names2RegBanks)
+ : MF(MF), SM(&SM), IRSlots(IRSlots), Names2RegClasses(Names2RegClasses),
+ Names2RegBanks(Names2RegBanks) {
}
VRegInfo &PerFunctionMIParsingState::getVRegInfo(unsigned Num) {
@@ -99,6 +141,8 @@ class MIParser {
StringMap<unsigned> Names2DirectTargetFlags;
/// Maps from direct target flag names to the bitmask target flag values.
StringMap<unsigned> Names2BitmaskTargetFlags;
+ /// Maps from MMO target flag names to MMO target flag values.
+ StringMap<MachineMemOperand::Flags> Names2MMOTargetFlags;
public:
MIParser(PerFunctionMIParsingState &PFS, SMDiagnostic &Error,
@@ -131,7 +175,8 @@ public:
bool
parseBasicBlockDefinition(DenseMap<unsigned, MachineBasicBlock *> &MBBSlots);
- bool parseBasicBlock(MachineBasicBlock &MBB);
+ bool parseBasicBlock(MachineBasicBlock &MBB,
+                       MachineBasicBlock *&AddFallthroughFrom);
bool parseBasicBlockLiveins(MachineBasicBlock &MBB);
bool parseBasicBlockSuccessors(MachineBasicBlock &MBB);
@@ -139,6 +184,7 @@ public:
bool parseVirtualRegister(VRegInfo *&Info);
bool parseRegister(unsigned &Reg, VRegInfo *&VRegInfo);
bool parseRegisterFlag(unsigned &Flags);
+ bool parseRegisterClassOrBank(VRegInfo &RegInfo);
bool parseSubRegisterIndex(unsigned &SubReg);
bool parseRegisterTiedDefIndex(unsigned &TiedDefIdx);
bool parseRegisterOperand(MachineOperand &Dest,
@@ -172,6 +218,7 @@ public:
bool parseIntrinsicOperand(MachineOperand &Dest);
bool parsePredicateOperand(MachineOperand &Dest);
bool parseTargetIndexOperand(MachineOperand &Dest);
+ bool parseCustomRegisterMaskOperand(MachineOperand &Dest);
bool parseLiveoutRegisterMaskOperand(MachineOperand &Dest);
bool parseMachineOperand(MachineOperand &Dest,
Optional<unsigned> &TiedDefIdx);
@@ -184,6 +231,8 @@ public:
bool parseMemoryOperandFlag(MachineMemOperand::Flags &Flags);
bool parseMemoryPseudoSourceValue(const PseudoSourceValue *&PSV);
bool parseMachinePointerInfo(MachinePointerInfo &Dest);
+ bool parseOptionalScope(LLVMContext &Context, SyncScope::ID &SSID);
+ bool parseOptionalAtomicOrdering(AtomicOrdering &Order);
bool parseMachineMemoryOperand(MachineMemOperand *&Dest);
private:
@@ -272,6 +321,18 @@ private:
///
/// Return true if the name isn't a name of a bitmask target flag.
bool getBitmaskTargetFlag(StringRef Name, unsigned &Flag);
+
+ void initNames2MMOTargetFlags();
+
+ /// Try to convert a name of a MachineMemOperand target flag to the
+ /// corresponding target flag.
+ ///
+ /// Return true if the name isn't a name of a target MMO flag.
+ bool getMMOTargetFlag(StringRef Name, MachineMemOperand::Flags &Flag);
+
+ /// parseStringConstant
+ /// ::= StringConstant
+ bool parseStringConstant(std::string &Result);
};
} // end anonymous namespace
@@ -512,7 +573,8 @@ bool MIParser::parseBasicBlockSuccessors(MachineBasicBlock &MBB) {
return false;
}
-bool MIParser::parseBasicBlock(MachineBasicBlock &MBB) {
+bool MIParser::parseBasicBlock(MachineBasicBlock &MBB,
+                               MachineBasicBlock *&AddFallthroughFrom) {
// Skip the definition.
assert(Token.is(MIToken::MachineBasicBlockLabel));
lex();
@@ -532,10 +594,12 @@ bool MIParser::parseBasicBlock(MachineBasicBlock &MBB) {
//
// is equivalent to
// liveins: %edi, %esi
+ bool ExplicitSuccessors = false;
while (true) {
if (Token.is(MIToken::kw_successors)) {
if (parseBasicBlockSuccessors(MBB))
return true;
+ ExplicitSuccessors = true;
} else if (Token.is(MIToken::kw_liveins)) {
if (parseBasicBlockLiveins(MBB))
return true;
@@ -551,10 +615,9 @@ bool MIParser::parseBasicBlock(MachineBasicBlock &MBB) {
// Parse the instructions.
bool IsInBundle = false;
MachineInstr *PrevMI = nullptr;
- while (true) {
- if (Token.is(MIToken::MachineBasicBlockLabel) || Token.is(MIToken::Eof))
- return false;
- else if (consumeIfPresent(MIToken::Newline))
+ while (!Token.is(MIToken::MachineBasicBlockLabel) &&
+ !Token.is(MIToken::Eof)) {
+ if (consumeIfPresent(MIToken::Newline))
continue;
if (consumeIfPresent(MIToken::rbrace)) {
// The first parsing pass should verify that all closing '}' have an
@@ -586,6 +649,22 @@ bool MIParser::parseBasicBlock(MachineBasicBlock &MBB) {
assert(Token.isNewlineOrEOF() && "MI is not fully parsed");
lex();
}
+
+ // Construct successor list by searching for basic block machine operands.
+ if (!ExplicitSuccessors) {
+ SmallVector<MachineBasicBlock*,4> Successors;
+ bool IsFallthrough;
+ guessSuccessors(MBB, Successors, IsFallthrough);
+ for (MachineBasicBlock *Succ : Successors)
+ MBB.addSuccessor(Succ);
+
+ if (IsFallthrough) {
+      AddFallthroughFrom = &MBB;
+ } else {
+ MBB.normalizeSuccProbs();
+ }
+ }
+
return false;
}
@@ -599,11 +678,18 @@ bool MIParser::parseBasicBlocks() {
// The first parsing pass should have verified that this token is a MBB label
// in the 'parseBasicBlockDefinitions' method.
assert(Token.is(MIToken::MachineBasicBlockLabel));
+  MachineBasicBlock *AddFallthroughFrom = nullptr;
do {
MachineBasicBlock *MBB = nullptr;
if (parseMBBReference(MBB))
return true;
- if (parseBasicBlock(*MBB))
+    if (AddFallthroughFrom) {
+      if (!AddFallthroughFrom->isSuccessor(MBB))
+        AddFallthroughFrom->addSuccessor(MBB);
+      AddFallthroughFrom->normalizeSuccProbs();
+      AddFallthroughFrom = nullptr;
+    }
+    if (parseBasicBlock(*MBB, AddFallthroughFrom))
return true;
// The method 'parseBasicBlock' should parse the whole block until the next
// block or the end of file.
@@ -878,6 +964,66 @@ bool MIParser::parseRegister(unsigned &Reg, VRegInfo *&Info) {
}
}
+bool MIParser::parseRegisterClassOrBank(VRegInfo &RegInfo) {
+ if (Token.isNot(MIToken::Identifier) && Token.isNot(MIToken::underscore))
+ return error("expected '_', register class, or register bank name");
+ StringRef::iterator Loc = Token.location();
+ StringRef Name = Token.stringValue();
+
+ // Was it a register class?
+ auto RCNameI = PFS.Names2RegClasses.find(Name);
+ if (RCNameI != PFS.Names2RegClasses.end()) {
+ lex();
+ const TargetRegisterClass &RC = *RCNameI->getValue();
+
+ switch (RegInfo.Kind) {
+ case VRegInfo::UNKNOWN:
+ case VRegInfo::NORMAL:
+ RegInfo.Kind = VRegInfo::NORMAL;
+ if (RegInfo.Explicit && RegInfo.D.RC != &RC) {
+ const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
+ return error(Loc, Twine("conflicting register classes, previously: ") +
+ Twine(TRI.getRegClassName(RegInfo.D.RC)));
+ }
+ RegInfo.D.RC = &RC;
+ RegInfo.Explicit = true;
+ return false;
+
+ case VRegInfo::GENERIC:
+ case VRegInfo::REGBANK:
+ return error(Loc, "register class specification on generic register");
+ }
+ llvm_unreachable("Unexpected register kind");
+ }
+
+ // Should be a register bank or a generic register.
+ const RegisterBank *RegBank = nullptr;
+ if (Name != "_") {
+ auto RBNameI = PFS.Names2RegBanks.find(Name);
+ if (RBNameI == PFS.Names2RegBanks.end())
+ return error(Loc, "expected '_', register class, or register bank name");
+ RegBank = RBNameI->getValue();
+ }
+
+ lex();
+
+ switch (RegInfo.Kind) {
+ case VRegInfo::UNKNOWN:
+ case VRegInfo::GENERIC:
+ case VRegInfo::REGBANK:
+ RegInfo.Kind = RegBank ? VRegInfo::REGBANK : VRegInfo::GENERIC;
+ if (RegInfo.Explicit && RegInfo.D.RegBank != RegBank)
+ return error(Loc, "conflicting generic register banks");
+ RegInfo.D.RegBank = RegBank;
+ RegInfo.Explicit = true;
+ return false;
+
+ case VRegInfo::NORMAL:
+ return error(Loc, "register bank specification on normal register");
+ }
+ llvm_unreachable("Unexpected register kind");
+}
+
bool MIParser::parseRegisterFlag(unsigned &Flags) {
const unsigned OldFlags = Flags;
switch (Token.kind()) {
@@ -1004,6 +1150,13 @@ bool MIParser::parseRegisterOperand(MachineOperand &Dest,
if (!TargetRegisterInfo::isVirtualRegister(Reg))
return error("subregister index expects a virtual register");
}
+ if (Token.is(MIToken::colon)) {
+ if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ return error("register class specification expects a virtual register");
+ lex();
+ if (parseRegisterClassOrBank(*RegInfo))
+ return true;
+ }
MachineRegisterInfo &MRI = MF.getRegInfo();
if ((Flags & RegState::Define) == 0) {
if (consumeIfPresent(MIToken::lparen)) {
@@ -1598,6 +1751,35 @@ bool MIParser::parseTargetIndexOperand(MachineOperand &Dest) {
return false;
}
+bool MIParser::parseCustomRegisterMaskOperand(MachineOperand &Dest) {
+ assert(Token.stringValue() == "CustomRegMask" && "Expected a custom RegMask");
+ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
+ assert(TRI && "Expected target register info");
+ lex();
+ if (expectAndConsume(MIToken::lparen))
+ return true;
+
+ uint32_t *Mask = MF.allocateRegisterMask(TRI->getNumRegs());
+ while (true) {
+ if (Token.isNot(MIToken::NamedRegister))
+ return error("expected a named register");
+ unsigned Reg;
+ if (parseNamedRegister(Reg))
+ return true;
+ lex();
+ Mask[Reg / 32] |= 1U << (Reg % 32);
+ // TODO: Report an error if the same register is used more than once.
+ if (Token.isNot(MIToken::comma))
+ break;
+ lex();
+ }
+
+ if (expectAndConsume(MIToken::rparen))
+ return true;
+ Dest = MachineOperand::CreateRegMask(Mask);
+ return false;
+}
+
bool MIParser::parseLiveoutRegisterMaskOperand(MachineOperand &Dest) {
assert(Token.is(MIToken::kw_liveout));
const auto *TRI = MF.getSubtarget().getRegisterInfo();
@@ -1695,8 +1877,8 @@ bool MIParser::parseMachineOperand(MachineOperand &Dest,
Dest = MachineOperand::CreateRegMask(RegMask);
lex();
break;
- }
- LLVM_FALLTHROUGH;
+ } else
+ return parseCustomRegisterMaskOperand(Dest);
default:
// FIXME: Parse the MCSymbol machine operand.
return error("expected a machine operand");
@@ -1867,7 +2049,14 @@ bool MIParser::parseMemoryOperandFlag(MachineMemOperand::Flags &Flags) {
case MIToken::kw_invariant:
Flags |= MachineMemOperand::MOInvariant;
break;
- // TODO: parse the target specific memory operand flags.
+ case MIToken::StringConstant: {
+ MachineMemOperand::Flags TF;
+ if (getMMOTargetFlag(Token.stringValue(), TF))
+ return error("use of undefined target MMO flag '" + Token.stringValue() +
+ "'");
+ Flags |= TF;
+ break;
+ }
default:
llvm_unreachable("The current token should be a memory operand flag");
}
@@ -1909,7 +2098,7 @@ bool MIParser::parseMemoryPseudoSourceValue(const PseudoSourceValue *&PSV) {
// The token was already consumed, so use return here instead of break.
return false;
}
- case MIToken::kw_call_entry: {
+ case MIToken::kw_call_entry:
lex();
switch (Token.kind()) {
case MIToken::GlobalValue:
@@ -1929,7 +2118,6 @@ bool MIParser::parseMemoryPseudoSourceValue(const PseudoSourceValue *&PSV) {
"expected a global value or an external symbol after 'call-entry'");
}
break;
- }
default:
llvm_unreachable("The current token should be pseudo source value");
}
@@ -1969,6 +2157,48 @@ bool MIParser::parseMachinePointerInfo(MachinePointerInfo &Dest) {
return false;
}
+bool MIParser::parseOptionalScope(LLVMContext &Context,
+ SyncScope::ID &SSID) {
+ SSID = SyncScope::System;
+ if (Token.is(MIToken::Identifier) && Token.stringValue() == "syncscope") {
+ lex();
+ if (expectAndConsume(MIToken::lparen))
+ return error("expected '(' in syncscope");
+
+ std::string SSN;
+ if (parseStringConstant(SSN))
+ return true;
+
+ SSID = Context.getOrInsertSyncScopeID(SSN);
+ if (expectAndConsume(MIToken::rparen))
+ return error("expected ')' in syncscope");
+ }
+
+ return false;
+}
+
+bool MIParser::parseOptionalAtomicOrdering(AtomicOrdering &Order) {
+ Order = AtomicOrdering::NotAtomic;
+ if (Token.isNot(MIToken::Identifier))
+ return false;
+
+ Order = StringSwitch<AtomicOrdering>(Token.stringValue())
+ .Case("unordered", AtomicOrdering::Unordered)
+ .Case("monotonic", AtomicOrdering::Monotonic)
+ .Case("acquire", AtomicOrdering::Acquire)
+ .Case("release", AtomicOrdering::Release)
+ .Case("acq_rel", AtomicOrdering::AcquireRelease)
+ .Case("seq_cst", AtomicOrdering::SequentiallyConsistent)
+ .Default(AtomicOrdering::NotAtomic);
+
+ if (Order != AtomicOrdering::NotAtomic) {
+ lex();
+ return false;
+ }
+
+ return error("expected an atomic scope, ordering or a size integer literal");
+}
+
bool MIParser::parseMachineMemoryOperand(MachineMemOperand *&Dest) {
if (expectAndConsume(MIToken::lparen))
return true;
@@ -1986,6 +2216,19 @@ bool MIParser::parseMachineMemoryOperand(MachineMemOperand *&Dest) {
Flags |= MachineMemOperand::MOStore;
lex();
+ // Optional synchronization scope.
+ SyncScope::ID SSID;
+ if (parseOptionalScope(MF.getFunction()->getContext(), SSID))
+ return true;
+
+ // Up to two atomic orderings (cmpxchg provides guarantees on failure).
+ AtomicOrdering Order, FailureOrder;
+ if (parseOptionalAtomicOrdering(Order))
+ return true;
+
+ if (parseOptionalAtomicOrdering(FailureOrder))
+ return true;
+
if (Token.isNot(MIToken::IntegerLiteral))
return error("expected the size integer literal after memory operation");
uint64_t Size;
@@ -2040,8 +2283,8 @@ bool MIParser::parseMachineMemoryOperand(MachineMemOperand *&Dest) {
}
if (expectAndConsume(MIToken::rparen))
return true;
- Dest =
- MF.getMachineMemOperand(Ptr, Flags, Size, BaseAlignment, AAInfo, Range);
+ Dest = MF.getMachineMemOperand(Ptr, Flags, Size, BaseAlignment, AAInfo, Range,
+ SSID, Order, FailureOrder);
return false;
}
@@ -2254,6 +2497,35 @@ bool MIParser::getBitmaskTargetFlag(StringRef Name, unsigned &Flag) {
return false;
}
+void MIParser::initNames2MMOTargetFlags() {
+ if (!Names2MMOTargetFlags.empty())
+ return;
+ const auto *TII = MF.getSubtarget().getInstrInfo();
+ assert(TII && "Expected target instruction info");
+ auto Flags = TII->getSerializableMachineMemOperandTargetFlags();
+ for (const auto &I : Flags)
+ Names2MMOTargetFlags.insert(
+ std::make_pair(StringRef(I.second), I.first));
+}
+
+bool MIParser::getMMOTargetFlag(StringRef Name,
+ MachineMemOperand::Flags &Flag) {
+ initNames2MMOTargetFlags();
+ auto FlagInfo = Names2MMOTargetFlags.find(Name);
+ if (FlagInfo == Names2MMOTargetFlags.end())
+ return true;
+ Flag = FlagInfo->second;
+ return false;
+}
+
+bool MIParser::parseStringConstant(std::string &Result) {
+ if (Token.isNot(MIToken::StringConstant))
+ return error("expected string constant");
+ Result = Token.stringValue();
+ lex();
+ return false;
+}
+
bool llvm::parseMachineBasicBlockDefinitions(PerFunctionMIParsingState &PFS,
StringRef Src,
SMDiagnostic &Error) {
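parseOptionalAtomicOrdering above uses llvm::StringSwitch, the idiomatic replacement for a hand-rolled if/else chain when mapping keyword spellings onto an enum. Extracted as a standalone sketch (the spellings mirror the patch; the function name is illustrative):

    #include "llvm/ADT/StringRef.h"
    #include "llvm/ADT/StringSwitch.h"
    #include "llvm/Support/AtomicOrdering.h"

    using namespace llvm;

    static AtomicOrdering orderingFromString(StringRef S) {
      return StringSwitch<AtomicOrdering>(S)
          .Case("unordered", AtomicOrdering::Unordered)
          .Case("monotonic", AtomicOrdering::Monotonic)
          .Case("acquire", AtomicOrdering::Acquire)
          .Case("release", AtomicOrdering::Release)
          .Case("acq_rel", AtomicOrdering::AcquireRelease)
          .Case("seq_cst", AtomicOrdering::SequentiallyConsistent)
          // NotAtomic doubles as the "not an ordering keyword" sentinel.
          .Default(AtomicOrdering::NotAtomic);
    }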
diff --git a/contrib/llvm/lib/CodeGen/MIRParser/MIParser.h b/contrib/llvm/lib/CodeGen/MIRParser/MIParser.h
index 93a4d84..2307881 100644
--- a/contrib/llvm/lib/CodeGen/MIRParser/MIParser.h
+++ b/contrib/llvm/lib/CodeGen/MIRParser/MIParser.h
@@ -1,4 +1,4 @@
-//===- MIParser.h - Machine Instructions Parser ---------------------------===//
+//===- MIParser.h - Machine Instructions Parser -----------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -15,21 +15,19 @@
#define LLVM_LIB_CODEGEN_MIRPARSER_MIPARSER_H
#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/Support/Allocator.h"
namespace llvm {
-class StringRef;
-class BasicBlock;
class MachineBasicBlock;
class MachineFunction;
-class MachineInstr;
-class MachineRegisterInfo;
class MDNode;
class RegisterBank;
struct SlotMapping;
class SMDiagnostic;
class SourceMgr;
+class StringRef;
class TargetRegisterClass;
struct VRegInfo {
@@ -45,11 +43,16 @@ struct VRegInfo {
unsigned PreferredReg = 0;
};
+using Name2RegClassMap = StringMap<const TargetRegisterClass *>;
+using Name2RegBankMap = StringMap<const RegisterBank *>;
+
struct PerFunctionMIParsingState {
BumpPtrAllocator Allocator;
MachineFunction &MF;
SourceMgr *SM;
const SlotMapping &IRSlots;
+ const Name2RegClassMap &Names2RegClasses;
+ const Name2RegBankMap &Names2RegBanks;
DenseMap<unsigned, MachineBasicBlock *> MBBSlots;
DenseMap<unsigned, VRegInfo*> VRegInfos;
@@ -59,7 +62,9 @@ struct PerFunctionMIParsingState {
DenseMap<unsigned, unsigned> JumpTableSlots;
PerFunctionMIParsingState(MachineFunction &MF, SourceMgr &SM,
- const SlotMapping &IRSlots);
+ const SlotMapping &IRSlots,
+ const Name2RegClassMap &Names2RegClasses,
+ const Name2RegBankMap &Names2RegBanks);
VRegInfo &getVRegInfo(unsigned VReg);
};
@@ -115,4 +120,4 @@ bool parseMDNode(PerFunctionMIParsingState &PFS, MDNode *&Node, StringRef Src,
} // end namespace llvm
-#endif
+#endif // LLVM_LIB_CODEGEN_MIRPARSER_MIPARSER_H
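The new Name2RegClassMap and Name2RegBankMap aliases are ordinary llvm::StringMap instantiations, shared by reference between MIRParserImpl (which owns them) and the per-function parsing state. A quick sketch of the StringMap operations the register-class parsing relies on (keys and values are illustrative):

    #include "llvm/ADT/StringMap.h"
    #include "llvm/ADT/StringRef.h"
    #include <utility>

    using namespace llvm;

    void stringMapDemo() {
      StringMap<int> M;
      M.insert(std::make_pair("gpr32", 1));  // key text is copied into the map
      StringMap<int>::iterator It = M.find("gpr32");
      if (It != M.end()) {
        StringRef Key = It->getKey();        // "gpr32"
        int Value = It->getValue();          // 1
        (void)Key; (void)Value;
      }
    }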
diff --git a/contrib/llvm/lib/CodeGen/MIRParser/MIRParser.cpp b/contrib/llvm/lib/CodeGen/MIRParser/MIRParser.cpp
index 3dff114..78b57f3 100644
--- a/contrib/llvm/lib/CodeGen/MIRParser/MIRParser.cpp
+++ b/contrib/llvm/lib/CodeGen/MIRParser/MIRParser.cpp
@@ -50,18 +50,24 @@ namespace llvm {
/// file.
class MIRParserImpl {
SourceMgr SM;
+ yaml::Input In;
StringRef Filename;
LLVMContext &Context;
- StringMap<std::unique_ptr<yaml::MachineFunction>> Functions;
SlotMapping IRSlots;
/// Maps from register class names to register classes.
- StringMap<const TargetRegisterClass *> Names2RegClasses;
+ Name2RegClassMap Names2RegClasses;
/// Maps from register bank names to register banks.
- StringMap<const RegisterBank *> Names2RegBanks;
+ Name2RegBankMap Names2RegBanks;
+ /// True when the MIR file doesn't have LLVM IR. Dummy IR functions are
+ /// created and inserted into the given module when this is true.
+ bool NoLLVMIR = false;
+  /// True when a well-formed MIR file does not contain any MIR/machine
+  /// function parts.
+ bool NoMIRDocuments = false;
public:
- MIRParserImpl(std::unique_ptr<MemoryBuffer> Contents, StringRef Filename,
- LLVMContext &Context);
+ MIRParserImpl(std::unique_ptr<MemoryBuffer> Contents,
+ StringRef Filename, LLVMContext &Context);
void reportDiagnostic(const SMDiagnostic &Diag);
@@ -85,22 +91,22 @@ public:
/// file.
///
/// Return null if an error occurred.
- std::unique_ptr<Module> parse();
+ std::unique_ptr<Module> parseIRModule();
+
+ bool parseMachineFunctions(Module &M, MachineModuleInfo &MMI);
/// Parse the machine function in the current YAML document.
///
- /// \param NoLLVMIR - set to true when the MIR file doesn't have LLVM IR.
- /// A dummy IR function is created and inserted into the given module when
- /// this parameter is true.
///
/// Return true if an error occurred.
- bool parseMachineFunction(yaml::Input &In, Module &M, bool NoLLVMIR);
+ bool parseMachineFunction(Module &M, MachineModuleInfo &MMI);
/// Initialize the machine function to the state that's described in the MIR
/// file.
///
/// Return true if error occurred.
- bool initializeMachineFunction(MachineFunction &MF);
+ bool initializeMachineFunction(const yaml::MachineFunction &YamlMF,
+ MachineFunction &MF);
bool parseRegisterInfo(PerFunctionMIParsingState &PFS,
const yaml::MachineFunction &YamlMF);
@@ -144,9 +150,6 @@ private:
SMDiagnostic diagFromBlockStringDiag(const SMDiagnostic &Error,
SMRange SourceRange);
- /// Create an empty function with the given name.
- void createDummyFunction(StringRef Name, Module &M);
-
void initNames2RegClasses(const MachineFunction &MF);
void initNames2RegBanks(const MachineFunction &MF);
@@ -166,10 +169,19 @@ private:
} // end namespace llvm
+static void handleYAMLDiag(const SMDiagnostic &Diag, void *Context) {
+ reinterpret_cast<MIRParserImpl *>(Context)->reportDiagnostic(Diag);
+}
+
MIRParserImpl::MIRParserImpl(std::unique_ptr<MemoryBuffer> Contents,
StringRef Filename, LLVMContext &Context)
- : SM(), Filename(Filename), Context(Context) {
- SM.AddNewSourceBuffer(std::move(Contents), SMLoc());
+ : SM(),
+ In(SM.getMemoryBuffer(
+ SM.AddNewSourceBuffer(std::move(Contents), SMLoc()))->getBuffer(),
+ nullptr, handleYAMLDiag, this),
+ Filename(Filename),
+ Context(Context) {
+ In.setContext(&In);
}
bool MIRParserImpl::error(const Twine &Message) {
@@ -206,24 +218,16 @@ void MIRParserImpl::reportDiagnostic(const SMDiagnostic &Diag) {
Context.diagnose(DiagnosticInfoMIRParser(Kind, Diag));
}
-static void handleYAMLDiag(const SMDiagnostic &Diag, void *Context) {
- reinterpret_cast<MIRParserImpl *>(Context)->reportDiagnostic(Diag);
-}
-
-std::unique_ptr<Module> MIRParserImpl::parse() {
- yaml::Input In(SM.getMemoryBuffer(SM.getMainFileID())->getBuffer(),
- /*Ctxt=*/nullptr, handleYAMLDiag, this);
- In.setContext(&In);
-
+std::unique_ptr<Module> MIRParserImpl::parseIRModule() {
if (!In.setCurrentDocument()) {
if (In.error())
return nullptr;
// Create an empty module when the MIR file is empty.
+ NoMIRDocuments = true;
return llvm::make_unique<Module>(Filename, Context);
}
std::unique_ptr<Module> M;
- bool NoLLVMIR = false;
   // Parse the block scalar manually so that we can return a unique pointer
   // without having to go through YAML traits.
if (const auto *BSN =
@@ -237,49 +241,68 @@ std::unique_ptr<Module> MIRParserImpl::parse() {
}
In.nextDocument();
if (!In.setCurrentDocument())
- return M;
+ NoMIRDocuments = true;
} else {
    // Create a new, empty module.
M = llvm::make_unique<Module>(Filename, Context);
NoLLVMIR = true;
}
+ return M;
+}
+
+bool MIRParserImpl::parseMachineFunctions(Module &M, MachineModuleInfo &MMI) {
+ if (NoMIRDocuments)
+ return false;
// Parse the machine functions.
do {
- if (parseMachineFunction(In, *M, NoLLVMIR))
- return nullptr;
+ if (parseMachineFunction(M, MMI))
+ return true;
In.nextDocument();
} while (In.setCurrentDocument());
- return M;
-}
-
-bool MIRParserImpl::parseMachineFunction(yaml::Input &In, Module &M,
- bool NoLLVMIR) {
- auto MF = llvm::make_unique<yaml::MachineFunction>();
- yaml::EmptyContext Ctx;
- yaml::yamlize(In, *MF, false, Ctx);
- if (In.error())
- return true;
- auto FunctionName = MF->Name;
- if (Functions.find(FunctionName) != Functions.end())
- return error(Twine("redefinition of machine function '") + FunctionName +
- "'");
- Functions.insert(std::make_pair(FunctionName, std::move(MF)));
- if (NoLLVMIR)
- createDummyFunction(FunctionName, M);
- else if (!M.getFunction(FunctionName))
- return error(Twine("function '") + FunctionName +
- "' isn't defined in the provided LLVM IR");
return false;
}
-void MIRParserImpl::createDummyFunction(StringRef Name, Module &M) {
+/// Create an empty function with the given name.
+static Function *createDummyFunction(StringRef Name, Module &M) {
auto &Context = M.getContext();
Function *F = cast<Function>(M.getOrInsertFunction(
Name, FunctionType::get(Type::getVoidTy(Context), false)));
BasicBlock *BB = BasicBlock::Create(Context, "entry", F);
new UnreachableInst(Context, BB);
+ return F;
+}
+
+bool MIRParserImpl::parseMachineFunction(Module &M, MachineModuleInfo &MMI) {
+ // Parse the yaml.
+ yaml::MachineFunction YamlMF;
+ yaml::EmptyContext Ctx;
+ yaml::yamlize(In, YamlMF, false, Ctx);
+ if (In.error())
+ return true;
+
+ // Search for the corresponding IR function.
+ StringRef FunctionName = YamlMF.Name;
+ Function *F = M.getFunction(FunctionName);
+ if (!F) {
+ if (NoLLVMIR) {
+ F = createDummyFunction(FunctionName, M);
+ } else {
+ return error(Twine("function '") + FunctionName +
+ "' isn't defined in the provided LLVM IR");
+ }
+ }
+ if (MMI.getMachineFunction(*F) != nullptr)
+ return error(Twine("redefinition of machine function '") + FunctionName +
+ "'");
+
+ // Create the MachineFunction.
+ MachineFunction &MF = MMI.getOrCreateMachineFunction(*F);
+ if (initializeMachineFunction(YamlMF, MF))
+ return true;
+
+ return false;
}
static bool isSSA(const MachineFunction &MF) {
@@ -319,13 +342,12 @@ void MIRParserImpl::computeFunctionProperties(MachineFunction &MF) {
Properties.set(MachineFunctionProperties::Property::NoVRegs);
}
-bool MIRParserImpl::initializeMachineFunction(MachineFunction &MF) {
- auto It = Functions.find(MF.getName());
- if (It == Functions.end())
- return error(Twine("no machine function information for function '") +
- MF.getName() + "' in the MIR file");
+bool
+MIRParserImpl::initializeMachineFunction(const yaml::MachineFunction &YamlMF,
+ MachineFunction &MF) {
// TODO: Recreate the machine function.
- const yaml::MachineFunction &YamlMF = *It->getValue();
+ initNames2RegClasses(MF);
+ initNames2RegBanks(MF);
if (YamlMF.Alignment)
MF.setAlignment(YamlMF.Alignment);
MF.setExposesReturnsTwice(YamlMF.ExposesReturnsTwice);
@@ -338,7 +360,8 @@ bool MIRParserImpl::initializeMachineFunction(MachineFunction &MF) {
if (YamlMF.Selected)
MF.getProperties().set(MachineFunctionProperties::Property::Selected);
- PerFunctionMIParsingState PFS(MF, SM, IRSlots);
+ PerFunctionMIParsingState PFS(MF, SM, IRSlots, Names2RegClasses,
+ Names2RegBanks);
if (parseRegisterInfo(PFS, YamlMF))
return true;
if (!YamlMF.Constants.empty()) {
@@ -362,9 +385,6 @@ bool MIRParserImpl::initializeMachineFunction(MachineFunction &MF) {
}
PFS.SM = &SM;
- if (MF.empty())
- return error(Twine("machine function '") + Twine(MF.getName()) +
- "' requires at least one machine basic block in its body");
// Initialize the frame information after creating all the MBBs so that the
// MBB references in the frame information can be resolved.
if (initializeFrameInfo(PFS, YamlMF))
@@ -462,17 +482,19 @@ bool MIRParserImpl::parseRegisterInfo(PerFunctionMIParsingState &PFS,
RegInfo.addLiveIn(Reg, VReg);
}
- // Parse the callee saved register mask.
- BitVector CalleeSavedRegisterMask(RegInfo.getUsedPhysRegsMask().size());
- if (!YamlMF.CalleeSavedRegisters)
- return false;
- for (const auto &RegSource : YamlMF.CalleeSavedRegisters.getValue()) {
- unsigned Reg = 0;
- if (parseNamedRegisterReference(PFS, Reg, RegSource.Value, Error))
- return error(Error, RegSource.SourceRange);
- CalleeSavedRegisterMask[Reg] = true;
+  // Parse the callee saved registers (registers that the callee
+  // must save and restore for the caller).
+ if (YamlMF.CalleeSavedRegisters) {
+ SmallVector<MCPhysReg, 16> CalleeSavedRegisters;
+ for (const auto &RegSource : YamlMF.CalleeSavedRegisters.getValue()) {
+ unsigned Reg = 0;
+ if (parseNamedRegisterReference(PFS, Reg, RegSource.Value, Error))
+ return error(Error, RegSource.SourceRange);
+ CalleeSavedRegisters.push_back(Reg);
+ }
+ RegInfo.setCalleeSavedRegs(CalleeSavedRegisters);
}
- RegInfo.setUsedPhysRegMask(CalleeSavedRegisterMask.flip());
+
return false;
}
@@ -505,14 +527,12 @@ bool MIRParserImpl::setupRegisterInfo(const PerFunctionMIParsingState &PFS,
}
// Compute MachineRegisterInfo::UsedPhysRegMask
- if (!YamlMF.CalleeSavedRegisters) {
- for (const MachineBasicBlock &MBB : MF) {
- for (const MachineInstr &MI : MBB) {
- for (const MachineOperand &MO : MI.operands()) {
- if (!MO.isRegMask())
- continue;
- MRI.addPhysRegsUsedFromRegMask(MO.getRegMask());
- }
+ for (const MachineBasicBlock &MBB : MF) {
+ for (const MachineInstr &MI : MBB) {
+ for (const MachineOperand &MO : MI.operands()) {
+ if (!MO.isRegMask())
+ continue;
+ MRI.addPhysRegsUsedFromRegMask(MO.getRegMask());
}
}
}
@@ -539,7 +559,8 @@ bool MIRParserImpl::initializeFrameInfo(PerFunctionMIParsingState &PFS,
MFI.ensureMaxAlignment(YamlMFI.MaxAlignment);
MFI.setAdjustsStack(YamlMFI.AdjustsStack);
MFI.setHasCalls(YamlMFI.HasCalls);
- MFI.setMaxCallFrameSize(YamlMFI.MaxCallFrameSize);
+ if (YamlMFI.MaxCallFrameSize != ~0u)
+ MFI.setMaxCallFrameSize(YamlMFI.MaxCallFrameSize);
MFI.setHasOpaqueSPAdjustment(YamlMFI.HasOpaqueSPAdjustment);
MFI.setHasVAStart(YamlMFI.HasVAStart);
MFI.setHasMustTailInVarArgFunc(YamlMFI.HasMustTailInVarArgFunc);
@@ -818,7 +839,6 @@ void MIRParserImpl::initNames2RegBanks(const MachineFunction &MF) {
const TargetRegisterClass *MIRParserImpl::getRegClass(const MachineFunction &MF,
StringRef Name) {
- initNames2RegClasses(MF);
auto RegClassInfo = Names2RegClasses.find(Name);
if (RegClassInfo == Names2RegClasses.end())
return nullptr;
@@ -827,7 +847,6 @@ const TargetRegisterClass *MIRParserImpl::getRegClass(const MachineFunction &MF,
const RegisterBank *MIRParserImpl::getRegBank(const MachineFunction &MF,
StringRef Name) {
- initNames2RegBanks(MF);
auto RegBankInfo = Names2RegBanks.find(Name);
if (RegBankInfo == Names2RegBanks.end())
return nullptr;
@@ -839,16 +858,18 @@ MIRParser::MIRParser(std::unique_ptr<MIRParserImpl> Impl)
MIRParser::~MIRParser() {}
-std::unique_ptr<Module> MIRParser::parseLLVMModule() { return Impl->parse(); }
+std::unique_ptr<Module> MIRParser::parseIRModule() {
+ return Impl->parseIRModule();
+}
-bool MIRParser::initializeMachineFunction(MachineFunction &MF) {
- return Impl->initializeMachineFunction(MF);
+bool MIRParser::parseMachineFunctions(Module &M, MachineModuleInfo &MMI) {
+ return Impl->parseMachineFunctions(M, MMI);
}
std::unique_ptr<MIRParser> llvm::createMIRParserFromFile(StringRef Filename,
SMDiagnostic &Error,
LLVMContext &Context) {
- auto FileOrErr = MemoryBuffer::getFile(Filename);
+ auto FileOrErr = MemoryBuffer::getFileOrSTDIN(Filename);
if (std::error_code EC = FileOrErr.getError()) {
Error = SMDiagnostic(Filename, SourceMgr::DK_Error,
"Could not open input file: " + EC.message());
diff --git a/contrib/llvm/lib/CodeGen/MIRPrinter.cpp b/contrib/llvm/lib/CodeGen/MIRPrinter.cpp
index db87092..ddeacf1 100644
--- a/contrib/llvm/lib/CodeGen/MIRPrinter.cpp
+++ b/contrib/llvm/lib/CodeGen/MIRPrinter.cpp
@@ -12,36 +12,72 @@
//
//===----------------------------------------------------------------------===//
-#include "MIRPrinter.h"
-#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/None.h"
#include "llvm/ADT/SmallBitVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Twine.h"
#include "llvm/CodeGen/GlobalISel/RegisterBank.h"
-#include "llvm/CodeGen/MIRYamlMapping.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineMemOperand.h"
-#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MIRPrinter.h"
+#include "llvm/CodeGen/MIRYamlMapping.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DebugInfo.h"
-#include "llvm/IR/IRPrintingPasses.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/IRPrintingPasses.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/ModuleSlotTracker.h"
+#include "llvm/IR/Value.h"
+#include "llvm/MC/LaneBitmask.h"
+#include "llvm/MC/MCDwarf.h"
#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/AtomicOrdering.h"
+#include "llvm/Support/BranchProbability.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Format.h"
-#include "llvm/Support/MemoryBuffer.h"
-#include "llvm/Support/YAMLTraits.h"
+#include "llvm/Support/LowLevelTypeImpl.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/YAMLTraits.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetIntrinsicInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
+#include <algorithm>
+#include <cassert>
+#include <cinttypes>
+#include <cstdint>
+#include <iterator>
+#include <string>
+#include <utility>
+#include <vector>
using namespace llvm;
+static cl::opt<bool> SimplifyMIR("simplify-mir",
+ cl::desc("Leave out unnecessary information when printing MIR"));
+
namespace {
/// This structure describes how to print out stack object references.
@@ -104,6 +140,11 @@ class MIPrinter {
ModuleSlotTracker &MST;
const DenseMap<const uint32_t *, unsigned> &RegisterMaskIds;
const DenseMap<int, FrameIndexOperand> &StackObjectOperandMapping;
+ /// Synchronization scope names registered with LLVMContext.
+ SmallVector<StringRef, 8> SSNs;
+
+ bool canPredictBranchProbabilities(const MachineBasicBlock &MBB) const;
+ bool canPredictSuccessors(const MachineBasicBlock &MBB) const;
public:
MIPrinter(raw_ostream &OS, ModuleSlotTracker &MST,
@@ -124,7 +165,9 @@ public:
void print(const MachineOperand &Op, const TargetRegisterInfo *TRI,
unsigned I, bool ShouldPrintRegisterTies,
LLT TypeToPrint, bool IsDef = false);
- void print(const MachineMemOperand &Op);
+ void print(const LLVMContext &Context, const TargetInstrInfo &TII,
+ const MachineMemOperand &Op);
+ void printSyncScope(const LLVMContext &Context, SyncScope::ID SSID);
void print(const MCCFIInstruction &CFI, const TargetRegisterInfo *TRI);
};
@@ -139,6 +182,7 @@ template <> struct BlockScalarTraits<Module> {
static void output(const Module &Mod, void *Ctxt, raw_ostream &OS) {
Mod.print(OS, nullptr);
}
+
static StringRef input(StringRef Str, void *Ctxt, Module &Mod) {
llvm_unreachable("LLVM Module is supposed to be parsed separately");
return "";
@@ -202,9 +246,30 @@ void MIRPrinter::print(const MachineFunction &MF) {
}
StrOS.flush();
yaml::Output Out(OS);
+ if (!SimplifyMIR)
+ Out.setWriteDefaultValues(true);
Out << YamlMF;
}
+static void printCustomRegMask(const uint32_t *RegMask, raw_ostream &OS,
+ const TargetRegisterInfo *TRI) {
+ assert(RegMask && "Can't print an empty register mask");
+ OS << StringRef("CustomRegMask(");
+
+ bool IsRegInRegMaskFound = false;
+ for (int I = 0, E = TRI->getNumRegs(); I < E; I++) {
+ // Check whether the register is asserted in regmask.
+ if (RegMask[I / 32] & (1u << (I % 32))) {
+ if (IsRegInRegMaskFound)
+ OS << ',';
+ printReg(I, OS, TRI);
+ IsRegInRegMaskFound = true;
+ }
+ }
+
+ OS << ')';
+}
+
void MIRPrinter::convert(yaml::MachineFunction &MF,
const MachineRegisterInfo &RegInfo,
const TargetRegisterInfo *TRI) {
@@ -239,20 +304,18 @@ void MIRPrinter::convert(yaml::MachineFunction &MF,
printReg(I->second, LiveIn.VirtualRegister, TRI);
MF.LiveIns.push_back(LiveIn);
}
- // The used physical register mask is printed as an inverted callee saved
- // register mask.
- const BitVector &UsedPhysRegMask = RegInfo.getUsedPhysRegsMask();
- if (UsedPhysRegMask.none())
- return;
- std::vector<yaml::FlowStringValue> CalleeSavedRegisters;
- for (unsigned I = 0, E = UsedPhysRegMask.size(); I != E; ++I) {
- if (!UsedPhysRegMask[I]) {
+
+ // Prints the callee saved registers.
+ if (RegInfo.isUpdatedCSRsInitialized()) {
+ const MCPhysReg *CalleeSavedRegs = RegInfo.getCalleeSavedRegs();
+ std::vector<yaml::FlowStringValue> CalleeSavedRegisters;
+ for (const MCPhysReg *I = CalleeSavedRegs; *I; ++I) {
yaml::FlowStringValue Reg;
- printReg(I, Reg, TRI);
+ printReg(*I, Reg, TRI);
CalleeSavedRegisters.push_back(Reg);
}
+ MF.CalleeSavedRegisters = CalleeSavedRegisters;
}
- MF.CalleeSavedRegisters = CalleeSavedRegisters;
}
void MIRPrinter::convert(ModuleSlotTracker &MST,
@@ -267,7 +330,8 @@ void MIRPrinter::convert(ModuleSlotTracker &MST,
YamlMFI.MaxAlignment = MFI.getMaxAlignment();
YamlMFI.AdjustsStack = MFI.adjustsStack();
YamlMFI.HasCalls = MFI.hasCalls();
- YamlMFI.MaxCallFrameSize = MFI.getMaxCallFrameSize();
+ YamlMFI.MaxCallFrameSize = MFI.isMaxCallFrameSizeComputed()
+ ? MFI.getMaxCallFrameSize() : ~0u;
YamlMFI.HasOpaqueSPAdjustment = MFI.hasOpaqueSPAdjustment();
YamlMFI.HasVAStart = MFI.hasVAStart();
YamlMFI.HasMustTailInVarArgFunc = MFI.hasMustTailInVarArgFunc();
@@ -434,6 +498,62 @@ void MIRPrinter::initRegisterMaskIds(const MachineFunction &MF) {
RegisterMaskIds.insert(std::make_pair(Mask, I++));
}
+void llvm::guessSuccessors(const MachineBasicBlock &MBB,
+ SmallVectorImpl<MachineBasicBlock*> &Result,
+ bool &IsFallthrough) {
+ SmallPtrSet<MachineBasicBlock*,8> Seen;
+
+ for (const MachineInstr &MI : MBB) {
+ if (MI.isPHI())
+ continue;
+ for (const MachineOperand &MO : MI.operands()) {
+ if (!MO.isMBB())
+ continue;
+ MachineBasicBlock *Succ = MO.getMBB();
+ auto RP = Seen.insert(Succ);
+ if (RP.second)
+ Result.push_back(Succ);
+ }
+ }
+ MachineBasicBlock::const_iterator I = MBB.getLastNonDebugInstr();
+ IsFallthrough = I == MBB.end() || !I->isBarrier();
+}
+
+bool
+MIPrinter::canPredictBranchProbabilities(const MachineBasicBlock &MBB) const {
+ if (MBB.succ_size() <= 1)
+ return true;
+ if (!MBB.hasSuccessorProbabilities())
+ return true;
+
+ SmallVector<BranchProbability,8> Normalized(MBB.Probs.begin(),
+ MBB.Probs.end());
+ BranchProbability::normalizeProbabilities(Normalized.begin(),
+ Normalized.end());
+ SmallVector<BranchProbability,8> Equal(Normalized.size());
+ BranchProbability::normalizeProbabilities(Equal.begin(), Equal.end());
+
+ return std::equal(Normalized.begin(), Normalized.end(), Equal.begin());
+}
+
+bool MIPrinter::canPredictSuccessors(const MachineBasicBlock &MBB) const {
+ SmallVector<MachineBasicBlock*,8> GuessedSuccs;
+ bool GuessedFallthrough;
+ guessSuccessors(MBB, GuessedSuccs, GuessedFallthrough);
+ if (GuessedFallthrough) {
+ const MachineFunction &MF = *MBB.getParent();
+ MachineFunction::const_iterator NextI = std::next(MBB.getIterator());
+ if (NextI != MF.end()) {
+ MachineBasicBlock *Next = const_cast<MachineBasicBlock*>(&*NextI);
+ if (!is_contained(GuessedSuccs, Next))
+ GuessedSuccs.push_back(Next);
+ }
+ }
+ if (GuessedSuccs.size() != MBB.succ_size())
+ return false;
+ return std::equal(MBB.succ_begin(), MBB.succ_end(), GuessedSuccs.begin());
+}
+
void MIPrinter::print(const MachineBasicBlock &MBB) {
assert(MBB.getNumber() >= 0 && "Invalid MBB number");
OS << "bb." << MBB.getNumber();
@@ -472,13 +592,15 @@ void MIPrinter::print(const MachineBasicBlock &MBB) {
bool HasLineAttributes = false;
// Print the successors
- if (!MBB.succ_empty()) {
+ bool canPredictProbs = canPredictBranchProbabilities(MBB);
+ if (!MBB.succ_empty() && (!SimplifyMIR || !canPredictProbs ||
+ !canPredictSuccessors(MBB))) {
OS.indent(2) << "successors: ";
for (auto I = MBB.succ_begin(), E = MBB.succ_end(); I != E; ++I) {
if (I != MBB.succ_begin())
OS << ", ";
printMBBReference(**I);
- if (MBB.hasSuccessorProbabilities())
+ if (!SimplifyMIR || !canPredictProbs)
OS << '('
<< format("0x%08" PRIx32, MBB.getSuccProbability(I).getNumerator())
<< ')';
@@ -614,11 +736,12 @@ void MIPrinter::print(const MachineInstr &MI) {
if (!MI.memoperands_empty()) {
OS << " :: ";
+ const LLVMContext &Context = MF->getFunction()->getContext();
bool NeedComma = false;
for (const auto *Op : MI.memoperands()) {
if (NeedComma)
OS << ", ";
- print(*Op);
+ print(Context, *TII, *Op);
NeedComma = true;
}
}
@@ -823,7 +946,7 @@ void MIPrinter::print(const MachineOperand &Op, const TargetRegisterInfo *TRI,
OS << "%const." << Op.getIndex();
printOffset(Op.getOffset());
break;
- case MachineOperand::MO_TargetIndex: {
+ case MachineOperand::MO_TargetIndex:
OS << "target-index(";
if (const auto *Name = getTargetIndexName(
*Op.getParent()->getParent()->getParent(), Op.getIndex()))
@@ -833,15 +956,20 @@ void MIPrinter::print(const MachineOperand &Op, const TargetRegisterInfo *TRI,
OS << ')';
printOffset(Op.getOffset());
break;
- }
case MachineOperand::MO_JumpTableIndex:
OS << "%jump-table." << Op.getIndex();
break;
- case MachineOperand::MO_ExternalSymbol:
+ case MachineOperand::MO_ExternalSymbol: {
+ StringRef Name = Op.getSymbolName();
OS << '$';
- printLLVMNameWithoutPrefix(OS, Op.getSymbolName());
+ if (Name.empty()) {
+ OS << "\"\"";
+ } else {
+ printLLVMNameWithoutPrefix(OS, Name);
+ }
printOffset(Op.getOffset());
break;
+ }
case MachineOperand::MO_GlobalAddress:
Op.getGlobal()->printAsOperand(OS, /*PrintType=*/false, MST);
printOffset(Op.getOffset());
@@ -860,7 +988,7 @@ void MIPrinter::print(const MachineOperand &Op, const TargetRegisterInfo *TRI,
if (RegMaskInfo != RegisterMaskIds.end())
OS << StringRef(TRI->getRegMaskNames()[RegMaskInfo->second]).lower();
else
- llvm_unreachable("Can't print this machine register mask yet.");
+ printCustomRegMask(Op.getRegMask(), OS, TRI);
break;
}
case MachineOperand::MO_RegisterLiveOut: {
@@ -909,9 +1037,20 @@ void MIPrinter::print(const MachineOperand &Op, const TargetRegisterInfo *TRI,
}
}
-void MIPrinter::print(const MachineMemOperand &Op) {
+static const char *getTargetMMOFlagName(const TargetInstrInfo &TII,
+ unsigned TMMOFlag) {
+ auto Flags = TII.getSerializableMachineMemOperandTargetFlags();
+ for (const auto &I : Flags) {
+ if (I.first == TMMOFlag) {
+ return I.second;
+ }
+ }
+ return nullptr;
+}
+
+void MIPrinter::print(const LLVMContext &Context, const TargetInstrInfo &TII,
+ const MachineMemOperand &Op) {
OS << '(';
- // TODO: Print operand's target specific flags.
if (Op.isVolatile())
OS << "volatile ";
if (Op.isNonTemporal())
@@ -920,12 +1059,29 @@ void MIPrinter::print(const MachineMemOperand &Op) {
OS << "dereferenceable ";
if (Op.isInvariant())
OS << "invariant ";
+ if (Op.getFlags() & MachineMemOperand::MOTargetFlag1)
+ OS << '"' << getTargetMMOFlagName(TII, MachineMemOperand::MOTargetFlag1)
+ << "\" ";
+ if (Op.getFlags() & MachineMemOperand::MOTargetFlag2)
+ OS << '"' << getTargetMMOFlagName(TII, MachineMemOperand::MOTargetFlag2)
+ << "\" ";
+ if (Op.getFlags() & MachineMemOperand::MOTargetFlag3)
+ OS << '"' << getTargetMMOFlagName(TII, MachineMemOperand::MOTargetFlag3)
+ << "\" ";
if (Op.isLoad())
OS << "load ";
else {
assert(Op.isStore() && "Non load machine operand must be a store");
OS << "store ";
}
+
+ printSyncScope(Context, Op.getSyncScopeID());
+
+ if (Op.getOrdering() != AtomicOrdering::NotAtomic)
+ OS << toIRString(Op.getOrdering()) << ' ';
+ if (Op.getFailureOrdering() != AtomicOrdering::NotAtomic)
+ OS << toIRString(Op.getFailureOrdering()) << ' ';
+
OS << Op.getSize();
if (const Value *Val = Op.getValue()) {
OS << (Op.isLoad() ? " from " : " into ");
@@ -988,6 +1144,23 @@ void MIPrinter::print(const MachineMemOperand &Op) {
OS << ')';
}
+void MIPrinter::printSyncScope(const LLVMContext &Context, SyncScope::ID SSID) {
+ switch (SSID) {
+ case SyncScope::System: {
+ break;
+ }
+ default: {
+ if (SSNs.empty())
+ Context.getSyncScopeNames(SSNs);
+
+ OS << "syncscope(\"";
+ PrintEscapedString(SSNs[SSID], OS);
+ OS << "\") ";
+ break;
+ }
+ }
+}
+
static void printCFIRegister(unsigned DwarfReg, raw_ostream &OS,
const TargetRegisterInfo *TRI) {
int Reg = TRI->getLLVMRegNum(DwarfReg, true);
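-simplify-mir is a plain cl::opt; when set, the printer elides successor lists and probabilities that guessSuccessors can reconstruct on the way back in. Declaring and reading such a flag, sketched for a standalone tool (only the flag name and description come from the patch):

    #include "llvm/Support/CommandLine.h"
    #include "llvm/Support/raw_ostream.h"

    using namespace llvm;

    static cl::opt<bool> SimplifyOutput(
        "simplify-mir", cl::init(false),
        cl::desc("Leave out unnecessary information when printing MIR"));

    int main(int argc, char **argv) {
      cl::ParseCommandLineOptions(argc, argv);
      if (SimplifyOutput)
        outs() << "eliding fields the parser can reconstruct\n";
      return 0;
    }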
diff --git a/contrib/llvm/lib/CodeGen/MIRPrinter.h b/contrib/llvm/lib/CodeGen/MIRPrinter.h
deleted file mode 100644
index 16aa903..0000000
--- a/contrib/llvm/lib/CodeGen/MIRPrinter.h
+++ /dev/null
@@ -1,33 +0,0 @@
-//===- MIRPrinter.h - MIR serialization format printer --------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file declares the functions that print out the LLVM IR and the machine
-// functions using the MIR serialization format.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIB_CODEGEN_MIRPRINTER_H
-#define LLVM_LIB_CODEGEN_MIRPRINTER_H
-
-namespace llvm {
-
-class MachineFunction;
-class Module;
-class raw_ostream;
-
-/// Print LLVM IR using the MIR serialization format to the given output stream.
-void printMIR(raw_ostream &OS, const Module &M);
-
-/// Print a machine function using the MIR serialization format to the given
-/// output stream.
-void printMIR(raw_ostream &OS, const MachineFunction &MF);
-
-} // end namespace llvm
-
-#endif
diff --git a/contrib/llvm/lib/CodeGen/MIRPrintingPass.cpp b/contrib/llvm/lib/CodeGen/MIRPrintingPass.cpp
index c690bcf..09354cf 100644
--- a/contrib/llvm/lib/CodeGen/MIRPrintingPass.cpp
+++ b/contrib/llvm/lib/CodeGen/MIRPrintingPass.cpp
@@ -12,10 +12,11 @@
//
//===----------------------------------------------------------------------===//
-#include "MIRPrinter.h"
-#include "llvm/CodeGen/Passes.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MIRPrinter.h"
+
#include "llvm/CodeGen/MIRYamlMapping.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/Passes.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
diff --git a/contrib/llvm/lib/CodeGen/MachineBasicBlock.cpp b/contrib/llvm/lib/CodeGen/MachineBasicBlock.cpp
index 3869f97..81597af 100644
--- a/contrib/llvm/lib/CodeGen/MachineBasicBlock.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineBasicBlock.cpp
@@ -23,6 +23,7 @@
#include "llvm/CodeGen/SlotIndexes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/ModuleSlotTracker.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
@@ -148,8 +149,11 @@ MachineBasicBlock::iterator MachineBasicBlock::getFirstNonPHI() {
MachineBasicBlock::iterator
MachineBasicBlock::SkipPHIsAndLabels(MachineBasicBlock::iterator I) {
+ const TargetInstrInfo *TII = getParent()->getSubtarget().getInstrInfo();
+
iterator E = end();
- while (I != E && (I->isPHI() || I->isPosition()))
+ while (I != E && (I->isPHI() || I->isPosition() ||
+ TII->isBasicBlockPrologue(*I)))
++I;
// FIXME: This needs to change if we wish to bundle labels
// inside the bundle.
@@ -160,8 +164,11 @@ MachineBasicBlock::SkipPHIsAndLabels(MachineBasicBlock::iterator I) {
MachineBasicBlock::iterator
MachineBasicBlock::SkipPHIsLabelsAndDebug(MachineBasicBlock::iterator I) {
+ const TargetInstrInfo *TII = getParent()->getSubtarget().getInstrInfo();
+
iterator E = end();
- while (I != E && (I->isPHI() || I->isPosition() || I->isDebugValue()))
+ while (I != E && (I->isPHI() || I->isPosition() || I->isDebugValue() ||
+ TII->isBasicBlockPrologue(*I)))
++I;
// FIXME: This needs to change if we wish to bundle labels / dbg_values
// inside the bundle.
@@ -221,11 +228,17 @@ LLVM_DUMP_METHOD void MachineBasicBlock::dump() const {
}
#endif
+bool MachineBasicBlock::isLegalToHoistInto() const {
+ if (isReturnBlock() || hasEHPadSuccessor())
+ return false;
+ return true;
+}
+
StringRef MachineBasicBlock::getName() const {
if (const BasicBlock *LBB = getBasicBlock())
return LBB->getName();
else
- return "(null)";
+ return StringRef("", 0);
}
/// Return a hopefully unique identifier for this block.
@@ -343,6 +356,13 @@ void MachineBasicBlock::removeLiveIn(MCPhysReg Reg, LaneBitmask LaneMask) {
LiveIns.erase(I);
}
+MachineBasicBlock::livein_iterator
+MachineBasicBlock::removeLiveIn(MachineBasicBlock::livein_iterator I) {
+ // Get non-const version of iterator.
+ LiveInVector::iterator LI = LiveIns.begin() + (I - LiveIns.begin());
+ return LiveIns.erase(LI);
+}
+
bool MachineBasicBlock::isLiveIn(MCPhysReg Reg, LaneBitmask LaneMask) const {
livein_iterator I = find_if(
LiveIns, [Reg](const RegisterMaskPair &LI) { return LI.PhysReg == Reg; });
@@ -417,7 +437,7 @@ void MachineBasicBlock::updateTerminator() {
MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
SmallVector<MachineOperand, 4> Cond;
- DebugLoc DL; // FIXME: this is nowhere
+ DebugLoc DL = findBranchDebugLoc();
bool B = TII->analyzeBranch(*this, TBB, FBB, Cond);
(void) B;
assert(!B && "UpdateTerminators requires analyzable predecessors!");
@@ -485,7 +505,7 @@ void MachineBasicBlock::updateTerminator() {
// FIXME: This does not seem like a reasonable pattern to support, but it
// has been seen in the wild coming out of degenerate ARM test cases.
TII->removeBranch(*this);
-
+
// Finally update the unconditional successor to be reached via a branch if
// it would not be reached by fallthrough.
if (!isLayoutSuccessor(TBB))
@@ -681,16 +701,16 @@ bool MachineBasicBlock::isLayoutSuccessor(const MachineBasicBlock *MBB) const {
return std::next(I) == MachineFunction::const_iterator(MBB);
}
-bool MachineBasicBlock::canFallThrough() {
+MachineBasicBlock *MachineBasicBlock::getFallThrough() {
MachineFunction::iterator Fallthrough = getIterator();
++Fallthrough;
// If FallthroughBlock is off the end of the function, it can't fall through.
if (Fallthrough == getParent()->end())
- return false;
+ return nullptr;
// If FallthroughBlock isn't a successor, no fallthrough is possible.
if (!isSuccessor(&*Fallthrough))
- return false;
+ return nullptr;
// Analyze the branches, if any, at the end of the block.
MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
@@ -702,25 +722,31 @@ bool MachineBasicBlock::canFallThrough() {
// is possible. The isPredicated check is needed because this code can be
// called during IfConversion, where an instruction which is normally a
// Barrier is predicated and thus no longer an actual control barrier.
- return empty() || !back().isBarrier() || TII->isPredicated(back());
+ return (empty() || !back().isBarrier() || TII->isPredicated(back()))
+ ? &*Fallthrough
+ : nullptr;
}
// If there is no branch, control always falls through.
- if (!TBB) return true;
+ if (!TBB) return &*Fallthrough;
// If there is some explicit branch to the fallthrough block, it can obviously
// reach, even though the branch should get folded to fall through implicitly.
if (MachineFunction::iterator(TBB) == Fallthrough ||
MachineFunction::iterator(FBB) == Fallthrough)
- return true;
+ return &*Fallthrough;
// If it's an unconditional branch to some block not the fall through, it
// doesn't fall through.
- if (Cond.empty()) return false;
+ if (Cond.empty()) return nullptr;
// Otherwise, if it is conditional and has no explicit false block, it falls
// through.
- return FBB == nullptr;
+ return (FBB == nullptr) ? &*Fallthrough : nullptr;
+}
+
+bool MachineBasicBlock::canFallThrough() {
+ return getFallThrough() != nullptr;
}
MachineBasicBlock *MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ,
@@ -1144,6 +1170,24 @@ MachineBasicBlock::findDebugLoc(instr_iterator MBBI) {
return {};
}
+/// Find and return the merged DebugLoc of the branch instructions of the block.
+/// Return UnknownLoc if there is none.
+DebugLoc
+MachineBasicBlock::findBranchDebugLoc() {
+ DebugLoc DL;
+ auto TI = getFirstTerminator();
+ while (TI != end() && !TI->isBranch())
+ ++TI;
+
+ if (TI != end()) {
+ DL = TI->getDebugLoc();
+  for (++TI; TI != end(); ++TI)
+ if (TI->isBranch())
+ DL = DILocation::getMergedLocation(DL, TI->getDebugLoc());
+ }
+ return DL;
+}
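+// For example, a block ending in a conditional branch followed by an
+// unconditional branch contributes both locations; when the two disagree,
+// DILocation::getMergedLocation merges them conservatively instead of
+// picking one of them arbitrarily.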
+
/// Return probability of the edge from this block to MBB.
BranchProbability
MachineBasicBlock::getSuccProbability(const_succ_iterator Succ) const {
diff --git a/contrib/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp b/contrib/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp
index 7d5124d..4d1ec11 100644
--- a/contrib/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp
@@ -26,9 +26,8 @@
using namespace llvm;
-#define DEBUG_TYPE "block-freq"
+#define DEBUG_TYPE "machine-block-freq"
-#ifndef NDEBUG
static cl::opt<GVDAGType> ViewMachineBlockFreqPropagationDAG(
"view-machine-block-freq-propagation-dags", cl::Hidden,
@@ -43,10 +42,37 @@ static cl::opt<GVDAGType> ViewMachineBlockFreqPropagationDAG(
"integer fractional block frequency representation."),
clEnumValN(GVDT_Count, "count", "display a graph using the real "
"profile count if available.")));
+// Similar to the option above, but used to control BFI display only after the
+// MBP pass.
+cl::opt<GVDAGType> ViewBlockLayoutWithBFI(
+ "view-block-layout-with-bfi", cl::Hidden,
+ cl::desc(
+ "Pop up a window to show a dag displaying MBP layout and associated "
+ "block frequencies of the CFG."),
+ cl::values(clEnumValN(GVDT_None, "none", "do not display graphs."),
+ clEnumValN(GVDT_Fraction, "fraction",
+ "display a graph using the "
+ "fractional block frequency representation."),
+ clEnumValN(GVDT_Integer, "integer",
+ "display a graph using the raw "
+ "integer fractional block frequency representation."),
+ clEnumValN(GVDT_Count, "count",
+ "display a graph using the real "
+ "profile count if available.")));
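+// Typical invocation (sketch): "llc -view-block-layout-with-bfi=fraction
+// -view-bfi-func-name=foo" pops up the post-layout CFG of 'foo' annotated
+// with fractional block frequencies.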
+// Command line option to specify the name of the function for CFG dump
+// Defined in Analysis/BlockFrequencyInfo.cpp: -view-bfi-func-name=
extern cl::opt<std::string> ViewBlockFreqFuncName;
+// Command line option to specify hot frequency threshold.
+// Defined in Analysis/BlockFrequencyInfo.cpp: -view-hot-freq-perc=
extern cl::opt<unsigned> ViewHotFreqPercent;
+static GVDAGType getGVDT() {
+ if (ViewBlockLayoutWithBFI != GVDT_None)
+ return ViewBlockLayoutWithBFI;
+
+ return ViewMachineBlockFreqPropagationDAG;
+}
+
namespace llvm {
template <> struct GraphTraits<MachineBlockFrequencyInfo *> {
@@ -80,12 +106,32 @@ template <>
struct DOTGraphTraits<MachineBlockFrequencyInfo *>
: public MBFIDOTGraphTraitsBase {
explicit DOTGraphTraits(bool isSimple = false)
- : MBFIDOTGraphTraitsBase(isSimple) {}
+ : MBFIDOTGraphTraitsBase(isSimple), CurFunc(nullptr), LayoutOrderMap() {}
+
+ const MachineFunction *CurFunc;
+ DenseMap<const MachineBasicBlock *, int> LayoutOrderMap;
std::string getNodeLabel(const MachineBasicBlock *Node,
const MachineBlockFrequencyInfo *Graph) {
- return MBFIDOTGraphTraitsBase::getNodeLabel(
- Node, Graph, ViewMachineBlockFreqPropagationDAG);
+
+ int layout_order = -1;
+ // Attach additional ordering information if 'isSimple' is false.
+ if (!isSimple()) {
+ const MachineFunction *F = Node->getParent();
+ if (!CurFunc || F != CurFunc) {
+ if (CurFunc)
+ LayoutOrderMap.clear();
+
+ CurFunc = F;
+ int O = 0;
+ for (auto MBI = F->begin(); MBI != F->end(); ++MBI, ++O) {
+ LayoutOrderMap[&*MBI] = O;
+ }
+ }
+ layout_order = LayoutOrderMap[Node];
+ }
+ return MBFIDOTGraphTraitsBase::getNodeLabel(Node, Graph, getGVDT(),
+ layout_order);
}
std::string getNodeAttributes(const MachineBasicBlock *Node,
@@ -102,13 +148,12 @@ struct DOTGraphTraits<MachineBlockFrequencyInfo *>
};
} // end namespace llvm
-#endif
-INITIALIZE_PASS_BEGIN(MachineBlockFrequencyInfo, "machine-block-freq",
+INITIALIZE_PASS_BEGIN(MachineBlockFrequencyInfo, DEBUG_TYPE,
"Machine Block Frequency Analysis", true, true)
INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
-INITIALIZE_PASS_END(MachineBlockFrequencyInfo, "machine-block-freq",
+INITIALIZE_PASS_END(MachineBlockFrequencyInfo, DEBUG_TYPE,
"Machine Block Frequency Analysis", true, true)
char MachineBlockFrequencyInfo::ID = 0;
@@ -127,20 +172,24 @@ void MachineBlockFrequencyInfo::getAnalysisUsage(AnalysisUsage &AU) const {
MachineFunctionPass::getAnalysisUsage(AU);
}
-bool MachineBlockFrequencyInfo::runOnMachineFunction(MachineFunction &F) {
- MachineBranchProbabilityInfo &MBPI =
- getAnalysis<MachineBranchProbabilityInfo>();
- MachineLoopInfo &MLI = getAnalysis<MachineLoopInfo>();
+void MachineBlockFrequencyInfo::calculate(
+ const MachineFunction &F, const MachineBranchProbabilityInfo &MBPI,
+ const MachineLoopInfo &MLI) {
if (!MBFI)
MBFI.reset(new ImplType);
MBFI->calculate(F, MBPI, MLI);
-#ifndef NDEBUG
if (ViewMachineBlockFreqPropagationDAG != GVDT_None &&
(ViewBlockFreqFuncName.empty() ||
F.getName().equals(ViewBlockFreqFuncName))) {
- view();
+ view("MachineBlockFrequencyDAGS." + F.getName());
}
-#endif
+}
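+// Splitting calculate() out of runOnMachineFunction() lets clients refresh
+// the frequencies held by an existing MachineBlockFrequencyInfo object
+// without going through the pass manager.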
+
+bool MachineBlockFrequencyInfo::runOnMachineFunction(MachineFunction &F) {
+ MachineBranchProbabilityInfo &MBPI =
+ getAnalysis<MachineBranchProbabilityInfo>();
+ MachineLoopInfo &MLI = getAnalysis<MachineLoopInfo>();
+ calculate(F, MBPI, MLI);
return false;
}
@@ -148,15 +197,9 @@ void MachineBlockFrequencyInfo::releaseMemory() { MBFI.reset(); }
/// Pop up a ghostview window with the current block frequency propagation
/// rendered using dot.
-void MachineBlockFrequencyInfo::view() const {
-// This code is only for debugging.
-#ifndef NDEBUG
- ViewGraph(const_cast<MachineBlockFrequencyInfo *>(this),
- "MachineBlockFrequencyDAGs");
-#else
- errs() << "MachineBlockFrequencyInfo::view is only available in debug builds "
- "on systems with Graphviz or gv!\n";
-#endif // NDEBUG
+void MachineBlockFrequencyInfo::view(const Twine &Name, bool isSimple) const {
+ // This code is only for debugging.
+ ViewGraph(const_cast<MachineBlockFrequencyInfo *>(this), Name, isSimple);
}
BlockFrequency
diff --git a/contrib/llvm/lib/CodeGen/MachineBlockPlacement.cpp b/contrib/llvm/lib/CodeGen/MachineBlockPlacement.cpp
index 40e3840..447ad62 100644
--- a/contrib/llvm/lib/CodeGen/MachineBlockPlacement.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineBlockPlacement.cpp
@@ -25,22 +25,23 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/CodeGen/Passes.h"
-#include "llvm/CodeGen/TargetPassConfig.h"
#include "BranchFolding.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/BlockFrequencyInfoImpl.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
-#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachinePostDominators.h"
+#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TailDuplicator.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
@@ -49,6 +50,8 @@
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetSubtargetInfo.h"
#include <algorithm>
+#include <functional>
+#include <utility>
using namespace llvm;
#define DEBUG_TYPE "block-placement"
@@ -82,19 +85,6 @@ static cl::opt<unsigned> ExitBlockBias(
// Definition:
// - Outlining: placement of a basic block outside the chain or hot path.
-static cl::opt<bool> OutlineOptionalBranches(
- "outline-optional-branches",
- cl::desc("Outlining optional branches will place blocks that are optional "
- "branches, i.e. branches with a common post dominator, outside "
- "the hot path or chain"),
- cl::init(false), cl::Hidden);
-
-static cl::opt<unsigned> OutlineOptionalThreshold(
- "outline-optional-threshold",
- cl::desc("Don't outline optional branches that are a single block with an "
- "instruction count below this threshold"),
- cl::init(4), cl::Hidden);
-
static cl::opt<unsigned> LoopToColdBlockRatio(
"loop-to-cold-block-ratio",
cl::desc("Outline loop blocks from loop chain if (frequency of loop) / "
@@ -136,20 +126,55 @@ BranchFoldPlacement("branch-fold-placement",
cl::init(true), cl::Hidden);
// Heuristic for tail duplication.
-static cl::opt<unsigned> TailDuplicatePlacementThreshold(
+static cl::opt<unsigned> TailDupPlacementThreshold(
"tail-dup-placement-threshold",
cl::desc("Instruction cutoff for tail duplication during layout. "
"Tail merging during layout is forced to have a threshold "
"that won't conflict."), cl::init(2),
cl::Hidden);
+// Heuristic for aggressive tail duplication.
+static cl::opt<unsigned> TailDupPlacementAggressiveThreshold(
+ "tail-dup-placement-aggressive-threshold",
+ cl::desc("Instruction cutoff for aggressive tail duplication during "
+ "layout. Used at -O3. Tail merging during layout is forced to "
+ "have a threshold that won't conflict."), cl::init(3),
+ cl::Hidden);
+
+// Heuristic for tail duplication.
+static cl::opt<unsigned> TailDupPlacementPenalty(
+ "tail-dup-placement-penalty",
+ cl::desc("Cost penalty for blocks that can avoid breaking CFG by copying. "
+ "Copying can increase fallthrough, but it also increases icache "
+ "pressure. This parameter controls the penalty to account for that. "
+ "Percent as integer."),
+ cl::init(2),
+ cl::Hidden);
+
+// Heuristic for triangle chains.
+static cl::opt<unsigned> TriangleChainCount(
+ "triangle-chain-count",
+    cl::desc("Number of triangle-shaped CFGs that need to be in a row for the "
+ "triangle tail duplication heuristic to kick in. 0 to disable."),
+ cl::init(2),
+ cl::Hidden);
+
extern cl::opt<unsigned> StaticLikelyProb;
extern cl::opt<unsigned> ProfileLikelyProb;
+// Internal option used to control BFI display only after MBP pass.
+// Defined in CodeGen/MachineBlockFrequencyInfo.cpp:
+// -view-block-layout-with-bfi=
+extern cl::opt<GVDAGType> ViewBlockLayoutWithBFI;
+
+// Command line option to specify the name of the function for CFG dump
+// Defined in Analysis/BlockFrequencyInfo.cpp: -view-bfi-func-name=
+extern cl::opt<std::string> ViewBlockFreqFuncName;
+
namespace {
class BlockChain;
/// \brief Type for our function-wide basic block -> block chain mapping.
-typedef DenseMap<MachineBasicBlock *, BlockChain *> BlockToChainMapType;
+typedef DenseMap<const MachineBasicBlock *, BlockChain *> BlockToChainMapType;
}
namespace {
@@ -193,12 +218,15 @@ public:
/// \brief Iterator over blocks within the chain.
typedef SmallVectorImpl<MachineBasicBlock *>::iterator iterator;
+ typedef SmallVectorImpl<MachineBasicBlock *>::const_iterator const_iterator;
/// \brief Beginning of blocks within the chain.
iterator begin() { return Blocks.begin(); }
+ const_iterator begin() const { return Blocks.begin(); }
/// \brief End of blocks within the chain.
iterator end() { return Blocks.end(); }
+ const_iterator end() const { return Blocks.end(); }
bool remove(MachineBasicBlock* BB) {
for(iterator i = begin(); i != end(); ++i) {
@@ -217,25 +245,26 @@ public:
/// updating the block -> chain mapping. It does not free or tear down the
/// old chain, but the old chain's block list is no longer valid.
void merge(MachineBasicBlock *BB, BlockChain *Chain) {
- assert(BB);
- assert(!Blocks.empty());
+ assert(BB && "Can't merge a null block.");
+ assert(!Blocks.empty() && "Can't merge into an empty chain.");
// Fast path in case we don't have a chain already.
if (!Chain) {
- assert(!BlockToChain[BB]);
+ assert(!BlockToChain[BB] &&
+ "Passed chain is null, but BB has entry in BlockToChain.");
Blocks.push_back(BB);
BlockToChain[BB] = this;
return;
}
- assert(BB == *Chain->begin());
+ assert(BB == *Chain->begin() && "Passed BB is not head of Chain.");
assert(Chain->begin() != Chain->end());
// Update the incoming blocks to point to this chain, and add them to the
// chain structure.
for (MachineBasicBlock *ChainBB : *Chain) {
Blocks.push_back(ChainBB);
- assert(BlockToChain[ChainBB] == Chain && "Incoming blocks not in chain");
+ assert(BlockToChain[ChainBB] == Chain && "Incoming blocks not in chain.");
BlockToChain[ChainBB] = this;
}
}
@@ -264,12 +293,28 @@ public:
namespace {
class MachineBlockPlacement : public MachineFunctionPass {
/// \brief A typedef for a block filter set.
- typedef SmallSetVector<MachineBasicBlock *, 16> BlockFilterSet;
+ typedef SmallSetVector<const MachineBasicBlock *, 16> BlockFilterSet;
+
+  /// Pair struct containing a basic block and whether tail-duplicating it is
+  /// profitable.
+ struct BlockAndTailDupResult {
+ MachineBasicBlock *BB;
+ bool ShouldTailDup;
+ };
+
+  /// Triple struct containing an edge's weight and its source and destination
+  /// blocks.
+ struct WeightedEdge {
+ BlockFrequency Weight;
+ MachineBasicBlock *Src;
+ MachineBasicBlock *Dest;
+ };
/// \brief Work lists of blocks that are ready to be laid out.
SmallVector<MachineBasicBlock *, 16> BlockWorkList;
SmallVector<MachineBasicBlock *, 16> EHPadWorkList;
+ /// Edges that have already been computed as optimal.
+ DenseMap<const MachineBasicBlock *, BlockAndTailDupResult> ComputedEdges;
+
/// \brief Machine Function
MachineFunction *F;
@@ -294,7 +339,7 @@ class MachineBlockPlacement : public MachineFunctionPass {
const TargetLoweringBase *TLI;
/// \brief A handle to the post dominator tree.
- MachineDominatorTree *MDT;
+ MachinePostDominatorTree *MPDT;
/// \brief Duplicator used to duplicate tails during placement.
///
@@ -303,10 +348,6 @@ class MachineBlockPlacement : public MachineFunctionPass {
/// must be done inline.
TailDuplicator TailDup;
- /// \brief A set of blocks that are unavoidably execute, i.e. they dominate
- /// all terminators of the MachineFunction.
- SmallPtrSet<MachineBasicBlock *, 4> UnavoidableBlocks;
-
/// \brief Allocator and owner of BlockChain structures.
///
/// We build BlockChains lazily while processing the loop structure of
@@ -322,7 +363,7 @@ class MachineBlockPlacement : public MachineFunctionPass {
/// BlockChain it participates in, if any. We use it to, among other things,
/// allow implicitly defining edges between chains as the existing edges
/// between basic blocks.
- DenseMap<MachineBasicBlock *, BlockChain *> BlockToChain;
+ DenseMap<const MachineBasicBlock *, BlockChain *> BlockToChain;
#ifndef NDEBUG
/// The set of basic blocks that have terminators that cannot be fully
@@ -334,75 +375,107 @@ class MachineBlockPlacement : public MachineFunctionPass {
/// Decrease the UnscheduledPredecessors count for all blocks in chain, and
/// if the count goes to 0, add them to the appropriate work list.
- void markChainSuccessors(BlockChain &Chain, MachineBasicBlock *LoopHeaderBB,
- const BlockFilterSet *BlockFilter = nullptr);
+ void markChainSuccessors(
+ const BlockChain &Chain, const MachineBasicBlock *LoopHeaderBB,
+ const BlockFilterSet *BlockFilter = nullptr);
/// Decrease the UnscheduledPredecessors count for a single block, and
/// if the count goes to 0, add them to the appropriate work list.
void markBlockSuccessors(
- BlockChain &Chain, MachineBasicBlock *BB, MachineBasicBlock *LoopHeaderBB,
+ const BlockChain &Chain, const MachineBasicBlock *BB,
+ const MachineBasicBlock *LoopHeaderBB,
const BlockFilterSet *BlockFilter = nullptr);
-
BranchProbability
- collectViableSuccessors(MachineBasicBlock *BB, BlockChain &Chain,
- const BlockFilterSet *BlockFilter,
- SmallVector<MachineBasicBlock *, 4> &Successors);
- bool shouldPredBlockBeOutlined(MachineBasicBlock *BB, MachineBasicBlock *Succ,
- BlockChain &Chain,
- const BlockFilterSet *BlockFilter,
- BranchProbability SuccProb,
- BranchProbability HotProb);
+ collectViableSuccessors(
+ const MachineBasicBlock *BB, const BlockChain &Chain,
+ const BlockFilterSet *BlockFilter,
+ SmallVector<MachineBasicBlock *, 4> &Successors);
+ bool shouldPredBlockBeOutlined(
+ const MachineBasicBlock *BB, const MachineBasicBlock *Succ,
+ const BlockChain &Chain, const BlockFilterSet *BlockFilter,
+ BranchProbability SuccProb, BranchProbability HotProb);
bool repeatedlyTailDuplicateBlock(
MachineBasicBlock *BB, MachineBasicBlock *&LPred,
- MachineBasicBlock *LoopHeaderBB,
+ const MachineBasicBlock *LoopHeaderBB,
BlockChain &Chain, BlockFilterSet *BlockFilter,
MachineFunction::iterator &PrevUnplacedBlockIt);
- bool maybeTailDuplicateBlock(MachineBasicBlock *BB, MachineBasicBlock *LPred,
- const BlockChain &Chain,
- BlockFilterSet *BlockFilter,
- MachineFunction::iterator &PrevUnplacedBlockIt,
- bool &DuplicatedToPred);
- bool
- hasBetterLayoutPredecessor(MachineBasicBlock *BB, MachineBasicBlock *Succ,
- BlockChain &SuccChain, BranchProbability SuccProb,
- BranchProbability RealSuccProb, BlockChain &Chain,
- const BlockFilterSet *BlockFilter);
- MachineBasicBlock *selectBestSuccessor(MachineBasicBlock *BB,
- BlockChain &Chain,
- const BlockFilterSet *BlockFilter);
- MachineBasicBlock *
- selectBestCandidateBlock(BlockChain &Chain,
- SmallVectorImpl<MachineBasicBlock *> &WorkList);
- MachineBasicBlock *
- getFirstUnplacedBlock(const BlockChain &PlacedChain,
- MachineFunction::iterator &PrevUnplacedBlockIt,
- const BlockFilterSet *BlockFilter);
+ bool maybeTailDuplicateBlock(
+ MachineBasicBlock *BB, MachineBasicBlock *LPred,
+ BlockChain &Chain, BlockFilterSet *BlockFilter,
+ MachineFunction::iterator &PrevUnplacedBlockIt,
+ bool &DuplicatedToPred);
+ bool hasBetterLayoutPredecessor(
+ const MachineBasicBlock *BB, const MachineBasicBlock *Succ,
+ const BlockChain &SuccChain, BranchProbability SuccProb,
+ BranchProbability RealSuccProb, const BlockChain &Chain,
+ const BlockFilterSet *BlockFilter);
+ BlockAndTailDupResult selectBestSuccessor(
+ const MachineBasicBlock *BB, const BlockChain &Chain,
+ const BlockFilterSet *BlockFilter);
+ MachineBasicBlock *selectBestCandidateBlock(
+ const BlockChain &Chain, SmallVectorImpl<MachineBasicBlock *> &WorkList);
+ MachineBasicBlock *getFirstUnplacedBlock(
+ const BlockChain &PlacedChain,
+ MachineFunction::iterator &PrevUnplacedBlockIt,
+ const BlockFilterSet *BlockFilter);
/// \brief Add a basic block to the work list if it is appropriate.
///
/// If the optional parameter BlockFilter is provided, only MBB
/// present in the set will be added to the worklist. If nullptr
/// is provided, no filtering occurs.
- void fillWorkLists(MachineBasicBlock *MBB,
+ void fillWorkLists(const MachineBasicBlock *MBB,
SmallPtrSetImpl<BlockChain *> &UpdatedPreds,
const BlockFilterSet *BlockFilter);
- void buildChain(MachineBasicBlock *BB, BlockChain &Chain,
+ void buildChain(const MachineBasicBlock *BB, BlockChain &Chain,
BlockFilterSet *BlockFilter = nullptr);
- MachineBasicBlock *findBestLoopTop(MachineLoop &L,
- const BlockFilterSet &LoopBlockSet);
- MachineBasicBlock *findBestLoopExit(MachineLoop &L,
- const BlockFilterSet &LoopBlockSet);
- BlockFilterSet collectLoopBlockSet(MachineLoop &L);
- void buildLoopChains(MachineLoop &L);
- void rotateLoop(BlockChain &LoopChain, MachineBasicBlock *ExitingBB,
- const BlockFilterSet &LoopBlockSet);
- void rotateLoopWithProfile(BlockChain &LoopChain, MachineLoop &L,
- const BlockFilterSet &LoopBlockSet);
- void collectMustExecuteBBs();
+ MachineBasicBlock *findBestLoopTop(
+ const MachineLoop &L, const BlockFilterSet &LoopBlockSet);
+ MachineBasicBlock *findBestLoopExit(
+ const MachineLoop &L, const BlockFilterSet &LoopBlockSet);
+ BlockFilterSet collectLoopBlockSet(const MachineLoop &L);
+ void buildLoopChains(const MachineLoop &L);
+ void rotateLoop(
+ BlockChain &LoopChain, const MachineBasicBlock *ExitingBB,
+ const BlockFilterSet &LoopBlockSet);
+ void rotateLoopWithProfile(
+ BlockChain &LoopChain, const MachineLoop &L,
+ const BlockFilterSet &LoopBlockSet);
void buildCFGChains();
void optimizeBranches();
void alignBlocks();
+ /// Returns true if a block should be tail-duplicated to increase fallthrough
+ /// opportunities.
+ bool shouldTailDuplicate(MachineBasicBlock *BB);
+ /// Check the edge frequencies to see if tail duplication will increase
+ /// fallthroughs.
+ bool isProfitableToTailDup(
+ const MachineBasicBlock *BB, const MachineBasicBlock *Succ,
+ BranchProbability AdjustedSumProb,
+ const BlockChain &Chain, const BlockFilterSet *BlockFilter);
+ /// Check for a trellis layout.
+ bool isTrellis(const MachineBasicBlock *BB,
+ const SmallVectorImpl<MachineBasicBlock *> &ViableSuccs,
+ const BlockChain &Chain, const BlockFilterSet *BlockFilter);
+ /// Get the best successor given a trellis layout.
+ BlockAndTailDupResult getBestTrellisSuccessor(
+ const MachineBasicBlock *BB,
+ const SmallVectorImpl<MachineBasicBlock *> &ViableSuccs,
+ BranchProbability AdjustedSumProb, const BlockChain &Chain,
+ const BlockFilterSet *BlockFilter);
+ /// Get the best pair of non-conflicting edges.
+ static std::pair<WeightedEdge, WeightedEdge> getBestNonConflictingEdges(
+ const MachineBasicBlock *BB,
+ MutableArrayRef<SmallVector<WeightedEdge, 8>> Edges);
+ /// Returns true if a block can tail duplicate into all unplaced
+ /// predecessors. Filters based on loop.
+ bool canTailDuplicateUnplacedPreds(
+ const MachineBasicBlock *BB, MachineBasicBlock *Succ,
+ const BlockChain &Chain, const BlockFilterSet *BlockFilter);
+ /// Find chains of triangles to tail-duplicate where a global analysis works,
+ /// but a local analysis would not find them.
+ void precomputeTriangleChains();
public:
static char ID; // Pass identification, replacement for typeid
@@ -415,7 +488,8 @@ public:
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<MachineBranchProbabilityInfo>();
AU.addRequired<MachineBlockFrequencyInfo>();
- AU.addRequired<MachineDominatorTree>();
+ if (TailDupPlacement)
+ AU.addRequired<MachinePostDominatorTree>();
AU.addRequired<MachineLoopInfo>();
AU.addRequired<TargetPassConfig>();
MachineFunctionPass::getAnalysisUsage(AU);
@@ -425,20 +499,20 @@ public:
char MachineBlockPlacement::ID = 0;
char &llvm::MachineBlockPlacementID = MachineBlockPlacement::ID;
-INITIALIZE_PASS_BEGIN(MachineBlockPlacement, "block-placement",
+INITIALIZE_PASS_BEGIN(MachineBlockPlacement, DEBUG_TYPE,
"Branch Probability Basic Block Placement", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfo)
-INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTree)
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
-INITIALIZE_PASS_END(MachineBlockPlacement, "block-placement",
+INITIALIZE_PASS_END(MachineBlockPlacement, DEBUG_TYPE,
"Branch Probability Basic Block Placement", false, false)
#ifndef NDEBUG
/// \brief Helper to print the name of a MBB.
///
/// Only used by debug logging.
-static std::string getBlockName(MachineBasicBlock *BB) {
+static std::string getBlockName(const MachineBasicBlock *BB) {
std::string Result;
raw_string_ostream OS(Result);
OS << "BB#" << BB->getNumber();
@@ -455,7 +529,7 @@ static std::string getBlockName(MachineBasicBlock *BB) {
/// having one fewer active predecessor. It also adds any successors of this
/// chain which reach the zero-predecessor state to the appropriate worklist.
void MachineBlockPlacement::markChainSuccessors(
- BlockChain &Chain, MachineBasicBlock *LoopHeaderBB,
+ const BlockChain &Chain, const MachineBasicBlock *LoopHeaderBB,
const BlockFilterSet *BlockFilter) {
// Walk all the blocks in this chain, marking their successors as having
// a predecessor placed.
@@ -471,8 +545,8 @@ void MachineBlockPlacement::markChainSuccessors(
/// and was duplicated into the chain end, we need to redo markBlockSuccessors
/// for just that block.
void MachineBlockPlacement::markBlockSuccessors(
- BlockChain &Chain, MachineBasicBlock *MBB, MachineBasicBlock *LoopHeaderBB,
- const BlockFilterSet *BlockFilter) {
+ const BlockChain &Chain, const MachineBasicBlock *MBB,
+ const MachineBasicBlock *LoopHeaderBB, const BlockFilterSet *BlockFilter) {
// Add any successors for which this is the only un-placed in-loop
// predecessor to the worklist as a viable candidate for CFG-neutral
// placement. No subsequent placement of this block will violate the CFG
@@ -504,7 +578,8 @@ void MachineBlockPlacement::markBlockSuccessors(
/// the total branch probability of edges from \p BB to those
/// blocks.
BranchProbability MachineBlockPlacement::collectViableSuccessors(
- MachineBasicBlock *BB, BlockChain &Chain, const BlockFilterSet *BlockFilter,
+ const MachineBasicBlock *BB, const BlockChain &Chain,
+ const BlockFilterSet *BlockFilter,
SmallVector<MachineBasicBlock *, 4> &Successors) {
// Adjust edge probabilities by excluding edges pointing to blocks that are
// either not in BlockFilter or are already in the current chain. Consider the
@@ -519,8 +594,8 @@ BranchProbability MachineBlockPlacement::collectViableSuccessors(
// Assume A->C is very hot (>90%), and C->D has a 50% probability, then after
// A->C is chosen as a fall-through, D won't be selected as a successor of C
// due to CFG constraint (the probability of C->D is not greater than
- // HotProb to break top-order). If we exclude E that is not in BlockFilter
- // when calculating the probability of C->D, D will be selected and we
+ // HotProb to break topo-order). If we exclude E that is not in BlockFilter
+ // when calculating the probability of C->D, D will be selected and we
// will get A C D B as the layout of this loop.
auto AdjustedSumProb = BranchProbability::getOne();
for (MachineBasicBlock *Succ : BB->successors()) {
@@ -561,46 +636,573 @@ getAdjustedProbability(BranchProbability OrigProb,
return SuccProb;
}
-/// When the option OutlineOptionalBranches is on, this method
-/// checks if the fallthrough candidate block \p Succ (of block
-/// \p BB) also has other unscheduled predecessor blocks which
-/// are also successors of \p BB (forming triangular shape CFG).
-/// If none of such predecessors are small, it returns true.
-/// The caller can choose to select \p Succ as the layout successors
-/// so that \p Succ's predecessors (optional branches) can be
-/// outlined.
-/// FIXME: fold this with more general layout cost analysis.
-bool MachineBlockPlacement::shouldPredBlockBeOutlined(
- MachineBasicBlock *BB, MachineBasicBlock *Succ, BlockChain &Chain,
- const BlockFilterSet *BlockFilter, BranchProbability SuccProb,
- BranchProbability HotProb) {
- if (!OutlineOptionalBranches)
+/// Check if \p BB has exactly the successors in \p Successors.
+static bool
+hasSameSuccessors(MachineBasicBlock &BB,
+ SmallPtrSetImpl<const MachineBasicBlock *> &Successors) {
+ if (BB.succ_size() != Successors.size())
+ return false;
+ // We don't want to count self-loops
+ if (Successors.count(&BB))
return false;
- // If we outline optional branches, look whether Succ is unavoidable, i.e.
- // dominates all terminators of the MachineFunction. If it does, other
- // successors must be optional. Don't do this for cold branches.
- if (SuccProb > HotProb.getCompl() && UnavoidableBlocks.count(Succ) > 0) {
- for (MachineBasicBlock *Pred : Succ->predecessors()) {
- // Check whether there is an unplaced optional branch.
- if (Pred == Succ || (BlockFilter && !BlockFilter->count(Pred)) ||
- BlockToChain[Pred] == &Chain)
+ for (MachineBasicBlock *Succ : BB.successors())
+ if (!Successors.count(Succ))
+ return false;
+ return true;
+}
+
+/// Check if a block should be tail duplicated to increase fallthrough
+/// opportunities.
+/// \p BB Block to check.
+bool MachineBlockPlacement::shouldTailDuplicate(MachineBasicBlock *BB) {
+ // Blocks with single successors don't create additional fallthrough
+ // opportunities. Don't duplicate them. TODO: When conditional exits are
+ // analyzable, allow them to be duplicated.
+ bool IsSimple = TailDup.isSimpleBB(BB);
+
+ if (BB->succ_size() == 1)
+ return false;
+ return TailDup.shouldTailDuplicate(IsSimple, *BB);
+}
+
+/// Compare two BlockFrequency values with a small penalty for \p A.
+/// In order to be conservative, we apply a X% penalty to account for
+/// increased icache pressure and static heuristics. For small frequencies
+/// we use only the numerators to improve accuracy. For simplicity, we assume
+/// the penalty is less than 100%.
+/// TODO(iteratee): Use 64-bit fixed point edge frequencies everywhere.
+static bool greaterWithBias(BlockFrequency A, BlockFrequency B,
+ uint64_t EntryFreq) {
+ BranchProbability ThresholdProb(TailDupPlacementPenalty, 100);
+ BlockFrequency Gain = A - B;
+ return (Gain / ThresholdProb).getFrequency() >= EntryFreq;
+}
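+// Worked example with the default 2% penalty: Gain / (2/100) equals Gain * 50,
+// so the comparison holds only when Gain = A - B is at least EntryFreq / 50,
+// i.e. at least 2% of the entry block's frequency.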
+
+/// Check the edge frequencies to see if tail duplication will increase
+/// fallthroughs. It only makes sense to call this function when
+/// \p Succ would not be chosen otherwise. Tail duplication of \p Succ is
+/// always locally profitable if we would have picked \p Succ without
+/// considering duplication.
+bool MachineBlockPlacement::isProfitableToTailDup(
+ const MachineBasicBlock *BB, const MachineBasicBlock *Succ,
+ BranchProbability QProb,
+ const BlockChain &Chain, const BlockFilterSet *BlockFilter) {
+ // We need to do a probability calculation to make sure this is profitable.
+ // First: does succ have a successor that post-dominates? This affects the
+ // calculation. The 2 relevant cases are:
+ // BB BB
+ // | \Qout | \Qout
+ // P| C |P C
+ // = C' = C'
+ // | /Qin | /Qin
+ // | / | /
+ // Succ Succ
+ // / \ | \ V
+ // U/ =V |U \
+ // / \ = D
+ // D E | /
+ // | /
+ // |/
+ // PDom
+ // '=' : Branch taken for that CFG edge
+  // In the second case, placing Succ while duplicating it into C prevents the
+  // fallthrough of Succ into either D or PDom, because they now have C as an
+  // unplaced predecessor.
+
+ // Start by figuring out which case we fall into
+ MachineBasicBlock *PDom = nullptr;
+ SmallVector<MachineBasicBlock *, 4> SuccSuccs;
+ // Only scan the relevant successors
+ auto AdjustedSuccSumProb =
+ collectViableSuccessors(Succ, Chain, BlockFilter, SuccSuccs);
+ BranchProbability PProb = MBPI->getEdgeProbability(BB, Succ);
+ auto BBFreq = MBFI->getBlockFreq(BB);
+ auto SuccFreq = MBFI->getBlockFreq(Succ);
+ BlockFrequency P = BBFreq * PProb;
+ BlockFrequency Qout = BBFreq * QProb;
+ uint64_t EntryFreq = MBFI->getEntryFreq();
+ // If there are no more successors, it is profitable to copy, as it strictly
+ // increases fallthrough.
+ if (SuccSuccs.size() == 0)
+ return greaterWithBias(P, Qout, EntryFreq);
+
+ auto BestSuccSucc = BranchProbability::getZero();
+ // Find the PDom or the best Succ if no PDom exists.
+ for (MachineBasicBlock *SuccSucc : SuccSuccs) {
+ auto Prob = MBPI->getEdgeProbability(Succ, SuccSucc);
+ if (Prob > BestSuccSucc)
+ BestSuccSucc = Prob;
+ if (PDom == nullptr)
+ if (MPDT->dominates(SuccSucc, Succ)) {
+ PDom = SuccSucc;
+ break;
+ }
+ }
+ // For the comparisons, we need to know Succ's best incoming edge that isn't
+ // from BB.
+ auto SuccBestPred = BlockFrequency(0);
+ for (MachineBasicBlock *SuccPred : Succ->predecessors()) {
+ if (SuccPred == Succ || SuccPred == BB
+ || BlockToChain[SuccPred] == &Chain
+ || (BlockFilter && !BlockFilter->count(SuccPred)))
+ continue;
+ auto Freq = MBFI->getBlockFreq(SuccPred)
+ * MBPI->getEdgeProbability(SuccPred, Succ);
+ if (Freq > SuccBestPred)
+ SuccBestPred = Freq;
+ }
+ // Qin is Succ's best unplaced incoming edge that isn't BB
+ BlockFrequency Qin = SuccBestPred;
+ // If it doesn't have a post-dominating successor, here is the calculation:
+ // BB BB
+ // | \Qout | \
+ // P| C | =
+ // = C' | C
+ // | /Qin | |
+ // | / | C' (+Succ)
+ // Succ Succ /|
+ // / \ | \/ |
+ // U/ =V | == |
+ // / \ | / \|
+ // D E D E
+ // '=' : Branch taken for that CFG edge
+ // Cost in the first case is: P + V
+  // For this calculation, we always assume P > Qout; if Qout > P, the result
+  // of this function will be ignored by the caller.
+ // Let F = SuccFreq - Qin
+ // Cost in the second case is: Qout + min(Qin, F) * U + max(Qin, F) * V
+
+ if (PDom == nullptr || !Succ->isSuccessor(PDom)) {
+ BranchProbability UProb = BestSuccSucc;
+ BranchProbability VProb = AdjustedSuccSumProb - UProb;
+ BlockFrequency F = SuccFreq - Qin;
+ BlockFrequency V = SuccFreq * VProb;
+ BlockFrequency QinU = std::min(Qin, F) * UProb;
+ BlockFrequency BaseCost = P + V;
+ BlockFrequency DupCost = Qout + QinU + std::max(Qin, F) * VProb;
+ return greaterWithBias(BaseCost, DupCost, EntryFreq);
+ }
+ BranchProbability UProb = MBPI->getEdgeProbability(Succ, PDom);
+ BranchProbability VProb = AdjustedSuccSumProb - UProb;
+ BlockFrequency U = SuccFreq * UProb;
+ BlockFrequency V = SuccFreq * VProb;
+ BlockFrequency F = SuccFreq - Qin;
+ // If there is a post-dominating successor, here is the calculation:
+ // BB BB BB BB
+ // | \Qout | \ | \Qout | \
+ // |P C | = |P C | =
+ // = C' |P C = C' |P C
+ // | /Qin | | | /Qin | |
+ // | / | C' (+Succ) | / | C' (+Succ)
+ // Succ Succ /| Succ Succ /|
+ // | \ V | \/ | | \ V | \/ |
+ // |U \ |U /\ =? |U = |U /\ |
+ // = D = = =?| | D | = =|
+ // | / |/ D | / |/ D
+ // | / | / | = | /
+ // |/ | / |/ | =
+ // Dom Dom Dom Dom
+ // '=' : Branch taken for that CFG edge
+ // The cost for taken branches in the first case is P + U
+ // Let F = SuccFreq - Qin
+ // The cost in the second case (assuming independence), given the layout:
+ // BB, Succ, (C+Succ), D, Dom or the layout:
+ // BB, Succ, D, Dom, (C+Succ)
+ // is Qout + max(F, Qin) * U + min(F, Qin)
+ // compare P + U vs Qout + P * U + Qin.
+ //
+ // The 3rd and 4th cases cover when Dom would be chosen to follow Succ.
+ //
+ // For the 3rd case, the cost is P + 2 * V
+ // For the 4th case, the cost is Qout + min(Qin, F) * U + max(Qin, F) * V + V
+ // We choose 4 over 3 when (P + V) > Qout + min(Qin, F) * U + max(Qin, F) * V
+ if (UProb > AdjustedSuccSumProb / 2 &&
+ !hasBetterLayoutPredecessor(Succ, PDom, *BlockToChain[PDom], UProb, UProb,
+ Chain, BlockFilter))
+ // Cases 3 & 4
+ return greaterWithBias(
+ (P + V), (Qout + std::max(Qin, F) * VProb + std::min(Qin, F) * UProb),
+ EntryFreq);
+ // Cases 1 & 2
+ return greaterWithBias((P + U),
+ (Qout + std::min(Qin, F) * AdjustedSuccSumProb +
+ std::max(Qin, F) * UProb),
+ EntryFreq);
+}
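+// Numeric sketch of the no-successor early exit above: with BBFreq = 100,
+// P(BB->Succ) = 0.6 and the remaining 0.4 leaving via Qout, P = 60 and
+// Qout = 40, so duplication wins if the gain of 20 clears the
+// entry-frequency bias applied by greaterWithBias.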
+
+/// Check for a trellis layout. \p BB is the upper part of a trellis if its
+/// successors form the lower part of a trellis. A successor set S forms the
+/// lower part of a trellis if all of the predecessors of S are either in S or
+/// have all of S as successors. We ignore trellises where BB doesn't have 2
+/// successors because for fewer than 2, it's trivial, and for 3 or greater they
+/// are very uncommon and complex to compute optimally. Allowing edges within S
+/// is not strictly a trellis, but the same algorithm works, so we allow it.
+bool MachineBlockPlacement::isTrellis(
+ const MachineBasicBlock *BB,
+ const SmallVectorImpl<MachineBasicBlock *> &ViableSuccs,
+ const BlockChain &Chain, const BlockFilterSet *BlockFilter) {
+ // Technically BB could form a trellis with branching factor higher than 2.
+ // But that's extremely uncommon.
+ if (BB->succ_size() != 2 || ViableSuccs.size() != 2)
+ return false;
+
+ SmallPtrSet<const MachineBasicBlock *, 2> Successors(BB->succ_begin(),
+ BB->succ_end());
+ // To avoid reviewing the same predecessors twice.
+ SmallPtrSet<const MachineBasicBlock *, 8> SeenPreds;
+
+ for (MachineBasicBlock *Succ : ViableSuccs) {
+ int PredCount = 0;
+ for (auto SuccPred : Succ->predecessors()) {
+ // Allow triangle successors, but don't count them.
+ if (Successors.count(SuccPred)) {
+ // Make sure that it is actually a triangle.
+ for (MachineBasicBlock *CheckSucc : SuccPred->successors())
+ if (!Successors.count(CheckSucc))
+ return false;
+ continue;
+ }
+ const BlockChain *PredChain = BlockToChain[SuccPred];
+ if (SuccPred == BB || (BlockFilter && !BlockFilter->count(SuccPred)) ||
+ PredChain == &Chain || PredChain == BlockToChain[Succ])
continue;
- // Check whether the optional branch has exactly one BB.
- if (Pred->pred_size() > 1 || *Pred->pred_begin() != BB)
+ ++PredCount;
+ // Perform the successor check only once.
+ if (!SeenPreds.insert(SuccPred).second)
continue;
- // Check whether the optional branch is small.
- if (Pred->size() < OutlineOptionalThreshold)
+ if (!hasSameSuccessors(*SuccPred, Successors))
return false;
}
- return true;
- } else
+ // If one of the successors has only BB as a predecessor, it is not a
+ // trellis.
+ if (PredCount < 1)
+ return false;
+ }
+ return true;
+}
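+// Example of the shape being matched (every predecessor of {S1, S2} has
+// exactly {S1, S2} as its successors):
+//   BB    C
+//   | \  /|
+//   |  \/ |
+//   |  /\ |
+//   | /  \|
+//   S1    S2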
+
+/// Pick the highest total weight pair of edges that can both be laid out.
+/// The edges in \p Edges[0] are assumed to have a different destination than
+/// the edges in \p Edges[1]. Simple counting shows that the best pair is either
+/// the individual highest weight edges to the 2 different destinations, or in
+/// case of a conflict, one of them should be replaced with a 2nd best edge.
+std::pair<MachineBlockPlacement::WeightedEdge,
+ MachineBlockPlacement::WeightedEdge>
+MachineBlockPlacement::getBestNonConflictingEdges(
+ const MachineBasicBlock *BB,
+ MutableArrayRef<SmallVector<MachineBlockPlacement::WeightedEdge, 8>>
+ Edges) {
+ // Sort the edges, and then for each successor, find the best incoming
+ // predecessor. If the best incoming predecessors aren't the same,
+ // then that is clearly the best layout. If there is a conflict, one of the
+ // successors will have to fallthrough from the second best predecessor. We
+ // compare which combination is better overall.
+
+ // Sort for highest frequency.
+ auto Cmp = [](WeightedEdge A, WeightedEdge B) { return A.Weight > B.Weight; };
+
+ std::stable_sort(Edges[0].begin(), Edges[0].end(), Cmp);
+ std::stable_sort(Edges[1].begin(), Edges[1].end(), Cmp);
+ auto BestA = Edges[0].begin();
+ auto BestB = Edges[1].begin();
+ // Arrange for the correct answer to be in BestA and BestB
+ // If the 2 best edges don't conflict, the answer is already there.
+ if (BestA->Src == BestB->Src) {
+ // Compare the total fallthrough of (Best + Second Best) for both pairs
+ auto SecondBestA = std::next(BestA);
+ auto SecondBestB = std::next(BestB);
+ BlockFrequency BestAScore = BestA->Weight + SecondBestB->Weight;
+ BlockFrequency BestBScore = BestB->Weight + SecondBestA->Weight;
+ if (BestAScore < BestBScore)
+ BestA = SecondBestA;
+ else
+ BestB = SecondBestB;
+ }
+ // Arrange for the BB edge to be in BestA if it exists.
+ if (BestB->Src == BB)
+ std::swap(BestA, BestB);
+ return std::make_pair(*BestA, *BestB);
+}
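+// Example: if the best edge into S1 and the best edge into S2 both leave BB,
+// they conflict; we then compare best(S1) + secondBest(S2) against
+// best(S2) + secondBest(S1) and keep the pair with more total fallthrough.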
+
+/// Get the best successor from \p BB based on \p BB being part of a trellis.
+/// We only handle trellises with 2 successors, so the algorithm is
+/// straightforward: Find the best pair of edges that don't conflict. We find
+/// the best incoming edge for each successor in the trellis. If those conflict,
+/// we consider which of them should be replaced with the second best.
+/// The two best edges are found by getBestNonConflictingEdges; if neither of
+/// them leaves \p BB, no successor is selected.
+MachineBlockPlacement::BlockAndTailDupResult
+MachineBlockPlacement::getBestTrellisSuccessor(
+ const MachineBasicBlock *BB,
+ const SmallVectorImpl<MachineBasicBlock *> &ViableSuccs,
+ BranchProbability AdjustedSumProb, const BlockChain &Chain,
+ const BlockFilterSet *BlockFilter) {
+
+ BlockAndTailDupResult Result = {nullptr, false};
+ SmallPtrSet<const MachineBasicBlock *, 4> Successors(BB->succ_begin(),
+ BB->succ_end());
+
+ // We assume size 2 because it's common. For general n, we would have to do
+ // the Hungarian algorithm, but it's not worth the complexity because more
+ // than 2 successors is fairly uncommon, and a trellis even more so.
+ if (Successors.size() != 2 || ViableSuccs.size() != 2)
+ return Result;
+
+ // Collect the edge frequencies of all edges that form the trellis.
+ SmallVector<WeightedEdge, 8> Edges[2];
+ int SuccIndex = 0;
+ for (auto Succ : ViableSuccs) {
+ for (MachineBasicBlock *SuccPred : Succ->predecessors()) {
+ // Skip any placed predecessors that are not BB
+ if (SuccPred != BB)
+ if ((BlockFilter && !BlockFilter->count(SuccPred)) ||
+ BlockToChain[SuccPred] == &Chain ||
+ BlockToChain[SuccPred] == BlockToChain[Succ])
+ continue;
+ BlockFrequency EdgeFreq = MBFI->getBlockFreq(SuccPred) *
+ MBPI->getEdgeProbability(SuccPred, Succ);
+ Edges[SuccIndex].push_back({EdgeFreq, SuccPred, Succ});
+ }
+ ++SuccIndex;
+ }
+
+ // Pick the best combination of 2 edges from all the edges in the trellis.
+ WeightedEdge BestA, BestB;
+ std::tie(BestA, BestB) = getBestNonConflictingEdges(BB, Edges);
+
+ if (BestA.Src != BB) {
+ // If we have a trellis, and BB doesn't have the best fallthrough edges,
+ // we shouldn't choose any successor. We've already looked and there's a
+ // better fallthrough edge for all the successors.
+ DEBUG(dbgs() << "Trellis, but not one of the chosen edges.\n");
+ return Result;
+ }
+
+ // Did we pick the triangle edge? If tail-duplication is profitable, do
+ // that instead. Otherwise merge the triangle edge now while we know it is
+ // optimal.
+ if (BestA.Dest == BestB.Src) {
+ // The edges are BB->Succ1->Succ2, and we're looking to see if BB->Succ2
+ // would be better.
+ MachineBasicBlock *Succ1 = BestA.Dest;
+ MachineBasicBlock *Succ2 = BestB.Dest;
+ // Check to see if tail-duplication would be profitable.
+ if (TailDupPlacement && shouldTailDuplicate(Succ2) &&
+ canTailDuplicateUnplacedPreds(BB, Succ2, Chain, BlockFilter) &&
+ isProfitableToTailDup(BB, Succ2, MBPI->getEdgeProbability(BB, Succ1),
+ Chain, BlockFilter)) {
+ DEBUG(BranchProbability Succ2Prob = getAdjustedProbability(
+ MBPI->getEdgeProbability(BB, Succ2), AdjustedSumProb);
+ dbgs() << " Selected: " << getBlockName(Succ2)
+ << ", probability: " << Succ2Prob << " (Tail Duplicate)\n");
+ Result.BB = Succ2;
+ Result.ShouldTailDup = true;
+ return Result;
+ }
+ }
+ // We have already computed the optimal edge for the other side of the
+ // trellis.
+ ComputedEdges[BestB.Src] = { BestB.Dest, false };
+
+ auto TrellisSucc = BestA.Dest;
+ DEBUG(BranchProbability SuccProb = getAdjustedProbability(
+ MBPI->getEdgeProbability(BB, TrellisSucc), AdjustedSumProb);
+ dbgs() << " Selected: " << getBlockName(TrellisSucc)
+ << ", probability: " << SuccProb << " (Trellis)\n");
+ Result.BB = TrellisSucc;
+ return Result;
+}
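+// Note that caching BestB.Src -> BestB.Dest in ComputedEdges above means the
+// other half of the trellis simply consumes the precomputed edge in its own
+// selectBestSuccessor call instead of re-running this analysis.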
+
+/// When the option TailDupPlacement is on, this method checks if the
+/// fallthrough candidate block \p Succ (of block \p BB) can be tail-duplicated
+/// into all of its unplaced, unfiltered predecessors, that are not BB.
+bool MachineBlockPlacement::canTailDuplicateUnplacedPreds(
+ const MachineBasicBlock *BB, MachineBasicBlock *Succ,
+ const BlockChain &Chain, const BlockFilterSet *BlockFilter) {
+ if (!shouldTailDuplicate(Succ))
return false;
+
+ // For CFG checking.
+ SmallPtrSet<const MachineBasicBlock *, 4> Successors(BB->succ_begin(),
+ BB->succ_end());
+ for (MachineBasicBlock *Pred : Succ->predecessors()) {
+ // Make sure all unplaced and unfiltered predecessors can be
+ // tail-duplicated into.
+ // Skip any blocks that are already placed or not in this loop.
+ if (Pred == BB || (BlockFilter && !BlockFilter->count(Pred))
+ || BlockToChain[Pred] == &Chain)
+ continue;
+ if (!TailDup.canTailDuplicate(Succ, Pred)) {
+ if (Successors.size() > 1 && hasSameSuccessors(*Pred, Successors))
+ // This will result in a trellis after tail duplication, so we don't
+ // need to copy Succ into this predecessor. In the presence
+ // of a trellis tail duplication can continue to be profitable.
+ // For example:
+ // A A
+ // |\ |\
+ // | \ | \
+ // | C | C+BB
+ // | / | |
+ // |/ | |
+ // BB => BB |
+ // |\ |\/|
+ // | \ |/\|
+ // | D | D
+ // | / | /
+ // |/ |/
+ // Succ Succ
+ //
+ // After BB was duplicated into C, the layout looks like the one on the
+ // right. BB and C now have the same successors. When considering
+ // whether Succ can be duplicated into all its unplaced predecessors, we
+ // ignore C.
+ // We can do this because C already has a profitable fallthrough, namely
+ // D. TODO(iteratee): ignore sufficiently cold predecessors for
+ // duplication and for this test.
+ //
+ // This allows trellises to be laid out in 2 separate chains
+ // (A,B,Succ,...) and later (C,D,...) This is a reasonable heuristic
+ // because it allows the creation of 2 fallthrough paths with links
+ // between them, and we correctly identify the best layout for these
+ // CFGs. We want to extend trellises that the user created in addition
+ // to trellises created by tail-duplication, so we just look for the
+ // CFG.
+ continue;
+ return false;
+ }
+ }
+ return true;
+}
+
+/// Find chains of triangles where we believe it would be profitable to
+/// tail-duplicate them all, but a local analysis would not find them.
+/// There are 3 ways this can be profitable:
+/// 1) The post-dominators marked 50% are actually taken 55% (this effect
+///    shrinks with longer chains).
+/// 2) The chains are statically correlated. Branch probabilities have a very
+/// U-shaped distribution.
+/// [http://nrs.harvard.edu/urn-3:HUL.InstRepos:24015805]
+/// If the branches in a chain are likely to be from the same side of the
+/// distribution as their predecessor, but are independent at runtime, this
+/// transformation is profitable. (Because the cost of being wrong is a small
+/// fixed cost, unlike the standard triangle layout where the cost of being
+/// wrong scales with the # of triangles.)
+/// 3) The chains are dynamically correlated: the probability that a previous
+///    branch was taken positively influences whether the next branch will be
+///    taken.
+/// We believe that 2 and 3 are common enough to justify the small margin in 1.
+void MachineBlockPlacement::precomputeTriangleChains() {
+ struct TriangleChain {
+ std::vector<MachineBasicBlock *> Edges;
+ TriangleChain(MachineBasicBlock *src, MachineBasicBlock *dst)
+ : Edges({src, dst}) {}
+
+ void append(MachineBasicBlock *dst) {
+ assert(getKey()->isSuccessor(dst) &&
+ "Attempting to append a block that is not a successor.");
+ Edges.push_back(dst);
+ }
+
+ unsigned count() const { return Edges.size() - 1; }
+
+ MachineBasicBlock *getKey() const {
+ return Edges.back();
+ }
+ };
+
+ if (TriangleChainCount == 0)
+ return;
+
+ DEBUG(dbgs() << "Pre-computing triangle chains.\n");
+ // Map from last block to the chain that contains it. This allows us to extend
+ // chains as we find new triangles.
+ DenseMap<const MachineBasicBlock *, TriangleChain> TriangleChainMap;
+ for (MachineBasicBlock &BB : *F) {
+ // If BB doesn't have 2 successors, it doesn't start a triangle.
+ if (BB.succ_size() != 2)
+ continue;
+ MachineBasicBlock *PDom = nullptr;
+ for (MachineBasicBlock *Succ : BB.successors()) {
+ if (!MPDT->dominates(Succ, &BB))
+ continue;
+ PDom = Succ;
+ break;
+ }
+ // If BB doesn't have a post-dominating successor, it doesn't form a
+ // triangle.
+ if (PDom == nullptr)
+ continue;
+ // If PDom has a hint that it is low probability, skip this triangle.
+ if (MBPI->getEdgeProbability(&BB, PDom) < BranchProbability(50, 100))
+ continue;
+ // If PDom isn't eligible for duplication, this isn't the kind of triangle
+ // we're looking for.
+ if (!shouldTailDuplicate(PDom))
+ continue;
+ bool CanTailDuplicate = true;
+    // If PDom can't be tail-duplicated into its non-BB predecessors, then this
+    // isn't the kind of triangle we're looking for.
+ for (MachineBasicBlock* Pred : PDom->predecessors()) {
+ if (Pred == &BB)
+ continue;
+ if (!TailDup.canTailDuplicate(PDom, Pred)) {
+ CanTailDuplicate = false;
+ break;
+ }
+ }
+ // If we can't tail-duplicate PDom to its predecessors, then skip this
+ // triangle.
+ if (!CanTailDuplicate)
+ continue;
+
+ // Now we have an interesting triangle. Insert it if it's not part of an
+ // existing chain.
+    // Note: This cannot be replaced with a call to insert() or emplace()
+    // because the find key is BB, but the insert/emplace key is PDom.
+ auto Found = TriangleChainMap.find(&BB);
+ // If it is, remove the chain from the map, grow it, and put it back in the
+ // map with the end as the new key.
+ if (Found != TriangleChainMap.end()) {
+ TriangleChain Chain = std::move(Found->second);
+ TriangleChainMap.erase(Found);
+ Chain.append(PDom);
+ TriangleChainMap.insert(std::make_pair(Chain.getKey(), std::move(Chain)));
+ } else {
+ auto InsertResult = TriangleChainMap.try_emplace(PDom, &BB, PDom);
+ assert(InsertResult.second && "Block seen twice.");
+ (void)InsertResult;
+ }
+ }
+
+ // Iterating over a DenseMap is safe here, because the only thing in the body
+ // of the loop is inserting into another DenseMap (ComputedEdges).
+ // ComputedEdges is never iterated, so this doesn't lead to non-determinism.
+ for (auto &ChainPair : TriangleChainMap) {
+ TriangleChain &Chain = ChainPair.second;
+ // Benchmarking has shown that due to branch correlation duplicating 2 or
+ // more triangles is profitable, despite the calculations assuming
+ // independence.
+ if (Chain.count() < TriangleChainCount)
+ continue;
+ MachineBasicBlock *dst = Chain.Edges.back();
+ Chain.Edges.pop_back();
+ for (MachineBasicBlock *src : reverse(Chain.Edges)) {
+ DEBUG(dbgs() << "Marking edge: " << getBlockName(src) << "->" <<
+ getBlockName(dst) << " as pre-computed based on triangles.\n");
+
+ auto InsertResult = ComputedEdges.insert({src, {dst, true}});
+ assert(InsertResult.second && "Block seen twice.");
+ (void)InsertResult;
+
+ dst = src;
+ }
+ }
}
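+// Example with the default -triangle-chain-count=2: given a CFG
+//   A -> {B, P1}, B -> P1, P1 -> {C, P2}, C -> P2
+// where P1 post-dominates A and P2 post-dominates P1, the edges A->P1 and
+// P1->P2 are recorded in ComputedEdges with ShouldTailDup set, so layout
+// will duplicate P1 into B and P2 into C.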
// When profile is not present, return the StaticLikelyProb.
// When profile is available, we need to handle the triangle-shape CFG.
static BranchProbability getLayoutSuccessorProbThreshold(
- MachineBasicBlock *BB) {
+ const MachineBasicBlock *BB) {
if (!BB->getParent()->getFunction()->getEntryCount())
return BranchProbability(StaticLikelyProb, 100);
if (BB->succ_size() == 2) {
@@ -609,11 +1211,11 @@ static BranchProbability getLayoutSuccessorProbThreshold(
if (Succ1->isSuccessor(Succ2) || Succ2->isSuccessor(Succ1)) {
/* See case 1 below for the cost analysis. For BB->Succ to
* be taken with smaller cost, the following needs to hold:
- * Prob(BB->Succ) > 2* Prob(BB->Pred)
- * So the threshold T
- * T = 2 * (1-Prob(BB->Pred). Since T + Prob(BB->Pred) == 1,
- * We have T + T/2 = 1, i.e. T = 2/3. Also adding user specified
- * branch bias, we have
+     * Prob(BB->Succ) > 2 * Prob(BB->Pred)
+     * So the threshold T in the calculation below satisfies
+     * (1-T) * Prob(BB->Succ) > T * Prob(BB->Pred),
+     * giving T / (1 - T) = 2 and hence T = 2/3.
+     * Also adding the user-specified branch bias, we have
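+     * For example, with ProfileLikelyProb = 50 the bias term is neutral and
+     * T = 2/3 (~67%); larger values raise the bar for breaking topo-order.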
* T = (2/3)*(ProfileLikelyProb/50)
* = (2*ProfileLikelyProb)/150
*/
@@ -625,10 +1227,17 @@ static BranchProbability getLayoutSuccessorProbThreshold(
/// Checks to see if the layout candidate block \p Succ has a better layout
/// predecessor than \c BB. If yes, returns true.
+/// \p SuccProb: The probability adjusted for only the remaining blocks.
+///    Only used for logging.
+/// \p RealSuccProb: The un-adjusted probability.
+/// \p Chain: The chain that BB belongs to and Succ is being considered for.
+/// \p BlockFilter: if non-null, the set of blocks that make up the loop being
+///    considered.
bool MachineBlockPlacement::hasBetterLayoutPredecessor(
- MachineBasicBlock *BB, MachineBasicBlock *Succ, BlockChain &SuccChain,
- BranchProbability SuccProb, BranchProbability RealSuccProb,
- BlockChain &Chain, const BlockFilterSet *BlockFilter) {
+ const MachineBasicBlock *BB, const MachineBasicBlock *Succ,
+ const BlockChain &SuccChain, BranchProbability SuccProb,
+ BranchProbability RealSuccProb, const BlockChain &Chain,
+ const BlockFilterSet *BlockFilter) {
// There isn't a better layout when there are no unscheduled predecessors.
if (SuccChain.UnscheduledPredecessors == 0)
@@ -689,9 +1298,9 @@ bool MachineBlockPlacement::hasBetterLayoutPredecessor(
// | | | |
// ---BB | | BB
// | | | |
- // | pred-- | Succ--
+ // | Pred-- | Succ--
// | | | |
- // ---succ ---pred--
+ // ---Succ ---Pred--
//
// cost = freq(S->Pred) + freq(BB->Succ) cost = 2 * freq (S->Pred)
// = freq(S->Pred) + freq(S->BB)
@@ -734,11 +1343,12 @@ bool MachineBlockPlacement::hasBetterLayoutPredecessor(
// | Pred----| | S1----
// | | | |
// --(S1 or S2) ---Pred--
+ // |
+ // S2
//
// topo-cost = freq(S->Pred) + freq(BB->S1) + freq(BB->S2)
// + min(freq(Pred->S1), freq(Pred->S2))
// Non-topo-order cost:
- // In the worst case, S2 will not get laid out after Pred.
// non-topo-cost = 2 * freq(S->Pred) + freq(BB->S2).
// To be conservative, we can assume that min(freq(Pred->S1), freq(Pred->S2))
// is 0. Then the non topo layout is better when
@@ -756,13 +1366,15 @@ bool MachineBlockPlacement::hasBetterLayoutPredecessor(
for (MachineBasicBlock *Pred : Succ->predecessors()) {
if (Pred == Succ || BlockToChain[Pred] == &SuccChain ||
(BlockFilter && !BlockFilter->count(Pred)) ||
- BlockToChain[Pred] == &Chain)
+ BlockToChain[Pred] == &Chain ||
+          // This check is redundant except for lookahead. This function is
+          // called for lookahead by isProfitableToTailDup when BB hasn't been
+          // placed yet.
+ (Pred == BB))
continue;
// Do backward checking.
// For all cases above, we need a backward checking to filter out edges that
- // are not 'strongly' biased. With profile data available, the check is
- // mostly redundant for case 2 (when threshold prob is set at 50%) unless S
- // has more than two successors.
+ // are not 'strongly' biased.
// BB Pred
// \ /
// Succ
@@ -798,14 +1410,15 @@ bool MachineBlockPlacement::hasBetterLayoutPredecessor(
/// breaking CFG structure, but cave and break such structures in the case of
/// very hot successor edges.
///
-/// \returns The best successor block found, or null if none are viable.
-MachineBasicBlock *
-MachineBlockPlacement::selectBestSuccessor(MachineBasicBlock *BB,
- BlockChain &Chain,
- const BlockFilterSet *BlockFilter) {
+/// \returns The best successor block found, or null if none are viable, along
+/// with a boolean indicating if tail duplication is necessary.
+MachineBlockPlacement::BlockAndTailDupResult
+MachineBlockPlacement::selectBestSuccessor(
+ const MachineBasicBlock *BB, const BlockChain &Chain,
+ const BlockFilterSet *BlockFilter) {
const BranchProbability HotProb(StaticLikelyProb, 100);
- MachineBasicBlock *BestSucc = nullptr;
+ BlockAndTailDupResult BestSucc = { nullptr, false };
auto BestProb = BranchProbability::getZero();
SmallVector<MachineBasicBlock *, 4> Successors;
@@ -813,22 +1426,45 @@ MachineBlockPlacement::selectBestSuccessor(MachineBasicBlock *BB,
collectViableSuccessors(BB, Chain, BlockFilter, Successors);
DEBUG(dbgs() << "Selecting best successor for: " << getBlockName(BB) << "\n");
+
+  // If we already precomputed the best successor for BB, return that successor
+  // if it is still applicable.
+  auto FoundEdge = ComputedEdges.find(BB);
+  if (FoundEdge != ComputedEdges.end()) {
+    // Copy the result out before erasing: the DenseMap iterator must not be
+    // dereferenced once it has been handed to erase().
+    BlockAndTailDupResult Found = FoundEdge->second;
+    MachineBasicBlock *Succ = Found.BB;
+    ComputedEdges.erase(FoundEdge);
+    BlockChain *SuccChain = BlockToChain[Succ];
+    if (BB->isSuccessor(Succ) && (!BlockFilter || BlockFilter->count(Succ)) &&
+        SuccChain != &Chain && Succ == *SuccChain->begin())
+      return Found;
+ }
+
+  // If BB is part of a trellis, use the trellis to determine the optimal
+  // fallthrough edges.
+ if (isTrellis(BB, Successors, Chain, BlockFilter))
+ return getBestTrellisSuccessor(BB, Successors, AdjustedSumProb, Chain,
+ BlockFilter);
+
+ // For blocks with CFG violations, we may be able to lay them out anyway with
+ // tail-duplication. We keep this vector so we can perform the probability
+ // calculations the minimum number of times.
+ SmallVector<std::tuple<BranchProbability, MachineBasicBlock *>, 4>
+ DupCandidates;
for (MachineBasicBlock *Succ : Successors) {
auto RealSuccProb = MBPI->getEdgeProbability(BB, Succ);
BranchProbability SuccProb =
getAdjustedProbability(RealSuccProb, AdjustedSumProb);
- // This heuristic is off by default.
- if (shouldPredBlockBeOutlined(BB, Succ, Chain, BlockFilter, SuccProb,
- HotProb))
- return Succ;
-
BlockChain &SuccChain = *BlockToChain[Succ];
// Skip the edge \c BB->Succ if block \c Succ has a better layout
// predecessor that yields lower global cost.
if (hasBetterLayoutPredecessor(BB, Succ, SuccChain, SuccProb, RealSuccProb,
- Chain, BlockFilter))
+ Chain, BlockFilter)) {
+ // If tail duplication would make Succ profitable, place it.
+ if (TailDupPlacement && shouldTailDuplicate(Succ))
+ DupCandidates.push_back(std::make_tuple(SuccProb, Succ));
continue;
+ }
DEBUG(
dbgs() << " Candidate: " << getBlockName(Succ) << ", probability: "
@@ -836,17 +1472,48 @@ MachineBlockPlacement::selectBestSuccessor(MachineBasicBlock *BB,
<< (SuccChain.UnscheduledPredecessors != 0 ? " (CFG break)" : "")
<< "\n");
- if (BestSucc && BestProb >= SuccProb) {
+ if (BestSucc.BB && BestProb >= SuccProb) {
DEBUG(dbgs() << " Not the best candidate, continuing\n");
continue;
}
DEBUG(dbgs() << " Setting it as best candidate\n");
- BestSucc = Succ;
+ BestSucc.BB = Succ;
BestProb = SuccProb;
}
- if (BestSucc)
- DEBUG(dbgs() << " Selected: " << getBlockName(BestSucc) << "\n");
+  // Handle the tail duplication candidates in order of decreasing probability.
+  // Stop at the first one that is profitable. Also stop if they are less
+  // profitable than BestSucc. The sort below is stable, so candidates with
+  // equal probability keep their discovery order and the first best match
+  // wins.
+  if (!DupCandidates.empty()) {
+ auto cmp =
+ [](const std::tuple<BranchProbability, MachineBasicBlock *> &a,
+ const std::tuple<BranchProbability, MachineBasicBlock *> &b) {
+ return std::get<0>(a) > std::get<0>(b);
+ };
+ std::stable_sort(DupCandidates.begin(), DupCandidates.end(), cmp);
+ }
+  for (auto &Tup : DupCandidates) {
+ BranchProbability DupProb;
+ MachineBasicBlock *Succ;
+ std::tie(DupProb, Succ) = Tup;
+ if (DupProb < BestProb)
+ break;
+    if (canTailDuplicateUnplacedPreds(BB, Succ, Chain, BlockFilter) &&
+        isProfitableToTailDup(BB, Succ, BestProb, Chain, BlockFilter)) {
+ DEBUG(
+ dbgs() << " Candidate: " << getBlockName(Succ) << ", probability: "
+ << DupProb
+ << " (Tail Duplicate)\n");
+ BestSucc.BB = Succ;
+ BestSucc.ShouldTailDup = true;
+ break;
+ }
+ }
+
+ if (BestSucc.BB)
+ DEBUG(dbgs() << " Selected: " << getBlockName(BestSucc.BB) << "\n");
return BestSucc;
}
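
The candidate handling above sorts by probability but relies on a stable sort
for tie-breaking. The same idiom as a self-contained sketch in plain C++ (toy
types; the pass uses BranchProbability and MachineBasicBlock*):

    #include <algorithm>
    #include <tuple>
    #include <vector>

    using Candidate = std::tuple<double, int>; // (probability, block id)

    void sortByProbability(std::vector<Candidate> &DupCandidates) {
      // Descending probability; stability preserves the original discovery
      // order for equal probabilities, so the first best match wins.
      std::stable_sort(DupCandidates.begin(), DupCandidates.end(),
                       [](const Candidate &a, const Candidate &b) {
                         return std::get<0>(a) > std::get<0>(b);
                       });
    }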
@@ -862,7 +1529,7 @@ MachineBlockPlacement::selectBestSuccessor(MachineBasicBlock *BB,
///
/// \returns The best block found, or null if none are viable.
MachineBasicBlock *MachineBlockPlacement::selectBestCandidateBlock(
- BlockChain &Chain, SmallVectorImpl<MachineBasicBlock *> &WorkList) {
+ const BlockChain &Chain, SmallVectorImpl<MachineBasicBlock *> &WorkList) {
// Once we need to walk the worklist looking for a candidate, cleanup the
// worklist of already placed entries.
// FIXME: If this shows up on profiles, it could be folded (at the cost of
@@ -881,13 +1548,15 @@ MachineBasicBlock *MachineBlockPlacement::selectBestCandidateBlock(
MachineBasicBlock *BestBlock = nullptr;
BlockFrequency BestFreq;
for (MachineBasicBlock *MBB : WorkList) {
- assert(MBB->isEHPad() == IsEHPad);
+ assert(MBB->isEHPad() == IsEHPad &&
+ "EHPad mismatch between block and work list.");
BlockChain &SuccChain = *BlockToChain[MBB];
if (&SuccChain == &Chain)
continue;
- assert(SuccChain.UnscheduledPredecessors == 0 && "Found CFG-violating block");
+ assert(SuccChain.UnscheduledPredecessors == 0 &&
+ "Found CFG-violating block");
BlockFrequency CandidateFreq = MBFI->getBlockFreq(MBB);
DEBUG(dbgs() << " " << getBlockName(MBB) << " -> ";
@@ -948,16 +1617,19 @@ MachineBasicBlock *MachineBlockPlacement::getFirstUnplacedBlock(
}
void MachineBlockPlacement::fillWorkLists(
- MachineBasicBlock *MBB,
+ const MachineBasicBlock *MBB,
SmallPtrSetImpl<BlockChain *> &UpdatedPreds,
const BlockFilterSet *BlockFilter = nullptr) {
BlockChain &Chain = *BlockToChain[MBB];
if (!UpdatedPreds.insert(&Chain).second)
return;
- assert(Chain.UnscheduledPredecessors == 0);
+ assert(
+ Chain.UnscheduledPredecessors == 0 &&
+ "Attempting to place block with unscheduled predecessors in worklist.");
for (MachineBasicBlock *ChainBB : Chain) {
- assert(BlockToChain[ChainBB] == &Chain);
+ assert(BlockToChain[ChainBB] == &Chain &&
+ "Block in chain doesn't match BlockToChain map.");
for (MachineBasicBlock *Pred : ChainBB->predecessors()) {
if (BlockFilter && !BlockFilter->count(Pred))
continue;
@@ -970,23 +1642,23 @@ void MachineBlockPlacement::fillWorkLists(
if (Chain.UnscheduledPredecessors != 0)
return;
- MBB = *Chain.begin();
- if (MBB->isEHPad())
- EHPadWorkList.push_back(MBB);
+ MachineBasicBlock *BB = *Chain.begin();
+ if (BB->isEHPad())
+ EHPadWorkList.push_back(BB);
else
- BlockWorkList.push_back(MBB);
+ BlockWorkList.push_back(BB);
}
void MachineBlockPlacement::buildChain(
- MachineBasicBlock *BB, BlockChain &Chain,
+ const MachineBasicBlock *HeadBB, BlockChain &Chain,
BlockFilterSet *BlockFilter) {
- assert(BB && "BB must not be null.\n");
- assert(BlockToChain[BB] == &Chain && "BlockToChainMap mis-match.\n");
+  assert(HeadBB && "HeadBB must not be null.\n");
+ assert(BlockToChain[HeadBB] == &Chain && "BlockToChainMap mis-match.\n");
MachineFunction::iterator PrevUnplacedBlockIt = F->begin();
- MachineBasicBlock *LoopHeaderBB = BB;
+ const MachineBasicBlock *LoopHeaderBB = HeadBB;
markChainSuccessors(Chain, LoopHeaderBB, BlockFilter);
- BB = *std::prev(Chain.end());
+ MachineBasicBlock *BB = *std::prev(Chain.end());
for (;;) {
assert(BB && "null block found at end of chain in loop.");
assert(BlockToChain[BB] == &Chain && "BlockToChainMap mis-match in loop.");
@@ -995,7 +1667,11 @@ void MachineBlockPlacement::buildChain(
// Look for the best viable successor if there is one to place immediately
// after this block.
- MachineBasicBlock *BestSucc = selectBestSuccessor(BB, Chain, BlockFilter);
+ auto Result = selectBestSuccessor(BB, Chain, BlockFilter);
+  MachineBasicBlock *BestSucc = Result.BB;
+ bool ShouldTailDup = Result.ShouldTailDup;
+ if (TailDupPlacement)
+ ShouldTailDup |= (BestSucc && shouldTailDuplicate(BestSucc));
// If an immediate successor isn't available, look for the best viable
// block among those we've identified as not violating the loop's CFG at
@@ -1016,7 +1692,7 @@ void MachineBlockPlacement::buildChain(
// Placement may have changed tail duplication opportunities.
// Check for that now.
- if (TailDupPlacement && BestSucc) {
+ if (TailDupPlacement && BestSucc && ShouldTailDup) {
// If the chosen successor was duplicated into all its predecessors,
// don't bother laying it out, just go round the loop again with BB as
// the chain end.
@@ -1052,7 +1728,7 @@ void MachineBlockPlacement::buildChain(
/// unconditional jump (for the backedge) rotating it in front of the loop
/// header is always profitable.
MachineBasicBlock *
-MachineBlockPlacement::findBestLoopTop(MachineLoop &L,
+MachineBlockPlacement::findBestLoopTop(const MachineLoop &L,
const BlockFilterSet &LoopBlockSet) {
// Placing the latch block before the header may introduce an extra branch
// that skips this block the first time the loop is executed, which we want
@@ -1116,7 +1792,7 @@ MachineBlockPlacement::findBestLoopTop(MachineLoop &L,
/// block to layout at the top of the loop. Typically this is done to maximize
/// fallthrough opportunities.
MachineBasicBlock *
-MachineBlockPlacement::findBestLoopExit(MachineLoop &L,
+MachineBlockPlacement::findBestLoopExit(const MachineLoop &L,
const BlockFilterSet &LoopBlockSet) {
// We don't want to layout the loop linearly in all cases. If the loop header
// is just a normal basic block in the loop, we want to look for what block
@@ -1235,12 +1911,18 @@ MachineBlockPlacement::findBestLoopExit(MachineLoop &L,
/// branches. For example, if the loop has fallthrough into its header and out
/// of its bottom already, don't rotate it.
void MachineBlockPlacement::rotateLoop(BlockChain &LoopChain,
- MachineBasicBlock *ExitingBB,
+ const MachineBasicBlock *ExitingBB,
const BlockFilterSet &LoopBlockSet) {
if (!ExitingBB)
return;
MachineBasicBlock *Top = *LoopChain.begin();
+ MachineBasicBlock *Bottom = *std::prev(LoopChain.end());
+
+ // If ExitingBB is already the last one in a chain then nothing to do.
+ if (Bottom == ExitingBB)
+ return;
+
bool ViableTopFallthrough = false;
for (MachineBasicBlock *Pred : Top->predecessors()) {
BlockChain *PredChain = BlockToChain[Pred];
@@ -1255,7 +1937,6 @@ void MachineBlockPlacement::rotateLoop(BlockChain &LoopChain,
// bottom is a viable exiting block. If so, bail out as rotating will
// introduce an unnecessary branch.
if (ViableTopFallthrough) {
- MachineBasicBlock *Bottom = *std::prev(LoopChain.end());
for (MachineBasicBlock *Succ : Bottom->successors()) {
BlockChain *SuccChain = BlockToChain[Succ];
if (!LoopBlockSet.count(Succ) &&
@@ -1268,6 +1949,36 @@ void MachineBlockPlacement::rotateLoop(BlockChain &LoopChain,
if (ExitIt == LoopChain.end())
return;
+  // Rotating a loop exit to the bottom when there is a fallthrough to the
+  // top trades the entry fallthrough for an exit fallthrough.
+  // If there is no bottom->top edge, but the chosen exit block does have
+  // a fallthrough, we break that fallthrough for nothing in return.
+
+  // Consider a built chain of basic blocks B1, B2, ..., Bn, where Bk is the
+  // chosen exit block (ExitingBB). Rotation produces
+  // Bk+1, ..., Bn, B1, ..., Bk
+  // Breaking the fallthrough into B1 is compensated by the new fallthrough
+  // out of Bk. If there was a fallthrough Bk -> Bk+1, it is now broken and
+  // can only be compensated by a fallthrough Bn -> B1.
+  // So, to avoid creating an extra branch, we skip the rotation when all of
+  // the following hold:
+  // There is a fallthrough to the top (B1).
+  // There was a fallthrough from the chosen exit block (Bk) to the next
+  // block (Bk+1).
+  // There is no fallthrough from the bottom (Bn) to the top (B1).
+  // Note that there is no exit fallthrough from Bn, because we checked for
+  // that above.
+ if (ViableTopFallthrough) {
+ assert(std::next(ExitIt) != LoopChain.end() &&
+ "Exit should not be last BB");
+ MachineBasicBlock *NextBlockInChain = *std::next(ExitIt);
+ if (ExitingBB->isSuccessor(NextBlockInChain))
+ if (!Bottom->isSuccessor(Top))
+ return;
+ }
+
+ DEBUG(dbgs() << "Rotating loop to put exit " << getBlockName(ExitingBB)
+ << " at bottom\n");
std::rotate(LoopChain.begin(), std::next(ExitIt), LoopChain.end());
}
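
The std::rotate call above can be pictured with a small standalone example
(block names are illustrative):

    #include <algorithm>
    #include <iostream>
    #include <string>
    #include <vector>

    int main() {
      // Chain B1..B5, with B3 as the chosen exiting block.
      std::vector<std::string> Chain = {"B1", "B2", "B3", "B4", "B5"};
      auto ExitIt = Chain.begin() + 2; // points at B3
      std::rotate(Chain.begin(), std::next(ExitIt), Chain.end());
      for (const auto &B : Chain)
        std::cout << B << ' '; // B4 B5 B1 B2 B3: the exit ends up at bottom
    }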
@@ -1285,7 +1996,8 @@ void MachineBlockPlacement::rotateLoop(BlockChain &LoopChain,
/// Therefore, the cost for a given rotation is the sum of costs listed above.
/// We select the best rotation with the smallest cost.
void MachineBlockPlacement::rotateLoopWithProfile(
- BlockChain &LoopChain, MachineLoop &L, const BlockFilterSet &LoopBlockSet) {
+ BlockChain &LoopChain, const MachineLoop &L,
+ const BlockFilterSet &LoopBlockSet) {
auto HeaderBB = L.getHeader();
auto HeaderIter = find(LoopChain, HeaderBB);
auto RotationPos = LoopChain.end();
@@ -1422,7 +2134,7 @@ void MachineBlockPlacement::rotateLoopWithProfile(
/// When profile data is available, exclude cold blocks from the returned set;
/// otherwise, collect all blocks in the loop.
MachineBlockPlacement::BlockFilterSet
-MachineBlockPlacement::collectLoopBlockSet(MachineLoop &L) {
+MachineBlockPlacement::collectLoopBlockSet(const MachineLoop &L) {
BlockFilterSet LoopBlockSet;
// Filter cold blocks off from LoopBlockSet when profile data is available.
@@ -1459,14 +2171,16 @@ MachineBlockPlacement::collectLoopBlockSet(MachineLoop &L) {
/// as much as possible. We can then stitch the chains together in a way which
/// both preserves the topological structure and minimizes taken conditional
/// branches.
-void MachineBlockPlacement::buildLoopChains(MachineLoop &L) {
+void MachineBlockPlacement::buildLoopChains(const MachineLoop &L) {
// First recurse through any nested loops, building chains for those inner
// loops.
- for (MachineLoop *InnerLoop : L)
+ for (const MachineLoop *InnerLoop : L)
buildLoopChains(*InnerLoop);
- assert(BlockWorkList.empty());
- assert(EHPadWorkList.empty());
+ assert(BlockWorkList.empty() &&
+ "BlockWorkList not empty when starting to build loop chains.");
+ assert(EHPadWorkList.empty() &&
+ "EHPadWorkList not empty when starting to build loop chains.");
BlockFilterSet LoopBlockSet = collectLoopBlockSet(L);
// Check if we have profile data for this function. If yes, we will rotate
@@ -1496,10 +2210,11 @@ void MachineBlockPlacement::buildLoopChains(MachineLoop &L) {
// walk the blocks, and use a set to prevent visiting a particular chain
// twice.
SmallPtrSet<BlockChain *, 4> UpdatedPreds;
- assert(LoopChain.UnscheduledPredecessors == 0);
+ assert(LoopChain.UnscheduledPredecessors == 0 &&
+ "LoopChain should not have unscheduled predecessors.");
UpdatedPreds.insert(&LoopChain);
- for (MachineBasicBlock *LoopBB : LoopBlockSet)
+ for (const MachineBasicBlock *LoopBB : LoopBlockSet)
fillWorkLists(LoopBB, UpdatedPreds, &LoopBlockSet);
buildChain(LoopTop, LoopChain, &LoopBlockSet);
@@ -1533,7 +2248,7 @@ void MachineBlockPlacement::buildLoopChains(MachineLoop &L) {
if (!LoopBlockSet.empty()) {
BadLoop = true;
- for (MachineBasicBlock *LoopBB : LoopBlockSet)
+ for (const MachineBasicBlock *LoopBB : LoopBlockSet)
dbgs() << "Loop contains blocks never placed into a chain!\n"
<< " Loop header: " << getBlockName(*L.block_begin()) << "\n"
<< " Chain header: " << getBlockName(*LoopChain.begin()) << "\n"
@@ -1546,31 +2261,6 @@ void MachineBlockPlacement::buildLoopChains(MachineLoop &L) {
EHPadWorkList.clear();
}
-/// When OutlineOpitonalBranches is on, this method collects BBs that
-/// dominates all terminator blocks of the function \p F.
-void MachineBlockPlacement::collectMustExecuteBBs() {
- if (OutlineOptionalBranches) {
- // Find the nearest common dominator of all of F's terminators.
- MachineBasicBlock *Terminator = nullptr;
- for (MachineBasicBlock &MBB : *F) {
- if (MBB.succ_size() == 0) {
- if (Terminator == nullptr)
- Terminator = &MBB;
- else
- Terminator = MDT->findNearestCommonDominator(Terminator, &MBB);
- }
- }
-
- // MBBs dominating this common dominator are unavoidable.
- UnavoidableBlocks.clear();
- for (MachineBasicBlock &MBB : *F) {
- if (MDT->dominates(&MBB, Terminator)) {
- UnavoidableBlocks.insert(&MBB);
- }
- }
- }
-}
-
void MachineBlockPlacement::buildCFGChains() {
// Ensure that every BB in the function has an associated chain to simplify
// the assumptions of the remaining algorithm.
@@ -1605,16 +2295,15 @@ void MachineBlockPlacement::buildCFGChains() {
}
}
- // Turned on with OutlineOptionalBranches option
- collectMustExecuteBBs();
-
// Build any loop-based chains.
PreferredLoopExit = nullptr;
for (MachineLoop *L : *MLI)
buildLoopChains(*L);
- assert(BlockWorkList.empty());
- assert(EHPadWorkList.empty());
+ assert(BlockWorkList.empty() &&
+ "BlockWorkList should be empty before building final chain.");
+ assert(EHPadWorkList.empty() &&
+ "EHPadWorkList should be empty before building final chain.");
SmallPtrSet<BlockChain *, 4> UpdatedPreds;
for (MachineBasicBlock &MBB : *F)
@@ -1839,7 +2528,7 @@ void MachineBlockPlacement::alignBlocks() {
/// @return true if \p BB was removed.
bool MachineBlockPlacement::repeatedlyTailDuplicateBlock(
MachineBasicBlock *BB, MachineBasicBlock *&LPred,
- MachineBasicBlock *LoopHeaderBB,
+ const MachineBasicBlock *LoopHeaderBB,
BlockChain &Chain, BlockFilterSet *BlockFilter,
MachineFunction::iterator &PrevUnplacedBlockIt) {
bool Removed, DuplicatedToLPred;
@@ -1901,21 +2590,16 @@ bool MachineBlockPlacement::repeatedlyTailDuplicateBlock(
/// \return - True if the block was duplicated into all preds and removed.
bool MachineBlockPlacement::maybeTailDuplicateBlock(
MachineBasicBlock *BB, MachineBasicBlock *LPred,
- const BlockChain &Chain, BlockFilterSet *BlockFilter,
+ BlockChain &Chain, BlockFilterSet *BlockFilter,
MachineFunction::iterator &PrevUnplacedBlockIt,
bool &DuplicatedToLPred) {
-
DuplicatedToLPred = false;
+ if (!shouldTailDuplicate(BB))
+ return false;
+
DEBUG(dbgs() << "Redoing tail duplication for Succ#"
<< BB->getNumber() << "\n");
- bool IsSimple = TailDup.isSimpleBB(BB);
- // Blocks with single successors don't create additional fallthrough
- // opportunities. Don't duplicate them. TODO: When conditional exits are
- // analyzable, allow them to be duplicated.
- if (!IsSimple && BB->succ_size() == 1)
- return false;
- if (!TailDup.shouldTailDuplicate(IsSimple, *BB))
- return false;
+
// This has to be a callback because none of it can be done after
// BB is deleted.
bool Removed = false;
@@ -1967,6 +2651,7 @@ bool MachineBlockPlacement::maybeTailDuplicateBlock(
llvm::function_ref<void(MachineBasicBlock*)>(RemovalCallback);
SmallVector<MachineBasicBlock *, 8> DuplicatedPreds;
+ bool IsSimple = TailDup.isSimpleBB(BB);
TailDup.tailDuplicateAndUpdate(IsSimple, BB, LPred,
&DuplicatedPreds, &RemovalCallbackRef);
@@ -2006,25 +2691,46 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) {
MLI = &getAnalysis<MachineLoopInfo>();
TII = MF.getSubtarget().getInstrInfo();
TLI = MF.getSubtarget().getTargetLowering();
- MDT = &getAnalysis<MachineDominatorTree>();
+ MPDT = nullptr;
// Initialize PreferredLoopExit to nullptr here since it may never be set if
// there are no MachineLoops.
PreferredLoopExit = nullptr;
+ assert(BlockToChain.empty() &&
+ "BlockToChain map should be empty before starting placement.");
+ assert(ComputedEdges.empty() &&
+ "Computed Edge map should be empty before starting placement.");
+
+ unsigned TailDupSize = TailDupPlacementThreshold;
+ // If only the aggressive threshold is explicitly set, use it.
+ if (TailDupPlacementAggressiveThreshold.getNumOccurrences() != 0 &&
+ TailDupPlacementThreshold.getNumOccurrences() == 0)
+ TailDupSize = TailDupPlacementAggressiveThreshold;
+
+ TargetPassConfig *PassConfig = &getAnalysis<TargetPassConfig>();
+  // For aggressive optimization, we can adjust some thresholds to be less
+ // conservative.
+ if (PassConfig->getOptLevel() >= CodeGenOpt::Aggressive) {
+    // At O3 we should be more willing to copy blocks for tail duplication.
+    // This increases size pressure, so we only do it at O3.
+ // Do this unless only the regular threshold is explicitly set.
+ if (TailDupPlacementThreshold.getNumOccurrences() == 0 ||
+ TailDupPlacementAggressiveThreshold.getNumOccurrences() != 0)
+ TailDupSize = TailDupPlacementAggressiveThreshold;
+ }
+
if (TailDupPlacement) {
- unsigned TailDupSize = TailDuplicatePlacementThreshold;
+ MPDT = &getAnalysis<MachinePostDominatorTree>();
if (MF.getFunction()->optForSize())
TailDupSize = 1;
TailDup.initMF(MF, MBPI, /* LayoutMode */ true, TailDupSize);
+ precomputeTriangleChains();
}
- assert(BlockToChain.empty());
-
buildCFGChains();
// Changing the layout can create new tail merging opportunities.
- TargetPassConfig *PassConfig = &getAnalysis<TargetPassConfig>();
// TailMerge can create jump into if branches that make CFG irreducible for
// HW that requires structured CFG.
bool EnableTailMerge = !MF.getTarget().requiresStructuredCFG() &&
@@ -2032,7 +2738,7 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) {
BranchFoldPlacement;
// No tail merging opportunities if the block number is less than four.
if (MF.size() > 3 && EnableTailMerge) {
- unsigned TailMergeSize = TailDuplicatePlacementThreshold + 1;
+ unsigned TailMergeSize = TailDupSize + 1;
BranchFolder BF(/*EnableTailMerge=*/true, /*CommonHoist=*/false, *MBFI,
*MBPI, TailMergeSize);
@@ -2041,8 +2747,10 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) {
/*AfterBlockPlacement=*/true)) {
// Redo the layout if tail merging creates/removes/moves blocks.
BlockToChain.clear();
- // Must redo the dominator tree if blocks were changed.
- MDT->runOnMachineFunction(MF);
+ ComputedEdges.clear();
+ // Must redo the post-dominator tree if blocks were changed.
+ if (MPDT)
+ MPDT->runOnMachineFunction(MF);
ChainAllocator.DestroyAll();
buildCFGChains();
}
@@ -2052,6 +2760,7 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) {
alignBlocks();
BlockToChain.clear();
+ ComputedEdges.clear();
ChainAllocator.DestroyAll();
if (AlignAllBlock)
@@ -2067,6 +2776,12 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) {
MBI->setAlignment(AlignAllNonFallThruBlocks);
}
}
+ if (ViewBlockLayoutWithBFI != GVDT_None &&
+ (ViewBlockFreqFuncName.empty() ||
+ F->getFunction()->getName().equals(ViewBlockFreqFuncName))) {
+ MBFI->view("MBP." + MF.getName(), false);
+ }
+
// We always return true as we have no way to track whether the final order
// differs from the original order.
diff --git a/contrib/llvm/lib/CodeGen/MachineCSE.cpp b/contrib/llvm/lib/CodeGen/MachineCSE.cpp
index 0766f46..582ff13 100644
--- a/contrib/llvm/lib/CodeGen/MachineCSE.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineCSE.cpp
@@ -13,7 +13,6 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/CodeGen/Passes.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/ScopedHashTable.h"
#include "llvm/ADT/SmallSet.h"
@@ -22,6 +21,7 @@
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/RecyclingAllocator.h"
#include "llvm/Support/raw_ostream.h"
@@ -108,12 +108,12 @@ namespace {
char MachineCSE::ID = 0;
char &llvm::MachineCSEID = MachineCSE::ID;
-INITIALIZE_PASS_BEGIN(MachineCSE, "machine-cse",
- "Machine Common Subexpression Elimination", false, false)
+INITIALIZE_PASS_BEGIN(MachineCSE, DEBUG_TYPE,
+ "Machine Common Subexpression Elimination", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
-INITIALIZE_PASS_END(MachineCSE, "machine-cse",
- "Machine Common Subexpression Elimination", false, false)
+INITIALIZE_PASS_END(MachineCSE, DEBUG_TYPE,
+ "Machine Common Subexpression Elimination", false, false)
/// The source register of a COPY machine instruction can be propagated to all
/// its users, and this propagation could increase the probability of finding
@@ -180,8 +180,8 @@ MachineCSE::isPhysDefTriviallyDead(unsigned Reg,
I = skipDebugInstructionsForward(I, E);
if (I == E)
- // Reached end of block, register is obviously dead.
- return true;
+      // Reached the end of the block; we don't know whether the register
+      // is dead.
+ return false;
bool SeenDef = false;
for (const MachineOperand &MO : I->operands()) {
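
The isPhysDefTriviallyDead change above is a soundness fix: falling off the
end of a basic block proves nothing about a physical register, which may be
live into a successor. A hypothetical two-block example of the hazard, written
as illustrative pseudo machine code (not from the source):

    // bb.0:
    //   $eax = ADD32ri $eax, 1   ; def of $eax, no further use in bb.0
    //   JMP %bb.1                ; scan reaches the end of the block here
    // bb.1:
    //   $ebx = COPY $eax         ; $eax was live-out after all
    //
    // The old code returned true ("obviously dead") at the end of bb.0;
    // without liveness information the only safe answer is false.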
diff --git a/contrib/llvm/lib/CodeGen/MachineCombiner.cpp b/contrib/llvm/lib/CodeGen/MachineCombiner.cpp
index 5beed5f..e6f80db 100644
--- a/contrib/llvm/lib/CodeGen/MachineCombiner.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineCombiner.cpp
@@ -8,11 +8,9 @@
//===----------------------------------------------------------------------===//
//
// The machine combiner pass uses machine trace metrics to ensure the combined
-// instructions does not lengthen the critical path or the resource depth.
+// instructions do not lengthen the critical path or the resource depth.
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "machine-combiner"
-
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineDominators.h"
@@ -32,6 +30,8 @@
using namespace llvm;
+#define DEBUG_TYPE "machine-combiner"
+
STATISTIC(NumInstCombined, "Number of machineinst combined");
namespace {
@@ -86,11 +86,11 @@ private:
char MachineCombiner::ID = 0;
char &llvm::MachineCombinerID = MachineCombiner::ID;
-INITIALIZE_PASS_BEGIN(MachineCombiner, "machine-combiner",
+INITIALIZE_PASS_BEGIN(MachineCombiner, DEBUG_TYPE,
"Machine InstCombiner", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
INITIALIZE_PASS_DEPENDENCY(MachineTraceMetrics)
-INITIALIZE_PASS_END(MachineCombiner, "machine-combiner", "Machine InstCombiner",
+INITIALIZE_PASS_END(MachineCombiner, DEBUG_TYPE, "Machine InstCombiner",
false, false)
void MachineCombiner::getAnalysisUsage(AnalysisUsage &AU) const {
@@ -135,7 +135,9 @@ MachineCombiner::getDepth(SmallVectorImpl<MachineInstr *> &InsInstrs,
// are tracked in the InstrIdxForVirtReg map depth is looked up in InstrDepth
for (auto *InstrPtr : InsInstrs) { // for each Use
unsigned IDepth = 0;
- DEBUG(dbgs() << "NEW INSTR "; InstrPtr->dump(TII); dbgs() << "\n";);
+ DEBUG(dbgs() << "NEW INSTR ";
+ InstrPtr->print(dbgs(), TII);
+ dbgs() << "\n";);
for (const MachineOperand &MO : InstrPtr->operands()) {
// Check for virtual register operand.
if (!(MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg())))
@@ -352,6 +354,19 @@ bool MachineCombiner::doSubstitute(unsigned NewSize, unsigned OldSize) {
return false;
}
+static void insertDeleteInstructions(MachineBasicBlock *MBB, MachineInstr &MI,
+                                     SmallVectorImpl<MachineInstr *> &InsInstrs,
+                                     SmallVectorImpl<MachineInstr *> &DelInstrs,
+                                     MachineTraceMetrics *Traces) {
+ for (auto *InstrPtr : InsInstrs)
+ MBB->insert((MachineBasicBlock::iterator)&MI, InstrPtr);
+ for (auto *InstrPtr : DelInstrs)
+ InstrPtr->eraseFromParentAndMarkDBGValuesForRemoval();
+ ++NumInstCombined;
+ Traces->invalidate(MBB);
+ Traces->verifyAnalysis();
+}
+
/// Substitute a slow code sequence with a faster one by
/// evaluating instruction combining pattern.
/// The prototype of such a pattern is MUl + ADD -> MADD. Performs instruction
@@ -406,7 +421,6 @@ bool MachineCombiner::combineInstructions(MachineBasicBlock *MBB) {
DenseMap<unsigned, unsigned> InstrIdxForVirtReg;
if (!MinInstr)
MinInstr = Traces->getEnsemble(MachineTraceMetrics::TS_MinInstrCount);
- MachineTraceMetrics::Trace BlockTrace = MinInstr->getTrace(MBB);
Traces->verifyAnalysis();
TII->genAlternativeCodeSequence(MI, P, InsInstrs, DelInstrs,
InstrIdxForVirtReg);
@@ -426,23 +440,23 @@ bool MachineCombiner::combineInstructions(MachineBasicBlock *MBB) {
// fewer instructions OR
// the new sequence neither lengthens the critical path nor increases
// resource pressure.
- if (SubstituteAlways || doSubstitute(NewInstCount, OldInstCount) ||
- (improvesCriticalPathLen(MBB, &MI, BlockTrace, InsInstrs,
- DelInstrs, InstrIdxForVirtReg, P) &&
- preservesResourceLen(MBB, BlockTrace, InsInstrs, DelInstrs))) {
- for (auto *InstrPtr : InsInstrs)
- MBB->insert((MachineBasicBlock::iterator) &MI, InstrPtr);
- for (auto *InstrPtr : DelInstrs)
- InstrPtr->eraseFromParentAndMarkDBGValuesForRemoval();
-
- Changed = true;
- ++NumInstCombined;
-
- Traces->invalidate(MBB);
- Traces->verifyAnalysis();
+ if (SubstituteAlways || doSubstitute(NewInstCount, OldInstCount)) {
+ insertDeleteInstructions(MBB, MI, InsInstrs, DelInstrs, Traces);
// Eagerly stop after the first pattern fires.
+ Changed = true;
break;
} else {
+ // Calculating the trace metrics may be expensive,
+ // so only do this when necessary.
+ MachineTraceMetrics::Trace BlockTrace = MinInstr->getTrace(MBB);
+ if (improvesCriticalPathLen(MBB, &MI, BlockTrace, InsInstrs, DelInstrs,
+ InstrIdxForVirtReg, P) &&
+ preservesResourceLen(MBB, BlockTrace, InsInstrs, DelInstrs)) {
+ insertDeleteInstructions(MBB, MI, InsInstrs, DelInstrs, Traces);
+ // Eagerly stop after the first pattern fires.
+ Changed = true;
+ break;
+ }
// Cleanup instructions of the alternative code sequence. There is no
// use for them.
MachineFunction *MF = MBB->getParent();
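
The restructuring above applies a cheap-test-first pattern: the
instruction-count check no longer forces computation of the block trace. A
minimal sketch of the resulting control flow with stand-in types (not the
LLVM API):

    #include <functional>

    bool shouldCombine(bool SubstituteAlways, unsigned NewInstCount,
                       unsigned OldInstCount,
                       const std::function<bool()> &TraceBasedTest) {
      // Cheap decision first: no trace metrics needed.
      if (SubstituteAlways || NewInstCount < OldInstCount)
        return true;
      // Only now pay for the expensive trace computation.
      return TraceBasedTest();
    }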
diff --git a/contrib/llvm/lib/CodeGen/MachineCopyPropagation.cpp b/contrib/llvm/lib/CodeGen/MachineCopyPropagation.cpp
index 5de6dec..7d5a681 100644
--- a/contrib/llvm/lib/CodeGen/MachineCopyPropagation.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineCopyPropagation.cpp
@@ -11,7 +11,6 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/CodeGen/Passes.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallVector.h"
@@ -19,6 +18,7 @@
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
@@ -27,7 +27,7 @@
#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;
-#define DEBUG_TYPE "codegen-cp"
+#define DEBUG_TYPE "machine-cp"
STATISTIC(NumDeletes, "Number of dead copies deleted");
@@ -79,7 +79,7 @@ namespace {
char MachineCopyPropagation::ID = 0;
char &llvm::MachineCopyPropagationID = MachineCopyPropagation::ID;
-INITIALIZE_PASS(MachineCopyPropagation, "machine-cp",
+INITIALIZE_PASS(MachineCopyPropagation, DEBUG_TYPE,
"Machine Copy Propagation Pass", false, false)
/// Remove any entry in \p Map where the register is a subregister or equal to
@@ -291,17 +291,9 @@ void MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) {
if (MO.isDef()) {
Defs.push_back(Reg);
- } else {
+ continue;
+ } else if (MO.readsReg())
ReadRegister(Reg);
- }
- // Treat undef use like defs for copy propagation but not for
- // dead copy. We would need to do a liveness check to be sure the copy
- // is dead for undef uses.
- // The backends are allowed to do whatever they want with undef value
- // and we cannot be sure this register will not be rewritten to break
- // some false dependencies for the hardware for instance.
- if (MO.isUndef())
- Defs.push_back(Reg);
}
// The instruction has a register mask operand which means that it clobbers
diff --git a/contrib/llvm/lib/CodeGen/MachineDominanceFrontier.cpp b/contrib/llvm/lib/CodeGen/MachineDominanceFrontier.cpp
index acb7c48..b559e4e 100644
--- a/contrib/llvm/lib/CodeGen/MachineDominanceFrontier.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineDominanceFrontier.cpp
@@ -12,11 +12,11 @@
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/Passes.h"
-
using namespace llvm;
namespace llvm {
-template class DominanceFrontierBase<MachineBasicBlock>;
+template class DominanceFrontierBase<MachineBasicBlock, false>;
+template class DominanceFrontierBase<MachineBasicBlock, true>;
template class ForwardDominanceFrontierBase<MachineBasicBlock>;
}
diff --git a/contrib/llvm/lib/CodeGen/MachineDominators.cpp b/contrib/llvm/lib/CodeGen/MachineDominators.cpp
index 303a6a9..845e823 100644
--- a/contrib/llvm/lib/CodeGen/MachineDominators.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineDominators.cpp
@@ -13,8 +13,8 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/MachineDominators.h"
-#include "llvm/CodeGen/Passes.h"
#include "llvm/ADT/SmallBitVector.h"
+#include "llvm/CodeGen/Passes.h"
#include "llvm/Support/CommandLine.h"
using namespace llvm;
@@ -31,7 +31,7 @@ static cl::opt<bool, true> VerifyMachineDomInfoX(
namespace llvm {
template class DomTreeNodeBase<MachineBasicBlock>;
-template class DominatorTreeBase<MachineBasicBlock>;
+template class DominatorTreeBase<MachineBasicBlock, false>; // DomTreeBase
}
char MachineDominatorTree::ID = 0;
@@ -49,32 +49,29 @@ void MachineDominatorTree::getAnalysisUsage(AnalysisUsage &AU) const {
bool MachineDominatorTree::runOnMachineFunction(MachineFunction &F) {
CriticalEdgesToSplit.clear();
NewBBs.clear();
+ DT.reset(new DomTreeBase<MachineBasicBlock>());
DT->recalculate(F);
-
return false;
}
MachineDominatorTree::MachineDominatorTree()
: MachineFunctionPass(ID) {
initializeMachineDominatorTreePass(*PassRegistry::getPassRegistry());
- DT = new DominatorTreeBase<MachineBasicBlock>(false);
-}
-
-MachineDominatorTree::~MachineDominatorTree() {
- delete DT;
}
void MachineDominatorTree::releaseMemory() {
- DT->releaseMemory();
+ CriticalEdgesToSplit.clear();
+ DT.reset(nullptr);
}
void MachineDominatorTree::verifyAnalysis() const {
- if (VerifyMachineDomInfo)
+ if (DT && VerifyMachineDomInfo)
verifyDomTree();
}
void MachineDominatorTree::print(raw_ostream &OS, const Module*) const {
- DT->print(OS);
+ if (DT)
+ DT->print(OS);
}
void MachineDominatorTree::applySplitCriticalEdges() const {
@@ -143,15 +140,18 @@ void MachineDominatorTree::applySplitCriticalEdges() const {
}
void MachineDominatorTree::verifyDomTree() const {
+ if (!DT)
+ return;
MachineFunction &F = *getRoot()->getParent();
- MachineDominatorTree OtherDT;
- OtherDT.DT->recalculate(F);
- if (compare(OtherDT)) {
+ DomTreeBase<MachineBasicBlock> OtherDT;
+ OtherDT.recalculate(F);
+ if (getRootNode()->getBlock() != OtherDT.getRootNode()->getBlock() ||
+ DT->compare(OtherDT)) {
errs() << "MachineDominatorTree is not up to date!\nComputed:\n";
- print(errs(), nullptr);
+ DT->print(errs());
errs() << "\nActual:\n";
- OtherDT.print(errs(), nullptr);
+ OtherDT.print(errs());
abort();
}
}
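
The MachineDominators changes above replace a manually managed tree with one
that is rebuilt per function and released eagerly. A sketch of the ownership
pattern with stand-in types (an assumed simplification, not the LLVM classes):

    #include <memory>

    struct DomTree {
      void recalculate(/* MachineFunction & */) {}
    };

    class MachineDominatorTreePass {
      std::unique_ptr<DomTree> DT; // was: raw pointer plus hand-written dtor

    public:
      bool runOnMachineFunction() {
        DT.reset(new DomTree()); // fresh tree for each function
        DT->recalculate();
        return false;
      }
      void releaseMemory() { DT.reset(); } // frees the tree between runs
    };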
diff --git a/contrib/llvm/lib/CodeGen/MachineFrameInfo.cpp b/contrib/llvm/lib/CodeGen/MachineFrameInfo.cpp
new file mode 100644
index 0000000..73d778f
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/MachineFrameInfo.cpp
@@ -0,0 +1,244 @@
+//===-- MachineFrameInfo.cpp ---------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file Implements MachineFrameInfo that manages the stack frame.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineFrameInfo.h"
+
+#include "llvm/ADT/BitVector.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include <cassert>
+
+#define DEBUG_TYPE "codegen"
+
+using namespace llvm;
+
+void MachineFrameInfo::ensureMaxAlignment(unsigned Align) {
+ if (!StackRealignable)
+ assert(Align <= StackAlignment &&
+ "For targets without stack realignment, Align is out of limit!");
+ if (MaxAlignment < Align) MaxAlignment = Align;
+}
+
+/// Clamp the alignment if requested and emit a warning.
+static inline unsigned clampStackAlignment(bool ShouldClamp, unsigned Align,
+ unsigned StackAlign) {
+ if (!ShouldClamp || Align <= StackAlign)
+ return Align;
+ DEBUG(dbgs() << "Warning: requested alignment " << Align
+ << " exceeds the stack alignment " << StackAlign
+ << " when stack realignment is off" << '\n');
+ return StackAlign;
+}
+
+int MachineFrameInfo::CreateStackObject(uint64_t Size, unsigned Alignment,
+ bool isSS, const AllocaInst *Alloca) {
+ assert(Size != 0 && "Cannot allocate zero size stack objects!");
+ Alignment = clampStackAlignment(!StackRealignable, Alignment, StackAlignment);
+ Objects.push_back(StackObject(Size, Alignment, 0, false, isSS, Alloca,
+ !isSS));
+ int Index = (int)Objects.size() - NumFixedObjects - 1;
+ assert(Index >= 0 && "Bad frame index!");
+ ensureMaxAlignment(Alignment);
+ return Index;
+}
+
+int MachineFrameInfo::CreateSpillStackObject(uint64_t Size,
+ unsigned Alignment) {
+ Alignment = clampStackAlignment(!StackRealignable, Alignment, StackAlignment);
+ CreateStackObject(Size, Alignment, true);
+ int Index = (int)Objects.size() - NumFixedObjects - 1;
+ ensureMaxAlignment(Alignment);
+ return Index;
+}
+
+int MachineFrameInfo::CreateVariableSizedObject(unsigned Alignment,
+ const AllocaInst *Alloca) {
+ HasVarSizedObjects = true;
+ Alignment = clampStackAlignment(!StackRealignable, Alignment, StackAlignment);
+ Objects.push_back(StackObject(0, Alignment, 0, false, false, Alloca, true));
+ ensureMaxAlignment(Alignment);
+ return (int)Objects.size()-NumFixedObjects-1;
+}
+
+int MachineFrameInfo::CreateFixedObject(uint64_t Size, int64_t SPOffset,
+ bool Immutable, bool isAliased) {
+ assert(Size != 0 && "Cannot allocate zero size fixed stack objects!");
+ // The alignment of the frame index can be determined from its offset from
+ // the incoming frame position. If the frame object is at offset 32 and
+ // the stack is guaranteed to be 16-byte aligned, then we know that the
+ // object is 16-byte aligned. Note that unlike the non-fixed case, if the
+ // stack needs realignment, we can't assume that the stack will in fact be
+ // aligned.
+ unsigned Align = MinAlign(SPOffset, ForcedRealign ? 1 : StackAlignment);
+ Align = clampStackAlignment(!StackRealignable, Align, StackAlignment);
+ Objects.insert(Objects.begin(), StackObject(Size, Align, SPOffset, Immutable,
+ /*isSS*/ false,
+ /*Alloca*/ nullptr, isAliased));
+ return -++NumFixedObjects;
+}
+
+int MachineFrameInfo::CreateFixedSpillStackObject(uint64_t Size,
+ int64_t SPOffset,
+ bool Immutable) {
+ unsigned Align = MinAlign(SPOffset, ForcedRealign ? 1 : StackAlignment);
+ Align = clampStackAlignment(!StackRealignable, Align, StackAlignment);
+ Objects.insert(Objects.begin(), StackObject(Size, Align, SPOffset, Immutable,
+ /*isSS*/ true,
+ /*Alloca*/ nullptr,
+ /*isAliased*/ false));
+ return -++NumFixedObjects;
+}
+
+BitVector MachineFrameInfo::getPristineRegs(const MachineFunction &MF) const {
+ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
+ BitVector BV(TRI->getNumRegs());
+
+ // Before CSI is calculated, no registers are considered pristine. They can be
+ // freely used and PEI will make sure they are saved.
+ if (!isCalleeSavedInfoValid())
+ return BV;
+
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+ for (const MCPhysReg *CSR = MRI.getCalleeSavedRegs(); CSR && *CSR;
+ ++CSR)
+ BV.set(*CSR);
+
+ // Saved CSRs are not pristine.
+ for (auto &I : getCalleeSavedInfo())
+ for (MCSubRegIterator S(I.getReg(), TRI, true); S.isValid(); ++S)
+ BV.reset(*S);
+
+ return BV;
+}
+
+unsigned MachineFrameInfo::estimateStackSize(const MachineFunction &MF) const {
+ const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
+ const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
+ unsigned MaxAlign = getMaxAlignment();
+ int Offset = 0;
+
+ // This code is very, very similar to PEI::calculateFrameObjectOffsets().
+ // It really should be refactored to share code. Until then, changes
+ // should keep in mind that there's tight coupling between the two.
+
+ for (int i = getObjectIndexBegin(); i != 0; ++i) {
+ int FixedOff = -getObjectOffset(i);
+ if (FixedOff > Offset) Offset = FixedOff;
+ }
+ for (unsigned i = 0, e = getObjectIndexEnd(); i != e; ++i) {
+ if (isDeadObjectIndex(i))
+ continue;
+ Offset += getObjectSize(i);
+ unsigned Align = getObjectAlignment(i);
+ // Adjust to alignment boundary
+ Offset = (Offset+Align-1)/Align*Align;
+
+ MaxAlign = std::max(Align, MaxAlign);
+ }
+
+ if (adjustsStack() && TFI->hasReservedCallFrame(MF))
+ Offset += getMaxCallFrameSize();
+
+ // Round up the size to a multiple of the alignment. If the function has
+ // any calls or alloca's, align to the target's StackAlignment value to
+ // ensure that the callee's frame or the alloca data is suitably aligned;
+ // otherwise, for leaf functions, align to the TransientStackAlignment
+ // value.
+ unsigned StackAlign;
+ if (adjustsStack() || hasVarSizedObjects() ||
+ (RegInfo->needsStackRealignment(MF) && getObjectIndexEnd() != 0))
+ StackAlign = TFI->getStackAlignment();
+ else
+ StackAlign = TFI->getTransientStackAlignment();
+
+ // If the frame pointer is eliminated, all frame offsets will be relative to
+ // SP not FP. Align to MaxAlign so this works.
+ StackAlign = std::max(StackAlign, MaxAlign);
+ unsigned AlignMask = StackAlign - 1;
+ Offset = (Offset + AlignMask) & ~uint64_t(AlignMask);
+
+ return (unsigned)Offset;
+}
+
+void MachineFrameInfo::computeMaxCallFrameSize(const MachineFunction &MF) {
+ const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
+ unsigned FrameSetupOpcode = TII.getCallFrameSetupOpcode();
+ unsigned FrameDestroyOpcode = TII.getCallFrameDestroyOpcode();
+ assert(FrameSetupOpcode != ~0u && FrameDestroyOpcode != ~0u &&
+ "Can only compute MaxCallFrameSize if Setup/Destroy opcode are known");
+
+ MaxCallFrameSize = 0;
+ for (const MachineBasicBlock &MBB : MF) {
+ for (const MachineInstr &MI : MBB) {
+ unsigned Opcode = MI.getOpcode();
+ if (Opcode == FrameSetupOpcode || Opcode == FrameDestroyOpcode) {
+ unsigned Size = TII.getFrameSize(MI);
+ MaxCallFrameSize = std::max(MaxCallFrameSize, Size);
+ AdjustsStack = true;
+ } else if (MI.isInlineAsm()) {
+ // Some inline asm's need a stack frame, as indicated by operand 1.
+ unsigned ExtraInfo = MI.getOperand(InlineAsm::MIOp_ExtraInfo).getImm();
+ if (ExtraInfo & InlineAsm::Extra_IsAlignStack)
+ AdjustsStack = true;
+ }
+ }
+ }
+}
+
+void MachineFrameInfo::print(const MachineFunction &MF, raw_ostream &OS) const{
+ if (Objects.empty()) return;
+
+ const TargetFrameLowering *FI = MF.getSubtarget().getFrameLowering();
+ int ValOffset = (FI ? FI->getOffsetOfLocalArea() : 0);
+
+ OS << "Frame Objects:\n";
+
+ for (unsigned i = 0, e = Objects.size(); i != e; ++i) {
+ const StackObject &SO = Objects[i];
+ OS << " fi#" << (int)(i-NumFixedObjects) << ": ";
+ if (SO.Size == ~0ULL) {
+ OS << "dead\n";
+ continue;
+ }
+ if (SO.Size == 0)
+ OS << "variable sized";
+ else
+ OS << "size=" << SO.Size;
+ OS << ", align=" << SO.Alignment;
+
+ if (i < NumFixedObjects)
+ OS << ", fixed";
+ if (i < NumFixedObjects || SO.SPOffset != -1) {
+ int64_t Off = SO.SPOffset - ValOffset;
+ OS << ", at location [SP";
+ if (Off > 0)
+ OS << "+" << Off;
+ else if (Off < 0)
+ OS << Off;
+ OS << "]";
+ }
+ OS << "\n";
+ }
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD void MachineFrameInfo::dump(const MachineFunction &MF) const {
+ print(MF, dbgs());
+}
+#endif
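
The final rounding step in estimateStackSize() above uses the usual
power-of-two mask trick; the same arithmetic shown standalone (names are
illustrative):

    #include <cassert>
    #include <cstdint>

    uint64_t alignUp(uint64_t Offset, uint64_t StackAlign) {
      assert((StackAlign & (StackAlign - 1)) == 0 &&
             "alignment must be a power of two");
      uint64_t AlignMask = StackAlign - 1;
      return (Offset + AlignMask) & ~AlignMask;
    }
    // alignUp(100, 16) == 112; alignUp(112, 16) == 112.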
diff --git a/contrib/llvm/lib/CodeGen/MachineFunction.cpp b/contrib/llvm/lib/CodeGen/MachineFunction.cpp
index c1d5ea9..742b095 100644
--- a/contrib/llvm/lib/CodeGen/MachineFunction.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineFunction.cpp
@@ -20,7 +20,6 @@
#include "llvm/Analysis/EHPersonalities.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/CodeGen/MachineFunctionInitializer.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
@@ -52,8 +51,6 @@ static cl::opt<unsigned>
cl::desc("Force the alignment of all functions."),
cl::init(0), cl::Hidden);
-void MachineFunctionInitializer::anchor() {}
-
static const char *getPropertyName(MachineFunctionProperties::Property Prop) {
typedef MachineFunctionProperties::Property P;
switch(Prop) {
@@ -169,6 +166,7 @@ void MachineFunction::clear() {
InstructionRecycler.clear(Allocator);
OperandRecycler.clear(Allocator);
BasicBlockRecycler.clear(Allocator);
+ VariableDbgInfos.clear();
if (RegInfo) {
RegInfo->~MachineRegisterInfo();
Allocator.Deallocate(RegInfo);
@@ -307,11 +305,11 @@ MachineFunction::DeleteMachineBasicBlock(MachineBasicBlock *MBB) {
MachineMemOperand *MachineFunction::getMachineMemOperand(
MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, uint64_t s,
unsigned base_alignment, const AAMDNodes &AAInfo, const MDNode *Ranges,
- SynchronizationScope SynchScope, AtomicOrdering Ordering,
+ SyncScope::ID SSID, AtomicOrdering Ordering,
AtomicOrdering FailureOrdering) {
return new (Allocator)
MachineMemOperand(PtrInfo, f, s, base_alignment, AAInfo, Ranges,
- SynchScope, Ordering, FailureOrdering);
+ SSID, Ordering, FailureOrdering);
}
MachineMemOperand *
@@ -322,13 +320,27 @@ MachineFunction::getMachineMemOperand(const MachineMemOperand *MMO,
MachineMemOperand(MachinePointerInfo(MMO->getValue(),
MMO->getOffset()+Offset),
MMO->getFlags(), Size, MMO->getBaseAlignment(),
- AAMDNodes(), nullptr, MMO->getSynchScope(),
+ AAMDNodes(), nullptr, MMO->getSyncScopeID(),
MMO->getOrdering(), MMO->getFailureOrdering());
return new (Allocator)
MachineMemOperand(MachinePointerInfo(MMO->getPseudoValue(),
MMO->getOffset()+Offset),
MMO->getFlags(), Size, MMO->getBaseAlignment(),
- AAMDNodes(), nullptr, MMO->getSynchScope(),
+ AAMDNodes(), nullptr, MMO->getSyncScopeID(),
+ MMO->getOrdering(), MMO->getFailureOrdering());
+}
+
+MachineMemOperand *
+MachineFunction::getMachineMemOperand(const MachineMemOperand *MMO,
+ const AAMDNodes &AAInfo) {
+ MachinePointerInfo MPI = MMO->getValue() ?
+ MachinePointerInfo(MMO->getValue(), MMO->getOffset()) :
+ MachinePointerInfo(MMO->getPseudoValue(), MMO->getOffset());
+
+ return new (Allocator)
+ MachineMemOperand(MPI, MMO->getFlags(), MMO->getSize(),
+ MMO->getBaseAlignment(), AAInfo,
+ MMO->getRanges(), MMO->getSyncScopeID(),
MMO->getOrdering(), MMO->getFailureOrdering());
}
@@ -361,7 +373,7 @@ MachineFunction::extractLoadMemRefs(MachineInstr::mmo_iterator Begin,
(*I)->getFlags() & ~MachineMemOperand::MOStore,
(*I)->getSize(), (*I)->getBaseAlignment(),
(*I)->getAAInfo(), nullptr,
- (*I)->getSynchScope(), (*I)->getOrdering(),
+ (*I)->getSyncScopeID(), (*I)->getOrdering(),
(*I)->getFailureOrdering());
Result[Index] = JustLoad;
}
@@ -395,7 +407,7 @@ MachineFunction::extractStoreMemRefs(MachineInstr::mmo_iterator Begin,
(*I)->getFlags() & ~MachineMemOperand::MOLoad,
(*I)->getSize(), (*I)->getBaseAlignment(),
(*I)->getAAInfo(), nullptr,
- (*I)->getSynchScope(), (*I)->getOrdering(),
+ (*I)->getSyncScopeID(), (*I)->getOrdering(),
(*I)->getFailureOrdering());
Result[Index] = JustStore;
}
@@ -756,212 +768,6 @@ void llvm::addLandingPadInfo(const LandingPadInst &I, MachineBasicBlock &MBB) {
/// \}
//===----------------------------------------------------------------------===//
-// MachineFrameInfo implementation
-//===----------------------------------------------------------------------===//
-
-/// Make sure the function is at least Align bytes aligned.
-void MachineFrameInfo::ensureMaxAlignment(unsigned Align) {
- if (!StackRealignable)
- assert(Align <= StackAlignment &&
- "For targets without stack realignment, Align is out of limit!");
- if (MaxAlignment < Align) MaxAlignment = Align;
-}
-
-/// Clamp the alignment if requested and emit a warning.
-static inline unsigned clampStackAlignment(bool ShouldClamp, unsigned Align,
- unsigned StackAlign) {
- if (!ShouldClamp || Align <= StackAlign)
- return Align;
- DEBUG(dbgs() << "Warning: requested alignment " << Align
- << " exceeds the stack alignment " << StackAlign
- << " when stack realignment is off" << '\n');
- return StackAlign;
-}
-
-/// Create a new statically sized stack object, returning a nonnegative
-/// identifier to represent it.
-int MachineFrameInfo::CreateStackObject(uint64_t Size, unsigned Alignment,
- bool isSS, const AllocaInst *Alloca) {
- assert(Size != 0 && "Cannot allocate zero size stack objects!");
- Alignment = clampStackAlignment(!StackRealignable, Alignment, StackAlignment);
- Objects.push_back(StackObject(Size, Alignment, 0, false, isSS, Alloca,
- !isSS));
- int Index = (int)Objects.size() - NumFixedObjects - 1;
- assert(Index >= 0 && "Bad frame index!");
- ensureMaxAlignment(Alignment);
- return Index;
-}
-
-/// Create a new statically sized stack object that represents a spill slot,
-/// returning a nonnegative identifier to represent it.
-int MachineFrameInfo::CreateSpillStackObject(uint64_t Size,
- unsigned Alignment) {
- Alignment = clampStackAlignment(!StackRealignable, Alignment, StackAlignment);
- CreateStackObject(Size, Alignment, true);
- int Index = (int)Objects.size() - NumFixedObjects - 1;
- ensureMaxAlignment(Alignment);
- return Index;
-}
-
-/// Notify the MachineFrameInfo object that a variable sized object has been
-/// created. This must be created whenever a variable sized object is created,
-/// whether or not the index returned is actually used.
-int MachineFrameInfo::CreateVariableSizedObject(unsigned Alignment,
- const AllocaInst *Alloca) {
- HasVarSizedObjects = true;
- Alignment = clampStackAlignment(!StackRealignable, Alignment, StackAlignment);
- Objects.push_back(StackObject(0, Alignment, 0, false, false, Alloca, true));
- ensureMaxAlignment(Alignment);
- return (int)Objects.size()-NumFixedObjects-1;
-}
-
-/// Create a new object at a fixed location on the stack.
-/// All fixed objects should be created before other objects are created for
-/// efficiency. By default, fixed objects are immutable. This returns an
-/// index with a negative value.
-int MachineFrameInfo::CreateFixedObject(uint64_t Size, int64_t SPOffset,
- bool Immutable, bool isAliased) {
- assert(Size != 0 && "Cannot allocate zero size fixed stack objects!");
- // The alignment of the frame index can be determined from its offset from
- // the incoming frame position. If the frame object is at offset 32 and
- // the stack is guaranteed to be 16-byte aligned, then we know that the
- // object is 16-byte aligned. Note that unlike the non-fixed case, if the
- // stack needs realignment, we can't assume that the stack will in fact be
- // aligned.
- unsigned Align = MinAlign(SPOffset, ForcedRealign ? 1 : StackAlignment);
- Align = clampStackAlignment(!StackRealignable, Align, StackAlignment);
- Objects.insert(Objects.begin(), StackObject(Size, Align, SPOffset, Immutable,
- /*isSS*/ false,
- /*Alloca*/ nullptr, isAliased));
- return -++NumFixedObjects;
-}
-
-/// Create a spill slot at a fixed location on the stack.
-/// Returns an index with a negative value.
-int MachineFrameInfo::CreateFixedSpillStackObject(uint64_t Size,
- int64_t SPOffset,
- bool Immutable) {
- unsigned Align = MinAlign(SPOffset, ForcedRealign ? 1 : StackAlignment);
- Align = clampStackAlignment(!StackRealignable, Align, StackAlignment);
- Objects.insert(Objects.begin(), StackObject(Size, Align, SPOffset, Immutable,
- /*isSS*/ true,
- /*Alloca*/ nullptr,
- /*isAliased*/ false));
- return -++NumFixedObjects;
-}
-
-BitVector MachineFrameInfo::getPristineRegs(const MachineFunction &MF) const {
- const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
- BitVector BV(TRI->getNumRegs());
-
- // Before CSI is calculated, no registers are considered pristine. They can be
- // freely used and PEI will make sure they are saved.
- if (!isCalleeSavedInfoValid())
- return BV;
-
- for (const MCPhysReg *CSR = TRI->getCalleeSavedRegs(&MF); CSR && *CSR; ++CSR)
- BV.set(*CSR);
-
- // Saved CSRs are not pristine.
- for (auto &I : getCalleeSavedInfo())
- for (MCSubRegIterator S(I.getReg(), TRI, true); S.isValid(); ++S)
- BV.reset(*S);
-
- return BV;
-}
-
-unsigned MachineFrameInfo::estimateStackSize(const MachineFunction &MF) const {
- const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
- const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
- unsigned MaxAlign = getMaxAlignment();
- int Offset = 0;
-
- // This code is very, very similar to PEI::calculateFrameObjectOffsets().
- // It really should be refactored to share code. Until then, changes
- // should keep in mind that there's tight coupling between the two.
-
- for (int i = getObjectIndexBegin(); i != 0; ++i) {
- int FixedOff = -getObjectOffset(i);
- if (FixedOff > Offset) Offset = FixedOff;
- }
- for (unsigned i = 0, e = getObjectIndexEnd(); i != e; ++i) {
- if (isDeadObjectIndex(i))
- continue;
- Offset += getObjectSize(i);
- unsigned Align = getObjectAlignment(i);
- // Adjust to alignment boundary
- Offset = (Offset+Align-1)/Align*Align;
-
- MaxAlign = std::max(Align, MaxAlign);
- }
-
- if (adjustsStack() && TFI->hasReservedCallFrame(MF))
- Offset += getMaxCallFrameSize();
-
- // Round up the size to a multiple of the alignment. If the function has
- // any calls or alloca's, align to the target's StackAlignment value to
- // ensure that the callee's frame or the alloca data is suitably aligned;
- // otherwise, for leaf functions, align to the TransientStackAlignment
- // value.
- unsigned StackAlign;
- if (adjustsStack() || hasVarSizedObjects() ||
- (RegInfo->needsStackRealignment(MF) && getObjectIndexEnd() != 0))
- StackAlign = TFI->getStackAlignment();
- else
- StackAlign = TFI->getTransientStackAlignment();
-
- // If the frame pointer is eliminated, all frame offsets will be relative to
- // SP not FP. Align to MaxAlign so this works.
- StackAlign = std::max(StackAlign, MaxAlign);
- unsigned AlignMask = StackAlign - 1;
- Offset = (Offset + AlignMask) & ~uint64_t(AlignMask);
-
- return (unsigned)Offset;
-}
-
-void MachineFrameInfo::print(const MachineFunction &MF, raw_ostream &OS) const{
- if (Objects.empty()) return;
-
- const TargetFrameLowering *FI = MF.getSubtarget().getFrameLowering();
- int ValOffset = (FI ? FI->getOffsetOfLocalArea() : 0);
-
- OS << "Frame Objects:\n";
-
- for (unsigned i = 0, e = Objects.size(); i != e; ++i) {
- const StackObject &SO = Objects[i];
- OS << " fi#" << (int)(i-NumFixedObjects) << ": ";
- if (SO.Size == ~0ULL) {
- OS << "dead\n";
- continue;
- }
- if (SO.Size == 0)
- OS << "variable sized";
- else
- OS << "size=" << SO.Size;
- OS << ", align=" << SO.Alignment;
-
- if (i < NumFixedObjects)
- OS << ", fixed";
- if (i < NumFixedObjects || SO.SPOffset != -1) {
- int64_t Off = SO.SPOffset - ValOffset;
- OS << ", at location [SP";
- if (Off > 0)
- OS << "+" << Off;
- else if (Off < 0)
- OS << Off;
- OS << "]";
- }
- OS << "\n";
- }
-}
-
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
-void MachineFrameInfo::dump(const MachineFunction &MF) const {
- print(MF, dbgs());
-}
-#endif
-
-//===----------------------------------------------------------------------===//
// MachineJumpTableInfo implementation
//===----------------------------------------------------------------------===//
diff --git a/contrib/llvm/lib/CodeGen/MachineFunctionPass.cpp b/contrib/llvm/lib/CodeGen/MachineFunctionPass.cpp
index 2265676..5ffe330 100644
--- a/contrib/llvm/lib/CodeGen/MachineFunctionPass.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineFunctionPass.cpp
@@ -42,7 +42,7 @@ bool MachineFunctionPass::runOnFunction(Function &F) {
return false;
MachineModuleInfo &MMI = getAnalysis<MachineModuleInfo>();
- MachineFunction &MF = MMI.getMachineFunction(F);
+ MachineFunction &MF = MMI.getOrCreateMachineFunction(F);
MachineFunctionProperties &MFProps = MF.getProperties();
diff --git a/contrib/llvm/lib/CodeGen/MachineFunctionPrinterPass.cpp b/contrib/llvm/lib/CodeGen/MachineFunctionPrinterPass.cpp
index 0d533c3..55d9def 100644
--- a/contrib/llvm/lib/CodeGen/MachineFunctionPrinterPass.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineFunctionPrinterPass.cpp
@@ -11,9 +11,9 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/SlotIndexes.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
diff --git a/contrib/llvm/lib/CodeGen/MachineInstr.cpp b/contrib/llvm/lib/CodeGen/MachineInstr.cpp
index 2f2e3b3..535757e 100644
--- a/contrib/llvm/lib/CodeGen/MachineInstr.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineInstr.cpp
@@ -1,4 +1,4 @@
-//===-- lib/CodeGen/MachineInstr.cpp --------------------------------------===//
+//===- lib/CodeGen/MachineInstr.cpp ---------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -12,20 +12,34 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/ADT/APFloat.h"
+#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/FoldingSet.h"
#include "llvm/ADT/Hashing.h"
+#include "llvm/ADT/None.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/Analysis/Loads.h"
+#include "llvm/Analysis/MemoryLocation.h"
+#include "llvm/CodeGen/GlobalISel/RegisterBank.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineInstrBundle.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/IR/Constants.h"
-#include "llvm/IR/DebugInfo.h"
+#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
@@ -34,10 +48,14 @@
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/LowLevelTypeImpl.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
@@ -45,6 +63,14 @@
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
+#include <algorithm>
+#include <cassert>
+#include <cstddef>
+#include <cstdint>
+#include <cstring>
+#include <iterator>
+#include <utility>
+
using namespace llvm;
static cl::opt<bool> PrintWholeRegMask(
@@ -256,14 +282,27 @@ bool MachineOperand::isIdenticalTo(const MachineOperand &Other) const {
case MachineOperand::MO_GlobalAddress:
return getGlobal() == Other.getGlobal() && getOffset() == Other.getOffset();
case MachineOperand::MO_ExternalSymbol:
- return !strcmp(getSymbolName(), Other.getSymbolName()) &&
+ return strcmp(getSymbolName(), Other.getSymbolName()) == 0 &&
getOffset() == Other.getOffset();
case MachineOperand::MO_BlockAddress:
return getBlockAddress() == Other.getBlockAddress() &&
getOffset() == Other.getOffset();
case MachineOperand::MO_RegisterMask:
- case MachineOperand::MO_RegisterLiveOut:
- return getRegMask() == Other.getRegMask();
+ case MachineOperand::MO_RegisterLiveOut: {
+ // Shallow compare of the two RegMasks
+ const uint32_t *RegMask = getRegMask();
+ const uint32_t *OtherRegMask = Other.getRegMask();
+ if (RegMask == OtherRegMask)
+ return true;
+
+ // Calculate the size of the RegMask
+ const MachineFunction *MF = getParent()->getParent()->getParent();
+ const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
+ unsigned RegMaskSize = (TRI->getNumRegs() + 31) / 32;
+
+ // Deep compare of the two RegMasks
+ return std::equal(RegMask, RegMask + RegMaskSize, OtherRegMask);
+ }
case MachineOperand::MO_MCSymbol:
return getMCSymbol() == Other.getMCSymbol();
case MachineOperand::MO_CFIIndex:
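
A register mask is a bit vector with one bit per physical register, packed into 32-bit words, which is why the deep comparison above walks (TRI->getNumRegs() + 31) / 32 words. A minimal standalone sketch of the same logic (plain C++, with NumRegs standing in for the target's register count):

  #include <algorithm>
  #include <cstdint>

  // Compare two register masks: shallow pointer check first, then a deep
  // word-by-word comparison over ceil(NumRegs / 32) 32-bit words.
  bool regMasksEqual(const uint32_t *A, const uint32_t *B, unsigned NumRegs) {
    if (A == B)
      return true;
    unsigned Words = (NumRegs + 31) / 32;
    return std::equal(A, A + Words, B);
  }
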
@@ -403,6 +442,19 @@ void MachineOperand::print(raw_ostream &OS, ModuleSlotTracker &MST,
bool Unused;
APF.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven, &Unused);
OS << "half " << APF.convertToFloat();
+ } else if (getFPImm()->getType()->isFP128Ty()) {
+ APFloat APF = getFPImm()->getValueAPF();
+ SmallString<16> Str;
+    APF.toString(Str);
+ OS << "quad " << Str;
+ } else if (getFPImm()->getType()->isX86_FP80Ty()) {
+ APFloat APF = getFPImm()->getValueAPF();
+ OS << "x86_fp80 0xK";
+ APInt API = APF.bitcastToAPInt();
+ OS << format_hex_no_prefix(API.getHiBits(16).getZExtValue(), 4,
+ /*Upper=*/true);
+ OS << format_hex_no_prefix(API.getLoBits(64).getZExtValue(), 16,
+ /*Upper=*/true);
} else {
OS << getFPImm()->getValueAPF().convertToDouble();
}
@@ -491,6 +543,7 @@ void MachineOperand::print(raw_ostream &OS, ModuleSlotTracker &MST,
auto Pred = static_cast<CmpInst::Predicate>(getPredicate());
OS << '<' << (CmpInst::isIntPredicate(Pred) ? "intpred" : "floatpred")
<< CmpInst::getPredicateName(Pred) << '>';
+ break;
}
}
if (unsigned TF = getTargetFlags())
@@ -514,6 +567,21 @@ unsigned MachinePointerInfo::getAddrSpace() const {
return cast<PointerType>(V.get<const Value*>()->getType())->getAddressSpace();
}
+/// isDereferenceable - Return true if V is always dereferenceable for
+/// Offset + Size bytes.
+bool MachinePointerInfo::isDereferenceable(unsigned Size, LLVMContext &C,
+ const DataLayout &DL) const {
+ if (!V.is<const Value*>())
+ return false;
+
+ const Value *BasePtr = V.get<const Value*>();
+ if (BasePtr == nullptr)
+ return false;
+
+ return isDereferenceableAndAlignedPointer(
+ BasePtr, 1, APInt(DL.getPointerSizeInBits(), Offset + Size), DL);
+}
+
/// getConstantPool - Return a MachinePointerInfo record that refers to the
/// constant pool.
MachinePointerInfo MachinePointerInfo::getConstantPool(MachineFunction &MF) {
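
Note that the query above folds the offset into the byte count: it asks whether the whole window [BasePtr, BasePtr + Offset + Size) is dereferenceable, with a minimum alignment of 1. A hedged caller sketch (MMO, Ctx, and DL are assumed names in scope, not a specific in-tree caller):

  // Only treat the access as speculatable when the pointer info proves the
  // full access range is dereferenceable.
  bool CanSpeculate =
      MMO->getPointerInfo().isDereferenceable(MMO->getSize(), Ctx, DL);
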
@@ -544,7 +612,7 @@ MachineMemOperand::MachineMemOperand(MachinePointerInfo ptrinfo, Flags f,
uint64_t s, unsigned int a,
const AAMDNodes &AAInfo,
const MDNode *Ranges,
- SynchronizationScope SynchScope,
+ SyncScope::ID SSID,
AtomicOrdering Ordering,
AtomicOrdering FailureOrdering)
: PtrInfo(ptrinfo), Size(s), FlagVals(f), BaseAlignLog2(Log2_32(a) + 1),
@@ -555,8 +623,8 @@ MachineMemOperand::MachineMemOperand(MachinePointerInfo ptrinfo, Flags f,
assert(getBaseAlignment() == a && "Alignment is not a power of 2!");
assert((isLoad() || isStore()) && "Not a load/store!");
- AtomicInfo.SynchScope = static_cast<unsigned>(SynchScope);
- assert(getSynchScope() == SynchScope && "Value truncated");
+ AtomicInfo.SSID = static_cast<unsigned>(SSID);
+ assert(getSyncScopeID() == SSID && "Value truncated");
AtomicInfo.Ordering = static_cast<unsigned>(Ordering);
assert(getOrdering() == Ordering && "Value truncated");
AtomicInfo.FailureOrdering = static_cast<unsigned>(FailureOrdering);
@@ -682,6 +750,12 @@ void MachineMemOperand::print(raw_ostream &OS, ModuleSlotTracker &MST) const {
OS << "(dereferenceable)";
if (isInvariant())
OS << "(invariant)";
+ if (getFlags() & MOTargetFlag1)
+ OS << "(flag1)";
+ if (getFlags() & MOTargetFlag2)
+ OS << "(flag2)";
+ if (getFlags() & MOTargetFlag3)
+ OS << "(flag3)";
}
//===----------------------------------------------------------------------===//
@@ -704,9 +778,7 @@ void MachineInstr::addImplicitDefUseOperands(MachineFunction &MF) {
/// the MCInstrDesc.
MachineInstr::MachineInstr(MachineFunction &MF, const MCInstrDesc &tid,
DebugLoc dl, bool NoImp)
- : MCID(&tid), Parent(nullptr), Operands(nullptr), NumOperands(0), Flags(0),
- AsmPrinterFlags(0), NumMemRefs(0), MemRefs(nullptr),
- debugLoc(std::move(dl)) {
+ : MCID(&tid), debugLoc(std::move(dl)) {
assert(debugLoc.hasTrivialDestructor() && "Expected trivial destructor");
// Reserve space for the expected number of operands.
@@ -723,9 +795,8 @@ MachineInstr::MachineInstr(MachineFunction &MF, const MCInstrDesc &tid,
/// MachineInstr ctor - Copies MachineInstr arg exactly
///
MachineInstr::MachineInstr(MachineFunction &MF, const MachineInstr &MI)
- : MCID(&MI.getDesc()), Parent(nullptr), Operands(nullptr), NumOperands(0),
- Flags(0), AsmPrinterFlags(0), NumMemRefs(MI.NumMemRefs),
- MemRefs(MI.MemRefs), debugLoc(MI.getDebugLoc()) {
+ : MCID(&MI.getDesc()), NumMemRefs(MI.NumMemRefs), MemRefs(MI.MemRefs),
+ debugLoc(MI.getDebugLoc()) {
assert(debugLoc.hasTrivialDestructor() && "Expected trivial destructor");
CapOperands = OperandCapacity::get(MI.getNumOperands());
@@ -1571,6 +1642,65 @@ bool MachineInstr::isSafeToMove(AliasAnalysis *AA, bool &SawStore) const {
return true;
}
+bool MachineInstr::mayAlias(AliasAnalysis *AA, MachineInstr &Other,
+ bool UseTBAA) {
+ const MachineFunction *MF = getParent()->getParent();
+ const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
+
+ // If neither instruction stores to memory, they can't alias in any
+ // meaningful way, even if they read from the same address.
+ if (!mayStore() && !Other.mayStore())
+ return false;
+
+ // Let the target decide if memory accesses cannot possibly overlap.
+ if (TII->areMemAccessesTriviallyDisjoint(*this, Other, AA))
+ return false;
+
+ if (!AA)
+ return true;
+
+ // FIXME: Need to handle multiple memory operands to support all targets.
+ if (!hasOneMemOperand() || !Other.hasOneMemOperand())
+ return true;
+
+ MachineMemOperand *MMOa = *memoperands_begin();
+ MachineMemOperand *MMOb = *Other.memoperands_begin();
+
+ if (!MMOa->getValue() || !MMOb->getValue())
+ return true;
+
+ // The following interface to AA is fashioned after DAGCombiner::isAlias
+ // and operates with MachineMemOperand offset with some important
+ // assumptions:
+ // - LLVM fundamentally assumes flat address spaces.
+ // - MachineOperand offset can *only* result from legalization and
+ // cannot affect queries other than the trivial case of overlap
+ // checking.
+ // - These offsets never wrap and never step outside
+ // of allocated objects.
+ // - There should never be any negative offsets here.
+ //
+ // FIXME: Modify API to hide this math from "user"
+ // FIXME: Even before we go to AA we can reason locally about some
+ // memory objects. It can save compile time, and possibly catch some
+ // corner cases not currently covered.
+
+ assert((MMOa->getOffset() >= 0) && "Negative MachineMemOperand offset");
+ assert((MMOb->getOffset() >= 0) && "Negative MachineMemOperand offset");
+
+ int64_t MinOffset = std::min(MMOa->getOffset(), MMOb->getOffset());
+ int64_t Overlapa = MMOa->getSize() + MMOa->getOffset() - MinOffset;
+ int64_t Overlapb = MMOb->getSize() + MMOb->getOffset() - MinOffset;
+
+ AliasResult AAResult =
+ AA->alias(MemoryLocation(MMOa->getValue(), Overlapa,
+ UseTBAA ? MMOa->getAAInfo() : AAMDNodes()),
+ MemoryLocation(MMOb->getValue(), Overlapb,
+ UseTBAA ? MMOb->getAAInfo() : AAMDNodes()));
+
+ return (AAResult != NoAlias);
+}
+
/// hasOrderedMemoryRef - Return true if this instruction may have an ordered
/// or volatile memory reference, or if the information describing the memory
/// reference is not available. Return false if it is known to have no ordered
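
The overlap math above widens both memory locations back to the smaller of the two offsets, so a single AA query covers both accesses in full. A standalone model with made-up operands (access A: 4 bytes at offset 8; access B: 8 bytes at offset 0):

  #include <algorithm>
  #include <cstdint>

  int main() {
    int64_t OffA = 8, SizeA = 4;
    int64_t OffB = 0, SizeB = 8;
    int64_t MinOffset = std::min(OffA, OffB);    // 0
    int64_t OverlapA = SizeA + OffA - MinOffset; // 12: [0,12) spans A's bytes
    int64_t OverlapB = SizeB + OffB - MinOffset; // 8:  [0,8) spans B's bytes
    // AA is then asked about (ValueA, 12) vs. (ValueB, 8); a NoAlias answer
    // means the two instructions provably do not overlap.
    return 0;
  }
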
@@ -1589,7 +1719,7 @@ bool MachineInstr::hasOrderedMemoryRef() const {
return true;
// Check if any of our memory operands are ordered.
- return any_of(memoperands(), [](const MachineMemOperand *MMO) {
+ return llvm::any_of(memoperands(), [](const MachineMemOperand *MMO) {
return !MMO->isUnordered();
});
}
@@ -1692,14 +1822,14 @@ void MachineInstr::copyImplicitOps(MachineFunction &MF,
}
}
-LLVM_DUMP_METHOD void MachineInstr::dump(const TargetInstrInfo *TII) const {
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD void MachineInstr::dump() const {
dbgs() << " ";
- print(dbgs(), false /* SkipOpers */, TII);
-#endif
+ print(dbgs());
}
+#endif
-void MachineInstr::print(raw_ostream &OS, bool SkipOpers,
+void MachineInstr::print(raw_ostream &OS, bool SkipOpers, bool SkipDebugLoc,
const TargetInstrInfo *TII) const {
const Module *M = nullptr;
if (const MachineBasicBlock *MBB = getParent())
@@ -1707,11 +1837,12 @@ void MachineInstr::print(raw_ostream &OS, bool SkipOpers,
M = MF->getFunction()->getParent();
ModuleSlotTracker MST(M);
- print(OS, MST, SkipOpers, TII);
+ print(OS, MST, SkipOpers, SkipDebugLoc, TII);
}
void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST,
- bool SkipOpers, const TargetInstrInfo *TII) const {
+ bool SkipOpers, bool SkipDebugLoc,
+ const TargetInstrInfo *TII) const {
// We can be a bit tidier if we know the MachineFunction.
const MachineFunction *MF = nullptr;
const TargetRegisterInfo *TRI = nullptr;
@@ -1762,7 +1893,6 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST,
return;
// Print the rest of the operands.
- bool OmittedAnyCallClobbers = false;
bool FirstOp = true;
unsigned AsmDescOp = ~0u;
unsigned AsmOpCount = 0;
@@ -1799,31 +1929,6 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST,
if (MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg()))
VirtRegs.push_back(MO.getReg());
- // Omit call-clobbered registers which aren't used anywhere. This makes
- // call instructions much less noisy on targets where calls clobber lots
- // of registers. Don't rely on MO.isDead() because we may be called before
- // LiveVariables is run, or we may be looking at a non-allocatable reg.
- if (MRI && isCall() &&
- MO.isReg() && MO.isImplicit() && MO.isDef()) {
- unsigned Reg = MO.getReg();
- if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
- if (MRI->use_empty(Reg)) {
- bool HasAliasLive = false;
- for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) {
- unsigned AliasReg = *AI;
- if (!MRI->use_empty(AliasReg)) {
- HasAliasLive = true;
- break;
- }
- }
- if (!HasAliasLive) {
- OmittedAnyCallClobbers = true;
- continue;
- }
- }
- }
- }
-
if (FirstOp) FirstOp = false; else OS << ",";
OS << " ";
if (i < getDesc().NumOperands) {
@@ -1905,12 +2010,6 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST,
MO.print(OS, MST, TRI);
}
- // Briefly indicate whether any call clobbers were omitted.
- if (OmittedAnyCallClobbers) {
- if (!FirstOp) OS << ",";
- OS << " ...";
- }
-
bool HaveSemi = false;
const unsigned PrintableFlags = FrameSetup | FrameDestroy;
if (Flags & PrintableFlags) {
@@ -1987,6 +2086,8 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST,
}
if (isIndirectDebugValue())
OS << " indirect";
+ } else if (SkipDebugLoc) {
+ return;
} else if (debugLoc && MF) {
if (!HaveSemi)
OS << ";";
@@ -2174,8 +2275,8 @@ void MachineInstr::setPhysRegsDeadExcept(ArrayRef<unsigned> UsedRegs,
unsigned Reg = MO.getReg();
if (!TargetRegisterInfo::isPhysicalRegister(Reg)) continue;
// If there are no uses, including partial uses, the def is dead.
- if (none_of(UsedRegs,
- [&](unsigned Use) { return TRI.regsOverlap(Use, Reg); }))
+ if (llvm::none_of(UsedRegs,
+ [&](unsigned Use) { return TRI.regsOverlap(Use, Reg); }))
MO.setIsDead();
}
@@ -2263,3 +2364,26 @@ MachineInstrBuilder llvm::BuildMI(MachineBasicBlock &BB,
BB.insert(I, MI);
return MachineInstrBuilder(MF, MI);
}
+
+MachineInstr *llvm::buildDbgValueForSpill(MachineBasicBlock &BB,
+ MachineBasicBlock::iterator I,
+ const MachineInstr &Orig,
+ int FrameIndex) {
+ const MDNode *Var = Orig.getDebugVariable();
+ const auto *Expr = cast_or_null<DIExpression>(Orig.getDebugExpression());
+ bool IsIndirect = Orig.isIndirectDebugValue();
+ uint64_t Offset = IsIndirect ? Orig.getOperand(1).getImm() : 0;
+ DebugLoc DL = Orig.getDebugLoc();
+ assert(cast<DILocalVariable>(Var)->isValidLocationForIntrinsic(DL) &&
+ "Expected inlined-at fields to agree");
+ // If the DBG_VALUE already was a memory location, add an extra
+ // DW_OP_deref. Otherwise just turning this from a register into a
+ // memory/indirect location is sufficient.
+ if (IsIndirect)
+ Expr = DIExpression::prepend(Expr, DIExpression::WithDeref);
+ return BuildMI(BB, I, DL, Orig.getDesc())
+ .addFrameIndex(FrameIndex)
+ .addImm(Offset)
+ .addMetadata(Var)
+ .addMetadata(Expr);
+}
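
A hedged sketch of how a spiller is expected to use this helper (DbgMI is an assumed MachineInstr* pointing at a register-based DBG_VALUE, FI the destination stack slot):

  // Re-point the debug location at the stack slot, then drop the stale
  // register-based DBG_VALUE; buildDbgValueForSpill prepends DW_OP_deref
  // itself when the original location was already indirect.
  MachineBasicBlock *MBB = DbgMI->getParent();
  auto InsertPt = std::next(DbgMI->getIterator());
  buildDbgValueForSpill(*MBB, InsertPt, *DbgMI, FI);
  DbgMI->eraseFromParent();
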
diff --git a/contrib/llvm/lib/CodeGen/MachineLICM.cpp b/contrib/llvm/lib/CodeGen/MachineLICM.cpp
index b3d1843..c7113f1 100644
--- a/contrib/llvm/lib/CodeGen/MachineLICM.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineLICM.cpp
@@ -16,7 +16,6 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/CodeGen/Passes.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
@@ -26,6 +25,7 @@
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/CodeGen/TargetSchedule.h"
#include "llvm/Support/CommandLine.h"
@@ -38,7 +38,7 @@
#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;
-#define DEBUG_TYPE "machine-licm"
+#define DEBUG_TYPE "machinelicm"
static cl::opt<bool>
AvoidSpeculation("avoid-speculation",
@@ -237,13 +237,13 @@ namespace {
char MachineLICM::ID = 0;
char &llvm::MachineLICMID = MachineLICM::ID;
-INITIALIZE_PASS_BEGIN(MachineLICM, "machinelicm",
- "Machine Loop Invariant Code Motion", false, false)
+INITIALIZE_PASS_BEGIN(MachineLICM, DEBUG_TYPE,
+ "Machine Loop Invariant Code Motion", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
-INITIALIZE_PASS_END(MachineLICM, "machinelicm",
- "Machine Loop Invariant Code Motion", false, false)
+INITIALIZE_PASS_END(MachineLICM, DEBUG_TYPE,
+ "Machine Loop Invariant Code Motion", false, false)
/// Test if the given loop is the outer-most loop that has a unique predecessor.
static bool LoopIsOuterMostWithPredecessor(MachineLoop *CurLoop) {
@@ -330,7 +330,7 @@ bool MachineLICM::runOnMachineFunction(MachineFunction &MF) {
/// Return true if instruction stores to the specified frame.
static bool InstructionStoresToFI(const MachineInstr *MI, int FI) {
// If we lost memory operands, conservatively assume that the instruction
- // writes to all slots.
+ // writes to all slots.
if (MI->memoperands_empty())
return true;
for (const MachineMemOperand *MemOp : MI->memoperands()) {
@@ -708,7 +708,7 @@ void MachineLICM::SinkIntoLoop() {
for (MachineBasicBlock::instr_iterator I = Preheader->instr_begin();
I != Preheader->instr_end(); ++I) {
// We need to ensure that we can safely move this instruction into the loop.
- // As such, it must not have side-effects, e.g. such as a call has.
+ // As such, it must not have side-effects, e.g. such as a call has.
if (IsLoopInvariantInst(*I) && !HasLoopPHIUse(&*I))
Candidates.push_back(&*I);
}
@@ -837,9 +837,9 @@ MachineLICM::calcRegisterCost(const MachineInstr *MI, bool ConsiderSeen,
/// constant pool.
static bool mayLoadFromGOTOrConstantPool(MachineInstr &MI) {
assert (MI.mayLoad() && "Expected MI that loads!");
-
+
// If we lost memory operands, conservatively assume that the instruction
- // reads from everything..
+ // reads from everything..
if (MI.memoperands_empty())
return true;
@@ -895,8 +895,11 @@ bool MachineLICM::IsLoopInvariantInst(MachineInstr &I) {
// If the physreg has no defs anywhere, it's just an ambient register
// and we can freely move its uses. Alternatively, if it's allocatable,
// it could get allocated to something with a def during allocation.
- if (!MRI->isConstantPhysReg(Reg))
- return false;
+ // However, if the physreg is known to always be caller saved/restored
+ // then this use is safe to hoist.
+ if (!MRI->isConstantPhysReg(Reg) &&
+ !(TRI->isCallerPreservedPhysReg(Reg, *I.getParent()->getParent())))
+ return false;
// Otherwise it's safe to move.
continue;
} else if (!MO.isDead()) {
@@ -1337,7 +1340,7 @@ bool MachineLICM::Hoist(MachineInstr *MI, MachineBasicBlock *Preheader) {
Preheader->splice(Preheader->getFirstTerminator(),MI->getParent(),MI);
// Since we are moving the instruction out of its basic block, we do not
- // retain its debug location. Doing so would degrade the debugging
+ // retain its debug location. Doing so would degrade the debugging
// experience and adversely affect the accuracy of profiling information.
MI->setDebugLoc(DebugLoc());
diff --git a/contrib/llvm/lib/CodeGen/MachineLoopInfo.cpp b/contrib/llvm/lib/CodeGen/MachineLoopInfo.cpp
index fdeaf7b..a9aa1d9 100644
--- a/contrib/llvm/lib/CodeGen/MachineLoopInfo.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineLoopInfo.cpp
@@ -87,6 +87,22 @@ MachineBasicBlock *MachineLoop::findLoopControlBlock() {
return nullptr;
}
+DebugLoc MachineLoop::getStartLoc() const {
+ // Try the pre-header first.
+ if (MachineBasicBlock *PHeadMBB = getLoopPreheader())
+ if (const BasicBlock *PHeadBB = PHeadMBB->getBasicBlock())
+ if (DebugLoc DL = PHeadBB->getTerminator()->getDebugLoc())
+ return DL;
+
+ // If we have no pre-header or there are no instructions with debug
+ // info in it, try the header.
+ if (MachineBasicBlock *HeadMBB = getHeader())
+ if (const BasicBlock *HeadBB = HeadMBB->getBasicBlock())
+ return HeadBB->getTerminator()->getDebugLoc();
+
+ return DebugLoc();
+}
+
MachineBasicBlock *
MachineLoopInfo::findLoopPreheader(MachineLoop *L,
bool SpeculativePreheader) const {
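
This gives loop-level diagnostics a usable source location even though a machine loop has no single originating instruction. A hypothetical use (ML is an assumed MachineLoop pointer):

  // May still be an empty DebugLoc if the function was compiled without
  // debug info; callers should tolerate that.
  DebugLoc DL = ML->getStartLoc();
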
diff --git a/contrib/llvm/lib/CodeGen/MachineModuleInfo.cpp b/contrib/llvm/lib/CodeGen/MachineModuleInfo.cpp
index 6618857..825290a 100644
--- a/contrib/llvm/lib/CodeGen/MachineModuleInfo.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineModuleInfo.cpp
@@ -8,43 +8,51 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/MachineModuleInfo.h"
-#include "llvm/ADT/PointerUnion.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/TinyPtrVector.h"
-#include "llvm/Analysis/EHPersonalities.h"
-#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineFunctionInitializer.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/Passes.h"
-#include "llvm/IR/Constants.h"
+#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/DerivedTypes.h"
-#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
-#include "llvm/MC/MCObjectFileInfo.h"
+#include "llvm/IR/Value.h"
+#include "llvm/IR/ValueHandle.h"
+#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCSymbol.h"
-#include "llvm/Support/Dwarf.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Casting.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetMachine.h"
+#include <algorithm>
+#include <cassert>
+#include <memory>
+#include <utility>
+#include <vector>
+
using namespace llvm;
using namespace llvm::dwarf;
// Handle the Pass registration stuff necessary to use DataLayout's.
-INITIALIZE_TM_PASS(MachineModuleInfo, "machinemoduleinfo",
- "Machine Module Information", false, false)
+INITIALIZE_PASS(MachineModuleInfo, "machinemoduleinfo",
+ "Machine Module Information", false, false)
char MachineModuleInfo::ID = 0;
// Out of line virtual method.
-MachineModuleInfoImpl::~MachineModuleInfoImpl() {}
+MachineModuleInfoImpl::~MachineModuleInfoImpl() = default;
namespace llvm {
+
class MMIAddrLabelMapCallbackPtr final : CallbackVH {
- MMIAddrLabelMap *Map;
+ MMIAddrLabelMap *Map = nullptr;
+
public:
- MMIAddrLabelMapCallbackPtr() : Map(nullptr) {}
- MMIAddrLabelMapCallbackPtr(Value *V) : CallbackVH(V), Map(nullptr) {}
+ MMIAddrLabelMapCallbackPtr() = default;
+ MMIAddrLabelMapCallbackPtr(Value *V) : CallbackVH(V) {}
void setPtr(BasicBlock *BB) {
ValueHandleBase::operator=(BB);
@@ -75,11 +83,12 @@ class MMIAddrLabelMap {
/// This is a per-function list of symbols whose corresponding BasicBlock got
/// deleted. These symbols need to be emitted at some point in the file, so
/// AsmPrinter emits them after the function body.
- DenseMap<AssertingVH<Function>, std::vector<MCSymbol*> >
+ DenseMap<AssertingVH<Function>, std::vector<MCSymbol*>>
DeletedAddrLabelsNeedingEmission;
-public:
+public:
MMIAddrLabelMap(MCContext &context) : Context(context) {}
+
~MMIAddrLabelMap() {
assert(DeletedAddrLabelsNeedingEmission.empty() &&
"Some labels for deleted blocks never got emitted");
@@ -93,7 +102,8 @@ public:
void UpdateForDeletedBlock(BasicBlock *BB);
void UpdateForRAUWBlock(BasicBlock *Old, BasicBlock *New);
};
-}
+
+} // end namespace llvm
ArrayRef<MCSymbol *> MMIAddrLabelMap::getAddrLabelSymbolToEmit(BasicBlock *BB) {
assert(BB->hasAddressTaken() &&
@@ -119,7 +129,7 @@ ArrayRef<MCSymbol *> MMIAddrLabelMap::getAddrLabelSymbolToEmit(BasicBlock *BB) {
/// If we have any deleted symbols for F, return them.
void MMIAddrLabelMap::
takeDeletedSymbolsForFunction(Function *F, std::vector<MCSymbol*> &Result) {
- DenseMap<AssertingVH<Function>, std::vector<MCSymbol*> >::iterator I =
+ DenseMap<AssertingVH<Function>, std::vector<MCSymbol*>>::iterator I =
DeletedAddrLabelsNeedingEmission.find(F);
// If there are no entries for the function, just return.
@@ -130,7 +140,6 @@ takeDeletedSymbolsForFunction(Function *F, std::vector<MCSymbol*> &Result) {
DeletedAddrLabelsNeedingEmission.erase(I);
}
-
void MMIAddrLabelMap::UpdateForDeletedBlock(BasicBlock *BB) {
// If the block got deleted, there is no need for the symbol. If the symbol
// was already emitted, we can just forget about it, otherwise we need to
@@ -177,7 +186,6 @@ void MMIAddrLabelMap::UpdateForRAUWBlock(BasicBlock *Old, BasicBlock *New) {
OldEntry.Symbols.end());
}
-
void MMIAddrLabelMapCallbackPtr::deleted() {
Map->UpdateForDeletedBlock(cast<BasicBlock>(getValPtr()));
}
@@ -186,9 +194,6 @@ void MMIAddrLabelMapCallbackPtr::allUsesReplacedWith(Value *V2) {
Map->UpdateForRAUWBlock(cast<BasicBlock>(getValPtr()), cast<BasicBlock>(V2));
}
-
-//===----------------------------------------------------------------------===//
-
MachineModuleInfo::MachineModuleInfo(const TargetMachine *TM)
: ImmutablePass(ID), TM(*TM),
Context(TM->getMCAsmInfo(), TM->getMCRegisterInfo(),
@@ -196,11 +201,9 @@ MachineModuleInfo::MachineModuleInfo(const TargetMachine *TM)
initializeMachineModuleInfoPass(*PassRegistry::getPassRegistry());
}
-MachineModuleInfo::~MachineModuleInfo() {
-}
+MachineModuleInfo::~MachineModuleInfo() = default;
bool MachineModuleInfo::doInitialization(Module &M) {
-
ObjFileMMI = nullptr;
CurCallSite = 0;
DbgInfoAvailable = UsesVAFloatArgument = UsesMorestackAddr = false;
@@ -211,7 +214,6 @@ bool MachineModuleInfo::doInitialization(Module &M) {
}
bool MachineModuleInfo::doFinalization(Module &M) {
-
Personalities.clear();
delete AddrLabelSymbols;
@@ -256,7 +258,14 @@ void MachineModuleInfo::addPersonality(const Function *Personality) {
/// \}
-MachineFunction &MachineModuleInfo::getMachineFunction(const Function &F) {
+MachineFunction *
+MachineModuleInfo::getMachineFunction(const Function &F) const {
+ auto I = MachineFunctions.find(&F);
+ return I != MachineFunctions.end() ? I->second.get() : nullptr;
+}
+
+MachineFunction &
+MachineModuleInfo::getOrCreateMachineFunction(const Function &F) {
// Shortcut for the common case where a sequence of MachineFunctionPasses
// all query for the same Function.
if (LastRequest == &F)
@@ -270,10 +279,6 @@ MachineFunction &MachineModuleInfo::getMachineFunction(const Function &F) {
MF = new MachineFunction(&F, TM, NextFnNum++, *this);
// Update the set entry.
I.first->second.reset(MF);
-
- if (MFInitializer)
- if (MFInitializer->initializeMachineFunction(*MF))
- report_fatal_error("Unable to initialize machine function");
} else {
MF = I.first->second.get();
}
@@ -290,10 +295,12 @@ void MachineModuleInfo::deleteMachineFunctionFor(Function &F) {
}
namespace {
+
/// This pass frees the MachineFunction object associated with a Function.
class FreeMachineFunction : public FunctionPass {
public:
static char ID;
+
FreeMachineFunction() : FunctionPass(ID) {}
void getAnalysisUsage(AnalysisUsage &AU) const override {
@@ -306,15 +313,19 @@ public:
MMI.deleteMachineFunctionFor(F);
return true;
}
+
+ StringRef getPassName() const override {
+ return "Free MachineFunction";
+ }
};
-char FreeMachineFunction::ID;
+
} // end anonymous namespace
-namespace llvm {
-FunctionPass *createFreeMachineFunctionPass() {
+char FreeMachineFunction::ID;
+
+FunctionPass *llvm::createFreeMachineFunctionPass() {
return new FreeMachineFunction();
}
-} // end namespace llvm
//===- MMI building helpers -----------------------------------------------===//
diff --git a/contrib/llvm/lib/CodeGen/MachineOptimizationRemarkEmitter.cpp b/contrib/llvm/lib/CodeGen/MachineOptimizationRemarkEmitter.cpp
new file mode 100644
index 0000000..73c3428
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/MachineOptimizationRemarkEmitter.cpp
@@ -0,0 +1,108 @@
+///===- MachineOptimizationRemarkEmitter.cpp - Opt Diagnostic -*- C++ -*---===//
+///
+/// The LLVM Compiler Infrastructure
+///
+/// This file is distributed under the University of Illinois Open Source
+/// License. See LICENSE.TXT for details.
+///
+///===---------------------------------------------------------------------===//
+/// \file
+/// Optimization diagnostic interfaces for machine passes. It's packaged as an
+/// analysis pass so that passes using this service become dependent on MBFI
+/// as well. MBFI is used to compute the "hotness" of the diagnostic message.
+///
+///===---------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
+#include "llvm/CodeGen/LazyMachineBlockFrequencyInfo.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/IR/DebugInfo.h"
+#include "llvm/IR/DiagnosticInfo.h"
+#include "llvm/IR/LLVMContext.h"
+
+using namespace llvm;
+
+DiagnosticInfoMIROptimization::MachineArgument::MachineArgument(
+ StringRef MKey, const MachineInstr &MI)
+ : Argument() {
+ Key = MKey;
+
+ raw_string_ostream OS(Val);
+ MI.print(OS, /*SkipOpers=*/false, /*SkipDebugLoc=*/true);
+}
+
+Optional<uint64_t>
+MachineOptimizationRemarkEmitter::computeHotness(const MachineBasicBlock &MBB) {
+ if (!MBFI)
+ return None;
+
+ return MBFI->getBlockProfileCount(&MBB);
+}
+
+void MachineOptimizationRemarkEmitter::computeHotness(
+ DiagnosticInfoMIROptimization &Remark) {
+ const MachineBasicBlock *MBB = Remark.getBlock();
+ if (MBB)
+ Remark.setHotness(computeHotness(*MBB));
+}
+
+void MachineOptimizationRemarkEmitter::emit(
+ DiagnosticInfoOptimizationBase &OptDiagCommon) {
+ auto &OptDiag = cast<DiagnosticInfoMIROptimization>(OptDiagCommon);
+ computeHotness(OptDiag);
+
+ LLVMContext &Ctx = MF.getFunction()->getContext();
+
+ // If a diagnostic has a hotness value, then only emit it if its hotness
+ // meets the threshold.
+ if (OptDiag.getHotness() &&
+ *OptDiag.getHotness() < Ctx.getDiagnosticsHotnessThreshold()) {
+ return;
+ }
+
+ yaml::Output *Out = Ctx.getDiagnosticsOutputFile();
+ if (Out) {
+ auto *P = &const_cast<DiagnosticInfoOptimizationBase &>(OptDiagCommon);
+ *Out << P;
+ }
+ // FIXME: now that IsVerbose is part of DI, filtering for this will be moved
+ // from here to clang.
+ if (!OptDiag.isVerbose() || shouldEmitVerbose())
+ Ctx.diagnose(OptDiag);
+}
+
+MachineOptimizationRemarkEmitterPass::MachineOptimizationRemarkEmitterPass()
+ : MachineFunctionPass(ID) {
+ initializeMachineOptimizationRemarkEmitterPassPass(
+ *PassRegistry::getPassRegistry());
+}
+
+bool MachineOptimizationRemarkEmitterPass::runOnMachineFunction(
+ MachineFunction &MF) {
+ MachineBlockFrequencyInfo *MBFI;
+
+ if (MF.getFunction()->getContext().getDiagnosticsHotnessRequested())
+ MBFI = &getAnalysis<LazyMachineBlockFrequencyInfoPass>().getBFI();
+ else
+ MBFI = nullptr;
+
+ ORE = llvm::make_unique<MachineOptimizationRemarkEmitter>(MF, MBFI);
+ return false;
+}
+
+void MachineOptimizationRemarkEmitterPass::getAnalysisUsage(
+ AnalysisUsage &AU) const {
+ AU.addRequired<LazyMachineBlockFrequencyInfoPass>();
+ AU.setPreservesAll();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+char MachineOptimizationRemarkEmitterPass::ID = 0;
+static const char ore_name[] = "Machine Optimization Remark Emitter";
+#define ORE_NAME "machine-opt-remark-emitter"
+
+INITIALIZE_PASS_BEGIN(MachineOptimizationRemarkEmitterPass, ORE_NAME, ore_name,
+ false, true)
+INITIALIZE_PASS_DEPENDENCY(LazyMachineBlockFrequencyInfoPass)
+INITIALIZE_PASS_END(MachineOptimizationRemarkEmitterPass, ORE_NAME, ore_name,
+ false, true)
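
A hedged client sketch (pass name, remark name, and message are illustrative; ORE is an assumed MachineOptimizationRemarkEmitter* and MI a MachineInstr in scope). The emitter attaches block hotness from MBFI and applies the threshold and YAML handling shown in emit() above:

  MachineOptimizationRemark R("my-pass", "MyRemark", MI.getDebugLoc(),
                              MI.getParent());
  R << "candidate transformed";
  ORE->emit(R);
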
diff --git a/contrib/llvm/lib/CodeGen/MachineOutliner.cpp b/contrib/llvm/lib/CodeGen/MachineOutliner.cpp
new file mode 100644
index 0000000..fd6b242
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/MachineOutliner.cpp
@@ -0,0 +1,1251 @@
+//===---- MachineOutliner.cpp - Outline instructions -----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// Replaces repeated sequences of instructions with function calls.
+///
+/// This works by placing every instruction from every basic block in a
+/// suffix tree, and repeatedly querying that tree for repeated sequences of
+/// instructions. If a sequence of instructions appears often, then it ought
+/// to be beneficial to pull it out into a function.
+///
+/// This was originally presented at the 2016 LLVM Developers' Meeting in the
+/// talk "Reducing Code Size Using Outlining". For a high-level overview of
+/// how this pass works, the talk is available on YouTube at
+///
+/// https://www.youtube.com/watch?v=yorld-WSOeU
+///
+/// The slides for the talk are available at
+///
+/// http://www.llvm.org/devmtg/2016-11/Slides/Paquette-Outliner.pdf
+///
+/// The talk provides an overview of how the outliner finds candidates and
+/// ultimately outlines them. It describes how the main data structure for this
+/// pass, the suffix tree, is queried and purged for candidates. It also gives
+/// a simplified construction algorithm for suffix trees, based on the
+/// algorithm actually used here, Ukkonen's algorithm.
+///
+/// For the original RFC for this pass, please see
+///
+/// http://lists.llvm.org/pipermail/llvm-dev/2016-August/104170.html
+///
+/// For more information on the suffix tree data structure, please see
+/// https://www.cs.helsinki.fi/u/ukkonen/SuffixT1withFigs.pdf
+///
+//===----------------------------------------------------------------------===//
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/Support/Allocator.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include <functional>
+#include <map>
+#include <sstream>
+#include <tuple>
+#include <vector>
+
+#define DEBUG_TYPE "machine-outliner"
+
+using namespace llvm;
+
+STATISTIC(NumOutlined, "Number of candidates outlined");
+STATISTIC(FunctionsCreated, "Number of functions created");
+
+namespace {
+
+/// \brief An individual sequence of instructions to be replaced with a call to
+/// an outlined function.
+struct Candidate {
+
+ /// Set to false if the candidate overlapped with another candidate.
+ bool InCandidateList = true;
+
+ /// The start index of this \p Candidate.
+ size_t StartIdx;
+
+ /// The number of instructions in this \p Candidate.
+ size_t Len;
+
+ /// The index of this \p Candidate's \p OutlinedFunction in the list of
+ /// \p OutlinedFunctions.
+ size_t FunctionIdx;
+
+ /// \brief The number of instructions that would be saved by outlining every
+ /// candidate of this type.
+ ///
+ /// This is a fixed value which is not updated during the candidate pruning
+ /// process. It is only used for deciding which candidate to keep if two
+ /// candidates overlap. The true benefit is stored in the OutlinedFunction
+ /// for some given candidate.
+ unsigned Benefit = 0;
+
+ Candidate(size_t StartIdx, size_t Len, size_t FunctionIdx)
+ : StartIdx(StartIdx), Len(Len), FunctionIdx(FunctionIdx) {}
+
+ Candidate() {}
+
+ /// \brief Used to ensure that \p Candidates are outlined in an order that
+ /// preserves the start and end indices of other \p Candidates.
+ bool operator<(const Candidate &RHS) const { return StartIdx > RHS.StartIdx; }
+};
+
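
The inverted operator< is what makes a pruned candidate list safe to outline in order: an ascending sort with it yields candidates in descending start index, so replacing a later sequence never shifts the start index of one not yet outlined. A minimal sketch (CandidateList is a std::vector<Candidate> of the struct above):

  #include <algorithm>

  // After this sort, CandidateList.front() has the highest StartIdx and is
  // outlined first.
  std::sort(CandidateList.begin(), CandidateList.end());
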
+/// \brief The information necessary to create an outlined function for some
+/// class of candidate.
+struct OutlinedFunction {
+
+ /// The actual outlined function created.
+ /// This is initialized after we go through and create the actual function.
+ MachineFunction *MF = nullptr;
+
+ /// A number assigned to this function which appears at the end of its name.
+ size_t Name;
+
+ /// The number of candidates for this OutlinedFunction.
+ size_t OccurrenceCount = 0;
+
+ /// \brief The sequence of integers corresponding to the instructions in this
+ /// function.
+ std::vector<unsigned> Sequence;
+
+ /// The number of instructions this function would save.
+ unsigned Benefit = 0;
+
+ /// \brief Set to true if candidates for this outlined function should be
+ /// replaced with tail calls to this OutlinedFunction.
+ bool IsTailCall = false;
+
+ OutlinedFunction(size_t Name, size_t OccurrenceCount,
+ const std::vector<unsigned> &Sequence,
+ unsigned Benefit, bool IsTailCall)
+ : Name(Name), OccurrenceCount(OccurrenceCount), Sequence(Sequence),
+      Benefit(Benefit), IsTailCall(IsTailCall) {}
+};
+
+/// Represents an undefined index in the suffix tree.
+const size_t EmptyIdx = -1;
+
+/// A node in a suffix tree which represents a substring or suffix.
+///
+/// Each node has either no children or at least two children, with the root
+/// being an exception in the empty tree.
+///
+/// Children are represented as a map between unsigned integers and nodes. If
+/// a node N has a child M on unsigned integer k, then the mapping represented
+/// by N is a proper prefix of the mapping represented by M. Note that this,
+/// although similar to a trie is somewhat different: each node stores a full
+/// substring of the full mapping rather than a single character state.
+///
+/// Each internal node contains a pointer to the internal node representing
+/// the same string, but with the first character chopped off. This is stored
+/// in \p Link. Each leaf node stores the start index of its respective
+/// suffix in \p SuffixIdx.
+struct SuffixTreeNode {
+
+ /// The children of this node.
+ ///
+ /// A child existing on an unsigned integer implies that from the mapping
+ /// represented by the current node, there is a way to reach another
+ /// mapping by tacking that character on the end of the current string.
+ DenseMap<unsigned, SuffixTreeNode *> Children;
+
+ /// A flag set to false if the node has been pruned from the tree.
+ bool IsInTree = true;
+
+ /// The start index of this node's substring in the main string.
+ size_t StartIdx = EmptyIdx;
+
+ /// The end index of this node's substring in the main string.
+ ///
+ /// Every leaf node must have its \p EndIdx incremented at the end of every
+ /// step in the construction algorithm. To avoid having to update O(N)
+ /// nodes individually at the end of every step, the end index is stored
+ /// as a pointer.
+ size_t *EndIdx = nullptr;
+
+ /// For leaves, the start index of the suffix represented by this node.
+ ///
+ /// For all other nodes, this is ignored.
+ size_t SuffixIdx = EmptyIdx;
+
+ /// \brief For internal nodes, a pointer to the internal node representing
+ /// the same sequence with the first character chopped off.
+ ///
+ /// This has two major purposes in the suffix tree. The first is as a
+ /// shortcut in Ukkonen's construction algorithm. One of the things that
+ /// Ukkonen's algorithm does to achieve linear-time construction is
+ /// keep track of which node the next insert should be at. This makes each
+ /// insert O(1), and there are a total of O(N) inserts. The suffix link
+ /// helps with inserting children of internal nodes.
+ ///
+ /// Say we add a child to an internal node with associated mapping S. The
+  /// next insertion must be at the node representing S minus its first character.
+ /// This is given by the way that we iteratively build the tree in Ukkonen's
+ /// algorithm. The main idea is to look at the suffixes of each prefix in the
+ /// string, starting with the longest suffix of the prefix, and ending with
+ /// the shortest. Therefore, if we keep pointers between such nodes, we can
+ /// move to the next insertion point in O(1) time. If we don't, then we'd
+ /// have to query from the root, which takes O(N) time. This would make the
+ /// construction algorithm O(N^2) rather than O(N).
+ ///
+ /// The suffix link is also used during the tree pruning process to let us
+ /// quickly throw out a bunch of potential overlaps. Say we have a sequence
+ /// S we want to outline. Then each of its suffixes contribute to at least
+ /// one overlapping case. Therefore, we can follow the suffix links
+ /// starting at the node associated with S to the root and "delete" those
+ /// nodes, save for the root. For each candidate, this removes
+ /// O(|candidate|) overlaps from the search space. We don't actually
+ /// completely invalidate these nodes though; doing that is far too
+ /// aggressive. Consider the following pathological string:
+ ///
+ /// 1 2 3 1 2 3 2 3 2 3 2 3 2 3 2 3 2 3
+ ///
+ /// If we, for the sake of example, outlined 1 2 3, then we would throw
+ /// out all instances of 2 3. This isn't desirable. To get around this,
+ /// when we visit a link node, we decrement its occurrence count by the
+ /// number of sequences we outlined in the current step. In the pathological
+ /// example, the 2 3 node would have an occurrence count of 8, while the
+ /// 1 2 3 node would have an occurrence count of 2. Thus, the 2 3 node
+ /// would survive to the next round allowing us to outline the extra
+ /// instances of 2 3.
+ SuffixTreeNode *Link = nullptr;
+
+ /// The parent of this node. Every node except for the root has a parent.
+ SuffixTreeNode *Parent = nullptr;
+
+ /// The number of times this node's string appears in the tree.
+ ///
+  /// This is equal to the number of leaf children of the node. It represents
+ /// the number of suffixes that the node's string is a prefix of.
+ size_t OccurrenceCount = 0;
+
+ /// The length of the string formed by concatenating the edge labels from the
+ /// root to this node.
+ size_t ConcatLen = 0;
+
+ /// Returns true if this node is a leaf.
+ bool isLeaf() const { return SuffixIdx != EmptyIdx; }
+
+ /// Returns true if this node is the root of its owning \p SuffixTree.
+ bool isRoot() const { return StartIdx == EmptyIdx; }
+
+ /// Return the number of elements in the substring associated with this node.
+ size_t size() const {
+
+ // Is it the root? If so, it's the empty string so return 0.
+ if (isRoot())
+ return 0;
+
+ assert(*EndIdx != EmptyIdx && "EndIdx is undefined!");
+
+ // Size = the number of elements in the string.
+ // For example, [0 1 2 3] has length 4, not 3. 3-0 = 3, so we have 3-0+1.
+ return *EndIdx - StartIdx + 1;
+ }
+
+ SuffixTreeNode(size_t StartIdx, size_t *EndIdx, SuffixTreeNode *Link,
+ SuffixTreeNode *Parent)
+ : StartIdx(StartIdx), EndIdx(EndIdx), Link(Link), Parent(Parent) {}
+
+ SuffixTreeNode() {}
+};
+
+/// A data structure for fast substring queries.
+///
+/// Suffix trees represent the suffixes of their input strings in their leaves.
+/// A suffix tree is a type of compressed trie structure where each node
+/// represents an entire substring rather than a single character. Each leaf
+/// of the tree is a suffix.
+///
+/// A suffix tree can be seen as a type of state machine where each state is a
+/// substring of the full string. The tree is structured so that, for a string
+/// of length N, there are exactly N leaves in the tree. This structure allows
+/// us to quickly find repeated substrings of the input string.
+///
+/// In this implementation, a "string" is a vector of unsigned integers.
+/// These integers may result from hashing some data type. A suffix tree can
+/// contain one or many strings, which can then be queried as one large string.
+///
+/// The suffix tree is implemented using Ukkonen's algorithm for linear-time
+/// suffix tree construction. Ukkonen's algorithm is explained in more detail
+/// in the paper by Esko Ukkonen, "On-line construction of suffix trees". The
+/// paper is available at
+///
+/// https://www.cs.helsinki.fi/u/ukkonen/SuffixT1withFigs.pdf
+class SuffixTree {
+private:
+ /// Each element is an integer representing an instruction in the module.
+ ArrayRef<unsigned> Str;
+
+ /// Maintains each node in the tree.
+ SpecificBumpPtrAllocator<SuffixTreeNode> NodeAllocator;
+
+ /// The root of the suffix tree.
+ ///
+ /// The root represents the empty string. It is maintained by the
+ /// \p NodeAllocator like every other node in the tree.
+ SuffixTreeNode *Root = nullptr;
+
+ /// Stores each leaf node in the tree.
+ ///
+ /// This is used for finding outlining candidates.
+ std::vector<SuffixTreeNode *> LeafVector;
+
+ /// Maintains the end indices of the internal nodes in the tree.
+ ///
+ /// Each internal node is guaranteed to never have its end index change
+ /// during the construction algorithm; however, leaves must be updated at
+ /// every step. Therefore, we need to store leaf end indices by reference
+ /// to avoid updating O(N) leaves at every step of construction. Thus,
+ /// every internal node must be allocated its own end index.
+ BumpPtrAllocator InternalEndIdxAllocator;
+
+ /// The end index of each leaf in the tree.
+ size_t LeafEndIdx = -1;
+
+ /// \brief Helper struct which keeps track of the next insertion point in
+ /// Ukkonen's algorithm.
+ struct ActiveState {
+ /// The next node to insert at.
+ SuffixTreeNode *Node;
+
+ /// The index of the first character in the substring currently being added.
+ size_t Idx = EmptyIdx;
+
+ /// The length of the substring we have to add at the current step.
+ size_t Len = 0;
+ };
+
+ /// \brief The point the next insertion will take place at in the
+ /// construction algorithm.
+ ActiveState Active;
+
+ /// Allocate a leaf node and add it to the tree.
+ ///
+ /// \param Parent The parent of this node.
+ /// \param StartIdx The start index of this node's associated string.
+ /// \param Edge The label on the edge leaving \p Parent to this node.
+ ///
+ /// \returns A pointer to the allocated leaf node.
+ SuffixTreeNode *insertLeaf(SuffixTreeNode &Parent, size_t StartIdx,
+ unsigned Edge) {
+
+ assert(StartIdx <= LeafEndIdx && "String can't start after it ends!");
+
+ SuffixTreeNode *N = new (NodeAllocator.Allocate()) SuffixTreeNode(StartIdx,
+ &LeafEndIdx,
+ nullptr,
+ &Parent);
+ Parent.Children[Edge] = N;
+
+ return N;
+ }
+
+ /// Allocate an internal node and add it to the tree.
+ ///
+ /// \param Parent The parent of this node. Only null when allocating the root.
+ /// \param StartIdx The start index of this node's associated string.
+ /// \param EndIdx The end index of this node's associated string.
+ /// \param Edge The label on the edge leaving \p Parent to this node.
+ ///
+ /// \returns A pointer to the allocated internal node.
+ SuffixTreeNode *insertInternalNode(SuffixTreeNode *Parent, size_t StartIdx,
+ size_t EndIdx, unsigned Edge) {
+
+ assert(StartIdx <= EndIdx && "String can't start after it ends!");
+ assert(!(!Parent && StartIdx != EmptyIdx) &&
+ "Non-root internal nodes must have parents!");
+
+ size_t *E = new (InternalEndIdxAllocator) size_t(EndIdx);
+ SuffixTreeNode *N = new (NodeAllocator.Allocate()) SuffixTreeNode(StartIdx,
+ E,
+ Root,
+ Parent);
+ if (Parent)
+ Parent->Children[Edge] = N;
+
+ return N;
+ }
+
+ /// \brief Set the suffix indices of the leaves to the start indices of their
+ /// respective suffixes. Also stores each leaf in \p LeafVector at its
+ /// respective suffix index.
+ ///
+ /// \param[in] CurrNode The node currently being visited.
+ /// \param CurrIdx The current index of the string being visited.
+ void setSuffixIndices(SuffixTreeNode &CurrNode, size_t CurrIdx) {
+
+ bool IsLeaf = CurrNode.Children.size() == 0 && !CurrNode.isRoot();
+
+ // Store the length of the concatenation of all strings from the root to
+ // this node.
+ if (!CurrNode.isRoot()) {
+ if (CurrNode.ConcatLen == 0)
+ CurrNode.ConcatLen = CurrNode.size();
+
+ if (CurrNode.Parent)
+ CurrNode.ConcatLen += CurrNode.Parent->ConcatLen;
+ }
+
+ // Traverse the tree depth-first.
+ for (auto &ChildPair : CurrNode.Children) {
+ assert(ChildPair.second && "Node had a null child!");
+ setSuffixIndices(*ChildPair.second,
+ CurrIdx + ChildPair.second->size());
+ }
+
+ // Is this node a leaf?
+ if (IsLeaf) {
+ // If yes, give it a suffix index and bump its parent's occurrence count.
+ CurrNode.SuffixIdx = Str.size() - CurrIdx;
+ assert(CurrNode.Parent && "CurrNode had no parent!");
+ CurrNode.Parent->OccurrenceCount++;
+
+ // Store the leaf in the leaf vector for pruning later.
+ LeafVector[CurrNode.SuffixIdx] = &CurrNode;
+ }
+ }
+
+ /// \brief Construct the suffix tree for the prefix of the input ending at
+ /// \p EndIdx.
+ ///
+ /// Used to construct the full suffix tree iteratively. At the end of each
+ /// step, the constructed suffix tree is either a valid suffix tree, or a
+ /// suffix tree with implicit suffixes. At the end of the final step, the
+ /// suffix tree is a valid tree.
+ ///
+ /// \param EndIdx The end index of the current prefix in the main string.
+ /// \param SuffixesToAdd The number of suffixes that must be added
+ /// to complete the suffix tree at the current phase.
+ ///
+ /// \returns The number of suffixes that have not been added at the end of
+ /// this step.
+ unsigned extend(size_t EndIdx, size_t SuffixesToAdd) {
+ SuffixTreeNode *NeedsLink = nullptr;
+
+ while (SuffixesToAdd > 0) {
+
+ // Are we waiting to add anything other than just the last character?
+ if (Active.Len == 0) {
+ // If not, then say the active index is the end index.
+ Active.Idx = EndIdx;
+ }
+
+ assert(Active.Idx <= EndIdx && "Start index can't be after end index!");
+
+ // The first character in the current substring we're looking at.
+ unsigned FirstChar = Str[Active.Idx];
+
+ // Have we inserted anything starting with FirstChar at the current node?
+ if (Active.Node->Children.count(FirstChar) == 0) {
+        // If not, then we can just insert a leaf and move to the next step.
+ insertLeaf(*Active.Node, EndIdx, FirstChar);
+
+ // The active node is an internal node, and we visited it, so it must
+ // need a link if it doesn't have one.
+ if (NeedsLink) {
+ NeedsLink->Link = Active.Node;
+ NeedsLink = nullptr;
+ }
+ } else {
+ // There's a match with FirstChar, so look for the point in the tree to
+ // insert a new node.
+ SuffixTreeNode *NextNode = Active.Node->Children[FirstChar];
+
+ size_t SubstringLen = NextNode->size();
+
+ // Is the current suffix we're trying to insert longer than the size of
+ // the child we want to move to?
+ if (Active.Len >= SubstringLen) {
+ // If yes, then consume the characters we've seen and move to the next
+ // node.
+ Active.Idx += SubstringLen;
+ Active.Len -= SubstringLen;
+ Active.Node = NextNode;
+ continue;
+ }
+
+ // Otherwise, the suffix we're trying to insert must be contained in the
+ // next node we want to move to.
+ unsigned LastChar = Str[EndIdx];
+
+ // Is the string we're trying to insert a substring of the next node?
+ if (Str[NextNode->StartIdx + Active.Len] == LastChar) {
+ // If yes, then we're done for this step. Remember our insertion point
+ // and move to the next end index. At this point, we have an implicit
+ // suffix tree.
+ if (NeedsLink && !Active.Node->isRoot()) {
+ NeedsLink->Link = Active.Node;
+ NeedsLink = nullptr;
+ }
+
+ Active.Len++;
+ break;
+ }
+
+ // The string we're trying to insert isn't a substring of the next node,
+ // but matches up to a point. Split the node.
+ //
+ // For example, say we ended our search at a node n and we're trying to
+ // insert ABD. Then we'll create a new node s for AB, reduce n to just
+          // representing C, and insert a new leaf node l to represent D. This
+ // allows us to ensure that if n was a leaf, it remains a leaf.
+ //
+ // | ABC ---split---> | AB
+ // n s
+ // C / \ D
+ // n l
+
+ // The node s from the diagram
+ SuffixTreeNode *SplitNode =
+ insertInternalNode(Active.Node,
+ NextNode->StartIdx,
+ NextNode->StartIdx + Active.Len - 1,
+ FirstChar);
+
+ // Insert the new node representing the new substring into the tree as
+ // a child of the split node. This is the node l from the diagram.
+ insertLeaf(*SplitNode, EndIdx, LastChar);
+
+ // Make the old node a child of the split node and update its start
+ // index. This is the node n from the diagram.
+ NextNode->StartIdx += Active.Len;
+ NextNode->Parent = SplitNode;
+ SplitNode->Children[Str[NextNode->StartIdx]] = NextNode;
+
+ // SplitNode is an internal node, update the suffix link.
+ if (NeedsLink)
+ NeedsLink->Link = SplitNode;
+
+ NeedsLink = SplitNode;
+ }
+
+ // We've added something new to the tree, so there's one less suffix to
+ // add.
+ SuffixesToAdd--;
+
+ if (Active.Node->isRoot()) {
+ if (Active.Len > 0) {
+ Active.Len--;
+ Active.Idx = EndIdx - SuffixesToAdd + 1;
+ }
+ } else {
+ // Start the next phase at the next smallest suffix.
+ Active.Node = Active.Node->Link;
+ }
+ }
+
+ return SuffixesToAdd;
+ }
+
+public:
+
+ /// Find all repeated substrings that satisfy \p BenefitFn.
+ ///
+ /// If a substring appears at least twice, then it must be represented by
+ /// an internal node which appears in at least two suffixes. Each suffix is
+ /// represented by a leaf node. To do this, we visit each internal node in
+ /// the tree, using the leaf children of each internal node. If an internal
+ /// node represents a beneficial substring, then we use each of its leaf
+ /// children to find the locations of its substring.
+ ///
+ /// \param[out] CandidateList Filled with candidates representing each
+ /// beneficial substring.
+  /// \param[out] FunctionList Filled with a list of \p OutlinedFunctions, one
+  /// for each type of candidate.
+ /// \param BenefitFn The function to satisfy.
+ ///
+ /// \returns The length of the longest candidate found.
+ size_t findCandidates(std::vector<Candidate> &CandidateList,
+ std::vector<OutlinedFunction> &FunctionList,
+ const std::function<unsigned(SuffixTreeNode &, size_t, unsigned)>
+ &BenefitFn) {
+
+ CandidateList.clear();
+ FunctionList.clear();
+ size_t FnIdx = 0;
+ size_t MaxLen = 0;
+
+ for (SuffixTreeNode* Leaf : LeafVector) {
+ assert(Leaf && "Leaves in LeafVector cannot be null!");
+ if (!Leaf->IsInTree)
+ continue;
+
+ assert(Leaf->Parent && "All leaves must have parents!");
+ SuffixTreeNode &Parent = *(Leaf->Parent);
+
+ // If it doesn't appear enough, or we already outlined from it, skip it.
+ if (Parent.OccurrenceCount < 2 || Parent.isRoot() || !Parent.IsInTree)
+ continue;
+
+ size_t StringLen = Leaf->ConcatLen - Leaf->size();
+
+ // How many instructions would outlining this string save?
+ unsigned Benefit = BenefitFn(Parent,
+ StringLen, Str[Leaf->SuffixIdx + StringLen - 1]);
+
+ // If it's not beneficial, skip it.
+ if (Benefit < 1)
+ continue;
+
+ if (StringLen > MaxLen)
+ MaxLen = StringLen;
+
+ unsigned OccurrenceCount = 0;
+ for (auto &ChildPair : Parent.Children) {
+ SuffixTreeNode *M = ChildPair.second;
+
+ // Is it a leaf? If so, we have an occurrence of this candidate.
+ if (M && M->IsInTree && M->isLeaf()) {
+ OccurrenceCount++;
+ CandidateList.emplace_back(M->SuffixIdx, StringLen, FnIdx);
+ CandidateList.back().Benefit = Benefit;
+ M->IsInTree = false;
+ }
+ }
+
+ // Save the function for the new candidate sequence.
+ std::vector<unsigned> CandidateSequence;
+ for (unsigned i = Leaf->SuffixIdx; i < Leaf->SuffixIdx + StringLen; i++)
+ CandidateSequence.push_back(Str[i]);
+
+ FunctionList.emplace_back(FnIdx, OccurrenceCount, CandidateSequence,
+ Benefit, false);
+
+ // Move to the next function.
+ FnIdx++;
+ Parent.IsInTree = false;
+ }
+
+ return MaxLen;
+ }
+
+ /// Construct a suffix tree from a sequence of unsigned integers.
+ ///
+ /// \param Str The string to construct the suffix tree for.
+ SuffixTree(const std::vector<unsigned> &Str) : Str(Str) {
+ Root = insertInternalNode(nullptr, EmptyIdx, EmptyIdx, 0);
+ Root->IsInTree = true;
+ Active.Node = Root;
+ LeafVector = std::vector<SuffixTreeNode*>(Str.size());
+
+ // Keep track of the number of suffixes we have to add of the current
+ // prefix.
+ size_t SuffixesToAdd = 0;
+ Active.Node = Root;
+
+ // Construct the suffix tree iteratively on each prefix of the string.
+ // PfxEndIdx is the end index of the current prefix.
+ // End is one past the last element in the string.
+ for (size_t PfxEndIdx = 0, End = Str.size(); PfxEndIdx < End; PfxEndIdx++) {
+ SuffixesToAdd++;
+ LeafEndIdx = PfxEndIdx; // Extend each of the leaves.
+ SuffixesToAdd = extend(PfxEndIdx, SuffixesToAdd);
+ }
+
+ // Set the suffix indices of each leaf.
+ assert(Root && "Root node can't be nullptr!");
+ setSuffixIndices(*Root, 0);
+ }
+};
+
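
A minimal usage sketch with a made-up "string": the subsequence 1 2 3 occurs twice, so after construction the tree holds an internal node with occurrence count 2 representing it, which findCandidates can later weigh against a benefit function. Note the tree keeps an ArrayRef into the input, so the vector must outlive it:

  std::vector<unsigned> S = {1, 2, 3, 9, 1, 2, 3, 8};
  SuffixTree Tree(S); // S must stay alive for as long as Tree is used
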
+/// \brief Maps \p MachineInstrs to unsigned integers and stores the mappings.
+struct InstructionMapper {
+
+ /// \brief The next available integer to assign to a \p MachineInstr that
+ /// cannot be outlined.
+ ///
+  /// Set to -3 for compatibility with \p DenseMapInfo<unsigned>.
+ unsigned IllegalInstrNumber = -3;
+
+ /// \brief The next available integer to assign to a \p MachineInstr that can
+ /// be outlined.
+ unsigned LegalInstrNumber = 0;
+
+ /// Correspondence from \p MachineInstrs to unsigned integers.
+ DenseMap<MachineInstr *, unsigned, MachineInstrExpressionTrait>
+ InstructionIntegerMap;
+
+  /// Correspondence from unsigned integers to \p MachineInstrs.
+ /// Inverse of \p InstructionIntegerMap.
+ DenseMap<unsigned, MachineInstr *> IntegerInstructionMap;
+
+ /// The vector of unsigned integers that the module is mapped to.
+ std::vector<unsigned> UnsignedVec;
+
+ /// \brief Stores the location of the instruction associated with the integer
+ /// at index i in \p UnsignedVec for each index i.
+ std::vector<MachineBasicBlock::iterator> InstrList;
+
+ /// \brief Maps \p *It to a legal integer.
+ ///
+ /// Updates \p InstrList, \p UnsignedVec, \p InstructionIntegerMap,
+ /// \p IntegerInstructionMap, and \p LegalInstrNumber.
+ ///
+ /// \returns The integer that \p *It was mapped to.
+ unsigned mapToLegalUnsigned(MachineBasicBlock::iterator &It) {
+
+ // Get the integer for this instruction or give it the current
+ // LegalInstrNumber.
+ InstrList.push_back(It);
+ MachineInstr &MI = *It;
+ bool WasInserted;
+ DenseMap<MachineInstr *, unsigned, MachineInstrExpressionTrait>::iterator
+ ResultIt;
+ std::tie(ResultIt, WasInserted) =
+ InstructionIntegerMap.insert(std::make_pair(&MI, LegalInstrNumber));
+ unsigned MINumber = ResultIt->second;
+
+ // There was an insertion.
+ if (WasInserted) {
+ LegalInstrNumber++;
+ IntegerInstructionMap.insert(std::make_pair(MINumber, &MI));
+ }
+
+ UnsignedVec.push_back(MINumber);
+
+ // Make sure we don't overflow or use any integers reserved by the DenseMap.
+ if (LegalInstrNumber >= IllegalInstrNumber)
+ report_fatal_error("Instruction mapping overflow!");
+
+ assert(LegalInstrNumber != DenseMapInfo<unsigned>::getEmptyKey()
+ && "Tried to assign DenseMap tombstone or empty key to instruction.");
+ assert(LegalInstrNumber != DenseMapInfo<unsigned>::getTombstoneKey()
+ && "Tried to assign DenseMap tombstone or empty key to instruction.");
+
+ return MINumber;
+ }
+
+ /// Maps \p *It to an illegal integer.
+ ///
+ /// Updates \p InstrList, \p UnsignedVec, and \p IllegalInstrNumber.
+ ///
+ /// \returns The integer that \p *It was mapped to.
+ unsigned mapToIllegalUnsigned(MachineBasicBlock::iterator &It) {
+ unsigned MINumber = IllegalInstrNumber;
+
+ InstrList.push_back(It);
+ UnsignedVec.push_back(IllegalInstrNumber);
+ IllegalInstrNumber--;
+
+ assert(LegalInstrNumber < IllegalInstrNumber &&
+ "Instruction mapping overflow!");
+
+ assert(IllegalInstrNumber !=
+ DenseMapInfo<unsigned>::getEmptyKey() &&
+ "IllegalInstrNumber cannot be DenseMap empty key!");
+
+ assert(IllegalInstrNumber !=
+ DenseMapInfo<unsigned>::getTombstoneKey() &&
+ "IllegalInstrNumber cannot be DenseMap tombstone key!");
+
+ return MINumber;
+ }
+
+ /// \brief Transforms a \p MachineBasicBlock into a \p vector of \p unsigneds
+ /// and appends it to \p UnsignedVec and \p InstrList.
+ ///
+ /// Two instructions are assigned the same integer if they are identical.
+ /// If an instruction is deemed unsafe to outline, then it will be assigned a
+ /// unique integer. The resulting mapping is placed into a suffix tree and
+ /// queried for candidates.
+ ///
+ /// \param MBB The \p MachineBasicBlock to be translated into integers.
+ /// \param TRI \p TargetRegisterInfo for the module.
+ /// \param TII \p TargetInstrInfo for the module.
+ void convertToUnsignedVec(MachineBasicBlock &MBB,
+ const TargetRegisterInfo &TRI,
+ const TargetInstrInfo &TII) {
+ for (MachineBasicBlock::iterator It = MBB.begin(), Et = MBB.end(); It != Et;
+ It++) {
+
+ // Keep track of where this instruction is in the module.
+ switch (TII.getOutliningType(*It)) {
+ case TargetInstrInfo::MachineOutlinerInstrType::Illegal:
+ mapToIllegalUnsigned(It);
+ break;
+
+ case TargetInstrInfo::MachineOutlinerInstrType::Legal:
+ mapToLegalUnsigned(It);
+ break;
+
+ case TargetInstrInfo::MachineOutlinerInstrType::Invisible:
+ break;
+ }
+ }
+
+ // After we're done with every insertion, uniquely terminate this part of the
+ // "string". This makes sure we won't match across basic block or function
+ // boundaries since the "end" is encoded uniquely and thus appears in no
+ // repeated substring.
+ InstrList.push_back(MBB.end());
+ UnsignedVec.push_back(IllegalInstrNumber);
+ IllegalInstrNumber--;
+ }
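+
+ // Worked example (hypothetical block): for the sequence
+ //   add, add, call, add
+ // where the call is illegal to outline, UnsignedVec becomes
+ //   [0, 0, -3u, 0, -4u]
+ // since both adds share integer 0, the call consumes -3, and the block is
+ // terminated with the next illegal number so no repeated substring can
+ // cross the block boundary.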
+
+ InstructionMapper() {
+ // Make sure that the implementation of DenseMapInfo<unsigned> hasn't
+ // changed.
+ assert(DenseMapInfo<unsigned>::getEmptyKey() == (unsigned)-1 &&
+ "DenseMapInfo<unsigned>'s empty key isn't -1!");
+ assert(DenseMapInfo<unsigned>::getTombstoneKey() == (unsigned)-2 &&
+ "DenseMapInfo<unsigned>'s tombstone key isn't -2!");
+ }
+};
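+
+// Putting the pieces together (a sketch; see runOnModule below for the real
+// flow):
+//
+//   InstructionMapper Mapper;
+//   for (MachineBasicBlock &MBB : MF)
+//     Mapper.convertToUnsignedVec(MBB, *TRI, *TII);
+//   SuffixTree ST(Mapper.UnsignedVec);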
+
+/// \brief An interprocedural pass which finds repeated sequences of
+/// instructions and replaces them with calls to functions.
+///
+/// Each instruction is mapped to an unsigned integer and placed in a string.
+/// The resulting mapping is then placed in a \p SuffixTree. The \p SuffixTree
+/// is then repeatedly queried for repeated sequences of instructions. Each
+/// non-overlapping repeated sequence is then placed in its own
+/// \p MachineFunction and each instance is then replaced with a call to that
+/// function.
+struct MachineOutliner : public ModulePass {
+
+ static char ID;
+
+ StringRef getPassName() const override { return "Machine Outliner"; }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<MachineModuleInfo>();
+ AU.addPreserved<MachineModuleInfo>();
+ AU.setPreservesAll();
+ ModulePass::getAnalysisUsage(AU);
+ }
+
+ MachineOutliner() : ModulePass(ID) {
+ initializeMachineOutlinerPass(*PassRegistry::getPassRegistry());
+ }
+
+ /// \brief Replace the sequences of instructions represented by the
+ /// \p Candidates in \p CandidateList with calls to \p MachineFunctions
+ /// described in \p FunctionList.
+ ///
+ /// \param M The module we are outlining from.
+ /// \param CandidateList A list of candidates to be outlined.
+ /// \param FunctionList A list of functions to be inserted into the module.
+ /// \param Mapper Contains the instruction mappings for the module.
+ bool outline(Module &M, const ArrayRef<Candidate> &CandidateList,
+ std::vector<OutlinedFunction> &FunctionList,
+ InstructionMapper &Mapper);
+
+ /// Creates a function for \p OF and inserts it into the module.
+ MachineFunction *createOutlinedFunction(Module &M, const OutlinedFunction &OF,
+ InstructionMapper &Mapper);
+
+ /// Find potential outlining candidates and store them in \p CandidateList.
+ ///
+ /// For each type of potential candidate, also build an \p OutlinedFunction
+ /// struct containing the information to build the function for that
+ /// candidate.
+ ///
+ /// \param[out] CandidateList Filled with outlining candidates for the module.
+ /// \param[out] FunctionList Filled with functions corresponding to each type
+ /// of \p Candidate.
+ /// \param ST The suffix tree for the module.
+ /// \param TII TargetInstrInfo for the module.
+ ///
+ /// \returns The length of the longest candidate found. 0 if there are none.
+ unsigned buildCandidateList(std::vector<Candidate> &CandidateList,
+ std::vector<OutlinedFunction> &FunctionList,
+ SuffixTree &ST,
+ InstructionMapper &Mapper,
+ const TargetInstrInfo &TII);
+
+ /// \brief Remove any overlapping candidates that weren't handled by the
+ /// suffix tree's pruning method.
+ ///
+ /// Pruning from the suffix tree doesn't necessarily remove all overlaps.
+ /// If a short candidate is chosen for outlining, and a longer candidate
+ /// which has that short candidate as a suffix is chosen afterwards, the
+ /// tree's pruning method will not find it. Thus, we need to prune before
+ /// outlining as well.
+ ///
+ /// \param[in,out] CandidateList A list of outlining candidates.
+ /// \param[in,out] FunctionList A list of functions to be outlined.
+ /// \param MaxCandidateLen The length of the longest candidate.
+ /// \param TII TargetInstrInfo for the module.
+ void pruneOverlaps(std::vector<Candidate> &CandidateList,
+ std::vector<OutlinedFunction> &FunctionList,
+ unsigned MaxCandidateLen,
+ const TargetInstrInfo &TII);
+
+ /// Construct a suffix tree on the instructions in \p M and outline repeated
+ /// strings from that tree.
+ bool runOnModule(Module &M) override;
+};
+
+} // Anonymous namespace.
+
+char MachineOutliner::ID = 0;
+
+namespace llvm {
+ModulePass *createMachineOutlinerPass() { return new MachineOutliner(); }
+}
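+
+// Sketch of scheduling the pass with the legacy pass manager (placement in
+// the codegen pipeline is target-dependent and shown here only as an
+// assumption):
+//
+//   legacy::PassManager PM;
+//   PM.add(createMachineOutlinerPass());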
+
+INITIALIZE_PASS(MachineOutliner, DEBUG_TYPE,
+ "Machine Function Outliner", false, false)
+
+void MachineOutliner::pruneOverlaps(std::vector<Candidate> &CandidateList,
+ std::vector<OutlinedFunction> &FunctionList,
+ unsigned MaxCandidateLen,
+ const TargetInstrInfo &TII) {
+ // TODO: Experiment with interval trees or other interval-checking structures
+ // to lower the time complexity of this function.
+ // TODO: Can we do better than the simple greedy choice?
+ // Check for overlaps in the range.
+ // This is O(MaxCandidateLen * CandidateList.size()).
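+
+ // Example of an overlap the tree's pruning can miss (indices are
+ // illustrative): if [2, 3] starting at index 5 is chosen first, a later
+ // choice of [1, 2, 3] starting at index 4 still contains it as a suffix.
+ // The scan below keeps whichever candidate promises the larger benefit.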
+ for (auto It = CandidateList.begin(), Et = CandidateList.end(); It != Et;
+ It++) {
+ Candidate &C1 = *It;
+ OutlinedFunction &F1 = FunctionList[C1.FunctionIdx];
+
+ // If we removed this candidate, skip it.
+ if (!C1.InCandidateList)
+ continue;
+
+ // Is it still worth it to outline C1?
+ if (F1.Benefit < 1 || F1.OccurrenceCount < 2) {
+ assert(F1.OccurrenceCount > 0 &&
+ "Can't remove OutlinedFunction with no occurrences!");
+ F1.OccurrenceCount--;
+ C1.InCandidateList = false;
+ continue;
+ }
+
+ // The minimum start index of any candidate that could overlap with this
+ // one.
+ unsigned FarthestPossibleIdx = 0;
+
+ // Either the index is 0, or it's at most MaxCandidateLen indices away.
+ if (C1.StartIdx > MaxCandidateLen)
+ FarthestPossibleIdx = C1.StartIdx - MaxCandidateLen;
+
+ // Compare against the candidates in the list whose start index is at
+ // least FarthestPossibleIdx; only these can overlap with C1. There are
+ // at most MaxCandidateLen of these.
+ for (auto Sit = It + 1; Sit != Et; Sit++) {
+ Candidate &C2 = *Sit;
+ OutlinedFunction &F2 = FunctionList[C2.FunctionIdx];
+
+ // Is this candidate too far away to overlap?
+ if (C2.StartIdx < FarthestPossibleIdx)
+ break;
+
+ // Did we already remove this candidate in a previous step?
+ if (!C2.InCandidateList)
+ continue;
+
+ // Is the function beneficial to outline?
+ if (F2.OccurrenceCount < 2 || F2.Benefit < 1) {
+ // If not, remove this candidate and move to the next one.
+ assert(F2.OccurrenceCount > 0 &&
+ "Can't remove OutlinedFunction with no occurrences!");
+ F2.OccurrenceCount--;
+ C2.InCandidateList = false;
+ continue;
+ }
+
+ size_t C2End = C2.StartIdx + C2.Len - 1;
+
+ // Do C1 and C2 overlap?
+ //
+ // Not overlapping:
+ // High indices... [C1End ... C1Start][C2End ... C2Start] ...Low indices
+ //
+ // We sorted our candidate list so C2Start <= C1Start. We know that
+ // C2End > C2Start since each candidate has length >= 2. Therefore, all we
+ // have to check is whether C2End < C1Start: if so, they don't overlap.
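+ //
+ // Example (hypothetical indices): C1.StartIdx = 10, C2.StartIdx = 7, and
+ // C2.Len = 3 give C2End = 9 < 10, so the candidates are disjoint and C2
+ // is skipped.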
+ if (C2End < C1.StartIdx)
+ continue;
+
+ // C1 and C2 overlap.
+ // We need to choose the better of the two.
+ //
+ // Approximate this by picking the one which would have saved us the
+ // most instructions before any pruning.
+ if (C1.Benefit >= C2.Benefit) {
+
+ // C1 is better, so remove C2 and update C2's OutlinedFunction to
+ // reflect the removal.
+ assert(F2.OccurrenceCount > 0 &&
+ "Can't remove OutlinedFunction with no occurrences!");
+ F2.OccurrenceCount--;
+ F2.Benefit = TII.getOutliningBenefit(F2.Sequence.size(),
+ F2.OccurrenceCount,
+ F2.IsTailCall);
+
+ C2.InCandidateList = false;
+
+ DEBUG(
+ dbgs() << "- Removed C2.\n";
+ dbgs() << "--- Num fns left for C2: " << F2.OccurrenceCount << "\n";
+ dbgs() << "--- C2's benefit: " << F2.Benefit << "\n";
+ );
+
+ } else {
+ // C2 is better, so remove C1 and update C1's OutlinedFunction to
+ // reflect the removal.
+ assert(F1.OccurrenceCount > 0 &&
+ "Can't remove OutlinedFunction with no occurrences!");
+ F1.OccurrenceCount--;
+ F1.Benefit = TII.getOutliningBenefit(F1.Sequence.size(),
+ F1.OccurrenceCount,
+ F1.IsTailCall);
+ C1.InCandidateList = false;
+
+ DEBUG(
+ dbgs() << "- Removed C1.\n";
+ dbgs() << "--- Num fns left for C1: " << F1.OccurrenceCount << "\n";
+ dbgs() << "--- C1's benefit: " << F1.Benefit << "\n";
+ );
+
+ // C1 is out, so we don't have to compare it against anyone else.
+ break;
+ }
+ }
+ }
+}
+
+unsigned
+MachineOutliner::buildCandidateList(std::vector<Candidate> &CandidateList,
+ std::vector<OutlinedFunction> &FunctionList,
+ SuffixTree &ST,
+ InstructionMapper &Mapper,
+ const TargetInstrInfo &TII) {
+
+ std::vector<unsigned> CandidateSequence; // Current outlining candidate.
+ size_t MaxCandidateLen = 0; // Length of the longest candidate.
+
+ // Benefit function for the maximizing query on the suffix tree.
+ // This allows the target to define more fine-grained notions of what is
+ // worth outlining without putting target-specific info in the suffix tree.
+ auto BenefitFn = [&TII, &Mapper](const SuffixTreeNode &Curr,
+ size_t StringLen, unsigned EndVal) {
+
+ // The root represents the empty string.
+ if (Curr.isRoot())
+ return 0u;
+
+ // Is this long enough to outline?
+ // TODO: Let the target decide how "long" a string is in terms of the sizes
+ // of the instructions in the string. For example, if a call is smaller
+ // than the single instruction it would replace, outlining even a
+ // one-instruction string is worthwhile.
+ if (StringLen < 2)
+ return 0u;
+
+ size_t Occurrences = Curr.OccurrenceCount;
+
+ // Anything we want to outline has to appear at least twice.
+ if (Occurrences < 2)
+ return 0u;
+
+ // Check if the last instruction in the sequence is a return.
+ MachineInstr *LastInstr =
+ Mapper.IntegerInstructionMap[EndVal];
+ assert(LastInstr && "Last instruction in sequence was unmapped!");
+
+ // The only way a terminator could be mapped as legal is if it was safe to
+ // tail call.
+ bool IsTailCall = LastInstr->isTerminator();
+ return TII.getOutliningBenefit(StringLen, Occurrences, IsTailCall);
+ };
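+
+ // For instance (made-up numbers): a 5-instruction sequence that occurs 3
+ // times and does not end in a terminator is scored as
+ //   TII.getOutliningBenefit(5, 3, /*IsTailCall=*/false)
+ // and the query keeps whichever node maximizes that score.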
+
+ MaxCandidateLen = ST.findCandidates(CandidateList, FunctionList, BenefitFn);
+
+ for (auto &OF : FunctionList)
+ OF.IsTailCall =
+ Mapper.IntegerInstructionMap[OF.Sequence.back()]->isTerminator();
+
+ // Sort the candidates in descending order. This will simplify the outlining
+ // process when we have to remove the candidates from the mapping by
+ // allowing us to cut them out without keeping track of an offset.
+ std::stable_sort(CandidateList.begin(), CandidateList.end());
+
+ return MaxCandidateLen;
+}
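+
+// E.g., candidates starting at indices {3, 9, 5} are visited as {9, 5, 3}
+// after the sort (illustrative indices), so cutting one out of the mapping
+// never shifts the start index of a candidate that has not been processed
+// yet.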
+
+MachineFunction *
+MachineOutliner::createOutlinedFunction(Module &M, const OutlinedFunction &OF,
+ InstructionMapper &Mapper) {
+
+ // Create the function name. This should be unique. For now, just append
+ // the number of this function to a common prefix.
+ std::ostringstream NameStream;
+ NameStream << "OUTLINED_FUNCTION" << "_" << OF.Name;
+
+ // Create the function using an IR-level function.
+ LLVMContext &C = M.getContext();
+ Function *F = dyn_cast<Function>(
+ M.getOrInsertFunction(NameStream.str(), Type::getVoidTy(C)));
+ assert(F && "Function was null!");
+
+ // NOTE: If this is linkonce_odr, then we can take advantage of linker
+ // deduping, which gives us better results when we outline from
+ // linkonce_odr functions.
+ F->setLinkage(GlobalValue::PrivateLinkage);
+ F->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
+
+ BasicBlock *EntryBB = BasicBlock::Create(C, "entry", F);
+ IRBuilder<> Builder(EntryBB);
+ Builder.CreateRetVoid();
+
+ MachineModuleInfo &MMI = getAnalysis<MachineModuleInfo>();
+ MachineFunction &MF = MMI.getOrCreateMachineFunction(*F);
+ MachineBasicBlock &MBB = *MF.CreateMachineBasicBlock();
+ const TargetSubtargetInfo &STI = MF.getSubtarget();
+ const TargetInstrInfo &TII = *STI.getInstrInfo();
+
+ // Insert the new function into the module.
+ MF.insert(MF.begin(), &MBB);
+
+ TII.insertOutlinerPrologue(MBB, MF, OF.IsTailCall);
+
+ // Copy over the instructions for the function using the integer mappings in
+ // its sequence.
+ for (unsigned Str : OF.Sequence) {
+ MachineInstr *NewMI =
+ MF.CloneMachineInstr(Mapper.IntegerInstructionMap.find(Str)->second);
+ NewMI->dropMemRefs();
+
+ // Don't keep debug information for outlined instructions.
+ // FIXME: This means outlined functions are currently undebuggable.
+ NewMI->setDebugLoc(DebugLoc());
+ MBB.insert(MBB.end(), NewMI);
+ }
+
+ TII.insertOutlinerEpilogue(MBB, MF, OF.IsTailCall);
+
+ return &MF;
+}
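+
+// E.g., assuming \p OF.Name is the function's creation index, the first
+// outlined function is emitted as a private, unnamed_addr IR stub named
+// "OUTLINED_FUNCTION_0" whose single MachineBasicBlock holds the cloned
+// sequence between the target's outliner prologue and epilogue.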
+
+bool MachineOutliner::outline(Module &M,
+ const ArrayRef<Candidate> &CandidateList,
+ std::vector<OutlinedFunction> &FunctionList,
+ InstructionMapper &Mapper) {
+
+ bool OutlinedSomething = false;
+
+ // Replace the candidates with calls to their respective outlined functions.
+ for (const Candidate &C : CandidateList) {
+
+ // Was the candidate removed during pruneOverlaps?
+ if (!C.InCandidateList)
+ continue;
+
+ // If not, then look at its OutlinedFunction.
+ OutlinedFunction &OF = FunctionList[C.FunctionIdx];
+
+ // Was its OutlinedFunction made unbeneficial during pruneOverlaps?
+ if (OF.OccurrenceCount < 2 || OF.Benefit < 1)
+ continue;
+
+ // If not, then outline it.
+ assert(C.StartIdx < Mapper.InstrList.size() && "Candidate out of bounds!");
+ MachineBasicBlock *MBB = (*Mapper.InstrList[C.StartIdx]).getParent();
+ MachineBasicBlock::iterator StartIt = Mapper.InstrList[C.StartIdx];
+ unsigned EndIdx = C.StartIdx + C.Len - 1;
+
+ assert(EndIdx < Mapper.InstrList.size() && "Candidate out of bounds!");
+ MachineBasicBlock::iterator EndIt = Mapper.InstrList[EndIdx];
+ assert(EndIt != MBB->end() && "EndIt out of bounds!");
+
+ EndIt++; // Erase needs one past the end index.
+
+ // Does this candidate have a function yet?
+ if (!OF.MF) {
+ OF.MF = createOutlinedFunction(M, OF, Mapper);
+ FunctionsCreated++;
+ }
+
+ MachineFunction *MF = OF.MF;
+ const TargetSubtargetInfo &STI = MF->getSubtarget();
+ const TargetInstrInfo &TII = *STI.getInstrInfo();
+
+ // Insert a call to the new function and erase the old sequence.
+ TII.insertOutlinedCall(M, *MBB, StartIt, *MF, OF.IsTailCall);
+ StartIt = Mapper.InstrList[C.StartIdx];
+ MBB->erase(StartIt, EndIt);
+
+ OutlinedSomething = true;
+
+ // Statistics.
+ NumOutlined++;
+ }
+
+ DEBUG(
+ dbgs() << "OutlinedSomething = " << OutlinedSomething << "\n";
+ );
+
+ return OutlinedSomething;
+}
+
+bool MachineOutliner::runOnModule(Module &M) {
+
+ // Is there anything in the module at all?
+ if (M.empty())
+ return false;
+
+ MachineModuleInfo &MMI = getAnalysis<MachineModuleInfo>();
+ const TargetSubtargetInfo &STI = MMI.getOrCreateMachineFunction(*M.begin())
+ .getSubtarget();
+ const TargetRegisterInfo *TRI = STI.getRegisterInfo();
+ const TargetInstrInfo *TII = STI.getInstrInfo();
+
+ InstructionMapper Mapper;
+
+ // Build instruction mappings for each function in the module.
+ for (Function &F : M) {
+ MachineFunction &MF = MMI.getOrCreateMachineFunction(F);
+
+ // Is the function empty? Safe to outline from?
+ if (F.empty() || !TII->isFunctionSafeToOutlineFrom(MF))
+ continue;
+
+ // If it is, look at each MachineBasicBlock in the function.
+ for (MachineBasicBlock &MBB : MF) {
+
+ // Is there anything in MBB?
+ if (MBB.empty())
+ continue;
+
+ // If yes, map it.
+ Mapper.convertToUnsignedVec(MBB, *TRI, *TII);
+ }
+ }
+
+ // Construct a suffix tree, use it to find candidates, and then outline them.
+ SuffixTree ST(Mapper.UnsignedVec);
+ std::vector<Candidate> CandidateList;
+ std::vector<OutlinedFunction> FunctionList;
+
+ // Find all of the outlining candidates.
+ unsigned MaxCandidateLen =
+ buildCandidateList(CandidateList, FunctionList, ST, Mapper, *TII);
+
+ // Remove candidates that overlap with other candidates.
+ pruneOverlaps(CandidateList, FunctionList, MaxCandidateLen, *TII);
+
+ // Outline each of the candidates and return true if something was outlined.
+ return outline(M, CandidateList, FunctionList, Mapper);
+}
diff --git a/contrib/llvm/lib/CodeGen/MachinePipeliner.cpp b/contrib/llvm/lib/CodeGen/MachinePipeliner.cpp
index 43a1809..19e9a50 100644
--- a/contrib/llvm/lib/CodeGen/MachinePipeliner.cpp
+++ b/contrib/llvm/lib/CodeGen/MachinePipeliner.cpp
@@ -61,7 +61,6 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/iterator_range.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/PriorityQueue.h"
#include "llvm/ADT/SetVector.h"
@@ -69,6 +68,7 @@
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/ValueTracking.h"
@@ -552,7 +552,9 @@ public:
os << "\n";
}
- void dump() const { print(dbgs()); }
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ LLVM_DUMP_METHOD void dump() const { print(dbgs()); }
+#endif
};
/// This class represents the scheduled code. The main data structure is a
@@ -593,7 +595,7 @@ private:
/// Virtual register information.
MachineRegisterInfo &MRI;
- DFAPacketizer *Resources;
+ std::unique_ptr<DFAPacketizer> Resources;
public:
SMSchedule(MachineFunction *mf)
@@ -604,13 +606,6 @@ public:
InitiationInterval = 0;
}
- ~SMSchedule() {
- ScheduledInstrs.clear();
- InstrToCycle.clear();
- RegToStageDiff.clear();
- delete Resources;
- }
-
void reset() {
ScheduledInstrs.clear();
InstrToCycle.clear();
@@ -720,13 +715,13 @@ char MachinePipeliner::ID = 0;
int MachinePipeliner::NumTries = 0;
#endif
char &llvm::MachinePipelinerID = MachinePipeliner::ID;
-INITIALIZE_PASS_BEGIN(MachinePipeliner, "pipeliner",
+INITIALIZE_PASS_BEGIN(MachinePipeliner, DEBUG_TYPE,
"Modulo Software Pipelining", false, false)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
-INITIALIZE_PASS_END(MachinePipeliner, "pipeliner",
+INITIALIZE_PASS_END(MachinePipeliner, DEBUG_TYPE,
"Modulo Software Pipelining", false, false)
/// The "main" function for implementing Swing Modulo Scheduling.
@@ -738,7 +733,7 @@ bool MachinePipeliner::runOnMachineFunction(MachineFunction &mf) {
return false;
if (mf.getFunction()->getAttributes().hasAttribute(
- AttributeSet::FunctionIndex, Attribute::OptimizeForSize) &&
+ AttributeList::FunctionIndex, Attribute::OptimizeForSize) &&
!EnableSWPOptSize.getPosition())
return false;
@@ -960,7 +955,7 @@ static void getPhiRegs(MachineInstr &Phi, MachineBasicBlock *Loop,
for (unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2)
if (Phi.getOperand(i + 1).getMBB() != Loop)
InitVal = Phi.getOperand(i).getReg();
- else if (Phi.getOperand(i + 1).getMBB() == Loop)
+ else
LoopVal = Phi.getOperand(i).getReg();
assert(InitVal != 0 && LoopVal != 0 && "Unexpected Phi structure.");
@@ -2514,7 +2509,7 @@ void SwingSchedulerDAG::generateExistingPhis(
MachineBasicBlock *KernelBB, SMSchedule &Schedule, ValueMapTy *VRMap,
InstrMapTy &InstrMap, unsigned LastStageNum, unsigned CurStageNum,
bool IsLast) {
- // Compute the stage number for the inital value of the Phi, which
+ // Compute the stage number for the initial value of the Phi, which
// comes from the prolog. The prolog to use depends on to which kernel/
// epilog that we're adding the Phi.
unsigned PrologStage = 0;
@@ -3480,7 +3475,7 @@ bool SwingSchedulerDAG::isLoopCarriedOrder(SUnit *Source, const SDep &Dep,
// increment value to determine if the accesses may be loop carried.
if (OffsetS >= OffsetD)
return OffsetS + AccessSizeS > DeltaS;
- else if (OffsetS < OffsetD)
+ else
return OffsetD + AccessSizeD > DeltaD;
return true;
@@ -3980,5 +3975,7 @@ void SMSchedule::print(raw_ostream &os) const {
}
}
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// Utility function used for debugging to print the schedule.
-void SMSchedule::dump() const { print(dbgs()); }
+LLVM_DUMP_METHOD void SMSchedule::dump() const { print(dbgs()); }
+#endif
diff --git a/contrib/llvm/lib/CodeGen/MachinePostDominators.cpp b/contrib/llvm/lib/CodeGen/MachinePostDominators.cpp
index c3f6e92..4883779 100644
--- a/contrib/llvm/lib/CodeGen/MachinePostDominators.cpp
+++ b/contrib/llvm/lib/CodeGen/MachinePostDominators.cpp
@@ -16,6 +16,10 @@
using namespace llvm;
+namespace llvm {
+template class DominatorTreeBase<MachineBasicBlock, true>; // PostDomTreeBase
+}
+
char MachinePostDominatorTree::ID = 0;
//declare initializeMachinePostDominatorTreePass
@@ -24,8 +28,7 @@ INITIALIZE_PASS(MachinePostDominatorTree, "machinepostdomtree",
MachinePostDominatorTree::MachinePostDominatorTree() : MachineFunctionPass(ID) {
initializeMachinePostDominatorTreePass(*PassRegistry::getPassRegistry());
- DT = new DominatorTreeBase<MachineBasicBlock>(true); //true indicate
- // postdominator
+ DT = new PostDomTreeBase<MachineBasicBlock>();
}
FunctionPass *
diff --git a/contrib/llvm/lib/CodeGen/MachineRegionInfo.cpp b/contrib/llvm/lib/CodeGen/MachineRegionInfo.cpp
index fc32183..1e74104 100644
--- a/contrib/llvm/lib/CodeGen/MachineRegionInfo.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineRegionInfo.cpp
@@ -1,10 +1,21 @@
+//===- lib/Codegen/MachineRegionInfo.cpp ----------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/MachineRegionInfo.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/RegionInfoImpl.h"
#include "llvm/CodeGen/MachinePostDominators.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
-#define DEBUG_TYPE "region"
+#define DEBUG_TYPE "machine-region-info"
using namespace llvm;
@@ -12,36 +23,29 @@ STATISTIC(numMachineRegions, "The # of machine regions");
STATISTIC(numMachineSimpleRegions, "The # of simple machine regions");
namespace llvm {
+
template class RegionBase<RegionTraits<MachineFunction>>;
template class RegionNodeBase<RegionTraits<MachineFunction>>;
template class RegionInfoBase<RegionTraits<MachineFunction>>;
-}
+
+} // end namespace llvm
//===----------------------------------------------------------------------===//
// MachineRegion implementation
-//
MachineRegion::MachineRegion(MachineBasicBlock *Entry, MachineBasicBlock *Exit,
MachineRegionInfo* RI,
MachineDominatorTree *DT, MachineRegion *Parent) :
- RegionBase<RegionTraits<MachineFunction>>(Entry, Exit, RI, DT, Parent) {
-
-}
+ RegionBase<RegionTraits<MachineFunction>>(Entry, Exit, RI, DT, Parent) {}
-MachineRegion::~MachineRegion() { }
+MachineRegion::~MachineRegion() = default;
//===----------------------------------------------------------------------===//
// MachineRegionInfo implementation
-//
-
-MachineRegionInfo::MachineRegionInfo() :
- RegionInfoBase<RegionTraits<MachineFunction>>() {
-}
+MachineRegionInfo::MachineRegionInfo() = default;
-MachineRegionInfo::~MachineRegionInfo() {
-
-}
+MachineRegionInfo::~MachineRegionInfo() = default;
void MachineRegionInfo::updateStatistics(MachineRegion *R) {
++numMachineRegions;
@@ -74,9 +78,7 @@ MachineRegionInfoPass::MachineRegionInfoPass() : MachineFunctionPass(ID) {
initializeMachineRegionInfoPassPass(*PassRegistry::getPassRegistry());
}
-MachineRegionInfoPass::~MachineRegionInfoPass() {
-
-}
+MachineRegionInfoPass::~MachineRegionInfoPass() = default;
bool MachineRegionInfoPass::runOnMachineFunction(MachineFunction &F) {
releaseMemory();
@@ -86,6 +88,9 @@ bool MachineRegionInfoPass::runOnMachineFunction(MachineFunction &F) {
auto DF = &getAnalysis<MachineDominanceFrontier>();
RI.recalculate(F, DT, PDT, DF);
+
+ DEBUG(RI.dump());
+
return false;
}
@@ -103,9 +108,10 @@ void MachineRegionInfoPass::verifyAnalysis() const {
void MachineRegionInfoPass::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
- AU.addRequiredTransitive<DominatorTreeWrapperPass>();
- AU.addRequired<PostDominatorTreeWrapperPass>();
- AU.addRequired<DominanceFrontierWrapperPass>();
+ AU.addRequired<MachineDominatorTree>();
+ AU.addRequired<MachinePostDominatorTree>();
+ AU.addRequired<MachineDominanceFrontier>();
+ MachineFunctionPass::getAnalysisUsage(AU);
}
void MachineRegionInfoPass::print(raw_ostream &OS, const Module *) const {
@@ -119,22 +125,24 @@ LLVM_DUMP_METHOD void MachineRegionInfoPass::dump() const {
#endif
char MachineRegionInfoPass::ID = 0;
+char &MachineRegionInfoPassID = MachineRegionInfoPass::ID;
-INITIALIZE_PASS_BEGIN(MachineRegionInfoPass, "regions",
- "Detect single entry single exit regions", true, true)
+INITIALIZE_PASS_BEGIN(MachineRegionInfoPass, DEBUG_TYPE,
+ "Detect single entry single exit regions", true, true)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTree)
INITIALIZE_PASS_DEPENDENCY(MachineDominanceFrontier)
-INITIALIZE_PASS_END(MachineRegionInfoPass, "regions",
- "Detect single entry single exit regions", true, true)
+INITIALIZE_PASS_END(MachineRegionInfoPass, DEBUG_TYPE,
+ "Detect single entry single exit regions", true, true)
// Create methods available outside of this file, to use them
// "include/llvm/LinkAllPasses.h". Otherwise the pass would be deleted by
// the link time optimization.
namespace llvm {
- FunctionPass *createMachineRegionInfoPass() {
- return new MachineRegionInfoPass();
- }
+
+FunctionPass *createMachineRegionInfoPass() {
+ return new MachineRegionInfoPass();
}
+} // end namespace llvm
diff --git a/contrib/llvm/lib/CodeGen/MachineRegisterInfo.cpp b/contrib/llvm/lib/CodeGen/MachineRegisterInfo.cpp
index 242cb0b..9a92ee2 100644
--- a/contrib/llvm/lib/CodeGen/MachineRegisterInfo.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineRegisterInfo.cpp
@@ -1,4 +1,4 @@
-//===-- lib/Codegen/MachineRegisterInfo.cpp -------------------------------===//
+//===- lib/Codegen/MachineRegisterInfo.cpp --------------------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -12,12 +12,26 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/ADT/iterator_range.h"
+#include "llvm/CodeGen/LowLevelType.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/Function.h"
-#include "llvm/Support/raw_os_ostream.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
+#include <cassert>
using namespace llvm;
@@ -28,9 +42,9 @@ static cl::opt<bool> EnableSubRegLiveness("enable-subreg-liveness", cl::Hidden,
void MachineRegisterInfo::Delegate::anchor() {}
MachineRegisterInfo::MachineRegisterInfo(MachineFunction *MF)
- : MF(MF), TheDelegate(nullptr),
- TracksSubRegLiveness(MF->getSubtarget().enableSubRegLiveness() &&
- EnableSubRegLiveness) {
+ : MF(MF), TracksSubRegLiveness(MF->getSubtarget().enableSubRegLiveness() &&
+ EnableSubRegLiveness),
+ IsUpdatedCSRsInitialized(false) {
unsigned NumRegs = getTargetRegisterInfo()->getNumRegs();
VRegInfo.reserve(256);
RegAllocHints.reserve(256);
@@ -444,8 +458,8 @@ LaneBitmask MachineRegisterInfo::getMaxLaneMaskForVReg(unsigned Reg) const {
return TRC.getLaneMask();
}
-#ifndef NDEBUG
-void MachineRegisterInfo::dumpUses(unsigned Reg) const {
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD void MachineRegisterInfo::dumpUses(unsigned Reg) const {
for (MachineInstr &I : use_instructions(Reg))
I.dump();
}
@@ -543,3 +557,47 @@ bool MachineRegisterInfo::isPhysRegUsed(unsigned PhysReg) const {
}
return false;
}
+
+void MachineRegisterInfo::disableCalleeSavedRegister(unsigned Reg) {
+
+ const TargetRegisterInfo *TRI = getTargetRegisterInfo();
+ assert(Reg && (Reg < TRI->getNumRegs()) &&
+ "Trying to disable an invalid register");
+
+ if (!IsUpdatedCSRsInitialized) {
+ const MCPhysReg *CSR = TRI->getCalleeSavedRegs(MF);
+ for (const MCPhysReg *I = CSR; *I; ++I)
+ UpdatedCSRs.push_back(*I);
+
+ // Zero value represents the end of the register list
+ // (no more registers should be pushed).
+ UpdatedCSRs.push_back(0);
+
+ IsUpdatedCSRsInitialized = true;
+ }
+
+ // Remove the register (and its aliases) from the list.
+ for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
+ UpdatedCSRs.erase(std::remove(UpdatedCSRs.begin(), UpdatedCSRs.end(), *AI),
+ UpdatedCSRs.end());
+}
+
+const MCPhysReg *MachineRegisterInfo::getCalleeSavedRegs() const {
+ if (IsUpdatedCSRsInitialized)
+ return UpdatedCSRs.data();
+
+ return getTargetRegisterInfo()->getCalleeSavedRegs(MF);
+}
+
+void MachineRegisterInfo::setCalleeSavedRegs(ArrayRef<MCPhysReg> CSRs) {
+ if (IsUpdatedCSRsInitialized)
+ UpdatedCSRs.clear();
+
+ for (MCPhysReg Reg : CSRs)
+ UpdatedCSRs.push_back(Reg);
+
+ // Zero value represents the end of the register list
+ // (no more registers should be pushed).
+ UpdatedCSRs.push_back(0);
+ IsUpdatedCSRsInitialized = true;
+}
diff --git a/contrib/llvm/lib/CodeGen/MachineScheduler.cpp b/contrib/llvm/lib/CodeGen/MachineScheduler.cpp
index e06bc51..eaba9a5 100644
--- a/contrib/llvm/lib/CodeGen/MachineScheduler.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineScheduler.cpp
@@ -13,29 +13,66 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/MachineScheduler.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/PriorityQueue.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/CodeGen/LiveInterval.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/MachinePassRegistry.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/RegisterClassInfo.h"
+#include "llvm/CodeGen/RegisterPressure.h"
+#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/CodeGen/ScheduleDAGInstrs.h"
+#include "llvm/CodeGen/ScheduleDAGMutation.h"
#include "llvm/CodeGen/ScheduleDFS.h"
#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
+#include "llvm/CodeGen/SlotIndexes.h"
#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/CodeGen/TargetSchedule.h"
+#include "llvm/MC/LaneBitmask.h"
+#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/GraphWriter.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <iterator>
+#include <limits>
+#include <memory>
+#include <string>
+#include <tuple>
+#include <utility>
+#include <vector>
using namespace llvm;
-#define DEBUG_TYPE "misched"
+#define DEBUG_TYPE "machine-scheduler"
namespace llvm {
+
cl::opt<bool> ForceTopDown("misched-topdown", cl::Hidden,
cl::desc("Force top-down list scheduling"));
cl::opt<bool> ForceBottomUp("misched-bottomup", cl::Hidden,
@@ -43,7 +80,8 @@ cl::opt<bool> ForceBottomUp("misched-bottomup", cl::Hidden,
cl::opt<bool>
DumpCriticalPathLength("misched-dcpl", cl::Hidden,
cl::desc("Print critical path length to stdout"));
-}
+
+} // end namespace llvm
#ifndef NDEBUG
static cl::opt<bool> ViewMISchedDAGs("view-misched-dags", cl::Hidden,
@@ -80,10 +118,6 @@ static cl::opt<bool> EnableMemOpCluster("misched-cluster", cl::Hidden,
cl::desc("Enable memop clustering."),
cl::init(true));
-// Experimental heuristics
-static cl::opt<bool> EnableMacroFusion("misched-fusion", cl::Hidden,
- cl::desc("Enable scheduling for macro fusion."), cl::init(true));
-
static cl::opt<bool> VerifyScheduling("verify-misched", cl::Hidden,
cl::desc("Verify machine instrs before and after machine scheduling"));
@@ -92,14 +126,14 @@ static const unsigned MinSubtreeSize = 8;
// Pin the vtables to this file.
void MachineSchedStrategy::anchor() {}
+
void ScheduleDAGMutation::anchor() {}
//===----------------------------------------------------------------------===//
// Machine Instruction Scheduling Pass and Registry
//===----------------------------------------------------------------------===//
-MachineSchedContext::MachineSchedContext():
- MF(nullptr), MLI(nullptr), MDT(nullptr), PassConfig(nullptr), AA(nullptr), LIS(nullptr) {
+MachineSchedContext::MachineSchedContext() {
RegClassInfo = new RegisterClassInfo();
}
@@ -108,6 +142,7 @@ MachineSchedContext::~MachineSchedContext() {
}
namespace {
+
/// Base class for a machine scheduler class that can run at any point.
class MachineSchedulerBase : public MachineSchedContext,
public MachineFunctionPass {
@@ -149,18 +184,20 @@ public:
protected:
ScheduleDAGInstrs *createPostMachineScheduler();
};
-} // namespace
+
+} // end anonymous namespace
char MachineScheduler::ID = 0;
char &llvm::MachineSchedulerID = MachineScheduler::ID;
-INITIALIZE_PASS_BEGIN(MachineScheduler, "machine-scheduler",
+INITIALIZE_PASS_BEGIN(MachineScheduler, DEBUG_TYPE,
"Machine Instruction Scheduler", false, false)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
-INITIALIZE_PASS_END(MachineScheduler, "machine-scheduler",
+INITIALIZE_PASS_END(MachineScheduler, DEBUG_TYPE,
"Machine Instruction Scheduler", false, false)
MachineScheduler::MachineScheduler()
@@ -211,7 +248,7 @@ static ScheduleDAGInstrs *useDefaultMachineSched(MachineSchedContext *C) {
/// MachineSchedOpt allows command line selection of the scheduler.
static cl::opt<MachineSchedRegistry::ScheduleDAGCtor, false,
- RegisterPassParser<MachineSchedRegistry> >
+ RegisterPassParser<MachineSchedRegistry>>
MachineSchedOpt("misched",
cl::init(&useDefaultMachineSched), cl::Hidden,
cl::desc("Machine instruction scheduler to use"));
@@ -448,7 +485,7 @@ void MachineSchedulerBase::scheduleRegions(ScheduleDAGInstrs &Scheduler,
// instruction stream until we find the nearest boundary.
unsigned NumRegionInstrs = 0;
MachineBasicBlock::iterator I = RegionEnd;
- for (;I != MBB->begin(); --I) {
+ for (; I != MBB->begin(); --I) {
MachineInstr &MI = *std::prev(I);
if (isSchedBoundary(&MI, &*MBB, MF, TII))
break;
@@ -495,7 +532,7 @@ void MachineSchedulerBase::scheduleRegions(ScheduleDAGInstrs &Scheduler,
// thumb2 size reduction is currently an exception, so the PostMIScheduler
// needs to do this.
if (FixKillFlags)
- Scheduler.fixupKills(&*MBB);
+ Scheduler.fixupKills(*MBB);
}
Scheduler.finalizeSchedule();
}
@@ -504,13 +541,14 @@ void MachineSchedulerBase::print(raw_ostream &O, const Module* m) const {
// unimplemented
}
-LLVM_DUMP_METHOD
-void ReadyQueue::dump() {
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD void ReadyQueue::dump() const {
dbgs() << "Queue " << Name << ": ";
- for (unsigned i = 0, e = Queue.size(); i < e; ++i)
- dbgs() << Queue[i]->NodeNum << " ";
+ for (const SUnit *SU : Queue)
+ dbgs() << SU->NodeNum << " ";
dbgs() << "\n";
}
+#endif
//===----------------------------------------------------------------------===//
// ScheduleDAGMI - Basic machine instruction scheduling. This is
@@ -519,8 +557,7 @@ void ReadyQueue::dump() {
// ===----------------------------------------------------------------------===/
// Provide a vtable anchor.
-ScheduleDAGMI::~ScheduleDAGMI() {
-}
+ScheduleDAGMI::~ScheduleDAGMI() = default;
bool ScheduleDAGMI::canAddEdge(SUnit *SuccSU, SUnit *PredSU) {
return SuccSU == &ExitSU || !Topo.IsReachable(PredSU, SuccSU);
@@ -572,10 +609,8 @@ void ScheduleDAGMI::releaseSucc(SUnit *SU, SDep *SuccEdge) {
/// releaseSuccessors - Call releaseSucc on each of SU's successors.
void ScheduleDAGMI::releaseSuccessors(SUnit *SU) {
- for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
- I != E; ++I) {
- releaseSucc(SU, &*I);
- }
+ for (SDep &Succ : SU->Succs)
+ releaseSucc(SU, &Succ);
}
/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. When
@@ -611,10 +646,8 @@ void ScheduleDAGMI::releasePred(SUnit *SU, SDep *PredEdge) {
/// releasePredecessors - Call releasePred on each of SU's predecessors.
void ScheduleDAGMI::releasePredecessors(SUnit *SU) {
- for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
- I != E; ++I) {
- releasePred(SU, &*I);
- }
+ for (SDep &Pred : SU->Preds)
+ releasePred(SU, &Pred);
}
/// enterRegion - Called back from MachineScheduler::runOnMachineFunction after
@@ -687,8 +720,8 @@ void ScheduleDAGMI::schedule() {
DEBUG(
if (EntrySU.getInstr() != nullptr)
EntrySU.dumpAll(this);
- for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
- SUnits[su].dumpAll(this);
+ for (const SUnit &SU : SUnits)
+ SU.dumpAll(this);
if (ExitSU.getInstr() != nullptr)
ExitSU.dumpAll(this);
);
@@ -749,28 +782,25 @@ void ScheduleDAGMI::schedule() {
/// Apply each ScheduleDAGMutation step in order.
void ScheduleDAGMI::postprocessDAG() {
- for (unsigned i = 0, e = Mutations.size(); i < e; ++i) {
- Mutations[i]->apply(this);
- }
+ for (auto &m : Mutations)
+ m->apply(this);
}
void ScheduleDAGMI::
findRootsAndBiasEdges(SmallVectorImpl<SUnit*> &TopRoots,
SmallVectorImpl<SUnit*> &BotRoots) {
- for (std::vector<SUnit>::iterator
- I = SUnits.begin(), E = SUnits.end(); I != E; ++I) {
- SUnit *SU = &(*I);
- assert(!SU->isBoundaryNode() && "Boundary node should not be in SUnits");
+ for (SUnit &SU : SUnits) {
+ assert(!SU.isBoundaryNode() && "Boundary node should not be in SUnits");
// Order predecessors so DFSResult follows the critical path.
- SU->biasCriticalPath();
+ SU.biasCriticalPath();
// A SUnit is ready to top schedule if it has no predecessors.
- if (!I->NumPredsLeft)
- TopRoots.push_back(SU);
+ if (!SU.NumPredsLeft)
+ TopRoots.push_back(&SU);
// A SUnit is ready to bottom schedule if it has no successors.
- if (!I->NumSuccsLeft)
- BotRoots.push_back(SU);
+ if (!SU.NumSuccsLeft)
+ BotRoots.push_back(&SU);
}
ExitSU.biasCriticalPath();
}
@@ -785,10 +815,9 @@ void ScheduleDAGMI::initQueues(ArrayRef<SUnit*> TopRoots,
//
// Nodes with unreleased weak edges can still be roots.
// Release top roots in forward order.
- for (SmallVectorImpl<SUnit*>::const_iterator
- I = TopRoots.begin(), E = TopRoots.end(); I != E; ++I) {
- SchedImpl->releaseTopNode(*I);
- }
+ for (SUnit *SU : TopRoots)
+ SchedImpl->releaseTopNode(SU);
+
// Release bottom roots in reverse order so the higher priority nodes appear
// first. This is more natural and slightly more efficient.
for (SmallVectorImpl<SUnit*>::const_reverse_iterator
@@ -825,7 +854,7 @@ void ScheduleDAGMI::placeDebugValues() {
RegionBegin = FirstDbgValue;
}
- for (std::vector<std::pair<MachineInstr *, MachineInstr *> >::iterator
+ for (std::vector<std::pair<MachineInstr *, MachineInstr *>>::iterator
DI = DbgValues.end(), DE = DbgValues.begin(); DI != DE; --DI) {
std::pair<MachineInstr *, MachineInstr *> P = *std::prev(DI);
MachineInstr *DbgValue = P.first;
@@ -841,7 +870,7 @@ void ScheduleDAGMI::placeDebugValues() {
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
-void ScheduleDAGMI::dumpSchedule() const {
+LLVM_DUMP_METHOD void ScheduleDAGMI::dumpSchedule() const {
for (MachineBasicBlock::iterator MI = begin(), ME = end(); MI != ME; ++MI) {
if (SUnit *SU = getSUnit(&(*MI)))
SU->dump(this);
@@ -992,9 +1021,9 @@ void ScheduleDAGMILive::initRegPressure() {
}
}
DEBUG(dbgs() << "Excess PSets: ";
- for (unsigned i = 0, e = RegionCriticalPSets.size(); i != e; ++i)
+ for (const PressureChange &RCPS : RegionCriticalPSets)
dbgs() << TRI->getRegPressureSetName(
- RegionCriticalPSets[i].getPSet()) << " ";
+ RCPS.getPSet()) << " ";
dbgs() << "\n");
}
@@ -1003,16 +1032,15 @@ updateScheduledPressure(const SUnit *SU,
const std::vector<unsigned> &NewMaxPressure) {
const PressureDiff &PDiff = getPressureDiff(SU);
unsigned CritIdx = 0, CritEnd = RegionCriticalPSets.size();
- for (PressureDiff::const_iterator I = PDiff.begin(), E = PDiff.end();
- I != E; ++I) {
- if (!I->isValid())
+ for (const PressureChange &PC : PDiff) {
+ if (!PC.isValid())
break;
- unsigned ID = I->getPSet();
+ unsigned ID = PC.getPSet();
while (CritIdx != CritEnd && RegionCriticalPSets[CritIdx].getPSet() < ID)
++CritIdx;
if (CritIdx != CritEnd && RegionCriticalPSets[CritIdx].getPSet() == ID) {
if ((int)NewMaxPressure[ID] > RegionCriticalPSets[CritIdx].getUnitInc()
- && NewMaxPressure[ID] <= INT16_MAX)
+ && NewMaxPressure[ID] <= (unsigned)std::numeric_limits<int16_t>::max())
RegionCriticalPSets[CritIdx].setUnitInc(NewMaxPressure[ID]);
}
unsigned Limit = RegClassInfo->getRegPressureSetLimit(ID);
@@ -1136,6 +1164,12 @@ void ScheduleDAGMILive::schedule() {
dbgs() << " Pressure Diff : ";
getPressureDiff(&SU).dump(*TRI);
}
+ dbgs() << " Single Issue : ";
+ if (SchedModel.mustBeginGroup(SU.getInstr()) &&
+ SchedModel.mustEndGroup(SU.getInstr()))
+ dbgs() << "true;";
+ else
+ dbgs() << "false;";
dbgs() << '\n';
}
if (ExitSU.getInstr() != nullptr)
@@ -1396,6 +1430,7 @@ void ScheduleDAGMILive::scheduleMI(SUnit *SU, bool IsTopNode) {
//===----------------------------------------------------------------------===//
namespace {
+
/// \brief Post-process the DAG to create cluster edges between neighboring
/// loads or between neighboring stores.
class BaseMemOpClusterMutation : public ScheduleDAGMutation {
@@ -1403,6 +1438,7 @@ class BaseMemOpClusterMutation : public ScheduleDAGMutation {
SUnit *SU;
unsigned BaseReg;
int64_t Offset;
+
MemOpInfo(SUnit *su, unsigned reg, int64_t ofs)
: SU(su), BaseReg(reg), Offset(ofs) {}
@@ -1439,31 +1475,31 @@ public:
LoadClusterMutation(const TargetInstrInfo *tii, const TargetRegisterInfo *tri)
: BaseMemOpClusterMutation(tii, tri, true) {}
};
-} // anonymous
+
+} // end anonymous namespace
namespace llvm {
std::unique_ptr<ScheduleDAGMutation>
createLoadClusterDAGMutation(const TargetInstrInfo *TII,
const TargetRegisterInfo *TRI) {
- return EnableMemOpCluster ? make_unique<LoadClusterMutation>(TII, TRI)
+ return EnableMemOpCluster ? llvm::make_unique<LoadClusterMutation>(TII, TRI)
: nullptr;
}
std::unique_ptr<ScheduleDAGMutation>
createStoreClusterDAGMutation(const TargetInstrInfo *TII,
const TargetRegisterInfo *TRI) {
- return EnableMemOpCluster ? make_unique<StoreClusterMutation>(TII, TRI)
+ return EnableMemOpCluster ? llvm::make_unique<StoreClusterMutation>(TII, TRI)
: nullptr;
}
-} // namespace llvm
+} // end namespace llvm
void BaseMemOpClusterMutation::clusterNeighboringMemOps(
ArrayRef<SUnit *> MemOps, ScheduleDAGMI *DAG) {
SmallVector<MemOpInfo, 32> MemOpRecords;
- for (unsigned Idx = 0, End = MemOps.size(); Idx != End; ++Idx) {
- SUnit *SU = MemOps[Idx];
+ for (SUnit *SU : MemOps) {
unsigned BaseReg;
int64_t Offset;
if (TII->getMemOpBaseRegImmOfs(*SU->getInstr(), BaseReg, Offset, TRI))
@@ -1491,12 +1527,11 @@ void BaseMemOpClusterMutation::clusterNeighboringMemOps(
// dependent on SUa can prevent load combining due to register reuse.
// Predecessor edges do not need to be copied from SUb to SUa since nearby
// loads should have effectively the same inputs.
- for (SUnit::const_succ_iterator
- SI = SUa->Succs.begin(), SE = SUa->Succs.end(); SI != SE; ++SI) {
- if (SI->getSUnit() == SUb)
+ for (const SDep &Succ : SUa->Succs) {
+ if (Succ.getSUnit() == SUb)
continue;
- DEBUG(dbgs() << " Copy Succ SU(" << SI->getSUnit()->NodeNum << ")\n");
- DAG->addEdge(SI->getSUnit(), SDep(SUb, SDep::Artificial));
+ DEBUG(dbgs() << " Copy Succ SU(" << Succ.getSUnit()->NodeNum << ")\n");
+ DAG->addEdge(Succ.getSUnit(), SDep(SUb, SDep::Artificial));
}
++ClusterLength;
} else
@@ -1513,17 +1548,15 @@ void BaseMemOpClusterMutation::apply(ScheduleDAGInstrs *DAGInstrs) {
DenseMap<unsigned, unsigned> StoreChainIDs;
// Map each store chain to a set of dependent MemOps.
SmallVector<SmallVector<SUnit*,4>, 32> StoreChainDependents;
- for (unsigned Idx = 0, End = DAG->SUnits.size(); Idx != End; ++Idx) {
- SUnit *SU = &DAG->SUnits[Idx];
- if ((IsLoad && !SU->getInstr()->mayLoad()) ||
- (!IsLoad && !SU->getInstr()->mayStore()))
+ for (SUnit &SU : DAG->SUnits) {
+ if ((IsLoad && !SU.getInstr()->mayLoad()) ||
+ (!IsLoad && !SU.getInstr()->mayStore()))
continue;
unsigned ChainPredID = DAG->SUnits.size();
- for (SUnit::const_pred_iterator
- PI = SU->Preds.begin(), PE = SU->Preds.end(); PI != PE; ++PI) {
- if (PI->isCtrl()) {
- ChainPredID = PI->getSUnit()->NodeNum;
+ for (const SDep &Pred : SU.Preds) {
+ if (Pred.isCtrl()) {
+ ChainPredID = Pred.getSUnit()->NodeNum;
break;
}
}
@@ -1534,82 +1567,12 @@ void BaseMemOpClusterMutation::apply(ScheduleDAGInstrs *DAGInstrs) {
StoreChainIDs.insert(std::make_pair(ChainPredID, NumChains));
if (Result.second)
StoreChainDependents.resize(NumChains + 1);
- StoreChainDependents[Result.first->second].push_back(SU);
+ StoreChainDependents[Result.first->second].push_back(&SU);
}
// Iterate over the store chains.
- for (unsigned Idx = 0, End = StoreChainDependents.size(); Idx != End; ++Idx)
- clusterNeighboringMemOps(StoreChainDependents[Idx], DAG);
-}
-
-//===----------------------------------------------------------------------===//
-// MacroFusion - DAG post-processing to encourage fusion of macro ops.
-//===----------------------------------------------------------------------===//
-
-namespace {
-/// \brief Post-process the DAG to create cluster edges between instructions
-/// that may be fused by the processor into a single operation.
-class MacroFusion : public ScheduleDAGMutation {
- const TargetInstrInfo &TII;
-public:
- MacroFusion(const TargetInstrInfo &TII)
- : TII(TII) {}
-
- void apply(ScheduleDAGInstrs *DAGInstrs) override;
-};
-} // anonymous
-
-namespace llvm {
-
-std::unique_ptr<ScheduleDAGMutation>
-createMacroFusionDAGMutation(const TargetInstrInfo *TII) {
- return EnableMacroFusion ? make_unique<MacroFusion>(*TII) : nullptr;
-}
-
-} // namespace llvm
-
-/// \brief Callback from DAG postProcessing to create cluster edges to encourage
-/// fused operations.
-void MacroFusion::apply(ScheduleDAGInstrs *DAGInstrs) {
- ScheduleDAGMI *DAG = static_cast<ScheduleDAGMI*>(DAGInstrs);
-
- // For now, assume targets can only fuse with the branch.
- SUnit &ExitSU = DAG->ExitSU;
- MachineInstr *Branch = ExitSU.getInstr();
- if (!Branch)
- return;
-
- for (SDep &PredDep : ExitSU.Preds) {
- if (PredDep.isWeak())
- continue;
- SUnit &SU = *PredDep.getSUnit();
- MachineInstr &Pred = *SU.getInstr();
- if (!TII.shouldScheduleAdjacent(Pred, *Branch))
- continue;
-
- // Create a single weak edge from SU to ExitSU. The only effect is to cause
- // bottom-up scheduling to heavily prioritize the clustered SU. There is no
- // need to copy predecessor edges from ExitSU to SU, since top-down
- // scheduling cannot prioritize ExitSU anyway. To defer top-down scheduling
- // of SU, we could create an artificial edge from the deepest root, but it
- // hasn't been needed yet.
- bool Success = DAG->addEdge(&ExitSU, SDep(&SU, SDep::Cluster));
- (void)Success;
- assert(Success && "No DAG nodes should be reachable from ExitSU");
-
- // Adjust latency of data deps between the nodes.
- for (SDep &PredDep : ExitSU.Preds) {
- if (PredDep.getSUnit() == &SU)
- PredDep.setLatency(0);
- }
- for (SDep &SuccDep : SU.Succs) {
- if (SuccDep.getSUnit() == &ExitSU)
- SuccDep.setLatency(0);
- }
-
- DEBUG(dbgs() << "Macro Fuse SU(" << SU.NodeNum << ")\n");
- break;
- }
+ for (auto &SCD : StoreChainDependents)
+ clusterNeighboringMemOps(SCD, DAG);
}
//===----------------------------------------------------------------------===//
@@ -1617,6 +1580,7 @@ void MacroFusion::apply(ScheduleDAGInstrs *DAGInstrs) {
//===----------------------------------------------------------------------===//
namespace {
+
/// \brief Post-process the DAG to create weak edges from all uses of a copy to
/// the one use that defines the copy's source vreg, most likely an induction
/// variable increment.
@@ -1626,6 +1590,7 @@ class CopyConstrain : public ScheduleDAGMutation {
// RegionEndIdx is the slot index of the last non-debug instruction in the
// scheduling region. So we may have RegionBeginIdx == RegionEndIdx.
SlotIndex RegionEndIdx;
+
public:
CopyConstrain(const TargetInstrInfo *, const TargetRegisterInfo *) {}
@@ -1634,17 +1599,18 @@ public:
protected:
void constrainLocalCopy(SUnit *CopySU, ScheduleDAGMILive *DAG);
};
-} // anonymous
+
+} // end anonymous namespace
namespace llvm {
std::unique_ptr<ScheduleDAGMutation>
createCopyConstrainDAGMutation(const TargetInstrInfo *TII,
- const TargetRegisterInfo *TRI) {
- return make_unique<CopyConstrain>(TII, TRI);
+ const TargetRegisterInfo *TRI) {
+ return llvm::make_unique<CopyConstrain>(TII, TRI);
}
-} // namespace llvm
+} // end namespace llvm
/// constrainLocalCopy handles two possibilities:
/// 1) Local src:
@@ -1749,16 +1715,14 @@ void CopyConstrain::constrainLocalCopy(SUnit *CopySU, ScheduleDAGMILive *DAG) {
const VNInfo *LastLocalVN = LocalLI->getVNInfoBefore(LocalLI->endIndex());
MachineInstr *LastLocalDef = LIS->getInstructionFromIndex(LastLocalVN->def);
SUnit *LastLocalSU = DAG->getSUnit(LastLocalDef);
- for (SUnit::const_succ_iterator
- I = LastLocalSU->Succs.begin(), E = LastLocalSU->Succs.end();
- I != E; ++I) {
- if (I->getKind() != SDep::Data || I->getReg() != LocalReg)
+ for (const SDep &Succ : LastLocalSU->Succs) {
+ if (Succ.getKind() != SDep::Data || Succ.getReg() != LocalReg)
continue;
- if (I->getSUnit() == GlobalSU)
+ if (Succ.getSUnit() == GlobalSU)
continue;
- if (!DAG->canAddEdge(GlobalSU, I->getSUnit()))
+ if (!DAG->canAddEdge(GlobalSU, Succ.getSUnit()))
return;
- LocalUses.push_back(I->getSUnit());
+ LocalUses.push_back(Succ.getSUnit());
}
// Open the top of the GlobalLI hole by constraining any earlier global uses
// to precede the start of LocalLI.
@@ -1766,15 +1730,14 @@ void CopyConstrain::constrainLocalCopy(SUnit *CopySU, ScheduleDAGMILive *DAG) {
MachineInstr *FirstLocalDef =
LIS->getInstructionFromIndex(LocalLI->beginIndex());
SUnit *FirstLocalSU = DAG->getSUnit(FirstLocalDef);
- for (SUnit::const_pred_iterator
- I = GlobalSU->Preds.begin(), E = GlobalSU->Preds.end(); I != E; ++I) {
- if (I->getKind() != SDep::Anti || I->getReg() != GlobalReg)
+ for (const SDep &Pred : GlobalSU->Preds) {
+ if (Pred.getKind() != SDep::Anti || Pred.getReg() != GlobalReg)
continue;
- if (I->getSUnit() == FirstLocalSU)
+ if (Pred.getSUnit() == FirstLocalSU)
continue;
- if (!DAG->canAddEdge(FirstLocalSU, I->getSUnit()))
+ if (!DAG->canAddEdge(FirstLocalSU, Pred.getSUnit()))
return;
- GlobalUses.push_back(I->getSUnit());
+ GlobalUses.push_back(Pred.getSUnit());
}
DEBUG(dbgs() << "Constraining copy SU(" << CopySU->NodeNum << ")\n");
// Add the weak edges.
@@ -1805,12 +1768,11 @@ void CopyConstrain::apply(ScheduleDAGInstrs *DAGInstrs) {
RegionEndIdx = DAG->getLIS()->getInstructionIndex(
*priorNonDebug(DAG->end(), DAG->begin()));
- for (unsigned Idx = 0, End = DAG->SUnits.size(); Idx != End; ++Idx) {
- SUnit *SU = &DAG->SUnits[Idx];
- if (!SU->getInstr()->isCopy())
+ for (SUnit &SU : DAG->SUnits) {
+ if (!SU.getInstr()->isCopy())
continue;
- constrainLocalCopy(SU, static_cast<ScheduleDAGMILive*>(DAG));
+ constrainLocalCopy(&SU, static_cast<ScheduleDAGMILive*>(DAG));
}
}
@@ -1836,7 +1798,7 @@ void SchedBoundary::reset() {
CheckPending = false;
CurrCycle = 0;
CurrMOps = 0;
- MinReadyCycle = UINT_MAX;
+ MinReadyCycle = std::numeric_limits<unsigned>::max();
ExpectedLatency = 0;
DependentLatency = 0;
RetiredMOps = 0;
@@ -1861,10 +1823,9 @@ init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel) {
if (!SchedModel->hasInstrSchedModel())
return;
RemainingCounts.resize(SchedModel->getNumProcResourceKinds());
- for (std::vector<SUnit>::iterator
- I = DAG->SUnits.begin(), E = DAG->SUnits.end(); I != E; ++I) {
- const MCSchedClassDesc *SC = DAG->getSchedClass(&*I);
- RemIssueCount += SchedModel->getNumMicroOps(I->getInstr(), SC)
+ for (SUnit &SU : DAG->SUnits) {
+ const MCSchedClassDesc *SC = DAG->getSchedClass(&SU);
+ RemIssueCount += SchedModel->getNumMicroOps(SU.getInstr(), SC)
* SchedModel->getMicroOpFactor();
for (TargetSchedModel::ProcResIter
PI = SchedModel->getWriteProcResBegin(SC),
@@ -1937,12 +1898,22 @@ bool SchedBoundary::checkHazard(SUnit *SU) {
&& HazardRec->getHazardType(SU) != ScheduleHazardRecognizer::NoHazard) {
return true;
}
+
unsigned uops = SchedModel->getNumMicroOps(SU->getInstr());
if ((CurrMOps > 0) && (CurrMOps + uops > SchedModel->getIssueWidth())) {
DEBUG(dbgs() << " SU(" << SU->NodeNum << ") uops="
<< SchedModel->getNumMicroOps(SU->getInstr()) << '\n');
return true;
}
+
+ if (CurrMOps > 0 &&
+ ((isTop() && SchedModel->mustBeginGroup(SU->getInstr())) ||
+ (!isTop() && SchedModel->mustEndGroup(SU->getInstr())))) {
+ DEBUG(dbgs() << " hazard: SU(" << SU->NodeNum << ") must "
+ << (isTop()? "begin" : "end") << " group\n");
+ return true;
+ }
+
if (SchedModel->hasInstrSchedModel() && SU->hasReservedResource) {
const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
for (TargetSchedModel::ProcResIter
@@ -1968,12 +1939,11 @@ unsigned SchedBoundary::
findMaxLatency(ArrayRef<SUnit*> ReadySUs) {
SUnit *LateSU = nullptr;
unsigned RemLatency = 0;
- for (ArrayRef<SUnit*>::iterator I = ReadySUs.begin(), E = ReadySUs.end();
- I != E; ++I) {
- unsigned L = getUnscheduledLatency(*I);
+ for (SUnit *SU : ReadySUs) {
+ unsigned L = getUnscheduledLatency(SU);
if (L > RemLatency) {
RemLatency = L;
- LateSU = *I;
+ LateSU = SU;
}
}
if (LateSU) {
@@ -2039,7 +2009,8 @@ void SchedBoundary::releaseNode(SUnit *SU, unsigned ReadyCycle) {
/// Move the boundary of scheduled code by one cycle.
void SchedBoundary::bumpCycle(unsigned NextCycle) {
if (SchedModel->getMicroOpBufferSize() == 0) {
- assert(MinReadyCycle < UINT_MAX && "MinReadyCycle uninitialized");
+ assert(MinReadyCycle < std::numeric_limits<unsigned>::max() &&
+ "MinReadyCycle uninitialized");
if (MinReadyCycle > NextCycle)
NextCycle = MinReadyCycle;
}
@@ -2237,6 +2208,18 @@ void SchedBoundary::bumpNode(SUnit *SU) {
// one cycle. Since we commonly reach the max MOps here, opportunistically
// bump the cycle to avoid uselessly checking everything in the readyQ.
CurrMOps += IncMOps;
+
+ // Bump the cycle count for issue group constraints.
+ // This must be done after NextCycle has been adjusted for all other stalls.
+ // Calling bumpCycle(X) will reduce CurrMOps by one issue group and set
+ // CurrCycle to X.
+ if ((isTop() && SchedModel->mustEndGroup(SU->getInstr())) ||
+ (!isTop() && SchedModel->mustBeginGroup(SU->getInstr()))) {
+ DEBUG(dbgs() << " Bump cycle to "
+ << (isTop() ? "end" : "begin") << " group\n");
+ bumpCycle(++NextCycle);
+ }
+
while (CurrMOps >= SchedModel->getIssueWidth()) {
DEBUG(dbgs() << " *** Max MOps " << CurrMOps
<< " at cycle " << CurrCycle << '\n');
@@ -2250,7 +2233,7 @@ void SchedBoundary::bumpNode(SUnit *SU) {
void SchedBoundary::releasePending() {
// If the available queue is empty, it is safe to reset MinReadyCycle.
if (Available.empty())
- MinReadyCycle = UINT_MAX;
+ MinReadyCycle = std::numeric_limits<unsigned>::max();
// Check to see if any of the pending instructions are ready to issue. If
// so, add them to the available queue.
@@ -2323,10 +2306,10 @@ SUnit *SchedBoundary::pickOnlyChoice() {
return nullptr;
}
-#ifndef NDEBUG
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
// This is useful information to dump after bumpNode.
// Note that the Queue contents are more useful before pickNodeFromQueue.
-void SchedBoundary::dumpScheduledState() {
+LLVM_DUMP_METHOD void SchedBoundary::dumpScheduledState() const {
unsigned ResFactor;
unsigned ResCount;
if (ZoneCritResIdx) {
@@ -2665,12 +2648,15 @@ void GenericScheduler::initPolicy(MachineBasicBlock::iterator Begin,
}
}
-void GenericScheduler::dumpPolicy() {
+void GenericScheduler::dumpPolicy() const {
+ // Cannot completely remove virtual function even in release mode.
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
dbgs() << "GenericScheduler RegionPolicy: "
<< " ShouldTrackPressure=" << RegionPolicy.ShouldTrackPressure
<< " OnlyTopDown=" << RegionPolicy.OnlyTopDown
<< " OnlyBottomUp=" << RegionPolicy.OnlyBottomUp
<< "\n";
+#endif
}
/// Set IsAcyclicLatencyLimited if the acyclic path is longer than the cyclic
@@ -2714,17 +2700,16 @@ void GenericScheduler::registerRoots() {
Rem.CriticalPath = DAG->ExitSU.getDepth();
// Some roots may not feed into ExitSU. Check all of them in case.
- for (std::vector<SUnit*>::const_iterator
- I = Bot.Available.begin(), E = Bot.Available.end(); I != E; ++I) {
- if ((*I)->getDepth() > Rem.CriticalPath)
- Rem.CriticalPath = (*I)->getDepth();
+ for (const SUnit *SU : Bot.Available) {
+ if (SU->getDepth() > Rem.CriticalPath)
+ Rem.CriticalPath = SU->getDepth();
}
DEBUG(dbgs() << "Critical Path(GS-RR ): " << Rem.CriticalPath << '\n');
if (DumpCriticalPathLength) {
errs() << "Critical Path(GS-RR ): " << Rem.CriticalPath << " \n";
}
- if (EnableCyclicPath) {
+ if (EnableCyclicPath && SchedModel->getMicroOpBufferSize() > 0) {
Rem.CyclicCritPath = DAG->computeCyclicCriticalPath();
checkAcyclicLatency();
}
@@ -2964,10 +2949,10 @@ void GenericScheduler::pickNodeFromQueue(SchedBoundary &Zone,
RegPressureTracker &TempTracker = const_cast<RegPressureTracker&>(RPTracker);
ReadyQueue &Q = Zone.Available;
- for (ReadyQueue::iterator I = Q.begin(), E = Q.end(); I != E; ++I) {
+ for (SUnit *SU : Q) {
SchedCandidate TryCand(ZonePolicy);
- initCandidate(TryCand, *I, Zone.isTop(), RPTracker, TempTracker);
+ initCandidate(TryCand, SU, Zone.isTop(), RPTracker, TempTracker);
// Pass SchedBoundary only when comparing nodes from the same boundary.
SchedBoundary *ZoneArg = Cand.AtTop == TryCand.AtTop ? &Zone : nullptr;
tryCandidate(Cand, TryCand, ZoneArg);
@@ -3106,7 +3091,6 @@ SUnit *GenericScheduler::pickNode(bool &IsTopNode) {
}
void GenericScheduler::reschedulePhysRegCopies(SUnit *SU, bool isTop) {
-
MachineBasicBlock::iterator InsertPos = SU->getInstr();
if (!isTop)
++InsertPos;
@@ -3114,18 +3098,17 @@ void GenericScheduler::reschedulePhysRegCopies(SUnit *SU, bool isTop) {
// Find already scheduled copies with a single physreg dependence and move
// them just above the scheduled instruction.
- for (SmallVectorImpl<SDep>::iterator I = Deps.begin(), E = Deps.end();
- I != E; ++I) {
- if (I->getKind() != SDep::Data || !TRI->isPhysicalRegister(I->getReg()))
+ for (SDep &Dep : Deps) {
+ if (Dep.getKind() != SDep::Data || !TRI->isPhysicalRegister(Dep.getReg()))
continue;
- SUnit *DepSU = I->getSUnit();
+ SUnit *DepSU = Dep.getSUnit();
if (isTop ? DepSU->Succs.size() > 1 : DepSU->Preds.size() > 1)
continue;
MachineInstr *Copy = DepSU->getInstr();
if (!Copy->isCopy())
continue;
DEBUG(dbgs() << " Rescheduling physreg copy ";
- I->getSUnit()->dump(DAG));
+ Dep.getSUnit()->dump(DAG));
DAG->moveInstruction(Copy, InsertPos);
}
}
@@ -3154,7 +3137,8 @@ void GenericScheduler::schedNode(SUnit *SU, bool IsTopNode) {
/// Create the standard converging machine scheduler. This will be used as the
/// default scheduler if the target does not set a default.
ScheduleDAGMILive *llvm::createGenericSchedLive(MachineSchedContext *C) {
- ScheduleDAGMILive *DAG = new ScheduleDAGMILive(C, make_unique<GenericScheduler>(C));
+ ScheduleDAGMILive *DAG =
+ new ScheduleDAGMILive(C, llvm::make_unique<GenericScheduler>(C));
// Register DAG post-processors.
//
// FIXME: extend the mutation API to allow earlier mutations to instantiate
@@ -3195,15 +3179,13 @@ void PostGenericScheduler::initialize(ScheduleDAGMI *Dag) {
}
}
-
void PostGenericScheduler::registerRoots() {
Rem.CriticalPath = DAG->ExitSU.getDepth();
// Some roots may not feed into ExitSU. Check all of them in case.
- for (SmallVectorImpl<SUnit*>::const_iterator
- I = BotRoots.begin(), E = BotRoots.end(); I != E; ++I) {
- if ((*I)->getDepth() > Rem.CriticalPath)
- Rem.CriticalPath = (*I)->getDepth();
+ for (const SUnit *SU : BotRoots) {
+ if (SU->getDepth() > Rem.CriticalPath)
+ Rem.CriticalPath = SU->getDepth();
}
DEBUG(dbgs() << "Critical Path: (PGS-RR) " << Rem.CriticalPath << '\n');
if (DumpCriticalPathLength) {
@@ -3229,6 +3211,12 @@ void PostGenericScheduler::tryCandidate(SchedCandidate &Cand,
Top.getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall))
return;
+ // Keep clustered nodes together.
+ if (tryGreater(TryCand.SU == DAG->getNextClusterSucc(),
+ Cand.SU == DAG->getNextClusterSucc(),
+ TryCand, Cand, Cluster))
+ return;
+
// Avoid critical resource consumption and balance the schedule.
if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources,
TryCand, Cand, ResourceReduce))
@@ -3250,9 +3238,9 @@ void PostGenericScheduler::tryCandidate(SchedCandidate &Cand,
void PostGenericScheduler::pickNodeFromQueue(SchedCandidate &Cand) {
ReadyQueue &Q = Top.Available;
- for (ReadyQueue::iterator I = Q.begin(), E = Q.end(); I != E; ++I) {
+ for (SUnit *SU : Q) {
SchedCandidate TryCand(Cand.Policy);
- TryCand.SU = *I;
+ TryCand.SU = SU;
TryCand.AtTop = true;
TryCand.initResourceDelta(DAG, SchedModel);
tryCandidate(Cand, TryCand);
@@ -3302,7 +3290,7 @@ void PostGenericScheduler::schedNode(SUnit *SU, bool IsTopNode) {
}
ScheduleDAGMI *llvm::createGenericSchedPostRA(MachineSchedContext *C) {
- return new ScheduleDAGMI(C, make_unique<PostGenericScheduler>(C),
+ return new ScheduleDAGMI(C, llvm::make_unique<PostGenericScheduler>(C),
/*RemoveKillFlags=*/true);
}
@@ -3311,14 +3299,14 @@ ScheduleDAGMI *llvm::createGenericSchedPostRA(MachineSchedContext *C) {
//===----------------------------------------------------------------------===//
namespace {
+
/// \brief Order nodes by the ILP metric.
struct ILPOrder {
- const SchedDFSResult *DFSResult;
- const BitVector *ScheduledTrees;
+ const SchedDFSResult *DFSResult = nullptr;
+ const BitVector *ScheduledTrees = nullptr;
bool MaximizeILP;
- ILPOrder(bool MaxILP)
- : DFSResult(nullptr), ScheduledTrees(nullptr), MaximizeILP(MaxILP) {}
+ ILPOrder(bool MaxILP) : MaximizeILP(MaxILP) {}
/// \brief Apply a less-than relation on node priority.
///
@@ -3347,12 +3335,13 @@ struct ILPOrder {
/// \brief Schedule based on the ILP metric.
class ILPScheduler : public MachineSchedStrategy {
- ScheduleDAGMILive *DAG;
+ ScheduleDAGMILive *DAG = nullptr;
ILPOrder Cmp;
std::vector<SUnit*> ReadyQ;
+
public:
- ILPScheduler(bool MaximizeILP): DAG(nullptr), Cmp(MaximizeILP) {}
+ ILPScheduler(bool MaximizeILP) : Cmp(MaximizeILP) {}
void initialize(ScheduleDAGMI *dag) override {
assert(dag->hasVRegLiveness() && "ILPScheduler needs vreg liveness");
@@ -3405,14 +3394,16 @@ public:
std::push_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
}
};
-} // namespace
+
+} // end anonymous namespace
static ScheduleDAGInstrs *createILPMaxScheduler(MachineSchedContext *C) {
- return new ScheduleDAGMILive(C, make_unique<ILPScheduler>(true));
+ return new ScheduleDAGMILive(C, llvm::make_unique<ILPScheduler>(true));
}
static ScheduleDAGInstrs *createILPMinScheduler(MachineSchedContext *C) {
- return new ScheduleDAGMILive(C, make_unique<ILPScheduler>(false));
+ return new ScheduleDAGMILive(C, llvm::make_unique<ILPScheduler>(false));
}
+
static MachineSchedRegistry ILPMaxRegistry(
"ilpmax", "Schedule bottom-up for max ILP", createILPMaxScheduler);
static MachineSchedRegistry ILPMinRegistry(
@@ -3424,6 +3415,7 @@ static MachineSchedRegistry ILPMinRegistry(
#ifndef NDEBUG
namespace {
+
/// Apply a less-than relation on the node order, which corresponds to the
/// instruction order prior to scheduling. IsReverse implements greater-than.
template<bool IsReverse>
@@ -3444,11 +3436,12 @@ class InstructionShuffler : public MachineSchedStrategy {
// Using a less-than relation (SUnitOrder<false>) for the TopQ priority
// gives nodes with a higher number higher priority causing the latest
// instructions to be scheduled first.
- PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<false> >
+ PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<false>>
TopQ;
// When scheduling bottom-up, use greater-than as the queue priority.
- PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<true> >
+ PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<true>>
BottomQ;
+
public:
InstructionShuffler(bool alternate, bool topdown)
: IsAlternating(alternate), IsTopDown(topdown) {}
@@ -3492,15 +3485,18 @@ public:
BottomQ.push(SU);
}
};
-} // namespace
+
+} // end anonymous namespace
static ScheduleDAGInstrs *createInstructionShuffler(MachineSchedContext *C) {
bool Alternate = !ForceTopDown && !ForceBottomUp;
bool TopDown = !ForceBottomUp;
assert((TopDown || !ForceTopDown) &&
"-misched-topdown incompatible with -misched-bottomup");
- return new ScheduleDAGMILive(C, make_unique<InstructionShuffler>(Alternate, TopDown));
+ return new ScheduleDAGMILive(
+ C, llvm::make_unique<InstructionShuffler>(Alternate, TopDown));
}
+
static MachineSchedRegistry ShufflerRegistry(
"shuffle", "Shuffle machine instructions alternating directions",
createInstructionShuffler);
@@ -3518,8 +3514,7 @@ template<> struct GraphTraits<
template<>
struct DOTGraphTraits<ScheduleDAGMI*> : public DefaultDOTGraphTraits {
-
- DOTGraphTraits (bool isSimple=false) : DefaultDOTGraphTraits(isSimple) {}
+ DOTGraphTraits(bool isSimple = false) : DefaultDOTGraphTraits(isSimple) {}
static std::string getGraphName(const ScheduleDAG *G) {
return G->MF.getName();
@@ -3576,7 +3571,8 @@ struct DOTGraphTraits<ScheduleDAGMI*> : public DefaultDOTGraphTraits {
return Str;
}
};
-} // namespace llvm
+
+} // end namespace llvm
#endif // NDEBUG
/// viewGraph - Pop up a ghostview window with the reachable parts of the DAG
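The ILP and shuffle schedulers above show the extension point for custom strategies: implement MachineSchedStrategy, wrap it in a factory, and register the factory. A minimal sketch in the same style — the NoopScheduler name and its LIFO pick order are illustrative assumptions, not part of this change:

namespace {

/// Illustrative-only strategy: issue ready nodes bottom-up in LIFO order.
class NoopScheduler : public MachineSchedStrategy {
  std::vector<SUnit*> ReadyQ;

public:
  void initialize(ScheduleDAGMI *) override {}

  SUnit *pickNode(bool &IsTopNode) override {
    if (ReadyQ.empty())
      return nullptr;          // DAG fully scheduled.
    IsTopNode = false;         // Bottom-up only, like ILPScheduler above.
    SUnit *SU = ReadyQ.back();
    ReadyQ.pop_back();
    return SU;
  }

  void schedNode(SUnit *, bool) override {}
  void releaseTopNode(SUnit *) override {}
  void releaseBottomNode(SUnit *SU) override { ReadyQ.push_back(SU); }
};

} // end anonymous namespace

static ScheduleDAGInstrs *createNoopScheduler(MachineSchedContext *C) {
  return new ScheduleDAGMILive(C, llvm::make_unique<NoopScheduler>());
}

static MachineSchedRegistry NoopRegistry(
    "noop-lifo", "Schedule ready nodes bottom-up in LIFO order",
    createNoopScheduler);

The registered name then becomes selectable through the -misched option, the same way "ilpmax" and "shuffle" are selected above.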
diff --git a/contrib/llvm/lib/CodeGen/MachineSink.cpp b/contrib/llvm/lib/CodeGen/MachineSink.cpp
index 5f87b68..79e3fea 100644
--- a/contrib/llvm/lib/CodeGen/MachineSink.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineSink.cpp
@@ -16,7 +16,6 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/CodeGen/Passes.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SparseBitVector.h"
@@ -33,6 +32,7 @@
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachinePostDominators.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
@@ -173,14 +173,14 @@ namespace {
char MachineSinking::ID = 0;
char &llvm::MachineSinkingID = MachineSinking::ID;
-INITIALIZE_PASS_BEGIN(MachineSinking, "machine-sink",
- "Machine code sinking", false, false)
+INITIALIZE_PASS_BEGIN(MachineSinking, DEBUG_TYPE,
+ "Machine code sinking", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
-INITIALIZE_PASS_END(MachineSinking, "machine-sink",
- "Machine code sinking", false, false)
+INITIALIZE_PASS_END(MachineSinking, DEBUG_TYPE,
+ "Machine code sinking", false, false)
bool MachineSinking::PerformTrivialForwardCoalescing(MachineInstr &MI,
MachineBasicBlock *MBB) {
diff --git a/contrib/llvm/lib/CodeGen/MachineTraceMetrics.cpp b/contrib/llvm/lib/CodeGen/MachineTraceMetrics.cpp
index ef7e525..6c5abc6 100644
--- a/contrib/llvm/lib/CodeGen/MachineTraceMetrics.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineTraceMetrics.cpp
@@ -1,4 +1,4 @@
-//===- lib/CodeGen/MachineTraceMetrics.cpp ----------------------*- C++ -*-===//
+//===- lib/CodeGen/MachineTraceMetrics.cpp --------------------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -8,20 +8,34 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/MachineTraceMetrics.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/Optional.h"
#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/SparseSet.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/Passes.h"
-#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
+#include <algorithm>
+#include <cassert>
+#include <iterator>
+#include <tuple>
+#include <utility>
using namespace llvm;
@@ -30,16 +44,14 @@ using namespace llvm;
char MachineTraceMetrics::ID = 0;
char &llvm::MachineTraceMetricsID = MachineTraceMetrics::ID;
-INITIALIZE_PASS_BEGIN(MachineTraceMetrics,
- "machine-trace-metrics", "Machine Trace Metrics", false, true)
+INITIALIZE_PASS_BEGIN(MachineTraceMetrics, DEBUG_TYPE,
+ "Machine Trace Metrics", false, true)
INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
-INITIALIZE_PASS_END(MachineTraceMetrics,
- "machine-trace-metrics", "Machine Trace Metrics", false, true)
+INITIALIZE_PASS_END(MachineTraceMetrics, DEBUG_TYPE,
+ "Machine Trace Metrics", false, true)
-MachineTraceMetrics::MachineTraceMetrics()
- : MachineFunctionPass(ID), MF(nullptr), TII(nullptr), TRI(nullptr),
- MRI(nullptr), Loops(nullptr) {
+MachineTraceMetrics::MachineTraceMetrics() : MachineFunctionPass(ID) {
std::fill(std::begin(Ensembles), std::end(Ensembles), nullptr);
}
@@ -137,7 +149,6 @@ MachineTraceMetrics::getProcResourceCycles(unsigned MBBNum) const {
return makeArrayRef(ProcResourceCycles.data() + MBBNum * PRKinds, PRKinds);
}
-
//===----------------------------------------------------------------------===//
// Ensemble utility functions
//===----------------------------------------------------------------------===//
@@ -151,7 +162,7 @@ MachineTraceMetrics::Ensemble::Ensemble(MachineTraceMetrics *ct)
}
// Virtual destructor serves as an anchor.
-MachineTraceMetrics::Ensemble::~Ensemble() {}
+MachineTraceMetrics::Ensemble::~Ensemble() = default;
const MachineLoop*
MachineTraceMetrics::Ensemble::getLoopFor(const MachineBasicBlock *MBB) const {
@@ -297,6 +308,7 @@ static bool isExitingLoop(const MachineLoop *From, const MachineLoop *To) {
// MinInstrCountEnsemble - Pick the trace that executes the least number of
// instructions.
namespace {
+
class MinInstrCountEnsemble : public MachineTraceMetrics::Ensemble {
const char *getName() const override { return "MinInstr"; }
const MachineBasicBlock *pickTracePred(const MachineBasicBlock*) override;
@@ -306,7 +318,8 @@ public:
MinInstrCountEnsemble(MachineTraceMetrics *mtm)
: MachineTraceMetrics::Ensemble(mtm) {}
};
-}
+
+} // end anonymous namespace
// Select the preferred predecessor for MBB.
const MachineBasicBlock*
@@ -409,25 +422,30 @@ void MachineTraceMetrics::verifyAnalysis() const {
// revisit blocks.
namespace {
+
struct LoopBounds {
MutableArrayRef<MachineTraceMetrics::TraceBlockInfo> Blocks;
SmallPtrSet<const MachineBasicBlock*, 8> Visited;
const MachineLoopInfo *Loops;
- bool Downward;
+ bool Downward = false;
+
LoopBounds(MutableArrayRef<MachineTraceMetrics::TraceBlockInfo> blocks,
- const MachineLoopInfo *loops)
- : Blocks(blocks), Loops(loops), Downward(false) {}
+ const MachineLoopInfo *loops) : Blocks(blocks), Loops(loops) {}
};
-}
+
+} // end anonymous namespace
// Specialize po_iterator_storage in order to prune the post-order traversal so
// it is limited to the current loop and doesn't traverse the loop back edges.
namespace llvm {
+
template<>
class po_iterator_storage<LoopBounds, true> {
LoopBounds &LB;
+
public:
po_iterator_storage(LoopBounds &lb) : LB(lb) {}
+
void finishPostorder(const MachineBasicBlock*) {}
bool insertEdge(Optional<const MachineBasicBlock *> From,
@@ -452,7 +470,8 @@ public:
return LB.Visited.insert(To).second;
}
};
-}
+
+} // end namespace llvm
/// Compute the trace through MBB.
void MachineTraceMetrics::Ensemble::computeTrace(const MachineBasicBlock *MBB) {
@@ -603,6 +622,7 @@ void MachineTraceMetrics::Ensemble::verify() const {
// A data dependency is represented as a defining MI and operand numbers on the
// defining and using MI.
namespace {
+
struct DataDep {
const MachineInstr *DefMI;
unsigned DefOp;
@@ -622,7 +642,8 @@ struct DataDep {
assert((++DefI).atEnd() && "Register has multiple defs");
}
};
-}
+
+} // end anonymous namespace
// Get the input data dependencies that must be ready before UseMI can issue.
// Return true if UseMI has any physreg operands.
@@ -678,17 +699,19 @@ static void getPHIDeps(const MachineInstr &UseMI,
// direction instructions are scanned, it could be the operand that defined the
// regunit, or the highest operand to read the regunit.
namespace {
+
struct LiveRegUnit {
unsigned RegUnit;
- unsigned Cycle;
- const MachineInstr *MI;
- unsigned Op;
+ unsigned Cycle = 0;
+ const MachineInstr *MI = nullptr;
+ unsigned Op = 0;
unsigned getSparseSetIndex() const { return RegUnit; }
- LiveRegUnit(unsigned RU) : RegUnit(RU), Cycle(0), MI(nullptr), Op(0) {}
+ LiveRegUnit(unsigned RU) : RegUnit(RU) {}
};
-}
+
+} // end anonymous namespace
// Identify physreg dependencies for UseMI, and update the live regunit
// tracking set when scanning instructions downwards.
@@ -922,7 +945,6 @@ static unsigned updatePhysDepsUpwards(const MachineInstr &MI, unsigned Height,
return Height;
}
-
typedef DenseMap<const MachineInstr *, unsigned> MIHeightMap;
// Push the height of DefMI upwards if required to match UseMI.
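The po_iterator_storage specialization above exists so the generic post-order iterators can carry LoopBounds as external visited-set state and prune traversal in insertEdge. A sketch of the consuming side, assuming the *_ext iterator helpers from llvm/ADT/PostOrderIterator.h as used by computeTrace; BlockInfo and Loops are placeholder names:

// Walk the CFG upward from MBB in post-order. LoopBounds::insertEdge,
// reached through the po_iterator_storage specialization, rejects edges
// that would leave the current loop or follow a loop back-edge.
LoopBounds Bounds(BlockInfo, Loops); // external storage, not a SmallPtrSet
Bounds.Downward = false;             // upward search toward the trace head
for (const MachineBasicBlock *Pred : inverse_post_order_ext(MBB, Bounds)) {
  // ... choose the preferred trace predecessor among the visited blocks ...
}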
diff --git a/contrib/llvm/lib/CodeGen/MachineVerifier.cpp b/contrib/llvm/lib/CodeGen/MachineVerifier.cpp
index a98139f..fcb5448 100644
--- a/contrib/llvm/lib/CodeGen/MachineVerifier.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineVerifier.cpp
@@ -23,7 +23,6 @@
// the verifier errors.
//===----------------------------------------------------------------------===//
-#include "llvm/CodeGen/Passes.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/SetOperations.h"
@@ -36,6 +35,8 @@
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/StackMaps.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/Instructions.h"
@@ -87,7 +88,6 @@ namespace {
RegSet regsLive;
RegVector regsDefined, regsDead, regsKilled;
RegMaskVector regMasks;
- RegSet regsLiveInButUnused;
SlotIndex lastIndex;
@@ -188,8 +188,9 @@ namespace {
return Reg < regsReserved.size() && regsReserved.test(Reg);
}
- bool isAllocatable(unsigned Reg) {
- return Reg < TRI->getNumRegs() && MRI->isAllocatable(Reg);
+ bool isAllocatable(unsigned Reg) const {
+ return Reg < TRI->getNumRegs() && TRI->isInAllocatableClass(Reg) &&
+ !regsReserved.test(Reg);
}
// Analysis information if available
@@ -260,8 +261,8 @@ namespace {
static char ID; // Pass ID, replacement for typeid
const std::string Banner;
- MachineVerifierPass(const std::string &banner = nullptr)
- : MachineFunctionPass(ID), Banner(banner) {
+ MachineVerifierPass(std::string banner = std::string())
+ : MachineFunctionPass(ID), Banner(std::move(banner)) {
initializeMachineVerifierPassPass(*PassRegistry::getPassRegistry());
}
@@ -418,7 +419,6 @@ unsigned MachineVerifier::verify(MachineFunction &MF) {
regsDead.clear();
regsKilled.clear();
regMasks.clear();
- regsLiveInButUnused.clear();
MBBInfoMap.clear();
return foundErrors;
@@ -526,9 +526,11 @@ void MachineVerifier::markReachable(const MachineBasicBlock *MBB) {
void MachineVerifier::visitMachineFunctionBefore() {
lastIndex = SlotIndex();
- regsReserved = MRI->getReservedRegs();
+ regsReserved = MRI->reservedRegsFrozen() ? MRI->getReservedRegs()
+ : TRI->getReservedRegs(*MF);
- markReachable(&MF->front());
+ if (!MF->empty())
+ markReachable(&MF->front());
// Build a set of the basic blocks in the function.
FunctionBlocks.clear();
@@ -548,7 +550,8 @@ void MachineVerifier::visitMachineFunctionBefore() {
// Check that the register use lists are sane.
MRI->verifyUseLists();
- verifyStackFrame();
+ if (!MF->empty())
+ verifyStackFrame();
}
// Does iterator point to a and b as the first two elements?
@@ -572,7 +575,7 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) {
for (const auto &LI : MBB->liveins()) {
if (isAllocatable(LI.PhysReg) && !MBB->isEHPad() &&
MBB->getIterator() != MBB->getParent()->begin()) {
- report("MBB has allocable live-in, but isn't entry or landing-pad.", MBB);
+ report("MBB has allocatable live-in, but isn't entry or landing-pad.", MBB);
}
}
}
@@ -752,11 +755,10 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) {
regsLive.insert(*SubRegs);
}
}
- regsLiveInButUnused = regsLive;
const MachineFrameInfo &MFI = MF->getFrameInfo();
BitVector PR = MFI.getPristineRegs(*MF);
- for (int I = PR.find_first(); I>0; I = PR.find_next(I)) {
+ for (unsigned I : PR.set_bits()) {
for (MCSubRegIterator SubRegs(I, TRI, /*IncludeSelf=*/true);
SubRegs.isValid(); ++SubRegs)
regsLive.insert(*SubRegs);
@@ -911,6 +913,39 @@ void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) {
StringRef ErrorInfo;
if (!TII->verifyInstruction(*MI, ErrorInfo))
report(ErrorInfo.data(), MI);
+
+ // Verify properties of various specific instruction types
+ switch (MI->getOpcode()) {
+ default:
+ break;
+ case TargetOpcode::G_LOAD:
+ case TargetOpcode::G_STORE:
+ // Generic loads and stores must have a single MachineMemOperand
+ // describing that access.
+ if (!MI->hasOneMemOperand())
+ report("Generic instruction accessing memory must have one mem operand",
+ MI);
+ break;
+ case TargetOpcode::STATEPOINT:
+ if (!MI->getOperand(StatepointOpers::IDPos).isImm() ||
+ !MI->getOperand(StatepointOpers::NBytesPos).isImm() ||
+ !MI->getOperand(StatepointOpers::NCallArgsPos).isImm())
+ report("meta operands to STATEPOINT not constant!", MI);
+
+ auto VerifyStackMapConstant = [&](unsigned Offset) {
+ if (!MI->getOperand(Offset).isImm() ||
+ MI->getOperand(Offset).getImm() != StackMaps::ConstantOp ||
+ !MI->getOperand(Offset + 1).isImm())
+ report("stack map constant to STATEPOINT not well formed!", MI);
+ };
+ const unsigned VarStart = StatepointOpers(MI).getVarIdx();
+ VerifyStackMapConstant(VarStart + StatepointOpers::CCOffset);
+ VerifyStackMapConstant(VarStart + StatepointOpers::FlagsOffset);
+ VerifyStackMapConstant(VarStart + StatepointOpers::NumDeoptOperandsOffset);
+
+ // TODO: verify we have properly encoded deopt arguments
+ break;
+ }
}
void
@@ -950,6 +985,14 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {
report("Operand should be tied", MO, MONum);
else if (unsigned(TiedTo) != MI->findTiedOperandIdx(MONum))
report("Tied def doesn't match MCInstrDesc", MO, MONum);
+ else if (TargetRegisterInfo::isPhysicalRegister(MO->getReg())) {
+ const MachineOperand &MOTied = MI->getOperand(TiedTo);
+ if (!MOTied.isReg())
+ report("Tied counterpart must be a register", &MOTied, TiedTo);
+ else if (TargetRegisterInfo::isPhysicalRegister(MOTied.getReg()) &&
+ MO->getReg() != MOTied.getReg())
+ report("Tied physical registers must match.", &MOTied, TiedTo);
+ }
} else if (MO->isReg() && MO->isTied())
report("Explicit operand should not be tied", MO, MONum);
} else {
@@ -1256,8 +1299,6 @@ void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) {
// Both use and def operands can read a register.
if (MO->readsReg()) {
- regsLiveInButUnused.erase(Reg);
-
if (MO->isKill())
addRegWithSubRegs(regsKilled, Reg);
@@ -1913,9 +1954,11 @@ void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR,
SlotIndex PEnd = LiveInts->getMBBEndIdx(*PI);
const VNInfo *PVNI = LR.getVNInfoBefore(PEnd);
- // All predecessors must have a live-out value if this is not a
- // subregister liverange.
- if (!PVNI && LaneMask.none()) {
+ // All predecessors must have a live-out value. However, for a phi
+ // instruction with subregister intervals, only one of the
+ // subregisters (not necessarily the current one) needs to be
+ // defined.
+ if (!PVNI && (LaneMask.none() || !IsPHI)) {
report("Register not marked live out of predecessor", *PI);
report_context(LR, Reg, LaneMask);
report_context(*VNI);
@@ -2020,6 +2063,8 @@ namespace {
void MachineVerifier::verifyStackFrame() {
unsigned FrameSetupOpcode = TII->getCallFrameSetupOpcode();
unsigned FrameDestroyOpcode = TII->getCallFrameDestroyOpcode();
+ if (FrameSetupOpcode == ~0u && FrameDestroyOpcode == ~0u)
+ return;
SmallVector<StackStateOfBB, 8> SPState;
SPState.resize(MF->getNumBlockIDs());
@@ -2047,23 +2092,14 @@ void MachineVerifier::verifyStackFrame() {
// Update stack state by checking contents of MBB.
for (const auto &I : *MBB) {
if (I.getOpcode() == FrameSetupOpcode) {
- // The first operand of a FrameOpcode should be i32.
- int Size = I.getOperand(0).getImm();
- assert(Size >= 0 &&
- "Value should be non-negative in FrameSetup and FrameDestroy.\n");
-
if (BBState.ExitIsSetup)
report("FrameSetup is after another FrameSetup", &I);
- BBState.ExitValue -= Size;
+ BBState.ExitValue -= TII->getFrameTotalSize(I);
BBState.ExitIsSetup = true;
}
if (I.getOpcode() == FrameDestroyOpcode) {
- // The first operand of a FrameOpcode should be i32.
- int Size = I.getOperand(0).getImm();
- assert(Size >= 0 &&
- "Value should be non-negative in FrameSetup and FrameDestroy.\n");
-
+ int Size = TII->getFrameTotalSize(I);
if (!BBState.ExitIsSetup)
report("FrameDestroy is not after a FrameSetup", &I);
int AbsSPAdj = BBState.ExitValue < 0 ? -BBState.ExitValue :
diff --git a/contrib/llvm/lib/CodeGen/MacroFusion.cpp b/contrib/llvm/lib/CodeGen/MacroFusion.cpp
new file mode 100644
index 0000000..633a853
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/MacroFusion.cpp
@@ -0,0 +1,153 @@
+//===- MacroFusion.cpp - Macro Fusion -------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file This file contains the implementation of the DAG scheduling mutation
+/// to pair instructions back to back.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MacroFusion.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineScheduler.h"
+#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/CodeGen/ScheduleDAGMutation.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+
+#define DEBUG_TYPE "machine-scheduler"
+
+STATISTIC(NumFused, "Number of instr pairs fused");
+
+using namespace llvm;
+
+static cl::opt<bool> EnableMacroFusion("misched-fusion", cl::Hidden,
+ cl::desc("Enable scheduling for macro fusion."), cl::init(true));
+
+static void fuseInstructionPair(ScheduleDAGMI &DAG, SUnit &FirstSU,
+ SUnit &SecondSU) {
+ // Create a single weak edge between the adjacent instrs. The only effect is
+ // to cause bottom-up scheduling to heavily prioritize the clustered instrs.
+ DAG.addEdge(&SecondSU, SDep(&FirstSU, SDep::Cluster));
+
+ // Adjust the latency between the anchor instr and its
+ // predecessors.
+ for (SDep &IDep : SecondSU.Preds)
+ if (IDep.getSUnit() == &FirstSU)
+ IDep.setLatency(0);
+
+ // Adjust the latency between the dependent instr and its
+ // successors.
+ for (SDep &IDep : FirstSU.Succs)
+ if (IDep.getSUnit() == &SecondSU)
+ IDep.setLatency(0);
+
+ DEBUG(dbgs() << DAG.MF.getName() << "(): Macro fuse ";
+ FirstSU.print(dbgs(), &DAG); dbgs() << " - ";
+ SecondSU.print(dbgs(), &DAG); dbgs() << " / ";
+ dbgs() << DAG.TII->getName(FirstSU.getInstr()->getOpcode()) << " - " <<
+ DAG.TII->getName(SecondSU.getInstr()->getOpcode()) << '\n'; );
+
+ if (&SecondSU != &DAG.ExitSU)
+ // Make instructions dependent on FirstSU also dependent on SecondSU to
+ // prevent them from being scheduled between FirstSU and SecondSU.
+ for (const SDep &SI : FirstSU.Succs) {
+ if (SI.getSUnit() == &SecondSU)
+ continue;
+ DEBUG(dbgs() << " Copy Succ ";
+ SI.getSUnit()->print(dbgs(), &DAG); dbgs() << '\n';);
+ DAG.addEdge(SI.getSUnit(), SDep(&SecondSU, SDep::Artificial));
+ }
+
+ ++NumFused;
+}
+
+namespace {
+
+/// \brief Post-process the DAG to create cluster edges between instrs that may
+/// be fused by the processor into a single operation.
+class MacroFusion : public ScheduleDAGMutation {
+ ShouldSchedulePredTy shouldScheduleAdjacent;
+ bool FuseBlock;
+ bool scheduleAdjacentImpl(ScheduleDAGMI &DAG, SUnit &AnchorSU);
+
+public:
+ MacroFusion(ShouldSchedulePredTy shouldScheduleAdjacent, bool FuseBlock)
+ : shouldScheduleAdjacent(shouldScheduleAdjacent), FuseBlock(FuseBlock) {}
+
+ void apply(ScheduleDAGInstrs *DAGInstrs) override;
+};
+
+} // end anonymous namespace
+
+void MacroFusion::apply(ScheduleDAGInstrs *DAGInstrs) {
+ ScheduleDAGMI *DAG = static_cast<ScheduleDAGMI*>(DAGInstrs);
+
+ if (FuseBlock)
+ // For each of the SUnits in the scheduling block, try to fuse the instr in
+ // it with one in its predecessors.
+ for (SUnit &ISU : DAG->SUnits)
+ scheduleAdjacentImpl(*DAG, ISU);
+
+ if (DAG->ExitSU.getInstr())
+ // Try to fuse the instr in the ExitSU with one in its predecessors.
+ scheduleAdjacentImpl(*DAG, DAG->ExitSU);
+}
+
+/// \brief Implement the fusion of instr pairs in the scheduling DAG,
+/// anchored at the instr in AnchorSU.
+bool MacroFusion::scheduleAdjacentImpl(ScheduleDAGMI &DAG, SUnit &AnchorSU) {
+ const MachineInstr &AnchorMI = *AnchorSU.getInstr();
+ const TargetInstrInfo &TII = *DAG.TII;
+ const TargetSubtargetInfo &ST = DAG.MF.getSubtarget();
+
+ // Check if the anchor instr may be fused.
+ if (!shouldScheduleAdjacent(TII, ST, nullptr, AnchorMI))
+ return false;
+
+ // Explore fusion candidates among the dependencies of the anchor instr.
+ for (SDep &Dep : AnchorSU.Preds) {
+ // Ignore dependencies that don't enforce ordering.
+ if (Dep.getKind() == SDep::Anti || Dep.getKind() == SDep::Output ||
+ Dep.isWeak())
+ continue;
+
+ SUnit &DepSU = *Dep.getSUnit();
+ if (DepSU.isBoundaryNode())
+ continue;
+
+ const MachineInstr *DepMI = DepSU.getInstr();
+ if (!shouldScheduleAdjacent(TII, ST, DepMI, AnchorMI))
+ continue;
+
+ fuseInstructionPair(DAG, DepSU, AnchorSU);
+ return true;
+ }
+
+ return false;
+}
+
+std::unique_ptr<ScheduleDAGMutation>
+llvm::createMacroFusionDAGMutation(
+ ShouldSchedulePredTy shouldScheduleAdjacent) {
+ if (EnableMacroFusion)
+ return llvm::make_unique<MacroFusion>(shouldScheduleAdjacent, true);
+ return nullptr;
+}
+
+std::unique_ptr<ScheduleDAGMutation>
+llvm::createBranchMacroFusionDAGMutation(
+ ShouldSchedulePredTy shouldScheduleAdjacent) {
+ if (EnableMacroFusion)
+ return llvm::make_unique<MacroFusion>(shouldScheduleAdjacent, false);
+ return nullptr;
+}
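Nothing in MacroFusion.cpp inspects opcodes itself; pairing decisions live entirely in the target-supplied ShouldSchedulePredTy callback, whose signature can be read off the shouldScheduleAdjacent calls above. A hedged sketch of the target side — MyTarget::CMP/BCC and createMySchedLive are invented names used only for illustration:

// Fuse a hypothetical compare with the conditional branch that consumes it.
// FirstMI may be nullptr, meaning "may SecondMI fuse with any predecessor?".
static bool shouldScheduleCmpBccAdjacent(const TargetInstrInfo &TII,
                                         const TargetSubtargetInfo &STI,
                                         const MachineInstr *FirstMI,
                                         const MachineInstr &SecondMI) {
  if (SecondMI.getOpcode() != MyTarget::BCC)
    return false;
  return !FirstMI || FirstMI->getOpcode() == MyTarget::CMP;
}

// In the target's createMachineScheduler hook:
static ScheduleDAGInstrs *createMySchedLive(MachineSchedContext *C) {
  ScheduleDAGMILive *DAG =
      new ScheduleDAGMILive(C, llvm::make_unique<GenericScheduler>(C));
  // createMacroFusionDAGMutation returns nullptr when -misched-fusion is
  // disabled; addMutation is expected to tolerate that.
  DAG->addMutation(createMacroFusionDAGMutation(shouldScheduleCmpBccAdjacent));
  return DAG;
}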
diff --git a/contrib/llvm/lib/CodeGen/OptimizePHIs.cpp b/contrib/llvm/lib/CodeGen/OptimizePHIs.cpp
index 2a8531f..f7aeb42 100644
--- a/contrib/llvm/lib/CodeGen/OptimizePHIs.cpp
+++ b/contrib/llvm/lib/CodeGen/OptimizePHIs.cpp
@@ -12,18 +12,18 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/CodeGen/Passes.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
#include "llvm/IR/Function.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;
-#define DEBUG_TYPE "phi-opt"
+#define DEBUG_TYPE "opt-phis"
STATISTIC(NumPHICycles, "Number of PHI cycles replaced");
STATISTIC(NumDeadPHICycles, "Number of dead PHI cycles");
@@ -59,7 +59,7 @@ namespace {
char OptimizePHIs::ID = 0;
char &llvm::OptimizePHIsID = OptimizePHIs::ID;
-INITIALIZE_PASS(OptimizePHIs, "opt-phis",
+INITIALIZE_PASS(OptimizePHIs, DEBUG_TYPE,
"Optimize machine instruction PHIs", false, false)
bool OptimizePHIs::runOnMachineFunction(MachineFunction &Fn) {
diff --git a/contrib/llvm/lib/CodeGen/PHIElimination.cpp b/contrib/llvm/lib/CodeGen/PHIElimination.cpp
index c67a25b..9c898fa 100644
--- a/contrib/llvm/lib/CodeGen/PHIElimination.cpp
+++ b/contrib/llvm/lib/CodeGen/PHIElimination.cpp
@@ -34,7 +34,7 @@
#include <algorithm>
using namespace llvm;
-#define DEBUG_TYPE "phielim"
+#define DEBUG_TYPE "phi-node-elimination"
static cl::opt<bool>
DisableEdgeSplitting("disable-phi-elim-edge-splitting", cl::init(false),
@@ -112,11 +112,11 @@ STATISTIC(NumReused, "Number of reused lowered phis");
char PHIElimination::ID = 0;
char& llvm::PHIEliminationID = PHIElimination::ID;
-INITIALIZE_PASS_BEGIN(PHIElimination, "phi-node-elimination",
+INITIALIZE_PASS_BEGIN(PHIElimination, DEBUG_TYPE,
"Eliminate PHI nodes for register allocation",
false, false)
INITIALIZE_PASS_DEPENDENCY(LiveVariables)
-INITIALIZE_PASS_END(PHIElimination, "phi-node-elimination",
+INITIALIZE_PASS_END(PHIElimination, DEBUG_TYPE,
"Eliminate PHI nodes for register allocation", false, false)
void PHIElimination::getAnalysisUsage(AnalysisUsage &AU) const {
diff --git a/contrib/llvm/lib/CodeGen/PatchableFunction.cpp b/contrib/llvm/lib/CodeGen/PatchableFunction.cpp
index ad9166f..513e827 100644
--- a/contrib/llvm/lib/CodeGen/PatchableFunction.cpp
+++ b/contrib/llvm/lib/CodeGen/PatchableFunction.cpp
@@ -12,10 +12,10 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/Passes.h"
#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
@@ -75,7 +75,7 @@ bool PatchableFunction::runOnMachineFunction(MachineFunction &MF) {
.addImm(FirstActualI->getOpcode());
for (auto &MO : FirstActualI->operands())
- MIB.addOperand(MO);
+ MIB.add(MO);
FirstActualI->eraseFromParent();
MF.ensureAlignment(4);
diff --git a/contrib/llvm/lib/CodeGen/PeepholeOptimizer.cpp b/contrib/llvm/lib/CodeGen/PeepholeOptimizer.cpp
index 6d64345..b13f6b6 100644
--- a/contrib/llvm/lib/CodeGen/PeepholeOptimizer.cpp
+++ b/contrib/llvm/lib/CodeGen/PeepholeOptimizer.cpp
@@ -66,7 +66,6 @@
// C = copy A <-- same-bank copy
//===----------------------------------------------------------------------===//
-#include "llvm/CodeGen/Passes.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
@@ -77,8 +76,10 @@
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
@@ -119,6 +120,14 @@ static cl::opt<unsigned> RewritePHILimit(
"rewrite-phi-limit", cl::Hidden, cl::init(10),
cl::desc("Limit the length of PHI chains to lookup"));
+// Limit the length of recurrence chain when evaluating the benefit of
+// commuting operands.
+static cl::opt<unsigned> MaxRecurrenceChain(
+ "recurrence-chain-limit", cl::Hidden, cl::init(3),
+ cl::desc("Maximum length of recurrence chain when evaluating the benefit "
+ "of commuting operands"));
+
STATISTIC(NumReuse, "Number of extension results reused");
STATISTIC(NumCmps, "Number of compares eliminated");
STATISTIC(NumImmFold, "Number of move immediate folded");
@@ -131,12 +140,14 @@ STATISTIC(NumNAPhysCopies, "Number of non-allocatable physical copies removed");
namespace {
class ValueTrackerResult;
+ class RecurrenceInstr;
class PeepholeOptimizer : public MachineFunctionPass {
const TargetInstrInfo *TII;
const TargetRegisterInfo *TRI;
MachineRegisterInfo *MRI;
MachineDominatorTree *DT; // Machine dominator tree
+ MachineLoopInfo *MLI;
public:
static char ID; // Pass identification
@@ -150,6 +161,8 @@ namespace {
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
MachineFunctionPass::getAnalysisUsage(AU);
+ AU.addRequired<MachineLoopInfo>();
+ AU.addPreserved<MachineLoopInfo>();
if (Aggressive) {
AU.addRequired<MachineDominatorTree>();
AU.addPreserved<MachineDominatorTree>();
@@ -160,6 +173,9 @@ namespace {
typedef SmallDenseMap<TargetInstrInfo::RegSubRegPair, ValueTrackerResult>
RewriteMapTy;
+ /// \brief Sequence of instructions that form a recurrence cycle.
+ typedef SmallVector<RecurrenceInstr, 4> RecurrenceCycle;
+
private:
bool optimizeCmpInstr(MachineInstr *MI, MachineBasicBlock *MBB);
bool optimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB,
@@ -170,6 +186,7 @@ namespace {
bool optimizeCoalescableCopy(MachineInstr *MI);
bool optimizeUncoalescableCopy(MachineInstr *MI,
SmallPtrSetImpl<MachineInstr *> &LocalMIs);
+ bool optimizeRecurrence(MachineInstr &PHI);
bool findNextSource(unsigned Reg, unsigned SubReg,
RewriteMapTy &RewriteMap);
bool isMoveImmediate(MachineInstr *MI,
@@ -178,6 +195,13 @@ namespace {
bool foldImmediate(MachineInstr *MI, MachineBasicBlock *MBB,
SmallSet<unsigned, 4> &ImmDefRegs,
DenseMap<unsigned, MachineInstr*> &ImmDefMIs);
+ /// \brief Finds recurrence cycles, but only ones formulated around
+ /// a def operand and a use operand that are tied. If there is a use
+ /// operand commutable with the tied use operand, find the recurrence
+ /// cycle along that operand as well.
+ bool findTargetRecurrence(unsigned Reg,
+ const SmallSet<unsigned, 2> &TargetReg,
+ RecurrenceCycle &RC);
/// \brief If copy instruction \p MI is a virtual register copy, track it in
/// the set \p CopySrcRegs and \p CopyMIs. If this virtual register was
@@ -222,6 +246,28 @@ namespace {
}
};
+ /// \brief Helper class to hold instructions that are inside recurrence
+ /// cycles. The recurrence cycle is formulated around 1) a def operand and its
+ /// tied use operand, or 2) a def operand and a use operand that is commutable
+ /// with another use operand which is tied to the def operand. In the latter
+ /// case, the indices of the tied use operand and the commutable use operand
+ /// are maintained in CommutePair.
+ class RecurrenceInstr {
+ public:
+ typedef std::pair<unsigned, unsigned> IndexPair;
+
+ RecurrenceInstr(MachineInstr *MI) : MI(MI) {}
+ RecurrenceInstr(MachineInstr *MI, unsigned Idx1, unsigned Idx2)
+ : MI(MI), CommutePair(std::make_pair(Idx1, Idx2)) {}
+
+ MachineInstr *getMI() const { return MI; }
+ Optional<IndexPair> getCommutePair() const { return CommutePair; }
+
+ private:
+ MachineInstr *MI;
+ Optional<IndexPair> CommutePair;
+ };
+
/// \brief Helper class to hold a reply for ValueTracker queries. Contains the
/// returned sources for a given search and the instructions where the sources
/// were tracked from.
@@ -412,6 +458,7 @@ char &llvm::PeepholeOptimizerID = PeepholeOptimizer::ID;
INITIALIZE_PASS_BEGIN(PeepholeOptimizer, DEBUG_TYPE,
"Peephole Optimizations", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
INITIALIZE_PASS_END(PeepholeOptimizer, DEBUG_TYPE,
"Peephole Optimizations", false, false)
@@ -1487,6 +1534,113 @@ bool PeepholeOptimizer::foldRedundantNAPhysCopy(
return false;
}
+/// \brief Returns true if \p MO is a virtual register operand.
+static bool isVirtualRegisterOperand(MachineOperand &MO) {
+ if (!MO.isReg())
+ return false;
+ return TargetRegisterInfo::isVirtualRegister(MO.getReg());
+}
+
+bool PeepholeOptimizer::findTargetRecurrence(
+ unsigned Reg, const SmallSet<unsigned, 2> &TargetRegs,
+ RecurrenceCycle &RC) {
+ // Recurrence found if Reg is in TargetRegs.
+ if (TargetRegs.count(Reg))
+ return true;
+
+ // TODO: Currently, we only allow the last instruction of the recurrence
+ // cycle (the instruction that feeds the PHI instruction) to have more than
+ // one use, to guarantee that commuting operands does not tie registers
+ // with overlapping live ranges. Once we have actual live range info for
+ // each register, this constraint can be relaxed.
+ if (!MRI->hasOneNonDBGUse(Reg))
+ return false;
+
+ // Give up if the recurrence chain length is longer than the limit.
+ if (RC.size() >= MaxRecurrenceChain)
+ return false;
+
+ MachineInstr &MI = *(MRI->use_instr_nodbg_begin(Reg));
+ unsigned Idx = MI.findRegisterUseOperandIdx(Reg);
+
+ // Only interested in recurrences whose instructions have only one def, which
+ // is a virtual register.
+ if (MI.getDesc().getNumDefs() != 1)
+ return false;
+
+ MachineOperand &DefOp = MI.getOperand(0);
+ if (!isVirtualRegisterOperand(DefOp))
+ return false;
+
+ // Check if the def operand of MI is tied to any use operand. We are only
+ // interested in the case where all the instructions in the recurrence chain
+ // have their def operand tied to one of their use operands.
+ unsigned TiedUseIdx;
+ if (!MI.isRegTiedToUseOperand(0, &TiedUseIdx))
+ return false;
+
+ if (Idx == TiedUseIdx) {
+ RC.push_back(RecurrenceInstr(&MI));
+ return findTargetRecurrence(DefOp.getReg(), TargetRegs, RC);
+ } else {
+ // If Idx is not TiedUseIdx, check if Idx is commutable with TiedUseIdx.
+ unsigned CommIdx = TargetInstrInfo::CommuteAnyOperandIndex;
+ if (TII->findCommutedOpIndices(MI, Idx, CommIdx) && CommIdx == TiedUseIdx) {
+ RC.push_back(RecurrenceInstr(&MI, Idx, CommIdx));
+ return findTargetRecurrence(DefOp.getReg(), TargetRegs, RC);
+ }
+ }
+
+ return false;
+}
+
+/// \brief Phi instructions will eventually be lowered to copy instructions. If
+/// the phi is in a loop header, a recurrence may be formulated around the
+/// source and destination of the phi. In such cases, commuting operands of the
+/// instructions in the recurrence may enable coalescing of the copy instruction
+/// generated from the phi. For example, if there is a recurrence of
+///
+/// LoopHeader:
+/// %vreg1 = phi(%vreg0, %vreg100)
+/// LoopLatch:
+/// %vreg0<def, tied1> = ADD %vreg2<def, tied0>, %vreg1
+///
+/// then the fact that vreg0 and vreg2 are in the same tied operands set makes
+/// the coalescing of the copy instruction generated from the phi in
+/// LoopHeader (i.e. %vreg1 = COPY %vreg0) impossible, because %vreg1 and
+/// %vreg2 have overlapping live ranges. This introduces an additional move
+/// instruction to the final assembly. However, if we commute %vreg2 and
+/// %vreg1 of the ADD instruction, the redundant move instruction can be
+/// avoided.
+bool PeepholeOptimizer::optimizeRecurrence(MachineInstr &PHI) {
+ SmallSet<unsigned, 2> TargetRegs;
+ for (unsigned Idx = 1; Idx < PHI.getNumOperands(); Idx += 2) {
+ MachineOperand &MO = PHI.getOperand(Idx);
+ assert(isVirtualRegisterOperand(MO) && "Invalid PHI instruction");
+ TargetRegs.insert(MO.getReg());
+ }
+
+ bool Changed = false;
+ RecurrenceCycle RC;
+ if (findTargetRecurrence(PHI.getOperand(0).getReg(), TargetRegs, RC)) {
+ // Commute operands of instructions in RC if necessary so that the copy to
+ // be generated from the PHI can be coalesced.
+ DEBUG(dbgs() << "Optimize recurrence chain from " << PHI);
+ for (auto &RI : RC) {
+ DEBUG(dbgs() << "\tInst: " << *(RI.getMI()));
+ auto CP = RI.getCommutePair();
+ if (CP) {
+ Changed = true;
+ TII->commuteInstruction(*(RI.getMI()), false, (*CP).first,
+ (*CP).second);
+ DEBUG(dbgs() << "\t\tCommuted: " << *(RI.getMI()));
+ }
+ }
+ }
+
+ return Changed;
+}
+
bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
if (skipFunction(*MF.getFunction()))
return false;
@@ -1501,6 +1655,7 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
TRI = MF.getSubtarget().getRegisterInfo();
MRI = &MF.getRegInfo();
DT = Aggressive ? &getAnalysis<MachineDominatorTree>() : nullptr;
+ MLI = &getAnalysis<MachineLoopInfo>();
bool Changed = false;
@@ -1529,6 +1684,8 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
SmallSet<unsigned, 4> CopySrcRegs;
DenseMap<unsigned, MachineInstr *> CopySrcMIs;
+ bool IsLoopHeader = MLI->isLoopHeader(&MBB);
+
for (MachineBasicBlock::iterator MII = MBB.begin(), MIE = MBB.end();
MII != MIE; ) {
MachineInstr *MI = &*MII;
@@ -1540,9 +1697,16 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
if (MI->isDebugValue())
continue;
- if (MI->isPosition() || MI->isPHI())
+ if (MI->isPosition())
continue;
+ if (IsLoopHeader && MI->isPHI()) {
+ if (optimizeRecurrence(*MI)) {
+ Changed = true;
+ continue;
+ }
+ }
+
if (!MI->isCopy()) {
for (const auto &Op : MI->operands()) {
// Visit all operands: definitions can be implicit or explicit.
@@ -1667,7 +1831,7 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
MRI->markUsesInDebugValueAsUndef(FoldedReg);
FoldAsLoadDefCandidates.erase(FoldedReg);
++NumLoadFold;
-
+
// MI is replaced with FoldMI so we can continue trying to fold
Changed = true;
MI = FoldMI;
@@ -1675,7 +1839,7 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
}
}
}
-
+
// If we run into an instruction we can't fold across, discard
// the load candidates. Note: We might be able to fold *into* this
// instruction, so this needs to be after the folding logic.
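The depth of the recurrence search added above is capped by the recurrence-chain-limit option (default 3). An invocation such as llc -recurrence-chain-limit=5 foo.ll (illustrative; the option is cl::Hidden but still accepted on the command line) raises the cap when evaluating whether commuting operands pays off.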
diff --git a/contrib/llvm/lib/CodeGen/PostRAHazardRecognizer.cpp b/contrib/llvm/lib/CodeGen/PostRAHazardRecognizer.cpp
index 5bc5f75..4a50d89 100644
--- a/contrib/llvm/lib/CodeGen/PostRAHazardRecognizer.cpp
+++ b/contrib/llvm/lib/CodeGen/PostRAHazardRecognizer.cpp
@@ -23,13 +23,13 @@
/// This pass traverses all the instructions in a program in top-down order.
/// In contrast to the instruction scheduling passes, this pass never resets
/// the hazard recognizer to ensure it can correctly handle noop hazards at
-/// the begining of blocks.
+/// the beginning of blocks.
//
//===----------------------------------------------------------------------===//
-#include "llvm/CodeGen/Passes.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
diff --git a/contrib/llvm/lib/CodeGen/PostRASchedulerList.cpp b/contrib/llvm/lib/CodeGen/PostRASchedulerList.cpp
index 6081916..f2249f9 100644
--- a/contrib/llvm/lib/CodeGen/PostRASchedulerList.cpp
+++ b/contrib/llvm/lib/CodeGen/PostRASchedulerList.cpp
@@ -200,7 +200,7 @@ namespace {
char &llvm::PostRASchedulerID = PostRAScheduler::ID;
-INITIALIZE_PASS(PostRAScheduler, "post-RA-sched",
+INITIALIZE_PASS(PostRAScheduler, DEBUG_TYPE,
"Post RA top-down list latency scheduler", false, false)
SchedulePostRATDList::SchedulePostRATDList(
@@ -253,7 +253,7 @@ void SchedulePostRATDList::exitRegion() {
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// dumpSchedule - dump the scheduled Sequence.
-void SchedulePostRATDList::dumpSchedule() const {
+LLVM_DUMP_METHOD void SchedulePostRATDList::dumpSchedule() const {
for (unsigned i = 0, e = Sequence.size(); i != e; i++) {
if (SUnit *SU = Sequence[i])
SU->dump(this);
@@ -367,7 +367,7 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) {
Scheduler.finishBlock();
// Update register kills
- Scheduler.fixupKills(&MBB);
+ Scheduler.fixupKills(MBB);
}
return true;
diff --git a/contrib/llvm/lib/CodeGen/ProcessImplicitDefs.cpp b/contrib/llvm/lib/CodeGen/ProcessImplicitDefs.cpp
index d27ea2f..0118580 100644
--- a/contrib/llvm/lib/CodeGen/ProcessImplicitDefs.cpp
+++ b/contrib/llvm/lib/CodeGen/ProcessImplicitDefs.cpp
@@ -20,7 +20,7 @@
using namespace llvm;
-#define DEBUG_TYPE "processimplicitdefs"
+#define DEBUG_TYPE "processimpdefs"
namespace {
/// Process IMPLICIT_DEF instructions and make sure there is one implicit_def
@@ -51,9 +51,7 @@ public:
char ProcessImplicitDefs::ID = 0;
char &llvm::ProcessImplicitDefsID = ProcessImplicitDefs::ID;
-INITIALIZE_PASS_BEGIN(ProcessImplicitDefs, "processimpdefs",
- "Process Implicit Definitions", false, false)
-INITIALIZE_PASS_END(ProcessImplicitDefs, "processimpdefs",
+INITIALIZE_PASS(ProcessImplicitDefs, DEBUG_TYPE,
"Process Implicit Definitions", false, false)
void ProcessImplicitDefs::getAnalysisUsage(AnalysisUsage &AU) const {
diff --git a/contrib/llvm/lib/CodeGen/PrologEpilogInserter.cpp b/contrib/llvm/lib/CodeGen/PrologEpilogInserter.cpp
index 5fca7fa..e9f8d43 100644
--- a/contrib/llvm/lib/CodeGen/PrologEpilogInserter.cpp
+++ b/contrib/llvm/lib/CodeGen/PrologEpilogInserter.cpp
@@ -45,7 +45,7 @@
using namespace llvm;
-#define DEBUG_TYPE "pei"
+#define DEBUG_TYPE "prologepilog"
typedef SmallVector<MachineBasicBlock *, 4> MBBVector;
static void doSpillCalleeSavedRegs(MachineFunction &MF, RegScavenger *RS,
@@ -54,25 +54,12 @@ static void doSpillCalleeSavedRegs(MachineFunction &MF, RegScavenger *RS,
const MBBVector &SaveBlocks,
const MBBVector &RestoreBlocks);
-static void doScavengeFrameVirtualRegs(MachineFunction &MF, RegScavenger *RS);
-
namespace {
class PEI : public MachineFunctionPass {
public:
static char ID;
- explicit PEI(const TargetMachine *TM = nullptr) : MachineFunctionPass(ID) {
+ PEI() : MachineFunctionPass(ID) {
initializePEIPass(*PassRegistry::getPassRegistry());
-
- if (TM && (!TM->usesPhysRegsForPEI())) {
- SpillCalleeSavedRegisters = [](MachineFunction &, RegScavenger *,
- unsigned &, unsigned &, const MBBVector &,
- const MBBVector &) {};
- ScavengeFrameVirtualRegs = [](MachineFunction &, RegScavenger *) {};
- } else {
- SpillCalleeSavedRegisters = doSpillCalleeSavedRegs;
- ScavengeFrameVirtualRegs = doScavengeFrameVirtualRegs;
- UsesCalleeSaves = true;
- }
}
void getAnalysisUsage(AnalysisUsage &AU) const override;
@@ -95,7 +82,7 @@ private:
const MBBVector &SaveBlocks,
const MBBVector &RestoreBlocks)>
SpillCalleeSavedRegisters;
- std::function<void(MachineFunction &MF, RegScavenger *RS)>
+ std::function<void(MachineFunction &MF, RegScavenger &RS)>
ScavengeFrameVirtualRegs;
bool UsesCalleeSaves = false;
@@ -140,21 +127,19 @@ WarnStackSize("warn-stack-size", cl::Hidden, cl::init((unsigned)-1),
cl::desc("Warn for stack size bigger than the given"
" number"));
-INITIALIZE_TM_PASS_BEGIN(PEI, "prologepilog", "Prologue/Epilogue Insertion",
- false, false)
+INITIALIZE_PASS_BEGIN(PEI, DEBUG_TYPE, "Prologue/Epilogue Insertion", false,
+ false)
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
INITIALIZE_PASS_DEPENDENCY(StackProtector)
-INITIALIZE_TM_PASS_END(PEI, "prologepilog",
- "Prologue/Epilogue Insertion & Frame Finalization",
- false, false)
+INITIALIZE_PASS_END(PEI, DEBUG_TYPE,
+ "Prologue/Epilogue Insertion & Frame Finalization", false,
+ false)
-MachineFunctionPass *
-llvm::createPrologEpilogInserterPass(const TargetMachine *TM) {
- return new PEI(TM);
+MachineFunctionPass *llvm::createPrologEpilogInserterPass() {
+ return new PEI();
}
-STATISTIC(NumScavengedRegs, "Number of frame index regs scavenged");
STATISTIC(NumBytesStackSpace,
"Number of bytes used for stack in all functions");
@@ -174,6 +159,20 @@ typedef SmallSetVector<int, 8> StackObjSet;
/// frame indexes with appropriate references.
///
bool PEI::runOnMachineFunction(MachineFunction &Fn) {
+ if (!SpillCalleeSavedRegisters) {
+ const TargetMachine &TM = Fn.getTarget();
+ if (!TM.usesPhysRegsForPEI()) {
+ SpillCalleeSavedRegisters = [](MachineFunction &, RegScavenger *,
+ unsigned &, unsigned &, const MBBVector &,
+ const MBBVector &) {};
+ ScavengeFrameVirtualRegs = [](MachineFunction &, RegScavenger &) {};
+ } else {
+ SpillCalleeSavedRegisters = doSpillCalleeSavedRegs;
+ ScavengeFrameVirtualRegs = scavengeFrameVirtualRegs;
+ UsesCalleeSaves = true;
+ }
+ }
+
const Function* F = Fn.getFunction();
const TargetRegisterInfo *TRI = Fn.getSubtarget().getRegisterInfo();
const TargetFrameLowering *TFI = Fn.getSubtarget().getFrameLowering();
@@ -220,7 +219,7 @@ bool PEI::runOnMachineFunction(MachineFunction &Fn) {
// post-pass, scavenge the virtual registers that frame index elimination
// inserted.
if (TRI->requiresRegisterScavenging(Fn) && FrameIndexVirtualScavenging) {
- ScavengeFrameVirtualRegs(Fn, RS);
+ ScavengeFrameVirtualRegs(Fn, *RS);
// Clear any vregs created by virtual scavenging.
Fn.getRegInfo().clearVirtRegs();
@@ -265,11 +264,8 @@ void PEI::calculateCallFrameInfo(MachineFunction &Fn) {
std::vector<MachineBasicBlock::iterator> FrameSDOps;
for (MachineFunction::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB)
for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ++I)
- if (I->getOpcode() == FrameSetupOpcode ||
- I->getOpcode() == FrameDestroyOpcode) {
- assert(I->getNumOperands() >= 1 && "Call Frame Setup/Destroy Pseudo"
- " instructions should have a single immediate argument!");
- unsigned Size = I->getOperand(0).getImm();
+ if (TII.isFrameInstr(*I)) {
+ unsigned Size = TII.getFrameSize(*I);
if (Size > MaxCallFrameSize) MaxCallFrameSize = Size;
AdjustsStack = true;
FrameSDOps.push_back(I);
@@ -280,6 +276,9 @@ void PEI::calculateCallFrameInfo(MachineFunction &Fn) {
AdjustsStack = true;
}
+ assert(!MFI.isMaxCallFrameSizeComputed() ||
+ (MFI.getMaxCallFrameSize() == MaxCallFrameSize &&
+ MFI.adjustsStack() == AdjustsStack));
MFI.setAdjustsStack(AdjustsStack);
MFI.setMaxCallFrameSize(MaxCallFrameSize);
@@ -336,7 +335,7 @@ static void assignCalleeSavedSpillSlots(MachineFunction &F,
return;
const TargetRegisterInfo *RegInfo = F.getSubtarget().getRegisterInfo();
- const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&F);
+ const MCPhysReg *CSRegs = F.getRegInfo().getCalleeSavedRegs();
std::vector<CalleeSavedInfo> CSI;
for (unsigned i = 0; CSRegs[i]; ++i) {
@@ -376,22 +375,22 @@ static void assignCalleeSavedSpillSlots(MachineFunction &F,
FixedSlot->Reg != Reg)
++FixedSlot;
+ unsigned Size = RegInfo->getSpillSize(*RC);
if (FixedSlot == FixedSpillSlots + NumFixedSpillSlots) {
// Nope, just spill it anywhere convenient.
- unsigned Align = RC->getAlignment();
+ unsigned Align = RegInfo->getSpillAlignment(*RC);
unsigned StackAlign = TFI->getStackAlignment();
// We may not be able to satisfy the desired alignment specification of
// the TargetRegisterClass if the stack alignment is smaller. Use the
// min.
Align = std::min(Align, StackAlign);
- FrameIdx = MFI.CreateStackObject(RC->getSize(), Align, true);
+ FrameIdx = MFI.CreateStackObject(Size, Align, true);
if ((unsigned)FrameIdx < MinCSFrameIndex) MinCSFrameIndex = FrameIdx;
if ((unsigned)FrameIdx > MaxCSFrameIndex) MaxCSFrameIndex = FrameIdx;
} else {
// Spill it to the stack where we must.
- FrameIdx =
- MFI.CreateFixedSpillStackObject(RC->getSize(), FixedSlot->Offset);
+ FrameIdx = MFI.CreateFixedSpillStackObject(Size, FixedSlot->Offset);
}
CS.setFrameIdx(FrameIdx);
@@ -448,12 +447,13 @@ static void updateLiveness(MachineFunction &MF) {
const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
for (MachineBasicBlock *MBB : Visited) {
MCPhysReg Reg = CSI[i].getReg();
// Add the callee-saved register as live-in.
// It's killed at the spill.
- if (!MBB->isLiveIn(Reg))
+ if (!MRI.isReserved(Reg) && !MBB->isLiveIn(Reg))
MBB->addLiveIn(Reg);
}
}
@@ -764,6 +764,9 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) {
} else if (MaxCSFrameIndex >= MinCSFrameIndex) {
// Be careful about underflow in comparisons against MinCSFrameIndex.
for (unsigned i = MaxCSFrameIndex; i != MinCSFrameIndex - 1; --i) {
+ if (MFI.isDeadObjectIndex(i))
+ continue;
+
unsigned Align = MFI.getObjectAlignment(i);
// Adjust to alignment boundary
Offset = alignTo(Offset, Align, Skew);
@@ -1049,8 +1052,6 @@ void PEI::replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &Fn,
const TargetInstrInfo &TII = *Fn.getSubtarget().getInstrInfo();
const TargetRegisterInfo &TRI = *Fn.getSubtarget().getRegisterInfo();
const TargetFrameLowering *TFI = Fn.getSubtarget().getFrameLowering();
- unsigned FrameSetupOpcode = TII.getCallFrameSetupOpcode();
- unsigned FrameDestroyOpcode = TII.getCallFrameDestroyOpcode();
if (RS && FrameIndexEliminationScavenging)
RS->enterBasicBlock(*BB);
@@ -1059,11 +1060,9 @@ void PEI::replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &Fn,
for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ) {
- if (I->getOpcode() == FrameSetupOpcode ||
- I->getOpcode() == FrameDestroyOpcode) {
- InsideCallSequence = (I->getOpcode() == FrameSetupOpcode);
+ if (TII.isFrameInstr(*I)) {
+ InsideCallSequence = TII.isFrameSetup(*I);
SPAdj += TII.getSPAdjust(*I);
-
I = TFI->eliminateCallFramePseudoInstr(Fn, *BB, I);
continue;
}
@@ -1151,90 +1150,3 @@ void PEI::replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &Fn,
RS->forward(MI);
}
}
-
-/// doScavengeFrameVirtualRegs - Replace all frame index virtual registers
-/// with physical registers. Use the register scavenger to find an
-/// appropriate register to use.
-///
-/// FIXME: Iterating over the instruction stream is unnecessary. We can simply
-/// iterate over the vreg use list, which at this point only contains machine
-/// operands for which eliminateFrameIndex need a new scratch reg.
-static void
-doScavengeFrameVirtualRegs(MachineFunction &MF, RegScavenger *RS) {
- // Run through the instructions and find any virtual registers.
- MachineRegisterInfo &MRI = MF.getRegInfo();
- for (MachineBasicBlock &MBB : MF) {
- RS->enterBasicBlock(MBB);
-
- int SPAdj = 0;
-
- // The instruction stream may change in the loop, so check MBB.end()
- // directly.
- for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ) {
- // We might end up here again with a NULL iterator if we scavenged a
- // register for which we inserted spill code for definition by what was
- // originally the first instruction in MBB.
- if (I == MachineBasicBlock::iterator(nullptr))
- I = MBB.begin();
-
- const MachineInstr &MI = *I;
- MachineBasicBlock::iterator J = std::next(I);
- MachineBasicBlock::iterator P =
- I == MBB.begin() ? MachineBasicBlock::iterator(nullptr)
- : std::prev(I);
-
- // RS should process this instruction before we might scavenge at this
- // location. This is because we might be replacing a virtual register
- // defined by this instruction, and if so, registers killed by this
- // instruction are available, and defined registers are not.
- RS->forward(I);
-
- for (const MachineOperand &MO : MI.operands()) {
- if (!MO.isReg())
- continue;
- unsigned Reg = MO.getReg();
- if (!TargetRegisterInfo::isVirtualRegister(Reg))
- continue;
-
- // When we first encounter a new virtual register, it
- // must be a definition.
- assert(MO.isDef() && "frame index virtual missing def!");
- // Scavenge a new scratch register
- const TargetRegisterClass *RC = MRI.getRegClass(Reg);
- unsigned ScratchReg = RS->scavengeRegister(RC, J, SPAdj);
-
- ++NumScavengedRegs;
-
- // Replace this reference to the virtual register with the
- // scratch register.
- assert(ScratchReg && "Missing scratch register!");
- MRI.replaceRegWith(Reg, ScratchReg);
-
- // Because this instruction was processed by the RS before this
- // register was allocated, make sure that the RS now records the
- // register as being used.
- RS->setRegUsed(ScratchReg);
- }
-
- // If the scavenger needed to use one of its spill slots, the
- // spill code will have been inserted in between I and J. This is a
- // problem because we need the spill code before I: Move I to just
- // prior to J.
- if (I != std::prev(J)) {
- MBB.splice(J, &MBB, I);
-
- // Before we move I, we need to prepare the RS to visit I again.
- // Specifically, RS will assert if it sees uses of registers that
- // it believes are undefined. Because we have already processed
- // register kills in I, when it visits I again, it will believe that
- // those registers are undefined. To avoid this situation, unprocess
- // the instruction I.
- assert(RS->getCurrentPosition() == I &&
- "The register scavenger has an unexpected position");
- I = P;
- RS->unprocess(P);
- } else
- ++I;
- }
- }
-}
diff --git a/contrib/llvm/lib/CodeGen/PseudoSourceValue.cpp b/contrib/llvm/lib/CodeGen/PseudoSourceValue.cpp
index 804a4c3..b29e62b 100644
--- a/contrib/llvm/lib/CodeGen/PseudoSourceValue.cpp
+++ b/contrib/llvm/lib/CodeGen/PseudoSourceValue.cpp
@@ -29,7 +29,10 @@ PseudoSourceValue::PseudoSourceValue(PSVKind Kind) : Kind(Kind) {}
PseudoSourceValue::~PseudoSourceValue() {}
void PseudoSourceValue::printCustom(raw_ostream &O) const {
- O << PSVNames[Kind];
+ if (Kind < TargetCustom)
+ O << PSVNames[Kind];
+ else
+ O << "TargetCustom" << Kind;
}
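The fix guards the fixed-size name table so target-custom kinds no longer index past its end. A standalone sketch of the same pattern (table contents assumed from the surrounding file):

// Kinds past the named ones fall back to a synthesized name instead of
// reading off the end of the table.
static const char *const Names[] = {"Stack", "GOT", "JumpTable",
                                    "ConstantPool", "FixedStack"};
static void printKind(llvm::raw_ostream &O, unsigned Kind,
                      unsigned FirstCustom) {
  if (Kind < FirstCustom)
    O << Names[Kind];
  else
    O << "TargetCustom" << Kind;
}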
bool PseudoSourceValue::isConstant(const MachineFrameInfo *) const {
diff --git a/contrib/llvm/lib/CodeGen/RegAllocBase.cpp b/contrib/llvm/lib/CodeGen/RegAllocBase.cpp
index fb49a93..7b4fbac 100644
--- a/contrib/llvm/lib/CodeGen/RegAllocBase.cpp
+++ b/contrib/llvm/lib/CodeGen/RegAllocBase.cpp
@@ -21,13 +21,12 @@
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/VirtRegMap.h"
-#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/Timer.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetRegisterInfo.h"
using namespace llvm;
@@ -134,18 +133,19 @@ void RegAllocBase::allocatePhysRegs() {
if (AvailablePhysReg)
Matrix->assign(*VirtReg, AvailablePhysReg);
- for (VirtRegVec::iterator I = SplitVRegs.begin(), E = SplitVRegs.end();
- I != E; ++I) {
- LiveInterval *SplitVirtReg = &LIS->getInterval(*I);
+ for (unsigned Reg : SplitVRegs) {
+ assert(LIS->hasInterval(Reg));
+
+ LiveInterval *SplitVirtReg = &LIS->getInterval(Reg);
assert(!VRM->hasPhys(SplitVirtReg->reg) && "Register already assigned");
if (MRI->reg_nodbg_empty(SplitVirtReg->reg)) {
+ assert(SplitVirtReg->empty() && "Non-empty but used interval");
DEBUG(dbgs() << "not queueing unused " << *SplitVirtReg << '\n');
aboutToRemoveInterval(*SplitVirtReg);
LIS->removeInterval(SplitVirtReg->reg);
continue;
}
DEBUG(dbgs() << "queuing new interval: " << *SplitVirtReg << "\n");
- assert(!SplitVirtReg->empty() && "expecting non-empty interval");
assert(TargetRegisterInfo::isVirtualRegister(SplitVirtReg->reg) &&
"expect split value in virtual register");
enqueue(SplitVirtReg);
diff --git a/contrib/llvm/lib/CodeGen/RegAllocBasic.cpp b/contrib/llvm/lib/CodeGen/RegAllocBasic.cpp
index a558e37..7743061 100644
--- a/contrib/llvm/lib/CodeGen/RegAllocBasic.cpp
+++ b/contrib/llvm/lib/CodeGen/RegAllocBasic.cpp
@@ -12,7 +12,6 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/CodeGen/Passes.h"
#include "AllocationOrder.h"
#include "LiveDebugVariables.h"
#include "RegAllocBase.h"
@@ -28,6 +27,7 @@
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/RegAllocRegistry.h"
#include "llvm/CodeGen/VirtRegMap.h"
#include "llvm/PassAnalysisSupport.h"
@@ -58,8 +58,9 @@ namespace {
/// whenever a register is unavailable. This is not practical in production but
/// provides a useful baseline both for measuring other allocators and comparing
/// the speed of the basic algorithm against other styles of allocators.
-class RABasic : public MachineFunctionPass, public RegAllocBase
-{
+class RABasic : public MachineFunctionPass,
+ public RegAllocBase,
+ private LiveRangeEdit::Delegate {
// context
MachineFunction *MF;
@@ -72,6 +73,9 @@ class RABasic : public MachineFunctionPass, public RegAllocBase
// selectOrSplit().
BitVector UsableRegs;
+ bool LRE_CanEraseVirtReg(unsigned) override;
+ void LRE_WillShrinkVirtReg(unsigned) override;
+
public:
RABasic();
@@ -121,17 +125,46 @@ char RABasic::ID = 0;
} // end anonymous namespace
+char &llvm::RABasicID = RABasic::ID;
+
+INITIALIZE_PASS_BEGIN(RABasic, "regallocbasic", "Basic Register Allocator",
+ false, false)
+INITIALIZE_PASS_DEPENDENCY(LiveDebugVariables)
+INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
+INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
+INITIALIZE_PASS_DEPENDENCY(RegisterCoalescer)
+INITIALIZE_PASS_DEPENDENCY(MachineScheduler)
+INITIALIZE_PASS_DEPENDENCY(LiveStacks)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_DEPENDENCY(VirtRegMap)
+INITIALIZE_PASS_DEPENDENCY(LiveRegMatrix)
+INITIALIZE_PASS_END(RABasic, "regallocbasic", "Basic Register Allocator", false,
+ false)
+
+bool RABasic::LRE_CanEraseVirtReg(unsigned VirtReg) {
+ if (VRM->hasPhys(VirtReg)) {
+ LiveInterval &LI = LIS->getInterval(VirtReg);
+ Matrix->unassign(LI);
+ aboutToRemoveInterval(LI);
+ return true;
+ }
+ // Unassigned virtreg is probably in the priority queue.
+ // RegAllocBase will erase it after dequeueing.
+ return false;
+}
+
+void RABasic::LRE_WillShrinkVirtReg(unsigned VirtReg) {
+ if (!VRM->hasPhys(VirtReg))
+ return;
+
+ // Register is assigned, put it back on the queue for reassignment.
+ LiveInterval &LI = LIS->getInterval(VirtReg);
+ Matrix->unassign(LI);
+ enqueue(&LI);
+}
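For context, these overrides hook RABasic into LiveRangeEdit's notification interface; passing `this` as the delegate (see the LiveRangeEdit constructor changes below) lets the editor unassign intervals from the matrix before erasing or shrinking them. The assumed shape of the interface, from LiveRangeEdit.h in this tree:

// Sketch of LiveRangeEdit::Delegate (assumed; default implementations shown).
struct Delegate {
  virtual void LRE_WillEraseInstruction(MachineInstr *MI) {}
  virtual bool LRE_CanEraseVirtReg(unsigned) { return true; }
  virtual void LRE_WillShrinkVirtReg(unsigned) {}
  virtual void LRE_DidCloneVirtReg(unsigned New, unsigned Old) {}
  virtual ~Delegate() = default;
};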
+
RABasic::RABasic(): MachineFunctionPass(ID) {
- initializeLiveDebugVariablesPass(*PassRegistry::getPassRegistry());
- initializeLiveIntervalsPass(*PassRegistry::getPassRegistry());
- initializeSlotIndexesPass(*PassRegistry::getPassRegistry());
- initializeRegisterCoalescerPass(*PassRegistry::getPassRegistry());
- initializeMachineSchedulerPass(*PassRegistry::getPassRegistry());
- initializeLiveStacksPass(*PassRegistry::getPassRegistry());
- initializeMachineDominatorTreePass(*PassRegistry::getPassRegistry());
- initializeMachineLoopInfoPass(*PassRegistry::getPassRegistry());
- initializeVirtRegMapPass(*PassRegistry::getPassRegistry());
- initializeLiveRegMatrixPass(*PassRegistry::getPassRegistry());
}
void RABasic::getAnalysisUsage(AnalysisUsage &AU) const {
@@ -176,8 +209,6 @@ bool RABasic::spillInterferences(LiveInterval &VirtReg, unsigned PhysReg,
for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, *Units);
Q.collectInterferingVRegs();
- if (Q.seenUnspillableVReg())
- return false;
for (unsigned i = Q.interferingVRegs().size(); i; --i) {
LiveInterval *Intf = Q.interferingVRegs()[i - 1];
if (!Intf->isSpillable() || Intf->weight > VirtReg.weight)
@@ -202,7 +233,7 @@ bool RABasic::spillInterferences(LiveInterval &VirtReg, unsigned PhysReg,
Matrix->unassign(Spill);
// Spill the extracted interval.
- LiveRangeEdit LRE(&Spill, SplitVRegs, *MF, *LIS, VRM, nullptr, &DeadRemats);
+ LiveRangeEdit LRE(&Spill, SplitVRegs, *MF, *LIS, VRM, this, &DeadRemats);
spiller().spill(LRE);
}
return true;
@@ -261,7 +292,7 @@ unsigned RABasic::selectOrSplit(LiveInterval &VirtReg,
DEBUG(dbgs() << "spilling: " << VirtReg << '\n');
if (!VirtReg.isSpillable())
return ~0u;
- LiveRangeEdit LRE(&VirtReg, SplitVRegs, *MF, *LIS, VRM, nullptr, &DeadRemats);
+ LiveRangeEdit LRE(&VirtReg, SplitVRegs, *MF, *LIS, VRM, this, &DeadRemats);
spiller().spill(LRE);
// The live virtual register requesting allocation was spilled, so tell
diff --git a/contrib/llvm/lib/CodeGen/RegAllocFast.cpp b/contrib/llvm/lib/CodeGen/RegAllocFast.cpp
index fd759bc..d5538be 100644
--- a/contrib/llvm/lib/CodeGen/RegAllocFast.cpp
+++ b/contrib/llvm/lib/CodeGen/RegAllocFast.cpp
@@ -203,6 +203,8 @@ namespace {
char RAFast::ID = 0;
}
+INITIALIZE_PASS(RAFast, "regallocfast", "Fast Register Allocator", false, false)
+
/// getStackSpaceFor - This allocates space for the specified virtual register
/// to be held on the stack.
int RAFast::getStackSpaceFor(unsigned VirtReg, const TargetRegisterClass *RC) {
@@ -212,8 +214,9 @@ int RAFast::getStackSpaceFor(unsigned VirtReg, const TargetRegisterClass *RC) {
return SS; // Already has space allocated?
// Allocate a new stack object for this spill location...
- int FrameIdx = MF->getFrameInfo().CreateSpillStackObject(RC->getSize(),
- RC->getAlignment());
+ unsigned Size = TRI->getSpillSize(*RC);
+ unsigned Align = TRI->getSpillAlignment(*RC);
+ int FrameIdx = MF->getFrameInfo().CreateSpillStackObject(Size, Align);
// Assign the slot.
StackSlotForVirtReg[VirtReg] = FrameIdx;
@@ -243,8 +246,15 @@ void RAFast::addKillFlag(const LiveReg &LR) {
if (MO.isUse() && !LR.LastUse->isRegTiedToDefOperand(LR.LastOpNum)) {
if (MO.getReg() == LR.PhysReg)
MO.setIsKill();
- else
- LR.LastUse->addRegisterKilled(LR.PhysReg, TRI, true);
+ // else, don't do anything: we are probably redefining a
+ // subreg of this register and, given that we don't track which
+ // lanes are actually dead, we cannot insert a kill flag here.
+ // Otherwise we may end up in a situation like this:
+ // ... = (MO) physreg:sub1, physreg <implicit-use, kill>
+ // ... <== Here we would allow a later pass to reuse physreg:sub1,
+ // which is potentially wrong.
+ // LR:sub0 = ...
+ // ... = LR.sub1 <== This is going to use physreg:sub1
}
}
@@ -304,19 +314,7 @@ void RAFast::spillVirtReg(MachineBasicBlock::iterator MI,
LiveDbgValueMap[LRI->VirtReg];
for (unsigned li = 0, le = LRIDbgValues.size(); li != le; ++li) {
MachineInstr *DBG = LRIDbgValues[li];
- const MDNode *Var = DBG->getDebugVariable();
- const MDNode *Expr = DBG->getDebugExpression();
- bool IsIndirect = DBG->isIndirectDebugValue();
- uint64_t Offset = IsIndirect ? DBG->getOperand(1).getImm() : 0;
- DebugLoc DL = DBG->getDebugLoc();
- assert(cast<DILocalVariable>(Var)->isValidLocationForIntrinsic(DL) &&
- "Expected inlined-at fields to agree");
- MachineInstr *NewDV =
- BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::DBG_VALUE))
- .addFrameIndex(FI)
- .addImm(Offset)
- .addMetadata(Var)
- .addMetadata(Expr);
+ MachineInstr *NewDV = buildDbgValueForSpill(*MBB, MI, *DBG, FI);
assert(NewDV->getParent() == MBB && "dangling parent pointer");
(void)NewDV;
DEBUG(dbgs() << "Inserting debug info due to spill:" << "\n" << *NewDV);
diff --git a/contrib/llvm/lib/CodeGen/RegAllocGreedy.cpp b/contrib/llvm/lib/CodeGen/RegAllocGreedy.cpp
index c47cfb1..020e81e 100644
--- a/contrib/llvm/lib/CodeGen/RegAllocGreedy.cpp
+++ b/contrib/llvm/lib/CodeGen/RegAllocGreedy.cpp
@@ -1,4 +1,4 @@
-//===-- RegAllocGreedy.cpp - greedy register allocator --------------------===//
+//===- RegAllocGreedy.cpp - greedy register allocator ---------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -19,34 +19,63 @@
#include "SpillPlacement.h"
#include "Spiller.h"
#include "SplitKit.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/IndexedMap.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/OptimizationDiagnosticInfo.h"
#include "llvm/CodeGen/CalcSpillWeights.h"
#include "llvm/CodeGen/EdgeBundles.h"
+#include "llvm/CodeGen/LiveInterval.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/LiveIntervalUnion.h"
#include "llvm/CodeGen/LiveRangeEdit.h"
#include "llvm/CodeGen/LiveRegMatrix.h"
#include "llvm/CodeGen/LiveStackAnalysis.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/RegAllocRegistry.h"
#include "llvm/CodeGen/RegisterClassInfo.h"
+#include "llvm/CodeGen/SlotIndexes.h"
#include "llvm/CodeGen/VirtRegMap.h"
+#include "llvm/IR/Function.h"
#include "llvm/IR/LLVMContext.h"
-#include "llvm/PassAnalysisSupport.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/BlockFrequency.h"
#include "llvm/Support/BranchProbability.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
#include "llvm/Support/Timer.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <memory>
#include <queue>
+#include <tuple>
+#include <utility>
using namespace llvm;
@@ -104,13 +133,14 @@ static RegisterRegAlloc greedyRegAlloc("greedy", "greedy register allocator",
createGreedyRegisterAllocator);
namespace {
+
class RAGreedy : public MachineFunctionPass,
public RegAllocBase,
private LiveRangeEdit::Delegate {
// Convenient shortcuts.
- typedef std::priority_queue<std::pair<unsigned, unsigned> > PQueue;
- typedef SmallPtrSet<LiveInterval *, 4> SmallLISet;
- typedef SmallSet<unsigned, 16> SmallVirtRegSet;
+ using PQueue = std::priority_queue<std::pair<unsigned, unsigned>>;
+ using SmallLISet = SmallPtrSet<LiveInterval *, 4>;
+ using SmallVirtRegSet = SmallSet<unsigned, 16>;
// context
MachineFunction *MF;
@@ -125,6 +155,7 @@ class RAGreedy : public MachineFunctionPass,
MachineBlockFrequencyInfo *MBFI;
MachineDominatorTree *DomTree;
MachineLoopInfo *Loops;
+ MachineOptimizationRemarkEmitter *ORE;
EdgeBundles *Bundles;
SpillPlacement *SpillPlacer;
LiveDebugVariables *DebugVars;
@@ -198,12 +229,12 @@ class RAGreedy : public MachineFunctionPass,
// RegInfo - Keep additional information about each live range.
struct RegInfo {
- LiveRangeStage Stage;
+ LiveRangeStage Stage = RS_New;
// Cascade - Eviction loop prevention. See canEvictInterference().
- unsigned Cascade;
+ unsigned Cascade = 0;
- RegInfo() : Stage(RS_New), Cascade(0) {}
+ RegInfo() = default;
};
IndexedMap<RegInfo, VirtReg2IndexFunctor> ExtraRegInfo;
@@ -229,10 +260,10 @@ class RAGreedy : public MachineFunctionPass,
/// Cost of evicting interference.
struct EvictionCost {
- unsigned BrokenHints; ///< Total number of broken hints.
- float MaxWeight; ///< Maximum spill weight evicted.
+ unsigned BrokenHints = 0; ///< Total number of broken hints.
+ float MaxWeight = 0; ///< Maximum spill weight evicted.
- EvictionCost(): BrokenHints(0), MaxWeight(0) {}
+ EvictionCost() = default;
bool isMax() const { return BrokenHints == ~0u; }
@@ -282,8 +313,7 @@ class RAGreedy : public MachineFunctionPass,
// Set B[i] = C for every live bundle where B[i] was NoCand.
unsigned getBundles(SmallVectorImpl<unsigned> &B, unsigned C) {
unsigned Count = 0;
- for (int i = LiveBundles.find_first(); i >= 0;
- i = LiveBundles.find_next(i))
+ for (unsigned i : LiveBundles.set_bits())
if (B[i] == NoCand) {
B[i] = C;
Count++;
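The find_first()/find_next() loops replaced throughout this file now use BitVector::set_bits(), which yields the indices of set bits as a range. A minimal usage sketch, assuming this tree's llvm/ADT/BitVector.h:

llvm::BitVector LiveBundles(8);
LiveBundles.set(1);
LiveBundles.set(5);
for (unsigned i : LiveBundles.set_bits())
  llvm::dbgs() << " EB#" << i;   // prints " EB#1 EB#5"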
@@ -411,15 +441,32 @@ private:
/// Its currently assigned register.
/// In case of a physical register Reg == PhysReg.
unsigned PhysReg;
+
HintInfo(BlockFrequency Freq, unsigned Reg, unsigned PhysReg)
: Freq(Freq), Reg(Reg), PhysReg(PhysReg) {}
};
- typedef SmallVector<HintInfo, 4> HintsInfo;
+ using HintsInfo = SmallVector<HintInfo, 4>;
+
BlockFrequency getBrokenHintFreq(const HintsInfo &, unsigned);
void collectHintInfo(unsigned, HintsInfo &);
bool isUnusedCalleeSavedReg(unsigned PhysReg) const;
+
+ /// Compute and report the number of spills and reloads for a loop.
+ void reportNumberOfSpillsReloads(MachineLoop *L, unsigned &Reloads,
+ unsigned &FoldedReloads, unsigned &Spills,
+ unsigned &FoldedSpills);
+
+ /// Report the number of spills and reloads for each loop.
+ void reportNumberOfSpillsReloads() {
+ for (MachineLoop *L : *Loops) {
+ unsigned Reloads, FoldedReloads, Spills, FoldedSpills;
+ reportNumberOfSpillsReloads(L, Reloads, FoldedReloads, Spills,
+ FoldedSpills);
+ }
+ }
};
+
} // end anonymous namespace
char RAGreedy::ID = 0;
@@ -439,6 +486,7 @@ INITIALIZE_PASS_DEPENDENCY(VirtRegMap)
INITIALIZE_PASS_DEPENDENCY(LiveRegMatrix)
INITIALIZE_PASS_DEPENDENCY(EdgeBundles)
INITIALIZE_PASS_DEPENDENCY(SpillPlacement)
+INITIALIZE_PASS_DEPENDENCY(MachineOptimizationRemarkEmitterPass)
INITIALIZE_PASS_END(RAGreedy, "greedy",
"Greedy Register Allocator", false, false)
@@ -458,7 +506,6 @@ const char *const RAGreedy::StageName[] = {
// This helps stabilize decisions based on float comparisons.
const float Hysteresis = (2007 / 2048.0f); // 0.97998046875
-
FunctionPass* llvm::createGreedyRegisterAllocator() {
return new RAGreedy();
}
@@ -490,10 +537,10 @@ void RAGreedy::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addPreserved<LiveRegMatrix>();
AU.addRequired<EdgeBundles>();
AU.addRequired<SpillPlacement>();
+ AU.addRequired<MachineOptimizationRemarkEmitterPass>();
MachineFunctionPass::getAnalysisUsage(AU);
}
-
//===----------------------------------------------------------------------===//
// LiveRangeEdit delegate methods
//===----------------------------------------------------------------------===//
@@ -616,7 +663,6 @@ LiveInterval *RAGreedy::dequeue(PQueue &CurQueue) {
return LI;
}
-
//===----------------------------------------------------------------------===//
// Direct Assignment
//===----------------------------------------------------------------------===//
@@ -664,7 +710,6 @@ unsigned RAGreedy::tryAssign(LiveInterval &VirtReg,
return CheapReg ? CheapReg : PhysReg;
}
-
//===----------------------------------------------------------------------===//
// Interference eviction
//===----------------------------------------------------------------------===//
@@ -679,7 +724,7 @@ unsigned RAGreedy::canReassign(LiveInterval &VirtReg, unsigned PrevReg) {
MCRegUnitIterator Units(PhysReg, TRI);
for (; Units.isValid(); ++Units) {
// Instantiate a "subquery", not to be confused with the Queries array.
- LiveIntervalUnion::Query subQ(&VirtReg, &Matrix->getLiveUnions()[*Units]);
+ LiveIntervalUnion::Query subQ(VirtReg, Matrix->getLiveUnions()[*Units]);
if (subQ.checkInterference())
break;
}
@@ -830,7 +875,11 @@ void RAGreedy::evictInterference(LiveInterval &VirtReg, unsigned PhysReg,
SmallVector<LiveInterval*, 8> Intfs;
for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, *Units);
- assert(Q.seenAllInterferences() && "Didn't check all interfererences.");
+ // We usually have the interfering VRegs cached, so collectInterferingVRegs()
+ // should be fast. We may need to recalculate when different physregs
+ // overlapping the same register unit have had different SubRanges queried
+ // against it.
+ Q.collectInterferingVRegs();
ArrayRef<LiveInterval*> IVR = Q.interferingVRegs();
Intfs.append(IVR.begin(), IVR.end());
}
@@ -932,7 +981,6 @@ unsigned RAGreedy::tryEvict(LiveInterval &VirtReg,
return BestPhys;
}
-
//===----------------------------------------------------------------------===//
// Region Splitting
//===----------------------------------------------------------------------===//
@@ -1003,7 +1051,6 @@ bool RAGreedy::addSplitConstraints(InterferenceCache::Cursor Intf,
return SpillPlacer->scanActiveBundles();
}
-
/// addThroughConstraints - Add constraints and links to SpillPlacer from the
/// live-through blocks in Blocks.
void RAGreedy::addThroughConstraints(InterferenceCache::Cursor Intf,
@@ -1061,7 +1108,7 @@ void RAGreedy::growRegion(GlobalSplitCandidate &Cand) {
unsigned Visited = 0;
#endif
- for (;;) {
+ while (true) {
ArrayRef<unsigned> NewBundles = SpillPlacer->getRecentPositive();
// Find new through blocks in the periphery of PrefRegBundles.
for (int i = 0, e = NewBundles.size(); i != e; ++i) {
@@ -1139,9 +1186,8 @@ bool RAGreedy::calcCompactRegion(GlobalSplitCandidate &Cand) {
}
DEBUG({
- for (int i = Cand.LiveBundles.find_first(); i>=0;
- i = Cand.LiveBundles.find_next(i))
- dbgs() << " EB#" << i;
+ for (int i : Cand.LiveBundles.set_bits())
+ dbgs() << " EB#" << i;
dbgs() << ".\n";
});
return true;
@@ -1176,8 +1222,8 @@ BlockFrequency RAGreedy::calcGlobalSplitCost(GlobalSplitCandidate &Cand) {
for (unsigned i = 0; i != UseBlocks.size(); ++i) {
const SplitAnalysis::BlockInfo &BI = UseBlocks[i];
SpillPlacement::BlockConstraint &BC = SplitConstraints[i];
- bool RegIn = LiveBundles[Bundles->getBundle(BC.Number, 0)];
- bool RegOut = LiveBundles[Bundles->getBundle(BC.Number, 1)];
+ bool RegIn = LiveBundles[Bundles->getBundle(BC.Number, false)];
+ bool RegOut = LiveBundles[Bundles->getBundle(BC.Number, true)];
unsigned Ins = 0;
if (BI.LiveIn)
@@ -1190,8 +1236,8 @@ BlockFrequency RAGreedy::calcGlobalSplitCost(GlobalSplitCandidate &Cand) {
for (unsigned i = 0, e = Cand.ActiveBlocks.size(); i != e; ++i) {
unsigned Number = Cand.ActiveBlocks[i];
- bool RegIn = LiveBundles[Bundles->getBundle(Number, 0)];
- bool RegOut = LiveBundles[Bundles->getBundle(Number, 1)];
+ bool RegIn = LiveBundles[Bundles->getBundle(Number, false)];
+ bool RegOut = LiveBundles[Bundles->getBundle(Number, true)];
if (!RegIn && !RegOut)
continue;
if (RegIn && RegOut) {
@@ -1243,7 +1289,7 @@ void RAGreedy::splitAroundRegion(LiveRangeEdit &LREdit,
unsigned IntvIn = 0, IntvOut = 0;
SlotIndex IntfIn, IntfOut;
if (BI.LiveIn) {
- unsigned CandIn = BundleCand[Bundles->getBundle(Number, 0)];
+ unsigned CandIn = BundleCand[Bundles->getBundle(Number, false)];
if (CandIn != NoCand) {
GlobalSplitCandidate &Cand = GlobalCand[CandIn];
IntvIn = Cand.IntvIdx;
@@ -1252,7 +1298,7 @@ void RAGreedy::splitAroundRegion(LiveRangeEdit &LREdit,
}
}
if (BI.LiveOut) {
- unsigned CandOut = BundleCand[Bundles->getBundle(Number, 1)];
+ unsigned CandOut = BundleCand[Bundles->getBundle(Number, true)];
if (CandOut != NoCand) {
GlobalSplitCandidate &Cand = GlobalCand[CandOut];
IntvOut = Cand.IntvIdx;
@@ -1292,7 +1338,7 @@ void RAGreedy::splitAroundRegion(LiveRangeEdit &LREdit,
unsigned IntvIn = 0, IntvOut = 0;
SlotIndex IntfIn, IntfOut;
- unsigned CandIn = BundleCand[Bundles->getBundle(Number, 0)];
+ unsigned CandIn = BundleCand[Bundles->getBundle(Number, false)];
if (CandIn != NoCand) {
GlobalSplitCandidate &Cand = GlobalCand[CandIn];
IntvIn = Cand.IntvIdx;
@@ -1300,7 +1346,7 @@ void RAGreedy::splitAroundRegion(LiveRangeEdit &LREdit,
IntfIn = Cand.Intf.first();
}
- unsigned CandOut = BundleCand[Bundles->getBundle(Number, 1)];
+ unsigned CandOut = BundleCand[Bundles->getBundle(Number, true)];
if (CandOut != NoCand) {
GlobalSplitCandidate &Cand = GlobalCand[CandOut];
IntvOut = Cand.IntvIdx;
@@ -1459,8 +1505,7 @@ unsigned RAGreedy::calculateRegionSplitCost(LiveInterval &VirtReg,
DEBUG({
dbgs() << ", total = "; MBFI->printBlockFreq(dbgs(), Cost)
<< " with bundles";
- for (int i = Cand.LiveBundles.find_first(); i>=0;
- i = Cand.LiveBundles.find_next(i))
+ for (int i : Cand.LiveBundles.set_bits())
dbgs() << " EB#" << i;
dbgs() << ".\n";
});
@@ -1513,7 +1558,6 @@ unsigned RAGreedy::doRegionSplit(LiveInterval &VirtReg, unsigned BestCand,
return 0;
}
-
//===----------------------------------------------------------------------===//
// Per-Block Splitting
//===----------------------------------------------------------------------===//
@@ -1560,7 +1604,6 @@ unsigned RAGreedy::tryBlockSplit(LiveInterval &VirtReg, AllocationOrder &Order,
return 0;
}
-
//===----------------------------------------------------------------------===//
// Per-Instruction Splitting
//===----------------------------------------------------------------------===//
@@ -1644,12 +1687,10 @@ RAGreedy::tryInstructionSplit(LiveInterval &VirtReg, AllocationOrder &Order,
return 0;
}
-
//===----------------------------------------------------------------------===//
// Local Splitting
//===----------------------------------------------------------------------===//
-
/// calcGapWeights - Compute the maximum spill weight that needs to be evicted
/// in order to use PhysReg between two entries in SA->UseSlots.
///
@@ -1720,7 +1761,7 @@ void RAGreedy::calcGapWeights(unsigned PhysReg,
break;
for (; Gap != NumGaps; ++Gap) {
- GapWeight[Gap] = llvm::huge_valf;
+ GapWeight[Gap] = huge_valf;
if (Uses[Gap+1].getBaseIndex() >= I->end)
break;
}
@@ -1826,7 +1867,7 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order,
// Remove any gaps with regmask clobbers.
if (Matrix->checkRegMaskInterference(VirtReg, PhysReg))
for (unsigned i = 0, e = RegMaskGaps.size(); i != e; ++i)
- GapWeight[RegMaskGaps[i]] = llvm::huge_valf;
+ GapWeight[RegMaskGaps[i]] = huge_valf;
// Try to find the best sequence of gaps to close.
// The new spill weight must be larger than any gap interference.
@@ -1838,7 +1879,7 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order,
// It is the spill weight that needs to be evicted.
float MaxGap = GapWeight[0];
- for (;;) {
+ while (true) {
// Live before/after split?
const bool LiveBefore = SplitBefore != 0 || BI.LiveIn;
const bool LiveAfter = SplitAfter != NumGaps || BI.LiveOut;
@@ -1861,7 +1902,7 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order,
// Legally, without causing looping?
bool Legal = !ProgressRequired || NewGaps < NumGaps;
- if (Legal && MaxGap < llvm::huge_valf) {
+ if (Legal && MaxGap < huge_valf) {
// Estimate the new spill weight. Each instruction reads or writes the
// register. Conservatively assume there are no read-modify-write
// instructions.
@@ -2417,7 +2458,7 @@ void RAGreedy::tryHintRecoloring(LiveInterval &VirtReg) {
do {
Reg = RecoloringCandidates.pop_back_val();
- // We cannot recolor physcal register.
+ // We cannot recolor physical register.
if (TargetRegisterInfo::isPhysicalRegister(Reg))
continue;
@@ -2581,7 +2622,7 @@ unsigned RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg,
}
// If we couldn't allocate a register from spilling, there is probably some
- // invalid inline assembly. The base class wil report it.
+ // invalid inline assembly. The base class will report it.
if (Stage >= RS_Done || !VirtReg.isSpillable())
return tryLastChanceRecoloring(VirtReg, Order, NewVRegs, FixedRegisters,
Depth);
@@ -2611,6 +2652,70 @@ unsigned RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg,
return 0;
}
+void RAGreedy::reportNumberOfSpillsReloads(MachineLoop *L, unsigned &Reloads,
+ unsigned &FoldedReloads,
+ unsigned &Spills,
+ unsigned &FoldedSpills) {
+ Reloads = 0;
+ FoldedReloads = 0;
+ Spills = 0;
+ FoldedSpills = 0;
+
+ // Sum up the spills and reloads in subloops.
+ for (MachineLoop *SubLoop : *L) {
+ unsigned SubReloads;
+ unsigned SubFoldedReloads;
+ unsigned SubSpills;
+ unsigned SubFoldedSpills;
+
+ reportNumberOfSpillsReloads(SubLoop, SubReloads, SubFoldedReloads,
+ SubSpills, SubFoldedSpills);
+ Reloads += SubReloads;
+ FoldedReloads += SubFoldedReloads;
+ Spills += SubSpills;
+ FoldedSpills += SubFoldedSpills;
+ }
+
+ const MachineFrameInfo &MFI = MF->getFrameInfo();
+ const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
+ int FI;
+
+ for (MachineBasicBlock *MBB : L->getBlocks())
+ // Handle blocks that were not included in subloops.
+ if (Loops->getLoopFor(MBB) == L)
+ for (MachineInstr &MI : *MBB) {
+ const MachineMemOperand *MMO;
+
+ if (TII->isLoadFromStackSlot(MI, FI) && MFI.isSpillSlotObjectIndex(FI))
+ ++Reloads;
+ else if (TII->hasLoadFromStackSlot(MI, MMO, FI) &&
+ MFI.isSpillSlotObjectIndex(FI))
+ ++FoldedReloads;
+ else if (TII->isStoreToStackSlot(MI, FI) &&
+ MFI.isSpillSlotObjectIndex(FI))
+ ++Spills;
+ else if (TII->hasStoreToStackSlot(MI, MMO, FI) &&
+ MFI.isSpillSlotObjectIndex(FI))
+ ++FoldedSpills;
+ }
+
+ if (Reloads || FoldedReloads || Spills || FoldedSpills) {
+ using namespace ore;
+
+ MachineOptimizationRemarkMissed R(DEBUG_TYPE, "LoopSpillReload",
+ L->getStartLoc(), L->getHeader());
+ if (Spills)
+ R << NV("NumSpills", Spills) << " spills ";
+ if (FoldedSpills)
+ R << NV("NumFoldedSpills", FoldedSpills) << " folded spills ";
+ if (Reloads)
+ R << NV("NumReloads", Reloads) << " reloads ";
+ if (FoldedReloads)
+ R << NV("NumFoldedReloads", FoldedReloads) << " folded reloads ";
+ ORE->emit(R << "generated in loop");
+ }
+}
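When remark output is enabled (for example with -Rpass-missed=regalloc, since DEBUG_TYPE here is "regalloc"), a loop that accumulated two spills and three reloads would emit a missed-optimization remark assembled from the pieces above, roughly (illustrative rendering, not captured output):

// remark: 2 spills 3 reloads generated in loop [-Rpass-missed=regalloc]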
+
bool RAGreedy::runOnMachineFunction(MachineFunction &mf) {
DEBUG(dbgs() << "********** GREEDY REGISTER ALLOCATION **********\n"
<< "********** Function: " << mf.getName() << '\n');
@@ -2633,6 +2738,7 @@ bool RAGreedy::runOnMachineFunction(MachineFunction &mf) {
Indexes = &getAnalysis<SlotIndexes>();
MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
DomTree = &getAnalysis<MachineDominatorTree>();
+ ORE = &getAnalysis<MachineOptimizationRemarkEmitterPass>().getORE();
SpillerInstance.reset(createInlineSpiller(*this, *MF, *VRM));
Loops = &getAnalysis<MachineLoopInfo>();
Bundles = &getAnalysis<EdgeBundles>();
@@ -2658,6 +2764,7 @@ bool RAGreedy::runOnMachineFunction(MachineFunction &mf) {
allocatePhysRegs();
tryHintsRecoloring();
postOptimization();
+ reportNumberOfSpillsReloads();
releaseMemory();
return true;
diff --git a/contrib/llvm/lib/CodeGen/RegAllocPBQP.cpp b/contrib/llvm/lib/CodeGen/RegAllocPBQP.cpp
index 101b30b..9778103 100644
--- a/contrib/llvm/lib/CodeGen/RegAllocPBQP.cpp
+++ b/contrib/llvm/lib/CodeGen/RegAllocPBQP.cpp
@@ -1,4 +1,4 @@
-//===------ RegAllocPBQP.cpp ---- PBQP Register Allocator -------*- C++ -*-===//
+//===- RegAllocPBQP.cpp ---- PBQP Register Allocator ----------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -32,30 +32,59 @@
#include "llvm/CodeGen/RegAllocPBQP.h"
#include "RegisterCoalescer.h"
#include "Spiller.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/CalcSpillWeights.h"
+#include "llvm/CodeGen/LiveInterval.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/LiveRangeEdit.h"
#include "llvm/CodeGen/LiveStackAnalysis.h"
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/PBQP/Graph.h"
+#include "llvm/CodeGen/PBQP/Math.h"
+#include "llvm/CodeGen/PBQP/Solution.h"
+#include "llvm/CodeGen/PBQPRAConstraint.h"
#include "llvm/CodeGen/RegAllocRegistry.h"
+#include "llvm/CodeGen/SlotIndexes.h"
#include "llvm/CodeGen/VirtRegMap.h"
+#include "llvm/IR/Function.h"
#include "llvm/IR/Module.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/Printable.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
+#include <algorithm>
+#include <cassert>
+#include <cstddef>
#include <limits>
+#include <map>
#include <memory>
#include <queue>
#include <set>
#include <sstream>
+#include <string>
+#include <system_error>
+#include <tuple>
+#include <utility>
#include <vector>
using namespace llvm;
@@ -86,7 +115,6 @@ namespace {
/// Programming problems.
class RegAllocPBQP : public MachineFunctionPass {
public:
-
static char ID;
/// Construct a PBQP register allocator.
@@ -113,14 +141,13 @@ public:
}
private:
-
- typedef std::map<const LiveInterval*, unsigned> LI2NodeMap;
- typedef std::vector<const LiveInterval*> Node2LIMap;
- typedef std::vector<unsigned> AllowedSet;
- typedef std::vector<AllowedSet> AllowedSetMap;
- typedef std::pair<unsigned, unsigned> RegPair;
- typedef std::map<RegPair, PBQP::PBQPNum> CoalesceMap;
- typedef std::set<unsigned> RegSet;
+ using LI2NodeMap = std::map<const LiveInterval *, unsigned>;
+ using Node2LIMap = std::vector<const LiveInterval *>;
+ using AllowedSet = std::vector<unsigned>;
+ using AllowedSetMap = std::vector<AllowedSet>;
+ using RegPair = std::pair<unsigned, unsigned>;
+ using CoalesceMap = std::map<RegPair, PBQP::PBQPNum>;
+ using RegSet = std::set<unsigned>;
char *customPassID;
@@ -187,13 +214,12 @@ public:
/// @brief Add interference edges between overlapping vregs.
class Interference : public PBQPRAConstraint {
private:
-
- typedef const PBQP::RegAlloc::AllowedRegVector* AllowedRegVecPtr;
- typedef std::pair<AllowedRegVecPtr, AllowedRegVecPtr> IKey;
- typedef DenseMap<IKey, PBQPRAGraph::MatrixPtr> IMatrixCache;
- typedef DenseSet<IKey> DisjointAllowedRegsCache;
- typedef std::pair<PBQP::GraphBase::NodeId, PBQP::GraphBase::NodeId> IEdgeKey;
- typedef DenseSet<IEdgeKey> IEdgeCache;
+ using AllowedRegVecPtr = const PBQP::RegAlloc::AllowedRegVector *;
+ using IKey = std::pair<AllowedRegVecPtr, AllowedRegVecPtr>;
+ using IMatrixCache = DenseMap<IKey, PBQPRAGraph::MatrixPtr>;
+ using DisjointAllowedRegsCache = DenseSet<IKey>;
+ using IEdgeKey = std::pair<PBQP::GraphBase::NodeId, PBQP::GraphBase::NodeId>;
+ using IEdgeCache = DenseSet<IEdgeKey>;
bool haveDisjointAllowedRegs(const PBQPRAGraph &G, PBQPRAGraph::NodeId NId,
PBQPRAGraph::NodeId MId,
@@ -228,8 +254,8 @@ private:
// for the fast interference graph construction algorithm. The last is there
// to save us from looking up node ids via the VRegToNode map in the graph
// metadata.
- typedef std::tuple<LiveInterval*, size_t, PBQP::GraphBase::NodeId>
- IntervalInfo;
+ using IntervalInfo =
+ std::tuple<LiveInterval*, size_t, PBQP::GraphBase::NodeId>;
static SlotIndex getStartPoint(const IntervalInfo &I) {
return std::get<0>(I)->segments[std::get<1>(I)].start;
@@ -276,7 +302,6 @@ private:
}
public:
-
void apply(PBQPRAGraph &G) override {
// The following is loosely based on the linear scan algorithm introduced in
// "Linear Scan Register Allocation" by Poletto and Sarkar. This version
@@ -297,9 +322,10 @@ public:
// Cache known disjoint allowed registers pairs
DisjointAllowedRegsCache D;
- typedef std::set<IntervalInfo, decltype(&lowestEndPoint)> IntervalSet;
- typedef std::priority_queue<IntervalInfo, std::vector<IntervalInfo>,
- decltype(&lowestStartPoint)> IntervalQueue;
+ using IntervalSet = std::set<IntervalInfo, decltype(&lowestEndPoint)>;
+ using IntervalQueue =
+ std::priority_queue<IntervalInfo, std::vector<IntervalInfo>,
+ decltype(&lowestStartPoint)>;
IntervalSet Active(lowestEndPoint);
IntervalQueue Inactive(lowestStartPoint);
@@ -363,7 +389,6 @@ public:
}
private:
-
// Create an Interference edge and add it to the graph, unless it is
// a null matrix, meaning the nodes' allowed registers do not have any
// interference. This case occurs frequently between integer and floating
@@ -372,7 +397,6 @@ private:
bool createInterferenceEdge(PBQPRAGraph &G,
PBQPRAGraph::NodeId NId, PBQPRAGraph::NodeId MId,
IMatrixCache &C) {
-
const TargetRegisterInfo &TRI =
*G.getMetadata().MF.getSubtarget().getRegisterInfo();
const auto &NRegs = G.getNodeMetadata(NId).getAllowedRegs();
@@ -409,7 +433,6 @@ private:
}
};
-
class Coalescing : public PBQPRAConstraint {
public:
void apply(PBQPRAGraph &G) override {
@@ -421,7 +444,6 @@ public:
// gives the Ok.
for (const auto &MBB : MF) {
for (const auto &MI : MBB) {
-
// Skip not-coalescable or already coalesced copies.
if (!CP.setRegisters(&MI) || CP.getSrcReg() == CP.getDstReg())
continue;
@@ -479,7 +501,6 @@ public:
}
private:
-
void addVirtRegCoalesce(
PBQPRAGraph::RawMatrix &CostMat,
const PBQPRAGraph::NodeMetadata::AllowedRegVector &Allowed1,
@@ -496,14 +517,15 @@ private:
}
}
}
-
};
-} // End anonymous namespace.
+} // end anonymous namespace
// Out-of-line destructor/anchor for PBQPRAConstraint.
-PBQPRAConstraint::~PBQPRAConstraint() {}
+PBQPRAConstraint::~PBQPRAConstraint() = default;
+
void PBQPRAConstraint::anchor() {}
+
void PBQPRAConstraintList::anchor() {}
void RegAllocPBQP::getAnalysisUsage(AnalysisUsage &au) const {
@@ -554,7 +576,7 @@ void RegAllocPBQP::findVRegIntervalsToAlloc(const MachineFunction &MF,
static bool isACalleeSavedRegister(unsigned reg, const TargetRegisterInfo &TRI,
const MachineFunction &MF) {
- const MCPhysReg *CSR = TRI.getCalleeSavedRegs(&MF);
+ const MCPhysReg *CSR = MF.getRegInfo().getCalleeSavedRegs();
for (unsigned i = 0; CSR[i] != 0; ++i)
if (TRI.regsOverlap(reg, CSR[i]))
return true;
@@ -639,7 +661,6 @@ void RegAllocPBQP::spillVReg(unsigned VReg,
SmallVectorImpl<unsigned> &NewIntervals,
MachineFunction &MF, LiveIntervals &LIS,
VirtRegMap &VRM, Spiller &VRegSpiller) {
-
VRegsToAlloc.erase(VReg);
LiveRangeEdit LRE(&LIS.getInterval(VReg), NewIntervals, MF, LIS, &VRM,
nullptr, &DeadRemats);
@@ -717,7 +738,15 @@ void RegAllocPBQP::finalizeAlloc(MachineFunction &MF,
if (PReg == 0) {
const TargetRegisterClass &RC = *MRI.getRegClass(LI.reg);
- PReg = RC.getRawAllocationOrder(MF).front();
+ const ArrayRef<MCPhysReg> RawPRegOrder = RC.getRawAllocationOrder(MF);
+ for (unsigned CandidateReg : RawPRegOrder) {
+ if (!VRM.getRegInfo().isReserved(CandidateReg)) {
+ PReg = CandidateReg;
+ break;
+ }
+ }
+ assert(PReg &&
+ "No un-reserved physical registers in this register class");
}
VRM.assignVirt2Phys(LI.reg, PReg);
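The fallback now mirrors the allocator's own behavior: take the first candidate in the class's raw allocation order that survives the reserved-register filter. In isolation (standalone sketch, hypothetical helper name):

static MCPhysReg pickFirstUnreserved(ArrayRef<MCPhysReg> Order,
                                     const MachineRegisterInfo &MRI) {
  for (MCPhysReg R : Order)
    if (!MRI.isReserved(R))
      return R;
  return 0; // the caller asserts this cannot happen
}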
@@ -777,7 +806,6 @@ bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) {
// If there are non-empty intervals allocate them using pbqp.
if (!VRegsToAlloc.empty()) {
-
const TargetSubtargetInfo &Subtarget = MF.getSubtarget();
std::unique_ptr<PBQPRAConstraintList> ConstraintsRoot =
llvm::make_unique<PBQPRAConstraintList>();
@@ -840,7 +868,8 @@ static Printable PrintNodeInfo(PBQP::RegAlloc::PBQPRAGraph::NodeId NId,
});
}
-void PBQP::RegAlloc::PBQPRAGraph::dump(raw_ostream &OS) const {
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD void PBQP::RegAlloc::PBQPRAGraph::dump(raw_ostream &OS) const {
for (auto NId : nodeIds()) {
const Vector &Costs = getNodeCosts(NId);
assert(Costs.getLength() != 0 && "Empty vector in graph.");
@@ -861,7 +890,10 @@ void PBQP::RegAlloc::PBQPRAGraph::dump(raw_ostream &OS) const {
}
}
-LLVM_DUMP_METHOD void PBQP::RegAlloc::PBQPRAGraph::dump() const { dump(dbgs()); }
+LLVM_DUMP_METHOD void PBQP::RegAlloc::PBQPRAGraph::dump() const {
+ dump(dbgs());
+}
+#endif
void PBQP::RegAlloc::PBQPRAGraph::printDot(raw_ostream &OS) const {
OS << "graph {\n";
@@ -892,5 +924,3 @@ FunctionPass *llvm::createPBQPRegisterAllocator(char *customPassID) {
FunctionPass* llvm::createDefaultPBQPRegisterAllocator() {
return createPBQPRegisterAllocator();
}
-
-#undef DEBUG_TYPE
diff --git a/contrib/llvm/lib/CodeGen/RegUsageInfoCollector.cpp b/contrib/llvm/lib/CodeGen/RegUsageInfoCollector.cpp
index ece44c2..855aa37 100644
--- a/contrib/llvm/lib/CodeGen/RegUsageInfoCollector.cpp
+++ b/contrib/llvm/lib/CodeGen/RegUsageInfoCollector.cpp
@@ -103,9 +103,27 @@ bool RegUsageInfoCollector::runOnMachineFunction(MachineFunction &MF) {
DEBUG(dbgs() << "Clobbered Registers: ");
- for (unsigned PReg = 1, PRegE = TRI->getNumRegs(); PReg < PRegE; ++PReg)
- if (MRI->isPhysRegModified(PReg, true))
- RegMask[PReg / 32] &= ~(1u << PReg % 32);
+ const BitVector &UsedPhysRegsMask = MRI->getUsedPhysRegsMask();
+ auto SetRegAsDefined = [&RegMask] (unsigned Reg) {
+ RegMask[Reg / 32] &= ~(1u << Reg % 32);
+ };
+ // Scan all the physical registers. When a register is defined in the
+ // current function, mark it and all its aliases as defined in the regmask.
+ for (unsigned PReg = 1, PRegE = TRI->getNumRegs(); PReg < PRegE; ++PReg) {
+ // If a register is in the UsedPhysRegsMask set, mark it as defined.
+ // All of its aliases will also be in the set, so we can skip marking
+ // the aliases as defined here.
+ if (UsedPhysRegsMask.test(PReg)) {
+ SetRegAsDefined(PReg);
+ continue;
+ }
+ // If a register is defined by an instruction, mark it as defined together
+ // with all its aliases.
+ if (!MRI->def_empty(PReg)) {
+ for (MCRegAliasIterator AI(PReg, TRI, true); AI.isValid(); ++AI)
+ SetRegAsDefined(*AI);
+ }
+ }
if (!TargetFrameLowering::isSafeForNoCSROpt(F)) {
const uint32_t *CallPreservedMask =
diff --git a/contrib/llvm/lib/CodeGen/RegisterClassInfo.cpp b/contrib/llvm/lib/CodeGen/RegisterClassInfo.cpp
index 178fa18..956dec3 100644
--- a/contrib/llvm/lib/CodeGen/RegisterClassInfo.cpp
+++ b/contrib/llvm/lib/CodeGen/RegisterClassInfo.cpp
@@ -1,4 +1,4 @@
-//===-- RegisterClassInfo.cpp - Dynamic Register Class Info ---------------===//
+//===- RegisterClassInfo.cpp - Dynamic Register Class Info ----------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -15,11 +15,21 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/RegisterClassInfo.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
using namespace llvm;
@@ -29,8 +39,7 @@ static cl::opt<unsigned>
StressRA("stress-regalloc", cl::Hidden, cl::init(0), cl::value_desc("N"),
cl::desc("Limit all regclasses to N registers"));
-RegisterClassInfo::RegisterClassInfo()
- : Tag(0), MF(nullptr), TRI(nullptr), CalleeSaved(nullptr) {}
+RegisterClassInfo::RegisterClassInfo() = default;
void RegisterClassInfo::runOnMachineFunction(const MachineFunction &mf) {
bool Update = false;
@@ -48,18 +57,20 @@ void RegisterClassInfo::runOnMachineFunction(const MachineFunction &mf) {
// Does this MF have different CSRs?
assert(TRI && "no register info set");
- const MCPhysReg *CSR = TRI->getCalleeSavedRegs(MF);
- if (Update || CSR != CalleeSaved) {
- // Build a CSRNum map. Every CSR alias gets an entry pointing to the last
+
+ // Get the callee saved registers.
+ const MCPhysReg *CSR = MF->getRegInfo().getCalleeSavedRegs();
+ if (Update || CSR != CalleeSavedRegs) {
+ // Build a CSRAlias map. Every CSR alias records the last
+ // overlapping CSR.
- CSRNum.clear();
- CSRNum.resize(TRI->getNumRegs(), 0);
- for (unsigned N = 0; unsigned Reg = CSR[N]; ++N)
- for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
- CSRNum[*AI] = N + 1; // 0 means no CSR, 1 means CalleeSaved[0], ...
+ CalleeSavedAliases.resize(TRI->getNumRegs(), 0);
+ for (const MCPhysReg *I = CSR; *I; ++I)
+ for (MCRegAliasIterator AI(*I, TRI, true); AI.isValid(); ++AI)
+ CalleeSavedAliases[*AI] = *I;
+
Update = true;
}
- CalleeSaved = CSR;
+ CalleeSavedRegs = CSR;
// Different reserved registers?
const BitVector &RR = MF->getRegInfo().getReservedRegs();
@@ -103,7 +114,7 @@ void RegisterClassInfo::compute(const TargetRegisterClass *RC) const {
unsigned Cost = TRI->getCostPerUse(PhysReg);
MinCost = std::min(MinCost, Cost);
- if (CSRNum[PhysReg])
+ if (CalleeSavedAliases[PhysReg])
// PhysReg aliases a CSR, save it for later.
CSRAlias.push_back(PhysReg);
else {
@@ -114,7 +125,7 @@ void RegisterClassInfo::compute(const TargetRegisterClass *RC) const {
}
}
RCI.NumRegs = N + CSRAlias.size();
- assert (RCI.NumRegs <= NumRegs && "Allocation order larger than regclass");
+ assert(RCI.NumRegs <= NumRegs && "Allocation order larger than regclass");
// CSR aliases go after the volatile registers, preserve the target's order.
for (unsigned i = 0, e = CSRAlias.size(); i != e; ++i) {
@@ -156,9 +167,8 @@ void RegisterClassInfo::compute(const TargetRegisterClass *RC) const {
unsigned RegisterClassInfo::computePSetLimit(unsigned Idx) const {
const TargetRegisterClass *RC = nullptr;
unsigned NumRCUnits = 0;
- for (TargetRegisterInfo::regclass_iterator
- RI = TRI->regclass_begin(), RE = TRI->regclass_end(); RI != RE; ++RI) {
- const int *PSetID = TRI->getRegClassPressureSets(*RI);
+ for (const TargetRegisterClass *C : TRI->regclasses()) {
+ const int *PSetID = TRI->getRegClassPressureSets(C);
for (; *PSetID != -1; ++PSetID) {
if ((unsigned)*PSetID == Idx)
break;
@@ -168,9 +178,9 @@ unsigned RegisterClassInfo::computePSetLimit(unsigned Idx) const {
// Found a register class that counts against this pressure set.
// For efficiency, only compute the set order for the largest set.
- unsigned NUnits = TRI->getRegClassWeight(*RI).WeightLimit;
+ unsigned NUnits = TRI->getRegClassWeight(C).WeightLimit;
if (!RC || NUnits > NumRCUnits) {
- RC = *RI;
+ RC = C;
NumRCUnits = NUnits;
}
}
diff --git a/contrib/llvm/lib/CodeGen/RegisterCoalescer.cpp b/contrib/llvm/lib/CodeGen/RegisterCoalescer.cpp
index 4bb3c22..a67d07b 100644
--- a/contrib/llvm/lib/CodeGen/RegisterCoalescer.cpp
+++ b/contrib/llvm/lib/CodeGen/RegisterCoalescer.cpp
@@ -22,6 +22,7 @@
#include "llvm/CodeGen/LiveRangeEdit.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
@@ -189,6 +190,9 @@ namespace {
/// This returns true if an interval was modified.
bool removeCopyByCommutingDef(const CoalescerPair &CP,MachineInstr *CopyMI);
+ /// We found a copy which can be moved to its less frequent predecessor.
+ bool removePartialRedundancy(const CoalescerPair &CP, MachineInstr &CopyMI);
+
/// If the source of a copy is defined by a
/// trivial computation, replace the copy by rematerialize the definition.
bool reMaterializeTrivialDef(const CoalescerPair &CP, MachineInstr *CopyMI,
@@ -811,42 +815,14 @@ bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP,
VNInfo *ASubValNo = SA.getVNInfoAt(AIdx);
assert(ASubValNo != nullptr);
- LaneBitmask AMask = SA.LaneMask;
- for (LiveInterval::SubRange &SB : IntB.subranges()) {
- LaneBitmask BMask = SB.LaneMask;
- LaneBitmask Common = BMask & AMask;
- if (Common.none())
- continue;
-
- DEBUG( dbgs() << "\t\tCopy_Merge " << PrintLaneMask(BMask)
- << " into " << PrintLaneMask(Common) << '\n');
- LaneBitmask BRest = BMask & ~AMask;
- LiveInterval::SubRange *CommonRange;
- if (BRest.any()) {
- SB.LaneMask = BRest;
- DEBUG(dbgs() << "\t\tReduce Lane to " << PrintLaneMask(BRest)
- << '\n');
- // Duplicate SubRange for newly merged common stuff.
- CommonRange = IntB.createSubRangeFrom(Allocator, Common, SB);
- } else {
- // We van reuse the L SubRange.
- SB.LaneMask = Common;
- CommonRange = &SB;
- }
- LiveRange RangeCopy(SB, Allocator);
-
- VNInfo *BSubValNo = CommonRange->getVNInfoAt(CopyIdx);
- assert(BSubValNo->def == CopyIdx);
- BSubValNo->def = ASubValNo->def;
- addSegmentsWithValNo(*CommonRange, BSubValNo, SA, ASubValNo);
- AMask &= ~BMask;
- }
- if (AMask.any()) {
- DEBUG(dbgs() << "\t\tNew Lane " << PrintLaneMask(AMask) << '\n');
- LiveRange *NewRange = IntB.createSubRange(Allocator, AMask);
- VNInfo *BSubValNo = NewRange->getNextValue(CopyIdx, Allocator);
- addSegmentsWithValNo(*NewRange, BSubValNo, SA, ASubValNo);
- }
+ IntB.refineSubRanges(Allocator, SA.LaneMask,
+ [&Allocator,&SA,CopyIdx,ASubValNo](LiveInterval::SubRange &SR) {
+ VNInfo *BSubValNo = SR.empty()
+ ? SR.getNextValue(CopyIdx, Allocator)
+ : SR.getVNInfoAt(CopyIdx);
+ assert(BSubValNo != nullptr);
+ addSegmentsWithValNo(SR, BSubValNo, SA, ASubValNo);
+ });
}
}
@@ -861,6 +837,191 @@ bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP,
return true;
}
+/// For copy B = A in BB2, if A is defined by A = B in BB0 which is a
+/// predecessor of BB2, and if B is not redefined on the way from A = B
+/// in BB0 to B = A in BB2, B = A in BB2 is partially redundant if the
+/// execution goes through the path from BB0 to BB2. We may move B = A
+/// to the predecessor without such reversed copy.
+/// So we will transform the program from:
+/// BB0:
+/// A = B; BB1:
+/// ... ...
+/// / \ /
+/// BB2:
+/// ...
+/// B = A;
+///
+/// to:
+///
+/// BB0: BB1:
+/// A = B; ...
+/// ... B = A;
+/// / \ /
+/// BB2:
+/// ...
+///
+/// A special case is when BB0 and BB2 are the same BB, which is the only
+/// BB in a loop:
+/// BB1:
+/// ...
+/// BB0/BB2: ----
+/// B = A; |
+/// ... |
+/// A = B; |
+/// |-------
+/// |
+/// We may hoist B = A from BB0/BB2 to BB1.
+///
+/// The major preconditions for correctness to remove such partial
+/// redundancy include:
+/// 1. A in B = A in BB2 is defined by a PHI in BB2, and one operand of
+/// the PHI is defined by the reversed copy A = B in BB0.
+/// 2. No B is referenced from the start of BB2 to B = A.
+/// 3. No B is defined from A = B to the end of BB0.
+/// 4. BB1 has only one successor.
+///
+/// 2 and 4 implicitly ensure B is not live at the end of BB1.
+/// 4 guarantees BB2 is hotter than BB1, so we can only move a copy to a
+/// colder place, which not only prevents endless loops but also makes sure
+/// the movement of the copy is beneficial.
+bool RegisterCoalescer::removePartialRedundancy(const CoalescerPair &CP,
+ MachineInstr &CopyMI) {
+ assert(!CP.isPhys());
+ if (!CopyMI.isFullCopy())
+ return false;
+
+ MachineBasicBlock &MBB = *CopyMI.getParent();
+ if (MBB.isEHPad())
+ return false;
+
+ if (MBB.pred_size() != 2)
+ return false;
+
+ LiveInterval &IntA =
+ LIS->getInterval(CP.isFlipped() ? CP.getDstReg() : CP.getSrcReg());
+ LiveInterval &IntB =
+ LIS->getInterval(CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg());
+
+ // A is defined by PHI at the entry of MBB.
+ SlotIndex CopyIdx = LIS->getInstructionIndex(CopyMI).getRegSlot(true);
+ VNInfo *AValNo = IntA.getVNInfoAt(CopyIdx);
+ assert(AValNo && !AValNo->isUnused() && "COPY source not live");
+ if (!AValNo->isPHIDef())
+ return false;
+
+ // No B is referenced before CopyMI in MBB.
+ if (IntB.overlaps(LIS->getMBBStartIdx(&MBB), CopyIdx))
+ return false;
+
+ // MBB has two predecessors: one contains A = B so no copy will be inserted
+ // for it. The other one will have a copy moved from MBB.
+ bool FoundReverseCopy = false;
+ MachineBasicBlock *CopyLeftBB = nullptr;
+ for (MachineBasicBlock *Pred : MBB.predecessors()) {
+ VNInfo *PVal = IntA.getVNInfoBefore(LIS->getMBBEndIdx(Pred));
+ MachineInstr *DefMI = LIS->getInstructionFromIndex(PVal->def);
+ if (!DefMI || !DefMI->isFullCopy()) {
+ CopyLeftBB = Pred;
+ continue;
+ }
+ // Check DefMI is a reverse copy and it is in BB Pred.
+ if (DefMI->getOperand(0).getReg() != IntA.reg ||
+ DefMI->getOperand(1).getReg() != IntB.reg ||
+ DefMI->getParent() != Pred) {
+ CopyLeftBB = Pred;
+ continue;
+ }
+ // If there is any other def of B after DefMI and before the end of Pred,
+ // we need to keep the copy of B = A at the end of Pred if we remove
+ // B = A from MBB.
+ bool ValB_Changed = false;
+ for (auto VNI : IntB.valnos) {
+ if (VNI->isUnused())
+ continue;
+ if (PVal->def < VNI->def && VNI->def < LIS->getMBBEndIdx(Pred)) {
+ ValB_Changed = true;
+ break;
+ }
+ }
+ if (ValB_Changed) {
+ CopyLeftBB = Pred;
+ continue;
+ }
+ FoundReverseCopy = true;
+ }
+
+ // If no reverse copy is found in predecessors, nothing to do.
+ if (!FoundReverseCopy)
+ return false;
+
+ // If CopyLeftBB is nullptr, every predecessor of MBB contains a reverse
+ // copy, and CopyMI can be removed trivially once IntA/IntB are updated.
+ // If CopyLeftBB is not nullptr, move CopyMI from MBB to CopyLeftBB and
+ // update IntA/IntB.
+ //
+ // If CopyLeftBB is not nullptr, ensure CopyLeftBB has a single succ so
+ // MBB is hotter than CopyLeftBB.
+ if (CopyLeftBB && CopyLeftBB->succ_size() > 1)
+ return false;
+
+ // Now ok to move copy.
+ if (CopyLeftBB) {
+ DEBUG(dbgs() << "\tremovePartialRedundancy: Move the copy to BB#"
+ << CopyLeftBB->getNumber() << '\t' << CopyMI);
+
+ // Insert new copy to CopyLeftBB.
+ auto InsPos = CopyLeftBB->getFirstTerminator();
+ MachineInstr *NewCopyMI = BuildMI(*CopyLeftBB, InsPos, CopyMI.getDebugLoc(),
+ TII->get(TargetOpcode::COPY), IntB.reg)
+ .addReg(IntA.reg);
+ SlotIndex NewCopyIdx =
+ LIS->InsertMachineInstrInMaps(*NewCopyMI).getRegSlot();
+ IntB.createDeadDef(NewCopyIdx, LIS->getVNInfoAllocator());
+ for (LiveInterval::SubRange &SR : IntB.subranges())
+ SR.createDeadDef(NewCopyIdx, LIS->getVNInfoAllocator());
+
+ // If the newly created instruction reuses the address of an instruction that
+ // was deleted earlier (the object was recycled by the allocator), it needs to
+ // be removed from the deleted list.
+ ErasedInstrs.erase(NewCopyMI);
+ } else {
+ DEBUG(dbgs() << "\tremovePartialRedundancy: Remove the copy from BB#"
+ << MBB.getNumber() << '\t' << CopyMI);
+ }
+
+ // Remove CopyMI.
+ // Note: It is fine to remove the copy before updating the live-ranges.
+ // While updating the live-ranges, we only look at slot indices and
+ // never go back to the instruction.
+ LIS->RemoveMachineInstrFromMaps(CopyMI);
+ // Mark instructions as deleted.
+ ErasedInstrs.insert(&CopyMI);
+ CopyMI.eraseFromParent();
+
+ // Update the liveness.
+ SmallVector<SlotIndex, 8> EndPoints;
+ VNInfo *BValNo = IntB.Query(CopyIdx).valueOutOrDead();
+ LIS->pruneValue(*static_cast<LiveRange *>(&IntB), CopyIdx.getRegSlot(),
+ &EndPoints);
+ BValNo->markUnused();
+ // Extend IntB to the EndPoints of its original live interval.
+ LIS->extendToIndices(IntB, EndPoints);
+
+ // Now, do the same for its subranges.
+ for (LiveInterval::SubRange &SR : IntB.subranges()) {
+ EndPoints.clear();
+ VNInfo *BValNo = SR.Query(CopyIdx).valueOutOrDead();
+ assert(BValNo && "All sublanes should be live");
+ LIS->pruneValue(SR, CopyIdx.getRegSlot(), &EndPoints);
+ BValNo->markUnused();
+ LIS->extendToIndices(SR, EndPoints);
+ }
+
+ // Finally, update the live-range of IntA.
+ shrinkToUses(&IntA);
+ return true;
+}
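+// Illustrative sketch of the transformation above (CFG assumed, not part of
+// the surrounding code). With the copy B = A in the join block MBB and the
+// reverse copy A = B ending one predecessor:
+//
+//   BB0: ... A = B        BB1: ...
+//          \               /
+//           MBB: B = A ...      <- redundant along the BB0 edge (B == A)
+//
+// the copy is sunk into the copy-free predecessor (CopyLeftBB):
+//
+//   BB0: ... A = B        BB1: ...; B = A
+//          \               /
+//           MBB: ...            <- copy removed from the hotter join block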
+
/// Returns true if @p MI defines the full vreg @p Reg, as opposed to just
/// defining a subregister.
static bool definesFullReg(const MachineInstr &MI, unsigned Reg) {
@@ -1066,6 +1227,34 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,
SR->createDeadDef(DefIndex, Alloc);
}
}
+
+ // Make sure that the subrange for the resultant undef is removed.
+ // For example:
+ // vreg1:sub1<def,read-undef> = LOAD CONSTANT 1
+ // vreg2<def> = COPY vreg1
+ // ==>
+ // vreg2:sub1<def, read-undef> = LOAD CONSTANT 1
+ // ; Correct, but the subrange for vreg2:sub0 must be removed
+ // ; because it is now undef
+ if (NewIdx != 0 && DstInt.hasSubRanges()) {
+ // The affected subregister segments can be removed.
+ SlotIndex CurrIdx = LIS->getInstructionIndex(NewMI);
+ LaneBitmask DstMask = TRI->getSubRegIndexLaneMask(NewIdx);
+ bool UpdatedSubRanges = false;
+ for (LiveInterval::SubRange &SR : DstInt.subranges()) {
+ if ((SR.LaneMask & DstMask).none()) {
+ DEBUG(dbgs() << "Removing undefined SubRange "
+ << PrintLaneMask(SR.LaneMask) << " : " << SR << "\n");
+ // If a value number is live in this SubRange at CurrIdx, remove all
+ // of the segments that carry it.
+ if (VNInfo *RmValNo = SR.getVNInfoAt(CurrIdx.getRegSlot())) {
+ SR.removeValNo(RmValNo);
+ UpdatedSubRanges = true;
+ }
+ }
+ }
+ if (UpdatedSubRanges)
+ DstInt.removeEmptySubRanges();
+ }
} else if (NewMI.getOperand(0).getReg() != CopyDstReg) {
// The New instruction may be defining a sub-register of what's actually
// been asked for. If so it must implicitly define the whole thing.
@@ -1290,7 +1479,7 @@ void RegisterCoalescer::updateRegDefsUses(unsigned SrcReg,
// If SrcReg wasn't read, it may still be the case that DstReg is live-in
// because SrcReg is a sub-register.
- if (DstInt && !Reads && SubIdx)
+ if (DstInt && !Reads && SubIdx && !UseMI->isDebugValue())
Reads = DstInt->liveAt(LIS->getInstructionIndex(*UseMI));
// Replace SrcReg with DstReg in all UseMI operands.
@@ -1486,6 +1675,12 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) {
}
}
+ // Try and see if we can partially eliminate the copy by moving the copy to
+ // its predecessor.
+ if (!CP.isPartial() && !CP.isPhys())
+ if (removePartialRedundancy(CP, *CopyMI))
+ return true;
+
// Otherwise, we are unable to join the intervals.
DEBUG(dbgs() << "\tInterference!\n");
Again = true; // May be possible to coalesce later.
@@ -1583,6 +1778,14 @@ bool RegisterCoalescer::joinReservedPhysReg(CoalescerPair &CP) {
return false;
}
}
+
+ // We must also check for overlaps with regmask clobbers.
+ BitVector RegMaskUsable;
+ if (LIS->checkRegMaskInterference(RHS, RegMaskUsable) &&
+ !RegMaskUsable.test(DstReg)) {
+ DEBUG(dbgs() << "\t\tRegMask interference\n");
+ return false;
+ }
}
// Skip any value computations, we are not adding new values to the
@@ -1636,14 +1839,6 @@ bool RegisterCoalescer::joinReservedPhysReg(CoalescerPair &CP) {
DEBUG(dbgs() << "\t\tInterference (read): " << *MI);
return false;
}
-
- // We must also check for clobbers caused by regmasks.
- for (const auto &MO : MI->operands()) {
- if (MO.isRegMask() && MO.clobbersPhysReg(DstReg)) {
- DEBUG(dbgs() << "\t\tInterference (regmask clobber): " << *MI);
- return false;
- }
- }
}
}
@@ -2506,11 +2701,17 @@ void JoinVals::pruneSubRegValues(LiveInterval &LI, LaneBitmask &ShrinkMask) {
// Look for values being erased.
bool DidPrune = false;
for (unsigned i = 0, e = LR.getNumValNums(); i != e; ++i) {
- if (Vals[i].Resolution != CR_Erase)
+ // We should trigger in all cases in which eraseInstrs() does something,
+ // so match the conditions that eraseInstrs() checks.
+ if (Vals[i].Resolution != CR_Erase &&
+ (Vals[i].Resolution != CR_Keep || !Vals[i].ErasableImplicitDef ||
+ !Vals[i].Pruned))
continue;
// Check subranges at the point where the copy will be removed.
SlotIndex Def = LR.getValNumInfo(i)->def;
+ // Print message so mismatches with eraseInstrs() can be diagnosed.
+ DEBUG(dbgs() << "\t\tExpecting instruction removal at " << Def << '\n');
for (LiveInterval::SubRange &S : LI.subranges()) {
LiveQueryResult Q = S.Query(Def);
@@ -2738,39 +2939,16 @@ void RegisterCoalescer::mergeSubRangeInto(LiveInterval &LI,
LaneBitmask LaneMask,
CoalescerPair &CP) {
BumpPtrAllocator &Allocator = LIS->getVNInfoAllocator();
- for (LiveInterval::SubRange &R : LI.subranges()) {
- LaneBitmask RMask = R.LaneMask;
- // LaneMask of subregisters common to subrange R and ToMerge.
- LaneBitmask Common = RMask & LaneMask;
- // There is nothing to do without common subregs.
- if (Common.none())
- continue;
-
- DEBUG(dbgs() << "\t\tCopy+Merge " << PrintLaneMask(RMask) << " into "
- << PrintLaneMask(Common) << '\n');
- // LaneMask of subregisters contained in the R range but not in ToMerge,
- // they have to split into their own subrange.
- LaneBitmask LRest = RMask & ~LaneMask;
- LiveInterval::SubRange *CommonRange;
- if (LRest.any()) {
- R.LaneMask = LRest;
- DEBUG(dbgs() << "\t\tReduce Lane to " << PrintLaneMask(LRest) << '\n');
- // Duplicate SubRange for newly merged common stuff.
- CommonRange = LI.createSubRangeFrom(Allocator, Common, R);
+ LI.refineSubRanges(Allocator, LaneMask,
+ [this,&Allocator,&ToMerge,&CP](LiveInterval::SubRange &SR) {
+ if (SR.empty()) {
+ SR.assign(ToMerge, Allocator);
} else {
- // Reuse the existing range.
- R.LaneMask = Common;
- CommonRange = &R;
+ // joinSubRegRanges() destroys the merged range, so we need a copy.
+ LiveRange RangeCopy(ToMerge, Allocator);
+ joinSubRegRanges(SR, RangeCopy, SR.LaneMask, CP);
}
- LiveRange RangeCopy(ToMerge, Allocator);
- joinSubRegRanges(*CommonRange, RangeCopy, Common, CP);
- LaneMask &= ~RMask;
- }
-
- if (LaneMask.any()) {
- DEBUG(dbgs() << "\t\tNew Lane " << PrintLaneMask(LaneMask) << '\n');
- LI.createSubRangeFrom(Allocator, LaneMask, ToMerge);
- }
+ });
}
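// A minimal sketch (lane masks assumed for illustration) of what the
// refineSubRanges() call above does: subranges are split so each lies either
// fully inside or fully outside LaneMask, and the callback runs on every
// subrange covered by LaneMask. E.g. with subranges {0b0011, 0b1100} and
// LaneMask 0b0110, refinement yields {0b0001, 0b0010, 0b0100, 0b1000}; the
// callback then assign()s ToMerge into newly created empty subranges or
// joins it into existing ones via joinSubRegRanges().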
bool RegisterCoalescer::joinVirtRegs(CoalescerPair &CP) {
@@ -2952,7 +3130,7 @@ copyCoalesceWorkList(MutableArrayRef<MachineInstr*> CurrList) {
continue;
// Skip instruction pointers that have already been erased, for example by
// dead code elimination.
- if (ErasedInstrs.erase(CurrList[i])) {
+ if (ErasedInstrs.count(CurrList[i])) {
CurrList[i] = nullptr;
continue;
}
@@ -3077,7 +3255,7 @@ RegisterCoalescer::copyCoalesceInMBB(MachineBasicBlock *MBB) {
CurrList(WorkList.begin() + PrevSize, WorkList.end());
if (copyCoalesceWorkList(CurrList))
WorkList.erase(std::remove(WorkList.begin() + PrevSize, WorkList.end(),
- (MachineInstr*)nullptr), WorkList.end());
+ nullptr), WorkList.end());
}
void RegisterCoalescer::coalesceLocals() {
diff --git a/contrib/llvm/lib/CodeGen/RegisterPressure.cpp b/contrib/llvm/lib/CodeGen/RegisterPressure.cpp
index fc84aeb..88e0a3b 100644
--- a/contrib/llvm/lib/CodeGen/RegisterPressure.cpp
+++ b/contrib/llvm/lib/CodeGen/RegisterPressure.cpp
@@ -1,4 +1,4 @@
-//===-- RegisterPressure.cpp - Dynamic Register Pressure ------------------===//
+//===- RegisterPressure.cpp - Dynamic Register Pressure -------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -13,12 +13,36 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/RegisterPressure.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/LiveInterval.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBundle.h"
+#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterClassInfo.h"
+#include "llvm/CodeGen/SlotIndexes.h"
+#include "llvm/MC/LaneBitmask.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <cstdlib>
+#include <cstring>
+#include <iterator>
+#include <limits>
+#include <utility>
+#include <vector>
using namespace llvm;
@@ -52,6 +76,7 @@ static void decreaseSetPressure(std::vector<unsigned> &CurrSetPressure,
}
}
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD
void llvm::dumpRegSetPressure(ArrayRef<unsigned> SetPressure,
const TargetRegisterInfo *TRI) {
@@ -97,6 +122,7 @@ void RegPressureTracker::dump() const {
P.dump(TRI);
}
+LLVM_DUMP_METHOD
void PressureDiff::dump(const TargetRegisterInfo &TRI) const {
const char *sep = "";
for (const PressureChange &Change : *this) {
@@ -108,6 +134,7 @@ void PressureDiff::dump(const TargetRegisterInfo &TRI) const {
}
dbgs() << '\n';
}
+#endif
void RegPressureTracker::increaseRegPressure(unsigned RegUnit,
LaneBitmask PreviousMask,
@@ -264,7 +291,6 @@ bool RegPressureTracker::isBottomClosed() const {
MachineBasicBlock::const_iterator());
}
-
SlotIndex RegPressureTracker::getCurrSlot() const {
MachineBasicBlock::const_iterator IdxPos =
skipDebugInstructionsForward(CurrPos, MBB->end());
@@ -328,7 +354,7 @@ void RegPressureTracker::initLiveThru(const RegPressureTracker &RPTracker) {
static LaneBitmask getRegLanes(ArrayRef<RegisterMaskPair> RegUnits,
unsigned RegUnit) {
- auto I = find_if(RegUnits, [RegUnit](const RegisterMaskPair Other) {
+ auto I = llvm::find_if(RegUnits, [RegUnit](const RegisterMaskPair Other) {
return Other.RegUnit == RegUnit;
});
if (I == RegUnits.end())
@@ -340,7 +366,7 @@ static void addRegLanes(SmallVectorImpl<RegisterMaskPair> &RegUnits,
RegisterMaskPair Pair) {
unsigned RegUnit = Pair.RegUnit;
assert(Pair.LaneMask.any());
- auto I = find_if(RegUnits, [RegUnit](const RegisterMaskPair Other) {
+ auto I = llvm::find_if(RegUnits, [RegUnit](const RegisterMaskPair Other) {
return Other.RegUnit == RegUnit;
});
if (I == RegUnits.end()) {
@@ -352,7 +378,7 @@ static void addRegLanes(SmallVectorImpl<RegisterMaskPair> &RegUnits,
static void setRegZero(SmallVectorImpl<RegisterMaskPair> &RegUnits,
unsigned RegUnit) {
- auto I = find_if(RegUnits, [RegUnit](const RegisterMaskPair Other) {
+ auto I = llvm::find_if(RegUnits, [RegUnit](const RegisterMaskPair Other) {
return Other.RegUnit == RegUnit;
});
if (I == RegUnits.end()) {
@@ -366,7 +392,7 @@ static void removeRegLanes(SmallVectorImpl<RegisterMaskPair> &RegUnits,
RegisterMaskPair Pair) {
unsigned RegUnit = Pair.RegUnit;
assert(Pair.LaneMask.any());
- auto I = find_if(RegUnits, [RegUnit](const RegisterMaskPair Other) {
+ auto I = llvm::find_if(RegUnits, [RegUnit](const RegisterMaskPair Other) {
return Other.RegUnit == RegUnit;
});
if (I != RegUnits.end()) {
@@ -423,6 +449,8 @@ namespace {
///
/// FIXME: always ignore tied opers
class RegisterOperandsCollector {
+ friend class llvm::RegisterOperands;
+
RegisterOperands &RegOpers;
const TargetRegisterInfo &TRI;
const MachineRegisterInfo &MRI;
@@ -517,11 +545,9 @@ class RegisterOperandsCollector {
addRegLanes(RegUnits, RegisterMaskPair(*Units, LaneBitmask::getAll()));
}
}
-
- friend class llvm::RegisterOperands;
};
-} // namespace
+} // end anonymous namespace
void RegisterOperands::collect(const MachineInstr &MI,
const TargetRegisterInfo &TRI,
@@ -674,7 +700,7 @@ void RegPressureTracker::discoverLiveInOrOut(RegisterMaskPair Pair,
assert(Pair.LaneMask.any());
unsigned RegUnit = Pair.RegUnit;
- auto I = find_if(LiveInOrOut, [RegUnit](const RegisterMaskPair &Other) {
+ auto I = llvm::find_if(LiveInOrOut, [RegUnit](const RegisterMaskPair &Other) {
return Other.RegUnit == RegUnit;
});
LaneBitmask PrevMask;
@@ -772,9 +798,10 @@ void RegPressureTracker::recede(const RegisterOperands &RegOpers,
if (!TrackLaneMasks) {
addRegLanes(*LiveUses, RegisterMaskPair(Reg, NewMask));
} else {
- auto I = find_if(*LiveUses, [Reg](const RegisterMaskPair Other) {
- return Other.RegUnit == Reg;
- });
+ auto I =
+ llvm::find_if(*LiveUses, [Reg](const RegisterMaskPair Other) {
+ return Other.RegUnit == Reg;
+ });
bool IsRedef = I != LiveUses->end();
if (IsRedef) {
// ignore re-defs here...
@@ -1154,7 +1181,7 @@ getUpwardPressureDelta(const MachineInstr *MI, /*const*/ PressureDiff &PDiff,
if (CritIdx != CritEnd && CriticalPSets[CritIdx].getPSet() == PSetID) {
int CritInc = (int)MNew - (int)CriticalPSets[CritIdx].getUnitInc();
- if (CritInc > 0 && CritInc <= INT16_MAX) {
+ if (CritInc > 0 && CritInc <= std::numeric_limits<int16_t>::max()) {
Delta.CriticalMax = PressureChange(PSetID);
Delta.CriticalMax.setUnitInc(CritInc);
}
diff --git a/contrib/llvm/lib/CodeGen/RegisterScavenging.cpp b/contrib/llvm/lib/CodeGen/RegisterScavenging.cpp
index fdf741f..fc5105a 100644
--- a/contrib/llvm/lib/CodeGen/RegisterScavenging.cpp
+++ b/contrib/llvm/lib/CodeGen/RegisterScavenging.cpp
@@ -1,4 +1,4 @@
-//===-- RegisterScavenging.cpp - Machine register scavenging --------------===//
+//===- RegisterScavenging.cpp - Machine register scavenging ---------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -16,27 +16,39 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/RegisterScavenging.h"
+
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/PassSupport.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
+#include <algorithm>
+#include <cassert>
+#include <iterator>
+#include <limits>
+#include <string>
+
using namespace llvm;
#define DEBUG_TYPE "reg-scavenging"
+STATISTIC(NumScavengedRegs, "Number of frame index regs scavenged");
+
void RegScavenger::setRegUsed(unsigned Reg, LaneBitmask LaneMask) {
- for (MCRegUnitMaskIterator RUI(Reg, TRI); RUI.isValid(); ++RUI) {
- LaneBitmask UnitMask = (*RUI).second;
- if (UnitMask.none() || (LaneMask & UnitMask).any())
- RegUnitsAvailable.reset((*RUI).first);
- }
+ LiveUnits.addRegMasked(Reg, LaneMask);
}
void RegScavenger::init(MachineBasicBlock &MBB) {
@@ -44,6 +56,7 @@ void RegScavenger::init(MachineBasicBlock &MBB) {
TII = MF.getSubtarget().getInstrInfo();
TRI = MF.getSubtarget().getRegisterInfo();
MRI = &MF.getRegInfo();
+ LiveUnits.init(*TRI);
assert((NumRegUnits == 0 || NumRegUnits == TRI->getNumRegUnits()) &&
"Target changed?");
@@ -51,45 +64,28 @@ void RegScavenger::init(MachineBasicBlock &MBB) {
// Self-initialize.
if (!this->MBB) {
NumRegUnits = TRI->getNumRegUnits();
- RegUnitsAvailable.resize(NumRegUnits);
KillRegUnits.resize(NumRegUnits);
DefRegUnits.resize(NumRegUnits);
TmpRegUnits.resize(NumRegUnits);
}
this->MBB = &MBB;
- for (SmallVectorImpl<ScavengedInfo>::iterator I = Scavenged.begin(),
- IE = Scavenged.end(); I != IE; ++I) {
- I->Reg = 0;
- I->Restore = nullptr;
+ for (ScavengedInfo &SI : Scavenged) {
+ SI.Reg = 0;
+ SI.Restore = nullptr;
}
- // All register units start out unused.
- RegUnitsAvailable.set();
-
- // Pristine CSRs are not available.
- BitVector PR = MF.getFrameInfo().getPristineRegs(MF);
- for (int I = PR.find_first(); I>0; I = PR.find_next(I))
- setRegUsed(I);
-
Tracking = false;
}
-void RegScavenger::setLiveInsUsed(const MachineBasicBlock &MBB) {
- for (const auto &LI : MBB.liveins())
- setRegUsed(LI.PhysReg, LI.LaneMask);
-}
-
void RegScavenger::enterBasicBlock(MachineBasicBlock &MBB) {
init(MBB);
- setLiveInsUsed(MBB);
+ LiveUnits.addLiveIns(MBB);
}
void RegScavenger::enterBasicBlockEnd(MachineBasicBlock &MBB) {
init(MBB);
- // Merge live-ins of successors to get live-outs.
- for (const MachineBasicBlock *Succ : MBB.successors())
- setLiveInsUsed(*Succ);
+ LiveUnits.addLiveOuts(MBB);
// Move internal iterator at the last instruction of the block.
if (MBB.begin() != MBB.end()) {
@@ -263,34 +259,13 @@ void RegScavenger::backward() {
assert(Tracking && "Must be tracking to determine kills and defs");
const MachineInstr &MI = *MBBI;
- // Defined or clobbered registers are available now.
- for (const MachineOperand &MO : MI.operands()) {
- if (MO.isRegMask()) {
- for (unsigned RU = 0, RUEnd = TRI->getNumRegUnits(); RU != RUEnd;
- ++RU) {
- for (MCRegUnitRootIterator RURI(RU, TRI); RURI.isValid(); ++RURI) {
- if (MO.clobbersPhysReg(*RURI)) {
- RegUnitsAvailable.set(RU);
- break;
- }
- }
- }
- } else if (MO.isReg() && MO.isDef()) {
- unsigned Reg = MO.getReg();
- if (!Reg || TargetRegisterInfo::isVirtualRegister(Reg) ||
- isReserved(Reg))
- continue;
- addRegUnits(RegUnitsAvailable, Reg);
- }
- }
- // Mark read registers as unavailable.
- for (const MachineOperand &MO : MI.uses()) {
- if (MO.isReg() && MO.readsReg()) {
- unsigned Reg = MO.getReg();
- if (!Reg || TargetRegisterInfo::isVirtualRegister(Reg) ||
- isReserved(Reg))
- continue;
- removeRegUnits(RegUnitsAvailable, Reg);
+ LiveUnits.stepBackward(MI);
+
+ // Expire scavenge spill frameindex uses.
+ for (ScavengedInfo &I : Scavenged) {
+ if (I.Restore == &MI) {
+ I.Reg = 0;
+ I.Restore = nullptr;
}
}
@@ -302,12 +277,9 @@ void RegScavenger::backward() {
}
bool RegScavenger::isRegUsed(unsigned Reg, bool includeReserved) const {
- if (includeReserved && isReserved(Reg))
- return true;
- for (MCRegUnitIterator RUI(Reg, TRI); RUI.isValid(); ++RUI)
- if (!RegUnitsAvailable.test(*RUI))
- return true;
- return false;
+ if (isReserved(Reg))
+ return includeReserved;
+ return !LiveUnits.available(Reg);
}
unsigned RegScavenger::FindUnusedReg(const TargetRegisterClass *RC) const {
@@ -393,6 +365,86 @@ unsigned RegScavenger::findSurvivorReg(MachineBasicBlock::iterator StartMI,
return Survivor;
}
+/// Given the set \p LiveOut of live register units at the end of the block,
+/// search backwards from \p From to find a register that is part of
+/// \p AllocationOrder and not used/clobbered until the point \p To. If there
+/// are multiple candidates, continue searching and pick the one that is not
+/// used/clobbered for the longest time.
+/// Returns the register and the earliest position we know it to be free, or
+/// the position MBB.end() if no register is available.
+static std::pair<MCPhysReg, MachineBasicBlock::iterator>
+findSurvivorBackwards(const MachineRegisterInfo &MRI,
+ MachineBasicBlock::iterator From, MachineBasicBlock::iterator To,
+ const LiveRegUnits &LiveOut, ArrayRef<MCPhysReg> AllocationOrder,
+ bool RestoreAfter) {
+ bool FoundTo = false;
+ MCPhysReg Survivor = 0;
+ MachineBasicBlock::iterator Pos;
+ MachineBasicBlock &MBB = *From->getParent();
+ unsigned InstrLimit = 25;
+ unsigned InstrCountDown = InstrLimit;
+ const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();
+ LiveRegUnits Used(TRI);
+
+ for (MachineBasicBlock::iterator I = From;; --I) {
+ const MachineInstr &MI = *I;
+
+ Used.accumulate(MI);
+
+ if (I == To) {
+ // See if one of the registers in RC wasn't used so far.
+ for (MCPhysReg Reg : AllocationOrder) {
+ if (!MRI.isReserved(Reg) && Used.available(Reg) &&
+ LiveOut.available(Reg))
+ return std::make_pair(Reg, MBB.end());
+ }
+ // Otherwise we will continue up to InstrLimit instructions to find
+ // the register which is not defined/used for the longest time.
+ FoundTo = true;
+ Pos = To;
+ // Note: It was fine so far to start our search at From. However, now that
+ // we have to spill and can only place the restore after From, also add
+ // the regs used/defed by std::next(From) to the set.
+ if (RestoreAfter)
+ Used.accumulate(*std::next(From));
+ }
+ if (FoundTo) {
+ if (Survivor == 0 || !Used.available(Survivor)) {
+ MCPhysReg AvailableReg = 0;
+ for (MCPhysReg Reg : AllocationOrder) {
+ if (!MRI.isReserved(Reg) && Used.available(Reg)) {
+ AvailableReg = Reg;
+ break;
+ }
+ }
+ if (AvailableReg == 0)
+ break;
+ Survivor = AvailableReg;
+ }
+ if (--InstrCountDown == 0)
+ break;
+
+ // Keep searching when we find a vreg, since the spilled register will
+ // be useful for this other vreg as well later.
+ bool FoundVReg = false;
+ for (const MachineOperand &MO : MI.operands()) {
+ if (MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
+ FoundVReg = true;
+ break;
+ }
+ }
+ if (FoundVReg) {
+ InstrCountDown = InstrLimit;
+ Pos = I;
+ }
+ if (I == MBB.begin())
+ break;
+ }
+ }
+
+ return std::make_pair(Survivor, Pos);
+}
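+// Worked example of the search above (instruction stream assumed for
+// illustration), with AllocationOrder = {r0, r1, r2}:
+//
+//   To:   ... = <fi#N>    <- position that needs the scratch register
+//         r1 = ...        <- r1 clobbered within [To, From]
+//         ... = r0        <- r0 read within [To, From]
+//   From: ...             <- search starts here and walks upwards
+//
+// Neither r0 nor r1 is free over the whole range, but if r2 is unused in
+// [To, From] and free according to LiveOut, it is returned together with
+// MBB.end(), meaning no spill is needed. Otherwise the search continues
+// above To for up to InstrLimit instructions and picks the candidate that
+// stays free for the longest stretch, along with the earliest position at
+// which it is known to be free.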
+
static unsigned getFrameIndexOperandNum(MachineInstr &MI) {
unsigned i = 0;
while (!MI.getOperand(i).isFI()) {
@@ -402,46 +454,18 @@ static unsigned getFrameIndexOperandNum(MachineInstr &MI) {
return i;
}
-unsigned RegScavenger::scavengeRegister(const TargetRegisterClass *RC,
- MachineBasicBlock::iterator I,
- int SPAdj) {
- MachineInstr &MI = *I;
- const MachineFunction &MF = *MI.getParent()->getParent();
- // Consider all allocatable registers in the register class initially
- BitVector Candidates = TRI->getAllocatableSet(MF, RC);
-
- // Exclude all the registers being used by the instruction.
- for (const MachineOperand &MO : MI.operands()) {
- if (MO.isReg() && MO.getReg() != 0 && !(MO.isUse() && MO.isUndef()) &&
- !TargetRegisterInfo::isVirtualRegister(MO.getReg()))
- for (MCRegAliasIterator AI(MO.getReg(), TRI, true); AI.isValid(); ++AI)
- Candidates.reset(*AI);
- }
-
- // Try to find a register that's unused if there is one, as then we won't
- // have to spill.
- BitVector Available = getRegsAvailable(RC);
- Available &= Candidates;
- if (Available.any())
- Candidates = Available;
-
- // Find the register whose use is furthest away.
- MachineBasicBlock::iterator UseMI;
- unsigned SReg = findSurvivorReg(I, Candidates, 25, UseMI);
-
- // If we found an unused register there is no reason to spill it.
- if (!isRegUsed(SReg)) {
- DEBUG(dbgs() << "Scavenged register: " << TRI->getName(SReg) << "\n");
- return SReg;
- }
-
+RegScavenger::ScavengedInfo &
+RegScavenger::spill(unsigned Reg, const TargetRegisterClass &RC, int SPAdj,
+ MachineBasicBlock::iterator Before,
+ MachineBasicBlock::iterator &UseMI) {
// Find an available scavenging slot with size and alignment matching
// the requirements of the class RC.
+ const MachineFunction &MF = *Before->getParent()->getParent();
const MachineFrameInfo &MFI = MF.getFrameInfo();
- unsigned NeedSize = RC->getSize();
- unsigned NeedAlign = RC->getAlignment();
+ unsigned NeedSize = TRI->getSpillSize(RC);
+ unsigned NeedAlign = TRI->getSpillAlignment(RC);
- unsigned SI = Scavenged.size(), Diff = UINT_MAX;
+ unsigned SI = Scavenged.size(), Diff = std::numeric_limits<unsigned>::max();
int FIB = MFI.getObjectIndexBegin(), FIE = MFI.getObjectIndexEnd();
for (unsigned I = 0; I < Scavenged.size(); ++I) {
if (Scavenged[I].Reg != 0)
@@ -474,42 +498,303 @@ unsigned RegScavenger::scavengeRegister(const TargetRegisterClass *RC,
}
// Avoid infinite regress
- Scavenged[SI].Reg = SReg;
+ Scavenged[SI].Reg = Reg;
// If the target knows how to save/restore the register, let it do so;
// otherwise, use the emergency stack spill slot.
- if (!TRI->saveScavengerRegister(*MBB, I, UseMI, RC, SReg)) {
- // Spill the scavenged register before I.
+ if (!TRI->saveScavengerRegister(*MBB, Before, UseMI, &RC, Reg)) {
+ // Spill the scavenged register before \p Before.
int FI = Scavenged[SI].FrameIndex;
if (FI < FIB || FI >= FIE) {
std::string Msg = std::string("Error while trying to spill ") +
- TRI->getName(SReg) + " from class " + TRI->getRegClassName(RC) +
+ TRI->getName(Reg) + " from class " + TRI->getRegClassName(&RC) +
": Cannot scavenge register without an emergency spill slot!";
report_fatal_error(Msg.c_str());
}
- TII->storeRegToStackSlot(*MBB, I, SReg, true, Scavenged[SI].FrameIndex,
- RC, TRI);
- MachineBasicBlock::iterator II = std::prev(I);
+ TII->storeRegToStackSlot(*MBB, Before, Reg, true, Scavenged[SI].FrameIndex,
+ &RC, TRI);
+ MachineBasicBlock::iterator II = std::prev(Before);
unsigned FIOperandNum = getFrameIndexOperandNum(*II);
TRI->eliminateFrameIndex(II, SPAdj, FIOperandNum, this);
// Restore the scavenged register before its use (or first terminator).
- TII->loadRegFromStackSlot(*MBB, UseMI, SReg, Scavenged[SI].FrameIndex,
- RC, TRI);
+ TII->loadRegFromStackSlot(*MBB, UseMI, Reg, Scavenged[SI].FrameIndex,
+ &RC, TRI);
II = std::prev(UseMI);
FIOperandNum = getFrameIndexOperandNum(*II);
TRI->eliminateFrameIndex(II, SPAdj, FIOperandNum, this);
}
+ return Scavenged[SI];
+}
- Scavenged[SI].Restore = &*std::prev(UseMI);
+unsigned RegScavenger::scavengeRegister(const TargetRegisterClass *RC,
+ MachineBasicBlock::iterator I,
+ int SPAdj) {
+ MachineInstr &MI = *I;
+ const MachineFunction &MF = *MI.getParent()->getParent();
+ // Consider all allocatable registers in the register class initially
+ BitVector Candidates = TRI->getAllocatableSet(MF, RC);
+
+ // Exclude all the registers being used by the instruction.
+ for (const MachineOperand &MO : MI.operands()) {
+ if (MO.isReg() && MO.getReg() != 0 && !(MO.isUse() && MO.isUndef()) &&
+ !TargetRegisterInfo::isVirtualRegister(MO.getReg()))
+ for (MCRegAliasIterator AI(MO.getReg(), TRI, true); AI.isValid(); ++AI)
+ Candidates.reset(*AI);
+ }
- // Doing this here leads to infinite regress.
- // Scavenged[SI].Reg = SReg;
+ // Try to find a register that's unused if there is one, as then we won't
+ // have to spill.
+ BitVector Available = getRegsAvailable(RC);
+ Available &= Candidates;
+ if (Available.any())
+ Candidates = Available;
+
+ // Find the register whose use is furthest away.
+ MachineBasicBlock::iterator UseMI;
+ unsigned SReg = findSurvivorReg(I, Candidates, 25, UseMI);
+
+ // If we found an unused register there is no reason to spill it.
+ if (!isRegUsed(SReg)) {
+ DEBUG(dbgs() << "Scavenged register: " << TRI->getName(SReg) << "\n");
+ return SReg;
+ }
+
+ ScavengedInfo &Scavenged = spill(SReg, *RC, SPAdj, I, UseMI);
+ Scavenged.Restore = &*std::prev(UseMI);
DEBUG(dbgs() << "Scavenged register (with spill): " << TRI->getName(SReg) <<
"\n");
return SReg;
}
+
+unsigned RegScavenger::scavengeRegisterBackwards(const TargetRegisterClass &RC,
+ MachineBasicBlock::iterator To,
+ bool RestoreAfter, int SPAdj) {
+ const MachineBasicBlock &MBB = *To->getParent();
+ const MachineFunction &MF = *MBB.getParent();
+
+ // Find the register whose use is furthest away.
+ MachineBasicBlock::iterator UseMI;
+ ArrayRef<MCPhysReg> AllocationOrder = RC.getRawAllocationOrder(MF);
+ std::pair<MCPhysReg, MachineBasicBlock::iterator> P =
+ findSurvivorBackwards(*MRI, MBBI, To, LiveUnits, AllocationOrder,
+ RestoreAfter);
+ MCPhysReg Reg = P.first;
+ MachineBasicBlock::iterator SpillBefore = P.second;
+ assert(Reg != 0 && "No register left to scavenge!");
+ // Found an available register?
+ if (SpillBefore != MBB.end()) {
+ MachineBasicBlock::iterator ReloadAfter =
+ RestoreAfter ? std::next(MBBI) : MBBI;
+ MachineBasicBlock::iterator ReloadBefore = std::next(ReloadAfter);
+ DEBUG(dbgs() << "Reload before: " << *ReloadBefore << '\n');
+ ScavengedInfo &Scavenged = spill(Reg, RC, SPAdj, SpillBefore, ReloadBefore);
+ Scavenged.Restore = &*std::prev(SpillBefore);
+ LiveUnits.removeReg(Reg);
+ DEBUG(dbgs() << "Scavenged register with spill: " << PrintReg(Reg, TRI)
+ << " until " << *SpillBefore);
+ } else {
+ DEBUG(dbgs() << "Scavenged free register: " << PrintReg(Reg, TRI) << '\n');
+ }
+ return Reg;
+}
+
+/// Allocate a register for the virtual register \p VReg. The last use of
+/// \p VReg is around the current position of the register scavenger \p RS.
+/// \p ReserveAfter controls whether the scavenged register needs to be reserved
+/// after the current instruction, otherwise it will only be reserved before the
+/// current instruction.
+static unsigned scavengeVReg(MachineRegisterInfo &MRI, RegScavenger &RS,
+ unsigned VReg, bool ReserveAfter) {
+ const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();
+#ifndef NDEBUG
+ // Verify that all definitions and uses are in the same basic block.
+ const MachineBasicBlock *CommonMBB = nullptr;
+ // Real definition for the reg, re-definitions are not considered.
+ const MachineInstr *RealDef = nullptr;
+ for (MachineOperand &MO : MRI.reg_nodbg_operands(VReg)) {
+ MachineBasicBlock *MBB = MO.getParent()->getParent();
+ if (CommonMBB == nullptr)
+ CommonMBB = MBB;
+ assert(MBB == CommonMBB && "All defs+uses must be in the same basic block");
+ if (MO.isDef()) {
+ const MachineInstr &MI = *MO.getParent();
+ if (!MI.readsRegister(VReg, &TRI)) {
+ assert((!RealDef || RealDef == &MI) &&
+ "Can have at most one definition which is not a redefinition");
+ RealDef = &MI;
+ }
+ }
+ }
+ assert(RealDef != nullptr && "Must have at least 1 Def");
+#endif
+
+ // We should only have one definition of the register. However, to
+ // accommodate the requirements of two-address code we also allow
+ // definitions in subsequent instructions, provided they also read the
+ // register. That way we get a single contiguous lifetime.
+ //
+ // Definitions in MRI.def_begin() are unordered, search for the first.
+ MachineRegisterInfo::def_iterator FirstDef =
+ std::find_if(MRI.def_begin(VReg), MRI.def_end(),
+ [VReg, &TRI](const MachineOperand &MO) {
+ return !MO.getParent()->readsRegister(VReg, &TRI);
+ });
+ assert(FirstDef != MRI.def_end() &&
+ "Must have one definition that does not redefine vreg");
+ MachineInstr &DefMI = *FirstDef->getParent();
+
+ // The register scavenger will report a free register, inserting an
+ // emergency spill/reload if necessary.
+ int SPAdj = 0;
+ const TargetRegisterClass &RC = *MRI.getRegClass(VReg);
+ unsigned SReg = RS.scavengeRegisterBackwards(RC, DefMI.getIterator(),
+ ReserveAfter, SPAdj);
+ MRI.replaceRegWith(VReg, SReg);
+ ++NumScavengedRegs;
+ return SReg;
+}
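+// A hypothetical example of the two-address accommodation described above:
+// both instructions define %vreg0, but only the first is the "real"
+// definition, since the second also reads the register, so the vreg keeps
+// one contiguous lifetime:
+//   %vreg0 = LOAD <fi#0>
+//   %vreg0 = ADD %vreg0, 4    ; redefinition (reads and writes %vreg0)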
+
+/// Allocate (scavenge) vregs inside a single basic block.
+/// Returns true if the target spill callback created new vregs and a 2nd pass
+/// is necessary.
+static bool scavengeFrameVirtualRegsInBlock(MachineRegisterInfo &MRI,
+ RegScavenger &RS,
+ MachineBasicBlock &MBB) {
+ const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();
+ RS.enterBasicBlockEnd(MBB);
+
+ unsigned InitialNumVirtRegs = MRI.getNumVirtRegs();
+ bool NextInstructionReadsVReg = false;
+ for (MachineBasicBlock::iterator I = MBB.end(); I != MBB.begin(); ) {
+ --I;
+ // Move RegScavenger to the position between *I and *std::next(I).
+ RS.backward(I);
+
+ // Look for unassigned vregs in the uses of *std::next(I).
+ if (NextInstructionReadsVReg) {
+ MachineBasicBlock::iterator N = std::next(I);
+ const MachineInstr &NMI = *N;
+ for (const MachineOperand &MO : NMI.operands()) {
+ if (!MO.isReg())
+ continue;
+ unsigned Reg = MO.getReg();
+ // We only care about virtual registers and ignore virtual registers
+ // created by the target callbacks in the process (those will be handled
+ // in a scavenging round).
+ if (!TargetRegisterInfo::isVirtualRegister(Reg) ||
+ TargetRegisterInfo::virtReg2Index(Reg) >= InitialNumVirtRegs)
+ continue;
+ if (!MO.readsReg())
+ continue;
+
+ unsigned SReg = scavengeVReg(MRI, RS, Reg, true);
+ N->addRegisterKilled(SReg, &TRI, false);
+ RS.setRegUsed(SReg);
+ }
+ }
+
+ // Look for unassigned vregs in the defs of *I.
+ NextInstructionReadsVReg = false;
+ const MachineInstr &MI = *I;
+ for (const MachineOperand &MO : MI.operands()) {
+ if (!MO.isReg())
+ continue;
+ unsigned Reg = MO.getReg();
+ // Only vregs, no newly created vregs (see above).
+ if (!TargetRegisterInfo::isVirtualRegister(Reg) ||
+ TargetRegisterInfo::virtReg2Index(Reg) >= InitialNumVirtRegs)
+ continue;
+ // We have to look at all operands anyway so we can precalculate here
+ // whether there is a reading operand. This allows us to skip the use
+ // step in the next iteration if there was none.
+ assert(!MO.isInternalRead() && "Cannot assign inside bundles");
+ assert((!MO.isUndef() || MO.isDef()) && "Cannot handle undef uses");
+ if (MO.readsReg()) {
+ NextInstructionReadsVReg = true;
+ }
+ if (MO.isDef()) {
+ unsigned SReg = scavengeVReg(MRI, RS, Reg, false);
+ I->addRegisterDead(SReg, &TRI, false);
+ }
+ }
+ }
+#ifndef NDEBUG
+ for (const MachineOperand &MO : MBB.front().operands()) {
+ if (!MO.isReg() || !TargetRegisterInfo::isVirtualRegister(MO.getReg()))
+ continue;
+ assert(!MO.isInternalRead() && "Cannot assign inside bundles");
+ assert((!MO.isUndef() || MO.isDef()) && "Cannot handle undef uses");
+ assert(!MO.readsReg() && "Vreg use in first instruction not allowed");
+ }
+#endif
+
+ return MRI.getNumVirtRegs() != InitialNumVirtRegs;
+}
+
+void llvm::scavengeFrameVirtualRegs(MachineFunction &MF, RegScavenger &RS) {
+ // FIXME: Iterating over the instruction stream is unnecessary. We can simply
+ // iterate over the vreg use list, which at this point only contains machine
+ // operands for which eliminateFrameIndex needs a new scratch reg.
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ // Shortcut.
+ if (MRI.getNumVirtRegs() == 0) {
+ MF.getProperties().set(MachineFunctionProperties::Property::NoVRegs);
+ return;
+ }
+
+ // Run through the instructions and find any virtual registers.
+ for (MachineBasicBlock &MBB : MF) {
+ if (MBB.empty())
+ continue;
+
+ bool Again = scavengeFrameVirtualRegsInBlock(MRI, RS, MBB);
+ if (Again) {
+ DEBUG(dbgs() << "Warning: Required two scavenging passes for block "
+ << MBB.getName() << '\n');
+ Again = scavengeFrameVirtualRegsInBlock(MRI, RS, MBB);
+ // The target required a 2nd run (because it created new vregs while
+ // spilling). Refuse to do another pass to keep compile time in check.
+ if (Again)
+ report_fatal_error("Incomplete scavenging after 2nd pass");
+ }
+ }
+
+ MRI.clearVirtRegs();
+ MF.getProperties().set(MachineFunctionProperties::Property::NoVRegs);
+}
+
+namespace {
+/// This class runs register scavenging independently of the
+/// PrologEpilogInserter; it is used for testing.
+class ScavengerTest : public MachineFunctionPass {
+public:
+ static char ID;
+ ScavengerTest() : MachineFunctionPass(ID) {}
+ bool runOnMachineFunction(MachineFunction &MF) override {
+ const TargetSubtargetInfo &STI = MF.getSubtarget();
+ const TargetFrameLowering &TFL = *STI.getFrameLowering();
+
+ RegScavenger RS;
+ // Let's hope that calling those outside of PrologEpilogInserter works
+ // well enough to initialize the scavenger with some emergency spill
+ // slots for the target.
+ BitVector SavedRegs;
+ TFL.determineCalleeSaves(MF, SavedRegs, &RS);
+ TFL.processFunctionBeforeFrameFinalized(MF, &RS);
+
+ // Let's scavenge the current function.
+ scavengeFrameVirtualRegs(MF, RS);
+ return true;
+ }
+};
+char ScavengerTest::ID;
+
+} // end anonymous namespace
+
+INITIALIZE_PASS(ScavengerTest, "scavenger-test",
+ "Scavenge virtual registers inside basic blocks", false, false)
diff --git a/contrib/llvm/lib/CodeGen/RegisterUsageInfo.cpp b/contrib/llvm/lib/CodeGen/RegisterUsageInfo.cpp
index 66f1966..30757f0 100644
--- a/contrib/llvm/lib/CodeGen/RegisterUsageInfo.cpp
+++ b/contrib/llvm/lib/CodeGen/RegisterUsageInfo.cpp
@@ -1,4 +1,4 @@
-//===- RegisterUsageInfo.cpp - Register Usage Informartion Storage --------===//
+//===- RegisterUsageInfo.cpp - Register Usage Information Storage ---------===//
//
// The LLVM Compiler Infrastructure
//
@@ -12,11 +12,22 @@
///
//===----------------------------------------------------------------------===//
+#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/RegisterUsageInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/IR/Function.h"
#include "llvm/IR/Module.h"
-#include "llvm/Support/Debug.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <utility>
+#include <vector>
using namespace llvm;
@@ -27,7 +38,7 @@ static cl::opt<bool> DumpRegUsage(
cl::desc("print register usage details collected for analysis."));
INITIALIZE_PASS(PhysicalRegisterUsageInfo, "reg-usage-info",
- "Register Usage Informartion Stroage", false, true)
+ "Register Usage Information Storage", false, true)
char PhysicalRegisterUsageInfo::ID = 0;
@@ -63,7 +74,7 @@ PhysicalRegisterUsageInfo::getRegUsageInfo(const Function *FP) {
void PhysicalRegisterUsageInfo::print(raw_ostream &OS, const Module *M) const {
const TargetRegisterInfo *TRI;
- typedef std::pair<const Function *, std::vector<uint32_t>> FuncPtrRegMaskPair;
+ using FuncPtrRegMaskPair = std::pair<const Function *, std::vector<uint32_t>>;
SmallVector<const FuncPtrRegMaskPair *, 64> FPRMPairVector;
diff --git a/contrib/llvm/lib/CodeGen/RenameIndependentSubregs.cpp b/contrib/llvm/lib/CodeGen/RenameIndependentSubregs.cpp
index 2f7ee8b..bd5ecbd 100644
--- a/contrib/llvm/lib/CodeGen/RenameIndependentSubregs.cpp
+++ b/contrib/llvm/lib/CodeGen/RenameIndependentSubregs.cpp
@@ -32,10 +32,10 @@
#include "llvm/CodeGen/LiveInterval.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
using namespace llvm;
@@ -112,11 +112,11 @@ char RenameIndependentSubregs::ID;
char &llvm::RenameIndependentSubregsID = RenameIndependentSubregs::ID;
-INITIALIZE_PASS_BEGIN(RenameIndependentSubregs, "rename-independent-subregs",
+INITIALIZE_PASS_BEGIN(RenameIndependentSubregs, DEBUG_TYPE,
"Rename Independent Subregisters", false, false)
INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
-INITIALIZE_PASS_END(RenameIndependentSubregs, "rename-independent-subregs",
+INITIALIZE_PASS_END(RenameIndependentSubregs, DEBUG_TYPE,
"Rename Independent Subregisters", false, false)
bool RenameIndependentSubregs::renameComponents(LiveInterval &LI) const {
@@ -212,7 +212,7 @@ void RenameIndependentSubregs::rewriteOperands(const IntEqClasses &Classes,
const SmallVectorImpl<SubRangeInfo> &SubRangeInfos,
const SmallVectorImpl<LiveInterval*> &Intervals) const {
const TargetRegisterInfo &TRI = *MRI->getTargetRegisterInfo();
- unsigned Reg = Intervals[0]->reg;;
+ unsigned Reg = Intervals[0]->reg;
for (MachineRegisterInfo::reg_nodbg_iterator I = MRI->reg_nodbg_begin(Reg),
E = MRI->reg_nodbg_end(); I != E; ) {
MachineOperand &MO = *I++;
@@ -243,6 +243,15 @@ void RenameIndependentSubregs::rewriteOperands(const IntEqClasses &Classes,
unsigned VReg = Intervals[ID]->reg;
MO.setReg(VReg);
+
+ if (MO.isTied() && Reg != VReg) {
+ // Undef use operands are not tracked in the equivalence class, but need
+ // to be updated if they are tied.
+ MO.getParent()->substituteRegister(Reg, VReg, 0, TRI);
+
+ // substituteRegister breaks the iterator, so restart.
+ I = MRI->reg_nodbg_begin(Reg);
+ }
}
// TODO: We could attempt to recompute new register classes while visiting
// the operands: Some of the split register may be fine with less constraint
diff --git a/contrib/llvm/lib/CodeGen/ResetMachineFunctionPass.cpp b/contrib/llvm/lib/CodeGen/ResetMachineFunctionPass.cpp
index 4519641..01b3db4 100644
--- a/contrib/llvm/lib/CodeGen/ResetMachineFunctionPass.cpp
+++ b/contrib/llvm/lib/CodeGen/ResetMachineFunctionPass.cpp
@@ -14,9 +14,9 @@
//===----------------------------------------------------------------------===//
#include "llvm/ADT/Statistic.h"
-#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/Passes.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/Support/Debug.h"
using namespace llvm;
@@ -30,17 +30,23 @@ namespace {
/// Tells whether or not this pass should emit a fallback
/// diagnostic when it resets a function.
bool EmitFallbackDiag;
+ /// Whether we should abort immediately instead of resetting the function.
+ bool AbortOnFailedISel;
public:
static char ID; // Pass identification, replacement for typeid
- ResetMachineFunction(bool EmitFallbackDiag = false)
- : MachineFunctionPass(ID), EmitFallbackDiag(EmitFallbackDiag) {}
+ ResetMachineFunction(bool EmitFallbackDiag = false,
+ bool AbortOnFailedISel = false)
+ : MachineFunctionPass(ID), EmitFallbackDiag(EmitFallbackDiag),
+ AbortOnFailedISel(AbortOnFailedISel) {}
StringRef getPassName() const override { return "ResetMachineFunction"; }
bool runOnMachineFunction(MachineFunction &MF) override {
if (MF.getProperties().hasProperty(
MachineFunctionProperties::Property::FailedISel)) {
+ if (AbortOnFailedISel)
+ report_fatal_error("Instruction selection failed");
DEBUG(dbgs() << "Reseting: " << MF.getName() << '\n');
++NumFunctionsReset;
MF.reset();
@@ -62,6 +68,7 @@ INITIALIZE_PASS(ResetMachineFunction, DEBUG_TYPE,
"reset machine function if ISel failed", false, false)
MachineFunctionPass *
-llvm::createResetMachineFunctionPass(bool EmitFallbackDiag = false) {
- return new ResetMachineFunction(EmitFallbackDiag);
+llvm::createResetMachineFunctionPass(bool EmitFallbackDiag = false,
+ bool AbortOnFailedISel = false) {
+ return new ResetMachineFunction(EmitFallbackDiag, AbortOnFailedISel);
}
diff --git a/contrib/llvm/lib/CodeGen/SafeStack.cpp b/contrib/llvm/lib/CodeGen/SafeStack.cpp
index 2b82df2..8584a9b 100644
--- a/contrib/llvm/lib/CodeGen/SafeStack.cpp
+++ b/contrib/llvm/lib/CodeGen/SafeStack.cpp
@@ -19,10 +19,12 @@
#include "SafeStackLayout.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/Triple.h"
+#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DIBuilder.h"
#include "llvm/IR/DataLayout.h"
@@ -50,7 +52,7 @@
using namespace llvm;
using namespace llvm::safestack;
-#define DEBUG_TYPE "safestack"
+#define DEBUG_TYPE "safe-stack"
namespace llvm {
@@ -92,11 +94,11 @@ public:
/// determined statically), and the unsafe stack, which contains all
/// local variables that are accessed in ways that we can't prove to
/// be safe.
-class SafeStack : public FunctionPass {
- const TargetMachine *TM;
- const TargetLoweringBase *TL;
- const DataLayout *DL;
- ScalarEvolution *SE;
+class SafeStack {
+ Function &F;
+ const TargetLoweringBase &TL;
+ const DataLayout &DL;
+ ScalarEvolution &SE;
Type *StackPtrTy;
Type *IntPtrTy;
@@ -171,33 +173,21 @@ class SafeStack : public FunctionPass {
uint64_t AllocaSize);
public:
- static char ID; // Pass identification, replacement for typeid.
- SafeStack(const TargetMachine *TM)
- : FunctionPass(ID), TM(TM), TL(nullptr), DL(nullptr) {
- initializeSafeStackPass(*PassRegistry::getPassRegistry());
- }
- SafeStack() : SafeStack(nullptr) {}
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<ScalarEvolutionWrapperPass>();
- }
-
- bool doInitialization(Module &M) override {
- DL = &M.getDataLayout();
-
- StackPtrTy = Type::getInt8PtrTy(M.getContext());
- IntPtrTy = DL->getIntPtrType(M.getContext());
- Int32Ty = Type::getInt32Ty(M.getContext());
- Int8Ty = Type::getInt8Ty(M.getContext());
-
- return false;
- }
-
- bool runOnFunction(Function &F) override;
-}; // class SafeStack
+ SafeStack(Function &F, const TargetLoweringBase &TL, const DataLayout &DL,
+ ScalarEvolution &SE)
+ : F(F), TL(TL), DL(DL), SE(SE),
+ StackPtrTy(Type::getInt8PtrTy(F.getContext())),
+ IntPtrTy(DL.getIntPtrType(F.getContext())),
+ Int32Ty(Type::getInt32Ty(F.getContext())),
+ Int8Ty(Type::getInt8Ty(F.getContext())) {}
+
+ // Run the transformation on the associated function.
+ // Returns whether the function was changed.
+ bool run();
+};
uint64_t SafeStack::getStaticAllocaAllocationSize(const AllocaInst* AI) {
- uint64_t Size = DL->getTypeAllocSize(AI->getAllocatedType());
+ uint64_t Size = DL.getTypeAllocSize(AI->getAllocatedType());
if (AI->isArrayAllocation()) {
auto C = dyn_cast<ConstantInt>(AI->getArraySize());
if (!C)
@@ -209,11 +199,11 @@ uint64_t SafeStack::getStaticAllocaAllocationSize(const AllocaInst* AI) {
bool SafeStack::IsAccessSafe(Value *Addr, uint64_t AccessSize,
const Value *AllocaPtr, uint64_t AllocaSize) {
- AllocaOffsetRewriter Rewriter(*SE, AllocaPtr);
- const SCEV *Expr = Rewriter.visit(SE->getSCEV(Addr));
+ AllocaOffsetRewriter Rewriter(SE, AllocaPtr);
+ const SCEV *Expr = Rewriter.visit(SE.getSCEV(Addr));
- uint64_t BitWidth = SE->getTypeSizeInBits(Expr->getType());
- ConstantRange AccessStartRange = SE->getUnsignedRange(Expr);
+ uint64_t BitWidth = SE.getTypeSizeInBits(Expr->getType());
+ ConstantRange AccessStartRange = SE.getUnsignedRange(Expr);
ConstantRange SizeRange =
ConstantRange(APInt(BitWidth, 0), APInt(BitWidth, AccessSize));
ConstantRange AccessRange = AccessStartRange.add(SizeRange);
@@ -226,8 +216,8 @@ bool SafeStack::IsAccessSafe(Value *Addr, uint64_t AccessSize,
<< *AllocaPtr << "\n"
<< " Access " << *Addr << "\n"
<< " SCEV " << *Expr
- << " U: " << SE->getUnsignedRange(Expr)
- << ", S: " << SE->getSignedRange(Expr) << "\n"
+ << " U: " << SE.getUnsignedRange(Expr)
+ << ", S: " << SE.getSignedRange(Expr) << "\n"
<< " Range " << AccessRange << "\n"
<< " AllocaRange " << AllocaRange << "\n"
<< " " << (Safe ? "safe" : "unsafe") << "\n");
@@ -266,7 +256,7 @@ bool SafeStack::IsSafeStackAlloca(const Value *AllocaPtr, uint64_t AllocaSize) {
switch (I->getOpcode()) {
case Instruction::Load: {
- if (!IsAccessSafe(UI, DL->getTypeStoreSize(I->getType()), AllocaPtr,
+ if (!IsAccessSafe(UI, DL.getTypeStoreSize(I->getType()), AllocaPtr,
AllocaSize))
return false;
break;
@@ -282,7 +272,7 @@ bool SafeStack::IsSafeStackAlloca(const Value *AllocaPtr, uint64_t AllocaSize) {
return false;
}
- if (!IsAccessSafe(UI, DL->getTypeStoreSize(I->getOperand(0)->getType()),
+ if (!IsAccessSafe(UI, DL.getTypeStoreSize(I->getOperand(0)->getType()),
AllocaPtr, AllocaSize))
return false;
break;
@@ -343,7 +333,7 @@ bool SafeStack::IsSafeStackAlloca(const Value *AllocaPtr, uint64_t AllocaSize) {
}
Value *SafeStack::getStackGuard(IRBuilder<> &IRB, Function &F) {
- Value *StackGuardVar = TL->getIRStackGuard(IRB);
+ Value *StackGuardVar = TL.getIRStackGuard(IRB);
if (!StackGuardVar)
StackGuardVar =
F.getParent()->getOrInsertGlobal("__stack_chk_guard", StackPtrTy);
@@ -390,7 +380,7 @@ void SafeStack::findInsts(Function &F,
if (!Arg.hasByValAttr())
continue;
uint64_t Size =
- DL->getTypeStoreSize(Arg.getType()->getPointerElementType());
+ DL.getTypeStoreSize(Arg.getType()->getPointerElementType());
if (IsSafeStackAlloca(&Arg, Size))
continue;
@@ -451,7 +441,7 @@ void SafeStack::checkStackGuard(IRBuilder<> &IRB, Function &F, ReturnInst &RI,
IRBuilder<> IRBFail(CheckTerm);
// FIXME: respect -fsanitize-trap / -ftrap-function here?
Constant *StackChkFail = F.getParent()->getOrInsertFunction(
- "__stack_chk_fail", IRB.getVoidTy(), nullptr);
+ "__stack_chk_fail", IRB.getVoidTy());
IRBFail.CreateCall(StackChkFail, {});
}
@@ -476,19 +466,19 @@ Value *SafeStack::moveStaticAllocasToUnsafeStack(
if (StackGuardSlot) {
Type *Ty = StackGuardSlot->getAllocatedType();
unsigned Align =
- std::max(DL->getPrefTypeAlignment(Ty), StackGuardSlot->getAlignment());
+ std::max(DL.getPrefTypeAlignment(Ty), StackGuardSlot->getAlignment());
SSL.addObject(StackGuardSlot, getStaticAllocaAllocationSize(StackGuardSlot),
Align, SSC.getFullLiveRange());
}
for (Argument *Arg : ByValArguments) {
Type *Ty = Arg->getType()->getPointerElementType();
- uint64_t Size = DL->getTypeStoreSize(Ty);
+ uint64_t Size = DL.getTypeStoreSize(Ty);
if (Size == 0)
Size = 1; // Don't create zero-sized stack objects.
// Ensure the object is properly aligned.
- unsigned Align = std::max((unsigned)DL->getPrefTypeAlignment(Ty),
+ unsigned Align = std::max((unsigned)DL.getPrefTypeAlignment(Ty),
Arg->getParamAlignment());
SSL.addObject(Arg, Size, Align, SSC.getFullLiveRange());
}
@@ -501,7 +491,7 @@ Value *SafeStack::moveStaticAllocasToUnsafeStack(
// Ensure the object is properly aligned.
unsigned Align =
- std::max((unsigned)DL->getPrefTypeAlignment(Ty), AI->getAlignment());
+ std::max((unsigned)DL.getPrefTypeAlignment(Ty), AI->getAlignment());
SSL.addObject(AI, Size, Align, SSC.getLiveRange(AI));
}
@@ -539,7 +529,7 @@ Value *SafeStack::moveStaticAllocasToUnsafeStack(
unsigned Offset = SSL.getObjectOffset(Arg);
Type *Ty = Arg->getType()->getPointerElementType();
- uint64_t Size = DL->getTypeStoreSize(Ty);
+ uint64_t Size = DL.getTypeStoreSize(Ty);
if (Size == 0)
Size = 1; // Don't create zero-sized stack objects.
@@ -550,7 +540,7 @@ Value *SafeStack::moveStaticAllocasToUnsafeStack(
// Replace alloc with the new location.
replaceDbgDeclare(Arg, BasePointer, BasePointer->getNextNode(), DIB,
- /*Deref=*/true, -Offset);
+ /*Deref=*/false, -Offset);
Arg->replaceAllUsesWith(NewArg);
IRB.SetInsertPoint(cast<Instruction>(NewArg)->getNextNode());
IRB.CreateMemCpy(Off, Arg, Size, Arg->getParamAlignment());
@@ -565,7 +555,7 @@ Value *SafeStack::moveStaticAllocasToUnsafeStack(
if (Size == 0)
Size = 1; // Don't create zero-sized stack objects.
- replaceDbgDeclareForAlloca(AI, BasePointer, DIB, /*Deref=*/true, -Offset);
+ replaceDbgDeclareForAlloca(AI, BasePointer, DIB, /*Deref=*/false, -Offset);
replaceDbgValueForAlloca(AI, BasePointer, DIB, -Offset);
// Replace uses of the alloca with the new location.
@@ -630,7 +620,7 @@ void SafeStack::moveDynamicAllocasToUnsafeStack(
ArraySize = IRB.CreateIntCast(ArraySize, IntPtrTy, false);
Type *Ty = AI->getAllocatedType();
- uint64_t TySize = DL->getTypeAllocSize(Ty);
+ uint64_t TySize = DL.getTypeAllocSize(Ty);
Value *Size = IRB.CreateMul(ArraySize, ConstantInt::get(IntPtrTy, TySize));
Value *SP = IRB.CreatePtrToInt(IRB.CreateLoad(UnsafeStackPtr), IntPtrTy);
@@ -638,7 +628,7 @@ void SafeStack::moveDynamicAllocasToUnsafeStack(
// Align the SP value to satisfy the AllocaInst, type and stack alignments.
unsigned Align = std::max(
- std::max((unsigned)DL->getPrefTypeAlignment(Ty), AI->getAlignment()),
+ std::max((unsigned)DL.getPrefTypeAlignment(Ty), AI->getAlignment()),
(unsigned)StackAlignment);
assert(isPowerOf2_32(Align));
@@ -655,7 +645,7 @@ void SafeStack::moveDynamicAllocasToUnsafeStack(
if (AI->hasName() && isa<Instruction>(NewAI))
NewAI->takeName(AI);
- replaceDbgDeclareForAlloca(AI, NewAI, DIB, /*Deref=*/true);
+ replaceDbgDeclareForAlloca(AI, NewAI, DIB, /*Deref=*/false);
AI->replaceAllUsesWith(NewAI);
AI->eraseFromParent();
}
@@ -685,25 +675,10 @@ void SafeStack::moveDynamicAllocasToUnsafeStack(
}
}
-bool SafeStack::runOnFunction(Function &F) {
- DEBUG(dbgs() << "[SafeStack] Function: " << F.getName() << "\n");
-
- if (!F.hasFnAttribute(Attribute::SafeStack)) {
- DEBUG(dbgs() << "[SafeStack] safestack is not requested"
- " for this function\n");
- return false;
- }
-
- if (F.isDeclaration()) {
- DEBUG(dbgs() << "[SafeStack] function definition"
- " is not available\n");
- return false;
- }
-
- if (!TM)
- report_fatal_error("Target machine is required");
- TL = TM->getSubtargetImpl(F)->getTargetLowering();
- SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
+bool SafeStack::run() {
+ assert(F.hasFnAttribute(Attribute::SafeStack) &&
+ "Can't run SafeStack on a function without the attribute");
+ assert(!F.isDeclaration() && "Can't run SafeStack on a function declaration");
++NumFunctions;
@@ -736,7 +711,7 @@ bool SafeStack::runOnFunction(Function &F) {
++NumUnsafeStackRestorePointsFunctions;
IRBuilder<> IRB(&F.front(), F.begin()->getFirstInsertionPt());
- UnsafeStackPtr = TL->getSafeStackPointerLocation(IRB);
+ UnsafeStackPtr = TL.getSafeStackPointerLocation(IRB);
// Load the current stack pointer (we'll also use it as a base pointer).
// FIXME: use a dedicated register for it ?
@@ -788,14 +763,67 @@ bool SafeStack::runOnFunction(Function &F) {
return true;
}
+class SafeStackLegacyPass : public FunctionPass {
+ const TargetMachine *TM;
+
+public:
+ static char ID; // Pass identification, replacement for typeid.
+ SafeStackLegacyPass() : FunctionPass(ID), TM(nullptr) {
+ initializeSafeStackLegacyPassPass(*PassRegistry::getPassRegistry());
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<TargetPassConfig>();
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
+ AU.addRequired<AssumptionCacheTracker>();
+ }
+
+ bool runOnFunction(Function &F) override {
+ DEBUG(dbgs() << "[SafeStack] Function: " << F.getName() << "\n");
+
+ if (!F.hasFnAttribute(Attribute::SafeStack)) {
+ DEBUG(dbgs() << "[SafeStack] safestack is not requested"
+ " for this function\n");
+ return false;
+ }
+
+ if (F.isDeclaration()) {
+ DEBUG(dbgs() << "[SafeStack] function definition"
+ " is not available\n");
+ return false;
+ }
+
+ TM = &getAnalysis<TargetPassConfig>().getTM<TargetMachine>();
+ auto *TL = TM->getSubtargetImpl(F)->getTargetLowering();
+ if (!TL)
+ report_fatal_error("TargetLowering instance is required");
+
+ auto *DL = &F.getParent()->getDataLayout();
+ auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
+ auto &ACT = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
+
+ // Compute DT and LI only for functions that have the attribute.
+ // This is only useful because the legacy pass manager doesn't let us
+ // compute analyses lazily.
+ // In the backend pipeline, nothing preserves DT before SafeStack, so we
+ // would otherwise always compute it wastefully, even if there is no
+ // function with the safestack attribute.
+ DominatorTree DT(F);
+ LoopInfo LI(DT);
+
+ ScalarEvolution SE(F, TLI, ACT, DT, LI);
+
+ return SafeStack(F, *TL, *DL, SE).run();
+ }
+};
+
} // anonymous namespace
-char SafeStack::ID = 0;
-INITIALIZE_TM_PASS_BEGIN(SafeStack, "safe-stack",
- "Safe Stack instrumentation pass", false, false)
-INITIALIZE_TM_PASS_END(SafeStack, "safe-stack",
- "Safe Stack instrumentation pass", false, false)
+char SafeStackLegacyPass::ID = 0;
+INITIALIZE_PASS_BEGIN(SafeStackLegacyPass, DEBUG_TYPE,
+ "Safe Stack instrumentation pass", false, false)
+INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
+INITIALIZE_PASS_END(SafeStackLegacyPass, DEBUG_TYPE,
+ "Safe Stack instrumentation pass", false, false)
-FunctionPass *llvm::createSafeStackPass(const llvm::TargetMachine *TM) {
- return new SafeStack(TM);
-}
+FunctionPass *llvm::createSafeStackPass() { return new SafeStackLegacyPass(); }
diff --git a/contrib/llvm/lib/CodeGen/SafeStackColoring.cpp b/contrib/llvm/lib/CodeGen/SafeStackColoring.cpp
index 7fbeadd..21f2fa4 100644
--- a/contrib/llvm/lib/CodeGen/SafeStackColoring.cpp
+++ b/contrib/llvm/lib/CodeGen/SafeStackColoring.cpp
@@ -20,9 +20,10 @@ using namespace llvm::safestack;
#define DEBUG_TYPE "safestackcoloring"
+// Disabled by default due to PR32143.
static cl::opt<bool> ClColoring("safe-stack-coloring",
cl::desc("enable safe stack coloring"),
- cl::Hidden, cl::init(true));
+ cl::Hidden, cl::init(false));
const StackColoring::LiveRange &StackColoring::getLiveRange(AllocaInst *AI) {
const auto IT = AllocaNumbering.find(AI);
@@ -236,6 +237,7 @@ void StackColoring::calculateLiveIntervals() {
}
}
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD void StackColoring::dumpAllocas() {
dbgs() << "Allocas:\n";
for (unsigned AllocaNo = 0; AllocaNo < NumAllocas; ++AllocaNo)
@@ -262,6 +264,7 @@ LLVM_DUMP_METHOD void StackColoring::dumpLiveRanges() {
dbgs() << " " << AllocaNo << ": " << Range << "\n";
}
}
+#endif
void StackColoring::run() {
DEBUG(dumpAllocas());
diff --git a/contrib/llvm/lib/CodeGen/ScalarizeMaskedMemIntrin.cpp b/contrib/llvm/lib/CodeGen/ScalarizeMaskedMemIntrin.cpp
new file mode 100644
index 0000000..07b43a8
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/ScalarizeMaskedMemIntrin.cpp
@@ -0,0 +1,656 @@
+//=== ScalarizeMaskedMemIntrin.cpp - Scalarize unsupported masked mem ===//
+//=== intrinsics ===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass replaces masked memory intrinsics - when unsupported by the
+// target - with a chain of basic blocks that handles the elements one by one
+// if the appropriate mask bit is set.
+//
+//===----------------------------------------------------------------------===//
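+//
+// Illustrative input (names assumed for this sketch): a call such as
+// %r = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %p, i32 4,
+// <4 x i1> %m, <4 x i32> %pt)
+// is expanded into a compare-and-branch chain when the target's
+// TargetTransformInfo reports the masked operation as not legal.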
+
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "scalarize-masked-mem-intrin"
+
+namespace {
+
+class ScalarizeMaskedMemIntrin : public FunctionPass {
+ const TargetTransformInfo *TTI;
+
+public:
+ static char ID; // Pass identification, replacement for typeid
+ explicit ScalarizeMaskedMemIntrin() : FunctionPass(ID), TTI(nullptr) {
+ initializeScalarizeMaskedMemIntrinPass(*PassRegistry::getPassRegistry());
+ }
+ bool runOnFunction(Function &F) override;
+
+ StringRef getPassName() const override {
+ return "Scalarize Masked Memory Intrinsics";
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<TargetTransformInfoWrapperPass>();
+ }
+
+private:
+ bool optimizeBlock(BasicBlock &BB, bool &ModifiedDT);
+ bool optimizeCallInst(CallInst *CI, bool &ModifiedDT);
+};
+} // namespace
+
+char ScalarizeMaskedMemIntrin::ID = 0;
+INITIALIZE_PASS(ScalarizeMaskedMemIntrin, DEBUG_TYPE,
+ "Scalarize unsupported masked memory intrinsics", false, false)
+
+FunctionPass *llvm::createScalarizeMaskedMemIntrinPass() {
+ return new ScalarizeMaskedMemIntrin();
+}
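+
+// A minimal sketch of wiring the pass into a backend pipeline.
+// "MyTargetPassConfig" is hypothetical; a target's pass config may already
+// schedule this pass generically:
+//
+// void MyTargetPassConfig::addIRPasses() {
+//   addPass(createScalarizeMaskedMemIntrinPass());
+//   TargetPassConfig::addIRPasses();
+// }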
+
+// Translate a masked load intrinsic like
+// <16 x i32> @llvm.masked.load(<16 x i32>* %addr, i32 align,
+// <16 x i1> %mask, <16 x i32> %passthru)
+// to a chain of basic blocks, loading the elements one by one if the
+// appropriate mask bit is set.
+//
+// %1 = bitcast i8* %addr to i32*
+// %2 = extractelement <16 x i1> %mask, i32 0
+// %3 = icmp eq i1 %2, true
+// br i1 %3, label %cond.load, label %else
+//
+// cond.load: ; preds = %0
+// %4 = getelementptr i32* %1, i32 0
+// %5 = load i32* %4
+// %6 = insertelement <16 x i32> undef, i32 %5, i32 0
+// br label %else
+//
+// else: ; preds = %0, %cond.load
+// %res.phi.else = phi <16 x i32> [ %6, %cond.load ], [ undef, %0 ]
+// %7 = extractelement <16 x i1> %mask, i32 1
+// %8 = icmp eq i1 %7, true
+// br i1 %8, label %cond.load1, label %else2
+//
+// cond.load1: ; preds = %else
+// %9 = getelementptr i32* %1, i32 1
+// %10 = load i32* %9
+// %11 = insertelement <16 x i32> %res.phi.else, i32 %10, i32 1
+// br label %else2
+//
+// else2: ; preds = %else, %cond.load1
+// %res.phi.else3 = phi <16 x i32> [ %11, %cond.load1 ], [ %res.phi.else, %else ]
+// %12 = extractelement <16 x i1> %mask, i32 2
+// %13 = icmp eq i1 %12, true
+// br i1 %13, label %cond.load4, label %else5
+//
+static void scalarizeMaskedLoad(CallInst *CI) {
+ Value *Ptr = CI->getArgOperand(0);
+ Value *Alignment = CI->getArgOperand(1);
+ Value *Mask = CI->getArgOperand(2);
+ Value *Src0 = CI->getArgOperand(3);
+
+ unsigned AlignVal = cast<ConstantInt>(Alignment)->getZExtValue();
+ VectorType *VecType = dyn_cast<VectorType>(CI->getType());
+ assert(VecType && "Unexpected return type of masked load intrinsic");
+
+ Type *EltTy = CI->getType()->getVectorElementType();
+
+ IRBuilder<> Builder(CI->getContext());
+ Instruction *InsertPt = CI;
+ BasicBlock *IfBlock = CI->getParent();
+ BasicBlock *CondBlock = nullptr;
+ BasicBlock *PrevIfBlock = CI->getParent();
+
+ Builder.SetInsertPoint(InsertPt);
+ Builder.SetCurrentDebugLocation(CI->getDebugLoc());
+
+ // Short-cut if the mask is all-true.
+ bool IsAllOnesMask =
+ isa<Constant>(Mask) && cast<Constant>(Mask)->isAllOnesValue();
+
+ if (IsAllOnesMask) {
+ Value *NewI = Builder.CreateAlignedLoad(Ptr, AlignVal);
+ CI->replaceAllUsesWith(NewI);
+ CI->eraseFromParent();
+ return;
+ }
+
+ // Adjust alignment for the scalar instruction.
+ AlignVal = std::min(AlignVal, VecType->getScalarSizeInBits() / 8);
+ // Bitcast %addr from i8* to EltTy*
+ Type *NewPtrType =
+ EltTy->getPointerTo(cast<PointerType>(Ptr->getType())->getAddressSpace());
+ Value *FirstEltPtr = Builder.CreateBitCast(Ptr, NewPtrType);
+ unsigned VectorWidth = VecType->getNumElements();
+
+ Value *UndefVal = UndefValue::get(VecType);
+
+ // The result vector
+ Value *VResult = UndefVal;
+
+ if (isa<ConstantVector>(Mask)) {
+ for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
+ if (cast<ConstantVector>(Mask)->getOperand(Idx)->isNullValue())
+ continue;
+ Value *Gep =
+ Builder.CreateInBoundsGEP(EltTy, FirstEltPtr, Builder.getInt32(Idx));
+ LoadInst *Load = Builder.CreateAlignedLoad(Gep, AlignVal);
+ VResult =
+ Builder.CreateInsertElement(VResult, Load, Builder.getInt32(Idx));
+ }
+ Value *NewI = Builder.CreateSelect(Mask, VResult, Src0);
+ CI->replaceAllUsesWith(NewI);
+ CI->eraseFromParent();
+ return;
+ }
+
+ PHINode *Phi = nullptr;
+ Value *PrevPhi = UndefVal;
+
+ for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
+
+ // Fill the "else" block, created in the previous iteration
+ //
+ // %res.phi.else3 = phi <16 x i32> [ %11, %cond.load1 ], [ %res.phi.else, %else ]
+ // %mask_1 = extractelement <16 x i1> %mask, i32 Idx
+ // %to_load = icmp eq i1 %mask_1, true
+ // br i1 %to_load, label %cond.load, label %else
+ //
+ if (Idx > 0) {
+ Phi = Builder.CreatePHI(VecType, 2, "res.phi.else");
+ Phi->addIncoming(VResult, CondBlock);
+ Phi->addIncoming(PrevPhi, PrevIfBlock);
+ PrevPhi = Phi;
+ VResult = Phi;
+ }
+
+ Value *Predicate =
+ Builder.CreateExtractElement(Mask, Builder.getInt32(Idx));
+ Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_EQ, Predicate,
+ ConstantInt::get(Predicate->getType(), 1));
+
+ // Create "cond" block
+ //
+ // %EltAddr = getelementptr i32* %1, i32 0
+ // %Elt = load i32* %EltAddr
+ // VResult = insertelement <16 x i32> VResult, i32 %Elt, i32 Idx
+ //
+ CondBlock = IfBlock->splitBasicBlock(InsertPt->getIterator(), "cond.load");
+ Builder.SetInsertPoint(InsertPt);
+
+ Value *Gep =
+ Builder.CreateInBoundsGEP(EltTy, FirstEltPtr, Builder.getInt32(Idx));
+ LoadInst *Load = Builder.CreateAlignedLoad(Gep, AlignVal);
+ VResult = Builder.CreateInsertElement(VResult, Load, Builder.getInt32(Idx));
+
+ // Create "else" block, fill it in the next iteration
+ BasicBlock *NewIfBlock =
+ CondBlock->splitBasicBlock(InsertPt->getIterator(), "else");
+ Builder.SetInsertPoint(InsertPt);
+ Instruction *OldBr = IfBlock->getTerminator();
+ BranchInst::Create(CondBlock, NewIfBlock, Cmp, OldBr);
+ OldBr->eraseFromParent();
+ PrevIfBlock = IfBlock;
+ IfBlock = NewIfBlock;
+ }
+
+ Phi = Builder.CreatePHI(VecType, 2, "res.phi.select");
+ Phi->addIncoming(VResult, CondBlock);
+ Phi->addIncoming(PrevPhi, PrevIfBlock);
+ Value *NewI = Builder.CreateSelect(Mask, Phi, Src0);
+ CI->replaceAllUsesWith(NewI);
+ CI->eraseFromParent();
+}
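+
+// Note: for a non-constant mask over N elements this materializes N
+// cond.load/else block pairs, which is why the caller flags ModifiedDT.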
+
+// Translate a masked store intrinsic, like
+// void @llvm.masked.store(<16 x i32> %src, <16 x i32>* %addr, i32 align,
+// <16 x i1> %mask)
+// to a chain of basic blocks, storing the elements one by one if the
+// appropriate mask bit is set.
+//
+// %1 = bitcast i8* %addr to i32*
+// %2 = extractelement <16 x i1> %mask, i32 0
+// %3 = icmp eq i1 %2, true
+// br i1 %3, label %cond.store, label %else
+//
+// cond.store: ; preds = %0
+// %4 = extractelement <16 x i32> %val, i32 0
+// %5 = getelementptr i32* %1, i32 0
+// store i32 %4, i32* %5
+// br label %else
+//
+// else: ; preds = %0, %cond.store
+// %6 = extractelement <16 x i1> %mask, i32 1
+// %7 = icmp eq i1 %6, true
+// br i1 %7, label %cond.store1, label %else2
+//
+// cond.store1: ; preds = %else
+// %8 = extractelement <16 x i32> %val, i32 1
+// %9 = getelementptr i32* %1, i32 1
+// store i32 %8, i32* %9
+// br label %else2
+// . . .
+static void scalarizeMaskedStore(CallInst *CI) {
+ Value *Src = CI->getArgOperand(0);
+ Value *Ptr = CI->getArgOperand(1);
+ Value *Alignment = CI->getArgOperand(2);
+ Value *Mask = CI->getArgOperand(3);
+
+ unsigned AlignVal = cast<ConstantInt>(Alignment)->getZExtValue();
+ VectorType *VecType = dyn_cast<VectorType>(Src->getType());
+ assert(VecType && "Unexpected data type in masked store intrinsic");
+
+ Type *EltTy = VecType->getElementType();
+
+ IRBuilder<> Builder(CI->getContext());
+ Instruction *InsertPt = CI;
+ BasicBlock *IfBlock = CI->getParent();
+ Builder.SetInsertPoint(InsertPt);
+ Builder.SetCurrentDebugLocation(CI->getDebugLoc());
+
+ // Short-cut if the mask is all-true.
+ bool IsAllOnesMask =
+ isa<Constant>(Mask) && cast<Constant>(Mask)->isAllOnesValue();
+
+ if (IsAllOnesMask) {
+ Builder.CreateAlignedStore(Src, Ptr, AlignVal);
+ CI->eraseFromParent();
+ return;
+ }
+
+ // Adjust alignment for the scalar instruction.
+ AlignVal = std::min(AlignVal, VecType->getScalarSizeInBits() / 8);
+ // Bitcast %addr from i8* to EltTy*
+ Type *NewPtrType =
+ EltTy->getPointerTo(cast<PointerType>(Ptr->getType())->getAddressSpace());
+ Value *FirstEltPtr = Builder.CreateBitCast(Ptr, NewPtrType);
+ unsigned VectorWidth = VecType->getNumElements();
+
+ if (isa<ConstantVector>(Mask)) {
+ for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
+ if (cast<ConstantVector>(Mask)->getOperand(Idx)->isNullValue())
+ continue;
+ Value *OneElt = Builder.CreateExtractElement(Src, Builder.getInt32(Idx));
+ Value *Gep =
+ Builder.CreateInBoundsGEP(EltTy, FirstEltPtr, Builder.getInt32(Idx));
+ Builder.CreateAlignedStore(OneElt, Gep, AlignVal);
+ }
+ CI->eraseFromParent();
+ return;
+ }
+
+ for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
+
+ // Fill the "else" block, created in the previous iteration
+ //
+ // %mask_1 = extractelement <16 x i1> %mask, i32 Idx
+ // %to_store = icmp eq i1 %mask_1, true
+ // br i1 %to_store, label %cond.store, label %else
+ //
+ Value *Predicate =
+ Builder.CreateExtractElement(Mask, Builder.getInt32(Idx));
+ Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_EQ, Predicate,
+ ConstantInt::get(Predicate->getType(), 1));
+
+ // Create "cond" block
+ //
+ // %OneElt = extractelement <16 x i32> %Src, i32 Idx
+ // %EltAddr = getelementptr i32* %1, i32 0
+ // store i32 %OneElt, i32* %EltAddr
+ //
+ BasicBlock *CondBlock =
+ IfBlock->splitBasicBlock(InsertPt->getIterator(), "cond.store");
+ Builder.SetInsertPoint(InsertPt);
+
+ Value *OneElt = Builder.CreateExtractElement(Src, Builder.getInt32(Idx));
+ Value *Gep =
+ Builder.CreateInBoundsGEP(EltTy, FirstEltPtr, Builder.getInt32(Idx));
+ Builder.CreateAlignedStore(OneElt, Gep, AlignVal);
+
+ // Create "else" block, fill it in the next iteration
+ BasicBlock *NewIfBlock =
+ CondBlock->splitBasicBlock(InsertPt->getIterator(), "else");
+ Builder.SetInsertPoint(InsertPt);
+ Instruction *OldBr = IfBlock->getTerminator();
+ BranchInst::Create(CondBlock, NewIfBlock, Cmp, OldBr);
+ OldBr->eraseFromParent();
+ IfBlock = NewIfBlock;
+ }
+ CI->eraseFromParent();
+}
+
+// Translate a masked gather intrinsic like
+// <16 x i32> @llvm.masked.gather.v16i32(<16 x i32*> %Ptrs, i32 4,
+// <16 x i1> %Mask, <16 x i32> %Src)
+// to a chain of basic blocks, loading the elements one by one if the
+// appropriate mask bit is set.
+//
+// %Ptrs = getelementptr i32, i32* %base, <16 x i64> %ind
+// %Mask0 = extractelement <16 x i1> %Mask, i32 0
+// %ToLoad0 = icmp eq i1 %Mask0, true
+// br i1 %ToLoad0, label %cond.load, label %else
+//
+// cond.load:
+// %Ptr0 = extractelement <16 x i32*> %Ptrs, i32 0
+// %Load0 = load i32, i32* %Ptr0, align 4
+// %Res0 = insertelement <16 x i32> undef, i32 %Load0, i32 0
+// br label %else
+//
+// else:
+// %res.phi.else = phi <16 x i32> [ %Res0, %cond.load ], [ undef, %0 ]
+// %Mask1 = extractelement <16 x i1> %Mask, i32 1
+// %ToLoad1 = icmp eq i1 %Mask1, true
+// br i1 %ToLoad1, label %cond.load1, label %else2
+//
+// cond.load1:
+// %Ptr1 = extractelement <16 x i32*> %Ptrs, i32 1
+// %Load1 = load i32, i32* %Ptr1, align 4
+// %Res1 = insertelement <16 x i32> %res.phi.else, i32 %Load1, i32 1
+// br label %else2
+// . . .
+// %Result = select <16 x i1> %Mask, <16 x i32> %res.phi.select, <16 x i32> %Src
+// ret <16 x i32> %Result
+static void scalarizeMaskedGather(CallInst *CI) {
+ Value *Ptrs = CI->getArgOperand(0);
+ Value *Alignment = CI->getArgOperand(1);
+ Value *Mask = CI->getArgOperand(2);
+ Value *Src0 = CI->getArgOperand(3);
+
+ VectorType *VecType = dyn_cast<VectorType>(CI->getType());
+
+ assert(VecType && "Unexpected return type of masked load intrinsic");
+
+ IRBuilder<> Builder(CI->getContext());
+ Instruction *InsertPt = CI;
+ BasicBlock *IfBlock = CI->getParent();
+ BasicBlock *CondBlock = nullptr;
+ BasicBlock *PrevIfBlock = CI->getParent();
+ Builder.SetInsertPoint(InsertPt);
+ unsigned AlignVal = cast<ConstantInt>(Alignment)->getZExtValue();
+
+ Builder.SetCurrentDebugLocation(CI->getDebugLoc());
+
+ Value *UndefVal = UndefValue::get(VecType);
+
+ // The result vector
+ Value *VResult = UndefVal;
+ unsigned VectorWidth = VecType->getNumElements();
+
+ // Short-cut if the mask is a vector of constants.
+ bool IsConstMask = isa<ConstantVector>(Mask);
+
+ if (IsConstMask) {
+ for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
+ if (cast<ConstantVector>(Mask)->getOperand(Idx)->isNullValue())
+ continue;
+ Value *Ptr = Builder.CreateExtractElement(Ptrs, Builder.getInt32(Idx),
+ "Ptr" + Twine(Idx));
+ LoadInst *Load =
+ Builder.CreateAlignedLoad(Ptr, AlignVal, "Load" + Twine(Idx));
+ VResult = Builder.CreateInsertElement(
+ VResult, Load, Builder.getInt32(Idx), "Res" + Twine(Idx));
+ }
+ Value *NewI = Builder.CreateSelect(Mask, VResult, Src0);
+ CI->replaceAllUsesWith(NewI);
+ CI->eraseFromParent();
+ return;
+ }
+
+ PHINode *Phi = nullptr;
+ Value *PrevPhi = UndefVal;
+
+ for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
+
+ // Fill the "else" block, created in the previous iteration
+ //
+ // %Mask1 = extractelement <16 x i1> %Mask, i32 1
+ // %ToLoad1 = icmp eq i1 %Mask1, true
+ // br i1 %ToLoad1, label %cond.load, label %else
+ //
+ if (Idx > 0) {
+ Phi = Builder.CreatePHI(VecType, 2, "res.phi.else");
+ Phi->addIncoming(VResult, CondBlock);
+ Phi->addIncoming(PrevPhi, PrevIfBlock);
+ PrevPhi = Phi;
+ VResult = Phi;
+ }
+
+ Value *Predicate = Builder.CreateExtractElement(Mask, Builder.getInt32(Idx),
+ "Mask" + Twine(Idx));
+ Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_EQ, Predicate,
+ ConstantInt::get(Predicate->getType(), 1),
+ "ToLoad" + Twine(Idx));
+
+ // Create "cond" block
+ //
+ // %EltAddr = getelementptr i32* %1, i32 0
+ // %Elt = load i32* %EltAddr
+ // VResult = insertelement <16 x i32> VResult, i32 %Elt, i32 Idx
+ //
+ CondBlock = IfBlock->splitBasicBlock(InsertPt, "cond.load");
+ Builder.SetInsertPoint(InsertPt);
+
+ Value *Ptr = Builder.CreateExtractElement(Ptrs, Builder.getInt32(Idx),
+ "Ptr" + Twine(Idx));
+ LoadInst *Load =
+ Builder.CreateAlignedLoad(Ptr, AlignVal, "Load" + Twine(Idx));
+ VResult = Builder.CreateInsertElement(VResult, Load, Builder.getInt32(Idx),
+ "Res" + Twine(Idx));
+
+ // Create "else" block, fill it in the next iteration
+ BasicBlock *NewIfBlock = CondBlock->splitBasicBlock(InsertPt, "else");
+ Builder.SetInsertPoint(InsertPt);
+ Instruction *OldBr = IfBlock->getTerminator();
+ BranchInst::Create(CondBlock, NewIfBlock, Cmp, OldBr);
+ OldBr->eraseFromParent();
+ PrevIfBlock = IfBlock;
+ IfBlock = NewIfBlock;
+ }
+
+ Phi = Builder.CreatePHI(VecType, 2, "res.phi.select");
+ Phi->addIncoming(VResult, CondBlock);
+ Phi->addIncoming(PrevPhi, PrevIfBlock);
+ Value *NewI = Builder.CreateSelect(Mask, Phi, Src0);
+ CI->replaceAllUsesWith(NewI);
+ CI->eraseFromParent();
+}
+
+// Translate a masked scatter intrinsic, like
+// void @llvm.masked.scatter.v16i32(<16 x i32> %Src, <16 x i32*> %Ptrs, i32 4,
+// <16 x i1> %Mask)
+// to a chain of basic blocks, storing the elements one by one if the
+// appropriate mask bit is set.
+//
+// %Ptrs = getelementptr i32, i32* %ptr, <16 x i64> %ind
+// %Mask0 = extractelement <16 x i1> %Mask, i32 0
+// %ToStore0 = icmp eq i1 %Mask0, true
+// br i1 %ToStore0, label %cond.store, label %else
+//
+// cond.store:
+// %Elt0 = extractelement <16 x i32> %Src, i32 0
+// %Ptr0 = extractelement <16 x i32*> %Ptrs, i32 0
+// store i32 %Elt0, i32* %Ptr0, align 4
+// br label %else
+//
+// else:
+// %Mask1 = extractelement <16 x i1> %Mask, i32 1
+// %ToStore1 = icmp eq i1 %Mask1, true
+// br i1 %ToStore1, label %cond.store1, label %else2
+//
+// cond.store1:
+// %Elt1 = extractelement <16 x i32> %Src, i32 1
+// %Ptr1 = extractelement <16 x i32*> %Ptrs, i32 1
+// store i32 %Elt1, i32* %Ptr1, align 4
+// br label %else2
+// . . .
+static void scalarizeMaskedScatter(CallInst *CI) {
+ Value *Src = CI->getArgOperand(0);
+ Value *Ptrs = CI->getArgOperand(1);
+ Value *Alignment = CI->getArgOperand(2);
+ Value *Mask = CI->getArgOperand(3);
+
+ assert(isa<VectorType>(Src->getType()) &&
+ "Unexpected data type in masked scatter intrinsic");
+ assert(isa<VectorType>(Ptrs->getType()) &&
+ isa<PointerType>(Ptrs->getType()->getVectorElementType()) &&
+ "Vector of pointers is expected in masked scatter intrinsic");
+
+ IRBuilder<> Builder(CI->getContext());
+ Instruction *InsertPt = CI;
+ BasicBlock *IfBlock = CI->getParent();
+ Builder.SetInsertPoint(InsertPt);
+ Builder.SetCurrentDebugLocation(CI->getDebugLoc());
+
+ unsigned AlignVal = cast<ConstantInt>(Alignment)->getZExtValue();
+ unsigned VectorWidth = Src->getType()->getVectorNumElements();
+
+ // Short-cut if the mask is a vector of constants.
+ bool IsConstMask = isa<ConstantVector>(Mask);
+
+ if (IsConstMask) {
+ for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
+ if (cast<ConstantVector>(Mask)->getOperand(Idx)->isNullValue())
+ continue;
+ Value *OneElt = Builder.CreateExtractElement(Src, Builder.getInt32(Idx),
+ "Elt" + Twine(Idx));
+ Value *Ptr = Builder.CreateExtractElement(Ptrs, Builder.getInt32(Idx),
+ "Ptr" + Twine(Idx));
+ Builder.CreateAlignedStore(OneElt, Ptr, AlignVal);
+ }
+ CI->eraseFromParent();
+ return;
+ }
+ for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
+ // Fill the "else" block, created in the previous iteration
+ //
+ // %Mask1 = extractelement <16 x i1> %Mask, i32 Idx
+ // %ToStore = icmp eq i1 %Mask1, true
+ // br i1 %ToStore, label %cond.store, label %else
+ //
+ Value *Predicate = Builder.CreateExtractElement(Mask, Builder.getInt32(Idx),
+ "Mask" + Twine(Idx));
+ Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_EQ, Predicate,
+ ConstantInt::get(Predicate->getType(), 1),
+ "ToStore" + Twine(Idx));
+
+ // Create "cond" block
+ //
+ // %Elt1 = extractelement <16 x i32> %Src, i32 1
+ // %Ptr1 = extractelement <16 x i32*> %Ptrs, i32 1
+ // store i32 %Elt1, i32* %Ptr1
+ //
+ BasicBlock *CondBlock = IfBlock->splitBasicBlock(InsertPt, "cond.store");
+ Builder.SetInsertPoint(InsertPt);
+
+ Value *OneElt = Builder.CreateExtractElement(Src, Builder.getInt32(Idx),
+ "Elt" + Twine(Idx));
+ Value *Ptr = Builder.CreateExtractElement(Ptrs, Builder.getInt32(Idx),
+ "Ptr" + Twine(Idx));
+ Builder.CreateAlignedStore(OneElt, Ptr, AlignVal);
+
+ // Create "else" block, fill it in the next iteration
+ BasicBlock *NewIfBlock = CondBlock->splitBasicBlock(InsertPt, "else");
+ Builder.SetInsertPoint(InsertPt);
+ Instruction *OldBr = IfBlock->getTerminator();
+ BranchInst::Create(CondBlock, NewIfBlock, Cmp, OldBr);
+ OldBr->eraseFromParent();
+ IfBlock = NewIfBlock;
+ }
+ CI->eraseFromParent();
+}
+
+bool ScalarizeMaskedMemIntrin::runOnFunction(Function &F) {
+ if (skipFunction(F))
+ return false;
+
+ bool EverMadeChange = false;
+
+ TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
+
+ bool MadeChange = true;
+ while (MadeChange) {
+ MadeChange = false;
+ for (Function::iterator I = F.begin(); I != F.end();) {
+ BasicBlock *BB = &*I++;
+ bool ModifiedDTOnIteration = false;
+ MadeChange |= optimizeBlock(*BB, ModifiedDTOnIteration);
+
+ // Restart BB iteration if the dominator tree of the function was changed.
+ if (ModifiedDTOnIteration)
+ break;
+ }
+
+ EverMadeChange |= MadeChange;
+ }
+
+ return EverMadeChange;
+}
+
+bool ScalarizeMaskedMemIntrin::optimizeBlock(BasicBlock &BB, bool &ModifiedDT) {
+ bool MadeChange = false;
+
+ BasicBlock::iterator CurInstIterator = BB.begin();
+ while (CurInstIterator != BB.end()) {
+ if (CallInst *CI = dyn_cast<CallInst>(&*CurInstIterator++))
+ MadeChange |= optimizeCallInst(CI, ModifiedDT);
+ if (ModifiedDT)
+ return true;
+ }
+
+ return MadeChange;
+}
+
+bool ScalarizeMaskedMemIntrin::optimizeCallInst(CallInst *CI,
+ bool &ModifiedDT) {
+
+ IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI);
+ if (II) {
+ switch (II->getIntrinsicID()) {
+ default:
+ break;
+ case Intrinsic::masked_load: {
+ // Scalarize unsupported vector masked load
+ if (!TTI->isLegalMaskedLoad(CI->getType())) {
+ scalarizeMaskedLoad(CI);
+ ModifiedDT = true;
+ return true;
+ }
+ return false;
+ }
+ case Intrinsic::masked_store: {
+ if (!TTI->isLegalMaskedStore(CI->getArgOperand(0)->getType())) {
+ scalarizeMaskedStore(CI);
+ ModifiedDT = true;
+ return true;
+ }
+ return false;
+ }
+ case Intrinsic::masked_gather: {
+ if (!TTI->isLegalMaskedGather(CI->getType())) {
+ scalarizeMaskedGather(CI);
+ ModifiedDT = true;
+ return true;
+ }
+ return false;
+ }
+ case Intrinsic::masked_scatter: {
+ if (!TTI->isLegalMaskedScatter(CI->getArgOperand(0)->getType())) {
+ scalarizeMaskedScatter(CI);
+ ModifiedDT = true;
+ return true;
+ }
+ return false;
+ }
+ }
+ }
+
+ return false;
+}
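+
+// The expansion can be exercised in isolation (illustrative; the base
+// TargetTransformInfo reports masked loads/stores/gathers/scatters as not
+// legal, so every such intrinsic gets scalarized):
+// opt -scalarize-masked-mem-intrin -S masked.ll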
diff --git a/contrib/llvm/lib/CodeGen/ScheduleDAG.cpp b/contrib/llvm/lib/CodeGen/ScheduleDAG.cpp
index 427d952..5e95f76 100644
--- a/contrib/llvm/lib/CodeGen/ScheduleDAG.cpp
+++ b/contrib/llvm/lib/CodeGen/ScheduleDAG.cpp
@@ -1,4 +1,4 @@
-//===---- ScheduleDAG.cpp - Implement the ScheduleDAG class ---------------===//
+//===- ScheduleDAG.cpp - Implement the ScheduleDAG class ------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,22 +7,32 @@
//
//===----------------------------------------------------------------------===//
//
-// This implements the ScheduleDAG class, which is a base class used by
-// scheduling implementation classes.
+/// \file Implements the ScheduleDAG class, which is a base class used by
+/// scheduling implementation classes.
//
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/iterator_range.h"
+#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
-#include <climits>
+#include <algorithm>
+#include <cassert>
+#include <iterator>
+#include <limits>
+#include <utility>
+#include <vector>
+
using namespace llvm;
#define DEBUG_TYPE "pre-RA-sched"
@@ -33,58 +43,87 @@ static cl::opt<bool> StressSchedOpt(
cl::desc("Stress test instruction scheduling"));
#endif
-void SchedulingPriorityQueue::anchor() { }
+void SchedulingPriorityQueue::anchor() {}
ScheduleDAG::ScheduleDAG(MachineFunction &mf)
: TM(mf.getTarget()), TII(mf.getSubtarget().getInstrInfo()),
TRI(mf.getSubtarget().getRegisterInfo()), MF(mf),
- MRI(mf.getRegInfo()), EntrySU(), ExitSU() {
+ MRI(mf.getRegInfo()) {
#ifndef NDEBUG
StressSched = StressSchedOpt;
#endif
}
-ScheduleDAG::~ScheduleDAG() {}
+ScheduleDAG::~ScheduleDAG() = default;
-/// Clear the DAG state (e.g. between scheduling regions).
void ScheduleDAG::clearDAG() {
SUnits.clear();
EntrySU = SUnit();
ExitSU = SUnit();
}
-/// getInstrDesc helper to handle SDNodes.
const MCInstrDesc *ScheduleDAG::getNodeDesc(const SDNode *Node) const {
if (!Node || !Node->isMachineOpcode()) return nullptr;
return &TII->get(Node->getMachineOpcode());
}
-/// addPred - This adds the specified edge as a pred of the current node if
-/// not already. It also adds the current node as a successor of the
-/// specified node.
+LLVM_DUMP_METHOD
+raw_ostream &SDep::print(raw_ostream &OS, const TargetRegisterInfo *TRI) const {
+ switch (getKind()) {
+ case Data: OS << "Data"; break;
+ case Anti: OS << "Anti"; break;
+ case Output: OS << "Out "; break;
+ case Order: OS << "Ord "; break;
+ }
+
+ switch (getKind()) {
+ case Data:
+ OS << " Latency=" << getLatency();
+ if (TRI && isAssignedRegDep())
+ OS << " Reg=" << PrintReg(getReg(), TRI);
+ break;
+ case Anti:
+ case Output:
+ OS << " Latency=" << getLatency();
+ break;
+ case Order:
+ OS << " Latency=" << getLatency();
+ switch(Contents.OrdKind) {
+ case Barrier: OS << " Barrier"; break;
+ case MayAliasMem:
+ case MustAliasMem: OS << " Memory"; break;
+ case Artificial: OS << " Artificial"; break;
+ case Weak: OS << " Weak"; break;
+ case Cluster: OS << " Cluster"; break;
+ }
+ break;
+ }
+
+ return OS;
+}
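+
+// Illustrative output (register printing depends on the target's
+// TargetRegisterInfo): "Data Latency=1 Reg=%R0" for an assigned-register
+// data dependence, or "Ord  Latency=0 Memory" for a may-alias ordering edge.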
+
bool SUnit::addPred(const SDep &D, bool Required) {
// If this node already has this dependence, don't add a redundant one.
- for (SmallVectorImpl<SDep>::iterator I = Preds.begin(), E = Preds.end();
- I != E; ++I) {
+ for (SDep &PredDep : Preds) {
// Zero-latency weak edges may be added purely for heuristic ordering. Don't
// add them if another kind of edge already exists.
- if (!Required && I->getSUnit() == D.getSUnit())
+ if (!Required && PredDep.getSUnit() == D.getSUnit())
return false;
- if (I->overlaps(D)) {
- // Extend the latency if needed. Equivalent to removePred(I) + addPred(D).
- if (I->getLatency() < D.getLatency()) {
- SUnit *PredSU = I->getSUnit();
+ if (PredDep.overlaps(D)) {
+ // Extend the latency if needed. Equivalent to
+ // removePred(PredDep) + addPred(D).
+ if (PredDep.getLatency() < D.getLatency()) {
+ SUnit *PredSU = PredDep.getSUnit();
// Find the corresponding successor in N.
- SDep ForwardD = *I;
+ SDep ForwardD = PredDep;
ForwardD.setSUnit(this);
- for (SmallVectorImpl<SDep>::iterator II = PredSU->Succs.begin(),
- EE = PredSU->Succs.end(); II != EE; ++II) {
- if (*II == ForwardD) {
- II->setLatency(D.getLatency());
+ for (SDep &SuccDep : PredSU->Succs) {
+ if (SuccDep == ForwardD) {
+ SuccDep.setLatency(D.getLatency());
break;
}
}
- I->setLatency(D.getLatency());
+ PredDep.setLatency(D.getLatency());
}
return false;
}
@@ -95,8 +134,10 @@ bool SUnit::addPred(const SDep &D, bool Required) {
SUnit *N = D.getSUnit();
// Update the bookkeeping.
if (D.getKind() == SDep::Data) {
- assert(NumPreds < UINT_MAX && "NumPreds will overflow!");
- assert(N->NumSuccs < UINT_MAX && "NumSuccs will overflow!");
+ assert(NumPreds < std::numeric_limits<unsigned>::max() &&
+ "NumPreds will overflow!");
+ assert(N->NumSuccs < std::numeric_limits<unsigned>::max() &&
+ "NumSuccs will overflow!");
++NumPreds;
++N->NumSuccs;
}
@@ -105,7 +146,8 @@ bool SUnit::addPred(const SDep &D, bool Required) {
++WeakPredsLeft;
}
else {
- assert(NumPredsLeft < UINT_MAX && "NumPredsLeft will overflow!");
+ assert(NumPredsLeft < std::numeric_limits<unsigned>::max() &&
+ "NumPredsLeft will overflow!");
++NumPredsLeft;
}
}
@@ -114,7 +156,8 @@ bool SUnit::addPred(const SDep &D, bool Required) {
++N->WeakSuccsLeft;
}
else {
- assert(N->NumSuccsLeft < UINT_MAX && "NumSuccsLeft will overflow!");
+ assert(N->NumSuccsLeft < std::numeric_limits<unsigned>::max() &&
+ "NumSuccsLeft will overflow!");
++N->NumSuccsLeft;
}
}
@@ -127,51 +170,46 @@ bool SUnit::addPred(const SDep &D, bool Required) {
return true;
}
-/// removePred - This removes the specified edge as a pred of the current
-/// node if it exists. It also removes the current node as a successor of
-/// the specified node.
void SUnit::removePred(const SDep &D) {
// Find the matching predecessor.
- for (SmallVectorImpl<SDep>::iterator I = Preds.begin(), E = Preds.end();
- I != E; ++I)
- if (*I == D) {
- // Find the corresponding successor in N.
- SDep P = D;
- P.setSUnit(this);
- SUnit *N = D.getSUnit();
- SmallVectorImpl<SDep>::iterator Succ = find(N->Succs, P);
- assert(Succ != N->Succs.end() && "Mismatching preds / succs lists!");
- N->Succs.erase(Succ);
- Preds.erase(I);
- // Update the bookkeeping.
- if (P.getKind() == SDep::Data) {
- assert(NumPreds > 0 && "NumPreds will underflow!");
- assert(N->NumSuccs > 0 && "NumSuccs will underflow!");
- --NumPreds;
- --N->NumSuccs;
- }
- if (!N->isScheduled) {
- if (D.isWeak())
- --WeakPredsLeft;
- else {
- assert(NumPredsLeft > 0 && "NumPredsLeft will underflow!");
- --NumPredsLeft;
- }
- }
- if (!isScheduled) {
- if (D.isWeak())
- --N->WeakSuccsLeft;
- else {
- assert(N->NumSuccsLeft > 0 && "NumSuccsLeft will underflow!");
- --N->NumSuccsLeft;
- }
- }
- if (P.getLatency() != 0) {
- this->setDepthDirty();
- N->setHeightDirty();
- }
- return;
+ SmallVectorImpl<SDep>::iterator I = llvm::find(Preds, D);
+ if (I == Preds.end())
+ return;
+ // Find the corresponding successor in N.
+ SDep P = D;
+ P.setSUnit(this);
+ SUnit *N = D.getSUnit();
+ SmallVectorImpl<SDep>::iterator Succ = llvm::find(N->Succs, P);
+ assert(Succ != N->Succs.end() && "Mismatching preds / succs lists!");
+ N->Succs.erase(Succ);
+ Preds.erase(I);
+ // Update the bookkeeping.
+ if (P.getKind() == SDep::Data) {
+ assert(NumPreds > 0 && "NumPreds will underflow!");
+ assert(N->NumSuccs > 0 && "NumSuccs will underflow!");
+ --NumPreds;
+ --N->NumSuccs;
+ }
+ if (!N->isScheduled) {
+ if (D.isWeak())
+ --WeakPredsLeft;
+ else {
+ assert(NumPredsLeft > 0 && "NumPredsLeft will underflow!");
+ --NumPredsLeft;
}
+ }
+ if (!isScheduled) {
+ if (D.isWeak())
+ --N->WeakSuccsLeft;
+ else {
+ assert(N->NumSuccsLeft > 0 && "NumSuccsLeft will underflow!");
+ --N->NumSuccsLeft;
+ }
+ }
+ if (P.getLatency() != 0) {
+ this->setDepthDirty();
+ N->setHeightDirty();
+ }
}
void SUnit::setDepthDirty() {
@@ -181,9 +219,8 @@ void SUnit::setDepthDirty() {
do {
SUnit *SU = WorkList.pop_back_val();
SU->isDepthCurrent = false;
- for (SUnit::const_succ_iterator I = SU->Succs.begin(),
- E = SU->Succs.end(); I != E; ++I) {
- SUnit *SuccSU = I->getSUnit();
+ for (SDep &SuccDep : SU->Succs) {
+ SUnit *SuccSU = SuccDep.getSUnit();
if (SuccSU->isDepthCurrent)
WorkList.push_back(SuccSU);
}
@@ -197,18 +234,14 @@ void SUnit::setHeightDirty() {
do {
SUnit *SU = WorkList.pop_back_val();
SU->isHeightCurrent = false;
- for (SUnit::const_pred_iterator I = SU->Preds.begin(),
- E = SU->Preds.end(); I != E; ++I) {
- SUnit *PredSU = I->getSUnit();
+ for (SDep &PredDep : SU->Preds) {
+ SUnit *PredSU = PredDep.getSUnit();
if (PredSU->isHeightCurrent)
WorkList.push_back(PredSU);
}
} while (!WorkList.empty());
}
-/// setDepthToAtLeast - Update this node's successors to reflect the
-/// fact that this node's depth just increased.
-///
void SUnit::setDepthToAtLeast(unsigned NewDepth) {
if (NewDepth <= getDepth())
return;
@@ -217,9 +250,6 @@ void SUnit::setDepthToAtLeast(unsigned NewDepth) {
isDepthCurrent = true;
}
-/// setHeightToAtLeast - Update this node's predecessors to reflect the
-/// fact that this node's height just increased.
-///
void SUnit::setHeightToAtLeast(unsigned NewHeight) {
if (NewHeight <= getHeight())
return;
@@ -228,8 +258,7 @@ void SUnit::setHeightToAtLeast(unsigned NewHeight) {
isHeightCurrent = true;
}
-/// ComputeDepth - Calculate the maximal path from the node to the exit.
-///
+/// Calculates the maximal path from the node to the exit.
void SUnit::ComputeDepth() {
SmallVector<SUnit*, 8> WorkList;
WorkList.push_back(this);
@@ -238,12 +267,11 @@ void SUnit::ComputeDepth() {
bool Done = true;
unsigned MaxPredDepth = 0;
- for (SUnit::const_pred_iterator I = Cur->Preds.begin(),
- E = Cur->Preds.end(); I != E; ++I) {
- SUnit *PredSU = I->getSUnit();
+ for (const SDep &PredDep : Cur->Preds) {
+ SUnit *PredSU = PredDep.getSUnit();
if (PredSU->isDepthCurrent)
MaxPredDepth = std::max(MaxPredDepth,
- PredSU->Depth + I->getLatency());
+ PredSU->Depth + PredDep.getLatency());
else {
Done = false;
WorkList.push_back(PredSU);
@@ -261,8 +289,7 @@ void SUnit::ComputeDepth() {
} while (!WorkList.empty());
}
-/// ComputeHeight - Calculate the maximal path from the node to the entry.
-///
+/// Calculates the maximal path from the node to the entry.
void SUnit::ComputeHeight() {
SmallVector<SUnit*, 8> WorkList;
WorkList.push_back(this);
@@ -271,12 +298,11 @@ void SUnit::ComputeHeight() {
bool Done = true;
unsigned MaxSuccHeight = 0;
- for (SUnit::const_succ_iterator I = Cur->Succs.begin(),
- E = Cur->Succs.end(); I != E; ++I) {
- SUnit *SuccSU = I->getSUnit();
+ for (const SDep &SuccDep : Cur->Succs) {
+ SUnit *SuccSU = SuccDep.getSUnit();
if (SuccSU->isHeightCurrent)
MaxSuccHeight = std::max(MaxSuccHeight,
- SuccSU->Height + I->getLatency());
+ SuccSU->Height + SuccDep.getLatency());
else {
Done = false;
WorkList.push_back(SuccSU);
@@ -310,24 +336,31 @@ void SUnit::biasCriticalPath() {
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
-void SUnit::print(raw_ostream &OS, const ScheduleDAG *DAG) const {
- if (this == &DAG->ExitSU)
- OS << "ExitSU";
- else if (this == &DAG->EntrySU)
+LLVM_DUMP_METHOD
+raw_ostream &SUnit::print(raw_ostream &OS,
+ const SUnit *Entry, const SUnit *Exit) const {
+ if (this == Entry)
OS << "EntrySU";
+ else if (this == Exit)
+ OS << "ExitSU";
else
OS << "SU(" << NodeNum << ")";
+ return OS;
+}
+
+LLVM_DUMP_METHOD
+raw_ostream &SUnit::print(raw_ostream &OS, const ScheduleDAG *G) const {
+ return print(OS, &G->EntrySU, &G->ExitSU);
}
-/// SUnit - Scheduling unit. It's an wrapper around either a single SDNode or
-/// a group of nodes flagged together.
+LLVM_DUMP_METHOD
void SUnit::dump(const ScheduleDAG *G) const {
print(dbgs(), G);
dbgs() << ": ";
G->dumpNode(this);
}
-void SUnit::dumpAll(const ScheduleDAG *G) const {
+LLVM_DUMP_METHOD void SUnit::dumpAll(const ScheduleDAG *G) const {
dump(G);
dbgs() << " # preds left : " << NumPredsLeft << "\n";
@@ -343,89 +376,62 @@ void SUnit::dumpAll(const ScheduleDAG *G) const {
if (Preds.size() != 0) {
dbgs() << " Predecessors:\n";
- for (SUnit::const_succ_iterator I = Preds.begin(), E = Preds.end();
- I != E; ++I) {
- dbgs() << " ";
- switch (I->getKind()) {
- case SDep::Data: dbgs() << "data "; break;
- case SDep::Anti: dbgs() << "anti "; break;
- case SDep::Output: dbgs() << "out "; break;
- case SDep::Order: dbgs() << "ord "; break;
- }
- I->getSUnit()->print(dbgs(), G);
- if (I->isArtificial())
- dbgs() << " *";
- dbgs() << ": Latency=" << I->getLatency();
- if (I->isAssignedRegDep())
- dbgs() << " Reg=" << PrintReg(I->getReg(), G->TRI);
- dbgs() << "\n";
+ for (const SDep &Dep : Preds) {
+ dbgs() << " ";
+ Dep.getSUnit()->print(dbgs(), G); dbgs() << ": ";
+ Dep.print(dbgs(), G->TRI); dbgs() << '\n';
}
}
if (Succs.size() != 0) {
dbgs() << " Successors:\n";
- for (SUnit::const_succ_iterator I = Succs.begin(), E = Succs.end();
- I != E; ++I) {
- dbgs() << " ";
- switch (I->getKind()) {
- case SDep::Data: dbgs() << "data "; break;
- case SDep::Anti: dbgs() << "anti "; break;
- case SDep::Output: dbgs() << "out "; break;
- case SDep::Order: dbgs() << "ord "; break;
- }
- I->getSUnit()->print(dbgs(), G);
- if (I->isArtificial())
- dbgs() << " *";
- dbgs() << ": Latency=" << I->getLatency();
- if (I->isAssignedRegDep())
- dbgs() << " Reg=" << PrintReg(I->getReg(), G->TRI);
- dbgs() << "\n";
+ for (const SDep &Dep : Succs) {
+ dbgs() << " ";
+ Dep.getSUnit()->print(dbgs(), G); dbgs() << ": ";
+ Dep.print(dbgs(), G->TRI); dbgs() << '\n';
}
}
}
#endif
#ifndef NDEBUG
-/// VerifyScheduledDAG - Verify that all SUnits were scheduled and that
-/// their state is consistent. Return the number of scheduled nodes.
-///
unsigned ScheduleDAG::VerifyScheduledDAG(bool isBottomUp) {
bool AnyNotSched = false;
unsigned DeadNodes = 0;
- for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
- if (!SUnits[i].isScheduled) {
- if (SUnits[i].NumPreds == 0 && SUnits[i].NumSuccs == 0) {
+ for (const SUnit &SUnit : SUnits) {
+ if (!SUnit.isScheduled) {
+ if (SUnit.NumPreds == 0 && SUnit.NumSuccs == 0) {
++DeadNodes;
continue;
}
if (!AnyNotSched)
dbgs() << "*** Scheduling failed! ***\n";
- SUnits[i].dump(this);
+ SUnit.dump(this);
dbgs() << "has not been scheduled!\n";
AnyNotSched = true;
}
- if (SUnits[i].isScheduled &&
- (isBottomUp ? SUnits[i].getHeight() : SUnits[i].getDepth()) >
- unsigned(INT_MAX)) {
+ if (SUnit.isScheduled &&
+ (isBottomUp ? SUnit.getHeight() : SUnit.getDepth()) >
+ unsigned(std::numeric_limits<int>::max())) {
if (!AnyNotSched)
dbgs() << "*** Scheduling failed! ***\n";
- SUnits[i].dump(this);
+ SUnit.dump(this);
dbgs() << "has an unexpected "
<< (isBottomUp ? "Height" : "Depth") << " value!\n";
AnyNotSched = true;
}
if (isBottomUp) {
- if (SUnits[i].NumSuccsLeft != 0) {
+ if (SUnit.NumSuccsLeft != 0) {
if (!AnyNotSched)
dbgs() << "*** Scheduling failed! ***\n";
- SUnits[i].dump(this);
+ SUnit.dump(this);
dbgs() << "has successors left!\n";
AnyNotSched = true;
}
} else {
- if (SUnits[i].NumPredsLeft != 0) {
+ if (SUnit.NumPredsLeft != 0) {
if (!AnyNotSched)
dbgs() << "*** Scheduling failed! ***\n";
- SUnits[i].dump(this);
+ SUnit.dump(this);
dbgs() << "has predecessors left!\n";
AnyNotSched = true;
}
@@ -436,36 +442,33 @@ unsigned ScheduleDAG::VerifyScheduledDAG(bool isBottomUp) {
}
#endif
-/// InitDAGTopologicalSorting - create the initial topological
-/// ordering from the DAG to be scheduled.
-///
-/// The idea of the algorithm is taken from
-/// "Online algorithms for managing the topological order of
-/// a directed acyclic graph" by David J. Pearce and Paul H.J. Kelly
-/// This is the MNR algorithm, which was first introduced by
-/// A. Marchetti-Spaccamela, U. Nanni and H. Rohnert in
-/// "Maintaining a topological order under edge insertions".
-///
-/// Short description of the algorithm:
-///
-/// Topological ordering, ord, of a DAG maps each node to a topological
-/// index so that for all edges X->Y it is the case that ord(X) < ord(Y).
-///
-/// This means that if there is a path from the node X to the node Z,
-/// then ord(X) < ord(Z).
-///
-/// This property can be used to check for reachability of nodes:
-/// if Z is reachable from X, then an insertion of the edge Z->X would
-/// create a cycle.
-///
-/// The algorithm first computes a topological ordering for the DAG by
-/// initializing the Index2Node and Node2Index arrays and then tries to keep
-/// the ordering up-to-date after edge insertions by reordering the DAG.
-///
-/// On insertion of the edge X->Y, the algorithm first marks by calling DFS
-/// the nodes reachable from Y, and then shifts them using Shift to lie
-/// immediately after X in Index2Node.
void ScheduleDAGTopologicalSort::InitDAGTopologicalSorting() {
+ // The idea of the algorithm is taken from
+ // "Online algorithms for managing the topological order of
+ // a directed acyclic graph" by David J. Pearce and Paul H.J. Kelly
+ // This is the MNR algorithm, which was first introduced by
+ // A. Marchetti-Spaccamela, U. Nanni and H. Rohnert in
+ // "Maintaining a topological order under edge insertions".
+ //
+ // Short description of the algorithm:
+ //
+ // Topological ordering, ord, of a DAG maps each node to a topological
+ // index so that for all edges X->Y it is the case that ord(X) < ord(Y).
+ //
+ // This means that if there is a path from the node X to the node Z,
+ // then ord(X) < ord(Z).
+ //
+ // This property can be used to check for reachability of nodes:
+ // if Z is reachable from X, then an insertion of the edge Z->X would
+ // create a cycle.
+ //
+ // The algorithm first computes a topological ordering for the DAG by
+ // initializing the Index2Node and Node2Index arrays and then tries to keep
+ // the ordering up-to-date after edge insertions by reordering the DAG.
+ //
+ // On insertion of the edge X->Y, the algorithm first marks by calling DFS
+ // the nodes reachable from Y, and then shifts them using Shift to lie
+ // immediately after X in Index2Node.
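+ //
+ // Illustrative example: for a DAG with edges A->B, A->C and C->B, one
+ // valid ordering is ord(A)=0, ord(C)=1, ord(B)=2. Inserting the edge B->A
+ // would require ord(B) < ord(A), so the DFS from A reaching B reports a
+ // cycle.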
unsigned DAGSize = SUnits.size();
std::vector<SUnit*> WorkList;
WorkList.reserve(DAGSize);
@@ -476,18 +479,17 @@ void ScheduleDAGTopologicalSort::InitDAGTopologicalSorting() {
// Initialize the data structures.
if (ExitSU)
WorkList.push_back(ExitSU);
- for (unsigned i = 0, e = DAGSize; i != e; ++i) {
- SUnit *SU = &SUnits[i];
- int NodeNum = SU->NodeNum;
- unsigned Degree = SU->Succs.size();
+ for (SUnit &SU : SUnits) {
+ int NodeNum = SU.NodeNum;
+ unsigned Degree = SU.Succs.size();
// Temporarily use the Node2Index array as scratch space for degree counts.
Node2Index[NodeNum] = Degree;
// Is it a node without dependencies?
if (Degree == 0) {
- assert(SU->Succs.empty() && "SUnit should have no successors");
+ assert(SU.Succs.empty() && "SUnit should have no successors");
// Collect leaf nodes.
- WorkList.push_back(SU);
+ WorkList.push_back(&SU);
}
}
@@ -497,9 +499,8 @@ void ScheduleDAGTopologicalSort::InitDAGTopologicalSorting() {
WorkList.pop_back();
if (SU->NodeNum < DAGSize)
Allocate(SU->NodeNum, --Id);
- for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
- I != E; ++I) {
- SUnit *SU = I->getSUnit();
+ for (const SDep &PredDep : SU->Preds) {
+ SUnit *SU = PredDep.getSUnit();
if (SU->NodeNum < DAGSize && !--Node2Index[SU->NodeNum])
// If all dependencies of the node are processed already,
// then the node can be computed now.
@@ -511,19 +512,15 @@ void ScheduleDAGTopologicalSort::InitDAGTopologicalSorting() {
#ifndef NDEBUG
// Check correctness of the ordering
- for (unsigned i = 0, e = DAGSize; i != e; ++i) {
- SUnit *SU = &SUnits[i];
- for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
- I != E; ++I) {
- assert(Node2Index[SU->NodeNum] > Node2Index[I->getSUnit()->NodeNum] &&
+ for (SUnit &SU : SUnits) {
+ for (const SDep &PD : SU.Preds) {
+ assert(Node2Index[SU.NodeNum] > Node2Index[PD.getSUnit()->NodeNum] &&
"Wrong topological sorting");
}
}
#endif
}
-/// AddPred - Updates the topological ordering to accommodate an edge
-/// to be added from SUnit X to SUnit Y.
void ScheduleDAGTopologicalSort::AddPred(SUnit *Y, SUnit *X) {
int UpperBound, LowerBound;
LowerBound = Node2Index[Y->NodeNum];
@@ -540,16 +537,10 @@ void ScheduleDAGTopologicalSort::AddPred(SUnit *Y, SUnit *X) {
}
}
-/// RemovePred - Updates the topological ordering to accommodate an
-/// an edge to be removed from the specified node N from the predecessors
-/// of the current node M.
void ScheduleDAGTopologicalSort::RemovePred(SUnit *M, SUnit *N) {
// InitDAGTopologicalSorting();
}
-/// DFS - Make a DFS traversal to mark all nodes reachable from SU and mark
-/// all nodes affected by the edge insertion. These nodes will later get new
-/// topological indexes by means of the Shift method.
void ScheduleDAGTopologicalSort::DFS(const SUnit *SU, int UpperBound,
bool &HasLoop) {
std::vector<const SUnit*> WorkList;
@@ -560,8 +551,9 @@ void ScheduleDAGTopologicalSort::DFS(const SUnit *SU, int UpperBound,
SU = WorkList.back();
WorkList.pop_back();
Visited.set(SU->NodeNum);
- for (int I = SU->Succs.size()-1; I >= 0; --I) {
- unsigned s = SU->Succs[I].getSUnit()->NodeNum;
+ for (const SDep &SuccDep
+ : make_range(SU->Succs.rbegin(), SU->Succs.rend())) {
+ unsigned s = SuccDep.getSUnit()->NodeNum;
// Edges to non-SUnits are allowed but ignored (e.g. ExitSU).
if (s >= Node2Index.size())
continue;
@@ -571,14 +563,93 @@ void ScheduleDAGTopologicalSort::DFS(const SUnit *SU, int UpperBound,
}
// Visit successors if not already and in affected region.
if (!Visited.test(s) && Node2Index[s] < UpperBound) {
- WorkList.push_back(SU->Succs[I].getSUnit());
+ WorkList.push_back(SuccDep.getSUnit());
+ }
+ }
+ } while (!WorkList.empty());
+}
+
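+// Illustrative behavior (hypothetical SUnits): with edges SU0->SU1->SU3 and
+// SU0->SU2->SU3, GetSubGraph(SU0, SU3, Ok) sets Ok to true and returns the
+// node numbers of the units lying strictly between them (SU1 and SU2).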
+std::vector<int> ScheduleDAGTopologicalSort::GetSubGraph(const SUnit &StartSU,
+ const SUnit &TargetSU,
+ bool &Success) {
+ std::vector<const SUnit*> WorkList;
+ int LowerBound = Node2Index[StartSU.NodeNum];
+ int UpperBound = Node2Index[TargetSU.NodeNum];
+ bool Found = false;
+ BitVector VisitedBack;
+ std::vector<int> Nodes;
+
+ if (LowerBound > UpperBound) {
+ Success = false;
+ return Nodes;
+ }
+
+ WorkList.reserve(SUnits.size());
+ Visited.reset();
+
+ // Starting from StartSU, visit all successors up
+ // to UpperBound.
+ WorkList.push_back(&StartSU);
+ do {
+ const SUnit *SU = WorkList.back();
+ WorkList.pop_back();
+ for (int I = SU->Succs.size()-1; I >= 0; --I) {
+ const SUnit *Succ = SU->Succs[I].getSUnit();
+ unsigned s = Succ->NodeNum;
+ // Edges to non-SUnits are allowed but ignored (e.g. ExitSU).
+ if (Succ->isBoundaryNode())
+ continue;
+ if (Node2Index[s] == UpperBound) {
+ Found = true;
+ continue;
+ }
+ // Visit successors if not already and in affected region.
+ if (!Visited.test(s) && Node2Index[s] < UpperBound) {
+ Visited.set(s);
+ WorkList.push_back(Succ);
+ }
+ }
+ } while (!WorkList.empty());
+
+ if (!Found) {
+ Success = false;
+ return Nodes;
+ }
+
+ WorkList.clear();
+ VisitedBack.resize(SUnits.size());
+ Found = false;
+
+ // Starting from TargetSU, visit all predecessors up
+ // to LowerBound. SUs that are visited by the two
+ // passes are added to Nodes.
+ WorkList.push_back(&TargetSU);
+ do {
+ const SUnit *SU = WorkList.back();
+ WorkList.pop_back();
+ for (int I = SU->Preds.size()-1; I >= 0; --I) {
+ const SUnit *Pred = SU->Preds[I].getSUnit();
+ unsigned s = Pred->NodeNum;
+ // Edges to non-SUnits are allowed but ignored (e.g. EntrySU).
+ if (Pred->isBoundaryNode())
+ continue;
+ if (Node2Index[s] == LowerBound) {
+ Found = true;
+ continue;
+ }
+ if (!VisitedBack.test(s) && Visited.test(s)) {
+ VisitedBack.set(s);
+ WorkList.push_back(Pred);
+ Nodes.push_back(s);
}
}
} while (!WorkList.empty());
+
+ assert(Found && "Error in SUnit Graph!");
+ Success = true;
+ return Nodes;
}
-/// Shift - Renumber the nodes so that the topological ordering is
-/// preserved.
void ScheduleDAGTopologicalSort::Shift(BitVector& Visited, int LowerBound,
int UpperBound) {
std::vector<int> L;
@@ -598,28 +669,23 @@ void ScheduleDAGTopologicalSort::Shift(BitVector& Visited, int LowerBound,
}
}
- for (unsigned j = 0; j < L.size(); ++j) {
- Allocate(L[j], i - shift);
+ for (unsigned LI : L) {
+ Allocate(LI, i - shift);
i = i + 1;
}
}
-
-/// WillCreateCycle - Returns true if adding an edge to TargetSU from SU will
-/// create a cycle. If so, it is not safe to call AddPred(TargetSU, SU).
bool ScheduleDAGTopologicalSort::WillCreateCycle(SUnit *TargetSU, SUnit *SU) {
// Is SU reachable from TargetSU via successor edges?
if (IsReachable(SU, TargetSU))
return true;
- for (SUnit::pred_iterator
- I = TargetSU->Preds.begin(), E = TargetSU->Preds.end(); I != E; ++I)
- if (I->isAssignedRegDep() &&
- IsReachable(SU, I->getSUnit()))
+ for (const SDep &PredDep : TargetSU->Preds)
+ if (PredDep.isAssignedRegDep() &&
+ IsReachable(SU, PredDep.getSUnit()))
return true;
return false;
}
-/// IsReachable - Checks if SU is reachable from TargetSU.
bool ScheduleDAGTopologicalSort::IsReachable(const SUnit *SU,
const SUnit *TargetSU) {
// If insertion of the edge SU->TargetSU would create a cycle
@@ -637,7 +703,6 @@ bool ScheduleDAGTopologicalSort::IsReachable(const SUnit *SU,
return HasLoop;
}
-/// Allocate - assign the topological index to the node n.
void ScheduleDAGTopologicalSort::Allocate(int n, int index) {
Node2Index[n] = index;
Index2Node[index] = n;
@@ -647,4 +712,4 @@ ScheduleDAGTopologicalSort::
ScheduleDAGTopologicalSort(std::vector<SUnit> &sunits, SUnit *exitsu)
: SUnits(sunits), ExitSU(exitsu) {}
-ScheduleHazardRecognizer::~ScheduleHazardRecognizer() {}
+ScheduleHazardRecognizer::~ScheduleHazardRecognizer() = default;
diff --git a/contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp b/contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp
index 611c5a7..99baa07 100644
--- a/contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp
+++ b/contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp
@@ -7,41 +7,63 @@
//
//===----------------------------------------------------------------------===//
//
-// This implements the ScheduleDAGInstrs class, which implements re-scheduling
-// of MachineInstrs.
+/// \file This implements the ScheduleDAGInstrs class, which implements
+/// re-scheduling of MachineInstrs.
//
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/ScheduleDAGInstrs.h"
#include "llvm/ADT/IntEqClasses.h"
+#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/SparseSet.h"
+#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/LivePhysRegs.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBundle.h"
#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/CodeGen/RegisterPressure.h"
+#include "llvm/CodeGen/ScheduleDAG.h"
#include "llvm/CodeGen/ScheduleDFS.h"
+#include "llvm/CodeGen/SlotIndexes.h"
+#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
-#include "llvm/IR/Type.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
#include "llvm/IR/Operator.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/Value.h"
+#include "llvm/MC/LaneBitmask.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
+#include <algorithm>
+#include <cassert>
+#include <iterator>
+#include <string>
+#include <utility>
+#include <vector>
using namespace llvm;
-#define DEBUG_TYPE "misched"
+#define DEBUG_TYPE "machine-scheduler"
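+// Note (illustrative): with this rename, per-pass debug output is selected
+// via -debug-only=machine-scheduler instead of -debug-only=misched.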
static cl::opt<bool> EnableAASchedMI("enable-aa-sched-mi", cl::Hidden,
cl::ZeroOrMore, cl::init(false),
@@ -90,77 +112,17 @@ ScheduleDAGInstrs::ScheduleDAGInstrs(MachineFunction &mf,
const MachineLoopInfo *mli,
bool RemoveKillFlags)
: ScheduleDAG(mf), MLI(mli), MFI(mf.getFrameInfo()),
- RemoveKillFlags(RemoveKillFlags), CanHandleTerminators(false),
- TrackLaneMasks(false), AAForDep(nullptr), BarrierChain(nullptr),
+ RemoveKillFlags(RemoveKillFlags),
UnknownValue(UndefValue::get(
- Type::getVoidTy(mf.getFunction()->getContext()))),
- FirstDbgValue(nullptr) {
+ Type::getVoidTy(mf.getFunction()->getContext()))) {
DbgValues.clear();
const TargetSubtargetInfo &ST = mf.getSubtarget();
SchedModel.init(ST.getSchedModel(), &ST, TII);
}
-/// getUnderlyingObjectFromInt - This is the function that does the work of
-/// looking through basic ptrtoint+arithmetic+inttoptr sequences.
-static const Value *getUnderlyingObjectFromInt(const Value *V) {
- do {
- if (const Operator *U = dyn_cast<Operator>(V)) {
- // If we find a ptrtoint, we can transfer control back to the
- // regular getUnderlyingObjectFromInt.
- if (U->getOpcode() == Instruction::PtrToInt)
- return U->getOperand(0);
- // If we find an add of a constant, a multiplied value, or a phi, it's
- // likely that the other operand will lead us to the base
- // object. We don't have to worry about the case where the
- // object address is somehow being computed by the multiply,
- // because our callers only care when the result is an
- // identifiable object.
- if (U->getOpcode() != Instruction::Add ||
- (!isa<ConstantInt>(U->getOperand(1)) &&
- Operator::getOpcode(U->getOperand(1)) != Instruction::Mul &&
- !isa<PHINode>(U->getOperand(1))))
- return V;
- V = U->getOperand(0);
- } else {
- return V;
- }
- assert(V->getType()->isIntegerTy() && "Unexpected operand type!");
- } while (1);
-}
-
-/// getUnderlyingObjects - This is a wrapper around GetUnderlyingObjects
-/// and adds support for basic ptrtoint+arithmetic+inttoptr sequences.
-static void getUnderlyingObjects(const Value *V,
- SmallVectorImpl<Value *> &Objects,
- const DataLayout &DL) {
- SmallPtrSet<const Value *, 16> Visited;
- SmallVector<const Value *, 4> Working(1, V);
- do {
- V = Working.pop_back_val();
-
- SmallVector<Value *, 4> Objs;
- GetUnderlyingObjects(const_cast<Value *>(V), Objs, DL);
-
- for (Value *V : Objs) {
- if (!Visited.insert(V).second)
- continue;
- if (Operator::getOpcode(V) == Instruction::IntToPtr) {
- const Value *O =
- getUnderlyingObjectFromInt(cast<User>(V)->getOperand(0));
- if (O->getType()->isPointerTy()) {
- Working.push_back(O);
- continue;
- }
- }
- Objects.push_back(const_cast<Value *>(V));
- }
- } while (!Working.empty());
-}
-
-/// getUnderlyingObjectsForInstr - If this machine instr has memory reference
-/// information and it can be tracked to a normal reference to a known
-/// object, return the Value for that object.
+/// If this machine instr has memory reference information and it can be tracked
+/// to a normal reference to a known object, return the Value for that object.
static void getUnderlyingObjectsForInstr(const MachineInstr *MI,
const MachineFrameInfo &MFI,
UnderlyingObjectsVector &Objects,
@@ -189,12 +151,10 @@ static void getUnderlyingObjectsForInstr(const MachineInstr *MI,
Objects.push_back(UnderlyingObjectsVector::value_type(PSV, MayAlias));
} else if (const Value *V = MMO->getValue()) {
SmallVector<Value *, 4> Objs;
- getUnderlyingObjects(V, Objs, DL);
+ getUnderlyingObjectsForCodeGen(V, Objs, DL);
for (Value *V : Objs) {
- if (!isIdentifiedObject(V))
- return false;
-
+ assert(isIdentifiedObject(V));
Objects.push_back(UnderlyingObjectsVector::value_type(V, true));
}
} else
@@ -216,10 +176,6 @@ void ScheduleDAGInstrs::finishBlock() {
BB = nullptr;
}
-/// Initialize the DAG and common scheduler state for the current scheduling
-/// region. This does not actually create the DAG, only clears it. The
-/// scheduling driver may call BuildSchedGraph multiple times per scheduling
-/// region.
void ScheduleDAGInstrs::enterRegion(MachineBasicBlock *bb,
MachineBasicBlock::iterator begin,
MachineBasicBlock::iterator end,
@@ -230,20 +186,10 @@ void ScheduleDAGInstrs::enterRegion(MachineBasicBlock *bb,
NumRegionInstrs = regioninstrs;
}
-/// Close the current scheduling region. Don't clear any state in case the
-/// driver wants to refer to the previous scheduling region.
void ScheduleDAGInstrs::exitRegion() {
// Nothing to do.
}
-/// addSchedBarrierDeps - Add dependencies from instructions in the current
-/// list of instructions being scheduled to scheduling barrier by adding
-/// the exit SU to the register defs and use list. This is because we want to
-/// make sure instructions which define registers that are either used by
-/// the terminator or are live-out are properly scheduled. This is
-/// especially important when the definition latency of the return value(s)
-/// are too high to be hidden by the branch or when the liveout registers
-/// used by instructions in the fallthrough block.
void ScheduleDAGInstrs::addSchedBarrierDeps() {
MachineInstr *ExitMI = RegionEnd != BB->end() ? &*RegionEnd : nullptr;
ExitSU.setInstr(ExitMI);
@@ -271,7 +217,7 @@ void ScheduleDAGInstrs::addSchedBarrierDeps() {
}
}
-/// MO is an operand of SU's instruction that defines a physical register. Add
+/// MO is an operand of SU's instruction that defines a physical register. Adds
/// data dependencies from SU to any uses of the physical register.
void ScheduleDAGInstrs::addPhysRegDataDeps(SUnit *SU, unsigned OperIdx) {
const MachineOperand &MO = SU->getInstr()->getOperand(OperIdx);
@@ -313,9 +259,9 @@ void ScheduleDAGInstrs::addPhysRegDataDeps(SUnit *SU, unsigned OperIdx) {
}
}
-/// addPhysRegDeps - Add register dependencies (data, anti, and output) from
-/// this SUnit to following instructions in the same scheduling region that
-/// depend the physical register referenced at OperIdx.
+/// \brief Adds register dependencies (data, anti, and output) from this SUnit
+/// to following instructions in the same scheduling region that depend on the
+/// physical register referenced at OperIdx.
void ScheduleDAGInstrs::addPhysRegDeps(SUnit *SU, unsigned OperIdx) {
MachineInstr *MI = SU->getInstr();
MachineOperand &MO = MI->getOperand(OperIdx);
@@ -406,9 +352,9 @@ LaneBitmask ScheduleDAGInstrs::getLaneMaskForMO(const MachineOperand &MO) const
return TRI->getSubRegIndexLaneMask(SubReg);
}
-/// addVRegDefDeps - Add register output and data dependencies from this SUnit
-/// to instructions that occur later in the same scheduling region if they read
-/// from or write to the virtual register defined at OperIdx.
+/// Adds register output and data dependencies from this SUnit to instructions
+/// that occur later in the same scheduling region if they read from or write to
+/// the virtual register defined at OperIdx.
///
/// TODO: Hoist loop induction variable increments. This has to be
/// reevaluated. Generally, IV scheduling should be done before coalescing.
@@ -515,10 +461,10 @@ void ScheduleDAGInstrs::addVRegDefDeps(SUnit *SU, unsigned OperIdx) {
CurrentVRegDefs.insert(VReg2SUnit(Reg, LaneMask, SU));
}
-/// addVRegUseDeps - Add a register data dependency if the instruction that
-/// defines the virtual register used at OperIdx is mapped to an SUnit. Add a
-/// register antidependency from this SUnit to instructions that occur later in
-/// the same scheduling region if they write the virtual register.
+/// \brief Adds a register data dependency if the instruction that defines the
+/// virtual register used at OperIdx is mapped to an SUnit. Adds a register
+/// antidependency from this SUnit to instructions that occur later in the same
+/// scheduling region if they write the virtual register.
///
/// TODO: Handle ExitSU "uses" properly.
void ScheduleDAGInstrs::addVRegUseDeps(SUnit *SU, unsigned OperIdx) {
@@ -545,87 +491,25 @@ void ScheduleDAGInstrs::addVRegUseDeps(SUnit *SU, unsigned OperIdx) {
}
}
-/// Return true if MI is an instruction we are unable to reason about
+/// Returns true if MI is an instruction we are unable to reason about
/// (like a call or something with unmodeled side effects).
static inline bool isGlobalMemoryObject(AliasAnalysis *AA, MachineInstr *MI) {
return MI->isCall() || MI->hasUnmodeledSideEffects() ||
(MI->hasOrderedMemoryRef() && !MI->isDereferenceableInvariantLoad(AA));
}
-/// This returns true if the two MIs need a chain edge between them.
-/// This is called on normal stores and loads.
-static bool MIsNeedChainEdge(AliasAnalysis *AA, const MachineFrameInfo *MFI,
- const DataLayout &DL, MachineInstr *MIa,
- MachineInstr *MIb) {
- const MachineFunction *MF = MIa->getParent()->getParent();
- const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
-
- assert ((MIa->mayStore() || MIb->mayStore()) &&
- "Dependency checked between two loads");
-
- // Let the target decide if memory accesses cannot possibly overlap.
- if (TII->areMemAccessesTriviallyDisjoint(*MIa, *MIb, AA))
- return false;
-
- // To this point analysis is generic. From here on we do need AA.
- if (!AA)
- return true;
-
- // FIXME: Need to handle multiple memory operands to support all targets.
- if (!MIa->hasOneMemOperand() || !MIb->hasOneMemOperand())
- return true;
-
- MachineMemOperand *MMOa = *MIa->memoperands_begin();
- MachineMemOperand *MMOb = *MIb->memoperands_begin();
-
- if (!MMOa->getValue() || !MMOb->getValue())
- return true;
-
- // The following interface to AA is fashioned after DAGCombiner::isAlias
- // and operates with MachineMemOperand offset with some important
- // assumptions:
- // - LLVM fundamentally assumes flat address spaces.
- // - MachineOperand offset can *only* result from legalization and
- // cannot affect queries other than the trivial case of overlap
- // checking.
- // - These offsets never wrap and never step outside
- // of allocated objects.
- // - There should never be any negative offsets here.
- //
- // FIXME: Modify API to hide this math from "user"
- // FIXME: Even before we go to AA we can reason locally about some
- // memory objects. It can save compile time, and possibly catch some
- // corner cases not currently covered.
-
- assert ((MMOa->getOffset() >= 0) && "Negative MachineMemOperand offset");
- assert ((MMOb->getOffset() >= 0) && "Negative MachineMemOperand offset");
-
- int64_t MinOffset = std::min(MMOa->getOffset(), MMOb->getOffset());
- int64_t Overlapa = MMOa->getSize() + MMOa->getOffset() - MinOffset;
- int64_t Overlapb = MMOb->getSize() + MMOb->getOffset() - MinOffset;
-
- AliasResult AAResult =
- AA->alias(MemoryLocation(MMOa->getValue(), Overlapa,
- UseTBAA ? MMOa->getAAInfo() : AAMDNodes()),
- MemoryLocation(MMOb->getValue(), Overlapb,
- UseTBAA ? MMOb->getAAInfo() : AAMDNodes()));
-
- return (AAResult != NoAlias);
-}
-
-/// Check whether two objects need a chain edge and add it if needed.
void ScheduleDAGInstrs::addChainDependency (SUnit *SUa, SUnit *SUb,
unsigned Latency) {
- if (MIsNeedChainEdge(AAForDep, &MFI, MF.getDataLayout(), SUa->getInstr(),
- SUb->getInstr())) {
+ if (SUa->getInstr()->mayAlias(AAForDep, *SUb->getInstr(), UseTBAA)) {
SDep Dep(SUa, SDep::MayAliasMem);
Dep.setLatency(Latency);
SUb->addPred(Dep);
}
}
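// A minimal, standalone sketch (plain C++, not the LLVM API) of the overlap
// arithmetic the deleted MIsNeedChainEdge performed before it was folded
// into MachineInstr::mayAlias. The MayAlias callback stands in for the
// AliasAnalysis query and is hypothetical.
#include <algorithm>
#include <cstdint>
#include <functional>

// OffA/SizeA and OffB/SizeB describe two accesses to the same underlying
// value. Returns true when a chain edge is needed because the accesses
// cannot be proven disjoint.
inline bool needChainEdge(int64_t OffA, int64_t SizeA, int64_t OffB,
                          int64_t SizeB,
                          const std::function<bool(int64_t, int64_t)> &MayAlias) {
  // Rebase both accesses on the smaller offset so each query size spans
  // from the common base to the end of its access, as the old code did.
  int64_t MinOffset = std::min(OffA, OffB);
  int64_t OverlapA = SizeA + OffA - MinOffset;
  int64_t OverlapB = SizeB + OffB - MinOffset;
  return MayAlias(OverlapA, OverlapB);
}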
-/// Create an SUnit for each real instruction, numbered in top-down topological
-/// order. The instruction order A < B, implies that no edge exists from B to A.
+/// \brief Creates an SUnit for each real instruction, numbered in top-down
+/// topological order. The instruction order A < B, implies that no edge exists
+/// from B to A.
///
/// Map each real instruction to its SUnit.
///
@@ -640,7 +524,7 @@ void ScheduleDAGInstrs::initSUnits() {
// which is contained within a basic block.
SUnits.reserve(NumRegionInstrs);
- for (MachineInstr &MI : llvm::make_range(RegionBegin, RegionEnd)) {
+ for (MachineInstr &MI : make_range(RegionBegin, RegionEnd)) {
if (MI.isDebugValue())
continue;
@@ -682,23 +566,22 @@ void ScheduleDAGInstrs::initSUnits() {
}
class ScheduleDAGInstrs::Value2SUsMap : public MapVector<ValueType, SUList> {
-
/// Current total number of SUs in map.
- unsigned NumNodes;
+ unsigned NumNodes = 0;
/// 1 for loads, 0 for stores. (see comment in SUList)
unsigned TrueMemOrderLatency;
-public:
- Value2SUsMap(unsigned lat = 0) : NumNodes(0), TrueMemOrderLatency(lat) {}
+public:
+ Value2SUsMap(unsigned lat = 0) : TrueMemOrderLatency(lat) {}
/// To keep NumNodes up to date, insert() is used instead of
/// this operator w/ push_back().
ValueType &operator[](const SUList &Key) {
llvm_unreachable("Don't use. Use insert() instead."); }
- /// Add SU to the SUList of V. If Map grows huge, reduce its size
- /// by calling reduce().
+ /// Adds SU to the SUList of V. If Map grows huge, reduce its size by calling
+ /// reduce().
void inline insert(SUnit *SU, ValueType V) {
MapVector::operator[](V).push_back(SU);
NumNodes++;
@@ -708,7 +591,7 @@ public:
void inline clearList(ValueType V) {
iterator Itr = find(V);
if (Itr != end()) {
- assert (NumNodes >= Itr->second.size());
+ assert(NumNodes >= Itr->second.size());
NumNodes -= Itr->second.size();
Itr->second.clear();
@@ -723,8 +606,8 @@ public:
unsigned inline size() const { return NumNodes; }
- /// Count the number of SUs in this map after a reduction.
- void reComputeSize(void) {
+ /// Counts the number of SUs in this map after a reduction.
+ void reComputeSize() {
NumNodes = 0;
for (auto &I : *this)
NumNodes += I.second.size();
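// A standalone sketch (standard containers, hypothetical names) of the
// invariant Value2SUsMap maintains: NumNodes caches the total size of all
// per-value lists, so insert() and clearList() are the only mutation paths.
#include <cassert>
#include <list>
#include <map>

struct ValueToSUs {
  std::map<const void *, std::list<unsigned>> Lists; // value -> SU numbers
  unsigned NumNodes = 0; // cached total across all lists

  void insert(unsigned SUNum, const void *V) {
    Lists[V].push_back(SUNum);
    ++NumNodes; // keep the cached count in sync on every insertion
  }

  void clearList(const void *V) {
    auto It = Lists.find(V);
    if (It == Lists.end())
      return;
    assert(NumNodes >= It->second.size());
    NumNodes -= It->second.size();
    It->second.clear();
  }
};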
@@ -754,7 +637,7 @@ void ScheduleDAGInstrs::addChainDependencies(SUnit *SU,
}
void ScheduleDAGInstrs::addBarrierChain(Value2SUsMap &map) {
- assert (BarrierChain != nullptr);
+ assert(BarrierChain != nullptr);
for (auto &I : map) {
SUList &sus = I.second;
@@ -765,7 +648,7 @@ void ScheduleDAGInstrs::addBarrierChain(Value2SUsMap &map) {
}
void ScheduleDAGInstrs::insertBarrierChain(Value2SUsMap &map) {
- assert (BarrierChain != nullptr);
+ assert(BarrierChain != nullptr);
// Go through all lists of SUs.
for (Value2SUsMap::iterator I = map.begin(), EE = map.end(); I != EE;) {
@@ -797,9 +680,6 @@ void ScheduleDAGInstrs::insertBarrierChain(Value2SUsMap &map) {
map.reComputeSize();
}
-/// If RegPressure is non-null, compute register pressure as a side effect. The
-/// DAG builder is an efficient place to do it because it already visits
-/// operands.
void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
RegPressureTracker *RPTracker,
PressureDiffs *PDiffs,
@@ -1088,10 +968,6 @@ void ScheduleDAGInstrs::Value2SUsMap::dump() {
}
}
-/// Reduce maps in FIFO order, by N SUs. This is better than turning
-/// every Nth memory SU into BarrierChain in buildSchedGraph(), since
-/// it avoids unnecessary edges between seen SUs above the new
-/// BarrierChain, and those below it.
void ScheduleDAGInstrs::reduceHugeMemNodeMaps(Value2SUsMap &stores,
Value2SUsMap &loads, unsigned N) {
DEBUG(dbgs() << "Before reduction:\nStoring SUnits:\n";
@@ -1113,7 +989,7 @@ void ScheduleDAGInstrs::reduceHugeMemNodeMaps(Value2SUsMap &stores,
// The N last elements in NodeNums will be removed, and the SU with
// the lowest NodeNum of them will become the new BarrierChain to
// let the not yet seen SUs have a dependency to the removed SUs.
- assert (N <= NodeNums.size());
+ assert(N <= NodeNums.size());
SUnit *newBarrierChain = &SUnits[*(NodeNums.end() - N)];
if (BarrierChain) {
// The aliasing and non-aliasing maps reduce independently of each
@@ -1142,183 +1018,77 @@ void ScheduleDAGInstrs::reduceHugeMemNodeMaps(Value2SUsMap &stores,
loads.dump());
}
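// A sketch of the reduction policy above using a plain vector; names are
// hypothetical. NodeNums holds, in ascending order, the node numbers of all
// SUs currently in the maps; the N last entries are dropped and the lowest
// of them becomes the new barrier so not-yet-seen SUs still order after them.
#include <cassert>
#include <vector>

inline unsigned pickNewBarrier(std::vector<unsigned> &NodeNums, unsigned N) {
  assert(N > 0 && N <= NodeNums.size());
  unsigned NewBarrier = *(NodeNums.end() - N); // lowest of the removed SUs
  NodeNums.erase(NodeNums.end() - N, NodeNums.end());
  return NewBarrier;
}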
-/// \brief Initialize register live-range state for updating kills.
-void ScheduleDAGInstrs::startBlockForKills(MachineBasicBlock *BB) {
- // Start with no live registers.
- LiveRegs.reset();
-
- // Examine the live-in regs of all successors.
- for (const MachineBasicBlock *Succ : BB->successors()) {
- for (const auto &LI : Succ->liveins()) {
- // Repeat, for reg and all subregs.
- for (MCSubRegIterator SubRegs(LI.PhysReg, TRI, /*IncludeSelf=*/true);
- SubRegs.isValid(); ++SubRegs)
- LiveRegs.set(*SubRegs);
- }
- }
-}
-
-/// \brief If we change a kill flag on the bundle instruction implicit register
-/// operands, then we also need to propagate that to any instructions inside
-/// the bundle which had the same kill state.
-static void toggleBundleKillFlag(MachineInstr *MI, unsigned Reg,
- bool NewKillState,
- const TargetRegisterInfo *TRI) {
- if (MI->getOpcode() != TargetOpcode::BUNDLE)
- return;
-
- // Walk backwards from the last instruction in the bundle to the first.
- // Once we set a kill flag on an instruction, we bail out, as otherwise we
- // might set it on too many operands. We will clear as many flags as we
- // can though.
- MachineBasicBlock::instr_iterator Begin = MI->getIterator();
- MachineBasicBlock::instr_iterator End = getBundleEnd(Begin);
- while (Begin != End) {
- if (NewKillState) {
- if ((--End)->addRegisterKilled(Reg, TRI, /* addIfNotFound= */ false))
- return;
- } else
- (--End)->clearRegisterKills(Reg, TRI);
- }
-}
-
-bool ScheduleDAGInstrs::toggleKillFlag(MachineInstr *MI, MachineOperand &MO) {
- // Setting kill flag...
- if (!MO.isKill()) {
- MO.setIsKill(true);
- toggleBundleKillFlag(MI, MO.getReg(), true, TRI);
- return false;
- }
-
- // If MO itself is live, clear the kill flag...
- if (LiveRegs.test(MO.getReg())) {
- MO.setIsKill(false);
- toggleBundleKillFlag(MI, MO.getReg(), false, TRI);
- return false;
- }
-
- // If any subreg of MO is live, then create an imp-def for that
- // subreg and keep MO marked as killed.
- MO.setIsKill(false);
- toggleBundleKillFlag(MI, MO.getReg(), false, TRI);
- bool AllDead = true;
- const unsigned SuperReg = MO.getReg();
- MachineInstrBuilder MIB(MF, MI);
- for (MCSubRegIterator SubRegs(SuperReg, TRI); SubRegs.isValid(); ++SubRegs) {
- if (LiveRegs.test(*SubRegs)) {
- MIB.addReg(*SubRegs, RegState::ImplicitDefine);
- AllDead = false;
- }
- }
+static void toggleKills(const MachineRegisterInfo &MRI, LivePhysRegs &LiveRegs,
+ MachineInstr &MI, bool addToLiveRegs) {
+ for (MachineOperand &MO : MI.operands()) {
+ if (!MO.isReg() || !MO.readsReg())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg)
+ continue;
- if(AllDead) {
- MO.setIsKill(true);
- toggleBundleKillFlag(MI, MO.getReg(), true, TRI);
+ // Registers that are available (not live) after the instruction are
+ // killed by it.
+ bool IsKill = LiveRegs.available(MRI, Reg);
+ MO.setIsKill(IsKill);
+ if (addToLiveRegs)
+ LiveRegs.addReg(Reg);
}
- return false;
}
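// A sketch of the rule toggleKills applies, with LivePhysRegs replaced by a
// plain set (sub-register aliasing ignored). Scanning the block bottom-up, a
// use is a kill exactly when the register is not live after the instruction.
#include <set>

inline bool markUseAndIsKill(std::set<unsigned> &LiveAfter, unsigned Reg) {
  bool IsKill = LiveAfter.count(Reg) == 0; // no later reader => last use
  LiveAfter.insert(Reg); // the register is live above this instruction
  return IsKill;
}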
-// FIXME: Reuse the LivePhysRegs utility for this.
-void ScheduleDAGInstrs::fixupKills(MachineBasicBlock *MBB) {
- DEBUG(dbgs() << "Fixup kills for BB#" << MBB->getNumber() << '\n');
-
- LiveRegs.resize(TRI->getNumRegs());
- BitVector killedRegs(TRI->getNumRegs());
+void ScheduleDAGInstrs::fixupKills(MachineBasicBlock &MBB) {
+ DEBUG(dbgs() << "Fixup kills for BB#" << MBB.getNumber() << '\n');
- startBlockForKills(MBB);
+ LiveRegs.init(*TRI);
+ LiveRegs.addLiveOuts(MBB);
// Examine block from end to start...
- unsigned Count = MBB->size();
- for (MachineBasicBlock::iterator I = MBB->end(), E = MBB->begin();
- I != E; --Count) {
- MachineInstr &MI = *--I;
+ for (MachineInstr &MI : make_range(MBB.rbegin(), MBB.rend())) {
if (MI.isDebugValue())
continue;
// Update liveness. Registers that are defed but not used in this
// instruction are now dead. Mark register and all subregs as they
// are completely defined.
- for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
- MachineOperand &MO = MI.getOperand(i);
- if (MO.isRegMask())
- LiveRegs.clearBitsNotInMask(MO.getRegMask());
- if (!MO.isReg()) continue;
- unsigned Reg = MO.getReg();
- if (Reg == 0) continue;
- if (!MO.isDef()) continue;
- // Ignore two-addr defs.
- if (MI.isRegTiedToUseOperand(i)) continue;
-
- // Repeat for reg and all subregs.
- for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true);
- SubRegs.isValid(); ++SubRegs)
- LiveRegs.reset(*SubRegs);
- }
-
- // Examine all used registers and set/clear kill flag. When a
- // register is used multiple times we only set the kill flag on
- // the first use. Don't set kill flags on undef operands.
- killedRegs.reset();
-
- // toggleKillFlag can append new operands (implicit defs), so using
- // a range-based loop is not safe. The new operands will be appended
- // at the end of the operand list and they don't need to be visited,
- // so iterating until the currently last operand is ok.
- for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
- MachineOperand &MO = MI.getOperand(i);
- if (!MO.isReg() || !MO.isUse() || MO.isUndef()) continue;
- unsigned Reg = MO.getReg();
- if ((Reg == 0) || MRI.isReserved(Reg)) continue;
-
- bool kill = false;
- if (!killedRegs.test(Reg)) {
- kill = true;
- // A register is not killed if any subregs are live...
- for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) {
- if (LiveRegs.test(*SubRegs)) {
- kill = false;
- break;
- }
- }
-
- // If subreg is not live, then register is killed if it became
- // live in this instruction
- if (kill)
- kill = !LiveRegs.test(Reg);
- }
-
- if (MO.isKill() != kill) {
- DEBUG(dbgs() << "Fixing " << MO << " in ");
- toggleKillFlag(&MI, MO);
- DEBUG(MI.dump());
- DEBUG({
- if (MI.getOpcode() == TargetOpcode::BUNDLE) {
- MachineBasicBlock::instr_iterator Begin = MI.getIterator();
- MachineBasicBlock::instr_iterator End = getBundleEnd(Begin);
- while (++Begin != End)
- DEBUG(Begin->dump());
- }
- });
+ for (ConstMIBundleOperands O(MI); O.isValid(); ++O) {
+ const MachineOperand &MO = *O;
+ if (MO.isReg()) {
+ if (!MO.isDef())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg)
+ continue;
+ LiveRegs.removeReg(Reg);
+ } else if (MO.isRegMask()) {
+ LiveRegs.removeRegsInMask(MO);
}
-
- killedRegs.set(Reg);
}
- // Mark any used register (that is not using undef) and subregs as
- // now live...
- for (const MachineOperand &MO : MI.operands()) {
- if (!MO.isReg() || !MO.isUse() || MO.isUndef()) continue;
- unsigned Reg = MO.getReg();
- if ((Reg == 0) || MRI.isReserved(Reg)) continue;
-
- for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true);
- SubRegs.isValid(); ++SubRegs)
- LiveRegs.set(*SubRegs);
+ // If there is a bundle header fix it up first.
+ if (!MI.isBundled()) {
+ toggleKills(MRI, LiveRegs, MI, true);
+ } else {
+ MachineBasicBlock::instr_iterator First = MI.getIterator();
+ if (MI.isBundle()) {
+ toggleKills(MRI, LiveRegs, MI, false);
+ ++First;
+ }
+ // Some targets make the (questionable) assumption that the instructions
+ // inside the bundle are ordered and consequently only the last use of
+ // a register inside the bundle can kill it.
+ MachineBasicBlock::instr_iterator I = std::next(First);
+ while (I->isBundledWithSucc())
+ ++I;
+ do {
+ if (!I->isDebugValue())
+ toggleKills(MRI, LiveRegs, *I, true);
+ --I;
+ } while (I != First);
}
}
}
void ScheduleDAGInstrs::dumpNode(const SUnit *SU) const {
+ // Cannot completely remove virtual function even in release mode.
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
SU->getInstr()->dump();
#endif
@@ -1347,23 +1117,24 @@ std::string ScheduleDAGInstrs::getDAGName() const {
//===----------------------------------------------------------------------===//
namespace llvm {
-/// \brief Internal state used to compute SchedDFSResult.
+
+/// Internal state used to compute SchedDFSResult.
class SchedDFSImpl {
SchedDFSResult &R;
/// Join DAG nodes into equivalence classes by their subtree.
IntEqClasses SubtreeClasses;
/// List PredSU, SuccSU pairs that represent data edges between subtrees.
- std::vector<std::pair<const SUnit*, const SUnit*> > ConnectionPairs;
+ std::vector<std::pair<const SUnit *, const SUnit *>> ConnectionPairs;
struct RootData {
unsigned NodeID;
- unsigned ParentNodeID; // Parent node (member of the parent subtree).
- unsigned SubInstrCount; // Instr count in this tree only, not children.
+ unsigned ParentNodeID; ///< Parent node (member of the parent subtree).
+ unsigned SubInstrCount = 0; ///< Instr count in this tree only, not
+ /// children.
RootData(unsigned id): NodeID(id),
- ParentNodeID(SchedDFSResult::InvalidSubtreeID),
- SubInstrCount(0) {}
+ ParentNodeID(SchedDFSResult::InvalidSubtreeID) {}
unsigned getSparseSetIndex() const { return NodeID; }
};
@@ -1375,7 +1146,7 @@ public:
RootSet.setUniverse(R.DFSNodeData.size());
}
- /// Return true if this node been visited by the DFS traversal.
+ /// Returns true if this node has been visited by the DFS traversal.
///
/// During visitPostorderNode the Node's SubtreeID is assigned to the Node
/// ID. Later, SubtreeID is updated but remains valid.
@@ -1384,7 +1155,7 @@ public:
!= SchedDFSResult::InvalidSubtreeID;
}
- /// Initialize this node's instruction count. We don't need to flag the node
+ /// Initializes this node's instruction count. We don't need to flag the node
/// visited until visitPostorder because the DAG cannot have cycles.
void visitPreorder(const SUnit *SU) {
R.DFSNodeData[SU->NodeNum].InstrCount =
@@ -1433,8 +1204,8 @@ public:
RootSet[SU->NodeNum] = RData;
}
- /// Called once for each tree edge after calling visitPostOrderNode on the
- /// predecessor. Increment the parent node's instruction count and
+ /// \brief Called once for each tree edge after calling visitPostOrderNode on
+ /// the predecessor. Increment the parent node's instruction count and
/// preemptively join this subtree to its parent's if it is small enough.
void visitPostorderEdge(const SDep &PredDep, const SUnit *Succ) {
R.DFSNodeData[Succ->NodeNum].InstrCount
@@ -1442,13 +1213,13 @@ public:
joinPredSubtree(PredDep, Succ);
}
- /// Add a connection for cross edges.
+ /// Adds a connection for cross edges.
void visitCrossEdge(const SDep &PredDep, const SUnit *Succ) {
ConnectionPairs.push_back(std::make_pair(PredDep.getSUnit(), Succ));
}
- /// Set each node's subtree ID to the representative ID and record connections
- /// between trees.
+ /// Sets each node's subtree ID to the representative ID and records
+ /// connections between trees.
void finalize() {
SubtreeClasses.compress();
R.DFSTreeData.resize(SubtreeClasses.getNumClasses());
@@ -1484,8 +1255,8 @@ public:
}
protected:
- /// Join the predecessor subtree with the successor that is its DFS
- /// parent. Apply some heuristics before joining.
+ /// Joins the predecessor subtree with the successor that is its DFS parent.
+ /// Applies some heuristics before joining.
bool joinPredSubtree(const SDep &PredDep, const SUnit *Succ,
bool CheckLimit = true) {
assert(PredDep.getKind() == SDep::Data && "Subtrees are for data edges");
@@ -1531,12 +1302,15 @@ protected:
} while (FromTree != SchedDFSResult::InvalidSubtreeID);
}
};
-} // namespace llvm
+
+} // end namespace llvm
namespace {
-/// \brief Manage the stack used by a reverse depth-first search over the DAG.
+
+/// Manage the stack used by a reverse depth-first search over the DAG.
class SchedDAGReverseDFS {
- std::vector<std::pair<const SUnit*, SUnit::const_pred_iterator> > DFSStack;
+ std::vector<std::pair<const SUnit *, SUnit::const_pred_iterator>> DFSStack;
+
public:
bool isComplete() const { return DFSStack.empty(); }
@@ -1558,7 +1332,8 @@ public:
return getCurr()->Preds.end();
}
};
-} // anonymous
+
+} // end anonymous namespace
static bool hasDataSucc(const SUnit *SU) {
for (const SDep &SuccDep : SU->Succs) {
@@ -1569,7 +1344,7 @@ static bool hasDataSucc(const SUnit *SU) {
return false;
}
-/// Compute an ILP metric for all nodes in the subDAG reachable via depth-first
+/// Computes an ILP metric for all nodes in the subDAG reachable via depth-first
/// search from this root.
void SchedDFSResult::compute(ArrayRef<SUnit> SUnits) {
if (!IsBottomUp)
@@ -1583,7 +1358,7 @@ void SchedDFSResult::compute(ArrayRef<SUnit> SUnits) {
SchedDAGReverseDFS DFS;
Impl.visitPreorder(&SU);
DFS.follow(&SU);
- for (;;) {
+ while (true) {
// Traverse the leftmost path as far as possible.
while (DFS.getPred() != DFS.getPredEnd()) {
const SDep &PredDep = *DFS.getPred();
@@ -1626,8 +1401,8 @@ void SchedDFSResult::scheduleTree(unsigned SubtreeID) {
}
}
-LLVM_DUMP_METHOD
-void ILPValue::print(raw_ostream &OS) const {
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD void ILPValue::print(raw_ostream &OS) const {
OS << InstrCount << " / " << Length << " = ";
if (!Length)
OS << "BADILP";
@@ -1635,8 +1410,7 @@ void ILPValue::print(raw_ostream &OS) const {
OS << format("%g", ((double)InstrCount / Length));
}
-LLVM_DUMP_METHOD
-void ILPValue::dump() const {
+LLVM_DUMP_METHOD void ILPValue::dump() const {
dbgs() << *this << '\n';
}
@@ -1648,4 +1422,6 @@ raw_ostream &operator<<(raw_ostream &OS, const ILPValue &Val) {
return OS;
}
-} // namespace llvm
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/CodeGen/ScheduleDAGPrinter.cpp b/contrib/llvm/lib/CodeGen/ScheduleDAGPrinter.cpp
index ca2881c..bb6a459 100644
--- a/contrib/llvm/lib/CodeGen/ScheduleDAGPrinter.cpp
+++ b/contrib/llvm/lib/CodeGen/ScheduleDAGPrinter.cpp
@@ -11,11 +11,11 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/CodeGen/ScheduleDAG.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/ScheduleDAG.h"
#include "llvm/IR/Constants.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/GraphWriter.h"
diff --git a/contrib/llvm/lib/CodeGen/ScoreboardHazardRecognizer.cpp b/contrib/llvm/lib/CodeGen/ScoreboardHazardRecognizer.cpp
index 83bc1ba..b3d83d5 100644
--- a/contrib/llvm/lib/CodeGen/ScoreboardHazardRecognizer.cpp
+++ b/contrib/llvm/lib/CodeGen/ScoreboardHazardRecognizer.cpp
@@ -1,4 +1,4 @@
-//===----- ScoreboardHazardRecognizer.cpp - Scheduler Support -------------===//
+//===- ScoreboardHazardRecognizer.cpp - Scheduler Support -----------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -15,11 +15,13 @@
#include "llvm/CodeGen/ScoreboardHazardRecognizer.h"
#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCInstrItineraries.h"
+#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
+#include <cassert>
using namespace llvm;
@@ -29,8 +31,7 @@ ScoreboardHazardRecognizer::ScoreboardHazardRecognizer(
const InstrItineraryData *II, const ScheduleDAG *SchedDAG,
const char *ParentDebugType)
: ScheduleHazardRecognizer(), DebugType(ParentDebugType), ItinData(II),
- DAG(SchedDAG), IssueWidth(0), IssueCount(0) {
-
+ DAG(SchedDAG) {
// Determine the maximum depth of any itinerary. This determines the depth of
// the scoreboard. We always make the scoreboard at least 1 cycle deep to
// avoid dealing with the boundary condition.
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 2c7bffe..432c86d 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -25,6 +25,7 @@
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/SelectionDAGAddressAnalysis.h"
#include "llvm/CodeGen/SelectionDAGTargetInfo.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
@@ -33,6 +34,7 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetLowering.h"
@@ -53,10 +55,6 @@ STATISTIC(SlicedLoads, "Number of load sliced");
namespace {
static cl::opt<bool>
- CombinerAA("combiner-alias-analysis", cl::Hidden,
- cl::desc("Enable DAG combiner alias-analysis heuristics"));
-
- static cl::opt<bool>
CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden,
cl::desc("Enable DAG combiner's use of IR alias analysis"));
@@ -117,7 +115,7 @@ namespace {
SmallPtrSet<SDNode *, 32> CombinedNodes;
// AA - Used for DAG load/store alias analysis.
- AliasAnalysis &AA;
+ AliasAnalysis *AA;
/// When an instruction is simplified, add all users of the instruction to
/// the work lists because they might get more simplified now.
@@ -133,6 +131,9 @@ namespace {
/// Add to the worklist making sure its instance is at the back (next to be
/// processed.)
void AddToWorklist(SDNode *N) {
+ assert(N->getOpcode() != ISD::DELETED_NODE &&
+ "Deleted Node added to Worklist");
+
// Skip handle nodes as they can't usefully be combined and confuse the
// zero-use deletion strategy.
if (N->getOpcode() == ISD::HANDLENODE)
@@ -177,6 +178,7 @@ namespace {
void CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO);
private:
+ unsigned MaximumLegalStoreInBits;
/// Check the specified integer node value to see if it can be simplified or
/// if things it uses can be simplified by bit propagation.
@@ -232,11 +234,18 @@ namespace {
SDValue visitTokenFactor(SDNode *N);
SDValue visitMERGE_VALUES(SDNode *N);
SDValue visitADD(SDNode *N);
+ SDValue visitADDLike(SDValue N0, SDValue N1, SDNode *LocReference);
SDValue visitSUB(SDNode *N);
SDValue visitADDC(SDNode *N);
+ SDValue visitUADDO(SDNode *N);
+ SDValue visitUADDOLike(SDValue N0, SDValue N1, SDNode *N);
SDValue visitSUBC(SDNode *N);
+ SDValue visitUSUBO(SDNode *N);
SDValue visitADDE(SDNode *N);
+ SDValue visitADDCARRY(SDNode *N);
+ SDValue visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn, SDNode *N);
SDValue visitSUBE(SDNode *N);
+ SDValue visitSUBCARRY(SDNode *N);
SDValue visitMUL(SDNode *N);
SDValue useDivRem(SDNode *N);
SDValue visitSDIV(SDNode *N);
@@ -259,6 +268,7 @@ namespace {
SDValue visitSRA(SDNode *N);
SDValue visitSRL(SDNode *N);
SDValue visitRotate(SDNode *N);
+ SDValue visitABS(SDNode *N);
SDValue visitBSWAP(SDNode *N);
SDValue visitBITREVERSE(SDNode *N);
SDValue visitCTLZ(SDNode *N);
@@ -271,9 +281,11 @@ namespace {
SDValue visitSELECT_CC(SDNode *N);
SDValue visitSETCC(SDNode *N);
SDValue visitSETCCE(SDNode *N);
+ SDValue visitSETCCCARRY(SDNode *N);
SDValue visitSIGN_EXTEND(SDNode *N);
SDValue visitZERO_EXTEND(SDNode *N);
SDValue visitANY_EXTEND(SDNode *N);
+ SDValue visitAssertZext(SDNode *N);
SDValue visitSIGN_EXTEND_INREG(SDNode *N);
SDValue visitSIGN_EXTEND_VECTOR_INREG(SDNode *N);
SDValue visitZERO_EXTEND_VECTOR_INREG(SDNode *N);
@@ -336,6 +348,7 @@ namespace {
SDValue visitShiftByConstant(SDNode *N, ConstantSDNode *Amt);
SDValue foldSelectOfConstants(SDNode *N);
+ SDValue foldBinOpIntoSelect(SDNode *BO);
bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS);
SDValue SimplifyBinOpWithSameOpcodeHands(SDNode *N);
SDValue SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2);
@@ -344,6 +357,8 @@ namespace {
bool NotExtCompare = false);
SDValue foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0, SDValue N1,
SDValue N2, SDValue N3, ISD::CondCode CC);
+ SDValue foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
+ const SDLoc &DL);
SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
const SDLoc &DL, bool foldBooleans = true);
@@ -361,14 +376,14 @@ namespace {
SDValue BuildSDIVPow2(SDNode *N);
SDValue BuildUDIV(SDNode *N);
SDValue BuildLogBase2(SDValue Op, const SDLoc &DL);
- SDValue BuildReciprocalEstimate(SDValue Op, SDNodeFlags *Flags);
- SDValue buildRsqrtEstimate(SDValue Op, SDNodeFlags *Flags);
- SDValue buildSqrtEstimate(SDValue Op, SDNodeFlags *Flags);
- SDValue buildSqrtEstimateImpl(SDValue Op, SDNodeFlags *Flags, bool Recip);
+ SDValue BuildReciprocalEstimate(SDValue Op, SDNodeFlags Flags);
+ SDValue buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags);
+ SDValue buildSqrtEstimate(SDValue Op, SDNodeFlags Flags);
+ SDValue buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags, bool Recip);
SDValue buildSqrtNROneConst(SDValue Op, SDValue Est, unsigned Iterations,
- SDNodeFlags *Flags, bool Reciprocal);
+ SDNodeFlags Flags, bool Reciprocal);
SDValue buildSqrtNRTwoConst(SDValue Op, SDValue Est, unsigned Iterations,
- SDNodeFlags *Flags, bool Reciprocal);
+ SDNodeFlags Flags, bool Reciprocal);
SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
bool DemandHighBits = true);
SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1);
@@ -377,6 +392,7 @@ namespace {
unsigned PosOpcode, unsigned NegOpcode,
const SDLoc &DL);
SDNode *MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL);
+ SDValue MatchLoadCombine(SDNode *N);
SDValue ReduceLoadWidth(SDNode *N);
SDValue ReduceLoadOpStoreWidth(SDNode *N);
SDValue splitMergedValStore(StoreSDNode *ST);
@@ -384,9 +400,11 @@ namespace {
SDValue reduceBuildVecExtToExtBuildVec(SDNode *N);
SDValue reduceBuildVecConvertToConvertBuildVec(SDNode *N);
SDValue reduceBuildVecToShuffle(SDNode *N);
- SDValue createBuildVecShuffle(SDLoc DL, SDNode *N, ArrayRef<int> VectorMask,
- SDValue VecIn1, SDValue VecIn2,
- unsigned LeftIdx);
+ SDValue reduceBuildVecToTrunc(SDNode *N);
+ SDValue createBuildVecShuffle(const SDLoc &DL, SDNode *N,
+ ArrayRef<int> VectorMask, SDValue VecIn1,
+ SDValue VecIn2, unsigned LeftIdx);
+ SDValue matchVSelectOpSizesWithSetCC(SDNode *N);
SDValue GetDemandedBits(SDValue V, const APInt &Mask);
@@ -416,15 +434,12 @@ namespace {
/// Holds a pointer to an LSBaseSDNode as well as information on where it
/// is located in a sequence of memory operations connected by a chain.
struct MemOpLink {
- MemOpLink (LSBaseSDNode *N, int64_t Offset, unsigned Seq):
- MemNode(N), OffsetFromBase(Offset), SequenceNum(Seq) { }
+ MemOpLink(LSBaseSDNode *N, int64_t Offset)
+ : MemNode(N), OffsetFromBase(Offset) {}
// Ptr to the mem node.
LSBaseSDNode *MemNode;
// Offset from the base ptr.
int64_t OffsetFromBase;
- // What is the sequence number of this mem node.
- // Lowest mem operand in the DAG starts at zero.
- unsigned SequenceNum;
};
/// This is a helper function for visitMUL to check the profitability
@@ -435,12 +450,6 @@ namespace {
SDValue &AddNode,
SDValue &ConstNode);
- /// This is a helper function for MergeStoresOfConstantsOrVecElts. Returns a
- /// constant build_vector of the stored constant values in Stores.
- SDValue getMergedConstantVectorStore(SelectionDAG &DAG, const SDLoc &SL,
- ArrayRef<MemOpLink> Stores,
- SmallVectorImpl<SDValue> &Chains,
- EVT Ty) const;
/// This is a helper function for visitAND and visitZERO_EXTEND. Returns
/// true if the (and (load x) c) pattern matches an extload. ExtVT returns
@@ -451,34 +460,36 @@ namespace {
EVT LoadResultTy, EVT &ExtVT, EVT &LoadedVT,
bool &NarrowLoad);
+ /// Helper function for MergeConsecutiveStores which merges the
+ /// component store chains.
+ SDValue getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
+ unsigned NumStores);
+
/// This is a helper function for MergeConsecutiveStores. When the source
/// elements of the consecutive stores are all constants or all extracted
/// vector elements, try to merge them into one larger store.
- /// \return number of stores that were merged into a merged store (always
- /// a prefix of \p StoreNode).
- bool MergeStoresOfConstantsOrVecElts(
- SmallVectorImpl<MemOpLink> &StoreNodes, EVT MemVT, unsigned NumStores,
- bool IsConstantSrc, bool UseVector);
+ /// \return True if a merged store was created.
+ bool MergeStoresOfConstantsOrVecElts(SmallVectorImpl<MemOpLink> &StoreNodes,
+ EVT MemVT, unsigned NumStores,
+ bool IsConstantSrc, bool UseVector,
+ bool UseTrunc);
/// This is a helper function for MergeConsecutiveStores.
/// Stores that may be merged are placed in StoreNodes.
- /// Loads that may alias with those stores are placed in AliasLoadNodes.
- void getStoreMergeAndAliasCandidates(
- StoreSDNode* St, SmallVectorImpl<MemOpLink> &StoreNodes,
- SmallVectorImpl<LSBaseSDNode*> &AliasLoadNodes);
+ void getStoreMergeCandidates(StoreSDNode *St,
+ SmallVectorImpl<MemOpLink> &StoreNodes);
/// Helper function for MergeConsecutiveStores. Checks if
/// Candidate stores have indirect dependency through their
/// operands. \return True if safe to merge
bool checkMergeStoreCandidatesForDependencies(
- SmallVectorImpl<MemOpLink> &StoreNodes);
+ SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores);
/// Merge consecutive store operations into a wide store.
/// This optimization uses wide integers or vectors when possible.
/// \return number of stores that were merged into a merged store (the
/// affected nodes are stored as a prefix in \p StoreNodes).
- bool MergeConsecutiveStores(StoreSDNode *N,
- SmallVectorImpl<MemOpLink> &StoreNodes);
+ bool MergeConsecutiveStores(StoreSDNode *N);
/// \brief Try to transform a truncation where C is a constant:
/// (trunc (and X, C)) -> (and (trunc X), (trunc C))
@@ -489,10 +500,17 @@ namespace {
SDValue distributeTruncateThroughAnd(SDNode *N);
public:
- DAGCombiner(SelectionDAG &D, AliasAnalysis &A, CodeGenOpt::Level OL)
+ DAGCombiner(SelectionDAG &D, AliasAnalysis *AA, CodeGenOpt::Level OL)
: DAG(D), TLI(D.getTargetLoweringInfo()), Level(BeforeLegalizeTypes),
- OptLevel(OL), LegalOperations(false), LegalTypes(false), AA(A) {
+ OptLevel(OL), LegalOperations(false), LegalTypes(false), AA(AA) {
ForCodeSize = DAG.getMachineFunction().getFunction()->optForSize();
+
+ MaximumLegalStoreInBits = 0;
+ for (MVT VT : MVT::all_valuetypes())
+ if (EVT(VT).isSimple() && VT != MVT::Other &&
+ TLI.isTypeLegal(EVT(VT)) &&
+ VT.getSizeInBits() >= MaximumLegalStoreInBits)
+ MaximumLegalStoreInBits = VT.getSizeInBits();
}
/// Runs the dag combiner on all nodes in the work list
@@ -607,10 +625,16 @@ static char isNegatibleForFree(SDValue Op, bool LegalOperations,
switch (Op.getOpcode()) {
default: return false;
- case ISD::ConstantFP:
- // Don't invert constant FP values after legalize. The negated constant
- // isn't necessarily legal.
- return LegalOperations ? 0 : 1;
+ case ISD::ConstantFP: {
+ if (!LegalOperations)
+ return 1;
+
+ // Don't invert constant FP values after legalization unless the target says
+ // the negated constant is legal.
+ EVT VT = Op.getValueType();
+ return TLI.isOperationLegal(ISD::ConstantFP, VT) ||
+ TLI.isFPImmLegal(neg(cast<ConstantFPSDNode>(Op)->getValueAPF()), VT);
+ }
case ISD::FADD:
// FIXME: determine better conditions for this xform.
if (!Options->UnsafeFPMath) return 0;
@@ -629,7 +653,8 @@ static char isNegatibleForFree(SDValue Op, bool LegalOperations,
Depth + 1);
case ISD::FSUB:
// We can't turn -(A-B) into B-A when we honor signed zeros.
- if (!Options->UnsafeFPMath && !Op.getNode()->getFlags()->hasNoSignedZeros())
+ if (!Options->NoSignedZerosFPMath &&
+ !Op.getNode()->getFlags().hasNoSignedZeros())
return 0;
// fold (fneg (fsub A, B)) -> (fsub B, A)
@@ -667,7 +692,7 @@ static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG,
assert(Depth <= 6 && "GetNegatedExpression doesn't match isNegatibleForFree");
- const SDNodeFlags *Flags = Op.getNode()->getFlags();
+ const SDNodeFlags Flags = Op.getNode()->getFlags();
switch (Op.getOpcode()) {
default: llvm_unreachable("Unknown code");
@@ -950,8 +975,8 @@ CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
/// things it uses can be simplified by bit propagation. If so, return true.
bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &Demanded) {
TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
- APInt KnownZero, KnownOne;
- if (!TLI.SimplifyDemandedBits(Op, Demanded, KnownZero, KnownOne, TLO))
+ KnownBits Known;
+ if (!TLI.SimplifyDemandedBits(Op, Demanded, Known, TLO))
return false;
// Revisit the node.
@@ -1006,13 +1031,13 @@ SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) {
switch (Opc) {
default: break;
case ISD::AssertSext:
- return DAG.getNode(ISD::AssertSext, DL, PVT,
- SExtPromoteOperand(Op.getOperand(0), PVT),
- Op.getOperand(1));
+ if (SDValue Op0 = SExtPromoteOperand(Op.getOperand(0), PVT))
+ return DAG.getNode(ISD::AssertSext, DL, PVT, Op0, Op.getOperand(1));
+ break;
case ISD::AssertZext:
- return DAG.getNode(ISD::AssertZext, DL, PVT,
- ZExtPromoteOperand(Op.getOperand(0), PVT),
- Op.getOperand(1));
+ if (SDValue Op0 = ZExtPromoteOperand(Op.getOperand(0), PVT))
+ return DAG.getNode(ISD::AssertZext, DL, PVT, Op0, Op.getOperand(1));
+ break;
case ISD::Constant: {
unsigned ExtOpc =
Op.getValueType().isByteSized() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
@@ -1079,37 +1104,44 @@ SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) {
if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
assert(PVT != VT && "Don't know what type to promote to!");
+ DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));
+
bool Replace0 = false;
SDValue N0 = Op.getOperand(0);
SDValue NN0 = PromoteOperand(N0, PVT, Replace0);
- if (!NN0.getNode())
- return SDValue();
bool Replace1 = false;
SDValue N1 = Op.getOperand(1);
- SDValue NN1;
- if (N0 == N1)
- NN1 = NN0;
- else {
- NN1 = PromoteOperand(N1, PVT, Replace1);
- if (!NN1.getNode())
- return SDValue();
- }
+ SDValue NN1 = PromoteOperand(N1, PVT, Replace1);
+ SDLoc DL(Op);
- AddToWorklist(NN0.getNode());
- if (NN1.getNode())
- AddToWorklist(NN1.getNode());
+ SDValue RV =
+ DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, NN0, NN1));
+
+ // We are always replacing N0/N1's use in N and only need
+ // additional replacements if there are additional uses.
+ Replace0 &= !N0->hasOneUse();
+ Replace1 &= (N0 != N1) && !N1->hasOneUse();
+
+ // Combine Op here so it is preserved past replacements.
+ CombineTo(Op.getNode(), RV);
- if (Replace0)
+ // If operands have a use ordering, make sure we deal with the
+ // predecessor first.
+ if (Replace0 && Replace1 && N0.getNode()->isPredecessorOf(N1.getNode())) {
+ std::swap(N0, N1);
+ std::swap(NN0, NN1);
+ }
+
+ if (Replace0) {
+ AddToWorklist(NN0.getNode());
ReplaceLoadWithPromotedLoad(N0.getNode(), NN0.getNode());
- if (Replace1)
+ }
+ if (Replace1) {
+ AddToWorklist(NN1.getNode());
ReplaceLoadWithPromotedLoad(N1.getNode(), NN1.getNode());
-
- DEBUG(dbgs() << "\nPromoting ";
- Op.getNode()->dump(&DAG));
- SDLoc DL(Op);
- return DAG.getNode(ISD::TRUNCATE, DL, VT,
- DAG.getNode(Opc, DL, PVT, NN0, NN1));
+ }
+ return Op;
}
return SDValue();
}
@@ -1137,26 +1169,32 @@ SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) {
if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
assert(PVT != VT && "Don't know what type to promote to!");
+ DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));
+
bool Replace = false;
SDValue N0 = Op.getOperand(0);
+ SDValue N1 = Op.getOperand(1);
if (Opc == ISD::SRA)
- N0 = SExtPromoteOperand(Op.getOperand(0), PVT);
+ N0 = SExtPromoteOperand(N0, PVT);
else if (Opc == ISD::SRL)
- N0 = ZExtPromoteOperand(Op.getOperand(0), PVT);
+ N0 = ZExtPromoteOperand(N0, PVT);
else
N0 = PromoteOperand(N0, PVT, Replace);
+
if (!N0.getNode())
return SDValue();
+ SDLoc DL(Op);
+ SDValue RV =
+ DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, N0, N1));
+
AddToWorklist(N0.getNode());
if (Replace)
ReplaceLoadWithPromotedLoad(Op.getOperand(0).getNode(), N0.getNode());
- DEBUG(dbgs() << "\nPromoting ";
- Op.getNode()->dump(&DAG));
- SDLoc DL(Op);
- return DAG.getNode(ISD::TRUNCATE, DL, VT,
- DAG.getNode(Opc, DL, PVT, N0, Op.getOperand(1)));
+ // Deal with Op being deleted.
+ if (Op && Op.getOpcode() != ISD::DELETED_NODE)
+ return RV;
}
return SDValue();
}
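// A plain-integer illustration of the shape PromoteIntShiftOp builds
// (a TRUNCATE of the shift performed in the promoted type); the function
// name and the concrete widths are illustrative only.
#include <cstdint>

inline uint16_t promotedSrl(uint16_t X, unsigned Amt) {
  // SRL requires the operand to be zero-extended (ZExtPromoteOperand) so
  // the bits shifted in from the promoted high half are zero.
  uint32_t Wide = static_cast<uint32_t>(X);
  return static_cast<uint16_t>(Wide >> Amt); // TRUNCATE(SRL in i32)
}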
@@ -1361,8 +1399,7 @@ void DAGCombiner::Run(CombineLevel AtLevel) {
else {
assert(N->getValueType(0) == RV.getValueType() &&
N->getNumValues() == 1 && "Type mismatch");
- SDValue OpV = RV;
- DAG.ReplaceAllUsesWith(N, &OpV);
+ DAG.ReplaceAllUsesWith(N, &RV);
}
// Push the new node and any users onto the worklist
@@ -1389,9 +1426,13 @@ SDValue DAGCombiner::visit(SDNode *N) {
case ISD::ADD: return visitADD(N);
case ISD::SUB: return visitSUB(N);
case ISD::ADDC: return visitADDC(N);
+ case ISD::UADDO: return visitUADDO(N);
case ISD::SUBC: return visitSUBC(N);
+ case ISD::USUBO: return visitUSUBO(N);
case ISD::ADDE: return visitADDE(N);
+ case ISD::ADDCARRY: return visitADDCARRY(N);
case ISD::SUBE: return visitSUBE(N);
+ case ISD::SUBCARRY: return visitSUBCARRY(N);
case ISD::MUL: return visitMUL(N);
case ISD::SDIV: return visitSDIV(N);
case ISD::UDIV: return visitUDIV(N);
@@ -1415,6 +1456,7 @@ SDValue DAGCombiner::visit(SDNode *N) {
case ISD::SRL: return visitSRL(N);
case ISD::ROTR:
case ISD::ROTL: return visitRotate(N);
+ case ISD::ABS: return visitABS(N);
case ISD::BSWAP: return visitBSWAP(N);
case ISD::BITREVERSE: return visitBITREVERSE(N);
case ISD::CTLZ: return visitCTLZ(N);
@@ -1427,9 +1469,11 @@ SDValue DAGCombiner::visit(SDNode *N) {
case ISD::SELECT_CC: return visitSELECT_CC(N);
case ISD::SETCC: return visitSETCC(N);
case ISD::SETCCE: return visitSETCCE(N);
+ case ISD::SETCCCARRY: return visitSETCCCARRY(N);
case ISD::SIGN_EXTEND: return visitSIGN_EXTEND(N);
case ISD::ZERO_EXTEND: return visitZERO_EXTEND(N);
case ISD::ANY_EXTEND: return visitANY_EXTEND(N);
+ case ISD::AssertZext: return visitAssertZext(N);
case ISD::SIGN_EXTEND_INREG: return visitSIGN_EXTEND_INREG(N);
case ISD::SIGN_EXTEND_VECTOR_INREG: return visitSIGN_EXTEND_VECTOR_INREG(N);
case ISD::ZERO_EXTEND_VECTOR_INREG: return visitZERO_EXTEND_VECTOR_INREG(N);
@@ -1530,7 +1574,7 @@ SDValue DAGCombiner::combine(SDNode *N) {
// If N is a commutative binary node, try commuting it to enable more
// sdisel CSE.
- if (!RV.getNode() && SelectionDAG::isCommutativeBinOp(N->getOpcode()) &&
+ if (!RV.getNode() && TLI.isCommutativeBinOp(N->getOpcode()) &&
N->getNumValues() == 1) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
@@ -1574,7 +1618,7 @@ SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
}
SmallVector<SDNode *, 8> TFs; // List of token factors to visit.
- SmallVector<SDValue, 8> Ops; // Ops for replacing token factor.
+ SmallVector<SDValue, 8> Ops; // Ops for replacing token factor.
SmallPtrSet<SDNode*, 16> SeenOps;
bool Changed = false; // If we should replace this token factor.
@@ -1618,26 +1662,108 @@ SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
}
}
- SDValue Result;
+ // Remove nodes that are chained to another node in the list. Do so by
+ // walking up chains breadth-first, stopping when we've seen another
+ // operand. In general we must climb to the EntryNode, but we can exit
+ // early if we find all remaining work is associated with just one operand
+ // as no further pruning is possible.
+
+ // List of nodes to search through and original Ops from which they originate.
+ SmallVector<std::pair<SDNode *, unsigned>, 8> Worklist;
+ SmallVector<unsigned, 8> OpWorkCount; // Count of work for each Op.
+ SmallPtrSet<SDNode *, 16> SeenChains;
+ bool DidPruneOps = false;
+
+ unsigned NumLeftToConsider = 0;
+ for (const SDValue &Op : Ops) {
+ Worklist.push_back(std::make_pair(Op.getNode(), NumLeftToConsider++));
+ OpWorkCount.push_back(1);
+ }
+
+ auto AddToWorklist = [&](unsigned CurIdx, SDNode *Op, unsigned OpNumber) {
+ // If this is an Op, we can remove the op from the list. Re-mark any
+ // search associated with it as coming from the current OpNumber.
+ if (SeenOps.count(Op) != 0) {
+ Changed = true;
+ DidPruneOps = true;
+ unsigned OrigOpNumber = 0;
+ while (OrigOpNumber < Ops.size() && Ops[OrigOpNumber].getNode() != Op)
+ OrigOpNumber++;
+ assert((OrigOpNumber != Ops.size()) &&
+ "expected to find TokenFactor Operand");
+ // Re-mark worklist from OrigOpNumber to OpNumber
+ for (unsigned i = CurIdx + 1; i < Worklist.size(); ++i) {
+ if (Worklist[i].second == OrigOpNumber) {
+ Worklist[i].second = OpNumber;
+ }
+ }
+ OpWorkCount[OpNumber] += OpWorkCount[OrigOpNumber];
+ OpWorkCount[OrigOpNumber] = 0;
+ NumLeftToConsider--;
+ }
+ // Add if it's a new chain
+ if (SeenChains.insert(Op).second) {
+ OpWorkCount[OpNumber]++;
+ Worklist.push_back(std::make_pair(Op, OpNumber));
+ }
+ };
+
+ for (unsigned i = 0; i < Worklist.size() && i < 1024; ++i) {
+ // We need to consider at least 2 Ops to prune.
+ if (NumLeftToConsider <= 1)
+ break;
+ auto CurNode = Worklist[i].first;
+ auto CurOpNumber = Worklist[i].second;
+ assert((OpWorkCount[CurOpNumber] > 0) &&
+ "Node should not appear in worklist");
+ switch (CurNode->getOpcode()) {
+ case ISD::EntryToken:
+ // Hitting EntryToken is the only way for the search to terminate
+ // without hitting another operand's search. Prevent us from marking
+ // this operand considered.
+ NumLeftToConsider++;
+ break;
+ case ISD::TokenFactor:
+ for (const SDValue &Op : CurNode->op_values())
+ AddToWorklist(i, Op.getNode(), CurOpNumber);
+ break;
+ case ISD::CopyFromReg:
+ case ISD::CopyToReg:
+ AddToWorklist(i, CurNode->getOperand(0).getNode(), CurOpNumber);
+ break;
+ default:
+ if (auto *MemNode = dyn_cast<MemSDNode>(CurNode))
+ AddToWorklist(i, MemNode->getChain().getNode(), CurOpNumber);
+ break;
+ }
+ OpWorkCount[CurOpNumber]--;
+ if (OpWorkCount[CurOpNumber] == 0)
+ NumLeftToConsider--;
+ }
// If we've changed things around then replace token factor.
if (Changed) {
+ SDValue Result;
if (Ops.empty()) {
// The entry token is the only possible outcome.
Result = DAG.getEntryNode();
} else {
- // New and improved token factor.
- Result = DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Ops);
+ if (DidPruneOps) {
+ SmallVector<SDValue, 8> PrunedOps;
+ // Keep only the ops whose chains were not reached from another op.
+ for (const SDValue &Op : Ops) {
+ if (SeenChains.count(Op.getNode()) == 0)
+ PrunedOps.push_back(Op);
+ }
+ Result = DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, PrunedOps);
+ } else {
+ Result = DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Ops);
+ }
}
-
- // Add users to worklist if AA is enabled, since it may introduce
- // a lot of new chained token factors while removing memory deps.
- bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA
- : DAG.getSubtarget().useAA();
- return CombineTo(N, Result, UseAA /*add to worklist*/);
+ return Result;
}
-
- return Result;
+ return SDValue();
}
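// A simplified, standalone sketch of the pruning walk above: climb each
// operand's chain breadth-first and drop any operand that is reached from
// another operand's search, since that operand is already ordered by the
// chain that reached it. The per-op work counters and early exit of the
// real code are reduced to the essentials here; all names are invented.
#include <cstddef>
#include <set>
#include <vector>

struct ChainNode {
  std::vector<ChainNode *> Preds; // incoming chain operands
};

inline std::set<const ChainNode *>
findRedundantOps(const std::vector<ChainNode *> &Ops) {
  std::set<const ChainNode *> OpSet(Ops.begin(), Ops.end());
  std::set<const ChainNode *> SeenChains;
  std::set<const ChainNode *> Redundant;
  std::vector<const ChainNode *> Worklist(Ops.begin(), Ops.end());
  for (size_t I = 0; I < Worklist.size() && I < 1024; ++I) {
    for (const ChainNode *Pred : Worklist[I]->Preds) {
      if (OpSet.count(Pred))
        Redundant.insert(Pred); // another op's chain already orders it
      if (SeenChains.insert(Pred).second)
        Worklist.push_back(Pred); // visit each chain node once
    }
  }
  return Redundant;
}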
/// MERGE_VALUES can always be eliminated.
@@ -1664,6 +1790,60 @@ static ConstantSDNode *getAsNonOpaqueConstant(SDValue N) {
return Const != nullptr && !Const->isOpaque() ? Const : nullptr;
}
+SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) {
+ auto BinOpcode = BO->getOpcode();
+ assert((BinOpcode == ISD::ADD || BinOpcode == ISD::SUB ||
+ BinOpcode == ISD::MUL || BinOpcode == ISD::SDIV ||
+ BinOpcode == ISD::UDIV || BinOpcode == ISD::SREM ||
+ BinOpcode == ISD::UREM || BinOpcode == ISD::AND ||
+ BinOpcode == ISD::OR || BinOpcode == ISD::XOR ||
+ BinOpcode == ISD::SHL || BinOpcode == ISD::SRL ||
+ BinOpcode == ISD::SRA || BinOpcode == ISD::FADD ||
+ BinOpcode == ISD::FSUB || BinOpcode == ISD::FMUL ||
+ BinOpcode == ISD::FDIV || BinOpcode == ISD::FREM) &&
+ "Unexpected binary operator");
+
+ // Bail out if any constants are opaque because we can't constant fold those.
+ SDValue C1 = BO->getOperand(1);
+ if (!isConstantOrConstantVector(C1, true) &&
+ !isConstantFPBuildVectorOrConstantFP(C1))
+ return SDValue();
+
+ // Don't do this unless the old select is going away. We want to eliminate the
+ // binary operator, not replace a binop with a select.
+ // TODO: Handle ISD::SELECT_CC.
+ SDValue Sel = BO->getOperand(0);
+ if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
+ return SDValue();
+
+ SDValue CT = Sel.getOperand(1);
+ if (!isConstantOrConstantVector(CT, true) &&
+ !isConstantFPBuildVectorOrConstantFP(CT))
+ return SDValue();
+
+ SDValue CF = Sel.getOperand(2);
+ if (!isConstantOrConstantVector(CF, true) &&
+ !isConstantFPBuildVectorOrConstantFP(CF))
+ return SDValue();
+
+ // We have a select-of-constants followed by a binary operator with a
+ // constant. Eliminate the binop by pulling the constant math into the select.
+ // Example: add (select Cond, CT, CF), C1 --> select Cond, CT + C1, CF + C1
+ EVT VT = Sel.getValueType();
+ SDLoc DL(Sel);
+ SDValue NewCT = DAG.getNode(BinOpcode, DL, VT, CT, C1);
+ assert((NewCT.isUndef() || isConstantOrConstantVector(NewCT) ||
+ isConstantFPBuildVectorOrConstantFP(NewCT)) &&
+ "Failed to constant fold a binop with constant operands");
+
+ SDValue NewCF = DAG.getNode(BinOpcode, DL, VT, CF, C1);
+ assert((NewCF.isUndef() || isConstantOrConstantVector(NewCF) ||
+ isConstantFPBuildVectorOrConstantFP(NewCF)) &&
+ "Failed to constant fold a binop with constant operands");
+
+ return DAG.getSelect(DL, VT, Sel.getOperand(0), NewCT, NewCF);
+}
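// A scalar illustration of the fold implemented above: the binop is
// evaluated against both constant arms at compile time, so
//   add (select Cond, 2, 7), 5  -->  select Cond, 7, 12
// and the add disappears. Plain C++ stand-in, not DAG nodes.
inline int selectThenAdd(bool Cond) { return (Cond ? 2 : 7) + 5; }
inline int foldedSelect(bool Cond) { return Cond ? 7 : 12; } // same result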
+
SDValue DAGCombiner::visitADD(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
@@ -1702,16 +1882,36 @@ SDValue DAGCombiner::visitADD(SDNode *N) {
if (isNullConstant(N1))
return N0;
- // fold ((c1-A)+c2) -> (c1+c2)-A
if (isConstantOrConstantVector(N1, /* NoOpaque */ true)) {
- if (N0.getOpcode() == ISD::SUB)
- if (isConstantOrConstantVector(N0.getOperand(0), /* NoOpaque */ true)) {
- return DAG.getNode(ISD::SUB, DL, VT,
- DAG.getNode(ISD::ADD, DL, VT, N1, N0.getOperand(0)),
- N0.getOperand(1));
+ // fold ((c1-A)+c2) -> (c1+c2)-A
+ if (N0.getOpcode() == ISD::SUB &&
+ isConstantOrConstantVector(N0.getOperand(0), /* NoOpaque */ true)) {
+ // FIXME: Adding 2 constants should be handled by FoldConstantArithmetic.
+ return DAG.getNode(ISD::SUB, DL, VT,
+ DAG.getNode(ISD::ADD, DL, VT, N1, N0.getOperand(0)),
+ N0.getOperand(1));
+ }
+
+ // add (sext i1 X), 1 -> zext (not i1 X)
+ // We don't transform this pattern:
+ // add (zext i1 X), -1 -> sext (not i1 X)
+ // because most (?) targets generate better code for the zext form.
+ if (N0.getOpcode() == ISD::SIGN_EXTEND && N0.hasOneUse() &&
+ isOneConstantOrOneSplatConstant(N1)) {
+ SDValue X = N0.getOperand(0);
+ if ((!LegalOperations ||
+ (TLI.isOperationLegal(ISD::XOR, X.getValueType()) &&
+ TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) &&
+ X.getScalarValueSizeInBits() == 1) {
+ SDValue Not = DAG.getNOT(DL, X, X.getValueType());
+ return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Not);
}
+ }
}
+ if (SDValue NewSel = foldBinOpIntoSelect(N))
+ return NewSel;
+
// reassociate add
if (SDValue RADD = ReassociateOps(ISD::ADD, DL, N0, N1))
return RADD;
@@ -1771,9 +1971,60 @@ SDValue DAGCombiner::visitADD(SDNode *N) {
// fold (a+b) -> (a|b) iff a and b share no bits.
if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) &&
- VT.isInteger() && DAG.haveNoCommonBitsSet(N0, N1))
+ DAG.haveNoCommonBitsSet(N0, N1))
return DAG.getNode(ISD::OR, DL, VT, N0, N1);
+ if (SDValue Combined = visitADDLike(N0, N1, N))
+ return Combined;
+
+ if (SDValue Combined = visitADDLike(N1, N0, N))
+ return Combined;
+
+ return SDValue();
+}
+
+static SDValue getAsCarry(const TargetLowering &TLI, SDValue V) {
+ bool Masked = false;
+
+ // First, peel away TRUNCATE/ZERO_EXTEND/AND nodes due to legalization.
+ while (true) {
+ if (V.getOpcode() == ISD::TRUNCATE || V.getOpcode() == ISD::ZERO_EXTEND) {
+ V = V.getOperand(0);
+ continue;
+ }
+
+ if (V.getOpcode() == ISD::AND && isOneConstant(V.getOperand(1))) {
+ Masked = true;
+ V = V.getOperand(0);
+ continue;
+ }
+
+ break;
+ }
+
+ // If this is not a carry, return.
+ if (V.getResNo() != 1)
+ return SDValue();
+
+ if (V.getOpcode() != ISD::ADDCARRY && V.getOpcode() != ISD::SUBCARRY &&
+ V.getOpcode() != ISD::UADDO && V.getOpcode() != ISD::USUBO)
+ return SDValue();
+
+ // If the result is masked, then no matter what kind of bool it is we can
+ // return. If it isn't, then we need to make sure the bool type is either 0 or
+ // 1 and not other values.
+ if (Masked ||
+ TLI.getBooleanContents(V.getValueType()) ==
+ TargetLoweringBase::ZeroOrOneBooleanContent)
+ return V;
+
+ return SDValue();
+}
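// A toy-node sketch of getAsCarry's peeling loop: strip the wrappers that
// legalization inserts around a carry bit (truncate, zero-extend, and-with-1)
// before testing whether a carry-producing node remains. The node type is
// invented, and only UADDO stands in for the full set of carry producers.
struct ToyNode {
  enum Kind { Trunc, ZExt, AndOne, UAddO, Other } K;
  ToyNode *Op0 = nullptr;
};

inline ToyNode *peelToCarry(ToyNode *V, bool &Masked) {
  while (V) {
    if (V->K == ToyNode::Trunc || V->K == ToyNode::ZExt) {
      V = V->Op0; // value-preserving for a 0/1 carry
      continue;
    }
    if (V->K == ToyNode::AndOne) {
      Masked = true; // masking guarantees the bool is 0 or 1
      V = V->Op0;
      continue;
    }
    break;
  }
  return (V && V->K == ToyNode::UAddO) ? V : nullptr;
}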
+
+SDValue DAGCombiner::visitADDLike(SDValue N0, SDValue N1, SDNode *LocReference) {
+ EVT VT = N0.getValueType();
+ SDLoc DL(LocReference);
+
// fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n))
if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::SUB &&
isNullConstantOrNullSplatConstant(N1.getOperand(0).getOperand(0)))
@@ -1781,12 +2032,6 @@ SDValue DAGCombiner::visitADD(SDNode *N) {
DAG.getNode(ISD::SHL, DL, VT,
N1.getOperand(0).getOperand(1),
N1.getOperand(1)));
- if (N0.getOpcode() == ISD::SHL && N0.getOperand(0).getOpcode() == ISD::SUB &&
- isNullConstantOrNullSplatConstant(N0.getOperand(0).getOperand(0)))
- return DAG.getNode(ISD::SUB, DL, VT, N1,
- DAG.getNode(ISD::SHL, DL, VT,
- N0.getOperand(0).getOperand(1),
- N0.getOperand(1)));
if (N1.getOpcode() == ISD::AND) {
SDValue AndOp0 = N1.getOperand(0);
@@ -1797,7 +2042,7 @@ SDValue DAGCombiner::visitADD(SDNode *N) {
// and similar xforms where the inner op is either ~0 or 0.
if (NumSignBits == DestBits &&
isOneConstantOrOneSplatConstant(N1->getOperand(1)))
- return DAG.getNode(ISD::SUB, DL, VT, N->getOperand(0), AndOp0);
+ return DAG.getNode(ISD::SUB, DL, VT, N0, AndOp0);
}
// add (sext i1), X -> sub X, (zext i1)
@@ -1818,6 +2063,18 @@ SDValue DAGCombiner::visitADD(SDNode *N) {
}
}
+ // (add X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry)
+ if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1)))
+ return DAG.getNode(ISD::ADDCARRY, DL, N1->getVTList(),
+ N0, N1.getOperand(0), N1.getOperand(2));
+
+ // (add X, Carry) -> (addcarry X, 0, Carry)
+ if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT))
+ if (SDValue Carry = getAsCarry(TLI, N1))
+ return DAG.getNode(ISD::ADDCARRY, DL,
+ DAG.getVTList(VT, Carry.getValueType()), N0,
+ DAG.getConstant(0, DL, VT), Carry);
+
return SDValue();
}
@@ -1825,40 +2082,90 @@ SDValue DAGCombiner::visitADDC(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
EVT VT = N0.getValueType();
+ SDLoc DL(N);
// If the flag result is dead, turn this into an ADD.
if (!N->hasAnyUseOfValue(1))
- return CombineTo(N, DAG.getNode(ISD::ADD, SDLoc(N), VT, N0, N1),
- DAG.getNode(ISD::CARRY_FALSE,
- SDLoc(N), MVT::Glue));
+ return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
+ DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
// canonicalize constant to RHS.
ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
if (N0C && !N1C)
- return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N1, N0);
+ return DAG.getNode(ISD::ADDC, DL, N->getVTList(), N1, N0);
// fold (addc x, 0) -> x + no carry out
if (isNullConstant(N1))
return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE,
- SDLoc(N), MVT::Glue));
+ DL, MVT::Glue));
+
+ // If it cannot overflow, transform into an add.
+ if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
+ return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
+ DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitUADDO(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ EVT VT = N0.getValueType();
+ if (VT.isVector())
+ return SDValue();
+
+ EVT CarryVT = N->getValueType(1);
+ SDLoc DL(N);
+
+ // If the flag result is dead, turn this into an ADD.
+ if (!N->hasAnyUseOfValue(1))
+ return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
+ DAG.getUNDEF(CarryVT));
+
+ // canonicalize constant to RHS.
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+ if (N0C && !N1C)
+ return DAG.getNode(ISD::UADDO, DL, N->getVTList(), N1, N0);
- // fold (addc a, b) -> (or a, b), CARRY_FALSE iff a and b share no bits.
- APInt LHSZero, LHSOne;
- APInt RHSZero, RHSOne;
- DAG.computeKnownBits(N0, LHSZero, LHSOne);
+ // fold (uaddo x, 0) -> x + no carry out
+ if (isNullConstant(N1))
+ return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
- if (LHSZero.getBoolValue()) {
- DAG.computeKnownBits(N1, RHSZero, RHSOne);
+ // If it cannot overflow, transform into an add.
+ if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
+ return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
+ DAG.getConstant(0, DL, CarryVT));
+
+ if (SDValue Combined = visitUADDOLike(N0, N1, N))
+ return Combined;
- // If all possibly-set bits on the LHS are clear on the RHS, return an OR.
- // If all possibly-set bits on the RHS are clear on the LHS, return an OR.
- if ((RHSZero & ~LHSZero) == ~LHSZero || (LHSZero & ~RHSZero) == ~RHSZero)
- return CombineTo(N, DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N1),
- DAG.getNode(ISD::CARRY_FALSE,
- SDLoc(N), MVT::Glue));
+ if (SDValue Combined = visitUADDOLike(N1, N0, N))
+ return Combined;
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitUADDOLike(SDValue N0, SDValue N1, SDNode *N) {
+ auto VT = N0.getValueType();
+
+  // (uaddo X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry),
+  // but only if Y + 1 cannot overflow.
+ if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1))) {
+ SDValue Y = N1.getOperand(0);
+ SDValue One = DAG.getConstant(1, SDLoc(N), Y.getValueType());
+ if (DAG.computeOverflowKind(Y, One) == SelectionDAG::OFK_Never)
+ return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0, Y,
+ N1.getOperand(2));
}
+ // (uaddo X, Carry) -> (addcarry X, 0, Carry)
+ if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT))
+ if (SDValue Carry = getAsCarry(TLI, N1))
+ return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0,
+ DAG.getConstant(0, SDLoc(N), VT), Carry);
+
return SDValue();
}
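// Illustration (editor's sketch, not part of the patch): the guard in
// visitUADDOLike matters because, for i32,
//   (uaddo X, (addcarry Y, 0, Carry))
// may carry twice: once inside the addcarry (when Y + Carry wraps) and once
// in the outer uaddo. Only when Y + 1 cannot overflow is the total carry
// representable by the single (addcarry X, Y, Carry) node.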
@@ -1881,6 +2188,90 @@ SDValue DAGCombiner::visitADDE(SDNode *N) {
return SDValue();
}
+SDValue DAGCombiner::visitADDCARRY(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue CarryIn = N->getOperand(2);
+ SDLoc DL(N);
+
+ // canonicalize constant to RHS
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+ if (N0C && !N1C)
+ return DAG.getNode(ISD::ADDCARRY, DL, N->getVTList(), N1, N0, CarryIn);
+
+ // fold (addcarry x, y, false) -> (uaddo x, y)
+ if (isNullConstant(CarryIn))
+ return DAG.getNode(ISD::UADDO, DL, N->getVTList(), N0, N1);
+
+ // fold (addcarry 0, 0, X) -> (and (ext/trunc X), 1) and no carry.
+ if (isNullConstant(N0) && isNullConstant(N1)) {
+ EVT VT = N0.getValueType();
+ EVT CarryVT = CarryIn.getValueType();
+ SDValue CarryExt = DAG.getBoolExtOrTrunc(CarryIn, DL, VT, CarryVT);
+ AddToWorklist(CarryExt.getNode());
+ return CombineTo(N, DAG.getNode(ISD::AND, DL, VT, CarryExt,
+ DAG.getConstant(1, DL, VT)),
+ DAG.getConstant(0, DL, CarryVT));
+ }
+
+ if (SDValue Combined = visitADDCARRYLike(N0, N1, CarryIn, N))
+ return Combined;
+
+ if (SDValue Combined = visitADDCARRYLike(N1, N0, CarryIn, N))
+ return Combined;
+
+ return SDValue();
+}
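+// Illustration (editor's sketch, not part of the patch): the
+// (addcarry 0, 0, X) fold above materializes the incoming carry as an
+// integer; for i32 with an i1 carry X it builds roughly
+//   t1: i32 = <bool ext/trunc of X>   (via getBoolExtOrTrunc)
+//   t2: i32 = and t1, Const:i32<1>
+// and reports a constant-zero carry-out.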
+
+SDValue DAGCombiner::visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn,
+ SDNode *N) {
+ // Iff the flag result is dead:
+ // (addcarry (add|uaddo X, Y), 0, Carry) -> (addcarry X, Y, Carry)
+ if ((N0.getOpcode() == ISD::ADD ||
+ (N0.getOpcode() == ISD::UADDO && N0.getResNo() == 0)) &&
+ isNullConstant(N1) && !N->hasAnyUseOfValue(1))
+ return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(),
+ N0.getOperand(0), N0.getOperand(1), CarryIn);
+
+  /**
+   * When one of the addcarry arguments is itself a carry, we may be facing
+   * a diamond carry propagation, in which case we try to transform the DAG
+   * to ensure linear carry propagation if that is possible.
+   *
+   * We are trying to get:
+   * (addcarry X, 0, (addcarry A, B, Z):Carry)
+   */
+ if (auto Y = getAsCarry(TLI, N1)) {
+ /**
+ * (uaddo A, B)
+ * / \
+ * Carry Sum
+ * | \
+ * | (addcarry *, 0, Z)
+ * | /
+ * \ Carry
+ * | /
+ * (addcarry X, *, *)
+ */
+ if (Y.getOpcode() == ISD::UADDO &&
+ CarryIn.getResNo() == 1 &&
+ CarryIn.getOpcode() == ISD::ADDCARRY &&
+ isNullConstant(CarryIn.getOperand(1)) &&
+ CarryIn.getOperand(0) == Y.getValue(0)) {
+ auto NewY = DAG.getNode(ISD::ADDCARRY, SDLoc(N), Y->getVTList(),
+ Y.getOperand(0), Y.getOperand(1),
+ CarryIn.getOperand(2));
+ AddToWorklist(NewY.getNode());
+ return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0,
+ DAG.getConstant(0, SDLoc(N), N0.getValueType()),
+ NewY.getValue(1));
+ }
+ }
+
+ return SDValue();
+}
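+// Illustration (editor's sketch, not part of the patch): the diamond rewrite
+// above serializes two carry chains. Before:
+//   t1: i32,i1 = uaddo A, B
+//   t2: i32,i1 = addcarry t1, 0, Z
+//   tN: i32,i1 = addcarry X, t1:1, t2:1    (t1:1 consumed as data)
+// After:
+//   t1: i32,i1 = addcarry A, B, Z
+//   tN: i32,i1 = addcarry X, 0, t1:1
+// The two partial carries are mutually exclusive, so a single carry operand
+// can represent their sum and propagation stays linear.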
+
// Since it may not be valid to emit a fold to zero for vector initializers,
// check whether we can before folding.
static SDValue tryFoldToZero(const SDLoc &DL, const TargetLowering &TLI, EVT VT,
@@ -1920,6 +2311,9 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
N1.getNode());
}
+ if (SDValue NewSel = foldBinOpIntoSelect(N))
+ return NewSel;
+
ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
// fold (sub x, c) -> (add x, -c)
@@ -1944,13 +2338,13 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
}
// 0 - X --> 0 if the sub is NUW.
- if (N->getFlags()->hasNoUnsignedWrap())
+ if (N->getFlags().hasNoUnsignedWrap())
return N0;
- if (DAG.MaskedValueIsZero(N1, ~APInt::getSignBit(BitWidth))) {
+ if (DAG.MaskedValueIsZero(N1, ~APInt::getSignMask(BitWidth))) {
// N1 is either 0 or the minimum signed value. If the sub is NSW, then
// N1 must be 0 because negating the minimum signed value is undefined.
- if (N->getFlags()->hasNoSignedWrap())
+ if (N->getFlags().hasNoSignedWrap())
return N0;
// 0 - X --> X if X is 0 or the minimum signed value.
@@ -2066,6 +2460,38 @@ SDValue DAGCombiner::visitSUBC(SDNode *N) {
return SDValue();
}
+SDValue DAGCombiner::visitUSUBO(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ EVT VT = N0.getValueType();
+ if (VT.isVector())
+ return SDValue();
+
+ EVT CarryVT = N->getValueType(1);
+ SDLoc DL(N);
+
+  // If the flag result is dead, turn this into a SUB.
+ if (!N->hasAnyUseOfValue(1))
+ return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
+ DAG.getUNDEF(CarryVT));
+
+ // fold (usubo x, x) -> 0 + no borrow
+ if (N0 == N1)
+ return CombineTo(N, DAG.getConstant(0, DL, VT),
+ DAG.getConstant(0, DL, CarryVT));
+
+ // fold (usubo x, 0) -> x + no borrow
+ if (isNullConstant(N1))
+ return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
+
+ // Canonicalize (usubo -1, x) -> ~x, i.e. (xor x, -1) + no borrow
+ if (isAllOnesConstant(N0))
+ return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
+ DAG.getConstant(0, DL, CarryVT));
+
+ return SDValue();
+}
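+// Illustration (editor's sketch, not part of the patch): the last fold above
+// relies on (-1) - X == ~X: subtracting any X from the all-ones value can
+// never borrow, so (usubo -1, X) becomes (xor X, -1) plus a zero borrow-out.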
+
SDValue DAGCombiner::visitSUBE(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
@@ -2078,6 +2504,18 @@ SDValue DAGCombiner::visitSUBE(SDNode *N) {
return SDValue();
}
+SDValue DAGCombiner::visitSUBCARRY(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue CarryIn = N->getOperand(2);
+
+ // fold (subcarry x, y, false) -> (usubo x, y)
+ if (isNullConstant(CarryIn))
+ return DAG.getNode(ISD::USUBO, SDLoc(N), N->getVTList(), N0, N1);
+
+ return SDValue();
+}
+
SDValue DAGCombiner::visitMUL(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
@@ -2122,15 +2560,19 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
!DAG.isConstantIntBuildVectorOrConstantInt(N1))
return DAG.getNode(ISD::MUL, SDLoc(N), VT, N1, N0);
// fold (mul x, 0) -> 0
- if (N1IsConst && ConstValue1 == 0)
+ if (N1IsConst && ConstValue1.isNullValue())
return N1;
// We require a splat of the entire scalar bit width for non-contiguous
// bit patterns.
bool IsFullSplat =
ConstValue1.getBitWidth() == VT.getScalarSizeInBits();
// fold (mul x, 1) -> x
- if (N1IsConst && ConstValue1 == 1 && IsFullSplat)
+ if (N1IsConst && ConstValue1.isOneValue() && IsFullSplat)
return N0;
+
+ if (SDValue NewSel = foldBinOpIntoSelect(N))
+ return NewSel;
+
// fold (mul x, -1) -> 0-x
if (N1IsConst && ConstValue1.isAllOnesValue()) {
SDLoc DL(N);
@@ -2297,6 +2739,23 @@ SDValue DAGCombiner::useDivRem(SDNode *Node) {
return combined;
}
+static SDValue simplifyDivRem(SDNode *N, SelectionDAG &DAG) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ EVT VT = N->getValueType(0);
+ SDLoc DL(N);
+
+ if (DAG.isUndef(N->getOpcode(), {N0, N1}))
+ return DAG.getUNDEF(VT);
+
+ // undef / X -> 0
+ // undef % X -> 0
+ if (N0.isUndef())
+ return DAG.getConstant(0, DL, VT);
+
+ return SDValue();
+}
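+// Illustration (editor's sketch, not part of the patch): simplifyDivRem
+// centralizes the undef handling that the visitors below now drop, e.g.
+//   sdiv undef, X -> Const:i32<0>
+//   sdiv X, undef -> undef   (assuming DAG.isUndef reports div/rem by undef)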
+
SDValue DAGCombiner::visitSDIV(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
@@ -2319,8 +2778,13 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) {
return N0;
// fold (sdiv X, -1) -> 0-X
if (N1C && N1C->isAllOnesValue())
- return DAG.getNode(ISD::SUB, DL, VT,
- DAG.getConstant(0, DL, VT), N0);
+ return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), N0);
+
+ if (SDValue V = simplifyDivRem(N, DAG))
+ return V;
+
+ if (SDValue NewSel = foldBinOpIntoSelect(N))
+ return NewSel;
// If we know the sign bits of both operands are zero, strength reduce to a
// udiv instead. Handles (X&15) /s 4 -> X&15 >> 2
@@ -2332,9 +2796,8 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) {
// better results in that case. The target-specific lowering should learn how
// to handle exact sdivs efficiently.
if (N1C && !N1C->isNullValue() && !N1C->isOpaque() &&
- !cast<BinaryWithFlagsSDNode>(N)->Flags.hasExact() &&
- (N1C->getAPIntValue().isPowerOf2() ||
- (-N1C->getAPIntValue()).isPowerOf2())) {
+ !N->getFlags().hasExact() && (N1C->getAPIntValue().isPowerOf2() ||
+ (-N1C->getAPIntValue()).isPowerOf2())) {
// Target-specific implementation of sdiv x, pow2.
if (SDValue Res = BuildSDIVPow2(N))
return Res;
@@ -2372,7 +2835,7 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) {
// If integer divide is expensive and we satisfy the requirements, emit an
// alternate sequence. Targets may check function attributes for size/speed
// trade-offs.
- AttributeSet Attr = DAG.getMachineFunction().getFunction()->getAttributes();
+ AttributeList Attr = DAG.getMachineFunction().getFunction()->getAttributes();
if (N1C && !TLI.isIntDivCheap(N->getValueType(0), Attr))
if (SDValue Op = BuildSDIV(N))
return Op;
@@ -2384,13 +2847,6 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) {
if (SDValue DivRem = useDivRem(N))
return DivRem;
- // undef / X -> 0
- if (N0.isUndef())
- return DAG.getConstant(0, DL, VT);
- // X / undef -> undef
- if (N1.isUndef())
- return N1;
-
return SDValue();
}
@@ -2414,6 +2870,12 @@ SDValue DAGCombiner::visitUDIV(SDNode *N) {
N0C, N1C))
return Folded;
+ if (SDValue V = simplifyDivRem(N, DAG))
+ return V;
+
+ if (SDValue NewSel = foldBinOpIntoSelect(N))
+ return NewSel;
+
// fold (udiv x, (1 << c)) -> x >>u c
if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
DAG.isKnownToBeAPowerOfTwo(N1)) {
@@ -2444,7 +2906,7 @@ SDValue DAGCombiner::visitUDIV(SDNode *N) {
}
// fold (udiv x, c) -> alternate
- AttributeSet Attr = DAG.getMachineFunction().getFunction()->getAttributes();
+ AttributeList Attr = DAG.getMachineFunction().getFunction()->getAttributes();
if (N1C && !TLI.isIntDivCheap(N->getValueType(0), Attr))
if (SDValue Op = BuildUDIV(N))
return Op;
@@ -2456,13 +2918,6 @@ SDValue DAGCombiner::visitUDIV(SDNode *N) {
if (SDValue DivRem = useDivRem(N))
return DivRem;
- // undef / X -> 0
- if (N0.isUndef())
- return DAG.getConstant(0, DL, VT);
- // X / undef -> undef
- if (N1.isUndef())
- return N1;
-
return SDValue();
}
@@ -2482,32 +2937,35 @@ SDValue DAGCombiner::visitREM(SDNode *N) {
if (SDValue Folded = DAG.FoldConstantArithmetic(Opcode, DL, VT, N0C, N1C))
return Folded;
+ if (SDValue V = simplifyDivRem(N, DAG))
+ return V;
+
+ if (SDValue NewSel = foldBinOpIntoSelect(N))
+ return NewSel;
+
if (isSigned) {
// If we know the sign bits of both operands are zero, strength reduce to a
// urem instead. Handles (X & 0x0FFFFFFF) %s 16 -> X&15
if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
return DAG.getNode(ISD::UREM, DL, VT, N0, N1);
} else {
- // fold (urem x, pow2) -> (and x, pow2-1)
+ SDValue NegOne = DAG.getAllOnesConstant(DL, VT);
if (DAG.isKnownToBeAPowerOfTwo(N1)) {
- APInt NegOne = APInt::getAllOnesValue(VT.getScalarSizeInBits());
- SDValue Add =
- DAG.getNode(ISD::ADD, DL, VT, N1, DAG.getConstant(NegOne, DL, VT));
+ // fold (urem x, pow2) -> (and x, pow2-1)
+ SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
AddToWorklist(Add.getNode());
return DAG.getNode(ISD::AND, DL, VT, N0, Add);
}
- // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1))
if (N1.getOpcode() == ISD::SHL &&
DAG.isKnownToBeAPowerOfTwo(N1.getOperand(0))) {
- APInt NegOne = APInt::getAllOnesValue(VT.getScalarSizeInBits());
- SDValue Add =
- DAG.getNode(ISD::ADD, DL, VT, N1, DAG.getConstant(NegOne, DL, VT));
+ // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1))
+ SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
AddToWorklist(Add.getNode());
return DAG.getNode(ISD::AND, DL, VT, N0, Add);
}
}
- AttributeSet Attr = DAG.getMachineFunction().getFunction()->getAttributes();
+ AttributeList Attr = DAG.getMachineFunction().getFunction()->getAttributes();
// If X/C can be simplified by the division-by-constant logic, lower
// X%C to the equivalent of X-X/C*C.
@@ -2536,13 +2994,6 @@ SDValue DAGCombiner::visitREM(SDNode *N) {
if (SDValue DivRem = useDivRem(N))
return DivRem.getValue(1);
- // undef % X -> 0
- if (N0.isUndef())
- return DAG.getConstant(0, DL, VT);
- // X % undef -> undef
- if (N1.isUndef())
- return N1;
-
return SDValue();
}
@@ -2932,95 +3383,139 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) {
return SDValue();
}
+/// Try to make (and/or setcc (LL, LR), setcc (RL, RR)) more efficient.
+SDValue DAGCombiner::foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
+ const SDLoc &DL) {
+ SDValue LL, LR, RL, RR, N0CC, N1CC;
+ if (!isSetCCEquivalent(N0, LL, LR, N0CC) ||
+ !isSetCCEquivalent(N1, RL, RR, N1CC))
+ return SDValue();
+
+ assert(N0.getValueType() == N1.getValueType() &&
+ "Unexpected operand types for bitwise logic op");
+ assert(LL.getValueType() == LR.getValueType() &&
+ RL.getValueType() == RR.getValueType() &&
+ "Unexpected operand types for setcc");
+
+ // If we're here post-legalization or the logic op type is not i1, the logic
+ // op type must match a setcc result type. Also, all folds require new
+ // operations on the left and right operands, so those types must match.
+ EVT VT = N0.getValueType();
+ EVT OpVT = LL.getValueType();
+ if (LegalOperations || VT != MVT::i1)
+ if (VT != getSetCCResultType(OpVT))
+ return SDValue();
+ if (OpVT != RL.getValueType())
+ return SDValue();
+
+ ISD::CondCode CC0 = cast<CondCodeSDNode>(N0CC)->get();
+ ISD::CondCode CC1 = cast<CondCodeSDNode>(N1CC)->get();
+ bool IsInteger = OpVT.isInteger();
+ if (LR == RR && CC0 == CC1 && IsInteger) {
+ bool IsZero = isNullConstantOrNullSplatConstant(LR);
+ bool IsNeg1 = isAllOnesConstantOrAllOnesSplatConstant(LR);
+
+ // All bits clear?
+ bool AndEqZero = IsAnd && CC1 == ISD::SETEQ && IsZero;
+ // All sign bits clear?
+ bool AndGtNeg1 = IsAnd && CC1 == ISD::SETGT && IsNeg1;
+ // Any bits set?
+ bool OrNeZero = !IsAnd && CC1 == ISD::SETNE && IsZero;
+ // Any sign bits set?
+ bool OrLtZero = !IsAnd && CC1 == ISD::SETLT && IsZero;
+
+ // (and (seteq X, 0), (seteq Y, 0)) --> (seteq (or X, Y), 0)
+ // (and (setgt X, -1), (setgt Y, -1)) --> (setgt (or X, Y), -1)
+ // (or (setne X, 0), (setne Y, 0)) --> (setne (or X, Y), 0)
+ // (or (setlt X, 0), (setlt Y, 0)) --> (setlt (or X, Y), 0)
+ if (AndEqZero || AndGtNeg1 || OrNeZero || OrLtZero) {
+ SDValue Or = DAG.getNode(ISD::OR, SDLoc(N0), OpVT, LL, RL);
+ AddToWorklist(Or.getNode());
+ return DAG.getSetCC(DL, VT, Or, LR, CC1);
+ }
+
+ // All bits set?
+ bool AndEqNeg1 = IsAnd && CC1 == ISD::SETEQ && IsNeg1;
+ // All sign bits set?
+ bool AndLtZero = IsAnd && CC1 == ISD::SETLT && IsZero;
+ // Any bits clear?
+ bool OrNeNeg1 = !IsAnd && CC1 == ISD::SETNE && IsNeg1;
+ // Any sign bits clear?
+ bool OrGtNeg1 = !IsAnd && CC1 == ISD::SETGT && IsNeg1;
+
+ // (and (seteq X, -1), (seteq Y, -1)) --> (seteq (and X, Y), -1)
+ // (and (setlt X, 0), (setlt Y, 0)) --> (setlt (and X, Y), 0)
+ // (or (setne X, -1), (setne Y, -1)) --> (setne (and X, Y), -1)
+ // (or (setgt X, -1), (setgt Y -1)) --> (setgt (and X, Y), -1)
+ if (AndEqNeg1 || AndLtZero || OrNeNeg1 || OrGtNeg1) {
+ SDValue And = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, LL, RL);
+ AddToWorklist(And.getNode());
+ return DAG.getSetCC(DL, VT, And, LR, CC1);
+ }
+ }
+
+ // TODO: What is the 'or' equivalent of this fold?
+ // (and (setne X, 0), (setne X, -1)) --> (setuge (add X, 1), 2)
+ if (IsAnd && LL == RL && CC0 == CC1 && IsInteger && CC0 == ISD::SETNE &&
+ ((isNullConstant(LR) && isAllOnesConstant(RR)) ||
+ (isAllOnesConstant(LR) && isNullConstant(RR)))) {
+ SDValue One = DAG.getConstant(1, DL, OpVT);
+ SDValue Two = DAG.getConstant(2, DL, OpVT);
+ SDValue Add = DAG.getNode(ISD::ADD, SDLoc(N0), OpVT, LL, One);
+ AddToWorklist(Add.getNode());
+ return DAG.getSetCC(DL, VT, Add, Two, ISD::SETUGE);
+ }
+
+ // Try more general transforms if the predicates match and the only user of
+ // the compares is the 'and' or 'or'.
+ if (IsInteger && TLI.convertSetCCLogicToBitwiseLogic(OpVT) && CC0 == CC1 &&
+ N0.hasOneUse() && N1.hasOneUse()) {
+ // and (seteq A, B), (seteq C, D) --> seteq (or (xor A, B), (xor C, D)), 0
+ // or (setne A, B), (setne C, D) --> setne (or (xor A, B), (xor C, D)), 0
+ if ((IsAnd && CC1 == ISD::SETEQ) || (!IsAnd && CC1 == ISD::SETNE)) {
+ SDValue XorL = DAG.getNode(ISD::XOR, SDLoc(N0), OpVT, LL, LR);
+ SDValue XorR = DAG.getNode(ISD::XOR, SDLoc(N1), OpVT, RL, RR);
+ SDValue Or = DAG.getNode(ISD::OR, DL, OpVT, XorL, XorR);
+ SDValue Zero = DAG.getConstant(0, DL, OpVT);
+ return DAG.getSetCC(DL, VT, Or, Zero, CC1);
+ }
+ }
+
+ // Canonicalize equivalent operands to LL == RL.
+ if (LL == RR && LR == RL) {
+ CC1 = ISD::getSetCCSwappedOperands(CC1);
+ std::swap(RL, RR);
+ }
+
+ // (and (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
+ // (or (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
+ if (LL == RL && LR == RR) {
+ ISD::CondCode NewCC = IsAnd ? ISD::getSetCCAndOperation(CC0, CC1, IsInteger)
+ : ISD::getSetCCOrOperation(CC0, CC1, IsInteger);
+ if (NewCC != ISD::SETCC_INVALID &&
+ (!LegalOperations ||
+ (TLI.isCondCodeLegal(NewCC, LL.getSimpleValueType()) &&
+ TLI.isOperationLegal(ISD::SETCC, OpVT))))
+ return DAG.getSetCC(DL, VT, LL, LR, NewCC);
+ }
+
+ return SDValue();
+}
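+// Illustration (editor's sketch, not part of the patch): a worked instance of
+// the "all bits clear" case above:
+//   (and (seteq X, 0), (seteq Y, 0)) --> (seteq (or X, Y), 0)
+// is sound because (X | Y) == 0 exactly when both X and Y are zero, so one
+// setcc plus one or replaces two setccs plus one and.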
+
/// This contains all DAGCombine rules which reduce two values combined by
/// an And operation to a single value. This makes them reusable in the context
/// of visitSELECT(). Rules involving constants are not included as
/// visitSELECT() already handles those cases.
-SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1,
- SDNode *LocReference) {
+SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1, SDNode *N) {
EVT VT = N1.getValueType();
+ SDLoc DL(N);
// fold (and x, undef) -> 0
if (N0.isUndef() || N1.isUndef())
- return DAG.getConstant(0, SDLoc(LocReference), VT);
- // fold (and (setcc x), (setcc y)) -> (setcc (and x, y))
- SDValue LL, LR, RL, RR, CC0, CC1;
- if (isSetCCEquivalent(N0, LL, LR, CC0) && isSetCCEquivalent(N1, RL, RR, CC1)){
- ISD::CondCode Op0 = cast<CondCodeSDNode>(CC0)->get();
- ISD::CondCode Op1 = cast<CondCodeSDNode>(CC1)->get();
-
- if (LR == RR && isa<ConstantSDNode>(LR) && Op0 == Op1 &&
- LL.getValueType().isInteger()) {
- // fold (and (seteq X, 0), (seteq Y, 0)) -> (seteq (or X, Y), 0)
- if (isNullConstant(LR) && Op1 == ISD::SETEQ) {
- EVT CCVT = getSetCCResultType(LR.getValueType());
- if (VT == CCVT || (!LegalOperations && VT == MVT::i1)) {
- SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(N0),
- LR.getValueType(), LL, RL);
- AddToWorklist(ORNode.getNode());
- return DAG.getSetCC(SDLoc(LocReference), VT, ORNode, LR, Op1);
- }
- }
- if (isAllOnesConstant(LR)) {
- // fold (and (seteq X, -1), (seteq Y, -1)) -> (seteq (and X, Y), -1)
- if (Op1 == ISD::SETEQ) {
- EVT CCVT = getSetCCResultType(LR.getValueType());
- if (VT == CCVT || (!LegalOperations && VT == MVT::i1)) {
- SDValue ANDNode = DAG.getNode(ISD::AND, SDLoc(N0),
- LR.getValueType(), LL, RL);
- AddToWorklist(ANDNode.getNode());
- return DAG.getSetCC(SDLoc(LocReference), VT, ANDNode, LR, Op1);
- }
- }
- // fold (and (setgt X, -1), (setgt Y, -1)) -> (setgt (or X, Y), -1)
- if (Op1 == ISD::SETGT) {
- EVT CCVT = getSetCCResultType(LR.getValueType());
- if (VT == CCVT || (!LegalOperations && VT == MVT::i1)) {
- SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(N0),
- LR.getValueType(), LL, RL);
- AddToWorklist(ORNode.getNode());
- return DAG.getSetCC(SDLoc(LocReference), VT, ORNode, LR, Op1);
- }
- }
- }
- }
- // Simplify (and (setne X, 0), (setne X, -1)) -> (setuge (add X, 1), 2)
- if (LL == RL && isa<ConstantSDNode>(LR) && isa<ConstantSDNode>(RR) &&
- Op0 == Op1 && LL.getValueType().isInteger() &&
- Op0 == ISD::SETNE && ((isNullConstant(LR) && isAllOnesConstant(RR)) ||
- (isAllOnesConstant(LR) && isNullConstant(RR)))) {
- EVT CCVT = getSetCCResultType(LL.getValueType());
- if (VT == CCVT || (!LegalOperations && VT == MVT::i1)) {
- SDLoc DL(N0);
- SDValue ADDNode = DAG.getNode(ISD::ADD, DL, LL.getValueType(),
- LL, DAG.getConstant(1, DL,
- LL.getValueType()));
- AddToWorklist(ADDNode.getNode());
- return DAG.getSetCC(SDLoc(LocReference), VT, ADDNode,
- DAG.getConstant(2, DL, LL.getValueType()),
- ISD::SETUGE);
- }
- }
- // canonicalize equivalent to ll == rl
- if (LL == RR && LR == RL) {
- Op1 = ISD::getSetCCSwappedOperands(Op1);
- std::swap(RL, RR);
- }
- if (LL == RL && LR == RR) {
- bool isInteger = LL.getValueType().isInteger();
- ISD::CondCode Result = ISD::getSetCCAndOperation(Op0, Op1, isInteger);
- if (Result != ISD::SETCC_INVALID &&
- (!LegalOperations ||
- (TLI.isCondCodeLegal(Result, LL.getSimpleValueType()) &&
- TLI.isOperationLegal(ISD::SETCC, LL.getValueType())))) {
- EVT CCVT = getSetCCResultType(LL.getValueType());
- if (N0.getValueType() == CCVT ||
- (!LegalOperations && N0.getValueType() == MVT::i1))
- return DAG.getSetCC(SDLoc(LocReference), N0.getValueType(),
- LL, LR, Result);
- }
- }
- }
+ return DAG.getConstant(0, DL, VT);
+
+ if (SDValue V = foldLogicOfSetCCs(true, N0, N1, DL))
+ return V;
if (N0.getOpcode() == ISD::ADD && N1.getOpcode() == ISD::SRL &&
VT.getSizeInBits() <= 64) {
@@ -3037,13 +3532,13 @@ SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1,
if (DAG.MaskedValueIsZero(N0.getOperand(1), Mask)) {
ADDC |= Mask;
if (TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
- SDLoc DL(N0);
+ SDLoc DL0(N0);
SDValue NewAdd =
- DAG.getNode(ISD::ADD, DL, VT,
+ DAG.getNode(ISD::ADD, DL0, VT,
N0.getOperand(0), DAG.getConstant(ADDC, DL, VT));
CombineTo(N0.getNode(), NewAdd);
// Return N so it doesn't get rechecked!
- return SDValue(LocReference, 0);
+ return SDValue(N, 0);
}
}
}
@@ -3068,7 +3563,7 @@ SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1,
unsigned MaskBits = AndMask.countTrailingOnes();
EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), Size / 2);
- if (APIntOps::isMask(AndMask) &&
+ if (AndMask.isMask() &&
// Required bits must not span the two halves of the integer and
// must fit in the half size type.
(ShiftBits + MaskBits <= Size / 2) &&
@@ -3108,7 +3603,7 @@ bool DAGCombiner::isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
bool &NarrowLoad) {
uint32_t ActiveBits = AndC->getAPIntValue().getActiveBits();
- if (ActiveBits == 0 || !APIntOps::isMask(ActiveBits, AndC->getAPIntValue()))
+ if (ActiveBits == 0 || !AndC->getAPIntValue().isMask(ActiveBits))
return false;
ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
@@ -3191,13 +3686,17 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
APInt::getAllOnesValue(BitWidth)))
return DAG.getConstant(0, SDLoc(N), VT);
+
+ if (SDValue NewSel = foldBinOpIntoSelect(N))
+ return NewSel;
+
// reassociate and
if (SDValue RAND = ReassociateOps(ISD::AND, SDLoc(N), N0, N1))
return RAND;
// fold (and (or x, C), D) -> D if (C & D) == D
if (N1C && N0.getOpcode() == ISD::OR)
if (ConstantSDNode *ORI = isConstOrConstSplat(N0.getOperand(1)))
- if ((ORI->getAPIntValue() & N1C->getAPIntValue()) == N1C->getAPIntValue())
+ if (N1C->getAPIntValue().isSubsetOf(ORI->getAPIntValue()))
return N1;
// fold (and (any_ext V), c) -> (zero_ext V) if 'and' only clears top bits.
if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
@@ -3299,6 +3798,10 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
// If the load type was an EXTLOAD, convert to ZEXTLOAD in order to
// preserve semantics once we get rid of the AND.
SDValue NewLoad(Load, 0);
+
+ // Fold the AND away. NewLoad may get replaced immediately.
+ CombineTo(N, (N0.getNode() == Load) ? NewLoad : N0);
+
if (Load->getExtensionType() == ISD::EXTLOAD) {
NewLoad = DAG.getLoad(Load->getAddressingMode(), ISD::ZEXTLOAD,
Load->getValueType(0), SDLoc(Load),
@@ -3316,10 +3819,6 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
}
}
- // Fold the AND away, taking care not to fold to the old load node if we
- // replaced it.
- CombineTo(N, (N0.getNode() == Load) ? NewLoad : N0);
-
return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
}
@@ -3398,9 +3897,8 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
// Note: the SimplifyDemandedBits fold below can make an information-losing
// transform, and then we have no way to find this better fold.
if (N1C && N1C->isOne() && N0.getOpcode() == ISD::SUB) {
- ConstantSDNode *SubLHS = isConstOrConstSplat(N0.getOperand(0));
- SDValue SubRHS = N0.getOperand(1);
- if (SubLHS && SubLHS->isNullValue()) {
+ if (isNullConstantOrNullSplatConstant(N0.getOperand(0))) {
+ SDValue SubRHS = N0.getOperand(1);
if (SubRHS.getOpcode() == ISD::ZERO_EXTEND &&
SubRHS.getOperand(0).getScalarValueSizeInBits() == 1)
return SubRHS;
@@ -3412,7 +3910,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
// fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1)
// fold (and (sra)) -> (and (srl)) when possible.
- if (!VT.isVector() && SimplifyDemandedBits(SDValue(N, 0)))
+ if (SimplifyDemandedBits(SDValue(N, 0)))
return SDValue(N, 0);
// fold (zext_inreg (extload x)) -> (zextload x)
@@ -3473,7 +3971,7 @@ SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
EVT VT = N->getValueType(0);
if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16)
return SDValue();
- if (!TLI.isOperationLegal(ISD::BSWAP, VT))
+ if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT))
return SDValue();
// Recognize (and (shl a, 8), 0xff), (and (srl a, 8), 0xff00)
@@ -3585,27 +4083,36 @@ static bool isBSwapHWordElement(SDValue N, MutableArrayRef<SDNode *> Parts) {
if (Opc != ISD::AND && Opc != ISD::SHL && Opc != ISD::SRL)
return false;
- ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N.getOperand(1));
+ SDValue N0 = N.getOperand(0);
+ unsigned Opc0 = N0.getOpcode();
+ if (Opc0 != ISD::AND && Opc0 != ISD::SHL && Opc0 != ISD::SRL)
+ return false;
+
+ ConstantSDNode *N1C = nullptr;
+  // SHL or SRL: look through to the inner AND for the mask operand.
+ if (Opc == ISD::AND)
+ N1C = dyn_cast<ConstantSDNode>(N.getOperand(1));
+ else if (Opc0 == ISD::AND)
+ N1C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
if (!N1C)
return false;
- unsigned Num;
+ unsigned MaskByteOffset;
switch (N1C->getZExtValue()) {
default:
return false;
- case 0xFF: Num = 0; break;
- case 0xFF00: Num = 1; break;
- case 0xFF0000: Num = 2; break;
- case 0xFF000000: Num = 3; break;
+ case 0xFF: MaskByteOffset = 0; break;
+ case 0xFF00: MaskByteOffset = 1; break;
+ case 0xFF0000: MaskByteOffset = 2; break;
+ case 0xFF000000: MaskByteOffset = 3; break;
}
// Look for (x & 0xff) << 8 as well as ((x << 8) & 0xff00).
- SDValue N0 = N.getOperand(0);
if (Opc == ISD::AND) {
- if (Num == 0 || Num == 2) {
+ if (MaskByteOffset == 0 || MaskByteOffset == 2) {
// (x >> 8) & 0xff
// (x >> 8) & 0xff0000
- if (N0.getOpcode() != ISD::SRL)
+ if (Opc0 != ISD::SRL)
return false;
ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
if (!C || C->getZExtValue() != 8)
@@ -3613,7 +4120,7 @@ static bool isBSwapHWordElement(SDValue N, MutableArrayRef<SDNode *> Parts) {
} else {
// (x << 8) & 0xff00
// (x << 8) & 0xff000000
- if (N0.getOpcode() != ISD::SHL)
+ if (Opc0 != ISD::SHL)
return false;
ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
if (!C || C->getZExtValue() != 8)
@@ -3622,7 +4129,7 @@ static bool isBSwapHWordElement(SDValue N, MutableArrayRef<SDNode *> Parts) {
} else if (Opc == ISD::SHL) {
// (x & 0xff) << 8
// (x & 0xff0000) << 8
- if (Num != 0 && Num != 2)
+ if (MaskByteOffset != 0 && MaskByteOffset != 2)
return false;
ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
if (!C || C->getZExtValue() != 8)
@@ -3630,17 +4137,17 @@ static bool isBSwapHWordElement(SDValue N, MutableArrayRef<SDNode *> Parts) {
} else { // Opc == ISD::SRL
// (x & 0xff00) >> 8
// (x & 0xff000000) >> 8
- if (Num != 1 && Num != 3)
+ if (MaskByteOffset != 1 && MaskByteOffset != 3)
return false;
ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
if (!C || C->getZExtValue() != 8)
return false;
}
- if (Parts[Num])
+ if (Parts[MaskByteOffset])
return false;
- Parts[Num] = N0.getOperand(0).getNode();
+ Parts[MaskByteOffset] = N0.getOperand(0).getNode();
return true;
}
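// Illustration (editor's sketch, not part of the patch): MaskByteOffset is the
// byte selected by the 0xFF* mask, independent of where the mask sits, e.g.
//   (and (srl x, 8), 0xff)   records x in Parts[0]
//   (and (shl x, 8), 0xff00) records x in Parts[1]
//   (shl (and x, 0xff), 8)   also records x in Parts[0]
// so the caller can check that all four byte terms come from the same x.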
@@ -3657,7 +4164,7 @@ SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) {
EVT VT = N->getValueType(0);
if (VT != MVT::i32)
return SDValue();
- if (!TLI.isOperationLegal(ISD::BSWAP, VT))
+ if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT))
return SDValue();
// Look for either
@@ -3672,18 +4179,16 @@ SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) {
if (N1.getOpcode() == ISD::OR &&
N00.getNumOperands() == 2 && N01.getNumOperands() == 2) {
// (or (or (and), (and)), (or (and), (and)))
- SDValue N000 = N00.getOperand(0);
- if (!isBSwapHWordElement(N000, Parts))
+ if (!isBSwapHWordElement(N00, Parts))
return SDValue();
- SDValue N001 = N00.getOperand(1);
- if (!isBSwapHWordElement(N001, Parts))
+ if (!isBSwapHWordElement(N01, Parts))
return SDValue();
- SDValue N010 = N01.getOperand(0);
- if (!isBSwapHWordElement(N010, Parts))
+ SDValue N10 = N1.getOperand(0);
+ if (!isBSwapHWordElement(N10, Parts))
return SDValue();
- SDValue N011 = N01.getOperand(1);
- if (!isBSwapHWordElement(N011, Parts))
+ SDValue N11 = N1.getOperand(1);
+ if (!isBSwapHWordElement(N11, Parts))
return SDValue();
} else {
// (or (or (or (and), (and)), (and)), (and))
@@ -3723,65 +4228,16 @@ SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) {
/// This contains all DAGCombine rules which reduce two values combined by
/// an Or operation to a single value \see visitANDLike().
-SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, SDNode *LocReference) {
+SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, SDNode *N) {
EVT VT = N1.getValueType();
+ SDLoc DL(N);
+
// fold (or x, undef) -> -1
- if (!LegalOperations &&
- (N0.isUndef() || N1.isUndef())) {
- EVT EltVT = VT.isVector() ? VT.getVectorElementType() : VT;
- return DAG.getConstant(APInt::getAllOnesValue(EltVT.getSizeInBits()),
- SDLoc(LocReference), VT);
- }
- // fold (or (setcc x), (setcc y)) -> (setcc (or x, y))
- SDValue LL, LR, RL, RR, CC0, CC1;
- if (isSetCCEquivalent(N0, LL, LR, CC0) && isSetCCEquivalent(N1, RL, RR, CC1)){
- ISD::CondCode Op0 = cast<CondCodeSDNode>(CC0)->get();
- ISD::CondCode Op1 = cast<CondCodeSDNode>(CC1)->get();
-
- if (LR == RR && Op0 == Op1 && LL.getValueType().isInteger()) {
- // fold (or (setne X, 0), (setne Y, 0)) -> (setne (or X, Y), 0)
- // fold (or (setlt X, 0), (setlt Y, 0)) -> (setne (or X, Y), 0)
- if (isNullConstant(LR) && (Op1 == ISD::SETNE || Op1 == ISD::SETLT)) {
- EVT CCVT = getSetCCResultType(LR.getValueType());
- if (VT == CCVT || (!LegalOperations && VT == MVT::i1)) {
- SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(LR),
- LR.getValueType(), LL, RL);
- AddToWorklist(ORNode.getNode());
- return DAG.getSetCC(SDLoc(LocReference), VT, ORNode, LR, Op1);
- }
- }
- // fold (or (setne X, -1), (setne Y, -1)) -> (setne (and X, Y), -1)
- // fold (or (setgt X, -1), (setgt Y -1)) -> (setgt (and X, Y), -1)
- if (isAllOnesConstant(LR) && (Op1 == ISD::SETNE || Op1 == ISD::SETGT)) {
- EVT CCVT = getSetCCResultType(LR.getValueType());
- if (VT == CCVT || (!LegalOperations && VT == MVT::i1)) {
- SDValue ANDNode = DAG.getNode(ISD::AND, SDLoc(LR),
- LR.getValueType(), LL, RL);
- AddToWorklist(ANDNode.getNode());
- return DAG.getSetCC(SDLoc(LocReference), VT, ANDNode, LR, Op1);
- }
- }
- }
- // canonicalize equivalent to ll == rl
- if (LL == RR && LR == RL) {
- Op1 = ISD::getSetCCSwappedOperands(Op1);
- std::swap(RL, RR);
- }
- if (LL == RL && LR == RR) {
- bool isInteger = LL.getValueType().isInteger();
- ISD::CondCode Result = ISD::getSetCCOrOperation(Op0, Op1, isInteger);
- if (Result != ISD::SETCC_INVALID &&
- (!LegalOperations ||
- (TLI.isCondCodeLegal(Result, LL.getSimpleValueType()) &&
- TLI.isOperationLegal(ISD::SETCC, LL.getValueType())))) {
- EVT CCVT = getSetCCResultType(LL.getValueType());
- if (N0.getValueType() == CCVT ||
- (!LegalOperations && N0.getValueType() == MVT::i1))
- return DAG.getSetCC(SDLoc(LocReference), N0.getValueType(),
- LL, LR, Result);
- }
- }
- }
+ if (!LegalOperations && (N0.isUndef() || N1.isUndef()))
+ return DAG.getAllOnesConstant(DL, VT);
+
+ if (SDValue V = foldLogicOfSetCCs(false, N0, N1, DL))
+ return V;
// (or (and X, C1), (and Y, C2)) -> (and (or X, Y), C3) if possible.
if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND &&
@@ -3802,7 +4258,6 @@ SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, SDNode *LocReference) {
DAG.MaskedValueIsZero(N1.getOperand(0), LHSMask&~RHSMask)) {
SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
N0.getOperand(0), N1.getOperand(0));
- SDLoc DL(LocReference);
return DAG.getNode(ISD::AND, DL, VT, X,
DAG.getConstant(LHSMask | RHSMask, DL, VT));
}
@@ -3818,7 +4273,7 @@ SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, SDNode *LocReference) {
(N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) {
SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
N0.getOperand(1), N1.getOperand(1));
- return DAG.getNode(ISD::AND, SDLoc(LocReference), VT, N0.getOperand(0), X);
+ return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), X);
}
return SDValue();
@@ -3847,14 +4302,10 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
// fold (or x, -1) -> -1, vector edition
if (ISD::isBuildVectorAllOnes(N0.getNode()))
// do not return N0, because undef node may exist in N0
- return DAG.getConstant(
- APInt::getAllOnesValue(N0.getScalarValueSizeInBits()), SDLoc(N),
- N0.getValueType());
+ return DAG.getAllOnesConstant(SDLoc(N), N0.getValueType());
if (ISD::isBuildVectorAllOnes(N1.getNode()))
// do not return N1, because undef node may exist in N1
- return DAG.getConstant(
- APInt::getAllOnesValue(N1.getScalarValueSizeInBits()), SDLoc(N),
- N1.getValueType());
+ return DAG.getAllOnesConstant(SDLoc(N), N1.getValueType());
// fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf A, B, Mask)
// Do this only if the resulting shuffle is legal.
@@ -3867,7 +4318,7 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
bool ZeroN10 = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());
bool ZeroN11 = ISD::isBuildVectorAllZeros(N1.getOperand(1).getNode());
// Ensure both shuffles have a zero input.
- if ((ZeroN00 || ZeroN01) && (ZeroN10 || ZeroN11)) {
+ if ((ZeroN00 != ZeroN01) && (ZeroN10 != ZeroN11)) {
assert((!ZeroN00 || !ZeroN01) && "Both inputs zero!");
assert((!ZeroN10 || !ZeroN11) && "Both inputs zero!");
const ShuffleVectorSDNode *SV0 = cast<ShuffleVectorSDNode>(N0);
@@ -3939,6 +4390,10 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
// fold (or x, -1) -> -1
if (isAllOnesConstant(N1))
return N1;
+
+ if (SDValue NewSel = foldBinOpIntoSelect(N))
+ return NewSel;
+
// fold (or x, c) -> c iff (x & ~c) == 0
if (N1C && DAG.MaskedValueIsZero(N0, ~N1C->getAPIntValue()))
return N1;
@@ -3955,20 +4410,22 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
// reassociate or
if (SDValue ROR = ReassociateOps(ISD::OR, SDLoc(N), N0, N1))
return ROR;
+
// Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2)
- // iff (c1 & c2) == 0.
- if (N1C && N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() &&
- isa<ConstantSDNode>(N0.getOperand(1))) {
- ConstantSDNode *C1 = cast<ConstantSDNode>(N0.getOperand(1));
- if ((C1->getAPIntValue() & N1C->getAPIntValue()) != 0) {
- if (SDValue COR = DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N1), VT,
- N1C, C1))
- return DAG.getNode(
- ISD::AND, SDLoc(N), VT,
- DAG.getNode(ISD::OR, SDLoc(N0), VT, N0.getOperand(0), N1), COR);
- return SDValue();
+ // iff (c1 & c2) != 0.
+ if (N1C && N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse()) {
+ if (ConstantSDNode *C1 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
+ if (C1->getAPIntValue().intersects(N1C->getAPIntValue())) {
+ if (SDValue COR =
+ DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N1), VT, N1C, C1))
+ return DAG.getNode(
+ ISD::AND, SDLoc(N), VT,
+ DAG.getNode(ISD::OR, SDLoc(N0), VT, N0.getOperand(0), N1), COR);
+ return SDValue();
+ }
}
}
+
// Simplify: (or (op x...), (op y...)) -> (op (or x, y))
if (N0.getOpcode() == N1.getOpcode())
if (SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N))
@@ -3978,9 +4435,11 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
if (SDNode *Rot = MatchRotate(N0, N1, SDLoc(N)))
return SDValue(Rot, 0);
+ if (SDValue Load = MatchLoadCombine(N))
+ return Load;
+
// Simplify the operands using demanded-bits information.
- if (!VT.isVector() &&
- SimplifyDemandedBits(SDValue(N, 0)))
+ if (SimplifyDemandedBits(SDValue(N, 0)))
return SDValue(N, 0);
return SDValue();
@@ -4134,6 +4593,20 @@ SDNode *DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos,
return nullptr;
}
+// Return true if Left + Right folds to a constant (or constant splat
+// vector) equal to Sum.
+static bool sumMatchConstant(SDValue Left, SDValue Right, unsigned Sum,
+ SelectionDAG &DAG, const SDLoc &DL) {
+ EVT ShiftVT = Left.getValueType();
+ if (ShiftVT != Right.getValueType()) return false;
+
+ SDValue ShiftSum = DAG.FoldConstantArithmetic(ISD::ADD, DL, ShiftVT,
+ Left.getNode(), Right.getNode());
+ if (!ShiftSum) return false;
+
+ ConstantSDNode *CSum = isConstOrConstSplat(ShiftSum);
+ return CSum && CSum->getZExtValue() == Sum;
+}
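+// Illustration (editor's sketch, not part of the patch): for the i32 rotate
+// idiom (or (shl x, 12), (srl x, 20)), sumMatchConstant folds 12 + 20 with
+// FoldConstantArithmetic and compares the result against the element size;
+// folding first also lets vector shift amounts match when their per-lane sum
+// is a uniform splat.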
+
// MatchRotate - Handle an 'or' of two operands. If this is one of the many
// idioms for rotate, and if the target supports rotation instructions, generate
// a rot[lr].
@@ -4179,31 +4652,24 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
// fold (or (shl x, C1), (srl x, C2)) -> (rotl x, C1)
// fold (or (shl x, C1), (srl x, C2)) -> (rotr x, C2)
- if (isConstOrConstSplat(LHSShiftAmt) && isConstOrConstSplat(RHSShiftAmt)) {
- uint64_t LShVal = isConstOrConstSplat(LHSShiftAmt)->getZExtValue();
- uint64_t RShVal = isConstOrConstSplat(RHSShiftAmt)->getZExtValue();
- if ((LShVal + RShVal) != EltSizeInBits)
- return nullptr;
-
+ if (sumMatchConstant(LHSShiftAmt, RHSShiftAmt, EltSizeInBits, DAG, DL)) {
SDValue Rot = DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT,
LHSShiftArg, HasROTL ? LHSShiftAmt : RHSShiftAmt);
// If there is an AND of either shifted operand, apply it to the result.
if (LHSMask.getNode() || RHSMask.getNode()) {
- APInt AllBits = APInt::getAllOnesValue(EltSizeInBits);
- SDValue Mask = DAG.getConstant(AllBits, DL, VT);
+ SDValue AllOnes = DAG.getAllOnesConstant(DL, VT);
+ SDValue Mask = AllOnes;
if (LHSMask.getNode()) {
- APInt RHSBits = APInt::getLowBitsSet(EltSizeInBits, LShVal);
+ SDValue RHSBits = DAG.getNode(ISD::SRL, DL, VT, AllOnes, RHSShiftAmt);
Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
- DAG.getNode(ISD::OR, DL, VT, LHSMask,
- DAG.getConstant(RHSBits, DL, VT)));
+ DAG.getNode(ISD::OR, DL, VT, LHSMask, RHSBits));
}
if (RHSMask.getNode()) {
- APInt LHSBits = APInt::getHighBitsSet(EltSizeInBits, RShVal);
+ SDValue LHSBits = DAG.getNode(ISD::SHL, DL, VT, AllOnes, LHSShiftAmt);
Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
- DAG.getNode(ISD::OR, DL, VT, RHSMask,
- DAG.getConstant(LHSBits, DL, VT)));
+ DAG.getNode(ISD::OR, DL, VT, RHSMask, LHSBits));
}
Rot = DAG.getNode(ISD::AND, DL, VT, Rot, Mask);
@@ -4246,109 +4712,299 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
}
namespace {
-/// Helper struct to parse and store a memory address as base + index + offset.
-/// We ignore sign extensions when it is safe to do so.
-/// The following two expressions are not equivalent. To differentiate we need
-/// to store whether there was a sign extension involved in the index
-/// computation.
-/// (load (i64 add (i64 copyfromreg %c)
-/// (i64 signextend (add (i8 load %index)
-/// (i8 1))))
-/// vs
-///
-/// (load (i64 add (i64 copyfromreg %c)
-/// (i64 signextend (i32 add (i32 signextend (i8 load %index))
-/// (i32 1)))))
-struct BaseIndexOffset {
- SDValue Base;
- SDValue Index;
- int64_t Offset;
- bool IsIndexSignExt;
-
- BaseIndexOffset() : Offset(0), IsIndexSignExt(false) {}
-
- BaseIndexOffset(SDValue Base, SDValue Index, int64_t Offset,
- bool IsIndexSignExt) :
- Base(Base), Index(Index), Offset(Offset), IsIndexSignExt(IsIndexSignExt) {}
-
- bool equalBaseIndex(const BaseIndexOffset &Other) {
- return Other.Base == Base && Other.Index == Index &&
- Other.IsIndexSignExt == IsIndexSignExt;
- }
-
- /// Parses tree in Ptr for base, index, offset addresses.
- static BaseIndexOffset match(SDValue Ptr, SelectionDAG &DAG,
- int64_t PartialOffset = 0) {
- bool IsIndexSignExt = false;
-
- // Split up a folded GlobalAddress+Offset into its component parts.
- if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Ptr))
- if (GA->getOpcode() == ISD::GlobalAddress && GA->getOffset() != 0) {
- return BaseIndexOffset(DAG.getGlobalAddress(GA->getGlobal(),
- SDLoc(GA),
- GA->getValueType(0),
- /*Offset=*/PartialOffset,
- /*isTargetGA=*/false,
- GA->getTargetFlags()),
- SDValue(),
- GA->getOffset(),
- IsIndexSignExt);
- }
-
- // We only can pattern match BASE + INDEX + OFFSET. If Ptr is not an ADD
- // instruction, then it could be just the BASE or everything else we don't
- // know how to handle. Just use Ptr as BASE and give up.
- if (Ptr->getOpcode() != ISD::ADD)
- return BaseIndexOffset(Ptr, SDValue(), PartialOffset, IsIndexSignExt);
-
- // We know that we have at least an ADD instruction. Try to pattern match
- // the simple case of BASE + OFFSET.
- if (isa<ConstantSDNode>(Ptr->getOperand(1))) {
- int64_t Offset = cast<ConstantSDNode>(Ptr->getOperand(1))->getSExtValue();
- return match(Ptr->getOperand(0), DAG, Offset + PartialOffset);
- }
-
- // Inside a loop the current BASE pointer is calculated using an ADD and a
- // MUL instruction. In this case Ptr is the actual BASE pointer.
- // (i64 add (i64 %array_ptr)
- // (i64 mul (i64 %induction_var)
- // (i64 %element_size)))
- if (Ptr->getOperand(1)->getOpcode() == ISD::MUL)
- return BaseIndexOffset(Ptr, SDValue(), PartialOffset, IsIndexSignExt);
-
- // Look at Base + Index + Offset cases.
- SDValue Base = Ptr->getOperand(0);
- SDValue IndexOffset = Ptr->getOperand(1);
-
- // Skip signextends.
- if (IndexOffset->getOpcode() == ISD::SIGN_EXTEND) {
- IndexOffset = IndexOffset->getOperand(0);
- IsIndexSignExt = true;
- }
-
- // Either the case of Base + Index (no offset) or something else.
- if (IndexOffset->getOpcode() != ISD::ADD)
- return BaseIndexOffset(Base, IndexOffset, PartialOffset, IsIndexSignExt);
-
- // Now we have the case of Base + Index + offset.
- SDValue Index = IndexOffset->getOperand(0);
- SDValue Offset = IndexOffset->getOperand(1);
-
- if (!isa<ConstantSDNode>(Offset))
- return BaseIndexOffset(Ptr, SDValue(), PartialOffset, IsIndexSignExt);
-
- // Ignore signextends.
- if (Index->getOpcode() == ISD::SIGN_EXTEND) {
- Index = Index->getOperand(0);
- IsIndexSignExt = true;
- } else IsIndexSignExt = false;
-
- int64_t Off = cast<ConstantSDNode>(Offset)->getSExtValue();
- return BaseIndexOffset(Base, Index, Off + PartialOffset, IsIndexSignExt);
+/// Represents the known origin of an individual byte in a load combine
+/// pattern. The value of the byte is either constant zero or comes from memory.
+struct ByteProvider {
+ // For constant zero providers Load is set to nullptr. For memory providers
+ // Load represents the node which loads the byte from memory.
+ // ByteOffset is the offset of the byte in the value produced by the load.
+ LoadSDNode *Load;
+ unsigned ByteOffset;
+
+ ByteProvider() : Load(nullptr), ByteOffset(0) {}
+
+ static ByteProvider getMemory(LoadSDNode *Load, unsigned ByteOffset) {
+ return ByteProvider(Load, ByteOffset);
}
+ static ByteProvider getConstantZero() { return ByteProvider(nullptr, 0); }
+
+ bool isConstantZero() const { return !Load; }
+ bool isMemory() const { return Load; }
+
+ bool operator==(const ByteProvider &Other) const {
+ return Other.Load == Load && Other.ByteOffset == ByteOffset;
+ }
+
+private:
+ ByteProvider(LoadSDNode *Load, unsigned ByteOffset)
+ : Load(Load), ByteOffset(ByteOffset) {}
};
+
+/// Recursively traverses the expression, calculating the origin of the
+/// requested byte of the given value. Returns None if the provider can't be
+/// calculated.
+///
+/// For every value except the root of the expression, verifies that the value
+/// has exactly one use; if that is not the case, returns None. This way, when
+/// an origin is returned for the byte, it is guaranteed that the values which
+/// contribute to the byte are not used outside of this expression.
+///
+/// Because the parts of the expression are not allowed to have more than one
+/// use, this function iterates over trees, not DAGs, so it never visits the
+/// same node more than once.
+const Optional<ByteProvider> calculateByteProvider(SDValue Op, unsigned Index,
+ unsigned Depth,
+ bool Root = false) {
+  // A typical i64-by-i8 pattern requires recursion up to a depth of 8 calls.
+ if (Depth == 10)
+ return None;
+
+ if (!Root && !Op.hasOneUse())
+ return None;
+
+ assert(Op.getValueType().isScalarInteger() && "can't handle other types");
+ unsigned BitWidth = Op.getValueSizeInBits();
+ if (BitWidth % 8 != 0)
+ return None;
+ unsigned ByteWidth = BitWidth / 8;
+ assert(Index < ByteWidth && "invalid index requested");
+ (void) ByteWidth;
+
+ switch (Op.getOpcode()) {
+ case ISD::OR: {
+ auto LHS = calculateByteProvider(Op->getOperand(0), Index, Depth + 1);
+ if (!LHS)
+ return None;
+ auto RHS = calculateByteProvider(Op->getOperand(1), Index, Depth + 1);
+ if (!RHS)
+ return None;
+
+ if (LHS->isConstantZero())
+ return RHS;
+ if (RHS->isConstantZero())
+ return LHS;
+ return None;
+ }
+ case ISD::SHL: {
+ auto ShiftOp = dyn_cast<ConstantSDNode>(Op->getOperand(1));
+ if (!ShiftOp)
+ return None;
+
+ uint64_t BitShift = ShiftOp->getZExtValue();
+ if (BitShift % 8 != 0)
+ return None;
+ uint64_t ByteShift = BitShift / 8;
+
+ return Index < ByteShift
+ ? ByteProvider::getConstantZero()
+ : calculateByteProvider(Op->getOperand(0), Index - ByteShift,
+ Depth + 1);
+ }
+ case ISD::ANY_EXTEND:
+ case ISD::SIGN_EXTEND:
+ case ISD::ZERO_EXTEND: {
+ SDValue NarrowOp = Op->getOperand(0);
+ unsigned NarrowBitWidth = NarrowOp.getScalarValueSizeInBits();
+ if (NarrowBitWidth % 8 != 0)
+ return None;
+ uint64_t NarrowByteWidth = NarrowBitWidth / 8;
+
+ if (Index >= NarrowByteWidth)
+ return Op.getOpcode() == ISD::ZERO_EXTEND
+ ? Optional<ByteProvider>(ByteProvider::getConstantZero())
+ : None;
+ return calculateByteProvider(NarrowOp, Index, Depth + 1);
+ }
+ case ISD::BSWAP:
+ return calculateByteProvider(Op->getOperand(0), ByteWidth - Index - 1,
+ Depth + 1);
+ case ISD::LOAD: {
+ auto L = cast<LoadSDNode>(Op.getNode());
+ if (L->isVolatile() || L->isIndexed())
+ return None;
+
+ unsigned NarrowBitWidth = L->getMemoryVT().getSizeInBits();
+ if (NarrowBitWidth % 8 != 0)
+ return None;
+ uint64_t NarrowByteWidth = NarrowBitWidth / 8;
+
+ if (Index >= NarrowByteWidth)
+ return L->getExtensionType() == ISD::ZEXTLOAD
+ ? Optional<ByteProvider>(ByteProvider::getConstantZero())
+ : None;
+ return ByteProvider::getMemory(L, Index);
+ }
+ }
+
+ return None;
+}
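+// Illustration (editor's sketch, not part of the patch): asking for byte 1 of
+//   t: i32 = or (zero_extend (load i8 p)), (shl (zero_extend (load i8 p+1)), 8)
+// finds a known-zero byte on the left (the zext of an i8 supplies zeros above
+// byte 0), walks SHL (8 bits = 1 byte, so request byte 0 below the shift) and
+// the zext on the right, and returns {Load = (load p+1), ByteOffset = 0}.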
} // namespace
+/// Match a pattern where a wide-type scalar value is loaded by several narrow
+/// loads and combined by shifts and ors. Fold it into a single load, or a load
+/// and a BSWAP if the target supports it.
+///
+/// Assuming little endian target:
+/// i8 *a = ...
+/// i32 val = a[0] | (a[1] << 8) | (a[2] << 16) | (a[3] << 24)
+/// =>
+/// i32 val = *((i32)a)
+///
+/// i8 *a = ...
+/// i32 val = (a[0] << 24) | (a[1] << 16) | (a[2] << 8) | a[3]
+/// =>
+/// i32 val = BSWAP(*((i32)a))
+///
+/// TODO: This rule matches complex patterns with OR node roots and doesn't
+/// interact well with the worklist mechanism. When a part of the pattern is
+/// updated (e.g. one of the loads), its direct users are put into the worklist,
+/// but the root node of the pattern which triggers the load combine is not
+/// necessarily a direct user of the changed node. For example, once the address
+/// of the t28 load is reassociated, the load combine won't be triggered:
+/// t25: i32 = add t4, Constant:i32<2>
+/// t26: i64 = sign_extend t25
+/// t27: i64 = add t2, t26
+/// t28: i8,ch = load<LD1[%tmp9]> t0, t27, undef:i64
+/// t29: i32 = zero_extend t28
+/// t32: i32 = shl t29, Constant:i8<8>
+/// t33: i32 = or t23, t32
+/// As a possible fix visitLoad can check if the load can be a part of a load
+/// combine pattern and add corresponding OR roots to the worklist.
+SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
+ assert(N->getOpcode() == ISD::OR &&
+ "Can only match load combining against OR nodes");
+
+ // Handles simple types only
+ EVT VT = N->getValueType(0);
+ if (VT != MVT::i16 && VT != MVT::i32 && VT != MVT::i64)
+ return SDValue();
+ unsigned ByteWidth = VT.getSizeInBits() / 8;
+
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+  // Before legalization we can introduce loads that are wider than legal; they
+  // will later be split into legal-sized loads. This enables us to combine
+  // i64-by-i8 load patterns into a couple of i32 loads on 32-bit targets.
+ if (LegalOperations && !TLI.isOperationLegal(ISD::LOAD, VT))
+ return SDValue();
+
+ std::function<unsigned(unsigned, unsigned)> LittleEndianByteAt = [](
+ unsigned BW, unsigned i) { return i; };
+ std::function<unsigned(unsigned, unsigned)> BigEndianByteAt = [](
+ unsigned BW, unsigned i) { return BW - i - 1; };
+
+ bool IsBigEndianTarget = DAG.getDataLayout().isBigEndian();
+ auto MemoryByteOffset = [&] (ByteProvider P) {
+ assert(P.isMemory() && "Must be a memory byte provider");
+ unsigned LoadBitWidth = P.Load->getMemoryVT().getSizeInBits();
+    assert(LoadBitWidth % 8 == 0 &&
+           "can only analyze providers for individual bytes, not bits");
+ unsigned LoadByteWidth = LoadBitWidth / 8;
+ return IsBigEndianTarget
+ ? BigEndianByteAt(LoadByteWidth, P.ByteOffset)
+ : LittleEndianByteAt(LoadByteWidth, P.ByteOffset);
+ };
+
+ Optional<BaseIndexOffset> Base;
+ SDValue Chain;
+
+ SmallSet<LoadSDNode *, 8> Loads;
+ Optional<ByteProvider> FirstByteProvider;
+ int64_t FirstOffset = INT64_MAX;
+
+  // Check if all the bytes of the OR we are looking at are loaded from the same
+  // base address. Collect the byte offsets from that Base address in ByteOffsets.
+ SmallVector<int64_t, 4> ByteOffsets(ByteWidth);
+ for (unsigned i = 0; i < ByteWidth; i++) {
+ auto P = calculateByteProvider(SDValue(N, 0), i, 0, /*Root=*/true);
+ if (!P || !P->isMemory()) // All the bytes must be loaded from memory
+ return SDValue();
+
+ LoadSDNode *L = P->Load;
+ assert(L->hasNUsesOfValue(1, 0) && !L->isVolatile() && !L->isIndexed() &&
+ "Must be enforced by calculateByteProvider");
+ assert(L->getOffset().isUndef() && "Unindexed load must have undef offset");
+
+ // All loads must share the same chain
+ SDValue LChain = L->getChain();
+ if (!Chain)
+ Chain = LChain;
+ else if (Chain != LChain)
+ return SDValue();
+
+ // Loads must share the same base address
+ BaseIndexOffset Ptr = BaseIndexOffset::match(L->getBasePtr(), DAG);
+ int64_t ByteOffsetFromBase = 0;
+ if (!Base)
+ Base = Ptr;
+ else if (!Base->equalBaseIndex(Ptr, DAG, ByteOffsetFromBase))
+ return SDValue();
+
+ // Calculate the offset of the current byte from the base address
+ ByteOffsetFromBase += MemoryByteOffset(*P);
+ ByteOffsets[i] = ByteOffsetFromBase;
+
+ // Remember the first byte load
+ if (ByteOffsetFromBase < FirstOffset) {
+ FirstByteProvider = P;
+ FirstOffset = ByteOffsetFromBase;
+ }
+
+ Loads.insert(L);
+ }
+ assert(Loads.size() > 0 && "All the bytes of the value must be loaded from "
+ "memory, so there must be at least one load which produces the value");
+ assert(Base && "Base address of the accessed memory location must be set");
+ assert(FirstOffset != INT64_MAX && "First byte offset must be set");
+
+  // Check if the bytes of the OR we are looking at match either a big-endian
+  // or a little-endian value load.
+ bool BigEndian = true, LittleEndian = true;
+ for (unsigned i = 0; i < ByteWidth; i++) {
+ int64_t CurrentByteOffset = ByteOffsets[i] - FirstOffset;
+ LittleEndian &= CurrentByteOffset == LittleEndianByteAt(ByteWidth, i);
+ BigEndian &= CurrentByteOffset == BigEndianByteAt(ByteWidth, i);
+ if (!BigEndian && !LittleEndian)
+ return SDValue();
+ }
+  assert((BigEndian != LittleEndian) &&
+         "should be either big or little endian");
+ assert(FirstByteProvider && "must be set");
+
+  // Ensure that the first byte is loaded from offset zero of the first load,
+  // so that the combined value can be loaded from the first load's address.
+ if (MemoryByteOffset(*FirstByteProvider) != 0)
+ return SDValue();
+ LoadSDNode *FirstLoad = FirstByteProvider->Load;
+
+ // The node we are looking at matches with the pattern, check if we can
+ // replace it with a single load and bswap if needed.
+
+  // If the load needs a byte swap, check whether the target supports it.
+ bool NeedsBswap = IsBigEndianTarget != BigEndian;
+
+  // Before legalization we can introduce illegal bswaps which will later be
+  // converted to an explicit bswap sequence. This way we end up with a single
+  // load and byte shuffling instead of several loads and byte shuffling.
+ if (NeedsBswap && LegalOperations && !TLI.isOperationLegal(ISD::BSWAP, VT))
+ return SDValue();
+
+ // Check that a load of the wide type is both allowed and fast on the target
+ bool Fast = false;
+ bool Allowed = TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(),
+ VT, FirstLoad->getAddressSpace(),
+ FirstLoad->getAlignment(), &Fast);
+ if (!Allowed || !Fast)
+ return SDValue();
+
+ SDValue NewLoad =
+ DAG.getLoad(VT, SDLoc(N), Chain, FirstLoad->getBasePtr(),
+ FirstLoad->getPointerInfo(), FirstLoad->getAlignment());
+
+ // Transfer chain users from old loads to the new load.
+ for (LoadSDNode *L : Loads)
+ DAG.ReplaceAllUsesOfValueWith(SDValue(L, 1), SDValue(NewLoad.getNode(), 1));
+
+ return NeedsBswap ? DAG.getNode(ISD::BSWAP, SDLoc(N), VT, NewLoad) : NewLoad;
+}
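+// Illustration (editor's sketch, not part of the patch): on a little-endian
+// target, i32 val = a[0] | (a[1] << 8) | (a[2] << 16) | (a[3] << 24) collects
+// ByteOffsets {0,1,2,3} from the shared base, matches the little-endian byte
+// order, and becomes a single i32 load; the reversed offsets {3,2,1,0} match
+// big-endian order and become (bswap (load i32 a)) when that is allowed.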
+
SDValue DAGCombiner::visitXOR(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
@@ -4386,6 +5042,10 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
// fold (xor x, 0) -> x
if (isNullConstant(N1))
return N0;
+
+ if (SDValue NewSel = foldBinOpIntoSelect(N))
+ return NewSel;
+
// reassociate xor
if (SDValue RXOR = ReassociateOps(ISD::XOR, SDLoc(N), N0, N1))
return RXOR;
@@ -4403,9 +5063,9 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
default:
llvm_unreachable("Unhandled SetCC Equivalent!");
case ISD::SETCC:
- return DAG.getSetCC(SDLoc(N), VT, LHS, RHS, NotCC);
+ return DAG.getSetCC(SDLoc(N0), VT, LHS, RHS, NotCC);
case ISD::SELECT_CC:
- return DAG.getSelectCC(SDLoc(N), LHS, RHS, N0.getOperand(2),
+ return DAG.getSelectCC(SDLoc(N0), LHS, RHS, N0.getOperand(2),
N0.getOperand(3), NotCC);
}
}
@@ -4470,6 +5130,17 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
N01C->getAPIntValue(), DL, VT));
}
}
+
+ // fold Y = sra (X, size(X)-1); xor (add (X, Y), Y) -> (abs X)
+ unsigned OpSizeInBits = VT.getScalarSizeInBits();
+ if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1 &&
+ N1.getOpcode() == ISD::SRA && N1.getOperand(0) == N0.getOperand(0) &&
+ TLI.isOperationLegalOrCustom(ISD::ABS, VT)) {
+ if (ConstantSDNode *C = isConstOrConstSplat(N1.getOperand(1)))
+ if (C->getAPIntValue() == (OpSizeInBits - 1))
+ return DAG.getNode(ISD::ABS, SDLoc(N), VT, N0.getOperand(0));
+ }
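+  // Illustration (editor's sketch, not part of the patch): for i32 this is
+  // the classic branchless abs:
+  //   Y = sra X, 31;  R = xor (add X, Y), Y
+  // Y is 0 for non-negative X (R = X) and -1 for negative X
+  // (R = ~(X - 1) = -X), so R = (abs X).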
+
// fold (xor x, x) -> 0
if (N0 == N1)
return tryFoldToZero(SDLoc(N), TLI, VT, DAG, LegalOperations, LegalTypes);
@@ -4505,8 +5176,7 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
return Tmp;
// Simplify the expression using non-local knowledge.
- if (!VT.isVector() &&
- SimplifyDemandedBits(SDValue(N, 0)))
+ if (SimplifyDemandedBits(SDValue(N, 0)))
return SDValue(N, 0);
return SDValue();
@@ -4613,13 +5283,51 @@ SDValue DAGCombiner::distributeTruncateThroughAnd(SDNode *N) {
}
SDValue DAGCombiner::visitRotate(SDNode *N) {
+ SDLoc dl(N);
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ EVT VT = N->getValueType(0);
+ unsigned Bitsize = VT.getScalarSizeInBits();
+
+ // fold (rot x, 0) -> x
+ if (isNullConstantOrNullSplatConstant(N1))
+ return N0;
+
+ // fold (rot x, c) -> (rot x, c % BitSize)
+ if (ConstantSDNode *Cst = isConstOrConstSplat(N1)) {
+ if (Cst->getAPIntValue().uge(Bitsize)) {
+ uint64_t RotAmt = Cst->getAPIntValue().urem(Bitsize);
+ return DAG.getNode(N->getOpcode(), dl, VT, N0,
+ DAG.getConstant(RotAmt, dl, N1.getValueType()));
+ }
+ }
+
// fold (rot* x, (trunc (and y, c))) -> (rot* x, (and (trunc y), (trunc c))).
- if (N->getOperand(1).getOpcode() == ISD::TRUNCATE &&
- N->getOperand(1).getOperand(0).getOpcode() == ISD::AND) {
- if (SDValue NewOp1 =
- distributeTruncateThroughAnd(N->getOperand(1).getNode()))
- return DAG.getNode(N->getOpcode(), SDLoc(N), N->getValueType(0),
- N->getOperand(0), NewOp1);
+ if (N1.getOpcode() == ISD::TRUNCATE &&
+ N1.getOperand(0).getOpcode() == ISD::AND) {
+ if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
+ return DAG.getNode(N->getOpcode(), dl, VT, N0, NewOp1);
+ }
+
+ unsigned NextOp = N0.getOpcode();
+  // fold (rot* (rot* x, c2), c1) -> (rot* x, (c1 +- c2) % bitsize)
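+  // e.g. (rotl (rotl x:i32, 3), 8) --> (rotl x:i32, 11), while
+  // (rotl (rotr x:i32, 3), 8) --> (rotl x:i32, 5).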
+ if (NextOp == ISD::ROTL || NextOp == ISD::ROTR) {
+ SDNode *C1 = DAG.isConstantIntBuildVectorOrConstantInt(N1);
+ SDNode *C2 = DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1));
+ if (C1 && C2 && C1->getValueType(0) == C2->getValueType(0)) {
+ EVT ShiftVT = C1->getValueType(0);
+ bool SameSide = (N->getOpcode() == NextOp);
+ unsigned CombineOp = SameSide ? ISD::ADD : ISD::SUB;
+ if (SDValue CombinedShift =
+ DAG.FoldConstantArithmetic(CombineOp, dl, ShiftVT, C1, C2)) {
+ SDValue BitsizeC = DAG.getConstant(Bitsize, dl, ShiftVT);
+ SDValue CombinedShiftNorm = DAG.FoldConstantArithmetic(
+ ISD::SREM, dl, ShiftVT, CombinedShift.getNode(),
+ BitsizeC.getNode());
+ return DAG.getNode(N->getOpcode(), dl, VT, N0->getOperand(0),
+ CombinedShiftNorm);
+ }
+ }
}
return SDValue();
}
@@ -4662,7 +5370,7 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
if (N0C && N1C && !N1C->isOpaque())
return DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT, N0C, N1C);
// fold (shl 0, x) -> 0
- if (isNullConstant(N0))
+ if (isNullConstantOrNullSplatConstant(N0))
return N0;
// fold (shl x, c >= size(x)) -> undef
if (N1C && N1C->getAPIntValue().uge(OpSizeInBits))
@@ -4673,6 +5381,10 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
// fold (shl undef, x) -> 0
if (N0.isUndef())
return DAG.getConstant(0, SDLoc(N), VT);
+
+ if (SDValue NewSel = foldBinOpIntoSelect(N))
+ return NewSel;
+
// if (shl x, c) is known to be zero, return 0
if (DAG.MaskedValueIsZero(SDValue(N, 0),
APInt::getAllOnesValue(OpSizeInBits)))
@@ -4763,7 +5475,7 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
// fold (shl (sr[la] exact X, C1), C2) -> (shl X, (C2-C1)) if C1 <= C2
// fold (shl (sr[la] exact X, C1), C2) -> (sr[la] X, (C2-C1)) if C1 > C2
if (N1C && (N0.getOpcode() == ISD::SRL || N0.getOpcode() == ISD::SRA) &&
- cast<BinaryWithFlagsSDNode>(N0)->Flags.hasExact()) {
+ N0->getFlags().hasExact()) {
if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
uint64_t C1 = N0C1->getZExtValue();
uint64_t C2 = N1C->getZExtValue();
@@ -4788,12 +5500,12 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
APInt Mask = APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - c1);
SDValue Shift;
if (c2 > c1) {
- Mask = Mask.shl(c2 - c1);
+ Mask <<= c2 - c1;
SDLoc DL(N);
Shift = DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0),
DAG.getConstant(c2 - c1, DL, N1.getValueType()));
} else {
- Mask = Mask.lshr(c1 - c2);
+ Mask.lshrInPlace(c1 - c2);
SDLoc DL(N);
Shift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0),
DAG.getConstant(c1 - c2, DL, N1.getValueType()));
@@ -4808,9 +5520,8 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
// fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1))
if (N0.getOpcode() == ISD::SRA && N1 == N0.getOperand(1) &&
isConstantOrConstantVector(N1, /* No Opaques */ true)) {
- unsigned BitSize = VT.getScalarSizeInBits();
SDLoc DL(N);
- SDValue AllBits = DAG.getConstant(APInt::getAllOnesValue(BitSize), DL, VT);
+ SDValue AllBits = DAG.getAllOnesConstant(DL, VT);
SDValue HiBitsMask = DAG.getNode(ISD::SHL, DL, VT, AllBits, N1);
return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), HiBitsMask);
}
@@ -4851,6 +5562,8 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
unsigned OpSizeInBits = VT.getScalarSizeInBits();
// Arithmetic shifting an all-sign-bit value is a no-op.
+ // fold (sra 0, x) -> 0
+ // fold (sra -1, x) -> -1
if (DAG.ComputeNumSignBits(N0) == OpSizeInBits)
return N0;
@@ -4865,18 +5578,16 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
if (N0C && N1C && !N1C->isOpaque())
return DAG.FoldConstantArithmetic(ISD::SRA, SDLoc(N), VT, N0C, N1C);
- // fold (sra 0, x) -> 0
- if (isNullConstant(N0))
- return N0;
- // fold (sra -1, x) -> -1
- if (isAllOnesConstant(N0))
- return N0;
// fold (sra x, c >= size(x)) -> undef
if (N1C && N1C->getAPIntValue().uge(OpSizeInBits))
return DAG.getUNDEF(VT);
// fold (sra x, 0) -> x
if (N1C && N1C->isNullValue())
return N0;
+
+ if (SDValue NewSel = foldBinOpIntoSelect(N))
+ return NewSel;
+
// fold (sra (shl x, c1), c1) -> sext_inreg for some c1 and target supports
// sext_inreg.
if (N1C && N0.getOpcode() == ISD::SHL && N1 == N0.getOperand(1)) {
@@ -5016,7 +5727,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
if (N0C && N1C && !N1C->isOpaque())
return DAG.FoldConstantArithmetic(ISD::SRL, SDLoc(N), VT, N0C, N1C);
// fold (srl 0, x) -> 0
- if (isNullConstant(N0))
+ if (isNullConstantOrNullSplatConstant(N0))
return N0;
// fold (srl x, c >= size(x)) -> undef
if (N1C && N1C->getAPIntValue().uge(OpSizeInBits))
@@ -5024,6 +5735,10 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
// fold (srl x, 0) -> x
if (N1C && N1C->isNullValue())
return N0;
+
+ if (SDValue NewSel = foldBinOpIntoSelect(N))
+ return NewSel;
+
// if (srl x, c) is known to be zero, return 0
if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
APInt::getAllOnesValue(OpSizeInBits)))
@@ -5049,24 +5764,24 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
// fold (srl (trunc (srl x, c1)), c2) -> 0 or (trunc (srl x, (add c1, c2)))
if (N1C && N0.getOpcode() == ISD::TRUNCATE &&
- N0.getOperand(0).getOpcode() == ISD::SRL &&
- isa<ConstantSDNode>(N0.getOperand(0)->getOperand(1))) {
- uint64_t c1 =
- cast<ConstantSDNode>(N0.getOperand(0)->getOperand(1))->getZExtValue();
- uint64_t c2 = N1C->getZExtValue();
- EVT InnerShiftVT = N0.getOperand(0).getValueType();
- EVT ShiftCountVT = N0.getOperand(0)->getOperand(1).getValueType();
- uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits();
- // This is only valid if the OpSizeInBits + c1 = size of inner shift.
- if (c1 + OpSizeInBits == InnerShiftSize) {
- SDLoc DL(N0);
- if (c1 + c2 >= InnerShiftSize)
- return DAG.getConstant(0, DL, VT);
- return DAG.getNode(ISD::TRUNCATE, DL, VT,
- DAG.getNode(ISD::SRL, DL, InnerShiftVT,
- N0.getOperand(0)->getOperand(0),
- DAG.getConstant(c1 + c2, DL,
- ShiftCountVT)));
+ N0.getOperand(0).getOpcode() == ISD::SRL) {
+ if (auto N001C = isConstOrConstSplat(N0.getOperand(0).getOperand(1))) {
+ uint64_t c1 = N001C->getZExtValue();
+ uint64_t c2 = N1C->getZExtValue();
+ EVT InnerShiftVT = N0.getOperand(0).getValueType();
+ EVT ShiftCountVT = N0.getOperand(0).getOperand(1).getValueType();
+ uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits();
+ // This is only valid if the OpSizeInBits + c1 = size of inner shift.
+ if (c1 + OpSizeInBits == InnerShiftSize) {
+ SDLoc DL(N0);
+ if (c1 + c2 >= InnerShiftSize)
+ return DAG.getConstant(0, DL, VT);
+ return DAG.getNode(ISD::TRUNCATE, DL, VT,
+ DAG.getNode(ISD::SRL, DL, InnerShiftVT,
+ N0.getOperand(0).getOperand(0),
+ DAG.getConstant(c1 + c2, DL,
+ ShiftCountVT)));
+ }
}
}
@@ -5074,9 +5789,8 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
if (N0.getOpcode() == ISD::SHL && N0.getOperand(1) == N1 &&
isConstantOrConstantVector(N1, /* NoOpaques */ true)) {
SDLoc DL(N);
- APInt AllBits = APInt::getAllOnesValue(N0.getScalarValueSizeInBits());
SDValue Mask =
- DAG.getNode(ISD::SRL, DL, VT, DAG.getConstant(AllBits, DL, VT), N1);
+ DAG.getNode(ISD::SRL, DL, VT, DAG.getAllOnesConstant(DL, VT), N1);
AddToWorklist(Mask.getNode());
return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), Mask);
}
@@ -5097,7 +5811,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
DAG.getConstant(ShiftAmt, DL0,
getShiftAmountTy(SmallVT)));
AddToWorklist(SmallShift.getNode());
- APInt Mask = APInt::getAllOnesValue(OpSizeInBits).lshr(ShiftAmt);
+ APInt Mask = APInt::getLowBitsSet(OpSizeInBits, OpSizeInBits - ShiftAmt);
SDLoc DL(N);
return DAG.getNode(ISD::AND, DL, VT,
DAG.getNode(ISD::ANY_EXTEND, DL, VT, SmallShift),
@@ -5115,20 +5829,20 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
// fold (srl (ctlz x), "5") -> x iff x has one bit set (the low bit).
if (N1C && N0.getOpcode() == ISD::CTLZ &&
N1C->getAPIntValue() == Log2_32(OpSizeInBits)) {
- APInt KnownZero, KnownOne;
- DAG.computeKnownBits(N0.getOperand(0), KnownZero, KnownOne);
+ KnownBits Known;
+ DAG.computeKnownBits(N0.getOperand(0), Known);
// If any of the input bits are KnownOne, then the input couldn't be all
// zeros, thus the result of the srl will always be zero.
- if (KnownOne.getBoolValue()) return DAG.getConstant(0, SDLoc(N0), VT);
+ if (Known.One.getBoolValue()) return DAG.getConstant(0, SDLoc(N0), VT);
    // If all of the bits input to the ctlz node are known to be zero, then
// the result of the ctlz is "32" and the result of the shift is one.
- APInt UnknownBits = ~KnownZero;
+ APInt UnknownBits = ~Known.Zero;
if (UnknownBits == 0) return DAG.getConstant(1, SDLoc(N0), VT);
// Otherwise, check to see if there is exactly one bit input to the ctlz.
- if ((UnknownBits & (UnknownBits - 1)) == 0) {
+ if (UnknownBits.isPowerOf2()) {
      // Okay, we know that only the single bit specified by UnknownBits
      // could be set on input to the CTLZ node. If this bit is set, the SRL
      // will return 0; if it is clear, it returns 1. Change the CTLZ/SRL pair
@@ -5202,6 +5916,22 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
return SDValue();
}
+SDValue DAGCombiner::visitABS(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+
+ // fold (abs c1) -> c2
+ if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
+ return DAG.getNode(ISD::ABS, SDLoc(N), VT, N0);
+ // fold (abs (abs x)) -> (abs x)
+ if (N0.getOpcode() == ISD::ABS)
+ return N0;
+ // fold (abs x) -> x iff not-negative
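+  // e.g. (abs (srl x, 1)) --> (srl x, 1), since the sign bit is known zero.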
+ if (DAG.SignBitIsZero(N0))
+ return N0;
+ return SDValue();
+}
+
SDValue DAGCombiner::visitBSWAP(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
@@ -5217,7 +5947,11 @@ SDValue DAGCombiner::visitBSWAP(SDNode *N) {
SDValue DAGCombiner::visitBITREVERSE(SDNode *N) {
SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+ // fold (bitreverse c1) -> c2
+ if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
+ return DAG.getNode(ISD::BITREVERSE, SDLoc(N), VT, N0);
// fold (bitreverse (bitreverse x)) -> x
if (N0.getOpcode() == ISD::BITREVERSE)
return N0.getOperand(0);
@@ -5311,7 +6045,6 @@ static SDValue combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS,
}
}
-// TODO: We should handle other cases of selecting between {-1,0,1} here.
SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) {
SDValue Cond = N->getOperand(0);
SDValue N1 = N->getOperand(1);
@@ -5320,6 +6053,67 @@ SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) {
EVT CondVT = Cond.getValueType();
SDLoc DL(N);
+ if (!VT.isInteger())
+ return SDValue();
+
+ auto *C1 = dyn_cast<ConstantSDNode>(N1);
+ auto *C2 = dyn_cast<ConstantSDNode>(N2);
+ if (!C1 || !C2)
+ return SDValue();
+
+ // Only do this before legalization to avoid conflicting with target-specific
+ // transforms in the other direction (create a select from a zext/sext). There
+ // is also a target-independent combine here in DAGCombiner in the other
+ // direction for (select Cond, -1, 0) when the condition is not i1.
+ if (CondVT == MVT::i1 && !LegalOperations) {
+ if (C1->isNullValue() && C2->isOne()) {
+ // select Cond, 0, 1 --> zext (!Cond)
+ SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
+ if (VT != MVT::i1)
+ NotCond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, NotCond);
+ return NotCond;
+ }
+ if (C1->isNullValue() && C2->isAllOnesValue()) {
+ // select Cond, 0, -1 --> sext (!Cond)
+ SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
+ if (VT != MVT::i1)
+ NotCond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, NotCond);
+ return NotCond;
+ }
+ if (C1->isOne() && C2->isNullValue()) {
+ // select Cond, 1, 0 --> zext (Cond)
+ if (VT != MVT::i1)
+ Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);
+ return Cond;
+ }
+ if (C1->isAllOnesValue() && C2->isNullValue()) {
+ // select Cond, -1, 0 --> sext (Cond)
+ if (VT != MVT::i1)
+ Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Cond);
+ return Cond;
+ }
+
+ // For any constants that differ by 1, we can transform the select into an
+ // extend and add. Use a target hook because some targets may prefer to
+ // transform in the other direction.
+ if (TLI.convertSelectOfConstantsToMath()) {
+ if (C1->getAPIntValue() - 1 == C2->getAPIntValue()) {
+ // select Cond, C1, C1-1 --> add (zext Cond), C1-1
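+        // e.g. select Cond, 4, 3 --> add (zext Cond), 3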
+ if (VT != MVT::i1)
+ Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);
+ return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
+ }
+ if (C1->getAPIntValue() + 1 == C2->getAPIntValue()) {
+ // select Cond, C1, C1+1 --> add (sext Cond), C1+1
+ if (VT != MVT::i1)
+ Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Cond);
+ return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
+ }
+ }
+
+ return SDValue();
+ }
+
// fold (select Cond, 0, 1) -> (xor Cond, 1)
// We can't do this reliably if integer based booleans have different contents
// to floating point based booleans. This is because we can't tell whether we
@@ -5329,15 +6123,14 @@ SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) {
// undiscoverable (or not reasonably discoverable). For example, it could be
// in another basic block or it could require searching a complicated
// expression.
- if (VT.isInteger() &&
- (CondVT == MVT::i1 || (CondVT.isInteger() &&
- TLI.getBooleanContents(false, true) ==
- TargetLowering::ZeroOrOneBooleanContent &&
- TLI.getBooleanContents(false, false) ==
- TargetLowering::ZeroOrOneBooleanContent)) &&
- isNullConstant(N1) && isOneConstant(N2)) {
- SDValue NotCond = DAG.getNode(ISD::XOR, DL, CondVT, Cond,
- DAG.getConstant(1, DL, CondVT));
+ if (CondVT.isInteger() &&
+ TLI.getBooleanContents(false, true) ==
+ TargetLowering::ZeroOrOneBooleanContent &&
+ TLI.getBooleanContents(false, false) ==
+ TargetLowering::ZeroOrOneBooleanContent &&
+ C1->isNullValue() && C2->isOne()) {
+ SDValue NotCond =
+ DAG.getNode(ISD::XOR, DL, CondVT, Cond, DAG.getConstant(1, DL, CondVT));
if (VT.bitsEq(CondVT))
return NotCond;
return DAG.getZExtOrTrunc(NotCond, DL, VT);
@@ -5352,19 +6145,22 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) {
SDValue N2 = N->getOperand(2);
EVT VT = N->getValueType(0);
EVT VT0 = N0.getValueType();
+ SDLoc DL(N);
// fold (select C, X, X) -> X
if (N1 == N2)
return N1;
+
if (const ConstantSDNode *N0C = dyn_cast<const ConstantSDNode>(N0)) {
// fold (select true, X, Y) -> X
// fold (select false, X, Y) -> Y
return !N0C->isNullValue() ? N1 : N2;
}
+
// fold (select X, X, Y) -> (or X, Y)
// fold (select X, 1, Y) -> (or C, Y)
if (VT == VT0 && VT == MVT::i1 && (N0 == N1 || isOneConstant(N1)))
- return DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N2);
+ return DAG.getNode(ISD::OR, DL, VT, N0, N2);
if (SDValue V = foldSelectOfConstants(N))
return V;
@@ -5373,22 +6169,22 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) {
if (VT == VT0 && VT == MVT::i1 && isNullConstant(N1)) {
SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT);
AddToWorklist(NOTNode.getNode());
- return DAG.getNode(ISD::AND, SDLoc(N), VT, NOTNode, N2);
+ return DAG.getNode(ISD::AND, DL, VT, NOTNode, N2);
}
// fold (select C, X, 1) -> (or (not C), X)
if (VT == VT0 && VT == MVT::i1 && isOneConstant(N2)) {
SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT);
AddToWorklist(NOTNode.getNode());
- return DAG.getNode(ISD::OR, SDLoc(N), VT, NOTNode, N1);
+ return DAG.getNode(ISD::OR, DL, VT, NOTNode, N1);
}
// fold (select X, Y, X) -> (and X, Y)
// fold (select X, Y, 0) -> (and X, Y)
if (VT == VT0 && VT == MVT::i1 && (N0 == N2 || isNullConstant(N2)))
- return DAG.getNode(ISD::AND, SDLoc(N), VT, N0, N1);
+ return DAG.getNode(ISD::AND, DL, VT, N0, N1);
// If we can fold this based on the true/false value, do so.
if (SimplifySelectOps(N, N1, N2))
- return SDValue(N, 0); // Don't revisit N.
+ return SDValue(N, 0); // Don't revisit N.
if (VT0 == MVT::i1) {
// The code in this block deals with the following 2 equivalences:
@@ -5399,27 +6195,27 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) {
// to the right anyway if we find the inner select exists in the DAG anyway
// and we always transform to the left side if we know that we can further
// optimize the combination of the conditions.
- bool normalizeToSequence
- = TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT);
+ bool normalizeToSequence =
+ TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT);
// select (and Cond0, Cond1), X, Y
// -> select Cond0, (select Cond1, X, Y), Y
if (N0->getOpcode() == ISD::AND && N0->hasOneUse()) {
SDValue Cond0 = N0->getOperand(0);
SDValue Cond1 = N0->getOperand(1);
- SDValue InnerSelect = DAG.getNode(ISD::SELECT, SDLoc(N),
- N1.getValueType(), Cond1, N1, N2);
+ SDValue InnerSelect =
+ DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond1, N1, N2);
if (normalizeToSequence || !InnerSelect.use_empty())
- return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Cond0,
+ return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0,
InnerSelect, N2);
}
// select (or Cond0, Cond1), X, Y -> select Cond0, X, (select Cond1, X, Y)
if (N0->getOpcode() == ISD::OR && N0->hasOneUse()) {
SDValue Cond0 = N0->getOperand(0);
SDValue Cond1 = N0->getOperand(1);
- SDValue InnerSelect = DAG.getNode(ISD::SELECT, SDLoc(N),
- N1.getValueType(), Cond1, N1, N2);
+ SDValue InnerSelect =
+ DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond1, N1, N2);
if (normalizeToSequence || !InnerSelect.use_empty())
- return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Cond0, N1,
+ return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0, N1,
InnerSelect);
}
@@ -5431,15 +6227,13 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) {
if (N1_2 == N2 && N0.getValueType() == N1_0.getValueType()) {
// Create the actual and node if we can generate good code for it.
if (!normalizeToSequence) {
- SDValue And = DAG.getNode(ISD::AND, SDLoc(N), N0.getValueType(),
- N0, N1_0);
- return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), And,
- N1_1, N2);
+ SDValue And = DAG.getNode(ISD::AND, DL, N0.getValueType(), N0, N1_0);
+ return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), And, N1_1, N2);
}
// Otherwise see if we can optimize the "and" to a better pattern.
if (SDValue Combined = visitANDLike(N0, N1_0, N))
- return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Combined,
- N1_1, N2);
+ return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1_1,
+ N2);
}
}
// select Cond0, X, (select Cond1, X, Y) -> select (or Cond0, Cond1), X, Y
@@ -5450,15 +6244,13 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) {
if (N2_1 == N1 && N0.getValueType() == N2_0.getValueType()) {
// Create the actual or node if we can generate good code for it.
if (!normalizeToSequence) {
- SDValue Or = DAG.getNode(ISD::OR, SDLoc(N), N0.getValueType(),
- N0, N2_0);
- return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Or,
- N1, N2_2);
+ SDValue Or = DAG.getNode(ISD::OR, DL, N0.getValueType(), N0, N2_0);
+ return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Or, N1, N2_2);
}
// Otherwise see if we can optimize to a better pattern.
if (SDValue Combined = visitORLike(N0, N2_0, N))
- return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Combined,
- N1, N2_2);
+ return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1,
+ N2_2);
}
}
}
@@ -5469,8 +6261,7 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) {
if (auto *C = dyn_cast<ConstantSDNode>(N0->getOperand(1))) {
SDValue Cond0 = N0->getOperand(0);
if (C->isOne())
- return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(),
- Cond0, N2, N1);
+ return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0, N2, N1);
}
}
}
@@ -5487,24 +6278,21 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) {
// FIXME: Instead of testing for UnsafeFPMath, this should be checking for
// no signed zeros as well as no nans.
const TargetOptions &Options = DAG.getTarget().Options;
- if (Options.UnsafeFPMath &&
- VT.isFloatingPoint() && N0.hasOneUse() &&
+ if (Options.UnsafeFPMath && VT.isFloatingPoint() && N0.hasOneUse() &&
DAG.isKnownNeverNaN(N1) && DAG.isKnownNeverNaN(N2)) {
ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
- if (SDValue FMinMax = combineMinNumMaxNum(SDLoc(N), VT, N0.getOperand(0),
- N0.getOperand(1), N1, N2, CC,
- TLI, DAG))
+ if (SDValue FMinMax = combineMinNumMaxNum(
+ DL, VT, N0.getOperand(0), N0.getOperand(1), N1, N2, CC, TLI, DAG))
return FMinMax;
}
if ((!LegalOperations &&
TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT)) ||
TLI.isOperationLegal(ISD::SELECT_CC, VT))
- return DAG.getNode(ISD::SELECT_CC, SDLoc(N), VT,
- N0.getOperand(0), N0.getOperand(1),
- N1, N2, N0.getOperand(2));
- return SimplifySelect(SDLoc(N), N0, N1, N2);
+ return DAG.getNode(ISD::SELECT_CC, DL, VT, N0.getOperand(0),
+ N0.getOperand(1), N1, N2, N0.getOperand(2));
+ return SimplifySelect(DL, N0, N1, N2);
}
return SDValue();
@@ -5847,7 +6635,7 @@ SDValue DAGCombiner::visitMLOAD(SDNode *N) {
ISD::NON_EXTLOAD, MLD->isExpandingLoad());
Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, DL, LoMemVT, DAG,
- MLD->isExpandingLoad());
+ MLD->isExpandingLoad());
MMO = DAG.getMachineFunction().
getMachineMemOperand(MLD->getPointerInfo(),
@@ -5908,6 +6696,9 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) {
if (isAbs) {
EVT VT = LHS.getValueType();
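+    // If ABS is legal or custom for this type, prefer it over the explicit
+    // shift-based expansion built below.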
+ if (TLI.isOperationLegalOrCustom(ISD::ABS, VT))
+ return DAG.getNode(ISD::ABS, DL, VT, LHS);
+
SDValue Shift = DAG.getNode(
ISD::SRA, DL, VT, LHS,
DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT));
@@ -5921,34 +6712,6 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) {
if (SimplifySelectOps(N, N1, N2))
return SDValue(N, 0); // Don't revisit N.
- // If the VSELECT result requires splitting and the mask is provided by a
- // SETCC, then split both nodes and its operands before legalization. This
- // prevents the type legalizer from unrolling SETCC into scalar comparisons
- // and enables future optimizations (e.g. min/max pattern matching on X86).
- if (N0.getOpcode() == ISD::SETCC) {
- EVT VT = N->getValueType(0);
-
- // Check if any splitting is required.
- if (TLI.getTypeAction(*DAG.getContext(), VT) !=
- TargetLowering::TypeSplitVector)
- return SDValue();
-
- SDValue Lo, Hi, CCLo, CCHi, LL, LH, RL, RH;
- std::tie(CCLo, CCHi) = SplitVSETCC(N0.getNode(), DAG);
- std::tie(LL, LH) = DAG.SplitVectorOperand(N, 1);
- std::tie(RL, RH) = DAG.SplitVectorOperand(N, 2);
-
- Lo = DAG.getNode(N->getOpcode(), DL, LL.getValueType(), CCLo, LL, RL);
- Hi = DAG.getNode(N->getOpcode(), DL, LH.getValueType(), CCHi, LH, RH);
-
- // Add the new VSELECT nodes to the work list in case they need to be split
- // again.
- AddToWorklist(Lo.getNode());
- AddToWorklist(Hi.getNode());
-
- return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
- }
-
// Fold (vselect (build_vector all_ones), N1, N2) -> N1
if (ISD::isBuildVectorAllOnes(N0.getNode()))
return N1;
@@ -6030,6 +6793,19 @@ SDValue DAGCombiner::visitSETCCE(SDNode *N) {
return SDValue();
}
+SDValue DAGCombiner::visitSETCCCARRY(SDNode *N) {
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+ SDValue Carry = N->getOperand(2);
+ SDValue Cond = N->getOperand(3);
+
+ // If Carry is false, fold to a regular SETCC.
+ if (isNullConstant(Carry))
+ return DAG.getNode(ISD::SETCC, SDLoc(N), N->getVTList(), LHS, RHS, Cond);
+
+ return SDValue();
+}
+
/// Try to fold a sext/zext/aext dag node into a ConstantSDNode or
/// a build_vector of constants.
/// This function is called by the DAGCombiner when visiting sext/zext/aext
@@ -6258,6 +7034,9 @@ SDValue DAGCombiner::CombineExtLoad(SDNode *N) {
SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
SDValue NewValue = DAG.getNode(ISD::CONCAT_VECTORS, DL, DstVT, Loads);
+ // Simplify TF.
+ AddToWorklist(NewChain.getNode());
+
CombineTo(N, NewValue);
// Replace uses of the original load (before extension)
@@ -6270,9 +7049,55 @@ SDValue DAGCombiner::CombineExtLoad(SDNode *N) {
return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
+/// If we're narrowing or widening the result of a vector select and the final
+/// size is the same size as a setcc (compare) feeding the select, then try to
+/// apply the cast operation to the select's operands because matching vector
+/// sizes for a select condition and other operands should be more efficient.
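+///
+/// For example, (zext (vselect (setcc ...), A:v4i16, B:v4i16)) to v4i32 can
+/// become (vselect (setcc ...), (zext A), (zext B)) when the setcc result is
+/// also a 128-bit vector.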
+SDValue DAGCombiner::matchVSelectOpSizesWithSetCC(SDNode *Cast) {
+ unsigned CastOpcode = Cast->getOpcode();
+ assert((CastOpcode == ISD::SIGN_EXTEND || CastOpcode == ISD::ZERO_EXTEND ||
+ CastOpcode == ISD::TRUNCATE || CastOpcode == ISD::FP_EXTEND ||
+ CastOpcode == ISD::FP_ROUND) &&
+ "Unexpected opcode for vector select narrowing/widening");
+
+ // We only do this transform before legal ops because the pattern may be
+ // obfuscated by target-specific operations after legalization. Do not create
+ // an illegal select op, however, because that may be difficult to lower.
+ EVT VT = Cast->getValueType(0);
+ if (LegalOperations || !TLI.isOperationLegalOrCustom(ISD::VSELECT, VT))
+ return SDValue();
+
+ SDValue VSel = Cast->getOperand(0);
+ if (VSel.getOpcode() != ISD::VSELECT || !VSel.hasOneUse() ||
+ VSel.getOperand(0).getOpcode() != ISD::SETCC)
+ return SDValue();
+
+ // Does the setcc have the same vector size as the casted select?
+ SDValue SetCC = VSel.getOperand(0);
+ EVT SetCCVT = getSetCCResultType(SetCC.getOperand(0).getValueType());
+ if (SetCCVT.getSizeInBits() != VT.getSizeInBits())
+ return SDValue();
+
+ // cast (vsel (setcc X), A, B) --> vsel (setcc X), (cast A), (cast B)
+ SDValue A = VSel.getOperand(1);
+ SDValue B = VSel.getOperand(2);
+ SDValue CastA, CastB;
+ SDLoc DL(Cast);
+ if (CastOpcode == ISD::FP_ROUND) {
+ // FP_ROUND (fptrunc) has an extra flag operand to pass along.
+ CastA = DAG.getNode(CastOpcode, DL, VT, A, Cast->getOperand(1));
+ CastB = DAG.getNode(CastOpcode, DL, VT, B, Cast->getOperand(1));
+ } else {
+ CastA = DAG.getNode(CastOpcode, DL, VT, A);
+ CastB = DAG.getNode(CastOpcode, DL, VT, B);
+ }
+ return DAG.getNode(ISD::VSELECT, DL, VT, SetCC, CastA, CastB);
+}
+
SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
+ SDLoc DL(N);
if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes,
LegalOperations))
@@ -6281,8 +7106,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
// fold (sext (sext x)) -> (sext x)
// fold (sext (aext x)) -> (sext x)
if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
- return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT,
- N0.getOperand(0));
+ return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N0.getOperand(0));
if (N0.getOpcode() == ISD::TRUNCATE) {
// fold (sext (truncate (load x))) -> (sext (smaller load x))
@@ -6314,12 +7138,12 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
// Op is i32, Mid is i8, and Dest is i64. If Op has more than 24 sign
// bits, just sext from i32.
if (NumSignBits > OpBits-MidBits)
- return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, Op);
+ return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Op);
} else {
// Op is i64, Mid is i8, and Dest is i32. If Op has more than 56 sign
// bits, just truncate to i32.
if (NumSignBits > OpBits-MidBits)
- return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Op);
+ return DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
}
// fold (sext (truncate x)) -> (sextinreg x).
@@ -6329,7 +7153,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
Op = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N0), VT, Op);
else if (OpBits > DestBits)
Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), VT, Op);
- return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, Op,
+ return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, Op,
DAG.getValueType(N0.getValueType()));
}
}
@@ -6349,17 +7173,20 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
DoXform &= TLI.isVectorLoadExtDesirable(SDValue(N, 0));
if (DoXform) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
- SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
- LN0->getChain(),
+ SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, DL, VT, LN0->getChain(),
LN0->getBasePtr(), N0.getValueType(),
LN0->getMemOperand());
- CombineTo(N, ExtLoad);
SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
N0.getValueType(), ExtLoad);
- CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1));
- ExtendSetCCUses(SetCCs, Trunc, ExtLoad, SDLoc(N),
- ISD::SIGN_EXTEND);
- return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ ExtendSetCCUses(SetCCs, Trunc, ExtLoad, DL, ISD::SIGN_EXTEND);
+ // If the load value is used only by N, replace it via CombineTo N.
+ bool NoReplaceTrunc = SDValue(LN0, 0).hasOneUse();
+ CombineTo(N, ExtLoad);
+ if (NoReplaceTrunc)
+ DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
+ else
+ CombineTo(LN0, Trunc, ExtLoad.getValue(1));
+ return SDValue(N, 0);
}
}
@@ -6376,8 +7203,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
EVT MemVT = LN0->getMemoryVT();
if ((!LegalOperations && !LN0->isVolatile()) ||
TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, MemVT)) {
- SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
- LN0->getChain(),
+ SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, DL, VT, LN0->getChain(),
LN0->getBasePtr(), MemVT,
LN0->getMemOperand());
CombineTo(N, ExtLoad);
@@ -6411,32 +7237,38 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
LN0->getMemOperand());
APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
Mask = Mask.sext(VT.getSizeInBits());
- SDLoc DL(N);
SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
ExtLoad, DAG.getConstant(Mask, DL, VT));
SDValue Trunc = DAG.getNode(ISD::TRUNCATE,
SDLoc(N0.getOperand(0)),
N0.getOperand(0).getValueType(), ExtLoad);
+ ExtendSetCCUses(SetCCs, Trunc, ExtLoad, DL, ISD::SIGN_EXTEND);
+ bool NoReplaceTrunc = SDValue(LN0, 0).hasOneUse();
CombineTo(N, And);
- CombineTo(N0.getOperand(0).getNode(), Trunc, ExtLoad.getValue(1));
- ExtendSetCCUses(SetCCs, Trunc, ExtLoad, DL,
- ISD::SIGN_EXTEND);
- return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ if (NoReplaceTrunc)
+ DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
+ else
+ CombineTo(LN0, Trunc, ExtLoad.getValue(1));
+      return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
}
}
if (N0.getOpcode() == ISD::SETCC) {
- EVT N0VT = N0.getOperand(0).getValueType();
+ SDValue N00 = N0.getOperand(0);
+ SDValue N01 = N0.getOperand(1);
+ ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
+ EVT N00VT = N0.getOperand(0).getValueType();
+
// sext(setcc) -> sext_in_reg(vsetcc) for vectors.
// Only do this before legalize for now.
if (VT.isVector() && !LegalOperations &&
- TLI.getBooleanContents(N0VT) ==
+ TLI.getBooleanContents(N00VT) ==
TargetLowering::ZeroOrNegativeOneBooleanContent) {
// On some architectures (such as SSE/NEON/etc) the SETCC result type is
// of the same size as the compared operands. Only optimize sext(setcc())
// if this is the case.
- EVT SVT = getSetCCResultType(N0VT);
+ EVT SVT = getSetCCResultType(N00VT);
// We know that the # elements of the results is the same as the
// # elements of the compare (and the # elements of the compare result
@@ -6444,19 +7276,15 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
// we know that the element size of the sext'd result matches the
// element size of the compare operands.
if (VT.getSizeInBits() == SVT.getSizeInBits())
- return DAG.getSetCC(SDLoc(N), VT, N0.getOperand(0),
- N0.getOperand(1),
- cast<CondCodeSDNode>(N0.getOperand(2))->get());
+ return DAG.getSetCC(DL, VT, N00, N01, CC);
// If the desired elements are smaller or larger than the source
- // elements we can use a matching integer vector type and then
- // truncate/sign extend
- EVT MatchingVectorType = N0VT.changeVectorElementTypeToInteger();
- if (SVT == MatchingVectorType) {
- SDValue VsetCC = DAG.getSetCC(SDLoc(N), MatchingVectorType,
- N0.getOperand(0), N0.getOperand(1),
- cast<CondCodeSDNode>(N0.getOperand(2))->get());
- return DAG.getSExtOrTrunc(VsetCC, SDLoc(N), VT);
+ // elements, we can use a matching integer vector type and then
+ // truncate/sign extend.
+ EVT MatchingVecType = N00VT.changeVectorElementTypeToInteger();
+ if (SVT == MatchingVecType) {
+ SDValue VsetCC = DAG.getSetCC(DL, MatchingVecType, N00, N01, CC);
+ return DAG.getSExtOrTrunc(VsetCC, DL, VT);
}
}
@@ -6465,36 +7293,30 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
// getBooleanContents().
unsigned SetCCWidth = N0.getScalarValueSizeInBits();
- SDLoc DL(N);
// To determine the "true" side of the select, we need to know the high bit
// of the value returned by the setcc if it evaluates to true.
// If the type of the setcc is i1, then the true case of the select is just
// sext(i1 1), that is, -1.
// If the type of the setcc is larger (say, i8) then the value of the high
- // bit depends on getBooleanContents(). So, ask TLI for a real "true" value
+ // bit depends on getBooleanContents(), so ask TLI for a real "true" value
// of the appropriate width.
- SDValue ExtTrueVal =
- (SetCCWidth == 1)
- ? DAG.getConstant(APInt::getAllOnesValue(VT.getScalarSizeInBits()),
- DL, VT)
- : TLI.getConstTrueVal(DAG, VT, DL);
-
- if (SDValue SCC = SimplifySelectCC(
- DL, N0.getOperand(0), N0.getOperand(1), ExtTrueVal,
- DAG.getConstant(0, DL, VT),
- cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
+ SDValue ExtTrueVal = (SetCCWidth == 1) ? DAG.getAllOnesConstant(DL, VT)
+ : TLI.getConstTrueVal(DAG, VT, DL);
+ SDValue Zero = DAG.getConstant(0, DL, VT);
+ if (SDValue SCC =
+ SimplifySelectCC(DL, N00, N01, ExtTrueVal, Zero, CC, true))
return SCC;
if (!VT.isVector()) {
- EVT SetCCVT = getSetCCResultType(N0.getOperand(0).getValueType());
- if (!LegalOperations ||
- TLI.isOperationLegal(ISD::SETCC, N0.getOperand(0).getValueType())) {
- SDLoc DL(N);
- ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
- SDValue SetCC =
- DAG.getSetCC(DL, SetCCVT, N0.getOperand(0), N0.getOperand(1), CC);
- return DAG.getSelect(DL, VT, SetCC, ExtTrueVal,
- DAG.getConstant(0, DL, VT));
+ EVT SetCCVT = getSetCCResultType(N00VT);
+ // Don't do this transform for i1 because there's a select transform
+ // that would reverse it.
+ // TODO: We should not do this transform at all without a target hook
+ // because a sext is likely cheaper than a select?
+ if (SetCCVT.getScalarSizeInBits() != 1 &&
+ (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, N00VT))) {
+ SDValue SetCC = DAG.getSetCC(DL, SetCCVT, N00, N01, CC);
+ return DAG.getSelect(DL, VT, SetCC, ExtTrueVal, Zero);
}
}
}
@@ -6502,21 +7324,23 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
// fold (sext x) -> (zext x) if the sign bit is known zero.
if ((!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, VT)) &&
DAG.SignBitIsZero(N0))
- return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, N0);
+ return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0);
+
+ if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
+ return NewVSel;
return SDValue();
}
// isTruncateOf - If N is a truncate of some other value, return true, record
-// the value being truncated in Op and which of Op's bits are zero in KnownZero.
-// This function computes KnownZero to avoid a duplicated call to
+// the value being truncated in Op and which of Op's bits are zero/one in Known.
+// This function computes KnownBits to avoid a duplicated call to
// computeKnownBits in the caller.
static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op,
- APInt &KnownZero) {
- APInt KnownOne;
+ KnownBits &Known) {
if (N->getOpcode() == ISD::TRUNCATE) {
Op = N->getOperand(0);
- DAG.computeKnownBits(Op, KnownZero, KnownOne);
+ DAG.computeKnownBits(Op, Known);
return true;
}
@@ -6535,9 +7359,9 @@ static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op,
else
return false;
- DAG.computeKnownBits(Op, KnownZero, KnownOne);
+ DAG.computeKnownBits(Op, Known);
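+  // Succeed only if every bit of Op other than bit 0 is known zero.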
- if (!(KnownZero | APInt(Op.getValueSizeInBits(), 1)).isAllOnesValue())
+ if (!(Known.Zero | 1).isAllOnesValue())
return false;
return true;
@@ -6562,8 +7386,8 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
// This is valid when the truncated bits of x are already zero.
// FIXME: We should extend this to work for vectors too.
SDValue Op;
- APInt KnownZero;
- if (!VT.isVector() && isTruncateOf(DAG, N0, Op, KnownZero)) {
+ KnownBits Known;
+ if (!VT.isVector() && isTruncateOf(DAG, N0, Op, Known)) {
APInt TruncatedBits =
(Op.getValueSizeInBits() == N0.getValueSizeInBits()) ?
APInt(Op.getValueSizeInBits(), 0) :
@@ -6571,14 +7395,8 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
N0.getValueSizeInBits(),
std::min(Op.getValueSizeInBits(),
VT.getSizeInBits()));
- if (TruncatedBits == (KnownZero & TruncatedBits)) {
- if (VT.bitsGT(Op.getValueType()))
- return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, Op);
- if (VT.bitsLT(Op.getValueType()))
- return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Op);
-
- return Op;
- }
+ if (TruncatedBits.isSubsetOf(Known.Zero))
+ return DAG.getZExtOrTrunc(Op, SDLoc(N), VT);
}
// fold (zext (truncate (load x))) -> (zext (smaller load x))
@@ -6625,14 +7443,8 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
}
if (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT)) {
- SDValue Op = N0.getOperand(0);
- if (SrcVT.bitsLT(VT)) {
- Op = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, Op);
- AddToWorklist(Op.getNode());
- } else if (SrcVT.bitsGT(VT)) {
- Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Op);
- AddToWorklist(Op.getNode());
- }
+ SDValue Op = DAG.getAnyExtOrTrunc(N0.getOperand(0), SDLoc(N), VT);
+ AddToWorklist(Op.getNode());
return DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT.getScalarType());
}
}
@@ -6646,11 +7458,7 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
N0.getValueType()) ||
!TLI.isZExtFree(N0.getValueType(), VT))) {
SDValue X = N0.getOperand(0).getOperand(0);
- if (X.getValueType().bitsLT(VT)) {
- X = DAG.getNode(ISD::ANY_EXTEND, SDLoc(X), VT, X);
- } else if (X.getValueType().bitsGT(VT)) {
- X = DAG.getNode(ISD::TRUNCATE, SDLoc(X), VT, X);
- }
+ X = DAG.getAnyExtOrTrunc(X, SDLoc(X), VT);
APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
Mask = Mask.zext(VT.getSizeInBits());
SDLoc DL(N);
@@ -6677,14 +7485,18 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
LN0->getChain(),
LN0->getBasePtr(), N0.getValueType(),
LN0->getMemOperand());
- CombineTo(N, ExtLoad);
+
SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
N0.getValueType(), ExtLoad);
- CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1));
-
- ExtendSetCCUses(SetCCs, Trunc, ExtLoad, SDLoc(N),
- ISD::ZERO_EXTEND);
- return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ ExtendSetCCUses(SetCCs, Trunc, ExtLoad, SDLoc(N), ISD::ZERO_EXTEND);
+ // If the load value is used only by N, replace it via CombineTo N.
+ bool NoReplaceTrunc = SDValue(LN0, 0).hasOneUse();
+ CombineTo(N, ExtLoad);
+ if (NoReplaceTrunc)
+ DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
+ else
+ CombineTo(LN0, Trunc, ExtLoad.getValue(1));
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
}
@@ -6734,11 +7546,14 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
SDValue Trunc = DAG.getNode(ISD::TRUNCATE,
SDLoc(N0.getOperand(0)),
N0.getOperand(0).getValueType(), ExtLoad);
+ ExtendSetCCUses(SetCCs, Trunc, ExtLoad, DL, ISD::ZERO_EXTEND);
+ bool NoReplaceTrunc = SDValue(LN0, 0).hasOneUse();
CombineTo(N, And);
- CombineTo(N0.getOperand(0).getNode(), Trunc, ExtLoad.getValue(1));
- ExtendSetCCUses(SetCCs, Trunc, ExtLoad, DL,
- ISD::ZERO_EXTEND);
- return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ if (NoReplaceTrunc)
+ DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
+ else
+ CombineTo(LN0, Trunc, ExtLoad.getValue(1));
+      return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
}
}
@@ -6837,6 +7652,9 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
ShAmt);
}
+ if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
+ return NewVSel;
+
return SDValue();
}
@@ -6871,14 +7689,8 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
}
// fold (aext (truncate x))
- if (N0.getOpcode() == ISD::TRUNCATE) {
- SDValue TruncOp = N0.getOperand(0);
- if (TruncOp.getValueType() == VT)
- return TruncOp; // x iff x size == zext size.
- if (TruncOp.getValueType().bitsGT(VT))
- return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, TruncOp);
- return DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, TruncOp);
- }
+ if (N0.getOpcode() == ISD::TRUNCATE)
+ return DAG.getAnyExtOrTrunc(N0.getOperand(0), SDLoc(N), VT);
// Fold (aext (and (trunc x), cst)) -> (and x, cst)
// if the trunc is not free.
@@ -6889,11 +7701,7 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
N0.getValueType())) {
SDLoc DL(N);
SDValue X = N0.getOperand(0).getOperand(0);
- if (X.getValueType().bitsLT(VT)) {
- X = DAG.getNode(ISD::ANY_EXTEND, DL, VT, X);
- } else if (X.getValueType().bitsGT(VT)) {
- X = DAG.getNode(ISD::TRUNCATE, DL, VT, X);
- }
+ X = DAG.getAnyExtOrTrunc(X, DL, VT);
APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
Mask = Mask.zext(VT.getSizeInBits());
return DAG.getNode(ISD::AND, DL, VT,
@@ -6917,13 +7725,18 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
LN0->getChain(),
LN0->getBasePtr(), N0.getValueType(),
LN0->getMemOperand());
- CombineTo(N, ExtLoad);
SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
N0.getValueType(), ExtLoad);
- CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1));
ExtendSetCCUses(SetCCs, Trunc, ExtLoad, SDLoc(N),
ISD::ANY_EXTEND);
- return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ // If the load value is used only by N, replace it via CombineTo N.
+ bool NoReplaceTrunc = N0.hasOneUse();
+ CombineTo(N, ExtLoad);
+ if (NoReplaceTrunc)
+ DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
+ else
+ CombineTo(LN0, Trunc, ExtLoad.getValue(1));
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
}
@@ -6991,9 +7804,25 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
return SDValue();
}
+SDValue DAGCombiner::visitAssertZext(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ EVT EVT = cast<VTSDNode>(N1)->getVT();
+
+ // fold (assertzext (assertzext x, vt), vt) -> (assertzext x, vt)
+ if (N0.getOpcode() == ISD::AssertZext &&
+ EVT == cast<VTSDNode>(N0.getOperand(1))->getVT())
+ return N0;
+
+ return SDValue();
+}
+
/// See if the specified operand can be simplified with the knowledge that only
/// the bits specified by Mask are used. If so, return the simpler operand,
/// otherwise return a null SDValue.
+///
+/// (This exists alongside SimplifyDemandedBits because GetDemandedBits can
+/// simplify nodes with multiple uses more aggressively.)
SDValue DAGCombiner::GetDemandedBits(SDValue V, const APInt &Mask) {
switch (V.getOpcode()) {
default: break;
@@ -7029,6 +7858,14 @@ SDValue DAGCombiner::GetDemandedBits(SDValue V, const APInt &Mask) {
return DAG.getNode(ISD::SRL, SDLoc(V), V.getValueType(),
SimplifyLHS, V.getOperand(1));
}
+ break;
+ case ISD::AND: {
+ // X & -1 -> X (ignoring bits which aren't demanded).
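+    // e.g. with Mask == 0xff, (and x, 0x1ff) can be replaced by x, since the
+    // constant covers every demanded bit.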
+ ConstantSDNode *AndVal = isConstOrConstSplat(V.getOperand(1));
+ if (AndVal && (AndVal->getAPIntValue() & Mask) == Mask)
+ return V.getOperand(0);
+ break;
+ }
}
return SDValue();
}
@@ -7169,7 +8006,7 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
SDValue NewPtr = DAG.getNode(ISD::ADD, DL,
PtrType, LN0->getBasePtr(),
DAG.getConstant(PtrOff, DL, PtrType),
- &Flags);
+ Flags);
AddToWorklist(NewPtr.getNode());
SDValue Load;
@@ -7244,6 +8081,16 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00, N1);
}
+ // fold (sext_in_reg (*_extend_vector_inreg x)) -> (sext_vector_in_reg x)
+ if ((N0.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG ||
+ N0.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG ||
+ N0.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG) &&
+ N0.getOperand(0).getScalarValueSizeInBits() == EVTBits) {
+ if (!LegalOperations ||
+ TLI.isOperationLegal(ISD::SIGN_EXTEND_VECTOR_INREG, VT))
+ return DAG.getSignExtendVectorInReg(N0.getOperand(0), SDLoc(N), VT);
+ }
+
// fold (sext_in_reg (zext x)) -> (sext x)
// iff we are extending the source sign bit.
if (N0.getOpcode() == ISD::ZERO_EXTEND) {
@@ -7254,7 +8101,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
}
// fold (sext_in_reg x) -> (zext_in_reg x) if the sign bit is known zero.
- if (DAG.MaskedValueIsZero(N0, APInt::getBitsSet(VTBits, EVTBits-1, EVTBits)))
+ if (DAG.MaskedValueIsZero(N0, APInt::getOneBitSet(VTBits, EVTBits - 1)))
return DAG.getZeroExtendInReg(N0, SDLoc(N), EVT.getScalarType());
// fold operands of sext_in_reg based on knowledge that the top bits are not
@@ -7439,18 +8286,20 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
if (N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
(!LegalOperations || TLI.isOperationLegalOrCustom(ISD::SHL, VT)) &&
TLI.isTypeDesirableForOp(ISD::SHL, VT)) {
- if (const ConstantSDNode *CAmt = isConstOrConstSplat(N0.getOperand(1))) {
- uint64_t Amt = CAmt->getZExtValue();
- unsigned Size = VT.getScalarSizeInBits();
-
- if (Amt < Size) {
- SDLoc SL(N);
- EVT AmtVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
+ SDValue Amt = N0.getOperand(1);
+ KnownBits Known;
+ DAG.computeKnownBits(Amt, Known);
+ unsigned Size = VT.getScalarSizeInBits();
+ if (Known.getBitWidth() - Known.countMinLeadingZeros() <= Log2_32(Size)) {
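+      // The amount occupies at most Log2_32(Size) low bits, so it is
+      // guaranteed to be smaller than Size.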
+ SDLoc SL(N);
+ EVT AmtVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
- SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(0));
- return DAG.getNode(ISD::SHL, SL, VT, Trunc,
- DAG.getConstant(Amt, SL, AmtVT));
+ SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(0));
+ if (AmtVT != Amt.getValueType()) {
+ Amt = DAG.getZExtOrTrunc(Amt, SL, AmtVT);
+ AddToWorklist(Amt.getNode());
}
+ return DAG.getNode(ISD::SHL, SL, VT, Trunc, Amt);
}
}
@@ -7496,6 +8345,7 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
VT.getSizeInBits())))
return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Shorter);
}
+
// fold (truncate (load x)) -> (smaller load x)
// fold (truncate (srl (load x), c)) -> (smaller load (x+c/evtbits))
if (!LegalTypes || TLI.isTypeDesirableForOp(N0.getOpcode(), VT)) {
@@ -7517,6 +8367,7 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
}
}
}
+
// fold (trunc (concat ... x ...)) -> (concat ..., (trunc x), ...)),
// where ... are all 'undef'.
if (N0.getOpcode() == ISD::CONCAT_VECTORS && !LegalTypes) {
@@ -7582,6 +8433,22 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
SimplifyDemandedBits(SDValue(N, 0)))
return SDValue(N, 0);
+ // (trunc adde(X, Y, Carry)) -> (adde trunc(X), trunc(Y), Carry)
+ // (trunc addcarry(X, Y, Carry)) -> (addcarry trunc(X), trunc(Y), Carry)
+ // When the adde's carry is not used.
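+  // Truncating is only safe when the carry output is unused, since the
+  // narrower add would compute a different carry-out.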
+ if ((N0.getOpcode() == ISD::ADDE || N0.getOpcode() == ISD::ADDCARRY) &&
+ N0.hasOneUse() && !N0.getNode()->hasAnyUseOfValue(1) &&
+ (!LegalOperations || TLI.isOperationLegal(N0.getOpcode(), VT))) {
+ SDLoc SL(N);
+ auto X = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(0));
+ auto Y = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1));
+ auto VTs = DAG.getVTList(VT, N0->getValueType(1));
+ return DAG.getNode(N0.getOpcode(), SL, VTs, X, Y, N0.getOperand(2));
+ }
+
+ if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
+ return NewVSel;
+
return SDValue();
}
@@ -7645,11 +8512,11 @@ static SDValue foldBitcastedFPLogic(SDNode *N, SelectionDAG &DAG,
switch (N0.getOpcode()) {
case ISD::AND:
FPOpcode = ISD::FABS;
- SignMask = ~APInt::getSignBit(SourceVT.getSizeInBits());
+ SignMask = ~APInt::getSignMask(SourceVT.getSizeInBits());
break;
case ISD::XOR:
FPOpcode = ISD::FNEG;
- SignMask = APInt::getSignBit(SourceVT.getSizeInBits());
+ SignMask = APInt::getSignMask(SourceVT.getSizeInBits());
break;
// TODO: ISD::OR --> ISD::FNABS?
default:
@@ -7672,6 +8539,9 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
+ if (N0.isUndef())
+ return DAG.getUNDEF(VT);
+
// If the input is a BUILD_VECTOR with all constant elements, fold this now.
// Only do this before legalize, since afterward the target may be depending
// on the bitconvert.
@@ -7757,7 +8627,7 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) {
if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
assert(VT.getSizeInBits() == 128);
SDValue SignBit = DAG.getConstant(
- APInt::getSignBit(VT.getSizeInBits() / 2), SDLoc(N0), MVT::i64);
+ APInt::getSignMask(VT.getSizeInBits() / 2), SDLoc(N0), MVT::i64);
SDValue FlipBit;
if (N0.getOpcode() == ISD::FNEG) {
FlipBit = SignBit;
@@ -7777,7 +8647,7 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) {
AddToWorklist(FlipBits.getNode());
return DAG.getNode(ISD::XOR, DL, VT, NewConv, FlipBits);
}
- APInt SignBit = APInt::getSignBit(VT.getSizeInBits());
+ APInt SignBit = APInt::getSignMask(VT.getSizeInBits());
if (N0.getOpcode() == ISD::FNEG)
return DAG.getNode(ISD::XOR, DL, VT,
NewConv, DAG.getConstant(SignBit, DL, VT));
@@ -7825,7 +8695,7 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) {
}
if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
- APInt SignBit = APInt::getSignBit(VT.getSizeInBits() / 2);
+ APInt SignBit = APInt::getSignMask(VT.getSizeInBits() / 2);
SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0));
AddToWorklist(Cst.getNode());
SDValue X = DAG.getBitcast(VT, N0.getOperand(1));
@@ -7846,7 +8716,7 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) {
AddToWorklist(FlipBits.getNode());
return DAG.getNode(ISD::XOR, SDLoc(N), VT, Cst, FlipBits);
}
- APInt SignBit = APInt::getSignBit(VT.getSizeInBits());
+ APInt SignBit = APInt::getSignMask(VT.getSizeInBits());
X = DAG.getNode(ISD::AND, SDLoc(X), VT,
X, DAG.getConstant(SignBit, SDLoc(X), VT));
AddToWorklist(X.getNode());
@@ -8029,7 +8899,7 @@ ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
for (unsigned j = 0; j != NumOutputsPerInput; ++j) {
APInt ThisVal = OpVal.trunc(DstBitSize);
Ops.push_back(DAG.getConstant(ThisVal, DL, DstEltVT));
- OpVal = OpVal.lshr(DstBitSize);
+ OpVal.lshrInPlace(DstBitSize);
}
// For big endian targets, swap the order of the pieces of each element.
@@ -8040,6 +8910,11 @@ ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
return DAG.getBuildVector(VT, DL, Ops);
}
+static bool isContractable(SDNode *N) {
+ SDNodeFlags F = N->getFlags();
+ return F.hasAllowContract() || F.hasUnsafeAlgebra();
+}
+
/// Try to perform FMA combining on a given FADD node.
SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
SDValue N0 = N->getOperand(0);
@@ -8048,24 +8923,27 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
SDLoc SL(N);
const TargetOptions &Options = DAG.getTarget().Options;
- bool AllowFusion =
- (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath);
// Floating-point multiply-add with intermediate rounding.
bool HasFMAD = (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT));
// Floating-point multiply-add without intermediate rounding.
bool HasFMA =
- AllowFusion && TLI.isFMAFasterThanFMulAndFAdd(VT) &&
+ TLI.isFMAFasterThanFMulAndFAdd(VT) &&
(!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
// No valid opcode, do not combine.
if (!HasFMAD && !HasFMA)
return SDValue();
+ bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
+ Options.UnsafeFPMath || HasFMAD);
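+  // Fusion is thus allowed either globally (fast FP-op fusion, unsafe math,
+  // or a target FMAD) or per-node via the contract/unsafe-algebra flags.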
+ // If the addition is not contractable, do not combine.
+ if (!AllowFusionGlobally && !isContractable(N))
+ return SDValue();
+
const SelectionDAGTargetInfo *STI = DAG.getSubtarget().getSelectionDAGInfo();
- ;
- if (AllowFusion && STI && STI->generateFMAsInMachineCombiner(OptLevel))
+ if (STI && STI->generateFMAsInMachineCombiner(OptLevel))
return SDValue();
// Always prefer FMAD to FMA for precision.
@@ -8073,35 +8951,39 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
bool LookThroughFPExt = TLI.isFPExtFree(VT);
+  // Returns true if the node is an FMUL that is contractable, either due to
+  // global flags or its own SDNodeFlags.
+ auto isContractableFMUL = [AllowFusionGlobally](SDValue N) {
+ if (N.getOpcode() != ISD::FMUL)
+ return false;
+ return AllowFusionGlobally || isContractable(N.getNode());
+ };
// If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
// prefer to fold the multiply with fewer uses.
- if (Aggressive && N0.getOpcode() == ISD::FMUL &&
- N1.getOpcode() == ISD::FMUL) {
+ if (Aggressive && isContractableFMUL(N0) && isContractableFMUL(N1)) {
if (N0.getNode()->use_size() > N1.getNode()->use_size())
std::swap(N0, N1);
}
// fold (fadd (fmul x, y), z) -> (fma x, y, z)
- if (N0.getOpcode() == ISD::FMUL &&
- (Aggressive || N0->hasOneUse())) {
+ if (isContractableFMUL(N0) && (Aggressive || N0->hasOneUse())) {
return DAG.getNode(PreferredFusedOpcode, SL, VT,
N0.getOperand(0), N0.getOperand(1), N1);
}
// fold (fadd x, (fmul y, z)) -> (fma y, z, x)
// Note: Commutes FADD operands.
- if (N1.getOpcode() == ISD::FMUL &&
- (Aggressive || N1->hasOneUse())) {
+ if (isContractableFMUL(N1) && (Aggressive || N1->hasOneUse())) {
return DAG.getNode(PreferredFusedOpcode, SL, VT,
N1.getOperand(0), N1.getOperand(1), N0);
}
// Look through FP_EXTEND nodes to do more combining.
- if (AllowFusion && LookThroughFPExt) {
+ if (LookThroughFPExt) {
// fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z)
if (N0.getOpcode() == ISD::FP_EXTEND) {
SDValue N00 = N0.getOperand(0);
- if (N00.getOpcode() == ISD::FMUL)
+ if (isContractableFMUL(N00))
return DAG.getNode(PreferredFusedOpcode, SL, VT,
DAG.getNode(ISD::FP_EXTEND, SL, VT,
N00.getOperand(0)),
@@ -8113,7 +8995,7 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
// Note: Commutes FADD operands.
if (N1.getOpcode() == ISD::FP_EXTEND) {
SDValue N10 = N1.getOperand(0);
- if (N10.getOpcode() == ISD::FMUL)
+ if (isContractableFMUL(N10))
return DAG.getNode(PreferredFusedOpcode, SL, VT,
DAG.getNode(ISD::FP_EXTEND, SL, VT,
N10.getOperand(0)),
@@ -8154,7 +9036,7 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
N0));
}
- if (AllowFusion && LookThroughFPExt) {
+ if (LookThroughFPExt) {
// fold (fadd (fma x, y, (fpext (fmul u, v))), z)
// -> (fma x, y, (fma (fpext u), (fpext v), z))
auto FoldFAddFMAFPExtFMul = [&] (
@@ -8169,7 +9051,7 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
SDValue N02 = N0.getOperand(2);
if (N02.getOpcode() == ISD::FP_EXTEND) {
SDValue N020 = N02.getOperand(0);
- if (N020.getOpcode() == ISD::FMUL)
+ if (isContractableFMUL(N020))
return FoldFAddFMAFPExtFMul(N0.getOperand(0), N0.getOperand(1),
N020.getOperand(0), N020.getOperand(1),
N1);
@@ -8195,7 +9077,7 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
SDValue N00 = N0.getOperand(0);
if (N00.getOpcode() == PreferredFusedOpcode) {
SDValue N002 = N00.getOperand(2);
- if (N002.getOpcode() == ISD::FMUL)
+ if (isContractableFMUL(N002))
return FoldFAddFPExtFMAFMul(N00.getOperand(0), N00.getOperand(1),
N002.getOperand(0), N002.getOperand(1),
N1);
@@ -8208,7 +9090,7 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
SDValue N12 = N1.getOperand(2);
if (N12.getOpcode() == ISD::FP_EXTEND) {
SDValue N120 = N12.getOperand(0);
- if (N120.getOpcode() == ISD::FMUL)
+ if (isContractableFMUL(N120))
return FoldFAddFMAFPExtFMul(N1.getOperand(0), N1.getOperand(1),
N120.getOperand(0), N120.getOperand(1),
N0);
@@ -8224,7 +9106,7 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
SDValue N10 = N1.getOperand(0);
if (N10.getOpcode() == PreferredFusedOpcode) {
SDValue N102 = N10.getOperand(2);
- if (N102.getOpcode() == ISD::FMUL)
+ if (isContractableFMUL(N102))
return FoldFAddFPExtFMAFMul(N10.getOperand(0), N10.getOperand(1),
N102.getOperand(0), N102.getOperand(1),
N0);
@@ -8244,23 +9126,26 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
SDLoc SL(N);
const TargetOptions &Options = DAG.getTarget().Options;
- bool AllowFusion =
- (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath);
-
// Floating-point multiply-add with intermediate rounding.
bool HasFMAD = (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT));
// Floating-point multiply-add without intermediate rounding.
bool HasFMA =
- AllowFusion && TLI.isFMAFasterThanFMulAndFAdd(VT) &&
+ TLI.isFMAFasterThanFMulAndFAdd(VT) &&
(!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
// No valid opcode, do not combine.
if (!HasFMAD && !HasFMA)
return SDValue();
+ bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
+ Options.UnsafeFPMath || HasFMAD);
+ // If the subtraction is not contractable, do not combine.
+ if (!AllowFusionGlobally && !isContractable(N))
+ return SDValue();
+
const SelectionDAGTargetInfo *STI = DAG.getSubtarget().getSelectionDAGInfo();
- if (AllowFusion && STI && STI->generateFMAsInMachineCombiner(OptLevel))
+ if (STI && STI->generateFMAsInMachineCombiner(OptLevel))
return SDValue();
// Always prefer FMAD to FMA for precision.
@@ -8268,9 +9153,16 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
bool LookThroughFPExt = TLI.isFPExtFree(VT);
+  // Returns true if the node is an FMUL that is contractable, either due to
+  // global flags or its own SDNodeFlags.
+ auto isContractableFMUL = [AllowFusionGlobally](SDValue N) {
+ if (N.getOpcode() != ISD::FMUL)
+ return false;
+ return AllowFusionGlobally || isContractable(N.getNode());
+ };
+
// fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
- if (N0.getOpcode() == ISD::FMUL &&
- (Aggressive || N0->hasOneUse())) {
+ if (isContractableFMUL(N0) && (Aggressive || N0->hasOneUse())) {
return DAG.getNode(PreferredFusedOpcode, SL, VT,
N0.getOperand(0), N0.getOperand(1),
DAG.getNode(ISD::FNEG, SL, VT, N1));
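
For reference, a standalone C++ sketch (not part of the patch) of the identity this first fold relies on, again with std::fma standing in for the fused node:

// Standalone sketch, not part of the patch: the scalar identity behind
// (fsub (fmul x, y), z) -> (fma x, y, (fneg z)).
#include <cmath>
#include <cstdio>

int main() {
  double x = 1.25, y = 3.5, z = 2.0;  // exactly representable values
  std::printf("%g %g\n", x * y - z,   // 2.375
              std::fma(x, y, -z));    // 2.375, with a single rounding
  return 0;
}
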
@@ -8278,16 +9170,14 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
// fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
// Note: Commutes FSUB operands.
- if (N1.getOpcode() == ISD::FMUL &&
- (Aggressive || N1->hasOneUse()))
+ if (isContractableFMUL(N1) && (Aggressive || N1->hasOneUse()))
return DAG.getNode(PreferredFusedOpcode, SL, VT,
DAG.getNode(ISD::FNEG, SL, VT,
N1.getOperand(0)),
N1.getOperand(1), N0);
// fold (fsub (fneg (fmul, x, y)), z) -> (fma (fneg x), y, (fneg z))
- if (N0.getOpcode() == ISD::FNEG &&
- N0.getOperand(0).getOpcode() == ISD::FMUL &&
+ if (N0.getOpcode() == ISD::FNEG && isContractableFMUL(N0.getOperand(0)) &&
(Aggressive || (N0->hasOneUse() && N0.getOperand(0).hasOneUse()))) {
SDValue N00 = N0.getOperand(0).getOperand(0);
SDValue N01 = N0.getOperand(0).getOperand(1);
@@ -8297,12 +9187,12 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
}
// Look through FP_EXTEND nodes to do more combining.
- if (AllowFusion && LookThroughFPExt) {
+ if (LookThroughFPExt) {
// fold (fsub (fpext (fmul x, y)), z)
// -> (fma (fpext x), (fpext y), (fneg z))
if (N0.getOpcode() == ISD::FP_EXTEND) {
SDValue N00 = N0.getOperand(0);
- if (N00.getOpcode() == ISD::FMUL)
+ if (isContractableFMUL(N00))
return DAG.getNode(PreferredFusedOpcode, SL, VT,
DAG.getNode(ISD::FP_EXTEND, SL, VT,
N00.getOperand(0)),
@@ -8316,7 +9206,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
// Note: Commutes FSUB operands.
if (N1.getOpcode() == ISD::FP_EXTEND) {
SDValue N10 = N1.getOperand(0);
- if (N10.getOpcode() == ISD::FMUL)
+ if (isContractableFMUL(N10))
return DAG.getNode(PreferredFusedOpcode, SL, VT,
DAG.getNode(ISD::FNEG, SL, VT,
DAG.getNode(ISD::FP_EXTEND, SL, VT,
@@ -8336,7 +9226,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
SDValue N00 = N0.getOperand(0);
if (N00.getOpcode() == ISD::FNEG) {
SDValue N000 = N00.getOperand(0);
- if (N000.getOpcode() == ISD::FMUL) {
+ if (isContractableFMUL(N000)) {
return DAG.getNode(ISD::FNEG, SL, VT,
DAG.getNode(PreferredFusedOpcode, SL, VT,
DAG.getNode(ISD::FP_EXTEND, SL, VT,
@@ -8358,7 +9248,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
SDValue N00 = N0.getOperand(0);
if (N00.getOpcode() == ISD::FP_EXTEND) {
SDValue N000 = N00.getOperand(0);
- if (N000.getOpcode() == ISD::FMUL) {
+ if (isContractableFMUL(N000)) {
return DAG.getNode(ISD::FNEG, SL, VT,
DAG.getNode(PreferredFusedOpcode, SL, VT,
DAG.getNode(ISD::FP_EXTEND, SL, VT,
@@ -8378,10 +9268,9 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
// -> (fma x, y, (fma u, v, (fneg z)))
// FIXME: The UnsafeAlgebra flag should be propagated to FMA/FMAD, but FMF
// are currently only supported on binary nodes.
- if (Options.UnsafeFPMath &&
- N0.getOpcode() == PreferredFusedOpcode &&
- N0.getOperand(2).getOpcode() == ISD::FMUL &&
- N0->hasOneUse() && N0.getOperand(2)->hasOneUse()) {
+ if (Options.UnsafeFPMath && N0.getOpcode() == PreferredFusedOpcode &&
+ isContractableFMUL(N0.getOperand(2)) && N0->hasOneUse() &&
+ N0.getOperand(2)->hasOneUse()) {
return DAG.getNode(PreferredFusedOpcode, SL, VT,
N0.getOperand(0), N0.getOperand(1),
DAG.getNode(PreferredFusedOpcode, SL, VT,
@@ -8395,9 +9284,8 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
// -> (fma (fneg y), z, (fma (fneg u), v, x))
// FIXME: The UnsafeAlgebra flag should be propagated to FMA/FMAD, but FMF
// are currently only supported on binary nodes.
- if (Options.UnsafeFPMath &&
- N1.getOpcode() == PreferredFusedOpcode &&
- N1.getOperand(2).getOpcode() == ISD::FMUL) {
+ if (Options.UnsafeFPMath && N1.getOpcode() == PreferredFusedOpcode &&
+ isContractableFMUL(N1.getOperand(2))) {
SDValue N20 = N1.getOperand(2).getOperand(0);
SDValue N21 = N1.getOperand(2).getOperand(1);
return DAG.getNode(PreferredFusedOpcode, SL, VT,
@@ -8410,14 +9298,14 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
N21, N0));
}
- if (AllowFusion && LookThroughFPExt) {
+ if (LookThroughFPExt) {
// fold (fsub (fma x, y, (fpext (fmul u, v))), z)
// -> (fma x, y, (fma (fpext u), (fpext v), (fneg z)))
if (N0.getOpcode() == PreferredFusedOpcode) {
SDValue N02 = N0.getOperand(2);
if (N02.getOpcode() == ISD::FP_EXTEND) {
SDValue N020 = N02.getOperand(0);
- if (N020.getOpcode() == ISD::FMUL)
+ if (isContractableFMUL(N020))
return DAG.getNode(PreferredFusedOpcode, SL, VT,
N0.getOperand(0), N0.getOperand(1),
DAG.getNode(PreferredFusedOpcode, SL, VT,
@@ -8440,7 +9328,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
SDValue N00 = N0.getOperand(0);
if (N00.getOpcode() == PreferredFusedOpcode) {
SDValue N002 = N00.getOperand(2);
- if (N002.getOpcode() == ISD::FMUL)
+ if (isContractableFMUL(N002))
return DAG.getNode(PreferredFusedOpcode, SL, VT,
DAG.getNode(ISD::FP_EXTEND, SL, VT,
N00.getOperand(0)),
@@ -8461,7 +9349,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
if (N1.getOpcode() == PreferredFusedOpcode &&
N1.getOperand(2).getOpcode() == ISD::FP_EXTEND) {
SDValue N120 = N1.getOperand(2).getOperand(0);
- if (N120.getOpcode() == ISD::FMUL) {
+ if (isContractableFMUL(N120)) {
SDValue N1200 = N120.getOperand(0);
SDValue N1201 = N120.getOperand(1);
return DAG.getNode(PreferredFusedOpcode, SL, VT,
@@ -8488,7 +9376,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
SDValue N100 = N1.getOperand(0).getOperand(0);
SDValue N101 = N1.getOperand(0).getOperand(1);
SDValue N102 = N1.getOperand(0).getOperand(2);
- if (N102.getOpcode() == ISD::FMUL) {
+ if (isContractableFMUL(N102)) {
SDValue N1020 = N102.getOperand(0);
SDValue N1021 = N102.getOperand(1);
return DAG.getNode(PreferredFusedOpcode, SL, VT,
@@ -8601,6 +9489,14 @@ SDValue DAGCombiner::visitFMULForFMADistributiveCombine(SDNode *N) {
return SDValue();
}
+static bool isFMulNegTwo(SDValue &N) {
+ if (N.getOpcode() != ISD::FMUL)
+ return false;
+ if (ConstantFPSDNode *CFP = isConstOrConstSplatFP(N.getOperand(1)))
+ return CFP->isExactlyValue(-2.0);
+ return false;
+}
+
SDValue DAGCombiner::visitFADD(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
@@ -8609,7 +9505,7 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
EVT VT = N->getValueType(0);
SDLoc DL(N);
const TargetOptions &Options = DAG.getTarget().Options;
- const SDNodeFlags *Flags = &cast<BinaryWithFlagsSDNode>(N)->Flags;
+ const SDNodeFlags Flags = N->getFlags();
// fold vector ops
if (VT.isVector())
@@ -8624,6 +9520,9 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
if (N0CFP && !N1CFP)
return DAG.getNode(ISD::FADD, DL, VT, N1, N0, Flags);
+ if (SDValue NewSel = foldBinOpIntoSelect(N))
+ return NewSel;
+
// fold (fadd A, (fneg B)) -> (fsub A, B)
if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) &&
isNegatibleForFree(N1, LegalOperations, TLI, &Options) == 2)
@@ -8636,8 +9535,18 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
return DAG.getNode(ISD::FSUB, DL, VT, N1,
GetNegatedExpression(N0, DAG, LegalOperations), Flags);
+ // fold (fadd A, (fmul B, -2.0)) -> (fsub A, (fadd B, B))
+ // fold (fadd (fmul B, -2.0), A) -> (fsub A, (fadd B, B))
+ if ((isFMulNegTwo(N0) && N0.hasOneUse()) ||
+ (isFMulNegTwo(N1) && N1.hasOneUse())) {
+ bool N1IsFMul = isFMulNegTwo(N1);
+ SDValue AddOp = N1IsFMul ? N1.getOperand(0) : N0.getOperand(0);
+ SDValue Add = DAG.getNode(ISD::FADD, DL, VT, AddOp, AddOp, Flags);
+ return DAG.getNode(ISD::FSUB, DL, VT, N1IsFMul ? N0 : N1, Add, Flags);
+ }
+
// FIXME: Auto-upgrade the target/function-level option.
- if (Options.UnsafeFPMath || N->getFlags()->hasNoSignedZeros()) {
+ if (Options.NoSignedZerosFPMath || N->getFlags().hasNoSignedZeros()) {
// fold (fadd A, 0) -> A
if (ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1))
if (N1C->isZero())
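
A standalone C++ sketch (not part of the patch) of the identity behind the new -2.0 fold; doubling is exact barring overflow, so both sides agree bit-for-bit:

// Standalone sketch, not part of the patch: the identity used by
// (fadd A, (fmul B, -2.0)) -> (fsub A, (fadd B, B)). An fadd of a value
// with itself is typically cheaper than an fmul by a materialized constant.
#include <cstdio>

int main() {
  double A = 5.0, B = 1.5;
  std::printf("%g %g\n", A + B * -2.0, // 2
              A - (B + B));            // 2 (B + B is exact barring overflow)
  return 0;
}
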
@@ -8760,7 +9669,7 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {
EVT VT = N->getValueType(0);
SDLoc DL(N);
const TargetOptions &Options = DAG.getTarget().Options;
- const SDNodeFlags *Flags = &cast<BinaryWithFlagsSDNode>(N)->Flags;
+ const SDNodeFlags Flags = N->getFlags();
// fold vector ops
if (VT.isVector())
@@ -8771,13 +9680,16 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {
if (N0CFP && N1CFP)
return DAG.getNode(ISD::FSUB, DL, VT, N0, N1, Flags);
+ if (SDValue NewSel = foldBinOpIntoSelect(N))
+ return NewSel;
+
// fold (fsub A, (fneg B)) -> (fadd A, B)
if (isNegatibleForFree(N1, LegalOperations, TLI, &Options))
return DAG.getNode(ISD::FADD, DL, VT, N0,
GetNegatedExpression(N1, DAG, LegalOperations), Flags);
// FIXME: Auto-upgrade the target/function-level option.
- if (Options.UnsafeFPMath || N->getFlags()->hasNoSignedZeros()) {
+ if (Options.NoSignedZerosFPMath || N->getFlags().hasNoSignedZeros()) {
// (fsub 0, B) -> -B
if (N0CFP && N0CFP->isZero()) {
if (isNegatibleForFree(N1, LegalOperations, TLI, &Options))
@@ -8828,7 +9740,7 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) {
EVT VT = N->getValueType(0);
SDLoc DL(N);
const TargetOptions &Options = DAG.getTarget().Options;
- const SDNodeFlags *Flags = &cast<BinaryWithFlagsSDNode>(N)->Flags;
+ const SDNodeFlags Flags = N->getFlags();
// fold vector ops
if (VT.isVector()) {
@@ -8850,6 +9762,9 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) {
if (N1CFP && N1CFP->isExactlyValue(1.0))
return N0;
+ if (SDValue NewSel = foldBinOpIntoSelect(N))
+ return NewSel;
+
if (Options.UnsafeFPMath) {
// fold (fmul A, 0) -> 0
if (N1CFP && N1CFP->isZero())
@@ -8914,6 +9829,52 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) {
}
}
+ // fold (fmul X, (select (fcmp X > 0.0), -1.0, 1.0)) -> (fneg (fabs X))
+ // fold (fmul X, (select (fcmp X > 0.0), 1.0, -1.0)) -> (fabs X)
+ if (Flags.hasNoNaNs() && Flags.hasNoSignedZeros() &&
+ (N0.getOpcode() == ISD::SELECT || N1.getOpcode() == ISD::SELECT) &&
+ TLI.isOperationLegal(ISD::FABS, VT)) {
+ SDValue Select = N0, X = N1;
+ if (Select.getOpcode() != ISD::SELECT)
+ std::swap(Select, X);
+
+ SDValue Cond = Select.getOperand(0);
+ auto TrueOpnd = dyn_cast<ConstantFPSDNode>(Select.getOperand(1));
+ auto FalseOpnd = dyn_cast<ConstantFPSDNode>(Select.getOperand(2));
+
+ if (TrueOpnd && FalseOpnd &&
+ Cond.getOpcode() == ISD::SETCC && Cond.getOperand(0) == X &&
+ isa<ConstantFPSDNode>(Cond.getOperand(1)) &&
+ cast<ConstantFPSDNode>(Cond.getOperand(1))->isExactlyValue(0.0)) {
+ ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
+ switch (CC) {
+ default: break;
+ case ISD::SETOLT:
+ case ISD::SETULT:
+ case ISD::SETOLE:
+ case ISD::SETULE:
+ case ISD::SETLT:
+ case ISD::SETLE:
+ std::swap(TrueOpnd, FalseOpnd);
+ // Fall through
+ case ISD::SETOGT:
+ case ISD::SETUGT:
+ case ISD::SETOGE:
+ case ISD::SETUGE:
+ case ISD::SETGT:
+ case ISD::SETGE:
+ if (TrueOpnd->isExactlyValue(-1.0) && FalseOpnd->isExactlyValue(1.0) &&
+ TLI.isOperationLegal(ISD::FNEG, VT))
+ return DAG.getNode(ISD::FNEG, DL, VT,
+ DAG.getNode(ISD::FABS, DL, VT, X));
+ if (TrueOpnd->isExactlyValue(1.0) && FalseOpnd->isExactlyValue(-1.0))
+ return DAG.getNode(ISD::FABS, DL, VT, X);
+
+ break;
+ }
+ }
+ }
+
// FMUL -> FMA combines:
if (SDValue Fused = visitFMULForFMADistributiveCombine(N)) {
AddToWorklist(Fused.getNode());
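
A standalone C++ sketch (not part of the patch) of the select identity above; the x == 0.0 case shows why the hasNoNaNs/hasNoSignedZeros checks are required:

// Standalone sketch, not part of the patch: the scalar identity behind the
// select fold. At x == 0.0 the select form yields -0.0 while fabs yields
// +0.0, which is exactly why hasNoSignedZeros() is demanded.
#include <cmath>
#include <cstdio>
#include <initializer_list>

int main() {
  for (double x : {3.5, -3.5, 0.0}) {
    double sel = x * (x > 0.0 ? 1.0 : -1.0); // (fmul X, (select (X > 0), ...))
    std::printf("x=%4g sel=%4g fabs=%4g\n", x, sel, std::fabs(x));
  }
  return 0;
}
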
@@ -8969,7 +9930,7 @@ SDValue DAGCombiner::visitFMA(SDNode *N) {
isConstantFPBuildVectorOrConstantFP(N2.getOperand(1))) {
return DAG.getNode(ISD::FMUL, DL, VT, N0,
DAG.getNode(ISD::FADD, DL, VT, N1, N2.getOperand(1),
- &Flags), &Flags);
+ Flags), Flags);
}
// (fma (fmul x, c1), c2, y) -> (fma x, c1*c2, y)
@@ -8979,7 +9940,7 @@ SDValue DAGCombiner::visitFMA(SDNode *N) {
return DAG.getNode(ISD::FMA, DL, VT,
N0.getOperand(0),
DAG.getNode(ISD::FMUL, DL, VT, N1, N0.getOperand(1),
- &Flags),
+ Flags),
N2);
}
}
@@ -9005,16 +9966,16 @@ SDValue DAGCombiner::visitFMA(SDNode *N) {
if (N1CFP && N0 == N2) {
return DAG.getNode(ISD::FMUL, DL, VT, N0,
DAG.getNode(ISD::FADD, DL, VT, N1,
- DAG.getConstantFP(1.0, DL, VT), &Flags),
- &Flags);
+ DAG.getConstantFP(1.0, DL, VT), Flags),
+ Flags);
}
// (fma x, c, (fneg x)) -> (fmul x, (c-1))
if (N1CFP && N2.getOpcode() == ISD::FNEG && N2.getOperand(0) == N0) {
return DAG.getNode(ISD::FMUL, DL, VT, N0,
DAG.getNode(ISD::FADD, DL, VT, N1,
- DAG.getConstantFP(-1.0, DL, VT), &Flags),
- &Flags);
+ DAG.getConstantFP(-1.0, DL, VT), Flags),
+ Flags);
}
}
@@ -9030,8 +9991,8 @@ SDValue DAGCombiner::visitFMA(SDNode *N) {
// is the critical path is increased from "one FDIV" to "one FDIV + one FMUL".
SDValue DAGCombiner::combineRepeatedFPDivisors(SDNode *N) {
bool UnsafeMath = DAG.getTarget().Options.UnsafeFPMath;
- const SDNodeFlags *Flags = N->getFlags();
- if (!UnsafeMath && !Flags->hasAllowReciprocal())
+ const SDNodeFlags Flags = N->getFlags();
+ if (!UnsafeMath && !Flags.hasAllowReciprocal())
return SDValue();
// Skip if current node is a reciprocal.
@@ -9054,7 +10015,7 @@ SDValue DAGCombiner::combineRepeatedFPDivisors(SDNode *N) {
if (U->getOpcode() == ISD::FDIV && U->getOperand(1) == N1) {
// This division is eligible for optimization only if global unsafe math
// is enabled or if this division allows reciprocal formation.
- if (UnsafeMath || U->getFlags()->hasAllowReciprocal())
+ if (UnsafeMath || U->getFlags().hasAllowReciprocal())
Users.insert(U);
}
}
@@ -9093,7 +10054,7 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) {
EVT VT = N->getValueType(0);
SDLoc DL(N);
const TargetOptions &Options = DAG.getTarget().Options;
- SDNodeFlags *Flags = &cast<BinaryWithFlagsSDNode>(N)->Flags;
+ SDNodeFlags Flags = N->getFlags();
// fold vector ops
if (VT.isVector())
@@ -9104,6 +10065,9 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) {
if (N0CFP && N1CFP)
return DAG.getNode(ISD::FDIV, SDLoc(N), VT, N0, N1, Flags);
+ if (SDValue NewSel = foldBinOpIntoSelect(N))
+ return NewSel;
+
if (Options.UnsafeFPMath) {
// fold (fdiv X, c2) -> fmul X, 1/c2 if losing precision is acceptable.
if (N1CFP) {
@@ -9204,8 +10168,10 @@ SDValue DAGCombiner::visitFREM(SDNode *N) {
// fold (frem c1, c2) -> fmod(c1,c2)
if (N0CFP && N1CFP)
- return DAG.getNode(ISD::FREM, SDLoc(N), VT, N0, N1,
- &cast<BinaryWithFlagsSDNode>(N)->Flags);
+ return DAG.getNode(ISD::FREM, SDLoc(N), VT, N0, N1, N->getFlags());
+
+ if (SDValue NewSel = foldBinOpIntoSelect(N))
+ return NewSel;
return SDValue();
}
@@ -9222,7 +10188,7 @@ SDValue DAGCombiner::visitFSQRT(SDNode *N) {
// For now, create a Flags object for use with all unsafe math transforms.
SDNodeFlags Flags;
Flags.setUnsafeAlgebra(true);
- return buildSqrtEstimate(N0, &Flags);
+ return buildSqrtEstimate(N0, Flags);
}
/// copysign(x, fp_extend(y)) -> copysign(x, y)
@@ -9497,6 +10463,9 @@ SDValue DAGCombiner::visitFP_ROUND(SDNode *N) {
Tmp, N0.getOperand(1));
}
+ if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
+ return NewVSel;
+
return SDValue();
}
@@ -9563,6 +10532,9 @@ SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) {
return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
+ if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
+ return NewVSel;
+
return SDValue();
}
@@ -9624,11 +10596,11 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) {
if (N0.getValueType().isVector()) {
// For a vector, get a mask such as 0x80... per scalar element
// and splat it.
- SignMask = APInt::getSignBit(N0.getScalarValueSizeInBits());
+ SignMask = APInt::getSignMask(N0.getScalarValueSizeInBits());
SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask);
} else {
// For a scalar, just generate 0x80...
- SignMask = APInt::getSignBit(IntVT.getSizeInBits());
+ SignMask = APInt::getSignMask(IntVT.getSizeInBits());
}
SDLoc DL0(N0);
Int = DAG.getNode(ISD::XOR, DL0, IntVT, Int,
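
A standalone C++ sketch (not part of the patch) of the same bit trick on a scalar double:

// Standalone sketch, not part of the patch: fneg as an integer XOR with the
// sign mask, the bit pattern the code above builds via APInt::getSignMask().
#include <cstdint>
#include <cstdio>
#include <cstring>

int main() {
  double d = 3.25;
  uint64_t bits;
  std::memcpy(&bits, &d, sizeof bits); // type-pun without UB
  bits ^= UINT64_C(1) << 63;           // 0x8000000000000000: flip the sign bit
  std::memcpy(&d, &bits, sizeof d);
  std::printf("%g\n", d);              // -3.25
  return 0;
}
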
@@ -9648,10 +10620,10 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) {
if (Level >= AfterLegalizeDAG &&
(TLI.isFPImmLegal(CVal, VT) ||
TLI.isOperationLegal(ISD::ConstantFP, VT)))
- return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N0.getOperand(0),
- DAG.getNode(ISD::FNEG, SDLoc(N), VT,
- N0.getOperand(1)),
- &cast<BinaryWithFlagsSDNode>(N0)->Flags);
+ return DAG.getNode(
+ ISD::FMUL, SDLoc(N), VT, N0.getOperand(0),
+ DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0.getOperand(1)),
+ N0->getFlags());
}
}
@@ -9729,11 +10701,11 @@ SDValue DAGCombiner::visitFABS(SDNode *N) {
if (N0.getValueType().isVector()) {
// For a vector, get a mask such as 0x7f... per scalar element
// and splat it.
- SignMask = ~APInt::getSignBit(N0.getScalarValueSizeInBits());
+ SignMask = ~APInt::getSignMask(N0.getScalarValueSizeInBits());
SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask);
} else {
// For a scalar, just generate 0x7f...
- SignMask = ~APInt::getSignBit(IntVT.getSizeInBits());
+ SignMask = ~APInt::getSignMask(IntVT.getSizeInBits());
}
SDLoc DL(N0);
Int = DAG.getNode(ISD::AND, DL, IntVT, Int,
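
And the matching fabs trick, again as a standalone sketch (not part of the patch):

// Standalone sketch, not part of the patch: fabs as an integer AND with the
// complement of the sign mask (~APInt::getSignMask() above).
#include <cstdint>
#include <cstdio>
#include <cstring>

int main() {
  double d = -3.25;
  uint64_t bits;
  std::memcpy(&bits, &d, sizeof bits);
  bits &= ~(UINT64_C(1) << 63);        // clear the sign bit: 0x7fff...
  std::memcpy(&d, &bits, sizeof d);
  std::printf("%g\n", d);              // 3.25
  return 0;
}
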
@@ -10149,7 +11121,7 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
// x1 * offset1 + y1 * ptr0 = t1 (the indexed load/store)
//
// where x0, x1, y0 and y1 in {-1, 1} are given by the types of the
- // indexed load/store and the expresion that needs to be re-written.
+ // indexed load/store and the expression that needs to be re-written.
//
// Therefore, we have:
// t0 = (x0 * offset0 - x1 * y0 * y1 *offset1) + (y0 * y1) * t1
@@ -10361,7 +11333,7 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) {
dbgs() << "\n");
WorklistRemover DeadNodes(*this);
DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
-
+ AddUsersToWorklist(Chain.getNode());
if (N->use_empty())
deleteAndRecombine(N);
@@ -10414,7 +11386,7 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) {
StoreSDNode *PrevST = cast<StoreSDNode>(Chain);
if (PrevST->getBasePtr() == Ptr &&
PrevST->getValue().getValueType() == N->getValueType(0))
- return CombineTo(N, Chain.getOperand(1), Chain);
+ return CombineTo(N, PrevST->getOperand(1), Chain);
}
}
@@ -10432,14 +11404,7 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) {
}
}
- bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA
- : DAG.getSubtarget().useAA();
-#ifndef NDEBUG
- if (CombinerAAOnlyFunc.getNumOccurrences() &&
- CombinerAAOnlyFunc != DAG.getMachineFunction().getName())
- UseAA = false;
-#endif
- if (UseAA && LD->isUnindexed()) {
+ if (LD->isUnindexed()) {
// Walk up chain skipping non-aliasing memory nodes.
SDValue BetterChain = FindBetterChain(N, Chain);
@@ -10462,12 +11427,8 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) {
SDValue Token = DAG.getNode(ISD::TokenFactor, SDLoc(N),
MVT::Other, Chain, ReplLoad.getValue(1));
- // Make sure the new and old chains are cleaned up.
- AddToWorklist(Token.getNode());
-
- // Replace uses with load result and token factor. Don't add users
- // to work list.
- return CombineTo(N, ReplLoad.getValue(0), Token, false);
+ // Replace uses with load result and token factor.
+ return CombineTo(N, ReplLoad.getValue(0), Token);
}
}
@@ -10490,7 +11451,7 @@ namespace {
/// Shift = srl Ty1 Origin, CstTy Amount
/// Inst = trunc Shift to Ty2
///
-/// Then, it will be rewriten into:
+/// Then, it will be rewritten into:
/// Slice = load SliceTy, Base + SliceOffset
/// [Inst = zext Slice to Ty2], only if SliceTy <> Ty2
///
@@ -10959,7 +11920,7 @@ bool DAGCombiner::SliceUpLoad(SDNode *N) {
// Check if this is a trunc(lshr).
if (User->getOpcode() == ISD::SRL && User->hasOneUse() &&
isa<ConstantSDNode>(User->getOperand(1))) {
- Shift = cast<ConstantSDNode>(User->getOperand(1))->getZExtValue();
+ Shift = User->getConstantOperandVal(1);
User = *User->use_begin();
}
@@ -11021,6 +11982,7 @@ bool DAGCombiner::SliceUpLoad(SDNode *N) {
SDValue Chain = DAG.getNode(ISD::TokenFactor, SDLoc(LD), MVT::Other,
ArgChains);
DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
+ AddToWorklist(Chain.getNode());
return true;
}
@@ -11414,44 +12376,36 @@ bool DAGCombiner::isMulAddWithConstProfitable(SDNode *MulNode,
return false;
}
-SDValue DAGCombiner::getMergedConstantVectorStore(
- SelectionDAG &DAG, const SDLoc &SL, ArrayRef<MemOpLink> Stores,
- SmallVectorImpl<SDValue> &Chains, EVT Ty) const {
- SmallVector<SDValue, 8> BuildVector;
+SDValue DAGCombiner::getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
+ unsigned NumStores) {
+ SmallVector<SDValue, 8> Chains;
+ SmallPtrSet<const SDNode *, 8> Visited;
+ SDLoc StoreDL(StoreNodes[0].MemNode);
+
+ for (unsigned i = 0; i < NumStores; ++i) {
+ Visited.insert(StoreNodes[i].MemNode);
+ }
- for (unsigned I = 0, E = Ty.getVectorNumElements(); I != E; ++I) {
- StoreSDNode *St = cast<StoreSDNode>(Stores[I].MemNode);
- Chains.push_back(St->getChain());
- BuildVector.push_back(St->getValue());
+ // Don't include chain operands that are themselves candidate stores; only
+ // collect the chains coming from outside the merged set.
+ for (unsigned i = 0; i < NumStores; ++i) {
+ if (Visited.count(StoreNodes[i].MemNode->getChain().getNode()) == 0)
+ Chains.push_back(StoreNodes[i].MemNode->getChain());
}
- return DAG.getBuildVector(Ty, SL, BuildVector);
+ assert(Chains.size() > 0 && "Chain should have generated a chain");
+ return DAG.getNode(ISD::TokenFactor, StoreDL, MVT::Other, Chains);
}
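
A standalone toy sketch (plain structs, not SelectionDAG, and not part of the patch) of the chain pruning getMergeStoreChains performs before joining the survivors with a TokenFactor:

// Standalone toy sketch, not part of the patch: keep only the chains that
// come from outside the candidate set.
#include <cstdio>
#include <set>
#include <vector>

struct Node { Node *Chain; };

static std::vector<Node *> externalChains(const std::vector<Node *> &Stores) {
  std::set<const Node *> Candidates(Stores.begin(), Stores.end());
  std::vector<Node *> Chains;
  for (Node *S : Stores)
    if (!Candidates.count(S->Chain)) // skip chains internal to the merge set
      Chains.push_back(S->Chain);
  return Chains;
}

int main() {
  Node Root{nullptr};
  Node S1{&Root}, S2{&S1};               // S2 is chained onto S1
  std::printf("%zu external chain(s)\n", // prints 1: only Root's edge remains
              externalChains({&S1, &S2}).size());
  return 0;
}
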
bool DAGCombiner::MergeStoresOfConstantsOrVecElts(
- SmallVectorImpl<MemOpLink> &StoreNodes, EVT MemVT,
- unsigned NumStores, bool IsConstantSrc, bool UseVector) {
+ SmallVectorImpl<MemOpLink> &StoreNodes, EVT MemVT, unsigned NumStores,
+ bool IsConstantSrc, bool UseVector, bool UseTrunc) {
// Make sure we have something to merge.
if (NumStores < 2)
return false;
int64_t ElementSizeBytes = MemVT.getSizeInBits() / 8;
- LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
- unsigned LatestNodeUsed = 0;
-
- for (unsigned i=0; i < NumStores; ++i) {
- // Find a chain for the new wide-store operand. Notice that some
- // of the store nodes that we found may not be selected for inclusion
- // in the wide store. The chain we use needs to be the chain of the
- // latest store node which is *used* and replaced by the wide store.
- if (StoreNodes[i].SequenceNum < StoreNodes[LatestNodeUsed].SequenceNum)
- LatestNodeUsed = i;
- }
-
- SmallVector<SDValue, 8> Chains;
// The latest Node in the DAG.
- LSBaseSDNode *LatestOp = StoreNodes[LatestNodeUsed].MemNode;
SDLoc DL(StoreNodes[0].MemNode);
SDValue StoredVal;
@@ -11467,7 +12421,18 @@ bool DAGCombiner::MergeStoresOfConstantsOrVecElts(
assert(TLI.isTypeLegal(Ty) && "Illegal vector store");
if (IsConstantSrc) {
- StoredVal = getMergedConstantVectorStore(DAG, DL, StoreNodes, Chains, Ty);
+ SmallVector<SDValue, 8> BuildVector;
+ for (unsigned I = 0, E = Ty.getVectorNumElements(); I != E; ++I) {
+ StoreSDNode *St = cast<StoreSDNode>(StoreNodes[I].MemNode);
+ SDValue Val = St->getValue();
+ if (MemVT.getScalarType().isInteger())
+ if (auto *CFP = dyn_cast<ConstantFPSDNode>(St->getValue()))
+ Val = DAG.getConstant(
+ (uint32_t)CFP->getValueAPF().bitcastToAPInt().getZExtValue(),
+ SDLoc(CFP), MemVT);
+ BuildVector.push_back(Val);
+ }
+ StoredVal = DAG.getBuildVector(Ty, DL, BuildVector);
} else {
SmallVector<SDValue, 8> Ops;
for (unsigned i = 0; i < NumStores; ++i) {
@@ -11477,7 +12442,6 @@ bool DAGCombiner::MergeStoresOfConstantsOrVecElts(
if (Val.getValueType() != MemVT)
return false;
Ops.push_back(Val);
- Chains.push_back(St->getChain());
}
// Build the extracted vector elements back into a vector.
@@ -11497,14 +12461,13 @@ bool DAGCombiner::MergeStoresOfConstantsOrVecElts(
for (unsigned i = 0; i < NumStores; ++i) {
unsigned Idx = IsLE ? (NumStores - 1 - i) : i;
StoreSDNode *St = cast<StoreSDNode>(StoreNodes[Idx].MemNode);
- Chains.push_back(St->getChain());
SDValue Val = St->getValue();
StoreInt <<= ElementSizeBytes * 8;
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val)) {
- StoreInt |= C->getAPIntValue().zext(SizeInBits);
+ StoreInt |= C->getAPIntValue().zextOrTrunc(SizeInBits);
} else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Val)) {
- StoreInt |= C->getValueAPF().bitcastToAPInt().zext(SizeInBits);
+ StoreInt |= C->getValueAPF().bitcastToAPInt().zextOrTrunc(SizeInBits);
} else {
llvm_unreachable("Invalid constant element type");
}
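
A standalone C++ sketch (not part of the patch) of the packing loop above for two 16-bit constants on a little-endian target:

// Standalone sketch, not part of the patch: how the StoreInt loop packs two
// consecutive 16-bit constant stores into one 32-bit value on little-endian
// (the highest-address element is shifted in first, landing in the high bits).
#include <cstdint>
#include <cstdio>

int main() {
  uint16_t Vals[2] = {0x1111, 0x2222}; // Vals[1] lives at the higher address
  uint32_t StoreInt = 0;
  for (unsigned i = 0; i < 2; ++i) {
    unsigned Idx = 2 - 1 - i;          // IsLE: walk from highest address down
    StoreInt <<= 16;                   // ElementSizeBytes * 8
    StoreInt |= Vals[Idx];
  }
  // A single 32-bit store of 0x22221111 writes bytes 11 11 22 22, matching
  // the two original 16-bit stores.
  std::printf("0x%08x\n", StoreInt);
  return 0;
}
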
@@ -11515,194 +12478,181 @@ bool DAGCombiner::MergeStoresOfConstantsOrVecElts(
StoredVal = DAG.getConstant(StoreInt, DL, StoreTy);
}
- assert(!Chains.empty());
-
- SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
- SDValue NewStore = DAG.getStore(NewChain, DL, StoredVal,
- FirstInChain->getBasePtr(),
- FirstInChain->getPointerInfo(),
- FirstInChain->getAlignment());
-
- bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA
- : DAG.getSubtarget().useAA();
- if (UseAA) {
- // Replace all merged stores with the new store.
- for (unsigned i = 0; i < NumStores; ++i)
- CombineTo(StoreNodes[i].MemNode, NewStore);
- } else {
- // Replace the last store with the new store.
- CombineTo(LatestOp, NewStore);
- // Erase all other stores.
- for (unsigned i = 0; i < NumStores; ++i) {
- if (StoreNodes[i].MemNode == LatestOp)
- continue;
- StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
- // ReplaceAllUsesWith will replace all uses that existed when it was
- // called, but graph optimizations may cause new ones to appear. For
- // example, the case in pr14333 looks like
- //
- // St's chain -> St -> another store -> X
- //
- // And the only difference from St to the other store is the chain.
- // When we change it's chain to be St's chain they become identical,
- // get CSEed and the net result is that X is now a use of St.
- // Since we know that St is redundant, just iterate.
- while (!St->use_empty())
- DAG.ReplaceAllUsesWith(SDValue(St, 0), St->getChain());
- deleteAndRecombine(St);
- }
- }
-
- StoreNodes.erase(StoreNodes.begin() + NumStores, StoreNodes.end());
+ LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
+ SDValue NewChain = getMergeStoreChains(StoreNodes, NumStores);
+
+ // Make sure we use a trunc store if it's necessary to be legal.
+ SDValue NewStore;
+ if (UseVector || !UseTrunc) {
+ NewStore = DAG.getStore(NewChain, DL, StoredVal, FirstInChain->getBasePtr(),
+ FirstInChain->getPointerInfo(),
+ FirstInChain->getAlignment());
+ } else { // Must be realized as a trunc store
+ EVT LegalizedStoredValueTy =
+ TLI.getTypeToTransformTo(*DAG.getContext(), StoredVal.getValueType());
+ unsigned LegalizedStoreSize = LegalizedStoredValueTy.getSizeInBits();
+ ConstantSDNode *C = cast<ConstantSDNode>(StoredVal);
+ SDValue ExtendedStoreVal =
+ DAG.getConstant(C->getAPIntValue().zextOrTrunc(LegalizedStoreSize), DL,
+ LegalizedStoredValueTy);
+ NewStore = DAG.getTruncStore(
+ NewChain, DL, ExtendedStoreVal, FirstInChain->getBasePtr(),
+ FirstInChain->getPointerInfo(), StoredVal.getValueType() /*TVT*/,
+ FirstInChain->getAlignment(),
+ FirstInChain->getMemOperand()->getFlags());
+ }
+
+ // Replace all merged stores with the new store.
+ for (unsigned i = 0; i < NumStores; ++i)
+ CombineTo(StoreNodes[i].MemNode, NewStore);
+
+ AddToWorklist(NewChain.getNode());
return true;
}
-void DAGCombiner::getStoreMergeAndAliasCandidates(
- StoreSDNode* St, SmallVectorImpl<MemOpLink> &StoreNodes,
- SmallVectorImpl<LSBaseSDNode*> &AliasLoadNodes) {
+void DAGCombiner::getStoreMergeCandidates(
+ StoreSDNode *St, SmallVectorImpl<MemOpLink> &StoreNodes) {
// This holds the base pointer, index, and the offset in bytes from the base
// pointer.
BaseIndexOffset BasePtr = BaseIndexOffset::match(St->getBasePtr(), DAG);
+ EVT MemVT = St->getMemoryVT();
// We must have a base and an offset.
- if (!BasePtr.Base.getNode())
+ if (!BasePtr.getBase().getNode())
return;
// Do not handle stores to undef base pointers.
- if (BasePtr.Base.isUndef())
+ if (BasePtr.getBase().isUndef())
return;
- // Walk up the chain and look for nodes with offsets from the same
- // base pointer. Stop when reaching an instruction with a different kind
- // or instruction which has a different base pointer.
- EVT MemVT = St->getMemoryVT();
- unsigned Seq = 0;
- StoreSDNode *Index = St;
-
-
- bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA
- : DAG.getSubtarget().useAA();
-
- if (UseAA) {
- // Look at other users of the same chain. Stores on the same chain do not
- // alias. If combiner-aa is enabled, non-aliasing stores are canonicalized
- // to be on the same chain, so don't bother looking at adjacent chains.
-
- SDValue Chain = St->getChain();
- for (auto I = Chain->use_begin(), E = Chain->use_end(); I != E; ++I) {
- if (StoreSDNode *OtherST = dyn_cast<StoreSDNode>(*I)) {
- if (I.getOperandNo() != 0)
- continue;
-
- if (OtherST->isVolatile() || OtherST->isIndexed())
- continue;
-
- if (OtherST->getMemoryVT() != MemVT)
- continue;
-
- BaseIndexOffset Ptr = BaseIndexOffset::match(OtherST->getBasePtr(), DAG);
-
- if (Ptr.equalBaseIndex(BasePtr))
- StoreNodes.push_back(MemOpLink(OtherST, Ptr.Offset, Seq++));
- }
+ bool IsConstantSrc = isa<ConstantSDNode>(St->getValue()) ||
+ isa<ConstantFPSDNode>(St->getValue());
+ bool IsExtractVecSrc =
+ (St->getValue().getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
+ St->getValue().getOpcode() == ISD::EXTRACT_SUBVECTOR);
+ bool IsLoadSrc = isa<LoadSDNode>(St->getValue());
+ BaseIndexOffset LBasePtr;
+ // Match on loadbaseptr if relevant.
+ if (IsLoadSrc)
+ LBasePtr = BaseIndexOffset::match(
+ cast<LoadSDNode>(St->getValue())->getBasePtr(), DAG);
+
+ auto CandidateMatch = [&](StoreSDNode *Other, BaseIndexOffset &Ptr,
+ int64_t &Offset) -> bool {
+ if (Other->isVolatile() || Other->isIndexed())
+ return false;
+ // We can merge constant floats into equivalent integers.
+ if (Other->getMemoryVT() != MemVT)
+ if (!(MemVT.isInteger() && MemVT.bitsEq(Other->getMemoryVT()) &&
+ isa<ConstantFPSDNode>(Other->getValue())))
+ return false;
+ if (IsLoadSrc) {
+ // The Load's Base Ptr must also match
+ if (LoadSDNode *OtherLd = dyn_cast<LoadSDNode>(Other->getValue())) {
+ auto LPtr = BaseIndexOffset::match(OtherLd->getBasePtr(), DAG);
+ if (!(LBasePtr.equalBaseIndex(LPtr, DAG)))
+ return false;
+ } else
+ return false;
}
-
- return;
- }
-
- while (Index) {
- // If the chain has more than one use, then we can't reorder the mem ops.
- if (Index != St && !SDValue(Index, 0)->hasOneUse())
- break;
-
- // Find the base pointer and offset for this memory node.
- BaseIndexOffset Ptr = BaseIndexOffset::match(Index->getBasePtr(), DAG);
-
- // Check that the base pointer is the same as the original one.
- if (!Ptr.equalBaseIndex(BasePtr))
- break;
-
- // The memory operands must not be volatile.
- if (Index->isVolatile() || Index->isIndexed())
- break;
-
- // No truncation.
- if (Index->isTruncatingStore())
- break;
-
- // The stored memory type must be the same.
- if (Index->getMemoryVT() != MemVT)
- break;
-
- // We do not allow under-aligned stores in order to prevent
- // overriding stores. NOTE: this is a bad hack. Alignment SHOULD
- // be irrelevant here; what MATTERS is that we not move memory
- // operations that potentially overlap past each-other.
- if (Index->getAlignment() < MemVT.getStoreSize())
- break;
-
- // We found a potential memory operand to merge.
- StoreNodes.push_back(MemOpLink(Index, Ptr.Offset, Seq++));
-
- // Find the next memory operand in the chain. If the next operand in the
- // chain is a store then move up and continue the scan with the next
- // memory operand. If the next operand is a load save it and use alias
- // information to check if it interferes with anything.
- SDNode *NextInChain = Index->getChain().getNode();
- while (1) {
- if (StoreSDNode *STn = dyn_cast<StoreSDNode>(NextInChain)) {
- // We found a store node. Use it for the next iteration.
- Index = STn;
- break;
- } else if (LoadSDNode *Ldn = dyn_cast<LoadSDNode>(NextInChain)) {
- if (Ldn->isVolatile()) {
- Index = nullptr;
- break;
+ if (IsConstantSrc)
+ if (!(isa<ConstantSDNode>(Other->getValue()) ||
+ isa<ConstantFPSDNode>(Other->getValue())))
+ return false;
+ if (IsExtractVecSrc)
+ if (!(Other->getValue().getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
+ Other->getValue().getOpcode() == ISD::EXTRACT_SUBVECTOR))
+ return false;
+ Ptr = BaseIndexOffset::match(Other->getBasePtr(), DAG);
+ return (BasePtr.equalBaseIndex(Ptr, DAG, Offset));
+ };
+ // We are looking for a root node which is an ancestor to all mergeable
+ // stores. We search up through a load, to our root and then down
+ // through all children. For instance we will find Store{1,2,3} if
+ // St is Store1, Store2, or Store3 where the root is not a load,
+ // which is always true for nonvolatile ops. TODO: Expand
+ // the search to find all valid candidates through multiple layers of loads.
+ //
+ // Root
+ // |-------|-------|
+ // Load Load Store3
+ // | |
+ // Store1 Store2
+ //
+ // FIXME: We should be able to climb and
+ // descend TokenFactors to find candidates as well.
+
+ SDNode *RootNode = (St->getChain()).getNode();
+
+ if (LoadSDNode *Ldn = dyn_cast<LoadSDNode>(RootNode)) {
+ RootNode = Ldn->getChain().getNode();
+ for (auto I = RootNode->use_begin(), E = RootNode->use_end(); I != E; ++I)
+ if (I.getOperandNo() == 0 && isa<LoadSDNode>(*I)) // walk down chain
+ for (auto I2 = (*I)->use_begin(), E2 = (*I)->use_end(); I2 != E2; ++I2)
+ if (I2.getOperandNo() == 0)
+ if (StoreSDNode *OtherST = dyn_cast<StoreSDNode>(*I2)) {
+ BaseIndexOffset Ptr;
+ int64_t PtrDiff;
+ if (CandidateMatch(OtherST, Ptr, PtrDiff))
+ StoreNodes.push_back(MemOpLink(OtherST, PtrDiff));
+ }
+ } else
+ for (auto I = RootNode->use_begin(), E = RootNode->use_end(); I != E; ++I)
+ if (I.getOperandNo() == 0)
+ if (StoreSDNode *OtherST = dyn_cast<StoreSDNode>(*I)) {
+ BaseIndexOffset Ptr;
+ int64_t PtrDiff;
+ if (CandidateMatch(OtherST, Ptr, PtrDiff))
+ StoreNodes.push_back(MemOpLink(OtherST, PtrDiff));
}
-
- // Save the load node for later. Continue the scan.
- AliasLoadNodes.push_back(Ldn);
- NextInChain = Ldn->getChain().getNode();
- continue;
- } else {
- Index = nullptr;
- break;
- }
- }
- }
}
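
A standalone toy sketch (plain structs, not SelectionDAG, and not part of the patch) of the climb-then-descend search in the diagram above:

// Standalone toy sketch, not part of the patch: climb through a single load
// to the root, then walk chain users back down to collect candidate stores.
#include <cstdio>
#include <vector>

struct Node {
  bool IsLoad = false, IsStore = false;
  Node *Chain = nullptr;           // the node's chain operand
  std::vector<Node *> ChainUsers;  // nodes that use this node as their chain
};

int main() {
  Node Root, L1, L2, S1, S2, S3;
  L1.IsLoad = L2.IsLoad = true;
  S1.IsStore = S2.IsStore = S3.IsStore = true;
  L1.Chain = L2.Chain = S3.Chain = &Root;
  S1.Chain = &L1;
  S2.Chain = &L2;
  Root.ChainUsers = {&L1, &L2, &S3};
  L1.ChainUsers = {&S1};
  L2.ChainUsers = {&S2};

  Node *St = &S1;                  // start from Store1
  Node *RootNode = St->Chain;
  if (RootNode->IsLoad)            // search up through the load...
    RootNode = RootNode->Chain;

  unsigned Candidates = 0;         // ...then down through all children
  for (Node *U : RootNode->ChainUsers) {
    if (U->IsStore)
      ++Candidates;                // finds Store3
    else if (U->IsLoad)
      for (Node *U2 : U->ChainUsers)
        if (U2->IsStore)
          ++Candidates;            // finds Store1 and Store2
  }
  std::printf("%u candidate store(s)\n", Candidates); // prints 3
  return 0;
}
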
-// We need to check that merging these stores does not cause a loop
-// in the DAG. Any store candidate may depend on another candidate
+// We need to check that merging these stores does not cause a loop in
+// the DAG. Any store candidate may depend on another candidate
// indirectly through its operand (we already consider dependencies
// through the chain). Check in parallel by searching up from
// non-chain operands of candidates.
+
bool DAGCombiner::checkMergeStoreCandidatesForDependencies(
- SmallVectorImpl<MemOpLink> &StoreNodes) {
+ SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores) {
+
+ // FIXME: We should be able to truncate a full search of
+ // predecessors by doing a BFS and keeping tabs on the originating
+ // stores from which worklist nodes come, in a similar way to
+ // TokenFactor simplification.
+
SmallPtrSet<const SDNode *, 16> Visited;
SmallVector<const SDNode *, 8> Worklist;
- // search ops of store candidates
- for (unsigned i = 0; i < StoreNodes.size(); ++i) {
+ unsigned int Max = 8192;
+ // Search Ops of store candidates.
+ for (unsigned i = 0; i < NumStores; ++i) {
SDNode *n = StoreNodes[i].MemNode;
// Potential loops may happen only through non-chain operands
for (unsigned j = 1; j < n->getNumOperands(); ++j)
Worklist.push_back(n->getOperand(j).getNode());
}
- // search through DAG. We can stop early if we find a storenode
- for (unsigned i = 0; i < StoreNodes.size(); ++i) {
- if (SDNode::hasPredecessorHelper(StoreNodes[i].MemNode, Visited, Worklist))
+ // Search through DAG. We can stop early if we find a store node.
+ for (unsigned i = 0; i < NumStores; ++i) {
+ if (SDNode::hasPredecessorHelper(StoreNodes[i].MemNode, Visited, Worklist,
+ Max))
+ return false;
+ // Check if we ended early, failing conservatively if so.
+ if (Visited.size() >= Max)
return false;
}
return true;
}
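
A standalone toy sketch (not part of the patch) of a visit-capped predecessor walk in the same spirit as hasPredecessorHelper with the Max bound above; hitting the cap is treated as "unknown", so the caller fails conservatively:

// Standalone toy sketch, not part of the patch.
#include <cstdio>
#include <set>
#include <vector>

struct Node { std::vector<Node *> Ops; };

// Returns true iff Target is reachable from Worklist; sets Capped (and
// returns false) if the visited set reaches Max before the walk finishes.
static bool reachable(Node *Target, std::vector<Node *> Worklist,
                      unsigned Max, bool &Capped) {
  std::set<Node *> Visited;
  while (!Worklist.empty()) {
    Node *N = Worklist.back();
    Worklist.pop_back();
    if (!Visited.insert(N).second)
      continue;                    // already seen
    if (N == Target)
      return true;
    if (Visited.size() >= Max) {   // out of budget: give up conservatively
      Capped = true;
      return false;
    }
    for (Node *Op : N->Ops)
      Worklist.push_back(Op);
  }
  return false;
}

int main() {
  Node A, B{{&A}}, C{{&B}};        // C -> B -> A
  bool Capped = false;
  bool Found = reachable(&A, {&C}, 8192, Capped);
  std::printf("reachable=%d capped=%d\n", Found, Capped); // 1 0
  return 0;
}
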
-bool DAGCombiner::MergeConsecutiveStores(
- StoreSDNode* St, SmallVectorImpl<MemOpLink> &StoreNodes) {
+bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
if (OptLevel == CodeGenOpt::None)
return false;
EVT MemVT = St->getMemoryVT();
int64_t ElementSizeBytes = MemVT.getSizeInBits() / 8;
+
+ if (MemVT.getSizeInBits() * 2 > MaximumLegalStoreInBits)
+ return false;
+
bool NoVectors = DAG.getMachineFunction().getFunction()->hasFnAttribute(
Attribute::NoImplicitFloat);
@@ -11731,376 +12681,437 @@ bool DAGCombiner::MergeConsecutiveStores(
if (MemVT.isVector() && IsLoadSrc)
return false;
- // Only look at ends of store sequences.
- SDValue Chain = SDValue(St, 0);
- if (Chain->hasOneUse() && Chain->use_begin()->getOpcode() == ISD::STORE)
- return false;
-
- // Save the LoadSDNodes that we find in the chain.
- // We need to make sure that these nodes do not interfere with
- // any of the store nodes.
- SmallVector<LSBaseSDNode*, 8> AliasLoadNodes;
-
- getStoreMergeAndAliasCandidates(St, StoreNodes, AliasLoadNodes);
+ SmallVector<MemOpLink, 8> StoreNodes;
+ // Find potential store merge candidates by searching through chain sub-DAG
+ getStoreMergeCandidates(St, StoreNodes);
// Check if there is anything to merge.
if (StoreNodes.size() < 2)
return false;
- // only do dependence check in AA case
- bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA
- : DAG.getSubtarget().useAA();
- if (UseAA && !checkMergeStoreCandidatesForDependencies(StoreNodes))
- return false;
-
// Sort the memory operands according to their distance from the
- // base pointer. As a secondary criteria: make sure stores coming
- // later in the code come first in the list. This is important for
- // the non-UseAA case, because we're merging stores into the FINAL
- // store along a chain which potentially contains aliasing stores.
- // Thus, if there are multiple stores to the same address, the last
- // one can be considered for merging but not the others.
+ // base pointer.
std::sort(StoreNodes.begin(), StoreNodes.end(),
[](MemOpLink LHS, MemOpLink RHS) {
- return LHS.OffsetFromBase < RHS.OffsetFromBase ||
- (LHS.OffsetFromBase == RHS.OffsetFromBase &&
- LHS.SequenceNum < RHS.SequenceNum);
- });
-
- // Scan the memory operations on the chain and find the first non-consecutive
- // store memory address.
- unsigned LastConsecutiveStore = 0;
- int64_t StartAddress = StoreNodes[0].OffsetFromBase;
- for (unsigned i = 0, e = StoreNodes.size(); i < e; ++i) {
-
+ return LHS.OffsetFromBase < RHS.OffsetFromBase;
+ });
+
+ // Store Merge attempts to merge the lowest stores first. This generally
+ // works out well: if the merge succeeds, the remaining stores are
+ // checked after the first collection of stores is merged. However, in
+ // the case that a non-mergeable store is found first, e.g., {p[-2],
+ // p[0], p[1], p[2], p[3]}, we would fail and miss the subsequent
+ // mergeable cases. To prevent this, we prune such stores from the
+ // front of StoreNodes here.
+
+ bool RV = false;
+ while (StoreNodes.size() > 1) {
+ unsigned StartIdx = 0;
+ while ((StartIdx + 1 < StoreNodes.size()) &&
+ StoreNodes[StartIdx].OffsetFromBase + ElementSizeBytes !=
+ StoreNodes[StartIdx + 1].OffsetFromBase)
+ ++StartIdx;
+
+ // Bail if we don't have enough candidates to merge.
+ if (StartIdx + 1 >= StoreNodes.size())
+ return RV;
+
+ if (StartIdx)
+ StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + StartIdx);
+
+ // Scan the memory operations on the chain and find the first
+ // non-consecutive store memory address.
+ unsigned NumConsecutiveStores = 1;
+ int64_t StartAddress = StoreNodes[0].OffsetFromBase;
// Check that the addresses are consecutive starting from the second
// element in the list of stores.
- if (i > 0) {
+ for (unsigned i = 1, e = StoreNodes.size(); i < e; ++i) {
int64_t CurrAddress = StoreNodes[i].OffsetFromBase;
if (CurrAddress - StartAddress != (ElementSizeBytes * i))
break;
+ NumConsecutiveStores = i + 1;
}
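
A standalone C++ sketch (not part of the patch) of the consecutive-offset scan above, over plain integers:

// Standalone sketch, not part of the patch. Offsets are already sorted by
// OffsetFromBase; count the gap-free run of ElementSizeBytes-sized slots.
#include <cstdio>
#include <vector>

int main() {
  const long long ElementSizeBytes = 4;
  std::vector<long long> Offsets = {0, 4, 8, 20}; // the gap at 20 ends the run
  unsigned NumConsecutiveStores = 1;
  for (unsigned i = 1; i < Offsets.size(); ++i) {
    if (Offsets[i] - Offsets[0] != ElementSizeBytes * i)
      break;
    NumConsecutiveStores = i + 1;
  }
  std::printf("%u\n", NumConsecutiveStores); // prints 3
  return 0;
}
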
- // Check if this store interferes with any of the loads that we found.
- // If we find a load that alias with this store. Stop the sequence.
- if (any_of(AliasLoadNodes, [&](LSBaseSDNode *Ldn) {
- return isAlias(Ldn, StoreNodes[i].MemNode);
- }))
- break;
+ if (NumConsecutiveStores < 2) {
+ StoreNodes.erase(StoreNodes.begin(),
+ StoreNodes.begin() + NumConsecutiveStores);
+ continue;
+ }
- // Mark this node as useful.
- LastConsecutiveStore = i;
- }
+ // Check that we can merge these candidates without causing a cycle
+ if (!checkMergeStoreCandidatesForDependencies(StoreNodes,
+ NumConsecutiveStores)) {
+ StoreNodes.erase(StoreNodes.begin(),
+ StoreNodes.begin() + NumConsecutiveStores);
+ continue;
+ }
- // The node with the lowest store address.
- LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
- unsigned FirstStoreAS = FirstInChain->getAddressSpace();
- unsigned FirstStoreAlign = FirstInChain->getAlignment();
- LLVMContext &Context = *DAG.getContext();
- const DataLayout &DL = DAG.getDataLayout();
-
- // Store the constants into memory as one consecutive store.
- if (IsConstantSrc) {
- unsigned LastLegalType = 0;
- unsigned LastLegalVectorType = 0;
- bool NonZero = false;
- for (unsigned i=0; i<LastConsecutiveStore+1; ++i) {
- StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
- SDValue StoredVal = St->getValue();
-
- if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(StoredVal)) {
- NonZero |= !C->isNullValue();
- } else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(StoredVal)) {
- NonZero |= !C->getConstantFPValue()->isNullValue();
- } else {
- // Non-constant.
- break;
- }
+ // The node with the lowest store address.
+ LLVMContext &Context = *DAG.getContext();
+ const DataLayout &DL = DAG.getDataLayout();
- // Find a legal type for the constant store.
- unsigned SizeInBits = (i+1) * ElementSizeBytes * 8;
- EVT StoreTy = EVT::getIntegerVT(Context, SizeInBits);
- bool IsFast;
- if (TLI.isTypeLegal(StoreTy) &&
- TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
- FirstStoreAlign, &IsFast) && IsFast) {
- LastLegalType = i+1;
- // Or check whether a truncstore is legal.
- } else if (TLI.getTypeAction(Context, StoreTy) ==
- TargetLowering::TypePromoteInteger) {
- EVT LegalizedStoredValueTy =
- TLI.getTypeToTransformTo(Context, StoredVal.getValueType());
- if (TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy) &&
- TLI.allowsMemoryAccess(Context, DL, LegalizedStoredValueTy,
- FirstStoreAS, FirstStoreAlign, &IsFast) &&
+ // Store the constants into memory as one consecutive store.
+ if (IsConstantSrc) {
+ LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
+ unsigned FirstStoreAS = FirstInChain->getAddressSpace();
+ unsigned FirstStoreAlign = FirstInChain->getAlignment();
+ unsigned LastLegalType = 1;
+ unsigned LastLegalVectorType = 1;
+ bool LastIntegerTrunc = false;
+ bool NonZero = false;
+ for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
+ StoreSDNode *ST = cast<StoreSDNode>(StoreNodes[i].MemNode);
+ SDValue StoredVal = ST->getValue();
+
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(StoredVal)) {
+ NonZero |= !C->isNullValue();
+ } else if (ConstantFPSDNode *C =
+ dyn_cast<ConstantFPSDNode>(StoredVal)) {
+ NonZero |= !C->getConstantFPValue()->isNullValue();
+ } else {
+ // Non-constant.
+ break;
+ }
+
+ // Find a legal type for the constant store.
+ unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
+ EVT StoreTy = EVT::getIntegerVT(Context, SizeInBits);
+ bool IsFast = false;
+ if (TLI.isTypeLegal(StoreTy) &&
+ TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
+ TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
+ FirstStoreAlign, &IsFast) &&
IsFast) {
+ LastIntegerTrunc = false;
LastLegalType = i + 1;
+ // Or check whether a truncstore is legal.
+ } else if (TLI.getTypeAction(Context, StoreTy) ==
+ TargetLowering::TypePromoteInteger) {
+ EVT LegalizedStoredValueTy =
+ TLI.getTypeToTransformTo(Context, StoredVal.getValueType());
+ if (TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy) &&
+ TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValueTy, DAG) &&
+ TLI.allowsMemoryAccess(Context, DL, LegalizedStoredValueTy,
+ FirstStoreAS, FirstStoreAlign, &IsFast) &&
+ IsFast) {
+ LastIntegerTrunc = true;
+ LastLegalType = i + 1;
+ }
}
- }
- // We only use vectors if the constant is known to be zero or the target
- // allows it and the function is not marked with the noimplicitfloat
- // attribute.
- if ((!NonZero || TLI.storeOfVectorConstantIsCheap(MemVT, i+1,
- FirstStoreAS)) &&
- !NoVectors) {
- // Find a legal type for the vector store.
- EVT Ty = EVT::getVectorVT(Context, MemVT, i+1);
- if (TLI.isTypeLegal(Ty) &&
- TLI.allowsMemoryAccess(Context, DL, Ty, FirstStoreAS,
- FirstStoreAlign, &IsFast) && IsFast)
- LastLegalVectorType = i + 1;
+ // We only use vectors if the constant is known to be zero or the target
+ // allows it and the function is not marked with the noimplicitfloat
+ // attribute.
+ if ((!NonZero ||
+ TLI.storeOfVectorConstantIsCheap(MemVT, i + 1, FirstStoreAS)) &&
+ !NoVectors) {
+ // Find a legal type for the vector store.
+ unsigned Elts = i + 1;
+ if (MemVT.isVector()) {
+ // When merging vector stores, get the total number of elements.
+ Elts *= MemVT.getVectorNumElements();
+ }
+ EVT Ty = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
+ if (TLI.isTypeLegal(Ty) &&
+ TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG) &&
+ TLI.allowsMemoryAccess(Context, DL, Ty, FirstStoreAS,
+ FirstStoreAlign, &IsFast) &&
+ IsFast)
+ LastLegalVectorType = i + 1;
+ }
}
- }
- // Check if we found a legal integer type to store.
- if (LastLegalType == 0 && LastLegalVectorType == 0)
- return false;
-
- bool UseVector = (LastLegalVectorType > LastLegalType) && !NoVectors;
- unsigned NumElem = UseVector ? LastLegalVectorType : LastLegalType;
-
- return MergeStoresOfConstantsOrVecElts(StoreNodes, MemVT, NumElem,
- true, UseVector);
- }
+ // Check if we found a legal integer type that creates a meaningful merge.
+ if (LastLegalType < 2 && LastLegalVectorType < 2) {
+ StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 1);
+ continue;
+ }
- // When extracting multiple vector elements, try to store them
- // in one vector store rather than a sequence of scalar stores.
- if (IsExtractVecSrc) {
- unsigned NumStoresToMerge = 0;
- bool IsVec = MemVT.isVector();
- for (unsigned i = 0; i < LastConsecutiveStore + 1; ++i) {
- StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
- unsigned StoreValOpcode = St->getValue().getOpcode();
- // This restriction could be loosened.
- // Bail out if any stored values are not elements extracted from a vector.
- // It should be possible to handle mixed sources, but load sources need
- // more careful handling (see the block of code below that handles
- // consecutive loads).
- if (StoreValOpcode != ISD::EXTRACT_VECTOR_ELT &&
- StoreValOpcode != ISD::EXTRACT_SUBVECTOR)
- return false;
+ bool UseVector = (LastLegalVectorType > LastLegalType) && !NoVectors;
+ unsigned NumElem = (UseVector) ? LastLegalVectorType : LastLegalType;
- // Find a legal type for the vector store.
- unsigned Elts = i + 1;
- if (IsVec) {
- // When merging vector stores, get the total number of elements.
- Elts *= MemVT.getVectorNumElements();
+ bool Merged = MergeStoresOfConstantsOrVecElts(
+ StoreNodes, MemVT, NumElem, true, UseVector, LastIntegerTrunc);
+ if (!Merged) {
+ StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
+ continue;
}
- EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
- bool IsFast;
- if (TLI.isTypeLegal(Ty) &&
- TLI.allowsMemoryAccess(Context, DL, Ty, FirstStoreAS,
- FirstStoreAlign, &IsFast) && IsFast)
- NumStoresToMerge = i + 1;
+ // Remove merged stores for next iteration.
+ RV = true;
+ StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
+ continue;
}
- return MergeStoresOfConstantsOrVecElts(StoreNodes, MemVT, NumStoresToMerge,
- false, true);
- }
-
- // Below we handle the case of multiple consecutive stores that
- // come from multiple consecutive loads. We merge them into a single
- // wide load and a single wide store.
-
- // Look for load nodes which are used by the stored values.
- SmallVector<MemOpLink, 8> LoadNodes;
+ // When extracting multiple vector elements, try to store them
+ // in one vector store rather than a sequence of scalar stores.
+ if (IsExtractVecSrc) {
+ LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
+ unsigned FirstStoreAS = FirstInChain->getAddressSpace();
+ unsigned FirstStoreAlign = FirstInChain->getAlignment();
+ unsigned NumStoresToMerge = 1;
+ bool IsVec = MemVT.isVector();
+ for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
+ StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
+ unsigned StoreValOpcode = St->getValue().getOpcode();
+ // This restriction could be loosened.
+ // Bail out if any stored values are not elements extracted from a
+ // vector. It should be possible to handle mixed sources, but load
+ // sources need more careful handling (see the block of code below that
+ // handles consecutive loads).
+ if (StoreValOpcode != ISD::EXTRACT_VECTOR_ELT &&
+ StoreValOpcode != ISD::EXTRACT_SUBVECTOR)
+ return RV;
- // Find acceptable loads. Loads need to have the same chain (token factor),
- // must not be zext, volatile, indexed, and they must be consecutive.
- BaseIndexOffset LdBasePtr;
- for (unsigned i=0; i<LastConsecutiveStore+1; ++i) {
- StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
- LoadSDNode *Ld = dyn_cast<LoadSDNode>(St->getValue());
- if (!Ld) break;
+ // Find a legal type for the vector store.
+ unsigned Elts = i + 1;
+ if (IsVec) {
+ // When merging vector stores, get the total number of elements.
+ Elts *= MemVT.getVectorNumElements();
+ }
+ EVT Ty =
+ EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
+ bool IsFast;
+ if (TLI.isTypeLegal(Ty) &&
+ TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG) &&
+ TLI.allowsMemoryAccess(Context, DL, Ty, FirstStoreAS,
+ FirstStoreAlign, &IsFast) &&
+ IsFast)
+ NumStoresToMerge = i + 1;
+ }
- // Loads must only have one use.
- if (!Ld->hasNUsesOfValue(1, 0))
- break;
+ bool Merged = MergeStoresOfConstantsOrVecElts(
+ StoreNodes, MemVT, NumStoresToMerge, false, true, false);
+ if (!Merged) {
+ StoreNodes.erase(StoreNodes.begin(),
+ StoreNodes.begin() + NumStoresToMerge);
+ continue;
+ }
+ // Remove merged stores for next iteration.
+ StoreNodes.erase(StoreNodes.begin(),
+ StoreNodes.begin() + NumStoresToMerge);
+ RV = true;
+ continue;
+ }
- // The memory operands must not be volatile.
- if (Ld->isVolatile() || Ld->isIndexed())
- break;
+ // Below we handle the case of multiple consecutive stores that
+ // come from multiple consecutive loads. We merge them into a single
+ // wide load and a single wide store.
- // We do not accept ext loads.
- if (Ld->getExtensionType() != ISD::NON_EXTLOAD)
- break;
+ // Look for load nodes which are used by the stored values.
+ SmallVector<MemOpLink, 8> LoadNodes;
- // The stored memory type must be the same.
- if (Ld->getMemoryVT() != MemVT)
- break;
+ // Find acceptable loads. Loads need to have the same chain (token factor),
+ // must not be zext, volatile, indexed, and they must be consecutive.
+ BaseIndexOffset LdBasePtr;
+ for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
+ StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
+ LoadSDNode *Ld = dyn_cast<LoadSDNode>(St->getValue());
+ if (!Ld)
+ break;
- BaseIndexOffset LdPtr = BaseIndexOffset::match(Ld->getBasePtr(), DAG);
- // If this is not the first ptr that we check.
- if (LdBasePtr.Base.getNode()) {
- // The base ptr must be the same.
- if (!LdPtr.equalBaseIndex(LdBasePtr))
+ // Loads must only have one use.
+ if (!Ld->hasNUsesOfValue(1, 0))
break;
- } else {
- // Check that all other base pointers are the same as this one.
- LdBasePtr = LdPtr;
- }
- // We found a potential memory operand to merge.
- LoadNodes.push_back(MemOpLink(Ld, LdPtr.Offset, 0));
- }
+ // The memory operands must not be volatile.
+ if (Ld->isVolatile() || Ld->isIndexed())
+ break;
- if (LoadNodes.size() < 2)
- return false;
+ // We do not accept ext loads.
+ if (Ld->getExtensionType() != ISD::NON_EXTLOAD)
+ break;
- // If we have load/store pair instructions and we only have two values,
- // don't bother.
- unsigned RequiredAlignment;
- if (LoadNodes.size() == 2 && TLI.hasPairedLoad(MemVT, RequiredAlignment) &&
- St->getAlignment() >= RequiredAlignment)
- return false;
+ // The stored memory type must be the same.
+ if (Ld->getMemoryVT() != MemVT)
+ break;
- LoadSDNode *FirstLoad = cast<LoadSDNode>(LoadNodes[0].MemNode);
- unsigned FirstLoadAS = FirstLoad->getAddressSpace();
- unsigned FirstLoadAlign = FirstLoad->getAlignment();
-
- // Scan the memory operations on the chain and find the first non-consecutive
- // load memory address. These variables hold the index in the store node
- // array.
- unsigned LastConsecutiveLoad = 0;
- // This variable refers to the size and not index in the array.
- unsigned LastLegalVectorType = 0;
- unsigned LastLegalIntegerType = 0;
- StartAddress = LoadNodes[0].OffsetFromBase;
- SDValue FirstChain = FirstLoad->getChain();
- for (unsigned i = 1; i < LoadNodes.size(); ++i) {
- // All loads must share the same chain.
- if (LoadNodes[i].MemNode->getChain() != FirstChain)
- break;
+ BaseIndexOffset LdPtr = BaseIndexOffset::match(Ld->getBasePtr(), DAG);
+ // If this is not the first ptr that we check.
+ int64_t LdOffset = 0;
+ if (LdBasePtr.getBase().getNode()) {
+ // The base ptr must be the same.
+ if (!LdBasePtr.equalBaseIndex(LdPtr, DAG, LdOffset))
+ break;
+ } else {
+ // Check that all other base pointers are the same as this one.
+ LdBasePtr = LdPtr;
+ }
- int64_t CurrAddress = LoadNodes[i].OffsetFromBase;
- if (CurrAddress - StartAddress != (ElementSizeBytes * i))
- break;
- LastConsecutiveLoad = i;
- // Find a legal type for the vector store.
- EVT StoreTy = EVT::getVectorVT(Context, MemVT, i+1);
- bool IsFastSt, IsFastLd;
- if (TLI.isTypeLegal(StoreTy) &&
- TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
- FirstStoreAlign, &IsFastSt) && IsFastSt &&
- TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS,
- FirstLoadAlign, &IsFastLd) && IsFastLd) {
- LastLegalVectorType = i + 1;
- }
-
- // Find a legal type for the integer store.
- unsigned SizeInBits = (i+1) * ElementSizeBytes * 8;
- StoreTy = EVT::getIntegerVT(Context, SizeInBits);
- if (TLI.isTypeLegal(StoreTy) &&
- TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
- FirstStoreAlign, &IsFastSt) && IsFastSt &&
- TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS,
- FirstLoadAlign, &IsFastLd) && IsFastLd)
- LastLegalIntegerType = i + 1;
- // Or check whether a truncstore and extload is legal.
- else if (TLI.getTypeAction(Context, StoreTy) ==
- TargetLowering::TypePromoteInteger) {
- EVT LegalizedStoredValueTy =
- TLI.getTypeToTransformTo(Context, StoreTy);
- if (TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy) &&
- TLI.isLoadExtLegal(ISD::ZEXTLOAD, LegalizedStoredValueTy, StoreTy) &&
- TLI.isLoadExtLegal(ISD::SEXTLOAD, LegalizedStoredValueTy, StoreTy) &&
- TLI.isLoadExtLegal(ISD::EXTLOAD, LegalizedStoredValueTy, StoreTy) &&
- TLI.allowsMemoryAccess(Context, DL, LegalizedStoredValueTy,
- FirstStoreAS, FirstStoreAlign, &IsFastSt) &&
- IsFastSt &&
- TLI.allowsMemoryAccess(Context, DL, LegalizedStoredValueTy,
- FirstLoadAS, FirstLoadAlign, &IsFastLd) &&
- IsFastLd)
- LastLegalIntegerType = i+1;
+ // We found a potential memory operand to merge.
+ LoadNodes.push_back(MemOpLink(Ld, LdOffset));
}
- }
-
- // Only use vector types if the vector type is larger than the integer type.
- // If they are the same, use integers.
- bool UseVectorTy = LastLegalVectorType > LastLegalIntegerType && !NoVectors;
- unsigned LastLegalType = std::max(LastLegalVectorType, LastLegalIntegerType);
- // We add +1 here because the LastXXX variables refer to location while
- // the NumElem refers to array/index size.
- unsigned NumElem = std::min(LastConsecutiveStore, LastConsecutiveLoad) + 1;
- NumElem = std::min(LastLegalType, NumElem);
-
- if (NumElem < 2)
- return false;
-
- // Collect the chains from all merged stores.
- SmallVector<SDValue, 8> MergeStoreChains;
- MergeStoreChains.push_back(StoreNodes[0].MemNode->getChain());
+ if (LoadNodes.size() < 2) {
+ StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 1);
+ continue;
+ }
- // The latest Node in the DAG.
- unsigned LatestNodeUsed = 0;
- for (unsigned i=1; i<NumElem; ++i) {
- // Find a chain for the new wide-store operand. Notice that some
- // of the store nodes that we found may not be selected for inclusion
- // in the wide store. The chain we use needs to be the chain of the
- // latest store node which is *used* and replaced by the wide store.
- if (StoreNodes[i].SequenceNum < StoreNodes[LatestNodeUsed].SequenceNum)
- LatestNodeUsed = i;
+ // If we have load/store pair instructions and we only have two values,
+ // don't bother merging.
+ unsigned RequiredAlignment;
+ if (LoadNodes.size() == 2 && TLI.hasPairedLoad(MemVT, RequiredAlignment) &&
+ StoreNodes[0].MemNode->getAlignment() >= RequiredAlignment) {
+ StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 2);
+ continue;
+ }
+ LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
+ unsigned FirstStoreAS = FirstInChain->getAddressSpace();
+ unsigned FirstStoreAlign = FirstInChain->getAlignment();
+ LoadSDNode *FirstLoad = cast<LoadSDNode>(LoadNodes[0].MemNode);
+ unsigned FirstLoadAS = FirstLoad->getAddressSpace();
+ unsigned FirstLoadAlign = FirstLoad->getAlignment();
+
+ // Scan the memory operations on the chain and find the first
+ // non-consecutive load memory address. These variables hold the index in
+ // the load node array.
+ unsigned LastConsecutiveLoad = 1;
+ // These variables refer to a size, not an index in the array.
+ unsigned LastLegalVectorType = 1;
+ unsigned LastLegalIntegerType = 1;
+ bool isDereferenceable = true;
+ bool DoIntegerTruncate = false;
+ StartAddress = LoadNodes[0].OffsetFromBase;
+ SDValue FirstChain = FirstLoad->getChain();
+ for (unsigned i = 1; i < LoadNodes.size(); ++i) {
+ // All loads must share the same chain.
+ if (LoadNodes[i].MemNode->getChain() != FirstChain)
+ break;
- MergeStoreChains.push_back(StoreNodes[i].MemNode->getChain());
- }
+ int64_t CurrAddress = LoadNodes[i].OffsetFromBase;
+ if (CurrAddress - StartAddress != (ElementSizeBytes * i))
+ break;
+ LastConsecutiveLoad = i;
- LSBaseSDNode *LatestOp = StoreNodes[LatestNodeUsed].MemNode;
+ if (isDereferenceable && !LoadNodes[i].MemNode->isDereferenceable())
+ isDereferenceable = false;
- // Find if it is better to use vectors or integers to load and store
- // to memory.
- EVT JointMemOpVT;
- if (UseVectorTy) {
- JointMemOpVT = EVT::getVectorVT(Context, MemVT, NumElem);
- } else {
- unsigned SizeInBits = NumElem * ElementSizeBytes * 8;
- JointMemOpVT = EVT::getIntegerVT(Context, SizeInBits);
- }
+ // Find a legal type for the vector store.
+ EVT StoreTy = EVT::getVectorVT(Context, MemVT, i + 1);
+ bool IsFastSt, IsFastLd;
+ if (TLI.isTypeLegal(StoreTy) &&
+ TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
+ TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
+ FirstStoreAlign, &IsFastSt) &&
+ IsFastSt &&
+ TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS,
+ FirstLoadAlign, &IsFastLd) &&
+ IsFastLd) {
+ LastLegalVectorType = i + 1;
+ }
- SDLoc LoadDL(LoadNodes[0].MemNode);
- SDLoc StoreDL(StoreNodes[0].MemNode);
+ // Find a legal type for the integer store.
+ unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
+ StoreTy = EVT::getIntegerVT(Context, SizeInBits);
+ if (TLI.isTypeLegal(StoreTy) &&
+ TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
+ TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
+ FirstStoreAlign, &IsFastSt) &&
+ IsFastSt &&
+ TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS,
+ FirstLoadAlign, &IsFastLd) &&
+ IsFastLd) {
+ LastLegalIntegerType = i + 1;
+ DoIntegerTruncate = false;
+ // Or check whether a truncstore and extload is legal.
+ } else if (TLI.getTypeAction(Context, StoreTy) ==
+ TargetLowering::TypePromoteInteger) {
+ EVT LegalizedStoredValueTy = TLI.getTypeToTransformTo(Context, StoreTy);
+ if (TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy) &&
+ TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValueTy, DAG) &&
+ TLI.isLoadExtLegal(ISD::ZEXTLOAD, LegalizedStoredValueTy,
+ StoreTy) &&
+ TLI.isLoadExtLegal(ISD::SEXTLOAD, LegalizedStoredValueTy,
+ StoreTy) &&
+ TLI.isLoadExtLegal(ISD::EXTLOAD, LegalizedStoredValueTy, StoreTy) &&
+ TLI.allowsMemoryAccess(Context, DL, LegalizedStoredValueTy,
+ FirstStoreAS, FirstStoreAlign, &IsFastSt) &&
+ IsFastSt &&
+ TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS,
+ FirstLoadAlign, &IsFastLd) &&
+ IsFastLd) {
+ LastLegalIntegerType = i + 1;
+ DoIntegerTruncate = true;
+ }
+ }
+ }
- // The merged loads are required to have the same incoming chain, so
- // using the first's chain is acceptable.
- SDValue NewLoad = DAG.getLoad(JointMemOpVT, LoadDL, FirstLoad->getChain(),
- FirstLoad->getBasePtr(),
- FirstLoad->getPointerInfo(), FirstLoadAlign);
+ // Only use vector types if the vector type is larger than the integer type.
+ // If they are the same, use integers.
+ bool UseVectorTy = LastLegalVectorType > LastLegalIntegerType && !NoVectors;
+ unsigned LastLegalType =
+ std::max(LastLegalVectorType, LastLegalIntegerType);
- SDValue NewStoreChain =
- DAG.getNode(ISD::TokenFactor, StoreDL, MVT::Other, MergeStoreChains);
+ // We add +1 here because the LastXXX variables refer to an index while
+ // NumElem refers to a count of array elements.
+ unsigned NumElem = std::min(NumConsecutiveStores, LastConsecutiveLoad + 1);
+ NumElem = std::min(LastLegalType, NumElem);
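+ // e.g. NumConsecutiveStores = 4, LastConsecutiveLoad = 2 (an index), and
+ // LastLegalType = 4 give NumElem = min(4, min(4, 2 + 1)) = 3.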
- SDValue NewStore =
- DAG.getStore(NewStoreChain, StoreDL, NewLoad, FirstInChain->getBasePtr(),
- FirstInChain->getPointerInfo(), FirstStoreAlign);
+ if (NumElem < 2) {
+ StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 1);
+ continue;
+ }
- // Transfer chain users from old loads to the new load.
- for (unsigned i = 0; i < NumElem; ++i) {
- LoadSDNode *Ld = cast<LoadSDNode>(LoadNodes[i].MemNode);
- DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1),
- SDValue(NewLoad.getNode(), 1));
- }
+ // Find if it is better to use vectors or integers to load and store
+ // to memory.
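+ // e.g. with MemVT = i16 and NumElem = 4, this picks v4i16 if vectors are
+ // preferred and i64 (4 * 16 bits) otherwise.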
+ EVT JointMemOpVT;
+ if (UseVectorTy) {
+ JointMemOpVT = EVT::getVectorVT(Context, MemVT, NumElem);
+ } else {
+ unsigned SizeInBits = NumElem * ElementSizeBytes * 8;
+ JointMemOpVT = EVT::getIntegerVT(Context, SizeInBits);
+ }
+
+ SDLoc LoadDL(LoadNodes[0].MemNode);
+ SDLoc StoreDL(StoreNodes[0].MemNode);
+
+ // The merged loads are required to have the same incoming chain, so
+ // using the first's chain is acceptable.
+
+ SDValue NewStoreChain = getMergeStoreChains(StoreNodes, NumElem);
+ AddToWorklist(NewStoreChain.getNode());
+
+ MachineMemOperand::Flags MMOFlags = isDereferenceable ?
+ MachineMemOperand::MODereferenceable :
+ MachineMemOperand::MONone;
+
+ SDValue NewLoad, NewStore;
+ if (UseVectorTy || !DoIntegerTruncate) {
+ NewLoad = DAG.getLoad(JointMemOpVT, LoadDL, FirstLoad->getChain(),
+ FirstLoad->getBasePtr(),
+ FirstLoad->getPointerInfo(), FirstLoadAlign,
+ MMOFlags);
+ NewStore = DAG.getStore(NewStoreChain, StoreDL, NewLoad,
+ FirstInChain->getBasePtr(),
+ FirstInChain->getPointerInfo(), FirstStoreAlign);
+ } else { // This must be the truncstore/extload case
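+ // e.g. (illustrative types) if JointMemOpVT is i48 and is promoted to
+ // i64, emit an i64 extload of the i48-wide location followed by an i48
+ // truncstore of the loaded value.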
+ EVT ExtendedTy =
+ TLI.getTypeToTransformTo(*DAG.getContext(), JointMemOpVT);
+ NewLoad =
+ DAG.getExtLoad(ISD::EXTLOAD, LoadDL, ExtendedTy, FirstLoad->getChain(),
+ FirstLoad->getBasePtr(), FirstLoad->getPointerInfo(),
+ JointMemOpVT, FirstLoadAlign, MMOFlags);
+ NewStore = DAG.getTruncStore(NewStoreChain, StoreDL, NewLoad,
+ FirstInChain->getBasePtr(),
+ FirstInChain->getPointerInfo(), JointMemOpVT,
+ FirstInChain->getAlignment(),
+ FirstInChain->getMemOperand()->getFlags());
+ }
+
+ // Transfer chain users from old loads to the new load.
+ for (unsigned i = 0; i < NumElem; ++i) {
+ LoadSDNode *Ld = cast<LoadSDNode>(LoadNodes[i].MemNode);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1),
+ SDValue(NewLoad.getNode(), 1));
+ }
- if (UseAA) {
// Replace all of the stores with the new store.
for (unsigned i = 0; i < NumElem; ++i)
CombineTo(StoreNodes[i].MemNode, NewStore);
- } else {
- // Replace the last store with the new store.
- CombineTo(LatestOp, NewStore);
- // Erase all other stores.
- for (unsigned i = 0; i < NumElem; ++i) {
- // Remove all Store nodes.
- if (StoreNodes[i].MemNode == LatestOp)
- continue;
- StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
- DAG.ReplaceAllUsesOfValueWith(SDValue(St, 0), St->getChain());
- deleteAndRecombine(St);
- }
+ RV = true;
+ StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
+ continue;
}
-
- StoreNodes.erase(StoreNodes.begin() + NumElem, StoreNodes.end());
- return true;
+ return RV;
}
SDValue DAGCombiner::replaceStoreChain(StoreSDNode *ST, SDValue BetterChain) {
@@ -12256,19 +13267,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
if (SDValue NewST = TransformFPLoadStorePair(N))
return NewST;
- bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA
- : DAG.getSubtarget().useAA();
-#ifndef NDEBUG
- if (CombinerAAOnlyFunc.getNumOccurrences() &&
- CombinerAAOnlyFunc != DAG.getMachineFunction().getName())
- UseAA = false;
-#endif
- if (UseAA && ST->isUnindexed()) {
- // FIXME: We should do this even without AA enabled. AA will just allow
- // FindBetterChain to work in more situations. The problem with this is that
- // any combine that expects memory operations to be on consecutive chains
- // first needs to be updated to look for users of the same chain.
-
+ if (ST->isUnindexed()) {
// Walk up chain skipping non-aliasing memory nodes, on this store and any
// adjacent stores.
if (findBetterNeighborChains(ST)) {
@@ -12279,10 +13278,6 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
Chain = ST->getChain();
}
- // Try transforming N to an indexed store.
- if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
- return SDValue(N, 0);
-
// FIXME: is there such a thing as a truncating indexed store?
if (ST->isTruncatingStore() && ST->isUnindexed() &&
Value.getValueType().isInteger()) {
@@ -12302,8 +13297,15 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
if (SimplifyDemandedBits(
Value,
APInt::getLowBitsSet(Value.getScalarValueSizeInBits(),
- ST->getMemoryVT().getScalarSizeInBits())))
+ ST->getMemoryVT().getScalarSizeInBits()))) {
+ // Re-visit the store if anything changed and the store hasn't been merged
+ // with another node (N is deleted). SimplifyDemandedBits will add Value's
+ // node back to the worklist if necessary, but we also need to re-visit
+ // the Store node itself.
+ if (N->getOpcode() != ISD::DELETED_NODE)
+ AddToWorklist(N);
return SDValue(N, 0);
+ }
}
// If this is a load followed by a store to the same location, then the store
@@ -12319,14 +13321,28 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
}
}
- // If this is a store followed by a store with the same value to the same
- // location, then the store is dead/noop.
if (StoreSDNode *ST1 = dyn_cast<StoreSDNode>(Chain)) {
- if (ST1->getBasePtr() == Ptr && ST->getMemoryVT() == ST1->getMemoryVT() &&
- ST1->getValue() == Value && ST->isUnindexed() && !ST->isVolatile() &&
- ST1->isUnindexed() && !ST1->isVolatile()) {
- // The store is dead, remove it.
- return Chain;
+ if (ST->isUnindexed() && !ST->isVolatile() && ST1->isUnindexed() &&
+ !ST1->isVolatile() && ST1->getBasePtr() == Ptr &&
+ ST->getMemoryVT() == ST1->getMemoryVT()) {
+ // If this is a store followed by a store with the same value to the same
+ // location, then the store is dead/noop.
+ if (ST1->getValue() == Value) {
+ // The store is dead, remove it.
+ return Chain;
+ }
+
+ // If the store preceding this one writes to the same location and no
+ // other node is chained to that preceding store, we can effectively
+ // drop it. Do not remove stores to undef as they may be used as
+ // data sinks.
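+ // e.g. (illustrative) two chained i32 stores to the same pointer: if
+ // this store is the only user of the earlier store's chain, the earlier
+ // store is fully overwritten and may be removed.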
+ if (OptLevel != CodeGenOpt::None && ST1->hasOneUse() &&
+ !ST1->getBasePtr().isUndef()) {
+ // ST1 is fully overwritten and can be elided. Combine with its chain
+ // value.
+ CombineTo(ST1, ST1->getChain());
+ return SDValue();
+ }
}
}
@@ -12342,29 +13358,31 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
// Only perform this optimization before the types are legal, because we
// don't want to perform this optimization on every DAGCombine invocation.
- if (!LegalTypes) {
+ if ((TLI.mergeStoresAfterLegalization()) ? Level == AfterLegalizeDAG
+ : !LegalTypes) {
for (;;) {
// There can be multiple store sequences on the same chain.
// Keep trying to merge store sequences until we are unable to do so
// or until we merge the last store on the chain.
- SmallVector<MemOpLink, 8> StoreNodes;
- bool Changed = MergeConsecutiveStores(ST, StoreNodes);
+ bool Changed = MergeConsecutiveStores(ST);
if (!Changed) break;
-
- if (any_of(StoreNodes,
- [ST](const MemOpLink &Link) { return Link.MemNode == ST; })) {
- // ST has been merged and no longer exists.
+ // Return N, as the merge only uses CombineTo and no worklist
+ // cleanup is necessary.
+ if (N->getOpcode() == ISD::DELETED_NODE || !isa<StoreSDNode>(N))
return SDValue(N, 0);
- }
}
}
+ // Try transforming N to an indexed store.
+ if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
+ return SDValue(N, 0);
+
// Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr'
//
// Make sure to do this only after attempting to merge stores in order to
// avoid changing the types of some subset of stores due to visit order,
// preventing their merging.
- if (isa<ConstantFPSDNode>(Value)) {
+ if (isa<ConstantFPSDNode>(ST->getValue())) {
if (SDValue NewSt = replaceStoreOfFPConstant(ST))
return NewSt;
}
@@ -12493,10 +13511,6 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
EVT VT = InVec.getValueType();
- // If we can't generate a legal BUILD_VECTOR, exit
- if (LegalOperations && !TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
- return SDValue();
-
// Check that we know which element is being inserted
if (!isa<ConstantSDNode>(EltNo))
return SDValue();
@@ -12511,8 +13525,7 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
// do this only if indices are both constants and Idx1 < Idx0.
if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT && InVec.hasOneUse()
&& isa<ConstantSDNode>(InVec.getOperand(2))) {
- unsigned OtherElt =
- cast<ConstantSDNode>(InVec.getOperand(2))->getZExtValue();
+ unsigned OtherElt = InVec.getConstantOperandVal(2);
if (Elt < OtherElt) {
// Swap nodes.
SDValue NewOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT,
@@ -12523,6 +13536,10 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
}
}
+ // If we can't generate a legal BUILD_VECTOR, exit
+ if (LegalOperations && !TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
+ return SDValue();
+
// Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
// be converted to a BUILD_VECTOR). Fill in the Ops vector with the
// vector elements.
@@ -12544,11 +13561,7 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
// All the operands of BUILD_VECTOR must have the same type;
// we enforce that here.
EVT OpVT = Ops[0].getValueType();
- if (InVal.getValueType() != OpVT)
- InVal = OpVT.bitsGT(InVal.getValueType()) ?
- DAG.getNode(ISD::ANY_EXTEND, DL, OpVT, InVal) :
- DAG.getNode(ISD::TRUNCATE, DL, OpVT, InVal);
- Ops[Elt] = InVal;
+ Ops[Elt] = OpVT.isInteger() ? DAG.getAnyExtOrTrunc(InVal, DL, OpVT) : InVal;
}
// Return the new vector
@@ -12568,6 +13581,11 @@ SDValue DAGCombiner::ReplaceExtractVectorEltOfLoadWithNarrowedLoad(
if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, VecEltVT))
return SDValue();
+ ISD::LoadExtType ExtTy = ResultVT.bitsGT(VecEltVT) ?
+ ISD::NON_EXTLOAD : ISD::EXTLOAD;
+ if (!TLI.shouldReduceLoadWidth(OriginalLoad, ExtTy, VecEltVT))
+ return SDValue();
+
Align = NewAlign;
SDValue NewPtr = OriginalLoad->getBasePtr();
@@ -12639,6 +13657,9 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
EVT VT = InVec.getValueType();
EVT NVT = N->getValueType(0);
+ if (InVec.isUndef())
+ return DAG.getUNDEF(NVT);
+
if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR) {
// Check if the result type doesn't match the inserted element type. A
// SCALAR_TO_VECTOR may truncate the inserted element and the
@@ -13022,7 +14043,7 @@ SDValue DAGCombiner::reduceBuildVecConvertToConvertBuildVec(SDNode *N) {
return DAG.getNode(Opcode, DL, VT, BV);
}
-SDValue DAGCombiner::createBuildVecShuffle(SDLoc DL, SDNode *N,
+SDValue DAGCombiner::createBuildVecShuffle(const SDLoc &DL, SDNode *N,
ArrayRef<int> VectorMask,
SDValue VecIn1, SDValue VecIn2,
unsigned LeftIdx) {
@@ -13088,6 +14109,11 @@ SDValue DAGCombiner::createBuildVecShuffle(SDLoc DL, SDNode *N,
// when we start sorting the vectors by type.
return SDValue();
}
+ } else if (InVT2.getSizeInBits() * 2 == VT.getSizeInBits() &&
+ InVT1.getSizeInBits() == VT.getSizeInBits()) {
+ SmallVector<SDValue, 2> ConcatOps(2, DAG.getUNDEF(InVT2));
+ ConcatOps[0] = VecIn2;
+ VecIn2 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
} else {
// TODO: Support cases where the length mismatch isn't exactly by a
// factor of 2.
@@ -13293,6 +14319,73 @@ SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) {
return Shuffles[0];
}
+// Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT
+// operations which can be matched to a truncate.
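+// e.g. (v4i16 (build_vector (extract_elt v8i16:x, 0), (extract_elt x, 2),
+// (extract_elt x, 4), (extract_elt x, 6)))
+// -> (v4i16 (truncate (v4i32 (bitcast x))))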
+SDValue DAGCombiner::reduceBuildVecToTrunc(SDNode *N) {
+ // TODO: Add support for big-endian.
+ if (DAG.getDataLayout().isBigEndian())
+ return SDValue();
+ if (N->getNumOperands() < 2)
+ return SDValue();
+ SDLoc DL(N);
+ EVT VT = N->getValueType(0);
+ unsigned NumElems = N->getNumOperands();
+
+ if (!isTypeLegal(VT))
+ return SDValue();
+
+ // If the input is something other than an EXTRACT_VECTOR_ELT with a constant
+ // index, bail out.
+ // TODO: Allow undef elements in some cases?
+ if (any_of(N->ops(), [VT](SDValue Op) {
+ return Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
+ !isa<ConstantSDNode>(Op.getOperand(1)) ||
+ Op.getValueType() != VT.getVectorElementType();
+ }))
+ return SDValue();
+
+ // Helper for obtaining an EXTRACT_VECTOR_ELT's constant index
+ auto GetExtractIdx = [](SDValue Extract) {
+ return cast<ConstantSDNode>(Extract.getOperand(1))->getSExtValue();
+ };
+
+ // The first BUILD_VECTOR operand must be an extract from index zero
+ // (assuming no undef and little-endian).
+ if (GetExtractIdx(N->getOperand(0)) != 0)
+ return SDValue();
+
+ // Compute the stride from the first index.
+ int Stride = GetExtractIdx(N->getOperand(1));
+ SDValue ExtractedFromVec = N->getOperand(0).getOperand(0);
+
+ // Proceed only if the stride and the types can be matched to a truncate.
+ if ((Stride == 1 || !isPowerOf2_32(Stride)) ||
+ (ExtractedFromVec.getValueType().getVectorNumElements() !=
+ Stride * NumElems) ||
+ (VT.getScalarSizeInBits() * Stride > 64))
+ return SDValue();
+
+ // Check remaining operands are consistent with the computed stride.
+ for (unsigned i = 1; i != NumElems; ++i) {
+ SDValue Op = N->getOperand(i);
+
+ if ((Op.getOperand(0) != ExtractedFromVec) ||
+ (GetExtractIdx(Op) != Stride * i))
+ return SDValue();
+ }
+
+ // All checks were ok, construct the truncate.
+ LLVMContext &Ctx = *DAG.getContext();
+ EVT NewVT = VT.getVectorVT(
+ Ctx, EVT::getIntegerVT(Ctx, VT.getScalarSizeInBits() * Stride), NumElems);
+ EVT TruncVT =
+ VT.isFloatingPoint() ? VT.changeVectorElementTypeToInteger() : VT;
+
+ SDValue Res = DAG.getBitcast(NewVT, ExtractedFromVec);
+ Res = DAG.getNode(ISD::TRUNCATE, SDLoc(N), TruncVT, Res);
+ return DAG.getBitcast(VT, Res);
+}
+
SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
EVT VT = N->getValueType(0);
@@ -13300,12 +14393,45 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
if (ISD::allOperandsUndef(N))
return DAG.getUNDEF(VT);
+ // Check if we can express BUILD_VECTOR via subvector extract.
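+ // e.g. (v2i32 (build_vector (extract_elt v4i32:V, 2), (extract_elt V, 3)))
+ // -> (v2i32 (extract_subvector V, 2))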
+ if (!LegalTypes && (N->getNumOperands() > 1)) {
+ SDValue Op0 = N->getOperand(0);
+ auto checkElem = [&](SDValue Op) -> uint64_t {
+ if ((Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT) &&
+ (Op0.getOperand(0) == Op.getOperand(0)))
+ if (auto CNode = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
+ return CNode->getZExtValue();
+ return -1;
+ };
+
+ int Offset = checkElem(Op0);
+ for (unsigned i = 0; i < N->getNumOperands(); ++i) {
+ if (Offset + i != checkElem(N->getOperand(i))) {
+ Offset = -1;
+ break;
+ }
+ }
+
+ if ((Offset == 0) &&
+ (Op0.getOperand(0).getValueType() == N->getValueType(0)))
+ return Op0.getOperand(0);
+ if ((Offset != -1) &&
+ ((Offset % N->getValueType(0).getVectorNumElements()) ==
+ 0)) // IDX must be multiple of output size.
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), N->getValueType(0),
+ Op0.getOperand(0), Op0.getOperand(1));
+ }
+
if (SDValue V = reduceBuildVecExtToExtBuildVec(N))
return V;
if (SDValue V = reduceBuildVecConvertToConvertBuildVec(N))
return V;
+ if (TLI.isDesirableToCombineBuildVectorToTruncate())
+ if (SDValue V = reduceBuildVecToTrunc(N))
+ return V;
+
if (SDValue V = reduceBuildVecToShuffle(N))
return V;
@@ -13419,7 +14545,7 @@ static SDValue combineConcatVectorOfExtracts(SDNode *N, SelectionDAG &DAG) {
if (!isa<ConstantSDNode>(Op.getOperand(1)))
return SDValue();
- int ExtIdx = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
+ int ExtIdx = Op.getConstantOperandVal(1);
// Ensure that we are extracting a subvector from a vector the same
// size as the result.
@@ -13491,8 +14617,11 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
if (!SclTy.isFloatingPoint() && !SclTy.isInteger())
return SDValue();
- EVT NVT = EVT::getVectorVT(*DAG.getContext(), SclTy,
- VT.getSizeInBits() / SclTy.getSizeInBits());
+ unsigned VNTNumElms = VT.getSizeInBits() / SclTy.getSizeInBits();
+ if (VNTNumElms < 2)
+ return SDValue();
+
+ EVT NVT = EVT::getVectorVT(*DAG.getContext(), SclTy, VNTNumElms);
if (!TLI.isTypeLegal(NVT) || !TLI.isTypeLegal(Scalar.getValueType()))
return SDValue();
@@ -13607,19 +14736,153 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
return SDValue();
}
+/// If we are extracting a subvector produced by a wide binary operator with
+/// at least one operand that was the result of a vector concatenation, then try
+/// to use the narrow vector operands directly to avoid the concatenation and
+/// extraction.
+static SDValue narrowExtractedVectorBinOp(SDNode *Extract, SelectionDAG &DAG) {
+ // TODO: Refactor with the caller (visitEXTRACT_SUBVECTOR), so we can share
+ // some of these bailouts with other transforms.
+
+ // The extract index must be a constant, so we can map it to a concat operand.
+ auto *ExtractIndex = dyn_cast<ConstantSDNode>(Extract->getOperand(1));
+ if (!ExtractIndex)
+ return SDValue();
+
+ // Only handle the case where we are doubling and then halving. A larger ratio
+ // may require more than two narrow binops to replace the wide binop.
+ EVT VT = Extract->getValueType(0);
+ unsigned NumElems = VT.getVectorNumElements();
+ assert((ExtractIndex->getZExtValue() % NumElems) == 0 &&
+ "Extract index is not a multiple of the vector length.");
+ if (Extract->getOperand(0).getValueSizeInBits() != VT.getSizeInBits() * 2)
+ return SDValue();
+
+ // We are looking for an optionally bitcasted wide vector binary operator
+ // feeding an extract subvector.
+ SDValue BinOp = Extract->getOperand(0);
+ if (BinOp.getOpcode() == ISD::BITCAST)
+ BinOp = BinOp.getOperand(0);
+
+ // TODO: The motivating case for this transform is an x86 AVX1 target. That
+ // target has temptingly almost legal versions of bitwise logic ops in 256-bit
+ // flavors, but no other 256-bit integer support. This could be extended to
+ // handle any binop, but that may require fixing/adding other folds to avoid
+ // codegen regressions.
+ unsigned BOpcode = BinOp.getOpcode();
+ if (BOpcode != ISD::AND && BOpcode != ISD::OR && BOpcode != ISD::XOR)
+ return SDValue();
+
+ // The binop must be a vector type, so we can chop it in half.
+ EVT WideBVT = BinOp.getValueType();
+ if (!WideBVT.isVector())
+ return SDValue();
+
+ // Bail out if the target does not support a narrower version of the binop.
+ EVT NarrowBVT = EVT::getVectorVT(*DAG.getContext(), WideBVT.getScalarType(),
+ WideBVT.getVectorNumElements() / 2);
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ if (!TLI.isOperationLegalOrCustomOrPromote(BOpcode, NarrowBVT))
+ return SDValue();
+
+ // Peek through bitcasts of the binary operator operands if needed.
+ SDValue LHS = BinOp.getOperand(0);
+ if (LHS.getOpcode() == ISD::BITCAST)
+ LHS = LHS.getOperand(0);
+
+ SDValue RHS = BinOp.getOperand(1);
+ if (RHS.getOpcode() == ISD::BITCAST)
+ RHS = RHS.getOperand(0);
+
+ // We need at least one concatenation operation of a binop operand to make
+ // this transform worthwhile. The concat must double the input vector sizes.
+ // TODO: Should we also handle INSERT_SUBVECTOR patterns?
+ bool ConcatL =
+ LHS.getOpcode() == ISD::CONCAT_VECTORS && LHS.getNumOperands() == 2;
+ bool ConcatR =
+ RHS.getOpcode() == ISD::CONCAT_VECTORS && RHS.getNumOperands() == 2;
+ if (!ConcatL && !ConcatR)
+ return SDValue();
+
+ // If one of the binop operands was not the result of a concat, we must
+ // extract a half-sized operand for our new narrow binop. We can't just reuse
+ // the original extract index operand because we may have bitcasted.
+ unsigned ConcatOpNum = ExtractIndex->getZExtValue() / NumElems;
+ unsigned ExtBOIdx = ConcatOpNum * NarrowBVT.getVectorNumElements();
+ EVT ExtBOIdxVT = Extract->getOperand(1).getValueType();
+ SDLoc DL(Extract);
+
+ // extract (binop (concat X1, X2), (concat Y1, Y2)), N --> binop XN, YN
+ // extract (binop (concat X1, X2), Y), N --> binop XN, (extract Y, N)
+ // extract (binop X, (concat Y1, Y2)), N --> binop (extract X, N), YN
+ SDValue X = ConcatL ? DAG.getBitcast(NarrowBVT, LHS.getOperand(ConcatOpNum))
+ : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
+ BinOp.getOperand(0),
+ DAG.getConstant(ExtBOIdx, DL, ExtBOIdxVT));
+
+ SDValue Y = ConcatR ? DAG.getBitcast(NarrowBVT, RHS.getOperand(ConcatOpNum))
+ : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
+ BinOp.getOperand(1),
+ DAG.getConstant(ExtBOIdx, DL, ExtBOIdxVT));
+
+ SDValue NarrowBinOp = DAG.getNode(BOpcode, DL, NarrowBVT, X, Y);
+ return DAG.getBitcast(VT, NarrowBinOp);
+}
+
+/// If we are extracting a subvector from a wide vector load, convert to a
+/// narrow load to eliminate the extraction:
+/// (extract_subvector (load wide vector)) --> (load narrow vector)
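+/// e.g. (v2i32 (extract_subvector (v8i32 (load %p)), 2))
+/// -> (v2i32 (load %p + 8)) on little-endian targets (%p illustrative).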
+static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG) {
+ // TODO: Add support for big-endian. The offset calculation must be adjusted.
+ if (DAG.getDataLayout().isBigEndian())
+ return SDValue();
+
+ // TODO: The one-use check is overly conservative. Check the cost of the
+ // extract instead or remove that condition entirely.
+ auto *Ld = dyn_cast<LoadSDNode>(Extract->getOperand(0));
+ auto *ExtIdx = dyn_cast<ConstantSDNode>(Extract->getOperand(1));
+ if (!Ld || !Ld->hasOneUse() || Ld->getExtensionType() || Ld->isVolatile() ||
+ !ExtIdx)
+ return SDValue();
+
+ // The narrow load will be offset from the base address of the old load if
+ // we are extracting from something besides index 0 (little-endian).
+ EVT VT = Extract->getValueType(0);
+ SDLoc DL(Extract);
+ SDValue BaseAddr = Ld->getOperand(1);
+ unsigned Offset = ExtIdx->getZExtValue() * VT.getScalarType().getStoreSize();
+
+ // TODO: Use "BaseIndexOffset" to make this more effective.
+ SDValue NewAddr = DAG.getMemBasePlusOffset(BaseAddr, Offset, DL);
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineMemOperand *MMO = MF.getMachineMemOperand(Ld->getMemOperand(), Offset,
+ VT.getStoreSize());
+ SDValue NewLd = DAG.getLoad(VT, DL, Ld->getChain(), NewAddr, MMO);
+ DAG.makeEquivalentMemoryOrdering(Ld, NewLd);
+ return NewLd;
+}
+
SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) {
EVT NVT = N->getValueType(0);
SDValue V = N->getOperand(0);
- if (V->getOpcode() == ISD::CONCAT_VECTORS) {
- // Combine:
- // (extract_subvec (concat V1, V2, ...), i)
- // Into:
- // Vi if possible
- // Only operand 0 is checked as 'concat' assumes all inputs of the same
- // type.
- if (V->getOperand(0).getValueType() != NVT)
- return SDValue();
+ // Extract from UNDEF is UNDEF.
+ if (V.isUndef())
+ return DAG.getUNDEF(NVT);
+
+ if (TLI.isOperationLegalOrCustomOrPromote(ISD::LOAD, NVT))
+ if (SDValue NarrowLoad = narrowExtractedVectorLoad(N, DAG))
+ return NarrowLoad;
+
+ // Combine:
+ // (extract_subvec (concat V1, V2, ...), i)
+ // Into:
+ // Vi if possible
+ // Only operand 0 is checked, as 'concat' assumes all inputs are of the
+ // same type.
+ if (V->getOpcode() == ISD::CONCAT_VECTORS &&
+ isa<ConstantSDNode>(N->getOperand(1)) &&
+ V->getOperand(0).getValueType() == NVT) {
unsigned Idx = N->getConstantOperandVal(1);
unsigned NumElems = NVT.getVectorNumElements();
assert((Idx % NumElems) == 0 &&
@@ -13633,19 +14896,16 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) {
if (V->getOpcode() == ISD::INSERT_SUBVECTOR) {
// Handle only simple case where vector being inserted and vector
- // being extracted are of same type, and are half size of larger vectors.
- EVT BigVT = V->getOperand(0).getValueType();
+ // being extracted are of same size.
EVT SmallVT = V->getOperand(1).getValueType();
- if (!NVT.bitsEq(SmallVT) || NVT.getSizeInBits()*2 != BigVT.getSizeInBits())
+ if (!NVT.bitsEq(SmallVT))
return SDValue();
- // Only handle cases where both indexes are constants with the same type.
+ // Only handle cases where both indexes are constants.
ConstantSDNode *ExtIdx = dyn_cast<ConstantSDNode>(N->getOperand(1));
ConstantSDNode *InsIdx = dyn_cast<ConstantSDNode>(V->getOperand(2));
- if (InsIdx && ExtIdx &&
- InsIdx->getValueType(0).getSizeInBits() <= 64 &&
- ExtIdx->getValueType(0).getSizeInBits() <= 64) {
+ if (InsIdx && ExtIdx) {
// Combine:
// (extract_subvec (insert_subvec V1, V2, InsIdx), ExtIdx)
// Into:
@@ -13661,6 +14921,9 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) {
}
}
+ if (SDValue NarrowBOp = narrowExtractedVectorBinOp(N, DAG))
+ return NarrowBOp;
+
return SDValue();
}
@@ -13892,6 +15155,167 @@ static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN,
return DAG.getBuildVector(VT, SDLoc(SVN), Ops);
}
+// Match shuffles that can be converted to any_vector_extend_in_reg.
+// This is often generated during legalization.
+// e.g. v4i32 <0,u,1,u> -> (v2i64 any_vector_extend_in_reg(v4i32 src))
+// TODO Add support for ZERO_EXTEND_VECTOR_INREG when we have a test case.
+static SDValue combineShuffleToVectorExtend(ShuffleVectorSDNode *SVN,
+ SelectionDAG &DAG,
+ const TargetLowering &TLI,
+ bool LegalOperations) {
+ EVT VT = SVN->getValueType(0);
+ bool IsBigEndian = DAG.getDataLayout().isBigEndian();
+
+ // TODO Add support for big-endian when we have a test case.
+ if (!VT.isInteger() || IsBigEndian)
+ return SDValue();
+
+ unsigned NumElts = VT.getVectorNumElements();
+ unsigned EltSizeInBits = VT.getScalarSizeInBits();
+ ArrayRef<int> Mask = SVN->getMask();
+ SDValue N0 = SVN->getOperand(0);
+
+ // shuffle<0,-1,1,-1> == (v2i64 anyextend_vector_inreg(v4i32))
+ auto isAnyExtend = [&Mask, &NumElts](unsigned Scale) {
+ for (unsigned i = 0; i != NumElts; ++i) {
+ if (Mask[i] < 0)
+ continue;
+ if ((i % Scale) == 0 && Mask[i] == (int)(i / Scale))
+ continue;
+ return false;
+ }
+ return true;
+ };
+
+ // Attempt to match a '*_extend_vector_inreg' shuffle; we just search for
+ // power-of-2 extensions, as they are the most likely.
+ for (unsigned Scale = 2; Scale < NumElts; Scale *= 2) {
+ if (!isAnyExtend(Scale))
+ continue;
+
+ EVT OutSVT = EVT::getIntegerVT(*DAG.getContext(), EltSizeInBits * Scale);
+ EVT OutVT = EVT::getVectorVT(*DAG.getContext(), OutSVT, NumElts / Scale);
+ if (!LegalOperations ||
+ TLI.isOperationLegalOrCustom(ISD::ANY_EXTEND_VECTOR_INREG, OutVT))
+ return DAG.getBitcast(VT,
+ DAG.getAnyExtendVectorInReg(N0, SDLoc(SVN), OutVT));
+ }
+
+ return SDValue();
+}
+
+// Detect 'truncate_vector_inreg' style shuffles that pack the lower parts of
+// each source element of a large type into the lowest elements of a smaller
+// destination type. This is often generated during legalization.
+// If the source node itself was a '*_extend_vector_inreg' node then we
+// should be able to remove it.
+static SDValue combineTruncationShuffle(ShuffleVectorSDNode *SVN,
+ SelectionDAG &DAG) {
+ EVT VT = SVN->getValueType(0);
+ bool IsBigEndian = DAG.getDataLayout().isBigEndian();
+
+ // TODO Add support for big-endian when we have a test case.
+ if (!VT.isInteger() || IsBigEndian)
+ return SDValue();
+
+ SDValue N0 = SVN->getOperand(0);
+ while (N0.getOpcode() == ISD::BITCAST)
+ N0 = N0.getOperand(0);
+
+ unsigned Opcode = N0.getOpcode();
+ if (Opcode != ISD::ANY_EXTEND_VECTOR_INREG &&
+ Opcode != ISD::SIGN_EXTEND_VECTOR_INREG &&
+ Opcode != ISD::ZERO_EXTEND_VECTOR_INREG)
+ return SDValue();
+
+ SDValue N00 = N0.getOperand(0);
+ ArrayRef<int> Mask = SVN->getMask();
+ unsigned NumElts = VT.getVectorNumElements();
+ unsigned EltSizeInBits = VT.getScalarSizeInBits();
+ unsigned ExtSrcSizeInBits = N00.getScalarValueSizeInBits();
+ unsigned ExtDstSizeInBits = N0.getScalarValueSizeInBits();
+
+ if (ExtDstSizeInBits % ExtSrcSizeInBits != 0)
+ return SDValue();
+ unsigned ExtScale = ExtDstSizeInBits / ExtSrcSizeInBits;
+
+ // (v4i32 truncate_vector_inreg(v2i64)) == shuffle<0,2,-1,-1>
+ // (v8i16 truncate_vector_inreg(v4i32)) == shuffle<0,2,4,6,-1,-1,-1,-1>
+ // (v8i16 truncate_vector_inreg(v2i64)) == shuffle<0,4,-1,-1,-1,-1,-1,-1>
+ auto isTruncate = [&Mask, &NumElts](unsigned Scale) {
+ for (unsigned i = 0; i != NumElts; ++i) {
+ if (Mask[i] < 0)
+ continue;
+ if ((i * Scale) < NumElts && Mask[i] == (int)(i * Scale))
+ continue;
+ return false;
+ }
+ return true;
+ };
+
+ // At the moment we just handle the case where we've truncated back to the
+ // same size as before the extension.
+ // TODO: handle more extension/truncation cases as they arise.
+ if (EltSizeInBits != ExtSrcSizeInBits)
+ return SDValue();
+
+ // We can remove *extend_vector_inreg only if the truncation happens at
+ // the same scale as the extension.
+ if (isTruncate(ExtScale))
+ return DAG.getBitcast(VT, N00);
+
+ return SDValue();
+}
+
+// Combine shuffles of splat-shuffles of the form:
+// shuffle (shuffle V, undef, splat-mask), undef, M
+// If splat-mask contains undef elements, we need to be careful about
+// introducing undefs in the folded mask that are not the result of composing
+// the masks of the shuffles.
+static SDValue combineShuffleOfSplat(ArrayRef<int> UserMask,
+ ShuffleVectorSDNode *Splat,
+ SelectionDAG &DAG) {
+ ArrayRef<int> SplatMask = Splat->getMask();
+ assert(UserMask.size() == SplatMask.size() && "Mask length mismatch");
+
+ // Prefer simplifying to the splat-shuffle, if possible. This is legal if
+ // every undef mask element in the splat-shuffle has a corresponding undef
+ // element in the user-shuffle's mask or if the composition of mask elements
+ // would result in undef.
+ // Examples for (shuffle (shuffle v, undef, SplatMask), undef, UserMask):
+ // * UserMask=[0,2,u,u], SplatMask=[2,u,2,u] -> [2,2,u,u]
+ // In this case it is not legal to simplify to the splat-shuffle because we
+ // may be exposing to the users of the shuffle an undef element at index 1
+ // which was not there before the combine.
+ // * UserMask=[0,u,2,u], SplatMask=[2,u,2,u] -> [2,u,2,u]
+ // In this case the composition of masks yields SplatMask, so it's ok to
+ // simplify to the splat-shuffle.
+ // * UserMask=[3,u,2,u], SplatMask=[2,u,2,u] -> [u,u,2,u]
+ // In this case the composed mask includes all undef elements of SplatMask
+ // and in addition sets element zero to undef. It is safe to simplify to
+ // the splat-shuffle.
+ auto CanSimplifyToExistingSplat = [](ArrayRef<int> UserMask,
+ ArrayRef<int> SplatMask) {
+ for (unsigned i = 0, e = UserMask.size(); i != e; ++i)
+ if (UserMask[i] != -1 && SplatMask[i] == -1 &&
+ SplatMask[UserMask[i]] != -1)
+ return false;
+ return true;
+ };
+ if (CanSimplifyToExistingSplat(UserMask, SplatMask))
+ return SDValue(Splat, 0);
+
+ // Create a new shuffle with a mask that is composed of the two shuffles'
+ // masks.
+ SmallVector<int, 32> NewMask;
+ for (int Idx : UserMask)
+ NewMask.push_back(Idx == -1 ? -1 : SplatMask[Idx]);
+
+ return DAG.getVectorShuffle(Splat->getValueType(0), SDLoc(Splat),
+ Splat->getOperand(0), Splat->getOperand(1),
+ NewMask);
+}
+
SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
EVT VT = N->getValueType(0);
unsigned NumElts = VT.getVectorNumElements();
@@ -13938,6 +15362,11 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
return DAG.getVectorShuffle(VT, SDLoc(N), N0, N1, NewMask);
}
+ // A shuffle of a single vector that is a splat can always be folded.
+ if (auto *N0Shuf = dyn_cast<ShuffleVectorSDNode>(N0))
+ if (N1->isUndef() && N0Shuf->isSplat())
+ return combineShuffleOfSplat(SVN->getMask(), N0Shuf, DAG);
+
// If it is a splat, check if the argument vector is another splat or a
// build_vector.
if (SVN->isSplat() && SVN->getSplatIndex() < (int)NumElts) {
@@ -13996,6 +15425,14 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
if (SDValue S = simplifyShuffleOperands(SVN, N0, N1, DAG))
return S;
+ // Match shuffles that can be converted to any_vector_extend_in_reg.
+ if (SDValue V = combineShuffleToVectorExtend(SVN, DAG, TLI, LegalOperations))
+ return V;
+
+ // Combine "truncate_vector_in_reg" style shuffles.
+ if (SDValue V = combineTruncationShuffle(SVN, DAG))
+ return V;
+
if (N0.getOpcode() == ISD::CONCAT_VECTORS &&
Level < AfterLegalizeVectorOps &&
(N1.isUndef() ||
@@ -14253,6 +15690,16 @@ SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
SDValue N1 = N->getOperand(1);
SDValue N2 = N->getOperand(2);
+ // If inserting an UNDEF, just return the original vector.
+ if (N1.isUndef())
+ return N0;
+
+ // If this is an insert of an extracted vector into an undef vector, we can
+ // just use the input to the extract.
+ if (N0.isUndef() && N1.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
+ N1.getOperand(1) == N2 && N1.getOperand(0).getValueType() == VT)
+ return N1.getOperand(0);
+
// Combine INSERT_SUBVECTORs where we are inserting to the same index.
// INSERT_SUBVECTOR( INSERT_SUBVECTOR( Vec, SubOld, Idx ), SubNew, Idx )
// --> INSERT_SUBVECTOR( Vec, SubNew, Idx )
@@ -14262,26 +15709,39 @@ SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0.getOperand(0),
N1, N2);
- if (N0.getValueType() != N1.getValueType())
+ if (!isa<ConstantSDNode>(N2))
return SDValue();
+ unsigned InsIdx = cast<ConstantSDNode>(N2)->getZExtValue();
+
+ // Canonicalize insert_subvector dag nodes.
+ // Example:
+ // (insert_subvector (insert_subvector A, Idx0), Idx1)
+ // -> (insert_subvector (insert_subvector A, Idx1), Idx0)
+ if (N0.getOpcode() == ISD::INSERT_SUBVECTOR && N0.hasOneUse() &&
+ N1.getValueType() == N0.getOperand(1).getValueType() &&
+ isa<ConstantSDNode>(N0.getOperand(2))) {
+ unsigned OtherIdx = N0.getConstantOperandVal(2);
+ if (InsIdx < OtherIdx) {
+ // Swap nodes.
+ SDValue NewOp = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT,
+ N0.getOperand(0), N1, N2);
+ AddToWorklist(NewOp.getNode());
+ return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N0.getNode()),
+ VT, NewOp, N0.getOperand(1), N0.getOperand(2));
+ }
+ }
+
// If the input vector is a concatenation, and the insert replaces
- // one of the halves, we can optimize into a single concat_vectors.
- if (N0.getOpcode() == ISD::CONCAT_VECTORS && N0->getNumOperands() == 2 &&
- N2.getOpcode() == ISD::Constant) {
- APInt InsIdx = cast<ConstantSDNode>(N2)->getAPIntValue();
-
- // Lower half: fold (insert_subvector (concat_vectors X, Y), Z) ->
- // (concat_vectors Z, Y)
- if (InsIdx == 0)
- return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, N1,
- N0.getOperand(1));
+ // one of the pieces, we can optimize into a single concat_vectors.
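+ // e.g. (insert_subvector (concat_vectors v2f32:X, v2f32:Y), v2f32:Z, 2)
+ // -> (concat_vectors X, Z)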
+ if (N0.getOpcode() == ISD::CONCAT_VECTORS && N0.hasOneUse() &&
+ N0.getOperand(0).getValueType() == N1.getValueType()) {
+ unsigned Factor = N1.getValueType().getVectorNumElements();
+
+ SmallVector<SDValue, 8> Ops(N0->op_begin(), N0->op_end());
+ Ops[cast<ConstantSDNode>(N2)->getZExtValue() / Factor] = N1;
- // Upper half: fold (insert_subvector (concat_vectors X, Y), Z) ->
- // (concat_vectors X, Z)
- if (InsIdx == VT.getVectorNumElements() / 2)
- return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, N0.getOperand(0),
- N1);
+ return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
}
return SDValue();
@@ -14366,9 +15826,9 @@ SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
// Extract the sub element from the constant bit mask.
if (DAG.getDataLayout().isBigEndian()) {
- Bits = Bits.lshr((Split - SubIdx - 1) * NumSubBits);
+ Bits.lshrInPlace((Split - SubIdx - 1) * NumSubBits);
} else {
- Bits = Bits.lshr(SubIdx * NumSubBits);
+ Bits.lshrInPlace(SubIdx * NumSubBits);
}
if (Split > 1)
@@ -15041,7 +16501,7 @@ SDValue DAGCombiner::BuildLogBase2(SDValue V, const SDLoc &DL) {
/// =>
/// X_{i+1} = X_i (2 - A X_i) = X_i + X_i (1 - A X_i) [this second form
/// does not require additional intermediate precision]
-SDValue DAGCombiner::BuildReciprocalEstimate(SDValue Op, SDNodeFlags *Flags) {
+SDValue DAGCombiner::BuildReciprocalEstimate(SDValue Op, SDNodeFlags Flags) {
if (Level >= AfterLegalizeDAG)
return SDValue();
@@ -15096,7 +16556,7 @@ SDValue DAGCombiner::BuildReciprocalEstimate(SDValue Op, SDNodeFlags *Flags) {
/// As a result, we precompute A/2 prior to the iteration loop.
SDValue DAGCombiner::buildSqrtNROneConst(SDValue Arg, SDValue Est,
unsigned Iterations,
- SDNodeFlags *Flags, bool Reciprocal) {
+ SDNodeFlags Flags, bool Reciprocal) {
EVT VT = Arg.getValueType();
SDLoc DL(Arg);
SDValue ThreeHalves = DAG.getConstantFP(1.5, DL, VT);
@@ -15140,7 +16600,7 @@ SDValue DAGCombiner::buildSqrtNROneConst(SDValue Arg, SDValue Est,
/// X_{i+1} = (-0.5 * X_i) * (A * X_i * X_i + (-3.0))
SDValue DAGCombiner::buildSqrtNRTwoConst(SDValue Arg, SDValue Est,
unsigned Iterations,
- SDNodeFlags *Flags, bool Reciprocal) {
+ SDNodeFlags Flags, bool Reciprocal) {
EVT VT = Arg.getValueType();
SDLoc DL(Arg);
SDValue MinusThree = DAG.getConstantFP(-3.0, DL, VT);
@@ -15185,7 +16645,7 @@ SDValue DAGCombiner::buildSqrtNRTwoConst(SDValue Arg, SDValue Est,
/// Build code to calculate either rsqrt(Op) or sqrt(Op). In the latter case
/// Op*rsqrt(Op) is actually computed, so additional postprocessing is needed if
/// Op can be zero.
-SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags *Flags,
+SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags,
bool Reciprocal) {
if (Level >= AfterLegalizeDAG)
return SDValue();
@@ -15238,17 +16698,17 @@ SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags *Flags,
return SDValue();
}
-SDValue DAGCombiner::buildRsqrtEstimate(SDValue Op, SDNodeFlags *Flags) {
+SDValue DAGCombiner::buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags) {
return buildSqrtEstimateImpl(Op, Flags, true);
}
-SDValue DAGCombiner::buildSqrtEstimate(SDValue Op, SDNodeFlags *Flags) {
+SDValue DAGCombiner::buildSqrtEstimate(SDValue Op, SDNodeFlags Flags) {
return buildSqrtEstimateImpl(Op, Flags, false);
}
/// Return true if base is a frame index, which is known not to alias with
/// anything but itself. Provides base object and offset as results.
-static bool FindBaseOffset(SDValue Ptr, SDValue &Base, int64_t &Offset,
+static bool findBaseOffset(SDValue Ptr, SDValue &Base, int64_t &Offset,
const GlobalValue *&GV, const void *&CV) {
// Assume it is a primitive operation.
Base = Ptr; Offset = 0; GV = nullptr; CV = nullptr;
@@ -15257,7 +16717,7 @@ static bool FindBaseOffset(SDValue Ptr, SDValue &Base, int64_t &Offset,
if (Base.getOpcode() == ISD::ADD) {
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Base.getOperand(1))) {
Base = Base.getOperand(0);
- Offset += C->getZExtValue();
+ Offset += C->getSExtValue();
}
}
@@ -15300,54 +16760,82 @@ bool DAGCombiner::isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const {
if (Op1->isInvariant() && Op0->writeMem())
return false;
+ unsigned NumBytes0 = Op0->getMemoryVT().getSizeInBits() >> 3;
+ unsigned NumBytes1 = Op1->getMemoryVT().getSizeInBits() >> 3;
+
+ // Check for BaseIndexOffset matching.
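+ // If the two base pointers share a base and index, their relative offset
+ // PtrDiff is known; the accesses then alias iff the byte intervals
+ // [0, NumBytes0) and [PtrDiff, PtrDiff + NumBytes1) overlap.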
+ BaseIndexOffset BasePtr0 = BaseIndexOffset::match(Op0->getBasePtr(), DAG);
+ BaseIndexOffset BasePtr1 = BaseIndexOffset::match(Op1->getBasePtr(), DAG);
+ int64_t PtrDiff;
+ if (BasePtr0.equalBaseIndex(BasePtr1, DAG, PtrDiff))
+ return !((NumBytes0 <= PtrDiff) || (PtrDiff + NumBytes1 <= 0));
+
+ // If both BasePtr0 and BasePtr1 are FrameIndexes, we will not be
+ // able to calculate their relative offset if at least one arises
+ // from an alloca. However, these allocas cannot overlap and we
+ // can infer there is no alias.
+ if (auto *A = dyn_cast<FrameIndexSDNode>(BasePtr0.getBase()))
+ if (auto *B = dyn_cast<FrameIndexSDNode>(BasePtr1.getBase())) {
+ MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
+ // If the bases are the same frame index but we couldn't find a
+ // constant offset (the indices are different), be conservative.
+ if (A != B && (!MFI.isFixedObjectIndex(A->getIndex()) ||
+ !MFI.isFixedObjectIndex(B->getIndex())))
+ return false;
+ }
+
+ // FIXME: findBaseOffset and ConstantValue/GlobalValue/FrameIndex analysis
+ // should be modified to use BaseIndexOffset.
+
// Gather base node and offset information.
- SDValue Base1, Base2;
- int64_t Offset1, Offset2;
- const GlobalValue *GV1, *GV2;
- const void *CV1, *CV2;
- bool isFrameIndex1 = FindBaseOffset(Op0->getBasePtr(),
+ SDValue Base0, Base1;
+ int64_t Offset0, Offset1;
+ const GlobalValue *GV0, *GV1;
+ const void *CV0, *CV1;
+ bool IsFrameIndex0 = findBaseOffset(Op0->getBasePtr(),
+ Base0, Offset0, GV0, CV0);
+ bool IsFrameIndex1 = findBaseOffset(Op1->getBasePtr(),
Base1, Offset1, GV1, CV1);
- bool isFrameIndex2 = FindBaseOffset(Op1->getBasePtr(),
- Base2, Offset2, GV2, CV2);
- // If they have a same base address then check to see if they overlap.
- if (Base1 == Base2 || (GV1 && (GV1 == GV2)) || (CV1 && (CV1 == CV2)))
- return !((Offset1 + (Op0->getMemoryVT().getSizeInBits() >> 3)) <= Offset2 ||
- (Offset2 + (Op1->getMemoryVT().getSizeInBits() >> 3)) <= Offset1);
+ // If they have the same base address, then check to see if they overlap.
+ if (Base0 == Base1 || (GV0 && (GV0 == GV1)) || (CV0 && (CV0 == CV1)))
+ return !((Offset0 + NumBytes0) <= Offset1 ||
+ (Offset1 + NumBytes1) <= Offset0);
// It is possible for different frame indices to alias each other, mostly
// when tail call optimization reuses return address slots for arguments.
// To catch this case, look up the actual index of frame indices to compute
// the real alias relationship.
- if (isFrameIndex1 && isFrameIndex2) {
+ if (IsFrameIndex0 && IsFrameIndex1) {
MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
+ Offset0 += MFI.getObjectOffset(cast<FrameIndexSDNode>(Base0)->getIndex());
Offset1 += MFI.getObjectOffset(cast<FrameIndexSDNode>(Base1)->getIndex());
- Offset2 += MFI.getObjectOffset(cast<FrameIndexSDNode>(Base2)->getIndex());
- return !((Offset1 + (Op0->getMemoryVT().getSizeInBits() >> 3)) <= Offset2 ||
- (Offset2 + (Op1->getMemoryVT().getSizeInBits() >> 3)) <= Offset1);
+ return !((Offset0 + NumBytes0) <= Offset1 ||
+ (Offset1 + NumBytes1) <= Offset0);
}
// Otherwise, if we know what the bases are, and they aren't identical, then
// we know they cannot alias.
- if ((isFrameIndex1 || CV1 || GV1) && (isFrameIndex2 || CV2 || GV2))
+ if ((IsFrameIndex0 || CV0 || GV0) && (IsFrameIndex1 || CV1 || GV1))
return false;
// If we know required SrcValue1 and SrcValue2 have relatively large alignment
// compared to the size and offset of the access, we may be able to prove they
- // do not alias. This check is conservative for now to catch cases created by
+ // do not alias. This check is conservative for now to catch cases created by
// splitting vector types.
- if ((Op0->getOriginalAlignment() == Op1->getOriginalAlignment()) &&
- (Op0->getSrcValueOffset() != Op1->getSrcValueOffset()) &&
- (Op0->getMemoryVT().getSizeInBits() >> 3 ==
- Op1->getMemoryVT().getSizeInBits() >> 3) &&
- (Op0->getOriginalAlignment() > (Op0->getMemoryVT().getSizeInBits() >> 3))) {
- int64_t OffAlign1 = Op0->getSrcValueOffset() % Op0->getOriginalAlignment();
- int64_t OffAlign2 = Op1->getSrcValueOffset() % Op1->getOriginalAlignment();
+ int64_t SrcValOffset0 = Op0->getSrcValueOffset();
+ int64_t SrcValOffset1 = Op1->getSrcValueOffset();
+ unsigned OrigAlignment0 = Op0->getOriginalAlignment();
+ unsigned OrigAlignment1 = Op1->getOriginalAlignment();
+ if (OrigAlignment0 == OrigAlignment1 && SrcValOffset0 != SrcValOffset1 &&
+ NumBytes0 == NumBytes1 && OrigAlignment0 > NumBytes0) {
+ int64_t OffAlign0 = SrcValOffset0 % OrigAlignment0;
+ int64_t OffAlign1 = SrcValOffset1 % OrigAlignment1;
// There is no overlap between these relatively aligned accesses of similar
- // size, return no alias.
- if ((OffAlign1 + (Op0->getMemoryVT().getSizeInBits() >> 3)) <= OffAlign2 ||
- (OffAlign2 + (Op1->getMemoryVT().getSizeInBits() >> 3)) <= OffAlign1)
+ // size. Return no alias.
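+ // e.g. two 4-byte accesses with common 8-byte alignment at offsets 0 and
+ // 4 within their objects cannot overlap: OffAlign0 = 0, OffAlign1 = 4,
+ // and 0 + 4 <= 4.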
+ if ((OffAlign0 + NumBytes0) <= OffAlign1 ||
+ (OffAlign1 + NumBytes1) <= OffAlign0)
return false;
}
@@ -15359,20 +16847,18 @@ bool DAGCombiner::isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const {
CombinerAAOnlyFunc != DAG.getMachineFunction().getName())
UseAA = false;
#endif
- if (UseAA &&
+
+ if (UseAA && AA &&
Op0->getMemOperand()->getValue() && Op1->getMemOperand()->getValue()) {
// Use alias analysis information.
- int64_t MinOffset = std::min(Op0->getSrcValueOffset(),
- Op1->getSrcValueOffset());
- int64_t Overlap1 = (Op0->getMemoryVT().getSizeInBits() >> 3) +
- Op0->getSrcValueOffset() - MinOffset;
- int64_t Overlap2 = (Op1->getMemoryVT().getSizeInBits() >> 3) +
- Op1->getSrcValueOffset() - MinOffset;
+ int64_t MinOffset = std::min(SrcValOffset0, SrcValOffset1);
+ int64_t Overlap0 = NumBytes0 + SrcValOffset0 - MinOffset;
+ int64_t Overlap1 = NumBytes1 + SrcValOffset1 - MinOffset;
AliasResult AAResult =
- AA.alias(MemoryLocation(Op0->getMemOperand()->getValue(), Overlap1,
- UseTBAA ? Op0->getAAInfo() : AAMDNodes()),
- MemoryLocation(Op1->getMemOperand()->getValue(), Overlap2,
- UseTBAA ? Op1->getAAInfo() : AAMDNodes()));
+ AA->alias(MemoryLocation(Op0->getMemOperand()->getValue(), Overlap0,
+ UseTBAA ? Op0->getAAInfo() : AAMDNodes()),
+ MemoryLocation(Op1->getMemOperand()->getValue(), Overlap1,
+ UseTBAA ? Op1->getAAInfo() : AAMDNodes()));
if (AAResult == NoAlias)
return false;
}
@@ -15454,6 +16940,12 @@ void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
++Depth;
break;
+ case ISD::CopyFromReg:
+ // Forward past CopyFromReg.
+ Chains.push_back(Chain.getOperand(0));
+ ++Depth;
+ break;
+
default:
// For all other instructions we will just have to take what we can get.
Aliases.push_back(Chain);
@@ -15482,17 +16974,29 @@ SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) {
return DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Aliases);
}
+// This function tries to collect a number of potentially interesting
+// nodes whose chains can be improved, all at once. This might seem
+// redundant, as this function gets called when visiting every store
+// node, so why not let the work be done on each store as it's visited?
+//
+// I believe this is mainly important because MergeConsecutiveStores
+// is unable to deal with merging stores of different sizes, so unless
+// we improve the chains of all the potential candidates up-front
+// before running MergeConsecutiveStores, it might only see some of
+// the nodes that will eventually be candidates, and then not be able
+// to go from a partially-merged state to the desired final
+// fully-merged state.
bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) {
// This holds the base pointer, index, and the offset in bytes from the base
// pointer.
BaseIndexOffset BasePtr = BaseIndexOffset::match(St->getBasePtr(), DAG);
// We must have a base and an offset.
- if (!BasePtr.Base.getNode())
+ if (!BasePtr.getBase().getNode())
return false;
// Do not handle stores to undef base pointers.
- if (BasePtr.Base.isUndef())
+ if (BasePtr.getBase().isUndef())
return false;
SmallVector<StoreSDNode *, 8> ChainedStores;
@@ -15514,13 +17018,11 @@ bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) {
BaseIndexOffset Ptr = BaseIndexOffset::match(Index->getBasePtr(), DAG);
// Check that the base pointer is the same as the original one.
- if (!Ptr.equalBaseIndex(BasePtr))
+ if (!BasePtr.equalBaseIndex(Ptr, DAG))
break;
- // Find the next memory operand in the chain. If the next operand in the
- // chain is a store then move up and continue the scan with the next
- // memory operand. If the next operand is a load save it and use alias
- // information to check if it interferes with anything.
+ // Walk up the chain to find the next store node, ignoring any
+ // intermediate loads. Any other kind of node will halt the loop.
SDNode *NextInChain = Index->getChain().getNode();
while (true) {
if (StoreSDNode *STn = dyn_cast<StoreSDNode>(NextInChain)) {
@@ -15539,9 +17041,14 @@ bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) {
Index = nullptr;
break;
}
- }
+ } // end while
}
+ // At this point, ChainedStores lists all of the Store nodes
+ // reachable by iterating up through chain nodes matching the above
+ // conditions. For each such store identified, try to find an
+ // earlier chain to attach the store to that won't violate the
+ // required ordering.
bool MadeChangeToSt = false;
SmallVector<std::pair<StoreSDNode *, SDValue>, 8> BetterChains;
@@ -15565,7 +17072,7 @@ bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) {
}
/// This is the entry point for the file.
-void SelectionDAG::Combine(CombineLevel Level, AliasAnalysis &AA,
+void SelectionDAG::Combine(CombineLevel Level, AliasAnalysis *AA,
CodeGenOpt::Level OptLevel) {
/// This is the main entry point to this class.
DAGCombiner(*this, AA, OptLevel).Run(Level);
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
index e2f33bb..b2599b2 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -1,4 +1,4 @@
-//===-- FastISel.cpp - Implementation of the FastISel class ---------------===//
+//===- FastISel.cpp - Implementation of the FastISel class ----------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -39,35 +39,76 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/CodeGen/FastISel.h"
+#include "llvm/ADT/APFloat.h"
+#include "llvm/ADT/APSInt.h"
+#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
-#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/CodeGen/Analysis.h"
-#include "llvm/CodeGen/FastISel.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
+#include "llvm/CodeGen/ISDOpcodes.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/CodeGen/StackMaps.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/IR/Argument.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/CallSite.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/Constant.h"
+#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugInfo.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
-#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Mangler.h"
+#include "llvm/IR/Metadata.h"
#include "llvm/IR/Operator.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/User.h"
+#include "llvm/IR/Value.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/Support/Casting.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
#include "llvm/Target/TargetSubtargetInfo.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <iterator>
+#include <utility>
+
using namespace llvm;
#define DEBUG_TYPE "isel"
@@ -78,21 +119,6 @@ STATISTIC(NumFastIselSuccessTarget, "Number of insts selected by "
"target-specific selector");
STATISTIC(NumFastIselDead, "Number of dead insts removed on failure");
-void FastISel::ArgListEntry::setAttributes(ImmutableCallSite *CS,
- unsigned AttrIdx) {
- IsSExt = CS->paramHasAttr(AttrIdx, Attribute::SExt);
- IsZExt = CS->paramHasAttr(AttrIdx, Attribute::ZExt);
- IsInReg = CS->paramHasAttr(AttrIdx, Attribute::InReg);
- IsSRet = CS->paramHasAttr(AttrIdx, Attribute::StructRet);
- IsNest = CS->paramHasAttr(AttrIdx, Attribute::Nest);
- IsByVal = CS->paramHasAttr(AttrIdx, Attribute::ByVal);
- IsInAlloca = CS->paramHasAttr(AttrIdx, Attribute::InAlloca);
- IsReturned = CS->paramHasAttr(AttrIdx, Attribute::Returned);
- IsSwiftSelf = CS->paramHasAttr(AttrIdx, Attribute::SwiftSelf);
- IsSwiftError = CS->paramHasAttr(AttrIdx, Attribute::SwiftError);
- Alignment = CS->getParamAlignment(AttrIdx);
-}
-
/// Set the current block to which generated machine instructions will be
/// appended, and clear the local CSE map.
void FastISel::startNewBlock() {
@@ -231,17 +257,13 @@ unsigned FastISel::materializeConstant(const Value *V, MVT VT) {
// Try to emit the constant by using an integer constant with a cast.
const APFloat &Flt = CF->getValueAPF();
EVT IntVT = TLI.getPointerTy(DL);
-
- uint64_t x[2];
uint32_t IntBitWidth = IntVT.getSizeInBits();
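+    // Convert through a signed APSInt; isExact tells us whether the float
+    // value round-trips, i.e. whether the cast-based materialization is safe.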
+ APSInt SIntVal(IntBitWidth, /*isUnsigned=*/false);
bool isExact;
- (void)Flt.convertToInteger(x, IntBitWidth, /*isSigned=*/true,
- APFloat::rmTowardZero, &isExact);
+ (void)Flt.convertToInteger(SIntVal, APFloat::rmTowardZero, &isExact);
if (isExact) {
- APInt IntVal(IntBitWidth, x);
-
unsigned IntegerReg =
- getRegForValue(ConstantInt::get(V->getContext(), IntVal));
+ getRegForValue(ConstantInt::get(V->getContext(), SIntVal));
if (IntegerReg != 0)
Reg = fastEmit_r(IntVT.getSimpleVT(), VT, ISD::SINT_TO_FP, IntegerReg,
/*Kill=*/false);
@@ -600,7 +622,7 @@ bool FastISel::selectStackmap(const CallInst *I) {
// have to worry about calling conventions and target-specific lowering code.
// Instead we perform the call lowering right here.
//
- // CALLSEQ_START(0...)
+ // CALLSEQ_START(0, 0...)
// STACKMAP(id, nbytes, ...)
// CALLSEQ_END(0, 0)
//
@@ -646,7 +668,7 @@ bool FastISel::selectStackmap(const CallInst *I) {
MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(TargetOpcode::STACKMAP));
for (auto const &MO : Ops)
- MIB.addOperand(MO);
+ MIB.add(MO);
// Issue CALLSEQ_END
unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
@@ -672,10 +694,8 @@ bool FastISel::lowerCallOperands(const CallInst *CI, unsigned ArgIdx,
Args.reserve(NumArgs);
// Populate the argument list.
- // Attributes for args start at offset 1, after the return attribute.
ImmutableCallSite CS(CI);
- for (unsigned ArgI = ArgIdx, ArgE = ArgIdx + NumArgs, AttrI = ArgIdx + 1;
- ArgI != ArgE; ++ArgI) {
+ for (unsigned ArgI = ArgIdx, ArgE = ArgIdx + NumArgs; ArgI != ArgE; ++ArgI) {
Value *V = CI->getOperand(ArgI);
assert(!V->getType()->isEmptyTy() && "Empty type passed to intrinsic.");
@@ -683,7 +703,7 @@ bool FastISel::lowerCallOperands(const CallInst *CI, unsigned ArgIdx,
ArgListEntry Entry;
Entry.Val = V;
Entry.Ty = V->getType();
- Entry.setAttributes(&CS, AttrI);
+ Entry.setAttributes(&CS, ArgIdx);
Args.push_back(Entry);
}
@@ -826,7 +846,7 @@ bool FastISel::selectPatchpoint(const CallInst *I) {
TII.get(TargetOpcode::PATCHPOINT));
for (auto &MO : Ops)
- MIB.addOperand(MO);
+ MIB.add(MO);
MIB->setPhysRegsDeadExcept(CLI.InRegs, TRI);
@@ -841,9 +861,28 @@ bool FastISel::selectPatchpoint(const CallInst *I) {
return true;
}
-/// Returns an AttributeSet representing the attributes applied to the return
+bool FastISel::selectXRayCustomEvent(const CallInst *I) {
+ const auto &Triple = TM.getTargetTriple();
+ if (Triple.getArch() != Triple::x86_64 || !Triple.isOSLinux())
+    return true; // Don't do anything to this instruction.
+ SmallVector<MachineOperand, 8> Ops;
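+  // The intrinsic carries a buffer pointer and a length; pass both through
+  // as register operands.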
+ Ops.push_back(MachineOperand::CreateReg(getRegForValue(I->getArgOperand(0)),
+ /*IsDef=*/false));
+ Ops.push_back(MachineOperand::CreateReg(getRegForValue(I->getArgOperand(1)),
+ /*IsDef=*/false));
+ MachineInstrBuilder MIB =
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::PATCHABLE_EVENT_CALL));
+ for (auto &MO : Ops)
+ MIB.add(MO);
+  // PATCHABLE_EVENT_CALL is a pseudo-instruction; it is lowered to the
+  // actual patchable event call later in the pipeline.
+ return true;
+}
+
+/// Returns an AttributeList representing the attributes applied to the return
/// value of the given call.
-static AttributeSet getReturnAttrs(FastISel::CallLoweringInfo &CLI) {
+static AttributeList getReturnAttrs(FastISel::CallLoweringInfo &CLI) {
SmallVector<Attribute::AttrKind, 2> Attrs;
if (CLI.RetSExt)
Attrs.push_back(Attribute::SExt);
@@ -852,8 +891,8 @@ static AttributeSet getReturnAttrs(FastISel::CallLoweringInfo &CLI) {
if (CLI.IsInReg)
Attrs.push_back(Attribute::InReg);
- return AttributeSet::get(CLI.RetTy->getContext(), AttributeSet::ReturnIndex,
- Attrs);
+ return AttributeList::get(CLI.RetTy->getContext(), AttributeList::ReturnIndex,
+ Attrs);
}
bool FastISel::lowerCallTo(const CallInst *CI, const char *SymName,
@@ -885,9 +924,10 @@ bool FastISel::lowerCallTo(const CallInst *CI, MCSymbol *Symbol,
ArgListEntry Entry;
Entry.Val = V;
Entry.Ty = V->getType();
- Entry.setAttributes(&CS, ArgI + 1);
+ Entry.setAttributes(&CS, ArgI);
Args.push_back(Entry);
}
+ TLI.markLibCallAttributes(MF, CS.getCallingConv(), Args);
CallLoweringInfo CLI;
CLI.setCallee(RetTy, FTy, Symbol, std::move(Args), CS, NumArgs);
@@ -1021,7 +1061,7 @@ bool FastISel::lowerCall(const CallInst *CI) {
Entry.Ty = V->getType();
-    // Skip the first return-type Attribute to get to params.
+    // Argument attributes are addressed by zero-based argument index.
- Entry.setAttributes(&CS, i - CS.arg_begin() + 1);
+ Entry.setAttributes(&CS, i - CS.arg_begin());
Args.push_back(Entry);
}
@@ -1110,16 +1150,16 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) {
return true;
}
- unsigned Offset = 0;
+ // Byval arguments with frame indices were already handled after argument
+ // lowering and before isel.
+ const auto *Arg =
+ dyn_cast<Argument>(Address->stripInBoundsConstantOffsets());
+ if (Arg && FuncInfo.getArgumentFrameIndex(Arg) != INT_MAX)
+ return true;
+
Optional<MachineOperand> Op;
- if (const auto *Arg = dyn_cast<Argument>(Address))
- // Some arguments' frame index is recorded during argument lowering.
- Offset = FuncInfo.getArgumentFrameIndex(Arg);
- if (Offset)
- Op = MachineOperand::CreateFI(Offset);
- if (!Op)
- if (unsigned Reg = lookUpRegForValue(Address))
- Op = MachineOperand::CreateReg(Reg, false);
+ if (unsigned Reg = lookUpRegForValue(Address))
+ Op = MachineOperand::CreateReg(Reg, false);
// If we have a VLA that has a "use" in a metadata node that's then used
// here but it has no other uses, then we have a problem. E.g.,
@@ -1143,13 +1183,15 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) {
"Expected inlined-at fields to agree");
if (Op->isReg()) {
Op->setIsDebug(true);
+ // A dbg.declare describes the address of a source variable, so lower it
+ // into an indirect DBG_VALUE.
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
- TII.get(TargetOpcode::DBG_VALUE), false, Op->getReg(), 0,
- DI->getVariable(), DI->getExpression());
+ TII.get(TargetOpcode::DBG_VALUE), /*IsIndirect*/ true,
+ Op->getReg(), 0, DI->getVariable(), DI->getExpression());
} else
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(TargetOpcode::DBG_VALUE))
- .addOperand(*Op)
+ .add(*Op)
.addImm(0)
.addMetadata(DI->getVariable())
.addMetadata(DI->getExpression());
@@ -1229,6 +1271,9 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) {
case Intrinsic::experimental_patchpoint_void:
case Intrinsic::experimental_patchpoint_i64:
return selectPatchpoint(II);
+
+ case Intrinsic::xray_customevent:
+ return selectXRayCustomEvent(II);
}
return fastLowerIntrinsicCall(II);
@@ -1362,7 +1407,7 @@ bool FastISel::selectInstruction(const Instruction *I) {
if (const auto *Call = dyn_cast<CallInst>(I)) {
const Function *F = Call->getCalledFunction();
- LibFunc::Func Func;
+ LibFunc Func;
// As a special case, don't handle calls to builtin library functions that
// may be translated directly to target instructions.
@@ -1665,7 +1710,7 @@ FastISel::FastISel(FunctionLoweringInfo &FuncInfo,
TRI(*MF->getSubtarget().getRegisterInfo()), LibInfo(LibInfo),
SkipTargetIndependentISel(SkipTargetIndependentISel) {}
-FastISel::~FastISel() {}
+FastISel::~FastISel() = default;
bool FastISel::fastLowerArguments() { return false; }
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
index 377a523..b736037 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
@@ -85,7 +85,6 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf,
MF = &mf;
TLI = MF->getSubtarget().getTargetLowering();
RegInfo = &MF->getRegInfo();
- MachineModuleInfo &MMI = MF->getMMI();
const TargetFrameLowering *TFI = MF->getSubtarget().getFrameLowering();
unsigned StackAlign = TFI->getStackAlignment();
@@ -214,33 +213,6 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf,
if (!isa<AllocaInst>(I) || !StaticAllocaMap.count(cast<AllocaInst>(&I)))
InitializeRegForValue(&I);
- // Collect llvm.dbg.declare information. This is done now instead of
- // during the initial isel pass through the IR so that it is done
- // in a predictable order.
- if (const DbgDeclareInst *DI = dyn_cast<DbgDeclareInst>(&I)) {
- assert(DI->getVariable() && "Missing variable");
- assert(DI->getDebugLoc() && "Missing location");
- if (MMI.hasDebugInfo()) {
- // Don't handle byval struct arguments or VLAs, for example.
- // Non-byval arguments are handled here (they refer to the stack
- // temporary alloca at this point).
- const Value *Address = DI->getAddress();
- if (Address) {
- if (const BitCastInst *BCI = dyn_cast<BitCastInst>(Address))
- Address = BCI->getOperand(0);
- if (const AllocaInst *AI = dyn_cast<AllocaInst>(Address)) {
- DenseMap<const AllocaInst *, int>::iterator SI =
- StaticAllocaMap.find(AI);
- if (SI != StaticAllocaMap.end()) { // Check for VLAs.
- int FI = SI->second;
- MF->setVariableDbgInfo(DI->getVariable(), DI->getExpression(),
- FI, DI->getDebugLoc());
- }
- }
- }
- }
- }
-
// Decide the preferred extend type for a value.
PreferredExtendType[&I] = getPreferredExtendForValue(&I);
}
@@ -400,10 +372,9 @@ FunctionLoweringInfo::GetLiveOutRegInfo(unsigned Reg, unsigned BitWidth) {
if (!LOI->IsValid)
return nullptr;
- if (BitWidth > LOI->KnownZero.getBitWidth()) {
+ if (BitWidth > LOI->Known.getBitWidth()) {
LOI->NumSignBits = 1;
- LOI->KnownZero = LOI->KnownZero.zextOrTrunc(BitWidth);
- LOI->KnownOne = LOI->KnownOne.zextOrTrunc(BitWidth);
+ LOI->Known = LOI->Known.zextOrTrunc(BitWidth);
}
return LOI;
@@ -436,17 +407,15 @@ void FunctionLoweringInfo::ComputePHILiveOutRegInfo(const PHINode *PN) {
Value *V = PN->getIncomingValue(0);
if (isa<UndefValue>(V) || isa<ConstantExpr>(V)) {
DestLOI.NumSignBits = 1;
- APInt Zero(BitWidth, 0);
- DestLOI.KnownZero = Zero;
- DestLOI.KnownOne = Zero;
+ DestLOI.Known = KnownBits(BitWidth);
return;
}
if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
APInt Val = CI->getValue().zextOrTrunc(BitWidth);
DestLOI.NumSignBits = Val.getNumSignBits();
- DestLOI.KnownZero = ~Val;
- DestLOI.KnownOne = Val;
+ DestLOI.Known.Zero = ~Val;
+ DestLOI.Known.One = Val;
} else {
assert(ValueMap.count(V) && "V should have been placed in ValueMap when its"
"CopyToReg node was created.");
@@ -463,25 +432,23 @@ void FunctionLoweringInfo::ComputePHILiveOutRegInfo(const PHINode *PN) {
DestLOI = *SrcLOI;
}
- assert(DestLOI.KnownZero.getBitWidth() == BitWidth &&
- DestLOI.KnownOne.getBitWidth() == BitWidth &&
+ assert(DestLOI.Known.Zero.getBitWidth() == BitWidth &&
+ DestLOI.Known.One.getBitWidth() == BitWidth &&
"Masks should have the same bit width as the type.");
for (unsigned i = 1, e = PN->getNumIncomingValues(); i != e; ++i) {
Value *V = PN->getIncomingValue(i);
if (isa<UndefValue>(V) || isa<ConstantExpr>(V)) {
DestLOI.NumSignBits = 1;
- APInt Zero(BitWidth, 0);
- DestLOI.KnownZero = Zero;
- DestLOI.KnownOne = Zero;
+ DestLOI.Known = KnownBits(BitWidth);
return;
}
if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
APInt Val = CI->getValue().zextOrTrunc(BitWidth);
DestLOI.NumSignBits = std::min(DestLOI.NumSignBits, Val.getNumSignBits());
- DestLOI.KnownZero &= ~Val;
- DestLOI.KnownOne &= Val;
+ DestLOI.Known.Zero &= ~Val;
+ DestLOI.Known.One &= Val;
continue;
}
@@ -498,8 +465,8 @@ void FunctionLoweringInfo::ComputePHILiveOutRegInfo(const PHINode *PN) {
return;
}
DestLOI.NumSignBits = std::min(DestLOI.NumSignBits, SrcLOI->NumSignBits);
- DestLOI.KnownZero &= SrcLOI->KnownZero;
- DestLOI.KnownOne &= SrcLOI->KnownOne;
+ DestLOI.Known.Zero &= SrcLOI->Known.Zero;
+ DestLOI.Known.One &= SrcLOI->Known.One;
}
}
@@ -515,12 +482,11 @@ void FunctionLoweringInfo::setArgumentFrameIndex(const Argument *A,
-/// If the argument does not have any assigned frame index then 0 is
-/// returned.
+/// If the argument does not have any assigned frame index then INT_MAX is
+/// returned.
int FunctionLoweringInfo::getArgumentFrameIndex(const Argument *A) {
- DenseMap<const Argument *, int>::iterator I =
- ByValArgFrameIndexMap.find(A);
+ auto I = ByValArgFrameIndexMap.find(A);
if (I != ByValArgFrameIndexMap.end())
return I->second;
DEBUG(dbgs() << "Argument does not have assigned frame index!\n");
- return 0;
+ return INT_MAX;
}
unsigned FunctionLoweringInfo::getCatchPadExceptionPointerVReg(
@@ -557,3 +523,29 @@ void FunctionLoweringInfo::setCurrentSwiftErrorVReg(
const MachineBasicBlock *MBB, const Value *Val, unsigned VReg) {
SwiftErrorVRegDefMap[std::make_pair(MBB, Val)] = VReg;
}
+
+std::pair<unsigned, bool>
+FunctionLoweringInfo::getOrCreateSwiftErrorVRegDefAt(const Instruction *I) {
+ auto Key = PointerIntPair<const Instruction *, 1, bool>(I, true);
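+  // Key the map on (instruction, is-def); true marks the definition point,
+  // false a use (see getOrCreateSwiftErrorVRegUseAt below).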
+ auto It = SwiftErrorVRegDefUses.find(Key);
+ if (It == SwiftErrorVRegDefUses.end()) {
+ auto &DL = MF->getDataLayout();
+ const TargetRegisterClass *RC = TLI->getRegClassFor(TLI->getPointerTy(DL));
+ unsigned VReg = MF->getRegInfo().createVirtualRegister(RC);
+ SwiftErrorVRegDefUses[Key] = VReg;
+ return std::make_pair(VReg, true);
+ }
+ return std::make_pair(It->second, false);
+}
+
+std::pair<unsigned, bool>
+FunctionLoweringInfo::getOrCreateSwiftErrorVRegUseAt(
+    const Instruction *I, const MachineBasicBlock *MBB, const Value *Val) {
+ auto Key = PointerIntPair<const Instruction *, 1, bool>(I, false);
+ auto It = SwiftErrorVRegDefUses.find(Key);
+ if (It == SwiftErrorVRegDefUses.end()) {
+ unsigned VReg = getOrCreateSwiftErrorVReg(MBB, Val);
+ SwiftErrorVRegDefUses[Key] = VReg;
+ return std::make_pair(VReg, true);
+ }
+ return std::make_pair(It->second, false);
+}
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
index 4a9042c..b96c96f 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
@@ -161,7 +161,8 @@ EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned,
if (VRBase) {
DstRC = MRI->getRegClass(VRBase);
} else if (UseRC) {
- assert(UseRC->hasType(VT) && "Incompatible phys register def and uses!");
+ assert(TRI->isTypeLegalForClass(*UseRC, VT) &&
+ "Incompatible phys register def and uses!");
DstRC = UseRC;
} else {
DstRC = TLI->getRegClassFor(VT);
@@ -235,7 +236,6 @@ void InstrEmitter::CreateVirtualRegisters(SDNode *Node,
if (II.OpInfo[i].isOptionalDef()) {
// Optional def must be a physical register.
- unsigned NumResults = CountResults(Node);
VRBase = cast<RegisterSDNode>(Node->getOperand(i-NumResults))->getReg();
assert(TargetRegisterInfo::isPhysicalRegister(VRBase));
MIB.addReg(VRBase, RegState::Define);
@@ -589,7 +589,7 @@ void InstrEmitter::EmitSubregNode(SDNode *Node,
} else
AddOperand(MIB, N0, 0, nullptr, VRBaseMap, /*IsDebug=*/false,
IsClone, IsCloned);
- // Add the subregster being inserted
+ // Add the subregister being inserted
AddOperand(MIB, N1, 0, nullptr, VRBaseMap, /*IsDebug=*/false,
IsClone, IsCloned);
MIB.addImm(SubIdx);
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index b002825..7e4bc3c 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -899,6 +899,35 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) {
}
}
+static TargetLowering::LegalizeAction
+getStrictFPOpcodeAction(const TargetLowering &TLI, unsigned Opcode, EVT VT) {
+ unsigned EqOpc;
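+  // Map the strict pseudo-op to its non-strict equivalent and query the
+  // target's action for that opcode instead.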
+ switch (Opcode) {
+ default: llvm_unreachable("Unexpected FP pseudo-opcode");
+ case ISD::STRICT_FSQRT: EqOpc = ISD::FSQRT; break;
+ case ISD::STRICT_FPOW: EqOpc = ISD::FPOW; break;
+ case ISD::STRICT_FPOWI: EqOpc = ISD::FPOWI; break;
+ case ISD::STRICT_FSIN: EqOpc = ISD::FSIN; break;
+ case ISD::STRICT_FCOS: EqOpc = ISD::FCOS; break;
+ case ISD::STRICT_FEXP: EqOpc = ISD::FEXP; break;
+ case ISD::STRICT_FEXP2: EqOpc = ISD::FEXP2; break;
+ case ISD::STRICT_FLOG: EqOpc = ISD::FLOG; break;
+ case ISD::STRICT_FLOG10: EqOpc = ISD::FLOG10; break;
+ case ISD::STRICT_FLOG2: EqOpc = ISD::FLOG2; break;
+ case ISD::STRICT_FRINT: EqOpc = ISD::FRINT; break;
+ case ISD::STRICT_FNEARBYINT: EqOpc = ISD::FNEARBYINT; break;
+ }
+
+ auto Action = TLI.getOperationAction(EqOpc, VT);
+
+ // We don't currently handle Custom or Promote for strict FP pseudo-ops.
+ // For now, we just expand for those cases.
+ if (Action != TargetLowering::Legal)
+ Action = TargetLowering::Expand;
+
+ return Action;
+}
+
/// Return a legal replacement for the given operation, with all legal operands.
void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
DEBUG(dbgs() << "\nLegalizing: "; Node->dump(&DAG));
@@ -994,7 +1023,6 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
break;
case ISD::EXTRACT_ELEMENT:
case ISD::FLT_ROUNDS_:
- case ISD::FPOWI:
case ISD::MERGE_VALUES:
case ISD::EH_RETURN:
case ISD::FRAME_TO_ARGS_OFFSET:
@@ -1043,6 +1071,25 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
return;
}
break;
+ case ISD::STRICT_FSQRT:
+ case ISD::STRICT_FPOW:
+ case ISD::STRICT_FPOWI:
+ case ISD::STRICT_FSIN:
+ case ISD::STRICT_FCOS:
+ case ISD::STRICT_FEXP:
+ case ISD::STRICT_FEXP2:
+ case ISD::STRICT_FLOG:
+ case ISD::STRICT_FLOG10:
+ case ISD::STRICT_FLOG2:
+ case ISD::STRICT_FRINT:
+ case ISD::STRICT_FNEARBYINT:
+    // These pseudo-ops are legalized as if they were their non-strict
+    // equivalents. For instance, if ISD::FSQRT is legal then ISD::STRICT_FSQRT
+ // is also legal, but if ISD::FSQRT requires expansion then so does
+ // ISD::STRICT_FSQRT.
+ Action = getStrictFPOpcodeAction(TLI, Node->getOpcode(),
+ Node->getValueType(0));
+ break;
default:
if (Node->getOpcode() >= ISD::BUILTIN_OP_END) {
@@ -1192,8 +1239,11 @@ SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) {
// If the index is dependent on the store we will introduce a cycle when
// creating the load (the load uses the index, and by replacing the chain
- // we will make the index dependent on the load).
- if (SDNode::hasPredecessorHelper(ST, Visited, Worklist))
+ // we will make the index dependent on the load). Also, the store might be
+ // dependent on the extractelement and introduce a cycle when creating
+ // the load.
+ if (SDNode::hasPredecessorHelper(ST, Visited, Worklist) ||
+ ST->hasPredecessor(Op.getNode()))
continue;
StackPtr = ST->getBasePtr();
@@ -1340,7 +1390,7 @@ void SelectionDAGLegalize::getSignAsIntValue(FloatSignAsInt &State,
// Convert to an integer of the same size.
if (TLI.isTypeLegal(IVT)) {
State.IntValue = DAG.getNode(ISD::BITCAST, DL, IVT, Value);
- State.SignMask = APInt::getSignBit(NumBits);
+ State.SignMask = APInt::getSignMask(NumBits);
State.SignBit = NumBits - 1;
return;
}
@@ -1490,7 +1540,7 @@ void SelectionDAGLegalize::ExpandDYNAMIC_STACKALLOC(SDNode* Node,
// Chain the dynamic stack allocation so that it doesn't modify the stack
// pointer when other instructions are using the stack.
- Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(0, dl, true), dl);
+ Chain = DAG.getCALLSEQ_START(Chain, 0, 0, dl);
SDValue Size = Tmp2.getOperand(1);
SDValue SP = DAG.getCopyFromReg(Chain, dl, SPReg, VT);
@@ -1909,8 +1959,8 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node,
Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
Entry.Node = Op;
Entry.Ty = ArgTy;
- Entry.isSExt = isSigned;
- Entry.isZExt = !isSigned;
+ Entry.IsSExt = isSigned;
+ Entry.IsZExt = !isSigned;
Args.push_back(Entry);
}
SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
@@ -1935,9 +1985,14 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node,
InChain = TCChain;
TargetLowering::CallLoweringInfo CLI(DAG);
- CLI.setDebugLoc(SDLoc(Node)).setChain(InChain)
- .setCallee(TLI.getLibcallCallingConv(LC), RetTy, Callee, std::move(Args))
- .setTailCall(isTailCall).setSExtResult(isSigned).setZExtResult(!isSigned);
+ CLI.setDebugLoc(SDLoc(Node))
+ .setChain(InChain)
+ .setLibCallee(TLI.getLibcallCallingConv(LC), RetTy, Callee,
+ std::move(Args))
+ .setTailCall(isTailCall)
+ .setSExtResult(isSigned)
+ .setZExtResult(!isSigned)
+ .setIsPostTypeLegalization(true);
std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI);
@@ -1960,8 +2015,8 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, EVT RetVT,
for (unsigned i = 0; i != NumOps; ++i) {
Entry.Node = Ops[i];
Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext());
- Entry.isSExt = isSigned;
- Entry.isZExt = !isSigned;
+ Entry.IsSExt = isSigned;
+ Entry.IsZExt = !isSigned;
Args.push_back(Entry);
}
SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
@@ -1970,9 +2025,13 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, EVT RetVT,
Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
TargetLowering::CallLoweringInfo CLI(DAG);
- CLI.setDebugLoc(dl).setChain(DAG.getEntryNode())
- .setCallee(TLI.getLibcallCallingConv(LC), RetTy, Callee, std::move(Args))
- .setSExtResult(isSigned).setZExtResult(!isSigned);
+ CLI.setDebugLoc(dl)
+ .setChain(DAG.getEntryNode())
+ .setLibCallee(TLI.getLibcallCallingConv(LC), RetTy, Callee,
+ std::move(Args))
+ .setSExtResult(isSigned)
+ .setZExtResult(!isSigned)
+ .setIsPostTypeLegalization(true);
std::pair<SDValue,SDValue> CallInfo = TLI.LowerCallTo(CLI);
@@ -1994,8 +2053,8 @@ SelectionDAGLegalize::ExpandChainLibCall(RTLIB::Libcall LC,
Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
Entry.Node = Node->getOperand(i);
Entry.Ty = ArgTy;
- Entry.isSExt = isSigned;
- Entry.isZExt = !isSigned;
+ Entry.IsSExt = isSigned;
+ Entry.IsZExt = !isSigned;
Args.push_back(Entry);
}
SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
@@ -2004,9 +2063,12 @@ SelectionDAGLegalize::ExpandChainLibCall(RTLIB::Libcall LC,
Type *RetTy = Node->getValueType(0).getTypeForEVT(*DAG.getContext());
TargetLowering::CallLoweringInfo CLI(DAG);
- CLI.setDebugLoc(SDLoc(Node)).setChain(InChain)
- .setCallee(TLI.getLibcallCallingConv(LC), RetTy, Callee, std::move(Args))
- .setSExtResult(isSigned).setZExtResult(!isSigned);
+ CLI.setDebugLoc(SDLoc(Node))
+ .setChain(InChain)
+ .setLibCallee(TLI.getLibcallCallingConv(LC), RetTy, Callee,
+ std::move(Args))
+ .setSExtResult(isSigned)
+ .setZExtResult(!isSigned);
std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI);
@@ -2019,6 +2081,9 @@ SDValue SelectionDAGLegalize::ExpandFPLibCall(SDNode* Node,
RTLIB::Libcall Call_F80,
RTLIB::Libcall Call_F128,
RTLIB::Libcall Call_PPCF128) {
+ if (Node->isStrictFPOpcode())
+ Node = DAG.mutateStrictFPToFP(Node);
+
RTLIB::Libcall LC;
switch (Node->getSimpleValueType(0).SimpleTy) {
default: llvm_unreachable("Unexpected request for libcall!");
@@ -2081,8 +2146,8 @@ SelectionDAGLegalize::ExpandDivRemLibCall(SDNode *Node,
Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
Entry.Node = Op;
Entry.Ty = ArgTy;
- Entry.isSExt = isSigned;
- Entry.isZExt = !isSigned;
+ Entry.IsSExt = isSigned;
+ Entry.IsZExt = !isSigned;
Args.push_back(Entry);
}
@@ -2090,8 +2155,8 @@ SelectionDAGLegalize::ExpandDivRemLibCall(SDNode *Node,
SDValue FIPtr = DAG.CreateStackTemporary(RetVT);
Entry.Node = FIPtr;
Entry.Ty = RetTy->getPointerTo();
- Entry.isSExt = isSigned;
- Entry.isZExt = !isSigned;
+ Entry.IsSExt = isSigned;
+ Entry.IsZExt = !isSigned;
Args.push_back(Entry);
SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
@@ -2099,9 +2164,12 @@ SelectionDAGLegalize::ExpandDivRemLibCall(SDNode *Node,
SDLoc dl(Node);
TargetLowering::CallLoweringInfo CLI(DAG);
- CLI.setDebugLoc(dl).setChain(InChain)
- .setCallee(TLI.getLibcallCallingConv(LC), RetTy, Callee, std::move(Args))
- .setSExtResult(isSigned).setZExtResult(!isSigned);
+ CLI.setDebugLoc(dl)
+ .setChain(InChain)
+ .setLibCallee(TLI.getLibcallCallingConv(LC), RetTy, Callee,
+ std::move(Args))
+ .setSExtResult(isSigned)
+ .setZExtResult(!isSigned);
std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI);
@@ -2126,19 +2194,6 @@ static bool isSinCosLibcallAvailable(SDNode *Node, const TargetLowering &TLI) {
return TLI.getLibcallName(LC) != nullptr;
}
-/// Return true if sincos libcall is available and can be used to combine sin
-/// and cos.
-static bool canCombineSinCosLibcall(SDNode *Node, const TargetLowering &TLI,
- const TargetMachine &TM) {
- if (!isSinCosLibcallAvailable(Node, TLI))
- return false;
- // GNU sin/cos functions set errno while sincos does not. Therefore
- // combining sin and cos is only safe if unsafe-fpmath is enabled.
- if (TM.getTargetTriple().isGNUEnvironment() && !TM.Options.UnsafeFPMath)
- return false;
- return true;
-}
-
/// Only issue sincos libcall if both sin and cos are needed.
static bool useSinCos(SDNode *Node) {
unsigned OtherOpcode = Node->getOpcode() == ISD::FSIN
@@ -2185,24 +2240,24 @@ SelectionDAGLegalize::ExpandSinCosLibCall(SDNode *Node,
// Pass the argument.
Entry.Node = Node->getOperand(0);
Entry.Ty = RetTy;
- Entry.isSExt = false;
- Entry.isZExt = false;
+ Entry.IsSExt = false;
+ Entry.IsZExt = false;
Args.push_back(Entry);
// Pass the return address of sin.
SDValue SinPtr = DAG.CreateStackTemporary(RetVT);
Entry.Node = SinPtr;
Entry.Ty = RetTy->getPointerTo();
- Entry.isSExt = false;
- Entry.isZExt = false;
+ Entry.IsSExt = false;
+ Entry.IsZExt = false;
Args.push_back(Entry);
// Also pass the return address of the cos.
SDValue CosPtr = DAG.CreateStackTemporary(RetVT);
Entry.Node = CosPtr;
Entry.Ty = RetTy->getPointerTo();
- Entry.isSExt = false;
- Entry.isZExt = false;
+ Entry.IsSExt = false;
+ Entry.IsZExt = false;
Args.push_back(Entry);
SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
@@ -2210,9 +2265,9 @@ SelectionDAGLegalize::ExpandSinCosLibCall(SDNode *Node,
SDLoc dl(Node);
TargetLowering::CallLoweringInfo CLI(DAG);
- CLI.setDebugLoc(dl).setChain(InChain)
- .setCallee(TLI.getLibcallCallingConv(LC),
- Type::getVoidTy(*DAG.getContext()), Callee, std::move(Args));
+ CLI.setDebugLoc(dl).setChain(InChain).setLibCallee(
+ TLI.getLibcallCallingConv(LC), Type::getVoidTy(*DAG.getContext()), Callee,
+ std::move(Args));
std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI);
@@ -2529,12 +2584,12 @@ SDValue SelectionDAGLegalize::ExpandBITREVERSE(SDValue Op, const SDLoc &dl) {
APInt MaskHi4(Sz, 0), MaskHi2(Sz, 0), MaskHi1(Sz, 0);
APInt MaskLo4(Sz, 0), MaskLo2(Sz, 0), MaskLo1(Sz, 0);
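+  // Replicate the classic bit-reversal masks (0xF0/0x0F, 0xCC/0x33,
+  // 0xAA/0x55) into every byte of the value.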
for (unsigned J = 0; J != Sz; J += 8) {
- MaskHi4 = MaskHi4.Or(APInt(Sz, 0xF0ull << J));
- MaskLo4 = MaskLo4.Or(APInt(Sz, 0x0Full << J));
- MaskHi2 = MaskHi2.Or(APInt(Sz, 0xCCull << J));
- MaskLo2 = MaskLo2.Or(APInt(Sz, 0x33ull << J));
- MaskHi1 = MaskHi1.Or(APInt(Sz, 0xAAull << J));
- MaskLo1 = MaskLo1.Or(APInt(Sz, 0x55ull << J));
+ MaskHi4 = MaskHi4 | (0xF0ull << J);
+ MaskLo4 = MaskLo4 | (0x0Full << J);
+ MaskHi2 = MaskHi2 | (0xCCull << J);
+ MaskLo2 = MaskLo2 | (0x33ull << J);
+ MaskHi1 = MaskHi1 | (0xAAull << J);
+ MaskLo1 = MaskLo1 | (0x55ull << J);
}
// BSWAP if the type is wider than a single byte.
@@ -2573,7 +2628,7 @@ SDValue SelectionDAGLegalize::ExpandBITREVERSE(SDValue Op, const SDLoc &dl) {
DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(I - J, dl, SHVT));
APInt Shift(Sz, 1);
- Shift = Shift.shl(J);
+ Shift <<= J;
Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Shift, dl, VT));
Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp, Tmp2);
}
@@ -2968,7 +3023,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
EVT NVT = Node->getValueType(0);
APFloat apf(DAG.EVTToAPFloatSemantics(VT),
APInt::getNullValue(VT.getSizeInBits()));
- APInt x = APInt::getSignBit(NVT.getSizeInBits());
+ APInt x = APInt::getSignMask(NVT.getSizeInBits());
(void)apf.convertFromAPInt(x, false, APFloat::rmNearestTiesToEven);
Tmp1 = DAG.getConstantFP(apf, dl, VT);
Tmp2 = DAG.getSetCC(dl, getSetCCResultType(VT),
@@ -3091,7 +3146,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
TLI.getVectorIdxTy(DAG.getDataLayout()))));
}
- Tmp1 = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops);
+ Tmp1 = DAG.getBuildVector(VT, dl, Ops);
// We may have changed the BUILD_VECTOR type. Cast it back to the Node type.
Tmp1 = DAG.getNode(ISD::BITCAST, dl, Node->getValueType(0), Tmp1);
Results.push_back(Tmp1);
@@ -3181,7 +3236,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
// Turn fsin / fcos into ISD::FSINCOS node if there are a pair of fsin /
// fcos which share the same operand and both are used.
if ((TLI.isOperationLegalOrCustom(ISD::FSINCOS, VT) ||
- canCombineSinCosLibcall(Node, TLI, TM))
+ isSinCosLibcallAvailable(Node, TLI))
&& useSinCos(Node)) {
SDVTList VTs = DAG.getVTList(VT, VT);
Tmp1 = DAG.getNode(ISD::FSINCOS, dl, VTs, Node->getOperand(0));
@@ -3237,7 +3292,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
EVT VT = Node->getValueType(0);
if (TLI.isOperationLegalOrCustom(ISD::FADD, VT) &&
TLI.isOperationLegalOrCustom(ISD::FNEG, VT)) {
- const SDNodeFlags *Flags = &cast<BinaryWithFlagsSDNode>(Node)->Flags;
+ const SDNodeFlags Flags = Node->getFlags();
Tmp1 = DAG.getNode(ISD::FNEG, dl, VT, Node->getOperand(1));
Tmp1 = DAG.getNode(ISD::FADD, dl, VT, Node->getOperand(0), Tmp1, Flags);
Results.push_back(Tmp1);
@@ -3477,17 +3532,24 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
LC = RTLIB::MUL_I128;
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Cannot expand this operation!");
- // The high part is obtained by SRA'ing all but one of the bits of low
- // part.
- unsigned LoSize = VT.getSizeInBits();
- SDValue HiLHS =
- DAG.getNode(ISD::SRA, dl, VT, RHS,
- DAG.getConstant(LoSize - 1, dl,
- TLI.getPointerTy(DAG.getDataLayout())));
- SDValue HiRHS =
- DAG.getNode(ISD::SRA, dl, VT, LHS,
- DAG.getConstant(LoSize - 1, dl,
- TLI.getPointerTy(DAG.getDataLayout())));
+ SDValue HiLHS;
+ SDValue HiRHS;
+ if (isSigned) {
+      // The high part is obtained by SRA'ing all but one of the bits of the
+      // low part.
+ unsigned LoSize = VT.getSizeInBits();
+ HiLHS =
+ DAG.getNode(ISD::SRA, dl, VT, LHS,
+ DAG.getConstant(LoSize - 1, dl,
+ TLI.getPointerTy(DAG.getDataLayout())));
+ HiRHS =
+ DAG.getNode(ISD::SRA, dl, VT, RHS,
+ DAG.getConstant(LoSize - 1, dl,
+ TLI.getPointerTy(DAG.getDataLayout())));
+ } else {
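+      // For an unsigned multiply the high halves are simply zero.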
+ HiLHS = DAG.getConstant(0, dl, VT);
+ HiRHS = DAG.getConstant(0, dl, VT);
+ }
// Here we're passing the 2 arguments explicitly as 4 arguments that are
// pre-lowered to the correct types. This all depends upon WideVT not
@@ -3505,16 +3567,10 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
SDValue Args[] = { HiLHS, LHS, HiRHS, RHS };
Ret = ExpandLibCall(LC, WideVT, Args, 4, isSigned, dl);
}
- BottomHalf = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VT, Ret,
- DAG.getIntPtrConstant(0, dl));
- TopHalf = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VT, Ret,
- DAG.getIntPtrConstant(1, dl));
- // Ret is a node with an illegal type. Because such things are not
- // generally permitted during this phase of legalization, make sure the
- // node has no more uses. The above EXTRACT_ELEMENT nodes should have been
- // folded.
- assert(Ret->use_empty() &&
- "Unexpected uses of illegally type from expanded lib call.");
+      assert(Ret.getOpcode() == ISD::MERGE_VALUES &&
+             "Expected a MERGE_VALUES node holding the halves of the result.");
+ BottomHalf = Ret.getOperand(0);
+ TopHalf = Ret.getOperand(1);
}
if (isSigned) {
@@ -3790,8 +3846,8 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
Scalars.push_back(DAG.getNode(Node->getOpcode(), dl,
VT.getScalarType(), Ex, Sh));
}
- SDValue Result =
- DAG.getNode(ISD::BUILD_VECTOR, dl, Node->getValueType(0), Scalars);
+
+ SDValue Result = DAG.getBuildVector(Node->getValueType(0), dl, Scalars);
ReplaceNode(SDValue(Node, 0), Result);
break;
}
@@ -3830,10 +3886,11 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
TargetLowering::CallLoweringInfo CLI(DAG);
CLI.setDebugLoc(dl)
.setChain(Node->getOperand(0))
- .setCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()),
- DAG.getExternalSymbol("__sync_synchronize",
- TLI.getPointerTy(DAG.getDataLayout())),
- std::move(Args));
+ .setLibCallee(
+ CallingConv::C, Type::getVoidTy(*DAG.getContext()),
+ DAG.getExternalSymbol("__sync_synchronize",
+ TLI.getPointerTy(DAG.getDataLayout())),
+ std::move(Args));
std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI);
@@ -3870,10 +3927,10 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
TargetLowering::CallLoweringInfo CLI(DAG);
CLI.setDebugLoc(dl)
.setChain(Node->getOperand(0))
- .setCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()),
- DAG.getExternalSymbol("abort",
- TLI.getPointerTy(DAG.getDataLayout())),
- std::move(Args));
+ .setLibCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()),
+ DAG.getExternalSymbol(
+ "abort", TLI.getPointerTy(DAG.getDataLayout())),
+ std::move(Args));
std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI);
Results.push_back(CallResult.second);
@@ -3890,16 +3947,19 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
RTLIB::FMAX_PPCF128));
break;
case ISD::FSQRT:
+ case ISD::STRICT_FSQRT:
Results.push_back(ExpandFPLibCall(Node, RTLIB::SQRT_F32, RTLIB::SQRT_F64,
RTLIB::SQRT_F80, RTLIB::SQRT_F128,
RTLIB::SQRT_PPCF128));
break;
case ISD::FSIN:
+ case ISD::STRICT_FSIN:
Results.push_back(ExpandFPLibCall(Node, RTLIB::SIN_F32, RTLIB::SIN_F64,
RTLIB::SIN_F80, RTLIB::SIN_F128,
RTLIB::SIN_PPCF128));
break;
case ISD::FCOS:
+ case ISD::STRICT_FCOS:
Results.push_back(ExpandFPLibCall(Node, RTLIB::COS_F32, RTLIB::COS_F64,
RTLIB::COS_F80, RTLIB::COS_F128,
RTLIB::COS_PPCF128));
@@ -3909,26 +3969,31 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
ExpandSinCosLibCall(Node, Results);
break;
case ISD::FLOG:
+ case ISD::STRICT_FLOG:
Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG_F32, RTLIB::LOG_F64,
RTLIB::LOG_F80, RTLIB::LOG_F128,
RTLIB::LOG_PPCF128));
break;
case ISD::FLOG2:
+ case ISD::STRICT_FLOG2:
Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG2_F32, RTLIB::LOG2_F64,
RTLIB::LOG2_F80, RTLIB::LOG2_F128,
RTLIB::LOG2_PPCF128));
break;
case ISD::FLOG10:
+ case ISD::STRICT_FLOG10:
Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG10_F32, RTLIB::LOG10_F64,
RTLIB::LOG10_F80, RTLIB::LOG10_F128,
RTLIB::LOG10_PPCF128));
break;
case ISD::FEXP:
+ case ISD::STRICT_FEXP:
Results.push_back(ExpandFPLibCall(Node, RTLIB::EXP_F32, RTLIB::EXP_F64,
RTLIB::EXP_F80, RTLIB::EXP_F128,
RTLIB::EXP_PPCF128));
break;
case ISD::FEXP2:
+ case ISD::STRICT_FEXP2:
Results.push_back(ExpandFPLibCall(Node, RTLIB::EXP2_F32, RTLIB::EXP2_F64,
RTLIB::EXP2_F80, RTLIB::EXP2_F128,
RTLIB::EXP2_PPCF128));
@@ -3949,11 +4014,13 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
RTLIB::CEIL_PPCF128));
break;
case ISD::FRINT:
+ case ISD::STRICT_FRINT:
Results.push_back(ExpandFPLibCall(Node, RTLIB::RINT_F32, RTLIB::RINT_F64,
RTLIB::RINT_F80, RTLIB::RINT_F128,
RTLIB::RINT_PPCF128));
break;
case ISD::FNEARBYINT:
+ case ISD::STRICT_FNEARBYINT:
Results.push_back(ExpandFPLibCall(Node, RTLIB::NEARBYINT_F32,
RTLIB::NEARBYINT_F64,
RTLIB::NEARBYINT_F80,
@@ -3968,11 +4035,13 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
RTLIB::ROUND_PPCF128));
break;
case ISD::FPOWI:
+ case ISD::STRICT_FPOWI:
Results.push_back(ExpandFPLibCall(Node, RTLIB::POWI_F32, RTLIB::POWI_F64,
RTLIB::POWI_F80, RTLIB::POWI_F128,
RTLIB::POWI_PPCF128));
break;
case ISD::FPOW:
+ case ISD::STRICT_FPOW:
Results.push_back(ExpandFPLibCall(Node, RTLIB::POW_F32, RTLIB::POW_F64,
RTLIB::POW_F80, RTLIB::POW_F128,
RTLIB::POW_PPCF128));
@@ -4170,6 +4239,7 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
ReplacedNode(Node);
break;
}
+ case ISD::MUL:
case ISD::SDIV:
case ISD::SREM:
case ISD::UDIV:
@@ -4424,8 +4494,7 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
NewOps.push_back(Elt);
}
- SDValue NewVec = DAG.getNode(ISD::BUILD_VECTOR, SL, MidVT, NewOps);
-
+ SDValue NewVec = DAG.getBuildVector(MidVT, SL, NewOps);
Results.push_back(DAG.getNode(ISD::BITCAST, SL, EltVT, NewVec));
break;
}
@@ -4519,6 +4588,14 @@ void SelectionDAG::Legalize() {
AssignTopologicalOrder();
SmallPtrSet<SDNode *, 16> LegalizedNodes;
+  // Use a delete listener to remove nodes which were deleted during
+  // legalization from LegalizedNodes. This is needed to handle the situation
+  // where a new node is allocated by the object pool at the same address as a
+  // previously deleted node.
+ DAGNodeDeletedListener DeleteListener(
+ *this,
+ [&LegalizedNodes](SDNode *N, SDNode *E) { LegalizedNodes.erase(N); });
+
SelectionDAGLegalize Legalizer(*this, LegalizedNodes);
// Visit all the nodes. We start in topological order, so that we see
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
index 72b56d8..eaf177d 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
@@ -72,7 +72,7 @@ bool DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) {
case ISD::BUILD_PAIR: R = SoftenFloatRes_BUILD_PAIR(N); break;
case ISD::ConstantFP: R = SoftenFloatRes_ConstantFP(N, ResNo); break;
case ISD::EXTRACT_VECTOR_ELT:
- R = SoftenFloatRes_EXTRACT_VECTOR_ELT(N); break;
+ R = SoftenFloatRes_EXTRACT_VECTOR_ELT(N, ResNo); break;
case ISD::FABS: R = SoftenFloatRes_FABS(N, ResNo); break;
case ISD::FMINNUM: R = SoftenFloatRes_FMINNUM(N); break;
case ISD::FMAXNUM: R = SoftenFloatRes_FMAXNUM(N); break;
@@ -112,15 +112,15 @@ bool DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) {
case ISD::VAARG: R = SoftenFloatRes_VAARG(N); break;
}
- // If R is null, the sub-method took care of registering the result.
- if (R.getNode()) {
+ if (R.getNode() && R.getNode() != N) {
SetSoftenedFloat(SDValue(N, ResNo), R);
- ReplaceSoftenFloatResult(N, ResNo, R);
+ // Return true only if the node is changed, assuming that the operands
+ // are also converted when necessary.
+ return true;
}
- // Return true only if the node is changed,
- // assuming that the operands are also converted when necessary.
+
// Otherwise, return false to tell caller to scan operands.
- return R.getNode() && R.getNode() != N;
+ return false;
}
SDValue DAGTypeLegalizer::SoftenFloatRes_BITCAST(SDNode *N, unsigned ResNo) {
@@ -171,7 +171,10 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_ConstantFP(SDNode *N, unsigned ResNo) {
}
}
-SDValue DAGTypeLegalizer::SoftenFloatRes_EXTRACT_VECTOR_ELT(SDNode *N) {
+SDValue DAGTypeLegalizer::SoftenFloatRes_EXTRACT_VECTOR_ELT(SDNode *N,
+                                                            unsigned ResNo) {
+  // When LegalInHWReg, keep the extracted value in a register.
+  if (isLegalInHWReg(N->getValueType(ResNo)))
+    return SDValue(N, ResNo);
SDValue NewOp = BitConvertVectorToIntegerVector(N->getOperand(0));
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N),
NewOp.getValueType().getVectorElementType(),
@@ -459,7 +462,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FP_EXTEND(SDNode *N) {
if (Op.getValueType() == MVT::f16 && N->getValueType(0) != MVT::f32) {
Op = DAG.getNode(ISD::FP_EXTEND, SDLoc(N), MVT::f32, Op);
if (getTypeAction(MVT::f32) == TargetLowering::TypeSoftenFloat)
- SoftenFloatResult(Op.getNode(), 0);
+ AddToWorklist(Op.getNode());
}
if (getTypeAction(Op.getValueType()) == TargetLowering::TypePromoteFloat) {
@@ -472,8 +475,6 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FP_EXTEND(SDNode *N) {
}
RTLIB::Libcall LC = RTLIB::getFPEXT(Op.getValueType(), N->getValueType(0));
- if (getTypeAction(Op.getValueType()) == TargetLowering::TypeSoftenFloat)
- Op = GetSoftenedFloat(Op);
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_EXTEND!");
return TLI.makeLibCall(DAG, LC, NVT, Op, false, SDLoc(N)).first;
}
@@ -752,12 +753,17 @@ bool DAGTypeLegalizer::SoftenFloatOperand(SDNode *N, unsigned OpNo) {
llvm_unreachable("Do not know how to soften this operator's operand!");
case ISD::BITCAST: Res = SoftenFloatOp_BITCAST(N); break;
+ case ISD::CopyToReg: Res = SoftenFloatOp_COPY_TO_REG(N); break;
case ISD::BR_CC: Res = SoftenFloatOp_BR_CC(N); break;
+ case ISD::FABS: Res = SoftenFloatOp_FABS(N); break;
+ case ISD::FCOPYSIGN: Res = SoftenFloatOp_FCOPYSIGN(N); break;
+ case ISD::FNEG: Res = SoftenFloatOp_FNEG(N); break;
case ISD::FP_EXTEND: Res = SoftenFloatOp_FP_EXTEND(N); break;
case ISD::FP_TO_FP16: // Same as FP_ROUND for softening purposes
case ISD::FP_ROUND: Res = SoftenFloatOp_FP_ROUND(N); break;
case ISD::FP_TO_SINT:
case ISD::FP_TO_UINT: Res = SoftenFloatOp_FP_TO_XINT(N); break;
+ case ISD::SELECT: Res = SoftenFloatOp_SELECT(N); break;
case ISD::SELECT_CC: Res = SoftenFloatOp_SELECT_CC(N); break;
case ISD::SETCC: Res = SoftenFloatOp_SETCC(N); break;
case ISD::STORE:
@@ -790,9 +796,9 @@ bool DAGTypeLegalizer::SoftenFloatOperand(SDNode *N, unsigned OpNo) {
bool DAGTypeLegalizer::CanSkipSoftenFloatOperand(SDNode *N, unsigned OpNo) {
if (!isLegalInHWReg(N->getOperand(OpNo).getValueType()))
return false;
- // When the operand type can be kept in registers, SoftenFloatResult
- // will call ReplaceValueWith to replace all references and we can
- // skip softening this operand.
+
+ // When the operand type can be kept in registers there is nothing to do for
+ // the following opcodes.
switch (N->getOperand(OpNo).getOpcode()) {
case ISD::BITCAST:
case ISD::ConstantFP:
@@ -806,18 +812,12 @@ bool DAGTypeLegalizer::CanSkipSoftenFloatOperand(SDNode *N, unsigned OpNo) {
case ISD::SELECT_CC:
return true;
}
- // For some opcodes, SoftenFloatResult handles all conversion of softening
- // and replacing operands, so that there is no need to soften operands
- // again, although such opcode could be scanned for other illegal operands.
+
switch (N->getOpcode()) {
- case ISD::ConstantFP:
- case ISD::CopyFromReg:
- case ISD::CopyToReg:
- case ISD::FABS:
- case ISD::FCOPYSIGN:
- case ISD::FNEG:
- case ISD::Register:
- case ISD::SELECT:
+ case ISD::ConstantFP: // Leaf node.
+ case ISD::CopyFromReg: // Operand is a register that we know to be left
+ // unchanged by SoftenFloatResult().
+ case ISD::Register: // Leaf node.
return true;
}
return false;
@@ -828,6 +828,21 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_BITCAST(SDNode *N) {
GetSoftenedFloat(N->getOperand(0)));
}
+SDValue DAGTypeLegalizer::SoftenFloatOp_COPY_TO_REG(SDNode *N) {
+ SDValue Op1 = GetSoftenedFloat(N->getOperand(1));
+ SDValue Op2 = GetSoftenedFloat(N->getOperand(2));
+
+ if (Op1 == N->getOperand(1) && Op2 == N->getOperand(2))
+ return SDValue();
+
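+  // CopyToReg optionally carries a glue operand; preserve it when present.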
+ if (N->getNumOperands() == 3)
+ return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), Op1, Op2), 0);
+
+ return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), Op1, Op2,
+ N->getOperand(3)),
+ 0);
+}
+
SDValue DAGTypeLegalizer::SoftenFloatOp_FP_EXTEND(SDNode *N) {
// If we get here, the result must be legal but the source illegal.
EVT SVT = N->getOperand(0).getValueType();
@@ -883,6 +898,34 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_BR_CC(SDNode *N) {
0);
}
+SDValue DAGTypeLegalizer::SoftenFloatOp_FABS(SDNode *N) {
+ SDValue Op = GetSoftenedFloat(N->getOperand(0));
+
+ if (Op == N->getOperand(0))
+ return SDValue();
+
+ return SDValue(DAG.UpdateNodeOperands(N, Op), 0);
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatOp_FCOPYSIGN(SDNode *N) {
+ SDValue Op0 = GetSoftenedFloat(N->getOperand(0));
+ SDValue Op1 = GetSoftenedFloat(N->getOperand(1));
+
+ if (Op0 == N->getOperand(0) && Op1 == N->getOperand(1))
+ return SDValue();
+
+ return SDValue(DAG.UpdateNodeOperands(N, Op0, Op1), 0);
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatOp_FNEG(SDNode *N) {
+ SDValue Op = GetSoftenedFloat(N->getOperand(0));
+
+ if (Op == N->getOperand(0))
+ return SDValue();
+
+ return SDValue(DAG.UpdateNodeOperands(N, Op), 0);
+}
+
SDValue DAGTypeLegalizer::SoftenFloatOp_FP_TO_XINT(SDNode *N) {
bool Signed = N->getOpcode() == ISD::FP_TO_SINT;
EVT SVT = N->getOperand(0).getValueType();
@@ -912,6 +955,17 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_FP_TO_XINT(SDNode *N) {
return DAG.getNode(ISD::TRUNCATE, dl, RVT, Res);
}
+SDValue DAGTypeLegalizer::SoftenFloatOp_SELECT(SDNode *N) {
+ SDValue Op1 = GetSoftenedFloat(N->getOperand(1));
+ SDValue Op2 = GetSoftenedFloat(N->getOperand(2));
+
+ if (Op1 == N->getOperand(1) && Op2 == N->getOperand(2))
+ return SDValue();
+
+ return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), Op1, Op2),
+ 0);
+}
+
SDValue DAGTypeLegalizer::SoftenFloatOp_SELECT_CC(SDNode *N) {
SDValue NewLHS = N->getOperand(0), NewRHS = N->getOperand(1);
ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(4))->get();
@@ -1054,15 +1108,15 @@ void DAGTypeLegalizer::ExpandFloatResult(SDNode *N, unsigned ResNo) {
void DAGTypeLegalizer::ExpandFloatRes_ConstantFP(SDNode *N, SDValue &Lo,
SDValue &Hi) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
- assert(NVT.getSizeInBits() == integerPartWidth &&
+ assert(NVT.getSizeInBits() == 64 &&
"Do not know how to expand this float constant!");
APInt C = cast<ConstantFPSDNode>(N)->getValueAPF().bitcastToAPInt();
SDLoc dl(N);
Lo = DAG.getConstantFP(APFloat(DAG.EVTToAPFloatSemantics(NVT),
- APInt(integerPartWidth, C.getRawData()[1])),
+ APInt(64, C.getRawData()[1])),
dl, NVT);
Hi = DAG.getConstantFP(APFloat(DAG.EVTToAPFloatSemantics(NVT),
- APInt(integerPartWidth, C.getRawData()[0])),
+ APInt(64, C.getRawData()[0])),
dl, NVT);
}
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index dc436ce..75fec7b 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -21,6 +21,7 @@
#include "LegalizeTypes.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/KnownBits.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -134,6 +135,9 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::SMULO:
case ISD::UMULO: Res = PromoteIntRes_XMULO(N, ResNo); break;
+ case ISD::ADDCARRY:
+ case ISD::SUBCARRY: Res = PromoteIntRes_ADDSUBCARRY(N, ResNo); break;
+
case ISD::ATOMIC_LOAD:
Res = PromoteIntRes_Atomic0(cast<AtomicSDNode>(N)); break;
@@ -510,9 +514,14 @@ SDValue DAGTypeLegalizer::PromoteIntRes_Overflow(SDNode *N) {
// Simply change the return type of the boolean result.
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(1));
EVT ValueVTs[] = { N->getValueType(0), NVT };
- SDValue Ops[] = { N->getOperand(0), N->getOperand(1) };
+ SDValue Ops[3] = { N->getOperand(0), N->getOperand(1) };
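+  // ADDCARRY and SUBCARRY also take a carry-in as a third operand; forward
+  // it along with the first two.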
+ unsigned NumOps = N->getNumOperands();
+ assert(NumOps <= 3 && "Too many operands");
+ if (NumOps == 3)
+ Ops[2] = N->getOperand(2);
+
SDValue Res = DAG.getNode(N->getOpcode(), SDLoc(N),
- DAG.getVTList(ValueVTs), Ops);
+ DAG.getVTList(ValueVTs), makeArrayRef(Ops, NumOps));
// Modified the sum result - switch anything that used the old sum to use
// the new one.
@@ -606,9 +615,8 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SETCC(SDNode *N) {
SDValue SetCC = DAG.getNode(N->getOpcode(), dl, SVT, LHS, RHS,
N->getOperand(2));
- assert(NVT.bitsLE(SVT) && "Integer type overpromoted?");
// Convert to the expected type.
- return DAG.getNode(ISD::TRUNCATE, dl, NVT, SetCC);
+ return DAG.getSExtOrTrunc(SetCC, dl, NVT);
}
SDValue DAGTypeLegalizer::PromoteIntRes_SHL(SDNode *N) {
@@ -690,7 +698,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_TRUNCATE(SDNode *N) {
case TargetLowering::TypePromoteInteger:
Res = GetPromotedInteger(InOp);
break;
- case TargetLowering::TypeSplitVector:
+ case TargetLowering::TypeSplitVector: {
EVT InVT = InOp.getValueType();
assert(InVT.isVector() && "Cannot split scalar types");
unsigned NumElts = InVT.getVectorNumElements();
@@ -709,6 +717,26 @@ SDValue DAGTypeLegalizer::PromoteIntRes_TRUNCATE(SDNode *N) {
return DAG.getNode(ISD::CONCAT_VECTORS, dl, NVT, EOp1, EOp2);
}
+ case TargetLowering::TypeWidenVector: {
+ SDValue WideInOp = GetWidenedVector(InOp);
+
+ // Truncate widened InOp.
+ unsigned NumElem = WideInOp.getValueType().getVectorNumElements();
+ EVT TruncVT = EVT::getVectorVT(*DAG.getContext(),
+ N->getValueType(0).getScalarType(), NumElem);
+ SDValue WideTrunc = DAG.getNode(ISD::TRUNCATE, dl, TruncVT, WideInOp);
+
+    // Zero extend so that the elements are of the same type as those of NVT.
+ EVT ExtVT = EVT::getVectorVT(*DAG.getContext(), NVT.getVectorElementType(),
+ NumElem);
+ SDValue WideExt = DAG.getNode(ISD::ZERO_EXTEND, dl, ExtVT, WideTrunc);
+
+ // Extract the low NVT subvector.
+ MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout());
+ SDValue ZeroIdx = DAG.getConstant(0, dl, IdxTy);
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, NVT, WideExt, ZeroIdx);
+ }
+ }
// Truncate to NVT instead of VT
return DAG.getNode(ISD::TRUNCATE, dl, NVT, Res);
@@ -742,6 +770,12 @@ SDValue DAGTypeLegalizer::PromoteIntRes_UADDSUBO(SDNode *N, unsigned ResNo) {
return Res;
}
+SDValue DAGTypeLegalizer::PromoteIntRes_ADDSUBCARRY(SDNode *N, unsigned ResNo) {
+ if (ResNo == 1)
+ return PromoteIntRes_Overflow(N);
+ llvm_unreachable("Not implemented");
+}
+
SDValue DAGTypeLegalizer::PromoteIntRes_XMULO(SDNode *N, unsigned ResNo) {
// Promote the overflow bit trivially.
if (ResNo == 1)
@@ -904,6 +938,9 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) {
case ISD::SRL:
case ISD::ROTL:
case ISD::ROTR: Res = PromoteIntOp_Shift(N); break;
+
+ case ISD::ADDCARRY:
+ case ISD::SUBCARRY: Res = PromoteIntOp_ADDSUBCARRY(N, OpNo); break;
}
// If the result is null, the sub-method took care of registering results etc.
@@ -1089,6 +1126,10 @@ SDValue DAGTypeLegalizer::PromoteIntOp_SELECT(SDNode *N, unsigned OpNo) {
SDValue Cond = N->getOperand(0);
EVT OpTy = N->getOperand(1).getValueType();
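+  // For VSELECT, first try to widen the whole select together with its mask
+  // instead of promoting the condition.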
+ if (N->getOpcode() == ISD::VSELECT)
+ if (SDValue Res = WidenVSELECTAndMask(N))
+ return Res;
+
// Promote all the way up to the canonical SetCC type.
EVT OpVT = N->getOpcode() == ISD::SELECT ? OpTy.getScalarType() : OpTy;
Cond = PromoteTargetBoolean(Cond, OpVT);
@@ -1252,6 +1293,30 @@ SDValue DAGTypeLegalizer::PromoteIntOp_ZERO_EXTEND(SDNode *N) {
N->getOperand(0).getValueType().getScalarType());
}
+SDValue DAGTypeLegalizer::PromoteIntOp_ADDSUBCARRY(SDNode *N, unsigned OpNo) {
+ assert(OpNo == 2 && "Don't know how to promote this operand!");
+
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+ SDValue Carry = N->getOperand(2);
+ SDLoc DL(N);
+
+ auto VT = getSetCCResultType(LHS.getValueType());
+ TargetLoweringBase::BooleanContent BoolType = TLI.getBooleanContents(VT);
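+  // Extend the incoming carry to the setcc result type using whatever
+  // boolean representation the target expects.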
+ switch (BoolType) {
+ case TargetLoweringBase::UndefinedBooleanContent:
+ Carry = DAG.getAnyExtOrTrunc(Carry, DL, VT);
+ break;
+ case TargetLoweringBase::ZeroOrOneBooleanContent:
+ Carry = DAG.getZExtOrTrunc(Carry, DL, VT);
+ break;
+ case TargetLoweringBase::ZeroOrNegativeOneBooleanContent:
+ Carry = DAG.getSExtOrTrunc(Carry, DL, VT);
+ break;
+ }
+
+ return SDValue(DAG.UpdateNodeOperands(N, LHS, RHS, Carry), 0);
+}
//===----------------------------------------------------------------------===//
// Integer Result Expansion
@@ -1371,6 +1436,9 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::ADDE:
case ISD::SUBE: ExpandIntRes_ADDSUBE(N, Lo, Hi); break;
+ case ISD::ADDCARRY:
+ case ISD::SUBCARRY: ExpandIntRes_ADDSUBCARRY(N, Lo, Hi); break;
+
case ISD::SHL:
case ISD::SRA:
case ISD::SRL: ExpandIntRes_Shift(N, Lo, Hi); break;
@@ -1501,11 +1569,11 @@ ExpandShiftWithKnownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) {
SDLoc dl(N);
APInt HighBitMask = APInt::getHighBitsSet(ShBits, ShBits - Log2_32(NVTBits));
- APInt KnownZero, KnownOne;
- DAG.computeKnownBits(N->getOperand(1), KnownZero, KnownOne);
+ KnownBits Known;
+ DAG.computeKnownBits(N->getOperand(1), Known);
// If we don't know anything about the high bits, exit.
- if (((KnownZero|KnownOne) & HighBitMask) == 0)
+ if (((Known.Zero|Known.One) & HighBitMask) == 0)
return false;
// Get the incoming operand to be shifted.
@@ -1514,7 +1582,7 @@ ExpandShiftWithKnownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) {
// If we know that any of the high bits of the shift amount are one, then we
// can do this as a couple of simple shifts.
- if (KnownOne.intersects(HighBitMask)) {
+ if (Known.One.intersects(HighBitMask)) {
// Mask out the high bit, which we know is set.
Amt = DAG.getNode(ISD::AND, dl, ShTy, Amt,
DAG.getConstant(~HighBitMask, dl, ShTy));
@@ -1539,7 +1607,7 @@ ExpandShiftWithKnownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) {
// If we know that all of the high bits of the shift amount are zero, then we
// can do this as a couple of simple shifts.
- if ((KnownZero & HighBitMask) == HighBitMask) {
+ if (HighBitMask.isSubsetOf(Known.Zero)) {
// Calculate 31-x. 31 is used instead of 32 to avoid creating an undefined
// shift if x is zero. We can use XOR here because x is known to be smaller
// than 32.
@@ -1714,6 +1782,23 @@ void DAGTypeLegalizer::ExpandIntRes_ADDSUB(SDNode *N,
SDValue LoOps[2] = { LHSL, RHSL };
SDValue HiOps[3] = { LHSH, RHSH };
+ bool HasOpCarry = TLI.isOperationLegalOrCustom(
+ N->getOpcode() == ISD::ADD ? ISD::ADDCARRY : ISD::SUBCARRY,
+ TLI.getTypeToExpandTo(*DAG.getContext(), NVT));
+ if (HasOpCarry) {
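+    // Use UADDO/USUBO for the low half so it produces a carry, then consume
+    // that carry with ADDCARRY/SUBCARRY in the high half.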
+ SDVTList VTList = DAG.getVTList(NVT, getSetCCResultType(NVT));
+ if (N->getOpcode() == ISD::ADD) {
+ Lo = DAG.getNode(ISD::UADDO, dl, VTList, LoOps);
+ HiOps[2] = Lo.getValue(1);
+ Hi = DAG.getNode(ISD::ADDCARRY, dl, VTList, HiOps);
+ } else {
+ Lo = DAG.getNode(ISD::USUBO, dl, VTList, LoOps);
+ HiOps[2] = Lo.getValue(1);
+ Hi = DAG.getNode(ISD::SUBCARRY, dl, VTList, HiOps);
+ }
+ return;
+ }
+
// Do not generate ADDC/ADDE or SUBC/SUBE if the target does not support
// them. TODO: Teach operation legalization how to expand unsupported
// ADDC/ADDE/SUBC/SUBE. The problem is that these operations generate
@@ -1742,9 +1827,11 @@ void DAGTypeLegalizer::ExpandIntRes_ADDSUB(SDNode *N,
TLI.isOperationLegalOrCustom(N->getOpcode() == ISD::ADD ?
ISD::UADDO : ISD::USUBO,
TLI.getTypeToExpandTo(*DAG.getContext(), NVT));
+ TargetLoweringBase::BooleanContent BoolType = TLI.getBooleanContents(NVT);
+
if (hasOVF) {
- SDVTList VTList = DAG.getVTList(NVT, NVT);
- TargetLoweringBase::BooleanContent BoolType = TLI.getBooleanContents(NVT);
+ EVT OvfVT = getSetCCResultType(NVT);
+ SDVTList VTList = DAG.getVTList(NVT, OvfVT);
int RevOpc;
if (N->getOpcode() == ISD::ADD) {
RevOpc = ISD::SUB;
@@ -1759,12 +1846,14 @@ void DAGTypeLegalizer::ExpandIntRes_ADDSUB(SDNode *N,
switch (BoolType) {
case TargetLoweringBase::UndefinedBooleanContent:
- OVF = DAG.getNode(ISD::AND, dl, NVT, DAG.getConstant(1, dl, NVT), OVF);
+ OVF = DAG.getNode(ISD::AND, dl, OvfVT, DAG.getConstant(1, dl, OvfVT), OVF);
LLVM_FALLTHROUGH;
case TargetLoweringBase::ZeroOrOneBooleanContent:
+ OVF = DAG.getZExtOrTrunc(OVF, dl, NVT);
Hi = DAG.getNode(N->getOpcode(), dl, NVT, Hi, OVF);
break;
case TargetLoweringBase::ZeroOrNegativeOneBooleanContent:
+ OVF = DAG.getSExtOrTrunc(OVF, dl, NVT);
Hi = DAG.getNode(RevOpc, dl, NVT, Hi, OVF);
}
return;
@@ -1775,6 +1864,13 @@ void DAGTypeLegalizer::ExpandIntRes_ADDSUB(SDNode *N,
Hi = DAG.getNode(ISD::ADD, dl, NVT, makeArrayRef(HiOps, 2));
SDValue Cmp1 = DAG.getSetCC(dl, getSetCCResultType(NVT), Lo, LoOps[0],
ISD::SETULT);
+
+ if (BoolType == TargetLoweringBase::ZeroOrOneBooleanContent) {
+ SDValue Carry = DAG.getZExtOrTrunc(Cmp1, dl, NVT);
+ Hi = DAG.getNode(ISD::ADD, dl, NVT, Hi, Carry);
+ return;
+ }
+
SDValue Carry1 = DAG.getSelect(dl, NVT, Cmp1,
DAG.getConstant(1, dl, NVT),
DAG.getConstant(0, dl, NVT));
@@ -1789,9 +1885,14 @@ void DAGTypeLegalizer::ExpandIntRes_ADDSUB(SDNode *N,
SDValue Cmp =
DAG.getSetCC(dl, getSetCCResultType(LoOps[0].getValueType()),
LoOps[0], LoOps[1], ISD::SETULT);
- SDValue Borrow = DAG.getSelect(dl, NVT, Cmp,
- DAG.getConstant(1, dl, NVT),
- DAG.getConstant(0, dl, NVT));
+
+ SDValue Borrow;
+ if (BoolType == TargetLoweringBase::ZeroOrOneBooleanContent)
+ Borrow = DAG.getZExtOrTrunc(Cmp, dl, NVT);
+ else
+ Borrow = DAG.getSelect(dl, NVT, Cmp, DAG.getConstant(1, dl, NVT),
+ DAG.getConstant(0, dl, NVT));
+
Hi = DAG.getNode(ISD::SUB, dl, NVT, Hi, Borrow);
}
}
@@ -1842,6 +1943,71 @@ void DAGTypeLegalizer::ExpandIntRes_ADDSUBE(SDNode *N,
ReplaceValueWith(SDValue(N, 1), Hi.getValue(1));
}
+void DAGTypeLegalizer::ExpandIntRes_UADDSUBO(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+ SDLoc dl(N);
+
+ SDValue Ovf;
+
+ bool HasOpCarry = TLI.isOperationLegalOrCustom(
+ N->getOpcode() == ISD::ADD ? ISD::ADDCARRY : ISD::SUBCARRY,
+ TLI.getTypeToExpandTo(*DAG.getContext(), LHS.getValueType()));
+
+ if (HasOpCarry) {
+ // Expand the subcomponents.
+ SDValue LHSL, LHSH, RHSL, RHSH;
+ GetExpandedInteger(LHS, LHSL, LHSH);
+ GetExpandedInteger(RHS, RHSL, RHSH);
+ SDVTList VTList = DAG.getVTList(LHSL.getValueType(), N->getValueType(1));
+ SDValue LoOps[2] = { LHSL, RHSL };
+ SDValue HiOps[3] = { LHSH, RHSH };
+
+ unsigned Opc = N->getOpcode() == ISD::UADDO ? ISD::ADDCARRY : ISD::SUBCARRY;
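+    // The low half is the UADDO/USUBO node itself; its carry-out feeds the
+    // carry-aware opcode computing the high half.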
+ Lo = DAG.getNode(N->getOpcode(), dl, VTList, LoOps);
+ HiOps[2] = Lo.getValue(1);
+ Hi = DAG.getNode(Opc, dl, VTList, HiOps);
+
+ Ovf = Hi.getValue(1);
+ } else {
+ // Expand the result by simply replacing it with the equivalent
+ // non-overflow-checking operation.
+ auto Opc = N->getOpcode() == ISD::UADDO ? ISD::ADD : ISD::SUB;
+ SDValue Sum = DAG.getNode(Opc, dl, LHS.getValueType(), LHS, RHS);
+ SplitInteger(Sum, Lo, Hi);
+
+ // Calculate the overflow: addition overflows iff a + b < a, and subtraction
+ // overflows iff a - b > a.
+ auto Cond = N->getOpcode() == ISD::UADDO ? ISD::SETULT : ISD::SETUGT;
+ Ovf = DAG.getSetCC(dl, N->getValueType(1), Sum, LHS, Cond);
+ }
+
+  // Legalize the flag result: switch anything that used the old flag to
+  // use the new one.
+ ReplaceValueWith(SDValue(N, 1), Ovf);
+}
+
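
When neither ADDCARRY nor SUBCARRY is available, the rewritten ExpandIntRes_UADDSUBO falls back to the plain wide operation and recomputes the flag with a single unsigned compare. A scalar sketch of the UADDO case (the helper name is illustrative):

    #include <cstdint>

    // Unsigned addition overflows iff a + b < a.
    static bool uaddo64(uint64_t A, uint64_t B, uint64_t &Sum) {
      Sum = A + B;     // the non-overflow-checking ADD
      return Sum < A;  // the SETULT that produces the overflow flag
    }
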
+void DAGTypeLegalizer::ExpandIntRes_ADDSUBCARRY(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ // Expand the subcomponents.
+ SDValue LHSL, LHSH, RHSL, RHSH;
+ SDLoc dl(N);
+ GetExpandedInteger(N->getOperand(0), LHSL, LHSH);
+ GetExpandedInteger(N->getOperand(1), RHSL, RHSH);
+ SDVTList VTList = DAG.getVTList(LHSL.getValueType(), N->getValueType(1));
+ SDValue LoOps[3] = { LHSL, RHSL, N->getOperand(2) };
+ SDValue HiOps[3] = { LHSH, RHSH, SDValue() };
+
+ Lo = DAG.getNode(N->getOpcode(), dl, VTList, LoOps);
+ HiOps[2] = Lo.getValue(1);
+ Hi = DAG.getNode(N->getOpcode(), dl, VTList, HiOps);
+
+  // Legalize the flag result: switch anything that used the old flag to
+  // use the new one.
+ ReplaceValueWith(SDValue(N, 1), Hi.getValue(1));
+}
+
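
The three-operand ADDCARRY/SUBCARRY nodes make the chaining explicit: the low half consumes the incoming carry and produces one for the high half, which is why HiOps[2] is filled in from Lo.getValue(1). One link of that chain modeled in plain C++ (a sketch, 64-bit limbs assumed):

    #include <cstdint>
    #include <utility>

    // Returns {sum, carry-out} for a + b + carry-in, with carry-in in {0, 1}.
    static std::pair<uint64_t, unsigned> addCarry(uint64_t A, uint64_t B,
                                                  unsigned CarryIn) {
      uint64_t Sum = A + B + CarryIn;
      // Carry out iff the exact sum does not fit in 64 bits.
      unsigned CarryOut = (Sum < A) || (CarryIn && Sum == A);
      return {Sum, CarryOut};
    }

Chaining two such links, with the first carry-out feeding the second carry-in, reproduces the Lo/Hi computation above for a 128-bit ADDCARRY.
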
void DAGTypeLegalizer::ExpandIntRes_ANY_EXTEND(SDNode *N,
SDValue &Lo, SDValue &Hi) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
@@ -2508,29 +2674,6 @@ void DAGTypeLegalizer::ExpandIntRes_TRUNCATE(SDNode *N,
Hi = DAG.getNode(ISD::TRUNCATE, dl, NVT, Hi);
}
-void DAGTypeLegalizer::ExpandIntRes_UADDSUBO(SDNode *N,
- SDValue &Lo, SDValue &Hi) {
- SDValue LHS = N->getOperand(0);
- SDValue RHS = N->getOperand(1);
- SDLoc dl(N);
-
- // Expand the result by simply replacing it with the equivalent
- // non-overflow-checking operation.
- SDValue Sum = DAG.getNode(N->getOpcode() == ISD::UADDO ?
- ISD::ADD : ISD::SUB, dl, LHS.getValueType(),
- LHS, RHS);
- SplitInteger(Sum, Lo, Hi);
-
- // Calculate the overflow: addition overflows iff a + b < a, and subtraction
- // overflows iff a - b > a.
- SDValue Ofl = DAG.getSetCC(dl, N->getValueType(1), Sum, LHS,
- N->getOpcode () == ISD::UADDO ?
- ISD::SETULT : ISD::SETUGT);
-
- // Use the calculated overflow everywhere.
- ReplaceValueWith(SDValue(N, 1), Ofl);
-}
-
void DAGTypeLegalizer::ExpandIntRes_XMULO(SDNode *N,
SDValue &Lo, SDValue &Hi) {
EVT VT = N->getValueType(0);
@@ -2586,24 +2729,25 @@ void DAGTypeLegalizer::ExpandIntRes_XMULO(SDNode *N,
Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
Entry.Node = Op;
Entry.Ty = ArgTy;
- Entry.isSExt = true;
- Entry.isZExt = false;
+ Entry.IsSExt = true;
+ Entry.IsZExt = false;
Args.push_back(Entry);
}
// Also pass the address of the overflow check.
Entry.Node = Temp;
Entry.Ty = PtrTy->getPointerTo();
- Entry.isSExt = true;
- Entry.isZExt = false;
+ Entry.IsSExt = true;
+ Entry.IsZExt = false;
Args.push_back(Entry);
SDValue Func = DAG.getExternalSymbol(TLI.getLibcallName(LC), PtrVT);
TargetLowering::CallLoweringInfo CLI(DAG);
- CLI.setDebugLoc(dl).setChain(Chain)
- .setCallee(TLI.getLibcallCallingConv(LC), RetTy, Func, std::move(Args))
- .setSExtResult();
+ CLI.setDebugLoc(dl)
+ .setChain(Chain)
+ .setLibCallee(TLI.getLibcallCallingConv(LC), RetTy, Func, std::move(Args))
+ .setSExtResult();
std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI);
@@ -2743,6 +2887,7 @@ bool DAGTypeLegalizer::ExpandIntegerOperand(SDNode *N, unsigned OpNo) {
case ISD::SELECT_CC: Res = ExpandIntOp_SELECT_CC(N); break;
case ISD::SETCC: Res = ExpandIntOp_SETCC(N); break;
case ISD::SETCCE: Res = ExpandIntOp_SETCCE(N); break;
+ case ISD::SETCCCARRY: Res = ExpandIntOp_SETCCCARRY(N); break;
case ISD::SINT_TO_FP: Res = ExpandIntOp_SINT_TO_FP(N); break;
case ISD::STORE: Res = ExpandIntOp_STORE(cast<StoreSDNode>(N), OpNo); break;
case ISD::TRUNCATE: Res = ExpandIntOp_TRUNCATE(N); break;
@@ -2877,14 +3022,16 @@ void DAGTypeLegalizer::IntegerExpandSetCCOperands(SDValue &NewLHS,
return;
}
- // Lower with SETCCE if the target supports it.
+ // Lower with SETCCE or SETCCCARRY if the target supports it.
+ EVT HiVT = LHSHi.getValueType();
+ EVT ExpandVT = TLI.getTypeToExpandTo(*DAG.getContext(), HiVT);
+ bool HasSETCCCARRY = TLI.isOperationLegalOrCustom(ISD::SETCCCARRY, ExpandVT);
+
// FIXME: Make all targets support this, then remove the other lowering.
- if (TLI.getOperationAction(
- ISD::SETCCE,
- TLI.getTypeToExpandTo(*DAG.getContext(), LHSLo.getValueType())) ==
- TargetLowering::Custom) {
- // SETCCE can detect < and >= directly. For > and <=, flip operands and
- // condition code.
+ if (HasSETCCCARRY ||
+ TLI.getOperationAction(ISD::SETCCE, ExpandVT) == TargetLowering::Custom) {
+ // SETCCE/SETCCCARRY can detect < and >= directly. For > and <=, flip
+ // operands and condition code.
bool FlipOperands = false;
switch (CCCode) {
case ISD::SETGT: CCCode = ISD::SETLT; FlipOperands = true; break;
@@ -2898,27 +3045,28 @@ void DAGTypeLegalizer::IntegerExpandSetCCOperands(SDValue &NewLHS,
std::swap(LHSHi, RHSHi);
}
// Perform a wide subtraction, feeding the carry from the low part into
- // SETCCE. The SETCCE operation is essentially looking at the high part of
- // the result of LHS - RHS. It is negative iff LHS < RHS. It is zero or
- // positive iff LHS >= RHS.
- SDVTList VTList = DAG.getVTList(LHSLo.getValueType(), MVT::Glue);
- SDValue LowCmp = DAG.getNode(ISD::SUBC, dl, VTList, LHSLo, RHSLo);
- SDValue Res =
- DAG.getNode(ISD::SETCCE, dl, getSetCCResultType(LHSLo.getValueType()),
- LHSHi, RHSHi, LowCmp.getValue(1), DAG.getCondCode(CCCode));
+ // SETCCE/SETCCCARRY. The SETCCE/SETCCCARRY operation is essentially
+ // looking at the high part of the result of LHS - RHS. It is negative
+ // iff LHS < RHS. It is zero or positive iff LHS >= RHS.
+ EVT LoVT = LHSLo.getValueType();
+ SDVTList VTList = DAG.getVTList(
+ LoVT, HasSETCCCARRY ? getSetCCResultType(LoVT) : MVT::Glue);
+ SDValue LowCmp = DAG.getNode(HasSETCCCARRY ? ISD::USUBO : ISD::SUBC, dl,
+ VTList, LHSLo, RHSLo);
+ SDValue Res = DAG.getNode(HasSETCCCARRY ? ISD::SETCCCARRY : ISD::SETCCE, dl,
+ getSetCCResultType(HiVT), LHSHi, RHSHi,
+ LowCmp.getValue(1), DAG.getCondCode(CCCode));
NewLHS = Res;
NewRHS = SDValue();
return;
}
- NewLHS = TLI.SimplifySetCC(getSetCCResultType(LHSHi.getValueType()),
- LHSHi, RHSHi, ISD::SETEQ, false,
- DagCombineInfo, dl);
+ NewLHS = TLI.SimplifySetCC(getSetCCResultType(HiVT), LHSHi, RHSHi, ISD::SETEQ,
+ false, DagCombineInfo, dl);
if (!NewLHS.getNode())
- NewLHS = DAG.getSetCC(dl, getSetCCResultType(LHSHi.getValueType()),
- LHSHi, RHSHi, ISD::SETEQ);
- NewLHS = DAG.getSelect(dl, LoCmp.getValueType(),
- NewLHS, LoCmp, HiCmp);
+ NewLHS =
+ DAG.getSetCC(dl, getSetCCResultType(HiVT), LHSHi, RHSHi, ISD::SETEQ);
+ NewLHS = DAG.getSelect(dl, LoCmp.getValueType(), NewLHS, LoCmp, HiCmp);
NewRHS = SDValue();
}
@@ -2971,8 +3119,8 @@ SDValue DAGTypeLegalizer::ExpandIntOp_SETCC(SDNode *N) {
}
// Otherwise, update N to have the operands specified.
- return SDValue(DAG.UpdateNodeOperands(N, NewLHS, NewRHS,
- DAG.getCondCode(CCCode)), 0);
+ return SDValue(
+ DAG.UpdateNodeOperands(N, NewLHS, NewRHS, DAG.getCondCode(CCCode)), 0);
}
SDValue DAGTypeLegalizer::ExpandIntOp_SETCCE(SDNode *N) {
@@ -2993,6 +3141,24 @@ SDValue DAGTypeLegalizer::ExpandIntOp_SETCCE(SDNode *N) {
LowCmp.getValue(1), Cond);
}
+SDValue DAGTypeLegalizer::ExpandIntOp_SETCCCARRY(SDNode *N) {
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+ SDValue Carry = N->getOperand(2);
+ SDValue Cond = N->getOperand(3);
+ SDLoc dl = SDLoc(N);
+
+ SDValue LHSLo, LHSHi, RHSLo, RHSHi;
+ GetExpandedInteger(LHS, LHSLo, LHSHi);
+ GetExpandedInteger(RHS, RHSLo, RHSHi);
+
+  // Expand to a SUBCARRY for the low part and a smaller SETCCCARRY for the
+  // high.
+ SDVTList VTList = DAG.getVTList(LHSLo.getValueType(), Carry.getValueType());
+ SDValue LowCmp = DAG.getNode(ISD::SUBCARRY, dl, VTList, LHSLo, RHSLo, Carry);
+ return DAG.getNode(ISD::SETCCCARRY, dl, N->getValueType(0), LHSHi, RHSHi,
+ LowCmp.getValue(1), Cond);
+}
+
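
The recursive lowering can be checked against a scalar model: subtracting the low halves yields a borrow, and the high-half comparison folds that borrow in. For the unsigned less-than case (a sketch, 64-bit limbs assumed; the signed conditions differ only in the final high-half compare):

    #include <cstdint>

    // Illustrative model of the borrow-chained compare, not LLVM code.
    static bool ult128(uint64_t AL, uint64_t AH, uint64_t BL, uint64_t BH) {
      unsigned Borrow = AL < BL;  // the SUBCARRY flag from the low halves
      // SETCCCARRY on the high halves: A < B iff AH - BH - Borrow underflows.
      return AH < BH || (AH == BH && Borrow);
    }
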
SDValue DAGTypeLegalizer::ExpandIntOp_Shift(SDNode *N) {
// The value being shifted is legal, but the shift amount is too big.
// It follows that either the result of the shift is undefined, or the
@@ -3226,7 +3392,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_EXTRACT_SUBVECTOR(SDNode *N) {
Ops.push_back(Op);
}
- return DAG.getNode(ISD::BUILD_VECTOR, dl, NOutVT, Ops);
+ return DAG.getBuildVector(NOutVT, dl, Ops);
}
@@ -3269,7 +3435,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BUILD_VECTOR(SDNode *N) {
Ops.push_back(Op);
}
- return DAG.getNode(ISD::BUILD_VECTOR, dl, NOutVT, Ops);
+ return DAG.getBuildVector(NOutVT, dl, Ops);
}
SDValue DAGTypeLegalizer::PromoteIntRes_SCALAR_TO_VECTOR(SDNode *N) {
@@ -3317,7 +3483,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_CONCAT_VECTORS(SDNode *N) {
}
}
- return DAG.getNode(ISD::BUILD_VECTOR, dl, NOutVT, Ops);
+ return DAG.getBuildVector(NOutVT, dl, Ops);
}
SDValue DAGTypeLegalizer::PromoteIntRes_EXTEND_VECTOR_INREG(SDNode *N) {
@@ -3420,5 +3586,5 @@ SDValue DAGTypeLegalizer::PromoteIntOp_CONCAT_VECTORS(SDNode *N) {
}
}
- return DAG.getNode(ISD::BUILD_VECTOR, dl, N->getValueType(0), NewOps);
+ return DAG.getBuildVector(N->getValueType(0), dl, NewOps);
}
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
index cf19d75..001eed9 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
@@ -80,6 +80,7 @@ void DAGTypeLegalizer::PerformExpensiveChecks() {
for (unsigned i = 0, e = Node.getNumValues(); i != e; ++i) {
SDValue Res(&Node, i);
+ EVT VT = Res.getValueType();
bool Failed = false;
unsigned Mapped = 0;
@@ -129,13 +130,17 @@ void DAGTypeLegalizer::PerformExpensiveChecks() {
dbgs() << "Unprocessed value in a map!";
Failed = true;
}
- } else if (isTypeLegal(Res.getValueType()) || IgnoreNodeResults(&Node)) {
+ } else if (isTypeLegal(VT) || IgnoreNodeResults(&Node)) {
if (Mapped > 1) {
dbgs() << "Value with legal type was transformed!";
Failed = true;
}
} else {
- if (Mapped == 0) {
+      // If the value can be kept in HW registers, the softening machinery
+      // can leave it unchanged and not put it in any map.
+ if (Mapped == 0 &&
+ !(getTypeAction(VT) == TargetLowering::TypeSoftenFloat &&
+ isLegalInHWReg(VT))) {
dbgs() << "Processed value not in any map!";
Failed = true;
} else if (Mapped & (Mapped - 1)) {
@@ -199,8 +204,7 @@ bool DAGTypeLegalizer::run() {
// non-leaves.
for (SDNode &Node : DAG.allnodes()) {
if (Node.getNumOperands() == 0) {
- Node.setNodeId(ReadyToProcess);
- Worklist.push_back(&Node);
+ AddToWorklist(&Node);
} else {
Node.setNodeId(Unanalyzed);
}
@@ -331,6 +335,7 @@ ScanOperands:
// to the worklist etc.
if (NeedsReanalyzing) {
assert(N->getNodeId() == ReadyToProcess && "Node ID recalculated?");
+
N->setNodeId(NewNode);
// Recompute the NodeId and correct processed operands, adding the node to
// the worklist if ready.
@@ -918,9 +923,9 @@ SDValue DAGTypeLegalizer::BitConvertVectorToIntegerVector(SDValue Op) {
assert(Op.getValueType().isVector() && "Only applies to vectors!");
unsigned EltWidth = Op.getScalarValueSizeInBits();
EVT EltNVT = EVT::getIntegerVT(*DAG.getContext(), EltWidth);
- unsigned NumElts = Op.getValueType().getVectorNumElements();
+ auto EltCnt = Op.getValueType().getVectorElementCount();
return DAG.getNode(ISD::BITCAST, SDLoc(Op),
- EVT::getVectorVT(*DAG.getContext(), EltNVT, NumElts), Op);
+ EVT::getVectorVT(*DAG.getContext(), EltNVT, EltCnt), Op);
}
SDValue DAGTypeLegalizer::CreateStackStoreLoad(SDValue Op,
@@ -1077,8 +1082,8 @@ DAGTypeLegalizer::ExpandChainLibCall(RTLIB::Libcall LC, SDNode *Node,
Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
Entry.Node = Node->getOperand(i);
Entry.Ty = ArgTy;
- Entry.isSExt = isSigned;
- Entry.isZExt = !isSigned;
+ Entry.IsSExt = isSigned;
+ Entry.IsZExt = !isSigned;
Args.push_back(Entry);
}
SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
@@ -1087,9 +1092,12 @@ DAGTypeLegalizer::ExpandChainLibCall(RTLIB::Libcall LC, SDNode *Node,
Type *RetTy = Node->getValueType(0).getTypeForEVT(*DAG.getContext());
TargetLowering::CallLoweringInfo CLI(DAG);
- CLI.setDebugLoc(SDLoc(Node)).setChain(InChain)
- .setCallee(TLI.getLibcallCallingConv(LC), RetTy, Callee, std::move(Args))
- .setSExtResult(isSigned).setZExtResult(!isSigned);
+ CLI.setDebugLoc(SDLoc(Node))
+ .setChain(InChain)
+ .setLibCallee(TLI.getLibcallCallingConv(LC), RetTy, Callee,
+ std::move(Args))
+ .setSExtResult(isSigned)
+ .setZExtResult(!isSigned);
std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI);
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index ec55662..c46d1b0 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -191,6 +191,11 @@ private:
void SplitInteger(SDValue Op, EVT LoVT, EVT HiVT,
SDValue &Lo, SDValue &Hi);
+ void AddToWorklist(SDNode *N) {
+ N->setNodeId(ReadyToProcess);
+ Worklist.push_back(N);
+ }
+
//===--------------------------------------------------------------------===//
// Integer Promotion Support: LegalizeIntegerTypes.cpp
//===--------------------------------------------------------------------===//
@@ -274,6 +279,7 @@ private:
SDValue PromoteIntRes_SRL(SDNode *N);
SDValue PromoteIntRes_TRUNCATE(SDNode *N);
SDValue PromoteIntRes_UADDSUBO(SDNode *N, unsigned ResNo);
+ SDValue PromoteIntRes_ADDSUBCARRY(SDNode *N, unsigned ResNo);
SDValue PromoteIntRes_UNDEF(SDNode *N);
SDValue PromoteIntRes_VAARG(SDNode *N);
SDValue PromoteIntRes_XMULO(SDNode *N, unsigned ResNo);
@@ -306,6 +312,7 @@ private:
SDValue PromoteIntOp_MLOAD(MaskedLoadSDNode *N, unsigned OpNo);
SDValue PromoteIntOp_MSCATTER(MaskedScatterSDNode *N, unsigned OpNo);
SDValue PromoteIntOp_MGATHER(MaskedGatherSDNode *N, unsigned OpNo);
+ SDValue PromoteIntOp_ADDSUBCARRY(SDNode *N, unsigned OpNo);
void PromoteSetCCOperands(SDValue &LHS,SDValue &RHS, ISD::CondCode Code);
@@ -345,6 +352,7 @@ private:
void ExpandIntRes_ADDSUB (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_ADDSUBC (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_ADDSUBE (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_ADDSUBCARRY (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_BITREVERSE (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_BSWAP (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_MUL (SDNode *N, SDValue &Lo, SDValue &Hi);
@@ -373,6 +381,7 @@ private:
SDValue ExpandIntOp_SELECT_CC(SDNode *N);
SDValue ExpandIntOp_SETCC(SDNode *N);
SDValue ExpandIntOp_SETCCE(SDNode *N);
+ SDValue ExpandIntOp_SETCCCARRY(SDNode *N);
SDValue ExpandIntOp_Shift(SDNode *N);
SDValue ExpandIntOp_SINT_TO_FP(SDNode *N);
SDValue ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo);
@@ -407,23 +416,13 @@ private:
}
void SetSoftenedFloat(SDValue Op, SDValue Result);
- // Call ReplaceValueWith(SDValue(N, ResNo), Res) if necessary.
- void ReplaceSoftenFloatResult(SDNode *N, unsigned ResNo, SDValue &NewRes) {
- // When the result type can be kept in HW registers, the converted
- // NewRes node could have the same type. We can save the effort in
- // cloning every user of N in SoftenFloatOperand or other legalization functions,
- // by calling ReplaceValueWith here to update all users.
- if (NewRes.getNode() != N && isLegalInHWReg(N->getValueType(ResNo)))
- ReplaceValueWith(SDValue(N, ResNo), NewRes);
- }
-
// Convert Float Results to Integer for Non-HW-supported Operations.
bool SoftenFloatResult(SDNode *N, unsigned ResNo);
SDValue SoftenFloatRes_MERGE_VALUES(SDNode *N, unsigned ResNo);
SDValue SoftenFloatRes_BITCAST(SDNode *N, unsigned ResNo);
SDValue SoftenFloatRes_BUILD_PAIR(SDNode *N);
SDValue SoftenFloatRes_ConstantFP(SDNode *N, unsigned ResNo);
- SDValue SoftenFloatRes_EXTRACT_VECTOR_ELT(SDNode *N);
+ SDValue SoftenFloatRes_EXTRACT_VECTOR_ELT(SDNode *N, unsigned ResNo);
SDValue SoftenFloatRes_FABS(SDNode *N, unsigned ResNo);
SDValue SoftenFloatRes_FMINNUM(SDNode *N);
SDValue SoftenFloatRes_FMAXNUM(SDNode *N);
@@ -462,17 +461,23 @@ private:
SDValue SoftenFloatRes_XINT_TO_FP(SDNode *N);
// Return true if we can skip softening the given operand or SDNode because
- // it was soften before by SoftenFloatResult and references to the operand
- // were replaced by ReplaceValueWith.
+  // either it was softened before by SoftenFloatResult and references to the
+  // operand were replaced by ReplaceValueWith, or its value type is legal in
+  // HW registers and the operand can be left unchanged.
bool CanSkipSoftenFloatOperand(SDNode *N, unsigned OpNo);
// Convert Float Operand to Integer for Non-HW-supported Operations.
bool SoftenFloatOperand(SDNode *N, unsigned OpNo);
SDValue SoftenFloatOp_BITCAST(SDNode *N);
+ SDValue SoftenFloatOp_COPY_TO_REG(SDNode *N);
SDValue SoftenFloatOp_BR_CC(SDNode *N);
+ SDValue SoftenFloatOp_FABS(SDNode *N);
+ SDValue SoftenFloatOp_FCOPYSIGN(SDNode *N);
+ SDValue SoftenFloatOp_FNEG(SDNode *N);
SDValue SoftenFloatOp_FP_EXTEND(SDNode *N);
SDValue SoftenFloatOp_FP_ROUND(SDNode *N);
SDValue SoftenFloatOp_FP_TO_XINT(SDNode *N);
+ SDValue SoftenFloatOp_SELECT(SDNode *N);
SDValue SoftenFloatOp_SELECT_CC(SDNode *N);
SDValue SoftenFloatOp_SETCC(SDNode *N);
SDValue SoftenFloatOp_STORE(SDNode *N, unsigned OpNo);
@@ -597,6 +602,7 @@ private:
SDValue ScalarizeVecRes_TernaryOp(SDNode *N);
SDValue ScalarizeVecRes_UnaryOp(SDNode *N);
SDValue ScalarizeVecRes_InregOp(SDNode *N);
+ SDValue ScalarizeVecRes_VecInregOp(SDNode *N);
SDValue ScalarizeVecRes_BITCAST(SDNode *N);
SDValue ScalarizeVecRes_BUILD_VECTOR(SDNode *N);
@@ -621,6 +627,7 @@ private:
SDValue ScalarizeVecOp_CONCAT_VECTORS(SDNode *N);
SDValue ScalarizeVecOp_EXTRACT_VECTOR_ELT(SDNode *N);
SDValue ScalarizeVecOp_VSELECT(SDNode *N);
+ SDValue ScalarizeVecOp_VSETCC(SDNode *N);
SDValue ScalarizeVecOp_STORE(StoreSDNode *N, unsigned OpNo);
SDValue ScalarizeVecOp_FP_ROUND(SDNode *N, unsigned OpNo);
@@ -666,12 +673,14 @@ private:
// Vector Operand Splitting: <128 x ty> -> 2 x <64 x ty>.
bool SplitVectorOperand(SDNode *N, unsigned OpNo);
SDValue SplitVecOp_VSELECT(SDNode *N, unsigned OpNo);
+ SDValue SplitVecOp_VECREDUCE(SDNode *N, unsigned OpNo);
SDValue SplitVecOp_UnaryOp(SDNode *N);
SDValue SplitVecOp_TruncateHelper(SDNode *N);
SDValue SplitVecOp_BITCAST(SDNode *N);
SDValue SplitVecOp_EXTRACT_SUBVECTOR(SDNode *N);
SDValue SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N);
+ SDValue SplitVecOp_ExtVecInRegOp(SDNode *N);
SDValue SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo);
SDValue SplitVecOp_MSTORE(MaskedStoreSDNode *N, unsigned OpNo);
SDValue SplitVecOp_MSCATTER(MaskedScatterSDNode *N, unsigned OpNo);
@@ -713,6 +722,7 @@ private:
SDValue WidenVecRes_MGATHER(MaskedGatherSDNode* N);
SDValue WidenVecRes_SCALAR_TO_VECTOR(SDNode* N);
SDValue WidenVecRes_SELECT(SDNode* N);
+ SDValue WidenVSELECTAndMask(SDNode *N);
SDValue WidenVecRes_SELECT_CC(SDNode* N);
SDValue WidenVecRes_SETCC(SDNode* N);
SDValue WidenVecRes_UNDEF(SDNode *N);
@@ -782,6 +792,13 @@ private:
/// By default, the vector will be widened with undefined values.
SDValue ModifyToType(SDValue InOp, EVT NVT, bool FillWithZeroes = false);
+  /// Return a mask of vector type MaskVT to replace InMask, extending or
+  /// truncating it to ToMaskVT if needed.
+ SDValue convertMask(SDValue InMask, EVT MaskVT, EVT ToMaskVT);
+
+ /// Get the target mask VT, and widen if needed.
+ EVT getSETCCWidenedResultTy(SDValue SetCC);
+
//===--------------------------------------------------------------------===//
// Generic Splitting: LegalizeTypesGeneric.cpp
//===--------------------------------------------------------------------===//
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
index 3682c32..f330615 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
@@ -57,7 +57,7 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) {
// Expand the floating point operand only if it was converted to integers.
// Otherwise, it is a legal type like f128 that can be saved in a register.
auto SoftenedOp = GetSoftenedFloat(InOp);
- if (SoftenedOp == InOp)
+ if (isLegalInHWReg(SoftenedOp.getValueType()))
break;
SplitInteger(SoftenedOp, Lo, Hi);
Lo = DAG.getNode(ISD::BITCAST, dl, NOutVT, Lo);
@@ -362,8 +362,8 @@ SDValue DAGTypeLegalizer::ExpandOp_BITCAST(SDNode *N) {
SmallVector<SDValue, 8> Ops;
IntegerToVector(N->getOperand(0), NumElts, Ops, NVT.getVectorElementType());
- SDValue Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, NVT,
- makeArrayRef(Ops.data(), NumElts));
+ SDValue Vec =
+ DAG.getBuildVector(NVT, dl, makeArrayRef(Ops.data(), NumElts));
return DAG.getNode(ISD::BITCAST, dl, N->getValueType(0), Vec);
}
@@ -396,10 +396,8 @@ SDValue DAGTypeLegalizer::ExpandOp_BUILD_VECTOR(SDNode *N) {
NewElts.push_back(Hi);
}
- SDValue NewVec = DAG.getNode(ISD::BUILD_VECTOR, dl,
- EVT::getVectorVT(*DAG.getContext(),
- NewVT, NewElts.size()),
- NewElts);
+ EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewVT, NewElts.size());
+ SDValue NewVec = DAG.getBuildVector(NewVecVT, dl, NewElts);
// Convert the new vector to the old vector type.
return DAG.getNode(ISD::BITCAST, dl, VecVT, NewVec);
@@ -458,7 +456,7 @@ SDValue DAGTypeLegalizer::ExpandOp_SCALAR_TO_VECTOR(SDNode *N) {
SDValue UndefVal = DAG.getUNDEF(Ops[0].getValueType());
for (unsigned i = 1; i < NumElts; ++i)
Ops[i] = UndefVal;
- return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops);
+ return DAG.getBuildVector(VT, dl, Ops);
}
SDValue DAGTypeLegalizer::ExpandOp_NormalStore(SDNode *N, unsigned OpNo) {
@@ -512,8 +510,24 @@ void DAGTypeLegalizer::SplitRes_MERGE_VALUES(SDNode *N, unsigned ResNo,
GetSplitOp(Op, Lo, Hi);
}
-void DAGTypeLegalizer::SplitRes_SELECT(SDNode *N, SDValue &Lo,
- SDValue &Hi) {
+static std::pair<SDValue, SDValue> SplitVSETCC(const SDNode *N,
+ SelectionDAG &DAG) {
+ SDLoc DL(N);
+ EVT LoVT, HiVT;
+ std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
+
+ // Split the inputs.
+ SDValue Lo, Hi, LL, LH, RL, RH;
+ std::tie(LL, LH) = DAG.SplitVectorOperand(N, 0);
+ std::tie(RL, RH) = DAG.SplitVectorOperand(N, 1);
+
+ Lo = DAG.getNode(N->getOpcode(), DL, LoVT, LL, RL, N->getOperand(2));
+ Hi = DAG.getNode(N->getOpcode(), DL, HiVT, LH, RH, N->getOperand(2));
+
+ return std::make_pair(Lo, Hi);
+}
+
+void DAGTypeLegalizer::SplitRes_SELECT(SDNode *N, SDValue &Lo, SDValue &Hi) {
SDValue LL, LH, RL, RH, CL, CH;
SDLoc dl(N);
GetSplitOp(N->getOperand(1), LL, LH);
@@ -522,9 +536,16 @@ void DAGTypeLegalizer::SplitRes_SELECT(SDNode *N, SDValue &Lo,
SDValue Cond = N->getOperand(0);
CL = CH = Cond;
if (Cond.getValueType().isVector()) {
+ if (SDValue Res = WidenVSELECTAndMask(N))
+ std::tie(CL, CH) = DAG.SplitVector(Res->getOperand(0), dl);
+ // It seems to improve code to generate two narrow SETCCs as opposed to
+ // splitting a wide result vector.
+ else if (Cond.getOpcode() == ISD::SETCC)
+ std::tie(CL, CH) = SplitVSETCC(Cond.getNode(), DAG);
    // Check if there are already split versions of the vector available and
// use those instead of splitting the mask operand again.
- if (getTypeAction(Cond.getValueType()) == TargetLowering::TypeSplitVector)
+ else if (getTypeAction(Cond.getValueType()) ==
+ TargetLowering::TypeSplitVector)
GetSplitVector(Cond, CL, CH);
else
std::tie(CL, CH) = DAG.SplitVector(Cond, dl);
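
SplitVSETCC works because an elementwise compare distributes over vector halves: comparing two wide vectors equals concatenating two half-width compares, and the two narrow SETCCs tend to legalize better than one split-apart wide result. A sketch over plain arrays (widths are illustrative, not LLVM code):

    #include <array>
    #include <cstddef>

    template <std::size_t N>
    static std::array<bool, N> cmpLT(const std::array<int, N> &A,
                                     const std::array<int, N> &B) {
      std::array<bool, N> R{};
      for (std::size_t i = 0; i < N; ++i)
        R[i] = A[i] < B[i];  // one lane of the SETCC
      return R;
    }
    // cmpLT over 8 lanes equals cmpLT over the low 4 lanes concatenated with
    // cmpLT over the high 4 lanes, which is how Lo and Hi are formed above.
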
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index d4fa20f..9355dbe 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -105,6 +105,7 @@ class VectorLegalizer {
SDValue ExpandLoad(SDValue Op);
SDValue ExpandStore(SDValue Op);
SDValue ExpandFNEG(SDValue Op);
+ SDValue ExpandFSUB(SDValue Op);
SDValue ExpandBITREVERSE(SDValue Op);
SDValue ExpandCTLZ(SDValue Op);
SDValue ExpandCTTZ_ZERO_UNDEF(SDValue Op);
@@ -224,6 +225,7 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
}
return TranslateLegalizeResults(Op, Lowered);
}
+ LLVM_FALLTHROUGH;
case TargetLowering::Expand:
Changed = true;
return LegalizeOp(ExpandLoad(Op));
@@ -621,8 +623,7 @@ SDValue VectorLegalizer::ExpandLoad(SDValue Op) {
}
NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains);
- Value = DAG.getNode(ISD::BUILD_VECTOR, dl,
- Op.getNode()->getValueType(0), Vals);
+ Value = DAG.getBuildVector(Op.getNode()->getValueType(0), dl, Vals);
} else {
SDValue Scalarized = TLI.scalarizeVectorLoad(LD, DAG);
@@ -692,6 +693,8 @@ SDValue VectorLegalizer::Expand(SDValue Op) {
return ExpandUINT_TO_FLOAT(Op);
case ISD::FNEG:
return ExpandFNEG(Op);
+ case ISD::FSUB:
+ return ExpandFSUB(Op);
case ISD::SETCC:
return UnrollVSETCC(Op);
case ISD::BITREVERSE:
@@ -720,8 +723,6 @@ SDValue VectorLegalizer::ExpandSELECT(SDValue Op) {
assert(VT.isVector() && !Mask.getValueType().isVector()
&& Op1.getValueType() == Op2.getValueType() && "Invalid type");
- unsigned NumElem = VT.getVectorNumElements();
-
// If we can't even use the basic vector operations of
// AND,OR,XOR, we will have to scalarize the op.
// Notice that the operation may be 'promoted' which means that it is
@@ -745,8 +746,7 @@ SDValue VectorLegalizer::ExpandSELECT(SDValue Op) {
DAG.getConstant(0, DL, BitTy));
// Broadcast the mask so that the entire vector is all-one or all zero.
- SmallVector<SDValue, 8> Ops(NumElem, Mask);
- Mask = DAG.getNode(ISD::BUILD_VECTOR, DL, MaskTy, Ops);
+ Mask = DAG.getSplatBuildVector(MaskTy, DL, Mask);
// Bitcast the operands to be the same type as the mask.
// This is needed when we select between FP types because
@@ -1025,6 +1025,18 @@ SDValue VectorLegalizer::ExpandFNEG(SDValue Op) {
return DAG.UnrollVectorOp(Op.getNode());
}
+SDValue VectorLegalizer::ExpandFSUB(SDValue Op) {
+  // For floating-point values, (a-b) is the same as a+(-b). If FNEG and FADD
+  // are legal, we can defer this to operation legalization, where it will be
+  // lowered as a+(-b).
+ EVT VT = Op.getValueType();
+ if (TLI.isOperationLegalOrCustom(ISD::FNEG, VT) &&
+ TLI.isOperationLegalOrCustom(ISD::FADD, VT))
+ return Op; // Defer to LegalizeDAG
+
+ return DAG.UnrollVectorOp(Op.getNode());
+}
+
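
The deferral leans on the IEEE-754 identity a - b = a + (-b): negation only flips the sign bit, so the rewritten form rounds identically in every case, and a target with legal FNEG and FADD needs no dedicated vector FSUB. Per lane, operation legalization will emit the equivalent of:

    // One lane of the deferred FSUB lowering (illustrative).
    static float fsubViaFneg(float A, float B) { return A + (-B); }
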
SDValue VectorLegalizer::ExpandCTLZ(SDValue Op) {
EVT VT = Op.getValueType();
unsigned NumBitsPerElt = VT.getScalarSizeInBits();
@@ -1102,7 +1114,7 @@ SDValue VectorLegalizer::UnrollVSETCC(SDValue Op) {
(EltVT.getSizeInBits()), dl, EltVT),
DAG.getConstant(0, dl, EltVT));
}
- return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops);
+ return DAG.getBuildVector(VT, dl, Ops);
}
}
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 6906f67..6aa3270 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -65,6 +65,11 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) {
case ISD::SETCC: R = ScalarizeVecRes_SETCC(N); break;
case ISD::UNDEF: R = ScalarizeVecRes_UNDEF(N); break;
case ISD::VECTOR_SHUFFLE: R = ScalarizeVecRes_VECTOR_SHUFFLE(N); break;
+ case ISD::ANY_EXTEND_VECTOR_INREG:
+ case ISD::SIGN_EXTEND_VECTOR_INREG:
+ case ISD::ZERO_EXTEND_VECTOR_INREG:
+ R = ScalarizeVecRes_VecInregOp(N);
+ break;
case ISD::ANY_EXTEND:
case ISD::BITREVERSE:
case ISD::BSWAP:
@@ -97,6 +102,7 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) {
case ISD::TRUNCATE:
case ISD::UINT_TO_FP:
case ISD::ZERO_EXTEND:
+ case ISD::FCANONICALIZE:
R = ScalarizeVecRes_UnaryOp(N);
break;
@@ -257,6 +263,34 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_InregOp(SDNode *N) {
LHS, DAG.getValueType(ExtVT));
}
+SDValue DAGTypeLegalizer::ScalarizeVecRes_VecInregOp(SDNode *N) {
+ SDLoc DL(N);
+ SDValue Op = N->getOperand(0);
+
+ EVT OpVT = Op.getValueType();
+ EVT OpEltVT = OpVT.getVectorElementType();
+ EVT EltVT = N->getValueType(0).getVectorElementType();
+
+ if (getTypeAction(OpVT) == TargetLowering::TypeScalarizeVector) {
+ Op = GetScalarizedVector(Op);
+ } else {
+ Op = DAG.getNode(
+ ISD::EXTRACT_VECTOR_ELT, DL, OpEltVT, Op,
+ DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
+ }
+
+ switch (N->getOpcode()) {
+ case ISD::ANY_EXTEND_VECTOR_INREG:
+ return DAG.getNode(ISD::ANY_EXTEND, DL, EltVT, Op);
+ case ISD::SIGN_EXTEND_VECTOR_INREG:
+ return DAG.getNode(ISD::SIGN_EXTEND, DL, EltVT, Op);
+ case ISD::ZERO_EXTEND_VECTOR_INREG:
+ return DAG.getNode(ISD::ZERO_EXTEND, DL, EltVT, Op);
+ }
+
+ llvm_unreachable("Illegal extend_vector_inreg opcode");
+}
+
SDValue DAGTypeLegalizer::ScalarizeVecRes_SCALAR_TO_VECTOR(SDNode *N) {
// If the operand is wider than the vector element type then it is implicitly
// truncated. Make that explicit here.
@@ -268,7 +302,21 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_SCALAR_TO_VECTOR(SDNode *N) {
}
SDValue DAGTypeLegalizer::ScalarizeVecRes_VSELECT(SDNode *N) {
- SDValue Cond = GetScalarizedVector(N->getOperand(0));
+ SDValue Cond = N->getOperand(0);
+ EVT OpVT = Cond.getValueType();
+ SDLoc DL(N);
+  // The vselect result and true/false value operands need scalarizing, but
+  // it is not a given that the Cond does. For instance, in AVX512 v1i1 is
+  // legal. See the similar logic in ScalarizeVecRes_VSETCC.
+ if (getTypeAction(OpVT) == TargetLowering::TypeScalarizeVector) {
+ Cond = GetScalarizedVector(Cond);
+ } else {
+ EVT VT = OpVT.getVectorElementType();
+ Cond = DAG.getNode(
+ ISD::EXTRACT_VECTOR_ELT, DL, VT, Cond,
+ DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
+ }
+
SDValue LHS = GetScalarizedVector(N->getOperand(1));
TargetLowering::BooleanContent ScalarBool =
TLI.getBooleanContents(false, false);
@@ -436,6 +484,9 @@ bool DAGTypeLegalizer::ScalarizeVectorOperand(SDNode *N, unsigned OpNo) {
case ISD::VSELECT:
Res = ScalarizeVecOp_VSELECT(N);
break;
+ case ISD::SETCC:
+ Res = ScalarizeVecOp_VSETCC(N);
+ break;
case ISD::STORE:
Res = ScalarizeVecOp_STORE(cast<StoreSDNode>(N), OpNo);
break;
@@ -478,7 +529,7 @@ SDValue DAGTypeLegalizer::ScalarizeVecOp_UnaryOp(SDNode *N) {
N->getValueType(0).getScalarType(), Elt);
// Revectorize the result so the types line up with what the uses of this
// expression expect.
- return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), N->getValueType(0), Op);
+ return DAG.getBuildVector(N->getValueType(0), SDLoc(N), Op);
}
/// The vectors to concatenate have length one - use a BUILD_VECTOR instead.
@@ -486,20 +537,21 @@ SDValue DAGTypeLegalizer::ScalarizeVecOp_CONCAT_VECTORS(SDNode *N) {
SmallVector<SDValue, 8> Ops(N->getNumOperands());
for (unsigned i = 0, e = N->getNumOperands(); i < e; ++i)
Ops[i] = GetScalarizedVector(N->getOperand(i));
- return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), N->getValueType(0), Ops);
+ return DAG.getBuildVector(N->getValueType(0), SDLoc(N), Ops);
}
/// If the input is a vector that needs to be scalarized, it must be <1 x ty>,
/// so just return the element, ignoring the index.
SDValue DAGTypeLegalizer::ScalarizeVecOp_EXTRACT_VECTOR_ELT(SDNode *N) {
+ EVT VT = N->getValueType(0);
SDValue Res = GetScalarizedVector(N->getOperand(0));
- if (Res.getValueType() != N->getValueType(0))
- Res = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), N->getValueType(0),
- Res);
+ if (Res.getValueType() != VT)
+ Res = VT.isFloatingPoint()
+ ? DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, Res)
+ : DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, Res);
return Res;
}
-
/// If the input condition is a vector that needs to be scalarized, it must be
/// <1 x i1>, so just convert to a normal ISD::SELECT
/// (still with vector output type since that was acceptable if we got here).
@@ -511,6 +563,36 @@ SDValue DAGTypeLegalizer::ScalarizeVecOp_VSELECT(SDNode *N) {
N->getOperand(2));
}
+/// If the operand is a vector that needs to be scalarized, the result must be
+/// v1i1, so just convert to a scalar SETCC and wrap it with a
+/// scalar_to_vector, since the result type is legal if we got here.
+SDValue DAGTypeLegalizer::ScalarizeVecOp_VSETCC(SDNode *N) {
+ assert(N->getValueType(0).isVector() &&
+ N->getOperand(0).getValueType().isVector() &&
+ "Operand types must be vectors");
+ assert(N->getValueType(0) == MVT::v1i1 && "Expected v1i1 type");
+
+ EVT VT = N->getValueType(0);
+ SDValue LHS = GetScalarizedVector(N->getOperand(0));
+ SDValue RHS = GetScalarizedVector(N->getOperand(1));
+
+ EVT OpVT = N->getOperand(0).getValueType();
+ EVT NVT = VT.getVectorElementType();
+ SDLoc DL(N);
+ // Turn it into a scalar SETCC.
+ SDValue Res = DAG.getNode(ISD::SETCC, DL, MVT::i1, LHS, RHS,
+ N->getOperand(2));
+
+  // Vectors may have different boolean contents than scalars. Promote the
+  // value appropriately.
+ ISD::NodeType ExtendCode =
+ TargetLowering::getExtendForContent(TLI.getBooleanContents(OpVT));
+
+ Res = DAG.getNode(ExtendCode, DL, NVT, Res);
+
+ return DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, Res);
+}
+
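
getExtendForContent maps the target's boolean convention onto the extension used for the scalar i1 compare result: zero extension for 0/1 booleans, sign extension for 0/-1 masks. A scalar model (the helper is illustrative):

    #include <cstdint>

    static int32_t extendBool(bool B, bool ZeroOrNegativeOne) {
      return ZeroOrNegativeOne
                 ? -static_cast<int32_t>(B)  // SIGN_EXTEND: 0 or -1
                 : static_cast<int32_t>(B);  // ZERO_EXTEND: 0 or 1
    }
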
/// If the value to store is a vector that needs to be scalarized, it must be
/// <1 x ty>. Just store the element.
SDValue DAGTypeLegalizer::ScalarizeVecOp_STORE(StoreSDNode *N, unsigned OpNo){
@@ -637,6 +719,7 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
case ISD::SINT_TO_FP:
case ISD::TRUNCATE:
case ISD::UINT_TO_FP:
+ case ISD::FCANONICALIZE:
SplitVecRes_UnaryOp(N, Lo, Hi);
break;
@@ -695,7 +778,7 @@ void DAGTypeLegalizer::SplitVecRes_BinOp(SDNode *N, SDValue &Lo,
GetSplitVector(N->getOperand(1), RHSLo, RHSHi);
SDLoc dl(N);
- const SDNodeFlags *Flags = N->getFlags();
+ const SDNodeFlags Flags = N->getFlags();
unsigned Opcode = N->getOpcode();
Lo = DAG.getNode(Opcode, dl, LHSLo.getValueType(), LHSLo, RHSLo, Flags);
Hi = DAG.getNode(Opcode, dl, LHSHi.getValueType(), LHSHi, RHSHi, Flags);
@@ -781,10 +864,10 @@ void DAGTypeLegalizer::SplitVecRes_BUILD_VECTOR(SDNode *N, SDValue &Lo,
std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
unsigned LoNumElts = LoVT.getVectorNumElements();
SmallVector<SDValue, 8> LoOps(N->op_begin(), N->op_begin()+LoNumElts);
- Lo = DAG.getNode(ISD::BUILD_VECTOR, dl, LoVT, LoOps);
+ Lo = DAG.getBuildVector(LoVT, dl, LoOps);
SmallVector<SDValue, 8> HiOps(N->op_begin()+LoNumElts, N->op_end());
- Hi = DAG.getNode(ISD::BUILD_VECTOR, dl, HiVT, HiOps);
+ Hi = DAG.getBuildVector(HiVT, dl, HiOps);
}
void DAGTypeLegalizer::SplitVecRes_CONCAT_VECTORS(SDNode *N, SDValue &Lo,
@@ -928,7 +1011,12 @@ void DAGTypeLegalizer::SplitVecRes_ExtVecInRegOp(SDNode *N, SDValue &Lo,
SDLoc dl(N);
SDValue InLo, InHi;
- GetSplitVector(N0, InLo, InHi);
+
+ if (getTypeAction(N0.getValueType()) == TargetLowering::TypeSplitVector)
+ GetSplitVector(N0, InLo, InHi);
+ else
+ std::tie(InLo, InHi) = DAG.SplitVectorOperand(N, 0);
+
EVT InLoVT = InLo.getValueType();
unsigned InNumElements = InLoVT.getVectorNumElements();
@@ -1253,12 +1341,9 @@ void DAGTypeLegalizer::SplitVecRes_ExtendOp(SDNode *N, SDValue &Lo,
if ((NumElements & 1) == 0 &&
SrcVT.getSizeInBits() * 2 < DestVT.getSizeInBits()) {
LLVMContext &Ctx = *DAG.getContext();
- EVT NewSrcVT = EVT::getVectorVT(
- Ctx, EVT::getIntegerVT(
- Ctx, SrcVT.getScalarSizeInBits() * 2),
- NumElements);
- EVT SplitSrcVT =
- EVT::getVectorVT(Ctx, SrcVT.getVectorElementType(), NumElements / 2);
+ EVT NewSrcVT = SrcVT.widenIntegerVectorElementType(Ctx);
+ EVT SplitSrcVT = SrcVT.getHalfNumVectorElementsVT(Ctx);
+
EVT SplitLoVT, SplitHiVT;
std::tie(SplitLoVT, SplitHiVT) = DAG.GetSplitDestVTs(NewSrcVT);
if (TLI.isTypeLegal(SrcVT) && !TLI.isTypeLegal(SplitSrcVT) &&
@@ -1372,7 +1457,7 @@ void DAGTypeLegalizer::SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N,
}
// Construct the Lo/Hi output using a BUILD_VECTOR.
- Output = DAG.getNode(ISD::BUILD_VECTOR, dl, NewVT, SVOps);
+ Output = DAG.getBuildVector(NewVT, dl, SVOps);
} else if (InputUsed[0] == -1U) {
// No input vectors were used! The result is undefined.
Output = DAG.getUNDEF(NewVT);
@@ -1466,8 +1551,31 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) {
case ISD::ZERO_EXTEND:
case ISD::ANY_EXTEND:
case ISD::FTRUNC:
+ case ISD::FCANONICALIZE:
Res = SplitVecOp_UnaryOp(N);
break;
+
+ case ISD::ANY_EXTEND_VECTOR_INREG:
+ case ISD::SIGN_EXTEND_VECTOR_INREG:
+ case ISD::ZERO_EXTEND_VECTOR_INREG:
+ Res = SplitVecOp_ExtVecInRegOp(N);
+ break;
+
+ case ISD::VECREDUCE_FADD:
+ case ISD::VECREDUCE_FMUL:
+ case ISD::VECREDUCE_ADD:
+ case ISD::VECREDUCE_MUL:
+ case ISD::VECREDUCE_AND:
+ case ISD::VECREDUCE_OR:
+ case ISD::VECREDUCE_XOR:
+ case ISD::VECREDUCE_SMAX:
+ case ISD::VECREDUCE_SMIN:
+ case ISD::VECREDUCE_UMAX:
+ case ISD::VECREDUCE_UMIN:
+ case ISD::VECREDUCE_FMAX:
+ case ISD::VECREDUCE_FMIN:
+ Res = SplitVecOp_VECREDUCE(N, OpNo);
+ break;
}
}
@@ -1520,6 +1628,48 @@ SDValue DAGTypeLegalizer::SplitVecOp_VSELECT(SDNode *N, unsigned OpNo) {
return DAG.getNode(ISD::CONCAT_VECTORS, DL, Src0VT, LoSelect, HiSelect);
}
+SDValue DAGTypeLegalizer::SplitVecOp_VECREDUCE(SDNode *N, unsigned OpNo) {
+ EVT ResVT = N->getValueType(0);
+ SDValue Lo, Hi;
+ SDLoc dl(N);
+
+ SDValue VecOp = N->getOperand(OpNo);
+ EVT VecVT = VecOp.getValueType();
+ assert(VecVT.isVector() && "Can only split reduce vector operand");
+ GetSplitVector(VecOp, Lo, Hi);
+ EVT LoOpVT, HiOpVT;
+ std::tie(LoOpVT, HiOpVT) = DAG.GetSplitDestVTs(VecVT);
+
+ bool NoNaN = N->getFlags().hasNoNaNs();
+ unsigned CombineOpc = 0;
+ switch (N->getOpcode()) {
+ case ISD::VECREDUCE_FADD: CombineOpc = ISD::FADD; break;
+ case ISD::VECREDUCE_FMUL: CombineOpc = ISD::FMUL; break;
+ case ISD::VECREDUCE_ADD: CombineOpc = ISD::ADD; break;
+ case ISD::VECREDUCE_MUL: CombineOpc = ISD::MUL; break;
+ case ISD::VECREDUCE_AND: CombineOpc = ISD::AND; break;
+ case ISD::VECREDUCE_OR: CombineOpc = ISD::OR; break;
+ case ISD::VECREDUCE_XOR: CombineOpc = ISD::XOR; break;
+ case ISD::VECREDUCE_SMAX: CombineOpc = ISD::SMAX; break;
+ case ISD::VECREDUCE_SMIN: CombineOpc = ISD::SMIN; break;
+ case ISD::VECREDUCE_UMAX: CombineOpc = ISD::UMAX; break;
+ case ISD::VECREDUCE_UMIN: CombineOpc = ISD::UMIN; break;
+ case ISD::VECREDUCE_FMAX:
+ CombineOpc = NoNaN ? ISD::FMAXNUM : ISD::FMAXNAN;
+ break;
+ case ISD::VECREDUCE_FMIN:
+ CombineOpc = NoNaN ? ISD::FMINNUM : ISD::FMINNAN;
+ break;
+ default:
+ llvm_unreachable("Unexpected reduce ISD node");
+ }
+
+  // Combine the split subvectors with the appropriate binary operation, then
+  // reduce the resulting half-width vector.
+ SDValue Partial = DAG.getNode(CombineOpc, dl, LoOpVT, Lo, Hi);
+ return DAG.getNode(N->getOpcode(), dl, ResVT, Partial);
+}
+
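
The split is recursive in spirit: combine the two halves elementwise with the matching binary op, then reduce the half-width vector, halving the work each round. A sketch for the ADD flavor over a plain vector (power-of-two lengths assumed; for the FP flavors this relies on the node permitting reassociation):

    #include <cstddef>
    #include <cstdint>
    #include <vector>

    static int32_t vecreduceAdd(std::vector<int32_t> V) {
      while (V.size() > 1) {
        std::size_t Half = V.size() / 2;
        for (std::size_t i = 0; i < Half; ++i)
          V[i] += V[i + Half];  // the CombineOpc (ISD::ADD) step on halves
        V.resize(Half);
      }
      return V[0];
    }
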
SDValue DAGTypeLegalizer::SplitVecOp_UnaryOp(SDNode *N) {
// The result has a legal vector type, but the input needs splitting.
EVT ResVT = N->getValueType(0);
@@ -1615,7 +1765,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N) {
EltVT = MVT::i8;
VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT,
VecVT.getVectorNumElements());
- Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, VecVT, ElementOps);
+ Vec = DAG.getBuildVector(VecVT, dl, ElementOps);
}
// Store the vector to the stack.
@@ -1629,6 +1779,16 @@ SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N) {
MachinePointerInfo(), EltVT);
}
+SDValue DAGTypeLegalizer::SplitVecOp_ExtVecInRegOp(SDNode *N) {
+ SDValue Lo, Hi;
+
+  // *_EXTEND_VECTOR_INREG nodes only reference the lower half of the input,
+  // so splitting the result has the same effect as splitting the input
+  // operand.
+ SplitVecRes_ExtVecInRegOp(N, Lo, Hi);
+
+ return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), N->getValueType(0), Lo, Hi);
+}
+
SDValue DAGTypeLegalizer::SplitVecOp_MGATHER(MaskedGatherSDNode *MGT,
unsigned OpNo) {
EVT LoVT, HiVT;
@@ -1881,7 +2041,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_CONCAT_VECTORS(SDNode *N) {
}
}
- return DAG.getNode(ISD::BUILD_VECTOR, DL, N->getValueType(0), Elts);
+ return DAG.getBuildVector(N->getValueType(0), DL, Elts);
}
SDValue DAGTypeLegalizer::SplitVecOp_TruncateHelper(SDNode *N) {
@@ -2165,7 +2325,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_BinaryCanTrap(SDNode *N) {
EVT WidenEltVT = WidenVT.getVectorElementType();
EVT VT = WidenVT;
unsigned NumElts = VT.getVectorNumElements();
- const SDNodeFlags *Flags = N->getFlags();
+ const SDNodeFlags Flags = N->getFlags();
while (!TLI.isTypeLegal(VT) && NumElts != 1) {
NumElts = NumElts / 2;
VT = EVT::getVectorVT(*DAG.getContext(), WidenEltVT, NumElts);
@@ -2313,7 +2473,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) {
unsigned Opcode = N->getOpcode();
unsigned InVTNumElts = InVT.getVectorNumElements();
- const SDNodeFlags *Flags = N->getFlags();
+ const SDNodeFlags Flags = N->getFlags();
if (getTypeAction(InVT) == TargetLowering::TypeWidenVector) {
InOp = GetWidenedVector(N->getOperand(0));
InVT = InOp.getValueType();
@@ -2323,6 +2483,15 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) {
return DAG.getNode(Opcode, DL, WidenVT, InOp);
return DAG.getNode(Opcode, DL, WidenVT, InOp, N->getOperand(1), Flags);
}
+ if (WidenVT.getSizeInBits() == InVT.getSizeInBits()) {
+ // If both input and result vector types are of same width, extend
+ // operations should be done with SIGN/ZERO_EXTEND_VECTOR_INREG, which
+ // accepts fewer elements in the result than in the input.
+ if (Opcode == ISD::SIGN_EXTEND)
+ return DAG.getSignExtendVectorInReg(InOp, DL, WidenVT);
+ if (Opcode == ISD::ZERO_EXTEND)
+ return DAG.getZeroExtendVectorInReg(InOp, DL, WidenVT);
+ }
}
if (TLI.isTypeLegal(InWidenVT)) {
@@ -2375,7 +2544,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) {
for (; i < WidenNumElts; ++i)
Ops[i] = UndefVal;
- return DAG.getNode(ISD::BUILD_VECTOR, DL, WidenVT, Ops);
+ return DAG.getBuildVector(WidenVT, DL, Ops);
}
SDValue DAGTypeLegalizer::WidenVecRes_EXTEND_VECTOR_INREG(SDNode *N) {
@@ -2430,7 +2599,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_EXTEND_VECTOR_INREG(SDNode *N) {
while (Ops.size() != WidenNumElts)
Ops.push_back(DAG.getUNDEF(WidenSVT));
- return DAG.getNode(ISD::BUILD_VECTOR, DL, WidenVT, Ops);
+ return DAG.getBuildVector(WidenVT, DL, Ops);
}
SDValue DAGTypeLegalizer::WidenVecRes_FCOPYSIGN(SDNode *N) {
@@ -2568,7 +2737,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_BITCAST(SDNode *N) {
if (InVT.isVector())
NewVec = DAG.getNode(ISD::CONCAT_VECTORS, dl, NewInVT, Ops);
else
- NewVec = DAG.getNode(ISD::BUILD_VECTOR, dl, NewInVT, Ops);
+ NewVec = DAG.getBuildVector(NewInVT, dl, Ops);
return DAG.getNode(ISD::BITCAST, dl, WidenVT, NewVec);
}
}
@@ -2593,7 +2762,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_BUILD_VECTOR(SDNode *N) {
assert(WidenNumElts >= NumElts && "Shrinking vector instead of widening!");
NewOps.append(WidenNumElts - NumElts, DAG.getUNDEF(EltVT));
- return DAG.getNode(ISD::BUILD_VECTOR, dl, WidenVT, NewOps);
+ return DAG.getBuildVector(WidenVT, dl, NewOps);
}
SDValue DAGTypeLegalizer::WidenVecRes_CONCAT_VECTORS(SDNode *N) {
@@ -2663,7 +2832,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_CONCAT_VECTORS(SDNode *N) {
SDValue UndefVal = DAG.getUNDEF(EltVT);
for (; Idx < WidenNumElts; ++Idx)
Ops[Idx] = UndefVal;
- return DAG.getNode(ISD::BUILD_VECTOR, dl, WidenVT, Ops);
+ return DAG.getBuildVector(WidenVT, dl, Ops);
}
SDValue DAGTypeLegalizer::WidenVecRes_EXTRACT_SUBVECTOR(SDNode *N) {
@@ -2704,7 +2873,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_EXTRACT_SUBVECTOR(SDNode *N) {
SDValue UndefVal = DAG.getUNDEF(EltVT);
for (; i < WidenNumElts; ++i)
Ops[i] = UndefVal;
- return DAG.getNode(ISD::BUILD_VECTOR, dl, WidenVT, Ops);
+ return DAG.getBuildVector(WidenVT, dl, Ops);
}
SDValue DAGTypeLegalizer::WidenVecRes_INSERT_VECTOR_ELT(SDNode *N) {
@@ -2814,6 +2983,213 @@ SDValue DAGTypeLegalizer::WidenVecRes_SCALAR_TO_VECTOR(SDNode *N) {
WidenVT, N->getOperand(0));
}
+// Return true if this is a node that could have two SETCCs as operands.
+static inline bool isLogicalMaskOp(unsigned Opcode) {
+ switch (Opcode) {
+ case ISD::AND:
+ case ISD::OR:
+ case ISD::XOR:
+ return true;
+ }
+ return false;
+}
+
+// This is used only for the assert in convertMask(). Check that this is
+// either a SETCC or a SETCC previously handled by convertMask().
+#ifndef NDEBUG
+static inline bool isSETCCorConvertedSETCC(SDValue N) {
+ if (N.getOpcode() == ISD::EXTRACT_SUBVECTOR)
+ N = N.getOperand(0);
+ else if (N.getOpcode() == ISD::CONCAT_VECTORS) {
+ for (unsigned i = 1; i < N->getNumOperands(); ++i)
+ if (!N->getOperand(i)->isUndef())
+ return false;
+ N = N.getOperand(0);
+ }
+
+ if (N.getOpcode() == ISD::TRUNCATE)
+ N = N.getOperand(0);
+ else if (N.getOpcode() == ISD::SIGN_EXTEND)
+ N = N.getOperand(0);
+
+ if (isLogicalMaskOp(N.getOpcode()))
+ return isSETCCorConvertedSETCC(N.getOperand(0)) &&
+ isSETCCorConvertedSETCC(N.getOperand(1));
+
+ return (N.getOpcode() == ISD::SETCC ||
+ ISD::isBuildVectorOfConstantSDNodes(N.getNode()));
+}
+#endif
+
+// Return a mask of vector type MaskVT to replace InMask, extending or
+// truncating it to ToMaskVT if needed.
+SDValue DAGTypeLegalizer::convertMask(SDValue InMask, EVT MaskVT,
+ EVT ToMaskVT) {
+  // Currently only a SETCC or an AND/OR/XOR with two SETCCs is handled.
+  // FIXME: This code seems to be too restrictive; we might consider
+  // generalizing it or dropping it.
+ assert(isSETCCorConvertedSETCC(InMask) && "Unexpected mask argument.");
+
+ // Make a new Mask node, with a legal result VT.
+ SmallVector<SDValue, 4> Ops;
+ for (unsigned i = 0; i < InMask->getNumOperands(); ++i)
+ Ops.push_back(InMask->getOperand(i));
+ SDValue Mask = DAG.getNode(InMask->getOpcode(), SDLoc(InMask), MaskVT, Ops);
+
+ // If MaskVT has smaller or bigger elements than ToMaskVT, a vector sign
+ // extend or truncate is needed.
+ LLVMContext &Ctx = *DAG.getContext();
+ unsigned MaskScalarBits = MaskVT.getScalarSizeInBits();
+ unsigned ToMaskScalBits = ToMaskVT.getScalarSizeInBits();
+ if (MaskScalarBits < ToMaskScalBits) {
+ EVT ExtVT = EVT::getVectorVT(Ctx, ToMaskVT.getVectorElementType(),
+ MaskVT.getVectorNumElements());
+ Mask = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(Mask), ExtVT, Mask);
+ } else if (MaskScalarBits > ToMaskScalBits) {
+ EVT TruncVT = EVT::getVectorVT(Ctx, ToMaskVT.getVectorElementType(),
+ MaskVT.getVectorNumElements());
+ Mask = DAG.getNode(ISD::TRUNCATE, SDLoc(Mask), TruncVT, Mask);
+ }
+
+ assert(Mask->getValueType(0).getScalarSizeInBits() ==
+ ToMaskVT.getScalarSizeInBits() &&
+ "Mask should have the right element size by now.");
+
+ // Adjust Mask to the right number of elements.
+ unsigned CurrMaskNumEls = Mask->getValueType(0).getVectorNumElements();
+ if (CurrMaskNumEls > ToMaskVT.getVectorNumElements()) {
+ MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout());
+ SDValue ZeroIdx = DAG.getConstant(0, SDLoc(Mask), IdxTy);
+ Mask = DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(Mask), ToMaskVT, Mask,
+ ZeroIdx);
+ } else if (CurrMaskNumEls < ToMaskVT.getVectorNumElements()) {
+ unsigned NumSubVecs = (ToMaskVT.getVectorNumElements() / CurrMaskNumEls);
+ EVT SubVT = Mask->getValueType(0);
+ SmallVector<SDValue, 16> SubConcatOps(NumSubVecs);
+ SubConcatOps[0] = Mask;
+ for (unsigned i = 1; i < NumSubVecs; ++i)
+ SubConcatOps[i] = DAG.getUNDEF(SubVT);
+ Mask =
+ DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(Mask), ToMaskVT, SubConcatOps);
+ }
+
+ assert((Mask->getValueType(0) == ToMaskVT) &&
+ "A mask of ToMaskVT should have been produced by now.");
+
+ return Mask;
+}
+
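
Both element-size adjustments in convertMask preserve the all-zeros/all-ones shape of a 0/-1 mask: sign extension replicates the sign bit to grow an element, truncation drops replicated bits to shrink it. In scalar form (i8 and i32 chosen purely for illustration):

    #include <cstdint>

    static int32_t growMaskElt(int8_t M) {
      return static_cast<int32_t>(M);  // SIGN_EXTEND: 0 -> 0, -1 -> -1
    }
    static int8_t shrinkMaskElt(int32_t M) {
      return static_cast<int8_t>(M);   // TRUNCATE: 0 -> 0, -1 -> -1
    }
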
+// Get the target mask VT, and widen if needed.
+EVT DAGTypeLegalizer::getSETCCWidenedResultTy(SDValue SetCC) {
+ assert(SetCC->getOpcode() == ISD::SETCC);
+ LLVMContext &Ctx = *DAG.getContext();
+ EVT MaskVT = getSetCCResultType(SetCC->getOperand(0).getValueType());
+ if (getTypeAction(MaskVT) == TargetLowering::TypeWidenVector)
+ MaskVT = TLI.getTypeToTransformTo(Ctx, MaskVT);
+ return MaskVT;
+}
+
+// This method tries to handle VSELECT and its mask by legalizing operands
+// (which may require widening) and, if needed, adjusting the mask vector type
+// to match that of the VSELECT. Without it, many cases end up with
+// scalarization of the SETCC, with many unnecessary instructions.
+SDValue DAGTypeLegalizer::WidenVSELECTAndMask(SDNode *N) {
+ LLVMContext &Ctx = *DAG.getContext();
+ SDValue Cond = N->getOperand(0);
+
+ if (N->getOpcode() != ISD::VSELECT)
+ return SDValue();
+
+ if (Cond->getOpcode() != ISD::SETCC && !isLogicalMaskOp(Cond->getOpcode()))
+ return SDValue();
+
+  // If this is a split VSELECT that was already handled previously, do
+  // nothing.
+ if (Cond->getValueType(0).getScalarSizeInBits() != 1)
+ return SDValue();
+
+ EVT VSelVT = N->getValueType(0);
+ // Only handle vector types which are a power of 2.
+ if (!isPowerOf2_64(VSelVT.getSizeInBits()))
+ return SDValue();
+
+ // Don't touch if this will be scalarized.
+ EVT FinalVT = VSelVT;
+ while (getTypeAction(FinalVT) == TargetLowering::TypeSplitVector)
+ FinalVT = FinalVT.getHalfNumVectorElementsVT(Ctx);
+
+ if (FinalVT.getVectorNumElements() == 1)
+ return SDValue();
+
+ // If there is support for an i1 vector mask, don't touch.
+ if (Cond.getOpcode() == ISD::SETCC) {
+ EVT SetCCOpVT = Cond->getOperand(0).getValueType();
+ while (TLI.getTypeAction(Ctx, SetCCOpVT) != TargetLowering::TypeLegal)
+ SetCCOpVT = TLI.getTypeToTransformTo(Ctx, SetCCOpVT);
+ EVT SetCCResVT = getSetCCResultType(SetCCOpVT);
+ if (SetCCResVT.getScalarSizeInBits() == 1)
+ return SDValue();
+ }
+
+ // Get the VT and operands for VSELECT, and widen if needed.
+ SDValue VSelOp1 = N->getOperand(1);
+ SDValue VSelOp2 = N->getOperand(2);
+ if (getTypeAction(VSelVT) == TargetLowering::TypeWidenVector) {
+ VSelVT = TLI.getTypeToTransformTo(Ctx, VSelVT);
+ VSelOp1 = GetWidenedVector(VSelOp1);
+ VSelOp2 = GetWidenedVector(VSelOp2);
+ }
+
+ // The mask of the VSELECT should have integer elements.
+ EVT ToMaskVT = VSelVT;
+ if (!ToMaskVT.getScalarType().isInteger())
+ ToMaskVT = ToMaskVT.changeVectorElementTypeToInteger();
+
+ SDValue Mask;
+ if (Cond->getOpcode() == ISD::SETCC) {
+ EVT MaskVT = getSETCCWidenedResultTy(Cond);
+ Mask = convertMask(Cond, MaskVT, ToMaskVT);
+ } else if (isLogicalMaskOp(Cond->getOpcode()) &&
+ Cond->getOperand(0).getOpcode() == ISD::SETCC &&
+ Cond->getOperand(1).getOpcode() == ISD::SETCC) {
+ // Cond is (AND/OR/XOR (SETCC, SETCC))
+ SDValue SETCC0 = Cond->getOperand(0);
+ SDValue SETCC1 = Cond->getOperand(1);
+ EVT VT0 = getSETCCWidenedResultTy(SETCC0);
+ EVT VT1 = getSETCCWidenedResultTy(SETCC1);
+ unsigned ScalarBits0 = VT0.getScalarSizeInBits();
+ unsigned ScalarBits1 = VT1.getScalarSizeInBits();
+ unsigned ScalarBits_ToMask = ToMaskVT.getScalarSizeInBits();
+ EVT MaskVT;
+ // If the two SETCCs have different VTs, either extend/truncate one of
+ // them to the other "towards" ToMaskVT, or truncate one and extend the
+ // other to ToMaskVT.
+ if (ScalarBits0 != ScalarBits1) {
+ EVT NarrowVT = ((ScalarBits0 < ScalarBits1) ? VT0 : VT1);
+ EVT WideVT = ((NarrowVT == VT0) ? VT1 : VT0);
+ if (ScalarBits_ToMask >= WideVT.getScalarSizeInBits())
+ MaskVT = WideVT;
+ else if (ScalarBits_ToMask <= NarrowVT.getScalarSizeInBits())
+ MaskVT = NarrowVT;
+ else
+ MaskVT = ToMaskVT;
+ } else
+ // If the two SETCCs have the same VT, don't change it.
+ MaskVT = VT0;
+
+ // Make new SETCCs and logical nodes.
+ SETCC0 = convertMask(SETCC0, VT0, MaskVT);
+ SETCC1 = convertMask(SETCC1, VT1, MaskVT);
+ Cond = DAG.getNode(Cond->getOpcode(), SDLoc(Cond), MaskVT, SETCC0, SETCC1);
+
+ // Convert the logical op for VSELECT if needed.
+ Mask = convertMask(Cond, MaskVT, ToMaskVT);
+ } else
+ return SDValue();
+
+ return DAG.getNode(ISD::VSELECT, SDLoc(N), VSelVT, Mask, VSelOp1, VSelOp2);
+}
+
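
Keeping the mask at the element width pays off because a VSELECT whose mask is all-zeros or all-ones per lane is pure bitwise blending, which stays vectorized instead of decomposing into scalar selects. A per-lane model (a sketch, not the emitted code; memcpy avoids type-punning UB):

    #include <cstdint>
    #include <cstring>

    static double blendLane(int64_t Mask, double T, double F) {
      int64_t TI, FI;
      std::memcpy(&TI, &T, sizeof(double));
      std::memcpy(&FI, &F, sizeof(double));
      const int64_t RI = (TI & Mask) | (FI & ~Mask);  // Mask is 0 or -1
      double R;
      std::memcpy(&R, &RI, sizeof(double));
      return R;
    }
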
SDValue DAGTypeLegalizer::WidenVecRes_SELECT(SDNode *N) {
EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
unsigned WidenNumElts = WidenVT.getVectorNumElements();
@@ -2821,6 +3197,9 @@ SDValue DAGTypeLegalizer::WidenVecRes_SELECT(SDNode *N) {
SDValue Cond1 = N->getOperand(0);
EVT CondVT = Cond1.getValueType();
if (CondVT.isVector()) {
+ if (SDValue Res = WidenVSELECTAndMask(N))
+ return Res;
+
EVT CondEltVT = CondVT.getVectorElementType();
EVT CondWidenVT = EVT::getVectorVT(*DAG.getContext(),
CondEltVT, WidenNumElts);
@@ -3093,7 +3472,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_Convert(SDNode *N) {
ISD::EXTRACT_VECTOR_ELT, dl, InEltVT, InOp,
DAG.getConstant(i, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))));
- return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops);
+ return DAG.getBuildVector(VT, dl, Ops);
}
SDValue DAGTypeLegalizer::WidenVecOp_BITCAST(SDNode *N) {
@@ -3144,7 +3523,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_CONCAT_VECTORS(SDNode *N) {
ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp,
DAG.getConstant(j, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
}
- return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops);
+ return DAG.getBuildVector(VT, dl, Ops);
}
SDValue DAGTypeLegalizer::WidenVecOp_EXTRACT_SUBVECTOR(SDNode *N) {
@@ -3565,10 +3944,9 @@ DAGTypeLegalizer::GenWidenVectorExtLoads(SmallVectorImpl<SDValue> &LdChain,
for (; i != WidenNumElts; ++i)
Ops[i] = UndefVal;
- return DAG.getNode(ISD::BUILD_VECTOR, dl, WidenVT, Ops);
+ return DAG.getBuildVector(WidenVT, dl, Ops);
}
-
void DAGTypeLegalizer::GenWidenVectorStores(SmallVectorImpl<SDValue> &StChain,
StoreSDNode *ST) {
// The strategy assumes that we can efficiently store power-of-two widths.
@@ -3737,5 +4115,5 @@ SDValue DAGTypeLegalizer::ModifyToType(SDValue InOp, EVT NVT,
DAG.getUNDEF(EltVT);
for ( ; Idx < WidenNumElts; ++Idx)
Ops[Idx] = FillVal;
- return DAG.getNode(ISD::BUILD_VECTOR, dl, NVT, Ops);
+ return DAG.getBuildVector(NVT, dl, Ops);
}
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp
index ded8e68..a21b4c7 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp
@@ -57,10 +57,8 @@ ResourcePriorityQueue::ResourcePriorityQueue(SelectionDAGISel *IS)
RegPressure.resize(NumRC);
std::fill(RegLimit.begin(), RegLimit.end(), 0);
std::fill(RegPressure.begin(), RegPressure.end(), 0);
- for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(),
- E = TRI->regclass_end();
- I != E; ++I)
- RegLimit[(*I)->getID()] = TRI->getRegPressureLimit(*I, *IS->MF);
+ for (const TargetRegisterClass *RC : TRI->regclasses())
+ RegLimit[RC->getID()] = TRI->getRegPressureLimit(RC, *IS->MF);
ParallelLiveRanges = 0;
HorizontalVerticalBalance = 0;
@@ -69,12 +67,11 @@ ResourcePriorityQueue::ResourcePriorityQueue(SelectionDAGISel *IS)
unsigned
ResourcePriorityQueue::numberRCValPredInSU(SUnit *SU, unsigned RCId) {
unsigned NumberDeps = 0;
- for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
- I != E; ++I) {
- if (I->isCtrl())
+ for (SDep &Pred : SU->Preds) {
+ if (Pred.isCtrl())
continue;
- SUnit *PredSU = I->getSUnit();
+ SUnit *PredSU = Pred.getSUnit();
const SDNode *ScegN = PredSU->getNode();
if (!ScegN)
@@ -107,12 +104,11 @@ ResourcePriorityQueue::numberRCValPredInSU(SUnit *SU, unsigned RCId) {
unsigned ResourcePriorityQueue::numberRCValSuccInSU(SUnit *SU,
unsigned RCId) {
unsigned NumberDeps = 0;
- for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
- I != E; ++I) {
- if (I->isCtrl())
+ for (const SDep &Succ : SU->Succs) {
+ if (Succ.isCtrl())
continue;
- SUnit *SuccSU = I->getSUnit();
+ SUnit *SuccSU = Succ.getSUnit();
const SDNode *ScegN = SuccSU->getNode();
if (!ScegN)
continue;
@@ -144,9 +140,8 @@ unsigned ResourcePriorityQueue::numberRCValSuccInSU(SUnit *SU,
static unsigned numberCtrlDepsInSU(SUnit *SU) {
unsigned NumberDeps = 0;
- for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
- I != E; ++I)
- if (I->isCtrl())
+ for (const SDep &Succ : SU->Succs)
+ if (Succ.isCtrl())
NumberDeps++;
return NumberDeps;
@@ -154,9 +149,8 @@ static unsigned numberCtrlDepsInSU(SUnit *SU) {
static unsigned numberCtrlPredInSU(SUnit *SU) {
unsigned NumberDeps = 0;
- for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
- I != E; ++I)
- if (I->isCtrl())
+ for (SDep &Pred : SU->Preds)
+ if (Pred.isCtrl())
NumberDeps++;
return NumberDeps;
@@ -214,15 +208,14 @@ bool resource_sort::operator()(const SUnit *LHS, const SUnit *RHS) const {
/// of SU, return it, otherwise return null.
SUnit *ResourcePriorityQueue::getSingleUnscheduledPred(SUnit *SU) {
SUnit *OnlyAvailablePred = nullptr;
- for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
- I != E; ++I) {
- SUnit &Pred = *I->getSUnit();
- if (!Pred.isScheduled) {
+ for (const SDep &Pred : SU->Preds) {
+ SUnit &PredSU = *Pred.getSUnit();
+ if (!PredSU.isScheduled) {
// We found an available, but not scheduled, predecessor. If it's the
// only one we have found, keep track of it... otherwise give up.
- if (OnlyAvailablePred && OnlyAvailablePred != &Pred)
+ if (OnlyAvailablePred && OnlyAvailablePred != &PredSU)
return nullptr;
- OnlyAvailablePred = &Pred;
+ OnlyAvailablePred = &PredSU;
}
}
return OnlyAvailablePred;
@@ -232,9 +225,8 @@ void ResourcePriorityQueue::push(SUnit *SU) {
// Look at all of the successors of this node. Count the number of nodes that
// this node is the sole unscheduled node for.
unsigned NumNodesBlocking = 0;
- for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
- I != E; ++I)
- if (getSingleUnscheduledPred(I->getSUnit()) == SU)
+ for (const SDep &Succ : SU->Succs)
+ if (getSingleUnscheduledPred(Succ.getSUnit()) == SU)
++NumNodesBlocking;
NumNodesSolelyBlocking[SU->NodeNum] = NumNodesBlocking;
@@ -271,14 +263,13 @@ bool ResourcePriorityQueue::isResourceAvailable(SUnit *SU) {
// Now see if there are no other dependencies
// to instructions already in the packet.
for (unsigned i = 0, e = Packet.size(); i != e; ++i)
- for (SUnit::const_succ_iterator I = Packet[i]->Succs.begin(),
- E = Packet[i]->Succs.end(); I != E; ++I) {
+ for (const SDep &Succ : Packet[i]->Succs) {
// Since we do not add pseudos to packets, might as well
// ignore order deps.
- if (I->isCtrl())
+ if (Succ.isCtrl())
continue;
- if (I->getSUnit() == SU)
+ if (Succ.getSUnit() == SU)
return false;
}
@@ -364,16 +355,11 @@ int ResourcePriorityQueue::regPressureDelta(SUnit *SU, bool RawPressure) {
return RegBalance;
if (RawPressure) {
- for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(),
- E = TRI->regclass_end(); I != E; ++I) {
- const TargetRegisterClass *RC = *I;
+ for (const TargetRegisterClass *RC : TRI->regclasses())
RegBalance += rawRegPressureDelta(SU, RC->getID());
- }
}
else {
- for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(),
- E = TRI->regclass_end(); I != E; ++I) {
- const TargetRegisterClass *RC = *I;
+ for (const TargetRegisterClass *RC : TRI->regclasses()) {
if ((RegPressure[RC->getID()] +
rawRegPressureDelta(SU, RC->getID()) > 0) &&
(RegPressure[RC->getID()] +
@@ -506,11 +492,10 @@ void ResourcePriorityQueue::scheduledNode(SUnit *SU) {
}
}
}
- for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
- I != E; ++I) {
- if (I->isCtrl() || (I->getSUnit()->NumRegDefsLeft == 0))
+ for (SDep &Pred : SU->Preds) {
+ if (Pred.isCtrl() || (Pred.getSUnit()->NumRegDefsLeft == 0))
continue;
- --I->getSUnit()->NumRegDefsLeft;
+ --Pred.getSUnit()->NumRegDefsLeft;
}
}
@@ -522,10 +507,9 @@ void ResourcePriorityQueue::scheduledNode(SUnit *SU) {
// number of live ranges. All others, increase it.
unsigned NumberNonControlDeps = 0;
- for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
- I != E; ++I) {
- adjustPriorityOfUnscheduledPreds(I->getSUnit());
- if (!I->isCtrl())
+ for (const SDep &Succ : SU->Succs) {
+ adjustPriorityOfUnscheduledPreds(Succ.getSUnit());
+ if (!Succ.isCtrl())
NumberNonControlDeps++;
}
@@ -602,8 +586,7 @@ SUnit *ResourcePriorityQueue::pop() {
std::vector<SUnit *>::iterator Best = Queue.begin();
if (!DisableDFASched) {
int BestCost = SUSchedulingCost(*Best);
- for (std::vector<SUnit *>::iterator I = std::next(Queue.begin()),
- E = Queue.end(); I != E; ++I) {
+ for (auto I = std::next(Queue.begin()), E = Queue.end(); I != E; ++I) {
if (SUSchedulingCost(*I) > BestCost) {
BestCost = SUSchedulingCost(*I);
@@ -613,8 +596,7 @@ SUnit *ResourcePriorityQueue::pop() {
}
// Use default TD scheduling mechanism.
else {
- for (std::vector<SUnit *>::iterator I = std::next(Queue.begin()),
- E = Queue.end(); I != E; ++I)
+ for (auto I = std::next(Queue.begin()), E = Queue.end(); I != E; ++I)
if (Picker(*Best, *I))
Best = I;
}
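
Every hunk in this file applies the same mechanical modernization: replace an explicit begin/end iterator pair with a range-based for over the underlying container. A minimal before/after sketch, assuming SU->Succs is a vector of SDep as declared in ScheduleDAG.h:

    // Before: iterator pair, verbose and easy to mistype.
    for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
         I != E; ++I)
      if (I->isCtrl())
        ++NumberDeps;

    // After: range-based for, identical behavior.
    for (const SDep &Succ : SU->Succs)
      if (Succ.isCtrl())
        ++NumberDeps;
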
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
index 62e7733..1379940 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
@@ -11,12 +11,12 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/CodeGen/SchedulerRegistry.h"
#include "InstrEmitter.h"
#include "ScheduleDAGSDNodes.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/SchedulerRegistry.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/InlineAsm.h"
@@ -160,18 +160,17 @@ void ScheduleDAGFast::ReleasePred(SUnit *SU, SDep *PredEdge) {
void ScheduleDAGFast::ReleasePredecessors(SUnit *SU, unsigned CurCycle) {
// Bottom up: release predecessors
- for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
- I != E; ++I) {
- ReleasePred(SU, &*I);
- if (I->isAssignedRegDep()) {
+ for (SDep &Pred : SU->Preds) {
+ ReleasePred(SU, &Pred);
+ if (Pred.isAssignedRegDep()) {
// This is a physical register dependency and it's impossible or
// expensive to copy the register. Make sure nothing that can
// clobber the register is scheduled between the predecessor and
// this node.
- if (!LiveRegDefs[I->getReg()]) {
+ if (!LiveRegDefs[Pred.getReg()]) {
++NumLiveRegs;
- LiveRegDefs[I->getReg()] = I->getSUnit();
- LiveRegCycles[I->getReg()] = CurCycle;
+ LiveRegDefs[Pred.getReg()] = Pred.getSUnit();
+ LiveRegCycles[Pred.getReg()] = CurCycle;
}
}
}
@@ -191,16 +190,15 @@ void ScheduleDAGFast::ScheduleNodeBottomUp(SUnit *SU, unsigned CurCycle) {
ReleasePredecessors(SU, CurCycle);
// Release all the implicit physical register defs that are live.
- for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
- I != E; ++I) {
- if (I->isAssignedRegDep()) {
- if (LiveRegCycles[I->getReg()] == I->getSUnit()->getHeight()) {
+ for (SDep &Succ : SU->Succs) {
+ if (Succ.isAssignedRegDep()) {
+ if (LiveRegCycles[Succ.getReg()] == Succ.getSUnit()->getHeight()) {
assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
- assert(LiveRegDefs[I->getReg()] == SU &&
+ assert(LiveRegDefs[Succ.getReg()] == SU &&
"Physical register dependency violated?");
--NumLiveRegs;
- LiveRegDefs[I->getReg()] = nullptr;
- LiveRegCycles[I->getReg()] = 0;
+ LiveRegDefs[Succ.getReg()] = nullptr;
+ LiveRegCycles[Succ.getReg()] = 0;
}
}
}
@@ -282,22 +280,20 @@ SUnit *ScheduleDAGFast::CopyAndMoveSuccessors(SUnit *SU) {
SmallVector<SDep, 4> LoadPreds;
SmallVector<SDep, 4> NodePreds;
SmallVector<SDep, 4> NodeSuccs;
- for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
- I != E; ++I) {
- if (I->isCtrl())
- ChainPred = *I;
- else if (I->getSUnit()->getNode() &&
- I->getSUnit()->getNode()->isOperandOf(LoadNode))
- LoadPreds.push_back(*I);
+ for (SDep &Pred : SU->Preds) {
+ if (Pred.isCtrl())
+ ChainPred = Pred;
+ else if (Pred.getSUnit()->getNode() &&
+ Pred.getSUnit()->getNode()->isOperandOf(LoadNode))
+ LoadPreds.push_back(Pred);
else
- NodePreds.push_back(*I);
+ NodePreds.push_back(Pred);
}
- for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
- I != E; ++I) {
- if (I->isCtrl())
- ChainSuccs.push_back(*I);
+ for (SDep &Succ : SU->Succs) {
+ if (Succ.isCtrl())
+ ChainSuccs.push_back(Succ);
else
- NodeSuccs.push_back(*I);
+ NodeSuccs.push_back(Succ);
}
if (ChainPred.getSUnit()) {
@@ -354,21 +350,19 @@ SUnit *ScheduleDAGFast::CopyAndMoveSuccessors(SUnit *SU) {
NewSU = Clone(SU);
// New SUnit has the exact same predecessors.
- for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
- I != E; ++I)
- if (!I->isArtificial())
- AddPred(NewSU, *I);
+ for (SDep &Pred : SU->Preds)
+ if (!Pred.isArtificial())
+ AddPred(NewSU, Pred);
// Only copy scheduled successors. Cut them from old node's successor
// list and move them over.
SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
- for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
- I != E; ++I) {
- if (I->isArtificial())
+ for (SDep &Succ : SU->Succs) {
+ if (Succ.isArtificial())
continue;
- SUnit *SuccSU = I->getSUnit();
+ SUnit *SuccSU = Succ.getSUnit();
if (SuccSU->isScheduled) {
- SDep D = *I;
+ SDep D = Succ;
D.setSUnit(NewSU);
AddPred(SuccSU, D);
D.setSUnit(SU);
@@ -399,16 +393,15 @@ void ScheduleDAGFast::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg,
// Only copy scheduled successors. Cut them from old node's successor
// list and move them over.
SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
- for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
- I != E; ++I) {
- if (I->isArtificial())
+ for (SDep &Succ : SU->Succs) {
+ if (Succ.isArtificial())
continue;
- SUnit *SuccSU = I->getSUnit();
+ SUnit *SuccSU = Succ.getSUnit();
if (SuccSU->isScheduled) {
- SDep D = *I;
+ SDep D = Succ;
D.setSUnit(CopyToSU);
AddPred(SuccSU, D);
- DelDeps.push_back(std::make_pair(SuccSU, *I));
+ DelDeps.push_back(std::make_pair(SuccSU, Succ));
}
}
for (unsigned i = 0, e = DelDeps.size(); i != e; ++i) {
@@ -479,10 +472,9 @@ bool ScheduleDAGFast::DelayForLiveRegsBottomUp(SUnit *SU,
SmallSet<unsigned, 4> RegAdded;
// If this node would clobber any "live" register, then it's not ready.
- for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
- I != E; ++I) {
- if (I->isAssignedRegDep()) {
- CheckForLiveRegDef(I->getSUnit(), I->getReg(), LiveRegDefs,
+ for (SDep &Pred : SU->Preds) {
+ if (Pred.isAssignedRegDep()) {
+ CheckForLiveRegDef(Pred.getSUnit(), Pred.getReg(), LiveRegDefs,
RegAdded, LRegs, TRI);
}
}
@@ -755,9 +747,8 @@ void ScheduleDAGLinearize::Schedule() {
// Glue user must be scheduled together with the glue operand. So other
// users of the glue operand must be treated as its users.
SDNode *ImmGUser = Glue->getGluedUser();
- for (SDNode::use_iterator ui = Glue->use_begin(), ue = Glue->use_end();
- ui != ue; ++ui)
- if (*ui == ImmGUser)
+ for (const SDNode *U : Glue->uses())
+ if (U == ImmGUser)
--Degree;
GUser->setNodeId(UDegree + Degree);
Glue->setNodeId(1);
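
The last hunk above swaps the use_iterator walk for SDNode's uses() range. A sketch of how such a range adapter is typically defined, assuming the inline helper in llvm/CodeGen/SelectionDAGNodes.h:

    // Sketch only: wraps the existing iterator pair in an iterator_range.
    iterator_range<use_iterator> uses() {
      return make_range(use_begin(), use_end());
    }
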
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
index 3549ccd..70b1fa7 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
@@ -15,13 +15,13 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/CodeGen/SchedulerRegistry.h"
#include "ScheduleDAGSDNodes.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
+#include "llvm/CodeGen/SchedulerRegistry.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/InlineAsm.h"
@@ -226,6 +226,7 @@ private:
void UnscheduleNodeBottomUp(SUnit*);
void RestoreHazardCheckerBottomUp();
void BacktrackBottomUp(SUnit*, SUnit*);
+ SUnit *TryUnfoldSU(SUnit *);
SUnit *CopyAndMoveSuccessors(SUnit*);
void InsertCopiesAndMoveSuccs(SUnit*, unsigned,
const TargetRegisterClass*,
@@ -422,11 +423,9 @@ static bool IsChainDependent(SDNode *Outer, SDNode *Inner,
}
// Check for a lowered CALLSEQ_BEGIN or CALLSEQ_END.
if (N->isMachineOpcode()) {
- if (N->getMachineOpcode() ==
- (unsigned)TII->getCallFrameDestroyOpcode()) {
+ if (N->getMachineOpcode() == TII->getCallFrameDestroyOpcode()) {
++NestLevel;
- } else if (N->getMachineOpcode() ==
- (unsigned)TII->getCallFrameSetupOpcode()) {
+ } else if (N->getMachineOpcode() == TII->getCallFrameSetupOpcode()) {
if (NestLevel == 0)
return false;
--NestLevel;
@@ -480,12 +479,10 @@ FindCallSeqStart(SDNode *N, unsigned &NestLevel, unsigned &MaxNest,
}
// Check for a lowered CALLSEQ_BEGIN or CALLSEQ_END.
if (N->isMachineOpcode()) {
- if (N->getMachineOpcode() ==
- (unsigned)TII->getCallFrameDestroyOpcode()) {
+ if (N->getMachineOpcode() == TII->getCallFrameDestroyOpcode()) {
++NestLevel;
MaxNest = std::max(MaxNest, NestLevel);
- } else if (N->getMachineOpcode() ==
- (unsigned)TII->getCallFrameSetupOpcode()) {
+ } else if (N->getMachineOpcode() == TII->getCallFrameSetupOpcode()) {
assert(NestLevel != 0);
--NestLevel;
if (NestLevel == 0)
@@ -524,21 +521,20 @@ FindCallSeqStart(SDNode *N, unsigned &NestLevel, unsigned &MaxNest,
/// interference on flags.
void ScheduleDAGRRList::ReleasePredecessors(SUnit *SU) {
// Bottom up: release predecessors
- for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
- I != E; ++I) {
- ReleasePred(SU, &*I);
- if (I->isAssignedRegDep()) {
+ for (SDep &Pred : SU->Preds) {
+ ReleasePred(SU, &Pred);
+ if (Pred.isAssignedRegDep()) {
// This is a physical register dependency and it's impossible or
// expensive to copy the register. Make sure nothing that can
// clobber the register is scheduled between the predecessor and
// this node.
- SUnit *RegDef = LiveRegDefs[I->getReg()]; (void)RegDef;
- assert((!RegDef || RegDef == SU || RegDef == I->getSUnit()) &&
+ SUnit *RegDef = LiveRegDefs[Pred.getReg()]; (void)RegDef;
+ assert((!RegDef || RegDef == SU || RegDef == Pred.getSUnit()) &&
"interference on register dependence");
- LiveRegDefs[I->getReg()] = I->getSUnit();
- if (!LiveRegGens[I->getReg()]) {
+ LiveRegDefs[Pred.getReg()] = Pred.getSUnit();
+ if (!LiveRegGens[Pred.getReg()]) {
++NumLiveRegs;
- LiveRegGens[I->getReg()] = SU;
+ LiveRegGens[Pred.getReg()] = SU;
}
}
}
@@ -550,7 +546,7 @@ void ScheduleDAGRRList::ReleasePredecessors(SUnit *SU) {
if (!LiveRegDefs[CallResource])
for (SDNode *Node = SU->getNode(); Node; Node = Node->getGluedNode())
if (Node->isMachineOpcode() &&
- Node->getMachineOpcode() == (unsigned)TII->getCallFrameDestroyOpcode()) {
+ Node->getMachineOpcode() == TII->getCallFrameDestroyOpcode()) {
unsigned NestLevel = 0;
unsigned MaxNest = 0;
SDNode *N = FindCallSeqStart(Node, NestLevel, MaxNest, TII);
@@ -737,15 +733,14 @@ void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU) {
ReleasePredecessors(SU);
// Release all the implicit physical register defs that are live.
- for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
- I != E; ++I) {
- // LiveRegDegs[I->getReg()] != SU when SU is a two-address node.
- if (I->isAssignedRegDep() && LiveRegDefs[I->getReg()] == SU) {
+ for (SDep &Succ : SU->Succs) {
+ // LiveRegDefs[Succ.getReg()] != SU when SU is a two-address node.
+ if (Succ.isAssignedRegDep() && LiveRegDefs[Succ.getReg()] == SU) {
assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
--NumLiveRegs;
- LiveRegDefs[I->getReg()] = nullptr;
- LiveRegGens[I->getReg()] = nullptr;
- releaseInterferences(I->getReg());
+ LiveRegDefs[Succ.getReg()] = nullptr;
+ LiveRegGens[Succ.getReg()] = nullptr;
+ releaseInterferences(Succ.getReg());
}
}
// Release the special call resource dependence, if this is the beginning
@@ -755,7 +750,7 @@ void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU) {
for (const SDNode *SUNode = SU->getNode(); SUNode;
SUNode = SUNode->getGluedNode()) {
if (SUNode->isMachineOpcode() &&
- SUNode->getMachineOpcode() == (unsigned)TII->getCallFrameSetupOpcode()) {
+ SUNode->getMachineOpcode() == TII->getCallFrameSetupOpcode()) {
assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
--NumLiveRegs;
LiveRegDefs[CallResource] = nullptr;
@@ -786,7 +781,7 @@ void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU) {
}
/// CapturePred - This does the opposite of ReleasePred. Since SU is being
-/// unscheduled, incrcease the succ left count of its predecessors. Remove
+/// unscheduled, increase the succ left count of its predecessors. Remove
/// them from AvailableQueue if necessary.
void ScheduleDAGRRList::CapturePred(SDep *PredEdge) {
SUnit *PredSU = PredEdge->getSUnit();
@@ -806,17 +801,16 @@ void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) {
DEBUG(dbgs() << "*** Unscheduling [" << SU->getHeight() << "]: ");
DEBUG(SU->dump(this));
- for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
- I != E; ++I) {
- CapturePred(&*I);
- if (I->isAssignedRegDep() && SU == LiveRegGens[I->getReg()]){
+ for (SDep &Pred : SU->Preds) {
+ CapturePred(&Pred);
+ if (Pred.isAssignedRegDep() && SU == LiveRegGens[Pred.getReg()]){
assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
- assert(LiveRegDefs[I->getReg()] == I->getSUnit() &&
+ assert(LiveRegDefs[Pred.getReg()] == Pred.getSUnit() &&
"Physical register dependency violated?");
--NumLiveRegs;
- LiveRegDefs[I->getReg()] = nullptr;
- LiveRegGens[I->getReg()] = nullptr;
- releaseInterferences(I->getReg());
+ LiveRegDefs[Pred.getReg()] = nullptr;
+ LiveRegGens[Pred.getReg()] = nullptr;
+ releaseInterferences(Pred.getReg());
}
}
@@ -826,7 +820,7 @@ void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) {
for (const SDNode *SUNode = SU->getNode(); SUNode;
SUNode = SUNode->getGluedNode()) {
if (SUNode->isMachineOpcode() &&
- SUNode->getMachineOpcode() == (unsigned)TII->getCallFrameSetupOpcode()) {
+ SUNode->getMachineOpcode() == TII->getCallFrameSetupOpcode()) {
++NumLiveRegs;
LiveRegDefs[CallResource] = SU;
LiveRegGens[CallResource] = CallSeqEndForStart[SU];
@@ -839,7 +833,7 @@ void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) {
for (const SDNode *SUNode = SU->getNode(); SUNode;
SUNode = SUNode->getGluedNode()) {
if (SUNode->isMachineOpcode() &&
- SUNode->getMachineOpcode() == (unsigned)TII->getCallFrameDestroyOpcode()) {
+ SUNode->getMachineOpcode() == TII->getCallFrameDestroyOpcode()) {
assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
--NumLiveRegs;
LiveRegDefs[CallResource] = nullptr;
@@ -899,7 +893,7 @@ void ScheduleDAGRRList::RestoreHazardCheckerBottomUp() {
std::vector<SUnit*>::const_iterator I = (Sequence.end() - LookAhead);
unsigned HazardCycle = (*I)->getHeight();
- for (std::vector<SUnit*>::const_iterator E = Sequence.end(); I != E; ++I) {
+ for (auto E = Sequence.end(); I != E; ++I) {
SUnit *SU = *I;
for (; SU->getHeight() > HazardCycle; ++HazardCycle) {
HazardRec->RecedeCycle();
@@ -941,6 +935,146 @@ static bool isOperandOf(const SUnit *SU, SDNode *N) {
return false;
}
+/// TryUnfoldSU - Attempt to unfold SU's node into a load and an operation;
+/// returns the resulting SUnit on success, or null if unfolding fails.
+SUnit *ScheduleDAGRRList::TryUnfoldSU(SUnit *SU) {
+ SDNode *N = SU->getNode();
+ // Use while over if to ease fall through.
+ SmallVector<SDNode *, 2> NewNodes;
+ if (!TII->unfoldMemoryOperand(*DAG, N, NewNodes))
+ return nullptr;
+
+ // Unfolding an x86 DEC64m operation results in a store, a dec, and a
+ // load, which can't be handled here, so quit.
+ if (NewNodes.size() == 3)
+ return nullptr;
+
+ assert(NewNodes.size() == 2 && "Expected a load folding node!");
+
+ N = NewNodes[1];
+ SDNode *LoadNode = NewNodes[0];
+ unsigned NumVals = N->getNumValues();
+ unsigned OldNumVals = SU->getNode()->getNumValues();
+
+ // LoadNode may already exist. This can happen when there is another
+ // load from the same location that produces the same type of value
+ // but has a different alignment or volatility.
+ bool isNewLoad = true;
+ SUnit *LoadSU;
+ if (LoadNode->getNodeId() != -1) {
+ LoadSU = &SUnits[LoadNode->getNodeId()];
+ // If LoadSU has already been scheduled, we should clone it, but
+ // that would negate the benefit of unfolding, so just return SU.
+ if (LoadSU->isScheduled)
+ return SU;
+ isNewLoad = false;
+ } else {
+ LoadSU = CreateNewSUnit(LoadNode);
+ LoadNode->setNodeId(LoadSU->NodeNum);
+
+ InitNumRegDefsLeft(LoadSU);
+ computeLatency(LoadSU);
+ }
+
+ DEBUG(dbgs() << "Unfolding SU #" << SU->NodeNum << "\n");
+
+ // Now that we are committed to unfolding, replace the DAG uses.
+ for (unsigned i = 0; i != NumVals; ++i)
+ DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), i), SDValue(N, i));
+ DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), OldNumVals - 1),
+ SDValue(LoadNode, 1));
+
+ SUnit *NewSU = CreateNewSUnit(N);
+ assert(N->getNodeId() == -1 && "Node already inserted!");
+ N->setNodeId(NewSU->NodeNum);
+
+ const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
+ for (unsigned i = 0; i != MCID.getNumOperands(); ++i) {
+ if (MCID.getOperandConstraint(i, MCOI::TIED_TO) != -1) {
+ NewSU->isTwoAddress = true;
+ break;
+ }
+ }
+ if (MCID.isCommutable())
+ NewSU->isCommutable = true;
+
+ InitNumRegDefsLeft(NewSU);
+ computeLatency(NewSU);
+
+ // Record all the edges to and from the old SU, by category.
+ SmallVector<SDep, 4> ChainPreds;
+ SmallVector<SDep, 4> ChainSuccs;
+ SmallVector<SDep, 4> LoadPreds;
+ SmallVector<SDep, 4> NodePreds;
+ SmallVector<SDep, 4> NodeSuccs;
+ for (SDep &Pred : SU->Preds) {
+ if (Pred.isCtrl())
+ ChainPreds.push_back(Pred);
+ else if (isOperandOf(Pred.getSUnit(), LoadNode))
+ LoadPreds.push_back(Pred);
+ else
+ NodePreds.push_back(Pred);
+ }
+ for (SDep &Succ : SU->Succs) {
+ if (Succ.isCtrl())
+ ChainSuccs.push_back(Succ);
+ else
+ NodeSuccs.push_back(Succ);
+ }
+
+ // Now assign edges to the newly-created nodes.
+ for (const SDep &Pred : ChainPreds) {
+ RemovePred(SU, Pred);
+ if (isNewLoad)
+ AddPred(LoadSU, Pred);
+ }
+ for (const SDep &Pred : LoadPreds) {
+ RemovePred(SU, Pred);
+ if (isNewLoad)
+ AddPred(LoadSU, Pred);
+ }
+ for (const SDep &Pred : NodePreds) {
+ RemovePred(SU, Pred);
+ AddPred(NewSU, Pred);
+ }
+ for (SDep D : NodeSuccs) {
+ SUnit *SuccDep = D.getSUnit();
+ D.setSUnit(SU);
+ RemovePred(SuccDep, D);
+ D.setSUnit(NewSU);
+ AddPred(SuccDep, D);
+ // Balance register pressure.
+ if (AvailableQueue->tracksRegPressure() && SuccDep->isScheduled &&
+ !D.isCtrl() && NewSU->NumRegDefsLeft > 0)
+ --NewSU->NumRegDefsLeft;
+ }
+ for (SDep D : ChainSuccs) {
+ SUnit *SuccDep = D.getSUnit();
+ D.setSUnit(SU);
+ RemovePred(SuccDep, D);
+ if (isNewLoad) {
+ D.setSUnit(LoadSU);
+ AddPred(SuccDep, D);
+ }
+ }
+
+ // Add a data dependency to reflect that NewSU reads the value defined
+ // by LoadSU.
+ SDep D(LoadSU, SDep::Data, 0);
+ D.setLatency(LoadSU->Latency);
+ AddPred(NewSU, D);
+
+ if (isNewLoad)
+ AvailableQueue->addNode(LoadSU);
+ AvailableQueue->addNode(NewSU);
+
+ ++NumUnfolds;
+
+ if (NewSU->NumSuccsLeft == 0)
+ NewSU->isAvailable = true;
+
+ return NewSU;
+}
+
/// CopyAndMoveSuccessors - Clone the specified node and move its scheduled
/// successors to the newly created node.
SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
@@ -966,135 +1100,16 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
return nullptr;
}
+ // If possible, unfold the instruction.
if (TryUnfold) {
- SmallVector<SDNode*, 2> NewNodes;
- if (!TII->unfoldMemoryOperand(*DAG, N, NewNodes))
+ SUnit *UnfoldSU = TryUnfoldSU(SU);
+ if (!UnfoldSU)
return nullptr;
-
- // unfolding an x86 DEC64m operation results in store, dec, load which
- // can't be handled here so quit
- if (NewNodes.size() == 3)
- return nullptr;
-
- DEBUG(dbgs() << "Unfolding SU #" << SU->NodeNum << "\n");
- assert(NewNodes.size() == 2 && "Expected a load folding node!");
-
- N = NewNodes[1];
- SDNode *LoadNode = NewNodes[0];
- unsigned NumVals = N->getNumValues();
- unsigned OldNumVals = SU->getNode()->getNumValues();
- for (unsigned i = 0; i != NumVals; ++i)
- DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), i), SDValue(N, i));
- DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), OldNumVals-1),
- SDValue(LoadNode, 1));
-
- // LoadNode may already exist. This can happen when there is another
- // load from the same location and producing the same type of value
- // but it has different alignment or volatileness.
- bool isNewLoad = true;
- SUnit *LoadSU;
- if (LoadNode->getNodeId() != -1) {
- LoadSU = &SUnits[LoadNode->getNodeId()];
- isNewLoad = false;
- } else {
- LoadSU = CreateNewSUnit(LoadNode);
- LoadNode->setNodeId(LoadSU->NodeNum);
-
- InitNumRegDefsLeft(LoadSU);
- computeLatency(LoadSU);
- }
-
- SUnit *NewSU = CreateNewSUnit(N);
- assert(N->getNodeId() == -1 && "Node already inserted!");
- N->setNodeId(NewSU->NodeNum);
-
- const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
- for (unsigned i = 0; i != MCID.getNumOperands(); ++i) {
- if (MCID.getOperandConstraint(i, MCOI::TIED_TO) != -1) {
- NewSU->isTwoAddress = true;
- break;
- }
- }
- if (MCID.isCommutable())
- NewSU->isCommutable = true;
-
- InitNumRegDefsLeft(NewSU);
- computeLatency(NewSU);
-
- // Record all the edges to and from the old SU, by category.
- SmallVector<SDep, 4> ChainPreds;
- SmallVector<SDep, 4> ChainSuccs;
- SmallVector<SDep, 4> LoadPreds;
- SmallVector<SDep, 4> NodePreds;
- SmallVector<SDep, 4> NodeSuccs;
- for (SDep &Pred : SU->Preds) {
- if (Pred.isCtrl())
- ChainPreds.push_back(Pred);
- else if (isOperandOf(Pred.getSUnit(), LoadNode))
- LoadPreds.push_back(Pred);
- else
- NodePreds.push_back(Pred);
- }
- for (SDep &Succ : SU->Succs) {
- if (Succ.isCtrl())
- ChainSuccs.push_back(Succ);
- else
- NodeSuccs.push_back(Succ);
- }
-
- // Now assign edges to the newly-created nodes.
- for (const SDep &Pred : ChainPreds) {
- RemovePred(SU, Pred);
- if (isNewLoad)
- AddPred(LoadSU, Pred);
- }
- for (const SDep &Pred : LoadPreds) {
- RemovePred(SU, Pred);
- if (isNewLoad)
- AddPred(LoadSU, Pred);
- }
- for (const SDep &Pred : NodePreds) {
- RemovePred(SU, Pred);
- AddPred(NewSU, Pred);
- }
- for (SDep D : NodeSuccs) {
- SUnit *SuccDep = D.getSUnit();
- D.setSUnit(SU);
- RemovePred(SuccDep, D);
- D.setSUnit(NewSU);
- AddPred(SuccDep, D);
- // Balance register pressure.
- if (AvailableQueue->tracksRegPressure() && SuccDep->isScheduled
- && !D.isCtrl() && NewSU->NumRegDefsLeft > 0)
- --NewSU->NumRegDefsLeft;
- }
- for (SDep D : ChainSuccs) {
- SUnit *SuccDep = D.getSUnit();
- D.setSUnit(SU);
- RemovePred(SuccDep, D);
- if (isNewLoad) {
- D.setSUnit(LoadSU);
- AddPred(SuccDep, D);
- }
- }
-
- // Add a data dependency to reflect that NewSU reads the value defined
- // by LoadSU.
- SDep D(LoadSU, SDep::Data, 0);
- D.setLatency(LoadSU->Latency);
- AddPred(NewSU, D);
-
- if (isNewLoad)
- AvailableQueue->addNode(LoadSU);
- AvailableQueue->addNode(NewSU);
-
- ++NumUnfolds;
-
- if (NewSU->NumSuccsLeft == 0) {
- NewSU->isAvailable = true;
- return NewSU;
- }
- SU = NewSU;
+ SU = UnfoldSU;
+ N = SU->getNode();
+ // If this can be scheduled, don't bother duplicating; just return.
+ if (SU->NumSuccsLeft == 0)
+ return SU;
}
DEBUG(dbgs() << " Duplicating SU #" << SU->NodeNum << "\n");
@@ -1265,10 +1280,9 @@ DelayForLiveRegsBottomUp(SUnit *SU, SmallVectorImpl<unsigned> &LRegs) {
//
// If SU is the currently live definition of the same register that it uses,
// then we are free to schedule it.
- for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
- I != E; ++I) {
- if (I->isAssignedRegDep() && LiveRegDefs[I->getReg()] != SU)
- CheckForLiveRegDef(I->getSUnit(), I->getReg(), LiveRegDefs.get(),
+ for (SDep &Pred : SU->Preds) {
+ if (Pred.isAssignedRegDep() && LiveRegDefs[Pred.getReg()] != SU)
+ CheckForLiveRegDef(Pred.getSUnit(), Pred.getReg(), LiveRegDefs.get(),
RegAdded, LRegs, TRI);
}
@@ -1305,7 +1319,8 @@ DelayForLiveRegsBottomUp(SUnit *SU, SmallVectorImpl<unsigned> &LRegs) {
// If we're in the middle of scheduling a call, don't begin scheduling
// another call. Also, don't allow any physical registers to be live across
// the call.
- if (Node->getMachineOpcode() == (unsigned)TII->getCallFrameDestroyOpcode()) {
+ if ((Node->getMachineOpcode() == TII->getCallFrameDestroyOpcode()) ||
+ (Node->getMachineOpcode() == TII->getCallFrameSetupOpcode())) {
// Check the special calling-sequence resource.
unsigned CallResource = TRI->getNumRegs();
if (LiveRegDefs[CallResource]) {
@@ -1323,6 +1338,18 @@ DelayForLiveRegsBottomUp(SUnit *SU, SmallVectorImpl<unsigned> &LRegs) {
RegAdded, LRegs);
const MCInstrDesc &MCID = TII->get(Node->getMachineOpcode());
+ if (MCID.hasOptionalDef()) {
+ // Most ARM instructions have an OptionalDef for CPSR, to model the S-bit.
+ // This operand can be either a def of CPSR, if the S bit is set; or a use
+ // of %noreg. When the OptionalDef is set to a valid register, we need to
+ // handle it in the same way as an ImplicitDef.
+ for (unsigned i = 0; i < MCID.getNumDefs(); ++i)
+ if (MCID.OpInfo[i].isOptionalDef()) {
+ const SDValue &OptionalDef = Node->getOperand(i - Node->getNumValues());
+ unsigned Reg = cast<RegisterSDNode>(OptionalDef)->getReg();
+ CheckForLiveRegDef(SU, Reg, LiveRegDefs.get(), RegAdded, LRegs, TRI);
+ }
+ }
if (!MCID.ImplicitDefs)
continue;
for (const MCPhysReg *Reg = MCID.getImplicitDefs(); *Reg; ++Reg)
@@ -1659,9 +1686,8 @@ public:
RegPressure.resize(NumRC);
std::fill(RegLimit.begin(), RegLimit.end(), 0);
std::fill(RegPressure.begin(), RegPressure.end(), 0);
- for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(),
- E = TRI->regclass_end(); I != E; ++I)
- RegLimit[(*I)->getID()] = tri->getRegPressureLimit(*I, MF);
+ for (const TargetRegisterClass *RC : TRI->regclasses())
+ RegLimit[RC->getID()] = tri->getRegPressureLimit(RC, MF);
}
}
@@ -1735,8 +1761,7 @@ protected:
template<class SF>
static SUnit *popFromQueueImpl(std::vector<SUnit*> &Q, SF &Picker) {
std::vector<SUnit *>::iterator Best = Q.begin();
- for (std::vector<SUnit *>::iterator I = std::next(Q.begin()),
- E = Q.end(); I != E; ++I)
+ for (auto I = std::next(Q.begin()), E = Q.end(); I != E; ++I)
if (Picker(*Best, *I))
Best = I;
SUnit *V = *Best;
@@ -1788,7 +1813,7 @@ public:
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
- void dump(ScheduleDAG *DAG) const override {
+ LLVM_DUMP_METHOD void dump(ScheduleDAG *DAG) const override {
// Emulate pop() without clobbering NodeQueueIds.
std::vector<SUnit*> DumpQueue = Queue;
SF DumpPicker = Picker;
@@ -1836,28 +1861,68 @@ static int checkSpecialNodes(const SUnit *left, const SUnit *right) {
/// Smaller number is the higher priority.
static unsigned
CalcNodeSethiUllmanNumber(const SUnit *SU, std::vector<unsigned> &SUNumbers) {
- unsigned &SethiUllmanNumber = SUNumbers[SU->NodeNum];
- if (SethiUllmanNumber != 0)
- return SethiUllmanNumber;
+ if (SUNumbers[SU->NodeNum] != 0)
+ return SUNumbers[SU->NodeNum];
+
+ // Use WorkList to avoid stack overflow on excessively large IRs.
+ struct WorkState {
+ WorkState(const SUnit *SU) : SU(SU) {}
+ const SUnit *SU;
+ unsigned PredsProcessed = 0;
+ };
- unsigned Extra = 0;
- for (const SDep &Pred : SU->Preds) {
- if (Pred.isCtrl()) continue; // ignore chain preds
- SUnit *PredSU = Pred.getSUnit();
- unsigned PredSethiUllman = CalcNodeSethiUllmanNumber(PredSU, SUNumbers);
- if (PredSethiUllman > SethiUllmanNumber) {
- SethiUllmanNumber = PredSethiUllman;
- Extra = 0;
- } else if (PredSethiUllman == SethiUllmanNumber)
- ++Extra;
- }
+ SmallVector<WorkState, 16> WorkList;
+ WorkList.push_back(SU);
+ while (!WorkList.empty()) {
+ auto &Temp = WorkList.back();
+ auto *TempSU = Temp.SU;
+ bool AllPredsKnown = true;
+ // Try to find an unevaluated pred and push it onto the processing stack.
+ for (unsigned P = Temp.PredsProcessed; P < TempSU->Preds.size(); ++P) {
+ auto &Pred = TempSU->Preds[P];
+ if (Pred.isCtrl()) continue; // ignore chain preds
+ SUnit *PredSU = Pred.getSUnit();
+ if (SUNumbers[PredSU->NodeNum] == 0) {
+#ifndef NDEBUG
+ // In debug mode, check that no such element is already in the stack.
+ for (auto It : WorkList)
+ assert(It.SU != PredSU && "Trying to push an element twice?");
+#endif
+ // Next time, resume processing this one from the next pred.
+ Temp.PredsProcessed = P + 1;
+ WorkList.push_back(PredSU);
+ AllPredsKnown = false;
+ break;
+ }
+ }
- SethiUllmanNumber += Extra;
+ if (!AllPredsKnown)
+ continue;
- if (SethiUllmanNumber == 0)
- SethiUllmanNumber = 1;
+ // Once all preds are known, we can calculate the answer for this one.
+ unsigned SethiUllmanNumber = 0;
+ unsigned Extra = 0;
+ for (const SDep &Pred : TempSU->Preds) {
+ if (Pred.isCtrl()) continue; // ignore chain preds
+ SUnit *PredSU = Pred.getSUnit();
+ unsigned PredSethiUllman = SUNumbers[PredSU->NodeNum];
+ assert(PredSethiUllman > 0 && "We should have evaluated this pred!");
+ if (PredSethiUllman > SethiUllmanNumber) {
+ SethiUllmanNumber = PredSethiUllman;
+ Extra = 0;
+ } else if (PredSethiUllman == SethiUllmanNumber)
+ ++Extra;
+ }
+
+ SethiUllmanNumber += Extra;
+ if (SethiUllmanNumber == 0)
+ SethiUllmanNumber = 1;
+ SUNumbers[TempSU->NodeNum] = SethiUllmanNumber;
+ WorkList.pop_back();
+ }
- return SethiUllmanNumber;
+ assert(SUNumbers[SU->NodeNum] > 0 && "SethiUllman should never be zero!");
+ return SUNumbers[SU->NodeNum];
}
/// CalculateSethiUllmanNumbers - Calculate Sethi-Ullman numbers of all
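
The rewrite above converts a recursive post-order evaluation into an explicit work stack so that pathologically deep DAGs cannot overflow the call stack. The same shape, reduced to a self-contained toy that computes a height-like number; Node, Preds, Frame, and Value are illustrative names, not LLVM API:

    #include <algorithm>
    #include <vector>

    // Toy DAG node; Value == 0 means "not computed yet".
    struct Node {
      std::vector<Node *> Preds;
      unsigned Value = 0;
    };

    // Iterative post-order evaluation with an explicit stack, mirroring the
    // WorkState/PredsProcessed scheme in the hunk above.
    unsigned compute(Node *Root) {
      struct Frame {
        Node *N;
        unsigned NextPred;
      };
      std::vector<Frame> Stack;
      Stack.push_back({Root, 0});
      while (!Stack.empty()) {
        Frame &F = Stack.back();
        if (F.NextPred < F.N->Preds.size()) {
          // Defer this frame; evaluate the next unknown predecessor first.
          Node *P = F.N->Preds[F.NextPred++];
          if (P->Value == 0)
            Stack.push_back({P, 0});
          continue;
        }
        // All predecessors known: fold them into this node's value.
        unsigned V = 1;
        for (Node *P : F.N->Preds)
          V = std::max(V, P->Value + 1);
        F.N->Value = V;
        Stack.pop_back();
      }
      return Root->Value;
    }
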
@@ -1924,19 +1989,17 @@ unsigned RegReductionPQBase::getNodePriority(const SUnit *SU) const {
// Register Pressure Tracking
//===----------------------------------------------------------------------===//
-void RegReductionPQBase::dumpRegPressure() const {
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
- for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(),
- E = TRI->regclass_end(); I != E; ++I) {
- const TargetRegisterClass *RC = *I;
+LLVM_DUMP_METHOD void RegReductionPQBase::dumpRegPressure() const {
+ for (const TargetRegisterClass *RC : TRI->regclasses()) {
unsigned Id = RC->getID();
unsigned RP = RegPressure[Id];
if (!RP) continue;
DEBUG(dbgs() << TRI->getRegClassName(RC) << ": " << RP << " / "
<< RegLimit[Id] << '\n');
}
-#endif
}
+#endif
bool RegReductionPQBase::HighRegPressure(const SUnit *SU) const {
if (!TLI)
@@ -2092,7 +2155,7 @@ void RegReductionPQBase::scheduledNode(SUnit *SU) {
RegPressure[RCId] -= Cost;
}
}
- dumpRegPressure();
+ DEBUG(dumpRegPressure());
}
void RegReductionPQBase::unscheduledNode(SUnit *SU) {
@@ -2172,7 +2235,7 @@ void RegReductionPQBase::unscheduledNode(SUnit *SU) {
}
}
- dumpRegPressure();
+ DEBUG(dumpRegPressure());
}
//===----------------------------------------------------------------------===//
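
The dumpRegPressure hunks apply a common LLVM pattern: compile the dump method only in +Asserts builds and wrap every call site in DEBUG(), which expands to nothing under NDEBUG, so release builds neither define nor reference the symbol. In miniature (a sketch, not the exact declarations):

    #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
    LLVM_DUMP_METHOD void dumpRegPressure() const; // defined only with asserts
    #endif

    // At each call site:
    DEBUG(dumpRegPressure()); // compiled out under NDEBUG, so no undefined reference
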
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
index 3be622f..3c8526e 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
@@ -650,6 +650,7 @@ void ScheduleDAGSDNodes::computeOperandLatency(SDNode *Def, SDNode *Use,
}
void ScheduleDAGSDNodes::dumpNode(const SUnit *SU) const {
+ // The virtual function cannot be removed completely, even in release mode.
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
if (!SU->getNode()) {
dbgs() << "PHYS REG COPY\n";
@@ -704,8 +705,8 @@ ProcessSDDbgValues(SDNode *N, SelectionDAG *DAG, InstrEmitter &Emitter,
if (!N->getHasDebugValue())
return;
- // Opportunistically insert immediate dbg_value uses, i.e. those with source
- // order number right after the N.
+ // Opportunistically insert immediate dbg_value uses, i.e. those with the same
+ // source order number as N.
MachineBasicBlock *BB = Emitter.getBlock();
MachineBasicBlock::iterator InsertPos = Emitter.getInsertPos();
ArrayRef<SDDbgValue*> DVs = DAG->GetDbgValues(N);
@@ -713,7 +714,7 @@ ProcessSDDbgValues(SDNode *N, SelectionDAG *DAG, InstrEmitter &Emitter,
if (DVs[i]->isInvalidated())
continue;
unsigned DVOrder = DVs[i]->getOrder();
- if (!Order || DVOrder == ++Order) {
+ if (!Order || DVOrder == Order) {
MachineInstr *DbgMI = Emitter.EmitDbgValue(DVs[i], VRBaseMap);
if (DbgMI) {
Orders.push_back(std::make_pair(DVOrder, DbgMI));
@@ -835,8 +836,7 @@ EmitSchedule(MachineBasicBlock::iterator &InsertPos) {
GluedNodes.push_back(N);
while (!GluedNodes.empty()) {
SDNode *N = GluedNodes.back();
- Emitter.EmitNode(GluedNodes.back(), SU->OrigNode != SU, SU->isCloned,
- VRBaseMap);
+ Emitter.EmitNode(N, SU->OrigNode != SU, SU->isCloned, VRBaseMap);
// Remember the source order of the inserted instruction.
if (HasDbg)
ProcessSourceNode(N, DAG, Emitter, VRBaseMap, Orders, Seen);
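
The ProcessSDDbgValues hunk fixes an off-by-one: the old test pre-incremented Order inside the comparison, so it matched dbg_values ordered right after N rather than those sharing N's source order, and it mutated Order as a side effect. A toy illustration with hypothetical values:

    unsigned Order = 4;   // source order of the node just emitted
    unsigned DVOrder = 4; // source order of a candidate dbg_value
    bool old = (DVOrder == ++Order); // matched order 5 ("right after" N); Order mutated
    Order = 4;
    bool fixed = (DVOrder == Order); // matches N's own order, 4
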
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp
index eee4a4b..631cb34 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp
@@ -18,12 +18,12 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/CodeGen/SchedulerRegistry.h"
#include "ScheduleDAGSDNodes.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/LatencyPriorityQueue.h"
#include "llvm/CodeGen/ResourcePriorityQueue.h"
#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
+#include "llvm/CodeGen/SchedulerRegistry.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/Support/Debug.h"
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index e225ba8..16f425d 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -1,4 +1,4 @@
-//===-- SelectionDAG.cpp - Implement the SelectionDAG data structures -----===//
+//===- SelectionDAG.cpp - Implement the SelectionDAG data structures ------===//
//
// The LLVM Compiler Infrastructure
//
@@ -13,43 +13,66 @@
#include "llvm/CodeGen/SelectionDAG.h"
#include "SDNodeDbgValue.h"
+#include "llvm/ADT/APFloat.h"
+#include "llvm/ADT/APInt.h"
#include "llvm/ADT/APSInt.h"
-#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/FoldingSet.h"
+#include "llvm/ADT/None.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineValueType.h"
+#include "llvm/CodeGen/RuntimeLibcalls.h"
+#include "llvm/CodeGen/SelectionDAGAddressAnalysis.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/SelectionDAGTargetInfo.h"
-#include "llvm/IR/CallingConv.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
-#include "llvm/IR/DebugInfo.h"
+#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
-#include "llvm/IR/GlobalAlias.h"
-#include "llvm/IR/GlobalVariable.h"
-#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/Value.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/CodeGen.h"
+#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/KnownBits.h"
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/Mutex.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetIntrinsicInfo.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
#include <algorithm>
-#include <cmath>
+#include <cassert>
+#include <cstdint>
+#include <cstdlib>
+#include <limits>
+#include <set>
+#include <string>
#include <utility>
+#include <vector>
using namespace llvm;
@@ -93,7 +116,8 @@ bool ConstantFPSDNode::isValueValidForType(EVT VT,
// ISD Namespace
//===----------------------------------------------------------------------===//
-bool ISD::isConstantSplatVector(const SDNode *N, APInt &SplatVal) {
+bool ISD::isConstantSplatVector(const SDNode *N, APInt &SplatVal,
+ bool AllowShrink) {
auto *BV = dyn_cast<BuildVectorSDNode>(N);
if (!BV)
return false;
@@ -101,9 +125,11 @@ bool ISD::isConstantSplatVector(const SDNode *N, APInt &SplatVal) {
APInt SplatUndef;
unsigned SplatBitSize;
bool HasUndefs;
- EVT EltVT = N->getValueType(0).getVectorElementType();
- return BV->isConstantSplat(SplatVal, SplatUndef, SplatBitSize, HasUndefs) &&
- EltVT.getSizeInBits() >= SplatBitSize;
+ unsigned EltSize = N->getValueType(0).getVectorElementType().getSizeInBits();
+ unsigned MinSplatBits = AllowShrink ? 0 : EltSize;
+ return BV->isConstantSplat(SplatVal, SplatUndef, SplatBitSize, HasUndefs,
+ MinSplatBits) &&
+ EltSize >= SplatBitSize;
}
// FIXME: AllOnes and AllZeros duplicate a lot of code. Could these be
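
A hypothetical call illustrating the new AllowShrink parameter: with false, MinSplatBits equals the element size, so the reported splat must be exactly element-wide (the old behavior); with true, a narrower repeating pattern may also be reported.

    APInt SplatVal;
    // Sketch: N is some BUILD_VECTOR node under inspection.
    if (ISD::isConstantSplatVector(N, SplatVal, /*AllowShrink=*/true)) {
      // SplatVal may now be narrower than the vector's element type.
    }
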
@@ -268,7 +294,6 @@ ISD::CondCode ISD::getSetCCInverse(ISD::CondCode Op, bool isInteger) {
return ISD::CondCode(Operation);
}
-
/// For an integer comparison, return 1 if the comparison is a signed operation
/// and 2 if the result is an unsigned comparison. Return zero if the operation
/// does not depend on the sign of the input (setne and seteq).
@@ -289,28 +314,28 @@ static int isSignedOp(ISD::CondCode Opcode) {
}
ISD::CondCode ISD::getSetCCOrOperation(ISD::CondCode Op1, ISD::CondCode Op2,
- bool isInteger) {
- if (isInteger && (isSignedOp(Op1) | isSignedOp(Op2)) == 3)
+ bool IsInteger) {
+ if (IsInteger && (isSignedOp(Op1) | isSignedOp(Op2)) == 3)
// Cannot fold a signed integer setcc with an unsigned integer setcc.
return ISD::SETCC_INVALID;
unsigned Op = Op1 | Op2; // Combine all of the condition bits.
- // If the N and U bits get set then the resultant comparison DOES suddenly
- // care about orderedness, and is true when ordered.
+ // If the N and U bits get set, then the resultant comparison DOES suddenly
+ // care about orderedness, and it is true when ordered.
if (Op > ISD::SETTRUE2)
Op &= ~16; // Clear the U bit if the N bit is set.
// Canonicalize illegal integer setcc's.
- if (isInteger && Op == ISD::SETUNE) // e.g. SETUGT | SETULT
+ if (IsInteger && Op == ISD::SETUNE) // e.g. SETUGT | SETULT
Op = ISD::SETNE;
return ISD::CondCode(Op);
}
ISD::CondCode ISD::getSetCCAndOperation(ISD::CondCode Op1, ISD::CondCode Op2,
- bool isInteger) {
- if (isInteger && (isSignedOp(Op1) | isSignedOp(Op2)) == 3)
+ bool IsInteger) {
+ if (IsInteger && (isSignedOp(Op1) | isSignedOp(Op2)) == 3)
// Cannot fold a signed setcc with an unsigned setcc.
return ISD::SETCC_INVALID;
@@ -318,7 +343,7 @@ ISD::CondCode ISD::getSetCCAndOperation(ISD::CondCode Op1, ISD::CondCode Op2,
ISD::CondCode Result = ISD::CondCode(Op1 & Op2);
// Canonicalize illegal integer setcc's.
- if (isInteger) {
+ if (IsInteger) {
switch (Result) {
default: break;
case ISD::SETUO : Result = ISD::SETFALSE; break; // SETUGT & SETULT
@@ -337,7 +362,6 @@ ISD::CondCode ISD::getSetCCAndOperation(ISD::CondCode Op1, ISD::CondCode Op2,
//===----------------------------------------------------------------------===//
/// AddNodeIDOpcode - Add the node opcode to the NodeID data.
-///
static void AddNodeIDOpcode(FoldingSetNodeID &ID, unsigned OpC) {
ID.AddInteger(OpC);
}
@@ -349,7 +373,6 @@ static void AddNodeIDValueTypes(FoldingSetNodeID &ID, SDVTList VTList) {
}
/// AddNodeIDOperands - Various routines for adding operands to the NodeID data.
-///
static void AddNodeIDOperands(FoldingSetNodeID &ID,
ArrayRef<SDValue> Ops) {
for (auto& Op : Ops) {
@@ -359,7 +382,6 @@ static void AddNodeIDOperands(FoldingSetNodeID &ID,
}
/// AddNodeIDOperands - Various routines for adding operands to the NodeID data.
-///
static void AddNodeIDOperands(FoldingSetNodeID &ID,
ArrayRef<SDUse> Ops) {
for (auto& Op : Ops) {
@@ -391,10 +413,9 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) {
break;
}
case ISD::TargetConstantFP:
- case ISD::ConstantFP: {
+ case ISD::ConstantFP:
ID.AddPointer(cast<ConstantFPSDNode>(N)->getConstantFPValue());
break;
- }
case ISD::TargetGlobalAddress:
case ISD::GlobalAddress:
case ISD::TargetGlobalTLSAddress:
@@ -572,6 +593,11 @@ void SelectionDAG::RemoveDeadNodes(SmallVectorImpl<SDNode *> &DeadNodes) {
// worklist.
while (!DeadNodes.empty()) {
SDNode *N = DeadNodes.pop_back_val();
+ // Skip to the next node if we've already managed to delete this one. This
+ // could happen if replacing a node causes a node previously added to the
+ // worklist to be deleted.
+ if (N->getOpcode() == ISD::DELETED_NODE)
+ continue;
for (DAGUpdateListener *DUL = UpdateListeners; DUL; DUL = DUL->Next)
DUL->NodeDeleted(N, nullptr);
@@ -639,12 +665,15 @@ void SelectionDAG::DeallocateNode(SDNode *N) {
// If we have operands, deallocate them.
removeOperands(N);
+ NodeAllocator.Deallocate(AllNodes.remove(N));
+
// Set the opcode to DELETED_NODE to help catch bugs when node
// memory is reallocated.
+ // FIXME: There are places in SDag that have grown a dependency on the opcode
+ // value in the released node.
+ __asan_unpoison_memory_region(&N->NodeType, sizeof(N->NodeType));
N->NodeType = ISD::DELETED_NODE;
- NodeAllocator.Deallocate(AllNodes.remove(N));
-
// If any of the SDDbgValue nodes refer to this SDNode, invalidate
// them and forget about that node.
DbgInfo->erase(N);
@@ -766,7 +795,6 @@ bool SelectionDAG::RemoveNodeFromCSEMaps(SDNode *N) {
/// maps and modified in place. Add it back to the CSE maps, unless an identical
/// node already exists, in which case transfer all its users to the existing
/// node. This transfer can potentially trigger recursive merging.
-///
void
SelectionDAG::AddModifiedNodeToCSEMaps(SDNode *N) {
// For node types that aren't CSE'd, just act as if no identical node
@@ -807,8 +835,7 @@ SDNode *SelectionDAG::FindModifiedNodeSlot(SDNode *N, SDValue Op,
AddNodeIDCustom(ID, N);
SDNode *Node = FindNodeOrInsertPos(ID, SDLoc(N), InsertPos);
if (Node)
- if (const SDNodeFlags *Flags = N->getFlags())
- Node->intersectFlagsWith(Flags);
+ Node->intersectFlagsWith(N->getFlags());
return Node;
}
@@ -828,12 +855,10 @@ SDNode *SelectionDAG::FindModifiedNodeSlot(SDNode *N,
AddNodeIDCustom(ID, N);
SDNode *Node = FindNodeOrInsertPos(ID, SDLoc(N), InsertPos);
if (Node)
- if (const SDNodeFlags *Flags = N->getFlags())
- Node->intersectFlagsWith(Flags);
+ Node->intersectFlagsWith(N->getFlags());
return Node;
}
-
/// FindModifiedNodeSlot - Find a slot for the specified node if its operands
/// were replaced with those specified. If this node is never memoized,
/// return null, otherwise return a pointer to the slot it would take. If a
@@ -848,8 +873,7 @@ SDNode *SelectionDAG::FindModifiedNodeSlot(SDNode *N, ArrayRef<SDValue> Ops,
AddNodeIDCustom(ID, N);
SDNode *Node = FindNodeOrInsertPos(ID, SDLoc(N), InsertPos);
if (Node)
- if (const SDNodeFlags *Flags = N->getFlags())
- Node->intersectFlagsWith(Flags);
+ Node->intersectFlagsWith(N->getFlags());
return Node;
}
@@ -863,19 +887,20 @@ unsigned SelectionDAG::getEVTAlignment(EVT VT) const {
// EntryNode could meaningfully have debug info if we can find it...
SelectionDAG::SelectionDAG(const TargetMachine &tm, CodeGenOpt::Level OL)
- : TM(tm), TSI(nullptr), TLI(nullptr), OptLevel(OL),
+ : TM(tm), OptLevel(OL),
EntryNode(ISD::EntryToken, 0, DebugLoc(), getVTList(MVT::Other)),
- Root(getEntryNode()), NewNodesMustHaveLegalTypes(false),
- UpdateListeners(nullptr) {
+ Root(getEntryNode()) {
InsertNode(&EntryNode);
DbgInfo = new SDDbgInfo();
}
-void SelectionDAG::init(MachineFunction &mf) {
- MF = &mf;
+void SelectionDAG::init(MachineFunction &NewMF,
+ OptimizationRemarkEmitter &NewORE) {
+ MF = &NewMF;
+ ORE = &NewORE;
TLI = getSubtarget().getTargetLowering();
TSI = getSubtarget().getSelectionDAGInfo();
- Context = &mf.getFunction()->getContext();
+ Context = &MF->getFunction()->getContext();
}
SelectionDAG::~SelectionDAG() {
@@ -895,29 +920,6 @@ void SelectionDAG::allnodes_clear() {
#endif
}
-SDNode *SelectionDAG::GetBinarySDNode(unsigned Opcode, const SDLoc &DL,
- SDVTList VTs, SDValue N1, SDValue N2,
- const SDNodeFlags *Flags) {
- SDValue Ops[] = {N1, N2};
-
- if (isBinOpWithFlags(Opcode)) {
- // If no flags were passed in, use a default flags object.
- SDNodeFlags F;
- if (Flags == nullptr)
- Flags = &F;
-
- auto *FN = newSDNode<BinaryWithFlagsSDNode>(Opcode, DL.getIROrder(),
- DL.getDebugLoc(), VTs, *Flags);
- createOperands(FN, Ops);
-
- return FN;
- }
-
- auto *N = newSDNode<SDNode>(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs);
- createOperands(N, Ops);
- return N;
-}
-
SDNode *SelectionDAG::FindNodeOrInsertPos(const FoldingSetNodeID &ID,
void *&InsertPos) {
SDNode *N = CSEMap.FindNodeOrInsertPos(ID, InsertPos);
@@ -979,6 +981,12 @@ void SelectionDAG::clear() {
DbgInfo->clear();
}
+SDValue SelectionDAG::getFPExtendOrRound(SDValue Op, const SDLoc &DL, EVT VT) {
+ return VT.bitsGT(Op.getValueType())
+ ? getNode(ISD::FP_EXTEND, DL, VT, Op)
+ : getNode(ISD::FP_ROUND, DL, VT, Op, getIntPtrConstant(0, DL));
+}
+
SDValue SelectionDAG::getAnyExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT) {
return VT.bitsGT(Op.getValueType()) ?
getNode(ISD::ANY_EXTEND, DL, VT, Op) :
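
The new getFPExtendOrRound helper folds the bitsGT() check that call sites previously open-coded. A hypothetical use:

    // Widen or narrow Val to f64 in a single call; the FP_ROUND path passes
    // the usual "0" trailing operand via getIntPtrConstant.
    SDValue Adjusted = DAG.getFPExtendOrRound(Val, DL, MVT::f64);
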
@@ -1052,7 +1060,6 @@ SDValue SelectionDAG::getZeroExtendVectorInReg(SDValue Op, const SDLoc &DL,
}
/// getNOT - Create a bitwise NOT operation as (XOR Val, -1).
-///
SDValue SelectionDAG::getNOT(const SDLoc &DL, SDValue Val, EVT VT) {
EVT EltVT = VT.getScalarType();
SDValue NegOne =
@@ -1331,7 +1338,6 @@ SDValue SelectionDAG::getConstantPool(const Constant *C, EVT VT,
return SDValue(N, 0);
}
-
SDValue SelectionDAG::getConstantPool(MachineConstantPoolValue *C, EVT VT,
unsigned Alignment, int Offset,
bool isTarget,
@@ -1465,7 +1471,7 @@ SDValue SelectionDAG::getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1,
// Validate that all indices in Mask are within the range of the elements
// input to the shuffle.
int NElts = Mask.size();
- assert(all_of(Mask, [&](int M) { return M < (NElts * 2); }) &&
+ assert(llvm::all_of(Mask, [&](int M) { return M < (NElts * 2); }) &&
"Index out of range");
// Copy the mask so we can do any needed cleanup.
@@ -1824,7 +1830,7 @@ SDValue SelectionDAG::CreateStackTemporary(EVT VT, unsigned minAlign) {
std::max((unsigned)getDataLayout().getPrefTypeAlignment(Ty), minAlign);
int FrameIdx = MFI.CreateStackObject(ByteSize, StackAlign, false);
- return getFrameIndex(FrameIdx, TLI->getPointerTy(getDataLayout()));
+ return getFrameIndex(FrameIdx, TLI->getFrameIndexTy(getDataLayout()));
}
SDValue SelectionDAG::CreateStackTemporary(EVT VT1, EVT VT2) {
@@ -1837,7 +1843,7 @@ SDValue SelectionDAG::CreateStackTemporary(EVT VT1, EVT VT2) {
MachineFrameInfo &MFI = getMachineFunction().getFrameInfo();
int FrameIdx = MFI.CreateStackObject(Bytes, Align, false);
- return getFrameIndex(FrameIdx, TLI->getPointerTy(getDataLayout()));
+ return getFrameIndex(FrameIdx, TLI->getFrameIndexTy(getDataLayout()));
}
SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1, SDValue N2,
@@ -1953,7 +1959,7 @@ SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1, SDValue N2,
/// use this predicate to simplify operations downstream.
bool SelectionDAG::SignBitIsZero(SDValue Op, unsigned Depth) const {
unsigned BitWidth = Op.getScalarValueSizeInBits();
- return MaskedValueIsZero(Op, APInt::getSignBit(BitWidth), Depth);
+ return MaskedValueIsZero(Op, APInt::getSignMask(BitWidth), Depth);
}
/// MaskedValueIsZero - Return true if 'V & Mask' is known to be zero. We use
@@ -1961,9 +1967,9 @@ bool SelectionDAG::SignBitIsZero(SDValue Op, unsigned Depth) const {
/// for bits that V cannot have.
bool SelectionDAG::MaskedValueIsZero(SDValue Op, const APInt &Mask,
unsigned Depth) const {
- APInt KnownZero, KnownOne;
- computeKnownBits(Op, KnownZero, KnownOne, Depth);
- return (KnownZero & Mask) == Mask;
+ KnownBits Known;
+ computeKnownBits(Op, Known, Depth);
+ return Mask.isSubsetOf(Known.Zero);
}
/// If a SHL/SRA/SRL node has a constant or splat constant shift amount that
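
The remainder of this file migrates computeKnownBits from paired KnownZero/KnownOne APInts to the KnownBits struct. To ease reading the hunks below, a sketch of its core, assuming the definition in llvm/Support/KnownBits.h (the real class carries more helpers, e.g. trunc and zext):

    struct KnownBits {
      APInt Zero; // bits known to be 0
      APInt One;  // bits known to be 1
      KnownBits(unsigned BitWidth) : Zero(BitWidth, 0), One(BitWidth, 0) {}
      void resetAll() { Zero.clearAllBits(); One.clearAllBits(); }
    };
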
@@ -1979,33 +1985,30 @@ static const APInt *getValidShiftAmountConstant(SDValue V) {
}
/// Determine which bits of Op are known to be either zero or one and return
-/// them in the KnownZero/KnownOne bitsets. For vectors, the known bits are
-/// those that are shared by every vector element.
-void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero,
- APInt &KnownOne, unsigned Depth) const {
+/// them in Known. For vectors, the known bits are those that are shared by
+/// every vector element.
+void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,
+ unsigned Depth) const {
EVT VT = Op.getValueType();
APInt DemandedElts = VT.isVector()
? APInt::getAllOnesValue(VT.getVectorNumElements())
: APInt(1, 1);
- computeKnownBits(Op, KnownZero, KnownOne, DemandedElts, Depth);
+ computeKnownBits(Op, Known, DemandedElts, Depth);
}
/// Determine which bits of Op are known to be either zero or one and return
-/// them in the KnownZero/KnownOne bitsets. The DemandedElts argument allows
-/// us to only collect the known bits that are shared by the requested vector
-/// elements.
-/// TODO: We only support DemandedElts on a few opcodes so far, the remainder
-/// should be added when they become necessary.
-void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero,
- APInt &KnownOne, const APInt &DemandedElts,
+/// them in Known. The DemandedElts argument allows us to only collect the known
+/// bits that are shared by the requested vector elements.
+void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,
+ const APInt &DemandedElts,
unsigned Depth) const {
unsigned BitWidth = Op.getScalarValueSizeInBits();
- KnownZero = KnownOne = APInt(BitWidth, 0); // Don't know anything.
+ Known = KnownBits(BitWidth); // Don't know anything.
if (Depth == 6)
return; // Limit search depth.
- APInt KnownZero2, KnownOne2;
+ KnownBits Known2;
unsigned NumElts = DemandedElts.getBitWidth();
if (!DemandedElts)
@@ -2015,35 +2018,34 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero,
switch (Opcode) {
case ISD::Constant:
// We know all of the bits for a constant!
- KnownOne = cast<ConstantSDNode>(Op)->getAPIntValue();
- KnownZero = ~KnownOne;
+ Known.One = cast<ConstantSDNode>(Op)->getAPIntValue();
+ Known.Zero = ~Known.One;
break;
case ISD::BUILD_VECTOR:
// Collect the known bits that are shared by every demanded vector element.
assert(NumElts == Op.getValueType().getVectorNumElements() &&
"Unexpected vector size");
- KnownZero = KnownOne = APInt::getAllOnesValue(BitWidth);
+ Known.Zero.setAllBits(); Known.One.setAllBits();
for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) {
if (!DemandedElts[i])
continue;
SDValue SrcOp = Op.getOperand(i);
- computeKnownBits(SrcOp, KnownZero2, KnownOne2, Depth + 1);
+ computeKnownBits(SrcOp, Known2, Depth + 1);
// BUILD_VECTOR can implicitly truncate sources, we must handle this.
if (SrcOp.getValueSizeInBits() != BitWidth) {
assert(SrcOp.getValueSizeInBits() > BitWidth &&
"Expected BUILD_VECTOR implicit truncation");
- KnownOne2 = KnownOne2.trunc(BitWidth);
- KnownZero2 = KnownZero2.trunc(BitWidth);
+ Known2 = Known2.trunc(BitWidth);
}
// Known bits are the values that are shared by every demanded element.
- KnownOne &= KnownOne2;
- KnownZero &= KnownZero2;
+ Known.One &= Known2.One;
+ Known.Zero &= Known2.Zero;
// If we don't know any bits, early out.
- if (!KnownOne && !KnownZero)
+ if (!Known.One && !Known.Zero)
break;
}
break;
@@ -2051,7 +2053,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero,
// Collect the known bits that are shared by every vector element referenced
// by the shuffle.
APInt DemandedLHS(NumElts, 0), DemandedRHS(NumElts, 0);
- KnownZero = KnownOne = APInt::getAllOnesValue(BitWidth);
+ Known.Zero.setAllBits(); Known.One.setAllBits();
const ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op);
assert(NumElts == SVN->getMask().size() && "Unexpected vector size");
for (unsigned i = 0; i != NumElts; ++i) {
@@ -2062,8 +2064,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero,
if (M < 0) {
// For UNDEF elements, we don't know anything about the common state of
// the shuffle result.
- KnownOne.clearAllBits();
- KnownZero.clearAllBits();
+ Known.resetAll();
DemandedLHS.clearAllBits();
DemandedRHS.clearAllBits();
break;
@@ -2077,24 +2078,24 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero,
// Known bits are the values that are shared by every demanded element.
if (!!DemandedLHS) {
SDValue LHS = Op.getOperand(0);
- computeKnownBits(LHS, KnownZero2, KnownOne2, DemandedLHS, Depth + 1);
- KnownOne &= KnownOne2;
- KnownZero &= KnownZero2;
+ computeKnownBits(LHS, Known2, DemandedLHS, Depth + 1);
+ Known.One &= Known2.One;
+ Known.Zero &= Known2.Zero;
}
// If we don't know any bits, early out.
- if (!KnownOne && !KnownZero)
+ if (!Known.One && !Known.Zero)
break;
if (!!DemandedRHS) {
SDValue RHS = Op.getOperand(1);
- computeKnownBits(RHS, KnownZero2, KnownOne2, DemandedRHS, Depth + 1);
- KnownOne &= KnownOne2;
- KnownZero &= KnownZero2;
+ computeKnownBits(RHS, Known2, DemandedRHS, Depth + 1);
+ Known.One &= Known2.One;
+ Known.Zero &= Known2.Zero;
}
break;
}
case ISD::CONCAT_VECTORS: {
// Split DemandedElts and test each of the demanded subvectors.
- KnownZero = KnownOne = APInt::getAllOnesValue(BitWidth);
+ Known.Zero.setAllBits(); Known.One.setAllBits();
EVT SubVectorVT = Op.getOperand(0).getValueType();
unsigned NumSubVectorElts = SubVectorVT.getVectorNumElements();
unsigned NumSubVectors = Op.getNumOperands();
@@ -2103,12 +2104,12 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero,
DemandedSub = DemandedSub.trunc(NumSubVectorElts);
if (!!DemandedSub) {
SDValue Sub = Op.getOperand(i);
- computeKnownBits(Sub, KnownZero2, KnownOne2, DemandedSub, Depth + 1);
- KnownOne &= KnownOne2;
- KnownZero &= KnownZero2;
+ computeKnownBits(Sub, Known2, DemandedSub, Depth + 1);
+ Known.One &= Known2.One;
+ Known.Zero &= Known2.Zero;
}
// If we don't know any bits, early out.
- if (!KnownOne && !KnownZero)
+ if (!Known.One && !Known.Zero)
break;
}
break;
@@ -2123,9 +2124,9 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero,
// Offset the demanded elts by the subvector index.
uint64_t Idx = SubIdx->getZExtValue();
APInt DemandedSrc = DemandedElts.zext(NumSrcElts).shl(Idx);
- computeKnownBits(Src, KnownZero, KnownOne, DemandedSrc, Depth + 1);
+ computeKnownBits(Src, Known, DemandedSrc, Depth + 1);
} else {
- computeKnownBits(Src, KnownZero, KnownOne, Depth + 1);
+ computeKnownBits(Src, Known, Depth + 1);
}
break;
}
@@ -2139,7 +2140,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero,
// Fast handling of 'identity' bitcasts.
if (BitWidth == SubBitWidth) {
- computeKnownBits(N0, KnownZero, KnownOne, DemandedElts, Depth + 1);
+ computeKnownBits(N0, Known, DemandedElts, Depth + 1);
break;
}
@@ -2163,10 +2164,10 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero,
SubDemandedElts.setBit(i * SubScale);
for (unsigned i = 0; i != SubScale; ++i) {
- computeKnownBits(N0, KnownZero2, KnownOne2, SubDemandedElts.shl(i),
+ computeKnownBits(N0, Known2, SubDemandedElts.shl(i),
Depth + 1);
- KnownOne |= KnownOne2.zext(BitWidth).shl(SubBitWidth * i);
- KnownZero |= KnownZero2.zext(BitWidth).shl(SubBitWidth * i);
+ Known.One |= Known2.One.zext(BitWidth).shl(SubBitWidth * i);
+ Known.Zero |= Known2.Zero.zext(BitWidth).shl(SubBitWidth * i);
}
}
@@ -2183,16 +2184,16 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero,
if (DemandedElts[i])
SubDemandedElts.setBit(i / SubScale);
- computeKnownBits(N0, KnownZero2, KnownOne2, SubDemandedElts, Depth + 1);
+ computeKnownBits(N0, Known2, SubDemandedElts, Depth + 1);
- KnownZero = KnownOne = APInt::getAllOnesValue(BitWidth);
+ Known.Zero.setAllBits(); Known.One.setAllBits();
for (unsigned i = 0; i != NumElts; ++i)
if (DemandedElts[i]) {
unsigned Offset = (i % SubScale) * BitWidth;
- KnownOne &= KnownOne2.lshr(Offset).trunc(BitWidth);
- KnownZero &= KnownZero2.lshr(Offset).trunc(BitWidth);
+ Known.One &= Known2.One.lshr(Offset).trunc(BitWidth);
+ Known.Zero &= Known2.Zero.lshr(Offset).trunc(BitWidth);
// If we don't know any bits, early out.
- if (!KnownOne && !KnownZero)
+ if (!Known.One && !Known.Zero)
break;
}
}
@@ -2200,107 +2201,90 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero,
}
case ISD::AND:
// If either the LHS or the RHS are Zero, the result is zero.
- computeKnownBits(Op.getOperand(1), KnownZero, KnownOne, DemandedElts,
- Depth + 1);
- computeKnownBits(Op.getOperand(0), KnownZero2, KnownOne2, DemandedElts,
- Depth + 1);
+ computeKnownBits(Op.getOperand(1), Known, DemandedElts, Depth + 1);
+ computeKnownBits(Op.getOperand(0), Known2, DemandedElts, Depth + 1);
// Output known-1 bits are only known if set in both the LHS & RHS.
- KnownOne &= KnownOne2;
+ Known.One &= Known2.One;
// Output known-0 are known to be clear if zero in either the LHS | RHS.
- KnownZero |= KnownZero2;
+ Known.Zero |= Known2.Zero;
break;
case ISD::OR:
- computeKnownBits(Op.getOperand(1), KnownZero, KnownOne, DemandedElts,
- Depth + 1);
- computeKnownBits(Op.getOperand(0), KnownZero2, KnownOne2, DemandedElts,
- Depth + 1);
+ computeKnownBits(Op.getOperand(1), Known, DemandedElts, Depth + 1);
+ computeKnownBits(Op.getOperand(0), Known2, DemandedElts, Depth + 1);
// Output known-0 bits are only known if clear in both the LHS & RHS.
- KnownZero &= KnownZero2;
+ Known.Zero &= Known2.Zero;
// Output known-1 are known to be set if set in either the LHS | RHS.
- KnownOne |= KnownOne2;
+ Known.One |= Known2.One;
break;
case ISD::XOR: {
- computeKnownBits(Op.getOperand(1), KnownZero, KnownOne, DemandedElts,
- Depth + 1);
- computeKnownBits(Op.getOperand(0), KnownZero2, KnownOne2, DemandedElts,
- Depth + 1);
+ computeKnownBits(Op.getOperand(1), Known, DemandedElts, Depth + 1);
+ computeKnownBits(Op.getOperand(0), Known2, DemandedElts, Depth + 1);
// Output known-0 bits are known if clear or set in both the LHS & RHS.
- APInt KnownZeroOut = (KnownZero & KnownZero2) | (KnownOne & KnownOne2);
+ APInt KnownZeroOut = (Known.Zero & Known2.Zero) | (Known.One & Known2.One);
// Output known-1 are known to be set if set in only one of the LHS, RHS.
- KnownOne = (KnownZero & KnownOne2) | (KnownOne & KnownZero2);
- KnownZero = KnownZeroOut;
+ Known.One = (Known.Zero & Known2.One) | (Known.One & Known2.Zero);
+ Known.Zero = KnownZeroOut;
break;
}
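// [Editor's sketch, not part of this patch; helper name is illustrative.]
// The XOR rules above are easy to check in isolation, assuming only
// llvm::KnownBits from llvm/Support/KnownBits.h:
#include "llvm/Support/KnownBits.h"

// Known bits of (LHS ^ RHS): a result bit is known zero when both inputs
// agree on it, and known one when the inputs are known to disagree.
static llvm::KnownBits knownXor(const llvm::KnownBits &LHS,
                                const llvm::KnownBits &RHS) {
  llvm::KnownBits Known(LHS.getBitWidth());
  Known.Zero = (LHS.Zero & RHS.Zero) | (LHS.One & RHS.One);
  Known.One = (LHS.Zero & RHS.One) | (LHS.One & RHS.Zero);
  return Known;
}
// E.g. 4-bit inputs 0b01?0 and 0b0?10 give 0b0??0: bits 3 and 0 are known
// zero, bits 1 and 2 stay unknown.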
case ISD::MUL: {
- computeKnownBits(Op.getOperand(1), KnownZero, KnownOne, DemandedElts,
- Depth + 1);
- computeKnownBits(Op.getOperand(0), KnownZero2, KnownOne2, DemandedElts,
- Depth + 1);
+ computeKnownBits(Op.getOperand(1), Known, DemandedElts, Depth + 1);
+ computeKnownBits(Op.getOperand(0), Known2, DemandedElts, Depth + 1);
// If low bits are zero in either operand, output low known-0 bits.
// Also compute a conservative estimate for high known-0 bits.
// More trickiness is possible, but this is sufficient for the
// interesting case of alignment computation.
- KnownOne.clearAllBits();
- unsigned TrailZ = KnownZero.countTrailingOnes() +
- KnownZero2.countTrailingOnes();
- unsigned LeadZ = std::max(KnownZero.countLeadingOnes() +
- KnownZero2.countLeadingOnes(),
+ unsigned TrailZ = Known.countMinTrailingZeros() +
+ Known2.countMinTrailingZeros();
+ unsigned LeadZ = std::max(Known.countMinLeadingZeros() +
+ Known2.countMinLeadingZeros(),
BitWidth) - BitWidth;
- TrailZ = std::min(TrailZ, BitWidth);
- LeadZ = std::min(LeadZ, BitWidth);
- KnownZero = APInt::getLowBitsSet(BitWidth, TrailZ) |
- APInt::getHighBitsSet(BitWidth, LeadZ);
+ Known.resetAll();
+ Known.Zero.setLowBits(std::min(TrailZ, BitWidth));
+ Known.Zero.setHighBits(std::min(LeadZ, BitWidth));
break;
}
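// [Editor's sketch, not part of this patch; helper name is illustrative.]
// The MUL trailing-zero bound on its own: a product has at least as many
// trailing zero bits as its factors combined, which is the property the
// alignment computation mentioned above relies on.
#include "llvm/Support/KnownBits.h"
#include <algorithm>

static unsigned minTrailingZerosOfMul(const llvm::KnownBits &A,
                                      const llvm::KnownBits &B) {
  // A multiple of 8 (>= 3 trailing zeros) times a multiple of 4 (>= 2)
  // is a multiple of 32 (>= 5), capped at the bit width.
  return std::min(A.countMinTrailingZeros() + B.countMinTrailingZeros(),
                  A.getBitWidth());
}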
case ISD::UDIV: {
// For the purposes of computing leading zeros we can conservatively
// treat a udiv as a logical right shift by the power of 2 known to
// be less than the denominator.
- computeKnownBits(Op.getOperand(0), KnownZero2, KnownOne2, DemandedElts,
- Depth + 1);
- unsigned LeadZ = KnownZero2.countLeadingOnes();
+ computeKnownBits(Op.getOperand(0), Known2, DemandedElts, Depth + 1);
+ unsigned LeadZ = Known2.countMinLeadingZeros();
- computeKnownBits(Op.getOperand(1), KnownZero2, KnownOne2, DemandedElts,
- Depth + 1);
- unsigned RHSUnknownLeadingOnes = KnownOne2.countLeadingZeros();
- if (RHSUnknownLeadingOnes != BitWidth)
- LeadZ = std::min(BitWidth,
- LeadZ + BitWidth - RHSUnknownLeadingOnes - 1);
+ computeKnownBits(Op.getOperand(1), Known2, DemandedElts, Depth + 1);
+ unsigned RHSMaxLeadingZeros = Known2.countMaxLeadingZeros();
+ if (RHSMaxLeadingZeros != BitWidth)
+ LeadZ = std::min(BitWidth, LeadZ + BitWidth - RHSMaxLeadingZeros - 1);
- KnownZero = APInt::getHighBitsSet(BitWidth, LeadZ);
+ Known.Zero.setHighBits(LeadZ);
break;
}
case ISD::SELECT:
- computeKnownBits(Op.getOperand(2), KnownZero, KnownOne, Depth+1);
+ computeKnownBits(Op.getOperand(2), Known, Depth+1);
// If we don't know any bits, early out.
- if (!KnownOne && !KnownZero)
+ if (!Known.One && !Known.Zero)
break;
- computeKnownBits(Op.getOperand(1), KnownZero2, KnownOne2, Depth+1);
+ computeKnownBits(Op.getOperand(1), Known2, Depth+1);
// Only known if known in both the LHS and RHS.
- KnownOne &= KnownOne2;
- KnownZero &= KnownZero2;
+ Known.One &= Known2.One;
+ Known.Zero &= Known2.Zero;
break;
case ISD::SELECT_CC:
- computeKnownBits(Op.getOperand(3), KnownZero, KnownOne, Depth+1);
+ computeKnownBits(Op.getOperand(3), Known, Depth+1);
// If we don't know any bits, early out.
- if (!KnownOne && !KnownZero)
+ if (!Known.One && !Known.Zero)
break;
- computeKnownBits(Op.getOperand(2), KnownZero2, KnownOne2, Depth+1);
+ computeKnownBits(Op.getOperand(2), Known2, Depth+1);
// Only known if known in both the LHS and RHS.
- KnownOne &= KnownOne2;
- KnownZero &= KnownZero2;
+ Known.One &= Known2.One;
+ Known.Zero &= Known2.Zero;
break;
- case ISD::SADDO:
- case ISD::UADDO:
- case ISD::SSUBO:
- case ISD::USUBO:
case ISD::SMULO:
case ISD::UMULO:
if (Op.getResNo() != 1)
@@ -2312,51 +2296,46 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero,
if (TLI->getBooleanContents(Op.getValueType().isVector(), false) ==
TargetLowering::ZeroOrOneBooleanContent &&
BitWidth > 1)
- KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - 1);
+ Known.Zero.setBitsFrom(1);
break;
case ISD::SETCC:
// If we know the result of a setcc has the top bits zero, use this info.
if (TLI->getBooleanContents(Op.getOperand(0).getValueType()) ==
TargetLowering::ZeroOrOneBooleanContent &&
BitWidth > 1)
- KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - 1);
+ Known.Zero.setBitsFrom(1);
break;
case ISD::SHL:
if (const APInt *ShAmt = getValidShiftAmountConstant(Op)) {
- computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, DemandedElts,
- Depth + 1);
- KnownZero = KnownZero << *ShAmt;
- KnownOne = KnownOne << *ShAmt;
+ computeKnownBits(Op.getOperand(0), Known, DemandedElts, Depth + 1);
+ Known.Zero <<= *ShAmt;
+ Known.One <<= *ShAmt;
// Low bits are known zero.
- KnownZero |= APInt::getLowBitsSet(BitWidth, ShAmt->getZExtValue());
+ Known.Zero.setLowBits(ShAmt->getZExtValue());
}
break;
case ISD::SRL:
if (const APInt *ShAmt = getValidShiftAmountConstant(Op)) {
- computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, DemandedElts,
- Depth + 1);
- KnownZero = KnownZero.lshr(*ShAmt);
- KnownOne = KnownOne.lshr(*ShAmt);
+ computeKnownBits(Op.getOperand(0), Known, DemandedElts, Depth + 1);
+ Known.Zero.lshrInPlace(*ShAmt);
+ Known.One.lshrInPlace(*ShAmt);
// High bits are known zero.
- APInt HighBits = APInt::getHighBitsSet(BitWidth, ShAmt->getZExtValue());
- KnownZero |= HighBits;
+ Known.Zero.setHighBits(ShAmt->getZExtValue());
}
break;
case ISD::SRA:
if (const APInt *ShAmt = getValidShiftAmountConstant(Op)) {
- computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, DemandedElts,
- Depth + 1);
- KnownZero = KnownZero.lshr(*ShAmt);
- KnownOne = KnownOne.lshr(*ShAmt);
+ computeKnownBits(Op.getOperand(0), Known, DemandedElts, Depth + 1);
+ Known.Zero.lshrInPlace(*ShAmt);
+ Known.One.lshrInPlace(*ShAmt);
// If we know the value of the sign bit, then we know it is copied across
// the high bits by the shift amount.
- APInt HighBits = APInt::getHighBitsSet(BitWidth, ShAmt->getZExtValue());
- APInt SignBit = APInt::getSignBit(BitWidth);
- SignBit = SignBit.lshr(*ShAmt); // Adjust to where it is now in the mask.
- if (KnownZero.intersects(SignBit)) {
- KnownZero |= HighBits; // New bits are known zero.
- } else if (KnownOne.intersects(SignBit)) {
- KnownOne |= HighBits; // New bits are known one.
+ APInt SignMask = APInt::getSignMask(BitWidth);
+ SignMask.lshrInPlace(*ShAmt); // Adjust to where it is now in the mask.
+ if (Known.Zero.intersects(SignMask)) {
+ Known.Zero.setHighBits(ShAmt->getZExtValue());// New bits are known zero.
+ } else if (Known.One.intersects(SignMask)) {
+ Known.One.setHighBits(ShAmt->getZExtValue()); // New bits are known one.
}
}
break;
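// [Editor's sketch, not part of this patch.] Why the SRA case above may set
// the high bits: an arithmetic shift replicates the sign bit, so once the
// sign bit's state is known, every shifted-in bit is known too.
#include "llvm/ADT/APInt.h"

static llvm::APInt sraExample() {
  llvm::APInt V(8, 0x90); // 0b10010000, sign bit set
  return V.ashr(3);       // 0b11110010: the top 3 bits are copies of the sign
}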
@@ -2368,42 +2347,56 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero,
// present in the input.
APInt NewBits = APInt::getHighBitsSet(BitWidth, BitWidth - EBits);
- APInt InSignBit = APInt::getSignBit(EBits);
+ APInt InSignMask = APInt::getSignMask(EBits);
APInt InputDemandedBits = APInt::getLowBitsSet(BitWidth, EBits);
// If the sign extended bits are demanded, we know that the sign
// bit is demanded.
- InSignBit = InSignBit.zext(BitWidth);
+ InSignMask = InSignMask.zext(BitWidth);
if (NewBits.getBoolValue())
- InputDemandedBits |= InSignBit;
+ InputDemandedBits |= InSignMask;
- computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, DemandedElts,
- Depth + 1);
- KnownOne &= InputDemandedBits;
- KnownZero &= InputDemandedBits;
+ computeKnownBits(Op.getOperand(0), Known, DemandedElts, Depth + 1);
+ Known.One &= InputDemandedBits;
+ Known.Zero &= InputDemandedBits;
// If the sign bit of the input is known set or clear, then we know the
// top bits of the result.
- if (KnownZero.intersects(InSignBit)) { // Input sign bit known clear
- KnownZero |= NewBits;
- KnownOne &= ~NewBits;
- } else if (KnownOne.intersects(InSignBit)) { // Input sign bit known set
- KnownOne |= NewBits;
- KnownZero &= ~NewBits;
+ if (Known.Zero.intersects(InSignMask)) { // Input sign bit known clear
+ Known.Zero |= NewBits;
+ Known.One &= ~NewBits;
+ } else if (Known.One.intersects(InSignMask)) { // Input sign bit known set
+ Known.One |= NewBits;
+ Known.Zero &= ~NewBits;
} else { // Input sign bit unknown
- KnownZero &= ~NewBits;
- KnownOne &= ~NewBits;
+ Known.Zero &= ~NewBits;
+ Known.One &= ~NewBits;
}
break;
}
case ISD::CTTZ:
- case ISD::CTTZ_ZERO_UNDEF:
+ case ISD::CTTZ_ZERO_UNDEF: {
+ computeKnownBits(Op.getOperand(0), Known2, DemandedElts, Depth + 1);
+ // If we have a known 1, its position is our upper bound.
+ unsigned PossibleTZ = Known2.countMaxTrailingZeros();
+ unsigned LowBits = Log2_32(PossibleTZ) + 1;
+ Known.Zero.setBitsFrom(LowBits);
+ break;
+ }
case ISD::CTLZ:
- case ISD::CTLZ_ZERO_UNDEF:
+ case ISD::CTLZ_ZERO_UNDEF: {
+ computeKnownBits(Op.getOperand(0), Known2, DemandedElts, Depth + 1);
+ // If we have a known 1, its position is our upper bound.
+ unsigned PossibleLZ = Known2.countMaxLeadingZeros();
+ unsigned LowBits = Log2_32(PossibleLZ) + 1;
+ Known.Zero.setBitsFrom(LowBits);
+ break;
+ }
case ISD::CTPOP: {
- unsigned LowBits = Log2_32(BitWidth)+1;
- KnownZero = APInt::getHighBitsSet(BitWidth, BitWidth - LowBits);
- KnownOne.clearAllBits();
+ computeKnownBits(Op.getOperand(0), Known2, DemandedElts, Depth + 1);
+ // If we know some of the bits are zero, they can't be one.
+ unsigned PossibleOnes = Known2.countMaxPopulation();
+ Known.Zero.setBitsFrom(Log2_32(PossibleOnes) + 1);
break;
}
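// [Editor's sketch, not part of this patch; helper name is illustrative.]
// The CTPOP bound above in isolation: the population count is at most the
// number of possibly-set bits, so it fits in Log2_32(PossibleOnes) + 1 bits
// and everything above that is known zero.
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"

static llvm::KnownBits knownCtpop(const llvm::KnownBits &Src) {
  llvm::KnownBits Known(Src.getBitWidth());
  unsigned PossibleOnes = Src.countMaxPopulation();
  // e.g. at most 7 set bits -> the count fits in 3 bits -> bits 3+ are zero.
  Known.Zero.setBitsFrom(llvm::Log2_32(PossibleOnes) + 1);
  return Known;
}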
case ISD::LOAD: {
@@ -2412,76 +2405,87 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero,
if (ISD::isZEXTLoad(Op.getNode()) && Op.getResNo() == 0) {
EVT VT = LD->getMemoryVT();
unsigned MemBits = VT.getScalarSizeInBits();
- KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - MemBits);
+ Known.Zero.setBitsFrom(MemBits);
} else if (const MDNode *Ranges = LD->getRanges()) {
if (LD->getExtensionType() == ISD::NON_EXTLOAD)
- computeKnownBitsFromRangeMetadata(*Ranges, KnownZero, KnownOne);
+ computeKnownBitsFromRangeMetadata(*Ranges, Known);
}
break;
}
- case ISD::ZERO_EXTEND: {
+ case ISD::ZERO_EXTEND_VECTOR_INREG: {
EVT InVT = Op.getOperand(0).getValueType();
unsigned InBits = InVT.getScalarSizeInBits();
- APInt NewBits = APInt::getHighBitsSet(BitWidth, BitWidth - InBits);
- KnownZero = KnownZero.trunc(InBits);
- KnownOne = KnownOne.trunc(InBits);
- computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, DemandedElts,
+ Known = Known.trunc(InBits);
+ computeKnownBits(Op.getOperand(0), Known,
+ DemandedElts.zext(InVT.getVectorNumElements()),
Depth + 1);
- KnownZero = KnownZero.zext(BitWidth);
- KnownOne = KnownOne.zext(BitWidth);
- KnownZero |= NewBits;
+ Known = Known.zext(BitWidth);
+ Known.Zero.setBitsFrom(InBits);
break;
}
+ case ISD::ZERO_EXTEND: {
+ EVT InVT = Op.getOperand(0).getValueType();
+ unsigned InBits = InVT.getScalarSizeInBits();
+ Known = Known.trunc(InBits);
+ computeKnownBits(Op.getOperand(0), Known, DemandedElts, Depth + 1);
+ Known = Known.zext(BitWidth);
+ Known.Zero.setBitsFrom(InBits);
+ break;
+ }
+ // TODO ISD::SIGN_EXTEND_VECTOR_INREG
case ISD::SIGN_EXTEND: {
EVT InVT = Op.getOperand(0).getValueType();
unsigned InBits = InVT.getScalarSizeInBits();
- KnownZero = KnownZero.trunc(InBits);
- KnownOne = KnownOne.trunc(InBits);
- computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, DemandedElts,
- Depth + 1);
+ Known = Known.trunc(InBits);
+ computeKnownBits(Op.getOperand(0), Known, DemandedElts, Depth + 1);
// If the sign bit is known to be zero or one, then sext will extend
// it to the top bits, else it will just zext.
- KnownZero = KnownZero.sext(BitWidth);
- KnownOne = KnownOne.sext(BitWidth);
+ Known = Known.sext(BitWidth);
break;
}
case ISD::ANY_EXTEND: {
EVT InVT = Op.getOperand(0).getValueType();
unsigned InBits = InVT.getScalarSizeInBits();
- KnownZero = KnownZero.trunc(InBits);
- KnownOne = KnownOne.trunc(InBits);
- computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1);
- KnownZero = KnownZero.zext(BitWidth);
- KnownOne = KnownOne.zext(BitWidth);
+ Known = Known.trunc(InBits);
+ computeKnownBits(Op.getOperand(0), Known, Depth+1);
+ Known = Known.zext(BitWidth);
break;
}
case ISD::TRUNCATE: {
EVT InVT = Op.getOperand(0).getValueType();
unsigned InBits = InVT.getScalarSizeInBits();
- KnownZero = KnownZero.zext(InBits);
- KnownOne = KnownOne.zext(InBits);
- computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, DemandedElts,
- Depth + 1);
- KnownZero = KnownZero.trunc(BitWidth);
- KnownOne = KnownOne.trunc(BitWidth);
+ Known = Known.zext(InBits);
+ computeKnownBits(Op.getOperand(0), Known, DemandedElts, Depth + 1);
+ Known = Known.trunc(BitWidth);
break;
}
case ISD::AssertZext: {
EVT VT = cast<VTSDNode>(Op.getOperand(1))->getVT();
APInt InMask = APInt::getLowBitsSet(BitWidth, VT.getSizeInBits());
- computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1);
- KnownZero |= (~InMask);
- KnownOne &= (~KnownZero);
+ computeKnownBits(Op.getOperand(0), Known, Depth+1);
+ Known.Zero |= (~InMask);
+ Known.One &= (~Known.Zero);
break;
}
case ISD::FGETSIGN:
// All bits are zero except the low bit.
- KnownZero = APInt::getHighBitsSet(BitWidth, BitWidth - 1);
+ Known.Zero.setBitsFrom(1);
break;
-
- case ISD::SUB: {
+ case ISD::USUBO:
+ case ISD::SSUBO:
+ if (Op.getResNo() == 1) {
+ // If we know the result of a setcc has the top bits zero, use this info.
+ if (TLI->getBooleanContents(Op.getOperand(0).getValueType()) ==
+ TargetLowering::ZeroOrOneBooleanContent &&
+ BitWidth > 1)
+ Known.Zero.setBitsFrom(1);
+ break;
+ }
+ LLVM_FALLTHROUGH;
+ case ISD::SUB:
+ case ISD::SUBC: {
if (ConstantSDNode *CLHS = isConstOrConstSplat(Op.getOperand(0))) {
// We know that the top bits of C-X are clear if X contains fewer bits
// than C (i.e. no wrap-around can happen). For example, 20-X is
@@ -2490,22 +2494,47 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero,
unsigned NLZ = (CLHS->getAPIntValue()+1).countLeadingZeros();
// NLZ can't be BitWidth with no sign bit
APInt MaskV = APInt::getHighBitsSet(BitWidth, NLZ+1);
- computeKnownBits(Op.getOperand(1), KnownZero2, KnownOne2, DemandedElts,
+ computeKnownBits(Op.getOperand(1), Known2, DemandedElts,
Depth + 1);
// If all of the MaskV bits are known to be zero, then we know the
// output top bits are zero, because we now know that the output is
// from [0-C].
- if ((KnownZero2 & MaskV) == MaskV) {
+ if ((Known2.Zero & MaskV) == MaskV) {
unsigned NLZ2 = CLHS->getAPIntValue().countLeadingZeros();
// Top bits known zero.
- KnownZero = APInt::getHighBitsSet(BitWidth, NLZ2);
+ Known.Zero.setHighBits(NLZ2);
}
}
}
- LLVM_FALLTHROUGH;
+
+ // If low bits are known to be zero in both operands, then we know they are
+ // going to be 0 in the result. Both addition and complement operations
+ // preserve the low zero bits.
+ computeKnownBits(Op.getOperand(0), Known2, DemandedElts, Depth + 1);
+ unsigned KnownZeroLow = Known2.countMinTrailingZeros();
+ if (KnownZeroLow == 0)
+ break;
+
+ computeKnownBits(Op.getOperand(1), Known2, DemandedElts, Depth + 1);
+ KnownZeroLow = std::min(KnownZeroLow, Known2.countMinTrailingZeros());
+ Known.Zero.setLowBits(KnownZeroLow);
+ break;
}
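// [Editor's sketch, not part of this patch.] A brute-force check of the
// C-X bound above on the comment's own example: if X fits in the low
// 4 bits (X <= 15 < 21), then 20 - X cannot wrap, and its top
// countLeadingZeros(20) = 3 bits are zero on 8 bits.
#include "llvm/ADT/APInt.h"

static bool subBoundHolds() {
  for (unsigned X = 0; X <= 15; ++X) {
    llvm::APInt R(8, 20 - X); // 20 - X ranges over [5, 20]
    if (R.countLeadingZeros() < 3)
      return false;
  }
  return true; // the bound holds for every admissible X
}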
+ case ISD::UADDO:
+ case ISD::SADDO:
+ case ISD::ADDCARRY:
+ if (Op.getResNo() == 1) {
+ // If we know the result of a setcc has the top bits zero, use this info.
+ if (TLI->getBooleanContents(Op.getOperand(0).getValueType()) ==
+ TargetLowering::ZeroOrOneBooleanContent &&
+ BitWidth > 1)
+ Known.Zero.setBitsFrom(1);
+ break;
+ }
+ LLVM_FALLTHROUGH;
case ISD::ADD:
+ case ISD::ADDC:
case ISD::ADDE: {
// Output known-0 bits are known if clear or set in both the low clear bits
// common to both LHS & RHS. For example, 8+(X<<3) is known to have the
@@ -2514,31 +2543,28 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero,
// known to be clear. For example, if one input has the top 10 bits clear
// and the other has the top 8 bits clear, we know the top 7 bits of the
// output must be clear.
- computeKnownBits(Op.getOperand(0), KnownZero2, KnownOne2, DemandedElts,
- Depth + 1);
- unsigned KnownZeroHigh = KnownZero2.countLeadingOnes();
- unsigned KnownZeroLow = KnownZero2.countTrailingOnes();
+ computeKnownBits(Op.getOperand(0), Known2, DemandedElts, Depth + 1);
+ unsigned KnownZeroHigh = Known2.countMinLeadingZeros();
+ unsigned KnownZeroLow = Known2.countMinTrailingZeros();
- computeKnownBits(Op.getOperand(1), KnownZero2, KnownOne2, DemandedElts,
+ computeKnownBits(Op.getOperand(1), Known2, DemandedElts,
Depth + 1);
- KnownZeroHigh = std::min(KnownZeroHigh,
- KnownZero2.countLeadingOnes());
- KnownZeroLow = std::min(KnownZeroLow,
- KnownZero2.countTrailingOnes());
-
- if (Opcode == ISD::ADD) {
- KnownZero |= APInt::getLowBitsSet(BitWidth, KnownZeroLow);
- if (KnownZeroHigh > 1)
- KnownZero |= APInt::getHighBitsSet(BitWidth, KnownZeroHigh - 1);
+ KnownZeroHigh = std::min(KnownZeroHigh, Known2.countMinLeadingZeros());
+ KnownZeroLow = std::min(KnownZeroLow, Known2.countMinTrailingZeros());
+
+ if (Opcode == ISD::ADDE || Opcode == ISD::ADDCARRY) {
+ // With ADDE and ADDCARRY, a carry bit may be added in, so we can only
+ // use this information if we know (at least) that the low two bits are
+ // clear. We then return to the caller that the low bit is unknown but
+ // that other bits are known zero.
+ if (KnownZeroLow >= 2)
+ Known.Zero.setBits(1, KnownZeroLow);
break;
}
- // With ADDE, a carry bit may be added in, so we can only use this
- // information if we know (at least) that the low two bits are clear. We
- // then return to the caller that the low bit is unknown but that other bits
- // are known zero.
- if (KnownZeroLow >= 2) // ADDE
- KnownZero |= APInt::getBitsSet(BitWidth, 1, KnownZeroLow);
+ Known.Zero.setLowBits(KnownZeroLow);
+ if (KnownZeroHigh > 1)
+ Known.Zero.setHighBits(KnownZeroHigh - 1);
break;
}
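// [Editor's sketch, not part of this patch; helper name is illustrative.]
// The ADDE/ADDCARRY rule above: with an unknown incoming carry c in {0,1},
// A + B + c can only disturb bit 0, so when both addends have at least two
// low zero bits, bits [1, KnownZeroLow) stay known zero and bit 0 is left
// unknown.
#include "llvm/Support/KnownBits.h"

static llvm::KnownBits knownAddCarryLowBits(unsigned KnownZeroLow,
                                            unsigned BitWidth) {
  llvm::KnownBits Known(BitWidth);
  if (KnownZeroLow >= 2)
    Known.Zero.setBits(1, KnownZeroLow); // bit 0 stays unknown
  return Known;
}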
case ISD::SREM:
@@ -2546,23 +2572,22 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero,
const APInt &RA = Rem->getAPIntValue().abs();
if (RA.isPowerOf2()) {
APInt LowBits = RA - 1;
- computeKnownBits(Op.getOperand(0), KnownZero2, KnownOne2, DemandedElts,
- Depth + 1);
+ computeKnownBits(Op.getOperand(0), Known2, DemandedElts, Depth + 1);
// The low bits of the first operand are unchanged by the srem.
- KnownZero = KnownZero2 & LowBits;
- KnownOne = KnownOne2 & LowBits;
+ Known.Zero = Known2.Zero & LowBits;
+ Known.One = Known2.One & LowBits;
// If the first operand is non-negative or has all low bits zero, then
// the upper bits are all zero.
- if (KnownZero2[BitWidth-1] || ((KnownZero2 & LowBits) == LowBits))
- KnownZero |= ~LowBits;
+ if (Known2.Zero[BitWidth-1] || ((Known2.Zero & LowBits) == LowBits))
+ Known.Zero |= ~LowBits;
// If the first operand is negative and not all low bits are zero, then
// the upper bits are all one.
- if (KnownOne2[BitWidth-1] && ((KnownOne2 & LowBits) != 0))
- KnownOne |= ~LowBits;
- assert((KnownZero & KnownOne) == 0&&"Bits known to be one AND zero?");
+ if (Known2.One[BitWidth-1] && ((Known2.One & LowBits) != 0))
+ Known.One |= ~LowBits;
+ assert((Known.Zero & Known.One) == 0 && "Bits known to be one AND zero?");
}
}
break;
@@ -2571,41 +2596,37 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero,
const APInt &RA = Rem->getAPIntValue();
if (RA.isPowerOf2()) {
APInt LowBits = (RA - 1);
- computeKnownBits(Op.getOperand(0), KnownZero2, KnownOne2, DemandedElts,
- Depth + 1);
+ computeKnownBits(Op.getOperand(0), Known2, DemandedElts, Depth + 1);
// The upper bits are all zero, the lower ones are unchanged.
- KnownZero = KnownZero2 | ~LowBits;
- KnownOne = KnownOne2 & LowBits;
+ Known.Zero = Known2.Zero | ~LowBits;
+ Known.One = Known2.One & LowBits;
break;
}
}
// Since the result is less than or equal to either operand, any leading
// zero bits in either operand must also exist in the result.
- computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, DemandedElts,
- Depth + 1);
- computeKnownBits(Op.getOperand(1), KnownZero2, KnownOne2, DemandedElts,
- Depth + 1);
+ computeKnownBits(Op.getOperand(0), Known, DemandedElts, Depth + 1);
+ computeKnownBits(Op.getOperand(1), Known2, DemandedElts, Depth + 1);
- uint32_t Leaders = std::max(KnownZero.countLeadingOnes(),
- KnownZero2.countLeadingOnes());
- KnownOne.clearAllBits();
- KnownZero = APInt::getHighBitsSet(BitWidth, Leaders);
+ uint32_t Leaders =
+ std::max(Known.countMinLeadingZeros(), Known2.countMinLeadingZeros());
+ Known.resetAll();
+ Known.Zero.setHighBits(Leaders);
break;
}
case ISD::EXTRACT_ELEMENT: {
- computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1);
+ computeKnownBits(Op.getOperand(0), Known, Depth+1);
const unsigned Index = Op.getConstantOperandVal(1);
const unsigned BitWidth = Op.getValueSizeInBits();
// Remove low part of known bits mask
- KnownZero = KnownZero.getHiBits(KnownZero.getBitWidth() - Index * BitWidth);
- KnownOne = KnownOne.getHiBits(KnownOne.getBitWidth() - Index * BitWidth);
+ Known.Zero = Known.Zero.getHiBits(Known.Zero.getBitWidth() - Index * BitWidth);
+ Known.One = Known.One.getHiBits(Known.One.getBitWidth() - Index * BitWidth);
// Remove high part of known bit mask
- KnownZero = KnownZero.trunc(BitWidth);
- KnownOne = KnownOne.trunc(BitWidth);
+ Known = Known.trunc(BitWidth);
break;
}
case ISD::EXTRACT_VECTOR_ELT: {
@@ -2617,24 +2638,20 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero,
const unsigned NumSrcElts = VecVT.getVectorNumElements();
// If BitWidth > EltBitWidth the value is any-extended, so we do not know
// anything about the extended bits.
- if (BitWidth > EltBitWidth) {
- KnownZero = KnownZero.trunc(EltBitWidth);
- KnownOne = KnownOne.trunc(EltBitWidth);
- }
+ if (BitWidth > EltBitWidth)
+ Known = Known.trunc(EltBitWidth);
ConstantSDNode *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo);
if (ConstEltNo && ConstEltNo->getAPIntValue().ult(NumSrcElts)) {
// If we know the element index, just demand that vector element.
unsigned Idx = ConstEltNo->getZExtValue();
APInt DemandedElt = APInt::getOneBitSet(NumSrcElts, Idx);
- computeKnownBits(InVec, KnownZero, KnownOne, DemandedElt, Depth + 1);
+ computeKnownBits(InVec, Known, DemandedElt, Depth + 1);
} else {
// Unknown element index, so ignore DemandedElts and demand them all.
- computeKnownBits(InVec, KnownZero, KnownOne, Depth + 1);
- }
- if (BitWidth > EltBitWidth) {
- KnownZero = KnownZero.zext(BitWidth);
- KnownOne = KnownOne.zext(BitWidth);
+ computeKnownBits(InVec, Known, Depth + 1);
}
+ if (BitWidth > EltBitWidth)
+ Known = Known.zext(BitWidth);
break;
}
case ISD::INSERT_VECTOR_ELT: {
@@ -2646,60 +2663,110 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero,
if (CEltNo && CEltNo->getAPIntValue().ult(NumElts)) {
// If we know the element index, split the demand between the
// source vector and the inserted element.
- KnownZero = KnownOne = APInt::getAllOnesValue(BitWidth);
+ Known.Zero.setAllBits(); Known.One.setAllBits();
unsigned EltIdx = CEltNo->getZExtValue();
// If we demand the inserted element then add its common known bits.
if (DemandedElts[EltIdx]) {
- computeKnownBits(InVal, KnownZero2, KnownOne2, Depth + 1);
- KnownOne &= KnownOne2.zextOrTrunc(KnownOne.getBitWidth());
- KnownZero &= KnownZero2.zextOrTrunc(KnownZero.getBitWidth());;
+ computeKnownBits(InVal, Known2, Depth + 1);
+ Known.One &= Known2.One.zextOrTrunc(Known.One.getBitWidth());
+ Known.Zero &= Known2.Zero.zextOrTrunc(Known.Zero.getBitWidth());
}
// If we demand the source vector then add its common known bits, ensuring
// that we don't demand the inserted element.
APInt VectorElts = DemandedElts & ~(APInt::getOneBitSet(NumElts, EltIdx));
if (!!VectorElts) {
- computeKnownBits(InVec, KnownZero2, KnownOne2, VectorElts, Depth + 1);
- KnownOne &= KnownOne2;
- KnownZero &= KnownZero2;
+ computeKnownBits(InVec, Known2, VectorElts, Depth + 1);
+ Known.One &= Known2.One;
+ Known.Zero &= Known2.Zero;
}
} else {
// Unknown element index, so ignore DemandedElts and demand them all.
- computeKnownBits(InVec, KnownZero, KnownOne, Depth + 1);
- computeKnownBits(InVal, KnownZero2, KnownOne2, Depth + 1);
- KnownOne &= KnownOne2.zextOrTrunc(KnownOne.getBitWidth());
- KnownZero &= KnownZero2.zextOrTrunc(KnownZero.getBitWidth());;
+ computeKnownBits(InVec, Known, Depth + 1);
+ computeKnownBits(InVal, Known2, Depth + 1);
+ Known.One &= Known2.One.zextOrTrunc(Known.One.getBitWidth());
+ Known.Zero &= Known2.Zero.zextOrTrunc(Known.Zero.getBitWidth());
}
break;
}
+ case ISD::BITREVERSE: {
+ computeKnownBits(Op.getOperand(0), Known2, DemandedElts, Depth + 1);
+ Known.Zero = Known2.Zero.reverseBits();
+ Known.One = Known2.One.reverseBits();
+ break;
+ }
case ISD::BSWAP: {
- computeKnownBits(Op.getOperand(0), KnownZero2, KnownOne2, DemandedElts,
+ computeKnownBits(Op.getOperand(0), Known2, DemandedElts, Depth + 1);
+ Known.Zero = Known2.Zero.byteSwap();
+ Known.One = Known2.One.byteSwap();
+ break;
+ }
+ case ISD::ABS: {
+ computeKnownBits(Op.getOperand(0), Known2, DemandedElts, Depth + 1);
+
+ // If the source's MSB is zero then we know the rest of the bits already.
+ if (Known2.isNonNegative()) {
+ Known.Zero = Known2.Zero;
+ Known.One = Known2.One;
+ break;
+ }
+
+ // We only know that the absolute value's MSB will be zero iff there is
+ // a set bit that isn't the sign bit (otherwise it could be INT_MIN).
+ Known2.One.clearSignBit();
+ if (Known2.One.getBoolValue()) {
+ Known.Zero = APInt::getSignMask(BitWidth);
+ break;
+ }
+ break;
+ }
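// [Editor's sketch, not part of this patch.] The INT_MIN caveat above on
// 8 bits: abs(0x80) wraps back to 0x80, so the result's MSB may only be
// assumed zero when some bit other than the sign bit is known one.
#include "llvm/ADT/APInt.h"

static bool absOfIntMinIsIntMin() {
  llvm::APInt IntMin = llvm::APInt::getSignMask(8); // 0x80 == INT8_MIN
  return IntMin.abs() == IntMin;                    // true: abs wrapped
}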
+ case ISD::UMIN: {
+ computeKnownBits(Op.getOperand(0), Known, DemandedElts, Depth + 1);
+ computeKnownBits(Op.getOperand(1), Known2, DemandedElts, Depth + 1);
+
+ // UMIN - we know that the result will have the maximum of the
+ // known leading zero bits of the inputs.
+ unsigned LeadZero = Known.countMinLeadingZeros();
+ LeadZero = std::max(LeadZero, Known2.countMinLeadingZeros());
+
+ Known.Zero &= Known2.Zero;
+ Known.One &= Known2.One;
+ Known.Zero.setHighBits(LeadZero);
+ break;
+ }
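// [Editor's sketch, not part of this patch; helper name is illustrative.]
// The UMIN bound above: the unsigned minimum is no larger than either
// input, so it inherits the stronger of the two leading-zero guarantees.
#include "llvm/Support/KnownBits.h"
#include <algorithm>

static unsigned minLeadingZerosOfUMin(const llvm::KnownBits &A,
                                      const llvm::KnownBits &B) {
  // e.g. on 32 bits, A < 2^24 and B < 2^16 give min(A, B) < 2^16.
  return std::max(A.countMinLeadingZeros(), B.countMinLeadingZeros());
}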
+ case ISD::UMAX: {
+ computeKnownBits(Op.getOperand(0), Known, DemandedElts,
Depth + 1);
- KnownZero = KnownZero2.byteSwap();
- KnownOne = KnownOne2.byteSwap();
+ computeKnownBits(Op.getOperand(1), Known2, DemandedElts, Depth + 1);
+
+ // UMAX - we know that the result will have the maximum of the
+ // known leading one bits of the inputs.
+ unsigned LeadOne = Known.countMinLeadingOnes();
+ LeadOne = std::max(LeadOne, Known2.countMinLeadingOnes());
+
+ Known.Zero &= Known2.Zero;
+ Known.One &= Known2.One;
+ Known.One.setHighBits(LeadOne);
break;
}
case ISD::SMIN:
- case ISD::SMAX:
- case ISD::UMIN:
- case ISD::UMAX: {
- computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, DemandedElts,
+ case ISD::SMAX: {
+ computeKnownBits(Op.getOperand(0), Known, DemandedElts,
Depth + 1);
// If we don't know any bits, early out.
- if (!KnownOne && !KnownZero)
+ if (!Known.One && !Known.Zero)
break;
- computeKnownBits(Op.getOperand(1), KnownZero2, KnownOne2, DemandedElts,
- Depth + 1);
- KnownZero &= KnownZero2;
- KnownOne &= KnownOne2;
+ computeKnownBits(Op.getOperand(1), Known2, DemandedElts, Depth + 1);
+ Known.Zero &= Known2.Zero;
+ Known.One &= Known2.One;
break;
}
case ISD::FrameIndex:
case ISD::TargetFrameIndex:
if (unsigned Align = InferPtrAlignment(Op)) {
// The low bits are known zero if the pointer is aligned.
- KnownZero = APInt::getLowBitsSet(BitWidth, Log2_32(Align));
+ Known.Zero.setLowBits(Log2_32(Align));
break;
}
break;
@@ -2712,11 +2779,45 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero,
case ISD::INTRINSIC_W_CHAIN:
case ISD::INTRINSIC_VOID:
// Allow the target to implement this method for its nodes.
- TLI->computeKnownBitsForTargetNode(Op, KnownZero, KnownOne, *this, Depth);
+ TLI->computeKnownBitsForTargetNode(Op, Known, DemandedElts, *this, Depth);
break;
}
- assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ assert((Known.Zero & Known.One) == 0 && "Bits known to be one AND zero?");
+}
+
+SelectionDAG::OverflowKind SelectionDAG::computeOverflowKind(SDValue N0,
+ SDValue N1) const {
+ // X + 0 never overflows
+ if (isNullConstant(N1))
+ return OFK_Never;
+
+ KnownBits N1Known;
+ computeKnownBits(N1, N1Known);
+ if (N1Known.Zero.getBoolValue()) {
+ KnownBits N0Known;
+ computeKnownBits(N0, N0Known);
+
+ bool overflow;
+ (void)(~N0Known.Zero).uadd_ov(~N1Known.Zero, overflow);
+ if (!overflow)
+ return OFK_Never;
+ }
+
+ // mulhi + 1 never overflows
+ if (N0.getOpcode() == ISD::UMUL_LOHI && N0.getResNo() == 1 &&
+ (~N1Known.Zero & 0x01) == ~N1Known.Zero)
+ return OFK_Never;
+
+ if (N1.getOpcode() == ISD::UMUL_LOHI && N1.getResNo() == 1) {
+ KnownBits N0Known;
+ computeKnownBits(N0, N0Known);
+
+ if ((~N0Known.Zero & 0x01) == ~N0Known.Zero)
+ return OFK_Never;
+ }
+
+ return OFK_Sometime;
}
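// [Editor's sketch, not part of this patch; helper name is illustrative.]
// The unsigned-overflow test used in computeOverflowKind above, reduced to
// plain KnownBits: the largest value an operand can take is ~Known.Zero,
// so if even those maxima add without carrying out, the addition can never
// overflow.
#include "llvm/Support/KnownBits.h"

static bool addNeverOverflows(const llvm::KnownBits &A,
                              const llvm::KnownBits &B) {
  bool Overflow;
  (void)(~A.Zero).uadd_ov(~B.Zero, Overflow);
  return !Overflow;
}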
bool SelectionDAG::isKnownToBeAPowerOfTwo(SDValue Val) const {
@@ -2730,7 +2831,7 @@ bool SelectionDAG::isKnownToBeAPowerOfTwo(SDValue Val) const {
// A left-shift of a constant one will have exactly one bit set because
// shifting the bit off the end is undefined.
if (Val.getOpcode() == ISD::SHL) {
- auto *C = dyn_cast<ConstantSDNode>(Val.getOperand(0));
+ auto *C = isConstOrConstSplat(Val.getOperand(0));
if (C && C->getAPIntValue() == 1)
return true;
}
@@ -2738,14 +2839,14 @@ bool SelectionDAG::isKnownToBeAPowerOfTwo(SDValue Val) const {
// Similarly, a logical right-shift of a constant sign-bit will have exactly
// one bit set.
if (Val.getOpcode() == ISD::SRL) {
- auto *C = dyn_cast<ConstantSDNode>(Val.getOperand(0));
- if (C && C->getAPIntValue().isSignBit())
+ auto *C = isConstOrConstSplat(Val.getOperand(0));
+ if (C && C->getAPIntValue().isSignMask())
return true;
}
// Are all operands of a build vector constant powers of two?
if (Val.getOpcode() == ISD::BUILD_VECTOR)
- if (llvm::all_of(Val->ops(), [this, BitWidth](SDValue E) {
+ if (llvm::all_of(Val->ops(), [BitWidth](SDValue E) {
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(E))
return C->getAPIntValue().zextOrTrunc(BitWidth).isPowerOf2();
return false;
@@ -2756,22 +2857,34 @@ bool SelectionDAG::isKnownToBeAPowerOfTwo(SDValue Val) const {
// to handle some common cases.
// Fall back to computeKnownBits to catch other known cases.
- APInt KnownZero, KnownOne;
- computeKnownBits(Val, KnownZero, KnownOne);
- return (KnownZero.countPopulation() == BitWidth - 1) &&
- (KnownOne.countPopulation() == 1);
+ KnownBits Known;
+ computeKnownBits(Val, Known);
+ return (Known.countMaxPopulation() == 1) && (Known.countMinPopulation() == 1);
}
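// [Editor's sketch, not part of this patch; helper name is illustrative.]
// The rewritten fall-back above: a value is a power of two exactly when its
// population count is 1, so requiring both the minimum and the maximum
// possible pop-counts to be 1 is the KnownBits way of saying "exactly one
// bit is set".
#include "llvm/Support/KnownBits.h"

static bool knownPowerOfTwo(const llvm::KnownBits &Known) {
  return Known.countMaxPopulation() == 1 && Known.countMinPopulation() == 1;
}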
unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const {
EVT VT = Op.getValueType();
+ APInt DemandedElts = VT.isVector()
+ ? APInt::getAllOnesValue(VT.getVectorNumElements())
+ : APInt(1, 1);
+ return ComputeNumSignBits(Op, DemandedElts, Depth);
+}
+
+unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
+ unsigned Depth) const {
+ EVT VT = Op.getValueType();
assert(VT.isInteger() && "Invalid VT!");
unsigned VTBits = VT.getScalarSizeInBits();
+ unsigned NumElts = DemandedElts.getBitWidth();
unsigned Tmp, Tmp2;
unsigned FirstAnswer = 1;
if (Depth == 6)
return 1; // Limit search depth.
+ if (!DemandedElts)
+ return 1; // No demanded elts, better to assume we don't know anything.
+
switch (Op.getOpcode()) {
default: break;
case ISD::AssertSext:
@@ -2786,7 +2899,61 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const {
return Val.getNumSignBits();
}
+ case ISD::BUILD_VECTOR:
+ Tmp = VTBits;
+ for (unsigned i = 0, e = Op.getNumOperands(); (i < e) && (Tmp > 1); ++i) {
+ if (!DemandedElts[i])
+ continue;
+
+ SDValue SrcOp = Op.getOperand(i);
+ Tmp2 = ComputeNumSignBits(Op.getOperand(i), Depth + 1);
+
+ // BUILD_VECTOR can implicitly truncate sources; we must handle this.
+ if (SrcOp.getValueSizeInBits() != VTBits) {
+ assert(SrcOp.getValueSizeInBits() > VTBits &&
+ "Expected BUILD_VECTOR implicit truncation");
+ unsigned ExtraBits = SrcOp.getValueSizeInBits() - VTBits;
+ Tmp2 = (Tmp2 > ExtraBits ? Tmp2 - ExtraBits : 1);
+ }
+ Tmp = std::min(Tmp, Tmp2);
+ }
+ return Tmp;
+
+ case ISD::VECTOR_SHUFFLE: {
+ // Collect the minimum number of sign bits that are shared by every vector
+ // element referenced by the shuffle.
+ APInt DemandedLHS(NumElts, 0), DemandedRHS(NumElts, 0);
+ const ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op);
+ assert(NumElts == SVN->getMask().size() && "Unexpected vector size");
+ for (unsigned i = 0; i != NumElts; ++i) {
+ int M = SVN->getMaskElt(i);
+ if (!DemandedElts[i])
+ continue;
+ // For UNDEF elements, we don't know anything about the common state of
+ // the shuffle result.
+ if (M < 0)
+ return 1;
+ if ((unsigned)M < NumElts)
+ DemandedLHS.setBit((unsigned)M % NumElts);
+ else
+ DemandedRHS.setBit((unsigned)M % NumElts);
+ }
+ Tmp = std::numeric_limits<unsigned>::max();
+ if (!!DemandedLHS)
+ Tmp = ComputeNumSignBits(Op.getOperand(0), DemandedLHS, Depth + 1);
+ if (!!DemandedRHS) {
+ Tmp2 = ComputeNumSignBits(Op.getOperand(1), DemandedRHS, Depth + 1);
+ Tmp = std::min(Tmp, Tmp2);
+ }
+ // If we don't know anything, early out and try computeKnownBits fall-back.
+ if (Tmp == 1)
+ break;
+ assert(Tmp <= VTBits && "Failed to determine minimum sign bits");
+ return Tmp;
+ }
+
case ISD::SIGN_EXTEND:
+ case ISD::SIGN_EXTEND_VECTOR_INREG:
Tmp = VTBits - Op.getOperand(0).getScalarValueSizeInBits();
return ComputeNumSignBits(Op.getOperand(0), Depth+1) + Tmp;
@@ -2799,7 +2966,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const {
return std::max(Tmp, Tmp2);
case ISD::SRA:
- Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1);
+ Tmp = ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth+1);
// SRA X, C -> adds C sign bits.
if (ConstantSDNode *C = isConstOrConstSplat(Op.getOperand(1))) {
APInt ShiftVal = C->getAPIntValue();
@@ -2887,6 +3054,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const {
}
break;
case ISD::ADD:
+ case ISD::ADDC:
// Add can have at most one carry bit. Thus we know that the output
// is, at worst, one more bit than the inputs.
Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1);
@@ -2895,17 +3063,17 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const {
// Special case decrementing a value (ADD X, -1):
if (ConstantSDNode *CRHS = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
if (CRHS->isAllOnesValue()) {
- APInt KnownZero, KnownOne;
- computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1);
+ KnownBits Known;
+ computeKnownBits(Op.getOperand(0), Known, Depth+1);
// If the input is known to be 0 or 1, the output is 0/-1, which is all
// sign bits set.
- if ((KnownZero | APInt(VTBits, 1)).isAllOnesValue())
+ if ((Known.Zero | 1).isAllOnesValue())
return VTBits;
// If we are subtracting one from a positive number, there is no carry
// out of the result.
- if (KnownZero.isNegative())
+ if (Known.isNonNegative())
return Tmp;
}
@@ -2920,16 +3088,16 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const {
// Handle NEG.
if (ConstantSDNode *CLHS = isConstOrConstSplat(Op.getOperand(0)))
if (CLHS->isNullValue()) {
- APInt KnownZero, KnownOne;
- computeKnownBits(Op.getOperand(1), KnownZero, KnownOne, Depth+1);
+ KnownBits Known;
+ computeKnownBits(Op.getOperand(1), Known, Depth+1);
// If the input is known to be 0 or 1, the output is 0/-1, which is all
// sign bits set.
- if ((KnownZero | APInt(VTBits, 1)).isAllOnesValue())
+ if ((Known.Zero | 1).isAllOnesValue())
return VTBits;
// If the input is known to be positive (the sign bit is known clear),
// the output of the NEG has the same number of sign bits as the input.
- if (KnownZero.isNegative())
+ if (Known.isNonNegative())
return Tmp2;
// Otherwise, we treat this like a SUB.
@@ -2961,28 +3129,98 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const {
// result. Otherwise it gives either negative or > bitwidth result
return std::max(std::min(KnownSign - rIndex * BitWidth, BitWidth), 0);
}
+ case ISD::INSERT_VECTOR_ELT: {
+ SDValue InVec = Op.getOperand(0);
+ SDValue InVal = Op.getOperand(1);
+ SDValue EltNo = Op.getOperand(2);
+ unsigned NumElts = InVec.getValueType().getVectorNumElements();
+
+ ConstantSDNode *CEltNo = dyn_cast<ConstantSDNode>(EltNo);
+ if (CEltNo && CEltNo->getAPIntValue().ult(NumElts)) {
+ // If we know the element index, split the demand between the
+ // source vector and the inserted element.
+ unsigned EltIdx = CEltNo->getZExtValue();
+
+ // If we demand the inserted element then get its sign bits.
+ Tmp = std::numeric_limits<unsigned>::max();
+ if (DemandedElts[EltIdx]) {
+ // TODO - handle implicit truncation of inserted elements.
+ if (InVal.getScalarValueSizeInBits() != VTBits)
+ break;
+ Tmp = ComputeNumSignBits(InVal, Depth + 1);
+ }
+
+ // If we demand the source vector then get its sign bits, and determine
+ // the minimum.
+ APInt VectorElts = DemandedElts;
+ VectorElts.clearBit(EltIdx);
+ if (!!VectorElts) {
+ Tmp2 = ComputeNumSignBits(InVec, VectorElts, Depth + 1);
+ Tmp = std::min(Tmp, Tmp2);
+ }
+ } else {
+ // Unknown element index, so ignore DemandedElts and demand them all.
+ Tmp = ComputeNumSignBits(InVec, Depth + 1);
+ Tmp2 = ComputeNumSignBits(InVal, Depth + 1);
+ Tmp = std::min(Tmp, Tmp2);
+ }
+ assert(Tmp <= VTBits && "Failed to determine minimum sign bits");
+ return Tmp;
+ }
case ISD::EXTRACT_VECTOR_ELT: {
- // At the moment we keep this simple and skip tracking the specific
- // element. This way we get the lowest common denominator for all elements
- // of the vector.
- // TODO: get information for given vector element
+ SDValue InVec = Op.getOperand(0);
+ SDValue EltNo = Op.getOperand(1);
+ EVT VecVT = InVec.getValueType();
const unsigned BitWidth = Op.getValueSizeInBits();
const unsigned EltBitWidth = Op.getOperand(0).getScalarValueSizeInBits();
+ const unsigned NumSrcElts = VecVT.getVectorNumElements();
+
+ // If BitWidth > EltBitWidth the value is any-extended, and we do not know
// anything about sign bits. But if the sizes match we can derive knowledge
// about sign bits from the vector operand.
- if (BitWidth == EltBitWidth)
- return ComputeNumSignBits(Op.getOperand(0), Depth+1);
- break;
+ if (BitWidth != EltBitWidth)
+ break;
+
+ // If we know the element index, just demand that vector element, else for
+ // an unknown element index, ignore DemandedElts and demand them all.
+ APInt DemandedSrcElts = APInt::getAllOnesValue(NumSrcElts);
+ ConstantSDNode *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo);
+ if (ConstEltNo && ConstEltNo->getAPIntValue().ult(NumSrcElts))
+ DemandedSrcElts =
+ APInt::getOneBitSet(NumSrcElts, ConstEltNo->getZExtValue());
+
+ return ComputeNumSignBits(InVec, DemandedSrcElts, Depth + 1);
+ }
+ case ISD::EXTRACT_SUBVECTOR: {
+ // If we know the element index, just demand that subvector elements,
+ // otherwise demand them all.
+ SDValue Src = Op.getOperand(0);
+ ConstantSDNode *SubIdx = dyn_cast<ConstantSDNode>(Op.getOperand(1));
+ unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
+ if (SubIdx && SubIdx->getAPIntValue().ule(NumSrcElts - NumElts)) {
+ // Offset the demanded elts by the subvector index.
+ uint64_t Idx = SubIdx->getZExtValue();
+ APInt DemandedSrc = DemandedElts.zext(NumSrcElts).shl(Idx);
+ return ComputeNumSignBits(Src, DemandedSrc, Depth + 1);
+ }
+ return ComputeNumSignBits(Src, Depth + 1);
}
- case ISD::EXTRACT_SUBVECTOR:
- return ComputeNumSignBits(Op.getOperand(0), Depth + 1);
case ISD::CONCAT_VECTORS:
- // Determine the minimum number of sign bits across all input vectors.
- // Early out if the result is already 1.
- Tmp = ComputeNumSignBits(Op.getOperand(0), Depth + 1);
- for (unsigned i = 1, e = Op.getNumOperands(); (i < e) && (Tmp > 1); ++i)
- Tmp = std::min(Tmp, ComputeNumSignBits(Op.getOperand(i), Depth + 1));
+ // Determine the minimum number of sign bits across all demanded
+ // elts of the input vectors. Early out if the result is already 1.
+ Tmp = std::numeric_limits<unsigned>::max();
+ EVT SubVectorVT = Op.getOperand(0).getValueType();
+ unsigned NumSubVectorElts = SubVectorVT.getVectorNumElements();
+ unsigned NumSubVectors = Op.getNumOperands();
+ for (unsigned i = 0; (i < NumSubVectors) && (Tmp > 1); ++i) {
+ APInt DemandedSub = DemandedElts.lshr(i * NumSubVectorElts);
+ DemandedSub = DemandedSub.trunc(NumSubVectorElts);
+ if (!DemandedSub)
+ continue;
+ Tmp2 = ComputeNumSignBits(Op.getOperand(i), DemandedSub, Depth + 1);
+ Tmp = std::min(Tmp, Tmp2);
+ }
+ assert(Tmp <= VTBits && "Failed to determine minimum sign bits");
return Tmp;
}
@@ -3008,20 +3246,22 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const {
Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
Op.getOpcode() == ISD::INTRINSIC_VOID) {
- unsigned NumBits = TLI->ComputeNumSignBitsForTargetNode(Op, *this, Depth);
- if (NumBits > 1) FirstAnswer = std::max(FirstAnswer, NumBits);
+ unsigned NumBits =
+ TLI->ComputeNumSignBitsForTargetNode(Op, DemandedElts, *this, Depth);
+ if (NumBits > 1)
+ FirstAnswer = std::max(FirstAnswer, NumBits);
}
// Finally, if we can prove that the top bits of the result are 0's or 1's,
// use this information.
- APInt KnownZero, KnownOne;
- computeKnownBits(Op, KnownZero, KnownOne, Depth);
+ KnownBits Known;
+ computeKnownBits(Op, Known, DemandedElts, Depth);
APInt Mask;
- if (KnownZero.isNegative()) { // sign bit is 0
- Mask = KnownZero;
- } else if (KnownOne.isNegative()) { // sign bit is 1;
- Mask = KnownOne;
+ if (Known.isNonNegative()) { // sign bit is 0
+ Mask = Known.Zero;
+ } else if (Known.isNegative()) { // sign bit is 1;
+ Mask = Known.One;
} else {
// Nothing known.
return FirstAnswer;
@@ -3054,6 +3294,9 @@ bool SelectionDAG::isKnownNeverNaN(SDValue Op) const {
if (getTarget().Options.NoNaNsFPMath)
return true;
+ if (Op->getFlags().hasNoNaNs())
+ return true;
+
// If the value is a constant, we can obviously see if it is a NaN or not.
if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Op))
return !C->getValueAPF().isNaN();
@@ -3096,16 +3339,15 @@ bool SelectionDAG::isEqualTo(SDValue A, SDValue B) const {
bool SelectionDAG::haveNoCommonBitsSet(SDValue A, SDValue B) const {
assert(A.getValueType() == B.getValueType() &&
"Values must have the same type");
- APInt AZero, AOne;
- APInt BZero, BOne;
- computeKnownBits(A, AZero, AOne);
- computeKnownBits(B, BZero, BOne);
- return (AZero | BZero).isAllOnesValue();
+ KnownBits AKnown, BKnown;
+ computeKnownBits(A, AKnown);
+ computeKnownBits(B, BKnown);
+ return (AKnown.Zero | BKnown.Zero).isAllOnesValue();
}
static SDValue FoldCONCAT_VECTORS(const SDLoc &DL, EVT VT,
ArrayRef<SDValue> Ops,
- llvm::SelectionDAG &DAG) {
+ SelectionDAG &DAG) {
assert(!Ops.empty() && "Can't concatenate an empty list of vectors!");
assert(llvm::all_of(Ops,
[Ops](SDValue Op) {
@@ -3169,7 +3411,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT) {
}
SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
- SDValue Operand) {
+ SDValue Operand, const SDNodeFlags Flags) {
// Constant fold unary operations with an integer constant operand. Even
// opaque constant will be folded, because the folding of unary operations
// doesn't create new constants with different values. Nevertheless, the
@@ -3206,6 +3448,12 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
if (VT == MVT::f128 && C->getValueType(0) == MVT::i128)
return getConstantFP(APFloat(APFloat::IEEEquad(), Val), DL, VT);
break;
+ case ISD::ABS:
+ return getConstant(Val.abs(), DL, VT, C->isTargetOpcode(),
+ C->isOpaque());
+ case ISD::BITREVERSE:
+ return getConstant(Val.reverseBits(), DL, VT, C->isTargetOpcode(),
+ C->isOpaque());
case ISD::BSWAP:
return getConstant(Val.byteSwap(), DL, VT, C->isTargetOpcode(),
C->isOpaque());
@@ -3220,6 +3468,17 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
case ISD::CTTZ_ZERO_UNDEF:
return getConstant(Val.countTrailingZeros(), DL, VT, C->isTargetOpcode(),
C->isOpaque());
+ case ISD::FP16_TO_FP: {
+ bool Ignored;
+ APFloat FPV(APFloat::IEEEhalf(),
+ (Val.getBitWidth() == 16) ? Val : Val.trunc(16));
+
+ // This can return overflow, underflow, or inexact; we don't care.
+ // FIXME need to be more flexible about rounding mode.
+ (void)FPV.convert(EVTToAPFloatSemantics(VT),
+ APFloat::rmNearestTiesToEven, &Ignored);
+ return getConstantFP(FPV, DL, VT);
+ }
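// [Editor's sketch, not part of this patch.] The FP16_TO_FP fold above with
// a fixed destination type; IEEE single is an assumption here, whereas the
// patch derives the semantics from VT via EVTToAPFloatSemantics.
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"

static llvm::APFloat foldFP16ToF32(const llvm::APInt &Val) {
  bool Ignored;
  llvm::APFloat FPV(llvm::APFloat::IEEEhalf(),
                    Val.getBitWidth() == 16 ? Val : Val.trunc(16));
  // Overflow, underflow, and inexact statuses are dropped, as in the patch.
  (void)FPV.convert(llvm::APFloat::IEEEsingle(),
                    llvm::APFloat::rmNearestTiesToEven, &Ignored);
  return FPV;
}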
}
}
@@ -3261,17 +3520,14 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
}
case ISD::FP_TO_SINT:
case ISD::FP_TO_UINT: {
- integerPart x[2];
bool ignored;
- static_assert(integerPartWidth >= 64, "APFloat parts too small!");
+ APSInt IntVal(VT.getSizeInBits(), Opcode == ISD::FP_TO_UINT);
// FIXME need to be more flexible about rounding mode.
- APFloat::opStatus s = V.convertToInteger(x, VT.getSizeInBits(),
- Opcode==ISD::FP_TO_SINT,
- APFloat::rmTowardZero, &ignored);
- if (s==APFloat::opInvalidOp) // inexact is OK, in fact usual
+ APFloat::opStatus s =
+ V.convertToInteger(IntVal, APFloat::rmTowardZero, &ignored);
+ if (s == APFloat::opInvalidOp) // inexact is OK, in fact usual
break;
- APInt api(VT.getSizeInBits(), x);
- return getConstant(api, DL, VT);
+ return getConstant(IntVal, DL, VT);
}
case ISD::BITCAST:
if (VT == MVT::i16 && C->getValueType(0) == MVT::f16)
@@ -3281,6 +3537,14 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
else if (VT == MVT::i64 && C->getValueType(0) == MVT::f64)
return getConstant(V.bitcastToAPInt().getZExtValue(), DL, VT);
break;
+ case ISD::FP_TO_FP16: {
+ bool Ignored;
+ // This can return overflow, underflow, or inexact; we don't care.
+ // FIXME need to be more flexible about rounding mode.
+ (void)V.convert(APFloat::IEEEhalf(),
+ APFloat::rmNearestTiesToEven, &Ignored);
+ return getConstant(V.bitcastToAPInt(), DL, VT);
+ }
}
}
@@ -3303,6 +3567,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
case ISD::TRUNCATE:
case ISD::UINT_TO_FP:
case ISD::SINT_TO_FP:
+ case ISD::ABS:
+ case ISD::BITREVERSE:
case ISD::BSWAP:
case ISD::CTLZ:
case ISD::CTLZ_ZERO_UNDEF:
@@ -3348,7 +3614,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
assert(Operand.getValueType().bitsLT(VT) &&
"Invalid sext node, dst < src!");
if (OpOpcode == ISD::SIGN_EXTEND || OpOpcode == ISD::ZERO_EXTEND)
- return getNode(OpOpcode, DL, VT, Operand.getNode()->getOperand(0));
+ return getNode(OpOpcode, DL, VT, Operand.getOperand(0));
else if (OpOpcode == ISD::UNDEF)
// sext(undef) = 0, because the top bits will all be the same.
return getConstant(0, DL, VT);
@@ -3364,8 +3630,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
assert(Operand.getValueType().bitsLT(VT) &&
"Invalid zext node, dst < src!");
if (OpOpcode == ISD::ZERO_EXTEND) // (zext (zext x)) -> (zext x)
- return getNode(ISD::ZERO_EXTEND, DL, VT,
- Operand.getNode()->getOperand(0));
+ return getNode(ISD::ZERO_EXTEND, DL, VT, Operand.getOperand(0));
else if (OpOpcode == ISD::UNDEF)
// zext(undef) = 0, because the top bits will be zero.
return getConstant(0, DL, VT);
@@ -3384,13 +3649,13 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
if (OpOpcode == ISD::ZERO_EXTEND || OpOpcode == ISD::SIGN_EXTEND ||
OpOpcode == ISD::ANY_EXTEND)
// (ext (zext x)) -> (zext x) and (ext (sext x)) -> (sext x)
- return getNode(OpOpcode, DL, VT, Operand.getNode()->getOperand(0));
+ return getNode(OpOpcode, DL, VT, Operand.getOperand(0));
else if (OpOpcode == ISD::UNDEF)
return getUNDEF(VT);
// (ext (trunc x)) -> x
if (OpOpcode == ISD::TRUNCATE) {
- SDValue OpOp = Operand.getNode()->getOperand(0);
+ SDValue OpOp = Operand.getOperand(0);
if (OpOp.getValueType() == VT)
return OpOp;
}
@@ -3406,20 +3671,26 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
assert(Operand.getValueType().bitsGT(VT) &&
"Invalid truncate node, src < dst!");
if (OpOpcode == ISD::TRUNCATE)
- return getNode(ISD::TRUNCATE, DL, VT, Operand.getNode()->getOperand(0));
+ return getNode(ISD::TRUNCATE, DL, VT, Operand.getOperand(0));
if (OpOpcode == ISD::ZERO_EXTEND || OpOpcode == ISD::SIGN_EXTEND ||
OpOpcode == ISD::ANY_EXTEND) {
// If the source is smaller than the dest, we still need an extend.
- if (Operand.getNode()->getOperand(0).getValueType().getScalarType()
+ if (Operand.getOperand(0).getValueType().getScalarType()
.bitsLT(VT.getScalarType()))
- return getNode(OpOpcode, DL, VT, Operand.getNode()->getOperand(0));
- if (Operand.getNode()->getOperand(0).getValueType().bitsGT(VT))
- return getNode(ISD::TRUNCATE, DL, VT, Operand.getNode()->getOperand(0));
- return Operand.getNode()->getOperand(0);
+ return getNode(OpOpcode, DL, VT, Operand.getOperand(0));
+ if (Operand.getOperand(0).getValueType().bitsGT(VT))
+ return getNode(ISD::TRUNCATE, DL, VT, Operand.getOperand(0));
+ return Operand.getOperand(0);
}
if (OpOpcode == ISD::UNDEF)
return getUNDEF(VT);
break;
+ case ISD::ABS:
+ assert(VT.isInteger() && VT == Operand.getValueType() &&
+ "Invalid ABS!");
+ if (OpOpcode == ISD::UNDEF)
+ return getUNDEF(VT);
+ break;
case ISD::BSWAP:
assert(VT.isInteger() && VT == Operand.getValueType() &&
"Invalid BSWAP!");
@@ -3464,15 +3735,14 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
// -(X-Y) -> (Y-X) is unsafe because when X==Y, -0.0 != +0.0
if (getTarget().Options.UnsafeFPMath && OpOpcode == ISD::FSUB)
// FIXME: FNEG has no fast-math-flags to propagate; use the FSUB's flags?
- return getNode(ISD::FSUB, DL, VT, Operand.getNode()->getOperand(1),
- Operand.getNode()->getOperand(0),
- &cast<BinaryWithFlagsSDNode>(Operand.getNode())->Flags);
+ return getNode(ISD::FSUB, DL, VT, Operand.getOperand(1),
+ Operand.getOperand(0), Operand.getNode()->getFlags());
if (OpOpcode == ISD::FNEG) // --X -> X
- return Operand.getNode()->getOperand(0);
+ return Operand.getOperand(0);
break;
case ISD::FABS:
if (OpOpcode == ISD::FNEG) // abs(-X) -> abs(X)
- return getNode(ISD::FABS, DL, VT, Operand.getNode()->getOperand(0));
+ return getNode(ISD::FABS, DL, VT, Operand.getOperand(0));
break;
}
@@ -3483,10 +3753,13 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
FoldingSetNodeID ID;
AddNodeIDNode(ID, Opcode, VTs, Ops);
void *IP = nullptr;
- if (SDNode *E = FindNodeOrInsertPos(ID, DL, IP))
+ if (SDNode *E = FindNodeOrInsertPos(ID, DL, IP)) {
+ E->intersectFlagsWith(Flags);
return SDValue(E, 0);
+ }
N = newSDNode<SDNode>(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs);
+ N->setFlags(Flags);
createOperands(N, Ops);
CSEMap.InsertNode(N, IP);
} else {
@@ -3569,6 +3842,31 @@ SDValue SelectionDAG::FoldSymbolOffset(unsigned Opcode, EVT VT,
GA->getOffset() + uint64_t(Offset));
}
+bool SelectionDAG::isUndef(unsigned Opcode, ArrayRef<SDValue> Ops) {
+ switch (Opcode) {
+ case ISD::SDIV:
+ case ISD::UDIV:
+ case ISD::SREM:
+ case ISD::UREM: {
+ // If a divisor is zero/undef or any element of a divisor vector is
+ // zero/undef, the whole op is undef.
+ assert(Ops.size() == 2 && "Div/rem should have 2 operands");
+ SDValue Divisor = Ops[1];
+ if (Divisor.isUndef() || isNullConstant(Divisor))
+ return true;
+
+ return ISD::isBuildVectorOfConstantSDNodes(Divisor.getNode()) &&
+ llvm::any_of(Divisor->op_values(),
+ [](SDValue V) { return V.isUndef() ||
+ isNullConstant(V); });
+ // TODO: Handle signed overflow.
+ }
+ // TODO: Handle oversized shifts.
+ default:
+ return false;
+ }
+}
+
SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL,
EVT VT, SDNode *Cst1,
SDNode *Cst2) {
@@ -3578,6 +3876,9 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL,
if (Opcode >= ISD::BUILTIN_OP_END)
return SDValue();
+ if (isUndef(Opcode, {SDValue(Cst1, 0), SDValue(Cst2, 0)}))
+ return getUNDEF(VT);
+
// Handle the case of two scalars.
if (const ConstantSDNode *Scalar1 = dyn_cast<ConstantSDNode>(Cst1)) {
if (const ConstantSDNode *Scalar2 = dyn_cast<ConstantSDNode>(Cst2)) {
@@ -3591,7 +3892,7 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL,
// fold (add Sym, c) -> Sym+c
if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Cst1))
return FoldSymbolOffset(Opcode, VT, GA, Cst2);
- if (isCommutativeBinOp(Opcode))
+ if (TLI->isCommutativeBinOp(Opcode))
if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Cst2))
return FoldSymbolOffset(Opcode, VT, GA, Cst1);
@@ -3638,13 +3939,16 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL,
SDValue SelectionDAG::FoldConstantVectorArithmetic(unsigned Opcode,
const SDLoc &DL, EVT VT,
ArrayRef<SDValue> Ops,
- const SDNodeFlags *Flags) {
+ const SDNodeFlags Flags) {
// If the opcode is a target-specific ISD node, there's nothing we can
// do here and the operand rules may not line up with the below, so
// bail early.
if (Opcode >= ISD::BUILTIN_OP_END)
return SDValue();
+ if (isUndef(Opcode, Ops))
+ return getUNDEF(VT);
+
// We can only fold vectors - maybe merge with FoldConstantArithmetic someday?
if (!VT.isVector())
return SDValue();
@@ -3665,8 +3969,8 @@ SDValue SelectionDAG::FoldConstantVectorArithmetic(unsigned Opcode,
// All operands must be vector types with the same number of elements as
// the result type and must be either UNDEF or a build vector of constant
// or UNDEF scalars.
- if (!all_of(Ops, IsConstantBuildVectorOrUndef) ||
- !all_of(Ops, IsScalarOrSameVectorSize))
+ if (!llvm::all_of(Ops, IsConstantBuildVectorOrUndef) ||
+ !llvm::all_of(Ops, IsScalarOrSameVectorSize))
return SDValue();
// If we are comparing vectors, then the result needs to be a i1 boolean
@@ -3676,7 +3980,7 @@ SDValue SelectionDAG::FoldConstantVectorArithmetic(unsigned Opcode,
// Find legal integer scalar type for constant promotion and
// ensure that its scalar size is at least as large as source.
EVT LegalSVT = VT.getScalarType();
- if (LegalSVT.isInteger()) {
+ if (NewNodesMustHaveLegalTypes && LegalSVT.isInteger()) {
LegalSVT = TLI->getTypeToTransformTo(*getContext(), LegalSVT);
if (LegalSVT.bitsLT(VT.getScalarType()))
return SDValue();
@@ -3727,15 +4031,14 @@ SDValue SelectionDAG::FoldConstantVectorArithmetic(unsigned Opcode,
}
SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
- SDValue N1, SDValue N2,
- const SDNodeFlags *Flags) {
+ SDValue N1, SDValue N2, const SDNodeFlags Flags) {
ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2);
ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
ConstantFPSDNode *N2CFP = dyn_cast<ConstantFPSDNode>(N2);
// Canonicalize constant to RHS if commutative.
- if (isCommutativeBinOp(Opcode)) {
+ if (TLI->isCommutativeBinOp(Opcode)) {
if (N1C && !N2C) {
std::swap(N1C, N2C);
std::swap(N1, N2);
@@ -3910,35 +4213,31 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
assert(EVT.bitsLE(VT) && "Not extending!");
if (EVT == VT) return N1; // Not actually extending
- auto SignExtendInReg = [&](APInt Val) {
+ auto SignExtendInReg = [&](APInt Val, llvm::EVT ConstantVT) {
unsigned FromBits = EVT.getScalarSizeInBits();
Val <<= Val.getBitWidth() - FromBits;
- Val = Val.ashr(Val.getBitWidth() - FromBits);
- return getConstant(Val, DL, VT.getScalarType());
+ Val.ashrInPlace(Val.getBitWidth() - FromBits);
+ return getConstant(Val, DL, ConstantVT);
};
if (N1C) {
const APInt &Val = N1C->getAPIntValue();
- return SignExtendInReg(Val);
+ return SignExtendInReg(Val, VT);
}
if (ISD::isBuildVectorOfConstantSDNodes(N1.getNode())) {
SmallVector<SDValue, 8> Ops;
+ llvm::EVT OpVT = N1.getOperand(0).getValueType();
for (int i = 0, e = VT.getVectorNumElements(); i != e; ++i) {
SDValue Op = N1.getOperand(i);
if (Op.isUndef()) {
- Ops.push_back(getUNDEF(VT.getScalarType()));
+ Ops.push_back(getUNDEF(OpVT));
continue;
}
- if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
- APInt Val = C->getAPIntValue();
- Val = Val.zextOrTrunc(VT.getScalarSizeInBits());
- Ops.push_back(SignExtendInReg(Val));
- continue;
- }
- break;
+ ConstantSDNode *C = cast<ConstantSDNode>(Op);
+ APInt Val = C->getAPIntValue();
+ Ops.push_back(SignExtendInReg(Val, OpVT));
}
- if (Ops.size() == VT.getVectorNumElements())
- return getBuildVector(VT, DL, Ops);
+ return getBuildVector(VT, DL, Ops);
}
break;
}
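A hedged worked example of the SignExtendInReg arithmetic above, for an i8
value held in a 32-bit APInt (FromBits == 8):

    APInt Val(32, 0xF0);                  // bit 7, the i8 sign bit, is set
    unsigned FromBits = 8;
    Val <<= Val.getBitWidth() - FromBits; // 0xF0000000: sign bit now in MSB
    Val.ashrInPlace(Val.getBitWidth() - FromBits);
    assert(Val == APInt(32, 0xFFFFFFF0)); // sign bits replicated downward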
@@ -4040,6 +4339,19 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
if (VT.getSimpleVT() == N1.getSimpleValueType())
return N1;
+ // EXTRACT_SUBVECTOR of an UNDEF is an UNDEF.
+ if (N1.isUndef())
+ return getUNDEF(VT);
+
+ // EXTRACT_SUBVECTOR of CONCAT_VECTORS can be simplified if the pieces of
+ // the concat have the same type as the extract.
+ if (N2C && N1.getOpcode() == ISD::CONCAT_VECTORS &&
+ N1.getNumOperands() > 0 &&
+ VT == N1.getOperand(0).getValueType()) {
+ unsigned Factor = VT.getVectorNumElements();
+ return N1.getOperand(N2C->getZExtValue() / Factor);
+ }
+
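A hedged sketch of the new CONCAT_VECTORS fold (A, B and DL are assumed to
be in scope; A and B are existing v4i32 values):

    const TargetLowering &TLI = DAG.getTargetLoweringInfo();
    SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v8i32, A, B);
    SDValue Idx =
        DAG.getConstant(4, DL, TLI.getVectorIdxTy(DAG.getDataLayout()));
    SDValue Sub =
        DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v4i32, Concat, Idx);
    // Factor == 4 here, so operand 4 / 4 == 1 is returned: Sub is B itself.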
// EXTRACT_SUBVECTOR of INSERT_SUBVECTOR is often created
// during shuffle legalization.
if (N1.getOpcode() == ISD::INSERT_SUBVECTOR && N2 == N1.getOperand(2) &&
@@ -4110,7 +4422,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
// Canonicalize an UNDEF to the RHS, even over a constant.
if (N1.isUndef()) {
- if (isCommutativeBinOp(Opcode)) {
+ if (TLI->isCommutativeBinOp(Opcode)) {
std::swap(N1, N2);
} else {
switch (Opcode) {
@@ -4186,21 +4498,23 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
// Memoize this node if possible.
SDNode *N;
SDVTList VTs = getVTList(VT);
+ SDValue Ops[] = {N1, N2};
if (VT != MVT::Glue) {
- SDValue Ops[] = {N1, N2};
FoldingSetNodeID ID;
AddNodeIDNode(ID, Opcode, VTs, Ops);
void *IP = nullptr;
if (SDNode *E = FindNodeOrInsertPos(ID, DL, IP)) {
- if (Flags)
- E->intersectFlagsWith(Flags);
+ E->intersectFlagsWith(Flags);
return SDValue(E, 0);
}
- N = GetBinarySDNode(Opcode, DL, VTs, N1, N2, Flags);
+ N = newSDNode<SDNode>(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs);
+ N->setFlags(Flags);
+ createOperands(N, Ops);
CSEMap.InsertNode(N, IP);
} else {
- N = GetBinarySDNode(Opcode, DL, VTs, N1, N2, Flags);
+ N = newSDNode<SDNode>(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs);
+ createOperands(N, Ops);
}
InsertNode(N);
@@ -4392,9 +4706,10 @@ static SDValue getMemsetValue(SDValue Value, EVT VT, SelectionDAG &DAG,
/// used when a memcpy is turned into a memset when the source is a constant
/// string ptr.
static SDValue getMemsetStringVal(EVT VT, const SDLoc &dl, SelectionDAG &DAG,
- const TargetLowering &TLI, StringRef Str) {
+ const TargetLowering &TLI,
+ const ConstantDataArraySlice &Slice) {
// Handle vector with all elements zero.
- if (Str.empty()) {
+ if (Slice.Array == nullptr) {
if (VT.isInteger())
return DAG.getConstant(0, dl, VT);
else if (VT == MVT::f32 || VT == MVT::f64 || VT == MVT::f128)
@@ -4413,15 +4728,15 @@ static SDValue getMemsetStringVal(EVT VT, const SDLoc &dl, SelectionDAG &DAG,
assert(!VT.isVector() && "Can't handle vector type here!");
unsigned NumVTBits = VT.getSizeInBits();
unsigned NumVTBytes = NumVTBits / 8;
- unsigned NumBytes = std::min(NumVTBytes, unsigned(Str.size()));
+ unsigned NumBytes = std::min(NumVTBytes, unsigned(Slice.Length));
APInt Val(NumVTBits, 0);
if (DAG.getDataLayout().isLittleEndian()) {
for (unsigned i = 0; i != NumBytes; ++i)
- Val |= (uint64_t)(unsigned char)Str[i] << i*8;
+ Val |= (uint64_t)(unsigned char)Slice[i] << i*8;
} else {
for (unsigned i = 0; i != NumBytes; ++i)
- Val |= (uint64_t)(unsigned char)Str[i] << (NumVTBytes-i-1)*8;
+ Val |= (uint64_t)(unsigned char)Slice[i] << (NumVTBytes-i-1)*8;
}
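A hedged trace of the packing loops above, copying the bytes "abcd" into an
i32 on a little-endian target:

    i=0: Val |= 0x61 << 0    -> 0x00000061
    i=1: Val |= 0x62 << 8    -> 0x00006261
    i=2: Val |= 0x63 << 16   -> 0x00636261
    i=3: Val |= 0x64 << 24   -> 0x64636261

On a big-endian target the shift is (NumVTBytes-i-1)*8, yielding 0x61626364.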
// If the "cost" of materializing the integer immediate is less than the cost
@@ -4438,9 +4753,8 @@ SDValue SelectionDAG::getMemBasePlusOffset(SDValue Base, unsigned Offset,
return getNode(ISD::ADD, DL, VT, Base, getConstant(Offset, DL, VT));
}
-/// isMemSrcFromString - Returns true if memcpy source is a string constant.
-///
-static bool isMemSrcFromString(SDValue Src, StringRef &Str) {
+/// Returns true if memcpy source is constant data.
+static bool isMemSrcFromConstant(SDValue Src, ConstantDataArraySlice &Slice) {
uint64_t SrcDelta = 0;
GlobalAddressSDNode *G = nullptr;
if (Src.getOpcode() == ISD::GlobalAddress)
@@ -4454,8 +4768,8 @@ static bool isMemSrcFromString(SDValue Src, StringRef &Str) {
if (!G)
return false;
- return getConstantStringInfo(G->getGlobal(), Str,
- SrcDelta + G->getOffset(), false);
+ return getConstantDataArrayInfo(G->getGlobal(), Slice, 8,
+ SrcDelta + G->getOffset());
}
/// Determines the optimal series of memory ops to replace the memset / memcpy.
@@ -4486,23 +4800,23 @@ static bool FindOptimalMemOpLowering(std::vector<EVT> &MemOps,
DAG.getMachineFunction());
if (VT == MVT::Other) {
- if (DstAlign >= DAG.getDataLayout().getPointerPrefAlignment(DstAS) ||
- TLI.allowsMisalignedMemoryAccesses(VT, DstAS, DstAlign)) {
- VT = TLI.getPointerTy(DAG.getDataLayout(), DstAS);
- } else {
- switch (DstAlign & 7) {
- case 0: VT = MVT::i64; break;
- case 4: VT = MVT::i32; break;
- case 2: VT = MVT::i16; break;
- default: VT = MVT::i8; break;
- }
- }
-
+ // Use the largest integer type whose alignment constraints are satisfied.
+ // We only need to check DstAlign here as SrcAlign is always greater than
+ // or equal to DstAlign (or zero).
+ VT = MVT::i64;
+ while (DstAlign && DstAlign < VT.getSizeInBits() / 8 &&
+ !TLI.allowsMisalignedMemoryAccesses(VT, DstAS, DstAlign))
+ VT = (MVT::SimpleValueType)(VT.getSimpleVT().SimpleTy - 1);
+ assert(VT.isInteger());
+
+ // Find the largest legal integer type.
MVT LVT = MVT::i64;
while (!TLI.isTypeLegal(LVT))
LVT = (MVT::SimpleValueType)(LVT.SimpleTy - 1);
assert(LVT.isInteger());
+ // If the type we've chosen is larger than the largest legal integer type,
+ // use the legal type instead.
if (VT.bitsGT(LVT))
VT = LVT;
}
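A hedged trace of the type-selection loop above, for a target without fast
misaligned accesses and DstAlign == 4:

    VT = i64: DstAlign (4) < 8 bytes and a misaligned i64 access is slow,
              so step VT down to the next integer type.
    VT = i32: DstAlign (4) >= 4 bytes, so the loop stops.

The chosen i32 is then clamped to the largest legal integer type by the
check just above.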
@@ -4587,6 +4901,8 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
// TODO: In the AlwaysInline case, if the size is big then generate a loop
// rather than maybe a humongous number of loads and stores.
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ const DataLayout &DL = DAG.getDataLayout();
+ LLVMContext &C = *DAG.getContext();
std::vector<EVT> MemOps;
bool DstAlignCanChange = false;
MachineFunction &MF = DAG.getMachineFunction();
@@ -4598,30 +4914,30 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
unsigned SrcAlign = DAG.InferPtrAlignment(Src);
if (Align > SrcAlign)
SrcAlign = Align;
- StringRef Str;
- bool CopyFromStr = isMemSrcFromString(Src, Str);
- bool isZeroStr = CopyFromStr && Str.empty();
+ ConstantDataArraySlice Slice;
+ bool CopyFromConstant = isMemSrcFromConstant(Src, Slice);
+ bool isZeroConstant = CopyFromConstant && Slice.Array == nullptr;
unsigned Limit = AlwaysInline ? ~0U : TLI.getMaxStoresPerMemcpy(OptSize);
if (!FindOptimalMemOpLowering(MemOps, Limit, Size,
(DstAlignCanChange ? 0 : Align),
- (isZeroStr ? 0 : SrcAlign),
- false, false, CopyFromStr, true,
+ (isZeroConstant ? 0 : SrcAlign),
+ false, false, CopyFromConstant, true,
DstPtrInfo.getAddrSpace(),
SrcPtrInfo.getAddrSpace(),
DAG, TLI))
return SDValue();
if (DstAlignCanChange) {
- Type *Ty = MemOps[0].getTypeForEVT(*DAG.getContext());
- unsigned NewAlign = (unsigned)DAG.getDataLayout().getABITypeAlignment(Ty);
+ Type *Ty = MemOps[0].getTypeForEVT(C);
+ unsigned NewAlign = (unsigned)DL.getABITypeAlignment(Ty);
// Don't promote to an alignment that would require dynamic stack
// realignment.
const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
if (!TRI->needsStackRealignment(MF))
while (NewAlign > Align &&
- DAG.getDataLayout().exceedsNaturalStackAlignment(NewAlign))
+ DL.exceedsNaturalStackAlignment(NewAlign))
NewAlign /= 2;
if (NewAlign > Align) {
@@ -4650,18 +4966,29 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
DstOff -= VTSize - Size;
}
- if (CopyFromStr &&
- (isZeroStr || (VT.isInteger() && !VT.isVector()))) {
+ if (CopyFromConstant &&
+ (isZeroConstant || (VT.isInteger() && !VT.isVector()))) {
// It's unlikely a store of a vector immediate can be done in a single
// instruction. It would require a load from a constantpool first.
// We only handle zero vectors here.
// FIXME: Handle other cases where store of vector immediate is done in
// a single instruction.
- Value = getMemsetStringVal(VT, dl, DAG, TLI, Str.substr(SrcOff));
+ ConstantDataArraySlice SubSlice;
+ if (SrcOff < Slice.Length) {
+ SubSlice = Slice;
+ SubSlice.move(SrcOff);
+ } else {
+ // This is an out-of-bounds access and hence UB. Pretend we read zero.
+ SubSlice.Array = nullptr;
+ SubSlice.Offset = 0;
+ SubSlice.Length = VTSize;
+ }
+ Value = getMemsetStringVal(VT, dl, DAG, TLI, SubSlice);
if (Value.getNode())
Store = DAG.getStore(Chain, dl, Value,
DAG.getMemBasePlusOffset(Dst, DstOff, dl),
- DstPtrInfo.getWithOffset(DstOff), Align, MMOFlags);
+ DstPtrInfo.getWithOffset(DstOff), Align,
+ MMOFlags);
}
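A hedged sketch of the slice bookkeeping above (ConstantDataArraySlice is
the {Array, Offset, Length} view produced by getConstantDataArrayInfo; the
values here are hypothetical):

    ConstantDataArraySlice S;
    S.Array = nullptr;  // stand-in; a real slice points at constant data
    S.Offset = 0;
    S.Length = 16;      // a 16-byte constant
    ConstantDataArraySlice Sub = S;
    Sub.move(8);        // Offset += 8, Length -= 8
    // Sub now views bytes [8, 16) of the constant, exactly the tail
    // getMemsetStringVal needs when SrcOff == 8.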
if (!Store.getNode()) {
@@ -4670,12 +4997,19 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
// thing to do is generate a LoadExt/StoreTrunc pair. These simplify
// to Load/Store if NVT==VT.
// FIXME does the case above also need this?
- EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ EVT NVT = TLI.getTypeToTransformTo(C, VT);
assert(NVT.bitsGE(VT));
+
+ bool isDereferenceable =
+ SrcPtrInfo.getWithOffset(SrcOff).isDereferenceable(VTSize, C, DL);
+ MachineMemOperand::Flags SrcMMOFlags = MMOFlags;
+ if (isDereferenceable)
+ SrcMMOFlags |= MachineMemOperand::MODereferenceable;
+
Value = DAG.getExtLoad(ISD::EXTLOAD, dl, NVT, Chain,
DAG.getMemBasePlusOffset(Src, SrcOff, dl),
SrcPtrInfo.getWithOffset(SrcOff), VT,
- MinAlign(SrcAlign, SrcOff), MMOFlags);
+ MinAlign(SrcAlign, SrcOff), SrcMMOFlags);
OutChains.push_back(Value.getValue(1));
Store = DAG.getTruncStore(
Chain, dl, Value, DAG.getMemBasePlusOffset(Dst, DstOff, dl),
@@ -4703,6 +5037,8 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
// Expand memmove to a series of load and store ops if the size operand falls
// below a certain threshold.
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ const DataLayout &DL = DAG.getDataLayout();
+ LLVMContext &C = *DAG.getContext();
std::vector<EVT> MemOps;
bool DstAlignCanChange = false;
MachineFunction &MF = DAG.getMachineFunction();
@@ -4725,8 +5061,8 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
return SDValue();
if (DstAlignCanChange) {
- Type *Ty = MemOps[0].getTypeForEVT(*DAG.getContext());
- unsigned NewAlign = (unsigned)DAG.getDataLayout().getABITypeAlignment(Ty);
+ Type *Ty = MemOps[0].getTypeForEVT(C);
+ unsigned NewAlign = (unsigned)DL.getABITypeAlignment(Ty);
if (NewAlign > Align) {
// Give the stack frame object a larger alignment if needed.
if (MFI.getObjectAlignment(FI->getIndex()) < NewAlign)
@@ -4747,9 +5083,15 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
unsigned VTSize = VT.getSizeInBits() / 8;
SDValue Value;
+ bool isDereferenceable =
+ SrcPtrInfo.getWithOffset(SrcOff).isDereferenceable(VTSize, C, DL);
+ MachineMemOperand::Flags SrcMMOFlags = MMOFlags;
+ if (isDereferenceable)
+ SrcMMOFlags |= MachineMemOperand::MODereferenceable;
+
Value =
DAG.getLoad(VT, dl, Chain, DAG.getMemBasePlusOffset(Src, SrcOff, dl),
- SrcPtrInfo.getWithOffset(SrcOff), SrcAlign, MMOFlags);
+ SrcPtrInfo.getWithOffset(SrcOff), SrcAlign, SrcMMOFlags);
LoadValues.push_back(Value);
LoadChains.push_back(Value.getValue(1));
SrcOff += VTSize;
@@ -4943,11 +5285,11 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst,
TargetLowering::CallLoweringInfo CLI(*this);
CLI.setDebugLoc(dl)
.setChain(Chain)
- .setCallee(TLI->getLibcallCallingConv(RTLIB::MEMCPY),
- Dst.getValueType().getTypeForEVT(*getContext()),
- getExternalSymbol(TLI->getLibcallName(RTLIB::MEMCPY),
- TLI->getPointerTy(getDataLayout())),
- std::move(Args))
+ .setLibCallee(TLI->getLibcallCallingConv(RTLIB::MEMCPY),
+ Dst.getValueType().getTypeForEVT(*getContext()),
+ getExternalSymbol(TLI->getLibcallName(RTLIB::MEMCPY),
+ TLI->getPointerTy(getDataLayout())),
+ std::move(Args))
.setDiscardResult()
.setTailCall(isTailCall);
@@ -5004,11 +5346,11 @@ SDValue SelectionDAG::getMemmove(SDValue Chain, const SDLoc &dl, SDValue Dst,
TargetLowering::CallLoweringInfo CLI(*this);
CLI.setDebugLoc(dl)
.setChain(Chain)
- .setCallee(TLI->getLibcallCallingConv(RTLIB::MEMMOVE),
- Dst.getValueType().getTypeForEVT(*getContext()),
- getExternalSymbol(TLI->getLibcallName(RTLIB::MEMMOVE),
- TLI->getPointerTy(getDataLayout())),
- std::move(Args))
+ .setLibCallee(TLI->getLibcallCallingConv(RTLIB::MEMMOVE),
+ Dst.getValueType().getTypeForEVT(*getContext()),
+ getExternalSymbol(TLI->getLibcallName(RTLIB::MEMMOVE),
+ TLI->getPointerTy(getDataLayout())),
+ std::move(Args))
.setDiscardResult()
.setTailCall(isTailCall);
@@ -5066,11 +5408,11 @@ SDValue SelectionDAG::getMemset(SDValue Chain, const SDLoc &dl, SDValue Dst,
TargetLowering::CallLoweringInfo CLI(*this);
CLI.setDebugLoc(dl)
.setChain(Chain)
- .setCallee(TLI->getLibcallCallingConv(RTLIB::MEMSET),
- Dst.getValueType().getTypeForEVT(*getContext()),
- getExternalSymbol(TLI->getLibcallName(RTLIB::MEMSET),
- TLI->getPointerTy(getDataLayout())),
- std::move(Args))
+ .setLibCallee(TLI->getLibcallCallingConv(RTLIB::MEMSET),
+ Dst.getValueType().getTypeForEVT(*getContext()),
+ getExternalSymbol(TLI->getLibcallName(RTLIB::MEMSET),
+ TLI->getPointerTy(getDataLayout())),
+ std::move(Args))
.setDiscardResult()
.setTailCall(isTailCall);
@@ -5104,7 +5446,7 @@ SDValue SelectionDAG::getAtomicCmpSwap(
unsigned Opcode, const SDLoc &dl, EVT MemVT, SDVTList VTs, SDValue Chain,
SDValue Ptr, SDValue Cmp, SDValue Swp, MachinePointerInfo PtrInfo,
unsigned Alignment, AtomicOrdering SuccessOrdering,
- AtomicOrdering FailureOrdering, SynchronizationScope SynchScope) {
+ AtomicOrdering FailureOrdering, SyncScope::ID SSID) {
assert(Opcode == ISD::ATOMIC_CMP_SWAP ||
Opcode == ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS);
assert(Cmp.getValueType() == Swp.getValueType() && "Invalid Atomic Op Types");
@@ -5120,7 +5462,7 @@ SDValue SelectionDAG::getAtomicCmpSwap(
MachineMemOperand::MOStore;
MachineMemOperand *MMO =
MF.getMachineMemOperand(PtrInfo, Flags, MemVT.getStoreSize(), Alignment,
- AAMDNodes(), nullptr, SynchScope, SuccessOrdering,
+ AAMDNodes(), nullptr, SSID, SuccessOrdering,
FailureOrdering);
return getAtomicCmpSwap(Opcode, dl, MemVT, VTs, Chain, Ptr, Cmp, Swp, MMO);
@@ -5142,7 +5484,7 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT,
SDValue Chain, SDValue Ptr, SDValue Val,
const Value *PtrVal, unsigned Alignment,
AtomicOrdering Ordering,
- SynchronizationScope SynchScope) {
+ SyncScope::ID SSID) {
if (Alignment == 0) // Ensure that codegen never sees alignment 0
Alignment = getEVTAlignment(MemVT);
@@ -5162,7 +5504,7 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT,
MachineMemOperand *MMO =
MF.getMachineMemOperand(MachinePointerInfo(PtrVal), Flags,
MemVT.getStoreSize(), Alignment, AAMDNodes(),
- nullptr, SynchScope, Ordering);
+ nullptr, SSID, Ordering);
return getAtomic(Opcode, dl, MemVT, Chain, Ptr, Val, MMO);
}
@@ -5246,7 +5588,7 @@ SDValue SelectionDAG::getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl,
Opcode == ISD::PREFETCH ||
Opcode == ISD::LIFETIME_START ||
Opcode == ISD::LIFETIME_END ||
- (Opcode <= INT_MAX &&
+ (Opcode <= unsigned(std::numeric_limits<int>::max()) &&
(int)Opcode >= ISD::FIRST_TARGET_MEMORY_OPCODE)) &&
"Opcode is not a memory-accessing opcode!");
@@ -5580,7 +5922,6 @@ SDValue SelectionDAG::getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain,
SDValue Ptr, SDValue Mask, SDValue Src0,
EVT MemVT, MachineMemOperand *MMO,
ISD::LoadExtType ExtTy, bool isExpanding) {
-
SDVTList VTs = getVTList(VT, MVT::Other);
SDValue Ops[] = { Chain, Ptr, Mask, Src0 };
FoldingSetNodeID ID;
@@ -5722,11 +6063,11 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
}
SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
- ArrayRef<SDValue> Ops, const SDNodeFlags *Flags) {
+ ArrayRef<SDValue> Ops, const SDNodeFlags Flags) {
unsigned NumOps = Ops.size();
switch (NumOps) {
case 0: return getNode(Opcode, DL, VT);
- case 1: return getNode(Opcode, DL, VT, Ops[0]);
+ case 1: return getNode(Opcode, DL, VT, Ops[0], Flags);
case 2: return getNode(Opcode, DL, VT, Ops[0], Ops[1], Flags);
case 3: return getNode(Opcode, DL, VT, Ops[0], Ops[1], Ops[2]);
default: break;
@@ -5734,13 +6075,12 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
switch (Opcode) {
default: break;
- case ISD::CONCAT_VECTORS: {
+ case ISD::CONCAT_VECTORS:
// Attempt to fold CONCAT_VECTORS into BUILD_VECTOR or UNDEF.
if (SDValue V = FoldCONCAT_VECTORS(DL, VT, Ops, *this))
return V;
break;
- }
- case ISD::SELECT_CC: {
+ case ISD::SELECT_CC:
assert(NumOps == 5 && "SELECT_CC takes 5 operands!");
assert(Ops[0].getValueType() == Ops[1].getValueType() &&
"LHS and RHS of condition must have same type!");
@@ -5749,14 +6089,12 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
assert(Ops[2].getValueType() == VT &&
"select_cc node must be of same type as true and false value!");
break;
- }
- case ISD::BR_CC: {
+ case ISD::BR_CC:
assert(NumOps == 5 && "BR_CC takes 5 operands!");
assert(Ops[2].getValueType() == Ops[3].getValueType() &&
"LHS/RHS of comparison should match types!");
break;
}
- }
// Memoize nodes.
SDNode *N;
@@ -6238,6 +6576,62 @@ SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc,
return N;
}
+SDNode* SelectionDAG::mutateStrictFPToFP(SDNode *Node) {
+ unsigned OrigOpc = Node->getOpcode();
+ unsigned NewOpc;
+ bool IsUnary = false;
+ switch (OrigOpc) {
+ default:
+ llvm_unreachable("mutateStrictFPToFP called with unexpected opcode!");
+ case ISD::STRICT_FADD: NewOpc = ISD::FADD; break;
+ case ISD::STRICT_FSUB: NewOpc = ISD::FSUB; break;
+ case ISD::STRICT_FMUL: NewOpc = ISD::FMUL; break;
+ case ISD::STRICT_FDIV: NewOpc = ISD::FDIV; break;
+ case ISD::STRICT_FREM: NewOpc = ISD::FREM; break;
+ case ISD::STRICT_FSQRT: NewOpc = ISD::FSQRT; IsUnary = true; break;
+ case ISD::STRICT_FPOW: NewOpc = ISD::FPOW; break;
+ case ISD::STRICT_FPOWI: NewOpc = ISD::FPOWI; break;
+ case ISD::STRICT_FSIN: NewOpc = ISD::FSIN; IsUnary = true; break;
+ case ISD::STRICT_FCOS: NewOpc = ISD::FCOS; IsUnary = true; break;
+ case ISD::STRICT_FEXP: NewOpc = ISD::FEXP; IsUnary = true; break;
+ case ISD::STRICT_FEXP2: NewOpc = ISD::FEXP2; IsUnary = true; break;
+ case ISD::STRICT_FLOG: NewOpc = ISD::FLOG; IsUnary = true; break;
+ case ISD::STRICT_FLOG10: NewOpc = ISD::FLOG10; IsUnary = true; break;
+ case ISD::STRICT_FLOG2: NewOpc = ISD::FLOG2; IsUnary = true; break;
+ case ISD::STRICT_FRINT: NewOpc = ISD::FRINT; IsUnary = true; break;
+ case ISD::STRICT_FNEARBYINT:
+ NewOpc = ISD::FNEARBYINT;
+ IsUnary = true;
+ break;
+ }
+
+ // We're taking this node out of the chain, so we need to re-link things.
+ SDValue InputChain = Node->getOperand(0);
+ SDValue OutputChain = SDValue(Node, 1);
+ ReplaceAllUsesOfValueWith(OutputChain, InputChain);
+
+ SDVTList VTs = getVTList(Node->getOperand(1).getValueType());
+ SDNode *Res = nullptr;
+ if (IsUnary)
+ Res = MorphNodeTo(Node, NewOpc, VTs, { Node->getOperand(1) });
+ else
+ Res = MorphNodeTo(Node, NewOpc, VTs, { Node->getOperand(1),
+ Node->getOperand(2) });
+
+ // MorphNodeTo can operate in two ways: if an existing node with the
+ // specified operands exists, it can just return it. Otherwise, it
+ // updates the node in place to have the requested operands.
+ if (Res == Node) {
+ // If we updated the node in place, reset the node ID. To the isel,
+ // this should be just like a newly allocated machine node.
+ Res->setNodeId(-1);
+ } else {
+ ReplaceAllUsesWith(Node, Res);
+ RemoveDeadNode(Node);
+ }
+
+ return Res;
+}
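A hedged usage sketch: a target with no constrained-FP patterns can strip
the chain from a strict node during instruction selection and fall back to
its ordinary patterns (CurDAG is assumed to be the selector's SelectionDAG):

    if (Node->getOpcode() == ISD::STRICT_FADD)
      Node = CurDAG->mutateStrictFPToFP(Node);
    // Node is now a plain FADD; its chain users were re-linked to the old
    // input chain by the helper above.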
/// getMachineNode - These are used for target selectors to create a new node
/// with specified return type(s), MachineInstr opcode, and operands.
@@ -6384,14 +6778,13 @@ SDValue SelectionDAG::getTargetInsertSubreg(int SRIdx, const SDLoc &DL, EVT VT,
/// else return NULL.
SDNode *SelectionDAG::getNodeIfExists(unsigned Opcode, SDVTList VTList,
ArrayRef<SDValue> Ops,
- const SDNodeFlags *Flags) {
+ const SDNodeFlags Flags) {
if (VTList.VTs[VTList.NumVTs - 1] != MVT::Glue) {
FoldingSetNodeID ID;
AddNodeIDNode(ID, Opcode, VTList, Ops);
void *IP = nullptr;
if (SDNode *E = FindNodeOrInsertPos(ID, SDLoc(), IP)) {
- if (Flags)
- E->intersectFlagsWith(Flags);
+ E->intersectFlagsWith(Flags);
return E;
}
}
@@ -6452,7 +6845,7 @@ public:
: SelectionDAG::DAGUpdateListener(d), UI(ui), UE(ue) {}
};
-}
+} // end anonymous namespace
/// ReplaceAllUsesWith - Modify anything using 'From' to use 'To' instead.
/// This can cause recursive merging of nodes in the DAG.
@@ -6498,7 +6891,6 @@ void SelectionDAG::ReplaceAllUsesWith(SDValue FromN, SDValue To) {
AddModifiedNodeToCSEMaps(User);
}
-
// If we just RAUW'd the root, take note.
if (FromN == getRoot())
setRoot(To);
@@ -6668,6 +7060,7 @@ void SelectionDAG::ReplaceAllUsesOfValueWith(SDValue From, SDValue To){
}
namespace {
+
/// UseMemo - This class is used by SelectionDAG::ReplaceAllUsesOfValuesWith
/// to record information about a use.
struct UseMemo {
@@ -6680,7 +7073,8 @@ namespace {
bool operator<(const UseMemo &L, const UseMemo &R) {
return (intptr_t)L.User < (intptr_t)R.User;
}
-}
+
+} // end anonymous namespace
/// ReplaceAllUsesOfValuesWith - Replace any uses of From with To, leaving
/// uses of other values produced by From.getNode() alone. The same value
@@ -6746,7 +7140,6 @@ void SelectionDAG::ReplaceAllUsesOfValuesWith(const SDValue *From,
/// based on their topological order. It returns the maximum id and a vector
/// of the SDNodes* in assigned order by reference.
unsigned SelectionDAG::AssignTopologicalOrder() {
-
unsigned DAGSize = 0;
// SortedPos tracks the progress of the algorithm. Nodes before it are
@@ -6872,6 +7265,25 @@ void SelectionDAG::TransferDbgValues(SDValue From, SDValue To) {
AddDbgValue(I, ToNode, false);
}
+SDValue SelectionDAG::makeEquivalentMemoryOrdering(LoadSDNode *OldLoad,
+ SDValue NewMemOp) {
+ assert(isa<MemSDNode>(NewMemOp.getNode()) && "Expected a memop node");
+ // The new memory operation must have the same position as the old load in
+ // terms of memory dependency. Create a TokenFactor for the old load and new
+ // memory operation and update uses of the old load's output chain to use that
+ // TokenFactor.
+ SDValue OldChain = SDValue(OldLoad, 1);
+ SDValue NewChain = SDValue(NewMemOp.getNode(), 1);
+ if (!OldLoad->hasAnyUseOfValue(1))
+ return NewChain;
+
+ SDValue TokenFactor =
+ getNode(ISD::TokenFactor, SDLoc(OldLoad), MVT::Other, OldChain, NewChain);
+ ReplaceAllUsesOfValueWith(OldChain, TokenFactor);
+ UpdateNodeOperands(TokenFactor.getNode(), OldChain, NewChain);
+ return TokenFactor;
+}
+
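A hedged usage sketch (WideVT, DL and OldLoad are hypothetical): when a
combine replaces a load with a wider memory operation, the helper above
keeps every chain user of the old load correctly ordered:

    SDValue NewLoad =
        DAG.getLoad(WideVT, DL, OldLoad->getChain(), OldLoad->getBasePtr(),
                    OldLoad->getPointerInfo());
    // Chain users of OldLoad now hang off a TokenFactor of both chains.
    SDValue Chain = DAG.makeEquivalentMemoryOrdering(OldLoad, NewLoad);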
//===----------------------------------------------------------------------===//
// SDNode Class
//===----------------------------------------------------------------------===//
@@ -6973,6 +7385,7 @@ void SDNode::Profile(FoldingSetNodeID &ID) const {
}
namespace {
+
struct EVTArray {
std::vector<EVT> VTs;
@@ -6982,11 +7395,12 @@ namespace {
VTs.push_back(MVT((MVT::SimpleValueType)i));
}
};
-}
-static ManagedStatic<std::set<EVT, EVT::compareRawBits> > EVTs;
+} // end anonymous namespace
+
+static ManagedStatic<std::set<EVT, EVT::compareRawBits>> EVTs;
static ManagedStatic<EVTArray> SimpleVTArray;
-static ManagedStatic<sys::SmartMutex<true> > VTMutex;
+static ManagedStatic<sys::SmartMutex<true>> VTMutex;
/// getValueTypeList - Return a pointer to the specified value type.
///
@@ -7020,7 +7434,6 @@ bool SDNode::hasNUsesOfValue(unsigned NUses, unsigned Value) const {
return NUses == 0;
}
-
/// hasAnyUseOfValue - Return true if there are any use of the indicated
/// value. This method ignores uses of other values defined by this operation.
bool SDNode::hasAnyUseOfValue(unsigned Value) const {
@@ -7033,9 +7446,7 @@ bool SDNode::hasAnyUseOfValue(unsigned Value) const {
return false;
}
-
/// isOnlyUserOf - Return true if this node is the only use of N.
-///
bool SDNode::isOnlyUserOf(const SDNode *N) const {
bool Seen = false;
for (SDNode::use_iterator I = N->use_begin(), E = N->use_end(); I != E; ++I) {
@@ -7049,8 +7460,22 @@ bool SDNode::isOnlyUserOf(const SDNode *N) const {
return Seen;
}
+/// Return true if the only users of N are contained in Nodes.
+bool SDNode::areOnlyUsersOf(ArrayRef<const SDNode *> Nodes, const SDNode *N) {
+ bool Seen = false;
+ for (SDNode::use_iterator I = N->use_begin(), E = N->use_end(); I != E; ++I) {
+ SDNode *User = *I;
+ if (llvm::any_of(Nodes,
+ [&User](const SDNode *Node) { return User == Node; }))
+ Seen = true;
+ else
+ return false;
+ }
+
+ return Seen;
+}
+
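A hedged usage sketch: before fusing the two known users of a node N, a
combine can verify that nothing else observes it (Ext0 and Ext1 are
hypothetical extract values):

    if (SDNode::areOnlyUsersOf({Ext0.getNode(), Ext1.getNode()},
                               N.getNode())) {
      // Safe to rewrite both extract users together.
    }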
/// isOperand - Return true if this node is an operand of N.
-///
bool SDValue::isOperandOf(const SDNode *N) const {
for (const SDValue &Op : N->op_values())
if (*this == Op)
@@ -7070,21 +7495,39 @@ bool SDNode::isOperandOf(const SDNode *N) const {
/// side-effecting instructions on any chain path. In practice, this looks
/// through token factors and non-volatile loads. In order to remain efficient,
/// this only looks a couple of nodes in, it does not do an exhaustive search.
+///
+/// Note that we only need to examine chains when we're searching for
+/// side-effects; SelectionDAG requires that all side-effects are represented
+/// by chains, even if another operand would force a specific ordering. This
+/// constraint is necessary to allow transformations like splitting loads.
bool SDValue::reachesChainWithoutSideEffects(SDValue Dest,
- unsigned Depth) const {
+ unsigned Depth) const {
if (*this == Dest) return true;
// Don't search too deeply, we just want to be able to see through
// TokenFactor's etc.
if (Depth == 0) return false;
- // If this is a token factor, all inputs to the TF happen in parallel. If any
- // of the operands of the TF does not reach dest, then we cannot do the xform.
+ // If this is a token factor, all inputs to the TF happen in parallel.
if (getOpcode() == ISD::TokenFactor) {
- for (unsigned i = 0, e = getNumOperands(); i != e; ++i)
- if (!getOperand(i).reachesChainWithoutSideEffects(Dest, Depth-1))
- return false;
- return true;
+ // First, try a shallow search.
+ if (is_contained((*this)->ops(), Dest)) {
+ // We found the chain we want as an operand of this TokenFactor.
+ // Essentially, we reach the chain without side-effects if we could
+ // serialize the TokenFactor into a simple chain of operations with
+ // Dest as the last operation. This is automatically true if the
+ // chain has one use: there are no other ordering constraints.
+ // If the chain has more than one use, we give up: some other
+ // use of Dest might force a side-effect between Dest and the current
+ // node.
+ if (Dest.hasOneUse())
+ return true;
+ }
+ // Next, try a deep search: check whether every operand of the TokenFactor
+ // reaches Dest.
+ return llvm::all_of((*this)->ops(), [=](SDValue Op) {
+ return Op.reachesChainWithoutSideEffects(Dest, Depth - 1);
+ });
}
// Loads don't have side effects, look through them.
@@ -7102,20 +7545,8 @@ bool SDNode::hasPredecessor(const SDNode *N) const {
return hasPredecessorHelper(N, Visited, Worklist);
}
-uint64_t SDNode::getConstantOperandVal(unsigned Num) const {
- assert(Num < NumOperands && "Invalid child # of SDNode!");
- return cast<ConstantSDNode>(OperandList[Num])->getZExtValue();
-}
-
-const SDNodeFlags *SDNode::getFlags() const {
- if (auto *FlagsNode = dyn_cast<BinaryWithFlagsSDNode>(this))
- return &FlagsNode->Flags;
- return nullptr;
-}
-
-void SDNode::intersectFlagsWith(const SDNodeFlags *Flags) {
- if (auto *FlagsNode = dyn_cast<BinaryWithFlagsSDNode>(this))
- FlagsNode->Flags.intersectWith(Flags);
+void SDNode::intersectFlagsWith(const SDNodeFlags Flags) {
+ this->Flags.intersectWith(Flags);
}
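A hedged sketch of why the intersection matters on a CSE hit (X, Y, VT, DL
hypothetical; flag name per this LLVM version):

    SDNodeFlags FastFlags;
    FastFlags.setUnsafeAlgebra(true);
    SDValue A = DAG.getNode(ISD::FADD, DL, VT, X, Y, FastFlags);
    SDValue B = DAG.getNode(ISD::FADD, DL, VT, X, Y);  // no flags requested
    // CSE returns the same node, and intersectFlagsWith() has cleared the
    // unsafe-algebra flag, so no use sees guarantees it did not ask for.
    assert(A.getNode() == B.getNode());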
SDValue SelectionDAG::UnrollVectorOp(SDNode *N, unsigned ResNE) {
@@ -7204,49 +7635,16 @@ bool SelectionDAG::areNonVolatileConsecutiveLoads(LoadSDNode *LD,
SDValue Loc = LD->getOperand(1);
SDValue BaseLoc = Base->getOperand(1);
- if (Loc.getOpcode() == ISD::FrameIndex) {
- if (BaseLoc.getOpcode() != ISD::FrameIndex)
- return false;
- const MachineFrameInfo &MFI = getMachineFunction().getFrameInfo();
- int FI = cast<FrameIndexSDNode>(Loc)->getIndex();
- int BFI = cast<FrameIndexSDNode>(BaseLoc)->getIndex();
- int FS = MFI.getObjectSize(FI);
- int BFS = MFI.getObjectSize(BFI);
- if (FS != BFS || FS != (int)Bytes) return false;
- return MFI.getObjectOffset(FI) == (MFI.getObjectOffset(BFI) + Dist*Bytes);
- }
-
- // Handle X + C.
- if (isBaseWithConstantOffset(Loc)) {
- int64_t LocOffset = cast<ConstantSDNode>(Loc.getOperand(1))->getSExtValue();
- if (Loc.getOperand(0) == BaseLoc) {
- // If the base location is a simple address with no offset itself, then
- // the second load's first add operand should be the base address.
- if (LocOffset == Dist * (int)Bytes)
- return true;
- } else if (isBaseWithConstantOffset(BaseLoc)) {
- // The base location itself has an offset, so subtract that value from the
- // second load's offset before comparing to distance * size.
- int64_t BOffset =
- cast<ConstantSDNode>(BaseLoc.getOperand(1))->getSExtValue();
- if (Loc.getOperand(0) == BaseLoc.getOperand(0)) {
- if ((LocOffset - BOffset) == Dist * (int)Bytes)
- return true;
- }
- }
- }
- const GlobalValue *GV1 = nullptr;
- const GlobalValue *GV2 = nullptr;
- int64_t Offset1 = 0;
- int64_t Offset2 = 0;
- bool isGA1 = TLI->isGAPlusOffset(Loc.getNode(), GV1, Offset1);
- bool isGA2 = TLI->isGAPlusOffset(BaseLoc.getNode(), GV2, Offset2);
- if (isGA1 && isGA2 && GV1 == GV2)
- return Offset1 == (Offset2 + Dist*Bytes);
+
+ auto BaseLocDecomp = BaseIndexOffset::match(BaseLoc, *this);
+ auto LocDecomp = BaseIndexOffset::match(Loc, *this);
+
+ int64_t Offset = 0;
+ if (BaseLocDecomp.equalBaseIndex(LocDecomp, *this, Offset))
+ return (Dist * Bytes == Offset);
return false;
}
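A hedged example of the new decomposition-based check: 4-byte loads from
FrameIndex#0 + 8 and FrameIndex#0 + 12 decompose to the same base and index
with Offset == 4, so a query with Dist == 1 and Bytes == 4 succeeds
(1 * 4 == 4) regardless of which syntactic ADD/OR form the addresses use.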
-
/// InferPtrAlignment - Infer alignment of a load / store address. Return 0 if
/// it cannot be inferred.
unsigned SelectionDAG::InferPtrAlignment(SDValue Ptr) const {
@@ -7255,10 +7653,9 @@ unsigned SelectionDAG::InferPtrAlignment(SDValue Ptr) const {
int64_t GVOffset = 0;
if (TLI->isGAPlusOffset(Ptr.getNode(), GV, GVOffset)) {
unsigned PtrWidth = getDataLayout().getPointerTypeSizeInBits(GV->getType());
- APInt KnownZero(PtrWidth, 0), KnownOne(PtrWidth, 0);
- llvm::computeKnownBits(const_cast<GlobalValue *>(GV), KnownZero, KnownOne,
- getDataLayout());
- unsigned AlignBits = KnownZero.countTrailingOnes();
+ KnownBits Known(PtrWidth);
+ llvm::computeKnownBits(GV, Known, getDataLayout());
+ unsigned AlignBits = Known.countMinTrailingZeros();
unsigned Align = AlignBits ? 1 << std::min(31U, AlignBits) : 0;
if (Align)
return MinAlign(Align, GVOffset);
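A hedged worked example: if computeKnownBits() proves the low four bits of
the global's address are zero, countMinTrailingZeros() returns 4 and the
inferred alignment is 1 << 4 == 16 bytes, before the MinAlign() adjustment
for any constant offset.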
@@ -7292,14 +7689,11 @@ unsigned SelectionDAG::InferPtrAlignment(SDValue Ptr) const {
std::pair<EVT, EVT> SelectionDAG::GetSplitDestVTs(const EVT &VT) const {
// Currently all types are split in half.
EVT LoVT, HiVT;
- if (!VT.isVector()) {
+ if (!VT.isVector())
LoVT = HiVT = TLI->getTypeToTransformTo(*getContext(), VT);
- } else {
- unsigned NumElements = VT.getVectorNumElements();
- assert(!(NumElements & 1) && "Splitting vector, but not in half!");
- LoVT = HiVT = EVT::getVectorVT(*getContext(), VT.getVectorElementType(),
- NumElements/2);
- }
+ else
+ LoVT = HiVT = VT.getHalfNumVectorElementsVT(*getContext());
+
return std::make_pair(LoVT, HiVT);
}
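A hedged example: GetSplitDestVTs(v8i32) now goes through
getHalfNumVectorElementsVT() and yields the pair {v4i32, v4i32}; scalar
types still split via getTypeToTransformTo(), e.g. i64 -> {i32, i32} on a
32-bit target.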
@@ -7341,59 +7735,58 @@ unsigned GlobalAddressSDNode::getAddressSpace() const {
return getGlobal()->getType()->getAddressSpace();
}
-
Type *ConstantPoolSDNode::getType() const {
if (isMachineConstantPoolEntry())
return Val.MachineCPVal->getType();
return Val.ConstVal->getType();
}
-bool BuildVectorSDNode::isConstantSplat(APInt &SplatValue,
- APInt &SplatUndef,
+bool BuildVectorSDNode::isConstantSplat(APInt &SplatValue, APInt &SplatUndef,
unsigned &SplatBitSize,
bool &HasAnyUndefs,
unsigned MinSplatBits,
- bool isBigEndian) const {
+ bool IsBigEndian) const {
EVT VT = getValueType(0);
assert(VT.isVector() && "Expected a vector type");
- unsigned sz = VT.getSizeInBits();
- if (MinSplatBits > sz)
+ unsigned VecWidth = VT.getSizeInBits();
+ if (MinSplatBits > VecWidth)
return false;
- SplatValue = APInt(sz, 0);
- SplatUndef = APInt(sz, 0);
+ // FIXME: The widths are based on this node's type, but build vectors can
+ // truncate their operands.
+ SplatValue = APInt(VecWidth, 0);
+ SplatUndef = APInt(VecWidth, 0);
- // Get the bits. Bits with undefined values (when the corresponding element
+ // Get the bits. Bits with undefined values (when the corresponding element
// of the vector is an ISD::UNDEF value) are set in SplatUndef and cleared
- // in SplatValue. If any of the values are not constant, give up and return
+ // in SplatValue. If any of the values are not constant, give up and return
// false.
- unsigned int nOps = getNumOperands();
- assert(nOps > 0 && "isConstantSplat has 0-size build vector");
- unsigned EltBitSize = VT.getScalarSizeInBits();
+ unsigned int NumOps = getNumOperands();
+ assert(NumOps > 0 && "isConstantSplat has 0-size build vector");
+ unsigned EltWidth = VT.getScalarSizeInBits();
- for (unsigned j = 0; j < nOps; ++j) {
- unsigned i = isBigEndian ? nOps-1-j : j;
+ for (unsigned j = 0; j < NumOps; ++j) {
+ unsigned i = IsBigEndian ? NumOps - 1 - j : j;
SDValue OpVal = getOperand(i);
- unsigned BitPos = j * EltBitSize;
+ unsigned BitPos = j * EltWidth;
if (OpVal.isUndef())
- SplatUndef |= APInt::getBitsSet(sz, BitPos, BitPos + EltBitSize);
- else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal))
- SplatValue |= CN->getAPIntValue().zextOrTrunc(EltBitSize).
- zextOrTrunc(sz) << BitPos;
- else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal))
- SplatValue |= CN->getValueAPF().bitcastToAPInt().zextOrTrunc(sz) <<BitPos;
- else
+ SplatUndef.setBits(BitPos, BitPos + EltWidth);
+ else if (auto *CN = dyn_cast<ConstantSDNode>(OpVal))
+ SplatValue.insertBits(CN->getAPIntValue().zextOrTrunc(EltWidth), BitPos);
+ else if (auto *CN = dyn_cast<ConstantFPSDNode>(OpVal))
+ SplatValue.insertBits(CN->getValueAPF().bitcastToAPInt(), BitPos);
+ else
return false;
}
- // The build_vector is all constants or undefs. Find the smallest element
+ // The build_vector is all constants or undefs. Find the smallest element
// size that splats the vector.
-
HasAnyUndefs = (SplatUndef != 0);
- while (sz > 8) {
- unsigned HalfSize = sz / 2;
+ // FIXME: This does not work for vectors with elements smaller than 8 bits.
+ while (VecWidth > 8) {
+ unsigned HalfSize = VecWidth / 2;
APInt HighValue = SplatValue.lshr(HalfSize).trunc(HalfSize);
APInt LowValue = SplatValue.trunc(HalfSize);
APInt HighUndef = SplatUndef.lshr(HalfSize).trunc(HalfSize);
@@ -7407,10 +7800,10 @@ bool BuildVectorSDNode::isConstantSplat(APInt &SplatValue,
SplatValue = HighValue | LowValue;
SplatUndef = HighUndef & LowUndef;
- sz = HalfSize;
+ VecWidth = HalfSize;
}
- SplatBitSize = sz;
+ SplatBitSize = VecWidth;
return true;
}
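A hedged worked example: for (build_vector (i16 0x0101), (i16 0x0101)) the
32-bit splat value 0x01010101 halves twice with matching halves (0x0101,
then 0x01), so the function reports SplatBitSize == 8 and
SplatValue == 0x01, with HasAnyUndefs == false.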
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp
new file mode 100644
index 0000000..0d69441
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp
@@ -0,0 +1,115 @@
+//===- SelectionDAGAddressAnalysis.cpp - DAG Address Analysis ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+
+#include "llvm/CodeGen/SelectionDAGAddressAnalysis.h"
+#include "llvm/CodeGen/ISDOpcodes.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+
+namespace llvm {
+
+bool BaseIndexOffset::equalBaseIndex(BaseIndexOffset &Other,
+ const SelectionDAG &DAG, int64_t &Off) {
+ // Initial Offset difference.
+ Off = Other.Offset - Offset;
+
+ if ((Other.Index == Index) && (Other.IsIndexSignExt == IsIndexSignExt)) {
+ // Trivial match.
+ if (Other.Base == Base)
+ return true;
+
+ // Match GlobalAddresses
+ if (auto *A = dyn_cast<GlobalAddressSDNode>(Base))
+ if (auto *B = dyn_cast<GlobalAddressSDNode>(Other.Base))
+ if (A->getGlobal() == B->getGlobal()) {
+ Off += B->getOffset() - A->getOffset();
+ return true;
+ }
+
+ const MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
+
+ // Match non-equal FrameIndexes - If both frame indices are fixed
+ // we know their relative offsets and can compare them. Otherwise
+ // we must be conservative.
+ if (auto *A = dyn_cast<FrameIndexSDNode>(Base))
+ if (auto *B = dyn_cast<FrameIndexSDNode>(Other.Base))
+ if (MFI.isFixedObjectIndex(A->getIndex()) &&
+ MFI.isFixedObjectIndex(B->getIndex())) {
+ Off += MFI.getObjectOffset(B->getIndex()) -
+ MFI.getObjectOffset(A->getIndex());
+ return true;
+ }
+ }
+ return false;
+}
+
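A hedged example: two loads from fixed stack objects at frame offsets -16
and -8 (with equal Index and IsIndexSignExt) compare equal here with
Off == 8, letting callers reason about their relative placement even though
the FrameIndex bases differ.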
+/// Parses the address expression in Ptr into base, index, and offset parts.
+BaseIndexOffset BaseIndexOffset::match(SDValue Ptr, const SelectionDAG &DAG) {
+ // ((B + I*M) + c) + c ...
+ SDValue Base = Ptr;
+ SDValue Index = SDValue();
+ int64_t Offset = 0;
+ bool IsIndexSignExt = false;
+
+ // Consume constant adds & ors with appropriate masking.
+ while (Base->getOpcode() == ISD::ADD || Base->getOpcode() == ISD::OR) {
+ if (auto *C = dyn_cast<ConstantSDNode>(Base->getOperand(1))) {
+ // Only consider ORs which act as adds.
+ if (Base->getOpcode() == ISD::OR &&
+ !DAG.MaskedValueIsZero(Base->getOperand(0), C->getAPIntValue()))
+ break;
+ Offset += C->getSExtValue();
+ Base = Base->getOperand(0);
+ continue;
+ }
+ break;
+ }
+
+ if (Base->getOpcode() == ISD::ADD) {
+ // TODO: The following code appears to be needless as it just
+ // bails on some Ptrs early, reducing the cases where we
+ // find equivalence. We should be able to remove this.
+ // Inside a loop the current BASE pointer is calculated using an ADD and a
+ // MUL instruction. In this case Base is the actual BASE pointer.
+ // (i64 add (i64 %array_ptr)
+ // (i64 mul (i64 %induction_var)
+ // (i64 %element_size)))
+ if (Base->getOperand(1)->getOpcode() == ISD::MUL)
+ return BaseIndexOffset(Base, Index, Offset, IsIndexSignExt);
+
+ // Look at Base + Index + Offset cases.
+ Index = Base->getOperand(1);
+ SDValue PotentialBase = Base->getOperand(0);
+
+ // Skip signextends.
+ if (Index->getOpcode() == ISD::SIGN_EXTEND) {
+ Index = Index->getOperand(0);
+ IsIndexSignExt = true;
+ }
+
+ // Check whether Index has an (add Index', Constant) offset pattern.
+ if (Index->getOpcode() != ISD::ADD ||
+ !isa<ConstantSDNode>(Index->getOperand(1)))
+ return BaseIndexOffset(PotentialBase, Index, Offset, IsIndexSignExt);
+
+ Offset += cast<ConstantSDNode>(Index->getOperand(1))->getSExtValue();
+ Index = Index->getOperand(0);
+ if (Index->getOpcode() == ISD::SIGN_EXTEND) {
+ Index = Index->getOperand(0);
+ IsIndexSignExt = true;
+ } else
+ IsIndexSignExt = false;
+ Base = PotentialBase;
+ }
+ return BaseIndexOffset(Base, Index, Offset, IsIndexSignExt);
+}
+} // end namespace llvm
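A hedged trace of match() on a loop-style address (names hypothetical):

    Ptr = (add (add %base, (mul %i, 8)), 16)

The outer constant add is consumed first (Offset = 16, Base becomes the
inner add). The inner add's second operand is a MUL, so the early-return
case fires and the result is Base = (add %base, (mul %i, 8)),
Index = SDValue(), Offset = 16.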
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 996c95b..1273120 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -83,24 +83,6 @@ LimitFPPrecision("limit-float-precision",
"for some float libcalls"),
cl::location(LimitFloatPrecision),
cl::init(0));
-
-static cl::opt<bool>
-EnableFMFInDAG("enable-fmf-dag", cl::init(true), cl::Hidden,
- cl::desc("Enable fast-math-flags for DAG nodes"));
-
-/// Minimum jump table density for normal functions.
-static cl::opt<unsigned>
-JumpTableDensity("jump-table-density", cl::init(10), cl::Hidden,
- cl::desc("Minimum density for building a jump table in "
- "a normal function"));
-
-/// Minimum jump table density for -Os or -Oz functions.
-static cl::opt<unsigned>
-OptsizeJumpTableDensity("optsize-jump-table-density", cl::init(40), cl::Hidden,
- cl::desc("Minimum density for building a jump table in "
- "an optsize function"));
-
-
// Limit the width of DAG chains. This is important in general to prevent
// DAG-based analysis from blowing up. For example, alias analysis and
// load clustering may not complete in reasonable time. It is difficult to
@@ -117,9 +99,31 @@ OptsizeJumpTableDensity("optsize-jump-table-density", cl::init(40), cl::Hidden,
// store [4096 x i8] %data, [4096 x i8]* %buffer
static const unsigned MaxParallelChains = 64;
+// Return true if the given Value requires ABI mangling: it is either a
+// parameter to a function or the return value of a call that is not an
+// intrinsic.
+static bool isABIRegCopy(const Value *V) {
+ const bool IsRetInst = V && isa<ReturnInst>(V);
+ const bool IsCallInst = V && isa<CallInst>(V);
+ const bool IsInLineAsm =
+ IsCallInst && static_cast<const CallInst *>(V)->isInlineAsm();
+ const bool IsIndirectFunctionCall =
+ IsCallInst && !IsInLineAsm &&
+ !static_cast<const CallInst *>(V)->getCalledFunction();
+ // It is possible that the call instruction is an inline asm statement or an
+ // indirect function call, in which case the return value of
+ // getCalledFunction() would be nullptr.
+ const bool IsIntrinsicCall =
+ IsCallInst && !IsInLineAsm && !IsIndirectFunctionCall &&
+ static_cast<const CallInst *>(V)->getCalledFunction()->getIntrinsicID() !=
+ Intrinsic::not_intrinsic;
+
+ return IsRetInst || (IsCallInst && (!IsInLineAsm && !IsIntrinsicCall));
+}
+
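A hedged IR-level illustration of the predicate (values hypothetical):

    ret <8 x i16> %v                                    -> true  (return value)
    %r = call <8 x i16> @f(<8 x i16> %v)                -> true  (plain call)
    %s = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %v) -> false (intrinsic)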
static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL,
const SDValue *Parts, unsigned NumParts,
- MVT PartVT, EVT ValueVT, const Value *V);
+ MVT PartVT, EVT ValueVT, const Value *V,
+ bool IsABIRegCopy);
/// getCopyFromParts - Create a value that contains the specified legal parts
/// combined into the value they represent. If the parts combine to a type
@@ -129,10 +133,11 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL,
static SDValue getCopyFromParts(SelectionDAG &DAG, const SDLoc &DL,
const SDValue *Parts, unsigned NumParts,
MVT PartVT, EVT ValueVT, const Value *V,
- Optional<ISD::NodeType> AssertOp = None) {
+ Optional<ISD::NodeType> AssertOp = None,
+ bool IsABIRegCopy = false) {
if (ValueVT.isVector())
return getCopyFromPartsVector(DAG, DL, Parts, NumParts,
- PartVT, ValueVT, V);
+ PartVT, ValueVT, V, IsABIRegCopy);
assert(NumParts > 0 && "No parts to assemble!");
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
@@ -276,7 +281,8 @@ static void diagnosePossiblyInvalidConstraint(LLVMContext &Ctx, const Value *V,
/// ValueVT (ISD::AssertSext).
static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL,
const SDValue *Parts, unsigned NumParts,
- MVT PartVT, EVT ValueVT, const Value *V) {
+ MVT PartVT, EVT ValueVT, const Value *V,
+ bool IsABIRegCopy) {
assert(ValueVT.isVector() && "Not a vector value");
assert(NumParts > 0 && "No parts to assemble!");
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
@@ -287,9 +293,18 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL,
EVT IntermediateVT;
MVT RegisterVT;
unsigned NumIntermediates;
- unsigned NumRegs =
- TLI.getVectorTypeBreakdown(*DAG.getContext(), ValueVT, IntermediateVT,
- NumIntermediates, RegisterVT);
+ unsigned NumRegs;
+
+ if (IsABIRegCopy) {
+ NumRegs = TLI.getVectorTypeBreakdownForCallingConv(
+ *DAG.getContext(), ValueVT, IntermediateVT, NumIntermediates,
+ RegisterVT);
+ } else {
+ NumRegs =
+ TLI.getVectorTypeBreakdown(*DAG.getContext(), ValueVT, IntermediateVT,
+ NumIntermediates, RegisterVT);
+ }
+
assert(NumRegs == NumParts && "Part count doesn't match vector breakdown!");
NumParts = NumRegs; // Silence a compiler warning.
assert(RegisterVT == PartVT && "Part type doesn't match vector breakdown!");
@@ -318,9 +333,14 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL,
// Build a vector with BUILD_VECTOR or CONCAT_VECTORS from the
// intermediate operands.
+ EVT BuiltVectorTy =
+ EVT::getVectorVT(*DAG.getContext(), IntermediateVT.getScalarType(),
+ (IntermediateVT.isVector()
+ ? IntermediateVT.getVectorNumElements() * NumParts
+ : NumIntermediates));
Val = DAG.getNode(IntermediateVT.isVector() ? ISD::CONCAT_VECTORS
: ISD::BUILD_VECTOR,
- DL, ValueVT, Ops);
+ DL, BuiltVectorTy, Ops);
}
// There is now one part, held in Val. Correct it to match ValueVT.
@@ -359,23 +379,40 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL,
TLI.isTypeLegal(ValueVT))
return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
- // Handle cases such as i8 -> <1 x i1>
if (ValueVT.getVectorNumElements() != 1) {
- diagnosePossiblyInvalidConstraint(*DAG.getContext(), V,
- "non-trivial scalar-to-vector conversion");
- return DAG.getUNDEF(ValueVT);
+ // Certain ABIs require that vectors are passed as integers. If the
+ // vector and the integer have the same size, this is an obvious bitcast.
+ if (ValueVT.getSizeInBits() == PartEVT.getSizeInBits()) {
+ return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
+ } else if (ValueVT.getSizeInBits() < PartEVT.getSizeInBits()) {
+ // Bitcast Val back the original type and extract the corresponding
+ // vector we want.
+ unsigned Elts = PartEVT.getSizeInBits() / ValueVT.getScalarSizeInBits();
+ EVT WiderVecType = EVT::getVectorVT(*DAG.getContext(),
+ ValueVT.getVectorElementType(), Elts);
+ Val = DAG.getBitcast(WiderVecType, Val);
+ return DAG.getNode(
+ ISD::EXTRACT_SUBVECTOR, DL, ValueVT, Val,
+ DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
+ }
+
+ diagnosePossiblyInvalidConstraint(
+ *DAG.getContext(), V, "non-trivial scalar-to-vector conversion");
+ return DAG.getUNDEF(ValueVT);
}
- if (ValueVT.getVectorNumElements() == 1 &&
- ValueVT.getVectorElementType() != PartEVT)
- Val = DAG.getAnyExtOrTrunc(Val, DL, ValueVT.getScalarType());
+ // Handle cases such as i8 -> <1 x i1>
+ EVT ValueSVT = ValueVT.getVectorElementType();
+ if (ValueVT.getVectorNumElements() == 1 && ValueSVT != PartEVT)
+ Val = ValueVT.isFloatingPoint() ? DAG.getFPExtendOrRound(Val, DL, ValueSVT)
+ : DAG.getAnyExtOrTrunc(Val, DL, ValueSVT);
- return DAG.getNode(ISD::BUILD_VECTOR, DL, ValueVT, Val);
+ return DAG.getBuildVector(ValueVT, DL, Val);
}
static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &dl,
SDValue Val, SDValue *Parts, unsigned NumParts,
- MVT PartVT, const Value *V);
+ MVT PartVT, const Value *V, bool IsABIRegCopy);
/// getCopyToParts - Create a series of nodes that contain the specified value
/// split into legal parts. If the parts contain more bits than Val, then, for
@@ -383,12 +420,14 @@ static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &dl,
static void getCopyToParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val,
SDValue *Parts, unsigned NumParts, MVT PartVT,
const Value *V,
- ISD::NodeType ExtendKind = ISD::ANY_EXTEND) {
+ ISD::NodeType ExtendKind = ISD::ANY_EXTEND,
+ bool IsABIRegCopy = false) {
EVT ValueVT = Val.getValueType();
// Handle the vector case separately.
if (ValueVT.isVector())
- return getCopyToPartsVector(DAG, DL, Val, Parts, NumParts, PartVT, V);
+ return getCopyToPartsVector(DAG, DL, Val, Parts, NumParts, PartVT, V,
+ IsABIRegCopy);
unsigned PartBits = PartVT.getSizeInBits();
unsigned OrigNumParts = NumParts;
@@ -513,7 +552,9 @@ static void getCopyToParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val,
/// value split into legal parts.
static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &DL,
SDValue Val, SDValue *Parts, unsigned NumParts,
- MVT PartVT, const Value *V) {
+ MVT PartVT, const Value *V,
+ bool IsABIRegCopy) {
+
EVT ValueVT = Val.getValueType();
assert(ValueVT.isVector() && "Not a vector");
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
@@ -541,7 +582,7 @@ static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &DL,
e = PartVT.getVectorNumElements(); i != e; ++i)
Ops.push_back(DAG.getUNDEF(ElementVT));
- Val = DAG.getNode(ISD::BUILD_VECTOR, DL, PartVT, Ops);
+ Val = DAG.getBuildVector(PartVT, DL, Ops);
// FIXME: Use CONCAT for 2x -> 4x.
@@ -554,17 +595,23 @@ static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &DL,
// Promoted vector extract
Val = DAG.getAnyExtOrTrunc(Val, DL, PartVT);
- } else{
- // Vector -> scalar conversion.
- assert(ValueVT.getVectorNumElements() == 1 &&
- "Only trivial vector-to-scalar conversions should get here!");
- Val = DAG.getNode(
- ISD::EXTRACT_VECTOR_ELT, DL, PartVT, Val,
- DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
+ } else {
+ if (ValueVT.getVectorNumElements() == 1) {
+ Val = DAG.getNode(
+ ISD::EXTRACT_VECTOR_ELT, DL, PartVT, Val,
+ DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
- Val = DAG.getAnyExtOrTrunc(Val, DL, PartVT);
+ } else {
+ assert(PartVT.getSizeInBits() > ValueVT.getSizeInBits() &&
+ "lossy conversion of vector to scalar type");
+ EVT IntermediateType =
+ EVT::getIntegerVT(*DAG.getContext(), ValueVT.getSizeInBits());
+ Val = DAG.getBitcast(IntermediateType, Val);
+ Val = DAG.getAnyExtOrTrunc(Val, DL, PartVT);
+ }
}
+ assert(Val.getValueType() == PartVT && "Unexpected vector part value type");
Parts[0] = Val;
return;
}
@@ -573,15 +620,31 @@ static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &DL,
EVT IntermediateVT;
MVT RegisterVT;
unsigned NumIntermediates;
- unsigned NumRegs = TLI.getVectorTypeBreakdown(*DAG.getContext(), ValueVT,
- IntermediateVT,
- NumIntermediates, RegisterVT);
+ unsigned NumRegs;
+ if (IsABIRegCopy) {
+ NumRegs = TLI.getVectorTypeBreakdownForCallingConv(
+ *DAG.getContext(), ValueVT, IntermediateVT, NumIntermediates,
+ RegisterVT);
+ } else {
+ NumRegs =
+ TLI.getVectorTypeBreakdown(*DAG.getContext(), ValueVT, IntermediateVT,
+ NumIntermediates, RegisterVT);
+ }
unsigned NumElements = ValueVT.getVectorNumElements();
assert(NumRegs == NumParts && "Part count doesn't match vector breakdown!");
NumParts = NumRegs; // Silence a compiler warning.
assert(RegisterVT == PartVT && "Part type doesn't match vector breakdown!");
+ // Convert the vector to the appropriate type if necessary.
+ unsigned DestVectorNoElts =
+ NumIntermediates *
+ (IntermediateVT.isVector() ? IntermediateVT.getVectorNumElements() : 1);
+ EVT BuiltVectorTy = EVT::getVectorVT(
+ *DAG.getContext(), IntermediateVT.getScalarType(), DestVectorNoElts);
+ if (Val.getValueType() != BuiltVectorTy)
+ Val = DAG.getNode(ISD::BITCAST, DL, BuiltVectorTy, Val);
+
// Split the vector into intermediate operands.
SmallVector<SDValue, 8> Ops(NumIntermediates);
for (unsigned i = 0; i != NumIntermediates; ++i) {
@@ -614,30 +677,35 @@ static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &DL,
}
}
-RegsForValue::RegsForValue() {}
+RegsForValue::RegsForValue() { IsABIMangled = false; }
RegsForValue::RegsForValue(const SmallVector<unsigned, 4> &regs, MVT regvt,
- EVT valuevt)
- : ValueVTs(1, valuevt), RegVTs(1, regvt), Regs(regs) {}
+ EVT valuevt, bool IsABIMangledValue)
+ : ValueVTs(1, valuevt), RegVTs(1, regvt), Regs(regs),
+ RegCount(1, regs.size()), IsABIMangled(IsABIMangledValue) {}
RegsForValue::RegsForValue(LLVMContext &Context, const TargetLowering &TLI,
- const DataLayout &DL, unsigned Reg, Type *Ty) {
+ const DataLayout &DL, unsigned Reg, Type *Ty,
+ bool IsABIMangledValue) {
ComputeValueVTs(TLI, DL, Ty, ValueVTs);
+ IsABIMangled = IsABIMangledValue;
+
for (EVT ValueVT : ValueVTs) {
- unsigned NumRegs = TLI.getNumRegisters(Context, ValueVT);
- MVT RegisterVT = TLI.getRegisterType(Context, ValueVT);
+ unsigned NumRegs = IsABIMangledValue
+ ? TLI.getNumRegistersForCallingConv(Context, ValueVT)
+ : TLI.getNumRegisters(Context, ValueVT);
+ MVT RegisterVT = IsABIMangledValue
+ ? TLI.getRegisterTypeForCallingConv(Context, ValueVT)
+ : TLI.getRegisterType(Context, ValueVT);
for (unsigned i = 0; i != NumRegs; ++i)
Regs.push_back(Reg + i);
RegVTs.push_back(RegisterVT);
+ RegCount.push_back(NumRegs);
Reg += NumRegs;
}
}
-/// getCopyFromRegs - Emit a series of CopyFromReg nodes that copies from
-/// this value and returns the result as a ValueVT value. This uses
-/// Chain/Flag as the input and updates them for the output Chain/Flag.
-/// If the Flag pointer is NULL, no flag is used.
SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG,
FunctionLoweringInfo &FuncInfo,
const SDLoc &dl, SDValue &Chain,
@@ -654,8 +722,10 @@ SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG,
for (unsigned Value = 0, Part = 0, e = ValueVTs.size(); Value != e; ++Value) {
// Copy the legal parts from the registers.
EVT ValueVT = ValueVTs[Value];
- unsigned NumRegs = TLI.getNumRegisters(*DAG.getContext(), ValueVT);
- MVT RegisterVT = RegVTs[Value];
+ unsigned NumRegs = RegCount[Value];
+ MVT RegisterVT = IsABIMangled
+ ? TLI.getRegisterTypeForCallingConv(RegVTs[Value])
+ : RegVTs[Value];
Parts.resize(NumRegs);
for (unsigned i = 0; i != NumRegs; ++i) {
@@ -683,7 +753,7 @@ SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG,
unsigned RegSize = RegisterVT.getSizeInBits();
unsigned NumSignBits = LOI->NumSignBits;
- unsigned NumZeroBits = LOI->KnownZero.countLeadingOnes();
+ unsigned NumZeroBits = LOI->Known.countMinLeadingZeros();
if (NumZeroBits == RegSize) {
// The current value is a zero.
@@ -739,10 +809,6 @@ SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG,
return DAG.getNode(ISD::MERGE_VALUES, dl, DAG.getVTList(ValueVTs), Values);
}
-/// getCopyToRegs - Emit a series of CopyToReg nodes that copies the
-/// specified value into the registers specified by this object. This uses
-/// Chain/Flag as the input and updates them for the output Chain/Flag.
-/// If the Flag pointer is NULL, no flag is used.
void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG,
const SDLoc &dl, SDValue &Chain, SDValue *Flag,
const Value *V,
@@ -754,9 +820,11 @@ void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG,
unsigned NumRegs = Regs.size();
SmallVector<SDValue, 8> Parts(NumRegs);
for (unsigned Value = 0, Part = 0, e = ValueVTs.size(); Value != e; ++Value) {
- EVT ValueVT = ValueVTs[Value];
- unsigned NumParts = TLI.getNumRegisters(*DAG.getContext(), ValueVT);
- MVT RegisterVT = RegVTs[Value];
+ unsigned NumParts = RegCount[Value];
+
+ MVT RegisterVT = IsABIMangled
+ ? TLI.getRegisterTypeForCallingConv(RegVTs[Value])
+ : RegVTs[Value];
if (ExtendKind == ISD::ANY_EXTEND && TLI.isZExtFree(Val, RegisterVT))
ExtendKind = ISD::ZERO_EXTEND;
@@ -796,9 +864,6 @@ void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG,
Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Chains);
}
-/// AddInlineAsmOperands - Add this value to the specified inlineasm node
-/// operand list. This adds the code marker and includes the number of
-/// values added into it.
void RegsForValue::AddInlineAsmOperands(unsigned Code, bool HasMatching,
unsigned MatchingIdx, const SDLoc &dl,
SelectionDAG &DAG,
@@ -840,9 +905,9 @@ void RegsForValue::AddInlineAsmOperands(unsigned Code, bool HasMatching,
}
}
-void SelectionDAGBuilder::init(GCFunctionInfo *gfi, AliasAnalysis &aa,
+void SelectionDAGBuilder::init(GCFunctionInfo *gfi, AliasAnalysis *aa,
const TargetLibraryInfo *li) {
- AA = &aa;
+ AA = aa;
GFI = gfi;
LibInfo = li;
DL = &DAG.getDataLayout();
@@ -850,12 +915,6 @@ void SelectionDAGBuilder::init(GCFunctionInfo *gfi, AliasAnalysis &aa,
LPadToCallSiteMap.clear();
}
-/// clear - Clear out the current SelectionDAG and the associated
-/// state and prepare this SelectionDAGBuilder object to be used
-/// for a new block. This doesn't clear out information about
-/// additional blocks that are needed to complete switch lowering
-/// or PHI node updating; that information is cleared out as it is
-/// consumed.
void SelectionDAGBuilder::clear() {
NodeMap.clear();
UnusedArgNodeMap.clear();
@@ -867,21 +926,10 @@ void SelectionDAGBuilder::clear() {
StatepointLowering.clear();
}
-/// clearDanglingDebugInfo - Clear the dangling debug information
-/// map. This function is separated from the clear so that debug
-/// information that is dangling in a basic block can be properly
-/// resolved in a different basic block. This allows the
-/// SelectionDAG to resolve dangling debug information attached
-/// to PHI nodes.
void SelectionDAGBuilder::clearDanglingDebugInfo() {
DanglingDebugInfoMap.clear();
}
-/// getRoot - Return the current virtual root of the Selection DAG,
-/// flushing any PendingLoad items. This must be done before emitting
-/// a store or any other node that may need to be ordered after any
-/// prior load instructions.
-///
SDValue SelectionDAGBuilder::getRoot() {
if (PendingLoads.empty())
return DAG.getRoot();
@@ -901,10 +949,6 @@ SDValue SelectionDAGBuilder::getRoot() {
return Root;
}
-/// getControlRoot - Similar to getRoot, but instead of flushing all the
-/// PendingLoad items, flush all the PendingExports items. It is necessary
-/// to do this before emitting a terminator instruction.
-///
SDValue SelectionDAGBuilder::getControlRoot() {
SDValue Root = DAG.getRoot();
@@ -937,7 +981,9 @@ void SelectionDAGBuilder::visit(const Instruction &I) {
HandlePHINodesInSuccessorBlocks(I.getParent());
}
- ++SDNodeOrder;
+ // Increase the SDNodeOrder if dealing with a non-debug instruction.
+ if (!isa<DbgInfoIntrinsic>(I))
+ ++SDNodeOrder;
CurInst = &I;
@@ -1001,10 +1047,12 @@ SDValue SelectionDAGBuilder::getCopyFromRegs(const Value *V, Type *Ty) {
if (It != FuncInfo.ValueMap.end()) {
unsigned InReg = It->second;
+
RegsForValue RFV(*DAG.getContext(), DAG.getTargetLoweringInfo(),
- DAG.getDataLayout(), InReg, Ty);
+ DAG.getDataLayout(), InReg, Ty, isABIRegCopy(V));
SDValue Chain = DAG.getEntryNode();
- Result = RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, nullptr, V);
+ Result = RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, nullptr,
+ V);
resolveDanglingDebugInfo(V, Result);
}
@@ -1122,8 +1170,7 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) {
if (isa<ArrayType>(CDS->getType()))
return DAG.getMergeValues(Ops, getCurSDLoc());
- return NodeMap[V] = DAG.getNode(ISD::BUILD_VECTOR, getCurSDLoc(),
- VT, Ops);
+ return NodeMap[V] = DAG.getBuildVector(VT, getCurSDLoc(), Ops);
}
if (C->getType()->isStructTy() || C->getType()->isArrayTy()) {
@@ -1175,7 +1222,7 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) {
}
// Create a BUILD_VECTOR node.
- return NodeMap[V] = DAG.getNode(ISD::BUILD_VECTOR, getCurSDLoc(), VT, Ops);
+ return NodeMap[V] = DAG.getBuildVector(VT, getCurSDLoc(), Ops);
}
// If this is a static alloca, generate it as the frameindex instead of
@@ -1185,14 +1232,15 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) {
FuncInfo.StaticAllocaMap.find(AI);
if (SI != FuncInfo.StaticAllocaMap.end())
return DAG.getFrameIndex(SI->second,
- TLI.getPointerTy(DAG.getDataLayout()));
+ TLI.getFrameIndexTy(DAG.getDataLayout()));
}
// If this is an instruction which fast-isel has deferred, select it now.
if (const Instruction *Inst = dyn_cast<Instruction>(V)) {
unsigned InReg = FuncInfo.InitializeRegForValue(Inst);
+
RegsForValue RFV(*DAG.getContext(), TLI, DAG.getDataLayout(), InReg,
- Inst->getType());
+ Inst->getType(), isABIRegCopy(V));
SDValue Chain = DAG.getEntryNode();
return RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, nullptr, V);
}
@@ -1384,7 +1432,7 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
RetPtr.getValueType(), RetPtr,
DAG.getIntPtrConstant(Offsets[i],
getCurSDLoc()),
- &Flags);
+ Flags);
Chains[i] = DAG.getStore(Chain, getCurSDLoc(),
SDValue(RetOp.getNode(), RetOp.getResNo() + i),
// FIXME: better loc info would be nice.
@@ -1403,16 +1451,16 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
const Function *F = I.getParent()->getParent();
ISD::NodeType ExtendKind = ISD::ANY_EXTEND;
- if (F->getAttributes().hasAttribute(AttributeSet::ReturnIndex,
+ if (F->getAttributes().hasAttribute(AttributeList::ReturnIndex,
Attribute::SExt))
ExtendKind = ISD::SIGN_EXTEND;
- else if (F->getAttributes().hasAttribute(AttributeSet::ReturnIndex,
+ else if (F->getAttributes().hasAttribute(AttributeList::ReturnIndex,
Attribute::ZExt))
ExtendKind = ISD::ZERO_EXTEND;
LLVMContext &Context = F->getContext();
- bool RetInReg = F->getAttributes().hasAttribute(AttributeSet::ReturnIndex,
- Attribute::InReg);
+ bool RetInReg = F->getAttributes().hasAttribute(
+ AttributeList::ReturnIndex, Attribute::InReg);
for (unsigned j = 0; j != NumValues; ++j) {
EVT VT = ValueVTs[j];
@@ -1420,12 +1468,12 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
if (ExtendKind != ISD::ANY_EXTEND && VT.isInteger())
VT = TLI.getTypeForExtReturn(Context, VT, ExtendKind);
- unsigned NumParts = TLI.getNumRegisters(Context, VT);
- MVT PartVT = TLI.getRegisterType(Context, VT);
+ unsigned NumParts = TLI.getNumRegistersForCallingConv(Context, VT);
+ MVT PartVT = TLI.getRegisterTypeForCallingConv(Context, VT);
SmallVector<SDValue, 4> Parts(NumParts);
getCopyToParts(DAG, getCurSDLoc(),
SDValue(RetOp.getNode(), RetOp.getResNo() + j),
- &Parts[0], NumParts, PartVT, &I, ExtendKind);
+ &Parts[0], NumParts, PartVT, &I, ExtendKind, true);
// 'inreg' on function refers to return value
ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy();
@@ -1461,9 +1509,10 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
true /*isfixed*/, 1 /*origidx*/,
0 /*partOffs*/));
// Create SDNode for the swifterror virtual register.
- OutVals.push_back(DAG.getRegister(FuncInfo.getOrCreateSwiftErrorVReg(
- FuncInfo.MBB, FuncInfo.SwiftErrorArg),
- EVT(TLI.getPointerTy(DL))));
+ OutVals.push_back(
+ DAG.getRegister(FuncInfo.getOrCreateSwiftErrorVRegUseAt(
+ &I, FuncInfo.MBB, FuncInfo.SwiftErrorArg).first,
+ EVT(TLI.getPointerTy(DL))));
}
bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg();
@@ -1582,7 +1631,8 @@ SelectionDAGBuilder::EmitBranchForMergedCondition(const Value *Cond,
MachineBasicBlock *CurBB,
MachineBasicBlock *SwitchBB,
BranchProbability TProb,
- BranchProbability FProb) {
+ BranchProbability FProb,
+ bool InvertCond) {
const BasicBlock *BB = CurBB->getBasicBlock();
// If the leaf of the tree is a comparison, merge the condition into
@@ -1596,10 +1646,14 @@ SelectionDAGBuilder::EmitBranchForMergedCondition(const Value *Cond,
isExportableFromCurrentBlock(BOp->getOperand(1), BB))) {
ISD::CondCode Condition;
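+ // When InvertCond is set, lower the inverse predicate instead; e.g.
+ // (illustrative) an inverted 'icmp eq' is emitted as SETNE rather than
+ // SETEQ.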
if (const ICmpInst *IC = dyn_cast<ICmpInst>(Cond)) {
- Condition = getICmpCondCode(IC->getPredicate());
+ ICmpInst::Predicate Pred =
+ InvertCond ? IC->getInversePredicate() : IC->getPredicate();
+ Condition = getICmpCondCode(Pred);
} else {
const FCmpInst *FC = cast<FCmpInst>(Cond);
- Condition = getFCmpCondCode(FC->getPredicate());
+ FCmpInst::Predicate Pred =
+ InvertCond ? FC->getInversePredicate() : FC->getPredicate();
+ Condition = getFCmpCondCode(Pred);
if (TM.Options.NoNaNsFPMath)
Condition = getFCmpCodeWithoutNaN(Condition);
}
@@ -1612,7 +1666,8 @@ SelectionDAGBuilder::EmitBranchForMergedCondition(const Value *Cond,
}
// Create a CaseBlock record representing this branch.
- CaseBlock CB(ISD::SETEQ, Cond, ConstantInt::getTrue(*DAG.getContext()),
+ ISD::CondCode Opc = InvertCond ? ISD::SETNE : ISD::SETEQ;
+ CaseBlock CB(Opc, Cond, ConstantInt::getTrue(*DAG.getContext()),
nullptr, TBB, FBB, CurBB, TProb, FProb);
SwitchCases.push_back(CB);
}
@@ -1625,16 +1680,44 @@ void SelectionDAGBuilder::FindMergedConditions(const Value *Cond,
MachineBasicBlock *SwitchBB,
Instruction::BinaryOps Opc,
BranchProbability TProb,
- BranchProbability FProb) {
- // If this node is not part of the or/and tree, emit it as a branch.
+ BranchProbability FProb,
+ bool InvertCond) {
+ // Skip over a NOT operation, which is not itself part of the and/or tree,
+ // and remember to invert the opcode and operands at the next level.
+ if (BinaryOperator::isNot(Cond) && Cond->hasOneUse()) {
+ const Value *CondOp = BinaryOperator::getNotArgument(Cond);
+ if (InBlock(CondOp, CurBB->getBasicBlock())) {
+ FindMergedConditions(CondOp, TBB, FBB, CurBB, SwitchBB, Opc, TProb, FProb,
+ !InvertCond);
+ return;
+ }
+ }
+
const Instruction *BOp = dyn_cast<Instruction>(Cond);
+ // Compute the effective opcode for Cond, taking into account whether it needs
+ // to be inverted, e.g.
+ // and (not (or A, B)), C
+ // gets lowered as
+ // and (and (not A, not B), C)
+ unsigned BOpc = 0;
+ if (BOp) {
+ BOpc = BOp->getOpcode();
+ if (InvertCond) {
+ if (BOpc == Instruction::And)
+ BOpc = Instruction::Or;
+ else if (BOpc == Instruction::Or)
+ BOpc = Instruction::And;
+ }
+ }
+
+ // If this node is not part of the or/and tree, emit it as a branch.
if (!BOp || !(isa<BinaryOperator>(BOp) || isa<CmpInst>(BOp)) ||
- (unsigned)BOp->getOpcode() != Opc || !BOp->hasOneUse() ||
+ BOpc != Opc || !BOp->hasOneUse() ||
BOp->getParent() != CurBB->getBasicBlock() ||
!InBlock(BOp->getOperand(0), CurBB->getBasicBlock()) ||
!InBlock(BOp->getOperand(1), CurBB->getBasicBlock())) {
EmitBranchForMergedCondition(Cond, TBB, FBB, CurBB, SwitchBB,
- TProb, FProb);
+ TProb, FProb, InvertCond);
return;
}
@@ -1669,14 +1752,14 @@ void SelectionDAGBuilder::FindMergedConditions(const Value *Cond,
auto NewFalseProb = TProb / 2 + FProb;
// Emit the LHS condition.
FindMergedConditions(BOp->getOperand(0), TBB, TmpBB, CurBB, SwitchBB, Opc,
- NewTrueProb, NewFalseProb);
+ NewTrueProb, NewFalseProb, InvertCond);
// Normalize A/2 and B to get A/(1+B) and 2B/(1+B).
SmallVector<BranchProbability, 2> Probs{TProb / 2, FProb};
BranchProbability::normalizeProbabilities(Probs.begin(), Probs.end());
// Emit the RHS condition into TmpBB.
FindMergedConditions(BOp->getOperand(1), TBB, FBB, TmpBB, SwitchBB, Opc,
- Probs[0], Probs[1]);
+ Probs[0], Probs[1], InvertCond);
} else {
assert(Opc == Instruction::And && "Unknown merge op!");
// Codegen X & Y as:
@@ -1702,14 +1785,14 @@ void SelectionDAGBuilder::FindMergedConditions(const Value *Cond,
auto NewFalseProb = FProb / 2;
// Emit the LHS condition.
FindMergedConditions(BOp->getOperand(0), TmpBB, FBB, CurBB, SwitchBB, Opc,
- NewTrueProb, NewFalseProb);
+ NewTrueProb, NewFalseProb, InvertCond);
// Normalize A and B/2 to get 2A/(1+A) and B/(1+A).
SmallVector<BranchProbability, 2> Probs{TProb, FProb / 2};
BranchProbability::normalizeProbabilities(Probs.begin(), Probs.end());
// Emit the RHS condition into TmpBB.
FindMergedConditions(BOp->getOperand(1), TBB, FBB, TmpBB, SwitchBB, Opc,
- Probs[0], Probs[1]);
+ Probs[0], Probs[1], InvertCond);
}
}
@@ -1793,7 +1876,8 @@ void SelectionDAGBuilder::visitBr(const BranchInst &I) {
FindMergedConditions(BOp, Succ0MBB, Succ1MBB, BrMBB, BrMBB,
Opcode,
getEdgeProbability(BrMBB, Succ0MBB),
- getEdgeProbability(BrMBB, Succ1MBB));
+ getEdgeProbability(BrMBB, Succ1MBB),
+ /*InvertCond=*/false);
// If the compares in later blocks need to use values not currently
// exported from this block, export them now. This block should always
// be the first entry.
@@ -2027,7 +2111,7 @@ void SelectionDAGBuilder::visitSPDescriptorParent(StackProtectorDescriptor &SPD,
Entry.Node = StackSlot;
Entry.Ty = FnTy->getParamType(0);
if (Fn->hasAttribute(1, Attribute::AttrKind::InReg))
- Entry.isInReg = true;
+ Entry.IsInReg = true;
Args.push_back(Entry);
TargetLowering::CallLoweringInfo CLI(DAG);
@@ -2581,15 +2665,15 @@ void SelectionDAGBuilder::visitBinary(const User &I, unsigned OpCode) {
Flags.setNoSignedWrap(nsw);
Flags.setNoUnsignedWrap(nuw);
Flags.setVectorReduction(vec_redux);
- if (EnableFMFInDAG) {
- Flags.setAllowReciprocal(FMF.allowReciprocal());
- Flags.setNoInfs(FMF.noInfs());
- Flags.setNoNaNs(FMF.noNaNs());
- Flags.setNoSignedZeros(FMF.noSignedZeros());
- Flags.setUnsafeAlgebra(FMF.unsafeAlgebra());
- }
+ Flags.setAllowReciprocal(FMF.allowReciprocal());
+ Flags.setAllowContract(FMF.allowContract());
+ Flags.setNoInfs(FMF.noInfs());
+ Flags.setNoNaNs(FMF.noNaNs());
+ Flags.setNoSignedZeros(FMF.noSignedZeros());
+ Flags.setUnsafeAlgebra(FMF.unsafeAlgebra());
+
SDValue BinNodeValue = DAG.getNode(OpCode, getCurSDLoc(), Op1.getValueType(),
- Op1, Op2, &Flags);
+ Op1, Op2, Flags);
setValue(&I, BinNodeValue);
}
@@ -2642,7 +2726,7 @@ void SelectionDAGBuilder::visitShift(const User &I, unsigned Opcode) {
Flags.setNoSignedWrap(nsw);
Flags.setNoUnsignedWrap(nuw);
SDValue Res = DAG.getNode(Opcode, getCurSDLoc(), Op1.getValueType(), Op1, Op2,
- &Flags);
+ Flags);
setValue(&I, Res);
}
@@ -2654,7 +2738,7 @@ void SelectionDAGBuilder::visitSDiv(const User &I) {
Flags.setExact(isa<PossiblyExactOperator>(&I) &&
cast<PossiblyExactOperator>(&I)->isExact());
setValue(&I, DAG.getNode(ISD::SDIV, getCurSDLoc(), Op1.getValueType(), Op1,
- Op2, &Flags));
+ Op2, Flags));
}
void SelectionDAGBuilder::visitICmp(const User &I) {
@@ -2914,7 +2998,7 @@ void SelectionDAGBuilder::visitBitCast(const User &I) {
DestVT, N)); // convert types.
// Check if the original LLVM IR Operand was a ConstantInt, because getValue()
// might fold any kind of constant expression to an integer constant and that
- // is not what we are looking for. Only regcognize a bitcast of a genuine
+ // is not what we are looking for. Only recognize a bitcast of a genuine
// constant integer as an opaque constant.
else if(ConstantInt *C = dyn_cast<ConstantInt>(I.getOperand(0)))
setValue(&I, DAG.getConstant(C->getValue(), dl, DestVT, /*isTarget=*/false,
@@ -3067,14 +3151,10 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) {
if (SrcNumElts > MaskNumElts) {
// Analyze the access pattern of the vector to see if we can extract
- // two subvectors and do the shuffle. The analysis is done by calculating
- // the range of elements the mask access on both vectors.
- int MinRange[2] = { static_cast<int>(SrcNumElts),
- static_cast<int>(SrcNumElts)};
- int MaxRange[2] = {-1, -1};
-
- for (unsigned i = 0; i != MaskNumElts; ++i) {
- int Idx = Mask[i];
+ // two subvectors and do the shuffle.
+ int StartIdx[2] = { -1, -1 }; // Start of the extraction range in each source.
+ bool CanExtract = true;
+ for (int Idx : Mask) {
unsigned Input = 0;
if (Idx < 0)
continue;
@@ -3083,41 +3163,28 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) {
Input = 1;
Idx -= SrcNumElts;
}
- if (Idx > MaxRange[Input])
- MaxRange[Input] = Idx;
- if (Idx < MinRange[Input])
- MinRange[Input] = Idx;
- }
-
- // Check if the access is smaller than the vector size and can we find
- // a reasonable extract index.
- int RangeUse[2] = { -1, -1 }; // 0 = Unused, 1 = Extract, -1 = Can not
- // Extract.
- int StartIdx[2]; // StartIdx to extract from
- for (unsigned Input = 0; Input < 2; ++Input) {
- if (MinRange[Input] >= (int)SrcNumElts && MaxRange[Input] < 0) {
- RangeUse[Input] = 0; // Unused
- StartIdx[Input] = 0;
- continue;
- }
- // Find a good start index that is a multiple of the mask length. Then
- // see if the rest of the elements are in range.
- StartIdx[Input] = (MinRange[Input]/MaskNumElts)*MaskNumElts;
- if (MaxRange[Input] - StartIdx[Input] < (int)MaskNumElts &&
- StartIdx[Input] + MaskNumElts <= SrcNumElts)
- RangeUse[Input] = 1; // Extract from a multiple of the mask length.
+ // If all the indices come from the same MaskNumElts-sized portion of
+ // the sources, we can use a subvector extract. Also make sure the
+ // extract wouldn't read past the end of the source.
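+ // (Illustrative: with SrcNumElts == 8 and MaskNumElts == 4, the mask
+ // <4,5,6,7> reads only elements [4,8) of the first source, so extracting
+ // the subvector starting at index 4 suffices.)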
+ int NewStartIdx = alignDown(Idx, MaskNumElts);
+ if (NewStartIdx + MaskNumElts > SrcNumElts ||
+ (StartIdx[Input] >= 0 && StartIdx[Input] != NewStartIdx))
+ CanExtract = false;
+ // Make sure we always update StartIdx as we use it to track if all
+ // elements are undef.
+ StartIdx[Input] = NewStartIdx;
}
- if (RangeUse[0] == 0 && RangeUse[1] == 0) {
+ if (StartIdx[0] < 0 && StartIdx[1] < 0) {
setValue(&I, DAG.getUNDEF(VT)); // Vectors are not used.
return;
}
- if (RangeUse[0] >= 0 && RangeUse[1] >= 0) {
+ if (CanExtract) {
// Extract appropriate subvector and generate a vector shuffle
for (unsigned Input = 0; Input < 2; ++Input) {
SDValue &Src = Input == 0 ? Src1 : Src2;
- if (RangeUse[Input] == 0)
+ if (StartIdx[Input] < 0)
Src = DAG.getUNDEF(VT);
else {
Src = DAG.getNode(
@@ -3128,16 +3195,12 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) {
}
// Calculate new mask.
- SmallVector<int, 8> MappedOps;
- for (unsigned i = 0; i != MaskNumElts; ++i) {
- int Idx = Mask[i];
- if (Idx >= 0) {
- if (Idx < (int)SrcNumElts)
- Idx -= StartIdx[0];
- else
- Idx -= SrcNumElts + StartIdx[1] - MaskNumElts;
- }
- MappedOps.push_back(Idx);
+ SmallVector<int, 8> MappedOps(Mask.begin(), Mask.end());
+ for (int &Idx : MappedOps) {
+ if (Idx >= (int)SrcNumElts)
+ Idx -= SrcNumElts + StartIdx[1] - MaskNumElts;
+ else if (Idx >= 0)
+ Idx -= StartIdx[0];
}
setValue(&I, DAG.getVectorShuffle(VT, DL, Src1, Src2, MappedOps));
@@ -3151,8 +3214,7 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) {
EVT EltVT = VT.getVectorElementType();
EVT IdxVT = TLI.getVectorIdxTy(DAG.getDataLayout());
SmallVector<SDValue,8> Ops;
- for (unsigned i = 0; i != MaskNumElts; ++i) {
- int Idx = Mask[i];
+ for (int Idx : Mask) {
SDValue Res;
if (Idx < 0) {
@@ -3168,10 +3230,16 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) {
Ops.push_back(Res);
}
- setValue(&I, DAG.getNode(ISD::BUILD_VECTOR, DL, VT, Ops));
+ setValue(&I, DAG.getBuildVector(VT, DL, Ops));
}
-void SelectionDAGBuilder::visitInsertValue(const InsertValueInst &I) {
+void SelectionDAGBuilder::visitInsertValue(const User &I) {
+ ArrayRef<unsigned> Indices;
+ if (const InsertValueInst *IV = dyn_cast<InsertValueInst>(&I))
+ Indices = IV->getIndices();
+ else
+ Indices = cast<ConstantExpr>(&I)->getIndices();
+
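+ // (Illustrative: this path covers both insertvalue instructions and
+ // insertvalue constant expressions such as
+ // insertvalue ({ i32, i32 } undef, i32 1, 0).)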
const Value *Op0 = I.getOperand(0);
const Value *Op1 = I.getOperand(1);
Type *AggTy = I.getType();
@@ -3179,7 +3247,7 @@ void SelectionDAGBuilder::visitInsertValue(const InsertValueInst &I) {
bool IntoUndef = isa<UndefValue>(Op0);
bool FromUndef = isa<UndefValue>(Op1);
- unsigned LinearIndex = ComputeLinearIndex(AggTy, I.getIndices());
+ unsigned LinearIndex = ComputeLinearIndex(AggTy, Indices);
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SmallVector<EVT, 4> AggValueVTs;
@@ -3219,13 +3287,19 @@ void SelectionDAGBuilder::visitInsertValue(const InsertValueInst &I) {
DAG.getVTList(AggValueVTs), Values));
}
-void SelectionDAGBuilder::visitExtractValue(const ExtractValueInst &I) {
+void SelectionDAGBuilder::visitExtractValue(const User &I) {
+ ArrayRef<unsigned> Indices;
+ if (const ExtractValueInst *EV = dyn_cast<ExtractValueInst>(&I))
+ Indices = EV->getIndices();
+ else
+ Indices = cast<ConstantExpr>(&I)->getIndices();
+
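+ // (Likewise, this covers extractvalue constant expressions in addition to
+ // extractvalue instructions.)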
const Value *Op0 = I.getOperand(0);
Type *AggTy = Op0->getType();
Type *ValTy = I.getType();
bool OutOfUndef = isa<UndefValue>(Op0);
- unsigned LinearIndex = ComputeLinearIndex(AggTy, I.getIndices());
+ unsigned LinearIndex = ComputeLinearIndex(AggTy, Indices);
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SmallVector<EVT, 4> ValValueVTs;
@@ -3281,14 +3355,14 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
// N = N + Offset
uint64_t Offset = DL->getStructLayout(StTy)->getElementOffset(Field);
- // In an inbouds GEP with an offset that is nonnegative even when
+ // In an inbounds GEP with an offset that is nonnegative even when
// interpreted as signed, assume there is no unsigned overflow.
SDNodeFlags Flags;
if (int64_t(Offset) >= 0 && cast<GEPOperator>(I).isInBounds())
Flags.setNoUnsignedWrap(true);
N = DAG.getNode(ISD::ADD, dl, N.getValueType(), N,
- DAG.getConstant(Offset, dl, N.getValueType()), &Flags);
+ DAG.getConstant(Offset, dl, N.getValueType()), Flags);
}
} else {
MVT PtrTy =
@@ -3318,7 +3392,7 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
if (Offs.isNonNegative() && cast<GEPOperator>(I).isInBounds())
Flags.setNoUnsignedWrap(true);
- N = DAG.getNode(ISD::ADD, dl, N.getValueType(), N, OffsVal, &Flags);
+ N = DAG.getNode(ISD::ADD, dl, N.getValueType(), N, OffsVal, Flags);
continue;
}
@@ -3326,7 +3400,7 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
SDValue IdxN = getValue(Idx);
if (!IdxN.getValueType().isVector() && VectorWidth) {
- MVT VT = MVT::getVectorVT(IdxN.getValueType().getSimpleVT(), VectorWidth);
+ EVT VT = EVT::getVectorVT(*Context, IdxN.getValueType(), VectorWidth);
IdxN = DAG.getSplatBuildVector(VT, dl, IdxN);
}
@@ -3396,7 +3470,7 @@ void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) {
Flags.setNoUnsignedWrap(true);
AllocSize = DAG.getNode(ISD::ADD, dl,
AllocSize.getValueType(), AllocSize,
- DAG.getIntPtrConstant(StackAlign - 1, dl), &Flags);
+ DAG.getIntPtrConstant(StackAlign - 1, dl), Flags);
// Mask out the low bits for alignment purposes.
AllocSize = DAG.getNode(ISD::AND, dl,
@@ -3459,7 +3533,7 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) {
if (isVolatile || NumValues > MaxParallelChains)
// Serialize volatile loads with other side effects.
Root = getRoot();
- else if (AA->pointsToConstantMemory(MemoryLocation(
+ else if (AA && AA->pointsToConstantMemory(MemoryLocation(
SV, DAG.getDataLayout().getTypeStoreSize(Ty), AAInfo))) {
// Do not serialize (non-volatile) loads of constant memory with anything.
Root = DAG.getEntryNode();
@@ -3500,7 +3574,7 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) {
SDValue A = DAG.getNode(ISD::ADD, dl,
PtrVT, Ptr,
DAG.getConstant(Offsets[i], dl, PtrVT),
- &Flags);
+ Flags);
auto MMOFlags = MachineMemOperand::MONone;
if (isVolatile)
MMOFlags |= MachineMemOperand::MOVolatile;
@@ -3510,6 +3584,7 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) {
MMOFlags |= MachineMemOperand::MOInvariant;
if (isDereferenceable)
MMOFlags |= MachineMemOperand::MODereferenceable;
+ MMOFlags |= TLI.getMMOFlags(I);
SDValue L = DAG.getLoad(ValueVTs[i], dl, Root, A,
MachinePointerInfo(SV, Offsets[i]), Alignment,
@@ -3533,8 +3608,7 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) {
}
void SelectionDAGBuilder::visitStoreToSwiftError(const StoreInst &I) {
- const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- assert(TLI.supportSwiftError() &&
+ assert(DAG.getTargetLoweringInfo().supportSwiftError() &&
"call visitStoreToSwiftError when backend supports swifterror");
SmallVector<EVT, 4> ValueVTs;
@@ -3547,15 +3621,15 @@ void SelectionDAGBuilder::visitStoreToSwiftError(const StoreInst &I) {
SDValue Src = getValue(SrcV);
// Create a virtual register, then update the virtual register.
- auto &DL = DAG.getDataLayout();
- const TargetRegisterClass *RC = TLI.getRegClassFor(TLI.getPointerTy(DL));
- unsigned VReg = FuncInfo.MF->getRegInfo().createVirtualRegister(RC);
+ unsigned VReg; bool CreatedVReg;
+ std::tie(VReg, CreatedVReg) = FuncInfo.getOrCreateSwiftErrorVRegDefAt(&I);
// Chain, DL, Reg, N or Chain, DL, Reg, N, Glue
// Chain can be getRoot or getControlRoot.
SDValue CopyNode = DAG.getCopyToReg(getRoot(), getCurSDLoc(), VReg,
SDValue(Src.getNode(), Src.getResNo()));
DAG.setRoot(CopyNode);
- FuncInfo.setCurrentSwiftErrorVReg(FuncInfo.MBB, I.getOperand(1), VReg);
+ if (CreatedVReg)
+ FuncInfo.setCurrentSwiftErrorVReg(FuncInfo.MBB, I.getOperand(1), VReg);
}
void SelectionDAGBuilder::visitLoadFromSwiftError(const LoadInst &I) {
@@ -3571,8 +3645,8 @@ void SelectionDAGBuilder::visitLoadFromSwiftError(const LoadInst &I) {
Type *Ty = I.getType();
AAMDNodes AAInfo;
I.getAAMetadata(AAInfo);
- assert(!AA->pointsToConstantMemory(MemoryLocation(
- SV, DAG.getDataLayout().getTypeStoreSize(Ty), AAInfo)) &&
+ assert((!AA || !AA->pointsToConstantMemory(MemoryLocation(
+ SV, DAG.getDataLayout().getTypeStoreSize(Ty), AAInfo))) &&
"load_from_swift_error should not be constant memory");
SmallVector<EVT, 4> ValueVTs;
@@ -3585,7 +3659,8 @@ void SelectionDAGBuilder::visitLoadFromSwiftError(const LoadInst &I) {
// Chain, DL, Reg, VT, Glue or Chain, DL, Reg, VT
SDValue L = DAG.getCopyFromReg(
getRoot(), getCurSDLoc(),
- FuncInfo.getOrCreateSwiftErrorVReg(FuncInfo.MBB, SV), ValueVTs[0]);
+ FuncInfo.getOrCreateSwiftErrorVRegUseAt(&I, FuncInfo.MBB, SV).first,
+ ValueVTs[0]);
setValue(&I, L);
}
@@ -3639,6 +3714,7 @@ void SelectionDAGBuilder::visitStore(const StoreInst &I) {
MMOFlags |= MachineMemOperand::MOVolatile;
if (I.getMetadata(LLVMContext::MD_nontemporal) != nullptr)
MMOFlags |= MachineMemOperand::MONonTemporal;
+ MMOFlags |= TLI.getMMOFlags(I);
// An aggregate load cannot wrap around the address space, so offsets to its
// parts don't wrap either.
@@ -3655,7 +3731,7 @@ void SelectionDAGBuilder::visitStore(const StoreInst &I) {
ChainI = 0;
}
SDValue Add = DAG.getNode(ISD::ADD, dl, PtrVT, Ptr,
- DAG.getConstant(Offsets[i], dl, PtrVT), &Flags);
+ DAG.getConstant(Offsets[i], dl, PtrVT), Flags);
SDValue St = DAG.getStore(
Root, dl, SDValue(Src.getNode(), Src.getResNo() + i), Add,
MachinePointerInfo(PtrV, Offsets[i]), Alignment, MMOFlags, AAInfo);
@@ -3853,7 +3929,7 @@ void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I, bool IsExpanding) {
const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range);
// Do not serialize masked loads of constant memory with anything.
- bool AddToChain = !AA->pointsToConstantMemory(MemoryLocation(
+ bool AddToChain = !AA || !AA->pointsToConstantMemory(MemoryLocation(
PtrOperand, DAG.getDataLayout().getTypeStoreSize(I.getType()), AAInfo));
SDValue InChain = AddToChain ? DAG.getRoot() : DAG.getEntryNode();
@@ -3897,7 +3973,7 @@ void SelectionDAGBuilder::visitMaskedGather(const CallInst &I) {
bool UniformBase = getUniformBase(BasePtr, Base, Index, this);
bool ConstantMemory = false;
if (UniformBase &&
- AA->pointsToConstantMemory(MemoryLocation(
+ AA && AA->pointsToConstantMemory(MemoryLocation(
BasePtr, DAG.getDataLayout().getTypeStoreSize(I.getType()),
AAInfo))) {
// Do not serialize (non-volatile) loads of constant memory with anything.
@@ -3929,7 +4005,7 @@ void SelectionDAGBuilder::visitAtomicCmpXchg(const AtomicCmpXchgInst &I) {
SDLoc dl = getCurSDLoc();
AtomicOrdering SuccessOrder = I.getSuccessOrdering();
AtomicOrdering FailureOrder = I.getFailureOrdering();
- SynchronizationScope Scope = I.getSynchScope();
+ SyncScope::ID SSID = I.getSyncScopeID();
SDValue InChain = getRoot();
@@ -3939,7 +4015,7 @@ void SelectionDAGBuilder::visitAtomicCmpXchg(const AtomicCmpXchgInst &I) {
ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, dl, MemVT, VTs, InChain,
getValue(I.getPointerOperand()), getValue(I.getCompareOperand()),
getValue(I.getNewValOperand()), MachinePointerInfo(I.getPointerOperand()),
- /*Alignment=*/ 0, SuccessOrder, FailureOrder, Scope);
+ /*Alignment=*/ 0, SuccessOrder, FailureOrder, SSID);
SDValue OutChain = L.getValue(2);
@@ -3965,7 +4041,7 @@ void SelectionDAGBuilder::visitAtomicRMW(const AtomicRMWInst &I) {
case AtomicRMWInst::UMin: NT = ISD::ATOMIC_LOAD_UMIN; break;
}
AtomicOrdering Order = I.getOrdering();
- SynchronizationScope Scope = I.getSynchScope();
+ SyncScope::ID SSID = I.getSyncScopeID();
SDValue InChain = getRoot();
@@ -3976,7 +4052,7 @@ void SelectionDAGBuilder::visitAtomicRMW(const AtomicRMWInst &I) {
getValue(I.getPointerOperand()),
getValue(I.getValOperand()),
I.getPointerOperand(),
- /* Alignment=*/ 0, Order, Scope);
+ /* Alignment=*/ 0, Order, SSID);
SDValue OutChain = L.getValue(1);
@@ -3990,16 +4066,16 @@ void SelectionDAGBuilder::visitFence(const FenceInst &I) {
SDValue Ops[3];
Ops[0] = getRoot();
Ops[1] = DAG.getConstant((unsigned)I.getOrdering(), dl,
- TLI.getPointerTy(DAG.getDataLayout()));
- Ops[2] = DAG.getConstant(I.getSynchScope(), dl,
- TLI.getPointerTy(DAG.getDataLayout()));
+ TLI.getFenceOperandTy(DAG.getDataLayout()));
+ Ops[2] = DAG.getConstant(I.getSyncScopeID(), dl,
+ TLI.getFenceOperandTy(DAG.getDataLayout()));
DAG.setRoot(DAG.getNode(ISD::ATOMIC_FENCE, dl, MVT::Other, Ops));
}
void SelectionDAGBuilder::visitAtomicLoad(const LoadInst &I) {
SDLoc dl = getCurSDLoc();
AtomicOrdering Order = I.getOrdering();
- SynchronizationScope Scope = I.getSynchScope();
+ SyncScope::ID SSID = I.getSyncScopeID();
SDValue InChain = getRoot();
@@ -4017,7 +4093,7 @@ void SelectionDAGBuilder::visitAtomicLoad(const LoadInst &I) {
VT.getStoreSize(),
I.getAlignment() ? I.getAlignment() :
DAG.getEVTAlignment(VT),
- AAMDNodes(), nullptr, Scope, Order);
+ AAMDNodes(), nullptr, SSID, Order);
InChain = TLI.prepareVolatileOrAtomicLoad(InChain, dl, DAG);
SDValue L =
@@ -4034,7 +4110,7 @@ void SelectionDAGBuilder::visitAtomicStore(const StoreInst &I) {
SDLoc dl = getCurSDLoc();
AtomicOrdering Order = I.getOrdering();
- SynchronizationScope Scope = I.getSynchScope();
+ SyncScope::ID SSID = I.getSyncScopeID();
SDValue InChain = getRoot();
@@ -4051,7 +4127,7 @@ void SelectionDAGBuilder::visitAtomicStore(const StoreInst &I) {
getValue(I.getPointerOperand()),
getValue(I.getValueOperand()),
I.getPointerOperand(), I.getAlignment(),
- Order, Scope);
+ Order, SSID);
DAG.setRoot(OutChain);
}
@@ -4695,7 +4771,7 @@ static unsigned getUnderlyingArgReg(const SDValue &N) {
/// At the end of instruction selection, they will be inserted to the entry BB.
bool SelectionDAGBuilder::EmitFuncArgumentDbgValue(
const Value *V, DILocalVariable *Variable, DIExpression *Expr,
- DILocation *DL, int64_t Offset, bool IsIndirect, const SDValue &N) {
+ DILocation *DL, int64_t Offset, bool IsDbgDeclare, const SDValue &N) {
const Argument *Arg = dyn_cast<Argument>(V);
if (!Arg)
return false;
@@ -4709,9 +4785,11 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue(
if (!Variable->getScope()->getSubprogram()->describes(MF.getFunction()))
return false;
+ bool IsIndirect = false;
Optional<MachineOperand> Op;
// Some arguments' frame index is recorded during argument lowering.
- if (int FI = FuncInfo.getArgumentFrameIndex(Arg))
+ int FI = FuncInfo.getArgumentFrameIndex(Arg);
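+ // (getArgumentFrameIndex is taken to return INT_MAX when no frame index
+ // was recorded for this argument.)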
+ if (FI != INT_MAX)
Op = MachineOperand::CreateFI(FI);
if (!Op && N.getNode()) {
@@ -4722,15 +4800,19 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue(
if (PR)
Reg = PR;
}
- if (Reg)
+ if (Reg) {
Op = MachineOperand::CreateReg(Reg, false);
+ IsIndirect = IsDbgDeclare;
+ }
}
if (!Op) {
// Check if ValueMap has reg number.
DenseMap<const Value *, unsigned>::iterator VMI = FuncInfo.ValueMap.find(V);
- if (VMI != FuncInfo.ValueMap.end())
+ if (VMI != FuncInfo.ValueMap.end()) {
Op = MachineOperand::CreateReg(VMI->second, false);
+ IsIndirect = IsDbgDeclare;
+ }
}
if (!Op && N.getNode())
@@ -4752,7 +4834,7 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue(
else
FuncInfo.ArgDbgValues.push_back(
BuildMI(MF, DL, TII->get(TargetOpcode::DBG_VALUE))
- .addOperand(*Op)
+ .add(*Op)
.addImm(Offset)
.addMetadata(Variable)
.addMetadata(Expr));
@@ -4764,26 +4846,17 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue(
SDDbgValue *SelectionDAGBuilder::getDbgValue(SDValue N,
DILocalVariable *Variable,
DIExpression *Expr, int64_t Offset,
- DebugLoc dl,
+ const DebugLoc &dl,
unsigned DbgSDNodeOrder) {
- SDDbgValue *SDV;
- auto *FISDN = dyn_cast<FrameIndexSDNode>(N.getNode());
- if (FISDN && Expr->startsWithDeref()) {
+ if (auto *FISDN = dyn_cast<FrameIndexSDNode>(N.getNode())) {
// Construct a FrameIndexDbgValue for FrameIndexSDNodes so we can describe
// stack slot locations as such instead of as indirectly addressed
// locations.
- ArrayRef<uint64_t> TrailingElements(Expr->elements_begin() + 1,
- Expr->elements_end());
- DIExpression *DerefedDIExpr =
- DIExpression::get(*DAG.getContext(), TrailingElements);
- int FI = FISDN->getIndex();
- SDV = DAG.getFrameIndexDbgValue(Variable, DerefedDIExpr, FI, 0, dl,
- DbgSDNodeOrder);
- } else {
- SDV = DAG.getDbgValue(Variable, Expr, N.getNode(), N.getResNo(), false,
- Offset, dl, DbgSDNodeOrder);
+ return DAG.getFrameIndexDbgValue(Variable, Expr, FISDN->getIndex(), 0, dl,
+ DbgSDNodeOrder);
}
- return SDV;
+ return DAG.getDbgValue(Variable, Expr, N.getNode(), N.getResNo(), false,
+ Offset, dl, DbgSDNodeOrder);
}
// VisualStudio defines setjmp as _setjmp
@@ -4794,9 +4867,9 @@ SDDbgValue *SelectionDAGBuilder::getDbgValue(SDValue N,
# define setjmp_undefined_for_msvc
#endif
-/// visitIntrinsicCall - Lower the call to the specified intrinsic function. If
-/// we want to emit this as a call to a named external function, return the name
-/// otherwise lower it and return null.
+/// Lower the call to the specified intrinsic function. If we want to emit this
+/// as a call to a named external function, return the name. Otherwise, lower it
+/// and return null.
const char *
SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
@@ -4897,11 +4970,12 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
updateDAGForMaybeTailCall(MM);
return nullptr;
}
- case Intrinsic::memcpy_element_atomic: {
- SDValue Dst = getValue(I.getArgOperand(0));
- SDValue Src = getValue(I.getArgOperand(1));
- SDValue NumElements = getValue(I.getArgOperand(2));
- SDValue ElementSize = getValue(I.getArgOperand(3));
+ case Intrinsic::memcpy_element_unordered_atomic: {
+ const ElementUnorderedAtomicMemCpyInst &MI =
+ cast<ElementUnorderedAtomicMemCpyInst>(I);
+ SDValue Dst = getValue(MI.getRawDest());
+ SDValue Src = getValue(MI.getRawSource());
+ SDValue Length = getValue(MI.getLength());
// Emit a library call.
TargetLowering::ArgListTy Args;
@@ -4912,31 +4986,101 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
Entry.Node = Src;
Args.push_back(Entry);
-
- Entry.Ty = I.getArgOperand(2)->getType();
- Entry.Node = NumElements;
+
+ Entry.Ty = MI.getLength()->getType();
+ Entry.Node = Length;
+ Args.push_back(Entry);
+
+ uint64_t ElementSizeConstant = MI.getElementSizeInBytes();
+ RTLIB::Libcall LibraryCall =
+ RTLIB::getMEMCPY_ELEMENT_UNORDERED_ATOMIC(ElementSizeConstant);
+ if (LibraryCall == RTLIB::UNKNOWN_LIBCALL)
+ report_fatal_error("Unsupported element size");
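+ // (Illustrative: an element size of 4 selects the
+ // __llvm_memcpy_element_unordered_atomic_4 libcall, assuming the usual
+ // RTLIB naming.)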
+
+ TargetLowering::CallLoweringInfo CLI(DAG);
+ CLI.setDebugLoc(sdl).setChain(getRoot()).setLibCallee(
+ TLI.getLibcallCallingConv(LibraryCall),
+ Type::getVoidTy(*DAG.getContext()),
+ DAG.getExternalSymbol(TLI.getLibcallName(LibraryCall),
+ TLI.getPointerTy(DAG.getDataLayout())),
+ std::move(Args));
+
+ std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI);
+ DAG.setRoot(CallResult.second);
+ return nullptr;
+ }
+ case Intrinsic::memmove_element_unordered_atomic: {
+ auto &MI = cast<ElementUnorderedAtomicMemMoveInst>(I);
+ SDValue Dst = getValue(MI.getRawDest());
+ SDValue Src = getValue(MI.getRawSource());
+ SDValue Length = getValue(MI.getLength());
+
+ // Emit a library call.
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+ Entry.Ty = DAG.getDataLayout().getIntPtrType(*DAG.getContext());
+ Entry.Node = Dst;
Args.push_back(Entry);
-
- Entry.Ty = Type::getInt32Ty(*DAG.getContext());
- Entry.Node = ElementSize;
+
+ Entry.Node = Src;
Args.push_back(Entry);
- uint64_t ElementSizeConstant =
- cast<ConstantInt>(I.getArgOperand(3))->getZExtValue();
+ Entry.Ty = MI.getLength()->getType();
+ Entry.Node = Length;
+ Args.push_back(Entry);
+
+ uint64_t ElementSizeConstant = MI.getElementSizeInBytes();
+ RTLIB::Libcall LibraryCall =
+ RTLIB::getMEMMOVE_ELEMENT_UNORDERED_ATOMIC(ElementSizeConstant);
+ if (LibraryCall == RTLIB::UNKNOWN_LIBCALL)
+ report_fatal_error("Unsupported element size");
+
+ TargetLowering::CallLoweringInfo CLI(DAG);
+ CLI.setDebugLoc(sdl).setChain(getRoot()).setLibCallee(
+ TLI.getLibcallCallingConv(LibraryCall),
+ Type::getVoidTy(*DAG.getContext()),
+ DAG.getExternalSymbol(TLI.getLibcallName(LibraryCall),
+ TLI.getPointerTy(DAG.getDataLayout())),
+ std::move(Args));
+
+ std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI);
+ DAG.setRoot(CallResult.second);
+ return nullptr;
+ }
+ case Intrinsic::memset_element_unordered_atomic: {
+ auto &MI = cast<ElementUnorderedAtomicMemSetInst>(I);
+ SDValue Dst = getValue(MI.getRawDest());
+ SDValue Val = getValue(MI.getValue());
+ SDValue Length = getValue(MI.getLength());
+
+ // Emit a library call.
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+ Entry.Ty = DAG.getDataLayout().getIntPtrType(*DAG.getContext());
+ Entry.Node = Dst;
+ Args.push_back(Entry);
+
+ Entry.Ty = Type::getInt8Ty(*DAG.getContext());
+ Entry.Node = Val;
+ Args.push_back(Entry);
+
+ Entry.Ty = MI.getLength()->getType();
+ Entry.Node = Length;
+ Args.push_back(Entry);
+
+ uint64_t ElementSizeConstant = MI.getElementSizeInBytes();
RTLIB::Libcall LibraryCall =
- RTLIB::getMEMCPY_ELEMENT_ATOMIC(ElementSizeConstant);
+ RTLIB::getMEMSET_ELEMENT_UNORDERED_ATOMIC(ElementSizeConstant);
if (LibraryCall == RTLIB::UNKNOWN_LIBCALL)
report_fatal_error("Unsupported element size");
TargetLowering::CallLoweringInfo CLI(DAG);
- CLI.setDebugLoc(sdl)
- .setChain(getRoot())
- .setCallee(TLI.getLibcallCallingConv(LibraryCall),
- Type::getVoidTy(*DAG.getContext()),
- DAG.getExternalSymbol(
- TLI.getLibcallName(LibraryCall),
- TLI.getPointerTy(DAG.getDataLayout())),
- std::move(Args));
+ CLI.setDebugLoc(sdl).setChain(getRoot()).setLibCallee(
+ TLI.getLibcallCallingConv(LibraryCall),
+ Type::getVoidTy(*DAG.getContext()),
+ DAG.getExternalSymbol(TLI.getLibcallName(LibraryCall),
+ TLI.getPointerTy(DAG.getDataLayout())),
+ std::move(Args));
std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI);
DAG.setRoot(CallResult.second);
@@ -4960,6 +5104,13 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
return nullptr;
}
+ // Byval arguments with frame indices were already handled after argument
+ // lowering and before isel.
+ const auto *Arg =
+ dyn_cast<Argument>(Address->stripInBoundsConstantOffsets());
+ if (Arg && FuncInfo.getArgumentFrameIndex(Arg) != INT_MAX)
+ return nullptr;
+
SDValue &N = NodeMap[Address];
if (!N.getNode() && isa<Argument>(Address))
// Check unused arguments map.
@@ -4978,8 +5129,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
} else if (isa<Argument>(Address)) {
// Address is an argument, so try to emit its dbg value using
// virtual register info from the FuncInfo.ValueMap.
- EmitFuncArgumentDbgValue(Address, Variable, Expression, dl, 0, false,
- N);
+ EmitFuncArgumentDbgValue(Address, Variable, Expression, dl, 0, true, N);
return nullptr;
} else {
SDV = DAG.getDbgValue(Variable, Expression, N.getNode(), N.getResNo(),
@@ -4989,22 +5139,8 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
} else {
// If Address is an argument then try to emit its dbg value using
// virtual register info from the FuncInfo.ValueMap.
- if (!EmitFuncArgumentDbgValue(Address, Variable, Expression, dl, 0, false,
+ if (!EmitFuncArgumentDbgValue(Address, Variable, Expression, dl, 0, true,
N)) {
- // If variable is pinned by a alloca in dominating bb then
- // use StaticAllocaMap.
- if (const AllocaInst *AI = dyn_cast<AllocaInst>(Address)) {
- if (AI->getParent() != DI.getParent()) {
- DenseMap<const AllocaInst*, int>::iterator SI =
- FuncInfo.StaticAllocaMap.find(AI);
- if (SI != FuncInfo.StaticAllocaMap.end()) {
- SDV = DAG.getFrameIndexDbgValue(Variable, Expression, SI->second,
- 0, dl, SDNodeOrder);
- DAG.AddDbgValue(SDV, nullptr, false);
- return nullptr;
- }
- }
- }
DEBUG(dbgs() << "Dropping debug info for " << DI << "\n");
}
}
@@ -5026,45 +5162,33 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
SDV = DAG.getConstantDbgValue(Variable, Expression, V, Offset, dl,
SDNodeOrder);
DAG.AddDbgValue(SDV, nullptr, false);
- } else {
- // Do not use getValue() in here; we don't want to generate code at
- // this point if it hasn't been done yet.
- SDValue N = NodeMap[V];
- if (!N.getNode() && isa<Argument>(V))
- // Check unused arguments map.
- N = UnusedArgNodeMap[V];
- if (N.getNode()) {
- if (!EmitFuncArgumentDbgValue(V, Variable, Expression, dl, Offset,
- false, N)) {
- SDV = getDbgValue(N, Variable, Expression, Offset, dl, SDNodeOrder);
- DAG.AddDbgValue(SDV, N.getNode(), false);
- }
- } else if (!V->use_empty() ) {
- // Do not call getValue(V) yet, as we don't want to generate code.
- // Remember it for later.
- DanglingDebugInfo DDI(&DI, dl, SDNodeOrder);
- DanglingDebugInfoMap[V] = DDI;
- } else {
- // We may expand this to cover more cases. One case where we have no
- // data available is an unreferenced parameter.
- DEBUG(dbgs() << "Dropping debug info for " << DI << "\n");
- }
+ return nullptr;
}
- // Build a debug info table entry.
- if (const BitCastInst *BCI = dyn_cast<BitCastInst>(V))
- V = BCI->getOperand(0);
- const AllocaInst *AI = dyn_cast<AllocaInst>(V);
- // Don't handle byval struct arguments or VLAs, for example.
- if (!AI) {
- DEBUG(dbgs() << "Dropping debug location info for:\n " << DI << "\n");
- DEBUG(dbgs() << " Last seen at:\n " << *V << "\n");
+ // Do not use getValue() in here; we don't want to generate code at
+ // this point if it hasn't been done yet.
+ SDValue N = NodeMap[V];
+ if (!N.getNode() && isa<Argument>(V)) // Check unused arguments map.
+ N = UnusedArgNodeMap[V];
+ if (N.getNode()) {
+ if (EmitFuncArgumentDbgValue(V, Variable, Expression, dl, Offset, false,
+ N))
+ return nullptr;
+ SDV = getDbgValue(N, Variable, Expression, Offset, dl, SDNodeOrder);
+ DAG.AddDbgValue(SDV, N.getNode(), false);
return nullptr;
}
- DenseMap<const AllocaInst*, int>::iterator SI =
- FuncInfo.StaticAllocaMap.find(AI);
- if (SI == FuncInfo.StaticAllocaMap.end())
- return nullptr; // VLAs.
+
+ if (!V->use_empty()) {
+ // Do not call getValue(V) yet, as we don't want to generate code.
+ // Remember it for later.
+ DanglingDebugInfo DDI(&DI, dl, SDNodeOrder);
+ DanglingDebugInfoMap[V] = DDI;
+ return nullptr;
+ }
+
+ DEBUG(dbgs() << "Dropping debug location info for:\n " << DI << "\n");
+ DEBUG(dbgs() << " Last seen at:\n " << *V << "\n");
return nullptr;
}
@@ -5202,7 +5326,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
SDValue ShOps[2];
ShOps[0] = ShAmt;
ShOps[1] = DAG.getConstant(0, sdl, MVT::i32);
- ShAmt = DAG.getNode(ISD::BUILD_VECTOR, sdl, ShAmtVT, ShOps);
+ ShAmt = DAG.getBuildVector(ShAmtVT, sdl, ShOps);
EVT DestVT = TLI.getValueType(DAG.getDataLayout(), I.getType());
ShAmt = DAG.getNode(ISD::BITCAST, sdl, DestVT, ShAmt);
Res = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, sdl, DestVT,
@@ -5301,6 +5425,25 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
getValue(I.getArgOperand(1)),
getValue(I.getArgOperand(2))));
return nullptr;
+ case Intrinsic::experimental_constrained_fadd:
+ case Intrinsic::experimental_constrained_fsub:
+ case Intrinsic::experimental_constrained_fmul:
+ case Intrinsic::experimental_constrained_fdiv:
+ case Intrinsic::experimental_constrained_frem:
+ case Intrinsic::experimental_constrained_sqrt:
+ case Intrinsic::experimental_constrained_pow:
+ case Intrinsic::experimental_constrained_powi:
+ case Intrinsic::experimental_constrained_sin:
+ case Intrinsic::experimental_constrained_cos:
+ case Intrinsic::experimental_constrained_exp:
+ case Intrinsic::experimental_constrained_exp2:
+ case Intrinsic::experimental_constrained_log:
+ case Intrinsic::experimental_constrained_log10:
+ case Intrinsic::experimental_constrained_log2:
+ case Intrinsic::experimental_constrained_rint:
+ case Intrinsic::experimental_constrained_nearbyint:
+ visitConstrainedFPIntrinsic(cast<ConstrainedFPIntrinsic>(I));
+ return nullptr;
case Intrinsic::fmuladd: {
EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
if (TM.Options.AllowFPOpFusion != FPOpFusion::Strict &&
@@ -5537,7 +5680,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
case Intrinsic::trap: {
StringRef TrapFuncName =
I.getAttributes()
- .getAttribute(AttributeSet::FunctionIndex, "trap-func-name")
+ .getAttribute(AttributeList::FunctionIndex, "trap-func-name")
.getValueAsString();
if (TrapFuncName.empty()) {
ISD::NodeType Op = (Intrinsic == Intrinsic::trap) ?
@@ -5548,7 +5691,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
TargetLowering::ArgListTy Args;
TargetLowering::CallLoweringInfo CLI(DAG);
- CLI.setDebugLoc(sdl).setChain(getRoot()).setCallee(
+ CLI.setDebugLoc(sdl).setChain(getRoot()).setLibCallee(
CallingConv::C, I.getType(),
DAG.getExternalSymbol(TrapFuncName.data(),
TLI.getPointerTy(DAG.getDataLayout())),
@@ -5629,7 +5772,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
SDValue Ops[2];
Ops[0] = getRoot();
Ops[1] =
- DAG.getFrameIndex(FI, TLI.getPointerTy(DAG.getDataLayout()), true);
+ DAG.getFrameIndex(FI, TLI.getFrameIndexTy(DAG.getDataLayout()), true);
unsigned Opcode = (IsStart ? ISD::LIFETIME_START : ISD::LIFETIME_END);
Res = DAG.getNode(Opcode, sdl, MVT::Other, Ops);
@@ -5690,7 +5833,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
int FI = FuncInfo.StaticAllocaMap[Slot];
MCSymbol *FrameAllocSym =
MF.getMMI().getContext().getOrCreateFrameAllocSymbol(
- GlobalValue::getRealLinkageName(MF.getName()), Idx);
+ GlobalValue::dropLLVMManglingEscape(MF.getName()), Idx);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, dl,
TII->get(TargetOpcode::LOCAL_ESCAPE))
.addSym(FrameAllocSym)
@@ -5711,7 +5854,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
unsigned IdxVal = unsigned(Idx->getLimitedValue(INT_MAX));
MCSymbol *FrameAllocSym =
MF.getMMI().getContext().getOrCreateFrameAllocSymbol(
- GlobalValue::getRealLinkageName(Fn->getName()), IdxVal);
+ GlobalValue::dropLLVMManglingEscape(Fn->getName()), IdxVal);
// Create a MCSymbol for the label to avoid any target lowering
// that would make this PC relative.
@@ -5742,13 +5885,142 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
setValue(&I, N);
return nullptr;
}
+ case Intrinsic::xray_customevent: {
+ // Here we want to make sure that the intrinsic behaves as if it has a
+ // specific calling convention, and only for x86_64 Linux for now.
+ // FIXME: Support other platforms later.
+ const auto &Triple = DAG.getTarget().getTargetTriple();
+ if (Triple.getArch() != Triple::x86_64 || !Triple.isOSLinux())
+ return nullptr;
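+ // (Illustrative: the intrinsic is expected in the form
+ // call void @llvm.xray.customevent(i8* %event, i32 %size).)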
+ SDLoc DL = getCurSDLoc();
+ SmallVector<SDValue, 8> Ops;
+
+ // We want to say that we always want the arguments in registers.
+ SDValue LogEntryVal = getValue(I.getArgOperand(0));
+ SDValue StrSizeVal = getValue(I.getArgOperand(1));
+ SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
+ SDValue Chain = getRoot();
+ Ops.push_back(LogEntryVal);
+ Ops.push_back(StrSizeVal);
+ Ops.push_back(Chain);
+
+ // We need to enforce the calling convention for the callsite, so that
+ // argument ordering is enforced correctly, and so that register allocation
+ // can see that some registers may be assumed clobbered and must be
+ // preserved across calls to the intrinsic.
+ MachineSDNode *MN = DAG.getMachineNode(TargetOpcode::PATCHABLE_EVENT_CALL,
+ DL, NodeTys, Ops);
+ SDValue patchableNode = SDValue(MN, 0);
+ DAG.setRoot(patchableNode);
+ setValue(&I, patchableNode);
+ return nullptr;
+ }
case Intrinsic::experimental_deoptimize:
LowerDeoptimizeCall(&I);
return nullptr;
+
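+ // Lower the experimental vector reduction intrinsics; e.g. (illustrative)
+ // %r = call i32 @llvm.experimental.vector.reduce.add.i32.v4i32(<4 x i32> %v)
+ // reduces the four lanes of %v with integer addition.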
+ case Intrinsic::experimental_vector_reduce_fadd:
+ case Intrinsic::experimental_vector_reduce_fmul:
+ case Intrinsic::experimental_vector_reduce_add:
+ case Intrinsic::experimental_vector_reduce_mul:
+ case Intrinsic::experimental_vector_reduce_and:
+ case Intrinsic::experimental_vector_reduce_or:
+ case Intrinsic::experimental_vector_reduce_xor:
+ case Intrinsic::experimental_vector_reduce_smax:
+ case Intrinsic::experimental_vector_reduce_smin:
+ case Intrinsic::experimental_vector_reduce_umax:
+ case Intrinsic::experimental_vector_reduce_umin:
+ case Intrinsic::experimental_vector_reduce_fmax:
+ case Intrinsic::experimental_vector_reduce_fmin: {
+ visitVectorReduce(I, Intrinsic);
+ return nullptr;
+ }
+
}
}
+void SelectionDAGBuilder::visitConstrainedFPIntrinsic(
+ const ConstrainedFPIntrinsic &FPI) {
+ SDLoc sdl = getCurSDLoc();
+ unsigned Opcode;
+ switch (FPI.getIntrinsicID()) {
+ default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
+ case Intrinsic::experimental_constrained_fadd:
+ Opcode = ISD::STRICT_FADD;
+ break;
+ case Intrinsic::experimental_constrained_fsub:
+ Opcode = ISD::STRICT_FSUB;
+ break;
+ case Intrinsic::experimental_constrained_fmul:
+ Opcode = ISD::STRICT_FMUL;
+ break;
+ case Intrinsic::experimental_constrained_fdiv:
+ Opcode = ISD::STRICT_FDIV;
+ break;
+ case Intrinsic::experimental_constrained_frem:
+ Opcode = ISD::STRICT_FREM;
+ break;
+ case Intrinsic::experimental_constrained_sqrt:
+ Opcode = ISD::STRICT_FSQRT;
+ break;
+ case Intrinsic::experimental_constrained_pow:
+ Opcode = ISD::STRICT_FPOW;
+ break;
+ case Intrinsic::experimental_constrained_powi:
+ Opcode = ISD::STRICT_FPOWI;
+ break;
+ case Intrinsic::experimental_constrained_sin:
+ Opcode = ISD::STRICT_FSIN;
+ break;
+ case Intrinsic::experimental_constrained_cos:
+ Opcode = ISD::STRICT_FCOS;
+ break;
+ case Intrinsic::experimental_constrained_exp:
+ Opcode = ISD::STRICT_FEXP;
+ break;
+ case Intrinsic::experimental_constrained_exp2:
+ Opcode = ISD::STRICT_FEXP2;
+ break;
+ case Intrinsic::experimental_constrained_log:
+ Opcode = ISD::STRICT_FLOG;
+ break;
+ case Intrinsic::experimental_constrained_log10:
+ Opcode = ISD::STRICT_FLOG10;
+ break;
+ case Intrinsic::experimental_constrained_log2:
+ Opcode = ISD::STRICT_FLOG2;
+ break;
+ case Intrinsic::experimental_constrained_rint:
+ Opcode = ISD::STRICT_FRINT;
+ break;
+ case Intrinsic::experimental_constrained_nearbyint:
+ Opcode = ISD::STRICT_FNEARBYINT;
+ break;
+ }
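+ // (Illustrative: llvm.experimental.constrained.fadd.f64 maps to a
+ // STRICT_FADD node whose second result is the updated chain.)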
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ SDValue Chain = getRoot();
+ SmallVector<EVT, 4> ValueVTs;
+ ComputeValueVTs(TLI, DAG.getDataLayout(), FPI.getType(), ValueVTs);
+ ValueVTs.push_back(MVT::Other); // Out chain
+
+ SDVTList VTs = DAG.getVTList(ValueVTs);
+ SDValue Result;
+ if (FPI.isUnaryOp())
+ Result = DAG.getNode(Opcode, sdl, VTs,
+ { Chain, getValue(FPI.getArgOperand(0)) });
+ else
+ Result = DAG.getNode(Opcode, sdl, VTs,
+ { Chain, getValue(FPI.getArgOperand(0)),
+ getValue(FPI.getArgOperand(1)) });
+
+ assert(Result.getNode()->getNumValues() == 2);
+ SDValue OutChain = Result.getValue(1);
+ DAG.setRoot(OutChain);
+ SDValue FPResult = Result.getValue(0);
+ setValue(&FPI, FPResult);
+}
+
std::pair<SDValue, SDValue>
SelectionDAGBuilder::lowerInvokable(TargetLowering::CallLoweringInfo &CLI,
const BasicBlock *EHPadBB) {
@@ -5827,7 +6099,6 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
Type *RetTy = CS.getType();
TargetLowering::ArgListTy Args;
- TargetLowering::ArgListEntry Entry;
Args.reserve(CS.arg_size());
const Value *SwiftErrorVal = nullptr;
@@ -5843,6 +6114,7 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
for (ImmutableCallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end();
i != e; ++i) {
+ TargetLowering::ArgListEntry Entry;
const Value *V = *i;
// Skip empty types
@@ -5852,24 +6124,25 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
SDValue ArgNode = getValue(V);
Entry.Node = ArgNode; Entry.Ty = V->getType();
- // Skip the first return-type Attribute to get to params.
- Entry.setAttributes(&CS, i - CS.arg_begin() + 1);
+ Entry.setAttributes(&CS, i - CS.arg_begin());
// Use swifterror virtual register as input to the call.
- if (Entry.isSwiftError && TLI.supportSwiftError()) {
+ if (Entry.IsSwiftError && TLI.supportSwiftError()) {
SwiftErrorVal = V;
// We find the virtual register for the actual swifterror argument.
// Instead of using the Value, we use the virtual register instead.
- Entry.Node =
- DAG.getRegister(FuncInfo.getOrCreateSwiftErrorVReg(FuncInfo.MBB, V),
- EVT(TLI.getPointerTy(DL)));
+ Entry.Node = DAG.getRegister(FuncInfo
+ .getOrCreateSwiftErrorVRegUseAt(
+ CS.getInstruction(), FuncInfo.MBB, V)
+ .first,
+ EVT(TLI.getPointerTy(DL)));
}
Args.push_back(Entry);
// If we have an explicit sret argument that is an Instruction, (i.e., it
// might point to function-local memory), we can't meaningfully tail-call.
- if (Entry.isSRet && isa<Instruction>(V))
+ if (Entry.IsSRet && isa<Instruction>(V))
isTailCall = false;
}
@@ -5903,38 +6176,29 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
if (SwiftErrorVal && TLI.supportSwiftError()) {
// Get the last element of InVals.
SDValue Src = CLI.InVals.back();
- const TargetRegisterClass *RC = TLI.getRegClassFor(TLI.getPointerTy(DL));
- unsigned VReg = FuncInfo.MF->getRegInfo().createVirtualRegister(RC);
+ unsigned VReg; bool CreatedVReg;
+ std::tie(VReg, CreatedVReg) =
+ FuncInfo.getOrCreateSwiftErrorVRegDefAt(CS.getInstruction());
SDValue CopyNode = CLI.DAG.getCopyToReg(Result.second, CLI.DL, VReg, Src);
// We update the virtual register for the actual swifterror argument.
- FuncInfo.setCurrentSwiftErrorVReg(FuncInfo.MBB, SwiftErrorVal, VReg);
+ if (CreatedVReg)
+ FuncInfo.setCurrentSwiftErrorVReg(FuncInfo.MBB, SwiftErrorVal, VReg);
DAG.setRoot(CopyNode);
}
}
-/// IsOnlyUsedInZeroEqualityComparison - Return true if it only matters that the
-/// value is equal or not-equal to zero.
-static bool IsOnlyUsedInZeroEqualityComparison(const Value *V) {
- for (const User *U : V->users()) {
- if (const ICmpInst *IC = dyn_cast<ICmpInst>(U))
- if (IC->isEquality())
- if (const Constant *C = dyn_cast<Constant>(IC->getOperand(1)))
- if (C->isNullValue())
- continue;
- // Unknown instruction.
- return false;
- }
- return true;
-}
-
static SDValue getMemCmpLoad(const Value *PtrVal, MVT LoadVT,
- Type *LoadTy,
SelectionDAGBuilder &Builder) {
// Check to see if this load can be trivially constant folded, e.g. if the
// input is from a string literal.
if (const Constant *LoadInput = dyn_cast<Constant>(PtrVal)) {
// Cast pointer to the type we really want to load.
+ Type *LoadTy =
+ Type::getIntNTy(PtrVal->getContext(), LoadVT.getScalarSizeInBits());
+ if (LoadVT.isVector())
+ LoadTy = VectorType::get(LoadTy, LoadVT.getVectorNumElements());
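+ // (Illustrative: LoadVT == MVT::i32 yields an i32 load type, while
+ // MVT::v4i32 yields <4 x i32>.)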
+
LoadInput = ConstantExpr::getBitCast(const_cast<Constant *>(LoadInput),
PointerType::getUnqual(LoadTy));
@@ -5949,7 +6213,7 @@ static SDValue getMemCmpLoad(const Value *PtrVal, MVT LoadVT,
bool ConstantMemory = false;
// Do not serialize (non-volatile) loads of constant memory with anything.
- if (Builder.AA->pointsToConstantMemory(PtrVal)) {
+ if (Builder.AA && Builder.AA->pointsToConstantMemory(PtrVal)) {
Root = Builder.DAG.getEntryNode();
ConstantMemory = true;
} else {
@@ -5967,8 +6231,8 @@ static SDValue getMemCmpLoad(const Value *PtrVal, MVT LoadVT,
return LoadVal;
}
-/// processIntegerCallValue - Record the value for an instruction that
-/// produces an integer result, converting the type where necessary.
+/// Record the value for an instruction that produces an integer result,
+/// converting the type where necessary.
void SelectionDAGBuilder::processIntegerCallValue(const Instruction &I,
SDValue Value,
bool IsSigned) {
@@ -5981,20 +6245,13 @@ void SelectionDAGBuilder::processIntegerCallValue(const Instruction &I,
setValue(&I, Value);
}
-/// visitMemCmpCall - See if we can lower a call to memcmp in an optimized form.
-/// If so, return true and lower it, otherwise return false and it will be
-/// lowered like a normal call.
+/// See if we can lower a memcmp call into an optimized form. If so, return
+/// true and lower it. Otherwise return false, and it will be lowered like a
+/// normal call.
+/// The caller already checked that \p I calls the appropriate LibFunc with a
+/// correct prototype.
bool SelectionDAGBuilder::visitMemCmpCall(const CallInst &I) {
- // Verify that the prototype makes sense. int memcmp(void*,void*,size_t)
- if (I.getNumArgOperands() != 3)
- return false;
-
const Value *LHS = I.getArgOperand(0), *RHS = I.getArgOperand(1);
- if (!LHS->getType()->isPointerTy() || !RHS->getType()->isPointerTy() ||
- !I.getArgOperand(2)->getType()->isIntegerTy() ||
- !I.getType()->isIntegerTy())
- return false;
-
const Value *Size = I.getArgOperand(2);
const ConstantInt *CSize = dyn_cast<ConstantInt>(Size);
if (CSize && CSize->getZExtValue() == 0) {
@@ -6005,11 +6262,9 @@ bool SelectionDAGBuilder::visitMemCmpCall(const CallInst &I) {
}
const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo();
- std::pair<SDValue, SDValue> Res =
- TSI.EmitTargetCodeForMemcmp(DAG, getCurSDLoc(), DAG.getRoot(),
- getValue(LHS), getValue(RHS), getValue(Size),
- MachinePointerInfo(LHS),
- MachinePointerInfo(RHS));
+ std::pair<SDValue, SDValue> Res = TSI.EmitTargetCodeForMemcmp(
+ DAG, getCurSDLoc(), DAG.getRoot(), getValue(LHS), getValue(RHS),
+ getValue(Size), MachinePointerInfo(LHS), MachinePointerInfo(RHS));
if (Res.first.getNode()) {
processIntegerCallValue(I, Res.first, true);
PendingLoads.push_back(Res.second);
@@ -6018,88 +6273,79 @@ bool SelectionDAGBuilder::visitMemCmpCall(const CallInst &I) {
// memcmp(S1,S2,2) != 0 -> (*(short*)LHS != *(short*)RHS) != 0
// memcmp(S1,S2,4) != 0 -> (*(int*)LHS != *(int*)RHS) != 0
- if (CSize && IsOnlyUsedInZeroEqualityComparison(&I)) {
- bool ActuallyDoIt = true;
- MVT LoadVT;
- Type *LoadTy;
- switch (CSize->getZExtValue()) {
- default:
- LoadVT = MVT::Other;
- LoadTy = nullptr;
- ActuallyDoIt = false;
- break;
- case 2:
- LoadVT = MVT::i16;
- LoadTy = Type::getInt16Ty(CSize->getContext());
- break;
- case 4:
- LoadVT = MVT::i32;
- LoadTy = Type::getInt32Ty(CSize->getContext());
- break;
- case 8:
- LoadVT = MVT::i64;
- LoadTy = Type::getInt64Ty(CSize->getContext());
- break;
- /*
- case 16:
- LoadVT = MVT::v4i32;
- LoadTy = Type::getInt32Ty(CSize->getContext());
- LoadTy = VectorType::get(LoadTy, 4);
- break;
- */
- }
-
- // This turns into unaligned loads. We only do this if the target natively
- // supports the MVT we'll be loading or if it is small enough (<= 4) that
- // we'll only produce a small number of byte loads.
+ if (!CSize || !isOnlyUsedInZeroEqualityComparison(&I))
+ return false;
- // Require that we can find a legal MVT, and only do this if the target
- // supports unaligned loads of that type. Expanding into byte loads would
- // bloat the code.
+ // If the target has a fast compare for the given size, it will return a
+ // preferred load type for that size. Require that the load VT is legal and
+ // that the target supports unaligned loads of that type. Otherwise, return
+ // INVALID.
+ auto hasFastLoadsAndCompare = [&](unsigned NumBits) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- if (ActuallyDoIt && CSize->getZExtValue() > 4) {
- unsigned DstAS = LHS->getType()->getPointerAddressSpace();
- unsigned SrcAS = RHS->getType()->getPointerAddressSpace();
+ MVT LVT = TLI.hasFastEqualityCompare(NumBits);
+ if (LVT != MVT::INVALID_SIMPLE_VALUE_TYPE) {
// TODO: Handle 5 byte compare as 4-byte + 1 byte.
// TODO: Handle 8 byte compare on x86-32 as two 32-bit loads.
// TODO: Check alignment of src and dest ptrs.
- if (!TLI.isTypeLegal(LoadVT) ||
- !TLI.allowsMisalignedMemoryAccesses(LoadVT, SrcAS) ||
- !TLI.allowsMisalignedMemoryAccesses(LoadVT, DstAS))
- ActuallyDoIt = false;
+ unsigned DstAS = LHS->getType()->getPointerAddressSpace();
+ unsigned SrcAS = RHS->getType()->getPointerAddressSpace();
+ if (!TLI.isTypeLegal(LVT) ||
+ !TLI.allowsMisalignedMemoryAccesses(LVT, SrcAS) ||
+ !TLI.allowsMisalignedMemoryAccesses(LVT, DstAS))
+ LVT = MVT::INVALID_SIMPLE_VALUE_TYPE;
}
- if (ActuallyDoIt) {
- SDValue LHSVal = getMemCmpLoad(LHS, LoadVT, LoadTy, *this);
- SDValue RHSVal = getMemCmpLoad(RHS, LoadVT, LoadTy, *this);
+ return LVT;
+ };
- SDValue Res = DAG.getSetCC(getCurSDLoc(), MVT::i1, LHSVal, RHSVal,
- ISD::SETNE);
- processIntegerCallValue(I, Res, false);
- return true;
- }
+ // This turns into unaligned loads. We only do this if the target natively
+ // supports the MVT we'll be loading or if it is small enough (<= 4) that
+ // we'll only produce a small number of byte loads.
+ MVT LoadVT;
+ unsigned NumBitsToCompare = CSize->getZExtValue() * 8;
+ switch (NumBitsToCompare) {
+ default:
+ return false;
+ case 16:
+ LoadVT = MVT::i16;
+ break;
+ case 32:
+ LoadVT = MVT::i32;
+ break;
+ case 64:
+ case 128:
+ case 256:
+ LoadVT = hasFastLoadsAndCompare(NumBitsToCompare);
+ break;
}
+ if (LoadVT == MVT::INVALID_SIMPLE_VALUE_TYPE)
+ return false;
- return false;
+ SDValue LoadL = getMemCmpLoad(LHS, LoadVT, *this);
+ SDValue LoadR = getMemCmpLoad(RHS, LoadVT, *this);
+
+ // Bitcast to a wide integer type if the loads are vectors.
+ if (LoadVT.isVector()) {
+ EVT CmpVT = EVT::getIntegerVT(LHS->getContext(), LoadVT.getSizeInBits());
+ LoadL = DAG.getBitcast(CmpVT, LoadL);
+ LoadR = DAG.getBitcast(CmpVT, LoadR);
+ }
+
+ SDValue Cmp = DAG.getSetCC(getCurSDLoc(), MVT::i1, LoadL, LoadR, ISD::SETNE);
+ processIntegerCallValue(I, Cmp, false);
+ return true;
}
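
Taken together, the rewritten path turns a small fixed-size, equality-only memcmp into one potentially misaligned load per operand and a single integer compare. A standalone C++ sketch of the code shape this produces for the 4-byte case (names here are illustrative, not part of the patch):

    #include <cstdint>
    #include <cstring>

    // Hand-written equivalent of the DAG built for "memcmp(a, b, 4) != 0"
    // when i32 is legal and misaligned i32 loads are allowed: two loads
    // and one ISD::SETNE, no libcall.
    bool differ4(const char *a, const char *b) {
      uint32_t x, y;
      std::memcpy(&x, a, 4); // becomes a (possibly unaligned) i32 load
      std::memcpy(&y, b, 4);
      return x != y;
    }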
-/// visitMemChrCall -- See if we can lower a memchr call into an optimized
-/// form. If so, return true and lower it, otherwise return false and it
-/// will be lowered like a normal call.
+/// See if we can lower a memchr call into an optimized form. If so, return
+/// true and lower it. Otherwise return false, and it will be lowered like a
+/// normal call.
+/// The caller already checked that \p I calls the appropriate LibFunc with a
+/// correct prototype.
bool SelectionDAGBuilder::visitMemChrCall(const CallInst &I) {
- // Verify that the prototype makes sense. void *memchr(void *, int, size_t)
- if (I.getNumArgOperands() != 3)
- return false;
-
const Value *Src = I.getArgOperand(0);
const Value *Char = I.getArgOperand(1);
const Value *Length = I.getArgOperand(2);
- if (!Src->getType()->isPointerTy() ||
- !Char->getType()->isIntegerTy() ||
- !Length->getType()->isIntegerTy() ||
- !I.getType()->isPointerTy())
- return false;
const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo();
std::pair<SDValue, SDValue> Res =
@@ -6115,15 +6361,12 @@ bool SelectionDAGBuilder::visitMemChrCall(const CallInst &I) {
return false;
}
-///
-/// visitMemPCpyCall -- lower a mempcpy call as a memcpy followed by code to
-/// to adjust the dst pointer by the size of the copied memory.
+/// See if we can lower a mempcpy call into an optimized form. If so, return
+/// true and lower it. Otherwise return false, and it will be lowered like a
+/// normal call.
+/// The caller already checked that \p I calls the appropriate LibFunc with a
+/// correct prototype.
bool SelectionDAGBuilder::visitMemPCpyCall(const CallInst &I) {
-
- // Verify argument count: void *mempcpy(void *, const void *, size_t)
- if (I.getNumArgOperands() != 3)
- return false;
-
SDValue Dst = getValue(I.getArgOperand(0));
SDValue Src = getValue(I.getArgOperand(1));
SDValue Size = getValue(I.getArgOperand(2));
@@ -6158,19 +6401,13 @@ bool SelectionDAGBuilder::visitMemPCpyCall(const CallInst &I) {
return true;
}
-/// visitStrCpyCall -- See if we can lower a strcpy or stpcpy call into an
-/// optimized form. If so, return true and lower it, otherwise return false
-/// and it will be lowered like a normal call.
+/// See if we can lower a strcpy call into an optimized form. If so, return
+/// true and lower it. Otherwise return false, and it will be lowered like a
+/// normal call.
+/// The caller already checked that \p I calls the appropriate LibFunc with a
+/// correct prototype.
bool SelectionDAGBuilder::visitStrCpyCall(const CallInst &I, bool isStpcpy) {
- // Verify that the prototype makes sense. char *strcpy(char *, char *)
- if (I.getNumArgOperands() != 2)
- return false;
-
const Value *Arg0 = I.getArgOperand(0), *Arg1 = I.getArgOperand(1);
- if (!Arg0->getType()->isPointerTy() ||
- !Arg1->getType()->isPointerTy() ||
- !I.getType()->isPointerTy())
- return false;
const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo();
std::pair<SDValue, SDValue> Res =
@@ -6187,19 +6424,13 @@ bool SelectionDAGBuilder::visitStrCpyCall(const CallInst &I, bool isStpcpy) {
return false;
}
-/// visitStrCmpCall - See if we can lower a call to strcmp in an optimized form.
-/// If so, return true and lower it, otherwise return false and it will be
-/// lowered like a normal call.
+/// See if we can lower a strcmp call into an optimized form. If so, return
+/// true and lower it. Otherwise return false, and it will be lowered like a
+/// normal call.
+/// The caller already checked that \p I calls the appropriate LibFunc with a
+/// correct prototype.
bool SelectionDAGBuilder::visitStrCmpCall(const CallInst &I) {
- // Verify that the prototype makes sense. int strcmp(void*,void*)
- if (I.getNumArgOperands() != 2)
- return false;
-
const Value *Arg0 = I.getArgOperand(0), *Arg1 = I.getArgOperand(1);
- if (!Arg0->getType()->isPointerTy() ||
- !Arg1->getType()->isPointerTy() ||
- !I.getType()->isIntegerTy())
- return false;
const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo();
std::pair<SDValue, SDValue> Res =
@@ -6216,17 +6447,13 @@ bool SelectionDAGBuilder::visitStrCmpCall(const CallInst &I) {
return false;
}
-/// visitStrLenCall -- See if we can lower a strlen call into an optimized
-/// form. If so, return true and lower it, otherwise return false and it
-/// will be lowered like a normal call.
+/// See if we can lower a strlen call into an optimized form. If so, return
+/// true and lower it. Otherwise return false, and it will be lowered like a
+/// normal call.
+/// The caller already checked that \p I calls the appropriate LibFunc with a
+/// correct prototype.
bool SelectionDAGBuilder::visitStrLenCall(const CallInst &I) {
- // Verify that the prototype makes sense. size_t strlen(char *)
- if (I.getNumArgOperands() != 1)
- return false;
-
const Value *Arg0 = I.getArgOperand(0);
- if (!Arg0->getType()->isPointerTy() || !I.getType()->isIntegerTy())
- return false;
const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo();
std::pair<SDValue, SDValue> Res =
@@ -6241,19 +6468,13 @@ bool SelectionDAGBuilder::visitStrLenCall(const CallInst &I) {
return false;
}
-/// visitStrNLenCall -- See if we can lower a strnlen call into an optimized
-/// form. If so, return true and lower it, otherwise return false and it
-/// will be lowered like a normal call.
+/// See if we can lower a strnlen call into an optimized form. If so, return
+/// true and lower it. Otherwise return false, and it will be lowered like a
+/// normal call.
+/// The caller already checked that \p I calls the appropriate LibFunc with a
+/// correct prototype.
bool SelectionDAGBuilder::visitStrNLenCall(const CallInst &I) {
- // Verify that the prototype makes sense. size_t strnlen(char *, size_t)
- if (I.getNumArgOperands() != 2)
- return false;
-
const Value *Arg0 = I.getArgOperand(0), *Arg1 = I.getArgOperand(1);
- if (!Arg0->getType()->isPointerTy() ||
- !Arg1->getType()->isIntegerTy() ||
- !I.getType()->isIntegerTy())
- return false;
const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo();
std::pair<SDValue, SDValue> Res =
@@ -6269,16 +6490,15 @@ bool SelectionDAGBuilder::visitStrNLenCall(const CallInst &I) {
return false;
}
-/// visitUnaryFloatCall - If a call instruction is a unary floating-point
-/// operation (as expected), translate it to an SDNode with the specified opcode
-/// and return true.
+/// See if we can lower a unary floating-point operation into an SDNode with
+/// the specified Opcode. If so, return true and lower it. Otherwise return
+/// false, and it will be lowered like a normal call.
+/// The caller already checked that \p I calls the appropriate LibFunc with a
+/// correct prototype.
bool SelectionDAGBuilder::visitUnaryFloatCall(const CallInst &I,
unsigned Opcode) {
- // Sanity check that it really is a unary floating-point call.
- if (I.getNumArgOperands() != 1 ||
- !I.getArgOperand(0)->getType()->isFloatingPointTy() ||
- I.getType() != I.getArgOperand(0)->getType() ||
- !I.onlyReadsMemory())
+ // We already checked this call's prototype; verify it doesn't modify errno.
+ if (!I.onlyReadsMemory())
return false;
SDValue Tmp = getValue(I.getArgOperand(0));
@@ -6286,17 +6506,15 @@ bool SelectionDAGBuilder::visitUnaryFloatCall(const CallInst &I,
return true;
}
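
Why the onlyReadsMemory() check survives even though the prototype is now pre-verified by the caller: a libm call that may write errno is observable in a way the bare ISD node is not, so only readonly calls can be folded. A host-side illustration (assuming math_errhandling includes MATH_ERRNO, as on common libc implementations; not part of the patch):

    #include <cerrno>
    #include <cmath>

    // sqrt(-1.0) returns NaN *and* sets errno to EDOM; replacing the call
    // with a bare FSQRT node would lose the errno write this code observes.
    bool sqrtSetErrno(double x) {
      errno = 0;
      (void)std::sqrt(x);
      return errno == EDOM;
    }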
-/// visitBinaryFloatCall - If a call instruction is a binary floating-point
-/// operation (as expected), translate it to an SDNode with the specified opcode
-/// and return true.
+/// See if we can lower a binary floating-point operation into an SDNode with
+/// the specified Opcode. If so, return true and lower it. Otherwise return
+/// false, and it will be lowered like a normal call.
+/// The caller already checked that \p I calls the appropriate LibFunc with a
+/// correct prototype.
bool SelectionDAGBuilder::visitBinaryFloatCall(const CallInst &I,
unsigned Opcode) {
- // Sanity check that it really is a binary floating-point call.
- if (I.getNumArgOperands() != 2 ||
- !I.getArgOperand(0)->getType()->isFloatingPointTy() ||
- I.getType() != I.getArgOperand(0)->getType() ||
- I.getType() != I.getArgOperand(1)->getType() ||
- !I.onlyReadsMemory())
+ // We already checked this call's prototype; verify it doesn't modify errno.
+ if (!I.onlyReadsMemory())
return false;
SDValue Tmp0 = getValue(I.getArgOperand(0));
@@ -6336,20 +6554,18 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) {
// Check for well-known libc/libm calls. If the function is internal, it
// can't be a library call. Don't do the check if marked as nobuiltin for
// some reason.
- LibFunc::Func Func;
+ LibFunc Func;
if (!I.isNoBuiltin() && !F->hasLocalLinkage() && F->hasName() &&
- LibInfo->getLibFunc(F->getName(), Func) &&
+ LibInfo->getLibFunc(*F, Func) &&
LibInfo->hasOptimizedCodeGen(Func)) {
switch (Func) {
default: break;
- case LibFunc::copysign:
- case LibFunc::copysignf:
- case LibFunc::copysignl:
- if (I.getNumArgOperands() == 2 && // Basic sanity checks.
- I.getArgOperand(0)->getType()->isFloatingPointTy() &&
- I.getType() == I.getArgOperand(0)->getType() &&
- I.getType() == I.getArgOperand(1)->getType() &&
- I.onlyReadsMemory()) {
+ case LibFunc_copysign:
+ case LibFunc_copysignf:
+ case LibFunc_copysignl:
+ // We already checked this call's prototype; verify it doesn't modify
+ // errno.
+ if (I.onlyReadsMemory()) {
SDValue LHS = getValue(I.getArgOperand(0));
SDValue RHS = getValue(I.getArgOperand(1));
setValue(&I, DAG.getNode(ISD::FCOPYSIGN, getCurSDLoc(),
@@ -6357,122 +6573,122 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) {
return;
}
break;
- case LibFunc::fabs:
- case LibFunc::fabsf:
- case LibFunc::fabsl:
+ case LibFunc_fabs:
+ case LibFunc_fabsf:
+ case LibFunc_fabsl:
if (visitUnaryFloatCall(I, ISD::FABS))
return;
break;
- case LibFunc::fmin:
- case LibFunc::fminf:
- case LibFunc::fminl:
+ case LibFunc_fmin:
+ case LibFunc_fminf:
+ case LibFunc_fminl:
if (visitBinaryFloatCall(I, ISD::FMINNUM))
return;
break;
- case LibFunc::fmax:
- case LibFunc::fmaxf:
- case LibFunc::fmaxl:
+ case LibFunc_fmax:
+ case LibFunc_fmaxf:
+ case LibFunc_fmaxl:
if (visitBinaryFloatCall(I, ISD::FMAXNUM))
return;
break;
- case LibFunc::sin:
- case LibFunc::sinf:
- case LibFunc::sinl:
+ case LibFunc_sin:
+ case LibFunc_sinf:
+ case LibFunc_sinl:
if (visitUnaryFloatCall(I, ISD::FSIN))
return;
break;
- case LibFunc::cos:
- case LibFunc::cosf:
- case LibFunc::cosl:
+ case LibFunc_cos:
+ case LibFunc_cosf:
+ case LibFunc_cosl:
if (visitUnaryFloatCall(I, ISD::FCOS))
return;
break;
- case LibFunc::sqrt:
- case LibFunc::sqrtf:
- case LibFunc::sqrtl:
- case LibFunc::sqrt_finite:
- case LibFunc::sqrtf_finite:
- case LibFunc::sqrtl_finite:
+ case LibFunc_sqrt:
+ case LibFunc_sqrtf:
+ case LibFunc_sqrtl:
+ case LibFunc_sqrt_finite:
+ case LibFunc_sqrtf_finite:
+ case LibFunc_sqrtl_finite:
if (visitUnaryFloatCall(I, ISD::FSQRT))
return;
break;
- case LibFunc::floor:
- case LibFunc::floorf:
- case LibFunc::floorl:
+ case LibFunc_floor:
+ case LibFunc_floorf:
+ case LibFunc_floorl:
if (visitUnaryFloatCall(I, ISD::FFLOOR))
return;
break;
- case LibFunc::nearbyint:
- case LibFunc::nearbyintf:
- case LibFunc::nearbyintl:
+ case LibFunc_nearbyint:
+ case LibFunc_nearbyintf:
+ case LibFunc_nearbyintl:
if (visitUnaryFloatCall(I, ISD::FNEARBYINT))
return;
break;
- case LibFunc::ceil:
- case LibFunc::ceilf:
- case LibFunc::ceill:
+ case LibFunc_ceil:
+ case LibFunc_ceilf:
+ case LibFunc_ceill:
if (visitUnaryFloatCall(I, ISD::FCEIL))
return;
break;
- case LibFunc::rint:
- case LibFunc::rintf:
- case LibFunc::rintl:
+ case LibFunc_rint:
+ case LibFunc_rintf:
+ case LibFunc_rintl:
if (visitUnaryFloatCall(I, ISD::FRINT))
return;
break;
- case LibFunc::round:
- case LibFunc::roundf:
- case LibFunc::roundl:
+ case LibFunc_round:
+ case LibFunc_roundf:
+ case LibFunc_roundl:
if (visitUnaryFloatCall(I, ISD::FROUND))
return;
break;
- case LibFunc::trunc:
- case LibFunc::truncf:
- case LibFunc::truncl:
+ case LibFunc_trunc:
+ case LibFunc_truncf:
+ case LibFunc_truncl:
if (visitUnaryFloatCall(I, ISD::FTRUNC))
return;
break;
- case LibFunc::log2:
- case LibFunc::log2f:
- case LibFunc::log2l:
+ case LibFunc_log2:
+ case LibFunc_log2f:
+ case LibFunc_log2l:
if (visitUnaryFloatCall(I, ISD::FLOG2))
return;
break;
- case LibFunc::exp2:
- case LibFunc::exp2f:
- case LibFunc::exp2l:
+ case LibFunc_exp2:
+ case LibFunc_exp2f:
+ case LibFunc_exp2l:
if (visitUnaryFloatCall(I, ISD::FEXP2))
return;
break;
- case LibFunc::memcmp:
+ case LibFunc_memcmp:
if (visitMemCmpCall(I))
return;
break;
- case LibFunc::mempcpy:
+ case LibFunc_mempcpy:
if (visitMemPCpyCall(I))
return;
break;
- case LibFunc::memchr:
+ case LibFunc_memchr:
if (visitMemChrCall(I))
return;
break;
- case LibFunc::strcpy:
+ case LibFunc_strcpy:
if (visitStrCpyCall(I, false))
return;
break;
- case LibFunc::stpcpy:
+ case LibFunc_stpcpy:
if (visitStrCpyCall(I, true))
return;
break;
- case LibFunc::strcmp:
+ case LibFunc_strcmp:
if (visitStrCmpCall(I))
return;
break;
- case LibFunc::strlen:
+ case LibFunc_strlen:
if (visitStrLenCall(I))
return;
break;
- case LibFunc::strnlen:
+ case LibFunc_strnlen:
if (visitStrNLenCall(I))
return;
break;
@@ -6648,7 +6864,7 @@ static SDValue getAddressForMemoryInput(SDValue Chain, const SDLoc &Location,
unsigned Align = DL.getPrefTypeAlignment(Ty);
MachineFunction &MF = DAG.getMachineFunction();
int SSFI = MF.getFrameInfo().CreateStackObject(TySize, Align, false);
- SDValue StackSlot = DAG.getFrameIndex(SSFI, TLI.getPointerTy(DL));
+ SDValue StackSlot = DAG.getFrameIndex(SSFI, TLI.getFrameIndexTy(DL));
Chain = DAG.getStore(Chain, Location, OpInfo.CallOperand, StackSlot,
MachinePointerInfo::getFixedStack(MF, SSFI));
OpInfo.CallOperand = StackSlot;
@@ -6671,12 +6887,12 @@ static void GetRegistersForValue(SelectionDAG &DAG, const TargetLowering &TLI,
MachineFunction &MF = DAG.getMachineFunction();
SmallVector<unsigned, 4> Regs;
+ const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
// If this is a constraint for a single physreg, or a constraint for a
// register class, find it.
std::pair<unsigned, const TargetRegisterClass *> PhysReg =
- TLI.getRegForInlineAsmConstraint(MF.getSubtarget().getRegisterInfo(),
- OpInfo.ConstraintCode,
+ TLI.getRegForInlineAsmConstraint(&TRI, OpInfo.ConstraintCode,
OpInfo.ConstraintVT);
unsigned NumRegs = 1;
@@ -6684,12 +6900,12 @@ static void GetRegistersForValue(SelectionDAG &DAG, const TargetLowering &TLI,
// If this is an FP input in an integer register (or vice versa), insert a
// bitcast of the input value. More generally, handle any case where the input
// value disagrees with the register class we plan to stick this in.
- if (OpInfo.Type == InlineAsm::isInput &&
- PhysReg.second && !PhysReg.second->hasType(OpInfo.ConstraintVT)) {
+ if (OpInfo.Type == InlineAsm::isInput && PhysReg.second &&
+ !TRI.isTypeLegalForClass(*PhysReg.second, OpInfo.ConstraintVT)) {
// Try to convert to the first EVT that the reg class contains. If the
// types are identical size, use a bitcast to convert (e.g. two differing
// vector types).
- MVT RegVT = *PhysReg.second->vt_begin();
+ MVT RegVT = *TRI.legalclasstypes_begin(*PhysReg.second);
if (RegVT.getSizeInBits() == OpInfo.CallOperand.getValueSizeInBits()) {
OpInfo.CallOperand = DAG.getNode(ISD::BITCAST, DL,
RegVT, OpInfo.CallOperand);
@@ -6717,12 +6933,12 @@ static void GetRegistersForValue(SelectionDAG &DAG, const TargetLowering &TLI,
if (unsigned AssignedReg = PhysReg.first) {
const TargetRegisterClass *RC = PhysReg.second;
if (OpInfo.ConstraintVT == MVT::Other)
- ValueVT = *RC->vt_begin();
+ ValueVT = *TRI.legalclasstypes_begin(*RC);
// Get the actual register value type. This is important, because the user
// may have asked for (e.g.) the AX register in i32 type. We need to
// remember that AX is actually i16 to get the right extension.
- RegVT = *RC->vt_begin();
+ RegVT = *TRI.legalclasstypes_begin(*RC);
// This is an explicit reference to a physical register.
Regs.push_back(AssignedReg);
@@ -6748,7 +6964,7 @@ static void GetRegistersForValue(SelectionDAG &DAG, const TargetLowering &TLI,
// Otherwise, if this was a reference to an LLVM register class, create vregs
// for this reference.
if (const TargetRegisterClass *RC = PhysReg.second) {
- RegVT = *RC->vt_begin();
+ RegVT = *TRI.legalclasstypes_begin(*RC);
if (OpInfo.ConstraintVT == MVT::Other)
ValueVT = RegVT;
@@ -7085,8 +7301,8 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
SDLoc dl = getCurSDLoc();
// Use the produced MatchedRegs object to copy the input value into the
// matched registers and add the corresponding inline asm operands.
- MatchedRegs.getCopyToRegs(InOperandVal, DAG, dl,
- Chain, &Flag, CS.getInstruction());
+ MatchedRegs.getCopyToRegs(InOperandVal, DAG, dl, Chain, &Flag,
+ CS.getInstruction());
MatchedRegs.AddInlineAsmOperands(InlineAsm::Kind_RegUse,
true, OpInfo.getMatchedOperand(), dl,
DAG, AsmNodeOperands);
@@ -7361,7 +7577,7 @@ void SelectionDAGBuilder::populateCallLoweringInfo(
// Populate the argument list.
// Attributes for args start at offset 1, after the return attribute.
- for (unsigned ArgI = ArgIdx, ArgE = ArgIdx + NumArgs, AttrI = ArgIdx + 1;
+ for (unsigned ArgI = ArgIdx, ArgE = ArgIdx + NumArgs;
ArgI != ArgE; ++ArgI) {
const Value *V = CS->getOperand(ArgI);
@@ -7370,7 +7586,7 @@ void SelectionDAGBuilder::populateCallLoweringInfo(
TargetLowering::ArgListEntry Entry;
Entry.Node = getValue(V);
Entry.Ty = V->getType();
- Entry.setAttributes(&CS, AttrI);
+ Entry.setAttributes(&CS, ArgIdx);
Args.push_back(Entry);
}
@@ -7411,7 +7627,7 @@ static void addStackMapLiveVars(ImmutableCallSite CS, unsigned StartIdx,
} else if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(OpVal)) {
const TargetLowering &TLI = Builder.DAG.getTargetLoweringInfo();
Ops.push_back(Builder.DAG.getTargetFrameIndex(
- FI->getIndex(), TLI.getPointerTy(Builder.DAG.getDataLayout())));
+ FI->getIndex(), TLI.getFrameIndexTy(Builder.DAG.getDataLayout())));
} else
Ops.push_back(OpVal);
}
@@ -7437,11 +7653,11 @@ void SelectionDAGBuilder::visitStackmap(const CallInst &CI) {
// have to worry about calling conventions and target specific lowering code.
// Instead we perform the call lowering right here.
//
- // chain, flag = CALLSEQ_START(chain, 0)
+ // chain, flag = CALLSEQ_START(chain, 0, 0)
// chain, flag = STACKMAP(id, nbytes, ..., chain, flag)
// chain, flag = CALLSEQ_END(chain, 0, 0, flag)
//
- Chain = DAG.getCALLSEQ_START(getRoot(), NullPtr, DL);
+ Chain = DAG.getCALLSEQ_START(getRoot(), 0, 0, DL);
InFlag = Chain.getValue(1);
// Add the <id> and <numBytes> constants.
@@ -7631,9 +7847,79 @@ void SelectionDAGBuilder::visitPatchpoint(ImmutableCallSite CS,
FuncInfo.MF->getFrameInfo().setHasPatchPoint();
}
-/// Returns an AttributeSet representing the attributes applied to the return
+void SelectionDAGBuilder::visitVectorReduce(const CallInst &I,
+ unsigned Intrinsic) {
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ SDValue Op1 = getValue(I.getArgOperand(0));
+ SDValue Op2;
+ if (I.getNumArgOperands() > 1)
+ Op2 = getValue(I.getArgOperand(1));
+ SDLoc dl = getCurSDLoc();
+ EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
+ SDValue Res;
+ FastMathFlags FMF;
+ if (isa<FPMathOperator>(I))
+ FMF = I.getFastMathFlags();
+ SDNodeFlags SDFlags;
+ SDFlags.setNoNaNs(FMF.noNaNs());
+
+ switch (Intrinsic) {
+ case Intrinsic::experimental_vector_reduce_fadd:
+ if (FMF.unsafeAlgebra())
+ Res = DAG.getNode(ISD::VECREDUCE_FADD, dl, VT, Op2);
+ else
+ Res = DAG.getNode(ISD::VECREDUCE_STRICT_FADD, dl, VT, Op1, Op2);
+ break;
+ case Intrinsic::experimental_vector_reduce_fmul:
+ if (FMF.unsafeAlgebra())
+ Res = DAG.getNode(ISD::VECREDUCE_FMUL, dl, VT, Op2);
+ else
+ Res = DAG.getNode(ISD::VECREDUCE_STRICT_FMUL, dl, VT, Op1, Op2);
+ break;
+ case Intrinsic::experimental_vector_reduce_add:
+ Res = DAG.getNode(ISD::VECREDUCE_ADD, dl, VT, Op1);
+ break;
+ case Intrinsic::experimental_vector_reduce_mul:
+ Res = DAG.getNode(ISD::VECREDUCE_MUL, dl, VT, Op1);
+ break;
+ case Intrinsic::experimental_vector_reduce_and:
+ Res = DAG.getNode(ISD::VECREDUCE_AND, dl, VT, Op1);
+ break;
+ case Intrinsic::experimental_vector_reduce_or:
+ Res = DAG.getNode(ISD::VECREDUCE_OR, dl, VT, Op1);
+ break;
+ case Intrinsic::experimental_vector_reduce_xor:
+ Res = DAG.getNode(ISD::VECREDUCE_XOR, dl, VT, Op1);
+ break;
+ case Intrinsic::experimental_vector_reduce_smax:
+ Res = DAG.getNode(ISD::VECREDUCE_SMAX, dl, VT, Op1);
+ break;
+ case Intrinsic::experimental_vector_reduce_smin:
+ Res = DAG.getNode(ISD::VECREDUCE_SMIN, dl, VT, Op1);
+ break;
+ case Intrinsic::experimental_vector_reduce_umax:
+ Res = DAG.getNode(ISD::VECREDUCE_UMAX, dl, VT, Op1);
+ break;
+ case Intrinsic::experimental_vector_reduce_umin:
+ Res = DAG.getNode(ISD::VECREDUCE_UMIN, dl, VT, Op1);
+ break;
+ case Intrinsic::experimental_vector_reduce_fmax: {
+ Res = DAG.getNode(ISD::VECREDUCE_FMAX, dl, VT, Op1, SDFlags);
+ break;
+ }
+ case Intrinsic::experimental_vector_reduce_fmin: {
+ Res = DAG.getNode(ISD::VECREDUCE_FMIN, dl, VT, Op1, SDFlags);
+ break;
+ }
+ default:
+ llvm_unreachable("Unhandled vector reduce intrinsic");
+ }
+ setValue(&I, Res);
+}
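+
+For reference, the lane semantics of the nodes mapped above, modeled as scalar C++ (an explanatory sketch, not the lowering): the plain reductions fold all lanes with the underlying operation in no fixed order, while the STRICT_FADD/STRICT_FMUL forms take Op1 as a scalar start value and fold the lanes of Op2 strictly in order.
+
+    #include <cstdint>
+
+    // Model of ISD::VECREDUCE_ADD over a v4i32 operand.
+    int32_t vecreduceAdd(const int32_t (&Lanes)[4]) {
+      int32_t Acc = 0;
+      for (int32_t L : Lanes)
+        Acc += L;
+      return Acc;
+    }
+
+    // Model of ISD::VECREDUCE_STRICT_FADD: Start is Op1, Lanes is Op2,
+    // and the accumulation order is fixed.
+    double vecreduceStrictFAdd(double Start, const double (&Lanes)[4]) {
+      double Acc = Start;
+      for (double L : Lanes)
+        Acc += L;
+      return Acc;
+    }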
+
+/// Returns an AttributeList representing the attributes applied to the return
/// value of the given call.
-static AttributeSet getReturnAttrs(TargetLowering::CallLoweringInfo &CLI) {
+static AttributeList getReturnAttrs(TargetLowering::CallLoweringInfo &CLI) {
SmallVector<Attribute::AttrKind, 2> Attrs;
if (CLI.RetSExt)
Attrs.push_back(Attribute::SExt);
@@ -7642,8 +7928,8 @@ static AttributeSet getReturnAttrs(TargetLowering::CallLoweringInfo &CLI) {
if (CLI.IsInReg)
Attrs.push_back(Attribute::InReg);
- return AttributeSet::get(CLI.RetTy->getContext(), AttributeSet::ReturnIndex,
- Attrs);
+ return AttributeList::get(CLI.RetTy->getContext(), AttributeList::ReturnIndex,
+ Attrs);
}
/// TargetLowering::LowerCallTo - This is the default LowerCallTo
@@ -7660,6 +7946,22 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
auto &DL = CLI.DAG.getDataLayout();
ComputeValueVTs(*this, DL, CLI.RetTy, RetTys, &Offsets);
+ if (CLI.IsPostTypeLegalization) {
+ // If we are lowering a libcall after legalization, split the return type.
+ SmallVector<EVT, 4> OldRetTys = std::move(RetTys);
+ SmallVector<uint64_t, 4> OldOffsets = std::move(Offsets);
+ for (size_t i = 0, e = OldRetTys.size(); i != e; ++i) {
+ EVT RetVT = OldRetTys[i];
+ uint64_t Offset = OldOffsets[i];
+ MVT RegisterVT = getRegisterType(CLI.RetTy->getContext(), RetVT);
+ unsigned NumRegs = getNumRegisters(CLI.RetTy->getContext(), RetVT);
+ unsigned RegisterVTSize = RegisterVT.getSizeInBits();
+ RetTys.append(NumRegs, RegisterVT);
+ for (unsigned j = 0; j != NumRegs; ++j)
+ Offsets.push_back(Offset + j * RegisterVTSize);
+ }
+ }
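+
+A minimal standalone sketch of what this splitting loop computes (the ceil division stands in for getNumRegisters and is an assumption of the sketch; sizes and strides are in bits, matching RegisterVT.getSizeInBits() above):
+
+    #include <cstdint>
+    #include <utility>
+    #include <vector>
+
+    using Piece = std::pair<unsigned, uint64_t>; // {size in bits, offset}
+
+    // Fan each return piece out into RegBits-sized register pieces at
+    // consecutive offsets, mirroring the RetTys/Offsets rewrite above.
+    std::vector<Piece> splitRet(const std::vector<Piece> &Old,
+                                unsigned RegBits) {
+      std::vector<Piece> New;
+      for (const Piece &P : Old) {
+        unsigned NumRegs = (P.first + RegBits - 1) / RegBits;
+        for (unsigned J = 0; J != NumRegs; ++J)
+          New.push_back({RegBits, P.second + J * RegBits});
+      }
+      return New;
+    }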
+
SmallVector<ISD::OutputArg, 4> Outs;
GetReturnInfo(CLI.RetTy, getReturnAttrs(CLI), Outs, *this, DL);
@@ -7679,19 +7981,19 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
DemoteStackIdx = MF.getFrameInfo().CreateStackObject(TySize, Align, false);
Type *StackSlotPtrType = PointerType::getUnqual(CLI.RetTy);
- DemoteStackSlot = CLI.DAG.getFrameIndex(DemoteStackIdx, getPointerTy(DL));
+ DemoteStackSlot = CLI.DAG.getFrameIndex(DemoteStackIdx, getFrameIndexTy(DL));
ArgListEntry Entry;
Entry.Node = DemoteStackSlot;
Entry.Ty = StackSlotPtrType;
- Entry.isSExt = false;
- Entry.isZExt = false;
- Entry.isInReg = false;
- Entry.isSRet = true;
- Entry.isNest = false;
- Entry.isByVal = false;
- Entry.isReturned = false;
- Entry.isSwiftSelf = false;
- Entry.isSwiftError = false;
+ Entry.IsSExt = false;
+ Entry.IsZExt = false;
+ Entry.IsInReg = false;
+ Entry.IsSRet = true;
+ Entry.IsNest = false;
+ Entry.IsByVal = false;
+ Entry.IsReturned = false;
+ Entry.IsSwiftSelf = false;
+ Entry.IsSwiftError = false;
Entry.Alignment = Align;
CLI.getArgs().insert(CLI.getArgs().begin(), Entry);
CLI.RetTy = Type::getVoidTy(CLI.RetTy->getContext());
@@ -7702,8 +8004,10 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
} else {
for (unsigned I = 0, E = RetTys.size(); I != E; ++I) {
EVT VT = RetTys[I];
- MVT RegisterVT = getRegisterType(CLI.RetTy->getContext(), VT);
- unsigned NumRegs = getNumRegisters(CLI.RetTy->getContext(), VT);
+ MVT RegisterVT =
+ getRegisterTypeForCallingConv(CLI.RetTy->getContext(), VT);
+ unsigned NumRegs =
+ getNumRegistersForCallingConv(CLI.RetTy->getContext(), VT);
for (unsigned i = 0; i != NumRegs; ++i) {
ISD::InputArg MyFlags;
MyFlags.VT = RegisterVT;
@@ -7724,7 +8028,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
ArgListTy &Args = CLI.getArgs();
if (supportSwiftError()) {
for (unsigned i = 0, e = Args.size(); i != e; ++i) {
- if (Args[i].isSwiftError) {
+ if (Args[i].IsSwiftError) {
ISD::InputArg MyFlags;
MyFlags.VT = getPointerTy(DL);
MyFlags.ArgVT = EVT(getPointerTy(DL));
@@ -7740,8 +8044,9 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
for (unsigned i = 0, e = Args.size(); i != e; ++i) {
SmallVector<EVT, 4> ValueVTs;
ComputeValueVTs(*this, DL, Args[i].Ty, ValueVTs);
+ // FIXME: Split arguments if CLI.IsPostTypeLegalization
Type *FinalType = Args[i].Ty;
- if (Args[i].isByVal)
+ if (Args[i].IsByVal)
FinalType = cast<PointerType>(Args[i].Ty)->getElementType();
bool NeedsRegBlock = functionArgumentNeedsConsecutiveRegisters(
FinalType, CLI.CallConv, CLI.IsVarArg);
@@ -7752,13 +8057,17 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
SDValue Op = SDValue(Args[i].Node.getNode(),
Args[i].Node.getResNo() + Value);
ISD::ArgFlagsTy Flags;
- unsigned OriginalAlignment = DL.getABITypeAlignment(ArgTy);
- if (Args[i].isZExt)
+ // Certain targets (such as MIPS) may have a different ABI alignment
+ // for a type depending on the context. Give the target a chance to
+ // specify the alignment it wants.
+ unsigned OriginalAlignment = getABIAlignmentForCallingConv(ArgTy, DL);
+
+ if (Args[i].IsZExt)
Flags.setZExt();
- if (Args[i].isSExt)
+ if (Args[i].IsSExt)
Flags.setSExt();
- if (Args[i].isInReg) {
+ if (Args[i].IsInReg) {
// If we are using vectorcall calling convention, a structure that is
// passed InReg - is surely an HVA
if (CLI.CallConv == CallingConv::X86_VectorCall &&
@@ -7771,15 +8080,15 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
// Set InReg Flag
Flags.setInReg();
}
- if (Args[i].isSRet)
+ if (Args[i].IsSRet)
Flags.setSRet();
- if (Args[i].isSwiftSelf)
+ if (Args[i].IsSwiftSelf)
Flags.setSwiftSelf();
- if (Args[i].isSwiftError)
+ if (Args[i].IsSwiftError)
Flags.setSwiftError();
- if (Args[i].isByVal)
+ if (Args[i].IsByVal)
Flags.setByVal();
- if (Args[i].isInAlloca) {
+ if (Args[i].IsInAlloca) {
Flags.setInAlloca();
// Set the byval flag for CCAssignFn callbacks that don't know about
// inalloca. This way we can know how many bytes we should've allocated
@@ -7788,7 +8097,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
// in the various CC lowering callbacks.
Flags.setByVal();
}
- if (Args[i].isByVal || Args[i].isInAlloca) {
+ if (Args[i].IsByVal || Args[i].IsInAlloca) {
PointerType *Ty = cast<PointerType>(Args[i].Ty);
Type *ElementTy = Ty->getElementType();
Flags.setByValSize(DL.getTypeAllocSize(ElementTy));
@@ -7801,24 +8110,25 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
FrameAlign = getByValTypeAlignment(ElementTy, DL);
Flags.setByValAlign(FrameAlign);
}
- if (Args[i].isNest)
+ if (Args[i].IsNest)
Flags.setNest();
if (NeedsRegBlock)
Flags.setInConsecutiveRegs();
Flags.setOrigAlign(OriginalAlignment);
- MVT PartVT = getRegisterType(CLI.RetTy->getContext(), VT);
- unsigned NumParts = getNumRegisters(CLI.RetTy->getContext(), VT);
+ MVT PartVT = getRegisterTypeForCallingConv(CLI.RetTy->getContext(), VT);
+ unsigned NumParts =
+ getNumRegistersForCallingConv(CLI.RetTy->getContext(), VT);
SmallVector<SDValue, 4> Parts(NumParts);
ISD::NodeType ExtendKind = ISD::ANY_EXTEND;
- if (Args[i].isSExt)
+ if (Args[i].IsSExt)
ExtendKind = ISD::SIGN_EXTEND;
- else if (Args[i].isZExt)
+ else if (Args[i].IsZExt)
ExtendKind = ISD::ZERO_EXTEND;
// Conservatively only handle 'returned' on non-vectors for now
- if (Args[i].isReturned && !Op.getValueType().isVector()) {
+ if (Args[i].IsReturned && !Op.getValueType().isVector()) {
assert(CLI.RetTy == Args[i].Ty && RetTys.size() == NumValues &&
"unexpected use of 'returned'");
// Before passing 'returned' to the target lowering code, ensure that
@@ -7832,13 +8142,14 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
// parameter extension method is not compatible with the return
// extension method
if ((NumParts * PartVT.getSizeInBits() == VT.getSizeInBits()) ||
- (ExtendKind != ISD::ANY_EXTEND &&
- CLI.RetSExt == Args[i].isSExt && CLI.RetZExt == Args[i].isZExt))
- Flags.setReturned();
+ (ExtendKind != ISD::ANY_EXTEND && CLI.RetSExt == Args[i].IsSExt &&
+ CLI.RetZExt == Args[i].IsZExt))
+ Flags.setReturned();
}
getCopyToParts(CLI.DAG, CLI.DL, Op, &Parts[0], NumParts, PartVT,
- CLI.CS ? CLI.CS->getInstruction() : nullptr, ExtendKind);
+ CLI.CS ? CLI.CS->getInstruction() : nullptr, ExtendKind,
+ true);
for (unsigned j = 0; j != NumParts; ++j) {
// if it isn't first piece, alignment must be 1
@@ -7916,7 +8227,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
for (unsigned i = 0; i < NumValues; ++i) {
SDValue Add = CLI.DAG.getNode(ISD::ADD, CLI.DL, PtrVT, DemoteStackSlot,
CLI.DAG.getConstant(Offsets[i], CLI.DL,
- PtrVT), &Flags);
+ PtrVT), Flags);
SDValue L = CLI.DAG.getLoad(
RetTys[i], CLI.DL, CLI.Chain, Add,
MachinePointerInfo::getFixedStack(CLI.DAG.getMachineFunction(),
@@ -7938,12 +8249,14 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
unsigned CurReg = 0;
for (unsigned I = 0, E = RetTys.size(); I != E; ++I) {
EVT VT = RetTys[I];
- MVT RegisterVT = getRegisterType(CLI.RetTy->getContext(), VT);
- unsigned NumRegs = getNumRegisters(CLI.RetTy->getContext(), VT);
+ MVT RegisterVT =
+ getRegisterTypeForCallingConv(CLI.RetTy->getContext(), VT);
+ unsigned NumRegs =
+ getNumRegistersForCallingConv(CLI.RetTy->getContext(), VT);
ReturnValues.push_back(getCopyFromParts(CLI.DAG, CLI.DL, &InVals[CurReg],
NumRegs, RegisterVT, VT, nullptr,
- AssertOp));
+ AssertOp, true));
CurReg += NumRegs;
}
@@ -7979,8 +8292,11 @@ SelectionDAGBuilder::CopyValueToVirtualRegister(const Value *V, unsigned Reg) {
assert(!TargetRegisterInfo::isPhysicalRegister(Reg) && "Is a physreg");
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ // If this is an InlineAsm, we have to match the registers required, not the
+ // notional registers required by the type.
+
RegsForValue RFV(V->getContext(), TLI, DAG.getDataLayout(), Reg,
- V->getType());
+ V->getType(), isABIRegCopy(V));
SDValue Chain = DAG.getEntryNode();
ISD::NodeType ExtendType = (FuncInfo.PreferredExtendType.find(V) ==
@@ -8010,6 +8326,173 @@ static bool isOnlyUsedInEntryBlock(const Argument *A, bool FastISel) {
return true;
}
+typedef DenseMap<const Argument *,
+ std::pair<const AllocaInst *, const StoreInst *>>
+ ArgCopyElisionMapTy;
+
+/// Scan the entry block of the function in FuncInfo for arguments that look
+/// like copies into a local alloca. Record any copied arguments in
+/// ArgCopyElisionCandidates.
+static void
+findArgumentCopyElisionCandidates(const DataLayout &DL,
+ FunctionLoweringInfo *FuncInfo,
+ ArgCopyElisionMapTy &ArgCopyElisionCandidates) {
+ // Record the state of every static alloca used in the entry block. Argument
+ // allocas are all used in the entry block, so we need approximately as many
+ // entries as we have arguments.
+ enum StaticAllocaInfo { Unknown, Clobbered, Elidable };
+ SmallDenseMap<const AllocaInst *, StaticAllocaInfo, 8> StaticAllocas;
+ unsigned NumArgs = FuncInfo->Fn->arg_size();
+ StaticAllocas.reserve(NumArgs * 2);
+
+ auto GetInfoIfStaticAlloca = [&](const Value *V) -> StaticAllocaInfo * {
+ if (!V)
+ return nullptr;
+ V = V->stripPointerCasts();
+ const auto *AI = dyn_cast<AllocaInst>(V);
+ if (!AI || !AI->isStaticAlloca() || !FuncInfo->StaticAllocaMap.count(AI))
+ return nullptr;
+ auto Iter = StaticAllocas.insert({AI, Unknown});
+ return &Iter.first->second;
+ };
+
+ // Look for stores of arguments to static allocas. Look through bitcasts and
+ // GEPs to handle type coercions, as long as the alloca is fully initialized
+ // by the store. Any non-store use of an alloca escapes it and any subsequent
+ // unanalyzed store might write it.
+ // FIXME: Handle structs initialized with multiple stores.
+ for (const Instruction &I : FuncInfo->Fn->getEntryBlock()) {
+ // Look for stores, and handle non-store uses conservatively.
+ const auto *SI = dyn_cast<StoreInst>(&I);
+ if (!SI) {
+ // We will look through cast uses, so ignore them completely.
+ if (I.isCast())
+ continue;
+ // Ignore debug info intrinsics, they don't escape or store to allocas.
+ if (isa<DbgInfoIntrinsic>(I))
+ continue;
+ // This is an unknown instruction. Assume it escapes or writes to all
+ // static alloca operands.
+ for (const Use &U : I.operands()) {
+ if (StaticAllocaInfo *Info = GetInfoIfStaticAlloca(U))
+ *Info = StaticAllocaInfo::Clobbered;
+ }
+ continue;
+ }
+
+ // If the stored value is a static alloca, mark it as escaped.
+ if (StaticAllocaInfo *Info = GetInfoIfStaticAlloca(SI->getValueOperand()))
+ *Info = StaticAllocaInfo::Clobbered;
+
+ // Check if the destination is a static alloca.
+ const Value *Dst = SI->getPointerOperand()->stripPointerCasts();
+ StaticAllocaInfo *Info = GetInfoIfStaticAlloca(Dst);
+ if (!Info)
+ continue;
+ const AllocaInst *AI = cast<AllocaInst>(Dst);
+
+ // Skip allocas that have been initialized or clobbered.
+ if (*Info != StaticAllocaInfo::Unknown)
+ continue;
+
+ // Check if the stored value is an argument, and that this store fully
+ // initializes the alloca. Don't elide copies from the same argument twice.
+ const Value *Val = SI->getValueOperand()->stripPointerCasts();
+ const auto *Arg = dyn_cast<Argument>(Val);
+ if (!Arg || Arg->hasInAllocaAttr() || Arg->hasByValAttr() ||
+ Arg->getType()->isEmptyTy() ||
+ DL.getTypeStoreSize(Arg->getType()) !=
+ DL.getTypeAllocSize(AI->getAllocatedType()) ||
+ ArgCopyElisionCandidates.count(Arg)) {
+ *Info = StaticAllocaInfo::Clobbered;
+ continue;
+ }
+
+ DEBUG(dbgs() << "Found argument copy elision candidate: " << *AI << '\n');
+
+ // Mark this alloca and store for argument copy elision.
+ *Info = StaticAllocaInfo::Elidable;
+ ArgCopyElisionCandidates.insert({Arg, {AI, SI}});
+
+ // Stop scanning if we've seen all arguments. This will happen early in -O0
+ // builds, which is useful, because -O0 builds have large entry blocks and
+ // many allocas.
+ if (ArgCopyElisionCandidates.size() == NumArgs)
+ break;
+ }
+}
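+
+The entry-block shape the scan recognizes, written as source code (illustrative names; 'Big' and 'use' are not from the patch): an argument whose store into a static alloca fully initializes it and is its only interesting use.
+
+    struct Big { long V[4]; };
+    void use(Big *);
+
+    // At -O0 the entry block of 'f' holds an alloca for 'Local' plus a
+    // store of the incoming argument into it -- exactly the (AllocaInst,
+    // StoreInst) pair recorded in ArgCopyElisionCandidates. If 'B' already
+    // arrives in a fixed stack object, that copy can later be elided.
+    void f(Big B) {
+      Big Local = B;
+      use(&Local);
+    }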
+
+/// Try to elide argument copies from memory into a local alloca. Succeeds if
+/// ArgVal is a load from a suitable fixed stack object.
+static void tryToElideArgumentCopy(
+ FunctionLoweringInfo *FuncInfo, SmallVectorImpl<SDValue> &Chains,
+ DenseMap<int, int> &ArgCopyElisionFrameIndexMap,
+ SmallPtrSetImpl<const Instruction *> &ElidedArgCopyInstrs,
+ ArgCopyElisionMapTy &ArgCopyElisionCandidates, const Argument &Arg,
+ SDValue ArgVal, bool &ArgHasUses) {
+ // Check if this is a load from a fixed stack object.
+ auto *LNode = dyn_cast<LoadSDNode>(ArgVal);
+ if (!LNode)
+ return;
+ auto *FINode = dyn_cast<FrameIndexSDNode>(LNode->getBasePtr().getNode());
+ if (!FINode)
+ return;
+
+ // Check that the fixed stack object is the right size and alignment.
+ // Look at the alignment that the user wrote on the alloca instead of looking
+ // at the stack object.
+ auto ArgCopyIter = ArgCopyElisionCandidates.find(&Arg);
+ assert(ArgCopyIter != ArgCopyElisionCandidates.end());
+ const AllocaInst *AI = ArgCopyIter->second.first;
+ int FixedIndex = FINode->getIndex();
+ int &AllocaIndex = FuncInfo->StaticAllocaMap[AI];
+ int OldIndex = AllocaIndex;
+ MachineFrameInfo &MFI = FuncInfo->MF->getFrameInfo();
+ if (MFI.getObjectSize(FixedIndex) != MFI.getObjectSize(OldIndex)) {
+ DEBUG(dbgs() << " argument copy elision failed due to bad fixed stack "
+ "object size\n");
+ return;
+ }
+ unsigned RequiredAlignment = AI->getAlignment();
+ if (!RequiredAlignment) {
+ RequiredAlignment = FuncInfo->MF->getDataLayout().getABITypeAlignment(
+ AI->getAllocatedType());
+ }
+ if (MFI.getObjectAlignment(FixedIndex) < RequiredAlignment) {
+ DEBUG(dbgs() << " argument copy elision failed: alignment of alloca "
+ "greater than stack argument alignment ("
+ << RequiredAlignment << " vs "
+ << MFI.getObjectAlignment(FixedIndex) << ")\n");
+ return;
+ }
+
+ // Perform the elision. Delete the old stack object and replace its only use
+ // in the variable info map. Mark the stack object as mutable.
+ DEBUG({
+ dbgs() << "Eliding argument copy from " << Arg << " to " << *AI << '\n'
+ << " Replacing frame index " << OldIndex << " with " << FixedIndex
+ << '\n';
+ });
+ MFI.RemoveStackObject(OldIndex);
+ MFI.setIsImmutableObjectIndex(FixedIndex, false);
+ AllocaIndex = FixedIndex;
+ ArgCopyElisionFrameIndexMap.insert({OldIndex, FixedIndex});
+ Chains.push_back(ArgVal.getValue(1));
+
+ // Avoid emitting code for the store implementing the copy.
+ const StoreInst *SI = ArgCopyIter->second.second;
+ ElidedArgCopyInstrs.insert(SI);
+
+ // Check for uses of the argument again so that we can avoid exporting ArgVal
+ // if it isn't used by anything other than the store.
+ for (const Value *U : Arg.users()) {
+ if (U != SI) {
+ ArgHasUses = true;
+ break;
+ }
+ }
+}
+
void SelectionDAGISel::LowerArguments(const Function &F) {
SelectionDAG &DAG = SDB->DAG;
SDLoc dl = SDB->getCurSDLoc();
@@ -8032,16 +8515,21 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
Ins.push_back(RetArg);
}
+ // Look for stores of arguments to static allocas. Mark such arguments with a
+ // flag to ask the target to give us the memory location of that argument if
+ // available.
+ ArgCopyElisionMapTy ArgCopyElisionCandidates;
+ findArgumentCopyElisionCandidates(DL, FuncInfo, ArgCopyElisionCandidates);
+
// Set up the incoming argument description vector.
- unsigned Idx = 1;
- for (Function::const_arg_iterator I = F.arg_begin(), E = F.arg_end();
- I != E; ++I, ++Idx) {
+ for (const Argument &Arg : F.args()) {
+ unsigned ArgNo = Arg.getArgNo();
SmallVector<EVT, 4> ValueVTs;
- ComputeValueVTs(*TLI, DAG.getDataLayout(), I->getType(), ValueVTs);
- bool isArgValueUsed = !I->use_empty();
+ ComputeValueVTs(*TLI, DAG.getDataLayout(), Arg.getType(), ValueVTs);
+ bool isArgValueUsed = !Arg.use_empty();
unsigned PartBase = 0;
- Type *FinalType = I->getType();
- if (F.getAttributes().hasAttribute(Idx, Attribute::ByVal))
+ Type *FinalType = Arg.getType();
+ if (Arg.hasAttribute(Attribute::ByVal))
FinalType = cast<PointerType>(FinalType)->getElementType();
bool NeedsRegBlock = TLI->functionArgumentNeedsConsecutiveRegisters(
FinalType, F.getCallingConv(), F.isVarArg());
@@ -8050,17 +8538,22 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
EVT VT = ValueVTs[Value];
Type *ArgTy = VT.getTypeForEVT(*DAG.getContext());
ISD::ArgFlagsTy Flags;
- unsigned OriginalAlignment = DL.getABITypeAlignment(ArgTy);
- if (F.getAttributes().hasAttribute(Idx, Attribute::ZExt))
+ // Certain targets (such as MIPS) may have a different ABI alignment
+ // for a type depending on the context. Give the target a chance to
+ // specify the alignment it wants.
+ unsigned OriginalAlignment =
+ TLI->getABIAlignmentForCallingConv(ArgTy, DL);
+
+ if (Arg.hasAttribute(Attribute::ZExt))
Flags.setZExt();
- if (F.getAttributes().hasAttribute(Idx, Attribute::SExt))
+ if (Arg.hasAttribute(Attribute::SExt))
Flags.setSExt();
- if (F.getAttributes().hasAttribute(Idx, Attribute::InReg)) {
+ if (Arg.hasAttribute(Attribute::InReg)) {
// If we are using vectorcall calling convention, a structure that is
// passed InReg - is surely an HVA
if (F.getCallingConv() == CallingConv::X86_VectorCall &&
- isa<StructType>(I->getType())) {
+ isa<StructType>(Arg.getType())) {
// The first value of a structure is marked
if (0 == Value)
Flags.setHvaStart();
@@ -8069,15 +8562,15 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
// Set InReg Flag
Flags.setInReg();
}
- if (F.getAttributes().hasAttribute(Idx, Attribute::StructRet))
+ if (Arg.hasAttribute(Attribute::StructRet))
Flags.setSRet();
- if (F.getAttributes().hasAttribute(Idx, Attribute::SwiftSelf))
+ if (Arg.hasAttribute(Attribute::SwiftSelf))
Flags.setSwiftSelf();
- if (F.getAttributes().hasAttribute(Idx, Attribute::SwiftError))
+ if (Arg.hasAttribute(Attribute::SwiftError))
Flags.setSwiftError();
- if (F.getAttributes().hasAttribute(Idx, Attribute::ByVal))
+ if (Arg.hasAttribute(Attribute::ByVal))
Flags.setByVal();
- if (F.getAttributes().hasAttribute(Idx, Attribute::InAlloca)) {
+ if (Arg.hasAttribute(Attribute::InAlloca)) {
Flags.setInAlloca();
// Set the byval flag for CCAssignFn callbacks that don't know about
// inalloca. This way we can know how many bytes we should've allocated
@@ -8088,33 +8581,37 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
}
if (F.getCallingConv() == CallingConv::X86_INTR) {
// IA interrupts pass the frame (1st parameter) by value on the stack.
- if (Idx == 1)
+ if (ArgNo == 0)
Flags.setByVal();
}
if (Flags.isByVal() || Flags.isInAlloca()) {
- PointerType *Ty = cast<PointerType>(I->getType());
+ PointerType *Ty = cast<PointerType>(Arg.getType());
Type *ElementTy = Ty->getElementType();
Flags.setByValSize(DL.getTypeAllocSize(ElementTy));
// For ByVal, alignment should be passed from FE. BE will guess if
// this info is not there but there are cases it cannot get right.
unsigned FrameAlign;
- if (F.getParamAlignment(Idx))
- FrameAlign = F.getParamAlignment(Idx);
+ if (Arg.getParamAlignment())
+ FrameAlign = Arg.getParamAlignment();
else
FrameAlign = TLI->getByValTypeAlignment(ElementTy, DL);
Flags.setByValAlign(FrameAlign);
}
- if (F.getAttributes().hasAttribute(Idx, Attribute::Nest))
+ if (Arg.hasAttribute(Attribute::Nest))
Flags.setNest();
if (NeedsRegBlock)
Flags.setInConsecutiveRegs();
Flags.setOrigAlign(OriginalAlignment);
+ if (ArgCopyElisionCandidates.count(&Arg))
+ Flags.setCopyElisionCandidate();
- MVT RegisterVT = TLI->getRegisterType(*CurDAG->getContext(), VT);
- unsigned NumRegs = TLI->getNumRegisters(*CurDAG->getContext(), VT);
+ MVT RegisterVT =
+ TLI->getRegisterTypeForCallingConv(*CurDAG->getContext(), VT);
+ unsigned NumRegs =
+ TLI->getNumRegistersForCallingConv(*CurDAG->getContext(), VT);
for (unsigned i = 0; i != NumRegs; ++i) {
ISD::InputArg MyFlags(Flags, RegisterVT, VT, isArgValueUsed,
- Idx-1, PartBase+i*RegisterVT.getStoreSize());
+ ArgNo, PartBase+i*RegisterVT.getStoreSize());
if (NumRegs > 1 && i == 0)
MyFlags.Flags.setSplit();
// if it isn't first piece, alignment must be 1
@@ -8155,7 +8652,6 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
// Set up the argument values.
unsigned i = 0;
- Idx = 1;
if (!FuncInfo->CanLowerReturn) {
// Create a virtual register for the sret pointer, and put in a copy
// from the sret argument into it.
@@ -8177,49 +8673,63 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
DAG.setRoot(NewRoot);
// i indexes lowered arguments. Bump it past the hidden sret argument.
- // Idx indexes LLVM arguments. Don't touch it.
++i;
}
- for (Function::const_arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E;
- ++I, ++Idx) {
+ SmallVector<SDValue, 4> Chains;
+ DenseMap<int, int> ArgCopyElisionFrameIndexMap;
+ for (const Argument &Arg : F.args()) {
SmallVector<SDValue, 4> ArgValues;
SmallVector<EVT, 4> ValueVTs;
- ComputeValueVTs(*TLI, DAG.getDataLayout(), I->getType(), ValueVTs);
+ ComputeValueVTs(*TLI, DAG.getDataLayout(), Arg.getType(), ValueVTs);
unsigned NumValues = ValueVTs.size();
+ if (NumValues == 0)
+ continue;
+
+ bool ArgHasUses = !Arg.use_empty();
+
+ // Elide the copying store if the target loaded this argument from a
+ // suitable fixed stack object.
+ if (Ins[i].Flags.isCopyElisionCandidate()) {
+ tryToElideArgumentCopy(FuncInfo, Chains, ArgCopyElisionFrameIndexMap,
+ ElidedArgCopyInstrs, ArgCopyElisionCandidates, Arg,
+ InVals[i], ArgHasUses);
+ }
// If this argument is unused then remember its value. It is used to generate
// debugging information.
bool isSwiftErrorArg =
TLI->supportSwiftError() &&
- F.getAttributes().hasAttribute(Idx, Attribute::SwiftError);
- if (I->use_empty() && NumValues && !isSwiftErrorArg) {
- SDB->setUnusedArgValue(&*I, InVals[i]);
+ Arg.hasAttribute(Attribute::SwiftError);
+ if (!ArgHasUses && !isSwiftErrorArg) {
+ SDB->setUnusedArgValue(&Arg, InVals[i]);
// Also remember any frame index for use in FastISel.
if (FrameIndexSDNode *FI =
dyn_cast<FrameIndexSDNode>(InVals[i].getNode()))
- FuncInfo->setArgumentFrameIndex(&*I, FI->getIndex());
+ FuncInfo->setArgumentFrameIndex(&Arg, FI->getIndex());
}
for (unsigned Val = 0; Val != NumValues; ++Val) {
EVT VT = ValueVTs[Val];
- MVT PartVT = TLI->getRegisterType(*CurDAG->getContext(), VT);
- unsigned NumParts = TLI->getNumRegisters(*CurDAG->getContext(), VT);
+ MVT PartVT =
+ TLI->getRegisterTypeForCallingConv(*CurDAG->getContext(), VT);
+ unsigned NumParts =
+ TLI->getNumRegistersForCallingConv(*CurDAG->getContext(), VT);
// Even an apparent 'unused' swifterror argument needs to be returned. So
// we do generate a copy for it that can be used on return from the
// function.
- if (!I->use_empty() || isSwiftErrorArg) {
+ if (ArgHasUses || isSwiftErrorArg) {
Optional<ISD::NodeType> AssertOp;
- if (F.getAttributes().hasAttribute(Idx, Attribute::SExt))
+ if (Arg.hasAttribute(Attribute::SExt))
AssertOp = ISD::AssertSext;
- else if (F.getAttributes().hasAttribute(Idx, Attribute::ZExt))
+ else if (Arg.hasAttribute(Attribute::ZExt))
AssertOp = ISD::AssertZext;
- ArgValues.push_back(getCopyFromParts(DAG, dl, &InVals[i],
- NumParts, PartVT, VT,
- nullptr, AssertOp));
+ ArgValues.push_back(getCopyFromParts(DAG, dl, &InVals[i], NumParts,
+ PartVT, VT, nullptr, AssertOp,
+ true));
}
i += NumParts;
@@ -8232,18 +8742,18 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
// Note down frame index.
if (FrameIndexSDNode *FI =
dyn_cast<FrameIndexSDNode>(ArgValues[0].getNode()))
- FuncInfo->setArgumentFrameIndex(&*I, FI->getIndex());
+ FuncInfo->setArgumentFrameIndex(&Arg, FI->getIndex());
SDValue Res = DAG.getMergeValues(makeArrayRef(ArgValues.data(), NumValues),
SDB->getCurSDLoc());
- SDB->setValue(&*I, Res);
+ SDB->setValue(&Arg, Res);
if (!TM.Options.EnableFastISel && Res.getOpcode() == ISD::BUILD_PAIR) {
if (LoadSDNode *LNode =
dyn_cast<LoadSDNode>(Res.getOperand(0).getNode()))
if (FrameIndexSDNode *FI =
dyn_cast<FrameIndexSDNode>(LNode->getBasePtr().getNode()))
- FuncInfo->setArgumentFrameIndex(&*I, FI->getIndex());
+ FuncInfo->setArgumentFrameIndex(&Arg, FI->getIndex());
}
// Update the SwiftErrorVRegDefMap.
@@ -8263,18 +8773,36 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
// uses with vregs.
unsigned Reg = cast<RegisterSDNode>(Res.getOperand(1))->getReg();
if (TargetRegisterInfo::isVirtualRegister(Reg)) {
- FuncInfo->ValueMap[&*I] = Reg;
+ FuncInfo->ValueMap[&Arg] = Reg;
continue;
}
}
- if (!isOnlyUsedInEntryBlock(&*I, TM.Options.EnableFastISel)) {
- FuncInfo->InitializeRegForValue(&*I);
- SDB->CopyToExportRegsIfNeeded(&*I);
+ if (!isOnlyUsedInEntryBlock(&Arg, TM.Options.EnableFastISel)) {
+ FuncInfo->InitializeRegForValue(&Arg);
+ SDB->CopyToExportRegsIfNeeded(&Arg);
}
}
+ if (!Chains.empty()) {
+ Chains.push_back(NewRoot);
+ NewRoot = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Chains);
+ }
+
+ DAG.setRoot(NewRoot);
+
assert(i == InVals.size() && "Argument register count mismatch!");
+ // If any argument copy elisions occurred and we have debug info, update the
+ // stale frame indices used in the dbg.declare variable info table.
+ MachineFunction::VariableDbgInfoMapTy &DbgDeclareInfo =
+     MF->getVariableDbgInfo();
+ if (!DbgDeclareInfo.empty() && !ArgCopyElisionFrameIndexMap.empty()) {
+ for (MachineFunction::VariableDbgInfo &VI : DbgDeclareInfo) {
+ auto I = ArgCopyElisionFrameIndexMap.find(VI.Slot);
+ if (I != ArgCopyElisionFrameIndexMap.end())
+ VI.Slot = I->second;
+ }
+ }
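+
+The fixup above in miniature, with standard containers standing in for LLVM's types (a sketch, not the patch's code): any dbg.declare slot still naming an elided alloca's old frame index is redirected to the fixed stack object that replaced it.
+
+    #include <map>
+    #include <vector>
+
+    void remapDbgSlots(const std::map<int, int> &OldToFixed,
+                       std::vector<int> &Slots) {
+      for (int &Slot : Slots) {
+        auto It = OldToFixed.find(Slot);
+        if (It != OldToFixed.end())
+          Slot = It->second;
+      }
+    }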
+
// Finally, if the target has anything special to do, allow it to do so.
EmitFunctionEntryCode();
}
@@ -8402,13 +8930,10 @@ void SelectionDAGBuilder::updateDAGForMaybeTailCall(SDValue MaybeTC) {
HasTailCall = true;
}
-bool SelectionDAGBuilder::isDense(const CaseClusterVector &Clusters,
- const SmallVectorImpl<unsigned> &TotalCases,
- unsigned First, unsigned Last,
- unsigned Density) const {
+uint64_t
+SelectionDAGBuilder::getJumpTableRange(const CaseClusterVector &Clusters,
+ unsigned First, unsigned Last) const {
assert(Last >= First);
- assert(TotalCases[Last] >= TotalCases[First]);
-
const APInt &LowCase = Clusters[First].Low->getValue();
const APInt &HighCase = Clusters[Last].High->getValue();
assert(LowCase.getBitWidth() == HighCase.getBitWidth());
@@ -8417,26 +8942,17 @@ bool SelectionDAGBuilder::isDense(const CaseClusterVector &Clusters,
// comparison to lower. We should discriminate against such consecutive ranges
// in jump tables.
- uint64_t Diff = (HighCase - LowCase).getLimitedValue((UINT64_MAX - 1) / 100);
- uint64_t Range = Diff + 1;
+ return (HighCase - LowCase).getLimitedValue((UINT64_MAX - 1) / 100) + 1;
+}
+uint64_t SelectionDAGBuilder::getJumpTableNumCases(
+ const SmallVectorImpl<unsigned> &TotalCases, unsigned First,
+ unsigned Last) const {
+ assert(Last >= First);
+ assert(TotalCases[Last] >= TotalCases[First]);
uint64_t NumCases =
TotalCases[Last] - (First == 0 ? 0 : TotalCases[First - 1]);
-
- assert(NumCases < UINT64_MAX / 100);
- assert(Range >= NumCases);
-
- return NumCases * 100 >= Range * Density;
-}
-
-static inline bool areJTsAllowed(const TargetLowering &TLI,
- const SwitchInst *SI) {
- const Function *Fn = SI->getParent()->getParent();
- if (Fn->getFnAttribute("no-jump-tables").getValueAsString() == "true")
- return false;
-
- return TLI.isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) ||
- TLI.isOperationLegalOrCustom(ISD::BRIND, MVT::Other);
+ return NumCases;
}
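
These two helpers feed TLI.isSuitableForJumpTable, which now owns the density decision the deleted isDense() made inline. The arithmetic is unchanged; a standalone version with a worked example follows (the 10% and 40% minimum densities are the traditional speed- and size-optimized defaults, assumed here since the hook is target-tunable):

    #include <cstdint>

    // NumCases * 100 >= Range * MinDensity, in integers; the asserts at
    // the call sites above keep both products from overflowing.
    constexpr bool denseEnough(uint64_t NumCases, uint64_t Range,
                               unsigned MinDensity) {
      return NumCases * 100 >= Range * MinDensity;
    }

    // 6 cases over a range of 16 is 37.5% dense:
    static_assert(denseEnough(6, 16, 10), "accepted at a 10% minimum");
    static_assert(!denseEnough(6, 16, 40), "rejected at a 40% minimum");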
bool SelectionDAGBuilder::buildJumpTable(const CaseClusterVector &Clusters,
@@ -8475,10 +8991,11 @@ bool SelectionDAGBuilder::buildJumpTable(const CaseClusterVector &Clusters,
JTProbs[Clusters[I].MBB] += Clusters[I].Prob;
}
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
unsigned NumDests = JTProbs.size();
- if (isSuitableForBitTests(NumDests, NumCmps,
- Clusters[First].Low->getValue(),
- Clusters[Last].High->getValue())) {
+ if (TLI.isSuitableForBitTests(
+ NumDests, NumCmps, Clusters[First].Low->getValue(),
+ Clusters[Last].High->getValue(), DAG.getDataLayout())) {
// Clusters[First..Last] should be lowered as bit tests instead.
return false;
}
@@ -8499,7 +9016,6 @@ bool SelectionDAGBuilder::buildJumpTable(const CaseClusterVector &Clusters,
}
JumpTableMBB->normalizeSuccProbs();
- const TargetLowering &TLI = DAG.getTargetLoweringInfo();
unsigned JTI = CurMF->getOrCreateJumpTableInfo(TLI.getJumpTableEncoding())
->createJumpTableIndex(Table);
@@ -8528,17 +9044,12 @@ void SelectionDAGBuilder::findJumpTables(CaseClusterVector &Clusters,
#endif
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- if (!areJTsAllowed(TLI, SI))
+ if (!TLI.areJTsAllowed(SI->getParent()->getParent()))
return;
- const bool OptForSize = DefaultMBB->getParent()->getFunction()->optForSize();
-
const int64_t N = Clusters.size();
const unsigned MinJumpTableEntries = TLI.getMinimumJumpTableEntries();
const unsigned SmallNumberOfEntries = MinJumpTableEntries / 2;
- const unsigned MaxJumpTableSize =
- OptForSize || TLI.getMaximumJumpTableSize() == 0
- ? UINT_MAX : TLI.getMaximumJumpTableSize();
if (N < 2 || N < MinJumpTableEntries)
return;
@@ -8553,15 +9064,12 @@ void SelectionDAGBuilder::findJumpTables(CaseClusterVector &Clusters,
TotalCases[i] += TotalCases[i - 1];
}
- const unsigned MinDensity =
- OptForSize ? OptsizeJumpTableDensity : JumpTableDensity;
-
// Cheap case: the whole range may be suitable for jump table.
- unsigned JumpTableSize = (Clusters[N - 1].High->getValue() -
- Clusters[0].Low->getValue())
- .getLimitedValue(UINT_MAX - 1) + 1;
- if (JumpTableSize <= MaxJumpTableSize &&
- isDense(Clusters, TotalCases, 0, N - 1, MinDensity)) {
+ uint64_t Range = getJumpTableRange(Clusters, 0, N - 1);
+ uint64_t NumCases = getJumpTableNumCases(TotalCases, 0, N - 1);
+ assert(NumCases < UINT64_MAX / 100);
+ assert(Range >= NumCases);
+ if (TLI.isSuitableForJumpTable(SI, NumCases, Range)) {
CaseCluster JTCluster;
if (buildJumpTable(Clusters, 0, N - 1, SI, DefaultMBB, JTCluster)) {
Clusters[0] = JTCluster;
@@ -8614,11 +9122,11 @@ void SelectionDAGBuilder::findJumpTables(CaseClusterVector &Clusters,
// Search for a solution that results in fewer partitions.
for (int64_t j = N - 1; j > i; j--) {
// Try building a partition from Clusters[i..j].
- JumpTableSize = (Clusters[j].High->getValue() -
- Clusters[i].Low->getValue())
- .getLimitedValue(UINT_MAX - 1) + 1;
- if (JumpTableSize <= MaxJumpTableSize &&
- isDense(Clusters, TotalCases, i, j, MinDensity)) {
+ uint64_t Range = getJumpTableRange(Clusters, i, j);
+ uint64_t NumCases = getJumpTableNumCases(TotalCases, i, j);
+ assert(NumCases < UINT64_MAX / 100);
+ assert(Range >= NumCases);
+ if (TLI.isSuitableForJumpTable(SI, NumCases, Range)) {
unsigned NumPartitions = 1 + (j == N - 1 ? 0 : MinPartitions[j + 1]);
unsigned Score = j == N - 1 ? 0 : PartitionsScore[j + 1];
int64_t NumEntries = j - i + 1;
@@ -8662,36 +9170,6 @@ void SelectionDAGBuilder::findJumpTables(CaseClusterVector &Clusters,
Clusters.resize(DstIndex);
}
-bool SelectionDAGBuilder::rangeFitsInWord(const APInt &Low, const APInt &High) {
- // FIXME: Using the pointer type doesn't seem ideal.
- uint64_t BW = DAG.getDataLayout().getPointerSizeInBits();
- uint64_t Range = (High - Low).getLimitedValue(UINT64_MAX - 1) + 1;
- return Range <= BW;
-}
-
-bool SelectionDAGBuilder::isSuitableForBitTests(unsigned NumDests,
- unsigned NumCmps,
- const APInt &Low,
- const APInt &High) {
- // FIXME: I don't think NumCmps is the correct metric: a single case and a
- // range of cases both require only one branch to lower. Just looking at the
- // number of clusters and destinations should be enough to decide whether to
- // build bit tests.
-
- // To lower a range with bit tests, the range must fit the bitwidth of a
- // machine word.
- if (!rangeFitsInWord(Low, High))
- return false;
-
- // Decide whether it's profitable to lower this range with bit tests. Each
- // destination requires a bit test and branch, and there is an overall range
- // check branch. For a small number of clusters, separate comparisons might be
- // cheaper, and for many destinations, splitting the range might be better.
- return (NumDests == 1 && NumCmps >= 3) ||
- (NumDests == 2 && NumCmps >= 5) ||
- (NumDests == 3 && NumCmps >= 6);
-}
-
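These helpers were moved onto TargetLowering rather than dropped; a sketch of the same profitability heuristic, assuming the thresholds are unchanged from the deleted code, with BW standing in for the DataLayout's word size (suitableForBitTests is an illustrative free function, not the patch's API):

    #include "llvm/ADT/APInt.h"
    #include <cstdint>

    // Sketch: decide whether Clusters[First..Last] is worth lowering as bit
    // tests. The range must fit one machine word so a single mask covers it,
    // and enough comparisons must be saved to pay for the test-and-branch
    // per destination plus the overall range-check branch.
    static bool suitableForBitTests(unsigned NumDests, unsigned NumCmps,
                                    const llvm::APInt &Low,
                                    const llvm::APInt &High, uint64_t BW) {
      uint64_t Range = (High - Low).getLimitedValue(UINT64_MAX - 1) + 1;
      if (Range > BW)
        return false;
      return (NumDests == 1 && NumCmps >= 3) ||
             (NumDests == 2 && NumCmps >= 5) ||
             (NumDests == 3 && NumCmps >= 6);
    }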
bool SelectionDAGBuilder::buildBitTests(CaseClusterVector &Clusters,
unsigned First, unsigned Last,
const SwitchInst *SI,
@@ -8713,16 +9191,17 @@ bool SelectionDAGBuilder::buildBitTests(CaseClusterVector &Clusters,
APInt High = Clusters[Last].High->getValue();
assert(Low.slt(High));
- if (!isSuitableForBitTests(NumDests, NumCmps, Low, High))
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ const DataLayout &DL = DAG.getDataLayout();
+ if (!TLI.isSuitableForBitTests(NumDests, NumCmps, Low, High, DL))
return false;
APInt LowBound;
APInt CmpRange;
- const int BitWidth = DAG.getTargetLoweringInfo()
- .getPointerTy(DAG.getDataLayout())
- .getSizeInBits();
- assert(rangeFitsInWord(Low, High) && "Case range must fit in bit mask!");
+ const int BitWidth = TLI.getPointerTy(DL).getSizeInBits();
+ assert(TLI.rangeFitsInWord(Low, High, DL) &&
+ "Case range must fit in bit mask!");
// Check if the clusters cover a contiguous range such that no value in the
// range will jump to the default statement.
@@ -8812,7 +9291,9 @@ void SelectionDAGBuilder::findBitTestClusters(CaseClusterVector &Clusters,
// If target does not have legal shift left, do not emit bit tests at all.
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- EVT PTy = TLI.getPointerTy(DAG.getDataLayout());
+ const DataLayout &DL = DAG.getDataLayout();
+
+ EVT PTy = TLI.getPointerTy(DL);
if (!TLI.isOperationLegal(ISD::SHL, PTy))
return;
@@ -8843,8 +9324,8 @@ void SelectionDAGBuilder::findBitTestClusters(CaseClusterVector &Clusters,
// Try building a partition from Clusters[i..j].
// Check the range.
- if (!rangeFitsInWord(Clusters[i].Low->getValue(),
- Clusters[j].High->getValue()))
+ if (!TLI.rangeFitsInWord(Clusters[i].Low->getValue(),
+ Clusters[j].High->getValue(), DL))
continue;
// Check nbr of destinations and cluster types.
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
index abde8a8..ac1d6aa 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
@@ -38,7 +38,6 @@ class BranchInst;
class CallInst;
class DbgValueInst;
class ExtractElementInst;
-class ExtractValueInst;
class FCmpInst;
class FPExtInst;
class FPToSIInst;
@@ -53,7 +52,6 @@ class IntToPtrInst;
class IndirectBrInst;
class InvokeInst;
class InsertElementInst;
-class InsertValueInst;
class Instruction;
class LoadInst;
class MachineBasicBlock;
@@ -304,10 +302,13 @@ private:
BranchProbability DefaultProb;
};
- /// Check whether a range of clusters is dense enough for a jump table.
- bool isDense(const CaseClusterVector &Clusters,
- const SmallVectorImpl<unsigned> &TotalCases,
- unsigned First, unsigned Last, unsigned MinDensity) const;
+ /// Return the range of values in [First..Last].
+ uint64_t getJumpTableRange(const CaseClusterVector &Clusters, unsigned First,
+ unsigned Last) const;
+
+ /// Return the number of cases in [First..Last].
+ uint64_t getJumpTableNumCases(const SmallVectorImpl<unsigned> &TotalCases,
+ unsigned First, unsigned Last) const;
/// Build a jump table cluster from Clusters[First..Last]. Returns false if it
/// decides it's not a good idea.
@@ -319,14 +320,6 @@ private:
void findJumpTables(CaseClusterVector &Clusters, const SwitchInst *SI,
MachineBasicBlock *DefaultMBB);
- /// Check whether the range [Low,High] fits in a machine word.
- bool rangeFitsInWord(const APInt &Low, const APInt &High);
-
- /// Check whether these clusters are suitable for lowering with bit tests based
- /// on the number of destinations, comparison metric, and range.
- bool isSuitableForBitTests(unsigned NumDests, unsigned NumCmps,
- const APInt &Low, const APInt &High);
-
/// Build a bit test cluster from Clusters[First..Last]. Returns false if it
/// decides it's not a good idea.
bool buildBitTests(CaseClusterVector &Clusters, unsigned First, unsigned Last,
@@ -609,40 +602,34 @@ public:
SelectionDAGBuilder(SelectionDAG &dag, FunctionLoweringInfo &funcinfo,
CodeGenOpt::Level ol)
: CurInst(nullptr), SDNodeOrder(LowestSDNodeOrder), TM(dag.getTarget()),
- DAG(dag), FuncInfo(funcinfo),
+ DAG(dag), DL(nullptr), AA(nullptr), FuncInfo(funcinfo),
HasTailCall(false) {
}
- void init(GCFunctionInfo *gfi, AliasAnalysis &aa,
+ void init(GCFunctionInfo *gfi, AliasAnalysis *AA,
const TargetLibraryInfo *li);
- /// clear - Clear out the current SelectionDAG and the associated
- /// state and prepare this SelectionDAGBuilder object to be used
- /// for a new block. This doesn't clear out information about
- /// additional blocks that are needed to complete switch lowering
- /// or PHI node updating; that information is cleared out as it is
- /// consumed.
+ /// Clear out the current SelectionDAG and the associated state and prepare
+ /// this SelectionDAGBuilder object to be used for a new block. This doesn't
+ /// clear out information about additional blocks that are needed to complete
+ /// switch lowering or PHI node updating; that information is cleared out as
+ /// it is consumed.
void clear();
- /// clearDanglingDebugInfo - Clear the dangling debug information
- /// map. This function is separated from the clear so that debug
- /// information that is dangling in a basic block can be properly
- /// resolved in a different basic block. This allows the
- /// SelectionDAG to resolve dangling debug information attached
- /// to PHI nodes.
+ /// Clear the dangling debug information map. This function is separated from
+ /// the clear so that debug information that is dangling in a basic block can
+ /// be properly resolved in a different basic block. This allows the
+ /// SelectionDAG to resolve dangling debug information attached to PHI nodes.
void clearDanglingDebugInfo();
- /// getRoot - Return the current virtual root of the Selection DAG,
- /// flushing any PendingLoad items. This must be done before emitting
- /// a store or any other node that may need to be ordered after any
- /// prior load instructions.
- ///
+ /// Return the current virtual root of the Selection DAG, flushing any
+ /// PendingLoad items. This must be done before emitting a store or any other
+ /// node that may need to be ordered after any prior load instructions.
SDValue getRoot();
- /// getControlRoot - Similar to getRoot, but instead of flushing all the
- /// PendingLoad items, flush all the PendingExports items. It is necessary
- /// to do this before emitting a terminator instruction.
- ///
+ /// Similar to getRoot, but instead of flushing all the PendingLoad items,
+ /// flush all the PendingExports items. It is necessary to do this before
+ /// emitting a terminator instruction.
SDValue getControlRoot();
SDLoc getCurSDLoc() const {
@@ -688,12 +675,13 @@ public:
MachineBasicBlock *FBB, MachineBasicBlock *CurBB,
MachineBasicBlock *SwitchBB,
Instruction::BinaryOps Opc, BranchProbability TW,
- BranchProbability FW);
+ BranchProbability FW, bool InvertCond);
void EmitBranchForMergedCondition(const Value *Cond, MachineBasicBlock *TBB,
MachineBasicBlock *FBB,
MachineBasicBlock *CurBB,
MachineBasicBlock *SwitchBB,
- BranchProbability TW, BranchProbability FW);
+ BranchProbability TW, BranchProbability FW,
+ bool InvertCond);
bool ShouldEmitAsBranches(const std::vector<CaseBlock> &Cases);
bool isExportableFromCurrentBlock(const Value *V, const BasicBlock *FromBB);
void CopyToExportRegsIfNeeded(const Value *V);
@@ -782,6 +770,11 @@ public:
bool VarArgDisallowed,
bool ForceVoidReturnTy);
+ /// Returns the type of FrameIndex and TargetFrameIndex nodes.
+ MVT getFrameIndexTy() {
+ return DAG.getTargetLoweringInfo().getFrameIndexTy(DAG.getDataLayout());
+ }
+
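A hypothetical use of the new accessor; getTargetFrameIndex() is the standard SelectionDAG call, and the helper name and FI parameter are illustrative:

    // Sketch: materialize a frame address with the target's preferred
    // frame-index type rather than hard-coding the pointer type.
    SDValue getStackSlotAddr(SelectionDAGBuilder &SDB, SelectionDAG &DAG,
                             int FI) {
      return DAG.getTargetFrameIndex(FI, SDB.getFrameIndexTy());
    }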
private:
// Terminator instructions.
void visitRet(const ReturnInst &I);
@@ -864,8 +857,8 @@ private:
void visitInsertElement(const User &I);
void visitShuffleVector(const User &I);
- void visitExtractValue(const ExtractValueInst &I);
- void visitInsertValue(const InsertValueInst &I);
+ void visitExtractValue(const User &I);
+ void visitInsertValue(const User &I);
void visitLandingPad(const LandingPadInst &I);
void visitGetElementPtr(const User &I);
@@ -900,6 +893,7 @@ private:
void visitInlineAsm(ImmutableCallSite CS);
const char *visitIntrinsicCall(const CallInst &I, unsigned Intrinsic);
void visitTargetIntrinsic(const CallInst &I, unsigned Intrinsic);
+ void visitConstrainedFPIntrinsic(const ConstrainedFPIntrinsic &FPI);
void visitVAStart(const CallInst &I);
void visitVAArg(const VAArgInst &I);
@@ -913,6 +907,8 @@ private:
void visitGCRelocate(const GCRelocateInst &I);
void visitGCResult(const GCResultInst &I);
+ void visitVectorReduce(const CallInst &I, unsigned Intrinsic);
+
void visitUserOp1(const Instruction &I) {
llvm_unreachable("UserOp1 should not exist at instruction selection time!");
}
@@ -932,7 +928,7 @@ private:
/// instruction selection, they will be inserted to the entry BB.
bool EmitFuncArgumentDbgValue(const Value *V, DILocalVariable *Variable,
DIExpression *Expr, DILocation *DL,
- int64_t Offset, bool IsIndirect,
+ int64_t Offset, bool IsDbgDeclare,
const SDValue &N);
/// Return the next block after MBB, or nullptr if there is none.
@@ -944,8 +940,8 @@ private:
/// Return the appropriate SDDbgValue based on N.
SDDbgValue *getDbgValue(SDValue N, DILocalVariable *Variable,
- DIExpression *Expr, int64_t Offset, DebugLoc dl,
- unsigned DbgSDNodeOrder);
+ DIExpression *Expr, int64_t Offset,
+ const DebugLoc &dl, unsigned DbgSDNodeOrder);
};
/// RegsForValue - This struct represents the registers (physical or virtual)
@@ -958,62 +954,69 @@ private:
/// type.
///
struct RegsForValue {
- /// ValueVTs - The value types of the values, which may not be legal, and
+ /// The value types of the values, which may not be legal, and
/// may need to be promoted or synthesized from one or more registers.
- ///
SmallVector<EVT, 4> ValueVTs;
- /// RegVTs - The value types of the registers. This is the same size as
- /// ValueVTs and it records, for each value, what the type of the assigned
- /// register or registers are. (Individual values are never synthesized
- /// from more than one type of register.)
+ /// The value types of the registers. This is the same size as ValueVTs and it
+ /// records, for each value, what the type of the assigned register or
+ /// registers are. (Individual values are never synthesized from more than one
+ /// type of register.)
///
/// With virtual registers, the contents of RegVTs are redundant with TLI's
/// getRegisterType member function; with physical registers, however,
/// it is necessary to have a separate record of the types.
- ///
SmallVector<MVT, 4> RegVTs;
- /// Regs - This list holds the registers assigned to the values.
+ /// This list holds the registers assigned to the values.
/// Each legal or promoted value requires one register, and each
/// expanded value requires multiple registers.
- ///
SmallVector<unsigned, 4> Regs;
+ /// This list holds the number of registers for each value.
+ SmallVector<unsigned, 4> RegCount;
+
+ /// Records if this value needs to be treated in an ABI dependent manner,
+ /// different from normal type legalization.
+ bool IsABIMangled;
+
RegsForValue();
- RegsForValue(const SmallVector<unsigned, 4> &regs, MVT regvt, EVT valuevt);
+ RegsForValue(const SmallVector<unsigned, 4> &regs, MVT regvt, EVT valuevt,
+ bool IsABIMangledValue = false);
RegsForValue(LLVMContext &Context, const TargetLowering &TLI,
- const DataLayout &DL, unsigned Reg, Type *Ty);
+ const DataLayout &DL, unsigned Reg, Type *Ty,
+ bool IsABIMangledValue = false);
- /// append - Add the specified values to this one.
+ /// Add the specified values to this one.
void append(const RegsForValue &RHS) {
ValueVTs.append(RHS.ValueVTs.begin(), RHS.ValueVTs.end());
RegVTs.append(RHS.RegVTs.begin(), RHS.RegVTs.end());
Regs.append(RHS.Regs.begin(), RHS.Regs.end());
+ RegCount.push_back(RHS.Regs.size());
}
- /// getCopyFromRegs - Emit a series of CopyFromReg nodes that copies from
- /// this value and returns the result as a ValueVTs value. This uses
- /// Chain/Flag as the input and updates them for the output Chain/Flag.
- /// If the Flag pointer is NULL, no flag is used.
+ /// Emit a series of CopyFromReg nodes that copies from this value and returns
+ /// the result as a ValueVTs value. This uses Chain/Flag as the input and
+ /// updates them for the output Chain/Flag. If the Flag pointer is NULL, no
+ /// flag is used.
SDValue getCopyFromRegs(SelectionDAG &DAG, FunctionLoweringInfo &FuncInfo,
const SDLoc &dl, SDValue &Chain, SDValue *Flag,
const Value *V = nullptr) const;
- /// getCopyToRegs - Emit a series of CopyToReg nodes that copies the specified
- /// value into the registers specified by this object. This uses Chain/Flag
- /// as the input and updates them for the output Chain/Flag. If the Flag
- /// pointer is nullptr, no flag is used. If V is not nullptr, then it is used
- /// in printing better diagnostic messages on error.
+ /// Emit a series of CopyToReg nodes that copies the specified value into the
+ /// registers specified by this object. This uses Chain/Flag as the input and
+ /// updates them for the output Chain/Flag. If the Flag pointer is nullptr, no
+ /// flag is used. If V is not nullptr, then it is used in printing better
+ /// diagnostic messages on error.
void getCopyToRegs(SDValue Val, SelectionDAG &DAG, const SDLoc &dl,
SDValue &Chain, SDValue *Flag, const Value *V = nullptr,
ISD::NodeType PreferredExtendType = ISD::ANY_EXTEND) const;
- /// AddInlineAsmOperands - Add this value to the specified inlineasm node
- /// operand list. This adds the code marker, matching input operand index
- /// (if applicable), and includes the number of values added into it.
+ /// Add this value to the specified inlineasm node operand list. This adds the
+ /// code marker, matching input operand index (if applicable), and includes
+ /// the number of values added into it.
void AddInlineAsmOperands(unsigned Kind, bool HasMatching,
unsigned MatchingIdx, const SDLoc &dl,
SelectionDAG &DAG, std::vector<SDValue> &Ops) const;
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
index 0faaad8..3dd5897 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
@@ -11,12 +11,12 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/CodeGen/SelectionDAG.h"
#include "ScheduleDAGSDNodes.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Intrinsics.h"
@@ -214,6 +214,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::FPOWI: return "fpowi";
case ISD::SETCC: return "setcc";
case ISD::SETCCE: return "setcce";
+ case ISD::SETCCCARRY: return "setcccarry";
case ISD::SELECT: return "select";
case ISD::VSELECT: return "vselect";
case ISD::SELECT_CC: return "select_cc";
@@ -227,6 +228,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::CARRY_FALSE: return "carry_false";
case ISD::ADDC: return "addc";
case ISD::ADDE: return "adde";
+ case ISD::ADDCARRY: return "addcarry";
case ISD::SADDO: return "saddo";
case ISD::UADDO: return "uaddo";
case ISD::SSUBO: return "ssubo";
@@ -235,6 +237,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::UMULO: return "umulo";
case ISD::SUBC: return "subc";
case ISD::SUBE: return "sube";
+ case ISD::SUBCARRY: return "subcarry";
case ISD::SHL_PARTS: return "shl_parts";
case ISD::SRA_PARTS: return "sra_parts";
case ISD::SRL_PARTS: return "srl_parts";
@@ -300,6 +303,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::GET_DYNAMIC_AREA_OFFSET: return "get.dynamic.area.offset";
// Bit manipulation
+ case ISD::ABS: return "abs";
case ISD::BITREVERSE: return "bitreverse";
case ISD::BSWAP: return "bswap";
case ISD::CTPOP: return "ctpop";
@@ -343,6 +347,19 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::SETFALSE: return "setfalse";
case ISD::SETFALSE2: return "setfalse2";
}
+ case ISD::VECREDUCE_FADD: return "vecreduce_fadd";
+ case ISD::VECREDUCE_FMUL: return "vecreduce_fmul";
+ case ISD::VECREDUCE_ADD: return "vecreduce_add";
+ case ISD::VECREDUCE_MUL: return "vecreduce_mul";
+ case ISD::VECREDUCE_AND: return "vecreduce_and";
+ case ISD::VECREDUCE_OR: return "vecreduce_or";
+ case ISD::VECREDUCE_XOR: return "vecreduce_xor";
+ case ISD::VECREDUCE_SMAX: return "vecreduce_smax";
+ case ISD::VECREDUCE_SMIN: return "vecreduce_smin";
+ case ISD::VECREDUCE_UMAX: return "vecreduce_umax";
+ case ISD::VECREDUCE_UMIN: return "vecreduce_umin";
+ case ISD::VECREDUCE_FMAX: return "vecreduce_fmax";
+ case ISD::VECREDUCE_FMIN: return "vecreduce_fmin";
}
}
@@ -366,11 +383,13 @@ static Printable PrintNodeId(const SDNode &Node) {
});
}
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD void SDNode::dump() const { dump(nullptr); }
-void SDNode::dump(const SelectionDAG *G) const {
+LLVM_DUMP_METHOD void SDNode::dump(const SelectionDAG *G) const {
print(dbgs(), G);
dbgs() << '\n';
}
+#endif
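The same guard recurs through the rest of this file; the idiom, sketched for an arbitrary class with debug-only printers (MyNode and its print() are placeholders):

    // Sketch of the gating idiom: dump bodies are compiled only when
    // asserts are on (!NDEBUG) or dumping is explicitly requested.
    #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
    LLVM_DUMP_METHOD void MyNode::dump() const {
      print(llvm::dbgs());
      llvm::dbgs() << '\n';
    }
    #endif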
void SDNode::print_types(raw_ostream &OS, const SelectionDAG *G) const {
for (unsigned i = 0, e = getNumValues(); i != e; ++i) {
@@ -416,7 +435,7 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const {
OS << '<' << CSDN->getValueAPF().convertToDouble() << '>';
else {
OS << "<APFloat(";
- CSDN->getValueAPF().bitcastToAPInt().dump();
+ CSDN->getValueAPF().bitcastToAPInt().print(OS, false);
OS << ")>";
}
} else if (const GlobalAddressSDNode *GADN =
@@ -566,6 +585,7 @@ static bool shouldPrintInline(const SDNode &Node) {
return Node.getNumOperands() == 0;
}
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
static void DumpNodes(const SDNode *N, unsigned indent, const SelectionDAG *G) {
for (const SDValue &Op : N->op_values()) {
if (shouldPrintInline(*Op.getNode()))
@@ -592,6 +612,7 @@ LLVM_DUMP_METHOD void SelectionDAG::dump() const {
if (getRoot().getNode()) DumpNodes(getRoot().getNode(), 2, this);
dbgs() << "\n\n";
}
+#endif
void SDNode::printr(raw_ostream &OS, const SelectionDAG *G) const {
OS << PrintNodeId(*this) << ": ";
@@ -618,6 +639,7 @@ static bool printOperand(raw_ostream &OS, const SelectionDAG *G,
}
}
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
typedef SmallPtrSet<const SDNode *, 32> VisitedSDNodeSet;
static void DumpNodesr(raw_ostream &OS, const SDNode *N, unsigned indent,
const SelectionDAG *G, VisitedSDNodeSet &once) {
@@ -646,15 +668,16 @@ static void DumpNodesr(raw_ostream &OS, const SDNode *N, unsigned indent,
DumpNodesr(OS, Op.getNode(), indent+2, G, once);
}
-void SDNode::dumpr() const {
+LLVM_DUMP_METHOD void SDNode::dumpr() const {
VisitedSDNodeSet once;
DumpNodesr(dbgs(), this, 0, nullptr, once);
}
-void SDNode::dumpr(const SelectionDAG *G) const {
+LLVM_DUMP_METHOD void SDNode::dumpr(const SelectionDAG *G) const {
VisitedSDNodeSet once;
DumpNodesr(dbgs(), this, 0, G, once);
}
+#endif
static void printrWithDepthHelper(raw_ostream &OS, const SDNode *N,
const SelectionDAG *G, unsigned depth,
@@ -688,14 +711,17 @@ void SDNode::printrFull(raw_ostream &OS, const SelectionDAG *G) const {
printrWithDepth(OS, G, 10);
}
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD
void SDNode::dumprWithDepth(const SelectionDAG *G, unsigned depth) const {
printrWithDepth(dbgs(), G, depth);
}
-void SDNode::dumprFull(const SelectionDAG *G) const {
+LLVM_DUMP_METHOD void SDNode::dumprFull(const SelectionDAG *G) const {
// Don't print impossibly deep things.
dumprWithDepth(G, 10);
}
+#endif
void SDNode::print(raw_ostream &OS, const SelectionDAG *G) const {
printr(OS, G);
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index 64e6c22..bdf57e8 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -1,4 +1,4 @@
-//===-- SelectionDAGISel.cpp - Implement the SelectionDAGISel class -------===//
+//===- SelectionDAGISel.cpp - Implement the SelectionDAGISel class --------===//
//
// The LLVM Compiler Infrastructure
//
@@ -11,43 +11,73 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/CodeGen/SelectionDAG.h"
#include "ScheduleDAGSDNodes.h"
#include "SelectionDAGBuilder.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/None.h"
#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/Analysis/CFG.h"
-#include "llvm/Analysis/EHPersonalities.h"
+#include "llvm/Analysis/OptimizationDiagnosticInfo.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/CodeGen/FastISel.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/GCMetadata.h"
-#include "llvm/CodeGen/GCStrategy.h"
+#include "llvm/CodeGen/ISDOpcodes.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/MachinePassRegistry.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
+#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/CodeGen/SchedulerRegistry.h"
+#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/StackProtector.h"
-#include "llvm/CodeGen/WinEHFuncInfo.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constants.h"
-#include "llvm/IR/DebugInfo.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/IR/DiagnosticInfo.h"
+#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
-#include "llvm/IR/LLVMContext.h"
-#include "llvm/IR/Module.h"
-#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/User.h"
+#include "llvm/IR/Value.h"
+#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/BranchProbability.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/CodeGen.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/KnownBits.h"
#include "llvm/Support/Timer.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
@@ -59,6 +89,14 @@
#include "llvm/Target/TargetSubtargetInfo.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <iterator>
+#include <limits>
+#include <memory>
+#include <string>
+#include <utility>
+#include <vector>
using namespace llvm;
@@ -73,104 +111,6 @@ STATISTIC(NumEntryBlocks, "Number of entry blocks encountered");
STATISTIC(NumFastIselFailLowerArguments,
"Number of entry blocks where fast isel failed to lower arguments");
-#ifndef NDEBUG
-static cl::opt<bool>
-EnableFastISelVerbose2("fast-isel-verbose2", cl::Hidden,
- cl::desc("Enable extra verbose messages in the \"fast\" "
- "instruction selector"));
-
- // Terminators
-STATISTIC(NumFastIselFailRet,"Fast isel fails on Ret");
-STATISTIC(NumFastIselFailBr,"Fast isel fails on Br");
-STATISTIC(NumFastIselFailSwitch,"Fast isel fails on Switch");
-STATISTIC(NumFastIselFailIndirectBr,"Fast isel fails on IndirectBr");
-STATISTIC(NumFastIselFailInvoke,"Fast isel fails on Invoke");
-STATISTIC(NumFastIselFailResume,"Fast isel fails on Resume");
-STATISTIC(NumFastIselFailUnreachable,"Fast isel fails on Unreachable");
-
- // Standard binary operators...
-STATISTIC(NumFastIselFailAdd,"Fast isel fails on Add");
-STATISTIC(NumFastIselFailFAdd,"Fast isel fails on FAdd");
-STATISTIC(NumFastIselFailSub,"Fast isel fails on Sub");
-STATISTIC(NumFastIselFailFSub,"Fast isel fails on FSub");
-STATISTIC(NumFastIselFailMul,"Fast isel fails on Mul");
-STATISTIC(NumFastIselFailFMul,"Fast isel fails on FMul");
-STATISTIC(NumFastIselFailUDiv,"Fast isel fails on UDiv");
-STATISTIC(NumFastIselFailSDiv,"Fast isel fails on SDiv");
-STATISTIC(NumFastIselFailFDiv,"Fast isel fails on FDiv");
-STATISTIC(NumFastIselFailURem,"Fast isel fails on URem");
-STATISTIC(NumFastIselFailSRem,"Fast isel fails on SRem");
-STATISTIC(NumFastIselFailFRem,"Fast isel fails on FRem");
-
- // Logical operators...
-STATISTIC(NumFastIselFailAnd,"Fast isel fails on And");
-STATISTIC(NumFastIselFailOr,"Fast isel fails on Or");
-STATISTIC(NumFastIselFailXor,"Fast isel fails on Xor");
-
- // Memory instructions...
-STATISTIC(NumFastIselFailAlloca,"Fast isel fails on Alloca");
-STATISTIC(NumFastIselFailLoad,"Fast isel fails on Load");
-STATISTIC(NumFastIselFailStore,"Fast isel fails on Store");
-STATISTIC(NumFastIselFailAtomicCmpXchg,"Fast isel fails on AtomicCmpXchg");
-STATISTIC(NumFastIselFailAtomicRMW,"Fast isel fails on AtomicRWM");
-STATISTIC(NumFastIselFailFence,"Fast isel fails on Frence");
-STATISTIC(NumFastIselFailGetElementPtr,"Fast isel fails on GetElementPtr");
-
- // Convert instructions...
-STATISTIC(NumFastIselFailTrunc,"Fast isel fails on Trunc");
-STATISTIC(NumFastIselFailZExt,"Fast isel fails on ZExt");
-STATISTIC(NumFastIselFailSExt,"Fast isel fails on SExt");
-STATISTIC(NumFastIselFailFPTrunc,"Fast isel fails on FPTrunc");
-STATISTIC(NumFastIselFailFPExt,"Fast isel fails on FPExt");
-STATISTIC(NumFastIselFailFPToUI,"Fast isel fails on FPToUI");
-STATISTIC(NumFastIselFailFPToSI,"Fast isel fails on FPToSI");
-STATISTIC(NumFastIselFailUIToFP,"Fast isel fails on UIToFP");
-STATISTIC(NumFastIselFailSIToFP,"Fast isel fails on SIToFP");
-STATISTIC(NumFastIselFailIntToPtr,"Fast isel fails on IntToPtr");
-STATISTIC(NumFastIselFailPtrToInt,"Fast isel fails on PtrToInt");
-STATISTIC(NumFastIselFailBitCast,"Fast isel fails on BitCast");
-
- // Other instructions...
-STATISTIC(NumFastIselFailICmp,"Fast isel fails on ICmp");
-STATISTIC(NumFastIselFailFCmp,"Fast isel fails on FCmp");
-STATISTIC(NumFastIselFailPHI,"Fast isel fails on PHI");
-STATISTIC(NumFastIselFailSelect,"Fast isel fails on Select");
-STATISTIC(NumFastIselFailCall,"Fast isel fails on Call");
-STATISTIC(NumFastIselFailShl,"Fast isel fails on Shl");
-STATISTIC(NumFastIselFailLShr,"Fast isel fails on LShr");
-STATISTIC(NumFastIselFailAShr,"Fast isel fails on AShr");
-STATISTIC(NumFastIselFailVAArg,"Fast isel fails on VAArg");
-STATISTIC(NumFastIselFailExtractElement,"Fast isel fails on ExtractElement");
-STATISTIC(NumFastIselFailInsertElement,"Fast isel fails on InsertElement");
-STATISTIC(NumFastIselFailShuffleVector,"Fast isel fails on ShuffleVector");
-STATISTIC(NumFastIselFailExtractValue,"Fast isel fails on ExtractValue");
-STATISTIC(NumFastIselFailInsertValue,"Fast isel fails on InsertValue");
-STATISTIC(NumFastIselFailLandingPad,"Fast isel fails on LandingPad");
-
-// Intrinsic instructions...
-STATISTIC(NumFastIselFailIntrinsicCall, "Fast isel fails on Intrinsic call");
-STATISTIC(NumFastIselFailSAddWithOverflow,
- "Fast isel fails on sadd.with.overflow");
-STATISTIC(NumFastIselFailUAddWithOverflow,
- "Fast isel fails on uadd.with.overflow");
-STATISTIC(NumFastIselFailSSubWithOverflow,
- "Fast isel fails on ssub.with.overflow");
-STATISTIC(NumFastIselFailUSubWithOverflow,
- "Fast isel fails on usub.with.overflow");
-STATISTIC(NumFastIselFailSMulWithOverflow,
- "Fast isel fails on smul.with.overflow");
-STATISTIC(NumFastIselFailUMulWithOverflow,
- "Fast isel fails on umul.with.overflow");
-STATISTIC(NumFastIselFailFrameaddress, "Fast isel fails on Frameaddress");
-STATISTIC(NumFastIselFailSqrt, "Fast isel fails on sqrt call");
-STATISTIC(NumFastIselFailStackMap, "Fast isel fails on StackMap call");
-STATISTIC(NumFastIselFailPatchPoint, "Fast isel fails on PatchPoint call");
-#endif
-
-static cl::opt<bool>
-EnableFastISelVerbose("fast-isel-verbose", cl::Hidden,
- cl::desc("Enable verbose messages in the \"fast\" "
- "instruction selector"));
static cl::opt<int> EnableFastISelAbort(
"fast-isel-abort", cl::Hidden,
cl::desc("Enable abort calls when \"fast\" instruction selection "
@@ -179,6 +119,11 @@ static cl::opt<int> EnableFastISelAbort(
"abort for argument lowering, and 3 will never fallback "
"to SelectionDAG."));
+static cl::opt<bool> EnableFastISelFallbackReport(
+ "fast-isel-report-on-fallback", cl::Hidden,
+ cl::desc("Emit a diagnostic when \"fast\" instruction selection "
+ "falls back to SelectionDAG."));
+
static cl::opt<bool>
UseMBPI("use-mbpi",
cl::desc("use Machine Branch Probability Info"),
@@ -238,7 +183,7 @@ MachinePassRegistry RegisterScheduler::Registry;
///
//===---------------------------------------------------------------------===//
static cl::opt<RegisterScheduler::FunctionPassCtor, false,
- RegisterPassParser<RegisterScheduler> >
+ RegisterPassParser<RegisterScheduler>>
ISHeuristic("pre-RA-sched",
cl::init(&createDefaultScheduler), cl::Hidden,
cl::desc("Instruction schedulers available (before register"
@@ -249,6 +194,7 @@ defaultListDAGScheduler("default", "Best scheduler for the target",
createDefaultScheduler);
namespace llvm {
+
//===--------------------------------------------------------------------===//
/// \brief This class is used by SelectionDAGISel to temporarily override
/// the optimization level on a per-function basis.
@@ -318,6 +264,7 @@ namespace llvm {
"Unknown sched type!");
return createILPListDAGScheduler(IS, OptLevel);
}
+
} // end namespace llvm
// EmitInstrWithCustomInserter - This method should be implemented by targets
@@ -357,7 +304,7 @@ SelectionDAGISel::SelectionDAGISel(TargetMachine &tm,
FuncInfo(new FunctionLoweringInfo()),
CurDAG(new SelectionDAG(tm, OL)),
SDB(new SelectionDAGBuilder(*CurDAG, *FuncInfo, OL)),
- GFI(),
+ AA(), GFI(),
OptLevel(OL),
DAGSize(0) {
initializeGCModuleInfoPass(*PassRegistry::getPassRegistry());
@@ -375,7 +322,8 @@ SelectionDAGISel::~SelectionDAGISel() {
}
void SelectionDAGISel::getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addRequired<AAResultsWrapperPass>();
+ if (OptLevel != CodeGenOpt::None)
+ AU.addRequired<AAResultsWrapperPass>();
AU.addRequired<GCModuleInfo>();
AU.addRequired<StackProtector>();
AU.addPreserved<StackProtector>();
@@ -389,11 +337,13 @@ void SelectionDAGISel::getAnalysisUsage(AnalysisUsage &AU) const {
/// SplitCriticalSideEffectEdges - Look for critical edges with a PHI value that
/// may trap on it. In this case we have to split the edge so that the path
/// through the predecessor block that doesn't go to the phi block doesn't
-/// execute the possibly trapping instruction.
-///
+/// execute the possibly trapping instruction. If available, we pass domtree
+/// and loop info to be updated when we split critical edges. This is because
+/// SelectionDAGISel preserves these analyses.
/// This is required for correctness, so it must be done at -O0.
///
-static void SplitCriticalSideEffectEdges(Function &Fn) {
+static void SplitCriticalSideEffectEdges(Function &Fn, DominatorTree *DT,
+ LoopInfo *LI) {
// Loop over blocks with phi nodes.
for (BasicBlock &BB : Fn) {
PHINode *PN = dyn_cast<PHINode>(BB.begin());
@@ -419,7 +369,7 @@ static void SplitCriticalSideEffectEdges(Function &Fn) {
// Okay, we have to split this edge.
SplitCriticalEdge(
Pred->getTerminator(), GetSuccessorNumber(Pred, &BB),
- CriticalEdgeSplittingOptions().setMergeIdenticalEdges());
+ CriticalEdgeSplittingOptions(DT, LI).setMergeIdenticalEdges());
goto ReprocessBlock;
}
}
@@ -431,8 +381,6 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
MachineFunctionProperties::Property::Selected))
return false;
// Do some sanity-checking on the command-line options.
- assert((!EnableFastISelVerbose || TM.Options.EnableFastISel) &&
- "-fast-isel-verbose requires -fast-isel");
assert((!EnableFastISelAbort || TM.Options.EnableFastISel) &&
"-fast-isel-abort > 0 requires -fast-isel");
@@ -454,23 +402,37 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
TII = MF->getSubtarget().getInstrInfo();
TLI = MF->getSubtarget().getTargetLowering();
RegInfo = &MF->getRegInfo();
- AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
LibInfo = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
GFI = Fn.hasGC() ? &getAnalysis<GCModuleInfo>().getFunctionInfo(Fn) : nullptr;
+ ORE = make_unique<OptimizationRemarkEmitter>(&Fn);
+ auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>();
+ DominatorTree *DT = DTWP ? &DTWP->getDomTree() : nullptr;
+ auto *LIWP = getAnalysisIfAvailable<LoopInfoWrapperPass>();
+ LoopInfo *LI = LIWP ? &LIWP->getLoopInfo() : nullptr;
DEBUG(dbgs() << "\n\n\n=== " << Fn.getName() << "\n");
- SplitCriticalSideEffectEdges(const_cast<Function &>(Fn));
+ SplitCriticalSideEffectEdges(const_cast<Function &>(Fn), DT, LI);
- CurDAG->init(*MF);
+ CurDAG->init(*MF, *ORE);
FuncInfo->set(Fn, *MF, CurDAG);
+ // Now get the optional analyses if we want to.
+ // This is based on the possibly changed OptLevel (after optnone is taken
+ // into account). That's unfortunate but OK because it just means we won't
+ // ask for passes that have been required anyway.
+
if (UseMBPI && OptLevel != CodeGenOpt::None)
FuncInfo->BPI = &getAnalysis<BranchProbabilityInfoWrapperPass>().getBPI();
else
FuncInfo->BPI = nullptr;
- SDB->init(GFI, *AA, LibInfo);
+ if (OptLevel != CodeGenOpt::None)
+ AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
+ else
+ AA = nullptr;
+
+ SDB->init(GFI, AA, LibInfo);
MF->setHasInlineAsm(false);
@@ -502,6 +464,10 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
TLI->initializeSplitCSR(EntryMBB);
SelectAllBasicBlocks(Fn);
+ if (FastISelFailed && EnableFastISelFallbackReport) {
+ DiagnosticInfoISelFallback DiagFallback(Fn);
+ Fn.getContext().diagnose(DiagFallback);
+ }
// If the first basic block in the function has live ins that need to be
// copied into vregs, emit the copies into the top of the block before
@@ -628,7 +594,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
unsigned To = I->second;
// If To is also scheduled to be replaced, find what its ultimate
// replacement is.
- for (;;) {
+ while (true) {
DenseMap<unsigned, unsigned>::iterator J = FuncInfo->RegFixups.find(To);
if (J == E) break;
To = J->second;
@@ -648,13 +614,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
MRI.replaceRegWith(From, To);
}
- if (TLI->hasCopyImplyingStackAdjustment(MF))
- MFI.setHasCopyImplyingStackAdjustment(true);
-
- // Freeze the set of reserved registers now that MachineFrameInfo has been
- // set up. All the information required by getReservedRegs() should be
- // available now.
- MRI.freezeReservedRegs(*MF);
+ TLI->finalizeLowering(*MF);
// Release function-specific state. SDB and CurDAG are already cleared
// at this point.
@@ -666,13 +626,30 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
return true;
}
+static void reportFastISelFailure(MachineFunction &MF,
+ OptimizationRemarkEmitter &ORE,
+ OptimizationRemarkMissed &R,
+ bool ShouldAbort) {
+ // Print the function name explicitly if we don't have a debug location (which
+ // makes the diagnostic less useful) or if we're going to emit a raw error.
+ if (!R.getLocation().isValid() || ShouldAbort)
+ R << (" (in function: " + MF.getName() + ")").str();
+
+ if (ShouldAbort)
+ report_fatal_error(R.getMsg());
+
+ ORE.emit(R);
+}
+
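The helper is exercised by the fast-isel fallback paths added later in this patch; a condensed sketch of one call site, matching the argument-lowering failure below:

    // Build a missed-optimization remark, then report it; the report is
    // fatal only when -fast-isel-abort requests it.
    OptimizationRemarkMissed R("sdagisel", "FastISelFailure",
                               Fn.getSubprogram(), &Fn.getEntryBlock());
    R << "FastISel didn't lower all arguments";
    reportFastISelFailure(*MF, *ORE, R, EnableFastISelAbort > 1);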
void SelectionDAGISel::SelectBasicBlock(BasicBlock::const_iterator Begin,
BasicBlock::const_iterator End,
bool &HadTailCall) {
// Lower the instructions. If a call is emitted as a tail call, cease emitting
// nodes for this block.
- for (BasicBlock::const_iterator I = Begin; I != End && !SDB->HasTailCall; ++I)
- SDB->visit(*I);
+ for (BasicBlock::const_iterator I = Begin; I != End && !SDB->HasTailCall; ++I) {
+ if (!ElidedArgCopyInstrs.count(&*I))
+ SDB->visit(*I);
+ }
// Make sure the root of the DAG is up-to-date.
CurDAG->setRoot(SDB->getControlRoot());
@@ -689,8 +666,7 @@ void SelectionDAGISel::ComputeLiveOutVRegInfo() {
Worklist.push_back(CurDAG->getRoot().getNode());
- APInt KnownZero;
- APInt KnownOne;
+ KnownBits Known;
do {
SDNode *N = Worklist.pop_back_val();
@@ -719,8 +695,8 @@ void SelectionDAGISel::ComputeLiveOutVRegInfo() {
continue;
unsigned NumSignBits = CurDAG->ComputeNumSignBits(Src);
- CurDAG->computeKnownBits(Src, KnownZero, KnownOne);
- FuncInfo->AddLiveOutRegInfo(DestReg, NumSignBits, KnownZero, KnownOne);
+ CurDAG->computeKnownBits(Src, Known);
+ FuncInfo->AddLiveOutRegInfo(DestReg, NumSignBits, Known);
} while (!Worklist.empty());
}
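KnownBits bundles the two masks that were previously threaded around separately; a sketch of querying it, assuming the LLVM 5.0-era interface with public Zero/One APInt members:

    // A bit set in Known.Zero is proven 0; a bit set in Known.One is
    // proven 1; a bit set in neither mask is unknown.
    KnownBits Known;
    CurDAG->computeKnownBits(Src, Known);
    bool SignBitIsZero = Known.Zero.isNegative(); // top bit of Zero mask set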
@@ -731,6 +707,10 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
int BlockNumber = -1;
(void)BlockNumber;
bool MatchFilterBB = false; (void)MatchFilterBB;
+
+ // Pre-type legalization allows creation of any node types.
+ CurDAG->NewNodesMustHaveLegalTypes = false;
+
#ifndef NDEBUG
MatchFilterBB = (FilterDAGBasicBlockName.empty() ||
FilterDAGBasicBlockName ==
@@ -756,7 +736,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
{
NamedRegionTimer T("combine1", "DAG Combining 1", GroupName,
GroupDescription, TimePassesIsEnabled);
- CurDAG->Combine(BeforeLegalizeTypes, *AA, OptLevel);
+ CurDAG->Combine(BeforeLegalizeTypes, AA, OptLevel);
}
DEBUG(dbgs() << "Optimized lowered selection DAG: BB#" << BlockNumber
@@ -777,6 +757,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
DEBUG(dbgs() << "Type-legalized selection DAG: BB#" << BlockNumber
<< " '" << BlockName << "'\n"; CurDAG->dump());
+ // Only allow creation of legal node types.
CurDAG->NewNodesMustHaveLegalTypes = true;
if (Changed) {
@@ -787,12 +768,11 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
{
NamedRegionTimer T("combine_lt", "DAG Combining after legalize types",
GroupName, GroupDescription, TimePassesIsEnabled);
- CurDAG->Combine(AfterLegalizeTypes, *AA, OptLevel);
+ CurDAG->Combine(AfterLegalizeTypes, AA, OptLevel);
}
DEBUG(dbgs() << "Optimized type-legalized selection DAG: BB#" << BlockNumber
<< " '" << BlockName << "'\n"; CurDAG->dump());
-
}
{
@@ -802,12 +782,18 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
}
if (Changed) {
+ DEBUG(dbgs() << "Vector-legalized selection DAG: BB#" << BlockNumber
+ << " '" << BlockName << "'\n"; CurDAG->dump());
+
{
NamedRegionTimer T("legalize_types2", "Type Legalization 2", GroupName,
GroupDescription, TimePassesIsEnabled);
CurDAG->LegalizeTypes();
}
+ DEBUG(dbgs() << "Vector/type-legalized selection DAG: BB#" << BlockNumber
+ << " '" << BlockName << "'\n"; CurDAG->dump());
+
if (ViewDAGCombineLT && MatchFilterBB)
CurDAG->viewGraph("dag-combine-lv input for " + BlockName);
@@ -815,7 +801,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
{
NamedRegionTimer T("combine_lv", "DAG Combining after legalize vectors",
GroupName, GroupDescription, TimePassesIsEnabled);
- CurDAG->Combine(AfterLegalizeVectorOps, *AA, OptLevel);
+ CurDAG->Combine(AfterLegalizeVectorOps, AA, OptLevel);
}
DEBUG(dbgs() << "Optimized vector-legalized selection DAG: BB#"
@@ -841,7 +827,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
{
NamedRegionTimer T("combine2", "DAG Combining 2", GroupName,
GroupDescription, TimePassesIsEnabled);
- CurDAG->Combine(AfterLegalizeDAG, *AA, OptLevel);
+ CurDAG->Combine(AfterLegalizeDAG, AA, OptLevel);
}
DEBUG(dbgs() << "Optimized legalized selection DAG: BB#" << BlockNumber
@@ -907,10 +893,12 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
}
namespace {
+
/// ISelUpdater - helper class to handle updates of the instruction selection
/// graph.
class ISelUpdater : public SelectionDAG::DAGUpdateListener {
SelectionDAG::allnodes_iterator &ISelPosition;
+
public:
ISelUpdater(SelectionDAG &DAG, SelectionDAG::allnodes_iterator &isp)
: SelectionDAG::DAGUpdateListener(DAG), ISelPosition(isp) {}
@@ -923,6 +911,7 @@ public:
++ISelPosition;
}
};
+
} // end anonymous namespace
void SelectionDAGISel::DoInstructionSelection() {
@@ -960,6 +949,19 @@ void SelectionDAGISel::DoInstructionSelection() {
if (Node->use_empty())
continue;
+ // When we are using non-default rounding modes or FP exception behavior,
+ // FP operations are represented by StrictFP pseudo-operations. They
+ // need to be simplified here so that the target-specific instruction
+ // selectors know how to handle them.
+ //
+ // If the current node is a strict FP pseudo-op (detected via
+ // isStrictFPOpcode()), mutateStrictFPToFP() rewrites it to the
+ // corresponding normal FP opcode.
+ //
+ // FIXME: The backends need a way to handle FP constraints.
+ if (Node->isStrictFPOpcode())
+ Node = CurDAG->mutateStrictFPToFP(Node);
+
Select(Node);
}
@@ -1046,116 +1048,6 @@ static bool isFoldedOrDeadInstruction(const Instruction *I,
!FuncInfo->isExportedInst(I); // Exported instrs must be computed.
}
-#ifndef NDEBUG
-// Collect per Instruction statistics for fast-isel misses. Only those
-// instructions that cause the bail are accounted for. It does not account for
-// instructions higher in the block. Thus, summing the per instructions stats
-// will not add up to what is reported by NumFastIselFailures.
-static void collectFailStats(const Instruction *I) {
- switch (I->getOpcode()) {
- default: assert (0 && "<Invalid operator> ");
-
- // Terminators
- case Instruction::Ret: NumFastIselFailRet++; return;
- case Instruction::Br: NumFastIselFailBr++; return;
- case Instruction::Switch: NumFastIselFailSwitch++; return;
- case Instruction::IndirectBr: NumFastIselFailIndirectBr++; return;
- case Instruction::Invoke: NumFastIselFailInvoke++; return;
- case Instruction::Resume: NumFastIselFailResume++; return;
- case Instruction::Unreachable: NumFastIselFailUnreachable++; return;
-
- // Standard binary operators...
- case Instruction::Add: NumFastIselFailAdd++; return;
- case Instruction::FAdd: NumFastIselFailFAdd++; return;
- case Instruction::Sub: NumFastIselFailSub++; return;
- case Instruction::FSub: NumFastIselFailFSub++; return;
- case Instruction::Mul: NumFastIselFailMul++; return;
- case Instruction::FMul: NumFastIselFailFMul++; return;
- case Instruction::UDiv: NumFastIselFailUDiv++; return;
- case Instruction::SDiv: NumFastIselFailSDiv++; return;
- case Instruction::FDiv: NumFastIselFailFDiv++; return;
- case Instruction::URem: NumFastIselFailURem++; return;
- case Instruction::SRem: NumFastIselFailSRem++; return;
- case Instruction::FRem: NumFastIselFailFRem++; return;
-
- // Logical operators...
- case Instruction::And: NumFastIselFailAnd++; return;
- case Instruction::Or: NumFastIselFailOr++; return;
- case Instruction::Xor: NumFastIselFailXor++; return;
-
- // Memory instructions...
- case Instruction::Alloca: NumFastIselFailAlloca++; return;
- case Instruction::Load: NumFastIselFailLoad++; return;
- case Instruction::Store: NumFastIselFailStore++; return;
- case Instruction::AtomicCmpXchg: NumFastIselFailAtomicCmpXchg++; return;
- case Instruction::AtomicRMW: NumFastIselFailAtomicRMW++; return;
- case Instruction::Fence: NumFastIselFailFence++; return;
- case Instruction::GetElementPtr: NumFastIselFailGetElementPtr++; return;
-
- // Convert instructions...
- case Instruction::Trunc: NumFastIselFailTrunc++; return;
- case Instruction::ZExt: NumFastIselFailZExt++; return;
- case Instruction::SExt: NumFastIselFailSExt++; return;
- case Instruction::FPTrunc: NumFastIselFailFPTrunc++; return;
- case Instruction::FPExt: NumFastIselFailFPExt++; return;
- case Instruction::FPToUI: NumFastIselFailFPToUI++; return;
- case Instruction::FPToSI: NumFastIselFailFPToSI++; return;
- case Instruction::UIToFP: NumFastIselFailUIToFP++; return;
- case Instruction::SIToFP: NumFastIselFailSIToFP++; return;
- case Instruction::IntToPtr: NumFastIselFailIntToPtr++; return;
- case Instruction::PtrToInt: NumFastIselFailPtrToInt++; return;
- case Instruction::BitCast: NumFastIselFailBitCast++; return;
-
- // Other instructions...
- case Instruction::ICmp: NumFastIselFailICmp++; return;
- case Instruction::FCmp: NumFastIselFailFCmp++; return;
- case Instruction::PHI: NumFastIselFailPHI++; return;
- case Instruction::Select: NumFastIselFailSelect++; return;
- case Instruction::Call: {
- if (auto const *Intrinsic = dyn_cast<IntrinsicInst>(I)) {
- switch (Intrinsic->getIntrinsicID()) {
- default:
- NumFastIselFailIntrinsicCall++; return;
- case Intrinsic::sadd_with_overflow:
- NumFastIselFailSAddWithOverflow++; return;
- case Intrinsic::uadd_with_overflow:
- NumFastIselFailUAddWithOverflow++; return;
- case Intrinsic::ssub_with_overflow:
- NumFastIselFailSSubWithOverflow++; return;
- case Intrinsic::usub_with_overflow:
- NumFastIselFailUSubWithOverflow++; return;
- case Intrinsic::smul_with_overflow:
- NumFastIselFailSMulWithOverflow++; return;
- case Intrinsic::umul_with_overflow:
- NumFastIselFailUMulWithOverflow++; return;
- case Intrinsic::frameaddress:
- NumFastIselFailFrameaddress++; return;
- case Intrinsic::sqrt:
- NumFastIselFailSqrt++; return;
- case Intrinsic::experimental_stackmap:
- NumFastIselFailStackMap++; return;
- case Intrinsic::experimental_patchpoint_void: // fall-through
- case Intrinsic::experimental_patchpoint_i64:
- NumFastIselFailPatchPoint++; return;
- }
- }
- NumFastIselFailCall++;
- return;
- }
- case Instruction::Shl: NumFastIselFailShl++; return;
- case Instruction::LShr: NumFastIselFailLShr++; return;
- case Instruction::AShr: NumFastIselFailAShr++; return;
- case Instruction::VAArg: NumFastIselFailVAArg++; return;
- case Instruction::ExtractElement: NumFastIselFailExtractElement++; return;
- case Instruction::InsertElement: NumFastIselFailInsertElement++; return;
- case Instruction::ShuffleVector: NumFastIselFailShuffleVector++; return;
- case Instruction::ExtractValue: NumFastIselFailExtractValue++; return;
- case Instruction::InsertValue: NumFastIselFailInsertValue++; return;
- case Instruction::LandingPad: NumFastIselFailLandingPad++; return;
- }
-}
-#endif // NDEBUG
-
/// Set up SwiftErrorVals by going through the function. If the function has
/// swifterror argument, it will be the first entry.
static void setupSwiftErrorVals(const Function &Fn, const TargetLowering *TLI,
@@ -1166,6 +1058,7 @@ static void setupSwiftErrorVals(const Function &Fn, const TargetLowering *TLI,
FuncInfo->SwiftErrorVals.clear();
FuncInfo->SwiftErrorVRegDefMap.clear();
FuncInfo->SwiftErrorVRegUpwardsUse.clear();
+ FuncInfo->SwiftErrorVRegDefUses.clear();
FuncInfo->SwiftErrorArg = nullptr;
// Check if function has a swifterror argument.
@@ -1190,9 +1083,9 @@ static void setupSwiftErrorVals(const Function &Fn, const TargetLowering *TLI,
}
static void createSwiftErrorEntriesInEntryBlock(FunctionLoweringInfo *FuncInfo,
+ FastISel *FastIS,
const TargetLowering *TLI,
const TargetInstrInfo *TII,
- const BasicBlock *LLVMBB,
SelectionDAGBuilder *SDB) {
if (!TLI->supportSwiftError())
return;
@@ -1202,21 +1095,71 @@ static void createSwiftErrorEntriesInEntryBlock(FunctionLoweringInfo *FuncInfo,
if (FuncInfo->SwiftErrorVals.empty())
return;
- if (pred_begin(LLVMBB) == pred_end(LLVMBB)) {
- auto &DL = FuncInfo->MF->getDataLayout();
- auto const *RC = TLI->getRegClassFor(TLI->getPointerTy(DL));
- for (const auto *SwiftErrorVal : FuncInfo->SwiftErrorVals) {
- // We will always generate a copy from the argument. It is always used at
- // least by the 'return' of the swifterror.
- if (FuncInfo->SwiftErrorArg && FuncInfo->SwiftErrorArg == SwiftErrorVal)
+ assert(FuncInfo->MBB == &*FuncInfo->MF->begin() &&
+ "expected to insert into entry block");
+ auto &DL = FuncInfo->MF->getDataLayout();
+ auto const *RC = TLI->getRegClassFor(TLI->getPointerTy(DL));
+ for (const auto *SwiftErrorVal : FuncInfo->SwiftErrorVals) {
+ // We will always generate a copy from the argument. It is always used at
+ // least by the 'return' of the swifterror.
+ if (FuncInfo->SwiftErrorArg && FuncInfo->SwiftErrorArg == SwiftErrorVal)
+ continue;
+ unsigned VReg = FuncInfo->MF->getRegInfo().createVirtualRegister(RC);
+ // Assign Undef to Vreg. We construct MI directly to make sure it works
+ // with FastISel.
+ BuildMI(*FuncInfo->MBB, FuncInfo->MBB->getFirstNonPHI(),
+ SDB->getCurDebugLoc(), TII->get(TargetOpcode::IMPLICIT_DEF),
+ VReg);
+
+ // Keep FastIS informed about the value we just inserted.
+ if (FastIS)
+ FastIS->setLastLocalValue(&*std::prev(FuncInfo->InsertPt));
+
+ FuncInfo->setCurrentSwiftErrorVReg(FuncInfo->MBB, SwiftErrorVal, VReg);
+ }
+}
+
+/// Collect llvm.dbg.declare information. This is done after argument lowering
+/// in case the declarations refer to arguments.
+static void processDbgDeclares(FunctionLoweringInfo *FuncInfo) {
+ MachineFunction *MF = FuncInfo->MF;
+ const DataLayout &DL = MF->getDataLayout();
+ for (const BasicBlock &BB : *FuncInfo->Fn) {
+ for (const Instruction &I : BB) {
+ const DbgDeclareInst *DI = dyn_cast<DbgDeclareInst>(&I);
+ if (!DI)
+ continue;
+
+ assert(DI->getVariable() && "Missing variable");
+ assert(DI->getDebugLoc() && "Missing location");
+ const Value *Address = DI->getAddress();
+ if (!Address)
+ continue;
+
+ // Look through casts and constant offset GEPs. These mostly come from
+ // inalloca.
+ APInt Offset(DL.getPointerSizeInBits(0), 0);
+ Address = Address->stripAndAccumulateInBoundsConstantOffsets(DL, Offset);
+
+ // Check if the variable is a static alloca or a byval or inalloca
+ // argument passed in memory. If it is not, then we will ignore this
+ // intrinsic and handle this during isel like dbg.value.
+ int FI = std::numeric_limits<int>::max();
+ if (const auto *AI = dyn_cast<AllocaInst>(Address)) {
+ auto SI = FuncInfo->StaticAllocaMap.find(AI);
+ if (SI != FuncInfo->StaticAllocaMap.end())
+ FI = SI->second;
+ } else if (const auto *Arg = dyn_cast<Argument>(Address))
+ FI = FuncInfo->getArgumentFrameIndex(Arg);
+
+ if (FI == std::numeric_limits<int>::max())
continue;
- unsigned VReg = FuncInfo->MF->getRegInfo().createVirtualRegister(RC);
- // Assign Undef to Vreg. We construct MI directly to make sure it works
- // with FastISel.
- BuildMI(*FuncInfo->MBB, FuncInfo->MBB->getFirstNonPHI(),
- SDB->getCurDebugLoc(), TII->get(TargetOpcode::IMPLICIT_DEF),
- VReg);
- FuncInfo->setCurrentSwiftErrorVReg(FuncInfo->MBB, SwiftErrorVal, VReg);
+
+ DIExpression *Expr = DI->getExpression();
+ if (Offset.getBoolValue())
+ Expr = DIExpression::prepend(Expr, DIExpression::NoDeref,
+ Offset.getZExtValue());
+ MF->setVariableDbgInfo(DI->getVariable(), Expr, FI, DI->getDebugLoc());
}
}
}
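A worked example of the offset folding above, with hypothetical IR names: for a dbg.declare whose address is an inbounds GEP eight bytes into an inalloca argument, the walk strips back to the argument before the frame index is recorded:

    // Hypothetical: %field = getelementptr inbounds at byte offset 8 from
    // %arg. stripAndAccumulateInBoundsConstantOffsets() yields
    // Address == %arg and Offset == 8, so the variable's expression gains
    // the offset (conceptually DW_OP_plus 8) before setVariableDbgInfo():
    if (Offset.getBoolValue())
      Expr = DIExpression::prepend(Expr, DIExpression::NoDeref,
                                   Offset.getZExtValue());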
@@ -1339,7 +1282,82 @@ static void propagateSwiftErrorVRegs(FunctionLoweringInfo *FuncInfo) {
}
}
+void preassignSwiftErrorRegs(const TargetLowering *TLI,
+ FunctionLoweringInfo *FuncInfo,
+ BasicBlock::const_iterator Begin,
+ BasicBlock::const_iterator End) {
+ if (!TLI->supportSwiftError() || FuncInfo->SwiftErrorVals.empty())
+ return;
+
+ // Iterate over instructions and assign vregs to swifterror defs and uses.
+ for (auto It = Begin; It != End; ++It) {
+ ImmutableCallSite CS(&*It);
+ if (CS) {
+ // A call-site with a swifterror argument is both use and def.
+ const Value *SwiftErrorAddr = nullptr;
+ for (auto &Arg : CS.args()) {
+ if (!Arg->isSwiftError())
+ continue;
+ // Use of swifterror.
+ assert(!SwiftErrorAddr && "Cannot have multiple swifterror arguments");
+ SwiftErrorAddr = &*Arg;
+ assert(SwiftErrorAddr->isSwiftError() &&
+ "Must have a swifterror value argument");
+ unsigned VReg; bool CreatedReg;
+ std::tie(VReg, CreatedReg) = FuncInfo->getOrCreateSwiftErrorVRegUseAt(
+ &*It, FuncInfo->MBB, SwiftErrorAddr);
+ assert(CreatedReg);
+ }
+ if (!SwiftErrorAddr)
+ continue;
+
+ // Def of swifterror.
+ unsigned VReg; bool CreatedReg;
+ std::tie(VReg, CreatedReg) =
+ FuncInfo->getOrCreateSwiftErrorVRegDefAt(&*It);
+ assert(CreatedReg);
+ FuncInfo->setCurrentSwiftErrorVReg(FuncInfo->MBB, SwiftErrorAddr, VReg);
+
+ // A load is a use.
+ } else if (const LoadInst *LI = dyn_cast<const LoadInst>(&*It)) {
+ const Value *V = LI->getOperand(0);
+ if (!V->isSwiftError())
+ continue;
+
+ unsigned VReg; bool CreatedReg;
+ std::tie(VReg, CreatedReg) =
+ FuncInfo->getOrCreateSwiftErrorVRegUseAt(LI, FuncInfo->MBB, V);
+ assert(CreatedReg);
+
+ // A store is a def.
+ } else if (const StoreInst *SI = dyn_cast<const StoreInst>(&*It)) {
+ const Value *SwiftErrorAddr = SI->getOperand(1);
+ if (!SwiftErrorAddr->isSwiftError())
+ continue;
+
+ // Def of swifterror.
+ unsigned VReg; bool CreatedReg;
+ std::tie(VReg, CreatedReg) =
+ FuncInfo->getOrCreateSwiftErrorVRegDefAt(&*It);
+ assert(CreatedReg);
+ FuncInfo->setCurrentSwiftErrorVReg(FuncInfo->MBB, SwiftErrorAddr, VReg);
+
+ // A return in a swifterror-returning function is a use.
+ } else if (const ReturnInst *R = dyn_cast<const ReturnInst>(&*It)) {
+ const Function *F = R->getParent()->getParent();
+ if (!F->getAttributes().hasAttrSomewhere(Attribute::SwiftError))
+ continue;
+
+ unsigned VReg; bool CreatedReg;
+ std::tie(VReg, CreatedReg) = FuncInfo->getOrCreateSwiftErrorVRegUseAt(
+ R, FuncInfo->MBB, FuncInfo->SwiftErrorArg);
+ assert(CreatedReg);
+ }
+ }
+}
+
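
To summarize the rules implemented above, a hedged sketch of the classification; the enum and helper are hypothetical, not part of the patch:

    // Sketch: the role an instruction plays for a swifterror value.
    enum class SwiftErrorRole { None, Use, Def, UseAndDef };

    static SwiftErrorRole classifySwiftError(const Instruction &I,
                                             const Function &F) {
      ImmutableCallSite CS(&I);
      if (CS) {
        for (const auto &Arg : CS.args())
          if (Arg->isSwiftError())
            return SwiftErrorRole::UseAndDef; // call reads then rewrites the slot
        return SwiftErrorRole::None;
      }
      if (const auto *LI = dyn_cast<LoadInst>(&I))
        if (LI->getPointerOperand()->isSwiftError())
          return SwiftErrorRole::Use;   // load observes the current vreg
      if (const auto *SI = dyn_cast<StoreInst>(&I))
        if (SI->getPointerOperand()->isSwiftError())
          return SwiftErrorRole::Def;   // store defines a new vreg
      if (isa<ReturnInst>(&I) &&
          F.getAttributes().hasAttrSomewhere(Attribute::SwiftError))
        return SwiftErrorRole::Use;     // return forwards the final value
      return SwiftErrorRole::None;
    }
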
void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
+ FastISelFailed = false;
// Initialize the Fast-ISel state, if needed.
FastISel *FastIS = nullptr;
if (TM.Options.EnableFastISel)
@@ -1347,12 +1365,55 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
setupSwiftErrorVals(Fn, TLI, FuncInfo);
- // Iterate over all basic blocks in the function.
ReversePostOrderTraversal<const Function*> RPOT(&Fn);
- for (ReversePostOrderTraversal<const Function*>::rpo_iterator
- I = RPOT.begin(), E = RPOT.end(); I != E; ++I) {
- const BasicBlock *LLVMBB = *I;
+ // Lower arguments up front. An RPO iteration always visits the entry block
+ // first.
+ assert(*RPOT.begin() == &Fn.getEntryBlock());
+ ++NumEntryBlocks;
+
+ // Set up FuncInfo for ISel. Entry blocks never have PHIs.
+ FuncInfo->MBB = FuncInfo->MBBMap[&Fn.getEntryBlock()];
+ FuncInfo->InsertPt = FuncInfo->MBB->begin();
+
+ if (!FastIS) {
+ LowerArguments(Fn);
+ } else {
+ // See if fast isel can lower the arguments.
+ FastIS->startNewBlock();
+ if (!FastIS->lowerArguments()) {
+ FastISelFailed = true;
+ // Fast isel failed to lower these arguments
+ ++NumFastIselFailLowerArguments;
+
+ OptimizationRemarkMissed R("sdagisel", "FastISelFailure",
+ Fn.getSubprogram(),
+ &Fn.getEntryBlock());
+ R << "FastISel didn't lower all arguments: "
+ << ore::NV("Prototype", Fn.getType());
+ reportFastISelFailure(*MF, *ORE, R, EnableFastISelAbort > 1);
+
+ // Use SelectionDAG argument lowering
+ LowerArguments(Fn);
+ CurDAG->setRoot(SDB->getControlRoot());
+ SDB->clear();
+ CodeGenAndEmitDAG();
+ }
+
+ // If we inserted any instructions at the beginning, make a note of
+ // where they are, so we can be sure to emit subsequent instructions
+ // after them.
+ if (FuncInfo->InsertPt != FuncInfo->MBB->begin())
+ FastIS->setLastLocalValue(&*std::prev(FuncInfo->InsertPt));
+ else
+ FastIS->setLastLocalValue(nullptr);
+ }
+ createSwiftErrorEntriesInEntryBlock(FuncInfo, FastIS, TLI, TII, SDB);
+
+ processDbgDeclares(FuncInfo);
+
+ // Iterate over all basic blocks in the function.
+ for (const BasicBlock *LLVMBB : RPOT) {
if (OptLevel != CodeGenOpt::None) {
bool AllPredsVisited = true;
for (const_pred_iterator PI = pred_begin(LLVMBB), PE = pred_end(LLVMBB);
@@ -1384,8 +1445,9 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
FuncInfo->MBB = FuncInfo->MBBMap[LLVMBB];
if (!FuncInfo->MBB)
continue; // Some blocks like catchpads have no code or MBB.
- FuncInfo->InsertPt = FuncInfo->MBB->getFirstNonPHI();
- createSwiftErrorEntriesInEntryBlock(FuncInfo, TLI, TII, LLVMBB, SDB);
+
+ // Insert new instructions after any phi or argument setup code.
+ FuncInfo->InsertPt = FuncInfo->MBB->end();
// Setup an EH landing-pad block.
FuncInfo->ExceptionPointerVirtReg = 0;
@@ -1396,43 +1458,21 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
// Before doing SelectionDAG ISel, see if FastISel has been requested.
if (FastIS) {
- FastIS->startNewBlock();
-
- // Emit code for any incoming arguments. This must happen before
- // beginning FastISel on the entry block.
- if (LLVMBB == &Fn.getEntryBlock()) {
- ++NumEntryBlocks;
-
- // Lower any arguments needed in this block if this is the entry block.
- if (!FastIS->lowerArguments()) {
- // Fast isel failed to lower these arguments
- ++NumFastIselFailLowerArguments;
- if (EnableFastISelAbort > 1)
- report_fatal_error("FastISel didn't lower all arguments");
-
- // Use SelectionDAG argument lowering
- LowerArguments(Fn);
- CurDAG->setRoot(SDB->getControlRoot());
- SDB->clear();
- CodeGenAndEmitDAG();
- }
-
- // If we inserted any instructions at the beginning, make a note of
- // where they are, so we can be sure to emit subsequent instructions
- // after them.
- if (FuncInfo->InsertPt != FuncInfo->MBB->begin())
- FastIS->setLastLocalValue(&*std::prev(FuncInfo->InsertPt));
- else
- FastIS->setLastLocalValue(nullptr);
- }
+ if (LLVMBB != &Fn.getEntryBlock())
+ FastIS->startNewBlock();
unsigned NumFastIselRemaining = std::distance(Begin, End);
+
+ // Pre-assign swifterror vregs.
+ preassignSwiftErrorRegs(TLI, FuncInfo, Begin, End);
+
// Do FastISel on as many instructions as possible.
for (; BI != Begin; --BI) {
const Instruction *Inst = &*std::prev(BI);
// If we no longer require this instruction, skip it.
- if (isFoldedOrDeadInstruction(Inst, FuncInfo)) {
+ if (isFoldedOrDeadInstruction(Inst, FuncInfo) ||
+ ElidedArgCopyInstrs.count(Inst)) {
--NumFastIselRemaining;
continue;
}
@@ -1465,22 +1505,28 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
continue;
}
-#ifndef NDEBUG
- if (EnableFastISelVerbose2)
- collectFailStats(Inst);
-#endif
+ FastISelFailed = true;
// Then handle certain instructions as single-LLVM-Instruction blocks.
- if (isa<CallInst>(Inst)) {
-
- if (EnableFastISelVerbose || EnableFastISelAbort) {
- dbgs() << "FastISel missed call: ";
- Inst->dump();
+ // We cannot separate out GC relocates to their own blocks since we need
+ // to keep track of gc-relocates for a particular gc-statepoint. This is
+ // done by SelectionDAGBuilder::LowerAsSTATEPOINT, called before
+ // visitGCRelocate.
+ if (isa<CallInst>(Inst) && !isStatepoint(Inst) && !isGCRelocate(Inst)) {
+ OptimizationRemarkMissed R("sdagisel", "FastISelFailure",
+ Inst->getDebugLoc(), LLVMBB);
+
+ R << "FastISel missed call";
+
+ if (R.isEnabled() || EnableFastISelAbort) {
+ std::string InstStrStorage;
+ raw_string_ostream InstStr(InstStrStorage);
+ InstStr << *Inst;
+
+ R << ": " << InstStr.str();
}
- if (EnableFastISelAbort > 2)
- // FastISel selector couldn't handle something and bailed.
- // For the purpose of debugging, just abort.
- report_fatal_error("FastISel didn't select the entire block");
+
+ reportFastISelFailure(*MF, *ORE, R, EnableFastISelAbort > 2);
if (!Inst->getType()->isVoidTy() && !Inst->getType()->isTokenTy() &&
!Inst->use_empty()) {
@@ -1509,35 +1555,35 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
continue;
}
+ OptimizationRemarkMissed R("sdagisel", "FastISelFailure",
+ Inst->getDebugLoc(), LLVMBB);
+
bool ShouldAbort = EnableFastISelAbort;
- if (EnableFastISelVerbose || EnableFastISelAbort) {
- if (isa<TerminatorInst>(Inst)) {
- // Use a different message for terminator misses.
- dbgs() << "FastISel missed terminator: ";
- // Don't abort unless for terminator unless the level is really high
- ShouldAbort = (EnableFastISelAbort > 2);
- } else {
- dbgs() << "FastISel miss: ";
- }
- Inst->dump();
+ if (isa<TerminatorInst>(Inst)) {
+ // Use a different message for terminator misses.
+ R << "FastISel missed terminator";
+ // Don't abort for terminators unless the level is really high
+ ShouldAbort = (EnableFastISelAbort > 2);
+ } else {
+ R << "FastISel missed";
}
- if (ShouldAbort)
- // FastISel selector couldn't handle something and bailed.
- // For the purpose of debugging, just abort.
- report_fatal_error("FastISel didn't select the entire block");
+
+ if (R.isEnabled() || EnableFastISelAbort) {
+ std::string InstStrStorage;
+ raw_string_ostream InstStr(InstStrStorage);
+ InstStr << *Inst;
+ R << ": " << InstStr.str();
+ }
+
+ reportFastISelFailure(*MF, *ORE, R, ShouldAbort);
NumFastIselFailures += NumFastIselRemaining;
break;
}
FastIS->recomputeInsertPt();
- } else {
- // Lower any arguments needed in this block if this is the entry block.
- if (LLVMBB == &Fn.getEntryBlock()) {
- ++NumEntryBlocks;
- LowerArguments(Fn);
- }
}
+
if (getAnalysis<StackProtector>().shouldEmitSDCheck(*LLVMBB)) {
bool FunctionBasedInstrumentation =
TLI->getSSPStackGuardCheck(*Fn.getParent());
@@ -1556,10 +1602,17 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
// block.
bool HadTailCall;
SelectBasicBlock(Begin, BI, HadTailCall);
+
+ // But if FastISel was run, we already selected some of the block.
+ // If we emitted a tail-call, we need to delete any previously emitted
+ // instructions that follow it.
+ if (HadTailCall && FuncInfo->InsertPt != FuncInfo->MBB->end())
+ FastIS->removeDeadCode(FuncInfo->InsertPt, FuncInfo->MBB->end());
}
FinishBasicBlock();
FuncInfo->PHINodesToUpdate.clear();
+ ElidedArgCopyInstrs.clear();
}
propagateSwiftErrorVRegs(FuncInfo);
@@ -1975,11 +2028,11 @@ bool SelectionDAGISel::CheckOrMask(SDValue LHS, ConstantSDNode *RHS,
// either already zero or is not demanded. Check for known zero input bits.
APInt NeededMask = DesiredMask & ~ActualMask;
- APInt KnownZero, KnownOne;
- CurDAG->computeKnownBits(LHS, KnownZero, KnownOne);
+ KnownBits Known;
+ CurDAG->computeKnownBits(LHS, Known);
// If all the missing bits in the or are already known to be set, match!
- if ((NeededMask & KnownOne) == NeededMask)
+ if (NeededMask.isSubsetOf(Known.One))
return true;
// TODO: check to see if missing bits are just not demanded.
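
The isSubsetOf form is equivalent to the old mask comparison, just more direct; a standalone sketch over APInt:

    // Sketch: NeededMask.isSubsetOf(KnownOne) <=> (NeededMask & KnownOne) == NeededMask.
    void subsetOfExample() {
      APInt NeededMask(8, 0x06); // bits 1-2 still needed
      APInt KnownOne(8, 0x0E);   // bits 1-3 known one
      bool OldForm = (NeededMask & KnownOne) == NeededMask; // true
      bool NewForm = NeededMask.isSubsetOf(KnownOne);       // true, same test
      (void)OldForm; (void)NewForm;
    }
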
@@ -2062,7 +2115,7 @@ static SDNode *findGlueUse(SDNode *N) {
}
/// findNonImmUse - Return true if "Use" is a non-immediate use of "Def".
-/// This function recursively traverses up the operand chain, ignoring
+/// This function iteratively traverses up the operand chain, ignoring
/// certain nodes.
static bool findNonImmUse(SDNode *Use, SDNode* Def, SDNode *ImmedUse,
SDNode *Root, SmallPtrSetImpl<SDNode*> &Visited,
@@ -2075,30 +2128,36 @@ static bool findNonImmUse(SDNode *Use, SDNode* Def, SDNode *ImmedUse,
// The Use may be -1 (unassigned) if it is a newly allocated node. This can
// happen because we scan down to newly selected nodes in the case of glue
// uses.
- if ((Use->getNodeId() < Def->getNodeId() && Use->getNodeId() != -1))
- return false;
+ std::vector<SDNode *> WorkList;
+ WorkList.push_back(Use);
- // Don't revisit nodes if we already scanned it and didn't fail, we know we
- // won't fail if we scan it again.
- if (!Visited.insert(Use).second)
- return false;
+ while (!WorkList.empty()) {
+ Use = WorkList.back();
+ WorkList.pop_back();
+ if (Use->getNodeId() < Def->getNodeId() && Use->getNodeId() != -1)
+ continue;
- for (const SDValue &Op : Use->op_values()) {
- // Ignore chain uses, they are validated by HandleMergeInputChains.
- if (Op.getValueType() == MVT::Other && IgnoreChains)
+ // Don't revisit nodes we already scanned without failing; we know we
+ // won't fail if we scan them again.
+ if (!Visited.insert(Use).second)
continue;
- SDNode *N = Op.getNode();
- if (N == Def) {
- if (Use == ImmedUse || Use == Root)
- continue; // We are not looking for immediate use.
- assert(N != Root);
- return true;
- }
+ for (const SDValue &Op : Use->op_values()) {
+ // Ignore chain uses, they are validated by HandleMergeInputChains.
+ if (Op.getValueType() == MVT::Other && IgnoreChains)
+ continue;
- // Traverse up the operand chain.
- if (findNonImmUse(N, Def, ImmedUse, Root, Visited, IgnoreChains))
- return true;
+ SDNode *N = Op.getNode();
+ if (N == Def) {
+ if (Use == ImmedUse || Use == Root)
+ continue; // We are not looking for immediate use.
+ assert(N != Root);
+ return true;
+ }
+
+ // Traverse up the operand chain.
+ WorkList.push_back(N);
+ }
}
return false;
}
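
The rewrite above is the standard conversion of a recursive DFS into an explicit worklist, bounding stack depth on deep DAGs. A generic sketch of the pattern; the operand accessor is hypothetical:

    // Sketch: recursion-to-worklist, same visited-set pruning as above.
    template <typename NodeT, typename PredT>
    bool anyReachable(NodeT *Start, PredT Matches,
                      SmallPtrSetImpl<NodeT *> &Visited) {
      std::vector<NodeT *> WorkList;
      WorkList.push_back(Start);
      while (!WorkList.empty()) {
        NodeT *N = WorkList.back();
        WorkList.pop_back();
        if (!Visited.insert(N).second)
          continue;                         // already scanned without failing
        if (Matches(N))
          return true;
        for (NodeT *Op : N->operandNodes()) // hypothetical accessor
          WorkList.push_back(Op);
      }
      return false;
    }
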
@@ -2177,7 +2236,6 @@ bool SelectionDAGISel::IsLegalToFold(SDValue N, SDNode *U, SDNode *Root,
IgnoreChains = false;
}
-
SmallPtrSet<SDNode*, 16> Visited;
return !findNonImmUse(Root, N.getNode(), U, Root, Visited, IgnoreChains);
}
@@ -2554,7 +2612,7 @@ MorphNode(SDNode *Node, unsigned TargetOpc, SDVTList VTList,
LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool
CheckSame(const unsigned char *MatcherTable, unsigned &MatcherIndex,
SDValue N,
- const SmallVectorImpl<std::pair<SDValue, SDNode*> > &RecordedNodes) {
+ const SmallVectorImpl<std::pair<SDValue, SDNode*>> &RecordedNodes) {
// Accept if it is exactly the same as a previously recorded node.
unsigned RecNo = MatcherTable[MatcherIndex++];
assert(RecNo < RecordedNodes.size() && "Invalid CheckSame");
@@ -2564,9 +2622,9 @@ CheckSame(const unsigned char *MatcherTable, unsigned &MatcherIndex,
/// CheckChildSame - Implements OP_CheckChildXSame.
LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool
CheckChildSame(const unsigned char *MatcherTable, unsigned &MatcherIndex,
- SDValue N,
- const SmallVectorImpl<std::pair<SDValue, SDNode*> > &RecordedNodes,
- unsigned ChildNo) {
+ SDValue N,
+ const SmallVectorImpl<std::pair<SDValue, SDNode*>> &RecordedNodes,
+ unsigned ChildNo) {
if (ChildNo >= N.getNumOperands())
return false; // Match fails if out of range child #.
return ::CheckSame(MatcherTable, MatcherIndex, N.getOperand(ChildNo),
@@ -2688,7 +2746,7 @@ static unsigned IsPredicateKnownToFail(const unsigned char *Table,
unsigned Index, SDValue N,
bool &Result,
const SelectionDAGISel &SDISel,
- SmallVectorImpl<std::pair<SDValue, SDNode*> > &RecordedNodes) {
+ SmallVectorImpl<std::pair<SDValue, SDNode*>> &RecordedNodes) {
switch (Table[Index++]) {
default:
Result = false;
@@ -2756,6 +2814,7 @@ static unsigned IsPredicateKnownToFail(const unsigned char *Table,
}
namespace {
+
struct MatchScope {
/// FailIndex - If this match fails, this is the index to continue with.
unsigned FailIndex;
@@ -2785,6 +2844,7 @@ class MatchStateUpdater : public SelectionDAG::DAGUpdateListener
SDNode **NodeToMatch;
SmallVectorImpl<std::pair<SDValue, SDNode *>> &RecordedNodes;
SmallVectorImpl<MatchScope> &MatchScopes;
+
public:
MatchStateUpdater(SelectionDAG &DAG, SDNode **NodeToMatch,
SmallVectorImpl<std::pair<SDValue, SDNode *>> &RN,
@@ -2816,6 +2876,7 @@ public:
J.setNode(E);
}
};
+
} // end anonymous namespace
void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch,
@@ -2921,7 +2982,7 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch,
// with an OPC_SwitchOpcode instruction. Populate the table now, since this
// is the first time we're selecting an instruction.
unsigned Idx = 1;
- while (1) {
+ while (true) {
// Get the size of this case.
unsigned CaseSize = MatcherTable[Idx++];
if (CaseSize & 128)
@@ -2942,7 +3003,7 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch,
MatcherIndex = OpcodeOffset[N.getOpcode()];
}
- while (1) {
+ while (true) {
assert(MatcherIndex < TableSize && "Invalid index");
#ifndef NDEBUG
unsigned CurrentOpcodeIndex = MatcherIndex;
@@ -2957,7 +3018,7 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch,
// immediately fail, don't even bother pushing a scope for them.
unsigned FailIndex;
- while (1) {
+ while (true) {
unsigned NumToSkip = MatcherTable[MatcherIndex++];
if (NumToSkip & 128)
NumToSkip = GetVBR(NumToSkip, MatcherTable, MatcherIndex);
@@ -3118,7 +3179,7 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch,
unsigned CurNodeOpcode = N.getOpcode();
unsigned SwitchStart = MatcherIndex-1; (void)SwitchStart;
unsigned CaseSize;
- while (1) {
+ while (true) {
// Get the size of this case.
CaseSize = MatcherTable[MatcherIndex++];
if (CaseSize & 128)
@@ -3149,7 +3210,7 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch,
MVT CurNodeVT = N.getSimpleValueType();
unsigned SwitchStart = MatcherIndex-1; (void)SwitchStart;
unsigned CaseSize;
- while (1) {
+ while (true) {
// Get the size of this case.
CaseSize = MatcherTable[MatcherIndex++];
if (CaseSize & 128)
@@ -3215,7 +3276,7 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch,
// a single use.
bool HasMultipleUses = false;
for (unsigned i = 1, e = NodeStack.size()-1; i != e; ++i)
- if (!NodeStack[i].hasOneUse()) {
+ if (!NodeStack[i].getNode()->hasOneUse()) {
HasMultipleUses = true;
break;
}
@@ -3381,6 +3442,15 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch,
RecordedNodes.push_back(std::pair<SDValue,SDNode*>(Res, nullptr));
continue;
}
+ case OPC_Coverage: {
+ // This is emitted right before MorphNode/EmitNode.
+ // So it should be safe to assume that this node has been selected.
+ unsigned index = MatcherTable[MatcherIndex++];
+ index |= (MatcherTable[MatcherIndex++] << 8);
+ dbgs() << "COVERED: " << getPatternForIndex(index) << "\n";
+ dbgs() << "INCLUDED: " << getIncludePathForIndex(index) << "\n";
+ continue;
+ }
case OPC_EmitNode: case OPC_MorphNodeTo:
case OPC_EmitNode0: case OPC_EmitNode1: case OPC_EmitNode2:
@@ -3473,7 +3543,6 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch,
RecordedNodes.push_back(std::pair<SDValue,SDNode*>(SDValue(Res, i),
nullptr));
}
-
} else {
assert(NodeToMatch->getOpcode() != ISD::DELETED_NODE &&
"NodeToMatch was removed partway through selection");
@@ -3610,7 +3679,7 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch,
// find a case to check.
DEBUG(dbgs() << " Match failed at index " << CurrentOpcodeIndex << "\n");
++NumDAGIselRetries;
- while (1) {
+ while (true) {
if (MatchScopes.empty()) {
CannotYetSelect(NodeToMatch);
return;
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
index 2764688..11561df 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
@@ -11,13 +11,13 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/CodeGen/SelectionDAG.h"
#include "ScheduleDAGSDNodes.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/Support/Debug.h"
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
index d27e245..5d78bba 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
@@ -17,9 +17,9 @@
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/GCMetadata.h"
#include "llvm/CodeGen/GCStrategy.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/StackMaps.h"
#include "llvm/IR/CallingConv.h"
@@ -110,8 +110,8 @@ StatepointLoweringState::allocateStackSlot(EVT ValueType,
Builder.FuncInfo.StatepointStackSlots.size() &&
"Broken invariant");
- StatepointMaxSlotsRequired = std::max<unsigned long>(
- StatepointMaxSlotsRequired, Builder.FuncInfo.StatepointStackSlots.size());
+ StatepointMaxSlotsRequired.updateMax(
+ Builder.FuncInfo.StatepointStackSlots.size());
return SpillSlot;
}
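
Statistic::updateMax folds the take-the-maximum update into the Statistic itself, so the caller no longer spells out std::max. A usage sketch, assuming the usual STATISTIC macro setup (a DEBUG_TYPE plus llvm/ADT/Statistic.h):

    STATISTIC(MaxSlotsSeen, "Maximum number of stack slots required");

    void noteSlotCount(size_t NumSlots) {
      MaxSlotsSeen.updateMax(NumSlots); // keeps the largest value observed
    }
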
@@ -242,7 +242,8 @@ static void reservePreviousStackSlotForValue(const Value *IncomingValue,
// Cache this slot so we find it when going through the normal
// assignment loop.
- SDValue Loc = Builder.DAG.getTargetFrameIndex(*Index, Incoming.getValueType());
+ SDValue Loc =
+ Builder.DAG.getTargetFrameIndex(*Index, Builder.getFrameIndexTy());
Builder.StatepointLowering.setLocation(Incoming, Loc);
}
@@ -343,7 +344,7 @@ spillIncomingStatepointValue(SDValue Incoming, SDValue Chain,
Builder);
int Index = cast<FrameIndexSDNode>(Loc)->getIndex();
// We use TargetFrameIndex so that isel will not select it into LEA
- Loc = Builder.DAG.getTargetFrameIndex(Index, Incoming.getValueType());
+ Loc = Builder.DAG.getTargetFrameIndex(Index, Builder.getFrameIndexTy());
// TODO: We can create TokenFactor node instead of
// chaining stores one after another, this may allow
@@ -391,8 +392,10 @@ static void lowerIncomingStatepointValue(SDValue Incoming, bool LiveInOnly,
// This handles allocas as arguments to the statepoint (this is only
// really meaningful for a deopt value. For GC, we'd be trying to
// relocate the address of the alloca itself?)
+ assert(Incoming.getValueType() == Builder.getFrameIndexTy() &&
+ "Incoming value is a frame index!");
Ops.push_back(Builder.DAG.getTargetFrameIndex(FI->getIndex(),
- Incoming.getValueType()));
+ Builder.getFrameIndexTy()));
} else if (LiveInOnly) {
// If this value is live in (not live-on-return, or live-through), we can
// treat it the same way patchpoint treats its "live in" values. We'll
@@ -527,8 +530,10 @@ lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops,
SDValue Incoming = Builder.getValue(V);
if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Incoming)) {
// This handles allocas as arguments to the statepoint
+ assert(Incoming.getValueType() == Builder.getFrameIndexTy() &&
+ "Incoming value is a frame index!");
Ops.push_back(Builder.DAG.getTargetFrameIndex(FI->getIndex(),
- Incoming.getValueType()));
+ Builder.getFrameIndexTy()));
}
}
@@ -813,7 +818,7 @@ SelectionDAGBuilder::LowerStatepoint(ImmutableStatepoint ISP,
SI.GCTransitionArgs =
ArrayRef<const Use>(ISP.gc_args_begin(), ISP.gc_args_end());
SI.ID = ISP.getID();
- SI.DeoptState = ArrayRef<const Use>(ISP.vm_state_begin(), ISP.vm_state_end());
+ SI.DeoptState = ArrayRef<const Use>(ISP.deopt_begin(), ISP.deopt_end());
SI.StatepointFlags = ISP.getFlags();
SI.NumPatchBytes = ISP.getNumPatchBytes();
SI.EHPadBB = EHPadBB;
@@ -835,7 +840,7 @@ SelectionDAGBuilder::LowerStatepoint(ImmutableStatepoint ISP,
// completely and make statepoint call to return a tuple.
unsigned Reg = FuncInfo.CreateRegs(RetTy);
RegsForValue RFV(*DAG.getContext(), DAG.getTargetLoweringInfo(),
- DAG.getDataLayout(), Reg, RetTy);
+ DAG.getDataLayout(), Reg, RetTy, true);
SDValue Chain = DAG.getEntryNode();
RFV.getCopyToRegs(ReturnValue, DAG, getCurSDLoc(), Chain, nullptr);
@@ -949,8 +954,8 @@ void SelectionDAGBuilder::visitGCRelocate(const GCRelocateInst &Relocate) {
return;
}
- SDValue SpillSlot = DAG.getTargetFrameIndex(*DerivedPtrLocation,
- SD.getValueType());
+ SDValue SpillSlot =
+ DAG.getTargetFrameIndex(*DerivedPtrLocation, getFrameIndexTy());
// Be conservative: flush all pending loads
// TODO: Probably we can be less restrictive on this,
@@ -958,7 +963,9 @@ void SelectionDAGBuilder::visitGCRelocate(const GCRelocateInst &Relocate) {
SDValue Chain = getRoot();
SDValue SpillLoad =
- DAG.getLoad(SpillSlot.getValueType(), getCurSDLoc(), Chain, SpillSlot,
+ DAG.getLoad(DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
+ Relocate.getType()),
+ getCurSDLoc(), Chain, SpillSlot,
MachinePointerInfo::getFixedStack(DAG.getMachineFunction(),
*DerivedPtrLocation));
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 690f0d2..8652df7 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -27,6 +27,7 @@
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetMachine.h"
@@ -55,14 +56,15 @@ bool TargetLowering::isInTailCallPosition(SelectionDAG &DAG, SDNode *Node,
// Conservatively require the attributes of the call to match those of
// the return. Ignore noalias because it doesn't affect the call sequence.
- AttributeSet CallerAttrs = F->getAttributes();
- if (AttrBuilder(CallerAttrs, AttributeSet::ReturnIndex)
- .removeAttribute(Attribute::NoAlias).hasAttributes())
+ AttributeList CallerAttrs = F->getAttributes();
+ if (AttrBuilder(CallerAttrs, AttributeList::ReturnIndex)
+ .removeAttribute(Attribute::NoAlias)
+ .hasAttributes())
return false;
// It's not safe to eliminate the sign / zero extension of the return value.
- if (CallerAttrs.hasAttribute(AttributeSet::ReturnIndex, Attribute::ZExt) ||
- CallerAttrs.hasAttribute(AttributeSet::ReturnIndex, Attribute::SExt))
+ if (CallerAttrs.hasAttribute(AttributeList::ReturnIndex, Attribute::ZExt) ||
+ CallerAttrs.hasAttribute(AttributeList::ReturnIndex, Attribute::SExt))
return false;
// Check if the only use is a function return node.
@@ -96,19 +98,19 @@ bool TargetLowering::parametersInCSRMatch(const MachineRegisterInfo &MRI,
/// \brief Set CallLoweringInfo attribute flags based on a call instruction
/// and called function attributes.
-void TargetLowering::ArgListEntry::setAttributes(ImmutableCallSite *CS,
- unsigned AttrIdx) {
- isSExt = CS->paramHasAttr(AttrIdx, Attribute::SExt);
- isZExt = CS->paramHasAttr(AttrIdx, Attribute::ZExt);
- isInReg = CS->paramHasAttr(AttrIdx, Attribute::InReg);
- isSRet = CS->paramHasAttr(AttrIdx, Attribute::StructRet);
- isNest = CS->paramHasAttr(AttrIdx, Attribute::Nest);
- isByVal = CS->paramHasAttr(AttrIdx, Attribute::ByVal);
- isInAlloca = CS->paramHasAttr(AttrIdx, Attribute::InAlloca);
- isReturned = CS->paramHasAttr(AttrIdx, Attribute::Returned);
- isSwiftSelf = CS->paramHasAttr(AttrIdx, Attribute::SwiftSelf);
- isSwiftError = CS->paramHasAttr(AttrIdx, Attribute::SwiftError);
- Alignment = CS->getParamAlignment(AttrIdx);
+void TargetLoweringBase::ArgListEntry::setAttributes(ImmutableCallSite *CS,
+ unsigned ArgIdx) {
+ IsSExt = CS->paramHasAttr(ArgIdx, Attribute::SExt);
+ IsZExt = CS->paramHasAttr(ArgIdx, Attribute::ZExt);
+ IsInReg = CS->paramHasAttr(ArgIdx, Attribute::InReg);
+ IsSRet = CS->paramHasAttr(ArgIdx, Attribute::StructRet);
+ IsNest = CS->paramHasAttr(ArgIdx, Attribute::Nest);
+ IsByVal = CS->paramHasAttr(ArgIdx, Attribute::ByVal);
+ IsInAlloca = CS->paramHasAttr(ArgIdx, Attribute::InAlloca);
+ IsReturned = CS->paramHasAttr(ArgIdx, Attribute::Returned);
+ IsSwiftSelf = CS->paramHasAttr(ArgIdx, Attribute::SwiftSelf);
+ IsSwiftError = CS->paramHasAttr(ArgIdx, Attribute::SwiftError);
+ Alignment = CS->getParamAlignment(ArgIdx);
}
/// Generate a libcall taking the given operands as arguments and returning a
@@ -125,8 +127,8 @@ TargetLowering::makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT,
for (SDValue Op : Ops) {
Entry.Node = Op;
Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext());
- Entry.isSExt = shouldSignExtendTypeInLibCall(Op.getValueType(), isSigned);
- Entry.isZExt = !shouldSignExtendTypeInLibCall(Op.getValueType(), isSigned);
+ Entry.IsSExt = shouldSignExtendTypeInLibCall(Op.getValueType(), isSigned);
+ Entry.IsZExt = !shouldSignExtendTypeInLibCall(Op.getValueType(), isSigned);
Args.push_back(Entry);
}
@@ -138,10 +140,13 @@ TargetLowering::makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT,
Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
TargetLowering::CallLoweringInfo CLI(DAG);
bool signExtend = shouldSignExtendTypeInLibCall(RetVT, isSigned);
- CLI.setDebugLoc(dl).setChain(DAG.getEntryNode())
- .setCallee(getLibcallCallingConv(LC), RetTy, Callee, std::move(Args))
- .setNoReturn(doesNotReturn).setDiscardResult(!isReturnValueUsed)
- .setSExtResult(signExtend).setZExtResult(!signExtend);
+ CLI.setDebugLoc(dl)
+ .setChain(DAG.getEntryNode())
+ .setLibCallee(getLibcallCallingConv(LC), RetTy, Callee, std::move(Args))
+ .setNoReturn(doesNotReturn)
+ .setDiscardResult(!isReturnValueUsed)
+ .setSExtResult(signExtend)
+ .setZExtResult(!signExtend);
return LowerCallTo(CLI);
}
@@ -334,34 +339,40 @@ TargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
// Optimization Methods
//===----------------------------------------------------------------------===//
-/// Check to see if the specified operand of the specified instruction is a
-/// constant integer. If so, check to see if there are any bits set in the
-/// constant that are not demanded. If so, shrink the constant and return true.
-bool TargetLowering::TargetLoweringOpt::ShrinkDemandedConstant(SDValue Op,
- const APInt &Demanded) {
- SDLoc dl(Op);
+/// If the specified instruction has a constant integer operand and there are
+/// bits set in that constant that are not demanded, then clear those bits and
+/// return true.
+bool TargetLowering::ShrinkDemandedConstant(SDValue Op, const APInt &Demanded,
+ TargetLoweringOpt &TLO) const {
+ SelectionDAG &DAG = TLO.DAG;
+ SDLoc DL(Op);
+ unsigned Opcode = Op.getOpcode();
+
+ // Do target-specific constant optimization.
+ if (targetShrinkDemandedConstant(Op, Demanded, TLO))
+ return TLO.New.getNode();
// FIXME: ISD::SELECT, ISD::SELECT_CC
- switch (Op.getOpcode()) {
- default: break;
+ switch (Opcode) {
+ default:
+ break;
case ISD::XOR:
case ISD::AND:
case ISD::OR: {
- ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
- if (!C) return false;
+ auto *Op1C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
+ if (!Op1C)
+ return false;
- if (Op.getOpcode() == ISD::XOR &&
- (C->getAPIntValue() | (~Demanded)).isAllOnesValue())
+ // If this is a 'not' op, don't touch it because that's a canonical form.
+ const APInt &C = Op1C->getAPIntValue();
+ if (Opcode == ISD::XOR && Demanded.isSubsetOf(C))
return false;
- // if we can expand it to have all bits set, do it
- if (C->getAPIntValue().intersects(~Demanded)) {
+ if (!C.isSubsetOf(Demanded)) {
EVT VT = Op.getValueType();
- SDValue New = DAG.getNode(Op.getOpcode(), dl, VT, Op.getOperand(0),
- DAG.getConstant(Demanded &
- C->getAPIntValue(),
- dl, VT));
- return CombineTo(Op, New);
+ SDValue NewC = DAG.getConstant(Demanded & C, DL, VT);
+ SDValue NewOp = DAG.getNode(Opcode, DL, VT, Op.getOperand(0), NewC);
+ return TLO.CombineTo(Op, NewOp);
}
break;
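
A worked example of the two subset tests above, as a standalone sketch over APInt: for (and X, 0xFF00) with only 0x0F00 demanded the constant shrinks, while for XOR the same operands form a 'not' of the demanded bits and are left alone.

    void shrinkConstantExample() {
      APInt Demanded(16, 0x0F00);
      APInt C(16, 0xFF00);
      bool IsNotOp = Demanded.isSubsetOf(C); // true: the XOR case bails out here
      APInt NewC = Demanded & C;             // 0x0F00: undemanded bits cleared
      (void)IsNotOp; (void)NewC;
    }
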
@@ -374,15 +385,17 @@ bool TargetLowering::TargetLoweringOpt::ShrinkDemandedConstant(SDValue Op,
/// Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the casts are free.
/// This uses isZExtFree and ZERO_EXTEND for the widening cast, but it could be
/// generalized for targets with other types of implicit widening casts.
-bool TargetLowering::TargetLoweringOpt::ShrinkDemandedOp(SDValue Op,
- unsigned BitWidth,
- const APInt &Demanded,
- const SDLoc &dl) {
+bool TargetLowering::ShrinkDemandedOp(SDValue Op, unsigned BitWidth,
+ const APInt &Demanded,
+ TargetLoweringOpt &TLO) const {
assert(Op.getNumOperands() == 2 &&
"ShrinkDemandedOp only supports binary operators!");
assert(Op.getNode()->getNumValues() == 1 &&
"ShrinkDemandedOp only supports nodes with one result!");
+ SelectionDAG &DAG = TLO.DAG;
+ SDLoc dl(Op);
+
// Early return, as this function cannot handle vector types.
if (Op.getValueType().isVector())
return false;
@@ -404,31 +417,28 @@ bool TargetLowering::TargetLoweringOpt::ShrinkDemandedOp(SDValue Op,
if (TLI.isTruncateFree(Op.getValueType(), SmallVT) &&
TLI.isZExtFree(SmallVT, Op.getValueType())) {
// We found a type with free casts.
- SDValue X = DAG.getNode(Op.getOpcode(), dl, SmallVT,
- DAG.getNode(ISD::TRUNCATE, dl, SmallVT,
- Op.getNode()->getOperand(0)),
- DAG.getNode(ISD::TRUNCATE, dl, SmallVT,
- Op.getNode()->getOperand(1)));
+ SDValue X = DAG.getNode(
+ Op.getOpcode(), dl, SmallVT,
+ DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(0)),
+ DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(1)));
bool NeedZext = DemandedSize > SmallVTBits;
SDValue Z = DAG.getNode(NeedZext ? ISD::ZERO_EXTEND : ISD::ANY_EXTEND,
dl, Op.getValueType(), X);
- return CombineTo(Op, Z);
+ return TLO.CombineTo(Op, Z);
}
}
return false;
}
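
Concretely, the narrowing above turns, say, a 64-bit add whose low 20 bits are demanded into a 32-bit add between truncated operands, extended back afterwards. A standalone sketch of the width decision:

    void narrowingExample() {
      unsigned BitWidth = 64;
      APInt Demanded = APInt::getLowBitsSet(BitWidth, 20);
      unsigned DemandedSize = Demanded.getActiveBits(); // 20
      unsigned SmallVTBits = 32;  // first legal power-of-two width tried
      bool NeedZext = DemandedSize > SmallVTBits;       // false: ANY_EXTEND is fine
      // Rewrite: (add x, y) -> (any_extend (add (trunc x), (trunc y)))
      (void)NeedZext;
    }
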
bool
-TargetLowering::TargetLoweringOpt::SimplifyDemandedBits(SDNode *User,
- unsigned OpIdx,
- const APInt &Demanded,
- DAGCombinerInfo &DCI) {
- const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+TargetLowering::SimplifyDemandedBits(SDNode *User, unsigned OpIdx,
+ const APInt &Demanded,
+ DAGCombinerInfo &DCI,
+ TargetLoweringOpt &TLO) const {
SDValue Op = User->getOperand(OpIdx);
- APInt KnownZero, KnownOne;
+ KnownBits Known;
- if (!TLI.SimplifyDemandedBits(Op, Demanded, KnownZero, KnownOne,
- *this, 0, true))
+ if (!SimplifyDemandedBits(Op, Demanded, Known, TLO, 0, true))
return false;
@@ -440,9 +450,9 @@ TargetLowering::TargetLoweringOpt::SimplifyDemandedBits(SDNode *User,
// with the value 'x', which will give us:
// Old = i32 and x, 0xffffff
// New = x
- if (Old.hasOneUse()) {
+ if (TLO.Old.hasOneUse()) {
// For the one use case, we just commit the change.
- DCI.CommitTargetLoweringOpt(*this);
+ DCI.CommitTargetLoweringOpt(TLO);
return true;
}
@@ -450,17 +460,17 @@ TargetLowering::TargetLoweringOpt::SimplifyDemandedBits(SDNode *User,
// AssumeSingleUse flag is not propagated to recursive calls of
// SimplifyDemandedBits, so the only node with multiple uses that
// it will attempt to combine will be Op.
- assert(Old == Op);
+ assert(TLO.Old == Op);
SmallVector<SDValue, 4> NewOps;
for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) {
if (i == OpIdx) {
- NewOps.push_back(New);
+ NewOps.push_back(TLO.New);
continue;
}
NewOps.push_back(User->getOperand(i));
}
- DAG.UpdateNodeOperands(User, NewOps);
+ TLO.DAG.UpdateNodeOperands(User, NewOps);
// Op has less users now, so we may be able to perform additional combines
// with it.
DCI.AddToWorklist(Op.getNode());
@@ -470,17 +480,30 @@ TargetLowering::TargetLoweringOpt::SimplifyDemandedBits(SDNode *User,
return true;
}
+bool TargetLowering::SimplifyDemandedBits(SDValue Op, APInt &DemandedMask,
+ DAGCombinerInfo &DCI) const {
+
+ SelectionDAG &DAG = DCI.DAG;
+ TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
+ !DCI.isBeforeLegalizeOps());
+ KnownBits Known;
+
+ bool Simplified = SimplifyDemandedBits(Op, DemandedMask, Known, TLO);
+ if (Simplified)
+ DCI.CommitTargetLoweringOpt(TLO);
+ return Simplified;
+}
+
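
The new overload bundles the TargetLoweringOpt setup and the commit step; a hypothetical target combine could call it like this (sketch, names are ours):

    SDValue combineExample(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
                           const TargetLowering &TLI) {
      SDValue Op = N->getOperand(0);
      APInt DemandedMask = APInt::getLowBitsSet(Op.getValueSizeInBits(), 8);
      if (TLI.SimplifyDemandedBits(Op, DemandedMask, DCI))
        return SDValue(N, 0); // operand simplified in place; revisit this node
      return SDValue();
    }
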
/// Look at Op. At this point, we know that only the DemandedMask bits of the
/// result of Op are ever used downstream. If we can use this information to
/// simplify Op, create a new simplified DAG node and return true, returning the
/// original and new nodes in Old and New. Otherwise, analyze the expression and
-/// return a mask of KnownOne and KnownZero bits for the expression (used to
-/// simplify the caller). The KnownZero/One bits may only be accurate for those
-/// bits in the DemandedMask.
+/// return a mask of Known bits for the expression (used to simplify the
+/// caller). The Known bits may only be accurate for those bits in the
+/// DemandedMask.
bool TargetLowering::SimplifyDemandedBits(SDValue Op,
const APInt &DemandedMask,
- APInt &KnownZero,
- APInt &KnownOne,
+ KnownBits &Known,
TargetLoweringOpt &TLO,
unsigned Depth,
bool AssumeSingleUse) const {
@@ -492,14 +515,14 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
auto &DL = TLO.DAG.getDataLayout();
// Don't know anything.
- KnownZero = KnownOne = APInt(BitWidth, 0);
+ Known = KnownBits(BitWidth);
// Other users may use these bits.
if (!Op.getNode()->hasOneUse() && !AssumeSingleUse) {
if (Depth != 0) {
- // If not at the root, Just compute the KnownZero/KnownOne bits to
+ // If not at the root, just compute the Known bits to
// simplify things downstream.
- TLO.DAG.computeKnownBits(Op, KnownZero, KnownOne, Depth);
+ TLO.DAG.computeKnownBits(Op, Known, Depth);
return false;
}
// If this is the root being simplified, allow it to have multiple uses,
@@ -514,38 +537,36 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
return false;
}
- APInt KnownZero2, KnownOne2, KnownZeroOut, KnownOneOut;
+ KnownBits Known2, KnownOut;
switch (Op.getOpcode()) {
case ISD::Constant:
// We know all of the bits for a constant!
- KnownOne = cast<ConstantSDNode>(Op)->getAPIntValue();
- KnownZero = ~KnownOne;
+ Known.One = cast<ConstantSDNode>(Op)->getAPIntValue();
+ Known.Zero = ~Known.One;
return false; // Don't fall through, will infinitely loop.
case ISD::BUILD_VECTOR:
// Collect the known bits that are shared by every constant vector element.
- KnownZero = KnownOne = APInt::getAllOnesValue(BitWidth);
+ Known.Zero.setAllBits(); Known.One.setAllBits();
for (SDValue SrcOp : Op->ops()) {
if (!isa<ConstantSDNode>(SrcOp)) {
// We can only handle all constant values - bail out with no known bits.
- KnownZero = KnownOne = APInt(BitWidth, 0);
+ Known = KnownBits(BitWidth);
return false;
}
- KnownOne2 = cast<ConstantSDNode>(SrcOp)->getAPIntValue();
- KnownZero2 = ~KnownOne2;
+ Known2.One = cast<ConstantSDNode>(SrcOp)->getAPIntValue();
+ Known2.Zero = ~Known2.One;
// BUILD_VECTOR can implicitly truncate sources, we must handle this.
- if (KnownOne2.getBitWidth() != BitWidth) {
- assert(KnownOne2.getBitWidth() > BitWidth &&
- KnownZero2.getBitWidth() > BitWidth &&
+ if (Known2.One.getBitWidth() != BitWidth) {
+ assert(Known2.getBitWidth() > BitWidth &&
"Expected BUILD_VECTOR implicit truncation");
- KnownOne2 = KnownOne2.trunc(BitWidth);
- KnownZero2 = KnownZero2.trunc(BitWidth);
+ Known2 = Known2.trunc(BitWidth);
}
// Known bits are the values that are shared by every element.
// TODO: support per-element known bits.
- KnownOne &= KnownOne2;
- KnownZero &= KnownZero2;
+ Known.One &= Known2.One;
+ Known.Zero &= Known2.Zero;
}
return false; // Don't fall through, will infinitely loop.
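
A worked example of the per-element intersection (standalone sketch): for a <2 x i4> build_vector of 0b1100 and 0b1010, only bit 3 is one in every element and only bit 0 is zero in every element; bits 1-2 stay unknown.

    void buildVectorKnownExample() {
      KnownBits Known(4);
      Known.Zero.setAllBits();
      Known.One.setAllBits();             // start from "known everything"
      for (uint64_t Elt : {0xCu, 0xAu}) { // 0b1100, 0b1010
        APInt V(4, Elt);
        Known.One &= V;                   // ones common to every element
        Known.Zero &= ~V;                 // zeros common to every element
      }
      // Result: Known.One == 0b1000, Known.Zero == 0b0001.
    }
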
case ISD::AND:
@@ -553,18 +574,18 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
// using the bits from the RHS. Below, we use knowledge about the RHS to
// simplify the LHS, here we're using information from the LHS to simplify
// the RHS.
- if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+ if (ConstantSDNode *RHSC = isConstOrConstSplat(Op.getOperand(1))) {
SDValue Op0 = Op.getOperand(0);
- APInt LHSZero, LHSOne;
+ KnownBits LHSKnown;
// Do not increment Depth here; that can cause an infinite loop.
- TLO.DAG.computeKnownBits(Op0, LHSZero, LHSOne, Depth);
+ TLO.DAG.computeKnownBits(Op0, LHSKnown, Depth);
// If the LHS already has zeros where RHSC does, this 'and' is dead.
- if ((LHSZero & NewMask) == (~RHSC->getAPIntValue() & NewMask))
+ if ((LHSKnown.Zero & NewMask) == (~RHSC->getAPIntValue() & NewMask))
return TLO.CombineTo(Op, Op0);
// If any of the set bits in the RHS are known zero on the LHS, shrink
// the constant.
- if (TLO.ShrinkDemandedConstant(Op, ~LHSZero & NewMask))
+ if (ShrinkDemandedConstant(Op, ~LHSKnown.Zero & NewMask, TLO))
return true;
// Bitwise-not (xor X, -1) is a special case: we don't usually shrink its
@@ -573,183 +594,191 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
// the xor. For example, for a 32-bit X:
// and (xor (srl X, 31), -1), 1 --> xor (srl X, 31), 1
if (isBitwiseNot(Op0) && Op0.hasOneUse() &&
- LHSOne == ~RHSC->getAPIntValue()) {
+ LHSKnown.One == ~RHSC->getAPIntValue()) {
SDValue Xor = TLO.DAG.getNode(ISD::XOR, dl, Op.getValueType(),
Op0.getOperand(0), Op.getOperand(1));
return TLO.CombineTo(Op, Xor);
}
}
- if (SimplifyDemandedBits(Op.getOperand(1), NewMask, KnownZero,
- KnownOne, TLO, Depth+1))
+ if (SimplifyDemandedBits(Op.getOperand(1), NewMask, Known, TLO, Depth+1))
return true;
- assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
- if (SimplifyDemandedBits(Op.getOperand(0), ~KnownZero & NewMask,
- KnownZero2, KnownOne2, TLO, Depth+1))
+ assert(!Known.hasConflict() && "Bits known to be one AND zero?");
+ if (SimplifyDemandedBits(Op.getOperand(0), ~Known.Zero & NewMask,
+ Known2, TLO, Depth+1))
return true;
- assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+ assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
// If all of the demanded bits are known one on one side, return the other.
// These bits cannot contribute to the result of the 'and'.
- if ((NewMask & ~KnownZero2 & KnownOne) == (~KnownZero2 & NewMask))
+ if (NewMask.isSubsetOf(Known2.Zero | Known.One))
return TLO.CombineTo(Op, Op.getOperand(0));
- if ((NewMask & ~KnownZero & KnownOne2) == (~KnownZero & NewMask))
+ if (NewMask.isSubsetOf(Known.Zero | Known2.One))
return TLO.CombineTo(Op, Op.getOperand(1));
// If all of the demanded bits in the inputs are known zeros, return zero.
- if ((NewMask & (KnownZero|KnownZero2)) == NewMask)
+ if (NewMask.isSubsetOf(Known.Zero | Known2.Zero))
return TLO.CombineTo(Op, TLO.DAG.getConstant(0, dl, Op.getValueType()));
// If the RHS is a constant, see if we can simplify it.
- if (TLO.ShrinkDemandedConstant(Op, ~KnownZero2 & NewMask))
+ if (ShrinkDemandedConstant(Op, ~Known2.Zero & NewMask, TLO))
return true;
// If the operation can be done in a smaller type, do so.
- if (TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl))
+ if (ShrinkDemandedOp(Op, BitWidth, NewMask, TLO))
return true;
// Output known-1 bits are only known if set in both the LHS & RHS.
- KnownOne &= KnownOne2;
+ Known.One &= Known2.One;
// Output known-0 are known to be clear if zero in either the LHS | RHS.
- KnownZero |= KnownZero2;
+ Known.Zero |= Known2.Zero;
break;
case ISD::OR:
- if (SimplifyDemandedBits(Op.getOperand(1), NewMask, KnownZero,
- KnownOne, TLO, Depth+1))
+ if (SimplifyDemandedBits(Op.getOperand(1), NewMask, Known, TLO, Depth+1))
return true;
- assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
- if (SimplifyDemandedBits(Op.getOperand(0), ~KnownOne & NewMask,
- KnownZero2, KnownOne2, TLO, Depth+1))
+ assert(!Known.hasConflict() && "Bits known to be one AND zero?");
+ if (SimplifyDemandedBits(Op.getOperand(0), ~Known.One & NewMask,
+ Known2, TLO, Depth+1))
return true;
- assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+ assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
// If all of the demanded bits are known zero on one side, return the other.
// These bits cannot contribute to the result of the 'or'.
- if ((NewMask & ~KnownOne2 & KnownZero) == (~KnownOne2 & NewMask))
+ if (NewMask.isSubsetOf(Known2.One | Known.Zero))
return TLO.CombineTo(Op, Op.getOperand(0));
- if ((NewMask & ~KnownOne & KnownZero2) == (~KnownOne & NewMask))
- return TLO.CombineTo(Op, Op.getOperand(1));
- // If all of the potentially set bits on one side are known to be set on
- // the other side, just use the 'other' side.
- if ((NewMask & ~KnownZero & KnownOne2) == (~KnownZero & NewMask))
- return TLO.CombineTo(Op, Op.getOperand(0));
- if ((NewMask & ~KnownZero2 & KnownOne) == (~KnownZero2 & NewMask))
+ if (NewMask.isSubsetOf(Known.One | Known2.Zero))
return TLO.CombineTo(Op, Op.getOperand(1));
// If the RHS is a constant, see if we can simplify it.
- if (TLO.ShrinkDemandedConstant(Op, NewMask))
+ if (ShrinkDemandedConstant(Op, NewMask, TLO))
return true;
// If the operation can be done in a smaller type, do so.
- if (TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl))
+ if (ShrinkDemandedOp(Op, BitWidth, NewMask, TLO))
return true;
// Output known-0 bits are only known if clear in both the LHS & RHS.
- KnownZero &= KnownZero2;
+ Known.Zero &= Known2.Zero;
// Output known-1 are known to be set if set in either the LHS | RHS.
- KnownOne |= KnownOne2;
+ Known.One |= Known2.One;
break;
- case ISD::XOR:
- if (SimplifyDemandedBits(Op.getOperand(1), NewMask, KnownZero,
- KnownOne, TLO, Depth+1))
+ case ISD::XOR: {
+ if (SimplifyDemandedBits(Op.getOperand(1), NewMask, Known, TLO, Depth+1))
return true;
- assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
- if (SimplifyDemandedBits(Op.getOperand(0), NewMask, KnownZero2,
- KnownOne2, TLO, Depth+1))
+ assert(!Known.hasConflict() && "Bits known to be one AND zero?");
+ if (SimplifyDemandedBits(Op.getOperand(0), NewMask, Known2, TLO, Depth+1))
return true;
- assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+ assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
// If all of the demanded bits are known zero on one side, return the other.
// These bits cannot contribute to the result of the 'xor'.
- if ((KnownZero & NewMask) == NewMask)
+ if (NewMask.isSubsetOf(Known.Zero))
return TLO.CombineTo(Op, Op.getOperand(0));
- if ((KnownZero2 & NewMask) == NewMask)
+ if (NewMask.isSubsetOf(Known2.Zero))
return TLO.CombineTo(Op, Op.getOperand(1));
// If the operation can be done in a smaller type, do so.
- if (TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl))
+ if (ShrinkDemandedOp(Op, BitWidth, NewMask, TLO))
return true;
// If all of the unknown bits are known to be zero on one side or the other
// (but not both) turn this into an *inclusive* or.
// e.g. (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0
- if ((NewMask & ~KnownZero & ~KnownZero2) == 0)
+ if ((NewMask & ~Known.Zero & ~Known2.Zero) == 0)
return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::OR, dl, Op.getValueType(),
Op.getOperand(0),
Op.getOperand(1)));
// Output known-0 bits are known if clear or set in both the LHS & RHS.
- KnownZeroOut = (KnownZero & KnownZero2) | (KnownOne & KnownOne2);
+ KnownOut.Zero = (Known.Zero & Known2.Zero) | (Known.One & Known2.One);
// Output known-1 are known to be set if set in only one of the LHS, RHS.
- KnownOneOut = (KnownZero & KnownOne2) | (KnownOne & KnownZero2);
+ KnownOut.One = (Known.Zero & Known2.One) | (Known.One & Known2.Zero);
// If all of the demanded bits on one side are known, and all of the set
// bits on that side are also known to be set on the other side, turn this
// into an AND, as we know the bits will be cleared.
// e.g. (X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2
// NB: it is okay if more bits are known than are requested
- if ((NewMask & (KnownZero|KnownOne)) == NewMask) { // all known on one side
- if (KnownOne == KnownOne2) { // set bits are the same on both sides
+ if (NewMask.isSubsetOf(Known.Zero|Known.One)) { // all known on one side
+ if (Known.One == Known2.One) { // set bits are the same on both sides
EVT VT = Op.getValueType();
- SDValue ANDC = TLO.DAG.getConstant(~KnownOne & NewMask, dl, VT);
+ SDValue ANDC = TLO.DAG.getConstant(~Known.One & NewMask, dl, VT);
return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::AND, dl, VT,
Op.getOperand(0), ANDC));
}
}
- // If the RHS is a constant, see if we can simplify it.
- // for XOR, we prefer to force bits to 1 if they will make a -1.
- // If we can't force bits, try to shrink the constant.
- if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
- APInt Expanded = C->getAPIntValue() | (~NewMask);
- // If we can expand it to have all bits set, do it.
- if (Expanded.isAllOnesValue()) {
- if (Expanded != C->getAPIntValue()) {
- EVT VT = Op.getValueType();
- SDValue New = TLO.DAG.getNode(Op.getOpcode(), dl,VT, Op.getOperand(0),
- TLO.DAG.getConstant(Expanded, dl, VT));
- return TLO.CombineTo(Op, New);
- }
- // If it already has all the bits set, nothing to change
- // but don't shrink either!
- } else if (TLO.ShrinkDemandedConstant(Op, NewMask)) {
- return true;
+ // If the RHS is a constant, see if we can change it. Don't alter a -1
+ // constant because that's a 'not' op, and that is better for combining and
+ // codegen.
+ ConstantSDNode *C = isConstOrConstSplat(Op.getOperand(1));
+ if (C && !C->isAllOnesValue()) {
+ if (NewMask.isSubsetOf(C->getAPIntValue())) {
+ // We're flipping all demanded bits. Flip the undemanded bits too.
+ SDValue New = TLO.DAG.getNOT(dl, Op.getOperand(0), Op.getValueType());
+ return TLO.CombineTo(Op, New);
}
+ // If we can't turn this into a 'not', try to shrink the constant.
+ if (ShrinkDemandedConstant(Op, NewMask, TLO))
+ return true;
}
- KnownZero = KnownZeroOut;
- KnownOne = KnownOneOut;
+ Known = std::move(KnownOut);
break;
+ }
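
A worked example for the new 'not' fold (standalone sketch): with i32 X and only the low 8 bits demanded, (xor X, 0xFF) flips every demanded bit, so the fold extends the flip to the undemanded bits and emits (xor X, -1), the canonical 'not':

    void xorToNotExample() {
      APInt NewMask = APInt::getLowBitsSet(32, 8); // demanded: 0x000000FF
      APInt C(32, 0xFF);
      bool BecomesNot = NewMask.isSubsetOf(C); // true: every demanded bit flips
      (void)BecomesNot;
    }
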
case ISD::SELECT:
- if (SimplifyDemandedBits(Op.getOperand(2), NewMask, KnownZero,
- KnownOne, TLO, Depth+1))
+ if (SimplifyDemandedBits(Op.getOperand(2), NewMask, Known, TLO, Depth+1))
return true;
- if (SimplifyDemandedBits(Op.getOperand(1), NewMask, KnownZero2,
- KnownOne2, TLO, Depth+1))
+ if (SimplifyDemandedBits(Op.getOperand(1), NewMask, Known2, TLO, Depth+1))
return true;
- assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
- assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+ assert(!Known.hasConflict() && "Bits known to be one AND zero?");
+ assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
// If the operands are constants, see if we can simplify them.
- if (TLO.ShrinkDemandedConstant(Op, NewMask))
+ if (ShrinkDemandedConstant(Op, NewMask, TLO))
return true;
// Only known if known in both the LHS and RHS.
- KnownOne &= KnownOne2;
- KnownZero &= KnownZero2;
+ Known.One &= Known2.One;
+ Known.Zero &= Known2.Zero;
break;
case ISD::SELECT_CC:
- if (SimplifyDemandedBits(Op.getOperand(3), NewMask, KnownZero,
- KnownOne, TLO, Depth+1))
+ if (SimplifyDemandedBits(Op.getOperand(3), NewMask, Known, TLO, Depth+1))
return true;
- if (SimplifyDemandedBits(Op.getOperand(2), NewMask, KnownZero2,
- KnownOne2, TLO, Depth+1))
+ if (SimplifyDemandedBits(Op.getOperand(2), NewMask, Known2, TLO, Depth+1))
return true;
- assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
- assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+ assert(!Known.hasConflict() && "Bits known to be one AND zero?");
+ assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
// If the operands are constants, see if we can simplify them.
- if (TLO.ShrinkDemandedConstant(Op, NewMask))
+ if (ShrinkDemandedConstant(Op, NewMask, TLO))
return true;
// Only known if known in both the LHS and RHS.
- KnownOne &= KnownOne2;
- KnownZero &= KnownZero2;
+ Known.One &= Known2.One;
+ Known.Zero &= Known2.Zero;
break;
+ case ISD::SETCC: {
+ SDValue Op0 = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1);
+ ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
+ // If (1) we only need the sign-bit, (2) the setcc operands are the same
+ // width as the setcc result, and (3) the result of a setcc conforms to 0 or
+ // -1, we may be able to bypass the setcc.
+ if (NewMask.isSignMask() && Op0.getScalarValueSizeInBits() == BitWidth &&
+ getBooleanContents(Op.getValueType()) ==
+ BooleanContent::ZeroOrNegativeOneBooleanContent) {
+ // If we're testing X < 0, then this compare isn't needed - just use X!
+ // FIXME: We're limiting to integer types here, but this should also work
+ // if we don't care about FP signed-zero. The use of SETLT with FP means
+ // that we don't care about NaNs.
+ if (CC == ISD::SETLT && Op1.getValueType().isInteger() &&
+ (isNullConstant(Op1) || ISD::isBuildVectorAllZeros(Op1.getNode())))
+ return TLO.CombineTo(Op, Op0);
+
+ // TODO: Should we check for other forms of sign-bit comparisons?
+ // Examples: X <= -1, X >= 0
+ }
+ if (getBooleanContents(Op0.getValueType()) ==
+ TargetLowering::ZeroOrOneBooleanContent &&
+ BitWidth > 1)
+ Known.Zero.setBitsFrom(1);
+ break;
+ }
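
A worked example for the setcc bypass (sketch): under ZeroOrNegativeOneBooleanContent, (setcc slt X, 0) yields 0 or -1, whose sign bit equals X's own sign bit, so when only the sign bit is demanded X itself can stand in for the compare:

    void setccSignBitExample() {
      unsigned BitWidth = 32;
      APInt NewMask = APInt::getSignMask(BitWidth);
      bool OnlySignBit = NewMask.isSignMask(); // condition (1) above
      // With matching operand width (2) and 0/-1 booleans (3),
      // (setcc slt X, 0) may be replaced by plain X.
      (void)OnlySignBit;
    }
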
case ISD::SHL:
if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
unsigned ShAmt = SA->getZExtValue();
@@ -781,17 +810,16 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
}
}
- if (SimplifyDemandedBits(InOp, NewMask.lshr(ShAmt),
- KnownZero, KnownOne, TLO, Depth+1))
+ if (SimplifyDemandedBits(InOp, NewMask.lshr(ShAmt), Known, TLO, Depth+1))
return true;
// Convert (shl (anyext x, c)) to (anyext (shl x, c)) if the high bits
// are not demanded. This will likely allow the anyext to be folded away.
if (InOp.getNode()->getOpcode() == ISD::ANY_EXTEND) {
- SDValue InnerOp = InOp.getNode()->getOperand(0);
+ SDValue InnerOp = InOp.getOperand(0);
EVT InnerVT = InnerOp.getValueType();
unsigned InnerBits = InnerVT.getSizeInBits();
- if (ShAmt < InnerBits && NewMask.lshr(InnerBits) == 0 &&
+ if (ShAmt < InnerBits && NewMask.getActiveBits() <= InnerBits &&
isTypeDesirableForOp(ISD::SHL, InnerVT)) {
EVT ShTy = getShiftAmountTy(InnerVT, DL);
if (!APInt(BitWidth, ShAmt).isIntN(ShTy.getSizeInBits()))
@@ -813,12 +841,12 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
InnerOp.getOpcode() == ISD::SRL &&
InnerOp.hasOneUse() &&
isa<ConstantSDNode>(InnerOp.getOperand(1))) {
- uint64_t InnerShAmt = cast<ConstantSDNode>(InnerOp.getOperand(1))
+ unsigned InnerShAmt = cast<ConstantSDNode>(InnerOp.getOperand(1))
->getZExtValue();
if (InnerShAmt < ShAmt &&
InnerShAmt < InnerBits &&
- NewMask.lshr(InnerBits - InnerShAmt + ShAmt) == 0 &&
- NewMask.trunc(ShAmt) == 0) {
+ NewMask.getActiveBits() <= (InnerBits - InnerShAmt + ShAmt) &&
+ NewMask.countTrailingZeros() >= ShAmt) {
SDValue NewSA =
TLO.DAG.getConstant(ShAmt - InnerShAmt, dl,
Op.getOperand(1).getValueType());
@@ -831,10 +859,10 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
}
}
- KnownZero <<= SA->getZExtValue();
- KnownOne <<= SA->getZExtValue();
+ Known.Zero <<= SA->getZExtValue();
+ Known.One <<= SA->getZExtValue();
// low bits known zero.
- KnownZero |= APInt::getLowBitsSet(BitWidth, SA->getZExtValue());
+ Known.Zero.setLowBits(SA->getZExtValue());
}
break;
case ISD::SRL:
@@ -852,8 +880,8 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
// If the shift is exact, then it does demand the low bits (and knows that
// they are zero).
- if (cast<BinaryWithFlagsSDNode>(Op)->Flags.hasExact())
- InDemandedMask |= APInt::getLowBitsSet(BitWidth, ShAmt);
+ if (Op->getFlags().hasExact())
+ InDemandedMask.setLowBits(ShAmt);
// If this is ((X << C1) >>u ShAmt), see if we can simplify this into a
// single shift. We can do this if the top bits (which are shifted out)
@@ -877,15 +905,13 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
}
// Compute the new bits that are at the top now.
- if (SimplifyDemandedBits(InOp, InDemandedMask,
- KnownZero, KnownOne, TLO, Depth+1))
+ if (SimplifyDemandedBits(InOp, InDemandedMask, Known, TLO, Depth+1))
return true;
- assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
- KnownZero = KnownZero.lshr(ShAmt);
- KnownOne = KnownOne.lshr(ShAmt);
+ assert(!Known.hasConflict() && "Bits known to be one AND zero?");
+ Known.Zero.lshrInPlace(ShAmt);
+ Known.One.lshrInPlace(ShAmt);
- APInt HighBits = APInt::getHighBitsSet(BitWidth, ShAmt);
- KnownZero |= HighBits; // High bits known zero.
+ Known.Zero.setHighBits(ShAmt); // High bits known zero.
}
break;
case ISD::SRA:
@@ -893,7 +919,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
// always convert this into a logical shr, even if the shift amount is
// variable. The low bit of the shift cannot be an input sign bit unless
// the shift amount is >= the size of the datatype, which is undefined.
- if (NewMask == 1)
+ if (NewMask.isOneValue())
return TLO.CombineTo(Op,
TLO.DAG.getNode(ISD::SRL, dl, Op.getValueType(),
Op.getOperand(0), Op.getOperand(1)));
@@ -910,33 +936,30 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
// If the shift is exact, then it does demand the low bits (and knows that
// they are zero).
- if (cast<BinaryWithFlagsSDNode>(Op)->Flags.hasExact())
- InDemandedMask |= APInt::getLowBitsSet(BitWidth, ShAmt);
+ if (Op->getFlags().hasExact())
+ InDemandedMask.setLowBits(ShAmt);
// If any of the demanded bits are produced by the sign extension, we also
// demand the input sign bit.
- APInt HighBits = APInt::getHighBitsSet(BitWidth, ShAmt);
- if (HighBits.intersects(NewMask))
- InDemandedMask |= APInt::getSignBit(VT.getScalarSizeInBits());
+ if (NewMask.countLeadingZeros() < ShAmt)
+ InDemandedMask.setSignBit();
- if (SimplifyDemandedBits(Op.getOperand(0), InDemandedMask,
- KnownZero, KnownOne, TLO, Depth+1))
+ if (SimplifyDemandedBits(Op.getOperand(0), InDemandedMask, Known, TLO,
+ Depth+1))
return true;
- assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
- KnownZero = KnownZero.lshr(ShAmt);
- KnownOne = KnownOne.lshr(ShAmt);
-
- // Handle the sign bit, adjusted to where it is now in the mask.
- APInt SignBit = APInt::getSignBit(BitWidth).lshr(ShAmt);
+ assert(!Known.hasConflict() && "Bits known to be one AND zero?");
+ Known.Zero.lshrInPlace(ShAmt);
+ Known.One.lshrInPlace(ShAmt);
// If the input sign bit is known to be zero, or if none of the top bits
// are demanded, turn this into an unsigned shift right.
- if (KnownZero.intersects(SignBit) || (HighBits & ~NewMask) == HighBits) {
+ if (Known.Zero[BitWidth - ShAmt - 1] ||
+ NewMask.countLeadingZeros() >= ShAmt) {
SDNodeFlags Flags;
- Flags.setExact(cast<BinaryWithFlagsSDNode>(Op)->Flags.hasExact());
+ Flags.setExact(Op->getFlags().hasExact());
return TLO.CombineTo(Op,
TLO.DAG.getNode(ISD::SRL, dl, VT, Op.getOperand(0),
- Op.getOperand(1), &Flags));
+ Op.getOperand(1), Flags));
}
int Log2 = NewMask.exactLogBase2();
@@ -949,9 +972,9 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
Op.getOperand(0), NewSA));
}
- if (KnownOne.intersects(SignBit))
+ if (Known.One[BitWidth - ShAmt - 1])
// New bits are known one.
- KnownOne |= HighBits;
+ Known.One.setHighBits(ShAmt);
}
break;
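
A worked example for the SRA-to-SRL rewrite (standalone sketch): after shifting the known bits right by ShAmt, bit (BitWidth - ShAmt - 1) of the result carries the input's sign bit; if it is known zero, the arithmetic shift cannot smear ones and a logical shift is equivalent:

    void sraToSrlExample() {
      unsigned BitWidth = 32, ShAmt = 4;
      KnownBits Known(BitWidth);
      Known.Zero.setBit(BitWidth - ShAmt - 1);        // input sign bit known zero
      bool UseSrl = Known.Zero[BitWidth - ShAmt - 1]; // true: emit SRL instead
      (void)UseSrl;
    }
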
case ISD::SIGN_EXTEND_INREG: {
@@ -993,7 +1016,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
return TLO.CombineTo(Op, Op.getOperand(0));
APInt InSignBit =
- APInt::getSignBit(ExVT.getScalarSizeInBits()).zext(BitWidth);
+ APInt::getSignMask(ExVT.getScalarSizeInBits()).zext(BitWidth);
APInt InputDemandedBits =
APInt::getLowBitsSet(BitWidth,
ExVT.getScalarSizeInBits()) &
@@ -1004,24 +1027,24 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
InputDemandedBits |= InSignBit;
if (SimplifyDemandedBits(Op.getOperand(0), InputDemandedBits,
- KnownZero, KnownOne, TLO, Depth+1))
+ Known, TLO, Depth+1))
return true;
- assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ assert(!Known.hasConflict() && "Bits known to be one AND zero?");
// If the sign bit of the input is known set or clear, then we know the
// top bits of the result.
// If the input sign bit is known zero, convert this into a zero extension.
- if (KnownZero.intersects(InSignBit))
+ if (Known.Zero.intersects(InSignBit))
return TLO.CombineTo(Op, TLO.DAG.getZeroExtendInReg(
Op.getOperand(0), dl, ExVT.getScalarType()));
- if (KnownOne.intersects(InSignBit)) { // Input sign bit known set
- KnownOne |= NewBits;
- KnownZero &= ~NewBits;
+ if (Known.One.intersects(InSignBit)) { // Input sign bit known set
+ Known.One |= NewBits;
+ Known.Zero &= ~NewBits;
} else { // Input sign bit unknown
- KnownZero &= ~NewBits;
- KnownOne &= ~NewBits;
+ Known.Zero &= ~NewBits;
+ Known.One &= ~NewBits;
}
break;
}
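
The Known.One/Known.Zero update for SIGN_EXTEND_INREG mirrors what the
operation does at runtime: the in-register sign bit alone decides every bit
in NewBits. A plain-C++ model for an i8 value sign-extended in an i16
(illustrative sketch only):

#include <cassert>
#include <cstdint>

static uint16_t signExtendInReg8(uint16_t v) {
  return uint16_t(int16_t(int8_t(uint8_t(v))));
}

int main() {
  for (unsigned v = 0; v <= 0xFFFF; ++v) {
    uint16_t r = signExtendInReg8(uint16_t(v));
    if ((v & 0x80) == 0)
      assert((r & 0xFF00) == 0);      // sign bit zero -> NewBits known zero
    else
      assert((r & 0xFF00) == 0xFF00); // sign bit one  -> NewBits known one
  }
  return 0;
}
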
@@ -1032,22 +1055,19 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
APInt MaskLo = NewMask.getLoBits(HalfBitWidth).trunc(HalfBitWidth);
APInt MaskHi = NewMask.getHiBits(HalfBitWidth).trunc(HalfBitWidth);
- APInt KnownZeroLo, KnownOneLo;
- APInt KnownZeroHi, KnownOneHi;
+ KnownBits KnownLo, KnownHi;
- if (SimplifyDemandedBits(Op.getOperand(0), MaskLo, KnownZeroLo,
- KnownOneLo, TLO, Depth + 1))
+ if (SimplifyDemandedBits(Op.getOperand(0), MaskLo, KnownLo, TLO, Depth + 1))
return true;
- if (SimplifyDemandedBits(Op.getOperand(1), MaskHi, KnownZeroHi,
- KnownOneHi, TLO, Depth + 1))
+ if (SimplifyDemandedBits(Op.getOperand(1), MaskHi, KnownHi, TLO, Depth + 1))
return true;
- KnownZero = KnownZeroLo.zext(BitWidth) |
- KnownZeroHi.zext(BitWidth).shl(HalfBitWidth);
+ Known.Zero = KnownLo.Zero.zext(BitWidth) |
+ KnownHi.Zero.zext(BitWidth).shl(HalfBitWidth);
- KnownOne = KnownOneLo.zext(BitWidth) |
- KnownOneHi.zext(BitWidth).shl(HalfBitWidth);
+ Known.One = KnownLo.One.zext(BitWidth) |
+ KnownHi.One.zext(BitWidth).shl(HalfBitWidth);
break;
}
case ISD::ZERO_EXTEND: {
@@ -1062,20 +1082,18 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
Op.getValueType(),
Op.getOperand(0)));
- if (SimplifyDemandedBits(Op.getOperand(0), InMask,
- KnownZero, KnownOne, TLO, Depth+1))
+ if (SimplifyDemandedBits(Op.getOperand(0), InMask, Known, TLO, Depth+1))
return true;
- assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
- KnownZero = KnownZero.zext(BitWidth);
- KnownOne = KnownOne.zext(BitWidth);
- KnownZero |= NewBits;
+ assert(!Known.hasConflict() && "Bits known to be one AND zero?");
+ Known = Known.zext(BitWidth);
+ Known.Zero |= NewBits;
break;
}
case ISD::SIGN_EXTEND: {
EVT InVT = Op.getOperand(0).getValueType();
unsigned InBits = InVT.getScalarSizeInBits();
APInt InMask = APInt::getLowBitsSet(BitWidth, InBits);
- APInt InSignBit = APInt::getBitsSet(BitWidth, InBits - 1, InBits);
+ APInt InSignBit = APInt::getOneBitSet(BitWidth, InBits - 1);
APInt NewBits = ~InMask & NewMask;
// If none of the top bits are demanded, convert this into an any_extend.
@@ -1090,37 +1108,34 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
InDemandedBits |= InSignBit;
InDemandedBits = InDemandedBits.trunc(InBits);
- if (SimplifyDemandedBits(Op.getOperand(0), InDemandedBits, KnownZero,
- KnownOne, TLO, Depth+1))
+ if (SimplifyDemandedBits(Op.getOperand(0), InDemandedBits, Known, TLO,
+ Depth+1))
return true;
- KnownZero = KnownZero.zext(BitWidth);
- KnownOne = KnownOne.zext(BitWidth);
+ Known = Known.zext(BitWidth);
// If the sign bit is known zero, convert this to a zero extend.
- if (KnownZero.intersects(InSignBit))
+ if (Known.Zero.intersects(InSignBit))
return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::ZERO_EXTEND, dl,
Op.getValueType(),
Op.getOperand(0)));
// If the sign bit is known one, the top bits match.
- if (KnownOne.intersects(InSignBit)) {
- KnownOne |= NewBits;
- assert((KnownZero & NewBits) == 0);
+ if (Known.One.intersects(InSignBit)) {
+ Known.One |= NewBits;
+ assert((Known.Zero & NewBits) == 0);
} else { // Otherwise, top bits aren't known.
- assert((KnownOne & NewBits) == 0);
- assert((KnownZero & NewBits) == 0);
+ assert((Known.One & NewBits) == 0);
+ assert((Known.Zero & NewBits) == 0);
}
break;
}
case ISD::ANY_EXTEND: {
unsigned OperandBitWidth = Op.getOperand(0).getScalarValueSizeInBits();
APInt InMask = NewMask.trunc(OperandBitWidth);
- if (SimplifyDemandedBits(Op.getOperand(0), InMask,
- KnownZero, KnownOne, TLO, Depth+1))
+ if (SimplifyDemandedBits(Op.getOperand(0), InMask, Known, TLO, Depth+1))
return true;
- assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
- KnownZero = KnownZero.zext(BitWidth);
- KnownOne = KnownOne.zext(BitWidth);
+ assert(!Known.hasConflict() && "Bits known to be one AND zero?");
+ Known = Known.zext(BitWidth);
break;
}
case ISD::TRUNCATE: {
@@ -1128,11 +1143,9 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
// zero/one bits live out.
unsigned OperandBitWidth = Op.getOperand(0).getScalarValueSizeInBits();
APInt TruncMask = NewMask.zext(OperandBitWidth);
- if (SimplifyDemandedBits(Op.getOperand(0), TruncMask,
- KnownZero, KnownOne, TLO, Depth+1))
+ if (SimplifyDemandedBits(Op.getOperand(0), TruncMask, Known, TLO, Depth+1))
return true;
- KnownZero = KnownZero.trunc(BitWidth);
- KnownOne = KnownOne.trunc(BitWidth);
+ Known = Known.trunc(BitWidth);
// If the input is only used by this truncate, see if we can shrink it based
// on the known demanded bits.
@@ -1158,26 +1171,29 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
getShiftAmountTy(Op.getValueType(), DL));
}
- APInt HighBits = APInt::getHighBitsSet(OperandBitWidth,
- OperandBitWidth - BitWidth);
- HighBits = HighBits.lshr(ShAmt->getZExtValue()).trunc(BitWidth);
-
- if (ShAmt->getZExtValue() < BitWidth && !(HighBits & NewMask)) {
- // None of the shifted in bits are needed. Add a truncate of the
- // shift input, then shift it.
- SDValue NewTrunc = TLO.DAG.getNode(ISD::TRUNCATE, dl,
- Op.getValueType(),
- In.getOperand(0));
- return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl,
- Op.getValueType(),
- NewTrunc,
- Shift));
+ if (ShAmt->getZExtValue() < BitWidth) {
+ APInt HighBits = APInt::getHighBitsSet(OperandBitWidth,
+ OperandBitWidth - BitWidth);
+ HighBits.lshrInPlace(ShAmt->getZExtValue());
+ HighBits = HighBits.trunc(BitWidth);
+
+ if (!(HighBits & NewMask)) {
+ // None of the shifted in bits are needed. Add a truncate of the
+ // shift input, then shift it.
+ SDValue NewTrunc = TLO.DAG.getNode(ISD::TRUNCATE, dl,
+ Op.getValueType(),
+ In.getOperand(0));
+ return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl,
+ Op.getValueType(),
+ NewTrunc,
+ Shift));
+ }
}
break;
}
}
- assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ assert(!Known.hasConflict() && "Bits known to be one AND zero?");
break;
}
case ISD::AssertZext: {
@@ -1187,11 +1203,11 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
APInt InMask = APInt::getLowBitsSet(BitWidth,
VT.getSizeInBits());
if (SimplifyDemandedBits(Op.getOperand(0), ~InMask | NewMask,
- KnownZero, KnownOne, TLO, Depth+1))
+ Known, TLO, Depth+1))
return true;
- assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ assert(!Known.hasConflict() && "Bits known to be one AND zero?");
- KnownZero |= ~InMask & NewMask;
+ Known.Zero |= ~InMask;
break;
}
case ISD::BITCAST:
@@ -1200,7 +1216,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
if (!TLO.LegalOperations() &&
!Op.getValueType().isVector() &&
!Op.getOperand(0).getValueType().isVector() &&
- NewMask == APInt::getSignBit(Op.getValueSizeInBits()) &&
+ NewMask == APInt::getSignMask(Op.getValueSizeInBits()) &&
Op.getOperand(0).getValueType().isFloatingPoint()) {
bool OpVTLegal = isOperationLegalOrCustom(ISD::FGETSIGN, Op.getValueType());
bool i32Legal = isOperationLegalOrCustom(ISD::FGETSIGN, MVT::i32);
@@ -1229,22 +1245,19 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
// of the highest bit demanded of them.
APInt LoMask = APInt::getLowBitsSet(BitWidth,
BitWidth - NewMask.countLeadingZeros());
- if (SimplifyDemandedBits(Op.getOperand(0), LoMask, KnownZero2,
- KnownOne2, TLO, Depth+1) ||
- SimplifyDemandedBits(Op.getOperand(1), LoMask, KnownZero2,
- KnownOne2, TLO, Depth+1) ||
+ if (SimplifyDemandedBits(Op.getOperand(0), LoMask, Known2, TLO, Depth+1) ||
+ SimplifyDemandedBits(Op.getOperand(1), LoMask, Known2, TLO, Depth+1) ||
// See if the operation should be performed at a smaller bit width.
- TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl)) {
- const SDNodeFlags *Flags = Op.getNode()->getFlags();
- if (Flags->hasNoSignedWrap() || Flags->hasNoUnsignedWrap()) {
+ ShrinkDemandedOp(Op, BitWidth, NewMask, TLO)) {
+ SDNodeFlags Flags = Op.getNode()->getFlags();
+ if (Flags.hasNoSignedWrap() || Flags.hasNoUnsignedWrap()) {
// Disable the nsw and nuw flags. We can no longer guarantee that we
// won't wrap after simplification.
- SDNodeFlags NewFlags = *Flags;
- NewFlags.setNoSignedWrap(false);
- NewFlags.setNoUnsignedWrap(false);
+ Flags.setNoSignedWrap(false);
+ Flags.setNoUnsignedWrap(false);
SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, Op.getValueType(),
Op.getOperand(0), Op.getOperand(1),
- &NewFlags);
+ Flags);
return TLO.CombineTo(Op, NewOp);
}
return true;
@@ -1253,13 +1266,13 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
}
default:
// Just use computeKnownBits to compute output bits.
- TLO.DAG.computeKnownBits(Op, KnownZero, KnownOne, Depth);
+ TLO.DAG.computeKnownBits(Op, Known, Depth);
break;
}
// If we know the value of all of the demanded bits, return this as a
// constant.
- if ((NewMask & (KnownZero|KnownOne)) == NewMask) {
+ if (NewMask.isSubsetOf(Known.Zero|Known.One)) {
// Avoid folding to a constant if any OpaqueConstant is involved.
const SDNode *N = Op.getNode();
for (SDNodeIterator I = SDNodeIterator::begin(N),
@@ -1270,17 +1283,17 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
return false;
}
return TLO.CombineTo(Op,
- TLO.DAG.getConstant(KnownOne, dl, Op.getValueType()));
+ TLO.DAG.getConstant(Known.One, dl, Op.getValueType()));
}
return false;
}
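
The final isSubsetOf() test reads: every demanded bit is covered by either
Known.Zero or Known.One, so on the demanded bits the value is fully
determined and equals Known.One. Modelled with plain 8-bit masks (sketch,
not LLVM code):

#include <cassert>
#include <cstdint>

int main() {
  uint8_t KnownZero = 0xF0; // bits proven zero
  uint8_t KnownOne  = 0x05; // bits proven one
  uint8_t Demanded  = 0xF5;

  // isSubsetOf: no demanded bit falls outside Zero|One.
  assert((Demanded & ~(KnownZero | KnownOne)) == 0);

  // Every value consistent with the known bits folds to KnownOne on the
  // demanded bits.
  for (unsigned v = 0; v < 256; ++v) {
    if ((v & KnownZero) || (~v & KnownOne))
      continue; // inconsistent with the known bits
    assert((v & Demanded) == (KnownOne & Demanded));
  }
  return 0;
}
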
/// Determine which of the bits specified in Mask are known to be either zero or
-/// one and return them in the KnownZero/KnownOne bitsets.
+/// one and return them in the Known.
void TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
- APInt &KnownZero,
- APInt &KnownOne,
+ KnownBits &Known,
+ const APInt &DemandedElts,
const SelectionDAG &DAG,
unsigned Depth) const {
assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
@@ -1289,12 +1302,13 @@ void TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
Op.getOpcode() == ISD::INTRINSIC_VOID) &&
"Should use MaskedValueIsZero if you don't know whether Op"
" is a target node!");
- KnownZero = KnownOne = APInt(KnownOne.getBitWidth(), 0);
+ Known.resetAll();
}
/// This method can be implemented by targets that want to expose additional
/// information about sign bits to the DAG Combiner.
unsigned TargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op,
+ const APInt &,
const SelectionDAG &,
unsigned Depth) const {
assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
@@ -1306,31 +1320,38 @@ unsigned TargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op,
return 1;
}
+// FIXME: Ideally, this would use ISD::isConstantSplatVector(), but that
+// would first need to handle truncating build vectors and vectors with
+// elements of fewer than 8 bits.
bool TargetLowering::isConstTrueVal(const SDNode *N) const {
if (!N)
return false;
- const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N);
- if (!CN) {
- const BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N);
- if (!BV)
- return false;
-
- // Only interested in constant splats, we don't care about undef
- // elements in identifying boolean constants and getConstantSplatNode
- // returns NULL if all ops are undef;
- CN = BV->getConstantSplatNode();
+ APInt CVal;
+ if (auto *CN = dyn_cast<ConstantSDNode>(N)) {
+ CVal = CN->getAPIntValue();
+ } else if (auto *BV = dyn_cast<BuildVectorSDNode>(N)) {
+ auto *CN = BV->getConstantSplatNode();
if (!CN)
return false;
+
+ // If this is a truncating build vector, truncate the splat value.
+ // Otherwise, we may fail to match the expected values below.
+ unsigned BVEltWidth = BV->getValueType(0).getScalarSizeInBits();
+ CVal = CN->getAPIntValue();
+ if (BVEltWidth < CVal.getBitWidth())
+ CVal = CVal.trunc(BVEltWidth);
+ } else {
+ return false;
}
switch (getBooleanContents(N->getValueType(0))) {
case UndefinedBooleanContent:
- return CN->getAPIntValue()[0];
+ return CVal[0];
case ZeroOrOneBooleanContent:
- return CN->isOne();
+ return CVal.isOneValue();
case ZeroOrNegativeOneBooleanContent:
- return CN->isAllOnesValue();
+ return CVal.isAllOnesValue();
}
llvm_unreachable("Invalid boolean contents");
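
The reason the splat value must be truncated first: "true" for a vector of
i8 under ZeroOrNegativeOneBooleanContent is 0xFF, and an untruncated wider
splat constant would fail the all-ones check. A standalone model of the
dispatch (the enum names mirror LLVM's, but this is an illustrative sketch,
not the real API):

#include <cassert>
#include <cstdint>

enum BooleanContent {
  UndefinedBooleanContent,        // only bit 0 is meaningful
  ZeroOrOneBooleanContent,        // true == 1
  ZeroOrNegativeOneBooleanContent // true == all-ones
};

static bool isTrue(uint8_t CVal, BooleanContent BC) {
  switch (BC) {
  case UndefinedBooleanContent:         return CVal & 1;
  case ZeroOrOneBooleanContent:         return CVal == 1;
  case ZeroOrNegativeOneBooleanContent: return CVal == 0xFF;
  }
  return false;
}

int main() {
  assert(isTrue(0xFF, UndefinedBooleanContent));  // bit 0 set
  assert(!isTrue(0xFF, ZeroOrOneBooleanContent)); // must be exactly 1
  assert(isTrue(0xFF, ZeroOrNegativeOneBooleanContent));
  return 0;
}
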
@@ -1472,8 +1493,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
}
}
- // Ensure that the constant occurs on the RHS, and fold constant
- // comparisons.
+ // Ensure that the constant occurs on the RHS and fold constant comparisons.
ISD::CondCode SwappedCC = ISD::getSetCCSwappedOperands(Cond);
if (isa<ConstantSDNode>(N0.getNode()) &&
(DCI.isBeforeLegalizeOps() ||
@@ -1486,7 +1506,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
// If the LHS is '(srl (ctlz x), 5)', the RHS is 0/1, and this is an
// equality comparison, then we're just comparing whether X itself is
// zero.
- if (N0.getOpcode() == ISD::SRL && (C1 == 0 || C1 == 1) &&
+ if (N0.getOpcode() == ISD::SRL && (C1.isNullValue() || C1.isOneValue()) &&
N0.getOperand(0).getOpcode() == ISD::CTLZ &&
N0.getOperand(1).getOpcode() == ISD::Constant) {
const APInt &ShAmt
@@ -1617,14 +1637,13 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
return DAG.getSetCC(dl, VT, TopSetCC.getOperand(0),
TopSetCC.getOperand(1),
InvCond);
-
}
}
}
- // If the LHS is '(and load, const)', the RHS is 0,
- // the test is for equality or unsigned, and all 1 bits of the const are
- // in the same partial word, see if we can shorten the load.
+ // If the LHS is '(and load, const)', the RHS is 0, the test is for
+ // equality or unsigned, and all 1 bits of the const are in the same
+ // partial word, see if we can shorten the load.
if (DCI.isBeforeLegalize() &&
!ISD::isSignedIntSetCC(Cond) &&
N0.getOpcode() == ISD::AND && C1 == 0 &&
@@ -1647,16 +1666,16 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
for (unsigned width = origWidth / 2; width>=8; width /= 2) {
APInt newMask = APInt::getLowBitsSet(maskWidth, width);
for (unsigned offset=0; offset<origWidth/width; offset++) {
- if ((newMask & Mask) == Mask) {
- if (!DAG.getDataLayout().isLittleEndian())
- bestOffset = (origWidth/width - offset - 1) * (width/8);
- else
+ if (Mask.isSubsetOf(newMask)) {
+ if (DAG.getDataLayout().isLittleEndian())
bestOffset = (uint64_t)offset * (width/8);
+ else
+ bestOffset = (origWidth/width - offset - 1) * (width/8);
bestMask = Mask.lshr(offset * (width/8) * 8);
bestWidth = width;
break;
}
- newMask = newMask << width;
+ newMask <<= width;
}
}
}
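
The nested loops above slide a window of each candidate width across the
original value, remembering the narrowest naturally aligned window that
still contains every mask bit; the load can then be shrunk to that window.
A standalone version of the search (plain C++ sketch; little-endian offsets
only, and the names are invented here):

#include <cassert>
#include <cstdint>

static bool findWindow(uint64_t Mask, unsigned origWidth,
                       unsigned &bestWidth, unsigned &bestByteOffset) {
  bool found = false;
  for (unsigned width = origWidth / 2; width >= 8; width /= 2) {
    uint64_t windowMask = (1ULL << width) - 1;
    for (unsigned offset = 0; offset < origWidth / width; ++offset) {
      if ((Mask & ~windowMask) == 0) {         // all mask bits in the window
        bestWidth = width;                     // keep narrowing on success
        bestByteOffset = offset * (width / 8); // little-endian layout
        found = true;
        break;
      }
      windowMask <<= width;
    }
  }
  return found;
}

int main() {
  unsigned w = 0, off = 0;
  assert(findWindow(0x0000FF00ULL, 32, w, off));
  assert(w == 8 && off == 1); // an i8 load at byte offset 1 suffices
  return 0;
}
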
@@ -1692,10 +1711,12 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
switch (Cond) {
case ISD::SETUGT:
case ISD::SETUGE:
- case ISD::SETEQ: return DAG.getConstant(0, dl, VT);
+ case ISD::SETEQ:
+ return DAG.getConstant(0, dl, VT);
case ISD::SETULT:
case ISD::SETULE:
- case ISD::SETNE: return DAG.getConstant(1, dl, VT);
+ case ISD::SETNE:
+ return DAG.getConstant(1, dl, VT);
case ISD::SETGT:
case ISD::SETGE:
// True if the sign bit of C1 is set.
@@ -1764,12 +1785,12 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
ExtSrcTyBits),
dl, ExtDstTy),
Cond);
- } else if ((N1C->isNullValue() || N1C->getAPIntValue() == 1) &&
+ } else if ((N1C->isNullValue() || N1C->isOne()) &&
(Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
// SETCC (SETCC), [0|1], [EQ|NE] -> SETCC
if (N0.getOpcode() == ISD::SETCC &&
isTypeLegal(VT) && VT.bitsLE(N0.getValueType())) {
- bool TrueWhenTrue = (Cond == ISD::SETEQ) ^ (N1C->getAPIntValue() != 1);
+ bool TrueWhenTrue = (Cond == ISD::SETEQ) ^ (!N1C->isOne());
if (TrueWhenTrue)
return DAG.getNode(ISD::TRUNCATE, dl, VT, N0);
// Invert the condition.
@@ -1786,7 +1807,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
N0.getOperand(0).getOpcode() == ISD::XOR &&
N0.getOperand(1) == N0.getOperand(0).getOperand(1))) &&
isa<ConstantSDNode>(N0.getOperand(1)) &&
- cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue() == 1) {
+ cast<ConstantSDNode>(N0.getOperand(1))->isOne()) {
// If this is (X^1) == 0/1, swap the RHS and eliminate the xor. We
// can only do this if the top bits are known zero.
unsigned BitWidth = N0.getValueSizeInBits();
@@ -1795,9 +1816,9 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
BitWidth-1))) {
// Okay, get the un-inverted input value.
SDValue Val;
- if (N0.getOpcode() == ISD::XOR)
+ if (N0.getOpcode() == ISD::XOR) {
Val = N0.getOperand(0);
- else {
+ } else {
assert(N0.getOpcode() == ISD::AND &&
N0.getOperand(0).getOpcode() == ISD::XOR);
// ((X^1)&1)^1 -> X & 1
@@ -1809,7 +1830,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
return DAG.getSetCC(dl, VT, Val, N1,
Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
}
- } else if (N1C->getAPIntValue() == 1 &&
+ } else if (N1C->isOne() &&
(VT == MVT::i1 ||
getBooleanContents(N0->getValueType(0)) ==
ZeroOrOneBooleanContent)) {
@@ -1827,7 +1848,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
}
if (Op0.getOpcode() == ISD::AND &&
isa<ConstantSDNode>(Op0.getOperand(1)) &&
- cast<ConstantSDNode>(Op0.getOperand(1))->getAPIntValue() == 1) {
+ cast<ConstantSDNode>(Op0.getOperand(1))->isOne()) {
// If this is (X&1) == / != 1, normalize it to (X&1) != / == 0.
if (Op0.getValueType().bitsGT(VT))
Op0 = DAG.getNode(ISD::AND, dl, VT,
@@ -1862,7 +1883,10 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
// Canonicalize GE/LE comparisons to use GT/LT comparisons.
if (Cond == ISD::SETGE || Cond == ISD::SETUGE) {
- if (C1 == MinVal) return DAG.getConstant(1, dl, VT); // X >= MIN --> true
+ // X >= MIN --> true
+ if (C1 == MinVal)
+ return DAG.getConstant(1, dl, VT);
+
// X >= C0 --> X > (C0 - 1)
APInt C = C1 - 1;
ISD::CondCode NewCC = (Cond == ISD::SETGE) ? ISD::SETGT : ISD::SETUGT;
@@ -1877,7 +1901,10 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
}
if (Cond == ISD::SETLE || Cond == ISD::SETULE) {
- if (C1 == MaxVal) return DAG.getConstant(1, dl, VT); // X <= MAX --> true
+ // X <= MAX --> true
+ if (C1 == MaxVal)
+ return DAG.getConstant(1, dl, VT);
+
// X <= C0 --> X < (C0 + 1)
APInt C = C1 + 1;
ISD::CondCode NewCC = (Cond == ISD::SETLE) ? ISD::SETLT : ISD::SETULT;
@@ -2006,7 +2033,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
} else {
ShiftBits = C1.countTrailingZeros();
}
- NewC = NewC.lshr(ShiftBits);
+ NewC.lshrInPlace(ShiftBits);
if (ShiftBits && NewC.getMinSignedBits() <= 64 &&
isLegalICmpImmediate(NewC.getSExtValue())) {
auto &DL = DAG.getDataLayout();
@@ -2050,6 +2077,16 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
if (Cond == ISD::SETO || Cond == ISD::SETUO)
return DAG.getSetCC(dl, VT, N0, N0, Cond);
+ // setcc (fneg x), C -> setcc swap(pred) x, -C
+ if (N0.getOpcode() == ISD::FNEG) {
+ ISD::CondCode SwapCond = ISD::getSetCCSwappedOperands(Cond);
+ if (DCI.isBeforeLegalizeOps() ||
+ isCondCodeLegal(SwapCond, N0.getSimpleValueType())) {
+ SDValue NegN1 = DAG.getNode(ISD::FNEG, dl, N0.getValueType(), N1);
+ return DAG.getSetCC(dl, VT, N0.getOperand(0), NegN1, SwapCond);
+ }
+ }
+
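
The new fold relies on an IEEE-754 identity: comparing (-x) against C under
a predicate is the same as comparing x against (-C) under the swapped
predicate, and this survives NaN (both sides compare false) and signed zero.
A quick standalone check (plain C++ sketch, not part of the patch):

#include <cassert>
#include <limits>

int main() {
  const double vals[] = {-2.0, -0.0, 0.0, 1.5, 3.0,
                         std::numeric_limits<double>::infinity(),
                         std::numeric_limits<double>::quiet_NaN()};
  for (double x : vals)
    for (double c : vals) {
      assert(((-x) <  c) == (x >  (-c)));   // SETLT <-> SETGT
      assert(((-x) <= c) == (x >= (-c)));   // SETLE <-> SETGE
      assert(((-x) == c) == (x == (-c)));   // SETEQ is its own swap
    }
  return 0;
}
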
// If the condition is not legal, see if we can find an equivalent one
// which is legal.
if (!isCondCodeLegal(Cond, N0.getSimpleValueType())) {
@@ -2129,7 +2166,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
return DAG.getSetCC(dl, VT, N0.getOperand(1), N1.getOperand(1), Cond);
if (N0.getOperand(1) == N1.getOperand(1))
return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(0), Cond);
- if (DAG.isCommutativeBinOp(N0.getOpcode())) {
+ if (isCommutativeBinOp(N0.getOpcode())) {
// If X op Y == Y op X, try other combinations.
if (N0.getOperand(0) == N1.getOperand(1))
return DAG.getSetCC(dl, VT, N0.getOperand(1), N1.getOperand(0),
@@ -2193,7 +2230,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
return DAG.getSetCC(dl, VT, N0.getOperand(1),
DAG.getConstant(0, dl, N0.getValueType()), Cond);
if (N0.getOperand(1) == N1) {
- if (DAG.isCommutativeBinOp(N0.getOpcode()))
+ if (isCommutativeBinOp(N0.getOpcode()))
return DAG.getSetCC(dl, VT, N0.getOperand(0),
DAG.getConstant(0, dl, N0.getValueType()),
Cond);
@@ -2220,7 +2257,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
return DAG.getSetCC(dl, VT, N1.getOperand(1),
DAG.getConstant(0, dl, N1.getValueType()), Cond);
if (N1.getOperand(1) == N0) {
- if (DAG.isCommutativeBinOp(N1.getOpcode()))
+ if (isCommutativeBinOp(N1.getOpcode()))
return DAG.getSetCC(dl, VT, N1.getOperand(0),
DAG.getConstant(0, dl, N1.getValueType()), Cond);
if (N1.getNode()->hasOneUse()) {
@@ -2445,7 +2482,7 @@ void TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
// gcc prints these as sign extended. Sign extend value to 64 bits
// now; without this it would get ZExt'd later in
// ScheduleDAGSDNodes::EmitNode, which is very generic.
- Ops.push_back(DAG.getTargetConstant(C->getAPIntValue().getSExtValue(),
+ Ops.push_back(DAG.getTargetConstant(C->getSExtValue(),
SDLoc(C), MVT::i64));
}
return;
@@ -2470,13 +2507,10 @@ TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *RI,
std::make_pair(0u, static_cast<const TargetRegisterClass*>(nullptr));
// Figure out which register class contains this reg.
- for (TargetRegisterInfo::regclass_iterator RCI = RI->regclass_begin(),
- E = RI->regclass_end(); RCI != E; ++RCI) {
- const TargetRegisterClass *RC = *RCI;
-
+ for (const TargetRegisterClass *RC : RI->regclasses()) {
// If none of the value types for this register class are valid, we
// can't use it. For example, 64-bit reg classes on 32-bit targets.
- if (!isLegalRC(RC))
+ if (!isLegalRC(*RI, *RC))
continue;
for (TargetRegisterClass::iterator I = RC->begin(), E = RC->end();
@@ -2488,9 +2522,9 @@ TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *RI,
// If this register class has the requested value type, return it,
// otherwise keep searching and return the first class found
// if no other is found which explicitly has the requested type.
- if (RC->hasType(VT))
+ if (RI->isTypeLegalForClass(*RC, VT))
return S;
- else if (!R.second)
+ if (!R.second)
R = S;
}
}
@@ -2914,9 +2948,9 @@ static SDValue BuildExactSDIV(const TargetLowering &TLI, SDValue Op1, APInt d,
DAG.getDataLayout()));
SDNodeFlags Flags;
Flags.setExact(true);
- Op1 = DAG.getNode(ISD::SRA, dl, Op1.getValueType(), Op1, Amt, &Flags);
+ Op1 = DAG.getNode(ISD::SRA, dl, Op1.getValueType(), Op1, Amt, Flags);
Created.push_back(Op1.getNode());
- d = d.ashr(ShAmt);
+ d.ashrInPlace(ShAmt);
}
// Calculate the multiplicative inverse, using Newton's method.
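
For reference, the Newton iteration the comment refers to computes the
inverse of an odd divisor modulo 2^N; each step doubles the number of
correct low bits, so five iterations suffice for 64 bits. A standalone
64-bit sketch (the patch itself works on APInt; this is just the underlying
arithmetic):

#include <cassert>
#include <cstdint>

static uint64_t inverseMod2_64(uint64_t d) {
  assert((d & 1) && "only odd numbers are invertible mod 2^64");
  uint64_t x = d;            // already correct to 3 bits: d*d == 1 (mod 8)
  for (int i = 0; i < 5; ++i)
    x *= 2 - d * x;          // Newton step: x' = x * (2 - d*x)
  return x;
}

int main() {
  for (uint64_t d : {1ULL, 3ULL, 5ULL, 0x12345ULL, 0xFFFFFFFFFFFFFFFFULL}) {
    uint64_t inv = inverseMod2_64(d);
    assert(d * inv == 1);    // arithmetic is mod 2^64 by definition
  }
  return 0;
}
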
@@ -2933,7 +2967,7 @@ static SDValue BuildExactSDIV(const TargetLowering &TLI, SDValue Op1, APInt d,
SDValue TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
SelectionDAG &DAG,
std::vector<SDNode *> *Created) const {
- AttributeSet Attr = DAG.getMachineFunction().getFunction()->getAttributes();
+ AttributeList Attr = DAG.getMachineFunction().getFunction()->getAttributes();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (TLI.isIntDivCheap(N->getValueType(0), Attr))
return SDValue(N,0); // Lower SDIV as SDIV
@@ -2958,7 +2992,7 @@ SDValue TargetLowering::BuildSDIV(SDNode *N, const APInt &Divisor,
return SDValue();
// If the sdiv has an 'exact' bit we can use a simpler lowering.
- if (cast<BinaryWithFlagsSDNode>(N)->Flags.hasExact())
+ if (N->getFlags().hasExact())
return BuildExactSDIV(*this, N->getOperand(0), Divisor, dl, DAG, *Created);
APInt::ms magics = Divisor.magic();
@@ -3297,7 +3331,7 @@ bool TargetLowering::expandFP_TO_SINT(SDNode *Node, SDValue &Result,
SDValue ExponentMask = DAG.getConstant(0x7F800000, dl, IntVT);
SDValue ExponentLoBit = DAG.getConstant(23, dl, IntVT);
SDValue Bias = DAG.getConstant(127, dl, IntVT);
- SDValue SignMask = DAG.getConstant(APInt::getSignBit(VT.getSizeInBits()), dl,
+ SDValue SignMask = DAG.getConstant(APInt::getSignMask(VT.getSizeInBits()), dl,
IntVT);
SDValue SignLowBit = DAG.getConstant(VT.getSizeInBits() - 1, dl, IntVT);
SDValue MantissaMask = DAG.getConstant(0x007FFFFF, dl, IntVT);
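
The constants above are exactly the IEEE-754 single-precision fields: the
sign mask, 8 exponent bits starting at bit 23 with bias 127, and 23 mantissa
bits plus an implicit leading one. A scalar model of the happy path of this
expansion (plain C++ sketch for in-range, normal inputs; the real expansion
handles the remaining cases with selects):

#include <cassert>
#include <cstdint>
#include <cstring>

static int32_t fpToSint(float f) {
  uint32_t bits;
  std::memcpy(&bits, &f, sizeof(bits));
  uint32_t exponent = (bits & 0x7F800000) >> 23;  // ExponentMask, ExponentLoBit
  if (exponent == 0)
    return 0;                                     // zero/denormal -> 0
  int32_t shift = int32_t(exponent) - 127 - 23;   // Bias, then mantissa width
  int64_t mag = (bits & 0x007FFFFF) | 0x00800000; // MantissaMask + hidden bit
  mag = shift >= 0 ? mag << shift : mag >> -shift;
  return (bits & 0x80000000) ? int32_t(-mag) : int32_t(mag);
}

int main() {
  for (float f : {0.0f, 1.0f, -1.0f, 123456.75f, -2.5f})
    assert(fpToSint(f) == int32_t(f));
  return 0;
}
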
@@ -3808,7 +3842,7 @@ SDValue TargetLowering::LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA,
TargetLowering::CallLoweringInfo CLI(DAG);
CLI.setDebugLoc(dl).setChain(DAG.getEntryNode());
- CLI.setCallee(CallingConv::C, VoidPtrType, EmuTlsGetAddr, std::move(Args));
+ CLI.setLibCallee(CallingConv::C, VoidPtrType, EmuTlsGetAddr, std::move(Args));
std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
// TLSADDR will be codegen'ed as call. Inform MFI that function has calls.
diff --git a/contrib/llvm/lib/CodeGen/ShadowStackGCLowering.cpp b/contrib/llvm/lib/CodeGen/ShadowStackGCLowering.cpp
index ff7d205..7b60d22 100644
--- a/contrib/llvm/lib/CodeGen/ShadowStackGCLowering.cpp
+++ b/contrib/llvm/lib/CodeGen/ShadowStackGCLowering.cpp
@@ -16,9 +16,9 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/CodeGen/Passes.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/CodeGen/GCStrategy.h"
+#include "llvm/CodeGen/Passes.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/IntrinsicInst.h"
@@ -27,7 +27,7 @@
using namespace llvm;
-#define DEBUG_TYPE "shadowstackgclowering"
+#define DEBUG_TYPE "shadow-stack-gc-lowering"
namespace {
@@ -66,10 +66,10 @@ private:
};
}
-INITIALIZE_PASS_BEGIN(ShadowStackGCLowering, "shadow-stack-gc-lowering",
+INITIALIZE_PASS_BEGIN(ShadowStackGCLowering, DEBUG_TYPE,
"Shadow Stack GC Lowering", false, false)
INITIALIZE_PASS_DEPENDENCY(GCModuleInfo)
-INITIALIZE_PASS_END(ShadowStackGCLowering, "shadow-stack-gc-lowering",
+INITIALIZE_PASS_END(ShadowStackGCLowering, DEBUG_TYPE,
"Shadow Stack GC Lowering", false, false)
FunctionPass *llvm::createShadowStackGCLoweringPass() { return new ShadowStackGCLowering(); }
diff --git a/contrib/llvm/lib/CodeGen/ShrinkWrap.cpp b/contrib/llvm/lib/CodeGen/ShrinkWrap.cpp
index 4837495..aa75f5e 100644
--- a/contrib/llvm/lib/CodeGen/ShrinkWrap.cpp
+++ b/contrib/llvm/lib/CodeGen/ShrinkWrap.cpp
@@ -210,13 +210,12 @@ public:
char ShrinkWrap::ID = 0;
char &llvm::ShrinkWrapID = ShrinkWrap::ID;
-INITIALIZE_PASS_BEGIN(ShrinkWrap, "shrink-wrap", "Shrink Wrap Pass", false,
- false)
+INITIALIZE_PASS_BEGIN(ShrinkWrap, DEBUG_TYPE, "Shrink Wrap Pass", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfo)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTree)
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
-INITIALIZE_PASS_END(ShrinkWrap, "shrink-wrap", "Shrink Wrap Pass", false, false)
+INITIALIZE_PASS_END(ShrinkWrap, DEBUG_TYPE, "Shrink Wrap Pass", false, false)
bool ShrinkWrap::useOrDefCSROrFI(const MachineInstr &MI,
RegScavenger *RS) const {
@@ -282,8 +281,14 @@ void ShrinkWrap::updateSaveRestorePoints(MachineBasicBlock &MBB,
if (!Restore)
Restore = &MBB;
- else
+ else if (MPDT->getNode(&MBB)) // If the block is not in the post dom tree, it
+ // means the block never returns. If that's the
+ // case, we don't want to call
+ // `findNearestCommonDominator`, which will
+ // return `Restore`.
Restore = MPDT->findNearestCommonDominator(Restore, &MBB);
+ else
+ Restore = nullptr; // Abort, we can't find a restore point in this case.
// Make sure we would be able to insert the restore code before the
// terminator.
@@ -293,7 +298,7 @@ void ShrinkWrap::updateSaveRestorePoints(MachineBasicBlock &MBB,
continue;
// One of the terminators needs to happen before the restore point.
if (MBB.succ_empty()) {
- Restore = nullptr;
+ Restore = nullptr; // Abort, we can't find a restore point in this case.
break;
}
// Look for a restore point that post-dominates all the successors.
@@ -419,7 +424,7 @@ static bool isIrreducibleCFG(const MachineFunction &MF,
}
bool ShrinkWrap::runOnMachineFunction(MachineFunction &MF) {
- if (MF.empty() || !isShrinkWrapEnabled(MF))
+ if (skipFunction(*MF.getFunction()) || MF.empty() || !isShrinkWrapEnabled(MF))
return false;
DEBUG(dbgs() << "**** Analysing " << MF.getName() << '\n');
diff --git a/contrib/llvm/lib/CodeGen/SjLjEHPrepare.cpp b/contrib/llvm/lib/CodeGen/SjLjEHPrepare.cpp
index 209bbe5..17a3a84 100644
--- a/contrib/llvm/lib/CodeGen/SjLjEHPrepare.cpp
+++ b/contrib/llvm/lib/CodeGen/SjLjEHPrepare.cpp
@@ -12,11 +12,11 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/CodeGen/Passes.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/Passes.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
@@ -64,6 +64,7 @@ public:
private:
bool setupEntryBlockAndCallSites(Function &F);
+ bool undoSwiftErrorSelect(Function &F);
void substituteLPadValues(LandingPadInst *LPI, Value *ExnVal, Value *SelVal);
Value *setupFunctionContext(Function &F, ArrayRef<LandingPadInst *> LPads);
void lowerIncomingArguments(Function &F);
@@ -73,7 +74,7 @@ private:
} // end anonymous namespace
char SjLjEHPrepare::ID = 0;
-INITIALIZE_PASS(SjLjEHPrepare, "sjljehprepare", "Prepare SjLj exceptions",
+INITIALIZE_PASS(SjLjEHPrepare, DEBUG_TYPE, "Prepare SjLj exceptions",
false, false)
// Public Interface To the SjLjEHPrepare pass.
@@ -92,8 +93,8 @@ bool SjLjEHPrepare::doInitialization(Module &M) {
doubleUnderDataTy, // __data
VoidPtrTy, // __personality
VoidPtrTy, // __lsda
- doubleUnderJBufTy, // __jbuf
- nullptr);
+ doubleUnderJBufTy // __jbuf
+ );
return true;
}
@@ -124,8 +125,11 @@ static void MarkBlocksLiveIn(BasicBlock *BB,
if (!LiveBBs.insert(BB).second)
return; // already been here.
- for (BasicBlock *PredBB : predecessors(BB))
- MarkBlocksLiveIn(PredBB, LiveBBs);
+ df_iterator_default_set<BasicBlock*> Visited;
+
+ for (BasicBlock *B : inverse_depth_first_ext(BB, Visited))
+ LiveBBs.insert(B);
+
}
/// substituteLPadValues - Substitute the values returned by the landingpad
@@ -174,8 +178,8 @@ Value *SjLjEHPrepare::setupFunctionContext(Function &F,
// because the value needs to be added to the global context list.
auto &DL = F.getParent()->getDataLayout();
unsigned Align = DL.getPrefTypeAlignment(FunctionContextTy);
- FuncCtx = new AllocaInst(FunctionContextTy, nullptr, Align, "fn_context",
- &EntryBB->front());
+ FuncCtx = new AllocaInst(FunctionContextTy, DL.getAllocaAddrSpace(),
+ nullptr, Align, "fn_context", &EntryBB->front());
// Fill in the function context structure.
for (LandingPadInst *LPI : LPads) {
@@ -458,14 +462,33 @@ bool SjLjEHPrepare::setupEntryBlockAndCallSites(Function &F) {
return true;
}
+bool SjLjEHPrepare::undoSwiftErrorSelect(Function &F) {
+ // We have inserted dummy copies 'select true, arg, undef' in the entry block
+ // for arguments to simplify this pass.
+ // swifterror arguments cannot be used in this way. Undo the select for the
+ // swifterror argument.
+ for (auto &AI : F.args()) {
+ if (AI.isSwiftError()) {
+ assert(AI.hasOneUse() && "Must have converted the argument to a select");
+ auto *Select = dyn_cast<SelectInst>(AI.use_begin()->getUser());
+ assert(Select && "There must be a single select user");
+ auto *OrigSwiftError = cast<Argument>(Select->getTrueValue());
+ Select->replaceAllUsesWith(OrigSwiftError);
+ Select->eraseFromParent();
+ return true;
+ }
+ }
+ return false;
+}
+
bool SjLjEHPrepare::runOnFunction(Function &F) {
Module &M = *F.getParent();
RegisterFn = M.getOrInsertFunction(
"_Unwind_SjLj_Register", Type::getVoidTy(M.getContext()),
- PointerType::getUnqual(FunctionContextTy), nullptr);
+ PointerType::getUnqual(FunctionContextTy));
UnregisterFn = M.getOrInsertFunction(
"_Unwind_SjLj_Unregister", Type::getVoidTy(M.getContext()),
- PointerType::getUnqual(FunctionContextTy), nullptr);
+ PointerType::getUnqual(FunctionContextTy));
FrameAddrFn = Intrinsic::getDeclaration(&M, Intrinsic::frameaddress);
StackAddrFn = Intrinsic::getDeclaration(&M, Intrinsic::stacksave);
StackRestoreFn = Intrinsic::getDeclaration(&M, Intrinsic::stackrestore);
@@ -476,5 +499,7 @@ bool SjLjEHPrepare::runOnFunction(Function &F) {
FuncCtxFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_sjlj_functioncontext);
bool Res = setupEntryBlockAndCallSites(F);
+ if (Res)
+ Res |= undoSwiftErrorSelect(F);
return Res;
}
diff --git a/contrib/llvm/lib/CodeGen/SlotIndexes.cpp b/contrib/llvm/lib/CodeGen/SlotIndexes.cpp
index dba103e9..3656832 100644
--- a/contrib/llvm/lib/CodeGen/SlotIndexes.cpp
+++ b/contrib/llvm/lib/CodeGen/SlotIndexes.cpp
@@ -19,7 +19,7 @@ using namespace llvm;
#define DEBUG_TYPE "slotindexes"
char SlotIndexes::ID = 0;
-INITIALIZE_PASS(SlotIndexes, "slotindexes",
+INITIALIZE_PASS(SlotIndexes, DEBUG_TYPE,
"Slot index numbering", false, false)
STATISTIC(NumLocalRenum, "Number of local renumberings");
@@ -103,6 +103,48 @@ bool SlotIndexes::runOnMachineFunction(MachineFunction &fn) {
return false;
}
+void SlotIndexes::removeMachineInstrFromMaps(MachineInstr &MI) {
+ assert(!MI.isBundledWithPred() &&
+ "Use removeSingleMachineInstrFromMaps() instread");
+ Mi2IndexMap::iterator mi2iItr = mi2iMap.find(&MI);
+ if (mi2iItr == mi2iMap.end())
+ return;
+
+ SlotIndex MIIndex = mi2iItr->second;
+ IndexListEntry &MIEntry = *MIIndex.listEntry();
+ assert(MIEntry.getInstr() == &MI && "Instruction indexes broken.");
+ mi2iMap.erase(mi2iItr);
+ // FIXME: Eventually we want to actually delete these indexes.
+ MIEntry.setInstr(nullptr);
+}
+
+void SlotIndexes::removeSingleMachineInstrFromMaps(MachineInstr &MI) {
+ Mi2IndexMap::iterator mi2iItr = mi2iMap.find(&MI);
+ if (mi2iItr == mi2iMap.end())
+ return;
+
+ SlotIndex MIIndex = mi2iItr->second;
+ IndexListEntry &MIEntry = *MIIndex.listEntry();
+ assert(MIEntry.getInstr() == &MI && "Instruction indexes broken.");
+ mi2iMap.erase(mi2iItr);
+
+ // When removing the first instruction of a bundle, update the mapping to
+ // the next instruction.
+ if (MI.isBundledWithSucc()) {
+ // Only the first instruction of a bundle should have an index assigned.
+ assert(!MI.isBundledWithPred() && "Should have first bundle instruction");
+
+ MachineBasicBlock::instr_iterator Next = std::next(MI.getIterator());
+ MachineInstr &NextMI = *Next;
+ MIEntry.setInstr(&NextMI);
+ mi2iMap.insert(std::make_pair(&NextMI, MIIndex));
+ return;
+ } else {
+ // FIXME: Eventually we want to actually delete these indexes.
+ MIEntry.setInstr(nullptr);
+ }
+}
+
void SlotIndexes::renumberIndexes() {
// Renumber updates the index of every element of the index list.
DEBUG(dbgs() << "\n*** Renumbering SlotIndexes ***\n");
diff --git a/contrib/llvm/lib/CodeGen/SpillPlacement.cpp b/contrib/llvm/lib/CodeGen/SpillPlacement.cpp
index f10c98e..0abe1c4 100644
--- a/contrib/llvm/lib/CodeGen/SpillPlacement.cpp
+++ b/contrib/llvm/lib/CodeGen/SpillPlacement.cpp
@@ -40,14 +40,14 @@
using namespace llvm;
-#define DEBUG_TYPE "spillplacement"
+#define DEBUG_TYPE "spill-code-placement"
char SpillPlacement::ID = 0;
-INITIALIZE_PASS_BEGIN(SpillPlacement, "spill-code-placement",
+INITIALIZE_PASS_BEGIN(SpillPlacement, DEBUG_TYPE,
"Spill Code Placement Analysis", true, true)
INITIALIZE_PASS_DEPENDENCY(EdgeBundles)
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
-INITIALIZE_PASS_END(SpillPlacement, "spill-code-placement",
+INITIALIZE_PASS_END(SpillPlacement, DEBUG_TYPE,
"Spill Code Placement Analysis", true, true)
char &llvm::SpillPlacementID = SpillPlacement::ID;
@@ -310,7 +310,7 @@ void SpillPlacement::addLinks(ArrayRef<unsigned> Links) {
bool SpillPlacement::scanActiveBundles() {
RecentPositive.clear();
- for (int n = ActiveNodes->find_first(); n>=0; n = ActiveNodes->find_next(n)) {
+ for (unsigned n : ActiveNodes->set_bits()) {
update(n);
// A node that must spill, or a node without any links is not going to
// change its value ever again, so exclude it from iterations.
@@ -365,7 +365,7 @@ SpillPlacement::finish() {
// Write preferences back to ActiveNodes.
bool Perfect = true;
- for (int n = ActiveNodes->find_first(); n>=0; n = ActiveNodes->find_next(n))
+ for (unsigned n : ActiveNodes->set_bits())
if (!nodes[n].preferReg()) {
ActiveNodes->reset(n);
Perfect = false;
diff --git a/contrib/llvm/lib/CodeGen/SplitKit.cpp b/contrib/llvm/lib/CodeGen/SplitKit.cpp
index 1c6a84e..323045f 100644
--- a/contrib/llvm/lib/CodeGen/SplitKit.cpp
+++ b/contrib/llvm/lib/CodeGen/SplitKit.cpp
@@ -23,6 +23,7 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/VirtRegMap.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
@@ -52,10 +53,10 @@ InsertPointAnalysis::computeLastInsertPoint(const LiveInterval &CurLI,
std::pair<SlotIndex, SlotIndex> &LIP = LastInsertPoint[Num];
SlotIndex MBBEnd = LIS.getMBBEndIdx(&MBB);
- SmallVector<const MachineBasicBlock *, 1> EHPadSucessors;
+ SmallVector<const MachineBasicBlock *, 1> EHPadSuccessors;
for (const MachineBasicBlock *SMBB : MBB.successors())
if (SMBB->isEHPad())
- EHPadSucessors.push_back(SMBB);
+ EHPadSuccessors.push_back(SMBB);
// Compute insert points on the first call. The pair is independent of the
// current live interval.
@@ -67,7 +68,7 @@ InsertPointAnalysis::computeLastInsertPoint(const LiveInterval &CurLI,
LIP.first = LIS.getInstructionIndex(*FirstTerm);
// If there is a landing pad successor, also find the call instruction.
- if (EHPadSucessors.empty())
+ if (EHPadSuccessors.empty())
return LIP.first;
// There may not be a call instruction (?) in which case we ignore LPad.
LIP.second = LIP.first;
@@ -86,7 +87,7 @@ InsertPointAnalysis::computeLastInsertPoint(const LiveInterval &CurLI,
if (!LIP.second)
return LIP.first;
- if (none_of(EHPadSucessors, [&](const MachineBasicBlock *EHPad) {
+ if (none_of(EHPadSuccessors, [&](const MachineBasicBlock *EHPad) {
return LIS.isLiveInToMBB(CurLI, EHPad);
}))
return LIP.first;
@@ -487,12 +488,125 @@ void SplitEditor::forceRecompute(unsigned RegIdx, const VNInfo *ParentVNI) {
VFP = ValueForcePair(nullptr, true);
}
+SlotIndex SplitEditor::buildSingleSubRegCopy(unsigned FromReg, unsigned ToReg,
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore,
+ unsigned SubIdx, LiveInterval &DestLI, bool Late, SlotIndex Def) {
+ const MCInstrDesc &Desc = TII.get(TargetOpcode::COPY);
+ bool FirstCopy = !Def.isValid();
+ MachineInstr *CopyMI = BuildMI(MBB, InsertBefore, DebugLoc(), Desc)
+ .addReg(ToReg, RegState::Define | getUndefRegState(FirstCopy)
+ | getInternalReadRegState(!FirstCopy), SubIdx)
+ .addReg(FromReg, 0, SubIdx);
+
+ BumpPtrAllocator &Allocator = LIS.getVNInfoAllocator();
+ if (FirstCopy) {
+ SlotIndexes &Indexes = *LIS.getSlotIndexes();
+ Def = Indexes.insertMachineInstrInMaps(*CopyMI, Late).getRegSlot();
+ } else {
+ CopyMI->bundleWithPred();
+ }
+ LaneBitmask LaneMask = TRI.getSubRegIndexLaneMask(SubIdx);
+ DestLI.refineSubRanges(Allocator, LaneMask,
+ [Def, &Allocator](LiveInterval::SubRange& SR) {
+ SR.createDeadDef(Def, Allocator);
+ });
+ return Def;
+}
+
+SlotIndex SplitEditor::buildCopy(unsigned FromReg, unsigned ToReg,
+ LaneBitmask LaneMask, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator InsertBefore, bool Late, unsigned RegIdx) {
+ const MCInstrDesc &Desc = TII.get(TargetOpcode::COPY);
+ if (LaneMask.all() || LaneMask == MRI.getMaxLaneMaskForVReg(FromReg)) {
+ // The full vreg is copied.
+ MachineInstr *CopyMI =
+ BuildMI(MBB, InsertBefore, DebugLoc(), Desc, ToReg).addReg(FromReg);
+ SlotIndexes &Indexes = *LIS.getSlotIndexes();
+ return Indexes.insertMachineInstrInMaps(*CopyMI, Late).getRegSlot();
+ }
+
+ // Only a subset of lanes needs to be copied. The following is a simple
+ // heuristic to construct a sequence of COPYs. We could add a target
+ // specific callback if this turns out to be suboptimal.
+ LiveInterval &DestLI = LIS.getInterval(Edit->get(RegIdx));
+
+ // First pass: Try to find a perfectly matching subregister index. If none
+ // exists, find the one covering the most lanemask bits.
+ SmallVector<unsigned, 8> PossibleIndexes;
+ unsigned BestIdx = 0;
+ unsigned BestCover = 0;
+ const TargetRegisterClass *RC = MRI.getRegClass(FromReg);
+ assert(RC == MRI.getRegClass(ToReg) && "Should have same reg class");
+ for (unsigned Idx = 1, E = TRI.getNumSubRegIndices(); Idx < E; ++Idx) {
+ // Is this index even compatible with the given class?
+ if (TRI.getSubClassWithSubReg(RC, Idx) != RC)
+ continue;
+ LaneBitmask SubRegMask = TRI.getSubRegIndexLaneMask(Idx);
+ // Early exit if we found a perfect match.
+ if (SubRegMask == LaneMask) {
+ BestIdx = Idx;
+ break;
+ }
+
+ // The index must not cover any lanes outside \p LaneMask.
+ if ((SubRegMask & ~LaneMask).any())
+ continue;
+
+ unsigned PopCount = countPopulation(SubRegMask.getAsInteger());
+ PossibleIndexes.push_back(Idx);
+ if (PopCount > BestCover) {
+ BestCover = PopCount;
+ BestIdx = Idx;
+ }
+ }
+
+ // Abort if we cannot possibly implement the COPY with the given indexes.
+ if (BestIdx == 0)
+ report_fatal_error("Impossible to implement partial COPY");
+
+ SlotIndex Def = buildSingleSubRegCopy(FromReg, ToReg, MBB, InsertBefore,
+ BestIdx, DestLI, Late, SlotIndex());
+
+ // Greedy heuristic: keep iterating, taking the best covering subreg index
+ // each time.
+ LaneBitmask LanesLeft = LaneMask & ~(TRI.getSubRegIndexLaneMask(BestIdx));
+ while (LanesLeft.any()) {
+ unsigned BestIdx = 0;
+ int BestCover = INT_MIN;
+ for (unsigned Idx : PossibleIndexes) {
+ LaneBitmask SubRegMask = TRI.getSubRegIndexLaneMask(Idx);
+ // Early exit if we found a perfect match.
+ if (SubRegMask == LanesLeft) {
+ BestIdx = Idx;
+ break;
+ }
+
+ // Try to cover as much of the remaining lanes as possible but
+ // as few of the already covered lanes as possible.
+ int Cover = countPopulation((SubRegMask & LanesLeft).getAsInteger())
+ - countPopulation((SubRegMask & ~LanesLeft).getAsInteger());
+ if (Cover > BestCover) {
+ BestCover = Cover;
+ BestIdx = Idx;
+ }
+ }
+
+ if (BestIdx == 0)
+ report_fatal_error("Impossible to implement partial COPY");
+
+ buildSingleSubRegCopy(FromReg, ToReg, MBB, InsertBefore, BestIdx,
+ DestLI, Late, Def);
+ LanesLeft &= ~TRI.getSubRegIndexLaneMask(BestIdx);
+ }
+
+ return Def;
+}
+
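
The heuristic in buildCopy() is a small greedy set cover over lane masks:
pick the best subset index first, then repeatedly take the index that adds
the most uncovered lanes while re-copying the fewest already-covered ones.
Compressed into one standalone function (plain C++ sketch; bitmasks stand in
for LaneBitmask and the index masks are invented, where the real ones come
from TRI.getSubRegIndexLaneMask()):

#include <bitset>
#include <cassert>
#include <climits>
#include <cstdint>
#include <vector>

static int popcount(uint32_t v) { return int(std::bitset<32>(v).count()); }

static std::vector<unsigned> coverLanes(uint32_t LaneMask,
                                        const std::vector<uint32_t> &IdxMask) {
  std::vector<unsigned> Picked;
  uint32_t LanesLeft = LaneMask;
  while (LanesLeft) {
    int BestIdx = -1, BestCover = INT_MIN;
    for (unsigned i = 0; i < IdxMask.size(); ++i) {
      uint32_t M = IdxMask[i];
      if (M & ~LaneMask)
        continue;               // may not touch lanes outside the request
      int Cover = popcount(M & LanesLeft) - popcount(M & ~LanesLeft);
      if (Cover > BestCover) { BestCover = Cover; BestIdx = int(i); }
    }
    assert(BestIdx >= 0 && "impossible to implement partial COPY");
    Picked.push_back(unsigned(BestIdx));
    LanesLeft &= ~IdxMask[unsigned(BestIdx)];
  }
  return Picked;
}

int main() {
  std::vector<uint32_t> Idx = {0x3, 0xC, 0x1, 0x4};
  std::vector<unsigned> P = coverLanes(0x7, Idx);
  assert(P.size() == 2 && P[0] == 0 && P[1] == 3); // lanes 0-1, then lane 2
  return 0;
}
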
VNInfo *SplitEditor::defFromParent(unsigned RegIdx,
VNInfo *ParentVNI,
SlotIndex UseIdx,
MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) {
- MachineInstr *CopyMI = nullptr;
SlotIndex Def;
LiveInterval *LI = &LIS.getInterval(Edit->get(RegIdx));
@@ -505,24 +619,29 @@ VNInfo *SplitEditor::defFromParent(unsigned RegIdx,
LiveInterval &OrigLI = LIS.getInterval(Original);
VNInfo *OrigVNI = OrigLI.getVNInfoAt(UseIdx);
+ unsigned Reg = LI->reg;
bool DidRemat = false;
if (OrigVNI) {
LiveRangeEdit::Remat RM(ParentVNI);
RM.OrigMI = LIS.getInstructionFromIndex(OrigVNI->def);
if (Edit->canRematerializeAt(RM, OrigVNI, UseIdx, true)) {
- Def = Edit->rematerializeAt(MBB, I, LI->reg, RM, TRI, Late);
+ Def = Edit->rematerializeAt(MBB, I, Reg, RM, TRI, Late);
++NumRemats;
DidRemat = true;
}
}
if (!DidRemat) {
- // Can't remat, just insert a copy from parent.
- CopyMI = BuildMI(MBB, I, DebugLoc(), TII.get(TargetOpcode::COPY), LI->reg)
- .addReg(Edit->getReg());
- Def = LIS.getSlotIndexes()
- ->insertMachineInstrInMaps(*CopyMI, Late)
- .getRegSlot();
+ LaneBitmask LaneMask;
+ if (LI->hasSubRanges()) {
+ LaneMask = LaneBitmask::getNone();
+ for (LiveInterval::SubRange &S : LI->subranges())
+ LaneMask |= S.LaneMask;
+ } else {
+ LaneMask = LaneBitmask::getAll();
+ }
+
++NumCopies;
+ Def = buildCopy(Edit->getReg(), Reg, LaneMask, MBB, I, Late, RegIdx);
}
// Define the value in Reg.
diff --git a/contrib/llvm/lib/CodeGen/SplitKit.h b/contrib/llvm/lib/CodeGen/SplitKit.h
index a75738a..9d409e9 100644
--- a/contrib/llvm/lib/CodeGen/SplitKit.h
+++ b/contrib/llvm/lib/CodeGen/SplitKit.h
@@ -405,6 +405,17 @@ private:
/// deleteRematVictims - Delete defs that are dead after rematerializing.
void deleteRematVictims();
+ /// Add a copy instruction copying \p FromReg to \p ToReg before
+ /// \p InsertBefore. This can be invoked with a \p LaneMask which may make it
+ /// necessary to construct a sequence of copies to cover it exactly.
+ SlotIndex buildCopy(unsigned FromReg, unsigned ToReg, LaneBitmask LaneMask,
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore,
+ bool Late, unsigned RegIdx);
+
+ SlotIndex buildSingleSubRegCopy(unsigned FromReg, unsigned ToReg,
+ MachineBasicBlock &MB, MachineBasicBlock::iterator InsertBefore,
+ unsigned SubIdx, LiveInterval &DestLI, bool Late, SlotIndex PrevCopy);
+
public:
/// Create a new SplitEditor for editing the LiveInterval analyzed by SA.
/// Newly created intervals will be appended to newIntervals.
diff --git a/contrib/llvm/lib/CodeGen/StackColoring.cpp b/contrib/llvm/lib/CodeGen/StackColoring.cpp
index 89c4b57..e5fc540 100644
--- a/contrib/llvm/lib/CodeGen/StackColoring.cpp
+++ b/contrib/llvm/lib/CodeGen/StackColoring.cpp
@@ -23,7 +23,6 @@
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/DepthFirstIterator.h"
-#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
@@ -38,6 +37,7 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/SlotIndexes.h"
#include "llvm/CodeGen/StackProtector.h"
#include "llvm/CodeGen/WinEHFuncInfo.h"
@@ -54,7 +54,7 @@
using namespace llvm;
-#define DEBUG_TYPE "stackcoloring"
+#define DEBUG_TYPE "stack-coloring"
static cl::opt<bool>
DisableColoring("no-stack-coloring",
@@ -87,10 +87,134 @@ STATISTIC(StackSpaceSaved, "Number of bytes saved due to merging slots.");
STATISTIC(StackSlotMerged, "Number of stack slots merged.");
STATISTIC(EscapedAllocas, "Number of allocas that escaped the lifetime region");
+//===----------------------------------------------------------------------===//
+// StackColoring Pass
+//===----------------------------------------------------------------------===//
+//
+// Stack Coloring reduces stack usage by merging stack slots when they
+// can't be used together. For example, consider the following C program:
+//
+// void bar(char *, int);
+// void foo(bool var) {
+// A: {
+// char z[4096];
+// bar(z, 0);
+// }
+//
+// char *p;
+// char x[4096];
+// char y[4096];
+// if (var) {
+// p = x;
+// } else {
+// bar(y, 1);
+// p = y + 1024;
+// }
+// B:
+// bar(p, 2);
+// }
+//
+// Naively-compiled, this program would use 12k of stack space. However, the
+// stack slot corresponding to `z` is always destroyed before either of the
+// stack slots for `x` or `y` are used, and then `x` is only used if `var`
+// is true, while `y` is only used if `var` is false. So at no time are two
+// of the stack slots used together, and therefore we can merge them,
+// compiling the function using only a single 4k alloca:
+//
+// void foo(bool var) { // equivalent
+// char x[4096];
+// char *p;
+// bar(x, 0);
+// if (var) {
+// p = x;
+// } else {
+// bar(x, 1);
+// p = x + 1024;
+// }
+// bar(p, 2);
+// }
+//
+// This is an important optimization if we want stack space to be under
+// control in large functions, both open-coded ones and ones created by
+// inlining.
//
// Implementation Notes:
// ---------------------
//
+// An important part of the above reasoning is that `z` can't be accessed
+// while the latter 2 calls to `bar` are running. This is justified because
+// `z`'s lifetime is over after we exit from block `A:`, so any further
+// accesses to it would be UB. The way we represent this information
+// in LLVM is by having frontends delimit blocks with `lifetime.start`
+// and `lifetime.end` intrinsics.
+//
+// The effect of these intrinsics seems to be as follows (maybe I should
+// specify this in the reference?):
+//
+// L1) at start, each stack-slot is marked as *out-of-scope*, unless no
+// lifetime intrinsic refers to that stack slot, in which case
+// it is marked as *in-scope*.
+// L2) on a `lifetime.start`, a stack slot is marked as *in-scope* and
+// the stack slot is overwritten with `undef`.
+// L3) on a `lifetime.end`, a stack slot is marked as *out-of-scope*.
+// L4) on function exit, all stack slots are marked as *out-of-scope*.
+// L5) `lifetime.end` is a no-op when called on a slot that is already
+// *out-of-scope*.
+// L6) memory accesses to *out-of-scope* stack slots are UB.
+// L7) when a stack-slot is marked as *out-of-scope*, all pointers to it
+// are invalidated, unless the slot is "degenerate". This is used to
+// justify not marking slots as in-use until the pointer to them is
+// used, but feels a bit hacky in the presence of things like LICM. See
+// the "Degenerate Slots" section for more details.
+//
+// Now, let's ground stack coloring on these rules. We'll define a slot
+// as *in-use* at a (dynamic) point in execution if it either can be
+// written to at that point, or if it has a live and non-undef content
+// at that point.
+//
+// Obviously, slots that are never *in-use* together can be merged, and
+// in our example `foo`, the slots for `x`, `y` and `z` are never
+// in-use together (of course, sometimes slots that *are* in-use together
+// might still be mergeable, but we don't care about that here).
+//
+// In this implementation, we successively merge pairs of slots that are
+// not *in-use* together. We could be smarter - for example, we could merge
+// a single large slot with 2 small slots, or we could construct the
+// interference graph and run a "smart" graph coloring algorithm, but with
+// that aside, how do we find out whether a pair of slots might be *in-use*
+// together?
+//
+// From our rules, we see that *out-of-scope* slots are never *in-use*,
+// and from (L7) we see that "non-degenerate" slots remain non-*in-use*
+// until their address is taken. Therefore, we can approximate slot activity
+// using dataflow.
+//
+// A subtle point: naively, we might try to figure out which pairs of
+// stack-slots interfere by propagating `S in-use` through the CFG for every
+// stack-slot `S`, and having `S` and `T` interfere if there is a CFG point in
+// which they are both *in-use*.
+//
+// That is sound, but overly conservative in some cases: in our (artificial)
+// example `foo`, either `x` or `y` might be in use at the label `B:`, but
+// as `x` is only in use if we came in from the `var` edge and `y` only
+// if we came from the `!var` edge, they still can't be in use together.
+// See PR32488 for an important real-life case.
+//
+// If we wanted to find all points of interference precisely, we could
+// propagate `S in-use` and `S&T in-use` predicates through the CFG. That
+// would be precise, but requires propagating `O(n^2)` dataflow facts.
+//
+// However, we aren't interested in the *set* of points of interference
+// between 2 stack slots, only *whether* there *is* such a point. So we
+// can rely on a little trick: for `S` and `T` to be in-use together,
+// one of them needs to become in-use while the other is in-use (or
+// they might both become in use simultaneously). We can check this
+// by also keeping track of the points at which a stack slot might *start*
+// being in-use.
+//
+// Exact first use:
+// ----------------
+//
// Consider the following motivating example:
//
// int foo() {
@@ -159,6 +283,9 @@ STATISTIC(EscapedAllocas, "Number of allocas that escaped the lifetime region");
// lifetime, we can additionally overlap b1 and b5, giving us a 3*1024
// byte stack (better).
//
+// Degenerate Slots:
+// -----------------
+//
// Relying entirely on first-use of stack slots is problematic,
// however, due to the fact that optimizations can sometimes migrate
// uses of a variable outside of its lifetime start/end region. Here
@@ -238,10 +365,6 @@ STATISTIC(EscapedAllocas, "Number of allocas that escaped the lifetime region");
// for "b" then it will appear that 'b' has a degenerate lifetime.
//
-//===----------------------------------------------------------------------===//
-// StackColoring Pass
-//===----------------------------------------------------------------------===//
-
namespace {
/// StackColoring - A machine pass for merging disjoint stack allocations,
/// marked by the LIFETIME_START and LIFETIME_END pseudo instructions.
@@ -272,8 +395,11 @@ class StackColoring : public MachineFunctionPass {
/// Maps basic blocks to a serial number.
SmallVector<const MachineBasicBlock*, 8> BasicBlockNumbering;
- /// Maps liveness intervals for each slot.
+ /// Maps slots to their use interval. Outside of this interval, a slot's
+ /// value is either dead or `undef` and it will not be written to.
SmallVector<std::unique_ptr<LiveInterval>, 16> Intervals;
+ /// Maps slots to the points where they can become in-use.
+ SmallVector<SmallVector<SlotIndex, 4>, 16> LiveStarts;
/// VNInfo is used for the construction of LiveIntervals.
VNInfo::Allocator VNInfoAllocator;
/// SlotIndex analysis object.
@@ -372,12 +498,12 @@ private:
char StackColoring::ID = 0;
char &llvm::StackColoringID = StackColoring::ID;
-INITIALIZE_PASS_BEGIN(StackColoring,
- "stack-coloring", "Merge disjoint stack slots", false, false)
+INITIALIZE_PASS_BEGIN(StackColoring, DEBUG_TYPE,
+ "Merge disjoint stack slots", false, false)
INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
INITIALIZE_PASS_DEPENDENCY(StackProtector)
-INITIALIZE_PASS_END(StackColoring,
- "stack-coloring", "Merge disjoint stack slots", false, false)
+INITIALIZE_PASS_END(StackColoring, DEBUG_TYPE,
+ "Merge disjoint stack slots", false, false)
void StackColoring::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<SlotIndexes>();
@@ -385,14 +511,13 @@ void StackColoring::getAnalysisUsage(AnalysisUsage &AU) const {
MachineFunctionPass::getAnalysisUsage(AU);
}
-#ifndef NDEBUG
-
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD void StackColoring::dumpBV(const char *tag,
const BitVector &BV) const {
- DEBUG(dbgs() << tag << " : { ");
+ dbgs() << tag << " : { ";
for (unsigned I = 0, E = BV.size(); I != E; ++I)
- DEBUG(dbgs() << BV.test(I) << " ");
- DEBUG(dbgs() << "}\n");
+ dbgs() << BV.test(I) << " ";
+ dbgs() << "}\n";
}
LLVM_DUMP_METHOD void StackColoring::dumpBB(MachineBasicBlock *MBB) const {
@@ -408,20 +533,19 @@ LLVM_DUMP_METHOD void StackColoring::dumpBB(MachineBasicBlock *MBB) const {
LLVM_DUMP_METHOD void StackColoring::dump() const {
for (MachineBasicBlock *MBB : depth_first(MF)) {
- DEBUG(dbgs() << "Inspecting block #" << MBB->getNumber() << " ["
- << MBB->getName() << "]\n");
- DEBUG(dumpBB(MBB));
+ dbgs() << "Inspecting block #" << MBB->getNumber() << " ["
+ << MBB->getName() << "]\n";
+ dumpBB(MBB);
}
}
LLVM_DUMP_METHOD void StackColoring::dumpIntervals() const {
for (unsigned I = 0, E = Intervals.size(); I != E; ++I) {
- DEBUG(dbgs() << "Interval[" << I << "]:\n");
- DEBUG(Intervals[I]->dump());
+ dbgs() << "Interval[" << I << "]:\n";
+ Intervals[I]->dump();
}
}
-
-#endif // not NDEBUG
+#endif
static inline int getStartOrEndSlot(const MachineInstr &MI)
{
@@ -570,9 +694,8 @@ unsigned StackColoring::collectMarkers(unsigned NumSlot)
// Step 2: compute begin/end sets for each block
- // NOTE: We use a reverse-post-order iteration to ensure that we obtain a
- // deterministic numbering, and because we'll need a post-order iteration
- // later for solving the liveness dataflow problem.
+ // NOTE: We use a depth-first iteration to ensure that we obtain a
+ // deterministic numbering.
for (MachineBasicBlock *MBB : depth_first(MF)) {
// Assign a serial number to this basic block.
@@ -676,15 +799,22 @@ void StackColoring::calculateLocalLiveness()
void StackColoring::calculateLiveIntervals(unsigned NumSlots) {
SmallVector<SlotIndex, 16> Starts;
- SmallVector<SlotIndex, 16> Finishes;
+ SmallVector<bool, 16> DefinitelyInUse;
// For each block, find which slots are active within this block
// and update the live intervals.
for (const MachineBasicBlock &MBB : *MF) {
Starts.clear();
Starts.resize(NumSlots);
- Finishes.clear();
- Finishes.resize(NumSlots);
+ DefinitelyInUse.clear();
+ DefinitelyInUse.resize(NumSlots);
+
+ // Start the interval of the slots that we previously found to be 'in-use'.
+ BlockLifetimeInfo &MBBLiveness = BlockLiveness[&MBB];
+ for (int pos = MBBLiveness.LiveIn.find_first(); pos != -1;
+ pos = MBBLiveness.LiveIn.find_next(pos)) {
+ Starts[pos] = Indexes->getMBBStartIdx(&MBB);
+ }
// Create the interval for the basic blocks containing lifetime begin/end.
for (const MachineInstr &MI : MBB) {
@@ -696,68 +826,35 @@ void StackColoring::calculateLiveIntervals(unsigned NumSlots) {
SlotIndex ThisIndex = Indexes->getInstructionIndex(MI);
for (auto Slot : slots) {
if (IsStart) {
- if (!Starts[Slot].isValid() || Starts[Slot] > ThisIndex)
+ // If a slot is already definitely in use, we don't have to emit
+ // a new start marker because there is already a pre-existing
+ // one.
+ if (!DefinitelyInUse[Slot]) {
+ LiveStarts[Slot].push_back(ThisIndex);
+ DefinitelyInUse[Slot] = true;
+ }
+ if (!Starts[Slot].isValid())
Starts[Slot] = ThisIndex;
} else {
- if (!Finishes[Slot].isValid() || Finishes[Slot] < ThisIndex)
- Finishes[Slot] = ThisIndex;
+ if (Starts[Slot].isValid()) {
+ VNInfo *VNI = Intervals[Slot]->getValNumInfo(0);
+ Intervals[Slot]->addSegment(
+ LiveInterval::Segment(Starts[Slot], ThisIndex, VNI));
+ Starts[Slot] = SlotIndex(); // Invalidate the start index
+ DefinitelyInUse[Slot] = false;
+ }
}
}
}
- // Create the interval of the blocks that we previously found to be 'alive'.
- BlockLifetimeInfo &MBBLiveness = BlockLiveness[&MBB];
- for (int pos = MBBLiveness.LiveIn.find_first(); pos != -1;
- pos = MBBLiveness.LiveIn.find_next(pos)) {
- Starts[pos] = Indexes->getMBBStartIdx(&MBB);
- }
- for (int pos = MBBLiveness.LiveOut.find_first(); pos != -1;
- pos = MBBLiveness.LiveOut.find_next(pos)) {
- Finishes[pos] = Indexes->getMBBEndIdx(&MBB);
- }
-
+ // Finish up started segments
for (unsigned i = 0; i < NumSlots; ++i) {
- //
- // When LifetimeStartOnFirstUse is turned on, data flow analysis
- // is forward (from starts to ends), not bidirectional. A
- // consequence of this is that we can wind up in situations
- // where Starts[i] is invalid but Finishes[i] is valid and vice
- // versa. Example:
- //
- // LIFETIME_START x
- // if (...) {
- // <use of x>
- // throw ...;
- // }
- // LIFETIME_END x
- // return 2;
- //
- //
- // Here the slot for "x" will not be live into the block
- // containing the "return 2" (since lifetimes start with first
- // use, not at the dominating LIFETIME_START marker).
- //
- if (Starts[i].isValid() && !Finishes[i].isValid()) {
- Finishes[i] = Indexes->getMBBEndIdx(&MBB);
- }
if (!Starts[i].isValid())
continue;
- assert(Starts[i] && Finishes[i] && "Invalid interval");
- VNInfo *ValNum = Intervals[i]->getValNumInfo(0);
- SlotIndex S = Starts[i];
- SlotIndex F = Finishes[i];
- if (S < F) {
- // We have a single consecutive region.
- Intervals[i]->addSegment(LiveInterval::Segment(S, F, ValNum));
- } else {
- // We have two non-consecutive regions. This happens when
- // LIFETIME_START appears after the LIFETIME_END marker.
- SlotIndex NewStart = Indexes->getMBBStartIdx(&MBB);
- SlotIndex NewFin = Indexes->getMBBEndIdx(&MBB);
- Intervals[i]->addSegment(LiveInterval::Segment(NewStart, F, ValNum));
- Intervals[i]->addSegment(LiveInterval::Segment(S, NewFin, ValNum));
- }
+ SlotIndex EndIdx = Indexes->getMBBEndIdx(&MBB);
+ VNInfo *VNI = Intervals[i]->getValNumInfo(0);
+ Intervals[i]->addSegment(LiveInterval::Segment(Starts[i], EndIdx, VNI));
}
}
}
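The rewritten calculateLiveIntervals builds each slot's range in one forward scan per block: a slot live into the block gets a segment opened at the block's start index, a LIFETIME_START opens a segment (recording a start point in LiveStarts once per in-use region), a LIFETIME_END closes the open segment, and whatever is still open is closed at the block's end index. A standalone analog of the per-block logic, with plain ints standing in for SlotIndex (a sketch, not LLVM code):

    #include <utility>
    #include <vector>

    // 'Events' are (index, isStart) lifetime markers for one slot inside a
    // block spanning [BlockBegin, BlockEnd). Open a region at the first
    // start marker (or at BlockBegin when live-in), close it at each end
    // marker, and close any region still open at the block boundary.
    std::vector<std::pair<int, int>>
    buildSegments(int BlockBegin, int BlockEnd, bool LiveIn,
                  const std::vector<std::pair<int, bool>> &Events) {
      std::vector<std::pair<int, int>> Segments;
      int Start = LiveIn ? BlockBegin : -1; // -1 means no open region
      for (const auto &Ev : Events) {
        if (Ev.second) {          // lifetime start
          if (Start < 0)
            Start = Ev.first;
        } else if (Start >= 0) {  // lifetime end closes the region
          Segments.emplace_back(Start, Ev.first);
          Start = -1;
        }
      }
      if (Start >= 0)             // finish up started segments
        Segments.emplace_back(Start, BlockEnd);
      return Segments;
    }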
@@ -793,6 +890,10 @@ void StackColoring::remapInstructions(DenseMap<int, int> &SlotRemap) {
// Keep a list of *allocas* which need to be remapped.
DenseMap<const AllocaInst*, const AllocaInst*> Allocas;
+
+ // Keep a list of allocas which have been affected by the remap.
+ SmallPtrSet<const AllocaInst*, 32> MergedAllocas;
+
for (const std::pair<int, int> &SI : SlotRemap) {
const AllocaInst *From = MFI->getObjectAllocation(SI.first);
const AllocaInst *To = MFI->getObjectAllocation(SI.second);
@@ -812,6 +913,10 @@ void StackColoring::remapInstructions(DenseMap<int, int> &SlotRemap) {
Inst = Cast;
}
+ // We keep both slots to maintain AliasAnalysis metadata later.
+ MergedAllocas.insert(From);
+ MergedAllocas.insert(To);
+
// Allow the stack protector to adjust its value map to account for the
// upcoming replacement.
SP->adjustForColoring(From, To);
@@ -843,13 +948,6 @@ void StackColoring::remapInstructions(DenseMap<int, int> &SlotRemap) {
// Update the MachineMemOperand to use the new alloca.
for (MachineMemOperand *MMO : I.memoperands()) {
- // FIXME: In order to enable the use of TBAA when using AA in CodeGen,
- // we'll also need to update the TBAA nodes in MMOs with values
- // derived from the merged allocas. When doing this, we'll need to use
- // the same variant of GetUnderlyingObjects that is used by the
- // instruction scheduler (that can look through ptrtoint/inttoptr
- // pairs).
-
// We've replaced IR-level uses of the remapped allocas, so we only
// need to replace direct uses here.
const AllocaInst *AI = dyn_cast_or_null<AllocaInst>(MMO->getValue());
@@ -901,6 +999,48 @@ void StackColoring::remapInstructions(DenseMap<int, int> &SlotRemap) {
MO.setIndex(ToSlot);
FixedInstr++;
}
+
+ // We adjust AliasAnalysis information for merged stack slots.
+ MachineSDNode::mmo_iterator NewMemOps =
+ MF->allocateMemRefsArray(I.getNumMemOperands());
+ unsigned MemOpIdx = 0;
+ bool ReplaceMemOps = false;
+ for (MachineMemOperand *MMO : I.memoperands()) {
+ // If this memory location may refer to a slot remapped here,
+ // we remove its AA information.
+ bool MayHaveConflictingAAMD = false;
+ if (MMO->getAAInfo()) {
+ if (const Value *MMOV = MMO->getValue()) {
+ SmallVector<Value *, 4> Objs;
+ getUnderlyingObjectsForCodeGen(MMOV, Objs, MF->getDataLayout());
+
+ if (Objs.empty())
+ MayHaveConflictingAAMD = true;
+ else
+ for (Value *V : Objs) {
+ // If this memory location comes from a known stack slot
+ // that is not remapped, we continue checking.
+ // Otherwise, we need to invalidate AA information.
+ const AllocaInst *AI = dyn_cast_or_null<AllocaInst>(V);
+ if (AI && MergedAllocas.count(AI)) {
+ MayHaveConflictingAAMD = true;
+ break;
+ }
+ }
+ }
+ }
+ if (MayHaveConflictingAAMD) {
+ NewMemOps[MemOpIdx++] = MF->getMachineMemOperand(MMO, AAMDNodes());
+ ReplaceMemOps = true;
+ } else
+ NewMemOps[MemOpIdx++] = MMO;
+ }
+
+ // If any memory operand is updated, set memory references of
+ // this instruction.
+ if (ReplaceMemOps)
+ I.setMemRefs(std::make_pair(NewMemOps, I.getNumMemOperands()));
}
// Update the location of C++ catch objects for the MSVC personality routine.
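The new block above is needed because merging changes aliasing facts: two allocas whose TBAA metadata proved them disjoint may now share a single frame slot, and a later scheduler trusting the stale metadata could reorder accesses to that slot. The fix is conservative: rebuild the instruction's memory-operand list, dropping AA metadata from any operand whose underlying object is a merged alloca or cannot be identified. Condensed shape of the loop, where mayReferToMergedAlloca is a hypothetical stand-in for the getUnderlyingObjectsForCodeGen walk:

    for (MachineMemOperand *MMO : I.memoperands()) {
      bool Unsafe = mayReferToMergedAlloca(MMO, MergedAllocas);
      NewMemOps[MemOpIdx++] =
          Unsafe ? MF->getMachineMemOperand(MMO, AAMDNodes()) // AA dropped
                 : MMO;                                       // kept as-is
      ReplaceMemOps |= Unsafe;
    }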
@@ -987,6 +1127,7 @@ bool StackColoring::runOnMachineFunction(MachineFunction &Func) {
BasicBlockNumbering.clear();
Markers.clear();
Intervals.clear();
+ LiveStarts.clear();
VNInfoAllocator.Reset();
unsigned NumSlots = MFI->getObjectIndexEnd();
@@ -998,6 +1139,7 @@ bool StackColoring::runOnMachineFunction(MachineFunction &Func) {
SmallVector<int, 8> SortedSlots;
SortedSlots.reserve(NumSlots);
Intervals.reserve(NumSlots);
+ LiveStarts.resize(NumSlots);
unsigned NumMarkers = collectMarkers(NumSlots);
@@ -1069,6 +1211,9 @@ bool StackColoring::runOnMachineFunction(MachineFunction &Func) {
return MFI->getObjectSize(LHS) > MFI->getObjectSize(RHS);
});
+ for (auto &s : LiveStarts)
+ std::sort(s.begin(), s.end());
+
bool Changed = true;
while (Changed) {
Changed = false;
@@ -1084,12 +1229,22 @@ bool StackColoring::runOnMachineFunction(MachineFunction &Func) {
int SecondSlot = SortedSlots[J];
LiveInterval *First = &*Intervals[FirstSlot];
LiveInterval *Second = &*Intervals[SecondSlot];
+ auto &FirstS = LiveStarts[FirstSlot];
+ auto &SecondS = LiveStarts[SecondSlot];
assert (!First->empty() && !Second->empty() && "Found an empty range");
- // Merge disjoint slots.
- if (!First->overlaps(*Second)) {
+ // Merge disjoint slots. This is a little bit tricky - see the
+ // Implementation Notes section for an explanation.
+ if (!First->isLiveAtIndexes(SecondS) &&
+ !Second->isLiveAtIndexes(FirstS)) {
Changed = true;
First->MergeSegmentsInAsValue(*Second, First->getValNumInfo(0));
+
+ int OldSize = FirstS.size();
+ FirstS.append(SecondS.begin(), SecondS.end());
+ auto Mid = FirstS.begin() + OldSize;
+ std::inplace_merge(FirstS.begin(), Mid, FirstS.end());
+
SlotRemap[SecondSlot] = FirstSlot;
SortedSlots[J] = -1;
DEBUG(dbgs()<<"Merging #"<<FirstSlot<<" and slots #"<<
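The merge test also changes: instead of rejecting any interval overlap, a pair of isLiveAtIndexes queries checks whether either slot is live at one of the other's recorded start points; the reading suggested by the patch is that first-use lifetimes make the raw intervals conservative, while the start lists pin down where a slot really comes into use. After a merge the surviving slot absorbs the other's start list, and std::inplace_merge restores order from the two sorted halves. That list-maintenance step in isolation (a standalone sketch):

    #include <algorithm>
    #include <cstddef>
    #include <vector>

    // Append a second sorted start list, then merge the two sorted halves
    // so the combined list stays sorted, as the patch does after a merge.
    void mergeSortedStarts(std::vector<int> &FirstS,
                           const std::vector<int> &SecondS) {
      const std::ptrdiff_t OldSize =
          static_cast<std::ptrdiff_t>(FirstS.size());
      FirstS.insert(FirstS.end(), SecondS.begin(), SecondS.end());
      std::inplace_merge(FirstS.begin(), FirstS.begin() + OldSize,
                         FirstS.end());
    }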
diff --git a/contrib/llvm/lib/CodeGen/StackMaps.cpp b/contrib/llvm/lib/CodeGen/StackMaps.cpp
index 9b7dd400..b4fa29d 100644
--- a/contrib/llvm/lib/CodeGen/StackMaps.cpp
+++ b/contrib/llvm/lib/CodeGen/StackMaps.cpp
@@ -1,4 +1,4 @@
-//===---------------------------- StackMaps.cpp ---------------------------===//
+//===- StackMaps.cpp ------------------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -8,30 +8,41 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/StackMaps.h"
+#include "llvm/ADT/DenseMapInfo.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/Twine.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCObjectFileInfo.h"
-#include "llvm/MC/MCSectionMachO.h"
+#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetOpcodes.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
#include <iterator>
+#include <utility>
using namespace llvm;
#define DEBUG_TYPE "stackmaps"
static cl::opt<int> StackMapVersion(
- "stackmap-version", cl::init(2),
- cl::desc("Specify the stackmap encoding version (default = 2)"));
+ "stackmap-version", cl::init(3),
+ cl::desc("Specify the stackmap encoding version (default = 3)"));
const char *StackMaps::WSMP = "Stack Maps: ";
@@ -74,7 +85,7 @@ unsigned PatchPointOpers::getNextScratchIdx(unsigned StartIdx) const {
}
StackMaps::StackMaps(AsmPrinter &AP) : AP(AP) {
- if (StackMapVersion != 2)
+ if (StackMapVersion != 3)
llvm_unreachable("Unsupported stackmap version!");
}
@@ -150,7 +161,8 @@ StackMaps::parseOperand(MachineInstr::const_mop_iterator MOI,
if (SubRegIdx)
Offset = TRI->getSubRegIdxOffset(SubRegIdx);
- Locs.emplace_back(Location::Register, RC->getSize(), DwarfRegNum, Offset);
+ Locs.emplace_back(Location::Register, TRI->getSpillSize(*RC),
+ DwarfRegNum, Offset);
return ++MOI;
}
@@ -209,8 +221,9 @@ void StackMaps::print(raw_ostream &OS) {
OS << "Constant Index " << Loc.Offset;
break;
}
- OS << "\t[encoding: .byte " << Loc.Type << ", .byte " << Loc.Size
- << ", .short " << Loc.Reg << ", .int " << Loc.Offset << "]\n";
+ OS << "\t[encoding: .byte " << Loc.Type << ", .byte 0"
+ << ", .short " << Loc.Size << ", .short " << Loc.Reg << ", .short 0"
+ << ", .int " << Loc.Offset << "]\n";
Idx++;
}
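The print format here tracks the binary change made in emitCallsiteEntries below: the location record gains a reserved byte, widens Size to 16 bits, and adds a reserved halfword. Read together, a version-3 location occupies 12 bytes; a sketch of the layout (field names descriptive, not taken from an LLVM header):

    #include <cstdint>

    // Stackmap v3 location record as emitted below (packed, 12 bytes).
    struct StackMapLocationV3 {
      uint8_t Type;         // register/direct/indirect/constant/const-index
      uint8_t Reserved0;    // emitted as 0
      uint16_t Size;        // widened from 8 bits in version 2
      uint16_t DwarfRegNum;
      uint16_t Reserved1;   // emitted as 0
      int32_t Offset;       // or the small constant itself
    };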
@@ -234,7 +247,7 @@ void StackMaps::print(raw_ostream &OS) {
StackMaps::LiveOutReg
StackMaps::createLiveOutReg(unsigned Reg, const TargetRegisterInfo *TRI) const {
unsigned DwarfRegNum = getDwarfRegNum(Reg, TRI);
- unsigned Size = TRI->getMinimalPhysRegClass(Reg)->getSize();
+ unsigned Size = TRI->getSpillSize(*TRI->getMinimalPhysRegClass(Reg));
return LiveOutReg(Reg, DwarfRegNum, Size);
}
@@ -276,7 +289,8 @@ StackMaps::parseRegisterLiveOutMask(const uint32_t *Mask) const {
}
LiveOuts.erase(
- remove_if(LiveOuts, [](const LiveOutReg &LO) { return LO.Reg == 0; }),
+ llvm::remove_if(LiveOuts,
+ [](const LiveOutReg &LO) { return LO.Reg == 0; }),
LiveOuts.end());
return LiveOuts;
@@ -286,7 +300,6 @@ void StackMaps::recordStackMapOpers(const MachineInstr &MI, uint64_t ID,
MachineInstr::const_mop_iterator MOI,
MachineInstr::const_mop_iterator MOE,
bool recordResult) {
-
MCContext &OutContext = AP.OutStreamer->getContext();
MCSymbol *MILabel = OutContext.createTempSymbol();
AP.OutStreamer->EmitLabel(MILabel);
@@ -378,6 +391,7 @@ void StackMaps::recordPatchPoint(const MachineInstr &MI) {
}
#endif
}
+
void StackMaps::recordStatepoint(const MachineInstr &MI) {
assert(MI.getOpcode() == TargetOpcode::STATEPOINT && "expected statepoint");
@@ -508,11 +522,16 @@ void StackMaps::emitCallsiteEntries(MCStreamer &OS) {
for (const auto &Loc : CSLocs) {
OS.EmitIntValue(Loc.Type, 1);
- OS.EmitIntValue(Loc.Size, 1);
+ OS.EmitIntValue(0, 1); // Reserved
+ OS.EmitIntValue(Loc.Size, 2);
OS.EmitIntValue(Loc.Reg, 2);
+ OS.EmitIntValue(0, 2); // Reserved
OS.EmitIntValue(Loc.Offset, 4);
}
+ // Emit alignment to 8 bytes.
+ OS.EmitValueToAlignment(8);
+
// Num live-out registers and padding to align to 4 bytes.
OS.EmitIntValue(0, 2);
OS.EmitIntValue(LiveOuts.size(), 2);
diff --git a/contrib/llvm/lib/CodeGen/StackProtector.cpp b/contrib/llvm/lib/CodeGen/StackProtector.cpp
index c2c010a..d8e7840 100644
--- a/contrib/llvm/lib/CodeGen/StackProtector.cpp
+++ b/contrib/llvm/lib/CodeGen/StackProtector.cpp
@@ -1,4 +1,4 @@
-//===-- StackProtector.cpp - Stack Protector Insertion --------------------===//
+//===- StackProtector.cpp - Stack Protector Insertion ---------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -14,30 +14,40 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/CodeGen/StackProtector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/Analysis/EHPersonalities.h"
-#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Analysis/OptimizationDiagnosticInfo.h"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/StackProtector.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/Attributes.h"
+#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugInfo.h"
+#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
-#include "llvm/IR/GlobalValue.h"
-#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
-#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Module.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/User.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
#include "llvm/Target/TargetSubtargetInfo.h"
-#include <cstdlib>
+#include <utility>
+
using namespace llvm;
#define DEBUG_TYPE "stack-protector"
@@ -50,12 +60,14 @@ static cl::opt<bool> EnableSelectionDAGSP("enable-selectiondag-sp",
cl::init(true), cl::Hidden);
char StackProtector::ID = 0;
-INITIALIZE_TM_PASS(StackProtector, "stack-protector", "Insert stack protectors",
- false, true)
-FunctionPass *llvm::createStackProtectorPass(const TargetMachine *TM) {
- return new StackProtector(TM);
-}
+INITIALIZE_PASS_BEGIN(StackProtector, DEBUG_TYPE,
+ "Insert stack protectors", false, true)
+INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
+INITIALIZE_PASS_END(StackProtector, DEBUG_TYPE,
+ "Insert stack protectors", false, true)
+
+FunctionPass *llvm::createStackProtectorPass() { return new StackProtector(); }
StackProtector::SSPLayoutKind
StackProtector::getSSPLayout(const AllocaInst *AI) const {
@@ -83,12 +95,19 @@ void StackProtector::adjustForColoring(const AllocaInst *From,
}
}
+void StackProtector::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<TargetPassConfig>();
+ AU.addPreserved<DominatorTreeWrapperPass>();
+}
+
bool StackProtector::runOnFunction(Function &Fn) {
F = &Fn;
M = F->getParent();
DominatorTreeWrapperPass *DTWP =
getAnalysisIfAvailable<DominatorTreeWrapperPass>();
DT = DTWP ? &DTWP->getDomTree() : nullptr;
+ TM = &getAnalysis<TargetPassConfig>().getTM<TargetMachine>();
+ Trip = TM->getTargetTriple();
TLI = TM->getSubtargetImpl(Fn)->getTargetLowering();
HasPrologue = false;
HasIRCheck = false;
@@ -222,7 +241,16 @@ bool StackProtector::RequiresStackProtector() {
if (F->hasFnAttribute(Attribute::SafeStack))
return false;
+ // We are constructing the OptimizationRemarkEmitter on the fly rather than
+ // using the analysis pass to avoid building DominatorTree and LoopInfo which
+ // are not available this late in the IR pipeline.
+ OptimizationRemarkEmitter ORE(F);
+
if (F->hasFnAttribute(Attribute::StackProtectReq)) {
+ ORE.emit(OptimizationRemark(DEBUG_TYPE, "StackProtectorRequested", F)
+ << "Stack protection applied to function "
+ << ore::NV("Function", F)
+ << " due to a function attribute or command-line switch");
NeedsProtector = true;
Strong = true; // Use the same heuristic as strong to determine SSPLayout
} else if (F->hasFnAttribute(Attribute::StackProtectStrong))
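The remarks threaded through this function record why each protector was inserted, keyed by DEBUG_TYPE ("stack-protector"), so they should surface through the usual remark plumbing such as clang's -Rpass=stack-protector. The emission pattern, condensed; as the comment above notes, constructing the emitter directly trades away the hotness data the analysis pass would supply:

    OptimizationRemarkEmitter ORE(F);
    ORE.emit(OptimizationRemark(DEBUG_TYPE, "StackProtectorRequested", F)
             << "Stack protection applied to function "
             << ore::NV("Function", F));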
@@ -236,20 +264,29 @@ bool StackProtector::RequiresStackProtector() {
for (const Instruction &I : BB) {
if (const AllocaInst *AI = dyn_cast<AllocaInst>(&I)) {
if (AI->isArrayAllocation()) {
+ OptimizationRemark Remark(DEBUG_TYPE, "StackProtectorAllocaOrArray",
+ &I);
+ Remark
+ << "Stack protection applied to function "
+ << ore::NV("Function", F)
+ << " due to a call to alloca or use of a variable length array";
if (const auto *CI = dyn_cast<ConstantInt>(AI->getArraySize())) {
if (CI->getLimitedValue(SSPBufferSize) >= SSPBufferSize) {
// A call to alloca with size >= SSPBufferSize requires
// stack protectors.
Layout.insert(std::make_pair(AI, SSPLK_LargeArray));
+ ORE.emit(Remark);
NeedsProtector = true;
} else if (Strong) {
// Require protectors for all alloca calls in strong mode.
Layout.insert(std::make_pair(AI, SSPLK_SmallArray));
+ ORE.emit(Remark);
NeedsProtector = true;
}
} else {
// A call to alloca with a variable size requires protectors.
Layout.insert(std::make_pair(AI, SSPLK_LargeArray));
+ ORE.emit(Remark);
NeedsProtector = true;
}
continue;
@@ -259,6 +296,11 @@ bool StackProtector::RequiresStackProtector() {
if (ContainsProtectableArray(AI->getAllocatedType(), IsLarge, Strong)) {
Layout.insert(std::make_pair(AI, IsLarge ? SSPLK_LargeArray
: SSPLK_SmallArray));
+ ORE.emit(OptimizationRemark(DEBUG_TYPE, "StackProtectorBuffer", &I)
+ << "Stack protection applied to function "
+ << ore::NV("Function", F)
+ << " due to a stack allocated buffer or struct containing a "
+ "buffer");
NeedsProtector = true;
continue;
}
@@ -266,6 +308,11 @@ bool StackProtector::RequiresStackProtector() {
if (Strong && HasAddressTaken(AI)) {
++NumAddrTaken;
Layout.insert(std::make_pair(AI, SSPLK_AddrOf));
+ ORE.emit(
+ OptimizationRemark(DEBUG_TYPE, "StackProtectorAddressTaken", &I)
+ << "Stack protection applied to function "
+ << ore::NV("Function", F)
+ << " due to the address of a local variable being taken");
NeedsProtector = true;
}
}
@@ -448,13 +495,13 @@ BasicBlock *StackProtector::CreateFailBB() {
Constant *StackChkFail =
M->getOrInsertFunction("__stack_smash_handler",
Type::getVoidTy(Context),
- Type::getInt8PtrTy(Context), nullptr);
+ Type::getInt8PtrTy(Context));
B.CreateCall(StackChkFail, B.CreateGlobalStringPtr(F->getName(), "SSH"));
} else {
Constant *StackChkFail =
- M->getOrInsertFunction("__stack_chk_fail", Type::getVoidTy(Context),
- nullptr);
+ M->getOrInsertFunction("__stack_chk_fail", Type::getVoidTy(Context));
+
B.CreateCall(StackChkFail, {});
}
B.CreateUnreachable();
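The dropped nullptr arguments reflect an API change: getOrInsertFunction was historically C-variadic, with a null sentinel terminating the parameter-type list, and the replacement overload takes the types directly, so the sentinel has to go. Before and after:

    // Old: C-variadic, nullptr terminates the parameter-type list.
    M->getOrInsertFunction("__stack_chk_fail", Type::getVoidTy(Context),
                           nullptr);
    // New: parameter types are ordinary arguments; no sentinel.
    M->getOrInsertFunction("__stack_chk_fail", Type::getVoidTy(Context));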
diff --git a/contrib/llvm/lib/CodeGen/StackSlotColoring.cpp b/contrib/llvm/lib/CodeGen/StackSlotColoring.cpp
index 234b204..856bca1 100644
--- a/contrib/llvm/lib/CodeGen/StackSlotColoring.cpp
+++ b/contrib/llvm/lib/CodeGen/StackSlotColoring.cpp
@@ -11,7 +11,6 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/CodeGen/Passes.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
@@ -22,6 +21,7 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/CommandLine.h"
@@ -32,7 +32,7 @@
#include <vector>
using namespace llvm;
-#define DEBUG_TYPE "stackslotcoloring"
+#define DEBUG_TYPE "stack-slot-coloring"
static cl::opt<bool>
DisableSharing("no-stack-slot-sharing",
@@ -116,12 +116,12 @@ namespace {
char StackSlotColoring::ID = 0;
char &llvm::StackSlotColoringID = StackSlotColoring::ID;
-INITIALIZE_PASS_BEGIN(StackSlotColoring, "stack-slot-coloring",
+INITIALIZE_PASS_BEGIN(StackSlotColoring, DEBUG_TYPE,
"Stack Slot Coloring", false, false)
INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
INITIALIZE_PASS_DEPENDENCY(LiveStacks)
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
-INITIALIZE_PASS_END(StackSlotColoring, "stack-slot-coloring",
+INITIALIZE_PASS_END(StackSlotColoring, DEBUG_TYPE,
"Stack Slot Coloring", false, false)
namespace {
diff --git a/contrib/llvm/lib/CodeGen/TailDuplication.cpp b/contrib/llvm/lib/CodeGen/TailDuplication.cpp
index e2377d8..489a607 100644
--- a/contrib/llvm/lib/CodeGen/TailDuplication.cpp
+++ b/contrib/llvm/lib/CodeGen/TailDuplication.cpp
@@ -1,4 +1,4 @@
-//===-- TailDuplication.cpp - Duplicate blocks into predecessors' tails ---===//
+//===- TailDuplication.cpp - Duplicate blocks into predecessors' tails ----===//
//
// The LLVM Compiler Infrastructure
//
@@ -12,22 +12,25 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TailDuplicator.h"
-#include "llvm/IR/Function.h"
-#include "llvm/Support/Debug.h"
+#include "llvm/Pass.h"
+
using namespace llvm;
#define DEBUG_TYPE "tailduplication"
namespace {
+
/// Perform tail duplication. Delegates to TailDuplicator
class TailDuplicatePass : public MachineFunctionPass {
TailDuplicator Duplicator;
public:
static char ID;
+
explicit TailDuplicatePass() : MachineFunctionPass(ID) {}
bool runOnMachineFunction(MachineFunction &MF) override;
@@ -35,13 +38,13 @@ public:
void getAnalysisUsage(AnalysisUsage &AU) const override;
};
+} // end anonymous namespace
+
char TailDuplicatePass::ID = 0;
-}
char &llvm::TailDuplicateID = TailDuplicatePass::ID;
-INITIALIZE_PASS(TailDuplicatePass, "tailduplication", "Tail Duplication", false,
- false)
+INITIALIZE_PASS(TailDuplicatePass, DEBUG_TYPE, "Tail Duplication", false, false)
bool TailDuplicatePass::runOnMachineFunction(MachineFunction &MF) {
if (skipFunction(*MF.getFunction()))
diff --git a/contrib/llvm/lib/CodeGen/TailDuplicator.cpp b/contrib/llvm/lib/CodeGen/TailDuplicator.cpp
index 7709236..dc7265d 100644
--- a/contrib/llvm/lib/CodeGen/TailDuplicator.cpp
+++ b/contrib/llvm/lib/CodeGen/TailDuplicator.cpp
@@ -1,4 +1,4 @@
-//===-- TailDuplicator.cpp - Duplicate blocks into predecessors' tails ---===//
+//===- TailDuplicator.cpp - Duplicate blocks into predecessors' tails -----===//
//
// The LLVM Compiler Infrastructure
//
@@ -12,22 +12,36 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/CodeGen/TailDuplicator.h"
+#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/SetVector.h"
-#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineLoopInfo.h"
-#include "llvm/CodeGen/MachineModuleInfo.h"
-#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineSSAUpdater.h"
+#include "llvm/CodeGen/TailDuplicator.h"
+#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/Function.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include <algorithm>
+#include <cassert>
+#include <iterator>
+#include <utility>
+
using namespace llvm;
#define DEBUG_TYPE "tailduplication"
@@ -41,15 +55,13 @@ STATISTIC(NumTailDupRemoved,
STATISTIC(NumDeadBlocks, "Number of dead blocks removed");
STATISTIC(NumAddedPHIs, "Number of phis added");
-namespace llvm {
-
// Heuristic for tail duplication.
static cl::opt<unsigned> TailDuplicateSize(
"tail-dup-size",
cl::desc("Maximum instructions to consider tail duplicating"), cl::init(2),
cl::Hidden);
-cl::opt<unsigned> TailDupIndirectBranchSize(
+static cl::opt<unsigned> TailDupIndirectBranchSize(
"tail-dup-indirect-size",
cl::desc("Maximum instructions to consider tail duplicating blocks that "
"end with indirect branches."), cl::init(20),
@@ -138,7 +150,7 @@ bool TailDuplicator::tailDuplicateAndUpdate(
bool IsSimple, MachineBasicBlock *MBB,
MachineBasicBlock *ForcedLayoutPred,
SmallVectorImpl<MachineBasicBlock*> *DuplicatedPreds,
- llvm::function_ref<void(MachineBasicBlock *)> *RemovalCallback) {
+ function_ref<void(MachineBasicBlock *)> *RemovalCallback) {
// Save the successors list.
SmallSetVector<MachineBasicBlock *, 8> Succs(MBB->succ_begin(),
MBB->succ_end());
@@ -725,6 +737,7 @@ bool TailDuplicator::duplicateSimpleBB(
if (PredTBB == NextBB && PredFBB == nullptr)
PredTBB = nullptr;
+ auto DL = PredBB->findBranchDebugLoc();
TII->removeBranch(*PredBB);
if (!PredBB->isSuccessor(NewTarget))
@@ -735,7 +748,7 @@ bool TailDuplicator::duplicateSimpleBB(
}
if (PredTBB)
- TII->insertBranch(*PredBB, PredTBB, PredFBB, PredCond, DebugLoc());
+ TII->insertBranch(*PredBB, PredTBB, PredFBB, PredCond, DL);
TDBBs.push_back(PredBB);
}
@@ -748,7 +761,7 @@ bool TailDuplicator::canTailDuplicate(MachineBasicBlock *TailBB,
if (PredBB->succ_size() > 1)
return false;
- MachineBasicBlock *PredTBB, *PredFBB;
+ MachineBasicBlock *PredTBB = nullptr, *PredFBB = nullptr;
SmallVector<MachineOperand, 4> PredCond;
if (TII->analyzeBranch(*PredBB, PredTBB, PredFBB, PredCond))
return false;
@@ -831,7 +844,7 @@ bool TailDuplicator::tailDuplicate(bool IsSimple, MachineBasicBlock *TailBB,
appendCopies(PredBB, CopyInfos, Copies);
// Simplify
- MachineBasicBlock *PredTBB, *PredFBB;
+ MachineBasicBlock *PredTBB = nullptr, *PredFBB = nullptr;
SmallVector<MachineOperand, 4> PredCond;
TII->analyzeBranch(*PredBB, PredTBB, PredFBB, PredCond);
@@ -970,7 +983,7 @@ void TailDuplicator::appendCopies(MachineBasicBlock *MBB,
/// the CFG.
void TailDuplicator::removeDeadBlock(
MachineBasicBlock *MBB,
- llvm::function_ref<void(MachineBasicBlock *)> *RemovalCallback) {
+ function_ref<void(MachineBasicBlock *)> *RemovalCallback) {
assert(MBB->pred_empty() && "MBB must be dead!");
DEBUG(dbgs() << "\nRemoving MBB: " << *MBB);
@@ -984,5 +997,3 @@ void TailDuplicator::removeDeadBlock(
// Remove the block.
MBB->eraseFromParent();
}
-
-} // End llvm namespace
diff --git a/contrib/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp b/contrib/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp
index f082add..9dd98b4 100644
--- a/contrib/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp
+++ b/contrib/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp
@@ -1,4 +1,4 @@
-//===----- TargetFrameLoweringImpl.cpp - Implement target frame interface --==//
+//===- TargetFrameLoweringImpl.cpp - Implement target frame interface ------==//
//
// The LLVM Compiler Infrastructure
//
@@ -14,19 +14,21 @@
#include "llvm/ADT/BitVector.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/IR/Attributes.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Function.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/Support/Compiler.h"
#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
-#include <cstdlib>
+
using namespace llvm;
-TargetFrameLowering::~TargetFrameLowering() {
-}
+TargetFrameLowering::~TargetFrameLowering() = default;
/// The default implementation just looks at attribute "no-frame-pointer-elim".
bool TargetFrameLowering::noFramePointerElim(const MachineFunction &MF) const {
@@ -73,7 +75,7 @@ void TargetFrameLowering::determineCalleeSaves(MachineFunction &MF,
return;
// Get the callee saved register list...
- const MCPhysReg *CSRegs = TRI.getCalleeSavedRegs(&MF);
+ const MCPhysReg *CSRegs = MF.getRegInfo().getCalleeSavedRegs();
// Early exit if there are no callee saved registers.
if (!CSRegs || CSRegs[0] == 0)
diff --git a/contrib/llvm/lib/CodeGen/TargetInstrInfo.cpp b/contrib/llvm/lib/CodeGen/TargetInstrInfo.cpp
index 01f91b9..14c5adc 100644
--- a/contrib/llvm/lib/CodeGen/TargetInstrInfo.cpp
+++ b/contrib/llvm/lib/CodeGen/TargetInstrInfo.cpp
@@ -345,12 +345,12 @@ bool TargetInstrInfo::getStackSlotRange(const TargetRegisterClass *RC,
unsigned SubIdx, unsigned &Size,
unsigned &Offset,
const MachineFunction &MF) const {
+ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
if (!SubIdx) {
- Size = RC->getSize();
+ Size = TRI->getSpillSize(*RC);
Offset = 0;
return true;
}
- const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
unsigned BitSize = TRI->getSubRegIdxSize(SubIdx);
// Convert bit size to byte size to be consistent with
// MCRegisterClass::getSize().
@@ -364,10 +364,10 @@ bool TargetInstrInfo::getStackSlotRange(const TargetRegisterClass *RC,
Size = BitSize /= 8;
Offset = (unsigned)BitOffset / 8;
- assert(RC->getSize() >= (Offset + Size) && "bad subregister range");
+ assert(TRI->getSpillSize(*RC) >= (Offset + Size) && "bad subregister range");
if (!MF.getDataLayout().isLittleEndian()) {
- Offset = RC->getSize() - (Offset + Size);
+ Offset = TRI->getSpillSize(*RC) - (Offset + Size);
}
return true;
}
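The getSize/getSpillSize rewrites here, and in StackMaps.cpp above, follow the register-class size and alignment moving off TargetRegisterClass and behind TargetRegisterInfo accessors, presumably so they can vary with the subtarget. The substitution is mechanical:

    // Old: size stored on the register class itself.
    unsigned Size = RC->getSize();

    // New: size queried through TargetRegisterInfo.
    const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
    unsigned SpillSize = TRI->getSpillSize(*RC);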
@@ -428,8 +428,8 @@ static const TargetRegisterClass *canFoldCopy(const MachineInstr &MI,
return nullptr;
}
-void TargetInstrInfo::getNoopForMachoTarget(MCInst &NopInst) const {
- llvm_unreachable("Not a MachO target");
+void TargetInstrInfo::getNoop(MCInst &NopInst) const {
+ llvm_unreachable("Not implemented");
}
static MachineInstr *foldPatchpoint(MachineFunction &MF, MachineInstr &MI,
@@ -470,7 +470,7 @@ static MachineInstr *foldPatchpoint(MachineFunction &MF, MachineInstr &MI,
// No need to fold return, the meta data, and function arguments
for (unsigned i = 0; i < StartIdx; ++i)
- MIB.addOperand(MI.getOperand(i));
+ MIB.add(MI.getOperand(i));
for (unsigned i = StartIdx; i < MI.getNumOperands(); ++i) {
MachineOperand &MO = MI.getOperand(i);
@@ -490,7 +490,7 @@ static MachineInstr *foldPatchpoint(MachineFunction &MF, MachineInstr &MI,
MIB.addImm(SpillOffset);
}
else
- MIB.addOperand(MO);
+ MIB.add(MO);
}
return NewMI;
}
@@ -941,12 +941,10 @@ int TargetInstrInfo::getSPAdjust(const MachineInstr &MI) const {
unsigned FrameSetupOpcode = getCallFrameSetupOpcode();
unsigned FrameDestroyOpcode = getCallFrameDestroyOpcode();
- if (MI.getOpcode() != FrameSetupOpcode &&
- MI.getOpcode() != FrameDestroyOpcode)
+ if (!isFrameInstr(MI))
return 0;
- int SPAdj = MI.getOperand(0).getImm();
- SPAdj = TFI->alignSPAdjust(SPAdj);
+ int SPAdj = TFI->alignSPAdjust(getFrameSize(MI));
if ((!StackGrowsDown && MI.getOpcode() == FrameSetupOpcode) ||
(StackGrowsDown && MI.getOpcode() == FrameDestroyOpcode))
diff --git a/contrib/llvm/lib/CodeGen/TargetLoweringBase.cpp b/contrib/llvm/lib/CodeGen/TargetLoweringBase.cpp
index 003311b..3914ee5 100644
--- a/contrib/llvm/lib/CodeGen/TargetLoweringBase.cpp
+++ b/contrib/llvm/lib/CodeGen/TargetLoweringBase.cpp
@@ -11,7 +11,6 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Target/TargetLowering.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringExtras.h"
@@ -21,6 +20,7 @@
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/StackMaps.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
@@ -33,6 +33,7 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
+#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
@@ -53,6 +54,18 @@ static cl::opt<unsigned> MaximumJumpTableSize
("max-jump-table-size", cl::init(0), cl::Hidden,
cl::desc("Set maximum size of jump tables; zero for no limit."));
+/// Minimum jump table density for normal functions.
+static cl::opt<unsigned>
+ JumpTableDensity("jump-table-density", cl::init(10), cl::Hidden,
+ cl::desc("Minimum density for building a jump table in "
+ "a normal function"));
+
+/// Minimum jump table density for -Os or -Oz functions.
+static cl::opt<unsigned> OptsizeJumpTableDensity(
+ "optsize-jump-table-density", cl::init(40), cl::Hidden,
+ cl::desc("Minimum density for building a jump table in "
+ "an optsize function"));
+
// Although this default value is arbitrary, it is not random. It is assumed
// that a condition that evaluates the same way by a higher percentage than this
// is best represented as control flow. Therefore, the default value N should be
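These two flags feed getMinimumJumpTableDensity, added near the end of this file: switch lowering compares a candidate cluster's density against a 10% floor normally and a 40% floor under optsize before committing to a table. Density here is cases per table slot, which is what keeps huge sparse tables from being built; a hypothetical helper showing the arithmetic:

    #include <cstdint>

    // Hypothetical density check: a table covering [MinVal, MaxVal] has
    // (MaxVal - MinVal + 1) slots; build it only when at least
    // MinDensityPercent of those slots would hold real cases.
    bool denseEnough(uint64_t NumCases, uint64_t MinVal, uint64_t MaxVal,
                     unsigned MinDensityPercent) {
      const uint64_t Range = MaxVal - MinVal + 1;
      return NumCases * 100 >= Range * MinDensityPercent;
    }
    // denseEnough(5, 0, 49, 10) -> true: 5 cases fill 10% of 50 slots.
    // denseEnough(5, 0, 49, 40) -> false under the optsize threshold.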
@@ -361,11 +374,36 @@ static void InitLibcallNames(const char **Names, const Triple &TT) {
Names[RTLIB::MEMCPY] = "memcpy";
Names[RTLIB::MEMMOVE] = "memmove";
Names[RTLIB::MEMSET] = "memset";
- Names[RTLIB::MEMCPY_ELEMENT_ATOMIC_1] = "__llvm_memcpy_element_atomic_1";
- Names[RTLIB::MEMCPY_ELEMENT_ATOMIC_2] = "__llvm_memcpy_element_atomic_2";
- Names[RTLIB::MEMCPY_ELEMENT_ATOMIC_4] = "__llvm_memcpy_element_atomic_4";
- Names[RTLIB::MEMCPY_ELEMENT_ATOMIC_8] = "__llvm_memcpy_element_atomic_8";
- Names[RTLIB::MEMCPY_ELEMENT_ATOMIC_16] = "__llvm_memcpy_element_atomic_16";
+ Names[RTLIB::MEMCPY_ELEMENT_UNORDERED_ATOMIC_1] =
+ "__llvm_memcpy_element_unordered_atomic_1";
+ Names[RTLIB::MEMCPY_ELEMENT_UNORDERED_ATOMIC_2] =
+ "__llvm_memcpy_element_unordered_atomic_2";
+ Names[RTLIB::MEMCPY_ELEMENT_UNORDERED_ATOMIC_4] =
+ "__llvm_memcpy_element_unordered_atomic_4";
+ Names[RTLIB::MEMCPY_ELEMENT_UNORDERED_ATOMIC_8] =
+ "__llvm_memcpy_element_unordered_atomic_8";
+ Names[RTLIB::MEMCPY_ELEMENT_UNORDERED_ATOMIC_16] =
+ "__llvm_memcpy_element_unordered_atomic_16";
+ Names[RTLIB::MEMMOVE_ELEMENT_UNORDERED_ATOMIC_1] =
+ "__llvm_memmove_element_unordered_atomic_1";
+ Names[RTLIB::MEMMOVE_ELEMENT_UNORDERED_ATOMIC_2] =
+ "__llvm_memmove_element_unordered_atomic_2";
+ Names[RTLIB::MEMMOVE_ELEMENT_UNORDERED_ATOMIC_4] =
+ "__llvm_memmove_element_unordered_atomic_4";
+ Names[RTLIB::MEMMOVE_ELEMENT_UNORDERED_ATOMIC_8] =
+ "__llvm_memmove_element_unordered_atomic_8";
+ Names[RTLIB::MEMMOVE_ELEMENT_UNORDERED_ATOMIC_16] =
+ "__llvm_memmove_element_unordered_atomic_16";
+ Names[RTLIB::MEMSET_ELEMENT_UNORDERED_ATOMIC_1] =
+ "__llvm_memset_element_unordered_atomic_1";
+ Names[RTLIB::MEMSET_ELEMENT_UNORDERED_ATOMIC_2] =
+ "__llvm_memset_element_unordered_atomic_2";
+ Names[RTLIB::MEMSET_ELEMENT_UNORDERED_ATOMIC_4] =
+ "__llvm_memset_element_unordered_atomic_4";
+ Names[RTLIB::MEMSET_ELEMENT_UNORDERED_ATOMIC_8] =
+ "__llvm_memset_element_unordered_atomic_8";
+ Names[RTLIB::MEMSET_ELEMENT_UNORDERED_ATOMIC_16] =
+ "__llvm_memset_element_unordered_atomic_16";
Names[RTLIB::UNWIND_RESUME] = "_Unwind_Resume";
Names[RTLIB::SYNC_VAL_COMPARE_AND_SWAP_1] = "__sync_val_compare_and_swap_1";
Names[RTLIB::SYNC_VAL_COMPARE_AND_SWAP_2] = "__sync_val_compare_and_swap_2";
@@ -768,22 +806,55 @@ RTLIB::Libcall RTLIB::getSYNC(unsigned Opc, MVT VT) {
return UNKNOWN_LIBCALL;
}
-RTLIB::Libcall RTLIB::getMEMCPY_ELEMENT_ATOMIC(uint64_t ElementSize) {
+RTLIB::Libcall RTLIB::getMEMCPY_ELEMENT_UNORDERED_ATOMIC(uint64_t ElementSize) {
+ switch (ElementSize) {
+ case 1:
+ return MEMCPY_ELEMENT_UNORDERED_ATOMIC_1;
+ case 2:
+ return MEMCPY_ELEMENT_UNORDERED_ATOMIC_2;
+ case 4:
+ return MEMCPY_ELEMENT_UNORDERED_ATOMIC_4;
+ case 8:
+ return MEMCPY_ELEMENT_UNORDERED_ATOMIC_8;
+ case 16:
+ return MEMCPY_ELEMENT_UNORDERED_ATOMIC_16;
+ default:
+ return UNKNOWN_LIBCALL;
+ }
+}
+
+RTLIB::Libcall RTLIB::getMEMMOVE_ELEMENT_UNORDERED_ATOMIC(uint64_t ElementSize) {
switch (ElementSize) {
case 1:
- return MEMCPY_ELEMENT_ATOMIC_1;
+ return MEMMOVE_ELEMENT_UNORDERED_ATOMIC_1;
case 2:
- return MEMCPY_ELEMENT_ATOMIC_2;
+ return MEMMOVE_ELEMENT_UNORDERED_ATOMIC_2;
case 4:
- return MEMCPY_ELEMENT_ATOMIC_4;
+ return MEMMOVE_ELEMENT_UNORDERED_ATOMIC_4;
case 8:
- return MEMCPY_ELEMENT_ATOMIC_8;
+ return MEMMOVE_ELEMENT_UNORDERED_ATOMIC_8;
case 16:
- return MEMCPY_ELEMENT_ATOMIC_16;
+ return MEMMOVE_ELEMENT_UNORDERED_ATOMIC_16;
default:
return UNKNOWN_LIBCALL;
}
+}
+RTLIB::Libcall RTLIB::getMEMSET_ELEMENT_UNORDERED_ATOMIC(uint64_t ElementSize) {
+ switch (ElementSize) {
+ case 1:
+ return MEMSET_ELEMENT_UNORDERED_ATOMIC_1;
+ case 2:
+ return MEMSET_ELEMENT_UNORDERED_ATOMIC_2;
+ case 4:
+ return MEMSET_ELEMENT_UNORDERED_ATOMIC_4;
+ case 8:
+ return MEMSET_ELEMENT_UNORDERED_ATOMIC_8;
+ case 16:
+ return MEMSET_ELEMENT_UNORDERED_ATOMIC_16;
+ default:
+ return UNKNOWN_LIBCALL;
+ }
}
/// InitCmpLibcallCCs - Set default comparison libcall CC.
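Each of the three getters maps an element size to the matching __llvm_*_element_unordered_atomic_N runtime symbol, returning UNKNOWN_LIBCALL for sizes that have no symbol so callers can reject the intrinsic cleanly. Caller-side shape when lowering, condensed, with ElementSizeInBytes assumed in scope:

    RTLIB::Libcall LC =
        RTLIB::getMEMCPY_ELEMENT_UNORDERED_ATOMIC(ElementSizeInBytes);
    if (LC == RTLIB::UNKNOWN_LIBCALL)
      report_fatal_error("Unsupported element size");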
@@ -829,16 +900,16 @@ TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm) : TM(tm) {
initActions();
// Perform these initializations only once.
- MaxStoresPerMemset = MaxStoresPerMemcpy = MaxStoresPerMemmove = 8;
- MaxStoresPerMemsetOptSize = MaxStoresPerMemcpyOptSize
- = MaxStoresPerMemmoveOptSize = 4;
+ MaxStoresPerMemset = MaxStoresPerMemcpy = MaxStoresPerMemmove =
+ MaxLoadsPerMemcmp = 8;
+ MaxStoresPerMemsetOptSize = MaxStoresPerMemcpyOptSize =
+ MaxStoresPerMemmoveOptSize = MaxLoadsPerMemcmpOptSize = 4;
UseUnderscoreSetJmp = false;
UseUnderscoreLongJmp = false;
HasMultipleConditionRegisters = false;
HasExtractBitsInsn = false;
JumpIsExpensive = JumpIsExpensiveOverride;
PredictableSelectIsExpensive = false;
- MaskAndBranchFoldingIsLegal = false;
EnableExtLdPromotion = false;
HasFloatingPointExceptions = true;
StackPointerRegisterToSaveRestore = 0;
@@ -851,7 +922,7 @@ TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm) : TM(tm) {
MinFunctionAlignment = 0;
PrefFunctionAlignment = 0;
PrefLoopAlignment = 0;
- GatherAllAliasesMaxDepth = 6;
+ GatherAllAliasesMaxDepth = 18;
MinStackArgumentAlignment = 1;
// TODO: the default will be switched to 0 in the next commit, along
// with the Target-specific changes necessary.
@@ -901,6 +972,7 @@ void TargetLoweringBase::initActions() {
setOperationAction(ISD::SMAX, VT, Expand);
setOperationAction(ISD::UMIN, VT, Expand);
setOperationAction(ISD::UMAX, VT, Expand);
+ setOperationAction(ISD::ABS, VT, Expand);
// Overflow operations default to expand
setOperationAction(ISD::SADDO, VT, Expand);
@@ -910,6 +982,11 @@ void TargetLoweringBase::initActions() {
setOperationAction(ISD::SMULO, VT, Expand);
setOperationAction(ISD::UMULO, VT, Expand);
+ // ADDCARRY operations default to expand
+ setOperationAction(ISD::ADDCARRY, VT, Expand);
+ setOperationAction(ISD::SUBCARRY, VT, Expand);
+ setOperationAction(ISD::SETCCCARRY, VT, Expand);
+
// These default to Expand so they will be expanded to CTLZ/CTTZ by default.
setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Expand);
setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Expand);
@@ -918,6 +995,7 @@ void TargetLoweringBase::initActions() {
// These library functions default to expand.
setOperationAction(ISD::FROUND, VT, Expand);
+ setOperationAction(ISD::FPOWI, VT, Expand);
// These operations default to expand for vector types.
if (VT.isVector()) {
@@ -1184,12 +1262,11 @@ static unsigned getVectorTypeBreakdownMVT(MVT VT, MVT &IntermediateVT,
/// isLegalRC - Return true if the value types that can be represented by the
/// specified register class are all legal.
-bool TargetLoweringBase::isLegalRC(const TargetRegisterClass *RC) const {
- for (TargetRegisterClass::vt_iterator I = RC->vt_begin(), E = RC->vt_end();
- I != E; ++I) {
+bool TargetLoweringBase::isLegalRC(const TargetRegisterInfo &TRI,
+ const TargetRegisterClass &RC) const {
+ for (auto I = TRI.legalclasstypes_begin(RC); *I != MVT::Other; ++I)
if (isTypeLegal(*I))
return true;
- }
return false;
}
@@ -1227,7 +1304,7 @@ TargetLoweringBase::emitPatchPoint(MachineInstr &InitialMI,
// Copy operands before the frame-index.
for (unsigned i = 0; i < OperIdx; ++i)
- MIB.addOperand(MI->getOperand(i));
+ MIB.add(MI->getOperand(i));
// Add frame index operands recognized by stackmaps.cpp
if (MFI.isStatepointSpillSlotObjectIndex(FI)) {
// indirect-mem-ref tag, size, #FI, offset.
@@ -1237,18 +1314,18 @@ TargetLoweringBase::emitPatchPoint(MachineInstr &InitialMI,
assert(MI->getOpcode() == TargetOpcode::STATEPOINT && "sanity");
MIB.addImm(StackMaps::IndirectMemRefOp);
MIB.addImm(MFI.getObjectSize(FI));
- MIB.addOperand(MI->getOperand(OperIdx));
+ MIB.add(MI->getOperand(OperIdx));
MIB.addImm(0);
} else {
// direct-mem-ref tag, #FI, offset.
// Used by patchpoint, and direct alloca arguments to statepoints
MIB.addImm(StackMaps::DirectMemRefOp);
- MIB.addOperand(MI->getOperand(OperIdx));
+ MIB.add(MI->getOperand(OperIdx));
MIB.addImm(0);
}
// Copy the operands after the frame index.
for (unsigned i = OperIdx + 1; i != MI->getNumOperands(); ++i)
- MIB.addOperand(MI->getOperand(i));
+ MIB.add(MI->getOperand(i));
// Inherit previous memory operands.
MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
@@ -1296,12 +1373,12 @@ TargetLoweringBase::findRepresentativeClass(const TargetRegisterInfo *TRI,
// Find the first legal register class with the largest spill size.
const TargetRegisterClass *BestRC = RC;
- for (int i = SuperRegRC.find_first(); i >= 0; i = SuperRegRC.find_next(i)) {
+ for (unsigned i : SuperRegRC.set_bits()) {
const TargetRegisterClass *SuperRC = TRI->getRegClass(i);
// We want the largest possible spill size.
- if (SuperRC->getSize() <= BestRC->getSize())
+ if (TRI->getSpillSize(*SuperRC) <= TRI->getSpillSize(*BestRC))
continue;
- if (!isLegalRC(SuperRC))
+ if (!isLegalRC(*TRI, *SuperRC))
continue;
BestRC = SuperRC;
}
@@ -1437,6 +1514,7 @@ void TargetLoweringBase::computeRegisterProperties(
}
if (IsLegalWiderType)
break;
+ LLVM_FALLTHROUGH;
}
case TypeWidenVector: {
// Try to widen the vector.
@@ -1454,6 +1532,7 @@ void TargetLoweringBase::computeRegisterProperties(
}
if (IsLegalWiderType)
break;
+ LLVM_FALLTHROUGH;
}
case TypeSplitVector:
case TypeScalarizeVector: {
@@ -1589,7 +1668,7 @@ unsigned TargetLoweringBase::getVectorTypeBreakdown(LLVMContext &Context, EVT VT
/// type of the given function. This does not require a DAG or a return value,
/// and is suitable for use before any DAGs for the function are constructed.
/// TODO: Move this out of TargetLowering.cpp.
-void llvm::GetReturnInfo(Type *ReturnType, AttributeSet attr,
+void llvm::GetReturnInfo(Type *ReturnType, AttributeList attr,
SmallVectorImpl<ISD::OutputArg> &Outs,
const TargetLowering &TLI, const DataLayout &DL) {
SmallVector<EVT, 4> ValueVTs;
@@ -1601,9 +1680,9 @@ void llvm::GetReturnInfo(Type *ReturnType, AttributeSet attr,
EVT VT = ValueVTs[j];
ISD::NodeType ExtendKind = ISD::ANY_EXTEND;
- if (attr.hasAttribute(AttributeSet::ReturnIndex, Attribute::SExt))
+ if (attr.hasAttribute(AttributeList::ReturnIndex, Attribute::SExt))
ExtendKind = ISD::SIGN_EXTEND;
- else if (attr.hasAttribute(AttributeSet::ReturnIndex, Attribute::ZExt))
+ else if (attr.hasAttribute(AttributeList::ReturnIndex, Attribute::ZExt))
ExtendKind = ISD::ZERO_EXTEND;
// FIXME: C calling convention requires the return type to be promoted to
@@ -1616,18 +1695,20 @@ void llvm::GetReturnInfo(Type *ReturnType, AttributeSet attr,
VT = MinVT;
}
- unsigned NumParts = TLI.getNumRegisters(ReturnType->getContext(), VT);
- MVT PartVT = TLI.getRegisterType(ReturnType->getContext(), VT);
+ unsigned NumParts =
+ TLI.getNumRegistersForCallingConv(ReturnType->getContext(), VT);
+ MVT PartVT =
+ TLI.getRegisterTypeForCallingConv(ReturnType->getContext(), VT);
// 'inreg' on function refers to return value
ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy();
- if (attr.hasAttribute(AttributeSet::ReturnIndex, Attribute::InReg))
+ if (attr.hasAttribute(AttributeList::ReturnIndex, Attribute::InReg))
Flags.setInReg();
// Propagate extension type if any
- if (attr.hasAttribute(AttributeSet::ReturnIndex, Attribute::SExt))
+ if (attr.hasAttribute(AttributeList::ReturnIndex, Attribute::SExt))
Flags.setSExt();
- else if (attr.hasAttribute(AttributeSet::ReturnIndex, Attribute::ZExt))
+ else if (attr.hasAttribute(AttributeList::ReturnIndex, Attribute::ZExt))
Flags.setZExt();
for (unsigned i = 0; i < NumParts; ++i)
@@ -1818,7 +1899,7 @@ Value *TargetLoweringBase::getSafeStackPointerLocation(IRBuilder<> &IRB) const {
Module *M = IRB.GetInsertBlock()->getParent()->getParent();
Type *StackPtrTy = Type::getInt8PtrTy(M->getContext());
Value *Fn = M->getOrInsertFunction("__safestack_pointer_address",
- StackPtrTy->getPointerTo(0), nullptr);
+ StackPtrTy->getPointerTo(0));
return IRB.CreateCall(Fn);
}
@@ -1902,6 +1983,10 @@ void TargetLoweringBase::setMinimumJumpTableEntries(unsigned Val) {
MinimumJumpTableEntries = Val;
}
+unsigned TargetLoweringBase::getMinimumJumpTableDensity(bool OptForSize) const {
+ return OptForSize ? OptsizeJumpTableDensity : JumpTableDensity;
+}
+
unsigned TargetLoweringBase::getMaximumJumpTableSize() const {
return MaximumJumpTableSize;
}
@@ -1918,11 +2003,7 @@ void TargetLoweringBase::setMaximumJumpTableSize(unsigned Val) {
/// override the target defaults.
static StringRef getRecipEstimateForFunc(MachineFunction &MF) {
const Function *F = MF.getFunction();
- StringRef RecipAttrName = "reciprocal-estimates";
- if (!F->hasFnAttribute(RecipAttrName))
- return StringRef();
-
- return F->getFnAttribute(RecipAttrName).getValueAsString();
+ return F->getFnAttribute("reciprocal-estimates").getValueAsString();
}
/// Construct a string for the given reciprocal operation of the given type.
@@ -2097,3 +2178,7 @@ int TargetLoweringBase::getDivRefinementSteps(EVT VT,
MachineFunction &MF) const {
return getOpRefinementSteps(false, VT, getRecipEstimateForFunc(MF));
}
+
+void TargetLoweringBase::finalizeLowering(MachineFunction &MF) const {
+ MF.getRegInfo().freezeReservedRegs(MF);
+}
diff --git a/contrib/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/contrib/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
index eb2a28f..6922e33 100644
--- a/contrib/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
+++ b/contrib/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
@@ -1,4 +1,4 @@
-//===-- llvm/CodeGen/TargetLoweringObjectFileImpl.cpp - Object File Info --===//
+//===- llvm/CodeGen/TargetLoweringObjectFileImpl.cpp - Object File Info ---===//
//
// The LLVM Compiler Infrastructure
//
@@ -14,49 +14,109 @@
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Triple.h"
+#include "llvm/BinaryFormat/COFF.h"
+#include "llvm/BinaryFormat/Dwarf.h"
+#include "llvm/BinaryFormat/ELF.h"
+#include "llvm/BinaryFormat/MachO.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineModuleInfoImpls.h"
+#include "llvm/IR/Comdat.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalAlias.h"
+#include "llvm/IR/GlobalObject.h"
+#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Mangler.h"
+#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
+#include "llvm/IR/Type.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCSectionCOFF.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCSectionMachO.h"
+#include "llvm/MC/MCSectionWasm.h"
#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCSymbolELF.h"
#include "llvm/MC/MCValue.h"
+#include "llvm/MC/SectionKind.h"
#include "llvm/ProfileData/InstrProf.h"
-#include "llvm/Support/COFF.h"
-#include "llvm/Support/Dwarf.h"
-#include "llvm/Support/ELF.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/CodeGen.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
+#include <cassert>
+#include <string>
+
using namespace llvm;
using namespace dwarf;
+static void GetObjCImageInfo(Module &M, unsigned &Version, unsigned &Flags,
+ StringRef &Section) {
+ SmallVector<Module::ModuleFlagEntry, 8> ModuleFlags;
+ M.getModuleFlagsMetadata(ModuleFlags);
+
+ for (const auto &MFE: ModuleFlags) {
+ // Ignore flags with 'Require' behavior.
+ if (MFE.Behavior == Module::Require)
+ continue;
+
+ StringRef Key = MFE.Key->getString();
+ if (Key == "Objective-C Image Info Version") {
+ Version = mdconst::extract<ConstantInt>(MFE.Val)->getZExtValue();
+ } else if (Key == "Objective-C Garbage Collection" ||
+ Key == "Objective-C GC Only" ||
+ Key == "Objective-C Is Simulated" ||
+ Key == "Objective-C Class Properties" ||
+ Key == "Objective-C Image Swift Version") {
+ Flags |= mdconst::extract<ConstantInt>(MFE.Val)->getZExtValue();
+ } else if (Key == "Objective-C Image Info Section") {
+ Section = cast<MDString>(MFE.Val)->getString();
+ }
+ }
+}
+
//===----------------------------------------------------------------------===//
// ELF
//===----------------------------------------------------------------------===//
+void TargetLoweringObjectFileELF::emitModuleMetadata(
+ MCStreamer &Streamer, Module &M, const TargetMachine &TM) const {
+ unsigned Version = 0;
+ unsigned Flags = 0;
+ StringRef Section;
+
+ GetObjCImageInfo(M, Version, Flags, Section);
+ if (Section.empty())
+ return;
+
+ auto &C = getContext();
+ auto *S = C.getELFSection(Section, ELF::SHT_PROGBITS, ELF::SHF_ALLOC);
+ Streamer.SwitchSection(S);
+ Streamer.EmitLabel(C.getOrCreateSymbol(StringRef("OBJC_IMAGE_INFO")));
+ Streamer.EmitIntValue(Version, 4);
+ Streamer.EmitIntValue(Flags, 4);
+ Streamer.AddBlankLine();
+}
+
MCSymbol *TargetLoweringObjectFileELF::getCFIPersonalitySymbol(
const GlobalValue *GV, const TargetMachine &TM,
MachineModuleInfo *MMI) const {
unsigned Encoding = getPersonalityEncoding();
- if ((Encoding & 0x80) == dwarf::DW_EH_PE_indirect)
+ if ((Encoding & 0x80) == DW_EH_PE_indirect)
return getContext().getOrCreateSymbol(StringRef("DW.ref.") +
TM.getSymbol(GV)->getName());
- if ((Encoding & 0x70) == dwarf::DW_EH_PE_absptr)
+ if ((Encoding & 0x70) == DW_EH_PE_absptr)
return TM.getSymbol(GV);
report_fatal_error("We do not support this DWARF encoding yet!");
}
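GetObjCImageInfo, added at the top of this file, centralizes the module-flag scraping that the Mach-O emitModuleFlags used to do inline (removed further down), which is what lets the new ELF emitModuleMetadata emit OBJC_IMAGE_INFO too. The flags it reads are ordinary module flags recorded by the front end; producing them from C++ might look like this (a sketch, flag values illustrative only):

    // Front-end side: record ObjC image info as module flags; the
    // version and GC values below are placeholders.
    M.addModuleFlag(Module::Error, "Objective-C Image Info Version", 0);
    M.addModuleFlag(Module::Error, "Objective-C Garbage Collection", 2);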
@@ -86,8 +146,7 @@ void TargetLoweringObjectFileELF::emitPersonalityValue(
const MCExpr *TargetLoweringObjectFileELF::getTTypeGlobalReference(
const GlobalValue *GV, unsigned Encoding, const TargetMachine &TM,
MachineModuleInfo *MMI, MCStreamer &Streamer) const {
-
- if (Encoding & dwarf::DW_EH_PE_indirect) {
+ if (Encoding & DW_EH_PE_indirect) {
MachineModuleInfoELF &ELFMMI = MMI->getObjFileInfo<MachineModuleInfoELF>();
MCSymbol *SSym = getSymbolWithGlobalValueBase(GV, ".DW.stub", TM);
@@ -102,7 +161,7 @@ const MCExpr *TargetLoweringObjectFileELF::getTTypeGlobalReference(
return TargetLoweringObjectFile::
getTTypeReference(MCSymbolRefExpr::create(SSym, getContext()),
- Encoding & ~dwarf::DW_EH_PE_indirect, Streamer);
+ Encoding & ~DW_EH_PE_indirect, Streamer);
}
return TargetLoweringObjectFile::getTTypeGlobalReference(GV, Encoding, TM,
@@ -117,8 +176,9 @@ getELFKindForNamedSection(StringRef Name, SectionKind K) {
// section(".eh_frame") gcc will produce:
//
// .section .eh_frame,"a",@progbits
-
- if (Name == getInstrProfCoverageSectionName(false))
+
+ if (Name == getInstrProfSectionName(IPSK_covmap, Triple::ELF,
+ /*AddSegmentInfo=*/false))
return SectionKind::getMetadata();
if (Name.empty() || Name[0] != '.') return K;
@@ -149,7 +209,6 @@ getELFKindForNamedSection(StringRef Name, SectionKind K) {
return K;
}
-
static unsigned getELFSectionType(StringRef Name, SectionKind K) {
// Use SHT_NOTE for section whose name starts with ".note" to allow
// emitting ELF notes from C variable declaration.
@@ -211,10 +270,47 @@ static const Comdat *getELFComdat(const GlobalValue *GV) {
return C;
}
+static const MCSymbolELF *getAssociatedSymbol(const GlobalObject *GO,
+ const TargetMachine &TM) {
+ MDNode *MD = GO->getMetadata(LLVMContext::MD_associated);
+ if (!MD)
+ return nullptr;
+
+ const MDOperand &Op = MD->getOperand(0);
+ if (!Op.get())
+ return nullptr;
+
+ auto *VM = dyn_cast<ValueAsMetadata>(Op);
+ if (!VM)
+ report_fatal_error("MD_associated operand is not ValueAsMetadata");
+
+ GlobalObject *OtherGO = dyn_cast<GlobalObject>(VM->getValue());
+ return OtherGO ? dyn_cast<MCSymbolELF>(TM.getSymbol(OtherGO)) : nullptr;
+}
+
MCSection *TargetLoweringObjectFileELF::getExplicitSectionGlobal(
const GlobalObject *GO, SectionKind Kind, const TargetMachine &TM) const {
StringRef SectionName = GO->getSection();
+ // Check if '#pragma clang section' name is applicable.
+ // Note that the pragma directive overrides -ffunction-sections and
+ // -fdata-sections, so the section name is exactly as the user specified
+ // and is not uniqued.
+ const GlobalVariable *GV = dyn_cast<GlobalVariable>(GO);
+ if (GV && GV->hasImplicitSection()) {
+ auto Attrs = GV->getAttributes();
+ if (Attrs.hasAttribute("bss-section") && Kind.isBSS()) {
+ SectionName = Attrs.getAttribute("bss-section").getValueAsString();
+ } else if (Attrs.hasAttribute("rodata-section") && Kind.isReadOnly()) {
+ SectionName = Attrs.getAttribute("rodata-section").getValueAsString();
+ } else if (Attrs.hasAttribute("data-section") && Kind.isData()) {
+ SectionName = Attrs.getAttribute("data-section").getValueAsString();
+ }
+ }
+ const Function *F = dyn_cast<Function>(GO);
+ if (F && F->hasFnAttribute("implicit-section-name")) {
+ SectionName = F->getFnAttribute("implicit-section-name").getValueAsString();
+ }
+
// Infer section flags from the section name if we can.
Kind = getELFKindForNamedSection(SectionName, Kind);
@@ -224,9 +320,23 @@ MCSection *TargetLoweringObjectFileELF::getExplicitSectionGlobal(
Group = C->getName();
Flags |= ELF::SHF_GROUP;
}
- return getContext().getELFSection(SectionName,
- getELFSectionType(SectionName, Kind), Flags,
- /*EntrySize=*/0, Group);
+
+ // A section can have at most one associated section. Put each global with
+ // MD_associated in a unique section.
+ unsigned UniqueID = MCContext::GenericSectionID;
+ const MCSymbolELF *AssociatedSymbol = getAssociatedSymbol(GO, TM);
+ if (AssociatedSymbol) {
+ UniqueID = NextUniqueID++;
+ Flags |= ELF::SHF_LINK_ORDER;
+ }
+
+ MCSectionELF *Section = getContext().getELFSection(
+ SectionName, getELFSectionType(SectionName, Kind), Flags,
+ /*EntrySize=*/0, Group, UniqueID, AssociatedSymbol);
+ // Make sure that we did not get some other section with incompatible sh_link.
+ // This should not be possible due to UniqueID code above.
+ assert(Section->getAssociatedSymbol() == AssociatedSymbol);
+ return Section;
}
/// Return the section prefix name used by options FunctionsSections and
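getAssociatedSymbol reads the !associated metadata off a global; when it is present, the global's section gets SHF_LINK_ORDER plus a fresh unique ID so that each section carries at most one associated symbol. Attaching the metadata from C++ might look like the following sketch, with Ctx the LLVMContext and Other the target GlobalObject, both assumed in scope:

    // Tie GV's section to Other's: with SHF_LINK_ORDER the linker may
    // discard GV's section whenever Other's section is discarded.
    GV->setMetadata(LLVMContext::MD_associated,
                    MDNode::get(Ctx, ValueAsMetadata::get(Other)));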
@@ -248,11 +358,10 @@ static StringRef getSectionPrefixForGlobal(SectionKind Kind) {
return ".data.rel.ro";
}
-static MCSectionELF *
-selectELFSectionForGlobal(MCContext &Ctx, const GlobalObject *GO,
- SectionKind Kind, Mangler &Mang,
- const TargetMachine &TM, bool EmitUniqueSection,
- unsigned Flags, unsigned *NextUniqueID) {
+static MCSectionELF *selectELFSectionForGlobal(
+ MCContext &Ctx, const GlobalObject *GO, SectionKind Kind, Mangler &Mang,
+ const TargetMachine &TM, bool EmitUniqueSection, unsigned Flags,
+ unsigned *NextUniqueID, const MCSymbolELF *AssociatedSymbol) {
unsigned EntrySize = 0;
if (Kind.isMergeableCString()) {
if (Kind.isMergeable2ByteCString()) {
@@ -319,7 +428,7 @@ selectELFSectionForGlobal(MCContext &Ctx, const GlobalObject *GO,
if (Kind.isExecuteOnly())
UniqueID = 0;
return Ctx.getELFSection(Name, getELFSectionType(Name, Kind), Flags,
- EntrySize, Group, UniqueID);
+ EntrySize, Group, UniqueID, AssociatedSymbol);
}
MCSection *TargetLoweringObjectFileELF::SelectSectionForGlobal(
@@ -337,8 +446,17 @@ MCSection *TargetLoweringObjectFileELF::SelectSectionForGlobal(
}
EmitUniqueSection |= GO->hasComdat();
- return selectELFSectionForGlobal(getContext(), GO, Kind, getMangler(), TM,
- EmitUniqueSection, Flags, &NextUniqueID);
+ const MCSymbolELF *AssociatedSymbol = getAssociatedSymbol(GO, TM);
+ if (AssociatedSymbol) {
+ EmitUniqueSection = true;
+ Flags |= ELF::SHF_LINK_ORDER;
+ }
+
+ MCSectionELF *Section = selectELFSectionForGlobal(
+ getContext(), GO, Kind, getMangler(), TM, EmitUniqueSection, Flags,
+ &NextUniqueID, AssociatedSymbol);
+ assert(Section->getAssociatedSymbol() == AssociatedSymbol);
+ return Section;
}
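
A hedged sketch of how a global gets onto the MD_associated path above, using the LLVM C++ API (associateGlobals and the variable names are illustrative, not part of this patch):

#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
using namespace llvm;

// Attach !associated metadata to B, pointing at A. SelectSectionForGlobal()
// then places B in a unique section carrying SHF_LINK_ORDER, with A's symbol
// recorded as the associated symbol checked by the assert above.
static void associateGlobals(Module &M, GlobalVariable *A, GlobalVariable *B) {
  Metadata *MD = ValueAsMetadata::get(A);
  B->setMetadata(LLVMContext::MD_associated, MDNode::get(M.getContext(), MD));
}
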
MCSection *TargetLoweringObjectFileELF::getSectionForJumpTable(
@@ -351,8 +469,9 @@ MCSection *TargetLoweringObjectFileELF::getSectionForJumpTable(
return ReadOnlySection;
return selectELFSectionForGlobal(getContext(), &F, SectionKind::getReadOnly(),
- getMangler(), TM, EmitUniqueSection, ELF::SHF_ALLOC,
- &NextUniqueID);
+ getMangler(), TM, EmitUniqueSection,
+ ELF::SHF_ALLOC, &NextUniqueID,
+ /* AssociatedSymbol */ nullptr);
}
bool TargetLoweringObjectFileELF::shouldPutJumpTableInFunctionSection(
@@ -500,40 +619,10 @@ void TargetLoweringObjectFileMachO::Initialize(MCContext &Ctx,
}
}
-/// emitModuleFlags - Perform code emission for module flags.
-void TargetLoweringObjectFileMachO::emitModuleFlags(
- MCStreamer &Streamer, ArrayRef<Module::ModuleFlagEntry> ModuleFlags,
- const TargetMachine &TM) const {
- unsigned VersionVal = 0;
- unsigned ImageInfoFlags = 0;
- MDNode *LinkerOptions = nullptr;
- StringRef SectionVal;
-
- for (const auto &MFE : ModuleFlags) {
- // Ignore flags with 'Require' behavior.
- if (MFE.Behavior == Module::Require)
- continue;
-
- StringRef Key = MFE.Key->getString();
- Metadata *Val = MFE.Val;
-
- if (Key == "Objective-C Image Info Version") {
- VersionVal = mdconst::extract<ConstantInt>(Val)->getZExtValue();
- } else if (Key == "Objective-C Garbage Collection" ||
- Key == "Objective-C GC Only" ||
- Key == "Objective-C Is Simulated" ||
- Key == "Objective-C Class Properties" ||
- Key == "Objective-C Image Swift Version") {
- ImageInfoFlags |= mdconst::extract<ConstantInt>(Val)->getZExtValue();
- } else if (Key == "Objective-C Image Info Section") {
- SectionVal = cast<MDString>(Val)->getString();
- } else if (Key == "Linker Options") {
- LinkerOptions = cast<MDNode>(Val);
- }
- }
-
+void TargetLoweringObjectFileMachO::emitModuleMetadata(
+ MCStreamer &Streamer, Module &M, const TargetMachine &TM) const {
// Emit the linker options if present.
- if (LinkerOptions) {
+ if (auto *LinkerOptions = M.getNamedMetadata("llvm.linker.options")) {
for (const auto &Option : LinkerOptions->operands()) {
SmallVector<std::string, 4> StrOptions;
for (const auto &Piece : cast<MDNode>(Option)->operands())
@@ -542,8 +631,15 @@ void TargetLoweringObjectFileMachO::emitModuleFlags(
}
}
+ unsigned VersionVal = 0;
+ unsigned ImageInfoFlags = 0;
+ StringRef SectionVal;
+
+ GetObjCImageInfo(M, VersionVal, ImageInfoFlags, SectionVal);
+
// The section is mandatory. If we don't have it, then we don't have GC info.
- if (SectionVal.empty()) return;
+ if (SectionVal.empty())
+ return;
StringRef Segment, Section;
unsigned TAA = 0, StubSize = 0;
@@ -723,7 +819,7 @@ const MCExpr *TargetLoweringObjectFileMachO::getTTypeGlobalReference(
return TargetLoweringObjectFile::
getTTypeReference(MCSymbolRefExpr::create(SSym, getContext()),
- Encoding & ~dwarf::DW_EH_PE_indirect, Streamer);
+ Encoding & ~DW_EH_PE_indirect, Streamer);
}
return TargetLoweringObjectFile::getTTypeGlobalReference(GV, Encoding, TM,
@@ -1055,18 +1151,9 @@ MCSection *TargetLoweringObjectFileCOFF::getSectionForJumpTable(
COFF::IMAGE_COMDAT_SELECT_ASSOCIATIVE, UniqueID);
}
-void TargetLoweringObjectFileCOFF::emitModuleFlags(
- MCStreamer &Streamer, ArrayRef<Module::ModuleFlagEntry> ModuleFlags,
- const TargetMachine &TM) const {
- MDNode *LinkerOptions = nullptr;
-
- for (const auto &MFE : ModuleFlags) {
- StringRef Key = MFE.Key->getString();
- if (Key == "Linker Options")
- LinkerOptions = cast<MDNode>(MFE.Val);
- }
-
- if (LinkerOptions) {
+void TargetLoweringObjectFileCOFF::emitModuleMetadata(
+ MCStreamer &Streamer, Module &M, const TargetMachine &TM) const {
+ if (NamedMDNode *LinkerOptions = M.getNamedMetadata("llvm.linker.options")) {
// Emit the linker options to the linker .drectve section. According to the
// spec, this section is a space-separated string containing flags for
// linker.
@@ -1081,6 +1168,24 @@ void TargetLoweringObjectFileCOFF::emitModuleFlags(
}
}
}
+
+ unsigned Version = 0;
+ unsigned Flags = 0;
+ StringRef Section;
+
+ GetObjCImageInfo(M, Version, Flags, Section);
+ if (Section.empty())
+ return;
+
+ auto &C = getContext();
+ auto *S = C.getCOFFSection(
+ Section, COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | COFF::IMAGE_SCN_MEM_READ,
+ SectionKind::getReadOnly());
+ Streamer.SwitchSection(S);
+ Streamer.EmitLabel(C.getOrCreateSymbol(StringRef("OBJC_IMAGE_INFO")));
+ Streamer.EmitIntValue(Version, 4);
+ Streamer.EmitIntValue(Flags, 4);
+ Streamer.AddBlankLine();
}
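
As a rough illustration (clang's MSVC-compatible pragmas, not part of this patch), source like the following is what populates the llvm.linker.options metadata consumed above:

// Each pragma becomes an operand of the "llvm.linker.options" named metadata;
// on COFF the strings land in .drectve, e.g. "/DEFAULTLIB:ws2_32.lib".
#pragma comment(lib, "ws2_32")
#pragma comment(linker, "/include:_forced_symbol")
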
void TargetLoweringObjectFileCOFF::Initialize(MCContext &Ctx,
@@ -1122,33 +1227,110 @@ MCSection *TargetLoweringObjectFileCOFF::getStaticDtorSection(
void TargetLoweringObjectFileCOFF::emitLinkerFlagsForGlobal(
raw_ostream &OS, const GlobalValue *GV) const {
- if (!GV->hasDLLExportStorageClass() || GV->isDeclaration())
- return;
+ emitLinkerFlagsForGlobalCOFF(OS, GV, getTargetTriple(), getMangler());
+}
- const Triple &TT = getTargetTriple();
+//===----------------------------------------------------------------------===//
+// Wasm
+//===----------------------------------------------------------------------===//
- if (TT.isKnownWindowsMSVCEnvironment())
- OS << " /EXPORT:";
- else
- OS << " -export:";
-
- if (TT.isWindowsGNUEnvironment() || TT.isWindowsCygwinEnvironment()) {
- std::string Flag;
- raw_string_ostream FlagOS(Flag);
- getMangler().getNameWithPrefix(FlagOS, GV, false);
- FlagOS.flush();
- if (Flag[0] == GV->getParent()->getDataLayout().getGlobalPrefix())
- OS << Flag.substr(1);
- else
- OS << Flag;
- } else {
- getMangler().getNameWithPrefix(OS, GV, false);
+static const Comdat *getWasmComdat(const GlobalValue *GV) {
+ const Comdat *C = GV->getComdat();
+ if (!C)
+ return nullptr;
+
+ if (C->getSelectionKind() != Comdat::Any)
+ report_fatal_error("Wasm COMDATs only support SelectionKind::Any, '" +
+ C->getName() + "' cannot be lowered.");
+
+ return C;
+}
+
+MCSection *TargetLoweringObjectFileWasm::getExplicitSectionGlobal(
+ const GlobalObject *GO, SectionKind Kind, const TargetMachine &TM) const {
+ llvm_unreachable("getExplicitSectionGlobal not yet implemented");
+ return nullptr;
+}
+
+static MCSectionWasm *
+selectWasmSectionForGlobal(MCContext &Ctx, const GlobalObject *GO,
+ SectionKind Kind, Mangler &Mang,
+ const TargetMachine &TM, bool EmitUniqueSection,
+ unsigned Flags, unsigned *NextUniqueID) {
+ StringRef Group = "";
+ if (getWasmComdat(GO))
+ llvm_unreachable("comdat not yet supported for wasm");
+
+ bool UniqueSectionNames = TM.getUniqueSectionNames();
+ SmallString<128> Name = getSectionPrefixForGlobal(Kind);
+
+ if (const auto *F = dyn_cast<Function>(GO)) {
+ const auto &OptionalPrefix = F->getSectionPrefix();
+ if (OptionalPrefix)
+ Name += *OptionalPrefix;
}
- if (!GV->getValueType()->isFunctionTy()) {
- if (TT.isKnownWindowsMSVCEnvironment())
- OS << ",DATA";
- else
- OS << ",data";
+ if (EmitUniqueSection && UniqueSectionNames) {
+ Name.push_back('.');
+ TM.getNameWithPrefix(Name, GO, Mang, true);
+ }
+ unsigned UniqueID = MCContext::GenericSectionID;
+ if (EmitUniqueSection && !UniqueSectionNames) {
+ UniqueID = *NextUniqueID;
+ (*NextUniqueID)++;
}
+ return Ctx.getWasmSection(Name, /*Type=*/0, Flags,
+ Group, UniqueID);
+}
+
+MCSection *TargetLoweringObjectFileWasm::SelectSectionForGlobal(
+ const GlobalObject *GO, SectionKind Kind, const TargetMachine &TM) const {
+
+ if (Kind.isCommon())
+ report_fatal_error("mergable sections not supported yet on wasm");
+
+ // If we have -ffunction-section or -fdata-section then we should emit the
+ // global value to a uniqued section specifically for it.
+ bool EmitUniqueSection = false;
+ if (Kind.isText())
+ EmitUniqueSection = TM.getFunctionSections();
+ else
+ EmitUniqueSection = TM.getDataSections();
+ EmitUniqueSection |= GO->hasComdat();
+
+ return selectWasmSectionForGlobal(getContext(), GO, Kind, getMangler(), TM,
+ EmitUniqueSection, /*Flags=*/0,
+ &NextUniqueID);
+}
+
+bool TargetLoweringObjectFileWasm::shouldPutJumpTableInFunctionSection(
+ bool UsesLabelDifference, const Function &F) const {
+ // We can always create relative relocations, so use another section
+ // that can be marked non-executable.
+ return false;
+}
+
+const MCExpr *TargetLoweringObjectFileWasm::lowerRelativeReference(
+ const GlobalValue *LHS, const GlobalValue *RHS,
+ const TargetMachine &TM) const {
+ // We may only use a PLT-relative relocation to refer to unnamed_addr
+ // functions.
+ if (!LHS->hasGlobalUnnamedAddr() || !LHS->getValueType()->isFunctionTy())
+ return nullptr;
+
+ // Basic sanity checks.
+ if (LHS->getType()->getPointerAddressSpace() != 0 ||
+ RHS->getType()->getPointerAddressSpace() != 0 || LHS->isThreadLocal() ||
+ RHS->isThreadLocal())
+ return nullptr;
+
+ return MCBinaryExpr::createSub(
+ MCSymbolRefExpr::create(TM.getSymbol(LHS), MCSymbolRefExpr::VK_None,
+ getContext()),
+ MCSymbolRefExpr::create(TM.getSymbol(RHS), getContext()), getContext());
+}
+
+void
+TargetLoweringObjectFileWasm::InitializeWasm() {
+ // TODO: Initialize StaticCtorSection and StaticDtorSection.
}
diff --git a/contrib/llvm/lib/CodeGen/TargetOptionsImpl.cpp b/contrib/llvm/lib/CodeGen/TargetOptionsImpl.cpp
index b6da8e0..ed845e1 100644
--- a/contrib/llvm/lib/CodeGen/TargetOptionsImpl.cpp
+++ b/contrib/llvm/lib/CodeGen/TargetOptionsImpl.cpp
@@ -11,10 +11,10 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/IR/Function.h"
-#include "llvm/IR/Module.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Module.h"
#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Target/TargetSubtargetInfo.h"
@@ -34,14 +34,6 @@ bool TargetOptions::DisableFramePointerElim(const MachineFunction &MF) const {
return false;
}
-/// LessPreciseFPMAD - This flag return true when -enable-fp-mad option
-/// is specified on the command line. When this flag is off(default), the
-/// code generator is not allowed to generate mad (multiply add) if the
-/// result is "less precise" than doing those operations individually.
-bool TargetOptions::LessPreciseFPMAD() const {
- return UnsafeFPMath || LessPreciseFPMADOption;
-}
-
/// HonorSignDependentRoundingFPMath - Return true if the codegen must assume
/// that the rounding mode of the FPU can change from its default.
bool TargetOptions::HonorSignDependentRoundingFPMath() const {
diff --git a/contrib/llvm/lib/CodeGen/TargetPassConfig.cpp b/contrib/llvm/lib/CodeGen/TargetPassConfig.cpp
index e7ea2b4..817e58c 100644
--- a/contrib/llvm/lib/CodeGen/TargetPassConfig.cpp
+++ b/contrib/llvm/lib/CodeGen/TargetPassConfig.cpp
@@ -1,4 +1,4 @@
-//===-- TargetPassConfig.cpp - Target independent code generation passes --===//
+//===- TargetPassConfig.cpp - Target independent code generation passes ---===//
//
// The LLVM Compiler Infrastructure
//
@@ -13,28 +13,37 @@
//===---------------------------------------------------------------------===//
#include "llvm/CodeGen/TargetPassConfig.h"
-
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/BasicAliasAnalysis.h"
#include "llvm/Analysis/CFLAndersAliasAnalysis.h"
#include "llvm/Analysis/CFLSteensAliasAnalysis.h"
#include "llvm/Analysis/CallGraphSCCPass.h"
-#include "llvm/Analysis/Passes.h"
#include "llvm/Analysis/ScopedNoAliasAA.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/TypeBasedAliasAnalysis.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachinePassRegistry.h"
+#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/RegAllocRegistry.h"
-#include "llvm/CodeGen/RegisterUsageInfo.h"
#include "llvm/IR/IRPrintingPasses.h"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/Verifier.h"
#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCTargetOptions.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CodeGen.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Threading.h"
#include "llvm/Target/TargetMachine.h"
-#include "llvm/Transforms/Instrumentation.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/SymbolRewriter.h"
+#include <cassert>
+#include <string>
using namespace llvm;
@@ -92,6 +101,19 @@ static cl::opt<bool> VerifyMachineCode("verify-machineinstrs", cl::Hidden,
cl::desc("Verify generated machine code"),
cl::init(false),
cl::ZeroOrMore);
+static cl::opt<bool> EnableMachineOutliner("enable-machine-outliner",
+ cl::Hidden,
+ cl::desc("Enable machine outliner"));
+// Enable or disable FastISel. Both options are needed, because
+// FastISel is enabled by default with -fast, and we wish to be
+// able to enable or disable fast-isel independently from -O0.
+static cl::opt<cl::boolOrDefault>
+EnableFastISelOption("fast-isel", cl::Hidden,
+ cl::desc("Enable the \"fast\" instruction selector"));
+
+static cl::opt<cl::boolOrDefault>
+ EnableGlobalISel("global-isel", cl::Hidden,
+ cl::desc("Enable the \"global\" instruction selector"));
static cl::opt<std::string>
PrintMachineInstrs("print-machineinstrs", cl::ValueOptional,
@@ -211,6 +233,7 @@ char TargetPassConfig::EarlyTailDuplicateID = 0;
char TargetPassConfig::PostRAMachineLICMID = 0;
namespace {
+
struct InsertedPass {
AnalysisID TargetPassID;
IdentifyingPassPtr InsertedPassID;
@@ -231,9 +254,11 @@ struct InsertedPass {
return NP;
}
};
-}
+
+} // end anonymous namespace
namespace llvm {
+
class PassConfigImpl {
public:
// List of passes explicitly substituted by this target. Normally this is
@@ -249,7 +274,8 @@ public:
/// is inserted after each instance of the first one.
SmallVector<InsertedPass, 4> InsertedPasses;
};
-} // namespace llvm
+
+} // end namespace llvm
// Out of line virtual method.
TargetPassConfig::~TargetPassConfig() {
@@ -258,11 +284,8 @@ TargetPassConfig::~TargetPassConfig() {
// Out of line constructor provides default values for pass options and
// registers all common codegen passes.
-TargetPassConfig::TargetPassConfig(TargetMachine *tm, PassManagerBase &pm)
- : ImmutablePass(ID), PM(&pm), Started(true), Stopped(false),
- AddingMachinePasses(false), TM(tm), Impl(nullptr), Initialized(false),
- DisableVerify(false), EnableTailMerge(true) {
-
+TargetPassConfig::TargetPassConfig(LLVMTargetMachine &TM, PassManagerBase &pm)
+ : ImmutablePass(ID), PM(&pm), TM(&TM) {
Impl = new PassConfigImpl();
// Register all target independent codegen passes to activate their PassIDs,
@@ -278,7 +301,10 @@ TargetPassConfig::TargetPassConfig(TargetMachine *tm, PassManagerBase &pm)
substitutePass(&PostRAMachineLICMID, &MachineLICMID);
if (StringRef(PrintMachineInstrs.getValue()).equals(""))
- TM->Options.PrintMachineCode = true;
+ TM.Options.PrintMachineCode = true;
+
+ if (TM.Options.EnableIPRA)
+ setRequiresCodeGenSCCOrder();
}
CodeGenOpt::Level TargetPassConfig::getOptLevel() const {
@@ -303,12 +329,14 @@ void TargetPassConfig::insertPass(AnalysisID TargetPassID,
///
/// Targets may override this to extend TargetPassConfig.
TargetPassConfig *LLVMTargetMachine::createPassConfig(PassManagerBase &PM) {
- return new TargetPassConfig(this, PM);
+ return new TargetPassConfig(*this, PM);
}
TargetPassConfig::TargetPassConfig()
- : ImmutablePass(ID), PM(nullptr) {
- llvm_unreachable("TargetPassConfig should not be constructed on-the-fly");
+ : ImmutablePass(ID) {
+ report_fatal_error("Trying to construct TargetPassConfig without a target "
+ "machine. Scheduling a CodeGen pass without a target "
+ "triple set?");
}
// Helper to verify the analysis is really immutable.
@@ -421,7 +449,12 @@ void TargetPassConfig::addPrintPass(const std::string &Banner) {
}
void TargetPassConfig::addVerifyPass(const std::string &Banner) {
- if (VerifyMachineCode)
+ bool Verify = VerifyMachineCode;
+#ifdef EXPENSIVE_CHECKS
+ if (VerifyMachineCode == cl::BOU_UNSET)
+ Verify = TM->isMachineVerifierClean();
+#endif
+ if (Verify)
PM->add(createMachineVerifierPass(Banner));
}
@@ -480,6 +513,14 @@ void TargetPassConfig::addIRPasses() {
// Insert calls to mcount-like functions.
addPass(createCountingFunctionInserterPass());
+
+ // Add scalarization of target's unsupported masked memory intrinsics pass.
+  // The unsupported intrinsic will be replaced with a chain of basic blocks
+  // that store/load elements one-by-one if the appropriate mask bit is set.
+ addPass(createScalarizeMaskedMemIntrinPass());
+
+ // Expand reduction intrinsics into shuffle sequences if the target wants to.
+ addPass(createExpandReductionsPass());
}
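
A minimal sketch of what the new scalarization pass does to an unsupported masked load (hand-written equivalent for a 4-lane vector; not the pass's literal output):

// Conceptually, %v = llvm.masked.load(%ptr, %mask, %passthru) becomes
// per-lane control flow when the target lacks masked-load support.
void scalarizedMaskedLoad(const bool mask[4], const int *ptr,
                          const int passthru[4], int result[4]) {
  for (int Lane = 0; Lane < 4; ++Lane) {
    if (mask[Lane])               // one guarded block per lane
      result[Lane] = ptr[Lane];
    else
      result[Lane] = passthru[Lane];
  }
}
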
/// Turn exception handling constructs into something the code generators can
@@ -499,14 +540,14 @@ void TargetPassConfig::addPassesToHandleExceptions() {
LLVM_FALLTHROUGH;
case ExceptionHandling::DwarfCFI:
case ExceptionHandling::ARM:
- addPass(createDwarfEHPass(TM));
+ addPass(createDwarfEHPass());
break;
case ExceptionHandling::WinEH:
// We support using both GCC-style and MSVC-style exceptions on Windows, so
// add both preparation passes. Each pass will only actually run if it
// recognizes the personality function.
- addPass(createWinEHPass(TM));
- addPass(createDwarfEHPass(TM));
+ addPass(createWinEHPass());
+ addPass(createDwarfEHPass());
break;
case ExceptionHandling::None:
addPass(createLowerInvokePass());
@@ -521,7 +562,7 @@ void TargetPassConfig::addPassesToHandleExceptions() {
/// before exception handling preparation passes.
void TargetPassConfig::addCodeGenPrepare() {
if (getOptLevel() != CodeGenOpt::None && !DisableCGP)
- addPass(createCodeGenPreparePass(TM));
+ addPass(createCodeGenPreparePass());
addPass(createRewriteSymbolsPass());
}
@@ -531,13 +572,13 @@ void TargetPassConfig::addISelPrepare() {
addPreISel();
// Force codegen to run according to the callgraph.
- if (TM->Options.EnableIPRA)
+ if (requiresCodeGenSCCOrder())
addPass(new DummyCGSCCPass);
// Add both the safe stack and the stack protection passes: each of them will
// only protect functions that have corresponding attributes.
- addPass(createSafeStackPass(TM));
- addPass(createStackProtectorPass(TM));
+ addPass(createSafeStackPass());
+ addPass(createStackProtectorPass());
if (PrintISelInput)
addPass(createPrintFunctionPass(
@@ -549,6 +590,74 @@ void TargetPassConfig::addISelPrepare() {
addPass(createVerifierPass());
}
+bool TargetPassConfig::addCoreISelPasses() {
+ // Enable FastISel with -fast, but allow that to be overridden.
+ TM->setO0WantsFastISel(EnableFastISelOption != cl::BOU_FALSE);
+ if (EnableFastISelOption == cl::BOU_TRUE ||
+ (TM->getOptLevel() == CodeGenOpt::None && TM->getO0WantsFastISel()))
+ TM->setFastISel(true);
+
+ // Ask the target for an isel.
+  // Enable GlobalISel if the target wants to, but allow that to be overridden.
+ if (EnableGlobalISel == cl::BOU_TRUE ||
+ (EnableGlobalISel == cl::BOU_UNSET && isGlobalISelEnabled())) {
+ if (addIRTranslator())
+ return true;
+
+ addPreLegalizeMachineIR();
+
+ if (addLegalizeMachineIR())
+ return true;
+
+ // Before running the register bank selector, ask the target if it
+ // wants to run some passes.
+ addPreRegBankSelect();
+
+ if (addRegBankSelect())
+ return true;
+
+ addPreGlobalInstructionSelect();
+
+ if (addGlobalInstructionSelect())
+ return true;
+
+ // Pass to reset the MachineFunction if the ISel failed.
+ addPass(createResetMachineFunctionPass(
+ reportDiagnosticWhenGlobalISelFallback(), isGlobalISelAbortEnabled()));
+
+ // Provide a fallback path when we do not want to abort on
+ // not-yet-supported input.
+ if (!isGlobalISelAbortEnabled() && addInstSelector())
+ return true;
+
+ } else if (addInstSelector())
+ return true;
+
+ return false;
+}
+
+bool TargetPassConfig::addISelPasses() {
+ if (TM->Options.EmulatedTLS)
+ addPass(createLowerEmuTLSPass());
+
+ addPass(createPreISelIntrinsicLoweringPass());
+ addPass(createTargetTransformInfoWrapperPass(TM->getTargetIRAnalysis()));
+ addIRPasses();
+ addCodeGenPrepare();
+ addPassesToHandleExceptions();
+ addISelPrepare();
+
+ return addCoreISelPasses();
+}
+
+/// -regalloc=... command line option.
+static FunctionPass *useDefaultRegisterAllocator() { return nullptr; }
+static cl::opt<RegisterRegAlloc::FunctionPassCtor, false,
+ RegisterPassParser<RegisterRegAlloc> >
+RegAlloc("regalloc",
+ cl::init(&useDefaultRegisterAllocator),
+ cl::desc("Register allocator to use"));
+
/// Add the complete set of target-independent postISel code generator passes.
///
/// This can be read as the standard order of major LLVM CodeGen stages. Stages
@@ -607,8 +716,12 @@ void TargetPassConfig::addMachinePasses() {
// including phi elimination and scheduling.
if (getOptimizeRegAlloc())
addOptimizedRegAlloc(createRegAllocPass(true));
- else
+ else {
+ if (RegAlloc != &useDefaultRegisterAllocator &&
+ RegAlloc != &createFastRegisterAllocator)
+ report_fatal_error("Must use fast (default) register allocator for unoptimized regalloc.");
addFastRegAlloc(createRegAllocPass(false));
+ }
// Run post-ra passes.
addPostRegAlloc();
@@ -620,7 +733,7 @@ void TargetPassConfig::addMachinePasses() {
// Prolog/Epilog inserter needs a TargetMachine to instantiate. But only
// do so if it hasn't been disabled, substituted, or overridden.
if (!isPassSubstitutedOrOverridden(&PrologEpilogCodeInserterID))
- addPass(createPrologEpilogInserterPass(TM));
+ addPass(createPrologEpilogInserterPass());
/// Add passes that optimize machine instructions after register allocation.
if (getOptLevel() != CodeGenOpt::None)
@@ -668,9 +781,15 @@ void TargetPassConfig::addMachinePasses() {
addPass(&StackMapLivenessID, false);
addPass(&LiveDebugValuesID, false);
+ // Insert before XRay Instrumentation.
+ addPass(&FEntryInserterID, false);
+
addPass(&XRayInstrumentationID, false);
addPass(&PatchableFunctionID, false);
+ if (EnableMachineOutliner)
+ PM->add(createMachineOutlinerPass());
+
AddingMachinePasses = false;
}
@@ -704,6 +823,10 @@ void TargetPassConfig::addMachineSSAOptimization() {
addPass(&MachineLICMID, false);
addPass(&MachineCSEID, false);
+
+ // Coalesce basic blocks with the same branch condition
+ addPass(&BranchCoalescingID);
+
addPass(&MachineSinkingID);
addPass(&PeepholeOptimizerID);
@@ -730,20 +853,13 @@ MachinePassRegistry RegisterRegAlloc::Registry;
/// A dummy default pass factory indicates whether the register allocator is
/// overridden on the command line.
-LLVM_DEFINE_ONCE_FLAG(InitializeDefaultRegisterAllocatorFlag);
-static FunctionPass *useDefaultRegisterAllocator() { return nullptr; }
+static llvm::once_flag InitializeDefaultRegisterAllocatorFlag;
+
static RegisterRegAlloc
defaultRegAlloc("default",
"pick register allocator based on -O option",
useDefaultRegisterAllocator);
-/// -regalloc=... command line option.
-static cl::opt<RegisterRegAlloc::FunctionPassCtor, false,
- RegisterPassParser<RegisterRegAlloc> >
-RegAlloc("regalloc",
- cl::init(&useDefaultRegisterAllocator),
- cl::desc("Register allocator to use"));
-
static void initializeDefaultRegisterAllocatorOnce() {
RegisterRegAlloc::FunctionPassCtor Ctor = RegisterRegAlloc::getDefault();
@@ -753,7 +869,6 @@ static void initializeDefaultRegisterAllocatorOnce() {
}
}
-
/// Instantiate the default register allocator pass for this target for either
/// the optimized or unoptimized allocation path. This will be added to the pass
/// manager by addFastRegAlloc in the unoptimized case or addOptimizedRegAlloc
@@ -903,6 +1018,11 @@ void TargetPassConfig::addBlockPlacement() {
//===---------------------------------------------------------------------===//
/// GlobalISel Configuration
//===---------------------------------------------------------------------===//
+
+bool TargetPassConfig::isGlobalISelEnabled() const {
+ return false;
+}
+
bool TargetPassConfig::isGlobalISelAbortEnabled() const {
return EnableGlobalISelAbort == 1;
}
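
With the new isGlobalISelEnabled() hook, a hypothetical backend opts into GlobalISel like this (sketch; MyTargetPassConfig is illustrative):

class MyTargetPassConfig : public TargetPassConfig {
public:
  MyTargetPassConfig(LLVMTargetMachine &TM, PassManagerBase &PM)
      : TargetPassConfig(TM, PM) {}
  // When -global-isel is left unset, addCoreISelPasses() consults this hook.
  bool isGlobalISelEnabled() const override { return true; }
};
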
diff --git a/contrib/llvm/lib/CodeGen/TargetRegisterInfo.cpp b/contrib/llvm/lib/CodeGen/TargetRegisterInfo.cpp
index cd50c5b..eeb00a7 100644
--- a/contrib/llvm/lib/CodeGen/TargetRegisterInfo.cpp
+++ b/contrib/llvm/lib/CodeGen/TargetRegisterInfo.cpp
@@ -1,4 +1,4 @@
-//===- TargetRegisterInfo.cpp - Target Register Information Implementation ===//
+//==- TargetRegisterInfo.cpp - Target Register Information Implementation --==//
//
// The LLVM Compiler Infrastructure
//
@@ -11,17 +11,27 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/CodeGen/VirtRegMap.h"
+#include "llvm/IR/Attributes.h"
#include "llvm/IR/Function.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/Format.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/Printable.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include <cassert>
+#include <utility>
#define DEBUG_TYPE "target-reg-info"
@@ -38,7 +48,7 @@ TargetRegisterInfo::TargetRegisterInfo(const TargetRegisterInfoDesc *ID,
CoveringLanes(SRICoveringLanes) {
}
-TargetRegisterInfo::~TargetRegisterInfo() {}
+TargetRegisterInfo::~TargetRegisterInfo() = default;
void TargetRegisterInfo::markSuperRegs(BitVector &RegisterSet, unsigned Reg)
const {
@@ -50,8 +60,7 @@ bool TargetRegisterInfo::checkAllSuperRegsMarked(const BitVector &RegisterSet,
ArrayRef<MCPhysReg> Exceptions) const {
// Check that all super registers of reserved regs are reserved as well.
BitVector Checked(getNumRegs());
- for (int Reg = RegisterSet.find_first(); Reg>=0;
- Reg = RegisterSet.find_next(Reg)) {
+ for (unsigned Reg : RegisterSet.set_bits()) {
if (Checked[Reg])
continue;
for (MCSuperRegIterator SR(Reg, this); SR.isValid(); ++SR) {
@@ -127,7 +136,7 @@ Printable PrintVRegOrUnit(unsigned Unit, const TargetRegisterInfo *TRI) {
});
}
-} // End of llvm namespace
+} // end namespace llvm
/// getAllocatableClass - Return the maximal subclass of the given register
/// class that is alloctable, or NULL.
@@ -155,10 +164,9 @@ TargetRegisterInfo::getMinimalPhysRegClass(unsigned reg, MVT VT) const {
// Pick the most sub register class of the right type that contains
// this physreg.
const TargetRegisterClass* BestRC = nullptr;
- for (regclass_iterator I = regclass_begin(), E = regclass_end(); I != E; ++I){
- const TargetRegisterClass* RC = *I;
- if ((VT == MVT::Other || RC->hasType(VT)) && RC->contains(reg) &&
- (!BestRC || BestRC->hasSubClass(RC)))
+ for (const TargetRegisterClass* RC : regclasses()) {
+ if ((VT == MVT::Other || isTypeLegalForClass(*RC, VT)) &&
+ RC->contains(reg) && (!BestRC || BestRC->hasSubClass(RC)))
BestRC = RC;
}
@@ -185,10 +193,9 @@ BitVector TargetRegisterInfo::getAllocatableSet(const MachineFunction &MF,
if (SubClass)
getAllocatableSetForRC(MF, SubClass, Allocatable);
} else {
- for (TargetRegisterInfo::regclass_iterator I = regclass_begin(),
- E = regclass_end(); I != E; ++I)
- if ((*I)->isAllocatable())
- getAllocatableSetForRC(MF, *I, Allocatable);
+ for (const TargetRegisterClass *C : regclasses())
+ if (C->isAllocatable())
+ getAllocatableSetForRC(MF, C, Allocatable);
}
// Mask out the reserved registers
@@ -209,7 +216,7 @@ const TargetRegisterClass *firstCommonClass(const uint32_t *A,
if (unsigned Common = *A++ & *B++) {
const TargetRegisterClass *RC =
TRI->getRegClass(I + countTrailingZeros(Common));
- if (SVT == MVT::SimpleValueType::Any || RC->hasType(VT))
+ if (SVT == MVT::SimpleValueType::Any || TRI->isTypeLegalForClass(*RC, VT))
return RC;
}
return nullptr;
@@ -267,7 +274,7 @@ getCommonSuperRegClass(const TargetRegisterClass *RCA, unsigned SubA,
const TargetRegisterClass *BestRC = nullptr;
unsigned *BestPreA = &PreA;
unsigned *BestPreB = &PreB;
- if (RCA->getSize() < RCB->getSize()) {
+ if (getRegSizeInBits(*RCA) < getRegSizeInBits(*RCB)) {
std::swap(RCA, RCB);
std::swap(SubA, SubB);
std::swap(BestPreA, BestPreB);
@@ -275,7 +282,7 @@ getCommonSuperRegClass(const TargetRegisterClass *RCA, unsigned SubA,
// Also terminate the search once we have found a register class as small as
// RCA.
- unsigned MinSize = RCA->getSize();
+ unsigned MinSize = getRegSizeInBits(*RCA);
for (SuperRegClassIterator IA(RCA, this, true); IA.isValid(); ++IA) {
unsigned FinalA = composeSubRegIndices(IA.getSubReg(), SubA);
@@ -283,7 +290,7 @@ getCommonSuperRegClass(const TargetRegisterClass *RCA, unsigned SubA,
// Check if a common super-register class exists for this index pair.
const TargetRegisterClass *RC =
firstCommonClass(IA.getMask(), IB.getMask(), this);
- if (!RC || RC->getSize() < MinSize)
+ if (!RC || getRegSizeInBits(*RC) < MinSize)
continue;
// The indexes must compose identically: PreA+SubA == PreB+SubB.
@@ -292,7 +299,7 @@ getCommonSuperRegClass(const TargetRegisterClass *RCA, unsigned SubA,
continue;
// Is RC a better candidate than BestRC?
- if (BestRC && RC->getSize() >= BestRC->getSize())
+ if (BestRC && getRegSizeInBits(*RC) >= getRegSizeInBits(*BestRC))
continue;
// Yes, RC is the smallest super-register seen so far.
@@ -301,7 +308,7 @@ getCommonSuperRegClass(const TargetRegisterClass *RCA, unsigned SubA,
*BestPreB = IB.getSubReg();
// Bail early if we reached MinSize. We won't find a better candidate.
- if (BestRC->getSize() == MinSize)
+ if (getRegSizeInBits(*BestRC) == MinSize)
return BestRC;
}
}
@@ -415,9 +422,9 @@ bool TargetRegisterInfo::regmaskSubsetEqual(const uint32_t *mask0,
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
-void
-TargetRegisterInfo::dumpReg(unsigned Reg, unsigned SubRegIndex,
- const TargetRegisterInfo *TRI) {
+LLVM_DUMP_METHOD
+void TargetRegisterInfo::dumpReg(unsigned Reg, unsigned SubRegIndex,
+ const TargetRegisterInfo *TRI) {
dbgs() << PrintReg(Reg, TRI, SubRegIndex) << "\n";
}
#endif
diff --git a/contrib/llvm/lib/CodeGen/TargetSchedule.cpp b/contrib/llvm/lib/CodeGen/TargetSchedule.cpp
index 83e52d3..9210ea8 100644
--- a/contrib/llvm/lib/CodeGen/TargetSchedule.cpp
+++ b/contrib/llvm/lib/CodeGen/TargetSchedule.cpp
@@ -1,4 +1,4 @@
-//===-- llvm/Target/TargetSchedule.cpp - Sched Machine Model ----*- C++ -*-===//
+//===- llvm/Target/TargetSchedule.cpp - Sched Machine Model ---------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -13,11 +13,21 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/TargetSchedule.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/MC/MCInstrItineraries.h"
+#include "llvm/MC/MCSchedule.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
using namespace llvm;
@@ -37,13 +47,14 @@ bool TargetSchedModel::hasInstrItineraries() const {
static unsigned gcd(unsigned Dividend, unsigned Divisor) {
// Dividend and Divisor will be naturally swapped as needed.
- while(Divisor) {
+ while (Divisor) {
unsigned Rem = Dividend % Divisor;
Dividend = Divisor;
Divisor = Rem;
};
return Dividend;
}
+
static unsigned lcm(unsigned A, unsigned B) {
unsigned LCM = (uint64_t(A) * B) / gcd(A, B);
assert((LCM >= A && LCM >= B) && "LCM overflow");
@@ -73,6 +84,29 @@ void TargetSchedModel::init(const MCSchedModel &sm,
}
}
+/// Returns true only if the instruction is specified as single issue.
+bool TargetSchedModel::mustBeginGroup(const MachineInstr *MI,
+ const MCSchedClassDesc *SC) const {
+ if (hasInstrSchedModel()) {
+ if (!SC)
+ SC = resolveSchedClass(MI);
+ if (SC->isValid())
+ return SC->BeginGroup;
+ }
+ return false;
+}
+
+bool TargetSchedModel::mustEndGroup(const MachineInstr *MI,
+ const MCSchedClassDesc *SC) const {
+ if (hasInstrSchedModel()) {
+ if (!SC)
+ SC = resolveSchedClass(MI);
+ if (SC->isValid())
+ return SC->EndGroup;
+ }
+ return false;
+}
+
unsigned TargetSchedModel::getNumMicroOps(const MachineInstr *MI,
const MCSchedClassDesc *SC) const {
if (hasInstrItineraries()) {
@@ -100,7 +134,6 @@ static unsigned capLatency(int Cycles) {
/// evaluation of predicates that depend on instruction operands or flags.
const MCSchedClassDesc *TargetSchedModel::
resolveSchedClass(const MachineInstr *MI) const {
-
// Get the definition's scheduling class descriptor from this machine model.
unsigned SchedClass = MI->getDesc().getSchedClass();
const MCSchedClassDesc *SCDesc = SchedModel.getSchedClassDesc(SchedClass);
@@ -244,7 +277,11 @@ unsigned TargetSchedModel::computeInstrLatency(unsigned Opcode) const {
if (SCDesc->isValid() && !SCDesc->isVariant())
return computeInstrLatency(*SCDesc);
- llvm_unreachable("No MI sched latency");
+ if (SCDesc->isValid()) {
+    assert(!SCDesc->isVariant() && "No MI sched latency: SCDesc->isVariant()");
+ return computeInstrLatency(*SCDesc);
+ }
+ return 0;
}
unsigned
@@ -298,3 +335,68 @@ computeOutputLatency(const MachineInstr *DefMI, unsigned DefOperIdx,
}
return 0;
}
+
+static Optional<double>
+getRThroughputFromItineraries(unsigned schedClass,
+                              const InstrItineraryData *IID) {
+ double Unknown = std::numeric_limits<double>::infinity();
+ double Throughput = Unknown;
+
+ for (const InstrStage *IS = IID->beginStage(schedClass),
+ *E = IID->endStage(schedClass);
+ IS != E; ++IS) {
+ unsigned Cycles = IS->getCycles();
+ if (!Cycles)
+ continue;
+ Throughput =
+ std::min(Throughput, countPopulation(IS->getUnits()) * 1.0 / Cycles);
+ }
+  // We want the reciprocal throughput, hence the inverted return value.
+ return 1 / Throughput;
+}
+
+static Optional<double>
+getRThroughputFromInstrSchedModel(const MCSchedClassDesc *SCDesc,
+ const TargetSubtargetInfo *STI,
+ const MCSchedModel &SchedModel) {
+ double Unknown = std::numeric_limits<double>::infinity();
+ double Throughput = Unknown;
+
+ for (const MCWriteProcResEntry *WPR = STI->getWriteProcResBegin(SCDesc),
+ *WEnd = STI->getWriteProcResEnd(SCDesc);
+ WPR != WEnd; ++WPR) {
+ unsigned Cycles = WPR->Cycles;
+ if (!Cycles)
+ return Optional<double>();
+
+ unsigned NumUnits =
+ SchedModel.getProcResource(WPR->ProcResourceIdx)->NumUnits;
+ Throughput = std::min(Throughput, NumUnits * 1.0 / Cycles);
+ }
+  // We want the reciprocal throughput, hence the inverted return value.
+ return 1 / Throughput;
+}
+
+Optional<double>
+TargetSchedModel::computeInstrRThroughput(const MachineInstr *MI) const {
+ if (hasInstrItineraries())
+ return getRThroughputFromItineraries(MI->getDesc().getSchedClass(),
+ getInstrItineraries());
+ if (hasInstrSchedModel())
+ return getRThroughputFromInstrSchedModel(resolveSchedClass(MI), STI,
+ SchedModel);
+ return Optional<double>();
+}
+
+Optional<double>
+TargetSchedModel::computeInstrRThroughput(unsigned Opcode) const {
+ unsigned SchedClass = TII->get(Opcode).getSchedClass();
+ if (hasInstrItineraries())
+ return getRThroughputFromItineraries(SchedClass, getInstrItineraries());
+ if (hasInstrSchedModel()) {
+ const MCSchedClassDesc *SCDesc = SchedModel.getSchedClassDesc(SchedClass);
+ if (SCDesc->isValid() && !SCDesc->isVariant())
+ return getRThroughputFromInstrSchedModel(SCDesc, STI, SchedModel);
+ }
+ return Optional<double>();
+}
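
A distilled worked example of the reciprocal-throughput arithmetic above (standalone sketch; the numbers are hypothetical):

#include <algorithm>
#include <limits>

// One write occupying a resource with NumUnits units for Cycles cycles gives
// a throughput of NumUnits / Cycles issues per cycle; the functions above
// return its inverse. E.g. 2 units busy for 4 cycles -> 0.5 per cycle
// -> reciprocal throughput 2.0.
double reciprocalThroughput(unsigned NumUnits, unsigned Cycles) {
  double Throughput = std::numeric_limits<double>::infinity();
  Throughput = std::min(Throughput, NumUnits * 1.0 / Cycles);
  return 1 / Throughput;
}
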
diff --git a/contrib/llvm/lib/CodeGen/TargetSubtargetInfo.cpp b/contrib/llvm/lib/CodeGen/TargetSubtargetInfo.cpp
index c74707d..f6d5bc8 100644
--- a/contrib/llvm/lib/CodeGen/TargetSubtargetInfo.cpp
+++ b/contrib/llvm/lib/CodeGen/TargetSubtargetInfo.cpp
@@ -1,4 +1,4 @@
-//===-- TargetSubtargetInfo.cpp - General Target Information ---------------==//
+//===- TargetSubtargetInfo.cpp - General Target Information ----------------==//
//
// The LLVM Compiler Infrastructure
//
@@ -11,12 +11,17 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/ADT/Optional.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/TargetSchedule.h"
+#include "llvm/MC/MCInst.h"
#include "llvm/Target/TargetSubtargetInfo.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/raw_ostream.h"
+#include <string>
+
using namespace llvm;
-//---------------------------------------------------------------------------
-// TargetSubtargetInfo Class
-//
TargetSubtargetInfo::TargetSubtargetInfo(
const Triple &TT, StringRef CPU, StringRef FS,
ArrayRef<SubtargetFeatureKV> PF, ArrayRef<SubtargetFeatureKV> PD,
@@ -26,7 +31,7 @@ TargetSubtargetInfo::TargetSubtargetInfo(
: MCSubtargetInfo(TT, CPU, FS, PF, PD, ProcSched, WPR, WL, RA, IS, OC, FP) {
}
-TargetSubtargetInfo::~TargetSubtargetInfo() {}
+TargetSubtargetInfo::~TargetSubtargetInfo() = default;
bool TargetSubtargetInfo::enableAtomicExpand() const {
return true;
@@ -52,3 +57,46 @@ bool TargetSubtargetInfo::enablePostRAScheduler() const {
bool TargetSubtargetInfo::useAA() const {
return false;
}
+
+static std::string createSchedInfoStr(unsigned Latency,
+ Optional<double> RThroughput) {
+ static const char *SchedPrefix = " sched: [";
+ std::string Comment;
+ raw_string_ostream CS(Comment);
+ if (Latency > 0 && RThroughput.hasValue())
+ CS << SchedPrefix << Latency << format(":%2.2f", RThroughput.getValue())
+ << "]";
+ else if (Latency > 0)
+ CS << SchedPrefix << Latency << ":?]";
+ else if (RThroughput.hasValue())
+ CS << SchedPrefix << "?:" << RThroughput.getValue() << "]";
+ CS.flush();
+ return Comment;
+}
+
+/// Returns a string representation of the scheduler comment.
+std::string TargetSubtargetInfo::getSchedInfoStr(const MachineInstr &MI) const {
+  if (MI.isPseudo() || MI.isTerminator())
+    return std::string();
+  // We don't cache TSchedModel because it depends on TargetInstrInfo,
+  // which could change during compilation.
+ TargetSchedModel TSchedModel;
+ TSchedModel.init(getSchedModel(), this, getInstrInfo());
+ unsigned Latency = TSchedModel.computeInstrLatency(&MI);
+ Optional<double> RThroughput = TSchedModel.computeInstrRThroughput(&MI);
+ return createSchedInfoStr(Latency, RThroughput);
+}
+
+/// Returns a string representation of the scheduler comment.
+std::string TargetSubtargetInfo::getSchedInfoStr(MCInst const &MCI) const {
+  // We don't cache TSchedModel because it depends on TargetInstrInfo,
+  // which could change during compilation.
+ TargetSchedModel TSchedModel;
+ TSchedModel.init(getSchedModel(), this, getInstrInfo());
+ if (!TSchedModel.hasInstrSchedModel())
+ return std::string();
+ unsigned Latency = TSchedModel.computeInstrLatency(MCI.getOpcode());
+ Optional<double> RThroughput =
+ TSchedModel.computeInstrRThroughput(MCI.getOpcode());
+ return createSchedInfoStr(Latency, RThroughput);
+}
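
Given the format() call in createSchedInfoStr, the emitted assembly comments look like this (hypothetical latency 3, reciprocal throughput 1.5):

// createSchedInfoStr(3, 1.5)                -> " sched: [3:1.50]"
// createSchedInfoStr(3, Optional<double>()) -> " sched: [3:?]"
// createSchedInfoStr(0, 1.5)                -> " sched: [?:1.5]"
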
diff --git a/contrib/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp b/contrib/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp
index 0f1b2ed..83c00e2 100644
--- a/contrib/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp
+++ b/contrib/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp
@@ -52,7 +52,7 @@
using namespace llvm;
-#define DEBUG_TYPE "twoaddrinstr"
+#define DEBUG_TYPE "twoaddressinstruction"
STATISTIC(NumTwoAddressInstrs, "Number of two-address instructions");
STATISTIC(NumCommuted , "Number of instructions commuted to coalesce");
@@ -68,6 +68,13 @@ EnableRescheduling("twoaddr-reschedule",
cl::desc("Coalesce copies by rescheduling (default=true)"),
cl::init(true), cl::Hidden);
+// Limit the number of dataflow edges to traverse when evaluating the benefit
+// of commuting operands.
+static cl::opt<unsigned> MaxDataFlowEdge(
+ "dataflow-edge-limit", cl::Hidden, cl::init(3),
+ cl::desc("Maximum number of dataflow edges to traverse when evaluating "
+ "the benefit of commuting operands"));
+
namespace {
class TwoAddressInstructionPass : public MachineFunctionPass {
MachineFunction *MF;
@@ -155,7 +162,7 @@ public:
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
- AU.addRequired<AAResultsWrapperPass>();
+ AU.addUsedIfAvailable<AAResultsWrapperPass>();
AU.addUsedIfAvailable<LiveVariables>();
AU.addPreserved<LiveVariables>();
AU.addPreserved<SlotIndexes>();
@@ -171,10 +178,10 @@ public:
} // end anonymous namespace
char TwoAddressInstructionPass::ID = 0;
-INITIALIZE_PASS_BEGIN(TwoAddressInstructionPass, "twoaddressinstruction",
+INITIALIZE_PASS_BEGIN(TwoAddressInstructionPass, DEBUG_TYPE,
"Two-Address instruction pass", false, false)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
-INITIALIZE_PASS_END(TwoAddressInstructionPass, "twoaddressinstruction",
+INITIALIZE_PASS_END(TwoAddressInstructionPass, DEBUG_TYPE,
"Two-Address instruction pass", false, false)
char &llvm::TwoAddressInstructionPassID = TwoAddressInstructionPass::ID;
@@ -637,10 +644,10 @@ isProfitableToCommute(unsigned regA, unsigned regB, unsigned regC,
// To more generally minimize register copies, ideally the logic of two addr
// instruction pass should be integrated with register allocation pass where
// interference graph is available.
- if (isRevCopyChain(regC, regA, 3))
+ if (isRevCopyChain(regC, regA, MaxDataFlowEdge))
return true;
- if (isRevCopyChain(regB, regA, 3))
+ if (isRevCopyChain(regB, regA, MaxDataFlowEdge))
return false;
// Since there are no intervening uses for both registers, then commute
@@ -905,7 +912,7 @@ rescheduleMIBelowKill(MachineBasicBlock::iterator &mi,
++End;
}
- // Check if the reschedule will not break depedencies.
+ // Check if the reschedule will not break dependencies.
unsigned NumVisited = 0;
MachineBasicBlock::iterator KillPos = KillMI;
++KillPos;
@@ -1627,7 +1634,10 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &Func) {
InstrItins = MF->getSubtarget().getInstrItineraryData();
LV = getAnalysisIfAvailable<LiveVariables>();
LIS = getAnalysisIfAvailable<LiveIntervals>();
- AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
+ if (auto *AAPass = getAnalysisIfAvailable<AAResultsWrapperPass>())
+ AA = &AAPass->getAAResults();
+ else
+ AA = nullptr;
OptLevel = TM.getOptLevel();
bool MadeChange = false;
@@ -1785,7 +1795,7 @@ eliminateRegSequence(MachineBasicBlock::iterator &MBBI) {
MachineInstr *CopyMI = BuildMI(*MI.getParent(), MI, MI.getDebugLoc(),
TII->get(TargetOpcode::COPY))
.addReg(DstReg, RegState::Define, SubIdx)
- .addOperand(UseMO);
+ .add(UseMO);
// The first def needs an <undef> flag because there is no live register
// before it.
diff --git a/contrib/llvm/lib/CodeGen/UnreachableBlockElim.cpp b/contrib/llvm/lib/CodeGen/UnreachableBlockElim.cpp
index c2db56a..407fd9b 100644
--- a/contrib/llvm/lib/CodeGen/UnreachableBlockElim.cpp
+++ b/contrib/llvm/lib/CodeGen/UnreachableBlockElim.cpp
@@ -25,6 +25,7 @@
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
@@ -195,18 +196,31 @@ bool UnreachableMachineBlockElim::runOnMachineFunction(MachineFunction &F) {
}
if (phi->getNumOperands() == 3) {
- unsigned Input = phi->getOperand(1).getReg();
- unsigned Output = phi->getOperand(0).getReg();
-
- phi++->eraseFromParent();
+ const MachineOperand &Input = phi->getOperand(1);
+ const MachineOperand &Output = phi->getOperand(0);
+ unsigned InputReg = Input.getReg();
+ unsigned OutputReg = Output.getReg();
+ assert(Output.getSubReg() == 0 && "Cannot have output subregister");
ModifiedPHI = true;
- if (Input != Output) {
+ if (InputReg != OutputReg) {
MachineRegisterInfo &MRI = F.getRegInfo();
- MRI.constrainRegClass(Input, MRI.getRegClass(Output));
- MRI.replaceRegWith(Output, Input);
+ unsigned InputSub = Input.getSubReg();
+ if (InputSub == 0 &&
+ MRI.constrainRegClass(InputReg, MRI.getRegClass(OutputReg))) {
+ MRI.replaceRegWith(OutputReg, InputReg);
+ } else {
+ // The input register to the PHI has a subregister or it can't be
+ // constrained to the proper register class:
+ // insert a COPY instead of simply replacing the output
+ // with the input.
+ const TargetInstrInfo *TII = F.getSubtarget().getInstrInfo();
+ BuildMI(*BB, BB->getFirstNonPHI(), phi->getDebugLoc(),
+ TII->get(TargetOpcode::COPY), OutputReg)
+ .addReg(InputReg, getRegState(Input), InputSub);
+ }
+ phi++->eraseFromParent();
}
-
continue;
}
diff --git a/contrib/llvm/lib/CodeGen/VirtRegMap.cpp b/contrib/llvm/lib/CodeGen/VirtRegMap.cpp
index 0d506d6..f8aacdb 100644
--- a/contrib/llvm/lib/CodeGen/VirtRegMap.cpp
+++ b/contrib/llvm/lib/CodeGen/VirtRegMap.cpp
@@ -72,9 +72,21 @@ void VirtRegMap::grow() {
Virt2SplitMap.resize(NumRegs);
}
+void VirtRegMap::assignVirt2Phys(unsigned virtReg, MCPhysReg physReg) {
+ assert(TargetRegisterInfo::isVirtualRegister(virtReg) &&
+ TargetRegisterInfo::isPhysicalRegister(physReg));
+ assert(Virt2PhysMap[virtReg] == NO_PHYS_REG &&
+ "attempt to assign physical register to already mapped "
+ "virtual register");
+ assert(!getRegInfo().isReserved(physReg) &&
+ "Attempt to map virtReg to a reserved physReg");
+ Virt2PhysMap[virtReg] = physReg;
+}
+
unsigned VirtRegMap::createSpillSlot(const TargetRegisterClass *RC) {
- int SS = MF->getFrameInfo().CreateSpillStackObject(RC->getSize(),
- RC->getAlignment());
+ unsigned Size = TRI->getSpillSize(*RC);
+ unsigned Align = TRI->getSpillAlignment(*RC);
+ int SS = MF->getFrameInfo().CreateSpillStackObject(Size, Align);
++NumSpillSlots;
return SS;
}
@@ -167,6 +179,8 @@ class VirtRegRewriter : public MachineFunctionPass {
bool readsUndefSubreg(const MachineOperand &MO) const;
void addLiveInsForSubRanges(const LiveInterval &LI, unsigned PhysReg) const;
void handleIdentityCopy(MachineInstr &MI) const;
+ void expandCopyBundle(MachineInstr &MI) const;
+ bool subRegLiveThrough(const MachineInstr &MI, unsigned SuperPhysReg) const;
public:
static char ID;
@@ -367,11 +381,67 @@ void VirtRegRewriter::handleIdentityCopy(MachineInstr &MI) const {
}
if (Indexes)
- Indexes->removeMachineInstrFromMaps(MI);
- MI.eraseFromParent();
+ Indexes->removeSingleMachineInstrFromMaps(MI);
+ MI.eraseFromBundle();
DEBUG(dbgs() << " deleted.\n");
}
+/// The live-range splitting logic sometimes produces bundles of copies when
+/// subregisters are involved. Expand these into a sequence of copy instructions
+/// after processing the last in the bundle. This does not update
+/// LiveIntervals, which are no longer needed for these instructions.
+void VirtRegRewriter::expandCopyBundle(MachineInstr &MI) const {
+ if (!MI.isCopy())
+ return;
+
+ if (MI.isBundledWithPred() && !MI.isBundledWithSucc()) {
+ // Only do this when the complete bundle is made out of COPYs.
+ MachineBasicBlock &MBB = *MI.getParent();
+ for (MachineBasicBlock::reverse_instr_iterator I =
+ std::next(MI.getReverseIterator()), E = MBB.instr_rend();
+ I != E && I->isBundledWithSucc(); ++I) {
+ if (!I->isCopy())
+ return;
+ }
+
+ for (MachineBasicBlock::reverse_instr_iterator I = MI.getReverseIterator();
+ I->isBundledWithPred(); ) {
+ MachineInstr &MI = *I;
+ ++I;
+
+ MI.unbundleFromPred();
+ if (Indexes)
+ Indexes->insertMachineInstrInMaps(MI);
+ }
+ }
+}
+
+/// Check whether (part of) \p SuperPhysReg is live through \p MI.
+/// \pre \p MI defines a subregister of a virtual register that
+/// has been assigned to \p SuperPhysReg.
+bool VirtRegRewriter::subRegLiveThrough(const MachineInstr &MI,
+ unsigned SuperPhysReg) const {
+ SlotIndex MIIndex = LIS->getInstructionIndex(MI);
+ SlotIndex BeforeMIUses = MIIndex.getBaseIndex();
+ SlotIndex AfterMIDefs = MIIndex.getBoundaryIndex();
+ for (MCRegUnitIterator Unit(SuperPhysReg, TRI); Unit.isValid(); ++Unit) {
+ const LiveRange &UnitRange = LIS->getRegUnit(*Unit);
+ // If the regunit is live both before and after MI,
+ // we assume it is live through.
+ // Generally speaking, this is not true, because something like
+ // "RU = op RU" would match that description.
+ // However, we know that we are trying to assess whether
+ // a def of a virtual reg, vreg, is live at the same time of RU.
+ // If we are in the "RU = op RU" situation, that means that vreg
+ // is defined at the same time as RU (i.e., "vreg, RU = op RU").
+  // Thus, vreg and RU interfere and vreg cannot be assigned to
+ // SuperPhysReg. Therefore, this situation cannot happen.
+ if (UnitRange.liveAt(AfterMIDefs) && UnitRange.liveAt(BeforeMIUses))
+ return true;
+ }
+ return false;
+}
+
void VirtRegRewriter::rewrite() {
bool NoSubRegLiveness = !MRI->subRegLivenessEnabled();
SmallVector<unsigned, 8> SuperDeads;
@@ -409,7 +479,8 @@ void VirtRegRewriter::rewrite() {
// A virtual register kill refers to the whole register, so we may
// have to add <imp-use,kill> operands for the super-register. A
// partial redef always kills and redefines the super-register.
- if (MO.readsReg() && (MO.isDef() || MO.isKill()))
+ if ((MO.readsReg() && (MO.isDef() || MO.isKill())) ||
+ (MO.isDef() && subRegLiveThrough(*MI, PhysReg)))
SuperKills.push_back(PhysReg);
if (MO.isDef()) {
@@ -431,12 +502,14 @@ void VirtRegRewriter::rewrite() {
}
}
- // The <def,undef> flag only makes sense for sub-register defs, and
- // we are substituting a full physreg. An <imp-use,kill> operand
- // from the SuperKills list will represent the partial read of the
- // super-register.
- if (MO.isDef())
+ // The <def,undef> and <def,internal> flags only make sense for
+ // sub-register defs, and we are substituting a full physreg. An
+ // <imp-use,kill> operand from the SuperKills list will represent the
+ // partial read of the super-register.
+ if (MO.isDef()) {
MO.setIsUndef(false);
+ MO.setIsInternalRead(false);
+ }
// PhysReg operands cannot have subregister indexes.
PhysReg = TRI->getSubReg(PhysReg, SubReg);
@@ -461,6 +534,8 @@ void VirtRegRewriter::rewrite() {
DEBUG(dbgs() << "> " << *MI);
+ expandCopyBundle(*MI);
+
// We can remove identity copies right now.
handleIdentityCopy(*MI);
}
diff --git a/contrib/llvm/lib/CodeGen/WinEHPrepare.cpp b/contrib/llvm/lib/CodeGen/WinEHPrepare.cpp
index 568720c..c63a0a9 100644
--- a/contrib/llvm/lib/CodeGen/WinEHPrepare.cpp
+++ b/contrib/llvm/lib/CodeGen/WinEHPrepare.cpp
@@ -16,13 +16,13 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/CodeGen/Passes.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/EHPersonalities.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/WinEHFuncInfo.h"
#include "llvm/IR/Verifier.h"
#include "llvm/MC/MCSymbol.h"
@@ -54,7 +54,7 @@ namespace {
class WinEHPrepare : public FunctionPass {
public:
static char ID; // Pass identification, replacement for typeid.
- WinEHPrepare(const TargetMachine *TM = nullptr) : FunctionPass(ID) {}
+ WinEHPrepare() : FunctionPass(ID) {}
bool runOnFunction(Function &Fn) override;
@@ -86,6 +86,7 @@ private:
// All fields are reset by runOnFunction.
EHPersonality Personality = EHPersonality::Unknown;
+ const DataLayout *DL = nullptr;
DenseMap<BasicBlock *, ColorVector> BlockColors;
MapVector<BasicBlock *, std::vector<BasicBlock *>> FuncletBlocks;
};
@@ -93,12 +94,10 @@ private:
} // end anonymous namespace
char WinEHPrepare::ID = 0;
-INITIALIZE_TM_PASS(WinEHPrepare, "winehprepare", "Prepare Windows exceptions",
- false, false)
+INITIALIZE_PASS(WinEHPrepare, DEBUG_TYPE, "Prepare Windows exceptions",
+ false, false)
-FunctionPass *llvm::createWinEHPass(const TargetMachine *TM) {
- return new WinEHPrepare(TM);
-}
+FunctionPass *llvm::createWinEHPass() { return new WinEHPrepare(); }
bool WinEHPrepare::runOnFunction(Function &Fn) {
if (!Fn.hasPersonalityFn())
@@ -111,6 +110,7 @@ bool WinEHPrepare::runOnFunction(Function &Fn) {
if (!isFuncletEHPersonality(Personality))
return false;
+ DL = &Fn.getParent()->getDataLayout();
return prepareExplicitEH(Fn);
}
@@ -1070,7 +1070,7 @@ AllocaInst *WinEHPrepare::insertPHILoads(PHINode *PN, Function &F) {
if (!isa<TerminatorInst>(EHPad)) {
// If the EHPad isn't a terminator, then we can insert a load in this block
// that will dominate all uses.
- SpillSlot = new AllocaInst(PN->getType(), nullptr,
+ SpillSlot = new AllocaInst(PN->getType(), DL->getAllocaAddrSpace(), nullptr,
Twine(PN->getName(), ".wineh.spillslot"),
&F.getEntryBlock().front());
Value *V = new LoadInst(SpillSlot, Twine(PN->getName(), ".wineh.reload"),
@@ -1157,7 +1157,7 @@ void WinEHPrepare::replaceUseWithLoad(Value *V, Use &U, AllocaInst *&SpillSlot,
Function &F) {
  // Lazily create the spill slot.
if (!SpillSlot)
- SpillSlot = new AllocaInst(V->getType(), nullptr,
+ SpillSlot = new AllocaInst(V->getType(), DL->getAllocaAddrSpace(), nullptr,
Twine(V->getName(), ".wineh.spillslot"),
&F.getEntryBlock().front());
diff --git a/contrib/llvm/lib/CodeGen/XRayInstrumentation.cpp b/contrib/llvm/lib/CodeGen/XRayInstrumentation.cpp
index 63bd762..0b4c6e5 100644
--- a/contrib/llvm/lib/CodeGen/XRayInstrumentation.cpp
+++ b/contrib/llvm/lib/CodeGen/XRayInstrumentation.cpp
@@ -1,4 +1,4 @@
-//===-- XRayInstrumentation.cpp - Adds XRay instrumentation to functions. -===//
+//===- XRayInstrumentation.cpp - Adds XRay instrumentation to functions. --===//
//
// The LLVM Compiler Infrastructure
//
@@ -14,18 +14,26 @@
//
//===---------------------------------------------------------------------===//
-#include "llvm/CodeGen/Analysis.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/Passes.h"
-#include "llvm/Support/TargetRegistry.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/Pass.h"
#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;
namespace {
+
struct XRayInstrumentation : public MachineFunctionPass {
static char ID;
@@ -33,6 +41,14 @@ struct XRayInstrumentation : public MachineFunctionPass {
initializeXRayInstrumentationPass(*PassRegistry::getPassRegistry());
}
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ AU.addRequired<MachineLoopInfo>();
+ AU.addPreserved<MachineLoopInfo>();
+ AU.addPreserved<MachineDominatorTree>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
bool runOnMachineFunction(MachineFunction &MF) override;
private:
@@ -43,7 +59,7 @@ private:
// This is the approach to go on CPUs which have a single RET instruction,
// like x86/x86_64.
void replaceRetWithPatchableRet(MachineFunction &MF,
- const TargetInstrInfo *TII);
+ const TargetInstrInfo *TII);
// Prepend the original return instruction with the exit sled code ("patchable
// function exit" pseudo-instruction), preserving the original return
@@ -54,13 +70,13 @@ private:
// have to call the trampoline and return from it to the original return
// instruction of the function being instrumented.
void prependRetWithPatchableExit(MachineFunction &MF,
- const TargetInstrInfo *TII);
+ const TargetInstrInfo *TII);
};
-} // anonymous namespace
-void XRayInstrumentation::replaceRetWithPatchableRet(MachineFunction &MF,
- const TargetInstrInfo *TII)
-{
+} // end anonymous namespace
+
+void XRayInstrumentation::replaceRetWithPatchableRet(
+ MachineFunction &MF, const TargetInstrInfo *TII) {
// We look for *all* terminators and returns, then replace those with
// PATCHABLE_RET instructions.
SmallVector<MachineInstr *, 4> Terminators;
@@ -81,7 +97,7 @@ void XRayInstrumentation::replaceRetWithPatchableRet(MachineFunction &MF,
auto MIB = BuildMI(MBB, T, T.getDebugLoc(), TII->get(Opc))
.addImm(T.getOpcode());
for (auto &MO : T.operands())
- MIB.addOperand(MO);
+ MIB.add(MO);
Terminators.push_back(&T);
}
}
@@ -91,9 +107,8 @@ void XRayInstrumentation::replaceRetWithPatchableRet(MachineFunction &MF,
I->eraseFromParent();
}
-void XRayInstrumentation::prependRetWithPatchableExit(MachineFunction &MF,
- const TargetInstrInfo *TII)
-{
+void XRayInstrumentation::prependRetWithPatchableExit(
+ MachineFunction &MF, const TargetInstrInfo *TII) {
for (auto &MBB : MF) {
for (auto &T : MBB.terminators()) {
unsigned Opc = 0;
@@ -106,7 +121,7 @@ void XRayInstrumentation::prependRetWithPatchableExit(MachineFunction &MF,
if (Opc != 0) {
// Prepend the return instruction with PATCHABLE_FUNCTION_EXIT or
// PATCHABLE_TAIL_CALL .
- BuildMI(MBB, T, T.getDebugLoc(),TII->get(Opc));
+ BuildMI(MBB, T, T.getDebugLoc(), TII->get(Opc));
}
}
}
@@ -125,14 +140,24 @@ bool XRayInstrumentation::runOnMachineFunction(MachineFunction &MF) {
return false; // XRay threshold attribute not found.
if (Attr.getValueAsString().getAsInteger(10, XRayThreshold))
return false; // Invalid value for threshold.
- if (F.size() < XRayThreshold)
- return false; // Function is too small.
+
+  // Count the number of MachineInstrs in the MachineFunction.
+ int64_t MICount = 0;
+ for (const auto& MBB : MF)
+ MICount += MBB.size();
+
+ // Check if we have a loop.
+ // FIXME: Maybe make this smarter, and see whether the loops are dependent
+ // on inputs or side-effects?
+ MachineLoopInfo &MLI = getAnalysis<MachineLoopInfo>();
+ if (MLI.empty() && MICount < XRayThreshold)
+ return false; // Function is too small and has no loops.
}
// We look for the first non-empty MachineBasicBlock, so that we can insert
// the function instrumentation in the appropriate place.
- auto MBI =
- find_if(MF, [&](const MachineBasicBlock &MBB) { return !MBB.empty(); });
+ auto MBI = llvm::find_if(
+ MF, [&](const MachineBasicBlock &MBB) { return !MBB.empty(); });
if (MBI == MF.end())
return false; // The function is empty.
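
To connect the new loop-aware threshold back to source code, a hedged example (clang's XRay attributes; the threshold value is illustrative):

// Built with -fxray-instrument -fxray-instruction-threshold=200:
void tiny() {}                    // few instructions, no loops -> skipped
void spins() { for (;;) {} }      // contains a loop -> instrumented anyway
[[clang::xray_always_instrument]] void forced();  // bypasses the threshold
[[clang::xray_never_instrument]] void muted();    // never instrumented
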
@@ -142,12 +167,10 @@ bool XRayInstrumentation::runOnMachineFunction(MachineFunction &MF) {
if (!MF.getSubtarget().isXRaySupported()) {
FirstMI.emitError("An attempt to perform XRay instrumentation for an"
- " unsupported target.");
+ " unsupported target.");
return false;
}
- // FIXME: Do the loop triviality analysis here or in an earlier pass.
-
// First, insert an PATCHABLE_FUNCTION_ENTER as the first instruction of the
// MachineFunction.
BuildMI(FirstMBB, FirstMI, FirstMI.getDebugLoc(),
@@ -157,6 +180,11 @@ bool XRayInstrumentation::runOnMachineFunction(MachineFunction &MF) {
case Triple::ArchType::arm:
case Triple::ArchType::thumb:
case Triple::ArchType::aarch64:
+ case Triple::ArchType::ppc64le:
+ case Triple::ArchType::mips:
+ case Triple::ArchType::mipsel:
+ case Triple::ArchType::mips64:
+ case Triple::ArchType::mips64el:
// For the architectures which don't have a single return instruction
prependRetWithPatchableExit(MF, TII);
break;
@@ -171,5 +199,8 @@ bool XRayInstrumentation::runOnMachineFunction(MachineFunction &MF) {
char XRayInstrumentation::ID = 0;
char &llvm::XRayInstrumentationID = XRayInstrumentation::ID;
-INITIALIZE_PASS(XRayInstrumentation, "xray-instrumentation", "Insert XRay ops",
- false, false)
+INITIALIZE_PASS_BEGIN(XRayInstrumentation, "xray-instrumentation",
+ "Insert XRay ops", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_END(XRayInstrumentation, "xray-instrumentation",
+ "Insert XRay ops", false, false)