Diffstat (limited to 'contrib/llvm/lib/CodeGen')
-rw-r--r--  contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp | 4
-rw-r--r--  contrib/llvm/lib/CodeGen/AllocationOrder.cpp | 2
-rw-r--r--  contrib/llvm/lib/CodeGen/AllocationOrder.h | 5
-rw-r--r--  contrib/llvm/lib/CodeGen/Analysis.cpp | 79
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp | 21
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/AddressPool.cpp | 2
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 275
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp | 35
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp | 19
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp | 692
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h | 29
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/DIE.cpp | 390
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp | 6
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp | 31
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp | 42
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.h | 18
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/DebugLocEntry.h | 12
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp | 4
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.h | 3
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp | 32
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp | 256
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h | 28
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp | 372
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h | 84
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/DwarfException.h | 6
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp | 170
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h | 165
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/DwarfFile.cpp | 72
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/DwarfFile.h | 15
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp | 137
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h | 31
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp | 16
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp | 2
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/WinException.cpp | 116
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/WinException.h | 4
-rw-r--r--  contrib/llvm/lib/CodeGen/BranchFolding.cpp | 268
-rw-r--r--  contrib/llvm/lib/CodeGen/BranchFolding.h | 14
-rw-r--r--  contrib/llvm/lib/CodeGen/BranchRelaxation.cpp | 510
-rw-r--r--  contrib/llvm/lib/CodeGen/CallingConvLower.cpp | 18
-rw-r--r--  contrib/llvm/lib/CodeGen/CodeGen.cpp | 4
-rw-r--r--  contrib/llvm/lib/CodeGen/CodeGenPrepare.cpp | 519
-rw-r--r--  contrib/llvm/lib/CodeGen/CountingFunctionInserter.cpp | 62
-rw-r--r--  contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp | 4
-rw-r--r--  contrib/llvm/lib/CodeGen/DFAPacketizer.cpp | 21
-rw-r--r--  contrib/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp | 9
-rw-r--r--  contrib/llvm/lib/CodeGen/DetectDeadLanes.cpp | 30
-rw-r--r--  contrib/llvm/lib/CodeGen/DwarfEHPrepare.cpp | 2
-rw-r--r--  contrib/llvm/lib/CodeGen/EarlyIfConversion.cpp | 6
-rw-r--r--  contrib/llvm/lib/CodeGen/EdgeBundles.cpp | 9
-rw-r--r--  contrib/llvm/lib/CodeGen/ExecutionDepsFix.cpp | 94
-rw-r--r--  contrib/llvm/lib/CodeGen/FuncletLayout.cpp | 2
-rw-r--r--  contrib/llvm/lib/CodeGen/GCMetadata.cpp | 4
-rw-r--r--  contrib/llvm/lib/CodeGen/GCMetadataPrinter.cpp | 2
-rw-r--r--  contrib/llvm/lib/CodeGen/GCRootLowering.cpp | 12
-rw-r--r--  contrib/llvm/lib/CodeGen/GCStrategy.cpp | 2
-rw-r--r--  contrib/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp | 170
-rw-r--r--  contrib/llvm/lib/CodeGen/GlobalISel/GlobalISel.cpp | 2
-rw-r--r--  contrib/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp | 839
-rw-r--r--  contrib/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp | 175
-rw-r--r--  contrib/llvm/lib/CodeGen/GlobalISel/InstructionSelector.cpp | 60
-rw-r--r--  contrib/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp | 180
-rw-r--r--  contrib/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp | 354
-rw-r--r--  contrib/llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp | 182
-rw-r--r--  contrib/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp | 427
-rw-r--r--  contrib/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp | 171
-rw-r--r--  contrib/llvm/lib/CodeGen/GlobalISel/RegisterBank.cpp | 11
-rw-r--r--  contrib/llvm/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp | 474
-rw-r--r--  contrib/llvm/lib/CodeGen/GlobalISel/Utils.cpp | 45
-rw-r--r--  contrib/llvm/lib/CodeGen/GlobalMerge.cpp | 63
-rw-r--r--  contrib/llvm/lib/CodeGen/IfConversion.cpp | 1225
-rw-r--r--  contrib/llvm/lib/CodeGen/ImplicitNullChecks.cpp | 401
-rw-r--r--  contrib/llvm/lib/CodeGen/InlineSpiller.cpp | 43
-rw-r--r--  contrib/llvm/lib/CodeGen/InterleavedAccessPass.cpp | 114
-rw-r--r--  contrib/llvm/lib/CodeGen/IntrinsicLowering.cpp | 10
-rw-r--r--  contrib/llvm/lib/CodeGen/LLVMTargetMachine.cpp | 68
-rw-r--r--  contrib/llvm/lib/CodeGen/LatencyPriorityQueue.cpp | 2
-rw-r--r--  contrib/llvm/lib/CodeGen/LexicalScopes.cpp | 22
-rw-r--r--  contrib/llvm/lib/CodeGen/LiveDebugValues.cpp | 90
-rw-r--r--  contrib/llvm/lib/CodeGen/LiveDebugVariables.cpp | 56
-rw-r--r--  contrib/llvm/lib/CodeGen/LiveInterval.cpp | 105
-rw-r--r--  contrib/llvm/lib/CodeGen/LiveIntervalAnalysis.cpp | 70
-rw-r--r--  contrib/llvm/lib/CodeGen/LiveIntervalUnion.cpp | 5
-rw-r--r--  contrib/llvm/lib/CodeGen/LivePhysRegs.cpp | 53
-rw-r--r--  contrib/llvm/lib/CodeGen/LiveRangeCalc.cpp | 211
-rw-r--r--  contrib/llvm/lib/CodeGen/LiveRangeCalc.h | 55
-rw-r--r--  contrib/llvm/lib/CodeGen/LiveRangeEdit.cpp | 23
-rw-r--r--  contrib/llvm/lib/CodeGen/LiveRegMatrix.cpp | 9
-rw-r--r--  contrib/llvm/lib/CodeGen/LiveVariables.cpp | 2
-rw-r--r--  contrib/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp | 82
-rw-r--r--  contrib/llvm/lib/CodeGen/LowLevelType.cpp | 71
-rw-r--r--  contrib/llvm/lib/CodeGen/MIRParser/MILexer.cpp | 78
-rw-r--r--  contrib/llvm/lib/CodeGen/MIRParser/MILexer.h | 10
-rw-r--r--  contrib/llvm/lib/CodeGen/MIRParser/MIParser.cpp | 485
-rw-r--r--  contrib/llvm/lib/CodeGen/MIRParser/MIParser.h | 49
-rw-r--r--  contrib/llvm/lib/CodeGen/MIRParser/MIRParser.cpp | 195
-rw-r--r--  contrib/llvm/lib/CodeGen/MIRPrinter.cpp | 156
-rw-r--r--  contrib/llvm/lib/CodeGen/MIRPrintingPass.cpp | 2
-rw-r--r--  contrib/llvm/lib/CodeGen/MachineBasicBlock.cpp | 144
-rw-r--r--  contrib/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp | 27
-rw-r--r--  contrib/llvm/lib/CodeGen/MachineBlockPlacement.cpp | 473
-rw-r--r--  contrib/llvm/lib/CodeGen/MachineBranchProbabilityInfo.cpp | 3
-rw-r--r--  contrib/llvm/lib/CodeGen/MachineCSE.cpp | 7
-rw-r--r--  contrib/llvm/lib/CodeGen/MachineCombiner.cpp | 15
-rw-r--r--  contrib/llvm/lib/CodeGen/MachineCopyPropagation.cpp | 52
-rw-r--r--  contrib/llvm/lib/CodeGen/MachineFunction.cpp | 300
-rw-r--r--  contrib/llvm/lib/CodeGen/MachineFunctionAnalysis.cpp | 60
-rw-r--r--  contrib/llvm/lib/CodeGen/MachineFunctionPass.cpp | 14
-rw-r--r--  contrib/llvm/lib/CodeGen/MachineFunctionPrinterPass.cpp | 2
-rw-r--r--  contrib/llvm/lib/CodeGen/MachineInstr.cpp | 193
-rw-r--r--  contrib/llvm/lib/CodeGen/MachineInstrBundle.cpp | 10
-rw-r--r--  contrib/llvm/lib/CodeGen/MachineLICM.cpp | 17
-rw-r--r--  contrib/llvm/lib/CodeGen/MachineLoopInfo.cpp | 45
-rw-r--r--  contrib/llvm/lib/CodeGen/MachineModuleInfo.cpp | 305
-rw-r--r--  contrib/llvm/lib/CodeGen/MachinePipeliner.cpp | 3984
-rw-r--r--  contrib/llvm/lib/CodeGen/MachineRegisterInfo.cpp | 58
-rw-r--r--  contrib/llvm/lib/CodeGen/MachineSSAUpdater.cpp | 1
-rw-r--r--  contrib/llvm/lib/CodeGen/MachineScheduler.cpp | 214
-rw-r--r--  contrib/llvm/lib/CodeGen/MachineSink.cpp | 40
-rw-r--r--  contrib/llvm/lib/CodeGen/MachineTraceMetrics.cpp | 7
-rw-r--r--  contrib/llvm/lib/CodeGen/MachineVerifier.cpp | 221
-rw-r--r--  contrib/llvm/lib/CodeGen/OptimizePHIs.cpp | 2
-rw-r--r--  contrib/llvm/lib/CodeGen/PHIElimination.cpp | 2
-rw-r--r--  contrib/llvm/lib/CodeGen/PHIEliminationUtils.cpp | 3
-rw-r--r--  contrib/llvm/lib/CodeGen/ParallelCG.cpp | 5
-rw-r--r--  contrib/llvm/lib/CodeGen/PatchableFunction.cpp | 2
-rw-r--r--  contrib/llvm/lib/CodeGen/PeepholeOptimizer.cpp | 68
-rw-r--r--  contrib/llvm/lib/CodeGen/PostRASchedulerList.cpp | 2
-rw-r--r--  contrib/llvm/lib/CodeGen/PrologEpilogInserter.cpp | 172
-rw-r--r--  contrib/llvm/lib/CodeGen/RegAllocBase.cpp | 7
-rw-r--r--  contrib/llvm/lib/CodeGen/RegAllocBase.h | 1
-rw-r--r--  contrib/llvm/lib/CodeGen/RegAllocBasic.cpp | 9
-rw-r--r--  contrib/llvm/lib/CodeGen/RegAllocFast.cpp | 25
-rw-r--r--  contrib/llvm/lib/CodeGen/RegAllocGreedy.cpp | 107
-rw-r--r--  contrib/llvm/lib/CodeGen/RegAllocPBQP.cpp | 9
-rw-r--r--  contrib/llvm/lib/CodeGen/RegUsageInfoCollector.cpp | 15
-rw-r--r--  contrib/llvm/lib/CodeGen/RegUsageInfoPropagate.cpp | 2
-rw-r--r--  contrib/llvm/lib/CodeGen/RegisterCoalescer.cpp | 308
-rw-r--r--  contrib/llvm/lib/CodeGen/RegisterPressure.cpp | 149
-rw-r--r--  contrib/llvm/lib/CodeGen/RegisterScavenging.cpp | 119
-rw-r--r--  contrib/llvm/lib/CodeGen/RegisterUsageInfo.cpp | 2
-rw-r--r--  contrib/llvm/lib/CodeGen/RenameIndependentSubregs.cpp | 28
-rw-r--r--  contrib/llvm/lib/CodeGen/ResetMachineFunctionPass.cpp | 67
-rw-r--r--  contrib/llvm/lib/CodeGen/SafeStack.cpp | 62
-rw-r--r--  contrib/llvm/lib/CodeGen/SafeStackColoring.cpp | 10
-rw-r--r--  contrib/llvm/lib/CodeGen/SafeStackLayout.cpp | 8
-rw-r--r--  contrib/llvm/lib/CodeGen/ScheduleDAG.cpp | 35
-rw-r--r--  contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp | 244
-rw-r--r--  contrib/llvm/lib/CodeGen/ScoreboardHazardRecognizer.cpp | 4
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 2333
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp | 63
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp | 192
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp | 57
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 330
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp | 10
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp | 248
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp | 37
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h | 7
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp | 12
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp | 86
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp | 153
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp | 2
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp | 5
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h | 19
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 1013
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 1060
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h | 19
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp | 24
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp | 316
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp | 66
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp | 582
-rw-r--r--  contrib/llvm/lib/CodeGen/ShadowStackGCLowering.cpp | 116
-rw-r--r--  contrib/llvm/lib/CodeGen/ShrinkWrap.cpp | 14
-rw-r--r--  contrib/llvm/lib/CodeGen/SjLjEHPrepare.cpp | 2
-rw-r--r--  contrib/llvm/lib/CodeGen/SplitKit.cpp | 276
-rw-r--r--  contrib/llvm/lib/CodeGen/SplitKit.h | 31
-rw-r--r--  contrib/llvm/lib/CodeGen/StackColoring.cpp | 5
-rw-r--r--  contrib/llvm/lib/CodeGen/StackMapLivenessAnalysis.cpp | 7
-rw-r--r--  contrib/llvm/lib/CodeGen/StackMaps.cpp | 61
-rw-r--r--  contrib/llvm/lib/CodeGen/StackProtector.cpp | 7
-rw-r--r--  contrib/llvm/lib/CodeGen/StackSlotColoring.cpp | 13
-rw-r--r--  contrib/llvm/lib/CodeGen/TailDuplication.cpp | 5
-rw-r--r--  contrib/llvm/lib/CodeGen/TailDuplicator.cpp | 254
-rw-r--r--  contrib/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp | 10
-rw-r--r--  contrib/llvm/lib/CodeGen/TargetInstrInfo.cpp | 95
-rw-r--r--  contrib/llvm/lib/CodeGen/TargetLoweringBase.cpp | 293
-rw-r--r--  contrib/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp | 262
-rw-r--r--  contrib/llvm/lib/CodeGen/TargetOptionsImpl.cpp | 2
-rw-r--r--  contrib/llvm/lib/CodeGen/TargetPassConfig.cpp | 48
-rw-r--r--  contrib/llvm/lib/CodeGen/TargetRegisterInfo.cpp | 46
-rw-r--r--  contrib/llvm/lib/CodeGen/TargetSchedule.cpp | 2
-rw-r--r--  contrib/llvm/lib/CodeGen/TargetSubtargetInfo.cpp | 54
-rw-r--r--  contrib/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp | 17
-rw-r--r--  contrib/llvm/lib/CodeGen/UnreachableBlockElim.cpp | 4
-rw-r--r--  contrib/llvm/lib/CodeGen/VirtRegMap.cpp | 15
-rw-r--r--  contrib/llvm/lib/CodeGen/WinEHPrepare.cpp | 12
-rw-r--r--  contrib/llvm/lib/CodeGen/XRayInstrumentation.cpp | 129
196 files changed, 19958 insertions, 7727 deletions
diff --git a/contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp b/contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp
index a736884..bb90861 100644
--- a/contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp
+++ b/contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp
@@ -161,8 +161,8 @@ void AggressiveAntiDepBreaker::StartBlock(MachineBasicBlock *BB) {
// Mark live-out callee-saved registers. In a return block this is
// all callee-saved registers. In non-return this is any
// callee-saved register that is not saved in the prolog.
- const MachineFrameInfo *MFI = MF.getFrameInfo();
- BitVector Pristine = MFI->getPristineRegs(MF);
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
+ BitVector Pristine = MFI.getPristineRegs(MF);
for (const MCPhysReg *I = TRI->getCalleeSavedRegs(&MF); *I; ++I) {
unsigned Reg = *I;
if (!IsReturnBlock && !Pristine.test(Reg)) continue;
diff --git a/contrib/llvm/lib/CodeGen/AllocationOrder.cpp b/contrib/llvm/lib/CodeGen/AllocationOrder.cpp
index 40451c0..d840a2f 100644
--- a/contrib/llvm/lib/CodeGen/AllocationOrder.cpp
+++ b/contrib/llvm/lib/CodeGen/AllocationOrder.cpp
@@ -48,7 +48,7 @@ AllocationOrder::AllocationOrder(unsigned VirtReg,
});
#ifndef NDEBUG
for (unsigned I = 0, E = Hints.size(); I != E; ++I)
- assert(std::find(Order.begin(), Order.end(), Hints[I]) != Order.end() &&
+ assert(is_contained(Order, Hints[I]) &&
"Target hint is outside allocation order.");
#endif
}
diff --git a/contrib/llvm/lib/CodeGen/AllocationOrder.h b/contrib/llvm/lib/CodeGen/AllocationOrder.h
index 2aee3a6..8223a52 100644
--- a/contrib/llvm/lib/CodeGen/AllocationOrder.h
+++ b/contrib/llvm/lib/CodeGen/AllocationOrder.h
@@ -18,6 +18,7 @@
#define LLVM_LIB_CODEGEN_ALLOCATIONORDER_H
#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/MC/MCRegisterInfo.h"
namespace llvm {
@@ -79,9 +80,7 @@ public:
bool isHint() const { return Pos <= 0; }
/// Return true if PhysReg is a preferred register.
- bool isHint(unsigned PhysReg) const {
- return std::find(Hints.begin(), Hints.end(), PhysReg) != Hints.end();
- }
+ bool isHint(unsigned PhysReg) const { return is_contained(Hints, PhysReg); }
};
} // end namespace llvm
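
Both AllocationOrder hunks above swap the verbose std::find idiom for llvm::is_contained from STLExtras.h. A minimal standalone sketch of the pattern (the free function here is illustrative, not part of the patch):

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"

// is_contained(Range, Element) is the readable spelling of
// std::find(Range.begin(), Range.end(), Element) != Range.end().
static bool isPreferredReg(llvm::ArrayRef<unsigned> Hints, unsigned PhysReg) {
  return llvm::is_contained(Hints, PhysReg);
}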
diff --git a/contrib/llvm/lib/CodeGen/Analysis.cpp b/contrib/llvm/lib/CodeGen/Analysis.cpp
index d690734..79ecc43 100644
--- a/contrib/llvm/lib/CodeGen/Analysis.cpp
+++ b/contrib/llvm/lib/CodeGen/Analysis.cpp
@@ -272,28 +272,10 @@ static const Value *getNoopInput(const Value *V,
TLI.allowTruncateForTailCall(Op->getType(), I->getType())) {
DataBits = std::min(DataBits, I->getType()->getPrimitiveSizeInBits());
NoopInput = Op;
- } else if (isa<CallInst>(I)) {
- // Look through call (skipping callee)
- for (User::const_op_iterator i = I->op_begin(), e = I->op_end() - 1;
- i != e; ++i) {
- unsigned attrInd = i - I->op_begin() + 1;
- if (cast<CallInst>(I)->paramHasAttr(attrInd, Attribute::Returned) &&
- isNoopBitcast((*i)->getType(), I->getType(), TLI)) {
- NoopInput = *i;
- break;
- }
- }
- } else if (isa<InvokeInst>(I)) {
- // Look through invoke (skipping BB, BB, Callee)
- for (User::const_op_iterator i = I->op_begin(), e = I->op_end() - 3;
- i != e; ++i) {
- unsigned attrInd = i - I->op_begin() + 1;
- if (cast<InvokeInst>(I)->paramHasAttr(attrInd, Attribute::Returned) &&
- isNoopBitcast((*i)->getType(), I->getType(), TLI)) {
- NoopInput = *i;
- break;
- }
- }
+ } else if (auto CS = ImmutableCallSite(I)) {
+ const Value *ReturnedOp = CS.getReturnedArgOperand();
+ if (ReturnedOp && isNoopBitcast(ReturnedOp->getType(), I->getType(), TLI))
+ NoopInput = ReturnedOp;
} else if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(V)) {
// Value may come from either the aggregate or the scalar
ArrayRef<unsigned> InsertLoc = IVI->getIndices();
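
The new branch collapses the two hand-rolled loops into one query on ImmutableCallSite::getReturnedArgOperand(), which finds the argument carrying the 'returned' attribute for calls and invokes alike. A hedged sketch of the lookup, assuming the LLVM 4.0-era CallSite API:

#include "llvm/IR/CallSite.h"
using namespace llvm;

// For `declare i8* @f(i8* returned %p)`, a call to @f forwards %p through,
// so %p is a candidate no-op input when the cast is also a no-op.
static const Value *lookThroughReturned(const Instruction *I) {
  if (auto CS = ImmutableCallSite(I))
    return CS.getReturnedArgOperand(); // null if no 'returned' argument
  return nullptr;
}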
@@ -525,19 +507,15 @@ bool llvm::isInTailCallPosition(ImmutableCallSite CS, const TargetMachine &TM) {
F, I, Ret, *TM.getSubtargetImpl(*F)->getTargetLowering());
}
-bool llvm::returnTypeIsEligibleForTailCall(const Function *F,
- const Instruction *I,
- const ReturnInst *Ret,
- const TargetLoweringBase &TLI) {
- // If the block ends with a void return or unreachable, it doesn't matter
- // what the call's return type is.
- if (!Ret || Ret->getNumOperands() == 0) return true;
+bool llvm::attributesPermitTailCall(const Function *F, const Instruction *I,
+ const ReturnInst *Ret,
+ const TargetLoweringBase &TLI,
+ bool *AllowDifferingSizes) {
+ // ADS may be null, so don't write to it directly.
+ bool DummyADS;
+ bool &ADS = AllowDifferingSizes ? *AllowDifferingSizes : DummyADS;
+ ADS = true;
- // If the return value is undef, it doesn't matter what the call's
- // return type is.
- if (isa<UndefValue>(Ret->getOperand(0))) return true;
-
- // Make sure the attributes attached to each return are compatible.
AttrBuilder CallerAttrs(F->getAttributes(),
AttributeSet::ReturnIndex);
AttrBuilder CalleeAttrs(cast<CallInst>(I)->getAttributes(),
@@ -545,22 +523,21 @@ bool llvm::returnTypeIsEligibleForTailCall(const Function *F,
// Noalias is completely benign as far as calling convention goes, it
// shouldn't affect whether the call is a tail call.
- CallerAttrs = CallerAttrs.removeAttribute(Attribute::NoAlias);
- CalleeAttrs = CalleeAttrs.removeAttribute(Attribute::NoAlias);
+ CallerAttrs.removeAttribute(Attribute::NoAlias);
+ CalleeAttrs.removeAttribute(Attribute::NoAlias);
- bool AllowDifferingSizes = true;
if (CallerAttrs.contains(Attribute::ZExt)) {
if (!CalleeAttrs.contains(Attribute::ZExt))
return false;
- AllowDifferingSizes = false;
+ ADS = false;
CallerAttrs.removeAttribute(Attribute::ZExt);
CalleeAttrs.removeAttribute(Attribute::ZExt);
} else if (CallerAttrs.contains(Attribute::SExt)) {
if (!CalleeAttrs.contains(Attribute::SExt))
return false;
- AllowDifferingSizes = false;
+ ADS = false;
CallerAttrs.removeAttribute(Attribute::SExt);
CalleeAttrs.removeAttribute(Attribute::SExt);
}
@@ -568,7 +545,24 @@ bool llvm::returnTypeIsEligibleForTailCall(const Function *F,
// If they're still different, there's some facet we don't understand
// (currently only "inreg", but in future who knows). It may be OK but the
// only safe option is to reject the tail call.
- if (CallerAttrs != CalleeAttrs)
+ return CallerAttrs == CalleeAttrs;
+}
+
+bool llvm::returnTypeIsEligibleForTailCall(const Function *F,
+ const Instruction *I,
+ const ReturnInst *Ret,
+ const TargetLoweringBase &TLI) {
+ // If the block ends with a void return or unreachable, it doesn't matter
+ // what the call's return type is.
+ if (!Ret || Ret->getNumOperands() == 0) return true;
+
+ // If the return value is undef, it doesn't matter what the call's
+ // return type is.
+ if (isa<UndefValue>(Ret->getOperand(0))) return true;
+
+ // Make sure the attributes attached to each return are compatible.
+ bool AllowDifferingSizes;
+ if (!attributesPermitTailCall(F, I, Ret, TLI, &AllowDifferingSizes))
return false;
const Value *RetVal = Ret->getOperand(0), *CallVal = I;
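
With the split, the attribute-compatibility check can be reused on its own, before any walk of the returned value. A hedged usage sketch (F, Call, Ret, and TLI stand for the caller function, the call instruction, the block's return, and the target lowering object):

#include "llvm/CodeGen/Analysis.h"
#include "llvm/IR/Instructions.h"
#include "llvm/Target/TargetLowering.h"
using namespace llvm;

static bool checkTailCallAttrs(const Function *F, const Instruction *Call,
                               const ReturnInst *Ret,
                               const TargetLoweringBase &TLI) {
  bool AllowDifferingSizes;
  if (!attributesPermitTailCall(F, Call, Ret, TLI, &AllowDifferingSizes))
    return false; // e.g. the caller's return is zeroext but the callee's isn't
  // When zext/sext are in play, AllowDifferingSizes comes back false and the
  // caller must then require exactly matching value sizes.
  return true;
}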
@@ -672,7 +666,7 @@ llvm::getFuncletMembership(const MachineFunction &MF) {
DenseMap<const MachineBasicBlock *, int> FuncletMembership;
// We don't have anything to do if there aren't any EH pads.
- if (!MF.getMMI().hasEHFunclets())
+ if (!MF.hasEHFunclets())
return FuncletMembership;
int EntryBBNumber = MF.front().getNumber();
@@ -694,9 +688,10 @@ llvm::getFuncletMembership(const MachineFunction &MF) {
}
MachineBasicBlock::const_iterator MBBI = MBB.getFirstTerminator();
+
// CatchPads are not funclets for SEH so do not consider CatchRet to
// transfer control to another funclet.
- if (MBBI->getOpcode() != TII->getCatchReturnOpcode())
+ if (MBBI == MBB.end() || MBBI->getOpcode() != TII->getCatchReturnOpcode())
continue;
// FIXME: SEH CatchPads are not necessarily in the parent function:
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp
index 5294c98..61149d9 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp
@@ -43,13 +43,6 @@ ARMTargetStreamer &ARMException::getTargetStreamer() {
return static_cast<ARMTargetStreamer &>(TS);
}
-/// endModule - Emit all exception information that should come after the
-/// content.
-void ARMException::endModule() {
- if (shouldEmitCFI)
- Asm->OutStreamer->EmitCFISections(false, true);
-}
-
void ARMException::beginFunction(const MachineFunction *MF) {
if (Asm->MAI->getExceptionHandlingType() == ExceptionHandling::ARM)
getTargetStreamer().emitFnStart();
@@ -57,7 +50,14 @@ void ARMException::beginFunction(const MachineFunction *MF) {
AsmPrinter::CFIMoveType MoveType = Asm->needsCFIMoves();
assert(MoveType != AsmPrinter::CFI_M_EH &&
"non-EH CFI not yet supported in prologue with EHABI lowering");
+
if (MoveType == AsmPrinter::CFI_M_Debug) {
+ if (!hasEmittedCFISections) {
+ if (Asm->needsOnlyDebugCFIMoves())
+ Asm->OutStreamer->EmitCFISections(false, true);
+ hasEmittedCFISections = true;
+ }
+
shouldEmitCFI = true;
Asm->OutStreamer->EmitCFIStartProc(false);
}
@@ -75,7 +75,7 @@ void ARMException::endFunction(const MachineFunction *MF) {
F->hasPersonalityFn() && !isNoOpWithoutInvoke(classifyEHPersonality(Per)) &&
F->needsUnwindTableEntry();
bool shouldEmitPersonality = forceEmitPersonality ||
- !MMI->getLandingPads().empty();
+ !MF->getLandingPads().empty();
if (!Asm->MF->getFunction()->needsUnwindTableEntry() &&
!shouldEmitPersonality)
ATS.emitCantUnwind();
@@ -99,8 +99,9 @@ void ARMException::endFunction(const MachineFunction *MF) {
}
void ARMException::emitTypeInfos(unsigned TTypeEncoding) {
- const std::vector<const GlobalValue *> &TypeInfos = MMI->getTypeInfos();
- const std::vector<unsigned> &FilterIds = MMI->getFilterIds();
+ const MachineFunction *MF = Asm->MF;
+ const std::vector<const GlobalValue *> &TypeInfos = MF->getTypeInfos();
+ const std::vector<unsigned> &FilterIds = MF->getFilterIds();
bool VerboseAsm = Asm->OutStreamer->isVerboseAsm();
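
These hunks are part of a wider migration in this import: per-function EH tables (landing pads, type infos, filter ids) moved from MachineModuleInfo onto MachineFunction itself. A hedged sketch of the new access pattern:

#include "llvm/CodeGen/MachineFunction.h"
using namespace llvm;

// Previously these lived behind MF.getMMI(); now the function owns them.
static bool hasEHInfo(const MachineFunction &MF) {
  return !MF.getLandingPads().empty() || !MF.getTypeInfos().empty();
}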
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/AddressPool.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/AddressPool.cpp
index 8c68383..ec552e0 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/AddressPool.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/AddressPool.cpp
@@ -14,8 +14,6 @@
using namespace llvm;
-class MCExpr;
-
unsigned AddressPool::getIndex(const MCSymbol *Sym, bool TLS) {
HasBeenUsed = true;
auto IterBool =
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 272bace..24fdbfc 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -37,6 +37,8 @@
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCSection.h"
+#include "llvm/MC/MCSectionELF.h"
+#include "llvm/MC/MCSectionMachO.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbolELF.h"
#include "llvm/MC/MCValue.h"
@@ -55,10 +57,15 @@ using namespace llvm;
#define DEBUG_TYPE "asm-printer"
-static const char *const DWARFGroupName = "DWARF Emission";
-static const char *const DbgTimerName = "Debug Info Emission";
-static const char *const EHTimerName = "DWARF Exception Writer";
-static const char *const CodeViewLineTablesGroupName = "CodeView Line Tables";
+static const char *const DWARFGroupName = "dwarf";
+static const char *const DWARFGroupDescription = "DWARF Emission";
+static const char *const DbgTimerName = "emit";
+static const char *const DbgTimerDescription = "Debug Info Emission";
+static const char *const EHTimerName = "write_exception";
+static const char *const EHTimerDescription = "DWARF Exception Writer";
+static const char *const CodeViewLineTablesGroupName = "linetables";
+static const char *const CodeViewLineTablesGroupDescription =
+ "CodeView Line Tables";
STATISTIC(EmittedInsts, "Number of machine instrs printed");
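
The doubled-up string constants above feed the reworked NamedRegionTimer, which now takes a machine-readable name plus a human-readable description for both the timer and its group. A minimal sketch of the new constructor:

#include "llvm/Support/Timer.h"
using namespace llvm;

static void timedEmit(bool TimePassesIsEnabled) {
  // (name, description, group name, group description, enabled)
  NamedRegionTimer T("emit", "Debug Info Emission", "dwarf",
                     "DWARF Emission", TimePassesIsEnabled);
  // ... the work being timed runs while T is in scope ...
}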
@@ -101,7 +108,7 @@ static unsigned getGVAlignmentLog2(const GlobalValue *GV, const DataLayout &DL,
AsmPrinter::AsmPrinter(TargetMachine &tm, std::unique_ptr<MCStreamer> Streamer)
: MachineFunctionPass(ID), TM(tm), MAI(tm.getMCAsmInfo()),
OutContext(Streamer->getContext()), OutStreamer(std::move(Streamer)),
- LastMI(nullptr), LastFn(0), Counter(~0U) {
+ isCFIMoveForDebugging(false), LastMI(nullptr), LastFn(0), Counter(~0U) {
DD = nullptr;
MMI = nullptr;
LI = nullptr;
@@ -143,7 +150,7 @@ const DataLayout &AsmPrinter::getDataLayout() const {
}
// Do not use the cached DataLayout because some client use it without a Module
-// (llmv-dsymutil, llvm-dwarfdump).
+// (llvm-dsymutil, llvm-dwarfdump).
unsigned AsmPrinter::getPointerSize() const { return TM.getPointerSize(); }
const MCSubtargetInfo &AsmPrinter::getSubtargetInfo() const {
@@ -155,17 +162,11 @@ void AsmPrinter::EmitToStreamer(MCStreamer &S, const MCInst &Inst) {
S.EmitInstruction(Inst, getSubtargetInfo());
}
-StringRef AsmPrinter::getTargetTriple() const {
- return TM.getTargetTriple().str();
-}
-
/// getCurrentSection() - Return the current section we are emitting to.
const MCSection *AsmPrinter::getCurrentSection() const {
- return OutStreamer->getCurrentSection().first;
+ return OutStreamer->getCurrentSectionOnly();
}
-
-
void AsmPrinter::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
MachineFunctionPass::getAnalysisUsage(AU);
@@ -184,8 +185,6 @@ bool AsmPrinter::doInitialization(Module &M) {
OutStreamer->InitSections(false);
- Mang = new Mangler();
-
// Emit the version-min deployment target directive if needed.
//
// FIXME: If we end up with a collection of these sorts of Darwin-specific
@@ -194,7 +193,7 @@ bool AsmPrinter::doInitialization(Module &M) {
// alternative is duplicated code in each of the target asm printers that
// use the directive, where it would need the same conditionalization
// anyway.
- Triple TT(getTargetTriple());
+ const Triple &TT = TM.getTargetTriple();
// If there is a version specified, Major will be non-zero.
if (TT.isOSDarwin() && TT.getOSMajorVersion() != 0) {
unsigned Major, Minor, Update;
@@ -250,18 +249,43 @@ bool AsmPrinter::doInitialization(Module &M) {
if (MAI->doesSupportDebugInformation()) {
bool EmitCodeView = MMI->getModule()->getCodeViewFlag();
- if (EmitCodeView && TM.getTargetTriple().isKnownWindowsMSVCEnvironment()) {
+ if (EmitCodeView && (TM.getTargetTriple().isKnownWindowsMSVCEnvironment() ||
+ TM.getTargetTriple().isWindowsItaniumEnvironment())) {
Handlers.push_back(HandlerInfo(new CodeViewDebug(this),
- DbgTimerName,
- CodeViewLineTablesGroupName));
+ DbgTimerName, DbgTimerDescription,
+ CodeViewLineTablesGroupName,
+ CodeViewLineTablesGroupDescription));
}
if (!EmitCodeView || MMI->getModule()->getDwarfVersion()) {
DD = new DwarfDebug(this, &M);
DD->beginModule();
- Handlers.push_back(HandlerInfo(DD, DbgTimerName, DWARFGroupName));
+ Handlers.push_back(HandlerInfo(DD, DbgTimerName, DbgTimerDescription,
+ DWARFGroupName, DWARFGroupDescription));
}
}
+ switch (MAI->getExceptionHandlingType()) {
+ case ExceptionHandling::SjLj:
+ case ExceptionHandling::DwarfCFI:
+ case ExceptionHandling::ARM:
+ isCFIMoveForDebugging = true;
+ if (MAI->getExceptionHandlingType() != ExceptionHandling::DwarfCFI)
+ break;
+ for (auto &F: M.getFunctionList()) {
+ // If the module contains any function with unwind data,
+ // .eh_frame has to be emitted.
+ // Ignore functions that won't get emitted.
+ if (!F.isDeclarationForLinker() && F.needsUnwindTableEntry()) {
+ isCFIMoveForDebugging = false;
+ break;
+ }
+ }
+ break;
+ default:
+ isCFIMoveForDebugging = false;
+ break;
+ }
+
EHStreamer *ES = nullptr;
switch (MAI->getExceptionHandlingType()) {
case ExceptionHandling::None:
@@ -286,7 +310,8 @@ bool AsmPrinter::doInitialization(Module &M) {
break;
}
if (ES)
- Handlers.push_back(HandlerInfo(ES, EHTimerName, DWARFGroupName));
+ Handlers.push_back(HandlerInfo(ES, EHTimerName, EHTimerDescription,
+ DWARFGroupName, DWARFGroupDescription));
return false;
}
@@ -340,11 +365,11 @@ void AsmPrinter::EmitLinkage(const GlobalValue *GV, MCSymbol *GVSym) const {
void AsmPrinter::getNameWithPrefix(SmallVectorImpl<char> &Name,
const GlobalValue *GV) const {
- TM.getNameWithPrefix(Name, GV, *Mang);
+ TM.getNameWithPrefix(Name, GV, getObjFileLowering().getMangler());
}
MCSymbol *AsmPrinter::getSymbol(const GlobalValue *GV) const {
- return TM.getSymbol(GV, *Mang);
+ return TM.getSymbol(GV);
}
/// EmitGlobalVariable - Emit the specified global variable to the .s file.
@@ -407,7 +432,9 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
unsigned AlignLog = getGVAlignmentLog2(GV, DL);
for (const HandlerInfo &HI : Handlers) {
- NamedRegionTimer T(HI.TimerName, HI.TimerGroupName, TimePassesIsEnabled);
+ NamedRegionTimer T(HI.TimerName, HI.TimerDescription,
+ HI.TimerGroupName, HI.TimerGroupDescription,
+ TimePassesIsEnabled);
HI.Handler->setSymbolSize(GVSym, Size);
}
@@ -424,8 +451,7 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
}
// Determine to which section this global should be emitted.
- MCSection *TheSection =
- getObjFileLowering().SectionForGlobal(GV, GVKind, *Mang, TM);
+ MCSection *TheSection = getObjFileLowering().SectionForGlobal(GV, GVKind, TM);
// If we have a bss global going to a section that supports the
// zerofill directive, do so here.
@@ -483,7 +509,7 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
if (GVKind.isThreadLocal() && MAI->hasMachoTBSSDirective()) {
// Emit the .tbss symbol
MCSymbol *MangSym =
- OutContext.getOrCreateSymbol(GVSym->getName() + Twine("$tlv$init"));
+ OutContext.getOrCreateSymbol(GVSym->getName() + Twine("$tlv$init"));
if (GVKind.isThreadBSS()) {
TheSection = getObjFileLowering().getTLSBSSSection();
@@ -535,12 +561,21 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
if (MAI->hasDotTypeDotSizeDirective())
// .size foo, 42
- OutStreamer->emitELFSize(cast<MCSymbolELF>(EmittedInitSym),
+ OutStreamer->emitELFSize(EmittedInitSym,
MCConstantExpr::create(Size, OutContext));
OutStreamer->AddBlankLine();
}
+/// Emit the directive and value for debug thread local expression
+///
+/// \p Value - The value to emit.
+/// \p Size - The size of the integer (in bytes) to emit.
+void AsmPrinter::EmitDebugValue(const MCExpr *Value,
+ unsigned Size) const {
+ OutStreamer->EmitValue(Value, Size);
+}
+
/// EmitFunctionHeader - This method emits the header for the current
/// function.
void AsmPrinter::EmitFunctionHeader() {
@@ -550,8 +585,7 @@ void AsmPrinter::EmitFunctionHeader() {
// Print the 'header' of function.
const Function *F = MF->getFunction();
- OutStreamer->SwitchSection(
- getObjFileLowering().SectionForGlobal(F, *Mang, TM));
+ OutStreamer->SwitchSection(getObjFileLowering().SectionForGlobal(F, TM));
EmitVisibility(CurrentFnSym, F->getVisibility());
EmitLinkage(F, CurrentFnSym);
@@ -598,7 +632,8 @@ void AsmPrinter::EmitFunctionHeader() {
// Emit pre-function debug and/or EH information.
for (const HandlerInfo &HI : Handlers) {
- NamedRegionTimer T(HI.TimerName, HI.TimerGroupName, TimePassesIsEnabled);
+ NamedRegionTimer T(HI.TimerName, HI.TimerDescription, HI.TimerGroupName,
+ HI.TimerGroupDescription, TimePassesIsEnabled);
HI.Handler->beginFunction(MF);
}
@@ -632,26 +667,26 @@ static void emitComments(const MachineInstr &MI, raw_ostream &CommentOS) {
// Check for spills and reloads
int FI;
- const MachineFrameInfo *FrameInfo = MF->getFrameInfo();
+ const MachineFrameInfo &MFI = MF->getFrameInfo();
// We assume a single instruction only has a spill or reload, not
// both.
const MachineMemOperand *MMO;
if (TII->isLoadFromStackSlotPostFE(MI, FI)) {
- if (FrameInfo->isSpillSlotObjectIndex(FI)) {
+ if (MFI.isSpillSlotObjectIndex(FI)) {
MMO = *MI.memoperands_begin();
CommentOS << MMO->getSize() << "-byte Reload\n";
}
} else if (TII->hasLoadFromStackSlot(MI, MMO, FI)) {
- if (FrameInfo->isSpillSlotObjectIndex(FI))
+ if (MFI.isSpillSlotObjectIndex(FI))
CommentOS << MMO->getSize() << "-byte Folded Reload\n";
} else if (TII->isStoreToStackSlotPostFE(MI, FI)) {
- if (FrameInfo->isSpillSlotObjectIndex(FI)) {
+ if (MFI.isSpillSlotObjectIndex(FI)) {
MMO = *MI.memoperands_begin();
CommentOS << MMO->getSize() << "-byte Spill\n";
}
} else if (TII->hasStoreToStackSlot(MI, MMO, FI)) {
- if (FrameInfo->isSpillSlotObjectIndex(FI))
+ if (MFI.isSpillSlotObjectIndex(FI))
CommentOS << MMO->getSize() << "-byte Folded Spill\n";
}
@@ -711,9 +746,10 @@ static bool emitDebugValueComment(const MachineInstr *MI, AsmPrinter &AP) {
OS << V->getName();
const DIExpression *Expr = MI->getDebugExpression();
- if (Expr->isBitPiece())
- OS << " [bit_piece offset=" << Expr->getBitPieceOffset()
- << " size=" << Expr->getBitPieceSize() << "]";
+ auto Fragment = Expr->getFragmentInfo();
+ if (Fragment)
+ OS << " [fragment offset=" << Fragment->OffsetInBits
+ << " size=" << Fragment->SizeInBits << "]";
OS << " <- ";
// The second operand is only an offset if it's an immediate.
@@ -721,21 +757,21 @@ static bool emitDebugValueComment(const MachineInstr *MI, AsmPrinter &AP) {
int64_t Offset = Deref ? MI->getOperand(1).getImm() : 0;
for (unsigned i = 0; i < Expr->getNumElements(); ++i) {
- if (Deref) {
+ uint64_t Op = Expr->getElement(i);
+ if (Op == dwarf::DW_OP_LLVM_fragment) {
+ // There can't be any operands after this in a valid expression
+ break;
+ } else if (Deref) {
// We currently don't support extra Offsets or derefs after the first
// one. Bail out early instead of emitting an incorrect comment
OS << " [complex expression]";
AP.OutStreamer->emitRawComment(OS.str());
return true;
- }
- uint64_t Op = Expr->getElement(i);
- if (Op == dwarf::DW_OP_deref) {
+ } else if (Op == dwarf::DW_OP_deref) {
Deref = true;
continue;
- } else if (Op == dwarf::DW_OP_bit_piece) {
- // There can't be any operands after this in a valid expression
- break;
}
+
uint64_t ExtraOffset = Expr->getElement(i++);
if (Op == dwarf::DW_OP_plus)
Offset += ExtraOffset;
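
Here DW_OP_bit_piece gives way to the DWARF-neutral DW_OP_LLVM_fragment, queried through DIExpression::getFragmentInfo(). A hedged sketch of that accessor, mirroring the hunk above:

#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

// For !DIExpression(DW_OP_LLVM_fragment, 0, 32) this prints
// " [fragment offset=0 size=32]", i.e. bits [0,32) of the variable.
static void printFragment(const DIExpression *Expr, raw_ostream &OS) {
  if (auto Fragment = Expr->getFragmentInfo())
    OS << " [fragment offset=" << Fragment->OffsetInBits
       << " size=" << Fragment->SizeInBits << "]";
}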
@@ -756,7 +792,7 @@ static bool emitDebugValueComment(const MachineInstr *MI, AsmPrinter &AP) {
// There is no good way to print long double. Convert a copy to
// double. Ah well, it's only a comment.
bool ignored;
- APF.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven,
+ APF.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven,
&ignored);
OS << "(long double) " << APF.convertToDouble();
}
@@ -819,8 +855,7 @@ void AsmPrinter::emitCFIInstruction(const MachineInstr &MI) {
if (needsCFIMoves() == CFI_M_None)
return;
- const MachineModuleInfo &MMI = MF->getMMI();
- const std::vector<MCCFIInstruction> &Instrs = MMI.getFrameInstructions();
+ const std::vector<MCCFIInstruction> &Instrs = MF->getFrameInstructions();
unsigned CFIIndex = MI.getOperand(0).getCFIIndex();
const MCCFIInstruction &CFI = Instrs[CFIIndex];
emitCFIInstruction(CFI);
@@ -862,7 +897,8 @@ void AsmPrinter::EmitFunctionBody() {
if (ShouldPrintDebugScopes) {
for (const HandlerInfo &HI : Handlers) {
- NamedRegionTimer T(HI.TimerName, HI.TimerGroupName,
+ NamedRegionTimer T(HI.TimerName, HI.TimerDescription,
+ HI.TimerGroupName, HI.TimerGroupDescription,
TimePassesIsEnabled);
HI.Handler->beginInstruction(&MI);
}
@@ -906,7 +942,8 @@ void AsmPrinter::EmitFunctionBody() {
if (ShouldPrintDebugScopes) {
for (const HandlerInfo &HI : Handlers) {
- NamedRegionTimer T(HI.TimerName, HI.TimerGroupName,
+ NamedRegionTimer T(HI.TimerName, HI.TimerDescription,
+ HI.TimerGroupName, HI.TimerGroupDescription,
TimePassesIsEnabled);
HI.Handler->endInstruction();
}
@@ -944,8 +981,8 @@ void AsmPrinter::EmitFunctionBody() {
// Emit target-specific gunk after the function body.
EmitFunctionBodyEnd();
- if (!MMI->getLandingPads().empty() || MMI->hasDebugInfo() ||
- MMI->hasEHFunclets() || MAI->hasDotTypeDotSizeDirective()) {
+ if (!MF->getLandingPads().empty() || MMI->hasDebugInfo() ||
+ MF->hasEHFunclets() || MAI->hasDotTypeDotSizeDirective()) {
// Create a symbol for the end of function.
CurrentFnEnd = createTempSymbol("func_end");
OutStreamer->EmitLabel(CurrentFnEnd);
@@ -959,12 +996,12 @@ void AsmPrinter::EmitFunctionBody() {
const MCExpr *SizeExp = MCBinaryExpr::createSub(
MCSymbolRefExpr::create(CurrentFnEnd, OutContext),
MCSymbolRefExpr::create(CurrentFnSymForSize, OutContext), OutContext);
- if (auto Sym = dyn_cast<MCSymbolELF>(CurrentFnSym))
- OutStreamer->emitELFSize(Sym, SizeExp);
+ OutStreamer->emitELFSize(CurrentFnSym, SizeExp);
}
for (const HandlerInfo &HI : Handlers) {
- NamedRegionTimer T(HI.TimerName, HI.TimerGroupName, TimePassesIsEnabled);
+ NamedRegionTimer T(HI.TimerName, HI.TimerDescription, HI.TimerGroupName,
+ HI.TimerGroupDescription, TimePassesIsEnabled);
HI.Handler->markFunctionEnd();
}
@@ -973,10 +1010,10 @@ void AsmPrinter::EmitFunctionBody() {
// Emit post-function debug and/or EH information.
for (const HandlerInfo &HI : Handlers) {
- NamedRegionTimer T(HI.TimerName, HI.TimerGroupName, TimePassesIsEnabled);
+ NamedRegionTimer T(HI.TimerName, HI.TimerDescription, HI.TimerGroupName,
+ HI.TimerGroupDescription, TimePassesIsEnabled);
HI.Handler->endFunction(MF);
}
- MMI->EndFunction();
OutStreamer->AddBlankLine();
}
@@ -1100,8 +1137,7 @@ void AsmPrinter::emitGlobalIndirectSymbol(Module &M,
(!BaseObject || BaseObject->hasPrivateLinkage())) {
const DataLayout &DL = M.getDataLayout();
uint64_t Size = DL.getTypeAllocSize(GA->getValueType());
- OutStreamer->emitELFSize(cast<MCSymbolELF>(Name),
- MCConstantExpr::create(Size, OutContext));
+ OutStreamer->emitELFSize(Name, MCConstantExpr::create(Size, OutContext));
}
}
}
@@ -1143,7 +1179,7 @@ bool AsmPrinter::doFinalization(Module &M) {
SmallVector<Module::ModuleFlagEntry, 8> ModuleFlags;
M.getModuleFlagsMetadata(ModuleFlags);
if (!ModuleFlags.empty())
- TLOF.emitModuleFlags(*OutStreamer, ModuleFlags, *Mang, TM);
+ TLOF.emitModuleFlags(*OutStreamer, ModuleFlags, TM);
if (TM.getTargetTriple().isOSBinFormatELF()) {
MachineModuleInfoELF &MMIELF = MMI->getObjFileInfo<MachineModuleInfoELF>();
@@ -1164,8 +1200,8 @@ bool AsmPrinter::doFinalization(Module &M) {
// Finalize debug and EH information.
for (const HandlerInfo &HI : Handlers) {
- NamedRegionTimer T(HI.TimerName, HI.TimerGroupName,
- TimePassesIsEnabled);
+ NamedRegionTimer T(HI.TimerName, HI.TimerDescription, HI.TimerGroupName,
+ HI.TimerGroupDescription, TimePassesIsEnabled);
HI.Handler->endModule();
delete HI.Handler;
}
@@ -1246,7 +1282,6 @@ bool AsmPrinter::doFinalization(Module &M) {
// after everything else has gone out.
EmitEndOfAsmFile(M);
- delete Mang; Mang = nullptr;
MMI = nullptr;
OutStreamer->Finish();
@@ -1269,8 +1304,8 @@ void AsmPrinter::SetupMachineFunction(MachineFunction &MF) {
CurrentFnBegin = nullptr;
CurExceptionSym = nullptr;
bool NeedsLocalForSize = MAI->needsLocalForSize();
- if (!MMI->getLandingPads().empty() || MMI->hasDebugInfo() ||
- MMI->hasEHFunclets() || NeedsLocalForSize) {
+ if (!MF.getLandingPads().empty() || MMI->hasDebugInfo() ||
+ MF.hasEHFunclets() || NeedsLocalForSize) {
CurrentFnBegin = createTempSymbol("func_begin");
if (NeedsLocalForSize)
CurrentFnSymForSize = CurrentFnBegin;
@@ -1392,7 +1427,7 @@ void AsmPrinter::EmitJumpTableInfo() {
*F);
if (JTInDiffSection) {
// Drop it in the readonly section.
- MCSection *ReadOnlySection = TLOF.getSectionForJumpTable(*F, *Mang, TM);
+ MCSection *ReadOnlySection = TLOF.getSectionForJumpTable(*F, TM);
OutStreamer->SwitchSection(ReadOnlySection);
}
@@ -1536,12 +1571,6 @@ bool AsmPrinter::EmitSpecialLLVMGlobal(const GlobalVariable *GV) {
EmitXXStructorList(GV->getParent()->getDataLayout(), GV->getInitializer(),
/* isCtor */ true);
- if (TM.getRelocationModel() == Reloc::Static &&
- MAI->hasStaticCtorDtorReferenceInStaticMode()) {
- StringRef Sym(".constructors_used");
- OutStreamer->EmitSymbolAttribute(OutContext.getOrCreateSymbol(Sym),
- MCSA_Reference);
- }
return true;
}
@@ -1549,12 +1578,6 @@ bool AsmPrinter::EmitSpecialLLVMGlobal(const GlobalVariable *GV) {
EmitXXStructorList(GV->getParent()->getDataLayout(), GV->getInitializer(),
/* isCtor */ false);
- if (TM.getRelocationModel() == Reloc::Static &&
- MAI->hasStaticCtorDtorReferenceInStaticMode()) {
- StringRef Sym(".destructors_used");
- OutStreamer->EmitSymbolAttribute(OutContext.getOrCreateSymbol(Sym),
- MCSA_Reference);
- }
return true;
}
@@ -1699,7 +1722,9 @@ void AsmPrinter::EmitLabelPlusOffset(const MCSymbol *Label, uint64_t Offset,
unsigned Size,
bool IsSectionRelative) const {
if (MAI->needsDwarfSectionOffsetDirective() && IsSectionRelative) {
- OutStreamer->EmitCOFFSecRel32(Label);
+ OutStreamer->EmitCOFFSecRel32(Label, Offset);
+ if (Size > 4)
+ OutStreamer->EmitZeros(Size - 4);
return;
}
@@ -1764,7 +1789,7 @@ const MCExpr *AsmPrinter::lowerConstant(const Constant *CV) {
// If the code isn't optimized, there may be outstanding folding
// opportunities. Attempt to fold the expression using DataLayout as a
// last resort before giving up.
- if (Constant *C = ConstantFoldConstantExpression(CE, getDataLayout()))
+ if (Constant *C = ConstantFoldConstant(CE, getDataLayout()))
if (C != CE)
return lowerConstant(C);
@@ -1796,7 +1821,7 @@ const MCExpr *AsmPrinter::lowerConstant(const Constant *CV) {
// expression properly. This is important for differences between
// blockaddress labels. Since the two labels are in the same function, it
// is reasonable to treat their delta as a 32-bit value.
- // FALL THROUGH.
+ LLVM_FALLTHROUGH;
case Instruction::BitCast:
return lowerConstant(CE->getOperand(0));
@@ -1843,8 +1868,8 @@ const MCExpr *AsmPrinter::lowerConstant(const Constant *CV) {
APInt RHSOffset;
if (IsConstantOffsetFromGlobal(CE->getOperand(1), RHSGV, RHSOffset,
getDataLayout())) {
- const MCExpr *RelocExpr = getObjFileLowering().lowerRelativeReference(
- LHSGV, RHSGV, *Mang, TM);
+ const MCExpr *RelocExpr =
+ getObjFileLowering().lowerRelativeReference(LHSGV, RHSGV, TM);
if (!RelocExpr)
RelocExpr = MCBinaryExpr::createSub(
MCSymbolRefExpr::create(getSymbol(LHSGV), Ctx),
@@ -2299,7 +2324,7 @@ static void emitGlobalConstantImpl(const DataLayout &DL, const Constant *CV,
// If the constant expression's size is greater than 64-bits, then we have
// to emit the value in chunks. Try to constant fold the value and emit it
// that way.
- Constant *New = ConstantFoldConstantExpression(CE, DL);
+ Constant *New = ConstantFoldConstant(CE, DL);
if (New && New != CE)
return emitGlobalConstantImpl(DL, New, AP);
}
@@ -2385,8 +2410,7 @@ MCSymbol *AsmPrinter::GetJTSetSymbol(unsigned UID, unsigned MBBID) const {
MCSymbol *AsmPrinter::getSymbolWithGlobalValueBase(const GlobalValue *GV,
StringRef Suffix) const {
- return getObjFileLowering().getSymbolWithGlobalValueBase(GV, Suffix, *Mang,
- TM);
+ return getObjFileLowering().getSymbolWithGlobalValueBase(GV, Suffix, TM);
}
/// Return the MCSymbol for the specified ExternalSymbol.
@@ -2599,12 +2623,12 @@ GCMetadataPrinter *AsmPrinter::GetOrCreateGCPrinter(GCStrategy &S) {
if (GCPI != GCMap.end())
return GCPI->second.get();
- const char *Name = S.getName().c_str();
+ auto Name = S.getName();
for (GCMetadataPrinterRegistry::iterator
I = GCMetadataPrinterRegistry::begin(),
E = GCMetadataPrinterRegistry::end(); I != E; ++I)
- if (strcmp(Name, I->getName()) == 0) {
+ if (Name == I->getName()) {
std::unique_ptr<GCMetadataPrinter> GMP = I->instantiate();
GMP->S = &S;
auto IterBool = GCMap.insert(std::make_pair(&S, std::move(GMP)));
@@ -2618,3 +2642,76 @@ GCMetadataPrinter *AsmPrinter::GetOrCreateGCPrinter(GCStrategy &S) {
AsmPrinterHandler::~AsmPrinterHandler() {}
void AsmPrinterHandler::markFunctionEnd() {}
+
+// In the binary's "xray_instr_map" section, an array of these function entries
+// describes each instrumentation point. When XRay patches your code, the index
+// into this table will be given to your handler as a patch point identifier.
+void AsmPrinter::XRayFunctionEntry::emit(int Bytes, MCStreamer *Out,
+ const MCSymbol *CurrentFnSym) const {
+ Out->EmitSymbolValue(Sled, Bytes);
+ Out->EmitSymbolValue(CurrentFnSym, Bytes);
+ auto Kind8 = static_cast<uint8_t>(Kind);
+ Out->EmitBytes(StringRef(reinterpret_cast<const char *>(&Kind8), 1));
+ Out->EmitBytes(
+ StringRef(reinterpret_cast<const char *>(&AlwaysInstrument), 1));
+ Out->EmitZeros(2 * Bytes - 2); // Pad the previous two entries
+}
+
+void AsmPrinter::emitXRayTable() {
+ if (Sleds.empty())
+ return;
+
+ auto PrevSection = OutStreamer->getCurrentSectionOnly();
+ auto Fn = MF->getFunction();
+ MCSection *Section = nullptr;
+ if (MF->getSubtarget().getTargetTriple().isOSBinFormatELF()) {
+ if (Fn->hasComdat()) {
+ Section = OutContext.getELFSection("xray_instr_map", ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC | ELF::SHF_GROUP, 0,
+ Fn->getComdat()->getName());
+ } else {
+ Section = OutContext.getELFSection("xray_instr_map", ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC);
+ }
+ } else if (MF->getSubtarget().getTargetTriple().isOSBinFormatMachO()) {
+ Section = OutContext.getMachOSection("__DATA", "xray_instr_map", 0,
+ SectionKind::getReadOnlyWithRel());
+ } else {
+ llvm_unreachable("Unsupported target");
+ }
+
+ // Before we switch over, we force a reference to a label inside the
+ // xray_instr_map section. Since this function is always called just
+ // before the function's end, we assume that this is happening after
+ // the last return instruction.
+
+ auto WordSizeBytes = TM.getPointerSize();
+ MCSymbol *Tmp = OutContext.createTempSymbol("xray_synthetic_", true);
+ OutStreamer->EmitCodeAlignment(16);
+ OutStreamer->EmitSymbolValue(Tmp, WordSizeBytes, false);
+ OutStreamer->SwitchSection(Section);
+ OutStreamer->EmitLabel(Tmp);
+ for (const auto &Sled : Sleds)
+ Sled.emit(WordSizeBytes, OutStreamer.get(), CurrentFnSym);
+
+ OutStreamer->SwitchSection(PrevSection);
+ Sleds.clear();
+}
+
+void AsmPrinter::recordSled(MCSymbol *Sled, const MachineInstr &MI,
+ SledKind Kind) {
+ auto Fn = MI.getParent()->getParent()->getFunction();
+ auto Attr = Fn->getFnAttribute("function-instrument");
+ bool AlwaysInstrument =
+ Attr.isStringAttribute() && Attr.getValueAsString() == "xray-always";
+ Sleds.emplace_back(
+ XRayFunctionEntry{ Sled, CurrentFnSym, Kind, AlwaysInstrument, Fn });
+}
+
+uint16_t AsmPrinter::getDwarfVersion() const {
+ return OutStreamer->getContext().getDwarfVersion();
+}
+
+void AsmPrinter::setDwarfVersion(uint16_t Version) {
+ OutStreamer->getContext().setDwarfVersion(Version);
+}
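
Laid end to end, the values XRayFunctionEntry::emit() streams out form a fixed 32-byte record on 64-bit targets. A hedged sketch of that layout as a plain struct (the struct name is illustrative, not from the patch):

#include <cstdint>

struct XRaySledEntry64 {
  uint64_t Address;          // EmitSymbolValue(Sled, 8)
  uint64_t Function;         // EmitSymbolValue(CurrentFnSym, 8)
  uint8_t Kind;              // entry, exit, tail-call sled, ...
  uint8_t AlwaysInstrument;  // from the "function-instrument" attribute
  uint8_t Padding[14];       // EmitZeros(2 * Bytes - 2) with Bytes == 8
};
static_assert(sizeof(XRaySledEntry64) == 32, "four 8-byte words per sled");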
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
index 60f40d0..0185c38 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
@@ -138,8 +138,7 @@ void AsmPrinter::EmitTTypeReference(const GlobalValue *GV,
const TargetLoweringObjectFile &TLOF = getObjFileLowering();
const MCExpr *Exp =
- TLOF.getTTypeGlobalReference(GV, Encoding, *Mang, TM, MMI,
- *OutStreamer);
+ TLOF.getTTypeGlobalReference(GV, Encoding, TM, MMI, *OutStreamer);
OutStreamer->EmitValue(Exp, GetSizeOfEncodedValue(Encoding));
} else
OutStreamer->EmitIntValue(0, GetSizeOfEncodedValue(Encoding));
@@ -150,7 +149,7 @@ void AsmPrinter::emitDwarfSymbolReference(const MCSymbol *Label,
if (!ForceOffset) {
// On COFF targets, we have to emit the special .secrel32 directive.
if (MAI->needsDwarfSectionOffsetDirective()) {
- OutStreamer->EmitCOFFSecRel32(Label);
+ OutStreamer->EmitCOFFSecRel32(Label, /*Offset=*/0);
return;
}
@@ -175,36 +174,6 @@ void AsmPrinter::emitDwarfStringOffset(DwarfStringPoolEntryRef S) const {
EmitInt32(S.getOffset());
}
-/// EmitDwarfRegOp - Emit dwarf register operation.
-void AsmPrinter::EmitDwarfRegOp(ByteStreamer &Streamer,
- const MachineLocation &MLoc) const {
- DebugLocDwarfExpression Expr(getDwarfDebug()->getDwarfVersion(), Streamer);
- const MCRegisterInfo *MRI = MMI->getContext().getRegisterInfo();
- int Reg = MRI->getDwarfRegNum(MLoc.getReg(), false);
- if (Reg < 0) {
- // We assume that pointers are always in an addressable register.
- if (MLoc.isIndirect())
- // FIXME: We have no reasonable way of handling errors in here. The
- // caller might be in the middle of a dwarf expression. We should
- // probably assert that Reg >= 0 once debug info generation is more
- // mature.
- return Expr.EmitOp(dwarf::DW_OP_nop,
- "nop (could not find a dwarf register number)");
-
- // Attempt to find a valid super- or sub-register.
- if (!Expr.AddMachineRegPiece(*MF->getSubtarget().getRegisterInfo(),
- MLoc.getReg()))
- Expr.EmitOp(dwarf::DW_OP_nop,
- "nop (could not find a dwarf register number)");
- return;
- }
-
- if (MLoc.isIndirect())
- Expr.AddRegIndirect(Reg, MLoc.getOffset());
- else
- Expr.AddReg(Reg);
-}
-
//===----------------------------------------------------------------------===//
// Dwarf Lowering Routines
//===----------------------------------------------------------------------===//
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
index 2ce6c18..57864e4 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
@@ -100,6 +100,8 @@ void AsmPrinter::EmitInlineAsm(StringRef Str, const MCSubtargetInfo &STI,
}
SourceMgr SrcMgr;
+ SrcMgr.setIncludeDirs(MCOptions.IASSearchPaths);
+
SrcMgrDiagInfo DiagInfo;
// If the current LLVMContext has an inline asm handler, set it in SourceMgr.
@@ -193,6 +195,23 @@ static void EmitMSInlineAsmStr(const char *AsmStr, const MachineInstr *MI,
}
if (Done) break;
+ // If we have ${:foo}, then this is not a real operand reference, it is a
+ // "magic" string reference, just like in .td files. Arrange to call
+ // PrintSpecial.
+ if (LastEmitted[0] == '{' && LastEmitted[1] == ':') {
+ LastEmitted += 2;
+ const char *StrStart = LastEmitted;
+ const char *StrEnd = strchr(StrStart, '}');
+ if (!StrEnd)
+ report_fatal_error("Unterminated ${:foo} operand in inline asm"
+ " string: '" + Twine(AsmStr) + "'");
+
+ std::string Val(StrStart, StrEnd);
+ AP->PrintSpecial(MI, OS, Val.c_str());
+ LastEmitted = StrEnd+1;
+ break;
+ }
+
const char *IDStart = LastEmitted;
const char *IDEnd = IDStart;
while (*IDEnd >= '0' && *IDEnd <= '9') ++IDEnd;
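
This block brings the MS-dialect inline-asm path in line with the GCC-dialect one: ${:foo} is an operand-free "magic" reference routed to PrintSpecial(), whose known names include uid, comment, and private. A hedged example of the classic use, a label made unique per inline-asm instance:

// ${:uid} expands to a number unique to each inline-asm blob, so the
// label below cannot collide if the asm is instantiated more than once.
static inline void labeledNop() {
  __asm__ volatile("unique_label_${:uid}: nop");
}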
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp
index ebf80de..8344051 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp
@@ -13,17 +13,20 @@
#include "CodeViewDebug.h"
#include "llvm/ADT/TinyPtrVector.h"
-#include "llvm/DebugInfo/CodeView/ByteStream.h"
+#include "llvm/DebugInfo/CodeView/CVTypeDumper.h"
#include "llvm/DebugInfo/CodeView/CVTypeVisitor.h"
#include "llvm/DebugInfo/CodeView/CodeView.h"
-#include "llvm/DebugInfo/CodeView/FieldListRecordBuilder.h"
#include "llvm/DebugInfo/CodeView/Line.h"
#include "llvm/DebugInfo/CodeView/SymbolRecord.h"
-#include "llvm/DebugInfo/CodeView/TypeDumper.h"
+#include "llvm/DebugInfo/CodeView/TypeDatabase.h"
+#include "llvm/DebugInfo/CodeView/TypeDumpVisitor.h"
#include "llvm/DebugInfo/CodeView/TypeIndex.h"
#include "llvm/DebugInfo/CodeView/TypeRecord.h"
#include "llvm/DebugInfo/CodeView/TypeVisitorCallbacks.h"
+#include "llvm/DebugInfo/MSF/ByteStream.h"
+#include "llvm/DebugInfo/MSF/StreamReader.h"
#include "llvm/IR/Constants.h"
+#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCSectionCOFF.h"
#include "llvm/MC/MCSymbol.h"
@@ -35,9 +38,11 @@
using namespace llvm;
using namespace llvm::codeview;
+using namespace llvm::msf;
CodeViewDebug::CodeViewDebug(AsmPrinter *AP)
- : DebugHandlerBase(AP), OS(*Asm->OutStreamer), CurFn(nullptr) {
+ : DebugHandlerBase(AP), OS(*Asm->OutStreamer), Allocator(),
+ TypeTable(Allocator), CurFn(nullptr) {
// If module doesn't have named metadata anchors or COFF debug section
// is not available, skip any debug info related stuff.
if (!MMI->getModule()->getNamedMetadata("llvm.dbg.cu") ||
@@ -108,8 +113,9 @@ unsigned CodeViewDebug::maybeRecordFile(const DIFile *F) {
if (Insertion.second) {
// We have to compute the full filepath and emit a .cv_file directive.
StringRef FullPath = getFullFilepath(F);
- NextId = OS.EmitCVFileDirective(NextId, FullPath);
- assert(NextId == FileIdMap.size() && ".cv_file directive failed");
+ bool Success = OS.EmitCVFileDirective(NextId, FullPath);
+ (void)Success;
+ assert(Success && ".cv_file directive failed");
}
return Insertion.first->second;
}
@@ -120,7 +126,16 @@ CodeViewDebug::getInlineSite(const DILocation *InlinedAt,
auto SiteInsertion = CurFn->InlineSites.insert({InlinedAt, InlineSite()});
InlineSite *Site = &SiteInsertion.first->second;
if (SiteInsertion.second) {
+ unsigned ParentFuncId = CurFn->FuncId;
+ if (const DILocation *OuterIA = InlinedAt->getInlinedAt())
+ ParentFuncId =
+ getInlineSite(OuterIA, InlinedAt->getScope()->getSubprogram())
+ .SiteFuncId;
+
Site->SiteFuncId = NextFuncId++;
+ OS.EmitCVInlineSiteIdDirective(
+ Site->SiteFuncId, ParentFuncId, maybeRecordFile(InlinedAt->getFile()),
+ InlinedAt->getLine(), InlinedAt->getColumn(), SMLoc());
Site->Inlinee = Inlinee;
InlinedSubprograms.insert(Inlinee);
getFuncIdForSubprogram(Inlinee);
@@ -208,8 +223,8 @@ TypeIndex CodeViewDebug::getScopeIndex(const DIScope *Scope) {
// Build the fully qualified name of the scope.
std::string ScopeName = getFullyQualifiedName(Scope);
- TypeIndex TI =
- TypeTable.writeStringId(StringIdRecord(TypeIndex(), ScopeName));
+ StringIdRecord SID(TypeIndex(), ScopeName);
+ auto TI = TypeTable.writeKnownType(SID);
return recordTypeIndexForDINode(Scope, TI);
}
@@ -234,12 +249,12 @@ TypeIndex CodeViewDebug::getFuncIdForSubprogram(const DISubprogram *SP) {
TypeIndex ClassType = getTypeIndex(Class);
MemberFuncIdRecord MFuncId(ClassType, getMemberFunctionType(SP, Class),
DisplayName);
- TI = TypeTable.writeMemberFuncId(MFuncId);
+ TI = TypeTable.writeKnownType(MFuncId);
} else {
// Otherwise, this must be a free function.
TypeIndex ParentScope = getScopeIndex(Scope);
FuncIdRecord FuncId(ParentScope, getTypeIndex(SP->getType()), DisplayName);
- TI = TypeTable.writeFuncId(FuncId);
+ TI = TypeTable.writeKnownType(FuncId);
}
return recordTypeIndexForDINode(SP, TI);
@@ -353,8 +368,8 @@ void CodeViewDebug::maybeRecordLocation(const DebugLoc &DL,
}
OS.EmitCVLocDirective(FuncId, FileId, DL.getLine(), DL.getCol(),
- /*PrologueEnd=*/false,
- /*IsStmt=*/false, DL->getFilename());
+ /*PrologueEnd=*/false, /*IsStmt=*/false,
+ DL->getFilename(), SMLoc());
}
void CodeViewDebug::emitCodeViewMagicVersion() {
@@ -377,6 +392,11 @@ void CodeViewDebug::endModule() {
// Use the generic .debug$S section, and make a subsection for all the inlined
// subprograms.
switchToDebugSectionForSymbol(nullptr);
+
+ MCSymbol *CompilerInfo = beginCVSubsection(ModuleSubstreamKind::Symbols);
+ emitCompilerInformation();
+ endCVSubsection(CompilerInfo);
+
emitInlineeLinesSubsection();
// Emit per-function debug information.
@@ -418,10 +438,13 @@ void CodeViewDebug::endModule() {
}
static void emitNullTerminatedSymbolName(MCStreamer &OS, StringRef S) {
- // Microsoft's linker seems to have trouble with symbol names longer than
- // 0xffd8 bytes.
- S = S.substr(0, 0xffd8);
- SmallString<32> NullTerminatedString(S);
+ // The maximum CV record length is 0xFF00. Most of the strings we emit appear
+ // after a fixed length portion of the record. The fixed length portion should
+ // always be less than 0xF00 (3840) bytes, so truncate the string so that the
+ // overall record size is less than the maximum allowed.
+ unsigned MaxFixedRecordLength = 0xF00;
+ SmallString<32> NullTerminatedString(
+ S.take_front(MaxRecordLength - MaxFixedRecordLength - 1));
NullTerminatedString.push_back('\0');
OS.EmitBytes(NullTerminatedString);
}
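
Worked out: MaxRecordLength is 0xFF00 (65280) bytes, so after reserving the assumed 0xF00 (3840) bytes of fixed-length record prefix and one byte for the NUL terminator, the string is capped at 65280 - 3840 - 1 = 61439 bytes.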
@@ -446,48 +469,175 @@ void CodeViewDebug::emitTypeInformation() {
CommentPrefix += ' ';
}
- CVTypeDumper CVTD(nullptr, /*PrintRecordBytes=*/false);
- TypeTable.ForEachRecord(
- [&](TypeIndex Index, StringRef Record) {
- if (OS.isVerboseAsm()) {
- // Emit a block comment describing the type record for readability.
- SmallString<512> CommentBlock;
- raw_svector_ostream CommentOS(CommentBlock);
- ScopedPrinter SP(CommentOS);
- SP.setPrefix(CommentPrefix);
- CVTD.setPrinter(&SP);
- Error E = CVTD.dump({Record.bytes_begin(), Record.bytes_end()});
- if (E) {
- logAllUnhandledErrors(std::move(E), errs(), "error: ");
- llvm_unreachable("produced malformed type record");
- }
- // emitRawComment will insert its own tab and comment string before
- // the first line, so strip off our first one. It also prints its own
- // newline.
- OS.emitRawComment(
- CommentOS.str().drop_front(CommentPrefix.size() - 1).rtrim());
- } else {
+ TypeDatabase TypeDB;
+ CVTypeDumper CVTD(TypeDB);
+ TypeTable.ForEachRecord([&](TypeIndex Index, ArrayRef<uint8_t> Record) {
+ if (OS.isVerboseAsm()) {
+ // Emit a block comment describing the type record for readability.
+ SmallString<512> CommentBlock;
+ raw_svector_ostream CommentOS(CommentBlock);
+ ScopedPrinter SP(CommentOS);
+ SP.setPrefix(CommentPrefix);
+ TypeDumpVisitor TDV(TypeDB, &SP, false);
+ Error E = CVTD.dump(Record, TDV);
+ if (E) {
+ logAllUnhandledErrors(std::move(E), errs(), "error: ");
+ llvm_unreachable("produced malformed type record");
+ }
+ // emitRawComment will insert its own tab and comment string before
+ // the first line, so strip off our first one. It also prints its own
+ // newline.
+ OS.emitRawComment(
+ CommentOS.str().drop_front(CommentPrefix.size() - 1).rtrim());
+ } else {
#ifndef NDEBUG
- // Assert that the type data is valid even if we aren't dumping
- // comments. The MSVC linker doesn't do much type record validation,
- // so the first link of an invalid type record can succeed while
- // subsequent links will fail with LNK1285.
- ByteStream<> Stream({Record.bytes_begin(), Record.bytes_end()});
- CVTypeArray Types;
- StreamReader Reader(Stream);
- Error E = Reader.readArray(Types, Reader.getLength());
- if (!E) {
- TypeVisitorCallbacks C;
- E = CVTypeVisitor(C).visitTypeStream(Types);
- }
- if (E) {
- logAllUnhandledErrors(std::move(E), errs(), "error: ");
- llvm_unreachable("produced malformed type record");
- }
+ // Assert that the type data is valid even if we aren't dumping
+ // comments. The MSVC linker doesn't do much type record validation,
+ // so the first link of an invalid type record can succeed while
+ // subsequent links will fail with LNK1285.
+ ByteStream Stream(Record);
+ CVTypeArray Types;
+ StreamReader Reader(Stream);
+ Error E = Reader.readArray(Types, Reader.getLength());
+ if (!E) {
+ TypeVisitorCallbacks C;
+ E = CVTypeVisitor(C).visitTypeStream(Types);
+ }
+ if (E) {
+ logAllUnhandledErrors(std::move(E), errs(), "error: ");
+ llvm_unreachable("produced malformed type record");
+ }
#endif
- }
- OS.EmitBinaryData(Record);
- });
+ }
+ StringRef S(reinterpret_cast<const char *>(Record.data()), Record.size());
+ OS.EmitBinaryData(S);
+ });
+}
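EmitBinaryData takes a StringRef, hence the reinterpret_cast above. As a sketch, llvm::toStringRef from llvm/ADT/StringExtras.h expresses the same no-copy conversion, assuming that overload is available in this tree:

  #include "llvm/ADT/StringExtras.h"
  // Wraps the raw record bytes without copying, like the manual cast above.
  OS.EmitBinaryData(toStringRef(Record));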
+
+namespace {
+
+static SourceLanguage MapDWLangToCVLang(unsigned DWLang) {
+ switch (DWLang) {
+ case dwarf::DW_LANG_C:
+ case dwarf::DW_LANG_C89:
+ case dwarf::DW_LANG_C99:
+ case dwarf::DW_LANG_C11:
+ case dwarf::DW_LANG_ObjC:
+ return SourceLanguage::C;
+ case dwarf::DW_LANG_C_plus_plus:
+ case dwarf::DW_LANG_C_plus_plus_03:
+ case dwarf::DW_LANG_C_plus_plus_11:
+ case dwarf::DW_LANG_C_plus_plus_14:
+ return SourceLanguage::Cpp;
+ case dwarf::DW_LANG_Fortran77:
+ case dwarf::DW_LANG_Fortran90:
+ case dwarf::DW_LANG_Fortran03:
+ case dwarf::DW_LANG_Fortran08:
+ return SourceLanguage::Fortran;
+ case dwarf::DW_LANG_Pascal83:
+ return SourceLanguage::Pascal;
+ case dwarf::DW_LANG_Cobol74:
+ case dwarf::DW_LANG_Cobol85:
+ return SourceLanguage::Cobol;
+ case dwarf::DW_LANG_Java:
+ return SourceLanguage::Java;
+ default:
+ // There's no CodeView representation for this language, and CV doesn't
+ // have an "unknown" option for the language field, so we'll use MASM,
+ // as it's very low level.
+ return SourceLanguage::Masm;
+ }
+}
+
+struct Version {
+ int Part[4];
+};
+
+// Takes a StringRef like "clang 4.0.0.0 (other nonsense 123)" and parses out
+// the version number.
+static Version parseVersion(StringRef Name) {
+ Version V = {{0}};
+ int N = 0;
+ for (const char C : Name) {
+ if (isdigit(C)) {
+ V.Part[N] *= 10;
+ V.Part[N] += C - '0';
+ } else if (C == '.') {
+ ++N;
+ if (N >= 4)
+ return V;
+ } else if (N > 0)
+ return V;
+ }
+ return V;
+}
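A usage sketch of the parser on a typical producer string (the input is illustrative):

  Version V = parseVersion("clang version 4.0.0 (trunk 123456)");
  // Non-digit, non-dot characters are skipped while N == 0, so the scan
  // starts at "4" and V.Part becomes {4, 0, 0, 0}; the space after "4.0.0"
  // ends the scan because a component had already begun (N > 0).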
+
+static CPUType mapArchToCVCPUType(Triple::ArchType Type) {
+ switch (Type) {
+ case Triple::ArchType::x86:
+ return CPUType::Pentium3;
+ case Triple::ArchType::x86_64:
+ return CPUType::X64;
+ case Triple::ArchType::thumb:
+ return CPUType::Thumb;
+ default:
+ report_fatal_error("target architecture doesn't map to a CodeView "
+ "CPUType");
+ }
+}
+
+} // anonymous namespace
+
+void CodeViewDebug::emitCompilerInformation() {
+ MCContext &Context = MMI->getContext();
+ MCSymbol *CompilerBegin = Context.createTempSymbol(),
+ *CompilerEnd = Context.createTempSymbol();
+ OS.AddComment("Record length");
+ OS.emitAbsoluteSymbolDiff(CompilerEnd, CompilerBegin, 2);
+ OS.EmitLabel(CompilerBegin);
+ OS.AddComment("Record kind: S_COMPILE3");
+ OS.EmitIntValue(SymbolKind::S_COMPILE3, 2);
+ uint32_t Flags = 0;
+
+ NamedMDNode *CUs = MMI->getModule()->getNamedMetadata("llvm.dbg.cu");
+ const MDNode *Node = *CUs->operands().begin();
+ const auto *CU = cast<DICompileUnit>(Node);
+
+ // The low byte of the flags indicates the source language.
+ Flags = MapDWLangToCVLang(CU->getSourceLanguage());
+ // TODO: Figure out which other flags need to be set.
+
+ OS.AddComment("Flags and language");
+ OS.EmitIntValue(Flags, 4);
+
+ OS.AddComment("CPUType");
+ CPUType CPU =
+ mapArchToCVCPUType(Triple(MMI->getModule()->getTargetTriple()).getArch());
+ OS.EmitIntValue(static_cast<uint64_t>(CPU), 2);
+
+ StringRef CompilerVersion = CU->getProducer();
+ Version FrontVer = parseVersion(CompilerVersion);
+ OS.AddComment("Frontend version");
+ for (int N = 0; N < 4; ++N)
+ OS.EmitIntValue(FrontVer.Part[N], 2);
+
+ // Some Microsoft tools, like Binscope, expect a backend version number of at
+ // least 8.something, so we'll coerce the LLVM version into a form that
+ // guarantees it'll be big enough without really lying about the version.
+ int Major = 1000 * LLVM_VERSION_MAJOR +
+ 10 * LLVM_VERSION_MINOR +
+ LLVM_VERSION_PATCH;
+ // Clamp it for builds that use unusually large version numbers.
+ Major = std::min<int>(Major, std::numeric_limits<uint16_t>::max());
+ Version BackVer = {{ Major, 0, 0, 0 }};
+ OS.AddComment("Backend version");
+ for (int N = 0; N < 4; ++N)
+ OS.EmitIntValue(BackVer.Part[N], 2);
+
+ OS.AddComment("Null-terminated compiler version string");
+ emitNullTerminatedSymbolName(OS, CompilerVersion);
+
+ OS.EmitLabel(CompilerEnd);
}
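The emitted S_COMPILE3 record, sketched as a byte layout (the field names are descriptive rather than taken from a header; all fields are little-endian):

  struct Compile3Sketch {
    uint16_t RecordLen;   // CompilerEnd - CompilerBegin
    uint16_t RecordKind;  // S_COMPILE3
    uint32_t Flags;       // low byte holds the CodeView source language
    uint16_t Machine;     // CPUType, e.g. X64 for an x86_64 triple
    uint16_t FrontVer[4]; // parsed from the DICompileUnit producer string
    uint16_t BackVer[4];  // derived from LLVM_VERSION_* as above
    // ...followed by the null-terminated compiler version string.
  };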
void CodeViewDebug::emitInlineeLinesSubsection() {
@@ -525,17 +675,6 @@ void CodeViewDebug::emitInlineeLinesSubsection() {
endCVSubsection(InlineEnd);
}
-void CodeViewDebug::collectInlineSiteChildren(
- SmallVectorImpl<unsigned> &Children, const FunctionInfo &FI,
- const InlineSite &Site) {
- for (const DILocation *ChildSiteLoc : Site.ChildSites) {
- auto I = FI.InlineSites.find(ChildSiteLoc);
- const InlineSite &ChildSite = I->second;
- Children.push_back(ChildSite.SiteFuncId);
- collectInlineSiteChildren(Children, FI, ChildSite);
- }
-}
-
void CodeViewDebug::emitInlinedCallSite(const FunctionInfo &FI,
const DILocation *InlinedAt,
const InlineSite &Site) {
@@ -561,11 +700,9 @@ void CodeViewDebug::emitInlinedCallSite(const FunctionInfo &FI,
unsigned FileId = maybeRecordFile(Site.Inlinee->getFile());
unsigned StartLineNum = Site.Inlinee->getLine();
- SmallVector<unsigned, 3> SecondaryFuncIds;
- collectInlineSiteChildren(SecondaryFuncIds, FI, Site);
OS.EmitCVInlineLinetableDirective(Site.SiteFuncId, FileId, StartLineNum,
- FI.Begin, FI.End, SecondaryFuncIds);
+ FI.Begin, FI.End);
OS.EmitLabel(InlineEnd);
@@ -641,13 +778,13 @@ void CodeViewDebug::emitDebugInfoForFunction(const Function *GV,
OS.emitAbsoluteSymbolDiff(ProcRecordEnd, ProcRecordBegin, 2);
OS.EmitLabel(ProcRecordBegin);
- if (GV->hasLocalLinkage()) {
- OS.AddComment("Record kind: S_LPROC32_ID");
- OS.EmitIntValue(unsigned(SymbolKind::S_LPROC32_ID), 2);
- } else {
- OS.AddComment("Record kind: S_GPROC32_ID");
- OS.EmitIntValue(unsigned(SymbolKind::S_GPROC32_ID), 2);
- }
+ if (GV->hasLocalLinkage()) {
+ OS.AddComment("Record kind: S_LPROC32_ID");
+ OS.EmitIntValue(unsigned(SymbolKind::S_LPROC32_ID), 2);
+ } else {
+ OS.AddComment("Record kind: S_GPROC32_ID");
+ OS.EmitIntValue(unsigned(SymbolKind::S_GPROC32_ID), 2);
+ }
// These fields are filled in by tools like CVPACK which run after the fact.
OS.AddComment("PtrParent");
@@ -667,7 +804,7 @@ void CodeViewDebug::emitDebugInfoForFunction(const Function *GV,
OS.AddComment("Function type index");
OS.EmitIntValue(getFuncIdForSubprogram(GV->getSubprogram()).getIndex(), 4);
OS.AddComment("Function section relative address");
- OS.EmitCOFFSecRel32(Fn);
+ OS.EmitCOFFSecRel32(Fn, /*Offset=*/0);
OS.AddComment("Function section index");
OS.EmitCOFFSectionIndex(Fn);
OS.AddComment("Flags");
@@ -711,29 +848,33 @@ CodeViewDebug::createDefRangeMem(uint16_t CVRegister, int Offset) {
DR.InMemory = -1;
DR.DataOffset = Offset;
assert(DR.DataOffset == Offset && "truncation");
+ DR.IsSubfield = 0;
DR.StructOffset = 0;
DR.CVRegister = CVRegister;
return DR;
}
CodeViewDebug::LocalVarDefRange
-CodeViewDebug::createDefRangeReg(uint16_t CVRegister) {
+CodeViewDebug::createDefRangeGeneral(uint16_t CVRegister, bool InMemory,
+ int Offset, bool IsSubfield,
+ uint16_t StructOffset) {
LocalVarDefRange DR;
- DR.InMemory = 0;
- DR.DataOffset = 0;
- DR.StructOffset = 0;
+ DR.InMemory = InMemory;
+ DR.DataOffset = Offset;
+ DR.IsSubfield = IsSubfield;
+ DR.StructOffset = StructOffset;
DR.CVRegister = CVRegister;
return DR;
}
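The generalized helper subsumes the register and memory special cases; a sketch of the correspondence for the memory case, written as if from inside the class:

  LocalVarDefRange A = createDefRangeMem(CVReg, Off);
  LocalVarDefRange B = createDefRangeGeneral(CVReg, /*InMemory=*/true, Off,
                                             /*IsSubfield=*/false,
                                             /*StructOffset=*/0);
  assert(!A.isDifferentLocation(B) && "the two helpers should agree");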
-void CodeViewDebug::collectVariableInfoFromMMITable(
+void CodeViewDebug::collectVariableInfoFromMFTable(
DenseSet<InlinedVariable> &Processed) {
- const TargetSubtargetInfo &TSI = Asm->MF->getSubtarget();
+ const MachineFunction &MF = *Asm->MF;
+ const TargetSubtargetInfo &TSI = MF.getSubtarget();
const TargetFrameLowering *TFI = TSI.getFrameLowering();
const TargetRegisterInfo *TRI = TSI.getRegisterInfo();
- for (const MachineModuleInfo::VariableDbgInfo &VI :
- MMI->getVariableDbgInfo()) {
+ for (const MachineFunction::VariableDbgInfo &VI : MF.getVariableDbgInfo()) {
if (!VI.Var)
continue;
assert(VI.Var->isValidLocationForIntrinsic(VI.Loc) &&
@@ -770,7 +911,7 @@ void CodeViewDebug::collectVariableInfoFromMMITable(
void CodeViewDebug::collectVariableInfo(const DISubprogram *SP) {
DenseSet<InlinedVariable> Processed;
// Grab the variable info that was squirreled away in the MMI side-table.
- collectVariableInfoFromMMITable(Processed);
+ collectVariableInfoFromMFTable(Processed);
const TargetRegisterInfo *TRI = Asm->MF->getSubtarget().getRegisterInfo();
@@ -802,10 +943,17 @@ void CodeViewDebug::collectVariableInfo(const DISubprogram *SP) {
const MachineInstr *DVInst = Range.first;
assert(DVInst->isDebugValue() && "Invalid History entry");
const DIExpression *DIExpr = DVInst->getDebugExpression();
-
- // Bail if there is a complex DWARF expression for now.
- if (DIExpr && DIExpr->getNumElements() > 0)
- continue;
+ bool IsSubfield = false;
+ unsigned StructOffset = 0;
+
+ // Handle fragments.
+ auto Fragment = DIExpr->getFragmentInfo();
+ if (Fragment) {
+ IsSubfield = true;
+ StructOffset = Fragment->OffsetInBits / 8;
+ } else if (DIExpr->getNumElements() > 0) {
+ continue; // Ignore unrecognized exprs.
+ }
// Bail if operand 0 is not a valid register. This means the variable is a
// simple constant, or is described by a complex expression.
@@ -817,19 +965,20 @@ void CodeViewDebug::collectVariableInfo(const DISubprogram *SP) {
continue;
// Handle the two cases we can handle: indirect in memory and in register.
- bool IsIndirect = DVInst->getOperand(1).isImm();
- unsigned CVReg = TRI->getCodeViewRegNum(DVInst->getOperand(0).getReg());
+ unsigned CVReg = TRI->getCodeViewRegNum(Reg);
+ bool InMemory = DVInst->getOperand(1).isImm();
+ int Offset = InMemory ? DVInst->getOperand(1).getImm() : 0;
{
- LocalVarDefRange DefRange;
- if (IsIndirect) {
- int64_t Offset = DVInst->getOperand(1).getImm();
- DefRange = createDefRangeMem(CVReg, Offset);
- } else {
- DefRange = createDefRangeReg(CVReg);
- }
+ LocalVarDefRange DR;
+ DR.CVRegister = CVReg;
+ DR.InMemory = InMemory;
+ DR.DataOffset = Offset;
+ DR.IsSubfield = IsSubfield;
+ DR.StructOffset = StructOffset;
+
if (Var.DefRanges.empty() ||
- Var.DefRanges.back().isDifferentLocation(DefRange)) {
- Var.DefRanges.emplace_back(std::move(DefRange));
+ Var.DefRanges.back().isDifferentLocation(DR)) {
+ Var.DefRanges.emplace_back(std::move(DR));
}
}
@@ -837,8 +986,14 @@ void CodeViewDebug::collectVariableInfo(const DISubprogram *SP) {
const MCSymbol *Begin = getLabelBeforeInsn(Range.first);
const MCSymbol *End = getLabelAfterInsn(Range.second);
if (!End) {
- if (std::next(I) != E)
- End = getLabelBeforeInsn(std::next(I)->first);
+ // This range is valid until the next overlapping fragment. In the common
+ // case ranges are not fragments, so the next range overlaps this one and
+ // terminates it.
+ auto J = std::next(I);
+ while (J != E &&
+ !fragmentsOverlap(DIExpr, J->first->getDebugExpression()))
+ ++J;
+ if (J != E)
+ End = getLabelBeforeInsn(J->first);
else
End = Asm->getFunctionEnd();
}
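fragmentsOverlap is inherited from DebugHandlerBase. Its assumed semantics, sketched: a location without fragment info covers the whole variable and overlaps everything, and two fragments overlap only when their bit ranges intersect.

  static bool fragmentsOverlapSketch(const DIExpression *P1,
                                     const DIExpression *P2) {
    auto F1 = P1->getFragmentInfo(), F2 = P2->getFragmentInfo();
    if (!F1 || !F2)
      return true; // a whole-variable location overlaps everything
    uint64_t End1 = F1->OffsetInBits + F1->SizeInBits;
    uint64_t End2 = F2->OffsetInBits + F2->SizeInBits;
    return F1->OffsetInBits < End2 && F2->OffsetInBits < End1;
  }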
@@ -873,6 +1028,8 @@ void CodeViewDebug::beginFunction(const MachineFunction *MF) {
CurFn->FuncId = NextFuncId++;
CurFn->Begin = Asm->getFunctionBegin();
+ OS.EmitCVFuncIdDirective(CurFn->FuncId);
+
// Find the end of the function prolog. First known non-DBG_VALUE and
// non-frame setup location marks the beginning of the function body.
// FIXME: is there a simpler a way to do this? Can we just search
@@ -933,6 +1090,9 @@ TypeIndex CodeViewDebug::lowerType(const DIType *Ty, const DIType *ClassTy) {
case dwarf::DW_TAG_base_type:
return lowerTypeBasic(cast<DIBasicType>(Ty));
case dwarf::DW_TAG_pointer_type:
+ if (cast<DIDerivedType>(Ty)->getName() == "__vtbl_ptr_type")
+ return lowerTypeVFTableShape(cast<DIDerivedType>(Ty));
+ LLVM_FALLTHROUGH;
case dwarf::DW_TAG_reference_type:
case dwarf::DW_TAG_rvalue_reference_type:
return lowerTypePointer(cast<DIDerivedType>(Ty));
@@ -940,6 +1100,7 @@ TypeIndex CodeViewDebug::lowerType(const DIType *Ty, const DIType *ClassTy) {
return lowerTypeMemberPointer(cast<DIDerivedType>(Ty));
case dwarf::DW_TAG_const_type:
case dwarf::DW_TAG_volatile_type:
+ // TODO: add support for DW_TAG_atomic_type here
return lowerTypeModifier(cast<DIDerivedType>(Ty));
case dwarf::DW_TAG_subroutine_type:
if (ClassTy) {
@@ -989,20 +1150,25 @@ TypeIndex CodeViewDebug::lowerTypeArray(const DICompositeType *Ty) {
uint64_t ElementSize = getBaseTypeSize(ElementTypeRef) / 8;
- bool UndefinedSubrange = false;
- // FIXME:
- // There is a bug in the front-end where an array of a structure, which was
- // declared as incomplete structure first, ends up not getting a size assigned
- // to it. (PR28303)
+ // We want to assert that the element type multiplied by the array lengths
+ // matches the size of the overall array. However, if we don't have complete
+ // type information for the base type, we can't make this assertion. This
+ // happens if limited debug info is enabled in this case:
+ // struct VTableOptzn { VTableOptzn(); virtual ~VTableOptzn(); };
+ // VTableOptzn array[3];
+ // The DICompositeType of VTableOptzn will have size zero, and the array will
+ // have size 3 * sizeof(void*), so we should avoid asserting.
+ //
+ // There is a related bug in the front-end where an array of a structure,
+ // which was declared as incomplete structure first, ends up not getting a
+ // size assigned to it. (PR28303)
// Example:
// struct A(*p)[3];
// struct A { int f; } a[3];
- //
- // This needs to be fixed in the front-end, but in the meantime we don't want
- // to trigger an assertion because of this.
- if (Ty->getSizeInBits() == 0) {
- UndefinedSubrange = true;
+ bool PartiallyIncomplete = false;
+ if (Ty->getSizeInBits() == 0 || ElementSize == 0) {
+ PartiallyIncomplete = true;
}
// Add subranges to array type.
@@ -1021,18 +1187,24 @@ TypeIndex CodeViewDebug::lowerTypeArray(const DICompositeType *Ty) {
// FIXME: Make front-end support VLA subrange and emit LF_DIMVARLU.
if (Count == -1) {
Count = 1;
- UndefinedSubrange = true;
+ PartiallyIncomplete = true;
}
- StringRef Name = (i == 0) ? Ty->getName() : "";
// Update the element size and element type index for subsequent subranges.
ElementSize *= Count;
- ElementTypeIndex = TypeTable.writeArray(
- ArrayRecord(ElementTypeIndex, IndexType, ElementSize, Name));
+
+ // If this is the outermost array, use the size from the array. It will be
+ // more accurate if PartiallyIncomplete is true.
+ uint64_t ArraySize =
+ (i == 0 && ElementSize == 0) ? Ty->getSizeInBits() / 8 : ElementSize;
+
+ StringRef Name = (i == 0) ? Ty->getName() : "";
+ ArrayRecord AR(ElementTypeIndex, IndexType, ArraySize, Name);
+ ElementTypeIndex = TypeTable.writeKnownType(AR);
}
- (void)UndefinedSubrange;
- assert(UndefinedSubrange || ElementSize == (Ty->getSizeInBits() / 8));
+ (void)PartiallyIncomplete;
+ assert(PartiallyIncomplete || ElementSize == (Ty->getSizeInBits() / 8));
return ElementTypeIndex;
}
@@ -1080,20 +1252,20 @@ TypeIndex CodeViewDebug::lowerTypeBasic(const DIBasicType *Ty) {
break;
case dwarf::DW_ATE_signed:
switch (ByteSize) {
- case 1: STK = SimpleTypeKind::SByte; break;
- case 2: STK = SimpleTypeKind::Int16Short; break;
- case 4: STK = SimpleTypeKind::Int32; break;
- case 8: STK = SimpleTypeKind::Int64Quad; break;
- case 16: STK = SimpleTypeKind::Int128Oct; break;
+ case 1: STK = SimpleTypeKind::SignedCharacter; break;
+ case 2: STK = SimpleTypeKind::Int16Short; break;
+ case 4: STK = SimpleTypeKind::Int32; break;
+ case 8: STK = SimpleTypeKind::Int64Quad; break;
+ case 16: STK = SimpleTypeKind::Int128Oct; break;
}
break;
case dwarf::DW_ATE_unsigned:
switch (ByteSize) {
- case 1: STK = SimpleTypeKind::Byte; break;
- case 2: STK = SimpleTypeKind::UInt16Short; break;
- case 4: STK = SimpleTypeKind::UInt32; break;
- case 8: STK = SimpleTypeKind::UInt64Quad; break;
- case 16: STK = SimpleTypeKind::UInt128Oct; break;
+ case 1: STK = SimpleTypeKind::UnsignedCharacter; break;
+ case 2: STK = SimpleTypeKind::UInt16Short; break;
+ case 4: STK = SimpleTypeKind::UInt32; break;
+ case 8: STK = SimpleTypeKind::UInt64Quad; break;
+ case 16: STK = SimpleTypeKind::UInt128Oct; break;
}
break;
case dwarf::DW_ATE_UTF:
@@ -1133,13 +1305,6 @@ TypeIndex CodeViewDebug::lowerTypeBasic(const DIBasicType *Ty) {
TypeIndex CodeViewDebug::lowerTypePointer(const DIDerivedType *Ty) {
TypeIndex PointeeTI = getTypeIndex(Ty->getBaseType());
- // While processing the type being pointed to it is possible we already
- // created this pointer type. If so, we check here and return the existing
- // pointer type.
- auto I = TypeIndices.find({Ty, nullptr});
- if (I != TypeIndices.end())
- return I->second;
-
// Pointers to simple types can use SimpleTypeMode, rather than having a
// dedicated pointer type record.
if (PointeeTI.isSimple() &&
@@ -1171,7 +1336,7 @@ TypeIndex CodeViewDebug::lowerTypePointer(const DIDerivedType *Ty) {
// do.
PointerOptions PO = PointerOptions::None;
PointerRecord PR(PointeeTI, PK, PM, PO, Ty->getSizeInBits() / 8);
- return TypeTable.writePointer(PR);
+ return TypeTable.writeKnownType(PR);
}
static PointerToMemberRepresentation
@@ -1222,7 +1387,7 @@ TypeIndex CodeViewDebug::lowerTypeMemberPointer(const DIDerivedType *Ty) {
MemberPointerInfo MPI(
ClassTI, translatePtrToMemberRep(SizeInBytes, IsPMF, Ty->getFlags()));
PointerRecord PR(PointeeTI, PK, PM, PO, SizeInBytes, MPI);
- return TypeTable.writePointer(PR);
+ return TypeTable.writeKnownType(PR);
}
/// Given a DWARF calling convention, get the CodeView equivalent. If we don't
@@ -1244,7 +1409,7 @@ TypeIndex CodeViewDebug::lowerTypeModifier(const DIDerivedType *Ty) {
bool IsModifier = true;
const DIType *BaseTy = Ty;
while (IsModifier && BaseTy) {
- // FIXME: Need to add DWARF tag for __unaligned.
+ // FIXME: Need to add DWARF tags for __unaligned and _Atomic
switch (BaseTy->getTag()) {
case dwarf::DW_TAG_const_type:
Mods |= ModifierOptions::Const;
@@ -1260,16 +1425,8 @@ TypeIndex CodeViewDebug::lowerTypeModifier(const DIDerivedType *Ty) {
BaseTy = cast<DIDerivedType>(BaseTy)->getBaseType().resolve();
}
TypeIndex ModifiedTI = getTypeIndex(BaseTy);
-
- // While processing the type being pointed to, it is possible we already
- // created this modifier type. If so, we check here and return the existing
- // modifier type.
- auto I = TypeIndices.find({Ty, nullptr});
- if (I != TypeIndices.end())
- return I->second;
-
ModifierRecord MR(ModifiedTI, Mods);
- return TypeTable.writeModifier(MR);
+ return TypeTable.writeKnownType(MR);
}
TypeIndex CodeViewDebug::lowerTypeFunction(const DISubroutineType *Ty) {
@@ -1286,13 +1443,13 @@ TypeIndex CodeViewDebug::lowerTypeFunction(const DISubroutineType *Ty) {
}
ArgListRecord ArgListRec(TypeRecordKind::ArgList, ArgTypeIndices);
- TypeIndex ArgListIndex = TypeTable.writeArgList(ArgListRec);
+ TypeIndex ArgListIndex = TypeTable.writeKnownType(ArgListRec);
CallingConvention CC = dwarfCCToCodeView(Ty->getCC());
ProcedureRecord Procedure(ReturnTypeIndex, CC, FunctionOptions::None,
ArgTypeIndices.size(), ArgListIndex);
- return TypeTable.writeProcedure(Procedure);
+ return TypeTable.writeKnownType(Procedure);
}
TypeIndex CodeViewDebug::lowerTypeMemberFunction(const DISubroutineType *Ty,
@@ -1319,20 +1476,29 @@ TypeIndex CodeViewDebug::lowerTypeMemberFunction(const DISubroutineType *Ty,
}
ArgListRecord ArgListRec(TypeRecordKind::ArgList, ArgTypeIndices);
- TypeIndex ArgListIndex = TypeTable.writeArgList(ArgListRec);
+ TypeIndex ArgListIndex = TypeTable.writeKnownType(ArgListRec);
CallingConvention CC = dwarfCCToCodeView(Ty->getCC());
// TODO: Need to use the correct values for:
// FunctionOptions
// ThisPointerAdjustment.
- TypeIndex TI = TypeTable.writeMemberFunction(MemberFunctionRecord(
- ReturnTypeIndex, ClassType, ThisTypeIndex, CC, FunctionOptions::None,
- ArgTypeIndices.size(), ArgListIndex, ThisAdjustment));
+ MemberFunctionRecord MFR(ReturnTypeIndex, ClassType, ThisTypeIndex, CC,
+ FunctionOptions::None, ArgTypeIndices.size(),
+ ArgListIndex, ThisAdjustment);
+ TypeIndex TI = TypeTable.writeKnownType(MFR);
return TI;
}
+TypeIndex CodeViewDebug::lowerTypeVFTableShape(const DIDerivedType *Ty) {
+ unsigned VSlotCount = Ty->getSizeInBits() / (8 * Asm->MAI->getPointerSize());
+ SmallVector<VFTableSlotKind, 4> Slots(VSlotCount, VFTableSlotKind::Near);
+
+ VFTableShapeRecord VFTSR(Slots);
+ return TypeTable.writeKnownType(VFTSR);
+}
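A worked example of the slot count: a class that introduces three virtual functions on x86_64 gets a __vtbl_ptr_type of size 3 * 64 bits, so with 8-byte pointers VSlotCount = 192 / (8 * 8) = 3, producing an LF_VTSHAPE record with three near slots.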
+
static MemberAccess translateAccessFlags(unsigned RecordTag, unsigned Flags) {
switch (Flags & DINode::FlagAccessibility) {
case DINode::FlagPrivate: return MemberAccess::Private;
@@ -1420,25 +1586,28 @@ TypeIndex CodeViewDebug::lowerTypeEnum(const DICompositeType *Ty) {
if (Ty->isForwardDecl()) {
CO |= ClassOptions::ForwardReference;
} else {
- FieldListRecordBuilder Fields;
+ FieldListRecordBuilder FLRB(TypeTable);
+
+ FLRB.begin();
for (const DINode *Element : Ty->getElements()) {
// We assume that the frontend provides all members in source declaration
// order, which is what MSVC does.
if (auto *Enumerator = dyn_cast_or_null<DIEnumerator>(Element)) {
- Fields.writeEnumerator(EnumeratorRecord(
- MemberAccess::Public, APSInt::getUnsigned(Enumerator->getValue()),
- Enumerator->getName()));
+ EnumeratorRecord ER(MemberAccess::Public,
+ APSInt::getUnsigned(Enumerator->getValue()),
+ Enumerator->getName());
+ FLRB.writeMemberType(ER);
EnumeratorCount++;
}
}
- FTI = TypeTable.writeFieldList(Fields);
+ FTI = FLRB.end();
}
std::string FullName = getFullyQualifiedName(Ty);
- return TypeTable.writeEnum(EnumRecord(EnumeratorCount, CO, FTI, FullName,
- Ty->getIdentifier(),
- getTypeIndex(Ty->getBaseType())));
+ EnumRecord ER(EnumeratorCount, CO, FTI, FullName, Ty->getIdentifier(),
+ getTypeIndex(Ty->getBaseType()));
+ return TypeTable.writeKnownType(ER);
}
//===----------------------------------------------------------------------===//
@@ -1465,6 +1634,8 @@ struct llvm::ClassInfo {
// Direct overloaded methods gathered by name.
MethodsMap Methods;
+ TypeIndex VShapeTI;
+
std::vector<const DICompositeType *> NestedClasses;
};
@@ -1513,11 +1684,13 @@ ClassInfo CodeViewDebug::collectClassInfo(const DICompositeType *Ty) {
collectMemberInfo(Info, DDTy);
} else if (DDTy->getTag() == dwarf::DW_TAG_inheritance) {
Info.Inheritance.push_back(DDTy);
+ } else if (DDTy->getTag() == dwarf::DW_TAG_pointer_type &&
+ DDTy->getName() == "__vtbl_ptr_type") {
+ Info.VShapeTI = getTypeIndex(DDTy);
} else if (DDTy->getTag() == dwarf::DW_TAG_friend) {
// Ignore friend members. It appears that MSVC emitted info about
// friends in the past, but modern versions do not.
}
- // FIXME: Get Clang to emit function virtual table here and handle it.
} else if (auto *Composite = dyn_cast<DICompositeType>(Element)) {
Info.NestedClasses.push_back(Composite);
}
@@ -1533,9 +1706,9 @@ TypeIndex CodeViewDebug::lowerTypeClass(const DICompositeType *Ty) {
ClassOptions CO =
ClassOptions::ForwardReference | getCommonClassOptions(Ty);
std::string FullName = getFullyQualifiedName(Ty);
- TypeIndex FwdDeclTI = TypeTable.writeClass(ClassRecord(
- Kind, 0, CO, HfaKind::None, WindowsRTClassKind::None, TypeIndex(),
- TypeIndex(), TypeIndex(), 0, FullName, Ty->getIdentifier()));
+ ClassRecord CR(Kind, 0, CO, TypeIndex(), TypeIndex(), TypeIndex(), 0,
+ FullName, Ty->getIdentifier());
+ TypeIndex FwdDeclTI = TypeTable.writeKnownType(CR);
if (!Ty->isForwardDecl())
DeferredCompleteTypes.push_back(Ty);
return FwdDeclTI;
@@ -1559,14 +1732,14 @@ TypeIndex CodeViewDebug::lowerCompleteTypeClass(const DICompositeType *Ty) {
uint64_t SizeInBytes = Ty->getSizeInBits() / 8;
- TypeIndex ClassTI = TypeTable.writeClass(ClassRecord(
- Kind, FieldCount, CO, HfaKind::None, WindowsRTClassKind::None, FieldTI,
- TypeIndex(), VShapeTI, SizeInBytes, FullName, Ty->getIdentifier()));
+ ClassRecord CR(Kind, FieldCount, CO, FieldTI, TypeIndex(), VShapeTI,
+ SizeInBytes, FullName, Ty->getIdentifier());
+ TypeIndex ClassTI = TypeTable.writeKnownType(CR);
- TypeTable.writeUdtSourceLine(UdtSourceLineRecord(
- ClassTI, TypeTable.writeStringId(StringIdRecord(
- TypeIndex(0x0), getFullFilepath(Ty->getFile()))),
- Ty->getLine()));
+ StringIdRecord SIDR(TypeIndex(0x0), getFullFilepath(Ty->getFile()));
+ TypeIndex SIDI = TypeTable.writeKnownType(SIDR);
+ UdtSourceLineRecord USLR(ClassTI, SIDI, Ty->getLine());
+ TypeTable.writeKnownType(USLR);
addToUDTs(Ty, ClassTI);
@@ -1577,9 +1750,8 @@ TypeIndex CodeViewDebug::lowerTypeUnion(const DICompositeType *Ty) {
ClassOptions CO =
ClassOptions::ForwardReference | getCommonClassOptions(Ty);
std::string FullName = getFullyQualifiedName(Ty);
- TypeIndex FwdDeclTI =
- TypeTable.writeUnion(UnionRecord(0, CO, HfaKind::None, TypeIndex(), 0,
- FullName, Ty->getIdentifier()));
+ UnionRecord UR(0, CO, TypeIndex(), 0, FullName, Ty->getIdentifier());
+ TypeIndex FwdDeclTI = TypeTable.writeKnownType(UR);
if (!Ty->isForwardDecl())
DeferredCompleteTypes.push_back(Ty);
return FwdDeclTI;
@@ -1599,14 +1771,14 @@ TypeIndex CodeViewDebug::lowerCompleteTypeUnion(const DICompositeType *Ty) {
uint64_t SizeInBytes = Ty->getSizeInBits() / 8;
std::string FullName = getFullyQualifiedName(Ty);
- TypeIndex UnionTI = TypeTable.writeUnion(
- UnionRecord(FieldCount, CO, HfaKind::None, FieldTI, SizeInBytes, FullName,
- Ty->getIdentifier()));
+ UnionRecord UR(FieldCount, CO, FieldTI, SizeInBytes, FullName,
+ Ty->getIdentifier());
+ TypeIndex UnionTI = TypeTable.writeKnownType(UR);
- TypeTable.writeUdtSourceLine(UdtSourceLineRecord(
- UnionTI, TypeTable.writeStringId(StringIdRecord(
- TypeIndex(0x0), getFullFilepath(Ty->getFile()))),
- Ty->getLine()));
+ StringIdRecord SIR(TypeIndex(0x0), getFullFilepath(Ty->getFile()));
+ TypeIndex SIRI = TypeTable.writeKnownType(SIR);
+ UdtSourceLineRecord USLR(UnionTI, SIRI, Ty->getLine());
+ TypeTable.writeKnownType(USLR);
addToUDTs(Ty, UnionTI);
@@ -1621,7 +1793,8 @@ CodeViewDebug::lowerRecordFieldList(const DICompositeType *Ty) {
// list record.
unsigned MemberCount = 0;
ClassInfo Info = collectClassInfo(Ty);
- FieldListRecordBuilder Fields;
+ FieldListRecordBuilder FLBR(TypeTable);
+ FLBR.begin();
// Create base classes.
for (const DIDerivedType *I : Info.Inheritance) {
@@ -1631,16 +1804,22 @@ CodeViewDebug::lowerRecordFieldList(const DICompositeType *Ty) {
unsigned VBPtrOffset = 0;
// FIXME: Despite the accessor name, the offset is really in bytes.
unsigned VBTableIndex = I->getOffsetInBits() / 4;
- Fields.writeVirtualBaseClass(VirtualBaseClassRecord(
- translateAccessFlags(Ty->getTag(), I->getFlags()),
+ auto RecordKind = (I->getFlags() & DINode::FlagIndirectVirtualBase) ==
+ DINode::FlagIndirectVirtualBase
+ ? TypeRecordKind::IndirectVirtualBaseClass
+ : TypeRecordKind::VirtualBaseClass;
+ VirtualBaseClassRecord VBCR(
+ RecordKind, translateAccessFlags(Ty->getTag(), I->getFlags()),
getTypeIndex(I->getBaseType()), getVBPTypeIndex(), VBPtrOffset,
- VBTableIndex));
+ VBTableIndex);
+
+ FLBR.writeMemberType(VBCR);
} else {
assert(I->getOffsetInBits() % 8 == 0 &&
"bases must be on byte boundaries");
- Fields.writeBaseClass(BaseClassRecord(
- translateAccessFlags(Ty->getTag(), I->getFlags()),
- getTypeIndex(I->getBaseType()), I->getOffsetInBits() / 8));
+ BaseClassRecord BCR(translateAccessFlags(Ty->getTag(), I->getFlags()),
+ getTypeIndex(I->getBaseType()),
+ I->getOffsetInBits() / 8);
+ FLBR.writeMemberType(BCR);
}
}
@@ -1653,8 +1832,17 @@ CodeViewDebug::lowerRecordFieldList(const DICompositeType *Ty) {
translateAccessFlags(Ty->getTag(), Member->getFlags());
if (Member->isStaticMember()) {
- Fields.writeStaticDataMember(
- StaticDataMemberRecord(Access, MemberBaseType, MemberName));
+ StaticDataMemberRecord SDMR(Access, MemberBaseType, MemberName);
+ FLBR.writeMemberType(SDMR);
+ MemberCount++;
+ continue;
+ }
+
+ // Virtual function pointer member.
+ if ((Member->getFlags() & DINode::FlagArtificial) &&
+ Member->getName().startswith("_vptr$")) {
+ VFPtrRecord VFPR(getTypeIndex(Member->getBaseType()));
+ FLBR.writeMemberType(VFPR);
MemberCount++;
continue;
}
@@ -1669,12 +1857,14 @@ CodeViewDebug::lowerRecordFieldList(const DICompositeType *Ty) {
MemberOffsetInBits = CI->getZExtValue() + MemberInfo.BaseOffset;
}
StartBitOffset -= MemberOffsetInBits;
- MemberBaseType = TypeTable.writeBitField(BitFieldRecord(
- MemberBaseType, Member->getSizeInBits(), StartBitOffset));
+ BitFieldRecord BFR(MemberBaseType, Member->getSizeInBits(),
+ StartBitOffset);
+ MemberBaseType = TypeTable.writeKnownType(BFR);
}
uint64_t MemberOffsetInBytes = MemberOffsetInBits / 8;
- Fields.writeDataMember(DataMemberRecord(Access, MemberBaseType,
- MemberOffsetInBytes, MemberName));
+ DataMemberRecord DMR(Access, MemberBaseType, MemberOffsetInBytes,
+ MemberName);
+ FLBR.writeMemberType(DMR);
MemberCount++;
}
@@ -1691,33 +1881,32 @@ CodeViewDebug::lowerRecordFieldList(const DICompositeType *Ty) {
if (Introduced)
VFTableOffset = SP->getVirtualIndex() * getPointerSizeInBytes();
- Methods.push_back(
- OneMethodRecord(MethodType, translateMethodKindFlags(SP, Introduced),
- translateMethodOptionFlags(SP),
- translateAccessFlags(Ty->getTag(), SP->getFlags()),
- VFTableOffset, Name));
+ Methods.push_back(OneMethodRecord(
+ MethodType, translateAccessFlags(Ty->getTag(), SP->getFlags()),
+ translateMethodKindFlags(SP, Introduced),
+ translateMethodOptionFlags(SP), VFTableOffset, Name));
MemberCount++;
}
assert(Methods.size() > 0 && "Empty methods map entry");
if (Methods.size() == 1)
- Fields.writeOneMethod(Methods[0]);
+ FLBR.writeMemberType(Methods[0]);
else {
- TypeIndex MethodList =
- TypeTable.writeMethodOverloadList(MethodOverloadListRecord(Methods));
- Fields.writeOverloadedMethod(
- OverloadedMethodRecord(Methods.size(), MethodList, Name));
+ MethodOverloadListRecord MOLR(Methods);
+ TypeIndex MethodList = TypeTable.writeKnownType(MOLR);
+ OverloadedMethodRecord OMR(Methods.size(), MethodList, Name);
+ FLBR.writeMemberType(OMR);
}
}
// Create nested classes.
for (const DICompositeType *Nested : Info.NestedClasses) {
NestedTypeRecord R(getTypeIndex(DITypeRef(Nested)), Nested->getName());
- Fields.writeNestedType(R);
+ FLBR.writeMemberType(R);
MemberCount++;
}
- TypeIndex FieldTI = TypeTable.writeFieldList(Fields);
- return std::make_tuple(FieldTI, TypeIndex(), MemberCount,
+ TypeIndex FieldTI = FLBR.end();
+ return std::make_tuple(FieldTI, Info.VShapeTI, MemberCount,
!Info.NestedClasses.empty());
}
@@ -1725,7 +1914,7 @@ TypeIndex CodeViewDebug::getVBPTypeIndex() {
if (!VBPType.getIndex()) {
// Make a 'const int *' type.
ModifierRecord MR(TypeIndex::Int32(), ModifierOptions::Const);
- TypeIndex ModifiedTI = TypeTable.writeModifier(MR);
+ TypeIndex ModifiedTI = TypeTable.writeKnownType(MR);
PointerKind PK = getPointerSizeInBytes() == 8 ? PointerKind::Near64
: PointerKind::Near32;
@@ -1733,7 +1922,7 @@ TypeIndex CodeViewDebug::getVBPTypeIndex() {
PointerOptions PO = PointerOptions::None;
PointerRecord PR(ModifiedTI, PK, PM, PO, getPointerSizeInBytes());
- VBPType = TypeTable.writePointer(PR);
+ VBPType = TypeTable.writeKnownType(PR);
}
return VBPType;
@@ -1880,30 +2069,47 @@ void CodeViewDebug::emitLocalVariable(const LocalVariable &Var) {
SmallString<20> BytePrefix;
for (const LocalVarDefRange &DefRange : Var.DefRanges) {
BytePrefix.clear();
- // FIXME: Handle bitpieces.
- if (DefRange.StructOffset != 0)
- continue;
-
if (DefRange.InMemory) {
- DefRangeRegisterRelSym Sym(DefRange.CVRegister, 0, DefRange.DataOffset, 0,
- 0, 0, ArrayRef<LocalVariableAddrGap>());
+ uint16_t RegRelFlags = 0;
+ if (DefRange.IsSubfield) {
+ RegRelFlags = DefRangeRegisterRelSym::IsSubfieldFlag |
+ (DefRange.StructOffset
+ << DefRangeRegisterRelSym::OffsetInParentShift);
+ }
+ DefRangeRegisterRelSym Sym(S_DEFRANGE_REGISTER_REL);
+ Sym.Hdr.Register = DefRange.CVRegister;
+ Sym.Hdr.Flags = RegRelFlags;
+ Sym.Hdr.BasePointerOffset = DefRange.DataOffset;
ulittle16_t SymKind = ulittle16_t(S_DEFRANGE_REGISTER_REL);
BytePrefix +=
StringRef(reinterpret_cast<const char *>(&SymKind), sizeof(SymKind));
BytePrefix +=
- StringRef(reinterpret_cast<const char *>(&Sym.Header),
- sizeof(Sym.Header) - sizeof(LocalVariableAddrRange));
+ StringRef(reinterpret_cast<const char *>(&Sym.Hdr), sizeof(Sym.Hdr));
} else {
assert(DefRange.DataOffset == 0 && "unexpected offset into register");
- // Unclear what matters here.
- DefRangeRegisterSym Sym(DefRange.CVRegister, 0, 0, 0, 0,
- ArrayRef<LocalVariableAddrGap>());
- ulittle16_t SymKind = ulittle16_t(S_DEFRANGE_REGISTER);
- BytePrefix +=
- StringRef(reinterpret_cast<const char *>(&SymKind), sizeof(SymKind));
- BytePrefix +=
- StringRef(reinterpret_cast<const char *>(&Sym.Header),
- sizeof(Sym.Header) - sizeof(LocalVariableAddrRange));
+ if (DefRange.IsSubfield) {
+ // Unclear what matters here.
+ DefRangeSubfieldRegisterSym Sym(S_DEFRANGE_SUBFIELD_REGISTER);
+ Sym.Hdr.Register = DefRange.CVRegister;
+ Sym.Hdr.MayHaveNoName = 0;
+ Sym.Hdr.OffsetInParent = DefRange.StructOffset;
+
+ ulittle16_t SymKind = ulittle16_t(S_DEFRANGE_SUBFIELD_REGISTER);
+ BytePrefix += StringRef(reinterpret_cast<const char *>(&SymKind),
+ sizeof(SymKind));
+ BytePrefix += StringRef(reinterpret_cast<const char *>(&Sym.Hdr),
+ sizeof(Sym.Hdr));
+ } else {
+ // Unclear what matters here.
+ DefRangeRegisterSym Sym(S_DEFRANGE_REGISTER);
+ Sym.Hdr.Register = DefRange.CVRegister;
+ Sym.Hdr.MayHaveNoName = 0;
+ ulittle16_t SymKind = ulittle16_t(S_DEFRANGE_REGISTER);
+ BytePrefix += StringRef(reinterpret_cast<const char *>(&SymKind),
+ sizeof(SymKind));
+ BytePrefix += StringRef(reinterpret_cast<const char *>(&Sym.Hdr),
+ sizeof(Sym.Hdr));
+ }
}
OS.EmitCVDefRangeDirective(DefRange.Ranges, BytePrefix);
}
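A sketch of what the subfield path encodes: a 16-byte aggregate that SROA split across two registers would yield two records along the lines of (register numbers illustrative):

  S_DEFRANGE_SUBFIELD_REGISTER { Register = RAX, OffsetInParent = 0 }
  S_DEFRANGE_SUBFIELD_REGISTER { Register = RDX, OffsetInParent = 8 }

and the debugger reassembles the aggregate by placing each piece at its OffsetInParent.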
@@ -1983,6 +2189,15 @@ void CodeViewDebug::emitDebugInfoForUDTs(
}
void CodeViewDebug::emitDebugInfoForGlobals() {
+ DenseMap<const DIGlobalVariableExpression *, const GlobalVariable *>
+ GlobalMap;
+ for (const GlobalVariable &GV : MMI->getModule()->globals()) {
+ SmallVector<DIGlobalVariableExpression *, 1> GVEs;
+ GV.getDebugInfo(GVEs);
+ for (const auto *GVE : GVEs)
+ GlobalMap[GVE] = &GV;
+ }
+
NamedMDNode *CUs = MMI->getModule()->getNamedMetadata("llvm.dbg.cu");
for (const MDNode *Node : CUs->operands()) {
const auto *CU = cast<DICompileUnit>(Node);
@@ -1992,31 +2207,32 @@ void CodeViewDebug::emitDebugInfoForGlobals() {
// it if we have at least one global to emit.
switchToDebugSectionForSymbol(nullptr);
MCSymbol *EndLabel = nullptr;
- for (const DIGlobalVariable *G : CU->getGlobalVariables()) {
- if (const auto *GV = dyn_cast_or_null<GlobalVariable>(G->getVariable())) {
+ for (const auto *GVE : CU->getGlobalVariables()) {
+ if (const auto *GV = GlobalMap.lookup(GVE))
if (!GV->hasComdat() && !GV->isDeclarationForLinker()) {
if (!EndLabel) {
OS.AddComment("Symbol subsection for globals");
EndLabel = beginCVSubsection(ModuleSubstreamKind::Symbols);
}
- emitDebugInfoForGlobal(G, Asm->getSymbol(GV));
+ // FIXME: emitDebugInfoForGlobal() doesn't handle DIExpressions.
+ emitDebugInfoForGlobal(GVE->getVariable(), GV, Asm->getSymbol(GV));
}
- }
}
if (EndLabel)
endCVSubsection(EndLabel);
// Second, emit each global that is in a comdat into its own .debug$S
// section along with its own symbol substream.
- for (const DIGlobalVariable *G : CU->getGlobalVariables()) {
- if (const auto *GV = dyn_cast_or_null<GlobalVariable>(G->getVariable())) {
+ for (const auto *GVE : CU->getGlobalVariables()) {
+ if (const auto *GV = GlobalMap.lookup(GVE)) {
if (GV->hasComdat()) {
MCSymbol *GVSym = Asm->getSymbol(GV);
OS.AddComment("Symbol subsection for " +
Twine(GlobalValue::getRealLinkageName(GV->getName())));
switchToDebugSectionForSymbol(GVSym);
EndLabel = beginCVSubsection(ModuleSubstreamKind::Symbols);
- emitDebugInfoForGlobal(G, GVSym);
+ // FIXME: emitDebugInfoForGlobal() doesn't handle DIExpressions.
+ emitDebugInfoForGlobal(GVE->getVariable(), GV, GVSym);
endCVSubsection(EndLabel);
}
}
@@ -2037,6 +2253,7 @@ void CodeViewDebug::emitDebugInfoForRetainedTypes() {
}
void CodeViewDebug::emitDebugInfoForGlobal(const DIGlobalVariable *DIGV,
+ const GlobalVariable *GV,
MCSymbol *GVSym) {
// DataSym record, see SymbolRecord.h for more info.
// FIXME: Thread local data, etc
@@ -2045,7 +2262,6 @@ void CodeViewDebug::emitDebugInfoForGlobal(const DIGlobalVariable *DIGV,
OS.AddComment("Record length");
OS.emitAbsoluteSymbolDiff(DataEnd, DataBegin, 2);
OS.EmitLabel(DataBegin);
- const auto *GV = cast<GlobalVariable>(DIGV->getVariable());
if (DIGV->isLocalToUnit()) {
if (GV->isThreadLocal()) {
OS.AddComment("Record kind: S_LTHREAD32");
@@ -2066,7 +2282,7 @@ void CodeViewDebug::emitDebugInfoForGlobal(const DIGlobalVariable *DIGV,
OS.AddComment("Type");
OS.EmitIntValue(getCompleteTypeIndex(DIGV->getType()).getIndex(), 4);
OS.AddComment("DataOffset");
- OS.EmitCOFFSecRel32(GVSym);
+ OS.EmitCOFFSecRel32(GVSym, /*Offset=*/0);
OS.AddComment("Segment");
OS.EmitCOFFSectionIndex(GVSym);
OS.AddComment("Name");
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h b/contrib/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h
index e4bbd61..3dd4315 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h
@@ -20,8 +20,8 @@
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
-#include "llvm/DebugInfo/CodeView/MemoryTypeTableBuilder.h"
#include "llvm/DebugInfo/CodeView/TypeIndex.h"
+#include "llvm/DebugInfo/CodeView/TypeTableBuilder.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/MC/MCStreamer.h"
@@ -36,7 +36,8 @@ struct ClassInfo;
/// \brief Collects and handles line tables information in a CodeView format.
class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase {
MCStreamer &OS;
- codeview::MemoryTypeTableBuilder TypeTable;
+ llvm::BumpPtrAllocator Allocator;
+ codeview::TypeTableBuilder TypeTable;
/// Represents the most general definition range.
struct LocalVarDefRange {
@@ -47,9 +48,11 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase {
/// Offset of variable data in memory.
int DataOffset : 31;
- /// Offset of the data into the user level struct. If zero, no splitting
- /// occurred.
- uint16_t StructOffset;
+ /// Non-zero if this is a piece of an aggregate.
+ uint16_t IsSubfield : 1;
+
+ /// Offset into aggregate.
+ uint16_t StructOffset : 15;
/// Register containing the data or the register base of the memory
/// location containing the data.
@@ -59,14 +62,18 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase {
/// ranges.
bool isDifferentLocation(LocalVarDefRange &O) {
return InMemory != O.InMemory || DataOffset != O.DataOffset ||
- StructOffset != O.StructOffset || CVRegister != O.CVRegister;
+ IsSubfield != O.IsSubfield || StructOffset != O.StructOffset ||
+ CVRegister != O.CVRegister;
}
SmallVector<std::pair<const MCSymbol *, const MCSymbol *>, 1> Ranges;
};
static LocalVarDefRange createDefRangeMem(uint16_t CVRegister, int Offset);
- static LocalVarDefRange createDefRangeReg(uint16_t CVRegister);
+ static LocalVarDefRange createDefRangeGeneral(uint16_t CVRegister,
+ bool InMemory, int Offset,
+ bool IsSubfield,
+ uint16_t StructOffset);
/// Similar to DbgVariable in DwarfDebug, but not dwarf-specific.
struct LocalVariable {
@@ -190,6 +197,8 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase {
void emitTypeInformation();
+ void emitCompilerInformation();
+
void emitInlineeLinesSubsection();
void emitDebugInfoForFunction(const Function *GV, FunctionInfo &FI);
@@ -201,7 +210,8 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase {
void emitDebugInfoForUDTs(
ArrayRef<std::pair<std::string, codeview::TypeIndex>> UDTs);
- void emitDebugInfoForGlobal(const DIGlobalVariable *DIGV, MCSymbol *GVSym);
+ void emitDebugInfoForGlobal(const DIGlobalVariable *DIGV,
+ const GlobalVariable *GV, MCSymbol *GVSym);
/// Opens a subsection of the given kind in a .debug$S codeview section.
/// Returns an end label for use with endCVSubsection when the subsection is
@@ -217,7 +227,7 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase {
void collectVariableInfo(const DISubprogram *SP);
- void collectVariableInfoFromMMITable(DenseSet<InlinedVariable> &Processed);
+ void collectVariableInfoFromMFTable(DenseSet<InlinedVariable> &Processed);
/// Records information about a local variable in the appropriate scope. In
/// particular, locals from inlined code live inside the inlining site.
@@ -251,6 +261,7 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase {
codeview::TypeIndex lowerTypeMemberPointer(const DIDerivedType *Ty);
codeview::TypeIndex lowerTypeModifier(const DIDerivedType *Ty);
codeview::TypeIndex lowerTypeFunction(const DISubroutineType *Ty);
+ codeview::TypeIndex lowerTypeVFTableShape(const DIDerivedType *Ty);
codeview::TypeIndex lowerTypeMemberFunction(const DISubroutineType *Ty,
const DIType *ClassTy,
int ThisAdjustment);
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.cpp
index 2aaa85a..8799189 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.cpp
@@ -63,10 +63,10 @@ void DIEAbbrev::Profile(FoldingSetNodeID &ID) const {
///
void DIEAbbrev::Emit(const AsmPrinter *AP) const {
// Emit its Dwarf tag type.
- AP->EmitULEB128(Tag, dwarf::TagString(Tag));
+ AP->EmitULEB128(Tag, dwarf::TagString(Tag).data());
// Emit whether it has children DIEs.
- AP->EmitULEB128((unsigned)Children, dwarf::ChildrenString(Children));
+ AP->EmitULEB128((unsigned)Children, dwarf::ChildrenString(Children).data());
// For each attribute description.
for (unsigned i = 0, N = Data.size(); i < N; ++i) {
@@ -74,11 +74,18 @@ void DIEAbbrev::Emit(const AsmPrinter *AP) const {
// Emit attribute type.
AP->EmitULEB128(AttrData.getAttribute(),
- dwarf::AttributeString(AttrData.getAttribute()));
+ dwarf::AttributeString(AttrData.getAttribute()).data());
// Emit form type.
AP->EmitULEB128(AttrData.getForm(),
- dwarf::FormEncodingString(AttrData.getForm()));
+ dwarf::FormEncodingString(AttrData.getForm()).data());
+
+ // Emit value for DW_FORM_implicit_const.
+ if (AttrData.getForm() == dwarf::DW_FORM_implicit_const) {
+ assert(AP->getDwarfVersion() >= 5 &&
+ "DW_FORM_implicit_const is supported starting from DWARFv5");
+ AP->EmitSLEB128(AttrData.getValue());
+ }
}
// Mark end of abbreviation.
@@ -108,24 +115,73 @@ void DIEAbbrev::print(raw_ostream &O) {
LLVM_DUMP_METHOD
void DIEAbbrev::dump() { print(dbgs()); }
+//===----------------------------------------------------------------------===//
+// DIEAbbrevSet Implementation
+//===----------------------------------------------------------------------===//
+
+DIEAbbrevSet::~DIEAbbrevSet() {
+ for (DIEAbbrev *Abbrev : Abbreviations)
+ Abbrev->~DIEAbbrev();
+}
+
+DIEAbbrev &DIEAbbrevSet::uniqueAbbreviation(DIE &Die) {
+
+ FoldingSetNodeID ID;
+ DIEAbbrev Abbrev = Die.generateAbbrev();
+ Abbrev.Profile(ID);
+
+ void *InsertPos;
+ if (DIEAbbrev *Existing =
+ AbbreviationsSet.FindNodeOrInsertPos(ID, InsertPos)) {
+ Die.setAbbrevNumber(Existing->getNumber());
+ return *Existing;
+ }
+
+ // Move the abbreviation to the heap and assign a number.
+ DIEAbbrev *New = new (Alloc) DIEAbbrev(std::move(Abbrev));
+ Abbreviations.push_back(New);
+ New->setNumber(Abbreviations.size());
+ Die.setAbbrevNumber(Abbreviations.size());
+
+ // Store it for lookup.
+ AbbreviationsSet.InsertNode(New, InsertPos);
+ return *New;
+}
+
+void DIEAbbrevSet::Emit(const AsmPrinter *AP, MCSection *Section) const {
+ if (!Abbreviations.empty()) {
+ // Start the debug abbrev section.
+ AP->OutStreamer->SwitchSection(Section);
+ AP->emitDwarfAbbrevs(Abbreviations);
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// DIE Implementation
+//===----------------------------------------------------------------------===//
+
+DIE *DIE::getParent() const {
+ return Owner.dyn_cast<DIE*>();
+}
+
DIEAbbrev DIE::generateAbbrev() const {
DIEAbbrev Abbrev(Tag, hasChildren());
for (const DIEValue &V : values())
- Abbrev.AddAttribute(V.getAttribute(), V.getForm());
+ if (V.getForm() == dwarf::DW_FORM_implicit_const)
+ Abbrev.AddImplicitConstAttribute(V.getAttribute(),
+ V.getDIEInteger().getValue());
+ else
+ Abbrev.AddAttribute(V.getAttribute(), V.getForm());
return Abbrev;
}
-/// Climb up the parent chain to get the unit DIE to which this DIE
-/// belongs.
-const DIE *DIE::getUnit() const {
- const DIE *Cu = getUnitOrNull();
- assert(Cu && "We should not have orphaned DIEs.");
- return Cu;
+unsigned DIE::getDebugSectionOffset() const {
+ const DIEUnit *Unit = getUnit();
+ assert(Unit && "DIE must be owned by a DIEUnit to get its absolute offset");
+ return Unit->getDebugSectionOffset() + getOffset();
}
-/// Climb up the parent chain to get the unit DIE this DIE belongs
-/// to. Return NULL if DIE is not added to an owner yet.
-const DIE *DIE::getUnitOrNull() const {
+const DIE *DIE::getUnitDie() const {
const DIE *p = this;
while (p) {
if (p->getTag() == dwarf::DW_TAG_compile_unit ||
@@ -136,6 +192,13 @@ const DIE *DIE::getUnitOrNull() const {
return nullptr;
}
+const DIEUnit *DIE::getUnit() const {
+ const DIE *UnitDie = getUnitDie();
+ if (UnitDie)
+ return UnitDie->Owner.dyn_cast<DIEUnit*>();
+ return nullptr;
+}
+
DIEValue DIE::findAttribute(dwarf::Attribute Attribute) const {
// Iterate through all the attributes until we find the one we're
// looking for, if we can't find it return NULL.
@@ -191,6 +254,55 @@ void DIE::dump() {
print(dbgs());
}
+unsigned DIE::computeOffsetsAndAbbrevs(const AsmPrinter *AP,
+ DIEAbbrevSet &AbbrevSet,
+ unsigned CUOffset) {
+ // Unique the abbreviation and fill in the abbreviation number so this DIE
+ // can be emitted.
+ const DIEAbbrev &Abbrev = AbbrevSet.uniqueAbbreviation(*this);
+
+ // Set compile/type unit relative offset of this DIE.
+ setOffset(CUOffset);
+
+ // Add the byte size of the abbreviation code.
+ CUOffset += getULEB128Size(getAbbrevNumber());
+
+ // Add the byte size of all the DIE attribute values.
+ for (const auto &V : values())
+ CUOffset += V.SizeOf(AP);
+
+ // Let the children compute their offsets and abbreviation numbers.
+ if (hasChildren()) {
+ (void)Abbrev;
+ assert(Abbrev.hasChildren() && "Children flag not set");
+
+ for (auto &Child : children())
+ CUOffset = Child.computeOffsetsAndAbbrevs(AP, AbbrevSet, CUOffset);
+
+ // Each child chain is terminated with a zero byte, adjust the offset.
+ CUOffset += sizeof(int8_t);
+ }
+
+ // Compute the byte size of this DIE and all of its children correctly. This
+ // is needed so that the top-level DIE can help the compile unit set its
+ // length correctly.
+ setSize(CUOffset - getOffset());
+ return CUOffset;
+}
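A worked example of the bookkeeping: a childless DIE that is assigned abbreviation number 1 and carries a single DW_FORM_data4 attribute, entered with CUOffset = 11:

  // setOffset(11): the DIE starts at CU-relative offset 11.
  // 11 + getULEB128Size(1) = 12: one byte for the abbreviation code.
  // 12 + 4 = 16: four bytes for the data4 attribute value.
  // setSize(16 - 11): Size = 5, and 16 is returned to the caller.

A DIE with children would also account for the single NUL byte that terminates the child chain.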
+
+//===----------------------------------------------------------------------===//
+// DIEUnit Implementation
+//===----------------------------------------------------------------------===//
+DIEUnit::DIEUnit(uint16_t V, uint8_t A, dwarf::Tag UnitTag)
+ : Die(UnitTag), Section(nullptr), Offset(0), Length(0), Version(V),
+ AddrSize(A)
+{
+ Die.Owner = this;
+ assert((UnitTag == dwarf::DW_TAG_compile_unit ||
+ UnitTag == dwarf::DW_TAG_type_unit ||
+ UnitTag == dwarf::DW_TAG_partial_unit) && "expected a unit TAG");
+}
+
void DIEValue::EmitValue(const AsmPrinter *AP) const {
switch (Ty) {
case isNone:
@@ -240,67 +352,121 @@ void DIEValue::dump() const {
/// EmitValue - Emit integer of appropriate size.
///
void DIEInteger::EmitValue(const AsmPrinter *Asm, dwarf::Form Form) const {
- unsigned Size = ~0U;
switch (Form) {
+ case dwarf::DW_FORM_implicit_const:
+ LLVM_FALLTHROUGH;
case dwarf::DW_FORM_flag_present:
// Emit something to keep the lines and comments in sync.
// FIXME: Is there a better way to do this?
Asm->OutStreamer->AddBlankLine();
return;
- case dwarf::DW_FORM_flag: // Fall thru
- case dwarf::DW_FORM_ref1: // Fall thru
- case dwarf::DW_FORM_data1: Size = 1; break;
- case dwarf::DW_FORM_ref2: // Fall thru
- case dwarf::DW_FORM_data2: Size = 2; break;
- case dwarf::DW_FORM_sec_offset: // Fall thru
- case dwarf::DW_FORM_strp: // Fall thru
- case dwarf::DW_FORM_ref4: // Fall thru
- case dwarf::DW_FORM_data4: Size = 4; break;
- case dwarf::DW_FORM_ref8: // Fall thru
- case dwarf::DW_FORM_ref_sig8: // Fall thru
- case dwarf::DW_FORM_data8: Size = 8; break;
- case dwarf::DW_FORM_GNU_str_index: Asm->EmitULEB128(Integer); return;
- case dwarf::DW_FORM_GNU_addr_index: Asm->EmitULEB128(Integer); return;
- case dwarf::DW_FORM_udata: Asm->EmitULEB128(Integer); return;
- case dwarf::DW_FORM_sdata: Asm->EmitSLEB128(Integer); return;
+ case dwarf::DW_FORM_flag:
+ LLVM_FALLTHROUGH;
+ case dwarf::DW_FORM_ref1:
+ LLVM_FALLTHROUGH;
+ case dwarf::DW_FORM_data1:
+ LLVM_FALLTHROUGH;
+ case dwarf::DW_FORM_ref2:
+ LLVM_FALLTHROUGH;
+ case dwarf::DW_FORM_data2:
+ LLVM_FALLTHROUGH;
+ case dwarf::DW_FORM_strp:
+ LLVM_FALLTHROUGH;
+ case dwarf::DW_FORM_ref4:
+ LLVM_FALLTHROUGH;
+ case dwarf::DW_FORM_data4:
+ LLVM_FALLTHROUGH;
+ case dwarf::DW_FORM_ref8:
+ LLVM_FALLTHROUGH;
+ case dwarf::DW_FORM_ref_sig8:
+ LLVM_FALLTHROUGH;
+ case dwarf::DW_FORM_data8:
+ LLVM_FALLTHROUGH;
+ case dwarf::DW_FORM_GNU_ref_alt:
+ LLVM_FALLTHROUGH;
+ case dwarf::DW_FORM_GNU_strp_alt:
+ LLVM_FALLTHROUGH;
+ case dwarf::DW_FORM_line_strp:
+ LLVM_FALLTHROUGH;
+ case dwarf::DW_FORM_sec_offset:
+ LLVM_FALLTHROUGH;
+ case dwarf::DW_FORM_strp_sup:
+ LLVM_FALLTHROUGH;
+ case dwarf::DW_FORM_ref_sup:
+ LLVM_FALLTHROUGH;
case dwarf::DW_FORM_addr:
- Size = Asm->getPointerSize();
- break;
+ LLVM_FALLTHROUGH;
case dwarf::DW_FORM_ref_addr:
- Size = SizeOf(Asm, dwarf::DW_FORM_ref_addr);
- break;
+ Asm->OutStreamer->EmitIntValue(Integer, SizeOf(Asm, Form));
+ return;
+ case dwarf::DW_FORM_GNU_str_index:
+ LLVM_FALLTHROUGH;
+ case dwarf::DW_FORM_GNU_addr_index:
+ LLVM_FALLTHROUGH;
+ case dwarf::DW_FORM_ref_udata:
+ LLVM_FALLTHROUGH;
+ case dwarf::DW_FORM_udata:
+ Asm->EmitULEB128(Integer);
+ return;
+ case dwarf::DW_FORM_sdata:
+ Asm->EmitSLEB128(Integer);
+ return;
default: llvm_unreachable("DIE Value form not supported yet");
}
- Asm->OutStreamer->EmitIntValue(Integer, Size);
}
/// SizeOf - Determine size of integer value in bytes.
///
unsigned DIEInteger::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const {
switch (Form) {
+ case dwarf::DW_FORM_implicit_const: LLVM_FALLTHROUGH;
case dwarf::DW_FORM_flag_present: return 0;
- case dwarf::DW_FORM_flag: // Fall thru
- case dwarf::DW_FORM_ref1: // Fall thru
+ case dwarf::DW_FORM_flag: LLVM_FALLTHROUGH;
+ case dwarf::DW_FORM_ref1: LLVM_FALLTHROUGH;
case dwarf::DW_FORM_data1: return sizeof(int8_t);
- case dwarf::DW_FORM_ref2: // Fall thru
+ case dwarf::DW_FORM_ref2: LLVM_FALLTHROUGH;
case dwarf::DW_FORM_data2: return sizeof(int16_t);
- case dwarf::DW_FORM_sec_offset: // Fall thru
- case dwarf::DW_FORM_strp: // Fall thru
- case dwarf::DW_FORM_ref4: // Fall thru
+ case dwarf::DW_FORM_ref4: LLVM_FALLTHROUGH;
case dwarf::DW_FORM_data4: return sizeof(int32_t);
- case dwarf::DW_FORM_ref8: // Fall thru
- case dwarf::DW_FORM_ref_sig8: // Fall thru
+ case dwarf::DW_FORM_ref8: LLVM_FALLTHROUGH;
+ case dwarf::DW_FORM_ref_sig8: LLVM_FALLTHROUGH;
case dwarf::DW_FORM_data8: return sizeof(int64_t);
- case dwarf::DW_FORM_GNU_str_index: return getULEB128Size(Integer);
- case dwarf::DW_FORM_GNU_addr_index: return getULEB128Size(Integer);
- case dwarf::DW_FORM_udata: return getULEB128Size(Integer);
- case dwarf::DW_FORM_sdata: return getSLEB128Size(Integer);
- case dwarf::DW_FORM_addr:
- return AP->getPointerSize();
case dwarf::DW_FORM_ref_addr:
- if (AP->OutStreamer->getContext().getDwarfVersion() == 2)
+ if (AP->getDwarfVersion() == 2)
return AP->getPointerSize();
- return sizeof(int32_t);
+ LLVM_FALLTHROUGH;
+ case dwarf::DW_FORM_strp:
+ LLVM_FALLTHROUGH;
+ case dwarf::DW_FORM_GNU_ref_alt:
+ LLVM_FALLTHROUGH;
+ case dwarf::DW_FORM_GNU_strp_alt:
+ LLVM_FALLTHROUGH;
+ case dwarf::DW_FORM_line_strp:
+ LLVM_FALLTHROUGH;
+ case dwarf::DW_FORM_sec_offset:
+ LLVM_FALLTHROUGH;
+ case dwarf::DW_FORM_strp_sup:
+ LLVM_FALLTHROUGH;
+ case dwarf::DW_FORM_ref_sup:
+ switch (AP->OutStreamer->getContext().getDwarfFormat()) {
+ case dwarf::DWARF32:
+ return 4;
+ case dwarf::DWARF64:
+ return 8;
+ }
+ llvm_unreachable("Invalid DWARF format");
+ case dwarf::DW_FORM_GNU_str_index:
+ LLVM_FALLTHROUGH;
+ case dwarf::DW_FORM_GNU_addr_index:
+ LLVM_FALLTHROUGH;
+ case dwarf::DW_FORM_ref_udata:
+ LLVM_FALLTHROUGH;
+ case dwarf::DW_FORM_udata:
+ return getULEB128Size(Integer);
+ case dwarf::DW_FORM_sdata:
+ return getSLEB128Size(Integer);
+ case dwarf::DW_FORM_addr:
+ return AP->getPointerSize();
default: llvm_unreachable("DIE Value form not supported yet");
}
}
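The DW_FORM_ref_addr case encodes the rule that DWARF v2 sized such references like addresses, while later versions tie them to the section-offset width:

  // DWARF v2            -> AP->getPointerSize() (4 or 8)
  // DWARF v3+, DWARF32  -> 4 bytes
  // DWARF v3+, DWARF64  -> 8 bytes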
@@ -318,7 +484,7 @@ void DIEInteger::print(raw_ostream &O) const {
/// EmitValue - Emit expression value.
///
void DIEExpr::EmitValue(const AsmPrinter *AP, dwarf::Form Form) const {
- AP->OutStreamer->EmitValue(Expr, SizeOf(AP, Form));
+ AP->EmitDebugValue(Expr, SizeOf(AP, Form));
}
/// SizeOf - Determine size of expression value in bytes.
@@ -343,7 +509,8 @@ void DIELabel::EmitValue(const AsmPrinter *AP, dwarf::Form Form) const {
AP->EmitLabelReference(Label, SizeOf(AP, Form),
Form == dwarf::DW_FORM_strp ||
Form == dwarf::DW_FORM_sec_offset ||
- Form == dwarf::DW_FORM_ref_addr);
+ Form == dwarf::DW_FORM_ref_addr ||
+ Form == dwarf::DW_FORM_data4);
}
/// SizeOf - Determine size of label value in bytes.
@@ -435,6 +602,29 @@ void DIEString::print(raw_ostream &O) const {
}
//===----------------------------------------------------------------------===//
+// DIEInlineString Implementation
+//===----------------------------------------------------------------------===//
+void DIEInlineString::EmitValue(const AsmPrinter *AP, dwarf::Form Form) const {
+ if (Form == dwarf::DW_FORM_string) {
+ for (char ch : S)
+ AP->EmitInt8(ch);
+ AP->EmitInt8(0);
+ return;
+ }
+ llvm_unreachable("Expected valid string form");
+}
+
+unsigned DIEInlineString::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const {
+ // Emit string bytes + NULL byte.
+ return S.size() + 1;
+}
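Note that the size is independent of the AsmPrinter and the form: an inline string "abc" always occupies 4 bytes, three characters plus the terminating NUL.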
+
+LLVM_DUMP_METHOD
+void DIEInlineString::print(raw_ostream &O) const {
+ O << "InlineString: " << S;
+}
+
+//===----------------------------------------------------------------------===//
// DIEEntry Implementation
//===----------------------------------------------------------------------===//
@@ -442,35 +632,69 @@ void DIEString::print(raw_ostream &O) const {
///
void DIEEntry::EmitValue(const AsmPrinter *AP, dwarf::Form Form) const {
- if (Form == dwarf::DW_FORM_ref_addr) {
- const DwarfDebug *DD = AP->getDwarfDebug();
- unsigned Addr = Entry->getOffset();
- assert(!DD->useSplitDwarf() && "TODO: dwo files can't have relocations.");
- // For DW_FORM_ref_addr, output the offset from beginning of debug info
- // section. Entry->getOffset() returns the offset from start of the
- // compile unit.
- DwarfCompileUnit *CU = DD->lookupUnit(Entry->getUnit());
- assert(CU && "CUDie should belong to a CU.");
- Addr += CU->getDebugInfoOffset();
- if (AP->MAI->doesDwarfUseRelocationsAcrossSections())
- AP->EmitLabelPlusOffset(CU->getSectionSym(), Addr,
- DIEEntry::getRefAddrSize(AP));
- else
- AP->OutStreamer->EmitIntValue(Addr, DIEEntry::getRefAddrSize(AP));
- } else
- AP->EmitInt32(Entry->getOffset());
-}
-
-unsigned DIEEntry::getRefAddrSize(const AsmPrinter *AP) {
- // DWARF4: References that use the attribute form DW_FORM_ref_addr are
- // specified to be four bytes in the DWARF 32-bit format and eight bytes
- // in the DWARF 64-bit format, while DWARF Version 2 specifies that such
- // references have the same size as an address on the target system.
- const DwarfDebug *DD = AP->getDwarfDebug();
- assert(DD && "Expected Dwarf Debug info to be available");
- if (DD->getDwarfVersion() == 2)
- return AP->getPointerSize();
- return sizeof(int32_t);
+ switch (Form) {
+ case dwarf::DW_FORM_ref1:
+ case dwarf::DW_FORM_ref2:
+ case dwarf::DW_FORM_ref4:
+ case dwarf::DW_FORM_ref8:
+ AP->OutStreamer->EmitIntValue(Entry->getOffset(), SizeOf(AP, Form));
+ return;
+
+ case dwarf::DW_FORM_ref_udata:
+ AP->EmitULEB128(Entry->getOffset());
+ return;
+
+ case dwarf::DW_FORM_ref_addr: {
+ // Get the absolute offset for this DIE within the debug info/types section.
+ unsigned Addr = Entry->getDebugSectionOffset();
+ if (AP->MAI->doesDwarfUseRelocationsAcrossSections()) {
+ const DwarfDebug *DD = AP->getDwarfDebug();
+ if (DD)
+ assert(!DD->useSplitDwarf() &&
+ "TODO: dwo files can't have relocations.");
+ const DIEUnit *Unit = Entry->getUnit();
+ assert(Unit && "CUDie should belong to a CU.");
+ MCSection *Section = Unit->getSection();
+ if (Section) {
+ const MCSymbol *SectionSym = Section->getBeginSymbol();
+ AP->EmitLabelPlusOffset(SectionSym, Addr, SizeOf(AP, Form), true);
+ return;
+ }
+ }
+ AP->OutStreamer->EmitIntValue(Addr, SizeOf(AP, Form));
+ return;
+ }
+ default:
+ llvm_unreachable("Improper form for DIE reference");
+ }
+}
+
+unsigned DIEEntry::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const {
+ switch (Form) {
+ case dwarf::DW_FORM_ref1:
+ return 1;
+ case dwarf::DW_FORM_ref2:
+ return 2;
+ case dwarf::DW_FORM_ref4:
+ return 4;
+ case dwarf::DW_FORM_ref8:
+ return 8;
+ case dwarf::DW_FORM_ref_udata:
+ return getULEB128Size(Entry->getOffset());
+ case dwarf::DW_FORM_ref_addr:
+ if (AP->getDwarfVersion() == 2)
+ return AP->getPointerSize();
+ switch (AP->OutStreamer->getContext().getDwarfFormat()) {
+ case dwarf::DWARF32:
+ return 4;
+ case dwarf::DWARF64:
+ return 8;
+ }
+ llvm_unreachable("Invalid DWARF format");
+
+ default:
+ llvm_unreachable("Improper form for DIE reference");
+ }
}
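The DW_FORM_ref_addr case above encodes the spec rule directly: DWARF v2 made
such references the size of a target address, while v3 and later tie them to
the offset size of the 32/64-bit DWARF format. The same decision as a
standalone sketch (hypothetical free function with the inputs made explicit):

    // Size in bytes of a DW_FORM_ref_addr reference.
    static unsigned refAddrSize(unsigned DwarfVersion, bool IsDwarf64,
                                unsigned PointerSize) {
      if (DwarfVersion == 2)
        return PointerSize;     // v2: same size as a target address.
      return IsDwarf64 ? 8 : 4; // v3+: the DWARF format's offset size.
    }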
LLVM_DUMP_METHOD
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp
index 74c47d1..d8ecc7c 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp
@@ -330,6 +330,12 @@ void DIEHash::hashAttribute(const DIEValue &Value, dwarf::Tag Tag) {
addULEB128(dwarf::DW_FORM_string);
addString(Value.getDIEString().getString());
break;
+ case DIEValue::isInlineString:
+ addULEB128('A');
+ addULEB128(Attribute);
+ addULEB128(dwarf::DW_FORM_string);
+ addString(Value.getDIEInlineString().getString());
+ break;
case DIEValue::isBlock:
case DIEValue::isLoc:
case DIEValue::isLocList:
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp
index adc536f..22fd7bb 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp
@@ -31,7 +31,7 @@ static unsigned isDescribedByReg(const MachineInstr &MI) {
assert(MI.isDebugValue());
assert(MI.getNumOperands() == 4);
// If location of variable is described using a register (directly or
- // indirecltly), this register is always a first operand.
+ // indirectly), this register is always a first operand.
return MI.getOperand(0).isReg() ? MI.getOperand(0).getReg() : 0;
}
@@ -83,7 +83,7 @@ static void dropRegDescribedVar(RegDescribedVarsMap &RegVars, unsigned RegNo,
const auto &I = RegVars.find(RegNo);
assert(RegNo != 0U && I != RegVars.end());
auto &VarSet = I->second;
- const auto &VarPos = std::find(VarSet.begin(), VarSet.end(), Var);
+ const auto &VarPos = find(VarSet, Var);
assert(VarPos != VarSet.end());
VarSet.erase(VarPos);
// Don't keep empty sets in a map to keep it as small as possible.
@@ -96,7 +96,7 @@ static void addRegDescribedVar(RegDescribedVarsMap &RegVars, unsigned RegNo,
InlinedVariable Var) {
assert(RegNo != 0U);
auto &VarSet = RegVars[RegNo];
- assert(std::find(VarSet.begin(), VarSet.end(), Var) == VarSet.end());
+ assert(!is_contained(VarSet, Var));
VarSet.push_back(Var);
}
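The find and is_contained calls are the range-based helpers from
llvm/ADT/STLExtras.h, which wrap the begin()/end() boilerplate of the std
algorithms. A sketch of is_contained in those terms (illustration only, not
the LLVM definition):

    #include <algorithm>
    #include <iterator>

    // True when Value occurs anywhere in Range.
    template <typename R, typename T>
    bool isContained(R &&Range, const T &Value) {
      return std::find(std::begin(Range), std::end(Range), Value) !=
             std::end(Range);
    }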
@@ -134,8 +134,8 @@ static const MachineInstr *getFirstEpilogueInst(const MachineBasicBlock &MBB) {
// as the return instruction.
DebugLoc LastLoc = LastMI->getDebugLoc();
auto Res = LastMI;
- for (MachineBasicBlock::const_reverse_iterator I(std::next(LastMI)),
- E = MBB.rend();
+ for (MachineBasicBlock::const_reverse_iterator I = LastMI.getReverse(),
+ E = MBB.rend();
I != E; ++I) {
if (I->getDebugLoc() != LastLoc)
return &*Res;
@@ -164,7 +164,9 @@ static void collectChangingRegs(const MachineFunction *MF,
// Look for register defs and register masks. Register masks are
// typically on calls and they clobber everything not in the mask.
for (const MachineOperand &MO : MI.operands()) {
- if (MO.isReg() && MO.isDef() && MO.getReg()) {
+ // Skip virtual registers since they are handled by the parent.
+ if (MO.isReg() && MO.isDef() && MO.getReg() &&
+ !TRI->isVirtualRegister(MO.getReg())) {
for (MCRegAliasIterator AI(MO.getReg(), TRI, true); AI.isValid();
++AI)
Regs.set(*AI);
@@ -192,12 +194,18 @@ void llvm::calculateDbgValueHistory(const MachineFunction *MF,
// some variables.
for (const MachineOperand &MO : MI.operands()) {
if (MO.isReg() && MO.isDef() && MO.getReg()) {
+ // If this is a virtual register, only clobber it since it doesn't
+ // have aliases.
+ if (TRI->isVirtualRegister(MO.getReg()))
+ clobberRegisterUses(RegVars, MO.getReg(), Result, MI);
// If this is a register def operand, it may end a debug value
// range.
- for (MCRegAliasIterator AI(MO.getReg(), TRI, true); AI.isValid();
- ++AI)
- if (ChangingRegs.test(*AI))
- clobberRegisterUses(RegVars, *AI, Result, MI);
+ else {
+ for (MCRegAliasIterator AI(MO.getReg(), TRI, true); AI.isValid();
+ ++AI)
+ if (ChangingRegs.test(*AI))
+ clobberRegisterUses(RegVars, *AI, Result, MI);
+ }
} else if (MO.isRegMask()) {
// If this is a register mask operand, clobber all debug values in
// non-CSRs.
@@ -238,7 +246,8 @@ void llvm::calculateDbgValueHistory(const MachineFunction *MF,
if (!MBB.empty() && &MBB != &MF->back()) {
for (auto I = RegVars.begin(), E = RegVars.end(); I != E;) {
auto CurElem = I++; // CurElem can be erased below.
- if (ChangingRegs.test(CurElem->first))
+ if (TRI->isVirtualRegister(CurElem->first) ||
+ ChangingRegs.test(CurElem->first))
clobberRegisterUses(RegVars, CurElem, Result, MBB.back());
}
}
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp
index 16ffe2e..9419098 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp
@@ -18,6 +18,7 @@
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/IR/DebugInfo.h"
+#include "llvm/MC/MCStreamer.h"
#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;
@@ -62,14 +63,14 @@ MCSymbol *DebugHandlerBase::getLabelAfterInsn(const MachineInstr *MI) {
return LabelsAfterInsn.lookup(MI);
}
-// Determine the relative position of the pieces described by P1 and P2.
-// Returns -1 if P1 is entirely before P2, 0 if P1 and P2 overlap,
-// 1 if P1 is entirely after P2.
-int DebugHandlerBase::pieceCmp(const DIExpression *P1, const DIExpression *P2) {
- unsigned l1 = P1->getBitPieceOffset();
- unsigned l2 = P2->getBitPieceOffset();
- unsigned r1 = l1 + P1->getBitPieceSize();
- unsigned r2 = l2 + P2->getBitPieceSize();
+int DebugHandlerBase::fragmentCmp(const DIExpression *P1,
+ const DIExpression *P2) {
+ auto Fragment1 = *P1->getFragmentInfo();
+ auto Fragment2 = *P2->getFragmentInfo();
+ unsigned l1 = Fragment1.OffsetInBits;
+ unsigned l2 = Fragment2.OffsetInBits;
+ unsigned r1 = l1 + Fragment1.SizeInBits;
+ unsigned r2 = l2 + Fragment2.SizeInBits;
if (r1 <= l2)
return -1;
else if (r2 <= l1)
@@ -78,11 +79,11 @@ int DebugHandlerBase::pieceCmp(const DIExpression *P1, const DIExpression *P2) {
return 0;
}
-/// Determine whether two variable pieces overlap.
-bool DebugHandlerBase::piecesOverlap(const DIExpression *P1, const DIExpression *P2) {
- if (!P1->isBitPiece() || !P2->isBitPiece())
+bool DebugHandlerBase::fragmentsOverlap(const DIExpression *P1,
+ const DIExpression *P2) {
+ if (!P1->isFragment() || !P2->isFragment())
return true;
- return pieceCmp(P1, P2) == 0;
+ return fragmentCmp(P1, P2) == 0;
}
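fragmentCmp is a three-way comparison on half-open bit ranges
[offset, offset + size): -1 if the first ends before the second starts, 1 if
it starts after the second ends, and 0 otherwise. The same logic on raw
numbers (illustration only):

    // Compare two half-open bit ranges [L, L + Size).
    static int cmpBitRanges(unsigned L1, unsigned Size1,
                            unsigned L2, unsigned Size2) {
      unsigned R1 = L1 + Size1, R2 = L2 + Size2;
      if (R1 <= L2) return -1; // first lies entirely before second
      if (R2 <= L1) return 1;  // first lies entirely after second
      return 0;                // the bit ranges overlap
    }

So fragments at bits [0, 32) and [32, 64) are disjoint, while a fragment at
[0, 64) overlaps both.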
/// If this type is derived from a base type then return base type size.
@@ -97,7 +98,7 @@ uint64_t DebugHandlerBase::getBaseTypeSize(const DITypeRef TyRef) {
if (Tag != dwarf::DW_TAG_member && Tag != dwarf::DW_TAG_typedef &&
Tag != dwarf::DW_TAG_const_type && Tag != dwarf::DW_TAG_volatile_type &&
- Tag != dwarf::DW_TAG_restrict_type)
+ Tag != dwarf::DW_TAG_restrict_type && Tag != dwarf::DW_TAG_atomic_type)
return DDTy->getSizeInBits();
DIType *BaseType = DDTy->getBaseType().resolve();
@@ -141,14 +142,15 @@ void DebugHandlerBase::beginFunction(const MachineFunction *MF) {
if (DIVar->isParameter() &&
getDISubprogram(DIVar->getScope())->describes(MF->getFunction())) {
LabelsBeforeInsn[Ranges.front().first] = Asm->getFunctionBegin();
- if (Ranges.front().first->getDebugExpression()->isBitPiece()) {
- // Mark all non-overlapping initial pieces.
+ if (Ranges.front().first->getDebugExpression()->isFragment()) {
+ // Mark all non-overlapping initial fragments.
for (auto I = Ranges.begin(); I != Ranges.end(); ++I) {
- const DIExpression *Piece = I->first->getDebugExpression();
+ const DIExpression *Fragment = I->first->getDebugExpression();
if (std::all_of(Ranges.begin(), I,
[&](DbgValueHistoryMap::InstrRange Pred) {
- return !piecesOverlap(Piece, Pred.first->getDebugExpression());
- }))
+ return !fragmentsOverlap(
+ Fragment, Pred.first->getDebugExpression());
+ }))
LabelsBeforeInsn[I->first] = Asm->getFunctionBegin();
else
break;
@@ -200,8 +202,10 @@ void DebugHandlerBase::endInstruction() {
assert(CurMI != nullptr);
// Don't create a new label after DBG_VALUE instructions.
// They don't generate code.
- if (!CurMI->isDebugValue())
+ if (!CurMI->isDebugValue()) {
PrevLabel = nullptr;
+ PrevInstBB = CurMI->getParent();
+ }
DenseMap<const MachineInstr *, MCSymbol *>::iterator I =
LabelsAfterInsn.find(CurMI);
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.h
index b8bbcec..c00fa18 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.h
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.h
@@ -38,10 +38,12 @@ protected:
MachineModuleInfo *MMI;
/// Previous instruction's location information. This is used to
- /// determine label location to indicate scope boundries in dwarf
- /// debug info.
+ /// determine label location to indicate scope boundaries in debug info.
+ /// We track the previous instruction's source location (unless it is line
+ /// 0), the most recently emitted label, and the instruction's parent BB.
DebugLoc PrevInstLoc;
MCSymbol *PrevLabel = nullptr;
+ const MachineBasicBlock *PrevInstBB = nullptr;
/// This location indicates end of function prologue and beginning of
/// function body.
@@ -92,13 +94,13 @@ public:
/// Return Label immediately following the instruction.
MCSymbol *getLabelAfterInsn(const MachineInstr *MI);
- /// Determine the relative position of the pieces described by P1 and P2.
- /// Returns -1 if P1 is entirely before P2, 0 if P1 and P2 overlap,
- /// 1 if P1 is entirely after P2.
- static int pieceCmp(const DIExpression *P1, const DIExpression *P2);
+ /// Determine the relative position of the fragments described by P1 and P2.
+ /// Returns -1 if P1 is entirely before P2, 0 if P1 and P2 overlap, 1 if P1 is
+ /// entirely after P2.
+ static int fragmentCmp(const DIExpression *P1, const DIExpression *P2);
- /// Determine whether two variable pieces overlap.
- static bool piecesOverlap(const DIExpression *P1, const DIExpression *P2);
+ /// Determine whether two variable fragments overlap.
+ static bool fragmentsOverlap(const DIExpression *P1, const DIExpression *P2);
/// If this type is derived from a base type then return base type size.
static uint64_t getBaseTypeSize(const DITypeRef TyRef);
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DebugLocEntry.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DebugLocEntry.h
index 20acd45..36fb150 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DebugLocEntry.h
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DebugLocEntry.h
@@ -72,7 +72,7 @@ public:
const ConstantFP *getConstantFP() const { return Constant.CFP; }
const ConstantInt *getConstantInt() const { return Constant.CIP; }
MachineLocation getLoc() const { return Loc; }
- bool isBitPiece() const { return getExpression()->isBitPiece(); }
+ bool isFragment() const { return getExpression()->isFragment(); }
const DIExpression *getExpression() const { return Expression; }
friend bool operator==(const Value &, const Value &);
friend bool operator<(const Value &, const Value &);
@@ -128,8 +128,8 @@ public:
void addValues(ArrayRef<DebugLocEntry::Value> Vals) {
Values.append(Vals.begin(), Vals.end());
sortUniqueValues();
- assert(std::all_of(Values.begin(), Values.end(), [](DebugLocEntry::Value V){
- return V.isBitPiece();
+ assert(all_of(Values, [](DebugLocEntry::Value V) {
+ return V.isFragment();
}) && "value must be a piece");
}
@@ -172,11 +172,11 @@ inline bool operator==(const DebugLocEntry::Value &A,
llvm_unreachable("unhandled EntryKind");
}
-/// \brief Compare two pieces based on their offset.
+/// Compare two fragments based on their offset.
inline bool operator<(const DebugLocEntry::Value &A,
const DebugLocEntry::Value &B) {
- return A.getExpression()->getBitPieceOffset() <
- B.getExpression()->getBitPieceOffset();
+ return A.getExpression()->getFragmentInfo()->OffsetInBits <
+ B.getExpression()->getFragmentInfo()->OffsetInBits;
}
}
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp
index 4ad3e18..9c324ea 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp
@@ -221,9 +221,7 @@ void DwarfAccelTable::EmitData(AsmPrinter *Asm, DwarfDebug *D) {
Asm->EmitInt32((*HI)->Data.Values.size());
for (HashDataContents *HD : (*HI)->Data.Values) {
// Emit the DIE offset
- DwarfCompileUnit *CU = D->lookupUnit(HD->Die->getUnit());
- assert(CU && "Accelerated DIE should belong to a CU.");
- Asm->EmitInt32(HD->Die->getOffset() + CU->getDebugInfoOffset());
+ Asm->EmitInt32(HD->Die->getDebugSectionOffset());
// If we have multiple Atoms emit that info too.
// FIXME: A bit of a hack, we either emit only one atom or all info.
if (HeaderData.Atoms.size() > 1) {
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.h
index 4d81441..05ac1cb 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.h
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.h
@@ -126,8 +126,7 @@ public:
uint16_t type; // enum AtomType
uint16_t form; // DWARF DW_FORM_ defines
- LLVM_CONSTEXPR Atom(uint16_t type, uint16_t form)
- : type(type), form(form) {}
+ constexpr Atom(uint16_t type, uint16_t form) : type(type), form(form) {}
#ifndef NDEBUG
void print(raw_ostream &O) {
O << "Type: " << dwarf::AtomTypeString(type) << "\n"
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp
index 2eae1b2..e08306b 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp
@@ -39,16 +39,16 @@
using namespace llvm;
DwarfCFIExceptionBase::DwarfCFIExceptionBase(AsmPrinter *A)
- : EHStreamer(A), shouldEmitCFI(false) {}
+ : EHStreamer(A), shouldEmitCFI(false), hasEmittedCFISections(false) {}
void DwarfCFIExceptionBase::markFunctionEnd() {
endFragment();
- if (MMI->getLandingPads().empty())
- return;
-
// Map all labels and get rid of any dead landing pads.
- MMI->TidyLandingPads();
+ if (!Asm->MF->getLandingPads().empty()) {
+ MachineFunction *NonConstMF = const_cast<MachineFunction*>(Asm->MF);
+ NonConstMF->tidyLandingPads();
+ }
}
void DwarfCFIExceptionBase::endFragment() {
@@ -59,7 +59,7 @@ void DwarfCFIExceptionBase::endFragment() {
DwarfCFIException::DwarfCFIException(AsmPrinter *A)
: DwarfCFIExceptionBase(A), shouldEmitPersonality(false),
forceEmitPersonality(false), shouldEmitLSDA(false),
- shouldEmitMoves(false), moveTypeModule(AsmPrinter::CFI_M_None) {}
+ shouldEmitMoves(false) {}
DwarfCFIException::~DwarfCFIException() {}
@@ -70,9 +70,6 @@ void DwarfCFIException::endModule() {
if (!Asm->MAI->usesCFIForEH())
return;
- if (moveTypeModule == AsmPrinter::CFI_M_Debug)
- Asm->OutStreamer->EmitCFISections(false, true);
-
const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
unsigned PerEncoding = TLOF.getPersonalityEncoding();
@@ -98,14 +95,10 @@ void DwarfCFIException::beginFunction(const MachineFunction *MF) {
const Function *F = MF->getFunction();
// If any landing pads survive, we need an EH table.
- bool hasLandingPads = !MMI->getLandingPads().empty();
+ bool hasLandingPads = !MF->getLandingPads().empty();
// See if we need frame move info.
AsmPrinter::CFIMoveType MoveType = Asm->needsCFIMoves();
- if (MoveType == AsmPrinter::CFI_M_EH ||
- (MoveType == AsmPrinter::CFI_M_Debug &&
- moveTypeModule == AsmPrinter::CFI_M_None))
- moveTypeModule = MoveType;
shouldEmitMoves = MoveType != AsmPrinter::CFI_M_None;
@@ -143,6 +136,12 @@ void DwarfCFIException::beginFragment(const MachineBasicBlock *MBB,
if (!shouldEmitCFI)
return;
+ if (!hasEmittedCFISections) {
+ if (Asm->needsOnlyDebugCFIMoves())
+ Asm->OutStreamer->EmitCFISections(false, true);
+ hasEmittedCFISections = true;
+ }
+
Asm->OutStreamer->EmitCFIStartProc(/*IsSimple=*/false);
// Indicate personality routine, if any.
@@ -160,8 +159,7 @@ void DwarfCFIException::beginFragment(const MachineBasicBlock *MBB,
const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
unsigned PerEncoding = TLOF.getPersonalityEncoding();
- const MCSymbol *Sym =
- TLOF.getCFIPersonalitySymbol(P, *Asm->Mang, Asm->TM, MMI);
+ const MCSymbol *Sym = TLOF.getCFIPersonalitySymbol(P, Asm->TM, MMI);
Asm->OutStreamer->EmitCFIPersonality(Sym, PerEncoding);
// Provide LSDA information.
@@ -171,7 +169,7 @@ void DwarfCFIException::beginFragment(const MachineBasicBlock *MBB,
/// endFunction - Gather and emit post-function exception information.
///
-void DwarfCFIException::endFunction(const MachineFunction *) {
+void DwarfCFIException::endFunction(const MachineFunction *MF) {
if (!shouldEmitPersonality)
return;
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
index 7822814c..d904372 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
@@ -73,36 +73,8 @@ unsigned DwarfCompileUnit::getOrCreateSourceID(StringRef FileName,
Asm->OutStreamer->hasRawTextSupport() ? 0 : getUniqueID());
}
-// Return const expression if value is a GEP to access merged global
-// constant. e.g.
-// i8* getelementptr ({ i8, i8, i8, i8 }* @_MergedGlobals, i32 0, i32 0)
-static const ConstantExpr *getMergedGlobalExpr(const Value *V) {
- const ConstantExpr *CE = dyn_cast_or_null<ConstantExpr>(V);
- if (!CE || CE->getNumOperands() != 3 ||
- CE->getOpcode() != Instruction::GetElementPtr)
- return nullptr;
-
- // First operand points to a global struct.
- Value *Ptr = CE->getOperand(0);
- GlobalValue *GV = dyn_cast<GlobalValue>(Ptr);
- if (!GV || !isa<StructType>(GV->getValueType()))
- return nullptr;
-
- // Second operand is zero.
- const ConstantInt *CI = dyn_cast_or_null<ConstantInt>(CE->getOperand(1));
- if (!CI || !CI->isZero())
- return nullptr;
-
- // Third operand is offset.
- if (!isa<ConstantInt>(CE->getOperand(2)))
- return nullptr;
-
- return CE;
-}
-
-/// getOrCreateGlobalVariableDIE - get or create global variable DIE.
DIE *DwarfCompileUnit::getOrCreateGlobalVariableDIE(
- const DIGlobalVariable *GV) {
+ const DIGlobalVariable *GV, ArrayRef<GlobalExpr> GlobalExprs) {
// Check for pre-existence.
if (DIE *Die = getDIE(GV))
return Die;
@@ -126,6 +98,10 @@ DIE *DwarfCompileUnit::getOrCreateGlobalVariableDIE(
// We need the declaration DIE that is in the static member's class.
DIE *VariableSpecDIE = getOrCreateStaticMemberDIE(SDMDecl);
addDIEEntry(*VariableDIE, dwarf::DW_AT_specification, *VariableSpecDIE);
+ // If the global variable's type is different from the one in the class
+ // member type, assume that it's more specific and also emit it.
+ if (GTy != DD->resolve(SDMDecl->getBaseType()))
+ addType(*VariableDIE, GTy);
} else {
DeclContext = GV->getScope();
// Add name and type.
@@ -145,73 +121,82 @@ DIE *DwarfCompileUnit::getOrCreateGlobalVariableDIE(
else
addGlobalName(GV->getName(), *VariableDIE, DeclContext);
+ if (uint32_t AlignInBytes = GV->getAlignInBytes())
+ addUInt(*VariableDIE, dwarf::DW_AT_alignment, dwarf::DW_FORM_udata,
+ AlignInBytes);
+
// Add location.
bool addToAccelTable = false;
- if (auto *Global = dyn_cast_or_null<GlobalVariable>(GV->getVariable())) {
- // We cannot describe the location of dllimport'd variables: the computation
- // of their address requires loads from the IAT.
- if (!Global->hasDLLImportStorageClass()) {
+ DIELoc *Loc = nullptr;
+ std::unique_ptr<DIEDwarfExpression> DwarfExpr;
+ bool AllConstant = std::all_of(
+ GlobalExprs.begin(), GlobalExprs.end(),
+ [&](const GlobalExpr GE) {
+ return GE.Expr && GE.Expr->isConstant();
+ });
+
+ for (const auto &GE : GlobalExprs) {
+ const GlobalVariable *Global = GE.Var;
+ const DIExpression *Expr = GE.Expr;
+ // For compatibility with DWARF 3 and earlier,
+ // DW_AT_location(DW_OP_constu, X, DW_OP_stack_value) becomes
+ // DW_AT_const_value(X).
+ if (GlobalExprs.size() == 1 && Expr && Expr->isConstant()) {
+ addConstantValue(*VariableDIE, /*Unsigned=*/true, Expr->getElement(1));
+ // We cannot describe the location of dllimport'd variables: the
+ // computation of their address requires loads from the IAT.
+ } else if ((Global && !Global->hasDLLImportStorageClass()) || AllConstant) {
+ if (!Loc) {
+ Loc = new (DIEValueAllocator) DIELoc;
+ DwarfExpr = llvm::make_unique<DIEDwarfExpression>(*Asm, *this, *Loc);
+ }
addToAccelTable = true;
- DIELoc *Loc = new (DIEValueAllocator) DIELoc;
- const MCSymbol *Sym = Asm->getSymbol(Global);
- if (Global->isThreadLocal()) {
- if (Asm->TM.Options.EmulatedTLS) {
- // TODO: add debug info for emulated thread local mode.
- } else {
- // FIXME: Make this work with -gsplit-dwarf.
- unsigned PointerSize = Asm->getDataLayout().getPointerSize();
- assert((PointerSize == 4 || PointerSize == 8) &&
- "Add support for other sizes if necessary");
- // Based on GCC's support for TLS:
- if (!DD->useSplitDwarf()) {
- // 1) Start with a constNu of the appropriate pointer size
- addUInt(*Loc, dwarf::DW_FORM_data1, PointerSize == 4
- ? dwarf::DW_OP_const4u
- : dwarf::DW_OP_const8u);
- // 2) containing the (relocated) offset of the TLS variable
- // within the module's TLS block.
- addExpr(*Loc, dwarf::DW_FORM_udata,
- Asm->getObjFileLowering().getDebugThreadLocalSymbol(Sym));
+ if (Global) {
+ const MCSymbol *Sym = Asm->getSymbol(Global);
+ if (Global->isThreadLocal()) {
+ if (Asm->TM.Options.EmulatedTLS) {
+ // TODO: add debug info for emulated thread local mode.
} else {
- addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_GNU_const_index);
- addUInt(*Loc, dwarf::DW_FORM_udata,
- DD->getAddressPool().getIndex(Sym, /* TLS */ true));
+ // FIXME: Make this work with -gsplit-dwarf.
+ unsigned PointerSize = Asm->getDataLayout().getPointerSize();
+ assert((PointerSize == 4 || PointerSize == 8) &&
+ "Add support for other sizes if necessary");
+ // Based on GCC's support for TLS:
+ if (!DD->useSplitDwarf()) {
+ // 1) Start with a constNu of the appropriate pointer size
+ addUInt(*Loc, dwarf::DW_FORM_data1,
+ PointerSize == 4 ? dwarf::DW_OP_const4u
+ : dwarf::DW_OP_const8u);
+ // 2) containing the (relocated) offset of the TLS variable
+ // within the module's TLS block.
+ addExpr(*Loc, dwarf::DW_FORM_udata,
+ Asm->getObjFileLowering().getDebugThreadLocalSymbol(Sym));
+ } else {
+ addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_GNU_const_index);
+ addUInt(*Loc, dwarf::DW_FORM_udata,
+ DD->getAddressPool().getIndex(Sym, /* TLS */ true));
+ }
+ // 3) followed by an OP to make the debugger do a TLS lookup.
+ addUInt(*Loc, dwarf::DW_FORM_data1,
+ DD->useGNUTLSOpcode() ? dwarf::DW_OP_GNU_push_tls_address
+ : dwarf::DW_OP_form_tls_address);
}
- // 3) followed by an OP to make the debugger do a TLS lookup.
- addUInt(*Loc, dwarf::DW_FORM_data1,
- DD->useGNUTLSOpcode() ? dwarf::DW_OP_GNU_push_tls_address
- : dwarf::DW_OP_form_tls_address);
+ } else {
+ DD->addArangeLabel(SymbolCU(this, Sym));
+ addOpAddress(*Loc, Sym);
}
- } else {
- DD->addArangeLabel(SymbolCU(this, Sym));
- addOpAddress(*Loc, Sym);
}
-
- addBlock(*VariableDIE, dwarf::DW_AT_location, Loc);
- if (DD->useAllLinkageNames())
- addLinkageName(*VariableDIE, GV->getLinkageName());
- }
- } else if (const ConstantInt *CI =
- dyn_cast_or_null<ConstantInt>(GV->getVariable())) {
- addConstantValue(*VariableDIE, CI, GTy);
- } else if (const ConstantExpr *CE = getMergedGlobalExpr(GV->getVariable())) {
- auto *Ptr = cast<GlobalValue>(CE->getOperand(0));
- if (!Ptr->hasDLLImportStorageClass()) {
- addToAccelTable = true;
- // GV is a merged global.
- DIELoc *Loc = new (DIEValueAllocator) DIELoc;
- MCSymbol *Sym = Asm->getSymbol(Ptr);
- DD->addArangeLabel(SymbolCU(this, Sym));
- addOpAddress(*Loc, Sym);
- addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_constu);
- SmallVector<Value *, 3> Idx(CE->op_begin() + 1, CE->op_end());
- addUInt(*Loc, dwarf::DW_FORM_udata,
- Asm->getDataLayout().getIndexedOffsetInType(Ptr->getValueType(),
- Idx));
- addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_plus);
- addBlock(*VariableDIE, dwarf::DW_AT_location, Loc);
+ if (Expr) {
+ DwarfExpr->addFragmentOffset(Expr);
+ DwarfExpr->AddExpression(Expr);
+ }
}
}
+ if (Loc)
+ addBlock(*VariableDIE, dwarf::DW_AT_location, DwarfExpr->finalize());
+
+ if (DD->useAllLinkageNames())
+ addLinkageName(*VariableDIE, GV->getLinkageName());
if (addToAccelTable) {
DD->addAccelName(GV->getName(), *VariableDIE);
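For a thread-local variable on a 64-bit target without split DWARF, the three
numbered steps above produce a location expression of roughly this shape
(schematic, with the GNU opcode substituted when DD->useGNUTLSOpcode() is
set):

    DW_AT_location:
      DW_OP_const8u <offset of the variable in the module's TLS block>
      DW_OP_form_tls_address    (or DW_OP_GNU_push_tls_address)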
@@ -265,7 +250,7 @@ void DwarfCompileUnit::initStmtList() {
// is not okay to use line_table_start here.
const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
StmtListValue =
- addSectionLabel(UnitDie, dwarf::DW_AT_stmt_list, LineTableStartSym,
+ addSectionLabel(getUnitDie(), dwarf::DW_AT_stmt_list, LineTableStartSym,
TLOF.getDwarfLineSection()->getBeginSymbol());
}
@@ -450,7 +435,7 @@ DIE *DwarfCompileUnit::constructInlinedScopeDIE(LexicalScope *Scope) {
addUInt(*ScopeDIE, dwarf::DW_AT_call_file, None,
getOrCreateSourceID(IA->getFilename(), IA->getDirectory()));
addUInt(*ScopeDIE, dwarf::DW_AT_call_line, None, IA->getLine());
- if (IA->getDiscriminator())
+ if (IA->getDiscriminator() && DD->getDwarfVersion() >= 4)
addUInt(*ScopeDIE, dwarf::DW_AT_GNU_discriminator, None,
IA->getDiscriminator());
@@ -521,9 +506,10 @@ DIE *DwarfCompileUnit::constructVariableDIEImpl(const DbgVariable &DV,
DIELoc *Loc = new (DIEValueAllocator) DIELoc;
DIEDwarfExpression DwarfExpr(*Asm, *this, *Loc);
// If there is an expression, emit raw unsigned bytes.
+ DwarfExpr.addFragmentOffset(Expr);
DwarfExpr.AddUnsignedConstant(DVInsn->getOperand(0).getImm());
- DwarfExpr.AddExpression(Expr->expr_op_begin(), Expr->expr_op_end());
- addBlock(*VariableDie, dwarf::DW_AT_location, Loc);
+ DwarfExpr.AddExpression(Expr);
+ addBlock(*VariableDie, dwarf::DW_AT_location, DwarfExpr.finalize());
} else
addConstantValue(*VariableDie, DVInsn->getOperand(0), DV.getType());
} else if (DVInsn->getOperand(0).isFPImm())
@@ -536,23 +522,21 @@ DIE *DwarfCompileUnit::constructVariableDIEImpl(const DbgVariable &DV,
}
// .. else use frame index.
- if (DV.getFrameIndex().empty())
+ if (!DV.hasFrameIndexExprs())
return VariableDie;
- auto Expr = DV.getExpression().begin();
DIELoc *Loc = new (DIEValueAllocator) DIELoc;
DIEDwarfExpression DwarfExpr(*Asm, *this, *Loc);
- for (auto FI : DV.getFrameIndex()) {
+ for (auto &Fragment : DV.getFrameIndexExprs()) {
unsigned FrameReg = 0;
const TargetFrameLowering *TFI = Asm->MF->getSubtarget().getFrameLowering();
- int Offset = TFI->getFrameIndexReference(*Asm->MF, FI, FrameReg);
- assert(Expr != DV.getExpression().end() && "Wrong number of expressions");
+ int Offset = TFI->getFrameIndexReference(*Asm->MF, Fragment.FI, FrameReg);
+ DwarfExpr.addFragmentOffset(Fragment.Expr);
DwarfExpr.AddMachineRegIndirect(*Asm->MF->getSubtarget().getRegisterInfo(),
FrameReg, Offset);
- DwarfExpr.AddExpression((*Expr)->expr_op_begin(), (*Expr)->expr_op_end());
- ++Expr;
+ DwarfExpr.AddExpression(Fragment.Expr);
}
- addBlock(*VariableDie, dwarf::DW_AT_location, Loc);
+ addBlock(*VariableDie, dwarf::DW_AT_location, DwarfExpr.finalize());
return VariableDie;
}
@@ -585,25 +569,22 @@ DIE *DwarfCompileUnit::createScopeChildrenDIE(LexicalScope *Scope,
return ObjectPointer;
}
-void DwarfCompileUnit::constructSubprogramScopeDIE(LexicalScope *Scope) {
- assert(Scope && Scope->getScopeNode());
- assert(!Scope->getInlinedAt());
- assert(!Scope->isAbstractScope());
- auto *Sub = cast<DISubprogram>(Scope->getScopeNode());
-
- DD->getProcessedSPNodes().insert(Sub);
-
+void DwarfCompileUnit::constructSubprogramScopeDIE(const DISubprogram *Sub,
+                                                   LexicalScope *Scope) {
DIE &ScopeDIE = updateSubprogramScopeDIE(Sub);
+ if (Scope) {
+ assert(!Scope->getInlinedAt());
+ assert(!Scope->isAbstractScope());
+ // Collect lexical scope children first.
+ // ObjectPointer might be a non-argument local variable if it's a
+ // block's synthetic this pointer.
+ if (DIE *ObjectPointer = createAndAddScopeChildren(Scope, ScopeDIE))
+ addDIEEntry(ScopeDIE, dwarf::DW_AT_object_pointer, *ObjectPointer);
+ }
+
// If this is a variadic function, add an unspecified parameter.
DITypeRefArray FnArgs = Sub->getType()->getTypeArray();
- // Collect lexical scope children first.
- // ObjectPointer might be a local (non-argument) local variable if it's a
- // block's synthetic this pointer.
- if (DIE *ObjectPointer = createAndAddScopeChildren(Scope, ScopeDIE))
- addDIEEntry(ScopeDIE, dwarf::DW_AT_object_pointer, *ObjectPointer);
-
// If we have a single element of null, it is a function that returns void.
// If we have more than one element and the last one is null, it is a
// variadic function.
@@ -674,7 +655,7 @@ DIE *DwarfCompileUnit::constructImportedEntityDIE(
else if (auto *T = dyn_cast<DIType>(Entity))
EntityDie = getOrCreateTypeDIE(T);
else if (auto *GV = dyn_cast<DIGlobalVariable>(Entity))
- EntityDie = getOrCreateGlobalVariableDIE(GV);
+ EntityDie = getOrCreateGlobalVariableDIE(GV, {});
else
EntityDie = getDIE(Entity);
assert(EntityDie);
@@ -695,11 +676,7 @@ void DwarfCompileUnit::finishSubprogramDefinition(const DISubprogram *SP) {
// If this subprogram has an abstract definition, reference that
addDIEEntry(*D, dwarf::DW_AT_abstract_origin, *AbsSPDIE);
} else {
- if (!D && !includeMinimalInlineScopes())
- // Lazily construct the subprogram if we didn't see either concrete or
- // inlined versions during codegen. (except in -gmlt ^ where we want
- // to omit these entirely)
- D = getOrCreateSubprogramDIE(SP);
+ assert(D || includeMinimalInlineScopes());
if (D)
// And attach the attributes
applySubprogramAttributesToDefinition(SP, *D);
@@ -750,18 +727,22 @@ void DwarfCompileUnit::addVariableAddress(const DbgVariable &DV, DIE &Die,
void DwarfCompileUnit::addAddress(DIE &Die, dwarf::Attribute Attribute,
const MachineLocation &Location) {
DIELoc *Loc = new (DIEValueAllocator) DIELoc;
+ DIEDwarfExpression Expr(*Asm, *this, *Loc);
bool validReg;
if (Location.isReg())
- validReg = addRegisterOpPiece(*Loc, Location.getReg());
+ validReg = Expr.AddMachineReg(*Asm->MF->getSubtarget().getRegisterInfo(),
+ Location.getReg());
else
- validReg = addRegisterOffset(*Loc, Location.getReg(), Location.getOffset());
+ validReg =
+ Expr.AddMachineRegIndirect(*Asm->MF->getSubtarget().getRegisterInfo(),
+ Location.getReg(), Location.getOffset());
if (!validReg)
return;
// Now attach the location information to the DIE.
- addBlock(Die, Attribute, Loc);
+ addBlock(Die, Attribute, Expr.finalize());
}
/// Start with the address based on the location provided, and generate the
@@ -774,19 +755,22 @@ void DwarfCompileUnit::addComplexAddress(const DbgVariable &DV, DIE &Die,
DIELoc *Loc = new (DIEValueAllocator) DIELoc;
DIEDwarfExpression DwarfExpr(*Asm, *this, *Loc);
const DIExpression *Expr = DV.getSingleExpression();
- bool ValidReg;
+ DIExpressionCursor ExprCursor(Expr);
const TargetRegisterInfo &TRI = *Asm->MF->getSubtarget().getRegisterInfo();
- if (Location.getOffset()) {
- ValidReg = DwarfExpr.AddMachineRegIndirect(TRI, Location.getReg(),
- Location.getOffset());
- if (ValidReg)
- DwarfExpr.AddExpression(Expr->expr_op_begin(), Expr->expr_op_end());
- } else
- ValidReg = DwarfExpr.AddMachineRegExpression(TRI, Expr, Location.getReg());
+ auto Reg = Location.getReg();
+ DwarfExpr.addFragmentOffset(Expr);
+ bool ValidReg =
+ Location.getOffset()
+ ? DwarfExpr.AddMachineRegIndirect(TRI, Reg, Location.getOffset())
+ : DwarfExpr.AddMachineRegExpression(TRI, ExprCursor, Reg);
+
+ if (!ValidReg)
+ return;
+
+ DwarfExpr.AddExpression(std::move(ExprCursor));
// Now attach the location information to the DIE.
- if (ValidReg)
- addBlock(Die, Attribute, Loc);
+ addBlock(Die, Attribute, Loc);
}
/// Add a Dwarf loclistptr attribute data and value.
@@ -802,7 +786,13 @@ void DwarfCompileUnit::applyVariableAttributes(const DbgVariable &Var,
StringRef Name = Var.getName();
if (!Name.empty())
addString(VariableDie, dwarf::DW_AT_name, Name);
- addSourceLine(VariableDie, Var.getVariable());
+ const auto *DIVar = Var.getVariable();
+ if (DIVar)
+ if (uint32_t AlignInBytes = DIVar->getAlignInBytes())
+ addUInt(VariableDie, dwarf::DW_AT_alignment, dwarf::DW_FORM_udata,
+ AlignInBytes);
+
+ addSourceLine(VariableDie, DIVar);
addType(VariableDie, Var.getType());
if (Var.isArtificial())
addFlag(VariableDie, dwarf::DW_AT_artificial);
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
index 90f74a3..a8025f1 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
@@ -32,9 +32,6 @@ class DwarfCompileUnit : public DwarfUnit {
/// A numeric ID unique among all CUs in the module
unsigned UniqueID;
- /// Offset of the UnitDie from beginning of debug info section.
- unsigned DebugInfoOffset = 0;
-
/// The attribute index of DW_AT_stmt_list in the compile unit DIE, avoiding
/// the need to search for it in applyStmtList.
DIE::value_iterator StmtListValue;
@@ -84,8 +81,6 @@ public:
DwarfDebug *DW, DwarfFile *DWU);
unsigned getUniqueID() const { return UniqueID; }
- unsigned getDebugInfoOffset() const { return DebugInfoOffset; }
- void setDebugInfoOffset(unsigned DbgInfoOff) { DebugInfoOffset = DbgInfoOff; }
DwarfCompileUnit *getSkeleton() const {
return Skeleton;
@@ -96,8 +91,16 @@ public:
/// Apply the DW_AT_stmt_list from this compile unit to the specified DIE.
void applyStmtList(DIE &D);
- /// getOrCreateGlobalVariableDIE - get or create global variable DIE.
- DIE *getOrCreateGlobalVariableDIE(const DIGlobalVariable *GV);
+ /// A pair of GlobalVariable and DIExpression.
+ struct GlobalExpr {
+ const GlobalVariable *Var;
+ const DIExpression *Expr;
+ };
+
+ /// Get or create global variable DIE.
+ DIE *
+ getOrCreateGlobalVariableDIE(const DIGlobalVariable *GV,
+ ArrayRef<GlobalExpr> GlobalExprs);
/// addLabelAddress - Add a dwarf label attribute data and value using
/// either DW_FORM_addr or DW_FORM_GNU_addr_index.
@@ -176,7 +179,7 @@ public:
unsigned *ChildScopeCount = nullptr);
/// \brief Construct a DIE for this subprogram scope.
- void constructSubprogramScopeDIE(LexicalScope *Scope);
+ void constructSubprogramScopeDIE(const DISubprogram *Sub,
+                                  LexicalScope *Scope);
DIE *createAndAddScopeChildren(LexicalScope *Scope, DIE &ScopeDIE);
@@ -190,20 +193,15 @@ public:
/// Set the skeleton unit associated with this unit.
void setSkeleton(DwarfCompileUnit &Skel) { Skeleton = &Skel; }
- const MCSymbol *getSectionSym() const {
- assert(Section);
- return Section->getBeginSymbol();
- }
-
unsigned getLength() {
return sizeof(uint32_t) + // Length field
- getHeaderSize() + UnitDie.getSize();
+ getHeaderSize() + getUnitDie().getSize();
}
void emitHeader(bool UseOffsets) override;
MCSymbol *getLabelBegin() const {
- assert(Section);
+ assert(getSection());
return LabelBegin;
}
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
index 7fba768..91a3d09 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -62,11 +62,6 @@ static cl::opt<bool>
DisableDebugInfoPrinting("disable-debug-info-print", cl::Hidden,
cl::desc("Disable debug info printing"));
-static cl::opt<bool> UnknownLocations(
- "use-unknown-locations", cl::Hidden,
- cl::desc("Make an absence of debug location information explicit."),
- cl::init(false));
-
static cl::opt<bool>
GenerateGnuPubSections("generate-gnu-dwarf-pub-sections", cl::Hidden,
cl::desc("Generate GNU-style pubnames and pubtypes"),
@@ -81,12 +76,19 @@ namespace {
enum DefaultOnOff { Default, Enable, Disable };
}
+static cl::opt<DefaultOnOff> UnknownLocations(
+ "use-unknown-locations", cl::Hidden,
+ cl::desc("Make an absence of debug location information explicit."),
+ cl::values(clEnumVal(Default, "At top of block or after label"),
+ clEnumVal(Enable, "In all cases"), clEnumVal(Disable, "Never")),
+ cl::init(Default));
+
static cl::opt<DefaultOnOff>
DwarfAccelTables("dwarf-accel-tables", cl::Hidden,
cl::desc("Output prototype dwarf accelerator tables."),
cl::values(clEnumVal(Default, "Default for platform"),
clEnumVal(Enable, "Enabled"),
- clEnumVal(Disable, "Disabled"), clEnumValEnd),
+ clEnumVal(Disable, "Disabled")),
cl::init(Default));
static cl::opt<DefaultOnOff>
@@ -94,7 +96,7 @@ SplitDwarf("split-dwarf", cl::Hidden,
cl::desc("Output DWARF5 split debug info."),
cl::values(clEnumVal(Default, "Default for platform"),
clEnumVal(Enable, "Enabled"),
- clEnumVal(Disable, "Disabled"), clEnumValEnd),
+ clEnumVal(Disable, "Disabled")),
cl::init(Default));
static cl::opt<DefaultOnOff>
@@ -102,7 +104,7 @@ DwarfPubSections("generate-dwarf-pub-sections", cl::Hidden,
cl::desc("Generate DWARF pubnames and pubtypes sections"),
cl::values(clEnumVal(Default, "Default for platform"),
clEnumVal(Enable, "Enabled"),
- clEnumVal(Disable, "Disabled"), clEnumValEnd),
+ clEnumVal(Disable, "Disabled")),
cl::init(Default));
enum LinkageNameOption {
@@ -117,12 +119,13 @@ static cl::opt<LinkageNameOption>
"Default for platform"),
clEnumValN(AllLinkageNames, "All", "All"),
clEnumValN(AbstractLinkageNames, "Abstract",
- "Abstract subprograms"),
- clEnumValEnd),
+ "Abstract subprograms")),
cl::init(DefaultLinkageNames));
-static const char *const DWARFGroupName = "DWARF Emission";
-static const char *const DbgTimerName = "DWARF Debug Writer";
+static const char *const DWARFGroupName = "dwarf";
+static const char *const DWARFGroupDescription = "DWARF Emission";
+static const char *const DbgTimerName = "writer";
+static const char *const DbgTimerDescription = "DWARF Debug Writer";
void DebugLocDwarfExpression::EmitOp(uint8_t Op, const char *Comment) {
BS.EmitInt8(
@@ -196,7 +199,16 @@ const DIType *DbgVariable::getType() const {
return Ty;
}
-static LLVM_CONSTEXPR DwarfAccelTable::Atom TypeAtoms[] = {
+ArrayRef<DbgVariable::FrameIndexExpr> DbgVariable::getFrameIndexExprs() const {
+ std::sort(FrameIndexExprs.begin(), FrameIndexExprs.end(),
+ [](const FrameIndexExpr &A, const FrameIndexExpr &B) -> bool {
+ return A.Expr->getFragmentInfo()->OffsetInBits <
+ B.Expr->getFragmentInfo()->OffsetInBits;
+ });
+ return FrameIndexExprs;
+}
+
+static const DwarfAccelTable::Atom TypeAtoms[] = {
DwarfAccelTable::Atom(dwarf::DW_ATOM_die_offset, dwarf::DW_FORM_data4),
DwarfAccelTable::Atom(dwarf::DW_ATOM_die_tag, dwarf::DW_FORM_data2),
DwarfAccelTable::Atom(dwarf::DW_ATOM_type_flags, dwarf::DW_FORM_data1)};
@@ -205,7 +217,7 @@ DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M)
: DebugHandlerBase(A), DebugLocs(A->OutStreamer->isVerboseAsm()),
InfoHolder(A, "info_string", DIEValueAllocator),
SkeletonHolder(A, "skel_string", DIEValueAllocator),
- IsDarwin(Triple(A->getTargetTriple()).isOSDarwin()),
+ IsDarwin(A->TM.getTargetTriple().isOSDarwin()),
AccelNames(DwarfAccelTable::Atom(dwarf::DW_ATOM_die_offset,
dwarf::DW_FORM_data4)),
AccelObjC(DwarfAccelTable::Atom(dwarf::DW_ATOM_die_offset,
@@ -215,7 +227,7 @@ DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M)
AccelTypes(TypeAtoms), DebuggerTuning(DebuggerKind::Default) {
CurFn = nullptr;
- Triple TT(Asm->getTargetTriple());
+ const Triple &TT = Asm->TM.getTargetTriple();
// Make sure we know our "debugger tuning." The target option takes
// precedence; fall back to triple-based defaults.
@@ -255,7 +267,7 @@ DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M)
UseAllLinkageNames = DwarfLinkageNames == AllLinkageNames;
unsigned DwarfVersionNumber = Asm->TM.Options.MCOptions.DwarfVersion;
- DwarfVersion = DwarfVersionNumber ? DwarfVersionNumber
+ unsigned DwarfVersion = DwarfVersionNumber ? DwarfVersionNumber
: MMI->getModule()->getDwarfVersion();
// Use dwarf 4 by default if nothing is requested.
DwarfVersion = DwarfVersion ? DwarfVersion : dwarf::DWARF_VERSION;
@@ -349,10 +361,11 @@ bool DwarfDebug::isLexicalScopeDIENull(LexicalScope *Scope) {
return !getLabelAfterInsn(Ranges.front().second);
}
-template <typename Func> void forBothCUs(DwarfCompileUnit &CU, Func F) {
+template <typename Func> static void forBothCUs(DwarfCompileUnit &CU, Func F) {
F(CU);
if (auto *SkelCU = CU.getSkeleton())
- F(*SkelCU);
+ if (CU.getCUNode()->getSplitDebugInlining())
+ F(*SkelCU);
}
void DwarfDebug::constructAbstractSubprogramScopeDIE(LexicalScope *Scope) {
@@ -360,13 +373,13 @@ void DwarfDebug::constructAbstractSubprogramScopeDIE(LexicalScope *Scope) {
assert(Scope->isAbstractScope());
assert(!Scope->getInlinedAt());
- const MDNode *SP = Scope->getScopeNode();
+ auto *SP = cast<DISubprogram>(Scope->getScopeNode());
ProcessedSPNodes.insert(SP);
// Find the subprogram's DwarfCompileUnit in the SPMap in case the subprogram
// was inlined from another compile unit.
- auto &CU = *CUMap.lookup(cast<DISubprogram>(SP)->getUnit());
+ auto &CU = *CUMap.lookup(SP->getUnit());
forBothCUs(CU, [&](DwarfCompileUnit &CU) {
CU.constructAbstractSubprogramScopeDIE(Scope);
});
@@ -435,9 +448,9 @@ DwarfDebug::constructDwarfCompileUnit(const DICompileUnit *DIUnit) {
}
if (useSplitDwarf())
- NewCU.initSection(Asm->getObjFileLowering().getDwarfInfoDWOSection());
+ NewCU.setSection(Asm->getObjFileLowering().getDwarfInfoDWOSection());
else
- NewCU.initSection(Asm->getObjFileLowering().getDwarfInfoSection());
+ NewCU.setSection(Asm->getObjFileLowering().getDwarfInfoSection());
if (DIUnit->getDWOId()) {
// This CU is either a clang module DWO or a skeleton CU.
@@ -449,8 +462,8 @@ DwarfDebug::constructDwarfCompileUnit(const DICompileUnit *DIUnit) {
DIUnit->getSplitDebugFilename());
}
- CUMap.insert(std::make_pair(DIUnit, &NewCU));
- CUDieMap.insert(std::make_pair(&Die, &NewCU));
+ CUMap.insert({DIUnit, &NewCU});
+ CUDieMap.insert({&Die, &NewCU});
return NewCU;
}
@@ -460,11 +473,34 @@ void DwarfDebug::constructAndAddImportedEntityDIE(DwarfCompileUnit &TheCU,
D->addChild(TheCU.constructImportedEntityDIE(N));
}
+/// Sort and unique GVEs by comparing their fragment offset.
+static SmallVectorImpl<DwarfCompileUnit::GlobalExpr> &
+sortGlobalExprs(SmallVectorImpl<DwarfCompileUnit::GlobalExpr> &GVEs) {
+ std::sort(GVEs.begin(), GVEs.end(),
+ [](DwarfCompileUnit::GlobalExpr A, DwarfCompileUnit::GlobalExpr B) {
+ if (A.Expr != B.Expr && A.Expr && B.Expr) {
+ auto FragmentA = A.Expr->getFragmentInfo();
+ auto FragmentB = B.Expr->getFragmentInfo();
+ if (FragmentA && FragmentB)
+ return FragmentA->OffsetInBits < FragmentB->OffsetInBits;
+ }
+ return false;
+ });
+ GVEs.erase(std::unique(GVEs.begin(), GVEs.end(),
+ [](DwarfCompileUnit::GlobalExpr A,
+ DwarfCompileUnit::GlobalExpr B) {
+ return A.Expr == B.Expr;
+ }),
+ GVEs.end());
+ return GVEs;
+}
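sortGlobalExprs is the usual sort-then-std::unique idiom for in-place
deduplication, here keyed on the DIExpression pointer. The same pattern on a
plain vector (illustration only):

    #include <algorithm>
    #include <vector>

    // Sort, then erase the adjacent duplicates std::unique compacts away.
    static void sortUnique(std::vector<int> &V) {
      std::sort(V.begin(), V.end());
      V.erase(std::unique(V.begin(), V.end()), V.end());
    }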
+
// Emit all Dwarf sections that should come prior to the content. Create
// global DIEs and emit initial debug info sections. This is invoked by
// the target AsmPrinter.
void DwarfDebug::beginModule() {
- NamedRegionTimer T(DbgTimerName, DWARFGroupName, TimePassesIsEnabled);
+ NamedRegionTimer T(DbgTimerName, DbgTimerDescription, DWARFGroupName,
+ DWARFGroupDescription, TimePassesIsEnabled);
if (DisableDebugInfoPrinting)
return;
@@ -475,13 +511,30 @@ void DwarfDebug::beginModule() {
// Tell MMI whether we have debug info.
MMI->setDebugInfoAvailability(NumDebugCUs > 0);
SingleCU = NumDebugCUs == 1;
+ DenseMap<DIGlobalVariable *, SmallVector<DwarfCompileUnit::GlobalExpr, 1>>
+ GVMap;
+ for (const GlobalVariable &Global : M->globals()) {
+ SmallVector<DIGlobalVariableExpression *, 1> GVs;
+ Global.getDebugInfo(GVs);
+ for (auto *GVE : GVs)
+ GVMap[GVE->getVariable()].push_back({&Global, GVE->getExpression()});
+ }
for (DICompileUnit *CUNode : M->debug_compile_units()) {
DwarfCompileUnit &CU = constructDwarfCompileUnit(CUNode);
for (auto *IE : CUNode->getImportedEntities())
CU.addImportedEntity(IE);
- for (auto *GV : CUNode->getGlobalVariables())
- CU.getOrCreateGlobalVariableDIE(GV);
+
+ // Global Variables.
+ for (auto *GVE : CUNode->getGlobalVariables())
+ GVMap[GVE->getVariable()].push_back({nullptr, GVE->getExpression()});
+ DenseSet<DIGlobalVariable *> Processed;
+ for (auto *GVE : CUNode->getGlobalVariables()) {
+ DIGlobalVariable *GV = GVE->getVariable();
+ if (Processed.insert(GV).second)
+ CU.getOrCreateGlobalVariableDIE(GV, sortGlobalExprs(GVMap[GV]));
+ }
+
for (auto *Ty : CUNode->getEnumTypes()) {
// The enum types array by design contains pointers to
// MDNodes rather than DIRefs. Unique them here.
@@ -509,7 +562,7 @@ void DwarfDebug::finishVariableDefinitions() {
// FIXME: Consider the time-space tradeoff of just storing the unit pointer
// in the ConcreteVariables list, rather than looking it up again here.
// DIE::getUnit isn't simple - it walks parent pointers, etc.
- DwarfCompileUnit *Unit = lookupUnit(VariableDie->getUnit());
+ DwarfCompileUnit *Unit = CUDieMap.lookup(VariableDie->getUnitDie());
assert(Unit);
DbgVariable *AbsVar = getExistingAbstractVariable(
InlinedVariable(Var->getVariable(), Var->getInlinedAt()));
@@ -522,13 +575,11 @@ void DwarfDebug::finishVariableDefinitions() {
}
void DwarfDebug::finishSubprogramDefinitions() {
- for (auto &F : MMI->getModule()->functions())
- if (auto *SP = F.getSubprogram())
- if (ProcessedSPNodes.count(SP) &&
- SP->getUnit()->getEmissionKind() != DICompileUnit::NoDebug)
- forBothCUs(*CUMap.lookup(SP->getUnit()), [&](DwarfCompileUnit &CU) {
- CU.finishSubprogramDefinition(SP);
- });
+ for (const DISubprogram *SP : ProcessedSPNodes)
+ if (SP->getUnit()->getEmissionKind() != DICompileUnit::NoDebug)
+ forBothCUs(*CUMap.lookup(SP->getUnit()), [&](DwarfCompileUnit &CU) {
+ CU.finishSubprogramDefinition(SP);
+ });
}
void DwarfDebug::finalizeModuleInfo() {
@@ -715,10 +766,10 @@ void DwarfDebug::ensureAbstractVariableIsCreatedIfScoped(
createAbstractVariable(Cleansed, Scope);
}
-// Collect variable information from side table maintained by MMI.
-void DwarfDebug::collectVariableInfoFromMMITable(
+// Collect variable information from side table maintained by MF.
+void DwarfDebug::collectVariableInfoFromMFTable(
DenseSet<InlinedVariable> &Processed) {
- for (const auto &VI : MMI->getVariableDbgInfo()) {
+ for (const auto &VI : Asm->MF->getVariableDbgInfo()) {
if (!VI.Var)
continue;
assert(VI.Var->isValidLocationForIntrinsic(VI.Loc) &&
@@ -765,7 +816,7 @@ static DebugLocEntry::Value getDebugLocValue(const MachineInstr *MI) {
llvm_unreachable("Unexpected 4-operand DBG_VALUE instruction!");
}
-/// \brief If this and Next are describing different pieces of the same
+/// \brief If this and Next are describing different fragments of the same
/// variable, merge them by appending Next's values to the current
/// list of values.
/// Return true if the merge was successful.
@@ -773,15 +824,15 @@ bool DebugLocEntry::MergeValues(const DebugLocEntry &Next) {
if (Begin == Next.Begin) {
auto *FirstExpr = cast<DIExpression>(Values[0].Expression);
auto *FirstNextExpr = cast<DIExpression>(Next.Values[0].Expression);
- if (!FirstExpr->isBitPiece() || !FirstNextExpr->isBitPiece())
+ if (!FirstExpr->isFragment() || !FirstNextExpr->isFragment())
return false;
- // We can only merge entries if none of the pieces overlap any others.
+ // We can only merge entries if none of the fragments overlap any others.
// In doing so, we can take advantage of the fact that both lists are
// sorted.
for (unsigned i = 0, j = 0; i < Values.size(); ++i) {
for (; j < Next.Values.size(); ++j) {
- int res = DebugHandlerBase::pieceCmp(
+ int res = DebugHandlerBase::fragmentCmp(
cast<DIExpression>(Values[i].Expression),
cast<DIExpression>(Next.Values[j].Expression));
if (res == 0) // The two expressions overlap, we can't merge.
@@ -804,27 +855,27 @@ bool DebugLocEntry::MergeValues(const DebugLocEntry &Next) {
/// Build the location list for all DBG_VALUEs in the function that
/// describe the same variable. If the ranges of several independent
-/// pieces of the same variable overlap partially, split them up and
+/// fragments of the same variable overlap partially, split them up and
/// combine the ranges. The resulting DebugLocEntries will have
/// strictly monotonically increasing begin addresses and will never
/// overlap.
//
// Input:
//
-// Ranges History [var, loc, piece ofs size]
-// 0 | [x, (reg0, piece 0, 32)]
-// 1 | | [x, (reg1, piece 32, 32)] <- IsPieceOfPrevEntry
+// Ranges History [var, loc, fragment ofs size]
+// 0 | [x, (reg0, fragment 0, 32)]
+// 1 | | [x, (reg1, fragment 32, 32)] <- IsFragmentOfPrevEntry
// 2 | | ...
// 3 | [clobber reg0]
-// 4 [x, (mem, piece 0, 64)] <- overlapping with both previous pieces of
+// 4 [x, (mem, fragment 0, 64)] <- overlapping with both previous fragments of
// x.
//
// Output:
//
-// [0-1] [x, (reg0, piece 0, 32)]
-// [1-3] [x, (reg0, piece 0, 32), (reg1, piece 32, 32)]
-// [3-4] [x, (reg1, piece 32, 32)]
-// [4- ] [x, (mem, piece 0, 64)]
+// [0-1] [x, (reg0, fragment 0, 32)]
+// [1-3] [x, (reg0, fragment 0, 32), (reg1, fragment 32, 32)]
+// [3-4] [x, (reg1, fragment 32, 32)]
+// [4- ] [x, (mem, fragment 0, 64)]
void
DwarfDebug::buildLocationList(SmallVectorImpl<DebugLocEntry> &DebugLoc,
const DbgValueHistoryMap::InstrRanges &Ranges) {
@@ -842,11 +893,10 @@ DwarfDebug::buildLocationList(SmallVectorImpl<DebugLocEntry> &DebugLoc,
continue;
}
- // If this piece overlaps with any open ranges, truncate them.
+ // If this fragment overlaps with any open ranges, truncate them.
const DIExpression *DIExpr = Begin->getDebugExpression();
- auto Last = std::remove_if(OpenRanges.begin(), OpenRanges.end(),
- [&](DebugLocEntry::Value R) {
- return piecesOverlap(DIExpr, R.getExpression());
+ auto Last = remove_if(OpenRanges, [&](DebugLocEntry::Value R) {
+ return fragmentsOverlap(DIExpr, R.getExpression());
});
OpenRanges.erase(Last, OpenRanges.end());
@@ -868,12 +918,12 @@ DwarfDebug::buildLocationList(SmallVectorImpl<DebugLocEntry> &DebugLoc,
DebugLocEntry Loc(StartLabel, EndLabel, Value);
bool couldMerge = false;
- // If this is a piece, it may belong to the current DebugLocEntry.
- if (DIExpr->isBitPiece()) {
+ // If this is a fragment, it may belong to the current DebugLocEntry.
+ if (DIExpr->isFragment()) {
// Add this value to the list of open ranges.
OpenRanges.push_back(Value);
- // Attempt to add the piece to the last entry.
+ // Attempt to add the fragment to the last entry.
if (!DebugLoc.empty())
if (DebugLoc.back().MergeValues(Loc))
couldMerge = true;
@@ -881,7 +931,7 @@ DwarfDebug::buildLocationList(SmallVectorImpl<DebugLocEntry> &DebugLoc,
if (!couldMerge) {
// Need to add a new DebugLocEntry. Add all values from still
- // valid non-overlapping pieces.
+ // valid non-overlapping fragments.
if (OpenRanges.size())
Loc.addValues(OpenRanges);
@@ -929,7 +979,7 @@ void DwarfDebug::collectVariableInfo(DwarfCompileUnit &TheCU,
const DISubprogram *SP,
DenseSet<InlinedVariable> &Processed) {
// Grab the variable info that was squirreled away in the MMI side-table.
- collectVariableInfoFromMMITable(Processed);
+ collectVariableInfoFromMFTable(Processed);
for (const auto &I : DbgValues) {
InlinedVariable IV = I.first;
@@ -996,30 +1046,82 @@ void DwarfDebug::beginInstruction(const MachineInstr *MI) {
DebugHandlerBase::beginInstruction(MI);
assert(CurMI);
- // Check if source location changes, but ignore DBG_VALUE locations.
- if (!MI->isDebugValue()) {
- const DebugLoc &DL = MI->getDebugLoc();
- if (DL != PrevInstLoc) {
- if (DL) {
- unsigned Flags = 0;
- PrevInstLoc = DL;
- if (DL == PrologEndLoc) {
- Flags |= DWARF2_FLAG_PROLOGUE_END;
- PrologEndLoc = DebugLoc();
- Flags |= DWARF2_FLAG_IS_STMT;
- }
- if (DL.getLine() !=
- Asm->OutStreamer->getContext().getCurrentDwarfLoc().getLine())
- Flags |= DWARF2_FLAG_IS_STMT;
-
- const MDNode *Scope = DL.getScope();
- recordSourceLine(DL.getLine(), DL.getCol(), Scope, Flags);
- } else if (UnknownLocations) {
- PrevInstLoc = DL;
- recordSourceLine(0, 0, nullptr, 0);
+ // Check if source location changes, but ignore DBG_VALUE and CFI locations.
+ if (MI->isDebugValue() || MI->isCFIInstruction())
+ return;
+ const DebugLoc &DL = MI->getDebugLoc();
+ // When we emit a line-0 record, we don't update PrevInstLoc; so look at
+ // the last line number actually emitted, to see if it was line 0.
+ unsigned LastAsmLine =
+ Asm->OutStreamer->getContext().getCurrentDwarfLoc().getLine();
+
+ if (DL == PrevInstLoc) {
+ // If we have an ongoing unspecified location, nothing to do here.
+ if (!DL)
+ return;
+ // We have an explicit location, same as the previous location.
+ // But we might be coming back to it after a line 0 record.
+ if (LastAsmLine == 0 && DL.getLine() != 0) {
+ // Reinstate the source location but not marked as a statement.
+ const MDNode *Scope = DL.getScope();
+ recordSourceLine(DL.getLine(), DL.getCol(), Scope, /*Flags=*/0);
+ }
+ return;
+ }
+
+ if (!DL) {
+ // We have an unspecified location, which might want to be line 0.
+ // If we have already emitted a line-0 record, don't repeat it.
+ if (LastAsmLine == 0)
+ return;
+ // If user said Don't Do That, don't do that.
+ if (UnknownLocations == Disable)
+ return;
+ // See if we have a reason to emit a line-0 record now.
+ // Reasons to emit a line-0 record include:
+ // - User asked for it (UnknownLocations).
+ // - Instruction has a label, so it's referenced from somewhere else,
+ // possibly debug information; we want it to have a source location.
+ // - Instruction is at the top of a block; we don't want to inherit the
+ // location from the physically previous (maybe unrelated) block.
+ if (UnknownLocations == Enable || PrevLabel ||
+ (PrevInstBB && PrevInstBB != MI->getParent())) {
+ // Preserve the file and column numbers, if we can, to save space in
+ // the encoded line table.
+ // Do not update PrevInstLoc, it remembers the last non-0 line.
+ const MDNode *Scope = nullptr;
+ unsigned Column = 0;
+ if (PrevInstLoc) {
+ Scope = PrevInstLoc.getScope();
+ Column = PrevInstLoc.getCol();
}
+ recordSourceLine(/*Line=*/0, Column, Scope, /*Flags=*/0);
}
+ return;
+ }
+
+ // We have an explicit location, different from the previous location.
+ // Don't repeat a line-0 record, but otherwise emit the new location.
+ // (The new location might be an explicit line 0, which we do emit.)
+ if (PrevInstLoc && DL.getLine() == 0 && LastAsmLine == 0)
+ return;
+ unsigned Flags = 0;
+ if (DL == PrologEndLoc) {
+ Flags |= DWARF2_FLAG_PROLOGUE_END | DWARF2_FLAG_IS_STMT;
+ PrologEndLoc = DebugLoc();
}
+ // If the line changed, we call that a new statement; unless we went to
+ // line 0 and came back, in which case it is not a new statement.
+ unsigned OldLine = PrevInstLoc ? PrevInstLoc.getLine() : LastAsmLine;
+ if (DL.getLine() && DL.getLine() != OldLine)
+ Flags |= DWARF2_FLAG_IS_STMT;
+
+ const MDNode *Scope = DL.getScope();
+ recordSourceLine(DL.getLine(), DL.getCol(), Scope, Flags);
+
+ // If we're not at line 0, remember this location.
+ if (DL.getLine())
+ PrevInstLoc = DL;
}
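Condensed, the rewritten policy for unknown locations is: never repeat a
line-0 record, never emit one when disabled, and otherwise emit one either
unconditionally (Enable) or only at a label or block boundary (Default). A
sketch of just that predicate (hypothetical helper; the is_stmt and
prologue-end handling is omitted):

    enum class Mode { Disable, Default, Enable };

    // Should a line-0 record be emitted for an instruction with no location?
    static bool shouldEmitLineZero(bool LastEmittedWasLineZero, Mode M,
                                   bool HasLabel, bool StartsNewBlock) {
      if (LastEmittedWasLineZero || M == Mode::Disable)
        return false;
      return M == Mode::Enable || HasLabel || StartsNewBlock;
    }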
static DebugLoc findPrologueEndLoc(const MachineFunction *MF) {
@@ -1093,18 +1195,14 @@ void DwarfDebug::endFunction(const MachineFunction *MF) {
"endFunction should be called with the same function as beginFunction");
const DISubprogram *SP = MF->getFunction()->getSubprogram();
- if (!MMI->hasDebugInfo() || LScopes.empty() || !SP ||
+ if (!MMI->hasDebugInfo() || !SP ||
SP->getUnit()->getEmissionKind() == DICompileUnit::NoDebug) {
- // If we don't have a lexical scope for this function then there will
- // be a hole in the range information. Keep note of this by setting the
- // previously used section to nullptr.
+ // If we don't have a subprogram for this function then there will be a hole
+ // in the range information. Keep note of this by setting the previously
+ // used section to nullptr.
PrevCU = nullptr;
CurFn = nullptr;
DebugHandlerBase::endFunction(MF);
- // Mark functions with no debug info on any instructions, but a
- // valid DISubprogram as processed.
- if (SP)
- ProcessedSPNodes.insert(SP);
return;
}
@@ -1112,7 +1210,7 @@ void DwarfDebug::endFunction(const MachineFunction *MF) {
Asm->OutStreamer->getContext().setDwarfCompileUnitID(0);
LexicalScope *FnScope = LScopes.getCurrentFunctionScope();
- SP = cast<DISubprogram>(FnScope->getScopeNode());
+ assert(!FnScope || SP == FnScope->getScopeNode());
DwarfCompileUnit &TheCU = *CUMap.lookup(SP->getUnit());
DenseSet<InlinedVariable> ProcessedVars;
@@ -1154,10 +1252,12 @@ void DwarfDebug::endFunction(const MachineFunction *MF) {
constructAbstractSubprogramScopeDIE(AScope);
}
- TheCU.constructSubprogramScopeDIE(FnScope);
+ ProcessedSPNodes.insert(SP);
+ TheCU.constructSubprogramScopeDIE(SP, FnScope);
if (auto *SkelCU = TheCU.getSkeleton())
- if (!LScopes.getAbstractScopesList().empty())
- SkelCU->constructSubprogramScopeDIE(FnScope);
+ if (!LScopes.getAbstractScopesList().empty() &&
+ TheCU.getCUNode()->getSplitDebugInlining())
+ SkelCU->constructSubprogramScopeDIE(SP, FnScope);
// Clear debug info
// Ownership of DbgVariables is a bit subtle - ScopeVariables owns all the
@@ -1181,7 +1281,8 @@ void DwarfDebug::recordSourceLine(unsigned Line, unsigned Col, const MDNode *S,
Fn = Scope->getFilename();
Dir = Scope->getDirectory();
if (auto *LBF = dyn_cast<DILexicalBlockFile>(Scope))
- Discriminator = LBF->getDiscriminator();
+ if (getDwarfVersion() >= 4)
+ Discriminator = LBF->getDiscriminator();
unsigned CUID = Asm->OutStreamer->getContext().getDwarfCompileUnitID();
Src = static_cast<DwarfCompileUnit &>(*InfoHolder.getUnits()[CUID])
@@ -1396,9 +1497,9 @@ void DwarfDebug::emitDebugLocEntry(ByteStreamer &Streamer,
static void emitDebugLocValue(const AsmPrinter &AP, const DIBasicType *BT,
ByteStreamer &Streamer,
const DebugLocEntry::Value &Value,
- unsigned PieceOffsetInBits) {
- DebugLocDwarfExpression DwarfExpr(AP.getDwarfDebug()->getDwarfVersion(),
- Streamer);
+ DwarfExpression &DwarfExpr) {
+ DIExpressionCursor ExprCursor(Value.getExpression());
+ DwarfExpr.addFragmentOffset(Value.getExpression());
// Regular entry.
if (Value.isInt()) {
if (BT && (BT->getEncoding() == dwarf::DW_ATE_signed ||
@@ -1408,25 +1509,16 @@ static void emitDebugLocValue(const AsmPrinter &AP, const DIBasicType *BT,
DwarfExpr.AddUnsignedConstant(Value.getInt());
} else if (Value.isLocation()) {
MachineLocation Loc = Value.getLoc();
- const DIExpression *Expr = Value.getExpression();
- if (!Expr || !Expr->getNumElements())
- // Regular entry.
- AP.EmitDwarfRegOp(Streamer, Loc);
- else {
- // Complex address entry.
- const TargetRegisterInfo &TRI = *AP.MF->getSubtarget().getRegisterInfo();
- if (Loc.getOffset()) {
- DwarfExpr.AddMachineRegIndirect(TRI, Loc.getReg(), Loc.getOffset());
- DwarfExpr.AddExpression(Expr->expr_op_begin(), Expr->expr_op_end(),
- PieceOffsetInBits);
- } else
- DwarfExpr.AddMachineRegExpression(TRI, Expr, Loc.getReg(),
- PieceOffsetInBits);
- }
+ const TargetRegisterInfo &TRI = *AP.MF->getSubtarget().getRegisterInfo();
+ if (Loc.getOffset())
+ DwarfExpr.AddMachineRegIndirect(TRI, Loc.getReg(), Loc.getOffset());
+ else
+ DwarfExpr.AddMachineRegExpression(TRI, ExprCursor, Loc.getReg());
} else if (Value.isConstantFP()) {
APInt RawBytes = Value.getConstantFP()->getValueAPF().bitcastToAPInt();
DwarfExpr.AddUnsignedConstant(RawBytes);
}
+ DwarfExpr.AddExpression(std::move(ExprCursor));
}
void DebugLocEntry::finalize(const AsmPrinter &AP,
@@ -1434,36 +1526,24 @@ void DebugLocEntry::finalize(const AsmPrinter &AP,
const DIBasicType *BT) {
DebugLocStream::EntryBuilder Entry(List, Begin, End);
BufferByteStreamer Streamer = Entry.getStreamer();
+ DebugLocDwarfExpression DwarfExpr(AP.getDwarfVersion(), Streamer);
const DebugLocEntry::Value &Value = Values[0];
- if (Value.isBitPiece()) {
- // Emit all pieces that belong to the same variable and range.
- assert(std::all_of(Values.begin(), Values.end(), [](DebugLocEntry::Value P) {
- return P.isBitPiece();
- }) && "all values are expected to be pieces");
+ if (Value.isFragment()) {
+ // Emit all fragments that belong to the same variable and range.
+ assert(all_of(Values, [](DebugLocEntry::Value P) {
+ return P.isFragment();
+ }) && "all values are expected to be fragments");
assert(std::is_sorted(Values.begin(), Values.end()) &&
- "pieces are expected to be sorted");
-
- unsigned Offset = 0;
- for (auto Piece : Values) {
- const DIExpression *Expr = Piece.getExpression();
- unsigned PieceOffset = Expr->getBitPieceOffset();
- unsigned PieceSize = Expr->getBitPieceSize();
- assert(Offset <= PieceOffset && "overlapping or duplicate pieces");
- if (Offset < PieceOffset) {
- // The DWARF spec seriously mandates pieces with no locations for gaps.
- DebugLocDwarfExpression Expr(AP.getDwarfDebug()->getDwarfVersion(),
- Streamer);
- Expr.AddOpPiece(PieceOffset-Offset, 0);
- Offset += PieceOffset-Offset;
- }
- Offset += PieceSize;
+ "fragments are expected to be sorted");
+
+ for (auto Fragment : Values)
+ emitDebugLocValue(AP, BT, Streamer, Fragment, DwarfExpr);
- emitDebugLocValue(AP, BT, Streamer, Piece, PieceOffset);
- }
} else {
- assert(Values.size() == 1 && "only pieces may have >1 value");
- emitDebugLocValue(AP, BT, Streamer, Value, 0);
+ assert(Values.size() == 1 && "only fragments may have >1 value");
+ emitDebugLocValue(AP, BT, Streamer, Value, DwarfExpr);
}
+ DwarfExpr.finalize();
}
void DwarfDebug::emitDebugLocEntryLocation(const DebugLocStream::Entry &Entry) {
@@ -1514,14 +1594,14 @@ void DwarfDebug::emitDebugLocDWO() {
// rather than two. We could get fancier and try to, say, reuse an
// address we know we've emitted elsewhere (the start of the function?
// The start of the CU or CU subrange that encloses this range?)
- Asm->EmitInt8(dwarf::DW_LLE_start_length_entry);
+ Asm->EmitInt8(dwarf::DW_LLE_startx_length);
unsigned idx = AddrPool.getIndex(Entry.BeginSym);
Asm->EmitULEB128(idx);
Asm->EmitLabelDifference(Entry.EndSym, Entry.BeginSym, 4);
emitDebugLocEntryLocation(Entry);
}
- Asm->EmitInt8(dwarf::DW_LLE_end_of_list_entry);
+ Asm->EmitInt8(dwarf::DW_LLE_end_of_list);
}
}
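For reference, each DW_LLE_startx_length entry emitted above is an opcode byte, a ULEB128 index into the .debug_addr pool, and a 4-byte range length, followed by the location description itself. A self-contained sketch of that encoding in plain C++ (not LLVM's emitter; 0x03 is the DWARF 5 opcode value):

#include <cstdint>
#include <vector>

void appendStartxLength(std::vector<uint8_t> &Out, uint64_t AddrIndex,
                        uint32_t Length) {
  Out.push_back(0x03); // DW_LLE_startx_length
  do {                 // ULEB128-encode the .debug_addr pool index.
    uint8_t Byte = AddrIndex & 0x7f;
    AddrIndex >>= 7;
    Out.push_back(AddrIndex ? (Byte | 0x80) : Byte);
  } while (AddrIndex);
  for (int I = 0; I < 4; ++I) // 4-byte little-endian range length.
    Out.push_back(uint8_t(Length >> (8 * I)));
}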
@@ -1807,7 +1887,7 @@ DwarfCompileUnit &DwarfDebug::constructSkeletonCU(const DwarfCompileUnit &CU) {
auto OwnedUnit = make_unique<DwarfCompileUnit>(
CU.getUniqueID(), CU.getCUNode(), Asm, this, &SkeletonHolder);
DwarfCompileUnit &NewCU = *OwnedUnit;
- NewCU.initSection(Asm->getObjFileLowering().getDwarfInfoSection());
+ NewCU.setSection(Asm->getObjFileLowering().getDwarfInfoSection());
NewCU.initStmtList();
@@ -1889,8 +1969,7 @@ void DwarfDebug::addDwarfTypeUnitType(DwarfCompileUnit &CU,
getDwoLineTable(CU));
DwarfTypeUnit &NewTU = *OwnedUnit;
DIE &UnitDie = NewTU.getUnitDie();
- TypeUnitsUnderConstruction.push_back(
- std::make_pair(std::move(OwnedUnit), CTy));
+ TypeUnitsUnderConstruction.emplace_back(std::move(OwnedUnit), CTy);
NewTU.addUInt(UnitDie, dwarf::DW_AT_language, dwarf::DW_FORM_data2,
CU.getLanguage());
@@ -1900,11 +1979,10 @@ void DwarfDebug::addDwarfTypeUnitType(DwarfCompileUnit &CU,
Ins.first->second = Signature;
if (useSplitDwarf())
- NewTU.initSection(Asm->getObjFileLowering().getDwarfTypesDWOSection());
+ NewTU.setSection(Asm->getObjFileLowering().getDwarfTypesDWOSection());
else {
CU.applyStmtList(UnitDie);
- NewTU.initSection(
- Asm->getObjFileLowering().getDwarfTypesSection(Signature));
+ NewTU.setSection(Asm->getObjFileLowering().getDwarfTypesSection(Signature));
}
NewTU.setType(NewTU.createTypeDIE(CTy));
@@ -1968,3 +2046,7 @@ void DwarfDebug::addAccelType(StringRef Name, const DIE &Die, char Flags) {
return;
AccelTypes.AddName(InfoHolder.getStringPool().getEntry(*Asm, Name), &Die);
}
+
+uint16_t DwarfDebug::getDwarfVersion() const {
+ return Asm->OutStreamer->getContext().getDwarfVersion();
+}
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h
index 6b06757..253e3f0 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h
@@ -22,6 +22,7 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/CodeGen/DIE.h"
@@ -53,7 +54,7 @@ class MachineModuleInfo;
///
/// Variables can be created from allocas, in which case they're generated from
/// the MMI table. Such variables can have multiple expressions and frame
-/// indices. The \a Expr and \a FrameIndices array must match.
+/// indices.
///
/// Variables can be created from \c DBG_VALUE instructions. Those whose
/// location changes over time use \a DebugLocListIndex, while those with a
@@ -63,11 +64,16 @@ class MachineModuleInfo;
class DbgVariable {
const DILocalVariable *Var; /// Variable Descriptor.
const DILocation *IA; /// Inlined at location.
- SmallVector<const DIExpression *, 1> Expr; /// Complex address.
DIE *TheDIE = nullptr; /// Variable DIE.
unsigned DebugLocListIndex = ~0u; /// Offset in DebugLocs.
const MachineInstr *MInsn = nullptr; /// DBG_VALUE instruction.
- SmallVector<int, 1> FrameIndex; /// Frame index.
+
+ struct FrameIndexExpr {
+ int FI;
+ const DIExpression *Expr;
+ };
+ mutable SmallVector<FrameIndexExpr, 1>
+ FrameIndexExprs; /// Frame index + expression.
public:
/// Construct a DbgVariable.
@@ -79,21 +85,18 @@ public:
/// Initialize from the MMI table.
void initializeMMI(const DIExpression *E, int FI) {
- assert(Expr.empty() && "Already initialized?");
- assert(FrameIndex.empty() && "Already initialized?");
+ assert(FrameIndexExprs.empty() && "Already initialized?");
assert(!MInsn && "Already initialized?");
assert((!E || E->isValid()) && "Expected valid expression");
assert(~FI && "Expected valid index");
- Expr.push_back(E);
- FrameIndex.push_back(FI);
+ FrameIndexExprs.push_back({FI, E});
}
/// Initialize from a DBG_VALUE instruction.
void initializeDbgValue(const MachineInstr *DbgValue) {
- assert(Expr.empty() && "Already initialized?");
- assert(FrameIndex.empty() && "Already initialized?");
+ assert(FrameIndexExprs.empty() && "Already initialized?");
assert(!MInsn && "Already initialized?");
assert(Var == DbgValue->getDebugVariable() && "Wrong variable");
@@ -102,16 +105,15 @@ public:
MInsn = DbgValue;
if (auto *E = DbgValue->getDebugExpression())
if (E->getNumElements())
- Expr.push_back(E);
+ FrameIndexExprs.push_back({0, E});
}
// Accessors.
const DILocalVariable *getVariable() const { return Var; }
const DILocation *getInlinedAt() const { return IA; }
- ArrayRef<const DIExpression *> getExpression() const { return Expr; }
const DIExpression *getSingleExpression() const {
- assert(MInsn && Expr.size() <= 1);
- return Expr.size() ? Expr[0] : nullptr;
+ assert(MInsn && FrameIndexExprs.size() <= 1);
+ return FrameIndexExprs.size() ? FrameIndexExprs[0].Expr : nullptr;
}
void setDIE(DIE &D) { TheDIE = &D; }
DIE *getDIE() const { return TheDIE; }
@@ -119,7 +121,9 @@ public:
unsigned getDebugLocListIndex() const { return DebugLocListIndex; }
StringRef getName() const { return Var->getName(); }
const MachineInstr *getMInsn() const { return MInsn; }
- ArrayRef<int> getFrameIndex() const { return FrameIndex; }
+ /// Get the FI entries, sorted by fragment offset.
+ ArrayRef<FrameIndexExpr> getFrameIndexExprs() const;
+ bool hasFrameIndexExprs() const { return !FrameIndexExprs.empty(); }
void addMMIEntry(const DbgVariable &V) {
assert(DebugLocListIndex == ~0U && !MInsn && "not an MMI entry");
@@ -127,16 +131,15 @@ public:
assert(V.Var == Var && "conflicting variable");
assert(V.IA == IA && "conflicting inlined-at location");
- assert(!FrameIndex.empty() && "Expected an MMI entry");
- assert(!V.FrameIndex.empty() && "Expected an MMI entry");
- assert(Expr.size() == FrameIndex.size() && "Mismatched expressions");
- assert(V.Expr.size() == V.FrameIndex.size() && "Mismatched expressions");
+ assert(!FrameIndexExprs.empty() && "Expected an MMI entry");
+ assert(!V.FrameIndexExprs.empty() && "Expected an MMI entry");
- Expr.append(V.Expr.begin(), V.Expr.end());
- FrameIndex.append(V.FrameIndex.begin(), V.FrameIndex.end());
- assert(std::all_of(Expr.begin(), Expr.end(), [](const DIExpression *E) {
- return E && E->isBitPiece();
- }) && "conflicting locations for variable");
+ FrameIndexExprs.append(V.FrameIndexExprs.begin(), V.FrameIndexExprs.end());
+ assert(all_of(FrameIndexExprs,
+ [](FrameIndexExpr &FIE) {
+ return FIE.Expr && FIE.Expr->isFragment();
+ }) &&
+ "conflicting locations for variable");
}
// Translate tag to proper Dwarf tag.
@@ -166,11 +169,11 @@ public:
bool hasComplexAddress() const {
assert(MInsn && "Expected DBG_VALUE, not MMI variable");
- assert(FrameIndex.empty() && "Expected DBG_VALUE, not MMI variable");
- assert(
- (Expr.empty() || (Expr.size() == 1 && Expr.back()->getNumElements())) &&
- "Invalid Expr for DBG_VALUE");
- return !Expr.empty();
+ assert((FrameIndexExprs.empty() ||
+ (FrameIndexExprs.size() == 1 &&
+ FrameIndexExprs[0].Expr->getNumElements())) &&
+ "Invalid Expr for DBG_VALUE");
+ return !FrameIndexExprs.empty();
}
bool isBlockByrefVariable() const;
const DIType *getType() const;
@@ -216,7 +219,9 @@ class DwarfDebug : public DebugHandlerBase {
/// This is a collection of subprogram MDNodes that are processed to
/// create DIEs.
- SmallPtrSet<const MDNode *, 16> ProcessedSPNodes;
+ SetVector<const DISubprogram *, SmallVector<const DISubprogram *, 16>,
+ SmallPtrSet<const DISubprogram *, 16>>
+ ProcessedSPNodes;
/// If nonnull, stores the current machine function we're processing.
const MachineFunction *CurFn;
@@ -254,9 +259,6 @@ class DwarfDebug : public DebugHandlerBase {
/// Whether to emit all linkage names, or just abstract subprograms.
bool UseAllLinkageNames;
- /// Version of dwarf we're emitting.
- unsigned DwarfVersion;
-
/// DWARF5 Experimental Options
/// @{
bool HasDwarfAccelTables;
@@ -443,9 +445,8 @@ class DwarfDebug : public DebugHandlerBase {
void buildLocationList(SmallVectorImpl<DebugLocEntry> &DebugLoc,
const DbgValueHistoryMap::InstrRanges &Ranges);
- /// Collect variable information from the side table maintained
- /// by MMI.
- void collectVariableInfoFromMMITable(DenseSet<InlinedVariable> &P);
+ /// Collect variable information from the side table maintained by MF.
+ void collectVariableInfoFromMFTable(DenseSet<InlinedVariable> &P);
public:
//===--------------------------------------------------------------------===//
@@ -515,7 +516,7 @@ public:
bool useSplitDwarf() const { return HasSplitDwarf; }
/// Returns the Dwarf Version.
- unsigned getDwarfVersion() const { return DwarfVersion; }
+ uint16_t getDwarfVersion() const;
/// Returns the previous CU that was being updated
const DwarfCompileUnit *getPrevCU() const { return PrevCU; }
@@ -537,11 +538,6 @@ public:
return Ref.resolve();
}
- /// Find the DwarfCompileUnit for the given CU Die.
- DwarfCompileUnit *lookupUnit(const DIE *CU) const {
- return CUDieMap.lookup(CU);
- }
-
void addSubprogramNames(const DISubprogram *SP, DIE &Die);
AddressPool &getAddressPool() { return AddrPool; }
@@ -559,12 +555,6 @@ public:
/// A helper function to check whether the DIE for a given Scope is
/// going to be null.
bool isLexicalScopeDIENull(LexicalScope *Scope);
-
- // FIXME: Sink these functions down into DwarfFile/Dwarf*Unit.
-
- SmallPtrSet<const MDNode *, 16> &getProcessedSPNodes() {
- return ProcessedSPNodes;
- }
};
} // End of namespace llvm
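The new getFrameIndexExprs() accessor returns entries sorted by fragment offset. A sketch of the ordering it implies, valid under the invariant addMMIEntry asserts (every multi-entry variable is composed of DW_OP_LLVM_fragment expressions):

std::sort(FrameIndexExprs.begin(), FrameIndexExprs.end(),
          [](const FrameIndexExpr &A, const FrameIndexExpr &B) {
            return A.Expr->getFragmentInfo()->OffsetInBits <
                   B.Expr->getFragmentInfo()->OffsetInBits;
          });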
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfException.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfException.h
index 8287f28..80d5bd2 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfException.h
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfException.h
@@ -28,6 +28,8 @@ protected:
/// Per-function flag to indicate if frame CFI info should be emitted.
bool shouldEmitCFI;
+ /// Per-module flag to indicate if .cfi_sections has been emitted.
+ bool hasEmittedCFISections;
void markFunctionEnd() override;
void endFragment() override;
@@ -46,8 +48,6 @@ class LLVM_LIBRARY_VISIBILITY DwarfCFIException : public DwarfCFIExceptionBase {
/// Per-function flag to indicate if frame moves info should be emitted.
bool shouldEmitMoves;
- AsmPrinter::CFIMoveType moveTypeModule;
-
public:
//===--------------------------------------------------------------------===//
// Main entry points.
@@ -81,7 +81,7 @@ public:
~ARMException() override;
/// Emit all exception information that should come after the content.
- void endModule() override;
+ void endModule() override {}
/// Gather pre-function exception information. Assumes being emitted
/// immediately after the function entry point.
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp
index 7dbc6cb..61b2c7e6 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp
@@ -46,7 +46,9 @@ void DwarfExpression::AddRegIndirect(int DwarfReg, int Offset, bool Deref) {
}
void DwarfExpression::AddOpPiece(unsigned SizeInBits, unsigned OffsetInBits) {
- assert(SizeInBits > 0 && "piece has size zero");
+ if (!SizeInBits)
+ return;
+
const unsigned SizeOfByte = 8;
if (OffsetInBits > 0 || SizeInBits % SizeOfByte) {
EmitOp(dwarf::DW_OP_bit_piece);
@@ -57,6 +59,7 @@ void DwarfExpression::AddOpPiece(unsigned SizeInBits, unsigned OffsetInBits) {
unsigned ByteSize = SizeInBits / SizeOfByte;
EmitUnsigned(ByteSize);
}
+ this->OffsetInBits += SizeInBits;
}
void DwarfExpression::AddShr(unsigned ShiftBy) {
@@ -82,10 +85,8 @@ bool DwarfExpression::AddMachineRegIndirect(const TargetRegisterInfo &TRI,
return true;
}
-bool DwarfExpression::AddMachineRegPiece(const TargetRegisterInfo &TRI,
- unsigned MachineReg,
- unsigned PieceSizeInBits,
- unsigned PieceOffsetInBits) {
+bool DwarfExpression::AddMachineReg(const TargetRegisterInfo &TRI,
+ unsigned MachineReg, unsigned MaxSize) {
if (!TRI.isPhysicalRegister(MachineReg))
return false;
@@ -94,13 +95,11 @@ bool DwarfExpression::AddMachineRegPiece(const TargetRegisterInfo &TRI,
// If this is a valid register number, emit it.
if (Reg >= 0) {
AddReg(Reg);
- if (PieceSizeInBits)
- AddOpPiece(PieceSizeInBits, PieceOffsetInBits);
return true;
}
// Walk up the super-register chain until we find a valid number.
- // For example, EAX on x86_64 is a 32-bit piece of RAX with offset 0.
+ // For example, EAX on x86_64 is a 32-bit fragment of RAX with offset 0.
for (MCSuperRegIterator SR(MachineReg, &TRI); SR.isValid(); ++SR) {
Reg = TRI.getDwarfRegNum(*SR, false);
if (Reg >= 0) {
@@ -108,27 +107,15 @@ bool DwarfExpression::AddMachineRegPiece(const TargetRegisterInfo &TRI,
unsigned Size = TRI.getSubRegIdxSize(Idx);
unsigned RegOffset = TRI.getSubRegIdxOffset(Idx);
AddReg(Reg, "super-register");
- if (PieceOffsetInBits == RegOffset) {
- AddOpPiece(Size, RegOffset);
- } else {
- // If this is part of a variable in a sub-register at a
- // non-zero offset, we need to manually shift the value into
- // place, since the DW_OP_piece describes the part of the
- // variable, not the position of the subregister.
- if (RegOffset)
- AddShr(RegOffset);
- AddOpPiece(Size, PieceOffsetInBits);
- }
+ // Use a DW_OP_bit_piece to describe the sub-register.
+ setSubRegisterPiece(Size, RegOffset);
return true;
}
}
// Otherwise, attempt to find a covering set of sub-register numbers.
// For example, Q0 on ARM is a composition of D0+D1.
- //
- // Keep track of the current position so we can emit the more
- // efficient DW_OP_piece.
- unsigned CurPos = PieceOffsetInBits;
+ unsigned CurPos = 0;
// The size of the register in bits, assuming 8 bits per byte.
unsigned RegSize = TRI.getMinimalPhysRegClass(MachineReg)->getSize() * 8;
// Keep track of the bits in the register we already emitted, so we
@@ -150,7 +137,12 @@ bool DwarfExpression::AddMachineRegPiece(const TargetRegisterInfo &TRI,
// its range, emit a DWARF piece for it.
if (Reg >= 0 && Intersection.any()) {
AddReg(Reg, "sub-register");
- AddOpPiece(Size, Offset == CurPos ? 0 : Offset);
+ if (Offset >= MaxSize)
+ break;
+ // Emit a piece for any gap in the coverage.
+ if (Offset > CurPos)
+ AddOpPiece(Offset - CurPos);
+ AddOpPiece(std::min<unsigned>(Size, MaxSize - Offset));
CurPos = Offset + Size;
// Mark it as emitted.
@@ -158,7 +150,7 @@ bool DwarfExpression::AddMachineRegPiece(const TargetRegisterInfo &TRI,
}
}
- return CurPos > PieceOffsetInBits;
+ return CurPos;
}
void DwarfExpression::AddStackValue() {
@@ -194,92 +186,114 @@ void DwarfExpression::AddUnsignedConstant(const APInt &Value) {
}
}
-static unsigned getOffsetOrZero(unsigned OffsetInBits,
- unsigned PieceOffsetInBits) {
- if (OffsetInBits == PieceOffsetInBits)
- return 0;
- assert(OffsetInBits >= PieceOffsetInBits && "overlapping pieces");
- return OffsetInBits;
-}
-
bool DwarfExpression::AddMachineRegExpression(const TargetRegisterInfo &TRI,
- const DIExpression *Expr,
+ DIExpressionCursor &ExprCursor,
unsigned MachineReg,
- unsigned PieceOffsetInBits) {
- auto I = Expr->expr_op_begin();
- auto E = Expr->expr_op_end();
- if (I == E)
- return AddMachineRegPiece(TRI, MachineReg);
+ unsigned FragmentOffsetInBits) {
+ if (!ExprCursor)
+ return AddMachineReg(TRI, MachineReg);
// Pattern-match combinations for which more efficient representations exist
// first.
bool ValidReg = false;
- switch (I->getOp()) {
- case dwarf::DW_OP_bit_piece: {
- unsigned OffsetInBits = I->getArg(0);
- unsigned SizeInBits = I->getArg(1);
- // Piece always comes at the end of the expression.
- return AddMachineRegPiece(TRI, MachineReg, SizeInBits,
- getOffsetOrZero(OffsetInBits, PieceOffsetInBits));
+ auto Op = ExprCursor.peek();
+ switch (Op->getOp()) {
+ default: {
+ auto Fragment = ExprCursor.getFragmentInfo();
+ ValidReg = AddMachineReg(TRI, MachineReg,
+ Fragment ? Fragment->SizeInBits : ~1U);
+ break;
}
case dwarf::DW_OP_plus:
case dwarf::DW_OP_minus: {
// [DW_OP_reg,Offset,DW_OP_plus, DW_OP_deref] --> [DW_OP_breg, Offset].
// [DW_OP_reg,Offset,DW_OP_minus,DW_OP_deref] --> [DW_OP_breg,-Offset].
- auto N = I.getNext();
- if (N != E && N->getOp() == dwarf::DW_OP_deref) {
- unsigned Offset = I->getArg(0);
+ auto N = ExprCursor.peekNext();
+ if (N && N->getOp() == dwarf::DW_OP_deref) {
+ unsigned Offset = Op->getArg(0);
ValidReg = AddMachineRegIndirect(
- TRI, MachineReg, I->getOp() == dwarf::DW_OP_plus ? Offset : -Offset);
- std::advance(I, 2);
- break;
+ TRI, MachineReg, Op->getOp() == dwarf::DW_OP_plus ? Offset : -Offset);
+ ExprCursor.consume(2);
} else
- ValidReg = AddMachineRegPiece(TRI, MachineReg);
- }
- case dwarf::DW_OP_deref: {
- // [DW_OP_reg,DW_OP_deref] --> [DW_OP_breg].
- ValidReg = AddMachineRegIndirect(TRI, MachineReg);
- ++I;
- break;
+ ValidReg = AddMachineReg(TRI, MachineReg);
+ break;
}
- default:
- llvm_unreachable("unsupported operand");
+ case dwarf::DW_OP_deref:
+ // [DW_OP_reg,DW_OP_deref] --> [DW_OP_breg].
+ ValidReg = AddMachineRegIndirect(TRI, MachineReg);
+ ExprCursor.take();
+ break;
}
- if (!ValidReg)
- return false;
-
- // Emit remaining elements of the expression.
- AddExpression(I, E, PieceOffsetInBits);
- return true;
+ return ValidReg;
}
-void DwarfExpression::AddExpression(DIExpression::expr_op_iterator I,
- DIExpression::expr_op_iterator E,
- unsigned PieceOffsetInBits) {
- for (; I != E; ++I) {
- switch (I->getOp()) {
- case dwarf::DW_OP_bit_piece: {
- unsigned OffsetInBits = I->getArg(0);
- unsigned SizeInBits = I->getArg(1);
- AddOpPiece(SizeInBits, getOffsetOrZero(OffsetInBits, PieceOffsetInBits));
+void DwarfExpression::AddExpression(DIExpressionCursor &&ExprCursor,
+ unsigned FragmentOffsetInBits) {
+ while (ExprCursor) {
+ auto Op = ExprCursor.take();
+ switch (Op->getOp()) {
+ case dwarf::DW_OP_LLVM_fragment: {
+ unsigned SizeInBits = Op->getArg(1);
+ unsigned FragmentOffset = Op->getArg(0);
+ // The fragment offset must have already been adjusted by emitting an
+ // empty DW_OP_piece / DW_OP_bit_piece before we emitted the base
+ // location.
+ assert(OffsetInBits >= FragmentOffset && "fragment offset not added?");
+
+ // If \a AddMachineReg already emitted DW_OP_piece operations to represent
+ // a super-register by splicing together sub-registers, subtract the size
+ // of the pieces that were already emitted.
+ SizeInBits -= OffsetInBits - FragmentOffset;
+
+ // If \a AddMachineReg requested a DW_OP_bit_piece to stencil out a
+ // sub-register that is smaller than the current fragment's size, use it.
+ if (SubRegisterSizeInBits)
+ SizeInBits = std::min<unsigned>(SizeInBits, SubRegisterSizeInBits);
+
+ AddOpPiece(SizeInBits, SubRegisterOffsetInBits);
+ setSubRegisterPiece(0, 0);
break;
}
case dwarf::DW_OP_plus:
EmitOp(dwarf::DW_OP_plus_uconst);
- EmitUnsigned(I->getArg(0));
+ EmitUnsigned(Op->getArg(0));
break;
case dwarf::DW_OP_minus:
// There is no OP_minus_uconst.
EmitOp(dwarf::DW_OP_constu);
- EmitUnsigned(I->getArg(0));
+ EmitUnsigned(Op->getArg(0));
EmitOp(dwarf::DW_OP_minus);
break;
case dwarf::DW_OP_deref:
EmitOp(dwarf::DW_OP_deref);
break;
+ case dwarf::DW_OP_constu:
+ EmitOp(dwarf::DW_OP_constu);
+ EmitUnsigned(Op->getArg(0));
+ break;
+ case dwarf::DW_OP_stack_value:
+ AddStackValue();
+ break;
default:
llvm_unreachable("unhandled opcode found in expression");
}
}
}
+
+void DwarfExpression::finalize() {
+ if (SubRegisterSizeInBits)
+ AddOpPiece(SubRegisterSizeInBits, SubRegisterOffsetInBits);
+}
+
+void DwarfExpression::addFragmentOffset(const DIExpression *Expr) {
+ if (!Expr || !Expr->isFragment())
+ return;
+
+ uint64_t FragmentOffset = Expr->getFragmentInfo()->OffsetInBits;
+ assert(FragmentOffset >= OffsetInBits &&
+ "overlapping or duplicate fragments");
+ if (FragmentOffset > OffsetInBits)
+ AddOpPiece(FragmentOffset - OffsetInBits);
+ OffsetInBits = FragmentOffset;
+}
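Putting addFragmentOffset, AddMachineRegExpression, and finalize together: a sketch of a 64-bit variable whose bits [0, 32) live in one register and whose bits [48, 64) live in another, leaving a 16-bit gap. Frag0 and Frag48 (DIExpressions ending in DW_OP_LLVM_fragment), R0, R1, TRI, and Streamer are placeholders:

DebugLocDwarfExpression Expr(DwarfVersion, Streamer);

DIExpressionCursor C0(Frag0);              // ..., DW_OP_LLVM_fragment 0, 32
Expr.addFragmentOffset(Frag0);             // offset 0: no gap piece needed
Expr.AddMachineRegExpression(TRI, C0, R0);
Expr.AddExpression(std::move(C0));         // DW_OP_reg(R0), DW_OP_piece 4

DIExpressionCursor C48(Frag48);            // ..., DW_OP_LLVM_fragment 48, 16
Expr.addFragmentOffset(Frag48);            // 16-bit gap: DW_OP_piece 2
Expr.AddMachineRegExpression(TRI, C48, R1);
Expr.AddExpression(std::move(C48));        // DW_OP_reg(R1), DW_OP_piece 2

Expr.finalize();                           // flush any pending sub-register piece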
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h
index 5fff28d..fd90fa0 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h
@@ -25,17 +25,86 @@ class TargetRegisterInfo;
class DwarfUnit;
class DIELoc;
+/// Holds a DIExpression and keeps track of how many operands have been consumed
+/// so far.
+class DIExpressionCursor {
+ DIExpression::expr_op_iterator Start, End;
+public:
+ DIExpressionCursor(const DIExpression *Expr) {
+ if (!Expr) {
+ assert(Start == End);
+ return;
+ }
+ Start = Expr->expr_op_begin();
+ End = Expr->expr_op_end();
+ }
+
+ DIExpressionCursor(ArrayRef<uint64_t> Expr)
+ : Start(Expr.begin()), End(Expr.end()) {}
+
+ /// Consume one operation.
+ Optional<DIExpression::ExprOperand> take() {
+ if (Start == End)
+ return None;
+ return *(Start++);
+ }
+
+ /// Consume N operations.
+ void consume(unsigned N) { std::advance(Start, N); }
+
+ /// Return the current operation.
+ Optional<DIExpression::ExprOperand> peek() const {
+ if (Start == End)
+ return None;
+ return *(Start);
+ }
+
+ /// Return the next operation.
+ Optional<DIExpression::ExprOperand> peekNext() const {
+ if (Start == End)
+ return None;
+
+ auto Next = Start.getNext();
+ if (Next == End)
+ return None;
+
+ return *Next;
+ }
+ /// Determine whether there are any operations left in this expression.
+ operator bool() const { return Start != End; }
+
+ /// Retrieve the fragment information, if any.
+ Optional<DIExpression::FragmentInfo> getFragmentInfo() const {
+ return DIExpression::getFragmentInfo(Start, End);
+ }
+};
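A minimal consumption loop over the cursor, in the style AddExpression uses (handlePlus is a hypothetical handler):

DIExpressionCursor Cursor(Expr); // Expr may be null; the cursor is then empty.
while (Cursor) {
  auto Op = Cursor.take();       // Consume one operation.
  if (Op->getOp() == dwarf::DW_OP_plus)
    handlePlus(Op->getArg(0));   // Hypothetical: use the addend.
}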
+
/// Base class containing the logic for constructing DWARF expressions
/// independently of whether they are emitted into a DIE or into a .debug_loc
/// entry.
class DwarfExpression {
protected:
- // Various convenience accessors that extract things out of AsmPrinter.
unsigned DwarfVersion;
+ /// Current Fragment Offset in Bits.
+ uint64_t OffsetInBits = 0;
+
+ /// Sometimes we need to add a DW_OP_bit_piece to describe a subregister.
+ unsigned SubRegisterSizeInBits = 0;
+ unsigned SubRegisterOffsetInBits = 0;
+
+ /// Push a DW_OP_piece / DW_OP_bit_piece for emitting later, if one is needed
+ /// to represent a subregister.
+ void setSubRegisterPiece(unsigned SizeInBits, unsigned OffsetInBits) {
+ SubRegisterSizeInBits = SizeInBits;
+ SubRegisterOffsetInBits = OffsetInBits;
+ }
public:
DwarfExpression(unsigned DwarfVersion) : DwarfVersion(DwarfVersion) {}
- virtual ~DwarfExpression() {}
+ virtual ~DwarfExpression() {}
+
+ /// This needs to be called last to commit any pending changes.
+ void finalize();
/// Output a dwarf operand and an optional assembler comment.
virtual void EmitOp(uint8_t Op, const char *Comment = nullptr) = 0;
@@ -52,24 +121,25 @@ public:
/// Emit an (double-)indirect dwarf register operation.
void AddRegIndirect(int DwarfReg, int Offset, bool Deref = false);
- /// Emit a dwarf register operation for describing
- /// - a small value occupying only part of a register or
- /// - a register representing only part of a value.
+ /// Emit a DW_OP_piece or DW_OP_bit_piece operation for a variable fragment.
+ /// \param OffsetInBits This is an optional offset into the location that
+ /// is at the top of the DWARF stack.
void AddOpPiece(unsigned SizeInBits, unsigned OffsetInBits = 0);
+
/// Emit a shift-right dwarf expression.
void AddShr(unsigned ShiftBy);
+
/// Emit a DW_OP_stack_value, if supported.
///
- /// The proper way to describe a constant value is
- /// DW_OP_constu <const>, DW_OP_stack_value.
- /// Unfortunately, DW_OP_stack_value was not available until DWARF-4,
- /// so we will continue to generate DW_OP_constu <const> for DWARF-2
- /// and DWARF-3. Technically, this is incorrect since DW_OP_const <const>
- /// actually describes a value at a constant addess, not a constant value.
- /// However, in the past there was no better way to describe a constant
- /// value, so the producers and consumers started to rely on heuristics
- /// to disambiguate the value vs. location status of the expression.
- /// See PR21176 for more details.
+ /// The proper way to describe a constant value is DW_OP_constu <const>,
+ /// DW_OP_stack_value. Unfortunately, DW_OP_stack_value was not available
+ /// until DWARF 4, so we will continue to generate DW_OP_constu <const> for
+ /// DWARF 2 and DWARF 3. Technically, this is incorrect since DW_OP_const
+ /// <const> actually describes a value at a constant address, not a constant
+ /// value. However, in the past there was no better way to describe a
+ /// constant value, so the producers and consumers started to rely on
+ /// heuristics to disambiguate the value vs. location status of the
+ /// expression. See PR21176 for more details.
void AddStackValue();
/// Emit an indirect dwarf register operation for the given machine register.
@@ -77,23 +147,23 @@ public:
bool AddMachineRegIndirect(const TargetRegisterInfo &TRI, unsigned MachineReg,
int Offset = 0);
- /// \brief Emit a partial DWARF register operation.
- /// \param MachineReg the register
- /// \param PieceSizeInBits size and
- /// \param PieceOffsetInBits offset of the piece in bits, if this is one
- /// piece of an aggregate value.
+ /// Emit a partial DWARF register operation.
+ ///
+ /// \param MachineReg The register number.
+ /// \param MaxSize If the register must be composed from
+ /// sub-registers this is an upper bound
+ /// for how many bits the emitted DW_OP_piece
+ /// may cover.
///
- /// If size and offset is zero an operation for the entire
- /// register is emitted: Some targets do not provide a DWARF
- /// register number for every register. If this is the case, this
- /// function will attempt to emit a DWARF register by emitting a
- /// piece of a super-register or by piecing together multiple
- /// subregisters that alias the register.
+ /// If size and offset are zero, an operation for the entire register is
+ /// emitted: Some targets do not provide a DWARF register number for every
+ /// register. If this is the case, this function will attempt to emit a DWARF
+ /// register by emitting a fragment of a super-register or by piecing together
+ /// multiple subregisters that alias the register.
///
/// \return false if no DWARF register exists for MachineReg.
- bool AddMachineRegPiece(const TargetRegisterInfo &TRI, unsigned MachineReg,
- unsigned PieceSizeInBits = 0,
- unsigned PieceOffsetInBits = 0);
+ bool AddMachineReg(const TargetRegisterInfo &TRI, unsigned MachineReg,
+ unsigned MaxSize = ~1U);
/// Emit a signed constant.
void AddSignedConstant(int64_t Value);
@@ -102,20 +172,29 @@ public:
/// Emit an unsigned constant.
void AddUnsignedConstant(const APInt &Value);
- /// \brief Emit an entire expression on top of a machine register location.
+ /// Emit a machine register location. As an optimization this may also consume
+ /// the prefix of a DIExpression if a more efficient representation for
+ /// combining the register location and the first operation exists.
///
- /// \param PieceOffsetInBits If this is one piece out of a fragmented
- /// location, this is the offset of the piece inside the entire variable.
- /// \return false if no DWARF register exists for MachineReg.
+ /// \param FragmentOffsetInBits If this is one fragment out of a fragmented
+ /// location, this is the offset of the
+ /// fragment inside the entire variable.
+ /// \return false if no DWARF register exists
+ /// for MachineReg.
bool AddMachineRegExpression(const TargetRegisterInfo &TRI,
- const DIExpression *Expr, unsigned MachineReg,
- unsigned PieceOffsetInBits = 0);
- /// Emit a the operations remaining the DIExpressionIterator I.
- /// \param PieceOffsetInBits If this is one piece out of a fragmented
- /// location, this is the offset of the piece inside the entire variable.
- void AddExpression(DIExpression::expr_op_iterator I,
- DIExpression::expr_op_iterator E,
- unsigned PieceOffsetInBits = 0);
+ DIExpressionCursor &Expr, unsigned MachineReg,
+ unsigned FragmentOffsetInBits = 0);
+ /// Emit all remaining operations in the DIExpressionCursor.
+ ///
+ /// \param FragmentOffsetInBits If this is one fragment out of multiple
+ /// locations, this is the offset of the
+ /// fragment inside the entire variable.
+ void AddExpression(DIExpressionCursor &&Expr,
+ unsigned FragmentOffsetInBits = 0);
+
+ /// If applicable, emit an empty DW_OP_piece / DW_OP_bit_piece to advance to
+ /// the fragment described by \c Expr.
+ void addFragmentOffset(const DIExpression *Expr);
};
/// DwarfExpression implementation for .debug_loc entries.
@@ -146,6 +225,10 @@ public:
void EmitUnsigned(uint64_t Value) override;
bool isFrameRegister(const TargetRegisterInfo &TRI,
unsigned MachineReg) override;
+ DIELoc *finalize() {
+ DwarfExpression::finalize();
+ return &DIE;
+ }
};
}
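Typical use of the DIE-flavored builder with the new finalize() override, mirroring the addBlockByrefAddress rewrite later in this diff (TRI, Reg, Offset, and Ops are placeholders):

DIELoc *Loc = new (DIEValueAllocator) DIELoc;
DIEDwarfExpression Expr(*Asm, *this, *Loc);
Expr.AddMachineRegIndirect(TRI, Reg, Offset); // DW_OP_breg(Reg) Offset
Expr.AddExpression(makeArrayRef(Ops));        // remaining raw DIExpression ops
addBlock(Die, dwarf::DW_AT_location, Expr.finalize());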
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfFile.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfFile.cpp
index e9fe98a..595f1d9 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfFile.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfFile.cpp
@@ -19,37 +19,7 @@
namespace llvm {
DwarfFile::DwarfFile(AsmPrinter *AP, StringRef Pref, BumpPtrAllocator &DA)
- : Asm(AP), StrPool(DA, *Asm, Pref) {}
-
-DwarfFile::~DwarfFile() {
- for (DIEAbbrev *Abbrev : Abbreviations)
- Abbrev->~DIEAbbrev();
-}
-
-// Define a unique number for the abbreviation.
-//
-DIEAbbrev &DwarfFile::assignAbbrevNumber(DIE &Die) {
- FoldingSetNodeID ID;
- DIEAbbrev Abbrev = Die.generateAbbrev();
- Abbrev.Profile(ID);
-
- void *InsertPos;
- if (DIEAbbrev *Existing =
- AbbreviationsSet.FindNodeOrInsertPos(ID, InsertPos)) {
- Die.setAbbrevNumber(Existing->getNumber());
- return *Existing;
- }
-
- // Move the abbreviation to the heap and assign a number.
- DIEAbbrev *New = new (AbbrevAllocator) DIEAbbrev(std::move(Abbrev));
- Abbreviations.push_back(New);
- New->setNumber(Abbreviations.size());
- Die.setAbbrevNumber(Abbreviations.size());
-
- // Store it for lookup.
- AbbreviationsSet.InsertNode(New, InsertPos);
- return *New;
-}
+ : Asm(AP), Abbrevs(AbbrevAllocator), StrPool(DA, *Asm, Pref) {}
void DwarfFile::addUnit(std::unique_ptr<DwarfCompileUnit> U) {
CUs.push_back(std::move(U));
@@ -80,7 +50,7 @@ void DwarfFile::computeSizeAndOffsets() {
// Iterate over each compile unit and set the size and offsets for each
// DIE within each compile unit. All offsets are CU relative.
for (const auto &TheU : CUs) {
- TheU->setDebugInfoOffset(SecOffset);
+ TheU->setDebugSectionOffset(SecOffset);
SecOffset += computeSizeAndOffsetsForUnit(TheU.get());
}
}
@@ -98,44 +68,10 @@ unsigned DwarfFile::computeSizeAndOffsetsForUnit(DwarfUnit *TheU) {
// Compute the size and offset of a DIE. The offset is relative to start of the
// CU. It returns the offset after laying out the DIE.
unsigned DwarfFile::computeSizeAndOffset(DIE &Die, unsigned Offset) {
- // Record the abbreviation.
- const DIEAbbrev &Abbrev = assignAbbrevNumber(Die);
-
- // Set DIE offset
- Die.setOffset(Offset);
-
- // Start the size with the size of abbreviation code.
- Offset += getULEB128Size(Die.getAbbrevNumber());
-
- // Size the DIE attribute values.
- for (const auto &V : Die.values())
- // Size attribute value.
- Offset += V.SizeOf(Asm);
-
- // Size the DIE children if any.
- if (Die.hasChildren()) {
- (void)Abbrev;
- assert(Abbrev.hasChildren() && "Children flag not set");
-
- for (auto &Child : Die.children())
- Offset = computeSizeAndOffset(Child, Offset);
-
- // End of children marker.
- Offset += sizeof(int8_t);
- }
-
- Die.setSize(Offset - Die.getOffset());
- return Offset;
+ return Die.computeOffsetsAndAbbrevs(Asm, Abbrevs, Offset);
}
-void DwarfFile::emitAbbrevs(MCSection *Section) {
- // Check to see if it is worth the effort.
- if (!Abbreviations.empty()) {
- // Start the debug abbrev section.
- Asm->OutStreamer->SwitchSection(Section);
- Asm->emitDwarfAbbrevs(Abbreviations);
- }
-}
+void DwarfFile::emitAbbrevs(MCSection *Section) { Abbrevs.Emit(Asm, Section); }
// Emit strings into a string section.
void DwarfFile::emitStrings(MCSection *StrSection, MCSection *OffsetSection) {
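The removed walk survives as DIE::computeOffsetsAndAbbrevs. In outline, the recursion it performs is the same as the old body (a sketch in free-function form; the real method also assigns abbreviation numbers through the DIEAbbrevSet first):

unsigned layout(const AsmPrinter *Asm, DIE &Die, unsigned Offset) {
  Die.setOffset(Offset);
  Offset += getULEB128Size(Die.getAbbrevNumber()); // abbreviation code
  for (const auto &V : Die.values())
    Offset += V.SizeOf(Asm);                       // attribute values
  if (Die.hasChildren()) {
    for (auto &Child : Die.children())
      Offset = layout(Asm, Child, Offset);
    Offset += sizeof(int8_t);                      // end-of-children marker
  }
  Die.setSize(Offset - Die.getOffset());
  return Offset;
}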
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfFile.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfFile.h
index b73d89b..d4d2ed2 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfFile.h
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfFile.h
@@ -16,10 +16,10 @@
#include "llvm/ADT/FoldingSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringMap.h"
+#include "llvm/CodeGen/DIE.h"
#include "llvm/IR/Metadata.h"
#include "llvm/Support/Allocator.h"
#include <memory>
-#include <vector>
namespace llvm {
class AsmPrinter;
@@ -41,10 +41,7 @@ class DwarfFile {
BumpPtrAllocator AbbrevAllocator;
// Used to uniquely define abbreviations.
- FoldingSet<DIEAbbrev> AbbreviationsSet;
-
- // A list of all the unique abbreviations in use.
- std::vector<DIEAbbrev *> Abbreviations;
+ DIEAbbrevSet Abbrevs;
// A pointer to all units in the section.
SmallVector<std::unique_ptr<DwarfCompileUnit>, 1> CUs;
@@ -65,8 +62,6 @@ class DwarfFile {
public:
DwarfFile(AsmPrinter *AP, StringRef Pref, BumpPtrAllocator &DA);
- ~DwarfFile();
-
const SmallVectorImpl<std::unique_ptr<DwarfCompileUnit>> &getUnits() {
return CUs;
}
@@ -81,12 +76,6 @@ public:
/// \returns The size of the root DIE.
unsigned computeSizeAndOffsetsForUnit(DwarfUnit *TheU);
- /// Define a unique number for the abbreviation.
- ///
- /// Compute the abbreviation for \c Die, look up its unique number, and
- /// return a reference to it in the uniquing table.
- DIEAbbrev &assignAbbrevNumber(DIE &Die);
-
/// \brief Add a unit to the list of CUs.
void addUnit(std::unique_ptr<DwarfCompileUnit> U);
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
index 4100d72..2a866c0 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
@@ -12,28 +12,33 @@
//===----------------------------------------------------------------------===//
#include "DwarfUnit.h"
-#include "DwarfAccelTable.h"
+#include "AddressPool.h"
#include "DwarfCompileUnit.h"
#include "DwarfDebug.h"
#include "DwarfExpression.h"
#include "llvm/ADT/APFloat.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/iterator_range.h"
+#include "llvm/ADT/None.h"
#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/IR/Constants.h"
-#include "llvm/IR/DIBuilder.h"
#include "llvm/IR/DataLayout.h"
-#include "llvm/IR/GlobalVariable.h"
-#include "llvm/IR/Instructions.h"
-#include "llvm/IR/Mangler.h"
-#include "llvm/MC/MCAsmInfo.h"
-#include "llvm/MC/MCContext.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/MC/MachineLocation.h"
+#include "llvm/MC/MCDwarf.h"
#include "llvm/MC/MCSection.h"
#include "llvm/MC/MCStreamer.h"
+#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
-#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
+#include <cassert>
+#include <cstdint>
+#include <string>
+#include <utility>
using namespace llvm;
@@ -46,18 +51,21 @@ GenerateDwarfTypeUnits("generate-type-units", cl::Hidden,
DIEDwarfExpression::DIEDwarfExpression(const AsmPrinter &AP, DwarfUnit &DU,
DIELoc &DIE)
- : DwarfExpression(AP.getDwarfDebug()->getDwarfVersion()), AP(AP), DU(DU),
+ : DwarfExpression(AP.getDwarfVersion()), AP(AP), DU(DU),
DIE(DIE) {}
void DIEDwarfExpression::EmitOp(uint8_t Op, const char* Comment) {
DU.addUInt(DIE, dwarf::DW_FORM_data1, Op);
}
+
void DIEDwarfExpression::EmitSigned(int64_t Value) {
DU.addSInt(DIE, dwarf::DW_FORM_sdata, Value);
}
+
void DIEDwarfExpression::EmitUnsigned(uint64_t Value) {
DU.addUInt(DIE, dwarf::DW_FORM_udata, Value);
}
+
bool DIEDwarfExpression::isFrameRegister(const TargetRegisterInfo &TRI,
unsigned MachineReg) {
return MachineReg == TRI.getFrameRegister(*AP.MF);
@@ -65,10 +73,8 @@ bool DIEDwarfExpression::isFrameRegister(const TargetRegisterInfo &TRI,
DwarfUnit::DwarfUnit(dwarf::Tag UnitTag, const DICompileUnit *Node,
AsmPrinter *A, DwarfDebug *DW, DwarfFile *DWU)
- : CUNode(Node), UnitDie(*DIE::get(DIEValueAllocator, UnitTag)), Asm(A),
- DD(DW), DU(DWU), IndexTyDie(nullptr), Section(nullptr) {
- assert(UnitTag == dwarf::DW_TAG_compile_unit ||
- UnitTag == dwarf::DW_TAG_type_unit);
+ : DIEUnit(A->getDwarfVersion(), A->getPointerSize(), UnitTag), CUNode(Node),
+ Asm(A), DD(DW), DU(DWU), IndexTyDie(nullptr) {
}
DwarfTypeUnit::DwarfTypeUnit(DwarfCompileUnit &CU, AsmPrinter *A,
@@ -77,7 +83,7 @@ DwarfTypeUnit::DwarfTypeUnit(DwarfCompileUnit &CU, AsmPrinter *A,
: DwarfUnit(dwarf::DW_TAG_type_unit, CU.getCUNode(), A, DW, DWU), CU(CU),
SplitLineTable(SplitLineTable) {
if (SplitLineTable)
- addSectionOffset(UnitDie, dwarf::DW_AT_stmt_list, 0);
+ addSectionOffset(getUnitDie(), dwarf::DW_AT_stmt_list, 0);
}
DwarfUnit::~DwarfUnit() {
@@ -194,6 +200,8 @@ void DwarfUnit::addUInt(DIEValueList &Die, dwarf::Attribute Attribute,
Optional<dwarf::Form> Form, uint64_t Integer) {
if (!Form)
Form = DIEInteger::BestForm(false, Integer);
+ assert(Form != dwarf::DW_FORM_implicit_const &&
+ "DW_FORM_implicit_const is used only for signed integers");
Die.addValue(DIEValueAllocator, Attribute, *Form, DIEInteger(Integer));
}
@@ -286,15 +294,15 @@ void DwarfUnit::addDIETypeSignature(DIE &Die, dwarf::Attribute Attribute,
void DwarfUnit::addDIEEntry(DIE &Die, dwarf::Attribute Attribute,
DIEEntry Entry) {
- const DIE *DieCU = Die.getUnitOrNull();
- const DIE *EntryCU = Entry.getEntry().getUnitOrNull();
- if (!DieCU)
+ const DIEUnit *CU = Die.getUnit();
+ const DIEUnit *EntryCU = Entry.getEntry().getUnit();
+ if (!CU)
// We assume that Die belongs to this CU, if it is not linked to any CU yet.
- DieCU = &getUnitDie();
+ CU = getUnitDie().getUnit();
if (!EntryCU)
- EntryCU = &getUnitDie();
+ EntryCU = getUnitDie().getUnit();
Die.addValue(DIEValueAllocator, Attribute,
- EntryCU == DieCU ? dwarf::DW_FORM_ref4 : dwarf::DW_FORM_ref_addr,
+ EntryCU == CU ? dwarf::DW_FORM_ref4 : dwarf::DW_FORM_ref_addr,
Entry);
}
@@ -365,21 +373,6 @@ void DwarfUnit::addSourceLine(DIE &Die, const DINamespace *NS) {
addSourceLine(Die, NS->getLine(), NS->getFilename(), NS->getDirectory());
}
-bool DwarfUnit::addRegisterOpPiece(DIELoc &TheDie, unsigned Reg,
- unsigned SizeInBits, unsigned OffsetInBits) {
- DIEDwarfExpression Expr(*Asm, *this, TheDie);
- Expr.AddMachineRegPiece(*Asm->MF->getSubtarget().getRegisterInfo(), Reg,
- SizeInBits, OffsetInBits);
- return true;
-}
-
-bool DwarfUnit::addRegisterOffset(DIELoc &TheDie, unsigned Reg,
- int64_t Offset) {
- DIEDwarfExpression Expr(*Asm, *this, TheDie);
- return Expr.AddMachineRegIndirect(*Asm->MF->getSubtarget().getRegisterInfo(),
- Reg, Offset);
-}
-
/* Byref variables, in Blocks, are declared by the programmer as "SomeType
VarName;", but the compiler creates a __Block_byref_x_VarName struct, and
gives the variable VarName either the struct, or a pointer to the struct, as
@@ -472,12 +465,17 @@ void DwarfUnit::addBlockByrefAddress(const DbgVariable &DV, DIE &Die,
// Decode the original location, and use that as the start of the byref
// variable's location.
DIELoc *Loc = new (DIEValueAllocator) DIELoc;
+ SmallVector<uint64_t, 6> DIExpr;
+ DIEDwarfExpression Expr(*Asm, *this, *Loc);
bool validReg;
if (Location.isReg())
- validReg = addRegisterOpPiece(*Loc, Location.getReg());
+ validReg = Expr.AddMachineReg(*Asm->MF->getSubtarget().getRegisterInfo(),
+ Location.getReg());
else
- validReg = addRegisterOffset(*Loc, Location.getReg(), Location.getOffset());
+ validReg =
+ Expr.AddMachineRegIndirect(*Asm->MF->getSubtarget().getRegisterInfo(),
+ Location.getReg(), Location.getOffset());
if (!validReg)
return;
@@ -485,27 +483,29 @@ void DwarfUnit::addBlockByrefAddress(const DbgVariable &DV, DIE &Die,
// If we started with a pointer to the __Block_byref... struct, then
// the first thing we need to do is dereference the pointer (DW_OP_deref).
if (isPointer)
- addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_deref);
+ DIExpr.push_back(dwarf::DW_OP_deref);
// Next add the offset for the '__forwarding' field:
// DW_OP_plus_uconst ForwardingFieldOffset. Note there's no point in
// adding the offset if it's 0.
if (forwardingFieldOffset > 0) {
- addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst);
- addUInt(*Loc, dwarf::DW_FORM_udata, forwardingFieldOffset);
+ DIExpr.push_back(dwarf::DW_OP_plus);
+ DIExpr.push_back(forwardingFieldOffset);
}
// Now dereference the __forwarding field to get to the real __Block_byref
// struct: DW_OP_deref.
- addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_deref);
+ DIExpr.push_back(dwarf::DW_OP_deref);
// Now that we've got the real __Block_byref... struct, add the offset
// for the variable's field to get to the location of the actual variable:
// DW_OP_plus_uconst varFieldOffset. Again, don't add if it's 0.
if (varFieldOffset > 0) {
- addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst);
- addUInt(*Loc, dwarf::DW_FORM_udata, varFieldOffset);
+ DIExpr.push_back(dwarf::DW_OP_plus);
+ DIExpr.push_back(varFieldOffset);
}
+ Expr.AddExpression(makeArrayRef(DIExpr));
+ Expr.finalize();
// Now attach the location information to the DIE.
addBlock(Die, Attribute, Loc);
@@ -538,7 +538,7 @@ static bool isUnsignedDIType(DwarfDebug *DD, const DIType *Ty) {
return true;
assert(T == dwarf::DW_TAG_typedef || T == dwarf::DW_TAG_const_type ||
T == dwarf::DW_TAG_volatile_type ||
- T == dwarf::DW_TAG_restrict_type);
+ T == dwarf::DW_TAG_restrict_type || T == dwarf::DW_TAG_atomic_type);
DITypeRef Deriv = DTy->getBaseType();
assert(Deriv && "Expected valid base type");
return isUnsignedDIType(DD, DD->resolve(Deriv));
@@ -699,6 +699,10 @@ DIE *DwarfUnit::getOrCreateTypeDIE(const MDNode *TyNode) {
if (Ty->getTag() == dwarf::DW_TAG_restrict_type && DD->getDwarfVersion() <= 2)
return getOrCreateTypeDIE(resolve(cast<DIDerivedType>(Ty)->getBaseType()));
+ // DW_TAG_atomic_type is not supported in DWARF < 5
+ if (Ty->getTag() == dwarf::DW_TAG_atomic_type && DD->getDwarfVersion() < 5)
+ return getOrCreateTypeDIE(resolve(cast<DIDerivedType>(Ty)->getBaseType()));
+
// Construct the context before querying for the existence of the DIE in case
// such construction creates the DIE.
auto *Context = resolve(Ty->getScope());
@@ -735,7 +739,7 @@ DIE *DwarfUnit::getOrCreateTypeDIE(const MDNode *TyNode) {
void DwarfUnit::updateAcceleratorTables(const DIScope *Context,
const DIType *Ty, const DIE &TyDIE) {
if (!Ty->getName().empty() && !Ty->isForwardDecl()) {
- bool IsImplementation = 0;
+ bool IsImplementation = false;
if (auto *CT = dyn_cast<DICompositeType>(Ty)) {
// A runtime language of 0 actually means C/C++ and that any
// non-negative value is some version of Objective-C/C++.
@@ -999,6 +1003,11 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, const DICompositeType *CTy) {
if (RLang)
addUInt(Buffer, dwarf::DW_AT_APPLE_runtime_class, dwarf::DW_FORM_data1,
RLang);
+
+ // Add align info if available.
+ if (uint32_t AlignInBytes = CTy->getAlignInBytes())
+ addUInt(Buffer, dwarf::DW_AT_alignment, dwarf::DW_FORM_udata,
+ AlignInBytes);
}
}
@@ -1066,6 +1075,8 @@ DIE *DwarfUnit::getOrCreateNameSpace(const DINamespace *NS) {
DD->addAccelNamespace(Name, NDie);
addGlobalName(Name, NDie, NS->getScope());
addSourceLine(NDie, NS);
+ if (NS->getExportSymbols())
+ addFlag(NDie, dwarf::DW_AT_export_symbols);
return &NDie;
}
@@ -1133,7 +1144,9 @@ bool DwarfUnit::applySubprogramDefinitionAttributes(const DISubprogram *SP,
assert(DeclDie && "This DIE should've already been constructed when the "
"definition DIE was created in "
"getOrCreateSubprogramDIE");
- DeclLinkageName = SPDecl->getLinkageName();
+ // Look at the Decl's linkage name only if we emitted it.
+ if (DD->useAllLinkageNames())
+ DeclLinkageName = SPDecl->getLinkageName();
unsigned DeclID =
getOrCreateSourceID(SPDecl->getFilename(), SPDecl->getDirectory());
unsigned DefID = getOrCreateSourceID(SP->getFilename(), SP->getDirectory());
@@ -1248,6 +1261,9 @@ void DwarfUnit::applySubprogramAttributes(const DISubprogram *SP, DIE &SPDie,
if (SP->isRValueReference())
addFlag(SPDie, dwarf::DW_AT_rvalue_reference);
+ if (SP->isNoReturn())
+ addFlag(SPDie, dwarf::DW_AT_noreturn);
+
if (SP->isProtected())
addUInt(SPDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1,
dwarf::DW_ACCESS_protected);
@@ -1260,6 +1276,9 @@ void DwarfUnit::applySubprogramAttributes(const DISubprogram *SP, DIE &SPDie,
if (SP->isExplicit())
addFlag(SPDie, dwarf::DW_AT_explicit);
+
+ if (SP->isMainSubprogram())
+ addFlag(SPDie, dwarf::DW_AT_main_subprogram);
}
void DwarfUnit::constructSubrangeDIE(DIE &Buffer, const DISubrange *SR,
@@ -1288,7 +1307,7 @@ DIE *DwarfUnit::getIndexTyDie() {
if (IndexTyDie)
return IndexTyDie;
// Construct an integer type to use for indexes.
- IndexTyDie = &createAndAddDIE(dwarf::DW_TAG_base_type, UnitDie);
+ IndexTyDie = &createAndAddDIE(dwarf::DW_TAG_base_type, getUnitDie());
addString(*IndexTyDie, dwarf::DW_AT_name, "sizetype");
addUInt(*IndexTyDie, dwarf::DW_AT_byte_size, None, sizeof(int64_t));
addUInt(*IndexTyDie, dwarf::DW_AT_encoding, dwarf::DW_FORM_data1,
@@ -1383,6 +1402,7 @@ void DwarfUnit::constructMemberDIE(DIE &Buffer, const DIDerivedType *DT) {
} else {
uint64_t Size = DT->getSizeInBits();
uint64_t FieldSize = DD->getBaseTypeSize(DT);
+ uint32_t AlignInBytes = DT->getAlignInBytes();
uint64_t OffsetInBytes;
bool IsBitfield = FieldSize && Size != FieldSize;
@@ -1393,8 +1413,11 @@ void DwarfUnit::constructMemberDIE(DIE &Buffer, const DIDerivedType *DT) {
addUInt(MemberDie, dwarf::DW_AT_bit_size, None, Size);
uint64_t Offset = DT->getOffsetInBits();
- uint64_t Align = DT->getAlignInBits() ? DT->getAlignInBits() : FieldSize;
- uint64_t AlignMask = ~(Align - 1);
+ // We can't use DT->getAlignInBits() here: AlignInBits for member type
+ // is non-zero if and only if alignment was forced (e.g. _Alignas()),
+ // which can't be done with bitfields. Thus we use FieldSize here.
+ uint32_t AlignInBits = FieldSize;
+ uint32_t AlignMask = ~(AlignInBits - 1);
// The bits from the start of the storage unit to the start of the field.
uint64_t StartBitOffset = Offset - (Offset & AlignMask);
// The byte offset of the field's aligned storage unit inside the struct.
@@ -1417,6 +1440,9 @@ void DwarfUnit::constructMemberDIE(DIE &Buffer, const DIDerivedType *DT) {
} else {
// This is not a bitfield.
OffsetInBytes = DT->getOffsetInBits() / 8;
+ if (AlignInBytes)
+ addUInt(MemberDie, dwarf::DW_AT_alignment, dwarf::DW_FORM_udata,
+ AlignInBytes);
}
if (DD->getDwarfVersion() <= 2) {
@@ -1493,13 +1519,17 @@ DIE *DwarfUnit::getOrCreateStaticMemberDIE(const DIDerivedType *DT) {
if (const ConstantFP *CFP = dyn_cast_or_null<ConstantFP>(DT->getConstant()))
addConstantFPValue(StaticMemberDIE, CFP);
+ if (uint32_t AlignInBytes = DT->getAlignInBytes())
+ addUInt(StaticMemberDIE, dwarf::DW_AT_alignment, dwarf::DW_FORM_udata,
+ AlignInBytes);
+
return &StaticMemberDIE;
}
void DwarfUnit::emitHeader(bool UseOffsets) {
// Emit size of content not including length itself
Asm->OutStreamer->AddComment("Length of Unit");
- Asm->EmitInt32(getHeaderSize() + UnitDie.getSize());
+ Asm->EmitInt32(getHeaderSize() + getUnitDie().getSize());
Asm->OutStreamer->AddComment("DWARF version number");
Asm->EmitInt16(DD->getDwarfVersion());
@@ -1519,11 +1549,6 @@ void DwarfUnit::emitHeader(bool UseOffsets) {
Asm->EmitInt8(Asm->getDataLayout().getPointerSize());
}
-void DwarfUnit::initSection(MCSection *Section) {
- assert(!this->Section);
- this->Section = Section;
-}
-
void DwarfTypeUnit::emitHeader(bool UseOffsets) {
DwarfUnit::emitHeader(UseOffsets);
Asm->OutStreamer->AddComment("Type Signature");
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h
index e225f92..8654d6f 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h
@@ -65,7 +65,7 @@ public:
//===----------------------------------------------------------------------===//
/// This dwarf writer support class manages information associated with a
/// source file.
-class DwarfUnit {
+ class DwarfUnit : public DIEUnit {
protected:
/// MDNode for the compile unit.
const DICompileUnit *CUNode;
@@ -73,9 +73,6 @@ protected:
// All DIEValues are allocated through this allocator.
BumpPtrAllocator DIEValueAllocator;
- /// Unit debug information entry.
- DIE &UnitDie;
-
/// Target of Dwarf emission.
AsmPrinter *Asm;
@@ -83,7 +80,7 @@ protected:
DwarfDebug *DD;
DwarfFile *DU;
- /// An anonymous type for index type. Owned by UnitDie.
+ /// An anonymous type for index type. Owned by DIEUnit.
DIE *IndexTyDie;
/// Tracks the mapping of unit level debug information variables to debug
@@ -101,9 +98,6 @@ protected:
/// corresponds to the MDNode mapped with the subprogram DIE.
DenseMap<DIE *, const DINode *> ContainingTypeMap;
- /// The section this unit will be emitted in.
- MCSection *Section;
-
DwarfUnit(dwarf::Tag, const DICompileUnit *CU, AsmPrinter *A, DwarfDebug *DW,
DwarfFile *DWU);
@@ -112,21 +106,13 @@ protected:
public:
virtual ~DwarfUnit();
- void initSection(MCSection *Section);
-
- MCSection *getSection() const {
- assert(Section);
- return Section;
- }
-
// Accessors.
AsmPrinter* getAsmPrinter() const { return Asm; }
uint16_t getLanguage() const { return CUNode->getSourceLanguage(); }
const DICompileUnit *getCUNode() const { return CUNode; }
- DIE &getUnitDie() { return UnitDie; }
/// Return true if this compile unit has something to write out.
- bool hasContent() const { return UnitDie.hasChildren(); }
+ bool hasContent() const { return getUnitDie().hasChildren(); }
/// Get string containing language specific context for a global name.
///
@@ -249,17 +235,6 @@ public:
/// Add template parameters in buffer.
void addTemplateParams(DIE &Buffer, DINodeArray TParams);
- /// Add register operand.
- /// \returns false if the register does not exist, e.g., because it was never
- /// materialized.
- bool addRegisterOpPiece(DIELoc &TheDie, unsigned Reg,
- unsigned SizeInBits = 0, unsigned OffsetInBits = 0);
-
- /// Add register offset.
- /// \returns false if the register does not exist, e.g., because it was never
- /// materialized.
- bool addRegisterOffset(DIELoc &TheDie, unsigned Reg, int64_t Offset);
-
// FIXME: Should be reformulated in terms of addComplexAddress.
/// Start with the address based on the location provided, and generate the
/// DWARF information necessary to find the actual Block variable (navigating
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp
index e24dcb1..0a4a7a0 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp
@@ -74,7 +74,7 @@ computeActionsTable(const SmallVectorImpl<const LandingPadInfo*> &LandingPads,
// output using a fixed width encoding. FilterOffsets[i] holds the byte
// offset corresponding to FilterIds[i].
- const std::vector<unsigned> &FilterIds = MMI->getFilterIds();
+ const std::vector<unsigned> &FilterIds = Asm->MF->getFilterIds();
SmallVector<int, 16> FilterOffsets;
FilterOffsets.reserve(FilterIds.size());
int Offset = -1;
@@ -296,7 +296,7 @@ computeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites,
else {
// SjLj EH must maintain the call sites in the order assigned
// to them by the SjLjPrepare pass.
- unsigned SiteNo = MMI->getCallSiteBeginLabel(BeginLabel);
+ unsigned SiteNo = Asm->MF->getCallSiteBeginLabel(BeginLabel);
if (CallSites.size() < SiteNo)
CallSites.resize(SiteNo);
CallSites[SiteNo - 1] = Site;
@@ -336,9 +336,10 @@ computeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites,
/// 3. Type ID table contains references to all the C++ typeinfo for all
/// catches in the function. This table is reverse indexed base 1.
void EHStreamer::emitExceptionTable() {
- const std::vector<const GlobalValue *> &TypeInfos = MMI->getTypeInfos();
- const std::vector<unsigned> &FilterIds = MMI->getFilterIds();
- const std::vector<LandingPadInfo> &PadInfos = MMI->getLandingPads();
+ const MachineFunction *MF = Asm->MF;
+ const std::vector<const GlobalValue *> &TypeInfos = MF->getTypeInfos();
+ const std::vector<unsigned> &FilterIds = MF->getFilterIds();
+ const std::vector<LandingPadInfo> &PadInfos = MF->getLandingPads();
// Sort the landing pads in order of their type ids. This is used to fold
// duplicate actions.
@@ -649,8 +650,9 @@ void EHStreamer::emitExceptionTable() {
}
void EHStreamer::emitTypeInfos(unsigned TTypeEncoding) {
- const std::vector<const GlobalValue *> &TypeInfos = MMI->getTypeInfos();
- const std::vector<unsigned> &FilterIds = MMI->getFilterIds();
+ const MachineFunction *MF = Asm->MF;
+ const std::vector<const GlobalValue *> &TypeInfos = MF->getTypeInfos();
+ const std::vector<unsigned> &FilterIds = MF->getFilterIds();
bool VerboseAsm = Asm->OutStreamer->isVerboseAsm();
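
All three EHStreamer hunks apply the same mechanical migration: per-function EH tables moved from MachineModuleInfo onto the MachineFunction itself. Condensed, using only accessors visible above:

    // Old: module-level lookup through MMI.
    //   const std::vector<unsigned> &FilterIds = MMI->getFilterIds();
    // New: the AsmPrinter's current function owns its EH tables.
    const MachineFunction *MF = Asm->MF;
    const std::vector<const GlobalValue *> &TypeInfos = MF->getTypeInfos();
    const std::vector<unsigned> &FilterIds = MF->getFilterIds();
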
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp
index c09ef6a..8baee4d 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp
@@ -50,7 +50,7 @@ static void EmitCamlGlobal(const Module &M, AsmPrinter &AP, const char *Id) {
std::string SymName;
SymName += "caml";
size_t Letter = SymName.size();
- SymName.append(MId.begin(), std::find(MId.begin(), MId.end(), '.'));
+ SymName.append(MId.begin(), find(MId, '.'));
SymName += "__";
SymName += Id;
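
The one-line OcamlGCPrinter change swaps std::find for llvm::find from llvm/ADT/STLExtras.h, which takes a range rather than an iterator pair; behavior is unchanged. A self-contained illustration (the module id "foo.ml" is hypothetical):

    #include "llvm/ADT/STLExtras.h"
    #include <string>

    std::string camlPrefix(const std::string &MId) {
      std::string SymName = "caml";
      // llvm::find(MId, '.') == std::find(MId.begin(), MId.end(), '.')
      SymName.append(MId.begin(), llvm::find(MId, '.'));
      return SymName; // "foo.ml" -> "camlfoo"
    }
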
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/WinException.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/WinException.cpp
index e5933d8..9d7c96a 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/WinException.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/WinException.cpp
@@ -63,8 +63,8 @@ void WinException::beginFunction(const MachineFunction *MF) {
shouldEmitMoves = shouldEmitPersonality = shouldEmitLSDA = false;
// If any landing pads survive, we need an EH table.
- bool hasLandingPads = !MMI->getLandingPads().empty();
- bool hasEHFunclets = MMI->hasEHFunclets();
+ bool hasLandingPads = !MF->getLandingPads().empty();
+ bool hasEHFunclets = MF->hasEHFunclets();
const Function *F = MF->getFunction();
@@ -72,17 +72,21 @@ void WinException::beginFunction(const MachineFunction *MF) {
const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
unsigned PerEncoding = TLOF.getPersonalityEncoding();
- const Function *Per = nullptr;
- if (F->hasPersonalityFn())
- Per = dyn_cast<Function>(F->getPersonalityFn()->stripPointerCasts());
- bool forceEmitPersonality =
- F->hasPersonalityFn() && !isNoOpWithoutInvoke(classifyEHPersonality(Per)) &&
- F->needsUnwindTableEntry();
+ EHPersonality Per = EHPersonality::Unknown;
+ const Function *PerFn = nullptr;
+ if (F->hasPersonalityFn()) {
+ PerFn = dyn_cast<Function>(F->getPersonalityFn()->stripPointerCasts());
+ Per = classifyEHPersonality(PerFn);
+ }
+
+ bool forceEmitPersonality = F->hasPersonalityFn() &&
+ !isNoOpWithoutInvoke(Per) &&
+ F->needsUnwindTableEntry();
shouldEmitPersonality =
forceEmitPersonality || ((hasLandingPads || hasEHFunclets) &&
- PerEncoding != dwarf::DW_EH_PE_omit && Per);
+ PerEncoding != dwarf::DW_EH_PE_omit && PerFn);
unsigned LSDAEncoding = TLOF.getLSDAEncoding();
shouldEmitLSDA = shouldEmitPersonality &&
@@ -90,7 +94,16 @@ void WinException::beginFunction(const MachineFunction *MF) {
// If we're not using CFI, we don't want the CFI or the personality, but we
// might want EH tables if we had EH pads.
- if (!Asm->MAI->usesWindowsCFI()) {
+ if (!Asm->MAI->usesWindowsCFI() || (!MF->hasWinCFI() && !PerFn)) {
+ if (Per == EHPersonality::MSVC_X86SEH && !hasEHFunclets) {
+ // If this is 32-bit SEH and we don't have any funclets (really invokes),
+ // make sure we emit the parent offset label. Some unreferenced filter
+ // functions may still refer to it.
+ const WinEHFuncInfo &FuncInfo = *MF->getWinEHFuncInfo();
+ StringRef FLinkageName =
+ GlobalValue::getRealLinkageName(MF->getFunction()->getName());
+ emitEHRegistrationOffsetLabel(FuncInfo, FLinkageName);
+ }
shouldEmitLSDA = hasEHFunclets;
shouldEmitPersonality = false;
return;
@@ -108,18 +121,20 @@ void WinException::endFunction(const MachineFunction *MF) {
const Function *F = MF->getFunction();
EHPersonality Per = EHPersonality::Unknown;
if (F->hasPersonalityFn())
- Per = classifyEHPersonality(F->getPersonalityFn());
+ Per = classifyEHPersonality(F->getPersonalityFn()->stripPointerCasts());
// Get rid of any dead landing pads if we're not using funclets. In funclet
// schemes, the landing pad is not actually reachable. It only exists so
// that we can emit the right table data.
- if (!isFuncletEHPersonality(Per))
- MMI->TidyLandingPads();
+ if (!isFuncletEHPersonality(Per)) {
+ MachineFunction *NonConstMF = const_cast<MachineFunction*>(MF);
+ NonConstMF->tidyLandingPads();
+ }
endFunclet();
// endFunclet will emit the necessary .xdata tables for x64 SEH.
- if (Per == EHPersonality::MSVC_Win64SEH && MMI->hasEHFunclets())
+ if (Per == EHPersonality::MSVC_Win64SEH && MF->hasEHFunclets())
return;
if (shouldEmitPersonality || shouldEmitLSDA) {
@@ -147,7 +162,7 @@ void WinException::endFunction(const MachineFunction *MF) {
}
}
-/// Retreive the MCSymbol for a GlobalValue or MachineBasicBlock.
+/// Retrieve the MCSymbol for a GlobalValue or MachineBasicBlock.
static MCSymbol *getMCSymbolForMBB(AsmPrinter *Asm,
const MachineBasicBlock *MBB) {
if (!MBB)
@@ -193,8 +208,10 @@ void WinException::beginFunclet(const MachineBasicBlock &MBB,
}
// Mark 'Sym' as starting our funclet.
- if (shouldEmitMoves || shouldEmitPersonality)
+ if (shouldEmitMoves || shouldEmitPersonality) {
+ CurrentFuncletTextSection = Asm->OutStreamer->getCurrentSectionOnly();
Asm->OutStreamer->EmitWinCFIStartProc(Sym);
+ }
if (shouldEmitPersonality) {
const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
@@ -204,16 +221,14 @@ void WinException::beginFunclet(const MachineBasicBlock &MBB,
if (F->hasPersonalityFn())
PerFn = dyn_cast<Function>(F->getPersonalityFn()->stripPointerCasts());
const MCSymbol *PersHandlerSym =
- TLOF.getCFIPersonalitySymbol(PerFn, *Asm->Mang, Asm->TM, MMI);
-
- // Classify the personality routine so that we may reason about it.
- EHPersonality Per = EHPersonality::Unknown;
- if (F->hasPersonalityFn())
- Per = classifyEHPersonality(F->getPersonalityFn());
-
- // Do not emit a .seh_handler directive if it is a C++ cleanup funclet.
- if (Per != EHPersonality::MSVC_CXX ||
- !CurrentFuncletEntry->isCleanupFuncletEntry())
+ TLOF.getCFIPersonalitySymbol(PerFn, Asm->TM, MMI);
+
+ // Do not emit .seh_handler directives for cleanup funclets.
+ // FIXME: This means cleanup funclets cannot handle exceptions. Given that
+ // Clang doesn't produce EH constructs inside cleanup funclets and LLVM's
+ // inliner doesn't allow inlining them, this isn't a major problem in
+ // practice.
+ if (!CurrentFuncletEntry->isCleanupFuncletEntry())
Asm->OutStreamer->EmitWinEHHandler(PersHandlerSym, true, true);
}
}
@@ -223,15 +238,12 @@ void WinException::endFunclet() {
if (!CurrentFuncletEntry)
return;
+ const MachineFunction *MF = Asm->MF;
if (shouldEmitMoves || shouldEmitPersonality) {
- const Function *F = Asm->MF->getFunction();
+ const Function *F = MF->getFunction();
EHPersonality Per = EHPersonality::Unknown;
if (F->hasPersonalityFn())
- Per = classifyEHPersonality(F->getPersonalityFn());
-
- // The .seh_handlerdata directive implicitly switches section, push the
- // current section so that we may return to it.
- Asm->OutStreamer->PushSection();
+ Per = classifyEHPersonality(F->getPersonalityFn()->stripPointerCasts());
// Emit an UNWIND_INFO struct describing the prologue.
Asm->OutStreamer->EmitWinEHHandlerData();
@@ -244,18 +256,17 @@ void WinException::endFunclet() {
MCSymbol *FuncInfoXData = Asm->OutContext.getOrCreateSymbol(
Twine("$cppxdata$", FuncLinkageName));
Asm->OutStreamer->EmitValue(create32bitRef(FuncInfoXData), 4);
- } else if (Per == EHPersonality::MSVC_Win64SEH && MMI->hasEHFunclets() &&
+ } else if (Per == EHPersonality::MSVC_Win64SEH && MF->hasEHFunclets() &&
!CurrentFuncletEntry->isEHFuncletEntry()) {
// If this is the parent function in Win64 SEH, emit the LSDA immediately
// following .seh_handlerdata.
- emitCSpecificHandlerTable(Asm->MF);
+ emitCSpecificHandlerTable(MF);
}
- // Switch back to the previous section now that we are done writing to
- // .xdata.
- Asm->OutStreamer->PopSection();
-
- // Emit a .seh_endproc directive to mark the end of the function.
+ // Switch back to the funclet start .text section now that we are done
+ // writing to .xdata, and emit a .seh_endproc directive to mark the end of
+ // the function.
+ Asm->OutStreamer->SwitchSection(CurrentFuncletTextSection);
Asm->OutStreamer->EmitWinCFIEndProc();
}
@@ -905,15 +916,24 @@ void WinException::emitEHRegistrationOffsetLabel(const WinEHFuncInfo &FuncInfo,
// registration in order to recover the parent frame pointer. Now that we know
// we've code generated the parent, we can emit the label assignment that
// those helpers use to get the offset of the registration node.
+
+ // Compute the parent frame offset. The EHRegNodeFrameIndex will be invalid
+ // if, after optimization, all the invokes were eliminated. We still need to
+ // emit the parent frame offset label, but its value is then garbage and
+ // should never be used.
+ int64_t Offset = 0;
+ int FI = FuncInfo.EHRegNodeFrameIndex;
+ if (FI != INT_MAX) {
+ const TargetFrameLowering *TFI = Asm->MF->getSubtarget().getFrameLowering();
+ unsigned UnusedReg;
+ Offset = TFI->getFrameIndexReference(*Asm->MF, FI, UnusedReg);
+ }
+
MCContext &Ctx = Asm->OutContext;
MCSymbol *ParentFrameOffset =
Ctx.getOrCreateParentFrameOffsetSymbol(FLinkageName);
- unsigned UnusedReg;
- const TargetFrameLowering *TFI = Asm->MF->getSubtarget().getFrameLowering();
- int64_t Offset = TFI->getFrameIndexReference(
- *Asm->MF, FuncInfo.EHRegNodeFrameIndex, UnusedReg);
- const MCExpr *MCOffset = MCConstantExpr::create(Offset, Ctx);
- Asm->OutStreamer->EmitAssignment(ParentFrameOffset, MCOffset);
+ Asm->OutStreamer->EmitAssignment(ParentFrameOffset,
+ MCConstantExpr::create(Offset, Ctx));
}
/// Emit the language-specific data that _except_handler3 and 4 expect. This is
@@ -966,11 +986,11 @@ void WinException::emitExceptHandlerTable(const MachineFunction *MF) {
// Retrieve the Guard Stack slot.
int GSCookieOffset = -2;
- const MachineFrameInfo *MFI = MF->getFrameInfo();
- if (MFI->hasStackProtectorIndex()) {
+ const MachineFrameInfo &MFI = MF->getFrameInfo();
+ if (MFI.hasStackProtectorIndex()) {
unsigned UnusedReg;
const TargetFrameLowering *TFI = MF->getSubtarget().getFrameLowering();
- int SSPIdx = MFI->getStackProtectorIndex();
+ int SSPIdx = MFI.getStackProtectorIndex();
GSCookieOffset = TFI->getFrameIndexReference(*MF, SSPIdx, UnusedReg);
}
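
Two design points in the WinException hunks deserve a gloss. First, EmitWinEHHandlerData() implicitly switches the streamer to .xdata, so the old PushSection/PopSection pair is replaced by recording the funclet's .text section at .seh_proc time and switching back before .seh_endproc. Second, emitEHRegistrationOffsetLabel() must now tolerate an EHRegNodeFrameIndex of INT_MAX, the sentinel left behind when optimization removes every invoke. A sketch of both, restricted to calls that appear above:

    // Funclet start: remember the section this funclet is emitted into.
    CurrentFuncletTextSection = Asm->OutStreamer->getCurrentSectionOnly();
    Asm->OutStreamer->EmitWinCFIStartProc(Sym);

    // Funclet end: EmitWinEHHandlerData() left the streamer in .xdata, so
    // switch back explicitly before closing the procedure.
    Asm->OutStreamer->SwitchSection(CurrentFuncletTextSection);
    Asm->OutStreamer->EmitWinCFIEndProc();

    // Registration offset: emit 0 for an eliminated frame index instead of
    // querying frame lowering with the INT_MAX sentinel.
    int64_t Offset = 0;
    if (FuncInfo.EHRegNodeFrameIndex != INT_MAX) {
      unsigned UnusedReg;
      Offset = TFI->getFrameIndexReference(*Asm->MF,
                                           FuncInfo.EHRegNodeFrameIndex,
                                           UnusedReg);
    }
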
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/WinException.h b/contrib/llvm/lib/CodeGen/AsmPrinter/WinException.h
index acb3010..371061c 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/WinException.h
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/WinException.h
@@ -21,6 +21,7 @@ class Function;
class GlobalValue;
class MachineFunction;
class MCExpr;
+class MCSection;
class Value;
struct WinEHFuncInfo;
@@ -40,6 +41,9 @@ class LLVM_LIBRARY_VISIBILITY WinException : public EHStreamer {
/// Pointer to the current funclet entry BB.
const MachineBasicBlock *CurrentFuncletEntry = nullptr;
+ /// The section of the last funclet start.
+ MCSection *CurrentFuncletTextSection = nullptr;
+
void emitCSpecificHandlerTable(const MachineFunction *MF);
void emitSEHActionsForRange(const WinEHFuncInfo &FuncInfo,
diff --git a/contrib/llvm/lib/CodeGen/BranchFolding.cpp b/contrib/llvm/lib/CodeGen/BranchFolding.cpp
index 5dacbf9..6fba161 100644
--- a/contrib/llvm/lib/CodeGen/BranchFolding.cpp
+++ b/contrib/llvm/lib/CodeGen/BranchFolding.cpp
@@ -110,9 +110,12 @@ bool BranchFolderPass::runOnMachineFunction(MachineFunction &MF) {
BranchFolder::BranchFolder(bool defaultEnableTailMerge, bool CommonHoist,
MBFIWrapper &FreqInfo,
- const MachineBranchProbabilityInfo &ProbInfo)
- : EnableHoistCommonCode(CommonHoist), MBBFreqInfo(FreqInfo),
- MBPI(ProbInfo) {
+ const MachineBranchProbabilityInfo &ProbInfo,
+ unsigned MinTailLength)
+ : EnableHoistCommonCode(CommonHoist), MinCommonTailLength(MinTailLength),
+ MBBFreqInfo(FreqInfo), MBPI(ProbInfo) {
+ if (MinCommonTailLength == 0)
+ MinCommonTailLength = TailMergeSize;
switch (FlagEnableTailMerge) {
case cl::BOU_UNSET: EnableTailMerge = defaultEnableTailMerge; break;
case cl::BOU_TRUE: EnableTailMerge = true; break;
@@ -141,59 +144,6 @@ void BranchFolder::RemoveDeadBlock(MachineBasicBlock *MBB) {
MLI->removeBlock(MBB);
}
-/// OptimizeImpDefsBlock - If a basic block is just a bunch of implicit_def
-/// followed by terminators, and if the implicitly defined registers are not
-/// used by the terminators, remove those implicit_def's. e.g.
-/// BB1:
-/// r0 = implicit_def
-/// r1 = implicit_def
-/// br
-/// This block can be optimized away later if the implicit instructions are
-/// removed.
-bool BranchFolder::OptimizeImpDefsBlock(MachineBasicBlock *MBB) {
- SmallSet<unsigned, 4> ImpDefRegs;
- MachineBasicBlock::iterator I = MBB->begin();
- while (I != MBB->end()) {
- if (!I->isImplicitDef())
- break;
- unsigned Reg = I->getOperand(0).getReg();
- if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
- for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true);
- SubRegs.isValid(); ++SubRegs)
- ImpDefRegs.insert(*SubRegs);
- } else {
- ImpDefRegs.insert(Reg);
- }
- ++I;
- }
- if (ImpDefRegs.empty())
- return false;
-
- MachineBasicBlock::iterator FirstTerm = I;
- while (I != MBB->end()) {
- if (!TII->isUnpredicatedTerminator(*I))
- return false;
- // See if it uses any of the implicitly defined registers.
- for (const MachineOperand &MO : I->operands()) {
- if (!MO.isReg() || !MO.isUse())
- continue;
- unsigned Reg = MO.getReg();
- if (ImpDefRegs.count(Reg))
- return false;
- }
- ++I;
- }
-
- I = MBB->begin();
- while (I != FirstTerm) {
- MachineInstr *ImpDefMI = &*I;
- ++I;
- MBB->erase(ImpDefMI);
- }
-
- return true;
-}
-
/// OptimizeFunction - Perform branch folding, tail merging and other
/// CFG optimizations on the given function. Block placement changes the layout
/// and may create new tail merging opportunities.
@@ -224,7 +174,6 @@ bool BranchFolder::OptimizeFunction(MachineFunction &MF,
SmallVector<MachineOperand, 4> Cond;
if (!TII->analyzeBranch(MBB, TBB, FBB, Cond, true))
MadeChange |= MBB.CorrectExtraCFGEdges(TBB, FBB, !Cond.empty());
- MadeChange |= OptimizeImpDefsBlock(&MBB);
}
// Recalculate funclet membership.
@@ -399,37 +348,16 @@ static unsigned ComputeCommonTailLength(MachineBasicBlock *MBB1,
return TailLen;
}
-void BranchFolder::computeLiveIns(MachineBasicBlock &MBB) {
- if (!UpdateLiveIns)
- return;
-
- LiveRegs.init(TRI);
- LiveRegs.addLiveOutsNoPristines(MBB);
- for (MachineInstr &MI : make_range(MBB.rbegin(), MBB.rend()))
- LiveRegs.stepBackward(MI);
-
- for (unsigned Reg : LiveRegs) {
- // Skip the register if we are about to add one of its super registers.
- bool ContainsSuperReg = false;
- for (MCSuperRegIterator SReg(Reg, TRI); SReg.isValid(); ++SReg) {
- if (LiveRegs.contains(*SReg)) {
- ContainsSuperReg = true;
- break;
- }
- }
- if (ContainsSuperReg)
- continue;
- MBB.addLiveIn(Reg);
- }
-}
-
/// ReplaceTailWithBranchTo - Delete the instruction OldInst and everything
/// after it, replacing it with an unconditional branch to NewDest.
void BranchFolder::ReplaceTailWithBranchTo(MachineBasicBlock::iterator OldInst,
MachineBasicBlock *NewDest) {
TII->ReplaceTailWithBranchTo(OldInst, NewDest);
- computeLiveIns(*NewDest);
+ if (UpdateLiveIns) {
+ NewDest->clearLiveIns();
+ computeLiveIns(LiveRegs, *TRI, *NewDest);
+ }
++NumTailMerge;
}
@@ -467,7 +395,8 @@ MachineBasicBlock *BranchFolder::SplitMBBAt(MachineBasicBlock &CurMBB,
// NewMBB inherits CurMBB's block frequency.
MBBFreqInfo.setBlockFreq(NewMBB, MBBFreqInfo.getBlockFreq(&CurMBB));
- computeLiveIns(*NewMBB);
+ if (UpdateLiveIns)
+ computeLiveIns(LiveRegs, *TRI, *NewMBB);
// Add the new block to the funclet.
const auto &FuncletI = FuncletMembership.find(&CurMBB);
@@ -511,14 +440,14 @@ static void FixTail(MachineBasicBlock *CurMBB, MachineBasicBlock *SuccBB,
if (I != MF->end() && !TII->analyzeBranch(*CurMBB, TBB, FBB, Cond, true)) {
MachineBasicBlock *NextBB = &*I;
if (TBB == NextBB && !Cond.empty() && !FBB) {
- if (!TII->ReverseBranchCondition(Cond)) {
- TII->RemoveBranch(*CurMBB);
- TII->InsertBranch(*CurMBB, SuccBB, nullptr, Cond, dl);
+ if (!TII->reverseBranchCondition(Cond)) {
+ TII->removeBranch(*CurMBB);
+ TII->insertBranch(*CurMBB, SuccBB, nullptr, Cond, dl);
return;
}
}
}
- TII->InsertBranch(*CurMBB, SuccBB, nullptr,
+ TII->insertBranch(*CurMBB, SuccBB, nullptr,
SmallVector<MachineOperand, 0>(), dl);
}
@@ -591,13 +520,26 @@ static unsigned CountTerminators(MachineBasicBlock *MBB,
/// and decide if it would be profitable to merge those tails. Return the
/// length of the common tail and iterators to the first common instruction
/// in each block.
+/// MBB1, MBB2 The blocks to check
+/// MinCommonTailLength Minimum size of tail block to be merged.
+/// CommonTailLen Out parameter to record the size of the shared tail between
+/// MBB1 and MBB2
+/// I1, I2 Iterator references that will be changed to point to the first
+/// instruction in the common tail shared by MBB1,MBB2
+/// SuccBB A common successor of MBB1, MBB2 which are in a canonical form
+/// relative to SuccBB
+/// PredBB The layout predecessor of SuccBB, if any.
+/// FuncletMembership map from block to funclet #.
+/// AfterPlacement True if we are merging blocks after layout. Stricter
+/// thresholds apply to prevent undoing tail-duplication.
static bool
ProfitableToMerge(MachineBasicBlock *MBB1, MachineBasicBlock *MBB2,
- unsigned minCommonTailLength, unsigned &CommonTailLen,
+ unsigned MinCommonTailLength, unsigned &CommonTailLen,
MachineBasicBlock::iterator &I1,
MachineBasicBlock::iterator &I2, MachineBasicBlock *SuccBB,
MachineBasicBlock *PredBB,
- DenseMap<const MachineBasicBlock *, int> &FuncletMembership) {
+ DenseMap<const MachineBasicBlock *, int> &FuncletMembership,
+ bool AfterPlacement) {
// It is never profitable to tail-merge blocks from two different funclets.
if (!FuncletMembership.empty()) {
auto Funclet1 = FuncletMembership.find(MBB1);
@@ -617,7 +559,11 @@ ProfitableToMerge(MachineBasicBlock *MBB1, MachineBasicBlock *MBB2,
// It's almost always profitable to merge any number of non-terminator
// instructions with the block that falls through into the common successor.
- if (MBB1 == PredBB || MBB2 == PredBB) {
+ // This is true only for a single successor. For multiple successors, we are
+ // trading a conditional branch for an unconditional one.
+ // TODO: Re-visit successor size for non-layout tail merging.
+ if ((MBB1 == PredBB || MBB2 == PredBB) &&
+ (!AfterPlacement || MBB1->succ_size() == 1)) {
MachineBasicBlock::iterator I;
unsigned NumTerms = CountTerminators(MBB1 == PredBB ? MBB2 : MBB1, I);
if (CommonTailLen > NumTerms)
@@ -635,15 +581,18 @@ ProfitableToMerge(MachineBasicBlock *MBB1, MachineBasicBlock *MBB2,
// If both blocks have an unconditional branch temporarily stripped out,
// count that as an additional common instruction for the following
- // heuristics.
+ // heuristics. This heuristic is only accurate for single-successor blocks,
+ // so to make sure that merging and duplicating during layout don't crash,
+ // we check for a single successor when merging during layout.
unsigned EffectiveTailLen = CommonTailLen;
if (SuccBB && MBB1 != PredBB && MBB2 != PredBB &&
+ (MBB1->succ_size() == 1 || !AfterPlacement) &&
!MBB1->back().isBarrier() &&
!MBB2->back().isBarrier())
++EffectiveTailLen;
// Check if the common tail is long enough to be worthwhile.
- if (EffectiveTailLen >= minCommonTailLength)
+ if (EffectiveTailLen >= MinCommonTailLength)
return true;
// If we are optimizing for code size, 2 instructions in common is enough if
@@ -666,7 +615,7 @@ ProfitableToMerge(MachineBasicBlock *MBB1, MachineBasicBlock *MBB2,
/// those blocks appear in MergePotentials (where they are not necessarily
/// consecutive).
unsigned BranchFolder::ComputeSameTails(unsigned CurHash,
- unsigned minCommonTailLength,
+ unsigned MinCommonTailLength,
MachineBasicBlock *SuccBB,
MachineBasicBlock *PredBB) {
unsigned maxCommonTailLength = 0U;
@@ -679,10 +628,11 @@ unsigned BranchFolder::ComputeSameTails(unsigned CurHash,
for (MPIterator I = std::prev(CurMPIter); I->getHash() == CurHash; --I) {
unsigned CommonTailLen;
if (ProfitableToMerge(CurMPIter->getBlock(), I->getBlock(),
- minCommonTailLength,
+ MinCommonTailLength,
CommonTailLen, TrialBBI1, TrialBBI2,
SuccBB, PredBB,
- FuncletMembership)) {
+ FuncletMembership,
+ AfterBlockPlacement)) {
if (CommonTailLen > maxCommonTailLength) {
SameTails.clear();
maxCommonTailLength = CommonTailLen;
@@ -749,8 +699,6 @@ bool BranchFolder::CreateCommonTailOnlyBlock(MachineBasicBlock *&PredBB,
SameTails[commonTailIndex].getTailStartPos();
MachineBasicBlock *MBB = SameTails[commonTailIndex].getBlock();
- // If the common tail includes any debug info we will take it pretty
- // randomly from one of the inputs. Might be better to remove it?
DEBUG(dbgs() << "\nSplitting BB#" << MBB->getNumber() << ", size "
<< maxCommonTailLength);
@@ -832,14 +780,13 @@ mergeOperations(MachineBasicBlock::iterator MBBIStartPos,
// branch to Succ added (but the predecessor/successor lists need no
// adjustment). The lone predecessor of Succ that falls through into Succ,
// if any, is given in PredBB.
+// MinCommonTailLength - Except for the special cases below, tail-merge if
+// there are at least this many instructions in common.
bool BranchFolder::TryTailMergeBlocks(MachineBasicBlock *SuccBB,
- MachineBasicBlock *PredBB) {
+ MachineBasicBlock *PredBB,
+ unsigned MinCommonTailLength) {
bool MadeChange = false;
- // Except for the special cases below, tail-merge if there are at least
- // this many instructions in common.
- unsigned minCommonTailLength = TailMergeSize;
-
DEBUG(dbgs() << "\nTryTailMergeBlocks: ";
for (unsigned i = 0, e = MergePotentials.size(); i != e; ++i)
dbgs() << "BB#" << MergePotentials[i].getBlock()->getNumber()
@@ -852,8 +799,8 @@ bool BranchFolder::TryTailMergeBlocks(MachineBasicBlock *SuccBB,
<< PredBB->getNumber() << "\n";
}
dbgs() << "Looking for common tails of at least "
- << minCommonTailLength << " instruction"
- << (minCommonTailLength == 1 ? "" : "s") << '\n';
+ << MinCommonTailLength << " instruction"
+ << (MinCommonTailLength == 1 ? "" : "s") << '\n';
);
// Sort by hash value so that blocks with identical end sequences sort
@@ -867,10 +814,10 @@ bool BranchFolder::TryTailMergeBlocks(MachineBasicBlock *SuccBB,
// Build SameTails, identifying the set of blocks with this hash code
// and with the maximum number of instructions in common.
unsigned maxCommonTailLength = ComputeSameTails(CurHash,
- minCommonTailLength,
+ MinCommonTailLength,
SuccBB, PredBB);
- // If we didn't find any pair that has at least minCommonTailLength
+ // If we didn't find any pair that has at least MinCommonTailLength
// instructions in common, remove all blocks with this hash code and retry.
if (SameTails.empty()) {
RemoveBlocksWithHash(CurHash, SuccBB, PredBB);
@@ -928,6 +875,11 @@ bool BranchFolder::TryTailMergeBlocks(MachineBasicBlock *SuccBB,
// Recompute common tail MBB's edge weights and block frequency.
setCommonTailEdgeWeights(*MBB);
+ // Remove the original debug location from the common tail.
+ for (auto &MI : *MBB)
+ if (!MI.isDebugValue())
+ MI.setDebugLoc(DebugLoc());
+
// MBB is common tail. Adjust all other BB's to jump to this one.
// Traversal must be forwards so erases work.
DEBUG(dbgs() << "\nUsing common tail in BB#" << MBB->getNumber()
@@ -976,7 +928,7 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) {
// See if we can do any tail merging on those.
if (MergePotentials.size() >= 2)
- MadeChange |= TryTailMergeBlocks(nullptr, nullptr);
+ MadeChange |= TryTailMergeBlocks(nullptr, nullptr, MinCommonTailLength);
}
// Look at blocks (IBB) with multiple predecessors (PBB).
@@ -1056,7 +1008,7 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) {
// branch.
SmallVector<MachineOperand, 4> NewCond(Cond);
if (!Cond.empty() && TBB == IBB) {
- if (TII->ReverseBranchCondition(NewCond))
+ if (TII->reverseBranchCondition(NewCond))
continue;
// This is the QBB case described above
if (!FBB) {
@@ -1092,10 +1044,10 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) {
// Remove the unconditional branch at the end, if any.
if (TBB && (Cond.empty() || FBB)) {
DebugLoc dl; // FIXME: this is nowhere
- TII->RemoveBranch(*PBB);
+ TII->removeBranch(*PBB);
if (!Cond.empty())
// reinsert conditional branch only, for now
- TII->InsertBranch(*PBB, (TBB == IBB) ? FBB : TBB, nullptr,
+ TII->insertBranch(*PBB, (TBB == IBB) ? FBB : TBB, nullptr,
NewCond, dl);
}
@@ -1110,7 +1062,7 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) {
TriedMerging.insert(MergePotentials[i].getBlock());
if (MergePotentials.size() >= 2)
- MadeChange |= TryTailMergeBlocks(IBB, PredBB);
+ MadeChange |= TryTailMergeBlocks(IBB, PredBB, MinCommonTailLength);
// Reinsert an unconditional branch if needed. The 1 below can occur as a
// result of removing blocks in TryTailMergeBlocks.
@@ -1311,10 +1263,10 @@ ReoptimizeBlock:
// a fall-through.
if (PriorTBB && PriorTBB == PriorFBB) {
DebugLoc dl = getBranchDebugLoc(PrevBB);
- TII->RemoveBranch(PrevBB);
+ TII->removeBranch(PrevBB);
PriorCond.clear();
if (PriorTBB != MBB)
- TII->InsertBranch(PrevBB, PriorTBB, nullptr, PriorCond, dl);
+ TII->insertBranch(PrevBB, PriorTBB, nullptr, PriorCond, dl);
MadeChange = true;
++NumBranchOpts;
goto ReoptimizeBlock;
@@ -1359,7 +1311,7 @@ ReoptimizeBlock:
// If the previous branch *only* branches to *this* block (conditional or
// not) remove the branch.
if (PriorTBB == MBB && !PriorFBB) {
- TII->RemoveBranch(PrevBB);
+ TII->removeBranch(PrevBB);
MadeChange = true;
++NumBranchOpts;
goto ReoptimizeBlock;
@@ -1369,8 +1321,8 @@ ReoptimizeBlock:
// the condition is false, remove the uncond second branch.
if (PriorFBB == MBB) {
DebugLoc dl = getBranchDebugLoc(PrevBB);
- TII->RemoveBranch(PrevBB);
- TII->InsertBranch(PrevBB, PriorTBB, nullptr, PriorCond, dl);
+ TII->removeBranch(PrevBB);
+ TII->insertBranch(PrevBB, PriorTBB, nullptr, PriorCond, dl);
MadeChange = true;
++NumBranchOpts;
goto ReoptimizeBlock;
@@ -1381,10 +1333,10 @@ ReoptimizeBlock:
// fall-through.
if (PriorTBB == MBB) {
SmallVector<MachineOperand, 4> NewPriorCond(PriorCond);
- if (!TII->ReverseBranchCondition(NewPriorCond)) {
+ if (!TII->reverseBranchCondition(NewPriorCond)) {
DebugLoc dl = getBranchDebugLoc(PrevBB);
- TII->RemoveBranch(PrevBB);
- TII->InsertBranch(PrevBB, PriorFBB, nullptr, NewPriorCond, dl);
+ TII->removeBranch(PrevBB);
+ TII->insertBranch(PrevBB, PriorFBB, nullptr, NewPriorCond, dl);
MadeChange = true;
++NumBranchOpts;
goto ReoptimizeBlock;
@@ -1416,13 +1368,13 @@ ReoptimizeBlock:
if (DoTransform) {
// Reverse the branch so we will fall through on the previous true cond.
SmallVector<MachineOperand, 4> NewPriorCond(PriorCond);
- if (!TII->ReverseBranchCondition(NewPriorCond)) {
+ if (!TII->reverseBranchCondition(NewPriorCond)) {
DEBUG(dbgs() << "\nMoving MBB: " << *MBB
<< "To make fallthrough to: " << *PriorTBB << "\n");
DebugLoc dl = getBranchDebugLoc(PrevBB);
- TII->RemoveBranch(PrevBB);
- TII->InsertBranch(PrevBB, MBB, nullptr, NewPriorCond, dl);
+ TII->removeBranch(PrevBB);
+ TII->insertBranch(PrevBB, MBB, nullptr, NewPriorCond, dl);
// Move this block to the end of the function.
MBB->moveAfter(&MF.back());
@@ -1450,10 +1402,10 @@ ReoptimizeBlock:
// Loop: xxx; jncc Loop; jmp Out
if (CurTBB && CurFBB && CurFBB == MBB && CurTBB != MBB) {
SmallVector<MachineOperand, 4> NewCond(CurCond);
- if (!TII->ReverseBranchCondition(NewCond)) {
+ if (!TII->reverseBranchCondition(NewCond)) {
DebugLoc dl = getBranchDebugLoc(*MBB);
- TII->RemoveBranch(*MBB);
- TII->InsertBranch(*MBB, CurFBB, CurTBB, NewCond, dl);
+ TII->removeBranch(*MBB);
+ TII->insertBranch(*MBB, CurFBB, CurTBB, NewCond, dl);
MadeChange = true;
++NumBranchOpts;
goto ReoptimizeBlock;
@@ -1469,7 +1421,7 @@ ReoptimizeBlock:
// This block may contain just an unconditional branch. Because there can
// be 'non-branch terminators' in the block, try removing the branch and
// then seeing if the block is empty.
- TII->RemoveBranch(*MBB);
+ TII->removeBranch(*MBB);
// If the only things remaining in the block are debug info, remove these
// as well, so this will behave the same as an empty block in non-debug
// mode.
@@ -1500,8 +1452,8 @@ ReoptimizeBlock:
PriorFBB = MBB;
}
DebugLoc pdl = getBranchDebugLoc(PrevBB);
- TII->RemoveBranch(PrevBB);
- TII->InsertBranch(PrevBB, PriorTBB, PriorFBB, PriorCond, pdl);
+ TII->removeBranch(PrevBB);
+ TII->insertBranch(PrevBB, PriorTBB, PriorFBB, PriorCond, pdl);
}
// Iterate through all the predecessors, revectoring each in turn.
@@ -1526,9 +1478,9 @@ ReoptimizeBlock:
*PMBB, NewCurTBB, NewCurFBB, NewCurCond, true);
if (!NewCurUnAnalyzable && NewCurTBB && NewCurTBB == NewCurFBB) {
DebugLoc pdl = getBranchDebugLoc(*PMBB);
- TII->RemoveBranch(*PMBB);
+ TII->removeBranch(*PMBB);
NewCurCond.clear();
- TII->InsertBranch(*PMBB, NewCurTBB, nullptr, NewCurCond, pdl);
+ TII->insertBranch(*PMBB, NewCurTBB, nullptr, NewCurCond, pdl);
MadeChange = true;
++NumBranchOpts;
PMBB->CorrectExtraCFGEdges(NewCurTBB, nullptr, false);
@@ -1548,7 +1500,7 @@ ReoptimizeBlock:
}
// Add the branch back if the block is more than just an uncond branch.
- TII->InsertBranch(*MBB, CurTBB, nullptr, CurCond, dl);
+ TII->insertBranch(*MBB, CurTBB, nullptr, CurCond, dl);
}
}
@@ -1585,7 +1537,7 @@ ReoptimizeBlock:
if (CurFallsThru) {
MachineBasicBlock *NextBB = &*std::next(MBB->getIterator());
CurCond.clear();
- TII->InsertBranch(*MBB, NextBB, nullptr, CurCond, DebugLoc());
+ TII->insertBranch(*MBB, NextBB, nullptr, CurCond, DebugLoc());
}
MBB->moveAfter(PredBB);
MadeChange = true;
@@ -1615,18 +1567,22 @@ ReoptimizeBlock:
// Okay, there is no really great place to put this block. If, however,
// the block before this one would be a fall-through if this block were
- // removed, move this block to the end of the function.
+ // removed, move this block to the end of the function. There is no real
+ // advantage in "falling through" to an EH block, so we don't want to
+ // perform this transformation for that case.
+ //
+ // Also, Windows EH introduced the possibility of an arbitrary number of
+ // successors to a given block. The analyzeBranch call does not consider
+ // exception handling and so we can get in a state where a block
+ // containing a call is followed by multiple EH blocks that would be
+ // rotated infinitely at the end of the function if the transformation
+ // below were performed for EH "FallThrough" blocks. Therefore, even if
+ // that appears not to be happening anymore, we should assume that it is
+ // possible and not remove the "!FallThrough->isEHPad()" condition below.
MachineBasicBlock *PrevTBB = nullptr, *PrevFBB = nullptr;
SmallVector<MachineOperand, 4> PrevCond;
- // We're looking for cases where PrevBB could possibly fall through to
- // FallThrough, but if FallThrough is an EH pad that wouldn't be useful
- // so here we skip over any EH pads so we might have a chance to find
- // a branch target from PrevBB.
- while (FallThrough != MF.end() && FallThrough->isEHPad())
- ++FallThrough;
- // Now check to see if the current block is sitting between PrevBB and
- // a block to which it could fall through.
if (FallThrough != MF.end() &&
+ !FallThrough->isEHPad() &&
!TII->analyzeBranch(PrevBB, PrevTBB, PrevFBB, PrevCond, true) &&
PrevBB.isSuccessor(&*FallThrough)) {
MBB->moveAfter(&MF.back());
@@ -1720,10 +1676,8 @@ MachineBasicBlock::iterator findHoistingInsertPosAndDeps(MachineBasicBlock *MBB,
// The terminator is probably a conditional branch, try not to separate the
// branch from condition setting instruction.
- MachineBasicBlock::iterator PI = Loc;
- --PI;
- while (PI != MBB->begin() && PI->isDebugValue())
- --PI;
+ MachineBasicBlock::iterator PI =
+ skipDebugInstructionsBackward(std::prev(Loc), MBB->begin());
bool IsDef = false;
for (const MachineOperand &MO : PI->operands()) {
@@ -1817,18 +1771,11 @@ bool BranchFolder::HoistCommonCodeInSuccs(MachineBasicBlock *MBB) {
MachineBasicBlock::iterator FIE = FBB->end();
while (TIB != TIE && FIB != FIE) {
// Skip dbg_value instructions. These do not count.
- if (TIB->isDebugValue()) {
- while (TIB != TIE && TIB->isDebugValue())
- ++TIB;
- if (TIB == TIE)
- break;
- }
- if (FIB->isDebugValue()) {
- while (FIB != FIE && FIB->isDebugValue())
- ++FIB;
- if (FIB == FIE)
- break;
- }
+ TIB = skipDebugInstructionsForward(TIB, TIE);
+ FIB = skipDebugInstructionsForward(FIB, FIE);
+ if (TIB == TIE || FIB == FIE)
+ break;
+
if (!TIB->isIdenticalTo(*FIB, MachineInstr::CheckKillDead))
break;
@@ -1929,14 +1876,21 @@ bool BranchFolder::HoistCommonCodeInSuccs(MachineBasicBlock *MBB) {
FBB->erase(FBB->begin(), FIB);
// Update livein's.
+ bool AddedLiveIns = false;
for (unsigned i = 0, e = LocalDefs.size(); i != e; ++i) {
unsigned Def = LocalDefs[i];
if (LocalDefsSet.count(Def)) {
TBB->addLiveIn(Def);
FBB->addLiveIn(Def);
+ AddedLiveIns = true;
}
}
+ if (AddedLiveIns) {
+ TBB->sortUniqueLiveIns();
+ FBB->sortUniqueLiveIns();
+ }
+
++NumHoist;
return true;
}
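
Several BranchFolding hunks replace hand-rolled dbg_value-skipping loops with the shared skipDebugInstructionsForward/Backward helpers; the forward form clamps at the End iterator and the backward form at Begin. Condensed from the hunks above:

    // Forward: advance past DBG_VALUEs, never past TIE/FIE.
    TIB = skipDebugInstructionsForward(TIB, TIE);
    FIB = skipDebugInstructionsForward(FIB, FIE);
    if (TIB == TIE || FIB == FIE)
      break; // one side had only debug instructions left

    // Backward: step to the previous non-debug instruction, never before
    // the begin() bound.
    MachineBasicBlock::iterator PI =
        skipDebugInstructionsBackward(std::prev(Loc), MBB->begin());
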
diff --git a/contrib/llvm/lib/CodeGen/BranchFolding.h b/contrib/llvm/lib/CodeGen/BranchFolding.h
index 36a5a2e..fc48e48 100644
--- a/contrib/llvm/lib/CodeGen/BranchFolding.h
+++ b/contrib/llvm/lib/CodeGen/BranchFolding.h
@@ -29,9 +29,13 @@ namespace llvm {
public:
class MBFIWrapper;
- explicit BranchFolder(bool defaultEnableTailMerge, bool CommonHoist,
+ explicit BranchFolder(bool defaultEnableTailMerge,
+ bool CommonHoist,
MBFIWrapper &MBFI,
- const MachineBranchProbabilityInfo &MBPI);
+ const MachineBranchProbabilityInfo &MBPI,
+ // Min tail length to merge. Defaults to the
+ // command-line flag. Ignored for optsize.
+ unsigned MinCommonTailLength = 0);
bool OptimizeFunction(MachineFunction &MF, const TargetInstrInfo *tii,
const TargetRegisterInfo *tri, MachineModuleInfo *mmi,
@@ -99,6 +103,7 @@ namespace llvm {
bool EnableTailMerge;
bool EnableHoistCommonCode;
bool UpdateLiveIns;
+ unsigned MinCommonTailLength;
const TargetInstrInfo *TII;
const TargetRegisterInfo *TRI;
MachineModuleInfo *MMI;
@@ -129,9 +134,9 @@ namespace llvm {
bool TailMergeBlocks(MachineFunction &MF);
bool TryTailMergeBlocks(MachineBasicBlock* SuccBB,
- MachineBasicBlock* PredBB);
+ MachineBasicBlock* PredBB,
+ unsigned MinCommonTailLength);
void setCommonTailEdgeWeights(MachineBasicBlock &TailMBB);
- void computeLiveIns(MachineBasicBlock &MBB);
void ReplaceTailWithBranchTo(MachineBasicBlock::iterator OldInst,
MachineBasicBlock *NewDest);
MachineBasicBlock *SplitMBBAt(MachineBasicBlock &CurMBB,
@@ -150,7 +155,6 @@ namespace llvm {
bool OptimizeBranches(MachineFunction &MF);
bool OptimizeBlock(MachineBasicBlock *MBB);
void RemoveDeadBlock(MachineBasicBlock *MBB);
- bool OptimizeImpDefsBlock(MachineBasicBlock *MBB);
bool HoistCommonCode(MachineFunction &MF);
bool HoistCommonCodeInSuccs(MachineBasicBlock *MBB);
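
The extra constructor parameter lets a client tighten or loosen the tail-merge threshold per instantiation, with 0 meaning "use the -tail-merge-size flag". A hypothetical call site, built from the analysis types this header already references:

    BranchFolder::MBFIWrapper MBBFreqInfo(
        getAnalysis<MachineBlockFrequencyInfo>());
    BranchFolder Folder(/*defaultEnableTailMerge=*/true,
                        /*CommonHoist=*/false, MBBFreqInfo,
                        getAnalysis<MachineBranchProbabilityInfo>(),
                        /*MinCommonTailLength=*/0); // 0 = command-line default
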
diff --git a/contrib/llvm/lib/CodeGen/BranchRelaxation.cpp b/contrib/llvm/lib/CodeGen/BranchRelaxation.cpp
new file mode 100644
index 0000000..8b27570
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/BranchRelaxation.cpp
@@ -0,0 +1,510 @@
+//===-- BranchRelaxation.cpp ----------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/LivePhysRegs.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "branch-relaxation"
+
+STATISTIC(NumSplit, "Number of basic blocks split");
+STATISTIC(NumConditionalRelaxed, "Number of conditional branches relaxed");
+STATISTIC(NumUnconditionalRelaxed, "Number of unconditional branches relaxed");
+
+#define BRANCH_RELAX_NAME "Branch relaxation pass"
+
+namespace {
+class BranchRelaxation : public MachineFunctionPass {
+ /// BasicBlockInfo - Information about the offset and size of a single
+ /// basic block.
+ struct BasicBlockInfo {
+ /// Offset - Distance from the beginning of the function to the beginning
+ /// of this basic block.
+ ///
+ /// The offset is always aligned as required by the basic block.
+ unsigned Offset;
+
+ /// Size - Size of the basic block in bytes. If the block contains
+ /// inline assembly, this is a worst case estimate.
+ ///
+ /// The size does not include any alignment padding whether from the
+ /// beginning of the block, or from an aligned jump table at the end.
+ unsigned Size;
+
+ BasicBlockInfo() : Offset(0), Size(0) {}
+
+ /// Compute the offset immediately following this block. \p MBB is the next
+ /// block.
+ unsigned postOffset(const MachineBasicBlock &MBB) const {
+ unsigned PO = Offset + Size;
+ unsigned Align = MBB.getAlignment();
+ if (Align == 0)
+ return PO;
+
+ unsigned AlignAmt = 1 << Align;
+ unsigned ParentAlign = MBB.getParent()->getAlignment();
+ if (Align <= ParentAlign)
+ return PO + OffsetToAlignment(PO, AlignAmt);
+
+ // The alignment of this MBB is larger than the function's alignment, so we
+ // can't tell whether or not the assembler will insert nops. Assume that it
+ // will.
+ return PO + AlignAmt + OffsetToAlignment(PO, AlignAmt);
+ }
+ };
+
+ SmallVector<BasicBlockInfo, 16> BlockInfo;
+ std::unique_ptr<RegScavenger> RS;
+ LivePhysRegs LiveRegs;
+
+ MachineFunction *MF;
+ const TargetRegisterInfo *TRI;
+ const TargetInstrInfo *TII;
+
+ bool relaxBranchInstructions();
+ void scanFunction();
+
+ MachineBasicBlock *createNewBlockAfter(MachineBasicBlock &BB);
+
+ MachineBasicBlock *splitBlockBeforeInstr(MachineInstr &MI,
+ MachineBasicBlock *DestBB);
+ void adjustBlockOffsets(MachineBasicBlock &MBB);
+ bool isBlockInRange(const MachineInstr &MI, const MachineBasicBlock &BB) const;
+
+ bool fixupConditionalBranch(MachineInstr &MI);
+ bool fixupUnconditionalBranch(MachineInstr &MI);
+ uint64_t computeBlockSize(const MachineBasicBlock &MBB) const;
+ unsigned getInstrOffset(const MachineInstr &MI) const;
+ void dumpBBs();
+ void verify();
+
+public:
+ static char ID;
+ BranchRelaxation() : MachineFunctionPass(ID) { }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ StringRef getPassName() const override {
+ return BRANCH_RELAX_NAME;
+ }
+};
+
+}
+
+char BranchRelaxation::ID = 0;
+char &llvm::BranchRelaxationPassID = BranchRelaxation::ID;
+
+INITIALIZE_PASS(BranchRelaxation, DEBUG_TYPE, BRANCH_RELAX_NAME, false, false)
+
+/// verify - check BBOffsets, BBSizes, and block alignment
+void BranchRelaxation::verify() {
+#ifndef NDEBUG
+ unsigned PrevNum = MF->begin()->getNumber();
+ for (MachineBasicBlock &MBB : *MF) {
+ unsigned Align = MBB.getAlignment();
+ unsigned Num = MBB.getNumber();
+ assert(BlockInfo[Num].Offset % (1u << Align) == 0);
+ assert(!Num || BlockInfo[PrevNum].postOffset(MBB) <= BlockInfo[Num].Offset);
+ assert(BlockInfo[Num].Size == computeBlockSize(MBB));
+ PrevNum = Num;
+ }
+#endif
+}
+
+/// print block size and offset information - debugging
+void BranchRelaxation::dumpBBs() {
+ for (auto &MBB : *MF) {
+ const BasicBlockInfo &BBI = BlockInfo[MBB.getNumber()];
+ dbgs() << format("BB#%u\toffset=%08x\t", MBB.getNumber(), BBI.Offset)
+ << format("size=%#x\n", BBI.Size);
+ }
+}
+
+/// scanFunction - Do the initial scan of the function, building up
+/// information about each block.
+void BranchRelaxation::scanFunction() {
+ BlockInfo.clear();
+ BlockInfo.resize(MF->getNumBlockIDs());
+
+ // First thing, compute the size of all basic blocks, and see if the function
+ // has any inline assembly in it. If so, we have to be conservative about
+ // alignment assumptions, as we don't know for sure the size of any
+ // instructions in the inline assembly.
+ for (MachineBasicBlock &MBB : *MF)
+ BlockInfo[MBB.getNumber()].Size = computeBlockSize(MBB);
+
+ // Compute block offsets and known bits.
+ adjustBlockOffsets(*MF->begin());
+}
+
+/// computeBlockSize - Compute the size for MBB.
+uint64_t BranchRelaxation::computeBlockSize(const MachineBasicBlock &MBB) const {
+ uint64_t Size = 0;
+ for (const MachineInstr &MI : MBB)
+ Size += TII->getInstSizeInBytes(MI);
+ return Size;
+}
+
+/// getInstrOffset - Return the current offset of the specified machine
+/// instruction from the start of the function. This offset changes as stuff is
+/// moved around inside the function.
+unsigned BranchRelaxation::getInstrOffset(const MachineInstr &MI) const {
+ const MachineBasicBlock *MBB = MI.getParent();
+
+ // The offset is composed of two things: the sum of the sizes of all MBB's
+ // before this instruction's block, and the offset from the start of the block
+ // it is in.
+ unsigned Offset = BlockInfo[MBB->getNumber()].Offset;
+
+ // Sum instructions before MI in MBB.
+ for (MachineBasicBlock::const_iterator I = MBB->begin(); &*I != &MI; ++I) {
+ assert(I != MBB->end() && "Didn't find MI in its own basic block?");
+ Offset += TII->getInstSizeInBytes(*I);
+ }
+
+ return Offset;
+}
+
+void BranchRelaxation::adjustBlockOffsets(MachineBasicBlock &Start) {
+ unsigned PrevNum = Start.getNumber();
+ for (auto &MBB : make_range(MachineFunction::iterator(Start), MF->end())) {
+ unsigned Num = MBB.getNumber();
+ if (!Num) // block zero is never changed from offset zero.
+ continue;
+ // Get the offset and known bits at the end of the layout predecessor.
+ // Include the alignment of the current block.
+ BlockInfo[Num].Offset = BlockInfo[PrevNum].postOffset(MBB);
+
+ PrevNum = Num;
+ }
+}
+
+/// Create a new empty basic block and insert it after \p BB.
+MachineBasicBlock *BranchRelaxation::createNewBlockAfter(MachineBasicBlock &BB) {
+ // Create a new MBB for the code after the OrigBB.
+ MachineBasicBlock *NewBB =
+ MF->CreateMachineBasicBlock(BB.getBasicBlock());
+ MF->insert(++BB.getIterator(), NewBB);
+
+ // Insert an entry into BlockInfo to align it properly with the block numbers.
+ BlockInfo.insert(BlockInfo.begin() + NewBB->getNumber(), BasicBlockInfo());
+
+ return NewBB;
+}
+
+/// Split the basic block containing MI into two blocks, which are joined by
+/// an unconditional branch. Update data structures and renumber blocks to
+/// account for this change and returns the newly created block.
+MachineBasicBlock *BranchRelaxation::splitBlockBeforeInstr(MachineInstr &MI,
+ MachineBasicBlock *DestBB) {
+ MachineBasicBlock *OrigBB = MI.getParent();
+
+ // Create a new MBB for the code after the OrigBB.
+ MachineBasicBlock *NewBB =
+ MF->CreateMachineBasicBlock(OrigBB->getBasicBlock());
+ MF->insert(++OrigBB->getIterator(), NewBB);
+
+ // Splice the instructions starting with MI over to NewBB.
+ NewBB->splice(NewBB->end(), OrigBB, MI.getIterator(), OrigBB->end());
+
+ // Add an unconditional branch from OrigBB to NewBB.
+ // Note the new unconditional branch is not being recorded.
+ // There doesn't seem to be meaningful DebugInfo available; this doesn't
+ // correspond to anything in the source.
+ TII->insertUnconditionalBranch(*OrigBB, NewBB, DebugLoc());
+
+ // Insert an entry into BlockInfo to align it properly with the block numbers.
+ BlockInfo.insert(BlockInfo.begin() + NewBB->getNumber(), BasicBlockInfo());
+
+ NewBB->transferSuccessors(OrigBB);
+ OrigBB->addSuccessor(NewBB);
+ OrigBB->addSuccessor(DestBB);
+
+ // Cleanup potential unconditional branch to successor block.
+ // Note that updateTerminator may change the size of the blocks.
+ NewBB->updateTerminator();
+ OrigBB->updateTerminator();
+
+ // Figure out how large the OrigBB is. As the first half of the original
+ // block, it cannot contain a tablejump. The size includes
+ // the new jump we added. (It should be possible to do this without
+ // recounting everything, but it's very confusing, and this is rarely
+ // executed.)
+ BlockInfo[OrigBB->getNumber()].Size = computeBlockSize(*OrigBB);
+
+ // Figure out how large the NewBB is. As the second half of the original
+ // block, it may contain a tablejump.
+ BlockInfo[NewBB->getNumber()].Size = computeBlockSize(*NewBB);
+
+ // All BBOffsets following these blocks must be modified.
+ adjustBlockOffsets(*OrigBB);
+
+ // Need to fix live-in lists if we track liveness.
+ if (TRI->trackLivenessAfterRegAlloc(*MF))
+ computeLiveIns(LiveRegs, *TRI, *NewBB);
+
+ ++NumSplit;
+
+ return NewBB;
+}
+
+/// isBlockInRange - Returns true if the distance between the given MI and
+/// the given BB can fit in MI's displacement field.
+bool BranchRelaxation::isBlockInRange(
+ const MachineInstr &MI, const MachineBasicBlock &DestBB) const {
+ int64_t BrOffset = getInstrOffset(MI);
+ int64_t DestOffset = BlockInfo[DestBB.getNumber()].Offset;
+
+ if (TII->isBranchOffsetInRange(MI.getOpcode(), DestOffset - BrOffset))
+ return true;
+
+ DEBUG(
+ dbgs() << "Out of range branch to destination BB#" << DestBB.getNumber()
+ << " from BB#" << MI.getParent()->getNumber()
+ << " to " << DestOffset
+ << " offset " << DestOffset - BrOffset
+ << '\t' << MI
+ );
+
+ return false;
+}
+
+/// fixupConditionalBranch - Fix up a conditional branch whose destination is
+/// too far away to fit in its displacement field. It is converted to an inverse
+/// conditional branch + an unconditional branch to the destination.
+bool BranchRelaxation::fixupConditionalBranch(MachineInstr &MI) {
+ DebugLoc DL = MI.getDebugLoc();
+ MachineBasicBlock *MBB = MI.getParent();
+ MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
+ SmallVector<MachineOperand, 4> Cond;
+
+ bool Fail = TII->analyzeBranch(*MBB, TBB, FBB, Cond);
+ assert(!Fail && "branches to be relaxed must be analyzable");
+ (void)Fail;
+
+ // Add an unconditional branch to the destination and invert the branch
+ // condition to jump over it:
+ // tbz L1
+ // =>
+ // tbnz L2
+ // b L1
+ // L2:
+
+ if (FBB && isBlockInRange(MI, *FBB)) {
+ // Last MI in the BB is an unconditional branch. We can simply invert the
+ // condition and swap destinations:
+ // beq L1
+ // b L2
+ // =>
+ // bne L2
+ // b L1
+ DEBUG(dbgs() << " Invert condition and swap "
+ "its destination with " << MBB->back());
+
+ TII->reverseBranchCondition(Cond);
+ int OldSize = 0, NewSize = 0;
+ TII->removeBranch(*MBB, &OldSize);
+ TII->insertBranch(*MBB, FBB, TBB, Cond, DL, &NewSize);
+
+ BlockInfo[MBB->getNumber()].Size += (NewSize - OldSize);
+ return true;
+ } else if (FBB) {
+ // We need to split the basic block here to obtain two long-range
+ // unconditional branches.
+ auto &NewBB = *MF->CreateMachineBasicBlock(MBB->getBasicBlock());
+ MF->insert(++MBB->getIterator(), &NewBB);
+
+ // Insert an entry into BlockInfo to align it properly with the block
+ // numbers.
+ BlockInfo.insert(BlockInfo.begin() + NewBB.getNumber(), BasicBlockInfo());
+
+ unsigned &NewBBSize = BlockInfo[NewBB.getNumber()].Size;
+ int NewBrSize;
+ TII->insertUnconditionalBranch(NewBB, FBB, DL, &NewBrSize);
+ NewBBSize += NewBrSize;
+
+ // Update the successor lists according to the transformation to follow.
+ // Do it here since if there's no split, no update is needed.
+ MBB->replaceSuccessor(FBB, &NewBB);
+ NewBB.addSuccessor(FBB);
+ }
+
+ // We now have an appropriate fall-through block in place (either naturally or
+ // just created), so we can invert the condition.
+ MachineBasicBlock &NextBB = *std::next(MachineFunction::iterator(MBB));
+
+ DEBUG(dbgs() << " Insert B to BB#" << TBB->getNumber()
+ << ", invert condition and change dest. to BB#"
+ << NextBB.getNumber() << '\n');
+
+ unsigned &MBBSize = BlockInfo[MBB->getNumber()].Size;
+
+ // Insert a new conditional branch and a new unconditional branch.
+ int RemovedSize = 0;
+ TII->reverseBranchCondition(Cond);
+ TII->removeBranch(*MBB, &RemovedSize);
+ MBBSize -= RemovedSize;
+
+ int AddedSize = 0;
+ TII->insertBranch(*MBB, &NextBB, TBB, Cond, DL, &AddedSize);
+ MBBSize += AddedSize;
+
+ // Finally, keep the block offsets up to date.
+ adjustBlockOffsets(*MBB);
+ return true;
+}
+
+bool BranchRelaxation::fixupUnconditionalBranch(MachineInstr &MI) {
+ MachineBasicBlock *MBB = MI.getParent();
+
+ unsigned OldBrSize = TII->getInstSizeInBytes(MI);
+ MachineBasicBlock *DestBB = TII->getBranchDestBlock(MI);
+
+ int64_t DestOffset = BlockInfo[DestBB->getNumber()].Offset;
+ int64_t SrcOffset = getInstrOffset(MI);
+
+ assert(!TII->isBranchOffsetInRange(MI.getOpcode(), DestOffset - SrcOffset));
+
+ BlockInfo[MBB->getNumber()].Size -= OldBrSize;
+
+ MachineBasicBlock *BranchBB = MBB;
+
+ // If this was an expanded conditional branch, there is already a single
+ // unconditional branch in a block.
+ if (!MBB->empty()) {
+ BranchBB = createNewBlockAfter(*MBB);
+
+ // Add live outs.
+ for (const MachineBasicBlock *Succ : MBB->successors()) {
+ for (const MachineBasicBlock::RegisterMaskPair &LiveIn : Succ->liveins())
+ BranchBB->addLiveIn(LiveIn);
+ }
+
+ BranchBB->sortUniqueLiveIns();
+ BranchBB->addSuccessor(DestBB);
+ MBB->replaceSuccessor(DestBB, BranchBB);
+ }
+
+ DebugLoc DL = MI.getDebugLoc();
+ MI.eraseFromParent();
+ BlockInfo[BranchBB->getNumber()].Size += TII->insertIndirectBranch(
+ *BranchBB, *DestBB, DL, DestOffset - SrcOffset, RS.get());
+
+ adjustBlockOffsets(*MBB);
+ return true;
+}
+
+bool BranchRelaxation::relaxBranchInstructions() {
+ bool Changed = false;
+
+ // Relaxing branches involves creating new basic blocks, so re-evaluate
+ // end() for termination.
+ for (MachineFunction::iterator I = MF->begin(); I != MF->end(); ++I) {
+ MachineBasicBlock &MBB = *I;
+
+ // Empty block?
+ MachineBasicBlock::iterator Last = MBB.getLastNonDebugInstr();
+ if (Last == MBB.end())
+ continue;
+
+ // Expand the unconditional branch first if necessary. If there is a
+ // conditional branch, this will end up changing the branch destination of
+ // it to be over the newly inserted indirect branch block, which may avoid
+ // the need to try expanding the conditional branch first, saving an extra
+ // jump.
+ if (Last->isUnconditionalBranch()) {
+ // Unconditional branch destination might be unanalyzable, assume these
+ // are OK.
+ if (MachineBasicBlock *DestBB = TII->getBranchDestBlock(*Last)) {
+ if (!isBlockInRange(*Last, *DestBB)) {
+ fixupUnconditionalBranch(*Last);
+ ++NumUnconditionalRelaxed;
+ Changed = true;
+ }
+ }
+ }
+
+ // Loop over the conditional branches.
+ MachineBasicBlock::iterator Next;
+ for (MachineBasicBlock::iterator J = MBB.getFirstTerminator();
+ J != MBB.end(); J = Next) {
+ Next = std::next(J);
+ MachineInstr &MI = *J;
+
+ if (MI.isConditionalBranch()) {
+ MachineBasicBlock *DestBB = TII->getBranchDestBlock(MI);
+ if (!isBlockInRange(MI, *DestBB)) {
+ if (Next != MBB.end() && Next->isConditionalBranch()) {
+ // If there are multiple conditional branches, this isn't an
+ // analyzable block. Split later terminators into a new block so
+ // each one will be analyzable.
+
+ splitBlockBeforeInstr(*Next, DestBB);
+ } else {
+ fixupConditionalBranch(MI);
+ ++NumConditionalRelaxed;
+ }
+
+ Changed = true;
+
+ // This may have modified all of the terminators, so start over.
+ Next = MBB.getFirstTerminator();
+ }
+ }
+ }
+ }
+
+ return Changed;
+}
+
+bool BranchRelaxation::runOnMachineFunction(MachineFunction &mf) {
+ MF = &mf;
+
+ DEBUG(dbgs() << "***** BranchRelaxation *****\n");
+
+ const TargetSubtargetInfo &ST = MF->getSubtarget();
+ TII = ST.getInstrInfo();
+
+ TRI = ST.getRegisterInfo();
+ if (TRI->trackLivenessAfterRegAlloc(*MF))
+ RS.reset(new RegScavenger());
+
+ // Renumber all of the machine basic blocks in the function, guaranteeing that
+ // the numbers agree with the position of the block in the function.
+ MF->RenumberBlocks();
+
+ // Do the initial scan of the function, building up information about the
+ // sizes of each block.
+ scanFunction();
+
+ DEBUG(dbgs() << " Basic blocks before relaxation\n"; dumpBBs(););
+
+ bool MadeChange = false;
+ while (relaxBranchInstructions())
+ MadeChange = true;
+
+ // After a while, this might be made debug-only, but it is not expensive.
+ verify();
+
+ DEBUG(dbgs() << " Basic blocks after relaxation\n\n"; dumpBBs());
+
+ BlockInfo.clear();
+
+ return MadeChange;
+}
diff --git a/contrib/llvm/lib/CodeGen/CallingConvLower.cpp b/contrib/llvm/lib/CodeGen/CallingConvLower.cpp
index 7d67bcf..2e33f14 100644
--- a/contrib/llvm/lib/CodeGen/CallingConvLower.cpp
+++ b/contrib/llvm/lib/CodeGen/CallingConvLower.cpp
@@ -23,6 +23,8 @@
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
+#include <algorithm>
+
using namespace llvm;
CCState::CCState(CallingConv::ID CC, bool isVarArg, MachineFunction &mf,
@@ -64,6 +66,22 @@ void CCState::MarkAllocated(unsigned Reg) {
UsedRegs[*AI/32] |= 1 << (*AI&31);
}
+bool CCState::IsShadowAllocatedReg(unsigned Reg) const {
+ if (!isAllocated(Reg))
+ return false;
+
+ for (auto const &ValAssign : Locs) {
+ if (ValAssign.isRegLoc()) {
+ for (MCRegAliasIterator AI(ValAssign.getLocReg(), &TRI, true);
+ AI.isValid(); ++AI) {
+ if (*AI == Reg)
+ return false;
+ }
+ }
+ }
+ return true;
+}
+
/// Analyze an array of argument values,
/// incorporating info about the formals into this state.
void
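
IsShadowAllocatedReg answers a narrow question: was Reg marked used by MarkAllocated() without any CCValAssign (or an alias of one) actually living in it, i.e. was it only shadow-reserved to keep register and stack argument slots in step? A hedged sketch of a hypothetical custom calling-convention helper:

    // Hypothetical helper: a shadow-allocated register carries no incoming
    // argument value, so the target may repurpose it on entry.
    static bool canReuseOnEntry(const CCState &State, unsigned Reg) {
      return State.IsShadowAllocatedReg(Reg);
    }
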
diff --git a/contrib/llvm/lib/CodeGen/CodeGen.cpp b/contrib/llvm/lib/CodeGen/CodeGen.cpp
index 6679819..4cf9b13 100644
--- a/contrib/llvm/lib/CodeGen/CodeGen.cpp
+++ b/contrib/llvm/lib/CodeGen/CodeGen.cpp
@@ -22,7 +22,9 @@ using namespace llvm;
void llvm::initializeCodeGen(PassRegistry &Registry) {
initializeAtomicExpandPass(Registry);
initializeBranchFolderPassPass(Registry);
+ initializeBranchRelaxationPass(Registry);
initializeCodeGenPreparePass(Registry);
+ initializeCountingFunctionInserterPass(Registry);
initializeDeadMachineInstructionElimPass(Registry);
initializeDetectDeadLanesPass(Registry);
initializeDwarfEHPreparePass(Registry);
@@ -53,6 +55,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
initializeMachineLICMPass(Registry);
initializeMachineLoopInfoPass(Registry);
initializeMachineModuleInfoPass(Registry);
+ initializeMachinePipelinerPass(Registry);
initializeMachinePostDominatorTreePass(Registry);
initializeMachineSchedulerPass(Registry);
initializeMachineSinkingPass(Registry);
@@ -68,6 +71,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
initializePostRASchedulerPass(Registry);
initializePreISelIntrinsicLoweringLegacyPassPass(Registry);
initializeProcessImplicitDefsPass(Registry);
+ initializeRAGreedyPass(Registry);
initializeRegisterCoalescerPass(Registry);
initializeRenameIndependentSubregsPass(Registry);
initializeShrinkWrapPass(Registry);
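
The new initialize*Pass calls slot into LLVM's one-time pass-registration scheme; a tool linking the CodeGen library arms all of them with a single call on the global registry. A hypothetical wrapper:

    #include "llvm/InitializePasses.h"
    #include "llvm/PassRegistry.h"

    void registerCodeGenPasses() {
      // Idempotent: registers every pass listed in initializeCodeGen above,
      // including the newly added BranchRelaxation pass.
      llvm::initializeCodeGen(*llvm::PassRegistry::getPassRegistry());
    }
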
diff --git a/contrib/llvm/lib/CodeGen/CodeGenPrepare.cpp b/contrib/llvm/lib/CodeGen/CodeGenPrepare.cpp
index ede4041..934b470 100644
--- a/contrib/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/contrib/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -17,12 +17,16 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/BlockFrequencyInfo.h"
+#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/MemoryBuiltins.h"
+#include "llvm/CodeGen/Analysis.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
@@ -118,6 +122,19 @@ static cl::opt<bool> DisablePreheaderProtect(
"disable-preheader-prot", cl::Hidden, cl::init(false),
cl::desc("Disable protection against removing loop preheaders"));
+static cl::opt<bool> ProfileGuidedSectionPrefix(
+ "profile-guided-section-prefix", cl::Hidden, cl::init(true),
+ cl::desc("Use profile info to add section prefix for hot/cold functions"));
+
+static cl::opt<unsigned> FreqRatioToSkipMerge(
+ "cgp-freq-ratio-to-skip-merge", cl::Hidden, cl::init(2),
+ cl::desc("Skip merging empty blocks if (frequency of empty block) / "
+ "(frequency of destination block) is greater than this ratio"));
+
+static cl::opt<bool> ForceSplitStore(
+ "force-split-store", cl::Hidden, cl::init(false),
+ cl::desc("Force store splitting no matter what the target query says."));
+
namespace {
typedef SmallPtrSet<Instruction *, 16> SetOfInstrs;
typedef PointerIntPair<Type *, 1, bool> TypeIsSExt;
@@ -130,6 +147,8 @@ class TypePromotionTransaction;
const TargetTransformInfo *TTI;
const TargetLibraryInfo *TLInfo;
const LoopInfo *LI;
+ std::unique_ptr<BlockFrequencyInfo> BFI;
+ std::unique_ptr<BranchProbabilityInfo> BPI;
/// As we scan instructions optimizing them, this is the next instruction
/// to optimize. Transforms that can invalidate this should update it.
@@ -163,10 +182,11 @@ class TypePromotionTransaction;
}
bool runOnFunction(Function &F) override;
- const char *getPassName() const override { return "CodeGen Prepare"; }
+ StringRef getPassName() const override { return "CodeGen Prepare"; }
void getAnalysisUsage(AnalysisUsage &AU) const override {
// FIXME: When we can selectively preserve passes, preserve the domtree.
+ AU.addRequired<ProfileSummaryInfoWrapperPass>();
AU.addRequired<TargetLibraryInfoWrapperPass>();
AU.addRequired<TargetTransformInfoWrapperPass>();
AU.addRequired<LoopInfoWrapperPass>();
@@ -175,8 +195,11 @@ class TypePromotionTransaction;
private:
bool eliminateFallThrough(Function &F);
bool eliminateMostlyEmptyBlocks(Function &F);
+ BasicBlock *findDestBlockOfMergeableEmptyBlock(BasicBlock *BB);
bool canMergeBlocks(const BasicBlock *BB, const BasicBlock *DestBB) const;
void eliminateMostlyEmptyBlock(BasicBlock *BB);
+ bool isMergingEmptyBlockProfitable(BasicBlock *BB, BasicBlock *DestBB,
+ bool isPreheader);
bool optimizeBlock(BasicBlock &BB, bool& ModifiedDT);
bool optimizeInst(Instruction *I, bool& ModifiedDT);
bool optimizeMemoryInst(Instruction *I, Value *Addr,
@@ -199,13 +222,15 @@ class TypePromotionTransaction;
unsigned CreatedInstCost);
bool splitBranchCondition(Function &F);
bool simplifyOffsetableRelocate(Instruction &I);
- void stripInvariantGroupMetadata(Instruction &I);
};
}
char CodeGenPrepare::ID = 0;
-INITIALIZE_TM_PASS(CodeGenPrepare, "codegenprepare",
- "Optimize for code generation", false, false)
+INITIALIZE_TM_PASS_BEGIN(CodeGenPrepare, "codegenprepare",
+ "Optimize for code generation", false, false)
+INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
+INITIALIZE_TM_PASS_END(CodeGenPrepare, "codegenprepare",
+ "Optimize for code generation", false, false)
FunctionPass *llvm::createCodeGenPreparePass(const TargetMachine *TM) {
return new CodeGenPrepare(TM);
@@ -221,6 +246,8 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
// Clear per function information.
InsertedInsts.clear();
PromotedInsts.clear();
+ BFI.reset();
+ BPI.reset();
ModifiedDT = false;
if (TM)
@@ -230,6 +257,15 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
OptSize = F.optForSize();
+ if (ProfileGuidedSectionPrefix) {
+ ProfileSummaryInfo *PSI =
+ getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
+ if (PSI->isFunctionEntryHot(&F))
+ F.setSectionPrefix(".hot");
+ else if (PSI->isFunctionEntryCold(&F))
+ F.setSectionPrefix(".cold");
+ }
+
/// This optimization identifies DIV instructions that can be
/// profitably bypassed and carried out with a shorter, faster divide.
if (!OptSize && TLI && TLI->isSlowDivBypassed()) {
@@ -364,6 +400,38 @@ bool CodeGenPrepare::eliminateFallThrough(Function &F) {
return Changed;
}
+/// Find a destination block from BB if BB is mergeable empty block.
+BasicBlock *CodeGenPrepare::findDestBlockOfMergeableEmptyBlock(BasicBlock *BB) {
+ // If this block doesn't end with an uncond branch, ignore it.
+ BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator());
+ if (!BI || !BI->isUnconditional())
+ return nullptr;
+
+ // If the instruction before the branch (skipping debug info) isn't a phi
+ // node, then other stuff is happening here.
+ BasicBlock::iterator BBI = BI->getIterator();
+ if (BBI != BB->begin()) {
+ --BBI;
+ while (isa<DbgInfoIntrinsic>(BBI)) {
+ if (BBI == BB->begin())
+ break;
+ --BBI;
+ }
+ if (!isa<DbgInfoIntrinsic>(BBI) && !isa<PHINode>(BBI))
+ return nullptr;
+ }
+
+ // Do not break infinite loops.
+ BasicBlock *DestBB = BI->getSuccessor(0);
+ if (DestBB == BB)
+ return nullptr;
+
+ if (!canMergeBlocks(BB, DestBB))
+ DestBB = nullptr;
+
+ return DestBB;
+}
+
/// Eliminate blocks that contain only PHI nodes, debug info directives, and an
/// unconditional branch. Passes before isel (e.g. LSR/loopsimplify) often split
/// edges in ways that are non-optimal for isel. Start by eliminating these
@@ -382,46 +450,106 @@ bool CodeGenPrepare::eliminateMostlyEmptyBlocks(Function &F) {
// Note that this intentionally skips the entry block.
for (Function::iterator I = std::next(F.begin()), E = F.end(); I != E;) {
BasicBlock *BB = &*I++;
+ BasicBlock *DestBB = findDestBlockOfMergeableEmptyBlock(BB);
+ if (!DestBB ||
+ !isMergingEmptyBlockProfitable(BB, DestBB, Preheaders.count(BB)))
+ continue;
+
+ eliminateMostlyEmptyBlock(BB);
+ MadeChange = true;
+ }
+ return MadeChange;
+}
+
+bool CodeGenPrepare::isMergingEmptyBlockProfitable(BasicBlock *BB,
+ BasicBlock *DestBB,
+ bool isPreheader) {
+ // Do not delete loop preheaders if doing so would create a critical edge.
+ // Loop preheaders can be good locations to spill registers. If the
+ // preheader is deleted and we create a critical edge, registers may be
+ // spilled in the loop body instead.
+ if (!DisablePreheaderProtect && isPreheader &&
+ !(BB->getSinglePredecessor() &&
+ BB->getSinglePredecessor()->getSingleSuccessor()))
+ return false;
+
+  // Try to skip merging if the unique predecessor of BB is terminated by a
+  // switch or indirect branch instruction, and BB is used as an incoming block
+  // of PHIs in DestBB. In such a case, merging BB and DestBB would cause ISel
+  // to add COPY instructions in the predecessor of BB instead of BB (if it is
+  // not merged). Note that the critical edge created by merging such blocks
+  // won't be split in MachineSink because the jump table is not analyzable. By
+  // keeping such an empty block (BB), ISel will place COPY instructions in BB,
+  // not in the predecessor of BB.
+ BasicBlock *Pred = BB->getUniquePredecessor();
+ if (!Pred ||
+ !(isa<SwitchInst>(Pred->getTerminator()) ||
+ isa<IndirectBrInst>(Pred->getTerminator())))
+ return true;
+
+ if (BB->getTerminator() != BB->getFirstNonPHI())
+ return true;
+
+  // We use a simple cost heuristic: skipping the merge is profitable if the
+  // cost of skipping is less than the cost of merging:
+  //   Cost(skipping merging) < Cost(merging BB),
+  // where Cost(skipping merging) is Freq(BB) * (Cost(Copy) + Cost(Branch)),
+  // and Cost(merging BB) is Freq(Pred) * Cost(Copy).
+  // Assuming Cost(Copy) == Cost(Branch), we can simplify this to:
+  //   Freq(Pred) / Freq(BB) > 2.
+  // Note that if there are multiple empty blocks sharing the same incoming
+  // value for the PHIs in the DestBB, we consider them together. In such a
+  // case, Cost(merging BB) will be the sum of their frequencies.
+
+ if (!isa<PHINode>(DestBB->begin()))
+ return true;
- // If this block doesn't end with an uncond branch, ignore it.
- BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator());
- if (!BI || !BI->isUnconditional())
+ SmallPtrSet<BasicBlock *, 16> SameIncomingValueBBs;
+
+ // Find all other incoming blocks from which incoming values of all PHIs in
+ // DestBB are the same as the ones from BB.
+ for (pred_iterator PI = pred_begin(DestBB), E = pred_end(DestBB); PI != E;
+ ++PI) {
+ BasicBlock *DestBBPred = *PI;
+ if (DestBBPred == BB)
continue;
- // If the instruction before the branch (skipping debug info) isn't a phi
- // node, then other stuff is happening here.
- BasicBlock::iterator BBI = BI->getIterator();
- if (BBI != BB->begin()) {
- --BBI;
- while (isa<DbgInfoIntrinsic>(BBI)) {
- if (BBI == BB->begin())
- break;
- --BBI;
+ bool HasAllSameValue = true;
+ BasicBlock::const_iterator DestBBI = DestBB->begin();
+ while (const PHINode *DestPN = dyn_cast<PHINode>(DestBBI++)) {
+ if (DestPN->getIncomingValueForBlock(BB) !=
+ DestPN->getIncomingValueForBlock(DestBBPred)) {
+ HasAllSameValue = false;
+ break;
}
- if (!isa<DbgInfoIntrinsic>(BBI) && !isa<PHINode>(BBI))
- continue;
}
+ if (HasAllSameValue)
+ SameIncomingValueBBs.insert(DestBBPred);
+ }
- // Do not break infinite loops.
- BasicBlock *DestBB = BI->getSuccessor(0);
- if (DestBB == BB)
- continue;
+  // See if all of BB's incoming values are the same as the value from Pred. In
+  // this case, there is no reason to skip merging, because COPYs are expected
+  // to be placed in Pred already.
+ if (SameIncomingValueBBs.count(Pred))
+ return true;
- if (!canMergeBlocks(BB, DestBB))
- continue;
+ if (!BFI) {
+ Function &F = *BB->getParent();
+ LoopInfo LI{DominatorTree(F)};
+ BPI.reset(new BranchProbabilityInfo(F, LI));
+ BFI.reset(new BlockFrequencyInfo(F, *BPI, LI));
+ }
- // Do not delete loop preheaders if doing so would create a critical edge.
- // Loop preheaders can be good locations to spill registers. If the
- // preheader is deleted and we create a critical edge, registers may be
- // spilled in the loop body instead.
- if (!DisablePreheaderProtect && Preheaders.count(BB) &&
- !(BB->getSinglePredecessor() && BB->getSinglePredecessor()->getSingleSuccessor()))
- continue;
+ BlockFrequency PredFreq = BFI->getBlockFreq(Pred);
+ BlockFrequency BBFreq = BFI->getBlockFreq(BB);
- eliminateMostlyEmptyBlock(BB);
- MadeChange = true;
- }
- return MadeChange;
+ for (auto SameValueBB : SameIncomingValueBBs)
+ if (SameValueBB->getUniquePredecessor() == Pred &&
+ DestBB == findDestBlockOfMergeableEmptyBlock(SameValueBB))
+ BBFreq += BFI->getBlockFreq(SameValueBB);
+
+ return PredFreq.getFrequency() <=
+ BBFreq.getFrequency() * FreqRatioToSkipMerge;
}
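
With the default cgp-freq-ratio-to-skip-merge of 2, the final comparison above is all that remains of the cost model at the decision point. A self-contained sketch with made-up frequencies (toy numbers, not the BlockFrequencyInfo API):

    #include <cstdint>
    #include <cstdio>

    // Toy version of the decision above: merging BB into DestBB costs a COPY
    // in Pred; keeping BB costs a COPY plus a branch in BB. With
    // Cost(Copy) == Cost(Branch) this reduces to Freq(Pred) <= Freq(BB) * Ratio.
    static bool isMergingProfitable(uint64_t PredFreq, uint64_t BBFreq,
                                    uint64_t FreqRatioToSkipMerge = 2) {
      return PredFreq <= BBFreq * FreqRatioToSkipMerge;
    }

    int main() {
      // Hot switch predecessor (1000) feeding a cold empty block (100):
      // 1000 > 100 * 2, so skipping the merge is cheaper.
      std::printf("%d\n", isMergingProfitable(1000, 100)); // 0 -> keep BB
      std::printf("%d\n", isMergingProfitable(150, 100));  // 1 -> merge BB
    }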
/// Return true if we can merge BB into DestBB if there is a single
@@ -805,6 +933,14 @@ static bool SinkCast(CastInst *CI) {
///
static bool OptimizeNoopCopyExpression(CastInst *CI, const TargetLowering &TLI,
const DataLayout &DL) {
+ // Sink only "cheap" (or nop) address-space casts. This is a weaker condition
+ // than sinking only nop casts, but is helpful on some platforms.
+ if (auto *ASC = dyn_cast<AddrSpaceCastInst>(CI)) {
+ if (!TLI.isCheapAddrSpaceCast(ASC->getSrcAddressSpace(),
+ ASC->getDestAddressSpace()))
+ return false;
+ }
+
// If this is a noop copy,
EVT SrcVT = TLI.getValueType(DL, CI->getOperand(0)->getType());
EVT DstVT = TLI.getValueType(DL, CI->getType());
@@ -925,6 +1061,8 @@ static bool SinkCmpExpression(CmpInst *CI, const TargetLowering *TLI) {
InsertedCmp =
CmpInst::Create(CI->getOpcode(), CI->getPredicate(),
CI->getOperand(0), CI->getOperand(1), "", &*InsertPt);
+ // Propagate the debug info.
+ InsertedCmp->setDebugLoc(CI->getDebugLoc());
}
// Replace a use of the cmp with a use of the new cmp.
@@ -1814,18 +1952,8 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool& ModifiedDT) {
default: break;
case Intrinsic::objectsize: {
// Lower all uses of llvm.objectsize.*
- uint64_t Size;
- Type *ReturnTy = CI->getType();
- Constant *RetVal = nullptr;
- ConstantInt *Op1 = cast<ConstantInt>(II->getArgOperand(1));
- ObjSizeMode Mode = Op1->isZero() ? ObjSizeMode::Max : ObjSizeMode::Min;
- if (getObjectSize(II->getArgOperand(0),
- Size, *DL, TLInfo, false, Mode)) {
- RetVal = ConstantInt::get(ReturnTy, Size);
- } else {
- RetVal = ConstantInt::get(ReturnTy,
- Mode == ObjSizeMode::Min ? 0 : -1ULL);
- }
+ ConstantInt *RetVal =
+ lowerObjectSizeCall(II, *DL, TLInfo, /*MustSucceed=*/true);
// Substituting this can cause recursive simplifications, which can
// invalidate our iterator. Use a WeakVH to hold onto it in case this
// happens.
@@ -1963,13 +2091,13 @@ bool CodeGenPrepare::dupRetToEnableTailCallOpts(BasicBlock *BB) {
if (!TLI)
return false;
- ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator());
- if (!RI)
+ ReturnInst *RetI = dyn_cast<ReturnInst>(BB->getTerminator());
+ if (!RetI)
return false;
PHINode *PN = nullptr;
BitCastInst *BCI = nullptr;
- Value *V = RI->getReturnValue();
+ Value *V = RetI->getReturnValue();
if (V) {
BCI = dyn_cast<BitCastInst>(V);
if (BCI)
@@ -1983,14 +2111,6 @@ bool CodeGenPrepare::dupRetToEnableTailCallOpts(BasicBlock *BB) {
if (PN && PN->getParent() != BB)
return false;
- // It's not safe to eliminate the sign / zero extension of the return value.
- // See llvm::isInTailCallPosition().
- const Function *F = BB->getParent();
- AttributeSet CallerAttrs = F->getAttributes();
- if (CallerAttrs.hasAttribute(AttributeSet::ReturnIndex, Attribute::ZExt) ||
- CallerAttrs.hasAttribute(AttributeSet::ReturnIndex, Attribute::SExt))
- return false;
-
// Make sure there are no instructions between the PHI and return, or that the
// return is the first instruction in the block.
if (PN) {
@@ -1999,24 +2119,26 @@ bool CodeGenPrepare::dupRetToEnableTailCallOpts(BasicBlock *BB) {
if (&*BI == BCI)
// Also skip over the bitcast.
++BI;
- if (&*BI != RI)
+ if (&*BI != RetI)
return false;
} else {
BasicBlock::iterator BI = BB->begin();
while (isa<DbgInfoIntrinsic>(BI)) ++BI;
- if (&*BI != RI)
+ if (&*BI != RetI)
return false;
}
/// Only dup the ReturnInst if the CallInst is likely to be emitted as a tail
/// call.
+ const Function *F = BB->getParent();
SmallVector<CallInst*, 4> TailCalls;
if (PN) {
for (unsigned I = 0, E = PN->getNumIncomingValues(); I != E; ++I) {
CallInst *CI = dyn_cast<CallInst>(PN->getIncomingValue(I));
// Make sure the phi value is indeed produced by the tail call.
if (CI && CI->hasOneUse() && CI->getParent() == PN->getIncomingBlock(I) &&
- TLI->mayBeEmittedAsTailCall(CI))
+ TLI->mayBeEmittedAsTailCall(CI) &&
+ attributesPermitTailCall(F, CI, RetI, *TLI))
TailCalls.push_back(CI);
}
} else {
@@ -2033,7 +2155,8 @@ bool CodeGenPrepare::dupRetToEnableTailCallOpts(BasicBlock *BB) {
continue;
CallInst *CI = dyn_cast<CallInst>(&*RI);
- if (CI && CI->use_empty() && TLI->mayBeEmittedAsTailCall(CI))
+ if (CI && CI->use_empty() && TLI->mayBeEmittedAsTailCall(CI) &&
+ attributesPermitTailCall(F, CI, RetI, *TLI))
TailCalls.push_back(CI);
}
}
@@ -2060,7 +2183,7 @@ bool CodeGenPrepare::dupRetToEnableTailCallOpts(BasicBlock *BB) {
continue;
// Duplicate the return into CallBB.
- (void)FoldReturnIntoUncondBranch(RI, BB, CallBB);
+ (void)FoldReturnIntoUncondBranch(RetI, BB, CallBB);
ModifiedDT = Changed = true;
++NumRetsDup;
}
@@ -3237,7 +3360,7 @@ bool AddressingModeMatcher::matchOperationAddr(User *AddrInst, unsigned Opcode,
int64_t ConstantOffset = 0;
gep_type_iterator GTI = gep_type_begin(AddrInst);
for (unsigned i = 1, e = AddrInst->getNumOperands(); i != e; ++i, ++GTI) {
- if (StructType *STy = dyn_cast<StructType>(*GTI)) {
+ if (StructType *STy = GTI.getStructTypeOrNull()) {
const StructLayout *SL = DL.getStructLayout(STy);
unsigned Idx =
cast<ConstantInt>(AddrInst->getOperand(i))->getZExtValue();
@@ -3665,8 +3788,7 @@ isProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore,
TPT.rollback(LastKnownGood);
// If the match didn't cover I, then it won't be shared by it.
- if (std::find(MatchedAddrModeInsts.begin(), MatchedAddrModeInsts.end(),
- I) == MatchedAddrModeInsts.end())
+ if (!is_contained(MatchedAddrModeInsts, I))
return false;
MatchedAddrModeInsts.clear();
@@ -3791,18 +3913,10 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
}
TPT.commit();
- // Check to see if any of the instructions supersumed by this addr mode are
- // non-local to I's BB.
- bool AnyNonLocal = false;
- for (unsigned i = 0, e = AddrModeInsts.size(); i != e; ++i) {
- if (IsNonLocalValue(AddrModeInsts[i], MemoryInst->getParent())) {
- AnyNonLocal = true;
- break;
- }
- }
-
// If all the instructions matched are already in this BB, don't do anything.
- if (!AnyNonLocal) {
+ if (none_of(AddrModeInsts, [&](Value *V) {
+ return IsNonLocalValue(V, MemoryInst->getParent());
+ })) {
DEBUG(dbgs() << "CGP: Found local addrmode: " << AddrMode << "\n");
return false;
}
@@ -4217,6 +4331,10 @@ bool CodeGenPrepare::extLdPromotion(TypePromotionTransaction &TPT,
/// promotions apply.
///
bool CodeGenPrepare::moveExtToFormExtLoad(Instruction *&I) {
+ // ExtLoad formation infrastructure requires TLI to be effective.
+ if (!TLI)
+ return false;
+
// Try to promote a chain of computation if it allows to form
// an extended load.
TypePromotionTransaction TPT;
@@ -4246,7 +4364,7 @@ bool CodeGenPrepare::moveExtToFormExtLoad(Instruction *&I) {
// If the load has other users and the truncate is not free, this probably
// isn't worthwhile.
- if (!LI->hasOneUse() && TLI &&
+ if (!LI->hasOneUse() &&
(TLI->isTypeLegal(LoadVT) || !TLI->isTypeLegal(VT)) &&
!TLI->isTruncateFree(I->getType(), LI->getType())) {
I = OldExt;
@@ -4262,7 +4380,7 @@ bool CodeGenPrepare::moveExtToFormExtLoad(Instruction *&I) {
assert(isa<SExtInst>(I) && "Unexpected ext type!");
LType = ISD::SEXTLOAD;
}
- if (TLI && !TLI->isLoadExtLegal(LType, VT, LoadVT)) {
+ if (!TLI->isLoadExtLegal(LType, VT, LoadVT)) {
I = OldExt;
TPT.rollback(LastKnownGood);
return false;
@@ -4273,6 +4391,14 @@ bool CodeGenPrepare::moveExtToFormExtLoad(Instruction *&I) {
TPT.commit();
I->removeFromParent();
I->insertAfter(LI);
+ // CGP does not check if the zext would be speculatively executed when moved
+ // to the same basic block as the load. Preserving its original location would
+ // pessimize the debugging experience, as well as negatively impact the
+  // quality of sample PGO. We don't want to use "line 0" as that has a
+ // size cost in the line-table section and logically the zext can be seen as
+ // part of the load. Therefore we conservatively reuse the same debug location
+ // for the load and the zext.
+ I->setDebugLoc(LI->getDebugLoc());
++NumExtsMoved;
return true;
}
@@ -4583,10 +4709,45 @@ static bool isFormingBranchFromSelectProfitable(const TargetTransformInfo *TTI,
return false;
}
+/// If \p isTrue is true, return the true value of \p SI, otherwise return the
+/// false value of \p SI. If the true/false value of \p SI is defined by any
+/// select instructions in \p Selects, look through the defining select
+/// instruction until the true/false value is not defined in \p Selects.
+static Value *getTrueOrFalseValue(
+ SelectInst *SI, bool isTrue,
+ const SmallPtrSet<const Instruction *, 2> &Selects) {
+ Value *V;
+
+ for (SelectInst *DefSI = SI; DefSI != nullptr && Selects.count(DefSI);
+ DefSI = dyn_cast<SelectInst>(V)) {
+ assert(DefSI->getCondition() == SI->getCondition() &&
+ "The condition of DefSI does not match with SI");
+ V = (isTrue ? DefSI->getTrueValue() : DefSI->getFalseValue());
+ }
+ return V;
+}
/// If we have a SelectInst that will likely profit from branch prediction,
/// turn it into a branch.
bool CodeGenPrepare::optimizeSelectInst(SelectInst *SI) {
+ // Find all consecutive select instructions that share the same condition.
+ SmallVector<SelectInst *, 2> ASI;
+ ASI.push_back(SI);
+ for (BasicBlock::iterator It = ++BasicBlock::iterator(SI);
+ It != SI->getParent()->end(); ++It) {
+ SelectInst *I = dyn_cast<SelectInst>(&*It);
+ if (I && SI->getCondition() == I->getCondition()) {
+ ASI.push_back(I);
+ } else {
+ break;
+ }
+ }
+
+ SelectInst *LastSI = ASI.back();
+  // Increment the current iterator to skip all the remaining select
+  // instructions, because they will either all be lowered to branches or
+  // none will be.
+ CurInstIterator = std::next(LastSI->getIterator());
+
bool VectorCond = !SI->getCondition()->getType()->isIntegerTy(1);
// Can we convert the 'select' to CF ?
@@ -4633,7 +4794,7 @@ bool CodeGenPrepare::optimizeSelectInst(SelectInst *SI) {
// First, we split the block containing the select into 2 blocks.
BasicBlock *StartBlock = SI->getParent();
- BasicBlock::iterator SplitPt = ++(BasicBlock::iterator(SI));
+ BasicBlock::iterator SplitPt = ++(BasicBlock::iterator(LastSI));
BasicBlock *EndBlock = StartBlock->splitBasicBlock(SplitPt, "select.end");
// Delete the unconditional branch that was just created by the split.
@@ -4643,22 +4804,30 @@ bool CodeGenPrepare::optimizeSelectInst(SelectInst *SI) {
// At least one will become an actual new basic block.
BasicBlock *TrueBlock = nullptr;
BasicBlock *FalseBlock = nullptr;
+ BranchInst *TrueBranch = nullptr;
+ BranchInst *FalseBranch = nullptr;
// Sink expensive instructions into the conditional blocks to avoid executing
// them speculatively.
- if (sinkSelectOperand(TTI, SI->getTrueValue())) {
- TrueBlock = BasicBlock::Create(SI->getContext(), "select.true.sink",
- EndBlock->getParent(), EndBlock);
- auto *TrueBranch = BranchInst::Create(EndBlock, TrueBlock);
- auto *TrueInst = cast<Instruction>(SI->getTrueValue());
- TrueInst->moveBefore(TrueBranch);
- }
- if (sinkSelectOperand(TTI, SI->getFalseValue())) {
- FalseBlock = BasicBlock::Create(SI->getContext(), "select.false.sink",
- EndBlock->getParent(), EndBlock);
- auto *FalseBranch = BranchInst::Create(EndBlock, FalseBlock);
- auto *FalseInst = cast<Instruction>(SI->getFalseValue());
- FalseInst->moveBefore(FalseBranch);
+ for (SelectInst *SI : ASI) {
+ if (sinkSelectOperand(TTI, SI->getTrueValue())) {
+ if (TrueBlock == nullptr) {
+ TrueBlock = BasicBlock::Create(SI->getContext(), "select.true.sink",
+ EndBlock->getParent(), EndBlock);
+ TrueBranch = BranchInst::Create(EndBlock, TrueBlock);
+ }
+ auto *TrueInst = cast<Instruction>(SI->getTrueValue());
+ TrueInst->moveBefore(TrueBranch);
+ }
+ if (sinkSelectOperand(TTI, SI->getFalseValue())) {
+ if (FalseBlock == nullptr) {
+ FalseBlock = BasicBlock::Create(SI->getContext(), "select.false.sink",
+ EndBlock->getParent(), EndBlock);
+ FalseBranch = BranchInst::Create(EndBlock, FalseBlock);
+ }
+ auto *FalseInst = cast<Instruction>(SI->getFalseValue());
+ FalseInst->moveBefore(FalseBranch);
+ }
}
// If there was nothing to sink, then arbitrarily choose the 'false' side
@@ -4677,28 +4846,42 @@ bool CodeGenPrepare::optimizeSelectInst(SelectInst *SI) {
// of the condition, it means that side of the branch goes to the end block
// directly and the path originates from the start block from the point of
// view of the new PHI.
+ BasicBlock *TT, *FT;
if (TrueBlock == nullptr) {
- BranchInst::Create(EndBlock, FalseBlock, SI->getCondition(), SI);
+ TT = EndBlock;
+ FT = FalseBlock;
TrueBlock = StartBlock;
} else if (FalseBlock == nullptr) {
- BranchInst::Create(TrueBlock, EndBlock, SI->getCondition(), SI);
+ TT = TrueBlock;
+ FT = EndBlock;
FalseBlock = StartBlock;
} else {
- BranchInst::Create(TrueBlock, FalseBlock, SI->getCondition(), SI);
+ TT = TrueBlock;
+ FT = FalseBlock;
+ }
+ IRBuilder<>(SI).CreateCondBr(SI->getCondition(), TT, FT, SI);
+
+ SmallPtrSet<const Instruction *, 2> INS;
+ INS.insert(ASI.begin(), ASI.end());
+  // Use a reverse iterator because a later select may use the value of an
+  // earlier select, and we need to propagate the value through the earlier
+  // select to get the PHI operand.
+ for (auto It = ASI.rbegin(); It != ASI.rend(); ++It) {
+ SelectInst *SI = *It;
+ // The select itself is replaced with a PHI Node.
+ PHINode *PN = PHINode::Create(SI->getType(), 2, "", &EndBlock->front());
+ PN->takeName(SI);
+ PN->addIncoming(getTrueOrFalseValue(SI, true, INS), TrueBlock);
+ PN->addIncoming(getTrueOrFalseValue(SI, false, INS), FalseBlock);
+
+ SI->replaceAllUsesWith(PN);
+ SI->eraseFromParent();
+ INS.erase(SI);
+ ++NumSelectsExpanded;
}
-
- // The select itself is replaced with a PHI Node.
- PHINode *PN = PHINode::Create(SI->getType(), 2, "", &EndBlock->front());
- PN->takeName(SI);
- PN->addIncoming(SI->getTrueValue(), TrueBlock);
- PN->addIncoming(SI->getFalseValue(), FalseBlock);
-
- SI->replaceAllUsesWith(PN);
- SI->eraseFromParent();
// Instruct OptimizeBlock to skip to the next block.
CurInstIterator = StartBlock->end();
- ++NumSelectsExpanded;
return true;
}
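
As a source-level analogue of what this rewrite does, two selects sharing one condition collapse into a single branch, and the second result's incoming values are found by looking through the first select, exactly as getTrueOrFalseValue does. A toy C++ illustration (hypothetical values):

    #include <cstdio>

    // Toy source-level analogue of the grouped-select lowering above: two
    // "selects" on the same condition collapse into one branch, and the second
    // value is found by looking through the first (getTrueOrFalseValue).
    int main() {
      bool cond = true;
      int x = 1, y = 2, z = 3;

      // Two selects sharing one condition...
      int a = cond ? x : y;
      int b = cond ? a : z;

      // ...lower to a single branch producing both values (the "PHIs"):
      int a2, b2;
      if (cond) {
        a2 = x; // true value of a
        b2 = x; // true value of b, seen through a
      } else {
        a2 = y; // false value of a
        b2 = z; // false value of b
      }
      std::printf("%d %d / %d %d\n", a, b, a2, b2); // 1 1 / 1 1
    }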
@@ -5179,6 +5362,117 @@ bool CodeGenPrepare::optimizeExtractElementInst(Instruction *Inst) {
return false;
}
+/// For the store instruction sequence below, the F and I values
+/// are bundled together as an i64 value before being stored into memory.
+/// Sometimes it is more efficient to generate separate stores for F and I,
+/// which can remove the bitwise instructions or sink them to colder places.
+///
+/// (store (or (zext (bitcast F to i32) to i64),
+/// (shl (zext I to i64), 32)), addr) -->
+/// (store F, addr) and (store I, addr+4)
+///
+/// Similarly, splitting other merged stores can also be beneficial, e.g.:
+/// For pair of {i32, i32}, i64 store --> two i32 stores.
+/// For pair of {i32, i16}, i64 store --> two i32 stores.
+/// For pair of {i16, i16}, i32 store --> two i16 stores.
+/// For pair of {i16, i8}, i32 store --> two i16 stores.
+/// For pair of {i8, i8}, i16 store --> two i8 stores.
+///
+/// We allow each target to determine specifically which kind of splitting is
+/// supported.
+///
+/// The store patterns are commonly seen from the simple code snippet below
+/// if only std::make_pair(...) is SROA-transformed before being inlined into hoo.
+/// void goo(const std::pair<int, float> &);
+/// hoo() {
+/// ...
+/// goo(std::make_pair(tmp, ftmp));
+/// ...
+/// }
+///
+/// Although we already have similar splitting in DAG Combine, we duplicate
+/// it in CodeGenPrepare to catch the case in which the pattern spans
+/// multiple BBs. The logic in DAG Combine is kept to catch cases generated
+/// during code expansion.
+static bool splitMergedValStore(StoreInst &SI, const DataLayout &DL,
+ const TargetLowering &TLI) {
+ // Handle simple but common cases only.
+ Type *StoreType = SI.getValueOperand()->getType();
+ if (DL.getTypeStoreSizeInBits(StoreType) != DL.getTypeSizeInBits(StoreType) ||
+ DL.getTypeSizeInBits(StoreType) == 0)
+ return false;
+
+ unsigned HalfValBitSize = DL.getTypeSizeInBits(StoreType) / 2;
+ Type *SplitStoreType = Type::getIntNTy(SI.getContext(), HalfValBitSize);
+ if (DL.getTypeStoreSizeInBits(SplitStoreType) !=
+ DL.getTypeSizeInBits(SplitStoreType))
+ return false;
+
+  // Match the following patterns:
+  // (store (or (zext LValue to i64),
+  //            (shl (zext HValue to i64), HalfValBitSize)), addr)
+  // or
+  // (store (or (shl (zext HValue to i64), HalfValBitSize),
+  //            (zext LValue to i64)), addr)
+  // Expect both operands of the OR and the first operand of the SHL to have
+  // only one use.
+ Value *LValue, *HValue;
+ if (!match(SI.getValueOperand(),
+ m_c_Or(m_OneUse(m_ZExt(m_Value(LValue))),
+ m_OneUse(m_Shl(m_OneUse(m_ZExt(m_Value(HValue))),
+ m_SpecificInt(HalfValBitSize))))))
+ return false;
+
+  // Check that LValue and HValue are integers no wider than HalfValBitSize.
+ if (!LValue->getType()->isIntegerTy() ||
+ DL.getTypeSizeInBits(LValue->getType()) > HalfValBitSize ||
+ !HValue->getType()->isIntegerTy() ||
+ DL.getTypeSizeInBits(HValue->getType()) > HalfValBitSize)
+ return false;
+
+ // If LValue/HValue is a bitcast instruction, use the EVT before bitcast
+ // as the input of target query.
+ auto *LBC = dyn_cast<BitCastInst>(LValue);
+ auto *HBC = dyn_cast<BitCastInst>(HValue);
+ EVT LowTy = LBC ? EVT::getEVT(LBC->getOperand(0)->getType())
+ : EVT::getEVT(LValue->getType());
+ EVT HighTy = HBC ? EVT::getEVT(HBC->getOperand(0)->getType())
+ : EVT::getEVT(HValue->getType());
+ if (!ForceSplitStore && !TLI.isMultiStoresCheaperThanBitsMerge(LowTy, HighTy))
+ return false;
+
+ // Start to split store.
+ IRBuilder<> Builder(SI.getContext());
+ Builder.SetInsertPoint(&SI);
+
+  // If LValue/HValue is a bitcast in another BB, create a new one in the
+  // current BB so it may be merged with the split stores by the DAG combiner.
+ if (LBC && LBC->getParent() != SI.getParent())
+ LValue = Builder.CreateBitCast(LBC->getOperand(0), LBC->getType());
+ if (HBC && HBC->getParent() != SI.getParent())
+ HValue = Builder.CreateBitCast(HBC->getOperand(0), HBC->getType());
+
+ auto CreateSplitStore = [&](Value *V, bool Upper) {
+ V = Builder.CreateZExtOrBitCast(V, SplitStoreType);
+ Value *Addr = Builder.CreateBitCast(
+ SI.getOperand(1),
+ SplitStoreType->getPointerTo(SI.getPointerAddressSpace()));
+ if (Upper)
+ Addr = Builder.CreateGEP(
+ SplitStoreType, Addr,
+ ConstantInt::get(Type::getInt32Ty(SI.getContext()), 1));
+ Builder.CreateAlignedStore(
+ V, Addr, Upper ? SI.getAlignment() / 2 : SI.getAlignment());
+ };
+
+ CreateSplitStore(LValue, false);
+ CreateSplitStore(HValue, true);
+
+ // Delete the old store.
+ SI.eraseFromParent();
+ return true;
+}
+
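
The transformation is a bit-level identity on little-endian targets: the merged i64 store and the two half-width stores write the same bytes. A standalone check (a sketch that assumes little-endian layout):

    #include <cstdint>
    #include <cstdio>
    #include <cstring>

    // Toy demonstration of the split performed above: storing
    // (zext lo) | (zext hi << 32) as one i64 is equivalent to storing the two
    // i32 halves at addr and addr+4 (little-endian layout assumed).
    int main() {
      uint32_t Lo = 0x11111111u, Hi = 0x22222222u;

      uint64_t Merged = (uint64_t)Lo | ((uint64_t)Hi << 32);
      unsigned char A[8], B[8];
      std::memcpy(A, &Merged, 8);      // the single i64 store

      std::memcpy(B, &Lo, 4);          // (store F, addr)
      std::memcpy(B + 4, &Hi, 4);      // (store I, addr+4)

      std::printf("equal: %d\n", std::memcmp(A, B, 8) == 0); // 1 on little-endian
    }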
bool CodeGenPrepare::optimizeInst(Instruction *I, bool& ModifiedDT) {
// Bail out if we inserted the instruction to prevent optimizations from
// stepping on each other's toes.
@@ -5232,7 +5526,7 @@ bool CodeGenPrepare::optimizeInst(Instruction *I, bool& ModifiedDT) {
return OptimizeCmpExpression(CI, TLI);
if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
- stripInvariantGroupMetadata(*LI);
+ LI->setMetadata(LLVMContext::MD_invariant_group, nullptr);
if (TLI) {
bool Modified = optimizeLoadExt(LI);
unsigned AS = LI->getPointerAddressSpace();
@@ -5243,7 +5537,9 @@ bool CodeGenPrepare::optimizeInst(Instruction *I, bool& ModifiedDT) {
}
if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
- stripInvariantGroupMetadata(*SI);
+ if (TLI && splitMergedValStore(*SI, *DL, *TLI))
+ return true;
+ SI->setMetadata(LLVMContext::MD_invariant_group, nullptr);
if (TLI) {
unsigned AS = SI->getPointerAddressSpace();
return optimizeMemoryInst(I, SI->getOperand(1),
@@ -5542,7 +5838,7 @@ bool CodeGenPrepare::splitBranchCondition(Function &F) {
// incoming edge to the PHI nodes, because both branch instructions target
// now the same successor. Depending on the original branch condition
// (and/or) we have to swap the successors (TrueDest, FalseDest), so that
- // we perfrom the correct update for the PHI nodes.
+ // we perform the correct update for the PHI nodes.
// This doesn't change the successor order of the just created branch
// instruction (or any other instruction).
if (Opc == Instruction::Or)
@@ -5649,8 +5945,3 @@ bool CodeGenPrepare::splitBranchCondition(Function &F) {
}
return MadeChange;
}
-
-void CodeGenPrepare::stripInvariantGroupMetadata(Instruction &I) {
- if (auto *InvariantMD = I.getMetadata(LLVMContext::MD_invariant_group))
- I.dropUnknownNonDebugMetadata(InvariantMD->getMetadataID());
-}
diff --git a/contrib/llvm/lib/CodeGen/CountingFunctionInserter.cpp b/contrib/llvm/lib/CodeGen/CountingFunctionInserter.cpp
new file mode 100644
index 0000000..1e46a7a
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/CountingFunctionInserter.cpp
@@ -0,0 +1,62 @@
+//===- CountingFunctionInserter.cpp - Insert mcount-like function calls ---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Insert calls to counter functions, such as mcount, intended to be called
+// once per function, at the beginning of each function.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/GlobalsModRef.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Type.h"
+#include "llvm/Pass.h"
+using namespace llvm;
+
+namespace {
+ struct CountingFunctionInserter : public FunctionPass {
+ static char ID; // Pass identification, replacement for typeid
+ CountingFunctionInserter() : FunctionPass(ID) {
+ initializeCountingFunctionInserterPass(*PassRegistry::getPassRegistry());
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addPreserved<GlobalsAAWrapperPass>();
+ }
+
+ bool runOnFunction(Function &F) override {
+ std::string CountingFunctionName =
+ F.getFnAttribute("counting-function").getValueAsString();
+ if (CountingFunctionName.empty())
+ return false;
+
+ Type *VoidTy = Type::getVoidTy(F.getContext());
+ Constant *CountingFn =
+ F.getParent()->getOrInsertFunction(CountingFunctionName,
+ VoidTy, nullptr);
+ CallInst::Create(CountingFn, "", &*F.begin()->getFirstInsertionPt());
+ return true;
+ }
+ };
+
+ char CountingFunctionInserter::ID = 0;
+}
+
+INITIALIZE_PASS(CountingFunctionInserter, "cfinserter",
+ "Inserts calls to mcount-like functions", false, false)
+
+//===----------------------------------------------------------------------===//
+//
+// CountingFunctionInserter - Insert a call to an mcount-like counting
+// function at the entry of each function that requests one.
+//
+FunctionPass *llvm::createCountingFunctionInserterPass() {
+ return new CountingFunctionInserter();
+}
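
In source-level terms, for a function carrying the "counting-function"="mcount" attribute, the pass behaves as if the call below had been written by hand. A toy sketch with mcount stubbed out so it links (the function name comes from the attribute; the body is hypothetical):

    #include <cstdio>

    // Toy view of the transformation done by the pass above. Stubbed counter
    // so the sketch is self-contained.
    extern "C" void mcount() { std::puts("mcount hit"); }

    int compute(int x) {
      mcount();      // what CallInst::Create(...) inserts, in source form
      return x * 2;  // original body unchanged
    }

    int main() { return compute(21) == 42 ? 0 : 1; }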
diff --git a/contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp b/contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp
index a0189a1..5d60c30 100644
--- a/contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp
+++ b/contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp
@@ -69,8 +69,8 @@ void CriticalAntiDepBreaker::StartBlock(MachineBasicBlock *BB) {
// Mark live-out callee-saved registers. In a return block this is
// all callee-saved registers. In non-return this is any
// callee-saved register that is not saved in the prolog.
- const MachineFrameInfo *MFI = MF.getFrameInfo();
- BitVector Pristine = MFI->getPristineRegs(MF);
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
+ BitVector Pristine = MFI.getPristineRegs(MF);
for (const MCPhysReg *I = TRI->getCalleeSavedRegs(&MF); *I; ++I) {
if (!IsReturnBlock && !Pristine.test(*I)) continue;
for (MCRegAliasIterator AI(*I, TRI, true); AI.isValid(); ++AI) {
diff --git a/contrib/llvm/lib/CodeGen/DFAPacketizer.cpp b/contrib/llvm/lib/CodeGen/DFAPacketizer.cpp
index 2386af9..7b1b2d6 100644
--- a/contrib/llvm/lib/CodeGen/DFAPacketizer.cpp
+++ b/contrib/llvm/lib/CodeGen/DFAPacketizer.cpp
@@ -31,9 +31,14 @@
#include "llvm/CodeGen/ScheduleDAGInstrs.h"
#include "llvm/MC/MCInstrItineraries.h"
#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Support/CommandLine.h"
using namespace llvm;
+static cl::opt<unsigned> InstrLimit("dfa-instr-limit", cl::Hidden,
+ cl::init(0), cl::desc("If present, stops packetizing after N instructions"));
+static unsigned InstrCount = 0;
+
// --------------------------------------------------------------------
// Definitions shared between DFAPacketizer.cpp and DFAPacketizerEmitter.cpp
@@ -218,6 +223,13 @@ VLIWPacketizerList::~VLIWPacketizerList() {
// End the current packet, bundle packet instructions and reset DFA state.
void VLIWPacketizerList::endPacket(MachineBasicBlock *MBB,
MachineBasicBlock::iterator MI) {
+ DEBUG({
+ if (!CurrentPacketMIs.empty()) {
+ dbgs() << "Finalizing packet:\n";
+ for (MachineInstr *MI : CurrentPacketMIs)
+ dbgs() << " * " << *MI;
+ }
+ });
if (CurrentPacketMIs.size() > 1) {
MachineInstr &MIFirst = *CurrentPacketMIs.front();
finalizeBundle(*MBB, MIFirst.getIterator(), MI.getInstrIterator());
@@ -249,8 +261,17 @@ void VLIWPacketizerList::PacketizeMIs(MachineBasicBlock *MBB,
for (SUnit &SU : VLIWScheduler->SUnits)
MIToSUnit[SU.getInstr()] = &SU;
+ bool LimitPresent = InstrLimit.getPosition();
+
// The main packetizer loop.
for (; BeginItr != EndItr; ++BeginItr) {
+ if (LimitPresent) {
+ if (InstrCount >= InstrLimit) {
+ EndItr = BeginItr;
+ break;
+ }
+ InstrCount++;
+ }
MachineInstr &MI = *BeginItr;
initPacketizerState();
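
The shared InstrCount budget makes -dfa-instr-limit usable for bisecting packetizer problems: halve the limit until the miscompile disappears. A toy model of the cut-off logic above (a sketch, not the VLIWPacketizerList API):

    #include <cstdio>

    // Toy model of the -dfa-instr-limit cut-off: a global budget shared across
    // blocks stops packetizing once Limit instructions have been visited.
    static unsigned InstrCount = 0;

    static unsigned packetize(unsigned NumInstrs, unsigned Limit) {
      unsigned Packetized = 0;
      for (unsigned i = 0; i != NumInstrs; ++i) {
        if (Limit && InstrCount >= Limit)
          break;              // same early exit as "EndItr = BeginItr" above
        ++InstrCount;
        ++Packetized;
      }
      return Packetized;
    }

    int main() {
      // Two "blocks" of 6 instructions under a budget of 8: 6 then 2.
      std::printf("%u %u\n", packetize(6, 8), packetize(6, 8));
    }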
diff --git a/contrib/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp b/contrib/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp
index 0b8dc7a..17c229a 100644
--- a/contrib/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp
+++ b/contrib/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp
@@ -122,7 +122,7 @@ bool DeadMachineInstructionElim::runOnMachineFunction(MachineFunction &MF) {
// liveness as we go.
for (MachineBasicBlock::reverse_iterator MII = MBB.rbegin(),
MIE = MBB.rend(); MII != MIE; ) {
- MachineInstr *MI = &*MII;
+ MachineInstr *MI = &*MII++;
// If the instruction is dead, delete it!
if (isDead(MI)) {
@@ -133,9 +133,6 @@ bool DeadMachineInstructionElim::runOnMachineFunction(MachineFunction &MF) {
MI->eraseFromParentAndMarkDBGValuesForRemoval();
AnyChanges = true;
++NumDeletes;
- MIE = MBB.rend();
- // MII is now pointing to the next instruction to process,
- // so don't increment it.
continue;
}
@@ -169,10 +166,6 @@ bool DeadMachineInstructionElim::runOnMachineFunction(MachineFunction &MF) {
}
}
}
-
- // We didn't delete the current instruction, so increment MII to
- // the next one.
- ++MII;
}
}
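
The iterator change above is the usual erase-while-iterating idiom: grab the current element and advance in one step, so erasing never invalidates the iterator we keep. The same shape with std::list standing in for the instruction list (the real code walks the block in reverse, but the idiom is identical):

    #include <cstdio>
    #include <list>

    // Toy version of the fix above: advance the iterator before possibly
    // erasing the current element, so no end-iterator re-validation is needed.
    int main() {
      std::list<int> L = {1, 2, 3, 4, 5, 6};
      for (auto It = L.begin(), E = L.end(); It != E;) {
        auto Cur = It++;      // MachineInstr *MI = &*MII++;
        if (*Cur % 2 == 0)    // "isDead": drop the even elements
          L.erase(Cur);       // safe: It already points past Cur
      }
      for (int V : L)
        std::printf("%d ", V); // prints: 1 3 5
      std::printf("\n");
    }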
diff --git a/contrib/llvm/lib/CodeGen/DetectDeadLanes.cpp b/contrib/llvm/lib/CodeGen/DetectDeadLanes.cpp
index 1d9e79c..a7ba694 100644
--- a/contrib/llvm/lib/CodeGen/DetectDeadLanes.cpp
+++ b/contrib/llvm/lib/CodeGen/DetectDeadLanes.cpp
@@ -63,7 +63,7 @@ public:
static char ID;
DetectDeadLanes() : MachineFunctionPass(ID) {}
- const char *getPassName() const override { return "Detect Dead Lanes"; }
+ StringRef getPassName() const override { return "Detect Dead Lanes"; }
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
@@ -210,7 +210,7 @@ void DetectDeadLanes::addUsedLanesOnOperand(const MachineOperand &MO,
VRegInfo &MORegInfo = VRegInfos[MORegIdx];
LaneBitmask PrevUsedLanes = MORegInfo.UsedLanes;
// Any change at all?
- if ((UsedLanes & ~PrevUsedLanes) == 0)
+ if ((UsedLanes & ~PrevUsedLanes).none())
return;
// Set UsedLanes and remember instruction for further propagation.
@@ -303,7 +303,7 @@ void DetectDeadLanes::transferDefinedLanesStep(const MachineOperand &Use,
VRegInfo &RegInfo = VRegInfos[DefRegIdx];
LaneBitmask PrevDefinedLanes = RegInfo.DefinedLanes;
// Any change at all?
- if ((DefinedLanes & ~PrevDefinedLanes) == 0)
+ if ((DefinedLanes & ~PrevDefinedLanes).none())
return;
RegInfo.DefinedLanes = PrevDefinedLanes | DefinedLanes;
@@ -356,7 +356,7 @@ LaneBitmask DetectDeadLanes::determineInitialDefinedLanes(unsigned Reg) {
// Live-In or unused registers have no definition but are considered fully
// defined.
if (!MRI->hasOneDef(Reg))
- return ~0u;
+ return LaneBitmask::getAll();
const MachineOperand &Def = *MRI->def_begin(Reg);
const MachineInstr &DefMI = *Def.getParent();
@@ -368,7 +368,7 @@ LaneBitmask DetectDeadLanes::determineInitialDefinedLanes(unsigned Reg) {
PutInWorklist(RegIdx);
if (Def.isDead())
- return 0;
+ return LaneBitmask::getNone();
// COPY/PHI can copy across unrelated register classes (example: float/int)
// with incompatible subregister structure. Do not include these in the
@@ -376,7 +376,7 @@ LaneBitmask DetectDeadLanes::determineInitialDefinedLanes(unsigned Reg) {
const TargetRegisterClass *DefRC = MRI->getRegClass(Reg);
// Determine initially DefinedLanes.
- LaneBitmask DefinedLanes = 0;
+ LaneBitmask DefinedLanes;
for (const MachineOperand &MO : DefMI.uses()) {
if (!MO.isReg() || !MO.readsReg())
continue;
@@ -386,9 +386,9 @@ LaneBitmask DetectDeadLanes::determineInitialDefinedLanes(unsigned Reg) {
LaneBitmask MODefinedLanes;
if (TargetRegisterInfo::isPhysicalRegister(MOReg)) {
- MODefinedLanes = ~0u;
+ MODefinedLanes = LaneBitmask::getAll();
} else if (isCrossCopy(*MRI, DefMI, DefRC, MO)) {
- MODefinedLanes = ~0u;
+ MODefinedLanes = LaneBitmask::getAll();
} else {
assert(TargetRegisterInfo::isVirtualRegister(MOReg));
if (MRI->hasOneDef(MOReg)) {
@@ -410,7 +410,7 @@ LaneBitmask DetectDeadLanes::determineInitialDefinedLanes(unsigned Reg) {
return DefinedLanes;
}
if (DefMI.isImplicitDef() || Def.isDead())
- return 0;
+ return LaneBitmask::getNone();
assert(Def.getSubReg() == 0 &&
"Should not have subregister defs in machine SSA phase");
@@ -418,7 +418,7 @@ LaneBitmask DetectDeadLanes::determineInitialDefinedLanes(unsigned Reg) {
}
LaneBitmask DetectDeadLanes::determineInitialUsedLanes(unsigned Reg) {
- LaneBitmask UsedLanes = 0;
+ LaneBitmask UsedLanes = LaneBitmask::getNone();
for (const MachineOperand &MO : MRI->use_nodbg_operands(Reg)) {
if (!MO.readsReg())
continue;
@@ -462,7 +462,7 @@ bool DetectDeadLanes::isUndefRegAtInput(const MachineOperand &MO,
const VRegInfo &RegInfo) const {
unsigned SubReg = MO.getSubReg();
LaneBitmask Mask = TRI->getSubRegIndexLaneMask(SubReg);
- return (RegInfo.DefinedLanes & RegInfo.UsedLanes & Mask) == 0;
+ return (RegInfo.DefinedLanes & RegInfo.UsedLanes & Mask).none();
}
bool DetectDeadLanes::isUndefInput(const MachineOperand &MO,
@@ -482,7 +482,7 @@ bool DetectDeadLanes::isUndefInput(const MachineOperand &MO,
const VRegInfo &DefRegInfo = VRegInfos[DefRegIdx];
LaneBitmask UsedLanes = transferUsedLanes(MI, DefRegInfo.UsedLanes, MO);
- if (UsedLanes != 0)
+ if (UsedLanes.any())
return false;
unsigned MOReg = MO.getReg();
@@ -546,7 +546,7 @@ bool DetectDeadLanes::runOnce(MachineFunction &MF) {
continue;
unsigned RegIdx = TargetRegisterInfo::virtReg2Index(Reg);
const VRegInfo &RegInfo = VRegInfos[RegIdx];
- if (MO.isDef() && !MO.isDead() && RegInfo.UsedLanes == 0) {
+ if (MO.isDef() && !MO.isDead() && RegInfo.UsedLanes.none()) {
DEBUG(dbgs() << "Marking operand '" << MO << "' as dead in " << MI);
MO.setIsDead();
}
@@ -577,12 +577,12 @@ bool DetectDeadLanes::runOnMachineFunction(MachineFunction &MF) {
// register coalescer cannot deal with hidden dead defs. However without
// subregister liveness enabled, the expected benefits of this pass are small
  // so we save the compile time.
- if (!MF.getSubtarget().enableSubRegLiveness()) {
+ MRI = &MF.getRegInfo();
+ if (!MRI->subRegLivenessEnabled()) {
DEBUG(dbgs() << "Skipping Detect dead lanes pass\n");
return false;
}
- MRI = &MF.getRegInfo();
TRI = MRI->getTargetRegisterInfo();
unsigned NumVirtRegs = MRI->getNumVirtRegs();
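
These changes migrate the pass from raw unsigned masks to the strongly typed LaneBitmask interface, so "== 0" becomes .none(), "~0u" becomes getAll(), and accidental integer arithmetic is ruled out. A minimal sketch of that flavor of API (toy wrapper, not the real class):

    #include <cstdint>
    #include <cstdio>

    // Thin strongly typed bitmask in the spirit of LaneBitmask.
    struct ToyLaneBitmask {
      uint32_t Mask;
      bool none() const { return Mask == 0; }
      bool any() const { return Mask != 0; }
      ToyLaneBitmask operator&(ToyLaneBitmask O) const { return {Mask & O.Mask}; }
      ToyLaneBitmask operator~() const { return {~Mask}; }
      static ToyLaneBitmask getNone() { return {0}; }
      static ToyLaneBitmask getAll() { return {~0u}; }
    };

    int main() {
      ToyLaneBitmask Used{0b1010u}, Prev{0b0010u};
      // "(UsedLanes & ~PrevUsedLanes).none()" style query:
      std::printf("%d\n", (Used & ~Prev).none());          // 0: a lane is newly used
      std::printf("%d\n", ToyLaneBitmask::getAll().any()); // 1
    }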
diff --git a/contrib/llvm/lib/CodeGen/DwarfEHPrepare.cpp b/contrib/llvm/lib/CodeGen/DwarfEHPrepare.cpp
index eae78a9..38af19a 100644
--- a/contrib/llvm/lib/CodeGen/DwarfEHPrepare.cpp
+++ b/contrib/llvm/lib/CodeGen/DwarfEHPrepare.cpp
@@ -71,7 +71,7 @@ namespace {
void getAnalysisUsage(AnalysisUsage &AU) const override;
- const char *getPassName() const override {
+ StringRef getPassName() const override {
return "Exception handling preparation";
}
};
diff --git a/contrib/llvm/lib/CodeGen/EarlyIfConversion.cpp b/contrib/llvm/lib/CodeGen/EarlyIfConversion.cpp
index 8c96124..7291727 100644
--- a/contrib/llvm/lib/CodeGen/EarlyIfConversion.cpp
+++ b/contrib/llvm/lib/CodeGen/EarlyIfConversion.cpp
@@ -547,7 +547,7 @@ void SSAIfConv::convertIf(SmallVectorImpl<MachineBasicBlock*> &RemovedBlocks) {
// Fix up Head's terminators.
// It should become a single branch or a fallthrough.
DebugLoc HeadDL = Head->getFirstTerminator()->getDebugLoc();
- TII->RemoveBranch(*Head);
+ TII->removeBranch(*Head);
// Erase the now empty conditional blocks. It is likely that Head can fall
// through to Tail, and we can join the two blocks.
@@ -574,7 +574,7 @@ void SSAIfConv::convertIf(SmallVectorImpl<MachineBasicBlock*> &RemovedBlocks) {
// We need a branch to Tail, let code placement work it out later.
DEBUG(dbgs() << "Converting to unconditional branch.\n");
SmallVector<MachineOperand, 0> EmptyCond;
- TII->InsertBranch(*Head, Tail, nullptr, EmptyCond, HeadDL);
+ TII->insertBranch(*Head, Tail, nullptr, EmptyCond, HeadDL);
Head->addSuccessor(Tail);
}
DEBUG(dbgs() << *Head);
@@ -602,7 +602,7 @@ public:
EarlyIfConverter() : MachineFunctionPass(ID) {}
void getAnalysisUsage(AnalysisUsage &AU) const override;
bool runOnMachineFunction(MachineFunction &MF) override;
- const char *getPassName() const override { return "Early If-Conversion"; }
+ StringRef getPassName() const override { return "Early If-Conversion"; }
private:
bool tryConvertIf(MachineBasicBlock*);
diff --git a/contrib/llvm/lib/CodeGen/EdgeBundles.cpp b/contrib/llvm/lib/CodeGen/EdgeBundles.cpp
index aea7c31..b3a25544 100644
--- a/contrib/llvm/lib/CodeGen/EdgeBundles.cpp
+++ b/contrib/llvm/lib/CodeGen/EdgeBundles.cpp
@@ -17,6 +17,7 @@
#include "llvm/CodeGen/Passes.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/GraphWriter.h"
+#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -57,8 +58,8 @@ bool EdgeBundles::runOnMachineFunction(MachineFunction &mf) {
Blocks.resize(getNumBundles());
for (unsigned i = 0, e = MF->getNumBlockIDs(); i != e; ++i) {
- unsigned b0 = getBundle(i, 0);
- unsigned b1 = getBundle(i, 1);
+ unsigned b0 = getBundle(i, false);
+ unsigned b1 = getBundle(i, true);
Blocks[b0].push_back(i);
if (b1 != b0)
Blocks[b1].push_back(i);
@@ -69,6 +70,7 @@ bool EdgeBundles::runOnMachineFunction(MachineFunction &mf) {
/// Specialize WriteGraph, the standard implementation won't work.
namespace llvm {
+
template<>
raw_ostream &WriteGraph<>(raw_ostream &O, const EdgeBundles &G,
bool ShortNames,
@@ -89,7 +91,8 @@ raw_ostream &WriteGraph<>(raw_ostream &O, const EdgeBundles &G,
O << "}\n";
return O;
}
-}
+
+} // end namespace llvm
/// view - Visualize the annotated bipartite CFG with Graphviz.
void EdgeBundles::view() const {
diff --git a/contrib/llvm/lib/CodeGen/ExecutionDepsFix.cpp b/contrib/llvm/lib/CodeGen/ExecutionDepsFix.cpp
index 566b8d5..32c57e3 100644
--- a/contrib/llvm/lib/CodeGen/ExecutionDepsFix.cpp
+++ b/contrib/llvm/lib/CodeGen/ExecutionDepsFix.cpp
@@ -26,6 +26,7 @@
#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterClassInfo.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
@@ -137,6 +138,7 @@ class ExeDepsFix : public MachineFunctionPass {
MachineFunction *MF;
const TargetInstrInfo *TII;
const TargetRegisterInfo *TRI;
+ RegisterClassInfo RegClassInfo;
std::vector<SmallVector<int, 1>> AliasMap;
const unsigned NumRegs;
LiveReg *LiveRegs;
@@ -170,12 +172,10 @@ public:
MachineFunctionProperties getRequiredProperties() const override {
return MachineFunctionProperties().set(
- MachineFunctionProperties::Property::AllVRegsAllocated);
+ MachineFunctionProperties::Property::NoVRegs);
}
- const char *getPassName() const override {
- return "Execution dependency fix";
- }
+ StringRef getPassName() const override { return "Execution dependency fix"; }
private:
iterator_range<SmallVectorImpl<int>::const_iterator>
@@ -203,6 +203,8 @@ private:
void processDefs(MachineInstr*, bool Kill);
void visitSoftInstr(MachineInstr*, unsigned mask);
void visitHardInstr(MachineInstr*, unsigned domain);
+ void pickBestRegisterForUndef(MachineInstr *MI, unsigned OpIdx,
+ unsigned Pref);
bool shouldBreakDependence(MachineInstr*, unsigned OpIdx, unsigned Pref);
void processUndefReads(MachineBasicBlock*);
};
@@ -473,6 +475,60 @@ void ExeDepsFix::visitInstr(MachineInstr *MI) {
processDefs(MI, !DomP.first);
}
+/// \brief Helps avoid false dependencies on undef registers by updating the
+/// machine instructions' undef operand to use a register that the instruction
+/// is truly dependent on, or use a register with clearance higher than Pref.
+void ExeDepsFix::pickBestRegisterForUndef(MachineInstr *MI, unsigned OpIdx,
+ unsigned Pref) {
+ MachineOperand &MO = MI->getOperand(OpIdx);
+ assert(MO.isUndef() && "Expected undef machine operand");
+
+ unsigned OriginalReg = MO.getReg();
+
+ // Update only undef operands that are mapped to one register.
+ if (AliasMap[OriginalReg].size() != 1)
+ return;
+
+ // Get the undef operand's register class
+ const TargetRegisterClass *OpRC =
+ TII->getRegClass(MI->getDesc(), OpIdx, TRI, *MF);
+
+  // If the instruction has a true dependency, we can hide the false dependency
+ // behind it.
+ for (MachineOperand &CurrMO : MI->operands()) {
+ if (!CurrMO.isReg() || CurrMO.isDef() || CurrMO.isUndef() ||
+ !OpRC->contains(CurrMO.getReg()))
+ continue;
+ // We found a true dependency - replace the undef register with the true
+ // dependency.
+ MO.setReg(CurrMO.getReg());
+ return;
+ }
+
+ // Go over all registers in the register class and find the register with
+ // max clearance or clearance higher than Pref.
+ unsigned MaxClearance = 0;
+ unsigned MaxClearanceReg = OriginalReg;
+ ArrayRef<MCPhysReg> Order = RegClassInfo.getOrder(OpRC);
+ for (auto Reg : Order) {
+ assert(AliasMap[Reg].size() == 1 &&
+ "Reg is expected to be mapped to a single index");
+ int RCrx = *regIndices(Reg).begin();
+ unsigned Clearance = CurInstr - LiveRegs[RCrx].Def;
+ if (Clearance <= MaxClearance)
+ continue;
+ MaxClearance = Clearance;
+ MaxClearanceReg = Reg;
+
+ if (MaxClearance > Pref)
+ break;
+ }
+
+ // Update the operand if we found a register with better clearance.
+ if (MaxClearanceReg != OriginalReg)
+ MO.setReg(MaxClearanceReg);
+}
+
+/// \brief Return true if it makes sense to break dependence on a partial def
/// or undef use.
bool ExeDepsFix::shouldBreakDependence(MachineInstr *MI, unsigned OpIdx,
@@ -510,6 +566,7 @@ void ExeDepsFix::processDefs(MachineInstr *MI, bool Kill) {
unsigned OpNum;
unsigned Pref = TII->getUndefRegClearance(*MI, OpNum, TRI);
if (Pref) {
+ pickBestRegisterForUndef(MI, OpNum, Pref);
if (shouldBreakDependence(MI, OpNum, Pref))
UndefReads.push_back(std::make_pair(MI, OpNum));
}
@@ -520,8 +577,6 @@ void ExeDepsFix::processDefs(MachineInstr *MI, bool Kill) {
MachineOperand &MO = MI->getOperand(i);
if (!MO.isReg())
continue;
- if (MO.isImplicit())
- break;
if (MO.isUse())
continue;
for (int rx : regIndices(MO.getReg())) {
@@ -557,7 +612,7 @@ void ExeDepsFix::processUndefReads(MachineBasicBlock *MBB) {
return;
// Collect this block's live out register units.
- LiveRegSet.init(TRI);
+ LiveRegSet.init(*TRI);
// We do not need to care about pristine registers as they are just preserved
// but not actually used in the function.
LiveRegSet.addLiveOutsNoPristines(*MBB);
@@ -652,9 +707,8 @@ void ExeDepsFix::visitSoftInstr(MachineInstr *mi, unsigned mask) {
// Kill off any remaining uses that don't match available, and build a list of
// incoming DomainValues that we want to merge.
- SmallVector<LiveReg, 4> Regs;
- for (SmallVectorImpl<int>::iterator i=used.begin(), e=used.end(); i!=e; ++i) {
- int rx = *i;
+ SmallVector<const LiveReg *, 4> Regs;
+ for (int rx : used) {
assert(LiveRegs && "no space allocated for live registers");
const LiveReg &LR = LiveRegs[rx];
// This useless DomainValue could have been missed above.
@@ -663,16 +717,11 @@ void ExeDepsFix::visitSoftInstr(MachineInstr *mi, unsigned mask) {
continue;
}
// Sorted insertion.
- bool Inserted = false;
- for (SmallVectorImpl<LiveReg>::iterator i = Regs.begin(), e = Regs.end();
- i != e && !Inserted; ++i) {
- if (LR.Def < i->Def) {
- Inserted = true;
- Regs.insert(i, LR);
- }
- }
- if (!Inserted)
- Regs.push_back(LR);
+ auto I = std::upper_bound(Regs.begin(), Regs.end(), &LR,
+ [](const LiveReg *LHS, const LiveReg *RHS) {
+ return LHS->Def < RHS->Def;
+ });
+ Regs.insert(I, &LR);
}
// doms are now sorted in order of appearance. Try to merge them all, giving
@@ -680,14 +729,14 @@ void ExeDepsFix::visitSoftInstr(MachineInstr *mi, unsigned mask) {
DomainValue *dv = nullptr;
while (!Regs.empty()) {
if (!dv) {
- dv = Regs.pop_back_val().Value;
+ dv = Regs.pop_back_val()->Value;
// Force the first dv to match the current instruction.
dv->AvailableDomains = dv->getCommonDomains(available);
assert(dv->AvailableDomains && "Domain should have been filtered");
continue;
}
- DomainValue *Latest = Regs.pop_back_val().Value;
+ DomainValue *Latest = Regs.pop_back_val()->Value;
// Skip already merged values.
if (Latest == dv || Latest->Next)
continue;
@@ -731,6 +780,7 @@ bool ExeDepsFix::runOnMachineFunction(MachineFunction &mf) {
MF = &mf;
TII = MF->getSubtarget().getInstrInfo();
TRI = MF->getSubtarget().getRegisterInfo();
+ RegClassInfo.runOnMachineFunction(mf);
LiveRegs = nullptr;
assert(NumRegs == RC->getNumRegs() && "Bad regclass");
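
pickBestRegisterForUndef above measures clearance, the distance from the current instruction back to a register's most recent def, and settles for the first register whose clearance beats Pref. A toy rendering of that scan (made-up positions, not the ExeDepsFix data structures):

    #include <array>
    #include <cstdio>

    // Toy sketch of the clearance scan: a higher clearance means the register
    // has been idle longer, so using it hides the false dependency.
    int main() {
      const unsigned CurInstr = 100, Pref = 16;
      // Last-def positions for four allocatable registers (toy numbers).
      std::array<unsigned, 4> LastDef = {98, 60, 95, 40};

      unsigned MaxClearance = 0, Best = 0;
      for (unsigned Reg = 0; Reg != LastDef.size(); ++Reg) {
        unsigned Clearance = CurInstr - LastDef[Reg];
        if (Clearance <= MaxClearance)
          continue;
        MaxClearance = Clearance;
        Best = Reg;
        if (MaxClearance > Pref) // good enough: first register clearing Pref
          break;
      }
      std::printf("picked r%u (clearance %u)\n", Best, MaxClearance); // r1 (40)
    }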
diff --git a/contrib/llvm/lib/CodeGen/FuncletLayout.cpp b/contrib/llvm/lib/CodeGen/FuncletLayout.cpp
index b16f81c..d61afad 100644
--- a/contrib/llvm/lib/CodeGen/FuncletLayout.cpp
+++ b/contrib/llvm/lib/CodeGen/FuncletLayout.cpp
@@ -30,7 +30,7 @@ public:
bool runOnMachineFunction(MachineFunction &F) override;
MachineFunctionProperties getRequiredProperties() const override {
return MachineFunctionProperties().set(
- MachineFunctionProperties::Property::AllVRegsAllocated);
+ MachineFunctionProperties::Property::NoVRegs);
}
};
}
diff --git a/contrib/llvm/lib/CodeGen/GCMetadata.cpp b/contrib/llvm/lib/CodeGen/GCMetadata.cpp
index c8116a4..be21c73 100644
--- a/contrib/llvm/lib/CodeGen/GCMetadata.cpp
+++ b/contrib/llvm/lib/CodeGen/GCMetadata.cpp
@@ -32,7 +32,7 @@ class Printer : public FunctionPass {
public:
explicit Printer(raw_ostream &OS) : FunctionPass(ID), OS(OS) {}
- const char *getPassName() const override;
+ StringRef getPassName() const override;
void getAnalysisUsage(AnalysisUsage &AU) const override;
bool runOnFunction(Function &F) override;
@@ -87,7 +87,7 @@ FunctionPass *llvm::createGCInfoPrinter(raw_ostream &OS) {
return new Printer(OS);
}
-const char *Printer::getPassName() const {
+StringRef Printer::getPassName() const {
return "Print Garbage Collector Information";
}
diff --git a/contrib/llvm/lib/CodeGen/GCMetadataPrinter.cpp b/contrib/llvm/lib/CodeGen/GCMetadataPrinter.cpp
index bb8cfa1..d183c7f 100644
--- a/contrib/llvm/lib/CodeGen/GCMetadataPrinter.cpp
+++ b/contrib/llvm/lib/CodeGen/GCMetadataPrinter.cpp
@@ -14,6 +14,8 @@
#include "llvm/CodeGen/GCMetadataPrinter.h"
using namespace llvm;
+LLVM_INSTANTIATE_REGISTRY(GCMetadataPrinterRegistry)
+
GCMetadataPrinter::GCMetadataPrinter() {}
GCMetadataPrinter::~GCMetadataPrinter() {}
diff --git a/contrib/llvm/lib/CodeGen/GCRootLowering.cpp b/contrib/llvm/lib/CodeGen/GCRootLowering.cpp
index 326adab..3524654 100644
--- a/contrib/llvm/lib/CodeGen/GCRootLowering.cpp
+++ b/contrib/llvm/lib/CodeGen/GCRootLowering.cpp
@@ -45,7 +45,7 @@ public:
static char ID;
LowerIntrinsics();
- const char *getPassName() const override;
+ StringRef getPassName() const override;
void getAnalysisUsage(AnalysisUsage &AU) const override;
bool doInitialization(Module &M) override;
@@ -93,7 +93,7 @@ LowerIntrinsics::LowerIntrinsics() : FunctionPass(ID) {
initializeLowerIntrinsicsPass(*PassRegistry::getPassRegistry());
}
-const char *LowerIntrinsics::getPassName() const {
+StringRef LowerIntrinsics::getPassName() const {
return "Lower Garbage Collection Instructions";
}
@@ -316,7 +316,7 @@ void GCMachineCodeAnalysis::FindStackOffsets(MachineFunction &MF) {
for (GCFunctionInfo::roots_iterator RI = FI->roots_begin();
RI != FI->roots_end();) {
// If the root references a dead object, no need to keep it.
- if (MF.getFrameInfo()->isDeadObjectIndex(RI->Num)) {
+ if (MF.getFrameInfo().isDeadObjectIndex(RI->Num)) {
RI = FI->removeStackRoot(RI);
} else {
unsigned FrameReg; // FIXME: surely GCRoot ought to store the
@@ -338,11 +338,11 @@ bool GCMachineCodeAnalysis::runOnMachineFunction(MachineFunction &MF) {
// Find the size of the stack frame. There may be no correct static frame
// size, we use UINT64_MAX to represent this.
- const MachineFrameInfo *MFI = MF.getFrameInfo();
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
- const bool DynamicFrameSize = MFI->hasVarSizedObjects() ||
+ const bool DynamicFrameSize = MFI.hasVarSizedObjects() ||
RegInfo->needsStackRealignment(MF);
- FI->setFrameSize(DynamicFrameSize ? UINT64_MAX : MFI->getStackSize());
+ FI->setFrameSize(DynamicFrameSize ? UINT64_MAX : MFI.getStackSize());
// Find all safe points.
if (FI->getStrategy().needsSafePoints())
diff --git a/contrib/llvm/lib/CodeGen/GCStrategy.cpp b/contrib/llvm/lib/CodeGen/GCStrategy.cpp
index 554d326..31ab86f 100644
--- a/contrib/llvm/lib/CodeGen/GCStrategy.cpp
+++ b/contrib/llvm/lib/CodeGen/GCStrategy.cpp
@@ -16,6 +16,8 @@
using namespace llvm;
+LLVM_INSTANTIATE_REGISTRY(GCRegistry)
+
GCStrategy::GCStrategy()
: UseStatepoints(false), NeededSafePoints(0), CustomReadBarriers(false),
CustomWriteBarriers(false), CustomRoots(false), InitRoots(true),
diff --git a/contrib/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp b/contrib/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
new file mode 100644
index 0000000..1321221
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
@@ -0,0 +1,170 @@
+//===-- lib/CodeGen/GlobalISel/CallLowering.cpp - Call lowering -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file implements some simple delegations needed for call lowering.
+///
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/GlobalISel/CallLowering.h"
+#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Target/TargetLowering.h"
+
+using namespace llvm;
+
+bool CallLowering::lowerCall(
+ MachineIRBuilder &MIRBuilder, const CallInst &CI, unsigned ResReg,
+ ArrayRef<unsigned> ArgRegs, std::function<unsigned()> GetCalleeReg) const {
+ auto &DL = CI.getParent()->getParent()->getParent()->getDataLayout();
+
+ // The first step is to marshal all the function's parameters into the correct
+ // physregs and memory locations. Gather the sequence of argument types that
+ // we'll pass to the assigner function.
+ SmallVector<ArgInfo, 8> OrigArgs;
+ unsigned i = 0;
+ for (auto &Arg : CI.arg_operands()) {
+ ArgInfo OrigArg{ArgRegs[i], Arg->getType(), ISD::ArgFlagsTy{}};
+ setArgFlags(OrigArg, i + 1, DL, CI);
+ OrigArgs.push_back(OrigArg);
+ ++i;
+ }
+
+ MachineOperand Callee = MachineOperand::CreateImm(0);
+ if (Function *F = CI.getCalledFunction())
+ Callee = MachineOperand::CreateGA(F, 0);
+ else
+ Callee = MachineOperand::CreateReg(GetCalleeReg(), false);
+
+ ArgInfo OrigRet{ResReg, CI.getType(), ISD::ArgFlagsTy{}};
+ if (!OrigRet.Ty->isVoidTy())
+ setArgFlags(OrigRet, AttributeSet::ReturnIndex, DL, CI);
+
+ return lowerCall(MIRBuilder, Callee, OrigRet, OrigArgs);
+}
+
+template <typename FuncInfoTy>
+void CallLowering::setArgFlags(CallLowering::ArgInfo &Arg, unsigned OpIdx,
+ const DataLayout &DL,
+ const FuncInfoTy &FuncInfo) const {
+ const AttributeSet &Attrs = FuncInfo.getAttributes();
+ if (Attrs.hasAttribute(OpIdx, Attribute::ZExt))
+ Arg.Flags.setZExt();
+ if (Attrs.hasAttribute(OpIdx, Attribute::SExt))
+ Arg.Flags.setSExt();
+ if (Attrs.hasAttribute(OpIdx, Attribute::InReg))
+ Arg.Flags.setInReg();
+ if (Attrs.hasAttribute(OpIdx, Attribute::StructRet))
+ Arg.Flags.setSRet();
+ if (Attrs.hasAttribute(OpIdx, Attribute::SwiftSelf))
+ Arg.Flags.setSwiftSelf();
+ if (Attrs.hasAttribute(OpIdx, Attribute::SwiftError))
+ Arg.Flags.setSwiftError();
+ if (Attrs.hasAttribute(OpIdx, Attribute::ByVal))
+ Arg.Flags.setByVal();
+ if (Attrs.hasAttribute(OpIdx, Attribute::InAlloca))
+ Arg.Flags.setInAlloca();
+
+ if (Arg.Flags.isByVal() || Arg.Flags.isInAlloca()) {
+ Type *ElementTy = cast<PointerType>(Arg.Ty)->getElementType();
+ Arg.Flags.setByValSize(DL.getTypeAllocSize(ElementTy));
+ // For ByVal, the alignment should come from the frontend; the backend will
+ // guess if it is missing, but there are cases it cannot get right.
+ unsigned FrameAlign;
+ if (FuncInfo.getParamAlignment(OpIdx))
+ FrameAlign = FuncInfo.getParamAlignment(OpIdx);
+ else
+ FrameAlign = getTLI()->getByValTypeAlignment(ElementTy, DL);
+ Arg.Flags.setByValAlign(FrameAlign);
+ }
+ if (Attrs.hasAttribute(OpIdx, Attribute::Nest))
+ Arg.Flags.setNest();
+ Arg.Flags.setOrigAlign(DL.getABITypeAlignment(Arg.Ty));
+}
+
+template void
+CallLowering::setArgFlags<Function>(CallLowering::ArgInfo &Arg, unsigned OpIdx,
+ const DataLayout &DL,
+ const Function &FuncInfo) const;
+
+template void
+CallLowering::setArgFlags<CallInst>(CallLowering::ArgInfo &Arg, unsigned OpIdx,
+ const DataLayout &DL,
+ const CallInst &FuncInfo) const;
+
+bool CallLowering::handleAssignments(MachineIRBuilder &MIRBuilder,
+ CCAssignFn *AssignFn,
+ ArrayRef<ArgInfo> Args,
+ ValueHandler &Handler) const {
+ MachineFunction &MF = MIRBuilder.getMF();
+ const Function &F = *MF.getFunction();
+ const DataLayout &DL = F.getParent()->getDataLayout();
+
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(F.getCallingConv(), F.isVarArg(), MF, ArgLocs, F.getContext());
+
+ unsigned NumArgs = Args.size();
+ for (unsigned i = 0; i != NumArgs; ++i) {
+ MVT CurVT = MVT::getVT(Args[i].Ty);
+ if (AssignFn(i, CurVT, CurVT, CCValAssign::Full, Args[i].Flags, CCInfo))
+ return false;
+ }
+
+ for (unsigned i = 0, e = Args.size(); i != e; ++i) {
+ CCValAssign &VA = ArgLocs[i];
+
+ if (VA.isRegLoc())
+ Handler.assignValueToReg(Args[i].Reg, VA.getLocReg(), VA);
+ else if (VA.isMemLoc()) {
+ unsigned Size = VA.getValVT() == MVT::iPTR
+ ? DL.getPointerSize()
+ : alignTo(VA.getValVT().getSizeInBits(), 8) / 8;
+ unsigned Offset = VA.getLocMemOffset();
+ MachinePointerInfo MPO;
+ unsigned StackAddr = Handler.getStackAddress(Size, Offset, MPO);
+ Handler.assignValueToAddress(Args[i].Reg, StackAddr, Size, MPO, VA);
+ } else {
+ // FIXME: Support byvals and other weirdness
+ return false;
+ }
+ }
+ return true;
+}
+
+unsigned CallLowering::ValueHandler::extendRegister(unsigned ValReg,
+ CCValAssign &VA) {
+ LLT LocTy{VA.getLocVT()};
+ switch (VA.getLocInfo()) {
+ default: break;
+ case CCValAssign::Full:
+ case CCValAssign::BCvt:
+ // FIXME: bitconverting between vector types may or may not be a
+ // nop in big-endian situations.
+ return ValReg;
+ case CCValAssign::AExt:
+ assert(!VA.getLocVT().isVector() && "unexpected vector extend");
+ // Otherwise, it's a nop.
+ return ValReg;
+ case CCValAssign::SExt: {
+ unsigned NewReg = MRI.createGenericVirtualRegister(LocTy);
+ MIRBuilder.buildSExt(NewReg, ValReg);
+ return NewReg;
+ }
+ case CCValAssign::ZExt: {
+ unsigned NewReg = MRI.createGenericVirtualRegister(LocTy);
+ MIRBuilder.buildZExt(NewReg, ValReg);
+ return NewReg;
+ }
+ }
+ llvm_unreachable("unable to extend register");
+}
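The SExt/ZExt cases in extendRegister above follow ordinary integer promotion rules. As a standalone illustration (plain C++, not part of this patch), widening a narrow value to a 64-bit location behaves like this:

#include <cassert>
#include <cstdint>

// Standalone model of the SExt/ZExt cases handled by extendRegister:
// widen a ValWidth-bit value to 64 bits, zero- or sign-extending.
uint64_t zext(uint64_t Val, unsigned ValWidth) {
  assert(ValWidth > 0 && ValWidth <= 64);
  uint64_t Mask = ValWidth == 64 ? ~0ULL : (1ULL << ValWidth) - 1;
  return Val & Mask; // high bits cleared
}

int64_t sext(uint64_t Val, unsigned ValWidth) {
  assert(ValWidth > 0 && ValWidth <= 64);
  unsigned Shift = 64 - ValWidth;
  return (int64_t)(Val << Shift) >> Shift; // high bits copy the sign bit
}

// E.g. for the 8-bit value 0x80: zext(0x80, 8) == 0x80, while
// sext(0x80, 8) == -128 (0xFFFFFFFFFFFFFF80).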
diff --git a/contrib/llvm/lib/CodeGen/GlobalISel/GlobalISel.cpp b/contrib/llvm/lib/CodeGen/GlobalISel/GlobalISel.cpp
index 231e5ac..fcd2722 100644
--- a/contrib/llvm/lib/CodeGen/GlobalISel/GlobalISel.cpp
+++ b/contrib/llvm/lib/CodeGen/GlobalISel/GlobalISel.cpp
@@ -25,6 +25,8 @@ void llvm::initializeGlobalISel(PassRegistry &Registry) {
void llvm::initializeGlobalISel(PassRegistry &Registry) {
initializeIRTranslatorPass(Registry);
+ initializeLegalizerPass(Registry);
initializeRegBankSelectPass(Registry);
+ initializeInstructionSelectPass(Registry);
}
#endif // LLVM_BUILD_GLOBAL_ISEL
diff --git a/contrib/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/contrib/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
index b8a960c..89a042f 100644
--- a/contrib/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
+++ b/contrib/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
@@ -14,12 +14,19 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/GlobalISel/CallLowering.h"
+#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Function.h"
+#include "llvm/IR/GetElementPtrTypeIterator.h"
+#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
+#include "llvm/Target/TargetIntrinsicInfo.h"
#include "llvm/Target/TargetLowering.h"
#define DEBUG_TYPE "irtranslator"
@@ -27,13 +34,29 @@
using namespace llvm;
char IRTranslator::ID = 0;
-INITIALIZE_PASS(IRTranslator, "irtranslator", "IRTranslator LLVM IR -> MI",
- false, false);
+INITIALIZE_PASS_BEGIN(IRTranslator, DEBUG_TYPE, "IRTranslator LLVM IR -> MI",
+ false, false)
+INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
+INITIALIZE_PASS_END(IRTranslator, DEBUG_TYPE, "IRTranslator LLVM IR -> MI",
+ false, false)
+
+static void reportTranslationError(const Value &V, const Twine &Message) {
+ std::string ErrStorage;
+ raw_string_ostream Err(ErrStorage);
+ Err << Message << ": " << V << '\n';
+ report_fatal_error(Err.str());
+}
IRTranslator::IRTranslator() : MachineFunctionPass(ID), MRI(nullptr) {
initializeIRTranslatorPass(*PassRegistry::getPassRegistry());
}
+void IRTranslator::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<TargetPassConfig>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
unsigned IRTranslator::getOrCreateVReg(const Value &Val) {
unsigned &ValReg = ValToVReg[&Val];
// Check if this is the first time we see Val.
@@ -42,56 +65,132 @@ unsigned IRTranslator::getOrCreateVReg(const Value &Val) {
// we need to concat together to produce the value.
assert(Val.getType()->isSized() &&
"Don't know how to create an empty vreg");
- assert(!Val.getType()->isAggregateType() && "Not yet implemented");
- unsigned Size = Val.getType()->getPrimitiveSizeInBits();
- unsigned VReg = MRI->createGenericVirtualRegister(Size);
+ unsigned VReg = MRI->createGenericVirtualRegister(LLT{*Val.getType(), *DL});
ValReg = VReg;
- assert(!isa<Constant>(Val) && "Not yet implemented");
+
+ if (auto CV = dyn_cast<Constant>(&Val)) {
+ bool Success = translate(*CV, VReg);
+ if (!Success) {
+ if (!TPC->isGlobalISelAbortEnabled()) {
+ MF->getProperties().set(
+ MachineFunctionProperties::Property::FailedISel);
+ return VReg;
+ }
+ reportTranslationError(Val, "unable to translate constant");
+ }
+ }
}
return ValReg;
}
+int IRTranslator::getOrCreateFrameIndex(const AllocaInst &AI) {
+ if (FrameIndices.find(&AI) != FrameIndices.end())
+ return FrameIndices[&AI];
+
+ unsigned ElementSize = DL->getTypeStoreSize(AI.getAllocatedType());
+ unsigned Size =
+ ElementSize * cast<ConstantInt>(AI.getArraySize())->getZExtValue();
+
+ // Always allocate at least one byte.
+ Size = std::max(Size, 1u);
+
+ unsigned Alignment = AI.getAlignment();
+ if (!Alignment)
+ Alignment = DL->getABITypeAlignment(AI.getAllocatedType());
+
+ int &FI = FrameIndices[&AI];
+ FI = MF->getFrameInfo().CreateStackObject(Size, Alignment, false, &AI);
+ return FI;
+}
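The stack-object size computed by getOrCreateFrameIndex is the element store size scaled by the array length, clamped to at least one byte. A minimal arithmetic sketch of that rule (hypothetical helper, not an LLVM API):

#include <algorithm>
#include <cstdint>

// Model of getOrCreateFrameIndex's size computation: an alloca of N elements
// whose element type stores to ElementSize bytes occupies N * ElementSize
// bytes, but never less than one byte (so distinct allocas stay distinct).
uint64_t allocaSizeBytes(uint64_t ElementSize, uint64_t NumElements) {
  return std::max<uint64_t>(ElementSize * NumElements, 1);
}

// E.g. 'alloca i32, i32 4' -> allocaSizeBytes(4, 4) == 16 bytes, while a
// zero-sized element type still reserves 1 byte.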
+
+unsigned IRTranslator::getMemOpAlignment(const Instruction &I) {
+ unsigned Alignment = 0;
+ Type *ValTy = nullptr;
+ if (const StoreInst *SI = dyn_cast<StoreInst>(&I)) {
+ Alignment = SI->getAlignment();
+ ValTy = SI->getValueOperand()->getType();
+ } else if (const LoadInst *LI = dyn_cast<LoadInst>(&I)) {
+ Alignment = LI->getAlignment();
+ ValTy = LI->getType();
+ } else if (!TPC->isGlobalISelAbortEnabled()) {
+ MF->getProperties().set(
+ MachineFunctionProperties::Property::FailedISel);
+ return 1;
+ } else
+ llvm_unreachable("unhandled memory instruction");
+
+ return Alignment ? Alignment : DL->getABITypeAlignment(ValTy);
+}
+
MachineBasicBlock &IRTranslator::getOrCreateBB(const BasicBlock &BB) {
MachineBasicBlock *&MBB = BBToMBB[&BB];
if (!MBB) {
- MachineFunction &MF = MIRBuilder.getMF();
- MBB = MF.CreateMachineBasicBlock();
- MF.push_back(MBB);
+ MBB = MF->CreateMachineBasicBlock(&BB);
+ MF->push_back(MBB);
+
+ if (BB.hasAddressTaken())
+ MBB->setHasAddressTaken();
}
return *MBB;
}
-bool IRTranslator::translateBinaryOp(unsigned Opcode, const Instruction &Inst) {
+bool IRTranslator::translateBinaryOp(unsigned Opcode, const User &U,
+ MachineIRBuilder &MIRBuilder) {
+ // FIXME: handle signed/unsigned wrapping flags.
+
// Get or create a virtual register for each value.
// Unless the value is a Constant => loadimm cst?
// or inline constant each time?
// Creation of a virtual register needs to have a size.
- unsigned Op0 = getOrCreateVReg(*Inst.getOperand(0));
- unsigned Op1 = getOrCreateVReg(*Inst.getOperand(1));
- unsigned Res = getOrCreateVReg(Inst);
- MIRBuilder.buildInstr(Opcode, Inst.getType(), Res, Op0, Op1);
+ unsigned Op0 = getOrCreateVReg(*U.getOperand(0));
+ unsigned Op1 = getOrCreateVReg(*U.getOperand(1));
+ unsigned Res = getOrCreateVReg(U);
+ MIRBuilder.buildInstr(Opcode).addDef(Res).addUse(Op0).addUse(Op1);
return true;
}
-bool IRTranslator::translateReturn(const Instruction &Inst) {
- assert(isa<ReturnInst>(Inst) && "Return expected");
- const Value *Ret = cast<ReturnInst>(Inst).getReturnValue();
+bool IRTranslator::translateCompare(const User &U,
+ MachineIRBuilder &MIRBuilder) {
+ const CmpInst *CI = dyn_cast<CmpInst>(&U);
+ unsigned Op0 = getOrCreateVReg(*U.getOperand(0));
+ unsigned Op1 = getOrCreateVReg(*U.getOperand(1));
+ unsigned Res = getOrCreateVReg(U);
+ CmpInst::Predicate Pred =
+ CI ? CI->getPredicate() : static_cast<CmpInst::Predicate>(
+ cast<ConstantExpr>(U).getPredicate());
+
+ if (CmpInst::isIntPredicate(Pred))
+ MIRBuilder.buildICmp(Pred, Res, Op0, Op1);
+ else
+ MIRBuilder.buildFCmp(Pred, Res, Op0, Op1);
+
+ return true;
+}
+
+bool IRTranslator::translateRet(const User &U, MachineIRBuilder &MIRBuilder) {
+ const ReturnInst &RI = cast<ReturnInst>(U);
+ const Value *Ret = RI.getReturnValue();
// The target may mess up with the insertion point, but
// this is not important as a return is the last instruction
// of the block anyway.
return CLI->lowerReturn(MIRBuilder, Ret, !Ret ? 0 : getOrCreateVReg(*Ret));
}
-bool IRTranslator::translateBr(const Instruction &Inst) {
- assert(isa<BranchInst>(Inst) && "Branch expected");
- const BranchInst &BrInst = *cast<BranchInst>(&Inst);
- if (BrInst.isUnconditional()) {
- const BasicBlock &BrTgt = *cast<BasicBlock>(BrInst.getOperand(0));
- MachineBasicBlock &TgtBB = getOrCreateBB(BrTgt);
- MIRBuilder.buildInstr(TargetOpcode::G_BR, BrTgt.getType(), TgtBB);
- } else {
- assert(0 && "Not yet implemented");
+bool IRTranslator::translateBr(const User &U, MachineIRBuilder &MIRBuilder) {
+ const BranchInst &BrInst = cast<BranchInst>(U);
+ unsigned Succ = 0;
+ if (!BrInst.isUnconditional()) {
+ // We want a G_BRCOND to the true BB followed by an unconditional branch.
+ unsigned Tst = getOrCreateVReg(*BrInst.getCondition());
+ const BasicBlock &TrueTgt = *cast<BasicBlock>(BrInst.getSuccessor(Succ++));
+ MachineBasicBlock &TrueBB = getOrCreateBB(TrueTgt);
+ MIRBuilder.buildBrCond(Tst, TrueBB);
}
+
+ const BasicBlock &BrTgt = *cast<BasicBlock>(BrInst.getSuccessor(Succ));
+ MachineBasicBlock &TgtBB = getOrCreateBB(BrTgt);
+ MIRBuilder.buildBr(TgtBB);
+
// Link successors.
MachineBasicBlock &CurBB = MIRBuilder.getMBB();
for (const BasicBlock *Succ : BrInst.successors())
@@ -99,66 +198,694 @@ bool IRTranslator::translateBr(const Instruction &Inst) {
return true;
}
+bool IRTranslator::translateSwitch(const User &U,
+ MachineIRBuilder &MIRBuilder) {
+ // For now, just translate as a chain of conditional branches.
+ // FIXME: could we share most of the logic/code in
+ // SelectionDAGBuilder::visitSwitch between SelectionDAG and GlobalISel?
+ // At first sight, it seems most of the logic in there is independent of
+ // SelectionDAG specifics, and a lot of work went into optimizing switch
+ // lowering in there.
+
+ const SwitchInst &SwInst = cast<SwitchInst>(U);
+ const unsigned SwCondValue = getOrCreateVReg(*SwInst.getCondition());
+
+ LLT LLTi1 = LLT(*Type::getInt1Ty(U.getContext()), *DL);
+ for (auto &CaseIt : SwInst.cases()) {
+ const unsigned CaseValueReg = getOrCreateVReg(*CaseIt.getCaseValue());
+ const unsigned Tst = MRI->createGenericVirtualRegister(LLTi1);
+ MIRBuilder.buildICmp(CmpInst::ICMP_EQ, Tst, CaseValueReg, SwCondValue);
+ MachineBasicBlock &CurBB = MIRBuilder.getMBB();
+ MachineBasicBlock &TrueBB = getOrCreateBB(*CaseIt.getCaseSuccessor());
+
+ MIRBuilder.buildBrCond(Tst, TrueBB);
+ CurBB.addSuccessor(&TrueBB);
+
+ MachineBasicBlock *FalseBB =
+ MF->CreateMachineBasicBlock(SwInst.getParent());
+ MF->push_back(FalseBB);
+ MIRBuilder.buildBr(*FalseBB);
+ CurBB.addSuccessor(FalseBB);
+
+ MIRBuilder.setMBB(*FalseBB);
+ }
+ // Handle the default case.
+ MachineBasicBlock &DefaultBB = getOrCreateBB(*SwInst.getDefaultDest());
+ MIRBuilder.buildBr(DefaultBB);
+ MIRBuilder.getMBB().addSuccessor(&DefaultBB);
+
+ return true;
+}
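translateSwitch therefore produces the same control flow a cascade of if/else-if tests would. A standalone sketch of the emitted shape (hypothetical case values C0/C1 and successor results, for illustration only):

// Equivalent control flow for 'switch (Cond) { case C0: ...; case C1: ...; }'
// as emitted above: one compare-and-branch per case, each falling through to
// a fresh block holding the next test, ending at the default destination.
int switchAsBranchChain(int Cond) {
  if (Cond == 0 /* C0 */)
    return 10; // case C0 successor
  // fall-through block created per loop iteration above
  if (Cond == 1 /* C1 */)
    return 20; // case C1 successor
  // default destination
  return -1;
}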
+
+bool IRTranslator::translateLoad(const User &U, MachineIRBuilder &MIRBuilder) {
+ const LoadInst &LI = cast<LoadInst>(U);
+
+ if (!TPC->isGlobalISelAbortEnabled() && LI.isAtomic())
+ return false;
+
+ assert(!LI.isAtomic() && "only non-atomic loads are supported at the moment");
+ auto Flags = LI.isVolatile() ? MachineMemOperand::MOVolatile
+ : MachineMemOperand::MONone;
+ Flags |= MachineMemOperand::MOLoad;
+
+ unsigned Res = getOrCreateVReg(LI);
+ unsigned Addr = getOrCreateVReg(*LI.getPointerOperand());
+ LLT VTy{*LI.getType(), *DL}, PTy{*LI.getPointerOperand()->getType(), *DL};
+ MIRBuilder.buildLoad(
+ Res, Addr,
+ *MF->getMachineMemOperand(MachinePointerInfo(LI.getPointerOperand()),
+ Flags, DL->getTypeStoreSize(LI.getType()),
+ getMemOpAlignment(LI)));
+ return true;
+}
+
+bool IRTranslator::translateStore(const User &U, MachineIRBuilder &MIRBuilder) {
+ const StoreInst &SI = cast<StoreInst>(U);
+
+ if (!TPC->isGlobalISelAbortEnabled() && SI.isAtomic())
+ return false;
+
+ assert(!SI.isAtomic() && "only non-atomic stores are supported at the moment");
+ auto Flags = SI.isVolatile() ? MachineMemOperand::MOVolatile
+ : MachineMemOperand::MONone;
+ Flags |= MachineMemOperand::MOStore;
+
+ unsigned Val = getOrCreateVReg(*SI.getValueOperand());
+ unsigned Addr = getOrCreateVReg(*SI.getPointerOperand());
+ LLT VTy{*SI.getValueOperand()->getType(), *DL},
+ PTy{*SI.getPointerOperand()->getType(), *DL};
+
+ MIRBuilder.buildStore(
+ Val, Addr,
+ *MF->getMachineMemOperand(
+ MachinePointerInfo(SI.getPointerOperand()), Flags,
+ DL->getTypeStoreSize(SI.getValueOperand()->getType()),
+ getMemOpAlignment(SI)));
+ return true;
+}
+
+bool IRTranslator::translateExtractValue(const User &U,
+ MachineIRBuilder &MIRBuilder) {
+ const Value *Src = U.getOperand(0);
+ Type *Int32Ty = Type::getInt32Ty(U.getContext());
+ SmallVector<Value *, 1> Indices;
+
+ // getIndexedOffsetInType is designed for GEPs: the first index selects the
+ // object itself (hence the leading zero) before the aggregate indices apply.
+ Indices.push_back(ConstantInt::get(Int32Ty, 0));
+
+ if (const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(&U)) {
+ for (auto Idx : EVI->indices())
+ Indices.push_back(ConstantInt::get(Int32Ty, Idx));
+ } else {
+ for (unsigned i = 1; i < U.getNumOperands(); ++i)
+ Indices.push_back(U.getOperand(i));
+ }
+
+ uint64_t Offset = 8 * DL->getIndexedOffsetInType(Src->getType(), Indices);
+
+ unsigned Res = getOrCreateVReg(U);
+ MIRBuilder.buildExtract(Res, Offset, getOrCreateVReg(*Src));
+
+ return true;
+}
+
+bool IRTranslator::translateInsertValue(const User &U,
+ MachineIRBuilder &MIRBuilder) {
+ const Value *Src = U.getOperand(0);
+ Type *Int32Ty = Type::getInt32Ty(U.getContext());
+ SmallVector<Value *, 1> Indices;
+
+ // getIndexedOffsetInType is designed for GEPs: the first index selects the
+ // object itself (hence the leading zero) before the aggregate indices apply.
+ Indices.push_back(ConstantInt::get(Int32Ty, 0));
+
+ if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(&U)) {
+ for (auto Idx : IVI->indices())
+ Indices.push_back(ConstantInt::get(Int32Ty, Idx));
+ } else {
+ for (unsigned i = 2; i < U.getNumOperands(); ++i)
+ Indices.push_back(U.getOperand(i));
+ }
+
+ uint64_t Offset = 8 * DL->getIndexedOffsetInType(Src->getType(), Indices);
+
+ unsigned Res = getOrCreateVReg(U);
+ const Value &Inserted = *U.getOperand(1);
+ MIRBuilder.buildInsert(Res, getOrCreateVReg(*Src), getOrCreateVReg(Inserted),
+ Offset);
+
+ return true;
+}
+
+bool IRTranslator::translateSelect(const User &U,
+ MachineIRBuilder &MIRBuilder) {
+ MIRBuilder.buildSelect(getOrCreateVReg(U), getOrCreateVReg(*U.getOperand(0)),
+ getOrCreateVReg(*U.getOperand(1)),
+ getOrCreateVReg(*U.getOperand(2)));
+ return true;
+}
+
+bool IRTranslator::translateBitCast(const User &U,
+ MachineIRBuilder &MIRBuilder) {
+ if (LLT{*U.getOperand(0)->getType(), *DL} == LLT{*U.getType(), *DL}) {
+ unsigned &Reg = ValToVReg[&U];
+ if (Reg)
+ MIRBuilder.buildCopy(Reg, getOrCreateVReg(*U.getOperand(0)));
+ else
+ Reg = getOrCreateVReg(*U.getOperand(0));
+ return true;
+ }
+ return translateCast(TargetOpcode::G_BITCAST, U, MIRBuilder);
+}
+
+bool IRTranslator::translateCast(unsigned Opcode, const User &U,
+ MachineIRBuilder &MIRBuilder) {
+ unsigned Op = getOrCreateVReg(*U.getOperand(0));
+ unsigned Res = getOrCreateVReg(U);
+ MIRBuilder.buildInstr(Opcode).addDef(Res).addUse(Op);
+ return true;
+}
+
+bool IRTranslator::translateGetElementPtr(const User &U,
+ MachineIRBuilder &MIRBuilder) {
+ // FIXME: support vector GEPs.
+ if (U.getType()->isVectorTy())
+ return false;
+
+ Value &Op0 = *U.getOperand(0);
+ unsigned BaseReg = getOrCreateVReg(Op0);
+ LLT PtrTy{*Op0.getType(), *DL};
+ unsigned PtrSize = DL->getPointerSizeInBits(PtrTy.getAddressSpace());
+ LLT OffsetTy = LLT::scalar(PtrSize);
+
+ int64_t Offset = 0;
+ for (gep_type_iterator GTI = gep_type_begin(&U), E = gep_type_end(&U);
+ GTI != E; ++GTI) {
+ const Value *Idx = GTI.getOperand();
+ if (StructType *StTy = GTI.getStructTypeOrNull()) {
+ unsigned Field = cast<Constant>(Idx)->getUniqueInteger().getZExtValue();
+ Offset += DL->getStructLayout(StTy)->getElementOffset(Field);
+ continue;
+ } else {
+ uint64_t ElementSize = DL->getTypeAllocSize(GTI.getIndexedType());
+
+ // If this is a scalar constant or a splat vector of constants,
+ // handle it quickly.
+ if (const auto *CI = dyn_cast<ConstantInt>(Idx)) {
+ Offset += ElementSize * CI->getSExtValue();
+ continue;
+ }
+
+ if (Offset != 0) {
+ unsigned NewBaseReg = MRI->createGenericVirtualRegister(PtrTy);
+ unsigned OffsetReg = MRI->createGenericVirtualRegister(OffsetTy);
+ MIRBuilder.buildConstant(OffsetReg, Offset);
+ MIRBuilder.buildGEP(NewBaseReg, BaseReg, OffsetReg);
+
+ BaseReg = NewBaseReg;
+ Offset = 0;
+ }
+
+ // N = N + Idx * ElementSize;
+ unsigned ElementSizeReg = MRI->createGenericVirtualRegister(OffsetTy);
+ MIRBuilder.buildConstant(ElementSizeReg, ElementSize);
+
+ unsigned IdxReg = getOrCreateVReg(*Idx);
+ if (MRI->getType(IdxReg) != OffsetTy) {
+ unsigned NewIdxReg = MRI->createGenericVirtualRegister(OffsetTy);
+ MIRBuilder.buildSExtOrTrunc(NewIdxReg, IdxReg);
+ IdxReg = NewIdxReg;
+ }
+
+ unsigned OffsetReg = MRI->createGenericVirtualRegister(OffsetTy);
+ MIRBuilder.buildMul(OffsetReg, ElementSizeReg, IdxReg);
+
+ unsigned NewBaseReg = MRI->createGenericVirtualRegister(PtrTy);
+ MIRBuilder.buildGEP(NewBaseReg, BaseReg, OffsetReg);
+ BaseReg = NewBaseReg;
+ }
+ }
+
+ if (Offset != 0) {
+ unsigned OffsetReg = MRI->createGenericVirtualRegister(OffsetTy);
+ MIRBuilder.buildConstant(OffsetReg, Offset);
+ MIRBuilder.buildGEP(getOrCreateVReg(U), BaseReg, OffsetReg);
+ return true;
+ }
+
+ MIRBuilder.buildCopy(getOrCreateVReg(U), BaseReg);
+ return true;
+}
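The GEP loop above folds every constant index into one running displacement and only materializes multiply/add operations for variable indices. A standalone model of the resulting address arithmetic (the struct layout values are assumptions for illustration):

#include <cstdint>

// Model of the GEP lowering above for '&Base[I].second' on a struct of two
// 32-bit fields (assumed layout: field 0 at offset 0, field 1 at offset 4,
// struct size 8). The constant field offset folds into one displacement;
// only the variable index 'I' costs a multiply and an add.
uint64_t gepStructField1(uint64_t Base, uint64_t I) {
  const uint64_t StructSize = 8;  // DL->getTypeAllocSize(StructTy)
  const uint64_t FieldOffset = 4; // StructLayout element offset of field 1
  return Base + I * StructSize + FieldOffset;
}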
+
+bool IRTranslator::translateMemcpy(const CallInst &CI,
+ MachineIRBuilder &MIRBuilder) {
+ LLT SizeTy{*CI.getArgOperand(2)->getType(), *DL};
+ if (cast<PointerType>(CI.getArgOperand(0)->getType())->getAddressSpace() !=
+ 0 ||
+ cast<PointerType>(CI.getArgOperand(1)->getType())->getAddressSpace() !=
+ 0 ||
+ SizeTy.getSizeInBits() != DL->getPointerSizeInBits(0))
+ return false;
+
+ SmallVector<CallLowering::ArgInfo, 8> Args;
+ for (int i = 0; i < 3; ++i) {
+ const auto &Arg = CI.getArgOperand(i);
+ Args.emplace_back(getOrCreateVReg(*Arg), Arg->getType());
+ }
+
+ MachineOperand Callee = MachineOperand::CreateES("memcpy");
+
+ return CLI->lowerCall(MIRBuilder, Callee,
+ CallLowering::ArgInfo(0, CI.getType()), Args);
+}
+
+void IRTranslator::getStackGuard(unsigned DstReg,
+ MachineIRBuilder &MIRBuilder) {
+ auto MIB = MIRBuilder.buildInstr(TargetOpcode::LOAD_STACK_GUARD);
+ MIB.addDef(DstReg);
+
+ auto &TLI = *MF->getSubtarget().getTargetLowering();
+ Value *Global = TLI.getSDagStackGuard(*MF->getFunction()->getParent());
+ if (!Global)
+ return;
+
+ MachinePointerInfo MPInfo(Global);
+ MachineInstr::mmo_iterator MemRefs = MF->allocateMemRefsArray(1);
+ auto Flags = MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant |
+ MachineMemOperand::MODereferenceable;
+ *MemRefs =
+ MF->getMachineMemOperand(MPInfo, Flags, DL->getPointerSizeInBits() / 8,
+ DL->getPointerABIAlignment());
+ MIB.setMemRefs(MemRefs, MemRefs + 1);
+}
+
+bool IRTranslator::translateOverflowIntrinsic(const CallInst &CI, unsigned Op,
+ MachineIRBuilder &MIRBuilder) {
+ LLT Ty{*CI.getOperand(0)->getType(), *DL};
+ LLT s1 = LLT::scalar(1);
+ unsigned Width = Ty.getSizeInBits();
+ unsigned Res = MRI->createGenericVirtualRegister(Ty);
+ unsigned Overflow = MRI->createGenericVirtualRegister(s1);
+ auto MIB = MIRBuilder.buildInstr(Op)
+ .addDef(Res)
+ .addDef(Overflow)
+ .addUse(getOrCreateVReg(*CI.getOperand(0)))
+ .addUse(getOrCreateVReg(*CI.getOperand(1)));
+
+ if (Op == TargetOpcode::G_UADDE || Op == TargetOpcode::G_USUBE) {
+ unsigned Zero = MRI->createGenericVirtualRegister(s1);
+ EntryBuilder.buildConstant(Zero, 0);
+ MIB.addUse(Zero);
+ }
+
+ MIRBuilder.buildSequence(getOrCreateVReg(CI), Res, 0, Overflow, Width);
+ return true;
+}
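Each *_with_overflow intrinsic produces a {result, overflow} pair; for the unsigned-add case the zero carry-in makes G_UADDE act as a plain add that also reports its carry-out. A standalone model:

#include <cstdint>
#include <utility>

// Model of uadd.with.overflow as lowered above: compute the wrapped sum and
// a 1-bit flag saying whether the true result exceeded the type's range.
std::pair<uint32_t, bool> uaddWithOverflow(uint32_t A, uint32_t B) {
  uint32_t Sum = A + B;    // wraps modulo 2^32, like G_UADDE's result
  bool Overflow = Sum < A; // carry out of the top bit
  return {Sum, Overflow};
}

// E.g. uaddWithOverflow(0xFFFFFFFF, 1) == {0, true}.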
+
+bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
+ MachineIRBuilder &MIRBuilder) {
+ switch (ID) {
+ default:
+ break;
+ case Intrinsic::dbg_declare:
+ case Intrinsic::dbg_value:
+ // FIXME: these obviously need to be supported properly.
+ MF->getProperties().set(
+ MachineFunctionProperties::Property::FailedISel);
+ return true;
+ case Intrinsic::uadd_with_overflow:
+ return translateOverflowIntrinsic(CI, TargetOpcode::G_UADDE, MIRBuilder);
+ case Intrinsic::sadd_with_overflow:
+ return translateOverflowIntrinsic(CI, TargetOpcode::G_SADDO, MIRBuilder);
+ case Intrinsic::usub_with_overflow:
+ return translateOverflowIntrinsic(CI, TargetOpcode::G_USUBE, MIRBuilder);
+ case Intrinsic::ssub_with_overflow:
+ return translateOverflowIntrinsic(CI, TargetOpcode::G_SSUBO, MIRBuilder);
+ case Intrinsic::umul_with_overflow:
+ return translateOverflowIntrinsic(CI, TargetOpcode::G_UMULO, MIRBuilder);
+ case Intrinsic::smul_with_overflow:
+ return translateOverflowIntrinsic(CI, TargetOpcode::G_SMULO, MIRBuilder);
+ case Intrinsic::memcpy:
+ return translateMemcpy(CI, MIRBuilder);
+ case Intrinsic::eh_typeid_for: {
+ GlobalValue *GV = ExtractTypeInfo(CI.getArgOperand(0));
+ unsigned Reg = getOrCreateVReg(CI);
+ unsigned TypeID = MF->getTypeIDFor(GV);
+ MIRBuilder.buildConstant(Reg, TypeID);
+ return true;
+ }
+ case Intrinsic::objectsize: {
+ // If we don't know by now, we're never going to know.
+ const ConstantInt *Min = cast<ConstantInt>(CI.getArgOperand(1));
+
+ MIRBuilder.buildConstant(getOrCreateVReg(CI), Min->isZero() ? -1ULL : 0);
+ return true;
+ }
+ case Intrinsic::stackguard:
+ getStackGuard(getOrCreateVReg(CI), MIRBuilder);
+ return true;
+ case Intrinsic::stackprotector: {
+ LLT PtrTy{*CI.getArgOperand(0)->getType(), *DL};
+ unsigned GuardVal = MRI->createGenericVirtualRegister(PtrTy);
+ getStackGuard(GuardVal, MIRBuilder);
+
+ AllocaInst *Slot = cast<AllocaInst>(CI.getArgOperand(1));
+ MIRBuilder.buildStore(
+ GuardVal, getOrCreateVReg(*Slot),
+ *MF->getMachineMemOperand(
+ MachinePointerInfo::getFixedStack(*MF,
+ getOrCreateFrameIndex(*Slot)),
+ MachineMemOperand::MOStore | MachineMemOperand::MOVolatile,
+ PtrTy.getSizeInBits() / 8, 8));
+ return true;
+ }
+ }
+ return false;
+}
+
+bool IRTranslator::translateCall(const User &U, MachineIRBuilder &MIRBuilder) {
+ const CallInst &CI = cast<CallInst>(U);
+ auto TII = MF->getTarget().getIntrinsicInfo();
+ const Function *F = CI.getCalledFunction();
+
+ if (!F || !F->isIntrinsic()) {
+ unsigned Res = CI.getType()->isVoidTy() ? 0 : getOrCreateVReg(CI);
+ SmallVector<unsigned, 8> Args;
+ for (auto &Arg: CI.arg_operands())
+ Args.push_back(getOrCreateVReg(*Arg));
+
+ return CLI->lowerCall(MIRBuilder, CI, Res, Args, [&]() {
+ return getOrCreateVReg(*CI.getCalledValue());
+ });
+ }
+
+ Intrinsic::ID ID = F->getIntrinsicID();
+ if (TII && ID == Intrinsic::not_intrinsic)
+ ID = static_cast<Intrinsic::ID>(TII->getIntrinsicID(F));
+
+ assert(ID != Intrinsic::not_intrinsic && "unknown intrinsic");
+
+ if (translateKnownIntrinsic(CI, ID, MIRBuilder))
+ return true;
+
+ unsigned Res = CI.getType()->isVoidTy() ? 0 : getOrCreateVReg(CI);
+ MachineInstrBuilder MIB =
+ MIRBuilder.buildIntrinsic(ID, Res, !CI.doesNotAccessMemory());
+
+ for (auto &Arg : CI.arg_operands()) {
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(Arg))
+ MIB.addImm(CI->getSExtValue());
+ else
+ MIB.addUse(getOrCreateVReg(*Arg));
+ }
+ return true;
+}
+
+bool IRTranslator::translateInvoke(const User &U,
+ MachineIRBuilder &MIRBuilder) {
+ const InvokeInst &I = cast<InvokeInst>(U);
+ MCContext &Context = MF->getContext();
+
+ const BasicBlock *ReturnBB = I.getSuccessor(0);
+ const BasicBlock *EHPadBB = I.getSuccessor(1);
+
+ const Value *Callee(I.getCalledValue());
+ const Function *Fn = dyn_cast<Function>(Callee);
+ if (isa<InlineAsm>(Callee))
+ return false;
+
+ // FIXME: support invoking patchpoint and statepoint intrinsics.
+ if (Fn && Fn->isIntrinsic())
+ return false;
+
+ // FIXME: support whatever these are.
+ if (I.countOperandBundlesOfType(LLVMContext::OB_deopt))
+ return false;
+
+ // FIXME: support Windows exception handling.
+ if (!isa<LandingPadInst>(EHPadBB->front()))
+ return false;
+
+ // Emit the actual call, bracketed by EH_LABELs so that the MF knows about
+ // the region covered by the try.
+ MCSymbol *BeginSymbol = Context.createTempSymbol();
+ MIRBuilder.buildInstr(TargetOpcode::EH_LABEL).addSym(BeginSymbol);
+
+ unsigned Res = I.getType()->isVoidTy() ? 0 : getOrCreateVReg(I);
+ SmallVector<CallLowering::ArgInfo, 8> Args;
+ for (auto &Arg: I.arg_operands())
+ Args.emplace_back(getOrCreateVReg(*Arg), Arg->getType());
+
+ if (!CLI->lowerCall(MIRBuilder, MachineOperand::CreateGA(Fn, 0),
+ CallLowering::ArgInfo(Res, I.getType()), Args))
+ return false;
+
+ MCSymbol *EndSymbol = Context.createTempSymbol();
+ MIRBuilder.buildInstr(TargetOpcode::EH_LABEL).addSym(EndSymbol);
+
+ // FIXME: track probabilities.
+ MachineBasicBlock &EHPadMBB = getOrCreateBB(*EHPadBB),
+ &ReturnMBB = getOrCreateBB(*ReturnBB);
+ MF->addInvoke(&EHPadMBB, BeginSymbol, EndSymbol);
+ MIRBuilder.getMBB().addSuccessor(&ReturnMBB);
+ MIRBuilder.getMBB().addSuccessor(&EHPadMBB);
+
+ return true;
+}
+
+bool IRTranslator::translateLandingPad(const User &U,
+ MachineIRBuilder &MIRBuilder) {
+ const LandingPadInst &LP = cast<LandingPadInst>(U);
+
+ MachineBasicBlock &MBB = MIRBuilder.getMBB();
+ addLandingPadInfo(LP, MBB);
+
+ MBB.setIsEHPad();
+
+ // If there aren't registers to copy the values into (e.g., during SjLj
+ // exceptions), then don't bother.
+ auto &TLI = *MF->getSubtarget().getTargetLowering();
+ const Constant *PersonalityFn = MF->getFunction()->getPersonalityFn();
+ if (TLI.getExceptionPointerRegister(PersonalityFn) == 0 &&
+ TLI.getExceptionSelectorRegister(PersonalityFn) == 0)
+ return true;
+
+ // If the landingpad's return type is token type, we don't create vregs
+ // for its exception pointer and selector values. Extracting the exception
+ // pointer or selector from a token-typed landingpad is not currently
+ // supported.
+ if (LP.getType()->isTokenTy())
+ return true;
+
+ // Add a label to mark the beginning of the landing pad. Deletion of the
+ // landing pad can thus be detected via the MachineModuleInfo.
+ MIRBuilder.buildInstr(TargetOpcode::EH_LABEL)
+ .addSym(MF->addLandingPad(&MBB));
+
+ // Mark exception register as live in.
+ SmallVector<unsigned, 2> Regs;
+ SmallVector<uint64_t, 2> Offsets;
+ LLT p0 = LLT::pointer(0, DL->getPointerSizeInBits());
+ if (unsigned Reg = TLI.getExceptionPointerRegister(PersonalityFn)) {
+ unsigned VReg = MRI->createGenericVirtualRegister(p0);
+ MIRBuilder.buildCopy(VReg, Reg);
+ Regs.push_back(VReg);
+ Offsets.push_back(0);
+ }
+
+ if (unsigned Reg = TLI.getExceptionSelectorRegister(PersonalityFn)) {
+ unsigned VReg = MRI->createGenericVirtualRegister(p0);
+ MIRBuilder.buildCopy(VReg, Reg);
+ Regs.push_back(VReg);
+ Offsets.push_back(p0.getSizeInBits());
+ }
+
+ MIRBuilder.buildSequence(getOrCreateVReg(LP), Regs, Offsets);
+ return true;
+}
+
+bool IRTranslator::translateStaticAlloca(const AllocaInst &AI,
+ MachineIRBuilder &MIRBuilder) {
+ if (!TPC->isGlobalISelAbortEnabled() && !AI.isStaticAlloca())
+ return false;
+
+ assert(AI.isStaticAlloca() && "only handle static allocas now");
+ unsigned Res = getOrCreateVReg(AI);
+ int FI = getOrCreateFrameIndex(AI);
+ MIRBuilder.buildFrameIndex(Res, FI);
+ return true;
+}
+
+bool IRTranslator::translatePHI(const User &U, MachineIRBuilder &MIRBuilder) {
+ const PHINode &PI = cast<PHINode>(U);
+ auto MIB = MIRBuilder.buildInstr(TargetOpcode::PHI);
+ MIB.addDef(getOrCreateVReg(PI));
+
+ PendingPHIs.emplace_back(&PI, MIB.getInstr());
+ return true;
+}
+
+void IRTranslator::finishPendingPhis() {
+ for (std::pair<const PHINode *, MachineInstr *> &Phi : PendingPHIs) {
+ const PHINode *PI = Phi.first;
+ MachineInstrBuilder MIB(*MF, Phi.second);
+
+ // All MachineBasicBlocks exist, add them to the PHI. We assume IRTranslator
+ // won't create extra control flow here, otherwise we need to find the
+ // dominating predecessor here (or perhaps force the weirder IRTranslators
+ // to provide a simple boundary).
+ for (unsigned i = 0; i < PI->getNumIncomingValues(); ++i) {
+ assert(BBToMBB[PI->getIncomingBlock(i)]->isSuccessor(MIB->getParent()) &&
+ "I appear to have misunderstood Machine PHIs");
+ MIB.addUse(getOrCreateVReg(*PI->getIncomingValue(i)));
+ MIB.addMBB(BBToMBB[PI->getIncomingBlock(i)]);
+ }
+ }
+}
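Recording PHIs first and wiring their operands in finishPendingPhis avoids forward references to vregs of values whose blocks have not been translated yet. A standalone sketch of the same two-phase idea (a hypothetical string-keyed mini-IR, not the LLVM types):

#include <map>
#include <string>
#include <utility>
#include <vector>

// Two-phase PHI wiring as in translatePHI/finishPendingPhis above: phase one
// records an empty PHI per node, phase two resolves each incoming value to a
// register once every block has been translated, so values defined later in
// the function are safe to reference.
struct PendingPhi {
  std::string Dest;                                          // PHI result
  std::vector<std::pair<std::string, std::string>> Incoming; // (value, block)
};

void finishPhis(const std::vector<PendingPhi> &Pending,
                const std::map<std::string, int> &ValueToReg,
                std::map<std::string, std::vector<int>> &PhiOperands) {
  for (const PendingPhi &Phi : Pending)
    for (const auto &In : Phi.Incoming)
      PhiOperands[Phi.Dest].push_back(ValueToReg.at(In.first));
}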
+
bool IRTranslator::translate(const Instruction &Inst) {
- MIRBuilder.setDebugLoc(Inst.getDebugLoc());
+ CurBuilder.setDebugLoc(Inst.getDebugLoc());
switch(Inst.getOpcode()) {
- case Instruction::Add:
- return translateBinaryOp(TargetOpcode::G_ADD, Inst);
- case Instruction::Or:
- return translateBinaryOp(TargetOpcode::G_OR, Inst);
- case Instruction::Br:
- return translateBr(Inst);
- case Instruction::Ret:
- return translateReturn(Inst);
-
+#define HANDLE_INST(NUM, OPCODE, CLASS) \
+ case Instruction::OPCODE: return translate##OPCODE(Inst, CurBuilder);
+#include "llvm/IR/Instruction.def"
default:
- llvm_unreachable("Opcode not supported");
+ if (!TPC->isGlobalISelAbortEnabled())
+ return false;
+ llvm_unreachable("unknown opcode");
}
}
+bool IRTranslator::translate(const Constant &C, unsigned Reg) {
+ if (auto CI = dyn_cast<ConstantInt>(&C))
+ EntryBuilder.buildConstant(Reg, *CI);
+ else if (auto CF = dyn_cast<ConstantFP>(&C))
+ EntryBuilder.buildFConstant(Reg, *CF);
+ else if (isa<UndefValue>(C))
+ EntryBuilder.buildInstr(TargetOpcode::IMPLICIT_DEF).addDef(Reg);
+ else if (isa<ConstantPointerNull>(C))
+ EntryBuilder.buildConstant(Reg, 0);
+ else if (auto GV = dyn_cast<GlobalValue>(&C))
+ EntryBuilder.buildGlobalValue(Reg, GV);
+ else if (auto CE = dyn_cast<ConstantExpr>(&C)) {
+ switch(CE->getOpcode()) {
+#define HANDLE_INST(NUM, OPCODE, CLASS) \
+ case Instruction::OPCODE: return translate##OPCODE(*CE, EntryBuilder);
+#include "llvm/IR/Instruction.def"
+ default:
+ if (!TPC->isGlobalISelAbortEnabled())
+ return false;
+ llvm_unreachable("unknown opcode");
+ }
+ } else if (!TPC->isGlobalISelAbortEnabled())
+ return false;
+ else
+ llvm_unreachable("unhandled constant kind");
+
+ return true;
+}
-void IRTranslator::finalize() {
+void IRTranslator::finalizeFunction() {
// Release the memory used by the different maps we
// needed during the translation.
+ PendingPHIs.clear();
ValToVReg.clear();
+ FrameIndices.clear();
Constants.clear();
}
-bool IRTranslator::runOnMachineFunction(MachineFunction &MF) {
- const Function &F = *MF.getFunction();
+bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) {
+ MF = &CurMF;
+ const Function &F = *MF->getFunction();
if (F.empty())
return false;
- CLI = MF.getSubtarget().getCallLowering();
- MIRBuilder.setMF(MF);
- MRI = &MF.getRegInfo();
- // Setup the arguments.
- MachineBasicBlock &MBB = getOrCreateBB(F.front());
- MIRBuilder.setMBB(MBB);
+ CLI = MF->getSubtarget().getCallLowering();
+ CurBuilder.setMF(*MF);
+ EntryBuilder.setMF(*MF);
+ MRI = &MF->getRegInfo();
+ DL = &F.getParent()->getDataLayout();
+ TPC = &getAnalysis<TargetPassConfig>();
+
+ assert(PendingPHIs.empty() && "stale PHIs");
+
+ // Set up a separate basic block for the arguments and constants, falling
+ // through to the IR-level Function's entry block.
+ MachineBasicBlock *EntryBB = MF->CreateMachineBasicBlock();
+ MF->push_back(EntryBB);
+ EntryBB->addSuccessor(&getOrCreateBB(F.front()));
+ EntryBuilder.setMBB(*EntryBB);
+
+ // Lower the actual args into this basic block.
SmallVector<unsigned, 8> VRegArgs;
for (const Argument &Arg: F.args())
VRegArgs.push_back(getOrCreateVReg(Arg));
- bool Succeeded =
- CLI->lowerFormalArguments(MIRBuilder, F.getArgumentList(), VRegArgs);
- if (!Succeeded)
+ bool Succeeded = CLI->lowerFormalArguments(EntryBuilder, F, VRegArgs);
+ if (!Succeeded) {
+ if (!TPC->isGlobalISelAbortEnabled()) {
+ MF->getProperties().set(
+ MachineFunctionProperties::Property::FailedISel);
+ finalizeFunction();
+ return false;
+ }
report_fatal_error("Unable to lower arguments");
+ }
+ // And translate the function!
for (const BasicBlock &BB: F) {
MachineBasicBlock &MBB = getOrCreateBB(BB);
// Set the insertion point of all the following translations to
// the end of this basic block.
- MIRBuilder.setMBB(MBB);
+ CurBuilder.setMBB(MBB);
+
for (const Instruction &Inst: BB) {
- bool Succeeded = translate(Inst);
+ Succeeded &= translate(Inst);
if (!Succeeded) {
- DEBUG(dbgs() << "Cannot translate: " << Inst << '\n');
- report_fatal_error("Unable to translate instruction");
+ if (TPC->isGlobalISelAbortEnabled())
+ reportTranslationError(Inst, "unable to translate instruction");
+ MF->getProperties().set(
+ MachineFunctionProperties::Property::FailedISel);
+ break;
}
}
}
- // Now that the MachineFrameInfo has been configured, no further changes to
- // the reserved registers are possible.
- MRI->freezeReservedRegs(MF);
+ if (Succeeded) {
+ finishPendingPhis();
+
+ // Now that the MachineFrameInfo has been configured, no further changes to
+ // the reserved registers are possible.
+ MRI->freezeReservedRegs(*MF);
+
+ // Merge the argument lowering and constants block with its single
+ // successor, the LLVM-IR entry block. We want the basic block to
+ // be maximal.
+ assert(EntryBB->succ_size() == 1 &&
+ "Custom BB used for lowering should have only one successor");
+ // Get the successor of the current entry block.
+ MachineBasicBlock &NewEntryBB = **EntryBB->succ_begin();
+ assert(NewEntryBB.pred_size() == 1 &&
+ "LLVM-IR entry block has a predecessor!?");
+ // Move all the instructions from the current entry block to the
+ // new entry block.
+ NewEntryBB.splice(NewEntryBB.begin(), EntryBB, EntryBB->begin(),
+ EntryBB->end());
+
+ // Update the live-in information for the new entry block.
+ for (const MachineBasicBlock::RegisterMaskPair &LiveIn : EntryBB->liveins())
+ NewEntryBB.addLiveIn(LiveIn);
+ NewEntryBB.sortUniqueLiveIns();
+
+ // Get rid of the now empty basic block.
+ EntryBB->removeSuccessor(&NewEntryBB);
+ MF->remove(EntryBB);
+
+ assert(&MF->front() == &NewEntryBB &&
+ "New entry wasn't next in the list of basic block!");
+ }
+
+ finalizeFunction();
return false;
}
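The entry-block merge at the end of runOnMachineFunction is essentially a list splice plus live-in bookkeeping; the splice half behaves like std::list::splice, sketched here on plain ints:

#include <cassert>
#include <list>

// Model of the block merge above: move every instruction from the synthetic
// argument-lowering block to the front of the real entry block, leaving the
// synthetic block empty so it can be removed from the function.
void mergeEntryBlocks(std::list<int> &NewEntry, std::list<int> &Synthetic) {
  NewEntry.splice(NewEntry.begin(), Synthetic, Synthetic.begin(),
                  Synthetic.end());
  assert(Synthetic.empty() && "everything should have moved");
}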
diff --git a/contrib/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp b/contrib/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp
new file mode 100644
index 0000000..1d205cd
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp
@@ -0,0 +1,175 @@
+//===- llvm/CodeGen/GlobalISel/InstructionSelect.cpp - InstructionSelect ---==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file implements the InstructionSelect class.
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/GlobalISel/InstructionSelect.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
+#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/IR/Function.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+
+#define DEBUG_TYPE "instruction-select"
+
+using namespace llvm;
+
+char InstructionSelect::ID = 0;
+INITIALIZE_PASS_BEGIN(InstructionSelect, DEBUG_TYPE,
+ "Select target instructions out of generic instructions",
+ false, false)
+INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
+INITIALIZE_PASS_END(InstructionSelect, DEBUG_TYPE,
+ "Select target instructions out of generic instructions",
+ false, false)
+
+InstructionSelect::InstructionSelect() : MachineFunctionPass(ID) {
+ initializeInstructionSelectPass(*PassRegistry::getPassRegistry());
+}
+
+void InstructionSelect::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<TargetPassConfig>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+static void reportSelectionError(const MachineInstr *MI, const Twine &Message) {
+ std::string ErrStorage;
+ raw_string_ostream Err(ErrStorage);
+ Err << Message << ":\nIn function: ";
+ if (MI) {
+ const MachineFunction &MF = *MI->getParent()->getParent();
+ Err << MF.getName() << '\n' << *MI << '\n';
+ } else {
+ Err << "<unknown>\n";
+ }
+ report_fatal_error(Err.str());
+}
+
+bool InstructionSelect::runOnMachineFunction(MachineFunction &MF) {
+ // If the ISel pipeline failed, do not bother running this pass.
+ if (MF.getProperties().hasProperty(
+ MachineFunctionProperties::Property::FailedISel))
+ return false;
+
+ DEBUG(dbgs() << "Selecting function: " << MF.getName() << '\n');
+
+ const TargetPassConfig &TPC = getAnalysis<TargetPassConfig>();
+ const InstructionSelector *ISel = MF.getSubtarget().getInstructionSelector();
+ assert(ISel && "Cannot work without InstructionSelector");
+
+ // FIXME: freezeReservedRegs is now done in IRTranslator, but there are many
+ // other MF/MFI fields we need to initialize.
+
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+
+#ifndef NDEBUG
+ // Check that our input is fully legal: we require the function to have the
+ // Legalized property, so it should be.
+ // FIXME: This should be in the MachineVerifier, but it can't use the
+ // LegalizerInfo as it's currently in the separate GlobalISel library.
+ // The RegBankSelected property is already checked in the verifier. Note
+ // that it has the same layering problem, but we only use inline methods so
+ // end up not needing to link against the GlobalISel library.
+ if (const LegalizerInfo *MLI = MF.getSubtarget().getLegalizerInfo())
+ for (const MachineBasicBlock &MBB : MF)
+ for (const MachineInstr &MI : MBB)
+ if (isPreISelGenericOpcode(MI.getOpcode()) && !MLI->isLegal(MI, MRI))
+ reportSelectionError(&MI, "Instruction is not legal");
+
+#endif
+ // FIXME: We could introduce new blocks and will need to fix the outer loop.
+ // Until then, keep track of the number of blocks to assert that we don't.
+ const size_t NumBlocks = MF.size();
+
+ bool Failed = false;
+ for (MachineBasicBlock *MBB : post_order(&MF)) {
+ if (MBB->empty())
+ continue;
+
+ // Select instructions in reverse block order. Erasing is permitted, so we
+ // iterate manually and recognize the begin (rend) case ourselves.
+ bool ReachedBegin = false;
+ for (auto MII = std::prev(MBB->end()), Begin = MBB->begin();
+ !ReachedBegin;) {
+#ifndef NDEBUG
+ // Keep track of the insertion range for debug printing.
+ const auto AfterIt = std::next(MII);
+#endif
+ // Select this instruction.
+ MachineInstr &MI = *MII;
+
+ // And have our iterator point to the next instruction, if there is one.
+ if (MII == Begin)
+ ReachedBegin = true;
+ else
+ --MII;
+
+ DEBUG(dbgs() << "Selecting: \n " << MI);
+
+ if (!ISel->select(MI)) {
+ if (TPC.isGlobalISelAbortEnabled())
+ // FIXME: It would be nice to dump all inserted instructions. It's not
+ // obvious how, esp. considering select() can insert after MI.
+ reportSelectionError(&MI, "Cannot select");
+ Failed = true;
+ break;
+ }
+
+ // Dump the range of instructions that MI expanded into.
+ DEBUG({
+ auto InsertedBegin = ReachedBegin ? MBB->begin() : std::next(MII);
+ dbgs() << "Into:\n";
+ for (auto &InsertedMI : make_range(InsertedBegin, AfterIt))
+ dbgs() << " " << InsertedMI;
+ dbgs() << '\n';
+ });
+ }
+ }
+
+ // Now that selection is complete, there are no more generic vregs. Verify
+ // that the size of the now-constrained vreg is unchanged and that it has a
+ // register class.
+ for (auto &VRegToType : MRI.getVRegToType()) {
+ unsigned VReg = VRegToType.first;
+ auto *RC = MRI.getRegClassOrNull(VReg);
+ auto *MI = MRI.def_instr_begin(VReg) == MRI.def_instr_end()
+ ? nullptr
+ : &*MRI.def_instr_begin(VReg);
+ if (!RC) {
+ if (TPC.isGlobalISelAbortEnabled())
+ reportSelectionError(MI, "VReg as no regclass after selection");
+ Failed = true;
+ break;
+ }
+
+ if (VRegToType.second.isValid() &&
+ VRegToType.second.getSizeInBits() > (RC->getSize() * 8)) {
+ if (TPC.isGlobalISelAbortEnabled())
+ reportSelectionError(
+ MI, "VReg has explicit size different from class size");
+ Failed = true;
+ break;
+ }
+ }
+
+ MRI.getVRegToType().clear();
+
+ if (!TPC.isGlobalISelAbortEnabled() && (Failed || MF.size() != NumBlocks)) {
+ MF.getProperties().set(MachineFunctionProperties::Property::FailedISel);
+ return false;
+ }
+ assert(MF.size() == NumBlocks && "Inserting blocks is not supported yet");
+
+ // FIXME: Should we accurately track changes?
+ return true;
+}
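The manual reverse walk in runOnMachineFunction exists because select() may erase the current instruction, which would invalidate a reverse_iterator pointing at it. The same erase-safe pattern on a std::list, as a standalone sketch:

#include <iterator>
#include <list>

// Erase-safe reverse iteration as in InstructionSelect above: step the
// iterator to the previous element *before* possibly erasing the current
// one, and use a flag to recognize the begin (rend) boundary.
void eraseOddsInReverse(std::list<int> &L) {
  if (L.empty())
    return;
  bool ReachedBegin = false;
  for (auto It = std::prev(L.end()), Begin = L.begin(); !ReachedBegin;) {
    auto Cur = It;
    if (It == Begin)
      ReachedBegin = true;
    else
      --It;
    if (*Cur % 2 != 0)
      L.erase(Cur); // safe: 'It' no longer refers to 'Cur'
  }
}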
diff --git a/contrib/llvm/lib/CodeGen/GlobalISel/InstructionSelector.cpp b/contrib/llvm/lib/CodeGen/GlobalISel/InstructionSelector.cpp
new file mode 100644
index 0000000..5c34da0
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/GlobalISel/InstructionSelector.cpp
@@ -0,0 +1,60 @@
+//===- llvm/CodeGen/GlobalISel/InstructionSelector.cpp -----------*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file implements the InstructionSelector class.
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
+#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
+#include "llvm/CodeGen/GlobalISel/Utils.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+
+#define DEBUG_TYPE "instructionselector"
+
+using namespace llvm;
+
+InstructionSelector::InstructionSelector() {}
+
+bool InstructionSelector::constrainSelectedInstRegOperands(
+ MachineInstr &I, const TargetInstrInfo &TII, const TargetRegisterInfo &TRI,
+ const RegisterBankInfo &RBI) const {
+ MachineBasicBlock &MBB = *I.getParent();
+ MachineFunction &MF = *MBB.getParent();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+
+ for (unsigned OpI = 0, OpE = I.getNumExplicitOperands(); OpI != OpE; ++OpI) {
+ MachineOperand &MO = I.getOperand(OpI);
+
+ // There's nothing to be done on non-register operands.
+ if (!MO.isReg())
+ continue;
+
+ DEBUG(dbgs() << "Converting operand: " << MO << '\n');
+ assert(MO.isReg() && "Unsupported non-reg operand");
+
+ unsigned Reg = MO.getReg();
+ // Physical registers don't need to be constrained.
+ if (TRI.isPhysicalRegister(Reg))
+ continue;
+
+ // Register operands with a value of 0 (e.g. predicate operands) don't need
+ // to be constrained.
+ if (Reg == 0)
+ continue;
+
+ // If the operand is a vreg, we should constrain its regclass, and only
+ // insert COPYs if that's impossible.
+ // constrainOperandRegClass does that for us.
+ MO.setReg(constrainOperandRegClass(MF, TRI, MRI, TII, RBI, I, I.getDesc(),
+ Reg, OpI));
+ }
+ return true;
+}
diff --git a/contrib/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp b/contrib/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp
new file mode 100644
index 0000000..e863568
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp
@@ -0,0 +1,180 @@
+//===-- llvm/CodeGen/GlobalISel/Legalizer.cpp -----------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file This file implements the Legalizer wrapper pass that drives the
+/// LegalizerHelper to legalize individual instructions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/GlobalISel/Legalizer.h"
+#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
+#include "llvm/CodeGen/GlobalISel/Legalizer.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+
+#define DEBUG_TYPE "legalizer"
+
+using namespace llvm;
+
+char Legalizer::ID = 0;
+INITIALIZE_PASS_BEGIN(Legalizer, DEBUG_TYPE,
+ "Legalize the Machine IR a function's Machine IR", false,
+ false)
+INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
+INITIALIZE_PASS_END(Legalizer, DEBUG_TYPE,
+ "Legalize the Machine IR a function's Machine IR", false,
+ false)
+
+Legalizer::Legalizer() : MachineFunctionPass(ID) {
+ initializeLegalizerPass(*PassRegistry::getPassRegistry());
+}
+
+void Legalizer::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<TargetPassConfig>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+void Legalizer::init(MachineFunction &MF) {
+}
+
+bool Legalizer::combineExtracts(MachineInstr &MI, MachineRegisterInfo &MRI,
+ const TargetInstrInfo &TII) {
+ bool Changed = false;
+ if (MI.getOpcode() != TargetOpcode::G_EXTRACT)
+ return Changed;
+
+ unsigned NumDefs = (MI.getNumOperands() - 1) / 2;
+ unsigned SrcReg = MI.getOperand(NumDefs).getReg();
+ MachineInstr &SeqI = *MRI.def_instr_begin(SrcReg);
+ if (SeqI.getOpcode() != TargetOpcode::G_SEQUENCE)
+ return Changed;
+
+ unsigned NumSeqSrcs = (SeqI.getNumOperands() - 1) / 2;
+ bool AllDefsReplaced = true;
+
+ // Try to match each register extracted with a corresponding insertion formed
+ // by the G_SEQUENCE.
+ for (unsigned Idx = 0, SeqIdx = 0; Idx < NumDefs; ++Idx) {
+ MachineOperand &ExtractMO = MI.getOperand(Idx);
+ assert(ExtractMO.isReg() && ExtractMO.isDef() &&
+ "unexpected extract operand");
+
+ unsigned ExtractReg = ExtractMO.getReg();
+ unsigned ExtractPos = MI.getOperand(NumDefs + Idx + 1).getImm();
+
+ while (SeqIdx < NumSeqSrcs &&
+ SeqI.getOperand(2 * SeqIdx + 2).getImm() < ExtractPos)
+ ++SeqIdx;
+
+ if (SeqIdx == NumSeqSrcs) {
+ AllDefsReplaced = false;
+ continue;
+ }
+
+ unsigned OrigReg = SeqI.getOperand(2 * SeqIdx + 1).getReg();
+ if (SeqI.getOperand(2 * SeqIdx + 2).getImm() != ExtractPos ||
+ MRI.getType(OrigReg) != MRI.getType(ExtractReg)) {
+ AllDefsReplaced = false;
+ continue;
+ }
+
+ assert(!TargetRegisterInfo::isPhysicalRegister(OrigReg) &&
+ "unexpected physical register in G_SEQUENCE");
+
+ // Finally we can replace the uses.
+ for (auto &Use : MRI.use_operands(ExtractReg)) {
+ Changed = true;
+ Use.setReg(OrigReg);
+ }
+ }
+
+ if (AllDefsReplaced) {
+ // If SeqI was the next instruction in the BB and we removed it, we'd break
+ // the outer iteration.
+ assert(std::next(MachineBasicBlock::iterator(MI)) != SeqI &&
+ "G_SEQUENCE does not dominate G_EXTRACT");
+
+ MI.eraseFromParent();
+
+ if (MRI.use_empty(SrcReg))
+ SeqI.eraseFromParent();
+ Changed = true;
+ }
+
+ return Changed;
+}
+
+bool Legalizer::runOnMachineFunction(MachineFunction &MF) {
+ // If the ISel pipeline failed, do not bother running this pass.
+ if (MF.getProperties().hasProperty(
+ MachineFunctionProperties::Property::FailedISel))
+ return false;
+ DEBUG(dbgs() << "Legalize Machine IR for: " << MF.getName() << '\n');
+ init(MF);
+ const TargetPassConfig &TPC = getAnalysis<TargetPassConfig>();
+ const LegalizerInfo &LegalizerInfo = *MF.getSubtarget().getLegalizerInfo();
+ LegalizerHelper Helper(MF);
+
+ // FIXME: an instruction may need more than one pass before it is legal. For
+ // example on most architectures <3 x i3> is doubly-illegal. It would
+ // typically proceed along a path like: <3 x i3> -> <3 x i8> -> <8 x i8>. We
+ // probably want a worklist of instructions rather than naive iterate until
+ // convergence for performance reasons.
+ bool Changed = false;
+ MachineBasicBlock::iterator NextMI;
+ for (auto &MBB : MF)
+ for (auto MI = MBB.begin(); MI != MBB.end(); MI = NextMI) {
+ // Get the next Instruction before we try to legalize, because there's a
+ // good chance MI will be deleted.
+ NextMI = std::next(MI);
+
+ // Only legalize pre-isel generic instructions: others don't have types
+ // and are assumed to be legal.
+ if (!isPreISelGenericOpcode(MI->getOpcode()))
+ continue;
+
+ auto Res = Helper.legalizeInstr(*MI, LegalizerInfo);
+
+ // Error out if we couldn't legalize this instruction. We may want to fall
+ // back to DAG ISel instead in the future.
+ if (Res == LegalizerHelper::UnableToLegalize) {
+ if (!TPC.isGlobalISelAbortEnabled()) {
+ MF.getProperties().set(
+ MachineFunctionProperties::Property::FailedISel);
+ return false;
+ }
+ std::string Msg;
+ raw_string_ostream OS(Msg);
+ OS << "unable to legalize instruction: ";
+ MI->print(OS);
+ report_fatal_error(OS.str());
+ }
+
+ Changed |= Res == LegalizerHelper::Legalized;
+ }
+
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
+ for (auto &MBB : MF) {
+ for (auto MI = MBB.begin(); MI != MBB.end(); MI = NextMI) {
+ // Get the next Instruction before we try to legalize, because there's a
+ // good chance MI will be deleted.
+ NextMI = std::next(MI);
+
+ Changed |= combineExtracts(*MI, MRI, TII);
+ }
+ }
+
+ return Changed;
+}
diff --git a/contrib/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/contrib/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
new file mode 100644
index 0000000..eb25b6c
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -0,0 +1,354 @@
+//===-- llvm/CodeGen/GlobalISel/LegalizerHelper.cpp -----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file This file implements the LegalizerHelper class used to legalize
+/// individual instructions; the driving Legalizer pass lives in Legalizer.cpp.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
+#include "llvm/CodeGen/GlobalISel/CallLowering.h"
+#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+
+#include <sstream>
+
+#define DEBUG_TYPE "legalize-mir"
+
+using namespace llvm;
+
+LegalizerHelper::LegalizerHelper(MachineFunction &MF)
+ : MRI(MF.getRegInfo()) {
+ MIRBuilder.setMF(MF);
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::legalizeInstrStep(MachineInstr &MI,
+ const LegalizerInfo &LegalizerInfo) {
+ auto Action = LegalizerInfo.getAction(MI, MRI);
+ switch (std::get<0>(Action)) {
+ case LegalizerInfo::Legal:
+ return AlreadyLegal;
+ case LegalizerInfo::Libcall:
+ return libcall(MI);
+ case LegalizerInfo::NarrowScalar:
+ return narrowScalar(MI, std::get<1>(Action), std::get<2>(Action));
+ case LegalizerInfo::WidenScalar:
+ return widenScalar(MI, std::get<1>(Action), std::get<2>(Action));
+ case LegalizerInfo::Lower:
+ return lower(MI, std::get<1>(Action), std::get<2>(Action));
+ case LegalizerInfo::FewerElements:
+ return fewerElementsVector(MI, std::get<1>(Action), std::get<2>(Action));
+ default:
+ return UnableToLegalize;
+ }
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::legalizeInstr(MachineInstr &MI,
+ const LegalizerInfo &LegalizerInfo) {
+ SmallVector<MachineInstr *, 4> WorkList;
+ MIRBuilder.recordInsertions(
+ [&](MachineInstr *MI) { WorkList.push_back(MI); });
+ WorkList.push_back(&MI);
+
+ bool Changed = false;
+ LegalizeResult Res;
+ unsigned Idx = 0;
+ do {
+ Res = legalizeInstrStep(*WorkList[Idx], LegalizerInfo);
+ if (Res == UnableToLegalize) {
+ MIRBuilder.stopRecordingInsertions();
+ return UnableToLegalize;
+ }
+ Changed |= Res == Legalized;
+ ++Idx;
+ } while (Idx < WorkList.size());
+
+ MIRBuilder.stopRecordingInsertions();
+
+ return Changed ? Legalized : AlreadyLegal;
+}
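legalizeInstr records every instruction the builder inserts and keeps legalizing until the worklist stops growing, since one legalization step can itself create illegal instructions. A standalone model of that fixed-point loop:

#include <vector>

// Worklist-to-fixpoint model of legalizeInstr above: processing one item may
// append new items (standing in for instructions created mid-legalization),
// which are processed in turn until the list stops growing.
int legalizeToFixpoint(std::vector<int> &Worklist) {
  int Steps = 0;
  for (size_t Idx = 0; Idx < Worklist.size(); ++Idx) {
    ++Steps;
    // "Legalizing" an item of value N > 0 emits a new item of value N - 1,
    // which also needs a look, just like a freshly built instruction.
    if (Worklist[Idx] > 0)
      Worklist.push_back(Worklist[Idx] - 1);
  }
  return Steps;
}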
+
+void LegalizerHelper::extractParts(unsigned Reg, LLT Ty, int NumParts,
+ SmallVectorImpl<unsigned> &VRegs) {
+ unsigned Size = Ty.getSizeInBits();
+ SmallVector<uint64_t, 4> Indexes;
+ for (int i = 0; i < NumParts; ++i) {
+ VRegs.push_back(MRI.createGenericVirtualRegister(Ty));
+ Indexes.push_back(i * Size);
+ }
+ MIRBuilder.buildExtract(VRegs, Indexes, Reg);
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::libcall(MachineInstr &MI) {
+ LLT Ty = MRI.getType(MI.getOperand(0).getReg());
+ unsigned Size = Ty.getSizeInBits();
+ MIRBuilder.setInstr(MI);
+
+ switch (MI.getOpcode()) {
+ default:
+ return UnableToLegalize;
+ case TargetOpcode::G_FREM: {
+ auto &Ctx = MIRBuilder.getMF().getFunction()->getContext();
+ Type *Ty = Size == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx);
+ auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
+ auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
+ const char *Name =
+ TLI.getLibcallName(Size == 64 ? RTLIB::REM_F64 : RTLIB::REM_F32);
+
+ CLI.lowerCall(
+ MIRBuilder, MachineOperand::CreateES(Name),
+ {MI.getOperand(0).getReg(), Ty},
+ {{MI.getOperand(1).getReg(), Ty}, {MI.getOperand(2).getReg(), Ty}});
+ MI.eraseFromParent();
+ return Legalized;
+ }
+ }
+}
+
+LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
+ unsigned TypeIdx,
+ LLT NarrowTy) {
+ // FIXME: Don't know how to handle secondary types yet.
+ if (TypeIdx != 0)
+ return UnableToLegalize;
+ switch (MI.getOpcode()) {
+ default:
+ return UnableToLegalize;
+ case TargetOpcode::G_ADD: {
+ // Expand in terms of carry-setting/consuming G_ADDE instructions.
+ unsigned NarrowSize = NarrowTy.getSizeInBits();
+ int NumParts = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits() /
+ NarrowTy.getSizeInBits();
+
+ MIRBuilder.setInstr(MI);
+
+ SmallVector<unsigned, 2> Src1Regs, Src2Regs, DstRegs;
+ SmallVector<uint64_t, 2> Indexes;
+ extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, Src1Regs);
+ extractParts(MI.getOperand(2).getReg(), NarrowTy, NumParts, Src2Regs);
+
+ unsigned CarryIn = MRI.createGenericVirtualRegister(LLT::scalar(1));
+ MIRBuilder.buildConstant(CarryIn, 0);
+
+ for (int i = 0; i < NumParts; ++i) {
+ unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy);
+ unsigned CarryOut = MRI.createGenericVirtualRegister(LLT::scalar(1));
+
+ MIRBuilder.buildUAdde(DstReg, CarryOut, Src1Regs[i],
+ Src2Regs[i], CarryIn);
+
+ DstRegs.push_back(DstReg);
+ Indexes.push_back(i * NarrowSize);
+ CarryIn = CarryOut;
+ }
+ unsigned DstReg = MI.getOperand(0).getReg();
+ MIRBuilder.buildSequence(DstReg, DstRegs, Indexes);
+ MI.eraseFromParent();
+ return Legalized;
+ }
+ }
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
+ MIRBuilder.setInstr(MI);
+
+ switch (MI.getOpcode()) {
+ default:
+ return UnableToLegalize;
+ case TargetOpcode::G_ADD:
+ case TargetOpcode::G_AND:
+ case TargetOpcode::G_MUL:
+ case TargetOpcode::G_OR:
+ case TargetOpcode::G_XOR:
+ case TargetOpcode::G_SUB: {
+ // Perform operation at larger width (any extension is fine here, high bits
+ // don't affect the result) and then truncate the result back to the
+ // original type.
+ unsigned Src1Ext = MRI.createGenericVirtualRegister(WideTy);
+ unsigned Src2Ext = MRI.createGenericVirtualRegister(WideTy);
+ MIRBuilder.buildAnyExt(Src1Ext, MI.getOperand(1).getReg());
+ MIRBuilder.buildAnyExt(Src2Ext, MI.getOperand(2).getReg());
+
+ unsigned DstExt = MRI.createGenericVirtualRegister(WideTy);
+ MIRBuilder.buildInstr(MI.getOpcode())
+ .addDef(DstExt)
+ .addUse(Src1Ext)
+ .addUse(Src2Ext);
+
+ MIRBuilder.buildTrunc(MI.getOperand(0).getReg(), DstExt);
+ MI.eraseFromParent();
+ return Legalized;
+ }
+ case TargetOpcode::G_SDIV:
+ case TargetOpcode::G_UDIV: {
+ unsigned ExtOp = MI.getOpcode() == TargetOpcode::G_SDIV
+ ? TargetOpcode::G_SEXT
+ : TargetOpcode::G_ZEXT;
+
+ unsigned LHSExt = MRI.createGenericVirtualRegister(WideTy);
+ MIRBuilder.buildInstr(ExtOp).addDef(LHSExt).addUse(
+ MI.getOperand(1).getReg());
+
+ unsigned RHSExt = MRI.createGenericVirtualRegister(WideTy);
+ MIRBuilder.buildInstr(ExtOp).addDef(RHSExt).addUse(
+ MI.getOperand(2).getReg());
+
+ unsigned ResExt = MRI.createGenericVirtualRegister(WideTy);
+ MIRBuilder.buildInstr(MI.getOpcode())
+ .addDef(ResExt)
+ .addUse(LHSExt)
+ .addUse(RHSExt);
+
+ MIRBuilder.buildTrunc(MI.getOperand(0).getReg(), ResExt);
+ MI.eraseFromParent();
+ return Legalized;
+ }
+ case TargetOpcode::G_LOAD: {
+ assert(alignTo(MRI.getType(MI.getOperand(0).getReg()).getSizeInBits(), 8) ==
+ WideTy.getSizeInBits() &&
+ "illegal to increase number of bytes loaded");
+
+ unsigned DstExt = MRI.createGenericVirtualRegister(WideTy);
+ MIRBuilder.buildLoad(DstExt, MI.getOperand(1).getReg(),
+ **MI.memoperands_begin());
+ MIRBuilder.buildTrunc(MI.getOperand(0).getReg(), DstExt);
+ MI.eraseFromParent();
+ return Legalized;
+ }
+ case TargetOpcode::G_STORE: {
+ assert(alignTo(MRI.getType(MI.getOperand(0).getReg()).getSizeInBits(), 8) ==
+ WideTy.getSizeInBits() &&
+ "illegal to increase number of bytes modified by a store");
+
+ unsigned SrcExt = MRI.createGenericVirtualRegister(WideTy);
+ MIRBuilder.buildAnyExt(SrcExt, MI.getOperand(0).getReg());
+ MIRBuilder.buildStore(SrcExt, MI.getOperand(1).getReg(),
+ **MI.memoperands_begin());
+ MI.eraseFromParent();
+ return Legalized;
+ }
+ case TargetOpcode::G_CONSTANT: {
+ unsigned DstExt = MRI.createGenericVirtualRegister(WideTy);
+ MIRBuilder.buildConstant(DstExt, *MI.getOperand(1).getCImm());
+ MIRBuilder.buildTrunc(MI.getOperand(0).getReg(), DstExt);
+ MI.eraseFromParent();
+ return Legalized;
+ }
+ case TargetOpcode::G_FCONSTANT: {
+ unsigned DstExt = MRI.createGenericVirtualRegister(WideTy);
+ MIRBuilder.buildFConstant(DstExt, *MI.getOperand(1).getFPImm());
+ MIRBuilder.buildFPTrunc(MI.getOperand(0).getReg(), DstExt);
+ MI.eraseFromParent();
+ return Legalized;
+ }
+ case TargetOpcode::G_BRCOND: {
+ unsigned TstExt = MRI.createGenericVirtualRegister(WideTy);
+ MIRBuilder.buildAnyExt(TstExt, MI.getOperand(0).getReg());
+ MIRBuilder.buildBrCond(TstExt, *MI.getOperand(1).getMBB());
+ MI.eraseFromParent();
+ return Legalized;
+ }
+ case TargetOpcode::G_ICMP: {
+ assert(TypeIdx == 1 && "unable to legalize predicate");
+ bool IsSigned = CmpInst::isSigned(
+ static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate()));
+ unsigned Op0Ext = MRI.createGenericVirtualRegister(WideTy);
+ unsigned Op1Ext = MRI.createGenericVirtualRegister(WideTy);
+ if (IsSigned) {
+ MIRBuilder.buildSExt(Op0Ext, MI.getOperand(2).getReg());
+ MIRBuilder.buildSExt(Op1Ext, MI.getOperand(3).getReg());
+ } else {
+ MIRBuilder.buildZExt(Op0Ext, MI.getOperand(2).getReg());
+ MIRBuilder.buildZExt(Op1Ext, MI.getOperand(3).getReg());
+ }
+ MIRBuilder.buildICmp(
+ static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate()),
+ MI.getOperand(0).getReg(), Op0Ext, Op1Ext);
+ MI.eraseFromParent();
+ return Legalized;
+ }
+ case TargetOpcode::G_GEP: {
+ assert(TypeIdx == 1 && "unable to legalize pointer of GEP");
+ unsigned OffsetExt = MRI.createGenericVirtualRegister(WideTy);
+ MIRBuilder.buildSExt(OffsetExt, MI.getOperand(2).getReg());
+ MI.getOperand(2).setReg(OffsetExt);
+ return Legalized;
+ }
+ }
+}
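
The add/mul/bitwise cases can use G_ANYEXT because the low N bits of a wide add, mul, and, or, or xor depend only on the low N bits of the inputs, so whatever lands in the extended bits is discarded by the final G_TRUNC. Division is the exception, which is why G_SDIV/G_UDIV extend with G_SEXT/G_ZEXT instead. A quick standalone check of that property:

#include <cstdint>

// Widening an 8-bit add to 32 bits: arbitrary junk in the high 24 bits of
// the inputs cannot change the truncated result, so G_ANYEXT is enough.
// E.g. addViaWide(200, 100, 0xABCDEF, 0x123456) == uint8_t(300) == 44.
uint8_t addViaWide(uint8_t A, uint8_t B, uint32_t JunkA, uint32_t JunkB) {
  uint32_t WideA = (JunkA << 8) | A; // "anyext": high bits are arbitrary
  uint32_t WideB = (JunkB << 8) | B;
  // By contrast, uint8_t(WideA / WideB) would not equal uint8_t(A / B),
  // which is why the division cases widen with sign/zero extension.
  return uint8_t(WideA + WideB); // trunc; equals uint8_t(A + B)
}
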
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
+ using namespace TargetOpcode;
+ MIRBuilder.setInstr(MI);
+
+ switch(MI.getOpcode()) {
+ default:
+ return UnableToLegalize;
+ case TargetOpcode::G_SREM:
+ case TargetOpcode::G_UREM: {
+ unsigned QuotReg = MRI.createGenericVirtualRegister(Ty);
+ MIRBuilder.buildInstr(MI.getOpcode() == G_SREM ? G_SDIV : G_UDIV)
+ .addDef(QuotReg)
+ .addUse(MI.getOperand(1).getReg())
+ .addUse(MI.getOperand(2).getReg());
+
+ unsigned ProdReg = MRI.createGenericVirtualRegister(Ty);
+ MIRBuilder.buildMul(ProdReg, QuotReg, MI.getOperand(2).getReg());
+ MIRBuilder.buildSub(MI.getOperand(0).getReg(), MI.getOperand(1).getReg(),
+ ProdReg);
+ MI.eraseFromParent();
+ return Legalized;
+ }
+ }
+}
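
The remainder lowering relies on the identity rem = x - (x / y) * y with the matching signed or unsigned division; e.g. 23 srem 5 becomes 23 - (23 / 5) * 5 = 23 - 20 = 3. In C++ terms (where / and % both truncate toward zero, so the identity is exact for y != 0):

#include <cstdint>

// The exact decomposition the lowering emits: divide, multiply back,
// subtract. Holds for negative X too, e.g. lowerSRem(-23, 5) == -3.
int64_t lowerSRem(int64_t X, int64_t Y) {
  int64_t Quot = X / Y;    // G_SDIV (G_UDIV for the unsigned flavour)
  int64_t Prod = Quot * Y; // G_MUL
  return X - Prod;         // G_SUB
}
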
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
+ LLT NarrowTy) {
+ // FIXME: Don't know how to handle secondary types yet.
+ if (TypeIdx != 0)
+ return UnableToLegalize;
+ switch (MI.getOpcode()) {
+ default:
+ return UnableToLegalize;
+ case TargetOpcode::G_ADD: {
+ unsigned NarrowSize = NarrowTy.getSizeInBits();
+ unsigned DstReg = MI.getOperand(0).getReg();
+ int NumParts = MRI.getType(DstReg).getSizeInBits() / NarrowSize;
+
+ MIRBuilder.setInstr(MI);
+
+ SmallVector<unsigned, 2> Src1Regs, Src2Regs, DstRegs;
+ SmallVector<uint64_t, 2> Indexes;
+ extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, Src1Regs);
+ extractParts(MI.getOperand(2).getReg(), NarrowTy, NumParts, Src2Regs);
+
+ for (int i = 0; i < NumParts; ++i) {
+ unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy);
+ MIRBuilder.buildAdd(DstReg, Src1Regs[i], Src2Regs[i]);
+ DstRegs.push_back(DstReg);
+ Indexes.push_back(i * NarrowSize);
+ }
+
+ MIRBuilder.buildSequence(DstReg, DstRegs, Indexes);
+ MI.eraseFromParent();
+ return Legalized;
+ }
+ }
+}
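
fewerElementsVector is the vector analogue of narrowScalar: for a <4 x s32> G_ADD with NarrowTy = <2 x s32>, NumParts is 128 / 64 = 2, producing two half-width adds reassembled by G_SEQUENCE at bit offsets 0 and 64. The same split written out over plain arrays (a sketch, not the builder API):

#include <array>
#include <cstdint>

// Splitting a 4-lane add into two 2-lane adds, mirroring the G_ADD case.
using V2 = std::array<uint32_t, 2>;

static V2 add2(V2 A, V2 B) { return {A[0] + B[0], A[1] + B[1]}; }

std::array<uint32_t, 4> add4(const std::array<uint32_t, 4> &A,
                             const std::array<uint32_t, 4> &B) {
  V2 Lo = add2({A[0], A[1]}, {B[0], B[1]}); // part at bit offset 0
  V2 Hi = add2({A[2], A[3]}, {B[2], B[3]}); // part at bit offset 64
  return {Lo[0], Lo[1], Hi[0], Hi[1]};      // the G_SEQUENCE reassembly
}
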
diff --git a/contrib/llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp b/contrib/llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp
new file mode 100644
index 0000000..e496620
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp
@@ -0,0 +1,182 @@
+//===---- lib/CodeGen/GlobalISel/LegalizerInfo.cpp - Legalizer ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Implement an interface to specify and query how an illegal operation on a
+// given type should be expanded.
+//
+// Issues to be resolved:
+// + Make it fast.
+// + Support weird types like i3, <7 x i3>, ...
+// + Operations with more than one type (ICMP, CMPXCHG, intrinsics, ...)
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
+
+#include "llvm/ADT/SmallBitVector.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/IR/Type.h"
+#include "llvm/Target/TargetOpcodes.h"
+using namespace llvm;
+
+LegalizerInfo::LegalizerInfo() : TablesInitialized(false) {
+ // FIXME: these two can be legalized to the fundamental load/store Jakob
+ // proposed, once loads & stores are supported.
+ DefaultActions[TargetOpcode::G_ANYEXT] = Legal;
+ DefaultActions[TargetOpcode::G_TRUNC] = Legal;
+
+ DefaultActions[TargetOpcode::G_INTRINSIC] = Legal;
+ DefaultActions[TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS] = Legal;
+
+ DefaultActions[TargetOpcode::G_ADD] = NarrowScalar;
+ DefaultActions[TargetOpcode::G_LOAD] = NarrowScalar;
+ DefaultActions[TargetOpcode::G_STORE] = NarrowScalar;
+
+ DefaultActions[TargetOpcode::G_BRCOND] = WidenScalar;
+}
+
+void LegalizerInfo::computeTables() {
+ for (unsigned Opcode = 0; Opcode <= LastOp - FirstOp; ++Opcode) {
+ for (unsigned Idx = 0; Idx != Actions[Opcode].size(); ++Idx) {
+ for (auto &Action : Actions[Opcode][Idx]) {
+ LLT Ty = Action.first;
+ if (!Ty.isVector())
+ continue;
+
+ auto &Entry = MaxLegalVectorElts[std::make_pair(Opcode + FirstOp,
+ Ty.getElementType())];
+ Entry = std::max(Entry, Ty.getNumElements());
+ }
+ }
+ }
+
+ TablesInitialized = true;
+}
+
+// FIXME: inefficient implementation for now. Without ComputeValueVTs we're
+// probably going to need specialized lookup structures for various types before
+// we have any hope of doing well with something like <13 x i3>. Even the common
+// cases should do better than what we have now.
+std::pair<LegalizerInfo::LegalizeAction, LLT>
+LegalizerInfo::getAction(const InstrAspect &Aspect) const {
+ assert(TablesInitialized && "backend forgot to call computeTables");
+ // These *have* to be implemented for now; they're the fundamental basis of
+ // how everything else is transformed.
+
+ // Nothing is going to go well with types that aren't a power of 2 yet, so
+ // don't even try because we might make things worse.
+ if (!isPowerOf2_64(Aspect.Type.getSizeInBits()))
+ return std::make_pair(Unsupported, LLT());
+
+ // FIXME: the long-term plan calls for expansion in terms of load/store (if
+ // they're not legal).
+ if (Aspect.Opcode == TargetOpcode::G_SEQUENCE ||
+ Aspect.Opcode == TargetOpcode::G_EXTRACT)
+ return std::make_pair(Legal, Aspect.Type);
+
+ LegalizeAction Action = findInActions(Aspect);
+ if (Action != NotFound)
+ return findLegalAction(Aspect, Action);
+
+ unsigned Opcode = Aspect.Opcode;
+ LLT Ty = Aspect.Type;
+ if (!Ty.isVector()) {
+ auto DefaultAction = DefaultActions.find(Aspect.Opcode);
+ if (DefaultAction != DefaultActions.end() && DefaultAction->second == Legal)
+ return std::make_pair(Legal, Ty);
+
+ if (DefaultAction == DefaultActions.end() ||
+ DefaultAction->second != NarrowScalar)
+ return std::make_pair(Unsupported, LLT());
+ return findLegalAction(Aspect, NarrowScalar);
+ }
+
+ LLT EltTy = Ty.getElementType();
+ int NumElts = Ty.getNumElements();
+
+ auto ScalarAction = ScalarInVectorActions.find(std::make_pair(Opcode, EltTy));
+ if (ScalarAction != ScalarInVectorActions.end() &&
+ ScalarAction->second != Legal)
+ return findLegalAction(Aspect, ScalarAction->second);
+
+ // The element type is legal in principle, but the number of elements is
+ // wrong.
+ auto MaxLegalElts = MaxLegalVectorElts.lookup(std::make_pair(Opcode, EltTy));
+ if (MaxLegalElts > NumElts)
+ return findLegalAction(Aspect, MoreElements);
+
+ if (MaxLegalElts == 0) {
+ // Scalarize if there's no legal vector type, which is just a special case
+ // of FewerElements.
+ return std::make_pair(FewerElements, EltTy);
+ }
+
+ return findLegalAction(Aspect, FewerElements);
+}
+
+std::tuple<LegalizerInfo::LegalizeAction, unsigned, LLT>
+LegalizerInfo::getAction(const MachineInstr &MI,
+ const MachineRegisterInfo &MRI) const {
+ SmallBitVector SeenTypes(8);
+ const MCOperandInfo *OpInfo = MI.getDesc().OpInfo;
+ for (unsigned i = 0; i < MI.getDesc().getNumOperands(); ++i) {
+ if (!OpInfo[i].isGenericType())
+ continue;
+
+ // We don't want to repeatedly check the same operand index, that
+ // could get expensive.
+ unsigned TypeIdx = OpInfo[i].getGenericTypeIndex();
+ if (SeenTypes[TypeIdx])
+ continue;
+
+ SeenTypes.set(TypeIdx);
+
+ LLT Ty = MRI.getType(MI.getOperand(i).getReg());
+ auto Action = getAction({MI.getOpcode(), TypeIdx, Ty});
+ if (Action.first != Legal)
+ return std::make_tuple(Action.first, TypeIdx, Action.second);
+ }
+ return std::make_tuple(Legal, 0, LLT{});
+}
+
+bool LegalizerInfo::isLegal(const MachineInstr &MI,
+ const MachineRegisterInfo &MRI) const {
+ return std::get<0>(getAction(MI, MRI)) == Legal;
+}
+
+LLT LegalizerInfo::findLegalType(const InstrAspect &Aspect,
+ LegalizeAction Action) const {
+ switch(Action) {
+ default:
+ llvm_unreachable("Cannot find legal type");
+ case Legal:
+ case Lower:
+ case Libcall:
+ return Aspect.Type;
+ case NarrowScalar: {
+ return findLegalType(Aspect,
+ [&](LLT Ty) -> LLT { return Ty.halfScalarSize(); });
+ }
+ case WidenScalar: {
+ return findLegalType(Aspect, [&](LLT Ty) -> LLT {
+ return Ty.getSizeInBits() < 8 ? LLT::scalar(8) : Ty.doubleScalarSize();
+ });
+ }
+ case FewerElements: {
+ return findLegalType(Aspect,
+ [&](LLT Ty) -> LLT { return Ty.halfElements(); });
+ }
+ case MoreElements: {
+ return findLegalType(Aspect,
+ [&](LLT Ty) -> LLT { return Ty.doubleElements(); });
+ }
+ }
+}
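
A backend seeds these tables once and must call computeTables() before the first getAction() query, per the assert above. A hedged sketch of target setup; setAction({Opcode, Type}, Action) is assumed from the LegalizerInfo interface, which this diff does not include, while LLT::scalar and the LegalizeAction values appear in the patch itself:

// Hypothetical target initialization, not taken from this patch.
void initMyTargetLegalizer(LegalizerInfo &LI) {
  const LLT S32 = LLT::scalar(32);
  const LLT S64 = LLT::scalar(64);

  LI.setAction({TargetOpcode::G_ADD, S32}, LegalizerInfo::Legal);
  LI.setAction({TargetOpcode::G_ADD, S64}, LegalizerInfo::Legal);
  // Narrower adds get widened toward the first legal size (findLegalType
  // doubles the scalar size until a registered action is found).
  LI.setAction({TargetOpcode::G_ADD, LLT::scalar(8)},
               LegalizerInfo::WidenScalar);
  LI.setAction({TargetOpcode::G_FREM, S64}, LegalizerInfo::Libcall);

  LI.computeTables(); // must precede the first getAction() query
}
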
diff --git a/contrib/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp b/contrib/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
index 2f19bcf..c04f6e4 100644
--- a/contrib/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
+++ b/contrib/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
@@ -14,6 +14,7 @@
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetOpcodes.h"
#include "llvm/Target/TargetSubtargetInfo.h"
@@ -23,82 +24,408 @@ using namespace llvm;
void MachineIRBuilder::setMF(MachineFunction &MF) {
this->MF = &MF;
this->MBB = nullptr;
+ this->MRI = &MF.getRegInfo();
this->TII = MF.getSubtarget().getInstrInfo();
this->DL = DebugLoc();
- this->MI = nullptr;
+ this->II = MachineBasicBlock::iterator();
+ this->InsertedInstr = nullptr;
}
-void MachineIRBuilder::setMBB(MachineBasicBlock &MBB, bool Beginning) {
+void MachineIRBuilder::setMBB(MachineBasicBlock &MBB) {
this->MBB = &MBB;
- Before = Beginning;
+ this->II = MBB.end();
assert(&getMF() == MBB.getParent() &&
"Basic block is in a different function");
}
-void MachineIRBuilder::setInstr(MachineInstr &MI, bool Before) {
+void MachineIRBuilder::setInstr(MachineInstr &MI) {
assert(MI.getParent() && "Instruction is not part of a basic block");
setMBB(*MI.getParent());
- this->MI = &MI;
- this->Before = Before;
+ this->II = MI.getIterator();
}
-MachineBasicBlock::iterator MachineIRBuilder::getInsertPt() {
- if (MI) {
- if (Before)
- return MI;
- if (!MI->getNextNode())
- return getMBB().end();
- return MI->getNextNode();
- }
- return Before ? getMBB().begin() : getMBB().end();
+void MachineIRBuilder::setInsertPt(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator II) {
+ assert(MBB.getParent() == &getMF() &&
+ "Basic block is in a different function");
+ this->MBB = &MBB;
+ this->II = II;
+}
+
+void MachineIRBuilder::recordInsertions(
+ std::function<void(MachineInstr *)> Inserted) {
+ InsertedInstr = Inserted;
+}
+
+void MachineIRBuilder::stopRecordingInsertions() {
+ InsertedInstr = nullptr;
}
//------------------------------------------------------------------------------
// Build instruction variants.
//------------------------------------------------------------------------------
-MachineInstr *MachineIRBuilder::buildInstr(unsigned Opcode, Type *Ty) {
- MachineInstr *NewMI = BuildMI(getMF(), DL, getTII().get(Opcode));
- if (Ty) {
- assert(isPreISelGenericOpcode(Opcode) &&
- "Only generic instruction can have a type");
- NewMI->setType(Ty);
- } else
- assert(!isPreISelGenericOpcode(Opcode) &&
- "Generic instruction must have a type");
- getMBB().insert(getInsertPt(), NewMI);
- return NewMI;
+
+MachineInstrBuilder MachineIRBuilder::buildInstr(unsigned Opcode) {
+ return insertInstr(buildInstrNoInsert(Opcode));
}
-MachineInstr *MachineIRBuilder::buildInstr(unsigned Opcode, unsigned Res,
- unsigned Op0, unsigned Op1) {
- return buildInstr(Opcode, nullptr, Res, Op0, Op1);
+MachineInstrBuilder MachineIRBuilder::buildInstrNoInsert(unsigned Opcode) {
+ MachineInstrBuilder MIB = BuildMI(getMF(), DL, getTII().get(Opcode));
+ return MIB;
}
-MachineInstr *MachineIRBuilder::buildInstr(unsigned Opcode, Type *Ty,
- unsigned Res, unsigned Op0,
- unsigned Op1) {
- MachineInstr *NewMI = buildInstr(Opcode, Ty);
- MachineInstrBuilder(getMF(), NewMI)
- .addReg(Res, RegState::Define)
- .addReg(Op0)
- .addReg(Op1);
- return NewMI;
+
+MachineInstrBuilder MachineIRBuilder::insertInstr(MachineInstrBuilder MIB) {
+ getMBB().insert(getInsertPt(), MIB);
+ if (InsertedInstr)
+ InsertedInstr(MIB);
+ return MIB;
}
-MachineInstr *MachineIRBuilder::buildInstr(unsigned Opcode, unsigned Res,
- unsigned Op0) {
- MachineInstr *NewMI = buildInstr(Opcode, nullptr);
- MachineInstrBuilder(getMF(), NewMI).addReg(Res, RegState::Define).addReg(Op0);
- return NewMI;
+MachineInstrBuilder MachineIRBuilder::buildFrameIndex(unsigned Res, int Idx) {
+ assert(MRI->getType(Res).isPointer() && "invalid operand type");
+ return buildInstr(TargetOpcode::G_FRAME_INDEX)
+ .addDef(Res)
+ .addFrameIndex(Idx);
}
-MachineInstr *MachineIRBuilder::buildInstr(unsigned Opcode) {
- return buildInstr(Opcode, nullptr);
+MachineInstrBuilder MachineIRBuilder::buildGlobalValue(unsigned Res,
+ const GlobalValue *GV) {
+ assert(MRI->getType(Res).isPointer() && "invalid operand type");
+ assert(MRI->getType(Res).getAddressSpace() ==
+ GV->getType()->getAddressSpace() &&
+ "address space mismatch");
+
+ return buildInstr(TargetOpcode::G_GLOBAL_VALUE)
+ .addDef(Res)
+ .addGlobalAddress(GV);
+}
+
+MachineInstrBuilder MachineIRBuilder::buildAdd(unsigned Res, unsigned Op0,
+ unsigned Op1) {
+ assert((MRI->getType(Res).isScalar() || MRI->getType(Res).isVector()) &&
+ "invalid operand type");
+ assert(MRI->getType(Res) == MRI->getType(Op0) &&
+ MRI->getType(Res) == MRI->getType(Op1) && "type mismatch");
+
+ return buildInstr(TargetOpcode::G_ADD)
+ .addDef(Res)
+ .addUse(Op0)
+ .addUse(Op1);
+}
+
+MachineInstrBuilder MachineIRBuilder::buildGEP(unsigned Res, unsigned Op0,
+ unsigned Op1) {
+ assert(MRI->getType(Res).isPointer() &&
+ MRI->getType(Res) == MRI->getType(Op0) && "type mismatch");
+ assert(MRI->getType(Op1).isScalar() && "invalid offset type");
+
+ return buildInstr(TargetOpcode::G_GEP)
+ .addDef(Res)
+ .addUse(Op0)
+ .addUse(Op1);
+}
+
+MachineInstrBuilder MachineIRBuilder::buildSub(unsigned Res, unsigned Op0,
+ unsigned Op1) {
+ assert((MRI->getType(Res).isScalar() || MRI->getType(Res).isVector()) &&
+ "invalid operand type");
+ assert(MRI->getType(Res) == MRI->getType(Op0) &&
+ MRI->getType(Res) == MRI->getType(Op1) && "type mismatch");
+
+ return buildInstr(TargetOpcode::G_SUB)
+ .addDef(Res)
+ .addUse(Op0)
+ .addUse(Op1);
+}
+
+MachineInstrBuilder MachineIRBuilder::buildMul(unsigned Res, unsigned Op0,
+ unsigned Op1) {
+ assert((MRI->getType(Res).isScalar() || MRI->getType(Res).isVector()) &&
+ "invalid operand type");
+ assert(MRI->getType(Res) == MRI->getType(Op0) &&
+ MRI->getType(Res) == MRI->getType(Op1) && "type mismatch");
+
+ return buildInstr(TargetOpcode::G_MUL)
+ .addDef(Res)
+ .addUse(Op0)
+ .addUse(Op1);
}
-MachineInstr *MachineIRBuilder::buildInstr(unsigned Opcode, Type *Ty,
- MachineBasicBlock &BB) {
- MachineInstr *NewMI = buildInstr(Opcode, Ty);
- MachineInstrBuilder(getMF(), NewMI).addMBB(&BB);
- return NewMI;
+MachineInstrBuilder MachineIRBuilder::buildBr(MachineBasicBlock &Dest) {
+ return buildInstr(TargetOpcode::G_BR).addMBB(&Dest);
+}
+
+MachineInstrBuilder MachineIRBuilder::buildCopy(unsigned Res, unsigned Op) {
+ return buildInstr(TargetOpcode::COPY).addDef(Res).addUse(Op);
+}
+
+MachineInstrBuilder MachineIRBuilder::buildConstant(unsigned Res,
+ const ConstantInt &Val) {
+ LLT Ty = MRI->getType(Res);
+
+ assert((Ty.isScalar() || Ty.isPointer()) && "invalid operand type");
+
+ const ConstantInt *NewVal = &Val;
+ if (Ty.getSizeInBits() != Val.getBitWidth())
+ NewVal = ConstantInt::get(MF->getFunction()->getContext(),
+ Val.getValue().sextOrTrunc(Ty.getSizeInBits()));
+
+ return buildInstr(TargetOpcode::G_CONSTANT).addDef(Res).addCImm(NewVal);
+}
+
+MachineInstrBuilder MachineIRBuilder::buildConstant(unsigned Res,
+ int64_t Val) {
+ auto IntN = IntegerType::get(MF->getFunction()->getContext(),
+ MRI->getType(Res).getSizeInBits());
+ ConstantInt *CI = ConstantInt::get(IntN, Val, true);
+ return buildConstant(Res, *CI);
+}
+
+MachineInstrBuilder MachineIRBuilder::buildFConstant(unsigned Res,
+ const ConstantFP &Val) {
+ assert(MRI->getType(Res).isScalar() && "invalid operand type");
+
+ return buildInstr(TargetOpcode::G_FCONSTANT).addDef(Res).addFPImm(&Val);
+}
+
+MachineInstrBuilder MachineIRBuilder::buildBrCond(unsigned Tst,
+ MachineBasicBlock &Dest) {
+ assert(MRI->getType(Tst).isScalar() && "invalid operand type");
+
+ return buildInstr(TargetOpcode::G_BRCOND).addUse(Tst).addMBB(&Dest);
+}
+
+MachineInstrBuilder MachineIRBuilder::buildLoad(unsigned Res, unsigned Addr,
+ MachineMemOperand &MMO) {
+ assert(MRI->getType(Res).isValid() && "invalid operand type");
+ assert(MRI->getType(Addr).isPointer() && "invalid operand type");
+
+ return buildInstr(TargetOpcode::G_LOAD)
+ .addDef(Res)
+ .addUse(Addr)
+ .addMemOperand(&MMO);
+}
+
+MachineInstrBuilder MachineIRBuilder::buildStore(unsigned Val, unsigned Addr,
+ MachineMemOperand &MMO) {
+ assert(MRI->getType(Val).isValid() && "invalid operand type");
+ assert(MRI->getType(Addr).isPointer() && "invalid operand type");
+
+ return buildInstr(TargetOpcode::G_STORE)
+ .addUse(Val)
+ .addUse(Addr)
+ .addMemOperand(&MMO);
+}
+
+MachineInstrBuilder MachineIRBuilder::buildUAdde(unsigned Res,
+ unsigned CarryOut,
+ unsigned Op0, unsigned Op1,
+ unsigned CarryIn) {
+ assert(MRI->getType(Res).isScalar() && "invalid operand type");
+ assert(MRI->getType(Res) == MRI->getType(Op0) &&
+ MRI->getType(Res) == MRI->getType(Op1) && "type mismatch");
+ assert(MRI->getType(CarryOut).isScalar() && "invalid operand type");
+ assert(MRI->getType(CarryOut) == MRI->getType(CarryIn) && "type mismatch");
+
+ return buildInstr(TargetOpcode::G_UADDE)
+ .addDef(Res)
+ .addDef(CarryOut)
+ .addUse(Op0)
+ .addUse(Op1)
+ .addUse(CarryIn);
+}
+
+MachineInstrBuilder MachineIRBuilder::buildAnyExt(unsigned Res, unsigned Op) {
+ validateTruncExt(Res, Op, true);
+ return buildInstr(TargetOpcode::G_ANYEXT).addDef(Res).addUse(Op);
+}
+
+MachineInstrBuilder MachineIRBuilder::buildSExt(unsigned Res, unsigned Op) {
+ validateTruncExt(Res, Op, true);
+ return buildInstr(TargetOpcode::G_SEXT).addDef(Res).addUse(Op);
+}
+
+MachineInstrBuilder MachineIRBuilder::buildZExt(unsigned Res, unsigned Op) {
+ validateTruncExt(Res, Op, true);
+ return buildInstr(TargetOpcode::G_ZEXT).addDef(Res).addUse(Op);
+}
+
+MachineInstrBuilder MachineIRBuilder::buildSExtOrTrunc(unsigned Res,
+ unsigned Op) {
+ unsigned Opcode = TargetOpcode::COPY;
+ if (MRI->getType(Res).getSizeInBits() > MRI->getType(Op).getSizeInBits())
+ Opcode = TargetOpcode::G_SEXT;
+ else if (MRI->getType(Res).getSizeInBits() < MRI->getType(Op).getSizeInBits())
+ Opcode = TargetOpcode::G_TRUNC;
+
+ return buildInstr(Opcode).addDef(Res).addUse(Op);
+}
+
+MachineInstrBuilder MachineIRBuilder::buildExtract(ArrayRef<unsigned> Results,
+ ArrayRef<uint64_t> Indices,
+ unsigned Src) {
+#ifndef NDEBUG
+ assert(Results.size() == Indices.size() && "inconsistent number of regs");
+ assert(!Results.empty() && "invalid trivial extract");
+ assert(std::is_sorted(Indices.begin(), Indices.end()) &&
+ "extract offsets must be in ascending order");
+
+ assert(MRI->getType(Src).isValid() && "invalid operand type");
+ for (auto Res : Results)
+ assert(MRI->getType(Res).isValid() && "invalid operand type");
+#endif
+
+ auto MIB = BuildMI(getMF(), DL, getTII().get(TargetOpcode::G_EXTRACT));
+ for (auto Res : Results)
+ MIB.addDef(Res);
+
+ MIB.addUse(Src);
+
+ for (auto Idx : Indices)
+ MIB.addImm(Idx);
+
+ getMBB().insert(getInsertPt(), MIB);
+ if (InsertedInstr)
+ InsertedInstr(MIB);
+
+ return MIB;
+}
+
+MachineInstrBuilder
+MachineIRBuilder::buildSequence(unsigned Res,
+ ArrayRef<unsigned> Ops,
+ ArrayRef<uint64_t> Indices) {
+#ifndef NDEBUG
+ assert(Ops.size() == Indices.size() && "incompatible args");
+ assert(!Ops.empty() && "invalid trivial sequence");
+ assert(std::is_sorted(Indices.begin(), Indices.end()) &&
+ "sequence offsets must be in ascending order");
+
+ assert(MRI->getType(Res).isValid() && "invalid operand type");
+ for (auto Op : Ops)
+ assert(MRI->getType(Op).isValid() && "invalid operand type");
+#endif
+
+ MachineInstrBuilder MIB = buildInstr(TargetOpcode::G_SEQUENCE);
+ MIB.addDef(Res);
+ for (unsigned i = 0; i < Ops.size(); ++i) {
+ MIB.addUse(Ops[i]);
+ MIB.addImm(Indices[i]);
+ }
+ return MIB;
+}
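
buildExtract and buildSequence are inverses: extract splits Src into Results at ascending bit offsets, and sequence packs Ops back together at Indices. A sketch that splits an s64 into two s32 halves and rebuilds it, assuming an initialized builder B and a MachineRegisterInfo MRI (this mirrors LegalizerHelper::extractParts earlier in the patch):

void splitAndRejoin(MachineIRBuilder &B, MachineRegisterInfo &MRI,
                    unsigned Src, unsigned Dst) {
  const LLT S32 = LLT::scalar(32);
  unsigned Lo = MRI.createGenericVirtualRegister(S32);
  unsigned Hi = MRI.createGenericVirtualRegister(S32);
  B.buildExtract({Lo, Hi}, {0, 32}, Src);  // offsets must be ascending
  B.buildSequence(Dst, {Lo, Hi}, {0, 32}); // Dst now holds Src, rebuilt
}
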
+
+MachineInstrBuilder MachineIRBuilder::buildIntrinsic(Intrinsic::ID ID,
+ unsigned Res,
+ bool HasSideEffects) {
+ auto MIB =
+ buildInstr(HasSideEffects ? TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS
+ : TargetOpcode::G_INTRINSIC);
+ if (Res)
+ MIB.addDef(Res);
+ MIB.addIntrinsicID(ID);
+ return MIB;
+}
+
+MachineInstrBuilder MachineIRBuilder::buildTrunc(unsigned Res, unsigned Op) {
+ validateTruncExt(Res, Op, false);
+ return buildInstr(TargetOpcode::G_TRUNC).addDef(Res).addUse(Op);
+}
+
+MachineInstrBuilder MachineIRBuilder::buildFPTrunc(unsigned Res, unsigned Op) {
+ validateTruncExt(Res, Op, false);
+ return buildInstr(TargetOpcode::G_FPTRUNC).addDef(Res).addUse(Op);
+}
+
+MachineInstrBuilder MachineIRBuilder::buildICmp(CmpInst::Predicate Pred,
+ unsigned Res, unsigned Op0,
+ unsigned Op1) {
+#ifndef NDEBUG
+ assert(MRI->getType(Op0) == MRI->getType(Op1) && "type mismatch");
+ assert(CmpInst::isIntPredicate(Pred) && "invalid predicate");
+ if (MRI->getType(Op0).isScalar() || MRI->getType(Op0).isPointer())
+ assert(MRI->getType(Res).isScalar() && "type mismatch");
+ else
+ assert(MRI->getType(Res).isVector() &&
+ MRI->getType(Res).getNumElements() ==
+ MRI->getType(Op0).getNumElements() &&
+ "type mismatch");
+#endif
+
+ return buildInstr(TargetOpcode::G_ICMP)
+ .addDef(Res)
+ .addPredicate(Pred)
+ .addUse(Op0)
+ .addUse(Op1);
+}
+
+MachineInstrBuilder MachineIRBuilder::buildFCmp(CmpInst::Predicate Pred,
+ unsigned Res, unsigned Op0,
+ unsigned Op1) {
+#ifndef NDEBUG
+ assert((MRI->getType(Op0).isScalar() || MRI->getType(Op0).isVector()) &&
+ "invalid operand type");
+ assert(MRI->getType(Op0) == MRI->getType(Op1) && "type mismatch");
+ assert(CmpInst::isFPPredicate(Pred) && "invalid predicate");
+ if (MRI->getType(Op0).isScalar())
+ assert(MRI->getType(Res).isScalar() && "type mismatch");
+ else
+ assert(MRI->getType(Res).isVector() &&
+ MRI->getType(Res).getNumElements() ==
+ MRI->getType(Op0).getNumElements() &&
+ "type mismatch");
+#endif
+
+ return buildInstr(TargetOpcode::G_FCMP)
+ .addDef(Res)
+ .addPredicate(Pred)
+ .addUse(Op0)
+ .addUse(Op1);
+}
+
+MachineInstrBuilder MachineIRBuilder::buildSelect(unsigned Res, unsigned Tst,
+ unsigned Op0, unsigned Op1) {
+#ifndef NDEBUG
+ LLT ResTy = MRI->getType(Res);
+ assert((ResTy.isScalar() || ResTy.isVector() || ResTy.isPointer()) &&
+ "invalid operand type");
+ assert(ResTy == MRI->getType(Op0) && ResTy == MRI->getType(Op1) &&
+ "type mismatch");
+ if (ResTy.isScalar() || ResTy.isPointer())
+ assert(MRI->getType(Tst).isScalar() && "type mismatch");
+ else
+ assert(MRI->getType(Tst).isVector() &&
+ MRI->getType(Tst).getNumElements() ==
+ MRI->getType(Op0).getNumElements() &&
+ "type mismatch");
+#endif
+
+ return buildInstr(TargetOpcode::G_SELECT)
+ .addDef(Res)
+ .addUse(Tst)
+ .addUse(Op0)
+ .addUse(Op1);
+}
+
+void MachineIRBuilder::validateTruncExt(unsigned Dst, unsigned Src,
+ bool IsExtend) {
+#ifndef NDEBUG
+ LLT SrcTy = MRI->getType(Src);
+ LLT DstTy = MRI->getType(Dst);
+
+ if (DstTy.isVector()) {
+ assert(SrcTy.isVector() && "mismatched cast between vector and non-vector");
+ assert(SrcTy.getNumElements() == DstTy.getNumElements() &&
+ "different number of elements in a trunc/ext");
+ } else
+ assert(DstTy.isScalar() && SrcTy.isScalar() && "invalid extend/trunc");
+
+ if (IsExtend)
+ assert(DstTy.getSizeInBits() > SrcTy.getSizeInBits() &&
+ "invalid narrowing extend");
+ else
+ assert(DstTy.getSizeInBits() < SrcTy.getSizeInBits() &&
+ "invalid widening trunc");
+#endif
}
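
The recordInsertions/stopRecordingInsertions pair gives callers a hook over everything the builder emits; legalizeInstr earlier in this patch uses exactly this to grow its worklist. A usage sketch, a fragment rather than a full function: B is a MachineIRBuilder positioned on the right function, and MI, MRI, LHS, and RHS are assumed to be set up elsewhere.

SmallVector<MachineInstr *, 4> Created;
B.recordInsertions([&](MachineInstr *NewMI) { Created.push_back(NewMI); });

B.setInstr(MI);            // insert point: right before MI
unsigned Tmp = MRI.createGenericVirtualRegister(LLT::scalar(64));
B.buildAdd(Tmp, LHS, RHS); // shows up in Created via the callback

B.stopRecordingInsertions(); // always unhook when done
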
diff --git a/contrib/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp b/contrib/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp
index 419e270..cc026ef 100644
--- a/contrib/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp
+++ b/contrib/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp
@@ -12,10 +12,12 @@
#include "llvm/CodeGen/GlobalISel/RegBankSelect.h"
#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
#include "llvm/CodeGen/GlobalISel/RegisterBank.h"
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/Function.h"
#include "llvm/Support/BlockFrequency.h"
#include "llvm/Support/CommandLine.h"
@@ -31,18 +33,18 @@ static cl::opt<RegBankSelect::Mode> RegBankSelectMode(
cl::values(clEnumValN(RegBankSelect::Mode::Fast, "regbankselect-fast",
"Run the Fast mode (default mapping)"),
clEnumValN(RegBankSelect::Mode::Greedy, "regbankselect-greedy",
- "Use the Greedy mode (best local mapping)"),
- clEnumValEnd));
+ "Use the Greedy mode (best local mapping)")));
char RegBankSelect::ID = 0;
-INITIALIZE_PASS_BEGIN(RegBankSelect, "regbankselect",
+INITIALIZE_PASS_BEGIN(RegBankSelect, DEBUG_TYPE,
"Assign register bank of generic virtual registers",
false, false);
INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfo)
INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
-INITIALIZE_PASS_END(RegBankSelect, "regbankselect",
+INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
+INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE,
"Assign register bank of generic virtual registers", false,
- false);
+ false)
RegBankSelect::RegBankSelect(Mode RunningMode)
: MachineFunctionPass(ID), RBI(nullptr), MRI(nullptr), TRI(nullptr),
@@ -60,6 +62,7 @@ void RegBankSelect::init(MachineFunction &MF) {
assert(RBI && "Cannot work without RegisterBankInfo");
MRI = &MF.getRegInfo();
TRI = MF.getSubtarget().getRegisterInfo();
+ TPC = &getAnalysis<TargetPassConfig>();
if (OptMode != Mode::Fast) {
MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
@@ -77,6 +80,7 @@ void RegBankSelect::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<MachineBlockFrequencyInfo>();
AU.addRequired<MachineBranchProbabilityInfo>();
}
+ AU.addRequired<TargetPassConfig>();
MachineFunctionPass::getAnalysisUsage(AU);
}
@@ -87,7 +91,7 @@ bool RegBankSelect::assignmentMatch(
OnlyAssign = false;
// Each part of a break down needs to end up in a different register.
// In other words, the Reg assignment does not match.
- if (ValMapping.BreakDown.size() > 1)
+ if (ValMapping.NumBreakDowns > 1)
return false;
const RegisterBank *CurRegBank = RBI->getRegBank(Reg, *MRI, *TRI);
@@ -103,11 +107,13 @@ bool RegBankSelect::assignmentMatch(
return CurRegBank == DesiredRegBrank;
}
-void RegBankSelect::repairReg(
+bool RegBankSelect::repairReg(
MachineOperand &MO, const RegisterBankInfo::ValueMapping &ValMapping,
RegBankSelect::RepairingPlacement &RepairPt,
const iterator_range<SmallVectorImpl<unsigned>::const_iterator> &NewVRegs) {
- assert(ValMapping.BreakDown.size() == 1 && "Not yet implemented");
+ if (ValMapping.NumBreakDowns != 1 && !TPC->isGlobalISelAbortEnabled())
+ return false;
+ assert(ValMapping.NumBreakDowns == 1 && "Not yet implemented");
// An empty range of new register means no repairing.
assert(NewVRegs.begin() != NewVRegs.end() && "We should not have to repair");
@@ -126,7 +132,7 @@ void RegBankSelect::repairReg(
"We are about to create several defs for Dst");
// Build the instruction used to repair, then clone it at the right places.
- MachineInstr *MI = MIRBuilder.buildInstr(TargetOpcode::COPY, Dst, Src);
+ MachineInstr *MI = MIRBuilder.buildCopy(Dst, Src);
MI->removeFromParent();
DEBUG(dbgs() << "Copy: " << PrintReg(Src) << " to: " << PrintReg(Dst)
<< '\n');
@@ -149,15 +155,16 @@ void RegBankSelect::repairReg(
}
// TODO:
// Legalize NewInstrs if need be.
+ return true;
}
uint64_t RegBankSelect::getRepairCost(
const MachineOperand &MO,
const RegisterBankInfo::ValueMapping &ValMapping) const {
assert(MO.isReg() && "We should only repair register operand");
- assert(!ValMapping.BreakDown.empty() && "Nothing to map??");
+ assert(ValMapping.NumBreakDowns && "Nothing to map??");
- bool IsSameNumOfValues = ValMapping.BreakDown.size() == 1;
+ bool IsSameNumOfValues = ValMapping.NumBreakDowns == 1;
const RegisterBank *CurRegBank = RBI->getRegBank(MO.getReg(), *MRI, *TRI);
// If MO does not have a register bank, we should have just been
// able to set one unless we have to break the value down.
@@ -195,16 +202,20 @@ uint64_t RegBankSelect::getRepairCost(
// TODO: use a dedicated constant for ImpossibleCost.
if (Cost != UINT_MAX)
return Cost;
- assert(false && "Legalization not available yet");
+ assert(!TPC->isGlobalISelAbortEnabled() &&
+ "Legalization not available yet");
// Return the legalization cost of that repairing.
}
- assert(false && "Complex repairing not implemented yet");
- return 1;
+ assert(!TPC->isGlobalISelAbortEnabled() &&
+ "Complex repairing not implemented yet");
+ return UINT_MAX;
}
RegisterBankInfo::InstructionMapping &RegBankSelect::findBestMapping(
MachineInstr &MI, RegisterBankInfo::InstructionMappings &PossibleMappings,
SmallVectorImpl<RepairingPlacement> &RepairPts) {
+ assert(!PossibleMappings.empty() &&
+ "Do not know how to map this instruction");
RegisterBankInfo::InstructionMapping *BestMapping = nullptr;
MappingCost Cost = MappingCost::ImpossibleCost();
@@ -212,6 +223,7 @@ RegisterBankInfo::InstructionMapping &RegBankSelect::findBestMapping(
for (RegisterBankInfo::InstructionMapping &CurMapping : PossibleMappings) {
MappingCost CurCost = computeMapping(MI, CurMapping, LocalRepairPts, &Cost);
if (CurCost < Cost) {
+ DEBUG(dbgs() << "New best: " << CurCost << '\n');
Cost = CurCost;
BestMapping = &CurMapping;
RepairPts.clear();
@@ -219,7 +231,15 @@ RegisterBankInfo::InstructionMapping &RegBankSelect::findBestMapping(
RepairPts.emplace_back(std::move(RepairPt));
}
}
- assert(BestMapping && "No suitable mapping for instruction");
+ if (!BestMapping && !TPC->isGlobalISelAbortEnabled()) {
+ // If none of the mappings worked, that means they are all impossible.
+ // Thus, pick the first one and set an impossible repairing point.
+ // It will trigger the failed isel mode.
+ BestMapping = &(*PossibleMappings.begin());
+ RepairPts.emplace_back(
+ RepairingPlacement(MI, 0, *TRI, *this, RepairingPlacement::Impossible));
+ } else
+ assert(BestMapping && "No suitable mapping for instruction");
return *BestMapping;
}
@@ -250,7 +270,7 @@ void RegBankSelect::tryAvoidingSplit(
// For the PHI case, the split may not be actually required.
// In the copy case, a phi is already a copy on the incoming edge,
// therefore there is no need to split.
- if (ValMapping.BreakDown.size() == 1)
+ if (ValMapping.NumBreakDowns == 1)
// This is already a copy; there is nothing to do.
RepairPt.switchTo(RepairingPlacement::RepairingKind::Reassign);
}
@@ -327,7 +347,7 @@ void RegBankSelect::tryAvoidingSplit(
// We will split all the edges and repair there.
} else {
// This is a virtual register defined by a terminator.
- if (ValMapping.BreakDown.size() == 1) {
+ if (ValMapping.NumBreakDowns == 1) {
// There is nothing to repair, but we may actually misstate the
// repairing cost because of the PHIs that were already processed,
// as noted above.
@@ -348,6 +368,9 @@ RegBankSelect::MappingCost RegBankSelect::computeMapping(
const RegBankSelect::MappingCost *BestCost) {
assert((MBFI || !BestCost) && "Costs comparison require MBFI");
+ if (!InstrMapping.isValid())
+ return MappingCost::ImpossibleCost();
+
// If mapped with InstrMapping, MI will have the recorded cost.
MappingCost Cost(MBFI ? MBFI->getBlockFreq(MI.getParent()) : 1);
bool Saturated = Cost.addLocalCost(InstrMapping.getCost());
@@ -355,32 +378,34 @@ RegBankSelect::MappingCost RegBankSelect::computeMapping(
DEBUG(dbgs() << "Evaluating mapping cost for: " << MI);
DEBUG(dbgs() << "With: " << InstrMapping << '\n');
RepairPts.clear();
- if (BestCost && Cost > *BestCost)
+ if (BestCost && Cost > *BestCost) {
+ DEBUG(dbgs() << "Mapping is too expensive from the start\n");
return Cost;
+ }
// Moreover, to realize this mapping, the register bank of each operand must
// match this mapping. In other words, we may need to locally reassign the
// register banks. Account for that repairing cost as well.
// In this context, local means in the surrounding of MI.
- for (unsigned OpIdx = 0, EndOpIdx = MI.getNumOperands(); OpIdx != EndOpIdx;
- ++OpIdx) {
+ for (unsigned OpIdx = 0, EndOpIdx = InstrMapping.getNumOperands();
+ OpIdx != EndOpIdx; ++OpIdx) {
const MachineOperand &MO = MI.getOperand(OpIdx);
if (!MO.isReg())
continue;
unsigned Reg = MO.getReg();
if (!Reg)
continue;
- DEBUG(dbgs() << "Opd" << OpIdx);
+ DEBUG(dbgs() << "Opd" << OpIdx << '\n');
const RegisterBankInfo::ValueMapping &ValMapping =
InstrMapping.getOperandMapping(OpIdx);
// If Reg is already properly mapped, this is free.
bool Assign;
if (assignmentMatch(Reg, ValMapping, Assign)) {
- DEBUG(dbgs() << " is free (match).\n");
+ DEBUG(dbgs() << "=> is free (match).\n");
continue;
}
if (Assign) {
- DEBUG(dbgs() << " is free (simple assignment).\n");
+ DEBUG(dbgs() << "=> is free (simple assignment).\n");
RepairPts.emplace_back(RepairingPlacement(MI, OpIdx, *TRI, *this,
RepairingPlacement::Reassign));
continue;
@@ -398,8 +423,10 @@ RegBankSelect::MappingCost RegBankSelect::computeMapping(
tryAvoidingSplit(RepairPt, MO, ValMapping);
// Check that the materialization of the repairing is possible.
- if (!RepairPt.canMaterialize())
+ if (!RepairPt.canMaterialize()) {
+ DEBUG(dbgs() << "Mapping involves impossible repairing\n");
return MappingCost::ImpossibleCost();
+ }
// Account for the split cost and repair cost.
// Unless the cost is already saturated or we do not care about the cost.
@@ -454,8 +481,10 @@ RegBankSelect::MappingCost RegBankSelect::computeMapping(
// Stop looking into what it takes to repair, this is already
// too expensive.
- if (BestCost && Cost > *BestCost)
+ if (BestCost && Cost > *BestCost) {
+ DEBUG(dbgs() << "Mapping is too expensive, stop processing\n");
return Cost;
+ }
// No need to accumulate more cost information.
// We need to still gather the repairing information though.
@@ -463,10 +492,11 @@ RegBankSelect::MappingCost RegBankSelect::computeMapping(
break;
}
}
+ DEBUG(dbgs() << "Total cost is: " << Cost << "\n");
return Cost;
}
-void RegBankSelect::applyMapping(
+bool RegBankSelect::applyMapping(
MachineInstr &MI, const RegisterBankInfo::InstructionMapping &InstrMapping,
SmallVectorImpl<RegBankSelect::RepairingPlacement> &RepairPts) {
// OpdMapper will hold all the information needed for the rewriting.
@@ -474,28 +504,27 @@ void RegBankSelect::applyMapping(
// First, place the repairing code.
for (RepairingPlacement &RepairPt : RepairPts) {
- assert(RepairPt.canMaterialize() &&
- RepairPt.getKind() != RepairingPlacement::Impossible &&
- "This mapping is impossible");
+ if (!RepairPt.canMaterialize() ||
+ RepairPt.getKind() == RepairingPlacement::Impossible)
+ return false;
assert(RepairPt.getKind() != RepairingPlacement::None &&
"This should not make its way in the list");
unsigned OpIdx = RepairPt.getOpIdx();
MachineOperand &MO = MI.getOperand(OpIdx);
const RegisterBankInfo::ValueMapping &ValMapping =
InstrMapping.getOperandMapping(OpIdx);
- unsigned BreakDownSize = ValMapping.BreakDown.size();
- (void)BreakDownSize;
unsigned Reg = MO.getReg();
switch (RepairPt.getKind()) {
case RepairingPlacement::Reassign:
- assert(BreakDownSize == 1 &&
+ assert(ValMapping.NumBreakDowns == 1 &&
"Reassignment should only be for simple mapping");
MRI->setRegBank(Reg, *ValMapping.BreakDown[0].RegBank);
break;
case RepairingPlacement::Insert:
OpdMapper.createVRegs(OpIdx);
- repairReg(MO, ValMapping, RepairPt, OpdMapper.getVRegs(OpIdx));
+ if (!repairReg(MO, ValMapping, RepairPt, OpdMapper.getVRegs(OpIdx)))
+ return false;
break;
default:
llvm_unreachable("Other kind should not happen");
@@ -504,9 +533,10 @@ void RegBankSelect::applyMapping(
// Second, rewrite the instruction.
DEBUG(dbgs() << "Actual mapping of the operands: " << OpdMapper << '\n');
RBI->applyMapping(OpdMapper);
+ return true;
}
-void RegBankSelect::assignInstr(MachineInstr &MI) {
+bool RegBankSelect::assignInstr(MachineInstr &MI) {
DEBUG(dbgs() << "Assign: " << MI);
// Remember the repairing placement for all the operands.
SmallVector<RepairingPlacement, 4> RepairPts;
@@ -516,32 +546,63 @@ void RegBankSelect::assignInstr(MachineInstr &MI) {
BestMapping = RBI->getInstrMapping(MI);
MappingCost DefaultCost = computeMapping(MI, BestMapping, RepairPts);
(void)DefaultCost;
- assert(DefaultCost != MappingCost::ImpossibleCost() &&
- "Default mapping is not suited");
+ if (DefaultCost == MappingCost::ImpossibleCost())
+ return false;
} else {
RegisterBankInfo::InstructionMappings PossibleMappings =
RBI->getInstrPossibleMappings(MI);
- assert(!PossibleMappings.empty() &&
- "Do not know how to map this instruction");
+ if (PossibleMappings.empty())
+ return false;
BestMapping = std::move(findBestMapping(MI, PossibleMappings, RepairPts));
}
// Make sure the mapping is valid for MI.
assert(BestMapping.verify(MI) && "Invalid instruction mapping");
- DEBUG(dbgs() << "Mapping: " << BestMapping << '\n');
+ DEBUG(dbgs() << "Best Mapping: " << BestMapping << '\n');
// After this call, MI may not be valid anymore.
// Do not use it.
- applyMapping(MI, BestMapping, RepairPts);
+ return applyMapping(MI, BestMapping, RepairPts);
}
bool RegBankSelect::runOnMachineFunction(MachineFunction &MF) {
+ // If the ISel pipeline failed, do not bother running that pass.
+ if (MF.getProperties().hasProperty(
+ MachineFunctionProperties::Property::FailedISel))
+ return false;
+
DEBUG(dbgs() << "Assign register banks for: " << MF.getName() << '\n');
const Function *F = MF.getFunction();
Mode SaveOptMode = OptMode;
if (F->hasFnAttribute(Attribute::OptimizeNone))
OptMode = Mode::Fast;
init(MF);
+
+#ifndef NDEBUG
+ // Check that our input is fully legal: we require the function to have the
+ // Legalized property, so it should be.
+ // FIXME: This should be in the MachineVerifier, but it can't use the
+ // LegalizerInfo as it's currently in the separate GlobalISel library.
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+ if (const LegalizerInfo *MLI = MF.getSubtarget().getLegalizerInfo()) {
+ for (const MachineBasicBlock &MBB : MF) {
+ for (const MachineInstr &MI : MBB) {
+ if (isPreISelGenericOpcode(MI.getOpcode()) && !MLI->isLegal(MI, MRI)) {
+ if (!TPC->isGlobalISelAbortEnabled()) {
+ MF.getProperties().set(
+ MachineFunctionProperties::Property::FailedISel);
+ return false;
+ }
+ std::string ErrStorage;
+ raw_string_ostream Err(ErrStorage);
+ Err << "Instruction is not legal: " << MI << '\n';
+ report_fatal_error(Err.str());
+ }
+ }
+ }
+ }
+#endif
+
// Walk the function and assign register banks to all operands.
// Use a RPOT to make sure all registers are assigned before we choose
// the best mapping of the current instruction.
@@ -554,7 +615,18 @@ bool RegBankSelect::runOnMachineFunction(MachineFunction &MF) {
MII != End;) {
// MI might be invalidated by the assignment, so move the
// iterator before hand.
- assignInstr(*MII++);
+ MachineInstr &MI = *MII++;
+
+ // Ignore target-specific instructions: they should use proper regclasses.
+ if (isTargetSpecificOpcode(MI.getOpcode()))
+ continue;
+
+ if (!assignInstr(MI)) {
+ if (TPC->isGlobalISelAbortEnabled())
+ report_fatal_error("Unable to map instruction");
+ MF.getProperties().set(MachineFunctionProperties::Property::FailedISel);
+ return false;
+ }
}
}
OptMode = SaveOptMode;
@@ -895,3 +967,20 @@ bool RegBankSelect::MappingCost::operator==(const MappingCost &Cost) const {
return LocalCost == Cost.LocalCost && NonLocalCost == Cost.NonLocalCost &&
LocalFreq == Cost.LocalFreq;
}
+
+void RegBankSelect::MappingCost::dump() const {
+ print(dbgs());
+ dbgs() << '\n';
+}
+
+void RegBankSelect::MappingCost::print(raw_ostream &OS) const {
+ if (*this == ImpossibleCost()) {
+ OS << "impossible";
+ return;
+ }
+ if (isSaturated()) {
+ OS << "saturated";
+ return;
+ }
+ OS << LocalFreq << " * " << LocalCost << " + " << NonLocalCost;
+}
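
The printed expression is the cost model used throughout this pass: roughly LocalFreq * LocalCost + NonLocalCost, where LocalFreq is the block frequency of MI's basic block, so repairing code placed in hot blocks dominates the comparison. A simplified mirror of the arithmetic (the real MappingCost also tracks saturation and compares with overflow checks):

#include <cstdint>

// Simplified stand-in for RegBankSelect::MappingCost, for illustration only.
struct Cost {
  uint64_t LocalFreq, LocalCost, NonLocalCost;
  uint64_t value() const { return LocalFreq * LocalCost + NonLocalCost; }
};

// In a block executed 1000 times, one extra local repair copy costs 1000,
// so a mapping paying a one-off non-local cost of 10 elsewhere wins:
//   Cost{1000, 1, 0}.value() == 1000  vs  Cost{1000, 0, 10}.value() == 10
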
diff --git a/contrib/llvm/lib/CodeGen/GlobalISel/RegisterBank.cpp b/contrib/llvm/lib/CodeGen/GlobalISel/RegisterBank.cpp
index a911225..49d676f 100644
--- a/contrib/llvm/lib/CodeGen/GlobalISel/RegisterBank.cpp
+++ b/contrib/llvm/lib/CodeGen/GlobalISel/RegisterBank.cpp
@@ -19,12 +19,15 @@ using namespace llvm;
const unsigned RegisterBank::InvalidID = UINT_MAX;
-RegisterBank::RegisterBank() : ID(InvalidID), Name(nullptr), Size(0) {}
+RegisterBank::RegisterBank(unsigned ID, const char *Name, unsigned Size,
+ const uint32_t *CoveredClasses)
+ : ID(ID), Name(Name), Size(Size) {
+ ContainedRegClasses.resize(200);
+ ContainedRegClasses.setBitsInMask(CoveredClasses);
+}
bool RegisterBank::verify(const TargetRegisterInfo &TRI) const {
assert(isValid() && "Invalid register bank");
- assert(ContainedRegClasses.size() == TRI.getNumRegClasses() &&
- "TRI does not match the initialization process?");
for (unsigned RCId = 0, End = TRI.getNumRegClasses(); RCId != End; ++RCId) {
const TargetRegisterClass &RC = *TRI.getRegClass(RCId);
@@ -72,7 +75,7 @@ bool RegisterBank::operator==(const RegisterBank &OtherRB) const {
return &OtherRB == this;
}
-void RegisterBank::dump(const TargetRegisterInfo *TRI) const {
+LLVM_DUMP_METHOD void RegisterBank::dump(const TargetRegisterInfo *TRI) const {
print(dbgs(), /* IsForDebug */ true, TRI);
}
diff --git a/contrib/llvm/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp b/contrib/llvm/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp
index ef8e4f6..da5ab0b 100644
--- a/contrib/llvm/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp
+++ b/contrib/llvm/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp
@@ -13,6 +13,7 @@
#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/CodeGen/GlobalISel/RegisterBank.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
@@ -32,139 +33,56 @@
using namespace llvm;
+STATISTIC(NumPartialMappingsCreated,
+ "Number of partial mappings dynamically created");
+STATISTIC(NumPartialMappingsAccessed,
+ "Number of partial mappings dynamically accessed");
+STATISTIC(NumValueMappingsCreated,
+ "Number of value mappings dynamically created");
+STATISTIC(NumValueMappingsAccessed,
+ "Number of value mappings dynamically accessed");
+STATISTIC(NumOperandsMappingsCreated,
+ "Number of operands mappings dynamically created");
+STATISTIC(NumOperandsMappingsAccessed,
+ "Number of operands mappings dynamically accessed");
+
const unsigned RegisterBankInfo::DefaultMappingID = UINT_MAX;
const unsigned RegisterBankInfo::InvalidMappingID = UINT_MAX - 1;
//------------------------------------------------------------------------------
// RegisterBankInfo implementation.
//------------------------------------------------------------------------------
-RegisterBankInfo::RegisterBankInfo(unsigned NumRegBanks)
- : NumRegBanks(NumRegBanks) {
- RegBanks.reset(new RegisterBank[NumRegBanks]);
+RegisterBankInfo::RegisterBankInfo(RegisterBank **RegBanks,
+ unsigned NumRegBanks)
+ : RegBanks(RegBanks), NumRegBanks(NumRegBanks) {
+#ifndef NDEBUG
+ for (unsigned Idx = 0, End = getNumRegBanks(); Idx != End; ++Idx) {
+ assert(RegBanks[Idx] != nullptr && "Invalid RegisterBank");
+ assert(RegBanks[Idx]->isValid() && "RegisterBank should be valid");
+ }
+#endif // NDEBUG
+}
+
+RegisterBankInfo::~RegisterBankInfo() {
+ for (auto It : MapOfPartialMappings)
+ delete It.second;
+ for (auto It : MapOfValueMappings)
+ delete It.second;
}
bool RegisterBankInfo::verify(const TargetRegisterInfo &TRI) const {
- DEBUG(for (unsigned Idx = 0, End = getNumRegBanks(); Idx != End; ++Idx) {
+#ifndef NDEBUG
+ for (unsigned Idx = 0, End = getNumRegBanks(); Idx != End; ++Idx) {
const RegisterBank &RegBank = getRegBank(Idx);
assert(Idx == RegBank.getID() &&
"ID does not match the index in the array");
- dbgs() << "Verify " << RegBank << '\n';
+ DEBUG(dbgs() << "Verify " << RegBank << '\n');
assert(RegBank.verify(TRI) && "RegBank is invalid");
- });
+ }
+#endif // NDEBUG
return true;
}
-void RegisterBankInfo::createRegisterBank(unsigned ID, const char *Name) {
- DEBUG(dbgs() << "Create register bank: " << ID << " with name \"" << Name
- << "\"\n");
- RegisterBank &RegBank = getRegBank(ID);
- assert(RegBank.getID() == RegisterBank::InvalidID &&
- "A register bank should be created only once");
- RegBank.ID = ID;
- RegBank.Name = Name;
-}
-
-void RegisterBankInfo::addRegBankCoverage(unsigned ID, unsigned RCId,
- const TargetRegisterInfo &TRI,
- bool AddTypeMapping) {
- RegisterBank &RB = getRegBank(ID);
- unsigned NbOfRegClasses = TRI.getNumRegClasses();
-
- DEBUG(dbgs() << "Add coverage for: " << RB << '\n');
-
- // Check if RB is underconstruction.
- if (!RB.isValid())
- RB.ContainedRegClasses.resize(NbOfRegClasses);
- else if (RB.covers(*TRI.getRegClass(RCId)))
- // If RB already covers this register class, there is nothing
- // to do.
- return;
-
- BitVector &Covered = RB.ContainedRegClasses;
- SmallVector<unsigned, 8> WorkList;
-
- WorkList.push_back(RCId);
- Covered.set(RCId);
-
- unsigned &MaxSize = RB.Size;
- do {
- unsigned RCId = WorkList.pop_back_val();
-
- const TargetRegisterClass &CurRC = *TRI.getRegClass(RCId);
-
- DEBUG(dbgs() << "Examine: " << TRI.getRegClassName(&CurRC)
- << "(Size*8: " << (CurRC.getSize() * 8) << ")\n");
-
- // Remember the biggest size in bits.
- MaxSize = std::max(MaxSize, CurRC.getSize() * 8);
-
- // If we have been asked to record the type supported by this
- // register bank, do it now.
- if (AddTypeMapping)
- for (MVT::SimpleValueType SVT :
- make_range(CurRC.vt_begin(), CurRC.vt_end()))
- recordRegBankForType(getRegBank(ID), SVT);
-
- // Walk through all sub register classes and push them into the worklist.
- bool First = true;
- for (BitMaskClassIterator It(CurRC.getSubClassMask(), TRI); It.isValid();
- ++It) {
- unsigned SubRCId = It.getID();
- if (!Covered.test(SubRCId)) {
- if (First)
- DEBUG(dbgs() << " Enqueue sub-class: ");
- DEBUG(dbgs() << TRI.getRegClassName(TRI.getRegClass(SubRCId)) << ", ");
- WorkList.push_back(SubRCId);
- // Remember that we saw the sub class.
- Covered.set(SubRCId);
- First = false;
- }
- }
- if (!First)
- DEBUG(dbgs() << '\n');
-
- // Push also all the register classes that can be accessed via a
- // subreg index, i.e., its subreg-class (which is different than
- // its subclass).
- //
- // Note: It would probably be faster to go the other way around
- // and have this method add only super classes, since this
- // information is available in a more efficient way. However, it
- // feels less natural for the client of this APIs plus we will
- // TableGen the whole bitset at some point, so compile time for
- // the initialization is not very important.
- First = true;
- for (unsigned SubRCId = 0; SubRCId < NbOfRegClasses; ++SubRCId) {
- if (Covered.test(SubRCId))
- continue;
- bool Pushed = false;
- const TargetRegisterClass *SubRC = TRI.getRegClass(SubRCId);
- for (SuperRegClassIterator SuperRCIt(SubRC, &TRI); SuperRCIt.isValid();
- ++SuperRCIt) {
- if (Pushed)
- break;
- for (BitMaskClassIterator It(SuperRCIt.getMask(), TRI); It.isValid();
- ++It) {
- unsigned SuperRCId = It.getID();
- if (SuperRCId == RCId) {
- if (First)
- DEBUG(dbgs() << " Enqueue subreg-class: ");
- DEBUG(dbgs() << TRI.getRegClassName(SubRC) << ", ");
- WorkList.push_back(SubRCId);
- // Remember that we saw the sub class.
- Covered.set(SubRCId);
- Pushed = true;
- First = false;
- break;
- }
- }
- }
- }
- if (!First)
- DEBUG(dbgs() << '\n');
- } while (!WorkList.empty());
-}
-
const RegisterBank *
RegisterBankInfo::getRegBank(unsigned Reg, const MachineRegisterInfo &MRI,
const TargetRegisterInfo &TRI) const {
@@ -173,11 +91,9 @@ RegisterBankInfo::getRegBank(unsigned Reg, const MachineRegisterInfo &MRI,
assert(Reg && "NoRegister does not have a register bank");
const RegClassOrRegBank &RegClassOrBank = MRI.getRegClassOrRegBank(Reg);
- if (RegClassOrBank.is<const RegisterBank *>())
- return RegClassOrBank.get<const RegisterBank *>();
- const TargetRegisterClass *RC =
- RegClassOrBank.get<const TargetRegisterClass *>();
- if (RC)
+ if (auto *RB = RegClassOrBank.dyn_cast<const RegisterBank *>())
+ return RB;
+ if (auto *RC = RegClassOrBank.dyn_cast<const TargetRegisterClass *>())
return &getRegBankFromRegClass(*RC);
return nullptr;
}
@@ -199,10 +115,37 @@ const RegisterBank *RegisterBankInfo::getRegBankFromConstraints(
return &RegBank;
}
+const TargetRegisterClass *RegisterBankInfo::constrainGenericRegister(
+ unsigned Reg, const TargetRegisterClass &RC, MachineRegisterInfo &MRI) {
+
+ // If the register already has a class, fallback to MRI::constrainRegClass.
+ auto &RegClassOrBank = MRI.getRegClassOrRegBank(Reg);
+ if (RegClassOrBank.is<const TargetRegisterClass *>())
+ return MRI.constrainRegClass(Reg, &RC);
+
+ const RegisterBank *RB = RegClassOrBank.get<const RegisterBank *>();
+ // Otherwise, all we can do is ensure the bank covers the class, and set it.
+ if (RB && !RB->covers(RC))
+ return nullptr;
+
+ // If nothing was set or the class is simply compatible, set it.
+ MRI.setRegClass(Reg, &RC);
+ return &RC;
+}
+
RegisterBankInfo::InstructionMapping
RegisterBankInfo::getInstrMappingImpl(const MachineInstr &MI) const {
+ // For copies we want to walk over the operands and try to find one
+ // that has a register bank since the instruction itself will not get
+ // us any constraint.
+ bool isCopyLike = MI.isCopy() || MI.isPHI();
+ // For copy-like instructions, only the mapping of the definition
+ // is important. The rest is not constrained.
+ unsigned NumOperandsForMapping = isCopyLike ? 1 : MI.getNumOperands();
+
RegisterBankInfo::InstructionMapping Mapping(DefaultMappingID, /*Cost*/ 1,
- MI.getNumOperands());
+ /*OperandsMapping*/ nullptr,
+ NumOperandsForMapping);
const MachineFunction &MF = *MI.getParent()->getParent();
const TargetSubtargetInfo &STI = MF.getSubtarget();
const TargetRegisterInfo &TRI = *STI.getRegisterInfo();
@@ -213,14 +156,10 @@ RegisterBankInfo::getInstrMappingImpl(const MachineInstr &MI) const {
// Before doing anything complicated check if the mapping is not
// directly available.
bool CompleteMapping = true;
- // For copies we want to walk over the operands and try to find one
- // that has a register bank.
- bool isCopyLike = MI.isCopy() || MI.isPHI();
- // Remember the register bank for reuse for copy-like instructions.
- const RegisterBank *RegBank = nullptr;
- // Remember the size of the register for reuse for copy-like instructions.
- unsigned RegSize = 0;
- for (unsigned OpIdx = 0, End = MI.getNumOperands(); OpIdx != End; ++OpIdx) {
+
+ SmallVector<const ValueMapping *, 8> OperandsMapping(NumOperandsForMapping);
+ for (unsigned OpIdx = 0, EndIdx = MI.getNumOperands(); OpIdx != EndIdx;
+ ++OpIdx) {
const MachineOperand &MO = MI.getOperand(OpIdx);
if (!MO.isReg())
continue;
@@ -242,71 +181,147 @@ RegisterBankInfo::getInstrMappingImpl(const MachineInstr &MI) const {
// the register bank from the encoding constraints.
CurRegBank = getRegBankFromConstraints(MI, OpIdx, TII, TRI);
if (!CurRegBank) {
- // Check if we can deduce the register bank from the type of
- // the instruction.
- Type *MITy = MI.getType();
- if (MITy)
- CurRegBank = getRegBankForType(
- MVT::getVT(MITy, /*HandleUnknown*/ true).SimpleTy);
- if (!CurRegBank)
- // Use the current assigned register bank.
- // That may not make much sense though.
- CurRegBank = AltRegBank;
- if (!CurRegBank) {
- // All our attempts failed, give up.
- CompleteMapping = false;
-
- if (!isCopyLike)
- // MI does not carry enough information to guess the mapping.
- return InstructionMapping();
-
- // For copies, we want to keep interating to find a register
- // bank for the other operands if we did not find one yet.
- if (RegBank)
- break;
- continue;
- }
+ // All our attempts failed, give up.
+ CompleteMapping = false;
+
+ if (!isCopyLike)
+ // MI does not carry enough information to guess the mapping.
+ return InstructionMapping();
+ continue;
}
}
- RegBank = CurRegBank;
- RegSize = getSizeInBits(Reg, MRI, TRI);
- Mapping.setOperandMapping(OpIdx, RegSize, *CurRegBank);
+ const ValueMapping *ValMapping =
+ &getValueMapping(0, getSizeInBits(Reg, MRI, TRI), *CurRegBank);
+ if (isCopyLike) {
+ OperandsMapping[0] = ValMapping;
+ CompleteMapping = true;
+ break;
+ }
+ OperandsMapping[OpIdx] = ValMapping;
}
- if (CompleteMapping)
- return Mapping;
-
- assert(isCopyLike && "We should have bailed on non-copies at this point");
- // For copy like instruction, if none of the operand has a register
- // bank avialable, there is nothing we can propagate.
- if (!RegBank)
+ if (isCopyLike && !CompleteMapping)
+ // No way to deduce the type from what we have.
return InstructionMapping();
- // This is a copy-like instruction.
- // Propagate RegBank to all operands that do not have a
- // mapping yet.
- for (unsigned OpIdx = 0, End = MI.getNumOperands(); OpIdx != End; ++OpIdx) {
- const MachineOperand &MO = MI.getOperand(OpIdx);
- // Don't assign a mapping for non-reg operands.
- if (!MO.isReg())
- continue;
+  assert(CompleteMapping && "Setting an incomplete mapping");
+ Mapping.setOperandsMapping(getOperandsMapping(OperandsMapping));
+ return Mapping;
+}
- // If a mapping already exists, do not touch it.
- if (!static_cast<const InstructionMapping *>(&Mapping)
- ->getOperandMapping(OpIdx)
- .BreakDown.empty())
- continue;
+/// Hashing function for PartialMapping.
+static hash_code hashPartialMapping(unsigned StartIdx, unsigned Length,
+ const RegisterBank *RegBank) {
+ return hash_combine(StartIdx, Length, RegBank ? RegBank->getID() : 0);
+}
+
+/// Overloaded version of hash_value for a PartialMapping.
+hash_code
+llvm::hash_value(const RegisterBankInfo::PartialMapping &PartMapping) {
+ return hashPartialMapping(PartMapping.StartIdx, PartMapping.Length,
+ PartMapping.RegBank);
+}
+
+const RegisterBankInfo::PartialMapping &
+RegisterBankInfo::getPartialMapping(unsigned StartIdx, unsigned Length,
+ const RegisterBank &RegBank) const {
+ ++NumPartialMappingsAccessed;
+
+ hash_code Hash = hashPartialMapping(StartIdx, Length, &RegBank);
+ const auto &It = MapOfPartialMappings.find(Hash);
+ if (It != MapOfPartialMappings.end())
+ return *It->second;
+
+ ++NumPartialMappingsCreated;
+
+ const PartialMapping *&PartMapping = MapOfPartialMappings[Hash];
+ PartMapping = new PartialMapping{StartIdx, Length, RegBank};
+ return *PartMapping;
+}
+
+const RegisterBankInfo::ValueMapping &
+RegisterBankInfo::getValueMapping(unsigned StartIdx, unsigned Length,
+ const RegisterBank &RegBank) const {
+ return getValueMapping(&getPartialMapping(StartIdx, Length, RegBank), 1);
+}
+
+static hash_code
+hashValueMapping(const RegisterBankInfo::PartialMapping *BreakDown,
+ unsigned NumBreakDowns) {
+ if (LLVM_LIKELY(NumBreakDowns == 1))
+ return hash_value(*BreakDown);
+  // Reserve rather than size-construct: a sized constructor would seed the
+  // vector with NumBreakDowns zero entries ahead of the push_backs below.
+  SmallVector<size_t, 8> Hashes;
+  Hashes.reserve(NumBreakDowns);
+  for (unsigned Idx = 0; Idx != NumBreakDowns; ++Idx)
+    Hashes.push_back(hash_value(BreakDown[Idx]));
+ return hash_combine_range(Hashes.begin(), Hashes.end());
+}
+
+const RegisterBankInfo::ValueMapping &
+RegisterBankInfo::getValueMapping(const PartialMapping *BreakDown,
+ unsigned NumBreakDowns) const {
+ ++NumValueMappingsAccessed;
+
+ hash_code Hash = hashValueMapping(BreakDown, NumBreakDowns);
+ const auto &It = MapOfValueMappings.find(Hash);
+ if (It != MapOfValueMappings.end())
+ return *It->second;
+
+ ++NumValueMappingsCreated;
+
+ const ValueMapping *&ValMapping = MapOfValueMappings[Hash];
+ ValMapping = new ValueMapping{BreakDown, NumBreakDowns};
+ return *ValMapping;
+}
- Mapping.setOperandMapping(OpIdx, RegSize, *RegBank);
+template <typename Iterator>
+const RegisterBankInfo::ValueMapping *
+RegisterBankInfo::getOperandsMapping(Iterator Begin, Iterator End) const {
+
+ ++NumOperandsMappingsAccessed;
+
+ // The addresses of the value mapping are unique.
+ // Therefore, we can use them directly to hash the operand mapping.
+ hash_code Hash = hash_combine_range(Begin, End);
+ const auto &It = MapOfOperandsMappings.find(Hash);
+ if (It != MapOfOperandsMappings.end())
+ return It->second;
+
+ ++NumOperandsMappingsCreated;
+
+ // Create the array of ValueMapping.
+  // Note: this array will not hash to this instance of the operands
+  // mapping, because we hash the pointers of the ValueMappings and
+  // expect each pointer to uniquely identify an instance of a value
+  // mapping.
+ ValueMapping *&Res = MapOfOperandsMappings[Hash];
+ Res = new ValueMapping[std::distance(Begin, End)];
+ unsigned Idx = 0;
+ for (Iterator It = Begin; It != End; ++It, ++Idx) {
+ const ValueMapping *ValMap = *It;
+ if (!ValMap)
+ continue;
+ Res[Idx] = *ValMap;
}
- return Mapping;
+ return Res;
+}
+
+const RegisterBankInfo::ValueMapping *RegisterBankInfo::getOperandsMapping(
+ const SmallVectorImpl<const RegisterBankInfo::ValueMapping *> &OpdsMapping)
+ const {
+ return getOperandsMapping(OpdsMapping.begin(), OpdsMapping.end());
+}
+
+const RegisterBankInfo::ValueMapping *RegisterBankInfo::getOperandsMapping(
+ std::initializer_list<const RegisterBankInfo::ValueMapping *> OpdsMapping)
+ const {
+ return getOperandsMapping(OpdsMapping.begin(), OpdsMapping.end());
}
RegisterBankInfo::InstructionMapping
RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
- RegisterBankInfo::InstructionMapping Mapping = getInstrMappingImpl(MI);
- if (Mapping.isValid())
- return Mapping;
+ RegisterBankInfo::InstructionMapping Mapping = getInstrMappingImpl(MI);
+ if (Mapping.isValid())
+ return Mapping;
llvm_unreachable("The target must implement this");
}
@@ -335,18 +350,18 @@ RegisterBankInfo::getInstrAlternativeMappings(const MachineInstr &MI) const {
void RegisterBankInfo::applyDefaultMapping(const OperandsMapper &OpdMapper) {
MachineInstr &MI = OpdMapper.getMI();
DEBUG(dbgs() << "Applying default-like mapping\n");
- for (unsigned OpIdx = 0, EndIdx = MI.getNumOperands(); OpIdx != EndIdx;
- ++OpIdx) {
+ for (unsigned OpIdx = 0,
+ EndIdx = OpdMapper.getInstrMapping().getNumOperands();
+ OpIdx != EndIdx; ++OpIdx) {
DEBUG(dbgs() << "OpIdx " << OpIdx);
MachineOperand &MO = MI.getOperand(OpIdx);
if (!MO.isReg()) {
DEBUG(dbgs() << " is not a register, nothing to be done\n");
continue;
}
- assert(
- OpdMapper.getInstrMapping().getOperandMapping(OpIdx).BreakDown.size() ==
- 1 &&
- "This mapping is too complex for this function");
+ assert(OpdMapper.getInstrMapping().getOperandMapping(OpIdx).NumBreakDowns ==
+ 1 &&
+ "This mapping is too complex for this function");
iterator_range<SmallVectorImpl<unsigned>::const_iterator> NewRegs =
OpdMapper.getVRegs(OpIdx);
if (NewRegs.begin() == NewRegs.end()) {
@@ -369,7 +384,8 @@ unsigned RegisterBankInfo::getSizeInBits(unsigned Reg,
// get the size of that register class.
RC = TRI.getMinimalPhysRegClass(Reg);
} else {
- unsigned RegSize = MRI.getSize(Reg);
+ LLT Ty = MRI.getType(Reg);
+ unsigned RegSize = Ty.isValid() ? Ty.getSizeInBits() : 0;
// If Reg is not a generic register, query the register class to
// get its size.
if (RegSize)
@@ -384,7 +400,7 @@ unsigned RegisterBankInfo::getSizeInBits(unsigned Reg,
//------------------------------------------------------------------------------
// Helper classes implementation.
//------------------------------------------------------------------------------
-void RegisterBankInfo::PartialMapping::dump() const {
+LLVM_DUMP_METHOD void RegisterBankInfo::PartialMapping::dump() const {
print(dbgs());
dbgs() << '\n';
}
@@ -392,7 +408,7 @@ void RegisterBankInfo::PartialMapping::dump() const {
bool RegisterBankInfo::PartialMapping::verify() const {
assert(RegBank && "Register bank not set");
assert(Length && "Empty mapping");
- assert((StartIdx < getHighBitIdx()) && "Overflow, switch to APInt?");
+ assert((StartIdx <= getHighBitIdx()) && "Overflow, switch to APInt?");
// Check if the minimum width fits into RegBank.
assert(RegBank->getSize() >= Length && "Register bank too small for Mask");
return true;
@@ -406,10 +422,10 @@ void RegisterBankInfo::PartialMapping::print(raw_ostream &OS) const {
OS << "nullptr";
}
-bool RegisterBankInfo::ValueMapping::verify(unsigned ExpectedBitWidth) const {
- assert(!BreakDown.empty() && "Value mapped nowhere?!");
+bool RegisterBankInfo::ValueMapping::verify(unsigned MeaningfulBitWidth) const {
+ assert(NumBreakDowns && "Value mapped nowhere?!");
unsigned OrigValueBitWidth = 0;
- for (const RegisterBankInfo::PartialMapping &PartMap : BreakDown) {
+ for (const RegisterBankInfo::PartialMapping &PartMap : *this) {
// Check that each register bank is big enough to hold the partial value:
// this check is done by PartialMapping::verify
assert(PartMap.verify() && "Partial mapping is invalid");
@@ -418,9 +434,10 @@ bool RegisterBankInfo::ValueMapping::verify(unsigned ExpectedBitWidth) const {
OrigValueBitWidth =
std::max(OrigValueBitWidth, PartMap.getHighBitIdx() + 1);
}
- assert(OrigValueBitWidth == ExpectedBitWidth && "BitWidth does not match");
+ assert(OrigValueBitWidth >= MeaningfulBitWidth &&
+ "Meaningful bits not covered by the mapping");
APInt ValueMask(OrigValueBitWidth, 0);
- for (const RegisterBankInfo::PartialMapping &PartMap : BreakDown) {
+ for (const RegisterBankInfo::PartialMapping &PartMap : *this) {
// Check that the union of the partial mappings covers the whole value,
// without overlaps.
// The high bit is exclusive in the APInt API, thus getHighBitIdx + 1.
@@ -434,15 +451,15 @@ bool RegisterBankInfo::ValueMapping::verify(unsigned ExpectedBitWidth) const {
return true;
}
-void RegisterBankInfo::ValueMapping::dump() const {
+LLVM_DUMP_METHOD void RegisterBankInfo::ValueMapping::dump() const {
print(dbgs());
dbgs() << '\n';
}
void RegisterBankInfo::ValueMapping::print(raw_ostream &OS) const {
- OS << "#BreakDown: " << BreakDown.size() << " ";
+ OS << "#BreakDown: " << NumBreakDowns << " ";
bool IsFirst = true;
- for (const PartialMapping &PartMap : BreakDown) {
+ for (const PartialMapping &PartMap : *this) {
if (!IsFirst)
OS << ", ";
OS << '[' << PartMap << ']';
@@ -450,21 +467,13 @@ void RegisterBankInfo::ValueMapping::print(raw_ostream &OS) const {
}
}
-void RegisterBankInfo::InstructionMapping::setOperandMapping(
- unsigned OpIdx, unsigned MaskSize, const RegisterBank &RegBank) {
- // Build the value mapping.
- assert(MaskSize <= RegBank.getSize() && "Register bank is too small");
-
- // Create the mapping object.
- getOperandMapping(OpIdx).BreakDown.push_back(
- PartialMapping(0, MaskSize, RegBank));
-}
-
bool RegisterBankInfo::InstructionMapping::verify(
const MachineInstr &MI) const {
// Check that all the register operands are properly mapped.
// Check the constructor invariant.
- assert(NumOperands == MI.getNumOperands() &&
+ // For PHI, we only care about mapping the definition.
+ assert(NumOperands ==
+ ((MI.isCopy() || MI.isPHI()) ? 1 : MI.getNumOperands()) &&
"NumOperands must match, see constructor");
assert(MI.getParent() && MI.getParent()->getParent() &&
"MI must be connected to a MachineFunction");
@@ -473,16 +482,18 @@ bool RegisterBankInfo::InstructionMapping::verify(
for (unsigned Idx = 0; Idx < NumOperands; ++Idx) {
const MachineOperand &MO = MI.getOperand(Idx);
- const RegisterBankInfo::ValueMapping &MOMapping = getOperandMapping(Idx);
- (void)MOMapping;
if (!MO.isReg()) {
- assert(MOMapping.BreakDown.empty() &&
+ assert(!getOperandMapping(Idx).isValid() &&
"We should not care about non-reg mapping");
continue;
}
unsigned Reg = MO.getReg();
if (!Reg)
continue;
+ assert(getOperandMapping(Idx).isValid() &&
+ "We must have a mapping for reg operands");
+ const RegisterBankInfo::ValueMapping &MOMapping = getOperandMapping(Idx);
+ (void)MOMapping;
// Register size in bits.
// This size must match what the mapping expects.
assert(MOMapping.verify(getSizeInBits(
@@ -492,7 +503,7 @@ bool RegisterBankInfo::InstructionMapping::verify(
return true;
}
-void RegisterBankInfo::InstructionMapping::dump() const {
+LLVM_DUMP_METHOD void RegisterBankInfo::InstructionMapping::dump() const {
print(dbgs());
dbgs() << '\n';
}
@@ -514,18 +525,16 @@ RegisterBankInfo::OperandsMapper::OperandsMapper(
MachineInstr &MI, const InstructionMapping &InstrMapping,
MachineRegisterInfo &MRI)
: MRI(MRI), MI(MI), InstrMapping(InstrMapping) {
- unsigned NumOpds = MI.getNumOperands();
- OpToNewVRegIdx.reset(new int[NumOpds]);
- std::fill(&OpToNewVRegIdx[0], &OpToNewVRegIdx[NumOpds],
- OperandsMapper::DontKnowIdx);
+ unsigned NumOpds = InstrMapping.getNumOperands();
+ OpToNewVRegIdx.resize(NumOpds, OperandsMapper::DontKnowIdx);
assert(InstrMapping.verify(MI) && "Invalid mapping for MI");
}
iterator_range<SmallVectorImpl<unsigned>::iterator>
RegisterBankInfo::OperandsMapper::getVRegsMem(unsigned OpIdx) {
- assert(OpIdx < getMI().getNumOperands() && "Out-of-bound access");
+ assert(OpIdx < getInstrMapping().getNumOperands() && "Out-of-bound access");
unsigned NumPartialVal =
- getInstrMapping().getOperandMapping(OpIdx).BreakDown.size();
+ getInstrMapping().getOperandMapping(OpIdx).NumBreakDowns;
int StartIdx = OpToNewVRegIdx[OpIdx];
if (StartIdx == OperandsMapper::DontKnowIdx) {
@@ -559,16 +568,15 @@ RegisterBankInfo::OperandsMapper::getNewVRegsEnd(unsigned StartIdx,
}
void RegisterBankInfo::OperandsMapper::createVRegs(unsigned OpIdx) {
- assert(OpIdx < getMI().getNumOperands() && "Out-of-bound access");
+ assert(OpIdx < getInstrMapping().getNumOperands() && "Out-of-bound access");
iterator_range<SmallVectorImpl<unsigned>::iterator> NewVRegsForOpIdx =
getVRegsMem(OpIdx);
- const SmallVectorImpl<PartialMapping> &PartMapList =
- getInstrMapping().getOperandMapping(OpIdx).BreakDown;
- SmallVectorImpl<PartialMapping>::const_iterator PartMap = PartMapList.begin();
+ const ValueMapping &ValMapping = getInstrMapping().getOperandMapping(OpIdx);
+ const PartialMapping *PartMap = ValMapping.begin();
for (unsigned &NewVReg : NewVRegsForOpIdx) {
- assert(PartMap != PartMapList.end() && "Out-of-bound access");
+ assert(PartMap != ValMapping.end() && "Out-of-bound access");
assert(NewVReg == 0 && "Register has already been created");
- NewVReg = MRI.createGenericVirtualRegister(PartMap->Length);
+ NewVReg = MRI.createGenericVirtualRegister(LLT::scalar(PartMap->Length));
MRI.setRegBank(NewVReg, *PartMap->RegBank);
++PartMap;
}
@@ -577,8 +585,8 @@ void RegisterBankInfo::OperandsMapper::createVRegs(unsigned OpIdx) {
void RegisterBankInfo::OperandsMapper::setVRegs(unsigned OpIdx,
unsigned PartialMapIdx,
unsigned NewVReg) {
- assert(OpIdx < getMI().getNumOperands() && "Out-of-bound access");
- assert(getInstrMapping().getOperandMapping(OpIdx).BreakDown.size() >
+ assert(OpIdx < getInstrMapping().getNumOperands() && "Out-of-bound access");
+ assert(getInstrMapping().getOperandMapping(OpIdx).NumBreakDowns >
PartialMapIdx &&
"Out-of-bound access for partial mapping");
// Make sure the memory is initialized for that operand.
@@ -592,14 +600,14 @@ iterator_range<SmallVectorImpl<unsigned>::const_iterator>
RegisterBankInfo::OperandsMapper::getVRegs(unsigned OpIdx,
bool ForDebug) const {
(void)ForDebug;
- assert(OpIdx < getMI().getNumOperands() && "Out-of-bound access");
+ assert(OpIdx < getInstrMapping().getNumOperands() && "Out-of-bound access");
int StartIdx = OpToNewVRegIdx[OpIdx];
if (StartIdx == OperandsMapper::DontKnowIdx)
return make_range(NewVRegs.end(), NewVRegs.end());
unsigned PartMapSize =
- getInstrMapping().getOperandMapping(OpIdx).BreakDown.size();
+ getInstrMapping().getOperandMapping(OpIdx).NumBreakDowns;
SmallVectorImpl<unsigned>::const_iterator End =
getNewVRegsEnd(StartIdx, PartMapSize);
iterator_range<SmallVectorImpl<unsigned>::const_iterator> Res =
@@ -611,14 +619,14 @@ RegisterBankInfo::OperandsMapper::getVRegs(unsigned OpIdx,
return Res;
}
-void RegisterBankInfo::OperandsMapper::dump() const {
+LLVM_DUMP_METHOD void RegisterBankInfo::OperandsMapper::dump() const {
print(dbgs(), true);
dbgs() << '\n';
}
void RegisterBankInfo::OperandsMapper::print(raw_ostream &OS,
bool ForDebug) const {
- unsigned NumOpds = getMI().getNumOperands();
+ unsigned NumOpds = getInstrMapping().getNumOperands();
if (ForDebug) {
OS << "Mapping for " << getMI() << "\nwith " << getInstrMapping() << '\n';
// Print out the internal state of the index table.
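
All three caches introduced above (MapOfPartialMappings, MapOfValueMappings, MapOfOperandsMappings) share one hash-consing shape: hash the contents, probe the map, allocate the interned object only on a miss, and hand out a stable address from then on. A condensed sketch of that shape, with a hypothetical Entry payload and standard containers in place of the LLVM types; note that, like the patch, it keys the map on the hash value itself and so trusts the hash to be collision-free across distinct contents:

#include <cstddef>
#include <unordered_map>

// Hypothetical interned payload; stands in for PartialMapping and friends.
struct Entry {
  unsigned StartIdx, Length;
};

class InternPool {
  // Keyed on the hash itself, exactly as MapOfPartialMappings is.
  std::unordered_map<size_t, const Entry *> Map;

public:
  const Entry &get(unsigned StartIdx, unsigned Length) {
    size_t Hash = (size_t)StartIdx * 0x9e3779b97f4a7c15ULL + Length;
    auto It = Map.find(Hash);
    if (It != Map.end())
      return *It->second; // Hit: reuse the unique instance.
    // Miss: allocate once. The address now identifies the content, which is
    // what lets getOperandsMapping hash ValueMapping pointers directly.
    const Entry *&Slot = Map[Hash];
    Slot = new Entry{StartIdx, Length};
    return *Slot;
  }
};

As in the patch, entries are never freed; the pool is expected to live for the duration of the compilation.
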
diff --git a/contrib/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/contrib/llvm/lib/CodeGen/GlobalISel/Utils.cpp
new file mode 100644
index 0000000..e500918
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/GlobalISel/Utils.cpp
@@ -0,0 +1,45 @@
+//===- llvm/CodeGen/GlobalISel/Utils.cpp -------------------------*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file This file implements the utility functions used by the GlobalISel
+/// pipeline.
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/GlobalISel/Utils.h"
+#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+
+#define DEBUG_TYPE "globalisel-utils"
+
+using namespace llvm;
+
+unsigned llvm::constrainOperandRegClass(
+ const MachineFunction &MF, const TargetRegisterInfo &TRI,
+ MachineRegisterInfo &MRI, const TargetInstrInfo &TII,
+ const RegisterBankInfo &RBI, MachineInstr &InsertPt, const MCInstrDesc &II,
+ unsigned Reg, unsigned OpIdx) {
+ // Assume physical registers are properly constrained.
+ assert(TargetRegisterInfo::isVirtualRegister(Reg) &&
+ "PhysReg not implemented");
+
+ const TargetRegisterClass *RegClass = TII.getRegClass(II, OpIdx, &TRI, MF);
+
+ if (!RBI.constrainGenericRegister(Reg, *RegClass, MRI)) {
+ unsigned NewReg = MRI.createVirtualRegister(RegClass);
+ BuildMI(*InsertPt.getParent(), InsertPt, InsertPt.getDebugLoc(),
+ TII.get(TargetOpcode::COPY), NewReg)
+ .addReg(Reg);
+ return NewReg;
+ }
+
+ return Reg;
+}
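
A hypothetical call site for the new helper, sketching how a selector could constrain every explicit register operand of a just-selected instruction. The wrapper function, its name, and the loop are illustrative assumptions; only constrainOperandRegClass itself comes from this patch:

#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"

// Hypothetical: constrain each explicit register operand of MI, letting
// constrainOperandRegClass insert a COPY wherever the vreg cannot be
// constrained in place.
static void constrainAllRegOperands(llvm::MachineInstr &MI,
                                    const llvm::TargetInstrInfo &TII,
                                    const llvm::TargetRegisterInfo &TRI,
                                    const llvm::RegisterBankInfo &RBI) {
  llvm::MachineFunction &MF = *MI.getParent()->getParent();
  llvm::MachineRegisterInfo &MRI = MF.getRegInfo();
  for (unsigned I = 0, E = MI.getNumExplicitOperands(); I != E; ++I) {
    llvm::MachineOperand &MO = MI.getOperand(I);
    // The helper asserts on physical registers; skip anything but vregs.
    if (!MO.isReg() || !MO.getReg() ||
        llvm::TargetRegisterInfo::isPhysicalRegister(MO.getReg()))
      continue;
    MO.setReg(llvm::constrainOperandRegClass(MF, TRI, MRI, TII, RBI, MI,
                                             MI.getDesc(), MO.getReg(), I));
  }
}

One caveat visible in the helper as committed: the COPY is always inserted before InsertPt, which suits use operands; a def operand would need the copy placed after the instruction instead.
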
diff --git a/contrib/llvm/lib/CodeGen/GlobalMerge.cpp b/contrib/llvm/lib/CodeGen/GlobalMerge.cpp
index 8c760b7..1ea5349 100644
--- a/contrib/llvm/lib/CodeGen/GlobalMerge.cpp
+++ b/contrib/llvm/lib/CodeGen/GlobalMerge.cpp
@@ -182,9 +182,7 @@ namespace {
bool runOnFunction(Function &F) override;
bool doFinalization(Module &M) override;
- const char *getPassName() const override {
- return "Merge internal globals";
- }
+ StringRef getPassName() const override { return "Merge internal globals"; }
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
@@ -434,6 +432,8 @@ bool GlobalMerge::doMerge(const SmallVectorImpl<GlobalVariable *> &Globals,
std::vector<Type*> Tys;
std::vector<Constant*> Inits;
+ bool HasExternal = false;
+ StringRef FirstExternalName;
for (j = i; j != -1; j = GlobalSet.find_next(j)) {
Type *Ty = Globals[j]->getValueType();
MergedSize += DL.getTypeAllocSize(Ty);
@@ -442,19 +442,46 @@ bool GlobalMerge::doMerge(const SmallVectorImpl<GlobalVariable *> &Globals,
}
Tys.push_back(Ty);
Inits.push_back(Globals[j]->getInitializer());
+
+ if (Globals[j]->hasExternalLinkage() && !HasExternal) {
+ HasExternal = true;
+ FirstExternalName = Globals[j]->getName();
+ }
}
+  // If the merged variables don't have external linkage, we don't need to
+  // expose the symbol after merging.
+ GlobalValue::LinkageTypes Linkage = HasExternal
+ ? GlobalValue::ExternalLinkage
+ : GlobalValue::InternalLinkage;
StructType *MergedTy = StructType::get(M.getContext(), Tys);
Constant *MergedInit = ConstantStruct::get(MergedTy, Inits);
- GlobalVariable *MergedGV = new GlobalVariable(
- M, MergedTy, isConst, GlobalValue::PrivateLinkage, MergedInit,
- "_MergedGlobals", nullptr, GlobalVariable::NotThreadLocal, AddrSpace);
+ // On Darwin external linkage needs to be preserved, otherwise
+ // dsymutil cannot preserve the debug info for the merged
+ // variables. If they have external linkage, use the symbol name
+ // of the first variable merged as the suffix of global symbol
+ // name. This avoids a link-time naming conflict for the
+ // _MergedGlobals symbols.
+ Twine MergedName =
+ (IsMachO && HasExternal)
+ ? "_MergedGlobals_" + FirstExternalName
+ : "_MergedGlobals";
+ auto MergedLinkage = IsMachO ? Linkage : GlobalValue::PrivateLinkage;
+ auto *MergedGV = new GlobalVariable(
+ M, MergedTy, isConst, MergedLinkage, MergedInit, MergedName, nullptr,
+ GlobalVariable::NotThreadLocal, AddrSpace);
+
+ const StructLayout *MergedLayout = DL.getStructLayout(MergedTy);
for (ssize_t k = i, idx = 0; k != j; k = GlobalSet.find_next(k), ++idx) {
GlobalValue::LinkageTypes Linkage = Globals[k]->getLinkage();
std::string Name = Globals[k]->getName();
+ // Copy metadata while adjusting any debug info metadata by the original
+ // global's offset within the merged global.
+ MergedGV->copyMetadata(Globals[k], MergedLayout->getElementOffset(idx));
+
Constant *Idx[2] = {
ConstantInt::get(Int32Ty, 0),
ConstantInt::get(Int32Ty, idx),
@@ -498,22 +525,18 @@ void GlobalMerge::collectUsedGlobalVariables(Module &M) {
void GlobalMerge::setMustKeepGlobalVariables(Module &M) {
collectUsedGlobalVariables(M);
- for (Module::iterator IFn = M.begin(), IEndFn = M.end(); IFn != IEndFn;
- ++IFn) {
- for (Function::iterator IBB = IFn->begin(), IEndBB = IFn->end();
- IBB != IEndBB; ++IBB) {
- // Follow the invoke link to find the landing pad instruction
- const InvokeInst *II = dyn_cast<InvokeInst>(IBB->getTerminator());
- if (!II) continue;
-
- const LandingPadInst *LPInst = II->getUnwindDest()->getLandingPadInst();
- // Look for globals in the clauses of the landing pad instruction
- for (unsigned Idx = 0, NumClauses = LPInst->getNumClauses();
- Idx != NumClauses; ++Idx)
+ for (Function &F : M) {
+ for (BasicBlock &BB : F) {
+ Instruction *Pad = BB.getFirstNonPHI();
+ if (!Pad->isEHPad())
+ continue;
+
+ // Keep globals used by landingpads and catchpads.
+ for (const Use &U : Pad->operands()) {
if (const GlobalVariable *GV =
- dyn_cast<GlobalVariable>(LPInst->getClause(Idx)
- ->stripPointerCasts()))
+ dyn_cast<GlobalVariable>(U->stripPointerCasts()))
MustKeepGlobalVariables.insert(GV);
+ }
}
}
}
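
For context, the Idx pair built in the hunk above feeds a constant GEP that redirects each original global's uses into the merged struct. A condensed restatement of that tail of the loop, wrapped as a free function so it stands alone; names follow the patch, but treat the exact calls as a sketch rather than verbatim source:

#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Module.h"

// Sketch: rewrite every use of Orig as element #idx of the merged struct,
// then drop the now-dead original global.
static void redirectToMergedSlot(llvm::Module &M, llvm::StructType *MergedTy,
                                 llvm::GlobalVariable *MergedGV,
                                 llvm::GlobalVariable *Orig, unsigned idx) {
  llvm::Type *Int32Ty = llvm::Type::getInt32Ty(M.getContext());
  llvm::Constant *Idx[2] = {
      llvm::ConstantInt::get(Int32Ty, 0),   // step through the pointer
      llvm::ConstantInt::get(Int32Ty, idx), // select struct element #idx
  };
  llvm::Constant *GEP =
      llvm::ConstantExpr::getInBoundsGetElementPtr(MergedTy, MergedGV, Idx);
  Orig->replaceAllUsesWith(GEP);
  Orig->eraseFromParent();
}
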
diff --git a/contrib/llvm/lib/CodeGen/IfConversion.cpp b/contrib/llvm/lib/CodeGen/IfConversion.cpp
index d225162..b9f3d86 100644
--- a/contrib/llvm/lib/CodeGen/IfConversion.cpp
+++ b/contrib/llvm/lib/CodeGen/IfConversion.cpp
@@ -15,6 +15,7 @@
#include "llvm/CodeGen/Passes.h"
#include "BranchFolding.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/ScopeExit.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/LivePhysRegs.h"
@@ -58,6 +59,8 @@ static cl::opt<bool> DisableTriangleFR("disable-ifcvt-triangle-false-rev",
cl::init(false), cl::Hidden);
static cl::opt<bool> DisableDiamond("disable-ifcvt-diamond",
cl::init(false), cl::Hidden);
+static cl::opt<bool> DisableForkedDiamond("disable-ifcvt-forked-diamond",
+ cl::init(false), cl::Hidden);
static cl::opt<bool> IfCvtBranchFold("ifcvt-branch-fold",
cl::init(true), cl::Hidden);
@@ -68,6 +71,7 @@ STATISTIC(NumTriangleRev, "Number of triangle (R) if-conversions performed");
STATISTIC(NumTriangleFalse,"Number of triangle (F) if-conversions performed");
STATISTIC(NumTriangleFRev, "Number of triangle (F/R) if-conversions performed");
STATISTIC(NumDiamonds, "Number of diamond if-conversions performed");
+STATISTIC(NumForkedDiamonds, "Number of forked-diamond if-conversions performed");
STATISTIC(NumIfConvBBs, "Number of if-converted blocks");
STATISTIC(NumDupBBs, "Number of duplicated blocks");
STATISTIC(NumUnpred, "Number of true blocks of diamonds unpredicated");
@@ -82,10 +86,12 @@ namespace {
ICTriangleRev, // Same as ICTriangle, but true path rev condition.
ICTriangleFalse, // Same as ICTriangle, but on the false path.
ICTriangle, // BB is entry of a triangle sub-CFG.
- ICDiamond // BB is entry of a diamond sub-CFG.
+ ICDiamond, // BB is entry of a diamond sub-CFG.
+ ICForkedDiamond // BB is entry of an almost diamond sub-CFG, with a
+ // common tail that can be shared.
};
- /// BBInfo - One per MachineBasicBlock, this is used to cache the result
+  /// One per MachineBasicBlock, this is used to cache the result of
/// if-conversion feasibility analysis. This includes results from
/// TargetInstrInfo::analyzeBranch() (i.e. TBB, FBB, and Cond), and its
/// classification, and common tail block of its successors (if it's a
@@ -114,6 +120,7 @@ namespace {
bool IsAnalyzed : 1;
bool IsEnqueued : 1;
bool IsBrAnalyzable : 1;
+ bool IsBrReversible : 1;
bool HasFallThrough : 1;
bool IsUnpredicable : 1;
bool CannotBeCopied : 1;
@@ -128,13 +135,14 @@ namespace {
SmallVector<MachineOperand, 4> Predicate;
BBInfo() : IsDone(false), IsBeingAnalyzed(false),
IsAnalyzed(false), IsEnqueued(false), IsBrAnalyzable(false),
- HasFallThrough(false), IsUnpredicable(false),
- CannotBeCopied(false), ClobbersPred(false), NonPredSize(0),
- ExtraCost(0), ExtraCost2(0), BB(nullptr), TrueBB(nullptr),
+ IsBrReversible(false), HasFallThrough(false),
+ IsUnpredicable(false), CannotBeCopied(false),
+ ClobbersPred(false), NonPredSize(0), ExtraCost(0),
+ ExtraCost2(0), BB(nullptr), TrueBB(nullptr),
FalseBB(nullptr) {}
};
- /// IfcvtToken - Record information about pending if-conversions to attempt:
+ /// Record information about pending if-conversions to attempt:
/// BBI - Corresponding BBInfo.
/// Kind - Type of block. See IfcvtKind.
/// NeedSubsumption - True if the to-be-predicated BB has already been
@@ -148,15 +156,19 @@ namespace {
struct IfcvtToken {
BBInfo &BBI;
IfcvtKind Kind;
- bool NeedSubsumption;
unsigned NumDups;
unsigned NumDups2;
- IfcvtToken(BBInfo &b, IfcvtKind k, bool s, unsigned d, unsigned d2 = 0)
- : BBI(b), Kind(k), NeedSubsumption(s), NumDups(d), NumDups2(d2) {}
+ bool NeedSubsumption : 1;
+ bool TClobbersPred : 1;
+ bool FClobbersPred : 1;
+ IfcvtToken(BBInfo &b, IfcvtKind k, bool s, unsigned d, unsigned d2 = 0,
+ bool tc = false, bool fc = false)
+ : BBI(b), Kind(k), NumDups(d), NumDups2(d2), NeedSubsumption(s),
+ TClobbersPred(tc), FClobbersPred(fc) {}
};
- /// BBAnalysis - Results of if-conversion feasibility analysis indexed by
- /// basic block number.
+ /// Results of if-conversion feasibility analysis indexed by basic block
+ /// number.
std::vector<BBInfo> BBAnalysis;
TargetSchedModel SchedModel;
@@ -172,11 +184,11 @@ namespace {
bool PreRegAlloc;
bool MadeChange;
int FnNum;
- std::function<bool(const Function &)> PredicateFtor;
+ std::function<bool(const MachineFunction &)> PredicateFtor;
public:
static char ID;
- IfConverter(std::function<bool(const Function &)> Ftor = nullptr)
+ IfConverter(std::function<bool(const MachineFunction &)> Ftor = nullptr)
: MachineFunctionPass(ID), FnNum(-1), PredicateFtor(std::move(Ftor)) {
initializeIfConverterPass(*PassRegistry::getPassRegistry());
}
@@ -191,31 +203,58 @@ namespace {
MachineFunctionProperties getRequiredProperties() const override {
return MachineFunctionProperties().set(
- MachineFunctionProperties::Property::AllVRegsAllocated);
+ MachineFunctionProperties::Property::NoVRegs);
}
private:
- bool ReverseBranchCondition(BBInfo &BBI);
+ bool reverseBranchCondition(BBInfo &BBI) const;
bool ValidSimple(BBInfo &TrueBBI, unsigned &Dups,
BranchProbability Prediction) const;
bool ValidTriangle(BBInfo &TrueBBI, BBInfo &FalseBBI,
bool FalseBranch, unsigned &Dups,
BranchProbability Prediction) const;
+ bool CountDuplicatedInstructions(
+ MachineBasicBlock::iterator &TIB, MachineBasicBlock::iterator &FIB,
+ MachineBasicBlock::iterator &TIE, MachineBasicBlock::iterator &FIE,
+ unsigned &Dups1, unsigned &Dups2,
+ MachineBasicBlock &TBB, MachineBasicBlock &FBB,
+ bool SkipUnconditionalBranches) const;
bool ValidDiamond(BBInfo &TrueBBI, BBInfo &FalseBBI,
- unsigned &Dups1, unsigned &Dups2) const;
- void ScanInstructions(BBInfo &BBI);
- void AnalyzeBlock(MachineBasicBlock *MBB,
+ unsigned &Dups1, unsigned &Dups2,
+ BBInfo &TrueBBICalc, BBInfo &FalseBBICalc) const;
+ bool ValidForkedDiamond(BBInfo &TrueBBI, BBInfo &FalseBBI,
+ unsigned &Dups1, unsigned &Dups2,
+ BBInfo &TrueBBICalc, BBInfo &FalseBBICalc) const;
+ void AnalyzeBranches(BBInfo &BBI);
+ void ScanInstructions(BBInfo &BBI,
+ MachineBasicBlock::iterator &Begin,
+ MachineBasicBlock::iterator &End,
+ bool BranchUnpredicable = false) const;
+ bool RescanInstructions(
+ MachineBasicBlock::iterator &TIB, MachineBasicBlock::iterator &FIB,
+ MachineBasicBlock::iterator &TIE, MachineBasicBlock::iterator &FIE,
+ BBInfo &TrueBBI, BBInfo &FalseBBI) const;
+ void AnalyzeBlock(MachineBasicBlock &MBB,
std::vector<std::unique_ptr<IfcvtToken>> &Tokens);
bool FeasibilityAnalysis(BBInfo &BBI, SmallVectorImpl<MachineOperand> &Cond,
- bool isTriangle = false, bool RevBranch = false);
+ bool isTriangle = false, bool RevBranch = false,
+ bool hasCommonTail = false);
void AnalyzeBlocks(MachineFunction &MF,
std::vector<std::unique_ptr<IfcvtToken>> &Tokens);
- void InvalidatePreds(MachineBasicBlock *BB);
+ void InvalidatePreds(MachineBasicBlock &MBB);
void RemoveExtraEdges(BBInfo &BBI);
bool IfConvertSimple(BBInfo &BBI, IfcvtKind Kind);
bool IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind);
+ bool IfConvertDiamondCommon(BBInfo &BBI, BBInfo &TrueBBI, BBInfo &FalseBBI,
+ unsigned NumDups1, unsigned NumDups2,
+ bool TClobbersPred, bool FClobbersPred,
+ bool RemoveBranch, bool MergeAddEdges);
bool IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind,
- unsigned NumDups1, unsigned NumDups2);
+ unsigned NumDups1, unsigned NumDups2,
+ bool TClobbers, bool FClobbers);
+ bool IfConvertForkedDiamond(BBInfo &BBI, IfcvtKind Kind,
+ unsigned NumDups1, unsigned NumDups2,
+ bool TClobbers, bool FClobbers);
void PredicateBlock(BBInfo &BBI,
MachineBasicBlock::iterator E,
SmallVectorImpl<MachineOperand> &Cond,
@@ -242,12 +281,12 @@ namespace {
Prediction);
}
- // blockAlwaysFallThrough - Block ends without a terminator.
+ /// Returns true if Block ends without a terminator.
bool blockAlwaysFallThrough(BBInfo &BBI) const {
return BBI.IsBrAnalyzable && BBI.TrueBB == nullptr;
}
- // IfcvtTokenCmp - Used to sort if-conversion candidates.
+ /// Used to sort if-conversion candidates.
static bool IfcvtTokenCmp(const std::unique_ptr<IfcvtToken> &C1,
const std::unique_ptr<IfcvtToken> &C2) {
int Incr1 = (C1->Kind == ICDiamond)
@@ -282,8 +321,7 @@ INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
INITIALIZE_PASS_END(IfConverter, "if-converter", "If Converter", false, false)
bool IfConverter::runOnMachineFunction(MachineFunction &MF) {
- if (skipFunction(*MF.getFunction()) ||
- (PredicateFtor && !PredicateFtor(*MF.getFunction())))
+ if (skipFunction(*MF.getFunction()) || (PredicateFtor && !PredicateFtor(MF)))
return false;
const TargetSubtargetInfo &ST = MF.getSubtarget();
@@ -402,11 +440,26 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) {
DEBUG(dbgs() << "Ifcvt (Diamond): BB#" << BBI.BB->getNumber() << " (T:"
<< BBI.TrueBB->getNumber() << ",F:"
<< BBI.FalseBB->getNumber() << ") ");
- RetVal = IfConvertDiamond(BBI, Kind, NumDups, NumDups2);
+ RetVal = IfConvertDiamond(BBI, Kind, NumDups, NumDups2,
+ Token->TClobbersPred,
+ Token->FClobbersPred);
DEBUG(dbgs() << (RetVal ? "succeeded!" : "failed!") << "\n");
if (RetVal) ++NumDiamonds;
break;
}
+ case ICForkedDiamond: {
+ if (DisableForkedDiamond) break;
+ DEBUG(dbgs() << "Ifcvt (Forked Diamond): BB#"
+ << BBI.BB->getNumber() << " (T:"
+ << BBI.TrueBB->getNumber() << ",F:"
+ << BBI.FalseBB->getNumber() << ") ");
+ RetVal = IfConvertForkedDiamond(BBI, Kind, NumDups, NumDups2,
+ Token->TClobbersPred,
+ Token->FClobbersPred);
+ DEBUG(dbgs() << (RetVal ? "succeeded!" : "failed!") << "\n");
+ if (RetVal) ++NumForkedDiamonds;
+ break;
+ }
}
Change |= RetVal;
@@ -435,46 +488,42 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) {
return MadeChange;
}
-/// findFalseBlock - BB has a fallthrough. Find its 'false' successor given
-/// its 'true' successor.
+/// BB has a fallthrough. Find its 'false' successor given its 'true' successor.
static MachineBasicBlock *findFalseBlock(MachineBasicBlock *BB,
MachineBasicBlock *TrueBB) {
- for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(),
- E = BB->succ_end(); SI != E; ++SI) {
- MachineBasicBlock *SuccBB = *SI;
+ for (MachineBasicBlock *SuccBB : BB->successors()) {
if (SuccBB != TrueBB)
return SuccBB;
}
return nullptr;
}
-/// ReverseBranchCondition - Reverse the condition of the end of the block
-/// branch. Swap block's 'true' and 'false' successors.
-bool IfConverter::ReverseBranchCondition(BBInfo &BBI) {
+/// Reverse the condition of the end of the block branch. Swap block's 'true'
+/// and 'false' successors.
+bool IfConverter::reverseBranchCondition(BBInfo &BBI) const {
DebugLoc dl; // FIXME: this is nowhere
- if (!TII->ReverseBranchCondition(BBI.BrCond)) {
- TII->RemoveBranch(*BBI.BB);
- TII->InsertBranch(*BBI.BB, BBI.FalseBB, BBI.TrueBB, BBI.BrCond, dl);
+ if (!TII->reverseBranchCondition(BBI.BrCond)) {
+ TII->removeBranch(*BBI.BB);
+ TII->insertBranch(*BBI.BB, BBI.FalseBB, BBI.TrueBB, BBI.BrCond, dl);
std::swap(BBI.TrueBB, BBI.FalseBB);
return true;
}
return false;
}
-/// getNextBlock - Returns the next block in the function blocks ordering. If
-/// it is the end, returns NULL.
-static inline MachineBasicBlock *getNextBlock(MachineBasicBlock *BB) {
- MachineFunction::iterator I = BB->getIterator();
- MachineFunction::iterator E = BB->getParent()->end();
+/// Returns the next block in the function blocks ordering. If it is the end,
+/// returns NULL.
+static inline MachineBasicBlock *getNextBlock(MachineBasicBlock &MBB) {
+ MachineFunction::iterator I = MBB.getIterator();
+ MachineFunction::iterator E = MBB.getParent()->end();
if (++I == E)
return nullptr;
return &*I;
}
-/// ValidSimple - Returns true if the 'true' block (along with its
-/// predecessor) forms a valid simple shape for ifcvt. It also returns the
-/// number of instructions that the ifcvt would need to duplicate if performed
-/// in Dups.
+/// Returns true if the 'true' block (along with its predecessor) forms a valid
+/// simple shape for ifcvt. It also returns the number of instructions that the
+/// ifcvt would need to duplicate if performed in Dups.
bool IfConverter::ValidSimple(BBInfo &TrueBBI, unsigned &Dups,
BranchProbability Prediction) const {
Dups = 0;
@@ -495,12 +544,11 @@ bool IfConverter::ValidSimple(BBInfo &TrueBBI, unsigned &Dups,
return true;
}
-/// ValidTriangle - Returns true if the 'true' and 'false' blocks (along
-/// with their common predecessor) forms a valid triangle shape for ifcvt.
-/// If 'FalseBranch' is true, it checks if 'true' block's false branch
-/// branches to the 'false' block rather than the other way around. It also
-/// returns the number of instructions that the ifcvt would need to duplicate
-/// if performed in 'Dups'.
+/// Returns true if the 'true' and 'false' blocks (along with their common
+/// predecessor) forms a valid triangle shape for ifcvt. If 'FalseBranch' is
+/// true, it checks if 'true' block's false branch branches to the 'false' block
+/// rather than the other way around. It also returns the number of instructions
+/// that the ifcvt would need to duplicate if performed in 'Dups'.
bool IfConverter::ValidTriangle(BBInfo &TrueBBI, BBInfo &FalseBBI,
bool FalseBranch, unsigned &Dups,
BranchProbability Prediction) const {
@@ -540,122 +588,353 @@ bool IfConverter::ValidTriangle(BBInfo &TrueBBI, BBInfo &FalseBBI,
return TExit && TExit == FalseBBI.BB;
}
-/// ValidDiamond - Returns true if the 'true' and 'false' blocks (along
-/// with their common predecessor) forms a valid diamond shape for ifcvt.
-bool IfConverter::ValidDiamond(BBInfo &TrueBBI, BBInfo &FalseBBI,
- unsigned &Dups1, unsigned &Dups2) const {
- Dups1 = Dups2 = 0;
- if (TrueBBI.IsBeingAnalyzed || TrueBBI.IsDone ||
- FalseBBI.IsBeingAnalyzed || FalseBBI.IsDone)
- return false;
-
- MachineBasicBlock *TT = TrueBBI.TrueBB;
- MachineBasicBlock *FT = FalseBBI.TrueBB;
-
- if (!TT && blockAlwaysFallThrough(TrueBBI))
- TT = getNextBlock(TrueBBI.BB);
- if (!FT && blockAlwaysFallThrough(FalseBBI))
- FT = getNextBlock(FalseBBI.BB);
- if (TT != FT)
- return false;
- if (!TT && (TrueBBI.IsBrAnalyzable || FalseBBI.IsBrAnalyzable))
- return false;
- if (TrueBBI.BB->pred_size() > 1 || FalseBBI.BB->pred_size() > 1)
- return false;
+/// Shrink the provided inclusive range by one instruction.
+/// If the range was one instruction (\p It == \p Begin), It is not modified,
+/// but \p Empty is set to true.
+static inline void shrinkInclusiveRange(
+ MachineBasicBlock::iterator &Begin,
+ MachineBasicBlock::iterator &It,
+ bool &Empty) {
+ if (It == Begin)
+ Empty = true;
+ else
+ It--;
+}
- // FIXME: Allow true block to have an early exit?
- if (TrueBBI.FalseBB || FalseBBI.FalseBB ||
- (TrueBBI.ClobbersPred && FalseBBI.ClobbersPred))
- return false;
+/// Count duplicated instructions and move the iterators to show where they
+/// are.
+/// @param TIB True Iterator Begin
+/// @param FIB False Iterator Begin
+/// These two iterators initially point to the first instruction of the two
+/// blocks, and finally point to the first non-shared instruction.
+/// @param TIE True Iterator End
+/// @param FIE False Iterator End
+/// These two iterators initially point to the end() of the two blocks, and
+/// finally point to the first shared instruction in the tail.
+/// Upon return [TIB, TIE) and [FIB, FIE) mark the un-duplicated portions of
+/// the two blocks.
+/// @param Dups1 count of duplicated instructions at the beginning of the 2
+/// blocks.
+/// @param Dups2 count of duplicated instructions at the end of the 2 blocks.
+/// @param SkipUnconditionalBranches if true, don't require the unconditional
+/// branches at the end of the blocks to be identical. True is passed when the
+/// blocks are analyzable, so that fallthrough can be handled.
+/// @return false if the shared portion prevents if conversion.
+bool IfConverter::CountDuplicatedInstructions(
+ MachineBasicBlock::iterator &TIB,
+ MachineBasicBlock::iterator &FIB,
+ MachineBasicBlock::iterator &TIE,
+ MachineBasicBlock::iterator &FIE,
+ unsigned &Dups1, unsigned &Dups2,
+ MachineBasicBlock &TBB, MachineBasicBlock &FBB,
+ bool SkipUnconditionalBranches) const {
- // Count duplicate instructions at the beginning of the true and false blocks.
- MachineBasicBlock::iterator TIB = TrueBBI.BB->begin();
- MachineBasicBlock::iterator FIB = FalseBBI.BB->begin();
- MachineBasicBlock::iterator TIE = TrueBBI.BB->end();
- MachineBasicBlock::iterator FIE = FalseBBI.BB->end();
while (TIB != TIE && FIB != FIE) {
// Skip dbg_value instructions. These do not count.
- if (TIB->isDebugValue()) {
- while (TIB != TIE && TIB->isDebugValue())
- ++TIB;
- if (TIB == TIE)
- break;
- }
- if (FIB->isDebugValue()) {
- while (FIB != FIE && FIB->isDebugValue())
- ++FIB;
- if (FIB == FIE)
- break;
- }
+ TIB = skipDebugInstructionsForward(TIB, TIE);
+    if (TIB == TIE)
+      break;
+    FIB = skipDebugInstructionsForward(FIB, FIE);
+    if (FIB == FIE)
+ break;
if (!TIB->isIdenticalTo(*FIB))
break;
- ++Dups1;
+ // A pred-clobbering instruction in the shared portion prevents
+ // if-conversion.
+ std::vector<MachineOperand> PredDefs;
+ if (TII->DefinesPredicate(*TIB, PredDefs))
+ return false;
+ // If we get all the way to the branch instructions, don't count them.
+ if (!TIB->isBranch())
+ ++Dups1;
++TIB;
++FIB;
}
- // Now, in preparation for counting duplicate instructions at the ends of the
- // blocks, move the end iterators up past any branch instructions.
- // If both blocks are returning don't skip the branches, since they will
- // likely be both identical return instructions. In such cases the return
- // can be left unpredicated.
// Check for already containing all of the block.
if (TIB == TIE || FIB == FIE)
return true;
+ // Now, in preparation for counting duplicate instructions at the ends of the
+ // blocks, move the end iterators up past any branch instructions.
--TIE;
--FIE;
- if (!TrueBBI.BB->succ_empty() || !FalseBBI.BB->succ_empty()) {
- while (TIE != TIB && TIE->isBranch())
- --TIE;
- while (FIE != FIB && FIE->isBranch())
- --FIE;
+
+ // After this point TIB and TIE define an inclusive range, which means that
+ // TIB == TIE is true when there is one more instruction to consider, not at
+ // the end. Because we may not be able to go before TIB, we need a flag to
+ // indicate a completely empty range.
+ bool TEmpty = false, FEmpty = false;
+
+ // Upon exit TIE and FIE will both point at the last non-shared instruction.
+ // They need to be moved forward to point past the last non-shared
+ // instruction if the range they delimit is non-empty.
+ auto IncrementEndIteratorsOnExit = make_scope_exit([&]() {
+ if (!TEmpty)
+ ++TIE;
+ if (!FEmpty)
+ ++FIE;
+ });
+
+ if (!TBB.succ_empty() || !FBB.succ_empty()) {
+ if (SkipUnconditionalBranches) {
+ while (!TEmpty && TIE->isUnconditionalBranch())
+ shrinkInclusiveRange(TIB, TIE, TEmpty);
+ while (!FEmpty && FIE->isUnconditionalBranch())
+ shrinkInclusiveRange(FIB, FIE, FEmpty);
+ }
}
// If Dups1 includes all of a block, then don't count duplicate
// instructions at the end of the blocks.
- if (TIB == TIE || FIB == FIE)
+ if (TEmpty || FEmpty)
return true;
// Count duplicate instructions at the ends of the blocks.
- while (TIE != TIB && FIE != FIB) {
+ while (!TEmpty && !FEmpty) {
// Skip dbg_value instructions. These do not count.
- if (TIE->isDebugValue()) {
- while (TIE != TIB && TIE->isDebugValue())
- --TIE;
- if (TIE == TIB)
- break;
+ TIE = skipDebugInstructionsBackward(TIE, TIB);
+ FIE = skipDebugInstructionsBackward(FIE, FIB);
+ TEmpty = TIE == TIB && TIE->isDebugValue();
+ FEmpty = FIE == FIB && FIE->isDebugValue();
+ if (TEmpty || FEmpty)
+ break;
+ if (!TIE->isIdenticalTo(*FIE))
+ break;
+ // We have to verify that any branch instructions are the same, and then we
+ // don't count them toward the # of duplicate instructions.
+ if (!TIE->isBranch())
+ ++Dups2;
+ shrinkInclusiveRange(TIB, TIE, TEmpty);
+ shrinkInclusiveRange(FIB, FIE, FEmpty);
+ }
+ return true;
+}
+
+/// RescanInstructions - Run ScanInstructions on a pair of blocks.
+/// @param TIB - True Iterator Begin, points to first non-shared instruction
+/// @param FIB - False Iterator Begin, points to first non-shared instruction
+/// @param TIE - True Iterator End, points past last non-shared instruction
+/// @param FIE - False Iterator End, points past last non-shared instruction
+/// @param TrueBBI - BBInfo to update for the true block.
+/// @param FalseBBI - BBInfo to update for the false block.
+/// @returns - false if either block cannot be predicated or if both blocks end
+/// with a predicate-clobbering instruction.
+bool IfConverter::RescanInstructions(
+ MachineBasicBlock::iterator &TIB, MachineBasicBlock::iterator &FIB,
+ MachineBasicBlock::iterator &TIE, MachineBasicBlock::iterator &FIE,
+ BBInfo &TrueBBI, BBInfo &FalseBBI) const {
+ bool BranchUnpredicable = true;
+ TrueBBI.IsUnpredicable = FalseBBI.IsUnpredicable = false;
+ ScanInstructions(TrueBBI, TIB, TIE, BranchUnpredicable);
+ if (TrueBBI.IsUnpredicable)
+ return false;
+ ScanInstructions(FalseBBI, FIB, FIE, BranchUnpredicable);
+ if (FalseBBI.IsUnpredicable)
+ return false;
+ if (TrueBBI.ClobbersPred && FalseBBI.ClobbersPred)
+ return false;
+ return true;
+}
+
+#ifndef NDEBUG
+static void verifySameBranchInstructions(
+ MachineBasicBlock *MBB1,
+ MachineBasicBlock *MBB2) {
+ MachineBasicBlock::iterator B1 = MBB1->begin();
+ MachineBasicBlock::iterator B2 = MBB2->begin();
+ MachineBasicBlock::iterator E1 = std::prev(MBB1->end());
+ MachineBasicBlock::iterator E2 = std::prev(MBB2->end());
+ bool Empty1 = false, Empty2 = false;
+ while (!Empty1 && !Empty2) {
+ E1 = skipDebugInstructionsBackward(E1, B1);
+ E2 = skipDebugInstructionsBackward(E2, B2);
+ Empty1 = E1 == B1 && E1->isDebugValue();
+ Empty2 = E2 == B2 && E2->isDebugValue();
+
+ if (Empty1 && Empty2)
+ break;
+
+ if (Empty1) {
+ assert(!E2->isBranch() && "Branch mis-match, one block is empty.");
+ break;
}
- if (FIE->isDebugValue()) {
- while (FIE != FIB && FIE->isDebugValue())
- --FIE;
- if (FIE == FIB)
- break;
+ if (Empty2) {
+ assert(!E1->isBranch() && "Branch mis-match, one block is empty.");
+ break;
}
- if (!TIE->isIdenticalTo(*FIE))
+
+ if (E1->isBranch() || E2->isBranch())
+ assert(E1->isIdenticalTo(*E2) &&
+ "Branch mis-match, branch instructions don't match.");
+ else
break;
- ++Dups2;
- --TIE;
- --FIE;
+ shrinkInclusiveRange(B1, E1, Empty1);
+ shrinkInclusiveRange(B2, E2, Empty2);
+ }
+}
+#endif
+
+/// ValidForkedDiamond - Returns true if the 'true' and 'false' blocks (along
+/// with their common predecessor) form a diamond if a common tail block is
+/// extracted.
+/// While not strictly a diamond, this pattern would form a diamond if
+/// tail-merging had merged the shared tails.
+/// EBB
+/// _/ \_
+/// | |
+/// TBB FBB
+/// / \ / \
+/// FalseBB TrueBB FalseBB
+/// Currently only handles analyzable branches.
+/// Specifically excludes actual diamonds to avoid overlap.
+bool IfConverter::ValidForkedDiamond(
+ BBInfo &TrueBBI, BBInfo &FalseBBI,
+ unsigned &Dups1, unsigned &Dups2,
+ BBInfo &TrueBBICalc, BBInfo &FalseBBICalc) const {
+ Dups1 = Dups2 = 0;
+ if (TrueBBI.IsBeingAnalyzed || TrueBBI.IsDone ||
+ FalseBBI.IsBeingAnalyzed || FalseBBI.IsDone)
+ return false;
+
+ if (!TrueBBI.IsBrAnalyzable || !FalseBBI.IsBrAnalyzable)
+ return false;
+ // Don't IfConvert blocks that can't be folded into their predecessor.
+ if (TrueBBI.BB->pred_size() > 1 || FalseBBI.BB->pred_size() > 1)
+ return false;
+
+ // This function is specifically looking for conditional tails, as
+ // unconditional tails are already handled by the standard diamond case.
+ if (TrueBBI.BrCond.size() == 0 ||
+ FalseBBI.BrCond.size() == 0)
+ return false;
+
+ MachineBasicBlock *TT = TrueBBI.TrueBB;
+ MachineBasicBlock *TF = TrueBBI.FalseBB;
+ MachineBasicBlock *FT = FalseBBI.TrueBB;
+ MachineBasicBlock *FF = FalseBBI.FalseBB;
+
+ if (!TT)
+ TT = getNextBlock(*TrueBBI.BB);
+ if (!TF)
+ TF = getNextBlock(*TrueBBI.BB);
+ if (!FT)
+ FT = getNextBlock(*FalseBBI.BB);
+ if (!FF)
+ FF = getNextBlock(*FalseBBI.BB);
+
+ if (!TT || !TF)
+ return false;
+
+ // Check successors. If they don't match, bail.
+ if (!((TT == FT && TF == FF) || (TF == FT && TT == FF)))
+ return false;
+
+ bool FalseReversed = false;
+ if (TF == FT && TT == FF) {
+ // If the branches are opposing, but we can't reverse, don't do it.
+ if (!FalseBBI.IsBrReversible)
+ return false;
+ FalseReversed = true;
+ reverseBranchCondition(FalseBBI);
}
+ auto UnReverseOnExit = make_scope_exit([&]() {
+ if (FalseReversed)
+ reverseBranchCondition(FalseBBI);
+ });
+
+ // Count duplicate instructions at the beginning of the true and false blocks.
+ MachineBasicBlock::iterator TIB = TrueBBI.BB->begin();
+ MachineBasicBlock::iterator FIB = FalseBBI.BB->begin();
+ MachineBasicBlock::iterator TIE = TrueBBI.BB->end();
+ MachineBasicBlock::iterator FIE = FalseBBI.BB->end();
+  if (!CountDuplicatedInstructions(TIB, FIB, TIE, FIE, Dups1, Dups2,
+ *TrueBBI.BB, *FalseBBI.BB,
+ /* SkipUnconditionalBranches */ true))
+ return false;
+
+ TrueBBICalc.BB = TrueBBI.BB;
+ FalseBBICalc.BB = FalseBBI.BB;
+ if (!RescanInstructions(TIB, FIB, TIE, FIE, TrueBBICalc, FalseBBICalc))
+ return false;
+ // The size is used to decide whether to if-convert, and the shared portions
+ // are subtracted off. Because of the subtraction, we just use the size that
+ // was calculated by the original ScanInstructions, as it is correct.
+ TrueBBICalc.NonPredSize = TrueBBI.NonPredSize;
+ FalseBBICalc.NonPredSize = FalseBBI.NonPredSize;
return true;
}
-/// ScanInstructions - Scan all the instructions in the block to determine if
-/// the block is predicable. In most cases, that means all the instructions
-/// in the block are isPredicable(). Also checks if the block contains any
-/// instruction which can clobber a predicate (e.g. condition code register).
-/// If so, the block is not predicable unless it's the last instruction.
-void IfConverter::ScanInstructions(BBInfo &BBI) {
+/// ValidDiamond - Returns true if the 'true' and 'false' blocks (along
+/// with their common predecessor) forms a valid diamond shape for ifcvt.
+bool IfConverter::ValidDiamond(
+ BBInfo &TrueBBI, BBInfo &FalseBBI,
+ unsigned &Dups1, unsigned &Dups2,
+ BBInfo &TrueBBICalc, BBInfo &FalseBBICalc) const {
+ Dups1 = Dups2 = 0;
+ if (TrueBBI.IsBeingAnalyzed || TrueBBI.IsDone ||
+ FalseBBI.IsBeingAnalyzed || FalseBBI.IsDone)
+ return false;
+
+ MachineBasicBlock *TT = TrueBBI.TrueBB;
+ MachineBasicBlock *FT = FalseBBI.TrueBB;
+
+ if (!TT && blockAlwaysFallThrough(TrueBBI))
+ TT = getNextBlock(*TrueBBI.BB);
+ if (!FT && blockAlwaysFallThrough(FalseBBI))
+ FT = getNextBlock(*FalseBBI.BB);
+ if (TT != FT)
+ return false;
+ if (!TT && (TrueBBI.IsBrAnalyzable || FalseBBI.IsBrAnalyzable))
+ return false;
+ if (TrueBBI.BB->pred_size() > 1 || FalseBBI.BB->pred_size() > 1)
+ return false;
+
+ // FIXME: Allow true block to have an early exit?
+ if (TrueBBI.FalseBB || FalseBBI.FalseBB)
+ return false;
+
+ // Count duplicate instructions at the beginning and end of the true and
+ // false blocks.
+ // Skip unconditional branches only if we are considering an analyzable
+ // diamond. Otherwise the branches must be the same.
+ bool SkipUnconditionalBranches =
+ TrueBBI.IsBrAnalyzable && FalseBBI.IsBrAnalyzable;
+ MachineBasicBlock::iterator TIB = TrueBBI.BB->begin();
+ MachineBasicBlock::iterator FIB = FalseBBI.BB->begin();
+ MachineBasicBlock::iterator TIE = TrueBBI.BB->end();
+ MachineBasicBlock::iterator FIE = FalseBBI.BB->end();
+  if (!CountDuplicatedInstructions(TIB, FIB, TIE, FIE, Dups1, Dups2,
+ *TrueBBI.BB, *FalseBBI.BB,
+ SkipUnconditionalBranches))
+ return false;
+
+ TrueBBICalc.BB = TrueBBI.BB;
+ FalseBBICalc.BB = FalseBBI.BB;
+ if (!RescanInstructions(TIB, FIB, TIE, FIE, TrueBBICalc, FalseBBICalc))
+ return false;
+ // The size is used to decide whether to if-convert, and the shared portions
+ // are subtracted off. Because of the subtraction, we just use the size that
+ // was calculated by the original ScanInstructions, as it is correct.
+ TrueBBICalc.NonPredSize = TrueBBI.NonPredSize;
+ FalseBBICalc.NonPredSize = FalseBBI.NonPredSize;
+ return true;
+}
+
+/// AnalyzeBranches - Look at the branches at the end of a block to determine if
+/// the block is predicable.
+void IfConverter::AnalyzeBranches(BBInfo &BBI) {
if (BBI.IsDone)
return;
- bool AlreadyPredicated = !BBI.Predicate.empty();
- // First analyze the end of BB branches.
BBI.TrueBB = BBI.FalseBB = nullptr;
BBI.BrCond.clear();
BBI.IsBrAnalyzable =
!TII->analyzeBranch(*BBI.BB, BBI.TrueBB, BBI.FalseBB, BBI.BrCond);
+ SmallVector<MachineOperand, 4> RevCond(BBI.BrCond.begin(), BBI.BrCond.end());
+ BBI.IsBrReversible = (RevCond.size() == 0) ||
+ !TII->reverseBranchCondition(RevCond);
BBI.HasFallThrough = BBI.IsBrAnalyzable && BBI.FalseBB == nullptr;
if (BBI.BrCond.size()) {
@@ -666,16 +945,29 @@ void IfConverter::ScanInstructions(BBInfo &BBI) {
if (!BBI.FalseBB) {
// Malformed bcc? True and false blocks are the same?
BBI.IsUnpredicable = true;
- return;
}
}
+}
+
+/// ScanInstructions - Scan all the instructions in the block to determine if
+/// the block is predicable. In most cases, that means all the instructions
+/// in the block are isPredicable(). Also checks if the block contains any
+/// instruction which can clobber a predicate (e.g. condition code register).
+/// If so, the block is not predicable unless it's the last instruction.
+void IfConverter::ScanInstructions(BBInfo &BBI,
+ MachineBasicBlock::iterator &Begin,
+ MachineBasicBlock::iterator &End,
+ bool BranchUnpredicable) const {
+ if (BBI.IsDone || BBI.IsUnpredicable)
+ return;
+
+ bool AlreadyPredicated = !BBI.Predicate.empty();
- // Then scan all the instructions.
BBI.NonPredSize = 0;
BBI.ExtraCost = 0;
BBI.ExtraCost2 = 0;
BBI.ClobbersPred = false;
- for (auto &MI : *BBI.BB) {
+ for (MachineInstr &MI : make_range(Begin, End)) {
if (MI.isDebugValue())
continue;
@@ -715,6 +1007,11 @@ void IfConverter::ScanInstructions(BBInfo &BBI) {
bool isPredicated = TII->isPredicated(MI);
bool isCondBr = BBI.IsBrAnalyzable && MI.isConditionalBranch();
+ if (BranchUnpredicable && MI.isBranch()) {
+ BBI.IsUnpredicable = true;
+ return;
+ }
+
// A conditional branch is not predicable, but it may be eliminated.
if (isCondBr)
continue;
@@ -756,13 +1053,24 @@ void IfConverter::ScanInstructions(BBInfo &BBI) {
}
}
-/// FeasibilityAnalysis - Determine if the block is a suitable candidate to be
-/// predicated by the specified predicate.
+/// Determine if the block is a suitable candidate to be predicated by the
+/// specified predicate.
+/// @param BBI BBInfo for the block to check
+/// @param Pred Predicate array for the branch that leads to BBI
+/// @param isTriangle true if the Analysis is for a triangle
+/// @param RevBranch true if Reverse(Pred) leads to BBI (e.g. BBI is the false
+/// case)
+/// @param hasCommonTail true if BBI shares a tail with a sibling block that
+/// contains any instruction that would make the block unpredicable.
bool IfConverter::FeasibilityAnalysis(BBInfo &BBI,
SmallVectorImpl<MachineOperand> &Pred,
- bool isTriangle, bool RevBranch) {
+ bool isTriangle, bool RevBranch,
+ bool hasCommonTail) {
// If the block is dead or unpredicable, then it cannot be predicated.
- if (BBI.IsDone || BBI.IsUnpredicable)
+ // Two blocks may share a common unpredicable tail, but this doesn't prevent
+ // them from being if-converted. The non-shared portion is assumed to have
+  // been checked.
+ if (BBI.IsDone || (BBI.IsUnpredicable && !hasCommonTail))
return false;
// If it is already predicated but we couldn't analyze its terminator, the
@@ -776,7 +1084,7 @@ bool IfConverter::FeasibilityAnalysis(BBInfo &BBI,
if (BBI.Predicate.size() && !TII->SubsumesPredicate(Pred, BBI.Predicate))
return false;
- if (BBI.BrCond.size()) {
+ if (!hasCommonTail && BBI.BrCond.size()) {
if (!isTriangle)
return false;
@@ -784,10 +1092,10 @@ bool IfConverter::FeasibilityAnalysis(BBInfo &BBI,
SmallVector<MachineOperand, 4> RevPred(Pred.begin(), Pred.end());
SmallVector<MachineOperand, 4> Cond(BBI.BrCond.begin(), BBI.BrCond.end());
if (RevBranch) {
- if (TII->ReverseBranchCondition(Cond))
+ if (TII->reverseBranchCondition(Cond))
return false;
}
- if (TII->ReverseBranchCondition(RevPred) ||
+ if (TII->reverseBranchCondition(RevPred) ||
!TII->SubsumesPredicate(Cond, RevPred))
return false;
}
@@ -795,13 +1103,12 @@ bool IfConverter::FeasibilityAnalysis(BBInfo &BBI,
return true;
}
-/// AnalyzeBlock - Analyze the structure of the sub-CFG starting from
-/// the specified block. Record its successors and whether it looks like an
-/// if-conversion candidate.
+/// Analyze the structure of the sub-CFG starting from the specified block.
+/// Record its successors and whether it looks like an if-conversion candidate.
void IfConverter::AnalyzeBlock(
- MachineBasicBlock *MBB, std::vector<std::unique_ptr<IfcvtToken>> &Tokens) {
+ MachineBasicBlock &MBB, std::vector<std::unique_ptr<IfcvtToken>> &Tokens) {
struct BBState {
- BBState(MachineBasicBlock *BB) : MBB(BB), SuccsAnalyzed(false) {}
+ BBState(MachineBasicBlock &MBB) : MBB(&MBB), SuccsAnalyzed(false) {}
MachineBasicBlock *MBB;
/// This flag is true if MBB's successors have been analyzed.
@@ -825,7 +1132,10 @@ void IfConverter::AnalyzeBlock(
BBI.BB = BB;
BBI.IsBeingAnalyzed = true;
- ScanInstructions(BBI);
+ AnalyzeBranches(BBI);
+ MachineBasicBlock::iterator Begin = BBI.BB->begin();
+ MachineBasicBlock::iterator End = BBI.BB->end();
+ ScanInstructions(BBI, Begin, End);
// Unanalyzable or ends with fallthrough or unconditional branch, or if it is
// not considered for ifcvt anymore.
@@ -854,8 +1164,8 @@ void IfConverter::AnalyzeBlock(
// Push the False and True blocks to the stack.
State.SuccsAnalyzed = true;
- BBStack.push_back(BBI.FalseBB);
- BBStack.push_back(BBI.TrueBB);
+ BBStack.push_back(*BBI.FalseBB);
+ BBStack.push_back(*BBI.TrueBB);
continue;
}
@@ -871,7 +1181,7 @@ void IfConverter::AnalyzeBlock(
SmallVector<MachineOperand, 4>
RevCond(BBI.BrCond.begin(), BBI.BrCond.end());
- bool CanRevCond = !TII->ReverseBranchCondition(RevCond);
+ bool CanRevCond = !TII->reverseBranchCondition(RevCond);
unsigned Dups = 0;
unsigned Dups2 = 0;
@@ -881,25 +1191,59 @@ void IfConverter::AnalyzeBlock(
BranchProbability Prediction = MBPI->getEdgeProbability(BB, TrueBBI.BB);
- if (CanRevCond && ValidDiamond(TrueBBI, FalseBBI, Dups, Dups2) &&
- MeetIfcvtSizeLimit(*TrueBBI.BB, (TrueBBI.NonPredSize - (Dups + Dups2) +
- TrueBBI.ExtraCost), TrueBBI.ExtraCost2,
- *FalseBBI.BB, (FalseBBI.NonPredSize - (Dups + Dups2) +
- FalseBBI.ExtraCost),FalseBBI.ExtraCost2,
- Prediction) &&
- FeasibilityAnalysis(TrueBBI, BBI.BrCond) &&
- FeasibilityAnalysis(FalseBBI, RevCond)) {
- // Diamond:
- // EBB
- // / \_
- // | |
- // TBB FBB
- // \ /
- // TailBB
- // Note TailBB can be empty.
- Tokens.push_back(llvm::make_unique<IfcvtToken>(
- BBI, ICDiamond, TNeedSub | FNeedSub, Dups, Dups2));
- Enqueued = true;
+ if (CanRevCond) {
+ BBInfo TrueBBICalc, FalseBBICalc;
+ auto feasibleDiamond = [&]() {
+ bool MeetsSize = MeetIfcvtSizeLimit(
+ *TrueBBI.BB, (TrueBBICalc.NonPredSize - (Dups + Dups2) +
+ TrueBBICalc.ExtraCost), TrueBBICalc.ExtraCost2,
+ *FalseBBI.BB, (FalseBBICalc.NonPredSize - (Dups + Dups2) +
+ FalseBBICalc.ExtraCost), FalseBBICalc.ExtraCost2,
+ Prediction);
+ bool TrueFeasible = FeasibilityAnalysis(TrueBBI, BBI.BrCond,
+ /* isTriangle */ false, /* RevBranch */ false,
+ /* hasCommonTail */ true);
+ bool FalseFeasible = FeasibilityAnalysis(FalseBBI, RevCond,
+ /* isTriangle */ false, /* RevBranch */ false,
+ /* hasCommonTail */ true);
+ return MeetsSize && TrueFeasible && FalseFeasible;
+ };
+
+ if (ValidDiamond(TrueBBI, FalseBBI, Dups, Dups2,
+ TrueBBICalc, FalseBBICalc)) {
+ if (feasibleDiamond()) {
+ // Diamond:
+ // EBB
+ // / \_
+ // | |
+ // TBB FBB
+ // \ /
+ // TailBB
+ // Note TailBB can be empty.
+ Tokens.push_back(llvm::make_unique<IfcvtToken>(
+ BBI, ICDiamond, TNeedSub | FNeedSub, Dups, Dups2,
+ (bool) TrueBBICalc.ClobbersPred, (bool) FalseBBICalc.ClobbersPred));
+ Enqueued = true;
+ }
+ } else if (ValidForkedDiamond(TrueBBI, FalseBBI, Dups, Dups2,
+ TrueBBICalc, FalseBBICalc)) {
+ if (feasibleDiamond()) {
+ // ForkedDiamond:
+ // if TBB and FBB have a common tail that includes their conditional
+ // branch instructions, then we can If Convert this pattern.
+ // EBB
+ // _/ \_
+ // | |
+ // TBB FBB
+ // / \ / \
+ // FalseBB TrueBB FalseBB
+ //
+ Tokens.push_back(llvm::make_unique<IfcvtToken>(
+ BBI, ICForkedDiamond, TNeedSub | FNeedSub, Dups, Dups2,
+ (bool) TrueBBICalc.ClobbersPred, (bool) FalseBBICalc.ClobbersPred));
+ Enqueued = true;
+ }
+ }
}
if (ValidTriangle(TrueBBI, FalseBBI, false, Dups, Prediction) &&
@@ -985,25 +1329,23 @@ void IfConverter::AnalyzeBlock(
}
}
-/// AnalyzeBlocks - Analyze all blocks and find entries for all if-conversion
-/// candidates.
+/// Analyze all blocks and find entries for all if-conversion candidates.
void IfConverter::AnalyzeBlocks(
MachineFunction &MF, std::vector<std::unique_ptr<IfcvtToken>> &Tokens) {
- for (auto &BB : MF)
- AnalyzeBlock(&BB, Tokens);
+ for (MachineBasicBlock &MBB : MF)
+ AnalyzeBlock(MBB, Tokens);
// Sort to favor more complex ifcvt scheme.
std::stable_sort(Tokens.begin(), Tokens.end(), IfcvtTokenCmp);
}
-/// canFallThroughTo - Returns true either if ToBB is the next block after BB or
-/// that all the intervening blocks are empty (given BB can fall through to its
-/// next block).
-static bool canFallThroughTo(MachineBasicBlock *BB, MachineBasicBlock *ToBB) {
- MachineFunction::iterator PI = BB->getIterator();
+/// Returns true if ToMBB is the next block after MBB or if all the intervening
+/// blocks are empty (given MBB can fall through to its next block).
+static bool canFallThroughTo(MachineBasicBlock &MBB, MachineBasicBlock &ToMBB) {
+ MachineFunction::iterator PI = MBB.getIterator();
MachineFunction::iterator I = std::next(PI);
- MachineFunction::iterator TI = ToBB->getIterator();
- MachineFunction::iterator E = BB->getParent()->end();
+ MachineFunction::iterator TI = ToMBB.getIterator();
+ MachineFunction::iterator E = MBB.getParent()->end();
while (I != TI) {
// Check isSuccessor to avoid case where the next block is empty, but
// it's not a successor.
@@ -1014,30 +1356,27 @@ static bool canFallThroughTo(MachineBasicBlock *BB, MachineBasicBlock *ToBB) {
return true;
}
-/// InvalidatePreds - Invalidate predecessor BB info so it would be re-analyzed
-/// to determine if it can be if-converted. If predecessor is already enqueued,
-/// dequeue it!
-void IfConverter::InvalidatePreds(MachineBasicBlock *BB) {
- for (const auto &Predecessor : BB->predecessors()) {
+/// Invalidate predecessor BB info so it will be re-analyzed to determine if it
+/// can be if-converted. If predecessor is already enqueued, dequeue it!
+void IfConverter::InvalidatePreds(MachineBasicBlock &MBB) {
+ for (const MachineBasicBlock *Predecessor : MBB.predecessors()) {
BBInfo &PBBI = BBAnalysis[Predecessor->getNumber()];
- if (PBBI.IsDone || PBBI.BB == BB)
+ if (PBBI.IsDone || PBBI.BB == &MBB)
continue;
PBBI.IsAnalyzed = false;
PBBI.IsEnqueued = false;
}
}
-/// InsertUncondBranch - Inserts an unconditional branch from BB to ToBB.
-///
-static void InsertUncondBranch(MachineBasicBlock *BB, MachineBasicBlock *ToBB,
+/// Inserts an unconditional branch from \p MBB to \p ToMBB.
+static void InsertUncondBranch(MachineBasicBlock &MBB, MachineBasicBlock &ToMBB,
const TargetInstrInfo *TII) {
DebugLoc dl; // FIXME: this is nowhere
SmallVector<MachineOperand, 0> NoCond;
- TII->InsertBranch(*BB, ToBB, nullptr, NoCond, dl);
+ TII->insertBranch(MBB, &ToMBB, nullptr, NoCond, dl);
}
-/// RemoveExtraEdges - Remove true / false edges if either / both are no longer
-/// successors.
+/// Remove true / false edges if either / both are no longer successors.
void IfConverter::RemoveExtraEdges(BBInfo &BBI) {
MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
SmallVector<MachineOperand, 4> Cond;
@@ -1046,29 +1385,42 @@ void IfConverter::RemoveExtraEdges(BBInfo &BBI) {
}
/// Behaves like LiveRegUnits::StepForward() but also adds implicit uses to all
-/// values defined in MI which are not live/used by MI.
+/// values defined in MI which are also live/used by MI.
static void UpdatePredRedefs(MachineInstr &MI, LivePhysRegs &Redefs) {
+ const TargetRegisterInfo *TRI = MI.getParent()->getParent()
+ ->getSubtarget().getRegisterInfo();
+
+ // Before stepping forward past MI, remember which regs were live
+ // before MI. This is needed to set the Undef flag only when reg is
+ // dead.
+ SparseSet<unsigned> LiveBeforeMI;
+ LiveBeforeMI.setUniverse(TRI->getNumRegs());
+ for (unsigned Reg : Redefs)
+ LiveBeforeMI.insert(Reg);
+
SmallVector<std::pair<unsigned, const MachineOperand*>, 4> Clobbers;
Redefs.stepForward(MI, Clobbers);
// Now add the implicit uses for each of the clobbered values.
- for (auto Reg : Clobbers) {
+ for (auto Clobber : Clobbers) {
// FIXME: Const cast here is nasty, but better than making StepForward
// take a mutable instruction instead of const.
- MachineOperand &Op = const_cast<MachineOperand&>(*Reg.second);
+ unsigned Reg = Clobber.first;
+ MachineOperand &Op = const_cast<MachineOperand&>(*Clobber.second);
MachineInstr *OpMI = Op.getParent();
MachineInstrBuilder MIB(*OpMI->getParent()->getParent(), OpMI);
if (Op.isRegMask()) {
// First handle regmasks. They clobber any entries in the mask which
// means that we need a def for those registers.
- MIB.addReg(Reg.first, RegState::Implicit | RegState::Undef);
+ if (LiveBeforeMI.count(Reg))
+ MIB.addReg(Reg, RegState::Implicit);
// We also need to add an implicit def of this register for the later
// use to read from.
// For the register allocator to have allocated a register clobbered
// by the call which is used later, it must be the case that
// the call doesn't return.
- MIB.addReg(Reg.first, RegState::Implicit | RegState::Define);
+ MIB.addReg(Reg, RegState::Implicit | RegState::Define);
continue;
}
assert(Op.isReg() && "Register operand required");
@@ -1078,13 +1430,23 @@ static void UpdatePredRedefs(MachineInstr &MI, LivePhysRegs &Redefs) {
if (Redefs.contains(Op.getReg()))
Op.setIsDead(false);
}
- MIB.addReg(Reg.first, RegState::Implicit | RegState::Undef);
+ if (LiveBeforeMI.count(Reg))
+ MIB.addReg(Reg, RegState::Implicit);
+ else {
+ bool HasLiveSubReg = false;
+ for (MCSubRegIterator S(Reg, TRI); S.isValid(); ++S) {
+ if (!LiveBeforeMI.count(*S))
+ continue;
+ HasLiveSubReg = true;
+ break;
+ }
+ if (HasLiveSubReg)
+ MIB.addReg(Reg, RegState::Implicit);
+ }
}
}
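
The LiveBeforeMI snapshot is the crux of the change above: an implicit use is
added only when the register (or a live subregister) was live before MI, since
a use of a dead register would manufacture a false dependence. A self-contained
sketch of the same idea over plain integer "registers" (illustrative only; no
LLVM types):

    #include <set>
    #include <vector>

    struct Inst {
      std::vector<int> Defs;
      std::vector<int> ImplicitUses; // filled in below
    };

    // Step liveness forward across MI, adding an implicit use for every
    // redefined register that was live *before* MI -- the snapshot is taken
    // first, exactly as with LiveBeforeMI above.
    static void updatePredRedefs(Inst &MI, std::set<int> &Live) {
      const std::set<int> LiveBefore = Live; // snapshot
      for (int R : MI.Defs)
        Live.insert(R); // defs are live after MI
      for (int R : MI.Defs)
        if (LiveBefore.count(R))
          MI.ImplicitUses.push_back(R); // redefinition of a live value
    }
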
-/**
- * Remove kill flags from operands with a registers in the @p DontKill set.
- */
+/// Remove kill flags from operands with registers in the \p DontKill set.
static void RemoveKills(MachineInstr &MI, const LivePhysRegs &DontKill) {
for (MIBundleOperands O(MI); O.isValid(); ++O) {
if (!O->isReg() || !O->isKill())
@@ -1094,20 +1456,17 @@ static void RemoveKills(MachineInstr &MI, const LivePhysRegs &DontKill) {
}
}
-/**
- * Walks a range of machine instructions and removes kill flags for registers
- * in the @p DontKill set.
- */
+/// Walks a range of machine instructions and removes kill flags for registers
+/// in the \p DontKill set.
static void RemoveKills(MachineBasicBlock::iterator I,
MachineBasicBlock::iterator E,
const LivePhysRegs &DontKill,
const MCRegisterInfo &MCRI) {
- for ( ; I != E; ++I)
- RemoveKills(*I, DontKill);
+ for (MachineInstr &MI : make_range(I, E))
+ RemoveKills(MI, DontKill);
}
-/// IfConvertSimple - If convert a simple (split, no rejoin) sub-CFG.
-///
+/// If convert a simple (split, no rejoin) sub-CFG.
bool IfConverter::IfConvertSimple(BBInfo &BBI, IfcvtKind Kind) {
BBInfo &TrueBBI = BBAnalysis[BBI.TrueBB->getNumber()];
BBInfo &FalseBBI = BBAnalysis[BBI.FalseBB->getNumber()];
@@ -1118,54 +1477,58 @@ bool IfConverter::IfConvertSimple(BBInfo &BBI, IfcvtKind Kind) {
if (Kind == ICSimpleFalse)
std::swap(CvtBBI, NextBBI);
+ MachineBasicBlock &CvtMBB = *CvtBBI->BB;
+ MachineBasicBlock &NextMBB = *NextBBI->BB;
if (CvtBBI->IsDone ||
- (CvtBBI->CannotBeCopied && CvtBBI->BB->pred_size() > 1)) {
+ (CvtBBI->CannotBeCopied && CvtMBB.pred_size() > 1)) {
// Something has changed. It's no longer safe to predicate this block.
BBI.IsAnalyzed = false;
CvtBBI->IsAnalyzed = false;
return false;
}
- if (CvtBBI->BB->hasAddressTaken())
+ if (CvtMBB.hasAddressTaken())
// Conservatively abort if-conversion if BB's address is taken.
return false;
if (Kind == ICSimpleFalse)
- if (TII->ReverseBranchCondition(Cond))
+ if (TII->reverseBranchCondition(Cond))
llvm_unreachable("Unable to reverse branch condition!");
- // Initialize liveins to the first BB. These are potentiall redefined by
- // predicated instructions.
- Redefs.init(TRI);
- Redefs.addLiveIns(*CvtBBI->BB);
- Redefs.addLiveIns(*NextBBI->BB);
-
- // Compute a set of registers which must not be killed by instructions in
- // BB1: This is everything live-in to BB2.
- DontKill.init(TRI);
- DontKill.addLiveIns(*NextBBI->BB);
+ Redefs.init(*TRI);
+ DontKill.init(*TRI);
+
+ if (MRI->tracksLiveness()) {
+ // Initialize liveins to the first BB. These are potentially redefined by
+ // predicated instructions.
+ Redefs.addLiveIns(CvtMBB);
+ Redefs.addLiveIns(NextMBB);
+ // Compute a set of registers which must not be killed by instructions in
+ // BB1: This is everything live-in to BB2.
+ DontKill.addLiveIns(NextMBB);
+ }
- if (CvtBBI->BB->pred_size() > 1) {
- BBI.NonPredSize -= TII->RemoveBranch(*BBI.BB);
+ if (CvtMBB.pred_size() > 1) {
+ BBI.NonPredSize -= TII->removeBranch(*BBI.BB);
// Copy instructions in the true block, predicate them, and add them to
// the entry block.
CopyAndPredicateBlock(BBI, *CvtBBI, Cond);
// RemoveExtraEdges won't work if the block has an unanalyzable branch, so
// explicitly remove CvtBBI as a successor.
- BBI.BB->removeSuccessor(CvtBBI->BB, true);
+ BBI.BB->removeSuccessor(&CvtMBB, true);
} else {
- RemoveKills(CvtBBI->BB->begin(), CvtBBI->BB->end(), DontKill, *TRI);
- PredicateBlock(*CvtBBI, CvtBBI->BB->end(), Cond);
+ RemoveKills(CvtMBB.begin(), CvtMBB.end(), DontKill, *TRI);
+ PredicateBlock(*CvtBBI, CvtMBB.end(), Cond);
// Merge converted block into entry block.
- BBI.NonPredSize -= TII->RemoveBranch(*BBI.BB);
+ BBI.NonPredSize -= TII->removeBranch(*BBI.BB);
MergeBlocks(BBI, *CvtBBI);
}
bool IterIfcvt = true;
- if (!canFallThroughTo(BBI.BB, NextBBI->BB)) {
- InsertUncondBranch(BBI.BB, NextBBI->BB, TII);
+ if (!canFallThroughTo(*BBI.BB, NextMBB)) {
+ InsertUncondBranch(*BBI.BB, NextMBB, TII);
BBI.HasFallThrough = false;
// Now ifcvt'd block will look like this:
// BB:
@@ -1185,15 +1548,14 @@ bool IfConverter::IfConvertSimple(BBInfo &BBI, IfcvtKind Kind) {
// Update block info. BB can be iteratively if-converted.
if (!IterIfcvt)
BBI.IsDone = true;
- InvalidatePreds(BBI.BB);
+ InvalidatePreds(*BBI.BB);
CvtBBI->IsDone = true;
// FIXME: Must maintain LiveIns.
return true;
}
-/// IfConvertTriangle - If convert a triangle sub-CFG.
-///
+/// If convert a triangle sub-CFG.
bool IfConverter::IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind) {
BBInfo &TrueBBI = BBAnalysis[BBI.TrueBB->getNumber()];
BBInfo &FalseBBI = BBAnalysis[BBI.FalseBB->getNumber()];
@@ -1205,29 +1567,29 @@ bool IfConverter::IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind) {
if (Kind == ICTriangleFalse || Kind == ICTriangleFRev)
std::swap(CvtBBI, NextBBI);
+ MachineBasicBlock &CvtMBB = *CvtBBI->BB;
+ MachineBasicBlock &NextMBB = *NextBBI->BB;
if (CvtBBI->IsDone ||
- (CvtBBI->CannotBeCopied && CvtBBI->BB->pred_size() > 1)) {
+ (CvtBBI->CannotBeCopied && CvtMBB.pred_size() > 1)) {
// Something has changed. It's no longer safe to predicate this block.
BBI.IsAnalyzed = false;
CvtBBI->IsAnalyzed = false;
return false;
}
- if (CvtBBI->BB->hasAddressTaken())
+ if (CvtMBB.hasAddressTaken())
// Conservatively abort if-conversion if BB's address is taken.
return false;
if (Kind == ICTriangleFalse || Kind == ICTriangleFRev)
- if (TII->ReverseBranchCondition(Cond))
+ if (TII->reverseBranchCondition(Cond))
llvm_unreachable("Unable to reverse branch condition!");
if (Kind == ICTriangleRev || Kind == ICTriangleFRev) {
- if (ReverseBranchCondition(*CvtBBI)) {
+ if (reverseBranchCondition(*CvtBBI)) {
// BB has been changed, modify its predecessors (except for this
// one) so they don't get ifcvt'ed based on bad intel.
- for (MachineBasicBlock::pred_iterator PI = CvtBBI->BB->pred_begin(),
- E = CvtBBI->BB->pred_end(); PI != E; ++PI) {
- MachineBasicBlock *PBB = *PI;
+ for (MachineBasicBlock *PBB : CvtMBB.predecessors()) {
if (PBB == BBI.BB)
continue;
BBInfo &PBBI = BBAnalysis[PBB->getNumber()];
@@ -1241,9 +1603,11 @@ bool IfConverter::IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind) {
// Initialize liveins to the first BB. These are potentially redefined by
// predicated instructions.
- Redefs.init(TRI);
- Redefs.addLiveIns(*CvtBBI->BB);
- Redefs.addLiveIns(*NextBBI->BB);
+ Redefs.init(*TRI);
+ if (MRI->tracksLiveness()) {
+ Redefs.addLiveIns(CvtMBB);
+ Redefs.addLiveIns(NextMBB);
+ }
DontKill.clear();
@@ -1251,29 +1615,29 @@ bool IfConverter::IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind) {
BranchProbability CvtNext, CvtFalse, BBNext, BBCvt;
if (HasEarlyExit) {
- // Get probabilities before modifying CvtBBI->BB and BBI.BB.
- CvtNext = MBPI->getEdgeProbability(CvtBBI->BB, NextBBI->BB);
- CvtFalse = MBPI->getEdgeProbability(CvtBBI->BB, CvtBBI->FalseBB);
- BBNext = MBPI->getEdgeProbability(BBI.BB, NextBBI->BB);
- BBCvt = MBPI->getEdgeProbability(BBI.BB, CvtBBI->BB);
+ // Get probabilities before modifying CvtMBB and BBI.BB.
+ CvtNext = MBPI->getEdgeProbability(&CvtMBB, &NextMBB);
+ CvtFalse = MBPI->getEdgeProbability(&CvtMBB, CvtBBI->FalseBB);
+ BBNext = MBPI->getEdgeProbability(BBI.BB, &NextMBB);
+ BBCvt = MBPI->getEdgeProbability(BBI.BB, &CvtMBB);
}
- if (CvtBBI->BB->pred_size() > 1) {
- BBI.NonPredSize -= TII->RemoveBranch(*BBI.BB);
+ if (CvtMBB.pred_size() > 1) {
+ BBI.NonPredSize -= TII->removeBranch(*BBI.BB);
// Copy instructions in the true block, predicate them, and add them to
// the entry block.
CopyAndPredicateBlock(BBI, *CvtBBI, Cond, true);
// RemoveExtraEdges won't work if the block has an unanalyzable branch, so
// explicitly remove CvtBBI as a successor.
- BBI.BB->removeSuccessor(CvtBBI->BB, true);
+ BBI.BB->removeSuccessor(&CvtMBB, true);
} else {
// Predicate the 'true' block after removing its branch.
- CvtBBI->NonPredSize -= TII->RemoveBranch(*CvtBBI->BB);
- PredicateBlock(*CvtBBI, CvtBBI->BB->end(), Cond);
+ CvtBBI->NonPredSize -= TII->removeBranch(CvtMBB);
+ PredicateBlock(*CvtBBI, CvtMBB.end(), Cond);
// Now merge the entry of the triangle with the true block.
- BBI.NonPredSize -= TII->RemoveBranch(*BBI.BB);
+ BBI.NonPredSize -= TII->removeBranch(*BBI.BB);
MergeBlocks(BBI, *CvtBBI, false);
}
@@ -1281,24 +1645,23 @@ bool IfConverter::IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind) {
if (HasEarlyExit) {
SmallVector<MachineOperand, 4> RevCond(CvtBBI->BrCond.begin(),
CvtBBI->BrCond.end());
- if (TII->ReverseBranchCondition(RevCond))
+ if (TII->reverseBranchCondition(RevCond))
llvm_unreachable("Unable to reverse branch condition!");
// Update the edge probability for both CvtBBI->FalseBB and NextBBI.
- // NewNext = New_Prob(BBI.BB, NextBBI->BB) =
- // Prob(BBI.BB, NextBBI->BB) +
- // Prob(BBI.BB, CvtBBI->BB) * Prob(CvtBBI->BB, NextBBI->BB)
+ // NewNext = New_Prob(BBI.BB, NextMBB) =
+ // Prob(BBI.BB, NextMBB) +
+ // Prob(BBI.BB, CvtMBB) * Prob(CvtMBB, NextMBB)
// NewFalse = New_Prob(BBI.BB, CvtBBI->FalseBB) =
- // Prob(BBI.BB, CvtBBI->BB) * Prob(CvtBBI->BB, CvtBBI->FalseBB)
- auto NewTrueBB = getNextBlock(BBI.BB);
+ // Prob(BBI.BB, CvtMBB) * Prob(CvtMBB, CvtBBI->FalseBB)
+ auto NewTrueBB = getNextBlock(*BBI.BB);
auto NewNext = BBNext + BBCvt * CvtNext;
- auto NewTrueBBIter =
- std::find(BBI.BB->succ_begin(), BBI.BB->succ_end(), NewTrueBB);
+ auto NewTrueBBIter = find(BBI.BB->successors(), NewTrueBB);
if (NewTrueBBIter != BBI.BB->succ_end())
BBI.BB->setSuccProbability(NewTrueBBIter, NewNext);
auto NewFalse = BBCvt * CvtFalse;
- TII->InsertBranch(*BBI.BB, CvtBBI->FalseBB, nullptr, RevCond, dl);
+ TII->insertBranch(*BBI.BB, CvtBBI->FalseBB, nullptr, RevCond, dl);
BBI.BB->addSuccessor(CvtBBI->FalseBB, NewFalse);
}
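
A quick numeric check of the algebra above (illustrative numbers, not from the
diff): with BBNext = 0.5, BBCvt = 0.5, CvtNext = 0.6 and CvtFalse = 0.4,

    NewNext  = BBNext + BBCvt * CvtNext = 0.5 + 0.5 * 0.6 = 0.8
    NewFalse = BBCvt * CvtFalse         = 0.5 * 0.4       = 0.2

The two sum to 1.0, so the merged block's out-edges stay normalized.
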
@@ -1306,18 +1669,18 @@ bool IfConverter::IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind) {
// predecessors. Otherwise, add an unconditional branch to 'false'.
bool FalseBBDead = false;
bool IterIfcvt = true;
- bool isFallThrough = canFallThroughTo(BBI.BB, NextBBI->BB);
+ bool isFallThrough = canFallThroughTo(*BBI.BB, NextMBB);
if (!isFallThrough) {
// Only merge them if the true block does not fallthrough to the false
// block. By not merging them, we make it possible to iteratively
// ifcvt the blocks.
if (!HasEarlyExit &&
- NextBBI->BB->pred_size() == 1 && !NextBBI->HasFallThrough &&
- !NextBBI->BB->hasAddressTaken()) {
+ NextMBB.pred_size() == 1 && !NextBBI->HasFallThrough &&
+ !NextMBB.hasAddressTaken()) {
MergeBlocks(BBI, *NextBBI);
FalseBBDead = true;
} else {
- InsertUncondBranch(BBI.BB, NextBBI->BB, TII);
+ InsertUncondBranch(*BBI.BB, NextMBB, TII);
BBI.HasFallThrough = false;
}
// Mixed predicated and unpredicated code. This cannot be iteratively
@@ -1330,7 +1693,7 @@ bool IfConverter::IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind) {
// Update block info. BB can be iteratively if-converted.
if (!IterIfcvt)
BBI.IsDone = true;
- InvalidatePreds(BBI.BB);
+ InvalidatePreds(*BBI.BB);
CvtBBI->IsDone = true;
if (FalseBBDead)
NextBBI->IsDone = true;
@@ -1339,23 +1702,25 @@ bool IfConverter::IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind) {
return true;
}
-/// IfConvertDiamond - If convert a diamond sub-CFG.
-///
-bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind,
- unsigned NumDups1, unsigned NumDups2) {
- BBInfo &TrueBBI = BBAnalysis[BBI.TrueBB->getNumber()];
- BBInfo &FalseBBI = BBAnalysis[BBI.FalseBB->getNumber()];
- MachineBasicBlock *TailBB = TrueBBI.TrueBB;
- // True block must fall through or end with an unanalyzable terminator.
- if (!TailBB) {
- if (blockAlwaysFallThrough(TrueBBI))
- TailBB = FalseBBI.TrueBB;
- assert((TailBB || !TrueBBI.IsBrAnalyzable) && "Unexpected!");
- }
+/// Common code shared between diamond conversions.
+/// \p BBI, \p TrueBBI, and \p FalseBBI form the diamond shape.
+/// \p NumDups1 - number of shared instructions at the beginning of \p TrueBBI
+/// and FalseBBI
+/// \p NumDups2 - number of shared instructions at the end of \p TrueBBI
+/// and \p FalseBBI
+/// \p RemoveBranch - Remove the common branch of the two blocks before
+/// predicating. Only false for unanalyzable fallthrough
+/// cases. The caller will replace the branch if necessary.
+/// \p MergeAddEdges - Add successor edges when merging blocks. Only false for
+/// unanalyzable fallthrough cases.
+bool IfConverter::IfConvertDiamondCommon(
+ BBInfo &BBI, BBInfo &TrueBBI, BBInfo &FalseBBI,
+ unsigned NumDups1, unsigned NumDups2,
+ bool TClobbersPred, bool FClobbersPred,
+ bool RemoveBranch, bool MergeAddEdges) {
if (TrueBBI.IsDone || FalseBBI.IsDone ||
- TrueBBI.BB->pred_size() > 1 ||
- FalseBBI.BB->pred_size() > 1) {
+ TrueBBI.BB->pred_size() > 1 || FalseBBI.BB->pred_size() > 1) {
// Something has changed. It's no longer safe to predicate these blocks.
BBI.IsAnalyzed = false;
TrueBBI.IsAnalyzed = false;
@@ -1373,36 +1738,47 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind,
BBInfo *BBI1 = &TrueBBI;
BBInfo *BBI2 = &FalseBBI;
SmallVector<MachineOperand, 4> RevCond(BBI.BrCond.begin(), BBI.BrCond.end());
- if (TII->ReverseBranchCondition(RevCond))
+ if (TII->reverseBranchCondition(RevCond))
llvm_unreachable("Unable to reverse branch condition!");
SmallVector<MachineOperand, 4> *Cond1 = &BBI.BrCond;
SmallVector<MachineOperand, 4> *Cond2 = &RevCond;
// Figure out the more profitable ordering.
bool DoSwap = false;
- if (TrueBBI.ClobbersPred && !FalseBBI.ClobbersPred)
+ if (TClobbersPred && !FClobbersPred)
DoSwap = true;
- else if (TrueBBI.ClobbersPred == FalseBBI.ClobbersPred) {
+ else if (!TClobbersPred && !FClobbersPred) {
if (TrueBBI.NonPredSize > FalseBBI.NonPredSize)
DoSwap = true;
- }
+ } else if (TClobbersPred && FClobbersPred)
+ llvm_unreachable("Predicate info cannot be clobbered by both sides.");
if (DoSwap) {
std::swap(BBI1, BBI2);
std::swap(Cond1, Cond2);
}
// Remove the conditional branch from entry to the blocks.
- BBI.NonPredSize -= TII->RemoveBranch(*BBI.BB);
-
- // Initialize liveins to the first BB. These are potentially redefined by
- // predicated instructions.
- Redefs.init(TRI);
- Redefs.addLiveIns(*BBI1->BB);
+ BBI.NonPredSize -= TII->removeBranch(*BBI.BB);
+
+ MachineBasicBlock &MBB1 = *BBI1->BB;
+ MachineBasicBlock &MBB2 = *BBI2->BB;
+
+ // Initialize the Redefs:
+ // - BB2 live-in regs need implicit uses before being redefined by BB1
+ // instructions.
+ // - BB1 live-out regs need implicit uses before being redefined by BB2
+ // instructions. We start with BB1 live-ins so we have the live-out regs
+ // after tracking the BB1 instructions.
+ Redefs.init(*TRI);
+ if (MRI->tracksLiveness()) {
+ Redefs.addLiveIns(MBB1);
+ Redefs.addLiveIns(MBB2);
+ }
// Remove the duplicated instructions at the beginnings of both paths.
// Skip dbg_value instructions
- MachineBasicBlock::iterator DI1 = BBI1->BB->getFirstNonDebugInstr();
- MachineBasicBlock::iterator DI2 = BBI2->BB->getFirstNonDebugInstr();
+ MachineBasicBlock::iterator DI1 = MBB1.getFirstNonDebugInstr();
+ MachineBasicBlock::iterator DI2 = MBB2.getFirstNonDebugInstr();
BBI1->NonPredSize -= NumDups1;
BBI2->NonPredSize -= NumDups1;
@@ -1421,52 +1797,60 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind,
// Compute a set of registers which must not be killed by instructions in BB1:
// This is everything used+live in BB2 after the duplicated instructions. We
// can compute this set by simulating liveness backwards from the end of BB2.
- DontKill.init(TRI);
- for (MachineBasicBlock::reverse_iterator I = BBI2->BB->rbegin(),
- E = MachineBasicBlock::reverse_iterator(DI2); I != E; ++I) {
- DontKill.stepBackward(*I);
+ DontKill.init(*TRI);
+ if (MRI->tracksLiveness()) {
+ for (const MachineInstr &MI : make_range(MBB2.rbegin(), ++DI2.getReverse()))
+ DontKill.stepBackward(MI);
+
+ for (const MachineInstr &MI : make_range(MBB1.begin(), DI1)) {
+ SmallVector<std::pair<unsigned, const MachineOperand*>, 4> Dummy;
+ Redefs.stepForward(MI, Dummy);
+ }
}
+ BBI.BB->splice(BBI.BB->end(), &MBB1, MBB1.begin(), DI1);
+ MBB2.erase(MBB2.begin(), DI2);
- for (MachineBasicBlock::const_iterator I = BBI1->BB->begin(), E = DI1; I != E;
- ++I) {
- SmallVector<std::pair<unsigned, const MachineOperand*>, 4> IgnoredClobbers;
- Redefs.stepForward(*I, IgnoredClobbers);
- }
- BBI.BB->splice(BBI.BB->end(), BBI1->BB, BBI1->BB->begin(), DI1);
- BBI2->BB->erase(BBI2->BB->begin(), DI2);
-
- // Remove branch from the 'true' block, unless it was not analyzable.
- // Non-analyzable branches need to be preserved, since in such cases,
- // the CFG structure is not an actual diamond (the join block may not
- // be present).
- if (BBI1->IsBrAnalyzable)
- BBI1->NonPredSize -= TII->RemoveBranch(*BBI1->BB);
+ // The branches have been checked to match, so it is safe to remove the branch
+ // in BB1 and rely on the copy in BB2.
+#ifndef NDEBUG
+ // Unanalyzable branches must match exactly. Check that now.
+ if (!BBI1->IsBrAnalyzable)
+ verifySameBranchInstructions(&MBB1, &MBB2);
+#endif
+ BBI1->NonPredSize -= TII->removeBranch(*BBI1->BB);
// Remove duplicated instructions.
- DI1 = BBI1->BB->end();
+ DI1 = MBB1.end();
for (unsigned i = 0; i != NumDups2; ) {
// NumDups2 only counted non-dbg_value instructions, so this won't
// run off the head of the list.
- assert (DI1 != BBI1->BB->begin());
+ assert(DI1 != MBB1.begin());
--DI1;
// skip dbg_value instructions
if (!DI1->isDebugValue())
++i;
}
- BBI1->BB->erase(DI1, BBI1->BB->end());
+ MBB1.erase(DI1, MBB1.end());
// Kill flags in the true block for registers living into the false block
// must be removed.
- RemoveKills(BBI1->BB->begin(), BBI1->BB->end(), DontKill, *TRI);
+ RemoveKills(MBB1.begin(), MBB1.end(), DontKill, *TRI);
- // Remove 'false' block branch (unless it was not analyzable), and find
- // the last instruction to predicate.
- if (BBI2->IsBrAnalyzable)
- BBI2->NonPredSize -= TII->RemoveBranch(*BBI2->BB);
DI2 = BBI2->BB->end();
+ // The branches have been checked to match. Skip over the branch in the false
+ // block so that we don't try to predicate it.
+ if (RemoveBranch)
+ BBI2->NonPredSize -= TII->removeBranch(*BBI2->BB);
+ else {
+ do {
+ assert(DI2 != MBB2.begin());
+ DI2--;
+ } while (DI2->isBranch() || DI2->isDebugValue());
+ DI2++;
+ }
while (NumDups2 != 0) {
// NumDups2 only counted non-dbg_value instructions, so this won't
// run off the head of the list.
- assert (DI2 != BBI2->BB->begin());
+ assert(DI2 != MBB2.begin());
--DI2;
// skip dbg_value instructions
if (!DI2->isDebugValue())
@@ -1483,13 +1867,12 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind,
// addne r0, r1, #1
SmallSet<unsigned, 4> RedefsByFalse;
SmallSet<unsigned, 4> ExtUses;
- if (TII->isProfitableToUnpredicate(*BBI1->BB, *BBI2->BB)) {
- for (MachineBasicBlock::iterator FI = BBI2->BB->begin(); FI != DI2; ++FI) {
- if (FI->isDebugValue())
+ if (TII->isProfitableToUnpredicate(MBB1, MBB2)) {
+ for (const MachineInstr &FI : make_range(MBB2.begin(), DI2)) {
+ if (FI.isDebugValue())
continue;
SmallVector<unsigned, 4> Defs;
- for (unsigned i = 0, e = FI->getNumOperands(); i != e; ++i) {
- const MachineOperand &MO = FI->getOperand(i);
+ for (const MachineOperand &MO : FI.operands()) {
if (!MO.isReg())
continue;
unsigned Reg = MO.getReg();
@@ -1506,8 +1889,7 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind,
}
}
- for (unsigned i = 0, e = Defs.size(); i != e; ++i) {
- unsigned Reg = Defs[i];
+ for (unsigned Reg : Defs) {
if (!ExtUses.count(Reg)) {
for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true);
SubRegs.isValid(); ++SubRegs)
@@ -1518,17 +1900,17 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind,
}
// Predicate the 'true' block.
- PredicateBlock(*BBI1, BBI1->BB->end(), *Cond1, &RedefsByFalse);
+ PredicateBlock(*BBI1, MBB1.end(), *Cond1, &RedefsByFalse);
// After predicating BBI1, if there is a predicated terminator in BBI1 and
// a non-predicated in BBI2, then we don't want to predicate the one from
// BBI2. The reason is that if we merged these blocks, we would end up with
// two predicated terminators in the same block.
- if (!BBI2->BB->empty() && (DI2 == BBI2->BB->end())) {
- MachineBasicBlock::iterator BBI1T = BBI1->BB->getFirstTerminator();
- MachineBasicBlock::iterator BBI2T = BBI2->BB->getFirstTerminator();
- if (BBI1T != BBI1->BB->end() && TII->isPredicated(*BBI1T) &&
- BBI2T != BBI2->BB->end() && !TII->isPredicated(*BBI2T))
+ if (!MBB2.empty() && (DI2 == MBB2.end())) {
+ MachineBasicBlock::iterator BBI1T = MBB1.getFirstTerminator();
+ MachineBasicBlock::iterator BBI2T = MBB2.getFirstTerminator();
+ if (BBI1T != MBB1.end() && TII->isPredicated(*BBI1T) &&
+ BBI2T != MBB2.end() && !TII->isPredicated(*BBI2T))
--DI2;
}
@@ -1536,8 +1918,72 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind,
PredicateBlock(*BBI2, DI2, *Cond2);
// Merge the true block into the entry of the diamond.
- MergeBlocks(BBI, *BBI1, TailBB == nullptr);
- MergeBlocks(BBI, *BBI2, TailBB == nullptr);
+ MergeBlocks(BBI, *BBI1, MergeAddEdges);
+ MergeBlocks(BBI, *BBI2, MergeAddEdges);
+ return true;
+}
+
+/// If convert an almost-diamond sub-CFG where the true
+/// and false blocks share a common tail.
+bool IfConverter::IfConvertForkedDiamond(
+ BBInfo &BBI, IfcvtKind Kind,
+ unsigned NumDups1, unsigned NumDups2,
+ bool TClobbersPred, bool FClobbersPred) {
+ BBInfo &TrueBBI = BBAnalysis[BBI.TrueBB->getNumber()];
+ BBInfo &FalseBBI = BBAnalysis[BBI.FalseBB->getNumber()];
+
+ // Save the debug location for later.
+ DebugLoc dl;
+ MachineBasicBlock::iterator TIE = TrueBBI.BB->getFirstTerminator();
+ if (TIE != TrueBBI.BB->end())
+ dl = TIE->getDebugLoc();
+ // Removing branches from both blocks is safe, because we have already
+ // determined that both blocks have the same branch instructions. The branch
+ // will be added back at the end, unpredicated.
+ if (!IfConvertDiamondCommon(
+ BBI, TrueBBI, FalseBBI,
+ NumDups1, NumDups2,
+ TClobbersPred, FClobbersPred,
+ /* RemoveBranch */ true, /* MergeAddEdges */ true))
+ return false;
+
+ // Add back the branch.
+ // Debug location saved above when removing the branch from BBI2
+ TII->insertBranch(*BBI.BB, TrueBBI.TrueBB, TrueBBI.FalseBB,
+ TrueBBI.BrCond, dl);
+
+ RemoveExtraEdges(BBI);
+
+ // Update block info.
+ BBI.IsDone = TrueBBI.IsDone = FalseBBI.IsDone = true;
+ InvalidatePreds(*BBI.BB);
+
+ // FIXME: Must maintain LiveIns.
+ return true;
+}
+
+/// If convert a diamond sub-CFG.
+bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind,
+ unsigned NumDups1, unsigned NumDups2,
+ bool TClobbersPred, bool FClobbersPred) {
+ BBInfo &TrueBBI = BBAnalysis[BBI.TrueBB->getNumber()];
+ BBInfo &FalseBBI = BBAnalysis[BBI.FalseBB->getNumber()];
+ MachineBasicBlock *TailBB = TrueBBI.TrueBB;
+
+ // True block must fall through or end with an unanalyzable terminator.
+ if (!TailBB) {
+ if (blockAlwaysFallThrough(TrueBBI))
+ TailBB = FalseBBI.TrueBB;
+ assert((TailBB || !TrueBBI.IsBrAnalyzable) && "Unexpected!");
+ }
+
+ if (!IfConvertDiamondCommon(
+ BBI, TrueBBI, FalseBBI,
+ NumDups1, NumDups2,
+ TClobbersPred, FClobbersPred,
+ /* RemoveBranch */ TrueBBI.IsBrAnalyzable,
+ /* MergeAddEdges */ TailBB == nullptr))
+ return false;
// If the if-converted block falls through or unconditionally branches into
// the tail block, and the tail block does not have other predecessors, then
@@ -1560,7 +2006,7 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind,
CanMergeTail = false;
else if (NumPreds == 1 && CanMergeTail) {
MachineBasicBlock::pred_iterator PI = TailBB->pred_begin();
- if (*PI != BBI1->BB && *PI != BBI2->BB)
+ if (*PI != TrueBBI.BB && *PI != FalseBBI.BB)
CanMergeTail = false;
}
if (CanMergeTail) {
@@ -1568,7 +2014,7 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind,
TailBBI.IsDone = true;
} else {
BBI.BB->addSuccessor(TailBB, BranchProbability::getOne());
- InsertUncondBranch(BBI.BB, TailBB, TII);
+ InsertUncondBranch(*BBI.BB, *TailBB, TII);
BBI.HasFallThrough = false;
}
}
@@ -1576,13 +2022,13 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind,
// RemoveExtraEdges won't work if the block has an unanalyzable branch,
// which can happen here if TailBB is unanalyzable and is merged, so
// explicitly remove BBI1 and BBI2 as successors.
- BBI.BB->removeSuccessor(BBI1->BB);
- BBI.BB->removeSuccessor(BBI2->BB, true);
+ BBI.BB->removeSuccessor(TrueBBI.BB);
+ BBI.BB->removeSuccessor(FalseBBI.BB, /* NormalizeSuccessProbs */ true);
RemoveExtraEdges(BBI);
// Update block info.
BBI.IsDone = TrueBBI.IsDone = FalseBBI.IsDone = true;
- InvalidatePreds(BBI.BB);
+ InvalidatePreds(*BBI.BB);
// FIXME: Must maintain LiveIns.
return true;
@@ -1594,8 +2040,7 @@ static bool MaySpeculate(const MachineInstr &MI,
if (!MI.isSafeToMove(nullptr, SawStore))
return false;
- for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
- const MachineOperand &MO = MI.getOperand(i);
+ for (const MachineOperand &MO : MI.operands()) {
if (!MO.isReg())
continue;
unsigned Reg = MO.getReg();
@@ -1608,15 +2053,15 @@ static bool MaySpeculate(const MachineInstr &MI,
return true;
}
-/// PredicateBlock - Predicate instructions from the start of the block to the
-/// specified end with the specified condition.
+/// Predicate instructions from the start of the block to the specified end with
+/// the specified condition.
void IfConverter::PredicateBlock(BBInfo &BBI,
MachineBasicBlock::iterator E,
SmallVectorImpl<MachineOperand> &Cond,
SmallSet<unsigned, 4> *LaterRedefs) {
bool AnyUnpred = false;
bool MaySpec = LaterRedefs != nullptr;
- for (MachineInstr &I : llvm::make_range(BBI.BB->begin(), E)) {
+ for (MachineInstr &I : make_range(BBI.BB->begin(), E)) {
if (I.isDebugValue() || TII->isPredicated(I))
continue;
// It may be possible not to predicate an instruction if it's the 'true'
@@ -1651,14 +2096,15 @@ void IfConverter::PredicateBlock(BBInfo &BBI,
++NumUnpred;
}
-/// CopyAndPredicateBlock - Copy and predicate instructions from source BB to
-/// the destination block. Skip end of block branches if IgnoreBr is true.
+/// Copy and predicate instructions from source BB to the destination block.
+/// Skip end of block branches if IgnoreBr is true.
void IfConverter::CopyAndPredicateBlock(BBInfo &ToBBI, BBInfo &FromBBI,
SmallVectorImpl<MachineOperand> &Cond,
bool IgnoreBr) {
MachineFunction &MF = *ToBBI.BB->getParent();
- for (auto &I : *FromBBI.BB) {
+ MachineBasicBlock &FromMBB = *FromBBI.BB;
+ for (MachineInstr &I : FromMBB) {
// Do not copy the end of the block branches.
if (IgnoreBr && I.isBranch())
break;
@@ -1691,13 +2137,12 @@ void IfConverter::CopyAndPredicateBlock(BBInfo &ToBBI, BBInfo &FromBBI,
}
if (!IgnoreBr) {
- std::vector<MachineBasicBlock *> Succs(FromBBI.BB->succ_begin(),
- FromBBI.BB->succ_end());
- MachineBasicBlock *NBB = getNextBlock(FromBBI.BB);
+ std::vector<MachineBasicBlock *> Succs(FromMBB.succ_begin(),
+ FromMBB.succ_end());
+ MachineBasicBlock *NBB = getNextBlock(FromMBB);
MachineBasicBlock *FallThrough = FromBBI.HasFallThrough ? NBB : nullptr;
- for (unsigned i = 0, e = Succs.size(); i != e; ++i) {
- MachineBasicBlock *Succ = Succs[i];
+ for (MachineBasicBlock *Succ : Succs) {
// Fallthrough edge can't be transferred.
if (Succ == FallThrough)
continue;
@@ -1714,25 +2159,25 @@ void IfConverter::CopyAndPredicateBlock(BBInfo &ToBBI, BBInfo &FromBBI,
++NumDupBBs;
}
-/// MergeBlocks - Move all instructions from FromBB to the end of ToBB.
-/// This will leave FromBB as an empty block, so remove all of its
-/// successor edges except for the fall-through edge. If AddEdges is true,
-/// i.e., when FromBBI's branch is being moved, add those successor edges to
-/// ToBBI.
+/// Move all instructions from FromBB to the end of ToBB. This will leave
+/// FromBB as an empty block, so remove all of its successor edges except for
+/// the fall-through edge. If AddEdges is true, i.e., when FromBBI's branch is
+/// being moved, add those successor edges to ToBBI.
void IfConverter::MergeBlocks(BBInfo &ToBBI, BBInfo &FromBBI, bool AddEdges) {
- assert(!FromBBI.BB->hasAddressTaken() &&
+ MachineBasicBlock &FromMBB = *FromBBI.BB;
+ assert(!FromMBB.hasAddressTaken() &&
"Removing a BB whose address is taken!");
- // In case FromBBI.BB contains terminators (e.g. return instruction),
+ // In case FromMBB contains terminators (e.g. return instruction),
// first move the non-terminator instructions, then the terminators.
- MachineBasicBlock::iterator FromTI = FromBBI.BB->getFirstTerminator();
+ MachineBasicBlock::iterator FromTI = FromMBB.getFirstTerminator();
MachineBasicBlock::iterator ToTI = ToBBI.BB->getFirstTerminator();
- ToBBI.BB->splice(ToTI, FromBBI.BB, FromBBI.BB->begin(), FromTI);
+ ToBBI.BB->splice(ToTI, &FromMBB, FromMBB.begin(), FromTI);
// If FromBB has non-predicated terminator we should copy it at the end.
- if (FromTI != FromBBI.BB->end() && !TII->isPredicated(*FromTI))
+ if (FromTI != FromMBB.end() && !TII->isPredicated(*FromTI))
ToTI = ToBBI.BB->end();
- ToBBI.BB->splice(ToTI, FromBBI.BB, FromTI, FromBBI.BB->end());
+ ToBBI.BB->splice(ToTI, &FromMBB, FromTI, FromMBB.end());
// Force normalizing the successors' probabilities of ToBBI.BB to convert all
// unknown probabilities into known ones.
@@ -1740,25 +2185,23 @@ void IfConverter::MergeBlocks(BBInfo &ToBBI, BBInfo &FromBBI, bool AddEdges) {
// eliminate all unknown probabilities in MBB.
ToBBI.BB->normalizeSuccProbs();
- SmallVector<MachineBasicBlock *, 4> FromSuccs(FromBBI.BB->succ_begin(),
- FromBBI.BB->succ_end());
- MachineBasicBlock *NBB = getNextBlock(FromBBI.BB);
+ SmallVector<MachineBasicBlock *, 4> FromSuccs(FromMBB.succ_begin(),
+ FromMBB.succ_end());
+ MachineBasicBlock *NBB = getNextBlock(FromMBB);
MachineBasicBlock *FallThrough = FromBBI.HasFallThrough ? NBB : nullptr;
- // The edge probability from ToBBI.BB to FromBBI.BB, which is only needed when
- // AddEdges is true and FromBBI.BB is a successor of ToBBI.BB.
+ // The edge probability from ToBBI.BB to FromMBB, which is only needed when
+ // AddEdges is true and FromMBB is a successor of ToBBI.BB.
auto To2FromProb = BranchProbability::getZero();
- if (AddEdges && ToBBI.BB->isSuccessor(FromBBI.BB)) {
- To2FromProb = MBPI->getEdgeProbability(ToBBI.BB, FromBBI.BB);
- // Set the edge probability from ToBBI.BB to FromBBI.BB to zero to avoid the
+ if (AddEdges && ToBBI.BB->isSuccessor(&FromMBB)) {
+ To2FromProb = MBPI->getEdgeProbability(ToBBI.BB, &FromMBB);
+ // Set the edge probability from ToBBI.BB to FromMBB to zero to avoid the
// edge probability being merged to other edges when this edge is removed
// later.
- ToBBI.BB->setSuccProbability(
- std::find(ToBBI.BB->succ_begin(), ToBBI.BB->succ_end(), FromBBI.BB),
- BranchProbability::getZero());
+ ToBBI.BB->setSuccProbability(find(ToBBI.BB->successors(), &FromMBB),
+ BranchProbability::getZero());
}
- for (unsigned i = 0, e = FromSuccs.size(); i != e; ++i) {
- MachineBasicBlock *Succ = FromSuccs[i];
+ for (MachineBasicBlock *Succ : FromSuccs) {
// Fallthrough edge can't be transferred.
if (Succ == FallThrough)
continue;
@@ -1766,26 +2209,26 @@ void IfConverter::MergeBlocks(BBInfo &ToBBI, BBInfo &FromBBI, bool AddEdges) {
auto NewProb = BranchProbability::getZero();
if (AddEdges) {
// Calculate the edge probability for the edge from ToBBI.BB to Succ,
- // which is a portion of the edge probability from FromBBI.BB to Succ. The
- // portion ratio is the edge probability from ToBBI.BB to FromBBI.BB (if
+ // which is a portion of the edge probability from FromMBB to Succ. The
+ // portion ratio is the edge probability from ToBBI.BB to FromMBB (if
// FromBBI is a successor of ToBBI.BB. See comment below for the exception).
- NewProb = MBPI->getEdgeProbability(FromBBI.BB, Succ);
+ NewProb = MBPI->getEdgeProbability(&FromMBB, Succ);
- // To2FromProb is 0 when FromBBI.BB is not a successor of ToBBI.BB. This
- // only happens when if-converting a diamond CFG and FromBBI.BB is the
- // tail BB. In this case FromBBI.BB post-dominates ToBBI.BB and hence we
- // could just use the probabilities on FromBBI.BB's out-edges when adding
+ // To2FromProb is 0 when FromMBB is not a successor of ToBBI.BB. This
+ // only happens when if-converting a diamond CFG and FromMBB is the
+ // tail BB. In this case FromMBB post-dominates ToBBI.BB and hence we
+ // could just use the probabilities on FromMBB's out-edges when adding
// new successors.
if (!To2FromProb.isZero())
NewProb *= To2FromProb;
}
- FromBBI.BB->removeSuccessor(Succ);
+ FromMBB.removeSuccessor(Succ);
if (AddEdges) {
// If the edge from ToBBI.BB to Succ already exists, update the
// probability of this edge by adding NewProb to it. An example is shown
- // below, in which A is ToBBI.BB and B is FromBBI.BB. In this case we
+ // below, in which A is ToBBI.BB and B is FromMBB. In this case we
// don't have to set C as A's successor as it already is. We only need to
// update the edge probability on A->C. Note that B will not be
// immediately removed from A's successors. It is possible that B->D is
@@ -1807,7 +2250,7 @@ void IfConverter::MergeBlocks(BBInfo &ToBBI, BBInfo &FromBBI, bool AddEdges) {
//
if (ToBBI.BB->isSuccessor(Succ))
ToBBI.BB->setSuccProbability(
- std::find(ToBBI.BB->succ_begin(), ToBBI.BB->succ_end(), Succ),
+ find(ToBBI.BB->successors(), Succ),
MBPI->getEdgeProbability(ToBBI.BB, Succ) + NewProb);
else
ToBBI.BB->addSuccessor(Succ, NewProb);
@@ -1815,8 +2258,8 @@ void IfConverter::MergeBlocks(BBInfo &ToBBI, BBInfo &FromBBI, bool AddEdges) {
}
// Now FromBBI always falls through to the next block!
- if (NBB && !FromBBI.BB->isSuccessor(NBB))
- FromBBI.BB->addSuccessor(NBB);
+ if (NBB && !FromMBB.isSuccessor(NBB))
+ FromMBB.addSuccessor(NBB);
// Normalize the probabilities of ToBBI.BB's successors with all adjustment
// we've done above.
@@ -1839,6 +2282,6 @@ void IfConverter::MergeBlocks(BBInfo &ToBBI, BBInfo &FromBBI, bool AddEdges) {
}
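
To make the probability transfer concrete (illustrative numbers): let A be
ToBBI.BB and B be FromMBB, with Prob(A->B) = 0.4, Prob(A->C) = 0.6 and
Prob(B->C) = 1.0. Then

    To2FromProb = Prob(A->B)                = 0.4
    NewProb     = Prob(B->C) * To2FromProb  = 1.0 * 0.4 = 0.4
    Prob(A->C) += NewProb                   => 0.6 + 0.4 = 1.0

after which the zeroed A->B edge drops out under normalizeSuccProbs().
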
FunctionPass *
-llvm::createIfConverter(std::function<bool(const Function &)> Ftor) {
+llvm::createIfConverter(std::function<bool(const MachineFunction &)> Ftor) {
return new IfConverter(std::move(Ftor));
}
diff --git a/contrib/llvm/lib/CodeGen/ImplicitNullChecks.cpp b/contrib/llvm/lib/CodeGen/ImplicitNullChecks.cpp
index 31d6bd0..9588dfb 100644
--- a/contrib/llvm/lib/CodeGen/ImplicitNullChecks.cpp
+++ b/contrib/llvm/lib/CodeGen/ImplicitNullChecks.cpp
@@ -51,6 +51,12 @@ static cl::opt<int> PageSize("imp-null-check-page-size",
cl::desc("The page size of the target in bytes"),
cl::init(4096));
+static cl::opt<unsigned> MaxInstsToConsider(
+ "imp-null-max-insts-to-consider",
+ cl::desc("The max number of instructions to consider hoisting loads over "
+ "(the algorithm is quadratic over this number)"),
+ cl::init(8));
+
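
Since MaxInstsToConsider is an ordinary cl::opt, it can be tuned from the llc
command line; a hypothetical invocation (the input file name is made up):

    llc -O2 -imp-null-max-insts-to-consider=4 null-checks.ll -o null-checks.s
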
#define DEBUG_TYPE "implicit-null-checks"
STATISTIC(NumImplicitNullChecks,
@@ -59,6 +65,44 @@ STATISTIC(NumImplicitNullChecks,
namespace {
class ImplicitNullChecks : public MachineFunctionPass {
+ /// Return true if \c computeDependence can process \p MI.
+ static bool canHandle(const MachineInstr *MI);
+
+ /// Helper function for \c computeDependence. Return true if \p A
+ /// and \p B do not have any dependences between them, and can be
+ /// re-ordered without changing program semantics.
+ bool canReorder(const MachineInstr *A, const MachineInstr *B);
+
+ /// A data type for representing the result computed by \c
+ /// computeDependence. States whether it is okay to reorder the
+ /// instruction passed to \c computeDependence with at most one
+ /// dependency.
+ struct DependenceResult {
+ /// Can we actually re-order \p MI with \p Insts (see \c
+ /// computeDependence).
+ bool CanReorder;
+
+ /// If non-None, then an instruction in \p Insts that also must be
+ /// hoisted.
+ Optional<ArrayRef<MachineInstr *>::iterator> PotentialDependence;
+
+ /*implicit*/ DependenceResult(
+ bool CanReorder,
+ Optional<ArrayRef<MachineInstr *>::iterator> PotentialDependence)
+ : CanReorder(CanReorder), PotentialDependence(PotentialDependence) {
+ assert((!PotentialDependence || CanReorder) &&
+ "!CanReorder && PotentialDependence.hasValue() not allowed!");
+ }
+ };
+
+ /// Compute a result for the following question: can \p MI be
+ /// re-ordered from after \p Insts to before it.
+ ///
+ /// \c canHandle should return true for all instructions in \p
+ /// Insts.
+ DependenceResult computeDependence(const MachineInstr *MI,
+ ArrayRef<MachineInstr *> Insts);
+
/// Represents one null check that can be made implicit.
class NullCheck {
// The memory operation the null check can be folded into.
@@ -114,6 +158,19 @@ class ImplicitNullChecks : public MachineFunctionPass {
MachineBasicBlock *HandlerMBB);
void rewriteNullChecks(ArrayRef<NullCheck> NullCheckList);
+ /// Is \p MI a memory operation that can be used to implicitly null check the
+ /// value in \p PointerReg? \p PrevInsts is the set of instructions seen since
+ /// the explicit null check on \p PointerReg.
+ bool isSuitableMemoryOp(MachineInstr &MI, unsigned PointerReg,
+ ArrayRef<MachineInstr *> PrevInsts);
+
+ /// Return true if \p FaultingMI can be hoisted from after the
+ /// instructions in \p InstsSeenSoFar to before them. Set \p Dependence to a
+ /// non-null value if we also need to (and legally can) hoist a dependency.
+ bool canHoistLoadInst(MachineInstr *FaultingMI, unsigned PointerReg,
+ ArrayRef<MachineInstr *> InstsSeenSoFar,
+ MachineBasicBlock *NullSucc, MachineInstr *&Dependence);
+
public:
static char ID;
@@ -129,160 +186,70 @@ public:
MachineFunctionProperties getRequiredProperties() const override {
return MachineFunctionProperties().set(
- MachineFunctionProperties::Property::AllVRegsAllocated);
+ MachineFunctionProperties::Property::NoVRegs);
}
};
-/// \brief Detect re-ordering hazards and dependencies.
-///
-/// This class keeps track of defs and uses, and can be queried if a given
-/// machine instruction can be re-ordered from after the machine instructions
-/// seen so far to before them.
-class HazardDetector {
- static MachineInstr *getUnknownMI() {
- return DenseMapInfo<MachineInstr *>::getTombstoneKey();
- }
-
- // Maps physical registers to the instruction defining them. If there has
- // been more than one def of an specific register, that register is mapped to
- // getUnknownMI().
- DenseMap<unsigned, MachineInstr *> RegDefs;
- DenseSet<unsigned> RegUses;
- const TargetRegisterInfo &TRI;
- bool hasSeenClobber;
- AliasAnalysis &AA;
-
-public:
- explicit HazardDetector(const TargetRegisterInfo &TRI, AliasAnalysis &AA)
- : TRI(TRI), hasSeenClobber(false), AA(AA) {}
+}
- /// \brief Make a note of \p MI for later queries to isSafeToHoist.
- ///
- /// May clobber this HazardDetector instance. \see isClobbered.
- void rememberInstruction(MachineInstr *MI);
+bool ImplicitNullChecks::canHandle(const MachineInstr *MI) {
+ if (MI->isCall() || MI->mayStore() || MI->hasUnmodeledSideEffects())
+ return false;
+ auto IsRegMask = [](const MachineOperand &MO) { return MO.isRegMask(); };
+ (void)IsRegMask;
- /// \brief Return true if it is safe to hoist \p MI from after all the
- /// instructions seen so far (via rememberInstruction) to before it. If \p MI
- /// has one and only one transitive dependency, set \p Dependency to that
- /// instruction. If there are more dependencies, return false.
- bool isSafeToHoist(MachineInstr *MI, MachineInstr *&Dependency);
+ assert(!llvm::any_of(MI->operands(), IsRegMask) &&
+ "Calls were filtered out above!");
- /// \brief Return true if this instance of HazardDetector has been clobbered
- /// (i.e. has no more useful information).
- ///
- /// A HazardDetecter is clobbered when it sees a construct it cannot
- /// understand, and it would have to return a conservative answer for all
- /// future queries. Having a separate clobbered state lets the client code
- /// bail early, without making queries about all of the future instructions
- /// (which would have returned the most conservative answer anyway).
- ///
- /// Calling rememberInstruction or isSafeToHoist on a clobbered HazardDetector
- /// is an error.
- bool isClobbered() { return hasSeenClobber; }
-};
+ auto IsUnordered = [](MachineMemOperand *MMO) { return MMO->isUnordered(); };
+ return llvm::all_of(MI->memoperands(), IsUnordered);
}
+ImplicitNullChecks::DependenceResult
+ImplicitNullChecks::computeDependence(const MachineInstr *MI,
+ ArrayRef<MachineInstr *> Block) {
+ assert(llvm::all_of(Block, canHandle) && "Check this first!");
+ assert(!llvm::is_contained(Block, MI) && "Block must be exclusive of MI!");
-void HazardDetector::rememberInstruction(MachineInstr *MI) {
- assert(!isClobbered() &&
- "Don't add instructions to a clobbered hazard detector");
+ Optional<ArrayRef<MachineInstr *>::iterator> Dep;
- if (MI->mayStore() || MI->hasUnmodeledSideEffects()) {
- hasSeenClobber = true;
- return;
- }
+ for (auto I = Block.begin(), E = Block.end(); I != E; ++I) {
+ if (canReorder(*I, MI))
+ continue;
- for (auto *MMO : MI->memoperands()) {
- // Right now we don't want to worry about LLVM's memory model.
- if (!MMO->isUnordered()) {
- hasSeenClobber = true;
- return;
+ if (Dep == None) {
+ // Found one possible dependency, keep track of it.
+ Dep = I;
+ } else {
+ // We found two dependencies, so bail out.
+ return {false, None};
}
}
- for (auto &MO : MI->operands()) {
- if (!MO.isReg() || !MO.getReg())
- continue;
-
- if (MO.isDef()) {
- auto It = RegDefs.find(MO.getReg());
- if (It == RegDefs.end())
- RegDefs.insert({MO.getReg(), MI});
- else {
- assert(It->second && "Found null MI?");
- It->second = getUnknownMI();
- }
- } else
- RegUses.insert(MO.getReg());
- }
+ return {true, Dep};
}
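
Structurally, computeDependence is an "allow at most one blocker" scan. The
same pattern over plain values, as a standalone sketch (assumption: purely
illustrative, none of these names exist in LLVM):

    #include <cstddef>
    #include <vector>

    // Returns the index of the single element conflicting with Key, -1 when
    // nothing conflicts (freely reorderable), or -2 when two or more conflict
    // (mirroring the {false, None} early return above).
    template <typename T, typename Pred>
    long findSingleBlocker(const std::vector<T> &Block, const T &Key,
                           Pred Conflicts) {
      long Dep = -1;
      for (std::size_t I = 0; I != Block.size(); ++I) {
        if (!Conflicts(Block[I], Key))
          continue;
        if (Dep != -1)
          return -2; // second dependence: bail out
        Dep = static_cast<long>(I);
      }
      return Dep;
    }
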
-bool HazardDetector::isSafeToHoist(MachineInstr *MI,
- MachineInstr *&Dependency) {
- assert(!isClobbered() && "isSafeToHoist cannot do anything useful!");
- Dependency = nullptr;
+bool ImplicitNullChecks::canReorder(const MachineInstr *A,
+ const MachineInstr *B) {
+ assert(canHandle(A) && canHandle(B) && "Precondition!");
- // Right now we don't want to worry about LLVM's memory model. This can be
- // made more precise later.
- for (auto *MMO : MI->memoperands())
- if (!MMO->isUnordered())
- return false;
+ // canHandle makes sure that we _can_ correctly analyze the dependencies
+ // between A and B -- for instance, we should not be dealing with heap
+ // load-store dependencies here.
- for (auto &MO : MI->operands()) {
- if (MO.isReg() && MO.getReg()) {
- for (auto &RegDef : RegDefs) {
- unsigned Reg = RegDef.first;
- MachineInstr *MI = RegDef.second;
- if (!TRI.regsOverlap(Reg, MO.getReg()))
- continue;
+ for (auto MOA : A->operands()) {
+ if (!(MOA.isReg() && MOA.getReg()))
+ continue;
- // We found a write-after-write or read-after-write, see if the
- // instruction causing this dependency can be hoisted too.
-
- if (MI == getUnknownMI())
- // We don't have precise dependency information.
- return false;
-
- if (Dependency) {
- if (Dependency == MI)
- continue;
- // We already have one dependency, and we can track only one.
- return false;
- }
-
- // Now check if MI is actually a dependency that can be hoisted.
-
- // We don't want to track transitive dependencies. We already know that
- // MI is the only instruction that defines Reg, but we need to be sure
- // that it does not use any registers that have been defined (trivially
- // checked below by ensuring that there are no register uses), and that
- // it is the only def for every register it defines (otherwise we could
- // violate a write after write hazard).
- auto IsMIOperandSafe = [&](MachineOperand &MO) {
- if (!MO.isReg() || !MO.getReg())
- return true;
- if (MO.isUse())
- return false;
- assert((!MO.isDef() || RegDefs.count(MO.getReg())) &&
- "All defs must be tracked in RegDefs by now!");
- return !MO.isDef() || RegDefs.find(MO.getReg())->second == MI;
- };
-
- if (!all_of(MI->operands(), IsMIOperandSafe))
- return false;
-
- // Now check for speculation safety:
- bool SawStore = true;
- if (!MI->isSafeToMove(&AA, SawStore) || MI->mayLoad())
- return false;
-
- Dependency = MI;
- }
+ unsigned RegA = MOA.getReg();
+ for (auto MOB : B->operands()) {
+ if (!(MOB.isReg() && MOB.getReg()))
+ continue;
- if (MO.isDef())
- for (unsigned Reg : RegUses)
- if (TRI.regsOverlap(Reg, MO.getReg()))
- return false; // We found a write-after-read
+ unsigned RegB = MOB.getReg();
+
+ if (TRI->regsOverlap(RegA, RegB))
+ return false;
}
}
@@ -316,6 +283,96 @@ static bool AnyAliasLiveIn(const TargetRegisterInfo *TRI,
return false;
}
+bool ImplicitNullChecks::isSuitableMemoryOp(
+ MachineInstr &MI, unsigned PointerReg, ArrayRef<MachineInstr *> PrevInsts) {
+ int64_t Offset;
+ unsigned BaseReg;
+
+ if (!TII->getMemOpBaseRegImmOfs(MI, BaseReg, Offset, TRI) ||
+ BaseReg != PointerReg)
+ return false;
+
+ // We want the load to be issued at a sane offset from PointerReg, so that
+ // if PointerReg is null then the load reliably page faults.
+ if (!(MI.mayLoad() && !MI.isPredicable() && Offset < PageSize))
+ return false;
+
+ // Finally, we need to make sure that the load instruction actually is
+ // loading from PointerReg, and there isn't some re-definition of PointerReg
+ // between the compare and the load.
+ for (auto *PrevMI : PrevInsts)
+ for (auto &PrevMO : PrevMI->operands())
+ if (PrevMO.isReg() && PrevMO.getReg() &&
+ TRI->regsOverlap(PrevMO.getReg(), PointerReg))
+ return false;
+
+ return true;
+}
+
+bool ImplicitNullChecks::canHoistLoadInst(
+ MachineInstr *FaultingMI, unsigned PointerReg,
+ ArrayRef<MachineInstr *> InstsSeenSoFar, MachineBasicBlock *NullSucc,
+ MachineInstr *&Dependence) {
+ auto DepResult = computeDependence(FaultingMI, InstsSeenSoFar);
+ if (!DepResult.CanReorder)
+ return false;
+
+ if (!DepResult.PotentialDependence) {
+ Dependence = nullptr;
+ return true;
+ }
+
+ auto DependenceItr = *DepResult.PotentialDependence;
+ auto *DependenceMI = *DependenceItr;
+
+ // We don't want to reason about speculating loads. Note -- at this point
+ // we should have already filtered out all of the other non-speculatable
+ // things, like calls and stores.
+ assert(canHandle(DependenceMI) && "Should never have reached here!");
+ if (DependenceMI->mayLoad())
+ return false;
+
+ for (auto &DependenceMO : DependenceMI->operands()) {
+ if (!(DependenceMO.isReg() && DependenceMO.getReg()))
+ continue;
+
+ // Make sure that we won't clobber any live ins to the sibling block by
+ // hoisting Dependency. For instance, we can't hoist INST to before the
+ // null check (even if it is safe, and does not violate any dependencies in
+ // the non_null_block) if %rdx is live in to _null_block.
+ //
+ // test %rcx, %rcx
+ // je _null_block
+ // _non_null_block:
+ // %rdx<def> = INST
+ // ...
+ //
+ // This restriction does not apply to the faulting load inst because in
+ // case the pointer loaded from is in the null page, the load will not
+ // semantically execute, and affect machine state. That is, if the load
+ // was loading into %rax and it faults, the value of %rax should stay the
+ // same as it would have been had the load not have executed and we'd have
+ // branched to NullSucc directly.
+ if (AnyAliasLiveIn(TRI, NullSucc, DependenceMO.getReg()))
+ return false;
+
+ // The Dependency can't be re-defining the base register -- then we won't
+ // get the memory operation on the address we want. This is already
+ // checked in \c isSuitableMemoryOp.
+ assert(!TRI->regsOverlap(DependenceMO.getReg(), PointerReg) &&
+ "Should have been checked before!");
+ }
+
+ auto DepDepResult =
+ computeDependence(DependenceMI, {InstsSeenSoFar.begin(), DependenceItr});
+
+ if (!DepDepResult.CanReorder || DepDepResult.PotentialDependence)
+ return false;
+
+ Dependence = DependenceMI;
+ return true;
+}
+
/// Analyze MBB to check if its terminating branch can be turned into an
/// implicit null check. If yes, append a description of the said null check to
/// NullCheckList and return true, else return false.
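computeDependence itself is not shown in this hunk, so the following is a toy model of the contract canHoistLoadInst relies on above, simplified to write-after-read hazards: no interfering instruction means the load hoists freely, exactly one is remembered as the potential dependence, and two or more make reordering impossible. All names here (ToyInst, this shape of DepResult) are illustrative only, not the pass's real types.

#include <cassert>
#include <cstddef>
#include <optional>
#include <vector>

struct ToyInst {
  std::vector<int> Uses; // registers read
  std::vector<int> Defs; // registers written
};

static bool writesRegReadBy(const ToyInst &A, const ToyInst &B) {
  for (int D : A.Defs)
    for (int U : B.Uses)
      if (D == U)
        return true;
  return false;
}

struct DepResult {
  bool CanReorder = true;
  std::optional<std::size_t> PotentialDependence; // index into scanned range
};

// Assumed contract: zero interfering instructions means the load hoists
// freely; exactly one is remembered as the potential dependence; two or
// more means no reordering at all.
static DepResult computeDependence(const ToyInst &Load,
                                   const std::vector<ToyInst> &SeenSoFar) {
  DepResult R;
  for (std::size_t I = 0; I != SeenSoFar.size(); ++I) {
    if (!writesRegReadBy(SeenSoFar[I], Load))
      continue;
    if (R.PotentialDependence) {
      R.CanReorder = false; // second interference: give up
      return R;
    }
    R.PotentialDependence = I;
  }
  return R;
}

int main() {
  ToyInst Load{{1}, {2}}; // loads through register 1 into register 2
  ToyInst DefR1{{}, {1}}; // redefines register 1
  DepResult R = computeDependence(Load, {DefR1});
  assert(R.CanReorder && R.PotentialDependence && *R.PotentialDependence == 0);
}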
@@ -415,63 +472,24 @@ bool ImplicitNullChecks::analyzeBlockForNullChecks(
// ptr could be some non-null invalid reference that never gets loaded from
// because some_cond is always true.
- unsigned PointerReg = MBP.LHS.getReg();
-
- HazardDetector HD(*TRI, *AA);
-
- for (auto MII = NotNullSucc->begin(), MIE = NotNullSucc->end(); MII != MIE;
- ++MII) {
- MachineInstr &MI = *MII;
- unsigned BaseReg;
- int64_t Offset;
- MachineInstr *Dependency = nullptr;
- if (TII->getMemOpBaseRegImmOfs(MI, BaseReg, Offset, TRI))
- if (MI.mayLoad() && !MI.isPredicable() && BaseReg == PointerReg &&
- Offset < PageSize && MI.getDesc().getNumDefs() <= 1 &&
- HD.isSafeToHoist(&MI, Dependency)) {
-
- auto DependencyOperandIsOk = [&](MachineOperand &MO) {
- assert(!(MO.isReg() && MO.isUse()) &&
- "No transitive dependendencies please!");
- if (!MO.isReg() || !MO.getReg() || !MO.isDef())
- return true;
-
- // Make sure that we won't clobber any live ins to the sibling block
- // by hoisting Dependency. For instance, we can't hoist INST to
- // before the null check (even if it safe, and does not violate any
- // dependencies in the non_null_block) if %rdx is live in to
- // _null_block.
- //
- // test %rcx, %rcx
- // je _null_block
- // _non_null_block:
- // %rdx<def> = INST
- // ...
- if (AnyAliasLiveIn(TRI, NullSucc, MO.getReg()))
- return false;
-
- // Make sure Dependency isn't re-defining the base register. Then we
- // won't get the memory operation on the address we want.
- if (TRI->regsOverlap(MO.getReg(), BaseReg))
- return false;
-
- return true;
- };
-
- bool DependencyOperandsAreOk =
- !Dependency ||
- all_of(Dependency->operands(), DependencyOperandIsOk);
-
- if (DependencyOperandsAreOk) {
- NullCheckList.emplace_back(&MI, MBP.ConditionDef, &MBB, NotNullSucc,
- NullSucc, Dependency);
- return true;
- }
- }
+ const unsigned PointerReg = MBP.LHS.getReg();
- HD.rememberInstruction(&MI);
- if (HD.isClobbered())
+ SmallVector<MachineInstr *, 8> InstsSeenSoFar;
+
+ for (auto &MI : *NotNullSucc) {
+ if (!canHandle(&MI) || InstsSeenSoFar.size() >= MaxInstsToConsider)
return false;
+
+ MachineInstr *Dependence;
+ if (isSuitableMemoryOp(MI, PointerReg, InstsSeenSoFar) &&
+ canHoistLoadInst(&MI, PointerReg, InstsSeenSoFar, NullSucc,
+ Dependence)) {
+ NullCheckList.emplace_back(&MI, MBP.ConditionDef, &MBB, NotNullSucc,
+ NullSucc, Dependence);
+ return true;
+ }
+
+ InstsSeenSoFar.push_back(&MI);
}
return false;
@@ -518,7 +536,7 @@ void ImplicitNullChecks::rewriteNullChecks(
for (auto &NC : NullCheckList) {
// Remove the conditional branch dependent on the null check.
- unsigned BranchesRemoved = TII->RemoveBranch(*NC.getCheckBlock());
+ unsigned BranchesRemoved = TII->removeBranch(*NC.getCheckBlock());
(void)BranchesRemoved;
assert(BranchesRemoved > 0 && "expected at least one branch!");
@@ -560,13 +578,14 @@ void ImplicitNullChecks::rewriteNullChecks(
NC.getCheckOperation()->eraseFromParent();
// Insert an *unconditional* branch to not-null successor.
- TII->InsertBranch(*NC.getCheckBlock(), NC.getNotNullSucc(), nullptr,
+ TII->insertBranch(*NC.getCheckBlock(), NC.getNotNullSucc(), nullptr,
/*Cond=*/None, DL);
NumImplicitNullChecks++;
}
}
+
char ImplicitNullChecks::ID = 0;
char &llvm::ImplicitNullChecksID = ImplicitNullChecks::ID;
INITIALIZE_PASS_BEGIN(ImplicitNullChecks, "implicit-null-checks",
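For readers new to this pass: the transformation only removes the explicit branch; some runtime still has to turn the resulting page fault back into the null path (LLVM emits FaultMaps for exactly this). Below is a minimal userland sketch of that idea, assuming a POSIX host where the null page is unmapped; it is a demo of the mechanism (the null dereference is formally UB), not portable code and not the FaultMaps machinery itself.

#include <setjmp.h>
#include <signal.h>
#include <stdio.h>

static sigjmp_buf RecoveryPoint;

static void OnSegv(int) {
  // A real runtime would look up the faulting PC in the emitted fault map
  // and resume at the recorded handler block; here we just long-jump.
  siglongjmp(RecoveryPoint, 1);
}

// The "implicit" version of: if (!P) return Default; return *P;
static int loadOrDefault(const int *P, int Default) {
  if (sigsetjmp(RecoveryPoint, /*savemask=*/1))
    return Default; // reached via the fault: the null path
  return *P;        // the faulting load, with no explicit null test
}

int main() {
  struct sigaction SA;
  SA.sa_handler = OnSegv;
  sigemptyset(&SA.sa_mask);
  SA.sa_flags = 0;
  sigaction(SIGSEGV, &SA, nullptr);

  int X = 42;
  printf("%d %d\n", loadOrDefault(&X, -1), loadOrDefault(nullptr, -1));
}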
diff --git a/contrib/llvm/lib/CodeGen/InlineSpiller.cpp b/contrib/llvm/lib/CodeGen/InlineSpiller.cpp
index 197db77..3d81184 100644
--- a/contrib/llvm/lib/CodeGen/InlineSpiller.cpp
+++ b/contrib/llvm/lib/CodeGen/InlineSpiller.cpp
@@ -114,7 +114,7 @@ public:
AA(&pass.getAnalysis<AAResultsWrapperPass>().getAAResults()),
MDT(pass.getAnalysis<MachineDominatorTree>()),
Loops(pass.getAnalysis<MachineLoopInfo>()), VRM(vrm),
- MFI(*mf.getFrameInfo()), MRI(mf.getRegInfo()),
+ MFI(mf.getFrameInfo()), MRI(mf.getRegInfo()),
TII(*mf.getSubtarget().getInstrInfo()),
TRI(*mf.getSubtarget().getRegisterInfo()),
MBFI(pass.getAnalysis<MachineBlockFrequencyInfo>()),
@@ -172,7 +172,7 @@ public:
AA(&pass.getAnalysis<AAResultsWrapperPass>().getAAResults()),
MDT(pass.getAnalysis<MachineDominatorTree>()),
Loops(pass.getAnalysis<MachineLoopInfo>()), VRM(vrm),
- MFI(*mf.getFrameInfo()), MRI(mf.getRegInfo()),
+ MFI(mf.getFrameInfo()), MRI(mf.getRegInfo()),
TII(*mf.getSubtarget().getInstrInfo()),
TRI(*mf.getSubtarget().getRegisterInfo()),
MBFI(pass.getAnalysis<MachineBlockFrequencyInfo>()),
@@ -185,10 +185,7 @@ private:
bool isSnippet(const LiveInterval &SnipLI);
void collectRegsToSpill();
- bool isRegToSpill(unsigned Reg) {
- return std::find(RegsToSpill.begin(),
- RegsToSpill.end(), Reg) != RegsToSpill.end();
- }
+ bool isRegToSpill(unsigned Reg) { return is_contained(RegsToSpill, Reg); }
bool isSibling(unsigned Reg);
bool hoistSpillInsideBB(LiveInterval &SpillLI, MachineInstr &CopyMI);
@@ -380,7 +377,7 @@ bool InlineSpiller::hoistSpillInsideBB(LiveInterval &SpillLI,
MachineBasicBlock *MBB = LIS.getMBBFromIndex(SrcVNI->def);
MachineBasicBlock::iterator MII;
if (SrcVNI->isPHIDef())
- MII = MBB->SkipPHIsAndLabels(MBB->begin());
+ MII = MBB->SkipPHIsLabelsAndDebug(MBB->begin());
else {
MachineInstr *DefMI = LIS.getInstructionFromIndex(SrcVNI->def);
assert(DefMI && "Defining instruction disappeared");
@@ -553,12 +550,18 @@ bool InlineSpiller::reMaterializeFor(LiveInterval &VirtReg, MachineInstr &MI) {
return true;
}
- // Alocate a new register for the remat.
+ // Allocate a new register for the remat.
unsigned NewVReg = Edit->createFrom(Original);
// Finally we can rematerialize OrigMI before MI.
SlotIndex DefIdx =
Edit->rematerializeAt(*MI.getParent(), MI, NewVReg, RM, TRI);
+
+ // We take the DebugLoc from MI, since OrigMI may be attributed to a
+ // different source location.
+ auto *NewMI = LIS.getInstructionFromIndex(DefIdx);
+ NewMI->setDebugLoc(MI.getDebugLoc());
+
(void)DefIdx;
DEBUG(dbgs() << "\tremat: " << DefIdx << '\t'
<< *LIS.getInstructionFromIndex(DefIdx));
@@ -736,9 +739,12 @@ foldMemoryOperand(ArrayRef<std::pair<MachineInstr*, unsigned> > Ops,
bool WasCopy = MI->isCopy();
unsigned ImpReg = 0;
- bool SpillSubRegs = (MI->getOpcode() == TargetOpcode::STATEPOINT ||
- MI->getOpcode() == TargetOpcode::PATCHPOINT ||
- MI->getOpcode() == TargetOpcode::STACKMAP);
+ // Spill subregs if the target allows it.
+ // We always want to spill subregs for stackmap/patchpoint pseudos.
+ bool SpillSubRegs = TII.isSubregFoldable() ||
+ MI->getOpcode() == TargetOpcode::STATEPOINT ||
+ MI->getOpcode() == TargetOpcode::PATCHPOINT ||
+ MI->getOpcode() == TargetOpcode::STACKMAP;
// TargetInstrInfo::foldMemoryOperand only expects explicit, non-tied
// operands.
@@ -751,7 +757,7 @@ foldMemoryOperand(ArrayRef<std::pair<MachineInstr*, unsigned> > Ops,
ImpReg = MO.getReg();
continue;
}
- // FIXME: Teach targets to deal with subregs.
+
if (!SpillSubRegs && MO.getSubReg())
return false;
// We cannot fold a load instruction into a def.
@@ -762,6 +768,11 @@ foldMemoryOperand(ArrayRef<std::pair<MachineInstr*, unsigned> > Ops,
FoldOps.push_back(Idx);
}
+ // If we only have implicit uses, we won't be able to fold that.
+ // Moreover, TargetInstrInfo::foldMemoryOperand will assert if we try!
+ if (FoldOps.empty())
+ return false;
+
MachineInstrSpan MIS(MI);
MachineInstr *FoldMI =
@@ -1113,7 +1124,7 @@ void HoistSpillHelper::rmRedundantSpills(
// earlier spill with smaller SlotIndex.
for (const auto CurrentSpill : Spills) {
MachineBasicBlock *Block = CurrentSpill->getParent();
- MachineDomTreeNode *Node = MDT.DT->getNode(Block);
+ MachineDomTreeNode *Node = MDT.getBase().getNode(Block);
MachineInstr *PrevSpill = SpillBBToSpill[Node];
if (PrevSpill) {
SlotIndex PIdx = LIS.getInstructionIndex(*PrevSpill);
@@ -1121,9 +1132,9 @@ void HoistSpillHelper::rmRedundantSpills(
MachineInstr *SpillToRm = (CIdx > PIdx) ? CurrentSpill : PrevSpill;
MachineInstr *SpillToKeep = (CIdx > PIdx) ? PrevSpill : CurrentSpill;
SpillsToRm.push_back(SpillToRm);
- SpillBBToSpill[MDT.DT->getNode(Block)] = SpillToKeep;
+ SpillBBToSpill[MDT.getBase().getNode(Block)] = SpillToKeep;
} else {
- SpillBBToSpill[MDT.DT->getNode(Block)] = CurrentSpill;
+ SpillBBToSpill[MDT.getBase().getNode(Block)] = CurrentSpill;
}
}
for (const auto SpillToRm : SpillsToRm)
@@ -1198,7 +1209,7 @@ void HoistSpillHelper::getVisitOrders(
// Sort the nodes in WorkSet in top-down order and save the nodes
// in Orders. Orders will be used for hoisting in runHoistSpills.
unsigned idx = 0;
- Orders.push_back(MDT.DT->getNode(Root));
+ Orders.push_back(MDT.getBase().getNode(Root));
do {
MachineDomTreeNode *Node = Orders[idx++];
const std::vector<MachineDomTreeNode *> &Children = Node->getChildren();
diff --git a/contrib/llvm/lib/CodeGen/InterleavedAccessPass.cpp b/contrib/llvm/lib/CodeGen/InterleavedAccessPass.cpp
index 3f11119..ec35b3f 100644
--- a/contrib/llvm/lib/CodeGen/InterleavedAccessPass.cpp
+++ b/contrib/llvm/lib/CodeGen/InterleavedAccessPass.cpp
@@ -29,6 +29,9 @@
// It could be transformed into a ld2 intrinsic in AArch64 backend or a vld2
// intrinsic in ARM backend.
//
+// On X86, this can be further optimized into a set of target-specific
+// loads followed by an optimized sequence of shuffles.
+//
// E.g. An interleaved store (Factor = 3):
// %i.vec = shuffle <8 x i32> %v0, <8 x i32> %v1,
// <0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11>
@@ -37,6 +40,8 @@
// It could be transformed into a st3 intrinsic in AArch64 backend or a vst3
// intrinsic in ARM backend.
//
+// Similarly, on X86 a set of interleaved stores can be transformed into an
+// optimized sequence of shuffles followed by a set of target-specific stores.
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/Passes.h"
@@ -57,8 +62,6 @@ static cl::opt<bool> LowerInterleavedAccesses(
cl::desc("Enable lowering interleaved accesses to intrinsics"),
cl::init(true), cl::Hidden);
-static unsigned MaxFactor; // The maximum supported interleave factor.
-
namespace {
class InterleavedAccess : public FunctionPass {
@@ -70,7 +73,7 @@ public:
initializeInterleavedAccessPass(*PassRegistry::getPassRegistry());
}
- const char *getPassName() const override { return "Interleaved Access Pass"; }
+ StringRef getPassName() const override { return "Interleaved Access Pass"; }
bool runOnFunction(Function &F) override;
@@ -84,6 +87,9 @@ private:
const TargetMachine *TM;
const TargetLowering *TLI;
+ /// The maximum supported interleave factor.
+ unsigned MaxFactor;
+
/// \brief Transform an interleaved load into target specific intrinsics.
bool lowerInterleavedLoad(LoadInst *LI,
SmallVector<Instruction *, 32> &DeadInsts);
@@ -144,7 +150,7 @@ static bool isDeInterleaveMaskOfFactor(ArrayRef<int> Mask, unsigned Factor,
/// <0, 2, 4, 6> (mask of index 0 to extract even elements)
/// <1, 3, 5, 7> (mask of index 1 to extract odd elements)
static bool isDeInterleaveMask(ArrayRef<int> Mask, unsigned &Factor,
- unsigned &Index) {
+ unsigned &Index, unsigned MaxFactor) {
if (Mask.size() < 2)
return false;
@@ -156,13 +162,19 @@ static bool isDeInterleaveMask(ArrayRef<int> Mask, unsigned &Factor,
return false;
}
-/// \brief Check if the mask is RE-interleave mask for an interleaved store.
-///
-/// I.e. <0, NumSubElts, ... , NumSubElts*(Factor - 1), 1, NumSubElts + 1, ...>
+/// \brief Check if the mask can be used in an interleaved store.
+///
+/// It checks for a more general pattern than the RE-interleave mask.
+/// I.e. <x, y, ... z, x+1, y+1, ...z+1, x+2, y+2, ...z+2, ...>
+/// E.g. For a Factor of 2 (LaneLen=4): <4, 32, 5, 33, 6, 34, 7, 35>
+/// E.g. For a Factor of 3 (LaneLen=4): <4, 32, 16, 5, 33, 17, 6, 34, 18, 7, 35, 19>
+/// E.g. For a Factor of 4 (LaneLen=2): <8, 2, 12, 4, 9, 3, 13, 5>
///
-/// E.g. The RE-interleave mask (Factor = 2) could be:
-/// <0, 4, 1, 5, 2, 6, 3, 7>
-static bool isReInterleaveMask(ArrayRef<int> Mask, unsigned &Factor) {
+/// The particular case of an RE-interleave mask is:
+/// <0, LaneLen, ... , LaneLen*(Factor - 1), 1, LaneLen + 1, ...>
+/// E.g. For a Factor of 2 (LaneLen=4): <0, 4, 1, 5, 2, 6, 3, 7>
+static bool isReInterleaveMask(ArrayRef<int> Mask, unsigned &Factor,
+ unsigned MaxFactor, unsigned OpNumElts) {
unsigned NumElts = Mask.size();
if (NumElts < 4)
return false;
@@ -172,21 +184,75 @@ static bool isReInterleaveMask(ArrayRef<int> Mask, unsigned &Factor) {
if (NumElts % Factor)
continue;
- unsigned NumSubElts = NumElts / Factor;
- if (!isPowerOf2_32(NumSubElts))
+ unsigned LaneLen = NumElts / Factor;
+ if (!isPowerOf2_32(LaneLen))
continue;
- // Check whether each element matchs the RE-interleaved rule. Ignore undef
- // elements.
- unsigned i = 0;
- for (; i < NumElts; i++)
- if (Mask[i] >= 0 &&
- static_cast<unsigned>(Mask[i]) !=
- (i % Factor) * NumSubElts + i / Factor)
+ // Check whether each element matches the general interleaved rule.
+ // Ignore undef elements, as long as the defined elements match the rule.
+ // Outer loop processes all factors (x, y, z in the above example)
+ unsigned I = 0, J;
+ for (; I < Factor; I++) {
+ unsigned SavedLaneValue;
+ unsigned SavedNoUndefs = 0;
+
+ // Inner loop processes consecutive accesses (x, x+1... in the example)
+ for (J = 0; J < LaneLen - 1; J++) {
+ // Lane computes x's position in the Mask
+ unsigned Lane = J * Factor + I;
+ unsigned NextLane = Lane + Factor;
+ int LaneValue = Mask[Lane];
+ int NextLaneValue = Mask[NextLane];
+
+ // If both are defined, values must be sequential
+ if (LaneValue >= 0 && NextLaneValue >= 0 &&
+ LaneValue + 1 != NextLaneValue)
+ break;
+
+ // If the next value is undef, save the current one as reference
+ if (LaneValue >= 0 && NextLaneValue < 0) {
+ SavedLaneValue = LaneValue;
+ SavedNoUndefs = 1;
+ }
+
+ // Undefs are allowed, but defined elements must still be consecutive:
+ // i.e.: x,..., undef,..., x + 2,..., undef,..., undef,..., x + 5, ....
+ // Verify this by recording the last non-undef value followed by an
+ // undef, and checking that subsequent non-undef values are incremented
+ // by the corresponding distance.
+ if (SavedNoUndefs > 0 && LaneValue < 0) {
+ SavedNoUndefs++;
+ if (NextLaneValue >= 0 &&
+ SavedLaneValue + SavedNoUndefs != (unsigned)NextLaneValue)
+ break;
+ }
+ }
+
+ if (J < LaneLen - 1)
break;
- // Find a RE-interleaved mask of current factor.
- if (i == NumElts)
+ int StartMask = 0;
+ if (Mask[I] >= 0) {
+ // The start of the I range (J = 0) is defined; use it directly.
+ StartMask = Mask[I];
+ } else if (Mask[(LaneLen - 1) * Factor + I] >= 0) {
+ // StartMask defined by the last value in lane
+ StartMask = Mask[(LaneLen - 1) * Factor + I] - J;
+ } else if (SavedNoUndefs > 0) {
+ // StartMask defined by some non-undef value seen in the J loop
+ StartMask = SavedLaneValue - (LaneLen - 1 - SavedNoUndefs);
+ }
+ // else StartMask remains set to 0, i.e. all elements are undefs
+
+ if (StartMask < 0)
+ break;
+ // We must stay within the vectors; this case can happen with undefs.
+ if (StartMask + LaneLen > OpNumElts*2)
+ break;
+ }
+
+ // Found an interleaved mask of current factor.
+ if (I == Factor)
return true;
}
@@ -224,7 +290,8 @@ bool InterleavedAccess::lowerInterleavedLoad(
unsigned Factor, Index;
// Check if the first shufflevector is DE-interleave shuffle.
- if (!isDeInterleaveMask(Shuffles[0]->getShuffleMask(), Factor, Index))
+ if (!isDeInterleaveMask(Shuffles[0]->getShuffleMask(), Factor, Index,
+ MaxFactor))
return false;
// Holds the corresponding index for each DE-interleave shuffle.
@@ -342,7 +409,8 @@ bool InterleavedAccess::lowerInterleavedStore(
// Check if the shufflevector is RE-interleave shuffle.
unsigned Factor;
- if (!isReInterleaveMask(SVI->getShuffleMask(), Factor))
+ unsigned OpNumElts = SVI->getOperand(0)->getType()->getVectorNumElements();
+ if (!isReInterleaveMask(SVI->getShuffleMask(), Factor, MaxFactor, OpNumElts))
return false;
DEBUG(dbgs() << "IA: Found an interleaved store: " << *SI << "\n");
diff --git a/contrib/llvm/lib/CodeGen/IntrinsicLowering.cpp b/contrib/llvm/lib/CodeGen/IntrinsicLowering.cpp
index 2962f87..afd2406 100644
--- a/contrib/llvm/lib/CodeGen/IntrinsicLowering.cpp
+++ b/contrib/llvm/lib/CodeGen/IntrinsicLowering.cpp
@@ -436,8 +436,14 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) {
errs() << "WARNING: this target does not support the llvm."
<< (Callee->getIntrinsicID() == Intrinsic::returnaddress ?
"return" : "frame") << "address intrinsic.\n";
- CI->replaceAllUsesWith(ConstantPointerNull::get(
- cast<PointerType>(CI->getType())));
+ CI->replaceAllUsesWith(
+ ConstantPointerNull::get(cast<PointerType>(CI->getType())));
+ break;
+ case Intrinsic::addressofreturnaddress:
+ errs() << "WARNING: this target does not support the "
+ "llvm.addressofreturnaddress intrinsic.\n";
+ CI->replaceAllUsesWith(
+ ConstantPointerNull::get(cast<PointerType>(CI->getType())));
break;
case Intrinsic::prefetch:
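In practice this lowering means that on a target without the intrinsic, code such as the following still compiles and runs, but the reported address silently becomes null. A small illustration using the Clang/GCC builtin that lowers to llvm.returnaddress:

#include <cstdio>

void whoCalledMe() {
  // Lowers to llvm.returnaddress; on an unsupported target the lowering
  // above replaces the result with a null pointer and prints a warning.
  void *RA = __builtin_return_address(0);
  std::printf("return address: %p\n", RA);
}

int main() { whoCalledMe(); }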
diff --git a/contrib/llvm/lib/CodeGen/LLVMTargetMachine.cpp b/contrib/llvm/lib/CodeGen/LLVMTargetMachine.cpp
index 9eb43d2..26794e2 100644
--- a/contrib/llvm/lib/CodeGen/LLVMTargetMachine.cpp
+++ b/contrib/llvm/lib/CodeGen/LLVMTargetMachine.cpp
@@ -15,7 +15,6 @@
#include "llvm/Analysis/Passes.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/BasicTTIImpl.h"
-#include "llvm/CodeGen/MachineFunctionAnalysis.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetPassConfig.h"
@@ -102,25 +101,12 @@ TargetIRAnalysis LLVMTargetMachine::getTargetIRAnalysis() {
});
}
-MachineModuleInfo &
-LLVMTargetMachine::addMachineModuleInfo(PassManagerBase &PM) const {
- MachineModuleInfo *MMI = new MachineModuleInfo(*getMCAsmInfo(),
- *getMCRegisterInfo(),
- getObjFileLowering());
- PM.add(MMI);
- return *MMI;
-}
-
-void LLVMTargetMachine::addMachineFunctionAnalysis(PassManagerBase &PM,
- MachineFunctionInitializer *MFInitializer) const {
- PM.add(new MachineFunctionAnalysis(*this, MFInitializer));
-}
-
/// addPassesToX helper drives creation and initialization of TargetPassConfig.
static MCContext *
addPassesToGenerateCode(LLVMTargetMachine *TM, PassManagerBase &PM,
bool DisableVerify, AnalysisID StartBefore,
- AnalysisID StartAfter, AnalysisID StopAfter,
+ AnalysisID StartAfter, AnalysisID StopBefore,
+ AnalysisID StopAfter,
MachineFunctionInitializer *MFInitializer = nullptr) {
// When in emulated TLS mode, add the LowerEmuTLS pass.
@@ -135,7 +121,8 @@ addPassesToGenerateCode(LLVMTargetMachine *TM, PassManagerBase &PM,
// Targets may override createPassConfig to provide a target-specific
// subclass.
TargetPassConfig *PassConfig = TM->createPassConfig(PM);
- PassConfig->setStartStopPasses(StartBefore, StartAfter, StopAfter);
+ PassConfig->setStartStopPasses(StartBefore, StartAfter, StopBefore,
+ StopAfter);
// Set PassConfig options provided by TargetMachine.
PassConfig->setDisableVerify(DisableVerify);
@@ -150,8 +137,9 @@ addPassesToGenerateCode(LLVMTargetMachine *TM, PassManagerBase &PM,
PassConfig->addISelPrepare();
- MachineModuleInfo &MMI = TM->addMachineModuleInfo(PM);
- TM->addMachineFunctionAnalysis(PM, MFInitializer);
+ MachineModuleInfo *MMI = new MachineModuleInfo(TM);
+ MMI->setMachineFunctionInitializer(MFInitializer);
+ PM.add(MMI);
// Enable FastISel with -fast, but allow that to be overridden.
TM->setO0WantsFastISel(EnableFastISelOption != cl::BOU_FALSE);
@@ -165,6 +153,11 @@ addPassesToGenerateCode(LLVMTargetMachine *TM, PassManagerBase &PM,
if (PassConfig->addIRTranslator())
return nullptr;
+ PassConfig->addPreLegalizeMachineIR();
+
+ if (PassConfig->addLegalizeMachineIR())
+ return nullptr;
+
// Before running the register bank selector, ask the target if it
// wants to run some passes.
PassConfig->addPreRegBankSelect();
@@ -172,6 +165,21 @@ addPassesToGenerateCode(LLVMTargetMachine *TM, PassManagerBase &PM,
if (PassConfig->addRegBankSelect())
return nullptr;
+ PassConfig->addPreGlobalInstructionSelect();
+
+ if (PassConfig->addGlobalInstructionSelect())
+ return nullptr;
+
+ // Pass to reset the MachineFunction if the ISel failed.
+ PM.add(createResetMachineFunctionPass(
+ PassConfig->reportDiagnosticWhenGlobalISelFallback()));
+
+ // Provide a fallback path when we do not want to abort on
+ // not-yet-supported input.
+ if (LLVM_UNLIKELY(!PassConfig->isGlobalISelAbortEnabled()) &&
+ PassConfig->addInstSelector())
+ return nullptr;
+
} else if (PassConfig->addInstSelector())
return nullptr;
@@ -179,21 +187,22 @@ addPassesToGenerateCode(LLVMTargetMachine *TM, PassManagerBase &PM,
PassConfig->setInitialized();
- return &MMI.getContext();
+ return &MMI->getContext();
}
bool LLVMTargetMachine::addPassesToEmitFile(
PassManagerBase &PM, raw_pwrite_stream &Out, CodeGenFileType FileType,
bool DisableVerify, AnalysisID StartBefore, AnalysisID StartAfter,
- AnalysisID StopAfter, MachineFunctionInitializer *MFInitializer) {
+ AnalysisID StopBefore, AnalysisID StopAfter,
+ MachineFunctionInitializer *MFInitializer) {
// Add common CodeGen passes.
MCContext *Context =
addPassesToGenerateCode(this, PM, DisableVerify, StartBefore, StartAfter,
- StopAfter, MFInitializer);
+ StopBefore, StopAfter, MFInitializer);
if (!Context)
return true;
- if (StopAfter) {
+ if (StopBefore || StopAfter) {
PM.add(createPrintMIRPass(Out));
return false;
}
@@ -219,7 +228,8 @@ bool LLVMTargetMachine::addPassesToEmitFile(
MCE = getTarget().createMCCodeEmitter(MII, MRI, *Context);
MCAsmBackend *MAB =
- getTarget().createMCAsmBackend(MRI, getTargetTriple().str(), TargetCPU);
+ getTarget().createMCAsmBackend(MRI, getTargetTriple().str(), TargetCPU,
+ Options.MCOptions);
auto FOut = llvm::make_unique<formatted_raw_ostream>(Out);
MCStreamer *S = getTarget().createAsmStreamer(
*Context, std::move(FOut), Options.MCOptions.AsmVerbose,
@@ -233,7 +243,8 @@ bool LLVMTargetMachine::addPassesToEmitFile(
// emission fails.
MCCodeEmitter *MCE = getTarget().createMCCodeEmitter(MII, MRI, *Context);
MCAsmBackend *MAB =
- getTarget().createMCAsmBackend(MRI, getTargetTriple().str(), TargetCPU);
+ getTarget().createMCAsmBackend(MRI, getTargetTriple().str(), TargetCPU,
+ Options.MCOptions);
if (!MCE || !MAB)
return true;
@@ -261,6 +272,7 @@ bool LLVMTargetMachine::addPassesToEmitFile(
return true;
PM.add(Printer);
+ PM.add(createFreeMachineFunctionPass());
return false;
}
@@ -275,7 +287,7 @@ bool LLVMTargetMachine::addPassesToEmitMC(PassManagerBase &PM, MCContext *&Ctx,
bool DisableVerify) {
// Add common CodeGen passes.
Ctx = addPassesToGenerateCode(this, PM, DisableVerify, nullptr, nullptr,
- nullptr);
+ nullptr, nullptr);
if (!Ctx)
return true;
@@ -288,7 +300,8 @@ bool LLVMTargetMachine::addPassesToEmitMC(PassManagerBase &PM, MCContext *&Ctx,
MCCodeEmitter *MCE =
getTarget().createMCCodeEmitter(*getMCInstrInfo(), MRI, *Ctx);
MCAsmBackend *MAB =
- getTarget().createMCAsmBackend(MRI, getTargetTriple().str(), TargetCPU);
+ getTarget().createMCAsmBackend(MRI, getTargetTriple().str(), TargetCPU,
+ Options.MCOptions);
if (!MCE || !MAB)
return true;
@@ -306,6 +319,7 @@ bool LLVMTargetMachine::addPassesToEmitMC(PassManagerBase &PM, MCContext *&Ctx,
return true;
PM.add(Printer);
+ PM.add(createFreeMachineFunctionPass());
return false; // success!
}
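The new StopBefore knob composes with the existing StartBefore/StartAfter/StopAfter ones to cut a window out of the pass pipeline. Below is a minimal sketch of that windowing logic under assumed semantics (StartBefore and StopAfter inclusive, StartAfter and StopBefore exclusive); the names are illustrative, not TargetPassConfig's real implementation.

#include <string>
#include <vector>

struct StartStop {
  std::string StartBefore, StartAfter, StopBefore, StopAfter; // empty = unset
};

// Return the passes that actually run: start at StartBefore (inclusive) or
// just after StartAfter, stop just before StopBefore or at StopAfter
// (inclusive).
std::vector<std::string> runWindow(const std::vector<std::string> &Pipeline,
                                   const StartStop &SS) {
  std::vector<std::string> Ran;
  bool Started = SS.StartBefore.empty() && SS.StartAfter.empty();
  for (const std::string &P : Pipeline) {
    if (!Started && P == SS.StartBefore)
      Started = true;
    if (P == SS.StopBefore)
      break;
    if (Started)
      Ran.push_back(P);
    if (!Started && P == SS.StartAfter)
      Started = true;
    if (P == SS.StopAfter)
      break;
  }
  return Ran;
}

int main() {
  std::vector<std::string> Pipeline = {"isel", "regalloc", "prologepilog"};
  StartStop SS;
  SS.StopBefore = "regalloc";
  // Runs only "isel": everything from the front, stopping before regalloc.
  return runWindow(Pipeline, SS) == std::vector<std::string>{"isel"} ? 0 : 1;
}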
diff --git a/contrib/llvm/lib/CodeGen/LatencyPriorityQueue.cpp b/contrib/llvm/lib/CodeGen/LatencyPriorityQueue.cpp
index 4321849..86ef898 100644
--- a/contrib/llvm/lib/CodeGen/LatencyPriorityQueue.cpp
+++ b/contrib/llvm/lib/CodeGen/LatencyPriorityQueue.cpp
@@ -133,7 +133,7 @@ SUnit *LatencyPriorityQueue::pop() {
void LatencyPriorityQueue::remove(SUnit *SU) {
assert(!Queue.empty() && "Queue is empty!");
- std::vector<SUnit *>::iterator I = std::find(Queue.begin(), Queue.end(), SU);
+ std::vector<SUnit *>::iterator I = find(Queue, SU);
if (I != std::prev(Queue.end()))
std::swap(*I, Queue.back());
Queue.pop_back();
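The remove() above (now using llvm::find) is the usual unordered-erase idiom: swap the found element with the back and pop, so erasure is O(1) after the find instead of shifting the tail. A generic sketch of the same idiom:

#include <algorithm>
#include <cassert>
#include <vector>

// Erase Value from V without preserving order: O(1) after the find.
template <typename T> bool unorderedErase(std::vector<T> &V, const T &Value) {
  auto I = std::find(V.begin(), V.end(), Value);
  if (I == V.end())
    return false;
  if (I != std::prev(V.end()))
    std::swap(*I, V.back());
  V.pop_back();
  return true;
}

int main() {
  std::vector<int> V{1, 2, 3, 4};
  unorderedErase(V, 2); // V is now {1, 4, 3}
  assert((V == std::vector<int>{1, 4, 3}));
}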
diff --git a/contrib/llvm/lib/CodeGen/LexicalScopes.cpp b/contrib/llvm/lib/CodeGen/LexicalScopes.cpp
index b810176..834ed5f 100644
--- a/contrib/llvm/lib/CodeGen/LexicalScopes.cpp
+++ b/contrib/llvm/lib/CodeGen/LexicalScopes.cpp
@@ -222,17 +222,13 @@ void LexicalScopes::constructScopeNest(LexicalScope *Scope) {
LexicalScope *WS = WorkStack.back();
const SmallVectorImpl<LexicalScope *> &Children = WS->getChildren();
bool visitedChildren = false;
- for (SmallVectorImpl<LexicalScope *>::const_iterator SI = Children.begin(),
- SE = Children.end();
- SI != SE; ++SI) {
- LexicalScope *ChildScope = *SI;
+ for (auto &ChildScope : Children)
if (!ChildScope->getDFSOut()) {
WorkStack.push_back(ChildScope);
visitedChildren = true;
ChildScope->setDFSIn(++Counter);
break;
}
- }
if (!visitedChildren) {
WorkStack.pop_back();
WS->setDFSOut(++Counter);
@@ -247,10 +243,7 @@ void LexicalScopes::assignInstructionRanges(
DenseMap<const MachineInstr *, LexicalScope *> &MI2ScopeMap) {
LexicalScope *PrevLexicalScope = nullptr;
- for (SmallVectorImpl<InsnRange>::const_iterator RI = MIRanges.begin(),
- RE = MIRanges.end();
- RI != RE; ++RI) {
- const InsnRange &R = *RI;
+ for (const auto &R : MIRanges) {
LexicalScope *S = MI2ScopeMap.lookup(R.first);
assert(S && "Lost LexicalScope for a machine instruction!");
if (PrevLexicalScope && !PrevLexicalScope->dominates(S))
@@ -281,12 +274,8 @@ void LexicalScopes::getMachineBasicBlocks(
}
SmallVectorImpl<InsnRange> &InsnRanges = Scope->getRanges();
- for (SmallVectorImpl<InsnRange>::iterator I = InsnRanges.begin(),
- E = InsnRanges.end();
- I != E; ++I) {
- InsnRange &R = *I;
+ for (auto &R : InsnRanges)
MBBs.insert(R.first->getParent());
- }
}
/// dominates - Return true if DebugLoc's lexical scope dominates at least one
@@ -301,9 +290,8 @@ bool LexicalScopes::dominates(const DILocation *DL, MachineBasicBlock *MBB) {
return true;
bool Result = false;
- for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E;
- ++I) {
- if (const DILocation *IDL = I->getDebugLoc())
+ for (auto &I : *MBB) {
+ if (const DILocation *IDL = I.getDebugLoc())
if (LexicalScope *IScope = getOrCreateLexicalScope(IDL))
if (Scope->dominates(IScope))
return true;
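The DFSIn/DFSOut numbers assigned in constructScopeNest are what make scope-dominance queries cheap: scope A dominates scope B exactly when A's number interval contains B's. A toy sketch of the same numbering scheme (recursive here for brevity, where the pass uses an explicit work stack):

#include <cassert>
#include <vector>

struct Scope {
  std::vector<Scope *> Children;
  unsigned DFSIn = 0, DFSOut = 0;
};

// Assign interval numbers from a single counter: entry number on the way
// down, exit number on the way back up.
static void number(Scope &S, unsigned &Counter) {
  S.DFSIn = ++Counter;
  for (Scope *C : S.Children)
    number(*C, Counter);
  S.DFSOut = ++Counter;
}

// Ancestor-or-self test: interval containment.
static bool dominates(const Scope &A, const Scope &B) {
  return A.DFSIn <= B.DFSIn && B.DFSOut <= A.DFSOut;
}

int main() {
  Scope Root, Inner, Sibling;
  Root.Children = {&Inner, &Sibling};
  unsigned Counter = 0;
  number(Root, Counter);
  assert(dominates(Root, Inner) && !dominates(Inner, Sibling));
}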
diff --git a/contrib/llvm/lib/CodeGen/LiveDebugValues.cpp b/contrib/llvm/lib/CodeGen/LiveDebugValues.cpp
index 4ff88d5..c945376 100644
--- a/contrib/llvm/lib/CodeGen/LiveDebugValues.cpp
+++ b/contrib/llvm/lib/CodeGen/LiveDebugValues.cpp
@@ -23,6 +23,7 @@
#include "llvm/ADT/SparseBitVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/UniqueVector.h"
+#include "llvm/CodeGen/LexicalScopes.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -60,6 +61,26 @@ class LiveDebugValues : public MachineFunctionPass {
private:
const TargetRegisterInfo *TRI;
const TargetInstrInfo *TII;
+ LexicalScopes LS;
+
+ /// Keeps track of lexical scopes associated with a user value's source
+ /// location.
+ class UserValueScopes {
+ DebugLoc DL;
+ LexicalScopes &LS;
+ SmallPtrSet<const MachineBasicBlock *, 4> LBlocks;
+
+ public:
+ UserValueScopes(DebugLoc D, LexicalScopes &L) : DL(std::move(D)), LS(L) {}
+
+ /// Return true if the current scope dominates at least one machine
+ /// instruction in a given machine basic block.
+ bool dominates(MachineBasicBlock *MBB) {
+ if (LBlocks.empty())
+ LS.getMachineBasicBlocks(DL, LBlocks);
+ return LBlocks.count(MBB) != 0 || LS.dominates(DL, MBB);
+ }
+ };
/// Based on std::pair so it can be used as an index into a DenseMap.
typedef std::pair<const DILocalVariable *, const DILocation *>
@@ -83,7 +104,7 @@ private:
struct VarLoc {
const DebugVariable Var;
const MachineInstr &MI; ///< Only used for cloning a new DBG_VALUE.
-
+ mutable UserValueScopes UVS;
enum { InvalidKind = 0, RegisterKind } Kind;
/// The value location. Stored separately to avoid repeatedly
@@ -96,9 +117,9 @@ private:
uint64_t Hash;
} Loc;
- VarLoc(const MachineInstr &MI)
+ VarLoc(const MachineInstr &MI, LexicalScopes &LS)
: Var(MI.getDebugVariable(), MI.getDebugLoc()->getInlinedAt()), MI(MI),
- Kind(InvalidKind) {
+ UVS(MI.getDebugLoc(), LS), Kind(InvalidKind) {
static_assert((sizeof(Loc) == sizeof(uint64_t)),
"hash does not cover all members of Loc");
assert(MI.isDebugValue() && "not a DBG_VALUE");
@@ -125,6 +146,10 @@ private:
return 0;
}
+ /// Determine whether the lexical scope of this value's debug location
+ /// dominates MBB.
+ bool dominates(MachineBasicBlock &MBB) const { return UVS.dominates(&MBB); }
+
void dump() const { MI.dump(); }
bool operator==(const VarLoc &Other) const {
@@ -201,7 +226,8 @@ private:
VarLocInMBB &OutLocs, VarLocMap &VarLocIDs);
bool join(MachineBasicBlock &MBB, VarLocInMBB &OutLocs, VarLocInMBB &InLocs,
- const VarLocMap &VarLocIDs);
+ const VarLocMap &VarLocIDs,
+ SmallPtrSet<const MachineBasicBlock *, 16> &Visited);
bool ExtendRanges(MachineFunction &MF);
@@ -217,7 +243,7 @@ public:
MachineFunctionProperties getRequiredProperties() const override {
return MachineFunctionProperties().set(
- MachineFunctionProperties::Property::AllVRegsAllocated);
+ MachineFunctionProperties::Property::NoVRegs);
}
/// Print to ostream with a message.
@@ -228,6 +254,7 @@ public:
/// Calculate the liveness information for the given machine function.
bool runOnMachineFunction(MachineFunction &MF) override;
};
+
} // namespace
//===----------------------------------------------------------------------===//
@@ -260,6 +287,7 @@ void LiveDebugValues::printVarLocInMBB(const MachineFunction &MF,
const VarLocMap &VarLocIDs,
const char *msg,
raw_ostream &Out) const {
+ Out << '\n' << msg << '\n';
for (const MachineBasicBlock &BB : MF) {
const auto &L = V.lookup(&BB);
Out << "MBB: " << BB.getName() << ":\n";
@@ -268,7 +296,6 @@ void LiveDebugValues::printVarLocInMBB(const MachineFunction &MF,
Out << " Var: " << VL.Var.getVar()->getName();
Out << " MI: ";
VL.dump();
- Out << "\n";
}
}
Out << "\n";
@@ -294,7 +321,7 @@ void LiveDebugValues::transferDebugValue(const MachineInstr &MI,
// Add the VarLoc to OpenRanges from this DBG_VALUE.
// TODO: Currently handles DBG_VALUE which has only reg as location.
if (isDbgValueDescribedByReg(MI)) {
- VarLoc VL(MI);
+ VarLoc VL(MI, LS);
unsigned ID = VarLocIDs.insert(VL);
OpenRanges.insert(ID, VL.Var);
}
@@ -368,7 +395,8 @@ bool LiveDebugValues::transfer(MachineInstr &MI, OpenRangesSet &OpenRanges,
/// inserting a new DBG_VALUE instruction at the start of the @MBB - if the same
/// source variable in all the predecessors of @MBB reside in the same location.
bool LiveDebugValues::join(MachineBasicBlock &MBB, VarLocInMBB &OutLocs,
- VarLocInMBB &InLocs, const VarLocMap &VarLocIDs) {
+ VarLocInMBB &InLocs, const VarLocMap &VarLocIDs,
+ SmallPtrSet<const MachineBasicBlock *, 16> &Visited) {
DEBUG(dbgs() << "join MBB: " << MBB.getName() << "\n");
bool Changed = false;
@@ -376,21 +404,39 @@ bool LiveDebugValues::join(MachineBasicBlock &MBB, VarLocInMBB &OutLocs,
// For all predecessors of this MBB, find the set of VarLocs that
// can be joined.
+ int NumVisited = 0;
for (auto p : MBB.predecessors()) {
+ // Ignore unvisited predecessor blocks. As we are processing
+ // the blocks in reverse post-order, any unvisited block can
+ // be treated as removing no incoming values.
+ if (!Visited.count(p))
+ continue;
auto OL = OutLocs.find(p);
 // Join is null in case of empty OutLocs from any of the predecessors.
if (OL == OutLocs.end())
return false;
- // Just copy over the Out locs to incoming locs for the first predecessor.
- if (p == *MBB.pred_begin()) {
+ // Just copy over the Out locs to incoming locs for the first visited
+ // predecessor, and for all other predecessors join the Out locs.
+ if (!NumVisited)
InLocsT = OL->second;
- continue;
- }
- // Join with this predecessor.
- InLocsT &= OL->second;
+ else
+ InLocsT &= OL->second;
+ NumVisited++;
}
+ // Filter out DBG_VALUEs that are out of scope.
+ VarLocSet KillSet;
+ for (auto ID : InLocsT)
+ if (!VarLocIDs[ID].dominates(MBB))
+ KillSet.set(ID);
+ InLocsT.intersectWithComplement(KillSet);
+
+ // As we are processing blocks in reverse post-order, we
+ // should have processed at least one predecessor, unless it
+ // is the entry block, which has no predecessors.
+ assert((NumVisited || MBB.pred_empty()) &&
+ "Should have processed at least one predecessor");
if (InLocsT.empty())
return false;
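The join above is a standard forward dataflow meet: the first visited predecessor seeds the incoming set, every other visited predecessor intersects into it, and unvisited back-edge predecessors are skipped. A minimal sketch over plain sets of location IDs (toy types, not the pass's real VarLocSet):

#include <set>
#include <vector>

using VarLocSet = std::set<unsigned>;

// Intersect the out-sets of all already-visited predecessors; Visited[i]
// says whether predecessor i has been processed in reverse post-order yet.
VarLocSet joinPredecessors(const std::vector<VarLocSet> &PredOutLocs,
                           const std::vector<bool> &Visited) {
  VarLocSet InLocs;
  bool Seeded = false;
  for (std::size_t I = 0; I != PredOutLocs.size(); ++I) {
    if (!Visited[I])
      continue; // back edge: assume it removes nothing yet
    if (!Seeded) {
      InLocs = PredOutLocs[I]; // copy the first visited predecessor
      Seeded = true;
      continue;
    }
    VarLocSet Tmp;
    for (unsigned ID : InLocs)
      if (PredOutLocs[I].count(ID))
        Tmp.insert(ID);
    InLocs = std::move(Tmp);
  }
  return InLocs;
}

int main() {
  // Two visited predecessors agree only on ID 1; the back edge (index 2)
  // is ignored.
  std::vector<VarLocSet> Outs = {{1, 2}, {1, 3}, {9}};
  std::vector<bool> Visited = {true, true, false};
  return joinPredecessors(Outs, Visited) == VarLocSet{1} ? 0 : 1;
}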
@@ -463,16 +509,18 @@ bool LiveDebugValues::ExtendRanges(MachineFunction &MF) {
// To solve it, we perform join() and transfer() using the two worklist method
// until the ranges converge.
// Ranges have converged when both worklists are empty.
+ SmallPtrSet<const MachineBasicBlock *, 16> Visited;
while (!Worklist.empty() || !Pending.empty()) {
// We track what is on the pending worklist to avoid inserting the same
// thing twice. We could avoid this with a custom priority queue, but this
// is probably not worth it.
SmallPtrSet<MachineBasicBlock *, 16> OnPending;
+ DEBUG(dbgs() << "Processing Worklist\n");
while (!Worklist.empty()) {
MachineBasicBlock *MBB = OrderToBB[Worklist.top()];
Worklist.pop();
- MBBJoined = join(*MBB, OutLocs, InLocs, VarLocIDs);
-
+ MBBJoined = join(*MBB, OutLocs, InLocs, VarLocIDs, Visited);
+ Visited.insert(MBB);
if (MBBJoined) {
MBBJoined = false;
Changed = true;
@@ -505,12 +553,14 @@ bool LiveDebugValues::ExtendRanges(MachineFunction &MF) {
}
bool LiveDebugValues::runOnMachineFunction(MachineFunction &MF) {
+ if (!MF.getFunction()->getSubprogram())
+ // LiveDebugValues will already have removed all DBG_VALUEs.
+ return false;
+
TRI = MF.getSubtarget().getRegisterInfo();
TII = MF.getSubtarget().getInstrInfo();
+ LS.initialize(MF);
- bool Changed = false;
-
- Changed |= ExtendRanges(MF);
-
+ bool Changed = ExtendRanges(MF);
return Changed;
}
diff --git a/contrib/llvm/lib/CodeGen/LiveDebugVariables.cpp b/contrib/llvm/lib/CodeGen/LiveDebugVariables.cpp
index 966b4f1..0934d8c 100644
--- a/contrib/llvm/lib/CodeGen/LiveDebugVariables.cpp
+++ b/contrib/llvm/lib/CodeGen/LiveDebugVariables.cpp
@@ -22,7 +22,6 @@
#include "LiveDebugVariables.h"
#include "llvm/ADT/IntervalMap.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/CodeGen/LexicalScopes.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -76,27 +75,6 @@ LiveDebugVariables::LiveDebugVariables() : MachineFunctionPass(ID), pImpl(nullpt
/// LocMap - Map of where a user value is live, and its location.
typedef IntervalMap<SlotIndex, unsigned, 4> LocMap;
-namespace {
-/// UserValueScopes - Keeps track of lexical scopes associated with a
-/// user value's source location.
-class UserValueScopes {
- DebugLoc DL;
- LexicalScopes &LS;
- SmallPtrSet<const MachineBasicBlock *, 4> LBlocks;
-
-public:
- UserValueScopes(DebugLoc D, LexicalScopes &L) : DL(std::move(D)), LS(L) {}
-
- /// dominates - Return true if current scope dominates at least one machine
- /// instruction in a given machine basic block.
- bool dominates(MachineBasicBlock *MBB) {
- if (LBlocks.empty())
- LS.getMachineBasicBlocks(DL, LBlocks);
- return LBlocks.count(MBB) != 0 || LS.dominates(DL, MBB);
- }
-};
-} // end anonymous namespace
-
/// UserValue - A user value is a part of a debug info user variable.
///
/// A DBG_VALUE instruction notes that (a sub-register of) a virtual register
@@ -221,8 +199,8 @@ public:
I.setValue(getLocationNo(LocMO));
}
- /// extendDef - Extend the current definition as far as possible down the
- /// dominator tree. Stop when meeting an existing def or when leaving the live
+ /// extendDef - Extend the current definition as far as possible.
+ /// Stop when meeting an existing def or when leaving the live
/// range of VNI.
/// End points where VNI is no longer live are added to Kills.
/// @param Idx Starting point for the definition.
@@ -231,12 +209,10 @@ public:
/// @param VNI When LR is not null, this is the value to restrict to.
/// @param Kills Append end points of VNI's live range to Kills.
/// @param LIS Live intervals analysis.
- /// @param MDT Dominator tree.
void extendDef(SlotIndex Idx, unsigned LocNo,
LiveRange *LR, const VNInfo *VNI,
SmallVectorImpl<SlotIndex> *Kills,
- LiveIntervals &LIS, MachineDominatorTree &MDT,
- UserValueScopes &UVS);
+ LiveIntervals &LIS);
/// addDefsFromCopies - The value in LI/LocNo may be copies to other
/// registers. Determine if any of the copies are available at the kill
@@ -254,8 +230,7 @@ public:
/// computeIntervals - Compute the live intervals of all locations after
/// collecting all their def points.
void computeIntervals(MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
- LiveIntervals &LIS, MachineDominatorTree &MDT,
- UserValueScopes &UVS);
+ LiveIntervals &LIS);
/// splitRegister - Replace OldReg ranges with NewRegs ranges where NewRegs is
/// live. Returns true if any changes were made.
@@ -283,8 +258,6 @@ class LDVImpl {
LocMap::Allocator allocator;
MachineFunction *MF;
LiveIntervals *LIS;
- LexicalScopes LS;
- MachineDominatorTree *MDT;
const TargetRegisterInfo *TRI;
/// Whether emitDebugValues is called.
@@ -342,7 +315,6 @@ public:
"Dbg values are not emitted in LDV");
EmitDone = false;
ModifiedMF = false;
- LS.reset();
}
/// mapVirtReg - Map virtual register to an equivalence class.
@@ -541,8 +513,7 @@ bool LDVImpl::collectDebugValues(MachineFunction &mf) {
/// data-flow analysis to propagate them beyond basic block boundaries.
void UserValue::extendDef(SlotIndex Idx, unsigned LocNo, LiveRange *LR,
const VNInfo *VNI, SmallVectorImpl<SlotIndex> *Kills,
- LiveIntervals &LIS, MachineDominatorTree &MDT,
- UserValueScopes &UVS) {
+ LiveIntervals &LIS) {
SlotIndex Start = Idx;
MachineBasicBlock *MBB = LIS.getMBBFromIndex(Start);
SlotIndex Stop = LIS.getMBBEndIdx(MBB);
@@ -660,9 +631,7 @@ UserValue::addDefsFromCopies(LiveInterval *LI, unsigned LocNo,
void
UserValue::computeIntervals(MachineRegisterInfo &MRI,
const TargetRegisterInfo &TRI,
- LiveIntervals &LIS,
- MachineDominatorTree &MDT,
- UserValueScopes &UVS) {
+ LiveIntervals &LIS) {
SmallVector<std::pair<SlotIndex, unsigned>, 16> Defs;
// Collect all defs to be extended (Skipping undefs).
@@ -677,7 +646,7 @@ UserValue::computeIntervals(MachineRegisterInfo &MRI,
const MachineOperand &Loc = locations[LocNo];
if (!Loc.isReg()) {
- extendDef(Idx, LocNo, nullptr, nullptr, nullptr, LIS, MDT, UVS);
+ extendDef(Idx, LocNo, nullptr, nullptr, nullptr, LIS);
continue;
}
@@ -690,7 +659,7 @@ UserValue::computeIntervals(MachineRegisterInfo &MRI,
VNI = LI->getVNInfoAt(Idx);
}
SmallVector<SlotIndex, 16> Kills;
- extendDef(Idx, LocNo, LI, VNI, &Kills, LIS, MDT, UVS);
+ extendDef(Idx, LocNo, LI, VNI, &Kills, LIS);
if (LI)
addDefsFromCopies(LI, LocNo, Kills, Defs, MRI, LIS);
continue;
@@ -701,7 +670,7 @@ UserValue::computeIntervals(MachineRegisterInfo &MRI,
LiveRange *LR = &LIS.getRegUnit(Unit);
const VNInfo *VNI = LR->getVNInfoAt(Idx);
// Don't track copies from physregs, it is too expensive.
- extendDef(Idx, LocNo, LR, VNI, nullptr, LIS, MDT, UVS);
+ extendDef(Idx, LocNo, LR, VNI, nullptr, LIS);
}
// Finally, erase all the undefs.
@@ -714,8 +683,7 @@ UserValue::computeIntervals(MachineRegisterInfo &MRI,
void LDVImpl::computeIntervals() {
for (unsigned i = 0, e = userValues.size(); i != e; ++i) {
- UserValueScopes UVS(userValues[i]->getDebugLoc(), LS);
- userValues[i]->computeIntervals(MF->getRegInfo(), *TRI, *LIS, *MDT, UVS);
+ userValues[i]->computeIntervals(MF->getRegInfo(), *TRI, *LIS);
userValues[i]->mapVirtRegs(this);
}
}
@@ -724,9 +692,7 @@ bool LDVImpl::runOnMachineFunction(MachineFunction &mf) {
clear();
MF = &mf;
LIS = &pass.getAnalysis<LiveIntervals>();
- MDT = &pass.getAnalysis<MachineDominatorTree>();
TRI = mf.getSubtarget().getRegisterInfo();
- LS.initialize(mf);
DEBUG(dbgs() << "********** COMPUTING LIVE DEBUG VARIABLES: "
<< mf.getName() << " **********\n");
@@ -951,7 +917,7 @@ findInsertLocation(MachineBasicBlock *MBB, SlotIndex Idx,
while (!(MI = LIS.getInstructionFromIndex(Idx))) {
// We've reached the beginning of MBB.
if (Idx == Start) {
- MachineBasicBlock::iterator I = MBB->SkipPHIsAndLabels(MBB->begin());
+ MachineBasicBlock::iterator I = MBB->SkipPHIsLabelsAndDebug(MBB->begin());
return I;
}
Idx = Idx.getPrevIndex();
diff --git a/contrib/llvm/lib/CodeGen/LiveInterval.cpp b/contrib/llvm/lib/CodeGen/LiveInterval.cpp
index 93c5ca7..623af49 100644
--- a/contrib/llvm/lib/CodeGen/LiveInterval.cpp
+++ b/contrib/llvm/lib/CodeGen/LiveInterval.cpp
@@ -59,18 +59,32 @@ public:
typedef LiveRange::Segment Segment;
typedef IteratorT iterator;
- VNInfo *createDeadDef(SlotIndex Def, VNInfo::Allocator &VNInfoAllocator) {
+ /// A counterpart of LiveRange::createDeadDef: Make sure the range has a
+ /// value defined at @p Def.
+ /// If @p ForVNI is null, and there is no value defined at @p Def, a new
+ /// value will be allocated using @p VNInfoAllocator.
+ /// If @p ForVNI is null, the return value is the value defined at @p Def,
+ /// either a pre-existing one, or the one newly created.
+ /// If @p ForVNI is not null, then @p Def should be the location where
+ /// @p ForVNI is defined. If the range does not have a value defined at
+ /// @p Def, the value @p ForVNI will be used instead of allocating a new
+ /// one. If the range already has a value defined at @p Def, it must be the
+ /// same as @p ForVNI. In either case, @p ForVNI will be the return value.
+ VNInfo *createDeadDef(SlotIndex Def, VNInfo::Allocator *VNInfoAllocator,
+ VNInfo *ForVNI) {
assert(!Def.isDead() && "Cannot define a value at the dead slot");
-
+ assert((!ForVNI || ForVNI->def == Def) &&
+ "If ForVNI is specified, it must match Def");
iterator I = impl().find(Def);
if (I == segments().end()) {
- VNInfo *VNI = LR->getNextValue(Def, VNInfoAllocator);
+ VNInfo *VNI = ForVNI ? ForVNI : LR->getNextValue(Def, *VNInfoAllocator);
impl().insertAtEnd(Segment(Def, Def.getDeadSlot(), VNI));
return VNI;
}
Segment *S = segmentAt(I);
if (SlotIndex::isSameInstr(Def, S->start)) {
+ assert((!ForVNI || ForVNI == S->valno) && "Value number mismatch");
assert(S->valno->def == S->start && "Inconsistent existing value def");
// It is possible to have both normal and early-clobber defs of the same
@@ -84,7 +98,7 @@ public:
return S->valno;
}
assert(SlotIndex::isEarlierInstr(Def, S->start) && "Already live at def");
- VNInfo *VNI = LR->getNextValue(Def, VNInfoAllocator);
+ VNInfo *VNI = ForVNI ? ForVNI : LR->getNextValue(Def, *VNInfoAllocator);
segments().insert(I, Segment(Def, Def.getDeadSlot(), VNI));
return VNI;
}
@@ -93,7 +107,7 @@ public:
if (segments().empty())
return nullptr;
iterator I =
- impl().findInsertPos(Segment(Use.getPrevSlot(), Use, nullptr));
+ impl().findInsertPos(Segment(Use.getPrevSlot(), Use, nullptr));
if (I == segments().begin())
return nullptr;
--I;
@@ -104,6 +118,25 @@ public:
return I->valno;
}
+ std::pair<VNInfo*,bool> extendInBlock(ArrayRef<SlotIndex> Undefs,
+ SlotIndex StartIdx, SlotIndex Use) {
+ if (segments().empty())
+ return std::make_pair(nullptr, false);
+ SlotIndex BeforeUse = Use.getPrevSlot();
+ iterator I = impl().findInsertPos(Segment(BeforeUse, Use, nullptr));
+ if (I == segments().begin())
+ return std::make_pair(nullptr, LR->isUndefIn(Undefs, StartIdx, BeforeUse));
+ --I;
+ if (I->end <= StartIdx)
+ return std::make_pair(nullptr, LR->isUndefIn(Undefs, StartIdx, BeforeUse));
+ if (I->end < Use) {
+ if (LR->isUndefIn(Undefs, I->end, BeforeUse))
+ return std::make_pair(nullptr, true);
+ extendSegmentEndTo(I, Use);
+ }
+ return std::make_pair(I->valno, false);
+ }
+
/// This method is used when we want to extend the segment specified
/// by I to end at the specified endpoint. To do this, we should
/// merge and eliminate all segments that this will overlap
@@ -320,13 +353,20 @@ LiveRange::iterator LiveRange::find(SlotIndex Pos) {
return I;
}
-VNInfo *LiveRange::createDeadDef(SlotIndex Def,
- VNInfo::Allocator &VNInfoAllocator) {
+VNInfo *LiveRange::createDeadDef(SlotIndex Def, VNInfo::Allocator &VNIAlloc) {
+ // Use the segment set, if it is available.
+ if (segmentSet != nullptr)
+ return CalcLiveRangeUtilSet(this).createDeadDef(Def, &VNIAlloc, nullptr);
+ // Otherwise use the segment vector.
+ return CalcLiveRangeUtilVector(this).createDeadDef(Def, &VNIAlloc, nullptr);
+}
+
+VNInfo *LiveRange::createDeadDef(VNInfo *VNI) {
// Use the segment set, if it is available.
if (segmentSet != nullptr)
- return CalcLiveRangeUtilSet(this).createDeadDef(Def, VNInfoAllocator);
+ return CalcLiveRangeUtilSet(this).createDeadDef(VNI->def, nullptr, VNI);
// Otherwise use the segment vector.
- return CalcLiveRangeUtilVector(this).createDeadDef(Def, VNInfoAllocator);
+ return CalcLiveRangeUtilVector(this).createDeadDef(VNI->def, nullptr, VNI);
}
// overlaps - Return true if the intersection of the two live ranges is
@@ -507,9 +547,15 @@ void LiveRange::append(const Segment S) {
segments.push_back(S);
}
-/// extendInBlock - If this range is live before Kill in the basic
-/// block that starts at StartIdx, extend it to be live up to Kill and return
-/// the value. If there is no live range before Kill, return NULL.
+std::pair<VNInfo*,bool> LiveRange::extendInBlock(ArrayRef<SlotIndex> Undefs,
+ SlotIndex StartIdx, SlotIndex Kill) {
+ // Use the segment set, if it is available.
+ if (segmentSet != nullptr)
+ return CalcLiveRangeUtilSet(this).extendInBlock(Undefs, StartIdx, Kill);
+ // Otherwise use the segment vector.
+ return CalcLiveRangeUtilVector(this).extendInBlock(Undefs, StartIdx, Kill);
+}
+
VNInfo *LiveRange::extendInBlock(SlotIndex StartIdx, SlotIndex Kill) {
// Use the segment set, if it is available.
if (segmentSet != nullptr)
@@ -571,7 +617,7 @@ void LiveRange::removeSegment(SlotIndex Start, SlotIndex End,
/// Also remove the value# from value# list.
void LiveRange::removeValNo(VNInfo *ValNo) {
if (empty()) return;
- segments.erase(std::remove_if(begin(), end(), [ValNo](const Segment &S) {
+ segments.erase(remove_if(*this, [ValNo](const Segment &S) {
return S.valno == ValNo;
}), end());
// Now that ValNo is dead, remove it.
@@ -824,6 +870,30 @@ unsigned LiveInterval::getSize() const {
return Sum;
}
+void LiveInterval::computeSubRangeUndefs(SmallVectorImpl<SlotIndex> &Undefs,
+ LaneBitmask LaneMask,
+ const MachineRegisterInfo &MRI,
+ const SlotIndexes &Indexes) const {
+ assert(TargetRegisterInfo::isVirtualRegister(reg));
+ LaneBitmask VRegMask = MRI.getMaxLaneMaskForVReg(reg);
+ assert((VRegMask & LaneMask).any());
+ const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();
+ for (const MachineOperand &MO : MRI.def_operands(reg)) {
+ if (!MO.isUndef())
+ continue;
+ unsigned SubReg = MO.getSubReg();
+ assert(SubReg != 0 && "Undef should only be set on subreg defs");
+ LaneBitmask DefMask = TRI.getSubRegIndexLaneMask(SubReg);
+ LaneBitmask UndefMask = VRegMask & ~DefMask;
+ if ((UndefMask & LaneMask).any()) {
+ const MachineInstr &MI = *MO.getParent();
+ bool EarlyClobber = MO.isEarlyClobber();
+ SlotIndex Pos = Indexes.getInstructionIndex(MI).getRegSlot(EarlyClobber);
+ Undefs.push_back(Pos);
+ }
+ }
+}
+
raw_ostream& llvm::operator<<(raw_ostream& os, const LiveRange::Segment &S) {
return os << '[' << S.start << ',' << S.end << ':' << S.valno->id << ')';
}
@@ -912,15 +982,16 @@ void LiveInterval::verify(const MachineRegisterInfo *MRI) const {
super::verify();
 // Make sure SubRanges are fine and LaneMasks are disjoint.
- LaneBitmask Mask = 0;
- LaneBitmask MaxMask = MRI != nullptr ? MRI->getMaxLaneMaskForVReg(reg) : ~0u;
+ LaneBitmask Mask;
+ LaneBitmask MaxMask = MRI != nullptr ? MRI->getMaxLaneMaskForVReg(reg)
+ : LaneBitmask::getAll();
for (const SubRange &SR : subranges()) {
 // Subrange lane mask should be disjoint from all previous subrange masks.
- assert((Mask & SR.LaneMask) == 0);
+ assert((Mask & SR.LaneMask).none());
Mask |= SR.LaneMask;
 // The subrange mask must be contained in the maximum lane mask for the vreg.
- assert((Mask & ~MaxMask) == 0);
+ assert((Mask & ~MaxMask).none());
// empty subranges must be removed.
assert(!SR.empty());
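The verify() hunk above moves from raw integers to the LaneBitmask type, but the invariants stay plain bit arithmetic: subrange lane masks must be pairwise disjoint and contained in the vreg's maximum mask. A sketch with uint32_t masks (an assumption for illustration; LaneBitmask wraps a fixed-width integer):

#include <cassert>
#include <cstdint>
#include <vector>

// Check the two subrange invariants asserted in LiveInterval::verify():
// pairwise-disjoint lane masks, all contained in MaxMask.
bool subRangeMasksValid(const std::vector<uint32_t> &SubRangeMasks,
                        uint32_t MaxMask) {
  uint32_t Seen = 0; // union of masks checked so far
  for (uint32_t M : SubRangeMasks) {
    if ((Seen & M) != 0) // overlaps an earlier subrange
      return false;
    Seen |= M;
    if ((Seen & ~MaxMask) != 0) // escapes the vreg's maximum mask
      return false;
  }
  return true;
}

int main() {
  assert(subRangeMasksValid({0x3, 0xC}, 0xF));
  assert(!subRangeMasksValid({0x3, 0x6}, 0xF)); // lanes 1-2 overlap
  assert(!subRangeMasksValid({0x30}, 0xF));     // outside MaxMask
}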
diff --git a/contrib/llvm/lib/CodeGen/LiveIntervalAnalysis.cpp b/contrib/llvm/lib/CodeGen/LiveIntervalAnalysis.cpp
index 5f3281f..70d3483 100644
--- a/contrib/llvm/lib/CodeGen/LiveIntervalAnalysis.cpp
+++ b/contrib/llvm/lib/CodeGen/LiveIntervalAnalysis.cpp
@@ -58,10 +58,6 @@ static cl::opt<bool> EnablePrecomputePhysRegs(
static bool EnablePrecomputePhysRegs = false;
#endif // NDEBUG
-static cl::opt<bool> EnableSubRegLiveness(
- "enable-subreg-liveness", cl::Hidden, cl::init(true),
- cl::desc("Enable subregister liveness tracking."));
-
namespace llvm {
cl::opt<bool> UseSegmentSetForPhysRegs(
"use-segment-set-for-physregs", cl::Hidden, cl::init(true),
@@ -119,9 +115,6 @@ bool LiveIntervals::runOnMachineFunction(MachineFunction &fn) {
Indexes = &getAnalysis<SlotIndexes>();
DomTree = &getAnalysis<MachineDominatorTree>();
- if (EnableSubRegLiveness && MF->getSubtarget().enableSubRegLiveness())
- MRI->enableSubRegLiveness(true);
-
if (!LRCalc)
LRCalc = new LiveRangeCalc();
@@ -504,8 +497,7 @@ bool LiveIntervals::computeDeadValues(LiveInterval &LI,
return MayHaveSplitComponents;
}
-void LiveIntervals::shrinkToUses(LiveInterval::SubRange &SR, unsigned Reg)
-{
+void LiveIntervals::shrinkToUses(LiveInterval::SubRange &SR, unsigned Reg) {
DEBUG(dbgs() << "Shrink: " << SR << '\n');
assert(TargetRegisterInfo::isVirtualRegister(Reg)
&& "Can only shrink virtual registers");
@@ -514,18 +506,19 @@ void LiveIntervals::shrinkToUses(LiveInterval::SubRange &SR, unsigned Reg)
// Visit all instructions reading Reg.
SlotIndex LastIdx;
- for (MachineOperand &MO : MRI->reg_operands(Reg)) {
- MachineInstr *UseMI = MO.getParent();
- if (UseMI->isDebugValue())
+ for (MachineOperand &MO : MRI->use_nodbg_operands(Reg)) {
+ // Skip "undef" uses.
+ if (!MO.readsReg())
continue;
// Maybe the operand is for a subregister we don't care about.
unsigned SubReg = MO.getSubReg();
if (SubReg != 0) {
LaneBitmask LaneMask = TRI->getSubRegIndexLaneMask(SubReg);
- if ((LaneMask & SR.LaneMask) == 0)
+ if ((LaneMask & SR.LaneMask).none())
continue;
}
// We only need to visit each instruction once.
+ MachineInstr *UseMI = MO.getParent();
SlotIndex Idx = getInstructionIndex(*UseMI).getRegSlot();
if (Idx == LastIdx)
continue;
@@ -574,11 +567,12 @@ void LiveIntervals::shrinkToUses(LiveInterval::SubRange &SR, unsigned Reg)
}
void LiveIntervals::extendToIndices(LiveRange &LR,
- ArrayRef<SlotIndex> Indices) {
+ ArrayRef<SlotIndex> Indices,
+ ArrayRef<SlotIndex> Undefs) {
assert(LRCalc && "LRCalc not initialized.");
LRCalc->reset(MF, getSlotIndexes(), DomTree, &getVNInfoAllocator());
for (unsigned i = 0, e = Indices.size(); i != e; ++i)
- LRCalc->extend(LR, Indices[i]);
+ LRCalc->extend(LR, Indices[i], /*PhysReg=*/0, Undefs);
}
void LiveIntervals::pruneValue(LiveRange &LR, SlotIndex Kill,
@@ -605,7 +599,7 @@ void LiveIntervals::pruneValue(LiveRange &LR, SlotIndex Kill,
// Find all blocks that are reachable from KillMBB without leaving VNI's live
// range. It is possible that KillMBB itself is reachable, so start a DFS
// from each successor.
- typedef SmallPtrSet<MachineBasicBlock*, 9> VisitedTy;
+ typedef df_iterator_default_set<MachineBasicBlock*,9> VisitedTy;
VisitedTy Visited;
for (MachineBasicBlock::succ_iterator
SuccI = KillMBB->succ_begin(), SuccE = KillMBB->succ_end();
@@ -724,7 +718,7 @@ void LiveIntervals::addKillFlags(const VirtRegMap *VRM) {
LaneBitmask DefinedLanesMask;
if (!SRs.empty()) {
// Compute a mask of lanes that are defined.
- DefinedLanesMask = 0;
+ DefinedLanesMask = LaneBitmask::getNone();
for (auto &SRP : SRs) {
const LiveInterval::SubRange &SR = *SRP.first;
LiveRange::const_iterator &I = SRP.second;
@@ -737,7 +731,7 @@ void LiveIntervals::addKillFlags(const VirtRegMap *VRM) {
DefinedLanesMask |= SR.LaneMask;
}
} else
- DefinedLanesMask = ~0u;
+ DefinedLanesMask = LaneBitmask::getAll();
bool IsFullWrite = false;
for (const MachineOperand &MO : MI->operands()) {
@@ -746,7 +740,7 @@ void LiveIntervals::addKillFlags(const VirtRegMap *VRM) {
if (MO.isUse()) {
// Reading any undefined lanes?
LaneBitmask UseMask = TRI->getSubRegIndexLaneMask(MO.getSubReg());
- if ((UseMask & ~DefinedLanesMask) != 0)
+ if ((UseMask & ~DefinedLanesMask).any())
goto CancelKill;
} else if (MO.getSubReg() == 0) {
// Writing to the full register?
@@ -954,14 +948,15 @@ public:
LiveInterval &LI = LIS.getInterval(Reg);
if (LI.hasSubRanges()) {
unsigned SubReg = MO.getSubReg();
- LaneBitmask LaneMask = TRI.getSubRegIndexLaneMask(SubReg);
+ LaneBitmask LaneMask = SubReg ? TRI.getSubRegIndexLaneMask(SubReg)
+ : MRI.getMaxLaneMaskForVReg(Reg);
for (LiveInterval::SubRange &S : LI.subranges()) {
- if ((S.LaneMask & LaneMask) == 0)
+ if ((S.LaneMask & LaneMask).none())
continue;
updateRange(S, Reg, S.LaneMask);
}
}
- updateRange(LI, Reg, 0);
+ updateRange(LI, Reg, LaneBitmask::getNone());
continue;
}
@@ -969,7 +964,7 @@ public:
// precomputed live range.
for (MCRegUnitIterator Units(Reg, &TRI); Units.isValid(); ++Units)
if (LiveRange *LR = getRegUnitLI(*Units))
- updateRange(*LR, *Units, 0);
+ updateRange(*LR, *Units, LaneBitmask::getNone());
}
if (hasRegMask)
updateRegMaskSlots();
@@ -985,7 +980,7 @@ private:
dbgs() << " ";
if (TargetRegisterInfo::isVirtualRegister(Reg)) {
dbgs() << PrintReg(Reg);
- if (LaneMask != 0)
+ if (LaneMask.any())
dbgs() << " L" << PrintLaneMask(LaneMask);
} else {
dbgs() << PrintRegUnit(Reg, &TRI);
@@ -1039,6 +1034,8 @@ private:
LiveRange::iterator Prev = std::prev(NewIdxIn);
Prev->end = NewIdx.getRegSlot();
}
+ // Extend OldIdxIn.
+ OldIdxIn->end = Next->start;
return;
}
@@ -1317,8 +1314,8 @@ private:
if (MO.isUndef())
continue;
unsigned SubReg = MO.getSubReg();
- if (SubReg != 0 && LaneMask != 0
- && (TRI.getSubRegIndexLaneMask(SubReg) & LaneMask) == 0)
+ if (SubReg != 0 && LaneMask.any()
+ && (TRI.getSubRegIndexLaneMask(SubReg) & LaneMask).none())
continue;
const MachineInstr &MI = *MO.getParent();
@@ -1394,6 +1391,11 @@ void LiveIntervals::repairOldRegInRange(const MachineBasicBlock::iterator Begin,
LaneBitmask LaneMask) {
LiveInterval::iterator LII = LR.find(endIdx);
SlotIndex lastUseIdx;
+ if (LII == LR.begin()) {
+ // This happens when the function is called for a subregister that only
+ // occurs _after_ the range that is to be repaired.
+ return;
+ }
if (LII != LR.end() && LII->start < endIdx)
lastUseIdx = LII->end;
else
@@ -1420,7 +1422,7 @@ void LiveIntervals::repairOldRegInRange(const MachineBasicBlock::iterator Begin,
unsigned SubReg = MO.getSubReg();
LaneBitmask Mask = TRI->getSubRegIndexLaneMask(SubReg);
- if ((Mask & LaneMask) == 0)
+ if ((Mask & LaneMask).none())
continue;
if (MO.isDef()) {
@@ -1538,15 +1540,19 @@ void LiveIntervals::removePhysRegDefAt(unsigned Reg, SlotIndex Pos) {
}
void LiveIntervals::removeVRegDefAt(LiveInterval &LI, SlotIndex Pos) {
+ // LI may not have the main range computed yet, but its subranges may
+ // be present.
VNInfo *VNI = LI.getVNInfoAt(Pos);
- if (VNI == nullptr)
- return;
- LI.removeValNo(VNI);
+ if (VNI != nullptr) {
+ assert(VNI->def.getBaseIndex() == Pos.getBaseIndex());
+ LI.removeValNo(VNI);
+ }
- // Also remove the value in subranges.
+ // Also remove the value defined in subranges.
for (LiveInterval::SubRange &S : LI.subranges()) {
if (VNInfo *SVNI = S.getVNInfoAt(Pos))
- S.removeValNo(SVNI);
+ if (SVNI->def.getBaseIndex() == Pos.getBaseIndex())
+ S.removeValNo(SVNI);
}
LI.removeEmptySubRanges();
}
diff --git a/contrib/llvm/lib/CodeGen/LiveIntervalUnion.cpp b/contrib/llvm/lib/CodeGen/LiveIntervalUnion.cpp
index 025d99c..fc2f233 100644
--- a/contrib/llvm/lib/CodeGen/LiveIntervalUnion.cpp
+++ b/contrib/llvm/lib/CodeGen/LiveIntervalUnion.cpp
@@ -14,6 +14,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/LiveIntervalUnion.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SparseBitVector.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
@@ -102,9 +103,7 @@ void LiveIntervalUnion::verify(LiveVirtRegBitSet& VisitedVRegs) {
// Scan the vector of interfering virtual registers in this union. Assume it's
// quite small.
bool LiveIntervalUnion::Query::isSeenInterference(LiveInterval *VirtReg) const {
- SmallVectorImpl<LiveInterval*>::const_iterator I =
- std::find(InterferingVRegs.begin(), InterferingVRegs.end(), VirtReg);
- return I != InterferingVRegs.end();
+ return is_contained(InterferingVRegs, VirtReg);
}
// Collect virtual registers in this union that interfere with this
diff --git a/contrib/llvm/lib/CodeGen/LivePhysRegs.cpp b/contrib/llvm/lib/CodeGen/LivePhysRegs.cpp
index 4e2528f..dcc41c1 100644
--- a/contrib/llvm/lib/CodeGen/LivePhysRegs.cpp
+++ b/contrib/llvm/lib/CodeGen/LivePhysRegs.cpp
@@ -49,7 +49,7 @@ void LivePhysRegs::stepBackward(const MachineInstr &MI) {
if (!O->isDef())
continue;
unsigned Reg = O->getReg();
- if (Reg == 0)
+ if (!TargetRegisterInfo::isPhysicalRegister(Reg))
continue;
removeReg(Reg);
} else if (O->isRegMask())
@@ -61,7 +61,7 @@ void LivePhysRegs::stepBackward(const MachineInstr &MI) {
if (!O->isReg() || !O->readsReg())
continue;
unsigned Reg = O->getReg();
- if (Reg == 0)
+ if (!TargetRegisterInfo::isPhysicalRegister(Reg))
continue;
addReg(Reg);
}
@@ -77,7 +77,7 @@ void LivePhysRegs::stepForward(const MachineInstr &MI,
for (ConstMIBundleOperands O(MI); O.isValid(); ++O) {
if (O->isReg()) {
unsigned Reg = O->getReg();
- if (Reg == 0)
+ if (!TargetRegisterInfo::isPhysicalRegister(Reg))
continue;
if (O->isDef()) {
// Note, dead defs are still recorded. The caller should decide how to
@@ -141,9 +141,19 @@ bool LivePhysRegs::available(const MachineRegisterInfo &MRI,
}
/// Add live-in registers of basic block \p MBB to \p LiveRegs.
-static void addLiveIns(LivePhysRegs &LiveRegs, const MachineBasicBlock &MBB) {
- for (const auto &LI : MBB.liveins())
- LiveRegs.addReg(LI.PhysReg);
+void LivePhysRegs::addBlockLiveIns(const MachineBasicBlock &MBB) {
+ for (const auto &LI : MBB.liveins()) {
+ MCSubRegIndexIterator S(LI.PhysReg, TRI);
+ if (LI.LaneMask.all() || (LI.LaneMask.any() && !S.isValid())) {
+ addReg(LI.PhysReg);
+ continue;
+ }
+ for (; S.isValid(); ++S) {
+ unsigned SI = S.getSubRegIndex();
+ if ((LI.LaneMask & TRI->getSubRegIndexLaneMask(SI)).any())
+ addReg(S.getSubReg());
+ }
+ }
}
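
[Editor's note] The rewritten addBlockLiveIns above honors partial live-ins: a live-in whose lane mask covers only some lanes is expanded into the subregisters whose lane masks overlap it. Below is a minimal, self-contained sketch of that overlap test; the two-lane layout and the mask values are invented for illustration and are not taken from the tree.

#include <cassert>
#include <cstdint>

// Stand-in for llvm::LaneBitmask, reduced to the operations used above.
struct LaneBitmask {
  uint32_t Mask;
  bool any() const { return Mask != 0; }
  LaneBitmask operator&(LaneBitmask O) const { return {Mask & O.Mask}; }
};

int main() {
  LaneBitmask LiveIn{0x1};          // only the low lane is live in
  LaneBitmask Sub0{0x1}, Sub1{0x2}; // lane masks of two subregisters
  assert((LiveIn & Sub0).any());    // sub0 overlaps: it would be added
  assert(!(LiveIn & Sub1).any());   // sub1 does not: it would be skipped
}
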
/// Add pristine registers to the given \p LiveRegs. This function removes
@@ -160,12 +170,12 @@ static void addPristines(LivePhysRegs &LiveRegs, const MachineFunction &MF,
void LivePhysRegs::addLiveOutsNoPristines(const MachineBasicBlock &MBB) {
// To get the live-outs we simply merge the live-ins of all successors.
for (const MachineBasicBlock *Succ : MBB.successors())
- ::addLiveIns(*this, *Succ);
+ addBlockLiveIns(*Succ);
}
void LivePhysRegs::addLiveOuts(const MachineBasicBlock &MBB) {
const MachineFunction &MF = *MBB.getParent();
- const MachineFrameInfo &MFI = *MF.getFrameInfo();
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
if (MFI.isCalleeSavedInfoValid()) {
if (MBB.isReturnBlock()) {
// The return block has no successors whose live-ins we could merge
@@ -182,8 +192,31 @@ void LivePhysRegs::addLiveOuts(const MachineBasicBlock &MBB) {
void LivePhysRegs::addLiveIns(const MachineBasicBlock &MBB) {
const MachineFunction &MF = *MBB.getParent();
- const MachineFrameInfo &MFI = *MF.getFrameInfo();
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
if (MFI.isCalleeSavedInfoValid())
addPristines(*this, MF, MFI, *TRI);
- ::addLiveIns(*this, MBB);
+ addBlockLiveIns(MBB);
+}
+
+void llvm::computeLiveIns(LivePhysRegs &LiveRegs, const TargetRegisterInfo &TRI,
+ MachineBasicBlock &MBB) {
+ assert(MBB.livein_empty());
+ LiveRegs.init(TRI);
+ LiveRegs.addLiveOutsNoPristines(MBB);
+ for (MachineInstr &MI : make_range(MBB.rbegin(), MBB.rend()))
+ LiveRegs.stepBackward(MI);
+
+ for (unsigned Reg : LiveRegs) {
+ // Skip the register if we are about to add one of its super registers.
+ bool ContainsSuperReg = false;
+ for (MCSuperRegIterator SReg(Reg, &TRI); SReg.isValid(); ++SReg) {
+ if (LiveRegs.contains(*SReg)) {
+ ContainsSuperReg = true;
+ break;
+ }
+ }
+ if (ContainsSuperReg)
+ continue;
+ MBB.addLiveIn(Reg);
+ }
}
diff --git a/contrib/llvm/lib/CodeGen/LiveRangeCalc.cpp b/contrib/llvm/lib/CodeGen/LiveRangeCalc.cpp
index db91ca1..0128376 100644
--- a/contrib/llvm/lib/CodeGen/LiveRangeCalc.cpp
+++ b/contrib/llvm/lib/CodeGen/LiveRangeCalc.cpp
@@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//
#include "LiveRangeCalc.h"
+#include "llvm/ADT/SetVector.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
@@ -23,6 +24,7 @@ void LiveRangeCalc::resetLiveOutMap() {
unsigned NumBlocks = MF->getNumBlockIDs();
Seen.clear();
Seen.resize(NumBlocks);
+ EntryInfoMap.clear();
Map.resize(NumBlocks);
}
@@ -64,9 +66,8 @@ void LiveRangeCalc::calculate(LiveInterval &LI, bool TrackSubRegs) {
unsigned SubReg = MO.getSubReg();
if (LI.hasSubRanges() || (SubReg != 0 && TrackSubRegs)) {
- LaneBitmask Mask = SubReg != 0 ? TRI.getSubRegIndexLaneMask(SubReg)
- : MRI->getMaxLaneMaskForVReg(Reg);
-
+ LaneBitmask SubMask = SubReg != 0 ? TRI.getSubRegIndexLaneMask(SubReg)
+ : MRI->getMaxLaneMaskForVReg(Reg);
// If this is the first time we see a subregister def, initialize
// subranges by creating a copy of the main range.
if (!LI.hasSubRanges() && !LI.empty()) {
@@ -74,17 +75,19 @@ void LiveRangeCalc::calculate(LiveInterval &LI, bool TrackSubRegs) {
LI.createSubRangeFrom(*Alloc, ClassMask, LI);
}
+ LaneBitmask Mask = SubMask;
for (LiveInterval::SubRange &S : LI.subranges()) {
// A Mask for subregs common to the existing subrange and current def.
LaneBitmask Common = S.LaneMask & Mask;
- if (Common == 0)
+ if (Common.none())
continue;
- // A Mask for subregs covered by the subrange but not the current def.
- LaneBitmask LRest = S.LaneMask & ~Mask;
LiveInterval::SubRange *CommonRange;
- if (LRest != 0) {
- // Split current subrange into Common and LRest ranges.
- S.LaneMask = LRest;
+ // A Mask for subregs covered by the subrange but not the current def.
+ LaneBitmask RM = S.LaneMask & ~Mask;
+ if (RM.any()) {
+ // Split the subrange S into two parts: one covered by the current
+ // def (CommonRange), and the one not affected by it (updated S).
+ S.LaneMask = RM;
CommonRange = LI.createSubRangeFrom(*Alloc, Common, S);
} else {
assert(Common == S.LaneMask);
@@ -95,7 +98,7 @@ void LiveRangeCalc::calculate(LiveInterval &LI, bool TrackSubRegs) {
Mask &= ~Common;
}
// Create a new SubRange for subregs we did not cover yet.
- if (Mask != 0) {
+ if (Mask.any()) {
LiveInterval::SubRange *NewRange = LI.createSubRange(*Alloc, Mask);
if (MO.isDef())
createDeadDef(*Indexes, *Alloc, *NewRange, MO);
@@ -116,14 +119,15 @@ void LiveRangeCalc::calculate(LiveInterval &LI, bool TrackSubRegs) {
// necessary.
if (LI.hasSubRanges()) {
for (LiveInterval::SubRange &S : LI.subranges()) {
- resetLiveOutMap();
- extendToUses(S, Reg, S.LaneMask);
+ LiveRangeCalc SubLRC;
+ SubLRC.reset(MF, Indexes, DomTree, Alloc);
+ SubLRC.extendToUses(S, Reg, S.LaneMask, &LI);
}
LI.clear();
constructMainRangeFromSubranges(LI);
} else {
resetLiveOutMap();
- extendToUses(LI, Reg, ~0u);
+ extendToUses(LI, Reg, LaneBitmask::getAll());
}
}
@@ -139,9 +143,8 @@ void LiveRangeCalc::constructMainRangeFromSubranges(LiveInterval &LI) {
MainRange.createDeadDef(VNI->def, *Alloc);
}
}
-
resetLiveOutMap();
- extendToUses(MainRange, LI.reg);
+ extendToUses(MainRange, LI.reg, LaneBitmask::getAll(), &LI);
}
void LiveRangeCalc::createDeadDefs(LiveRange &LR, unsigned Reg) {
@@ -154,29 +157,34 @@ void LiveRangeCalc::createDeadDefs(LiveRange &LR, unsigned Reg) {
}
-void LiveRangeCalc::extendToUses(LiveRange &LR, unsigned Reg,
- LaneBitmask Mask) {
+void LiveRangeCalc::extendToUses(LiveRange &LR, unsigned Reg, LaneBitmask Mask,
+ LiveInterval *LI) {
+ SmallVector<SlotIndex, 4> Undefs;
+ if (LI != nullptr)
+ LI->computeSubRangeUndefs(Undefs, Mask, *MRI, *Indexes);
+
// Visit all operands that read Reg. This may include partial defs.
+ bool IsSubRange = !Mask.all();
const TargetRegisterInfo &TRI = *MRI->getTargetRegisterInfo();
for (MachineOperand &MO : MRI->reg_nodbg_operands(Reg)) {
// Clear all kill flags. They will be reinserted after register allocation
// by LiveIntervalAnalysis::addKillFlags().
if (MO.isUse())
MO.setIsKill(false);
- else {
- // We only care about uses, but on the main range (mask ~0u) this includes
- // the "virtual" reads happening for subregister defs.
- if (Mask != ~0u)
- continue;
- }
-
- if (!MO.readsReg())
+ // MO::readsReg returns "true" for subregister defs. This is for keeping
+ // liveness of the entire register (i.e. for the main range of the live
+ // interval). For subranges, definitions of non-overlapping subregisters
+ // do not count as uses.
+ if (!MO.readsReg() || (IsSubRange && MO.isDef()))
continue;
+
unsigned SubReg = MO.getSubReg();
if (SubReg != 0) {
- LaneBitmask SubRegMask = TRI.getSubRegIndexLaneMask(SubReg);
- // Ignore uses not covering the current subrange.
- if ((SubRegMask & Mask) == 0)
+ LaneBitmask SLM = TRI.getSubRegIndexLaneMask(SubReg);
+ if (MO.isDef())
+ SLM = ~SLM;
+ // Ignore uses not reading the current (sub)range.
+ if ((SLM & Mask).none())
continue;
}
@@ -205,7 +213,7 @@ void LiveRangeCalc::extendToUses(LiveRange &LR, unsigned Reg,
// MI is reading Reg. We may have visited MI before if it happens to be
// reading Reg multiple times. That is OK, extend() is idempotent.
- extend(LR, UseIdx, Reg);
+ extend(LR, UseIdx, Reg, Undefs);
}
}
@@ -235,8 +243,8 @@ void LiveRangeCalc::updateFromLiveIns() {
LiveIn.clear();
}
-
-void LiveRangeCalc::extend(LiveRange &LR, SlotIndex Use, unsigned PhysReg) {
+void LiveRangeCalc::extend(LiveRange &LR, SlotIndex Use, unsigned PhysReg,
+ ArrayRef<SlotIndex> Undefs) {
assert(Use.isValid() && "Invalid SlotIndex");
assert(Indexes && "Missing SlotIndexes");
assert(DomTree && "Missing dominator tree");
@@ -245,14 +253,15 @@ void LiveRangeCalc::extend(LiveRange &LR, SlotIndex Use, unsigned PhysReg) {
assert(UseMBB && "No MBB at Use");
// Is there a def in the same MBB we can extend?
- if (LR.extendInBlock(Indexes->getMBBStartIdx(UseMBB), Use))
+ auto EP = LR.extendInBlock(Undefs, Indexes->getMBBStartIdx(UseMBB), Use);
+ if (EP.first != nullptr || EP.second)
return;
// Find the single reaching def, or determine if Use is jointly dominated by
// multiple values, and we may need to create even more phi-defs to preserve
// VNInfo SSA form. Perform a search for all predecessor blocks where we
// know the dominating VNInfo.
- if (findReachingDefs(LR, *UseMBB, Use, PhysReg))
+ if (findReachingDefs(LR, *UseMBB, Use, PhysReg, Undefs))
return;
// When there were multiple different values, we may need new PHIs.
@@ -271,8 +280,72 @@ void LiveRangeCalc::calculateValues() {
}
+bool LiveRangeCalc::isDefOnEntry(LiveRange &LR, ArrayRef<SlotIndex> Undefs,
+ MachineBasicBlock &MBB, BitVector &DefOnEntry,
+ BitVector &UndefOnEntry) {
+ unsigned BN = MBB.getNumber();
+ if (DefOnEntry[BN])
+ return true;
+ if (UndefOnEntry[BN])
+ return false;
+
+ auto MarkDefined =
+ [this,BN,&DefOnEntry,&UndefOnEntry] (MachineBasicBlock &B) -> bool {
+ for (MachineBasicBlock *S : B.successors())
+ DefOnEntry[S->getNumber()] = true;
+ DefOnEntry[BN] = true;
+ return true;
+ };
+
+ SetVector<unsigned> WorkList;
+ // Checking if the entry of MBB is reached by some def: add all predecessors
+ // that are potentially defined-on-exit to the work list.
+ for (MachineBasicBlock *P : MBB.predecessors())
+ WorkList.insert(P->getNumber());
+
+ for (unsigned i = 0; i != WorkList.size(); ++i) {
+ // Determine if the exit from the block is reached by some def.
+ unsigned N = WorkList[i];
+ MachineBasicBlock &B = *MF->getBlockNumbered(N);
+ if (Seen[N] && Map[&B].first != nullptr)
+ return MarkDefined(B);
+ SlotIndex Begin, End;
+ std::tie(Begin, End) = Indexes->getMBBRange(&B);
+ LiveRange::iterator UB = std::upper_bound(LR.begin(), LR.end(), End);
+ if (UB != LR.begin()) {
+ LiveRange::Segment &Seg = *std::prev(UB);
+ if (Seg.end > Begin) {
+ // There is a segment that overlaps B. If the range is not explicitly
+ // undefined between the end of the segment and the end of the block,
+ // treat the block as defined on exit. If it is, go to the next block
+ // on the work list.
+ if (LR.isUndefIn(Undefs, Seg.end, End))
+ continue;
+ return MarkDefined(B);
+ }
+ }
+
+      // No segment overlaps with this block. If the block is already known
+      // to be undefined on entry, or the range is undefined across it, do
+      // not process its predecessors.
+ if (UndefOnEntry[N] || LR.isUndefIn(Undefs, Begin, End)) {
+ UndefOnEntry[N] = true;
+ continue;
+ }
+ if (DefOnEntry[N])
+ return MarkDefined(B);
+
+ // Still don't know: add all predecessors to the work list.
+ for (MachineBasicBlock *P : B.predecessors())
+ WorkList.insert(P->getNumber());
+ }
+
+ UndefOnEntry[BN] = true;
+ return false;
+}
+
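
[Editor's note] The worklist walk in isDefOnEntry is a backward reachability query with memoization: the two bit vectors cache, per block, whether some def reaches its entry, so later queries on the same range terminate early. The following self-contained sketch reproduces the idea on a plain adjacency-list CFG; all names are invented, and the visited set plays the role of the SetVector work list above.

#include <cstddef>
#include <vector>

// Does any block with a def reach block BN through predecessor edges?
// DefOnEntry/UndefOnEntry memoize answers per block, like the
// EntryInfoMap bit vectors above.
bool defReachesEntry(std::size_t BN,
                     const std::vector<std::vector<std::size_t>> &Preds,
                     const std::vector<bool> &HasDef,
                     std::vector<char> &DefOnEntry,
                     std::vector<char> &UndefOnEntry) {
  if (DefOnEntry[BN]) return true;
  if (UndefOnEntry[BN]) return false;
  std::vector<char> Visited(Preds.size(), 0);
  std::vector<std::size_t> WorkList;
  for (std::size_t P : Preds[BN]) {
    Visited[P] = 1;
    WorkList.push_back(P);
  }
  for (std::size_t I = 0; I != WorkList.size(); ++I) {
    std::size_t N = WorkList[I];
    if (HasDef[N] || DefOnEntry[N]) {
      DefOnEntry[BN] = 1;   // cache the positive answer
      return true;
    }
    if (UndefOnEntry[N])
      continue;             // known dead end: skip its predecessors
    for (std::size_t P : Preds[N])
      if (!Visited[P]) {
        Visited[P] = 1;
        WorkList.push_back(P);
      }
  }
  UndefOnEntry[BN] = 1;     // cache the negative answer
  return false;
}
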
bool LiveRangeCalc::findReachingDefs(LiveRange &LR, MachineBasicBlock &UseMBB,
- SlotIndex Use, unsigned PhysReg) {
+ SlotIndex Use, unsigned PhysReg,
+ ArrayRef<SlotIndex> Undefs) {
unsigned UseMBBNum = UseMBB.getNumber();
// Block numbers where LR should be live-in.
@@ -282,6 +355,8 @@ bool LiveRangeCalc::findReachingDefs(LiveRange &LR, MachineBasicBlock &UseMBB,
bool UniqueVNI = true;
VNInfo *TheVNI = nullptr;
+ bool FoundUndef = false;
+
// Using Seen as a visited set, perform a BFS for all reaching defs.
for (unsigned i = 0; i != WorkList.size(); ++i) {
MachineBasicBlock *MBB = MF->getBlockNumbered(WorkList[i]);
@@ -294,18 +369,20 @@ bool LiveRangeCalc::findReachingDefs(LiveRange &LR, MachineBasicBlock &UseMBB,
const MachineInstr *MI = Indexes->getInstructionFromIndex(Use);
if (MI != nullptr)
errs() << Use << " " << *MI;
- llvm_unreachable("Use not jointly dominated by defs.");
+ report_fatal_error("Use not jointly dominated by defs.");
}
if (TargetRegisterInfo::isPhysicalRegister(PhysReg) &&
!MBB->isLiveIn(PhysReg)) {
MBB->getParent()->verify();
- errs() << "The register " << PrintReg(PhysReg)
+ const TargetRegisterInfo *TRI = MRI->getTargetRegisterInfo();
+ errs() << "The register " << PrintReg(PhysReg, TRI)
<< " needs to be live in to BB#" << MBB->getNumber()
<< ", but is missing from the live-in list.\n";
- llvm_unreachable("Invalid global physical register");
+ report_fatal_error("Invalid global physical register");
}
#endif
+ FoundUndef |= MBB->pred_empty();
for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(),
PE = MBB->pred_end(); PI != PE; ++PI) {
@@ -326,18 +403,21 @@ bool LiveRangeCalc::findReachingDefs(LiveRange &LR, MachineBasicBlock &UseMBB,
// First time we see Pred. Try to determine the live-out value, but set
// it as null if Pred is live-through with an unknown value.
- VNInfo *VNI = LR.extendInBlock(Start, End);
+ auto EP = LR.extendInBlock(Undefs, Start, End);
+ VNInfo *VNI = EP.first;
+ FoundUndef |= EP.second;
setLiveOutValue(Pred, VNI);
if (VNI) {
if (TheVNI && TheVNI != VNI)
UniqueVNI = false;
TheVNI = VNI;
- continue;
}
+ if (VNI || EP.second)
+ continue;
// No, we need a live-in value for Pred as well
if (Pred != &UseMBB)
- WorkList.push_back(Pred->getNumber());
+ WorkList.push_back(Pred->getNumber());
else
// Loopback to UseMBB, so value is really live through.
Use = SlotIndex();
@@ -345,6 +425,9 @@ bool LiveRangeCalc::findReachingDefs(LiveRange &LR, MachineBasicBlock &UseMBB,
}
LiveIn.clear();
+ FoundUndef |= (TheVNI == nullptr);
+ if (Undefs.size() > 0 && FoundUndef)
+ UniqueVNI = false;
// Both updateSSA() and LiveRangeUpdater benefit from ordered blocks, but
// neither require it. Skip the sorting overhead for small updates.
@@ -353,27 +436,39 @@ bool LiveRangeCalc::findReachingDefs(LiveRange &LR, MachineBasicBlock &UseMBB,
// If a unique reaching def was found, blit in the live ranges immediately.
if (UniqueVNI) {
+ assert(TheVNI != nullptr);
LiveRangeUpdater Updater(&LR);
- for (SmallVectorImpl<unsigned>::const_iterator I = WorkList.begin(),
- E = WorkList.end(); I != E; ++I) {
- SlotIndex Start, End;
- std::tie(Start, End) = Indexes->getMBBRange(*I);
- // Trim the live range in UseMBB.
- if (*I == UseMBBNum && Use.isValid())
- End = Use;
- else
- Map[MF->getBlockNumbered(*I)] = LiveOutPair(TheVNI, nullptr);
- Updater.add(Start, End, TheVNI);
+ for (unsigned BN : WorkList) {
+ SlotIndex Start, End;
+ std::tie(Start, End) = Indexes->getMBBRange(BN);
+ // Trim the live range in UseMBB.
+ if (BN == UseMBBNum && Use.isValid())
+ End = Use;
+ else
+ Map[MF->getBlockNumbered(BN)] = LiveOutPair(TheVNI, nullptr);
+ Updater.add(Start, End, TheVNI);
}
return true;
}
+ // Prepare the defined/undefined bit vectors.
+ auto EF = EntryInfoMap.find(&LR);
+ if (EF == EntryInfoMap.end()) {
+ unsigned N = MF->getNumBlockIDs();
+ EF = EntryInfoMap.insert({&LR, {BitVector(), BitVector()}}).first;
+ EF->second.first.resize(N);
+ EF->second.second.resize(N);
+ }
+ BitVector &DefOnEntry = EF->second.first;
+ BitVector &UndefOnEntry = EF->second.second;
+
// Multiple values were found, so transfer the work list to the LiveIn array
// where UpdateSSA will use it as a work list.
LiveIn.reserve(WorkList.size());
- for (SmallVectorImpl<unsigned>::const_iterator
- I = WorkList.begin(), E = WorkList.end(); I != E; ++I) {
- MachineBasicBlock *MBB = MF->getBlockNumbered(*I);
+ for (unsigned BN : WorkList) {
+ MachineBasicBlock *MBB = MF->getBlockNumbered(BN);
+    if (Undefs.size() > 0 &&
+        !isDefOnEntry(LR, Undefs, *MBB, DefOnEntry, UndefOnEntry))
+ continue;
addLiveInBlock(LR, DomTree->getNode(MBB));
if (MBB == &UseMBB)
LiveIn.back().Kill = Use;
@@ -458,10 +553,12 @@ void LiveRangeCalc::updateSSA() {
I.DomNode = nullptr;
// Add liveness since updateFromLiveIns now skips this node.
- if (I.Kill.isValid())
- LR.addSegment(LiveInterval::Segment(Start, I.Kill, VNI));
- else {
- LR.addSegment(LiveInterval::Segment(Start, End, VNI));
+ if (I.Kill.isValid()) {
+ if (VNI)
+ LR.addSegment(LiveInterval::Segment(Start, I.Kill, VNI));
+ } else {
+ if (VNI)
+ LR.addSegment(LiveInterval::Segment(Start, End, VNI));
LOP = LiveOutPair(VNI, Node);
}
} else if (IDomValue.first) {
diff --git a/contrib/llvm/lib/CodeGen/LiveRangeCalc.h b/contrib/llvm/lib/CodeGen/LiveRangeCalc.h
index 9de48b7..1a7598f 100644
--- a/contrib/llvm/lib/CodeGen/LiveRangeCalc.h
+++ b/contrib/llvm/lib/CodeGen/LiveRangeCalc.h
@@ -22,6 +22,7 @@
#ifndef LLVM_LIB_CODEGEN_LIVERANGECALC_H
#define LLVM_LIB_CODEGEN_LIVERANGECALC_H
+#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/IndexedMap.h"
#include "llvm/CodeGen/LiveInterval.h"
@@ -53,6 +54,19 @@ class LiveRangeCalc {
/// when switching live ranges.
BitVector Seen;
+  /// Map each LiveRange to the sets of blocks (represented by bit vectors)
+  /// that are defined on entry and undefined on entry for that range.
+  /// A block is defined on entry if there is a path from at least one of
+  /// the defs in the live range to the entry of the block, and conversely,
+  /// a block is undefined on entry if there is no such path (i.e. no
+ /// definition reaches the entry of the block). A single LiveRangeCalc
+ /// object is used to track live-out information for multiple registers
+ /// in live range splitting (which is ok, since the live ranges of these
+ /// registers do not overlap), but the defined/undefined information must
+ /// be kept separate for each individual range.
+ /// By convention, EntryInfoMap[&LR] = { Defined, Undefined }.
+ std::map<LiveRange*,std::pair<BitVector,BitVector>> EntryInfoMap;
+
/// Map each basic block where a live range is live out to the live-out value
/// and its defining block.
///
@@ -101,18 +115,31 @@ class LiveRangeCalc {
/// used to add entries directly.
SmallVector<LiveInBlock, 16> LiveIn;
- /// Assuming that @p LR is live-in to @p UseMBB, find the set of defs that can
- /// reach it.
+  /// Check if the entry to block @p MBB can be reached by any of the defs
+  /// in @p LR. Return true if at least one of the defs reaches the entry
+  /// to @p MBB.
+ bool isDefOnEntry(LiveRange &LR, ArrayRef<SlotIndex> Undefs,
+ MachineBasicBlock &MBB, BitVector &DefOnEntry,
+ BitVector &UndefOnEntry);
+
+ /// Find the set of defs that can reach @p Kill. @p Kill must belong to
+ /// @p UseMBB.
///
- /// If only one def can reach @p UseMBB, all paths from the def to @p UseMBB
- /// are added to @p LR, and the function returns true.
+ /// If exactly one def can reach @p UseMBB, and the def dominates @p Kill,
+ /// all paths from the def to @p UseMBB are added to @p LR, and the function
+ /// returns true.
///
/// If multiple values can reach @p UseMBB, the blocks that need @p LR to be
/// live in are added to the LiveIn array, and the function returns false.
///
+  /// The array @p Undefs provides the locations where the range @p LR
+  /// becomes undefined by <def,read-undef> operands on other subranges. If
+  /// @p Undefs is non-empty and @p Kill is jointly dominated only by the
+  /// entries of @p Undefs, the function returns false.
+ ///
/// PhysReg, when set, is used to verify live-in lists on basic blocks.
bool findReachingDefs(LiveRange &LR, MachineBasicBlock &UseMBB,
- SlotIndex Kill, unsigned PhysReg);
+ SlotIndex Kill, unsigned PhysReg,
+ ArrayRef<SlotIndex> Undefs);
/// updateSSA - Compute the values that will be live in to all requested
/// blocks in LiveIn. Create PHI-def values as required to preserve SSA form.
@@ -127,9 +154,16 @@ class LiveRangeCalc {
/// Extend the live range of @p LR to reach all uses of Reg.
///
- /// All uses must be jointly dominated by existing liveness. PHI-defs are
- /// inserted as needed to preserve SSA form.
- void extendToUses(LiveRange &LR, unsigned Reg, LaneBitmask LaneMask);
+ /// If @p LR is a main range, or if @p LI is null, then all uses must be
+ /// jointly dominated by the definitions from @p LR. If @p LR is a subrange
+ /// of the live interval @p LI, corresponding to lane mask @p LaneMask,
+ /// all uses must be jointly dominated by the definitions from @p LR
+ /// together with definitions of other lanes where @p LR becomes undefined
+ /// (via <def,read-undef> operands).
+ /// If @p LR is a main range, the @p LaneMask should be set to ~0, i.e.
+ /// LaneBitmask::getAll().
+ void extendToUses(LiveRange &LR, unsigned Reg, LaneBitmask LaneMask,
+ LiveInterval *LI = nullptr);
/// Reset Map and Seen fields.
void resetLiveOutMap();
@@ -169,7 +203,8 @@ public:
/// inserted as required to preserve SSA form.
///
/// PhysReg, when set, is used to verify live-in lists on basic blocks.
- void extend(LiveRange &LR, SlotIndex Use, unsigned PhysReg = 0);
+ void extend(LiveRange &LR, SlotIndex Use, unsigned PhysReg,
+ ArrayRef<SlotIndex> Undefs);
/// createDeadDefs - Create a dead def in LI for every def operand of Reg.
/// Each instruction defining Reg gets a new VNInfo with a corresponding
@@ -181,7 +216,7 @@ public:
/// All uses must be jointly dominated by existing liveness. PHI-defs are
/// inserted as needed to preserve SSA form.
void extendToUses(LiveRange &LR, unsigned PhysReg) {
- extendToUses(LR, PhysReg, ~0u);
+ extendToUses(LR, PhysReg, LaneBitmask::getAll());
}
/// Calculates liveness for the register specified in live interval @p LI.
diff --git a/contrib/llvm/lib/CodeGen/LiveRangeEdit.cpp b/contrib/llvm/lib/CodeGen/LiveRangeEdit.cpp
index b35c0ad..7f1c69c 100644
--- a/contrib/llvm/lib/CodeGen/LiveRangeEdit.cpp
+++ b/contrib/llvm/lib/CodeGen/LiveRangeEdit.cpp
@@ -37,6 +37,13 @@ LiveInterval &LiveRangeEdit::createEmptyIntervalFrom(unsigned OldReg) {
VRM->setIsSplitFromReg(VReg, VRM->getOriginal(OldReg));
}
LiveInterval &LI = LIS.createEmptyInterval(VReg);
+ // Create empty subranges if the OldReg's interval has them. Do not create
+ // the main range here---it will be constructed later after the subranges
+ // have been finalized.
+ LiveInterval &OldLI = LIS.getInterval(OldReg);
+ VNInfo::Allocator &Alloc = LIS.getVNInfoAllocator();
+ for (LiveInterval::SubRange &S : OldLI.subranges())
+ LI.createSubRange(Alloc, S.LaneMask);
return LI;
}
@@ -66,6 +73,8 @@ void LiveRangeEdit::scanRemattable(AliasAnalysis *aa) {
unsigned Original = VRM->getOriginal(getReg());
LiveInterval &OrigLI = LIS.getInterval(Original);
VNInfo *OrigVNI = OrigLI.getVNInfoAt(VNI->def);
+ if (!OrigVNI)
+ continue;
MachineInstr *DefMI = LIS.getInstructionFromIndex(OrigVNI->def);
if (!DefMI)
continue;
@@ -94,7 +103,7 @@ bool LiveRangeEdit::allUsesAvailableAt(const MachineInstr *OrigMI,
// We can't remat physreg uses, unless it is a constant.
if (TargetRegisterInfo::isPhysicalRegister(MO.getReg())) {
- if (MRI.isConstantPhysReg(MO.getReg(), *OrigMI->getParent()->getParent()))
+ if (MRI.isConstantPhysReg(MO.getReg()))
continue;
return false;
}
@@ -227,7 +236,7 @@ bool LiveRangeEdit::useIsKill(const LiveInterval &LI,
unsigned SubReg = MO.getSubReg();
LaneBitmask LaneMask = TRI.getSubRegIndexLaneMask(SubReg);
for (const LiveInterval::SubRange &S : LI.subranges()) {
- if ((S.LaneMask & LaneMask) != 0 && S.Query(Idx).isKill())
+ if ((S.LaneMask & LaneMask).any() && S.Query(Idx).isKill())
return true;
}
return false;
@@ -263,7 +272,11 @@ void LiveRangeEdit::eliminateDeadDef(MachineInstr *MI, ToShrinkSet &ToShrink,
bool ReadsPhysRegs = false;
bool isOrigDef = false;
unsigned Dest;
- if (VRM && MI->getOperand(0).isReg()) {
+  // Only optimize the rematerialization case when the instruction has a
+  // single def, since otherwise we could leave dead defs behind. This case
+  // is extremely rare.
+ if (VRM && MI->getOperand(0).isReg() && MI->getOperand(0).isDef() &&
+ MI->getDesc().getNumDefs() == 1) {
Dest = MI->getOperand(0).getReg();
unsigned Original = VRM->getOriginal(Dest);
LiveInterval &OrigLI = LIS.getInterval(Original);
@@ -335,6 +348,7 @@ void LiveRangeEdit::eliminateDeadDef(MachineInstr *MI, ToShrinkSet &ToShrink,
// allocations of the func are done.
if (isOrigDef && DeadRemats && TII.isTriviallyReMaterializable(*MI, AA)) {
LiveInterval &NewLI = createEmptyIntervalFrom(Dest);
+ NewLI.removeEmptySubRanges();
VNInfo *VNI = NewLI.getNextValue(Idx, LIS.getVNInfoAllocator());
NewLI.addSegment(LiveInterval::Segment(Idx, Idx.getDeadSlot(), VNI));
pop_back();
@@ -428,6 +442,9 @@ LiveRangeEdit::MRI_NoteNewVirtualRegister(unsigned VReg)
if (VRM)
VRM->grow();
+ if (Parent && !Parent->isSpillable())
+ LIS.getInterval(VReg).markNotSpillable();
+
NewRegs.push_back(VReg);
}
diff --git a/contrib/llvm/lib/CodeGen/LiveRegMatrix.cpp b/contrib/llvm/lib/CodeGen/LiveRegMatrix.cpp
index 7ee87c1..7a51386 100644
--- a/contrib/llvm/lib/CodeGen/LiveRegMatrix.cpp
+++ b/contrib/llvm/lib/CodeGen/LiveRegMatrix.cpp
@@ -70,15 +70,16 @@ void LiveRegMatrix::releaseMemory() {
}
}
-template<typename Callable>
-bool foreachUnit(const TargetRegisterInfo *TRI, LiveInterval &VRegInterval,
- unsigned PhysReg, Callable Func) {
+template <typename Callable>
+static bool foreachUnit(const TargetRegisterInfo *TRI,
+ LiveInterval &VRegInterval, unsigned PhysReg,
+ Callable Func) {
if (VRegInterval.hasSubRanges()) {
for (MCRegUnitMaskIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
unsigned Unit = (*Units).first;
LaneBitmask Mask = (*Units).second;
for (LiveInterval::SubRange &S : VRegInterval.subranges()) {
- if (S.LaneMask & Mask) {
+ if ((S.LaneMask & Mask).any()) {
if (Func(Unit, S))
return true;
break;
diff --git a/contrib/llvm/lib/CodeGen/LiveVariables.cpp b/contrib/llvm/lib/CodeGen/LiveVariables.cpp
index dd87216..269b990a31 100644
--- a/contrib/llvm/lib/CodeGen/LiveVariables.cpp
+++ b/contrib/llvm/lib/CodeGen/LiveVariables.cpp
@@ -643,7 +643,7 @@ bool LiveVariables::runOnMachineFunction(MachineFunction &mf) {
// register before its uses due to dominance properties of SSA (except for PHI
// nodes, which are treated as a special case).
MachineBasicBlock *Entry = &MF->front();
- SmallPtrSet<MachineBasicBlock*,16> Visited;
+ df_iterator_default_set<MachineBasicBlock*,16> Visited;
for (MachineBasicBlock *MBB : depth_first_ext(Entry, Visited)) {
runOnBlock(MBB, NumRegs);
diff --git a/contrib/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp b/contrib/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp
index af7392f..e189fb0 100644
--- a/contrib/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp
+++ b/contrib/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp
@@ -51,12 +51,21 @@ namespace {
MachineBasicBlock::iterator MI; // Instr referencing the frame
int64_t LocalOffset; // Local offset of the frame idx referenced
int FrameIdx; // The frame index
+
+    // Order in which the reference instruction appears in the program. Used
+    // to ensure a deterministic sort when multiple instructions reference
+    // the same location.
+ unsigned Order;
+
public:
- FrameRef(MachineBasicBlock::iterator I, int64_t Offset, int Idx) :
- MI(I), LocalOffset(Offset), FrameIdx(Idx) {}
+ FrameRef(MachineInstr *I, int64_t Offset, int Idx, unsigned Ord) :
+ MI(I), LocalOffset(Offset), FrameIdx(Idx), Order(Ord) {}
+
bool operator<(const FrameRef &RHS) const {
- return LocalOffset < RHS.LocalOffset;
+ return std::tie(LocalOffset, FrameIdx, Order) <
+ std::tie(RHS.LocalOffset, RHS.FrameIdx, RHS.Order);
}
+
MachineBasicBlock::iterator getMachineInstr() const { return MI; }
int64_t getLocalOffset() const { return LocalOffset; }
int getFrameIndex() const { return FrameIdx; }
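
[Editor's note] The operator< above relies on std::tie, which builds a tuple of references compared lexicographically: FrameIdx breaks ties between equal offsets, and Order (program order) breaks ties between equal frame indices, making the sort deterministic across runs. A minimal standalone illustration:

#include <cassert>
#include <tuple>

int main() {
  long Off1 = 8, Off2 = 8;     // same local offset
  int Idx1 = 2, Idx2 = 2;      // same frame index
  unsigned Ord1 = 5, Ord2 = 7; // differ only in program order
  // Equal leading fields fall through to the last one, so identical
  // locations still compare consistently.
  assert(std::tie(Off1, Idx1, Ord1) < std::tie(Off2, Idx2, Ord2));
}
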
@@ -67,17 +76,17 @@ namespace {
/// StackObjSet - A set of stack object indexes
typedef SmallSetVector<int, 8> StackObjSet;
- void AdjustStackOffset(MachineFrameInfo *MFI, int FrameIdx, int64_t &Offset,
+ void AdjustStackOffset(MachineFrameInfo &MFI, int FrameIdx, int64_t &Offset,
bool StackGrowsDown, unsigned &MaxAlign);
void AssignProtectedObjSet(const StackObjSet &UnassignedObjs,
SmallSet<int, 16> &ProtectedObjs,
- MachineFrameInfo *MFI, bool StackGrowsDown,
+ MachineFrameInfo &MFI, bool StackGrowsDown,
int64_t &Offset, unsigned &MaxAlign);
void calculateFrameObjectOffsets(MachineFunction &Fn);
bool insertFrameReferenceRegisters(MachineFunction &Fn);
public:
static char ID; // Pass identification, replacement for typeid
- explicit LocalStackSlotPass() : MachineFunctionPass(ID) {
+ explicit LocalStackSlotPass() : MachineFunctionPass(ID) {
initializeLocalStackSlotPassPass(*PassRegistry::getPassRegistry());
}
bool runOnMachineFunction(MachineFunction &MF) override;
@@ -102,9 +111,9 @@ INITIALIZE_PASS_END(LocalStackSlotPass, "localstackalloc",
bool LocalStackSlotPass::runOnMachineFunction(MachineFunction &MF) {
- MachineFrameInfo *MFI = MF.getFrameInfo();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
- unsigned LocalObjectCount = MFI->getObjectIndexEnd();
+ unsigned LocalObjectCount = MFI.getObjectIndexEnd();
// If the target doesn't want/need this pass, or if there are no locals
// to consider, early exit.
@@ -112,7 +121,7 @@ bool LocalStackSlotPass::runOnMachineFunction(MachineFunction &MF) {
return true;
// Make sure we have enough space to store the local offsets.
- LocalOffsets.resize(MFI->getObjectIndexEnd());
+ LocalOffsets.resize(MFI.getObjectIndexEnd());
// Lay out the local blob.
calculateFrameObjectOffsets(MF);
@@ -125,21 +134,21 @@ bool LocalStackSlotPass::runOnMachineFunction(MachineFunction &MF) {
// Otherwise, PEI can do a bit better job of getting the alignment right
// without a hole at the start since it knows the alignment of the stack
// at the start of local allocation, and this pass doesn't.
- MFI->setUseLocalStackAllocationBlock(UsedBaseRegs);
+ MFI.setUseLocalStackAllocationBlock(UsedBaseRegs);
return true;
}
/// AdjustStackOffset - Helper function used to adjust the stack frame offset.
-void LocalStackSlotPass::AdjustStackOffset(MachineFrameInfo *MFI,
+void LocalStackSlotPass::AdjustStackOffset(MachineFrameInfo &MFI,
int FrameIdx, int64_t &Offset,
bool StackGrowsDown,
unsigned &MaxAlign) {
// If the stack grows down, add the object size to find the lowest address.
if (StackGrowsDown)
- Offset += MFI->getObjectSize(FrameIdx);
+ Offset += MFI.getObjectSize(FrameIdx);
- unsigned Align = MFI->getObjectAlignment(FrameIdx);
+ unsigned Align = MFI.getObjectAlignment(FrameIdx);
// If the alignment of this object is greater than that of the stack, then
// increase the stack alignment to match.
@@ -154,10 +163,10 @@ void LocalStackSlotPass::AdjustStackOffset(MachineFrameInfo *MFI,
// Keep the offset available for base register allocation
LocalOffsets[FrameIdx] = LocalOffset;
// And tell MFI about it for PEI to use later
- MFI->mapLocalFrameObject(FrameIdx, LocalOffset);
+ MFI.mapLocalFrameObject(FrameIdx, LocalOffset);
if (!StackGrowsDown)
- Offset += MFI->getObjectSize(FrameIdx);
+ Offset += MFI.getObjectSize(FrameIdx);
++NumAllocations;
}
@@ -166,7 +175,7 @@ void LocalStackSlotPass::AdjustStackOffset(MachineFrameInfo *MFI,
/// those required to be close to the Stack Protector) to stack offsets.
void LocalStackSlotPass::AssignProtectedObjSet(const StackObjSet &UnassignedObjs,
SmallSet<int, 16> &ProtectedObjs,
- MachineFrameInfo *MFI,
+ MachineFrameInfo &MFI,
bool StackGrowsDown, int64_t &Offset,
unsigned &MaxAlign) {
@@ -183,7 +192,7 @@ void LocalStackSlotPass::AssignProtectedObjSet(const StackObjSet &UnassignedObjs
///
void LocalStackSlotPass::calculateFrameObjectOffsets(MachineFunction &Fn) {
// Loop over all of the stack objects, assigning sequential addresses...
- MachineFrameInfo *MFI = Fn.getFrameInfo();
+ MachineFrameInfo &MFI = Fn.getFrameInfo();
const TargetFrameLowering &TFI = *Fn.getSubtarget().getFrameLowering();
bool StackGrowsDown =
TFI.getStackGrowthDirection() == TargetFrameLowering::StackGrowsDown;
@@ -194,22 +203,22 @@ void LocalStackSlotPass::calculateFrameObjectOffsets(MachineFunction &Fn) {
// Make sure that the stack protector comes before the local variables on the
// stack.
SmallSet<int, 16> ProtectedObjs;
- if (MFI->getStackProtectorIndex() >= 0) {
+ if (MFI.getStackProtectorIndex() >= 0) {
StackObjSet LargeArrayObjs;
StackObjSet SmallArrayObjs;
StackObjSet AddrOfObjs;
- AdjustStackOffset(MFI, MFI->getStackProtectorIndex(), Offset,
+ AdjustStackOffset(MFI, MFI.getStackProtectorIndex(), Offset,
StackGrowsDown, MaxAlign);
// Assign large stack objects first.
- for (unsigned i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i) {
- if (MFI->isDeadObjectIndex(i))
+ for (unsigned i = 0, e = MFI.getObjectIndexEnd(); i != e; ++i) {
+ if (MFI.isDeadObjectIndex(i))
continue;
- if (MFI->getStackProtectorIndex() == (int)i)
+ if (MFI.getStackProtectorIndex() == (int)i)
continue;
- switch (SP->getSSPLayout(MFI->getObjectAllocation(i))) {
+ switch (SP->getSSPLayout(MFI.getObjectAllocation(i))) {
case StackProtector::SSPLK_None:
continue;
case StackProtector::SSPLK_SmallArray:
@@ -235,10 +244,10 @@ void LocalStackSlotPass::calculateFrameObjectOffsets(MachineFunction &Fn) {
// Then assign frame offsets to stack objects that are not used to spill
// callee saved registers.
- for (unsigned i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i) {
- if (MFI->isDeadObjectIndex(i))
+ for (unsigned i = 0, e = MFI.getObjectIndexEnd(); i != e; ++i) {
+ if (MFI.isDeadObjectIndex(i))
continue;
- if (MFI->getStackProtectorIndex() == (int)i)
+ if (MFI.getStackProtectorIndex() == (int)i)
continue;
if (ProtectedObjs.count(i))
continue;
@@ -247,8 +256,8 @@ void LocalStackSlotPass::calculateFrameObjectOffsets(MachineFunction &Fn) {
}
// Remember how big this blob of stack space is
- MFI->setLocalFrameSize(Offset);
- MFI->setLocalFrameMaxAlign(MaxAlign);
+ MFI.setLocalFrameSize(Offset);
+ MFI.setLocalFrameMaxAlign(MaxAlign);
}
static inline bool
@@ -273,7 +282,7 @@ bool LocalStackSlotPass::insertFrameReferenceRegisters(MachineFunction &Fn) {
// and ask the target to create a defining instruction for it.
bool UsedBaseReg = false;
- MachineFrameInfo *MFI = Fn.getFrameInfo();
+ MachineFrameInfo &MFI = Fn.getFrameInfo();
const TargetRegisterInfo *TRI = Fn.getSubtarget().getRegisterInfo();
const TargetFrameLowering &TFI = *Fn.getSubtarget().getFrameLowering();
bool StackGrowsDown =
@@ -285,6 +294,8 @@ bool LocalStackSlotPass::insertFrameReferenceRegisters(MachineFunction &Fn) {
// choose the first one).
SmallVector<FrameRef, 64> FrameReferenceInsns;
+ unsigned Order = 0;
+
for (MachineBasicBlock &BB : Fn) {
for (MachineInstr &MI : BB) {
// Debug value, stackmap and patchpoint instructions can't be out of
@@ -305,21 +316,22 @@ bool LocalStackSlotPass::insertFrameReferenceRegisters(MachineFunction &Fn) {
// an object allocated in the local block.
if (MI.getOperand(i).isFI()) {
// Don't try this with values not in the local block.
- if (!MFI->isObjectPreAllocated(MI.getOperand(i).getIndex()))
+ if (!MFI.isObjectPreAllocated(MI.getOperand(i).getIndex()))
break;
int Idx = MI.getOperand(i).getIndex();
int64_t LocalOffset = LocalOffsets[Idx];
if (!TRI->needsFrameBaseReg(&MI, LocalOffset))
break;
- FrameReferenceInsns.push_back(FrameRef(&MI, LocalOffset, Idx));
+ FrameReferenceInsns.push_back(FrameRef(&MI, LocalOffset, Idx, Order++));
break;
}
}
}
}
- // Sort the frame references by local offset
- array_pod_sort(FrameReferenceInsns.begin(), FrameReferenceInsns.end());
+ // Sort the frame references by local offset.
+  // Use the frame index and instruction order as tie-breakers in case MIs
+  // have the same offset.
+ std::sort(FrameReferenceInsns.begin(), FrameReferenceInsns.end());
MachineBasicBlock *Entry = &Fn.front();
@@ -332,7 +344,7 @@ bool LocalStackSlotPass::insertFrameReferenceRegisters(MachineFunction &Fn) {
MachineInstr &MI = *FR.getMachineInstr();
int64_t LocalOffset = FR.getLocalOffset();
int FrameIdx = FR.getFrameIndex();
- assert(MFI->isObjectPreAllocated(FrameIdx) &&
+ assert(MFI.isObjectPreAllocated(FrameIdx) &&
"Only pre-allocated locals expected!");
DEBUG(dbgs() << "Considering: " << MI);
@@ -349,7 +361,7 @@ bool LocalStackSlotPass::insertFrameReferenceRegisters(MachineFunction &Fn) {
assert(idx < MI.getNumOperands() && "Cannot find FI operand");
int64_t Offset = 0;
- int64_t FrameSizeAdjust = StackGrowsDown ? MFI->getLocalFrameSize() : 0;
+ int64_t FrameSizeAdjust = StackGrowsDown ? MFI.getLocalFrameSize() : 0;
DEBUG(dbgs() << " Replacing FI in: " << MI);
diff --git a/contrib/llvm/lib/CodeGen/LowLevelType.cpp b/contrib/llvm/lib/CodeGen/LowLevelType.cpp
new file mode 100644
index 0000000..d74b730
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/LowLevelType.cpp
@@ -0,0 +1,71 @@
+//===-- llvm/CodeGen/LowLevelType.cpp -------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file This file implements the more header-heavy bits of the LLT class to
+/// avoid polluting users' namespaces.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/LowLevelType.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+LLT::LLT(Type &Ty, const DataLayout &DL) {
+ if (auto VTy = dyn_cast<VectorType>(&Ty)) {
+ SizeInBits = VTy->getElementType()->getPrimitiveSizeInBits();
+ ElementsOrAddrSpace = VTy->getNumElements();
+ Kind = ElementsOrAddrSpace == 1 ? Scalar : Vector;
+ } else if (auto PTy = dyn_cast<PointerType>(&Ty)) {
+ Kind = Pointer;
+ SizeInBits = DL.getTypeSizeInBits(&Ty);
+ ElementsOrAddrSpace = PTy->getAddressSpace();
+ } else if (Ty.isSized()) {
+ // Aggregates are no different from real scalars as far as GlobalISel is
+ // concerned.
+ Kind = Scalar;
+ SizeInBits = DL.getTypeSizeInBits(&Ty);
+ ElementsOrAddrSpace = 1;
+ assert(SizeInBits != 0 && "invalid zero-sized type");
+ } else {
+ Kind = Invalid;
+ SizeInBits = ElementsOrAddrSpace = 0;
+ }
+}
+
+LLT::LLT(MVT VT) {
+ if (VT.isVector()) {
+ SizeInBits = VT.getVectorElementType().getSizeInBits();
+ ElementsOrAddrSpace = VT.getVectorNumElements();
+ Kind = ElementsOrAddrSpace == 1 ? Scalar : Vector;
+ } else if (VT.isValid()) {
+ // Aggregates are no different from real scalars as far as GlobalISel is
+ // concerned.
+ Kind = Scalar;
+ SizeInBits = VT.getSizeInBits();
+ ElementsOrAddrSpace = 1;
+ assert(SizeInBits != 0 && "invalid zero-sized type");
+ } else {
+ Kind = Invalid;
+ SizeInBits = ElementsOrAddrSpace = 0;
+ }
+}
+
+void LLT::print(raw_ostream &OS) const {
+ if (isVector())
+ OS << "<" << ElementsOrAddrSpace << " x s" << SizeInBits << ">";
+ else if (isPointer())
+ OS << "p" << getAddressSpace();
+ else if (isValid()) {
+ assert(isScalar() && "unexpected type");
+ OS << "s" << getScalarSizeInBits();
+ } else
+ llvm_unreachable("trying to print an invalid type");
+}
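
[Editor's note] For reference, LLT::print above emits the textual forms that the MIR lexer changes below learn to read: "sN" for scalars, "pN" for pointers (N being the address space), and "<N x sM>" for vectors. A hedged usage sketch, assuming only the constructors defined in this file:

// Sketch only; relies on the LLT(MVT) constructor above.
LLT S32(MVT::i32);      // prints as "s32"
LLT V4S32(MVT::v4i32);  // prints as "<4 x s32>"
S32.print(errs());      // takes the scalar branch of LLT::print
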
diff --git a/contrib/llvm/lib/CodeGen/MIRParser/MILexer.cpp b/contrib/llvm/lib/CodeGen/MIRParser/MILexer.cpp
index 6e3de52..1f1ce6e 100644
--- a/contrib/llvm/lib/CodeGen/MIRParser/MILexer.cpp
+++ b/contrib/llvm/lib/CodeGen/MIRParser/MILexer.cpp
@@ -173,14 +173,20 @@ static Cursor lexName(Cursor C, MIToken &Token, MIToken::TokenKind Type,
return C;
}
-static Cursor maybeLexIntegerType(Cursor C, MIToken &Token) {
- if (C.peek() != 'i' || !isdigit(C.peek(1)))
+static Cursor maybeLexIntegerOrScalarType(Cursor C, MIToken &Token) {
+ if ((C.peek() != 'i' && C.peek() != 's' && C.peek() != 'p') ||
+ !isdigit(C.peek(1)))
return None;
+ char Kind = C.peek();
auto Range = C;
- C.advance(); // Skip 'i'
+ C.advance(); // Skip 'i', 's', or 'p'
while (isdigit(C.peek()))
C.advance();
- Token.reset(MIToken::IntegerType, Range.upto(C));
+
+ Token.reset(Kind == 'i'
+ ? MIToken::IntegerType
+ : (Kind == 's' ? MIToken::ScalarType : MIToken::PointerType),
+ Range.upto(C));
return C;
}
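
[Editor's note] With this change the lexer accepts three letter-plus-digits type tokens: "i32" stays an IntegerType, while "s64" and "p0" lex as the new ScalarType and PointerType used for GlobalISel's low-level types. A self-contained sketch of the same classification (all names invented):

#include <cassert>
#include <cctype>

enum Kind { IntegerType, ScalarType, PointerType, None };

// Classify a candidate type token by its first letter, mirroring
// maybeLexIntegerOrScalarType above.
static Kind classify(const char *S) {
  if ((S[0] != 'i' && S[0] != 's' && S[0] != 'p') ||
      !isdigit(static_cast<unsigned char>(S[1])))
    return None;
  return S[0] == 'i' ? IntegerType : S[0] == 's' ? ScalarType : PointerType;
}

int main() {
  assert(classify("i32") == IntegerType);
  assert(classify("s64") == ScalarType);
  assert(classify("p0") == PointerType);
  assert(classify("x8") == None);
}
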
@@ -199,12 +205,13 @@ static MIToken::TokenKind getIdentifierKind(StringRef Identifier) {
.Case("tied-def", MIToken::kw_tied_def)
.Case("frame-setup", MIToken::kw_frame_setup)
.Case("debug-location", MIToken::kw_debug_location)
- .Case(".cfi_same_value", MIToken::kw_cfi_same_value)
- .Case(".cfi_offset", MIToken::kw_cfi_offset)
- .Case(".cfi_def_cfa_register", MIToken::kw_cfi_def_cfa_register)
- .Case(".cfi_def_cfa_offset", MIToken::kw_cfi_def_cfa_offset)
- .Case(".cfi_def_cfa", MIToken::kw_cfi_def_cfa)
+ .Case("same_value", MIToken::kw_cfi_same_value)
+ .Case("offset", MIToken::kw_cfi_offset)
+ .Case("def_cfa_register", MIToken::kw_cfi_def_cfa_register)
+ .Case("def_cfa_offset", MIToken::kw_cfi_def_cfa_offset)
+ .Case("def_cfa", MIToken::kw_cfi_def_cfa)
.Case("blockaddress", MIToken::kw_blockaddress)
+ .Case("intrinsic", MIToken::kw_intrinsic)
.Case("target-index", MIToken::kw_target_index)
.Case("half", MIToken::kw_half)
.Case("float", MIToken::kw_float)
@@ -215,6 +222,7 @@ static MIToken::TokenKind getIdentifierKind(StringRef Identifier) {
.Case("target-flags", MIToken::kw_target_flags)
.Case("volatile", MIToken::kw_volatile)
.Case("non-temporal", MIToken::kw_non_temporal)
+ .Case("dereferenceable", MIToken::kw_dereferenceable)
.Case("invariant", MIToken::kw_invariant)
.Case("align", MIToken::kw_align)
.Case("stack", MIToken::kw_stack)
@@ -227,11 +235,13 @@ static MIToken::TokenKind getIdentifierKind(StringRef Identifier) {
.Case("landing-pad", MIToken::kw_landing_pad)
.Case("liveins", MIToken::kw_liveins)
.Case("successors", MIToken::kw_successors)
+ .Case("floatpred", MIToken::kw_floatpred)
+ .Case("intpred", MIToken::kw_intpred)
.Default(MIToken::Identifier);
}
static Cursor maybeLexIdentifier(Cursor C, MIToken &Token) {
- if (!isalpha(C.peek()) && C.peek() != '_' && C.peek() != '.')
+ if (!isalpha(C.peek()) && C.peek() != '_')
return None;
auto Range = C;
while (isIdentifierChar(C.peek()))
@@ -366,6 +376,11 @@ static Cursor lexVirtualRegister(Cursor C, MIToken &Token) {
return C;
}
+/// Returns true for a character allowed in a register name.
+static bool isRegisterChar(char C) {
+ return isIdentifierChar(C) && C != '.';
+}
+
static Cursor maybeLexRegister(Cursor C, MIToken &Token) {
if (C.peek() != '%')
return None;
@@ -373,7 +388,7 @@ static Cursor maybeLexRegister(Cursor C, MIToken &Token) {
return lexVirtualRegister(C, Token);
auto Range = C;
C.advance(); // Skip '%'
- while (isIdentifierChar(C.peek()))
+ while (isRegisterChar(C.peek()))
C.advance();
Token.reset(MIToken::NamedRegister, Range.upto(C))
.setStringValue(Range.upto(C).drop_front(1)); // Drop the '%'
@@ -409,19 +424,6 @@ static bool isValidHexFloatingPointPrefix(char C) {
return C == 'H' || C == 'K' || C == 'L' || C == 'M';
}
-static Cursor maybeLexHexFloatingPointLiteral(Cursor C, MIToken &Token) {
- if (C.peek() != '0' || C.peek(1) != 'x')
- return None;
- Cursor Range = C;
- C.advance(2); // Skip '0x'
- if (isValidHexFloatingPointPrefix(C.peek()))
- C.advance();
- while (isxdigit(C.peek()))
- C.advance();
- Token.reset(MIToken::FloatingPointLiteral, Range.upto(C));
- return C;
-}
-
static Cursor lexFloatingPointLiteral(Cursor Range, Cursor C, MIToken &Token) {
C.advance();
// Skip over [0-9]*([eE][-+]?[0-9]+)?
@@ -438,6 +440,28 @@ static Cursor lexFloatingPointLiteral(Cursor Range, Cursor C, MIToken &Token) {
return C;
}
+static Cursor maybeLexHexadecimalLiteral(Cursor C, MIToken &Token) {
+ if (C.peek() != '0' || (C.peek(1) != 'x' && C.peek(1) != 'X'))
+ return None;
+ Cursor Range = C;
+ C.advance(2);
+ unsigned PrefLen = 2;
+ if (isValidHexFloatingPointPrefix(C.peek())) {
+ C.advance();
+ PrefLen++;
+ }
+ while (isxdigit(C.peek()))
+ C.advance();
+ StringRef StrVal = Range.upto(C);
+ if (StrVal.size() <= PrefLen)
+ return None;
+ if (PrefLen == 2)
+ Token.reset(MIToken::HexLiteral, Range.upto(C));
+ else // It must be 3, which means that there was a floating-point prefix.
+ Token.reset(MIToken::FloatingPointLiteral, Range.upto(C));
+ return C;
+}
+
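
[Editor's note] The prefix length computed above is what separates the two token kinds: a plain "0x" run of hex digits becomes a HexLiteral, while an extra H, K, L, or M letter (the IR spelling for hex floating-point payloads) makes it a FloatingPointLiteral; either way at least one digit must follow. A standalone sketch of the same classification:

#include <cassert>
#include <cctype>

enum Lit { NoTok, HexLit, FPLit };

// Mirrors maybeLexHexadecimalLiteral's decision, on a C string.
static Lit classify(const char *S) {
  if (S[0] != '0' || (S[1] != 'x' && S[1] != 'X'))
    return NoTok;
  std::size_t I = 2;
  bool FPPrefix = S[I] == 'H' || S[I] == 'K' || S[I] == 'L' || S[I] == 'M';
  if (FPPrefix)
    ++I;
  std::size_t Digits = 0;
  while (isxdigit(static_cast<unsigned char>(S[I]))) { ++I; ++Digits; }
  if (Digits == 0)
    return NoTok;                // needs at least one digit after the prefix
  return FPPrefix ? FPLit : HexLit;
}

int main() {
  assert(classify("0x1f") == HexLit);
  assert(classify("0xH3C00") == FPLit); // half-precision payload in IR syntax
  assert(classify("0x") == NoTok);
  assert(classify("12") == NoTok);
}
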
static Cursor maybeLexNumericalLiteral(Cursor C, MIToken &Token) {
if (!isdigit(C.peek()) && (C.peek() != '-' || !isdigit(C.peek(1))))
return None;
@@ -485,6 +509,8 @@ static MIToken::TokenKind symbolToken(char C) {
switch (C) {
case ',':
return MIToken::comma;
+ case '.':
+ return MIToken::dot;
case '=':
return MIToken::equal;
case ':':
@@ -566,7 +592,7 @@ StringRef llvm::lexMIToken(StringRef Source, MIToken &Token,
return C.remaining();
}
- if (Cursor R = maybeLexIntegerType(C, Token))
+ if (Cursor R = maybeLexIntegerOrScalarType(C, Token))
return R.remaining();
if (Cursor R = maybeLexMachineBasicBlock(C, Token, ErrorCallback))
return R.remaining();
@@ -592,7 +618,7 @@ StringRef llvm::lexMIToken(StringRef Source, MIToken &Token,
return R.remaining();
if (Cursor R = maybeLexExternalSymbol(C, Token, ErrorCallback))
return R.remaining();
- if (Cursor R = maybeLexHexFloatingPointLiteral(C, Token))
+ if (Cursor R = maybeLexHexadecimalLiteral(C, Token))
return R.remaining();
if (Cursor R = maybeLexNumericalLiteral(C, Token))
return R.remaining();
diff --git a/contrib/llvm/lib/CodeGen/MIRParser/MILexer.h b/contrib/llvm/lib/CodeGen/MIRParser/MILexer.h
index 32fc8ab..edba749 100644
--- a/contrib/llvm/lib/CodeGen/MIRParser/MILexer.h
+++ b/contrib/llvm/lib/CodeGen/MIRParser/MILexer.h
@@ -38,6 +38,7 @@ struct MIToken {
underscore,
colon,
coloncolon,
+ dot,
exclaim,
lparen,
rparen,
@@ -53,6 +54,7 @@ struct MIToken {
kw_implicit_define,
kw_def,
kw_dead,
+ kw_dereferenceable,
kw_killed,
kw_undef,
kw_internal,
@@ -67,6 +69,7 @@ struct MIToken {
kw_cfi_def_cfa_offset,
kw_cfi_def_cfa,
kw_blockaddress,
+ kw_intrinsic,
kw_target_index,
kw_half,
kw_float,
@@ -89,6 +92,8 @@ struct MIToken {
kw_landing_pad,
kw_liveins,
kw_successors,
+ kw_floatpred,
+ kw_intpred,
// Named metadata keywords
md_tbaa,
@@ -102,6 +107,8 @@ struct MIToken {
NamedRegister,
MachineBasicBlockLabel,
MachineBasicBlock,
+ PointerType,
+ ScalarType,
StackObject,
FixedStackObject,
NamedGlobalValue,
@@ -111,6 +118,7 @@ struct MIToken {
// Other tokens
IntegerLiteral,
FloatingPointLiteral,
+ HexLiteral,
VirtualRegister,
ConstantPoolItem,
JumpTableIndex,
@@ -160,7 +168,7 @@ public:
bool isMemoryOperandFlag() const {
return Kind == kw_volatile || Kind == kw_non_temporal ||
- Kind == kw_invariant;
+ Kind == kw_dereferenceable || Kind == kw_invariant;
}
bool is(TokenKind K) const { return Kind == K; }
diff --git a/contrib/llvm/lib/CodeGen/MIRParser/MIParser.cpp b/contrib/llvm/lib/CodeGen/MIRParser/MIParser.cpp
index b3fd16f..c8bed08 100644
--- a/contrib/llvm/lib/CodeGen/MIRParser/MIParser.cpp
+++ b/contrib/llvm/lib/CodeGen/MIRParser/MIParser.cpp
@@ -14,6 +14,7 @@
#include "MIParser.h"
#include "MILexer.h"
#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/StringSwitch.h"
#include "llvm/AsmParser/Parser.h"
#include "llvm/AsmParser/SlotMapping.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
@@ -26,13 +27,16 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/ModuleSlotTracker.h"
#include "llvm/IR/ValueSymbolTable.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetIntrinsicInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
+#include <cctype>
using namespace llvm;
@@ -41,6 +45,17 @@ PerFunctionMIParsingState::PerFunctionMIParsingState(MachineFunction &MF,
: MF(MF), SM(&SM), IRSlots(IRSlots) {
}
+VRegInfo &PerFunctionMIParsingState::getVRegInfo(unsigned Num) {
+ auto I = VRegInfos.insert(std::make_pair(Num, nullptr));
+ if (I.second) {
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ VRegInfo *Info = new (Allocator) VRegInfo;
+ Info->VReg = MRI.createIncompleteVirtualRegister();
+ I.first->second = Info;
+ }
+ return *I.first->second;
+}
+
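
[Editor's note] getVRegInfo uses the classic insert-if-absent map idiom: insert() returns an iterator/bool pair, and the bool is true only for a fresh key, so the VRegInfo allocation and the incomplete-vreg creation run exactly once per numbered register. A minimal illustration of the idiom with std::map:

#include <cassert>
#include <map>

int main() {
  std::map<unsigned, int> Infos;
  auto I = Infos.insert({42u, 0});
  if (I.second)
    I.first->second = 7;                 // first sight of key 42: initialize
  auto J = Infos.insert({42u, 0});
  assert(!J.second && J.first->second == 7); // reused, not re-initialized
}
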
namespace {
/// A wrapper struct around the 'MachineOperand' struct that includes a source
@@ -65,7 +80,7 @@ class MIParser {
SMDiagnostic &Error;
StringRef Source, CurrentSource;
MIToken Token;
- const PerFunctionMIParsingState &PFS;
+ PerFunctionMIParsingState &PFS;
/// Maps from instruction names to op codes.
StringMap<unsigned> Names2InstrOpCodes;
/// Maps from register names to registers.
@@ -86,7 +101,7 @@ class MIParser {
StringMap<unsigned> Names2BitmaskTargetFlags;
public:
- MIParser(const PerFunctionMIParsingState &PFS, SMDiagnostic &Error,
+ MIParser(PerFunctionMIParsingState &PFS, SMDiagnostic &Error,
StringRef Source);
/// \p SkipChar gives the number of characters to skip before looking
@@ -109,7 +124,8 @@ public:
bool parse(MachineInstr *&MI);
bool parseStandaloneMBB(MachineBasicBlock *&MBB);
bool parseStandaloneNamedRegister(unsigned &Reg);
- bool parseStandaloneVirtualRegister(unsigned &Reg);
+ bool parseStandaloneVirtualRegister(VRegInfo *&Info);
+ bool parseStandaloneRegister(unsigned &Reg);
bool parseStandaloneStackObject(int &FI);
bool parseStandaloneMDNode(MDNode *&Node);
@@ -119,21 +135,19 @@ public:
bool parseBasicBlockLiveins(MachineBasicBlock &MBB);
bool parseBasicBlockSuccessors(MachineBasicBlock &MBB);
- bool parseRegister(unsigned &Reg);
+ bool parseNamedRegister(unsigned &Reg);
+ bool parseVirtualRegister(VRegInfo *&Info);
+ bool parseRegister(unsigned &Reg, VRegInfo *&VRegInfo);
bool parseRegisterFlag(unsigned &Flags);
bool parseSubRegisterIndex(unsigned &SubReg);
bool parseRegisterTiedDefIndex(unsigned &TiedDefIdx);
- bool parseSize(unsigned &Size);
bool parseRegisterOperand(MachineOperand &Dest,
Optional<unsigned> &TiedDefIdx, bool IsDef = false);
bool parseImmediateOperand(MachineOperand &Dest);
bool parseIRConstant(StringRef::iterator Loc, StringRef Source,
const Constant *&C);
bool parseIRConstant(StringRef::iterator Loc, const Constant *&C);
- bool parseIRType(StringRef::iterator Loc, StringRef Source, unsigned &Read,
- Type *&Ty);
- // \p MustBeSized defines whether or not \p Ty must be sized.
- bool parseIRType(StringRef::iterator Loc, Type *&Ty, bool MustBeSized = true);
+ bool parseLowLevelType(StringRef::iterator Loc, LLT &Ty);
bool parseTypedImmediateOperand(MachineOperand &Dest);
bool parseFPImmediateOperand(MachineOperand &Dest);
bool parseMBBReference(MachineBasicBlock *&MBB);
@@ -155,6 +169,8 @@ public:
bool parseCFIOperand(MachineOperand &Dest);
bool parseIRBlock(BasicBlock *&BB, const Function &F);
bool parseBlockAddressOperand(MachineOperand &Dest);
+ bool parseIntrinsicOperand(MachineOperand &Dest);
+ bool parsePredicateOperand(MachineOperand &Dest);
bool parseTargetIndexOperand(MachineOperand &Dest);
bool parseLiveoutRegisterMaskOperand(MachineOperand &Dest);
bool parseMachineOperand(MachineOperand &Dest,
@@ -181,6 +197,12 @@ private:
/// Return true if an error occurred.
bool getUint64(uint64_t &Result);
+ /// Convert the hexadecimal literal in the current token into an unsigned
+ /// APInt with a minimum bitwidth required to represent the value.
+ ///
+ /// Return true if the literal does not represent an integer value.
+ bool getHexUint(APInt &Result);
+
/// If the current token is of the given kind, consume it and return false.
/// Otherwise report an error and return true.
bool expectAndConsume(MIToken::TokenKind TokenKind);
@@ -254,7 +276,7 @@ private:
} // end anonymous namespace
-MIParser::MIParser(const PerFunctionMIParsingState &PFS, SMDiagnostic &Error,
+MIParser::MIParser(PerFunctionMIParsingState &PFS, SMDiagnostic &Error,
StringRef Source)
: MF(PFS.MF), Error(Error), Source(Source), CurrentSource(Source), PFS(PFS)
{}
@@ -362,7 +384,7 @@ bool MIParser::parseBasicBlockDefinition(
if (!Name.empty()) {
BB = dyn_cast_or_null<BasicBlock>(
- MF.getFunction()->getValueSymbolTable().lookup(Name));
+ MF.getFunction()->getValueSymbolTable()->lookup(Name));
if (!BB)
return error(Loc, Twine("basic block '") + Name +
"' is not defined in the function '" +
@@ -437,10 +459,24 @@ bool MIParser::parseBasicBlockLiveins(MachineBasicBlock &MBB) {
if (Token.isNot(MIToken::NamedRegister))
return error("expected a named register");
unsigned Reg = 0;
- if (parseRegister(Reg))
+ if (parseNamedRegister(Reg))
return true;
- MBB.addLiveIn(Reg);
lex();
+ LaneBitmask Mask = LaneBitmask::getAll();
+ if (consumeIfPresent(MIToken::colon)) {
+ // Parse lane mask.
+ if (Token.isNot(MIToken::IntegerLiteral) &&
+ Token.isNot(MIToken::HexLiteral))
+ return error("expected a lane mask");
+ static_assert(sizeof(LaneBitmask::Type) == sizeof(unsigned),
+ "Use correct get-function for lane mask");
+ LaneBitmask::Type V;
+ if (getUnsigned(V))
+ return error("invalid lane mask value");
+ Mask = LaneBitmask(V);
+ lex();
+ }
+ MBB.addLiveIn(Reg, Mask);
} while (consumeIfPresent(MIToken::comma));
return false;
}
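
[Editor's note] With this change a basic block's liveins entry may attach an optional lane mask after a colon, written as either a decimal or a hexadecimal literal. A hypothetical MIR fragment (the register names and mask value are illustrative only, not taken from a test in the tree):

  bb.1:
    liveins: %vgpr0:0x00000003, %sgpr4
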
@@ -461,7 +497,8 @@ bool MIParser::parseBasicBlockSuccessors(MachineBasicBlock &MBB) {
lex();
unsigned Weight = 0;
if (consumeIfPresent(MIToken::lparen)) {
- if (Token.isNot(MIToken::IntegerLiteral))
+ if (Token.isNot(MIToken::IntegerLiteral) &&
+ Token.isNot(MIToken::HexLiteral))
return error("expected an integer literal after '('");
if (getUnsigned(Weight))
return true;
@@ -597,14 +634,6 @@ bool MIParser::parse(MachineInstr *&MI) {
if (Token.isError() || parseInstruction(OpCode, Flags))
return true;
- Type *Ty = nullptr;
- if (isPreISelGenericOpcode(OpCode)) {
- // For generic opcode, a type is mandatory.
- auto Loc = Token.location();
- if (parseIRType(Loc, Ty))
- return true;
- }
-
// Parse the remaining machine operands.
while (!Token.isNewlineOrEOF() && Token.isNot(MIToken::kw_debug_location) &&
Token.isNot(MIToken::coloncolon) && Token.isNot(MIToken::lbrace)) {
@@ -660,8 +689,6 @@ bool MIParser::parse(MachineInstr *&MI) {
// TODO: Check for extraneous machine operands.
MI = MF.CreateMachineInstr(MCID, DebugLocation, /*NoImplicit=*/true);
MI->setFlags(Flags);
- if (Ty)
- MI->setType(Ty);
for (const auto &Operand : Operands)
MI->addOperand(MF, Operand.Operand);
if (assignRegisterTies(*MI, Operands))
@@ -692,7 +719,7 @@ bool MIParser::parseStandaloneNamedRegister(unsigned &Reg) {
lex();
if (Token.isNot(MIToken::NamedRegister))
return error("expected a named register");
- if (parseRegister(Reg))
+ if (parseNamedRegister(Reg))
return true;
lex();
if (Token.isNot(MIToken::Eof))
@@ -700,12 +727,28 @@ bool MIParser::parseStandaloneNamedRegister(unsigned &Reg) {
return false;
}
-bool MIParser::parseStandaloneVirtualRegister(unsigned &Reg) {
+bool MIParser::parseStandaloneVirtualRegister(VRegInfo *&Info) {
lex();
if (Token.isNot(MIToken::VirtualRegister))
return error("expected a virtual register");
- if (parseRegister(Reg))
+ if (parseVirtualRegister(Info))
+ return true;
+ lex();
+ if (Token.isNot(MIToken::Eof))
+ return error("expected end of string after the register reference");
+ return false;
+}
+
+bool MIParser::parseStandaloneRegister(unsigned &Reg) {
+ lex();
+ if (Token.isNot(MIToken::NamedRegister) &&
+ Token.isNot(MIToken::VirtualRegister))
+ return error("expected either a named or virtual register");
+
+ VRegInfo *Info;
+ if (parseRegister(Reg, Info))
return true;
+
lex();
if (Token.isNot(MIToken::Eof))
return error("expected end of string after the register reference");
@@ -800,33 +843,39 @@ bool MIParser::parseInstruction(unsigned &OpCode, unsigned &Flags) {
return false;
}
-bool MIParser::parseRegister(unsigned &Reg) {
+bool MIParser::parseNamedRegister(unsigned &Reg) {
+ assert(Token.is(MIToken::NamedRegister) && "Needs NamedRegister token");
+ StringRef Name = Token.stringValue();
+ if (getRegisterByName(Name, Reg))
+ return error(Twine("unknown register name '") + Name + "'");
+ return false;
+}
+
+bool MIParser::parseVirtualRegister(VRegInfo *&Info) {
+ assert(Token.is(MIToken::VirtualRegister) && "Needs VirtualRegister token");
+ unsigned ID;
+ if (getUnsigned(ID))
+ return true;
+ Info = &PFS.getVRegInfo(ID);
+ return false;
+}
+
+bool MIParser::parseRegister(unsigned &Reg, VRegInfo *&Info) {
switch (Token.kind()) {
case MIToken::underscore:
Reg = 0;
- break;
- case MIToken::NamedRegister: {
- StringRef Name = Token.stringValue();
- if (getRegisterByName(Name, Reg))
- return error(Twine("unknown register name '") + Name + "'");
- break;
- }
- case MIToken::VirtualRegister: {
- unsigned ID;
- if (getUnsigned(ID))
+ return false;
+ case MIToken::NamedRegister:
+ return parseNamedRegister(Reg);
+ case MIToken::VirtualRegister:
+ if (parseVirtualRegister(Info))
return true;
- const auto RegInfo = PFS.VirtualRegisterSlots.find(ID);
- if (RegInfo == PFS.VirtualRegisterSlots.end())
- return error(Twine("use of undefined virtual register '%") + Twine(ID) +
- "'");
- Reg = RegInfo->second;
- break;
- }
+ Reg = Info->VReg;
+ return false;
// TODO: Parse other register kinds.
default:
llvm_unreachable("The current token should be a register");
}
- return false;
}
bool MIParser::parseRegisterFlag(unsigned &Flags) {
@@ -871,10 +920,10 @@ bool MIParser::parseRegisterFlag(unsigned &Flags) {
}
bool MIParser::parseSubRegisterIndex(unsigned &SubReg) {
- assert(Token.is(MIToken::colon));
+ assert(Token.is(MIToken::dot));
lex();
if (Token.isNot(MIToken::Identifier))
- return error("expected a subregister index after ':'");
+ return error("expected a subregister index after '.'");
auto Name = Token.stringValue();
SubReg = getSubRegIndex(Name);
if (!SubReg)
@@ -885,7 +934,7 @@ bool MIParser::parseSubRegisterIndex(unsigned &SubReg) {
bool MIParser::parseRegisterTiedDefIndex(unsigned &TiedDefIdx) {
if (!consumeIfPresent(MIToken::kw_tied_def))
- return error("expected 'tied-def' after '('");
+ return true;
if (Token.isNot(MIToken::IntegerLiteral))
return error("expected an integer literal after 'tied-def'");
if (getUnsigned(TiedDefIdx))
@@ -896,17 +945,6 @@ bool MIParser::parseRegisterTiedDefIndex(unsigned &TiedDefIdx) {
return false;
}
-bool MIParser::parseSize(unsigned &Size) {
- if (Token.isNot(MIToken::IntegerLiteral))
- return error("expected an integer literal for the size");
- if (getUnsigned(Size))
- return true;
- lex();
- if (expectAndConsume(MIToken::rparen))
- return true;
- return false;
-}
-
bool MIParser::assignRegisterTies(MachineInstr &MI,
ArrayRef<ParsedMachineOperand> Operands) {
SmallVector<std::pair<unsigned, unsigned>, 4> TiedRegisterPairs;
@@ -947,7 +985,6 @@ bool MIParser::assignRegisterTies(MachineInstr &MI,
bool MIParser::parseRegisterOperand(MachineOperand &Dest,
Optional<unsigned> &TiedDefIdx,
bool IsDef) {
- unsigned Reg;
unsigned Flags = IsDef ? RegState::Define : 0;
while (Token.isRegisterFlag()) {
if (parseRegisterFlag(Flags))
@@ -955,38 +992,62 @@ bool MIParser::parseRegisterOperand(MachineOperand &Dest,
}
if (!Token.isRegister())
return error("expected a register after register flags");
- if (parseRegister(Reg))
+ unsigned Reg;
+ VRegInfo *RegInfo;
+ if (parseRegister(Reg, RegInfo))
return true;
lex();
unsigned SubReg = 0;
- if (Token.is(MIToken::colon)) {
+ if (Token.is(MIToken::dot)) {
if (parseSubRegisterIndex(SubReg))
return true;
if (!TargetRegisterInfo::isVirtualRegister(Reg))
return error("subregister index expects a virtual register");
}
+ MachineRegisterInfo &MRI = MF.getRegInfo();
if ((Flags & RegState::Define) == 0) {
if (consumeIfPresent(MIToken::lparen)) {
unsigned Idx;
- if (parseRegisterTiedDefIndex(Idx))
- return true;
- TiedDefIdx = Idx;
+ if (!parseRegisterTiedDefIndex(Idx))
+ TiedDefIdx = Idx;
+ else {
+ // Try a redundant low-level type.
+ LLT Ty;
+ if (parseLowLevelType(Token.location(), Ty))
+ return error("expected tied-def or low-level type after '('");
+
+ if (expectAndConsume(MIToken::rparen))
+ return true;
+
+ if (MRI.getType(Reg).isValid() && MRI.getType(Reg) != Ty)
+ return error("inconsistent type for generic virtual register");
+
+ MRI.setType(Reg, Ty);
+ }
}
} else if (consumeIfPresent(MIToken::lparen)) {
- // Virtual registers may have a size with GlobalISel.
+ // Virtual registers may have a type with GlobalISel.
if (!TargetRegisterInfo::isVirtualRegister(Reg))
- return error("unexpected size on physical register");
- unsigned Size;
- if (parseSize(Size))
+ return error("unexpected type on physical register");
+
+ LLT Ty;
+ if (parseLowLevelType(Token.location(), Ty))
return true;
- MachineRegisterInfo &MRI = MF.getRegInfo();
- MRI.setSize(Reg, Size);
- } else if (PFS.GenericVRegs.count(Reg)) {
- // Generic virtual registers must have a size.
- // If we end up here this means the size hasn't been specified and
+ if (expectAndConsume(MIToken::rparen))
+ return true;
+
+ if (MRI.getType(Reg).isValid() && MRI.getType(Reg) != Ty)
+ return error("inconsistent type for generic virtual register");
+
+ MRI.setType(Reg, Ty);
+ } else if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ // Generic virtual registers must have a type.
+ // If we end up here this means the type hasn't been specified and
// this is bad!
- return error("generic virtual registers must have a size");
+ if (RegInfo->Kind == VRegInfo::GENERIC ||
+ RegInfo->Kind == VRegInfo::REGBANK)
+ return error("generic virtual registers must have a type");
}
Dest = MachineOperand::CreateReg(
Reg, Flags & RegState::Define, Flags & RegState::Implicit,
@@ -1010,7 +1071,7 @@ bool MIParser::parseIRConstant(StringRef::iterator Loc, StringRef StringValue,
const Constant *&C) {
auto Source = StringValue.str(); // The source has to be null terminated.
SMDiagnostic Err;
- C = parseConstantValue(Source.c_str(), Err, *MF.getFunction()->getParent(),
+ C = parseConstantValue(Source, Err, *MF.getFunction()->getParent(),
&PFS.IRSlots);
if (!C)
return error(Loc + Err.getColumnNo(), Err.getMessage());
@@ -1024,35 +1085,45 @@ bool MIParser::parseIRConstant(StringRef::iterator Loc, const Constant *&C) {
return false;
}
-bool MIParser::parseIRType(StringRef::iterator Loc, StringRef StringValue,
- unsigned &Read, Type *&Ty) {
- auto Source = StringValue.str(); // The source has to be null terminated.
- SMDiagnostic Err;
- Ty = parseTypeAtBeginning(Source.c_str(), Read, Err,
- *MF.getFunction()->getParent(), &PFS.IRSlots);
- if (!Ty)
- return error(Loc + Err.getColumnNo(), Err.getMessage());
- return false;
-}
+bool MIParser::parseLowLevelType(StringRef::iterator Loc, LLT &Ty) {
+ if (Token.is(MIToken::ScalarType)) {
+ Ty = LLT::scalar(APSInt(Token.range().drop_front()).getZExtValue());
+ lex();
+ return false;
+ } else if (Token.is(MIToken::PointerType)) {
+ const DataLayout &DL = MF.getFunction()->getParent()->getDataLayout();
+ unsigned AS = APSInt(Token.range().drop_front()).getZExtValue();
+ Ty = LLT::pointer(AS, DL.getPointerSizeInBits(AS));
+ lex();
+ return false;
+ }
-bool MIParser::parseIRType(StringRef::iterator Loc, Type *&Ty,
- bool MustBeSized) {
- // At this point we enter in the IR world, i.e., to get the correct type,
- // we need to hand off the whole string, not just the current token.
- // E.g., <4 x i64> would give '<' as a token and there is not much
- // the IR parser can do with that.
- unsigned Read = 0;
- if (parseIRType(Loc, StringRef(Loc), Read, Ty))
- return true;
- // The type must be sized, otherwise there is not much the backend
- // can do with it.
- if (MustBeSized && !Ty->isSized())
- return error("expected a sized type");
- // The next token is Read characters from the Loc.
- // However, the current location is not Loc, but Loc + the length of Token.
- // Therefore, subtract the length of Token (range().end() - Loc) to the
- // number of characters to skip before the next token.
- lex(Read - (Token.range().end() - Loc));
+ // Now we're looking for a vector.
+ if (Token.isNot(MIToken::less))
+ return error(Loc,
+ "expected unsized, pN, sN or <N x sM> for GlobalISel type");
+
+ lex();
+
+ if (Token.isNot(MIToken::IntegerLiteral))
+ return error(Loc, "expected <N x sM> for vctor type");
+ uint64_t NumElements = Token.integerValue().getZExtValue();
+ lex();
+
+ if (Token.isNot(MIToken::Identifier) || Token.stringValue() != "x")
+ return error(Loc, "expected '<N x sM>' for vector type");
+ lex();
+
+ if (Token.isNot(MIToken::ScalarType))
+ return error(Loc, "expected '<N x sM>' for vector type");
+ uint64_t ScalarSize = APSInt(Token.range().drop_front()).getZExtValue();
+ lex();
+
+ if (Token.isNot(MIToken::greater))
+ return error(Loc, "expected '<N x sM>' for vector type");
+ lex();
+
+ Ty = LLT::vector(NumElements, ScalarSize);
return false;
}
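For reference, the three MIR spellings accepted above map onto LLT factory
calls as follows; a minimal sketch, assuming the LowLevelType API of this
LLVM revision (illustrative only, not part of the patch):

    #include "llvm/CodeGen/LowLevelType.h"
    #include "llvm/IR/DataLayout.h"
    using namespace llvm;

    static void lltSpellings(const DataLayout &DL) {
      LLT S32 = LLT::scalar(32);           // MIR "s32": 32-bit scalar
      LLT P0 = LLT::pointer(0, DL.getPointerSizeInBits(0)); // MIR "p0"
      LLT V4S8 = LLT::vector(4, 8);        // MIR "<4 x s8>": 4 x 8-bit lanes
      (void)S32; (void)P0; (void)V4S8;     // silence unused warnings
    }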
@@ -1072,7 +1143,8 @@ bool MIParser::parseTypedImmediateOperand(MachineOperand &Dest) {
bool MIParser::parseFPImmediateOperand(MachineOperand &Dest) {
auto Loc = Token.location();
lex();
- if (Token.isNot(MIToken::FloatingPointLiteral))
+ if (Token.isNot(MIToken::FloatingPointLiteral) &&
+ Token.isNot(MIToken::HexLiteral))
return error("expected a floating point literal");
const Constant *C = nullptr;
if (parseIRConstant(Loc, C))
@@ -1082,13 +1154,24 @@ bool MIParser::parseFPImmediateOperand(MachineOperand &Dest) {
}
bool MIParser::getUnsigned(unsigned &Result) {
- assert(Token.hasIntegerValue() && "Expected a token with an integer value");
- const uint64_t Limit = uint64_t(std::numeric_limits<unsigned>::max()) + 1;
- uint64_t Val64 = Token.integerValue().getLimitedValue(Limit);
- if (Val64 == Limit)
- return error("expected 32-bit integer (too large)");
- Result = Val64;
- return false;
+ if (Token.hasIntegerValue()) {
+ const uint64_t Limit = uint64_t(std::numeric_limits<unsigned>::max()) + 1;
+ uint64_t Val64 = Token.integerValue().getLimitedValue(Limit);
+ if (Val64 == Limit)
+ return error("expected 32-bit integer (too large)");
+ Result = Val64;
+ return false;
+ }
+ if (Token.is(MIToken::HexLiteral)) {
+ APInt A;
+ if (getHexUint(A))
+ return true;
+ if (A.getBitWidth() > 32)
+ return error("expected 32-bit integer (too large)");
+ Result = A.getZExtValue();
+ return false;
+ }
+ return true;
}
bool MIParser::parseMBBReference(MachineBasicBlock *&MBB) {
@@ -1128,7 +1211,7 @@ bool MIParser::parseStackFrameIndex(int &FI) {
"'");
StringRef Name;
if (const auto *Alloca =
- MF.getFrameInfo()->getObjectAllocation(ObjectInfo->second))
+ MF.getFrameInfo().getObjectAllocation(ObjectInfo->second))
Name = Alloca->getName();
if (!Token.stringValue().empty() && Token.stringValue() != Name)
return error(Twine("the name of the stack object '%stack.") + Twine(ID) +
@@ -1293,7 +1376,7 @@ bool MIParser::parseCFIRegister(unsigned &Reg) {
if (Token.isNot(MIToken::NamedRegister))
return error("expected a cfi register");
unsigned LLVMReg;
- if (parseRegister(LLVMReg))
+ if (parseNamedRegister(LLVMReg))
return true;
const auto *TRI = MF.getSubtarget().getRegisterInfo();
assert(TRI && "Expected target register info");
@@ -1308,7 +1391,6 @@ bool MIParser::parseCFIRegister(unsigned &Reg) {
bool MIParser::parseCFIOperand(MachineOperand &Dest) {
auto Kind = Token.kind();
lex();
- auto &MMI = MF.getMMI();
int Offset;
unsigned Reg;
unsigned CFIIndex;
@@ -1316,27 +1398,26 @@ bool MIParser::parseCFIOperand(MachineOperand &Dest) {
case MIToken::kw_cfi_same_value:
if (parseCFIRegister(Reg))
return true;
- CFIIndex =
- MMI.addFrameInst(MCCFIInstruction::createSameValue(nullptr, Reg));
+ CFIIndex = MF.addFrameInst(MCCFIInstruction::createSameValue(nullptr, Reg));
break;
case MIToken::kw_cfi_offset:
if (parseCFIRegister(Reg) || expectAndConsume(MIToken::comma) ||
parseCFIOffset(Offset))
return true;
CFIIndex =
- MMI.addFrameInst(MCCFIInstruction::createOffset(nullptr, Reg, Offset));
+ MF.addFrameInst(MCCFIInstruction::createOffset(nullptr, Reg, Offset));
break;
case MIToken::kw_cfi_def_cfa_register:
if (parseCFIRegister(Reg))
return true;
CFIIndex =
- MMI.addFrameInst(MCCFIInstruction::createDefCfaRegister(nullptr, Reg));
+ MF.addFrameInst(MCCFIInstruction::createDefCfaRegister(nullptr, Reg));
break;
case MIToken::kw_cfi_def_cfa_offset:
if (parseCFIOffset(Offset))
return true;
// NB: MCCFIInstruction::createDefCfaOffset negates the offset.
- CFIIndex = MMI.addFrameInst(
+ CFIIndex = MF.addFrameInst(
MCCFIInstruction::createDefCfaOffset(nullptr, -Offset));
break;
case MIToken::kw_cfi_def_cfa:
@@ -1345,7 +1426,7 @@ bool MIParser::parseCFIOperand(MachineOperand &Dest) {
return true;
// NB: MCCFIInstruction::createDefCfa negates the offset.
CFIIndex =
- MMI.addFrameInst(MCCFIInstruction::createDefCfa(nullptr, Reg, -Offset));
+ MF.addFrameInst(MCCFIInstruction::createDefCfa(nullptr, Reg, -Offset));
break;
default:
// TODO: Parse the other CFI operands.
@@ -1359,7 +1440,7 @@ bool MIParser::parseIRBlock(BasicBlock *&BB, const Function &F) {
switch (Token.kind()) {
case MIToken::NamedIRBlock: {
BB = dyn_cast_or_null<BasicBlock>(
- F.getValueSymbolTable().lookup(Token.stringValue()));
+ F.getValueSymbolTable()->lookup(Token.stringValue()));
if (!BB)
return error(Twine("use of undefined IR block '") + Token.range() + "'");
break;
@@ -1411,6 +1492,93 @@ bool MIParser::parseBlockAddressOperand(MachineOperand &Dest) {
return false;
}
+bool MIParser::parseIntrinsicOperand(MachineOperand &Dest) {
+ assert(Token.is(MIToken::kw_intrinsic));
+ lex();
+ if (expectAndConsume(MIToken::lparen))
+ return error("expected syntax intrinsic(@llvm.whatever)");
+
+ if (Token.isNot(MIToken::NamedGlobalValue))
+ return error("expected syntax intrinsic(@llvm.whatever)");
+
+ std::string Name = Token.stringValue();
+ lex();
+
+ if (expectAndConsume(MIToken::rparen))
+ return error("expected ')' to terminate intrinsic name");
+
+ // Find out what intrinsic we're dealing with, first try the global namespace
+ // and then the target's private intrinsics if that fails.
+ const TargetIntrinsicInfo *TII = MF.getTarget().getIntrinsicInfo();
+ Intrinsic::ID ID = Function::lookupIntrinsicID(Name);
+ if (ID == Intrinsic::not_intrinsic && TII)
+ ID = static_cast<Intrinsic::ID>(TII->lookupName(Name));
+
+ if (ID == Intrinsic::not_intrinsic)
+ return error("unknown intrinsic name");
+ Dest = MachineOperand::CreateIntrinsicID(ID);
+
+ return false;
+}
+
+bool MIParser::parsePredicateOperand(MachineOperand &Dest) {
+ assert(Token.is(MIToken::kw_intpred) || Token.is(MIToken::kw_floatpred));
+ bool IsFloat = Token.is(MIToken::kw_floatpred);
+ lex();
+
+ if (expectAndConsume(MIToken::lparen))
+ return error("expected syntax intpred(whatever) or floatpred(whatever");
+
+ if (Token.isNot(MIToken::Identifier))
+ return error("whatever");
+
+ CmpInst::Predicate Pred;
+ if (IsFloat) {
+ Pred = StringSwitch<CmpInst::Predicate>(Token.stringValue())
+ .Case("false", CmpInst::FCMP_FALSE)
+ .Case("oeq", CmpInst::FCMP_OEQ)
+ .Case("ogt", CmpInst::FCMP_OGT)
+ .Case("oge", CmpInst::FCMP_OGE)
+ .Case("olt", CmpInst::FCMP_OLT)
+ .Case("ole", CmpInst::FCMP_OLE)
+ .Case("one", CmpInst::FCMP_ONE)
+ .Case("ord", CmpInst::FCMP_ORD)
+ .Case("uno", CmpInst::FCMP_UNO)
+ .Case("ueq", CmpInst::FCMP_UEQ)
+ .Case("ugt", CmpInst::FCMP_UGT)
+ .Case("uge", CmpInst::FCMP_UGE)
+ .Case("ult", CmpInst::FCMP_ULT)
+ .Case("ule", CmpInst::FCMP_ULE)
+ .Case("une", CmpInst::FCMP_UNE)
+ .Case("true", CmpInst::FCMP_TRUE)
+ .Default(CmpInst::BAD_FCMP_PREDICATE);
+ if (!CmpInst::isFPPredicate(Pred))
+ return error("invalid floating-point predicate");
+ } else {
+ Pred = StringSwitch<CmpInst::Predicate>(Token.stringValue())
+ .Case("eq", CmpInst::ICMP_EQ)
+ .Case("ne", CmpInst::ICMP_NE)
+ .Case("sgt", CmpInst::ICMP_SGT)
+ .Case("sge", CmpInst::ICMP_SGE)
+ .Case("slt", CmpInst::ICMP_SLT)
+ .Case("sle", CmpInst::ICMP_SLE)
+ .Case("ugt", CmpInst::ICMP_UGT)
+ .Case("uge", CmpInst::ICMP_UGE)
+ .Case("ult", CmpInst::ICMP_ULT)
+ .Case("ule", CmpInst::ICMP_ULE)
+ .Default(CmpInst::BAD_ICMP_PREDICATE);
+ if (!CmpInst::isIntPredicate(Pred))
+ return error("invalid integer predicate");
+ }
+
+ lex();
+ Dest = MachineOperand::CreatePredicate(Pred);
+ if (expectAndConsume(MIToken::rparen))
+ return error("predicate should be terminated by ')'.");
+
+ return false;
+}
+
bool MIParser::parseTargetIndexOperand(MachineOperand &Dest) {
assert(Token.is(MIToken::kw_target_index));
lex();
@@ -1441,8 +1609,8 @@ bool MIParser::parseLiveoutRegisterMaskOperand(MachineOperand &Dest) {
while (true) {
if (Token.isNot(MIToken::NamedRegister))
return error("expected a named register");
- unsigned Reg = 0;
- if (parseRegister(Reg))
+ unsigned Reg;
+ if (parseNamedRegister(Reg))
return true;
lex();
Mask[Reg / 32] |= 1U << (Reg % 32);
@@ -1511,10 +1679,15 @@ bool MIParser::parseMachineOperand(MachineOperand &Dest,
return parseCFIOperand(Dest);
case MIToken::kw_blockaddress:
return parseBlockAddressOperand(Dest);
+ case MIToken::kw_intrinsic:
+ return parseIntrinsicOperand(Dest);
case MIToken::kw_target_index:
return parseTargetIndexOperand(Dest);
case MIToken::kw_liveout:
return parseLiveoutRegisterMaskOperand(Dest);
+ case MIToken::kw_floatpred:
+ case MIToken::kw_intpred:
+ return parsePredicateOperand(Dest);
case MIToken::Error:
return true;
case MIToken::Identifier:
@@ -1523,7 +1696,7 @@ bool MIParser::parseMachineOperand(MachineOperand &Dest,
lex();
break;
}
- // fallthrough
+ LLVM_FALLTHROUGH;
default:
// FIXME: Parse the MCSymbol machine operand.
return error("expected a machine operand");
@@ -1613,7 +1786,7 @@ bool MIParser::parseOperandsOffset(MachineOperand &Op) {
bool MIParser::parseIRValue(const Value *&V) {
switch (Token.kind()) {
case MIToken::NamedIRValue: {
- V = MF.getFunction()->getValueSymbolTable().lookup(Token.stringValue());
+ V = MF.getFunction()->getValueSymbolTable()->lookup(Token.stringValue());
break;
}
case MIToken::IRValue: {
@@ -1647,10 +1820,35 @@ bool MIParser::parseIRValue(const Value *&V) {
}
bool MIParser::getUint64(uint64_t &Result) {
- assert(Token.hasIntegerValue());
- if (Token.integerValue().getActiveBits() > 64)
- return error("expected 64-bit integer (too large)");
- Result = Token.integerValue().getZExtValue();
+ if (Token.hasIntegerValue()) {
+ if (Token.integerValue().getActiveBits() > 64)
+ return error("expected 64-bit integer (too large)");
+ Result = Token.integerValue().getZExtValue();
+ return false;
+ }
+ if (Token.is(MIToken::HexLiteral)) {
+ APInt A;
+ if (getHexUint(A))
+ return true;
+ if (A.getBitWidth() > 64)
+ return error("expected 64-bit integer (too large)");
+ Result = A.getZExtValue();
+ return false;
+ }
+ return true;
+}
+
+bool MIParser::getHexUint(APInt &Result) {
+ assert(Token.is(MIToken::HexLiteral));
+ StringRef S = Token.range();
+ assert(S[0] == '0' && tolower(S[1]) == 'x');
+ // This could be a floating point literal with a special prefix.
+ if (!isxdigit(S[2]))
+ return true;
+ StringRef V = S.substr(2);
+ APInt A(V.size()*4, V, 16);
+ Result = APInt(A.getActiveBits(),
+ ArrayRef<uint64_t>(A.getRawData(), A.getNumWords()));
return false;
}
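The width arithmetic above is worth a gloss: every hex digit contributes 4
bits, and the temporary is then narrowed to its active bits so that the 32-
and 64-bit range checks in getUnsigned() and getUint64() operate on the
smallest faithful width. A standalone sketch under the same APInt API
(illustrative, not part of the patch):

    #include "llvm/ADT/APInt.h"
    #include "llvm/ADT/ArrayRef.h"
    #include "llvm/ADT/StringRef.h"
    using namespace llvm;

    static APInt hexToAPInt(StringRef Hex) { // Hex without the "0x" prefix
      APInt Wide(Hex.size() * 4, Hex, 16);   // e.g. "ff" -> 8-bit 0xff
      // Re-pack into the smallest width that still holds the value; note
      // that, as in the patch, a literal zero yields a zero-width APInt.
      return APInt(Wide.getActiveBits(),
                   ArrayRef<uint64_t>(Wide.getRawData(), Wide.getNumWords()));
    }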
@@ -1663,6 +1861,9 @@ bool MIParser::parseMemoryOperandFlag(MachineMemOperand::Flags &Flags) {
case MIToken::kw_non_temporal:
Flags |= MachineMemOperand::MONonTemporal;
break;
+ case MIToken::kw_dereferenceable:
+ Flags |= MachineMemOperand::MODereferenceable;
+ break;
case MIToken::kw_invariant:
Flags |= MachineMemOperand::MOInvariant;
break;
@@ -2059,36 +2260,42 @@ bool llvm::parseMachineBasicBlockDefinitions(PerFunctionMIParsingState &PFS,
return MIParser(PFS, Error, Src).parseBasicBlockDefinitions(PFS.MBBSlots);
}
-bool llvm::parseMachineInstructions(const PerFunctionMIParsingState &PFS,
+bool llvm::parseMachineInstructions(PerFunctionMIParsingState &PFS,
StringRef Src, SMDiagnostic &Error) {
return MIParser(PFS, Error, Src).parseBasicBlocks();
}
-bool llvm::parseMBBReference(const PerFunctionMIParsingState &PFS,
+bool llvm::parseMBBReference(PerFunctionMIParsingState &PFS,
MachineBasicBlock *&MBB, StringRef Src,
SMDiagnostic &Error) {
return MIParser(PFS, Error, Src).parseStandaloneMBB(MBB);
}
-bool llvm::parseNamedRegisterReference(const PerFunctionMIParsingState &PFS,
+bool llvm::parseRegisterReference(PerFunctionMIParsingState &PFS,
+ unsigned &Reg, StringRef Src,
+ SMDiagnostic &Error) {
+ return MIParser(PFS, Error, Src).parseStandaloneRegister(Reg);
+}
+
+bool llvm::parseNamedRegisterReference(PerFunctionMIParsingState &PFS,
unsigned &Reg, StringRef Src,
SMDiagnostic &Error) {
return MIParser(PFS, Error, Src).parseStandaloneNamedRegister(Reg);
}
-bool llvm::parseVirtualRegisterReference(const PerFunctionMIParsingState &PFS,
- unsigned &Reg, StringRef Src,
+bool llvm::parseVirtualRegisterReference(PerFunctionMIParsingState &PFS,
+ VRegInfo *&Info, StringRef Src,
SMDiagnostic &Error) {
- return MIParser(PFS, Error, Src).parseStandaloneVirtualRegister(Reg);
+ return MIParser(PFS, Error, Src).parseStandaloneVirtualRegister(Info);
}
-bool llvm::parseStackObjectReference(const PerFunctionMIParsingState &PFS,
+bool llvm::parseStackObjectReference(PerFunctionMIParsingState &PFS,
int &FI, StringRef Src,
SMDiagnostic &Error) {
return MIParser(PFS, Error, Src).parseStandaloneStackObject(FI);
}
-bool llvm::parseMDNode(const PerFunctionMIParsingState &PFS,
+bool llvm::parseMDNode(PerFunctionMIParsingState &PFS,
MDNode *&Node, StringRef Src, SMDiagnostic &Error) {
return MIParser(PFS, Error, Src).parseStandaloneMDNode(Node);
}
diff --git a/contrib/llvm/lib/CodeGen/MIRParser/MIParser.h b/contrib/llvm/lib/CodeGen/MIRParser/MIParser.h
index 18895b9..93a4d84 100644
--- a/contrib/llvm/lib/CodeGen/MIRParser/MIParser.h
+++ b/contrib/llvm/lib/CodeGen/MIRParser/MIParser.h
@@ -26,26 +26,42 @@ class MachineFunction;
class MachineInstr;
class MachineRegisterInfo;
class MDNode;
+class RegisterBank;
struct SlotMapping;
class SMDiagnostic;
class SourceMgr;
+class TargetRegisterClass;
+
+struct VRegInfo {
+ enum : uint8_t {
+ UNKNOWN, NORMAL, GENERIC, REGBANK
+ } Kind = UNKNOWN;
+ bool Explicit = false; ///< VReg was explicitly specified in the .mir file.
+ union {
+ const TargetRegisterClass *RC;
+ const RegisterBank *RegBank;
+ } D;
+ unsigned VReg;
+ unsigned PreferredReg = 0;
+};
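Since D is a union tagged by Kind, consumers are expected to branch on the
tag before touching either member, as parseRegisterInfo() and
setupRegisterInfo() below do. A hedged illustration of that discipline (not
part of the patch):

    #include "llvm/Support/ErrorHandling.h"

    static const char *describe(const VRegInfo &Info) {
      switch (Info.Kind) {
      case VRegInfo::NORMAL:  return "constrained to a register class";
      case VRegInfo::REGBANK: return "assigned to a register bank";
      case VRegInfo::GENERIC: return "generic, typed at its definitions";
      case VRegInfo::UNKNOWN: return "not yet classified";
      }
      llvm_unreachable("covered switch");
    }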
struct PerFunctionMIParsingState {
+ BumpPtrAllocator Allocator;
MachineFunction &MF;
SourceMgr *SM;
const SlotMapping &IRSlots;
DenseMap<unsigned, MachineBasicBlock *> MBBSlots;
- DenseMap<unsigned, unsigned> VirtualRegisterSlots;
+ DenseMap<unsigned, VRegInfo*> VRegInfos;
DenseMap<unsigned, int> FixedStackObjectSlots;
DenseMap<unsigned, int> StackObjectSlots;
DenseMap<unsigned, unsigned> ConstantPoolSlots;
DenseMap<unsigned, unsigned> JumpTableSlots;
- /// Hold the generic virtual registers.
- SmallSet<unsigned, 8> GenericVRegs;
PerFunctionMIParsingState(MachineFunction &MF, SourceMgr &SM,
const SlotMapping &IRSlots);
+
+ VRegInfo &getVRegInfo(unsigned VReg);
};
/// Parse the machine basic block definitions, and skip the machine
@@ -73,26 +89,29 @@ bool parseMachineBasicBlockDefinitions(PerFunctionMIParsingState &PFS,
/// on the given source string.
///
/// Return true if an error occurred.
-bool parseMachineInstructions(const PerFunctionMIParsingState &PFS,
- StringRef Src, SMDiagnostic &Error);
+bool parseMachineInstructions(PerFunctionMIParsingState &PFS, StringRef Src,
+ SMDiagnostic &Error);
-bool parseMBBReference(const PerFunctionMIParsingState &PFS,
+bool parseMBBReference(PerFunctionMIParsingState &PFS,
MachineBasicBlock *&MBB, StringRef Src,
SMDiagnostic &Error);
-bool parseNamedRegisterReference(const PerFunctionMIParsingState &PFS,
- unsigned &Reg, StringRef Src,
- SMDiagnostic &Error);
+bool parseRegisterReference(PerFunctionMIParsingState &PFS,
+ unsigned &Reg, StringRef Src,
+ SMDiagnostic &Error);
+
+bool parseNamedRegisterReference(PerFunctionMIParsingState &PFS, unsigned &Reg,
+ StringRef Src, SMDiagnostic &Error);
-bool parseVirtualRegisterReference(const PerFunctionMIParsingState &PFS,
- unsigned &Reg, StringRef Src,
+bool parseVirtualRegisterReference(PerFunctionMIParsingState &PFS,
+ VRegInfo *&Info, StringRef Src,
SMDiagnostic &Error);
-bool parseStackObjectReference(const PerFunctionMIParsingState &PFS,
- int &FI, StringRef Src, SMDiagnostic &Error);
+bool parseStackObjectReference(PerFunctionMIParsingState &PFS, int &FI,
+ StringRef Src, SMDiagnostic &Error);
-bool parseMDNode(const PerFunctionMIParsingState &PFS, MDNode *&Node,
- StringRef Src, SMDiagnostic &Error);
+bool parseMDNode(PerFunctionMIParsingState &PFS, MDNode *&Node, StringRef Src,
+ SMDiagnostic &Error);
} // end namespace llvm
diff --git a/contrib/llvm/lib/CodeGen/MIRParser/MIRParser.cpp b/contrib/llvm/lib/CodeGen/MIRParser/MIRParser.cpp
index 4aa3df6..3dff114 100644
--- a/contrib/llvm/lib/CodeGen/MIRParser/MIRParser.cpp
+++ b/contrib/llvm/lib/CodeGen/MIRParser/MIRParser.cpp
@@ -102,10 +102,10 @@ public:
/// Return true if error occurred.
bool initializeMachineFunction(MachineFunction &MF);
- bool initializeRegisterInfo(PerFunctionMIParsingState &PFS,
- const yaml::MachineFunction &YamlMF);
+ bool parseRegisterInfo(PerFunctionMIParsingState &PFS,
+ const yaml::MachineFunction &YamlMF);
- void inferRegisterInfo(const PerFunctionMIParsingState &PFS,
+ bool setupRegisterInfo(const PerFunctionMIParsingState &PFS,
const yaml::MachineFunction &YamlMF);
bool initializeFrameInfo(PerFunctionMIParsingState &PFS,
@@ -128,10 +128,10 @@ public:
const yaml::MachineJumpTable &YamlJTI);
private:
- bool parseMDNode(const PerFunctionMIParsingState &PFS, MDNode *&Node,
+ bool parseMDNode(PerFunctionMIParsingState &PFS, MDNode *&Node,
const yaml::StringValue &Source);
- bool parseMBBReference(const PerFunctionMIParsingState &PFS,
+ bool parseMBBReference(PerFunctionMIParsingState &PFS,
MachineBasicBlock *&MBB,
const yaml::StringValue &Source);
@@ -160,6 +160,8 @@ private:
///
/// Return null if the name isn't a register bank.
const RegisterBank *getRegBank(const MachineFunction &MF, StringRef Name);
+
+ void computeFunctionProperties(MachineFunction &MF);
};
} // end namespace llvm
@@ -255,7 +257,8 @@ std::unique_ptr<Module> MIRParserImpl::parse() {
bool MIRParserImpl::parseMachineFunction(yaml::Input &In, Module &M,
bool NoLLVMIR) {
auto MF = llvm::make_unique<yaml::MachineFunction>();
- yaml::yamlize(In, *MF, false);
+ yaml::EmptyContext Ctx;
+ yaml::yamlize(In, *MF, false, Ctx);
if (In.error())
return true;
auto FunctionName = MF->Name;
@@ -279,6 +282,43 @@ void MIRParserImpl::createDummyFunction(StringRef Name, Module &M) {
new UnreachableInst(Context, BB);
}
+static bool isSSA(const MachineFunction &MF) {
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+ for (unsigned I = 0, E = MRI.getNumVirtRegs(); I != E; ++I) {
+ unsigned Reg = TargetRegisterInfo::index2VirtReg(I);
+ if (!MRI.hasOneDef(Reg) && !MRI.def_empty(Reg))
+ return false;
+ }
+ return true;
+}
+
+void MIRParserImpl::computeFunctionProperties(MachineFunction &MF) {
+ MachineFunctionProperties &Properties = MF.getProperties();
+
+ bool HasPHI = false;
+ bool HasInlineAsm = false;
+ for (const MachineBasicBlock &MBB : MF) {
+ for (const MachineInstr &MI : MBB) {
+ if (MI.isPHI())
+ HasPHI = true;
+ if (MI.isInlineAsm())
+ HasInlineAsm = true;
+ }
+ }
+ if (!HasPHI)
+ Properties.set(MachineFunctionProperties::Property::NoPHIs);
+ MF.setHasInlineAsm(HasInlineAsm);
+
+ if (isSSA(MF))
+ Properties.set(MachineFunctionProperties::Property::IsSSA);
+ else
+ Properties.reset(MachineFunctionProperties::Property::IsSSA);
+
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+ if (MRI.getNumVirtRegs() == 0)
+ Properties.set(MachineFunctionProperties::Property::NoVRegs);
+}
+
bool MIRParserImpl::initializeMachineFunction(MachineFunction &MF) {
auto It = Functions.find(MF.getName());
if (It == Functions.end())
@@ -289,11 +329,17 @@ bool MIRParserImpl::initializeMachineFunction(MachineFunction &MF) {
if (YamlMF.Alignment)
MF.setAlignment(YamlMF.Alignment);
MF.setExposesReturnsTwice(YamlMF.ExposesReturnsTwice);
- MF.setHasInlineAsm(YamlMF.HasInlineAsm);
- if (YamlMF.AllVRegsAllocated)
- MF.getProperties().set(MachineFunctionProperties::Property::AllVRegsAllocated);
+
+ if (YamlMF.Legalized)
+ MF.getProperties().set(MachineFunctionProperties::Property::Legalized);
+ if (YamlMF.RegBankSelected)
+ MF.getProperties().set(
+ MachineFunctionProperties::Property::RegBankSelected);
+ if (YamlMF.Selected)
+ MF.getProperties().set(MachineFunctionProperties::Property::Selected);
+
PerFunctionMIParsingState PFS(MF, SM, IRSlots);
- if (initializeRegisterInfo(PFS, YamlMF))
+ if (parseRegisterInfo(PFS, YamlMF))
return true;
if (!YamlMF.Constants.empty()) {
auto *ConstantPool = MF.getConstantPool();
@@ -343,62 +389,60 @@ bool MIRParserImpl::initializeMachineFunction(MachineFunction &MF) {
}
PFS.SM = &SM;
- inferRegisterInfo(PFS, YamlMF);
- // FIXME: This is a temporary workaround until the reserved registers can be
- // serialized.
- MF.getRegInfo().freezeReservedRegs(MF);
+ if (setupRegisterInfo(PFS, YamlMF))
+ return true;
+
+ computeFunctionProperties(MF);
+
MF.verify();
return false;
}
-bool MIRParserImpl::initializeRegisterInfo(PerFunctionMIParsingState &PFS,
- const yaml::MachineFunction &YamlMF) {
+bool MIRParserImpl::parseRegisterInfo(PerFunctionMIParsingState &PFS,
+ const yaml::MachineFunction &YamlMF) {
MachineFunction &MF = PFS.MF;
MachineRegisterInfo &RegInfo = MF.getRegInfo();
- assert(RegInfo.isSSA());
- if (!YamlMF.IsSSA)
- RegInfo.leaveSSA();
assert(RegInfo.tracksLiveness());
if (!YamlMF.TracksRegLiveness)
RegInfo.invalidateLiveness();
- RegInfo.enableSubRegLiveness(YamlMF.TracksSubRegLiveness);
SMDiagnostic Error;
// Parse the virtual register information.
for (const auto &VReg : YamlMF.VirtualRegisters) {
- unsigned Reg;
+ VRegInfo &Info = PFS.getVRegInfo(VReg.ID.Value);
+ if (Info.Explicit)
+ return error(VReg.ID.SourceRange.Start,
+ Twine("redefinition of virtual register '%") +
+ Twine(VReg.ID.Value) + "'");
+ Info.Explicit = true;
+
if (StringRef(VReg.Class.Value).equals("_")) {
- // This is a generic virtual register.
- // The size will be set appropriately when we reach the definition.
- Reg = RegInfo.createGenericVirtualRegister(/*Size*/ 1);
- PFS.GenericVRegs.insert(Reg);
+ Info.Kind = VRegInfo::GENERIC;
} else {
const auto *RC = getRegClass(MF, VReg.Class.Value);
if (RC) {
- Reg = RegInfo.createVirtualRegister(RC);
+ Info.Kind = VRegInfo::NORMAL;
+ Info.D.RC = RC;
} else {
- const auto *RegBank = getRegBank(MF, VReg.Class.Value);
+ const RegisterBank *RegBank = getRegBank(MF, VReg.Class.Value);
if (!RegBank)
return error(
VReg.Class.SourceRange.Start,
Twine("use of undefined register class or register bank '") +
VReg.Class.Value + "'");
- Reg = RegInfo.createGenericVirtualRegister(/*Size*/ 1);
- RegInfo.setRegBank(Reg, *RegBank);
- PFS.GenericVRegs.insert(Reg);
+ Info.Kind = VRegInfo::REGBANK;
+ Info.D.RegBank = RegBank;
}
}
- if (!PFS.VirtualRegisterSlots.insert(std::make_pair(VReg.ID.Value, Reg))
- .second)
- return error(VReg.ID.SourceRange.Start,
- Twine("redefinition of virtual register '%") +
- Twine(VReg.ID.Value) + "'");
+
if (!VReg.PreferredRegister.Value.empty()) {
- unsigned PreferredReg = 0;
- if (parseNamedRegisterReference(PFS, PreferredReg,
- VReg.PreferredRegister.Value, Error))
+ if (Info.Kind != VRegInfo::NORMAL)
+ return error(VReg.Class.SourceRange.Start,
+ Twine("preferred register can only be set for normal vregs"));
+
+ if (parseRegisterReference(PFS, Info.PreferredReg,
+ VReg.PreferredRegister.Value, Error))
return error(Error, VReg.PreferredRegister.SourceRange);
- RegInfo.setSimpleHint(Reg, PreferredReg);
}
}
@@ -409,9 +453,11 @@ bool MIRParserImpl::initializeRegisterInfo(PerFunctionMIParsingState &PFS,
return error(Error, LiveIn.Register.SourceRange);
unsigned VReg = 0;
if (!LiveIn.VirtualRegister.Value.empty()) {
- if (parseVirtualRegisterReference(PFS, VReg, LiveIn.VirtualRegister.Value,
+ VRegInfo *Info;
+ if (parseVirtualRegisterReference(PFS, Info, LiveIn.VirtualRegister.Value,
Error))
return error(Error, LiveIn.VirtualRegister.SourceRange);
+ VReg = Info->VReg;
}
RegInfo.addLiveIn(Reg, VReg);
}
@@ -430,26 +476,57 @@ bool MIRParserImpl::initializeRegisterInfo(PerFunctionMIParsingState &PFS,
return false;
}
-void MIRParserImpl::inferRegisterInfo(const PerFunctionMIParsingState &PFS,
+bool MIRParserImpl::setupRegisterInfo(const PerFunctionMIParsingState &PFS,
const yaml::MachineFunction &YamlMF) {
- if (YamlMF.CalleeSavedRegisters)
- return;
- MachineRegisterInfo &MRI = PFS.MF.getRegInfo();
- for (const MachineBasicBlock &MBB : PFS.MF) {
- for (const MachineInstr &MI : MBB) {
- for (const MachineOperand &MO : MI.operands()) {
- if (!MO.isRegMask())
- continue;
- MRI.addPhysRegsUsedFromRegMask(MO.getRegMask());
+ MachineFunction &MF = PFS.MF;
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ bool Error = false;
+ // Create VRegs
+ for (auto P : PFS.VRegInfos) {
+ const VRegInfo &Info = *P.second;
+ unsigned Reg = Info.VReg;
+ switch (Info.Kind) {
+ case VRegInfo::UNKNOWN:
+ error(Twine("Cannot determine class/bank of virtual register ") +
+ Twine(P.first) + " in function '" + MF.getName() + "'");
+ Error = true;
+ break;
+ case VRegInfo::NORMAL:
+ MRI.setRegClass(Reg, Info.D.RC);
+ if (Info.PreferredReg != 0)
+ MRI.setSimpleHint(Reg, Info.PreferredReg);
+ break;
+ case VRegInfo::GENERIC:
+ break;
+ case VRegInfo::REGBANK:
+ MRI.setRegBank(Reg, *Info.D.RegBank);
+ break;
+ }
+ }
+
+ // Compute MachineRegisterInfo::UsedPhysRegMask
+ if (!YamlMF.CalleeSavedRegisters) {
+ for (const MachineBasicBlock &MBB : MF) {
+ for (const MachineInstr &MI : MBB) {
+ for (const MachineOperand &MO : MI.operands()) {
+ if (!MO.isRegMask())
+ continue;
+ MRI.addPhysRegsUsedFromRegMask(MO.getRegMask());
+ }
}
}
}
+
+ // FIXME: This is a temporary workaround until the reserved registers can be
+ // serialized.
+ MRI.freezeReservedRegs(MF);
+ return Error;
}
bool MIRParserImpl::initializeFrameInfo(PerFunctionMIParsingState &PFS,
const yaml::MachineFunction &YamlMF) {
MachineFunction &MF = PFS.MF;
- MachineFrameInfo &MFI = *MF.getFrameInfo();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
const Function &F = *MF.getFunction();
const yaml::MachineFrameInfo &YamlMFI = YamlMF.FrameInfo;
MFI.setFrameAddressIsTaken(YamlMFI.IsFrameAddressTaken);
@@ -507,7 +584,7 @@ bool MIRParserImpl::initializeFrameInfo(PerFunctionMIParsingState &PFS,
const yaml::StringValue &Name = Object.Name;
if (!Name.Value.empty()) {
Alloca = dyn_cast_or_null<AllocaInst>(
- F.getValueSymbolTable().lookup(Name.Value));
+ F.getValueSymbolTable()->lookup(Name.Value));
if (!Alloca)
return error(Name.SourceRange.Start,
"alloca instruction named '" + Name.Value +
@@ -597,11 +674,11 @@ bool MIRParserImpl::parseStackObjectsDebugInfo(PerFunctionMIParsingState &PFS,
typecheckMDNode(DIExpr, Expr, Object.DebugExpr, "DIExpression", *this) ||
typecheckMDNode(DILoc, Loc, Object.DebugLoc, "DILocation", *this))
return true;
- PFS.MF.getMMI().setVariableDbgInfo(DIVar, DIExpr, unsigned(FrameIdx), DILoc);
+ PFS.MF.setVariableDbgInfo(DIVar, DIExpr, unsigned(FrameIdx), DILoc);
return false;
}
-bool MIRParserImpl::parseMDNode(const PerFunctionMIParsingState &PFS,
+bool MIRParserImpl::parseMDNode(PerFunctionMIParsingState &PFS,
MDNode *&Node, const yaml::StringValue &Source) {
if (Source.Value.empty())
return false;
@@ -657,7 +734,7 @@ bool MIRParserImpl::initializeJumpTableInfo(PerFunctionMIParsingState &PFS,
return false;
}
-bool MIRParserImpl::parseMBBReference(const PerFunctionMIParsingState &PFS,
+bool MIRParserImpl::parseMBBReference(PerFunctionMIParsingState &PFS,
MachineBasicBlock *&MBB,
const yaml::StringValue &Source) {
SMDiagnostic Error;
@@ -784,6 +861,14 @@ std::unique_ptr<MIRParser>
llvm::createMIRParser(std::unique_ptr<MemoryBuffer> Contents,
LLVMContext &Context) {
auto Filename = Contents->getBufferIdentifier();
+ if (Context.shouldDiscardValueNames()) {
+ Context.diagnose(DiagnosticInfoMIRParser(
+ DS_Error,
+ SMDiagnostic(
+ Filename, SourceMgr::DK_Error,
+ "Can't read MIR with a Context that discards named Values")));
+ return nullptr;
+ }
return llvm::make_unique<MIRParser>(
llvm::make_unique<MIRParserImpl>(std::move(Contents), Filename, Context));
}
diff --git a/contrib/llvm/lib/CodeGen/MIRPrinter.cpp b/contrib/llvm/lib/CodeGen/MIRPrinter.cpp
index 703c99d..db87092 100644
--- a/contrib/llvm/lib/CodeGen/MIRPrinter.cpp
+++ b/contrib/llvm/lib/CodeGen/MIRPrinter.cpp
@@ -14,6 +14,7 @@
#include "MIRPrinter.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallBitVector.h"
#include "llvm/CodeGen/GlobalISel/RegisterBank.h"
#include "llvm/CodeGen/MIRYamlMapping.h"
#include "llvm/CodeGen/MachineConstantPool.h"
@@ -27,13 +28,16 @@
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/IRPrintingPasses.h"
#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/ModuleSlotTracker.h"
#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/Format.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/YAMLTraits.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetIntrinsicInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;
@@ -86,10 +90,8 @@ public:
const MachineConstantPool &ConstantPool);
void convert(ModuleSlotTracker &MST, yaml::MachineJumpTable &YamlJTI,
const MachineJumpTableInfo &JTI);
- void convertStackObjects(yaml::MachineFunction &MF,
- const MachineFrameInfo &MFI, MachineModuleInfo &MMI,
- ModuleSlotTracker &MST,
- const TargetRegisterInfo *TRI);
+ void convertStackObjects(yaml::MachineFunction &YMF,
+ const MachineFunction &MF, ModuleSlotTracker &MST);
private:
void initRegisterMaskIds(const MachineFunction &MF);
@@ -121,7 +123,7 @@ public:
void printTargetFlags(const MachineOperand &Op);
void print(const MachineOperand &Op, const TargetRegisterInfo *TRI,
unsigned I, bool ShouldPrintRegisterTies,
- const MachineRegisterInfo *MRI = nullptr, bool IsDef = false);
+ LLT TypeToPrint, bool IsDef = false);
void print(const MachineMemOperand &Op);
void print(const MCCFIInstruction &CFI, const TargetRegisterInfo *TRI);
@@ -172,16 +174,19 @@ void MIRPrinter::print(const MachineFunction &MF) {
YamlMF.Name = MF.getName();
YamlMF.Alignment = MF.getAlignment();
YamlMF.ExposesReturnsTwice = MF.exposesReturnsTwice();
- YamlMF.HasInlineAsm = MF.hasInlineAsm();
- YamlMF.AllVRegsAllocated = MF.getProperties().hasProperty(
- MachineFunctionProperties::Property::AllVRegsAllocated);
+
+ YamlMF.Legalized = MF.getProperties().hasProperty(
+ MachineFunctionProperties::Property::Legalized);
+ YamlMF.RegBankSelected = MF.getProperties().hasProperty(
+ MachineFunctionProperties::Property::RegBankSelected);
+ YamlMF.Selected = MF.getProperties().hasProperty(
+ MachineFunctionProperties::Property::Selected);
convert(YamlMF, MF.getRegInfo(), MF.getSubtarget().getRegisterInfo());
ModuleSlotTracker MST(MF.getFunction()->getParent());
MST.incorporateFunction(*MF.getFunction());
- convert(MST, YamlMF.FrameInfo, *MF.getFrameInfo());
- convertStackObjects(YamlMF, *MF.getFrameInfo(), MF.getMMI(), MST,
- MF.getSubtarget().getRegisterInfo());
+ convert(MST, YamlMF.FrameInfo, MF.getFrameInfo());
+ convertStackObjects(YamlMF, MF, MST);
if (const auto *ConstantPool = MF.getConstantPool())
convert(YamlMF, *ConstantPool);
if (const auto *JumpTableInfo = MF.getJumpTableInfo())
@@ -203,9 +208,7 @@ void MIRPrinter::print(const MachineFunction &MF) {
void MIRPrinter::convert(yaml::MachineFunction &MF,
const MachineRegisterInfo &RegInfo,
const TargetRegisterInfo *TRI) {
- MF.IsSSA = RegInfo.isSSA();
MF.TracksRegLiveness = RegInfo.tracksLiveness();
- MF.TracksSubRegLiveness = RegInfo.subRegLivenessEnabled();
// Print the virtual register definitions.
for (unsigned I = 0, E = RegInfo.getNumVirtRegs(); I < E; ++I) {
@@ -219,7 +222,8 @@ void MIRPrinter::convert(yaml::MachineFunction &MF,
VReg.Class = StringRef(RegInfo.getRegBankOrNull(Reg)->getName()).lower();
else {
VReg.Class = std::string("_");
- assert(RegInfo.getSize(Reg) && "Generic registers must have a size");
+ assert((RegInfo.def_empty(Reg) || RegInfo.getType(Reg).isValid()) &&
+ "Generic registers must have a valid type");
}
unsigned PreferredReg = RegInfo.getSimpleHint(Reg);
if (PreferredReg)
@@ -279,11 +283,11 @@ void MIRPrinter::convert(ModuleSlotTracker &MST,
}
}
-void MIRPrinter::convertStackObjects(yaml::MachineFunction &MF,
- const MachineFrameInfo &MFI,
- MachineModuleInfo &MMI,
- ModuleSlotTracker &MST,
- const TargetRegisterInfo *TRI) {
+void MIRPrinter::convertStackObjects(yaml::MachineFunction &YMF,
+ const MachineFunction &MF,
+ ModuleSlotTracker &MST) {
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
+ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
// Process fixed stack objects.
unsigned ID = 0;
for (int I = MFI.getObjectIndexBegin(); I < 0; ++I) {
@@ -300,7 +304,7 @@ void MIRPrinter::convertStackObjects(yaml::MachineFunction &MF,
YamlObject.Alignment = MFI.getObjectAlignment(I);
YamlObject.IsImmutable = MFI.isImmutableObjectIndex(I);
YamlObject.IsAliased = MFI.isAliasedObjectIndex(I);
- MF.FixedStackObjects.push_back(YamlObject);
+ YMF.FixedStackObjects.push_back(YamlObject);
StackObjectOperandMapping.insert(
std::make_pair(I, FrameIndexOperand::createFixed(ID++)));
}
@@ -325,7 +329,7 @@ void MIRPrinter::convertStackObjects(yaml::MachineFunction &MF,
YamlObject.Size = MFI.getObjectSize(I);
YamlObject.Alignment = MFI.getObjectAlignment(I);
- MF.StackObjects.push_back(YamlObject);
+ YMF.StackObjects.push_back(YamlObject);
StackObjectOperandMapping.insert(std::make_pair(
I, FrameIndexOperand::create(YamlObject.Name.Value, ID++)));
}
@@ -338,9 +342,9 @@ void MIRPrinter::convertStackObjects(yaml::MachineFunction &MF,
"Invalid stack object index");
const FrameIndexOperand &StackObject = StackObjectInfo->second;
if (StackObject.IsFixed)
- MF.FixedStackObjects[StackObject.ID].CalleeSavedRegister = Reg;
+ YMF.FixedStackObjects[StackObject.ID].CalleeSavedRegister = Reg;
else
- MF.StackObjects[StackObject.ID].CalleeSavedRegister = Reg;
+ YMF.StackObjects[StackObject.ID].CalleeSavedRegister = Reg;
}
for (unsigned I = 0, E = MFI.getLocalFrameObjectCount(); I < E; ++I) {
auto LocalObject = MFI.getLocalFrameObjectMap(I);
@@ -349,26 +353,26 @@ void MIRPrinter::convertStackObjects(yaml::MachineFunction &MF,
"Invalid stack object index");
const FrameIndexOperand &StackObject = StackObjectInfo->second;
assert(!StackObject.IsFixed && "Expected a locally mapped stack object");
- MF.StackObjects[StackObject.ID].LocalOffset = LocalObject.second;
+ YMF.StackObjects[StackObject.ID].LocalOffset = LocalObject.second;
}
// Print the stack object references in the frame information class after
// converting the stack objects.
if (MFI.hasStackProtectorIndex()) {
- raw_string_ostream StrOS(MF.FrameInfo.StackProtector.Value);
+ raw_string_ostream StrOS(YMF.FrameInfo.StackProtector.Value);
MIPrinter(StrOS, MST, RegisterMaskIds, StackObjectOperandMapping)
.printStackObjectReference(MFI.getStackProtectorIndex());
}
// Print the debug variable information.
- for (MachineModuleInfo::VariableDbgInfo &DebugVar :
- MMI.getVariableDbgInfo()) {
+ for (const MachineFunction::VariableDbgInfo &DebugVar :
+ MF.getVariableDbgInfo()) {
auto StackObjectInfo = StackObjectOperandMapping.find(DebugVar.Slot);
assert(StackObjectInfo != StackObjectOperandMapping.end() &&
"Invalid stack object index");
const FrameIndexOperand &StackObject = StackObjectInfo->second;
assert(!StackObject.IsFixed && "Expected a non-fixed stack object");
- auto &Object = MF.StackObjects[StackObject.ID];
+ auto &Object = YMF.StackObjects[StackObject.ID];
{
raw_string_ostream StrOS(Object.DebugVar.Value);
DebugVar.Var->printAsOperand(StrOS, MST);
@@ -475,25 +479,27 @@ void MIPrinter::print(const MachineBasicBlock &MBB) {
OS << ", ";
printMBBReference(**I);
if (MBB.hasSuccessorProbabilities())
- OS << '(' << MBB.getSuccProbability(I) << ')';
+ OS << '('
+ << format("0x%08" PRIx32, MBB.getSuccProbability(I).getNumerator())
+ << ')';
}
OS << "\n";
HasLineAttributes = true;
}
// Print the live in registers.
- const auto *TRI = MBB.getParent()->getSubtarget().getRegisterInfo();
- assert(TRI && "Expected target register info");
- if (!MBB.livein_empty()) {
+ const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
+ if (MRI.tracksLiveness() && !MBB.livein_empty()) {
+ const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();
OS.indent(2) << "liveins: ";
bool First = true;
for (const auto &LI : MBB.liveins()) {
if (!First)
OS << ", ";
First = false;
- printReg(LI.PhysReg, OS, TRI);
- if (LI.LaneMask != ~0u)
- OS << ':' << PrintLaneMask(LI.LaneMask);
+ printReg(LI.PhysReg, OS, &TRI);
+ if (!LI.LaneMask.all())
+ OS << ":0x" << PrintLaneMask(LI.LaneMask);
}
OS << "\n";
HasLineAttributes = true;
@@ -537,6 +543,27 @@ static bool hasComplexRegisterTies(const MachineInstr &MI) {
return false;
}
+static LLT getTypeToPrint(const MachineInstr &MI, unsigned OpIdx,
+ SmallBitVector &PrintedTypes,
+ const MachineRegisterInfo &MRI) {
+ const MachineOperand &Op = MI.getOperand(OpIdx);
+ if (!Op.isReg())
+ return LLT{};
+
+ if (MI.isVariadic() || OpIdx >= MI.getNumExplicitOperands())
+ return MRI.getType(Op.getReg());
+
+ auto &OpInfo = MI.getDesc().OpInfo[OpIdx];
+ if (!OpInfo.isGenericType())
+ return MRI.getType(Op.getReg());
+
+ if (PrintedTypes[OpInfo.getGenericTypeIndex()])
+ return LLT{};
+
+ PrintedTypes.set(OpInfo.getGenericTypeIndex());
+ return MRI.getType(Op.getReg());
+}
+
void MIPrinter::print(const MachineInstr &MI) {
const auto *MF = MI.getParent()->getParent();
const auto &MRI = MF->getRegInfo();
@@ -548,6 +575,7 @@ void MIPrinter::print(const MachineInstr &MI) {
if (MI.isCFIInstruction())
assert(MI.getNumOperands() == 1 && "Expected 1 operand in CFI instruction");
+ SmallBitVector PrintedTypes(8);
bool ShouldPrintRegisterTies = hasComplexRegisterTies(MI);
unsigned I = 0, E = MI.getNumOperands();
for (; I < E && MI.getOperand(I).isReg() && MI.getOperand(I).isDef() &&
@@ -555,7 +583,8 @@ void MIPrinter::print(const MachineInstr &MI) {
++I) {
if (I)
OS << ", ";
- print(MI.getOperand(I), TRI, I, ShouldPrintRegisterTies, &MRI,
+ print(MI.getOperand(I), TRI, I, ShouldPrintRegisterTies,
+ getTypeToPrint(MI, I, PrintedTypes, MRI),
/*IsDef=*/true);
}
@@ -564,11 +593,6 @@ void MIPrinter::print(const MachineInstr &MI) {
if (MI.getFlag(MachineInstr::FrameSetup))
OS << "frame-setup ";
OS << TII->getName(MI.getOpcode());
- if (isPreISelGenericOpcode(MI.getOpcode())) {
- assert(MI.getType() && "Generic instructions must have a type");
- OS << ' ';
- MI.getType()->print(OS, /*IsForDebug*/ false, /*NoDetails*/ true);
- }
if (I < E)
OS << ' ';
@@ -576,7 +600,8 @@ void MIPrinter::print(const MachineInstr &MI) {
for (; I < E; ++I) {
if (NeedComma)
OS << ", ";
- print(MI.getOperand(I), TRI, I, ShouldPrintRegisterTies);
+ print(MI.getOperand(I), TRI, I, ShouldPrintRegisterTies,
+ getTypeToPrint(MI, I, PrintedTypes, MRI));
NeedComma = true;
}
@@ -748,8 +773,8 @@ static const char *getTargetIndexName(const MachineFunction &MF, int Index) {
}
void MIPrinter::print(const MachineOperand &Op, const TargetRegisterInfo *TRI,
- unsigned I, bool ShouldPrintRegisterTies,
- const MachineRegisterInfo *MRI, bool IsDef) {
+ unsigned I, bool ShouldPrintRegisterTies, LLT TypeToPrint,
+ bool IsDef) {
printTargetFlags(Op);
switch (Op.getType()) {
case MachineOperand::MO_Register:
@@ -773,12 +798,11 @@ void MIPrinter::print(const MachineOperand &Op, const TargetRegisterInfo *TRI,
printReg(Op.getReg(), OS, TRI);
// Print the sub register.
if (Op.getSubReg() != 0)
- OS << ':' << TRI->getSubRegIndexName(Op.getSubReg());
+ OS << '.' << TRI->getSubRegIndexName(Op.getSubReg());
if (ShouldPrintRegisterTies && Op.isTied() && !Op.isDef())
OS << "(tied-def " << Op.getParent()->findTiedOperandIdx(I) << ")";
- assert((!IsDef || MRI) && "for IsDef, MRI must be provided");
- if (IsDef && MRI->getSize(Op.getReg()))
- OS << '(' << MRI->getSize(Op.getReg()) << ')';
+ if (TypeToPrint.isValid())
+ OS << '(' << TypeToPrint << ')';
break;
case MachineOperand::MO_Immediate:
OS << Op.getImm();
@@ -861,8 +885,25 @@ void MIPrinter::print(const MachineOperand &Op, const TargetRegisterInfo *TRI,
OS << "<mcsymbol " << *Op.getMCSymbol() << ">";
break;
case MachineOperand::MO_CFIIndex: {
- const auto &MMI = Op.getParent()->getParent()->getParent()->getMMI();
- print(MMI.getFrameInstructions()[Op.getCFIIndex()], TRI);
+ const MachineFunction &MF = *Op.getParent()->getParent()->getParent();
+ print(MF.getFrameInstructions()[Op.getCFIIndex()], TRI);
+ break;
+ }
+ case MachineOperand::MO_IntrinsicID: {
+ Intrinsic::ID ID = Op.getIntrinsicID();
+ if (ID < Intrinsic::num_intrinsics)
+ OS << "intrinsic(@" << Intrinsic::getName(ID, None) << ')';
+ else {
+ const MachineFunction &MF = *Op.getParent()->getParent()->getParent();
+ const TargetIntrinsicInfo *TII = MF.getTarget().getIntrinsicInfo();
+ OS << "intrinsic(@" << TII->getName(ID) << ')';
+ }
+ break;
+ }
+ case MachineOperand::MO_Predicate: {
+ auto Pred = static_cast<CmpInst::Predicate>(Op.getPredicate());
+ OS << (CmpInst::isIntPredicate(Pred) ? "int" : "float") << "pred("
+ << CmpInst::getPredicateName(Pred) << ')';
break;
}
}
@@ -875,6 +916,8 @@ void MIPrinter::print(const MachineMemOperand &Op) {
OS << "volatile ";
if (Op.isNonTemporal())
OS << "non-temporal ";
+ if (Op.isDereferenceable())
+ OS << "dereferenceable ";
if (Op.isInvariant())
OS << "invariant ";
if (Op.isLoad())
@@ -917,6 +960,9 @@ void MIPrinter::print(const MachineMemOperand &Op) {
printLLVMNameWithoutPrefix(
OS, cast<ExternalSymbolPseudoSourceValue>(PVal)->getSymbol());
break;
+ case PseudoSourceValue::TargetCustom:
+ llvm_unreachable("TargetCustom pseudo source values are not supported");
+ break;
}
}
printOffset(Op.getOffset());
@@ -956,32 +1002,32 @@ void MIPrinter::print(const MCCFIInstruction &CFI,
const TargetRegisterInfo *TRI) {
switch (CFI.getOperation()) {
case MCCFIInstruction::OpSameValue:
- OS << ".cfi_same_value ";
+ OS << "same_value ";
if (CFI.getLabel())
OS << "<mcsymbol> ";
printCFIRegister(CFI.getRegister(), OS, TRI);
break;
case MCCFIInstruction::OpOffset:
- OS << ".cfi_offset ";
+ OS << "offset ";
if (CFI.getLabel())
OS << "<mcsymbol> ";
printCFIRegister(CFI.getRegister(), OS, TRI);
OS << ", " << CFI.getOffset();
break;
case MCCFIInstruction::OpDefCfaRegister:
- OS << ".cfi_def_cfa_register ";
+ OS << "def_cfa_register ";
if (CFI.getLabel())
OS << "<mcsymbol> ";
printCFIRegister(CFI.getRegister(), OS, TRI);
break;
case MCCFIInstruction::OpDefCfaOffset:
- OS << ".cfi_def_cfa_offset ";
+ OS << "def_cfa_offset ";
if (CFI.getLabel())
OS << "<mcsymbol> ";
OS << CFI.getOffset();
break;
case MCCFIInstruction::OpDefCfa:
- OS << ".cfi_def_cfa ";
+ OS << "def_cfa ";
if (CFI.getLabel())
OS << "<mcsymbol> ";
printCFIRegister(CFI.getRegister(), OS, TRI);
diff --git a/contrib/llvm/lib/CodeGen/MIRPrintingPass.cpp b/contrib/llvm/lib/CodeGen/MIRPrintingPass.cpp
index 8e7566a..c690bcf 100644
--- a/contrib/llvm/lib/CodeGen/MIRPrintingPass.cpp
+++ b/contrib/llvm/lib/CodeGen/MIRPrintingPass.cpp
@@ -33,7 +33,7 @@ struct MIRPrintingPass : public MachineFunctionPass {
MIRPrintingPass() : MachineFunctionPass(ID), OS(dbgs()) {}
MIRPrintingPass(raw_ostream &OS) : MachineFunctionPass(ID), OS(OS) {}
- const char *getPassName() const override { return "MIR Printing Pass"; }
+ StringRef getPassName() const override { return "MIR Printing Pass"; }
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesAll();
diff --git a/contrib/llvm/lib/CodeGen/MachineBasicBlock.cpp b/contrib/llvm/lib/CodeGen/MachineBasicBlock.cpp
index 689dd07..3869f97 100644
--- a/contrib/llvm/lib/CodeGen/MachineBasicBlock.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineBasicBlock.cpp
@@ -51,7 +51,7 @@ MCSymbol *MachineBasicBlock::getSymbol() const {
if (!CachedMCSymbol) {
const MachineFunction *MF = getParent();
MCContext &Ctx = MF->getContext();
- const char *Prefix = Ctx.getAsmInfo()->getPrivateLabelPrefix();
+ auto Prefix = Ctx.getAsmInfo()->getPrivateLabelPrefix();
assert(getNumber() >= 0 && "cannot get label for unreachable MBB");
CachedMCSymbol = Ctx.getOrCreateSymbol(Twine(Prefix) + "BB" +
Twine(MF->getFunctionNumber()) +
@@ -74,7 +74,8 @@ raw_ostream &llvm::operator<<(raw_ostream &OS, const MachineBasicBlock &MBB) {
/// MBBs start out as #-1. When a MBB is added to a MachineFunction, it
/// gets the next available unique MBB number. If it is removed from a
/// MachineFunction, it goes back to being #-1.
-void ilist_traits<MachineBasicBlock>::addNodeToList(MachineBasicBlock *N) {
+void ilist_callback_traits<MachineBasicBlock>::addNodeToList(
+ MachineBasicBlock *N) {
MachineFunction &MF = *N->getParent();
N->Number = MF.addToMBBNumbering(N);
@@ -85,7 +86,8 @@ void ilist_traits<MachineBasicBlock>::addNodeToList(MachineBasicBlock *N) {
I->AddRegOperandsToUseLists(RegInfo);
}
-void ilist_traits<MachineBasicBlock>::removeNodeFromList(MachineBasicBlock *N) {
+void ilist_callback_traits<MachineBasicBlock>::removeNodeFromList(
+ MachineBasicBlock *N) {
N->getParent()->removeFromMBBNumbering(N->Number);
N->Number = -1;
}
@@ -116,15 +118,13 @@ void ilist_traits<MachineInstr>::removeNodeFromList(MachineInstr *N) {
/// When moving a range of instructions from one MBB list to another, we need to
/// update the parent pointers and the use/def lists.
-void ilist_traits<MachineInstr>::
-transferNodesFromList(ilist_traits<MachineInstr> &FromList,
- ilist_iterator<MachineInstr> First,
- ilist_iterator<MachineInstr> Last) {
+void ilist_traits<MachineInstr>::transferNodesFromList(ilist_traits &FromList,
+ instr_iterator First,
+ instr_iterator Last) {
assert(Parent->getParent() == FromList.Parent->getParent() &&
"MachineInstr parent mismatch!");
-
- // Splice within the same MBB -> no change.
- if (Parent == FromList.Parent) return;
+ assert(this != &FromList && "Called without a real transfer...");
+ assert(Parent != FromList.Parent && "Two lists have the same parent?");
// If splicing between two blocks within the same function, just update the
// parent pointers.
@@ -132,7 +132,7 @@ transferNodesFromList(ilist_traits<MachineInstr> &FromList,
First->setParent(Parent);
}
-void ilist_traits<MachineInstr>::deleteNode(MachineInstr* MI) {
+void ilist_traits<MachineInstr>::deleteNode(MachineInstr *MI) {
assert(!MI->getParent() && "MI is still in a block!");
Parent->getParent()->DeleteMachineInstr(MI);
}
@@ -149,12 +149,25 @@ MachineBasicBlock::iterator MachineBasicBlock::getFirstNonPHI() {
MachineBasicBlock::iterator
MachineBasicBlock::SkipPHIsAndLabels(MachineBasicBlock::iterator I) {
iterator E = end();
+ while (I != E && (I->isPHI() || I->isPosition()))
+ ++I;
+ // FIXME: This needs to change if we wish to bundle labels
+ // inside the bundle.
+ assert((I == E || !I->isInsideBundle()) &&
+ "First non-phi / non-label instruction is inside a bundle!");
+ return I;
+}
+
+MachineBasicBlock::iterator
+MachineBasicBlock::SkipPHIsLabelsAndDebug(MachineBasicBlock::iterator I) {
+ iterator E = end();
while (I != E && (I->isPHI() || I->isPosition() || I->isDebugValue()))
++I;
// FIXME: This needs to change if we wish to bundle labels / dbg_values
// inside the bundle.
assert((I == E || !I->isInsideBundle()) &&
- "First non-phi / non-label instruction is inside a bundle!");
+ "First non-phi / non-label / non-debug "
+ "instruction is inside a bundle!");
return I;
}
@@ -178,10 +191,7 @@ MachineBasicBlock::instr_iterator MachineBasicBlock::getFirstInstrTerminator() {
MachineBasicBlock::iterator MachineBasicBlock::getFirstNonDebugInstr() {
// Skip over begin-of-block dbg_value instructions.
- iterator I = begin(), E = end();
- while (I != E && I->isDebugValue())
- ++I;
- return I;
+ return skipDebugInstructionsForward(begin(), end());
}
MachineBasicBlock::iterator MachineBasicBlock::getLastNonDebugInstr() {
@@ -276,9 +286,9 @@ void MachineBasicBlock::print(raw_ostream &OS, ModuleSlotTracker &MST,
if (!livein_empty()) {
if (Indexes) OS << '\t';
OS << " Live Ins:";
- for (const auto &LI : make_range(livein_begin(), livein_end())) {
+ for (const auto &LI : LiveIns) {
OS << ' ' << PrintReg(LI.PhysReg, TRI);
- if (LI.LaneMask != ~0u)
+ if (!LI.LaneMask.all())
OS << ':' << PrintLaneMask(LI.LaneMask);
}
OS << '\n';
@@ -323,22 +333,20 @@ void MachineBasicBlock::printAsOperand(raw_ostream &OS,
}
void MachineBasicBlock::removeLiveIn(MCPhysReg Reg, LaneBitmask LaneMask) {
- LiveInVector::iterator I = std::find_if(
- LiveIns.begin(), LiveIns.end(),
- [Reg] (const RegisterMaskPair &LI) { return LI.PhysReg == Reg; });
+ LiveInVector::iterator I = find_if(
+ LiveIns, [Reg](const RegisterMaskPair &LI) { return LI.PhysReg == Reg; });
if (I == LiveIns.end())
return;
I->LaneMask &= ~LaneMask;
- if (I->LaneMask == 0)
+ if (I->LaneMask.none())
LiveIns.erase(I);
}
bool MachineBasicBlock::isLiveIn(MCPhysReg Reg, LaneBitmask LaneMask) const {
- livein_iterator I = std::find_if(
- LiveIns.begin(), LiveIns.end(),
- [Reg] (const RegisterMaskPair &LI) { return LI.PhysReg == Reg; });
- return I != livein_end() && (I->LaneMask & LaneMask) != 0;
+ livein_iterator I = find_if(
+ LiveIns, [Reg](const RegisterMaskPair &LI) { return LI.PhysReg == Reg; });
+ return I != livein_end() && (I->LaneMask & LaneMask).any();
}
void MachineBasicBlock::sortUniqueLiveIns() {
@@ -418,7 +426,7 @@ void MachineBasicBlock::updateTerminator() {
// The block has an unconditional branch. If its successor is now its
// layout successor, delete the branch.
if (isLayoutSuccessor(TBB))
- TII->RemoveBranch(*this);
+ TII->removeBranch(*this);
} else {
// The block has an unconditional fallthrough. If its successor is not its
// layout successor, insert a branch. First we have to locate the only
@@ -438,7 +446,7 @@ void MachineBasicBlock::updateTerminator() {
// Finally update the unconditional successor to be reached via a branch
// if it would not be reached by fallthrough.
if (!isLayoutSuccessor(TBB))
- TII->InsertBranch(*this, TBB, nullptr, Cond, DL);
+ TII->insertBranch(*this, TBB, nullptr, Cond, DL);
}
return;
}
@@ -448,13 +456,13 @@ void MachineBasicBlock::updateTerminator() {
// successors is its layout successor, rewrite it to a fallthrough
// conditional branch.
if (isLayoutSuccessor(TBB)) {
- if (TII->ReverseBranchCondition(Cond))
+ if (TII->reverseBranchCondition(Cond))
return;
- TII->RemoveBranch(*this);
- TII->InsertBranch(*this, FBB, nullptr, Cond, DL);
+ TII->removeBranch(*this);
+ TII->insertBranch(*this, FBB, nullptr, Cond, DL);
} else if (isLayoutSuccessor(FBB)) {
- TII->RemoveBranch(*this);
- TII->InsertBranch(*this, TBB, nullptr, Cond, DL);
+ TII->removeBranch(*this);
+ TII->insertBranch(*this, TBB, nullptr, Cond, DL);
}
return;
}
@@ -476,37 +484,37 @@ void MachineBasicBlock::updateTerminator() {
// Remove the conditional jump, leaving unconditional fallthrough.
// FIXME: This does not seem like a reasonable pattern to support, but it
// has been seen in the wild coming out of degenerate ARM test cases.
- TII->RemoveBranch(*this);
+ TII->removeBranch(*this);
// Finally update the unconditional successor to be reached via a branch if
// it would not be reached by fallthrough.
if (!isLayoutSuccessor(TBB))
- TII->InsertBranch(*this, TBB, nullptr, Cond, DL);
+ TII->insertBranch(*this, TBB, nullptr, Cond, DL);
return;
}
// We enter here iff exactly one successor is TBB which cannot fall through
// and the remaining successors, if any, are EHPads. In this case, we need to
// change the conditional branch into an unconditional branch.
- TII->RemoveBranch(*this);
+ TII->removeBranch(*this);
Cond.clear();
- TII->InsertBranch(*this, TBB, nullptr, Cond, DL);
+ TII->insertBranch(*this, TBB, nullptr, Cond, DL);
return;
}
// The block has a fallthrough conditional branch.
if (isLayoutSuccessor(TBB)) {
- if (TII->ReverseBranchCondition(Cond)) {
+ if (TII->reverseBranchCondition(Cond)) {
// We can't reverse the condition, add an unconditional branch.
Cond.clear();
- TII->InsertBranch(*this, FallthroughBB, nullptr, Cond, DL);
+ TII->insertBranch(*this, FallthroughBB, nullptr, Cond, DL);
return;
}
- TII->RemoveBranch(*this);
- TII->InsertBranch(*this, FallthroughBB, nullptr, Cond, DL);
+ TII->removeBranch(*this);
+ TII->insertBranch(*this, FallthroughBB, nullptr, Cond, DL);
} else if (!isLayoutSuccessor(FallthroughBB)) {
- TII->RemoveBranch(*this);
- TII->InsertBranch(*this, TBB, FallthroughBB, Cond, DL);
+ TII->removeBranch(*this);
+ TII->insertBranch(*this, TBB, FallthroughBB, Cond, DL);
}
}
@@ -545,7 +553,7 @@ void MachineBasicBlock::addSuccessorWithoutProb(MachineBasicBlock *Succ) {
void MachineBasicBlock::removeSuccessor(MachineBasicBlock *Succ,
bool NormalizeSuccProbs) {
- succ_iterator I = std::find(Successors.begin(), Successors.end(), Succ);
+ succ_iterator I = find(Successors, Succ);
removeSuccessor(I, NormalizeSuccProbs);
}
@@ -611,7 +619,7 @@ void MachineBasicBlock::addPredecessor(MachineBasicBlock *Pred) {
}
void MachineBasicBlock::removePredecessor(MachineBasicBlock *Pred) {
- pred_iterator I = std::find(Predecessors.begin(), Predecessors.end(), Pred);
+ pred_iterator I = find(Predecessors, Pred);
assert(I != Predecessors.end() && "Pred is not a predecessor of this block!");
Predecessors.erase(I);
}
@@ -661,11 +669,11 @@ MachineBasicBlock::transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB) {
}
bool MachineBasicBlock::isPredecessor(const MachineBasicBlock *MBB) const {
- return std::find(pred_begin(), pred_end(), MBB) != pred_end();
+ return is_contained(predecessors(), MBB);
}
bool MachineBasicBlock::isSuccessor(const MachineBasicBlock *MBB) const {
- return std::find(succ_begin(), succ_end(), MBB) != succ_end();
+ return is_contained(successors(), MBB);
}
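The llvm::find, find_if and is_contained calls that replace the std:: algorithm spellings throughout this file are range-based wrappers from llvm/ADT/STLExtras.h, roughly (a sketch, not the exact upstream signatures):

  template <typename R, typename T>
  auto find(R &&Range, const T &Val) -> decltype(std::begin(Range)) {
    return std::find(std::begin(Range), std::end(Range), Val);
  }
  template <typename R, typename UnaryPredicate>
  auto find_if(R &&Range, UnaryPredicate P) -> decltype(std::begin(Range)) {
    return std::find_if(std::begin(Range), std::end(Range), P);
  }
  template <typename R, typename E>
  bool is_contained(R &&Range, const E &Element) {
    return std::find(std::begin(Range), std::end(Range), Element) !=
           std::end(Range);
  }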
bool MachineBasicBlock::isLayoutSuccessor(const MachineBasicBlock *MBB) const {
@@ -775,7 +783,7 @@ MachineBasicBlock *MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ,
continue;
unsigned Reg = OI->getReg();
- if (std::find(UsedRegs.begin(), UsedRegs.end(), Reg) == UsedRegs.end())
+ if (!is_contained(UsedRegs, Reg))
UsedRegs.push_back(Reg);
}
}
@@ -802,9 +810,8 @@ MachineBasicBlock *MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ,
for (SmallVectorImpl<MachineInstr*>::iterator I = Terminators.begin(),
E = Terminators.end(); I != E; ++I) {
- if (std::find(NewTerminators.begin(), NewTerminators.end(), *I) ==
- NewTerminators.end())
- Indexes->removeMachineInstrFromMaps(**I);
+ if (!is_contained(NewTerminators, *I))
+ Indexes->removeMachineInstrFromMaps(**I);
}
}
@@ -813,7 +820,7 @@ MachineBasicBlock *MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ,
if (!NMBB->isLayoutSuccessor(Succ)) {
SmallVector<MachineOperand, 4> Cond;
const TargetInstrInfo *TII = getParent()->getSubtarget().getInstrInfo();
- TII->InsertBranch(*NMBB, Succ, nullptr, Cond, DL);
+ TII->insertBranch(*NMBB, Succ, nullptr, Cond, DL);
if (Indexes) {
for (MachineInstr &MI : NMBB->instrs()) {
@@ -1090,16 +1097,16 @@ bool MachineBasicBlock::CorrectExtraCFGEdges(MachineBasicBlock *DestA,
bool Changed = false;
- MachineFunction::iterator FallThru = std::next(getIterator());
+ MachineBasicBlock *FallThru = getNextNode();
if (!DestA && !DestB) {
// Block falls through to successor.
- DestA = &*FallThru;
- DestB = &*FallThru;
+ DestA = FallThru;
+ DestB = FallThru;
} else if (DestA && !DestB) {
if (IsCond)
// Block ends in conditional jump that falls through to successor.
- DestB = &*FallThru;
+ DestB = FallThru;
} else {
assert(DestA && DestB && IsCond &&
"CFG in a bad state. Cannot correct CFG edges");
@@ -1130,17 +1137,11 @@ bool MachineBasicBlock::CorrectExtraCFGEdges(MachineBasicBlock *DestA,
/// instructions. Return UnknownLoc if there is none.
DebugLoc
MachineBasicBlock::findDebugLoc(instr_iterator MBBI) {
- DebugLoc DL;
- instr_iterator E = instr_end();
- if (MBBI == E)
- return DL;
-
// Skip debug values; we don't want a DebugLoc from them.
- while (MBBI != E && MBBI->isDebugValue())
- MBBI++;
- if (MBBI != E)
- DL = MBBI->getDebugLoc();
- return DL;
+ MBBI = skipDebugInstructionsForward(MBBI, instr_end());
+ if (MBBI != instr_end())
+ return MBBI->getDebugLoc();
+ return {};
}
/// Return probability of the edge from this block to MBB.
@@ -1287,3 +1288,14 @@ MachineBasicBlock::getEndClobberMask(const TargetRegisterInfo *TRI) const {
// care what kind of return it is, putting a mask after it is a no-op.
return isReturnBlock() && !succ_empty() ? TRI->getNoPreservedMask() : nullptr;
}
+
+void MachineBasicBlock::clearLiveIns() {
+ LiveIns.clear();
+}
+
+MachineBasicBlock::livein_iterator MachineBasicBlock::livein_begin() const {
+ assert(getParent()->getProperties().hasProperty(
+ MachineFunctionProperties::Property::TracksLiveness) &&
+ "Liveness information is accurate");
+ return LiveIns.begin();
+}
diff --git a/contrib/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp b/contrib/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp
index 6c0f99f..7d5124d 100644
--- a/contrib/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp
@@ -42,9 +42,7 @@ static cl::opt<GVDAGType> ViewMachineBlockFreqPropagationDAG(
"display a graph using the raw "
"integer fractional block frequency representation."),
clEnumValN(GVDT_Count, "count", "display a graph using the real "
- "profile count if available."),
-
- clEnumValEnd));
+ "profile count if available.")));
extern cl::opt<std::string> ViewBlockFreqFuncName;
extern cl::opt<unsigned> ViewHotFreqPercent;
@@ -52,29 +50,26 @@ extern cl::opt<unsigned> ViewHotFreqPercent;
namespace llvm {
template <> struct GraphTraits<MachineBlockFrequencyInfo *> {
- typedef const MachineBasicBlock NodeType;
+ typedef const MachineBasicBlock *NodeRef;
typedef MachineBasicBlock::const_succ_iterator ChildIteratorType;
- typedef MachineFunction::const_iterator nodes_iterator;
+ typedef pointer_iterator<MachineFunction::const_iterator> nodes_iterator;
- static inline const NodeType *
- getEntryNode(const MachineBlockFrequencyInfo *G) {
+ static NodeRef getEntryNode(const MachineBlockFrequencyInfo *G) {
return &G->getFunction()->front();
}
- static ChildIteratorType child_begin(const NodeType *N) {
+ static ChildIteratorType child_begin(const NodeRef N) {
return N->succ_begin();
}
- static ChildIteratorType child_end(const NodeType *N) {
- return N->succ_end();
- }
+ static ChildIteratorType child_end(const NodeRef N) { return N->succ_end(); }
static nodes_iterator nodes_begin(const MachineBlockFrequencyInfo *G) {
- return G->getFunction()->begin();
+ return nodes_iterator(G->getFunction()->begin());
}
static nodes_iterator nodes_end(const MachineBlockFrequencyInfo *G) {
- return G->getFunction()->end();
+ return nodes_iterator(G->getFunction()->end());
}
};
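pointer_iterator is needed because MachineFunction::const_iterator dereferences to a MachineBasicBlock reference while NodeRef is a pointer. A stripped-down sketch covering only the operations used here; the real adaptor in llvm/ADT/iterator.h derives from iterator_adaptor_base:

  template <typename WrappedIteratorT>
  class pointer_iterator {
    WrappedIteratorT I;
  public:
    explicit pointer_iterator(WrappedIteratorT It) : I(It) {}
    // Yield the address of the referenced element, turning T& into T*.
    auto operator*() const -> decltype(&*I) { return &*I; }
    pointer_iterator &operator++() { ++I; return *this; }
    bool operator==(const pointer_iterator &RHS) const { return I == RHS.I; }
    bool operator!=(const pointer_iterator &RHS) const { return I != RHS.I; }
  };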
@@ -175,6 +170,12 @@ Optional<uint64_t> MachineBlockFrequencyInfo::getBlockProfileCount(
return MBFI ? MBFI->getBlockProfileCount(*F, MBB) : None;
}
+Optional<uint64_t>
+MachineBlockFrequencyInfo::getProfileCountFromFreq(uint64_t Freq) const {
+  if (!MBFI)
+    return None;
+  const Function *F = MBFI->getFunction()->getFunction();
+  return MBFI->getProfileCountFromFreq(*F, Freq);
+}
+
const MachineFunction *MachineBlockFrequencyInfo::getFunction() const {
return MBFI ? MBFI->getFunction() : nullptr;
}
diff --git a/contrib/llvm/lib/CodeGen/MachineBlockPlacement.cpp b/contrib/llvm/lib/CodeGen/MachineBlockPlacement.cpp
index 03dda8b..40e3840 100644
--- a/contrib/llvm/lib/CodeGen/MachineBlockPlacement.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineBlockPlacement.cpp
@@ -40,6 +40,7 @@
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/TailDuplicator.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
@@ -78,10 +79,14 @@ static cl::opt<unsigned> ExitBlockBias(
"over the original exit to be considered the new exit."),
cl::init(0), cl::Hidden);
+// Definition:
+// - Outlining: placement of a basic block outside the chain or hot path.
+
static cl::opt<bool> OutlineOptionalBranches(
"outline-optional-branches",
- cl::desc("Put completely optional branches, i.e. branches with a common "
- "post dominator, out of line."),
+    cl::desc("Outline optional branches, i.e. branches with a common post "
+             "dominator, by placing their blocks outside the hot path or "
+             "chain"),
cl::init(false), cl::Hidden);
static cl::opt<unsigned> OutlineOptionalThreshold(
@@ -117,6 +122,12 @@ static cl::opt<unsigned> MisfetchCost(
static cl::opt<unsigned> JumpInstCost("jump-inst-cost",
cl::desc("Cost of jump instructions."),
cl::init(1), cl::Hidden);
+static cl::opt<bool>
+TailDupPlacement("tail-dup-placement",
+                 cl::desc("Perform tail duplication during placement. "
+                          "Creates more fallthrough opportunities in "
+                          "outline branches."),
+ cl::init(true), cl::Hidden);
static cl::opt<bool>
BranchFoldPlacement("branch-fold-placement",
@@ -124,6 +135,14 @@ BranchFoldPlacement("branch-fold-placement",
"Reduces code size."),
cl::init(true), cl::Hidden);
+// Heuristic for tail duplication.
+static cl::opt<unsigned> TailDuplicatePlacementThreshold(
+ "tail-dup-placement-threshold",
+ cl::desc("Instruction cutoff for tail duplication during layout. "
+ "Tail merging during layout is forced to have a threshold "
+ "that won't conflict."), cl::init(2),
+ cl::Hidden);
+
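Both knobs are hidden cl::opts, so layout experiments can still reach them from llc; hypothetical invocations:

  llc -O2 -tail-dup-placement=false test.ll
  llc -O2 -tail-dup-placement-threshold=4 test.ll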
extern cl::opt<unsigned> StaticLikelyProb;
extern cl::opt<unsigned> ProfileLikelyProb;
@@ -181,6 +200,16 @@ public:
/// \brief End of blocks within the chain.
iterator end() { return Blocks.end(); }
+  /// \brief Remove a basic block from this chain.
+  ///
+  /// Returns true if the block was present and removed.
+  bool remove(MachineBasicBlock *BB) {
+    for (iterator I = begin(), E = end(); I != E; ++I) {
+      if (*I == BB) {
+        Blocks.erase(I);
+        return true;
+      }
+    }
+    return false;
+  }
+
/// \brief Merge a block chain into this one.
///
/// This routine merges a block chain into this one. It takes care of forming
@@ -235,7 +264,7 @@ public:
namespace {
class MachineBlockPlacement : public MachineFunctionPass {
/// \brief A typedef for a block filter set.
- typedef SmallPtrSet<MachineBasicBlock *, 16> BlockFilterSet;
+ typedef SmallSetVector<MachineBasicBlock *, 16> BlockFilterSet;
/// \brief work lists of blocks that are ready to be laid out
SmallVector<MachineBasicBlock *, 16> BlockWorkList;
@@ -253,6 +282,11 @@ class MachineBlockPlacement : public MachineFunctionPass {
/// \brief A handle to the loop info.
MachineLoopInfo *MLI;
+ /// \brief Preferred loop exit.
+  /// Kept as a member variable for convenience: the preferred exit block may
+  /// be removed by tail duplication deep in the call stack.
+ MachineBasicBlock *PreferredLoopExit;
+
/// \brief A handle to the target's instruction info.
const TargetInstrInfo *TII;
@@ -262,6 +296,13 @@ class MachineBlockPlacement : public MachineFunctionPass {
/// \brief A handle to the post dominator tree.
MachineDominatorTree *MDT;
+ /// \brief Duplicator used to duplicate tails during placement.
+ ///
+ /// Placement decisions can open up new tail duplication opportunities, but
+ /// since tail duplication affects placement decisions of later blocks, it
+ /// must be done inline.
+ TailDuplicator TailDup;
+
/// \brief A set of blocks that are unavoidably executed, i.e. they dominate
/// all terminators of the MachineFunction.
SmallPtrSet<MachineBasicBlock *, 4> UnavoidableBlocks;
@@ -283,8 +324,26 @@ class MachineBlockPlacement : public MachineFunctionPass {
/// between basic blocks.
DenseMap<MachineBasicBlock *, BlockChain *> BlockToChain;
+#ifndef NDEBUG
+ /// The set of basic blocks that have terminators that cannot be fully
+ /// analyzed. These basic blocks cannot be re-ordered safely by
+ /// MachineBlockPlacement, and we must preserve physical layout of these
+ /// blocks and their successors through the pass.
+ SmallPtrSet<MachineBasicBlock *, 4> BlocksWithUnanalyzableExits;
+#endif
+
+  /// Decrease the UnscheduledPredecessors count for all successors of the
+  /// blocks in \p Chain, and add any successor whose count reaches zero to
+  /// the appropriate work list.
void markChainSuccessors(BlockChain &Chain, MachineBasicBlock *LoopHeaderBB,
const BlockFilterSet *BlockFilter = nullptr);
+
+  /// Decrease the UnscheduledPredecessors count for the successors of a
+  /// single block, and add any successor whose count reaches zero to the
+  /// appropriate work list.
+ void markBlockSuccessors(
+ BlockChain &Chain, MachineBasicBlock *BB, MachineBasicBlock *LoopHeaderBB,
+ const BlockFilterSet *BlockFilter = nullptr);
+
BranchProbability
collectViableSuccessors(MachineBasicBlock *BB, BlockChain &Chain,
const BlockFilterSet *BlockFilter,
@@ -294,6 +353,16 @@ class MachineBlockPlacement : public MachineFunctionPass {
const BlockFilterSet *BlockFilter,
BranchProbability SuccProb,
BranchProbability HotProb);
+ bool repeatedlyTailDuplicateBlock(
+ MachineBasicBlock *BB, MachineBasicBlock *&LPred,
+ MachineBasicBlock *LoopHeaderBB,
+ BlockChain &Chain, BlockFilterSet *BlockFilter,
+ MachineFunction::iterator &PrevUnplacedBlockIt);
+ bool maybeTailDuplicateBlock(MachineBasicBlock *BB, MachineBasicBlock *LPred,
+ const BlockChain &Chain,
+ BlockFilterSet *BlockFilter,
+ MachineFunction::iterator &PrevUnplacedBlockIt,
+ bool &DuplicatedToPred);
bool
hasBetterLayoutPredecessor(MachineBasicBlock *BB, MachineBasicBlock *Succ,
BlockChain &SuccChain, BranchProbability SuccProb,
@@ -319,7 +388,7 @@ class MachineBlockPlacement : public MachineFunctionPass {
SmallPtrSetImpl<BlockChain *> &UpdatedPreds,
const BlockFilterSet *BlockFilter);
void buildChain(MachineBasicBlock *BB, BlockChain &Chain,
- const BlockFilterSet *BlockFilter = nullptr);
+ BlockFilterSet *BlockFilter = nullptr);
MachineBasicBlock *findBestLoopTop(MachineLoop &L,
const BlockFilterSet &LoopBlockSet);
MachineBasicBlock *findBestLoopExit(MachineLoop &L,
@@ -384,37 +453,49 @@ static std::string getBlockName(MachineBasicBlock *BB) {
/// When a chain is being merged into the "placed" chain, this routine will
/// quickly walk the successors of each block in the chain and mark them as
/// having one fewer active predecessor. It also adds any successors of this
-/// chain which reach the zero-predecessor state to the worklist passed in.
+/// chain which reach the zero-predecessor state to the appropriate worklist.
void MachineBlockPlacement::markChainSuccessors(
BlockChain &Chain, MachineBasicBlock *LoopHeaderBB,
const BlockFilterSet *BlockFilter) {
// Walk all the blocks in this chain, marking their successors as having
// a predecessor placed.
for (MachineBasicBlock *MBB : Chain) {
- // Add any successors for which this is the only un-placed in-loop
- // predecessor to the worklist as a viable candidate for CFG-neutral
- // placement. No subsequent placement of this block will violate the CFG
- // shape, so we get to use heuristics to choose a favorable placement.
- for (MachineBasicBlock *Succ : MBB->successors()) {
- if (BlockFilter && !BlockFilter->count(Succ))
- continue;
- BlockChain &SuccChain = *BlockToChain[Succ];
- // Disregard edges within a fixed chain, or edges to the loop header.
- if (&Chain == &SuccChain || Succ == LoopHeaderBB)
- continue;
+ markBlockSuccessors(Chain, MBB, LoopHeaderBB, BlockFilter);
+ }
+}
- // This is a cross-chain edge that is within the loop, so decrement the
- // loop predecessor count of the destination chain.
- if (SuccChain.UnscheduledPredecessors == 0 ||
- --SuccChain.UnscheduledPredecessors > 0)
- continue;
+/// \brief Mark a single block's successors as having one fewer unscheduled
+/// predecessor.
+///
+/// Under normal circumstances, this is only called by markChainSuccessors,
+/// but if a block that was to be placed is completely tail-duplicated away,
+/// and was duplicated into the chain end, we need to redo markBlockSuccessors
+/// for just that block.
+void MachineBlockPlacement::markBlockSuccessors(
+ BlockChain &Chain, MachineBasicBlock *MBB, MachineBasicBlock *LoopHeaderBB,
+ const BlockFilterSet *BlockFilter) {
+ // Add any successors for which this is the only un-placed in-loop
+ // predecessor to the worklist as a viable candidate for CFG-neutral
+ // placement. No subsequent placement of this block will violate the CFG
+ // shape, so we get to use heuristics to choose a favorable placement.
+ for (MachineBasicBlock *Succ : MBB->successors()) {
+ if (BlockFilter && !BlockFilter->count(Succ))
+ continue;
+ BlockChain &SuccChain = *BlockToChain[Succ];
+ // Disregard edges within a fixed chain, or edges to the loop header.
+ if (&Chain == &SuccChain || Succ == LoopHeaderBB)
+ continue;
- auto *MBB = *SuccChain.begin();
- if (MBB->isEHPad())
- EHPadWorkList.push_back(MBB);
- else
- BlockWorkList.push_back(MBB);
- }
+ // This is a cross-chain edge that is within the loop, so decrement the
+ // loop predecessor count of the destination chain.
+ if (SuccChain.UnscheduledPredecessors == 0 ||
+ --SuccChain.UnscheduledPredecessors > 0)
+ continue;
+
+ auto *NewBB = *SuccChain.begin();
+ if (NewBB->isEHPad())
+ EHPadWorkList.push_back(NewBB);
+ else
+ BlockWorkList.push_back(NewBB);
}
}
@@ -627,16 +708,46 @@ bool MachineBlockPlacement::hasBetterLayoutPredecessor(
// BB->Succ. This is equivalent to looking the CFG backward with backward
// edge: Prob(Succ->BB) needs to >= HotProb in order to be selected (without
// profile data).
-
+ // --------------------------------------------------------------------------
+ // Case 3: forked diamond
+ // S
+ // / \
+ // / \
+ // BB Pred
+ // | \ / |
+ // | \ / |
+ // | X |
+ // | / \ |
+ // | / \ |
+ // S1 S2
+ //
+ // The current block is BB and edge BB->S1 is now being evaluated.
+ // As above S->BB was already selected because
+ // prob(S->BB) > prob(S->Pred). Assume that prob(BB->S1) >= prob(BB->S2).
+ //
+ // topo-order:
+ //
+ // S-------| ---S
+ // | | | |
+ // ---BB | | BB
+ // | | | |
+ // | Pred----| | S1----
+ // | | | |
+ // --(S1 or S2) ---Pred--
+ //
+ // topo-cost = freq(S->Pred) + freq(BB->S1) + freq(BB->S2)
+ // + min(freq(Pred->S1), freq(Pred->S2))
+ // Non-topo-order cost:
+ // In the worst case, S2 will not get laid out after Pred.
+ // non-topo-cost = 2 * freq(S->Pred) + freq(BB->S2).
+ // To be conservative, we can assume that min(freq(Pred->S1), freq(Pred->S2))
+  // is 0. Then the non-topo layout is better when
+ // freq(S->Pred) < freq(BB->S1).
+ // This is exactly what is checked below.
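+  // As a numeric check, with assumed frequencies freq(S->Pred) = 10,
+  // freq(BB->S1) = 30, freq(BB->S2) = 5 and min(freq(Pred->S1),
+  // freq(Pred->S2)) = 0:
+  //   topo-cost     = 10 + 30 + 5 + 0 = 45
+  //   non-topo-cost = 2 * 10 + 5 = 25
+  // and 25 < 45, matching freq(S->Pred) = 10 < freq(BB->S1) = 30.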
+  // Note there are other shapes that apply (Pred may not be a single block),
+  // but they all fit this general pattern.
BranchProbability HotProb = getLayoutSuccessorProbThreshold(BB);
- // Forward checking. For case 2, SuccProb will be 1.
- if (SuccProb < HotProb) {
- DEBUG(dbgs() << " " << getBlockName(Succ) << " -> " << SuccProb
- << " (prob) (CFG conflict)\n");
- return true;
- }
-
// Make sure that a hot successor doesn't have a globally more
// important predecessor.
BlockFrequency CandidateEdgeFreq = MBFI->getBlockFreq(BB) * RealSuccProb;
@@ -647,11 +758,11 @@ bool MachineBlockPlacement::hasBetterLayoutPredecessor(
(BlockFilter && !BlockFilter->count(Pred)) ||
BlockToChain[Pred] == &Chain)
continue;
- // Do backward checking. For case 1, it is actually redundant check. For
- // case 2 above, we need a backward checking to filter out edges that are
- // not 'strongly' biased. With profile data available, the check is mostly
- // redundant too (when threshold prob is set at 50%) unless S has more than
- // two successors.
+ // Do backward checking.
+ // For all cases above, we need a backward checking to filter out edges that
+ // are not 'strongly' biased. With profile data available, the check is
+ // mostly redundant for case 2 (when threshold prob is set at 50%) unless S
+ // has more than two successors.
// BB Pred
// \ /
// Succ
@@ -660,6 +771,8 @@ bool MachineBlockPlacement::hasBetterLayoutPredecessor(
// i.e. freq(BB->Succ) > freq(BB->Succ) * HotProb + freq(Pred->Succ) *
// HotProb
// i.e. freq(BB->Succ) * (1 - HotProb) > freq(Pred->Succ) * HotProb
+ // Case 1 is covered too, because the first equation reduces to:
+ // prob(BB->Succ) > HotProb. (freq(Succ) = freq(BB) for a triangle)
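+  // For instance, with an assumed HotProb of 4/5, a predecessor edge of
+  // frequency 10 vetoes any candidate edge of frequency up to 40, since
+  // 10 * (4/5) >= f * (1/5) holds exactly when f <= 40.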
BlockFrequency PredEdgeFreq =
MBFI->getBlockFreq(Pred) * MBPI->getEdgeProbability(Pred, Succ);
if (PredEdgeFreq * HotProb >= CandidateEdgeFreq * HotProb.getCompl()) {
@@ -669,7 +782,7 @@ bool MachineBlockPlacement::hasBetterLayoutPredecessor(
}
if (BadCFGConflict) {
- DEBUG(dbgs() << " " << getBlockName(Succ) << " -> " << SuccProb
+    DEBUG(dbgs() << "    Not a candidate: " << getBlockName(Succ) << " -> "
+                 << SuccProb
<< " (prob) (non-cold CFG conflict)\n");
return true;
}
@@ -699,7 +812,7 @@ MachineBlockPlacement::selectBestSuccessor(MachineBasicBlock *BB,
auto AdjustedSumProb =
collectViableSuccessors(BB, Chain, BlockFilter, Successors);
- DEBUG(dbgs() << "Attempting merge from: " << getBlockName(BB) << "\n");
+ DEBUG(dbgs() << "Selecting best successor for: " << getBlockName(BB) << "\n");
for (MachineBasicBlock *Succ : Successors) {
auto RealSuccProb = MBPI->getEdgeProbability(BB, Succ);
BranchProbability SuccProb =
@@ -718,15 +831,23 @@ MachineBlockPlacement::selectBestSuccessor(MachineBasicBlock *BB,
continue;
DEBUG(
- dbgs() << " " << getBlockName(Succ) << " -> " << SuccProb
- << " (prob)"
+ dbgs() << " Candidate: " << getBlockName(Succ) << ", probability: "
+ << SuccProb
<< (SuccChain.UnscheduledPredecessors != 0 ? " (CFG break)" : "")
<< "\n");
- if (BestSucc && BestProb >= SuccProb)
+
+ if (BestSucc && BestProb >= SuccProb) {
+ DEBUG(dbgs() << " Not the best candidate, continuing\n");
continue;
+ }
+
+ DEBUG(dbgs() << " Setting it as best candidate\n");
BestSucc = Succ;
BestProb = SuccProb;
}
+ if (BestSucc)
+ DEBUG(dbgs() << " Selected: " << getBlockName(BestSucc) << "\n");
+
return BestSucc;
}
@@ -746,10 +867,10 @@ MachineBasicBlock *MachineBlockPlacement::selectBestCandidateBlock(
// worklist of already placed entries.
// FIXME: If this shows up on profiles, it could be folded (at the cost of
// some code complexity) into the loop below.
- WorkList.erase(std::remove_if(WorkList.begin(), WorkList.end(),
- [&](MachineBasicBlock *BB) {
- return BlockToChain.lookup(BB) == &Chain;
- }),
+ WorkList.erase(remove_if(WorkList,
+ [&](MachineBasicBlock *BB) {
+ return BlockToChain.lookup(BB) == &Chain;
+ }),
WorkList.end());
if (WorkList.empty())
@@ -858,7 +979,7 @@ void MachineBlockPlacement::fillWorkLists(
void MachineBlockPlacement::buildChain(
MachineBasicBlock *BB, BlockChain &Chain,
- const BlockFilterSet *BlockFilter) {
+ BlockFilterSet *BlockFilter) {
assert(BB && "BB must not be null.\n");
assert(BlockToChain[BB] == &Chain && "BlockToChainMap mis-match.\n");
MachineFunction::iterator PrevUnplacedBlockIt = F->begin();
@@ -893,6 +1014,17 @@ void MachineBlockPlacement::buildChain(
"layout successor until the CFG reduces\n");
}
+ // Placement may have changed tail duplication opportunities.
+ // Check for that now.
+ if (TailDupPlacement && BestSucc) {
+ // If the chosen successor was duplicated into all its predecessors,
+      // don't bother laying it out; just go round the loop again with BB as
+ // the chain end.
+ if (repeatedlyTailDuplicateBlock(BestSucc, BB, LoopHeaderBB, Chain,
+ BlockFilter, PrevUnplacedBlockIt))
+ continue;
+ }
+
// Place this block, updating the datastructures to reflect its placement.
BlockChain &SuccChain = *BlockToChain[BestSucc];
// Zero out UnscheduledPredecessors for the successor we're about to merge in case
@@ -922,6 +1054,16 @@ void MachineBlockPlacement::buildChain(
MachineBasicBlock *
MachineBlockPlacement::findBestLoopTop(MachineLoop &L,
const BlockFilterSet &LoopBlockSet) {
+ // Placing the latch block before the header may introduce an extra branch
+ // that skips this block the first time the loop is executed, which we want
+ // to avoid when optimising for size.
+ // FIXME: in theory there is a case that does not introduce a new branch,
+ // i.e. when the layout predecessor does not fallthrough to the loop header.
+ // In practice this never happens though: there always seems to be a preheader
+ // that can fallthrough and that is also placed before the header.
+ if (F->getFunction()->optForSize())
+ return L.getHeader();
+
// Check that the header hasn't been fused with a preheader block due to
// crazy branches. If it has, we need to start with the header at the top to
// prevent pulling the preheader into the loop body.
@@ -937,7 +1079,7 @@ MachineBlockPlacement::findBestLoopTop(MachineLoop &L,
for (MachineBasicBlock *Pred : L.getHeader()->predecessors()) {
if (!LoopBlockSet.count(Pred))
continue;
- DEBUG(dbgs() << " header pred: " << getBlockName(Pred) << ", "
+ DEBUG(dbgs() << " header pred: " << getBlockName(Pred) << ", has "
<< Pred->succ_size() << " successors, ";
MBFI->printBlockFreq(dbgs(), Pred) << " freq\n");
if (Pred->succ_size() > 1)
@@ -1066,8 +1208,14 @@ MachineBlockPlacement::findBestLoopExit(MachineLoop &L,
}
// Without a candidate exiting block or with only a single block in the
// loop, just use the loop header to lay out the loop.
- if (!ExitingBB || L.getNumBlocks() == 1)
+ if (!ExitingBB) {
+ DEBUG(dbgs() << " No other candidate exit blocks, using loop header\n");
return nullptr;
+ }
+ if (L.getNumBlocks() == 1) {
+ DEBUG(dbgs() << " Loop has 1 block, using loop header as exit\n");
+ return nullptr;
+ }
// Also, if we have exit blocks which lead to outer loops but didn't select
// one of them as the exiting block we are rotating toward, disable loop
@@ -1116,8 +1264,7 @@ void MachineBlockPlacement::rotateLoop(BlockChain &LoopChain,
}
}
- BlockChain::iterator ExitIt =
- std::find(LoopChain.begin(), LoopChain.end(), ExitingBB);
+ BlockChain::iterator ExitIt = find(LoopChain, ExitingBB);
if (ExitIt == LoopChain.end())
return;
@@ -1140,7 +1287,7 @@ void MachineBlockPlacement::rotateLoop(BlockChain &LoopChain,
void MachineBlockPlacement::rotateLoopWithProfile(
BlockChain &LoopChain, MachineLoop &L, const BlockFilterSet &LoopBlockSet) {
auto HeaderBB = L.getHeader();
- auto HeaderIter = std::find(LoopChain.begin(), LoopChain.end(), HeaderBB);
+ auto HeaderIter = find(LoopChain, HeaderBB);
auto RotationPos = LoopChain.end();
BlockFrequency SmallestRotationCost = BlockFrequency::getMaxFrequency();
@@ -1340,9 +1487,8 @@ void MachineBlockPlacement::buildLoopChains(MachineLoop &L) {
// If we selected just the header for the loop top, look for a potentially
// profitable exit block in the event that rotating the loop can eliminate
// branches by placing an exit edge at the bottom.
- MachineBasicBlock *ExitingBB = nullptr;
if (!RotateLoopWithProfile && LoopTop == L.getHeader())
- ExitingBB = findBestLoopExit(L, LoopBlockSet);
+ PreferredLoopExit = findBestLoopExit(L, LoopBlockSet);
BlockChain &LoopChain = *BlockToChain[LoopTop];
@@ -1361,7 +1507,7 @@ void MachineBlockPlacement::buildLoopChains(MachineLoop &L) {
if (RotateLoopWithProfile)
rotateLoopWithProfile(LoopChain, L, LoopBlockSet);
else
- rotateLoop(LoopChain, ExitingBB, LoopBlockSet);
+ rotateLoop(LoopChain, PreferredLoopExit, LoopBlockSet);
DEBUG({
// Crash at the end so we get all of the debugging output first.
@@ -1374,7 +1520,7 @@ void MachineBlockPlacement::buildLoopChains(MachineLoop &L) {
}
for (MachineBasicBlock *ChainBB : LoopChain) {
dbgs() << " ... " << getBlockName(ChainBB) << "\n";
- if (!LoopBlockSet.erase(ChainBB)) {
+ if (!LoopBlockSet.remove(ChainBB)) {
// We don't mark the loop as bad here because there are real situations
// where this can occur. For example, with an unanalyzable fallthrough
// from a loop block to a non-loop block or vice versa.
@@ -1451,6 +1597,9 @@ void MachineBlockPlacement::buildCFGChains() {
<< getBlockName(BB) << " -> " << getBlockName(NextBB)
<< "\n");
Chain->merge(NextBB, nullptr);
+#ifndef NDEBUG
+ BlocksWithUnanalyzableExits.insert(&*BB);
+#endif
FI = NextFI;
BB = NextBB;
}
@@ -1460,6 +1609,7 @@ void MachineBlockPlacement::buildCFGChains() {
collectMustExecuteBBs();
// Build any loop-based chains.
+ PreferredLoopExit = nullptr;
for (MachineLoop *L : *MLI)
buildLoopChains(*L);
@@ -1522,6 +1672,19 @@ void MachineBlockPlacement::buildCFGChains() {
Cond.clear();
MachineBasicBlock *TBB = nullptr, *FBB = nullptr; // For AnalyzeBranch.
+#ifndef NDEBUG
+ if (!BlocksWithUnanalyzableExits.count(PrevBB)) {
+ // Given the exact block placement we chose, we may actually not _need_ to
+ // be able to edit PrevBB's terminator sequence, but not being _able_ to
+ // do that at this point is a bug.
+ assert((!TII->analyzeBranch(*PrevBB, TBB, FBB, Cond) ||
+ !PrevBB->canFallThrough()) &&
+ "Unexpected block with un-analyzable fallthrough!");
+ Cond.clear();
+ TBB = FBB = nullptr;
+ }
+#endif
+
// The "PrevBB" is not yet updated to reflect current code layout, so,
// o. it may fall-through to a block without explicit "goto" instruction
// before layout, and no longer fall-through it after layout; or
@@ -1576,15 +1739,15 @@ void MachineBlockPlacement::optimizeBranches() {
if (TBB && !Cond.empty() && FBB &&
MBPI->getEdgeProbability(ChainBB, FBB) >
MBPI->getEdgeProbability(ChainBB, TBB) &&
- !TII->ReverseBranchCondition(Cond)) {
+ !TII->reverseBranchCondition(Cond)) {
DEBUG(dbgs() << "Reverse order of the two branches: "
<< getBlockName(ChainBB) << "\n");
DEBUG(dbgs() << " Edge probability: "
<< MBPI->getEdgeProbability(ChainBB, FBB) << " vs "
<< MBPI->getEdgeProbability(ChainBB, TBB) << "\n");
DebugLoc dl; // FIXME: this is nowhere
- TII->RemoveBranch(*ChainBB);
- TII->InsertBranch(*ChainBB, FBB, TBB, Cond, dl);
+ TII->removeBranch(*ChainBB);
+ TII->insertBranch(*ChainBB, FBB, TBB, Cond, dl);
ChainBB->updateTerminator();
}
}
@@ -1659,6 +1822,175 @@ void MachineBlockPlacement::alignBlocks() {
}
}
+/// Tail duplicate \p BB into (some) predecessors if profitable, repeating if
+/// it was duplicated into its chain predecessor and removed.
+/// \p BB - Basic block that may be duplicated.
+///
+/// \p LPred - Chosen layout predecessor of \p BB.
+/// Updated to be the chain end if LPred is removed.
+/// \p Chain - Chain to which \p LPred belongs, and \p BB will belong.
+/// \p BlockFilter - Set of blocks that belong to the loop being laid out.
+/// Used to identify which blocks to update predecessor
+/// counts.
+/// \p PrevUnplacedBlockIt - Iterator pointing to the last block that was
+///                          chosen in the given order due to an unnatural
+///                          CFG; only needed if \p BB is removed and
+/// \p PrevUnplacedBlockIt pointed to \p BB.
+/// @return true if \p BB was removed.
+bool MachineBlockPlacement::repeatedlyTailDuplicateBlock(
+ MachineBasicBlock *BB, MachineBasicBlock *&LPred,
+ MachineBasicBlock *LoopHeaderBB,
+ BlockChain &Chain, BlockFilterSet *BlockFilter,
+ MachineFunction::iterator &PrevUnplacedBlockIt) {
+ bool Removed, DuplicatedToLPred;
+ bool DuplicatedToOriginalLPred;
+ Removed = maybeTailDuplicateBlock(BB, LPred, Chain, BlockFilter,
+ PrevUnplacedBlockIt,
+ DuplicatedToLPred);
+ if (!Removed)
+ return false;
+ DuplicatedToOriginalLPred = DuplicatedToLPred;
+ // Iteratively try to duplicate again. It can happen that a block that is
+ // duplicated into is still small enough to be duplicated again.
+ // No need to call markBlockSuccessors in this case, as the blocks being
+ // duplicated from here on are already scheduled.
+ // Note that DuplicatedToLPred always implies Removed.
+ while (DuplicatedToLPred) {
+    assert(Removed && "Block must have been removed to be duplicated into its "
+                      "layout predecessor.");
+ MachineBasicBlock *DupBB, *DupPred;
+ // The removal callback causes Chain.end() to be updated when a block is
+ // removed. On the first pass through the loop, the chain end should be the
+ // same as it was on function entry. On subsequent passes, because we are
+ // duplicating the block at the end of the chain, if it is removed the
+ // chain will have shrunk by one block.
+ BlockChain::iterator ChainEnd = Chain.end();
+ DupBB = *(--ChainEnd);
+ // Now try to duplicate again.
+ if (ChainEnd == Chain.begin())
+ break;
+ DupPred = *std::prev(ChainEnd);
+ Removed = maybeTailDuplicateBlock(DupBB, DupPred, Chain, BlockFilter,
+ PrevUnplacedBlockIt,
+ DuplicatedToLPred);
+ }
+ // If BB was duplicated into LPred, it is now scheduled. But because it was
+ // removed, markChainSuccessors won't be called for its chain. Instead we
+ // call markBlockSuccessors for LPred to achieve the same effect. This must go
+ // at the end because repeating the tail duplication can increase the number
+ // of unscheduled predecessors.
+ LPred = *std::prev(Chain.end());
+ if (DuplicatedToOriginalLPred)
+ markBlockSuccessors(Chain, LPred, LoopHeaderBB, BlockFilter);
+ return true;
+}
+
+/// Tail duplicate \p BB into (some) predecessors if profitable.
+/// \p BB - Basic block that may be duplicated
+/// \p LPred - Chosen layout predecessor of \p BB
+/// \p Chain - Chain to which \p LPred belongs, and \p BB will belong.
+/// \p BlockFilter - Set of blocks that belong to the loop being laid out.
+/// Used to identify which blocks to update predecessor
+/// counts.
+/// \p PrevUnplacedBlockIt - Iterator pointing to the last block that was
+///                          chosen in the given order due to an unnatural
+///                          CFG; only needed if \p BB is removed and
+/// \p PrevUnplacedBlockIt pointed to \p BB.
+/// \p DuplicatedToLPred - True if the block was duplicated into LPred. Will
+/// only be true if the block was removed.
+/// \return - True if the block was duplicated into all preds and removed.
+bool MachineBlockPlacement::maybeTailDuplicateBlock(
+ MachineBasicBlock *BB, MachineBasicBlock *LPred,
+ const BlockChain &Chain, BlockFilterSet *BlockFilter,
+ MachineFunction::iterator &PrevUnplacedBlockIt,
+ bool &DuplicatedToLPred) {
+
+ DuplicatedToLPred = false;
+ DEBUG(dbgs() << "Redoing tail duplication for Succ#"
+ << BB->getNumber() << "\n");
+ bool IsSimple = TailDup.isSimpleBB(BB);
+ // Blocks with single successors don't create additional fallthrough
+ // opportunities. Don't duplicate them. TODO: When conditional exits are
+ // analyzable, allow them to be duplicated.
+ if (!IsSimple && BB->succ_size() == 1)
+ return false;
+ if (!TailDup.shouldTailDuplicate(IsSimple, *BB))
+ return false;
+ // This has to be a callback because none of it can be done after
+ // BB is deleted.
+ bool Removed = false;
+ auto RemovalCallback =
+ [&](MachineBasicBlock *RemBB) {
+ // Signal to outer function
+ Removed = true;
+
+ // Conservative default.
+ bool InWorkList = true;
+ // Remove from the Chain and Chain Map
+ if (BlockToChain.count(RemBB)) {
+ BlockChain *Chain = BlockToChain[RemBB];
+ InWorkList = Chain->UnscheduledPredecessors == 0;
+ Chain->remove(RemBB);
+ BlockToChain.erase(RemBB);
+ }
+
+ // Handle the unplaced block iterator
+ if (&(*PrevUnplacedBlockIt) == RemBB) {
+          ++PrevUnplacedBlockIt;
+ }
+
+ // Handle the Work Lists
+ if (InWorkList) {
+ SmallVectorImpl<MachineBasicBlock *> &RemoveList = BlockWorkList;
+ if (RemBB->isEHPad())
+ RemoveList = EHPadWorkList;
+ RemoveList.erase(
+ remove_if(RemoveList,
+ [RemBB](MachineBasicBlock *BB) {return BB == RemBB;}),
+ RemoveList.end());
+ }
+
+ // Handle the filter set
+ if (BlockFilter) {
+ BlockFilter->remove(RemBB);
+ }
+
+ // Remove the block from loop info.
+ MLI->removeBlock(RemBB);
+ if (RemBB == PreferredLoopExit)
+ PreferredLoopExit = nullptr;
+
+ DEBUG(dbgs() << "TailDuplicator deleted block: "
+ << getBlockName(RemBB) << "\n");
+ };
+ auto RemovalCallbackRef =
+ llvm::function_ref<void(MachineBasicBlock*)>(RemovalCallback);
+
+ SmallVector<MachineBasicBlock *, 8> DuplicatedPreds;
+ TailDup.tailDuplicateAndUpdate(IsSimple, BB, LPred,
+ &DuplicatedPreds, &RemovalCallbackRef);
+
+ // Update UnscheduledPredecessors to reflect tail-duplication.
+ DuplicatedToLPred = false;
+ for (MachineBasicBlock *Pred : DuplicatedPreds) {
+ // We're only looking for unscheduled predecessors that match the filter.
+ BlockChain* PredChain = BlockToChain[Pred];
+ if (Pred == LPred)
+ DuplicatedToLPred = true;
+    if (Pred == LPred || (BlockFilter && !BlockFilter->count(Pred)) ||
+        PredChain == &Chain)
+ continue;
+ for (MachineBasicBlock *NewSucc : Pred->successors()) {
+ if (BlockFilter && !BlockFilter->count(NewSucc))
+ continue;
+ BlockChain *NewChain = BlockToChain[NewSucc];
+ if (NewChain != &Chain && NewChain != PredChain)
+ NewChain->UnscheduledPredecessors++;
+ }
+ }
+ return Removed;
+}
+
bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) {
if (skipFunction(*MF.getFunction()))
return false;
@@ -1675,6 +2007,18 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) {
TII = MF.getSubtarget().getInstrInfo();
TLI = MF.getSubtarget().getTargetLowering();
MDT = &getAnalysis<MachineDominatorTree>();
+
+ // Initialize PreferredLoopExit to nullptr here since it may never be set if
+ // there are no MachineLoops.
+ PreferredLoopExit = nullptr;
+
+ if (TailDupPlacement) {
+ unsigned TailDupSize = TailDuplicatePlacementThreshold;
+ if (MF.getFunction()->optForSize())
+ TailDupSize = 1;
+ TailDup.initMF(MF, MBPI, /* LayoutMode */ true, TailDupSize);
+ }
+
assert(BlockToChain.empty());
buildCFGChains();
@@ -1688,14 +2032,17 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) {
BranchFoldPlacement;
// No tail merging opportunities if the block number is less than four.
if (MF.size() > 3 && EnableTailMerge) {
+ unsigned TailMergeSize = TailDuplicatePlacementThreshold + 1;
BranchFolder BF(/*EnableTailMerge=*/true, /*CommonHoist=*/false, *MBFI,
- *MBPI);
+ *MBPI, TailMergeSize);
if (BF.OptimizeFunction(MF, TII, MF.getSubtarget().getRegisterInfo(),
getAnalysisIfAvailable<MachineModuleInfo>(), MLI,
/*AfterBlockPlacement=*/true)) {
// Redo the layout if tail merging creates/removes/moves blocks.
BlockToChain.clear();
+ // Must redo the dominator tree if blocks were changed.
+ MDT->runOnMachineFunction(MF);
ChainAllocator.DestroyAll();
buildCFGChains();
}
diff --git a/contrib/llvm/lib/CodeGen/MachineBranchProbabilityInfo.cpp b/contrib/llvm/lib/CodeGen/MachineBranchProbabilityInfo.cpp
index fe73406..21eff9d 100644
--- a/contrib/llvm/lib/CodeGen/MachineBranchProbabilityInfo.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineBranchProbabilityInfo.cpp
@@ -50,8 +50,7 @@ BranchProbability MachineBranchProbabilityInfo::getEdgeProbability(
const MachineBasicBlock *Src, const MachineBasicBlock *Dst) const {
// This is a linear search. Try to use the const_succ_iterator version when
// possible.
- return getEdgeProbability(Src,
- std::find(Src->succ_begin(), Src->succ_end(), Dst));
+ return getEdgeProbability(Src, find(Src->successors(), Dst));
}
bool MachineBranchProbabilityInfo::isEdgeHot(
diff --git a/contrib/llvm/lib/CodeGen/MachineCSE.cpp b/contrib/llvm/lib/CodeGen/MachineCSE.cpp
index 1209f73..0766f46 100644
--- a/contrib/llvm/lib/CodeGen/MachineCSE.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineCSE.cpp
@@ -177,8 +177,7 @@ MachineCSE::isPhysDefTriviallyDead(unsigned Reg,
unsigned LookAheadLeft = LookAheadLimit;
while (LookAheadLeft) {
// Skip over dbg_value's.
- while (I != E && I->isDebugValue())
- ++I;
+ I = skipDebugInstructionsForward(I, E);
if (I == E)
// Reached end of block, register is obviously dead.
@@ -227,7 +226,7 @@ bool MachineCSE::hasLivePhysRegDefUses(const MachineInstr *MI,
if (TargetRegisterInfo::isVirtualRegister(Reg))
continue;
// Reading constant physregs is ok.
- if (!MRI->isConstantPhysReg(Reg, *MBB->getParent()))
+ if (!MRI->isConstantPhysReg(Reg))
for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
PhysRefs.insert(*AI);
}
@@ -346,7 +345,7 @@ bool MachineCSE::isCSECandidate(MachineInstr *MI) {
// Okay, this instruction does a load. As a refinement, we allow the target
// to decide whether the loaded value is actually a constant. If so, we can
// actually use it as a load.
- if (!MI->isInvariantLoad(AA))
+ if (!MI->isDereferenceableInvariantLoad(AA))
// FIXME: we should be able to hoist loads with no other side effects if
// there are no other instructions which can change memory in this loop.
// This is a trivial form of alias analysis.
diff --git a/contrib/llvm/lib/CodeGen/MachineCombiner.cpp b/contrib/llvm/lib/CodeGen/MachineCombiner.cpp
index 6b5c6ba..5beed5f 100644
--- a/contrib/llvm/lib/CodeGen/MachineCombiner.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineCombiner.cpp
@@ -56,7 +56,7 @@ public:
}
void getAnalysisUsage(AnalysisUsage &AU) const override;
bool runOnMachineFunction(MachineFunction &MF) override;
- const char *getPassName() const override { return "Machine InstCombiner"; }
+ StringRef getPassName() const override { return "Machine InstCombiner"; }
private:
bool doSubstitute(unsigned NewSize, unsigned OldSize);
@@ -71,6 +71,7 @@ private:
improvesCriticalPathLen(MachineBasicBlock *MBB, MachineInstr *Root,
MachineTraceMetrics::Trace BlockTrace,
SmallVectorImpl<MachineInstr *> &InsInstrs,
+ SmallVectorImpl<MachineInstr *> &DelInstrs,
DenseMap<unsigned, unsigned> &InstrIdxForVirtReg,
MachineCombinerPattern Pattern);
bool preservesResourceLen(MachineBasicBlock *MBB,
@@ -134,7 +135,7 @@ MachineCombiner::getDepth(SmallVectorImpl<MachineInstr *> &InsInstrs,
// are tracked in the InstrIdxForVirtReg map depth is looked up in InstrDepth
for (auto *InstrPtr : InsInstrs) { // for each Use
unsigned IDepth = 0;
- DEBUG(dbgs() << "NEW INSTR "; InstrPtr->dump(); dbgs() << "\n";);
+ DEBUG(dbgs() << "NEW INSTR "; InstrPtr->dump(TII); dbgs() << "\n";);
for (const MachineOperand &MO : InstrPtr->operands()) {
// Check for virtual register operand.
if (!(MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg())))
@@ -242,6 +243,7 @@ bool MachineCombiner::improvesCriticalPathLen(
MachineBasicBlock *MBB, MachineInstr *Root,
MachineTraceMetrics::Trace BlockTrace,
SmallVectorImpl<MachineInstr *> &InsInstrs,
+ SmallVectorImpl<MachineInstr *> &DelInstrs,
DenseMap<unsigned, unsigned> &InstrIdxForVirtReg,
MachineCombinerPattern Pattern) {
assert(TSchedModel.hasInstrSchedModelOrItineraries() &&
@@ -269,8 +271,13 @@ bool MachineCombiner::improvesCriticalPathLen(
// A more flexible cost calculation for the critical path includes the slack
// of the original code sequence. This may allow the transform to proceed
// even if the instruction depths (data dependency cycles) become worse.
+
unsigned NewRootLatency = getLatency(Root, NewRoot, BlockTrace);
- unsigned RootLatency = TSchedModel.computeInstrLatency(Root);
+ unsigned RootLatency = 0;
+
+ for (auto I : DelInstrs)
+ RootLatency += TSchedModel.computeInstrLatency(I);
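+  // E.g., if a hypothetical mul + add pair is combined into a single fused
+  // multiply-add, RootLatency is now latency(mul) + latency(add) rather than
+  // the latency of the root instruction alone.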
+
unsigned RootSlack = BlockTrace.getInstrSlack(*Root);
DEBUG(dbgs() << " NewRootLatency: " << NewRootLatency << "\n";
@@ -421,7 +428,7 @@ bool MachineCombiner::combineInstructions(MachineBasicBlock *MBB) {
// resource pressure.
if (SubstituteAlways || doSubstitute(NewInstCount, OldInstCount) ||
(improvesCriticalPathLen(MBB, &MI, BlockTrace, InsInstrs,
- InstrIdxForVirtReg, P) &&
+ DelInstrs, InstrIdxForVirtReg, P) &&
preservesResourceLen(MBB, BlockTrace, InsInstrs, DelInstrs))) {
for (auto *InstrPtr : InsInstrs)
MBB->insert((MachineBasicBlock::iterator) &MI, InstrPtr);
diff --git a/contrib/llvm/lib/CodeGen/MachineCopyPropagation.cpp b/contrib/llvm/lib/CodeGen/MachineCopyPropagation.cpp
index 8fdf39d..5de6dec 100644
--- a/contrib/llvm/lib/CodeGen/MachineCopyPropagation.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineCopyPropagation.cpp
@@ -56,11 +56,12 @@ namespace {
MachineFunctionProperties getRequiredProperties() const override {
return MachineFunctionProperties().set(
- MachineFunctionProperties::Property::AllVRegsAllocated);
+ MachineFunctionProperties::Property::NoVRegs);
}
private:
void ClobberRegister(unsigned Reg);
+ void ReadRegister(unsigned Reg);
void CopyPropagateBlock(MachineBasicBlock &MBB);
bool eraseIfRedundant(MachineInstr &Copy, unsigned Src, unsigned Def);
@@ -120,6 +121,18 @@ void MachineCopyPropagation::ClobberRegister(unsigned Reg) {
}
}
+void MachineCopyPropagation::ReadRegister(unsigned Reg) {
+ // If 'Reg' is defined by a copy, the copy is no longer a candidate
+ // for elimination.
+ for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) {
+ Reg2MIMap::iterator CI = CopyMap.find(*AI);
+ if (CI != CopyMap.end()) {
+ DEBUG(dbgs() << "MCP: Copy is used - not dead: "; CI->second->dump());
+ MaybeDeadCopies.remove(CI->second);
+ }
+ }
+}
+
/// Return true if \p PreviousCopy did copy register \p Src to register \p Def.
/// This fact may have been obscured by sub register usage or may not be true at
/// all even though Src and Def are subregisters of the registers used in
@@ -212,12 +225,14 @@ void MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) {
// If Src is defined by a previous copy, the previous copy cannot be
// eliminated.
- for (MCRegAliasIterator AI(Src, TRI, true); AI.isValid(); ++AI) {
- Reg2MIMap::iterator CI = CopyMap.find(*AI);
- if (CI != CopyMap.end()) {
- DEBUG(dbgs() << "MCP: Copy is no longer dead: "; CI->second->dump());
- MaybeDeadCopies.remove(CI->second);
- }
+ ReadRegister(Src);
+ for (const MachineOperand &MO : MI->implicit_operands()) {
+ if (!MO.isReg() || !MO.readsReg())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg)
+ continue;
+ ReadRegister(Reg);
}
DEBUG(dbgs() << "MCP: Copy is a deletion candidate: "; MI->dump());
@@ -234,6 +249,14 @@ void MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) {
// ...
// %xmm2<def> = copy %xmm9
ClobberRegister(Def);
+ for (const MachineOperand &MO : MI->implicit_operands()) {
+ if (!MO.isReg() || !MO.isDef())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg)
+ continue;
+ ClobberRegister(Reg);
+ }
// Remember Def is defined by the copy.
for (MCSubRegIterator SR(Def, TRI, /*IncludeSelf=*/true); SR.isValid();
@@ -245,7 +268,7 @@ void MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) {
// Remember source that's copied to Def. Once it's clobbered, then
// it's no longer available for copy propagation.
RegList &DestList = SrcMap[Src];
- if (std::find(DestList.begin(), DestList.end(), Def) == DestList.end())
+ if (!is_contained(DestList, Def))
DestList.push_back(Def);
continue;
@@ -268,17 +291,8 @@ void MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) {
if (MO.isDef()) {
Defs.push_back(Reg);
- continue;
- }
-
- // If 'Reg' is defined by a copy, the copy is no longer a candidate
- // for elimination.
- for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) {
- Reg2MIMap::iterator CI = CopyMap.find(*AI);
- if (CI != CopyMap.end()) {
- DEBUG(dbgs() << "MCP: Copy is used - not dead: "; CI->second->dump());
- MaybeDeadCopies.remove(CI->second);
- }
+ } else {
+ ReadRegister(Reg);
}
// Treat undef use like defs for copy propagation but not for
// dead copy. We would need to do a liveness check to be sure the copy
diff --git a/contrib/llvm/lib/CodeGen/MachineFunction.cpp b/contrib/llvm/lib/CodeGen/MachineFunction.cpp
index a7c63ef..c1d5ea9 100644
--- a/contrib/llvm/lib/CodeGen/MachineFunction.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineFunction.cpp
@@ -54,28 +54,29 @@ static cl::opt<unsigned>
void MachineFunctionInitializer::anchor() {}
-void MachineFunctionProperties::print(raw_ostream &ROS, bool OnlySet) const {
- // Leave this function even in NDEBUG as an out-of-line anchor.
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
- for (BitVector::size_type i = 0; i < Properties.size(); ++i) {
- bool HasProperty = Properties[i];
- if (OnlySet && !HasProperty)
+static const char *getPropertyName(MachineFunctionProperties::Property Prop) {
+ typedef MachineFunctionProperties::Property P;
+ switch(Prop) {
+ case P::FailedISel: return "FailedISel";
+ case P::IsSSA: return "IsSSA";
+ case P::Legalized: return "Legalized";
+ case P::NoPHIs: return "NoPHIs";
+ case P::NoVRegs: return "NoVRegs";
+ case P::RegBankSelected: return "RegBankSelected";
+ case P::Selected: return "Selected";
+ case P::TracksLiveness: return "TracksLiveness";
+ }
+ llvm_unreachable("Invalid machine function property");
+}
+
+void MachineFunctionProperties::print(raw_ostream &OS) const {
+ const char *Separator = "";
+ for (BitVector::size_type I = 0; I < Properties.size(); ++I) {
+ if (!Properties[I])
continue;
- switch(static_cast<Property>(i)) {
- case Property::IsSSA:
- ROS << (HasProperty ? "SSA, " : "Post SSA, ");
- break;
- case Property::TracksLiveness:
- ROS << (HasProperty ? "" : "not ") << "tracking liveness, ";
- break;
- case Property::AllVRegsAllocated:
- ROS << (HasProperty ? "AllVRegsAllocated" : "HasVRegs");
- break;
- default:
- break;
- }
+ OS << Separator << getPropertyName(static_cast<Property>(I));
+ Separator = ", ";
}
-#endif
}
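Every set property is now printed by name, comma-separated, so a typical mid-pipeline header from MachineFunction::print (updated further below) comes out roughly as follows, assuming a function still in SSA form with liveness tracked:

  # Machine code for function foo: IsSSA, TracksLiveness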
//===----------------------------------------------------------------------===//
@@ -85,7 +86,7 @@ void MachineFunctionProperties::print(raw_ostream &ROS, bool OnlySet) const {
// Out-of-line virtual method.
MachineFunctionInfo::~MachineFunctionInfo() {}
-void ilist_traits<MachineBasicBlock>::deleteNode(MachineBasicBlock *MBB) {
+void ilist_alloc_traits<MachineBasicBlock>::deleteNode(MachineBasicBlock *MBB) {
MBB->getParent()->DeleteMachineBasicBlock(MBB);
}
@@ -100,6 +101,11 @@ MachineFunction::MachineFunction(const Function *F, const TargetMachine &TM,
unsigned FunctionNum, MachineModuleInfo &mmi)
: Fn(F), Target(TM), STI(TM.getSubtargetImpl(*F)), Ctx(mmi.getContext()),
MMI(mmi) {
+ FunctionNumber = FunctionNum;
+ init();
+}
+
+void MachineFunction::init() {
// Assume the function starts in SSA form with correct liveness.
Properties.set(MachineFunctionProperties::Property::IsSSA);
Properties.set(MachineFunctionProperties::Property::TracksLiveness);
@@ -112,11 +118,11 @@ MachineFunction::MachineFunction(const Function *F, const TargetMachine &TM,
// We can realign the stack if the target supports it and the user hasn't
// explicitly asked us not to.
bool CanRealignSP = STI->getFrameLowering()->isStackRealignable() &&
- !F->hasFnAttribute("no-realign-stack");
+ !Fn->hasFnAttribute("no-realign-stack");
FrameInfo = new (Allocator) MachineFrameInfo(
getFnStackAlignment(STI, Fn), /*StackRealignable=*/CanRealignSP,
/*ForceRealign=*/CanRealignSP &&
- F->hasFnAttribute(Attribute::StackAlignment));
+ Fn->hasFnAttribute(Attribute::StackAlignment));
if (Fn->hasFnAttribute(Attribute::StackAlignment))
FrameInfo->ensureMaxAlignment(Fn->getFnStackAlignment());
@@ -133,15 +139,14 @@ MachineFunction::MachineFunction(const Function *F, const TargetMachine &TM,
if (AlignAllFunctions)
Alignment = AlignAllFunctions;
- FunctionNumber = FunctionNum;
JumpTableInfo = nullptr;
if (isFuncletEHPersonality(classifyEHPersonality(
- F->hasPersonalityFn() ? F->getPersonalityFn() : nullptr))) {
+ Fn->hasPersonalityFn() ? Fn->getPersonalityFn() : nullptr))) {
WinEHInfo = new (Allocator) WinEHFuncInfo();
}
- assert(TM.isCompatibleDataLayout(getDataLayout()) &&
+ assert(Target.isCompatibleDataLayout(getDataLayout()) &&
"Can't create a MachineFunction using a Module with a "
"Target-incompatible DataLayout attached\n");
@@ -149,6 +154,11 @@ MachineFunction::MachineFunction(const Function *F, const TargetMachine &TM,
}
MachineFunction::~MachineFunction() {
+ clear();
+}
+
+void MachineFunction::clear() {
+ Properties.reset();
// Don't call destructors on MachineInstr and MachineOperand. All of their
// memory comes from the BumpPtrAllocator which is about to be purged.
//
@@ -296,9 +306,12 @@ MachineFunction::DeleteMachineBasicBlock(MachineBasicBlock *MBB) {
MachineMemOperand *MachineFunction::getMachineMemOperand(
MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, uint64_t s,
- unsigned base_alignment, const AAMDNodes &AAInfo, const MDNode *Ranges) {
+ unsigned base_alignment, const AAMDNodes &AAInfo, const MDNode *Ranges,
+ SynchronizationScope SynchScope, AtomicOrdering Ordering,
+ AtomicOrdering FailureOrdering) {
return new (Allocator)
- MachineMemOperand(PtrInfo, f, s, base_alignment, AAInfo, Ranges);
+ MachineMemOperand(PtrInfo, f, s, base_alignment, AAInfo, Ranges,
+ SynchScope, Ordering, FailureOrdering);
}
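The three new parameters take non-atomic defaults in the upstream header, so existing call sites keep compiling; a call that does populate them might look like this sketch, where MF and Ptr are hypothetical and the enumerator spellings follow this revision:

  MachineMemOperand *MMO = MF.getMachineMemOperand(
      MachinePointerInfo(Ptr), MachineMemOperand::MOLoad, /*s=*/8,
      /*base_alignment=*/8, AAMDNodes(), /*Ranges=*/nullptr, CrossThread,
      AtomicOrdering::SequentiallyConsistent, AtomicOrdering::NotAtomic);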
MachineMemOperand *
@@ -308,13 +321,15 @@ MachineFunction::getMachineMemOperand(const MachineMemOperand *MMO,
return new (Allocator)
MachineMemOperand(MachinePointerInfo(MMO->getValue(),
MMO->getOffset()+Offset),
- MMO->getFlags(), Size,
- MMO->getBaseAlignment());
+ MMO->getFlags(), Size, MMO->getBaseAlignment(),
+ AAMDNodes(), nullptr, MMO->getSynchScope(),
+ MMO->getOrdering(), MMO->getFailureOrdering());
return new (Allocator)
MachineMemOperand(MachinePointerInfo(MMO->getPseudoValue(),
MMO->getOffset()+Offset),
- MMO->getFlags(), Size,
- MMO->getBaseAlignment());
+ MMO->getFlags(), Size, MMO->getBaseAlignment(),
+ AAMDNodes(), nullptr, MMO->getSynchScope(),
+ MMO->getOrdering(), MMO->getFailureOrdering());
}
MachineInstr::mmo_iterator
@@ -345,7 +360,9 @@ MachineFunction::extractLoadMemRefs(MachineInstr::mmo_iterator Begin,
getMachineMemOperand((*I)->getPointerInfo(),
(*I)->getFlags() & ~MachineMemOperand::MOStore,
(*I)->getSize(), (*I)->getBaseAlignment(),
- (*I)->getAAInfo());
+ (*I)->getAAInfo(), nullptr,
+ (*I)->getSynchScope(), (*I)->getOrdering(),
+ (*I)->getFailureOrdering());
Result[Index] = JustLoad;
}
++Index;
@@ -377,7 +394,9 @@ MachineFunction::extractStoreMemRefs(MachineInstr::mmo_iterator Begin,
getMachineMemOperand((*I)->getPointerInfo(),
(*I)->getFlags() & ~MachineMemOperand::MOLoad,
(*I)->getSize(), (*I)->getBaseAlignment(),
- (*I)->getAAInfo());
+ (*I)->getAAInfo(), nullptr,
+ (*I)->getSynchScope(), (*I)->getOrdering(),
+ (*I)->getFailureOrdering());
Result[Index] = JustStore;
}
++Index;
@@ -406,9 +425,8 @@ StringRef MachineFunction::getName() const {
void MachineFunction::print(raw_ostream &OS, const SlotIndexes *Indexes) const {
OS << "# Machine code for function " << getName() << ": ";
- OS << "Properties: <";
getProperties().print(OS);
- OS << ">\n";
+ OS << '\n';
// Print Frame Information
FrameInfo->print(*this, OS);
@@ -535,8 +553,8 @@ MCSymbol *MachineFunction::getJTISymbol(unsigned JTI, MCContext &Ctx,
assert(JumpTableInfo && "No jump tables");
assert(JTI < JumpTableInfo->getJumpTables().size() && "Invalid JTI!");
- const char *Prefix = isLinkerPrivate ? DL.getLinkerPrivateGlobalPrefix()
- : DL.getPrivateGlobalPrefix();
+ StringRef Prefix = isLinkerPrivate ? DL.getLinkerPrivateGlobalPrefix()
+ : DL.getPrivateGlobalPrefix();
SmallString<60> Name;
raw_svector_ostream(Name)
<< Prefix << "JTI" << getFunctionNumber() << '_' << JTI;
@@ -550,6 +568,193 @@ MCSymbol *MachineFunction::getPICBaseSymbol() const {
Twine(getFunctionNumber()) + "$pb");
}
+/// \name Exception Handling
+/// \{
+
+LandingPadInfo &
+MachineFunction::getOrCreateLandingPadInfo(MachineBasicBlock *LandingPad) {
+ unsigned N = LandingPads.size();
+ for (unsigned i = 0; i < N; ++i) {
+ LandingPadInfo &LP = LandingPads[i];
+ if (LP.LandingPadBlock == LandingPad)
+ return LP;
+ }
+
+ LandingPads.push_back(LandingPadInfo(LandingPad));
+ return LandingPads[N];
+}
+
+void MachineFunction::addInvoke(MachineBasicBlock *LandingPad,
+ MCSymbol *BeginLabel, MCSymbol *EndLabel) {
+ LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad);
+ LP.BeginLabels.push_back(BeginLabel);
+ LP.EndLabels.push_back(EndLabel);
+}
+
+MCSymbol *MachineFunction::addLandingPad(MachineBasicBlock *LandingPad) {
+ MCSymbol *LandingPadLabel = Ctx.createTempSymbol();
+ LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad);
+ LP.LandingPadLabel = LandingPadLabel;
+ return LandingPadLabel;
+}
+
+void MachineFunction::addCatchTypeInfo(MachineBasicBlock *LandingPad,
+ ArrayRef<const GlobalValue *> TyInfo) {
+ LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad);
+ for (unsigned N = TyInfo.size(); N; --N)
+ LP.TypeIds.push_back(getTypeIDFor(TyInfo[N - 1]));
+}
+
+void MachineFunction::addFilterTypeInfo(MachineBasicBlock *LandingPad,
+ ArrayRef<const GlobalValue *> TyInfo) {
+ LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad);
+ std::vector<unsigned> IdsInFilter(TyInfo.size());
+ for (unsigned I = 0, E = TyInfo.size(); I != E; ++I)
+ IdsInFilter[I] = getTypeIDFor(TyInfo[I]);
+ LP.TypeIds.push_back(getFilterIDFor(IdsInFilter));
+}
+
+void MachineFunction::tidyLandingPads(DenseMap<MCSymbol*, uintptr_t> *LPMap) {
+ for (unsigned i = 0; i != LandingPads.size(); ) {
+ LandingPadInfo &LandingPad = LandingPads[i];
+ if (LandingPad.LandingPadLabel &&
+ !LandingPad.LandingPadLabel->isDefined() &&
+ (!LPMap || (*LPMap)[LandingPad.LandingPadLabel] == 0))
+ LandingPad.LandingPadLabel = nullptr;
+
+  // Special case: we *should* keep LPs whose LandingPadBlock is null; they
+  // represent the "nounwind" case. A pad that still has a block but has
+  // lost its label, however, is dead and can be erased.
+ if (!LandingPad.LandingPadLabel && LandingPad.LandingPadBlock) {
+ LandingPads.erase(LandingPads.begin() + i);
+ continue;
+ }
+
+ for (unsigned j = 0, e = LandingPads[i].BeginLabels.size(); j != e; ++j) {
+ MCSymbol *BeginLabel = LandingPad.BeginLabels[j];
+ MCSymbol *EndLabel = LandingPad.EndLabels[j];
+ if ((BeginLabel->isDefined() ||
+ (LPMap && (*LPMap)[BeginLabel] != 0)) &&
+ (EndLabel->isDefined() ||
+ (LPMap && (*LPMap)[EndLabel] != 0))) continue;
+
+ LandingPad.BeginLabels.erase(LandingPad.BeginLabels.begin() + j);
+ LandingPad.EndLabels.erase(LandingPad.EndLabels.begin() + j);
+ --j;
+ --e;
+ }
+
+ // Remove landing pads with no try-ranges.
+ if (LandingPads[i].BeginLabels.empty()) {
+ LandingPads.erase(LandingPads.begin() + i);
+ continue;
+ }
+
+ // If there is no landing pad, ensure that the list of typeids is empty.
+ // If the only typeid is a cleanup, this is the same as having no typeids.
+ if (!LandingPad.LandingPadBlock ||
+ (LandingPad.TypeIds.size() == 1 && !LandingPad.TypeIds[0]))
+ LandingPad.TypeIds.clear();
+ ++i;
+ }
+}
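
The loop above erases from LandingPads while iterating by advancing the index only when the current entry survives. A minimal, self-contained sketch of that loop shape (the function and variable names below are invented for illustration):

#include <cstdio>
#include <vector>

// Remove even values in place; `i` advances only when V[i] is kept, so the
// element that slides into the freed slot is re-examined rather than skipped.
static void removeEvens(std::vector<int> &V) {
  for (unsigned i = 0; i != V.size();) {
    if (V[i] % 2 == 0) {
      V.erase(V.begin() + i); // the next element now lives at index i
      continue;               // re-examine index i before advancing
    }
    ++i;
  }
}

int main() {
  std::vector<int> V{1, 2, 2, 3}; // adjacent removals show why `continue` matters
  removeEvens(V);
  for (int X : V)
    printf("%d ", X); // prints: 1 3
}
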
+
+void MachineFunction::addCleanup(MachineBasicBlock *LandingPad) {
+ LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad);
+ LP.TypeIds.push_back(0);
+}
+
+void MachineFunction::addSEHCatchHandler(MachineBasicBlock *LandingPad,
+ const Function *Filter,
+ const BlockAddress *RecoverBA) {
+ LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad);
+ SEHHandler Handler;
+ Handler.FilterOrFinally = Filter;
+ Handler.RecoverBA = RecoverBA;
+ LP.SEHHandlers.push_back(Handler);
+}
+
+void MachineFunction::addSEHCleanupHandler(MachineBasicBlock *LandingPad,
+ const Function *Cleanup) {
+ LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad);
+ SEHHandler Handler;
+ Handler.FilterOrFinally = Cleanup;
+ Handler.RecoverBA = nullptr;
+ LP.SEHHandlers.push_back(Handler);
+}
+
+void MachineFunction::setCallSiteLandingPad(MCSymbol *Sym,
+ ArrayRef<unsigned> Sites) {
+ LPadToCallSiteMap[Sym].append(Sites.begin(), Sites.end());
+}
+
+unsigned MachineFunction::getTypeIDFor(const GlobalValue *TI) {
+ for (unsigned i = 0, N = TypeInfos.size(); i != N; ++i)
+ if (TypeInfos[i] == TI) return i + 1;
+
+ TypeInfos.push_back(TI);
+ return TypeInfos.size();
+}
+
+int MachineFunction::getFilterIDFor(std::vector<unsigned> &TyIds) {
+ // If the new filter coincides with the tail of an existing filter, then
+ // re-use the existing filter. Folding filters more than this requires
+ // re-ordering filters and/or their elements - probably not worth it.
+ for (std::vector<unsigned>::iterator I = FilterEnds.begin(),
+ E = FilterEnds.end(); I != E; ++I) {
+ unsigned i = *I, j = TyIds.size();
+
+ while (i && j)
+ if (FilterIds[--i] != TyIds[--j])
+ goto try_next;
+
+ if (!j)
+ // The new filter coincides with range [i, end) of the existing filter.
+ return -(1 + i);
+
+try_next:;
+ }
+
+ // Add the new filter.
+ int FilterID = -(1 + FilterIds.size());
+ FilterIds.reserve(FilterIds.size() + TyIds.size() + 1);
+ FilterIds.insert(FilterIds.end(), TyIds.begin(), TyIds.end());
+ FilterEnds.push_back(FilterIds.size());
+ FilterIds.push_back(0); // terminator
+ return FilterID;
+}
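
To make the encoding concrete, here is a standalone sketch of the same tail-sharing scheme; the FilterTable name and the main() driver are invented for illustration. Starting from an empty table, registering {5, 7} returns -1 and lays out FilterIds = [5, 7, 0]; registering {7} afterwards matches the tail of the existing filter and returns -2, i.e. offset 1 into FilterIds.

#include <cstdio>
#include <vector>

struct FilterTable {
  std::vector<unsigned> FilterIds;  // concatenated, 0-terminated filters
  std::vector<unsigned> FilterEnds; // one-past-the-end index of each filter

  int getFilterIDFor(const std::vector<unsigned> &TyIds) {
    // Re-use an existing filter if the new one matches its tail.
    for (unsigned End : FilterEnds) {
      unsigned i = End, j = TyIds.size();
      bool Match = true;
      while (i && j) {
        if (FilterIds[--i] != TyIds[--j]) {
          Match = false;
          break;
        }
      }
      if (Match && !j)
        return -(1 + (int)i); // encodes offset i into FilterIds
    }
    // Otherwise append a new filter.
    int FilterID = -(1 + (int)FilterIds.size());
    FilterIds.insert(FilterIds.end(), TyIds.begin(), TyIds.end());
    FilterEnds.push_back(FilterIds.size());
    FilterIds.push_back(0); // terminator
    return FilterID;
  }
};

int main() {
  FilterTable T;
  printf("%d\n", T.getFilterIDFor({5, 7})); // -1: new filter at offset 0
  printf("%d\n", T.getFilterIDFor({7}));    // -2: shares the tail [7, 0]
}
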
+
+void llvm::addLandingPadInfo(const LandingPadInst &I, MachineBasicBlock &MBB) {
+ MachineFunction &MF = *MBB.getParent();
+ if (const auto *PF = dyn_cast<Function>(
+ I.getParent()->getParent()->getPersonalityFn()->stripPointerCasts()))
+ MF.getMMI().addPersonality(PF);
+
+ if (I.isCleanup())
+ MF.addCleanup(&MBB);
+
+ // FIXME: New EH - Add the clauses in reverse order. This isn't 100% correct,
+ // but we need to do it this way because of how the DWARF EH emitter
+ // processes the clauses.
+ for (unsigned i = I.getNumClauses(); i != 0; --i) {
+ Value *Val = I.getClause(i - 1);
+ if (I.isCatch(i - 1)) {
+ MF.addCatchTypeInfo(&MBB,
+ dyn_cast<GlobalValue>(Val->stripPointerCasts()));
+ } else {
+ // Add filters in a list.
+ Constant *CVal = cast<Constant>(Val);
+ SmallVector<const GlobalValue *, 4> FilterList;
+ for (User::op_iterator II = CVal->op_begin(), IE = CVal->op_end();
+ II != IE; ++II)
+ FilterList.push_back(cast<GlobalValue>((*II)->stripPointerCasts()));
+
+ MF.addFilterTypeInfo(&MBB, FilterList);
+ }
+ }
+}
+
+/// \}
+
//===----------------------------------------------------------------------===//
// MachineFrameInfo implementation
//===----------------------------------------------------------------------===//
@@ -634,11 +839,11 @@ int MachineFrameInfo::CreateFixedObject(uint64_t Size, int64_t SPOffset,
/// Create a spill slot at a fixed location on the stack.
/// Returns an index with a negative value.
int MachineFrameInfo::CreateFixedSpillStackObject(uint64_t Size,
- int64_t SPOffset) {
+ int64_t SPOffset,
+ bool Immutable) {
unsigned Align = MinAlign(SPOffset, ForcedRealign ? 1 : StackAlignment);
Align = clampStackAlignment(!StackRealignable, Align, StackAlignment);
- Objects.insert(Objects.begin(), StackObject(Size, Align, SPOffset,
- /*Immutable*/ true,
+ Objects.insert(Objects.begin(), StackObject(Size, Align, SPOffset, Immutable,
/*isSS*/ true,
/*Alloca*/ nullptr,
/*isAliased*/ false));
@@ -890,13 +1095,20 @@ MachineConstantPoolEntry::getSectionKind(const DataLayout *DL) const {
}
MachineConstantPool::~MachineConstantPool() {
+ // A constant may be a member of both Constants and MachineCPVsSharingEntries,
+ // so keep track of which we've deleted to avoid double deletions.
+ DenseSet<MachineConstantPoolValue*> Deleted;
for (unsigned i = 0, e = Constants.size(); i != e; ++i)
- if (Constants[i].isMachineConstantPoolEntry())
+ if (Constants[i].isMachineConstantPoolEntry()) {
+ Deleted.insert(Constants[i].Val.MachineCPVal);
delete Constants[i].Val.MachineCPVal;
+ }
for (DenseSet<MachineConstantPoolValue*>::iterator I =
MachineCPVsSharingEntries.begin(), E = MachineCPVsSharingEntries.end();
- I != E; ++I)
- delete *I;
+ I != E; ++I) {
+ if (Deleted.count(*I) == 0)
+ delete *I;
+ }
}
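
The guard added above prevents a double delete when the same MachineConstantPoolValue is reachable from both Constants and MachineCPVsSharingEntries. A minimal standalone sketch of the pattern, with all names hypothetical:

#include <set>
#include <vector>

struct Node { int Id; };

int main() {
  Node *Shared = new Node{0};
  std::vector<Node *> Primary{Shared, new Node{1}};
  std::set<Node *> Secondary{Shared}; // may alias entries of Primary

  std::set<Node *> Deleted;
  for (Node *N : Primary) {
    Deleted.insert(N); // record the pointer value *before* freeing it
    delete N;
  }
  for (Node *N : Secondary)
    if (Deleted.count(N) == 0) // already freed via Primary; skip it
      delete N;
}

As in the destructor above, only the pointer value is consulted during the second sweep, so already-deleted entries are skipped without being dereferenced.
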
/// Test whether the given two constants can be allocated the same constant pool
diff --git a/contrib/llvm/lib/CodeGen/MachineFunctionAnalysis.cpp b/contrib/llvm/lib/CodeGen/MachineFunctionAnalysis.cpp
deleted file mode 100644
index 338cd1e..0000000
--- a/contrib/llvm/lib/CodeGen/MachineFunctionAnalysis.cpp
+++ /dev/null
@@ -1,60 +0,0 @@
-//===-- MachineFunctionAnalysis.cpp ---------------------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the definitions of the MachineFunctionAnalysis members.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/CodeGen/MachineFunctionAnalysis.h"
-#include "llvm/CodeGen/GCMetadata.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineModuleInfo.h"
-#include "llvm/CodeGen/MachineFunctionInitializer.h"
-using namespace llvm;
-
-char MachineFunctionAnalysis::ID = 0;
-
-MachineFunctionAnalysis::MachineFunctionAnalysis(
- const TargetMachine &tm, MachineFunctionInitializer *MFInitializer)
- : FunctionPass(ID), TM(tm), MF(nullptr), MFInitializer(MFInitializer) {
- initializeMachineModuleInfoPass(*PassRegistry::getPassRegistry());
-}
-
-MachineFunctionAnalysis::~MachineFunctionAnalysis() {
- releaseMemory();
- assert(!MF && "MachineFunctionAnalysis left initialized!");
-}
-
-void MachineFunctionAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
- AU.setPreservesAll();
- AU.addRequired<MachineModuleInfo>();
-}
-
-bool MachineFunctionAnalysis::doInitialization(Module &M) {
- MachineModuleInfo *MMI = getAnalysisIfAvailable<MachineModuleInfo>();
- assert(MMI && "MMI not around yet??");
- MMI->setModule(&M);
- NextFnNum = 0;
- return false;
-}
-
-
-bool MachineFunctionAnalysis::runOnFunction(Function &F) {
- assert(!MF && "MachineFunctionAnalysis already initialized!");
- MF = new MachineFunction(&F, TM, NextFnNum++,
- getAnalysis<MachineModuleInfo>());
- if (MFInitializer)
- MFInitializer->initializeMachineFunction(*MF);
- return false;
-}
-
-void MachineFunctionAnalysis::releaseMemory() {
- delete MF;
- MF = nullptr;
-}
diff --git a/contrib/llvm/lib/CodeGen/MachineFunctionPass.cpp b/contrib/llvm/lib/CodeGen/MachineFunctionPass.cpp
index 228fe17..2265676 100644
--- a/contrib/llvm/lib/CodeGen/MachineFunctionPass.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineFunctionPass.cpp
@@ -22,7 +22,7 @@
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineFunctionAnalysis.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/StackProtector.h"
#include "llvm/IR/Dominators.h"
@@ -41,7 +41,9 @@ bool MachineFunctionPass::runOnFunction(Function &F) {
if (F.hasAvailableExternallyLinkage())
return false;
- MachineFunction &MF = getAnalysis<MachineFunctionAnalysis>().getMF();
+ MachineModuleInfo &MMI = getAnalysis<MachineModuleInfo>();
+ MachineFunction &MF = MMI.getMachineFunction(F);
+
MachineFunctionProperties &MFProps = MF.getProperties();
#ifndef NDEBUG
@@ -49,7 +51,7 @@ bool MachineFunctionPass::runOnFunction(Function &F) {
errs() << "MachineFunctionProperties required by " << getPassName()
<< " pass are not met by function " << F.getName() << ".\n"
<< "Required properties: ";
- RequiredProperties.print(errs(), /*OnlySet=*/true);
+ RequiredProperties.print(errs());
errs() << "\nCurrent properties: ";
MFProps.print(errs());
errs() << "\n";
@@ -60,13 +62,13 @@ bool MachineFunctionPass::runOnFunction(Function &F) {
bool RV = runOnMachineFunction(MF);
MFProps.set(SetProperties);
- MFProps.clear(ClearedProperties);
+ MFProps.reset(ClearedProperties);
return RV;
}
void MachineFunctionPass::getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addRequired<MachineFunctionAnalysis>();
- AU.addPreserved<MachineFunctionAnalysis>();
+ AU.addRequired<MachineModuleInfo>();
+ AU.addPreserved<MachineModuleInfo>();
// MachineFunctionPass preserves all LLVM IR passes, but there's no
// high-level way to express this. Instead, just list a bunch of
diff --git a/contrib/llvm/lib/CodeGen/MachineFunctionPrinterPass.cpp b/contrib/llvm/lib/CodeGen/MachineFunctionPrinterPass.cpp
index 4f424ff..0d533c3 100644
--- a/contrib/llvm/lib/CodeGen/MachineFunctionPrinterPass.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineFunctionPrinterPass.cpp
@@ -34,7 +34,7 @@ struct MachineFunctionPrinterPass : public MachineFunctionPass {
MachineFunctionPrinterPass(raw_ostream &os, const std::string &banner)
: MachineFunctionPass(ID), OS(os), Banner(banner) {}
- const char *getPassName() const override { return "MachineFunction Printer"; }
+ StringRef getPassName() const override { return "MachineFunction Printer"; }
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesAll();
diff --git a/contrib/llvm/lib/CodeGen/MachineInstr.cpp b/contrib/llvm/lib/CodeGen/MachineInstr.cpp
index 3cdf8d2..2f2e3b3 100644
--- a/contrib/llvm/lib/CodeGen/MachineInstr.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineInstr.cpp
@@ -26,6 +26,7 @@
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
@@ -40,6 +41,7 @@
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetIntrinsicInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
@@ -91,6 +93,8 @@ void MachineOperand::substPhysReg(unsigned Reg, const TargetRegisterInfo &TRI) {
// Note that getSubReg() may return 0 if the sub-register doesn't exist.
// That won't happen in legal code.
setSubReg(0);
+ if (isDef())
+ setIsUndef(false);
}
setReg(Reg);
}
@@ -171,6 +175,16 @@ void MachineOperand::ChangeToMCSymbol(MCSymbol *Sym) {
Contents.Sym = Sym;
}
+void MachineOperand::ChangeToFrameIndex(int Idx) {
+ assert((!isReg() || !isTied()) &&
+ "Cannot change a tied operand into a FrameIndex");
+
+ removeRegFromUses();
+
+ OpKind = MO_FrameIndex;
+ setIndex(Idx);
+}
+
/// ChangeToRegister - Replace this operand with a new register operand of
/// the specified value. If an operand is known to be an register already,
/// the setReg method should be used.
@@ -256,6 +270,10 @@ bool MachineOperand::isIdenticalTo(const MachineOperand &Other) const {
return getCFIIndex() == Other.getCFIIndex();
case MachineOperand::MO_Metadata:
return getMetadata() == Other.getMetadata();
+ case MachineOperand::MO_IntrinsicID:
+ return getIntrinsicID() == Other.getIntrinsicID();
+ case MachineOperand::MO_Predicate:
+ return getPredicate() == Other.getPredicate();
}
llvm_unreachable("Invalid machine operand type");
}
@@ -300,18 +318,23 @@ hash_code llvm::hash_value(const MachineOperand &MO) {
return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getMCSymbol());
case MachineOperand::MO_CFIIndex:
return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getCFIIndex());
+ case MachineOperand::MO_IntrinsicID:
+ return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getIntrinsicID());
+ case MachineOperand::MO_Predicate:
+ return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getPredicate());
}
llvm_unreachable("Invalid machine operand type");
}
-void MachineOperand::print(raw_ostream &OS,
- const TargetRegisterInfo *TRI) const {
+void MachineOperand::print(raw_ostream &OS, const TargetRegisterInfo *TRI,
+ const TargetIntrinsicInfo *IntrinsicInfo) const {
ModuleSlotTracker DummyMST(nullptr);
- print(OS, DummyMST, TRI);
+ print(OS, DummyMST, TRI, IntrinsicInfo);
}
void MachineOperand::print(raw_ostream &OS, ModuleSlotTracker &MST,
- const TargetRegisterInfo *TRI) const {
+ const TargetRegisterInfo *TRI,
+ const TargetIntrinsicInfo *IntrinsicInfo) const {
switch (getType()) {
case MachineOperand::MO_Register:
OS << PrintReg(getReg(), TRI, getSubReg());
@@ -378,7 +401,7 @@ void MachineOperand::print(raw_ostream &OS, ModuleSlotTracker &MST,
} else if (getFPImm()->getType()->isHalfTy()) {
APFloat APF = getFPImm()->getValueAPF();
bool Unused;
- APF.convert(APFloat::IEEEsingle, APFloat::rmNearestTiesToEven, &Unused);
+ APF.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven, &Unused);
OS << "half " << APF.convertToFloat();
} else {
OS << getFPImm()->getValueAPF().convertToDouble();
@@ -454,12 +477,32 @@ void MachineOperand::print(raw_ostream &OS, ModuleSlotTracker &MST,
case MachineOperand::MO_CFIIndex:
OS << "<call frame instruction>";
break;
+ case MachineOperand::MO_IntrinsicID: {
+ Intrinsic::ID ID = getIntrinsicID();
+ if (ID < Intrinsic::num_intrinsics)
+ OS << "<intrinsic:@" << Intrinsic::getName(ID, None) << '>';
+ else if (IntrinsicInfo)
+ OS << "<intrinsic:@" << IntrinsicInfo->getName(ID) << '>';
+ else
+ OS << "<intrinsic:" << ID << '>';
+ break;
+ }
+ case MachineOperand::MO_Predicate: {
+ auto Pred = static_cast<CmpInst::Predicate>(getPredicate());
+ OS << '<' << (CmpInst::isIntPredicate(Pred) ? "intpred" : "floatpred")
+ << CmpInst::getPredicateName(Pred) << '>';
+ }
}
-
if (unsigned TF = getTargetFlags())
OS << "[TF=" << TF << ']';
}
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD void MachineOperand::dump() const {
+ dbgs() << *this << '\n';
+}
+#endif
+
//===----------------------------------------------------------------------===//
// MachineMemOperand Implementation
//===----------------------------------------------------------------------===//
@@ -500,7 +543,10 @@ MachinePointerInfo MachinePointerInfo::getStack(MachineFunction &MF,
MachineMemOperand::MachineMemOperand(MachinePointerInfo ptrinfo, Flags f,
uint64_t s, unsigned int a,
const AAMDNodes &AAInfo,
- const MDNode *Ranges)
+ const MDNode *Ranges,
+ SynchronizationScope SynchScope,
+ AtomicOrdering Ordering,
+ AtomicOrdering FailureOrdering)
: PtrInfo(ptrinfo), Size(s), FlagVals(f), BaseAlignLog2(Log2_32(a) + 1),
AAInfo(AAInfo), Ranges(Ranges) {
assert((PtrInfo.V.isNull() || PtrInfo.V.is<const PseudoSourceValue*>() ||
@@ -508,6 +554,13 @@ MachineMemOperand::MachineMemOperand(MachinePointerInfo ptrinfo, Flags f,
"invalid pointer value");
assert(getBaseAlignment() == a && "Alignment is not a power of 2!");
assert((isLoad() || isStore()) && "Not a load/store!");
+
+ AtomicInfo.SynchScope = static_cast<unsigned>(SynchScope);
+ assert(getSynchScope() == SynchScope && "Value truncated");
+ AtomicInfo.Ordering = static_cast<unsigned>(Ordering);
+ assert(getOrdering() == Ordering && "Value truncated");
+ AtomicInfo.FailureOrdering = static_cast<unsigned>(FailureOrdering);
+ assert(getFailureOrdering() == FailureOrdering && "Value truncated");
}
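
The asserts added to this constructor guard against enum values being silently truncated when packed into the narrow AtomicInfo bitfields. The idiom in isolation, using a hypothetical Packed struct:

#include <cassert>

// Store-then-verify for a value packed into a narrow bitfield: if the enum
// ever outgrows the field, the assert fires in debug builds instead of the
// value being silently truncated.
struct Packed {
  unsigned Ordering : 4;
};

int main() {
  Packed P;
  unsigned Value = 7; // must fit in 4 bits
  P.Ordering = Value;
  assert(P.Ordering == Value && "Value truncated");
  return 0;
}
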
/// Profile - Gather unique data for the object.
@@ -623,10 +676,10 @@ void MachineMemOperand::print(raw_ostream &OS, ModuleSlotTracker &MST) const {
OS << ")";
}
- // Print nontemporal info.
if (isNonTemporal())
OS << "(nontemporal)";
-
+ if (isDereferenceable())
+ OS << "(dereferenceable)";
if (isInvariant())
OS << "(invariant)";
}
@@ -653,12 +706,7 @@ MachineInstr::MachineInstr(MachineFunction &MF, const MCInstrDesc &tid,
DebugLoc dl, bool NoImp)
: MCID(&tid), Parent(nullptr), Operands(nullptr), NumOperands(0), Flags(0),
AsmPrinterFlags(0), NumMemRefs(0), MemRefs(nullptr),
- debugLoc(std::move(dl))
-#ifdef LLVM_BUILD_GLOBAL_ISEL
- ,
- Ty(nullptr)
-#endif
-{
+ debugLoc(std::move(dl)) {
assert(debugLoc.hasTrivialDestructor() && "Expected trivial destructor");
// Reserve space for the expected number of operands.
@@ -677,12 +725,7 @@ MachineInstr::MachineInstr(MachineFunction &MF, const MCInstrDesc &tid,
MachineInstr::MachineInstr(MachineFunction &MF, const MachineInstr &MI)
: MCID(&MI.getDesc()), Parent(nullptr), Operands(nullptr), NumOperands(0),
Flags(0), AsmPrinterFlags(0), NumMemRefs(MI.NumMemRefs),
- MemRefs(MI.MemRefs), debugLoc(MI.getDebugLoc())
-#ifdef LLVM_BUILD_GLOBAL_ISEL
- ,
- Ty(nullptr)
-#endif
-{
+ MemRefs(MI.MemRefs), debugLoc(MI.getDebugLoc()) {
assert(debugLoc.hasTrivialDestructor() && "Expected trivial destructor");
CapOperands = OperandCapacity::get(MI.getNumOperands());
@@ -705,25 +748,6 @@ MachineRegisterInfo *MachineInstr::getRegInfo() {
return nullptr;
}
-// Implement dummy setter and getter for type when
-// global-isel is not built.
-// The proper implementation is WIP and is tracked here:
-// PR26576.
-#ifndef LLVM_BUILD_GLOBAL_ISEL
-void MachineInstr::setType(Type *Ty) {}
-
-Type *MachineInstr::getType() const { return nullptr; }
-
-#else
-void MachineInstr::setType(Type *Ty) {
- assert((!Ty || isPreISelGenericOpcode(getOpcode())) &&
- "Non generic instructions are not supposed to be typed");
- this->Ty = Ty;
-}
-
-Type *MachineInstr::getType() const { return Ty; }
-#endif // LLVM_BUILD_GLOBAL_ISEL
-
/// RemoveRegOperandsFromUseLists - Unlink all of the register operands in
/// this instruction from their respective use lists. This requires that the
/// operands already be on their use lists.
@@ -976,16 +1000,24 @@ bool MachineInstr::isIdenticalTo(const MachineInstr &Other,
return false;
if (isBundle()) {
- // Both instructions are bundles, compare MIs inside the bundle.
+    // The opcode check above has already passed, so we know that both
+    // instructions are bundles here. Compare the MIs inside the bundles.
+ assert(Other.isBundle() && "Expected that both instructions are bundles.");
MachineBasicBlock::const_instr_iterator I1 = getIterator();
- MachineBasicBlock::const_instr_iterator E1 = getParent()->instr_end();
MachineBasicBlock::const_instr_iterator I2 = Other.getIterator();
- MachineBasicBlock::const_instr_iterator E2 = Other.getParent()->instr_end();
- while (++I1 != E1 && I1->isInsideBundle()) {
+    // Loop until we have analyzed the last instruction inside at least one
+    // of the bundles.
+ while (I1->isBundledWithSucc() && I2->isBundledWithSucc()) {
+ ++I1;
++I2;
- if (I2 == E2 || !I2->isInsideBundle() || !I1->isIdenticalTo(*I2, Check))
+ if (!I1->isIdenticalTo(*I2, Check))
return false;
}
+ // If we've reached the end of just one of the two bundles, but not both,
+ // the instructions are not identical.
+ if (I1->isBundledWithSucc() || I2->isBundledWithSucc())
+ return false;
}
// Check operands to make sure they match.
@@ -1287,8 +1319,8 @@ bool MachineInstr::hasRegisterImplicitUseOperand(unsigned Reg) const {
/// findRegisterUseOperandIdx() - Returns the MachineOperand that is a use of
/// the specific register or -1 if it is not found. It further tightens
/// the search criteria to a use that kills the register if isKill is true.
-int MachineInstr::findRegisterUseOperandIdx(unsigned Reg, bool isKill,
- const TargetRegisterInfo *TRI) const {
+int MachineInstr::findRegisterUseOperandIdx(
+ unsigned Reg, bool isKill, const TargetRegisterInfo *TRI) const {
for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
const MachineOperand &MO = getOperand(i);
if (!MO.isReg() || !MO.isUse())
@@ -1296,11 +1328,9 @@ int MachineInstr::findRegisterUseOperandIdx(unsigned Reg, bool isKill,
unsigned MOReg = MO.getReg();
if (!MOReg)
continue;
- if (MOReg == Reg ||
- (TRI &&
- TargetRegisterInfo::isPhysicalRegister(MOReg) &&
- TargetRegisterInfo::isPhysicalRegister(Reg) &&
- TRI->isSubRegister(MOReg, Reg)))
+ if (MOReg == Reg || (TRI && TargetRegisterInfo::isPhysicalRegister(MOReg) &&
+ TargetRegisterInfo::isPhysicalRegister(Reg) &&
+ TRI->isSubRegister(MOReg, Reg)))
if (!isKill || MO.isKill())
return i;
}
@@ -1533,7 +1563,7 @@ bool MachineInstr::isSafeToMove(AliasAnalysis *AA, bool &SawStore) const {
// destination. The check for isInvariantLoad gives the targe the chance to
// classify the load as always returning a constant, e.g. a constant pool
// load.
- if (mayLoad() && !isInvariantLoad(AA))
+ if (mayLoad() && !isDereferenceableInvariantLoad(AA))
// Otherwise, this is a real load. If there is a store between the load and
// end of block, we can't move it.
return !SawStore;
@@ -1564,12 +1594,10 @@ bool MachineInstr::hasOrderedMemoryRef() const {
});
}
-/// isInvariantLoad - Return true if this instruction is loading from a
-/// location whose value is invariant across the function. For example,
-/// loading a value from the constant pool or from the argument area
-/// of a function if it does not change. This should only return true of
-/// *all* loads the instruction does are invariant (if it does multiple loads).
-bool MachineInstr::isInvariantLoad(AliasAnalysis *AA) const {
+/// isDereferenceableInvariantLoad - Return true if this instruction will never
+/// trap and is loading from a location whose value is invariant across a run of
+/// this function.
+bool MachineInstr::isDereferenceableInvariantLoad(AliasAnalysis *AA) const {
// If the instruction doesn't load at all, it isn't an invariant load.
if (!mayLoad())
return false;
@@ -1579,16 +1607,17 @@ bool MachineInstr::isInvariantLoad(AliasAnalysis *AA) const {
if (memoperands_empty())
return false;
- const MachineFrameInfo *MFI = getParent()->getParent()->getFrameInfo();
+ const MachineFrameInfo &MFI = getParent()->getParent()->getFrameInfo();
for (MachineMemOperand *MMO : memoperands()) {
if (MMO->isVolatile()) return false;
if (MMO->isStore()) return false;
- if (MMO->isInvariant()) continue;
+ if (MMO->isInvariant() && MMO->isDereferenceable())
+ continue;
// A load from a constant PseudoSourceValue is invariant.
if (const PseudoSourceValue *PSV = MMO->getPseudoValue())
- if (PSV->isConstant(MFI))
+ if (PSV->isConstant(&MFI))
continue;
if (const Value *V = MMO->getValue()) {
@@ -1663,35 +1692,40 @@ void MachineInstr::copyImplicitOps(MachineFunction &MF,
}
}
-LLVM_DUMP_METHOD void MachineInstr::dump() const {
+LLVM_DUMP_METHOD void MachineInstr::dump(const TargetInstrInfo *TII) const {
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
- dbgs() << " " << *this;
+ dbgs() << " ";
+ print(dbgs(), false /* SkipOpers */, TII);
#endif
}
-void MachineInstr::print(raw_ostream &OS, bool SkipOpers) const {
+void MachineInstr::print(raw_ostream &OS, bool SkipOpers,
+ const TargetInstrInfo *TII) const {
const Module *M = nullptr;
if (const MachineBasicBlock *MBB = getParent())
if (const MachineFunction *MF = MBB->getParent())
M = MF->getFunction()->getParent();
ModuleSlotTracker MST(M);
- print(OS, MST, SkipOpers);
+ print(OS, MST, SkipOpers, TII);
}
void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST,
- bool SkipOpers) const {
+ bool SkipOpers, const TargetInstrInfo *TII) const {
// We can be a bit tidier if we know the MachineFunction.
const MachineFunction *MF = nullptr;
const TargetRegisterInfo *TRI = nullptr;
const MachineRegisterInfo *MRI = nullptr;
- const TargetInstrInfo *TII = nullptr;
+ const TargetIntrinsicInfo *IntrinsicInfo = nullptr;
+
if (const MachineBasicBlock *MBB = getParent()) {
MF = MBB->getParent();
if (MF) {
MRI = &MF->getRegInfo();
TRI = MF->getSubtarget().getRegisterInfo();
- TII = MF->getSubtarget().getInstrInfo();
+ if (!TII)
+ TII = MF->getSubtarget().getInstrInfo();
+ IntrinsicInfo = MF->getTarget().getIntrinsicInfo();
}
}
@@ -1705,13 +1739,13 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST,
!getOperand(StartOp).isImplicit();
++StartOp) {
if (StartOp != 0) OS << ", ";
- getOperand(StartOp).print(OS, MST, TRI);
+ getOperand(StartOp).print(OS, MST, TRI, IntrinsicInfo);
unsigned Reg = getOperand(StartOp).getReg();
if (TargetRegisterInfo::isVirtualRegister(Reg)) {
VirtRegs.push_back(Reg);
- unsigned Size;
- if (MRI && (Size = MRI->getSize(Reg)))
- OS << '(' << Size << ')';
+ LLT Ty = MRI ? MRI->getType(Reg) : LLT{};
+ if (Ty.isValid())
+ OS << '(' << Ty << ')';
}
}
@@ -1724,12 +1758,6 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST,
else
OS << "UNKNOWN";
- if (getType()) {
- OS << ' ';
- getType()->print(OS, /*IsForDebug*/ false, /*NoDetails*/ true);
- OS << ' ';
- }
-
if (SkipOpers)
return;
@@ -1812,7 +1840,8 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST,
OS << "!\"" << DIV->getName() << '\"';
else
MO.print(OS, MST, TRI);
- } else if (TRI && (isInsertSubreg() || isRegSequence()) && MO.isImm()) {
+ } else if (TRI && (isInsertSubreg() || isRegSequence() ||
+ (isSubregToReg() && i == 3)) && MO.isImm()) {
OS << TRI->getSubRegIndexName(MO.getImm());
} else if (i == AsmDescOp && MO.isImm()) {
// Pretty print the inline asm operand descriptor.
@@ -2145,8 +2174,8 @@ void MachineInstr::setPhysRegsDeadExcept(ArrayRef<unsigned> UsedRegs,
unsigned Reg = MO.getReg();
if (!TargetRegisterInfo::isPhysicalRegister(Reg)) continue;
// If there are no uses, including partial uses, the def is dead.
- if (std::none_of(UsedRegs.begin(), UsedRegs.end(),
- [&](unsigned Use) { return TRI.regsOverlap(Use, Reg); }))
+ if (none_of(UsedRegs,
+ [&](unsigned Use) { return TRI.regsOverlap(Use, Reg); }))
MO.setIsDead();
}
diff --git a/contrib/llvm/lib/CodeGen/MachineInstrBundle.cpp b/contrib/llvm/lib/CodeGen/MachineInstrBundle.cpp
index e4686b3..b5621a0 100644
--- a/contrib/llvm/lib/CodeGen/MachineInstrBundle.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineInstrBundle.cpp
@@ -24,7 +24,8 @@ namespace {
class UnpackMachineBundles : public MachineFunctionPass {
public:
static char ID; // Pass identification
- UnpackMachineBundles(std::function<bool(const Function &)> Ftor = nullptr)
+ UnpackMachineBundles(
+ std::function<bool(const MachineFunction &)> Ftor = nullptr)
: MachineFunctionPass(ID), PredicateFtor(std::move(Ftor)) {
initializeUnpackMachineBundlesPass(*PassRegistry::getPassRegistry());
}
@@ -32,7 +33,7 @@ namespace {
bool runOnMachineFunction(MachineFunction &MF) override;
private:
- std::function<bool(const Function &)> PredicateFtor;
+ std::function<bool(const MachineFunction &)> PredicateFtor;
};
} // end anonymous namespace
@@ -42,7 +43,7 @@ INITIALIZE_PASS(UnpackMachineBundles, "unpack-mi-bundles",
"Unpack machine instruction bundles", false, false)
bool UnpackMachineBundles::runOnMachineFunction(MachineFunction &MF) {
- if (PredicateFtor && !PredicateFtor(*MF.getFunction()))
+ if (PredicateFtor && !PredicateFtor(MF))
return false;
bool Changed = false;
@@ -78,7 +79,8 @@ bool UnpackMachineBundles::runOnMachineFunction(MachineFunction &MF) {
}
FunctionPass *
-llvm::createUnpackMachineBundles(std::function<bool(const Function &)> Ftor) {
+llvm::createUnpackMachineBundles(
+ std::function<bool(const MachineFunction &)> Ftor) {
return new UnpackMachineBundles(std::move(Ftor));
}
diff --git a/contrib/llvm/lib/CodeGen/MachineLICM.cpp b/contrib/llvm/lib/CodeGen/MachineLICM.cpp
index 119751b..b3d1843 100644
--- a/contrib/llvm/lib/CodeGen/MachineLICM.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineLICM.cpp
@@ -92,8 +92,7 @@ namespace {
SmallVector<MachineBasicBlock*, 8> ExitBlocks;
bool isExitBlock(const MachineBasicBlock *MBB) const {
- return std::find(ExitBlocks.begin(), ExitBlocks.end(), MBB) !=
- ExitBlocks.end();
+ return is_contained(ExitBlocks, MBB);
}
// Track 'estimated' register pressure.
@@ -268,7 +267,7 @@ bool MachineLICM::runOnMachineFunction(MachineFunction &MF) {
TII = ST.getInstrInfo();
TLI = ST.getTargetLowering();
TRI = ST.getRegisterInfo();
- MFI = MF.getFrameInfo();
+ MFI = &MF.getFrameInfo();
MRI = &MF.getRegInfo();
SchedModel.init(ST.getSchedModel(), &ST, TII);
@@ -896,7 +895,7 @@ bool MachineLICM::IsLoopInvariantInst(MachineInstr &I) {
// If the physreg has no defs anywhere, it's just an ambient register
// and we can freely move its uses. Alternatively, if it's allocatable,
// it could get allocated to something with a def during allocation.
- if (!MRI->isConstantPhysReg(Reg, *I.getParent()->getParent()))
+ if (!MRI->isConstantPhysReg(Reg))
return false;
// Otherwise it's safe to move.
continue;
@@ -1139,7 +1138,8 @@ bool MachineLICM::IsProfitableToHoist(MachineInstr &MI) {
// High register pressure situation, only hoist if the instruction is going
// to be remat'ed.
- if (!TII->isTriviallyReMaterializable(MI, AA) && !MI.isInvariantLoad(AA)) {
+ if (!TII->isTriviallyReMaterializable(MI, AA) &&
+ !MI.isDereferenceableInvariantLoad(AA)) {
DEBUG(dbgs() << "Can't remat / high reg-pressure: " << MI);
return false;
}
@@ -1158,7 +1158,7 @@ MachineInstr *MachineLICM::ExtractHoistableLoad(MachineInstr *MI) {
// If not, we may be able to unfold a load and hoist that.
// First test whether the instruction is loading from an amenable
// memory location.
- if (!MI->isInvariantLoad(AA))
+ if (!MI->isDereferenceableInvariantLoad(AA))
return nullptr;
// Next determine the register class for a temporary register.
@@ -1336,6 +1336,11 @@ bool MachineLICM::Hoist(MachineInstr *MI, MachineBasicBlock *Preheader) {
// Otherwise, splice the instruction to the preheader.
Preheader->splice(Preheader->getFirstTerminator(),MI->getParent(),MI);
+ // Since we are moving the instruction out of its basic block, we do not
+ // retain its debug location. Doing so would degrade the debugging
+ // experience and adversely affect the accuracy of profiling information.
+ MI->setDebugLoc(DebugLoc());
+
// Update register pressure for BBs from header to this block.
UpdateBackTraceRegPressure(MI);
diff --git a/contrib/llvm/lib/CodeGen/MachineLoopInfo.cpp b/contrib/llvm/lib/CodeGen/MachineLoopInfo.cpp
index 376f78f..fdeaf7b 100644
--- a/contrib/llvm/lib/CodeGen/MachineLoopInfo.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineLoopInfo.cpp
@@ -77,6 +77,51 @@ MachineBasicBlock *MachineLoop::getBottomBlock() {
return BotMBB;
}
+MachineBasicBlock *MachineLoop::findLoopControlBlock() {
+ if (MachineBasicBlock *Latch = getLoopLatch()) {
+ if (isLoopExiting(Latch))
+ return Latch;
+ else
+ return getExitingBlock();
+ }
+ return nullptr;
+}
+
+MachineBasicBlock *
+MachineLoopInfo::findLoopPreheader(MachineLoop *L,
+ bool SpeculativePreheader) const {
+ if (MachineBasicBlock *PB = L->getLoopPreheader())
+ return PB;
+
+ if (!SpeculativePreheader)
+ return nullptr;
+
+ MachineBasicBlock *HB = L->getHeader(), *LB = L->getLoopLatch();
+ if (HB->pred_size() != 2 || HB->hasAddressTaken())
+ return nullptr;
+ // Find the predecessor of the header that is not the latch block.
+ MachineBasicBlock *Preheader = nullptr;
+ for (MachineBasicBlock *P : HB->predecessors()) {
+ if (P == LB)
+ continue;
+ // Sanity.
+ if (Preheader)
+ return nullptr;
+ Preheader = P;
+ }
+
+  // Check if the preheader candidate is a successor of any other loop
+  // header. We want to avoid having two loop setups in the same block.
+ for (MachineBasicBlock *S : Preheader->successors()) {
+ if (S == HB)
+ continue;
+ MachineLoop *T = getLoopFor(S);
+ if (T && T->getHeader() == S)
+ return nullptr;
+ }
+ return Preheader;
+}
+
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD void MachineLoop::dump() const {
print(dbgs());
diff --git a/contrib/llvm/lib/CodeGen/MachineModuleInfo.cpp b/contrib/llvm/lib/CodeGen/MachineModuleInfo.cpp
index 244e3fb..6618857 100644
--- a/contrib/llvm/lib/CodeGen/MachineModuleInfo.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineModuleInfo.cpp
@@ -9,26 +9,31 @@
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/ADT/PointerUnion.h"
+#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/TinyPtrVector.h"
#include "llvm/Analysis/EHPersonalities.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionInitializer.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
#include "llvm/MC/MCObjectFileInfo.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/Dwarf.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetMachine.h"
using namespace llvm;
using namespace llvm::dwarf;
// Handle the Pass registration stuff necessary to use DataLayout's.
-INITIALIZE_PASS(MachineModuleInfo, "machinemoduleinfo",
- "Machine Module Information", false, false)
+INITIALIZE_TM_PASS(MachineModuleInfo, "machinemoduleinfo",
+ "Machine Module Information", false, false)
char MachineModuleInfo::ID = 0;
// Out of line virtual method.
@@ -54,7 +59,7 @@ public:
class MMIAddrLabelMap {
MCContext &Context;
struct AddrLabelSymEntry {
- /// Symbols - The symbols for the label.
+ /// The symbols for the label.
TinyPtrVector<MCSymbol *> Symbols;
Function *Fn; // The containing function of the BasicBlock.
@@ -63,14 +68,13 @@ class MMIAddrLabelMap {
DenseMap<AssertingVH<BasicBlock>, AddrLabelSymEntry> AddrLabelSymbols;
- /// BBCallbacks - Callbacks for the BasicBlock's that we have entries for. We
- /// use this so we get notified if a block is deleted or RAUWd.
+ /// Callbacks for the BasicBlock's that we have entries for. We use this so
+ /// we get notified if a block is deleted or RAUWd.
std::vector<MMIAddrLabelMapCallbackPtr> BBCallbacks;
- /// DeletedAddrLabelsNeedingEmission - This is a per-function list of symbols
- /// whose corresponding BasicBlock got deleted. These symbols need to be
- /// emitted at some point in the file, so AsmPrinter emits them after the
- /// function body.
+ /// This is a per-function list of symbols whose corresponding BasicBlock got
+ /// deleted. These symbols need to be emitted at some point in the file, so
+ /// AsmPrinter emits them after the function body.
DenseMap<AssertingVH<Function>, std::vector<MCSymbol*> >
DeletedAddrLabelsNeedingEmission;
public:
@@ -112,8 +116,7 @@ ArrayRef<MCSymbol *> MMIAddrLabelMap::getAddrLabelSymbolToEmit(BasicBlock *BB) {
return Entry.Symbols;
}
-/// takeDeletedSymbolsForFunction - If we have any deleted symbols for F, return
-/// them.
+/// If we have any deleted symbols for F, return them.
void MMIAddrLabelMap::
takeDeletedSymbolsForFunction(Function *F, std::vector<MCSymbol*> &Result) {
DenseMap<AssertingVH<Function>, std::vector<MCSymbol*> >::iterator I =
@@ -186,20 +189,13 @@ void MMIAddrLabelMapCallbackPtr::allUsesReplacedWith(Value *V2) {
//===----------------------------------------------------------------------===//
-MachineModuleInfo::MachineModuleInfo(const MCAsmInfo &MAI,
- const MCRegisterInfo &MRI,
- const MCObjectFileInfo *MOFI)
- : ImmutablePass(ID), Context(&MAI, &MRI, MOFI, nullptr, false) {
+MachineModuleInfo::MachineModuleInfo(const TargetMachine *TM)
+ : ImmutablePass(ID), TM(*TM),
+ Context(TM->getMCAsmInfo(), TM->getMCRegisterInfo(),
+ TM->getObjFileLowering(), nullptr, false) {
initializeMachineModuleInfoPass(*PassRegistry::getPassRegistry());
}
-MachineModuleInfo::MachineModuleInfo()
- : ImmutablePass(ID), Context(nullptr, nullptr, nullptr) {
- llvm_unreachable("This MachineModuleInfo constructor should never be called, "
- "MMI should always be explicitly constructed by "
- "LLVMTargetMachine");
-}
-
MachineModuleInfo::~MachineModuleInfo() {
}
@@ -207,13 +203,9 @@ bool MachineModuleInfo::doInitialization(Module &M) {
ObjFileMMI = nullptr;
CurCallSite = 0;
- CallsEHReturn = false;
- CallsUnwindInit = false;
- HasEHFunclets = false;
DbgInfoAvailable = UsesVAFloatArgument = UsesMorestackAddr = false;
- PersonalityTypeCache = EHPersonality::Unknown;
AddrLabelSymbols = nullptr;
- TheModule = nullptr;
+ TheModule = &M;
return false;
}
@@ -233,30 +225,8 @@ bool MachineModuleInfo::doFinalization(Module &M) {
return false;
}
-/// EndFunction - Discard function meta information.
-///
-void MachineModuleInfo::EndFunction() {
- // Clean up frame info.
- FrameInstructions.clear();
-
- // Clean up exception info.
- LandingPads.clear();
- PersonalityTypeCache = EHPersonality::Unknown;
- CallSiteMap.clear();
- TypeInfos.clear();
- FilterIds.clear();
- FilterEnds.clear();
- CallsEHReturn = false;
- CallsUnwindInit = false;
- HasEHFunclets = false;
- VariableDbgInfos.clear();
-}
-
//===- Address of Block Management ----------------------------------------===//
-/// getAddrLabelSymbolToEmit - Return the symbol to be used for the specified
-/// basic block when its address is taken. If other blocks were RAUW'd to
-/// this one, we may have to emit them as well, return the whole set.
ArrayRef<MCSymbol *>
MachineModuleInfo::getAddrLabelSymbolToEmit(const BasicBlock *BB) {
// Lazily create AddrLabelSymbols.
@@ -265,11 +235,6 @@ MachineModuleInfo::getAddrLabelSymbolToEmit(const BasicBlock *BB) {
return AddrLabelSymbols->getAddrLabelSymbolToEmit(const_cast<BasicBlock*>(BB));
}
-
-/// takeDeletedSymbolsForFunction - If the specified function has had any
-/// references to address-taken blocks generated, but the block got deleted,
-/// return the symbol now so we can emit it. This prevents emitting a
-/// reference to a symbol that has no definition.
void MachineModuleInfo::
takeDeletedSymbolsForFunction(const Function *F,
std::vector<MCSymbol*> &Result) {
@@ -279,40 +244,8 @@ takeDeletedSymbolsForFunction(const Function *F,
takeDeletedSymbolsForFunction(const_cast<Function*>(F), Result);
}
-//===- EH -----------------------------------------------------------------===//
-
-/// getOrCreateLandingPadInfo - Find or create an LandingPadInfo for the
-/// specified MachineBasicBlock.
-LandingPadInfo &MachineModuleInfo::getOrCreateLandingPadInfo
- (MachineBasicBlock *LandingPad) {
- unsigned N = LandingPads.size();
- for (unsigned i = 0; i < N; ++i) {
- LandingPadInfo &LP = LandingPads[i];
- if (LP.LandingPadBlock == LandingPad)
- return LP;
- }
-
- LandingPads.push_back(LandingPadInfo(LandingPad));
- return LandingPads[N];
-}
-
-/// addInvoke - Provide the begin and end labels of an invoke style call and
-/// associate it with a try landing pad block.
-void MachineModuleInfo::addInvoke(MachineBasicBlock *LandingPad,
- MCSymbol *BeginLabel, MCSymbol *EndLabel) {
- LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad);
- LP.BeginLabels.push_back(BeginLabel);
- LP.EndLabels.push_back(EndLabel);
-}
-
-/// addLandingPad - Provide the label of a try LandingPad block.
-///
-MCSymbol *MachineModuleInfo::addLandingPad(MachineBasicBlock *LandingPad) {
- MCSymbol *LandingPadLabel = Context.createTempSymbol();
- LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad);
- LP.LandingPadLabel = LandingPadLabel;
- return LandingPadLabel;
-}
+/// \name Exception Handling
+/// \{
void MachineModuleInfo::addPersonality(const Function *Personality) {
for (unsigned i = 0; i < Personalities.size(); ++i)
@@ -321,143 +254,83 @@ void MachineModuleInfo::addPersonality(const Function *Personality) {
Personalities.push_back(Personality);
}
-/// addCatchTypeInfo - Provide the catch typeinfo for a landing pad.
-///
-void MachineModuleInfo::
-addCatchTypeInfo(MachineBasicBlock *LandingPad,
- ArrayRef<const GlobalValue *> TyInfo) {
- LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad);
- for (unsigned N = TyInfo.size(); N; --N)
- LP.TypeIds.push_back(getTypeIDFor(TyInfo[N - 1]));
-}
-
-/// addFilterTypeInfo - Provide the filter typeinfo for a landing pad.
-///
-void MachineModuleInfo::
-addFilterTypeInfo(MachineBasicBlock *LandingPad,
- ArrayRef<const GlobalValue *> TyInfo) {
- LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad);
- std::vector<unsigned> IdsInFilter(TyInfo.size());
- for (unsigned I = 0, E = TyInfo.size(); I != E; ++I)
- IdsInFilter[I] = getTypeIDFor(TyInfo[I]);
- LP.TypeIds.push_back(getFilterIDFor(IdsInFilter));
-}
-
-/// addCleanup - Add a cleanup action for a landing pad.
-///
-void MachineModuleInfo::addCleanup(MachineBasicBlock *LandingPad) {
- LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad);
- LP.TypeIds.push_back(0);
-}
+/// \}
+
+MachineFunction &MachineModuleInfo::getMachineFunction(const Function &F) {
+ // Shortcut for the common case where a sequence of MachineFunctionPasses
+ // all query for the same Function.
+ if (LastRequest == &F)
+ return *LastResult;
+
+ auto I = MachineFunctions.insert(
+ std::make_pair(&F, std::unique_ptr<MachineFunction>()));
+ MachineFunction *MF;
+ if (I.second) {
+ // No pre-existing machine function, create a new one.
+ MF = new MachineFunction(&F, TM, NextFnNum++, *this);
+ // Update the set entry.
+ I.first->second.reset(MF);
+
+ if (MFInitializer)
+ if (MFInitializer->initializeMachineFunction(*MF))
+ report_fatal_error("Unable to initialize machine function");
+ } else {
+ MF = I.first->second.get();
+ }
-void MachineModuleInfo::addSEHCatchHandler(MachineBasicBlock *LandingPad,
- const Function *Filter,
- const BlockAddress *RecoverBA) {
- LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad);
- SEHHandler Handler;
- Handler.FilterOrFinally = Filter;
- Handler.RecoverBA = RecoverBA;
- LP.SEHHandlers.push_back(Handler);
+ LastRequest = &F;
+ LastResult = MF;
+ return *MF;
}
-void MachineModuleInfo::addSEHCleanupHandler(MachineBasicBlock *LandingPad,
- const Function *Cleanup) {
- LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad);
- SEHHandler Handler;
- Handler.FilterOrFinally = Cleanup;
- Handler.RecoverBA = nullptr;
- LP.SEHHandlers.push_back(Handler);
+void MachineModuleInfo::deleteMachineFunctionFor(Function &F) {
+ MachineFunctions.erase(&F);
+ LastRequest = nullptr;
+ LastResult = nullptr;
}
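
Together, getMachineFunction and deleteMachineFunctionFor implement a map of lazily created objects with a one-entry cache in front of it; the cache pays off because a pipeline of MachineFunctionPasses tends to query the same Function many times in a row. A generic sketch of the pattern, with hypothetical names:

#include <map>
#include <memory>

template <typename K, typename V> class CachedFactory {
  std::map<const K *, std::unique_ptr<V>> Objects;
  const K *LastKey = nullptr;
  V *LastVal = nullptr;

public:
  V &get(const K &Key) {
    if (LastKey == &Key)
      return *LastVal; // fast path: same key as the previous call
    auto I = Objects.emplace(&Key, nullptr);
    if (I.second)
      I.first->second.reset(new V()); // lazily create on first request
    LastKey = &Key;
    LastVal = I.first->second.get();
    return *LastVal;
  }

  void erase(const K &Key) {
    Objects.erase(&Key);
    LastKey = nullptr; // the cached entry may just have been destroyed
    LastVal = nullptr;
  }
};
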
-/// TidyLandingPads - Remap landing pad labels and remove any deleted landing
-/// pads.
-void MachineModuleInfo::TidyLandingPads(DenseMap<MCSymbol*, uintptr_t> *LPMap) {
- for (unsigned i = 0; i != LandingPads.size(); ) {
- LandingPadInfo &LandingPad = LandingPads[i];
- if (LandingPad.LandingPadLabel &&
- !LandingPad.LandingPadLabel->isDefined() &&
- (!LPMap || (*LPMap)[LandingPad.LandingPadLabel] == 0))
- LandingPad.LandingPadLabel = nullptr;
-
- // Special case: we *should* emit LPs with null LP MBB. This indicates
- // "nounwind" case.
- if (!LandingPad.LandingPadLabel && LandingPad.LandingPadBlock) {
- LandingPads.erase(LandingPads.begin() + i);
- continue;
- }
-
- for (unsigned j = 0, e = LandingPads[i].BeginLabels.size(); j != e; ++j) {
- MCSymbol *BeginLabel = LandingPad.BeginLabels[j];
- MCSymbol *EndLabel = LandingPad.EndLabels[j];
- if ((BeginLabel->isDefined() ||
- (LPMap && (*LPMap)[BeginLabel] != 0)) &&
- (EndLabel->isDefined() ||
- (LPMap && (*LPMap)[EndLabel] != 0))) continue;
-
- LandingPad.BeginLabels.erase(LandingPad.BeginLabels.begin() + j);
- LandingPad.EndLabels.erase(LandingPad.EndLabels.begin() + j);
- --j;
- --e;
- }
-
- // Remove landing pads with no try-ranges.
- if (LandingPads[i].BeginLabels.empty()) {
- LandingPads.erase(LandingPads.begin() + i);
- continue;
- }
+namespace {
+/// This pass frees the MachineFunction object associated with a Function.
+class FreeMachineFunction : public FunctionPass {
+public:
+ static char ID;
+ FreeMachineFunction() : FunctionPass(ID) {}
- // If there is no landing pad, ensure that the list of typeids is empty.
- // If the only typeid is a cleanup, this is the same as having no typeids.
- if (!LandingPad.LandingPadBlock ||
- (LandingPad.TypeIds.size() == 1 && !LandingPad.TypeIds[0]))
- LandingPad.TypeIds.clear();
- ++i;
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<MachineModuleInfo>();
+ AU.addPreserved<MachineModuleInfo>();
}
-}
-/// setCallSiteLandingPad - Map the landing pad's EH symbol to the call site
-/// indexes.
-void MachineModuleInfo::setCallSiteLandingPad(MCSymbol *Sym,
- ArrayRef<unsigned> Sites) {
- LPadToCallSiteMap[Sym].append(Sites.begin(), Sites.end());
-}
-
-/// getTypeIDFor - Return the type id for the specified typeinfo. This is
-/// function wide.
-unsigned MachineModuleInfo::getTypeIDFor(const GlobalValue *TI) {
- for (unsigned i = 0, N = TypeInfos.size(); i != N; ++i)
- if (TypeInfos[i] == TI) return i + 1;
+ bool runOnFunction(Function &F) override {
+ MachineModuleInfo &MMI = getAnalysis<MachineModuleInfo>();
+ MMI.deleteMachineFunctionFor(F);
+ return true;
+ }
+};
+char FreeMachineFunction::ID;
+} // end anonymous namespace
- TypeInfos.push_back(TI);
- return TypeInfos.size();
+namespace llvm {
+FunctionPass *createFreeMachineFunctionPass() {
+ return new FreeMachineFunction();
}
-
-/// getFilterIDFor - Return the filter id for the specified typeinfos. This is
-/// function wide.
-int MachineModuleInfo::getFilterIDFor(std::vector<unsigned> &TyIds) {
- // If the new filter coincides with the tail of an existing filter, then
- // re-use the existing filter. Folding filters more than this requires
- // re-ordering filters and/or their elements - probably not worth it.
- for (std::vector<unsigned>::iterator I = FilterEnds.begin(),
- E = FilterEnds.end(); I != E; ++I) {
- unsigned i = *I, j = TyIds.size();
-
- while (i && j)
- if (FilterIds[--i] != TyIds[--j])
- goto try_next;
-
- if (!j)
- // The new filter coincides with range [i, end) of the existing filter.
- return -(1 + i);
-
-try_next:;
+} // end namespace llvm
+
+//===- MMI building helpers -----------------------------------------------===//
+
+void llvm::computeUsesVAFloatArgument(const CallInst &I,
+ MachineModuleInfo &MMI) {
+ FunctionType *FT =
+ cast<FunctionType>(I.getCalledValue()->getType()->getContainedType(0));
+ if (FT->isVarArg() && !MMI.usesVAFloatArgument()) {
+ for (unsigned i = 0, e = I.getNumArgOperands(); i != e; ++i) {
+ Type *T = I.getArgOperand(i)->getType();
+ for (auto i : post_order(T)) {
+ if (i->isFloatingPointTy()) {
+ MMI.setUsesVAFloatArgument(true);
+ return;
+ }
+ }
+ }
}
-
- // Add the new filter.
- int FilterID = -(1 + FilterIds.size());
- FilterIds.reserve(FilterIds.size() + TyIds.size() + 1);
- FilterIds.insert(FilterIds.end(), TyIds.begin(), TyIds.end());
- FilterEnds.push_back(FilterIds.size());
- FilterIds.push_back(0); // terminator
- return FilterID;
}
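
computeUsesVAFloatArgument walks each argument type of a varargs call in post order and sets the flag as soon as any contained type is floating point. Stripped of the llvm::Type machinery, the underlying check is just a recursive containment test. The Ty struct below is a hypothetical stand-in; note that the real post_order iteration also tolerates cyclic types, which this sketch does not:

#include <vector>

struct Ty {
  bool IsFloat = false;
  std::vector<const Ty *> Contained; // struct fields, array elements, ...
};

static bool containsFloat(const Ty *T) {
  if (T->IsFloat)
    return true;
  for (const Ty *Sub : T->Contained)
    if (containsFloat(Sub))
      return true;
  return false;
}

int main() {
  Ty F{true, {}};
  Ty S{false, {&F}}; // e.g. struct { double d; }
  return containsFloat(&S) ? 0 : 1;
}
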
diff --git a/contrib/llvm/lib/CodeGen/MachinePipeliner.cpp b/contrib/llvm/lib/CodeGen/MachinePipeliner.cpp
new file mode 100644
index 0000000..43a1809
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/MachinePipeliner.cpp
@@ -0,0 +1,3984 @@
+//===-- MachinePipeliner.cpp - Machine Software Pipeliner Pass ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// An implementation of the Swing Modulo Scheduling (SMS) software pipeliner.
+//
+// Software pipelining (SWP) is an instruction scheduling technique for loops
+// that overlaps loop iterations and exploits ILP via a compiler
+// transformation.
+//
+// Swing Modulo Scheduling is an implementation of software pipelining
+// that generates schedules that are near optimal in terms of initiation
+// interval, register requirements, and stage count. See the papers:
+//
+// "Swing Modulo Scheduling: A Lifetime-Sensitive Approach", by J. Llosa,
+// A. Gonzalez, E. Ayguade, and M. Valero. In PACT '96: Proceedings of the
+// 1996 Conference on Parallel Architectures and Compilation Techniques.
+//
+// "Lifetime-Sensitive Modulo Scheduling in a Production Environment", by J.
+// Llosa, E. Ayguade, A. Gonzalez, M. Valero, and J. Eckhardt. In IEEE
+// Transactions on Computers, Vol. 50, No. 3, 2001.
+//
+// "An Implementation of Swing Modulo Scheduling With Extensions for
+// Superblocks", by T. Lattner, Master's Thesis, University of Illinois at
+// Urbana-Champaign, 2005.
+//
+//
+// The SMS algorithm consists of three main steps after computing the minimal
+// initiation interval (MII).
+// 1) Analyze the dependence graph and compute information about each
+// instruction in the graph.
+// 2) Order the nodes (instructions) by priority based upon the heuristics
+// described in the algorithm.
+// 3) Attempt to schedule the nodes in the specified order using the MII.
+//
+// This SMS implementation is a target-independent back-end pass. When enabled,
+// the pass runs just prior to the register allocation pass, while the machine
+// IR is in SSA form. If software pipelining is successful, then the original
+// loop is replaced by the optimized loop. The optimized loop contains one or
+// more prolog blocks, the pipelined kernel, and one or more epilog blocks. If
+// the instructions cannot be scheduled in a given MII, we increase the MII by
+// one and try again.
+//
+// The SMS implementation is an extension of the ScheduleDAGInstrs class. We
+// represent loop carried dependences in the DAG as order edges to the Phi
+// nodes. We also perform several passes over the DAG to eliminate unnecessary
+// edges that inhibit the ability to pipeline. The implementation uses the
+// DFAPacketizer class to compute the minimum initiation interval and to check
+// where an instruction may be inserted in the pipelined schedule.
+//
+// In order for the SMS pass to work, several target specific hooks need to be
+// implemented to get information about the loop structure and to rewrite
+// instructions.
+//
+//===----------------------------------------------------------------------===//
+
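
Before the implementation itself, here is a compressed, purely illustrative sketch of the driver flow the comment above describes. Every name below is invented for exposition, and the stubs stand in for the real dependence analysis, node ordering, and scheduling steps:

#include <vector>

struct LoopIR {}; // hypothetical stand-in for the machine loop

// Stubbed analyses so the sketch is self-contained (step 1, the dependence
// analysis, is assumed to feed both computeMII and orderNodes).
static unsigned computeMII(const LoopIR &) { return 2; }
static std::vector<int> orderNodes(const LoopIR &) { return {0, 1, 2}; }
static bool scheduleAtII(const LoopIR &, const std::vector<int> &,
                         unsigned II) {
  return II >= 3; // pretend the loop only fits at II >= 3
}

static bool trySwingModuloSchedule(const LoopIR &L, unsigned MaxII = 10) {
  unsigned MII = computeMII(L);           // lower bound on the II
  std::vector<int> Order = orderNodes(L); // step 2: priority ordering
  for (unsigned II = MII; II <= MaxII; ++II)
    if (scheduleAtII(L, Order, II))       // step 3: retry with a larger II
      return true;                        // would emit prolog/kernel/epilog
  return false;                           // keep the original loop
}

int main() { return trySwingModuloSchedule(LoopIR{}) ? 0 : 1; }
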
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/iterator_range.h"
+#include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/PriorityQueue.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/MemoryLocation.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/CodeGen/DFAPacketizer.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineInstrBundle.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterClassInfo.h"
+#include "llvm/CodeGen/RegisterPressure.h"
+#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/CodeGen/ScheduleDAGInstrs.h"
+#include "llvm/CodeGen/ScheduleDAGMutation.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/MC/MCInstrItineraries.h"
+#include "llvm/PassAnalysisSupport.h"
+#include "llvm/PassRegistry.h"
+#include "llvm/PassSupport.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include <algorithm>
+#include <cassert>
+#include <climits>
+#include <cstdint>
+#include <deque>
+#include <functional>
+#include <iterator>
+#include <map>
+#include <tuple>
+#include <utility>
+#include <vector>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "pipeliner"
+
+STATISTIC(NumTrytoPipeline, "Number of loops that we attempt to pipeline");
+STATISTIC(NumPipelined, "Number of loops software pipelined");
+
+/// A command line option to turn software pipelining on or off.
+static cl::opt<bool> EnableSWP("enable-pipeliner", cl::Hidden, cl::init(true),
+ cl::ZeroOrMore,
+ cl::desc("Enable Software Pipelining"));
+
+/// A command line option to enable SWP at -Os.
+static cl::opt<bool> EnableSWPOptSize("enable-pipeliner-opt-size",
+ cl::desc("Enable SWP at Os."), cl::Hidden,
+ cl::init(false));
+
+/// A command line argument to cap the minimum initiation interval (MII).
+static cl::opt<int> SwpMaxMii("pipeliner-max-mii",
+                              cl::desc("Size limit for the MII."),
+ cl::Hidden, cl::init(27));
+
+/// A command line argument to limit the number of stages in the pipeline.
+static cl::opt<int>
+ SwpMaxStages("pipeliner-max-stages",
+                 cl::desc("Maximum stages allowed in the generated schedule."),
+ cl::Hidden, cl::init(3));
+
+/// A command line option to disable the pruning of chain dependences due to
+/// an unrelated Phi.
+static cl::opt<bool>
+ SwpPruneDeps("pipeliner-prune-deps",
+ cl::desc("Prune dependences between unrelated Phi nodes."),
+ cl::Hidden, cl::init(true));
+
+/// A command line option to disable the pruning of loop carried order
+/// dependences.
+static cl::opt<bool>
+ SwpPruneLoopCarried("pipeliner-prune-loop-carried",
+ cl::desc("Prune loop carried order dependences."),
+ cl::Hidden, cl::init(true));
+
+#ifndef NDEBUG
+static cl::opt<int> SwpLoopLimit("pipeliner-max", cl::Hidden, cl::init(-1));
+#endif
+
+static cl::opt<bool> SwpIgnoreRecMII("pipeliner-ignore-recmii",
+ cl::ReallyHidden, cl::init(false),
+ cl::ZeroOrMore, cl::desc("Ignore RecMII"));
+
+namespace {
+
+class NodeSet;
+class SMSchedule;
+class SwingSchedulerDAG;
+
+/// The main class in the implementation of the target independent
+/// software pipeliner pass.
+class MachinePipeliner : public MachineFunctionPass {
+public:
+ MachineFunction *MF = nullptr;
+ const MachineLoopInfo *MLI = nullptr;
+ const MachineDominatorTree *MDT = nullptr;
+  const InstrItineraryData *InstrItins = nullptr;
+ const TargetInstrInfo *TII = nullptr;
+ RegisterClassInfo RegClassInfo;
+
+#ifndef NDEBUG
+ static int NumTries;
+#endif
+ /// Cache the target analysis information about the loop.
+ struct LoopInfo {
+ MachineBasicBlock *TBB = nullptr;
+ MachineBasicBlock *FBB = nullptr;
+ SmallVector<MachineOperand, 4> BrCond;
+ MachineInstr *LoopInductionVar = nullptr;
+ MachineInstr *LoopCompare = nullptr;
+ };
+ LoopInfo LI;
+
+ static char ID;
+ MachinePipeliner() : MachineFunctionPass(ID) {
+ initializeMachinePipelinerPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<AAResultsWrapperPass>();
+ AU.addPreserved<AAResultsWrapperPass>();
+ AU.addRequired<MachineLoopInfo>();
+ AU.addRequired<MachineDominatorTree>();
+ AU.addRequired<LiveIntervals>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+private:
+ bool canPipelineLoop(MachineLoop &L);
+ bool scheduleLoop(MachineLoop &L);
+ bool swingModuloScheduler(MachineLoop &L);
+};
+
+/// This class builds the dependence graph for the instructions in a loop,
+/// and attempts to schedule the instructions using the SMS algorithm.
+class SwingSchedulerDAG : public ScheduleDAGInstrs {
+ MachinePipeliner &Pass;
+ /// The minimum initiation interval between iterations for this schedule.
+ unsigned MII;
+ /// Set to true if a valid pipelined schedule is found for the loop.
+ bool Scheduled;
+ MachineLoop &Loop;
+ LiveIntervals &LIS;
+ const RegisterClassInfo &RegClassInfo;
+
+  /// A topological ordering of the SUnits, which is needed for changing
+ /// dependences and iterating over the SUnits.
+ ScheduleDAGTopologicalSort Topo;
+
+ struct NodeInfo {
+ int ASAP;
+ int ALAP;
+ NodeInfo() : ASAP(0), ALAP(0) {}
+ };
+ /// Computed properties for each node in the graph.
+ std::vector<NodeInfo> ScheduleInfo;
+
+ enum OrderKind { BottomUp = 0, TopDown = 1 };
+ /// Computed node ordering for scheduling.
+ SetVector<SUnit *> NodeOrder;
+
+ typedef SmallVector<NodeSet, 8> NodeSetType;
+ typedef DenseMap<unsigned, unsigned> ValueMapTy;
+ typedef SmallVectorImpl<MachineBasicBlock *> MBBVectorTy;
+ typedef DenseMap<MachineInstr *, MachineInstr *> InstrMapTy;
+
+ /// Instructions to change when emitting the final schedule.
+ DenseMap<SUnit *, std::pair<unsigned, int64_t>> InstrChanges;
+
+ /// We may create a new instruction, so remember it because it
+ /// must be deleted when the pass is finished.
+ SmallPtrSet<MachineInstr *, 4> NewMIs;
+
+ /// Ordered list of DAG postprocessing steps.
+ std::vector<std::unique_ptr<ScheduleDAGMutation>> Mutations;
+
+ /// Helper class to implement Johnson's circuit finding algorithm.
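+  /// The algorithm is described in D. B. Johnson, "Finding All the
+  /// Elementary Circuits of a Directed Graph", SIAM J. Computing, 1975.
+  /// Each elementary circuit found corresponds to a recurrence in the loop.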
+ class Circuits {
+ std::vector<SUnit> &SUnits;
+ SetVector<SUnit *> Stack;
+ BitVector Blocked;
+ SmallVector<SmallPtrSet<SUnit *, 4>, 10> B;
+ SmallVector<SmallVector<int, 4>, 16> AdjK;
+ unsigned NumPaths;
+ static unsigned MaxPaths;
+
+ public:
+ Circuits(std::vector<SUnit> &SUs)
+ : SUnits(SUs), Stack(), Blocked(SUs.size()), B(SUs.size()),
+ AdjK(SUs.size()) {}
+ /// Reset the data structures used in the circuit algorithm.
+ void reset() {
+ Stack.clear();
+ Blocked.reset();
+ B.assign(SUnits.size(), SmallPtrSet<SUnit *, 4>());
+ NumPaths = 0;
+ }
+ void createAdjacencyStructure(SwingSchedulerDAG *DAG);
+ bool circuit(int V, int S, NodeSetType &NodeSets, bool HasBackedge = false);
+ void unblock(int U);
+ };
+
+public:
+ SwingSchedulerDAG(MachinePipeliner &P, MachineLoop &L, LiveIntervals &lis,
+ const RegisterClassInfo &rci)
+ : ScheduleDAGInstrs(*P.MF, P.MLI, false), Pass(P), MII(0),
+ Scheduled(false), Loop(L), LIS(lis), RegClassInfo(rci),
+ Topo(SUnits, &ExitSU) {
+ P.MF->getSubtarget().getSMSMutations(Mutations);
+ }
+
+ void schedule() override;
+ void finishBlock() override;
+
+ /// Return true if the loop kernel has been scheduled.
+ bool hasNewSchedule() { return Scheduled; }
+
+ /// Return the earliest time an instruction may be scheduled.
+ int getASAP(SUnit *Node) { return ScheduleInfo[Node->NodeNum].ASAP; }
+
+  /// Return the latest time an instruction may be scheduled.
+ int getALAP(SUnit *Node) { return ScheduleInfo[Node->NodeNum].ALAP; }
+
+  /// The mobility function, which is the number of slots in which
+ /// an instruction may be scheduled.
+ int getMOV(SUnit *Node) { return getALAP(Node) - getASAP(Node); }
+
+ /// The depth, in the dependence graph, for a node.
+ int getDepth(SUnit *Node) { return Node->getDepth(); }
+
+ /// The height, in the dependence graph, for a node.
+ int getHeight(SUnit *Node) { return Node->getHeight(); }
+
+ /// Return true if the dependence is a back-edge in the data dependence graph.
+ /// Since the DAG doesn't contain cycles, we represent a cycle in the graph
+ /// using an anti dependence from a Phi to an instruction.
+ bool isBackedge(SUnit *Source, const SDep &Dep) {
+ if (Dep.getKind() != SDep::Anti)
+ return false;
+ return Source->getInstr()->isPHI() || Dep.getSUnit()->getInstr()->isPHI();
+ }
+
+ /// Return true if the dependence is an order dependence between non-Phis.
+ static bool isOrder(SUnit *Source, const SDep &Dep) {
+ if (Dep.getKind() != SDep::Order)
+ return false;
+ return (!Source->getInstr()->isPHI() &&
+ !Dep.getSUnit()->getInstr()->isPHI());
+ }
+
+ bool isLoopCarriedOrder(SUnit *Source, const SDep &Dep, bool isSucc = true);
+
+ /// The latency of the dependence.
+ unsigned getLatency(SUnit *Source, const SDep &Dep) {
+ // Anti dependences represent recurrences, so use the latency of the
+ // instruction on the back-edge.
+ if (Dep.getKind() == SDep::Anti) {
+ if (Source->getInstr()->isPHI())
+ return Dep.getSUnit()->Latency;
+ if (Dep.getSUnit()->getInstr()->isPHI())
+ return Source->Latency;
+ return Dep.getLatency();
+ }
+ return Dep.getLatency();
+ }
+
+ /// The distance function, which indicates that operation V of iteration I
+  /// depends on operation U of iteration I-distance.
+ unsigned getDistance(SUnit *U, SUnit *V, const SDep &Dep) {
+ // Instructions that feed a Phi have a distance of 1. Computing larger
+ // values for arrays requires data dependence information.
+ if (V->getInstr()->isPHI() && Dep.getKind() == SDep::Anti)
+ return 1;
+ return 0;
+ }
+
+ /// Set the Minimum Initiation Interval for this schedule attempt.
+ void setMII(unsigned mii) { MII = mii; }
+
+ MachineInstr *applyInstrChange(MachineInstr *MI, SMSchedule &Schedule,
+ bool UpdateDAG = false);
+
+ /// Return the new base register that was stored away for the changed
+ /// instruction.
+ unsigned getInstrBaseReg(SUnit *SU) {
+ DenseMap<SUnit *, std::pair<unsigned, int64_t>>::iterator It =
+ InstrChanges.find(SU);
+ if (It != InstrChanges.end())
+ return It->second.first;
+ return 0;
+ }
+
+ void addMutation(std::unique_ptr<ScheduleDAGMutation> Mutation) {
+ Mutations.push_back(std::move(Mutation));
+ }
+
+private:
+ void addLoopCarriedDependences(AliasAnalysis *AA);
+ void updatePhiDependences();
+ void changeDependences();
+ unsigned calculateResMII();
+ unsigned calculateRecMII(NodeSetType &RecNodeSets);
+ void findCircuits(NodeSetType &NodeSets);
+ void fuseRecs(NodeSetType &NodeSets);
+ void removeDuplicateNodes(NodeSetType &NodeSets);
+ void computeNodeFunctions(NodeSetType &NodeSets);
+ void registerPressureFilter(NodeSetType &NodeSets);
+ void colocateNodeSets(NodeSetType &NodeSets);
+ void checkNodeSets(NodeSetType &NodeSets);
+ void groupRemainingNodes(NodeSetType &NodeSets);
+ void addConnectedNodes(SUnit *SU, NodeSet &NewSet,
+ SetVector<SUnit *> &NodesAdded);
+ void computeNodeOrder(NodeSetType &NodeSets);
+ bool schedulePipeline(SMSchedule &Schedule);
+ void generatePipelinedLoop(SMSchedule &Schedule);
+ void generateProlog(SMSchedule &Schedule, unsigned LastStage,
+ MachineBasicBlock *KernelBB, ValueMapTy *VRMap,
+ MBBVectorTy &PrologBBs);
+ void generateEpilog(SMSchedule &Schedule, unsigned LastStage,
+ MachineBasicBlock *KernelBB, ValueMapTy *VRMap,
+ MBBVectorTy &EpilogBBs, MBBVectorTy &PrologBBs);
+ void generateExistingPhis(MachineBasicBlock *NewBB, MachineBasicBlock *BB1,
+ MachineBasicBlock *BB2, MachineBasicBlock *KernelBB,
+ SMSchedule &Schedule, ValueMapTy *VRMap,
+ InstrMapTy &InstrMap, unsigned LastStageNum,
+ unsigned CurStageNum, bool IsLast);
+ void generatePhis(MachineBasicBlock *NewBB, MachineBasicBlock *BB1,
+ MachineBasicBlock *BB2, MachineBasicBlock *KernelBB,
+ SMSchedule &Schedule, ValueMapTy *VRMap,
+ InstrMapTy &InstrMap, unsigned LastStageNum,
+ unsigned CurStageNum, bool IsLast);
+ void removeDeadInstructions(MachineBasicBlock *KernelBB,
+ MBBVectorTy &EpilogBBs);
+ void splitLifetimes(MachineBasicBlock *KernelBB, MBBVectorTy &EpilogBBs,
+ SMSchedule &Schedule);
+ void addBranches(MBBVectorTy &PrologBBs, MachineBasicBlock *KernelBB,
+ MBBVectorTy &EpilogBBs, SMSchedule &Schedule,
+ ValueMapTy *VRMap);
+ bool computeDelta(MachineInstr &MI, unsigned &Delta);
+ void updateMemOperands(MachineInstr &NewMI, MachineInstr &OldMI,
+ unsigned Num);
+ MachineInstr *cloneInstr(MachineInstr *OldMI, unsigned CurStageNum,
+ unsigned InstStageNum);
+ MachineInstr *cloneAndChangeInstr(MachineInstr *OldMI, unsigned CurStageNum,
+ unsigned InstStageNum,
+ SMSchedule &Schedule);
+ void updateInstruction(MachineInstr *NewMI, bool LastDef,
+ unsigned CurStageNum, unsigned InstStageNum,
+ SMSchedule &Schedule, ValueMapTy *VRMap);
+ MachineInstr *findDefInLoop(unsigned Reg);
+ unsigned getPrevMapVal(unsigned StageNum, unsigned PhiStage, unsigned LoopVal,
+ unsigned LoopStage, ValueMapTy *VRMap,
+ MachineBasicBlock *BB);
+ void rewritePhiValues(MachineBasicBlock *NewBB, unsigned StageNum,
+ SMSchedule &Schedule, ValueMapTy *VRMap,
+ InstrMapTy &InstrMap);
+ void rewriteScheduledInstr(MachineBasicBlock *BB, SMSchedule &Schedule,
+ InstrMapTy &InstrMap, unsigned CurStageNum,
+ unsigned PhiNum, MachineInstr *Phi,
+ unsigned OldReg, unsigned NewReg,
+ unsigned PrevReg = 0);
+ bool canUseLastOffsetValue(MachineInstr *MI, unsigned &BasePos,
+ unsigned &OffsetPos, unsigned &NewBase,
+ int64_t &NewOffset);
+ void postprocessDAG();
+};
+
+/// A NodeSet contains a set of SUnit DAG nodes with additional information
+/// that assigns a priority to the set.
+class NodeSet {
+ SetVector<SUnit *> Nodes;
+ bool HasRecurrence;
+ unsigned RecMII = 0;
+ int MaxMOV = 0;
+ int MaxDepth = 0;
+ unsigned Colocate = 0;
+ SUnit *ExceedPressure = nullptr;
+
+public:
+ typedef SetVector<SUnit *>::const_iterator iterator;
+
+ NodeSet() : Nodes(), HasRecurrence(false) {}
+
+ NodeSet(iterator S, iterator E) : Nodes(S, E), HasRecurrence(true) {}
+
+ bool insert(SUnit *SU) { return Nodes.insert(SU); }
+
+ void insert(iterator S, iterator E) { Nodes.insert(S, E); }
+
+ template <typename UnaryPredicate> bool remove_if(UnaryPredicate P) {
+ return Nodes.remove_if(P);
+ }
+
+ unsigned count(SUnit *SU) const { return Nodes.count(SU); }
+
+  bool hasRecurrence() { return HasRecurrence; }
+
+ unsigned size() const { return Nodes.size(); }
+
+ bool empty() const { return Nodes.empty(); }
+
+  SUnit *getNode(unsigned i) const { return Nodes[i]; }
+
+  void setRecMII(unsigned mii) { RecMII = mii; }
+
+  void setColocate(unsigned c) { Colocate = c; }
+
+ void setExceedPressure(SUnit *SU) { ExceedPressure = SU; }
+
+ bool isExceedSU(SUnit *SU) { return ExceedPressure == SU; }
+
+ int compareRecMII(NodeSet &RHS) { return RecMII - RHS.RecMII; }
+
+ int getRecMII() { return RecMII; }
+
+ /// Summarize node functions for the entire node set.
+ void computeNodeSetInfo(SwingSchedulerDAG *SSD) {
+ for (SUnit *SU : *this) {
+ MaxMOV = std::max(MaxMOV, SSD->getMOV(SU));
+ MaxDepth = std::max(MaxDepth, SSD->getDepth(SU));
+ }
+ }
+
+ void clear() {
+ Nodes.clear();
+ RecMII = 0;
+ HasRecurrence = false;
+ MaxMOV = 0;
+ MaxDepth = 0;
+ Colocate = 0;
+ ExceedPressure = nullptr;
+ }
+
+ operator SetVector<SUnit *> &() { return Nodes; }
+
+ /// Sort the node sets by importance. First, rank them by recurrence MII,
+ /// then by mobility (least mobile done first), and finally by depth.
+ /// Each node set may contain a colocate value which is used as the first
+ /// tie breaker, if it's set.
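+  /// For example, a set with RecMII 4 always sorts ahead of a set with
+  /// RecMII 2; colocation, mobility, and depth only break ties between
+  /// sets with equal RecMII.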
+ bool operator>(const NodeSet &RHS) const {
+ if (RecMII == RHS.RecMII) {
+ if (Colocate != 0 && RHS.Colocate != 0 && Colocate != RHS.Colocate)
+ return Colocate < RHS.Colocate;
+ if (MaxMOV == RHS.MaxMOV)
+ return MaxDepth > RHS.MaxDepth;
+ return MaxMOV < RHS.MaxMOV;
+ }
+ return RecMII > RHS.RecMII;
+ }
+
+ bool operator==(const NodeSet &RHS) const {
+ return RecMII == RHS.RecMII && MaxMOV == RHS.MaxMOV &&
+ MaxDepth == RHS.MaxDepth;
+ }
+
+ bool operator!=(const NodeSet &RHS) const { return !operator==(RHS); }
+
+ iterator begin() { return Nodes.begin(); }
+ iterator end() { return Nodes.end(); }
+
+ void print(raw_ostream &os) const {
+ os << "Num nodes " << size() << " rec " << RecMII << " mov " << MaxMOV
+ << " depth " << MaxDepth << " col " << Colocate << "\n";
+ for (const auto &I : Nodes)
+ os << " SU(" << I->NodeNum << ") " << *(I->getInstr());
+ os << "\n";
+ }
+
+ void dump() const { print(dbgs()); }
+};
+
+/// This class represents the scheduled code. The main data structure is a
+/// map from scheduled cycle to instructions. During scheduling, the
+/// data structure explicitly represents all stages/iterations. When
+/// the algorithm finishes, the schedule is collapsed into a single stage,
+/// which represents instructions from different loop iterations.
+///
+/// The SMS algorithm allows negative values for cycles, so the first cycle
+/// in the schedule is the smallest cycle value.
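+///
+/// As an illustration, with an initiation interval II = 3 and FirstCycle = 0,
+/// an instruction placed at cycle 7 belongs to stage (7 - 0) / 3 = 2 and to
+/// normalized cycle (7 - 0) % 3 = 1 of the collapsed kernel.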
+class SMSchedule {
+private:
+ /// Map from execution cycle to instructions.
+ DenseMap<int, std::deque<SUnit *>> ScheduledInstrs;
+
+ /// Map from instruction to execution cycle.
+ std::map<SUnit *, int> InstrToCycle;
+
+ /// Map for each register and the max difference between its uses and def.
+ /// The first element in the pair is the max difference in stages. The
+ /// second is true if the register defines a Phi value and loop value is
+ /// scheduled before the Phi.
+ std::map<unsigned, std::pair<unsigned, bool>> RegToStageDiff;
+
+ /// Keep track of the first cycle value in the schedule. It starts
+ /// as zero, but the algorithm allows negative values.
+ int FirstCycle;
+
+ /// Keep track of the last cycle value in the schedule.
+ int LastCycle;
+
+ /// The initiation interval (II) for the schedule.
+ int InitiationInterval;
+
+ /// Target machine information.
+ const TargetSubtargetInfo &ST;
+
+ /// Virtual register information.
+ MachineRegisterInfo &MRI;
+
+ DFAPacketizer *Resources;
+
+public:
+ SMSchedule(MachineFunction *mf)
+ : ST(mf->getSubtarget()), MRI(mf->getRegInfo()),
+ Resources(ST.getInstrInfo()->CreateTargetScheduleState(ST)) {
+ FirstCycle = 0;
+ LastCycle = 0;
+ InitiationInterval = 0;
+ }
+
+ ~SMSchedule() {
+ ScheduledInstrs.clear();
+ InstrToCycle.clear();
+ RegToStageDiff.clear();
+ delete Resources;
+ }
+
+ void reset() {
+ ScheduledInstrs.clear();
+ InstrToCycle.clear();
+ RegToStageDiff.clear();
+ FirstCycle = 0;
+ LastCycle = 0;
+ InitiationInterval = 0;
+ }
+
+ /// Set the initiation interval for this schedule.
+ void setInitiationInterval(int ii) { InitiationInterval = ii; }
+
+ /// Return the first cycle in the completed schedule. This
+ /// can be a negative value.
+ int getFirstCycle() const { return FirstCycle; }
+
+ /// Return the last cycle in the finalized schedule.
+ int getFinalCycle() const { return FirstCycle + InitiationInterval - 1; }
+
+ /// Return the cycle of the earliest scheduled instruction in the dependence
+ /// chain.
+ int earliestCycleInChain(const SDep &Dep);
+
+ /// Return the cycle of the latest scheduled instruction in the dependence
+ /// chain.
+ int latestCycleInChain(const SDep &Dep);
+
+ void computeStart(SUnit *SU, int *MaxEarlyStart, int *MinLateStart,
+ int *MinEnd, int *MaxStart, int II, SwingSchedulerDAG *DAG);
+ bool insert(SUnit *SU, int StartCycle, int EndCycle, int II);
+
+ /// Iterators for the cycle to instruction map.
+ typedef DenseMap<int, std::deque<SUnit *>>::iterator sched_iterator;
+ typedef DenseMap<int, std::deque<SUnit *>>::const_iterator
+ const_sched_iterator;
+
+ /// Return true if the instruction is scheduled at the specified stage.
+ bool isScheduledAtStage(SUnit *SU, unsigned StageNum) {
+ return (stageScheduled(SU) == (int)StageNum);
+ }
+
+ /// Return the stage for a scheduled instruction. Return -1 if
+ /// the instruction has not been scheduled.
+ int stageScheduled(SUnit *SU) const {
+ std::map<SUnit *, int>::const_iterator it = InstrToCycle.find(SU);
+ if (it == InstrToCycle.end())
+ return -1;
+ return (it->second - FirstCycle) / InitiationInterval;
+ }
+
+ /// Return the cycle for a scheduled instruction. This function normalizes
+ /// the first cycle to be 0.
+ unsigned cycleScheduled(SUnit *SU) const {
+ std::map<SUnit *, int>::const_iterator it = InstrToCycle.find(SU);
+ assert(it != InstrToCycle.end() && "Instruction hasn't been scheduled.");
+ return (it->second - FirstCycle) % InitiationInterval;
+ }
+
+ /// Return the maximum stage count needed for this schedule.
+ unsigned getMaxStageCount() {
+ return (LastCycle - FirstCycle) / InitiationInterval;
+ }
+
+ /// Return the max. number of stages/iterations that can occur between a
+ /// register definition and its uses.
+ unsigned getStagesForReg(int Reg, unsigned CurStage) {
+ std::pair<unsigned, bool> Stages = RegToStageDiff[Reg];
+ if (CurStage > getMaxStageCount() && Stages.first == 0 && Stages.second)
+ return 1;
+ return Stages.first;
+ }
+
+  /// The number of stages for a Phi is a little different from that of other
+ /// instructions. The minimum value computed in RegToStageDiff is 1
+ /// because we assume the Phi is needed for at least 1 iteration.
+ /// This is not the case if the loop value is scheduled prior to the
+ /// Phi in the same stage. This function returns the number of stages
+ /// or iterations needed between the Phi definition and any uses.
+ unsigned getStagesForPhi(int Reg) {
+ std::pair<unsigned, bool> Stages = RegToStageDiff[Reg];
+ if (Stages.second)
+ return Stages.first;
+ return Stages.first - 1;
+ }
+
+ /// Return the instructions that are scheduled at the specified cycle.
+ std::deque<SUnit *> &getInstructions(int cycle) {
+ return ScheduledInstrs[cycle];
+ }
+
+ bool isValidSchedule(SwingSchedulerDAG *SSD);
+ void finalizeSchedule(SwingSchedulerDAG *SSD);
+ bool orderDependence(SwingSchedulerDAG *SSD, SUnit *SU,
+ std::deque<SUnit *> &Insts);
+ bool isLoopCarried(SwingSchedulerDAG *SSD, MachineInstr &Phi);
+ bool isLoopCarriedDefOfUse(SwingSchedulerDAG *SSD, MachineInstr *Inst,
+ MachineOperand &MO);
+ void print(raw_ostream &os) const;
+ void dump() const;
+};
+
+} // end anonymous namespace
+
+unsigned SwingSchedulerDAG::Circuits::MaxPaths = 5;
+char MachinePipeliner::ID = 0;
+#ifndef NDEBUG
+int MachinePipeliner::NumTries = 0;
+#endif
+char &llvm::MachinePipelinerID = MachinePipeliner::ID;
+INITIALIZE_PASS_BEGIN(MachinePipeliner, "pipeliner",
+ "Modulo Software Pipelining", false, false)
+INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
+INITIALIZE_PASS_END(MachinePipeliner, "pipeliner",
+ "Modulo Software Pipelining", false, false)
+
+/// The "main" function for implementing Swing Modulo Scheduling.
+bool MachinePipeliner::runOnMachineFunction(MachineFunction &mf) {
+ if (skipFunction(*mf.getFunction()))
+ return false;
+
+ if (!EnableSWP)
+ return false;
+
+ if (mf.getFunction()->getAttributes().hasAttribute(
+ AttributeSet::FunctionIndex, Attribute::OptimizeForSize) &&
+ !EnableSWPOptSize.getPosition())
+ return false;
+
+ MF = &mf;
+ MLI = &getAnalysis<MachineLoopInfo>();
+ MDT = &getAnalysis<MachineDominatorTree>();
+ TII = MF->getSubtarget().getInstrInfo();
+ RegClassInfo.runOnMachineFunction(*MF);
+
+ for (auto &L : *MLI)
+ scheduleLoop(*L);
+
+ return false;
+}
+
+/// Attempt to perform the SMS algorithm on the specified loop. This function is
+/// the main entry point for the algorithm. The function identifies candidate
+/// loops, calculates the minimum initiation interval, and attempts to schedule
+/// the loop.
+bool MachinePipeliner::scheduleLoop(MachineLoop &L) {
+ bool Changed = false;
+ for (auto &InnerLoop : L)
+ Changed |= scheduleLoop(*InnerLoop);
+
+#ifndef NDEBUG
+ // Stop trying after reaching the limit (if any).
+ int Limit = SwpLoopLimit;
+ if (Limit >= 0) {
+    if (NumTries >= Limit)
+ return Changed;
+ NumTries++;
+ }
+#endif
+
+ if (!canPipelineLoop(L))
+ return Changed;
+
+ ++NumTrytoPipeline;
+
+ Changed = swingModuloScheduler(L);
+
+ return Changed;
+}
+
+/// Return true if the loop can be software pipelined. The algorithm is
+/// restricted to loops with a single basic block. Make sure that the
+/// branch in the loop can be analyzed.
+bool MachinePipeliner::canPipelineLoop(MachineLoop &L) {
+ if (L.getNumBlocks() != 1)
+ return false;
+
+  // Check that the branch can be analyzed; if we can't understand the
+  // branch, we can't pipeline the loop.
+ LI.TBB = nullptr;
+ LI.FBB = nullptr;
+ LI.BrCond.clear();
+ if (TII->analyzeBranch(*L.getHeader(), LI.TBB, LI.FBB, LI.BrCond))
+ return false;
+
+ LI.LoopInductionVar = nullptr;
+ LI.LoopCompare = nullptr;
+ if (TII->analyzeLoop(L, LI.LoopInductionVar, LI.LoopCompare))
+ return false;
+
+ if (!L.getLoopPreheader())
+ return false;
+
+ // If any of the Phis contain subregs, then we can't pipeline
+ // because we don't know how to maintain subreg information in the
+ // VMap structure.
+ MachineBasicBlock *MBB = L.getHeader();
+ for (MachineBasicBlock::iterator BBI = MBB->instr_begin(),
+ BBE = MBB->getFirstNonPHI();
+ BBI != BBE; ++BBI)
+ for (unsigned i = 1; i != BBI->getNumOperands(); i += 2)
+ if (BBI->getOperand(i).getSubReg() != 0)
+ return false;
+
+ return true;
+}
+
+/// The SMS algorithm consists of the following main steps:
+/// 1. Computation and analysis of the dependence graph.
+/// 2. Ordering of the nodes (instructions).
+/// 3. Attempt to Schedule the loop.
+bool MachinePipeliner::swingModuloScheduler(MachineLoop &L) {
+ assert(L.getBlocks().size() == 1 && "SMS works on single blocks only.");
+
+ SwingSchedulerDAG SMS(*this, L, getAnalysis<LiveIntervals>(), RegClassInfo);
+
+ MachineBasicBlock *MBB = L.getHeader();
+ // The kernel should not include any terminator instructions. These
+ // will be added back later.
+ SMS.startBlock(MBB);
+
+ // Compute the number of 'real' instructions in the basic block by
+ // ignoring terminators.
+ unsigned size = MBB->size();
+ for (MachineBasicBlock::iterator I = MBB->getFirstTerminator(),
+ E = MBB->instr_end();
+ I != E; ++I, --size)
+ ;
+
+ SMS.enterRegion(MBB, MBB->begin(), MBB->getFirstTerminator(), size);
+ SMS.schedule();
+ SMS.exitRegion();
+
+ SMS.finishBlock();
+ return SMS.hasNewSchedule();
+}
+
+/// We override the schedule function in ScheduleDAGInstrs to implement the
+/// scheduling part of the Swing Modulo Scheduling algorithm.
+void SwingSchedulerDAG::schedule() {
+ AliasAnalysis *AA = &Pass.getAnalysis<AAResultsWrapperPass>().getAAResults();
+ buildSchedGraph(AA);
+ addLoopCarriedDependences(AA);
+ updatePhiDependences();
+ Topo.InitDAGTopologicalSorting();
+ postprocessDAG();
+ changeDependences();
+ DEBUG({
+ for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
+ SUnits[su].dumpAll(this);
+ });
+
+ NodeSetType NodeSets;
+ findCircuits(NodeSets);
+
+ // Calculate the MII.
+ unsigned ResMII = calculateResMII();
+ unsigned RecMII = calculateRecMII(NodeSets);
+
+ fuseRecs(NodeSets);
+
+ // This flag is used for testing and can cause correctness problems.
+ if (SwpIgnoreRecMII)
+ RecMII = 0;
+
+ MII = std::max(ResMII, RecMII);
+ DEBUG(dbgs() << "MII = " << MII << " (rec=" << RecMII << ", res=" << ResMII
+ << ")\n");
+
+ // Can't schedule a loop without a valid MII.
+ if (MII == 0)
+ return;
+
+ // Don't pipeline large loops.
+ if (SwpMaxMii != -1 && (int)MII > SwpMaxMii)
+ return;
+
+ computeNodeFunctions(NodeSets);
+
+ registerPressureFilter(NodeSets);
+
+ colocateNodeSets(NodeSets);
+
+ checkNodeSets(NodeSets);
+
+ DEBUG({
+ for (auto &I : NodeSets) {
+ dbgs() << " Rec NodeSet ";
+ I.dump();
+ }
+ });
+
+ std::sort(NodeSets.begin(), NodeSets.end(), std::greater<NodeSet>());
+
+ groupRemainingNodes(NodeSets);
+
+ removeDuplicateNodes(NodeSets);
+
+ DEBUG({
+ for (auto &I : NodeSets) {
+ dbgs() << " NodeSet ";
+ I.dump();
+ }
+ });
+
+ computeNodeOrder(NodeSets);
+
+ SMSchedule Schedule(Pass.MF);
+ Scheduled = schedulePipeline(Schedule);
+
+ if (!Scheduled)
+ return;
+
+ unsigned numStages = Schedule.getMaxStageCount();
+ // No need to generate pipeline if there are no overlapped iterations.
+ if (numStages == 0)
+ return;
+
+ // Check that the maximum stage count is less than user-defined limit.
+ if (SwpMaxStages > -1 && (int)numStages > SwpMaxStages)
+ return;
+
+ generatePipelinedLoop(Schedule);
+ ++NumPipelined;
+}
+
+/// Clean up after the software pipeliner runs.
+void SwingSchedulerDAG::finishBlock() {
+ for (MachineInstr *I : NewMIs)
+ MF.DeleteMachineInstr(I);
+ NewMIs.clear();
+
+ // Call the superclass.
+ ScheduleDAGInstrs::finishBlock();
+}
+
+/// Return the register values for the operands of a Phi instruction.
+/// This function assumes the instruction is a Phi.
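+/// As a reminder of the operand layout, a machine Phi conceptually looks like
+///   %vr = PHI %init, <preheader>, %loop, <latch>
+/// where each odd operand i is a register and operand i+1 is the basic block
+/// it comes from.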
+static void getPhiRegs(MachineInstr &Phi, MachineBasicBlock *Loop,
+ unsigned &InitVal, unsigned &LoopVal) {
+ assert(Phi.isPHI() && "Expecting a Phi.");
+
+ InitVal = 0;
+ LoopVal = 0;
+ for (unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2)
+ if (Phi.getOperand(i + 1).getMBB() != Loop)
+ InitVal = Phi.getOperand(i).getReg();
+ else if (Phi.getOperand(i + 1).getMBB() == Loop)
+ LoopVal = Phi.getOperand(i).getReg();
+
+ assert(InitVal != 0 && LoopVal != 0 && "Unexpected Phi structure.");
+}
+
+/// Return the Phi register value that comes from the incoming block.
+static unsigned getInitPhiReg(MachineInstr &Phi, MachineBasicBlock *LoopBB) {
+ for (unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2)
+ if (Phi.getOperand(i + 1).getMBB() != LoopBB)
+ return Phi.getOperand(i).getReg();
+ return 0;
+}
+
+/// Return the Phi register value that comes from the loop block.
+static unsigned getLoopPhiReg(MachineInstr &Phi, MachineBasicBlock *LoopBB) {
+ for (unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2)
+ if (Phi.getOperand(i + 1).getMBB() == LoopBB)
+ return Phi.getOperand(i).getReg();
+ return 0;
+}
+
+/// Return true if SUb can be reached from SUa following the chain edges.
+static bool isSuccOrder(SUnit *SUa, SUnit *SUb) {
+ SmallPtrSet<SUnit *, 8> Visited;
+ SmallVector<SUnit *, 8> Worklist;
+ Worklist.push_back(SUa);
+ while (!Worklist.empty()) {
+ const SUnit *SU = Worklist.pop_back_val();
+ for (auto &SI : SU->Succs) {
+ SUnit *SuccSU = SI.getSUnit();
+ if (SI.getKind() == SDep::Order) {
+ if (Visited.count(SuccSU))
+ continue;
+ if (SuccSU == SUb)
+ return true;
+ Worklist.push_back(SuccSU);
+ Visited.insert(SuccSU);
+ }
+ }
+ }
+ return false;
+}
+
+/// Return true if the instruction causes a chain between memory
+/// references before and after it.
+static bool isDependenceBarrier(MachineInstr &MI, AliasAnalysis *AA) {
+ return MI.isCall() || MI.hasUnmodeledSideEffects() ||
+ (MI.hasOrderedMemoryRef() &&
+ (!MI.mayLoad() || !MI.isDereferenceableInvariantLoad(AA)));
+}
+
+/// Return the underlying objects for the memory references of an instruction.
+/// This function calls the code in ValueTracking, but first checks that the
+/// instruction has a memory operand.
+static void getUnderlyingObjects(MachineInstr *MI,
+ SmallVectorImpl<Value *> &Objs,
+ const DataLayout &DL) {
+ if (!MI->hasOneMemOperand())
+ return;
+ MachineMemOperand *MM = *MI->memoperands_begin();
+ if (!MM->getValue())
+ return;
+ GetUnderlyingObjects(const_cast<Value *>(MM->getValue()), Objs, DL);
+}
+
+/// Add a chain edge between a load and store if the store can be an
+/// alias of the load on a subsequent iteration, i.e., a loop carried
+/// dependence. This code is very similar to the code in ScheduleDAGInstrs
+/// but that code doesn't create loop carried dependences.
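+///
+/// As a hypothetical example, in
+///   for (i = 0; i < n; ++i) { t = a[i + 1]; a[i] = t; }
+/// the store to a[i] on iteration i+1 writes the element that the load of
+/// a[i + 1] read on iteration i, so a loop carried chain edge from the load
+/// to the store is needed even though the accesses are disjoint within a
+/// single iteration.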
+void SwingSchedulerDAG::addLoopCarriedDependences(AliasAnalysis *AA) {
+ MapVector<Value *, SmallVector<SUnit *, 4>> PendingLoads;
+ for (auto &SU : SUnits) {
+ MachineInstr &MI = *SU.getInstr();
+ if (isDependenceBarrier(MI, AA))
+ PendingLoads.clear();
+ else if (MI.mayLoad()) {
+ SmallVector<Value *, 4> Objs;
+ getUnderlyingObjects(&MI, Objs, MF.getDataLayout());
+ for (auto V : Objs) {
+ SmallVector<SUnit *, 4> &SUs = PendingLoads[V];
+ SUs.push_back(&SU);
+ }
+ } else if (MI.mayStore()) {
+ SmallVector<Value *, 4> Objs;
+ getUnderlyingObjects(&MI, Objs, MF.getDataLayout());
+ for (auto V : Objs) {
+ MapVector<Value *, SmallVector<SUnit *, 4>>::iterator I =
+ PendingLoads.find(V);
+ if (I == PendingLoads.end())
+ continue;
+ for (auto Load : I->second) {
+ if (isSuccOrder(Load, &SU))
+ continue;
+ MachineInstr &LdMI = *Load->getInstr();
+ // First, perform the cheaper check that compares the base register.
+ // If they are the same and the load offset is less than the store
+          // offset, then mark the dependence as potentially loop carried.
+ unsigned BaseReg1, BaseReg2;
+ int64_t Offset1, Offset2;
+ if (!TII->getMemOpBaseRegImmOfs(LdMI, BaseReg1, Offset1, TRI) ||
+ !TII->getMemOpBaseRegImmOfs(MI, BaseReg2, Offset2, TRI)) {
+ SU.addPred(SDep(Load, SDep::Barrier));
+ continue;
+ }
+ if (BaseReg1 == BaseReg2 && (int)Offset1 < (int)Offset2) {
+ assert(TII->areMemAccessesTriviallyDisjoint(LdMI, MI, AA) &&
+ "What happened to the chain edge?");
+ SU.addPred(SDep(Load, SDep::Barrier));
+ continue;
+ }
+ // Second, the more expensive check that uses alias analysis on the
+ // base registers. If they alias, and the load offset is less than
+          // the store offset, then mark the dependence as loop carried.
+ if (!AA) {
+ SU.addPred(SDep(Load, SDep::Barrier));
+ continue;
+ }
+ MachineMemOperand *MMO1 = *LdMI.memoperands_begin();
+ MachineMemOperand *MMO2 = *MI.memoperands_begin();
+ if (!MMO1->getValue() || !MMO2->getValue()) {
+ SU.addPred(SDep(Load, SDep::Barrier));
+ continue;
+ }
+ if (MMO1->getValue() == MMO2->getValue() &&
+ MMO1->getOffset() <= MMO2->getOffset()) {
+ SU.addPred(SDep(Load, SDep::Barrier));
+ continue;
+ }
+ AliasResult AAResult = AA->alias(
+ MemoryLocation(MMO1->getValue(), MemoryLocation::UnknownSize,
+ MMO1->getAAInfo()),
+ MemoryLocation(MMO2->getValue(), MemoryLocation::UnknownSize,
+ MMO2->getAAInfo()));
+
+ if (AAResult != NoAlias)
+ SU.addPred(SDep(Load, SDep::Barrier));
+ }
+ }
+ }
+ }
+}
+
+/// Update the phi dependences in the DAG because ScheduleDAGInstrs no longer
+/// processes dependences for PHIs. This function adds true dependences
+/// from a PHI to a use, and a loop carried dependence from the use to the
+/// PHI. The loop carried dependence is represented as an anti dependence
+/// edge. This function also removes chain dependences between unrelated
+/// PHIs.
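+///
+/// For example, given an induction variable (a hypothetical sketch)
+///   %p = PHI %init, <preheader>, %next, <latch>
+///   %next = ADD %p, 1
+/// a true data edge is added from the Phi %p to its use in the ADD, and an
+/// anti edge between the ADD defining %next and the Phi stands in for the
+/// loop carried back-edge.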
+void SwingSchedulerDAG::updatePhiDependences() {
+ SmallVector<SDep, 4> RemoveDeps;
+ const TargetSubtargetInfo &ST = MF.getSubtarget<TargetSubtargetInfo>();
+
+ // Iterate over each DAG node.
+ for (SUnit &I : SUnits) {
+ RemoveDeps.clear();
+    // If the instruction defines a register used by a Phi, or uses a
+    // register defined by a Phi, record that register here.
+ unsigned HasPhiUse = 0;
+ unsigned HasPhiDef = 0;
+ MachineInstr *MI = I.getInstr();
+    // Iterate over each operand and process the definitions.
+ for (MachineInstr::mop_iterator MOI = MI->operands_begin(),
+ MOE = MI->operands_end();
+ MOI != MOE; ++MOI) {
+ if (!MOI->isReg())
+ continue;
+ unsigned Reg = MOI->getReg();
+ if (MOI->isDef()) {
+ // If the register is used by a Phi, then create an anti dependence.
+ for (MachineRegisterInfo::use_instr_iterator
+ UI = MRI.use_instr_begin(Reg),
+ UE = MRI.use_instr_end();
+ UI != UE; ++UI) {
+ MachineInstr *UseMI = &*UI;
+ SUnit *SU = getSUnit(UseMI);
+ if (SU != nullptr && UseMI->isPHI()) {
+ if (!MI->isPHI()) {
+ SDep Dep(SU, SDep::Anti, Reg);
+ I.addPred(Dep);
+ } else {
+ HasPhiDef = Reg;
+ // Add a chain edge to a dependent Phi that isn't an existing
+ // predecessor.
+ if (SU->NodeNum < I.NodeNum && !I.isPred(SU))
+ I.addPred(SDep(SU, SDep::Barrier));
+ }
+ }
+ }
+ } else if (MOI->isUse()) {
+ // If the register is defined by a Phi, then create a true dependence.
+ MachineInstr *DefMI = MRI.getUniqueVRegDef(Reg);
+ if (DefMI == nullptr)
+ continue;
+ SUnit *SU = getSUnit(DefMI);
+ if (SU != nullptr && DefMI->isPHI()) {
+ if (!MI->isPHI()) {
+ SDep Dep(SU, SDep::Data, Reg);
+ Dep.setLatency(0);
+ ST.adjustSchedDependency(SU, &I, Dep);
+ I.addPred(Dep);
+ } else {
+ HasPhiUse = Reg;
+ // Add a chain edge to a dependent Phi that isn't an existing
+ // predecessor.
+ if (SU->NodeNum < I.NodeNum && !I.isPred(SU))
+ I.addPred(SDep(SU, SDep::Barrier));
+ }
+ }
+ }
+ }
+ // Remove order dependences from an unrelated Phi.
+ if (!SwpPruneDeps)
+ continue;
+ for (auto &PI : I.Preds) {
+ MachineInstr *PMI = PI.getSUnit()->getInstr();
+ if (PMI->isPHI() && PI.getKind() == SDep::Order) {
+ if (I.getInstr()->isPHI()) {
+ if (PMI->getOperand(0).getReg() == HasPhiUse)
+ continue;
+ if (getLoopPhiReg(*PMI, PMI->getParent()) == HasPhiDef)
+ continue;
+ }
+ RemoveDeps.push_back(PI);
+ }
+ }
+ for (int i = 0, e = RemoveDeps.size(); i != e; ++i)
+ I.removePred(RemoveDeps[i]);
+ }
+}
+
+/// Iterate over each DAG node and see if we can change any dependences
+/// in order to reduce the recurrence MII.
+void SwingSchedulerDAG::changeDependences() {
+ // See if an instruction can use a value from the previous iteration.
+ // If so, we update the base and offset of the instruction and change
+ // the dependences.
+ for (SUnit &I : SUnits) {
+ unsigned BasePos = 0, OffsetPos = 0, NewBase = 0;
+ int64_t NewOffset = 0;
+ if (!canUseLastOffsetValue(I.getInstr(), BasePos, OffsetPos, NewBase,
+ NewOffset))
+ continue;
+
+ // Get the MI and SUnit for the instruction that defines the original base.
+ unsigned OrigBase = I.getInstr()->getOperand(BasePos).getReg();
+ MachineInstr *DefMI = MRI.getUniqueVRegDef(OrigBase);
+ if (!DefMI)
+ continue;
+ SUnit *DefSU = getSUnit(DefMI);
+ if (!DefSU)
+ continue;
+    // Get the MI and SUnit for the instruction that defines the new base.
+ MachineInstr *LastMI = MRI.getUniqueVRegDef(NewBase);
+ if (!LastMI)
+ continue;
+ SUnit *LastSU = getSUnit(LastMI);
+ if (!LastSU)
+ continue;
+
+ if (Topo.IsReachable(&I, LastSU))
+ continue;
+
+ // Remove the dependence. The value now depends on a prior iteration.
+ SmallVector<SDep, 4> Deps;
+ for (SUnit::pred_iterator P = I.Preds.begin(), E = I.Preds.end(); P != E;
+ ++P)
+ if (P->getSUnit() == DefSU)
+ Deps.push_back(*P);
+ for (int i = 0, e = Deps.size(); i != e; i++) {
+ Topo.RemovePred(&I, Deps[i].getSUnit());
+ I.removePred(Deps[i]);
+ }
+ // Remove the chain dependence between the instructions.
+ Deps.clear();
+ for (auto &P : LastSU->Preds)
+ if (P.getSUnit() == &I && P.getKind() == SDep::Order)
+ Deps.push_back(P);
+ for (int i = 0, e = Deps.size(); i != e; i++) {
+ Topo.RemovePred(LastSU, Deps[i].getSUnit());
+ LastSU->removePred(Deps[i]);
+ }
+
+ // Add a dependence between the new instruction and the instruction
+ // that defines the new base.
+ SDep Dep(&I, SDep::Anti, NewBase);
+ LastSU->addPred(Dep);
+
+ // Remember the base and offset information so that we can update the
+ // instruction during code generation.
+ InstrChanges[&I] = std::make_pair(NewBase, NewOffset);
+ }
+}
+
+namespace {
+
+// FuncUnitSorter - Comparison operator used to sort instructions by
+// the number of functional unit choices.
+struct FuncUnitSorter {
+ const InstrItineraryData *InstrItins;
+ DenseMap<unsigned, unsigned> Resources;
+
+ // Compute the number of functional unit alternatives needed
+ // at each stage, and take the minimum value. We prioritize the
+ // instructions by the least number of choices first.
+ unsigned minFuncUnits(const MachineInstr *Inst, unsigned &F) const {
+ unsigned schedClass = Inst->getDesc().getSchedClass();
+ unsigned min = UINT_MAX;
+ for (const InstrStage *IS = InstrItins->beginStage(schedClass),
+ *IE = InstrItins->endStage(schedClass);
+ IS != IE; ++IS) {
+ unsigned funcUnits = IS->getUnits();
+ unsigned numAlternatives = countPopulation(funcUnits);
+ if (numAlternatives < min) {
+ min = numAlternatives;
+ F = funcUnits;
+ }
+ }
+ return min;
+ }
+
+ // Compute the critical resources needed by the instruction. This
+ // function records the functional units needed by instructions that
+ // must use only one functional unit. We use this as a tie breaker
+  // for computing the resource MII. The instructions that require
+ // the same, highly used, functional unit have high priority.
+ void calcCriticalResources(MachineInstr &MI) {
+ unsigned SchedClass = MI.getDesc().getSchedClass();
+ for (const InstrStage *IS = InstrItins->beginStage(SchedClass),
+ *IE = InstrItins->endStage(SchedClass);
+ IS != IE; ++IS) {
+ unsigned FuncUnits = IS->getUnits();
+ if (countPopulation(FuncUnits) == 1)
+ Resources[FuncUnits]++;
+ }
+ }
+
+ FuncUnitSorter(const InstrItineraryData *IID) : InstrItins(IID) {}
+  /// Return true if IS1 has lower priority than IS2.
+ bool operator()(const MachineInstr *IS1, const MachineInstr *IS2) const {
+ unsigned F1 = 0, F2 = 0;
+ unsigned MFUs1 = minFuncUnits(IS1, F1);
+ unsigned MFUs2 = minFuncUnits(IS2, F2);
+ if (MFUs1 == 1 && MFUs2 == 1)
+ return Resources.lookup(F1) < Resources.lookup(F2);
+ return MFUs1 > MFUs2;
+ }
+};
+
+} // end anonymous namespace
+
+/// Calculate the resource constrained minimum initiation interval for the
+/// specified loop. We use the DFA to model the resources needed for
+/// each instruction, and we ignore dependences. A different DFA is created
+/// for each cycle that is required. When adding a new instruction, we attempt
+/// to add it to each existing DFA, until a legal space is found. If the
+/// instruction cannot be reserved in an existing DFA, we create a new one.
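+///
+/// For example, if three instructions in the loop body can each issue only
+/// on the same single functional unit, three DFAs are needed to reserve them
+/// all, and the resulting ResMII is 3.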
+unsigned SwingSchedulerDAG::calculateResMII() {
+ SmallVector<DFAPacketizer *, 8> Resources;
+ MachineBasicBlock *MBB = Loop.getHeader();
+ Resources.push_back(TII->CreateTargetScheduleState(MF.getSubtarget()));
+
+ // Sort the instructions by the number of available choices for scheduling,
+ // least to most. Use the number of critical resources as the tie breaker.
+ FuncUnitSorter FUS =
+ FuncUnitSorter(MF.getSubtarget().getInstrItineraryData());
+ for (MachineBasicBlock::iterator I = MBB->getFirstNonPHI(),
+ E = MBB->getFirstTerminator();
+ I != E; ++I)
+ FUS.calcCriticalResources(*I);
+ PriorityQueue<MachineInstr *, std::vector<MachineInstr *>, FuncUnitSorter>
+ FuncUnitOrder(FUS);
+
+ for (MachineBasicBlock::iterator I = MBB->getFirstNonPHI(),
+ E = MBB->getFirstTerminator();
+ I != E; ++I)
+ FuncUnitOrder.push(&*I);
+
+ while (!FuncUnitOrder.empty()) {
+ MachineInstr *MI = FuncUnitOrder.top();
+ FuncUnitOrder.pop();
+ if (TII->isZeroCost(MI->getOpcode()))
+ continue;
+ // Attempt to reserve the instruction in an existing DFA. At least one
+ // DFA is needed for each cycle.
+ unsigned NumCycles = getSUnit(MI)->Latency;
+ unsigned ReservedCycles = 0;
+ SmallVectorImpl<DFAPacketizer *>::iterator RI = Resources.begin();
+ SmallVectorImpl<DFAPacketizer *>::iterator RE = Resources.end();
+ for (unsigned C = 0; C < NumCycles; ++C)
+ while (RI != RE) {
+ if ((*RI++)->canReserveResources(*MI)) {
+ ++ReservedCycles;
+ break;
+ }
+ }
+ // Start reserving resources using existing DFAs.
+ for (unsigned C = 0; C < ReservedCycles; ++C) {
+ --RI;
+ (*RI)->reserveResources(*MI);
+ }
+ // Add new DFAs, if needed, to reserve resources.
+ for (unsigned C = ReservedCycles; C < NumCycles; ++C) {
+ DFAPacketizer *NewResource =
+ TII->CreateTargetScheduleState(MF.getSubtarget());
+ assert(NewResource->canReserveResources(*MI) && "Reserve error.");
+ NewResource->reserveResources(*MI);
+ Resources.push_back(NewResource);
+ }
+ }
+ int Resmii = Resources.size();
+ // Delete the memory for each of the DFAs that were created earlier.
+ for (DFAPacketizer *RI : Resources) {
+ DFAPacketizer *D = RI;
+ delete D;
+ }
+ Resources.clear();
+ return Resmii;
+}
+
+/// Calculate the recurrence-constrained minimum initiation interval.
+/// Iterate over each circuit. Compute the delay(c) and distance(c)
+/// for each circuit. The II needs to satisfy the inequality
+/// delay(c) - II*distance(c) <= 0. For each circuit, choose the smallest
+/// II that satisfies the inequality, and the RecMII is the maximum
+/// of those values.
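+///
+/// In this implementation each circuit is approximated with
+/// delay(c) = |c| - 1 and distance(c) = 1, so a circuit of four nodes, for
+/// example, yields II >= ceil(3 / 1) = 3.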
+unsigned SwingSchedulerDAG::calculateRecMII(NodeSetType &NodeSets) {
+ unsigned RecMII = 0;
+
+ for (NodeSet &Nodes : NodeSets) {
+ if (Nodes.size() == 0)
+ continue;
+
+ unsigned Delay = Nodes.size() - 1;
+ unsigned Distance = 1;
+
+ // ii = ceil(delay / distance)
+ unsigned CurMII = (Delay + Distance - 1) / Distance;
+ Nodes.setRecMII(CurMII);
+ if (CurMII > RecMII)
+ RecMII = CurMII;
+ }
+
+ return RecMII;
+}
+
+/// Swap all the anti dependences in the DAG. That means it is no longer a DAG,
+/// but we do this to find the circuits, and then change them back.
+static void swapAntiDependences(std::vector<SUnit> &SUnits) {
+ SmallVector<std::pair<SUnit *, SDep>, 8> DepsAdded;
+ for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
+ SUnit *SU = &SUnits[i];
+ for (SUnit::pred_iterator IP = SU->Preds.begin(), EP = SU->Preds.end();
+ IP != EP; ++IP) {
+ if (IP->getKind() != SDep::Anti)
+ continue;
+ DepsAdded.push_back(std::make_pair(SU, *IP));
+ }
+ }
+ for (SmallVector<std::pair<SUnit *, SDep>, 8>::iterator I = DepsAdded.begin(),
+ E = DepsAdded.end();
+ I != E; ++I) {
+ // Remove this anti dependency and add one in the reverse direction.
+ SUnit *SU = I->first;
+ SDep &D = I->second;
+ SUnit *TargetSU = D.getSUnit();
+ unsigned Reg = D.getReg();
+ unsigned Lat = D.getLatency();
+ SU->removePred(D);
+ SDep Dep(SU, SDep::Anti, Reg);
+ Dep.setLatency(Lat);
+ TargetSU->addPred(Dep);
+ }
+}
+
+/// Create the adjacency structure of the nodes in the graph.
+void SwingSchedulerDAG::Circuits::createAdjacencyStructure(
+ SwingSchedulerDAG *DAG) {
+ BitVector Added(SUnits.size());
+ for (int i = 0, e = SUnits.size(); i != e; ++i) {
+ Added.reset();
+ // Add any successor to the adjacency matrix and exclude duplicates.
+ for (auto &SI : SUnits[i].Succs) {
+      // Do not process a boundary node; a back-edge is processed only
+ // if it goes to a Phi.
+ if (SI.getSUnit()->isBoundaryNode() ||
+ (SI.getKind() == SDep::Anti && !SI.getSUnit()->getInstr()->isPHI()))
+ continue;
+ int N = SI.getSUnit()->NodeNum;
+ if (!Added.test(N)) {
+ AdjK[i].push_back(N);
+ Added.set(N);
+ }
+ }
+ // A chain edge between a store and a load is treated as a back-edge in the
+ // adjacency matrix.
+ for (auto &PI : SUnits[i].Preds) {
+ if (!SUnits[i].getInstr()->mayStore() ||
+ !DAG->isLoopCarriedOrder(&SUnits[i], PI, false))
+ continue;
+ if (PI.getKind() == SDep::Order && PI.getSUnit()->getInstr()->mayLoad()) {
+ int N = PI.getSUnit()->NodeNum;
+ if (!Added.test(N)) {
+ AdjK[i].push_back(N);
+ Added.set(N);
+ }
+ }
+ }
+ }
+}
+
+/// Identify an elementary circuit in the dependence graph starting at the
+/// specified node.
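+/// This is the recursive CIRCUIT procedure from Johnson's algorithm: nodes on
+/// the current path are kept on Stack, while the Blocked bits and the B sets
+/// avoid re-exploring paths that cannot lead back to the start node S.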
+bool SwingSchedulerDAG::Circuits::circuit(int V, int S, NodeSetType &NodeSets,
+ bool HasBackedge) {
+ SUnit *SV = &SUnits[V];
+ bool F = false;
+ Stack.insert(SV);
+ Blocked.set(V);
+
+ for (auto W : AdjK[V]) {
+ if (NumPaths > MaxPaths)
+ break;
+ if (W < S)
+ continue;
+ if (W == S) {
+ if (!HasBackedge)
+ NodeSets.push_back(NodeSet(Stack.begin(), Stack.end()));
+ F = true;
+ ++NumPaths;
+ break;
+ } else if (!Blocked.test(W)) {
+ if (circuit(W, S, NodeSets, W < V ? true : HasBackedge))
+ F = true;
+ }
+ }
+
+ if (F)
+ unblock(V);
+ else {
+ for (auto W : AdjK[V]) {
+ if (W < S)
+ continue;
+ if (B[W].count(SV) == 0)
+ B[W].insert(SV);
+ }
+ }
+ Stack.pop_back();
+ return F;
+}
+
+/// Unblock a node in the circuit finding algorithm.
+void SwingSchedulerDAG::Circuits::unblock(int U) {
+ Blocked.reset(U);
+ SmallPtrSet<SUnit *, 4> &BU = B[U];
+ while (!BU.empty()) {
+ SmallPtrSet<SUnit *, 4>::iterator SI = BU.begin();
+ assert(SI != BU.end() && "Invalid B set.");
+ SUnit *W = *SI;
+ BU.erase(W);
+ if (Blocked.test(W->NodeNum))
+ unblock(W->NodeNum);
+ }
+}
+
+/// Identify all the elementary circuits in the dependence graph using
+/// Johnson's circuit algorithm.
+void SwingSchedulerDAG::findCircuits(NodeSetType &NodeSets) {
+ // Swap all the anti dependences in the DAG. That means it is no longer a DAG,
+ // but we do this to find the circuits, and then change them back.
+ swapAntiDependences(SUnits);
+
+ Circuits Cir(SUnits);
+ // Create the adjacency structure.
+ Cir.createAdjacencyStructure(this);
+ for (int i = 0, e = SUnits.size(); i != e; ++i) {
+ Cir.reset();
+ Cir.circuit(i, i, NodeSets);
+ }
+
+ // Change the dependences back so that we've created a DAG again.
+ swapAntiDependences(SUnits);
+}
+
+/// Return true for DAG nodes that we ignore when computing the cost functions.
+/// We ignore the back-edge recurrence in order to avoid unbounded recursion
+/// in the calculation of the ASAP, ALAP, etc functions.
+static bool ignoreDependence(const SDep &D, bool isPred) {
+ if (D.isArtificial())
+ return true;
+ return D.getKind() == SDep::Anti && isPred;
+}
+
+/// Compute several functions needed to order the nodes for scheduling.
+/// ASAP - Earliest time to schedule a node.
+/// ALAP - Latest time to schedule a node.
+/// MOV - Mobility function, difference between ALAP and ASAP.
+/// D - Depth of each node.
+/// H - Height of each node.
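+///
+/// Over the DAG edges these follow the usual SMS recurrences:
+///   ASAP(v) = max over preds u of ASAP(u) + latency(u,v) - distance(u,v)*MII
+///   ALAP(v) = min over succs w of ALAP(w) - latency(v,w) + distance(v,w)*MII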
+void SwingSchedulerDAG::computeNodeFunctions(NodeSetType &NodeSets) {
+
+ ScheduleInfo.resize(SUnits.size());
+
+ DEBUG({
+ for (ScheduleDAGTopologicalSort::const_iterator I = Topo.begin(),
+ E = Topo.end();
+ I != E; ++I) {
+ SUnit *SU = &SUnits[*I];
+ SU->dump(this);
+ }
+ });
+
+ int maxASAP = 0;
+ // Compute ASAP.
+ for (ScheduleDAGTopologicalSort::const_iterator I = Topo.begin(),
+ E = Topo.end();
+ I != E; ++I) {
+ int asap = 0;
+ SUnit *SU = &SUnits[*I];
+ for (SUnit::const_pred_iterator IP = SU->Preds.begin(),
+ EP = SU->Preds.end();
+ IP != EP; ++IP) {
+ if (ignoreDependence(*IP, true))
+ continue;
+ SUnit *pred = IP->getSUnit();
+ asap = std::max(asap, (int)(getASAP(pred) + getLatency(SU, *IP) -
+ getDistance(pred, SU, *IP) * MII));
+ }
+ maxASAP = std::max(maxASAP, asap);
+ ScheduleInfo[*I].ASAP = asap;
+ }
+
+ // Compute ALAP and MOV.
+ for (ScheduleDAGTopologicalSort::const_reverse_iterator I = Topo.rbegin(),
+ E = Topo.rend();
+ I != E; ++I) {
+ int alap = maxASAP;
+ SUnit *SU = &SUnits[*I];
+ for (SUnit::const_succ_iterator IS = SU->Succs.begin(),
+ ES = SU->Succs.end();
+ IS != ES; ++IS) {
+ if (ignoreDependence(*IS, true))
+ continue;
+ SUnit *succ = IS->getSUnit();
+ alap = std::min(alap, (int)(getALAP(succ) - getLatency(SU, *IS) +
+ getDistance(SU, succ, *IS) * MII));
+ }
+
+ ScheduleInfo[*I].ALAP = alap;
+ }
+
+ // After computing the node functions, compute the summary for each node set.
+ for (NodeSet &I : NodeSets)
+ I.computeNodeSetInfo(this);
+
+ DEBUG({
+ for (unsigned i = 0; i < SUnits.size(); i++) {
+ dbgs() << "\tNode " << i << ":\n";
+ dbgs() << "\t ASAP = " << getASAP(&SUnits[i]) << "\n";
+ dbgs() << "\t ALAP = " << getALAP(&SUnits[i]) << "\n";
+ dbgs() << "\t MOV = " << getMOV(&SUnits[i]) << "\n";
+ dbgs() << "\t D = " << getDepth(&SUnits[i]) << "\n";
+ dbgs() << "\t H = " << getHeight(&SUnits[i]) << "\n";
+ }
+ });
+}
+
+/// Compute the Pred_L(O) set, as defined in the paper. The set is defined
+/// as the predecessors of the elements of NodeOrder that are not also in
+/// NodeOrder.
+static bool pred_L(SetVector<SUnit *> &NodeOrder,
+ SmallSetVector<SUnit *, 8> &Preds,
+ const NodeSet *S = nullptr) {
+ Preds.clear();
+ for (SetVector<SUnit *>::iterator I = NodeOrder.begin(), E = NodeOrder.end();
+ I != E; ++I) {
+ for (SUnit::pred_iterator PI = (*I)->Preds.begin(), PE = (*I)->Preds.end();
+ PI != PE; ++PI) {
+ if (S && S->count(PI->getSUnit()) == 0)
+ continue;
+ if (ignoreDependence(*PI, true))
+ continue;
+ if (NodeOrder.count(PI->getSUnit()) == 0)
+ Preds.insert(PI->getSUnit());
+ }
+ // Back-edges are predecessors with an anti-dependence.
+ for (SUnit::const_succ_iterator IS = (*I)->Succs.begin(),
+ ES = (*I)->Succs.end();
+ IS != ES; ++IS) {
+ if (IS->getKind() != SDep::Anti)
+ continue;
+ if (S && S->count(IS->getSUnit()) == 0)
+ continue;
+ if (NodeOrder.count(IS->getSUnit()) == 0)
+ Preds.insert(IS->getSUnit());
+ }
+ }
+ return Preds.size() > 0;
+}
+
+/// Compute the Succ_L(O) set, as defined in the paper. The set is defined
+/// as the successors of the elements of NodeOrder that are not also in
+/// NodeOrder.
+static bool succ_L(SetVector<SUnit *> &NodeOrder,
+ SmallSetVector<SUnit *, 8> &Succs,
+ const NodeSet *S = nullptr) {
+ Succs.clear();
+ for (SetVector<SUnit *>::iterator I = NodeOrder.begin(), E = NodeOrder.end();
+ I != E; ++I) {
+ for (SUnit::succ_iterator SI = (*I)->Succs.begin(), SE = (*I)->Succs.end();
+ SI != SE; ++SI) {
+ if (S && S->count(SI->getSUnit()) == 0)
+ continue;
+ if (ignoreDependence(*SI, false))
+ continue;
+ if (NodeOrder.count(SI->getSUnit()) == 0)
+ Succs.insert(SI->getSUnit());
+ }
+ for (SUnit::const_pred_iterator PI = (*I)->Preds.begin(),
+ PE = (*I)->Preds.end();
+ PI != PE; ++PI) {
+ if (PI->getKind() != SDep::Anti)
+ continue;
+ if (S && S->count(PI->getSUnit()) == 0)
+ continue;
+ if (NodeOrder.count(PI->getSUnit()) == 0)
+ Succs.insert(PI->getSUnit());
+ }
+ }
+ return Succs.size() > 0;
+}
+
+/// Return true if there is a path from the specified node to any of the nodes
+/// in DestNodes. Keep track and return the nodes in any path.
+static bool computePath(SUnit *Cur, SetVector<SUnit *> &Path,
+ SetVector<SUnit *> &DestNodes,
+ SetVector<SUnit *> &Exclude,
+ SmallPtrSet<SUnit *, 8> &Visited) {
+ if (Cur->isBoundaryNode())
+ return false;
+ if (Exclude.count(Cur) != 0)
+ return false;
+ if (DestNodes.count(Cur) != 0)
+ return true;
+ if (!Visited.insert(Cur).second)
+ return Path.count(Cur) != 0;
+ bool FoundPath = false;
+ for (auto &SI : Cur->Succs)
+ FoundPath |= computePath(SI.getSUnit(), Path, DestNodes, Exclude, Visited);
+ for (auto &PI : Cur->Preds)
+ if (PI.getKind() == SDep::Anti)
+ FoundPath |=
+ computePath(PI.getSUnit(), Path, DestNodes, Exclude, Visited);
+ if (FoundPath)
+ Path.insert(Cur);
+ return FoundPath;
+}
+
+/// Return true if Set1 is a subset of Set2.
+template <class S1Ty, class S2Ty> static bool isSubset(S1Ty &Set1, S2Ty &Set2) {
+ for (typename S1Ty::iterator I = Set1.begin(), E = Set1.end(); I != E; ++I)
+ if (Set2.count(*I) == 0)
+ return false;
+ return true;
+}
+
+/// Compute the live-out registers for the instructions in a node-set.
+/// The live-out registers are those that are defined in the node-set but
+/// not used within it; uses by Phi instructions do not count.
+static void computeLiveOuts(MachineFunction &MF, RegPressureTracker &RPTracker,
+ NodeSet &NS) {
+ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ SmallVector<RegisterMaskPair, 8> LiveOutRegs;
+ SmallSet<unsigned, 4> Uses;
+ for (SUnit *SU : NS) {
+ const MachineInstr *MI = SU->getInstr();
+ if (MI->isPHI())
+ continue;
+ for (const MachineOperand &MO : MI->operands())
+ if (MO.isReg() && MO.isUse()) {
+ unsigned Reg = MO.getReg();
+ if (TargetRegisterInfo::isVirtualRegister(Reg))
+ Uses.insert(Reg);
+ else if (MRI.isAllocatable(Reg))
+ for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units)
+ Uses.insert(*Units);
+ }
+ }
+ for (SUnit *SU : NS)
+ for (const MachineOperand &MO : SU->getInstr()->operands())
+ if (MO.isReg() && MO.isDef() && !MO.isDead()) {
+ unsigned Reg = MO.getReg();
+ if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ if (!Uses.count(Reg))
+ LiveOutRegs.push_back(RegisterMaskPair(Reg,
+ LaneBitmask::getNone()));
+ } else if (MRI.isAllocatable(Reg)) {
+ for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units)
+ if (!Uses.count(*Units))
+ LiveOutRegs.push_back(RegisterMaskPair(*Units,
+ LaneBitmask::getNone()));
+ }
+ }
+ RPTracker.addLiveRegs(LiveOutRegs);
+}
+
+/// A heuristic to filter nodes in recurrent node-sets if the register
+/// pressure of a set is too high.
+void SwingSchedulerDAG::registerPressureFilter(NodeSetType &NodeSets) {
+ for (auto &NS : NodeSets) {
+ // Skip small node-sets since they won't cause register pressure problems.
+ if (NS.size() <= 2)
+ continue;
+ IntervalPressure RecRegPressure;
+ RegPressureTracker RecRPTracker(RecRegPressure);
+ RecRPTracker.init(&MF, &RegClassInfo, &LIS, BB, BB->end(), false, true);
+ computeLiveOuts(MF, RecRPTracker, NS);
+ RecRPTracker.closeBottom();
+
+ std::vector<SUnit *> SUnits(NS.begin(), NS.end());
+ std::sort(SUnits.begin(), SUnits.end(), [](const SUnit *A, const SUnit *B) {
+ return A->NodeNum > B->NodeNum;
+ });
+
+ for (auto &SU : SUnits) {
+ // Since we're computing the register pressure for a subset of the
+ // instructions in a block, we need to set the tracker for each
+ // instruction in the node-set. The tracker is set to the instruction
+ // just after the one we're interested in.
+ MachineBasicBlock::const_iterator CurInstI = SU->getInstr();
+ RecRPTracker.setPos(std::next(CurInstI));
+
+ RegPressureDelta RPDelta;
+ ArrayRef<PressureChange> CriticalPSets;
+ RecRPTracker.getMaxUpwardPressureDelta(SU->getInstr(), nullptr, RPDelta,
+ CriticalPSets,
+ RecRegPressure.MaxSetPressure);
+ if (RPDelta.Excess.isValid()) {
+ DEBUG(dbgs() << "Excess register pressure: SU(" << SU->NodeNum << ") "
+ << TRI->getRegPressureSetName(RPDelta.Excess.getPSet())
+ << ":" << RPDelta.Excess.getUnitInc());
+ NS.setExceedPressure(SU);
+ break;
+ }
+ RecRPTracker.recede();
+ }
+ }
+}
+
+/// A heuristic to colocate node sets that have the same set of
+/// successors.
+void SwingSchedulerDAG::colocateNodeSets(NodeSetType &NodeSets) {
+ unsigned Colocate = 0;
+ for (int i = 0, e = NodeSets.size(); i < e; ++i) {
+ NodeSet &N1 = NodeSets[i];
+ SmallSetVector<SUnit *, 8> S1;
+ if (N1.empty() || !succ_L(N1, S1))
+ continue;
+ for (int j = i + 1; j < e; ++j) {
+ NodeSet &N2 = NodeSets[j];
+ if (N1.compareRecMII(N2) != 0)
+ continue;
+ SmallSetVector<SUnit *, 8> S2;
+ if (N2.empty() || !succ_L(N2, S2))
+ continue;
+ if (isSubset(S1, S2) && S1.size() == S2.size()) {
+ N1.setColocate(++Colocate);
+ N2.setColocate(Colocate);
+ break;
+ }
+ }
+ }
+}
+
+/// Check if the existing node-sets are profitable. If not, then ignore the
+/// recurrent node-sets, and attempt to schedule all nodes together. This is
+/// a heuristic. If the MII is large and there is a non-recurrent node with
+/// a large depth compared to the MII, then it's best to try and schedule
+/// all instructions together instead of starting with the recurrent node-sets.
+void SwingSchedulerDAG::checkNodeSets(NodeSetType &NodeSets) {
+ // Look for loops with a large MII.
+ if (MII <= 20)
+ return;
+ // Check if the node-set contains only a simple add recurrence.
+ for (auto &NS : NodeSets)
+ if (NS.size() > 2)
+ return;
+ // If the depth of any instruction is significantly larger than the MII, then
+ // ignore the recurrent node-sets and treat all instructions equally.
+ for (auto &SU : SUnits)
+ if (SU.getDepth() > MII * 1.5) {
+ NodeSets.clear();
+ DEBUG(dbgs() << "Clear recurrence node-sets\n");
+ return;
+ }
+}
+
+/// Add the nodes that do not belong to a recurrence set into groups
+/// based upon connected components.
+void SwingSchedulerDAG::groupRemainingNodes(NodeSetType &NodeSets) {
+ SetVector<SUnit *> NodesAdded;
+ SmallPtrSet<SUnit *, 8> Visited;
+ // Add the nodes that are on a path between the previous node sets and
+ // the current node set.
+ for (NodeSet &I : NodeSets) {
+ SmallSetVector<SUnit *, 8> N;
+ // Add the nodes from the current node set to the previous node set.
+ if (succ_L(I, N)) {
+ SetVector<SUnit *> Path;
+ for (SUnit *NI : N) {
+ Visited.clear();
+ computePath(NI, Path, NodesAdded, I, Visited);
+ }
+ if (Path.size() > 0)
+ I.insert(Path.begin(), Path.end());
+ }
+ // Add the nodes from the previous node set to the current node set.
+ N.clear();
+ if (succ_L(NodesAdded, N)) {
+ SetVector<SUnit *> Path;
+ for (SUnit *NI : N) {
+ Visited.clear();
+ computePath(NI, Path, I, NodesAdded, Visited);
+ }
+ if (Path.size() > 0)
+ I.insert(Path.begin(), Path.end());
+ }
+ NodesAdded.insert(I.begin(), I.end());
+ }
+
+ // Create a new node set with the connected nodes of any successor of a node
+ // in a recurrent set.
+ NodeSet NewSet;
+ SmallSetVector<SUnit *, 8> N;
+ if (succ_L(NodesAdded, N))
+ for (SUnit *I : N)
+ addConnectedNodes(I, NewSet, NodesAdded);
+ if (NewSet.size() > 0)
+ NodeSets.push_back(NewSet);
+
+ // Create a new node set with the connected nodes of any predecessor of a node
+ // in a recurrent set.
+ NewSet.clear();
+ if (pred_L(NodesAdded, N))
+ for (SUnit *I : N)
+ addConnectedNodes(I, NewSet, NodesAdded);
+ if (NewSet.size() > 0)
+ NodeSets.push_back(NewSet);
+
+  // Create new node sets with the connected nodes of any remaining node that
+ // has no predecessor.
+ for (unsigned i = 0; i < SUnits.size(); ++i) {
+ SUnit *SU = &SUnits[i];
+ if (NodesAdded.count(SU) == 0) {
+ NewSet.clear();
+ addConnectedNodes(SU, NewSet, NodesAdded);
+ if (NewSet.size() > 0)
+ NodeSets.push_back(NewSet);
+ }
+ }
+}
+
+/// Add the node to the set, and add all of its connected nodes to the set.
+void SwingSchedulerDAG::addConnectedNodes(SUnit *SU, NodeSet &NewSet,
+ SetVector<SUnit *> &NodesAdded) {
+ NewSet.insert(SU);
+ NodesAdded.insert(SU);
+ for (auto &SI : SU->Succs) {
+ SUnit *Successor = SI.getSUnit();
+ if (!SI.isArtificial() && NodesAdded.count(Successor) == 0)
+ addConnectedNodes(Successor, NewSet, NodesAdded);
+ }
+ for (auto &PI : SU->Preds) {
+ SUnit *Predecessor = PI.getSUnit();
+ if (!PI.isArtificial() && NodesAdded.count(Predecessor) == 0)
+ addConnectedNodes(Predecessor, NewSet, NodesAdded);
+ }
+}
+
+/// Return true if Set1 contains elements in Set2. The elements in common
+/// are returned in a different container.
+static bool isIntersect(SmallSetVector<SUnit *, 8> &Set1, const NodeSet &Set2,
+ SmallSetVector<SUnit *, 8> &Result) {
+ Result.clear();
+ for (unsigned i = 0, e = Set1.size(); i != e; ++i) {
+ SUnit *SU = Set1[i];
+ if (Set2.count(SU) != 0)
+ Result.insert(SU);
+ }
+ return !Result.empty();
+}
+
+/// Merge the recurrence node sets that have the same initial node.
+void SwingSchedulerDAG::fuseRecs(NodeSetType &NodeSets) {
+ for (NodeSetType::iterator I = NodeSets.begin(), E = NodeSets.end(); I != E;
+ ++I) {
+ NodeSet &NI = *I;
+ for (NodeSetType::iterator J = I + 1; J != E;) {
+ NodeSet &NJ = *J;
+ if (NI.getNode(0)->NodeNum == NJ.getNode(0)->NodeNum) {
+ if (NJ.compareRecMII(NI) > 0)
+ NI.setRecMII(NJ.getRecMII());
+ for (NodeSet::iterator NII = J->begin(), ENI = J->end(); NII != ENI;
+ ++NII)
+ I->insert(*NII);
+ NodeSets.erase(J);
+ E = NodeSets.end();
+ } else {
+ ++J;
+ }
+ }
+ }
+}
+
+/// Remove nodes that have been scheduled in previous NodeSets.
+void SwingSchedulerDAG::removeDuplicateNodes(NodeSetType &NodeSets) {
+ for (NodeSetType::iterator I = NodeSets.begin(), E = NodeSets.end(); I != E;
+ ++I)
+ for (NodeSetType::iterator J = I + 1; J != E;) {
+ J->remove_if([&](SUnit *SUJ) { return I->count(SUJ); });
+
+ if (J->size() == 0) {
+ NodeSets.erase(J);
+ E = NodeSets.end();
+ } else {
+ ++J;
+ }
+ }
+}
+
+/// Return true if Inst1 defines a value that is used in Inst2.
+static bool hasDataDependence(SUnit *Inst1, SUnit *Inst2) {
+ for (auto &SI : Inst1->Succs)
+ if (SI.getSUnit() == Inst2 && SI.getKind() == SDep::Data)
+ return true;
+ return false;
+}
+
+/// Compute an ordered list of the dependence graph nodes, which
+/// indicates the order that the nodes will be scheduled. This is a
+/// two-level algorithm. First, a partial order is created, which
+/// consists of a list of sets ordered from highest to lowest priority.
+/// Second, the nodes in each set are ordered by a traversal that
+/// alternates between bottom-up and top-down passes, choosing the
+/// direction from how the set connects to the nodes already ordered.
+void SwingSchedulerDAG::computeNodeOrder(NodeSetType &NodeSets) {
+ SmallSetVector<SUnit *, 8> R;
+ NodeOrder.clear();
+
+ for (auto &Nodes : NodeSets) {
+ DEBUG(dbgs() << "NodeSet size " << Nodes.size() << "\n");
+ OrderKind Order;
+ SmallSetVector<SUnit *, 8> N;
+ if (pred_L(NodeOrder, N) && isSubset(N, Nodes)) {
+ R.insert(N.begin(), N.end());
+ Order = BottomUp;
+ DEBUG(dbgs() << " Bottom up (preds) ");
+ } else if (succ_L(NodeOrder, N) && isSubset(N, Nodes)) {
+ R.insert(N.begin(), N.end());
+ Order = TopDown;
+ DEBUG(dbgs() << " Top down (succs) ");
+ } else if (isIntersect(N, Nodes, R)) {
+ // If some of the successors are in the existing node-set, then use the
+ // top-down ordering.
+ Order = TopDown;
+ DEBUG(dbgs() << " Top down (intersect) ");
+ } else if (NodeSets.size() == 1) {
+ for (auto &N : Nodes)
+ if (N->Succs.size() == 0)
+ R.insert(N);
+ Order = BottomUp;
+ DEBUG(dbgs() << " Bottom up (all) ");
+ } else {
+ // Find the node with the highest ASAP.
+ SUnit *maxASAP = nullptr;
+ for (SUnit *SU : Nodes) {
+ if (maxASAP == nullptr || getASAP(SU) >= getASAP(maxASAP))
+ maxASAP = SU;
+ }
+ R.insert(maxASAP);
+ Order = BottomUp;
+ DEBUG(dbgs() << " Bottom up (default) ");
+ }
+
+ while (!R.empty()) {
+ if (Order == TopDown) {
+ // Choose the node with the maximum height. If more than one, choose
+ // the node with the lowest MOV. If still more than one, check if there
+ // is a dependence between the instructions.
+ while (!R.empty()) {
+ SUnit *maxHeight = nullptr;
+ for (SUnit *I : R) {
+ if (maxHeight == nullptr || getHeight(I) > getHeight(maxHeight))
+ maxHeight = I;
+ else if (getHeight(I) == getHeight(maxHeight) &&
+ getMOV(I) < getMOV(maxHeight) &&
+ !hasDataDependence(maxHeight, I))
+ maxHeight = I;
+ else if (hasDataDependence(I, maxHeight))
+ maxHeight = I;
+ }
+ NodeOrder.insert(maxHeight);
+ DEBUG(dbgs() << maxHeight->NodeNum << " ");
+ R.remove(maxHeight);
+ for (const auto &I : maxHeight->Succs) {
+ if (Nodes.count(I.getSUnit()) == 0)
+ continue;
+ if (NodeOrder.count(I.getSUnit()) != 0)
+ continue;
+ if (ignoreDependence(I, false))
+ continue;
+ R.insert(I.getSUnit());
+ }
+ // Back-edges are predecessors with an anti-dependence.
+ for (const auto &I : maxHeight->Preds) {
+ if (I.getKind() != SDep::Anti)
+ continue;
+ if (Nodes.count(I.getSUnit()) == 0)
+ continue;
+ if (NodeOrder.count(I.getSUnit()) != 0)
+ continue;
+ R.insert(I.getSUnit());
+ }
+ }
+ Order = BottomUp;
+ DEBUG(dbgs() << "\n Switching order to bottom up ");
+ SmallSetVector<SUnit *, 8> N;
+ if (pred_L(NodeOrder, N, &Nodes))
+ R.insert(N.begin(), N.end());
+ } else {
+ // Choose the node with the maximum depth. If more than one, choose
+ // the node with the lowest MOV. If there is still more than one, check
+ // for a dependence between the instructions.
+ while (!R.empty()) {
+ SUnit *maxDepth = nullptr;
+ for (SUnit *I : R) {
+ if (maxDepth == nullptr || getDepth(I) > getDepth(maxDepth))
+ maxDepth = I;
+ else if (getDepth(I) == getDepth(maxDepth) &&
+ getMOV(I) < getMOV(maxDepth) &&
+ !hasDataDependence(I, maxDepth))
+ maxDepth = I;
+ else if (hasDataDependence(maxDepth, I))
+ maxDepth = I;
+ }
+ NodeOrder.insert(maxDepth);
+ DEBUG(dbgs() << maxDepth->NodeNum << " ");
+ R.remove(maxDepth);
+ if (Nodes.isExceedSU(maxDepth)) {
+ Order = TopDown;
+ R.clear();
+ R.insert(Nodes.getNode(0));
+ break;
+ }
+ for (const auto &I : maxDepth->Preds) {
+ if (Nodes.count(I.getSUnit()) == 0)
+ continue;
+ if (NodeOrder.count(I.getSUnit()) != 0)
+ continue;
+ if (I.getKind() == SDep::Anti)
+ continue;
+ R.insert(I.getSUnit());
+ }
+ // Back-edges are predecessors with an anti-dependence.
+ for (const auto &I : maxDepth->Succs) {
+ if (I.getKind() != SDep::Anti)
+ continue;
+ if (Nodes.count(I.getSUnit()) == 0)
+ continue;
+ if (NodeOrder.count(I.getSUnit()) != 0)
+ continue;
+ R.insert(I.getSUnit());
+ }
+ }
+ Order = TopDown;
+ DEBUG(dbgs() << "\n Switching order to top down ");
+ SmallSetVector<SUnit *, 8> N;
+ if (succ_L(NodeOrder, N, &Nodes))
+ R.insert(N.begin(), N.end());
+ }
+ }
+ DEBUG(dbgs() << "\nDone with Nodeset\n");
+ }
+
+ DEBUG({
+ dbgs() << "Node order: ";
+ for (SUnit *I : NodeOrder)
+ dbgs() << " " << I->NodeNum << " ";
+ dbgs() << "\n";
+ });
+}
+
+/// Process the nodes in the computed order and create the pipelined schedule
+/// of the instructions, if possible. Return true if a schedule is found.
+bool SwingSchedulerDAG::schedulePipeline(SMSchedule &Schedule) {
+
+ if (NodeOrder.size() == 0)
+ return false;
+
+ bool scheduleFound = false;
+ // Keep increasing II until a valid schedule is found.
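+ // For example, with MII = 4 this tries II = 4 through 13, keeping the
+ // first II for which every instruction fits its scheduling window and
+ // the resource reservations succeed.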
+ for (unsigned II = MII; II < MII + 10 && !scheduleFound; ++II) {
+ Schedule.reset();
+ Schedule.setInitiationInterval(II);
+ DEBUG(dbgs() << "Try to schedule with " << II << "\n");
+
+ SetVector<SUnit *>::iterator NI = NodeOrder.begin();
+ SetVector<SUnit *>::iterator NE = NodeOrder.end();
+ do {
+ SUnit *SU = *NI;
+
+ // Compute the schedule time for the instruction, which is based
+ // upon the scheduled time for any predecessors/successors.
+ int EarlyStart = INT_MIN;
+ int LateStart = INT_MAX;
+ // These values are set when the size of the schedule window is limited
+ // due to chain dependences.
+ int SchedEnd = INT_MAX;
+ int SchedStart = INT_MIN;
+ Schedule.computeStart(SU, &EarlyStart, &LateStart, &SchedEnd, &SchedStart,
+ II, this);
+ DEBUG({
+ dbgs() << "Inst (" << SU->NodeNum << ") ";
+ SU->getInstr()->dump();
+ dbgs() << "\n";
+ });
+ DEBUG({
+ dbgs() << "\tes: " << EarlyStart << " ls: " << LateStart
+ << " me: " << SchedEnd << " ms: " << SchedStart << "\n";
+ });
+
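+      // Four cases follow: only the early start is known (schedule forward
+      // from it), only the late start is known (schedule backward from it),
+      // both are known (a window of at most II cycles), or neither is known
+      // (fall back to the ASAP value). A window never needs to span more
+      // than II cycles, since the schedule repeats every II cycles.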
+ if (EarlyStart > LateStart || SchedEnd < EarlyStart ||
+ SchedStart > LateStart)
+ scheduleFound = false;
+ else if (EarlyStart != INT_MIN && LateStart == INT_MAX) {
+ SchedEnd = std::min(SchedEnd, EarlyStart + (int)II - 1);
+ scheduleFound = Schedule.insert(SU, EarlyStart, SchedEnd, II);
+ } else if (EarlyStart == INT_MIN && LateStart != INT_MAX) {
+ SchedStart = std::max(SchedStart, LateStart - (int)II + 1);
+ scheduleFound = Schedule.insert(SU, LateStart, SchedStart, II);
+ } else if (EarlyStart != INT_MIN && LateStart != INT_MAX) {
+ SchedEnd =
+ std::min(SchedEnd, std::min(LateStart, EarlyStart + (int)II - 1));
+ // When scheduling a Phi it is better to start at the late cycle and go
+ // backwards. The default order may insert the Phi too far away from
+ // its first dependence.
+ if (SU->getInstr()->isPHI())
+ scheduleFound = Schedule.insert(SU, SchedEnd, EarlyStart, II);
+ else
+ scheduleFound = Schedule.insert(SU, EarlyStart, SchedEnd, II);
+ } else {
+ int FirstCycle = Schedule.getFirstCycle();
+ scheduleFound = Schedule.insert(SU, FirstCycle + getASAP(SU),
+ FirstCycle + getASAP(SU) + II - 1, II);
+ }
+ // Even if we find a schedule, make sure the schedule doesn't exceed the
+ // allowable number of stages. We keep trying if this happens.
+ if (scheduleFound)
+ if (SwpMaxStages > -1 &&
+ Schedule.getMaxStageCount() > (unsigned)SwpMaxStages)
+ scheduleFound = false;
+
+ DEBUG({
+ if (!scheduleFound)
+ dbgs() << "\tCan't schedule\n";
+ });
+ } while (++NI != NE && scheduleFound);
+
+ // If a schedule is found, check if it is a valid schedule too.
+ if (scheduleFound)
+ scheduleFound = Schedule.isValidSchedule(this);
+ }
+
+ DEBUG(dbgs() << "Schedule Found? " << scheduleFound << "\n");
+
+ if (scheduleFound)
+ Schedule.finalizeSchedule(this);
+ else
+ Schedule.reset();
+
+ return scheduleFound && Schedule.getMaxStageCount() > 0;
+}
+
+/// Given a schedule for the loop, generate a new version of the loop,
+/// and replace the old version. This function generates a prolog
+/// that contains the initial iterations in the pipeline, the kernel
+/// loop, and the epilog that contains the code for the final
+/// iterations.
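+/// For example, with MaxStageCount = 2 (three stages), two prolog blocks
+/// fill the pipeline (the first runs stage 0 of iteration 0; the second
+/// runs stage 0 of iteration 1 and stage 1 of iteration 0), the kernel
+/// executes all three stages in the steady state, and two epilog blocks
+/// drain the remaining in-flight iterations.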
+void SwingSchedulerDAG::generatePipelinedLoop(SMSchedule &Schedule) {
+ // Create a new basic block for the kernel and add it to the CFG.
+ MachineBasicBlock *KernelBB = MF.CreateMachineBasicBlock(BB->getBasicBlock());
+
+ unsigned MaxStageCount = Schedule.getMaxStageCount();
+
+ // Remember the registers that are used in different stages. The index is
+ // the iteration, or stage, that the instruction is scheduled in. This is
+ // a map between register names in the original block and the names created
+ // in each stage of the pipelined loop.
+ ValueMapTy *VRMap = new ValueMapTy[(MaxStageCount + 1) * 2];
+ InstrMapTy InstrMap;
+
+ SmallVector<MachineBasicBlock *, 4> PrologBBs;
+ // Generate the prolog instructions that set up the pipeline.
+ generateProlog(Schedule, MaxStageCount, KernelBB, VRMap, PrologBBs);
+ MF.insert(BB->getIterator(), KernelBB);
+
+ // Rearrange the instructions to generate the new, pipelined loop,
+ // and update register names as needed.
+ for (int Cycle = Schedule.getFirstCycle(),
+ LastCycle = Schedule.getFinalCycle();
+ Cycle <= LastCycle; ++Cycle) {
+ std::deque<SUnit *> &CycleInstrs = Schedule.getInstructions(Cycle);
+ // This inner loop schedules each instruction in the cycle.
+ for (SUnit *CI : CycleInstrs) {
+ if (CI->getInstr()->isPHI())
+ continue;
+ unsigned StageNum = Schedule.stageScheduled(getSUnit(CI->getInstr()));
+ MachineInstr *NewMI = cloneInstr(CI->getInstr(), MaxStageCount, StageNum);
+ updateInstruction(NewMI, false, MaxStageCount, StageNum, Schedule, VRMap);
+ KernelBB->push_back(NewMI);
+ InstrMap[NewMI] = CI->getInstr();
+ }
+ }
+
+ // Copy any terminator instructions to the new kernel, and update
+ // names as needed.
+ for (MachineBasicBlock::iterator I = BB->getFirstTerminator(),
+ E = BB->instr_end();
+ I != E; ++I) {
+ MachineInstr *NewMI = MF.CloneMachineInstr(&*I);
+ updateInstruction(NewMI, false, MaxStageCount, 0, Schedule, VRMap);
+ KernelBB->push_back(NewMI);
+ InstrMap[NewMI] = &*I;
+ }
+
+ KernelBB->transferSuccessors(BB);
+ KernelBB->replaceSuccessor(BB, KernelBB);
+
+ generateExistingPhis(KernelBB, PrologBBs.back(), KernelBB, KernelBB, Schedule,
+ VRMap, InstrMap, MaxStageCount, MaxStageCount, false);
+ generatePhis(KernelBB, PrologBBs.back(), KernelBB, KernelBB, Schedule, VRMap,
+ InstrMap, MaxStageCount, MaxStageCount, false);
+
+ DEBUG(dbgs() << "New block\n"; KernelBB->dump(););
+
+ SmallVector<MachineBasicBlock *, 4> EpilogBBs;
+ // Generate the epilog instructions to complete the pipeline.
+ generateEpilog(Schedule, MaxStageCount, KernelBB, VRMap, EpilogBBs,
+ PrologBBs);
+
+ // We need this step because the register allocation doesn't handle some
+ // situations well, so we insert copies to help out.
+ splitLifetimes(KernelBB, EpilogBBs, Schedule);
+
+ // Remove dead instructions due to loop induction variables.
+ removeDeadInstructions(KernelBB, EpilogBBs);
+
+ // Add branches between prolog and epilog blocks.
+ addBranches(PrologBBs, KernelBB, EpilogBBs, Schedule, VRMap);
+
+ // Remove the original loop since it's no longer referenced.
+ BB->clear();
+ BB->eraseFromParent();
+
+ delete[] VRMap;
+}
+
+/// Generate the pipeline prolog code.
+void SwingSchedulerDAG::generateProlog(SMSchedule &Schedule, unsigned LastStage,
+ MachineBasicBlock *KernelBB,
+ ValueMapTy *VRMap,
+ MBBVectorTy &PrologBBs) {
+ MachineBasicBlock *PreheaderBB = MLI->getLoopFor(BB)->getLoopPreheader();
+ assert(PreheaderBB != nullptr &&
+ "Need to add code to handle loops w/o preheader");
+ MachineBasicBlock *PredBB = PreheaderBB;
+ InstrMapTy InstrMap;
+
+ // Generate a basic block for each stage, not including the last stage,
+ // which will be generated in the kernel. Each basic block may contain
+ // instructions from multiple stages/iterations.
+ for (unsigned i = 0; i < LastStage; ++i) {
+ // Create and insert the prolog basic block prior to the original loop
+ // basic block. The original loop is removed later.
+ MachineBasicBlock *NewBB = MF.CreateMachineBasicBlock(BB->getBasicBlock());
+ PrologBBs.push_back(NewBB);
+ MF.insert(BB->getIterator(), NewBB);
+ NewBB->transferSuccessors(PredBB);
+ PredBB->addSuccessor(NewBB);
+ PredBB = NewBB;
+
+ // Generate instructions for each appropriate stage. Process instructions
+ // in original program order.
+ for (int StageNum = i; StageNum >= 0; --StageNum) {
+ for (MachineBasicBlock::iterator BBI = BB->instr_begin(),
+ BBE = BB->getFirstTerminator();
+ BBI != BBE; ++BBI) {
+ if (Schedule.isScheduledAtStage(getSUnit(&*BBI), (unsigned)StageNum)) {
+ if (BBI->isPHI())
+ continue;
+ MachineInstr *NewMI =
+ cloneAndChangeInstr(&*BBI, i, (unsigned)StageNum, Schedule);
+ updateInstruction(NewMI, false, i, (unsigned)StageNum, Schedule,
+ VRMap);
+ NewBB->push_back(NewMI);
+ InstrMap[NewMI] = &*BBI;
+ }
+ }
+ }
+ rewritePhiValues(NewBB, i, Schedule, VRMap, InstrMap);
+ DEBUG({
+ dbgs() << "prolog:\n";
+ NewBB->dump();
+ });
+ }
+
+ PredBB->replaceSuccessor(BB, KernelBB);
+
+ // Check if we need to remove the branch from the preheader to the original
+ // loop, and replace it with a branch to the new loop.
+ unsigned numBranches = TII->removeBranch(*PreheaderBB);
+ if (numBranches) {
+ SmallVector<MachineOperand, 0> Cond;
+ TII->insertBranch(*PreheaderBB, PrologBBs[0], nullptr, Cond, DebugLoc());
+ }
+}
+
+/// Generate the pipeline epilog code. The epilog code finishes the iterations
+/// that were started in either the prolog or the kernel. We create a basic
+/// block for each stage that needs to complete.
+void SwingSchedulerDAG::generateEpilog(SMSchedule &Schedule, unsigned LastStage,
+ MachineBasicBlock *KernelBB,
+ ValueMapTy *VRMap,
+ MBBVectorTy &EpilogBBs,
+ MBBVectorTy &PrologBBs) {
+ // We need to change the branch from the kernel to the first epilog block, so
+ // this call to analyze branch uses the kernel rather than the original BB.
+ MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
+ SmallVector<MachineOperand, 4> Cond;
+ bool checkBranch = TII->analyzeBranch(*KernelBB, TBB, FBB, Cond);
+ assert(!checkBranch && "generateEpilog must be able to analyze the branch");
+ if (checkBranch)
+ return;
+
+ MachineBasicBlock::succ_iterator LoopExitI = KernelBB->succ_begin();
+ if (*LoopExitI == KernelBB)
+ ++LoopExitI;
+ assert(LoopExitI != KernelBB->succ_end() && "Expecting a successor");
+ MachineBasicBlock *LoopExitBB = *LoopExitI;
+
+ MachineBasicBlock *PredBB = KernelBB;
+ MachineBasicBlock *EpilogStart = LoopExitBB;
+ InstrMapTy InstrMap;
+
+ // Generate a basic block for each stage, not including the last stage,
+ // which was generated for the kernel. Each basic block may contain
+ // instructions from multiple stages/iterations.
+ int EpilogStage = LastStage + 1;
+ for (unsigned i = LastStage; i >= 1; --i, ++EpilogStage) {
+ MachineBasicBlock *NewBB = MF.CreateMachineBasicBlock();
+ EpilogBBs.push_back(NewBB);
+ MF.insert(BB->getIterator(), NewBB);
+
+ PredBB->replaceSuccessor(LoopExitBB, NewBB);
+ NewBB->addSuccessor(LoopExitBB);
+
+ if (EpilogStart == LoopExitBB)
+ EpilogStart = NewBB;
+
+ // Add instructions to the epilog depending on the current block.
+ // Process instructions in original program order.
+ for (unsigned StageNum = i; StageNum <= LastStage; ++StageNum) {
+ for (auto &BBI : *BB) {
+ if (BBI.isPHI())
+ continue;
+ MachineInstr *In = &BBI;
+ if (Schedule.isScheduledAtStage(getSUnit(In), StageNum)) {
+ MachineInstr *NewMI = cloneInstr(In, EpilogStage - LastStage, 0);
+ updateInstruction(NewMI, i == 1, EpilogStage, 0, Schedule, VRMap);
+ NewBB->push_back(NewMI);
+ InstrMap[NewMI] = In;
+ }
+ }
+ }
+ generateExistingPhis(NewBB, PrologBBs[i - 1], PredBB, KernelBB, Schedule,
+ VRMap, InstrMap, LastStage, EpilogStage, i == 1);
+ generatePhis(NewBB, PrologBBs[i - 1], PredBB, KernelBB, Schedule, VRMap,
+ InstrMap, LastStage, EpilogStage, i == 1);
+ PredBB = NewBB;
+
+ DEBUG({
+ dbgs() << "epilog:\n";
+ NewBB->dump();
+ });
+ }
+
+ // Fix any Phi nodes in the loop exit block.
+ for (MachineInstr &MI : *LoopExitBB) {
+ if (!MI.isPHI())
+ break;
+ for (unsigned i = 2, e = MI.getNumOperands() + 1; i != e; i += 2) {
+ MachineOperand &MO = MI.getOperand(i);
+ if (MO.getMBB() == BB)
+ MO.setMBB(PredBB);
+ }
+ }
+
+ // Create a branch to the new epilog from the kernel.
+ // Remove the original branch and add a new branch to the epilog.
+ TII->removeBranch(*KernelBB);
+ TII->insertBranch(*KernelBB, KernelBB, EpilogStart, Cond, DebugLoc());
+ // Add a branch to the loop exit.
+ if (EpilogBBs.size() > 0) {
+ MachineBasicBlock *LastEpilogBB = EpilogBBs.back();
+ SmallVector<MachineOperand, 4> Cond1;
+ TII->insertBranch(*LastEpilogBB, LoopExitBB, nullptr, Cond1, DebugLoc());
+ }
+}
+
+/// Replace all uses of FromReg that appear outside the specified
+/// basic block with ToReg.
+static void replaceRegUsesAfterLoop(unsigned FromReg, unsigned ToReg,
+ MachineBasicBlock *MBB,
+ MachineRegisterInfo &MRI,
+ LiveIntervals &LIS) {
+ for (MachineRegisterInfo::use_iterator I = MRI.use_begin(FromReg),
+ E = MRI.use_end();
+ I != E;) {
+ MachineOperand &O = *I;
+ ++I;
+ if (O.getParent()->getParent() != MBB)
+ O.setReg(ToReg);
+ }
+ if (!LIS.hasInterval(ToReg))
+ LIS.createEmptyInterval(ToReg);
+}
+
+/// Return true if the register has a use that occurs outside the
+/// specified loop.
+static bool hasUseAfterLoop(unsigned Reg, MachineBasicBlock *BB,
+ MachineRegisterInfo &MRI) {
+ for (MachineRegisterInfo::use_iterator I = MRI.use_begin(Reg),
+ E = MRI.use_end();
+ I != E; ++I)
+ if (I->getParent()->getParent() != BB)
+ return true;
+ return false;
+}
+
+/// Generate Phis for the specified block in the generated pipelined code.
+/// This function looks at the Phis from the original code to guide the
+/// creation of new Phis.
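+/// For example, if the value of an original Phi is needed in two pipeline
+/// stages, two new Phis are chained in the kernel so that each stage reads
+/// the copy produced the correct number of iterations earlier.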
+void SwingSchedulerDAG::generateExistingPhis(
+ MachineBasicBlock *NewBB, MachineBasicBlock *BB1, MachineBasicBlock *BB2,
+ MachineBasicBlock *KernelBB, SMSchedule &Schedule, ValueMapTy *VRMap,
+ InstrMapTy &InstrMap, unsigned LastStageNum, unsigned CurStageNum,
+ bool IsLast) {
+ // Compute the stage number for the initial value of the Phi, which
+ // comes from the prolog. The prolog to use depends on which kernel or
+ // epilog block we're adding the Phi to.
+ unsigned PrologStage = 0;
+ unsigned PrevStage = 0;
+ bool InKernel = (LastStageNum == CurStageNum);
+ if (InKernel) {
+ PrologStage = LastStageNum - 1;
+ PrevStage = CurStageNum;
+ } else {
+ PrologStage = LastStageNum - (CurStageNum - LastStageNum);
+ PrevStage = LastStageNum + (CurStageNum - LastStageNum) - 1;
+ }
+
+ for (MachineBasicBlock::iterator BBI = BB->instr_begin(),
+ BBE = BB->getFirstNonPHI();
+ BBI != BBE; ++BBI) {
+ unsigned Def = BBI->getOperand(0).getReg();
+
+ unsigned InitVal = 0;
+ unsigned LoopVal = 0;
+ getPhiRegs(*BBI, BB, InitVal, LoopVal);
+
+ unsigned PhiOp1 = 0;
+ // The Phi value from the loop body typically is defined in the loop, but
+ // not always. So, we need to check if the value is defined in the loop.
+ unsigned PhiOp2 = LoopVal;
+ if (VRMap[LastStageNum].count(LoopVal))
+ PhiOp2 = VRMap[LastStageNum][LoopVal];
+
+ int StageScheduled = Schedule.stageScheduled(getSUnit(&*BBI));
+ int LoopValStage =
+ Schedule.stageScheduled(getSUnit(MRI.getVRegDef(LoopVal)));
+ unsigned NumStages = Schedule.getStagesForReg(Def, CurStageNum);
+ if (NumStages == 0) {
+ // We don't need to generate a Phi anymore, but we need to rename any uses
+ // of the Phi value.
+ unsigned NewReg = VRMap[PrevStage][LoopVal];
+ rewriteScheduledInstr(NewBB, Schedule, InstrMap, CurStageNum, 0, &*BBI,
+ Def, NewReg);
+ if (VRMap[CurStageNum].count(LoopVal))
+ VRMap[CurStageNum][Def] = VRMap[CurStageNum][LoopVal];
+ }
+ // Adjust the number of Phis needed depending on the number of prologs left,
+ // and the distance from where the Phi is first scheduled.
+ unsigned NumPhis = NumStages;
+ if (!InKernel && (int)PrologStage < LoopValStage)
+ // The NumPhis is the maximum number of new Phis needed during the steady
+ // state. If the Phi has not been scheduled in the current prolog, then we
+ // need to generate fewer Phis.
+ NumPhis = std::max((int)NumPhis - (int)(LoopValStage - PrologStage), 1);
+ // The number of Phis cannot exceed the number of prolog stages. Each
+ // stage can potentially define two values.
+ NumPhis = std::min(NumPhis, PrologStage + 2);
+
+ unsigned NewReg = 0;
+
+ unsigned AccessStage = (LoopValStage != -1) ? LoopValStage : StageScheduled;
+ // In the epilog, we may need to look back one stage to get the correct
+ // Phi name because the epilog and prolog blocks execute the same stage.
+ // The correct name is from the previous block only when the Phi has
+ // been completely scheduled prior to the epilog, and the Phi value is not
+ // needed in multiple stages.
+ int StageDiff = 0;
+ if (!InKernel && StageScheduled >= LoopValStage && AccessStage == 0 &&
+ NumPhis == 1)
+ StageDiff = 1;
+ // Adjust the computations below when the phi and the loop definition
+ // are scheduled in different stages.
+ if (InKernel && LoopValStage != -1 && StageScheduled > LoopValStage)
+ StageDiff = StageScheduled - LoopValStage;
+ for (unsigned np = 0; np < NumPhis; ++np) {
+ // If the Phi hasn't been scheduled, then use the initial Phi operand
+ // value. Otherwise, use the scheduled version of the instruction. This
+ // is a little complicated when a Phi references another Phi.
+ if (np > PrologStage || StageScheduled >= (int)LastStageNum)
+ PhiOp1 = InitVal;
+ // Check if the Phi has already been scheduled in a prolog stage.
+ else if (PrologStage >= AccessStage + StageDiff + np &&
+ VRMap[PrologStage - StageDiff - np].count(LoopVal) != 0)
+ PhiOp1 = VRMap[PrologStage - StageDiff - np][LoopVal];
+ // Check if the Phi has already been scheduled, but the loop instruction
+ // is either another Phi, or doesn't occur in the loop.
+ else if (PrologStage >= AccessStage + StageDiff + np) {
+ // If the Phi references another Phi, we need to examine the other
+ // Phi to get the correct value.
+ PhiOp1 = LoopVal;
+ MachineInstr *InstOp1 = MRI.getVRegDef(PhiOp1);
+ int Indirects = 1;
+ while (InstOp1 && InstOp1->isPHI() && InstOp1->getParent() == BB) {
+ int PhiStage = Schedule.stageScheduled(getSUnit(InstOp1));
+ if ((int)(PrologStage - StageDiff - np) < PhiStage + Indirects)
+ PhiOp1 = getInitPhiReg(*InstOp1, BB);
+ else
+ PhiOp1 = getLoopPhiReg(*InstOp1, BB);
+ InstOp1 = MRI.getVRegDef(PhiOp1);
+ int PhiOpStage = Schedule.stageScheduled(getSUnit(InstOp1));
+ int StageAdj = (PhiOpStage != -1 ? PhiStage - PhiOpStage : 0);
+ if (PhiOpStage != -1 && PrologStage - StageAdj >= Indirects + np &&
+ VRMap[PrologStage - StageAdj - Indirects - np].count(PhiOp1)) {
+ PhiOp1 = VRMap[PrologStage - StageAdj - Indirects - np][PhiOp1];
+ break;
+ }
+ ++Indirects;
+ }
+ } else
+ PhiOp1 = InitVal;
+ // If this references a generated Phi in the kernel, get the Phi operand
+ // from the incoming block.
+ if (MachineInstr *InstOp1 = MRI.getVRegDef(PhiOp1))
+ if (InstOp1->isPHI() && InstOp1->getParent() == KernelBB)
+ PhiOp1 = getInitPhiReg(*InstOp1, KernelBB);
+
+ MachineInstr *PhiInst = MRI.getVRegDef(LoopVal);
+ bool LoopDefIsPhi = PhiInst && PhiInst->isPHI();
+ // In the epilog, a map lookup is needed to get the value from the kernel,
+ // or from a previous epilog block. How this is done depends on whether
+ // the instruction is scheduled in the previous block.
+ if (!InKernel) {
+ int StageDiffAdj = 0;
+ if (LoopValStage != -1 && StageScheduled > LoopValStage)
+ StageDiffAdj = StageScheduled - LoopValStage;
+ // Use the loop value defined in the kernel, unless the kernel
+ // contains the last definition of the Phi.
+ if (np == 0 && PrevStage == LastStageNum &&
+ (StageScheduled != 0 || LoopValStage != 0) &&
+ VRMap[PrevStage - StageDiffAdj].count(LoopVal))
+ PhiOp2 = VRMap[PrevStage - StageDiffAdj][LoopVal];
+ // Use the value defined by the Phi. We add one because we switch
+ // from looking at the loop value to the Phi definition.
+ else if (np > 0 && PrevStage == LastStageNum &&
+ VRMap[PrevStage - np + 1].count(Def))
+ PhiOp2 = VRMap[PrevStage - np + 1][Def];
+ // Use the loop value defined in the kernel.
+ else if ((unsigned)LoopValStage + StageDiffAdj > PrologStage + 1 &&
+ VRMap[PrevStage - StageDiffAdj - np].count(LoopVal))
+ PhiOp2 = VRMap[PrevStage - StageDiffAdj - np][LoopVal];
+ // Use the value defined by the Phi, unless we're generating the first
+ // epilog and the Phi refers to a Phi in a different stage.
+ else if (VRMap[PrevStage - np].count(Def) &&
+ (!LoopDefIsPhi || PrevStage != LastStageNum))
+ PhiOp2 = VRMap[PrevStage - np][Def];
+ }
+
+ // Check if we can reuse an existing Phi. This occurs when a Phi
+ // references another Phi, and the other Phi is scheduled in an
+ // earlier stage. We can try to reuse an existing Phi up until the last
+ // stage of the current Phi.
+ if (LoopDefIsPhi && (int)PrologStage >= StageScheduled) {
+ int LVNumStages = Schedule.getStagesForPhi(LoopVal);
+ int StageDiff = (StageScheduled - LoopValStage);
+ LVNumStages -= StageDiff;
+ if (LVNumStages > (int)np) {
+ NewReg = PhiOp2;
+ unsigned ReuseStage = CurStageNum;
+ if (Schedule.isLoopCarried(this, *PhiInst))
+ ReuseStage -= LVNumStages;
+ // Check if the Phi to reuse has been generated yet. If not, then
+ // there is nothing to reuse.
+ if (VRMap[ReuseStage].count(LoopVal)) {
+ NewReg = VRMap[ReuseStage][LoopVal];
+
+ rewriteScheduledInstr(NewBB, Schedule, InstrMap, CurStageNum, np,
+ &*BBI, Def, NewReg);
+ // Update the map with the new Phi name.
+ VRMap[CurStageNum - np][Def] = NewReg;
+ PhiOp2 = NewReg;
+ if (VRMap[LastStageNum - np - 1].count(LoopVal))
+ PhiOp2 = VRMap[LastStageNum - np - 1][LoopVal];
+
+ if (IsLast && np == NumPhis - 1)
+ replaceRegUsesAfterLoop(Def, NewReg, BB, MRI, LIS);
+ continue;
+ }
+ } else if (InKernel && StageDiff > 0 &&
+ VRMap[CurStageNum - StageDiff - np].count(LoopVal))
+ PhiOp2 = VRMap[CurStageNum - StageDiff - np][LoopVal];
+ }
+
+ const TargetRegisterClass *RC = MRI.getRegClass(Def);
+ NewReg = MRI.createVirtualRegister(RC);
+
+ MachineInstrBuilder NewPhi =
+ BuildMI(*NewBB, NewBB->getFirstNonPHI(), DebugLoc(),
+ TII->get(TargetOpcode::PHI), NewReg);
+ NewPhi.addReg(PhiOp1).addMBB(BB1);
+ NewPhi.addReg(PhiOp2).addMBB(BB2);
+ if (np == 0)
+ InstrMap[NewPhi] = &*BBI;
+
+ // We define the Phis after creating the new pipelined code, so
+ // we need to rename the Phi values in scheduled instructions.
+
+ unsigned PrevReg = 0;
+ if (InKernel && VRMap[PrevStage - np].count(LoopVal))
+ PrevReg = VRMap[PrevStage - np][LoopVal];
+ rewriteScheduledInstr(NewBB, Schedule, InstrMap, CurStageNum, np, &*BBI,
+ Def, NewReg, PrevReg);
+ // If the Phi has been scheduled, use the new name for rewriting.
+ if (VRMap[CurStageNum - np].count(Def)) {
+ unsigned R = VRMap[CurStageNum - np][Def];
+ rewriteScheduledInstr(NewBB, Schedule, InstrMap, CurStageNum, np, &*BBI,
+ R, NewReg);
+ }
+
+ // Check if we need to rename any uses that occur after the loop. The
+ // register to replace depends on whether the Phi is scheduled in the
+ // epilog.
+ if (IsLast && np == NumPhis - 1)
+ replaceRegUsesAfterLoop(Def, NewReg, BB, MRI, LIS);
+
+ // In the kernel, a dependent Phi uses the value from this Phi.
+ if (InKernel)
+ PhiOp2 = NewReg;
+
+ // Update the map with the new Phi name.
+ VRMap[CurStageNum - np][Def] = NewReg;
+ }
+
+ while (NumPhis++ < NumStages) {
+ rewriteScheduledInstr(NewBB, Schedule, InstrMap, CurStageNum, NumPhis,
+ &*BBI, Def, NewReg, 0);
+ }
+
+ // Check if we need to rename a Phi that has been eliminated due to
+ // scheduling.
+ if (NumStages == 0 && IsLast && VRMap[CurStageNum].count(LoopVal))
+ replaceRegUsesAfterLoop(Def, VRMap[CurStageNum][LoopVal], BB, MRI, LIS);
+ }
+}
+
+/// Generate Phis for the specified block in the generated pipelined code.
+/// These are new Phis needed because the definition is scheduled after the
+/// use in the pipelined sequence.
+void SwingSchedulerDAG::generatePhis(
+ MachineBasicBlock *NewBB, MachineBasicBlock *BB1, MachineBasicBlock *BB2,
+ MachineBasicBlock *KernelBB, SMSchedule &Schedule, ValueMapTy *VRMap,
+ InstrMapTy &InstrMap, unsigned LastStageNum, unsigned CurStageNum,
+ bool IsLast) {
+ // Compute the stage number that contains the initial Phi value, and
+ // the Phi from the previous stage.
+ unsigned PrologStage = 0;
+ unsigned PrevStage = 0;
+ unsigned StageDiff = CurStageNum - LastStageNum;
+ bool InKernel = (StageDiff == 0);
+ if (InKernel) {
+ PrologStage = LastStageNum - 1;
+ PrevStage = CurStageNum;
+ } else {
+ PrologStage = LastStageNum - StageDiff;
+ PrevStage = LastStageNum + StageDiff - 1;
+ }
+
+ for (MachineBasicBlock::iterator BBI = BB->getFirstNonPHI(),
+ BBE = BB->instr_end();
+ BBI != BBE; ++BBI) {
+ for (unsigned i = 0, e = BBI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = BBI->getOperand(i);
+ if (!MO.isReg() || !MO.isDef() ||
+ !TargetRegisterInfo::isVirtualRegister(MO.getReg()))
+ continue;
+
+ int StageScheduled = Schedule.stageScheduled(getSUnit(&*BBI));
+ assert(StageScheduled != -1 && "Expecting scheduled instruction.");
+ unsigned Def = MO.getReg();
+ unsigned NumPhis = Schedule.getStagesForReg(Def, CurStageNum);
+ // An instruction that is scheduled in stage 0 and used after the loop
+ // requires a phi in the epilog for the last definition from either
+ // the kernel or prolog.
+ if (!InKernel && NumPhis == 0 && StageScheduled == 0 &&
+ hasUseAfterLoop(Def, BB, MRI))
+ NumPhis = 1;
+ if (!InKernel && (unsigned)StageScheduled > PrologStage)
+ continue;
+
+ unsigned PhiOp2 = VRMap[PrevStage][Def];
+ if (MachineInstr *InstOp2 = MRI.getVRegDef(PhiOp2))
+ if (InstOp2->isPHI() && InstOp2->getParent() == NewBB)
+ PhiOp2 = getLoopPhiReg(*InstOp2, BB2);
+ // The number of Phis can't exceed the number of prolog stages. The
+ // prolog stage number is zero based.
+ if (NumPhis > PrologStage + 1 - StageScheduled)
+ NumPhis = PrologStage + 1 - StageScheduled;
+ for (unsigned np = 0; np < NumPhis; ++np) {
+ unsigned PhiOp1 = VRMap[PrologStage][Def];
+ if (np <= PrologStage)
+ PhiOp1 = VRMap[PrologStage - np][Def];
+ if (MachineInstr *InstOp1 = MRI.getVRegDef(PhiOp1)) {
+ if (InstOp1->isPHI() && InstOp1->getParent() == KernelBB)
+ PhiOp1 = getInitPhiReg(*InstOp1, KernelBB);
+ if (InstOp1->isPHI() && InstOp1->getParent() == NewBB)
+ PhiOp1 = getInitPhiReg(*InstOp1, NewBB);
+ }
+ if (!InKernel)
+ PhiOp2 = VRMap[PrevStage - np][Def];
+
+ const TargetRegisterClass *RC = MRI.getRegClass(Def);
+ unsigned NewReg = MRI.createVirtualRegister(RC);
+
+ MachineInstrBuilder NewPhi =
+ BuildMI(*NewBB, NewBB->getFirstNonPHI(), DebugLoc(),
+ TII->get(TargetOpcode::PHI), NewReg);
+ NewPhi.addReg(PhiOp1).addMBB(BB1);
+ NewPhi.addReg(PhiOp2).addMBB(BB2);
+ if (np == 0)
+ InstrMap[NewPhi] = &*BBI;
+
+ // Rewrite uses and update the map. The actions depend upon whether
+ // we're generating code for the kernel or epilog blocks.
+ if (InKernel) {
+ rewriteScheduledInstr(NewBB, Schedule, InstrMap, CurStageNum, np,
+ &*BBI, PhiOp1, NewReg);
+ rewriteScheduledInstr(NewBB, Schedule, InstrMap, CurStageNum, np,
+ &*BBI, PhiOp2, NewReg);
+
+ PhiOp2 = NewReg;
+ VRMap[PrevStage - np - 1][Def] = NewReg;
+ } else {
+ VRMap[CurStageNum - np][Def] = NewReg;
+ if (np == NumPhis - 1)
+ rewriteScheduledInstr(NewBB, Schedule, InstrMap, CurStageNum, np,
+ &*BBI, Def, NewReg);
+ }
+ if (IsLast && np == NumPhis - 1)
+ replaceRegUsesAfterLoop(Def, NewReg, BB, MRI, LIS);
+ }
+ }
+ }
+}
+
+/// Remove instructions that generate values with no uses.
+/// Typically, these are induction variable operations that generate values
+/// used in the loop itself. A dead instruction has a definition with
+/// no uses, or uses that occur in the original loop only.
+void SwingSchedulerDAG::removeDeadInstructions(MachineBasicBlock *KernelBB,
+ MBBVectorTy &EpilogBBs) {
+ // For each epilog block, check that the value defined by each instruction
+ // is used. If not, delete it.
+ for (MBBVectorTy::reverse_iterator MBB = EpilogBBs.rbegin(),
+ MBE = EpilogBBs.rend();
+ MBB != MBE; ++MBB)
+ for (MachineBasicBlock::reverse_instr_iterator MI = (*MBB)->instr_rbegin(),
+ ME = (*MBB)->instr_rend();
+ MI != ME;) {
+ // From DeadMachineInstructionElim. Don't delete inline assembly.
+ if (MI->isInlineAsm()) {
+ ++MI;
+ continue;
+ }
+ bool SawStore = false;
+ // Check if it's safe to remove the instruction due to side effects.
+ // We can, and want to, remove Phis here.
+ if (!MI->isSafeToMove(nullptr, SawStore) && !MI->isPHI()) {
+ ++MI;
+ continue;
+ }
+ bool used = true;
+ for (MachineInstr::mop_iterator MOI = MI->operands_begin(),
+ MOE = MI->operands_end();
+ MOI != MOE; ++MOI) {
+ if (!MOI->isReg() || !MOI->isDef())
+ continue;
+ unsigned reg = MOI->getReg();
+ unsigned realUses = 0;
+ for (MachineRegisterInfo::use_iterator UI = MRI.use_begin(reg),
+ EI = MRI.use_end();
+ UI != EI; ++UI) {
+ // Uses that occur only in the original loop block don't count, since
+ // the original loop is deleted. Only a use outside BB is a real use.
+ if (UI->getParent()->getParent() != BB) {
+ realUses++;
+ used = true;
+ break;
+ }
+ }
+ if (realUses > 0)
+ break;
+ used = false;
+ }
+ if (!used) {
+ MI++->eraseFromParent();
+ continue;
+ }
+ ++MI;
+ }
+ // In the kernel block, check if we can remove a Phi that generates a value
+ // used in an instruction removed in the epilog block.
+ for (MachineBasicBlock::iterator BBI = KernelBB->instr_begin(),
+ BBE = KernelBB->getFirstNonPHI();
+ BBI != BBE;) {
+ MachineInstr *MI = &*BBI;
+ ++BBI;
+ unsigned reg = MI->getOperand(0).getReg();
+ if (MRI.use_begin(reg) == MRI.use_end()) {
+ MI->eraseFromParent();
+ }
+ }
+}
+
+/// For loop carried definitions, we split the lifetime of a virtual register
+/// that has uses past the definition in the next iteration. A copy with a new
+/// virtual register is inserted before the definition, which helps with
+/// generating a better register assignment.
+///
+///   v1 = phi(a, v2)      v1 = phi(a, v2)
+///   v2 = phi(b, v3)      v2 = phi(b, v3)
+///   v3 = ..              v4 = copy v1
+///   .. = v1              v3 = ..
+///                        .. = v4
+void SwingSchedulerDAG::splitLifetimes(MachineBasicBlock *KernelBB,
+ MBBVectorTy &EpilogBBs,
+ SMSchedule &Schedule) {
+ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
+ for (MachineBasicBlock::iterator BBI = KernelBB->instr_begin(),
+ BBF = KernelBB->getFirstNonPHI();
+ BBI != BBF; ++BBI) {
+ unsigned Def = BBI->getOperand(0).getReg();
+ // Check for any Phi definition that is used as an operand of another Phi
+ // in the same block.
+ for (MachineRegisterInfo::use_instr_iterator I = MRI.use_instr_begin(Def),
+ E = MRI.use_instr_end();
+ I != E; ++I) {
+ if (I->isPHI() && I->getParent() == KernelBB) {
+ // Get the loop carried definition.
+ unsigned LCDef = getLoopPhiReg(*BBI, KernelBB);
+ if (!LCDef)
+ continue;
+ MachineInstr *MI = MRI.getVRegDef(LCDef);
+ if (!MI || MI->getParent() != KernelBB || MI->isPHI())
+ continue;
+ // Search through the rest of the block looking for uses of the Phi
+ // definition. If one occurs, then split the lifetime.
+ unsigned SplitReg = 0;
+ for (auto &BBJ : make_range(MachineBasicBlock::instr_iterator(MI),
+ KernelBB->instr_end()))
+ if (BBJ.readsRegister(Def)) {
+ // We split the lifetime when we find the first use.
+ if (SplitReg == 0) {
+ SplitReg = MRI.createVirtualRegister(MRI.getRegClass(Def));
+ BuildMI(*KernelBB, MI, MI->getDebugLoc(),
+ TII->get(TargetOpcode::COPY), SplitReg)
+ .addReg(Def);
+ }
+ BBJ.substituteRegister(Def, SplitReg, 0, *TRI);
+ }
+ if (!SplitReg)
+ continue;
+ // Search through each of the epilog blocks for any uses to be renamed.
+ for (auto &Epilog : EpilogBBs)
+ for (auto &I : *Epilog)
+ if (I.readsRegister(Def))
+ I.substituteRegister(Def, SplitReg, 0, *TRI);
+ break;
+ }
+ }
+ }
+}
+
+/// Remove the incoming block from the Phis in a basic block.
+static void removePhis(MachineBasicBlock *BB, MachineBasicBlock *Incoming) {
+ for (MachineInstr &MI : *BB) {
+ if (!MI.isPHI())
+ break;
+ for (unsigned i = 1, e = MI.getNumOperands(); i != e; i += 2)
+ if (MI.getOperand(i + 1).getMBB() == Incoming) {
+ MI.RemoveOperand(i + 1);
+ MI.RemoveOperand(i);
+ break;
+ }
+ }
+}
+
+/// Create branches from each prolog basic block to the appropriate epilog
+/// block. These edges are needed if the loop ends before reaching the
+/// kernel.
+void SwingSchedulerDAG::addBranches(MBBVectorTy &PrologBBs,
+ MachineBasicBlock *KernelBB,
+ MBBVectorTy &EpilogBBs,
+ SMSchedule &Schedule, ValueMapTy *VRMap) {
+ assert(PrologBBs.size() == EpilogBBs.size() && "Prolog/Epilog mismatch");
+ MachineInstr *IndVar = Pass.LI.LoopInductionVar;
+ MachineInstr *Cmp = Pass.LI.LoopCompare;
+ MachineBasicBlock *LastPro = KernelBB;
+ MachineBasicBlock *LastEpi = KernelBB;
+
+ // Start from the blocks connected to the kernel and work "out"
+ // to the first prolog and the last epilog blocks.
+ SmallVector<MachineInstr *, 4> PrevInsts;
+ unsigned MaxIter = PrologBBs.size() - 1;
+ unsigned LC = UINT_MAX;
+ unsigned LCMin = UINT_MAX;
+ for (unsigned i = 0, j = MaxIter; i <= MaxIter; ++i, --j) {
+ // Add branches to the prolog that go to the corresponding
+ // epilog, and the fall-thru prolog/kernel block.
+ MachineBasicBlock *Prolog = PrologBBs[j];
+ MachineBasicBlock *Epilog = EpilogBBs[i];
+ // We've executed one iteration, so decrement the loop count and check for
+ // the loop end.
+ SmallVector<MachineOperand, 4> Cond;
+ // Check if the LOOP0 has already been removed. If so, then there is no need
+ // to reduce the trip count.
+ if (LC != 0)
+ LC = TII->reduceLoopCount(*Prolog, IndVar, *Cmp, Cond, PrevInsts, j,
+ MaxIter);
+
+ // Record the value of the first trip count, which is used to determine if
+ // branches and blocks can be removed for constant trip counts.
+ if (LCMin == UINT_MAX)
+ LCMin = LC;
+
+ unsigned numAdded = 0;
+ if (TargetRegisterInfo::isVirtualRegister(LC)) {
+ Prolog->addSuccessor(Epilog);
+ numAdded = TII->insertBranch(*Prolog, Epilog, LastPro, Cond, DebugLoc());
+ } else if (j >= LCMin) {
+ Prolog->addSuccessor(Epilog);
+ Prolog->removeSuccessor(LastPro);
+ LastEpi->removeSuccessor(Epilog);
+ numAdded = TII->insertBranch(*Prolog, Epilog, nullptr, Cond, DebugLoc());
+ removePhis(Epilog, LastEpi);
+ // Remove the blocks that are no longer referenced.
+ if (LastPro != LastEpi) {
+ LastEpi->clear();
+ LastEpi->eraseFromParent();
+ }
+ LastPro->clear();
+ LastPro->eraseFromParent();
+ } else {
+ numAdded = TII->insertBranch(*Prolog, LastPro, nullptr, Cond, DebugLoc());
+ removePhis(Epilog, Prolog);
+ }
+ LastPro = Prolog;
+ LastEpi = Epilog;
+ for (MachineBasicBlock::reverse_instr_iterator I = Prolog->instr_rbegin(),
+ E = Prolog->instr_rend();
+ I != E && numAdded > 0; ++I, --numAdded)
+ updateInstruction(&*I, false, j, 0, Schedule, VRMap);
+ }
+}
+
+/// Return true if we can compute the amount the instruction changes
+/// during each iteration. Set Delta to the amount of the change.
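+/// For example, if the base register of the memory operation is
+/// incremented by 8 on each loop iteration, Delta is set to 8.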
+bool SwingSchedulerDAG::computeDelta(MachineInstr &MI, unsigned &Delta) {
+ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
+ unsigned BaseReg;
+ int64_t Offset;
+ if (!TII->getMemOpBaseRegImmOfs(MI, BaseReg, Offset, TRI))
+ return false;
+
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ // Check if there is a Phi. If so, get the definition in the loop.
+ MachineInstr *BaseDef = MRI.getVRegDef(BaseReg);
+ if (BaseDef && BaseDef->isPHI()) {
+ BaseReg = getLoopPhiReg(*BaseDef, MI.getParent());
+ BaseDef = MRI.getVRegDef(BaseReg);
+ }
+ if (!BaseDef)
+ return false;
+
+ int D = 0;
+ if (!TII->getIncrementValue(*BaseDef, D) && D >= 0)
+ return false;
+
+ Delta = D;
+ return true;
+}
+
+/// Update the memory operand with a new offset when the pipeliner
+/// generates a new copy of the instruction that refers to a
+/// different memory location.
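+/// For example, a reference whose base address advances by Delta = 4
+/// bytes per iteration and that is cloned Num = 2 stages away has its
+/// offset adjusted by 8. If the delta cannot be computed, the memory
+/// operand is conservatively given an unknown size.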
+void SwingSchedulerDAG::updateMemOperands(MachineInstr &NewMI,
+ MachineInstr &OldMI, unsigned Num) {
+ if (Num == 0)
+ return;
+ // If the instruction has memory operands, then adjust the offset
+ // when the instruction appears in different stages.
+ unsigned NumRefs = NewMI.memoperands_end() - NewMI.memoperands_begin();
+ if (NumRefs == 0)
+ return;
+ MachineInstr::mmo_iterator NewMemRefs = MF.allocateMemRefsArray(NumRefs);
+ unsigned Refs = 0;
+ for (MachineMemOperand *MMO : NewMI.memoperands()) {
+ if (MMO->isVolatile() || (MMO->isInvariant() && MMO->isDereferenceable()) ||
+ (!MMO->getValue())) {
+ NewMemRefs[Refs++] = MMO;
+ continue;
+ }
+ unsigned Delta;
+ if (computeDelta(OldMI, Delta)) {
+ int64_t AdjOffset = Delta * Num;
+ NewMemRefs[Refs++] =
+ MF.getMachineMemOperand(MMO, AdjOffset, MMO->getSize());
+ } else
+ NewMemRefs[Refs++] = MF.getMachineMemOperand(MMO, 0, UINT64_MAX);
+ }
+ NewMI.setMemRefs(NewMemRefs, NewMemRefs + NumRefs);
+}
+
+/// Clone the instruction for the new pipelined loop and update the
+/// memory operands, if needed.
+MachineInstr *SwingSchedulerDAG::cloneInstr(MachineInstr *OldMI,
+ unsigned CurStageNum,
+ unsigned InstStageNum) {
+ MachineInstr *NewMI = MF.CloneMachineInstr(OldMI);
+ // Check for tied operands in inline asm instructions. This should be handled
+ // elsewhere, but I'm not sure of the best solution.
+ if (OldMI->isInlineAsm())
+ for (unsigned i = 0, e = OldMI->getNumOperands(); i != e; ++i) {
+ const auto &MO = OldMI->getOperand(i);
+ if (MO.isReg() && MO.isUse())
+ break;
+ unsigned UseIdx;
+ if (OldMI->isRegTiedToUseOperand(i, &UseIdx))
+ NewMI->tieOperands(i, UseIdx);
+ }
+ updateMemOperands(*NewMI, *OldMI, CurStageNum - InstStageNum);
+ return NewMI;
+}
+
+/// Clone the instruction for the new pipelined loop. If needed, this
+/// function updates the instruction using the values saved in the
+/// InstrChanges structure.
+MachineInstr *SwingSchedulerDAG::cloneAndChangeInstr(MachineInstr *OldMI,
+ unsigned CurStageNum,
+ unsigned InstStageNum,
+ SMSchedule &Schedule) {
+ MachineInstr *NewMI = MF.CloneMachineInstr(OldMI);
+ DenseMap<SUnit *, std::pair<unsigned, int64_t>>::iterator It =
+ InstrChanges.find(getSUnit(OldMI));
+ if (It != InstrChanges.end()) {
+ std::pair<unsigned, int64_t> RegAndOffset = It->second;
+ unsigned BasePos, OffsetPos;
+ if (!TII->getBaseAndOffsetPosition(*OldMI, BasePos, OffsetPos))
+ return nullptr;
+ int64_t NewOffset = OldMI->getOperand(OffsetPos).getImm();
+ MachineInstr *LoopDef = findDefInLoop(RegAndOffset.first);
+ if (Schedule.stageScheduled(getSUnit(LoopDef)) > (signed)InstStageNum)
+ NewOffset += RegAndOffset.second * (CurStageNum - InstStageNum);
+ NewMI->getOperand(OffsetPos).setImm(NewOffset);
+ }
+ updateMemOperands(*NewMI, *OldMI, CurStageNum - InstStageNum);
+ return NewMI;
+}
+
+/// Update the machine instruction with new virtual registers. This
+/// function may change the definitions and/or uses.
+void SwingSchedulerDAG::updateInstruction(MachineInstr *NewMI, bool LastDef,
+ unsigned CurStageNum,
+ unsigned InstrStageNum,
+ SMSchedule &Schedule,
+ ValueMapTy *VRMap) {
+ for (unsigned i = 0, e = NewMI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = NewMI->getOperand(i);
+ if (!MO.isReg() || !TargetRegisterInfo::isVirtualRegister(MO.getReg()))
+ continue;
+ unsigned reg = MO.getReg();
+ if (MO.isDef()) {
+ // Create a new virtual register for the definition.
+ const TargetRegisterClass *RC = MRI.getRegClass(reg);
+ unsigned NewReg = MRI.createVirtualRegister(RC);
+ MO.setReg(NewReg);
+ VRMap[CurStageNum][reg] = NewReg;
+ if (LastDef)
+ replaceRegUsesAfterLoop(reg, NewReg, BB, MRI, LIS);
+ } else if (MO.isUse()) {
+ MachineInstr *Def = MRI.getVRegDef(reg);
+ // Compute the stage that contains the last definition for the instruction.
+ int DefStageNum = Schedule.stageScheduled(getSUnit(Def));
+ unsigned StageNum = CurStageNum;
+ if (DefStageNum != -1 && (int)InstrStageNum > DefStageNum) {
+ // Compute the difference in stages between the definition and the use.
+ unsigned StageDiff = (InstrStageNum - DefStageNum);
+ // Make an adjustment to get the last definition.
+ StageNum -= StageDiff;
+ }
+ if (VRMap[StageNum].count(reg))
+ MO.setReg(VRMap[StageNum][reg]);
+ }
+ }
+}
+
+/// Return the instruction in the loop that defines the register.
+/// If the definition is a Phi, then follow the Phi operand to
+/// the instruction in the loop.
+MachineInstr *SwingSchedulerDAG::findDefInLoop(unsigned Reg) {
+ SmallPtrSet<MachineInstr *, 8> Visited;
+ MachineInstr *Def = MRI.getVRegDef(Reg);
+ while (Def->isPHI()) {
+ if (!Visited.insert(Def).second)
+ break;
+ for (unsigned i = 1, e = Def->getNumOperands(); i < e; i += 2)
+ if (Def->getOperand(i + 1).getMBB() == BB) {
+ Def = MRI.getVRegDef(Def->getOperand(i).getReg());
+ break;
+ }
+ }
+ return Def;
+}
+
+/// Return the new name for the value from the previous stage.
+unsigned SwingSchedulerDAG::getPrevMapVal(unsigned StageNum, unsigned PhiStage,
+ unsigned LoopVal, unsigned LoopStage,
+ ValueMapTy *VRMap,
+ MachineBasicBlock *BB) {
+ unsigned PrevVal = 0;
+ if (StageNum > PhiStage) {
+ MachineInstr *LoopInst = MRI.getVRegDef(LoopVal);
+ if (PhiStage == LoopStage && VRMap[StageNum - 1].count(LoopVal))
+ // The name is defined in the previous stage.
+ PrevVal = VRMap[StageNum - 1][LoopVal];
+ else if (VRMap[StageNum].count(LoopVal))
+ // The previous name is defined in the current stage when the instruction
+ // order is swapped.
+ PrevVal = VRMap[StageNum][LoopVal];
+ else if (!LoopInst->isPHI() || LoopInst->getParent() != BB)
+ // The loop value hasn't yet been scheduled.
+ PrevVal = LoopVal;
+ else if (StageNum == PhiStage + 1)
+ // The loop value is another phi, which has not been scheduled.
+ PrevVal = getInitPhiReg(*LoopInst, BB);
+ else if (StageNum > PhiStage + 1 && LoopInst->getParent() == BB)
+ // The loop value is another phi, which has been scheduled.
+ PrevVal =
+ getPrevMapVal(StageNum - 1, PhiStage, getLoopPhiReg(*LoopInst, BB),
+ LoopStage, VRMap, BB);
+ }
+ return PrevVal;
+}
+
+/// Rewrite the Phi values in the specified block to use the mappings
+/// from the initial operand. Once the Phi is scheduled, we switch
+/// to using the loop value instead of the Phi value, so those names
+/// do not need to be rewritten.
+void SwingSchedulerDAG::rewritePhiValues(MachineBasicBlock *NewBB,
+ unsigned StageNum,
+ SMSchedule &Schedule,
+ ValueMapTy *VRMap,
+ InstrMapTy &InstrMap) {
+ for (MachineBasicBlock::iterator BBI = BB->instr_begin(),
+ BBE = BB->getFirstNonPHI();
+ BBI != BBE; ++BBI) {
+ unsigned InitVal = 0;
+ unsigned LoopVal = 0;
+ getPhiRegs(*BBI, BB, InitVal, LoopVal);
+ unsigned PhiDef = BBI->getOperand(0).getReg();
+
+ unsigned PhiStage =
+ (unsigned)Schedule.stageScheduled(getSUnit(MRI.getVRegDef(PhiDef)));
+ unsigned LoopStage =
+ (unsigned)Schedule.stageScheduled(getSUnit(MRI.getVRegDef(LoopVal)));
+ unsigned NumPhis = Schedule.getStagesForPhi(PhiDef);
+ if (NumPhis > StageNum)
+ NumPhis = StageNum;
+ for (unsigned np = 0; np <= NumPhis; ++np) {
+ unsigned NewVal =
+ getPrevMapVal(StageNum - np, PhiStage, LoopVal, LoopStage, VRMap, BB);
+ if (!NewVal)
+ NewVal = InitVal;
+ rewriteScheduledInstr(NewBB, Schedule, InstrMap, StageNum - np, np, &*BBI,
+ PhiDef, NewVal);
+ }
+ }
+}
+
+/// Rewrite a previously scheduled instruction to use the register value
+/// from the new instruction. Make sure the instruction occurs in the
+/// basic block, and we don't change the uses in the new instruction.
+void SwingSchedulerDAG::rewriteScheduledInstr(
+ MachineBasicBlock *BB, SMSchedule &Schedule, InstrMapTy &InstrMap,
+ unsigned CurStageNum, unsigned PhiNum, MachineInstr *Phi, unsigned OldReg,
+ unsigned NewReg, unsigned PrevReg) {
+ bool InProlog = (CurStageNum < Schedule.getMaxStageCount());
+ int StagePhi = Schedule.stageScheduled(getSUnit(Phi)) + PhiNum;
+ // Rewrite uses that have been scheduled already to use the new
+ // Phi register.
+ for (MachineRegisterInfo::use_iterator UI = MRI.use_begin(OldReg),
+ EI = MRI.use_end();
+ UI != EI;) {
+ MachineOperand &UseOp = *UI;
+ MachineInstr *UseMI = UseOp.getParent();
+ ++UI;
+ if (UseMI->getParent() != BB)
+ continue;
+ if (UseMI->isPHI()) {
+ if (!Phi->isPHI() && UseMI->getOperand(0).getReg() == NewReg)
+ continue;
+ if (getLoopPhiReg(*UseMI, BB) != OldReg)
+ continue;
+ }
+ InstrMapTy::iterator OrigInstr = InstrMap.find(UseMI);
+ assert(OrigInstr != InstrMap.end() && "Instruction not scheduled.");
+ SUnit *OrigMISU = getSUnit(OrigInstr->second);
+ int StageSched = Schedule.stageScheduled(OrigMISU);
+ int CycleSched = Schedule.cycleScheduled(OrigMISU);
+ unsigned ReplaceReg = 0;
+ // This is the stage for the scheduled instruction.
+ if (StagePhi == StageSched && Phi->isPHI()) {
+ int CyclePhi = Schedule.cycleScheduled(getSUnit(Phi));
+ if (PrevReg && InProlog)
+ ReplaceReg = PrevReg;
+ else if (PrevReg && !Schedule.isLoopCarried(this, *Phi) &&
+ (CyclePhi <= CycleSched || OrigMISU->getInstr()->isPHI()))
+ ReplaceReg = PrevReg;
+ else
+ ReplaceReg = NewReg;
+ }
+ // The scheduled instruction occurs before the scheduled Phi, and the
+ // Phi is not loop carried.
+ if (!InProlog && StagePhi + 1 == StageSched &&
+ !Schedule.isLoopCarried(this, *Phi))
+ ReplaceReg = NewReg;
+ if (StagePhi > StageSched && Phi->isPHI())
+ ReplaceReg = NewReg;
+ if (!InProlog && !Phi->isPHI() && StagePhi < StageSched)
+ ReplaceReg = NewReg;
+ if (ReplaceReg) {
+ MRI.constrainRegClass(ReplaceReg, MRI.getRegClass(OldReg));
+ UseOp.setReg(ReplaceReg);
+ }
+ }
+}
+
+/// Check if we can change the instruction to use an offset value from the
+/// previous iteration. If so, return true and set the base and offset values
+/// so that we can rewrite the load, if necessary.
+/// v1 = Phi(v0, v3)
+/// v2 = load v1, 0
+/// v3 = post_store v1, 4, x
+/// This function enables the load to be rewritten as v2 = load v3, 4.
+bool SwingSchedulerDAG::canUseLastOffsetValue(MachineInstr *MI,
+ unsigned &BasePos,
+ unsigned &OffsetPos,
+ unsigned &NewBase,
+ int64_t &Offset) {
+ // Get the load instruction.
+ if (TII->isPostIncrement(*MI))
+ return false;
+ unsigned BasePosLd, OffsetPosLd;
+ if (!TII->getBaseAndOffsetPosition(*MI, BasePosLd, OffsetPosLd))
+ return false;
+ unsigned BaseReg = MI->getOperand(BasePosLd).getReg();
+
+ // Look for the Phi instruction.
+ MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
+ MachineInstr *Phi = MRI.getVRegDef(BaseReg);
+ if (!Phi || !Phi->isPHI())
+ return false;
+ // Get the register defined in the loop block.
+ unsigned PrevReg = getLoopPhiReg(*Phi, MI->getParent());
+ if (!PrevReg)
+ return false;
+
+ // Check for the post-increment load/store instruction.
+ MachineInstr *PrevDef = MRI.getVRegDef(PrevReg);
+ if (!PrevDef || PrevDef == MI)
+ return false;
+
+ if (!TII->isPostIncrement(*PrevDef))
+ return false;
+
+ unsigned BasePos1 = 0, OffsetPos1 = 0;
+ if (!TII->getBaseAndOffsetPosition(*PrevDef, BasePos1, OffsetPos1))
+ return false;
+
+ // Make sure offset values are both positive or both negative.
+ int64_t LoadOffset = MI->getOperand(OffsetPosLd).getImm();
+ int64_t StoreOffset = PrevDef->getOperand(OffsetPos1).getImm();
+ if ((LoadOffset >= 0) != (StoreOffset >= 0))
+ return false;
+
+ // Set the return value once we determine that we return true.
+ BasePos = BasePosLd;
+ OffsetPos = OffsetPosLd;
+ NewBase = PrevReg;
+ Offset = StoreOffset;
+ return true;
+}
+
+/// Apply changes to the instruction if needed. The changes are needed
+/// to improve the scheduling and depend upon the final schedule.
+MachineInstr *SwingSchedulerDAG::applyInstrChange(MachineInstr *MI,
+ SMSchedule &Schedule,
+ bool UpdateDAG) {
+ SUnit *SU = getSUnit(MI);
+ DenseMap<SUnit *, std::pair<unsigned, int64_t>>::iterator It =
+ InstrChanges.find(SU);
+ if (It != InstrChanges.end()) {
+ std::pair<unsigned, int64_t> RegAndOffset = It->second;
+ unsigned BasePos, OffsetPos;
+ if (!TII->getBaseAndOffsetPosition(*MI, BasePos, OffsetPos))
+ return nullptr;
+ unsigned BaseReg = MI->getOperand(BasePos).getReg();
+ MachineInstr *LoopDef = findDefInLoop(BaseReg);
+ int DefStageNum = Schedule.stageScheduled(getSUnit(LoopDef));
+ int DefCycleNum = Schedule.cycleScheduled(getSUnit(LoopDef));
+ int BaseStageNum = Schedule.stageScheduled(SU);
+ int BaseCycleNum = Schedule.cycleScheduled(SU);
+ if (BaseStageNum < DefStageNum) {
+ MachineInstr *NewMI = MF.CloneMachineInstr(MI);
+ int OffsetDiff = DefStageNum - BaseStageNum;
+ if (DefCycleNum < BaseCycleNum) {
+ NewMI->getOperand(BasePos).setReg(RegAndOffset.first);
+ if (OffsetDiff > 0)
+ --OffsetDiff;
+ }
+ int64_t NewOffset =
+ MI->getOperand(OffsetPos).getImm() + RegAndOffset.second * OffsetDiff;
+ NewMI->getOperand(OffsetPos).setImm(NewOffset);
+ if (UpdateDAG) {
+ SU->setInstr(NewMI);
+ MISUnitMap[NewMI] = SU;
+ }
+ NewMIs.insert(NewMI);
+ return NewMI;
+ }
+ }
+ return nullptr;
+}
+
+/// Return true for an order dependence that is potentially loop carried.
+/// An order dependence is loop carried if the destination defines a value
+/// that may be used by the source in a subsequent iteration.
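+/// For example, a load at offset 4 with access size 4 and a store at
+/// offset 0 from the same base register, which advances 8 bytes each
+/// iteration, cannot overlap in a later iteration (4 + 4 <= 8), so the
+/// chain edge is not treated as loop carried.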
+bool SwingSchedulerDAG::isLoopCarriedOrder(SUnit *Source, const SDep &Dep,
+ bool isSucc) {
+ if (!isOrder(Source, Dep) || Dep.isArtificial())
+ return false;
+
+ if (!SwpPruneLoopCarried)
+ return true;
+
+ MachineInstr *SI = Source->getInstr();
+ MachineInstr *DI = Dep.getSUnit()->getInstr();
+ if (!isSucc)
+ std::swap(SI, DI);
+ assert(SI != nullptr && DI != nullptr && "Expecting SUnit with an MI.");
+
+ // Assume ordered loads and stores may have a loop carried dependence.
+ if (SI->hasUnmodeledSideEffects() || DI->hasUnmodeledSideEffects() ||
+ SI->hasOrderedMemoryRef() || DI->hasOrderedMemoryRef())
+ return true;
+
+ // Only chain dependences between a load and store can be loop carried.
+ if (!DI->mayStore() || !SI->mayLoad())
+ return false;
+
+ unsigned DeltaS, DeltaD;
+ if (!computeDelta(*SI, DeltaS) || !computeDelta(*DI, DeltaD))
+ return true;
+
+ unsigned BaseRegS, BaseRegD;
+ int64_t OffsetS, OffsetD;
+ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
+ if (!TII->getMemOpBaseRegImmOfs(*SI, BaseRegS, OffsetS, TRI) ||
+ !TII->getMemOpBaseRegImmOfs(*DI, BaseRegD, OffsetD, TRI))
+ return true;
+
+ if (BaseRegS != BaseRegD)
+ return true;
+
+ uint64_t AccessSizeS = (*SI->memoperands_begin())->getSize();
+ uint64_t AccessSizeD = (*DI->memoperands_begin())->getSize();
+
+ // This is the main test, which checks the offset values and the loop
+ // increment value to determine if the accesses may be loop carried.
+ if (OffsetS >= OffsetD)
+ return OffsetS + AccessSizeS > DeltaS;
+ else if (OffsetS < OffsetD)
+ return OffsetD + AccessSizeD > DeltaD;
+
+ return true;
+}
+
+void SwingSchedulerDAG::postprocessDAG() {
+ for (auto &M : Mutations)
+ M->apply(this);
+}
+
+/// Try to schedule the node at the specified StartCycle and continue
+/// until the node is scheduled or the EndCycle is reached. This function
+/// returns true if the node is scheduled. This routine may search either
+/// forward or backward for a place to insert the instruction based upon
+/// the relative values of StartCycle and EndCycle.
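+/// Resource checking is done modulo II: every instruction already placed
+/// at a cycle congruent to the candidate cycle occupies the same slot in
+/// the kernel, so those reservations are replayed in the DFA before the
+/// new node is tried.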
+bool SMSchedule::insert(SUnit *SU, int StartCycle, int EndCycle, int II) {
+ bool forward = true;
+ if (StartCycle > EndCycle)
+ forward = false;
+
+ // The terminating condition depends on the direction.
+ int termCycle = forward ? EndCycle + 1 : EndCycle - 1;
+ for (int curCycle = StartCycle; curCycle != termCycle;
+ forward ? ++curCycle : --curCycle) {
+
+ // Add the already scheduled instructions at the specified cycle to the DFA.
+ Resources->clearResources();
+ for (int checkCycle = FirstCycle + ((curCycle - FirstCycle) % II);
+ checkCycle <= LastCycle; checkCycle += II) {
+ std::deque<SUnit *> &cycleInstrs = ScheduledInstrs[checkCycle];
+
+ for (std::deque<SUnit *>::iterator I = cycleInstrs.begin(),
+ E = cycleInstrs.end();
+ I != E; ++I) {
+ if (ST.getInstrInfo()->isZeroCost((*I)->getInstr()->getOpcode()))
+ continue;
+ assert(Resources->canReserveResources(*(*I)->getInstr()) &&
+ "These instructions have already been scheduled.");
+ Resources->reserveResources(*(*I)->getInstr());
+ }
+ }
+ if (ST.getInstrInfo()->isZeroCost(SU->getInstr()->getOpcode()) ||
+ Resources->canReserveResources(*SU->getInstr())) {
+ DEBUG({
+ dbgs() << "\tinsert at cycle " << curCycle << " ";
+ SU->getInstr()->dump();
+ });
+
+ ScheduledInstrs[curCycle].push_back(SU);
+ InstrToCycle.insert(std::make_pair(SU, curCycle));
+ if (curCycle > LastCycle)
+ LastCycle = curCycle;
+ if (curCycle < FirstCycle)
+ FirstCycle = curCycle;
+ return true;
+ }
+ DEBUG({
+ dbgs() << "\tfailed to insert at cycle " << curCycle << " ";
+ SU->getInstr()->dump();
+ });
+ }
+ return false;
+}
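+
+// Illustrative numbers for the modulo reservation above (not from the
+// patched sources): with II == 3 and FirstCycle == 0, probing curCycle == 7
+// replays the reservations of every already-scheduled cycle congruent to
+// 7 mod 3 (cycles 1, 4, 7, 10, ...), since all of those cycles occupy the
+// same slot of the initiation interval in the steady-state kernel.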
+
+// Return the cycle of the earliest scheduled instruction in the chain.
+int SMSchedule::earliestCycleInChain(const SDep &Dep) {
+ SmallPtrSet<SUnit *, 8> Visited;
+ SmallVector<SDep, 8> Worklist;
+ Worklist.push_back(Dep);
+ int EarlyCycle = INT_MAX;
+ while (!Worklist.empty()) {
+ const SDep &Cur = Worklist.pop_back_val();
+ SUnit *PrevSU = Cur.getSUnit();
+ if (Visited.count(PrevSU))
+ continue;
+ std::map<SUnit *, int>::const_iterator it = InstrToCycle.find(PrevSU);
+ if (it == InstrToCycle.end())
+ continue;
+ EarlyCycle = std::min(EarlyCycle, it->second);
+ for (const auto &PI : PrevSU->Preds)
+ if (SwingSchedulerDAG::isOrder(PrevSU, PI))
+ Worklist.push_back(PI);
+ Visited.insert(PrevSU);
+ }
+ return EarlyCycle;
+}
+
+// Return the cycle of the latest scheduled instruction in the chain.
+int SMSchedule::latestCycleInChain(const SDep &Dep) {
+ SmallPtrSet<SUnit *, 8> Visited;
+ SmallVector<SDep, 8> Worklist;
+ Worklist.push_back(Dep);
+ int LateCycle = INT_MIN;
+ while (!Worklist.empty()) {
+ const SDep &Cur = Worklist.pop_back_val();
+ SUnit *SuccSU = Cur.getSUnit();
+ if (Visited.count(SuccSU))
+ continue;
+ std::map<SUnit *, int>::const_iterator it = InstrToCycle.find(SuccSU);
+ if (it == InstrToCycle.end())
+ continue;
+ LateCycle = std::max(LateCycle, it->second);
+ for (const auto &SI : SuccSU->Succs)
+ if (SwingSchedulerDAG::isOrder(SuccSU, SI))
+ Worklist.push_back(SI);
+ Visited.insert(SuccSU);
+ }
+ return LateCycle;
+}
+
+/// If an instruction has a use that spans multiple iterations, then
+/// return the Phi unit reached through the back-edge; otherwise return
+/// nullptr. These instructions are characterized by having a back-edge
+/// to a Phi, which contains a reference to another Phi.
+static SUnit *multipleIterations(SUnit *SU, SwingSchedulerDAG *DAG) {
+ for (auto &P : SU->Preds)
+ if (DAG->isBackedge(SU, P) && P.getSUnit()->getInstr()->isPHI())
+ for (auto &S : P.getSUnit()->Succs)
+ if (S.getKind() == SDep::Order && S.getSUnit()->getInstr()->isPHI())
+ return P.getSUnit();
+ return nullptr;
+}
+
+/// Compute the scheduling start slot for the instruction. The start slot
+/// depends on any predecessor or successor nodes scheduled already.
+void SMSchedule::computeStart(SUnit *SU, int *MaxEarlyStart, int *MinLateStart,
+ int *MinEnd, int *MaxStart, int II,
+ SwingSchedulerDAG *DAG) {
+ // Iterate over each instruction that has been scheduled already. The start
+  // slot computation depends on whether the previously scheduled instruction
+ // is a predecessor or successor of the specified instruction.
+ for (int cycle = getFirstCycle(); cycle <= LastCycle; ++cycle) {
+
+ // Iterate over each instruction in the current cycle.
+ for (SUnit *I : getInstructions(cycle)) {
+ // Because we're processing a DAG for the dependences, we recognize
+ // the back-edge in recurrences by anti dependences.
+ for (unsigned i = 0, e = (unsigned)SU->Preds.size(); i != e; ++i) {
+ const SDep &Dep = SU->Preds[i];
+ if (Dep.getSUnit() == I) {
+ if (!DAG->isBackedge(SU, Dep)) {
+ int EarlyStart = cycle + DAG->getLatency(SU, Dep) -
+ DAG->getDistance(Dep.getSUnit(), SU, Dep) * II;
+ *MaxEarlyStart = std::max(*MaxEarlyStart, EarlyStart);
+ if (DAG->isLoopCarriedOrder(SU, Dep, false)) {
+ int End = earliestCycleInChain(Dep) + (II - 1);
+ *MinEnd = std::min(*MinEnd, End);
+ }
+ } else {
+ int LateStart = cycle - DAG->getLatency(SU, Dep) +
+ DAG->getDistance(SU, Dep.getSUnit(), Dep) * II;
+ *MinLateStart = std::min(*MinLateStart, LateStart);
+ }
+ }
+      // For an instruction that requires multiple iterations, make sure that
+ // the dependent instruction is not scheduled past the definition.
+ SUnit *BE = multipleIterations(I, DAG);
+ if (BE && Dep.getSUnit() == BE && !SU->getInstr()->isPHI() &&
+ !SU->isPred(I))
+ *MinLateStart = std::min(*MinLateStart, cycle);
+ }
+ for (unsigned i = 0, e = (unsigned)SU->Succs.size(); i != e; ++i)
+ if (SU->Succs[i].getSUnit() == I) {
+ const SDep &Dep = SU->Succs[i];
+ if (!DAG->isBackedge(SU, Dep)) {
+ int LateStart = cycle - DAG->getLatency(SU, Dep) +
+ DAG->getDistance(SU, Dep.getSUnit(), Dep) * II;
+ *MinLateStart = std::min(*MinLateStart, LateStart);
+ if (DAG->isLoopCarriedOrder(SU, Dep)) {
+ int Start = latestCycleInChain(Dep) + 1 - II;
+ *MaxStart = std::max(*MaxStart, Start);
+ }
+ } else {
+ int EarlyStart = cycle + DAG->getLatency(SU, Dep) -
+ DAG->getDistance(Dep.getSUnit(), SU, Dep) * II;
+ *MaxEarlyStart = std::max(*MaxEarlyStart, EarlyStart);
+ }
+ }
+ }
+ }
+}
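+
+// Illustrative arithmetic for the bounds above (not from the patched
+// sources): a predecessor scheduled at cycle 5 with latency 2 and distance 0
+// yields EarlyStart = 5 + 2 - 0 * II = 7, so SU may not start earlier than
+// cycle 7. A back-edge from a node at cycle 5 with latency 1, distance 1,
+// and II == 4 yields LateStart = 5 - 1 + 1 * 4 = 8, an upper bound on where
+// SU may be placed.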
+
+/// Order the instructions within a cycle so that the definitions occur
+/// before the uses. Returns true if the instruction is added to the start
+/// of the list, or false if added to the end.
+bool SMSchedule::orderDependence(SwingSchedulerDAG *SSD, SUnit *SU,
+ std::deque<SUnit *> &Insts) {
+ MachineInstr *MI = SU->getInstr();
+ bool OrderBeforeUse = false;
+ bool OrderAfterDef = false;
+ bool OrderBeforeDef = false;
+ unsigned MoveDef = 0;
+ unsigned MoveUse = 0;
+ int StageInst1 = stageScheduled(SU);
+
+ unsigned Pos = 0;
+ for (std::deque<SUnit *>::iterator I = Insts.begin(), E = Insts.end(); I != E;
+ ++I, ++Pos) {
+ // Relative order of Phis does not matter.
+ if (MI->isPHI() && (*I)->getInstr()->isPHI())
+ continue;
+ for (unsigned i = 0, e = MI->getNumOperands(); i < e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || !TargetRegisterInfo::isVirtualRegister(MO.getReg()))
+ continue;
+ unsigned Reg = MO.getReg();
+ unsigned BasePos, OffsetPos;
+ if (ST.getInstrInfo()->getBaseAndOffsetPosition(*MI, BasePos, OffsetPos))
+ if (MI->getOperand(BasePos).getReg() == Reg)
+ if (unsigned NewReg = SSD->getInstrBaseReg(SU))
+ Reg = NewReg;
+ bool Reads, Writes;
+ std::tie(Reads, Writes) =
+ (*I)->getInstr()->readsWritesVirtualRegister(Reg);
+ if (MO.isDef() && Reads && stageScheduled(*I) <= StageInst1) {
+ OrderBeforeUse = true;
+ MoveUse = Pos;
+ } else if (MO.isDef() && Reads && stageScheduled(*I) > StageInst1) {
+ // Add the instruction after the scheduled instruction.
+ OrderAfterDef = true;
+ MoveDef = Pos;
+ } else if (MO.isUse() && Writes && stageScheduled(*I) == StageInst1) {
+ if (cycleScheduled(*I) == cycleScheduled(SU) && !(*I)->isSucc(SU)) {
+ OrderBeforeUse = true;
+ MoveUse = Pos;
+ } else {
+ OrderAfterDef = true;
+ MoveDef = Pos;
+ }
+ } else if (MO.isUse() && Writes && stageScheduled(*I) > StageInst1) {
+ OrderBeforeUse = true;
+ MoveUse = Pos;
+ if (MoveUse != 0) {
+ OrderAfterDef = true;
+ MoveDef = Pos - 1;
+ }
+ } else if (MO.isUse() && Writes && stageScheduled(*I) < StageInst1) {
+ // Add the instruction before the scheduled instruction.
+ OrderBeforeUse = true;
+ MoveUse = Pos;
+ } else if (MO.isUse() && stageScheduled(*I) == StageInst1 &&
+ isLoopCarriedDefOfUse(SSD, (*I)->getInstr(), MO)) {
+ OrderBeforeDef = true;
+ MoveUse = Pos;
+ }
+ }
+ // Check for order dependences between instructions. Make sure the source
+ // is ordered before the destination.
+ for (auto &S : SU->Succs)
+ if (S.getKind() == SDep::Order) {
+ if (S.getSUnit() == *I && stageScheduled(*I) == StageInst1) {
+ OrderBeforeUse = true;
+ MoveUse = Pos;
+ }
+ } else if (TargetRegisterInfo::isPhysicalRegister(S.getReg())) {
+ if (cycleScheduled(SU) != cycleScheduled(S.getSUnit())) {
+ if (S.isAssignedRegDep()) {
+ OrderAfterDef = true;
+ MoveDef = Pos;
+ }
+ } else {
+ OrderBeforeUse = true;
+ MoveUse = Pos;
+ }
+ }
+ for (auto &P : SU->Preds)
+ if (P.getKind() == SDep::Order) {
+ if (P.getSUnit() == *I && stageScheduled(*I) == StageInst1) {
+ OrderAfterDef = true;
+ MoveDef = Pos;
+ }
+ } else if (TargetRegisterInfo::isPhysicalRegister(P.getReg())) {
+ if (cycleScheduled(SU) != cycleScheduled(P.getSUnit())) {
+ if (P.isAssignedRegDep()) {
+ OrderBeforeUse = true;
+ MoveUse = Pos;
+ }
+ } else {
+ OrderAfterDef = true;
+ MoveDef = Pos;
+ }
+ }
+ }
+
+ // A circular dependence.
+ if (OrderAfterDef && OrderBeforeUse && MoveUse == MoveDef)
+ OrderBeforeUse = false;
+
+  // OrderAfterDef takes precedence over OrderBeforeDef. The latter is due
+ // to a loop-carried dependence.
+ if (OrderBeforeDef)
+ OrderBeforeUse = !OrderAfterDef || (MoveUse > MoveDef);
+
+ // The uncommon case when the instruction order needs to be updated because
+  // there is both a use and a def.
+ if (OrderBeforeUse && OrderAfterDef) {
+ SUnit *UseSU = Insts.at(MoveUse);
+ SUnit *DefSU = Insts.at(MoveDef);
+ if (MoveUse > MoveDef) {
+ Insts.erase(Insts.begin() + MoveUse);
+ Insts.erase(Insts.begin() + MoveDef);
+ } else {
+ Insts.erase(Insts.begin() + MoveDef);
+ Insts.erase(Insts.begin() + MoveUse);
+ }
+ if (orderDependence(SSD, UseSU, Insts)) {
+ Insts.push_front(SU);
+ orderDependence(SSD, DefSU, Insts);
+ return true;
+ }
+ Insts.pop_back();
+ Insts.push_back(SU);
+ Insts.push_back(UseSU);
+ orderDependence(SSD, DefSU, Insts);
+ return false;
+ }
+ // Put the new instruction first if there is a use in the list. Otherwise,
+ // put it at the end of the list.
+ if (OrderBeforeUse)
+ Insts.push_front(SU);
+ else
+ Insts.push_back(SU);
+ return OrderBeforeUse;
+}
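+
+// For example (illustrative): if SU defines vreg %a and an instruction
+// already placed in this cycle reads %a in the same or an earlier stage,
+// OrderBeforeUse is set and, absent a conflicting def ordering, SU is
+// pushed to the front of the deque so the textual def precedes its use in
+// the emitted kernel.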
+
+/// Return true if the scheduled Phi has a loop carried operand.
+bool SMSchedule::isLoopCarried(SwingSchedulerDAG *SSD, MachineInstr &Phi) {
+ if (!Phi.isPHI())
+ return false;
+  assert(Phi.isPHI() && "Expecting a Phi.");
+ SUnit *DefSU = SSD->getSUnit(&Phi);
+ unsigned DefCycle = cycleScheduled(DefSU);
+ int DefStage = stageScheduled(DefSU);
+
+ unsigned InitVal = 0;
+ unsigned LoopVal = 0;
+ getPhiRegs(Phi, Phi.getParent(), InitVal, LoopVal);
+ SUnit *UseSU = SSD->getSUnit(MRI.getVRegDef(LoopVal));
+ if (!UseSU)
+ return true;
+ if (UseSU->getInstr()->isPHI())
+ return true;
+ unsigned LoopCycle = cycleScheduled(UseSU);
+ int LoopStage = stageScheduled(UseSU);
+ return (LoopCycle > DefCycle) || (LoopStage <= DefStage);
+}
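+
+// Illustrative case: for v1 = phi(init, v3) with the Phi scheduled at
+// cycle 2 / stage 1 and the loop definition v3 at cycle 5 / stage 1,
+// LoopCycle (5) > DefCycle (2), so v3 is produced after the Phi within an
+// iteration and its value must be carried into the next iteration.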
+
+/// Return true if the instruction is a definition that is loop carried
+/// and defines the use on the next iteration.
+/// v1 = phi(v2, v3)
+/// (Def) v3 = op v1
+/// (MO) = v1
+/// If MO appears before Def, then v1 and v3 may get assigned to the same
+/// register.
+bool SMSchedule::isLoopCarriedDefOfUse(SwingSchedulerDAG *SSD,
+ MachineInstr *Def, MachineOperand &MO) {
+ if (!MO.isReg())
+ return false;
+ if (Def->isPHI())
+ return false;
+ MachineInstr *Phi = MRI.getVRegDef(MO.getReg());
+ if (!Phi || !Phi->isPHI() || Phi->getParent() != Def->getParent())
+ return false;
+ if (!isLoopCarried(SSD, *Phi))
+ return false;
+ unsigned LoopReg = getLoopPhiReg(*Phi, Phi->getParent());
+ for (unsigned i = 0, e = Def->getNumOperands(); i != e; ++i) {
+ MachineOperand &DMO = Def->getOperand(i);
+ if (!DMO.isReg() || !DMO.isDef())
+ continue;
+ if (DMO.getReg() == LoopReg)
+ return true;
+ }
+ return false;
+}
+
+// Check if the generated schedule is valid. This function checks if
+// an instruction that uses a physical register is scheduled in a
+// different stage than the definition. The pipeliner does not handle
+// physical register values that may cross a basic block boundary.
+bool SMSchedule::isValidSchedule(SwingSchedulerDAG *SSD) {
+ for (int i = 0, e = SSD->SUnits.size(); i < e; ++i) {
+ SUnit &SU = SSD->SUnits[i];
+ if (!SU.hasPhysRegDefs)
+ continue;
+ int StageDef = stageScheduled(&SU);
+ assert(StageDef != -1 && "Instruction should have been scheduled.");
+ for (auto &SI : SU.Succs)
+ if (SI.isAssignedRegDep())
+ if (ST.getRegisterInfo()->isPhysicalRegister(SI.getReg()))
+ if (stageScheduled(SI.getSUnit()) != StageDef)
+ return false;
+ }
+ return true;
+}
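+
+// For example (illustrative): a def of a physical register in stage 0 whose
+// reader is placed in stage 1 would force the physical value to stay live
+// across the new loop boundary, which the pipeliner does not support, so
+// such a schedule is rejected.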
+
+/// After the schedule has been formed, call this function to combine
+/// the instructions from the different stages/cycles. That is, this
+/// function creates a schedule that represents a single iteration.
+void SMSchedule::finalizeSchedule(SwingSchedulerDAG *SSD) {
+ // Move all instructions to the first stage from later stages.
+ for (int cycle = getFirstCycle(); cycle <= getFinalCycle(); ++cycle) {
+ for (int stage = 1, lastStage = getMaxStageCount(); stage <= lastStage;
+ ++stage) {
+ std::deque<SUnit *> &cycleInstrs =
+ ScheduledInstrs[cycle + (stage * InitiationInterval)];
+ for (std::deque<SUnit *>::reverse_iterator I = cycleInstrs.rbegin(),
+ E = cycleInstrs.rend();
+ I != E; ++I)
+ ScheduledInstrs[cycle].push_front(*I);
+ }
+ }
+ // Iterate over the definitions in each instruction, and compute the
+ // stage difference for each use. Keep the maximum value.
+ for (auto &I : InstrToCycle) {
+ int DefStage = stageScheduled(I.first);
+ MachineInstr *MI = I.first->getInstr();
+ for (unsigned i = 0, e = MI->getNumOperands(); i < e; ++i) {
+ MachineOperand &Op = MI->getOperand(i);
+ if (!Op.isReg() || !Op.isDef())
+ continue;
+
+ unsigned Reg = Op.getReg();
+ unsigned MaxDiff = 0;
+ bool PhiIsSwapped = false;
+ for (MachineRegisterInfo::use_iterator UI = MRI.use_begin(Reg),
+ EI = MRI.use_end();
+ UI != EI; ++UI) {
+ MachineOperand &UseOp = *UI;
+ MachineInstr *UseMI = UseOp.getParent();
+ SUnit *SUnitUse = SSD->getSUnit(UseMI);
+ int UseStage = stageScheduled(SUnitUse);
+ unsigned Diff = 0;
+ if (UseStage != -1 && UseStage >= DefStage)
+ Diff = UseStage - DefStage;
+ if (MI->isPHI()) {
+ if (isLoopCarried(SSD, *MI))
+ ++Diff;
+ else
+ PhiIsSwapped = true;
+ }
+ MaxDiff = std::max(Diff, MaxDiff);
+ }
+ RegToStageDiff[Reg] = std::make_pair(MaxDiff, PhiIsSwapped);
+ }
+ }
+
+ // Erase all the elements in the later stages. Only one iteration should
+ // remain in the scheduled list, and it contains all the instructions.
+ for (int cycle = getFinalCycle() + 1; cycle <= LastCycle; ++cycle)
+ ScheduledInstrs.erase(cycle);
+
+  // Change the registers in each instruction as specified in the InstrChanges
+ // map. We need to use the new registers to create the correct order.
+ for (int i = 0, e = SSD->SUnits.size(); i != e; ++i) {
+ SUnit *SU = &SSD->SUnits[i];
+ SSD->applyInstrChange(SU->getInstr(), *this, true);
+ }
+
+ // Reorder the instructions in each cycle to fix and improve the
+ // generated code.
+ for (int Cycle = getFirstCycle(), E = getFinalCycle(); Cycle <= E; ++Cycle) {
+ std::deque<SUnit *> &cycleInstrs = ScheduledInstrs[Cycle];
+ std::deque<SUnit *> newOrderZC;
+ // Put the zero-cost, pseudo instructions at the start of the cycle.
+ for (unsigned i = 0, e = cycleInstrs.size(); i < e; ++i) {
+ SUnit *SU = cycleInstrs[i];
+ if (ST.getInstrInfo()->isZeroCost(SU->getInstr()->getOpcode()))
+ orderDependence(SSD, SU, newOrderZC);
+ }
+ std::deque<SUnit *> newOrderI;
+ // Then, add the regular instructions back.
+ for (unsigned i = 0, e = cycleInstrs.size(); i < e; ++i) {
+ SUnit *SU = cycleInstrs[i];
+ if (!ST.getInstrInfo()->isZeroCost(SU->getInstr()->getOpcode()))
+ orderDependence(SSD, SU, newOrderI);
+ }
+ // Replace the old order with the new order.
+ cycleInstrs.swap(newOrderZC);
+ cycleInstrs.insert(cycleInstrs.end(), newOrderI.begin(), newOrderI.end());
+ }
+
+  DEBUG(dump());
+}
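+
+// Illustrative folding (not from the patched sources): with II == 2 and
+// FirstCycle == 0, the kernel keeps only cycles 0 and 1. An instruction
+// scheduled at absolute cycle 5 belongs to stage 2 of kernel cycle 1
+// (5 == 1 + 2 * 2), so the loop above prepends it to ScheduledInstrs[1]
+// before the later-stage entries are erased.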
+
+/// Print the schedule information to the given output.
+void SMSchedule::print(raw_ostream &os) const {
+ // Iterate over each cycle.
+ for (int cycle = getFirstCycle(); cycle <= getFinalCycle(); ++cycle) {
+ // Iterate over each instruction in the cycle.
+ const_sched_iterator cycleInstrs = ScheduledInstrs.find(cycle);
+ for (SUnit *CI : cycleInstrs->second) {
+ os << "cycle " << cycle << " (" << stageScheduled(CI) << ") ";
+ os << "(" << CI->NodeNum << ") ";
+ CI->getInstr()->print(os);
+ os << "\n";
+ }
+ }
+}
+
+/// Utility function used for debugging to print the schedule.
+void SMSchedule::dump() const { print(dbgs()); }
diff --git a/contrib/llvm/lib/CodeGen/MachineRegisterInfo.cpp b/contrib/llvm/lib/CodeGen/MachineRegisterInfo.cpp
index 613598d..242cb0b 100644
--- a/contrib/llvm/lib/CodeGen/MachineRegisterInfo.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineRegisterInfo.cpp
@@ -21,11 +21,16 @@
using namespace llvm;
+static cl::opt<bool> EnableSubRegLiveness("enable-subreg-liveness", cl::Hidden,
+ cl::init(true), cl::desc("Enable subregister liveness tracking."));
+
// Pin the vtable to this file.
void MachineRegisterInfo::Delegate::anchor() {}
MachineRegisterInfo::MachineRegisterInfo(MachineFunction *MF)
- : MF(MF), TheDelegate(nullptr), TracksSubRegLiveness(false) {
+ : MF(MF), TheDelegate(nullptr),
+ TracksSubRegLiveness(MF->getSubtarget().enableSubRegLiveness() &&
+ EnableSubRegLiveness) {
unsigned NumRegs = getTargetRegisterInfo()->getNumRegs();
VRegInfo.reserve(256);
RegAllocHints.reserve(256);
@@ -88,6 +93,13 @@ MachineRegisterInfo::recomputeRegClass(unsigned Reg) {
return true;
}
+unsigned MachineRegisterInfo::createIncompleteVirtualRegister() {
+ unsigned Reg = TargetRegisterInfo::index2VirtReg(getNumVirtRegs());
+ VRegInfo.grow(Reg);
+ RegAllocHints.grow(Reg);
+ return Reg;
+}
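+
+// Note (illustrative, not part of the patched sources): both
+// createVirtualRegister below and createGenericVirtualRegister funnel
+// through this helper; the caller is expected to complete the new vreg with
+// either a register class or an LLT type before it is used.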
+
/// createVirtualRegister - Create and return a new virtual register in the
/// function with the specified register class.
///
@@ -98,41 +110,42 @@ MachineRegisterInfo::createVirtualRegister(const TargetRegisterClass *RegClass){
"Virtual register RegClass must be allocatable.");
// New virtual register number.
- unsigned Reg = TargetRegisterInfo::index2VirtReg(getNumVirtRegs());
- VRegInfo.grow(Reg);
+ unsigned Reg = createIncompleteVirtualRegister();
VRegInfo[Reg].first = RegClass;
- RegAllocHints.grow(Reg);
if (TheDelegate)
TheDelegate->MRI_NoteNewVirtualRegister(Reg);
return Reg;
}
-unsigned
-MachineRegisterInfo::getSize(unsigned VReg) const {
- VRegToSizeMap::const_iterator SizeIt = getVRegToSize().find(VReg);
- return SizeIt != getVRegToSize().end() ? SizeIt->second : 0;
+LLT MachineRegisterInfo::getType(unsigned VReg) const {
+ VRegToTypeMap::const_iterator TypeIt = getVRegToType().find(VReg);
+ return TypeIt != getVRegToType().end() ? TypeIt->second : LLT{};
}
-void MachineRegisterInfo::setSize(unsigned VReg, unsigned Size) {
- getVRegToSize()[VReg] = Size;
+void MachineRegisterInfo::setType(unsigned VReg, LLT Ty) {
+ // Check that VReg doesn't have a class.
+ assert((getRegClassOrRegBank(VReg).isNull() ||
+ !getRegClassOrRegBank(VReg).is<const TargetRegisterClass *>()) &&
+         "Can't set the type of a non-generic virtual register");
+ getVRegToType()[VReg] = Ty;
}
unsigned
-MachineRegisterInfo::createGenericVirtualRegister(unsigned Size) {
- assert(Size && "Cannot create empty virtual register");
-
+MachineRegisterInfo::createGenericVirtualRegister(LLT Ty) {
// New virtual register number.
- unsigned Reg = TargetRegisterInfo::index2VirtReg(getNumVirtRegs());
- VRegInfo.grow(Reg);
+ unsigned Reg = createIncompleteVirtualRegister();
// FIXME: Should we use a dummy register class?
- VRegInfo[Reg].first = static_cast<TargetRegisterClass *>(nullptr);
- getVRegToSize()[Reg] = Size;
- RegAllocHints.grow(Reg);
+ VRegInfo[Reg].first = static_cast<RegisterBank *>(nullptr);
+ getVRegToType()[Reg] = Ty;
if (TheDelegate)
TheDelegate->MRI_NoteNewVirtualRegister(Reg);
return Reg;
}
+void MachineRegisterInfo::clearVirtRegTypes() {
+ getVRegToType().clear();
+}
+
/// clearVirtRegs - Remove all virtual registers (after physreg assignment).
void MachineRegisterInfo::clearVirtRegs() {
#ifndef NDEBUG
@@ -444,13 +457,16 @@ void MachineRegisterInfo::freezeReservedRegs(const MachineFunction &MF) {
"Invalid ReservedRegs vector from target");
}
-bool MachineRegisterInfo::isConstantPhysReg(unsigned PhysReg,
- const MachineFunction &MF) const {
+bool MachineRegisterInfo::isConstantPhysReg(unsigned PhysReg) const {
assert(TargetRegisterInfo::isPhysicalRegister(PhysReg));
+ const TargetRegisterInfo *TRI = getTargetRegisterInfo();
+ if (TRI->isConstantPhysReg(PhysReg))
+ return true;
+
// Check if any overlapping register is modified, or allocatable so it may be
// used later.
- for (MCRegAliasIterator AI(PhysReg, getTargetRegisterInfo(), true);
+ for (MCRegAliasIterator AI(PhysReg, TRI, true);
AI.isValid(); ++AI)
if (!def_empty(*AI) || isAllocatable(*AI))
return false;
diff --git a/contrib/llvm/lib/CodeGen/MachineSSAUpdater.cpp b/contrib/llvm/lib/CodeGen/MachineSSAUpdater.cpp
index 47ad60c..e9b4755 100644
--- a/contrib/llvm/lib/CodeGen/MachineSSAUpdater.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineSSAUpdater.cpp
@@ -18,7 +18,6 @@
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/Support/AlignOf.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
diff --git a/contrib/llvm/lib/CodeGen/MachineScheduler.cpp b/contrib/llvm/lib/CodeGen/MachineScheduler.cpp
index d921e29..e06bc51 100644
--- a/contrib/llvm/lib/CodeGen/MachineScheduler.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineScheduler.cpp
@@ -230,11 +230,6 @@ static cl::opt<bool> EnablePostRAMachineSched(
cl::desc("Enable the post-ra machine instruction scheduling pass."),
cl::init(true), cl::Hidden);
-/// Forward declare the standard machine scheduler. This will be used as the
-/// default scheduler if the target does not set a default.
-static ScheduleDAGInstrs *createGenericSchedLive(MachineSchedContext *C);
-static ScheduleDAGInstrs *createGenericSchedPostRA(MachineSchedContext *C);
-
/// Decrement this iterator until reaching the top or a non-debug instr.
static MachineBasicBlock::const_iterator
priorNonDebug(MachineBasicBlock::const_iterator I,
@@ -251,8 +246,8 @@ priorNonDebug(MachineBasicBlock::const_iterator I,
static MachineBasicBlock::iterator
priorNonDebug(MachineBasicBlock::iterator I,
MachineBasicBlock::const_iterator Beg) {
- return const_cast<MachineInstr*>(
- &*priorNonDebug(MachineBasicBlock::const_iterator(I), Beg));
+ return priorNonDebug(MachineBasicBlock::const_iterator(I), Beg)
+ .getNonConstIterator();
}
/// If this iterator is a debug value, increment until reaching the End or a
@@ -271,12 +266,8 @@ nextIfDebug(MachineBasicBlock::const_iterator I,
static MachineBasicBlock::iterator
nextIfDebug(MachineBasicBlock::iterator I,
MachineBasicBlock::const_iterator End) {
- // Cast the return value to nonconst MachineInstr, then cast to an
- // instr_iterator, which does not check for null, finally return a
- // bundle_iterator.
- return MachineBasicBlock::instr_iterator(
- const_cast<MachineInstr*>(
- &*nextIfDebug(MachineBasicBlock::const_iterator(I), End)));
+ return nextIfDebug(MachineBasicBlock::const_iterator(I), End)
+ .getNonConstIterator();
}
/// Instantiate a ScheduleDAGInstrs that will be owned by the caller.
@@ -458,9 +449,10 @@ void MachineSchedulerBase::scheduleRegions(ScheduleDAGInstrs &Scheduler,
unsigned NumRegionInstrs = 0;
MachineBasicBlock::iterator I = RegionEnd;
for (;I != MBB->begin(); --I) {
- if (isSchedBoundary(&*std::prev(I), &*MBB, MF, TII))
+ MachineInstr &MI = *std::prev(I);
+ if (isSchedBoundary(&MI, &*MBB, MF, TII))
break;
- if (!I->isDebugValue())
+ if (!MI.isDebugValue())
++NumRegionInstrs;
}
// Notify the scheduler of the region, even if we may skip scheduling
@@ -692,8 +684,14 @@ void ScheduleDAGMI::schedule() {
// This may initialize a DFSResult to be used for queue priority.
SchedImpl->initialize(this);
- DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
- SUnits[su].dumpAll(this));
+ DEBUG(
+ if (EntrySU.getInstr() != nullptr)
+ EntrySU.dumpAll(this);
+ for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
+ SUnits[su].dumpAll(this);
+ if (ExitSU.getInstr() != nullptr)
+ ExitSU.dumpAll(this);
+ );
if (ViewMISchedDAGs) viewGraph();
// Initialize ready queues now that the DAG and priority data are finalized.
@@ -862,6 +860,44 @@ ScheduleDAGMILive::~ScheduleDAGMILive() {
delete DFSResult;
}
+void ScheduleDAGMILive::collectVRegUses(SUnit &SU) {
+ const MachineInstr &MI = *SU.getInstr();
+ for (const MachineOperand &MO : MI.operands()) {
+ if (!MO.isReg())
+ continue;
+ if (!MO.readsReg())
+ continue;
+ if (TrackLaneMasks && !MO.isUse())
+ continue;
+
+ unsigned Reg = MO.getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ continue;
+
+ // Ignore re-defs.
+ if (TrackLaneMasks) {
+ bool FoundDef = false;
+ for (const MachineOperand &MO2 : MI.operands()) {
+ if (MO2.isReg() && MO2.isDef() && MO2.getReg() == Reg && !MO2.isDead()) {
+ FoundDef = true;
+ break;
+ }
+ }
+ if (FoundDef)
+ continue;
+ }
+
+ // Record this local VReg use.
+ VReg2SUnitMultiMap::iterator UI = VRegUses.find(Reg);
+ for (; UI != VRegUses.end(); ++UI) {
+ if (UI->SU == &SU)
+ break;
+ }
+ if (UI == VRegUses.end())
+ VRegUses.insert(VReg2SUnit(Reg, LaneBitmask::getNone(), &SU));
+ }
+}
+
/// enterRegion - Called back from MachineScheduler::runOnMachineFunction after
/// crossing a scheduling boundary. [begin, end) includes all instructions in
/// the region, including the boundary itself and single-instruction regions
@@ -889,6 +925,11 @@ void ScheduleDAGMILive::enterRegion(MachineBasicBlock *bb,
// Setup the register pressure trackers for the top scheduled top and bottom
// scheduled regions.
void ScheduleDAGMILive::initRegPressure() {
+ VRegUses.clear();
+ VRegUses.setUniverse(MRI.getNumVirtRegs());
+ for (SUnit &SU : SUnits)
+ collectVRegUses(SU);
+
TopRPTracker.init(&MF, RegClassInfo, LIS, BB, RegionBegin,
ShouldTrackLaneMasks, false);
BotRPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd,
@@ -999,7 +1040,7 @@ void ScheduleDAGMILive::updatePressureDiffs(
// this fact anymore => decrement pressure.
// If the register has just become dead then other uses make it come
// back to life => increment pressure.
- bool Decrement = P.LaneMask != 0;
+ bool Decrement = P.LaneMask.any();
for (const VReg2SUnit &V2SU
: make_range(VRegUses.find(Reg), VRegUses.end())) {
@@ -1018,7 +1059,7 @@ void ScheduleDAGMILive::updatePressureDiffs(
);
}
} else {
- assert(P.LaneMask != 0);
+ assert(P.LaneMask.any());
DEBUG(dbgs() << " LiveReg: " << PrintVRegOrUnit(Reg, TRI) << "\n");
// This may be called before CurrentBottom has been initialized. However,
// BotRPTracker must have a valid position. We want the value live into the
@@ -1087,6 +1128,8 @@ void ScheduleDAGMILive::schedule() {
SchedImpl->initialize(this);
DEBUG(
+ if (EntrySU.getInstr() != nullptr)
+ EntrySU.dumpAll(this);
for (const SUnit &SU : SUnits) {
SU.dumpAll(this);
if (ShouldTrackPressure) {
@@ -1095,6 +1138,8 @@ void ScheduleDAGMILive::schedule() {
}
dbgs() << '\n';
}
+ if (ExitSU.getInstr() != nullptr)
+ ExitSU.dumpAll(this);
);
if (ViewMISchedDAGs) viewGraph();
@@ -1362,7 +1407,8 @@ class BaseMemOpClusterMutation : public ScheduleDAGMutation {
: SU(su), BaseReg(reg), Offset(ofs) {}
bool operator<(const MemOpInfo&RHS) const {
- return std::tie(BaseReg, Offset) < std::tie(RHS.BaseReg, RHS.Offset);
+ return std::tie(BaseReg, Offset, SU->NodeNum) <
+ std::tie(RHS.BaseReg, RHS.Offset, RHS.SU->NodeNum);
}
};
@@ -1395,6 +1441,24 @@ public:
};
} // anonymous
+namespace llvm {
+
+std::unique_ptr<ScheduleDAGMutation>
+createLoadClusterDAGMutation(const TargetInstrInfo *TII,
+ const TargetRegisterInfo *TRI) {
+ return EnableMemOpCluster ? make_unique<LoadClusterMutation>(TII, TRI)
+ : nullptr;
+}
+
+std::unique_ptr<ScheduleDAGMutation>
+createStoreClusterDAGMutation(const TargetInstrInfo *TII,
+ const TargetRegisterInfo *TRI) {
+ return EnableMemOpCluster ? make_unique<StoreClusterMutation>(TII, TRI)
+ : nullptr;
+}
+
+} // namespace llvm
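+
+// Typical use of the factories above (an illustrative sketch, not from the
+// patched sources): a target's createMachineScheduler hook can opt in
+// explicitly, e.g.
+//   ScheduleDAGMILive *DAG = createGenericSchedLive(C);
+//   DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
+//   DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI));
+// A null return (clustering disabled) is simply skipped by addMutation.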
+
void BaseMemOpClusterMutation::clusterNeighboringMemOps(
ArrayRef<SUnit *> MemOps, ScheduleDAGMI *DAG) {
SmallVector<MemOpInfo, 32> MemOpRecords;
@@ -1487,29 +1551,23 @@ namespace {
/// that may be fused by the processor into a single operation.
class MacroFusion : public ScheduleDAGMutation {
const TargetInstrInfo &TII;
- const TargetRegisterInfo &TRI;
public:
- MacroFusion(const TargetInstrInfo &TII, const TargetRegisterInfo &TRI)
- : TII(TII), TRI(TRI) {}
+ MacroFusion(const TargetInstrInfo &TII)
+ : TII(TII) {}
void apply(ScheduleDAGInstrs *DAGInstrs) override;
};
} // anonymous
-/// Returns true if \p MI reads a register written by \p Other.
-static bool HasDataDep(const TargetRegisterInfo &TRI, const MachineInstr &MI,
- const MachineInstr &Other) {
- for (const MachineOperand &MO : MI.uses()) {
- if (!MO.isReg() || !MO.readsReg())
- continue;
+namespace llvm {
- unsigned Reg = MO.getReg();
- if (Other.modifiesRegister(Reg, &TRI))
- return true;
- }
- return false;
+std::unique_ptr<ScheduleDAGMutation>
+createMacroFusionDAGMutation(const TargetInstrInfo *TII) {
+ return EnableMacroFusion ? make_unique<MacroFusion>(*TII) : nullptr;
}
+} // namespace llvm
+
/// \brief Callback from DAG postProcessing to create cluster edges to encourage
/// fused operations.
void MacroFusion::apply(ScheduleDAGInstrs *DAGInstrs) {
@@ -1521,16 +1579,12 @@ void MacroFusion::apply(ScheduleDAGInstrs *DAGInstrs) {
if (!Branch)
return;
- for (SUnit &SU : DAG->SUnits) {
- // SUnits with successors can't be schedule in front of the ExitSU.
- if (!SU.Succs.empty())
- continue;
- // We only care if the node writes to a register that the branch reads.
- MachineInstr *Pred = SU.getInstr();
- if (!HasDataDep(TRI, *Branch, *Pred))
+ for (SDep &PredDep : ExitSU.Preds) {
+ if (PredDep.isWeak())
continue;
-
- if (!TII.shouldScheduleAdjacent(*Pred, *Branch))
+ SUnit &SU = *PredDep.getSUnit();
+ MachineInstr &Pred = *SU.getInstr();
+ if (!TII.shouldScheduleAdjacent(Pred, *Branch))
continue;
// Create a single weak edge from SU to ExitSU. The only effect is to cause
@@ -1543,6 +1597,16 @@ void MacroFusion::apply(ScheduleDAGInstrs *DAGInstrs) {
(void)Success;
assert(Success && "No DAG nodes should be reachable from ExitSU");
+ // Adjust latency of data deps between the nodes.
+ for (SDep &PredDep : ExitSU.Preds) {
+ if (PredDep.getSUnit() == &SU)
+ PredDep.setLatency(0);
+ }
+ for (SDep &SuccDep : SU.Succs) {
+ if (SuccDep.getSUnit() == &ExitSU)
+ SuccDep.setLatency(0);
+ }
+
DEBUG(dbgs() << "Macro Fuse SU(" << SU.NodeNum << ")\n");
break;
}
@@ -1572,6 +1636,16 @@ protected:
};
} // anonymous
+namespace llvm {
+
+std::unique_ptr<ScheduleDAGMutation>
+createCopyConstrainDAGMutation(const TargetInstrInfo *TII,
+ const TargetRegisterInfo *TRI) {
+ return make_unique<CopyConstrain>(TII, TRI);
+}
+
+} // namespace llvm
+
/// constrainLocalCopy handles two possibilities:
/// 1) Local src:
/// I0: = dst
@@ -1760,7 +1834,6 @@ void SchedBoundary::reset() {
Available.clear();
Pending.clear();
CheckPending = false;
- NextSUs.clear();
CurrCycle = 0;
CurrMOps = 0;
MinReadyCycle = UINT_MAX;
@@ -1961,23 +2034,6 @@ void SchedBoundary::releaseNode(SUnit *SU, unsigned ReadyCycle) {
Pending.push(SU);
else
Available.push(SU);
-
- // Record this node as an immediate dependent of the scheduled node.
- NextSUs.insert(SU);
-}
-
-void SchedBoundary::releaseTopNode(SUnit *SU) {
- if (SU->isScheduled)
- return;
-
- releaseNode(SU, SU->TopReadyCycle);
-}
-
-void SchedBoundary::releaseBottomNode(SUnit *SU) {
- if (SU->isScheduled)
- return;
-
- releaseNode(SU, SU->BotReadyCycle);
}
/// Move the boundary of scheduled code by one cycle.
@@ -2828,9 +2884,8 @@ void GenericScheduler::tryCandidate(SchedCandidate &Cand,
bool SameBoundary = Zone != nullptr;
if (SameBoundary) {
// For loops that are acyclic path limited, aggressively schedule for
- // latency. This can result in very long dependence chains scheduled in
- // sequence, so once every cycle (when CurrMOps == 0), switch to normal
- // heuristics.
+    // latency. Within a single cycle, whenever CurrMOps > 0, allow normal
+ // heuristics to take precedence.
if (Rem.IsAcyclicLatencyLimited && !Zone->getCurrMOps() &&
tryLatency(TryCand, Cand, *Zone))
return;
@@ -2888,13 +2943,6 @@ void GenericScheduler::tryCandidate(SchedCandidate &Cand,
!Rem.IsAcyclicLatencyLimited && tryLatency(TryCand, Cand, *Zone))
return;
- // Prefer immediate defs/users of the last scheduled instruction. This is a
- // local pressure avoidance strategy that also makes the machine code
- // readable.
- if (tryGreater(Zone->isNextSU(TryCand.SU), Zone->isNextSU(Cand.SU),
- TryCand, Cand, NextDefUse))
- return;
-
// Fall through to original instruction order.
if ((Zone->isTop() && TryCand.SU->NodeNum < Cand.SU->NodeNum)
|| (!Zone->isTop() && TryCand.SU->NodeNum > Cand.SU->NodeNum)) {
@@ -3105,28 +3153,24 @@ void GenericScheduler::schedNode(SUnit *SU, bool IsTopNode) {
/// Create the standard converging machine scheduler. This will be used as the
/// default scheduler if the target does not set a default.
-static ScheduleDAGInstrs *createGenericSchedLive(MachineSchedContext *C) {
+ScheduleDAGMILive *llvm::createGenericSchedLive(MachineSchedContext *C) {
ScheduleDAGMILive *DAG = new ScheduleDAGMILive(C, make_unique<GenericScheduler>(C));
// Register DAG post-processors.
//
// FIXME: extend the mutation API to allow earlier mutations to instantiate
// data and pass it to later mutations. Have a single mutation that gathers
// the interesting nodes in one pass.
- DAG->addMutation(make_unique<CopyConstrain>(DAG->TII, DAG->TRI));
- if (EnableMemOpCluster) {
- if (DAG->TII->enableClusterLoads())
- DAG->addMutation(make_unique<LoadClusterMutation>(DAG->TII, DAG->TRI));
- if (DAG->TII->enableClusterStores())
- DAG->addMutation(make_unique<StoreClusterMutation>(DAG->TII, DAG->TRI));
- }
- if (EnableMacroFusion)
- DAG->addMutation(make_unique<MacroFusion>(*DAG->TII, *DAG->TRI));
+ DAG->addMutation(createCopyConstrainDAGMutation(DAG->TII, DAG->TRI));
return DAG;
}
+static ScheduleDAGInstrs *createConvergingSched(MachineSchedContext *C) {
+ return createGenericSchedLive(C);
+}
+
static MachineSchedRegistry
GenericSchedRegistry("converge", "Standard converging scheduler.",
- createGenericSchedLive);
+                     createConvergingSched);
//===----------------------------------------------------------------------===//
// PostGenericScheduler - Generic PostRA implementation of MachineSchedStrategy.
@@ -3257,9 +3301,9 @@ void PostGenericScheduler::schedNode(SUnit *SU, bool IsTopNode) {
Top.bumpNode(SU);
}
-/// Create a generic scheduler with no vreg liveness or DAG mutation passes.
-static ScheduleDAGInstrs *createGenericSchedPostRA(MachineSchedContext *C) {
- return new ScheduleDAGMI(C, make_unique<PostGenericScheduler>(C), /*IsPostRA=*/true);
+ScheduleDAGMI *llvm::createGenericSchedPostRA(MachineSchedContext *C) {
+ return new ScheduleDAGMI(C, make_unique<PostGenericScheduler>(C),
+ /*RemoveKillFlags=*/true);
}
//===----------------------------------------------------------------------===//
diff --git a/contrib/llvm/lib/CodeGen/MachineSink.cpp b/contrib/llvm/lib/CodeGen/MachineSink.cpp
index 571a5c1..5f87b68 100644
--- a/contrib/llvm/lib/CodeGen/MachineSink.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineSink.cpp
@@ -22,9 +22,15 @@
#include "llvm/ADT/SparseBitVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
+#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachinePostDominators.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/LLVMContext.h"
@@ -34,6 +40,13 @@
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <map>
+#include <utility>
+#include <vector>
+
using namespace llvm;
#define DEBUG_TYPE "machine-sink"
@@ -48,12 +61,21 @@ UseBlockFreqInfo("machine-sink-bfi",
cl::desc("Use block frequency info to find successors to sink"),
cl::init(true), cl::Hidden);
+static cl::opt<unsigned> SplitEdgeProbabilityThreshold(
+ "machine-sink-split-probability-threshold",
+ cl::desc(
+        "Percentage threshold for splitting a single-instruction critical "
+        "edge. If the branch probability is higher than this threshold, we "
+        "allow speculative execution of up to 1 instruction to avoid "
+        "branching to the split critical edge"),
+ cl::init(40), cl::Hidden);
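+
+// Illustrative reading of the default (not from the patched sources): with
+// the threshold at 40, an edge taken with probability <= 40% is still
+// considered worth splitting, while a hotter edge keeps the single
+// speculated instruction instead of paying for a branch to a new block.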
STATISTIC(NumSunk, "Number of machine instructions sunk");
STATISTIC(NumSplit, "Number of critical edges split");
STATISTIC(NumCoalesces, "Number of copies coalesced");
namespace {
+
class MachineSinking : public MachineFunctionPass {
const TargetInstrInfo *TII;
const TargetRegisterInfo *TRI;
@@ -62,15 +84,16 @@ namespace {
MachinePostDominatorTree *PDT; // Machine post dominator tree
MachineLoopInfo *LI;
const MachineBlockFrequencyInfo *MBFI;
+ const MachineBranchProbabilityInfo *MBPI;
AliasAnalysis *AA;
// Remember which edges have been considered for breaking.
- SmallSet<std::pair<MachineBasicBlock*,MachineBasicBlock*>, 8>
+ SmallSet<std::pair<MachineBasicBlock*, MachineBasicBlock*>, 8>
CEBCandidates;
// Remember which edges we are about to split.
// This is different from CEBCandidates since those edges
// will be split.
- SetVector<std::pair<MachineBasicBlock*,MachineBasicBlock*> > ToSplit;
+ SetVector<std::pair<MachineBasicBlock*, MachineBasicBlock*> > ToSplit;
SparseBitVector<> RegsToClearKillFlags;
@@ -79,6 +102,7 @@ namespace {
public:
static char ID; // Pass identification
+
MachineSinking() : MachineFunctionPass(ID) {
initializeMachineSinkingPass(*PassRegistry::getPassRegistry());
}
@@ -92,6 +116,7 @@ namespace {
AU.addRequired<MachineDominatorTree>();
AU.addRequired<MachinePostDominatorTree>();
AU.addRequired<MachineLoopInfo>();
+ AU.addRequired<MachineBranchProbabilityInfo>();
AU.addPreserved<MachineDominatorTree>();
AU.addPreserved<MachinePostDominatorTree>();
AU.addPreserved<MachineLoopInfo>();
@@ -143,12 +168,14 @@ namespace {
GetAllSortedSuccessors(MachineInstr &MI, MachineBasicBlock *MBB,
AllSuccsCache &AllSuccessors) const;
};
+
} // end anonymous namespace
char MachineSinking::ID = 0;
char &llvm::MachineSinkingID = MachineSinking::ID;
INITIALIZE_PASS_BEGIN(MachineSinking, "machine-sink",
"Machine code sinking", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
@@ -269,11 +296,12 @@ bool MachineSinking::runOnMachineFunction(MachineFunction &MF) {
PDT = &getAnalysis<MachinePostDominatorTree>();
LI = &getAnalysis<MachineLoopInfo>();
MBFI = UseBlockFreqInfo ? &getAnalysis<MachineBlockFrequencyInfo>() : nullptr;
+ MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
bool EverMadeChange = false;
- while (1) {
+ while (true) {
bool MadeChange = false;
// Process all basic blocks.
@@ -369,6 +397,10 @@ bool MachineSinking::isWorthBreakingCriticalEdge(MachineInstr &MI,
if (!MI.isCopy() && !TII->isAsCheapAsAMove(MI))
return true;
+ if (From->isSuccessor(To) && MBPI->getEdgeProbability(From, To) <=
+ BranchProbability(SplitEdgeProbabilityThreshold, 100))
+ return true;
+
// MI is cheap, we probably don't want to break the critical edge for it.
// However, if this would allow some definitions of its source operands
// to be sunk then it's probably worth it.
@@ -604,7 +636,7 @@ MachineSinking::FindSuccToSinkTo(MachineInstr &MI, MachineBasicBlock *MBB,
// If the physreg has no defs anywhere, it's just an ambient register
// and we can freely move its uses. Alternatively, if it's allocatable,
// it could get allocated to something with a def during allocation.
- if (!MRI->isConstantPhysReg(Reg, *MBB->getParent()))
+ if (!MRI->isConstantPhysReg(Reg))
return nullptr;
} else if (!MO.isDead()) {
// A def that isn't dead. We can't move it.
diff --git a/contrib/llvm/lib/CodeGen/MachineTraceMetrics.cpp b/contrib/llvm/lib/CodeGen/MachineTraceMetrics.cpp
index 86332c8..ef7e525 100644
--- a/contrib/llvm/lib/CodeGen/MachineTraceMetrics.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineTraceMetrics.cpp
@@ -430,16 +430,17 @@ public:
po_iterator_storage(LoopBounds &lb) : LB(lb) {}
void finishPostorder(const MachineBasicBlock*) {}
- bool insertEdge(const MachineBasicBlock *From, const MachineBasicBlock *To) {
+ bool insertEdge(Optional<const MachineBasicBlock *> From,
+ const MachineBasicBlock *To) {
// Skip already visited To blocks.
MachineTraceMetrics::TraceBlockInfo &TBI = LB.Blocks[To->getNumber()];
if (LB.Downward ? TBI.hasValidHeight() : TBI.hasValidDepth())
return false;
// From is null once when To is the trace center block.
if (From) {
- if (const MachineLoop *FromLoop = LB.Loops->getLoopFor(From)) {
+ if (const MachineLoop *FromLoop = LB.Loops->getLoopFor(*From)) {
// Don't follow backedges, don't leave FromLoop when going upwards.
- if ((LB.Downward ? To : From) == FromLoop->getHeader())
+ if ((LB.Downward ? To : *From) == FromLoop->getHeader())
return false;
// Don't leave FromLoop.
if (isExitingLoop(FromLoop, LB.Loops->getLoopFor(To)))
diff --git a/contrib/llvm/lib/CodeGen/MachineVerifier.cpp b/contrib/llvm/lib/CodeGen/MachineVerifier.cpp
index a70adb0..a98139f 100644
--- a/contrib/llvm/lib/CodeGen/MachineVerifier.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineVerifier.cpp
@@ -70,6 +70,10 @@ namespace {
unsigned foundErrors;
+ // Avoid querying the MachineFunctionProperties for each operand.
+ bool isFunctionRegBankSelected;
+ bool isFunctionSelected;
+
typedef SmallVector<unsigned, 16> RegVector;
typedef SmallVector<const uint32_t*, 4> RegMaskVector;
typedef DenseSet<unsigned> RegSet;
@@ -204,16 +208,13 @@ namespace {
void visitMachineBasicBlockAfter(const MachineBasicBlock *MBB);
void visitMachineFunctionAfter();
- template <typename T> void report(const char *msg, ilist_iterator<T> I) {
- report(msg, &*I);
- }
void report(const char *msg, const MachineFunction *MF);
void report(const char *msg, const MachineBasicBlock *MBB);
void report(const char *msg, const MachineInstr *MI);
void report(const char *msg, const MachineOperand *MO, unsigned MONum);
void report_context(const LiveInterval &LI) const;
- void report_context(const LiveRange &LR, unsigned Reg,
+ void report_context(const LiveRange &LR, unsigned VRegUnit,
LaneBitmask LaneMask) const;
void report_context(const LiveRange::Segment &S) const;
void report_context(const VNInfo &VNI) const;
@@ -228,10 +229,10 @@ namespace {
void checkLiveness(const MachineOperand *MO, unsigned MONum);
void checkLivenessAtUse(const MachineOperand *MO, unsigned MONum,
SlotIndex UseIdx, const LiveRange &LR, unsigned Reg,
- LaneBitmask LaneMask = 0);
+ LaneBitmask LaneMask = LaneBitmask::getNone());
void checkLivenessAtDef(const MachineOperand *MO, unsigned MONum,
SlotIndex DefIdx, const LiveRange &LR, unsigned Reg,
- LaneBitmask LaneMask = 0);
+ LaneBitmask LaneMask = LaneBitmask::getNone());
void markReachable(const MachineBasicBlock *MBB);
void calcRegsPassed();
@@ -242,11 +243,12 @@ namespace {
void verifyLiveIntervals();
void verifyLiveInterval(const LiveInterval&);
void verifyLiveRangeValue(const LiveRange&, const VNInfo*, unsigned,
- unsigned);
+ LaneBitmask);
void verifyLiveRangeSegment(const LiveRange&,
const LiveRange::const_iterator I, unsigned,
- unsigned);
- void verifyLiveRange(const LiveRange&, unsigned, LaneBitmask LaneMask = 0);
+ LaneBitmask);
+ void verifyLiveRange(const LiveRange&, unsigned,
+ LaneBitmask LaneMask = LaneBitmask::getNone());
void verifyStackFrame();
@@ -310,15 +312,12 @@ void MachineVerifier::verifySlotIndexes() const {
void MachineVerifier::verifyProperties(const MachineFunction &MF) {
// If a pass has introduced virtual registers without clearing the
- // AllVRegsAllocated property (or set it without allocating the vregs)
+ // NoVRegs property (or set it without allocating the vregs)
// then report an error.
if (MF.getProperties().hasProperty(
- MachineFunctionProperties::Property::AllVRegsAllocated) &&
- MRI->getNumVirtRegs()) {
- report(
- "Function has AllVRegsAllocated property but there are VReg operands",
- &MF);
- }
+ MachineFunctionProperties::Property::NoVRegs) &&
+ MRI->getNumVirtRegs())
+ report("Function has NoVRegs property but there are VReg operands", &MF);
}
unsigned MachineVerifier::verify(MachineFunction &MF) {
@@ -330,6 +329,11 @@ unsigned MachineVerifier::verify(MachineFunction &MF) {
TRI = MF.getSubtarget().getRegisterInfo();
MRI = &MF.getRegInfo();
+ isFunctionRegBankSelected = MF.getProperties().hasProperty(
+ MachineFunctionProperties::Property::RegBankSelected);
+ isFunctionSelected = MF.getProperties().hasProperty(
+ MachineFunctionProperties::Property::Selected);
+
LiveVars = nullptr;
LiveInts = nullptr;
LiveStks = nullptr;
@@ -359,7 +363,7 @@ unsigned MachineVerifier::verify(MachineFunction &MF) {
for (MachineBasicBlock::const_instr_iterator MBBI = MFI->instr_begin(),
MBBE = MFI->instr_end(); MBBI != MBBE; ++MBBI) {
if (MBBI->getParent() != &*MFI) {
- report("Bad instruction parent pointer", MFI);
+ report("Bad instruction parent pointer", &*MFI);
errs() << "Instruction: " << *MBBI;
continue;
}
@@ -381,7 +385,7 @@ unsigned MachineVerifier::verify(MachineFunction &MF) {
CurBundle = &*MBBI;
visitMachineBundleBefore(CurBundle);
} else if (!CurBundle)
- report("No bundle header", MBBI);
+ report("No bundle header", &*MBBI);
visitMachineInstrBefore(&*MBBI);
for (unsigned I = 0, E = MBBI->getNumOperands(); I != E; ++I) {
const MachineInstr &MI = *MBBI;
@@ -474,11 +478,11 @@ void MachineVerifier::report_context(const LiveInterval &LI) const {
errs() << "- interval: " << LI << '\n';
}
-void MachineVerifier::report_context(const LiveRange &LR, unsigned Reg,
+void MachineVerifier::report_context(const LiveRange &LR, unsigned VRegUnit,
LaneBitmask LaneMask) const {
report_context_liverange(LR);
- errs() << "- register: " << PrintReg(Reg, TRI) << '\n';
- if (LaneMask != 0)
+ report_context_vreg_regunit(VRegUnit);
+ if (LaneMask.any())
report_context_lanemask(LaneMask);
}
@@ -524,16 +528,6 @@ void MachineVerifier::visitMachineFunctionBefore() {
lastIndex = SlotIndex();
regsReserved = MRI->getReservedRegs();
- // A sub-register of a reserved register is also reserved
- for (int Reg = regsReserved.find_first(); Reg>=0;
- Reg = regsReserved.find_next(Reg)) {
- for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) {
- // FIXME: This should probably be:
- // assert(regsReserved.test(*SubRegs) && "Non-reserved sub-register");
- regsReserved.set(*SubRegs);
- }
- }
-
markReachable(&MF->front());
// Build a set of the basic blocks in the function.
@@ -571,7 +565,8 @@ void
MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) {
FirstTerminator = nullptr;
- if (MRI->isSSA()) {
+ if (!MF->getProperties().hasProperty(
+ MachineFunctionProperties::Property::NoPHIs) && MRI->tracksLiveness()) {
// If this block has allocatable physical registers live-in, check that
// it is an entry block or landing pad.
for (const auto &LI : MBB->liveins()) {
@@ -746,20 +741,21 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) {
}
regsLive.clear();
- for (const auto &LI : MBB->liveins()) {
- if (!TargetRegisterInfo::isPhysicalRegister(LI.PhysReg)) {
- report("MBB live-in list contains non-physical register", MBB);
- continue;
+ if (MRI->tracksLiveness()) {
+ for (const auto &LI : MBB->liveins()) {
+ if (!TargetRegisterInfo::isPhysicalRegister(LI.PhysReg)) {
+ report("MBB live-in list contains non-physical register", MBB);
+ continue;
+ }
+ for (MCSubRegIterator SubRegs(LI.PhysReg, TRI, /*IncludeSelf=*/true);
+ SubRegs.isValid(); ++SubRegs)
+ regsLive.insert(*SubRegs);
}
- for (MCSubRegIterator SubRegs(LI.PhysReg, TRI, /*IncludeSelf=*/true);
- SubRegs.isValid(); ++SubRegs)
- regsLive.insert(*SubRegs);
}
regsLiveInButUnused = regsLive;
- const MachineFrameInfo *MFI = MF->getFrameInfo();
- assert(MFI && "Function has no frame info");
- BitVector PR = MFI->getPristineRegs(*MF);
+ const MachineFrameInfo &MFI = MF->getFrameInfo();
+ BitVector PR = MFI.getPristineRegs(*MF);
for (int I = PR.find_first(); I>0; I = PR.find_next(I)) {
for (MCSubRegIterator SubRegs(I, TRI, /*IncludeSelf=*/true);
SubRegs.isValid(); ++SubRegs)
@@ -850,6 +846,10 @@ void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) {
<< MI->getNumOperands() << " given.\n";
}
+ if (MI->isPHI() && MF->getProperties().hasProperty(
+ MachineFunctionProperties::Property::NoPHIs))
+ report("Found PHI instruction with NoPHIs property set", MI);
+
// Check the tied operands.
if (MI->isInlineAsm())
verifyInlineAsm(MI);
@@ -879,6 +879,35 @@ void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) {
}
}
+ // Check types.
+ if (isPreISelGenericOpcode(MCID.getOpcode())) {
+ if (isFunctionSelected)
+ report("Unexpected generic instruction in a Selected function", MI);
+
+ // Generic instructions specify equality constraints between some
+ // of their operands. Make sure these are consistent.
+ SmallVector<LLT, 4> Types;
+ for (unsigned i = 0; i < MCID.getNumOperands(); ++i) {
+ if (!MCID.OpInfo[i].isGenericType())
+ continue;
+ size_t TypeIdx = MCID.OpInfo[i].getGenericTypeIndex();
+ Types.resize(std::max(TypeIdx + 1, Types.size()));
+
+ LLT OpTy = MRI->getType(MI->getOperand(i).getReg());
+ if (Types[TypeIdx].isValid() && Types[TypeIdx] != OpTy)
+ report("type mismatch in generic instruction", MI);
+ Types[TypeIdx] = OpTy;
+ }
+ }
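+
+  // For example (illustrative): G_ADD ties all three of its register
+  // operands to the same type index, so an s32 destination paired with an
+  // s64 source operand would be reported here as a type mismatch.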
+
+ // Generic opcodes must not have physical register operands.
+ if (isPreISelGenericOpcode(MCID.getOpcode())) {
+ for (auto &Op : MI->operands()) {
+ if (Op.isReg() && TargetRegisterInfo::isPhysicalRegister(Op.getReg()))
+ report("Generic instruction cannot have physical register", MI);
+ }
+ }
+
StringRef ErrorInfo;
if (!TII->verifyInstruction(*MI, ErrorInfo))
report(ErrorInfo.data(), MI);
@@ -988,25 +1017,62 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {
const TargetRegisterClass *RC = MRI->getRegClassOrNull(Reg);
if (!RC) {
// This is a generic virtual register.
- // It must have a size and it must not have a SubIdx.
- unsigned Size = MRI->getSize(Reg);
- if (!Size) {
- report("Generic virtual register must have a size", MO, MONum);
+
+ // If we're post-Select, we can't have gvregs anymore.
+ if (isFunctionSelected) {
+ report("Generic virtual register invalid in a Selected function",
+ MO, MONum);
return;
}
- // Make sure the register fits into its register bank if any.
+
+ // The gvreg must have a type and it must not have a SubIdx.
+ LLT Ty = MRI->getType(Reg);
+ if (!Ty.isValid()) {
+ report("Generic virtual register must have a valid type", MO,
+ MONum);
+ return;
+ }
+
const RegisterBank *RegBank = MRI->getRegBankOrNull(Reg);
- if (RegBank && RegBank->getSize() < Size) {
+
+ // If we're post-RegBankSelect, the gvreg must have a bank.
+ if (!RegBank && isFunctionRegBankSelected) {
+ report("Generic virtual register must have a bank in a "
+ "RegBankSelected function",
+ MO, MONum);
+ return;
+ }
+
+ // Make sure the register fits into its register bank if any.
+ if (RegBank && Ty.isValid() &&
+ RegBank->getSize() < Ty.getSizeInBits()) {
report("Register bank is too small for virtual register", MO,
MONum);
errs() << "Register bank " << RegBank->getName() << " too small("
- << RegBank->getSize() << ") to fit " << Size << "-bits\n";
+ << RegBank->getSize() << ") to fit " << Ty.getSizeInBits()
+ << "-bits\n";
return;
}
if (SubIdx) {
- report("Generic virtual register does not subregister index", MO, MONum);
+            report("Generic virtual register does not allow subregister index",
+                   MO, MONum);
return;
}
+
+ // If this is a target specific instruction and this operand
+        // has a register class constraint, the virtual register must
+        // comply with it.
+ if (!isPreISelGenericOpcode(MCID.getOpcode()) &&
+ TII->getRegClass(MCID, MONum, TRI, *MF)) {
+ report("Virtual register does not match instruction constraint", MO,
+ MONum);
+ errs() << "Expect register class "
+ << TRI->getRegClassName(
+ TII->getRegClass(MCID, MONum, TRI, *MF))
+ << " but got nothing\n";
+ return;
+ }
+
break;
}
if (SubIdx) {
@@ -1113,7 +1179,7 @@ void MachineVerifier::checkLivenessAtUse(const MachineOperand *MO,
LiveQueryResult LRQ = LR.Query(UseIdx);
// Check if we have a segment at the use, note however that we only need one
// live subregister range, the others may be dead.
- if (!LRQ.valueIn() && LaneMask == 0) {
+ if (!LRQ.valueIn() && LaneMask.none()) {
report("No live segment at use", MO, MONum);
report_context_liverange(LR);
report_context_vreg_regunit(VRegOrUnit);
@@ -1123,7 +1189,7 @@ void MachineVerifier::checkLivenessAtUse(const MachineOperand *MO,
report("Live range continues after kill flag", MO, MONum);
report_context_liverange(LR);
report_context_vreg_regunit(VRegOrUnit);
- if (LaneMask != 0)
+ if (LaneMask.any())
report_context_lanemask(LaneMask);
report_context(UseIdx);
}
@@ -1138,7 +1204,7 @@ void MachineVerifier::checkLivenessAtDef(const MachineOperand *MO,
report("Inconsistent valno->def", MO, MONum);
report_context_liverange(LR);
report_context_vreg_regunit(VRegOrUnit);
- if (LaneMask != 0)
+ if (LaneMask.any())
report_context_lanemask(LaneMask);
report_context(*VNI);
report_context(DefIdx);
@@ -1147,7 +1213,7 @@ void MachineVerifier::checkLivenessAtDef(const MachineOperand *MO,
report("No live segment at def", MO, MONum);
report_context_liverange(LR);
report_context_vreg_regunit(VRegOrUnit);
- if (LaneMask != 0)
+ if (LaneMask.any())
report_context_lanemask(LaneMask);
report_context(DefIdx);
}
@@ -1177,7 +1243,7 @@ void MachineVerifier::checkLivenessAtDef(const MachineOperand *MO,
report("Live range continues after dead def flag", MO, MONum);
report_context_liverange(LR);
report_context_vreg_regunit(VRegOrUnit);
- if (LaneMask != 0)
+ if (LaneMask.any())
report_context_lanemask(LaneMask);
}
}
@@ -1199,7 +1265,7 @@ void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) {
if (LiveVars && TargetRegisterInfo::isVirtualRegister(Reg) &&
MO->isKill()) {
LiveVariables::VarInfo &VI = LiveVars->getVarInfo(Reg);
- if (std::find(VI.Kills.begin(), VI.Kills.end(), MI) == VI.Kills.end())
+ if (!is_contained(VI.Kills, MI))
report("Kill missing from LiveVariables", MO, MONum);
}
@@ -1225,9 +1291,9 @@ void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) {
LaneBitmask MOMask = SubRegIdx != 0
? TRI->getSubRegIndexLaneMask(SubRegIdx)
: MRI->getMaxLaneMaskForVReg(Reg);
- LaneBitmask LiveInMask = 0;
+ LaneBitmask LiveInMask;
for (const LiveInterval::SubRange &SR : LI.subranges()) {
- if ((MOMask & SR.LaneMask) == 0)
+ if ((MOMask & SR.LaneMask).none())
continue;
checkLivenessAtUse(MO, MONum, UseIdx, SR, Reg, SR.LaneMask);
LiveQueryResult LRQ = SR.Query(UseIdx);
@@ -1235,7 +1301,7 @@ void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) {
LiveInMask |= SR.LaneMask;
}
// At least parts of the register has to be live at the use.
- if ((LiveInMask & MOMask) == 0) {
+ if ((LiveInMask & MOMask).none()) {
report("No live subrange at use", MO, MONum);
report_context(LI);
report_context(UseIdx);
@@ -1327,7 +1393,7 @@ void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) {
? TRI->getSubRegIndexLaneMask(SubRegIdx)
: MRI->getMaxLaneMaskForVReg(Reg);
for (const LiveInterval::SubRange &SR : LI.subranges()) {
- if ((SR.LaneMask & MOMask) == 0)
+ if ((SR.LaneMask & MOMask).none())
continue;
checkLivenessAtDef(MO, MONum, DefIdx, SR, Reg, SR.LaneMask);
}
@@ -1640,8 +1706,8 @@ void MachineVerifier::verifyLiveRangeValue(const LiveRange &LR,
!TRI->hasRegUnit(MOI->getReg(), Reg))
continue;
}
- if (LaneMask != 0 &&
- (TRI->getSubRegIndexLaneMask(MOI->getSubReg()) & LaneMask) == 0)
+ if (LaneMask.any() &&
+ (TRI->getSubRegIndexLaneMask(MOI->getSubReg()) & LaneMask).none())
continue;
hasDef = true;
if (MOI->isEarlyClobber())
@@ -1772,15 +1838,22 @@ void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR,
for (ConstMIBundleOperands MOI(*MI); MOI.isValid(); ++MOI) {
if (!MOI->isReg() || MOI->getReg() != Reg)
continue;
- if (LaneMask != 0 &&
- (LaneMask & TRI->getSubRegIndexLaneMask(MOI->getSubReg())) == 0)
- continue;
+ unsigned Sub = MOI->getSubReg();
+ LaneBitmask SLM = Sub != 0 ? TRI->getSubRegIndexLaneMask(Sub)
+ : LaneBitmask::getAll();
if (MOI->isDef()) {
- if (MOI->getSubReg() != 0)
+ if (Sub != 0) {
hasSubRegDef = true;
+ // An operand vreg0:sub0<def> reads vreg0:sub1..n. Invert the lane
+ // mask for subregister defs. Read-undef defs will be handled by
+ // readsReg below.
+ SLM = ~SLM;
+ }
if (MOI->isDead())
hasDeadDef = true;
}
+ if (LaneMask.any() && (LaneMask & SLM).none())
+ continue;
if (MOI->readsReg())
hasRead = true;
}
@@ -1788,7 +1861,7 @@ void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR,
// Make sure that the corresponding machine operand for a "dead" live
// range has the dead flag. We cannot perform this check for subregister
// live ranges, as partially dead values are allowed.
- if (LaneMask == 0 && !hasDeadDef) {
+ if (LaneMask.none() && !hasDeadDef) {
report("Instruction ending live segment on dead slot has no dead flag",
MI);
report_context(LR, Reg, LaneMask);
@@ -1798,7 +1871,7 @@ void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR,
if (!hasRead) {
// When tracking subregister liveness, the main range must start new
// values on partial register writes, even if there is no read.
- if (!MRI->shouldTrackSubRegLiveness(Reg) || LaneMask != 0 ||
+ if (!MRI->shouldTrackSubRegLiveness(Reg) || LaneMask.any() ||
!hasSubRegDef) {
report("Instruction ending live segment doesn't read the register",
MI);
@@ -1842,7 +1915,7 @@ void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR,
// All predecessors must have a live-out value if this is not a
// subregister live range.
- if (!PVNI && LaneMask == 0) {
+ if (!PVNI && LaneMask.none()) {
report("Register not marked live out of predecessor", *PI);
report_context(LR, Reg, LaneMask);
report_context(*VNI);
@@ -1882,14 +1955,14 @@ void MachineVerifier::verifyLiveInterval(const LiveInterval &LI) {
assert(TargetRegisterInfo::isVirtualRegister(Reg));
verifyLiveRange(LI, Reg);
- LaneBitmask Mask = 0;
+ LaneBitmask Mask;
LaneBitmask MaxMask = MRI->getMaxLaneMaskForVReg(Reg);
for (const LiveInterval::SubRange &SR : LI.subranges()) {
- if ((Mask & SR.LaneMask) != 0) {
+ if ((Mask & SR.LaneMask).any()) {
report("Lane masks of sub ranges overlap in live interval", MF);
report_context(LI);
}
- if ((SR.LaneMask & ~MaxMask) != 0) {
+ if ((SR.LaneMask & ~MaxMask).any()) {
report("Subrange lanemask is invalid", MF);
report_context(LI);
}
@@ -1950,11 +2023,11 @@ void MachineVerifier::verifyStackFrame() {
SmallVector<StackStateOfBB, 8> SPState;
SPState.resize(MF->getNumBlockIDs());
- SmallPtrSet<const MachineBasicBlock*, 8> Reachable;
+ df_iterator_default_set<const MachineBasicBlock*> Reachable;
// Visit the MBBs in DFS order.
for (df_ext_iterator<const MachineFunction*,
- SmallPtrSet<const MachineBasicBlock*, 8> >
+ df_iterator_default_set<const MachineBasicBlock*> >
DFI = df_ext_begin(MF, Reachable), DFE = df_ext_end(MF, Reachable);
DFI != DFE; ++DFI) {
const MachineBasicBlock *MBB = *DFI;
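The MachineVerifier hunks above are all one mechanical migration: LaneBitmask is no longer a plain unsigned typedef, so comparisons such as `LaneMask != 0` or `(MOMask & SR.LaneMask) == 0` become the named predicates any()/none()/all(). A minimal, self-contained model of such a wrapper — an illustrative sketch, not LLVM's actual LaneBitmask — shows why every call site has to change:

#include <cassert>
#include <cstdint>

// Stand-in for llvm::LaneBitmask: a typed wrapper around the raw integer
// mask, so callers use named predicates instead of comparing against 0.
class LaneBitmask {
  uint32_t Mask = 0; // a default-constructed mask is empty
public:
  constexpr LaneBitmask() = default;
  constexpr explicit LaneBitmask(uint32_t V) : Mask(V) {}
  constexpr bool any() const { return Mask != 0; }
  constexpr bool none() const { return Mask == 0; }
  constexpr bool all() const { return Mask == ~0u; }
  constexpr LaneBitmask operator&(LaneBitmask O) const {
    return LaneBitmask(Mask & O.Mask);
  }
  constexpr LaneBitmask operator|(LaneBitmask O) const {
    return LaneBitmask(Mask | O.Mask);
  }
  constexpr LaneBitmask operator~() const { return LaneBitmask(~Mask); }
  LaneBitmask &operator|=(LaneBitmask O) { Mask |= O.Mask; return *this; }
  static constexpr LaneBitmask getAll() { return LaneBitmask(~0u); }
  static constexpr LaneBitmask getNone() { return LaneBitmask(0); }
};

int main() {
  LaneBitmask MOMask(0x3), SRMask(0x4);
  // Previously `(MOMask & SRMask) == 0`; the explicit constructor removes
  // the implicit integer conversion, so the predicate must be spelled out.
  assert((MOMask & SRMask).none());
  assert((MOMask | SRMask).any());
}

Because the wrapper has no implicit conversion back to an integer, any leftover `== 0` comparison fails to compile, which is presumably what made the rewrite above safe to do mechanically.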
diff --git a/contrib/llvm/lib/CodeGen/OptimizePHIs.cpp b/contrib/llvm/lib/CodeGen/OptimizePHIs.cpp
index 0177e41..2a8531f 100644
--- a/contrib/llvm/lib/CodeGen/OptimizePHIs.cpp
+++ b/contrib/llvm/lib/CodeGen/OptimizePHIs.cpp
@@ -184,7 +184,7 @@ bool OptimizePHIs::OptimizeBB(MachineBasicBlock &MBB) {
for (InstrSetIterator PI = PHIsInCycle.begin(), PE = PHIsInCycle.end();
PI != PE; ++PI) {
MachineInstr *PhiMI = *PI;
- if (&*MII == PhiMI)
+ if (MII == PhiMI)
++MII;
PhiMI->eraseFromParent();
}
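The `MII == PhiMI` check above guards against iterator invalidation: if the block iterator currently points at the PHI that is about to be erased, it is advanced first. The same advance-before-erase pattern on a plain std::list — a toy with hypothetical types, not LLVM's intrusive instruction list:

#include <cassert>
#include <list>
#include <set>

// Erase every element in ToErase from L without invalidating the caller's
// traversal iterator It: advance It whenever it points at the victim.
void eraseAll(std::list<int> &L, const std::set<int> &ToErase,
              std::list<int>::iterator &It) {
  for (auto I = L.begin(); I != L.end();) {
    if (ToErase.count(*I)) {
      if (It == I) // mirror of `if (MII == PhiMI) ++MII;`
        ++It;
      I = L.erase(I); // erase returns the next valid iterator
    } else {
      ++I;
    }
  }
}

int main() {
  std::list<int> L{1, 2, 3, 4};
  auto It = L.begin(); // points at 1
  eraseAll(L, {1, 2}, It);
  assert(*It == 3); // still valid after the erasures
}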
diff --git a/contrib/llvm/lib/CodeGen/PHIElimination.cpp b/contrib/llvm/lib/CodeGen/PHIElimination.cpp
index b8d5431..c67a25b 100644
--- a/contrib/llvm/lib/CodeGen/PHIElimination.cpp
+++ b/contrib/llvm/lib/CodeGen/PHIElimination.cpp
@@ -175,6 +175,8 @@ bool PHIElimination::runOnMachineFunction(MachineFunction &MF) {
ImpDefs.clear();
VRegPHIUseCount.clear();
+ MF.getProperties().set(MachineFunctionProperties::Property::NoPHIs);
+
return Changed;
}
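Recording NoPHIs here is half of a producer/consumer contract: PHIElimination promises the property, and later passes (the register allocators further down now require it) can assert it instead of rescanning the function. A stripped-down sketch of that contract with a plain bitset — the enum and pass-manager shape are hypothetical, not the real MachineFunctionProperties API:

#include <bitset>
#include <cassert>

enum Property { IsSSA, NoPHIs, NoVRegs, NumProperties };

struct FunctionState {
  std::bitset<NumProperties> Props;
};

// A pass declares what it requires and what it sets; the pass manager
// checks the contract before running and updates the state afterwards.
struct Pass {
  std::bitset<NumProperties> Required, Set;
  void run(FunctionState &F) const {
    assert((F.Props & Required) == Required && "required property missing");
    F.Props |= Set; // e.g. PHIElimination sets NoPHIs on completion
  }
};

int main() {
  FunctionState F;
  Pass PHIElim;
  PHIElim.Set.set(NoPHIs);
  Pass RegAlloc;
  RegAlloc.Required.set(NoPHIs);
  RegAlloc.Set.set(NoVRegs);
  PHIElim.run(F);
  RegAlloc.run(F); // would assert if PHIElimination had not run first
}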
diff --git a/contrib/llvm/lib/CodeGen/PHIEliminationUtils.cpp b/contrib/llvm/lib/CodeGen/PHIEliminationUtils.cpp
index 4cabc3a..4e67ff2 100644
--- a/contrib/llvm/lib/CodeGen/PHIEliminationUtils.cpp
+++ b/contrib/llvm/lib/CodeGen/PHIEliminationUtils.cpp
@@ -54,6 +54,7 @@ llvm::findPHICopyInsertPoint(MachineBasicBlock* MBB, MachineBasicBlock* SuccMBB,
++InsertPoint;
}
- // Make sure the copy goes after any phi nodes however.
+ // Make sure the copy goes after any phi nodes but before
+ // any debug nodes.
return MBB->SkipPHIsAndLabels(InsertPoint);
}
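The reworded comment pins down exactly where the lowered PHI copy may land: after the block's PHI/label prologue, but before any trailing debug instructions. A toy insert-point search over a tagged instruction list — the Kind enum is purely illustrative — makes the boundary concrete:

#include <cassert>
#include <vector>

enum Kind { PHI, Label, DebugValue, Normal };

// First position past all leading PHIs and labels: the earliest legal
// slot for a lowered PHI copy, which also precedes any debug values.
size_t phiCopyInsertPoint(const std::vector<Kind> &Block) {
  size_t I = 0;
  while (I < Block.size() && (Block[I] == PHI || Block[I] == Label))
    ++I;
  return I;
}

int main() {
  std::vector<Kind> B{PHI, PHI, Label, DebugValue, Normal};
  assert(phiCopyInsertPoint(B) == 3); // copy goes before the debug value
}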
diff --git a/contrib/llvm/lib/CodeGen/ParallelCG.cpp b/contrib/llvm/lib/CodeGen/ParallelCG.cpp
index ccdaec1..50dd44f 100644
--- a/contrib/llvm/lib/CodeGen/ParallelCG.cpp
+++ b/contrib/llvm/lib/CodeGen/ParallelCG.cpp
@@ -12,7 +12,8 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/ParallelCG.h"
-#include "llvm/Bitcode/ReaderWriter.h"
+#include "llvm/Bitcode/BitcodeReader.h"
+#include "llvm/Bitcode/BitcodeWriter.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/Module.h"
@@ -78,7 +79,7 @@ std::unique_ptr<Module> llvm::splitCodeGen(
CodegenThreadPool.async(
[TMFactory, FileType, ThreadOS](const SmallString<0> &BC) {
LLVMContext Ctx;
- ErrorOr<std::unique_ptr<Module>> MOrErr = parseBitcodeFile(
+ Expected<std::unique_ptr<Module>> MOrErr = parseBitcodeFile(
MemoryBufferRef(StringRef(BC.data(), BC.size()),
"<split-module>"),
Ctx);
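parseBitcodeFile now hands back Expected<T> instead of ErrorOr<T>: a structured error on failure rather than a std::error_code. A toy Expected — far simpler than LLVM's, which is move-only and enforces that errors are actually checked — illustrates the calling discipline the new return type imposes:

#include <cassert>
#include <string>
#include <utility>

// Toy Expected<T>: either a value or an error string.
template <typename T> class Expected {
  bool HasValue;
  T Value{};
  std::string Err;

public:
  Expected(T V) : HasValue(true), Value(std::move(V)) {}
  Expected(std::string E) : HasValue(false), Err(std::move(E)) {}
  explicit operator bool() const { return HasValue; }
  T &get() { assert(HasValue); return Value; }
  const std::string &takeError() const { assert(!HasValue); return Err; }
};

Expected<int> parse(const std::string &S) {
  if (S.empty())
    return std::string("empty input"); // error path
  return (int)S.size();                // success path
}

int main() {
  auto R = parse("bitcode");
  if (!R) { // mirrors `if (!MOrErr) ... takeError()`
    (void)R.takeError();
    return 1;
  }
  return R.get() == 7 ? 0 : 1;
}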
diff --git a/contrib/llvm/lib/CodeGen/PatchableFunction.cpp b/contrib/llvm/lib/CodeGen/PatchableFunction.cpp
index 32468c9..ad9166f 100644
--- a/contrib/llvm/lib/CodeGen/PatchableFunction.cpp
+++ b/contrib/llvm/lib/CodeGen/PatchableFunction.cpp
@@ -32,7 +32,7 @@ struct PatchableFunction : public MachineFunctionPass {
bool runOnMachineFunction(MachineFunction &F) override;
MachineFunctionProperties getRequiredProperties() const override {
return MachineFunctionProperties().set(
- MachineFunctionProperties::Property::AllVRegsAllocated);
+ MachineFunctionProperties::Property::NoVRegs);
}
};
}
diff --git a/contrib/llvm/lib/CodeGen/PeepholeOptimizer.cpp b/contrib/llvm/lib/CodeGen/PeepholeOptimizer.cpp
index 60b27dd..6d64345 100644
--- a/contrib/llvm/lib/CodeGen/PeepholeOptimizer.cpp
+++ b/contrib/llvm/lib/CodeGen/PeepholeOptimizer.cpp
@@ -70,17 +70,28 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/MC/MCInstrDesc.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
+#include <cassert>
+#include <cstdint>
+#include <memory>
#include <utility>
+
using namespace llvm;
#define DEBUG_TYPE "peephole-opt"
@@ -118,6 +129,7 @@ STATISTIC(NumRewrittenCopies, "Number of copies rewritten");
STATISTIC(NumNAPhysCopies, "Number of non-allocatable physical copies removed");
namespace {
+
class ValueTrackerResult;
class PeepholeOptimizer : public MachineFunctionPass {
@@ -128,6 +140,7 @@ namespace {
public:
static char ID; // Pass identification
+
PeepholeOptimizer() : MachineFunctionPass(ID) {
initializePeepholeOptimizerPass(*PassRegistry::getPassRegistry());
}
@@ -390,10 +403,12 @@ namespace {
/// register of the last source.
unsigned getReg() const { return Reg; }
};
-}
+
+} // end anonymous namespace
char PeepholeOptimizer::ID = 0;
char &llvm::PeepholeOptimizerID = PeepholeOptimizer::ID;
+
INITIALIZE_PASS_BEGIN(PeepholeOptimizer, DEBUG_TYPE,
"Peephole Optimizations", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
@@ -737,6 +752,7 @@ insertPHI(MachineRegisterInfo *MRI, const TargetInstrInfo *TII,
}
namespace {
+
/// \brief Helper class to rewrite the arguments of a copy-like instruction.
class CopyRewriter {
protected:
@@ -820,7 +836,6 @@ public:
TargetInstrInfo::RegSubRegPair Def,
PeepholeOptimizer::RewriteMapTy &RewriteMap,
bool HandleMultipleSources = true) {
-
TargetInstrInfo::RegSubRegPair LookupSrc(Def.Reg, Def.SubReg);
do {
ValueTrackerResult Res = RewriteMap.lookup(LookupSrc);
@@ -859,7 +874,7 @@ public:
const MachineOperand &MODef = NewPHI->getOperand(0);
return TargetInstrInfo::RegSubRegPair(MODef.getReg(), MODef.getSubReg());
- } while (1);
+ } while (true);
return TargetInstrInfo::RegSubRegPair(0, 0);
}
@@ -1001,6 +1016,7 @@ public:
TrackSubReg = (unsigned)CopyLike.getOperand(3).getImm();
return true;
}
+
bool RewriteCurrentSource(unsigned NewReg, unsigned NewSubReg) override {
if (CurrentSrcIdx != 2)
return false;
@@ -1141,7 +1157,8 @@ public:
return true;
}
};
-} // End namespace.
+
+} // end anonymous namespace
/// \brief Get the appropriated CopyRewriter for \p MI.
/// \return A pointer to a dynamically allocated CopyRewriter or nullptr
@@ -1523,11 +1540,6 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
if (MI->isDebugValue())
continue;
- // If we run into an instruction we can't fold across, discard
- // the load candidates.
- if (MI->isLoadFoldBarrier())
- FoldAsLoadDefCandidates.clear();
-
if (MI->isPosition() || MI->isPHI())
continue;
@@ -1571,7 +1583,6 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
DEBUG(dbgs() << "NAPhysCopy: blowing away all info due to " << *MI
<< '\n');
NAPhysToVirtMIs.clear();
- continue;
}
if ((isUncoalescableCopy(*MI) &&
@@ -1622,8 +1633,14 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
// earlier load into MI.
if (!isLoadFoldable(MI, FoldAsLoadDefCandidates) &&
!FoldAsLoadDefCandidates.empty()) {
+
+ // We visit each operand even after successfully folding a previous
+ // one. This allows us to fold multiple loads into a single
+ // instruction. We do assume that optimizeLoadInstr doesn't insert
+ // foldable uses earlier in the argument list. Since we don't restart
+ // iteration, we'd miss such cases.
const MCInstrDesc &MIDesc = MI->getDesc();
- for (unsigned i = MIDesc.getNumDefs(); i != MIDesc.getNumOperands();
+ for (unsigned i = MIDesc.getNumDefs(); i != MI->getNumOperands();
++i) {
const MachineOperand &MOp = MI->getOperand(i);
if (!MOp.isReg())
@@ -1650,13 +1667,23 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
MRI->markUsesInDebugValueAsUndef(FoldedReg);
FoldAsLoadDefCandidates.erase(FoldedReg);
++NumLoadFold;
- // MI is replaced with FoldMI.
+
+ // MI is replaced with FoldMI, so we can continue trying to fold.
Changed = true;
- break;
+ MI = FoldMI;
}
}
}
}
+
+ // If we run into an instruction we can't fold across, discard
+ // the load candidates. Note: We might be able to fold *into* this
+ // instruction, so this needs to be after the folding logic.
+ if (MI->isLoadFoldBarrier()) {
+ DEBUG(dbgs() << "Encountered load fold barrier on " << *MI << "\n");
+ FoldAsLoadDefCandidates.clear();
+ }
+
}
}
@@ -1688,7 +1715,8 @@ ValueTrackerResult ValueTracker::getNextSourceFromBitcast() {
// Bitcasts with more than one def are not supported.
if (Def->getDesc().getNumDefs() != 1)
return ValueTrackerResult();
- if (Def->getOperand(DefIdx).getSubReg() != DefSubReg)
+ const MachineOperand DefOp = Def->getOperand(DefIdx);
+ if (DefOp.getSubReg() != DefSubReg)
// If we look for a different subreg, it means we want a subreg of the src.
// Bails as we do not support composing subregs yet.
return ValueTrackerResult();
@@ -1708,6 +1736,14 @@ ValueTrackerResult ValueTracker::getNextSourceFromBitcast() {
return ValueTrackerResult();
SrcIdx = OpIdx;
}
+
+ // Stop when any user of the bitcast is a SUBREG_TO_REG; replacing the
+ // bitcast with a COPY would break the assumed guarantees for the upper bits.
+ for (const MachineInstr &UseMI : MRI.use_nodbg_instructions(DefOp.getReg())) {
+ if (UseMI.isSubregToReg())
+ return ValueTrackerResult();
+ }
+
const MachineOperand &Src = Def->getOperand(SrcIdx);
return ValueTrackerResult(Src.getReg(), Src.getSubReg());
}
@@ -1806,8 +1842,8 @@ ValueTrackerResult ValueTracker::getNextSourceFromInsertSubreg() {
// sub-register we are tracking.
const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo();
if (!TRI ||
- (TRI->getSubRegIndexLaneMask(DefSubReg) &
- TRI->getSubRegIndexLaneMask(InsertedReg.SubIdx)) != 0)
+ !(TRI->getSubRegIndexLaneMask(DefSubReg) &
+ TRI->getSubRegIndexLaneMask(InsertedReg.SubIdx)).none())
return ValueTrackerResult();
// At this point, the value is available in v0 via the same subreg
// we used for Def.
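Two behavioral changes are interleaved in the PeepholeOptimizer hunks above: a successful fold no longer breaks out of the operand scan (MI is rebound to FoldMI so further loads can be folded into the same instruction), and the load-fold-barrier check moves after the folding logic so candidates can still be folded into the barrier instruction itself before the set is cleared. A toy rendering of that control flow — the instruction model is hypothetical, not MachineInstr:

#include <cstdio>
#include <set>
#include <vector>

struct Instr {
  std::vector<int> Uses;  // registers read by this instruction
  bool IsBarrier = false; // candidates cannot survive past it
};

// Fold candidate loads into their uses. The candidate set is cleared at
// a barrier only *after* we have tried folding into that instruction.
int foldLoads(std::vector<Instr> &Block, std::set<int> LoadDefs) {
  int NumFolded = 0;
  for (Instr &MI : Block) {
    for (int Reg : MI.Uses)
      if (LoadDefs.erase(Reg))
        ++NumFolded; // keep scanning the remaining operands (no break)
    if (MI.IsBarrier) // checked after the folding attempt
      LoadDefs.clear();
  }
  return NumFolded;
}

int main() {
  std::vector<Instr> B(3);
  B[1].Uses = {1, 2};    // uses two candidate loads at once
  B[1].IsBarrier = true; // ...and is itself a barrier
  std::printf("folded %d\n", foldLoads(B, {1, 2, 3})); // folded 2
}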
diff --git a/contrib/llvm/lib/CodeGen/PostRASchedulerList.cpp b/contrib/llvm/lib/CodeGen/PostRASchedulerList.cpp
index 3fce307..6081916 100644
--- a/contrib/llvm/lib/CodeGen/PostRASchedulerList.cpp
+++ b/contrib/llvm/lib/CodeGen/PostRASchedulerList.cpp
@@ -98,7 +98,7 @@ namespace {
MachineFunctionProperties getRequiredProperties() const override {
return MachineFunctionProperties().set(
- MachineFunctionProperties::Property::AllVRegsAllocated);
+ MachineFunctionProperties::Property::NoVRegs);
}
bool runOnMachineFunction(MachineFunction &Fn) override;
diff --git a/contrib/llvm/lib/CodeGen/PrologEpilogInserter.cpp b/contrib/llvm/lib/CodeGen/PrologEpilogInserter.cpp
index 20a9a39..5fca7fa 100644
--- a/contrib/llvm/lib/CodeGen/PrologEpilogInserter.cpp
+++ b/contrib/llvm/lib/CodeGen/PrologEpilogInserter.cpp
@@ -80,7 +80,7 @@ public:
MachineFunctionProperties getRequiredProperties() const override {
MachineFunctionProperties MFP;
if (UsesCalleeSaves)
- MFP.set(MachineFunctionProperties::Property::AllVRegsAllocated);
+ MFP.set(MachineFunctionProperties::Property::NoVRegs);
return MFP;
}
@@ -117,6 +117,10 @@ private:
// TRI->requiresFrameIndexScavenging() for the current function.
bool FrameIndexVirtualScavenging;
+ // Flag to control whether the scavenger should be passed to
+ // eliminateFrameIndex even though FrameIndexVirtualScavenging is used.
+ bool FrameIndexEliminationScavenging;
+
void calculateCallFrameInfo(MachineFunction &Fn);
void calculateSaveRestoreBlocks(MachineFunction &Fn);
@@ -176,6 +180,8 @@ bool PEI::runOnMachineFunction(MachineFunction &Fn) {
RS = TRI->requiresRegisterScavenging(Fn) ? new RegScavenger() : nullptr;
FrameIndexVirtualScavenging = TRI->requiresFrameIndexScavenging(Fn);
+ FrameIndexEliminationScavenging = (RS && !FrameIndexVirtualScavenging) ||
+ TRI->requiresFrameIndexReplacementScavenging(Fn);
// Calculate the MaxCallFrameSize and AdjustsStack variables for the
// function's frame information. Also eliminates call frame pseudo
@@ -221,8 +227,8 @@ bool PEI::runOnMachineFunction(MachineFunction &Fn) {
}
// Warn on stack size when we exceed the given limit.
- MachineFrameInfo *MFI = Fn.getFrameInfo();
- uint64_t StackSize = MFI->getStackSize();
+ MachineFrameInfo &MFI = Fn.getFrameInfo();
+ uint64_t StackSize = MFI.getStackSize();
if (WarnStackSize.getNumOccurrences() > 0 && WarnStackSize < StackSize) {
DiagnosticInfoStackSize DiagStackSize(*F, StackSize);
F->getContext().diagnose(DiagStackSize);
@@ -231,8 +237,8 @@ bool PEI::runOnMachineFunction(MachineFunction &Fn) {
delete RS;
SaveBlocks.clear();
RestoreBlocks.clear();
- MFI->setSavePoint(nullptr);
- MFI->setRestorePoint(nullptr);
+ MFI.setSavePoint(nullptr);
+ MFI.setRestorePoint(nullptr);
return true;
}
@@ -242,10 +248,10 @@ bool PEI::runOnMachineFunction(MachineFunction &Fn) {
void PEI::calculateCallFrameInfo(MachineFunction &Fn) {
const TargetInstrInfo &TII = *Fn.getSubtarget().getInstrInfo();
const TargetFrameLowering *TFI = Fn.getSubtarget().getFrameLowering();
- MachineFrameInfo *MFI = Fn.getFrameInfo();
+ MachineFrameInfo &MFI = Fn.getFrameInfo();
unsigned MaxCallFrameSize = 0;
- bool AdjustsStack = MFI->adjustsStack();
+ bool AdjustsStack = MFI.adjustsStack();
// Get the function call frame set-up and tear-down instruction opcode
unsigned FrameSetupOpcode = TII.getCallFrameSetupOpcode();
@@ -274,8 +280,8 @@ void PEI::calculateCallFrameInfo(MachineFunction &Fn) {
AdjustsStack = true;
}
- MFI->setAdjustsStack(AdjustsStack);
- MFI->setMaxCallFrameSize(MaxCallFrameSize);
+ MFI.setAdjustsStack(AdjustsStack);
+ MFI.setMaxCallFrameSize(MaxCallFrameSize);
for (std::vector<MachineBasicBlock::iterator>::iterator
i = FrameSDOps.begin(), e = FrameSDOps.end(); i != e; ++i) {
@@ -293,17 +299,17 @@ void PEI::calculateCallFrameInfo(MachineFunction &Fn) {
/// Compute the sets of entry and return blocks for saving and restoring
/// callee-saved registers, and placing prolog and epilog code.
void PEI::calculateSaveRestoreBlocks(MachineFunction &Fn) {
- const MachineFrameInfo *MFI = Fn.getFrameInfo();
+ const MachineFrameInfo &MFI = Fn.getFrameInfo();
// Even when we do not change any CSR, we still want to insert the
// prologue and epilogue of the function.
// So set the save points for those.
// Use the points found by shrink-wrapping, if any.
- if (MFI->getSavePoint()) {
- SaveBlocks.push_back(MFI->getSavePoint());
- assert(MFI->getRestorePoint() && "Both restore and save must be set");
- MachineBasicBlock *RestoreBlock = MFI->getRestorePoint();
+ if (MFI.getSavePoint()) {
+ SaveBlocks.push_back(MFI.getSavePoint());
+ assert(MFI.getRestorePoint() && "Both restore and save must be set");
+ MachineBasicBlock *RestoreBlock = MFI.getRestorePoint();
// If RestoreBlock does not have any successor and is not a return block
// then the end point is unreachable and we do not need to insert any
// epilogue.
@@ -340,7 +346,7 @@ static void assignCalleeSavedSpillSlots(MachineFunction &F,
}
const TargetFrameLowering *TFI = F.getSubtarget().getFrameLowering();
- MachineFrameInfo *MFI = F.getFrameInfo();
+ MachineFrameInfo &MFI = F.getFrameInfo();
if (!TFI->assignCalleeSavedSpillSlots(F, RegInfo, CSI)) {
// If target doesn't implement this, use generic code.
@@ -379,26 +385,26 @@ static void assignCalleeSavedSpillSlots(MachineFunction &F,
// the TargetRegisterClass if the stack alignment is smaller. Use the
// min.
Align = std::min(Align, StackAlign);
- FrameIdx = MFI->CreateStackObject(RC->getSize(), Align, true);
+ FrameIdx = MFI.CreateStackObject(RC->getSize(), Align, true);
if ((unsigned)FrameIdx < MinCSFrameIndex) MinCSFrameIndex = FrameIdx;
if ((unsigned)FrameIdx > MaxCSFrameIndex) MaxCSFrameIndex = FrameIdx;
} else {
// Spill it to the stack where we must.
FrameIdx =
- MFI->CreateFixedSpillStackObject(RC->getSize(), FixedSlot->Offset);
+ MFI.CreateFixedSpillStackObject(RC->getSize(), FixedSlot->Offset);
}
CS.setFrameIdx(FrameIdx);
}
}
- MFI->setCalleeSavedInfo(CSI);
+ MFI.setCalleeSavedInfo(CSI);
}
/// Helper function to update the liveness information for the callee-saved
/// registers.
static void updateLiveness(MachineFunction &MF) {
- MachineFrameInfo *MFI = MF.getFrameInfo();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
// Visited will contain all the basic blocks that are in the region
// where the callee saved registers are alive:
// - Anything that is not Save or Restore -> LiveThrough.
@@ -409,7 +415,7 @@ static void updateLiveness(MachineFunction &MF) {
SmallPtrSet<MachineBasicBlock *, 8> Visited;
SmallVector<MachineBasicBlock *, 8> WorkList;
MachineBasicBlock *Entry = &MF.front();
- MachineBasicBlock *Save = MFI->getSavePoint();
+ MachineBasicBlock *Save = MFI.getSavePoint();
if (!Save)
Save = Entry;
@@ -420,7 +426,7 @@ static void updateLiveness(MachineFunction &MF) {
}
Visited.insert(Save);
- MachineBasicBlock *Restore = MFI->getRestorePoint();
+ MachineBasicBlock *Restore = MFI.getRestorePoint();
if (Restore)
// By construction Restore cannot be visited, otherwise it
// means there exists a path to Restore that does not go
@@ -440,7 +446,7 @@ static void updateLiveness(MachineFunction &MF) {
WorkList.push_back(SuccBB);
}
- const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
+ const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
for (MachineBasicBlock *MBB : Visited) {
@@ -460,10 +466,10 @@ static void insertCSRSpillsAndRestores(MachineFunction &Fn,
const MBBVector &SaveBlocks,
const MBBVector &RestoreBlocks) {
// Get callee saved register information.
- MachineFrameInfo *MFI = Fn.getFrameInfo();
- const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
+ MachineFrameInfo &MFI = Fn.getFrameInfo();
+ const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
- MFI->setCalleeSavedInfoValid(true);
+ MFI.setCalleeSavedInfoValid(true);
// Early exit if no callee saved registers are modified!
if (CSI.empty())
@@ -551,14 +557,14 @@ static void doSpillCalleeSavedRegs(MachineFunction &Fn, RegScavenger *RS,
/// AdjustStackOffset - Helper function used to adjust the stack frame offset.
static inline void
-AdjustStackOffset(MachineFrameInfo *MFI, int FrameIdx,
+AdjustStackOffset(MachineFrameInfo &MFI, int FrameIdx,
bool StackGrowsDown, int64_t &Offset,
unsigned &MaxAlign, unsigned Skew) {
// If the stack grows down, add the object size to find the lowest address.
if (StackGrowsDown)
- Offset += MFI->getObjectSize(FrameIdx);
+ Offset += MFI.getObjectSize(FrameIdx);
- unsigned Align = MFI->getObjectAlignment(FrameIdx);
+ unsigned Align = MFI.getObjectAlignment(FrameIdx);
// If the alignment of this object is greater than that of the stack, then
// increase the stack alignment to match.
@@ -569,11 +575,11 @@ AdjustStackOffset(MachineFrameInfo *MFI, int FrameIdx,
if (StackGrowsDown) {
DEBUG(dbgs() << "alloc FI(" << FrameIdx << ") at SP[" << -Offset << "]\n");
- MFI->setObjectOffset(FrameIdx, -Offset); // Set the computed offset
+ MFI.setObjectOffset(FrameIdx, -Offset); // Set the computed offset
} else {
DEBUG(dbgs() << "alloc FI(" << FrameIdx << ") at SP[" << Offset << "]\n");
- MFI->setObjectOffset(FrameIdx, Offset);
- Offset += MFI->getObjectSize(FrameIdx);
+ MFI.setObjectOffset(FrameIdx, Offset);
+ Offset += MFI.getObjectSize(FrameIdx);
}
}
@@ -581,7 +587,7 @@ AdjustStackOffset(MachineFrameInfo *MFI, int FrameIdx,
/// track of them in StackBytesFree.
///
static inline void
-computeFreeStackSlots(MachineFrameInfo *MFI, bool StackGrowsDown,
+computeFreeStackSlots(MachineFrameInfo &MFI, bool StackGrowsDown,
unsigned MinCSFrameIndex, unsigned MaxCSFrameIndex,
int64_t FixedCSEnd, BitVector &StackBytesFree) {
// Avoid undefined int64_t -> int conversion below in extreme case.
@@ -592,7 +598,7 @@ computeFreeStackSlots(MachineFrameInfo *MFI, bool StackGrowsDown,
SmallVector<int, 16> AllocatedFrameSlots;
// Add fixed objects.
- for (int i = MFI->getObjectIndexBegin(); i != 0; ++i)
+ for (int i = MFI.getObjectIndexBegin(); i != 0; ++i)
AllocatedFrameSlots.push_back(i);
// Add callee-save objects.
for (int i = MinCSFrameIndex; i <= (int)MaxCSFrameIndex; ++i)
@@ -601,8 +607,8 @@ computeFreeStackSlots(MachineFrameInfo *MFI, bool StackGrowsDown,
for (int i : AllocatedFrameSlots) {
// These are converted from int64_t, but they should always fit in int
// because of the FixedCSEnd check above.
- int ObjOffset = MFI->getObjectOffset(i);
- int ObjSize = MFI->getObjectSize(i);
+ int ObjOffset = MFI.getObjectOffset(i);
+ int ObjSize = MFI.getObjectSize(i);
int ObjStart, ObjEnd;
if (StackGrowsDown) {
// ObjOffset is negative when StackGrowsDown is true.
@@ -621,10 +627,10 @@ computeFreeStackSlots(MachineFrameInfo *MFI, bool StackGrowsDown,
/// Assign frame object to an unused portion of the stack in the fixed stack
/// object range. Return true if the allocation was successful.
///
-static inline bool scavengeStackSlot(MachineFrameInfo *MFI, int FrameIdx,
+static inline bool scavengeStackSlot(MachineFrameInfo &MFI, int FrameIdx,
bool StackGrowsDown, unsigned MaxAlign,
BitVector &StackBytesFree) {
- if (MFI->isVariableSizedObjectIndex(FrameIdx))
+ if (MFI.isVariableSizedObjectIndex(FrameIdx))
return false;
if (StackBytesFree.none()) {
@@ -634,11 +640,11 @@ static inline bool scavengeStackSlot(MachineFrameInfo *MFI, int FrameIdx,
return false;
}
- unsigned ObjAlign = MFI->getObjectAlignment(FrameIdx);
+ unsigned ObjAlign = MFI.getObjectAlignment(FrameIdx);
if (ObjAlign > MaxAlign)
return false;
- int64_t ObjSize = MFI->getObjectSize(FrameIdx);
+ int64_t ObjSize = MFI.getObjectSize(FrameIdx);
int FreeStart;
for (FreeStart = StackBytesFree.find_first(); FreeStart != -1;
FreeStart = StackBytesFree.find_next(FreeStart)) {
@@ -668,11 +674,11 @@ static inline bool scavengeStackSlot(MachineFrameInfo *MFI, int FrameIdx,
int ObjStart = -(FreeStart + ObjSize);
DEBUG(dbgs() << "alloc FI(" << FrameIdx << ") scavenged at SP[" << ObjStart
<< "]\n");
- MFI->setObjectOffset(FrameIdx, ObjStart);
+ MFI.setObjectOffset(FrameIdx, ObjStart);
} else {
DEBUG(dbgs() << "alloc FI(" << FrameIdx << ") scavenged at SP[" << FreeStart
<< "]\n");
- MFI->setObjectOffset(FrameIdx, FreeStart);
+ MFI.setObjectOffset(FrameIdx, FreeStart);
}
StackBytesFree.reset(FreeStart, FreeStart + ObjSize);
@@ -684,7 +690,7 @@ static inline bool scavengeStackSlot(MachineFrameInfo *MFI, int FrameIdx,
static void
AssignProtectedObjSet(const StackObjSet &UnassignedObjs,
SmallSet<int, 16> &ProtectedObjs,
- MachineFrameInfo *MFI, bool StackGrowsDown,
+ MachineFrameInfo &MFI, bool StackGrowsDown,
int64_t &Offset, unsigned &MaxAlign, unsigned Skew) {
for (StackObjSet::const_iterator I = UnassignedObjs.begin(),
@@ -706,7 +712,7 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) {
TFI.getStackGrowthDirection() == TargetFrameLowering::StackGrowsDown;
// Loop over all of the stack objects, assigning sequential addresses...
- MachineFrameInfo *MFI = Fn.getFrameInfo();
+ MachineFrameInfo &MFI = Fn.getFrameInfo();
// Start at the beginning of the local area.
// The Offset is the distance from the stack top in the direction
@@ -725,17 +731,17 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) {
// non-fixed objects can't be allocated right at the start of the local area.
// Adjust 'Offset' to point to the end of the last fixed-size preallocated
// object.
- for (int i = MFI->getObjectIndexBegin(); i != 0; ++i) {
+ for (int i = MFI.getObjectIndexBegin(); i != 0; ++i) {
int64_t FixedOff;
if (StackGrowsDown) {
// The maximum distance from the stack pointer is at the lower address of
// the object -- which is given by offset. For a down-growing stack
// the offset is negative, so we negate the offset to get the distance.
- FixedOff = -MFI->getObjectOffset(i);
+ FixedOff = -MFI.getObjectOffset(i);
} else {
// The maximum distance from the start pointer is at the upper
// address of the object.
- FixedOff = MFI->getObjectOffset(i) + MFI->getObjectSize(i);
+ FixedOff = MFI.getObjectOffset(i) + MFI.getObjectSize(i);
}
if (FixedOff > Offset) Offset = FixedOff;
}
@@ -746,32 +752,32 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) {
for (unsigned i = MinCSFrameIndex; i <= MaxCSFrameIndex; ++i) {
// If the stack grows down, we need to add the size to find the lowest
// address of the object.
- Offset += MFI->getObjectSize(i);
+ Offset += MFI.getObjectSize(i);
- unsigned Align = MFI->getObjectAlignment(i);
+ unsigned Align = MFI.getObjectAlignment(i);
// Adjust to alignment boundary
Offset = alignTo(Offset, Align, Skew);
DEBUG(dbgs() << "alloc FI(" << i << ") at SP[" << -Offset << "]\n");
- MFI->setObjectOffset(i, -Offset); // Set the computed offset
+ MFI.setObjectOffset(i, -Offset); // Set the computed offset
}
} else if (MaxCSFrameIndex >= MinCSFrameIndex) {
// Be careful about underflow in comparisons against MinCSFrameIndex.
for (unsigned i = MaxCSFrameIndex; i != MinCSFrameIndex - 1; --i) {
- unsigned Align = MFI->getObjectAlignment(i);
+ unsigned Align = MFI.getObjectAlignment(i);
// Adjust to alignment boundary
Offset = alignTo(Offset, Align, Skew);
DEBUG(dbgs() << "alloc FI(" << i << ") at SP[" << Offset << "]\n");
- MFI->setObjectOffset(i, Offset);
- Offset += MFI->getObjectSize(i);
+ MFI.setObjectOffset(i, Offset);
+ Offset += MFI.getObjectSize(i);
}
}
// FixedCSEnd is the stack offset to the end of the fixed and callee-save
// stack area.
int64_t FixedCSEnd = Offset;
- unsigned MaxAlign = MFI->getMaxAlignment();
+ unsigned MaxAlign = MFI.getMaxAlignment();
// Make sure the special register scavenging spill slot is closest to the
// incoming stack pointer if a frame pointer is required and is closer
@@ -793,8 +799,8 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) {
// check for whether the frame is large enough to want to use virtual
// frame index registers. Functions which don't want/need this optimization
// will continue to use the existing code path.
- if (MFI->getUseLocalStackAllocationBlock()) {
- unsigned Align = MFI->getLocalFrameMaxAlign();
+ if (MFI.getUseLocalStackAllocationBlock()) {
+ unsigned Align = MFI.getLocalFrameMaxAlign();
// Adjust to alignment boundary.
Offset = alignTo(Offset, Align, Skew);
@@ -802,15 +808,15 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) {
DEBUG(dbgs() << "Local frame base offset: " << Offset << "\n");
// Resolve offsets for objects in the local block.
- for (unsigned i = 0, e = MFI->getLocalFrameObjectCount(); i != e; ++i) {
- std::pair<int, int64_t> Entry = MFI->getLocalFrameObjectMap(i);
+ for (unsigned i = 0, e = MFI.getLocalFrameObjectCount(); i != e; ++i) {
+ std::pair<int, int64_t> Entry = MFI.getLocalFrameObjectMap(i);
int64_t FIOffset = (StackGrowsDown ? -Offset : Offset) + Entry.second;
DEBUG(dbgs() << "alloc FI(" << Entry.first << ") at SP[" <<
FIOffset << "]\n");
- MFI->setObjectOffset(Entry.first, FIOffset);
+ MFI.setObjectOffset(Entry.first, FIOffset);
}
// Allocate the local block
- Offset += MFI->getLocalFrameSize();
+ Offset += MFI.getLocalFrameSize();
MaxAlign = std::max(Align, MaxAlign);
}
@@ -823,30 +829,30 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) {
// Make sure that the stack protector comes before the local variables on the
// stack.
SmallSet<int, 16> ProtectedObjs;
- if (MFI->getStackProtectorIndex() >= 0) {
+ if (MFI.getStackProtectorIndex() >= 0) {
StackObjSet LargeArrayObjs;
StackObjSet SmallArrayObjs;
StackObjSet AddrOfObjs;
- AdjustStackOffset(MFI, MFI->getStackProtectorIndex(), StackGrowsDown,
+ AdjustStackOffset(MFI, MFI.getStackProtectorIndex(), StackGrowsDown,
Offset, MaxAlign, Skew);
// Assign large stack objects first.
- for (unsigned i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i) {
- if (MFI->isObjectPreAllocated(i) &&
- MFI->getUseLocalStackAllocationBlock())
+ for (unsigned i = 0, e = MFI.getObjectIndexEnd(); i != e; ++i) {
+ if (MFI.isObjectPreAllocated(i) &&
+ MFI.getUseLocalStackAllocationBlock())
continue;
if (i >= MinCSFrameIndex && i <= MaxCSFrameIndex)
continue;
if (RS && RS->isScavengingFrameIndex((int)i))
continue;
- if (MFI->isDeadObjectIndex(i))
+ if (MFI.isDeadObjectIndex(i))
continue;
- if (MFI->getStackProtectorIndex() == (int)i ||
+ if (MFI.getStackProtectorIndex() == (int)i ||
EHRegNodeFrameIndex == (int)i)
continue;
- switch (SP->getSSPLayout(MFI->getObjectAllocation(i))) {
+ switch (SP->getSSPLayout(MFI.getObjectAllocation(i))) {
case StackProtector::SSPLK_None:
continue;
case StackProtector::SSPLK_SmallArray:
@@ -874,17 +880,16 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) {
// Then prepare to assign frame offsets to stack objects that are not used to
// spill callee saved registers.
- for (unsigned i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i) {
- if (MFI->isObjectPreAllocated(i) &&
- MFI->getUseLocalStackAllocationBlock())
+ for (unsigned i = 0, e = MFI.getObjectIndexEnd(); i != e; ++i) {
+ if (MFI.isObjectPreAllocated(i) && MFI.getUseLocalStackAllocationBlock())
continue;
if (i >= MinCSFrameIndex && i <= MaxCSFrameIndex)
continue;
if (RS && RS->isScavengingFrameIndex((int)i))
continue;
- if (MFI->isDeadObjectIndex(i))
+ if (MFI.isDeadObjectIndex(i))
continue;
- if (MFI->getStackProtectorIndex() == (int)i ||
+ if (MFI.getStackProtectorIndex() == (int)i ||
EHRegNodeFrameIndex == (int)i)
continue;
if (ProtectedObjs.count(i))
@@ -911,7 +916,7 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) {
BitVector StackBytesFree;
if (!ObjectsToAllocate.empty() &&
Fn.getTarget().getOptLevel() != CodeGenOpt::None &&
- MFI->getStackProtectorIndex() < 0 && TFI.enableStackSlotScavenging(Fn))
+ MFI.getStackProtectorIndex() < 0 && TFI.enableStackSlotScavenging(Fn))
computeFreeStackSlots(MFI, StackGrowsDown, MinCSFrameIndex, MaxCSFrameIndex,
FixedCSEnd, StackBytesFree);
@@ -935,8 +940,8 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) {
// If we have reserved argument space for call sites in the function
// immediately on entry to the current function, count it as part of the
// overall stack size.
- if (MFI->adjustsStack() && TFI.hasReservedCallFrame(Fn))
- Offset += MFI->getMaxCallFrameSize();
+ if (MFI.adjustsStack() && TFI.hasReservedCallFrame(Fn))
+ Offset += MFI.getMaxCallFrameSize();
// Round up the size to a multiple of the alignment. If the function has
// any calls or alloca's, align to the target's StackAlignment value to
@@ -944,8 +949,8 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) {
// otherwise, for leaf functions, align to the TransientStackAlignment
// value.
unsigned StackAlign;
- if (MFI->adjustsStack() || MFI->hasVarSizedObjects() ||
- (RegInfo->needsStackRealignment(Fn) && MFI->getObjectIndexEnd() != 0))
+ if (MFI.adjustsStack() || MFI.hasVarSizedObjects() ||
+ (RegInfo->needsStackRealignment(Fn) && MFI.getObjectIndexEnd() != 0))
StackAlign = TFI.getStackAlignment();
else
StackAlign = TFI.getTransientStackAlignment();
@@ -958,7 +963,7 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) {
// Update frame info to pretend that this is part of the stack...
int64_t StackSize = Offset - LocalAreaOffset;
- MFI->setStackSize(StackSize);
+ MFI.setStackSize(StackSize);
NumBytesStackSpace += StackSize;
}
@@ -1009,7 +1014,7 @@ void PEI::replaceFrameIndices(MachineFunction &Fn) {
// Store SPAdj at exit of a basic block.
SmallVector<int, 8> SPState;
SPState.resize(Fn.getNumBlockIDs());
- SmallPtrSet<MachineBasicBlock*, 8> Reachable;
+ df_iterator_default_set<MachineBasicBlock*> Reachable;
// Iterate over the reachable blocks in DFS order.
for (auto DFI = df_ext_begin(&Fn, Reachable), DFE = df_ext_end(&Fn, Reachable);
@@ -1047,7 +1052,8 @@ void PEI::replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &Fn,
unsigned FrameSetupOpcode = TII.getCallFrameSetupOpcode();
unsigned FrameDestroyOpcode = TII.getCallFrameDestroyOpcode();
- if (RS && !FrameIndexVirtualScavenging) RS->enterBasicBlock(*BB);
+ if (RS && FrameIndexEliminationScavenging)
+ RS->enterBasicBlock(*BB);
bool InsideCallSequence = false;
@@ -1116,7 +1122,7 @@ void PEI::replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &Fn,
// use that target machine register info object to eliminate
// it.
TRI.eliminateFrameIndex(MI, SPAdj, i,
- FrameIndexVirtualScavenging ? nullptr : RS);
+ FrameIndexEliminationScavenging ? RS : nullptr);
// Reset the iterator if we were at the beginning of the BB.
if (AtBeginning) {
@@ -1132,7 +1138,7 @@ void PEI::replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &Fn,
// the SP adjustment made by each instruction in the sequence.
// This includes both the frame setup/destroy pseudos (handled above),
// as well as other instructions that have side effects w.r.t the SP.
- // Note that this must come after eliminateFrameIndex, because
+ // Note that this must come after eliminateFrameIndex, because
// if I itself referred to a frame index, we shouldn't count its own
// adjustment.
if (DidFinishLoop && InsideCallSequence)
@@ -1141,7 +1147,7 @@ void PEI::replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &Fn,
if (DoIncr && I != BB->end()) ++I;
// Update register states.
- if (RS && !FrameIndexVirtualScavenging && DidFinishLoop)
+ if (RS && FrameIndexEliminationScavenging && DidFinishLoop)
RS->forward(MI);
}
}
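Most of the PrologEpilogInserter diff is the mechanical MachineFrameInfo pointer-to-reference migration; the layout arithmetic is untouched. For orientation, here is that arithmetic for a down-growing stack as a standalone sketch — simplified to one object at a time, with no skew, but with the same signs as AdjustStackOffset above:

#include <algorithm>
#include <cassert>
#include <cstdint>

// Round Offset up to a multiple of Align.
int64_t alignTo(int64_t Offset, uint64_t Align) {
  return (int64_t)(((uint64_t)Offset + Align - 1) / Align * Align);
}

// Allocate one frame object when the stack grows down: step past the
// object, align, and record the slot at the negative offset from SP.
int64_t allocDown(int64_t &Offset, uint64_t &MaxAlign, int64_t Size,
                  uint64_t Align) {
  Offset += Size; // lowest address of the object
  MaxAlign = std::max(MaxAlign, Align);
  Offset = alignTo(Offset, Align);
  return -Offset; // what setObjectOffset records
}

int main() {
  int64_t Offset = 0;
  uint64_t MaxAlign = 1;
  assert(allocDown(Offset, MaxAlign, /*Size=*/4, /*Align=*/4) == -4);
  assert(allocDown(Offset, MaxAlign, /*Size=*/2, /*Align=*/8) == -8);
  assert(MaxAlign == 8);
}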
diff --git a/contrib/llvm/lib/CodeGen/RegAllocBase.cpp b/contrib/llvm/lib/CodeGen/RegAllocBase.cpp
index 93eeb9c..fb49a93 100644
--- a/contrib/llvm/lib/CodeGen/RegAllocBase.cpp
+++ b/contrib/llvm/lib/CodeGen/RegAllocBase.cpp
@@ -41,7 +41,8 @@ static cl::opt<bool, true>
VerifyRegAlloc("verify-regalloc", cl::location(RegAllocBase::VerifyEnabled),
cl::desc("Verify during register allocation"));
-const char RegAllocBase::TimerGroupName[] = "Register Allocation";
+const char RegAllocBase::TimerGroupName[] = "regalloc";
+const char RegAllocBase::TimerGroupDescription[] = "Register Allocation";
bool RegAllocBase::VerifyEnabled = false;
//===----------------------------------------------------------------------===//
@@ -67,7 +68,8 @@ void RegAllocBase::init(VirtRegMap &vrm,
// register, unify them with the corresponding LiveIntervalUnion, otherwise push
// them on the priority queue for later assignment.
void RegAllocBase::seedLiveRegs() {
- NamedRegionTimer T("Seed Live Regs", TimerGroupName, TimePassesIsEnabled);
+ NamedRegionTimer T("seed", "Seed Live Regs", TimerGroupName,
+ TimerGroupDescription, TimePassesIsEnabled);
for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) {
unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
if (MRI->reg_nodbg_empty(Reg))
@@ -143,6 +145,7 @@ void RegAllocBase::allocatePhysRegs() {
continue;
}
DEBUG(dbgs() << "queuing new interval: " << *SplitVirtReg << "\n");
+ assert(!SplitVirtReg->empty() && "expecting non-empty interval");
assert(TargetRegisterInfo::isVirtualRegister(SplitVirtReg->reg) &&
"expect split value in virtual register");
enqueue(SplitVirtReg);
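The timer calls gain a short machine-readable key ("regalloc", "seed") next to the human-readable description, so timing reports can be grouped and parsed by key. A generic RAII region timer with the same five-argument shape — an illustrative stand-in, not LLVM's NamedRegionTimer:

#include <chrono>
#include <cstdio>
#include <string>

// RAII region timer keyed by group.name; the enable flag mirrors
// TimePassesIsEnabled so instrumentation can be compiled in but off.
class ScopedTimer {
  std::string Key;
  bool Enabled;
  std::chrono::steady_clock::time_point Start;

public:
  ScopedTimer(const std::string &Name, const std::string &Desc,
              const std::string &Group, const std::string &GroupDesc,
              bool Enable)
      : Key(Group + "." + Name), Enabled(Enable),
        Start(std::chrono::steady_clock::now()) {
    (void)Desc; (void)GroupDesc; // used only in rendered reports
  }
  ~ScopedTimer() {
    if (!Enabled)
      return;
    auto Us = std::chrono::duration_cast<std::chrono::microseconds>(
                  std::chrono::steady_clock::now() - Start)
                  .count();
    std::printf("%s: %lld us\n", Key.c_str(), (long long)Us);
  }
};

int main() {
  ScopedTimer T("seed", "Seed Live Regs", "regalloc", "Register Allocation",
                /*Enable=*/true);
  // ... timed work ...
}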
diff --git a/contrib/llvm/lib/CodeGen/RegAllocBase.h b/contrib/llvm/lib/CodeGen/RegAllocBase.h
index 296ffe8..d8921b5 100644
--- a/contrib/llvm/lib/CodeGen/RegAllocBase.h
+++ b/contrib/llvm/lib/CodeGen/RegAllocBase.h
@@ -105,6 +105,7 @@ protected:
// Use this group name for NamedRegionTimer.
static const char TimerGroupName[];
+ static const char TimerGroupDescription[];
/// Method called when the allocator is about to remove a LiveInterval.
virtual void aboutToRemoveInterval(LiveInterval &LI) {}
diff --git a/contrib/llvm/lib/CodeGen/RegAllocBasic.cpp b/contrib/llvm/lib/CodeGen/RegAllocBasic.cpp
index 11dfda6..a558e37 100644
--- a/contrib/llvm/lib/CodeGen/RegAllocBasic.cpp
+++ b/contrib/llvm/lib/CodeGen/RegAllocBasic.cpp
@@ -76,9 +76,7 @@ public:
RABasic();
/// Return the pass name.
- const char* getPassName() const override {
- return "Basic Register Allocator";
- }
+ StringRef getPassName() const override { return "Basic Register Allocator"; }
/// RABasic analysis usage.
void getAnalysisUsage(AnalysisUsage &AU) const override;
@@ -105,6 +103,11 @@ public:
/// Perform register allocation.
bool runOnMachineFunction(MachineFunction &mf) override;
+ MachineFunctionProperties getRequiredProperties() const override {
+ return MachineFunctionProperties().set(
+ MachineFunctionProperties::Property::NoPHIs);
+ }
+
// Helper for spilling all live virtual registers currently unified under preg
// that interfere with the most recently queried lvr. Return true if spilling
// was successful, and append any new spilled/split intervals to splitLVRs.
diff --git a/contrib/llvm/lib/CodeGen/RegAllocFast.cpp b/contrib/llvm/lib/CodeGen/RegAllocFast.cpp
index 55fb33e..fd759bc 100644
--- a/contrib/llvm/lib/CodeGen/RegAllocFast.cpp
+++ b/contrib/llvm/lib/CodeGen/RegAllocFast.cpp
@@ -149,18 +149,21 @@ namespace {
spillImpossible = ~0u
};
public:
- const char *getPassName() const override {
- return "Fast Register Allocator";
- }
+ StringRef getPassName() const override { return "Fast Register Allocator"; }
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
MachineFunctionPass::getAnalysisUsage(AU);
}
+ MachineFunctionProperties getRequiredProperties() const override {
+ return MachineFunctionProperties().set(
+ MachineFunctionProperties::Property::NoPHIs);
+ }
+
MachineFunctionProperties getSetProperties() const override {
return MachineFunctionProperties().set(
- MachineFunctionProperties::Property::AllVRegsAllocated);
+ MachineFunctionProperties::Property::NoVRegs);
}
private:
@@ -209,8 +212,8 @@ int RAFast::getStackSpaceFor(unsigned VirtReg, const TargetRegisterClass *RC) {
return SS; // Already has space allocated?
// Allocate a new stack object for this spill location...
- int FrameIdx = MF->getFrameInfo()->CreateSpillStackObject(RC->getSize(),
- RC->getAlignment());
+ int FrameIdx = MF->getFrameInfo().CreateSpillStackObject(RC->getSize(),
+ RC->getAlignment());
// Assign the slot.
StackSlotForVirtReg[VirtReg] = FrameIdx;
@@ -360,7 +363,7 @@ void RAFast::usePhysReg(MachineOperand &MO) {
break;
case regReserved:
PhysRegState[PhysReg] = regFree;
- // Fall through
+ LLVM_FALLTHROUGH;
case regFree:
MO.setIsKill();
return;
@@ -389,7 +392,7 @@ void RAFast::usePhysReg(MachineOperand &MO) {
assert((TRI->isSuperRegister(PhysReg, Alias) ||
TRI->isSuperRegister(Alias, PhysReg)) &&
"Instruction is not using a subregister of a reserved register");
- // Fall through.
+ LLVM_FALLTHROUGH;
case regFree:
if (TRI->isSuperRegister(PhysReg, Alias)) {
// Leave the superregister in the working set.
@@ -421,7 +424,7 @@ void RAFast::definePhysReg(MachineInstr &MI, unsigned PhysReg,
break;
default:
spillVirtReg(MI, VirtReg);
- // Fall through.
+ LLVM_FALLTHROUGH;
case regFree:
case regReserved:
PhysRegState[PhysReg] = NewState;
@@ -437,7 +440,7 @@ void RAFast::definePhysReg(MachineInstr &MI, unsigned PhysReg,
break;
default:
spillVirtReg(MI, VirtReg);
- // Fall through.
+ LLVM_FALLTHROUGH;
case regFree:
case regReserved:
PhysRegState[Alias] = regDisabled;
@@ -1093,8 +1096,6 @@ bool RAFast::runOnMachineFunction(MachineFunction &Fn) {
UsedInInstr.clear();
UsedInInstr.setUniverse(TRI->getNumRegUnits());
- assert(!MRI->isSSA() && "regalloc requires leaving SSA");
-
// initialize the virtual->physical register map to have a 'null'
// mapping for all virtual registers
StackSlotForVirtReg.resize(MRI->getNumVirtRegs());
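The `// Fall through` comments become LLVM_FALLTHROUGH, which expands to a compiler-visible attribute ([[fallthrough]] in C++17) so -Wimplicit-fallthrough can flag unannotated cases without tripping over these deliberate ones. A plain C++17 rendering of the pattern from usePhysReg — the state machine here is hypothetical:

#include <cstdio>

enum State { Reserved, Free, Disabled };

// A reserved register is first released, then handled exactly like a
// free one. [[fallthrough]] documents that intent to both the reader
// and -Wimplicit-fallthrough.
void use(State &S) {
  switch (S) {
  case Reserved:
    std::puts("releasing reserved register");
    S = Free;
    [[fallthrough]]; // what LLVM_FALLTHROUGH expands to here
  case Free:
    std::puts("marking kill");
    return;
  case Disabled:
    std::puts("cannot use a disabled register");
    return;
  }
}

int main() {
  State S = Reserved;
  use(S); // prints both messages
}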
diff --git a/contrib/llvm/lib/CodeGen/RegAllocGreedy.cpp b/contrib/llvm/lib/CodeGen/RegAllocGreedy.cpp
index c4d4b1e..c47cfb1 100644
--- a/contrib/llvm/lib/CodeGen/RegAllocGreedy.cpp
+++ b/contrib/llvm/lib/CodeGen/RegAllocGreedy.cpp
@@ -61,8 +61,7 @@ static cl::opt<SplitEditor::ComplementSpillMode> SplitSpillMode(
cl::desc("Spill mode for splitting live ranges"),
cl::values(clEnumValN(SplitEditor::SM_Partition, "default", "Default"),
clEnumValN(SplitEditor::SM_Size, "size", "Optimize for size"),
- clEnumValN(SplitEditor::SM_Speed, "speed", "Optimize for speed"),
- clEnumValEnd),
+ clEnumValN(SplitEditor::SM_Speed, "speed", "Optimize for speed")),
cl::init(SplitEditor::SM_Speed));
static cl::opt<unsigned>
@@ -318,9 +317,7 @@ public:
RAGreedy();
/// Return the pass name.
- const char* getPassName() const override {
- return "Greedy Register Allocator";
- }
+ StringRef getPassName() const override { return "Greedy Register Allocator"; }
/// RAGreedy analysis usage.
void getAnalysisUsage(AnalysisUsage &AU) const override;
@@ -334,6 +331,11 @@ public:
/// Perform register allocation.
bool runOnMachineFunction(MachineFunction &mf) override;
+ MachineFunctionProperties getRequiredProperties() const override {
+ return MachineFunctionProperties().set(
+ MachineFunctionProperties::Property::NoPHIs);
+ }
+
static char ID;
private:
@@ -421,6 +423,24 @@ private:
} // end anonymous namespace
char RAGreedy::ID = 0;
+char &llvm::RAGreedyID = RAGreedy::ID;
+
+INITIALIZE_PASS_BEGIN(RAGreedy, "greedy",
+ "Greedy Register Allocator", false, false)
+INITIALIZE_PASS_DEPENDENCY(LiveDebugVariables)
+INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
+INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
+INITIALIZE_PASS_DEPENDENCY(RegisterCoalescer)
+INITIALIZE_PASS_DEPENDENCY(MachineScheduler)
+INITIALIZE_PASS_DEPENDENCY(LiveStacks)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_DEPENDENCY(VirtRegMap)
+INITIALIZE_PASS_DEPENDENCY(LiveRegMatrix)
+INITIALIZE_PASS_DEPENDENCY(EdgeBundles)
+INITIALIZE_PASS_DEPENDENCY(SpillPlacement)
+INITIALIZE_PASS_END(RAGreedy, "greedy",
+ "Greedy Register Allocator", false, false)
#ifndef NDEBUG
const char *const RAGreedy::StageName[] = {
@@ -444,19 +464,6 @@ FunctionPass* llvm::createGreedyRegisterAllocator() {
}
RAGreedy::RAGreedy(): MachineFunctionPass(ID) {
- initializeLiveDebugVariablesPass(*PassRegistry::getPassRegistry());
- initializeSlotIndexesPass(*PassRegistry::getPassRegistry());
- initializeLiveIntervalsPass(*PassRegistry::getPassRegistry());
- initializeSlotIndexesPass(*PassRegistry::getPassRegistry());
- initializeRegisterCoalescerPass(*PassRegistry::getPassRegistry());
- initializeMachineSchedulerPass(*PassRegistry::getPassRegistry());
- initializeLiveStacksPass(*PassRegistry::getPassRegistry());
- initializeMachineDominatorTreePass(*PassRegistry::getPassRegistry());
- initializeMachineLoopInfoPass(*PassRegistry::getPassRegistry());
- initializeVirtRegMapPass(*PassRegistry::getPassRegistry());
- initializeLiveRegMatrixPass(*PassRegistry::getPassRegistry());
- initializeEdgeBundlesPass(*PassRegistry::getPassRegistry());
- initializeSpillPlacementPass(*PassRegistry::getPassRegistry());
}
void RAGreedy::getAnalysisUsage(AnalysisUsage &AU) const {
@@ -639,6 +646,9 @@ unsigned RAGreedy::tryAssign(LiveInterval &VirtReg,
evictInterference(VirtReg, Hint, NewVRegs);
return Hint;
}
+ // Record the missed hint; we may be able to recover
+ // at the end if the surrounding allocation has changed.
+ SetOfBrokenHints.insert(&VirtReg);
}
// Try to evict interference from a cheaper alternative.
@@ -859,7 +869,8 @@ unsigned RAGreedy::tryEvict(LiveInterval &VirtReg,
AllocationOrder &Order,
SmallVectorImpl<unsigned> &NewVRegs,
unsigned CostPerUseLimit) {
- NamedRegionTimer T("Evict", TimerGroupName, TimePassesIsEnabled);
+ NamedRegionTimer T("evict", "Evict", TimerGroupName, TimerGroupDescription,
+ TimePassesIsEnabled);
// Keep track of the cheapest interference seen so far.
EvictionCost BestCost;
@@ -1957,7 +1968,8 @@ unsigned RAGreedy::trySplit(LiveInterval &VirtReg, AllocationOrder &Order,
// Local intervals are handled separately.
if (LIS->intervalIsInOneMBB(VirtReg)) {
- NamedRegionTimer T("Local Splitting", TimerGroupName, TimePassesIsEnabled);
+ NamedRegionTimer T("local_split", "Local Splitting", TimerGroupName,
+ TimerGroupDescription, TimePassesIsEnabled);
SA->analyze(&VirtReg);
unsigned PhysReg = tryLocalSplit(VirtReg, Order, NewVRegs);
if (PhysReg || !NewVRegs.empty())
@@ -1965,7 +1977,8 @@ unsigned RAGreedy::trySplit(LiveInterval &VirtReg, AllocationOrder &Order,
return tryInstructionSplit(VirtReg, Order, NewVRegs);
}
- NamedRegionTimer T("Global Splitting", TimerGroupName, TimePassesIsEnabled);
+ NamedRegionTimer T("global_split", "Global Splitting", TimerGroupName,
+ TimerGroupDescription, TimePassesIsEnabled);
SA->analyze(&VirtReg);
@@ -2103,6 +2116,7 @@ unsigned RAGreedy::tryLastChanceRecoloring(LiveInterval &VirtReg,
// Mark VirtReg as fixed, i.e., it will not be recolored past this point in
// this recoloring "session".
FixedRegisters.insert(VirtReg.reg);
+ SmallVector<unsigned, 4> CurrentNewVRegs;
Order.rewind();
while (unsigned PhysReg = Order.next()) {
@@ -2110,6 +2124,7 @@ unsigned RAGreedy::tryLastChanceRecoloring(LiveInterval &VirtReg,
<< PrintReg(PhysReg, TRI) << '\n');
RecoloringCandidates.clear();
VirtRegToPhysReg.clear();
+ CurrentNewVRegs.clear();
// It is only possible to recolor virtual register interference.
if (Matrix->checkInterference(VirtReg, PhysReg) >
@@ -2154,8 +2169,11 @@ unsigned RAGreedy::tryLastChanceRecoloring(LiveInterval &VirtReg,
// If we cannot recolor all the interferences, we will have to start again
// at this point for the next physical register.
SmallVirtRegSet SaveFixedRegisters(FixedRegisters);
- if (tryRecoloringCandidates(RecoloringQueue, NewVRegs, FixedRegisters,
- Depth)) {
+ if (tryRecoloringCandidates(RecoloringQueue, CurrentNewVRegs,
+ FixedRegisters, Depth)) {
+ // Push the queued vregs into the main queue.
+ for (unsigned NewVReg : CurrentNewVRegs)
+ NewVRegs.push_back(NewVReg);
// Do not mess up with the global assignment process.
// I.e., VirtReg must be unassigned.
Matrix->unassign(VirtReg);
@@ -2169,6 +2187,18 @@ unsigned RAGreedy::tryLastChanceRecoloring(LiveInterval &VirtReg,
FixedRegisters = SaveFixedRegisters;
Matrix->unassign(VirtReg);
+ // For a newly created vreg which is also in RecoloringCandidates,
+ // don't add it to NewVRegs because its physical register will be restored
+ // below. Other vregs in CurrentNewVRegs are created by calling
+ // selectOrSplit and should be added into NewVRegs.
+ for (SmallVectorImpl<unsigned>::iterator Next = CurrentNewVRegs.begin(),
+ End = CurrentNewVRegs.end();
+ Next != End; ++Next) {
+ if (RecoloringCandidates.count(&LIS->getInterval(*Next)))
+ continue;
+ NewVRegs.push_back(*Next);
+ }
+
for (SmallLISet::iterator It = RecoloringCandidates.begin(),
EndIt = RecoloringCandidates.end();
It != EndIt; ++It) {
@@ -2201,10 +2231,21 @@ bool RAGreedy::tryRecoloringCandidates(PQueue &RecoloringQueue,
DEBUG(dbgs() << "Try to recolor: " << *LI << '\n');
unsigned PhysReg;
PhysReg = selectOrSplitImpl(*LI, NewVRegs, FixedRegisters, Depth + 1);
- if (PhysReg == ~0u || !PhysReg)
+ // When splitting happens, the live-range may actually be empty.
+ // In that case, it is okay to continue the recoloring even
+ // if we did not find an alternative color for it. Indeed,
+ // there will not be anything to color for LI in the end.
+ if (PhysReg == ~0u || (!PhysReg && !LI->empty()))
return false;
+
+ if (!PhysReg) {
+ assert(LI->empty() && "Only an empty live-range does not require a register");
+ DEBUG(dbgs() << "Recoloring of " << *LI << " succeeded. Empty LI.\n");
+ continue;
+ }
DEBUG(dbgs() << "Recoloring of " << *LI
<< " succeeded with: " << PrintReg(PhysReg, TRI) << '\n');
+
Matrix->assign(*LI, PhysReg);
FixedRegisters.insert(LI->reg);
}
@@ -2519,7 +2560,7 @@ unsigned RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg,
return PhysReg;
}
- assert(NewVRegs.empty() && "Cannot append to existing NewVRegs");
+ assert((NewVRegs.empty() || Depth) && "Cannot append to existing NewVRegs");
// The first time we see a live range, don't try to split or spill.
// Wait until the second time, when all smaller ranges have been allocated.
@@ -2531,17 +2572,20 @@ unsigned RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg,
return 0;
}
+ if (Stage < RS_Spill) {
+ // Try splitting VirtReg or interferences.
+ unsigned NewVRegSizeBefore = NewVRegs.size();
+ unsigned PhysReg = trySplit(VirtReg, Order, NewVRegs);
+ if (PhysReg || (NewVRegs.size() - NewVRegSizeBefore))
+ return PhysReg;
+ }
+
// If we couldn't allocate a register from spilling, there is probably some
// invalid inline assembly. The base class will report it.
if (Stage >= RS_Done || !VirtReg.isSpillable())
return tryLastChanceRecoloring(VirtReg, Order, NewVRegs, FixedRegisters,
Depth);
- // Try splitting VirtReg or interferences.
- unsigned PhysReg = trySplit(VirtReg, Order, NewVRegs);
- if (PhysReg || !NewVRegs.empty())
- return PhysReg;
-
// Finally spill VirtReg itself.
if (EnableDeferredSpilling && getStage(VirtReg) < RS_Memory) {
// TODO: This is experimental and in particular, we do not model
@@ -2552,7 +2596,8 @@ unsigned RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg,
DEBUG(dbgs() << "Do as if this register is in memory\n");
NewVRegs.push_back(VirtReg.reg);
} else {
- NamedRegionTimer T("Spiller", TimerGroupName, TimePassesIsEnabled);
+ NamedRegionTimer T("spill", "Spiller", TimerGroupName,
+ TimerGroupDescription, TimePassesIsEnabled);
LiveRangeEdit LRE(&VirtReg, NewVRegs, *MF, *LIS, VRM, this, &DeadRemats);
spiller().spill(LRE);
setStage(NewVRegs.begin(), NewVRegs.end(), RS_Done);
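RAGreedy's constructor-time initialize*Pass calls (which also initialized SlotIndexes twice) give way to the INITIALIZE_PASS_BEGIN/DEPENDENCY/END block, which performs the registration once and records the dependency edges declaratively. A stripped-down model of that once-only registration shape — a hypothetical registry, not LLVM's PassRegistry:

#include <cstdio>
#include <string>
#include <vector>

struct Registry {
  std::vector<std::string> Passes;
  void add(const std::string &Name) { Passes.push_back(Name); }
};

Registry &globalRegistry() {
  static Registry R;
  return R;
}

// One-time initializer: register the dependencies first, then the pass
// itself -- the shape the INITIALIZE_PASS_* macros expand to.
void initializeGreedyPass(Registry &R) {
  static bool Done = false; // guard makes repeated calls harmless
  if (Done)
    return;
  Done = true;
  R.add("SlotIndexes"); // one line per INITIALIZE_PASS_DEPENDENCY(...)
  R.add("LiveIntervals");
  R.add("Greedy Register Allocator");
}

int main() {
  initializeGreedyPass(globalRegistry());
  initializeGreedyPass(globalRegistry()); // no duplicate registration
  for (const auto &P : globalRegistry().Passes)
    std::printf("%s\n", P.c_str());
}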
diff --git a/contrib/llvm/lib/CodeGen/RegAllocPBQP.cpp b/contrib/llvm/lib/CodeGen/RegAllocPBQP.cpp
index d1221ec..101b30b 100644
--- a/contrib/llvm/lib/CodeGen/RegAllocPBQP.cpp
+++ b/contrib/llvm/lib/CodeGen/RegAllocPBQP.cpp
@@ -99,9 +99,7 @@ public:
}
/// Return the pass name.
- const char* getPassName() const override {
- return "PBQP Register Allocator";
- }
+ StringRef getPassName() const override { return "PBQP Register Allocator"; }
/// PBQP analysis usage.
void getAnalysisUsage(AnalysisUsage &au) const override;
@@ -109,6 +107,11 @@ public:
/// Perform register allocation
bool runOnMachineFunction(MachineFunction &MF) override;
+ MachineFunctionProperties getRequiredProperties() const override {
+ return MachineFunctionProperties().set(
+ MachineFunctionProperties::Property::NoPHIs);
+ }
+
private:
typedef std::map<const LiveInterval*, unsigned> LI2NodeMap;
diff --git a/contrib/llvm/lib/CodeGen/RegUsageInfoCollector.cpp b/contrib/llvm/lib/CodeGen/RegUsageInfoCollector.cpp
index 50b8854..ece44c2 100644
--- a/contrib/llvm/lib/CodeGen/RegUsageInfoCollector.cpp
+++ b/contrib/llvm/lib/CodeGen/RegUsageInfoCollector.cpp
@@ -48,7 +48,7 @@ public:
initializeRegUsageInfoCollectorPass(Registry);
}
- const char *getPassName() const override {
+ StringRef getPassName() const override {
return "Register Usage Information Collector Pass";
}
@@ -57,10 +57,6 @@ public:
bool runOnMachineFunction(MachineFunction &MF) override;
static char ID;
-
-private:
- void markRegClobbered(const TargetRegisterInfo *TRI, uint32_t *RegMask,
- unsigned PReg);
};
} // end of anonymous namespace
@@ -76,13 +72,6 @@ FunctionPass *llvm::createRegUsageInfoCollector() {
return new RegUsageInfoCollector();
}
-void RegUsageInfoCollector::markRegClobbered(const TargetRegisterInfo *TRI,
- uint32_t *RegMask, unsigned PReg) {
- // If PReg is clobbered then all of its alias are also clobbered.
- for (MCRegAliasIterator AI(PReg, TRI, true); AI.isValid(); ++AI)
- RegMask[*AI / 32] &= ~(1u << *AI % 32);
-}
-
void RegUsageInfoCollector::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<PhysicalRegisterUsageInfo>();
AU.setPreservesAll();
@@ -116,7 +105,7 @@ bool RegUsageInfoCollector::runOnMachineFunction(MachineFunction &MF) {
for (unsigned PReg = 1, PRegE = TRI->getNumRegs(); PReg < PRegE; ++PReg)
if (MRI->isPhysRegModified(PReg, true))
- markRegClobbered(TRI, &RegMask[0], PReg);
+ RegMask[PReg / 32] &= ~(1u << PReg % 32);
if (!TargetFrameLowering::isSafeForNoCSROpt(F)) {
const uint32_t *CallPreservedMask =
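The markRegClobbered helper that walked all aliases is dropped; the collector now clears the regmask bit for the modified register directly. The word/bit arithmetic of such a packed register mask, as a standalone sketch — the container here is hypothetical, while LLVM keeps the mask as a raw uint32_t array:

#include <cassert>
#include <cstdint>
#include <vector>

// One bit per physical register, 32 registers per word. A set bit means
// "preserved"; clearing it marks the register as clobbered.
struct RegMask {
  std::vector<uint32_t> Words;
  explicit RegMask(unsigned NumRegs) : Words((NumRegs + 31) / 32, ~0u) {}
  void clobber(unsigned Reg) { Words[Reg / 32] &= ~(1u << Reg % 32); }
  bool preserved(unsigned Reg) const {
    return Words[Reg / 32] & (1u << Reg % 32);
  }
};

int main() {
  RegMask RM(/*NumRegs=*/64);
  RM.clobber(33); // the same arithmetic as in the hunk above
  assert(!RM.preserved(33));
  assert(RM.preserved(32) && RM.preserved(34));
}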
diff --git a/contrib/llvm/lib/CodeGen/RegUsageInfoPropagate.cpp b/contrib/llvm/lib/CodeGen/RegUsageInfoPropagate.cpp
index 7595661..5cc35bf 100644
--- a/contrib/llvm/lib/CodeGen/RegUsageInfoPropagate.cpp
+++ b/contrib/llvm/lib/CodeGen/RegUsageInfoPropagate.cpp
@@ -52,7 +52,7 @@ public:
initializeRegUsageInfoPropagationPassPass(Registry);
}
- const char *getPassName() const override { return RUIP_NAME; }
+ StringRef getPassName() const override { return RUIP_NAME; }
bool runOnMachineFunction(MachineFunction &MF) override;
diff --git a/contrib/llvm/lib/CodeGen/RegisterCoalescer.cpp b/contrib/llvm/lib/CodeGen/RegisterCoalescer.cpp
index 617ece9..4bb3c22 100644
--- a/contrib/llvm/lib/CodeGen/RegisterCoalescer.cpp
+++ b/contrib/llvm/lib/CodeGen/RegisterCoalescer.cpp
@@ -815,14 +815,14 @@ bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP,
for (LiveInterval::SubRange &SB : IntB.subranges()) {
LaneBitmask BMask = SB.LaneMask;
LaneBitmask Common = BMask & AMask;
- if (Common == 0)
+ if (Common.none())
continue;
DEBUG( dbgs() << "\t\tCopy_Merge " << PrintLaneMask(BMask)
<< " into " << PrintLaneMask(Common) << '\n');
LaneBitmask BRest = BMask & ~AMask;
LiveInterval::SubRange *CommonRange;
- if (BRest != 0) {
+ if (BRest.any()) {
SB.LaneMask = BRest;
DEBUG(dbgs() << "\t\tReduce Lane to " << PrintLaneMask(BRest)
<< '\n');
@@ -841,7 +841,7 @@ bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP,
addSegmentsWithValNo(*CommonRange, BSubValNo, SA, ASubValNo);
AMask &= ~BMask;
}
- if (AMask != 0) {
+ if (AMask.any()) {
DEBUG(dbgs() << "\t\tNew Lane " << PrintLaneMask(AMask) << '\n');
LiveRange *NewRange = IntB.createSubRange(Allocator, AMask);
VNInfo *BSubValNo = NewRange->getNextValue(CopyIdx, Allocator);
@@ -975,6 +975,7 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,
NewRC = CommonRC;
DstIdx = 0;
DefMO.setSubReg(0);
+ DefMO.setIsUndef(false); // Only subregs can have def+undef.
}
}
}
@@ -1060,7 +1061,7 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,
SR.createDeadDef(DefIndex, Alloc);
MaxMask &= ~SR.LaneMask;
}
- if (MaxMask != 0) {
+ if (MaxMask.any()) {
LiveInterval::SubRange *SR = DstInt.createSubRange(Alloc, MaxMask);
SR->createDeadDef(DefIndex, Alloc);
}
@@ -1153,7 +1154,7 @@ bool RegisterCoalescer::eliminateUndefCopy(MachineInstr *CopyMI) {
if (SrcSubIdx != 0 && SrcLI.hasSubRanges()) {
LaneBitmask SrcMask = TRI->getSubRegIndexLaneMask(SrcSubIdx);
for (const LiveInterval::SubRange &SR : SrcLI.subranges()) {
- if ((SR.LaneMask & SrcMask) == 0)
+ if ((SR.LaneMask & SrcMask).none())
continue;
if (SR.liveAt(Idx))
return false;
@@ -1174,7 +1175,7 @@ bool RegisterCoalescer::eliminateUndefCopy(MachineInstr *CopyMI) {
// The affected subregister segments can be removed.
LaneBitmask DstMask = TRI->getSubRegIndexLaneMask(DstSubIdx);
for (LiveInterval::SubRange &SR : DstLI.subranges()) {
- if ((SR.LaneMask & DstMask) == 0)
+ if ((SR.LaneMask & DstMask).none())
continue;
VNInfo *SVNI = SR.getVNInfoAt(RegIndex);
@@ -1193,10 +1194,10 @@ bool RegisterCoalescer::eliminateUndefCopy(MachineInstr *CopyMI) {
SlotIndex UseIdx = LIS->getInstructionIndex(MI);
LaneBitmask UseMask = TRI->getSubRegIndexLaneMask(MO.getSubReg());
bool isLive;
- if (UseMask != ~0u && DstLI.hasSubRanges()) {
+ if (!UseMask.all() && DstLI.hasSubRanges()) {
isLive = false;
for (const LiveInterval::SubRange &SR : DstLI.subranges()) {
- if ((SR.LaneMask & UseMask) == 0)
+ if ((SR.LaneMask & UseMask).none())
continue;
if (SR.liveAt(UseIdx)) {
isLive = true;
@@ -1210,6 +1211,17 @@ bool RegisterCoalescer::eliminateUndefCopy(MachineInstr *CopyMI) {
MO.setIsUndef(true);
DEBUG(dbgs() << "\tnew undef: " << UseIdx << '\t' << MI);
}
+
+ // A def of a subregister may be a use of the other subregisters, so
+ // deleting a def of a subregister may also remove uses. Since CopyMI
+ // is still part of the function (but about to be erased), mark all
+ // defs of DstReg in it as <undef>, so that shrinkToUses would
+ // ignore them.
+ for (MachineOperand &MO : CopyMI->operands())
+ if (MO.isReg() && MO.isDef() && MO.getReg() == DstReg)
+ MO.setIsUndef(true);
+ LIS->shrinkToUses(&DstLI);
+
return true;
}
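
// A minimal standalone sketch (assumed masks, not part of this commit) of why
// the hunk above marks defs of DstReg as <undef>: a def that writes only some
// lanes implicitly reads the remaining lanes of the old value, unless the
// operand carries the undef flag.
#include <cstdint>

// Lanes implicitly read by a def writing WriteLanes out of FullMask.
static uint32_t implicitlyReadLanes(uint32_t FullMask, uint32_t WriteLanes,
                                    bool IsUndef) {
  return IsUndef ? 0u : (FullMask & ~WriteLanes);
}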
@@ -1220,7 +1232,7 @@ void RegisterCoalescer::addUndefFlag(const LiveInterval &Int, SlotIndex UseIdx,
Mask = ~Mask;
bool IsUndef = true;
for (const LiveInterval::SubRange &S : Int.subranges()) {
- if ((S.LaneMask & Mask) == 0)
+ if ((S.LaneMask & Mask).none())
continue;
if (S.liveAt(UseIdx)) {
IsUndef = false;
@@ -1446,7 +1458,7 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) {
});
}
- ShrinkMask = 0;
+ ShrinkMask = LaneBitmask::getNone();
ShrinkMainRange = false;
// Okay, attempt to join these two intervals. On failure, this returns false.
@@ -1504,10 +1516,10 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) {
updateRegDefsUses(CP.getSrcReg(), CP.getDstReg(), CP.getSrcIdx());
// Shrink subregister ranges if necessary.
- if (ShrinkMask != 0) {
+ if (ShrinkMask.any()) {
LiveInterval &LI = LIS->getInterval(CP.getDstReg());
for (LiveInterval::SubRange &S : LI.subranges()) {
- if ((S.LaneMask & ShrinkMask) == 0)
+ if ((S.LaneMask & ShrinkMask).none())
continue;
DEBUG(dbgs() << "Shrink LaneUses (Lane " << PrintLaneMask(S.LaneMask)
<< ")\n");
@@ -1544,9 +1556,10 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) {
bool RegisterCoalescer::joinReservedPhysReg(CoalescerPair &CP) {
unsigned DstReg = CP.getDstReg();
+ unsigned SrcReg = CP.getSrcReg();
assert(CP.isPhys() && "Must be a physreg copy");
assert(MRI->isReserved(DstReg) && "Not a reserved register");
- LiveInterval &RHS = LIS->getInterval(CP.getSrcReg());
+ LiveInterval &RHS = LIS->getInterval(SrcReg);
DEBUG(dbgs() << "\t\tRHS = " << RHS << '\n');
assert(RHS.containsOneValue() && "Invalid join with reserved register");
@@ -1558,11 +1571,19 @@ bool RegisterCoalescer::joinReservedPhysReg(CoalescerPair &CP) {
// Deny any overlapping intervals. This depends on all the reserved
// register live ranges to look like dead defs.
- for (MCRegUnitIterator UI(DstReg, TRI); UI.isValid(); ++UI)
- if (RHS.overlaps(LIS->getRegUnit(*UI))) {
- DEBUG(dbgs() << "\t\tInterference: " << PrintRegUnit(*UI, TRI) << '\n');
- return false;
+ if (!MRI->isConstantPhysReg(DstReg)) {
+ for (MCRegUnitIterator UI(DstReg, TRI); UI.isValid(); ++UI) {
+ // Abort if not all the regunits are reserved.
+ for (MCRegUnitRootIterator RI(*UI, TRI); RI.isValid(); ++RI) {
+ if (!MRI->isReserved(*RI))
+ return false;
+ }
+ if (RHS.overlaps(LIS->getRegUnit(*UI))) {
+ DEBUG(dbgs() << "\t\tInterference: " << PrintRegUnit(*UI, TRI) << '\n');
+ return false;
+ }
}
+ }
// Skip any value computations, we are not adding new values to the
// reserved register. Also skip merging the live ranges, the reserved
@@ -1572,43 +1593,64 @@ bool RegisterCoalescer::joinReservedPhysReg(CoalescerPair &CP) {
// Delete the identity copy.
MachineInstr *CopyMI;
if (CP.isFlipped()) {
- CopyMI = MRI->getVRegDef(RHS.reg);
+ // Physreg is copied into vreg
+ // %vregY = COPY %X
+ // ... //< no other def of %X here
+ // use %vregY
+ // =>
+ // ...
+ // use %X
+ CopyMI = MRI->getVRegDef(SrcReg);
} else {
- if (!MRI->hasOneNonDBGUse(RHS.reg)) {
+ // VReg is copied into physreg:
+ // %vregX = def
+ // ... //< no other def or use of %Y here
+ // %Y = COPY %vregX
+ // =>
+ // %Y = def
+ // ...
+ if (!MRI->hasOneNonDBGUse(SrcReg)) {
DEBUG(dbgs() << "\t\tMultiple vreg uses!\n");
return false;
}
- MachineInstr *DestMI = MRI->getVRegDef(RHS.reg);
- CopyMI = &*MRI->use_instr_nodbg_begin(RHS.reg);
- const SlotIndex CopyRegIdx = LIS->getInstructionIndex(*CopyMI).getRegSlot();
- const SlotIndex DestRegIdx = LIS->getInstructionIndex(*DestMI).getRegSlot();
-
- // We checked above that there are no interfering defs of the physical
- // register. However, for this case, where we intent to move up the def of
- // the physical register, we also need to check for interfering uses.
- SlotIndexes *Indexes = LIS->getSlotIndexes();
- for (SlotIndex SI = Indexes->getNextNonNullIndex(DestRegIdx);
- SI != CopyRegIdx; SI = Indexes->getNextNonNullIndex(SI)) {
- MachineInstr *MI = LIS->getInstructionFromIndex(SI);
- if (MI->readsRegister(DstReg, TRI)) {
- DEBUG(dbgs() << "\t\tInterference (read): " << *MI);
- return false;
- }
+ if (!LIS->intervalIsInOneMBB(RHS)) {
+ DEBUG(dbgs() << "\t\tComplex control flow!\n");
+ return false;
+ }
- // We must also check for clobbers caused by regmasks.
- for (const auto &MO : MI->operands()) {
- if (MO.isRegMask() && MO.clobbersPhysReg(DstReg)) {
- DEBUG(dbgs() << "\t\tInterference (regmask clobber): " << *MI);
+ MachineInstr &DestMI = *MRI->getVRegDef(SrcReg);
+ CopyMI = &*MRI->use_instr_nodbg_begin(SrcReg);
+ SlotIndex CopyRegIdx = LIS->getInstructionIndex(*CopyMI).getRegSlot();
+ SlotIndex DestRegIdx = LIS->getInstructionIndex(DestMI).getRegSlot();
+
+ if (!MRI->isConstantPhysReg(DstReg)) {
+ // We checked above that there are no interfering defs of the physical
+      // register. However, for this case, where we intend to move up the def of
+ // the physical register, we also need to check for interfering uses.
+ SlotIndexes *Indexes = LIS->getSlotIndexes();
+ for (SlotIndex SI = Indexes->getNextNonNullIndex(DestRegIdx);
+ SI != CopyRegIdx; SI = Indexes->getNextNonNullIndex(SI)) {
+ MachineInstr *MI = LIS->getInstructionFromIndex(SI);
+ if (MI->readsRegister(DstReg, TRI)) {
+ DEBUG(dbgs() << "\t\tInterference (read): " << *MI);
return false;
}
+
+ // We must also check for clobbers caused by regmasks.
+ for (const auto &MO : MI->operands()) {
+ if (MO.isRegMask() && MO.clobbersPhysReg(DstReg)) {
+ DEBUG(dbgs() << "\t\tInterference (regmask clobber): " << *MI);
+ return false;
+ }
+ }
}
}
// We're going to remove the copy which defines a physical reserved
// register, so remove its valno, etc.
- DEBUG(dbgs() << "\t\tRemoving phys reg def of " << DstReg << " at "
- << CopyRegIdx << "\n");
+ DEBUG(dbgs() << "\t\tRemoving phys reg def of " << PrintReg(DstReg, TRI)
+ << " at " << CopyRegIdx << "\n");
LIS->removePhysRegDefAt(DstReg, CopyRegIdx);
// Create a new dead def at the new def location.
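
// A minimal standalone sketch (assumed data model, not part of this commit)
// of the interference scan above: before the physreg def can be hoisted up to
// the vreg def, every instruction strictly between the two points must
// neither read the physreg nor clobber it through a regmask.
#include <vector>

struct ToyInstr {
  bool ReadsDstReg;
  bool RegMaskClobbersDstReg;
};

static bool canHoistPhysRegDef(const std::vector<ToyInstr> &Between) {
  for (const ToyInstr &I : Between)
    if (I.ReadsDstReg || I.RegMaskClobbersDstReg)
      return false; // interference: abort coalescing
  return true;
}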
@@ -1795,11 +1837,11 @@ class JoinVals {
/// True once Pruned above has been computed.
bool PrunedComputed;
- Val() : Resolution(CR_Keep), WriteLanes(0), ValidLanes(0),
+ Val() : Resolution(CR_Keep), WriteLanes(), ValidLanes(),
RedefVNI(nullptr), OtherVNI(nullptr), ErasableImplicitDef(false),
Pruned(false), PrunedComputed(false) {}
- bool isAnalyzed() const { return WriteLanes != 0; }
+ bool isAnalyzed() const { return WriteLanes.any(); }
};
/// One entry per value number in LI.
@@ -1889,12 +1931,22 @@ public:
/// no useful information and can be removed.
void pruneSubRegValues(LiveInterval &LI, LaneBitmask &ShrinkMask);
+ /// Pruning values in subranges can lead to removing segments in these
+ /// subranges started by IMPLICIT_DEFs. The corresponding segments in
+ /// the main range also need to be removed. This function will mark
+ /// the corresponding values in the main range as pruned, so that
+ /// eraseInstrs can do the final cleanup.
+ /// The parameter @p LI must be the interval whose main range is the
+ /// live range LR.
+ void pruneMainSegments(LiveInterval &LI, bool &ShrinkMainRange);
+
/// Erase any machine instructions that have been coalesced away.
/// Add erased instructions to ErasedInstrs.
/// Add foreign virtual registers to ShrinkRegs if their live range ended at
/// the erased instrs.
void eraseInstrs(SmallPtrSetImpl<MachineInstr*> &ErasedInstrs,
- SmallVectorImpl<unsigned> &ShrinkRegs);
+ SmallVectorImpl<unsigned> &ShrinkRegs,
+ LiveInterval *LI = nullptr);
/// Remove liverange defs at places where implicit defs will be removed.
void removeImplicitDefs();
@@ -1906,7 +1958,7 @@ public:
LaneBitmask JoinVals::computeWriteLanes(const MachineInstr *DefMI, bool &Redef)
const {
- LaneBitmask L = 0;
+ LaneBitmask L;
for (const MachineOperand &MO : DefMI->operands()) {
if (!MO.isReg() || MO.getReg() != Reg || !MO.isDef())
continue;
@@ -1944,7 +1996,7 @@ std::pair<const VNInfo*, unsigned> JoinVals::followCopyChain(
for (const LiveInterval::SubRange &S : LI.subranges()) {
// Transform lanemask to a mask in the joined live interval.
LaneBitmask SMask = TRI->composeSubRegIndexLaneMask(SubIdx, S.LaneMask);
- if ((SMask & LaneMask) == 0)
+ if ((SMask & LaneMask).none())
continue;
LiveQueryResult LRQ = S.Query(Def);
ValueIn = LRQ.valueIn();
@@ -1984,7 +2036,7 @@ JoinVals::analyzeValue(unsigned ValNo, JoinVals &Other) {
assert(!V.isAnalyzed() && "Value has already been analyzed!");
VNInfo *VNI = LR.getValNumInfo(ValNo);
if (VNI->isUnused()) {
- V.WriteLanes = ~0u;
+ V.WriteLanes = LaneBitmask::getAll();
return CR_Keep;
}
@@ -1992,16 +2044,17 @@ JoinVals::analyzeValue(unsigned ValNo, JoinVals &Other) {
const MachineInstr *DefMI = nullptr;
if (VNI->isPHIDef()) {
// Conservatively assume that all lanes in a PHI are valid.
- LaneBitmask Lanes = SubRangeJoin ? 1 : TRI->getSubRegIndexLaneMask(SubIdx);
+ LaneBitmask Lanes = SubRangeJoin ? LaneBitmask(1)
+ : TRI->getSubRegIndexLaneMask(SubIdx);
V.ValidLanes = V.WriteLanes = Lanes;
} else {
DefMI = Indexes->getInstructionFromIndex(VNI->def);
assert(DefMI != nullptr);
if (SubRangeJoin) {
// We don't care about the lanes when joining subregister ranges.
- V.WriteLanes = V.ValidLanes = 1;
+ V.WriteLanes = V.ValidLanes = LaneBitmask(1);
if (DefMI->isImplicitDef()) {
- V.ValidLanes = 0;
+ V.ValidLanes = LaneBitmask::getNone();
V.ErasableImplicitDef = true;
}
} else {
@@ -2074,7 +2127,7 @@ JoinVals::analyzeValue(unsigned ValNo, JoinVals &Other) {
// predecessor, the PHI itself can't introduce any conflicts.
if (VNI->isPHIDef())
return CR_Merge;
- if (V.ValidLanes & OtherV.ValidLanes)
+ if ((V.ValidLanes & OtherV.ValidLanes).any())
// Overlapping lanes can't be resolved.
return CR_Impossible;
else
@@ -2119,7 +2172,7 @@ JoinVals::analyzeValue(unsigned ValNo, JoinVals &Other) {
// We need the def for the subregister if there is nothing else live at the
// subrange at this point.
if (TrackSubRegLiveness
- && (V.WriteLanes & (OtherV.ValidLanes | OtherV.WriteLanes)) == 0)
+ && (V.WriteLanes & (OtherV.ValidLanes | OtherV.WriteLanes)).none())
return CR_Replace;
return CR_Erase;
}
@@ -2159,7 +2212,7 @@ JoinVals::analyzeValue(unsigned ValNo, JoinVals &Other) {
//
// Here OtherVNI will map to itself in [1;2), but to VNI in [2;5). CR_Replace
// handles this complex value mapping.
- if ((V.WriteLanes & OtherV.ValidLanes) == 0)
+ if ((V.WriteLanes & OtherV.ValidLanes).none())
return CR_Replace;
// If the other live range is killed by DefMI and the live ranges are still
@@ -2180,7 +2233,7 @@ JoinVals::analyzeValue(unsigned ValNo, JoinVals &Other) {
// possibility that no instructions actually read the clobbered lanes.
// If we're clobbering all the lanes in OtherVNI, at least one must be read.
// Otherwise Other.RI wouldn't be live here.
- if ((TRI->getSubRegIndexLaneMask(Other.SubIdx) & ~V.WriteLanes) == 0)
+ if ((TRI->getSubRegIndexLaneMask(Other.SubIdx) & ~V.WriteLanes).none())
return CR_Impossible;
// We need to verify that no instructions are reading the clobbered lanes. To
@@ -2228,11 +2281,11 @@ void JoinVals::computeAssignment(unsigned ValNo, JoinVals &Other) {
Val &OtherV = Other.Vals[V.OtherVNI->id];
// We cannot erase an IMPLICIT_DEF if we don't have valid values for all
// its lanes.
- if ((OtherV.WriteLanes & ~V.ValidLanes) != 0 && TrackSubRegLiveness)
+ if ((OtherV.WriteLanes & ~V.ValidLanes).any() && TrackSubRegLiveness)
OtherV.ErasableImplicitDef = false;
OtherV.Pruned = true;
+ LLVM_FALLTHROUGH;
}
- // Fall through.
default:
// This value number needs to go in the final joined live range.
Assignments[ValNo] = NewVNInfo.size();
@@ -2289,7 +2342,7 @@ taintExtent(unsigned ValNo, LaneBitmask TaintedLanes, JoinVals &Other,
TaintedLanes &= ~OV.WriteLanes;
if (!OV.RedefVNI)
break;
- } while (TaintedLanes);
+ } while (TaintedLanes.any());
return true;
}
@@ -2302,8 +2355,8 @@ bool JoinVals::usesLanes(const MachineInstr &MI, unsigned Reg, unsigned SubIdx,
continue;
if (!MO.readsReg())
continue;
- if (Lanes & TRI->getSubRegIndexLaneMask(
- TRI->composeSubRegIndices(SubIdx, MO.getSubReg())))
+ unsigned S = TRI->composeSubRegIndices(SubIdx, MO.getSubReg());
+ if ((Lanes & TRI->getSubRegIndexLaneMask(S)).any())
return true;
}
return false;
@@ -2350,7 +2403,7 @@ bool JoinVals::resolveConflicts(JoinVals &Other) {
Indexes->getInstructionFromIndex(TaintExtent.front().first);
assert(LastMI && "Range must end at a proper instruction");
unsigned TaintNum = 0;
- for(;;) {
+ for (;;) {
assert(MI != MBB->end() && "Bad LastMI");
if (usesLanes(*MI, Other.Reg, Other.SubIdx, TaintedLanes)) {
DEBUG(dbgs() << "\t\ttainted lanes used by: " << *MI);
@@ -2415,7 +2468,8 @@ void JoinVals::pruneValues(JoinVals &Other,
for (MachineOperand &MO :
Indexes->getInstructionFromIndex(Def)->operands()) {
if (MO.isReg() && MO.isDef() && MO.getReg() == Reg) {
- MO.setIsUndef(EraseImpDef);
+ if (MO.getSubReg() != 0)
+ MO.setIsUndef(EraseImpDef);
MO.setIsDead(false);
}
}
@@ -2448,8 +2502,7 @@ void JoinVals::pruneValues(JoinVals &Other,
}
}
-void JoinVals::pruneSubRegValues(LiveInterval &LI, LaneBitmask &ShrinkMask)
-{
+void JoinVals::pruneSubRegValues(LiveInterval &LI, LaneBitmask &ShrinkMask) {
// Look for values being erased.
bool DidPrune = false;
for (unsigned i = 0, e = LR.getNumValNums(); i != e; ++i) {
@@ -2486,6 +2539,30 @@ void JoinVals::pruneSubRegValues(LiveInterval &LI, LaneBitmask &ShrinkMask)
LI.removeEmptySubRanges();
}
+/// Check if any of the subranges of @p LI contain a definition at @p Def.
+static bool isDefInSubRange(LiveInterval &LI, SlotIndex Def) {
+ for (LiveInterval::SubRange &SR : LI.subranges()) {
+ if (VNInfo *VNI = SR.Query(Def).valueOutOrDead())
+ if (VNI->def == Def)
+ return true;
+ }
+ return false;
+}
+
+void JoinVals::pruneMainSegments(LiveInterval &LI, bool &ShrinkMainRange) {
+ assert(&static_cast<LiveRange&>(LI) == &LR);
+
+ for (unsigned i = 0, e = LR.getNumValNums(); i != e; ++i) {
+ if (Vals[i].Resolution != CR_Keep)
+ continue;
+ VNInfo *VNI = LR.getValNumInfo(i);
+ if (VNI->isUnused() || VNI->isPHIDef() || isDefInSubRange(LI, VNI->def))
+ continue;
+ Vals[i].Pruned = true;
+ ShrinkMainRange = true;
+ }
+}
+
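+
+// A minimal standalone sketch (assumed containers, not part of this commit)
+// of the pruning rule implemented above: a main-range value that is not
+// unused, not a PHI, and has no matching def in any subrange carries no
+// information and can be marked pruned.
+#include <vector>
+
+using ToySlot = unsigned;
+
+static bool defInAnySubRange(const std::vector<std::vector<ToySlot>> &SubDefs,
+                             ToySlot Def) {
+  for (const std::vector<ToySlot> &SR : SubDefs)
+    for (ToySlot D : SR)
+      if (D == Def)
+        return true;
+  return false;
+}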
void JoinVals::removeImplicitDefs() {
for (unsigned i = 0, e = LR.getNumValNums(); i != e; ++i) {
Val &V = Vals[i];
@@ -2499,7 +2576,8 @@ void JoinVals::removeImplicitDefs() {
}
void JoinVals::eraseInstrs(SmallPtrSetImpl<MachineInstr*> &ErasedInstrs,
- SmallVectorImpl<unsigned> &ShrinkRegs) {
+ SmallVectorImpl<unsigned> &ShrinkRegs,
+ LiveInterval *LI) {
for (unsigned i = 0, e = LR.getNumValNums(); i != e; ++i) {
// Get the def location before markUnused() below invalidates it.
SlotIndex Def = LR.getValNumInfo(i)->def;
@@ -2511,13 +2589,65 @@ void JoinVals::eraseInstrs(SmallPtrSetImpl<MachineInstr*> &ErasedInstrs,
if (!Vals[i].ErasableImplicitDef || !Vals[i].Pruned)
break;
// Remove value number i from LR.
+ // For intervals with subranges, removing a segment from the main range
+ // may require extending the previous segment: for each definition of
+ // a subregister, there will be a corresponding def in the main range.
+ // That def may fall in the middle of a segment from another subrange.
+ // In such cases, removing this def from the main range must be
+ // complemented by extending the main range to account for the liveness
+ // of the other subrange.
VNInfo *VNI = LR.getValNumInfo(i);
+ SlotIndex Def = VNI->def;
+ // The new end point of the main range segment to be extended.
+ SlotIndex NewEnd;
+ if (LI != nullptr) {
+ LiveRange::iterator I = LR.FindSegmentContaining(Def);
+ assert(I != LR.end());
+ // Do not extend beyond the end of the segment being removed.
+ // The segment may have been pruned in preparation for joining
+ // live ranges.
+ NewEnd = I->end;
+ }
+
LR.removeValNo(VNI);
// Note that this VNInfo is reused and still referenced in NewVNInfo,
// make it appear like an unused value number.
VNI->markUnused();
- DEBUG(dbgs() << "\t\tremoved " << i << '@' << Def << ": " << LR << '\n');
- // FALL THROUGH.
+
+ if (LI != nullptr && LI->hasSubRanges()) {
+ assert(static_cast<LiveRange*>(LI) == &LR);
+ // Determine the end point based on the subrange information:
+ // minimum of (earliest def of next segment,
+ // latest end point of containing segment)
+ SlotIndex ED, LE;
+ for (LiveInterval::SubRange &SR : LI->subranges()) {
+ LiveRange::iterator I = SR.find(Def);
+ if (I == SR.end())
+ continue;
+ if (I->start > Def)
+ ED = ED.isValid() ? std::min(ED, I->start) : I->start;
+ else
+ LE = LE.isValid() ? std::max(LE, I->end) : I->end;
+ }
+ if (LE.isValid())
+ NewEnd = std::min(NewEnd, LE);
+ if (ED.isValid())
+ NewEnd = std::min(NewEnd, ED);
+
+ // We only want to do the extension if there was a subrange that
+ // was live across Def.
+ if (LE.isValid()) {
+ LiveRange::iterator S = LR.find(Def);
+ if (S != LR.begin())
+ std::prev(S)->end = NewEnd;
+ }
+ }
+ DEBUG({
+ dbgs() << "\t\tremoved " << i << '@' << Def << ": " << LR << '\n';
+ if (LI != nullptr)
+ dbgs() << "\t\t LHS = " << *LI << '\n';
+ });
+ LLVM_FALLTHROUGH;
}
case CR_Erase: {
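
// A minimal standalone sketch (assumed segment type, not part of this commit)
// of the end-point rule in the hunk above: when the main-range def at Def is
// removed, the previous segment is extended to min(earliest subrange def
// after Def, latest end of a subrange segment covering Def), capped at the
// end of the segment being removed.
#include <algorithm>
#include <vector>

struct ToySeg { unsigned Start, End; }; // half-open [Start, End)

static unsigned newEndAfterRemoval(const std::vector<ToySeg> &SubSegs,
                                   unsigned Def, unsigned RemovedEnd) {
  unsigned NewEnd = RemovedEnd;
  unsigned ED = ~0u, LE = 0;            // earliest def / latest end
  bool HaveED = false, HaveLE = false;
  for (const ToySeg &S : SubSegs) {
    if (S.Start > Def) { ED = std::min(ED, S.Start); HaveED = true; }
    else if (S.End > Def) { LE = std::max(LE, S.End); HaveLE = true; }
  }
  if (HaveLE) NewEnd = std::min(NewEnd, LE);
  if (HaveED) NewEnd = std::min(NewEnd, ED);
  return NewEnd;
}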
@@ -2591,8 +2721,15 @@ void RegisterCoalescer::joinSubRegRanges(LiveRange &LRange, LiveRange &RRange,
// Recompute the parts of the live range we had to remove because of
// CR_Replace conflicts.
- DEBUG(dbgs() << "\t\trestoring liveness to " << EndPoints.size()
- << " points: " << LRange << '\n');
+ DEBUG({
+ dbgs() << "\t\trestoring liveness to " << EndPoints.size() << " points: ";
+ for (unsigned i = 0, n = EndPoints.size(); i != n; ++i) {
+ dbgs() << EndPoints[i];
+ if (i != n-1)
+ dbgs() << ',';
+ }
+ dbgs() << ": " << LRange << '\n';
+ });
LIS->extendToIndices(LRange, EndPoints);
}
@@ -2606,7 +2743,7 @@ void RegisterCoalescer::mergeSubRangeInto(LiveInterval &LI,
// LaneMask of subregisters common to subrange R and ToMerge.
LaneBitmask Common = RMask & LaneMask;
// There is nothing to do without common subregs.
- if (Common == 0)
+ if (Common.none())
continue;
DEBUG(dbgs() << "\t\tCopy+Merge " << PrintLaneMask(RMask) << " into "
@@ -2615,7 +2752,7 @@ void RegisterCoalescer::mergeSubRangeInto(LiveInterval &LI,
// they have to split into their own subrange.
LaneBitmask LRest = RMask & ~LaneMask;
LiveInterval::SubRange *CommonRange;
- if (LRest != 0) {
+ if (LRest.any()) {
R.LaneMask = LRest;
DEBUG(dbgs() << "\t\tReduce Lane to " << PrintLaneMask(LRest) << '\n');
// Duplicate SubRange for newly merged common stuff.
@@ -2630,7 +2767,7 @@ void RegisterCoalescer::mergeSubRangeInto(LiveInterval &LI,
LaneMask &= ~RMask;
}
- if (LaneMask != 0) {
+ if (LaneMask.any()) {
DEBUG(dbgs() << "\t\tNew Lane " << PrintLaneMask(LaneMask) << '\n');
LI.createSubRangeFrom(Allocator, LaneMask, ToMerge);
}
@@ -2641,10 +2778,10 @@ bool RegisterCoalescer::joinVirtRegs(CoalescerPair &CP) {
LiveInterval &RHS = LIS->getInterval(CP.getSrcReg());
LiveInterval &LHS = LIS->getInterval(CP.getDstReg());
bool TrackSubRegLiveness = MRI->shouldTrackSubRegLiveness(*CP.getNewRC());
- JoinVals RHSVals(RHS, CP.getSrcReg(), CP.getSrcIdx(), 0, NewVNInfo, CP, LIS,
- TRI, false, TrackSubRegLiveness);
- JoinVals LHSVals(LHS, CP.getDstReg(), CP.getDstIdx(), 0, NewVNInfo, CP, LIS,
- TRI, false, TrackSubRegLiveness);
+ JoinVals RHSVals(RHS, CP.getSrcReg(), CP.getSrcIdx(), LaneBitmask::getNone(),
+ NewVNInfo, CP, LIS, TRI, false, TrackSubRegLiveness);
+ JoinVals LHSVals(LHS, CP.getDstReg(), CP.getDstIdx(), LaneBitmask::getNone(),
+ NewVNInfo, CP, LIS, TRI, false, TrackSubRegLiveness);
DEBUG(dbgs() << "\t\tRHS = " << RHS
<< "\n\t\tLHS = " << LHS
@@ -2670,7 +2807,7 @@ bool RegisterCoalescer::joinVirtRegs(CoalescerPair &CP) {
LaneBitmask Mask = DstIdx == 0 ? CP.getNewRC()->getLaneMask()
: TRI->getSubRegIndexLaneMask(DstIdx);
// LHS must support subregs or we wouldn't be in this codepath.
- assert(Mask != 0);
+ assert(Mask.any());
LHS.createSubRangeFrom(Allocator, Mask, LHS);
} else if (DstIdx != 0) {
// Transform LHS lanemasks to new register class if necessary.
@@ -2697,6 +2834,10 @@ bool RegisterCoalescer::joinVirtRegs(CoalescerPair &CP) {
}
DEBUG(dbgs() << "\tJoined SubRanges " << LHS << "\n");
+ // Pruning implicit defs from subranges may result in the main range
+ // having stale segments.
+ LHSVals.pruneMainSegments(LHS, ShrinkMainRange);
+
LHSVals.pruneSubRegValues(LHS, ShrinkMask);
RHSVals.pruneSubRegValues(LHS, ShrinkMask);
}
@@ -2712,7 +2853,7 @@ bool RegisterCoalescer::joinVirtRegs(CoalescerPair &CP) {
// Erase COPY and IMPLICIT_DEF instructions. This may cause some external
// registers to require trimming.
SmallVector<unsigned, 8> ShrinkRegs;
- LHSVals.eraseInstrs(ErasedInstrs, ShrinkRegs);
+ LHSVals.eraseInstrs(ErasedInstrs, ShrinkRegs, &LHS);
RHSVals.eraseInstrs(ErasedInstrs, ShrinkRegs);
while (!ShrinkRegs.empty())
shrinkToUses(&LIS->getInterval(ShrinkRegs.pop_back_val()));
@@ -2729,8 +2870,15 @@ bool RegisterCoalescer::joinVirtRegs(CoalescerPair &CP) {
if (!EndPoints.empty()) {
// Recompute the parts of the live range we had to remove because of
// CR_Replace conflicts.
- DEBUG(dbgs() << "\t\trestoring liveness to " << EndPoints.size()
- << " points: " << LHS << '\n');
+ DEBUG({
+ dbgs() << "\t\trestoring liveness to " << EndPoints.size() << " points: ";
+ for (unsigned i = 0, n = EndPoints.size(); i != n; ++i) {
+ dbgs() << EndPoints[i];
+ if (i != n-1)
+ dbgs() << ',';
+ }
+ dbgs() << ": " << LHS << '\n';
+ });
LIS->extendToIndices((LiveRange&)LHS, EndPoints);
}
@@ -3039,7 +3187,7 @@ bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) {
// If subranges are still supported, then the same subregs
// should still be supported.
for (LiveInterval::SubRange &S : LI.subranges()) {
- assert((S.LaneMask & ~MaxMask) == 0);
+ assert((S.LaneMask & ~MaxMask).none());
}
#endif
}
diff --git a/contrib/llvm/lib/CodeGen/RegisterPressure.cpp b/contrib/llvm/lib/CodeGen/RegisterPressure.cpp
index a21d6c1..fc84aeb 100644
--- a/contrib/llvm/lib/CodeGen/RegisterPressure.cpp
+++ b/contrib/llvm/lib/CodeGen/RegisterPressure.cpp
@@ -26,8 +26,8 @@ using namespace llvm;
static void increaseSetPressure(std::vector<unsigned> &CurrSetPressure,
const MachineRegisterInfo &MRI, unsigned Reg,
LaneBitmask PrevMask, LaneBitmask NewMask) {
- assert((PrevMask & ~NewMask) == 0 && "Must not remove bits");
- if (PrevMask != 0 || NewMask == 0)
+ assert((PrevMask & ~NewMask).none() && "Must not remove bits");
+ if (PrevMask.any() || NewMask.none())
return;
PSetIterator PSetI = MRI.getPressureSets(Reg);
@@ -40,8 +40,8 @@ static void increaseSetPressure(std::vector<unsigned> &CurrSetPressure,
static void decreaseSetPressure(std::vector<unsigned> &CurrSetPressure,
const MachineRegisterInfo &MRI, unsigned Reg,
LaneBitmask PrevMask, LaneBitmask NewMask) {
- assert((NewMask & !PrevMask) == 0 && "Must not add bits");
- if (NewMask != 0 || PrevMask == 0)
+ //assert((NewMask & !PrevMask) == 0 && "Must not add bits");
+ if (NewMask.any() || PrevMask.none())
return;
PSetIterator PSetI = MRI.getPressureSets(Reg);
@@ -73,7 +73,7 @@ void RegisterPressure::dump(const TargetRegisterInfo *TRI) const {
dbgs() << "Live In: ";
for (const RegisterMaskPair &P : LiveInRegs) {
dbgs() << PrintVRegOrUnit(P.RegUnit, TRI);
- if (P.LaneMask != ~0u)
+ if (!P.LaneMask.all())
dbgs() << ':' << PrintLaneMask(P.LaneMask);
dbgs() << ' ';
}
@@ -81,7 +81,7 @@ void RegisterPressure::dump(const TargetRegisterInfo *TRI) const {
dbgs() << "Live Out: ";
for (const RegisterMaskPair &P : LiveOutRegs) {
dbgs() << PrintVRegOrUnit(P.RegUnit, TRI);
- if (P.LaneMask != ~0u)
+ if (!P.LaneMask.all())
dbgs() << ':' << PrintLaneMask(P.LaneMask);
dbgs() << ' ';
}
@@ -112,7 +112,7 @@ void PressureDiff::dump(const TargetRegisterInfo &TRI) const {
void RegPressureTracker::increaseRegPressure(unsigned RegUnit,
LaneBitmask PreviousMask,
LaneBitmask NewMask) {
- if (PreviousMask != 0 || NewMask == 0)
+ if (PreviousMask.any() || NewMask.none())
return;
PSetIterator PSetI = MRI->getPressureSets(RegUnit);
@@ -266,9 +266,8 @@ bool RegPressureTracker::isBottomClosed() const {
SlotIndex RegPressureTracker::getCurrSlot() const {
- MachineBasicBlock::const_iterator IdxPos = CurrPos;
- while (IdxPos != MBB->end() && IdxPos->isDebugValue())
- ++IdxPos;
+ MachineBasicBlock::const_iterator IdxPos =
+ skipDebugInstructionsForward(CurrPos, MBB->end());
if (IdxPos == MBB->end())
return LIS->getMBBEndIdx(MBB);
return LIS->getInstructionIndex(*IdxPos).getRegSlot();
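
// A minimal standalone sketch (assumed helper shape, not part of this commit)
// of the skipDebugInstructionsForward pattern the diff adopts: advance an
// iterator past debug-only instructions without running off the end.
#include <vector>

struct ToyMI { bool IsDebugValue; };
using ToyIt = std::vector<ToyMI>::const_iterator;

static ToyIt skipDebugForward(ToyIt I, ToyIt End) {
  while (I != End && I->IsDebugValue)
    ++I;
  return I;
}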
@@ -322,29 +321,28 @@ void RegPressureTracker::initLiveThru(const RegPressureTracker &RPTracker) {
unsigned RegUnit = Pair.RegUnit;
if (TargetRegisterInfo::isVirtualRegister(RegUnit)
&& !RPTracker.hasUntiedDef(RegUnit))
- increaseSetPressure(LiveThruPressure, *MRI, RegUnit, 0, Pair.LaneMask);
+ increaseSetPressure(LiveThruPressure, *MRI, RegUnit,
+ LaneBitmask::getNone(), Pair.LaneMask);
}
}
static LaneBitmask getRegLanes(ArrayRef<RegisterMaskPair> RegUnits,
unsigned RegUnit) {
- auto I = std::find_if(RegUnits.begin(), RegUnits.end(),
- [RegUnit](const RegisterMaskPair Other) {
- return Other.RegUnit == RegUnit;
- });
+ auto I = find_if(RegUnits, [RegUnit](const RegisterMaskPair Other) {
+ return Other.RegUnit == RegUnit;
+ });
if (I == RegUnits.end())
- return 0;
+ return LaneBitmask::getNone();
return I->LaneMask;
}
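
// A minimal standalone sketch (assumed signature, not part of this commit) of
// the range-based find_if these hunks switch to: it simply forwards to
// std::find_if over the whole range, removing the begin()/end() boilerplate.
#include <algorithm>
#include <iterator>

template <typename Range, typename Pred>
auto rangeFindIf(Range &&R, Pred P) {
  return std::find_if(std::begin(R), std::end(R), P);
}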
static void addRegLanes(SmallVectorImpl<RegisterMaskPair> &RegUnits,
RegisterMaskPair Pair) {
unsigned RegUnit = Pair.RegUnit;
- assert(Pair.LaneMask != 0);
- auto I = std::find_if(RegUnits.begin(), RegUnits.end(),
- [RegUnit](const RegisterMaskPair Other) {
- return Other.RegUnit == RegUnit;
- });
+ assert(Pair.LaneMask.any());
+ auto I = find_if(RegUnits, [RegUnit](const RegisterMaskPair Other) {
+ return Other.RegUnit == RegUnit;
+ });
if (I == RegUnits.end()) {
RegUnits.push_back(Pair);
} else {
@@ -354,28 +352,26 @@ static void addRegLanes(SmallVectorImpl<RegisterMaskPair> &RegUnits,
static void setRegZero(SmallVectorImpl<RegisterMaskPair> &RegUnits,
unsigned RegUnit) {
- auto I = std::find_if(RegUnits.begin(), RegUnits.end(),
- [RegUnit](const RegisterMaskPair Other) {
- return Other.RegUnit == RegUnit;
- });
+ auto I = find_if(RegUnits, [RegUnit](const RegisterMaskPair Other) {
+ return Other.RegUnit == RegUnit;
+ });
if (I == RegUnits.end()) {
- RegUnits.push_back(RegisterMaskPair(RegUnit, 0));
+ RegUnits.push_back(RegisterMaskPair(RegUnit, LaneBitmask::getNone()));
} else {
- I->LaneMask = 0;
+ I->LaneMask = LaneBitmask::getNone();
}
}
static void removeRegLanes(SmallVectorImpl<RegisterMaskPair> &RegUnits,
RegisterMaskPair Pair) {
unsigned RegUnit = Pair.RegUnit;
- assert(Pair.LaneMask != 0);
- auto I = std::find_if(RegUnits.begin(), RegUnits.end(),
- [RegUnit](const RegisterMaskPair Other) {
- return Other.RegUnit == RegUnit;
- });
+ assert(Pair.LaneMask.any());
+ auto I = find_if(RegUnits, [RegUnit](const RegisterMaskPair Other) {
+ return Other.RegUnit == RegUnit;
+ });
if (I != RegUnits.end()) {
I->LaneMask &= ~Pair.LaneMask;
- if (I->LaneMask == 0)
+ if (I->LaneMask.none())
RegUnits.erase(I);
}
}
@@ -386,14 +382,15 @@ static LaneBitmask getLanesWithProperty(const LiveIntervals &LIS,
bool(*Property)(const LiveRange &LR, SlotIndex Pos)) {
if (TargetRegisterInfo::isVirtualRegister(RegUnit)) {
const LiveInterval &LI = LIS.getInterval(RegUnit);
- LaneBitmask Result = 0;
+ LaneBitmask Result;
if (TrackLaneMasks && LI.hasSubRanges()) {
for (const LiveInterval::SubRange &SR : LI.subranges()) {
if (Property(SR, Pos))
Result |= SR.LaneMask;
}
} else if (Property(LI, Pos)) {
- Result = TrackLaneMasks ? MRI.getMaxLaneMaskForVReg(RegUnit) : ~0u;
+ Result = TrackLaneMasks ? MRI.getMaxLaneMaskForVReg(RegUnit)
+ : LaneBitmask::getAll();
}
return Result;
@@ -403,7 +400,7 @@ static LaneBitmask getLanesWithProperty(const LiveIntervals &LIS,
// for physical registers on targets with many registers (GPUs).
if (LR == nullptr)
return SafeDefault;
- return Property(*LR, Pos) ? ~0u : 0;
+ return Property(*LR, Pos) ? LaneBitmask::getAll() : LaneBitmask::getNone();
}
}
@@ -411,7 +408,8 @@ static LaneBitmask getLiveLanesAt(const LiveIntervals &LIS,
const MachineRegisterInfo &MRI,
bool TrackLaneMasks, unsigned RegUnit,
SlotIndex Pos) {
- return getLanesWithProperty(LIS, MRI, TrackLaneMasks, RegUnit, Pos, ~0u,
+ return getLanesWithProperty(LIS, MRI, TrackLaneMasks, RegUnit, Pos,
+ LaneBitmask::getAll(),
[](const LiveRange &LR, SlotIndex Pos) {
return LR.liveAt(Pos);
});
@@ -478,10 +476,10 @@ class RegisterOperandsCollector {
void pushReg(unsigned Reg,
SmallVectorImpl<RegisterMaskPair> &RegUnits) const {
if (TargetRegisterInfo::isVirtualRegister(Reg)) {
- addRegLanes(RegUnits, RegisterMaskPair(Reg, ~0u));
+ addRegLanes(RegUnits, RegisterMaskPair(Reg, LaneBitmask::getAll()));
} else if (MRI.isAllocatable(Reg)) {
for (MCRegUnitIterator Units(Reg, &TRI); Units.isValid(); ++Units)
- addRegLanes(RegUnits, RegisterMaskPair(*Units, ~0u));
+ addRegLanes(RegUnits, RegisterMaskPair(*Units, LaneBitmask::getAll()));
}
}
@@ -516,7 +514,7 @@ class RegisterOperandsCollector {
addRegLanes(RegUnits, RegisterMaskPair(Reg, LaneMask));
} else if (MRI.isAllocatable(Reg)) {
for (MCRegUnitIterator Units(Reg, &TRI); Units.isValid(); ++Units)
- addRegLanes(RegUnits, RegisterMaskPair(*Units, ~0u));
+ addRegLanes(RegUnits, RegisterMaskPair(*Units, LaneBitmask::getAll()));
}
}
@@ -567,11 +565,11 @@ void RegisterOperands::adjustLaneLiveness(const LiveIntervals &LIS,
// of a subregister def we need a read-undef flag.
unsigned RegUnit = I->RegUnit;
if (TargetRegisterInfo::isVirtualRegister(RegUnit) &&
- AddFlagsMI != nullptr && (LiveAfter & ~I->LaneMask) == 0)
+ AddFlagsMI != nullptr && (LiveAfter & ~I->LaneMask).none())
AddFlagsMI->setRegisterDefReadUndef(RegUnit);
LaneBitmask ActualDef = I->LaneMask & LiveAfter;
- if (ActualDef == 0) {
+ if (ActualDef.none()) {
I = Defs.erase(I);
} else {
I->LaneMask = ActualDef;
@@ -582,7 +580,7 @@ void RegisterOperands::adjustLaneLiveness(const LiveIntervals &LIS,
LaneBitmask LiveBefore = getLiveLanesAt(LIS, MRI, true, I->RegUnit,
Pos.getBaseIndex());
LaneBitmask LaneMask = I->LaneMask & LiveBefore;
- if (LaneMask == 0) {
+ if (LaneMask.none()) {
I = Uses.erase(I);
} else {
I->LaneMask = LaneMask;
@@ -596,7 +594,7 @@ void RegisterOperands::adjustLaneLiveness(const LiveIntervals &LIS,
continue;
LaneBitmask LiveAfter = getLiveLanesAt(LIS, MRI, true, RegUnit,
Pos.getDeadSlot());
- if (LiveAfter == 0)
+ if (LiveAfter.none())
AddFlagsMI->setRegisterDefReadUndef(RegUnit);
}
}
@@ -673,17 +671,16 @@ void RegPressureTracker::addLiveRegs(ArrayRef<RegisterMaskPair> Regs) {
void RegPressureTracker::discoverLiveInOrOut(RegisterMaskPair Pair,
SmallVectorImpl<RegisterMaskPair> &LiveInOrOut) {
- assert(Pair.LaneMask != 0);
+ assert(Pair.LaneMask.any());
unsigned RegUnit = Pair.RegUnit;
- auto I = std::find_if(LiveInOrOut.begin(), LiveInOrOut.end(),
- [RegUnit](const RegisterMaskPair &Other) {
- return Other.RegUnit == RegUnit;
- });
+ auto I = find_if(LiveInOrOut, [RegUnit](const RegisterMaskPair &Other) {
+ return Other.RegUnit == RegUnit;
+ });
LaneBitmask PrevMask;
LaneBitmask NewMask;
if (I == LiveInOrOut.end()) {
- PrevMask = 0;
+ PrevMask = LaneBitmask::getNone();
NewMask = Pair.LaneMask;
LiveInOrOut.push_back(Pair);
} else {
@@ -738,14 +735,15 @@ void RegPressureTracker::recede(const RegisterOperands &RegOpers,
LaneBitmask NewMask = PreviousMask & ~Def.LaneMask;
LaneBitmask LiveOut = Def.LaneMask & ~PreviousMask;
- if (LiveOut != 0) {
+ if (LiveOut.any()) {
discoverLiveOut(RegisterMaskPair(Reg, LiveOut));
// Retroactively model effects on pressure of the live out lanes.
- increaseSetPressure(CurrSetPressure, *MRI, Reg, 0, LiveOut);
+ increaseSetPressure(CurrSetPressure, *MRI, Reg, LaneBitmask::getNone(),
+ LiveOut);
PreviousMask = LiveOut;
}
- if (NewMask == 0) {
+ if (NewMask.none()) {
// Add a 0 entry to LiveUses as a marker that the complete vreg has become
// dead.
if (TrackLaneMasks && LiveUses != nullptr)
@@ -762,26 +760,25 @@ void RegPressureTracker::recede(const RegisterOperands &RegOpers,
// Generate liveness for uses.
for (const RegisterMaskPair &Use : RegOpers.Uses) {
unsigned Reg = Use.RegUnit;
- assert(Use.LaneMask != 0);
+ assert(Use.LaneMask.any());
LaneBitmask PreviousMask = LiveRegs.insert(Use);
LaneBitmask NewMask = PreviousMask | Use.LaneMask;
if (NewMask == PreviousMask)
continue;
// Did the register just become live?
- if (PreviousMask == 0) {
+ if (PreviousMask.none()) {
if (LiveUses != nullptr) {
if (!TrackLaneMasks) {
addRegLanes(*LiveUses, RegisterMaskPair(Reg, NewMask));
} else {
- auto I = std::find_if(LiveUses->begin(), LiveUses->end(),
- [Reg](const RegisterMaskPair Other) {
- return Other.RegUnit == Reg;
- });
+ auto I = find_if(*LiveUses, [Reg](const RegisterMaskPair Other) {
+ return Other.RegUnit == Reg;
+ });
bool IsRedef = I != LiveUses->end();
if (IsRedef) {
// ignore re-defs here...
- assert(I->LaneMask == 0);
+ assert(I->LaneMask.none());
removeRegLanes(*LiveUses, RegisterMaskPair(Reg, NewMask));
} else {
addRegLanes(*LiveUses, RegisterMaskPair(Reg, NewMask));
@@ -792,7 +789,7 @@ void RegPressureTracker::recede(const RegisterOperands &RegOpers,
// Discover live outs if this may be the first occurrence of this register.
if (RequireIntervals) {
LaneBitmask LiveOut = getLiveThroughAt(Reg, SlotIdx);
- if (LiveOut != 0)
+ if (LiveOut.any())
discoverLiveOut(RegisterMaskPair(Reg, LiveOut));
}
}
@@ -803,7 +800,7 @@ void RegPressureTracker::recede(const RegisterOperands &RegOpers,
for (const RegisterMaskPair &Def : RegOpers.Defs) {
unsigned RegUnit = Def.RegUnit;
if (TargetRegisterInfo::isVirtualRegister(RegUnit) &&
- (LiveRegs.contains(RegUnit) & Def.LaneMask) == 0)
+ (LiveRegs.contains(RegUnit) & Def.LaneMask).none())
UntiedDefs.insert(RegUnit);
}
}
@@ -819,9 +816,7 @@ void RegPressureTracker::recedeSkipDebugValues() {
static_cast<RegionPressure&>(P).openTop(CurrPos);
// Find the previous instruction.
- do
- --CurrPos;
- while (CurrPos != MBB->begin() && CurrPos->isDebugValue());
+ CurrPos = skipDebugInstructionsBackward(std::prev(CurrPos), MBB->begin());
SlotIndex SlotIdx;
if (RequireIntervals)
@@ -871,7 +866,7 @@ void RegPressureTracker::advance(const RegisterOperands &RegOpers) {
unsigned Reg = Use.RegUnit;
LaneBitmask LiveMask = LiveRegs.contains(Reg);
LaneBitmask LiveIn = Use.LaneMask & ~LiveMask;
- if (LiveIn != 0) {
+ if (LiveIn.any()) {
discoverLiveIn(RegisterMaskPair(Reg, LiveIn));
increaseRegPressure(Reg, LiveMask, LiveMask | LiveIn);
LiveRegs.insert(RegisterMaskPair(Reg, LiveIn));
@@ -879,7 +874,7 @@ void RegPressureTracker::advance(const RegisterOperands &RegOpers) {
// Kill liveness at last uses.
if (RequireIntervals) {
LaneBitmask LastUseMask = getLastUsedLanes(Reg, SlotIdx);
- if (LastUseMask != 0) {
+ if (LastUseMask.any()) {
LiveRegs.erase(RegisterMaskPair(Reg, LastUseMask));
decreaseRegPressure(Reg, LiveMask, LiveMask & ~LastUseMask);
}
@@ -897,9 +892,7 @@ void RegPressureTracker::advance(const RegisterOperands &RegOpers) {
bumpDeadDefs(RegOpers.DeadDefs);
// Find the next instruction.
- do
- ++CurrPos;
- while (CurrPos != MBB->end() && CurrPos->isDebugValue());
+ CurrPos = skipDebugInstructionsForward(std::next(CurrPos), MBB->end());
}
void RegPressureTracker::advance() {
@@ -1192,8 +1185,8 @@ static LaneBitmask findUseBetween(unsigned Reg, LaneBitmask LastUseMask,
unsigned SubRegIdx = MO.getSubReg();
LaneBitmask UseMask = TRI.getSubRegIndexLaneMask(SubRegIdx);
LastUseMask &= ~UseMask;
- if (LastUseMask == 0)
- return 0;
+ if (LastUseMask.none())
+ return LaneBitmask::getNone();
}
}
return LastUseMask;
@@ -1202,7 +1195,8 @@ static LaneBitmask findUseBetween(unsigned Reg, LaneBitmask LastUseMask,
LaneBitmask RegPressureTracker::getLiveLanesAt(unsigned RegUnit,
SlotIndex Pos) const {
assert(RequireIntervals);
- return getLanesWithProperty(*LIS, *MRI, TrackLaneMasks, RegUnit, Pos, ~0u,
+ return getLanesWithProperty(*LIS, *MRI, TrackLaneMasks, RegUnit, Pos,
+ LaneBitmask::getAll(),
[](const LiveRange &LR, SlotIndex Pos) {
return LR.liveAt(Pos);
});
@@ -1212,7 +1206,7 @@ LaneBitmask RegPressureTracker::getLastUsedLanes(unsigned RegUnit,
SlotIndex Pos) const {
assert(RequireIntervals);
return getLanesWithProperty(*LIS, *MRI, TrackLaneMasks, RegUnit,
- Pos.getBaseIndex(), 0,
+ Pos.getBaseIndex(), LaneBitmask::getNone(),
[](const LiveRange &LR, SlotIndex Pos) {
const LiveRange::Segment *S = LR.getSegmentContaining(Pos);
return S != nullptr && S->end == Pos.getRegSlot();
@@ -1222,7 +1216,8 @@ LaneBitmask RegPressureTracker::getLastUsedLanes(unsigned RegUnit,
LaneBitmask RegPressureTracker::getLiveThroughAt(unsigned RegUnit,
SlotIndex Pos) const {
assert(RequireIntervals);
- return getLanesWithProperty(*LIS, *MRI, TrackLaneMasks, RegUnit, Pos, 0u,
+ return getLanesWithProperty(*LIS, *MRI, TrackLaneMasks, RegUnit, Pos,
+ LaneBitmask::getNone(),
[](const LiveRange &LR, SlotIndex Pos) {
const LiveRange::Segment *S = LR.getSegmentContaining(Pos);
return S != nullptr && S->start < Pos.getRegSlot(true) &&
@@ -1253,7 +1248,7 @@ void RegPressureTracker::bumpDownwardPressure(const MachineInstr *MI) {
for (const RegisterMaskPair &Use : RegOpers.Uses) {
unsigned Reg = Use.RegUnit;
LaneBitmask LastUseMask = getLastUsedLanes(Reg, SlotIdx);
- if (LastUseMask == 0)
+ if (LastUseMask.none())
continue;
// The LastUseMask is queried from the liveness information of instruction
// which may be further down the schedule. Some lanes may actually not be
@@ -1263,7 +1258,7 @@ void RegPressureTracker::bumpDownwardPressure(const MachineInstr *MI) {
SlotIndex CurrIdx = getCurrSlot();
LastUseMask
= findUseBetween(Reg, LastUseMask, CurrIdx, SlotIdx, *MRI, LIS);
- if (LastUseMask == 0)
+ if (LastUseMask.none())
continue;
LaneBitmask LiveMask = LiveRegs.contains(Reg);
diff --git a/contrib/llvm/lib/CodeGen/RegisterScavenging.cpp b/contrib/llvm/lib/CodeGen/RegisterScavenging.cpp
index 6b80179..fdf741f 100644
--- a/contrib/llvm/lib/CodeGen/RegisterScavenging.cpp
+++ b/contrib/llvm/lib/CodeGen/RegisterScavenging.cpp
@@ -34,33 +34,12 @@ using namespace llvm;
void RegScavenger::setRegUsed(unsigned Reg, LaneBitmask LaneMask) {
for (MCRegUnitMaskIterator RUI(Reg, TRI); RUI.isValid(); ++RUI) {
LaneBitmask UnitMask = (*RUI).second;
- if (UnitMask == 0 || (LaneMask & UnitMask) != 0)
+ if (UnitMask.none() || (LaneMask & UnitMask).any())
RegUnitsAvailable.reset((*RUI).first);
}
}
-void RegScavenger::initRegState() {
- for (SmallVectorImpl<ScavengedInfo>::iterator I = Scavenged.begin(),
- IE = Scavenged.end(); I != IE; ++I) {
- I->Reg = 0;
- I->Restore = nullptr;
- }
-
- // All register units start out unused.
- RegUnitsAvailable.set();
-
- // Live-in registers are in use.
- for (const auto &LI : MBB->liveins())
- setRegUsed(LI.PhysReg, LI.LaneMask);
-
- // Pristine CSRs are also unavailable.
- const MachineFunction &MF = *MBB->getParent();
- BitVector PR = MF.getFrameInfo()->getPristineRegs(MF);
- for (int I = PR.find_first(); I>0; I = PR.find_next(I))
- setRegUsed(I);
-}
-
-void RegScavenger::enterBasicBlock(MachineBasicBlock &MBB) {
+void RegScavenger::init(MachineBasicBlock &MBB) {
MachineFunction &MF = *MBB.getParent();
TII = MF.getSubtarget().getInstrInfo();
TRI = MF.getSubtarget().getRegisterInfo();
@@ -69,11 +48,6 @@ void RegScavenger::enterBasicBlock(MachineBasicBlock &MBB) {
assert((NumRegUnits == 0 || NumRegUnits == TRI->getNumRegUnits()) &&
"Target changed?");
- // It is not possible to use the register scavenger after late optimization
- // passes that don't preserve accurate liveness information.
- assert(MRI->tracksLiveness() &&
- "Cannot use register scavenger with inaccurate liveness");
-
// Self-initialize.
if (!this->MBB) {
NumRegUnits = TRI->getNumRegUnits();
@@ -84,16 +58,56 @@ void RegScavenger::enterBasicBlock(MachineBasicBlock &MBB) {
}
this->MBB = &MBB;
- initRegState();
+ for (SmallVectorImpl<ScavengedInfo>::iterator I = Scavenged.begin(),
+ IE = Scavenged.end(); I != IE; ++I) {
+ I->Reg = 0;
+ I->Restore = nullptr;
+ }
+
+ // All register units start out unused.
+ RegUnitsAvailable.set();
+
+ // Pristine CSRs are not available.
+ BitVector PR = MF.getFrameInfo().getPristineRegs(MF);
+ for (int I = PR.find_first(); I>0; I = PR.find_next(I))
+ setRegUsed(I);
Tracking = false;
}
+void RegScavenger::setLiveInsUsed(const MachineBasicBlock &MBB) {
+ for (const auto &LI : MBB.liveins())
+ setRegUsed(LI.PhysReg, LI.LaneMask);
+}
+
+void RegScavenger::enterBasicBlock(MachineBasicBlock &MBB) {
+ init(MBB);
+ setLiveInsUsed(MBB);
+}
+
+void RegScavenger::enterBasicBlockEnd(MachineBasicBlock &MBB) {
+ init(MBB);
+ // Merge live-ins of successors to get live-outs.
+ for (const MachineBasicBlock *Succ : MBB.successors())
+ setLiveInsUsed(*Succ);
+
+  // Move the internal iterator to the last instruction of the block.
+ if (MBB.begin() != MBB.end()) {
+ MBBI = std::prev(MBB.end());
+ Tracking = true;
+ }
+}
+
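+
+// A minimal standalone sketch (assumed containers, not part of this commit)
+// of the live-out computation in enterBasicBlockEnd above: with no explicit
+// live-out sets available, the live-outs of a block are the union of the
+// live-ins of its successors.
+#include <set>
+#include <vector>
+
+using RegSet = std::set<unsigned>;
+
+static RegSet liveOuts(const std::vector<const RegSet *> &SuccLiveIns) {
+  RegSet Out;
+  for (const RegSet *LI : SuccLiveIns)
+    Out.insert(LI->begin(), LI->end());
+  return Out;
+}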
void RegScavenger::addRegUnits(BitVector &BV, unsigned Reg) {
for (MCRegUnitIterator RUI(Reg, TRI); RUI.isValid(); ++RUI)
BV.set(*RUI);
}
+void RegScavenger::removeRegUnits(BitVector &BV, unsigned Reg) {
+ for (MCRegUnitIterator RUI(Reg, TRI); RUI.isValid(); ++RUI)
+ BV.reset(*RUI);
+}
+
void RegScavenger::determineKillsAndDefs() {
assert(Tracking && "Must be tracking to determine kills and defs");
@@ -245,6 +259,48 @@ void RegScavenger::forward() {
setUsed(DefRegUnits);
}
+void RegScavenger::backward() {
+ assert(Tracking && "Must be tracking to determine kills and defs");
+
+ const MachineInstr &MI = *MBBI;
+ // Defined or clobbered registers are available now.
+ for (const MachineOperand &MO : MI.operands()) {
+ if (MO.isRegMask()) {
+ for (unsigned RU = 0, RUEnd = TRI->getNumRegUnits(); RU != RUEnd;
+ ++RU) {
+ for (MCRegUnitRootIterator RURI(RU, TRI); RURI.isValid(); ++RURI) {
+ if (MO.clobbersPhysReg(*RURI)) {
+ RegUnitsAvailable.set(RU);
+ break;
+ }
+ }
+ }
+ } else if (MO.isReg() && MO.isDef()) {
+ unsigned Reg = MO.getReg();
+ if (!Reg || TargetRegisterInfo::isVirtualRegister(Reg) ||
+ isReserved(Reg))
+ continue;
+ addRegUnits(RegUnitsAvailable, Reg);
+ }
+ }
+ // Mark read registers as unavailable.
+ for (const MachineOperand &MO : MI.uses()) {
+ if (MO.isReg() && MO.readsReg()) {
+ unsigned Reg = MO.getReg();
+ if (!Reg || TargetRegisterInfo::isVirtualRegister(Reg) ||
+ isReserved(Reg))
+ continue;
+ removeRegUnits(RegUnitsAvailable, Reg);
+ }
+ }
+
+ if (MBBI == MBB->begin()) {
+ MBBI = MachineBasicBlock::iterator(nullptr);
+ Tracking = false;
+ } else
+ --MBBI;
+}
+
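+
+// A minimal standalone sketch (assumed data model, not part of this commit)
+// of the backward() walk above: scanning instructions in reverse, a def (or
+// regmask clobber) makes a register available again, while a read makes it
+// unavailable — liveness tracking run from the block end upward.
+#include <set>
+
+struct ToyInst {
+  std::set<unsigned> Defs, Uses;
+};
+
+static void stepBackward(const ToyInst &MI, std::set<unsigned> &Available) {
+  for (unsigned R : MI.Defs)
+    Available.insert(R); // the value is dead above its def
+  for (unsigned R : MI.Uses)
+    Available.erase(R);  // the value must be live above its use
+}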
bool RegScavenger::isRegUsed(unsigned Reg, bool includeReserved) const {
if (includeReserved && isReserved(Reg))
return true;
@@ -358,7 +414,8 @@ unsigned RegScavenger::scavengeRegister(const TargetRegisterClass *RC,
for (const MachineOperand &MO : MI.operands()) {
if (MO.isReg() && MO.getReg() != 0 && !(MO.isUse() && MO.isUndef()) &&
!TargetRegisterInfo::isVirtualRegister(MO.getReg()))
- Candidates.reset(MO.getReg());
+ for (MCRegAliasIterator AI(MO.getReg(), TRI, true); AI.isValid(); ++AI)
+ Candidates.reset(*AI);
}
// Try to find a register that's unused if there is one, as then we won't
@@ -380,7 +437,7 @@ unsigned RegScavenger::scavengeRegister(const TargetRegisterClass *RC,
// Find an available scavenging slot with size and alignment matching
// the requirements of the class RC.
- const MachineFrameInfo &MFI = *MF.getFrameInfo();
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
unsigned NeedSize = RC->getSize();
unsigned NeedAlign = RC->getAlignment();
diff --git a/contrib/llvm/lib/CodeGen/RegisterUsageInfo.cpp b/contrib/llvm/lib/CodeGen/RegisterUsageInfo.cpp
index 5cf3e57..66f1966 100644
--- a/contrib/llvm/lib/CodeGen/RegisterUsageInfo.cpp
+++ b/contrib/llvm/lib/CodeGen/RegisterUsageInfo.cpp
@@ -22,7 +22,7 @@ using namespace llvm;
#define DEBUG_TYPE "ip-regalloc"
-cl::opt<bool> DumpRegUsage(
+static cl::opt<bool> DumpRegUsage(
"print-regusage", cl::init(false), cl::Hidden,
cl::desc("print register usage details collected for analysis."));
diff --git a/contrib/llvm/lib/CodeGen/RenameIndependentSubregs.cpp b/contrib/llvm/lib/CodeGen/RenameIndependentSubregs.cpp
index ea952d9..2f7ee8b 100644
--- a/contrib/llvm/lib/CodeGen/RenameIndependentSubregs.cpp
+++ b/contrib/llvm/lib/CodeGen/RenameIndependentSubregs.cpp
@@ -48,7 +48,7 @@ public:
static char ID;
RenameIndependentSubregs() : MachineFunctionPass(ID) {}
- const char *getPassName() const override {
+ StringRef getPassName() const override {
return "Rename Disconnected Subregister Components";
}
@@ -184,7 +184,7 @@ bool RenameIndependentSubregs::findComponents(IntEqClasses &Classes,
unsigned MergedID = ~0u;
for (RenameIndependentSubregs::SubRangeInfo &SRInfo : SubRangeInfos) {
const LiveInterval::SubRange &SR = *SRInfo.SR;
- if ((SR.LaneMask & LaneMask) == 0)
+ if ((SR.LaneMask & LaneMask).none())
continue;
SlotIndex Pos = LIS->getInstructionIndex(*MO.getParent());
Pos = MO.isDef() ? Pos.getRegSlot(MO.isEarlyClobber())
@@ -219,24 +219,23 @@ void RenameIndependentSubregs::rewriteOperands(const IntEqClasses &Classes,
if (!MO.isDef() && !MO.readsReg())
continue;
- MachineInstr &MI = *MO.getParent();
-
- SlotIndex Pos = LIS->getInstructionIndex(MI);
+ SlotIndex Pos = LIS->getInstructionIndex(*MO.getParent());
+ Pos = MO.isDef() ? Pos.getRegSlot(MO.isEarlyClobber())
+ : Pos.getBaseIndex();
unsigned SubRegIdx = MO.getSubReg();
LaneBitmask LaneMask = TRI.getSubRegIndexLaneMask(SubRegIdx);
unsigned ID = ~0u;
for (const SubRangeInfo &SRInfo : SubRangeInfos) {
const LiveInterval::SubRange &SR = *SRInfo.SR;
- if ((SR.LaneMask & LaneMask) == 0)
+ if ((SR.LaneMask & LaneMask).none())
continue;
- LiveRange::const_iterator I = SR.find(Pos);
- if (I == SR.end())
+ const VNInfo *VNI = SR.getVNInfoAt(Pos);
+ if (VNI == nullptr)
continue;
- const VNInfo &VNI = *I->valno;
// Map to local representative ID.
- unsigned LocalID = SRInfo.ConEQ.getEqClass(&VNI);
+ unsigned LocalID = SRInfo.ConEQ.getEqClass(VNI);
// Global ID
ID = Classes[LocalID + SRInfo.Index];
break;
@@ -354,19 +353,24 @@ void RenameIndependentSubregs::computeMainRangesFixFlags(
if (I == 0)
LI.clear();
LIS->constructMainRangeFromSubranges(LI);
+ // A def of a subregister may be a use of other register lanes. Replacing
+ // such a def with a def of a different register will eliminate the use,
+ // and may cause the recorded live range to be larger than the actual
+ // liveness in the program IR.
+ LIS->shrinkToUses(&LI);
}
}
bool RenameIndependentSubregs::runOnMachineFunction(MachineFunction &MF) {
// Skip renaming if liveness of subregister is not tracked.
- if (!MF.getSubtarget().enableSubRegLiveness())
+ MRI = &MF.getRegInfo();
+ if (!MRI->subRegLivenessEnabled())
return false;
DEBUG(dbgs() << "Renaming independent subregister live ranges in "
<< MF.getName() << '\n');
LIS = &getAnalysis<LiveIntervals>();
- MRI = &MF.getRegInfo();
TII = MF.getSubtarget().getInstrInfo();
// Iterate over all vregs. Note that we query getNumVirtRegs() the newly
diff --git a/contrib/llvm/lib/CodeGen/ResetMachineFunctionPass.cpp b/contrib/llvm/lib/CodeGen/ResetMachineFunctionPass.cpp
new file mode 100644
index 0000000..4519641
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/ResetMachineFunctionPass.cpp
@@ -0,0 +1,67 @@
+//===-- ResetMachineFunctionPass.cpp - Reset Machine Function ----*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file implements a pass that will conditionally reset a machine
+/// function as if it was just created. This is used to provide a fallback
+/// mechanism when GlobalISel fails, thus the condition for the reset to
+/// happen is that the MachineFunction has the FailedISel property.
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/IR/DiagnosticInfo.h"
+#include "llvm/Support/Debug.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "reset-machine-function"
+
+STATISTIC(NumFunctionsReset, "Number of functions reset");
+
+namespace {
+ class ResetMachineFunction : public MachineFunctionPass {
+ /// Tells whether or not this pass should emit a fallback
+ /// diagnostic when it resets a function.
+ bool EmitFallbackDiag;
+
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ ResetMachineFunction(bool EmitFallbackDiag = false)
+ : MachineFunctionPass(ID), EmitFallbackDiag(EmitFallbackDiag) {}
+
+ StringRef getPassName() const override { return "ResetMachineFunction"; }
+
+ bool runOnMachineFunction(MachineFunction &MF) override {
+ if (MF.getProperties().hasProperty(
+ MachineFunctionProperties::Property::FailedISel)) {
+        DEBUG(dbgs() << "Resetting: " << MF.getName() << '\n');
+ ++NumFunctionsReset;
+ MF.reset();
+ if (EmitFallbackDiag) {
+ const Function &F = *MF.getFunction();
+ DiagnosticInfoISelFallback DiagFallback(F);
+ F.getContext().diagnose(DiagFallback);
+ }
+ return true;
+ }
+ return false;
+ }
+
+ };
+} // end anonymous namespace
+
+char ResetMachineFunction::ID = 0;
+INITIALIZE_PASS(ResetMachineFunction, DEBUG_TYPE,
+ "reset machine function if ISel failed", false, false)
+
+MachineFunctionPass *
+llvm::createResetMachineFunctionPass(bool EmitFallbackDiag = false) {
+ return new ResetMachineFunction(EmitFallbackDiag);
+}
diff --git a/contrib/llvm/lib/CodeGen/SafeStack.cpp b/contrib/llvm/lib/CodeGen/SafeStack.cpp
index 4a1b995..2b82df2 100644
--- a/contrib/llvm/lib/CodeGen/SafeStack.cpp
+++ b/contrib/llvm/lib/CodeGen/SafeStack.cpp
@@ -52,17 +52,6 @@ using namespace llvm::safestack;
#define DEBUG_TYPE "safestack"
-enum UnsafeStackPtrStorageVal { ThreadLocalUSP, SingleThreadUSP };
-
-static cl::opt<UnsafeStackPtrStorageVal> USPStorage("safe-stack-usp-storage",
- cl::Hidden, cl::init(ThreadLocalUSP),
- cl::desc("Type of storage for the unsafe stack pointer"),
- cl::values(clEnumValN(ThreadLocalUSP, "thread-local",
- "Thread-local storage"),
- clEnumValN(SingleThreadUSP, "single-thread",
- "Non-thread-local storage"),
- clEnumValEnd));
-
namespace llvm {
STATISTIC(NumFunctions, "Total number of functions");
@@ -124,9 +113,6 @@ class SafeStack : public FunctionPass {
/// might expect to appear on the stack on most common targets.
enum { StackAlignment = 16 };
- /// \brief Build a value representing a pointer to the unsafe stack pointer.
- Value *getOrCreateUnsafeStackPtr(IRBuilder<> &IRB, Function &F);
-
/// \brief Return the value of the stack canary.
Value *getStackGuard(IRBuilder<> &IRB, Function &F);
@@ -356,46 +342,8 @@ bool SafeStack::IsSafeStackAlloca(const Value *AllocaPtr, uint64_t AllocaSize) {
return true;
}
-Value *SafeStack::getOrCreateUnsafeStackPtr(IRBuilder<> &IRB, Function &F) {
- // Check if there is a target-specific location for the unsafe stack pointer.
- if (TL)
- if (Value *V = TL->getSafeStackPointerLocation(IRB))
- return V;
-
- // Otherwise, assume the target links with compiler-rt, which provides a
- // thread-local variable with a magic name.
- Module &M = *F.getParent();
- const char *UnsafeStackPtrVar = "__safestack_unsafe_stack_ptr";
- auto UnsafeStackPtr =
- dyn_cast_or_null<GlobalVariable>(M.getNamedValue(UnsafeStackPtrVar));
-
- bool UseTLS = USPStorage == ThreadLocalUSP;
-
- if (!UnsafeStackPtr) {
- auto TLSModel = UseTLS ?
- GlobalValue::InitialExecTLSModel :
- GlobalValue::NotThreadLocal;
- // The global variable is not defined yet, define it ourselves.
- // We use the initial-exec TLS model because we do not support the
- // variable living anywhere other than in the main executable.
- UnsafeStackPtr = new GlobalVariable(
- M, StackPtrTy, false, GlobalValue::ExternalLinkage, nullptr,
- UnsafeStackPtrVar, nullptr, TLSModel);
- } else {
- // The variable exists, check its type and attributes.
- if (UnsafeStackPtr->getValueType() != StackPtrTy)
- report_fatal_error(Twine(UnsafeStackPtrVar) + " must have void* type");
- if (UseTLS != UnsafeStackPtr->isThreadLocal())
- report_fatal_error(Twine(UnsafeStackPtrVar) + " must " +
- (UseTLS ? "" : "not ") + "be thread-local");
- }
- return UnsafeStackPtr;
-}
-
Value *SafeStack::getStackGuard(IRBuilder<> &IRB, Function &F) {
- Value *StackGuardVar = nullptr;
- if (TL)
- StackGuardVar = TL->getIRStackGuard(IRB);
+ Value *StackGuardVar = TL->getIRStackGuard(IRB);
if (!StackGuardVar)
StackGuardVar =
F.getParent()->getOrInsertGlobal("__stack_chk_guard", StackPtrTy);
@@ -752,7 +700,9 @@ bool SafeStack::runOnFunction(Function &F) {
return false;
}
- TL = TM ? TM->getSubtargetImpl(F)->getTargetLowering() : nullptr;
+ if (!TM)
+ report_fatal_error("Target machine is required");
+ TL = TM->getSubtargetImpl(F)->getTargetLowering();
SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
++NumFunctions;
@@ -764,7 +714,7 @@ bool SafeStack::runOnFunction(Function &F) {
// Collect all points where stack gets unwound and needs to be restored
// This is only necessary because the runtime (setjmp and unwind code) is
- // not aware of the unsafe stack and won't unwind/restore it prorerly.
+ // not aware of the unsafe stack and won't unwind/restore it properly.
// To work around this problem without changing the runtime, we insert
// instrumentation to restore the unsafe stack pointer when necessary.
SmallVector<Instruction *, 4> StackRestorePoints;
@@ -786,7 +736,7 @@ bool SafeStack::runOnFunction(Function &F) {
++NumUnsafeStackRestorePointsFunctions;
IRBuilder<> IRB(&F.front(), F.begin()->getFirstInsertionPt());
- UnsafeStackPtr = getOrCreateUnsafeStackPtr(IRB, F);
+ UnsafeStackPtr = TL->getSafeStackPointerLocation(IRB);
// Load the current stack pointer (we'll also use it as a base pointer).
// FIXME: use a dedicated register for it ?
diff --git a/contrib/llvm/lib/CodeGen/SafeStackColoring.cpp b/contrib/llvm/lib/CodeGen/SafeStackColoring.cpp
index 795eb8d..7fbeadd 100644
--- a/contrib/llvm/lib/CodeGen/SafeStackColoring.cpp
+++ b/contrib/llvm/lib/CodeGen/SafeStackColoring.cpp
@@ -214,10 +214,12 @@ void StackColoring::calculateLiveIntervals() {
unsigned AllocaNo = It.second.AllocaNo;
if (IsStart) {
- assert(!Started.test(AllocaNo));
- Started.set(AllocaNo);
- Ended.reset(AllocaNo);
- Start[AllocaNo] = InstNo;
+ assert(!Started.test(AllocaNo) || Start[AllocaNo] == BBStart);
+ if (!Started.test(AllocaNo)) {
+ Started.set(AllocaNo);
+ Ended.reset(AllocaNo);
+ Start[AllocaNo] = InstNo;
+ }
} else {
assert(!Ended.test(AllocaNo));
if (Started.test(AllocaNo)) {
diff --git a/contrib/llvm/lib/CodeGen/SafeStackLayout.cpp b/contrib/llvm/lib/CodeGen/SafeStackLayout.cpp
index fb433c1..7d4dbd1 100644
--- a/contrib/llvm/lib/CodeGen/SafeStackLayout.cpp
+++ b/contrib/llvm/lib/CodeGen/SafeStackLayout.cpp
@@ -132,6 +132,14 @@ void StackLayout::computeLayout() {
// If this is replaced with something smarter, it must preserve the property
// that the first object is always at the offset 0 in the stack frame (for
// StackProtectorSlot), or handle stack protector in some other way.
+
+ // Sort objects by size (largest first) to reduce fragmentation.
+ if (StackObjects.size() > 2)
+ std::stable_sort(StackObjects.begin() + 1, StackObjects.end(),
+ [](const StackObject &a, const StackObject &b) {
+ return a.Size > b.Size;
+ });
+
for (auto &Obj : StackObjects)
layoutObject(Obj);
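
// A minimal standalone sketch (assumed object type, not part of this commit)
// of the layout tweak above: objects after the fixed first slot (reserved for
// the stack protector) are stable-sorted largest-first, a first-fit-decreasing
// heuristic that tends to reduce fragmentation.
#include <algorithm>
#include <vector>

struct ToyStackObject { unsigned Size; };

static void sortForLayout(std::vector<ToyStackObject> &Objs) {
  if (Objs.size() > 2)
    std::stable_sort(Objs.begin() + 1, Objs.end(),
                     [](const ToyStackObject &A, const ToyStackObject &B) {
                       return A.Size > B.Size; // largest first, stable ties
                     });
}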
diff --git a/contrib/llvm/lib/CodeGen/ScheduleDAG.cpp b/contrib/llvm/lib/CodeGen/ScheduleDAG.cpp
index efde61e..427d952 100644
--- a/contrib/llvm/lib/CodeGen/ScheduleDAG.cpp
+++ b/contrib/llvm/lib/CodeGen/ScheduleDAG.cpp
@@ -139,8 +139,7 @@ void SUnit::removePred(const SDep &D) {
SDep P = D;
P.setSUnit(this);
SUnit *N = D.getSUnit();
- SmallVectorImpl<SDep>::iterator Succ = std::find(N->Succs.begin(),
- N->Succs.end(), P);
+ SmallVectorImpl<SDep>::iterator Succ = find(N->Succs, P);
assert(Succ != N->Succs.end() && "Mismatching preds / succs lists!");
N->Succs.erase(Succ);
Preds.erase(I);
@@ -311,10 +310,20 @@ void SUnit::biasCriticalPath() {
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+void SUnit::print(raw_ostream &OS, const ScheduleDAG *DAG) const {
+ if (this == &DAG->ExitSU)
+ OS << "ExitSU";
+ else if (this == &DAG->EntrySU)
+ OS << "EntrySU";
+ else
+ OS << "SU(" << NodeNum << ")";
+}
+
/// SUnit - Scheduling unit. It's a wrapper around either a single SDNode or
/// a group of nodes flagged together.
void SUnit::dump(const ScheduleDAG *G) const {
- dbgs() << "SU(" << NodeNum << "): ";
+ print(dbgs(), G);
+ dbgs() << ": ";
G->dumpNode(this);
}
@@ -338,12 +347,12 @@ void SUnit::dumpAll(const ScheduleDAG *G) const {
I != E; ++I) {
dbgs() << " ";
switch (I->getKind()) {
- case SDep::Data: dbgs() << "val "; break;
- case SDep::Anti: dbgs() << "anti"; break;
- case SDep::Output: dbgs() << "out "; break;
- case SDep::Order: dbgs() << "ch "; break;
+ case SDep::Data: dbgs() << "data "; break;
+ case SDep::Anti: dbgs() << "anti "; break;
+ case SDep::Output: dbgs() << "out "; break;
+ case SDep::Order: dbgs() << "ord "; break;
}
- dbgs() << "SU(" << I->getSUnit()->NodeNum << ")";
+ I->getSUnit()->print(dbgs(), G);
if (I->isArtificial())
dbgs() << " *";
dbgs() << ": Latency=" << I->getLatency();
@@ -358,12 +367,12 @@ void SUnit::dumpAll(const ScheduleDAG *G) const {
I != E; ++I) {
dbgs() << " ";
switch (I->getKind()) {
- case SDep::Data: dbgs() << "val "; break;
- case SDep::Anti: dbgs() << "anti"; break;
- case SDep::Output: dbgs() << "out "; break;
- case SDep::Order: dbgs() << "ch "; break;
+ case SDep::Data: dbgs() << "data "; break;
+ case SDep::Anti: dbgs() << "anti "; break;
+ case SDep::Output: dbgs() << "out "; break;
+ case SDep::Order: dbgs() << "ord "; break;
}
- dbgs() << "SU(" << I->getSUnit()->NodeNum << ")";
+ I->getSUnit()->print(dbgs(), G);
if (I->isArtificial())
dbgs() << " *";
dbgs() << ": Latency=" << I->getLatency();
diff --git a/contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp b/contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp
index 22bfd4d..611c5a7 100644
--- a/contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp
+++ b/contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp
@@ -77,7 +77,7 @@ static unsigned getReductionSize() {
static void dumpSUList(ScheduleDAGInstrs::SUList &L) {
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
dbgs() << "{ ";
- for (auto *su : L) {
+ for (const SUnit *su : L) {
dbgs() << "SU(" << su->NodeNum << ")";
if (su != L.back())
dbgs() << ", ";
@@ -142,9 +142,7 @@ static void getUnderlyingObjects(const Value *V,
SmallVector<Value *, 4> Objs;
GetUnderlyingObjects(const_cast<Value *>(V), Objs, DL);
- for (SmallVectorImpl<Value *>::iterator I = Objs.begin(), IE = Objs.end();
- I != IE; ++I) {
- V = *I;
+ for (Value *V : Objs) {
if (!Visited.insert(V).second)
continue;
if (Operator::getOpcode(V) == Instruction::IntToPtr) {
@@ -164,7 +162,7 @@ static void getUnderlyingObjects(const Value *V,
/// information and it can be tracked to a normal reference to a known
/// object, return the Value for that object.
static void getUnderlyingObjectsForInstr(const MachineInstr *MI,
- const MachineFrameInfo *MFI,
+ const MachineFrameInfo &MFI,
UnderlyingObjectsVector &Objects,
const DataLayout &DL) {
auto allMMOsOkay = [&]() {
@@ -178,16 +176,16 @@ static void getUnderlyingObjectsForInstr(const MachineInstr *MI,
// overlapping locations. The client code calling this function assumes
// this is not the case. So return a conservative answer of no known
// object.
- if (MFI->hasTailCall())
+ if (MFI.hasTailCall())
return false;
// For now, ignore PseudoSourceValues which may alias LLVM IR values
// because the code that uses this function has no way to cope with
// such aliases.
- if (PSV->isAliased(MFI))
+ if (PSV->isAliased(&MFI))
return false;
- bool MayAlias = PSV->mayAlias(MFI);
+ bool MayAlias = PSV->mayAlias(&MFI);
Objects.push_back(UnderlyingObjectsVector::value_type(PSV, MayAlias));
} else if (const Value *V = MMO->getValue()) {
SmallVector<Value *, 4> Objs;
@@ -249,32 +247,27 @@ void ScheduleDAGInstrs::exitRegion() {
void ScheduleDAGInstrs::addSchedBarrierDeps() {
MachineInstr *ExitMI = RegionEnd != BB->end() ? &*RegionEnd : nullptr;
ExitSU.setInstr(ExitMI);
- bool AllDepKnown = ExitMI &&
- (ExitMI->isCall() || ExitMI->isBarrier());
- if (ExitMI && AllDepKnown) {
- // If it's a call or a barrier, add dependencies on the defs and uses of
- // instruction.
- for (unsigned i = 0, e = ExitMI->getNumOperands(); i != e; ++i) {
- const MachineOperand &MO = ExitMI->getOperand(i);
+ // Add dependencies on the defs and uses of the instruction.
+ if (ExitMI) {
+ for (const MachineOperand &MO : ExitMI->operands()) {
if (!MO.isReg() || MO.isDef()) continue;
unsigned Reg = MO.getReg();
- if (Reg == 0) continue;
-
- if (TRI->isPhysicalRegister(Reg))
+ if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
Uses.insert(PhysRegSUOper(&ExitSU, -1, Reg));
- else if (MO.readsReg()) // ignore undef operands
- addVRegUseDeps(&ExitSU, i);
+ } else if (TargetRegisterInfo::isVirtualRegister(Reg) && MO.readsReg()) {
+ addVRegUseDeps(&ExitSU, ExitMI->getOperandNo(&MO));
+ }
}
- } else {
+ }
+ if (!ExitMI || (!ExitMI->isCall() && !ExitMI->isBarrier())) {
// For others, e.g. fallthrough, conditional branch, assume the exit
// uses all the registers that are livein to the successor blocks.
- assert(Uses.empty() && "Uses in set before adding deps?");
- for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(),
- SE = BB->succ_end(); SI != SE; ++SI)
- for (const auto &LI : (*SI)->liveins()) {
+ for (const MachineBasicBlock *Succ : BB->successors()) {
+ for (const auto &LI : Succ->liveins()) {
if (!Uses.contains(LI.PhysReg))
Uses.insert(PhysRegSUOper(&ExitSU, -1, LI.PhysReg));
}
+ }
}
}
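The restructured barrier logic above now has two independent steps; a sketch of the resulting control flow, where addExitUsesOf and addSuccessorLiveIns are hypothetical helper names for the two loops in the code:

    if (ExitMI)                       // step 1: the exit instruction's own
      addExitUsesOf(*ExitMI);         // register uses always feed ExitSU
    if (!ExitMI || (!ExitMI->isCall() && !ExitMI->isBarrier()))
      addSuccessorLiveIns();          // step 2: conservatively treat every
                                      // successor live-in as used by ExitSU

Previously step 1 ran only for calls and barriers; running it unconditionally also models, for example, the register uses of a conditional branch that ends the region.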
@@ -326,6 +319,10 @@ void ScheduleDAGInstrs::addPhysRegDataDeps(SUnit *SU, unsigned OperIdx) {
void ScheduleDAGInstrs::addPhysRegDeps(SUnit *SU, unsigned OperIdx) {
MachineInstr *MI = SU->getInstr();
MachineOperand &MO = MI->getOperand(OperIdx);
+ unsigned Reg = MO.getReg();
+ // We do not need to track any dependencies for constant registers.
+ if (MRI.isConstantPhysReg(Reg))
+ return;
// Optionally add output and anti dependencies. For anti
// dependencies we use a latency of 0 because for a multi-issue
@@ -334,8 +331,7 @@ void ScheduleDAGInstrs::addPhysRegDeps(SUnit *SU, unsigned OperIdx) {
// TODO: Using a latency of 1 here for output dependencies assumes
// there's no cost for reusing registers.
SDep::Kind Kind = MO.isUse() ? SDep::Anti : SDep::Output;
- for (MCRegAliasIterator Alias(MO.getReg(), TRI, true);
- Alias.isValid(); ++Alias) {
+ for (MCRegAliasIterator Alias(Reg, TRI, true); Alias.isValid(); ++Alias) {
if (!Defs.contains(*Alias))
continue;
for (Reg2SUnitsMap::iterator I = Defs.find(*Alias); I != Defs.end(); ++I) {
@@ -362,13 +358,11 @@ void ScheduleDAGInstrs::addPhysRegDeps(SUnit *SU, unsigned OperIdx) {
// Either insert a new Reg2SUnits entry with an empty SUnits list, or
// retrieve the existing SUnits list for this register's uses.
// Push this SUnit on the use list.
- Uses.insert(PhysRegSUOper(SU, OperIdx, MO.getReg()));
+ Uses.insert(PhysRegSUOper(SU, OperIdx, Reg));
if (RemoveKillFlags)
MO.setIsKill(false);
- }
- else {
+ } else {
addPhysRegDataDeps(SU, OperIdx);
- unsigned Reg = MO.getReg();
// clear this register's use list
if (Uses.contains(Reg))
@@ -404,7 +398,7 @@ LaneBitmask ScheduleDAGInstrs::getLaneMaskForMO(const MachineOperand &MO) const
// No point in tracking lanemasks if we don't have interesting subregisters.
const TargetRegisterClass &RC = *MRI.getRegClass(Reg);
if (!RC.HasDisjunctSubRegs)
- return ~0u;
+ return LaneBitmask::getAll();
unsigned SubReg = MO.getSubReg();
if (SubReg == 0)
@@ -430,14 +424,14 @@ void ScheduleDAGInstrs::addVRegDefDeps(SUnit *SU, unsigned OperIdx) {
DefLaneMask = getLaneMaskForMO(MO);
// If we have a <read-undef> flag, none of the lane values comes from an
// earlier instruction.
- KillLaneMask = IsKill ? ~0u : DefLaneMask;
+ KillLaneMask = IsKill ? LaneBitmask::getAll() : DefLaneMask;
// Clear undef flag, we'll re-add it later once we know which subregister
// Def is first.
MO.setIsUndef(false);
} else {
- DefLaneMask = ~0u;
- KillLaneMask = ~0u;
+ DefLaneMask = LaneBitmask::getAll();
+ KillLaneMask = LaneBitmask::getAll();
}
if (MO.isDead()) {
@@ -450,12 +444,12 @@ void ScheduleDAGInstrs::addVRegDefDeps(SUnit *SU, unsigned OperIdx) {
E = CurrentVRegUses.end(); I != E; /*empty*/) {
LaneBitmask LaneMask = I->LaneMask;
// Ignore uses of other lanes.
- if ((LaneMask & KillLaneMask) == 0) {
+ if ((LaneMask & KillLaneMask).none()) {
++I;
continue;
}
- if ((LaneMask & DefLaneMask) != 0) {
+ if ((LaneMask & DefLaneMask).any()) {
SUnit *UseSU = I->SU;
MachineInstr *Use = UseSU->getInstr();
SDep Dep(SU, SDep::Data, Reg);
@@ -467,7 +461,7 @@ void ScheduleDAGInstrs::addVRegDefDeps(SUnit *SU, unsigned OperIdx) {
LaneMask &= ~KillLaneMask;
// If we found a Def for all lanes of this use, remove it from the list.
- if (LaneMask != 0) {
+ if (LaneMask.any()) {
I->LaneMask = LaneMask;
++I;
} else
@@ -490,7 +484,7 @@ void ScheduleDAGInstrs::addVRegDefDeps(SUnit *SU, unsigned OperIdx) {
for (VReg2SUnit &V2SU : make_range(CurrentVRegDefs.find(Reg),
CurrentVRegDefs.end())) {
// Ignore defs for other lanes.
- if ((V2SU.LaneMask & LaneMask) == 0)
+ if ((V2SU.LaneMask & LaneMask).none())
continue;
// Add an output dependence.
SUnit *DefSU = V2SU.SU;
@@ -513,11 +507,11 @@ void ScheduleDAGInstrs::addVRegDefDeps(SUnit *SU, unsigned OperIdx) {
LaneBitmask NonOverlapMask = V2SU.LaneMask & ~LaneMask;
V2SU.SU = SU;
V2SU.LaneMask = OverlapMask;
- if (NonOverlapMask != 0)
+ if (NonOverlapMask.any())
CurrentVRegDefs.insert(VReg2SUnit(Reg, NonOverlapMask, DefSU));
}
// If there was no CurrentVRegDefs entry for some lanes yet, create one.
- if (LaneMask != 0)
+ if (LaneMask.any())
CurrentVRegDefs.insert(VReg2SUnit(Reg, LaneMask, SU));
}
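The ~0u-to-LaneBitmask rewrites in this hunk follow from LaneBitmask becoming a proper class rather than an integer typedef; a minimal sketch of the replacement idioms, for an arbitrary mask M:

    LaneBitmask All = LaneBitmask::getAll(); // was: LaneBitmask All = ~0u;
    if ((M & All).none()) { /* was: (M & All) == 0 */ }
    if ((M & All).any())  { /* was: (M & All) != 0 */ }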
@@ -533,7 +527,8 @@ void ScheduleDAGInstrs::addVRegUseDeps(SUnit *SU, unsigned OperIdx) {
unsigned Reg = MO.getReg();
// Remember the use. Data dependencies will be added when we find the def.
- LaneBitmask LaneMask = TrackLaneMasks ? getLaneMaskForMO(MO) : ~0u;
+ LaneBitmask LaneMask = TrackLaneMasks ? getLaneMaskForMO(MO)
+ : LaneBitmask::getAll();
CurrentVRegUses.insert(VReg2SUnitOperIdx(Reg, LaneMask, OperIdx, SU));
// Add antidependences to the following defs of the vreg.
@@ -541,7 +536,7 @@ void ScheduleDAGInstrs::addVRegUseDeps(SUnit *SU, unsigned OperIdx) {
CurrentVRegDefs.end())) {
// Ignore defs for unrelated lanes.
LaneBitmask PrevDefLaneMask = V2SU.LaneMask;
- if ((PrevDefLaneMask & LaneMask) == 0)
+ if ((PrevDefLaneMask & LaneMask).none())
continue;
if (V2SU.SU == SU)
continue;
@@ -554,7 +549,7 @@ void ScheduleDAGInstrs::addVRegUseDeps(SUnit *SU, unsigned OperIdx) {
/// (like a call or something with unmodeled side effects).
static inline bool isGlobalMemoryObject(AliasAnalysis *AA, MachineInstr *MI) {
return MI->isCall() || MI->hasUnmodeledSideEffects() ||
- (MI->hasOrderedMemoryRef() && !MI->isInvariantLoad(AA));
+ (MI->hasOrderedMemoryRef() && !MI->isDereferenceableInvariantLoad(AA));
}
/// This returns true if the two MIs need a chain edge between them.
@@ -621,8 +616,8 @@ static bool MIsNeedChainEdge(AliasAnalysis *AA, const MachineFrameInfo *MFI,
/// Check whether two objects need a chain edge and add it if needed.
void ScheduleDAGInstrs::addChainDependency (SUnit *SUa, SUnit *SUb,
unsigned Latency) {
- if (MIsNeedChainEdge(AAForDep, MFI, MF.getDataLayout(), SUa->getInstr(),
- SUb->getInstr())) {
+ if (MIsNeedChainEdge(AAForDep, &MFI, MF.getDataLayout(), SUa->getInstr(),
+ SUb->getInstr())) {
SDep Dep(SUa, SDep::MayAliasMem);
Dep.setLatency(Latency);
SUb->addPred(Dep);
@@ -668,10 +663,10 @@ void ScheduleDAGInstrs::initSUnits() {
// within an out-of-order core. These are identified by BufferSize=1.
if (SchedModel.hasInstrSchedModel()) {
const MCSchedClassDesc *SC = getSchedClass(SU);
- for (TargetSchedModel::ProcResIter
- PI = SchedModel.getWriteProcResBegin(SC),
- PE = SchedModel.getWriteProcResEnd(SC); PI != PE; ++PI) {
- switch (SchedModel.getProcResource(PI->ProcResourceIdx)->BufferSize) {
+ for (const MCWriteProcResEntry &PRE :
+ make_range(SchedModel.getWriteProcResBegin(SC),
+ SchedModel.getWriteProcResEnd(SC))) {
+ switch (SchedModel.getProcResource(PRE.ProcResourceIdx)->BufferSize) {
case 0:
SU->hasReservedResource = true;
break;
@@ -686,44 +681,6 @@ void ScheduleDAGInstrs::initSUnits() {
}
}
-void ScheduleDAGInstrs::collectVRegUses(SUnit *SU) {
- const MachineInstr *MI = SU->getInstr();
- for (const MachineOperand &MO : MI->operands()) {
- if (!MO.isReg())
- continue;
- if (!MO.readsReg())
- continue;
- if (TrackLaneMasks && !MO.isUse())
- continue;
-
- unsigned Reg = MO.getReg();
- if (!TargetRegisterInfo::isVirtualRegister(Reg))
- continue;
-
- // Ignore re-defs.
- if (TrackLaneMasks) {
- bool FoundDef = false;
- for (const MachineOperand &MO2 : MI->operands()) {
- if (MO2.isReg() && MO2.isDef() && MO2.getReg() == Reg && !MO2.isDead()) {
- FoundDef = true;
- break;
- }
- }
- if (FoundDef)
- continue;
- }
-
- // Record this local VReg use.
- VReg2SUnitMultiMap::iterator UI = VRegUses.find(Reg);
- for (; UI != VRegUses.end(); ++UI) {
- if (UI->SU == SU)
- break;
- }
- if (UI == VRegUses.end())
- VRegUses.insert(VReg2SUnit(Reg, 0, SU));
- }
-}
-
class ScheduleDAGInstrs::Value2SUsMap : public MapVector<ValueType, SUList> {
/// Current total number of SUs in map.
@@ -901,9 +858,6 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
CurrentVRegDefs.setUniverse(NumVirtRegs);
CurrentVRegUses.setUniverse(NumVirtRegs);
- VRegUses.clear();
- VRegUses.setUniverse(NumVirtRegs);
-
// Model data dependencies between instructions being scheduled and the
// ExitSU.
addSchedBarrierDeps();
@@ -926,8 +880,6 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
assert(SU && "No SUnit mapped to this MI");
if (RPTracker) {
- collectVRegUses(SU);
-
RegisterOperands RegOpers;
RegOpers.collect(MI, *TRI, MRI, TrackLaneMasks, false);
if (TrackLaneMasks) {
@@ -957,12 +909,9 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
if (!MO.isReg() || !MO.isDef())
continue;
unsigned Reg = MO.getReg();
- if (Reg == 0)
- continue;
-
- if (TRI->isPhysicalRegister(Reg))
+ if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
addPhysRegDeps(SU, j);
- else {
+ } else if (TargetRegisterInfo::isVirtualRegister(Reg)) {
HasVRegDef = true;
addVRegDefDeps(SU, j);
}
@@ -977,13 +926,11 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
if (!MO.isReg() || !MO.isUse())
continue;
unsigned Reg = MO.getReg();
- if (Reg == 0)
- continue;
-
- if (TRI->isPhysicalRegister(Reg))
+ if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
addPhysRegDeps(SU, j);
- else if (MO.readsReg()) // ignore undef operands
+ } else if (TargetRegisterInfo::isVirtualRegister(Reg) && MO.readsReg()) {
addVRegUseDeps(SU, j);
+ }
}
// If we haven't seen any uses in this scheduling region, create a
@@ -1023,7 +970,8 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
}
// If it's not a store or a variant load, we're done.
- if (!MI.mayStore() && !(MI.mayLoad() && !MI.isInvariantLoad(AA)))
+ if (!MI.mayStore() &&
+ !(MI.mayLoad() && !MI.isDereferenceableInvariantLoad(AA)))
continue;
// Always add dependency edge to BarrierChain if present.
@@ -1200,9 +1148,8 @@ void ScheduleDAGInstrs::startBlockForKills(MachineBasicBlock *BB) {
LiveRegs.reset();
// Examine the live-in regs of all successors.
- for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(),
- SE = BB->succ_end(); SI != SE; ++SI) {
- for (const auto &LI : (*SI)->liveins()) {
+ for (const MachineBasicBlock *Succ : BB->successors()) {
+ for (const auto &LI : Succ->liveins()) {
// Repeat, for reg and all subregs.
for (MCSubRegIterator SubRegs(LI.PhysReg, TRI, /*IncludeSelf=*/true);
SubRegs.isValid(); ++SubRegs)
@@ -1225,7 +1172,7 @@ static void toggleBundleKillFlag(MachineInstr *MI, unsigned Reg,
// might set it on too many operands. We will clear as many flags as we
// can though.
MachineBasicBlock::instr_iterator Begin = MI->getIterator();
- MachineBasicBlock::instr_iterator End = getBundleEnd(*MI);
+ MachineBasicBlock::instr_iterator End = getBundleEnd(Begin);
while (Begin != End) {
if (NewKillState) {
if ((--End)->addRegisterKilled(Reg, TRI, /* addIfNotFound= */ false))
@@ -1312,6 +1259,11 @@ void ScheduleDAGInstrs::fixupKills(MachineBasicBlock *MBB) {
// register is used multiple times we only set the kill flag on
// the first use. Don't set kill flags on undef operands.
killedRegs.reset();
+
+ // toggleKillFlag can append new operands (implicit defs), so using
+ // a range-based loop is not safe. The new operands will be appended
+ // at the end of the operand list and they don't need to be visited,
+ // so iterating until the currently last operand is ok.
for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
MachineOperand &MO = MI.getOperand(i);
if (!MO.isReg() || !MO.isUse() || MO.isUndef()) continue;
@@ -1337,13 +1289,12 @@ void ScheduleDAGInstrs::fixupKills(MachineBasicBlock *MBB) {
if (MO.isKill() != kill) {
DEBUG(dbgs() << "Fixing " << MO << " in ");
- // Warning: toggleKillFlag may invalidate MO.
toggleKillFlag(&MI, MO);
DEBUG(MI.dump());
DEBUG({
if (MI.getOpcode() == TargetOpcode::BUNDLE) {
MachineBasicBlock::instr_iterator Begin = MI.getIterator();
- MachineBasicBlock::instr_iterator End = getBundleEnd(MI);
+ MachineBasicBlock::instr_iterator End = getBundleEnd(Begin);
while (++Begin != End)
DEBUG(Begin->dump());
}
@@ -1355,8 +1306,7 @@ void ScheduleDAGInstrs::fixupKills(MachineBasicBlock *MBB) {
// Mark any used register (that is not using undef) and subregs as
// now live...
- for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
- MachineOperand &MO = MI.getOperand(i);
+ for (const MachineOperand &MO : MI.operands()) {
if (!MO.isReg() || !MO.isUse() || MO.isUndef()) continue;
unsigned Reg = MO.getReg();
if ((Reg == 0) || MRI.isReserved(Reg)) continue;
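Taken together, the two loops in fixupKills show the rule of thumb this hunk applies: iterate by cached index when the body (here toggleKillFlag) may append operands, and range-iterate only when the body is read-only. A sketch of the mutation-safe form:

    for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
      MachineOperand &MO = MI.getOperand(i);
      // The body may append implicit operands; they land at indices >= e
      // and are deliberately not visited by this loop.
    }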
@@ -1457,13 +1407,12 @@ public:
// the subtree limit, then try to join it now since splitting subtrees is
// only useful if multiple high-pressure paths are possible.
unsigned InstrCount = R.DFSNodeData[SU->NodeNum].InstrCount;
- for (SUnit::const_pred_iterator
- PI = SU->Preds.begin(), PE = SU->Preds.end(); PI != PE; ++PI) {
- if (PI->getKind() != SDep::Data)
+ for (const SDep &PredDep : SU->Preds) {
+ if (PredDep.getKind() != SDep::Data)
continue;
- unsigned PredNum = PI->getSUnit()->NodeNum;
+ unsigned PredNum = PredDep.getSUnit()->NodeNum;
if ((InstrCount - R.DFSNodeData[PredNum].InstrCount) < R.SubtreeLimit)
- joinPredSubtree(*PI, SU, /*CheckLimit=*/false);
+ joinPredSubtree(PredDep, SU, /*CheckLimit=*/false);
// Either link or merge the TreeData entry from the child to the parent.
if (R.DFSNodeData[PredNum].SubtreeID == PredNum) {
@@ -1505,12 +1454,11 @@ public:
R.DFSTreeData.resize(SubtreeClasses.getNumClasses());
assert(SubtreeClasses.getNumClasses() == RootSet.size()
&& "number of roots should match trees");
- for (SparseSet<RootData>::const_iterator
- RI = RootSet.begin(), RE = RootSet.end(); RI != RE; ++RI) {
- unsigned TreeID = SubtreeClasses[RI->NodeID];
- if (RI->ParentNodeID != SchedDFSResult::InvalidSubtreeID)
- R.DFSTreeData[TreeID].ParentTreeID = SubtreeClasses[RI->ParentNodeID];
- R.DFSTreeData[TreeID].SubInstrCount = RI->SubInstrCount;
+ for (const RootData &Root : RootSet) {
+ unsigned TreeID = SubtreeClasses[Root.NodeID];
+ if (Root.ParentNodeID != SchedDFSResult::InvalidSubtreeID)
+ R.DFSTreeData[TreeID].ParentTreeID = SubtreeClasses[Root.ParentNodeID];
+ R.DFSTreeData[TreeID].SubInstrCount = Root.SubInstrCount;
// Note that SubInstrCount may be greater than InstrCount if we joined
// subtrees across a cross edge. InstrCount will be attributed to the
// original parent, while SubInstrCount will be attributed to the joined
@@ -1524,14 +1472,12 @@ public:
DEBUG(dbgs() << " SU(" << Idx << ") in tree "
<< R.DFSNodeData[Idx].SubtreeID << '\n');
}
- for (std::vector<std::pair<const SUnit*, const SUnit*> >::const_iterator
- I = ConnectionPairs.begin(), E = ConnectionPairs.end();
- I != E; ++I) {
- unsigned PredTree = SubtreeClasses[I->first->NodeNum];
- unsigned SuccTree = SubtreeClasses[I->second->NodeNum];
+ for (const std::pair<const SUnit*, const SUnit*> &P : ConnectionPairs) {
+ unsigned PredTree = SubtreeClasses[P.first->NodeNum];
+ unsigned SuccTree = SubtreeClasses[P.second->NodeNum];
if (PredTree == SuccTree)
continue;
- unsigned Depth = I->first->getDepth();
+ unsigned Depth = P.first->getDepth();
addConnection(PredTree, SuccTree, Depth);
addConnection(SuccTree, PredTree, Depth);
}
@@ -1553,9 +1499,8 @@ protected:
// Four is the magic number of successors before a node is considered a
// pinch point.
unsigned NumDataSucs = 0;
- for (SUnit::const_succ_iterator SI = PredSU->Succs.begin(),
- SE = PredSU->Succs.end(); SI != SE; ++SI) {
- if (SI->getKind() == SDep::Data) {
+ for (const SDep &SuccDep : PredSU->Succs) {
+ if (SuccDep.getKind() == SDep::Data) {
if (++NumDataSucs >= 4)
return false;
}
@@ -1575,10 +1520,9 @@ protected:
do {
SmallVectorImpl<SchedDFSResult::Connection> &Connections =
R.SubtreeConnections[FromTree];
- for (SmallVectorImpl<SchedDFSResult::Connection>::iterator
- I = Connections.begin(), E = Connections.end(); I != E; ++I) {
- if (I->TreeID == ToTree) {
- I->Level = std::max(I->Level, Depth);
+ for (SchedDFSResult::Connection &C : Connections) {
+ if (C.TreeID == ToTree) {
+ C.Level = std::max(C.Level, Depth);
return;
}
}
@@ -1617,9 +1561,9 @@ public:
} // anonymous
static bool hasDataSucc(const SUnit *SU) {
- for (SUnit::const_succ_iterator
- SI = SU->Succs.begin(), SE = SU->Succs.end(); SI != SE; ++SI) {
- if (SI->getKind() == SDep::Data && !SI->getSUnit()->isBoundaryNode())
+ for (const SDep &SuccDep : SU->Succs) {
+ if (SuccDep.getKind() == SDep::Data &&
+ !SuccDep.getSUnit()->isBoundaryNode())
return true;
}
return false;
@@ -1632,15 +1576,13 @@ void SchedDFSResult::compute(ArrayRef<SUnit> SUnits) {
llvm_unreachable("Top-down ILP metric is unimplemented");
SchedDFSImpl Impl(*this);
- for (ArrayRef<SUnit>::const_iterator
- SI = SUnits.begin(), SE = SUnits.end(); SI != SE; ++SI) {
- const SUnit *SU = &*SI;
- if (Impl.isVisited(SU) || hasDataSucc(SU))
+ for (const SUnit &SU : SUnits) {
+ if (Impl.isVisited(&SU) || hasDataSucc(&SU))
continue;
SchedDAGReverseDFS DFS;
- Impl.visitPreorder(SU);
- DFS.follow(SU);
+ Impl.visitPreorder(&SU);
+ DFS.follow(&SU);
for (;;) {
// Traverse the leftmost path as far as possible.
while (DFS.getPred() != DFS.getPredEnd()) {
@@ -1676,13 +1618,11 @@ void SchedDFSResult::compute(ArrayRef<SUnit> SUnits) {
/// connected to this tree, record the depth of the connection so that the
/// nearest connected subtrees can be prioritized.
void SchedDFSResult::scheduleTree(unsigned SubtreeID) {
- for (SmallVectorImpl<Connection>::const_iterator
- I = SubtreeConnections[SubtreeID].begin(),
- E = SubtreeConnections[SubtreeID].end(); I != E; ++I) {
- SubtreeConnectLevels[I->TreeID] =
- std::max(SubtreeConnectLevels[I->TreeID], I->Level);
- DEBUG(dbgs() << " Tree: " << I->TreeID
- << " @" << SubtreeConnectLevels[I->TreeID] << '\n');
+ for (const Connection &C : SubtreeConnections[SubtreeID]) {
+ SubtreeConnectLevels[C.TreeID] =
+ std::max(SubtreeConnectLevels[C.TreeID], C.Level);
+ DEBUG(dbgs() << " Tree: " << C.TreeID
+ << " @" << SubtreeConnectLevels[C.TreeID] << '\n');
}
}
diff --git a/contrib/llvm/lib/CodeGen/ScoreboardHazardRecognizer.cpp b/contrib/llvm/lib/CodeGen/ScoreboardHazardRecognizer.cpp
index 69c4870..83bc1ba 100644
--- a/contrib/llvm/lib/CodeGen/ScoreboardHazardRecognizer.cpp
+++ b/contrib/llvm/lib/CodeGen/ScoreboardHazardRecognizer.cpp
@@ -145,7 +145,7 @@ ScoreboardHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {
case InstrStage::Required:
// Required FUs conflict with both reserved and required ones
freeUnits &= ~ReservedScoreboard[StageCycle];
- // FALLTHROUGH
+ LLVM_FALLTHROUGH;
case InstrStage::Reserved:
// Reserved FUs can conflict only with required ones.
freeUnits &= ~RequiredScoreboard[StageCycle];
@@ -197,7 +197,7 @@ void ScoreboardHazardRecognizer::EmitInstruction(SUnit *SU) {
case InstrStage::Required:
// Required FUs conflict with both reserved and required ones
freeUnits &= ~ReservedScoreboard[cycle + i];
- // FALLTHROUGH
+ LLVM_FALLTHROUGH;
case InstrStage::Reserved:
// Reserved FUs can conflict only with required ones.
freeUnits &= ~RequiredScoreboard[cycle + i];
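The two FALLTHROUGH replacements above move from a plain comment to the LLVM_FALLTHROUGH macro (llvm/Support/Compiler.h), which expands to a fallthrough attribute where the compiler supports one, so -Wimplicit-fallthrough can tell deliberate fallthrough from accidental. A sketch, with Stage/Reserved/Required/Cycle as stand-in names:

    switch (Stage) {
    case InstrStage::Required:
      freeUnits &= ~Reserved[Cycle]; // Required conflicts with reserved units...
      LLVM_FALLTHROUGH;              // ...and, deliberately, with required ones too
    case InstrStage::Reserved:
      freeUnits &= ~Required[Cycle];
      break;
    }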
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 5ecc6da..2c7bffe 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -16,14 +16,15 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGTargetInfo.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
@@ -181,7 +182,7 @@ namespace {
/// if things it uses can be simplified by bit propagation.
/// If so, return true.
bool SimplifyDemandedBits(SDValue Op) {
- unsigned BitWidth = Op.getValueType().getScalarType().getSizeInBits();
+ unsigned BitWidth = Op.getScalarValueSizeInBits();
APInt Demanded = APInt::getAllOnesValue(BitWidth);
return SimplifyDemandedBits(Op, Demanded);
}
@@ -326,7 +327,7 @@ namespace {
SDValue visitFADDForFMACombine(SDNode *N);
SDValue visitFSUBForFMACombine(SDNode *N);
- SDValue visitFMULForFMACombine(SDNode *N);
+ SDValue visitFMULForFMADistributiveCombine(SDNode *N);
SDValue XformToShuffleWithZero(SDNode *N);
SDValue ReassociateOps(unsigned Opc, const SDLoc &DL, SDValue LHS,
@@ -334,12 +335,15 @@ namespace {
SDValue visitShiftByConstant(SDNode *N, ConstantSDNode *Amt);
+ SDValue foldSelectOfConstants(SDNode *N);
bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS);
SDValue SimplifyBinOpWithSameOpcodeHands(SDNode *N);
SDValue SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2);
SDValue SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
SDValue N2, SDValue N3, ISD::CondCode CC,
bool NotExtCompare = false);
+ SDValue foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0, SDValue N1,
+ SDValue N2, SDValue N3, ISD::CondCode CC);
SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
const SDLoc &DL, bool foldBooleans = true);
@@ -356,6 +360,7 @@ namespace {
SDValue BuildSDIV(SDNode *N);
SDValue BuildSDIVPow2(SDNode *N);
SDValue BuildUDIV(SDNode *N);
+ SDValue BuildLogBase2(SDValue Op, const SDLoc &DL);
SDValue BuildReciprocalEstimate(SDValue Op, SDNodeFlags *Flags);
SDValue buildRsqrtEstimate(SDValue Op, SDNodeFlags *Flags);
SDValue buildSqrtEstimate(SDValue Op, SDNodeFlags *Flags);
@@ -374,9 +379,14 @@ namespace {
SDNode *MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL);
SDValue ReduceLoadWidth(SDNode *N);
SDValue ReduceLoadOpStoreWidth(SDNode *N);
+ SDValue splitMergedValStore(StoreSDNode *ST);
SDValue TransformFPLoadStorePair(SDNode *N);
SDValue reduceBuildVecExtToExtBuildVec(SDNode *N);
SDValue reduceBuildVecConvertToConvertBuildVec(SDNode *N);
+ SDValue reduceBuildVecToShuffle(SDNode *N);
+ SDValue createBuildVecShuffle(SDLoc DL, SDNode *N, ArrayRef<int> VectorMask,
+ SDValue VecIn1, SDValue VecIn2,
+ unsigned LeftIdx);
SDValue GetDemandedBits(SDValue V, const APInt &Mask);
@@ -444,10 +454,11 @@ namespace {
/// This is a helper function for MergeConsecutiveStores. When the source
/// elements of the consecutive stores are all constants or all extracted
/// vector elements, try to merge them into one larger store.
- /// \return True if a merged store was created.
- bool MergeStoresOfConstantsOrVecElts(SmallVectorImpl<MemOpLink> &StoreNodes,
- EVT MemVT, unsigned NumStores,
- bool IsConstantSrc, bool UseVector);
+ /// \return number of stores that were merged into a merged store (always
+ /// a prefix of \p StoreNodes).
+ bool MergeStoresOfConstantsOrVecElts(
+ SmallVectorImpl<MemOpLink> &StoreNodes, EVT MemVT, unsigned NumStores,
+ bool IsConstantSrc, bool UseVector);
/// This is a helper function for MergeConsecutiveStores.
/// Stores that may be merged are placed in StoreNodes.
@@ -464,8 +475,10 @@ namespace {
/// Merge consecutive store operations into a wide store.
/// This optimization uses wide integers or vectors when possible.
- /// \return True if some memory operations were changed.
- bool MergeConsecutiveStores(StoreSDNode *N);
+ /// \return number of stores that were merged into a merged store (the
+ /// affected nodes are stored as a prefix in \p StoreNodes).
+ bool MergeConsecutiveStores(StoreSDNode *N,
+ SmallVectorImpl<MemOpLink> &StoreNodes);
/// \brief Try to transform a truncation where C is a constant:
/// (trunc (and X, C)) -> (and (trunc X), (trunc C))
@@ -536,10 +549,6 @@ void TargetLowering::DAGCombinerInfo::AddToWorklist(SDNode *N) {
((DAGCombiner*)DC)->AddToWorklist(N);
}
-void TargetLowering::DAGCombinerInfo::RemoveFromWorklist(SDNode *N) {
- ((DAGCombiner*)DC)->removeFromWorklist(N);
-}
-
SDValue TargetLowering::DAGCombinerInfo::
CombineTo(SDNode *N, ArrayRef<SDValue> To, bool AddTo) {
return ((DAGCombiner*)DC)->CombineTo(N, &To[0], To.size(), AddTo);
@@ -620,7 +629,8 @@ static char isNegatibleForFree(SDValue Op, bool LegalOperations,
Depth + 1);
case ISD::FSUB:
// We can't turn -(A-B) into B-A when we honor signed zeros.
- if (!Options->UnsafeFPMath) return 0;
+ if (!Options->UnsafeFPMath && !Op.getNode()->getFlags()->hasNoSignedZeros())
+ return 0;
// fold (fneg (fsub A, B)) -> (fsub B, A)
return 1;
@@ -683,9 +693,6 @@ static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG,
LegalOperations, Depth+1),
Op.getOperand(0), Flags);
case ISD::FSUB:
- // We can't turn -(A-B) into B-A when we honor signed zeros.
- assert(Options.UnsafeFPMath);
-
// fold (fneg (fsub 0, B)) -> B
if (ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(Op.getOperand(0)))
if (N0CFP->isZero())
@@ -726,6 +733,15 @@ static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG,
}
}
+// APInts must be the same size for most operations; this helper function
+// zero-extends the shorter of the pair so that they match.
+// We provide an Offset so that we can create bit widths that won't overflow.
+static void zeroExtendToMatch(APInt &LHS, APInt &RHS, unsigned Offset = 0) {
+ unsigned Bits = Offset + std::max(LHS.getBitWidth(), RHS.getBitWidth());
+ LHS = LHS.zextOrSelf(Bits);
+ RHS = RHS.zextOrSelf(Bits);
+}
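A hedged usage sketch for the helper just added; the values are arbitrary and only illustrate that both APInts end up at a common width with headroom:

    APInt L(8, 200), R(16, 1000);           // 8-bit and 16-bit values
    zeroExtendToMatch(L, R, /*Offset=*/1);  // both widened to 17 bits
    APInt Sum = L + R;                      // now legal: widths match, no overflow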
+
// Return true if this node is a setcc, or is a select_cc
// that selects between the target values used for true and false, making it
// equivalent to a setcc. Also, set the incoming LHS, RHS, and CC references to
@@ -775,42 +791,61 @@ static SDNode *isConstantFPBuildVectorOrConstantFP(SDValue N) {
return nullptr;
}
-// \brief Returns the SDNode if it is a constant splat BuildVector or constant
-// int.
-static ConstantSDNode *isConstOrConstSplat(SDValue N) {
- if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N))
- return CN;
-
- if (BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N)) {
- BitVector UndefElements;
- ConstantSDNode *CN = BV->getConstantSplatNode(&UndefElements);
-
- // BuildVectors can truncate their operands. Ignore that case here.
- // FIXME: We blindly ignore splats which include undef which is overly
- // pessimistic.
- if (CN && UndefElements.none() &&
- CN->getValueType(0) == N.getValueType().getScalarType())
- return CN;
+// Determines if it is a constant integer or a build vector of constant
+// integers (and undefs).
+// Do not permit build vector implicit truncation.
+static bool isConstantOrConstantVector(SDValue N, bool NoOpaques = false) {
+ if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N))
+ return !(Const->isOpaque() && NoOpaques);
+ if (N.getOpcode() != ISD::BUILD_VECTOR)
+ return false;
+ unsigned BitWidth = N.getScalarValueSizeInBits();
+ for (const SDValue &Op : N->op_values()) {
+ if (Op.isUndef())
+ continue;
+ ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Op);
+ if (!Const || Const->getAPIntValue().getBitWidth() != BitWidth ||
+ (Const->isOpaque() && NoOpaques))
+ return false;
}
-
- return nullptr;
+ return true;
}
-// \brief Returns the SDNode if it is a constant splat BuildVector or constant
-// float.
-static ConstantFPSDNode *isConstOrConstSplatFP(SDValue N) {
- if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N))
- return CN;
+// Determines if it is a constant null integer or a splatted vector of a
+// constant null integer (with no undefs).
+// Build vector implicit truncation is not an issue for null values.
+static bool isNullConstantOrNullSplatConstant(SDValue N) {
+ if (ConstantSDNode *Splat = isConstOrConstSplat(N))
+ return Splat->isNullValue();
+ return false;
+}
- if (BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N)) {
- BitVector UndefElements;
- ConstantFPSDNode *CN = BV->getConstantFPSplatNode(&UndefElements);
+// Determines if it is a constant integer of one or a splatted vector of a
+// constant integer of one (with no undefs).
+// Do not permit build vector implicit truncation.
+static bool isOneConstantOrOneSplatConstant(SDValue N) {
+ unsigned BitWidth = N.getScalarValueSizeInBits();
+ if (ConstantSDNode *Splat = isConstOrConstSplat(N))
+ return Splat->isOne() && Splat->getAPIntValue().getBitWidth() == BitWidth;
+ return false;
+}
- if (CN && UndefElements.none())
- return CN;
- }
+// Determines if it is a constant integer of all ones or a splatted vector of a
+// constant integer of all ones (with no undefs).
+// Do not permit build vector implicit truncation.
+static bool isAllOnesConstantOrAllOnesSplatConstant(SDValue N) {
+ unsigned BitWidth = N.getScalarValueSizeInBits();
+ if (ConstantSDNode *Splat = isConstOrConstSplat(N))
+ return Splat->isAllOnesValue() &&
+ Splat->getAPIntValue().getBitWidth() == BitWidth;
+ return false;
+}
- return nullptr;
+// Determines if a BUILD_VECTOR is composed of all-constants possibly mixed with
+// undef's.
+static bool isAnyConstantBuildVector(const SDNode *N) {
+ return ISD::isBuildVectorOfConstantSDNodes(N) ||
+ ISD::isBuildVectorOfConstantFPSDNodes(N);
}
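A sketch of what the new predicates buy over the splat-node helpers they replace, assuming a DAG and DL in scope; getConstant with a vector type builds a splat BUILD_VECTOR:

    SDValue Zeros = DAG.getConstant(0, DL, MVT::v4i32); // splat of 0
    bool A = isNullConstantOrNullSplatConstant(Zeros);  // true
    bool B = isOneConstantOrOneSplatConstant(Zeros);    // false
    // Implicitly truncating splats (element width mismatch) are rejected,
    // matching the bit-width checks in the predicates above.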
SDValue DAGCombiner::ReassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
@@ -935,9 +970,9 @@ bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &Demanded) {
}
void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) {
- SDLoc dl(Load);
+ SDLoc DL(Load);
EVT VT = Load->getValueType(0);
- SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, VT, SDValue(ExtLoad, 0));
+ SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, SDValue(ExtLoad, 0));
DEBUG(dbgs() << "\nReplacing.9 ";
Load->dump(&DAG);
@@ -953,7 +988,7 @@ void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) {
SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) {
Replace = false;
- SDLoc dl(Op);
+ SDLoc DL(Op);
if (ISD::isUNINDEXEDLoad(Op.getNode())) {
LoadSDNode *LD = cast<LoadSDNode>(Op);
EVT MemVT = LD->getMemoryVT();
@@ -962,7 +997,7 @@ SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) {
: ISD::EXTLOAD)
: LD->getExtensionType();
Replace = true;
- return DAG.getExtLoad(ExtType, dl, PVT,
+ return DAG.getExtLoad(ExtType, DL, PVT,
LD->getChain(), LD->getBasePtr(),
MemVT, LD->getMemOperand());
}
@@ -971,30 +1006,30 @@ SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) {
switch (Opc) {
default: break;
case ISD::AssertSext:
- return DAG.getNode(ISD::AssertSext, dl, PVT,
+ return DAG.getNode(ISD::AssertSext, DL, PVT,
SExtPromoteOperand(Op.getOperand(0), PVT),
Op.getOperand(1));
case ISD::AssertZext:
- return DAG.getNode(ISD::AssertZext, dl, PVT,
+ return DAG.getNode(ISD::AssertZext, DL, PVT,
ZExtPromoteOperand(Op.getOperand(0), PVT),
Op.getOperand(1));
case ISD::Constant: {
unsigned ExtOpc =
Op.getValueType().isByteSized() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
- return DAG.getNode(ExtOpc, dl, PVT, Op);
+ return DAG.getNode(ExtOpc, DL, PVT, Op);
}
}
if (!TLI.isOperationLegal(ISD::ANY_EXTEND, PVT))
return SDValue();
- return DAG.getNode(ISD::ANY_EXTEND, dl, PVT, Op);
+ return DAG.getNode(ISD::ANY_EXTEND, DL, PVT, Op);
}
SDValue DAGCombiner::SExtPromoteOperand(SDValue Op, EVT PVT) {
if (!TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, PVT))
return SDValue();
EVT OldVT = Op.getValueType();
- SDLoc dl(Op);
+ SDLoc DL(Op);
bool Replace = false;
SDValue NewOp = PromoteOperand(Op, PVT, Replace);
if (!NewOp.getNode())
@@ -1003,13 +1038,13 @@ SDValue DAGCombiner::SExtPromoteOperand(SDValue Op, EVT PVT) {
if (Replace)
ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
- return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, NewOp.getValueType(), NewOp,
+ return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, NewOp.getValueType(), NewOp,
DAG.getValueType(OldVT));
}
SDValue DAGCombiner::ZExtPromoteOperand(SDValue Op, EVT PVT) {
EVT OldVT = Op.getValueType();
- SDLoc dl(Op);
+ SDLoc DL(Op);
bool Replace = false;
SDValue NewOp = PromoteOperand(Op, PVT, Replace);
if (!NewOp.getNode())
@@ -1018,7 +1053,7 @@ SDValue DAGCombiner::ZExtPromoteOperand(SDValue Op, EVT PVT) {
if (Replace)
ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
- return DAG.getZeroExtendInReg(NewOp, dl, OldVT);
+ return DAG.getZeroExtendInReg(NewOp, DL, OldVT);
}
/// Promote the specified integer binary operation if the target indicates it is
@@ -1072,9 +1107,9 @@ SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) {
DEBUG(dbgs() << "\nPromoting ";
Op.getNode()->dump(&DAG));
- SDLoc dl(Op);
- return DAG.getNode(ISD::TRUNCATE, dl, VT,
- DAG.getNode(Opc, dl, PVT, NN0, NN1));
+ SDLoc DL(Op);
+ return DAG.getNode(ISD::TRUNCATE, DL, VT,
+ DAG.getNode(Opc, DL, PVT, NN0, NN1));
}
return SDValue();
}
@@ -1119,9 +1154,9 @@ SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) {
DEBUG(dbgs() << "\nPromoting ";
Op.getNode()->dump(&DAG));
- SDLoc dl(Op);
- return DAG.getNode(ISD::TRUNCATE, dl, VT,
- DAG.getNode(Opc, dl, PVT, N0, Op.getOperand(1)));
+ SDLoc DL(Op);
+ return DAG.getNode(ISD::TRUNCATE, DL, VT,
+ DAG.getNode(Opc, DL, PVT, N0, Op.getOperand(1)));
}
return SDValue();
}
@@ -1178,7 +1213,7 @@ bool DAGCombiner::PromoteLoad(SDValue Op) {
if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
assert(PVT != VT && "Don't know what type to promote to!");
- SDLoc dl(Op);
+ SDLoc DL(Op);
SDNode *N = Op.getNode();
LoadSDNode *LD = cast<LoadSDNode>(N);
EVT MemVT = LD->getMemoryVT();
@@ -1186,10 +1221,10 @@ bool DAGCombiner::PromoteLoad(SDValue Op) {
? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, PVT, MemVT) ? ISD::ZEXTLOAD
: ISD::EXTLOAD)
: LD->getExtensionType();
- SDValue NewLD = DAG.getExtLoad(ExtType, dl, PVT,
+ SDValue NewLD = DAG.getExtLoad(ExtType, DL, PVT,
LD->getChain(), LD->getBasePtr(),
MemVT, LD->getMemOperand());
- SDValue Result = DAG.getNode(ISD::TRUNCATE, dl, VT, NewLD);
+ SDValue Result = DAG.getNode(ISD::TRUNCATE, DL, VT, NewLD);
DEBUG(dbgs() << "\nPromoting ";
N->dump(&DAG);
@@ -1315,7 +1350,7 @@ void DAGCombiner::Run(CombineLevel AtLevel) {
continue;
assert(N->getOpcode() != ISD::DELETED_NODE &&
- RV.getNode()->getOpcode() != ISD::DELETED_NODE &&
+ RV.getOpcode() != ISD::DELETED_NODE &&
"Node was deleted but visit returned new node!");
DEBUG(dbgs() << " ... into: ";
@@ -1562,8 +1597,7 @@ SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
break;
case ISD::TokenFactor:
- if (Op.hasOneUse() &&
- std::find(TFs.begin(), TFs.end(), Op.getNode()) == TFs.end()) {
+ if (Op.hasOneUse() && !is_contained(TFs, Op.getNode())) {
// Queue up for processing.
TFs.push_back(Op.getNode());
// Clean up in case the token factor is removed.
@@ -1571,7 +1605,7 @@ SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
Changed = true;
break;
}
- // Fall thru
+ LLVM_FALLTHROUGH;
default:
// Only add if it isn't already in the list.
@@ -1634,6 +1668,7 @@ SDValue DAGCombiner::visitADD(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
EVT VT = N0.getValueType();
+ SDLoc DL(N);
// fold vector ops
if (VT.isVector()) {
@@ -1650,61 +1685,73 @@ SDValue DAGCombiner::visitADD(SDNode *N) {
// fold (add x, undef) -> undef
if (N0.isUndef())
return N0;
+
if (N1.isUndef())
return N1;
+
if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
// canonicalize constant to RHS
if (!DAG.isConstantIntBuildVectorOrConstantInt(N1))
- return DAG.getNode(ISD::ADD, SDLoc(N), VT, N1, N0);
+ return DAG.getNode(ISD::ADD, DL, VT, N1, N0);
// fold (add c1, c2) -> c1+c2
- return DAG.FoldConstantArithmetic(ISD::ADD, SDLoc(N), VT,
- N0.getNode(), N1.getNode());
+ return DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, N0.getNode(),
+ N1.getNode());
}
+
// fold (add x, 0) -> x
if (isNullConstant(N1))
return N0;
+
// fold ((c1-A)+c2) -> (c1+c2)-A
- if (ConstantSDNode *N1C = getAsNonOpaqueConstant(N1)) {
+ if (isConstantOrConstantVector(N1, /* NoOpaques */ true)) {
if (N0.getOpcode() == ISD::SUB)
- if (ConstantSDNode *N0C = getAsNonOpaqueConstant(N0.getOperand(0))) {
- SDLoc DL(N);
+ if (isConstantOrConstantVector(N0.getOperand(0), /* NoOpaques */ true)) {
return DAG.getNode(ISD::SUB, DL, VT,
- DAG.getConstant(N1C->getAPIntValue()+
- N0C->getAPIntValue(), DL, VT),
+ DAG.getNode(ISD::ADD, DL, VT, N1, N0.getOperand(0)),
N0.getOperand(1));
}
}
+
// reassociate add
- if (SDValue RADD = ReassociateOps(ISD::ADD, SDLoc(N), N0, N1))
+ if (SDValue RADD = ReassociateOps(ISD::ADD, DL, N0, N1))
return RADD;
+
// fold ((0-A) + B) -> B-A
- if (N0.getOpcode() == ISD::SUB && isNullConstant(N0.getOperand(0)))
- return DAG.getNode(ISD::SUB, SDLoc(N), VT, N1, N0.getOperand(1));
+ if (N0.getOpcode() == ISD::SUB &&
+ isNullConstantOrNullSplatConstant(N0.getOperand(0)))
+ return DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1));
+
// fold (A + (0-B)) -> A-B
- if (N1.getOpcode() == ISD::SUB && isNullConstant(N1.getOperand(0)))
- return DAG.getNode(ISD::SUB, SDLoc(N), VT, N0, N1.getOperand(1));
+ if (N1.getOpcode() == ISD::SUB &&
+ isNullConstantOrNullSplatConstant(N1.getOperand(0)))
+ return DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(1));
+
// fold (A+(B-A)) -> B
if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(1))
return N1.getOperand(0);
+
// fold ((B-A)+A) -> B
if (N0.getOpcode() == ISD::SUB && N1 == N0.getOperand(1))
return N0.getOperand(0);
+
// fold (A+(B-(A+C))) to (B-C)
if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
N0 == N1.getOperand(1).getOperand(0))
- return DAG.getNode(ISD::SUB, SDLoc(N), VT, N1.getOperand(0),
+ return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
N1.getOperand(1).getOperand(1));
+
// fold (A+(B-(C+A))) to (B-C)
if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
N0 == N1.getOperand(1).getOperand(1))
- return DAG.getNode(ISD::SUB, SDLoc(N), VT, N1.getOperand(0),
+ return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
N1.getOperand(1).getOperand(0));
+
// fold (A+((B-A)+or-C)) to (B+or-C)
if ((N1.getOpcode() == ISD::SUB || N1.getOpcode() == ISD::ADD) &&
N1.getOperand(0).getOpcode() == ISD::SUB &&
N0 == N1.getOperand(0).getOperand(1))
- return DAG.getNode(N1.getOpcode(), SDLoc(N), VT,
- N1.getOperand(0).getOperand(0), N1.getOperand(1));
+ return DAG.getNode(N1.getOpcode(), DL, VT, N1.getOperand(0).getOperand(0),
+ N1.getOperand(1));
// fold (A-B)+(C-D) to (A+C)-(B+D) when A or C is constant
if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB) {
@@ -1713,52 +1760,50 @@ SDValue DAGCombiner::visitADD(SDNode *N) {
SDValue N10 = N1.getOperand(0);
SDValue N11 = N1.getOperand(1);
- if (isa<ConstantSDNode>(N00) || isa<ConstantSDNode>(N10))
- return DAG.getNode(ISD::SUB, SDLoc(N), VT,
+ if (isConstantOrConstantVector(N00) || isConstantOrConstantVector(N10))
+ return DAG.getNode(ISD::SUB, DL, VT,
DAG.getNode(ISD::ADD, SDLoc(N0), VT, N00, N10),
DAG.getNode(ISD::ADD, SDLoc(N1), VT, N01, N11));
}
- if (!VT.isVector() && SimplifyDemandedBits(SDValue(N, 0)))
+ if (SimplifyDemandedBits(SDValue(N, 0)))
return SDValue(N, 0);
// fold (a+b) -> (a|b) iff a and b share no bits.
if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) &&
- VT.isInteger() && !VT.isVector() && DAG.haveNoCommonBitsSet(N0, N1))
- return DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N1);
+ VT.isInteger() && DAG.haveNoCommonBitsSet(N0, N1))
+ return DAG.getNode(ISD::OR, DL, VT, N0, N1);
// fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n))
if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::SUB &&
- isNullConstant(N1.getOperand(0).getOperand(0)))
- return DAG.getNode(ISD::SUB, SDLoc(N), VT, N0,
- DAG.getNode(ISD::SHL, SDLoc(N), VT,
+ isNullConstantOrNullSplatConstant(N1.getOperand(0).getOperand(0)))
+ return DAG.getNode(ISD::SUB, DL, VT, N0,
+ DAG.getNode(ISD::SHL, DL, VT,
N1.getOperand(0).getOperand(1),
N1.getOperand(1)));
if (N0.getOpcode() == ISD::SHL && N0.getOperand(0).getOpcode() == ISD::SUB &&
- isNullConstant(N0.getOperand(0).getOperand(0)))
- return DAG.getNode(ISD::SUB, SDLoc(N), VT, N1,
- DAG.getNode(ISD::SHL, SDLoc(N), VT,
+ isNullConstantOrNullSplatConstant(N0.getOperand(0).getOperand(0)))
+ return DAG.getNode(ISD::SUB, DL, VT, N1,
+ DAG.getNode(ISD::SHL, DL, VT,
N0.getOperand(0).getOperand(1),
N0.getOperand(1)));
if (N1.getOpcode() == ISD::AND) {
SDValue AndOp0 = N1.getOperand(0);
unsigned NumSignBits = DAG.ComputeNumSignBits(AndOp0);
- unsigned DestBits = VT.getScalarType().getSizeInBits();
+ unsigned DestBits = VT.getScalarSizeInBits();
// (add z, (and (sbbl x, x), 1)) -> (sub z, (sbbl x, x))
// and similar xforms where the inner op is either ~0 or 0.
- if (NumSignBits == DestBits && isOneConstant(N1->getOperand(1))) {
- SDLoc DL(N);
+ if (NumSignBits == DestBits &&
+ isOneConstantOrOneSplatConstant(N1->getOperand(1)))
return DAG.getNode(ISD::SUB, DL, VT, N->getOperand(0), AndOp0);
- }
}
// add (sext i1), X -> sub X, (zext i1)
if (N0.getOpcode() == ISD::SIGN_EXTEND &&
N0.getOperand(0).getValueType() == MVT::i1 &&
!TLI.isOperationLegal(ISD::SIGN_EXTEND, MVT::i1)) {
- SDLoc DL(N);
SDValue ZExt = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt);
}
@@ -1767,7 +1812,6 @@ SDValue DAGCombiner::visitADD(SDNode *N) {
if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
if (TN->getVT() == MVT::i1) {
- SDLoc DL(N);
SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
DAG.getConstant(1, DL, VT));
return DAG.getNode(ISD::SUB, DL, VT, N0, ZExt);
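A worked check of the two i1 folds above:

    // For a boolean b: sext(b) is 0 or -1, zext(b) is 0 or 1, and
    // sext(b) == -zext(b), so X + sext(b) == X - zext(b).
    // E.g. X = 42, b = 1: 42 + (-1) = 41 = 42 - 1.
    // The sext_inreg variant first isolates the low bit with (and Y, 1),
    // which is exactly zext(b).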
@@ -1853,6 +1897,7 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
EVT VT = N0.getValueType();
+ SDLoc DL(N);
// fold vector ops
if (VT.isVector()) {
@@ -1867,62 +1912,97 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
// fold (sub x, x) -> 0
// FIXME: Refactor this and xor and other similar operations together.
if (N0 == N1)
- return tryFoldToZero(SDLoc(N), TLI, VT, DAG, LegalOperations, LegalTypes);
+ return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations, LegalTypes);
if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
// fold (sub c1, c2) -> c1-c2
- return DAG.FoldConstantArithmetic(ISD::SUB, SDLoc(N), VT,
- N0.getNode(), N1.getNode());
+ return DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, N0.getNode(),
+ N1.getNode());
}
- ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
+
ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
+
// fold (sub x, c) -> (add x, -c)
if (N1C) {
- SDLoc DL(N);
return DAG.getNode(ISD::ADD, DL, VT, N0,
DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
}
+
+ if (isNullConstantOrNullSplatConstant(N0)) {
+ unsigned BitWidth = VT.getScalarSizeInBits();
+ // Right-shifting everything out but the sign bit followed by negation is
+ // the same as flipping arithmetic/logical shift type without the negation:
+ // -(X >>u 31) -> (X >>s 31)
+ // -(X >>s 31) -> (X >>u 31)
+ if (N1->getOpcode() == ISD::SRA || N1->getOpcode() == ISD::SRL) {
+ ConstantSDNode *ShiftAmt = isConstOrConstSplat(N1.getOperand(1));
+ if (ShiftAmt && ShiftAmt->getZExtValue() == BitWidth - 1) {
+ auto NewSh = N1->getOpcode() == ISD::SRA ? ISD::SRL : ISD::SRA;
+ if (!LegalOperations || TLI.isOperationLegal(NewSh, VT))
+ return DAG.getNode(NewSh, DL, VT, N1.getOperand(0), N1.getOperand(1));
+ }
+ }
+
+ // 0 - X --> 0 if the sub is NUW.
+ if (N->getFlags()->hasNoUnsignedWrap())
+ return N0;
+
+ if (DAG.MaskedValueIsZero(N1, ~APInt::getSignBit(BitWidth))) {
+ // N1 is either 0 or the minimum signed value. If the sub is NSW, then
+ // N1 must be 0 because negating the minimum signed value is undefined.
+ if (N->getFlags()->hasNoSignedWrap())
+ return N0;
+
+ // 0 - X --> X if X is 0 or the minimum signed value.
+ return N1;
+ }
+ }
+
// Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1)
- if (isAllOnesConstant(N0))
- return DAG.getNode(ISD::XOR, SDLoc(N), VT, N1, N0);
+ if (isAllOnesConstantOrAllOnesSplatConstant(N0))
+ return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
+
// fold A-(A-B) -> B
if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(0))
return N1.getOperand(1);
+
// fold (A+B)-A -> B
if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1)
return N0.getOperand(1);
+
// fold (A+B)-B -> A
if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1)
return N0.getOperand(0);
+
// fold C2-(A+C1) -> (C2-C1)-A
- ConstantSDNode *N1C1 = N1.getOpcode() != ISD::ADD ? nullptr :
- dyn_cast<ConstantSDNode>(N1.getOperand(1).getNode());
- if (N1.getOpcode() == ISD::ADD && N0C && N1C1) {
- SDLoc DL(N);
- SDValue NewC = DAG.getConstant(N0C->getAPIntValue() - N1C1->getAPIntValue(),
- DL, VT);
- return DAG.getNode(ISD::SUB, DL, VT, NewC,
- N1.getOperand(0));
+ if (N1.getOpcode() == ISD::ADD) {
+ SDValue N11 = N1.getOperand(1);
+ if (isConstantOrConstantVector(N0, /* NoOpaques */ true) &&
+ isConstantOrConstantVector(N11, /* NoOpaques */ true)) {
+ SDValue NewC = DAG.getNode(ISD::SUB, DL, VT, N0, N11);
+ return DAG.getNode(ISD::SUB, DL, VT, NewC, N1.getOperand(0));
+ }
}
+
// fold ((A+(B+or-C))-B) -> A+or-C
if (N0.getOpcode() == ISD::ADD &&
(N0.getOperand(1).getOpcode() == ISD::SUB ||
N0.getOperand(1).getOpcode() == ISD::ADD) &&
N0.getOperand(1).getOperand(0) == N1)
- return DAG.getNode(N0.getOperand(1).getOpcode(), SDLoc(N), VT,
- N0.getOperand(0), N0.getOperand(1).getOperand(1));
+ return DAG.getNode(N0.getOperand(1).getOpcode(), DL, VT, N0.getOperand(0),
+ N0.getOperand(1).getOperand(1));
+
// fold ((A+(C+B))-B) -> A+C
- if (N0.getOpcode() == ISD::ADD &&
- N0.getOperand(1).getOpcode() == ISD::ADD &&
+ if (N0.getOpcode() == ISD::ADD && N0.getOperand(1).getOpcode() == ISD::ADD &&
N0.getOperand(1).getOperand(1) == N1)
- return DAG.getNode(ISD::ADD, SDLoc(N), VT,
- N0.getOperand(0), N0.getOperand(1).getOperand(0));
+ return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0),
+ N0.getOperand(1).getOperand(0));
+
// fold ((A-(B-C))-C) -> A-B
- if (N0.getOpcode() == ISD::SUB &&
- N0.getOperand(1).getOpcode() == ISD::SUB &&
+ if (N0.getOpcode() == ISD::SUB && N0.getOperand(1).getOpcode() == ISD::SUB &&
N0.getOperand(1).getOperand(1) == N1)
- return DAG.getNode(ISD::SUB, SDLoc(N), VT,
- N0.getOperand(0), N0.getOperand(1).getOperand(0));
+ return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0),
+ N0.getOperand(1).getOperand(0));
// If either operand of a sub is undef, the result is undef
if (N0.isUndef())
@@ -1937,19 +2017,18 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
if (N1C && GA->getOpcode() == ISD::GlobalAddress)
return DAG.getGlobalAddress(GA->getGlobal(), SDLoc(N1C), VT,
GA->getOffset() -
- (uint64_t)N1C->getSExtValue());
+ (uint64_t)N1C->getSExtValue());
// fold (sub Sym+c1, Sym+c2) -> c1-c2
if (GlobalAddressSDNode *GB = dyn_cast<GlobalAddressSDNode>(N1))
if (GA->getGlobal() == GB->getGlobal())
return DAG.getConstant((uint64_t)GA->getOffset() - GB->getOffset(),
- SDLoc(N), VT);
+ DL, VT);
}
// sub X, (sextinreg Y i1) -> add X, (and Y 1)
if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
if (TN->getVT() == MVT::i1) {
- SDLoc DL(N);
SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
DAG.getConstant(1, DL, VT));
return DAG.getNode(ISD::ADD, DL, VT, N0, ZExt);
@@ -2048,7 +2127,7 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
// We require a splat of the entire scalar bit width for non-contiguous
// bit patterns.
bool IsFullSplat =
- ConstValue1.getBitWidth() == VT.getScalarType().getSizeInBits();
+ ConstValue1.getBitWidth() == VT.getScalarSizeInBits();
// fold (mul x, 1) -> x
if (N1IsConst && ConstValue1 == 1 && IsFullSplat)
return N0;
@@ -2080,28 +2159,27 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
getShiftAmountTy(N0.getValueType()))));
}
- APInt Val;
// (mul (shl X, c1), c2) -> (mul X, c2 << c1)
- if (N1IsConst && N0.getOpcode() == ISD::SHL &&
- (ISD::isConstantSplatVector(N0.getOperand(1).getNode(), Val) ||
- isa<ConstantSDNode>(N0.getOperand(1)))) {
+ if (N0.getOpcode() == ISD::SHL &&
+ isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
+ isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) {
SDValue C3 = DAG.getNode(ISD::SHL, SDLoc(N), VT, N1, N0.getOperand(1));
- AddToWorklist(C3.getNode());
- return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), C3);
+ if (isConstantOrConstantVector(C3))
+ return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), C3);
}
// Change (mul (shl X, C), Y) -> (shl (mul X, Y), C) when the shift has one
// use.
{
SDValue Sh(nullptr, 0), Y(nullptr, 0);
+
// Check for both (mul (shl X, C), Y) and (mul Y, (shl X, C)).
if (N0.getOpcode() == ISD::SHL &&
- (ISD::isConstantSplatVector(N0.getOperand(1).getNode(), Val) ||
- isa<ConstantSDNode>(N0.getOperand(1))) &&
+ isConstantOrConstantVector(N0.getOperand(1)) &&
N0.getNode()->hasOneUse()) {
Sh = N0; Y = N1;
} else if (N1.getOpcode() == ISD::SHL &&
- isa<ConstantSDNode>(N1.getOperand(1)) &&
+ isConstantOrConstantVector(N1.getOperand(1)) &&
N1.getNode()->hasOneUse()) {
Sh = N1; Y = N0;
}
@@ -2188,8 +2266,8 @@ SDValue DAGCombiner::useDivRem(SDNode *Node) {
SDValue Op1 = Node->getOperand(1);
SDValue combined;
for (SDNode::use_iterator UI = Op0.getNode()->use_begin(),
- UE = Op0.getNode()->use_end(); UI != UE; ++UI) {
- SDNode *User = *UI;
+ UE = Op0.getNode()->use_end(); UI != UE;) {
+ SDNode *User = *UI++;
if (User == Node || User->use_empty())
continue;
// Convert the other matching node(s), too;
@@ -2246,10 +2324,8 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) {
// If we know the sign bits of both operands are zero, strength reduce to a
// udiv instead. Handles (X&15) /s 4 -> X&15 >> 2
- if (!VT.isVector()) {
- if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
- return DAG.getNode(ISD::UDIV, DL, N1.getValueType(), N0, N1);
- }
+ if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
+ return DAG.getNode(ISD::UDIV, DL, N1.getValueType(), N0, N1);
// fold (sdiv X, pow2) -> simple ops after legalize
// FIXME: We check for the exact bit here because the generic lowering gives
@@ -2302,8 +2378,8 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) {
return Op;
// sdiv, srem -> sdivrem
- // If the divisor is constant, then return DIVREM only if isIntDivCheap() is true.
- // Otherwise, we break the simplification logic in visitREM().
+ // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
+ // true. Otherwise, we break the simplification logic in visitREM().
if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
if (SDValue DivRem = useDivRem(N))
return DivRem;
@@ -2337,25 +2413,33 @@ SDValue DAGCombiner::visitUDIV(SDNode *N) {
if (SDValue Folded = DAG.FoldConstantArithmetic(ISD::UDIV, DL, VT,
N0C, N1C))
return Folded;
+
// fold (udiv x, (1 << c)) -> x >>u c
- if (N1C && !N1C->isOpaque() && N1C->getAPIntValue().isPowerOf2())
- return DAG.getNode(ISD::SRL, DL, VT, N0,
- DAG.getConstant(N1C->getAPIntValue().logBase2(), DL,
- getShiftAmountTy(N0.getValueType())));
+ if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
+ DAG.isKnownToBeAPowerOfTwo(N1)) {
+ SDValue LogBase2 = BuildLogBase2(N1, DL);
+ AddToWorklist(LogBase2.getNode());
+
+ EVT ShiftVT = getShiftAmountTy(N0.getValueType());
+ SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
+ AddToWorklist(Trunc.getNode());
+ return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
+ }
// fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2
if (N1.getOpcode() == ISD::SHL) {
- if (ConstantSDNode *SHC = getAsNonOpaqueConstant(N1.getOperand(0))) {
- if (SHC->getAPIntValue().isPowerOf2()) {
- EVT ADDVT = N1.getOperand(1).getValueType();
- SDValue Add = DAG.getNode(ISD::ADD, DL, ADDVT,
- N1.getOperand(1),
- DAG.getConstant(SHC->getAPIntValue()
- .logBase2(),
- DL, ADDVT));
- AddToWorklist(Add.getNode());
- return DAG.getNode(ISD::SRL, DL, VT, N0, Add);
- }
+ SDValue N10 = N1.getOperand(0);
+ if (isConstantOrConstantVector(N10, /*NoOpaques*/ true) &&
+ DAG.isKnownToBeAPowerOfTwo(N10)) {
+ SDValue LogBase2 = BuildLogBase2(N10, DL);
+ AddToWorklist(LogBase2.getNode());
+
+ EVT ADDVT = N1.getOperand(1).getValueType();
+ SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ADDVT);
+ AddToWorklist(Trunc.getNode());
+ SDValue Add = DAG.getNode(ISD::ADD, DL, ADDVT, N1.getOperand(1), Trunc);
+ AddToWorklist(Add.getNode());
+ return DAG.getNode(ISD::SRL, DL, VT, N0, Add);
}
}
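// e.g. (udiv x, (shl 4, y)) -> (srl x, (add y, 2)), since dividing by
// (4 << y) is the same as shifting right by log2(4) + y.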
@@ -2366,8 +2450,8 @@ SDValue DAGCombiner::visitUDIV(SDNode *N) {
return Op;
// udiv, urem -> udivrem
- // If the divisor is constant, then return DIVREM only if isIntDivCheap() is true.
- // Otherwise, we break the simplification logic in visitREM().
+ // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
+ // true. Otherwise, we break the simplification logic in visitREM().
if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
if (SDValue DivRem = useDivRem(N))
return DivRem;
@@ -2401,27 +2485,25 @@ SDValue DAGCombiner::visitREM(SDNode *N) {
if (isSigned) {
// If we know the sign bits of both operands are zero, strength reduce to a
// urem instead. Handles (X & 0x0FFFFFFF) %s 16 -> X&15
- if (!VT.isVector()) {
- if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
- return DAG.getNode(ISD::UREM, DL, VT, N0, N1);
- }
+ if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
+ return DAG.getNode(ISD::UREM, DL, VT, N0, N1);
} else {
// fold (urem x, pow2) -> (and x, pow2-1)
- if (N1C && !N1C->isNullValue() && !N1C->isOpaque() &&
- N1C->getAPIntValue().isPowerOf2()) {
- return DAG.getNode(ISD::AND, DL, VT, N0,
- DAG.getConstant(N1C->getAPIntValue() - 1, DL, VT));
+ if (DAG.isKnownToBeAPowerOfTwo(N1)) {
+ APInt NegOne = APInt::getAllOnesValue(VT.getScalarSizeInBits());
+ SDValue Add =
+ DAG.getNode(ISD::ADD, DL, VT, N1, DAG.getConstant(NegOne, DL, VT));
+ AddToWorklist(Add.getNode());
+ return DAG.getNode(ISD::AND, DL, VT, N0, Add);
}
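// e.g. (urem x, 16) -> (and x, 15): (add 16, -1) is a mask of all ones
// below the divisor's single set bit.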
// fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1))
- if (N1.getOpcode() == ISD::SHL) {
- ConstantSDNode *SHC = getAsNonOpaqueConstant(N1.getOperand(0));
- if (SHC && SHC->getAPIntValue().isPowerOf2()) {
- APInt NegOne = APInt::getAllOnesValue(VT.getSizeInBits());
- SDValue Add =
- DAG.getNode(ISD::ADD, DL, VT, N1, DAG.getConstant(NegOne, DL, VT));
- AddToWorklist(Add.getNode());
- return DAG.getNode(ISD::AND, DL, VT, N0, Add);
- }
+ if (N1.getOpcode() == ISD::SHL &&
+ DAG.isKnownToBeAPowerOfTwo(N1.getOperand(0))) {
+ APInt NegOne = APInt::getAllOnesValue(VT.getScalarSizeInBits());
+ SDValue Add =
+ DAG.getNode(ISD::ADD, DL, VT, N1, DAG.getConstant(NegOne, DL, VT));
+ AddToWorklist(Add.getNode());
+ return DAG.getNode(ISD::AND, DL, VT, N0, Add);
}
}
@@ -2477,8 +2559,7 @@ SDValue DAGCombiner::visitMULHS(SDNode *N) {
if (isOneConstant(N1)) {
SDLoc DL(N);
return DAG.getNode(ISD::SRA, DL, N0.getValueType(), N0,
- DAG.getConstant(N0.getValueType().getSizeInBits() - 1,
- DL,
+ DAG.getConstant(N0.getValueSizeInBits() - 1, DL,
getShiftAmountTy(N0.getValueType())));
}
// fold (mulhs x, undef) -> 0
@@ -2706,7 +2787,7 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) {
assert(N0.getOpcode() == N1.getOpcode() && "Bad input!");
// Bail early if none of these transforms apply.
- if (N0.getNode()->getNumOperands() == 0) return SDValue();
+ if (N0.getNumOperands() == 0) return SDValue();
// For each of OP in AND/OR/XOR:
// fold (OP (zext x), (zext y)) -> (zext (OP x, y))
@@ -2872,25 +2953,34 @@ SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1,
LL.getValueType().isInteger()) {
// fold (and (seteq X, 0), (seteq Y, 0)) -> (seteq (or X, Y), 0)
if (isNullConstant(LR) && Op1 == ISD::SETEQ) {
- SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(N0),
- LR.getValueType(), LL, RL);
- AddToWorklist(ORNode.getNode());
- return DAG.getSetCC(SDLoc(LocReference), VT, ORNode, LR, Op1);
+ EVT CCVT = getSetCCResultType(LR.getValueType());
+ if (VT == CCVT || (!LegalOperations && VT == MVT::i1)) {
+ SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(N0),
+ LR.getValueType(), LL, RL);
+ AddToWorklist(ORNode.getNode());
+ return DAG.getSetCC(SDLoc(LocReference), VT, ORNode, LR, Op1);
+ }
}
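// Rationale: both compares are zero exactly when (or X, Y) is zero. The
// CCVT check (repeated in the folds below) guards against building a setcc
// whose result type no longer matches VT.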
if (isAllOnesConstant(LR)) {
// fold (and (seteq X, -1), (seteq Y, -1)) -> (seteq (and X, Y), -1)
if (Op1 == ISD::SETEQ) {
- SDValue ANDNode = DAG.getNode(ISD::AND, SDLoc(N0),
- LR.getValueType(), LL, RL);
- AddToWorklist(ANDNode.getNode());
- return DAG.getSetCC(SDLoc(LocReference), VT, ANDNode, LR, Op1);
+ EVT CCVT = getSetCCResultType(LR.getValueType());
+ if (VT == CCVT || (!LegalOperations && VT == MVT::i1)) {
+ SDValue ANDNode = DAG.getNode(ISD::AND, SDLoc(N0),
+ LR.getValueType(), LL, RL);
+ AddToWorklist(ANDNode.getNode());
+ return DAG.getSetCC(SDLoc(LocReference), VT, ANDNode, LR, Op1);
+ }
}
// fold (and (setgt X, -1), (setgt Y, -1)) -> (setgt (or X, Y), -1)
if (Op1 == ISD::SETGT) {
- SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(N0),
- LR.getValueType(), LL, RL);
- AddToWorklist(ORNode.getNode());
- return DAG.getSetCC(SDLoc(LocReference), VT, ORNode, LR, Op1);
+ EVT CCVT = getSetCCResultType(LR.getValueType());
+ if (VT == CCVT || (!LegalOperations && VT == MVT::i1)) {
+ SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(N0),
+ LR.getValueType(), LL, RL);
+ AddToWorklist(ORNode.getNode());
+ return DAG.getSetCC(SDLoc(LocReference), VT, ORNode, LR, Op1);
+ }
}
}
}
@@ -2899,14 +2989,17 @@ SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1,
Op0 == Op1 && LL.getValueType().isInteger() &&
Op0 == ISD::SETNE && ((isNullConstant(LR) && isAllOnesConstant(RR)) ||
(isAllOnesConstant(LR) && isNullConstant(RR)))) {
- SDLoc DL(N0);
- SDValue ADDNode = DAG.getNode(ISD::ADD, DL, LL.getValueType(),
- LL, DAG.getConstant(1, DL,
- LL.getValueType()));
- AddToWorklist(ADDNode.getNode());
- return DAG.getSetCC(SDLoc(LocReference), VT, ADDNode,
- DAG.getConstant(2, DL, LL.getValueType()),
- ISD::SETUGE);
+ EVT CCVT = getSetCCResultType(LL.getValueType());
+ if (VT == CCVT || (!LegalOperations && VT == MVT::i1)) {
+ SDLoc DL(N0);
+ SDValue ADDNode = DAG.getNode(ISD::ADD, DL, LL.getValueType(),
+ LL, DAG.getConstant(1, DL,
+ LL.getValueType()));
+ AddToWorklist(ADDNode.getNode());
+ return DAG.getSetCC(SDLoc(LocReference), VT, ADDNode,
+ DAG.getConstant(2, DL, LL.getValueType()),
+ ISD::SETUGE);
+ }
}
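// e.g. X == 0 or X == -1 gives (add X, 1) == 1 or 0, which fails the
// unsigned >= 2 test; every other X passes it, matching the original pair
// of setne checks.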
// canonicalize equivalent to ll == rl
if (LL == RR && LR == RL) {
@@ -2967,6 +3060,11 @@ SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1,
unsigned Size = VT.getSizeInBits();
const APInt &AndMask = CAnd->getAPIntValue();
unsigned ShiftBits = CShift->getZExtValue();
+
+ // Bail out; this node will probably disappear anyway.
+ if (ShiftBits == 0)
+ return SDValue();
+
unsigned MaskBits = AndMask.countTrailingOnes();
EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), Size / 2);
@@ -2985,7 +3083,7 @@ SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1,
// extended to handle extensions mixed in.
SDValue SL(N0);
- assert(ShiftBits != 0 && MaskBits <= Size);
+ assert(MaskBits <= Size);
// Extracting the highest bit of the low half.
EVT ShiftVT = TLI.getShiftAmountTy(HalfVT, DAG.getDataLayout());
@@ -3050,6 +3148,10 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
SDValue N1 = N->getOperand(1);
EVT VT = N1.getValueType();
+ // x & x --> x
+ if (N0 == N1)
+ return N0;
+
// fold vector ops
if (VT.isVector()) {
if (SDValue FoldedVOp = SimplifyVBinOp(N))
@@ -3058,16 +3160,12 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
// fold (and x, 0) -> 0, vector edition
if (ISD::isBuildVectorAllZeros(N0.getNode()))
// do not return N0, because undef node may exist in N0
- return DAG.getConstant(
- APInt::getNullValue(
- N0.getValueType().getScalarType().getSizeInBits()),
- SDLoc(N), N0.getValueType());
+ return DAG.getConstant(APInt::getNullValue(N0.getScalarValueSizeInBits()),
+ SDLoc(N), N0.getValueType());
if (ISD::isBuildVectorAllZeros(N1.getNode()))
// do not return N1, because undef node may exist in N1
- return DAG.getConstant(
- APInt::getNullValue(
- N1.getValueType().getScalarType().getSizeInBits()),
- SDLoc(N), N1.getValueType());
+ return DAG.getConstant(APInt::getNullValue(N1.getScalarValueSizeInBits()),
+ SDLoc(N), N1.getValueType());
// fold (and x, -1) -> x, vector edition
if (ISD::isBuildVectorAllOnes(N0.getNode()))
@@ -3078,7 +3176,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
// fold (and c1, c2) -> c1&c2
ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
- ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+ ConstantSDNode *N1C = isConstOrConstSplat(N1);
if (N0C && N1C && !N1C->isOpaque())
return DAG.FoldConstantArithmetic(ISD::AND, SDLoc(N), VT, N0C, N1C);
// canonicalize constant to RHS
@@ -3089,7 +3187,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
if (isAllOnesConstant(N1))
return N0;
// if (and x, c) is known to be zero, return 0
- unsigned BitWidth = VT.getScalarType().getSizeInBits();
+ unsigned BitWidth = VT.getScalarSizeInBits();
if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
APInt::getAllOnesValue(BitWidth)))
return DAG.getConstant(0, SDLoc(N), VT);
@@ -3098,14 +3196,14 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
return RAND;
// fold (and (or x, C), D) -> D if (C & D) == D
if (N1C && N0.getOpcode() == ISD::OR)
- if (ConstantSDNode *ORI = dyn_cast<ConstantSDNode>(N0.getOperand(1)))
+ if (ConstantSDNode *ORI = isConstOrConstSplat(N0.getOperand(1)))
if ((ORI->getAPIntValue() & N1C->getAPIntValue()) == N1C->getAPIntValue())
return N1;
// fold (and (any_ext V), c) -> (zero_ext V) if 'and' only clears top bits.
if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
SDValue N0Op0 = N0.getOperand(0);
APInt Mask = ~N1C->getAPIntValue();
- Mask = Mask.trunc(N0Op0.getValueSizeInBits());
+ Mask = Mask.trunc(N0Op0.getScalarValueSizeInBits());
if (DAG.MaskedValueIsZero(N0Op0, Mask)) {
SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N),
N0.getValueType(), N0Op0);
@@ -3156,7 +3254,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
// that will apply equally to all members of the vector, so AND all the
// lanes of the constant together.
EVT VT = Vector->getValueType(0);
- unsigned BitWidth = VT.getVectorElementType().getSizeInBits();
+ unsigned BitWidth = VT.getScalarSizeInBits();
// If the splat value has been compressed to a bitlength lower
// than the size of the vector lane, we need to re-expand it to
@@ -3187,8 +3285,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
// Resize the constant to the same size as the original memory access before
// extension. If it is still the AllOnesValue then this AND is completely
// unneeded.
- Constant =
- Constant.zextOrTrunc(Load->getMemoryVT().getScalarType().getSizeInBits());
+ Constant = Constant.zextOrTrunc(Load->getMemoryVT().getScalarSizeInBits());
bool B;
switch (Load->getExtensionType()) {
@@ -3230,9 +3327,9 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
// fold (and (load x), 255) -> (zextload x, i8)
// fold (and (extload x, i16), 255) -> (zextload x, i8)
// fold (and (any_ext (extload x, i16)), 255) -> (zextload x, i8)
- if (N1C && (N0.getOpcode() == ISD::LOAD ||
- (N0.getOpcode() == ISD::ANY_EXTEND &&
- N0.getOperand(0).getOpcode() == ISD::LOAD))) {
+ if (!VT.isVector() && N1C && (N0.getOpcode() == ISD::LOAD ||
+ (N0.getOpcode() == ISD::ANY_EXTEND &&
+ N0.getOperand(0).getOpcode() == ISD::LOAD))) {
bool HasAnyExt = N0.getOpcode() == ISD::ANY_EXTEND;
LoadSDNode *LN0 = HasAnyExt
? cast<LoadSDNode>(N0.getOperand(0))
@@ -3293,10 +3390,29 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
if (SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N))
return Tmp;
+ // Masking the negated extension of a boolean is just the zero-extended
+ // boolean:
+ // and (sub 0, zext(bool X)), 1 --> zext(bool X)
+ // and (sub 0, sext(bool X)), 1 --> zext(bool X)
+ //
+ // Note: the SimplifyDemandedBits fold below can make an information-losing
+ // transform, and then we have no way to find this better fold.
+ if (N1C && N1C->isOne() && N0.getOpcode() == ISD::SUB) {
+ ConstantSDNode *SubLHS = isConstOrConstSplat(N0.getOperand(0));
+ SDValue SubRHS = N0.getOperand(1);
+ if (SubLHS && SubLHS->isNullValue()) {
+ if (SubRHS.getOpcode() == ISD::ZERO_EXTEND &&
+ SubRHS.getOperand(0).getScalarValueSizeInBits() == 1)
+ return SubRHS;
+ if (SubRHS.getOpcode() == ISD::SIGN_EXTEND &&
+ SubRHS.getOperand(0).getScalarValueSizeInBits() == 1)
+ return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, SubRHS.getOperand(0));
+ }
+ }
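+ // Sketch for a boolean X: (sub 0, zext(X)) is 0 or -1 and
+ // (sub 0, sext(X)) is 0 or 1, so masking either with 1 leaves exactly
+ // zext(X).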
+
// fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1)
// fold (and (sra)) -> (and (srl)) when possible.
- if (!VT.isVector() &&
- SimplifyDemandedBits(SDValue(N, 0)))
+ if (!VT.isVector() && SimplifyDemandedBits(SDValue(N, 0)))
return SDValue(N, 0);
// fold (zext_inreg (extload x)) -> (zextload x)
@@ -3305,9 +3421,9 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
EVT MemVT = LN0->getMemoryVT();
// If we zero all the possible extended bits, then we can turn this into
// a zextload if we are running before legalize or the operation is legal.
- unsigned BitWidth = N1.getValueType().getScalarType().getSizeInBits();
+ unsigned BitWidth = N1.getScalarValueSizeInBits();
if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth,
- BitWidth - MemVT.getScalarType().getSizeInBits())) &&
+ BitWidth - MemVT.getScalarSizeInBits())) &&
((!LegalOperations && !LN0->isVolatile()) ||
TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT,
@@ -3325,9 +3441,9 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
EVT MemVT = LN0->getMemoryVT();
// If we zero all the possible extended bits, then we can turn this into
// a zextload if we are running before legalize or the operation is legal.
- unsigned BitWidth = N1.getValueType().getScalarType().getSizeInBits();
+ unsigned BitWidth = N1.getScalarValueSizeInBits();
if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth,
- BitWidth - MemVT.getScalarType().getSizeInBits())) &&
+ BitWidth - MemVT.getScalarSizeInBits())) &&
((!LegalOperations && !LN0->isVolatile()) ||
TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT,
@@ -3391,8 +3507,7 @@ SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
std::swap(N0, N1);
if (N0.getOpcode() != ISD::SHL || N1.getOpcode() != ISD::SRL)
return SDValue();
- if (!N0.getNode()->hasOneUse() ||
- !N1.getNode()->hasOneUse())
+ if (!N0.getNode()->hasOneUse() || !N1.getNode()->hasOneUse())
return SDValue();
ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
@@ -3627,18 +3742,24 @@ SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, SDNode *LocReference) {
// fold (or (setne X, 0), (setne Y, 0)) -> (setne (or X, Y), 0)
// fold (or (setlt X, 0), (setlt Y, 0)) -> (setne (or X, Y), 0)
if (isNullConstant(LR) && (Op1 == ISD::SETNE || Op1 == ISD::SETLT)) {
- SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(LR),
- LR.getValueType(), LL, RL);
- AddToWorklist(ORNode.getNode());
- return DAG.getSetCC(SDLoc(LocReference), VT, ORNode, LR, Op1);
+ EVT CCVT = getSetCCResultType(LR.getValueType());
+ if (VT == CCVT || (!LegalOperations && VT == MVT::i1)) {
+ SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(LR),
+ LR.getValueType(), LL, RL);
+ AddToWorklist(ORNode.getNode());
+ return DAG.getSetCC(SDLoc(LocReference), VT, ORNode, LR, Op1);
+ }
}
// fold (or (setne X, -1), (setne Y, -1)) -> (setne (and X, Y), -1)
// fold (or (setgt X, -1), (setgt Y -1)) -> (setgt (and X, Y), -1)
if (isAllOnesConstant(LR) && (Op1 == ISD::SETNE || Op1 == ISD::SETGT)) {
- SDValue ANDNode = DAG.getNode(ISD::AND, SDLoc(LR),
- LR.getValueType(), LL, RL);
- AddToWorklist(ANDNode.getNode());
- return DAG.getSetCC(SDLoc(LocReference), VT, ANDNode, LR, Op1);
+ EVT CCVT = getSetCCResultType(LR.getValueType());
+ if (VT == CCVT || (!LegalOperations && VT == MVT::i1)) {
+ SDValue ANDNode = DAG.getNode(ISD::AND, SDLoc(LR),
+ LR.getValueType(), LL, RL);
+ AddToWorklist(ANDNode.getNode());
+ return DAG.getSetCC(SDLoc(LocReference), VT, ANDNode, LR, Op1);
+ }
}
}
// canonicalize equivalent to ll == rl
@@ -3708,6 +3829,10 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
SDValue N1 = N->getOperand(1);
EVT VT = N1.getValueType();
+ // x | x --> x
+ if (N0 == N1)
+ return N0;
+
// fold vector ops
if (VT.isVector()) {
if (SDValue FoldedVOp = SimplifyVBinOp(N))
@@ -3723,15 +3848,13 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
if (ISD::isBuildVectorAllOnes(N0.getNode()))
// do not return N0, because undef node may exist in N0
return DAG.getConstant(
- APInt::getAllOnesValue(
- N0.getValueType().getScalarType().getSizeInBits()),
- SDLoc(N), N0.getValueType());
+ APInt::getAllOnesValue(N0.getScalarValueSizeInBits()), SDLoc(N),
+ N0.getValueType());
if (ISD::isBuildVectorAllOnes(N1.getNode()))
// do not return N1, because undef node may exist in N1
return DAG.getConstant(
- APInt::getAllOnesValue(
- N1.getValueType().getScalarType().getSizeInBits()),
- SDLoc(N), N1.getValueType());
+ APInt::getAllOnesValue(N1.getScalarValueSizeInBits()), SDLoc(N),
+ N1.getValueType());
// fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf A, B, Mask)
// Do this only if the resulting shuffle is legal.
@@ -4122,6 +4245,110 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
return nullptr;
}
+namespace {
+/// Helper struct to parse and store a memory address as base + index + offset.
+/// We ignore sign extensions when it is safe to do so.
+/// The following two expressions are not equivalent. To differentiate we need
+/// to store whether there was a sign extension involved in the index
+/// computation.
+/// (load (i64 add (i64 copyfromreg %c)
+/// (i64 signextend (add (i8 load %index)
+/// (i8 1)))))
+/// vs
+///
+/// (load (i64 add (i64 copyfromreg %c)
+/// (i64 signextend (i32 add (i32 signextend (i8 load %index))
+/// (i32 1)))))
+struct BaseIndexOffset {
+ SDValue Base;
+ SDValue Index;
+ int64_t Offset;
+ bool IsIndexSignExt;
+
+ BaseIndexOffset() : Offset(0), IsIndexSignExt(false) {}
+
+ BaseIndexOffset(SDValue Base, SDValue Index, int64_t Offset,
+ bool IsIndexSignExt) :
+ Base(Base), Index(Index), Offset(Offset), IsIndexSignExt(IsIndexSignExt) {}
+
+ bool equalBaseIndex(const BaseIndexOffset &Other) {
+ return Other.Base == Base && Other.Index == Index &&
+ Other.IsIndexSignExt == IsIndexSignExt;
+ }
+
+ /// Parses the tree in Ptr for base, index, and offset addresses.
+ static BaseIndexOffset match(SDValue Ptr, SelectionDAG &DAG,
+ int64_t PartialOffset = 0) {
+ bool IsIndexSignExt = false;
+
+ // Split up a folded GlobalAddress+Offset into its component parts.
+ if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Ptr))
+ if (GA->getOpcode() == ISD::GlobalAddress && GA->getOffset() != 0) {
+ return BaseIndexOffset(DAG.getGlobalAddress(GA->getGlobal(),
+ SDLoc(GA),
+ GA->getValueType(0),
+ /*Offset=*/PartialOffset,
+ /*isTargetGA=*/false,
+ GA->getTargetFlags()),
+ SDValue(),
+ GA->getOffset(),
+ IsIndexSignExt);
+ }
+
+ // We can only pattern match BASE + INDEX + OFFSET. If Ptr is not an ADD
+ // instruction, then it could be just the BASE or everything else we don't
+ // know how to handle. Just use Ptr as BASE and give up.
+ if (Ptr->getOpcode() != ISD::ADD)
+ return BaseIndexOffset(Ptr, SDValue(), PartialOffset, IsIndexSignExt);
+
+ // We know that we have at least an ADD instruction. Try to pattern match
+ // the simple case of BASE + OFFSET.
+ if (isa<ConstantSDNode>(Ptr->getOperand(1))) {
+ int64_t Offset = cast<ConstantSDNode>(Ptr->getOperand(1))->getSExtValue();
+ return match(Ptr->getOperand(0), DAG, Offset + PartialOffset);
+ }
+
+ // Inside a loop the current BASE pointer is calculated using an ADD and a
+ // MUL instruction. In this case Ptr is the actual BASE pointer.
+ // (i64 add (i64 %array_ptr)
+ // (i64 mul (i64 %induction_var)
+ // (i64 %element_size)))
+ if (Ptr->getOperand(1)->getOpcode() == ISD::MUL)
+ return BaseIndexOffset(Ptr, SDValue(), PartialOffset, IsIndexSignExt);
+
+ // Look at Base + Index + Offset cases.
+ SDValue Base = Ptr->getOperand(0);
+ SDValue IndexOffset = Ptr->getOperand(1);
+
+ // Skip signextends.
+ if (IndexOffset->getOpcode() == ISD::SIGN_EXTEND) {
+ IndexOffset = IndexOffset->getOperand(0);
+ IsIndexSignExt = true;
+ }
+
+ // Either the case of Base + Index (no offset) or something else.
+ if (IndexOffset->getOpcode() != ISD::ADD)
+ return BaseIndexOffset(Base, IndexOffset, PartialOffset, IsIndexSignExt);
+
+ // Now we have the case of Base + Index + offset.
+ SDValue Index = IndexOffset->getOperand(0);
+ SDValue Offset = IndexOffset->getOperand(1);
+
+ if (!isa<ConstantSDNode>(Offset))
+ return BaseIndexOffset(Ptr, SDValue(), PartialOffset, IsIndexSignExt);
+
+ // Ignore signextends.
+ if (Index->getOpcode() == ISD::SIGN_EXTEND) {
+ Index = Index->getOperand(0);
+ IsIndexSignExt = true;
+ } else IsIndexSignExt = false;
+
+ int64_t Off = cast<ConstantSDNode>(Offset)->getSExtValue();
+ return BaseIndexOffset(Base, Index, Off + PartialOffset, IsIndexSignExt);
+ }
+};
+} // namespace
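+// Usage sketch with hypothetical values: for a pointer expression
+//   (add (add %base, (sign_extend %idx)), 16)
+// match() returns {Base = %base, Index = %idx, Offset = 16,
+// IsIndexSignExt = true}; two results with the same Base, Index and
+// IsIndexSignExt compare equal under equalBaseIndex() regardless of Offset.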
+
SDValue DAGCombiner::visitXOR(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
@@ -4317,16 +4544,20 @@ SDValue DAGCombiner::visitShiftByConstant(SDNode *N, ConstantSDNode *Amt) {
ConstantSDNode *BinOpCst = getAsNonOpaqueConstant(LHS->getOperand(1));
if (!BinOpCst) return SDValue();
- // FIXME: disable this unless the input to the binop is a shift by a constant.
- // If it is not a shift, it pessimizes some common cases like:
- //
- // void foo(int *X, int i) { X[i & 1235] = 1; }
- // int bar(int *X, int i) { return X[i & 255]; }
+ // FIXME: disable this unless the input to the binop is a shift by a constant
+ // or is a copy/select. Enable this in other cases once it is known to be
+ // exactly profitable.
SDNode *BinOpLHSVal = LHS->getOperand(0).getNode();
- if ((BinOpLHSVal->getOpcode() != ISD::SHL &&
- BinOpLHSVal->getOpcode() != ISD::SRA &&
- BinOpLHSVal->getOpcode() != ISD::SRL) ||
- !isa<ConstantSDNode>(BinOpLHSVal->getOperand(1)))
+ bool isShift = BinOpLHSVal->getOpcode() == ISD::SHL ||
+ BinOpLHSVal->getOpcode() == ISD::SRA ||
+ BinOpLHSVal->getOpcode() == ISD::SRL;
+ bool isCopyOrSelect = BinOpLHSVal->getOpcode() == ISD::CopyFromReg ||
+ BinOpLHSVal->getOpcode() == ISD::SELECT;
+
+ if ((!isShift || !isa<ConstantSDNode>(BinOpLHSVal->getOperand(1))) &&
+ !isCopyOrSelect)
+ return SDValue();
+
+ if (isCopyOrSelect && N->hasOneUse())
return SDValue();
EVT VT = N->getValueType(0);
@@ -4366,19 +4597,15 @@ SDValue DAGCombiner::distributeTruncateThroughAnd(SDNode *N) {
// (truncate:TruncVT (and N00, N01C)) -> (and (truncate:TruncVT N00), TruncC)
if (N->hasOneUse() && N->getOperand(0).hasOneUse()) {
SDValue N01 = N->getOperand(0).getOperand(1);
-
- if (ConstantSDNode *N01C = isConstOrConstSplat(N01)) {
- if (!N01C->isOpaque()) {
- EVT TruncVT = N->getValueType(0);
- SDValue N00 = N->getOperand(0).getOperand(0);
- APInt TruncC = N01C->getAPIntValue();
- TruncC = TruncC.trunc(TruncVT.getScalarSizeInBits());
- SDLoc DL(N);
-
- return DAG.getNode(ISD::AND, DL, TruncVT,
- DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N00),
- DAG.getConstant(TruncC, DL, TruncVT));
- }
+ if (isConstantOrConstantVector(N01, /* NoOpaques */ true)) {
+ SDLoc DL(N);
+ EVT TruncVT = N->getValueType(0);
+ SDValue N00 = N->getOperand(0).getOperand(0);
+ SDValue Trunc00 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N00);
+ SDValue Trunc01 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N01);
+ AddToWorklist(Trunc00.getNode());
+ AddToWorklist(Trunc01.getNode());
+ return DAG.getNode(ISD::AND, DL, TruncVT, Trunc00, Trunc01);
}
}
@@ -4404,7 +4631,6 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
unsigned OpSizeInBits = VT.getScalarSizeInBits();
// fold vector ops
- ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
if (VT.isVector()) {
if (SDValue FoldedVOp = SimplifyVBinOp(N))
return FoldedVOp;
@@ -4425,12 +4651,12 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
N01CV, N1CV))
return DAG.getNode(ISD::AND, SDLoc(N), VT, N00, C);
}
- } else {
- N1C = isConstOrConstSplat(N1);
}
}
}
+ ConstantSDNode *N1C = isConstOrConstSplat(N1);
+
// fold (shl c1, c2) -> c1<<c2
ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
if (N0C && N1C && !N1C->isOpaque())
@@ -4464,13 +4690,18 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
// fold (shl (shl x, c1), c2) -> 0 or (shl x, (add c1, c2))
if (N1C && N0.getOpcode() == ISD::SHL) {
if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
- uint64_t c1 = N0C1->getZExtValue();
- uint64_t c2 = N1C->getZExtValue();
SDLoc DL(N);
- if (c1 + c2 >= OpSizeInBits)
+ APInt c1 = N0C1->getAPIntValue();
+ APInt c2 = N1C->getAPIntValue();
+ zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
+
+ APInt Sum = c1 + c2;
+ if (Sum.uge(OpSizeInBits))
return DAG.getConstant(0, DL, VT);
- return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0),
- DAG.getConstant(c1 + c2, DL, N1.getValueType()));
+
+ return DAG.getNode(
+ ISD::SHL, DL, VT, N0.getOperand(0),
+ DAG.getConstant(Sum.getZExtValue(), DL, N1.getValueType()));
}
}
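// e.g. (shl (shl x, 2), 3) -> (shl x, 5); when the amounts sum past the
// bit width, e.g. (shl (shl x, 20), 15) on i32, the result is the constant
// 0. Widening c1/c2 first keeps the sum from wrapping.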
@@ -4485,18 +4716,22 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
N0.getOperand(0).getOpcode() == ISD::SHL) {
SDValue N0Op0 = N0.getOperand(0);
if (ConstantSDNode *N0Op0C1 = isConstOrConstSplat(N0Op0.getOperand(1))) {
- uint64_t c1 = N0Op0C1->getZExtValue();
- uint64_t c2 = N1C->getZExtValue();
+ APInt c1 = N0Op0C1->getAPIntValue();
+ APInt c2 = N1C->getAPIntValue();
+ zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
+
EVT InnerShiftVT = N0Op0.getValueType();
uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits();
- if (c2 >= OpSizeInBits - InnerShiftSize) {
+ if (c2.uge(OpSizeInBits - InnerShiftSize)) {
SDLoc DL(N0);
- if (c1 + c2 >= OpSizeInBits)
+ APInt Sum = c1 + c2;
+ if (Sum.uge(OpSizeInBits))
return DAG.getConstant(0, DL, VT);
- return DAG.getNode(ISD::SHL, DL, VT,
- DAG.getNode(N0.getOpcode(), DL, VT,
- N0Op0->getOperand(0)),
- DAG.getConstant(c1 + c2, DL, N1.getValueType()));
+
+ return DAG.getNode(
+ ISD::SHL, DL, VT,
+ DAG.getNode(N0.getOpcode(), DL, VT, N0Op0->getOperand(0)),
+ DAG.getConstant(Sum.getZExtValue(), DL, N1.getValueType()));
}
}
}
@@ -4508,8 +4743,8 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
N0.getOperand(0).getOpcode() == ISD::SRL) {
SDValue N0Op0 = N0.getOperand(0);
if (ConstantSDNode *N0Op0C1 = isConstOrConstSplat(N0Op0.getOperand(1))) {
- uint64_t c1 = N0Op0C1->getZExtValue();
- if (c1 < VT.getScalarSizeInBits()) {
+ if (N0Op0C1->getAPIntValue().ult(VT.getScalarSizeInBits())) {
+ uint64_t c1 = N0Op0C1->getZExtValue();
uint64_t c2 = N1C->getZExtValue();
if (c1 == c2) {
SDValue NewOp0 = N0.getOperand(0);
@@ -4569,37 +4804,37 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
}
}
}
+
// fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1))
- if (N1C && N0.getOpcode() == ISD::SRA && N1 == N0.getOperand(1)) {
+ if (N0.getOpcode() == ISD::SRA && N1 == N0.getOperand(1) &&
+ isConstantOrConstantVector(N1, /* No Opaques */ true)) {
unsigned BitSize = VT.getScalarSizeInBits();
SDLoc DL(N);
- SDValue HiBitsMask =
- DAG.getConstant(APInt::getHighBitsSet(BitSize,
- BitSize - N1C->getZExtValue()),
- DL, VT);
- return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0),
- HiBitsMask);
+ SDValue AllBits = DAG.getConstant(APInt::getAllOnesValue(BitSize), DL, VT);
+ SDValue HiBitsMask = DAG.getNode(ISD::SHL, DL, VT, AllBits, N1);
+ return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), HiBitsMask);
}
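// e.g. on i8 with c1 == 3: (shl (sra x, 3), 3) clears the low three bits,
// i.e. (and x, 0xF8); building the mask as (shl -1, c1) also covers
// non-uniform vector shift amounts.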
// fold (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
// Variant of version done on multiply, except mul by a power of 2 is turned
// into a shift.
- APInt Val;
- if (N1C && N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse() &&
- (isa<ConstantSDNode>(N0.getOperand(1)) ||
- ISD::isConstantSplatVector(N0.getOperand(1).getNode(), Val))) {
+ if (N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse() &&
+ isConstantOrConstantVector(N1, /* No Opaques */ true) &&
+ isConstantOrConstantVector(N0.getOperand(1), /* No Opaques */ true)) {
SDValue Shl0 = DAG.getNode(ISD::SHL, SDLoc(N0), VT, N0.getOperand(0), N1);
SDValue Shl1 = DAG.getNode(ISD::SHL, SDLoc(N1), VT, N0.getOperand(1), N1);
+ AddToWorklist(Shl0.getNode());
+ AddToWorklist(Shl1.getNode());
return DAG.getNode(ISD::ADD, SDLoc(N), VT, Shl0, Shl1);
}
// fold (shl (mul x, c1), c2) -> (mul x, c1 << c2)
- if (N1C && N0.getOpcode() == ISD::MUL && N0.getNode()->hasOneUse()) {
- if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
- if (SDValue Folded =
- DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N1), VT, N0C1, N1C))
- return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), Folded);
- }
+ if (N0.getOpcode() == ISD::MUL && N0.getNode()->hasOneUse() &&
+ isConstantOrConstantVector(N1, /* No Opaques */ true) &&
+ isConstantOrConstantVector(N0.getOperand(1), /* No Opaques */ true)) {
+ SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N1), VT, N0.getOperand(1), N1);
+ if (isConstantOrConstantVector(Shl))
+ return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), Shl);
}
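// e.g. (shl (mul x, 5), 2) -> (mul x, 20), provided (shl 5, 2)
// constant-folds as checked above.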
if (N1C && !N1C->isOpaque())
@@ -4613,16 +4848,18 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
EVT VT = N0.getValueType();
- unsigned OpSizeInBits = VT.getScalarType().getSizeInBits();
+ unsigned OpSizeInBits = VT.getScalarSizeInBits();
+
+ // Arithmetic shifting an all-sign-bit value is a no-op.
+ if (DAG.ComputeNumSignBits(N0) == OpSizeInBits)
+ return N0;
// fold vector ops
- ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
- if (VT.isVector()) {
+ if (VT.isVector())
if (SDValue FoldedVOp = SimplifyVBinOp(N))
return FoldedVOp;
- N1C = isConstOrConstSplat(N1);
- }
+ ConstantSDNode *N1C = isConstOrConstSplat(N1);
// fold (sra c1, c2) -> (sra c1, c2)
ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
@@ -4634,8 +4871,8 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
// fold (sra -1, x) -> -1
if (isAllOnesConstant(N0))
return N0;
- // fold (sra x, (setge c, size(x))) -> undef
- if (N1C && N1C->getZExtValue() >= OpSizeInBits)
+ // fold (sra x, c >= size(x)) -> undef
+ if (N1C && N1C->getAPIntValue().uge(OpSizeInBits))
return DAG.getUNDEF(VT);
// fold (sra x, 0) -> x
if (N1C && N1C->isNullValue())
@@ -4656,13 +4893,19 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
// fold (sra (sra x, c1), c2) -> (sra x, (add c1, c2))
if (N1C && N0.getOpcode() == ISD::SRA) {
- if (ConstantSDNode *C1 = isConstOrConstSplat(N0.getOperand(1))) {
- unsigned Sum = N1C->getZExtValue() + C1->getZExtValue();
- if (Sum >= OpSizeInBits)
- Sum = OpSizeInBits - 1;
+ if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
SDLoc DL(N);
- return DAG.getNode(ISD::SRA, DL, VT, N0.getOperand(0),
- DAG.getConstant(Sum, DL, N1.getValueType()));
+ APInt c1 = N0C1->getAPIntValue();
+ APInt c2 = N1C->getAPIntValue();
+ zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
+
+ APInt Sum = c1 + c2;
+ if (Sum.uge(OpSizeInBits))
+ Sum = APInt(OpSizeInBits, OpSizeInBits - 1);
+
+ return DAG.getNode(
+ ISD::SRA, DL, VT, N0.getOperand(0),
+ DAG.getConstant(Sum.getZExtValue(), DL, N1.getValueType()));
}
}
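// Unlike shl, an over-wide arithmetic shift saturates rather than zeroing:
// e.g. (sra (sra x, 20), 20) on i32 clamps to (sra x, 31), the sign-fill
// value.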
@@ -4759,16 +5002,14 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
EVT VT = N0.getValueType();
- unsigned OpSizeInBits = VT.getScalarType().getSizeInBits();
+ unsigned OpSizeInBits = VT.getScalarSizeInBits();
// fold vector ops
- ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
- if (VT.isVector()) {
+ if (VT.isVector())
if (SDValue FoldedVOp = SimplifyVBinOp(N))
return FoldedVOp;
- N1C = isConstOrConstSplat(N1);
- }
+ ConstantSDNode *N1C = isConstOrConstSplat(N1);
// fold (srl c1, c2) -> c1 >>u c2
ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
@@ -4778,7 +5019,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
if (isNullConstant(N0))
return N0;
// fold (srl x, c >= size(x)) -> undef
- if (N1C && N1C->getZExtValue() >= OpSizeInBits)
+ if (N1C && N1C->getAPIntValue().uge(OpSizeInBits))
return DAG.getUNDEF(VT);
// fold (srl x, 0) -> x
if (N1C && N1C->isNullValue())
@@ -4790,14 +5031,19 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
// fold (srl (srl x, c1), c2) -> 0 or (srl x, (add c1, c2))
if (N1C && N0.getOpcode() == ISD::SRL) {
- if (ConstantSDNode *N01C = isConstOrConstSplat(N0.getOperand(1))) {
- uint64_t c1 = N01C->getZExtValue();
- uint64_t c2 = N1C->getZExtValue();
+ if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
SDLoc DL(N);
- if (c1 + c2 >= OpSizeInBits)
+ APInt c1 = N0C1->getAPIntValue();
+ APInt c2 = N1C->getAPIntValue();
+ zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
+
+ APInt Sum = c1 + c2;
+ if (Sum.uge(OpSizeInBits))
return DAG.getConstant(0, DL, VT);
- return DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0),
- DAG.getConstant(c1 + c2, DL, N1.getValueType()));
+
+ return DAG.getNode(
+ ISD::SRL, DL, VT, N0.getOperand(0),
+ DAG.getConstant(Sum.getZExtValue(), DL, N1.getValueType()));
}
}
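// Same widening trick as the shl case: e.g. (srl (srl x, 25), 10) on i32
// folds to the constant 0 once the summed amount reaches 32.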
@@ -4810,7 +5056,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
uint64_t c2 = N1C->getZExtValue();
EVT InnerShiftVT = N0.getOperand(0).getValueType();
EVT ShiftCountVT = N0.getOperand(0)->getOperand(1).getValueType();
- uint64_t InnerShiftSize = InnerShiftVT.getScalarType().getSizeInBits();
+ uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits();
// This is only valid if the OpSizeInBits + c1 = size of inner shift.
if (c1 + OpSizeInBits == InnerShiftSize) {
SDLoc DL(N0);
@@ -4825,14 +5071,14 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
}
// fold (srl (shl x, c), c) -> (and x, cst2)
- if (N1C && N0.getOpcode() == ISD::SHL && N0.getOperand(1) == N1) {
- unsigned BitSize = N0.getScalarValueSizeInBits();
- if (BitSize <= 64) {
- uint64_t ShAmt = N1C->getZExtValue() + 64 - BitSize;
- SDLoc DL(N);
- return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0),
- DAG.getConstant(~0ULL >> ShAmt, DL, VT));
- }
+ if (N0.getOpcode() == ISD::SHL && N0.getOperand(1) == N1 &&
+ isConstantOrConstantVector(N1, /* NoOpaques */ true)) {
+ SDLoc DL(N);
+ APInt AllBits = APInt::getAllOnesValue(N0.getScalarValueSizeInBits());
+ SDValue Mask =
+ DAG.getNode(ISD::SRL, DL, VT, DAG.getConstant(AllBits, DL, VT), N1);
+ AddToWorklist(Mask.getNode());
+ return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), Mask);
}
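// e.g. (srl (shl x, 3), 3) on i8 -> (and x, 0x1F): rebuilding the mask as
// (srl -1, c) avoids 64-bit host arithmetic and handles vector shift
// amounts.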
// fold (srl (anyextend x), c) -> (and (anyextend (srl x, c)), mask)
@@ -5065,6 +5311,41 @@ static SDValue combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS,
}
}
+// TODO: We should handle other cases of selecting between {-1,0,1} here.
+SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) {
+ SDValue Cond = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue N2 = N->getOperand(2);
+ EVT VT = N->getValueType(0);
+ EVT CondVT = Cond.getValueType();
+ SDLoc DL(N);
+
+ // fold (select Cond, 0, 1) -> (xor Cond, 1)
+ // We can't do this reliably if integer-based booleans have different
+ // contents from floating-point-based booleans.
+ // have an integer-based boolean or a floating-point-based boolean unless we
+ // can find the SETCC that produced it and inspect its operands. This is
+ // fairly easy if C is the SETCC node, but it can potentially be
+ // undiscoverable (or not reasonably discoverable). For example, it could be
+ // in another basic block or it could require searching a complicated
+ // expression.
+ if (VT.isInteger() &&
+ (CondVT == MVT::i1 || (CondVT.isInteger() &&
+ TLI.getBooleanContents(false, true) ==
+ TargetLowering::ZeroOrOneBooleanContent &&
+ TLI.getBooleanContents(false, false) ==
+ TargetLowering::ZeroOrOneBooleanContent)) &&
+ isNullConstant(N1) && isOneConstant(N2)) {
+ SDValue NotCond = DAG.getNode(ISD::XOR, DL, CondVT, Cond,
+ DAG.getConstant(1, DL, CondVT));
+ if (VT.bitsEq(CondVT))
+ return NotCond;
+ return DAG.getZExtOrTrunc(NotCond, DL, VT);
+ }
+
+ return SDValue();
+}
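+// e.g. for an i1 condition c, (select c, 0, 1) becomes (xor c, 1); for a
+// wider result type the inverted bit is then zero-extended or truncated to
+// fit.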
+
SDValue DAGCombiner::visitSELECT(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
@@ -5080,39 +5361,14 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) {
// fold (select false, X, Y) -> Y
return !N0C->isNullValue() ? N1 : N2;
}
- // fold (select C, 1, X) -> (or C, X)
- if (VT == MVT::i1 && isOneConstant(N1))
+ // fold (select X, X, Y) -> (or X, Y)
+ // fold (select X, 1, Y) -> (or X, Y)
+ if (VT == VT0 && VT == MVT::i1 && (N0 == N1 || isOneConstant(N1)))
return DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N2);
- // fold (select C, 0, 1) -> (xor C, 1)
- // We can't do this reliably if integer based booleans have different contents
- // to floating point based booleans. This is because we can't tell whether we
- // have an integer-based boolean or a floating-point-based boolean unless we
- // can find the SETCC that produced it and inspect its operands. This is
- // fairly easy if C is the SETCC node, but it can potentially be
- // undiscoverable (or not reasonably discoverable). For example, it could be
- // in another basic block or it could require searching a complicated
- // expression.
- if (VT.isInteger() &&
- (VT0 == MVT::i1 || (VT0.isInteger() &&
- TLI.getBooleanContents(false, false) ==
- TLI.getBooleanContents(false, true) &&
- TLI.getBooleanContents(false, false) ==
- TargetLowering::ZeroOrOneBooleanContent)) &&
- isNullConstant(N1) && isOneConstant(N2)) {
- SDValue XORNode;
- if (VT == VT0) {
- SDLoc DL(N);
- return DAG.getNode(ISD::XOR, DL, VT0,
- N0, DAG.getConstant(1, DL, VT0));
- }
- SDLoc DL0(N0);
- XORNode = DAG.getNode(ISD::XOR, DL0, VT0,
- N0, DAG.getConstant(1, DL0, VT0));
- AddToWorklist(XORNode.getNode());
- if (VT.bitsGT(VT0))
- return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, XORNode);
- return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, XORNode);
- }
+
+ if (SDValue V = foldSelectOfConstants(N))
+ return V;
+
// fold (select C, 0, X) -> (and (not C), X)
if (VT == VT0 && VT == MVT::i1 && isNullConstant(N1)) {
SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT);
@@ -5125,16 +5381,9 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) {
AddToWorklist(NOTNode.getNode());
return DAG.getNode(ISD::OR, SDLoc(N), VT, NOTNode, N1);
}
- // fold (select C, X, 0) -> (and C, X)
- if (VT == MVT::i1 && isNullConstant(N2))
- return DAG.getNode(ISD::AND, SDLoc(N), VT, N0, N1);
- // fold (select X, X, Y) -> (or X, Y)
- // fold (select X, 1, Y) -> (or X, Y)
- if (VT == MVT::i1 && (N0 == N1 || isOneConstant(N1)))
- return DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N2);
// fold (select X, Y, X) -> (and X, Y)
// fold (select X, Y, 0) -> (and X, Y)
- if (VT == MVT::i1 && (N0 == N2 || isNullConstant(N2)))
+ if (VT == VT0 && VT == MVT::i1 && (N0 == N2 || isNullConstant(N2)))
return DAG.getNode(ISD::AND, SDLoc(N), VT, N0, N1);
// If we can fold this based on the true/false value, do so.
@@ -5145,7 +5394,7 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) {
// The code in this block deals with the following 2 equivalences:
// select(C0|C1, x, y) <=> select(C0, x, select(C1, x, y))
// select(C0&C1, x, y) <=> select(C0, select(C1, x, y), y)
- // The target can specify its prefered form with the
+ // The target can specify its preferred form with the
// shouldNormalizeToSelectSequence() callback. However we always transform
// to the right anyway if we find the inner select exists in the DAG anyway
// and we always transform to the left side if we know that we can further
@@ -5214,6 +5463,18 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) {
}
}
+ // select (xor Cond, 1), X, Y -> select Cond, Y, X
+ if (VT0 == MVT::i1) {
+ if (N0->getOpcode() == ISD::XOR) {
+ if (auto *C = dyn_cast<ConstantSDNode>(N0->getOperand(1))) {
+ SDValue Cond0 = N0->getOperand(0);
+ if (C->isOne())
+ return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(),
+ Cond0, N2, N1);
+ }
+ }
+ }
+
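+ // e.g. (select (xor c, 1), a, b) -> (select c, b, a): inverting an i1
+ // condition is the same as swapping the select operands.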
// fold selects based on a setcc into other things, such as min/max/abs
if (N0.getOpcode() == ISD::SETCC) {
// select x, y (fcmp lt x, y) -> fminnum x, y
@@ -5269,7 +5530,7 @@ std::pair<SDValue, SDValue> SplitVSETCC(const SDNode *N, SelectionDAG &DAG) {
// This function assumes all the vselect's arguments are CONCAT_VECTOR
// nodes and that the condition is a BV of ConstantSDNodes (or undefs).
static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) {
- SDLoc dl(N);
+ SDLoc DL(N);
SDValue Cond = N->getOperand(0);
SDValue LHS = N->getOperand(1);
SDValue RHS = N->getOperand(2);
@@ -5316,7 +5577,7 @@ static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) {
"One half of the selector was all UNDEFs and the other was all the "
"same value. This should have been addressed before this function.");
return DAG.getNode(
- ISD::CONCAT_VECTORS, dl, VT,
+ ISD::CONCAT_VECTORS, DL, VT,
BottomHalf->isNullValue() ? RHS->getOperand(0) : LHS->getOperand(0),
TopHalf->isNullValue() ? RHS->getOperand(1) : LHS->getOperand(1));
}
@@ -5390,6 +5651,7 @@ SDValue DAGCombiner::visitMSTORE(SDNode *N) {
MaskedStoreSDNode *MST = dyn_cast<MaskedStoreSDNode>(N);
SDValue Mask = MST->getMask();
SDValue Data = MST->getValue();
+ EVT VT = Data.getValueType();
SDLoc DL(N);
// If the MSTORE data type requires splitting and the mask is provided by a
@@ -5399,16 +5661,13 @@ SDValue DAGCombiner::visitMSTORE(SDNode *N) {
if (Mask.getOpcode() == ISD::SETCC) {
// Check if any splitting is required.
- if (TLI.getTypeAction(*DAG.getContext(), Data.getValueType()) !=
+ if (TLI.getTypeAction(*DAG.getContext(), VT) !=
TargetLowering::TypeSplitVector)
return SDValue();
SDValue MaskLo, MaskHi, Lo, Hi;
std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);
- EVT LoVT, HiVT;
- std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MST->getValueType(0));
-
SDValue Chain = MST->getChain();
SDValue Ptr = MST->getBasePtr();
@@ -5418,8 +5677,7 @@ SDValue DAGCombiner::visitMSTORE(SDNode *N) {
// if Alignment is equal to the vector size,
// take the half of it for the second part
unsigned SecondHalfAlignment =
- (Alignment == Data->getValueType(0).getSizeInBits()/8) ?
- Alignment/2 : Alignment;
+ (Alignment == VT.getSizeInBits() / 8) ? Alignment / 2 : Alignment;
EVT LoMemVT, HiMemVT;
std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
@@ -5433,11 +5691,11 @@ SDValue DAGCombiner::visitMSTORE(SDNode *N) {
Alignment, MST->getAAInfo(), MST->getRanges());
Lo = DAG.getMaskedStore(Chain, DL, DataLo, Ptr, MaskLo, LoMemVT, MMO,
- MST->isTruncatingStore());
+ MST->isTruncatingStore(),
+ MST->isCompressingStore());
- unsigned IncrementSize = LoMemVT.getSizeInBits()/8;
- Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
- DAG.getConstant(IncrementSize, DL, Ptr.getValueType()));
+ Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, DL, LoMemVT, DAG,
+ MST->isCompressingStore());
MMO = DAG.getMachineFunction().
getMachineMemOperand(MST->getPointerInfo(),
@@ -5446,7 +5704,8 @@ SDValue DAGCombiner::visitMSTORE(SDNode *N) {
MST->getRanges());
Hi = DAG.getMaskedStore(Chain, DL, DataHi, Ptr, MaskHi, HiMemVT, MMO,
- MST->isTruncatingStore());
+ MST->isTruncatingStore(),
+ MST->isCompressingStore());
AddToWorklist(Lo.getNode());
AddToWorklist(Hi.getNode());
@@ -5585,11 +5844,10 @@ SDValue DAGCombiner::visitMLOAD(SDNode *N) {
Alignment, MLD->getAAInfo(), MLD->getRanges());
Lo = DAG.getMaskedLoad(LoVT, DL, Chain, Ptr, MaskLo, Src0Lo, LoMemVT, MMO,
- ISD::NON_EXTLOAD);
+ ISD::NON_EXTLOAD, MLD->isExpandingLoad());
- unsigned IncrementSize = LoMemVT.getSizeInBits()/8;
- Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
- DAG.getConstant(IncrementSize, DL, Ptr.getValueType()));
+ Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, DL, LoMemVT, DAG,
+ MLD->isExpandingLoad());
MMO = DAG.getMachineFunction().
getMachineMemOperand(MLD->getPointerInfo(),
@@ -5597,7 +5855,7 @@ SDValue DAGCombiner::visitMLOAD(SDNode *N) {
SecondHalfAlignment, MLD->getAAInfo(), MLD->getRanges());
Hi = DAG.getMaskedLoad(HiVT, DL, Chain, Ptr, MaskHi, Src0Hi, HiMemVT, MMO,
- ISD::NON_EXTLOAD);
+ ISD::NON_EXTLOAD, MLD->isExpandingLoad());
AddToWorklist(Lo.getNode());
AddToWorklist(Hi.getNode());
@@ -5625,6 +5883,10 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) {
SDValue N2 = N->getOperand(2);
SDLoc DL(N);
+ // fold (vselect C, X, X) -> X
+ if (N1 == N2)
+ return N1;
+
// Canonicalize integer abs.
// vselect (setg[te] X, 0), X, -X ->
// vselect (setgt X, -1), X, -X ->
@@ -5648,7 +5910,7 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) {
EVT VT = LHS.getValueType();
SDValue Shift = DAG.getNode(
ISD::SRA, DL, VT, LHS,
- DAG.getConstant(VT.getScalarType().getSizeInBits() - 1, DL, VT));
+ DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT));
SDValue Add = DAG.getNode(ISD::ADD, DL, VT, LHS, Shift);
AddToWorklist(Shift.getNode());
AddToWorklist(Add.getNode());
@@ -5803,7 +6065,7 @@ static SDNode *tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI,
// We can fold this node into a build_vector.
unsigned VTBits = SVT.getSizeInBits();
- unsigned EVTBits = N0->getValueType(0).getScalarType().getSizeInBits();
+ unsigned EVTBits = N0->getValueType(0).getScalarSizeInBits();
SmallVector<SDValue, 8> Elts;
unsigned NumElts = VT.getVectorNumElements();
SDLoc DL(N);
@@ -6026,7 +6288,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
// fold (sext (truncate (load x))) -> (sext (smaller load x))
// fold (sext (truncate (srl (load x), c))) -> (sext (smaller load (x+c/n)))
if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
- SDNode* oye = N0.getNode()->getOperand(0).getNode();
+ SDNode *oye = N0.getOperand(0).getNode();
if (NarrowLoad.getNode() != N0.getNode()) {
CombineTo(N0.getNode(), NarrowLoad);
// CombineTo deleted the truncate, if needed, but not what's under it.
@@ -6038,9 +6300,9 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
// See if the value being truncated is already sign extended. If so, just
// eliminate the trunc/sext pair.
SDValue Op = N0.getOperand(0);
- unsigned OpBits = Op.getValueType().getScalarType().getSizeInBits();
- unsigned MidBits = N0.getValueType().getScalarType().getSizeInBits();
- unsigned DestBits = VT.getScalarType().getSizeInBits();
+ unsigned OpBits = Op.getScalarValueSizeInBits();
+ unsigned MidBits = N0.getScalarValueSizeInBits();
+ unsigned DestBits = VT.getScalarSizeInBits();
unsigned NumSignBits = DAG.ComputeNumSignBits(Op);
if (OpBits == DestBits) {
@@ -6201,7 +6463,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
// sext(setcc x, y, cc) -> (select (setcc x, y, cc), T, 0)
// Here, T can be 1 or -1, depending on the type of the setcc and
// getBooleanContents().
- unsigned SetCCWidth = N0.getValueType().getScalarSizeInBits();
+ unsigned SetCCWidth = N0.getScalarValueSizeInBits();
SDLoc DL(N);
// To determine the "true" side of the select, we need to know the high bit
@@ -6323,7 +6585,7 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
// fold (zext (truncate (srl (load x), c))) -> (zext (small load (x+c/n)))
if (N0.getOpcode() == ISD::TRUNCATE) {
if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
- SDNode* oye = N0.getNode()->getOperand(0).getNode();
+ SDNode *oye = N0.getOperand(0).getNode();
if (NarrowLoad.getNode() != N0.getNode()) {
CombineTo(N0.getNode(), NarrowLoad);
// CombineTo deleted the truncate, if needed, but not what's under it.
@@ -6338,7 +6600,7 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
// fold (zext (truncate (load x))) -> (zext (smaller load x))
// fold (zext (truncate (srl (load x), c))) -> (zext (smaller load (x+c/n)))
if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
- SDNode *oye = N0.getNode()->getOperand(0).getNode();
+ SDNode *oye = N0.getOperand(0).getNode();
if (NarrowLoad.getNode() != N0.getNode()) {
CombineTo(N0.getNode(), NarrowLoad);
// CombineTo deleted the truncate, if needed, but not what's under it.
@@ -6528,7 +6790,7 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
// elements we can use a matching integer vector type and then
// truncate/sign extend.
EVT MatchingElementType = EVT::getIntegerVT(
- *DAG.getContext(), N00VT.getScalarType().getSizeInBits());
+ *DAG.getContext(), N00VT.getScalarSizeInBits());
EVT MatchingVectorType = EVT::getVectorVT(
*DAG.getContext(), MatchingElementType, N00VT.getVectorNumElements());
SDValue VsetCC =
@@ -6558,8 +6820,8 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
SDValue InnerZExt = N0.getOperand(0);
// If the original shl may be shifting out bits, do not perform this
// transformation.
- unsigned KnownZeroBits = InnerZExt.getValueType().getSizeInBits() -
- InnerZExt.getOperand(0).getValueType().getSizeInBits();
+ unsigned KnownZeroBits = InnerZExt.getValueSizeInBits() -
+ InnerZExt.getOperand(0).getValueSizeInBits();
if (ShAmtVal > KnownZeroBits)
return SDValue();
}
@@ -6598,7 +6860,7 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
// fold (aext (truncate (srl (load x), c))) -> (aext (small load (x+c/n)))
if (N0.getOpcode() == ISD::TRUNCATE) {
if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
- SDNode* oye = N0.getNode()->getOperand(0).getNode();
+ SDNode *oye = N0.getOperand(0).getNode();
if (NarrowLoad.getNode() != N0.getNode()) {
CombineTo(N0.getNode(), NarrowLoad);
// CombineTo deleted the truncate, if needed, but not what's under it.
@@ -6625,15 +6887,15 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
N0.getOperand(1).getOpcode() == ISD::Constant &&
!TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
N0.getValueType())) {
+ SDLoc DL(N);
SDValue X = N0.getOperand(0).getOperand(0);
if (X.getValueType().bitsLT(VT)) {
- X = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, X);
+ X = DAG.getNode(ISD::ANY_EXTEND, DL, VT, X);
} else if (X.getValueType().bitsGT(VT)) {
- X = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, X);
+ X = DAG.getNode(ISD::TRUNCATE, DL, VT, X);
}
APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
Mask = Mask.zext(VT.getSizeInBits());
- SDLoc DL(N);
return DAG.getNode(ISD::AND, DL, VT,
X, DAG.getConstant(Mask, DL, VT));
}
@@ -6820,7 +7082,7 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
if ((ShAmt & (EVTBits-1)) == 0) {
N0 = N0.getOperand(0);
// Is the load width a multiple of size of VT?
- if ((N0.getValueType().getSizeInBits() & (EVTBits-1)) != 0)
+ if ((N0.getValueSizeInBits() & (EVTBits-1)) != 0)
return SDValue();
}
@@ -6952,8 +7214,8 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
SDValue N1 = N->getOperand(1);
EVT VT = N->getValueType(0);
EVT EVT = cast<VTSDNode>(N1)->getVT();
- unsigned VTBits = VT.getScalarType().getSizeInBits();
- unsigned EVTBits = EVT.getScalarType().getSizeInBits();
+ unsigned VTBits = VT.getScalarSizeInBits();
+ unsigned EVTBits = EVT.getScalarSizeInBits();
if (N0.isUndef())
return DAG.getUNDEF(VT);
@@ -6977,14 +7239,23 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
// if x is small enough.
if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) {
SDValue N00 = N0.getOperand(0);
- if (N00.getValueType().getScalarType().getSizeInBits() <= EVTBits &&
+ if (N00.getScalarValueSizeInBits() <= EVTBits &&
+ (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
+ return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00, N1);
+ }
+
+ // fold (sext_in_reg (zext x)) -> (sext x)
+ // iff we are extending the source sign bit.
+ if (N0.getOpcode() == ISD::ZERO_EXTEND) {
+ SDValue N00 = N0.getOperand(0);
+ if (N00.getScalarValueSizeInBits() == EVTBits &&
(!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00, N1);
}
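// e.g. (sext_in_reg (zext x from i8), i8) -> (sext x): the zext places the
// source sign bit exactly where sext_in_reg would start smearing.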
// fold (sext_in_reg x) -> (zext_in_reg x) if the sign bit is known zero.
if (DAG.MaskedValueIsZero(N0, APInt::getBitsSet(VTBits, EVTBits-1, EVTBits)))
- return DAG.getZeroExtendInReg(N0, SDLoc(N), EVT);
+ return DAG.getZeroExtendInReg(N0, SDLoc(N), EVT.getScalarType());
// fold operands of sext_in_reg based on knowledge that the top bits are not
// demanded.
@@ -7111,6 +7382,10 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
return N0.getOperand(0);
}
+ // If this is anyext(trunc), don't fold it; allow ourselves to be folded instead.
+ if (N->hasOneUse() && (N->use_begin()->getOpcode() == ISD::ANY_EXTEND))
+ return SDValue();
+
// Fold extract-and-trunc into a narrow extract. For example:
// i64 x = EXTRACT_VECTOR_ELT(v2i64 val, i32 1)
// i32 y = TRUNCATE(i64 x)
@@ -7148,7 +7423,7 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
}
// trunc (select c, a, b) -> select c, (trunc a), (trunc b)
- if (N0.getOpcode() == ISD::SELECT) {
+ if (N0.getOpcode() == ISD::SELECT && N0.hasOneUse()) {
EVT SrcVT = N0.getValueType();
if ((!LegalOperations || TLI.isOperationLegal(ISD::SELECT, SrcVT)) &&
TLI.isTruncateFree(SrcVT, VT)) {
@@ -7160,15 +7435,15 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
}
}
- // trunc (shl x, K) -> shl (trunc x), K => K < vt.size / 2
+ // trunc (shl x, K) -> shl (trunc x), K => K < VT.getScalarSizeInBits()
if (N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
(!LegalOperations || TLI.isOperationLegalOrCustom(ISD::SHL, VT)) &&
TLI.isTypeDesirableForOp(ISD::SHL, VT)) {
if (const ConstantSDNode *CAmt = isConstOrConstSplat(N0.getOperand(1))) {
uint64_t Amt = CAmt->getZExtValue();
- unsigned Size = VT.getSizeInBits();
+ unsigned Size = VT.getScalarSizeInBits();
- if (Amt < Size / 2) {
+ if (Amt < Size) {
SDLoc SL(N);
EVT AmtVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
@@ -7525,7 +7800,7 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) {
if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse() &&
isa<ConstantFPSDNode>(N0.getOperand(0)) &&
VT.isInteger() && !VT.isVector()) {
- unsigned OrigXWidth = N0.getOperand(1).getValueType().getSizeInBits();
+ unsigned OrigXWidth = N0.getOperand(1).getValueSizeInBits();
EVT IntXVT = EVT::getIntegerVT(*DAG.getContext(), OrigXWidth);
if (isTypeLegal(IntXVT)) {
SDValue X = DAG.getBitcast(IntXVT, N0.getOperand(1));
@@ -7848,10 +8123,14 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
}
// More folding opportunities when target permits.
- if ((AllowFusion || HasFMAD) && Aggressive) {
+ if (Aggressive) {
// fold (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y (fma u, v, z))
- if (N0.getOpcode() == PreferredFusedOpcode &&
- N0.getOperand(2).getOpcode() == ISD::FMUL) {
+ // FIXME: The UnsafeAlgebra flag should be propagated to FMA/FMAD, but FMF
+ // are currently only supported on binary nodes.
+ if (Options.UnsafeFPMath &&
+ N0.getOpcode() == PreferredFusedOpcode &&
+ N0.getOperand(2).getOpcode() == ISD::FMUL &&
+ N0->hasOneUse() && N0.getOperand(2)->hasOneUse()) {
return DAG.getNode(PreferredFusedOpcode, SL, VT,
N0.getOperand(0), N0.getOperand(1),
DAG.getNode(PreferredFusedOpcode, SL, VT,
@@ -7861,8 +8140,12 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
}
// fold (fadd x, (fma y, z, (fmul u, v)) -> (fma y, z (fma u, v, x))
- if (N1->getOpcode() == PreferredFusedOpcode &&
- N1.getOperand(2).getOpcode() == ISD::FMUL) {
+ // FIXME: The UnsafeAlgebra flag should be propagated to FMA/FMAD, but FMF
+ // are currently only supported on binary nodes.
+ if (Options.UnsafeFPMath &&
+ N1->getOpcode() == PreferredFusedOpcode &&
+ N1.getOperand(2).getOpcode() == ISD::FMUL &&
+ N1->hasOneUse() && N1.getOperand(2)->hasOneUse()) {
return DAG.getNode(PreferredFusedOpcode, SL, VT,
N1.getOperand(0), N1.getOperand(1),
DAG.getNode(PreferredFusedOpcode, SL, VT,
@@ -8090,11 +8373,15 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
}
// More folding opportunities when target permits.
- if ((AllowFusion || HasFMAD) && Aggressive) {
+ if (Aggressive) {
// fold (fsub (fma x, y, (fmul u, v)), z)
// -> (fma x, y (fma u, v, (fneg z)))
- if (N0.getOpcode() == PreferredFusedOpcode &&
- N0.getOperand(2).getOpcode() == ISD::FMUL) {
+ // FIXME: The UnsafeAlgebra flag should be propagated to FMA/FMAD, but FMF
+ // are currently only supported on binary nodes.
+ if (Options.UnsafeFPMath &&
+ N0.getOpcode() == PreferredFusedOpcode &&
+ N0.getOperand(2).getOpcode() == ISD::FMUL &&
+ N0->hasOneUse() && N0.getOperand(2)->hasOneUse()) {
return DAG.getNode(PreferredFusedOpcode, SL, VT,
N0.getOperand(0), N0.getOperand(1),
DAG.getNode(PreferredFusedOpcode, SL, VT,
@@ -8106,7 +8393,10 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
// fold (fsub x, (fma y, z, (fmul u, v)))
// -> (fma (fneg y), z, (fma (fneg u), v, x))
- if (N1.getOpcode() == PreferredFusedOpcode &&
+ // FIXME: The UnsafeAlgebra flag should be propagated to FMA/FMAD, but FMF
+ // are currently only supported on binary nodes.
+ if (Options.UnsafeFPMath &&
+ N1.getOpcode() == PreferredFusedOpcode &&
N1.getOperand(2).getOpcode() == ISD::FMUL) {
SDValue N20 = N1.getOperand(2).getOperand(0);
SDValue N21 = N1.getOperand(2).getOperand(1);
@@ -8221,8 +8511,10 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
return SDValue();
}
-/// Try to perform FMA combining on a given FMUL node.
-SDValue DAGCombiner::visitFMULForFMACombine(SDNode *N) {
+/// Try to perform FMA combining on a given FMUL node based on the distributive
+/// law x * (y + 1) = x * y + x and variants thereof (commuted versions,
+/// subtraction instead of addition).
+SDValue DAGCombiner::visitFMULForFMADistributiveCombine(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
EVT VT = N->getValueType(0);
@@ -8231,17 +8523,23 @@ SDValue DAGCombiner::visitFMULForFMACombine(SDNode *N) {
assert(N->getOpcode() == ISD::FMUL && "Expected FMUL Operation");
const TargetOptions &Options = DAG.getTarget().Options;
- bool AllowFusion =
- (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath);
- // Floating-point multiply-add with intermediate rounding.
- bool HasFMAD = (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT));
+ // The transforms below are incorrect when x == 0 and y == inf, because the
+ // intermediate multiplication produces a nan.
+ if (!Options.NoInfsFPMath)
+ return SDValue();
// Floating-point multiply-add without intermediate rounding.
bool HasFMA =
- AllowFusion && TLI.isFMAFasterThanFMulAndFAdd(VT) &&
+ (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath) &&
+ TLI.isFMAFasterThanFMulAndFAdd(VT) &&
(!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
+ // Floating-point multiply-add with intermediate rounding. This can produce
+ // a less precise result due to the changed rounding order.
+ bool HasFMAD = Options.UnsafeFPMath &&
+ (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT));
+
// No valid opcode, do not combine.
if (!HasFMAD && !HasFMA)
return SDValue();
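
As a standalone illustration (not part of the patch), the scalar identity behind the renamed combine is x * (y + 1) == fma(x, y, x); the guard above exists precisely because x == 0 with y == inf would make the fused intermediate a NaN. The sketch below is hypothetical and uses the standard library, not LLVM APIs.

// Hypothetical standalone sketch; not LLVM code.
#include <cmath>
#include <cstdio>

int main() {
  double x = 3.0, y = 5.0;
  double mul = x * (y + 1.0);       // the original FMUL form
  double fused = std::fma(x, y, x); // the distributed, fused form
  std::printf("%f %f\n", mul, fused); // 18.000000 18.000000
  return 0;
}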
@@ -8338,17 +8636,20 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
return DAG.getNode(ISD::FSUB, DL, VT, N1,
GetNegatedExpression(N0, DAG, LegalOperations), Flags);
+ // FIXME: Auto-upgrade the target/function-level option.
+ if (Options.UnsafeFPMath || N->getFlags()->hasNoSignedZeros()) {
+ // fold (fadd A, 0) -> A
+ if (ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1))
+ if (N1C->isZero())
+ return N0;
+ }
+
// If 'unsafe math' is enabled, fold lots of things.
if (Options.UnsafeFPMath) {
// No FP constant should be created after legalization as Instruction
// Selection pass has a hard time dealing with FP constants.
bool AllowNewConst = (Level < AfterLegalizeDAG);
- // fold (fadd A, 0) -> A
- if (ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1))
- if (N1C->isZero())
- return N0;
-
// fold (fadd (fadd x, c1), c2) -> (fadd x, (fadd c1, c2))
if (N1CFP && N0.getOpcode() == ISD::FADD && N0.getNode()->hasOneUse() &&
isConstantFPBuildVectorOrConstantFP(N0.getOperand(1)))
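
As a standalone illustration (not part of the patch), the reason the (fadd A, 0) -> A fold above can key off the no-signed-zeros flag rather than full unsafe math is that the fold is exact for every input except A == -0.0:

// Hypothetical standalone sketch; not LLVM code.
#include <cstdio>

int main() {
  double a = -0.0;
  // IEEE addition: -0.0 + +0.0 == +0.0, so dropping the add would flip
  // the sign of zero; with nsz that difference is declared irrelevant.
  std::printf("%g\n", a + 0.0); // prints 0, not -0
  return 0;
}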
@@ -8457,7 +8758,7 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {
ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0);
ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1);
EVT VT = N->getValueType(0);
- SDLoc dl(N);
+ SDLoc DL(N);
const TargetOptions &Options = DAG.getTarget().Options;
const SDNodeFlags *Flags = &cast<BinaryWithFlagsSDNode>(N)->Flags;
@@ -8468,30 +8769,33 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {
// fold (fsub c1, c2) -> c1-c2
if (N0CFP && N1CFP)
- return DAG.getNode(ISD::FSUB, dl, VT, N0, N1, Flags);
+ return DAG.getNode(ISD::FSUB, DL, VT, N0, N1, Flags);
// fold (fsub A, (fneg B)) -> (fadd A, B)
if (isNegatibleForFree(N1, LegalOperations, TLI, &Options))
- return DAG.getNode(ISD::FADD, dl, VT, N0,
+ return DAG.getNode(ISD::FADD, DL, VT, N0,
GetNegatedExpression(N1, DAG, LegalOperations), Flags);
- // If 'unsafe math' is enabled, fold lots of things.
- if (Options.UnsafeFPMath) {
- // (fsub A, 0) -> A
- if (N1CFP && N1CFP->isZero())
- return N0;
-
+ // FIXME: Auto-upgrade the target/function-level option.
+ if (Options.UnsafeFPMath || N->getFlags()->hasNoSignedZeros()) {
// (fsub 0, B) -> -B
if (N0CFP && N0CFP->isZero()) {
if (isNegatibleForFree(N1, LegalOperations, TLI, &Options))
return GetNegatedExpression(N1, DAG, LegalOperations);
if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
- return DAG.getNode(ISD::FNEG, dl, VT, N1);
+ return DAG.getNode(ISD::FNEG, DL, VT, N1, Flags);
}
+ }
+
+ // If 'unsafe math' is enabled, fold lots of things.
+ if (Options.UnsafeFPMath) {
+ // (fsub A, 0) -> A
+ if (N1CFP && N1CFP->isZero())
+ return N0;
// (fsub x, x) -> 0.0
if (N0 == N1)
- return DAG.getConstantFP(0.0f, dl, VT);
+ return DAG.getConstantFP(0.0f, DL, VT);
// (fsub x, (fadd x, y)) -> (fneg y)
// (fsub x, (fadd y, x)) -> (fneg y)
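
Similarly, as a standalone illustration (not part of the patch), the (fsub 0, B) -> (fneg B) fold moved under the nsz check above because the two forms disagree only on the sign of zero:

// Hypothetical standalone sketch; not LLVM code.
#include <cstdio>

int main() {
  double b = 0.0;
  std::printf("%g %g\n", 0.0 - b, -b); // prints "0 -0"
  return 0;
}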
@@ -8611,7 +8915,7 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) {
}
// FMUL -> FMA combines:
- if (SDValue Fused = visitFMULForFMACombine(N)) {
+ if (SDValue Fused = visitFMULForFMADistributiveCombine(N)) {
AddToWorklist(Fused.getNode());
return Fused;
}
@@ -8626,14 +8930,14 @@ SDValue DAGCombiner::visitFMA(SDNode *N) {
ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
EVT VT = N->getValueType(0);
- SDLoc dl(N);
+ SDLoc DL(N);
const TargetOptions &Options = DAG.getTarget().Options;
// Constant fold FMA.
if (isa<ConstantFPSDNode>(N0) &&
isa<ConstantFPSDNode>(N1) &&
isa<ConstantFPSDNode>(N2)) {
- return DAG.getNode(ISD::FMA, dl, VT, N0, N1, N2);
+ return DAG.getNode(ISD::FMA, DL, VT, N0, N1, N2);
}
if (Options.UnsafeFPMath) {
@@ -8663,8 +8967,8 @@ SDValue DAGCombiner::visitFMA(SDNode *N) {
if (N2.getOpcode() == ISD::FMUL && N0 == N2.getOperand(0) &&
isConstantFPBuildVectorOrConstantFP(N1) &&
isConstantFPBuildVectorOrConstantFP(N2.getOperand(1))) {
- return DAG.getNode(ISD::FMUL, dl, VT, N0,
- DAG.getNode(ISD::FADD, dl, VT, N1, N2.getOperand(1),
+ return DAG.getNode(ISD::FMUL, DL, VT, N0,
+ DAG.getNode(ISD::FADD, DL, VT, N1, N2.getOperand(1),
&Flags), &Flags);
}
@@ -8672,9 +8976,9 @@ SDValue DAGCombiner::visitFMA(SDNode *N) {
if (N0.getOpcode() == ISD::FMUL &&
isConstantFPBuildVectorOrConstantFP(N1) &&
isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) {
- return DAG.getNode(ISD::FMA, dl, VT,
+ return DAG.getNode(ISD::FMA, DL, VT,
N0.getOperand(0),
- DAG.getNode(ISD::FMUL, dl, VT, N1, N0.getOperand(1),
+ DAG.getNode(ISD::FMUL, DL, VT, N1, N0.getOperand(1),
&Flags),
N2);
}
@@ -8685,32 +8989,32 @@ SDValue DAGCombiner::visitFMA(SDNode *N) {
if (N1CFP) {
if (N1CFP->isExactlyValue(1.0))
// TODO: The FMA node should have flags that propagate to this node.
- return DAG.getNode(ISD::FADD, dl, VT, N0, N2);
+ return DAG.getNode(ISD::FADD, DL, VT, N0, N2);
if (N1CFP->isExactlyValue(-1.0) &&
(!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))) {
- SDValue RHSNeg = DAG.getNode(ISD::FNEG, dl, VT, N0);
+ SDValue RHSNeg = DAG.getNode(ISD::FNEG, DL, VT, N0);
AddToWorklist(RHSNeg.getNode());
// TODO: The FMA node should have flags that propagate to this node.
- return DAG.getNode(ISD::FADD, dl, VT, N2, RHSNeg);
+ return DAG.getNode(ISD::FADD, DL, VT, N2, RHSNeg);
}
}
if (Options.UnsafeFPMath) {
// (fma x, c, x) -> (fmul x, (c+1))
if (N1CFP && N0 == N2) {
- return DAG.getNode(ISD::FMUL, dl, VT, N0,
- DAG.getNode(ISD::FADD, dl, VT,
- N1, DAG.getConstantFP(1.0, dl, VT),
- &Flags), &Flags);
+ return DAG.getNode(ISD::FMUL, DL, VT, N0,
+ DAG.getNode(ISD::FADD, DL, VT, N1,
+ DAG.getConstantFP(1.0, DL, VT), &Flags),
+ &Flags);
}
// (fma x, c, (fneg x)) -> (fmul x, (c-1))
if (N1CFP && N2.getOpcode() == ISD::FNEG && N2.getOperand(0) == N0) {
- return DAG.getNode(ISD::FMUL, dl, VT, N0,
- DAG.getNode(ISD::FADD, dl, VT,
- N1, DAG.getConstantFP(-1.0, dl, VT),
- &Flags), &Flags);
+ return DAG.getNode(ISD::FMUL, DL, VT, N0,
+ DAG.getNode(ISD::FADD, DL, VT, N1,
+ DAG.getConstantFP(-1.0, DL, VT), &Flags),
+ &Flags);
}
}
@@ -8720,7 +9024,7 @@ SDValue DAGCombiner::visitFMA(SDNode *N) {
// Combine multiple FDIVs with the same divisor into multiple FMULs by the
// reciprocal.
// E.g., (a / D; b / D;) -> (recip = 1.0 / D; a * recip; b * recip)
-// Notice that this is not always beneficial. One reason is different target
+// Notice that this is not always beneficial. One reason is different targets
// may have different costs for FDIV and FMUL, so sometimes the cost of two
// FDIVs may be lower than the cost of one FDIV and two FMULs. Another reason
// is the critical path is increased from "one FDIV" to "one FDIV + one FMUL".
@@ -8907,14 +9211,18 @@ SDValue DAGCombiner::visitFREM(SDNode *N) {
}
SDValue DAGCombiner::visitFSQRT(SDNode *N) {
- if (!DAG.getTarget().Options.UnsafeFPMath || TLI.isFsqrtCheap())
+ if (!DAG.getTarget().Options.UnsafeFPMath)
+ return SDValue();
+
+ SDValue N0 = N->getOperand(0);
+ if (TLI.isFsqrtCheap(N0, DAG))
return SDValue();
// TODO: FSQRT nodes should have flags that propagate to the created nodes.
// For now, create a Flags object for use with all unsafe math transforms.
SDNodeFlags Flags;
Flags.setUnsafeAlgebra(true);
- return buildSqrtEstimate(N->getOperand(0), &Flags);
+ return buildSqrtEstimate(N0, &Flags);
}
/// copysign(x, fp_extend(y)) -> copysign(x, y)
@@ -8941,11 +9249,11 @@ SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) {
ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
EVT VT = N->getValueType(0);
- if (N0CFP && N1CFP) // Constant fold
+ if (N0CFP && N1CFP) // Constant fold
return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1);
if (N1CFP) {
- const APFloat& V = N1CFP->getValueAPF();
+ const APFloat &V = N1CFP->getValueAPF();
// copysign(x, c1) -> fabs(x) iff ispos(c1)
// copysign(x, c1) -> fneg(fabs(x)) iff isneg(c1)
if (!V.isNegative()) {
@@ -8963,8 +9271,7 @@ SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) {
// copysign(copysign(x,z), y) -> copysign(x, y)
if (N0.getOpcode() == ISD::FABS || N0.getOpcode() == ISD::FNEG ||
N0.getOpcode() == ISD::FCOPYSIGN)
- return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT,
- N0.getOperand(0), N1);
+ return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0.getOperand(0), N1);
// copysign(x, abs(y)) -> abs(x)
if (N1.getOpcode() == ISD::FABS)
@@ -8972,14 +9279,12 @@ SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) {
// copysign(x, copysign(y,z)) -> copysign(x, z)
if (N1.getOpcode() == ISD::FCOPYSIGN)
- return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT,
- N0, N1.getOperand(1));
+ return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1.getOperand(1));
// copysign(x, fp_extend(y)) -> copysign(x, y)
// copysign(x, fp_round(y)) -> copysign(x, y)
if (CanCombineFCOPYSIGN_EXTEND_ROUND(N))
- return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT,
- N0, N1.getOperand(0));
+ return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1.getOperand(0));
return SDValue();
}
@@ -9159,7 +9464,7 @@ SDValue DAGCombiner::visitFP_ROUND(SDNode *N) {
// fold (fp_round (fp_round x)) -> (fp_round x)
if (N0.getOpcode() == ISD::FP_ROUND) {
const bool NIsTrunc = N->getConstantOperandVal(1) == 1;
- const bool N0IsTrunc = N0.getNode()->getConstantOperandVal(1) == 1;
+ const bool N0IsTrunc = N0.getConstantOperandVal(1) == 1;
// Skip this folding if it results in an fp_round from f80 to f16.
//
@@ -9232,7 +9537,7 @@ SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) {
// Turn fp_extend(fp_round(X, 1)) -> x since the fp_round doesn't affect the
// value of X.
if (N0.getOpcode() == ISD::FP_ROUND
- && N0.getNode()->getConstantOperandVal(1) == 1) {
+ && N0.getConstantOperandVal(1) == 1) {
SDValue In = N0.getOperand(0);
if (In.getValueType() == VT) return In;
if (VT.bitsLT(In.getValueType()))
@@ -9319,7 +9624,7 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) {
if (N0.getValueType().isVector()) {
// For a vector, get a mask such as 0x80... per scalar element
// and splat it.
- SignMask = APInt::getSignBit(N0.getValueType().getScalarSizeInBits());
+ SignMask = APInt::getSignBit(N0.getScalarValueSizeInBits());
SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask);
} else {
// For a scalar, just generate 0x80...
@@ -9424,7 +9729,7 @@ SDValue DAGCombiner::visitFABS(SDNode *N) {
if (N0.getValueType().isVector()) {
// For a vector, get a mask such as 0x7f... per scalar element
// and splat it.
- SignMask = ~APInt::getSignBit(N0.getValueType().getScalarSizeInBits());
+ SignMask = ~APInt::getSignBit(N0.getScalarValueSizeInBits());
SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask);
} else {
// For a scalar, just generate 0x7f...
@@ -10103,7 +10408,8 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) {
// value.
// TODO: Handle store large -> read small portion.
// TODO: Handle TRUNCSTORE/LOADEXT
- if (ISD::isNormalLoad(N) && !LD->isVolatile()) {
+ if (OptLevel != CodeGenOpt::None &&
+ ISD::isNormalLoad(N) && !LD->isVolatile()) {
if (ISD::isNON_TRUNCStore(Chain.getNode())) {
StoreSDNode *PrevST = cast<StoreSDNode>(Chain);
if (PrevST->getBasePtr() == Ptr &&
@@ -10405,7 +10711,7 @@ struct LoadedSlice {
assert(Inst && Origin && "Unable to replace a non-existing slice.");
const SDValue &OldBaseAddr = Origin->getBasePtr();
SDValue BaseAddr = OldBaseAddr;
- // Get the offset in that chunk of bytes w.r.t. the endianess.
+ // Get the offset in that chunk of bytes w.r.t. the endianness.
int64_t Offset = static_cast<int64_t>(getOffsetFromBase());
assert(Offset >= 0 && "Offset too big to fit in int64_t!");
if (Offset) {
@@ -10705,7 +11011,7 @@ bool DAGCombiner::SliceUpLoad(SDNode *N) {
LSIt != LSItEnd; ++LSIt) {
SDValue SliceInst = LSIt->loadSlice();
CombineTo(LSIt->Inst, SliceInst, true);
- if (SliceInst.getNode()->getOpcode() != ISD::LOAD)
+ if (SliceInst.getOpcode() != ISD::LOAD)
SliceInst = SliceInst.getOperand(0);
assert(SliceInst->getOpcode() == ISD::LOAD &&
"It takes more than a zext to get to the loaded slice!!");
@@ -11033,110 +11339,6 @@ SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) {
return SDValue();
}
-namespace {
-/// Helper struct to parse and store a memory address as base + index + offset.
-/// We ignore sign extensions when it is safe to do so.
-/// The following two expressions are not equivalent. To differentiate we need
-/// to store whether there was a sign extension involved in the index
-/// computation.
-/// (load (i64 add (i64 copyfromreg %c)
-/// (i64 signextend (add (i8 load %index)
-/// (i8 1))))
-/// vs
-///
-/// (load (i64 add (i64 copyfromreg %c)
-/// (i64 signextend (i32 add (i32 signextend (i8 load %index))
-/// (i32 1)))))
-struct BaseIndexOffset {
- SDValue Base;
- SDValue Index;
- int64_t Offset;
- bool IsIndexSignExt;
-
- BaseIndexOffset() : Offset(0), IsIndexSignExt(false) {}
-
- BaseIndexOffset(SDValue Base, SDValue Index, int64_t Offset,
- bool IsIndexSignExt) :
- Base(Base), Index(Index), Offset(Offset), IsIndexSignExt(IsIndexSignExt) {}
-
- bool equalBaseIndex(const BaseIndexOffset &Other) {
- return Other.Base == Base && Other.Index == Index &&
- Other.IsIndexSignExt == IsIndexSignExt;
- }
-
- /// Parses tree in Ptr for base, index, offset addresses.
- static BaseIndexOffset match(SDValue Ptr, SelectionDAG &DAG) {
- bool IsIndexSignExt = false;
-
- // Split up a folded GlobalAddress+Offset into its component parts.
- if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Ptr))
- if (GA->getOpcode() == ISD::GlobalAddress && GA->getOffset() != 0) {
- return BaseIndexOffset(DAG.getGlobalAddress(GA->getGlobal(),
- SDLoc(GA),
- GA->getValueType(0),
- /*Offset=*/0,
- /*isTargetGA=*/false,
- GA->getTargetFlags()),
- SDValue(),
- GA->getOffset(),
- IsIndexSignExt);
- }
-
- // We only can pattern match BASE + INDEX + OFFSET. If Ptr is not an ADD
- // instruction, then it could be just the BASE or everything else we don't
- // know how to handle. Just use Ptr as BASE and give up.
- if (Ptr->getOpcode() != ISD::ADD)
- return BaseIndexOffset(Ptr, SDValue(), 0, IsIndexSignExt);
-
- // We know that we have at least an ADD instruction. Try to pattern match
- // the simple case of BASE + OFFSET.
- if (isa<ConstantSDNode>(Ptr->getOperand(1))) {
- int64_t Offset = cast<ConstantSDNode>(Ptr->getOperand(1))->getSExtValue();
- return BaseIndexOffset(Ptr->getOperand(0), SDValue(), Offset,
- IsIndexSignExt);
- }
-
- // Inside a loop the current BASE pointer is calculated using an ADD and a
- // MUL instruction. In this case Ptr is the actual BASE pointer.
- // (i64 add (i64 %array_ptr)
- // (i64 mul (i64 %induction_var)
- // (i64 %element_size)))
- if (Ptr->getOperand(1)->getOpcode() == ISD::MUL)
- return BaseIndexOffset(Ptr, SDValue(), 0, IsIndexSignExt);
-
- // Look at Base + Index + Offset cases.
- SDValue Base = Ptr->getOperand(0);
- SDValue IndexOffset = Ptr->getOperand(1);
-
- // Skip signextends.
- if (IndexOffset->getOpcode() == ISD::SIGN_EXTEND) {
- IndexOffset = IndexOffset->getOperand(0);
- IsIndexSignExt = true;
- }
-
- // Either the case of Base + Index (no offset) or something else.
- if (IndexOffset->getOpcode() != ISD::ADD)
- return BaseIndexOffset(Base, IndexOffset, 0, IsIndexSignExt);
-
- // Now we have the case of Base + Index + offset.
- SDValue Index = IndexOffset->getOperand(0);
- SDValue Offset = IndexOffset->getOperand(1);
-
- if (!isa<ConstantSDNode>(Offset))
- return BaseIndexOffset(Ptr, SDValue(), 0, IsIndexSignExt);
-
- // Ignore signextends.
- if (Index->getOpcode() == ISD::SIGN_EXTEND) {
- Index = Index->getOperand(0);
- IsIndexSignExt = true;
- } else IsIndexSignExt = false;
-
- int64_t Off = cast<ConstantSDNode>(Offset)->getSExtValue();
- return BaseIndexOffset(Base, Index, Off, IsIndexSignExt);
- }
-};
-} // namespace
-
// This is a helper function for visitMUL to check the profitability
// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
// MulNode is the original multiply, AddNode is (add x, c1),
@@ -11351,6 +11553,7 @@ bool DAGCombiner::MergeStoresOfConstantsOrVecElts(
}
}
+ StoreNodes.erase(StoreNodes.begin() + NumStores, StoreNodes.end());
return true;
}
@@ -11493,7 +11696,8 @@ bool DAGCombiner::checkMergeStoreCandidatesForDependencies(
return true;
}
-bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
+bool DAGCombiner::MergeConsecutiveStores(
+ StoreSDNode* St, SmallVectorImpl<MemOpLink> &StoreNodes) {
if (OptLevel == CodeGenOpt::None)
return false;
@@ -11537,16 +11741,13 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
// any of the store nodes.
SmallVector<LSBaseSDNode*, 8> AliasLoadNodes;
- // Save the StoreSDNodes that we find in the chain.
- SmallVector<MemOpLink, 8> StoreNodes;
-
getStoreMergeAndAliasCandidates(St, StoreNodes, AliasLoadNodes);
// Check if there is anything to merge.
if (StoreNodes.size() < 2)
return false;
- // only do dep endence check in AA case
+ // only do dependence check in AA case
bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA
: DAG.getSubtarget().useAA();
if (UseAA && !checkMergeStoreCandidatesForDependencies(StoreNodes))
@@ -11582,10 +11783,9 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
// Check if this store interferes with any of the loads that we found.
    // If we find a load that aliases with this store, stop the sequence.
- if (std::any_of(AliasLoadNodes.begin(), AliasLoadNodes.end(),
- [&](LSBaseSDNode* Ldn) {
- return isAlias(Ldn, StoreNodes[i].MemNode);
- }))
+ if (any_of(AliasLoadNodes, [&](LSBaseSDNode *Ldn) {
+ return isAlias(Ldn, StoreNodes[i].MemNode);
+ }))
break;
// Mark this node as useful.
@@ -11899,6 +12099,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
}
}
+ StoreNodes.erase(StoreNodes.begin() + NumElem, StoreNodes.end());
return true;
}
@@ -12088,11 +12289,9 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
// See if we can simplify the input to this truncstore with knowledge that
// only the low bits are being used. For example:
// "truncstore (or (shl x, 8), y), i8" -> "truncstore y, i8"
- SDValue Shorter =
- GetDemandedBits(Value,
- APInt::getLowBitsSet(
- Value.getValueType().getScalarType().getSizeInBits(),
- ST->getMemoryVT().getScalarType().getSizeInBits()));
+ SDValue Shorter = GetDemandedBits(
+ Value, APInt::getLowBitsSet(Value.getScalarValueSizeInBits(),
+ ST->getMemoryVT().getScalarSizeInBits()));
AddToWorklist(Value.getNode());
if (Shorter.getNode())
return DAG.getTruncStore(Chain, SDLoc(N), Shorter,
@@ -12100,10 +12299,10 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
// Otherwise, see if we can simplify the operation with
// SimplifyDemandedBits, which only works if the value has a single use.
- if (SimplifyDemandedBits(Value,
- APInt::getLowBitsSet(
- Value.getValueType().getScalarType().getSizeInBits(),
- ST->getMemoryVT().getScalarType().getSizeInBits())))
+ if (SimplifyDemandedBits(
+ Value,
+ APInt::getLowBitsSet(Value.getScalarValueSizeInBits(),
+ ST->getMemoryVT().getScalarSizeInBits())))
return SDValue(N, 0);
}
@@ -12144,19 +12343,20 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
// Only perform this optimization before the types are legal, because we
// don't want to perform this optimization on every DAGCombine invocation.
if (!LegalTypes) {
- bool EverChanged = false;
-
- do {
+ for (;;) {
// There can be multiple store sequences on the same chain.
// Keep trying to merge store sequences until we are unable to do so
// or until we merge the last store on the chain.
- bool Changed = MergeConsecutiveStores(ST);
- EverChanged |= Changed;
+ SmallVector<MemOpLink, 8> StoreNodes;
+ bool Changed = MergeConsecutiveStores(ST, StoreNodes);
if (!Changed) break;
- } while (ST->getOpcode() != ISD::DELETED_NODE);
- if (EverChanged)
- return SDValue(N, 0);
+ if (any_of(StoreNodes,
+ [ST](const MemOpLink &Link) { return Link.MemNode == ST; })) {
+ // ST has been merged and no longer exists.
+ return SDValue(N, 0);
+ }
+ }
}
// Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr'
@@ -12169,14 +12369,123 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
return NewSt;
}
+ if (SDValue NewSt = splitMergedValStore(ST))
+ return NewSt;
+
return ReduceLoadOpStoreWidth(N);
}
+/// For the store instruction sequence below, the F and I values
+/// are bundled together as an i64 value before being stored into memory.
+/// Sometimes it is more efficient to generate separate stores for F and I,
+/// which can remove the bitwise instructions or sink them to colder places.
+///
+/// (store (or (zext (bitcast F to i32) to i64),
+/// (shl (zext I to i64), 32)), addr) -->
+/// (store F, addr) and (store I, addr+4)
+///
+/// Similarly, splitting other merged stores can also be beneficial, for example:
+/// For pair of {i32, i32}, i64 store --> two i32 stores.
+/// For pair of {i32, i16}, i64 store --> two i32 stores.
+/// For pair of {i16, i16}, i32 store --> two i16 stores.
+/// For pair of {i16, i8}, i32 store --> two i16 stores.
+/// For pair of {i8, i8}, i16 store --> two i8 stores.
+///
+/// We allow each target to determine specifically which kind of splitting is
+/// supported.
+///
+/// The store patterns are commonly seen from the simple code snippet below
+/// if only std::make_pair(...) is SROA-transformed before being inlined into hoo.
+/// void goo(const std::pair<int, float> &);
+/// void hoo() {
+/// ...
+/// goo(std::make_pair(tmp, ftmp));
+/// ...
+/// }
+///
+SDValue DAGCombiner::splitMergedValStore(StoreSDNode *ST) {
+ if (OptLevel == CodeGenOpt::None)
+ return SDValue();
+
+ SDValue Val = ST->getValue();
+ SDLoc DL(ST);
+
+ // Match OR operand.
+ if (!Val.getValueType().isScalarInteger() || Val.getOpcode() != ISD::OR)
+ return SDValue();
+
+ // Match SHL operand and get Lower and Higher parts of Val.
+ SDValue Op1 = Val.getOperand(0);
+ SDValue Op2 = Val.getOperand(1);
+ SDValue Lo, Hi;
+ if (Op1.getOpcode() != ISD::SHL) {
+ std::swap(Op1, Op2);
+ if (Op1.getOpcode() != ISD::SHL)
+ return SDValue();
+ }
+ Lo = Op2;
+ Hi = Op1.getOperand(0);
+ if (!Op1.hasOneUse())
+ return SDValue();
+
+ // Match shift amount to HalfValBitSize.
+ unsigned HalfValBitSize = Val.getValueSizeInBits() / 2;
+ ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(Op1.getOperand(1));
+ if (!ShAmt || ShAmt->getAPIntValue() != HalfValBitSize)
+ return SDValue();
+
+ // Lo and Hi are zero-extended from integers whose size is less than or
+ // equal to HalfValBitSize.
+ if (Lo.getOpcode() != ISD::ZERO_EXTEND || !Lo.hasOneUse() ||
+ !Lo.getOperand(0).getValueType().isScalarInteger() ||
+ Lo.getOperand(0).getValueSizeInBits() > HalfValBitSize ||
+ Hi.getOpcode() != ISD::ZERO_EXTEND || !Hi.hasOneUse() ||
+ !Hi.getOperand(0).getValueType().isScalarInteger() ||
+ Hi.getOperand(0).getValueSizeInBits() > HalfValBitSize)
+ return SDValue();
+
+ // Use the EVT of low and high parts before bitcast as the input
+ // of target query.
+ EVT LowTy = (Lo.getOperand(0).getOpcode() == ISD::BITCAST)
+ ? Lo.getOperand(0).getValueType()
+ : Lo.getValueType();
+ EVT HighTy = (Hi.getOperand(0).getOpcode() == ISD::BITCAST)
+ ? Hi.getOperand(0).getValueType()
+ : Hi.getValueType();
+ if (!TLI.isMultiStoresCheaperThanBitsMerge(LowTy, HighTy))
+ return SDValue();
+
+ // Start to split store.
+ unsigned Alignment = ST->getAlignment();
+ MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
+ AAMDNodes AAInfo = ST->getAAInfo();
+
+ // Change the sizes of Lo and Hi's value types to HalfValBitSize.
+ EVT VT = EVT::getIntegerVT(*DAG.getContext(), HalfValBitSize);
+ Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Lo.getOperand(0));
+ Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Hi.getOperand(0));
+
+ SDValue Chain = ST->getChain();
+ SDValue Ptr = ST->getBasePtr();
+ // Lower value store.
+ SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
+ ST->getAlignment(), MMOFlags, AAInfo);
+ Ptr =
+ DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
+ DAG.getConstant(HalfValBitSize / 8, DL, Ptr.getValueType()));
+ // Higher value store.
+ SDValue St1 =
+ DAG.getStore(St0, DL, Hi, Ptr,
+ ST->getPointerInfo().getWithOffset(HalfValBitSize / 8),
+ Alignment / 2, MMOFlags, AAInfo);
+ return St1;
+}
+
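
As a standalone illustration (not part of the patch), here is the source-level shape of the pattern splitMergedValStore rewrites, assuming a little-endian target and an 8-byte destination buffer; the function names are hypothetical.

// Hypothetical standalone sketch; not LLVM code.
#include <cstdint>
#include <cstring>

void storeMerged(unsigned char *p, float f, uint32_t i) {
  uint32_t fbits;
  std::memcpy(&fbits, &f, sizeof fbits); // (bitcast F to i32)
  // (or (zext (bitcast F to i32) to i64), (shl (zext I to i64), 32))
  uint64_t v = (uint64_t)fbits | ((uint64_t)i << 32);
  std::memcpy(p, &v, sizeof v);          // one i64 store
}

void storeSplit(unsigned char *p, float f, uint32_t i) {
  std::memcpy(p, &f, sizeof f);     // (store F, addr)
  std::memcpy(p + 4, &i, sizeof i); // (store I, addr+4)
}

int main() {
  unsigned char a[8], b[8];
  storeMerged(a, 1.5f, 7);
  storeSplit(b, 1.5f, 7);
  return std::memcmp(a, b, 8) != 0; // 0 on little-endian targets
}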
SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
SDValue InVec = N->getOperand(0);
SDValue InVal = N->getOperand(1);
SDValue EltNo = N->getOperand(2);
- SDLoc dl(N);
+ SDLoc DL(N);
// If the inserted element is an UNDEF, just use the input vector.
if (InVal.isUndef())
@@ -12206,7 +12515,7 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
cast<ConstantSDNode>(InVec.getOperand(2))->getZExtValue();
if (Elt < OtherElt) {
// Swap nodes.
- SDValue NewOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), VT,
+ SDValue NewOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT,
InVec.getOperand(0), InVal, EltNo);
AddToWorklist(NewOp.getNode());
return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(InVec.getNode()),
@@ -12237,13 +12546,13 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
EVT OpVT = Ops[0].getValueType();
if (InVal.getValueType() != OpVT)
InVal = OpVT.bitsGT(InVal.getValueType()) ?
- DAG.getNode(ISD::ANY_EXTEND, dl, OpVT, InVal) :
- DAG.getNode(ISD::TRUNCATE, dl, OpVT, InVal);
+ DAG.getNode(ISD::ANY_EXTEND, DL, OpVT, InVal) :
+ DAG.getNode(ISD::TRUNCATE, DL, OpVT, InVal);
Ops[Elt] = InVal;
}
// Return the new vector
- return DAG.getBuildVector(VT, dl, Ops);
+ return DAG.getBuildVector(VT, DL, Ops);
}
SDValue DAGCombiner::ReplaceExtractVectorEltOfLoadWithNarrowedLoad(
@@ -12544,7 +12853,7 @@ SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) {
return SDValue();
unsigned NumInScalars = N->getNumOperands();
- SDLoc dl(N);
+ SDLoc DL(N);
EVT VT = N->getValueType(0);
// Check to see if this is a BUILD_VECTOR of a bunch of values
@@ -12603,7 +12912,7 @@ SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) {
unsigned ElemRatio = OutScalarTy.getSizeInBits()/SourceType.getSizeInBits();
assert(ElemRatio > 1 && "Invalid element size ratio");
SDValue Filler = AllAnyExt ? DAG.getUNDEF(SourceType):
- DAG.getConstant(0, SDLoc(N), SourceType);
+ DAG.getConstant(0, DL, SourceType);
unsigned NewBVElems = ElemRatio * VT.getVectorNumElements();
SmallVector<SDValue, 8> Ops(NewBVElems, Filler);
@@ -12634,7 +12943,7 @@ SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) {
if (!isTypeLegal(VecVT)) return SDValue();
// Make the new BUILD_VECTOR.
- SDValue BV = DAG.getBuildVector(VecVT, dl, Ops);
+ SDValue BV = DAG.getBuildVector(VecVT, DL, Ops);
// The new BUILD_VECTOR node has the potential to be further optimized.
AddToWorklist(BV.getNode());
@@ -12646,7 +12955,7 @@ SDValue DAGCombiner::reduceBuildVecConvertToConvertBuildVec(SDNode *N) {
EVT VT = N->getValueType(0);
unsigned NumInScalars = N->getNumOperands();
- SDLoc dl(N);
+ SDLoc DL(N);
EVT SrcVT = MVT::Other;
unsigned Opcode = ISD::DELETED_NODE;
@@ -12707,30 +13016,126 @@ SDValue DAGCombiner::reduceBuildVecConvertToConvertBuildVec(SDNode *N) {
else
Opnds.push_back(In.getOperand(0));
}
- SDValue BV = DAG.getBuildVector(NVT, dl, Opnds);
+ SDValue BV = DAG.getBuildVector(NVT, DL, Opnds);
AddToWorklist(BV.getNode());
- return DAG.getNode(Opcode, dl, VT, BV);
+ return DAG.getNode(Opcode, DL, VT, BV);
}
-SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
- unsigned NumInScalars = N->getNumOperands();
- SDLoc dl(N);
+SDValue DAGCombiner::createBuildVecShuffle(SDLoc DL, SDNode *N,
+ ArrayRef<int> VectorMask,
+ SDValue VecIn1, SDValue VecIn2,
+ unsigned LeftIdx) {
+ MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout());
+ SDValue ZeroIdx = DAG.getConstant(0, DL, IdxTy);
+
EVT VT = N->getValueType(0);
+ EVT InVT1 = VecIn1.getValueType();
+ EVT InVT2 = VecIn2.getNode() ? VecIn2.getValueType() : InVT1;
+
+ unsigned Vec2Offset = InVT1.getVectorNumElements();
+ unsigned NumElems = VT.getVectorNumElements();
+ unsigned ShuffleNumElems = NumElems;
+
+ // We can't generate a shuffle node with mismatched input and output types.
+ // Try to make the types match the type of the output.
+ if (InVT1 != VT || InVT2 != VT) {
+ if ((VT.getSizeInBits() % InVT1.getSizeInBits() == 0) && InVT1 == InVT2) {
+ // If the output vector length is a multiple of both input lengths,
+ // we can concatenate them and pad the rest with undefs.
+ unsigned NumConcats = VT.getSizeInBits() / InVT1.getSizeInBits();
+ assert(NumConcats >= 2 && "Concat needs at least two inputs!");
+ SmallVector<SDValue, 2> ConcatOps(NumConcats, DAG.getUNDEF(InVT1));
+ ConcatOps[0] = VecIn1;
+ ConcatOps[1] = VecIn2 ? VecIn2 : DAG.getUNDEF(InVT1);
+ VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
+ VecIn2 = SDValue();
+ } else if (InVT1.getSizeInBits() == VT.getSizeInBits() * 2) {
+ if (!TLI.isExtractSubvectorCheap(VT, NumElems))
+ return SDValue();
- // A vector built entirely of undefs is undef.
- if (ISD::allOperandsUndef(N))
- return DAG.getUNDEF(VT);
+ if (!VecIn2.getNode()) {
+ // If we only have one input vector, and it's twice the size of the
+ // output, split it in two.
+ VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1,
+ DAG.getConstant(NumElems, DL, IdxTy));
+ VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1, ZeroIdx);
+ // Since we now have shorter input vectors, adjust the offset of the
+ // second vector's start.
+ Vec2Offset = NumElems;
+ } else if (InVT2.getSizeInBits() <= InVT1.getSizeInBits()) {
+ // VecIn1 is wider than the output, and we have another, possibly
+ // smaller input. Pad the smaller input with undefs, shuffle at the
+ // input vector width, and extract the output.
+ // The shuffle type is different than VT, so check legality again.
+ if (LegalOperations &&
+ !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, InVT1))
+ return SDValue();
- if (SDValue V = reduceBuildVecExtToExtBuildVec(N))
- return V;
+ // Legalizing INSERT_SUBVECTOR is tricky - you basically have to
+ // lower it back into a BUILD_VECTOR. So if the inserted type is
+ // illegal, don't even try.
+ if (InVT1 != InVT2) {
+ if (!TLI.isTypeLegal(InVT2))
+ return SDValue();
+ VecIn2 = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, InVT1,
+ DAG.getUNDEF(InVT1), VecIn2, ZeroIdx);
+ }
+ ShuffleNumElems = NumElems * 2;
+ } else {
+ // Both VecIn1 and VecIn2 are wider than the output, and VecIn2 is wider
+ // than VecIn1. We can't handle this for now - this case will disappear
+ // when we start sorting the vectors by type.
+ return SDValue();
+ }
+ } else {
+ // TODO: Support cases where the length mismatch isn't exactly by a
+ // factor of 2.
+ // TODO: Move this check upwards, so that if we have bad type
+ // mismatches, we don't create any DAG nodes.
+ return SDValue();
+ }
+ }
- if (SDValue V = reduceBuildVecConvertToConvertBuildVec(N))
- return V;
+ // Initialize mask to undef.
+ SmallVector<int, 8> Mask(ShuffleNumElems, -1);
+
+ // Only need to run up to the number of elements actually used, not the
+ // total number of elements in the shuffle - if we are shuffling a wider
+ // vector, the high lanes should be set to undef.
+ for (unsigned i = 0; i != NumElems; ++i) {
+ if (VectorMask[i] <= 0)
+ continue;
+
+ unsigned ExtIndex = N->getOperand(i).getConstantOperandVal(1);
+ if (VectorMask[i] == (int)LeftIdx) {
+ Mask[i] = ExtIndex;
+ } else if (VectorMask[i] == (int)LeftIdx + 1) {
+ Mask[i] = Vec2Offset + ExtIndex;
+ }
+ }
+
+ // The types of the input vectors may have changed above.
+ InVT1 = VecIn1.getValueType();
+
+ // If we already have a VecIn2, it should have the same type as VecIn1.
+ // If we don't, get an undef/zero vector of the appropriate type.
+ VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(InVT1);
+ assert(InVT1 == VecIn2.getValueType() && "Unexpected second input type.");
+
+ SDValue Shuffle = DAG.getVectorShuffle(InVT1, DL, VecIn1, VecIn2, Mask);
+ if (ShuffleNumElems > NumElems)
+ Shuffle = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Shuffle, ZeroIdx);
- // Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT
- // operations. If so, and if the EXTRACT_VECTOR_ELT vector inputs come from
- // at most two distinct vectors, turn this into a shuffle node.
+ return Shuffle;
+}
+
+// Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT
+// operations. If the types of the vectors we're extracting from allow it,
+// turn this into a vector_shuffle node.
+SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) {
+ SDLoc DL(N);
+ EVT VT = N->getValueType(0);
// Only type-legal BUILD_VECTOR nodes are converted to shuffle nodes.
if (!isTypeLegal(VT))
@@ -12740,149 +13145,169 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
if (LegalOperations && !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, VT))
return SDValue();
- SDValue VecIn1, VecIn2;
bool UsesZeroVector = false;
- for (unsigned i = 0; i != NumInScalars; ++i) {
+ unsigned NumElems = N->getNumOperands();
+
+ // Record, for each element of the newly built vector, which input vector
+ // that element comes from. -1 stands for undef, 0 for the zero vector,
+ // and positive values for the input vectors.
+ // VectorMask maps each element to its vector number, and VecIn maps vector
+ // numbers to their initial SDValues.
+
+ SmallVector<int, 8> VectorMask(NumElems, -1);
+ SmallVector<SDValue, 8> VecIn;
+ VecIn.push_back(SDValue());
+
+ for (unsigned i = 0; i != NumElems; ++i) {
SDValue Op = N->getOperand(i);
- // Ignore undef inputs.
- if (Op.isUndef()) continue;
- // See if we can combine this build_vector into a blend with a zero vector.
- if (!VecIn2.getNode() && (isNullConstant(Op) || isNullFPConstant(Op))) {
+ if (Op.isUndef())
+ continue;
+
+ // See if we can use a blend with a zero vector.
+ // TODO: Should we generalize this to a blend with an arbitrary constant
+ // vector?
+ if (isNullConstant(Op) || isNullFPConstant(Op)) {
UsesZeroVector = true;
+ VectorMask[i] = 0;
continue;
}
- // If this input is something other than a EXTRACT_VECTOR_ELT with a
- // constant index, bail out.
+ // Not an undef or zero. If the input is something other than an
+ // EXTRACT_VECTOR_ELT with a constant index, bail out.
if (Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
- !isa<ConstantSDNode>(Op.getOperand(1))) {
- VecIn1 = VecIn2 = SDValue(nullptr, 0);
- break;
- }
+ !isa<ConstantSDNode>(Op.getOperand(1)))
+ return SDValue();
- // We allow up to two distinct input vectors.
SDValue ExtractedFromVec = Op.getOperand(0);
- if (ExtractedFromVec == VecIn1 || ExtractedFromVec == VecIn2)
- continue;
- if (!VecIn1.getNode()) {
- VecIn1 = ExtractedFromVec;
- } else if (!VecIn2.getNode() && !UsesZeroVector) {
- VecIn2 = ExtractedFromVec;
- } else {
- // Too many inputs.
- VecIn1 = VecIn2 = SDValue(nullptr, 0);
- break;
- }
- }
-
- // If everything is good, we can make a shuffle operation.
- if (VecIn1.getNode()) {
- unsigned InNumElements = VecIn1.getValueType().getVectorNumElements();
- SmallVector<int, 8> Mask;
- for (unsigned i = 0; i != NumInScalars; ++i) {
- unsigned Opcode = N->getOperand(i).getOpcode();
- if (Opcode == ISD::UNDEF) {
- Mask.push_back(-1);
- continue;
- }
+ // All inputs must have the same element type as the output.
+ if (VT.getVectorElementType() !=
+ ExtractedFromVec.getValueType().getVectorElementType())
+ return SDValue();
- // Operands can also be zero.
- if (Opcode != ISD::EXTRACT_VECTOR_ELT) {
- assert(UsesZeroVector &&
- (Opcode == ISD::Constant || Opcode == ISD::ConstantFP) &&
- "Unexpected node found!");
- Mask.push_back(NumInScalars+i);
- continue;
- }
+ // Have we seen this input vector before?
+ // The vectors are expected to be tiny (usually 1 or 2 elements), so using
+ // a map back from SDValues to numbers isn't worth it.
+ unsigned Idx = std::distance(
+ VecIn.begin(), std::find(VecIn.begin(), VecIn.end(), ExtractedFromVec));
+ if (Idx == VecIn.size())
+ VecIn.push_back(ExtractedFromVec);
- // If extracting from the first vector, just use the index directly.
- SDValue Extract = N->getOperand(i);
- SDValue ExtVal = Extract.getOperand(1);
- unsigned ExtIndex = cast<ConstantSDNode>(ExtVal)->getZExtValue();
- if (Extract.getOperand(0) == VecIn1) {
- Mask.push_back(ExtIndex);
- continue;
- }
+ VectorMask[i] = Idx;
+ }
- // Otherwise, use InIdx + InputVecSize
- Mask.push_back(InNumElements + ExtIndex);
- }
+ // If we didn't find at least one input vector, bail out.
+ if (VecIn.size() < 2)
+ return SDValue();
- // Avoid introducing illegal shuffles with zero.
- if (UsesZeroVector && !TLI.isVectorClearMaskLegal(Mask, VT))
+ // TODO: We want to sort the vectors by descending length, so that adjacent
+ // pairs have similar length, and the longer vector is always first in the
+ // pair.
+
+ // TODO: Should this fire if some of the input vectors have an illegal type
+ // (as it does now), or should we let legalization run its course first?
+
+ // Shuffle phase:
+ // Take pairs of vectors, and shuffle them so that the result has elements
+ // from these vectors in the correct places.
+ // For example, given:
+ // t10: i32 = extract_vector_elt t1, Constant:i64<0>
+ // t11: i32 = extract_vector_elt t2, Constant:i64<0>
+ // t12: i32 = extract_vector_elt t3, Constant:i64<0>
+ // t13: i32 = extract_vector_elt t1, Constant:i64<1>
+ // t14: v4i32 = BUILD_VECTOR t10, t11, t12, t13
+ // We will generate:
+ // t20: v4i32 = vector_shuffle<0,4,u,1> t1, t2
+ // t21: v4i32 = vector_shuffle<u,u,0,u> t3, undef
+ SmallVector<SDValue, 4> Shuffles;
+ for (unsigned In = 0, Len = (VecIn.size() / 2); In < Len; ++In) {
+ unsigned LeftIdx = 2 * In + 1;
+ SDValue VecLeft = VecIn[LeftIdx];
+ SDValue VecRight =
+ (LeftIdx + 1) < VecIn.size() ? VecIn[LeftIdx + 1] : SDValue();
+
+ if (SDValue Shuffle = createBuildVecShuffle(DL, N, VectorMask, VecLeft,
+ VecRight, LeftIdx))
+ Shuffles.push_back(Shuffle);
+ else
return SDValue();
+ }
- // We can't generate a shuffle node with mismatched input and output types.
- // Attempt to transform a single input vector to the correct type.
- if ((VT != VecIn1.getValueType())) {
- // If the input vector type has a different base type to the output
- // vector type, bail out.
- EVT VTElemType = VT.getVectorElementType();
- if ((VecIn1.getValueType().getVectorElementType() != VTElemType) ||
- (VecIn2.getNode() &&
- (VecIn2.getValueType().getVectorElementType() != VTElemType)))
- return SDValue();
+ // If we need the zero vector as an "ingredient" in the blend tree, add it
+ // to the list of shuffles.
+ if (UsesZeroVector)
+ Shuffles.push_back(VT.isInteger() ? DAG.getConstant(0, DL, VT)
+ : DAG.getConstantFP(0.0, DL, VT));
- // If the input vector is too small, widen it.
- // We only support widening of vectors which are half the size of the
- // output registers. For example XMM->YMM widening on X86 with AVX.
- EVT VecInT = VecIn1.getValueType();
- if (VecInT.getSizeInBits() * 2 == VT.getSizeInBits()) {
- // If we only have one small input, widen it by adding undef values.
- if (!VecIn2.getNode())
- VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, VecIn1,
- DAG.getUNDEF(VecIn1.getValueType()));
- else if (VecIn1.getValueType() == VecIn2.getValueType()) {
- // If we have two small inputs of the same type, try to concat them.
- VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, VecIn1, VecIn2);
- VecIn2 = SDValue(nullptr, 0);
- } else
- return SDValue();
- } else if (VecInT.getSizeInBits() == VT.getSizeInBits() * 2) {
- // If the input vector is too large, try to split it.
- // We don't support having two input vectors that are too large.
- // If the zero vector was used, we can not split the vector,
- // since we'd need 3 inputs.
- if (UsesZeroVector || VecIn2.getNode())
- return SDValue();
+ // If we only have one shuffle, we're done.
+ if (Shuffles.size() == 1)
+ return Shuffles[0];
- if (!TLI.isExtractSubvectorCheap(VT, VT.getVectorNumElements()))
- return SDValue();
+ // Update the vector mask to point to the post-shuffle vectors.
+ for (int &Vec : VectorMask)
+ if (Vec == 0)
+ Vec = Shuffles.size() - 1;
+ else
+ Vec = (Vec - 1) / 2;
+
+ // More than one shuffle. Generate a binary tree of blends, e.g. if from
+ // the previous step we got the set of shuffles t10, t11, t12, t13, we will
+ // generate:
+ // t10: v8i32 = vector_shuffle<0,8,u,u,u,u,u,u> t1, t2
+ // t11: v8i32 = vector_shuffle<u,u,0,8,u,u,u,u> t3, t4
+ // t12: v8i32 = vector_shuffle<u,u,u,u,0,8,u,u> t5, t6
+ // t13: v8i32 = vector_shuffle<u,u,u,u,u,u,0,8> t7, t8
+ // t20: v8i32 = vector_shuffle<0,1,10,11,u,u,u,u> t10, t11
+ // t21: v8i32 = vector_shuffle<u,u,u,u,4,5,14,15> t12, t13
+ // t30: v8i32 = vector_shuffle<0,1,2,3,12,13,14,15> t20, t21
+
+ // Make sure the initial size of the shuffle list is even.
+ if (Shuffles.size() % 2)
+ Shuffles.push_back(DAG.getUNDEF(VT));
+
+ for (unsigned CurSize = Shuffles.size(); CurSize > 1; CurSize /= 2) {
+ if (CurSize % 2) {
+ Shuffles[CurSize] = DAG.getUNDEF(VT);
+ CurSize++;
+ }
+ for (unsigned In = 0, Len = CurSize / 2; In < Len; ++In) {
+ int Left = 2 * In;
+ int Right = 2 * In + 1;
+ SmallVector<int, 8> Mask(NumElems, -1);
+ for (unsigned i = 0; i != NumElems; ++i) {
+ if (VectorMask[i] == Left) {
+ Mask[i] = i;
+ VectorMask[i] = In;
+ } else if (VectorMask[i] == Right) {
+ Mask[i] = i + NumElems;
+ VectorMask[i] = In;
+ }
+ }
- // Try to replace VecIn1 with two extract_subvectors
- // No need to update the masks, they should still be correct.
- VecIn2 = DAG.getNode(
- ISD::EXTRACT_SUBVECTOR, dl, VT, VecIn1,
- DAG.getConstant(VT.getVectorNumElements(), dl,
- TLI.getVectorIdxTy(DAG.getDataLayout())));
- VecIn1 = DAG.getNode(
- ISD::EXTRACT_SUBVECTOR, dl, VT, VecIn1,
- DAG.getConstant(0, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
- } else
- return SDValue();
+ Shuffles[In] =
+ DAG.getVectorShuffle(VT, DL, Shuffles[Left], Shuffles[Right], Mask);
}
+ }
- if (UsesZeroVector)
- VecIn2 = VT.isInteger() ? DAG.getConstant(0, dl, VT) :
- DAG.getConstantFP(0.0, dl, VT);
- else
- // If VecIn2 is unused then change it to undef.
- VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(VT);
+ return Shuffles[0];
+}
- // Check that we were able to transform all incoming values to the same
- // type.
- if (VecIn2.getValueType() != VecIn1.getValueType() ||
- VecIn1.getValueType() != VT)
- return SDValue();
+SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
+ EVT VT = N->getValueType(0);
- // Return the new VECTOR_SHUFFLE node.
- SDValue Ops[2];
- Ops[0] = VecIn1;
- Ops[1] = VecIn2;
- return DAG.getVectorShuffle(VT, dl, Ops[0], Ops[1], Mask);
- }
+ // A vector built entirely of undefs is undef.
+ if (ISD::allOperandsUndef(N))
+ return DAG.getUNDEF(VT);
+
+ if (SDValue V = reduceBuildVecExtToExtBuildVec(N))
+ return V;
+
+ if (SDValue V = reduceBuildVecConvertToConvertBuildVec(N))
+ return V;
+
+ if (SDValue V = reduceBuildVecToShuffle(N))
+ return V;
return SDValue();
}
@@ -13071,8 +13496,7 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
if (!TLI.isTypeLegal(NVT) || !TLI.isTypeLegal(Scalar.getValueType()))
return SDValue();
- SDLoc dl = SDLoc(N);
- SDValue Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NVT, Scalar);
+ SDValue Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), NVT, Scalar);
return DAG.getBitcast(VT, Res);
}
}
@@ -13208,7 +13632,6 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) {
V = V.getOperand(0);
if (V->getOpcode() == ISD::INSERT_SUBVECTOR) {
- SDLoc dl(N);
// Handle only simple case where vector being inserted and vector
// being extracted are of same type, and are half size of larger vectors.
EVT BigVT = V->getOperand(0).getValueType();
@@ -13228,11 +13651,11 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) {
// Into:
// indices are equal or bit offsets are equal => V1
// otherwise => (extract_subvec V1, ExtIdx)
- if (InsIdx->getZExtValue() * SmallVT.getScalarType().getSizeInBits() ==
- ExtIdx->getZExtValue() * NVT.getScalarType().getSizeInBits())
+ if (InsIdx->getZExtValue() * SmallVT.getScalarSizeInBits() ==
+ ExtIdx->getZExtValue() * NVT.getScalarSizeInBits())
return DAG.getBitcast(NVT, V->getOperand(1));
return DAG.getNode(
- ISD::EXTRACT_SUBVECTOR, dl, NVT,
+ ISD::EXTRACT_SUBVECTOR, SDLoc(N), NVT,
DAG.getBitcast(N->getOperand(0).getValueType(), V->getOperand(0)),
N->getOperand(1));
}
@@ -13391,6 +13814,84 @@ static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) {
return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
}
+// Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
+// BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
+//
+// SHUFFLE(BUILD_VECTOR(), BUILD_VECTOR()) -> BUILD_VECTOR() is always
+// a simplification in some sense, but it isn't appropriate in general: some
+// BUILD_VECTORs are substantially cheaper than others. The general case
+// of a BUILD_VECTOR requires inserting each element individually (or
+// performing the equivalent in a temporary stack variable). A BUILD_VECTOR of
+// all constants is a single constant pool load. A BUILD_VECTOR where each
+// element is identical is a splat. A BUILD_VECTOR where most of the operands
+// are undef lowers to a small number of element insertions.
+//
+// To deal with this, we currently use a bunch of mostly arbitrary heuristics.
+// We don't fold shuffles where one side is a non-zero constant, and we don't
+// fold shuffles if the resulting BUILD_VECTOR would have duplicate
+// non-constant operands. This seems to work out reasonably well in practice.
+static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN,
+ SelectionDAG &DAG,
+ const TargetLowering &TLI) {
+ EVT VT = SVN->getValueType(0);
+ unsigned NumElts = VT.getVectorNumElements();
+ SDValue N0 = SVN->getOperand(0);
+ SDValue N1 = SVN->getOperand(1);
+
+ if (!N0->hasOneUse() || !N1->hasOneUse())
+ return SDValue();
+ // If only one of N0,N1 is constant, bail out if it is not ALL_ZEROS, as
+ // discussed above.
+ if (!N1.isUndef()) {
+ bool N0AnyConst = isAnyConstantBuildVector(N0.getNode());
+ bool N1AnyConst = isAnyConstantBuildVector(N1.getNode());
+ if (N0AnyConst && !N1AnyConst && !ISD::isBuildVectorAllZeros(N0.getNode()))
+ return SDValue();
+ if (!N0AnyConst && N1AnyConst && !ISD::isBuildVectorAllZeros(N1.getNode()))
+ return SDValue();
+ }
+
+ SmallVector<SDValue, 8> Ops;
+ SmallSet<SDValue, 16> DuplicateOps;
+ for (int M : SVN->getMask()) {
+ SDValue Op = DAG.getUNDEF(VT.getScalarType());
+ if (M >= 0) {
+ int Idx = M < (int)NumElts ? M : M - NumElts;
+ SDValue &S = (M < (int)NumElts ? N0 : N1);
+ if (S.getOpcode() == ISD::BUILD_VECTOR) {
+ Op = S.getOperand(Idx);
+ } else if (S.getOpcode() == ISD::SCALAR_TO_VECTOR) {
+ if (Idx == 0)
+ Op = S.getOperand(0);
+ } else {
+ // Operand can't be combined - bail out.
+ return SDValue();
+ }
+ }
+
+ // Don't duplicate a non-constant BUILD_VECTOR operand; semantically, this is
+ // fine, but it's likely to generate low-quality code if the target can't
+ // reconstruct an appropriate shuffle.
+ if (!Op.isUndef() && !isa<ConstantSDNode>(Op) && !isa<ConstantFPSDNode>(Op))
+ if (!DuplicateOps.insert(Op).second)
+ return SDValue();
+
+ Ops.push_back(Op);
+ }
+ // BUILD_VECTOR requires all inputs to be of the same type, find the
+ // maximum type and extend them all.
+ EVT SVT = VT.getScalarType();
+ if (SVT.isInteger())
+ for (SDValue &Op : Ops)
+ SVT = (SVT.bitsLT(Op.getValueType()) ? Op.getValueType() : SVT);
+ if (SVT != VT.getScalarType())
+ for (SDValue &Op : Ops)
+ Op = TLI.isZExtFree(Op.getValueType(), SVT)
+ ? DAG.getZExtOrTrunc(Op, SDLoc(SVN), SVT)
+ : DAG.getSExtOrTrunc(Op, SDLoc(SVN), SVT);
+ return DAG.getBuildVector(VT, SDLoc(SVN), Ops);
+}
+
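
As a standalone illustration (not part of the patch), the semantics the helper above exploits: a shuffle of two BUILD_VECTORs merely re-picks scalar operands, so the result can itself be expressed as one BUILD_VECTOR. Undef lanes are modelled as 0 here purely for the sketch.

// Hypothetical standalone sketch; not LLVM code.
#include <cstdio>

int main() {
  int n0[4] = {10, 11, 12, 13}; // first BUILD_VECTOR's operands
  int n1[4] = {20, 21, 22, 23}; // second BUILD_VECTOR's operands
  int mask[4] = {0, 5, -1, 2};  // -1 marks an undef lane
  for (int i = 0; i < 4; ++i) {
    int v = mask[i] < 0 ? 0 : (mask[i] < 4 ? n0[mask[i]] : n1[mask[i] - 4]);
    std::printf("%d ", v); // prints: 10 21 0 12
  }
  std::printf("\n");
  return 0;
}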
SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
EVT VT = N->getValueType(0);
unsigned NumElts = VT.getVectorNumElements();
@@ -13506,40 +14007,9 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
// Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
// BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
- if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT)) {
- SmallVector<SDValue, 8> Ops;
- for (int M : SVN->getMask()) {
- SDValue Op = DAG.getUNDEF(VT.getScalarType());
- if (M >= 0) {
- int Idx = M % NumElts;
- SDValue &S = (M < (int)NumElts ? N0 : N1);
- if (S.getOpcode() == ISD::BUILD_VECTOR && S.hasOneUse()) {
- Op = S.getOperand(Idx);
- } else if (S.getOpcode() == ISD::SCALAR_TO_VECTOR && S.hasOneUse()) {
- if (Idx == 0)
- Op = S.getOperand(0);
- } else {
- // Operand can't be combined - bail out.
- break;
- }
- }
- Ops.push_back(Op);
- }
- if (Ops.size() == VT.getVectorNumElements()) {
- // BUILD_VECTOR requires all inputs to be of the same type, find the
- // maximum type and extend them all.
- EVT SVT = VT.getScalarType();
- if (SVT.isInteger())
- for (SDValue &Op : Ops)
- SVT = (SVT.bitsLT(Op.getValueType()) ? Op.getValueType() : SVT);
- if (SVT != VT.getScalarType())
- for (SDValue &Op : Ops)
- Op = TLI.isZExtFree(Op.getValueType(), SVT)
- ? DAG.getZExtOrTrunc(Op, SDLoc(N), SVT)
- : DAG.getSExtOrTrunc(Op, SDLoc(N), SVT);
- return DAG.getBuildVector(VT, SDLoc(N), Ops);
- }
- }
+ if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT))
+ if (SDValue Res = combineShuffleOfScalars(SVN, DAG, TLI))
+ return Res;
// If this shuffle only has a single input that is a bitcasted shuffle,
// attempt to merge the 2 shuffles and suitably bitcast the inputs/output
@@ -13647,6 +14117,11 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
Level < AfterLegalizeDAG && TLI.isTypeLegal(VT)) {
ShuffleVectorSDNode *OtherSV = cast<ShuffleVectorSDNode>(N0);
+ // Don't try to fold splats; they're likely to simplify somehow, or they
+ // might be free.
+ if (OtherSV->isSplat())
+ return SDValue();
+
// The incoming shuffle must be of the same type as the result of the
// current shuffle.
assert(OtherSV->getOperand(0).getValueType() == VT &&
@@ -13773,10 +14248,20 @@ SDValue DAGCombiner::visitSCALAR_TO_VECTOR(SDNode *N) {
}
SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
+ EVT VT = N->getValueType(0);
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
SDValue N2 = N->getOperand(2);
+ // Combine INSERT_SUBVECTORs where we are inserting to the same index.
+ // INSERT_SUBVECTOR( INSERT_SUBVECTOR( Vec, SubOld, Idx ), SubNew, Idx )
+ // --> INSERT_SUBVECTOR( Vec, SubNew, Idx )
+ if (N0.getOpcode() == ISD::INSERT_SUBVECTOR &&
+ N0.getOperand(1).getValueType() == N1.getValueType() &&
+ N0.getOperand(2) == N2)
+ return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0.getOperand(0),
+ N1, N2);
+
if (N0.getValueType() != N1.getValueType())
return SDValue();
@@ -13785,7 +14270,6 @@ SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
if (N0.getOpcode() == ISD::CONCAT_VECTORS && N0->getNumOperands() == 2 &&
N2.getOpcode() == ISD::Constant) {
APInt InsIdx = cast<ConstantSDNode>(N2)->getAPIntValue();
- EVT VT = N->getValueType(0);
// Lower half: fold (insert_subvector (concat_vectors X, Y), Z) ->
// (concat_vectors Z, Y)
@@ -13836,7 +14320,7 @@ SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
EVT VT = N->getValueType(0);
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
- SDLoc dl(N);
+ SDLoc DL(N);
// Make sure we're not running after operation legalization where it
// may have custom lowered the vector shuffles.
@@ -13904,8 +14388,8 @@ SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
if (!TLI.isVectorClearMaskLegal(Indices, ClearVT))
return SDValue();
- SDValue Zero = DAG.getConstant(0, dl, ClearVT);
- return DAG.getBitcast(VT, DAG.getVectorShuffle(ClearVT, dl,
+ SDValue Zero = DAG.getConstant(0, DL, ClearVT);
+ return DAG.getBitcast(VT, DAG.getVectorShuffle(ClearVT, DL,
DAG.getBitcast(ClearVT, LHS),
Zero, Indices));
};
@@ -14119,6 +14603,8 @@ bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
MachineMemOperand::Flags MMOFlags = LLD->getMemOperand()->getFlags();
if (!RLD->isInvariant())
MMOFlags &= ~MachineMemOperand::MOInvariant;
+ if (!RLD->isDereferenceable())
+ MMOFlags &= ~MachineMemOperand::MODereferenceable;
if (LLD->getExtensionType() == ISD::NON_EXTLOAD) {
// FIXME: Discards pointer and AA info.
Load = DAG.getLoad(TheSelect->getValueType(0), SDLoc(TheSelect),
@@ -14146,6 +14632,73 @@ bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
return false;
}
+/// Try to fold an expression of the form (N0 cond N1) ? N2 : N3 to a shift and
+/// bitwise 'and'.
+SDValue DAGCombiner::foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0,
+ SDValue N1, SDValue N2, SDValue N3,
+ ISD::CondCode CC) {
+ // If this is a select where the false operand is zero and the compare is a
+ // check of the sign bit, see if we can perform the "gzip trick":
+ // select_cc setlt X, 0, A, 0 -> and (sra X, size(X)-1), A
+ // select_cc setgt X, 0, A, 0 -> and (not (sra X, size(X)-1)), A
+ EVT XType = N0.getValueType();
+ EVT AType = N2.getValueType();
+ if (!isNullConstant(N3) || !XType.bitsGE(AType))
+ return SDValue();
+
+ // If the comparison is testing for a positive value, we have to invert
+ // the sign bit mask, so only do that transform if the target has a bitwise
+ // 'and not' instruction (the invert is free).
+ if (CC == ISD::SETGT && TLI.hasAndNot(N2)) {
+ // (X > -1) ? A : 0
+ // (X > 0) ? X : 0 <-- This is canonical signed max.
+ if (!(isAllOnesConstant(N1) || (isNullConstant(N1) && N0 == N2)))
+ return SDValue();
+ } else if (CC == ISD::SETLT) {
+ // (X < 0) ? A : 0
+ // (X < 1) ? X : 0 <-- This is un-canonicalized signed min.
+ if (!(isNullConstant(N1) || (isOneConstant(N1) && N0 == N2)))
+ return SDValue();
+ } else {
+ return SDValue();
+ }
+
+ // and (sra X, size(X)-1), A -> "and (srl X, C2), A" iff A is a single-bit
+ // constant.
+ EVT ShiftAmtTy = getShiftAmountTy(N0.getValueType());
+ auto *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
+ if (N2C && ((N2C->getAPIntValue() & (N2C->getAPIntValue() - 1)) == 0)) {
+ unsigned ShCt = XType.getSizeInBits() - N2C->getAPIntValue().logBase2() - 1;
+ SDValue ShiftAmt = DAG.getConstant(ShCt, DL, ShiftAmtTy);
+ SDValue Shift = DAG.getNode(ISD::SRL, DL, XType, N0, ShiftAmt);
+ AddToWorklist(Shift.getNode());
+
+ if (XType.bitsGT(AType)) {
+ Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
+ AddToWorklist(Shift.getNode());
+ }
+
+ if (CC == ISD::SETGT)
+ Shift = DAG.getNOT(DL, Shift, AType);
+
+ return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
+ }
+
+ SDValue ShiftAmt = DAG.getConstant(XType.getSizeInBits() - 1, DL, ShiftAmtTy);
+ SDValue Shift = DAG.getNode(ISD::SRA, DL, XType, N0, ShiftAmt);
+ AddToWorklist(Shift.getNode());
+
+ if (XType.bitsGT(AType)) {
+ Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
+ AddToWorklist(Shift.getNode());
+ }
+
+ if (CC == ISD::SETGT)
+ Shift = DAG.getNOT(DL, Shift, AType);
+
+ return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
+}
+
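
As a standalone illustration (not part of the patch), the "gzip trick" for 32-bit signed x, in the SETLT case where A is not a single-bit constant (the path at the end of the function above):

// Hypothetical standalone sketch; not LLVM code.
#include <cstdint>
#include <cstdio>

int32_t selectForm(int32_t x, int32_t a) { return x < 0 ? a : 0; }

int32_t shiftAndForm(int32_t x, int32_t a) {
  // sra x, 31 is all-ones when x < 0 and zero otherwise (arithmetic
  // right shift of a negative value; guaranteed on mainstream compilers).
  return (x >> 31) & a;
}

int main() {
  std::printf("%d %d\n", selectForm(-5, 42), shiftAndForm(-5, 42)); // 42 42
  std::printf("%d %d\n", selectForm(5, 42), shiftAndForm(5, 42));   // 0 0
  return 0;
}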
/// Simplify an expression of the form (N0 cond N1) ? N2 : N3
/// where 'cond' is the comparison specified by CC.
SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
@@ -14242,48 +14795,8 @@ SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
}
}
- // Check to see if we can perform the "gzip trick", transforming
- // (select_cc setlt X, 0, A, 0) -> (and (sra X, (sub size(X), 1), A)
- if (isNullConstant(N3) && CC == ISD::SETLT &&
- (isNullConstant(N1) || // (a < 0) ? b : 0
- (isOneConstant(N1) && N0 == N2))) { // (a < 1) ? a : 0
- EVT XType = N0.getValueType();
- EVT AType = N2.getValueType();
- if (XType.bitsGE(AType)) {
- // and (sra X, size(X)-1, A) -> "and (srl X, C2), A" iff A is a
- // single-bit constant.
- if (N2C && ((N2C->getAPIntValue() & (N2C->getAPIntValue() - 1)) == 0)) {
- unsigned ShCtV = N2C->getAPIntValue().logBase2();
- ShCtV = XType.getSizeInBits() - ShCtV - 1;
- SDValue ShCt = DAG.getConstant(ShCtV, SDLoc(N0),
- getShiftAmountTy(N0.getValueType()));
- SDValue Shift = DAG.getNode(ISD::SRL, SDLoc(N0),
- XType, N0, ShCt);
- AddToWorklist(Shift.getNode());
-
- if (XType.bitsGT(AType)) {
- Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
- AddToWorklist(Shift.getNode());
- }
-
- return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
- }
-
- SDValue Shift = DAG.getNode(ISD::SRA, SDLoc(N0),
- XType, N0,
- DAG.getConstant(XType.getSizeInBits() - 1,
- SDLoc(N0),
- getShiftAmountTy(N0.getValueType())));
- AddToWorklist(Shift.getNode());
-
- if (XType.bitsGT(AType)) {
- Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
- AddToWorklist(Shift.getNode());
- }
-
- return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
- }
- }
+ if (SDValue V = foldSelectCCToShiftAnd(DL, N0, N1, N2, N3, CC))
+ return V;
// fold (select_cc seteq (and x, y), 0, 0, A) -> (and (shr (shl x)) A)
// where y is has a single bit set.
@@ -14511,30 +15024,51 @@ SDValue DAGCombiner::BuildUDIV(SDNode *N) {
return S;
}
+/// Determines the LogBase2 value for a non-null input value using the
+/// transform: LogBase2(V) = (EltBits - 1) - ctlz(V).
+SDValue DAGCombiner::BuildLogBase2(SDValue V, const SDLoc &DL) {
+ EVT VT = V.getValueType();
+ unsigned EltBits = VT.getScalarSizeInBits();
+ SDValue Ctlz = DAG.getNode(ISD::CTLZ, DL, VT, V);
+ SDValue Base = DAG.getConstant(EltBits - 1, DL, VT);
+ SDValue LogBase2 = DAG.getNode(ISD::SUB, DL, VT, Base, Ctlz);
+ return LogBase2;
+}
+
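
A scalar model of the same transform, using the GCC/Clang ctlz builtin (illustrative only; assumes a 32-bit element and a non-zero input, which is what the DAG-level precondition guarantees):

#include <cassert>

unsigned logBase2(unsigned V) {
  const unsigned EltBits = 32;
  return (EltBits - 1) - __builtin_clz(V); // ctlz is undefined for V == 0
}

int main() {
  assert(logBase2(1) == 0);
  assert(logBase2(8) == 3);
  assert(logBase2(0x80000000u) == 31);
}
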
+/// Newton iteration for a function F(X): X_{i+1} = X_i - F(X_i)/F'(X_i)
+/// For the reciprocal, we need to find the zero of the function:
+/// F(X) = A X - 1 [which has a zero at X = 1/A]
+/// =>
+/// X_{i+1} = X_i (2 - A X_i) = X_i + X_i (1 - A X_i) [this second form
+/// does not require additional intermediate precision]
SDValue DAGCombiner::BuildReciprocalEstimate(SDValue Op, SDNodeFlags *Flags) {
if (Level >= AfterLegalizeDAG)
return SDValue();
- // Expose the DAG combiner to the target combiner implementations.
- TargetLowering::DAGCombinerInfo DCI(DAG, Level, false, this);
+ // TODO: Handle half and/or extended types?
+ EVT VT = Op.getValueType();
+ if (VT.getScalarType() != MVT::f32 && VT.getScalarType() != MVT::f64)
+ return SDValue();
+
+ // If estimates are explicitly disabled for this function, we're done.
+ MachineFunction &MF = DAG.getMachineFunction();
+ int Enabled = TLI.getRecipEstimateDivEnabled(VT, MF);
+ if (Enabled == TLI.ReciprocalEstimate::Disabled)
+ return SDValue();
+
+ // Estimates may be explicitly enabled for this type with a custom number of
+ // refinement steps.
+ int Iterations = TLI.getDivRefinementSteps(VT, MF);
+ if (SDValue Est = TLI.getRecipEstimate(Op, DAG, Enabled, Iterations)) {
+ AddToWorklist(Est.getNode());
- unsigned Iterations = 0;
- if (SDValue Est = TLI.getRecipEstimate(Op, DCI, Iterations)) {
if (Iterations) {
- // Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
- // For the reciprocal, we need to find the zero of the function:
- // F(X) = A X - 1 [which has a zero at X = 1/A]
- // =>
- // X_{i+1} = X_i (2 - A X_i) = X_i + X_i (1 - A X_i) [this second form
- // does not require additional intermediate precision]
EVT VT = Op.getValueType();
SDLoc DL(Op);
SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
- AddToWorklist(Est.getNode());
-
// Newton iterations: Est = Est + Est (1 - Arg * Est)
- for (unsigned i = 0; i < Iterations; ++i) {
+ for (int i = 0; i < Iterations; ++i) {
SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Op, Est, Flags);
AddToWorklist(NewEst.getNode());
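
The refinement loop above, modeled on scalars (a sketch only; the real code emits FMUL/FSUB/FADD nodes):

#include <cmath>
#include <cstdio>

float refineRecip(float A, float Est, int Iterations) {
  // Newton iterations: Est = Est + Est * (1 - A * Est)
  for (int i = 0; i < Iterations; ++i)
    Est = Est + Est * (1.0f - A * Est);
  return Est;
}

int main() {
  // A crude starting estimate of 1/3 converges quadratically.
  float Est = refineRecip(3.0f, 0.3f, 2);
  std::printf("%f (error %g)\n", Est, std::fabs(Est - 1.0f / 3.0f));
}
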
@@ -14656,16 +15190,47 @@ SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags *Flags,
if (Level >= AfterLegalizeDAG)
return SDValue();
- // Expose the DAG combiner to the target combiner implementations.
- TargetLowering::DAGCombinerInfo DCI(DAG, Level, false, this);
- unsigned Iterations = 0;
+ // TODO: Handle half and/or extended types?
+ EVT VT = Op.getValueType();
+ if (VT.getScalarType() != MVT::f32 && VT.getScalarType() != MVT::f64)
+ return SDValue();
+
+ // If estimates are explicitly disabled for this function, we're done.
+ MachineFunction &MF = DAG.getMachineFunction();
+ int Enabled = TLI.getRecipEstimateSqrtEnabled(VT, MF);
+ if (Enabled == TLI.ReciprocalEstimate::Disabled)
+ return SDValue();
+
+ // Estimates may be explicitly enabled for this type with a custom number of
+ // refinement steps.
+ int Iterations = TLI.getSqrtRefinementSteps(VT, MF);
+
bool UseOneConstNR = false;
- if (SDValue Est = TLI.getRsqrtEstimate(Op, DCI, Iterations, UseOneConstNR)) {
+ if (SDValue Est =
+ TLI.getSqrtEstimate(Op, DAG, Enabled, Iterations, UseOneConstNR,
+ Reciprocal)) {
AddToWorklist(Est.getNode());
+
if (Iterations) {
Est = UseOneConstNR
- ? buildSqrtNROneConst(Op, Est, Iterations, Flags, Reciprocal)
- : buildSqrtNRTwoConst(Op, Est, Iterations, Flags, Reciprocal);
+ ? buildSqrtNROneConst(Op, Est, Iterations, Flags, Reciprocal)
+ : buildSqrtNRTwoConst(Op, Est, Iterations, Flags, Reciprocal);
+
+ if (!Reciprocal) {
+ // Unfortunately, Est is now NaN if the input was exactly 0.0.
+ // Select out this case and force the answer to 0.0.
+ EVT VT = Op.getValueType();
+ SDLoc DL(Op);
+
+ SDValue FPZero = DAG.getConstantFP(0.0, DL, VT);
+ EVT CCVT = getSetCCResultType(VT);
+ SDValue ZeroCmp = DAG.getSetCC(DL, CCVT, Op, FPZero, ISD::SETEQ);
+ AddToWorklist(ZeroCmp.getNode());
+
+ Est = DAG.getNode(VT.isVector() ? ISD::VSELECT : ISD::SELECT, DL, VT,
+ ZeroCmp, FPZero, Est);
+ AddToWorklist(Est.getNode());
+ }
}
return Est;
}
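
The zero-select folded into the implementation above guards a real hazard: computing sqrt(x) as x * rsqrt(x) produces 0 * inf = NaN at x == 0. A scalar sketch of the fixup (the library call stands in for the target's hardware estimate):

#include <cassert>
#include <cmath>

float sqrtViaRsqrt(float X) {
  float Est = 1.0f / std::sqrt(X);  // stand-in for the target's estimate
  float Sqrt = X * Est;             // NaN when X == 0 (0 * inf)
  return (X == 0.0f) ? 0.0f : Sqrt; // the SELECT emitted above
}

int main() {
  assert(sqrtViaRsqrt(0.0f) == 0.0f); // would be NaN without the select
  assert(std::fabs(sqrtViaRsqrt(4.0f) - 2.0f) < 1e-6f);
}
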
@@ -14678,23 +15243,7 @@ SDValue DAGCombiner::buildRsqrtEstimate(SDValue Op, SDNodeFlags *Flags) {
}
SDValue DAGCombiner::buildSqrtEstimate(SDValue Op, SDNodeFlags *Flags) {
- SDValue Est = buildSqrtEstimateImpl(Op, Flags, false);
- if (!Est)
- return SDValue();
-
- // Unfortunately, Est is now NaN if the input was exactly 0.
- // Select out this case and force the answer to 0.
- EVT VT = Est.getValueType();
- SDLoc DL(Op);
- SDValue Zero = DAG.getConstantFP(0.0, DL, VT);
- EVT CCVT = getSetCCResultType(VT);
- SDValue ZeroCmp = DAG.getSetCC(DL, CCVT, Op, Zero, ISD::SETEQ);
- AddToWorklist(ZeroCmp.getNode());
-
- Est = DAG.getNode(VT.isVector() ? ISD::VSELECT : ISD::SELECT, DL, VT, ZeroCmp,
- Zero, Est);
- AddToWorklist(Est.getNode());
- return Est;
+ return buildSqrtEstimateImpl(Op, Flags, false);
}
/// Return true if base is a frame index, which is known not to alias with
@@ -14771,9 +15320,9 @@ bool DAGCombiner::isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const {
// To catch this case, look up the actual index of frame indices to compute
// the real alias relationship.
if (isFrameIndex1 && isFrameIndex2) {
- MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
- Offset1 += MFI->getObjectOffset(cast<FrameIndexSDNode>(Base1)->getIndex());
- Offset2 += MFI->getObjectOffset(cast<FrameIndexSDNode>(Base2)->getIndex());
+ MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
+ Offset1 += MFI.getObjectOffset(cast<FrameIndexSDNode>(Base1)->getIndex());
+ Offset2 += MFI.getObjectOffset(cast<FrameIndexSDNode>(Base2)->getIndex());
return !((Offset1 + (Op0->getMemoryVT().getSizeInBits() >> 3)) <= Offset2 ||
(Offset2 + (Op1->getMemoryVT().getSizeInBits() >> 3)) <= Offset1);
}
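
The return expression is a standard half-open interval disjointness test, shown here in isolation (hypothetical helper; sizes are in bytes, hence the >> 3 above):

#include <cassert>
#include <cstdint>

// Ranges [Off1, Off1+Size1) and [Off2, Off2+Size2) alias unless one ends
// at or before the point where the other begins.
bool mayAlias(int64_t Off1, int64_t Size1, int64_t Off2, int64_t Size2) {
  return !(Off1 + Size1 <= Off2 || Off2 + Size2 <= Off1);
}

int main() {
  assert(!mayAlias(0, 4, 4, 4)); // adjacent objects do not overlap
  assert(mayAlias(0, 8, 4, 4));  // the second range lies inside the first
}
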
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
index b10da00..e2f33bb 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -353,8 +353,8 @@ void FastISel::recomputeInsertPt() {
void FastISel::removeDeadCode(MachineBasicBlock::iterator I,
MachineBasicBlock::iterator E) {
- assert(static_cast<MachineInstr *>(I) && static_cast<MachineInstr *>(E) &&
- std::distance(I, E) > 0 && "Invalid iterator!");
+ assert(I.isValid() && E.isValid() && std::distance(I, E) > 0 &&
+ "Invalid iterator!");
while (I != E) {
MachineInstr *Dead = &*I;
++I;
@@ -455,17 +455,6 @@ bool FastISel::selectBinaryOp(const User *I, unsigned ISDOpcode) {
return true;
}
- // Check if the second operand is a constant float.
- if (const auto *CF = dyn_cast<ConstantFP>(I->getOperand(1))) {
- unsigned ResultReg = fastEmit_rf(VT.getSimpleVT(), VT.getSimpleVT(),
- ISDOpcode, Op0, Op0IsKill, CF);
- if (ResultReg) {
- // We successfully emitted code for the given LLVM Instruction.
- updateValueMap(I, ResultReg);
- return true;
- }
- }
-
unsigned Op1 = getRegForValue(I->getOperand(1));
if (!Op1) // Unhandled operand. Halt "fast" selection and bail.
return false;
@@ -499,7 +488,7 @@ bool FastISel::selectGetElementPtr(const User *I) {
for (gep_type_iterator GTI = gep_type_begin(I), E = gep_type_end(I);
GTI != E; ++GTI) {
const Value *Idx = GTI.getOperand();
- if (auto *StTy = dyn_cast<StructType>(*GTI)) {
+ if (StructType *StTy = GTI.getStructTypeOrNull()) {
uint64_t Field = cast<ConstantInt>(Idx)->getZExtValue();
if (Field) {
// N = N + Offset
@@ -581,7 +570,7 @@ bool FastISel::addStackMapLiveVars(SmallVectorImpl<MachineOperand> &Ops,
Ops.push_back(MachineOperand::CreateImm(StackMaps::ConstantOp));
Ops.push_back(MachineOperand::CreateImm(0));
} else if (auto *AI = dyn_cast<AllocaInst>(Val)) {
- // Values coming from a stack location also require a sepcial encoding,
+ // Values coming from a stack location also require a special encoding,
// but that is added later on by the target specific frame index
// elimination implementation.
auto SI = FuncInfo.StaticAllocaMap.find(AI);
@@ -666,7 +655,7 @@ bool FastISel::selectStackmap(const CallInst *I) {
.addImm(0);
// Inform the Frame Information that we have a stackmap in this function.
- FuncInfo.MF->getFrameInfo()->setHasStackMap();
+ FuncInfo.MF->getFrameInfo().setHasStackMap();
return true;
}
@@ -707,7 +696,7 @@ bool FastISel::lowerCallOperands(const CallInst *CI, unsigned ArgIdx,
FastISel::CallLoweringInfo &FastISel::CallLoweringInfo::setCallee(
const DataLayout &DL, MCContext &Ctx, CallingConv::ID CC, Type *ResultTy,
- const char *Target, ArgListTy &&ArgsList, unsigned FixedArgs) {
+ StringRef Target, ArgListTy &&ArgsList, unsigned FixedArgs) {
SmallString<32> MangledName;
Mangler::getNameWithPrefix(MangledName, Target, DL);
MCSymbol *Sym = Ctx.getOrCreateSymbol(MangledName);
@@ -845,7 +834,7 @@ bool FastISel::selectPatchpoint(const CallInst *I) {
CLI.Call->eraseFromParent();
// Inform the Frame Information that we have a patchpoint in this function.
- FuncInfo.MF->getFrameInfo()->setHasPatchPoint();
+ FuncInfo.MF->getFrameInfo().setHasPatchPoint();
if (CLI.NumResultRegs)
updateValueMap(I, CLI.ResultReg, CLI.NumResultRegs);
@@ -1077,7 +1066,7 @@ bool FastISel::selectCall(const User *I) {
}
MachineModuleInfo &MMI = FuncInfo.MF->getMMI();
- ComputeUsesVAFloatArgument(*Call, &MMI);
+ computeUsesVAFloatArgument(*Call, MMI);
// Handle intrinsic function calls.
if (const auto *II = dyn_cast<IntrinsicInst>(Call))
@@ -1104,6 +1093,8 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) {
case Intrinsic::lifetime_end:
// The donothing intrinsic does, well, nothing.
case Intrinsic::donothing:
+ // Neither does the assume intrinsic; it's also OK not to codegen its operand.
+ case Intrinsic::assume:
return true;
case Intrinsic::dbg_declare: {
const DbgDeclareInst *DI = cast<DbgDeclareInst>(II);
@@ -1225,6 +1216,7 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) {
updateValueMap(II, ResultReg);
return true;
}
+ case Intrinsic::invariant_group_barrier:
case Intrinsic::expect: {
unsigned ResultReg = getRegForValue(II->getArgOperand(0));
if (!ResultReg)
@@ -1324,15 +1316,6 @@ bool FastISel::selectBitCast(const User *I) {
return true;
}
-// Return true if we should copy from swift error to the final vreg as specified
-// by SwiftErrorWorklist.
-static bool shouldCopySwiftErrorsToFinalVRegs(const TargetLowering &TLI,
- FunctionLoweringInfo &FuncInfo) {
- if (!TLI.supportSwiftError())
- return false;
- return FuncInfo.SwiftErrorWorklist.count(FuncInfo.MBB);
-}
-
// Remove local value instructions starting from the instruction after
// SavedLastLocalValue to the current function insert point.
void FastISel::removeDeadLocalValueCode(MachineInstr *SavedLastLocalValue)
@@ -1357,10 +1340,6 @@ bool FastISel::selectInstruction(const Instruction *I) {
// Just before the terminator instruction, insert instructions to
// feed PHI nodes in successor blocks.
if (isa<TerminatorInst>(I)) {
- // If we need to materialize any vreg from worklist, we bail out of
- // FastISel.
- if (shouldCopySwiftErrorsToFinalVRegs(TLI, FuncInfo))
- return false;
if (!handlePHINodesInSuccessorBlocks(I->getParent())) {
// PHI node handling may have generated local value instructions,
// even though it failed to handle all PHI nodes.
@@ -1444,7 +1423,7 @@ void FastISel::fastEmitBranch(MachineBasicBlock *MSucc,
// fall-through case, which needs no instructions.
} else {
// The unconditional branch case.
- TII.InsertBranch(*FuncInfo.MBB, MSucc, nullptr,
+ TII.insertBranch(*FuncInfo.MBB, MSucc, nullptr,
SmallVector<MachineOperand, 0>(), DbgLoc);
}
if (FuncInfo.BPI) {
@@ -1679,7 +1658,7 @@ FastISel::FastISel(FunctionLoweringInfo &FuncInfo,
const TargetLibraryInfo *LibInfo,
bool SkipTargetIndependentISel)
: FuncInfo(FuncInfo), MF(FuncInfo.MF), MRI(FuncInfo.MF->getRegInfo()),
- MFI(*FuncInfo.MF->getFrameInfo()), MCP(*FuncInfo.MF->getConstantPool()),
+ MFI(FuncInfo.MF->getFrameInfo()), MCP(*FuncInfo.MF->getConstantPool()),
TM(FuncInfo.MF->getTarget()), DL(MF->getDataLayout()),
TII(*MF->getSubtarget().getInstrInfo()),
TLI(*MF->getSubtarget().getTargetLowering()),
@@ -1723,18 +1702,6 @@ unsigned FastISel::fastEmit_ri(MVT, MVT, unsigned, unsigned /*Op0*/,
return 0;
}
-unsigned FastISel::fastEmit_rf(MVT, MVT, unsigned, unsigned /*Op0*/,
- bool /*Op0IsKill*/,
- const ConstantFP * /*FPImm*/) {
- return 0;
-}
-
-unsigned FastISel::fastEmit_rri(MVT, MVT, unsigned, unsigned /*Op0*/,
- bool /*Op0IsKill*/, unsigned /*Op1*/,
- bool /*Op1IsKill*/, uint64_t /*Imm*/) {
- return 0;
-}
-
/// This method is a wrapper of fastEmit_ri. It first tries to emit an
/// instruction with an immediate operand using fastEmit_ri.
/// If that fails, it materializes the immediate into a register and tries
@@ -2181,6 +2148,8 @@ FastISel::createMachineMemOperandFor(const Instruction *I) const {
bool IsNonTemporal = I->getMetadata(LLVMContext::MD_nontemporal) != nullptr;
bool IsInvariant = I->getMetadata(LLVMContext::MD_invariant_load) != nullptr;
+ bool IsDereferenceable =
+ I->getMetadata(LLVMContext::MD_dereferenceable) != nullptr;
const MDNode *Ranges = I->getMetadata(LLVMContext::MD_range);
AAMDNodes AAInfo;
@@ -2195,6 +2164,8 @@ FastISel::createMachineMemOperandFor(const Instruction *I) const {
Flags |= MachineMemOperand::MOVolatile;
if (IsNonTemporal)
Flags |= MachineMemOperand::MONonTemporal;
+ if (IsDereferenceable)
+ Flags |= MachineMemOperand::MODereferenceable;
if (IsInvariant)
Flags |= MachineMemOperand::MOInvariant;
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
index e669ffc..377a523 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
@@ -13,7 +13,6 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/FunctionLoweringInfo.h"
-#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -98,7 +97,7 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf,
Fn->isVarArg(), Outs, Fn->getContext());
// If this personality uses funclets, we need to do a bit more work.
- DenseMap<const AllocaInst *, int *> CatchObjects;
+ DenseMap<const AllocaInst *, TinyPtrVector<int *>> CatchObjects;
EHPersonality Personality = classifyEHPersonality(
Fn->hasPersonalityFn() ? Fn->getPersonalityFn() : nullptr);
if (isFuncletEHPersonality(Personality)) {
@@ -115,7 +114,8 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf,
for (WinEHTryBlockMapEntry &TBME : EHInfo.TryBlockMap) {
for (WinEHHandlerType &H : TBME.HandlerArray) {
if (const AllocaInst *AI = H.CatchObj.Alloca)
- CatchObjects.insert({AI, &H.CatchObj.FrameIndex});
+ CatchObjects.insert({AI, {}}).first->second.push_back(
+ &H.CatchObj.FrameIndex);
else
H.CatchObj.FrameIndex = INT_MAX;
}
@@ -125,11 +125,9 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf,
// Initialize the mapping of values to registers. This is only set up for
// instruction values that are used outside of the block that defines
// them.
- Function::const_iterator BB = Fn->begin(), EB = Fn->end();
- for (; BB != EB; ++BB)
- for (BasicBlock::const_iterator I = BB->begin(), E = BB->end();
- I != E; ++I) {
- if (const AllocaInst *AI = dyn_cast<AllocaInst>(I)) {
+ for (const BasicBlock &BB : *Fn) {
+ for (const Instruction &I : BB) {
+ if (const AllocaInst *AI = dyn_cast<AllocaInst>(&I)) {
Type *Ty = AI->getAllocatedType();
unsigned Align =
std::max((unsigned)MF->getDataLayout().getPrefTypeAlignment(Ty),
@@ -138,7 +136,7 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf,
// Static allocas can be folded into the initial stack frame
// adjustment. For targets that don't realign the stack, don't
// do this if there is an extra alignment requirement.
- if (AI->isStaticAlloca() &&
+ if (AI->isStaticAlloca() &&
(TFI->isStackRealignable() || (Align <= StackAlign))) {
const ConstantInt *CUI = cast<ConstantInt>(AI->getArraySize());
uint64_t TySize = MF->getDataLayout().getTypeAllocSize(Ty);
@@ -148,18 +146,20 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf,
int FrameIndex = INT_MAX;
auto Iter = CatchObjects.find(AI);
if (Iter != CatchObjects.end() && TLI->needsFixedCatchObjects()) {
- FrameIndex = MF->getFrameInfo()->CreateFixedObject(
+ FrameIndex = MF->getFrameInfo().CreateFixedObject(
TySize, 0, /*Immutable=*/false, /*isAliased=*/true);
- MF->getFrameInfo()->setObjectAlignment(FrameIndex, Align);
+ MF->getFrameInfo().setObjectAlignment(FrameIndex, Align);
} else {
FrameIndex =
- MF->getFrameInfo()->CreateStackObject(TySize, Align, false, AI);
+ MF->getFrameInfo().CreateStackObject(TySize, Align, false, AI);
}
StaticAllocaMap[AI] = FrameIndex;
// Update the catch handler information.
- if (Iter != CatchObjects.end())
- *Iter->second = FrameIndex;
+ if (Iter != CatchObjects.end()) {
+ for (int *CatchObjPtr : Iter->second)
+ *CatchObjPtr = FrameIndex;
+ }
} else {
// FIXME: Overaligned static allocas should be grouped into
// a single dynamic allocation instead of using a separate
@@ -167,20 +167,19 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf,
if (Align <= StackAlign)
Align = 0;
// Inform the Frame Information that we have variable-sized objects.
- MF->getFrameInfo()->CreateVariableSizedObject(Align ? Align : 1, AI);
+ MF->getFrameInfo().CreateVariableSizedObject(Align ? Align : 1, AI);
}
}
// Look for inline asm that clobbers the SP register.
if (isa<CallInst>(I) || isa<InvokeInst>(I)) {
- ImmutableCallSite CS(&*I);
+ ImmutableCallSite CS(&I);
if (isa<InlineAsm>(CS.getCalledValue())) {
unsigned SP = TLI->getStackPointerRegisterToSaveRestore();
const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
std::vector<TargetLowering::AsmOperandInfo> Ops =
TLI->ParseConstraints(Fn->getParent()->getDataLayout(), TRI, CS);
- for (size_t I = 0, E = Ops.size(); I != E; ++I) {
- TargetLowering::AsmOperandInfo &Op = Ops[I];
+ for (TargetLowering::AsmOperandInfo &Op : Ops) {
if (Op.Type == InlineAsm::isClobber) {
// Clobbers don't have SDValue operands, hence SDValue().
TLI->ComputeConstraintToUse(Op, SDValue(), DAG);
@@ -188,7 +187,7 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf,
TLI->getRegForInlineAsmConstraint(TRI, Op.ConstraintCode,
Op.ConstraintVT);
if (PhysReg.first == SP)
- MF->getFrameInfo()->setHasOpaqueSPAdjustment(true);
+ MF->getFrameInfo().setHasOpaqueSPAdjustment(true);
}
}
}
@@ -197,28 +196,28 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf,
// Look for calls to the @llvm.va_start intrinsic. We can omit some
// prologue boilerplate for variadic functions that don't examine their
// arguments.
- if (const auto *II = dyn_cast<IntrinsicInst>(I)) {
+ if (const auto *II = dyn_cast<IntrinsicInst>(&I)) {
if (II->getIntrinsicID() == Intrinsic::vastart)
- MF->getFrameInfo()->setHasVAStart(true);
+ MF->getFrameInfo().setHasVAStart(true);
}
// If we have a musttail call in a variadic function, we need to ensure we
// forward implicit register parameters.
- if (const auto *CI = dyn_cast<CallInst>(I)) {
+ if (const auto *CI = dyn_cast<CallInst>(&I)) {
if (CI->isMustTailCall() && Fn->isVarArg())
- MF->getFrameInfo()->setHasMustTailInVarArgFunc(true);
+ MF->getFrameInfo().setHasMustTailInVarArgFunc(true);
}
// Mark values used outside their block as exported, by allocating
// a virtual register for them.
- if (isUsedOutsideOfDefiningBlock(&*I))
- if (!isa<AllocaInst>(I) || !StaticAllocaMap.count(cast<AllocaInst>(I)))
- InitializeRegForValue(&*I);
+ if (isUsedOutsideOfDefiningBlock(&I))
+ if (!isa<AllocaInst>(I) || !StaticAllocaMap.count(cast<AllocaInst>(&I)))
+ InitializeRegForValue(&I);
// Collect llvm.dbg.declare information. This is done now instead of
// during the initial isel pass through the IR so that it is done
// in a predictable order.
- if (const DbgDeclareInst *DI = dyn_cast<DbgDeclareInst>(I)) {
+ if (const DbgDeclareInst *DI = dyn_cast<DbgDeclareInst>(&I)) {
assert(DI->getVariable() && "Missing variable");
assert(DI->getDebugLoc() && "Missing location");
if (MMI.hasDebugInfo()) {
@@ -234,7 +233,7 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf,
StaticAllocaMap.find(AI);
if (SI != StaticAllocaMap.end()) { // Check for VLAs.
int FI = SI->second;
- MMI.setVariableDbgInfo(DI->getVariable(), DI->getExpression(),
+ MF->setVariableDbgInfo(DI->getVariable(), DI->getExpression(),
FI, DI->getDebugLoc());
}
}
@@ -243,47 +242,52 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf,
}
// Decide the preferred extend type for a value.
- PreferredExtendType[&*I] = getPreferredExtendForValue(&*I);
+ PreferredExtendType[&I] = getPreferredExtendForValue(&I);
}
+ }
// Create an initial MachineBasicBlock for each LLVM BasicBlock in F. This
// also creates the initial PHI MachineInstrs, though none of the input
// operands are populated.
- for (BB = Fn->begin(); BB != EB; ++BB) {
+ for (const BasicBlock &BB : *Fn) {
// Don't create MachineBasicBlocks for imaginary EH pad blocks. These blocks
// are really data, and no instructions can live here.
- if (BB->isEHPad()) {
- const Instruction *I = BB->getFirstNonPHI();
+ if (BB.isEHPad()) {
+ const Instruction *PadInst = BB.getFirstNonPHI();
// If this is a non-landingpad EH pad, mark this function as using
// funclets.
// FIXME: SEH catchpads do not create funclets, so we could avoid setting
// this in such cases in order to improve frame layout.
- if (!isa<LandingPadInst>(I)) {
- MMI.setHasEHFunclets(true);
- MF->getFrameInfo()->setHasOpaqueSPAdjustment(true);
+ if (!isa<LandingPadInst>(PadInst)) {
+ MF->setHasEHFunclets(true);
+ MF->getFrameInfo().setHasOpaqueSPAdjustment(true);
}
- if (isa<CatchSwitchInst>(I)) {
- assert(&*BB->begin() == I &&
+ if (isa<CatchSwitchInst>(PadInst)) {
+ assert(&*BB.begin() == PadInst &&
"WinEHPrepare failed to remove PHIs from imaginary BBs");
continue;
}
- if (isa<FuncletPadInst>(I))
- assert(&*BB->begin() == I && "WinEHPrepare failed to demote PHIs");
+ if (isa<FuncletPadInst>(PadInst))
+ assert(&*BB.begin() == PadInst && "WinEHPrepare failed to demote PHIs");
}
- MachineBasicBlock *MBB = mf.CreateMachineBasicBlock(&*BB);
- MBBMap[&*BB] = MBB;
+ MachineBasicBlock *MBB = mf.CreateMachineBasicBlock(&BB);
+ MBBMap[&BB] = MBB;
MF->push_back(MBB);
// Transfer the address-taken flag. This is necessary because there could
// be multiple MachineBasicBlocks corresponding to one BasicBlock, and only
// the first one should be marked.
- if (BB->hasAddressTaken())
+ if (BB.hasAddressTaken())
MBB->setHasAddressTaken();
+ // Mark landing pad blocks.
+ if (BB.isEHPad())
+ MBB->setIsEHPad();
+
// Create Machine PHI nodes for LLVM PHI nodes, lowering them as
// appropriate.
- for (BasicBlock::const_iterator I = BB->begin();
+ for (BasicBlock::const_iterator I = BB.begin();
const PHINode *PN = dyn_cast<PHINode>(I); ++I) {
if (PN->use_empty()) continue;
@@ -297,8 +301,7 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf,
SmallVector<EVT, 4> ValueVTs;
ComputeValueVTs(*TLI, MF->getDataLayout(), PN->getType(), ValueVTs);
- for (unsigned vti = 0, vte = ValueVTs.size(); vti != vte; ++vti) {
- EVT VT = ValueVTs[vti];
+ for (EVT VT : ValueVTs) {
unsigned NumRegisters = TLI->getNumRegisters(Fn->getContext(), VT);
const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
for (unsigned i = 0; i != NumRegisters; ++i)
@@ -308,16 +311,6 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf,
}
}
- // Mark landing pad blocks.
- SmallVector<const LandingPadInst *, 4> LPads;
- for (BB = Fn->begin(); BB != EB; ++BB) {
- const Instruction *FNP = BB->getFirstNonPHI();
- if (BB->isEHPad() && MBBMap.count(&*BB))
- MBBMap[&*BB]->setIsEHPad();
- if (const auto *LPI = dyn_cast<LandingPadInst>(FNP))
- LPads.push_back(LPI);
- }
-
if (!isFuncletEHPersonality(Personality))
return;
@@ -541,75 +534,26 @@ unsigned FunctionLoweringInfo::getCatchPadExceptionPointerVReg(
return VReg;
}
-/// ComputeUsesVAFloatArgument - Determine if any floating-point values are
-/// being passed to this variadic function, and set the MachineModuleInfo's
-/// usesVAFloatArgument flag if so. This flag is used to emit an undefined
-/// reference to _fltused on Windows, which will link in MSVCRT's
-/// floating-point support.
-void llvm::ComputeUsesVAFloatArgument(const CallInst &I,
- MachineModuleInfo *MMI)
-{
- FunctionType *FT = cast<FunctionType>(
- I.getCalledValue()->getType()->getContainedType(0));
- if (FT->isVarArg() && !MMI->usesVAFloatArgument()) {
- for (unsigned i = 0, e = I.getNumArgOperands(); i != e; ++i) {
- Type* T = I.getArgOperand(i)->getType();
- for (auto i : post_order(T)) {
- if (i->isFloatingPointTy()) {
- MMI->setUsesVAFloatArgument(true);
- return;
- }
- }
- }
- }
-}
-
-/// AddLandingPadInfo - Extract the exception handling information from the
-/// landingpad instruction and add them to the specified machine module info.
-void llvm::AddLandingPadInfo(const LandingPadInst &I, MachineModuleInfo &MMI,
- MachineBasicBlock *MBB) {
- if (const auto *PF = dyn_cast<Function>(
- I.getParent()->getParent()->getPersonalityFn()->stripPointerCasts()))
- MMI.addPersonality(PF);
-
- if (I.isCleanup())
- MMI.addCleanup(MBB);
-
- // FIXME: New EH - Add the clauses in reverse order. This isn't 100% correct,
- // but we need to do it this way because of how the DWARF EH emitter
- // processes the clauses.
- for (unsigned i = I.getNumClauses(); i != 0; --i) {
- Value *Val = I.getClause(i - 1);
- if (I.isCatch(i - 1)) {
- MMI.addCatchTypeInfo(MBB,
- dyn_cast<GlobalValue>(Val->stripPointerCasts()));
- } else {
- // Add filters in a list.
- Constant *CVal = cast<Constant>(Val);
- SmallVector<const GlobalValue*, 4> FilterList;
- for (User::op_iterator
- II = CVal->op_begin(), IE = CVal->op_end(); II != IE; ++II)
- FilterList.push_back(cast<GlobalValue>((*II)->stripPointerCasts()));
-
- MMI.addFilterTypeInfo(MBB, FilterList);
- }
- }
-}
-
-unsigned FunctionLoweringInfo::findSwiftErrorVReg(const MachineBasicBlock *MBB,
- const Value* Val) const {
- // Find the index in SwiftErrorVals.
- SwiftErrorValues::const_iterator I =
- std::find(SwiftErrorVals.begin(), SwiftErrorVals.end(), Val);
- assert(I != SwiftErrorVals.end() && "Can't find value in SwiftErrorVals");
- return SwiftErrorMap.lookup(MBB)[I - SwiftErrorVals.begin()];
+unsigned
+FunctionLoweringInfo::getOrCreateSwiftErrorVReg(const MachineBasicBlock *MBB,
+ const Value *Val) {
+ auto Key = std::make_pair(MBB, Val);
+ auto It = SwiftErrorVRegDefMap.find(Key);
+ // If this is the first use of this swifterror value in this basic block,
+ // create a new virtual register.
+ // After all basic blocks have been processed we will satisfy this
+ // "upwards exposed use" by inserting a copy or phi at the beginning of
+ // this block.
+ if (It == SwiftErrorVRegDefMap.end()) {
+ auto &DL = MF->getDataLayout();
+ const TargetRegisterClass *RC = TLI->getRegClassFor(TLI->getPointerTy(DL));
+ auto VReg = MF->getRegInfo().createVirtualRegister(RC);
+ SwiftErrorVRegDefMap[Key] = VReg;
+ SwiftErrorVRegUpwardsUse[Key] = VReg;
+ return VReg;
+ } else return It->second;
}
-void FunctionLoweringInfo::setSwiftErrorVReg(const MachineBasicBlock *MBB,
- const Value* Val, unsigned VReg) {
- // Find the index in SwiftErrorVals.
- SwiftErrorValues::iterator I =
- std::find(SwiftErrorVals.begin(), SwiftErrorVals.end(), Val);
- assert(I != SwiftErrorVals.end() && "Can't find value in SwiftErrorVals");
- SwiftErrorMap[MBB][I - SwiftErrorVals.begin()] = VReg;
+void FunctionLoweringInfo::setCurrentSwiftErrorVReg(
+ const MachineBasicBlock *MBB, const Value *Val, unsigned VReg) {
+ SwiftErrorVRegDefMap[std::make_pair(MBB, Val)] = VReg;
}
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
index c8af73a..4a9042c 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
@@ -330,16 +330,24 @@ InstrEmitter::AddRegisterOperand(MachineInstrBuilder &MIB,
// shrink VReg's register class within reason. For example, if VReg == GR32
// and II requires a GR32_NOSP, just constrain VReg to GR32_NOSP.
if (II) {
- const TargetRegisterClass *DstRC = nullptr;
+ const TargetRegisterClass *OpRC = nullptr;
if (IIOpNum < II->getNumOperands())
- DstRC = TRI->getAllocatableClass(TII->getRegClass(*II,IIOpNum,TRI,*MF));
- assert((!DstRC || TargetRegisterInfo::isVirtualRegister(VReg)) &&
- "Expected VReg");
- if (DstRC && !MRI->constrainRegClass(VReg, DstRC, MinRCSize)) {
- unsigned NewVReg = MRI->createVirtualRegister(DstRC);
- BuildMI(*MBB, InsertPos, Op.getNode()->getDebugLoc(),
- TII->get(TargetOpcode::COPY), NewVReg).addReg(VReg);
- VReg = NewVReg;
+ OpRC = TII->getRegClass(*II, IIOpNum, TRI, *MF);
+
+ if (OpRC) {
+ const TargetRegisterClass *ConstrainedRC
+ = MRI->constrainRegClass(VReg, OpRC, MinRCSize);
+ if (!ConstrainedRC) {
+ OpRC = TRI->getAllocatableClass(OpRC);
+ assert(OpRC && "Constraints cannot be fulfilled for allocation");
+ unsigned NewVReg = MRI->createVirtualRegister(OpRC);
+ BuildMI(*MBB, InsertPos, Op.getNode()->getDebugLoc(),
+ TII->get(TargetOpcode::COPY), NewVReg).addReg(VReg);
+ VReg = NewVReg;
+ } else {
+ assert(ConstrainedRC->isAllocatable() &&
+ "Constraining an allocatable VReg produced an unallocatable class?");
+ }
}
}
@@ -494,8 +502,17 @@ void InstrEmitter::EmitSubregNode(SDNode *Node,
const TargetRegisterClass *TRC =
TLI->getRegClassFor(Node->getSimpleValueType(0));
- unsigned VReg = getVR(Node->getOperand(0), VRBaseMap);
- MachineInstr *DefMI = MRI->getVRegDef(VReg);
+ unsigned Reg;
+ MachineInstr *DefMI;
+ RegisterSDNode *R = dyn_cast<RegisterSDNode>(Node->getOperand(0));
+ if (R && TargetRegisterInfo::isPhysicalRegister(R->getReg())) {
+ Reg = R->getReg();
+ DefMI = nullptr;
+ } else {
+ Reg = getVR(Node->getOperand(0), VRBaseMap);
+ DefMI = MRI->getVRegDef(Reg);
+ }
+
unsigned SrcReg, DstReg, DefSubIdx;
if (DefMI &&
TII->isCoalescableExtInstr(*DefMI, SrcReg, DstReg, DefSubIdx) &&
@@ -511,20 +528,26 @@ void InstrEmitter::EmitSubregNode(SDNode *Node,
TII->get(TargetOpcode::COPY), VRBase).addReg(SrcReg);
MRI->clearKillFlags(SrcReg);
} else {
- // VReg may not support a SubIdx sub-register, and we may need to
+ // Reg may not support a SubIdx sub-register, and we may need to
// constrain its register class or issue a COPY to a compatible register
// class.
- VReg = ConstrainForSubReg(VReg, SubIdx,
- Node->getOperand(0).getSimpleValueType(),
- Node->getDebugLoc());
+ if (TargetRegisterInfo::isVirtualRegister(Reg))
+ Reg = ConstrainForSubReg(Reg, SubIdx,
+ Node->getOperand(0).getSimpleValueType(),
+ Node->getDebugLoc());
// Create the destreg if it is missing.
if (VRBase == 0)
VRBase = MRI->createVirtualRegister(TRC);
// Create the extract_subreg machine instruction.
- BuildMI(*MBB, InsertPos, Node->getDebugLoc(),
- TII->get(TargetOpcode::COPY), VRBase).addReg(VReg, 0, SubIdx);
+ MachineInstrBuilder CopyMI =
+ BuildMI(*MBB, InsertPos, Node->getDebugLoc(),
+ TII->get(TargetOpcode::COPY), VRBase);
+ if (TargetRegisterInfo::isVirtualRegister(Reg))
+ CopyMI.addReg(Reg, 0, SubIdx);
+ else
+ CopyMI.addReg(TRI->getSubReg(Reg, SubIdx));
}
} else if (Opc == TargetOpcode::INSERT_SUBREG ||
Opc == TargetOpcode::SUBREG_TO_REG) {
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index 18ad910..b002825 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -259,19 +259,25 @@ SelectionDAGLegalize::ExpandConstantFP(ConstantFPSDNode *CFP, bool UseCP) {
(VT == MVT::f64) ? MVT::i64 : MVT::i32);
}
+ APFloat APF = CFP->getValueAPF();
EVT OrigVT = VT;
EVT SVT = VT;
- while (SVT != MVT::f32 && SVT != MVT::f16) {
- SVT = (MVT::SimpleValueType)(SVT.getSimpleVT().SimpleTy - 1);
- if (ConstantFPSDNode::isValueValidForType(SVT, CFP->getValueAPF()) &&
- // Only do this if the target has a native EXTLOAD instruction from
- // smaller type.
- TLI.isLoadExtLegal(ISD::EXTLOAD, OrigVT, SVT) &&
- TLI.ShouldShrinkFPConstant(OrigVT)) {
- Type *SType = SVT.getTypeForEVT(*DAG.getContext());
- LLVMC = cast<ConstantFP>(ConstantExpr::getFPTrunc(LLVMC, SType));
- VT = SVT;
- Extend = true;
+
+ // We don't want to shrink SNaNs. Converting the SNaN back to its real type
+ // can cause it to be changed into a QNaN on some platforms (e.g. on SystemZ).
+ if (!APF.isSignaling()) {
+ while (SVT != MVT::f32 && SVT != MVT::f16) {
+ SVT = (MVT::SimpleValueType)(SVT.getSimpleVT().SimpleTy - 1);
+ if (ConstantFPSDNode::isValueValidForType(SVT, APF) &&
+ // Only do this if the target has a native EXTLOAD instruction from
+ // smaller type.
+ TLI.isLoadExtLegal(ISD::EXTLOAD, OrigVT, SVT) &&
+ TLI.ShouldShrinkFPConstant(OrigVT)) {
+ Type *SType = SVT.getTypeForEVT(*DAG.getContext());
+ LLVMC = cast<ConstantFP>(ConstantExpr::getFPTrunc(LLVMC, SType));
+ VT = SVT;
+ Extend = true;
+ }
}
}
@@ -324,8 +330,6 @@ SDValue SelectionDAGLegalize::PerformInsertVectorEltInMemory(SDValue Vec,
// supported by the target.
EVT VT = Tmp1.getValueType();
EVT EltVT = VT.getVectorElementType();
- EVT IdxVT = Tmp3.getValueType();
- EVT PtrVT = TLI.getPointerTy(DAG.getDataLayout());
SDValue StackPtr = DAG.CreateStackTemporary(VT);
int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
@@ -335,13 +339,8 @@ SDValue SelectionDAGLegalize::PerformInsertVectorEltInMemory(SDValue Vec,
DAG.getEntryNode(), dl, Tmp1, StackPtr,
MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI));
- // Truncate or zero extend offset to target pointer type.
- Tmp3 = DAG.getZExtOrTrunc(Tmp3, dl, PtrVT);
- // Add the offset to the index.
- unsigned EltSize = EltVT.getSizeInBits()/8;
- Tmp3 = DAG.getNode(ISD::MUL, dl, IdxVT, Tmp3,
- DAG.getConstant(EltSize, dl, IdxVT));
- SDValue StackPtr2 = DAG.getNode(ISD::ADD, dl, IdxVT, Tmp3, StackPtr);
+ SDValue StackPtr2 = TLI.getVectorElementPointer(DAG, StackPtr, VT, Tmp3);
+
// Store the scalar value.
Ch = DAG.getTruncStore(Ch, dl, Tmp2, StackPtr2, MachinePointerInfo(), EltVT);
// Load the updated vector.
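
getVectorElementPointer replaces the removed zext/mul/add sequence; in pointer-arithmetic terms it computes the address of element Idx in the stack temporary (a sketch assuming byte addressing; the real helper also normalizes the index type):

#include <cassert>
#include <cstdint>

char *vectorElementPointer(char *StackPtr, unsigned EltSizeInBytes,
                           uint64_t Idx) {
  return StackPtr + Idx * EltSizeInBytes; // base + index * element size
}

int main() {
  char Buf[16] = {}; // stands in for a v4i32 stack temporary
  assert(vectorElementPointer(Buf, 4, 2) == Buf + 8);
}
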
@@ -795,7 +794,7 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) {
default: llvm_unreachable("This action is not supported yet!");
case TargetLowering::Custom:
isCustom = true;
- // FALLTHROUGH
+ LLVM_FALLTHROUGH;
case TargetLowering::Legal: {
Value = SDValue(Node, 0);
Chain = SDValue(Node, 1);
@@ -1013,6 +1012,7 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
case ISD::ADJUST_TRAMPOLINE:
case ISD::FRAMEADDR:
case ISD::RETURNADDR:
+ case ISD::ADDROFRETURNADDR:
// These operations lie about being legal: when they claim to be legal,
// they should actually be custom-lowered.
Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0));
@@ -1061,35 +1061,41 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
case ISD::SRL:
case ISD::SRA:
case ISD::ROTL:
- case ISD::ROTR:
+ case ISD::ROTR: {
// Legalizing shifts/rotates requires adjusting the shift amount
// to the appropriate width.
- if (!Node->getOperand(1).getValueType().isVector()) {
- SDValue SAO =
- DAG.getShiftAmountOperand(Node->getOperand(0).getValueType(),
- Node->getOperand(1));
- HandleSDNode Handle(SAO);
- LegalizeOp(SAO.getNode());
- NewNode = DAG.UpdateNodeOperands(Node, Node->getOperand(0),
- Handle.getValue());
+ SDValue Op0 = Node->getOperand(0);
+ SDValue Op1 = Node->getOperand(1);
+ if (!Op1.getValueType().isVector()) {
+ SDValue SAO = DAG.getShiftAmountOperand(Op0.getValueType(), Op1);
+ // The getShiftAmountOperand() may create a new operand node or
+ // return the existing one. If a new operand is created, we need to
+ // update the parent node.
+ // Do not try to legalize SAO here! It will be automatically legalized
+ // in the next round.
+ if (SAO != Op1)
+ NewNode = DAG.UpdateNodeOperands(Node, Op0, SAO);
}
- break;
+ }
+ break;
case ISD::SRL_PARTS:
case ISD::SRA_PARTS:
- case ISD::SHL_PARTS:
+ case ISD::SHL_PARTS: {
// Legalizing shifts/rotates requires adjusting the shift amount
// to the appropriate width.
- if (!Node->getOperand(2).getValueType().isVector()) {
- SDValue SAO =
- DAG.getShiftAmountOperand(Node->getOperand(0).getValueType(),
- Node->getOperand(2));
- HandleSDNode Handle(SAO);
- LegalizeOp(SAO.getNode());
- NewNode = DAG.UpdateNodeOperands(Node, Node->getOperand(0),
- Node->getOperand(1),
- Handle.getValue());
+ SDValue Op0 = Node->getOperand(0);
+ SDValue Op1 = Node->getOperand(1);
+ SDValue Op2 = Node->getOperand(2);
+ if (!Op2.getValueType().isVector()) {
+ SDValue SAO = DAG.getShiftAmountOperand(Op0.getValueType(), Op2);
+ // The getShiftAmountOperand() may create a new operand node or
+ // return the existing one. If a new operand is created, we need to
+ // update the parent node.
+ if (SAO != Op2)
+ NewNode = DAG.UpdateNodeOperands(Node, Op0, Op1, SAO);
}
- break;
+ }
+ break;
}
if (NewNode != Node) {
@@ -1118,12 +1124,12 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
ReplaceNode(Node, ResultVals.data());
return;
}
+ LLVM_FALLTHROUGH;
}
- // FALL THROUGH
case TargetLowering::Expand:
if (ExpandNode(Node))
return;
- // FALL THROUGH
+ LLVM_FALLTHROUGH;
case TargetLowering::LibCall:
ConvertNodeToLibcall(Node);
return;
@@ -1196,21 +1202,16 @@ SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) {
}
}
+ EVT VecVT = Vec.getValueType();
+
if (!Ch.getNode()) {
// Store the value to a temporary stack slot, then LOAD the returned part.
- StackPtr = DAG.CreateStackTemporary(Vec.getValueType());
+ StackPtr = DAG.CreateStackTemporary(VecVT);
Ch = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr,
MachinePointerInfo());
}
- // Add the offset to the index.
- unsigned EltSize =
- Vec.getValueType().getVectorElementType().getSizeInBits()/8;
- Idx = DAG.getNode(ISD::MUL, dl, Idx.getValueType(), Idx,
- DAG.getConstant(EltSize, SDLoc(Vec), Idx.getValueType()));
-
- Idx = DAG.getZExtOrTrunc(Idx, dl, TLI.getPointerTy(DAG.getDataLayout()));
- StackPtr = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx, StackPtr);
+ StackPtr = TLI.getVectorElementPointer(DAG, StackPtr, VecVT, Idx);
SDValue NewLoad;
@@ -1220,7 +1221,7 @@ SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) {
else
NewLoad = DAG.getExtLoad(ISD::EXTLOAD, dl, Op.getValueType(), Ch, StackPtr,
MachinePointerInfo(),
- Vec.getValueType().getVectorElementType());
+ VecVT.getVectorElementType());
// Replace the chain going out of the store, by the one out of the load.
DAG.ReplaceAllUsesOfValueWith(Ch, SDValue(NewLoad.getNode(), 1));
@@ -1244,8 +1245,8 @@ SDValue SelectionDAGLegalize::ExpandInsertToVectorThroughStack(SDValue Op) {
SDLoc dl(Op);
// Store the value to a temporary stack slot, then LOAD the returned part.
-
- SDValue StackPtr = DAG.CreateStackTemporary(Vec.getValueType());
+ EVT VecVT = Vec.getValueType();
+ SDValue StackPtr = DAG.CreateStackTemporary(VecVT);
int FI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
MachinePointerInfo PtrInfo =
MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI);
@@ -1254,17 +1255,7 @@ SDValue SelectionDAGLegalize::ExpandInsertToVectorThroughStack(SDValue Op) {
SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, PtrInfo);
// Then store the inserted part.
-
- // Add the offset to the index.
- unsigned EltSize =
- Vec.getValueType().getVectorElementType().getSizeInBits()/8;
-
- Idx = DAG.getNode(ISD::MUL, dl, Idx.getValueType(), Idx,
- DAG.getConstant(EltSize, SDLoc(Vec), Idx.getValueType()));
- Idx = DAG.getZExtOrTrunc(Idx, dl, TLI.getPointerTy(DAG.getDataLayout()));
-
- SDValue SubStackPtr = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx,
- StackPtr);
+ SDValue SubStackPtr = TLI.getVectorElementPointer(DAG, StackPtr, VecVT, Idx);
// Store the subvector.
Ch = DAG.getStore(Ch, dl, Part, SubStackPtr, MachinePointerInfo());
@@ -1593,6 +1584,7 @@ bool SelectionDAGLegalize::LegalizeSetCCCondCode(EVT VT, SDValue &LHS,
break;
}
// Fallthrough if we are unsigned integer.
+ LLVM_FALLTHROUGH;
case ISD::SETLE:
case ISD::SETGT:
case ISD::SETGE:
@@ -1650,7 +1642,7 @@ SDValue SelectionDAGLegalize::EmitStackConvert(SDValue SrcOp, EVT SlotVT,
MachinePointerInfo PtrInfo =
MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI);
- unsigned SrcSize = SrcOp.getValueType().getSizeInBits();
+ unsigned SrcSize = SrcOp.getValueSizeInBits();
unsigned SlotSize = SlotVT.getSizeInBits();
unsigned DestSize = DestVT.getSizeInBits();
Type *DestType = DestVT.getTypeForEVT(*DAG.getContext());
@@ -2521,13 +2513,56 @@ SDValue SelectionDAGLegalize::PromoteLegalFP_TO_INT(SDValue LegalOp, EVT DestVT,
return DAG.getNode(ISD::TRUNCATE, dl, DestVT, Operation);
}
-/// Open code the operations for BITREVERSE.
+/// Legalize a BITREVERSE scalar/vector operation as a series of mask + shifts.
SDValue SelectionDAGLegalize::ExpandBITREVERSE(SDValue Op, const SDLoc &dl) {
EVT VT = Op.getValueType();
EVT SHVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
unsigned Sz = VT.getScalarSizeInBits();
- SDValue Tmp, Tmp2;
+ SDValue Tmp, Tmp2, Tmp3;
+
+ // If we can, perform BSWAP first and then mask+swap the i4 halves, then
+ // the i2 pairs, and finally the i1 pairs.
+ // TODO: We can easily support i4/i2 legal types if any target ever does.
+ if (Sz >= 8 && isPowerOf2_32(Sz)) {
+ // Create the masks - repeating the pattern every byte.
+ APInt MaskHi4(Sz, 0), MaskHi2(Sz, 0), MaskHi1(Sz, 0);
+ APInt MaskLo4(Sz, 0), MaskLo2(Sz, 0), MaskLo1(Sz, 0);
+ for (unsigned J = 0; J != Sz; J += 8) {
+ MaskHi4 = MaskHi4.Or(APInt(Sz, 0xF0ull << J));
+ MaskLo4 = MaskLo4.Or(APInt(Sz, 0x0Full << J));
+ MaskHi2 = MaskHi2.Or(APInt(Sz, 0xCCull << J));
+ MaskLo2 = MaskLo2.Or(APInt(Sz, 0x33ull << J));
+ MaskHi1 = MaskHi1.Or(APInt(Sz, 0xAAull << J));
+ MaskLo1 = MaskLo1.Or(APInt(Sz, 0x55ull << J));
+ }
+
+ // BSWAP if the type is wider than a single byte.
+ Tmp = (Sz > 8 ? DAG.getNode(ISD::BSWAP, dl, VT, Op) : Op);
+
+ // swap i4: ((V & 0xF0) >> 4) | ((V & 0x0F) << 4)
+ Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(MaskHi4, dl, VT));
+ Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(MaskLo4, dl, VT));
+ Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp2, DAG.getConstant(4, dl, VT));
+ Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(4, dl, VT));
+ Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
+
+ // swap i2: ((V & 0xCC) >> 2) | ((V & 0x33) << 2)
+ Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(MaskHi2, dl, VT));
+ Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(MaskLo2, dl, VT));
+ Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp2, DAG.getConstant(2, dl, VT));
+ Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(2, dl, VT));
+ Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
+
+ // swap i1: ((V & 0xAA) >> 1) | ((V & 0x55) << 1)
+ Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(MaskHi1, dl, VT));
+ Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(MaskLo1, dl, VT));
+ Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp2, DAG.getConstant(1, dl, VT));
+ Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(1, dl, VT));
+ Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
+ return Tmp;
+ }
+
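
The new fast path is the classic mask-and-shift bit reversal; the same cascade for a plain 32-bit scalar looks like this (uses the GCC/Clang bswap builtin; illustrative only):

#include <cassert>
#include <cstdint>

uint32_t bitreverse32(uint32_t V) {
  V = __builtin_bswap32(V);                                // reverse bytes
  V = ((V & 0xF0F0F0F0u) >> 4) | ((V & 0x0F0F0F0Fu) << 4); // swap i4 halves
  V = ((V & 0xCCCCCCCCu) >> 2) | ((V & 0x33333333u) << 2); // swap i2 pairs
  V = ((V & 0xAAAAAAAAu) >> 1) | ((V & 0x55555555u) << 1); // swap i1 pairs
  return V;
}

int main() {
  assert(bitreverse32(0x00000001u) == 0x80000000u);
  assert(bitreverse32(0x12345678u) == 0x1E6A2C48u);
}
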
Tmp = DAG.getConstant(0, dl, VT);
for (unsigned I = 0, J = Sz-1; I < Sz; ++I, --J) {
if (I < J)
@@ -2551,7 +2586,7 @@ SDValue SelectionDAGLegalize::ExpandBSWAP(SDValue Op, const SDLoc &dl) {
EVT VT = Op.getValueType();
EVT SHVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5, Tmp6, Tmp7, Tmp8;
- switch (VT.getSimpleVT().SimpleTy) {
+ switch (VT.getSimpleVT().getScalarType().SimpleTy) {
default: llvm_unreachable("Unhandled Expand type in BSWAP!");
case MVT::i16:
Tmp2 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
@@ -2780,10 +2815,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
SDValue Swap = DAG.getAtomicCmpSwap(
ISD::ATOMIC_CMP_SWAP, dl, cast<AtomicSDNode>(Node)->getMemoryVT(), VTs,
Node->getOperand(0), Node->getOperand(1), Zero, Zero,
- cast<AtomicSDNode>(Node)->getMemOperand(),
- cast<AtomicSDNode>(Node)->getOrdering(),
- cast<AtomicSDNode>(Node)->getOrdering(),
- cast<AtomicSDNode>(Node)->getSynchScope());
+ cast<AtomicSDNode>(Node)->getMemOperand());
Results.push_back(Swap.getValue(0));
Results.push_back(Swap.getValue(1));
break;
@@ -2794,9 +2826,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
cast<AtomicSDNode>(Node)->getMemoryVT(),
Node->getOperand(0),
Node->getOperand(1), Node->getOperand(2),
- cast<AtomicSDNode>(Node)->getMemOperand(),
- cast<AtomicSDNode>(Node)->getOrdering(),
- cast<AtomicSDNode>(Node)->getSynchScope());
+ cast<AtomicSDNode>(Node)->getMemOperand());
Results.push_back(Swap.getValue(1));
break;
}
@@ -2808,10 +2838,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
SDValue Res = DAG.getAtomicCmpSwap(
ISD::ATOMIC_CMP_SWAP, dl, cast<AtomicSDNode>(Node)->getMemoryVT(), VTs,
Node->getOperand(0), Node->getOperand(1), Node->getOperand(2),
- Node->getOperand(3), cast<MemSDNode>(Node)->getMemOperand(),
- cast<AtomicSDNode>(Node)->getSuccessOrdering(),
- cast<AtomicSDNode>(Node)->getFailureOrdering(),
- cast<AtomicSDNode>(Node)->getSynchScope());
+ Node->getOperand(3), cast<MemSDNode>(Node)->getMemOperand());
SDValue ExtRes = Res;
SDValue LHS = Res;
@@ -2879,15 +2906,32 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
Results.push_back(Tmp1);
break;
case ISD::SIGN_EXTEND_INREG: {
- // NOTE: we could fall back on load/store here too for targets without
- // SAR. However, it is doubtful that any exist.
EVT ExtraVT = cast<VTSDNode>(Node->getOperand(1))->getVT();
EVT VT = Node->getValueType(0);
+
+ // An in-register sign-extend of a boolean is a negation:
+ // 'true' (1) sign-extended is -1.
+ // 'false' (0) sign-extended is 0.
+ // However, we must mask the high bits of the source operand because the
+ // SIGN_EXTEND_INREG does not guarantee that the high bits are already zero.
+
+ // TODO: Do this for vectors too?
+ if (ExtraVT.getSizeInBits() == 1) {
+ SDValue One = DAG.getConstant(1, dl, VT);
+ SDValue And = DAG.getNode(ISD::AND, dl, VT, Node->getOperand(0), One);
+ SDValue Zero = DAG.getConstant(0, dl, VT);
+ SDValue Neg = DAG.getNode(ISD::SUB, dl, VT, Zero, And);
+ Results.push_back(Neg);
+ break;
+ }
+
+ // NOTE: we could fall back on load/store here too for targets without
+ // SRA. However, it is doubtful that any exist.
EVT ShiftAmountTy = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
if (VT.isVector())
ShiftAmountTy = VT;
- unsigned BitsDiff = VT.getScalarType().getSizeInBits() -
- ExtraVT.getScalarType().getSizeInBits();
+ unsigned BitsDiff = VT.getScalarSizeInBits() -
+ ExtraVT.getScalarSizeInBits();
SDValue ShiftCst = DAG.getConstant(BitsDiff, dl, ShiftAmountTy);
Tmp1 = DAG.getNode(ISD::SHL, dl, Node->getValueType(0),
Node->getOperand(0), ShiftCst);
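
The new i1 special case in scalar form: mask down to the boolean bit, then negate, so 1 becomes -1 and 0 stays 0 (sketch only):

#include <cassert>
#include <cstdint>

int32_t signExtendInRegI1(int32_t X) {
  int32_t And = X & 1; // high bits are not guaranteed zero, so mask first
  return 0 - And;      // sub 0, And == negation: 1 -> -1, 0 -> 0
}

int main() {
  assert(signExtendInRegI1(1) == -1);
  assert(signExtendInRegI1(0) == 0);
  assert(signExtendInRegI1(0x7F) == -1); // garbage high bits are ignored
}
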
@@ -3248,17 +3292,49 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
}
case ISD::MULHU:
case ISD::MULHS: {
- unsigned ExpandOpcode = Node->getOpcode() == ISD::MULHU ? ISD::UMUL_LOHI :
- ISD::SMUL_LOHI;
+ unsigned ExpandOpcode =
+ Node->getOpcode() == ISD::MULHU ? ISD::UMUL_LOHI : ISD::SMUL_LOHI;
EVT VT = Node->getValueType(0);
SDVTList VTs = DAG.getVTList(VT, VT);
- assert(TLI.isOperationLegalOrCustom(ExpandOpcode, VT) &&
- "If this wasn't legal, it shouldn't have been created!");
+
Tmp1 = DAG.getNode(ExpandOpcode, dl, VTs, Node->getOperand(0),
Node->getOperand(1));
Results.push_back(Tmp1.getValue(1));
break;
}
+ case ISD::UMUL_LOHI:
+ case ISD::SMUL_LOHI: {
+ SDValue LHS = Node->getOperand(0);
+ SDValue RHS = Node->getOperand(1);
+ MVT VT = LHS.getSimpleValueType();
+ unsigned MULHOpcode =
+ Node->getOpcode() == ISD::UMUL_LOHI ? ISD::MULHU : ISD::MULHS;
+
+ if (TLI.isOperationLegalOrCustom(MULHOpcode, VT)) {
+ Results.push_back(DAG.getNode(ISD::MUL, dl, VT, LHS, RHS));
+ Results.push_back(DAG.getNode(MULHOpcode, dl, VT, LHS, RHS));
+ break;
+ }
+
+ SmallVector<SDValue, 4> Halves;
+ EVT HalfType = EVT(VT).getHalfSizedIntegerVT(*DAG.getContext());
+ assert(TLI.isTypeLegal(HalfType));
+ if (TLI.expandMUL_LOHI(Node->getOpcode(), VT, Node, LHS, RHS, Halves,
+ HalfType, DAG,
+ TargetLowering::MulExpansionKind::Always)) {
+ for (unsigned i = 0; i < 2; ++i) {
+ SDValue Lo = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Halves[2 * i]);
+ SDValue Hi = DAG.getNode(ISD::ANY_EXTEND, dl, VT, Halves[2 * i + 1]);
+ SDValue Shift = DAG.getConstant(
+ HalfType.getScalarSizeInBits(), dl,
+ TLI.getShiftAmountTy(HalfType, DAG.getDataLayout()));
+ Hi = DAG.getNode(ISD::SHL, dl, VT, Hi, Shift);
+ Results.push_back(DAG.getNode(ISD::OR, dl, VT, Lo, Hi));
+ }
+ break;
+ }
+ break;
+ }
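
expandMUL_LOHI builds both result halves from half-width pieces; the schoolbook decomposition it relies on, written for a 32x32 multiply with 16-bit pieces (hypothetical helper, purely illustrative):

#include <cassert>
#include <cstdint>

void umulLoHi32(uint32_t A, uint32_t B, uint32_t &Lo, uint32_t &Hi) {
  uint32_t AL = A & 0xFFFF, AH = A >> 16;
  uint32_t BL = B & 0xFFFF, BH = B >> 16;
  uint32_t LL = AL * BL, LH = AL * BH, HL = AH * BL, HH = AH * BH;
  uint32_t Mid = (LL >> 16) + (LH & 0xFFFF) + (HL & 0xFFFF);
  Lo = (LL & 0xFFFF) | (Mid << 16); // lo | (hi << halfbits), as above
  Hi = HH + (LH >> 16) + (HL >> 16) + (Mid >> 16);
}

int main() {
  uint32_t Lo, Hi;
  umulLoHi32(0xFFFFFFFFu, 0xFFFFFFFFu, Lo, Hi);
  assert(Lo == 0x00000001u && Hi == 0xFFFFFFFEu); // 0xFFFFFFFE00000001
}
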
case ISD::MUL: {
EVT VT = Node->getValueType(0);
SDVTList VTs = DAG.getVTList(VT, VT);
@@ -3293,7 +3369,8 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
TLI.isOperationLegalOrCustom(ISD::ANY_EXTEND, VT) &&
TLI.isOperationLegalOrCustom(ISD::SHL, VT) &&
TLI.isOperationLegalOrCustom(ISD::OR, VT) &&
- TLI.expandMUL(Node, Lo, Hi, HalfType, DAG)) {
+ TLI.expandMUL(Node, Lo, Hi, HalfType, DAG,
+ TargetLowering::MulExpansionKind::OnlyLegalOrCustom)) {
Lo = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Lo);
Hi = DAG.getNode(ISD::ANY_EXTEND, dl, VT, Hi);
SDValue Shift =
@@ -3416,8 +3493,18 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
// pre-lowered to the correct types. This all depends upon WideVT not
// being a legal type for the architecture and thus has to be split to
// two arguments.
- SDValue Args[] = { LHS, HiLHS, RHS, HiRHS };
- SDValue Ret = ExpandLibCall(LC, WideVT, Args, 4, isSigned, dl);
+ SDValue Ret;
+ if (DAG.getDataLayout().isLittleEndian()) {
+ // Halves of WideVT are packed into registers in different order
+ // depending on platform endianness. This is usually handled by
+ // the C calling convention, but we can't defer to it in
+ // the legalizer.
+ SDValue Args[] = { LHS, HiLHS, RHS, HiRHS };
+ Ret = ExpandLibCall(LC, WideVT, Args, 4, isSigned, dl);
+ } else {
+ SDValue Args[] = { HiLHS, LHS, HiRHS, RHS };
+ Ret = ExpandLibCall(LC, WideVT, Args, 4, isSigned, dl);
+ }
BottomHalf = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VT, Ret,
DAG.getIntPtrConstant(0, dl));
TopHalf = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VT, Ret,
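
The endianness issue being worked around, in miniature: a wide value split across two "registers" occupies them in opposite orders on little- and big-endian targets (hypothetical illustration):

#include <cassert>
#include <cstdint>
#include <cstring>

int main() {
  uint64_t Wide = 0x1122334455667788ull;
  uint32_t Regs[2];
  std::memcpy(Regs, &Wide, sizeof(Wide));
  // Little-endian: Regs[0] is the low half (0x55667788); big-endian:
  // Regs[0] is the high half (0x11223344). Hence (LHS, HiLHS, ...) on LE
  // but (HiLHS, LHS, ...) in the libcall argument list above.
  assert(Regs[0] == 0x55667788u || Regs[0] == 0x11223344u);
}
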
@@ -3441,6 +3528,15 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
TopHalf = DAG.getSetCC(dl, getSetCCResultType(VT), TopHalf,
DAG.getConstant(0, dl, VT), ISD::SETNE);
}
+
+ // Truncate the result if SetCC returns a larger type than needed.
+ EVT RType = Node->getValueType(1);
+ if (RType.getSizeInBits() < TopHalf.getValueSizeInBits())
+ TopHalf = DAG.getNode(ISD::TRUNCATE, dl, RType, TopHalf);
+
+ assert(RType.getSizeInBits() == TopHalf.getValueSizeInBits() &&
+ "Unexpected result type for S/UMULO legalization");
+
Results.push_back(BottomHalf);
Results.push_back(TopHalf);
break;
@@ -3476,9 +3572,9 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
SDValue Table = Node->getOperand(1);
SDValue Index = Node->getOperand(2);
- EVT PTy = TLI.getPointerTy(DAG.getDataLayout());
-
const DataLayout &TD = DAG.getDataLayout();
+ EVT PTy = TLI.getPointerTy(TD);
+
unsigned EntrySize =
DAG.getMachineFunction().getJumpTableInfo()->getEntrySize(TD);
@@ -3492,7 +3588,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
ISD::SEXTLOAD, dl, PTy, Chain, Addr,
MachinePointerInfo::getJumpTable(DAG.getMachineFunction()), MemVT);
Addr = LD;
- if (TM.isPositionIndependent()) {
+ if (TLI.isJumpTableRelative()) {
// For PIC, the sequence is:
// BRIND(load(Jumptable + index) + RelocBase)
// RelocBase can be JumpTable, GOT or some sort of global base.
@@ -4019,10 +4115,11 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
}
Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, OVT, Tmp1));
break;
+ case ISD::BITREVERSE:
case ISD::BSWAP: {
unsigned DiffBits = NVT.getSizeInBits() - OVT.getSizeInBits();
Tmp1 = DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, Node->getOperand(0));
- Tmp1 = DAG.getNode(ISD::BSWAP, dl, NVT, Tmp1);
+ Tmp1 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1);
Tmp1 = DAG.getNode(
ISD::SRL, dl, NVT, Tmp1,
DAG.getConstant(DiffBits, dl,
@@ -4073,6 +4170,10 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
ReplacedNode(Node);
break;
}
+ case ISD::SDIV:
+ case ISD::SREM:
+ case ISD::UDIV:
+ case ISD::UREM:
case ISD::AND:
case ISD::OR:
case ISD::XOR: {
@@ -4082,7 +4183,20 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
TruncOp = ISD::BITCAST;
} else {
assert(OVT.isInteger() && "Cannot promote logic operation");
- ExtOp = ISD::ANY_EXTEND;
+
+ switch (Node->getOpcode()) {
+ default:
+ ExtOp = ISD::ANY_EXTEND;
+ break;
+ case ISD::SDIV:
+ case ISD::SREM:
+ ExtOp = ISD::SIGN_EXTEND;
+ break;
+ case ISD::UDIV:
+ case ISD::UREM:
+ ExtOp = ISD::ZERO_EXTEND;
+ break;
+ }
TruncOp = ISD::TRUNCATE;
}
// Promote each of the values to the new type.
@@ -4093,6 +4207,24 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
Results.push_back(DAG.getNode(TruncOp, dl, OVT, Tmp1));
break;
}
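
The extension choice added above is not cosmetic: promoting a signed division with the wrong extension changes the answer. A small check (illustrative):

#include <cassert>
#include <cstdint>

int main() {
  int8_t A = -6, B = 2;
  // SDIV promoted with SIGN_EXTEND gives the right result.
  assert((int32_t)A / (int32_t)B == -3);
  // Zero-extending -6 to i32 yields 250, and 250 / 2 == 125 -- wrong
  // once truncated back to i8.
  assert((int32_t)(uint8_t)A / (int32_t)B == 125);
}
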
+ case ISD::UMUL_LOHI:
+ case ISD::SMUL_LOHI: {
+ // Promote to a multiply in a wider integer type.
+ unsigned ExtOp = Node->getOpcode() == ISD::UMUL_LOHI ? ISD::ZERO_EXTEND
+ : ISD::SIGN_EXTEND;
+ Tmp1 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(0));
+ Tmp2 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(1));
+ Tmp1 = DAG.getNode(ISD::MUL, dl, NVT, Tmp1, Tmp2);
+
+ auto &DL = DAG.getDataLayout();
+ unsigned OriginalSize = OVT.getScalarSizeInBits();
+ Tmp2 = DAG.getNode(
+ ISD::SRL, dl, NVT, Tmp1,
+ DAG.getConstant(OriginalSize, dl, TLI.getScalarShiftAmountTy(DL, NVT)));
+ Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, OVT, Tmp1));
+ Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, OVT, Tmp2));
+ break;
+ }
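
The promoted MUL_LOHI pattern for an i16 operation widened to i32: one widened multiply, then a shift by the original width recovers the high half (sketch with hypothetical names):

#include <cassert>
#include <cstdint>

void umulLoHi16(uint16_t A, uint16_t B, uint16_t &Lo, uint16_t &Hi) {
  uint32_t P = (uint32_t)A * (uint32_t)B; // ZERO_EXTEND both, MUL in NVT
  Lo = (uint16_t)P;                       // TRUNCATE
  Hi = (uint16_t)(P >> 16);               // SRL by OriginalSize, TRUNCATE
}

int main() {
  uint16_t Lo, Hi;
  umulLoHi16(0xFFFF, 0xFFFF, Lo, Hi);
  assert(Lo == 0x0001 && Hi == 0xFFFE); // 0xFFFF * 0xFFFF == 0xFFFE0001
}
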
case ISD::SELECT: {
unsigned ExtOp, TruncOp;
if (Node->getValueType(0).isVector() ||
@@ -4351,7 +4483,7 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
MVT EltVT = OVT.getVectorElementType();
MVT NewEltVT = NVT.getVectorElementType();
- // Handle bitcasts to different vector type with the smae total bit size.
+ // Handle bitcasts to different vector type with the same total bit size.
//
// e.g. v2i64 = scalar_to_vector x:i64
// =>
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
index 31ebf7b..72b56d8 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
@@ -632,7 +632,8 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N, unsigned ResNo) {
SDLoc dl(N);
auto MMOFlags =
- L->getMemOperand()->getFlags() & ~MachineMemOperand::MOInvariant;
+ L->getMemOperand()->getFlags() &
+ ~(MachineMemOperand::MOInvariant | MachineMemOperand::MODereferenceable);
SDValue NewL;
if (L->getExtensionType() == ISD::NON_EXTLOAD) {
NewL = DAG.getLoad(L->getAddressingMode(), L->getExtensionType(), NVT, dl,
@@ -1465,7 +1466,7 @@ void DAGTypeLegalizer::ExpandFloatRes_XINT_TO_FP(SDNode *N, SDValue &Lo,
// TODO: Are there fast-math-flags to propagate to this FADD?
Lo = DAG.getNode(ISD::FADD, dl, VT, Hi,
- DAG.getConstantFP(APFloat(APFloat::PPCDoubleDouble,
+ DAG.getConstantFP(APFloat(APFloat::PPCDoubleDouble(),
APInt(128, Parts)),
dl, MVT::ppcf128));
Lo = DAG.getSelectCC(dl, Src, DAG.getConstant(0, dl, SrcVT),
@@ -1630,7 +1631,7 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_FP_TO_UINT(SDNode *N) {
assert(N->getOperand(0).getValueType() == MVT::ppcf128 &&
"Logic only correct for ppcf128!");
const uint64_t TwoE31[] = {0x41e0000000000000LL, 0};
- APFloat APF = APFloat(APFloat::PPCDoubleDouble, APInt(128, TwoE31));
+ APFloat APF = APFloat(APFloat::PPCDoubleDouble(), APInt(128, TwoE31));
SDValue Tmp = DAG.getConstantFP(APF, dl, MVT::ppcf128);
// X>=2^31 ? (int)(X-2^31)+0x80000000 : (int)X
// FIXME: generated code sucks.
@@ -2085,7 +2086,8 @@ SDValue DAGTypeLegalizer::PromoteFloatRes_LOAD(SDNode *N) {
// Load the value as an integer value with the same number of bits.
EVT IVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
auto MMOFlags =
- L->getMemOperand()->getFlags() & ~MachineMemOperand::MOInvariant;
+ L->getMemOperand()->getFlags() &
+ ~(MachineMemOperand::MOInvariant | MachineMemOperand::MODereferenceable);
SDValue newL = DAG.getLoad(L->getAddressingMode(), L->getExtensionType(), IVT,
SDLoc(N), L->getChain(), L->getBasePtr(),
L->getOffset(), L->getPointerInfo(), IVT,
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index 9a18943..dc436ce 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -57,8 +57,6 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::BSWAP: Res = PromoteIntRes_BSWAP(N); break;
case ISD::BUILD_PAIR: Res = PromoteIntRes_BUILD_PAIR(N); break;
case ISD::Constant: Res = PromoteIntRes_Constant(N); break;
- case ISD::CONVERT_RNDSAT:
- Res = PromoteIntRes_CONVERT_RNDSAT(N); break;
case ISD::CTLZ_ZERO_UNDEF:
case ISD::CTLZ: Res = PromoteIntRes_CTLZ(N); break;
case ISD::CTPOP: Res = PromoteIntRes_CTPOP(N); break;
@@ -102,6 +100,11 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::CONCAT_VECTORS:
Res = PromoteIntRes_CONCAT_VECTORS(N); break;
+ case ISD::ANY_EXTEND_VECTOR_INREG:
+ case ISD::SIGN_EXTEND_VECTOR_INREG:
+ case ISD::ZERO_EXTEND_VECTOR_INREG:
+ Res = PromoteIntRes_EXTEND_VECTOR_INREG(N); break;
+
case ISD::SIGN_EXTEND:
case ISD::ZERO_EXTEND:
case ISD::ANY_EXTEND: Res = PromoteIntRes_INT_EXTEND(N); break;
@@ -183,8 +186,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_Atomic0(AtomicSDNode *N) {
SDValue Res = DAG.getAtomic(N->getOpcode(), SDLoc(N),
N->getMemoryVT(), ResVT,
N->getChain(), N->getBasePtr(),
- N->getMemOperand(), N->getOrdering(),
- N->getSynchScope());
+ N->getMemOperand());
// Legalize the chain result - switch anything that used the old chain to
// use the new one.
ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
@@ -196,8 +198,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_Atomic1(AtomicSDNode *N) {
SDValue Res = DAG.getAtomic(N->getOpcode(), SDLoc(N),
N->getMemoryVT(),
N->getChain(), N->getBasePtr(),
- Op2, N->getMemOperand(), N->getOrdering(),
- N->getSynchScope());
+ Op2, N->getMemOperand());
// Legalize the chain result - switch anything that used the old chain to
// use the new one.
ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
@@ -220,8 +221,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_AtomicCmpSwap(AtomicSDNode *N,
SDValue Res = DAG.getAtomicCmpSwap(
ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, SDLoc(N), N->getMemoryVT(), VTs,
N->getChain(), N->getBasePtr(), N->getOperand(2), N->getOperand(3),
- N->getMemOperand(), N->getSuccessOrdering(), N->getFailureOrdering(),
- N->getSynchScope());
+ N->getMemOperand());
ReplaceValueWith(SDValue(N, 0), Res.getValue(0));
ReplaceValueWith(SDValue(N, 2), Res.getValue(2));
return Res.getValue(1);
@@ -233,8 +233,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_AtomicCmpSwap(AtomicSDNode *N,
DAG.getVTList(Op2.getValueType(), N->getValueType(1), MVT::Other);
SDValue Res = DAG.getAtomicCmpSwap(
N->getOpcode(), SDLoc(N), N->getMemoryVT(), VTs, N->getChain(),
- N->getBasePtr(), Op2, Op3, N->getMemOperand(), N->getSuccessOrdering(),
- N->getFailureOrdering(), N->getSynchScope());
+ N->getBasePtr(), Op2, Op3, N->getMemOperand());
// Update the use to N with the newly created Res.
for (unsigned i = 1, NumResults = N->getNumValues(); i < NumResults; ++i)
ReplaceValueWith(SDValue(N, i), Res.getValue(i));
@@ -353,18 +352,6 @@ SDValue DAGTypeLegalizer::PromoteIntRes_Constant(SDNode *N) {
return Result;
}
-SDValue DAGTypeLegalizer::PromoteIntRes_CONVERT_RNDSAT(SDNode *N) {
- ISD::CvtCode CvtCode = cast<CvtRndSatSDNode>(N)->getCvtCode();
- assert ((CvtCode == ISD::CVT_SS || CvtCode == ISD::CVT_SU ||
- CvtCode == ISD::CVT_US || CvtCode == ISD::CVT_UU ||
- CvtCode == ISD::CVT_SF || CvtCode == ISD::CVT_UF) &&
- "can only promote integers");
- EVT OutVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
- return DAG.getConvertRndSat(OutVT, SDLoc(N), N->getOperand(0),
- N->getOperand(1), N->getOperand(2),
- N->getOperand(3), N->getOperand(4), CvtCode);
-}
-
SDValue DAGTypeLegalizer::PromoteIntRes_CTLZ(SDNode *N) {
// Zero extend to the promoted type and do the count there.
SDValue Op = ZExtPromotedInteger(N->getOperand(0));
@@ -427,6 +414,10 @@ SDValue DAGTypeLegalizer::PromoteIntRes_FP_TO_XINT(SDNode *N) {
// Assert that the converted value fits in the original type. If it doesn't
// (eg: because the value being converted is too big), then the result of the
// original operation was undefined anyway, so the assert is still correct.
+ //
+ // NOTE: promoting fp-to-uint to fp-to-sint guarantees a zero extend. For example:
+ // before legalization: fp-to-uint16, 65534. -> 0xfffe
+ // after legalization: fp-to-sint32, 65534. -> 0x0000fffe
return DAG.getNode(N->getOpcode() == ISD::FP_TO_UINT ?
ISD::AssertZext : ISD::AssertSext, dl, NVT, Res,
DAG.getValueType(N->getValueType(0).getScalarType()));
@@ -507,7 +498,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_MGATHER(MaskedGatherSDNode *N) {
N->getIndex()};
SDValue Res = DAG.getMaskedGather(DAG.getVTList(NVT, MVT::Other),
N->getMemoryVT(), dl, Ops,
- N->getMemOperand());
+ N->getMemOperand());
// Legalize the chain result - switch anything that used the old chain to
// use the new one.
ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
@@ -882,8 +873,6 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) {
case ISD::BUILD_VECTOR: Res = PromoteIntOp_BUILD_VECTOR(N); break;
case ISD::CONCAT_VECTORS: Res = PromoteIntOp_CONCAT_VECTORS(N); break;
case ISD::EXTRACT_VECTOR_ELT: Res = PromoteIntOp_EXTRACT_VECTOR_ELT(N); break;
- case ISD::CONVERT_RNDSAT:
- Res = PromoteIntOp_CONVERT_RNDSAT(N); break;
case ISD::INSERT_VECTOR_ELT:
Res = PromoteIntOp_INSERT_VECTOR_ELT(N, OpNo);break;
case ISD::SCALAR_TO_VECTOR:
@@ -946,14 +935,16 @@ void DAGTypeLegalizer::PromoteSetCCOperands(SDValue &NewLHS,SDValue &NewRHS,
SDValue OpL = GetPromotedInteger(NewLHS);
SDValue OpR = GetPromotedInteger(NewRHS);
- // We would prefer to promote the comparison operand with sign extension,
- // if we find the operand is actually to truncate an AssertSext. With this
- // optimization, we can avoid inserting real truncate instruction, which
- // is redudant eventually.
- if (OpL->getOpcode() == ISD::AssertSext &&
- cast<VTSDNode>(OpL->getOperand(1))->getVT() == NewLHS.getValueType() &&
- OpR->getOpcode() == ISD::AssertSext &&
- cast<VTSDNode>(OpR->getOperand(1))->getVT() == NewRHS.getValueType()) {
+ // We would prefer to promote the comparison operand with sign extension.
+ // If the width of OpL/OpR excluding the duplicated sign bits is no greater
+ // than the width of NewLHS/NewRHS, we can avoid inserting a real truncate
+ // instruction, which is redundant eventually.
+ unsigned OpLEffectiveBits =
+ OpL.getValueSizeInBits() - DAG.ComputeNumSignBits(OpL) + 1;
+ unsigned OpREffectiveBits =
+ OpR.getValueSizeInBits() - DAG.ComputeNumSignBits(OpR) + 1;
+ if (OpLEffectiveBits <= NewLHS.getValueSizeInBits() &&
+ OpREffectiveBits <= NewRHS.getValueSizeInBits()) {
NewLHS = OpL;
NewRHS = OpR;
} else {
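The sign-bit test above generalizes the old AssertSext check: ComputeNumSignBits counts duplicated sign bits, so the width that actually carries information is the value width minus those duplicates plus one. A small sketch of the arithmetic (effectiveBits is a hypothetical helper, not part of the patch):

    unsigned effectiveBits(unsigned ValueBits, unsigned NumSignBits) {
      return ValueBits - NumSignBits + 1;
    }

For example, an i8 sign-extended to i32 has at least 25 sign bits, so effectiveBits(32, 25) == 8; both promoted operands then fit the original i8 comparison and no truncate is needed.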
@@ -990,8 +981,7 @@ SDValue DAGTypeLegalizer::PromoteIntOp_ANY_EXTEND(SDNode *N) {
SDValue DAGTypeLegalizer::PromoteIntOp_ATOMIC_STORE(AtomicSDNode *N) {
SDValue Op2 = GetPromotedInteger(N->getOperand(2));
return DAG.getAtomic(N->getOpcode(), SDLoc(N), N->getMemoryVT(),
- N->getChain(), N->getBasePtr(), Op2, N->getMemOperand(),
- N->getOrdering(), N->getSynchScope());
+ N->getChain(), N->getBasePtr(), Op2, N->getMemOperand());
}
SDValue DAGTypeLegalizer::PromoteIntOp_BITCAST(SDNode *N) {
@@ -1051,8 +1041,8 @@ SDValue DAGTypeLegalizer::PromoteIntOp_BUILD_VECTOR(SDNode *N) {
// Promote the inserted value. The type does not need to match the
// vector element type. Check that any extra bits introduced will be
// truncated away.
- assert(N->getOperand(0).getValueType().getSizeInBits() >=
- N->getValueType(0).getVectorElementType().getSizeInBits() &&
+ assert(N->getOperand(0).getValueSizeInBits() >=
+ N->getValueType(0).getScalarSizeInBits() &&
"Type of inserted value narrower than vector element type!");
SmallVector<SDValue, 16> NewOps;
@@ -1062,18 +1052,6 @@ SDValue DAGTypeLegalizer::PromoteIntOp_BUILD_VECTOR(SDNode *N) {
return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0);
}
-SDValue DAGTypeLegalizer::PromoteIntOp_CONVERT_RNDSAT(SDNode *N) {
- ISD::CvtCode CvtCode = cast<CvtRndSatSDNode>(N)->getCvtCode();
- assert ((CvtCode == ISD::CVT_SS || CvtCode == ISD::CVT_SU ||
- CvtCode == ISD::CVT_US || CvtCode == ISD::CVT_UU ||
- CvtCode == ISD::CVT_FS || CvtCode == ISD::CVT_FU) &&
- "can only promote integer arguments");
- SDValue InOp = GetPromotedInteger(N->getOperand(0));
- return DAG.getConvertRndSat(N->getValueType(0), SDLoc(N), InOp,
- N->getOperand(1), N->getOperand(2),
- N->getOperand(3), N->getOperand(4), CvtCode);
-}
-
SDValue DAGTypeLegalizer::PromoteIntOp_INSERT_VECTOR_ELT(SDNode *N,
unsigned OpNo) {
if (OpNo == 1) {
@@ -1081,8 +1059,8 @@ SDValue DAGTypeLegalizer::PromoteIntOp_INSERT_VECTOR_ELT(SDNode *N,
// have to match the vector element type.
// Check that any extra bits introduced will be truncated away.
- assert(N->getOperand(1).getValueType().getSizeInBits() >=
- N->getValueType(0).getVectorElementType().getSizeInBits() &&
+ assert(N->getOperand(1).getValueSizeInBits() >=
+ N->getValueType(0).getScalarSizeInBits() &&
"Type of inserted value narrower than vector element type!");
return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0),
GetPromotedInteger(N->getOperand(1)),
@@ -1210,7 +1188,7 @@ SDValue DAGTypeLegalizer::PromoteIntOp_MSTORE(MaskedStoreSDNode *N,
return DAG.getMaskedStore(N->getChain(), dl, DataOp, N->getBasePtr(), Mask,
N->getMemoryVT(), N->getMemOperand(),
- TruncateStore);
+ TruncateStore, N->isCompressingStore());
}
SDValue DAGTypeLegalizer::PromoteIntOp_MLOAD(MaskedLoadSDNode *N,
@@ -1233,7 +1211,15 @@ SDValue DAGTypeLegalizer::PromoteIntOp_MGATHER(MaskedGatherSDNode *N,
NewOps[OpNo] = PromoteTargetBoolean(N->getOperand(OpNo), DataVT);
} else
NewOps[OpNo] = GetPromotedInteger(N->getOperand(OpNo));
- return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0);
+
+ SDValue Res = SDValue(DAG.UpdateNodeOperands(N, NewOps), 0);
+ // If the node is updated in place, we are done.
+ if (Res.getNode() == N)
+ return Res;
+
+ ReplaceValueWith(SDValue(N, 0), Res.getValue(0));
+ ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
+ return SDValue();
}
SDValue DAGTypeLegalizer::PromoteIntOp_MSCATTER(MaskedScatterSDNode *N,
@@ -1314,6 +1300,7 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::CTPOP: ExpandIntRes_CTPOP(N, Lo, Hi); break;
case ISD::CTTZ_ZERO_UNDEF:
case ISD::CTTZ: ExpandIntRes_CTTZ(N, Lo, Hi); break;
+ case ISD::FLT_ROUNDS_: ExpandIntRes_FLT_ROUNDS(N, Lo, Hi); break;
case ISD::FP_TO_SINT: ExpandIntRes_FP_TO_SINT(N, Lo, Hi); break;
case ISD::FP_TO_UINT: ExpandIntRes_FP_TO_UINT(N, Lo, Hi); break;
case ISD::LOAD: ExpandIntRes_LOAD(cast<LoadSDNode>(N), Lo, Hi); break;
@@ -1352,8 +1339,7 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) {
SDValue Tmp = DAG.getAtomicCmpSwap(
ISD::ATOMIC_CMP_SWAP, SDLoc(N), AN->getMemoryVT(), VTs,
N->getOperand(0), N->getOperand(1), N->getOperand(2), N->getOperand(3),
- AN->getMemOperand(), AN->getSuccessOrdering(), AN->getFailureOrdering(),
- AN->getSynchScope());
+ AN->getMemOperand());
// Expanding to the strong ATOMIC_CMP_SWAP node means we can determine
// success simply by comparing the loaded value against the ingoing
@@ -1508,8 +1494,8 @@ ExpandShiftWithKnownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) {
SDValue Amt = N->getOperand(1);
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
EVT ShTy = Amt.getValueType();
- unsigned ShBits = ShTy.getScalarType().getSizeInBits();
- unsigned NVTBits = NVT.getScalarType().getSizeInBits();
+ unsigned ShBits = ShTy.getScalarSizeInBits();
+ unsigned NVTBits = NVT.getScalarSizeInBits();
assert(isPowerOf2_32(NVTBits) &&
"Expanded integer type size not a power of two!");
SDLoc dl(N);
@@ -1700,7 +1686,7 @@ void DAGTypeLegalizer::ExpandIntRes_MINMAX(SDNode *N,
EVT CCT = getSetCCResultType(NVT);
// Hi part is always the same op
- Hi = DAG.getNode(N->getOpcode(), DL, {NVT, NVT}, {LHSH, RHSH});
+ Hi = DAG.getNode(N->getOpcode(), DL, NVT, {LHSH, RHSH});
// We need to know whether to select Lo part that corresponds to 'winning'
// Hi part or if Hi parts are equal.
@@ -1711,7 +1697,7 @@ void DAGTypeLegalizer::ExpandIntRes_MINMAX(SDNode *N,
SDValue LoCmp = DAG.getSelect(DL, NVT, IsHiLeft, LHSL, RHSL);
// Recursed Lo part if Hi parts are equal, this uses unsigned version
- SDValue LoMinMax = DAG.getNode(LoOpc, DL, {NVT, NVT}, {LHSL, RHSL});
+ SDValue LoMinMax = DAG.getNode(LoOpc, DL, NVT, {LHSL, RHSL});
Lo = DAG.getSelect(DL, NVT, IsHiEq, LoMinMax, LoCmp);
}
@@ -1774,7 +1760,7 @@ void DAGTypeLegalizer::ExpandIntRes_ADDSUB(SDNode *N,
switch (BoolType) {
case TargetLoweringBase::UndefinedBooleanContent:
OVF = DAG.getNode(ISD::AND, dl, NVT, DAG.getConstant(1, dl, NVT), OVF);
- // Fallthrough
+ LLVM_FALLTHROUGH;
case TargetLoweringBase::ZeroOrOneBooleanContent:
Hi = DAG.getNode(N->getOpcode(), dl, NVT, Hi, OVF);
break;
@@ -2002,6 +1988,19 @@ void DAGTypeLegalizer::ExpandIntRes_CTTZ(SDNode *N,
Hi = DAG.getConstant(0, dl, NVT);
}
+void DAGTypeLegalizer::ExpandIntRes_FLT_ROUNDS(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDLoc dl(N);
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ unsigned NBitWidth = NVT.getSizeInBits();
+
+ EVT ShiftAmtTy = TLI.getShiftAmountTy(NVT, DAG.getDataLayout());
+ Lo = DAG.getNode(ISD::FLT_ROUNDS_, dl, NVT);
+ // The high part is the sign of Lo, as -1 is a valid value for FLT_ROUNDS.
+ Hi = DAG.getNode(ISD::SRA, dl, NVT, Lo,
+ DAG.getConstant(NBitWidth - 1, dl, ShiftAmtTy));
+}
+
void DAGTypeLegalizer::ExpandIntRes_FP_TO_SINT(SDNode *N, SDValue &Lo,
SDValue &Hi) {
SDLoc dl(N);
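The new ExpandIntRes_FLT_ROUNDS above splits the result by replicating the sign of the low half, since FLT_ROUNDS_ may legitimately return -1. A scalar sketch, assuming a 64-bit result expanded into 32-bit halves (relies on arithmetic right shift of signed ints, which holds on mainstream targets):

    #include <cstdint>

    int64_t expandFltRounds(int32_t Lo) {
      int32_t Hi = Lo >> 31; // SRA by NBitWidth - 1 replicates the sign
      return int64_t((uint64_t(uint32_t(Hi)) << 32) | uint32_t(Lo));
    }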
@@ -2065,7 +2064,7 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N,
if (ExtType == ISD::SEXTLOAD) {
// The high part is obtained by SRA'ing all but one of the bits of the
// lo part.
- unsigned LoSize = Lo.getValueType().getSizeInBits();
+ unsigned LoSize = Lo.getValueSizeInBits();
Hi = DAG.getNode(ISD::SRA, dl, NVT, Lo,
DAG.getConstant(LoSize - 1, dl,
TLI.getPointerTy(DAG.getDataLayout())));
@@ -2166,7 +2165,9 @@ void DAGTypeLegalizer::ExpandIntRes_MUL(SDNode *N,
GetExpandedInteger(N->getOperand(0), LL, LH);
GetExpandedInteger(N->getOperand(1), RL, RH);
- if (TLI.expandMUL(N, Lo, Hi, NVT, DAG, LL, LH, RL, RH))
+ if (TLI.expandMUL(N, Lo, Hi, NVT, DAG,
+ TargetLowering::MulExpansionKind::OnlyLegalOrCustom,
+ LL, LH, RL, RH))
return;
// If nothing else, we can make a libcall.
@@ -2180,7 +2181,7 @@ void DAGTypeLegalizer::ExpandIntRes_MUL(SDNode *N,
else if (VT == MVT::i128)
LC = RTLIB::MUL_I128;
- if (LC == RTLIB::UNKNOWN_LIBCALL) {
+ if (LC == RTLIB::UNKNOWN_LIBCALL || !TLI.getLibcallName(LC)) {
// We'll expand the multiplication by brute force because we have no other
// options. This is a trivially-generalized version of the code from
// Hacker's Delight (itself derived from Knuth's Algorithm M from section
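When no libcall is available, the brute-force path builds the double-width product from the part products of the halves. A scalar sketch of the identity it relies on (the 64-bit intermediate is only to keep the sketch short; the real expansion composes narrow operations):

    #include <cstdint>

    // (LH:LL) * (RH:RL) mod 2^64, from 32-bit halves.
    void expandMul(uint32_t LL, uint32_t LH, uint32_t RL, uint32_t RH,
                   uint32_t &Lo, uint32_t &Hi) {
      uint64_t LoProd = uint64_t(LL) * RL;             // full low product
      Lo = uint32_t(LoProd);
      Hi = uint32_t(LoProd >> 32) + LH * RL + LL * RH; // wraps mod 2^32
    }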
@@ -2354,8 +2355,8 @@ void DAGTypeLegalizer::ExpandIntRes_Shift(SDNode *N,
// the new SHL_PARTS operation would need further legalization.
SDValue ShiftOp = N->getOperand(1);
EVT ShiftTy = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
- assert(ShiftTy.getScalarType().getSizeInBits() >=
- Log2_32_Ceil(VT.getScalarType().getSizeInBits()) &&
+ assert(ShiftTy.getScalarSizeInBits() >=
+ Log2_32_Ceil(VT.getScalarSizeInBits()) &&
"ShiftAmountTy is too small to cover the range of this type!");
if (ShiftOp.getValueType() != ShiftTy)
ShiftOp = DAG.getZExtOrTrunc(ShiftOp, dl, ShiftTy);
@@ -2436,8 +2437,7 @@ void DAGTypeLegalizer::ExpandIntRes_SIGN_EXTEND(SDNode *N,
"Operand over promoted?");
// Split the promoted operand. This will simplify when it is expanded.
SplitInteger(Res, Lo, Hi);
- unsigned ExcessBits =
- Op.getValueType().getSizeInBits() - NVT.getSizeInBits();
+ unsigned ExcessBits = Op.getValueSizeInBits() - NVT.getSizeInBits();
Hi = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, Hi.getValueType(), Hi,
DAG.getValueType(EVT::getIntegerVT(*DAG.getContext(),
ExcessBits)));
@@ -2458,13 +2458,12 @@ ExpandIntRes_SIGN_EXTEND_INREG(SDNode *N, SDValue &Lo, SDValue &Hi) {
// The high part gets the sign extension from the lo-part. This handles
// things like sextinreg V:i64 from i8.
Hi = DAG.getNode(ISD::SRA, dl, Hi.getValueType(), Lo,
- DAG.getConstant(Hi.getValueType().getSizeInBits() - 1, dl,
+ DAG.getConstant(Hi.getValueSizeInBits() - 1, dl,
TLI.getPointerTy(DAG.getDataLayout())));
} else {
// For example, extension of an i48 to an i64. Leave the low part alone,
// sext_inreg the high part.
- unsigned ExcessBits =
- EVT.getSizeInBits() - Lo.getValueType().getSizeInBits();
+ unsigned ExcessBits = EVT.getSizeInBits() - Lo.getValueSizeInBits();
Hi = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, Hi.getValueType(), Hi,
DAG.getValueType(EVT::getIntegerVT(*DAG.getContext(),
ExcessBits)));
@@ -2690,8 +2689,7 @@ void DAGTypeLegalizer::ExpandIntRes_ZERO_EXTEND(SDNode *N,
"Operand over promoted?");
// Split the promoted operand. This will simplify when it is expanded.
SplitInteger(Res, Lo, Hi);
- unsigned ExcessBits =
- Op.getValueType().getSizeInBits() - NVT.getSizeInBits();
+ unsigned ExcessBits = Op.getValueSizeInBits() - NVT.getSizeInBits();
Hi = DAG.getZeroExtendInReg(Hi, dl,
EVT::getIntegerVT(*DAG.getContext(),
ExcessBits));
@@ -2707,10 +2705,7 @@ void DAGTypeLegalizer::ExpandIntRes_ATOMIC_LOAD(SDNode *N,
SDValue Swap = DAG.getAtomicCmpSwap(
ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, dl,
cast<AtomicSDNode>(N)->getMemoryVT(), VTs, N->getOperand(0),
- N->getOperand(1), Zero, Zero, cast<AtomicSDNode>(N)->getMemOperand(),
- cast<AtomicSDNode>(N)->getOrdering(),
- cast<AtomicSDNode>(N)->getOrdering(),
- cast<AtomicSDNode>(N)->getSynchScope());
+ N->getOperand(1), Zero, Zero, cast<AtomicSDNode>(N)->getMemOperand());
ReplaceValueWith(SDValue(N, 0), Swap.getValue(0));
ReplaceValueWith(SDValue(N, 1), Swap.getValue(2));
@@ -2833,51 +2828,51 @@ void DAGTypeLegalizer::IntegerExpandSetCCOperands(SDValue &NewLHS,
case ISD::SETUGE: LowCC = ISD::SETUGE; break;
}
- // Tmp1 = lo(op1) < lo(op2) // Always unsigned comparison
- // Tmp2 = hi(op1) < hi(op2) // Signedness depends on operands
- // dest = hi(op1) == hi(op2) ? Tmp1 : Tmp2;
+ // LoCmp = lo(op1) < lo(op2) // Always unsigned comparison
+ // HiCmp = hi(op1) < hi(op2) // Signedness depends on operands
+ // dest = hi(op1) == hi(op2) ? LoCmp : HiCmp;
// NOTE: on targets without efficient SELECT of bools, we can always use
// this identity: (B1 ? B2 : B3) --> (B1 & B2)|(!B1&B3)
TargetLowering::DAGCombinerInfo DagCombineInfo(DAG, AfterLegalizeTypes, true,
nullptr);
- SDValue Tmp1, Tmp2;
+ SDValue LoCmp, HiCmp;
if (TLI.isTypeLegal(LHSLo.getValueType()) &&
TLI.isTypeLegal(RHSLo.getValueType()))
- Tmp1 = TLI.SimplifySetCC(getSetCCResultType(LHSLo.getValueType()),
- LHSLo, RHSLo, LowCC, false, DagCombineInfo, dl);
- if (!Tmp1.getNode())
- Tmp1 = DAG.getSetCC(dl, getSetCCResultType(LHSLo.getValueType()),
- LHSLo, RHSLo, LowCC);
+ LoCmp = TLI.SimplifySetCC(getSetCCResultType(LHSLo.getValueType()), LHSLo,
+ RHSLo, LowCC, false, DagCombineInfo, dl);
+ if (!LoCmp.getNode())
+ LoCmp = DAG.getSetCC(dl, getSetCCResultType(LHSLo.getValueType()), LHSLo,
+ RHSLo, LowCC);
if (TLI.isTypeLegal(LHSHi.getValueType()) &&
TLI.isTypeLegal(RHSHi.getValueType()))
- Tmp2 = TLI.SimplifySetCC(getSetCCResultType(LHSHi.getValueType()),
- LHSHi, RHSHi, CCCode, false, DagCombineInfo, dl);
- if (!Tmp2.getNode())
- Tmp2 = DAG.getNode(ISD::SETCC, dl,
- getSetCCResultType(LHSHi.getValueType()),
- LHSHi, RHSHi, DAG.getCondCode(CCCode));
-
- ConstantSDNode *Tmp1C = dyn_cast<ConstantSDNode>(Tmp1.getNode());
- ConstantSDNode *Tmp2C = dyn_cast<ConstantSDNode>(Tmp2.getNode());
- if ((Tmp1C && Tmp1C->isNullValue()) ||
- (Tmp2C && Tmp2C->isNullValue() &&
- (CCCode == ISD::SETLE || CCCode == ISD::SETGE ||
- CCCode == ISD::SETUGE || CCCode == ISD::SETULE)) ||
- (Tmp2C && Tmp2C->getAPIntValue() == 1 &&
- (CCCode == ISD::SETLT || CCCode == ISD::SETGT ||
- CCCode == ISD::SETUGT || CCCode == ISD::SETULT))) {
- // low part is known false, returns high part.
+ HiCmp = TLI.SimplifySetCC(getSetCCResultType(LHSHi.getValueType()), LHSHi,
+ RHSHi, CCCode, false, DagCombineInfo, dl);
+ if (!HiCmp.getNode())
+ HiCmp =
+ DAG.getNode(ISD::SETCC, dl, getSetCCResultType(LHSHi.getValueType()),
+ LHSHi, RHSHi, DAG.getCondCode(CCCode));
+
+ ConstantSDNode *LoCmpC = dyn_cast<ConstantSDNode>(LoCmp.getNode());
+ ConstantSDNode *HiCmpC = dyn_cast<ConstantSDNode>(HiCmp.getNode());
+
+ bool EqAllowed = (CCCode == ISD::SETLE || CCCode == ISD::SETGE ||
+ CCCode == ISD::SETUGE || CCCode == ISD::SETULE);
+
+ if ((EqAllowed && (HiCmpC && HiCmpC->isNullValue())) ||
+ (!EqAllowed && ((HiCmpC && (HiCmpC->getAPIntValue() == 1)) ||
+ (LoCmpC && LoCmpC->isNullValue())))) {
// For LE / GE, if high part is known false, ignore the low part.
- // For LT / GT, if high part is known true, ignore the low part.
- NewLHS = Tmp2;
+ // For LT / GT: if low part is known false, return the high part.
+ // if high part is known true, ignore the low part.
+ NewLHS = HiCmp;
NewRHS = SDValue();
return;
}
if (LHSHi == RHSHi) {
// Comparing the low bits is enough.
- NewLHS = Tmp1;
+ NewLHS = LoCmp;
NewRHS = SDValue();
return;
}
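The LoCmp/HiCmp renaming above tracks the expansion identity spelled out in the comment: compare the high words with the requested signedness, the low words unsigned, and select. A scalar sketch for a signed less-than on a value split into Hi/Lo words:

    #include <cstdint>

    bool sltExpanded(int32_t LHSHi, uint32_t LHSLo, int32_t RHSHi, uint32_t RHSLo) {
      bool LoCmp = LHSLo < RHSLo;            // always an unsigned comparison
      bool HiCmp = LHSHi < RHSHi;            // signedness follows the operands
      return LHSHi == RHSHi ? LoCmp : HiCmp; // dest = hi equal ? LoCmp : HiCmp
    }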
@@ -2922,8 +2917,8 @@ void DAGTypeLegalizer::IntegerExpandSetCCOperands(SDValue &NewLHS,
if (!NewLHS.getNode())
NewLHS = DAG.getSetCC(dl, getSetCCResultType(LHSHi.getValueType()),
LHSHi, RHSHi, ISD::SETEQ);
- NewLHS = DAG.getSelect(dl, Tmp1.getValueType(),
- NewLHS, Tmp1, Tmp2);
+ NewLHS = DAG.getSelect(dl, LoCmp.getValueType(),
+ NewLHS, LoCmp, HiCmp);
NewRHS = SDValue();
}
@@ -3198,9 +3193,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_ATOMIC_STORE(SDNode *N) {
cast<AtomicSDNode>(N)->getMemoryVT(),
N->getOperand(0),
N->getOperand(1), N->getOperand(2),
- cast<AtomicSDNode>(N)->getMemOperand(),
- cast<AtomicSDNode>(N)->getOrdering(),
- cast<AtomicSDNode>(N)->getSynchScope());
+ cast<AtomicSDNode>(N)->getMemOperand());
return Swap.getValue(1);
}
@@ -3327,6 +3320,41 @@ SDValue DAGTypeLegalizer::PromoteIntRes_CONCAT_VECTORS(SDNode *N) {
return DAG.getNode(ISD::BUILD_VECTOR, dl, NOutVT, Ops);
}
+SDValue DAGTypeLegalizer::PromoteIntRes_EXTEND_VECTOR_INREG(SDNode *N) {
+ EVT VT = N->getValueType(0);
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ assert(NVT.isVector() && "This type must be promoted to a vector type");
+
+ SDLoc dl(N);
+
+ // For operands whose TypeAction is to promote, extend the promoted node
+ // appropriately (ZERO_EXTEND or SIGN_EXTEND) from the original pre-promotion
+ // type, and then construct a new *_EXTEND_VECTOR_INREG node to the promote-to
+ // type.
+ if (getTypeAction(N->getOperand(0).getValueType())
+ == TargetLowering::TypePromoteInteger) {
+ SDValue Promoted;
+
+ switch(N->getOpcode()) {
+ case ISD::SIGN_EXTEND_VECTOR_INREG:
+ Promoted = SExtPromotedInteger(N->getOperand(0));
+ break;
+ case ISD::ZERO_EXTEND_VECTOR_INREG:
+ Promoted = ZExtPromotedInteger(N->getOperand(0));
+ break;
+ case ISD::ANY_EXTEND_VECTOR_INREG:
+ Promoted = GetPromotedInteger(N->getOperand(0));
+ break;
+ default:
+ llvm_unreachable("Node has unexpected Opcode");
+ }
+ return DAG.getNode(N->getOpcode(), dl, NVT, Promoted);
+ }
+
+ // Directly extend to the appropriate transform-to type.
+ return DAG.getNode(N->getOpcode(), dl, NVT, N->getOperand(0));
+}
+
SDValue DAGTypeLegalizer::PromoteIntRes_INSERT_VECTOR_ELT(SDNode *N) {
EVT OutVT = N->getValueType(0);
EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT);
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
index 144bed2..cf19d75 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
@@ -117,6 +117,8 @@ void DAGTypeLegalizer::PerformExpensiveChecks() {
Mapped |= 64;
if (WidenedVectors.find(Res) != WidenedVectors.end())
Mapped |= 128;
+ if (PromotedFloats.find(Res) != PromotedFloats.end())
+ Mapped |= 256;
if (Node.getNodeId() != Processed) {
// Since we allow ReplacedValues to map deleted nodes, it may map nodes
@@ -159,6 +161,8 @@ void DAGTypeLegalizer::PerformExpensiveChecks() {
dbgs() << " SplitVectors";
if (Mapped & 128)
dbgs() << " WidenedVectors";
+ if (Mapped & 256)
+ dbgs() << " PromotedFloats";
dbgs() << "\n";
llvm_unreachable(nullptr);
}
@@ -484,7 +488,7 @@ SDNode *DAGTypeLegalizer::AnalyzeNewNode(SDNode *N) {
// updated after all operands have been analyzed. Since this is rare,
// the code tries to minimize overhead in the non-morphing case.
- SmallVector<SDValue, 8> NewOps;
+ std::vector<SDValue> NewOps;
unsigned NumProcessed = 0;
for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
SDValue OrigOp = N->getOperand(i);
@@ -500,7 +504,7 @@ SDNode *DAGTypeLegalizer::AnalyzeNewNode(SDNode *N) {
NewOps.push_back(Op);
} else if (Op != OrigOp) {
// This is the first operand to change - add all operands so far.
- NewOps.append(N->op_begin(), N->op_begin() + i);
+ NewOps.insert(NewOps.end(), N->op_begin(), N->op_begin() + i);
NewOps.push_back(Op);
}
}
@@ -794,8 +798,7 @@ void DAGTypeLegalizer::SetScalarizedVector(SDValue Op, SDValue Result) {
// Note that in some cases vector operation operands may be greater than
// the vector element type. For example BUILD_VECTOR of type <1 x i1> with
// a constant i8 operand.
- assert(Result.getValueType().getSizeInBits() >=
- Op.getValueType().getVectorElementType().getSizeInBits() &&
+ assert(Result.getValueSizeInBits() >= Op.getScalarValueSizeInBits() &&
"Invalid type for scalarized vector");
AnalyzeNewValue(Result);
@@ -905,7 +908,7 @@ void DAGTypeLegalizer::SetWidenedVector(SDValue Op, SDValue Result) {
/// Convert to an integer of the same size.
SDValue DAGTypeLegalizer::BitConvertToInteger(SDValue Op) {
- unsigned BitWidth = Op.getValueType().getSizeInBits();
+ unsigned BitWidth = Op.getValueSizeInBits();
return DAG.getNode(ISD::BITCAST, SDLoc(Op),
EVT::getIntegerVT(*DAG.getContext(), BitWidth), Op);
}
@@ -913,7 +916,7 @@ SDValue DAGTypeLegalizer::BitConvertToInteger(SDValue Op) {
/// Convert to a vector of integers of the same size.
SDValue DAGTypeLegalizer::BitConvertVectorToIntegerVector(SDValue Op) {
assert(Op.getValueType().isVector() && "Only applies to vectors!");
- unsigned EltWidth = Op.getValueType().getVectorElementType().getSizeInBits();
+ unsigned EltWidth = Op.getScalarValueSizeInBits();
EVT EltNVT = EVT::getIntegerVT(*DAG.getContext(), EltWidth);
unsigned NumElts = Op.getValueType().getVectorNumElements();
return DAG.getNode(ISD::BITCAST, SDLoc(Op),
@@ -1018,22 +1021,6 @@ void DAGTypeLegalizer::GetPairElements(SDValue Pair,
DAG.getIntPtrConstant(1, dl));
}
-SDValue DAGTypeLegalizer::GetVectorElementPointer(SDValue VecPtr, EVT EltVT,
- SDValue Index) {
- SDLoc dl(Index);
- // Make sure the index type is big enough to compute in.
- Index = DAG.getZExtOrTrunc(Index, dl, TLI.getPointerTy(DAG.getDataLayout()));
-
- // Calculate the element offset and add it to the pointer.
- unsigned EltSize = EltVT.getSizeInBits() / 8; // FIXME: should be ABI size.
- assert(EltSize * 8 == EltVT.getSizeInBits() &&
- "Converting bits to bytes lost precision");
-
- Index = DAG.getNode(ISD::MUL, dl, Index.getValueType(), Index,
- DAG.getConstant(EltSize, dl, Index.getValueType()));
- return DAG.getNode(ISD::ADD, dl, Index.getValueType(), Index, VecPtr);
-}
-
/// Build an integer with low bits Lo and high bits Hi.
SDValue DAGTypeLegalizer::JoinIntegers(SDValue Lo, SDValue Hi) {
// Arbitrarily use dlHi for result SDLoc
@@ -1145,7 +1132,7 @@ void DAGTypeLegalizer::SplitInteger(SDValue Op,
SDValue &Lo, SDValue &Hi) {
SDLoc dl(Op);
assert(LoVT.getSizeInBits() + HiVT.getSizeInBits() ==
- Op.getValueType().getSizeInBits() && "Invalid integer splitting!");
+ Op.getValueSizeInBits() && "Invalid integer splitting!");
Lo = DAG.getNode(ISD::TRUNCATE, dl, LoVT, Op);
Hi = DAG.getNode(ISD::SRL, dl, Op.getValueType(), Op,
DAG.getConstant(LoVT.getSizeInBits(), dl,
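SplitInteger's TRUNCATE/SRL pair has a direct scalar analogue. A sketch, assuming an i64 split into two i32 halves:

    #include <cstdint>

    void splitInteger(uint64_t Op, uint32_t &Lo, uint32_t &Hi) {
      Lo = uint32_t(Op);       // TRUNCATE to LoVT
      Hi = uint32_t(Op >> 32); // SRL by LoVT's width, then TRUNCATE
    }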
@@ -1157,8 +1144,8 @@ void DAGTypeLegalizer::SplitInteger(SDValue Op,
/// size of Op's.
void DAGTypeLegalizer::SplitInteger(SDValue Op,
SDValue &Lo, SDValue &Hi) {
- EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(),
- Op.getValueType().getSizeInBits()/2);
+ EVT HalfVT =
+ EVT::getIntegerVT(*DAG.getContext(), Op.getValueSizeInBits() / 2);
SplitInteger(Op, HalfVT, HalfVT, Lo, Hi);
}
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index 84ad8f8..ec55662 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -173,7 +173,6 @@ private:
/// input operand is returned.
SDValue DisintegrateMERGE_VALUES(SDNode *N, unsigned ResNo);
- SDValue GetVectorElementPointer(SDValue VecPtr, EVT EltVT, SDValue Index);
SDValue JoinIntegers(SDValue Lo, SDValue Hi);
SDValue LibCallify(RTLIB::Libcall LC, SDNode *N, bool isSigned);
@@ -242,6 +241,7 @@ private:
SDValue PromoteIntRes_VECTOR_SHUFFLE(SDNode *N);
SDValue PromoteIntRes_BUILD_VECTOR(SDNode *N);
SDValue PromoteIntRes_SCALAR_TO_VECTOR(SDNode *N);
+ SDValue PromoteIntRes_EXTEND_VECTOR_INREG(SDNode *N);
SDValue PromoteIntRes_INSERT_VECTOR_ELT(SDNode *N);
SDValue PromoteIntRes_CONCAT_VECTORS(SDNode *N);
SDValue PromoteIntRes_BITCAST(SDNode *N);
@@ -249,7 +249,6 @@ private:
SDValue PromoteIntRes_BITREVERSE(SDNode *N);
SDValue PromoteIntRes_BUILD_PAIR(SDNode *N);
SDValue PromoteIntRes_Constant(SDNode *N);
- SDValue PromoteIntRes_CONVERT_RNDSAT(SDNode *N);
SDValue PromoteIntRes_CTLZ(SDNode *N);
SDValue PromoteIntRes_CTPOP(SDNode *N);
SDValue PromoteIntRes_CTTZ(SDNode *N);
@@ -288,7 +287,6 @@ private:
SDValue PromoteIntOp_BR_CC(SDNode *N, unsigned OpNo);
SDValue PromoteIntOp_BRCOND(SDNode *N, unsigned OpNo);
SDValue PromoteIntOp_BUILD_VECTOR(SDNode *N);
- SDValue PromoteIntOp_CONVERT_RNDSAT(SDNode *N);
SDValue PromoteIntOp_INSERT_VECTOR_ELT(SDNode *N, unsigned OpNo);
SDValue PromoteIntOp_EXTRACT_VECTOR_ELT(SDNode *N);
SDValue PromoteIntOp_EXTRACT_SUBVECTOR(SDNode *N);
@@ -339,6 +337,7 @@ private:
void ExpandIntRes_SIGN_EXTEND_INREG (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_TRUNCATE (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_ZERO_EXTEND (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_FLT_ROUNDS (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_FP_TO_SINT (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_FP_TO_UINT (SDNode *N, SDValue &Lo, SDValue &Hi);
@@ -601,7 +600,6 @@ private:
SDValue ScalarizeVecRes_BITCAST(SDNode *N);
SDValue ScalarizeVecRes_BUILD_VECTOR(SDNode *N);
- SDValue ScalarizeVecRes_CONVERT_RNDSAT(SDNode *N);
SDValue ScalarizeVecRes_EXTRACT_SUBVECTOR(SDNode *N);
SDValue ScalarizeVecRes_FP_ROUND(SDNode *N);
SDValue ScalarizeVecRes_FPOWI(SDNode *N);
@@ -707,7 +705,6 @@ private:
SDValue WidenVecRes_BITCAST(SDNode* N);
SDValue WidenVecRes_BUILD_VECTOR(SDNode* N);
SDValue WidenVecRes_CONCAT_VECTORS(SDNode* N);
- SDValue WidenVecRes_CONVERT_RNDSAT(SDNode* N);
SDValue WidenVecRes_EXTEND_VECTOR_INREG(SDNode* N);
SDValue WidenVecRes_EXTRACT_SUBVECTOR(SDNode* N);
SDValue WidenVecRes_INSERT_VECTOR_ELT(SDNode* N);
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
index 665180e..3682c32 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
@@ -141,11 +141,10 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) {
if (DAG.getDataLayout().isBigEndian())
std::swap(LHS, RHS);
- Vals.push_back(DAG.getNode(ISD::BUILD_PAIR, dl,
- EVT::getIntegerVT(
- *DAG.getContext(),
- LHS.getValueType().getSizeInBits() << 1),
- LHS, RHS));
+ Vals.push_back(DAG.getNode(
+ ISD::BUILD_PAIR, dl,
+ EVT::getIntegerVT(*DAG.getContext(), LHS.getValueSizeInBits() << 1),
+ LHS, RHS));
}
Lo = Vals[Slot++];
Hi = Vals[Slot++];
@@ -337,7 +336,8 @@ void DAGTypeLegalizer::IntegerToVector(SDValue Op, unsigned NumElements,
SDValue DAGTypeLegalizer::ExpandOp_BITCAST(SDNode *N) {
SDLoc dl(N);
- if (N->getValueType(0).isVector()) {
+ if (N->getValueType(0).isVector() &&
+ N->getOperand(0).getValueType().isInteger()) {
// An illegal expanding type is being converted to a legal vector type.
// Make a two element vector out of the expanded parts and convert that
// instead, but only if the new vector type is legal (otherwise there
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index 3c9cb17..d4fa20f 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -106,7 +106,8 @@ class VectorLegalizer {
SDValue ExpandStore(SDValue Op);
SDValue ExpandFNEG(SDValue Op);
SDValue ExpandBITREVERSE(SDValue Op);
- SDValue ExpandCTLZ_CTTZ_ZERO_UNDEF(SDValue Op);
+ SDValue ExpandCTLZ(SDValue Op);
+ SDValue ExpandCTTZ_ZERO_UNDEF(SDValue Op);
/// \brief Implements vector promotion.
///
@@ -332,6 +333,8 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
case ISD::SMAX:
case ISD::UMIN:
case ISD::UMAX:
+ case ISD::SMUL_LOHI:
+ case ISD::UMUL_LOHI:
QueryType = Node->getValueType(0);
break;
case ISD::FP_ROUND_INREG:
@@ -362,7 +365,7 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
Result = Tmp1;
break;
}
- // FALL THROUGH
+ LLVM_FALLTHROUGH;
}
case TargetLowering::Expand:
Result = Expand(Op);
@@ -693,9 +696,11 @@ SDValue VectorLegalizer::Expand(SDValue Op) {
return UnrollVSETCC(Op);
case ISD::BITREVERSE:
return ExpandBITREVERSE(Op);
+ case ISD::CTLZ:
case ISD::CTLZ_ZERO_UNDEF:
+ return ExpandCTLZ(Op);
case ISD::CTTZ_ZERO_UNDEF:
- return ExpandCTLZ_CTTZ_ZERO_UNDEF(Op);
+ return ExpandCTTZ_ZERO_UNDEF(Op);
default:
return DAG.UnrollVectorOp(Op.getNode());
}
@@ -770,8 +775,8 @@ SDValue VectorLegalizer::ExpandSEXTINREG(SDValue Op) {
SDLoc DL(Op);
EVT OrigTy = cast<VTSDNode>(Op->getOperand(1))->getVT();
- unsigned BW = VT.getScalarType().getSizeInBits();
- unsigned OrigBW = OrigTy.getScalarType().getSizeInBits();
+ unsigned BW = VT.getScalarSizeInBits();
+ unsigned OrigBW = OrigTy.getScalarSizeInBits();
SDValue ShiftSz = DAG.getConstant(BW - OrigBW, DL, VT);
Op = Op.getOperand(0);
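ExpandSEXTINREG's shift pair above is the classic shift-based sign extension. A sketch, assuming BW = 32 and OrigBW = 8 (again assuming arithmetic right shift of signed ints, true on mainstream targets):

    #include <cstdint>

    int32_t sextInReg8(int32_t X) {
      const unsigned Shift = 32 - 8;                 // BW - OrigBW
      return int32_t(uint32_t(X) << Shift) >> Shift; // SHL, then arithmetic SRA
    }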
@@ -817,8 +822,8 @@ SDValue VectorLegalizer::ExpandSIGN_EXTEND_VECTOR_INREG(SDValue Op) {
// Now we need sign extend. Do this by shifting the elements. Even if these
// aren't legal operations, they have a better chance of being legalized
// without full scalarization than the sign extension does.
- unsigned EltWidth = VT.getVectorElementType().getSizeInBits();
- unsigned SrcEltWidth = SrcVT.getVectorElementType().getSizeInBits();
+ unsigned EltWidth = VT.getScalarSizeInBits();
+ unsigned SrcEltWidth = SrcVT.getScalarSizeInBits();
SDValue ShiftAmount = DAG.getConstant(EltWidth - SrcEltWidth, DL, VT);
return DAG.getNode(ISD::SRA, DL, VT,
DAG.getNode(ISD::SHL, DL, VT, Op, ShiftAmount),
@@ -951,7 +956,7 @@ SDValue VectorLegalizer::ExpandVSELECT(SDValue Op) {
// If the mask and the type are different sizes, unroll the vector op. This
// can occur when getSetCCResultType returns something that is different in
// size from the operand types. For example, v4i8 = select v4i32, v4i8, v4i8.
- if (VT.getSizeInBits() != Op1.getValueType().getSizeInBits())
+ if (VT.getSizeInBits() != Op1.getValueSizeInBits())
return DAG.UnrollVectorOp(Op.getNode());
// Bitcast the operands to be the same type as the mask.
@@ -961,7 +966,7 @@ SDValue VectorLegalizer::ExpandVSELECT(SDValue Op) {
Op2 = DAG.getNode(ISD::BITCAST, DL, VT, Op2);
SDValue AllOnes = DAG.getConstant(
- APInt::getAllOnesValue(VT.getScalarType().getSizeInBits()), DL, VT);
+ APInt::getAllOnesValue(VT.getScalarSizeInBits()), DL, VT);
SDValue NotMask = DAG.getNode(ISD::XOR, DL, VT, Mask, AllOnes);
Op1 = DAG.getNode(ISD::AND, DL, VT, Op1, Mask);
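With the operands bitcast to the mask's type, the identity quoted earlier in this patch, (B1 ? B2 : B3) --> (B1 & B2) | (!B1 & B3), becomes plain bit math per lane. A per-lane sketch, assuming an all-ones/all-zeros mask element:

    #include <cstdint>

    uint32_t vselectLane(uint32_t Mask, uint32_t Op1, uint32_t Op2) {
      return (Op1 & Mask) | (Op2 & ~Mask); // Mask ? Op1 : Op2
    }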
@@ -979,21 +984,20 @@ SDValue VectorLegalizer::ExpandUINT_TO_FLOAT(SDValue Op) {
TLI.getOperationAction(ISD::SRL, VT) == TargetLowering::Expand)
return DAG.UnrollVectorOp(Op.getNode());
- EVT SVT = VT.getScalarType();
- assert((SVT.getSizeInBits() == 64 || SVT.getSizeInBits() == 32) &&
- "Elements in vector-UINT_TO_FP must be 32 or 64 bits wide");
+ unsigned BW = VT.getScalarSizeInBits();
+ assert((BW == 64 || BW == 32) &&
+ "Elements in vector-UINT_TO_FP must be 32 or 64 bits wide");
- unsigned BW = SVT.getSizeInBits();
- SDValue HalfWord = DAG.getConstant(BW/2, DL, VT);
+ SDValue HalfWord = DAG.getConstant(BW / 2, DL, VT);
// Constants to clear the upper part of the word.
// Notice that we can also use SHL+SHR, but using a constant is slightly
// faster on x86.
- uint64_t HWMask = (SVT.getSizeInBits()==64)?0x00000000FFFFFFFF:0x0000FFFF;
+ uint64_t HWMask = (BW == 64) ? 0x00000000FFFFFFFF : 0x0000FFFF;
SDValue HalfWordMask = DAG.getConstant(HWMask, DL, VT);
// Two to the power of half-word-size.
- SDValue TWOHW = DAG.getConstantFP(1 << (BW/2), DL, Op.getValueType());
+ SDValue TWOHW = DAG.getConstantFP(1 << (BW / 2), DL, Op.getValueType());
// Clear upper part of LO, lower HI
SDValue HI = DAG.getNode(ISD::SRL, DL, VT, Op.getOperand(0), HalfWord);
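ExpandUINT_TO_FLOAT converts each half-word separately and recombines with a scaled add; each half fits losslessly in a signed conversion. A scalar sketch for BW = 32:

    #include <cstdint>

    double uintToFP32(uint32_t X) {
      uint32_t HI = X >> 16;             // SRL by HalfWord
      uint32_t LO = X & 0xFFFF;          // HWMask clears the upper part
      double fHI = double(HI) * 65536.0; // SINT_TO_FP, scaled by TWOHW = 2^(BW/2)
      double fLO = double(LO);           // SINT_TO_FP of the low half
      return fHI + fLO;                  // FADD recombines: HI * 2^16 + LO == X
    }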
@@ -1010,7 +1014,6 @@ SDValue VectorLegalizer::ExpandUINT_TO_FLOAT(SDValue Op) {
return DAG.getNode(ISD::FADD, DL, Op.getValueType(), fHI, fLO);
}
-
SDValue VectorLegalizer::ExpandFNEG(SDValue Op) {
if (TLI.isOperationLegalOrCustom(ISD::FSUB, Op.getValueType())) {
SDLoc DL(Op);
@@ -1022,12 +1025,53 @@ SDValue VectorLegalizer::ExpandFNEG(SDValue Op) {
return DAG.UnrollVectorOp(Op.getNode());
}
-SDValue VectorLegalizer::ExpandCTLZ_CTTZ_ZERO_UNDEF(SDValue Op) {
+SDValue VectorLegalizer::ExpandCTLZ(SDValue Op) {
+ EVT VT = Op.getValueType();
+ unsigned NumBitsPerElt = VT.getScalarSizeInBits();
+
+ // If the non-ZERO_UNDEF version is supported we can use that instead.
+ if (Op.getOpcode() == ISD::CTLZ_ZERO_UNDEF &&
+ TLI.isOperationLegalOrCustom(ISD::CTLZ, VT)) {
+ SDLoc DL(Op);
+ return DAG.getNode(ISD::CTLZ, DL, Op.getValueType(), Op.getOperand(0));
+ }
+
+ // If CTPOP is available we can lower with a CTPOP based method:
+ // u16 ctlz(u16 x) {
+ // x |= (x >> 1);
+ // x |= (x >> 2);
+ // x |= (x >> 4);
+ // x |= (x >> 8);
+ // return ctpop(~x);
+ // }
+ // Ref: "Hacker's Delight" by Henry Warren
+ if (isPowerOf2_32(NumBitsPerElt) &&
+ TLI.isOperationLegalOrCustom(ISD::CTPOP, VT) &&
+ TLI.isOperationLegalOrCustom(ISD::SRL, VT) &&
+ TLI.isOperationLegalOrCustomOrPromote(ISD::OR, VT) &&
+ TLI.isOperationLegalOrCustomOrPromote(ISD::XOR, VT)) {
+ SDLoc DL(Op);
+ SDValue Res = Op.getOperand(0);
+ EVT ShiftTy = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
+
+ for (unsigned i = 1; i != NumBitsPerElt; i *= 2)
+ Res = DAG.getNode(
+ ISD::OR, DL, VT, Res,
+ DAG.getNode(ISD::SRL, DL, VT, Res, DAG.getConstant(i, DL, ShiftTy)));
+
+ Res = DAG.getNOT(DL, Res, VT);
+ return DAG.getNode(ISD::CTPOP, DL, VT, Res);
+ }
+
+ // Otherwise go ahead and unroll.
+ return DAG.UnrollVectorOp(Op.getNode());
+}
+
+SDValue VectorLegalizer::ExpandCTTZ_ZERO_UNDEF(SDValue Op) {
// If the non-ZERO_UNDEF version is supported we can use that instead.
- unsigned Opc = Op.getOpcode() == ISD::CTLZ_ZERO_UNDEF ? ISD::CTLZ : ISD::CTTZ;
- if (TLI.isOperationLegalOrCustom(Opc, Op.getValueType())) {
+ if (TLI.isOperationLegalOrCustom(ISD::CTTZ, Op.getValueType())) {
SDLoc DL(Op);
- return DAG.getNode(Opc, DL, Op.getValueType(), Op.getOperand(0));
+ return DAG.getNode(ISD::CTTZ, DL, Op.getValueType(), Op.getOperand(0));
}
// Otherwise go ahead and unroll.
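The CTPOP-based lowering above follows the Hacker's Delight recipe quoted in the comment: smear the leading one bit downwards, invert, and count the remaining set bits. A runnable scalar sketch for 32-bit elements:

    #include <cstdint>

    unsigned ctlz32(uint32_t X) {
      X |= X >> 1; X |= X >> 2; X |= X >> 4; // smear the highest set bit
      X |= X >> 8; X |= X >> 16;             // ...down through all lower bits
      X = ~X;                                // leading zeros become the set bits
      unsigned Count = 0;
      for (; X; X &= X - 1) ++Count;         // ctpop
      return Count;                          // ctlz32(0) == 32
    }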
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index f3adca4..6906f67 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -51,7 +51,6 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) {
case ISD::MERGE_VALUES: R = ScalarizeVecRes_MERGE_VALUES(N, ResNo);break;
case ISD::BITCAST: R = ScalarizeVecRes_BITCAST(N); break;
case ISD::BUILD_VECTOR: R = ScalarizeVecRes_BUILD_VECTOR(N); break;
- case ISD::CONVERT_RNDSAT: R = ScalarizeVecRes_CONVERT_RNDSAT(N); break;
case ISD::EXTRACT_SUBVECTOR: R = ScalarizeVecRes_EXTRACT_SUBVECTOR(N); break;
case ISD::FP_ROUND: R = ScalarizeVecRes_FP_ROUND(N); break;
case ISD::FP_ROUND_INREG: R = ScalarizeVecRes_InregOp(N); break;
@@ -179,17 +178,6 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_BUILD_VECTOR(SDNode *N) {
return InOp;
}
-SDValue DAGTypeLegalizer::ScalarizeVecRes_CONVERT_RNDSAT(SDNode *N) {
- EVT NewVT = N->getValueType(0).getVectorElementType();
- SDValue Op0 = GetScalarizedVector(N->getOperand(0));
- return DAG.getConvertRndSat(NewVT, SDLoc(N),
- Op0, DAG.getValueType(NewVT),
- DAG.getValueType(Op0.getValueType()),
- N->getOperand(3),
- N->getOperand(4),
- cast<CvtRndSatSDNode>(N)->getCvtCode());
-}
-
SDValue DAGTypeLegalizer::ScalarizeVecRes_EXTRACT_SUBVECTOR(SDNode *N) {
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N),
N->getValueType(0).getVectorElementType(),
@@ -621,7 +609,6 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
case ISD::BITREVERSE:
case ISD::BSWAP:
- case ISD::CONVERT_RNDSAT:
case ISD::CTLZ:
case ISD::CTTZ:
case ISD::CTLZ_ZERO_UNDEF:
@@ -846,7 +833,6 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_SUBVECTOR(SDNode *N, SDValue &Lo,
GetSplitVector(Vec, Lo, Hi);
EVT VecVT = Vec.getValueType();
- EVT VecElemVT = VecVT.getVectorElementType();
unsigned VecElems = VecVT.getVectorNumElements();
unsigned SubElems = SubVec.getValueType().getVectorNumElements();
@@ -872,7 +858,7 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_SUBVECTOR(SDNode *N, SDValue &Lo,
DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, MachinePointerInfo());
// Store the new subvector into the specified index.
- SDValue SubVecPtr = GetVectorElementPointer(StackPtr, VecElemVT, Idx);
+ SDValue SubVecPtr = TLI.getVectorElementPointer(DAG, StackPtr, VecVT, Idx);
Type *VecType = VecVT.getTypeForEVT(*DAG.getContext());
unsigned Alignment = DAG.getDataLayout().getPrefTypeAlignment(VecType);
Store = DAG.getStore(Store, dl, SubVec, SubVecPtr, MachinePointerInfo());
@@ -882,7 +868,7 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_SUBVECTOR(SDNode *N, SDValue &Lo,
DAG.getLoad(Lo.getValueType(), dl, Store, StackPtr, MachinePointerInfo());
// Increment the pointer to the other part.
- unsigned IncrementSize = Lo.getValueType().getSizeInBits() / 8;
+ unsigned IncrementSize = Lo.getValueSizeInBits() / 8;
StackPtr =
DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr,
DAG.getConstant(IncrementSize, dl, StackPtr.getValueType()));
@@ -1003,7 +989,7 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo,
// Store the new element. This may be larger than the vector element type,
// so use a truncating store.
- SDValue EltPtr = GetVectorElementPointer(StackPtr, EltVT, Idx);
+ SDValue EltPtr = TLI.getVectorElementPointer(DAG, StackPtr, VecVT, Idx);
Type *VecType = VecVT.getTypeForEVT(*DAG.getContext());
unsigned Alignment = DAG.getDataLayout().getPrefTypeAlignment(VecType);
Store =
@@ -1014,7 +1000,7 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo,
DAG.getLoad(Lo.getValueType(), dl, Store, StackPtr, MachinePointerInfo());
// Increment the pointer to the other part.
- unsigned IncrementSize = Lo.getValueType().getSizeInBits() / 8;
+ unsigned IncrementSize = Lo.getValueSizeInBits() / 8;
StackPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr,
DAG.getConstant(IncrementSize, dl,
StackPtr.getValueType()));
@@ -1114,11 +1100,10 @@ void DAGTypeLegalizer::SplitVecRes_MLOAD(MaskedLoadSDNode *MLD,
Alignment, MLD->getAAInfo(), MLD->getRanges());
Lo = DAG.getMaskedLoad(LoVT, dl, Ch, Ptr, MaskLo, Src0Lo, LoMemVT, MMO,
- ExtType);
+ ExtType, MLD->isExpandingLoad());
- unsigned IncrementSize = LoMemVT.getSizeInBits()/8;
- Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
- DAG.getConstant(IncrementSize, dl, Ptr.getValueType()));
+ Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, dl, LoMemVT, DAG,
+ MLD->isExpandingLoad());
MMO = DAG.getMachineFunction().
getMachineMemOperand(MLD->getPointerInfo(),
@@ -1126,7 +1111,7 @@ void DAGTypeLegalizer::SplitVecRes_MLOAD(MaskedLoadSDNode *MLD,
SecondHalfAlignment, MLD->getAAInfo(), MLD->getRanges());
Hi = DAG.getMaskedLoad(HiVT, dl, Ch, Ptr, MaskHi, Src0Hi, HiMemVT, MMO,
- ExtType);
+ ExtType, MLD->isExpandingLoad());
// Build a factor node to remember that this load is independent of the
@@ -1237,18 +1222,6 @@ void DAGTypeLegalizer::SplitVecRes_UnaryOp(SDNode *N, SDValue &Lo,
if (N->getOpcode() == ISD::FP_ROUND) {
Lo = DAG.getNode(N->getOpcode(), dl, LoVT, Lo, N->getOperand(1));
Hi = DAG.getNode(N->getOpcode(), dl, HiVT, Hi, N->getOperand(1));
- } else if (N->getOpcode() == ISD::CONVERT_RNDSAT) {
- SDValue DTyOpLo = DAG.getValueType(LoVT);
- SDValue DTyOpHi = DAG.getValueType(HiVT);
- SDValue STyOpLo = DAG.getValueType(Lo.getValueType());
- SDValue STyOpHi = DAG.getValueType(Hi.getValueType());
- SDValue RndOp = N->getOperand(3);
- SDValue SatOp = N->getOperand(4);
- ISD::CvtCode CvtCode = cast<CvtRndSatSDNode>(N)->getCvtCode();
- Lo = DAG.getConvertRndSat(LoVT, dl, Lo, DTyOpLo, STyOpLo, RndOp, SatOp,
- CvtCode);
- Hi = DAG.getConvertRndSat(HiVT, dl, Hi, DTyOpHi, STyOpHi, RndOp, SatOp,
- CvtCode);
} else {
Lo = DAG.getNode(N->getOpcode(), dl, LoVT, Lo);
Hi = DAG.getNode(N->getOpcode(), dl, HiVT, Hi);
@@ -1282,7 +1255,7 @@ void DAGTypeLegalizer::SplitVecRes_ExtendOp(SDNode *N, SDValue &Lo,
LLVMContext &Ctx = *DAG.getContext();
EVT NewSrcVT = EVT::getVectorVT(
Ctx, EVT::getIntegerVT(
- Ctx, SrcVT.getVectorElementType().getSizeInBits() * 2),
+ Ctx, SrcVT.getScalarSizeInBits() * 2),
NumElements);
EVT SplitSrcVT =
EVT::getVectorVT(Ctx, SrcVT.getVectorElementType(), NumElements / 2);
@@ -1651,7 +1624,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N) {
DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, MachinePointerInfo());
// Load back the required element.
- StackPtr = GetVectorElementPointer(StackPtr, EltVT, Idx);
+ StackPtr = TLI.getVectorElementPointer(DAG, StackPtr, VecVT, Idx);
return DAG.getExtLoad(ISD::EXTLOAD, dl, N->getValueType(0), Store, StackPtr,
MachinePointerInfo(), EltVT);
}
@@ -1769,19 +1742,18 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSTORE(MaskedStoreSDNode *N,
Alignment, N->getAAInfo(), N->getRanges());
Lo = DAG.getMaskedStore(Ch, DL, DataLo, Ptr, MaskLo, LoMemVT, MMO,
- N->isTruncatingStore());
-
- unsigned IncrementSize = LoMemVT.getSizeInBits()/8;
- Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
- DAG.getConstant(IncrementSize, DL, Ptr.getValueType()));
+ N->isTruncatingStore(),
+ N->isCompressingStore());
+ Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, DL, LoMemVT, DAG,
+ N->isCompressingStore());
MMO = DAG.getMachineFunction().
getMachineMemOperand(N->getPointerInfo(),
MachineMemOperand::MOStore, HiMemVT.getStoreSize(),
SecondHalfAlignment, N->getAAInfo(), N->getRanges());
Hi = DAG.getMaskedStore(Ch, DL, DataHi, Ptr, MaskHi, HiMemVT, MMO,
- N->isTruncatingStore());
+ N->isTruncatingStore(), N->isCompressingStore());
// Build a factor node to remember that this store is independent of the
// other one.
@@ -1940,8 +1912,8 @@ SDValue DAGTypeLegalizer::SplitVecOp_TruncateHelper(SDNode *N) {
// if we're trying to split it at all. assert() that's true, just in case.
assert(!(NumElements & 1) && "Splitting vector, but not in half!");
- unsigned InElementSize = InVT.getVectorElementType().getSizeInBits();
- unsigned OutElementSize = OutVT.getVectorElementType().getSizeInBits();
+ unsigned InElementSize = InVT.getScalarSizeInBits();
+ unsigned OutElementSize = OutVT.getScalarSizeInBits();
// If the input elements are only 1/2 the width of the result elements,
// just use the normal splitting. Our trick only works if there's room
@@ -2047,7 +2019,6 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
case ISD::BITCAST: Res = WidenVecRes_BITCAST(N); break;
case ISD::BUILD_VECTOR: Res = WidenVecRes_BUILD_VECTOR(N); break;
case ISD::CONCAT_VECTORS: Res = WidenVecRes_CONCAT_VECTORS(N); break;
- case ISD::CONVERT_RNDSAT: Res = WidenVecRes_CONVERT_RNDSAT(N); break;
case ISD::EXTRACT_SUBVECTOR: Res = WidenVecRes_EXTRACT_SUBVECTOR(N); break;
case ISD::FP_ROUND_INREG: Res = WidenVecRes_InregOp(N); break;
case ISD::INSERT_VECTOR_ELT: Res = WidenVecRes_INSERT_VECTOR_ELT(N); break;
@@ -2695,86 +2666,6 @@ SDValue DAGTypeLegalizer::WidenVecRes_CONCAT_VECTORS(SDNode *N) {
return DAG.getNode(ISD::BUILD_VECTOR, dl, WidenVT, Ops);
}
-SDValue DAGTypeLegalizer::WidenVecRes_CONVERT_RNDSAT(SDNode *N) {
- SDLoc dl(N);
- SDValue InOp = N->getOperand(0);
- SDValue RndOp = N->getOperand(3);
- SDValue SatOp = N->getOperand(4);
-
- EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
- unsigned WidenNumElts = WidenVT.getVectorNumElements();
-
- EVT InVT = InOp.getValueType();
- EVT InEltVT = InVT.getVectorElementType();
- EVT InWidenVT = EVT::getVectorVT(*DAG.getContext(), InEltVT, WidenNumElts);
-
- SDValue DTyOp = DAG.getValueType(WidenVT);
- SDValue STyOp = DAG.getValueType(InWidenVT);
- ISD::CvtCode CvtCode = cast<CvtRndSatSDNode>(N)->getCvtCode();
-
- unsigned InVTNumElts = InVT.getVectorNumElements();
- if (getTypeAction(InVT) == TargetLowering::TypeWidenVector) {
- InOp = GetWidenedVector(InOp);
- InVT = InOp.getValueType();
- InVTNumElts = InVT.getVectorNumElements();
- if (InVTNumElts == WidenNumElts)
- return DAG.getConvertRndSat(WidenVT, dl, InOp, DTyOp, STyOp, RndOp,
- SatOp, CvtCode);
- }
-
- if (TLI.isTypeLegal(InWidenVT)) {
- // Because the result and the input are different vector types, widening
- // the result could create a legal type but widening the input might make
- // it an illegal type that might lead to repeatedly splitting the input
- // and then widening it. To avoid this, we widen the input only if
- // it results in a legal type.
- if (WidenNumElts % InVTNumElts == 0) {
- // Widen the input and call convert on the widened input vector.
- unsigned NumConcat = WidenNumElts/InVTNumElts;
- SmallVector<SDValue, 16> Ops(NumConcat);
- Ops[0] = InOp;
- SDValue UndefVal = DAG.getUNDEF(InVT);
- for (unsigned i = 1; i != NumConcat; ++i)
- Ops[i] = UndefVal;
-
- InOp = DAG.getNode(ISD::CONCAT_VECTORS, dl, InWidenVT, Ops);
- return DAG.getConvertRndSat(WidenVT, dl, InOp, DTyOp, STyOp, RndOp,
- SatOp, CvtCode);
- }
-
- if (InVTNumElts % WidenNumElts == 0) {
- // Extract the input and convert the shorten input vector.
- InOp = DAG.getNode(
- ISD::EXTRACT_SUBVECTOR, dl, InWidenVT, InOp,
- DAG.getConstant(0, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
- return DAG.getConvertRndSat(WidenVT, dl, InOp, DTyOp, STyOp, RndOp,
- SatOp, CvtCode);
- }
- }
-
- // Otherwise unroll into some nasty scalar code and rebuild the vector.
- SmallVector<SDValue, 16> Ops(WidenNumElts);
- EVT EltVT = WidenVT.getVectorElementType();
- DTyOp = DAG.getValueType(EltVT);
- STyOp = DAG.getValueType(InEltVT);
-
- unsigned MinElts = std::min(InVTNumElts, WidenNumElts);
- unsigned i;
- for (i=0; i < MinElts; ++i) {
- SDValue ExtVal = DAG.getNode(
- ISD::EXTRACT_VECTOR_ELT, dl, InEltVT, InOp,
- DAG.getConstant(i, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
- Ops[i] = DAG.getConvertRndSat(WidenVT, dl, ExtVal, DTyOp, STyOp, RndOp,
- SatOp, CvtCode);
- }
-
- SDValue UndefVal = DAG.getUNDEF(EltVT);
- for (; i < WidenNumElts; ++i)
- Ops[i] = UndefVal;
-
- return DAG.getNode(ISD::BUILD_VECTOR, dl, WidenVT, Ops);
-}
-
SDValue DAGTypeLegalizer::WidenVecRes_EXTRACT_SUBVECTOR(SDNode *N) {
EVT VT = N->getValueType(0);
EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
@@ -2881,7 +2772,8 @@ SDValue DAGTypeLegalizer::WidenVecRes_MLOAD(MaskedLoadSDNode *N) {
SDValue Res = DAG.getMaskedLoad(WidenVT, dl, N->getChain(), N->getBasePtr(),
Mask, Src0, N->getMemoryVT(),
- N->getMemOperand(), ExtType);
+ N->getMemOperand(), ExtType,
+ N->isExpandingLoad());
// Legalize the chain result - switch anything that used the old chain to
// use the new one.
ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
@@ -3317,7 +3209,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_MSTORE(SDNode *N, unsigned OpNo) {
"Mask and data vectors should have the same number of elements");
return DAG.getMaskedStore(MST->getChain(), dl, WideVal, MST->getBasePtr(),
Mask, MST->getMemoryVT(), MST->getMemOperand(),
- false);
+ false, MST->isCompressingStore());
}
SDValue DAGTypeLegalizer::WidenVecOp_MSCATTER(SDNode *N, unsigned OpNo) {
@@ -3547,7 +3439,10 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain,
LD->getPointerInfo().getWithOffset(Offset),
MinAlign(Align, Increment), MMOFlags, AAInfo);
LdChain.push_back(L.getValue(1));
- if (L->getValueType(0).isVector()) {
+ if (L->getValueType(0).isVector() && NewVTWidth >= LdWidth) {
+ // Later code assumes the vector loads produced will be mergeable, so we
+ // must pad the final entry up to the previous width. Scalars are
+ // combined separately.
SmallVector<SDValue, 16> Loads;
Loads.push_back(L);
unsigned size = L->getValueSizeInBits(0);
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp
index 1e5c4a7..ded8e68 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp
@@ -631,7 +631,7 @@ SUnit *ResourcePriorityQueue::pop() {
void ResourcePriorityQueue::remove(SUnit *SU) {
assert(!Queue.empty() && "Queue is empty!");
- std::vector<SUnit *>::iterator I = std::find(Queue.begin(), Queue.end(), SU);
+ std::vector<SUnit *>::iterator I = find(Queue, SU);
if (I != std::prev(Queue.end()))
std::swap(*I, Queue.back());
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
index 802c459..3549ccd 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
@@ -1339,7 +1339,7 @@ void ScheduleDAGRRList::releaseInterferences(unsigned Reg) {
LRegsMapT::iterator LRegsPos = LRegsMap.find(SU);
if (Reg) {
SmallVectorImpl<unsigned> &LRegs = LRegsPos->second;
- if (std::find(LRegs.begin(), LRegs.end(), Reg) == LRegs.end())
+ if (!is_contained(LRegs, Reg))
continue;
}
SU->isPending = false;
@@ -1704,8 +1704,7 @@ public:
void remove(SUnit *SU) override {
assert(!Queue.empty() && "Queue is empty!");
assert(SU->NodeQueueId != 0 && "Not in queue!");
- std::vector<SUnit *>::iterator I = std::find(Queue.begin(), Queue.end(),
- SU);
+ std::vector<SUnit *>::iterator I = find(Queue, SU);
if (I != std::prev(Queue.end()))
std::swap(*I, Queue.back());
Queue.pop_back();
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
index 5cc8066..a058942 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
@@ -15,10 +15,20 @@
#ifndef LLVM_LIB_CODEGEN_SELECTIONDAG_SCHEDULEDAGSDNODES_H
#define LLVM_LIB_CODEGEN_SELECTIONDAG_SCHEDULEDAGSDNODES_H
+#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/Support/Casting.h"
+#include <cassert>
+#include <string>
+#include <vector>
namespace llvm {
+
+class InstrItineraryData;
+
/// ScheduleDAGSDNodes - A ScheduleDAG for scheduling SDNode-based DAGs.
///
/// Edges between SUnits are initially based on edges in the SelectionDAG,
@@ -44,7 +54,7 @@ namespace llvm {
explicit ScheduleDAGSDNodes(MachineFunction &mf);
- ~ScheduleDAGSDNodes() override {}
+ ~ScheduleDAGSDNodes() override = default;
/// Run - perform scheduling.
///
@@ -131,6 +141,7 @@ namespace llvm {
unsigned DefIdx;
unsigned NodeNumDefs;
MVT ValueType;
+
public:
RegDefIter(const SUnit *SU, const ScheduleDAGSDNodes *SD);
@@ -150,6 +161,7 @@ namespace llvm {
}
void Advance();
+
private:
void InitNodeNumDefs();
};
@@ -175,6 +187,7 @@ namespace llvm {
void EmitPhysRegCopy(SUnit *SU, DenseMap<SUnit*, unsigned> &VRBaseMap,
MachineBasicBlock::iterator InsertPos);
};
-}
-#endif
+} // end namespace llvm
+
+#endif // LLVM_LIB_CODEGEN_SELECTIONDAG_SCHEDULEDAGSDNODES_H
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 29d11c7..e225ba8 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -134,7 +134,7 @@ bool ISD::isBuildVectorAllOnes(const SDNode *N) {
// we care if the resultant vector is all ones, not whether the individual
// constants are.
SDValue NotZero = N->getOperand(i);
- unsigned EltSize = N->getValueType(0).getVectorElementType().getSizeInBits();
+ unsigned EltSize = N->getValueType(0).getScalarSizeInBits();
if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(NotZero)) {
if (CN->getAPIntValue().countTrailingOnes() < EltSize)
return false;
@@ -173,7 +173,7 @@ bool ISD::isBuildVectorAllZeros(const SDNode *N) {
// We only want to check enough bits to cover the vector elements, because
// we care if the resultant vector is all zeros, not whether the individual
// constants are.
- unsigned EltSize = N->getValueType(0).getVectorElementType().getSizeInBits();
+ unsigned EltSize = N->getValueType(0).getScalarSizeInBits();
if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Op)) {
if (CN->getAPIntValue().countTrailingZeros() < EltSize)
return false;
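
These two hunks switch to getScalarSizeInBits(), a convenience accessor equivalent to going through getVectorElementType() (or getScalarType()) first. A sketch of the equivalence, assuming an EVT from llvm/CodeGen/ValueTypes.h; eltBitsLong and eltBitsShort are hypothetical names:

    #include "llvm/CodeGen/ValueTypes.h"

    // For a vector EVT, getScalarSizeInBits() is the element width; for a
    // scalar EVT it is the width of the type itself, so the two spellings
    // below compute the same value on vector types.
    static unsigned eltBitsLong(llvm::EVT VT) {
      return VT.getVectorElementType().getSizeInBits(); // vector types only
    }
    static unsigned eltBitsShort(llvm::EVT VT) {
      return VT.getScalarSizeInBits(); // works for scalars and vectors alike
    }
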
@@ -403,7 +403,6 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) {
ID.AddPointer(GA->getGlobal());
ID.AddInteger(GA->getOffset());
ID.AddInteger(GA->getTargetFlags());
- ID.AddInteger(GA->getAddressSpace());
break;
}
case ISD::BasicBlock:
@@ -521,24 +520,6 @@ static void AddNodeIDNode(FoldingSetNodeID &ID, const SDNode *N) {
AddNodeIDCustom(ID, N);
}
-/// encodeMemSDNodeFlags - Generic routine for computing a value for use in
-/// the CSE map that carries volatility, temporalness, indexing mode, and
-/// extension/truncation information.
-///
-static inline unsigned
-encodeMemSDNodeFlags(int ConvType, ISD::MemIndexedMode AM, bool isVolatile,
- bool isNonTemporal, bool isInvariant) {
- assert((ConvType & 3) == ConvType &&
- "ConvType may not require more than 2 bits!");
- assert((AM & 7) == AM &&
- "AM may not require more than 3 bits!");
- return ConvType |
- (AM << 2) |
- (isVolatile << 5) |
- (isNonTemporal << 6) |
- (isInvariant << 7);
-}
-
//===----------------------------------------------------------------------===//
// SelectionDAG Class
//===----------------------------------------------------------------------===//
@@ -1030,7 +1011,7 @@ SDValue SelectionDAG::getZeroExtendInReg(SDValue Op, const SDLoc &DL, EVT VT) {
"getZeroExtendInReg should use the vector element type instead of "
"the vector type!");
if (Op.getValueType() == VT) return Op;
- unsigned BitWidth = Op.getValueType().getScalarType().getSizeInBits();
+ unsigned BitWidth = Op.getScalarValueSizeInBits();
APInt Imm = APInt::getLowBitsSet(BitWidth,
VT.getSizeInBits());
return getNode(ISD::AND, DL, Op.getValueType(), Op,
@@ -1040,7 +1021,7 @@ SDValue SelectionDAG::getZeroExtendInReg(SDValue Op, const SDLoc &DL, EVT VT) {
SDValue SelectionDAG::getAnyExtendVectorInReg(SDValue Op, const SDLoc &DL,
EVT VT) {
assert(VT.isVector() && "This DAG node is restricted to vector types.");
- assert(VT.getSizeInBits() == Op.getValueType().getSizeInBits() &&
+ assert(VT.getSizeInBits() == Op.getValueSizeInBits() &&
"The sizes of the input and result must match in order to perform the "
"extend in-register.");
assert(VT.getVectorNumElements() < Op.getValueType().getVectorNumElements() &&
@@ -1051,7 +1032,7 @@ SDValue SelectionDAG::getAnyExtendVectorInReg(SDValue Op, const SDLoc &DL,
SDValue SelectionDAG::getSignExtendVectorInReg(SDValue Op, const SDLoc &DL,
EVT VT) {
assert(VT.isVector() && "This DAG node is restricted to vector types.");
- assert(VT.getSizeInBits() == Op.getValueType().getSizeInBits() &&
+ assert(VT.getSizeInBits() == Op.getValueSizeInBits() &&
"The sizes of the input and result must match in order to perform the "
"extend in-register.");
assert(VT.getVectorNumElements() < Op.getValueType().getVectorNumElements() &&
@@ -1062,7 +1043,7 @@ SDValue SelectionDAG::getSignExtendVectorInReg(SDValue Op, const SDLoc &DL,
SDValue SelectionDAG::getZeroExtendVectorInReg(SDValue Op, const SDLoc &DL,
EVT VT) {
assert(VT.isVector() && "This DAG node is restricted to vector types.");
- assert(VT.getSizeInBits() == Op.getValueType().getSizeInBits() &&
+ assert(VT.getSizeInBits() == Op.getValueSizeInBits() &&
"The sizes of the input and result must match in order to perform the "
"extend in-register.");
assert(VT.getVectorNumElements() < Op.getValueType().getVectorNumElements() &&
@@ -1123,7 +1104,7 @@ SDValue SelectionDAG::getConstant(const ConstantInt &Val, const SDLoc &DL,
if (VT.isVector() && TLI->getTypeAction(*getContext(), EltVT) ==
TargetLowering::TypePromoteInteger) {
EltVT = TLI->getTypeToTransformTo(*getContext(), EltVT);
- APInt NewVal = Elt->getValue().zext(EltVT.getSizeInBits());
+ APInt NewVal = Elt->getValue().zextOrTrunc(EltVT.getSizeInBits());
Elt = ConstantInt::get(*getContext(), NewVal);
}
// In other cases the element type is illegal and needs to be expanded, for
@@ -1149,7 +1130,7 @@ SDValue SelectionDAG::getConstant(const ConstantInt &Val, const SDLoc &DL,
SmallVector<SDValue, 2> EltParts;
for (unsigned i = 0; i < ViaVecNumElts / VT.getVectorNumElements(); ++i) {
EltParts.push_back(getConstant(NewVal.lshr(i * ViaEltSizeInBits)
- .trunc(ViaEltSizeInBits), DL,
+ .zextOrTrunc(ViaEltSizeInBits), DL,
ViaEltVT, isT, isO));
}
@@ -1166,12 +1147,9 @@ SDValue SelectionDAG::getConstant(const ConstantInt &Val, const SDLoc &DL,
// This situation occurs in MIPS MSA.
SmallVector<SDValue, 8> Ops;
- for (unsigned i = 0; i < VT.getVectorNumElements(); ++i)
+ for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i)
Ops.insert(Ops.end(), EltParts.begin(), EltParts.end());
-
- SDValue Result = getNode(ISD::BITCAST, DL, VT,
- getNode(ISD::BUILD_VECTOR, DL, ViaVecVT, Ops));
- return Result;
+ return getNode(ISD::BITCAST, DL, VT, getBuildVector(ViaVecVT, DL, Ops));
}
assert(Elt->getBitWidth() == EltVT.getSizeInBits() &&
@@ -1280,7 +1258,6 @@ SDValue SelectionDAG::getGlobalAddress(const GlobalValue *GV, const SDLoc &DL,
ID.AddPointer(GV);
ID.AddInteger(Offset);
ID.AddInteger(TargetFlags);
- ID.AddInteger(GV->getType()->getAddressSpace());
void *IP = nullptr;
if (SDNode *E = FindNodeOrInsertPos(ID, DL, IP))
return SDValue(E, 0);
@@ -1333,7 +1310,9 @@ SDValue SelectionDAG::getConstantPool(const Constant *C, EVT VT,
assert((TargetFlags == 0 || isTarget) &&
"Cannot set target flags on target-independent globals");
if (Alignment == 0)
- Alignment = getDataLayout().getPrefTypeAlignment(C->getType());
+ Alignment = MF->getFunction()->optForSize()
+ ? getDataLayout().getABITypeAlignment(C->getType())
+ : getDataLayout().getPrefTypeAlignment(C->getType());
unsigned Opc = isTarget ? ISD::TargetConstantPool : ISD::ConstantPool;
FoldingSetNodeID ID;
AddNodeIDNode(ID, Opc, getVTList(VT), None);
@@ -1650,31 +1629,6 @@ SDValue SelectionDAG::getCommutedVectorShuffle(const ShuffleVectorSDNode &SV) {
return getVectorShuffle(VT, SDLoc(&SV), Op1, Op0, MaskVec);
}
-SDValue SelectionDAG::getConvertRndSat(EVT VT, const SDLoc &dl, SDValue Val,
- SDValue DTy, SDValue STy, SDValue Rnd,
- SDValue Sat, ISD::CvtCode Code) {
- // If the src and dest types are the same and the conversion is between
- // integer types of the same sign or two floats, no conversion is necessary.
- if (DTy == STy &&
- (Code == ISD::CVT_UU || Code == ISD::CVT_SS || Code == ISD::CVT_FF))
- return Val;
-
- FoldingSetNodeID ID;
- SDValue Ops[] = { Val, DTy, STy, Rnd, Sat };
- AddNodeIDNode(ID, ISD::CONVERT_RNDSAT, getVTList(VT), Ops);
- void* IP = nullptr;
- if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP))
- return SDValue(E, 0);
-
- auto *N =
- newSDNode<CvtRndSatSDNode>(VT, dl.getIROrder(), dl.getDebugLoc(), Code);
- createOperands(N, Ops);
-
- CSEMap.InsertNode(N, IP);
- InsertNode(N);
- return SDValue(N, 0);
-}
-
SDValue SelectionDAG::getRegister(unsigned RegNo, EVT VT) {
FoldingSetNodeID ID;
AddNodeIDNode(ID, ISD::Register, getVTList(VT), None);
@@ -1863,13 +1817,13 @@ SDValue SelectionDAG::expandVACopy(SDNode *Node) {
}
SDValue SelectionDAG::CreateStackTemporary(EVT VT, unsigned minAlign) {
- MachineFrameInfo *FrameInfo = getMachineFunction().getFrameInfo();
+ MachineFrameInfo &MFI = getMachineFunction().getFrameInfo();
unsigned ByteSize = VT.getStoreSize();
Type *Ty = VT.getTypeForEVT(*getContext());
unsigned StackAlign =
std::max((unsigned)getDataLayout().getPrefTypeAlignment(Ty), minAlign);
- int FrameIdx = FrameInfo->CreateStackObject(ByteSize, StackAlign, false);
+ int FrameIdx = MFI.CreateStackObject(ByteSize, StackAlign, false);
return getFrameIndex(FrameIdx, TLI->getPointerTy(getDataLayout()));
}
@@ -1881,8 +1835,8 @@ SDValue SelectionDAG::CreateStackTemporary(EVT VT1, EVT VT2) {
unsigned Align =
std::max(DL.getPrefTypeAlignment(Ty1), DL.getPrefTypeAlignment(Ty2));
- MachineFrameInfo *FrameInfo = getMachineFunction().getFrameInfo();
- int FrameIdx = FrameInfo->CreateStackObject(Bytes, Align, false);
+ MachineFrameInfo &MFI = getMachineFunction().getFrameInfo();
+ int FrameIdx = MFI.CreateStackObject(Bytes, Align, false);
return getFrameIndex(FrameIdx, TLI->getPointerTy(getDataLayout()));
}
@@ -1943,29 +1897,29 @@ SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1, SDValue N2,
default: break;
case ISD::SETEQ: if (R==APFloat::cmpUnordered)
return getUNDEF(VT);
- // fall through
+ LLVM_FALLTHROUGH;
case ISD::SETOEQ: return getConstant(R==APFloat::cmpEqual, dl, VT);
case ISD::SETNE: if (R==APFloat::cmpUnordered)
return getUNDEF(VT);
- // fall through
+ LLVM_FALLTHROUGH;
case ISD::SETONE: return getConstant(R==APFloat::cmpGreaterThan ||
R==APFloat::cmpLessThan, dl, VT);
case ISD::SETLT: if (R==APFloat::cmpUnordered)
return getUNDEF(VT);
- // fall through
+ LLVM_FALLTHROUGH;
case ISD::SETOLT: return getConstant(R==APFloat::cmpLessThan, dl, VT);
case ISD::SETGT: if (R==APFloat::cmpUnordered)
return getUNDEF(VT);
- // fall through
+ LLVM_FALLTHROUGH;
case ISD::SETOGT: return getConstant(R==APFloat::cmpGreaterThan, dl, VT);
case ISD::SETLE: if (R==APFloat::cmpUnordered)
return getUNDEF(VT);
- // fall through
+ LLVM_FALLTHROUGH;
case ISD::SETOLE: return getConstant(R==APFloat::cmpLessThan ||
R==APFloat::cmpEqual, dl, VT);
case ISD::SETGE: if (R==APFloat::cmpUnordered)
return getUNDEF(VT);
- // fall through
+ LLVM_FALLTHROUGH;
case ISD::SETOGE: return getConstant(R==APFloat::cmpGreaterThan ||
R==APFloat::cmpEqual, dl, VT);
case ISD::SETO: return getConstant(R!=APFloat::cmpUnordered, dl, VT);
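
The "// fall through" comments above become LLVM_FALLTHROUGH (from llvm/Support/Compiler.h), which expands to a fallthrough attribute where the compiler provides one and to nothing otherwise, so -Wimplicit-fallthrough can verify the intent. A hypothetical sketch of the pattern:

    #include "llvm/Support/Compiler.h"

    // The annotation documents an intentional fallthrough in a form the
    // compiler can check; a bare comment is invisible to the warning.
    static int classify(int Kind, bool Strict) {
      switch (Kind) {
      case 0:
        if (Strict)
          return -1;
        LLVM_FALLTHROUGH; // deliberately treat the lax Kind == 0 like Kind == 1
      case 1:
        return 1;
      default:
        return 0;
      }
    }
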
@@ -1998,11 +1952,7 @@ SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1, SDValue N2,
/// SignBitIsZero - Return true if the sign bit of Op is known to be zero. We
/// use this predicate to simplify operations downstream.
bool SelectionDAG::SignBitIsZero(SDValue Op, unsigned Depth) const {
- // This predicate is not safe for vector operations.
- if (Op.getValueType().isVector())
- return false;
-
- unsigned BitWidth = Op.getValueType().getScalarType().getSizeInBits();
+ unsigned BitWidth = Op.getScalarValueSizeInBits();
return MaskedValueIsZero(Op, APInt::getSignBit(BitWidth), Depth);
}
@@ -2016,28 +1966,244 @@ bool SelectionDAG::MaskedValueIsZero(SDValue Op, const APInt &Mask,
return (KnownZero & Mask) == Mask;
}
+/// If a SHL/SRA/SRL node has a constant or splat constant shift amount that
+/// is less than the element bit-width of the shift node, return it.
+static const APInt *getValidShiftAmountConstant(SDValue V) {
+ if (ConstantSDNode *SA = isConstOrConstSplat(V.getOperand(1))) {
+ // Shifting more than the bitwidth is not valid.
+ const APInt &ShAmt = SA->getAPIntValue();
+ if (ShAmt.ult(V.getScalarValueSizeInBits()))
+ return &ShAmt;
+ }
+ return nullptr;
+}
+
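
The helper above accepts both a scalar constant and, via isConstOrConstSplat, a splat BUILD_VECTOR, rejecting out-of-range amounts. A reduction of the core validity test to plain APInt arithmetic; isValidShAmt is a hypothetical name:

    #include "llvm/ADT/APInt.h"

    // A shift amount is usable only when it is strictly less than the
    // element width, whether it came from a scalar constant or from every
    // lane of a splat vector.
    static bool isValidShAmt(const llvm::APInt &ShAmt, unsigned EltBits) {
      return ShAmt.ult(EltBits); // e.g. 3 is valid for i32 lanes, 35 is not
    }
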
/// Determine which bits of Op are known to be either zero or one and return
-/// them in the KnownZero/KnownOne bitsets.
+/// them in the KnownZero/KnownOne bitsets. For vectors, the known bits are
+/// those that are shared by every vector element.
void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero,
APInt &KnownOne, unsigned Depth) const {
- unsigned BitWidth = Op.getValueType().getScalarType().getSizeInBits();
+ EVT VT = Op.getValueType();
+ APInt DemandedElts = VT.isVector()
+ ? APInt::getAllOnesValue(VT.getVectorNumElements())
+ : APInt(1, 1);
+ computeKnownBits(Op, KnownZero, KnownOne, DemandedElts, Depth);
+}
+
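
The DemandedElts bitmask carries one bit per vector lane (a single bit for scalars); the wrapper above simply demands everything. A sketch of building a mask that demands only some lanes, assuming nothing beyond llvm/ADT/APInt.h; demandTwoLanes is a hypothetical name:

    #include "llvm/ADT/APInt.h"

    // One bit per lane: bit i set means the caller cares about element i.
    // Demanding only lanes 0 and 5 of a hypothetical v8i16:
    static llvm::APInt demandTwoLanes() {
      llvm::APInt DemandedElts(8, 0); // 8 lanes, none demanded yet
      DemandedElts.setBit(0);
      DemandedElts.setBit(5);
      return DemandedElts;            // 0b00100001
    }
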
+/// Determine which bits of Op are known to be either zero or one and return
+/// them in the KnownZero/KnownOne bitsets. The DemandedElts argument allows
+/// us to only collect the known bits that are shared by the requested vector
+/// elements.
+/// TODO: We only support DemandedElts on a few opcodes so far; the remaining
+/// opcodes should be added as they become necessary.
+void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero,
+ APInt &KnownOne, const APInt &DemandedElts,
+ unsigned Depth) const {
+ unsigned BitWidth = Op.getScalarValueSizeInBits();
KnownZero = KnownOne = APInt(BitWidth, 0); // Don't know anything.
if (Depth == 6)
return; // Limit search depth.
APInt KnownZero2, KnownOne2;
+ unsigned NumElts = DemandedElts.getBitWidth();
- switch (Op.getOpcode()) {
+ if (!DemandedElts)
+ return; // No demanded elts, better to assume we don't know anything.
+
+ unsigned Opcode = Op.getOpcode();
+ switch (Opcode) {
case ISD::Constant:
// We know all of the bits for a constant!
KnownOne = cast<ConstantSDNode>(Op)->getAPIntValue();
KnownZero = ~KnownOne;
break;
+ case ISD::BUILD_VECTOR:
+ // Collect the known bits that are shared by every demanded vector element.
+ assert(NumElts == Op.getValueType().getVectorNumElements() &&
+ "Unexpected vector size");
+ KnownZero = KnownOne = APInt::getAllOnesValue(BitWidth);
+ for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) {
+ if (!DemandedElts[i])
+ continue;
+
+ SDValue SrcOp = Op.getOperand(i);
+ computeKnownBits(SrcOp, KnownZero2, KnownOne2, Depth + 1);
+
+ // BUILD_VECTOR can implicitly truncate sources, we must handle this.
+ if (SrcOp.getValueSizeInBits() != BitWidth) {
+ assert(SrcOp.getValueSizeInBits() > BitWidth &&
+ "Expected BUILD_VECTOR implicit truncation");
+ KnownOne2 = KnownOne2.trunc(BitWidth);
+ KnownZero2 = KnownZero2.trunc(BitWidth);
+ }
+
+ // Known bits are the values that are shared by every demanded element.
+ KnownOne &= KnownOne2;
+ KnownZero &= KnownZero2;
+
+ // If we don't know any bits, early out.
+ if (!KnownOne && !KnownZero)
+ break;
+ }
+ break;
+ case ISD::VECTOR_SHUFFLE: {
+ // Collect the known bits that are shared by every vector element referenced
+ // by the shuffle.
+ APInt DemandedLHS(NumElts, 0), DemandedRHS(NumElts, 0);
+ KnownZero = KnownOne = APInt::getAllOnesValue(BitWidth);
+ const ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op);
+ assert(NumElts == SVN->getMask().size() && "Unexpected vector size");
+ for (unsigned i = 0; i != NumElts; ++i) {
+ if (!DemandedElts[i])
+ continue;
+
+ int M = SVN->getMaskElt(i);
+ if (M < 0) {
+ // For UNDEF elements, we don't know anything about the common state of
+ // the shuffle result.
+ KnownOne.clearAllBits();
+ KnownZero.clearAllBits();
+ DemandedLHS.clearAllBits();
+ DemandedRHS.clearAllBits();
+ break;
+ }
+
+ if ((unsigned)M < NumElts)
+ DemandedLHS.setBit((unsigned)M % NumElts);
+ else
+ DemandedRHS.setBit((unsigned)M % NumElts);
+ }
+ // Known bits are the values that are shared by every demanded element.
+ if (!!DemandedLHS) {
+ SDValue LHS = Op.getOperand(0);
+ computeKnownBits(LHS, KnownZero2, KnownOne2, DemandedLHS, Depth + 1);
+ KnownOne &= KnownOne2;
+ KnownZero &= KnownZero2;
+ }
+ // If we don't know any bits, early out.
+ if (!KnownOne && !KnownZero)
+ break;
+ if (!!DemandedRHS) {
+ SDValue RHS = Op.getOperand(1);
+ computeKnownBits(RHS, KnownZero2, KnownOne2, DemandedRHS, Depth + 1);
+ KnownOne &= KnownOne2;
+ KnownZero &= KnownZero2;
+ }
+ break;
+ }
+ case ISD::CONCAT_VECTORS: {
+ // Split DemandedElts and test each of the demanded subvectors.
+ KnownZero = KnownOne = APInt::getAllOnesValue(BitWidth);
+ EVT SubVectorVT = Op.getOperand(0).getValueType();
+ unsigned NumSubVectorElts = SubVectorVT.getVectorNumElements();
+ unsigned NumSubVectors = Op.getNumOperands();
+ for (unsigned i = 0; i != NumSubVectors; ++i) {
+ APInt DemandedSub = DemandedElts.lshr(i * NumSubVectorElts);
+ DemandedSub = DemandedSub.trunc(NumSubVectorElts);
+ if (!!DemandedSub) {
+ SDValue Sub = Op.getOperand(i);
+ computeKnownBits(Sub, KnownZero2, KnownOne2, DemandedSub, Depth + 1);
+ KnownOne &= KnownOne2;
+ KnownZero &= KnownZero2;
+ }
+ // If we don't know any bits, early out.
+ if (!KnownOne && !KnownZero)
+ break;
+ }
+ break;
+ }
+ case ISD::EXTRACT_SUBVECTOR: {
+ // If we know the element index, just demand that subvector elements,
+ // otherwise demand them all.
+ SDValue Src = Op.getOperand(0);
+ ConstantSDNode *SubIdx = dyn_cast<ConstantSDNode>(Op.getOperand(1));
+ unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
+ if (SubIdx && SubIdx->getAPIntValue().ule(NumSrcElts - NumElts)) {
+ // Offset the demanded elts by the subvector index.
+ uint64_t Idx = SubIdx->getZExtValue();
+ APInt DemandedSrc = DemandedElts.zext(NumSrcElts).shl(Idx);
+ computeKnownBits(Src, KnownZero, KnownOne, DemandedSrc, Depth + 1);
+ } else {
+ computeKnownBits(Src, KnownZero, KnownOne, Depth + 1);
+ }
+ break;
+ }
+ case ISD::BITCAST: {
+ SDValue N0 = Op.getOperand(0);
+ unsigned SubBitWidth = N0.getScalarValueSizeInBits();
+
+ // Ignore bitcasts from floating point.
+ if (!N0.getValueType().isInteger())
+ break;
+
+ // Fast handling of 'identity' bitcasts.
+ if (BitWidth == SubBitWidth) {
+ computeKnownBits(N0, KnownZero, KnownOne, DemandedElts, Depth + 1);
+ break;
+ }
+
+    // TODO: Support big-endian targets when it becomes useful.
+ bool IsLE = getDataLayout().isLittleEndian();
+ if (!IsLE)
+ break;
+
+ // Bitcast 'small element' vector to 'large element' scalar/vector.
+ if ((BitWidth % SubBitWidth) == 0) {
+ assert(N0.getValueType().isVector() && "Expected bitcast from vector");
+
+ // Collect known bits for the (larger) output by collecting the known
+ // bits from each set of sub elements and shift these into place.
+      // We need to separately call computeKnownBits for each set of
+      // sub elements as the known bits for each are likely to be different.
+ unsigned SubScale = BitWidth / SubBitWidth;
+ APInt SubDemandedElts(NumElts * SubScale, 0);
+ for (unsigned i = 0; i != NumElts; ++i)
+ if (DemandedElts[i])
+ SubDemandedElts.setBit(i * SubScale);
+
+ for (unsigned i = 0; i != SubScale; ++i) {
+ computeKnownBits(N0, KnownZero2, KnownOne2, SubDemandedElts.shl(i),
+ Depth + 1);
+ KnownOne |= KnownOne2.zext(BitWidth).shl(SubBitWidth * i);
+ KnownZero |= KnownZero2.zext(BitWidth).shl(SubBitWidth * i);
+ }
+ }
+
+ // Bitcast 'large element' scalar/vector to 'small element' vector.
+ if ((SubBitWidth % BitWidth) == 0) {
+ assert(Op.getValueType().isVector() && "Expected bitcast to vector");
+
+ // Collect known bits for the (smaller) output by collecting the known
+ // bits from the overlapping larger input elements and extracting the
+ // sub sections we actually care about.
+ unsigned SubScale = SubBitWidth / BitWidth;
+ APInt SubDemandedElts(NumElts / SubScale, 0);
+ for (unsigned i = 0; i != NumElts; ++i)
+ if (DemandedElts[i])
+ SubDemandedElts.setBit(i / SubScale);
+
+ computeKnownBits(N0, KnownZero2, KnownOne2, SubDemandedElts, Depth + 1);
+
+ KnownZero = KnownOne = APInt::getAllOnesValue(BitWidth);
+ for (unsigned i = 0; i != NumElts; ++i)
+ if (DemandedElts[i]) {
+ unsigned Offset = (i % SubScale) * BitWidth;
+ KnownOne &= KnownOne2.lshr(Offset).trunc(BitWidth);
+ KnownZero &= KnownZero2.lshr(Offset).trunc(BitWidth);
+ // If we don't know any bits, early out.
+ if (!KnownOne && !KnownZero)
+ break;
+ }
+ }
+ break;
+ }
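
A worked instance of the small-element to large-element direction above, on a little-endian target: bitcasting v2i32 to i64, the known bits of lane 0 land in bits 0..31 and those of lane 1 in bits 32..63. A minimal APInt sketch; knownOnesOfBitcast is a hypothetical name:

    #include "llvm/ADT/APInt.h"

    // Lane 0 known-one mask 0x000000FF, lane 1 known-one mask 0x0000FF00.
    static llvm::APInt knownOnesOfBitcast() {
      llvm::APInt Lane0(32, 0x000000FFu), Lane1(32, 0x0000FF00u);
      llvm::APInt Known = Lane0.zext(64);  // sub element 0 -> bits 0..31
      Known |= Lane1.zext(64).shl(32);     // sub element 1 -> bits 32..63
      return Known;                        // == 0x0000FF00000000FF
    }
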
case ISD::AND:
// If either the LHS or the RHS are Zero, the result is zero.
- computeKnownBits(Op.getOperand(1), KnownZero, KnownOne, Depth+1);
- computeKnownBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth+1);
+ computeKnownBits(Op.getOperand(1), KnownZero, KnownOne, DemandedElts,
+ Depth + 1);
+ computeKnownBits(Op.getOperand(0), KnownZero2, KnownOne2, DemandedElts,
+ Depth + 1);
// Output known-1 bits are only known if set in both the LHS & RHS.
KnownOne &= KnownOne2;
@@ -2045,8 +2211,10 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero,
KnownZero |= KnownZero2;
break;
case ISD::OR:
- computeKnownBits(Op.getOperand(1), KnownZero, KnownOne, Depth+1);
- computeKnownBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth+1);
+ computeKnownBits(Op.getOperand(1), KnownZero, KnownOne, DemandedElts,
+ Depth + 1);
+ computeKnownBits(Op.getOperand(0), KnownZero2, KnownOne2, DemandedElts,
+ Depth + 1);
// Output known-0 bits are only known if clear in both the LHS & RHS.
KnownZero &= KnownZero2;
@@ -2054,8 +2222,10 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero,
KnownOne |= KnownOne2;
break;
case ISD::XOR: {
- computeKnownBits(Op.getOperand(1), KnownZero, KnownOne, Depth+1);
- computeKnownBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth+1);
+ computeKnownBits(Op.getOperand(1), KnownZero, KnownOne, DemandedElts,
+ Depth + 1);
+ computeKnownBits(Op.getOperand(0), KnownZero2, KnownOne2, DemandedElts,
+ Depth + 1);
// Output known-0 bits are known if clear or set in both the LHS & RHS.
APInt KnownZeroOut = (KnownZero & KnownZero2) | (KnownOne & KnownOne2);
@@ -2065,11 +2235,13 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero,
break;
}
case ISD::MUL: {
- computeKnownBits(Op.getOperand(1), KnownZero, KnownOne, Depth+1);
- computeKnownBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth+1);
+ computeKnownBits(Op.getOperand(1), KnownZero, KnownOne, DemandedElts,
+ Depth + 1);
+ computeKnownBits(Op.getOperand(0), KnownZero2, KnownOne2, DemandedElts,
+ Depth + 1);
// If low bits are zero in either operand, output low known-0 bits.
- // Also compute a conserative estimate for high known-0 bits.
+ // Also compute a conservative estimate for high known-0 bits.
// More trickiness is possible, but this is sufficient for the
// interesting case of alignment computation.
KnownOne.clearAllBits();
@@ -2089,12 +2261,12 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero,
// For the purposes of computing leading zeros we can conservatively
// treat a udiv as a logical right shift by the power of 2 known to
// be less than the denominator.
- computeKnownBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth+1);
+ computeKnownBits(Op.getOperand(0), KnownZero2, KnownOne2, DemandedElts,
+ Depth + 1);
unsigned LeadZ = KnownZero2.countLeadingOnes();
- KnownOne2.clearAllBits();
- KnownZero2.clearAllBits();
- computeKnownBits(Op.getOperand(1), KnownZero2, KnownOne2, Depth+1);
+ computeKnownBits(Op.getOperand(1), KnownZero2, KnownOne2, DemandedElts,
+ Depth + 1);
unsigned RHSUnknownLeadingOnes = KnownOne2.countLeadingZeros();
if (RHSUnknownLeadingOnes != BitWidth)
LeadZ = std::min(BitWidth,
@@ -2105,6 +2277,9 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero,
}
case ISD::SELECT:
computeKnownBits(Op.getOperand(2), KnownZero, KnownOne, Depth+1);
+ // If we don't know any bits, early out.
+ if (!KnownOne && !KnownZero)
+ break;
computeKnownBits(Op.getOperand(1), KnownZero2, KnownOne2, Depth+1);
// Only known if known in both the LHS and RHS.
@@ -2113,6 +2288,9 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero,
break;
case ISD::SELECT_CC:
computeKnownBits(Op.getOperand(3), KnownZero, KnownOne, Depth+1);
+ // If we don't know any bits, early out.
+ if (!KnownOne && !KnownZero)
+ break;
computeKnownBits(Op.getOperand(2), KnownZero2, KnownOne2, Depth+1);
// Only known if known in both the LHS and RHS.
@@ -2144,58 +2322,37 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero,
KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - 1);
break;
case ISD::SHL:
- // (shl X, C1) & C2 == 0 iff (X & C2 >>u C1) == 0
- if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
- unsigned ShAmt = SA->getZExtValue();
-
- // If the shift count is an invalid immediate, don't do anything.
- if (ShAmt >= BitWidth)
- break;
-
- computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1);
- KnownZero <<= ShAmt;
- KnownOne <<= ShAmt;
- // low bits known zero.
- KnownZero |= APInt::getLowBitsSet(BitWidth, ShAmt);
+ if (const APInt *ShAmt = getValidShiftAmountConstant(Op)) {
+ computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, DemandedElts,
+ Depth + 1);
+ KnownZero = KnownZero << *ShAmt;
+ KnownOne = KnownOne << *ShAmt;
+ // Low bits are known zero.
+ KnownZero |= APInt::getLowBitsSet(BitWidth, ShAmt->getZExtValue());
}
break;
case ISD::SRL:
- // (ushr X, C1) & C2 == 0 iff (-1 >> C1) & C2 == 0
- if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
- unsigned ShAmt = SA->getZExtValue();
-
- // If the shift count is an invalid immediate, don't do anything.
- if (ShAmt >= BitWidth)
- break;
-
- computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1);
- KnownZero = KnownZero.lshr(ShAmt);
- KnownOne = KnownOne.lshr(ShAmt);
-
- APInt HighBits = APInt::getHighBitsSet(BitWidth, ShAmt);
- KnownZero |= HighBits; // High bits known zero.
+ if (const APInt *ShAmt = getValidShiftAmountConstant(Op)) {
+ computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, DemandedElts,
+ Depth + 1);
+ KnownZero = KnownZero.lshr(*ShAmt);
+ KnownOne = KnownOne.lshr(*ShAmt);
+ // High bits are known zero.
+ APInt HighBits = APInt::getHighBitsSet(BitWidth, ShAmt->getZExtValue());
+ KnownZero |= HighBits;
}
break;
case ISD::SRA:
- if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
- unsigned ShAmt = SA->getZExtValue();
-
- // If the shift count is an invalid immediate, don't do anything.
- if (ShAmt >= BitWidth)
- break;
-
- // If any of the demanded bits are produced by the sign extension, we also
- // demand the input sign bit.
- APInt HighBits = APInt::getHighBitsSet(BitWidth, ShAmt);
-
- computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1);
- KnownZero = KnownZero.lshr(ShAmt);
- KnownOne = KnownOne.lshr(ShAmt);
-
- // Handle the sign bits.
+ if (const APInt *ShAmt = getValidShiftAmountConstant(Op)) {
+ computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, DemandedElts,
+ Depth + 1);
+ KnownZero = KnownZero.lshr(*ShAmt);
+ KnownOne = KnownOne.lshr(*ShAmt);
+ // If we know the value of the sign bit, then we know it is copied across
+ // the high bits by the shift amount.
+ APInt HighBits = APInt::getHighBitsSet(BitWidth, ShAmt->getZExtValue());
APInt SignBit = APInt::getSignBit(BitWidth);
- SignBit = SignBit.lshr(ShAmt); // Adjust to where it is now in the mask.
-
+ SignBit = SignBit.lshr(*ShAmt); // Adjust to where it is now in the mask.
if (KnownZero.intersects(SignBit)) {
KnownZero |= HighBits; // New bits are known zero.
} else if (KnownOne.intersects(SignBit)) {
@@ -2205,7 +2362,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero,
break;
case ISD::SIGN_EXTEND_INREG: {
EVT EVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
- unsigned EBits = EVT.getScalarType().getSizeInBits();
+ unsigned EBits = EVT.getScalarSizeInBits();
// Sign extension. Compute the demanded bits in the result that are not
// present in the input.
@@ -2220,7 +2377,8 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero,
if (NewBits.getBoolValue())
InputDemandedBits |= InSignBit;
- computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1);
+ computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, DemandedElts,
+ Depth + 1);
KnownOne &= InputDemandedBits;
KnownZero &= InputDemandedBits;
@@ -2253,7 +2411,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero,
// If this is a ZEXTLoad and we are looking at the loaded value.
if (ISD::isZEXTLoad(Op.getNode()) && Op.getResNo() == 0) {
EVT VT = LD->getMemoryVT();
- unsigned MemBits = VT.getScalarType().getSizeInBits();
+ unsigned MemBits = VT.getScalarSizeInBits();
KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - MemBits);
} else if (const MDNode *Ranges = LD->getRanges()) {
if (LD->getExtensionType() == ISD::NON_EXTLOAD)
@@ -2263,11 +2421,12 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero,
}
case ISD::ZERO_EXTEND: {
EVT InVT = Op.getOperand(0).getValueType();
- unsigned InBits = InVT.getScalarType().getSizeInBits();
+ unsigned InBits = InVT.getScalarSizeInBits();
APInt NewBits = APInt::getHighBitsSet(BitWidth, BitWidth - InBits);
KnownZero = KnownZero.trunc(InBits);
KnownOne = KnownOne.trunc(InBits);
- computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1);
+ computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, DemandedElts,
+ Depth + 1);
KnownZero = KnownZero.zext(BitWidth);
KnownOne = KnownOne.zext(BitWidth);
KnownZero |= NewBits;
@@ -2275,30 +2434,22 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero,
}
case ISD::SIGN_EXTEND: {
EVT InVT = Op.getOperand(0).getValueType();
- unsigned InBits = InVT.getScalarType().getSizeInBits();
- APInt NewBits = APInt::getHighBitsSet(BitWidth, BitWidth - InBits);
+ unsigned InBits = InVT.getScalarSizeInBits();
KnownZero = KnownZero.trunc(InBits);
KnownOne = KnownOne.trunc(InBits);
- computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1);
+ computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, DemandedElts,
+ Depth + 1);
- // Note if the sign bit is known to be zero or one.
- bool SignBitKnownZero = KnownZero.isNegative();
- bool SignBitKnownOne = KnownOne.isNegative();
-
- KnownZero = KnownZero.zext(BitWidth);
- KnownOne = KnownOne.zext(BitWidth);
-
- // If the sign bit is known zero or one, the top bits match.
- if (SignBitKnownZero)
- KnownZero |= NewBits;
- else if (SignBitKnownOne)
- KnownOne |= NewBits;
+ // If the sign bit is known to be zero or one, then sext will extend
+ // it to the top bits, else it will just zext.
+ KnownZero = KnownZero.sext(BitWidth);
+ KnownOne = KnownOne.sext(BitWidth);
break;
}
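
APInt::sext replicates the top bit of the narrow mask into every new high bit, which is exactly the propagation the removed hand-rolled version performed. A worked sketch; widenKnownOne is a hypothetical name:

    #include "llvm/ADT/APInt.h"

    // If the narrow sign bit is known one, sext() sets all new high bits,
    // matching what SIGN_EXTEND does to the value itself:
    // i8 0b1000'0000 known-one -> i16 0b1111'1111'1000'0000.
    static llvm::APInt widenKnownOne() {
      llvm::APInt KnownOne(8, 0x80);
      return KnownOne.sext(16); // == 0xFF80
    }
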
case ISD::ANY_EXTEND: {
EVT InVT = Op.getOperand(0).getValueType();
- unsigned InBits = InVT.getScalarType().getSizeInBits();
+ unsigned InBits = InVT.getScalarSizeInBits();
KnownZero = KnownZero.trunc(InBits);
KnownOne = KnownOne.trunc(InBits);
computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1);
@@ -2308,10 +2459,11 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero,
}
case ISD::TRUNCATE: {
EVT InVT = Op.getOperand(0).getValueType();
- unsigned InBits = InVT.getScalarType().getSizeInBits();
+ unsigned InBits = InVT.getScalarSizeInBits();
KnownZero = KnownZero.zext(InBits);
KnownOne = KnownOne.zext(InBits);
- computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1);
+ computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, DemandedElts,
+ Depth + 1);
KnownZero = KnownZero.trunc(BitWidth);
KnownOne = KnownOne.trunc(BitWidth);
break;
@@ -2330,7 +2482,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero,
break;
case ISD::SUB: {
- if (ConstantSDNode *CLHS = dyn_cast<ConstantSDNode>(Op.getOperand(0))) {
+ if (ConstantSDNode *CLHS = isConstOrConstSplat(Op.getOperand(0))) {
      // We know that the top bits of C-X are clear if X contains fewer bits
// than C (i.e. no wrap-around can happen). For example, 20-X is
// positive if we can prove that X is >= 0 and < 16.
@@ -2338,7 +2490,8 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero,
unsigned NLZ = (CLHS->getAPIntValue()+1).countLeadingZeros();
// NLZ can't be BitWidth with no sign bit
APInt MaskV = APInt::getHighBitsSet(BitWidth, NLZ+1);
- computeKnownBits(Op.getOperand(1), KnownZero2, KnownOne2, Depth+1);
+ computeKnownBits(Op.getOperand(1), KnownZero2, KnownOne2, DemandedElts,
+ Depth + 1);
// If all of the MaskV bits are known to be zero, then we know the
// output top bits are zero, because we now know that the output is
@@ -2350,8 +2503,8 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero,
}
}
}
+ LLVM_FALLTHROUGH;
}
- // fall through
case ISD::ADD:
case ISD::ADDE: {
// Output known-0 bits are known if clear or set in both the low clear bits
@@ -2361,17 +2514,19 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero,
// known to be clear. For example, if one input has the top 10 bits clear
// and the other has the top 8 bits clear, we know the top 7 bits of the
// output must be clear.
- computeKnownBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth+1);
+ computeKnownBits(Op.getOperand(0), KnownZero2, KnownOne2, DemandedElts,
+ Depth + 1);
unsigned KnownZeroHigh = KnownZero2.countLeadingOnes();
unsigned KnownZeroLow = KnownZero2.countTrailingOnes();
- computeKnownBits(Op.getOperand(1), KnownZero2, KnownOne2, Depth+1);
+ computeKnownBits(Op.getOperand(1), KnownZero2, KnownOne2, DemandedElts,
+ Depth + 1);
KnownZeroHigh = std::min(KnownZeroHigh,
KnownZero2.countLeadingOnes());
KnownZeroLow = std::min(KnownZeroLow,
KnownZero2.countTrailingOnes());
- if (Op.getOpcode() == ISD::ADD) {
+ if (Opcode == ISD::ADD) {
KnownZero |= APInt::getLowBitsSet(BitWidth, KnownZeroLow);
if (KnownZeroHigh > 1)
KnownZero |= APInt::getHighBitsSet(BitWidth, KnownZeroHigh - 1);
@@ -2387,11 +2542,12 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero,
break;
}
case ISD::SREM:
- if (ConstantSDNode *Rem = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+ if (ConstantSDNode *Rem = isConstOrConstSplat(Op.getOperand(1))) {
const APInt &RA = Rem->getAPIntValue().abs();
if (RA.isPowerOf2()) {
APInt LowBits = RA - 1;
- computeKnownBits(Op.getOperand(0), KnownZero2,KnownOne2,Depth+1);
+ computeKnownBits(Op.getOperand(0), KnownZero2, KnownOne2, DemandedElts,
+ Depth + 1);
// The low bits of the first operand are unchanged by the srem.
KnownZero = KnownZero2 & LowBits;
@@ -2411,11 +2567,12 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero,
}
break;
case ISD::UREM: {
- if (ConstantSDNode *Rem = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+ if (ConstantSDNode *Rem = isConstOrConstSplat(Op.getOperand(1))) {
const APInt &RA = Rem->getAPIntValue();
if (RA.isPowerOf2()) {
APInt LowBits = (RA - 1);
- computeKnownBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth + 1);
+ computeKnownBits(Op.getOperand(0), KnownZero2, KnownOne2, DemandedElts,
+ Depth + 1);
// The upper bits are all zero, the lower ones are unchanged.
KnownZero = KnownZero2 | ~LowBits;
@@ -2426,8 +2583,10 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero,
// Since the result is less than or equal to either operand, any leading
// zero bits in either operand must also exist in the result.
- computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1);
- computeKnownBits(Op.getOperand(1), KnownZero2, KnownOne2, Depth+1);
+ computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, DemandedElts,
+ Depth + 1);
+ computeKnownBits(Op.getOperand(1), KnownZero2, KnownOne2, DemandedElts,
+ Depth + 1);
uint32_t Leaders = std::max(KnownZero.countLeadingOnes(),
KnownZero2.countLeadingOnes());
@@ -2437,9 +2596,8 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero,
}
case ISD::EXTRACT_ELEMENT: {
computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1);
- const unsigned Index =
- cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
- const unsigned BitWidth = Op.getValueType().getSizeInBits();
+ const unsigned Index = Op.getConstantOperandVal(1);
+ const unsigned BitWidth = Op.getValueSizeInBits();
// Remove low part of known bits mask
KnownZero = KnownZero.getHiBits(KnownZero.getBitWidth() - Index * BitWidth);
@@ -2450,8 +2608,74 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero,
KnownOne = KnownOne.trunc(BitWidth);
break;
}
+ case ISD::EXTRACT_VECTOR_ELT: {
+ SDValue InVec = Op.getOperand(0);
+ SDValue EltNo = Op.getOperand(1);
+ EVT VecVT = InVec.getValueType();
+ const unsigned BitWidth = Op.getValueSizeInBits();
+ const unsigned EltBitWidth = VecVT.getScalarSizeInBits();
+ const unsigned NumSrcElts = VecVT.getVectorNumElements();
+    // If BitWidth > EltBitWidth the value is any-extended, so we do not know
+    // anything about the extended bits.
+ if (BitWidth > EltBitWidth) {
+ KnownZero = KnownZero.trunc(EltBitWidth);
+ KnownOne = KnownOne.trunc(EltBitWidth);
+ }
+ ConstantSDNode *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo);
+ if (ConstEltNo && ConstEltNo->getAPIntValue().ult(NumSrcElts)) {
+ // If we know the element index, just demand that vector element.
+ unsigned Idx = ConstEltNo->getZExtValue();
+ APInt DemandedElt = APInt::getOneBitSet(NumSrcElts, Idx);
+ computeKnownBits(InVec, KnownZero, KnownOne, DemandedElt, Depth + 1);
+ } else {
+ // Unknown element index, so ignore DemandedElts and demand them all.
+ computeKnownBits(InVec, KnownZero, KnownOne, Depth + 1);
+ }
+ if (BitWidth > EltBitWidth) {
+ KnownZero = KnownZero.zext(BitWidth);
+ KnownOne = KnownOne.zext(BitWidth);
+ }
+ break;
+ }
+ case ISD::INSERT_VECTOR_ELT: {
+ SDValue InVec = Op.getOperand(0);
+ SDValue InVal = Op.getOperand(1);
+ SDValue EltNo = Op.getOperand(2);
+
+ ConstantSDNode *CEltNo = dyn_cast<ConstantSDNode>(EltNo);
+ if (CEltNo && CEltNo->getAPIntValue().ult(NumElts)) {
+ // If we know the element index, split the demand between the
+ // source vector and the inserted element.
+ KnownZero = KnownOne = APInt::getAllOnesValue(BitWidth);
+ unsigned EltIdx = CEltNo->getZExtValue();
+
+ // If we demand the inserted element then add its common known bits.
+ if (DemandedElts[EltIdx]) {
+ computeKnownBits(InVal, KnownZero2, KnownOne2, Depth + 1);
+ KnownOne &= KnownOne2.zextOrTrunc(KnownOne.getBitWidth());
+        KnownZero &= KnownZero2.zextOrTrunc(KnownZero.getBitWidth());
+ }
+
+ // If we demand the source vector then add its common known bits, ensuring
+ // that we don't demand the inserted element.
+ APInt VectorElts = DemandedElts & ~(APInt::getOneBitSet(NumElts, EltIdx));
+ if (!!VectorElts) {
+ computeKnownBits(InVec, KnownZero2, KnownOne2, VectorElts, Depth + 1);
+ KnownOne &= KnownOne2;
+ KnownZero &= KnownZero2;
+ }
+ } else {
+ // Unknown element index, so ignore DemandedElts and demand them all.
+ computeKnownBits(InVec, KnownZero, KnownOne, Depth + 1);
+ computeKnownBits(InVal, KnownZero2, KnownOne2, Depth + 1);
+ KnownOne &= KnownOne2.zextOrTrunc(KnownOne.getBitWidth());
+      KnownZero &= KnownZero2.zextOrTrunc(KnownZero.getBitWidth());
+ }
+ break;
+ }
case ISD::BSWAP: {
- computeKnownBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth+1);
+ computeKnownBits(Op.getOperand(0), KnownZero2, KnownOne2, DemandedElts,
+ Depth + 1);
KnownZero = KnownZero2.byteSwap();
KnownOne = KnownOne2.byteSwap();
break;
@@ -2460,13 +2684,15 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero,
case ISD::SMAX:
case ISD::UMIN:
case ISD::UMAX: {
- APInt Op0Zero, Op0One;
- APInt Op1Zero, Op1One;
- computeKnownBits(Op.getOperand(0), Op0Zero, Op0One, Depth);
- computeKnownBits(Op.getOperand(1), Op1Zero, Op1One, Depth);
-
- KnownZero = Op0Zero & Op1Zero;
- KnownOne = Op0One & Op1One;
+ computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, DemandedElts,
+ Depth + 1);
+ // If we don't know any bits, early out.
+ if (!KnownOne && !KnownZero)
+ break;
+ computeKnownBits(Op.getOperand(1), KnownZero2, KnownOne2, DemandedElts,
+ Depth + 1);
+ KnownZero &= KnownZero2;
+ KnownOne &= KnownOne2;
break;
}
case ISD::FrameIndex:
@@ -2479,9 +2705,9 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero,
break;
default:
- if (Op.getOpcode() < ISD::BUILTIN_OP_END)
+ if (Opcode < ISD::BUILTIN_OP_END)
break;
- // Fallthrough
+ LLVM_FALLTHROUGH;
case ISD::INTRINSIC_WO_CHAIN:
case ISD::INTRINSIC_W_CHAIN:
case ISD::INTRINSIC_VOID:
@@ -2494,6 +2720,13 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero,
}
bool SelectionDAG::isKnownToBeAPowerOfTwo(SDValue Val) const {
+ EVT OpVT = Val.getValueType();
+ unsigned BitWidth = OpVT.getScalarSizeInBits();
+
+ // Is the constant a known power of 2?
+ if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Val))
+ return Const->getAPIntValue().zextOrTrunc(BitWidth).isPowerOf2();
+
// A left-shift of a constant one will have exactly one bit set because
// shifting the bit off the end is undefined.
if (Val.getOpcode() == ISD::SHL) {
@@ -2510,12 +2743,19 @@ bool SelectionDAG::isKnownToBeAPowerOfTwo(SDValue Val) const {
return true;
}
+ // Are all operands of a build vector constant powers of two?
+ if (Val.getOpcode() == ISD::BUILD_VECTOR)
+ if (llvm::all_of(Val->ops(), [this, BitWidth](SDValue E) {
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(E))
+ return C->getAPIntValue().zextOrTrunc(BitWidth).isPowerOf2();
+ return false;
+ }))
+ return true;
+
// More could be done here, though the above checks are enough
// to handle some common cases.
// Fall back to computeKnownBits to catch other known cases.
- EVT OpVT = Val.getValueType();
- unsigned BitWidth = OpVT.getScalarType().getSizeInBits();
APInt KnownZero, KnownOne;
computeKnownBits(Val, KnownZero, KnownOne);
return (KnownZero.countPopulation() == BitWidth - 1) &&
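
The new fast paths above simply test APInt::isPowerOf2 on constants (and on every BUILD_VECTOR operand); the known-bits fallback accepts a value when all bits but one are known zero and the remaining bit is known one. A sketch of that fallback test, assuming its second conjunct (cut off by the hunk boundary) requires exactly one known-one bit; knownPow2 is a hypothetical name:

    #include "llvm/ADT/APInt.h"

    static bool knownPow2(const llvm::APInt &KnownZero,
                          const llvm::APInt &KnownOne) {
      unsigned BitWidth = KnownZero.getBitWidth();
      return KnownZero.countPopulation() == BitWidth - 1 &&
             KnownOne.countPopulation() == 1;
    }
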
@@ -2525,7 +2765,7 @@ bool SelectionDAG::isKnownToBeAPowerOfTwo(SDValue Val) const {
unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const {
EVT VT = Op.getValueType();
assert(VT.isInteger() && "Invalid VT!");
- unsigned VTBits = VT.getScalarType().getSizeInBits();
+ unsigned VTBits = VT.getScalarSizeInBits();
unsigned Tmp, Tmp2;
unsigned FirstAnswer = 1;
@@ -2547,14 +2787,12 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const {
}
case ISD::SIGN_EXTEND:
- Tmp =
- VTBits-Op.getOperand(0).getValueType().getScalarType().getSizeInBits();
+ Tmp = VTBits - Op.getOperand(0).getScalarValueSizeInBits();
return ComputeNumSignBits(Op.getOperand(0), Depth+1) + Tmp;
case ISD::SIGN_EXTEND_INREG:
// Max of the input and what this extends.
- Tmp =
- cast<VTSDNode>(Op.getOperand(1))->getVT().getScalarType().getSizeInBits();
+ Tmp = cast<VTSDNode>(Op.getOperand(1))->getVT().getScalarSizeInBits();
Tmp = VTBits-Tmp+1;
Tmp2 = ComputeNumSignBits(Op.getOperand(0), Depth+1);
@@ -2563,17 +2801,18 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const {
case ISD::SRA:
Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1);
// SRA X, C -> adds C sign bits.
- if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
- Tmp += C->getZExtValue();
- if (Tmp > VTBits) Tmp = VTBits;
+ if (ConstantSDNode *C = isConstOrConstSplat(Op.getOperand(1))) {
+ APInt ShiftVal = C->getAPIntValue();
+ ShiftVal += Tmp;
+ Tmp = ShiftVal.uge(VTBits) ? VTBits : ShiftVal.getZExtValue();
}
return Tmp;
case ISD::SHL:
- if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+ if (ConstantSDNode *C = isConstOrConstSplat(Op.getOperand(1))) {
// shl destroys sign bits.
Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1);
- if (C->getZExtValue() >= VTBits || // Bad shift.
- C->getZExtValue() >= Tmp) break; // Shifted all sign bits out.
+ if (C->getAPIntValue().uge(VTBits) || // Bad shift.
+ C->getAPIntValue().uge(Tmp)) break; // Shifted all sign bits out.
return Tmp - C->getZExtValue();
}
break;
@@ -2679,7 +2918,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const {
if (Tmp2 == 1) return 1;
// Handle NEG.
- if (ConstantSDNode *CLHS = dyn_cast<ConstantSDNode>(Op.getOperand(0)))
+ if (ConstantSDNode *CLHS = isConstOrConstSplat(Op.getOperand(0)))
if (CLHS->isNullValue()) {
APInt KnownZero, KnownOne;
computeKnownBits(Op.getOperand(1), KnownZero, KnownOne, Depth+1);
@@ -2701,25 +2940,50 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const {
Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1);
if (Tmp == 1) return 1; // Early out.
return std::min(Tmp, Tmp2)-1;
- case ISD::TRUNCATE:
- // FIXME: it's tricky to do anything useful for this, but it is an important
- // case for targets like X86.
+ case ISD::TRUNCATE: {
+    // Check if the sign bits of the source go down as far as the truncated value.
+ unsigned NumSrcBits = Op.getOperand(0).getScalarValueSizeInBits();
+ unsigned NumSrcSignBits = ComputeNumSignBits(Op.getOperand(0), Depth + 1);
+ if (NumSrcSignBits > (NumSrcBits - VTBits))
+ return NumSrcSignBits - (NumSrcBits - VTBits);
break;
+ }
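
A worked instance of the new TRUNCATE rule: truncating i64 to i32 when the i64 source has 40 known sign bits. The truncation drops the top 64 - 32 = 32 bits; 40 of them were sign copies, so 40 - 32 = 8 sign bits survive into the i32 result. A self-contained sketch; truncSignBits is a hypothetical name:

    // Returns how many sign bits survive a truncation; at least 1 always holds.
    static unsigned truncSignBits(unsigned SrcBits = 64, unsigned DstBits = 32,
                                  unsigned SrcSignBits = 40) {
      unsigned Dropped = SrcBits - DstBits;
      return SrcSignBits > Dropped ? SrcSignBits - Dropped : 1;
    }
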
case ISD::EXTRACT_ELEMENT: {
const int KnownSign = ComputeNumSignBits(Op.getOperand(0), Depth+1);
- const int BitWidth = Op.getValueType().getSizeInBits();
- const int Items =
- Op.getOperand(0).getValueType().getSizeInBits() / BitWidth;
+ const int BitWidth = Op.getValueSizeInBits();
+ const int Items = Op.getOperand(0).getValueSizeInBits() / BitWidth;
// Get reverse index (starting from 1), Op1 value indexes elements from
// little end. Sign starts at big end.
- const int rIndex = Items - 1 -
- cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
+ const int rIndex = Items - 1 - Op.getConstantOperandVal(1);
     // If the sign portion ends in our element, the subtraction gives the
     // correct result. Otherwise it gives either a negative result or one
     // greater than the bit width.
return std::max(std::min(KnownSign - rIndex * BitWidth, BitWidth), 0);
}
+ case ISD::EXTRACT_VECTOR_ELT: {
+ // At the moment we keep this simple and skip tracking the specific
+ // element. This way we get the lowest common denominator for all elements
+ // of the vector.
+    // TODO: get information for the given vector element.
+ const unsigned BitWidth = Op.getValueSizeInBits();
+ const unsigned EltBitWidth = Op.getOperand(0).getScalarValueSizeInBits();
+    // If BitWidth > EltBitWidth the value is any-extended, and we do not know
+ // anything about sign bits. But if the sizes match we can derive knowledge
+ // about sign bits from the vector operand.
+ if (BitWidth == EltBitWidth)
+ return ComputeNumSignBits(Op.getOperand(0), Depth+1);
+ break;
+ }
+ case ISD::EXTRACT_SUBVECTOR:
+ return ComputeNumSignBits(Op.getOperand(0), Depth + 1);
+ case ISD::CONCAT_VECTORS:
+ // Determine the minimum number of sign bits across all input vectors.
+ // Early out if the result is already 1.
+ Tmp = ComputeNumSignBits(Op.getOperand(0), Depth + 1);
+ for (unsigned i = 1, e = Op.getNumOperands(); (i < e) && (Tmp > 1); ++i)
+ Tmp = std::min(Tmp, ComputeNumSignBits(Op.getOperand(i), Depth + 1));
+ return Tmp;
}
// If we are looking at the loaded value of the SDNode.
@@ -2730,10 +2994,10 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const {
switch (ExtType) {
default: break;
case ISD::SEXTLOAD: // '17' bits known
- Tmp = LD->getMemoryVT().getScalarType().getSizeInBits();
+ Tmp = LD->getMemoryVT().getScalarSizeInBits();
return VTBits-Tmp+1;
case ISD::ZEXTLOAD: // '16' bits known
- Tmp = LD->getMemoryVT().getScalarType().getSizeInBits();
+ Tmp = LD->getMemoryVT().getScalarSizeInBits();
return VTBits-Tmp;
}
}
@@ -2842,6 +3106,16 @@ bool SelectionDAG::haveNoCommonBitsSet(SDValue A, SDValue B) const {
static SDValue FoldCONCAT_VECTORS(const SDLoc &DL, EVT VT,
ArrayRef<SDValue> Ops,
llvm::SelectionDAG &DAG) {
+ assert(!Ops.empty() && "Can't concatenate an empty list of vectors!");
+ assert(llvm::all_of(Ops,
+ [Ops](SDValue Op) {
+ return Ops[0].getValueType() == Op.getValueType();
+ }) &&
+ "Concatenation of vectors with inconsistent value types!");
+ assert((Ops.size() * Ops[0].getValueType().getVectorNumElements()) ==
+ VT.getVectorNumElements() &&
+ "Incorrect element count in vector concatenation!");
+
if (Ops.size() == 1)
return Ops[0];
@@ -2875,7 +3149,7 @@ static SDValue FoldCONCAT_VECTORS(const SDLoc &DL, EVT VT,
? DAG.getZExtOrTrunc(Op, DL, SVT)
: DAG.getSExtOrTrunc(Op, DL, SVT);
- return DAG.getNode(ISD::BUILD_VECTOR, DL, VT, Elts);
+ return DAG.getBuildVector(VT, DL, Elts);
}
/// Gets or creates the specified node.
@@ -2924,13 +3198,13 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
}
case ISD::BITCAST:
if (VT == MVT::f16 && C->getValueType(0) == MVT::i16)
- return getConstantFP(APFloat(APFloat::IEEEhalf, Val), DL, VT);
+ return getConstantFP(APFloat(APFloat::IEEEhalf(), Val), DL, VT);
if (VT == MVT::f32 && C->getValueType(0) == MVT::i32)
- return getConstantFP(APFloat(APFloat::IEEEsingle, Val), DL, VT);
+ return getConstantFP(APFloat(APFloat::IEEEsingle(), Val), DL, VT);
if (VT == MVT::f64 && C->getValueType(0) == MVT::i64)
- return getConstantFP(APFloat(APFloat::IEEEdouble, Val), DL, VT);
+ return getConstantFP(APFloat(APFloat::IEEEdouble(), Val), DL, VT);
if (VT == MVT::f128 && C->getValueType(0) == MVT::i128)
- return getConstantFP(APFloat(APFloat::IEEEquad, Val), DL, VT);
+ return getConstantFP(APFloat(APFloat::IEEEquad(), Val), DL, VT);
break;
case ISD::BSWAP:
return getConstant(Val.byteSwap(), DL, VT, C->isTargetOpcode(),
@@ -3162,8 +3436,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
break;
case ISD::BITCAST:
// Basic sanity checking.
- assert(VT.getSizeInBits() == Operand.getValueType().getSizeInBits()
- && "Cannot BITCAST between types of different sizes!");
+ assert(VT.getSizeInBits() == Operand.getValueSizeInBits() &&
+ "Cannot BITCAST between types of different sizes!");
if (VT == Operand.getValueType()) return Operand; // noop conversion.
if (OpOpcode == ISD::BITCAST) // bitconv(bitconv(x)) -> bitconv(x)
return getNode(ISD::BITCAST, DL, VT, Operand.getOperand(0));
@@ -3333,25 +3607,22 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL,
EVT SVT = VT.getScalarType();
SmallVector<SDValue, 4> Outputs;
for (unsigned I = 0, E = BV1->getNumOperands(); I != E; ++I) {
- ConstantSDNode *V1 = dyn_cast<ConstantSDNode>(BV1->getOperand(I));
- ConstantSDNode *V2 = dyn_cast<ConstantSDNode>(BV2->getOperand(I));
- if (!V1 || !V2) // Not a constant, bail.
- return SDValue();
-
- if (V1->isOpaque() || V2->isOpaque())
- return SDValue();
+ SDValue V1 = BV1->getOperand(I);
+ SDValue V2 = BV2->getOperand(I);
// Avoid BUILD_VECTOR nodes that perform implicit truncation.
- // FIXME: This is valid and could be handled by truncating the APInts.
+ // FIXME: This is valid and could be handled by truncation.
if (V1->getValueType(0) != SVT || V2->getValueType(0) != SVT)
return SDValue();
// Fold one vector element.
- std::pair<APInt, bool> Folded = FoldValue(Opcode, V1->getAPIntValue(),
- V2->getAPIntValue());
- if (!Folded.second)
+ SDValue ScalarResult = getNode(Opcode, DL, SVT, V1, V2);
+
+ // Scalar folding only succeeded if the result is a constant or UNDEF.
+ if (!ScalarResult.isUndef() && ScalarResult.getOpcode() != ISD::Constant &&
+ ScalarResult.getOpcode() != ISD::ConstantFP)
return SDValue();
- Outputs.push_back(getConstant(Folded.first, DL, SVT));
+ Outputs.push_back(ScalarResult);
}
assert(VT.getVectorNumElements() == Outputs.size() &&
@@ -3394,8 +3665,8 @@ SDValue SelectionDAG::FoldConstantVectorArithmetic(unsigned Opcode,
// All operands must be vector types with the same number of elements as
// the result type and must be either UNDEF or a build vector of constant
// or UNDEF scalars.
- if (!std::all_of(Ops.begin(), Ops.end(), IsConstantBuildVectorOrUndef) ||
- !std::all_of(Ops.begin(), Ops.end(), IsScalarOrSameVectorSize))
+ if (!all_of(Ops, IsConstantBuildVectorOrUndef) ||
+ !all_of(Ops, IsScalarOrSameVectorSize))
return SDValue();
// If we are comparing vectors, then the result needs to be a i1 boolean
@@ -3577,8 +3848,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
// amounts. This catches things like trying to shift an i1024 value by an
// i8, which is easy to fall into in generic code that uses
// TLI.getShiftAmount().
- assert(N2.getValueType().getSizeInBits() >=
- Log2_32_Ceil(N1.getValueType().getSizeInBits()) &&
+ assert(N2.getValueSizeInBits() >= Log2_32_Ceil(N1.getValueSizeInBits()) &&
"Invalid use of small shift amount with oversized value!");
// Always fold shifts of i1 values so the code generator doesn't need to
@@ -3609,7 +3879,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
assert(VT.isFloatingPoint() &&
N1.getValueType().isFloatingPoint() &&
VT.bitsLE(N1.getValueType()) &&
- N2C && "Invalid FP_ROUND!");
+ N2C && (N2C->getZExtValue() == 0 || N2C->getZExtValue() == 1) &&
+ "Invalid FP_ROUND!");
if (N1.getValueType() == VT) return N1; // noop conversion.
break;
case ISD::AssertSext:
@@ -3640,7 +3911,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
if (EVT == VT) return N1; // Not actually extending
auto SignExtendInReg = [&](APInt Val) {
- unsigned FromBits = EVT.getScalarType().getSizeInBits();
+ unsigned FromBits = EVT.getScalarSizeInBits();
Val <<= Val.getBitWidth() - FromBits;
Val = Val.ashr(Val.getBitWidth() - FromBits);
return getConstant(Val, DL, VT.getScalarType());
@@ -3768,6 +4039,12 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
// Trivial extraction.
if (VT.getSimpleVT() == N1.getSimpleValueType())
return N1;
+
+ // EXTRACT_SUBVECTOR of INSERT_SUBVECTOR is often created
+ // during shuffle legalization.
+ if (N1.getOpcode() == ISD::INSERT_SUBVECTOR && N2 == N1.getOperand(2) &&
+ VT == N1.getOperand(1).getValueType())
+ return N1.getOperand(1);
}
break;
}
@@ -3868,7 +4145,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
// Handle undef ^ undef -> 0 special case. This is a common
// idiom (misuse).
return getConstant(0, DL, VT);
- // fallthrough
+ LLVM_FALLTHROUGH;
case ISD::ADD:
case ISD::ADDC:
case ISD::ADDE:
@@ -3977,6 +4254,13 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
break;
case ISD::VECTOR_SHUFFLE:
llvm_unreachable("should use getVectorShuffle constructor!");
+ case ISD::INSERT_VECTOR_ELT: {
+ ConstantSDNode *N3C = dyn_cast<ConstantSDNode>(N3);
+ // INSERT_VECTOR_ELT into out-of-bounds element is an UNDEF
+ if (N3C && N3C->getZExtValue() >= N1.getValueType().getVectorNumElements())
+ return getUNDEF(VT);
+ break;
+ }
case ISD::INSERT_SUBVECTOR: {
SDValue Index = N3;
if (VT.isSimple() && N1.getValueType().isSimple()
@@ -4072,7 +4356,7 @@ static SDValue getMemsetValue(SDValue Value, EVT VT, SelectionDAG &DAG,
const SDLoc &dl) {
assert(!Value.isUndef());
- unsigned NumBits = VT.getScalarType().getSizeInBits();
+ unsigned NumBits = VT.getScalarSizeInBits();
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Value)) {
assert(C->getAPIntValue().getBitWidth() == 8);
APInt Val = APInt::getSplat(NumBits, C->getAPIntValue());
@@ -4306,10 +4590,10 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
std::vector<EVT> MemOps;
bool DstAlignCanChange = false;
MachineFunction &MF = DAG.getMachineFunction();
- MachineFrameInfo *MFI = MF.getFrameInfo();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
bool OptSize = shouldLowerMemFuncForSize(MF);
FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst);
- if (FI && !MFI->isFixedObjectIndex(FI->getIndex()))
+ if (FI && !MFI.isFixedObjectIndex(FI->getIndex()))
DstAlignCanChange = true;
unsigned SrcAlign = DAG.InferPtrAlignment(Src);
if (Align > SrcAlign)
@@ -4342,8 +4626,8 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
if (NewAlign > Align) {
// Give the stack frame object a larger alignment if needed.
- if (MFI->getObjectAlignment(FI->getIndex()) < NewAlign)
- MFI->setObjectAlignment(FI->getIndex(), NewAlign);
+ if (MFI.getObjectAlignment(FI->getIndex()) < NewAlign)
+ MFI.setObjectAlignment(FI->getIndex(), NewAlign);
Align = NewAlign;
}
}
@@ -4422,10 +4706,10 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
std::vector<EVT> MemOps;
bool DstAlignCanChange = false;
MachineFunction &MF = DAG.getMachineFunction();
- MachineFrameInfo *MFI = MF.getFrameInfo();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
bool OptSize = shouldLowerMemFuncForSize(MF);
FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst);
- if (FI && !MFI->isFixedObjectIndex(FI->getIndex()))
+ if (FI && !MFI.isFixedObjectIndex(FI->getIndex()))
DstAlignCanChange = true;
unsigned SrcAlign = DAG.InferPtrAlignment(Src);
if (Align > SrcAlign)
@@ -4445,8 +4729,8 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
unsigned NewAlign = (unsigned)DAG.getDataLayout().getABITypeAlignment(Ty);
if (NewAlign > Align) {
// Give the stack frame object a larger alignment if needed.
- if (MFI->getObjectAlignment(FI->getIndex()) < NewAlign)
- MFI->setObjectAlignment(FI->getIndex(), NewAlign);
+ if (MFI.getObjectAlignment(FI->getIndex()) < NewAlign)
+ MFI.setObjectAlignment(FI->getIndex(), NewAlign);
Align = NewAlign;
}
}
@@ -4519,10 +4803,10 @@ static SDValue getMemsetStores(SelectionDAG &DAG, const SDLoc &dl,
std::vector<EVT> MemOps;
bool DstAlignCanChange = false;
MachineFunction &MF = DAG.getMachineFunction();
- MachineFrameInfo *MFI = MF.getFrameInfo();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
bool OptSize = shouldLowerMemFuncForSize(MF);
FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst);
- if (FI && !MFI->isFixedObjectIndex(FI->getIndex()))
+ if (FI && !MFI.isFixedObjectIndex(FI->getIndex()))
DstAlignCanChange = true;
bool IsZeroVal =
isa<ConstantSDNode>(Src) && cast<ConstantSDNode>(Src)->isNullValue();
@@ -4538,8 +4822,8 @@ static SDValue getMemsetStores(SelectionDAG &DAG, const SDLoc &dl,
unsigned NewAlign = (unsigned)DAG.getDataLayout().getABITypeAlignment(Ty);
if (NewAlign > Align) {
// Give the stack frame object a larger alignment if needed.
- if (MFI->getObjectAlignment(FI->getIndex()) < NewAlign)
- MFI->setObjectAlignment(FI->getIndex(), NewAlign);
+ if (MFI.getObjectAlignment(FI->getIndex()) < NewAlign)
+ MFI.setObjectAlignment(FI->getIndex(), NewAlign);
Align = NewAlign;
}
}
@@ -4796,10 +5080,7 @@ SDValue SelectionDAG::getMemset(SDValue Chain, const SDLoc &dl, SDValue Dst,
SDValue SelectionDAG::getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT,
SDVTList VTList, ArrayRef<SDValue> Ops,
- MachineMemOperand *MMO,
- AtomicOrdering SuccessOrdering,
- AtomicOrdering FailureOrdering,
- SynchronizationScope SynchScope) {
+ MachineMemOperand *MMO) {
FoldingSetNodeID ID;
ID.AddInteger(MemVT.getRawBits());
AddNodeIDNode(ID, Opcode, VTList, Ops);
@@ -4811,8 +5092,7 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT,
}
auto *N = newSDNode<AtomicSDNode>(Opcode, dl.getIROrder(), dl.getDebugLoc(),
- VTList, MemVT, MMO, SuccessOrdering,
- FailureOrdering, SynchScope);
+ VTList, MemVT, MMO);
createOperands(N, Ops);
CSEMap.InsertNode(N, IP);
@@ -4820,14 +5100,6 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT,
return SDValue(N, 0);
}
-SDValue SelectionDAG::getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT,
- SDVTList VTList, ArrayRef<SDValue> Ops,
- MachineMemOperand *MMO, AtomicOrdering Ordering,
- SynchronizationScope SynchScope) {
- return getAtomic(Opcode, dl, MemVT, VTList, Ops, MMO, Ordering,
- Ordering, SynchScope);
-}
-
SDValue SelectionDAG::getAtomicCmpSwap(
unsigned Opcode, const SDLoc &dl, EVT MemVT, SDVTList VTs, SDValue Chain,
SDValue Ptr, SDValue Cmp, SDValue Swp, MachinePointerInfo PtrInfo,
@@ -4847,26 +5119,23 @@ SDValue SelectionDAG::getAtomicCmpSwap(
auto Flags = MachineMemOperand::MOVolatile | MachineMemOperand::MOLoad |
MachineMemOperand::MOStore;
MachineMemOperand *MMO =
- MF.getMachineMemOperand(PtrInfo, Flags, MemVT.getStoreSize(), Alignment);
+ MF.getMachineMemOperand(PtrInfo, Flags, MemVT.getStoreSize(), Alignment,
+ AAMDNodes(), nullptr, SynchScope, SuccessOrdering,
+ FailureOrdering);
- return getAtomicCmpSwap(Opcode, dl, MemVT, VTs, Chain, Ptr, Cmp, Swp, MMO,
- SuccessOrdering, FailureOrdering, SynchScope);
+ return getAtomicCmpSwap(Opcode, dl, MemVT, VTs, Chain, Ptr, Cmp, Swp, MMO);
}
SDValue SelectionDAG::getAtomicCmpSwap(unsigned Opcode, const SDLoc &dl,
EVT MemVT, SDVTList VTs, SDValue Chain,
SDValue Ptr, SDValue Cmp, SDValue Swp,
- MachineMemOperand *MMO,
- AtomicOrdering SuccessOrdering,
- AtomicOrdering FailureOrdering,
- SynchronizationScope SynchScope) {
+ MachineMemOperand *MMO) {
assert(Opcode == ISD::ATOMIC_CMP_SWAP ||
Opcode == ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS);
assert(Cmp.getValueType() == Swp.getValueType() && "Invalid Atomic Op Types");
SDValue Ops[] = {Chain, Ptr, Cmp, Swp};
- return getAtomic(Opcode, dl, MemVT, VTs, Ops, MMO,
- SuccessOrdering, FailureOrdering, SynchScope);
+ return getAtomic(Opcode, dl, MemVT, VTs, Ops, MMO);
}
SDValue SelectionDAG::getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT,
@@ -4892,16 +5161,15 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT,
MachineMemOperand *MMO =
MF.getMachineMemOperand(MachinePointerInfo(PtrVal), Flags,
- MemVT.getStoreSize(), Alignment);
+ MemVT.getStoreSize(), Alignment, AAMDNodes(),
+ nullptr, SynchScope, Ordering);
- return getAtomic(Opcode, dl, MemVT, Chain, Ptr, Val, MMO,
- Ordering, SynchScope);
+ return getAtomic(Opcode, dl, MemVT, Chain, Ptr, Val, MMO);
}
SDValue SelectionDAG::getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT,
SDValue Chain, SDValue Ptr, SDValue Val,
- MachineMemOperand *MMO, AtomicOrdering Ordering,
- SynchronizationScope SynchScope) {
+ MachineMemOperand *MMO) {
assert((Opcode == ISD::ATOMIC_LOAD_ADD ||
Opcode == ISD::ATOMIC_LOAD_SUB ||
Opcode == ISD::ATOMIC_LOAD_AND ||
@@ -4921,18 +5189,17 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT,
SDVTList VTs = Opcode == ISD::ATOMIC_STORE ? getVTList(MVT::Other) :
getVTList(VT, MVT::Other);
SDValue Ops[] = {Chain, Ptr, Val};
- return getAtomic(Opcode, dl, MemVT, VTs, Ops, MMO, Ordering, SynchScope);
+ return getAtomic(Opcode, dl, MemVT, VTs, Ops, MMO);
}
SDValue SelectionDAG::getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT,
EVT VT, SDValue Chain, SDValue Ptr,
- MachineMemOperand *MMO, AtomicOrdering Ordering,
- SynchronizationScope SynchScope) {
+ MachineMemOperand *MMO) {
assert(Opcode == ISD::ATOMIC_LOAD && "Invalid Atomic Op");
SDVTList VTs = getVTList(VT, MVT::Other);
SDValue Ops[] = {Chain, Ptr};
- return getAtomic(Opcode, dl, MemVT, VTs, Ops, MMO, Ordering, SynchScope);
+ return getAtomic(Opcode, dl, MemVT, VTs, Ops, MMO);
}
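The hunks above move the atomic ordering and synchronization scope off the getAtomic/getAtomicCmpSwap signatures and into the MachineMemOperand that already travels with the node. A library-free sketch of that refactoring pattern, with plain structs standing in for the LLVM types:

#include <cstdio>

enum class AtomicOrdering { NotAtomic, Monotonic, Acquire, Release, SeqCst };

// Stand-in for MachineMemOperand: the ordering is now carried here.
struct MemOperand {
  unsigned SizeInBytes;
  AtomicOrdering Ordering;
};

// Stand-in for AtomicSDNode: it no longer stores the ordering itself but
// reads it back from its memory operand.
struct AtomicNode {
  MemOperand MMO;
  AtomicOrdering ordering() const { return MMO.Ordering; }
};

int main() {
  AtomicNode N{MemOperand{8, AtomicOrdering::SeqCst}};
  printf("%d\n", static_cast<int>(N.ordering())); // 4 (SeqCst)
  return 0;
}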
/// getMergeValues - Create a MERGE_VALUES node from the given operands.
@@ -5056,7 +5323,7 @@ SDValue SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType,
assert(Chain.getValueType() == MVT::Other &&
"Invalid chain type");
if (Alignment == 0) // Ensure that codegen never sees alignment 0
- Alignment = getEVTAlignment(VT);
+ Alignment = getEVTAlignment(MemVT);
MMOFlags |= MachineMemOperand::MOLoad;
assert((MMOFlags & MachineMemOperand::MOStore) == 0);
@@ -5101,9 +5368,8 @@ SDValue SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType,
FoldingSetNodeID ID;
AddNodeIDNode(ID, ISD::LOAD, VTs, Ops);
ID.AddInteger(MemVT.getRawBits());
- ID.AddInteger(encodeMemSDNodeFlags(ExtType, AM, MMO->isVolatile(),
- MMO->isNonTemporal(),
- MMO->isInvariant()));
+ ID.AddInteger(getSyntheticNodeSubclassData<LoadSDNode>(
+ dl.getIROrder(), VTs, AM, ExtType, MemVT, MMO));
ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
void *IP = nullptr;
if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) {
@@ -5160,12 +5426,14 @@ SDValue SelectionDAG::getIndexedLoad(SDValue OrigLoad, const SDLoc &dl,
ISD::MemIndexedMode AM) {
LoadSDNode *LD = cast<LoadSDNode>(OrigLoad);
assert(LD->getOffset().isUndef() && "Load is already an indexed load!");
- // Don't propagate the invariant flag.
+ // Don't propagate the invariant or dereferenceable flags.
auto MMOFlags =
- LD->getMemOperand()->getFlags() & ~MachineMemOperand::MOInvariant;
+ LD->getMemOperand()->getFlags() &
+ ~(MachineMemOperand::MOInvariant | MachineMemOperand::MODereferenceable);
return getLoad(AM, LD->getExtensionType(), OrigLoad.getValueType(), dl,
LD->getChain(), Base, Offset, LD->getPointerInfo(),
- LD->getMemoryVT(), LD->getAlignment(), MMOFlags);
+ LD->getMemoryVT(), LD->getAlignment(), MMOFlags,
+ LD->getAAInfo());
}
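Clearing MOInvariant and MODereferenceable for the indexed load is the usual clear-by-mask bit-flag idiom; a stand-alone sketch with a plain bitmask (bit positions assumed) standing in for MachineMemOperand::Flags:

#include <cstdint>
#include <cstdio>

enum MMOFlag : uint32_t {
  MOInvariant       = 1u << 0,
  MODereferenceable = 1u << 1,
  MONonTemporal     = 1u << 2,
};

int main() {
  uint32_t Flags = MOInvariant | MODereferenceable | MONonTemporal;
  // An indexed load must not inherit these two properties from the original.
  Flags &= ~(uint32_t(MOInvariant) | uint32_t(MODereferenceable));
  printf("%u\n", Flags); // 4: only MONonTemporal survives
  return 0;
}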
SDValue SelectionDAG::getStore(SDValue Chain, const SDLoc &dl, SDValue Val,
@@ -5200,8 +5468,8 @@ SDValue SelectionDAG::getStore(SDValue Chain, const SDLoc &dl, SDValue Val,
FoldingSetNodeID ID;
AddNodeIDNode(ID, ISD::STORE, VTs, Ops);
ID.AddInteger(VT.getRawBits());
- ID.AddInteger(encodeMemSDNodeFlags(false, ISD::UNINDEXED, MMO->isVolatile(),
- MMO->isNonTemporal(), MMO->isInvariant()));
+ ID.AddInteger(getSyntheticNodeSubclassData<StoreSDNode>(
+ dl.getIROrder(), VTs, ISD::UNINDEXED, false, VT, MMO));
ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
void *IP = nullptr;
if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) {
@@ -5265,8 +5533,8 @@ SDValue SelectionDAG::getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val,
FoldingSetNodeID ID;
AddNodeIDNode(ID, ISD::STORE, VTs, Ops);
ID.AddInteger(SVT.getRawBits());
- ID.AddInteger(encodeMemSDNodeFlags(true, ISD::UNINDEXED, MMO->isVolatile(),
- MMO->isNonTemporal(), MMO->isInvariant()));
+ ID.AddInteger(getSyntheticNodeSubclassData<StoreSDNode>(
+ dl.getIROrder(), VTs, ISD::UNINDEXED, true, SVT, MMO));
ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
void *IP = nullptr;
if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) {
@@ -5311,17 +5579,15 @@ SDValue SelectionDAG::getIndexedStore(SDValue OrigStore, const SDLoc &dl,
SDValue SelectionDAG::getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain,
SDValue Ptr, SDValue Mask, SDValue Src0,
EVT MemVT, MachineMemOperand *MMO,
- ISD::LoadExtType ExtTy) {
+ ISD::LoadExtType ExtTy, bool isExpanding) {
SDVTList VTs = getVTList(VT, MVT::Other);
SDValue Ops[] = { Chain, Ptr, Mask, Src0 };
FoldingSetNodeID ID;
AddNodeIDNode(ID, ISD::MLOAD, VTs, Ops);
ID.AddInteger(VT.getRawBits());
- ID.AddInteger(encodeMemSDNodeFlags(ExtTy, ISD::UNINDEXED,
- MMO->isVolatile(),
- MMO->isNonTemporal(),
- MMO->isInvariant()));
+ ID.AddInteger(getSyntheticNodeSubclassData<MaskedLoadSDNode>(
+ dl.getIROrder(), VTs, ExtTy, isExpanding, MemVT, MMO));
ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
void *IP = nullptr;
if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) {
@@ -5329,7 +5595,7 @@ SDValue SelectionDAG::getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain,
return SDValue(E, 0);
}
auto *N = newSDNode<MaskedLoadSDNode>(dl.getIROrder(), dl.getDebugLoc(), VTs,
- ExtTy, MemVT, MMO);
+ ExtTy, isExpanding, MemVT, MMO);
createOperands(N, Ops);
CSEMap.InsertNode(N, IP);
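The new isExpanding flag models llvm.masked.expandload, where enabled lanes consume consecutive memory elements rather than the element at their own lane offset. A scalar model of that semantics (int elements assumed for illustration):

#include <cstdio>
#include <vector>

// Enabled lanes read consecutive memory slots; disabled lanes keep PassThru.
static std::vector<int> expandLoad(const int *Mem,
                                   const std::vector<bool> &Mask,
                                   const std::vector<int> &PassThru) {
  std::vector<int> Out = PassThru;
  size_t P = 0;
  for (size_t I = 0; I != Mask.size(); ++I)
    if (Mask[I])
      Out[I] = Mem[P++];
  return Out;
}

int main() {
  int Mem[] = {10, 20};
  for (int V : expandLoad(Mem, {true, false, true, false}, {0, 0, 0, 0}))
    printf("%d ", V); // 10 0 20 0
  printf("\n");
  return 0;
}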
@@ -5340,7 +5606,7 @@ SDValue SelectionDAG::getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain,
SDValue SelectionDAG::getMaskedStore(SDValue Chain, const SDLoc &dl,
SDValue Val, SDValue Ptr, SDValue Mask,
EVT MemVT, MachineMemOperand *MMO,
- bool isTrunc) {
+ bool IsTruncating, bool IsCompressing) {
assert(Chain.getValueType() == MVT::Other &&
"Invalid chain type");
EVT VT = Val.getValueType();
@@ -5349,8 +5615,8 @@ SDValue SelectionDAG::getMaskedStore(SDValue Chain, const SDLoc &dl,
FoldingSetNodeID ID;
AddNodeIDNode(ID, ISD::MSTORE, VTs, Ops);
ID.AddInteger(VT.getRawBits());
- ID.AddInteger(encodeMemSDNodeFlags(false, ISD::UNINDEXED, MMO->isVolatile(),
- MMO->isNonTemporal(), MMO->isInvariant()));
+ ID.AddInteger(getSyntheticNodeSubclassData<MaskedStoreSDNode>(
+ dl.getIROrder(), VTs, IsTruncating, IsCompressing, MemVT, MMO));
ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
void *IP = nullptr;
if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) {
@@ -5358,7 +5624,7 @@ SDValue SelectionDAG::getMaskedStore(SDValue Chain, const SDLoc &dl,
return SDValue(E, 0);
}
auto *N = newSDNode<MaskedStoreSDNode>(dl.getIROrder(), dl.getDebugLoc(), VTs,
- isTrunc, MemVT, MMO);
+ IsTruncating, IsCompressing, MemVT, MMO);
createOperands(N, Ops);
CSEMap.InsertNode(N, IP);
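IsCompressing is the store-side mirror (llvm.masked.compressstore): enabled lanes are packed into consecutive memory slots. The scalar model, under the same assumptions as the expanding-load sketch above:

#include <cstdio>
#include <vector>

// Enabled lanes are written out contiguously; returns how many were stored.
static size_t compressStore(int *Mem, const std::vector<bool> &Mask,
                            const std::vector<int> &Val) {
  size_t P = 0;
  for (size_t I = 0; I != Mask.size(); ++I)
    if (Mask[I])
      Mem[P++] = Val[I];
  return P;
}

int main() {
  int Mem[4] = {0, 0, 0, 0};
  size_t N = compressStore(Mem, {true, false, true, false}, {1, 2, 3, 4});
  printf("%zu: %d %d\n", N, Mem[0], Mem[1]); // 2: 1 3
  return 0;
}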
@@ -5374,10 +5640,8 @@ SDValue SelectionDAG::getMaskedGather(SDVTList VTs, EVT VT, const SDLoc &dl,
FoldingSetNodeID ID;
AddNodeIDNode(ID, ISD::MGATHER, VTs, Ops);
ID.AddInteger(VT.getRawBits());
- ID.AddInteger(encodeMemSDNodeFlags(ISD::NON_EXTLOAD, ISD::UNINDEXED,
- MMO->isVolatile(),
- MMO->isNonTemporal(),
- MMO->isInvariant()));
+ ID.AddInteger(getSyntheticNodeSubclassData<MaskedGatherSDNode>(
+ dl.getIROrder(), VTs, VT, MMO));
ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
void *IP = nullptr;
if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) {
@@ -5411,9 +5675,8 @@ SDValue SelectionDAG::getMaskedScatter(SDVTList VTs, EVT VT, const SDLoc &dl,
FoldingSetNodeID ID;
AddNodeIDNode(ID, ISD::MSCATTER, VTs, Ops);
ID.AddInteger(VT.getRawBits());
- ID.AddInteger(encodeMemSDNodeFlags(false, ISD::UNINDEXED, MMO->isVolatile(),
- MMO->isNonTemporal(),
- MMO->isInvariant()));
+ ID.AddInteger(getSyntheticNodeSubclassData<MaskedScatterSDNode>(
+ dl.getIROrder(), VTs, VT, MMO));
ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
void *IP = nullptr;
if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) {
@@ -5545,7 +5808,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList,
if (ConstantSDNode *AndRHS = dyn_cast<ConstantSDNode>(N3.getOperand(1))) {
// If the and is only masking out bits that cannot affect the shift,
// eliminate the and.
- unsigned NumBits = VT.getScalarType().getSizeInBits()*2;
+ unsigned NumBits = VT.getScalarSizeInBits()*2;
if ((AndRHS->getValue() & (NumBits-1)) == NumBits-1)
return getNode(Opcode, DL, VT, N1, N2, N3.getOperand(0));
}
@@ -5870,21 +6133,6 @@ SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
}
SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
- EVT VT1, EVT VT2, EVT VT3, EVT VT4,
- ArrayRef<SDValue> Ops) {
- SDVTList VTs = getVTList(VT1, VT2, VT3, VT4);
- return SelectNodeTo(N, MachineOpc, VTs, Ops);
-}
-
-SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
- EVT VT1, EVT VT2,
- SDValue Op1) {
- SDVTList VTs = getVTList(VT1, VT2);
- SDValue Ops[] = { Op1 };
- return SelectNodeTo(N, MachineOpc, VTs, Ops);
-}
-
-SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
EVT VT1, EVT VT2,
SDValue Op1, SDValue Op2) {
SDVTList VTs = getVTList(VT1, VT2);
@@ -5893,24 +6141,6 @@ SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
}
SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
- EVT VT1, EVT VT2,
- SDValue Op1, SDValue Op2,
- SDValue Op3) {
- SDVTList VTs = getVTList(VT1, VT2);
- SDValue Ops[] = { Op1, Op2, Op3 };
- return SelectNodeTo(N, MachineOpc, VTs, Ops);
-}
-
-SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
- EVT VT1, EVT VT2, EVT VT3,
- SDValue Op1, SDValue Op2,
- SDValue Op3) {
- SDVTList VTs = getVTList(VT1, VT2, VT3);
- SDValue Ops[] = { Op1, Op2, Op3 };
- return SelectNodeTo(N, MachineOpc, VTs, Ops);
-}
-
-SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
SDVTList VTs,ArrayRef<SDValue> Ops) {
SDNode *New = MorphNodeTo(N, ~MachineOpc, VTs, Ops);
// Reset the NodeID to -1.
@@ -5922,14 +6152,14 @@ SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
return New;
}
-/// UpdadeSDLocOnMergedSDNode - If the opt level is -O0 then it throws away
+/// UpdateSDLocOnMergeSDNode - If the opt level is -O0 then it throws away
/// the line number information on the merged node since it is not possible to
/// preserve the information that the operation is associated with multiple lines.
/// This will make the debugger work better at -O0, where there is a higher
/// probability of having other instructions associated with that line.
///
/// For IROrder, we keep the smaller of the two
-SDNode *SelectionDAG::UpdadeSDLocOnMergedSDNode(SDNode *N, const SDLoc &OLoc) {
+SDNode *SelectionDAG::UpdateSDLocOnMergeSDNode(SDNode *N, const SDLoc &OLoc) {
DebugLoc NLoc = N->getDebugLoc();
if (NLoc && OptLevel == CodeGenOpt::None && OLoc.getDebugLoc() != NLoc) {
N->setDebugLoc(DebugLoc());
@@ -5963,7 +6193,7 @@ SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc,
FoldingSetNodeID ID;
AddNodeIDNode(ID, Opc, VTs, Ops);
if (SDNode *ON = FindNodeOrInsertPos(ID, SDLoc(N), IP))
- return UpdadeSDLocOnMergedSDNode(ON, SDLoc(N));
+ return UpdateSDLocOnMergeSDNode(ON, SDLoc(N));
}
if (!RemoveNodeFromCSEMaps(N))
@@ -6050,19 +6280,6 @@ MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl,
}
MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl,
- EVT VT1, EVT VT2) {
- SDVTList VTs = getVTList(VT1, VT2);
- return getMachineNode(Opcode, dl, VTs, None);
-}
-
-MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl,
- EVT VT1, EVT VT2, SDValue Op1) {
- SDVTList VTs = getVTList(VT1, VT2);
- SDValue Ops[] = { Op1 };
- return getMachineNode(Opcode, dl, VTs, Ops);
-}
-
-MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl,
EVT VT1, EVT VT2, SDValue Op1,
SDValue Op2) {
SDVTList VTs = getVTList(VT1, VT2);
@@ -6110,13 +6327,6 @@ MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl,
}
MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl,
- EVT VT1, EVT VT2, EVT VT3, EVT VT4,
- ArrayRef<SDValue> Ops) {
- SDVTList VTs = getVTList(VT1, VT2, VT3, VT4);
- return getMachineNode(Opcode, dl, VTs, Ops);
-}
-
-MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl,
ArrayRef<EVT> ResultTys,
ArrayRef<SDValue> Ops) {
SDVTList VTs = getVTList(ResultTys);
@@ -6135,7 +6345,7 @@ MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &DL,
AddNodeIDNode(ID, ~Opcode, VTs, Ops);
IP = nullptr;
if (SDNode *E = FindNodeOrInsertPos(ID, DL, IP)) {
- return cast<MachineSDNode>(UpdadeSDLocOnMergedSDNode(E, DL));
+ return cast<MachineSDNode>(UpdateSDLocOnMergeSDNode(E, DL));
}
}
@@ -6255,6 +6465,9 @@ void SelectionDAG::ReplaceAllUsesWith(SDValue FromN, SDValue To) {
"Cannot replace with this method!");
assert(From != To.getNode() && "Cannot replace uses of with self");
+ // Preserve Debug Values
+ TransferDbgValues(FromN, To);
+
// Iterate over all the existing uses of From. New uses will be added
// to the beginning of the use list, which we avoid visiting.
// This specifically avoids visiting uses of From that arise while the
@@ -6285,8 +6498,6 @@ void SelectionDAG::ReplaceAllUsesWith(SDValue FromN, SDValue To) {
AddModifiedNodeToCSEMaps(User);
}
- // Preserve Debug Values
- TransferDbgValues(FromN, To);
// If we just RAUW'd the root, take note.
if (FromN == getRoot())
@@ -6689,6 +6900,40 @@ bool llvm::isBitwiseNot(SDValue V) {
return V.getOpcode() == ISD::XOR && isAllOnesConstant(V.getOperand(1));
}
+ConstantSDNode *llvm::isConstOrConstSplat(SDValue N) {
+ if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N))
+ return CN;
+
+ if (BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N)) {
+ BitVector UndefElements;
+ ConstantSDNode *CN = BV->getConstantSplatNode(&UndefElements);
+
+ // BuildVectors can truncate their operands. Ignore that case here.
+ // FIXME: We blindly ignore splats which include undef, which is overly
+ // pessimistic.
+ if (CN && UndefElements.none() &&
+ CN->getValueType(0) == N.getValueType().getScalarType())
+ return CN;
+ }
+
+ return nullptr;
+}
+
+ConstantFPSDNode *llvm::isConstOrConstSplatFP(SDValue N) {
+ if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N))
+ return CN;
+
+ if (BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N)) {
+ BitVector UndefElements;
+ ConstantFPSDNode *CN = BV->getConstantFPSplatNode(&UndefElements);
+
+ if (CN && UndefElements.none())
+ return CN;
+ }
+
+ return nullptr;
+}
+
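Both helpers accept a splat only when every lane is defined and equal; as the FIXME notes, rejecting splats that contain undef lanes is conservative. A library-free model of that check, with std::nullopt standing in for an undef operand:

#include <cstdio>
#include <optional>
#include <vector>

static std::optional<int>
constSplat(const std::vector<std::optional<int>> &Lanes) {
  if (Lanes.empty() || !Lanes[0])
    return std::nullopt;
  for (const auto &L : Lanes)
    if (!L || *L != *Lanes[0])
      return std::nullopt; // undef or mismatched lane: not a usable splat
  return Lanes[0];
}

int main() {
  printf("%d\n", *constSplat({{7}, {7}, {7}}));            // 7
  printf("%d\n", constSplat({{7}, std::nullopt}) ? 1 : 0); // 0
  return 0;
}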
HandleSDNode::~HandleSDNode() {
DropOperands();
}
@@ -6710,11 +6955,11 @@ AddrSpaceCastSDNode::AddrSpaceCastSDNode(unsigned Order, const DebugLoc &dl,
MemSDNode::MemSDNode(unsigned Opc, unsigned Order, const DebugLoc &dl,
SDVTList VTs, EVT memvt, MachineMemOperand *mmo)
: SDNode(Opc, Order, dl, VTs), MemoryVT(memvt), MMO(mmo) {
- SubclassData = encodeMemSDNodeFlags(0, ISD::UNINDEXED, MMO->isVolatile(),
- MMO->isNonTemporal(), MMO->isInvariant());
- assert(isVolatile() == MMO->isVolatile() && "Volatile encoding error!");
- assert(isNonTemporal() == MMO->isNonTemporal() &&
- "Non-temporal encoding error!");
+ MemSDNodeBits.IsVolatile = MMO->isVolatile();
+ MemSDNodeBits.IsNonTemporal = MMO->isNonTemporal();
+ MemSDNodeBits.IsDereferenceable = MMO->isDereferenceable();
+ MemSDNodeBits.IsInvariant = MMO->isInvariant();
+
// We check here that the size of the memory operand fits within the size of
// the MMO. This is because the MMO might indicate only a possible address
// range instead of specifying the affected memory addresses precisely.
@@ -6939,8 +7184,8 @@ SDValue SelectionDAG::UnrollVectorOp(SDNode *N, unsigned ResNE) {
for (; i < ResNE; ++i)
Scalars.push_back(getUNDEF(EltVT));
- return getNode(ISD::BUILD_VECTOR, dl,
- EVT::getVectorVT(*getContext(), EltVT, ResNE), Scalars);
+ EVT VecVT = EVT::getVectorVT(*getContext(), EltVT, ResNE);
+ return getBuildVector(VecVT, dl, Scalars);
}
bool SelectionDAG::areNonVolatileConsecutiveLoads(LoadSDNode *LD,
@@ -6962,13 +7207,13 @@ bool SelectionDAG::areNonVolatileConsecutiveLoads(LoadSDNode *LD,
if (Loc.getOpcode() == ISD::FrameIndex) {
if (BaseLoc.getOpcode() != ISD::FrameIndex)
return false;
- const MachineFrameInfo *MFI = getMachineFunction().getFrameInfo();
+ const MachineFrameInfo &MFI = getMachineFunction().getFrameInfo();
int FI = cast<FrameIndexSDNode>(Loc)->getIndex();
int BFI = cast<FrameIndexSDNode>(BaseLoc)->getIndex();
- int FS = MFI->getObjectSize(FI);
- int BFS = MFI->getObjectSize(BFI);
+ int FS = MFI.getObjectSize(FI);
+ int BFS = MFI.getObjectSize(BFI);
if (FS != BFS || FS != (int)Bytes) return false;
- return MFI->getObjectOffset(FI) == (MFI->getObjectOffset(BFI) + Dist*Bytes);
+ return MFI.getObjectOffset(FI) == (MFI.getObjectOffset(BFI) + Dist*Bytes);
}
// Handle X + C.
@@ -7033,7 +7278,7 @@ unsigned SelectionDAG::InferPtrAlignment(SDValue Ptr) const {
}
if (FrameIdx != (1 << 31)) {
- const MachineFrameInfo &MFI = *getMachineFunction().getFrameInfo();
+ const MachineFrameInfo &MFI = getMachineFunction().getFrameInfo();
unsigned FIInfoAlign = MinAlign(MFI.getObjectAlignment(FrameIdx),
FrameOffset);
return FIInfoAlign;
@@ -7124,7 +7369,7 @@ bool BuildVectorSDNode::isConstantSplat(APInt &SplatValue,
// false.
unsigned int nOps = getNumOperands();
assert(nOps > 0 && "isConstantSplat has 0-size build vector");
- unsigned EltBitSize = VT.getVectorElementType().getSizeInBits();
+ unsigned EltBitSize = VT.getScalarSizeInBits();
for (unsigned j = 0; j < nOps; ++j) {
unsigned i = isBigEndian ? nOps-1-j : j;
@@ -7265,6 +7510,16 @@ SDNode *SelectionDAG::isConstantIntBuildVectorOrConstantInt(SDValue N) {
return nullptr;
}
+SDNode *SelectionDAG::isConstantFPBuildVectorOrConstantFP(SDValue N) {
+ if (isa<ConstantFPSDNode>(N))
+ return N.getNode();
+
+ if (ISD::isBuildVectorOfConstantFPSDNodes(N.getNode()))
+ return N.getNode();
+
+ return nullptr;
+}
+
#ifndef NDEBUG
static void checkForCyclesHelper(const SDNode *N,
SmallPtrSetImpl<const SDNode*> &Visited,
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index e1fc37d..996c95b 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -40,6 +40,7 @@
#include "llvm/CodeGen/StackMaps.h"
#include "llvm/CodeGen/WinEHFuncInfo.h"
#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugInfo.h"
@@ -183,7 +184,7 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, const SDLoc &DL,
Hi = DAG.getNode(ISD::ANY_EXTEND, DL, TotalVT, Hi);
Hi =
DAG.getNode(ISD::SHL, DL, TotalVT, Hi,
- DAG.getConstant(Lo.getValueType().getSizeInBits(), DL,
+ DAG.getConstant(Lo.getValueSizeInBits(), DL,
TLI.getPointerTy(DAG.getDataLayout())));
Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, TotalVT, Lo);
Val = DAG.getNode(ISD::OR, DL, TotalVT, Lo, Hi);
@@ -833,8 +834,7 @@ void RegsForValue::AddInlineAsmOperands(unsigned Code, bool HasMatching,
if (TheReg == SP && Code == InlineAsm::Kind_Clobber) {
// If we clobbered the stack pointer, MFI should know about it.
- assert(DAG.getMachineFunction().getFrameInfo()->
- hasOpaqueSPAdjustment());
+ assert(DAG.getMachineFunction().getFrameInfo().hasOpaqueSPAdjustment());
}
}
}
@@ -931,46 +931,9 @@ SDValue SelectionDAGBuilder::getControlRoot() {
return Root;
}
-/// Copy swift error to the final virtual register at end of a basic block, as
-/// specified by SwiftErrorWorklist, if necessary.
-static void copySwiftErrorsToFinalVRegs(SelectionDAGBuilder &SDB) {
- const TargetLowering &TLI = SDB.DAG.getTargetLoweringInfo();
- if (!TLI.supportSwiftError())
- return;
-
- if (!SDB.FuncInfo.SwiftErrorWorklist.count(SDB.FuncInfo.MBB))
- return;
-
- // Go through entries in SwiftErrorWorklist, and create copy as necessary.
- FunctionLoweringInfo::SwiftErrorVRegs &WorklistEntry =
- SDB.FuncInfo.SwiftErrorWorklist[SDB.FuncInfo.MBB];
- FunctionLoweringInfo::SwiftErrorVRegs &MapEntry =
- SDB.FuncInfo.SwiftErrorMap[SDB.FuncInfo.MBB];
- for (unsigned I = 0, E = WorklistEntry.size(); I < E; I++) {
- unsigned WorkReg = WorklistEntry[I];
-
- // Find the swifterror virtual register for the value in SwiftErrorMap.
- unsigned MapReg = MapEntry[I];
- assert(TargetRegisterInfo::isVirtualRegister(MapReg) &&
- "Entries in SwiftErrorMap should be virtual registers");
-
- if (WorkReg == MapReg)
- continue;
-
- // Create copy from SwiftErrorMap to SwiftWorklist.
- auto &DL = SDB.DAG.getDataLayout();
- SDValue CopyNode = SDB.DAG.getCopyToReg(
- SDB.getRoot(), SDB.getCurSDLoc(), WorkReg,
- SDB.DAG.getRegister(MapReg, EVT(TLI.getPointerTy(DL))));
- MapEntry[I] = WorkReg;
- SDB.DAG.setRoot(CopyNode);
- }
-}
-
void SelectionDAGBuilder::visit(const Instruction &I) {
// Set up outgoing PHI node register values before emitting the terminator.
if (isa<TerminatorInst>(&I)) {
- copySwiftErrorsToFinalVRegs(*this);
HandlePHINodesInSuccessorBlocks(I.getParent());
}
@@ -1021,8 +984,7 @@ void SelectionDAGBuilder::resolveDanglingDebugInfo(const Value *V,
if (Val.getNode()) {
if (!EmitFuncArgumentDbgValue(V, Variable, Expr, dl, Offset, false,
Val)) {
- SDV = DAG.getDbgValue(Variable, Expr, Val.getNode(), Val.getResNo(),
- false, Offset, dl, DbgSDNodeOrder);
+ SDV = getDbgValue(Val, Variable, Expr, Offset, dl, DbgSDNodeOrder);
DAG.AddDbgValue(SDV, Val.getNode(), false);
}
} else
@@ -1491,6 +1453,7 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
const Function *F = I.getParent()->getParent();
if (TLI.supportSwiftError() &&
F->getAttributes().hasAttrSomewhere(Attribute::SwiftError)) {
+ assert(FuncInfo.SwiftErrorArg && "Need a swift error argument");
ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy();
Flags.setSwiftError();
Outs.push_back(ISD::OutputArg(Flags, EVT(TLI.getPointerTy(DL)) /*vt*/,
@@ -1498,7 +1461,8 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
true /*isfixed*/, 1 /*origidx*/,
0 /*partOffs*/));
// Create SDNode for the swifterror virtual register.
- OutVals.push_back(DAG.getRegister(FuncInfo.SwiftErrorMap[FuncInfo.MBB][0],
+ OutVals.push_back(DAG.getRegister(FuncInfo.getOrCreateSwiftErrorVReg(
+ FuncInfo.MBB, FuncInfo.SwiftErrorArg),
EVT(TLI.getPointerTy(DL))));
}
@@ -2012,7 +1976,8 @@ static SDValue getLoadStackGuard(SelectionDAG &DAG, const SDLoc &DL,
if (Global) {
MachinePointerInfo MPInfo(Global);
MachineInstr::mmo_iterator MemRefs = MF.allocateMemRefsArray(1);
- auto Flags = MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant;
+ auto Flags = MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant |
+ MachineMemOperand::MODereferenceable;
*MemRefs = MF.getMachineMemOperand(MPInfo, Flags, PtrTy.getSizeInBits() / 8,
DAG.getEVTAlignment(PtrTy));
Node->setMemRefs(MemRefs, MemRefs + 1);
@@ -2033,8 +1998,8 @@ void SelectionDAGBuilder::visitSPDescriptorParent(StackProtectorDescriptor &SPD,
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
EVT PtrTy = TLI.getPointerTy(DAG.getDataLayout());
- MachineFrameInfo *MFI = ParentBB->getParent()->getFrameInfo();
- int FI = MFI->getStackProtectorIndex();
+ MachineFrameInfo &MFI = ParentBB->getParent()->getFrameInfo();
+ int FI = MFI.getStackProtectorIndex();
SDValue Guard;
SDLoc dl = getCurSDLoc();
@@ -2329,8 +2294,7 @@ void SelectionDAGBuilder::visitLandingPad(const LandingPadInst &LP) {
"Call to landingpad not in landing pad!");
MachineBasicBlock *MBB = FuncInfo.MBB;
- MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI();
- AddLandingPadInfo(LP, MMI, MBB);
+ addLandingPadInfo(LP, *MBB);
// If there aren't registers to copy the values into (e.g., during SjLj
// exceptions), then don't bother to create these DAG nodes.
@@ -2484,7 +2448,7 @@ static bool isVectorReductionOp(const User *I) {
if (const FPMathOperator *FPOp = dyn_cast<const FPMathOperator>(Inst))
if (FPOp->getFastMathFlags().unsafeAlgebra())
break;
- // Fall through.
+ LLVM_FALLTHROUGH;
default:
return false;
}
@@ -2639,7 +2603,7 @@ void SelectionDAGBuilder::visitShift(const User &I, unsigned Opcode) {
// Coerce the shift amount to the right type if we can.
if (!I.getType()->isVectorTy() && Op2.getValueType() != ShiftTy) {
unsigned ShiftSize = ShiftTy.getSizeInBits();
- unsigned Op2Size = Op2.getValueType().getSizeInBits();
+ unsigned Op2Size = Op2.getValueSizeInBits();
SDLoc DL = getCurSDLoc();
// If the operand is smaller than the shift count type, promote it.
@@ -2650,7 +2614,7 @@ void SelectionDAGBuilder::visitShift(const User &I, unsigned Opcode) {
// count type has enough bits to represent any shift value, truncate
// it now. This is a common case and it exposes the truncate to
// optimization early.
- else if (ShiftSize >= Log2_32_Ceil(Op2.getValueType().getSizeInBits()))
+ else if (ShiftSize >= Log2_32_Ceil(Op2.getValueSizeInBits()))
Op2 = DAG.getNode(ISD::TRUNCATE, DL, ShiftTy, Op2);
// Otherwise we'll need to temporarily settle for some other convenient
// type. Type legalization will make adjustments once the shiftee is split.
@@ -2731,7 +2695,7 @@ void SelectionDAGBuilder::visitFCmp(const User &I) {
// Check if the condition of the select has one use or two users that are both
// selects with the same condition.
static bool hasOnlySelectUsers(const Value *Cond) {
- return std::all_of(Cond->user_begin(), Cond->user_end(), [](const Value *V) {
+ return all_of(Cond->users(), [](const Value *V) {
return isa<SelectInst>(V);
});
}
@@ -2998,6 +2962,7 @@ void SelectionDAGBuilder::visitExtractElement(const User &I) {
void SelectionDAGBuilder::visitShuffleVector(const User &I) {
SDValue Src1 = getValue(I.getOperand(0));
SDValue Src2 = getValue(I.getOperand(1));
+ SDLoc DL = getCurSDLoc();
SmallVector<int, 8> Mask;
ShuffleVectorInst::getShuffleMask(cast<Constant>(I.getOperand(2)), Mask);
@@ -3009,54 +2974,60 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) {
unsigned SrcNumElts = SrcVT.getVectorNumElements();
if (SrcNumElts == MaskNumElts) {
- setValue(&I, DAG.getVectorShuffle(VT, getCurSDLoc(), Src1, Src2, Mask));
+ setValue(&I, DAG.getVectorShuffle(VT, DL, Src1, Src2, Mask));
return;
}
// Normalize the shuffle vector since mask and vector length don't match.
- if (SrcNumElts < MaskNumElts && MaskNumElts % SrcNumElts == 0) {
- // Mask is longer than the source vectors and is a multiple of the source
- // vectors. We can use concatenate vector to make the mask and vectors
- // lengths match.
-
- unsigned NumConcat = MaskNumElts / SrcNumElts;
-
- // Check if the shuffle is some kind of concatenation of the input vectors.
- bool IsConcat = true;
- SmallVector<int, 8> ConcatSrcs(NumConcat, -1);
- for (unsigned i = 0; i != MaskNumElts; ++i) {
- int Idx = Mask[i];
- if (Idx < 0)
- continue;
- // Ensure the indices in each SrcVT sized piece are sequential and that
- // the same source is used for the whole piece.
- if ((Idx % SrcNumElts != (i % SrcNumElts)) ||
- (ConcatSrcs[i / SrcNumElts] >= 0 &&
- ConcatSrcs[i / SrcNumElts] != (int)(Idx / SrcNumElts))) {
- IsConcat = false;
- break;
+ if (SrcNumElts < MaskNumElts) {
+ // Mask is longer than the source vectors. We can use concatenate vector to
+ // make the mask and vectors lengths match.
+
+ if (MaskNumElts % SrcNumElts == 0) {
+ // Mask length is a multiple of the source vector length.
+ // Check if the shuffle is some kind of concatenation of the input
+ // vectors.
+ unsigned NumConcat = MaskNumElts / SrcNumElts;
+ bool IsConcat = true;
+ SmallVector<int, 8> ConcatSrcs(NumConcat, -1);
+ for (unsigned i = 0; i != MaskNumElts; ++i) {
+ int Idx = Mask[i];
+ if (Idx < 0)
+ continue;
+ // Ensure the indices in each SrcVT sized piece are sequential and that
+ // the same source is used for the whole piece.
+ if ((Idx % SrcNumElts != (i % SrcNumElts)) ||
+ (ConcatSrcs[i / SrcNumElts] >= 0 &&
+ ConcatSrcs[i / SrcNumElts] != (int)(Idx / SrcNumElts))) {
+ IsConcat = false;
+ break;
+ }
+ // Remember which source this index came from.
+ ConcatSrcs[i / SrcNumElts] = Idx / SrcNumElts;
}
- // Remember which source this index came from.
- ConcatSrcs[i / SrcNumElts] = Idx / SrcNumElts;
- }
- // The shuffle is concatenating multiple vectors together. Just emit
- // a CONCAT_VECTORS operation.
- if (IsConcat) {
- SmallVector<SDValue, 8> ConcatOps;
- for (auto Src : ConcatSrcs) {
- if (Src < 0)
- ConcatOps.push_back(DAG.getUNDEF(SrcVT));
- else if (Src == 0)
- ConcatOps.push_back(Src1);
- else
- ConcatOps.push_back(Src2);
+ // The shuffle is concatenating multiple vectors together. Just emit
+ // a CONCAT_VECTORS operation.
+ if (IsConcat) {
+ SmallVector<SDValue, 8> ConcatOps;
+ for (auto Src : ConcatSrcs) {
+ if (Src < 0)
+ ConcatOps.push_back(DAG.getUNDEF(SrcVT));
+ else if (Src == 0)
+ ConcatOps.push_back(Src1);
+ else
+ ConcatOps.push_back(Src2);
+ }
+ setValue(&I, DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps));
+ return;
}
- setValue(&I, DAG.getNode(ISD::CONCAT_VECTORS, getCurSDLoc(),
- VT, ConcatOps));
- return;
}
+ unsigned PaddedMaskNumElts = alignTo(MaskNumElts, SrcNumElts);
+ unsigned NumConcat = PaddedMaskNumElts / SrcNumElts;
+ EVT PaddedVT = EVT::getVectorVT(*DAG.getContext(), VT.getScalarType(),
+ PaddedMaskNumElts);
+
// Pad both vectors with undefs to make them the same length as the mask.
SDValue UndefVal = DAG.getUNDEF(SrcVT);
@@ -3065,24 +3036,32 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) {
MOps1[0] = Src1;
MOps2[0] = Src2;
- Src1 = Src1.isUndef() ? DAG.getUNDEF(VT)
- : DAG.getNode(ISD::CONCAT_VECTORS,
- getCurSDLoc(), VT, MOps1);
- Src2 = Src2.isUndef() ? DAG.getUNDEF(VT)
- : DAG.getNode(ISD::CONCAT_VECTORS,
- getCurSDLoc(), VT, MOps2);
+ Src1 = Src1.isUndef()
+ ? DAG.getUNDEF(PaddedVT)
+ : DAG.getNode(ISD::CONCAT_VECTORS, DL, PaddedVT, MOps1);
+ Src2 = Src2.isUndef()
+ ? DAG.getUNDEF(PaddedVT)
+ : DAG.getNode(ISD::CONCAT_VECTORS, DL, PaddedVT, MOps2);
// Readjust mask for new input vector length.
- SmallVector<int, 8> MappedOps;
+ SmallVector<int, 8> MappedOps(PaddedMaskNumElts, -1);
for (unsigned i = 0; i != MaskNumElts; ++i) {
int Idx = Mask[i];
if (Idx >= (int)SrcNumElts)
- Idx -= SrcNumElts - MaskNumElts;
- MappedOps.push_back(Idx);
+ Idx -= SrcNumElts - PaddedMaskNumElts;
+ MappedOps[i] = Idx;
}
- setValue(&I, DAG.getVectorShuffle(VT, getCurSDLoc(), Src1, Src2,
- MappedOps));
+ SDValue Result = DAG.getVectorShuffle(PaddedVT, DL, Src1, Src2, MappedOps);
+
+ // If the concatenated vector was padded, extract a subvector with the
+ // correct number of elements.
+ if (MaskNumElts != PaddedMaskNumElts)
+ Result = DAG.getNode(
+ ISD::EXTRACT_SUBVECTOR, DL, VT, Result,
+ DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
+
+ setValue(&I, Result);
return;
}
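The rework pads the mask length up to a multiple of the source width (alignTo), remaps second-source indices so they start at the padded width, and extracts the original-length prefix afterwards. A stand-alone sketch of the index remapping:

#include <cstdio>
#include <vector>

static unsigned alignToMultiple(unsigned V, unsigned A) {
  return (V + A - 1) / A * A;
}

// Remap a shuffle mask after both sources are padded to the aligned width.
static std::vector<int> padMask(const std::vector<int> &Mask,
                                unsigned SrcNumElts) {
  unsigned Padded = alignToMultiple((unsigned)Mask.size(), SrcNumElts);
  std::vector<int> Out(Padded, -1);
  for (size_t I = 0; I != Mask.size(); ++I) {
    int Idx = Mask[I];
    if (Idx >= (int)SrcNumElts)
      Idx -= SrcNumElts - Padded; // second source now begins at lane Padded
    Out[I] = Idx;
  }
  return Out;
}

int main() {
  // Two 4-element sources, 6-element mask -> padded to 8 lanes.
  for (int Idx : padMask({0, 1, 4, 5, 2, 6}, 4))
    printf("%d ", Idx); // 0 1 8 9 2 10 -1 -1
  printf("\n");
  return 0;
}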
@@ -3141,10 +3120,9 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) {
if (RangeUse[Input] == 0)
Src = DAG.getUNDEF(VT);
else {
- SDLoc dl = getCurSDLoc();
Src = DAG.getNode(
- ISD::EXTRACT_SUBVECTOR, dl, VT, Src,
- DAG.getConstant(StartIdx[Input], dl,
+ ISD::EXTRACT_SUBVECTOR, DL, VT, Src,
+ DAG.getConstant(StartIdx[Input], DL,
TLI.getVectorIdxTy(DAG.getDataLayout())));
}
}
@@ -3162,8 +3140,7 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) {
MappedOps.push_back(Idx);
}
- setValue(&I, DAG.getVectorShuffle(VT, getCurSDLoc(), Src1, Src2,
- MappedOps));
+ setValue(&I, DAG.getVectorShuffle(VT, DL, Src1, Src2, MappedOps));
return;
}
}
@@ -3173,7 +3150,6 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) {
// to insert and build vector.
EVT EltVT = VT.getVectorElementType();
EVT IdxVT = TLI.getVectorIdxTy(DAG.getDataLayout());
- SDLoc dl = getCurSDLoc();
SmallVector<SDValue,8> Ops;
for (unsigned i = 0; i != MaskNumElts; ++i) {
int Idx = Mask[i];
@@ -3185,14 +3161,14 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) {
SDValue &Src = Idx < (int)SrcNumElts ? Src1 : Src2;
if (Idx >= (int)SrcNumElts) Idx -= SrcNumElts;
- Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
- EltVT, Src, DAG.getConstant(Idx, dl, IdxVT));
+ Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
+ EltVT, Src, DAG.getConstant(Idx, DL, IdxVT));
}
Ops.push_back(Res);
}
- setValue(&I, DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops));
+ setValue(&I, DAG.getNode(ISD::BUILD_VECTOR, DL, VT, Ops));
}
void SelectionDAGBuilder::visitInsertValue(const InsertValueInst &I) {
@@ -3293,13 +3269,13 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
if (VectorWidth && !N.getValueType().isVector()) {
LLVMContext &Context = *DAG.getContext();
EVT VT = EVT::getVectorVT(Context, N.getValueType(), VectorWidth);
- SmallVector<SDValue, 16> Ops(VectorWidth, N);
- N = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops);
+ N = DAG.getSplatBuildVector(VT, dl, N);
}
+
for (gep_type_iterator GTI = gep_type_begin(&I), E = gep_type_end(&I);
GTI != E; ++GTI) {
const Value *Idx = GTI.getOperand();
- if (StructType *StTy = dyn_cast<StructType>(*GTI)) {
+ if (StructType *StTy = GTI.getStructTypeOrNull()) {
unsigned Field = cast<Constant>(Idx)->getUniqueInteger().getZExtValue();
if (Field) {
// N = N + Offset
@@ -3331,8 +3307,9 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
if (CI->isZero())
continue;
APInt Offs = ElementSize * CI->getValue().sextOrTrunc(PtrSize);
+ LLVMContext &Context = *DAG.getContext();
SDValue OffsVal = VectorWidth ?
- DAG.getConstant(Offs, dl, MVT::getVectorVT(PtrTy, VectorWidth)) :
+ DAG.getConstant(Offs, dl, EVT::getVectorVT(Context, PtrTy, VectorWidth)) :
DAG.getConstant(Offs, dl, PtrTy);
// In an inbounds GEP with an offset that is nonnegative even when
@@ -3350,9 +3327,9 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
if (!IdxN.getValueType().isVector() && VectorWidth) {
MVT VT = MVT::getVectorVT(IdxN.getValueType().getSimpleVT(), VectorWidth);
- SmallVector<SDValue, 16> Ops(VectorWidth, IdxN);
- IdxN = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops);
+ IdxN = DAG.getSplatBuildVector(VT, dl, IdxN);
}
+
// If the index is smaller or larger than intptr_t, truncate or extend
// it.
IdxN = DAG.getSExtOrTrunc(IdxN, dl, N.getValueType());
@@ -3433,7 +3410,7 @@ void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) {
setValue(&I, DSA);
DAG.setRoot(DSA.getValue(1));
- assert(FuncInfo.MF->getFrameInfo()->hasVarSizedObjects());
+ assert(FuncInfo.MF->getFrameInfo().hasVarSizedObjects());
}
void SelectionDAGBuilder::visitLoad(const LoadInst &I) {
@@ -3462,17 +3439,8 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) {
bool isVolatile = I.isVolatile();
bool isNonTemporal = I.getMetadata(LLVMContext::MD_nontemporal) != nullptr;
-
- // The IR notion of invariant_load only guarantees that all *non-faulting*
- // invariant loads result in the same value. The MI notion of invariant load
- // guarantees that the load can be legally moved to any location within its
- // containing function. The MI notion of invariant_load is stronger than the
- // IR notion of invariant_load -- an MI invariant_load is an IR invariant_load
- // with a guarantee that the location being loaded from is dereferenceable
- // throughout the function's lifetime.
-
- bool isInvariant = I.getMetadata(LLVMContext::MD_invariant_load) != nullptr &&
- isDereferenceablePointer(SV, DAG.getDataLayout());
+ bool isInvariant = I.getMetadata(LLVMContext::MD_invariant_load) != nullptr;
+ bool isDereferenceable = isDereferenceablePointer(SV, DAG.getDataLayout());
unsigned Alignment = I.getAlignment();
AAMDNodes AAInfo;
@@ -3540,6 +3508,8 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) {
MMOFlags |= MachineMemOperand::MONonTemporal;
if (isInvariant)
MMOFlags |= MachineMemOperand::MOInvariant;
+ if (isDereferenceable)
+ MMOFlags |= MachineMemOperand::MODereferenceable;
SDValue L = DAG.getLoad(ValueVTs[i], dl, Root, A,
MachinePointerInfo(SV, Offsets[i]), Alignment,
@@ -3585,7 +3555,7 @@ void SelectionDAGBuilder::visitStoreToSwiftError(const StoreInst &I) {
SDValue CopyNode = DAG.getCopyToReg(getRoot(), getCurSDLoc(), VReg,
SDValue(Src.getNode(), Src.getResNo()));
DAG.setRoot(CopyNode);
- FuncInfo.setSwiftErrorVReg(FuncInfo.MBB, I.getOperand(1), VReg);
+ FuncInfo.setCurrentSwiftErrorVReg(FuncInfo.MBB, I.getOperand(1), VReg);
}
void SelectionDAGBuilder::visitLoadFromSwiftError(const LoadInst &I) {
@@ -3613,9 +3583,9 @@ void SelectionDAGBuilder::visitLoadFromSwiftError(const LoadInst &I) {
"expect a single EVT for swifterror");
// Chain, DL, Reg, VT, Glue or Chain, DL, Reg, VT
- SDValue L = DAG.getCopyFromReg(getRoot(), getCurSDLoc(),
- FuncInfo.findSwiftErrorVReg(FuncInfo.MBB, SV),
- ValueVTs[0]);
+ SDValue L = DAG.getCopyFromReg(
+ getRoot(), getCurSDLoc(),
+ FuncInfo.getOrCreateSwiftErrorVReg(FuncInfo.MBB, SV), ValueVTs[0]);
setValue(&I, L);
}
@@ -3697,16 +3667,39 @@ void SelectionDAGBuilder::visitStore(const StoreInst &I) {
DAG.setRoot(StoreNode);
}
-void SelectionDAGBuilder::visitMaskedStore(const CallInst &I) {
+void SelectionDAGBuilder::visitMaskedStore(const CallInst &I,
+ bool IsCompressing) {
SDLoc sdl = getCurSDLoc();
- // llvm.masked.store.*(Src0, Ptr, alignment, Mask)
- Value *PtrOperand = I.getArgOperand(1);
+ auto getMaskedStoreOps = [&](Value* &Ptr, Value* &Mask, Value* &Src0,
+ unsigned& Alignment) {
+ // llvm.masked.store.*(Src0, Ptr, alignment, Mask)
+ Src0 = I.getArgOperand(0);
+ Ptr = I.getArgOperand(1);
+ Alignment = cast<ConstantInt>(I.getArgOperand(2))->getZExtValue();
+ Mask = I.getArgOperand(3);
+ };
+ auto getCompressingStoreOps = [&](Value* &Ptr, Value* &Mask, Value* &Src0,
+ unsigned& Alignment) {
+ // llvm.masked.compressstore.*(Src0, Ptr, Mask)
+ Src0 = I.getArgOperand(0);
+ Ptr = I.getArgOperand(1);
+ Mask = I.getArgOperand(2);
+ Alignment = 0;
+ };
+
+ Value *PtrOperand, *MaskOperand, *Src0Operand;
+ unsigned Alignment;
+ if (IsCompressing)
+ getCompressingStoreOps(PtrOperand, MaskOperand, Src0Operand, Alignment);
+ else
+ getMaskedStoreOps(PtrOperand, MaskOperand, Src0Operand, Alignment);
+
SDValue Ptr = getValue(PtrOperand);
- SDValue Src0 = getValue(I.getArgOperand(0));
- SDValue Mask = getValue(I.getArgOperand(3));
+ SDValue Src0 = getValue(Src0Operand);
+ SDValue Mask = getValue(MaskOperand);
+
EVT VT = Src0.getValueType();
- unsigned Alignment = (cast<ConstantInt>(I.getArgOperand(2)))->getZExtValue();
if (!Alignment)
Alignment = DAG.getEVTAlignment(VT);
@@ -3719,7 +3712,8 @@ void SelectionDAGBuilder::visitMaskedStore(const CallInst &I) {
MachineMemOperand::MOStore, VT.getStoreSize(),
Alignment, AAInfo);
SDValue StoreNode = DAG.getMaskedStore(getRoot(), sdl, Src0, Ptr, Mask, VT,
- MMO, false);
+ MMO, false /* Truncating */,
+ IsCompressing);
DAG.setRoot(StoreNode);
setValue(&I, StoreNode);
}
@@ -3740,7 +3734,7 @@ void SelectionDAGBuilder::visitMaskedStore(const CallInst &I) {
// extract the splat value and use it as a uniform base.
// In all other cases the function returns 'false'.
//
-static bool getUniformBase(const Value *& Ptr, SDValue& Base, SDValue& Index,
+static bool getUniformBase(const Value* &Ptr, SDValue& Base, SDValue& Index,
SelectionDAGBuilder* SDB) {
SelectionDAG& DAG = SDB->DAG;
@@ -3777,8 +3771,7 @@ static bool getUniformBase(const Value *& Ptr, SDValue& Base, SDValue& Index,
if (!Index.getValueType().isVector()) {
unsigned GEPWidth = GEP->getType()->getVectorNumElements();
EVT VT = EVT::getVectorVT(Context, Index.getValueType(), GEPWidth);
- SmallVector<SDValue, 16> Ops(GEPWidth, Index);
- Index = DAG.getNode(ISD::BUILD_VECTOR, SDLoc(Index), VT, Ops);
+ Index = DAG.getSplatBuildVector(VT, SDLoc(Index), Index);
}
return true;
}
@@ -3820,18 +3813,38 @@ void SelectionDAGBuilder::visitMaskedScatter(const CallInst &I) {
setValue(&I, Scatter);
}
-void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I) {
+void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I, bool IsExpanding) {
SDLoc sdl = getCurSDLoc();
- // @llvm.masked.load.*(Ptr, alignment, Mask, Src0)
- Value *PtrOperand = I.getArgOperand(0);
+ auto getMaskedLoadOps = [&](Value* &Ptr, Value* &Mask, Value* &Src0,
+ unsigned& Alignment) {
+ // @llvm.masked.load.*(Ptr, alignment, Mask, Src0)
+ Ptr = I.getArgOperand(0);
+ Alignment = cast<ConstantInt>(I.getArgOperand(1))->getZExtValue();
+ Mask = I.getArgOperand(2);
+ Src0 = I.getArgOperand(3);
+ };
+ auto getExpandingLoadOps = [&](Value* &Ptr, Value* &Mask, Value* &Src0,
+ unsigned& Alignment) {
+ // @llvm.masked.expandload.*(Ptr, Mask, Src0)
+ Ptr = I.getArgOperand(0);
+ Alignment = 0;
+ Mask = I.getArgOperand(1);
+ Src0 = I.getArgOperand(2);
+ };
+
+ Value *PtrOperand, *MaskOperand, *Src0Operand;
+ unsigned Alignment;
+ if (IsExpanding)
+ getExpandingLoadOps(PtrOperand, MaskOperand, Src0Operand, Alignment);
+ else
+ getMaskedLoadOps(PtrOperand, MaskOperand, Src0Operand, Alignment);
+
SDValue Ptr = getValue(PtrOperand);
- SDValue Src0 = getValue(I.getArgOperand(3));
- SDValue Mask = getValue(I.getArgOperand(2));
+ SDValue Src0 = getValue(Src0Operand);
+ SDValue Mask = getValue(MaskOperand);
- const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
- unsigned Alignment = (cast<ConstantInt>(I.getArgOperand(1)))->getZExtValue();
+ EVT VT = Src0.getValueType();
if (!Alignment)
Alignment = DAG.getEVTAlignment(VT);
@@ -3851,7 +3864,7 @@ void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I) {
Alignment, AAInfo, Ranges);
SDValue Load = DAG.getMaskedLoad(VT, sdl, InChain, Ptr, Mask, Src0, VT, MMO,
- ISD::NON_EXTLOAD);
+ ISD::NON_EXTLOAD, IsExpanding);
if (AddToChain) {
SDValue OutChain = Load.getValue(1);
DAG.setRoot(OutChain);
@@ -4003,13 +4016,13 @@ void SelectionDAGBuilder::visitAtomicLoad(const LoadInst &I) {
MachineMemOperand::MOLoad,
VT.getStoreSize(),
I.getAlignment() ? I.getAlignment() :
- DAG.getEVTAlignment(VT));
+ DAG.getEVTAlignment(VT),
+ AAMDNodes(), nullptr, Scope, Order);
InChain = TLI.prepareVolatileOrAtomicLoad(InChain, dl, DAG);
SDValue L =
DAG.getAtomic(ISD::ATOMIC_LOAD, dl, VT, VT, InChain,
- getValue(I.getPointerOperand()), MMO,
- Order, Scope);
+ getValue(I.getPointerOperand()), MMO);
SDValue OutChain = L.getValue(1);
@@ -4047,8 +4060,12 @@ void SelectionDAGBuilder::visitAtomicStore(const StoreInst &I) {
/// node.
void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,
unsigned Intrinsic) {
- bool HasChain = !I.doesNotAccessMemory();
- bool OnlyLoad = HasChain && I.onlyReadsMemory();
+ // Ignore the callsite's attributes. A specific call site may be marked with
+ // readnone, but the lowering code will expect the chain based on the
+ // definition.
+ const Function *F = I.getCalledFunction();
+ bool HasChain = !F->doesNotAccessMemory();
+ bool OnlyLoad = HasChain && F->onlyReadsMemory();
// Build the operand list.
SmallVector<SDValue, 8> Ops;
@@ -4156,7 +4173,7 @@ static SDValue GetExponent(SelectionDAG &DAG, SDValue Op,
/// getF32Constant - Get 32-bit floating point constant.
static SDValue getF32Constant(SelectionDAG &DAG, unsigned Flt,
const SDLoc &dl) {
- return DAG.getConstantFP(APFloat(APFloat::IEEEsingle, APInt(32, Flt)), dl,
+ return DAG.getConstantFP(APFloat(APFloat::IEEEsingle(), APInt(32, Flt)), dl,
MVT::f32);
}
@@ -4743,6 +4760,32 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue(
return true;
}
+/// Return the appropriate SDDbgValue based on N.
+SDDbgValue *SelectionDAGBuilder::getDbgValue(SDValue N,
+ DILocalVariable *Variable,
+ DIExpression *Expr, int64_t Offset,
+ DebugLoc dl,
+ unsigned DbgSDNodeOrder) {
+ SDDbgValue *SDV;
+ auto *FISDN = dyn_cast<FrameIndexSDNode>(N.getNode());
+ if (FISDN && Expr->startsWithDeref()) {
+ // Construct a FrameIndexDbgValue for FrameIndexSDNodes so we can describe
+ // stack slot locations as such instead of as indirectly addressed
+ // locations.
+ ArrayRef<uint64_t> TrailingElements(Expr->elements_begin() + 1,
+ Expr->elements_end());
+ DIExpression *DerefedDIExpr =
+ DIExpression::get(*DAG.getContext(), TrailingElements);
+ int FI = FISDN->getIndex();
+ SDV = DAG.getFrameIndexDbgValue(Variable, DerefedDIExpr, FI, 0, dl,
+ DbgSDNodeOrder);
+ } else {
+ SDV = DAG.getDbgValue(Variable, Expr, N.getNode(), N.getResNo(), false,
+ Offset, dl, DbgSDNodeOrder);
+ }
+ return SDV;
+}
+
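When the value is a frame index and the expression starts with a deref, the helper drops that first element and describes the location directly as a stack slot. A tiny model of the element-dropping step, with a plain uint64_t vector standing in for the DIExpression elements:

#include <cassert>
#include <cstdint>
#include <cstdio>
#include <vector>

// Keep everything after the leading deref, mirroring the TrailingElements
// construction above.
static std::vector<uint64_t> dropLeadingDeref(const std::vector<uint64_t> &E) {
  assert(!E.empty() && "expression must start with the deref element");
  return std::vector<uint64_t>(E.begin() + 1, E.end());
}

int main() {
  std::vector<uint64_t> Expr = {0x06 /* DW_OP_deref */, 42, 7};
  printf("%zu\n", dropLeadingDeref(Expr).size()); // 2
  return 0;
}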
// VisualStudio defines setjmp as _setjmp
#if defined(_MSC_VER) && defined(setjmp) && \
!defined(setjmp_undefined_for_msvc)
@@ -4774,6 +4817,10 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
TLI.getPointerTy(DAG.getDataLayout()),
getValue(I.getArgOperand(0))));
return nullptr;
+ case Intrinsic::addressofreturnaddress:
+ setValue(&I, DAG.getNode(ISD::ADDROFRETURNADDR, sdl,
+ TLI.getPointerTy(DAG.getDataLayout())));
+ return nullptr;
case Intrinsic::frameaddress:
setValue(&I, DAG.getNode(ISD::FRAMEADDR, sdl,
TLI.getPointerTy(DAG.getDataLayout()),
@@ -4850,6 +4897,51 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
updateDAGForMaybeTailCall(MM);
return nullptr;
}
+ case Intrinsic::memcpy_element_atomic: {
+ SDValue Dst = getValue(I.getArgOperand(0));
+ SDValue Src = getValue(I.getArgOperand(1));
+ SDValue NumElements = getValue(I.getArgOperand(2));
+ SDValue ElementSize = getValue(I.getArgOperand(3));
+
+ // Emit a library call.
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+ Entry.Ty = DAG.getDataLayout().getIntPtrType(*DAG.getContext());
+ Entry.Node = Dst;
+ Args.push_back(Entry);
+
+ Entry.Node = Src;
+ Args.push_back(Entry);
+
+ Entry.Ty = I.getArgOperand(2)->getType();
+ Entry.Node = NumElements;
+ Args.push_back(Entry);
+
+ Entry.Ty = Type::getInt32Ty(*DAG.getContext());
+ Entry.Node = ElementSize;
+ Args.push_back(Entry);
+
+ uint64_t ElementSizeConstant =
+ cast<ConstantInt>(I.getArgOperand(3))->getZExtValue();
+ RTLIB::Libcall LibraryCall =
+ RTLIB::getMEMCPY_ELEMENT_ATOMIC(ElementSizeConstant);
+ if (LibraryCall == RTLIB::UNKNOWN_LIBCALL)
+ report_fatal_error("Unsupported element size");
+
+ TargetLowering::CallLoweringInfo CLI(DAG);
+ CLI.setDebugLoc(sdl)
+ .setChain(getRoot())
+ .setCallee(TLI.getLibcallCallingConv(LibraryCall),
+ Type::getVoidTy(*DAG.getContext()),
+ DAG.getExternalSymbol(
+ TLI.getLibcallName(LibraryCall),
+ TLI.getPointerTy(DAG.getDataLayout())),
+ std::move(Args));
+
+ std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI);
+ DAG.setRoot(CallResult.second);
+ return nullptr;
+ }
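RTLIB::getMEMCPY_ELEMENT_ATOMIC maps the constant element size onto a fixed set of libcall variants, and unsupported sizes become the fatal error above. A sketch of that dispatch shape; the symbol names below are an assumption for illustration, not taken from this patch:

#include <cstdint>
#include <cstdio>

// Hypothetical libcall names keyed by element size; the patch only shows
// that such a size-indexed RTLIB dispatch exists.
static const char *elementAtomicMemcpyLibcall(uint64_t ElementSize) {
  switch (ElementSize) {
  case 1:  return "__llvm_memcpy_element_atomic_1";
  case 2:  return "__llvm_memcpy_element_atomic_2";
  case 4:  return "__llvm_memcpy_element_atomic_4";
  case 8:  return "__llvm_memcpy_element_atomic_8";
  case 16: return "__llvm_memcpy_element_atomic_16";
  default: return nullptr; // caller reports "Unsupported element size"
  }
}

int main() {
  printf("%s\n", elementAtomicMemcpyLibcall(4));
  return 0;
}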
case Intrinsic::dbg_declare: {
const DbgDeclareInst &DI = cast<DbgDeclareInst>(I);
DILocalVariable *Variable = DI.getVariable();
@@ -4944,8 +5036,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
if (N.getNode()) {
if (!EmitFuncArgumentDbgValue(V, Variable, Expression, dl, Offset,
false, N)) {
- SDV = DAG.getDbgValue(Variable, Expression, N.getNode(), N.getResNo(),
- false, Offset, dl, SDNodeOrder);
+ SDV = getDbgValue(N, Variable, Expression, Offset, dl, SDNodeOrder);
DAG.AddDbgValue(SDV, N.getNode(), false);
}
} else if (!V->use_empty()) {
@@ -4980,7 +5071,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
case Intrinsic::eh_typeid_for: {
// Find the type id for the given typeinfo.
GlobalValue *GV = ExtractTypeInfo(I.getArgOperand(0));
- unsigned TypeID = DAG.getMachineFunction().getMMI().getTypeIDFor(GV);
+ unsigned TypeID = DAG.getMachineFunction().getTypeIDFor(GV);
Res = DAG.getConstant(TypeID, sdl, MVT::i32);
setValue(&I, Res);
return nullptr;
@@ -4988,7 +5079,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
case Intrinsic::eh_return_i32:
case Intrinsic::eh_return_i64:
- DAG.getMachineFunction().getMMI().setCallsEHReturn(true);
+ DAG.getMachineFunction().setCallsEHReturn(true);
DAG.setRoot(DAG.getNode(ISD::EH_RETURN, sdl,
MVT::Other,
getControlRoot(),
@@ -4996,7 +5087,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
getValue(I.getArgOperand(1))));
return nullptr;
case Intrinsic::eh_unwind_init:
- DAG.getMachineFunction().getMMI().setCallsUnwindInit(true);
+ DAG.getMachineFunction().setCallsUnwindInit(true);
return nullptr;
case Intrinsic::eh_dwarf_cfa: {
setValue(&I, DAG.getNode(ISD::EH_DWARF_CFA, sdl,
@@ -5015,11 +5106,11 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
}
case Intrinsic::eh_sjlj_functioncontext: {
// Get and store the index of the function context.
- MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+ MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
AllocaInst *FnCtx =
cast<AllocaInst>(I.getArgOperand(0)->stripPointerCasts());
int FI = FuncInfo.StaticAllocaMap[FnCtx];
- MFI->setFunctionContextIndex(FI);
+ MFI.setFunctionContextIndex(FI);
return nullptr;
}
case Intrinsic::eh_sjlj_setjmp: {
@@ -5055,6 +5146,12 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
case Intrinsic::masked_store:
visitMaskedStore(I);
return nullptr;
+ case Intrinsic::masked_expandload:
+ visitMaskedLoad(I, true /* IsExpanding */);
+ return nullptr;
+ case Intrinsic::masked_compressstore:
+ visitMaskedStore(I, true /* IsCompressing */);
+ return nullptr;
case Intrinsic::x86_mmx_pslli_w:
case Intrinsic::x86_mmx_pslli_d:
case Intrinsic::x86_mmx_pslli_q:
@@ -5114,39 +5211,6 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
setValue(&I, Res);
return nullptr;
}
- case Intrinsic::convertff:
- case Intrinsic::convertfsi:
- case Intrinsic::convertfui:
- case Intrinsic::convertsif:
- case Intrinsic::convertuif:
- case Intrinsic::convertss:
- case Intrinsic::convertsu:
- case Intrinsic::convertus:
- case Intrinsic::convertuu: {
- ISD::CvtCode Code = ISD::CVT_INVALID;
- switch (Intrinsic) {
- default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
- case Intrinsic::convertff: Code = ISD::CVT_FF; break;
- case Intrinsic::convertfsi: Code = ISD::CVT_FS; break;
- case Intrinsic::convertfui: Code = ISD::CVT_FU; break;
- case Intrinsic::convertsif: Code = ISD::CVT_SF; break;
- case Intrinsic::convertuif: Code = ISD::CVT_UF; break;
- case Intrinsic::convertss: Code = ISD::CVT_SS; break;
- case Intrinsic::convertsu: Code = ISD::CVT_SU; break;
- case Intrinsic::convertus: Code = ISD::CVT_US; break;
- case Intrinsic::convertuu: Code = ISD::CVT_UU; break;
- }
- EVT DestVT = TLI.getValueType(DAG.getDataLayout(), I.getType());
- const Value *Op1 = I.getArgOperand(0);
- Res = DAG.getConvertRndSat(DestVT, sdl, getValue(Op1),
- DAG.getValueType(DestVT),
- DAG.getValueType(getValue(Op1).getValueType()),
- getValue(I.getArgOperand(1)),
- getValue(I.getArgOperand(2)),
- Code);
- setValue(&I, Res);
- return nullptr;
- }
case Intrinsic::powi:
setValue(&I, ExpandPowI(sdl, getValue(I.getArgOperand(0)),
getValue(I.getArgOperand(1)), DAG));
@@ -5368,7 +5432,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
case Intrinsic::stackprotector: {
// Emit code into the DAG to store the stack guard onto the stack.
MachineFunction &MF = DAG.getMachineFunction();
- MachineFrameInfo *MFI = MF.getFrameInfo();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
EVT PtrTy = TLI.getPointerTy(DAG.getDataLayout());
SDValue Src, Chain = getRoot();
@@ -5380,7 +5444,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
AllocaInst *Slot = cast<AllocaInst>(I.getArgOperand(1));
int FI = FuncInfo.StaticAllocaMap[Slot];
- MFI->setStackProtectorIndex(FI);
+ MFI.setStackProtectorIndex(FI);
SDValue FIN = DAG.getFrameIndex(FI, PtrTy);
@@ -5411,6 +5475,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
}
case Intrinsic::annotation:
case Intrinsic::ptr_annotation:
+ case Intrinsic::invariant_group_barrier:
// Drop the intrinsic, but forward the value
setValue(&I, getValue(I.getOperand(0)));
return nullptr;
@@ -5687,7 +5752,8 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
std::pair<SDValue, SDValue>
SelectionDAGBuilder::lowerInvokable(TargetLowering::CallLoweringInfo &CLI,
const BasicBlock *EHPadBB) {
- MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI();
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineModuleInfo &MMI = MF.getMMI();
MCSymbol *BeginLabel = nullptr;
if (EHPadBB) {
@@ -5699,7 +5765,7 @@ SelectionDAGBuilder::lowerInvokable(TargetLowering::CallLoweringInfo &CLI,
// so as to maintain the ordering of pads in the LSDA.
unsigned CallSiteIndex = MMI.getCurrentCallSite();
if (CallSiteIndex) {
- MMI.setCallSiteBeginLabel(BeginLabel, CallSiteIndex);
+ MF.setCallSiteBeginLabel(BeginLabel, CallSiteIndex);
LPadToCallSiteMap[FuncInfo.MBBMap[EHPadBB]].push_back(CallSiteIndex);
// Now that the call site is handled, stop tracking it.
@@ -5740,13 +5806,13 @@ SelectionDAGBuilder::lowerInvokable(TargetLowering::CallLoweringInfo &CLI,
DAG.setRoot(DAG.getEHLabel(getCurSDLoc(), getRoot(), EndLabel));
// Inform MachineModuleInfo of range.
- if (MMI.hasEHFunclets()) {
+ if (MF.hasEHFunclets()) {
assert(CLI.CS);
WinEHFuncInfo *EHInfo = DAG.getMachineFunction().getWinEHFuncInfo();
EHInfo->addIPToStateRange(cast<InvokeInst>(CLI.CS->getInstruction()),
BeginLabel, EndLabel);
} else {
- MMI.addInvoke(FuncInfo.MBBMap[EHPadBB], BeginLabel, EndLabel);
+ MF.addInvoke(FuncInfo.MBBMap[EHPadBB], BeginLabel, EndLabel);
}
}
@@ -5766,6 +5832,15 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
const Value *SwiftErrorVal = nullptr;
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+
+ // We can't tail call inside a function with a swifterror argument. Lowering
+ // does not support this yet. The swifterror value would have to be moved
+ // into the swifterror register before the call.
+ auto *Caller = CS.getInstruction()->getParent()->getParent();
+ if (TLI.supportSwiftError() &&
+ Caller->getAttributes().hasAttrSomewhere(Attribute::SwiftError))
+ isTailCall = false;
+
for (ImmutableCallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end();
i != e; ++i) {
const Value *V = *i;
@@ -5785,9 +5860,9 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
SwiftErrorVal = V;
// We find the virtual register for the actual swifterror argument.
// Instead of using the Value, we use the virtual register instead.
- Entry.Node = DAG.getRegister(
- FuncInfo.findSwiftErrorVReg(FuncInfo.MBB, V),
- EVT(TLI.getPointerTy(DL)));
+ Entry.Node =
+ DAG.getRegister(FuncInfo.getOrCreateSwiftErrorVReg(FuncInfo.MBB, V),
+ EVT(TLI.getPointerTy(DL)));
}
Args.push_back(Entry);
@@ -5803,6 +5878,11 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
if (isTailCall && !isInTailCallPosition(CS, DAG.getTarget()))
isTailCall = false;
+ // Disable tail calls if there is a swifterror argument. Targets have not
+ // been updated to support tail calls.
+ if (TLI.supportSwiftError() && SwiftErrorVal)
+ isTailCall = false;
+
TargetLowering::CallLoweringInfo CLI(DAG);
CLI.setDebugLoc(getCurSDLoc())
.setChain(getRoot())
@@ -5827,7 +5907,7 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
unsigned VReg = FuncInfo.MF->getRegInfo().createVirtualRegister(RC);
SDValue CopyNode = CLI.DAG.getCopyToReg(Result.second, CLI.DL, VReg, Src);
// We update the virtual register for the actual swifterror argument.
- FuncInfo.setSwiftErrorVReg(FuncInfo.MBB, SwiftErrorVal, VReg);
+ FuncInfo.setCurrentSwiftErrorVReg(FuncInfo.MBB, SwiftErrorVal, VReg);
DAG.setRoot(CopyNode);
}
}
@@ -6035,6 +6115,49 @@ bool SelectionDAGBuilder::visitMemChrCall(const CallInst &I) {
return false;
}
+///
+/// visitMemPCpyCall -- lower a mempcpy call as a memcpy followed by code
+/// to adjust the dst pointer by the size of the copied memory.
+bool SelectionDAGBuilder::visitMemPCpyCall(const CallInst &I) {
+
+ // Verify argument count: void *mempcpy(void *, const void *, size_t)
+ if (I.getNumArgOperands() != 3)
+ return false;
+
+ SDValue Dst = getValue(I.getArgOperand(0));
+ SDValue Src = getValue(I.getArgOperand(1));
+ SDValue Size = getValue(I.getArgOperand(2));
+
+ unsigned DstAlign = DAG.InferPtrAlignment(Dst);
+ unsigned SrcAlign = DAG.InferPtrAlignment(Src);
+ unsigned Align = std::min(DstAlign, SrcAlign);
+ if (Align == 0) // Alignment of one or both could not be inferred.
+ Align = 1; // 0 and 1 both specify no alignment, but 0 is reserved.
+
+ bool isVol = false;
+ SDLoc sdl = getCurSDLoc();
+
+ // In the mempcpy context we need to pass in a false value for isTailCall
+ // because the return pointer needs to be adjusted by the size of
+ // the copied memory.
+ SDValue MC = DAG.getMemcpy(getRoot(), sdl, Dst, Src, Size, Align, isVol,
+ false, /*isTailCall=*/false,
+ MachinePointerInfo(I.getArgOperand(0)),
+ MachinePointerInfo(I.getArgOperand(1)));
+ assert(MC.getNode() != nullptr &&
+ "** memcpy should not be lowered as TailCall in mempcpy context **");
+ DAG.setRoot(MC);
+
+ // Check if Size needs to be truncated or extended.
+ Size = DAG.getSExtOrTrunc(Size, sdl, Dst.getValueType());
+
+ // Adjust return pointer to point just past the last dst byte.
+ SDValue DstPlusSize = DAG.getNode(ISD::ADD, sdl, Dst.getValueType(),
+ Dst, Size);
+ setValue(&I, DstPlusSize);
+ return true;
+}
+
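For reference, the lowering above restates the libc contract for mempcpy; a minimal user-level model (illustrative sketch, not part of this change):

#include <cstring>

// mempcpy is memcpy that returns the advanced destination pointer --
// exactly the ISD::ADD of Dst and Size built above.
static void *mempcpy_model(void *dst, const void *src, std::size_t n) {
  std::memcpy(dst, src, n);
  return static_cast<char *>(dst) + n;
}
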
/// visitStrCpyCall -- See if we can lower a strcpy or stpcpy call into an
/// optimized form. If so, return true and lower it, otherwise return false
/// and it will be lowered like a normal call.
@@ -6191,7 +6314,7 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) {
}
MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI();
- ComputeUsesVAFloatArgument(I, &MMI);
+ computeUsesVAFloatArgument(I, MMI);
const char *RenameFn = nullptr;
if (Function *F = I.getCalledFunction()) {
@@ -6325,6 +6448,10 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) {
if (visitMemCmpCall(I))
return;
break;
+ case LibFunc::mempcpy:
+ if (visitMemPCpyCall(I))
+ return;
+ break;
case LibFunc::memchr:
if (visitMemChrCall(I))
return;
@@ -6395,6 +6522,19 @@ public:
: TargetLowering::AsmOperandInfo(info), CallOperand(nullptr,0) {
}
+ /// Whether or not this operand accesses memory
+ bool hasMemory(const TargetLowering &TLI) const {
+ // Indirect operands access memory.
+ if (isIndirect)
+ return true;
+
+ for (const auto &Code : Codes)
+ if (TLI.getConstraintType(Code) == TargetLowering::C_Memory)
+ return true;
+
+ return false;
+ }
+
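What hasMemory() ends up classifying, in user terms: a hypothetical GCC-style snippet (x86 assumed) whose "m" constraint maps to TargetLowering::C_Memory:

// The "+m" operand reads and writes memory, so hasMemory(TLI) is true
// for it and the INLINEASM node will carry MayLoad | MayStore.
static void bump(int &v) {
  asm volatile("incl %0" : "+m"(v));
}
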
/// getCallOperandValEVT - Return the EVT of the Value* that this operand
/// corresponds to. If there is no Value* for this operand, it returns
/// MVT::Other.
@@ -6447,6 +6587,75 @@ typedef SmallVector<SDISelAsmOperandInfo,16> SDISelAsmOperandInfoVector;
} // end anonymous namespace
+/// Make sure that the output operand \p OpInfo and its corresponding input
+/// operand \p MatchingOpInfo have compatible constraint types (otherwise error
+/// out).
+static void patchMatchingInput(const SDISelAsmOperandInfo &OpInfo,
+ SDISelAsmOperandInfo &MatchingOpInfo,
+ SelectionDAG &DAG) {
+ if (OpInfo.ConstraintVT == MatchingOpInfo.ConstraintVT)
+ return;
+
+ const TargetRegisterInfo *TRI = DAG.getSubtarget().getRegisterInfo();
+ const auto &TLI = DAG.getTargetLoweringInfo();
+
+ std::pair<unsigned, const TargetRegisterClass *> MatchRC =
+ TLI.getRegForInlineAsmConstraint(TRI, OpInfo.ConstraintCode,
+ OpInfo.ConstraintVT);
+ std::pair<unsigned, const TargetRegisterClass *> InputRC =
+ TLI.getRegForInlineAsmConstraint(TRI, MatchingOpInfo.ConstraintCode,
+ MatchingOpInfo.ConstraintVT);
+ if ((OpInfo.ConstraintVT.isInteger() !=
+ MatchingOpInfo.ConstraintVT.isInteger()) ||
+ (MatchRC.second != InputRC.second)) {
+ // FIXME: error out in a more elegant fashion
+ report_fatal_error("Unsupported asm: input constraint"
+ " with a matching output constraint of"
+ " incompatible type!");
+ }
+ MatchingOpInfo.ConstraintVT = OpInfo.ConstraintVT;
+}
+
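For context, a matching ("tied") constraint in user code, x86 assumed; the "0" input must share operand 0's register and hence its type, which is the compatibility patchMatchingInput enforces:

static int increment(int x) {
  // Input "0" reuses whatever register the output %0 was given.
  asm("addl $1, %0" : "=r"(x) : "0"(x));
  return x;
}
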
+/// Get a direct memory input to behave well as an indirect operand.
+/// This may introduce stores, hence the need for a \p Chain.
+/// \return The (possibly updated) chain.
+static SDValue getAddressForMemoryInput(SDValue Chain, const SDLoc &Location,
+ SDISelAsmOperandInfo &OpInfo,
+ SelectionDAG &DAG) {
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+
+ // If we don't have an indirect input, put it in the constpool if we can,
+ // otherwise spill it to a stack slot.
+ // TODO: This isn't quite right. We need to handle these according to
+ // the addressing mode that the constraint wants. Also, this may take
+ // an additional register for the computation and we don't want that
+ // either.
+
+ // If the operand is a float, integer, or vector constant, spill to a
+ // constant pool entry to get its address.
+ const Value *OpVal = OpInfo.CallOperandVal;
+ if (isa<ConstantFP>(OpVal) || isa<ConstantInt>(OpVal) ||
+ isa<ConstantVector>(OpVal) || isa<ConstantDataVector>(OpVal)) {
+ OpInfo.CallOperand = DAG.getConstantPool(
+ cast<Constant>(OpVal), TLI.getPointerTy(DAG.getDataLayout()));
+ return Chain;
+ }
+
+ // Otherwise, create a stack slot and emit a store to it before the asm.
+ Type *Ty = OpVal->getType();
+ auto &DL = DAG.getDataLayout();
+ uint64_t TySize = DL.getTypeAllocSize(Ty);
+ unsigned Align = DL.getPrefTypeAlignment(Ty);
+ MachineFunction &MF = DAG.getMachineFunction();
+ int SSFI = MF.getFrameInfo().CreateStackObject(TySize, Align, false);
+ SDValue StackSlot = DAG.getFrameIndex(SSFI, TLI.getPointerTy(DL));
+ Chain = DAG.getStore(Chain, Location, OpInfo.CallOperand, StackSlot,
+ MachinePointerInfo::getFixedStack(MF, SSFI));
+ OpInfo.CallOperand = StackSlot;
+
+ return Chain;
+}
+
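A sketch of the situation this helper resolves (hypothetical user code, x86 assumed): the "m" constraint demands an address, but a plain constant value has none, so lowering parks it in the constant pool or a fresh stack slot and chains the store:

static int load_it() {
  const int c = 1234; // a direct constant value after optimization
  int r;
  asm("movl %1, %0" : "=r"(r) : "m"(c)); // asm receives c's new address
  return r;
}
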
/// GetRegistersForValue - Assign registers (virtual or physical) for the
/// specified operand. We prefer to assign virtual registers, to allow the
/// register allocator to handle the assignment process. However, if the asm
@@ -6555,6 +6764,73 @@ static void GetRegistersForValue(SelectionDAG &DAG, const TargetLowering &TLI,
// Otherwise, we couldn't allocate enough registers for this.
}
+static unsigned
+findMatchingInlineAsmOperand(unsigned OperandNo,
+ const std::vector<SDValue> &AsmNodeOperands) {
+ // Scan until we find the definition we already emitted of this operand.
+ unsigned CurOp = InlineAsm::Op_FirstOperand;
+ for (; OperandNo; --OperandNo) {
+ // Advance to the next operand.
+ unsigned OpFlag =
+ cast<ConstantSDNode>(AsmNodeOperands[CurOp])->getZExtValue();
+ assert((InlineAsm::isRegDefKind(OpFlag) ||
+ InlineAsm::isRegDefEarlyClobberKind(OpFlag) ||
+ InlineAsm::isMemKind(OpFlag)) &&
+ "Skipped past definitions?");
+ CurOp += InlineAsm::getNumOperandRegisters(OpFlag) + 1;
+ }
+ return CurOp;
+}
+
+/// Fill \p Regs with \p NumRegs new virtual registers of type \p RegVT
+/// \return true if it has succeeded, false otherwise
+static bool createVirtualRegs(SmallVector<unsigned, 4> &Regs, unsigned NumRegs,
+ MVT RegVT, SelectionDAG &DAG) {
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ MachineRegisterInfo &RegInfo = DAG.getMachineFunction().getRegInfo();
+ for (unsigned i = 0, e = NumRegs; i != e; ++i) {
+ if (const TargetRegisterClass *RC = TLI.getRegClassFor(RegVT))
+ Regs.push_back(RegInfo.createVirtualRegister(RC));
+ else
+ return false;
+ }
+ return true;
+}
+
+class ExtraFlags {
+ unsigned Flags = 0;
+
+public:
+ explicit ExtraFlags(ImmutableCallSite CS) {
+ const InlineAsm *IA = cast<InlineAsm>(CS.getCalledValue());
+ if (IA->hasSideEffects())
+ Flags |= InlineAsm::Extra_HasSideEffects;
+ if (IA->isAlignStack())
+ Flags |= InlineAsm::Extra_IsAlignStack;
+ if (CS.isConvergent())
+ Flags |= InlineAsm::Extra_IsConvergent;
+ Flags |= IA->getDialect() * InlineAsm::Extra_AsmDialect;
+ }
+
+ void update(const llvm::TargetLowering::AsmOperandInfo &OpInfo) {
+ // Ideally, we would only check against memory constraints. However, the
+ // meaning of an Other constraint can be target-specific and we can't easily
+ // reason about it. Therefore, be conservative and set MayLoad/MayStore
+ // for Other constraints as well.
+ if (OpInfo.ConstraintType == TargetLowering::C_Memory ||
+ OpInfo.ConstraintType == TargetLowering::C_Other) {
+ if (OpInfo.Type == InlineAsm::isInput)
+ Flags |= InlineAsm::Extra_MayLoad;
+ else if (OpInfo.Type == InlineAsm::isOutput)
+ Flags |= InlineAsm::Extra_MayStore;
+ else if (OpInfo.Type == InlineAsm::isClobber)
+ Flags |= (InlineAsm::Extra_MayLoad | InlineAsm::Extra_MayStore);
+ }
+ }
+
+ unsigned get() const { return Flags; }
+};
+
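For orientation, the word assembled here is a plain bitmask; a standalone demo using the Extra_* values as defined in llvm/IR/InlineAsm.h (values quoted from memory; treat them as an assumption):

#include <cstdio>

enum : unsigned { // assumed to mirror llvm/IR/InlineAsm.h
  HasSideEffects = 1, IsAlignStack = 2, AsmDialect = 4,
  MayLoad = 8, MayStore = 16, IsConvergent = 32,
};

int main() {
  // A side-effecting Intel-dialect asm with a memory output:
  unsigned Word = HasSideEffects | 1 /*Intel*/ * AsmDialect | MayStore;
  std::printf("extra-info word: %#x\n", Word); // 0x15
}
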
/// visitInlineAsm - Handle a call to an InlineAsm object.
///
void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
@@ -6569,6 +6845,9 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
bool hasMemory = false;
+ // Remember the HasSideEffect, AlignStack, AsmDialect, MayLoad and MayStore bits.
+ ExtraFlags ExtraInfo(CS);
+
unsigned ArgNo = 0; // ArgNo - The argument of the CallInst.
unsigned ResNo = 0; // ResNo - The result number of the next output.
for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) {
@@ -6578,14 +6857,25 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
MVT OpVT = MVT::Other;
// Compute the value type for each operand.
- switch (OpInfo.Type) {
- case InlineAsm::isOutput:
- // Indirect outputs just consume an argument.
- if (OpInfo.isIndirect) {
- OpInfo.CallOperandVal = const_cast<Value *>(CS.getArgument(ArgNo++));
- break;
+ if (OpInfo.Type == InlineAsm::isInput ||
+ (OpInfo.Type == InlineAsm::isOutput && OpInfo.isIndirect)) {
+ OpInfo.CallOperandVal = const_cast<Value *>(CS.getArgument(ArgNo++));
+
+ // Process the call argument. BasicBlocks are labels, currently appearing
+ // only in asm's.
+ if (const BasicBlock *BB = dyn_cast<BasicBlock>(OpInfo.CallOperandVal)) {
+ OpInfo.CallOperand = DAG.getBasicBlock(FuncInfo.MBBMap[BB]);
+ } else {
+ OpInfo.CallOperand = getValue(OpInfo.CallOperandVal);
}
+ OpVT =
+ OpInfo
+ .getCallOperandValEVT(*DAG.getContext(), TLI, DAG.getDataLayout())
+ .getSimpleVT();
+ }
+
+ if (OpInfo.Type == InlineAsm::isOutput && !OpInfo.isIndirect) {
// The return value of the call is this value. As such, there is no
// corresponding argument.
assert(!CS.getType()->isVoidTy() && "Bad inline asm!");
@@ -6597,43 +6887,21 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
OpVT = TLI.getSimpleValueType(DAG.getDataLayout(), CS.getType());
}
++ResNo;
- break;
- case InlineAsm::isInput:
- OpInfo.CallOperandVal = const_cast<Value *>(CS.getArgument(ArgNo++));
- break;
- case InlineAsm::isClobber:
- // Nothing to do.
- break;
}
- // If this is an input or an indirect output, process the call argument.
- // BasicBlocks are labels, currently appearing only in asm's.
- if (OpInfo.CallOperandVal) {
- if (const BasicBlock *BB = dyn_cast<BasicBlock>(OpInfo.CallOperandVal)) {
- OpInfo.CallOperand = DAG.getBasicBlock(FuncInfo.MBBMap[BB]);
- } else {
- OpInfo.CallOperand = getValue(OpInfo.CallOperandVal);
- }
+ OpInfo.ConstraintVT = OpVT;
- OpVT = OpInfo.getCallOperandValEVT(*DAG.getContext(), TLI,
- DAG.getDataLayout()).getSimpleVT();
- }
+ if (!hasMemory)
+ hasMemory = OpInfo.hasMemory(TLI);
- OpInfo.ConstraintVT = OpVT;
+ // Determine if this InlineAsm MayLoad or MayStore based on the constraints.
+ // FIXME: Could we compute this on OpInfo rather than TargetConstraints[i]?
+ auto TargetConstraint = TargetConstraints[i];
- // Indirect operand accesses access memory.
- if (OpInfo.isIndirect)
- hasMemory = true;
- else {
- for (unsigned j = 0, ee = OpInfo.Codes.size(); j != ee; ++j) {
- TargetLowering::ConstraintType
- CType = TLI.getConstraintType(OpInfo.Codes[j]);
- if (CType == TargetLowering::C_Memory) {
- hasMemory = true;
- break;
- }
- }
- }
+ // Compute the constraint code and ConstraintType to use.
+ TLI.ComputeConstraintToUse(TargetConstraint, SDValue());
+
+ ExtraInfo.update(TargetConstraint);
}
SDValue Chain, Flag;
@@ -6656,24 +6924,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
// error.
if (OpInfo.hasMatchingInput()) {
SDISelAsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
-
- if (OpInfo.ConstraintVT != Input.ConstraintVT) {
- const TargetRegisterInfo *TRI = DAG.getSubtarget().getRegisterInfo();
- std::pair<unsigned, const TargetRegisterClass *> MatchRC =
- TLI.getRegForInlineAsmConstraint(TRI, OpInfo.ConstraintCode,
- OpInfo.ConstraintVT);
- std::pair<unsigned, const TargetRegisterClass *> InputRC =
- TLI.getRegForInlineAsmConstraint(TRI, Input.ConstraintCode,
- Input.ConstraintVT);
- if ((OpInfo.ConstraintVT.isInteger() !=
- Input.ConstraintVT.isInteger()) ||
- (MatchRC.second != InputRC.second)) {
- report_fatal_error("Unsupported asm: input constraint"
- " with a matching output constraint of"
- " incompatible type!");
- }
- Input.ConstraintVT = OpInfo.ConstraintVT;
- }
+ patchMatchingInput(OpInfo, Input, DAG);
}
// Compute the constraint code and ConstraintType to use.
@@ -6691,37 +6942,8 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
(OpInfo.Type == InlineAsm::isInput)) &&
"Can only indirectify direct input operands!");
- // Memory operands really want the address of the value. If we don't have
- // an indirect input, put it in the constpool if we can, otherwise spill
- // it to a stack slot.
- // TODO: This isn't quite right. We need to handle these according to
- // the addressing mode that the constraint wants. Also, this may take
- // an additional register for the computation and we don't want that
- // either.
-
- // If the operand is a float, integer, or vector constant, spill to a
- // constant pool entry to get its address.
- const Value *OpVal = OpInfo.CallOperandVal;
- if (isa<ConstantFP>(OpVal) || isa<ConstantInt>(OpVal) ||
- isa<ConstantVector>(OpVal) || isa<ConstantDataVector>(OpVal)) {
- OpInfo.CallOperand = DAG.getConstantPool(
- cast<Constant>(OpVal), TLI.getPointerTy(DAG.getDataLayout()));
- } else {
- // Otherwise, create a stack slot and emit a store to it before the
- // asm.
- Type *Ty = OpVal->getType();
- auto &DL = DAG.getDataLayout();
- uint64_t TySize = DL.getTypeAllocSize(Ty);
- unsigned Align = DL.getPrefTypeAlignment(Ty);
- MachineFunction &MF = DAG.getMachineFunction();
- int SSFI = MF.getFrameInfo()->CreateStackObject(TySize, Align, false);
- SDValue StackSlot =
- DAG.getFrameIndex(SSFI, TLI.getPointerTy(DAG.getDataLayout()));
- Chain = DAG.getStore(
- Chain, getCurSDLoc(), OpInfo.CallOperand, StackSlot,
- MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SSFI));
- OpInfo.CallOperand = StackSlot;
- }
+ // Memory operands really want the address of the value.
+ Chain = getAddressForMemoryInput(Chain, getCurSDLoc(), OpInfo, DAG);
// There is no longer a Value* corresponding to this operand.
OpInfo.CallOperandVal = nullptr;
@@ -6736,7 +6958,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
GetRegistersForValue(DAG, TLI, getCurSDLoc(), OpInfo);
}
- // Second pass - Loop over all of the operands, assigning virtual or physregs
+ // Third pass - Loop over all of the operands, assigning virtual or physregs
// to register class operands.
for (unsigned i = 0, e = ConstraintOperands.size(); i != e; ++i) {
SDISelAsmOperandInfo &OpInfo = ConstraintOperands[i];
@@ -6761,40 +6983,8 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
// Remember the HasSideEffect, AlignStack, AsmDialect, MayLoad and MayStore
// bits as operand 3.
- unsigned ExtraInfo = 0;
- if (IA->hasSideEffects())
- ExtraInfo |= InlineAsm::Extra_HasSideEffects;
- if (IA->isAlignStack())
- ExtraInfo |= InlineAsm::Extra_IsAlignStack;
- if (CS.isConvergent())
- ExtraInfo |= InlineAsm::Extra_IsConvergent;
- // Set the asm dialect.
- ExtraInfo |= IA->getDialect() * InlineAsm::Extra_AsmDialect;
-
- // Determine if this InlineAsm MayLoad or MayStore based on the constraints.
- for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) {
- TargetLowering::AsmOperandInfo &OpInfo = TargetConstraints[i];
-
- // Compute the constraint code and ConstraintType to use.
- TLI.ComputeConstraintToUse(OpInfo, SDValue());
-
- // Ideally, we would only check against memory constraints. However, the
- // meaning of an other constraint can be target-specific and we can't easily
- // reason about it. Therefore, be conservative and set MayLoad/MayStore
- // for other constriants as well.
- if (OpInfo.ConstraintType == TargetLowering::C_Memory ||
- OpInfo.ConstraintType == TargetLowering::C_Other) {
- if (OpInfo.Type == InlineAsm::isInput)
- ExtraInfo |= InlineAsm::Extra_MayLoad;
- else if (OpInfo.Type == InlineAsm::isOutput)
- ExtraInfo |= InlineAsm::Extra_MayStore;
- else if (OpInfo.Type == InlineAsm::isClobber)
- ExtraInfo |= (InlineAsm::Extra_MayLoad | InlineAsm::Extra_MayStore);
- }
- }
-
AsmNodeOperands.push_back(DAG.getTargetConstant(
- ExtraInfo, getCurSDLoc(), TLI.getPointerTy(DAG.getDataLayout())));
+ ExtraInfo.get(), getCurSDLoc(), TLI.getPointerTy(DAG.getDataLayout())));
// Loop over all of the inputs, copying the operand values into the
// appropriate registers and processing the output regs.
@@ -6862,24 +7052,11 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
case InlineAsm::isInput: {
SDValue InOperandVal = OpInfo.CallOperand;
- if (OpInfo.isMatchingInputConstraint()) { // Matching constraint?
+ if (OpInfo.isMatchingInputConstraint()) {
// If this is required to match an output register we have already set,
// just use its register.
- unsigned OperandNo = OpInfo.getMatchedOperand();
-
- // Scan until we find the definition we already emitted of this operand.
- // When we find it, create a RegsForValue operand.
- unsigned CurOp = InlineAsm::Op_FirstOperand;
- for (; OperandNo; --OperandNo) {
- // Advance to the next operand.
- unsigned OpFlag =
- cast<ConstantSDNode>(AsmNodeOperands[CurOp])->getZExtValue();
- assert((InlineAsm::isRegDefKind(OpFlag) ||
- InlineAsm::isRegDefEarlyClobberKind(OpFlag) ||
- InlineAsm::isMemKind(OpFlag)) && "Skipped past definitions?");
- CurOp += InlineAsm::getNumOperandRegisters(OpFlag)+1;
- }
-
+ auto CurOp = findMatchingInlineAsmOperand(OpInfo.getMatchedOperand(),
+ AsmNodeOperands);
unsigned OpFlag =
cast<ConstantSDNode>(AsmNodeOperands[CurOp])->getZExtValue();
if (InlineAsm::isRegDefKind(OpFlag) ||
@@ -6893,22 +7070,19 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
return;
}
- RegsForValue MatchedRegs;
- MatchedRegs.ValueVTs.push_back(InOperandVal.getValueType());
MVT RegVT = AsmNodeOperands[CurOp+1].getSimpleValueType();
- MatchedRegs.RegVTs.push_back(RegVT);
- MachineRegisterInfo &RegInfo = DAG.getMachineFunction().getRegInfo();
- for (unsigned i = 0, e = InlineAsm::getNumOperandRegisters(OpFlag);
- i != e; ++i) {
- if (const TargetRegisterClass *RC = TLI.getRegClassFor(RegVT))
- MatchedRegs.Regs.push_back(RegInfo.createVirtualRegister(RC));
- else {
- emitInlineAsmError(
- CS, "inline asm error: This value"
- " type register class is not natively supported!");
- return;
- }
+ SmallVector<unsigned, 4> Regs;
+
+ if (!createVirtualRegs(Regs,
+ InlineAsm::getNumOperandRegisters(OpFlag),
+ RegVT, DAG)) {
+ emitInlineAsmError(CS, "inline asm error: This value type register "
+ "class is not natively supported!");
+ return;
}
+
+ RegsForValue MatchedRegs(Regs, RegVT, InOperandVal.getValueType());
+
SDLoc dl = getCurSDLoc();
// Use the produced MatchedRegs object to
MatchedRegs.getCopyToRegs(InOperandVal, DAG, dl,
@@ -7142,19 +7316,23 @@ SDValue SelectionDAGBuilder::lowerRangeToAssertZExt(SelectionDAG &DAG,
if (!Range)
return Op;
- Constant *Lo = cast<ConstantAsMetadata>(Range->getOperand(0))->getValue();
- if (!Lo->isNullValue())
+ ConstantRange CR = getConstantRangeFromMetadata(*Range);
+ if (CR.isFullSet() || CR.isEmptySet() || CR.isWrappedSet())
+ return Op;
+
+ APInt Lo = CR.getUnsignedMin();
+ if (!Lo.isMinValue())
return Op;
- Constant *Hi = cast<ConstantAsMetadata>(Range->getOperand(1))->getValue();
- unsigned Bits = cast<ConstantInt>(Hi)->getValue().logBase2();
+ APInt Hi = CR.getUnsignedMax();
+ unsigned Bits = Hi.getActiveBits();
EVT SmallVT = EVT::getIntegerVT(*DAG.getContext(), Bits);
SDLoc SL = getCurSDLoc();
- SDValue ZExt = DAG.getNode(ISD::AssertZext, SL, Op.getValueType(),
- Op, DAG.getValueType(SmallVT));
+ SDValue ZExt = DAG.getNode(ISD::AssertZext, SL, Op.getValueType(), Op,
+ DAG.getValueType(SmallVT));
unsigned NumVals = Op.getNode()->getNumValues();
if (NumVals == 1)
return ZExt;
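
A worked instance of the arithmetic above: for !range [0, 256) the unsigned max is 255, whose active-bit count is 8, so the value is asserted zero-extended from i8. The same count without APInt (GCC/Clang builtin assumed):

#include <cassert>
#include <cstdint>

static unsigned activeBits(uint64_t V) { // APInt::getActiveBits() analogue
  return V == 0 ? 0 : 64 - unsigned(__builtin_clzll(V));
}

int main() {
  assert(activeBits(255) == 8);   // !range [0, 256)  -> AssertZext i8
  assert(activeBits(1023) == 10); // !range [0, 1024) -> AssertZext i10
}
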
@@ -7299,7 +7477,7 @@ void SelectionDAGBuilder::visitStackmap(const CallInst &CI) {
DAG.setRoot(Chain);
// Inform the Frame Information that we have a stackmap in this function.
- FuncInfo.MF->getFrameInfo()->setHasStackMap();
+ FuncInfo.MF->getFrameInfo().setHasStackMap();
}
/// \brief Lower llvm.experimental.patchpoint directly to its target opcode.
@@ -7450,7 +7628,7 @@ void SelectionDAGBuilder::visitPatchpoint(ImmutableCallSite CS,
DAG.DeleteNode(Call);
// Inform the Frame Information that we have a patchpoint in this function.
- FuncInfo.MF->getFrameInfo()->setHasPatchPoint();
+ FuncInfo.MF->getFrameInfo().setHasPatchPoint();
}
/// Returns an AttributeSet representing the attributes applied to the return
@@ -7498,7 +7676,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
uint64_t TySize = DL.getTypeAllocSize(CLI.RetTy);
unsigned Align = DL.getPrefTypeAlignment(CLI.RetTy);
MachineFunction &MF = CLI.DAG.getMachineFunction();
- DemoteStackIdx = MF.getFrameInfo()->CreateStackObject(TySize, Align, false);
+ DemoteStackIdx = MF.getFrameInfo().CreateStackObject(TySize, Align, false);
Type *StackSlotPtrType = PointerType::getUnqual(CLI.RetTy);
DemoteStackSlot = CLI.DAG.getFrameIndex(DemoteStackIdx, getPointerTy(DL));
@@ -7580,8 +7758,19 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
Flags.setZExt();
if (Args[i].isSExt)
Flags.setSExt();
- if (Args[i].isInReg)
+ if (Args[i].isInReg) {
+ // If we are using vectorcall calling convention, a structure that is
+ // passed InReg is surely an HVA (homogeneous vector aggregate).
+ if (CLI.CallConv == CallingConv::X86_VectorCall &&
+ isa<StructType>(FinalType)) {
+ // Only the first value of the structure is marked HvaStart.
+ if (0 == Value)
+ Flags.setHvaStart();
+ Flags.setHva();
+ }
+ // Set the InReg flag.
Flags.setInReg();
+ }
if (Args[i].isSRet)
Flags.setSRet();
if (Args[i].isSwiftSelf)
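
The case being flagged, in user terms (clang-cl/MSVC x64 assumed): under __vectorcall a struct of up to four identical vector types is a homogeneous vector aggregate, passed in vector registers; the lowering marks every element Hva and only element 0 HvaStart.

#include <xmmintrin.h>

struct HVA2 { __m128 x, y; }; // two identical vector members -> HVA

// Both members arrive in XMM registers rather than through memory.
float __vectorcall first_lane(HVA2 v) { return _mm_cvtss_f32(v.x); }
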
@@ -7867,8 +8056,19 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
Flags.setZExt();
if (F.getAttributes().hasAttribute(Idx, Attribute::SExt))
Flags.setSExt();
- if (F.getAttributes().hasAttribute(Idx, Attribute::InReg))
+ if (F.getAttributes().hasAttribute(Idx, Attribute::InReg)) {
+ // If we are using vectorcall calling convention, a structure that is
+ // passed InReg is surely an HVA (homogeneous vector aggregate).
+ if (F.getCallingConv() == CallingConv::X86_VectorCall &&
+ isa<StructType>(I->getType())) {
+ // Only the first value of the structure is marked HvaStart.
+ if (0 == Value)
+ Flags.setHvaStart();
+ Flags.setHva();
+ }
+ // Set the InReg flag.
Flags.setInReg();
+ }
if (F.getAttributes().hasAttribute(Idx, Attribute::StructRet))
Flags.setSRet();
if (F.getAttributes().hasAttribute(Idx, Attribute::SwiftSelf))
@@ -7990,7 +8190,10 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
// If this argument is unused then remember its value. It is used to generate
// debugging information.
- if (I->use_empty() && NumValues) {
+ bool isSwiftErrorArg =
+ TLI->supportSwiftError() &&
+ F.getAttributes().hasAttribute(Idx, Attribute::SwiftError);
+ if (I->use_empty() && NumValues && !isSwiftErrorArg) {
SDB->setUnusedArgValue(&*I, InVals[i]);
// Also remember any frame index for use in FastISel.
@@ -8004,7 +8207,10 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
MVT PartVT = TLI->getRegisterType(*CurDAG->getContext(), VT);
unsigned NumParts = TLI->getNumRegisters(*CurDAG->getContext(), VT);
- if (!I->use_empty()) {
+ // Even an apparent 'unused' swifterror argument needs to be returned. So
+ // we do generate a copy for it that can be used on return from the
+ // function.
+ if (!I->use_empty() || isSwiftErrorArg) {
Optional<ISD::NodeType> AssertOp;
if (F.getAttributes().hasAttribute(Idx, Attribute::SExt))
AssertOp = ISD::AssertSext;
@@ -8040,12 +8246,12 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
FuncInfo->setArgumentFrameIndex(&*I, FI->getIndex());
}
- // Update SwiftErrorMap.
- if (Res.getOpcode() == ISD::CopyFromReg && TLI->supportSwiftError() &&
- F.getAttributes().hasAttribute(Idx, Attribute::SwiftError)) {
+ // Update the SwiftErrorVRegDefMap.
+ if (Res.getOpcode() == ISD::CopyFromReg && isSwiftErrorArg) {
unsigned Reg = cast<RegisterSDNode>(Res.getOperand(1))->getReg();
if (TargetRegisterInfo::isVirtualRegister(Reg))
- FuncInfo->SwiftErrorMap[FuncInfo->MBB][0] = Reg;
+ FuncInfo->setCurrentSwiftErrorVReg(FuncInfo->MBB,
+ FuncInfo->SwiftErrorArg, Reg);
}
// If this argument is live outside of the entry block, insert a copy from
@@ -8197,14 +8403,14 @@ void SelectionDAGBuilder::updateDAGForMaybeTailCall(SDValue MaybeTC) {
}
bool SelectionDAGBuilder::isDense(const CaseClusterVector &Clusters,
- unsigned *TotalCases, unsigned First,
- unsigned Last,
- unsigned Density) {
+ const SmallVectorImpl<unsigned> &TotalCases,
+ unsigned First, unsigned Last,
+ unsigned Density) const {
assert(Last >= First);
assert(TotalCases[Last] >= TotalCases[First]);
- APInt LowCase = Clusters[First].Low->getValue();
- APInt HighCase = Clusters[Last].High->getValue();
+ const APInt &LowCase = Clusters[First].Low->getValue();
+ const APInt &HighCase = Clusters[Last].High->getValue();
assert(LowCase.getBitWidth() == HighCase.getBitWidth());
// FIXME: A range of consecutive cases has 100% density, but only requires one
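
The density test itself is simple integer arithmetic; a standalone sketch assumed equivalent to the comparison isDense performs:

#include <cassert>
#include <cstdint>

// Density in percent: number of cases versus the value range they span.
static bool denseEnough(uint64_t NumCases, uint64_t Range, unsigned MinPct) {
  return NumCases * 100 >= Range * MinPct;
}

int main() {
  // Cases {0,1,2,3,10}: 5 cases spanning 11 values is ~45% dense.
  assert(denseEnough(5, 11, 40));
  assert(!denseEnough(5, 11, 50));
}
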
@@ -8233,7 +8439,7 @@ static inline bool areJTsAllowed(const TargetLowering &TLI,
TLI.isOperationLegalOrCustom(ISD::BRIND, MVT::Other);
}
-bool SelectionDAGBuilder::buildJumpTable(CaseClusterVector &Clusters,
+bool SelectionDAGBuilder::buildJumpTable(const CaseClusterVector &Clusters,
unsigned First, unsigned Last,
const SwitchInst *SI,
MachineBasicBlock *DefaultMBB,
@@ -8252,12 +8458,12 @@ bool SelectionDAGBuilder::buildJumpTable(CaseClusterVector &Clusters,
for (unsigned I = First; I <= Last; ++I) {
assert(Clusters[I].Kind == CC_Range);
Prob += Clusters[I].Prob;
- APInt Low = Clusters[I].Low->getValue();
- APInt High = Clusters[I].High->getValue();
+ const APInt &Low = Clusters[I].Low->getValue();
+ const APInt &High = Clusters[I].High->getValue();
NumCmps += (Low == High) ? 1 : 2;
if (I != First) {
// Fill the gap between this and the previous cluster.
- APInt PreviousHigh = Clusters[I - 1].High->getValue();
+ const APInt &PreviousHigh = Clusters[I - 1].High->getValue();
assert(PreviousHigh.slt(Low));
uint64_t Gap = (Low - PreviousHigh).getLimitedValue() - 1;
for (uint64_t J = 0; J < Gap; J++)
@@ -8325,26 +8531,37 @@ void SelectionDAGBuilder::findJumpTables(CaseClusterVector &Clusters,
if (!areJTsAllowed(TLI, SI))
return;
+ const bool OptForSize = DefaultMBB->getParent()->getFunction()->optForSize();
+
const int64_t N = Clusters.size();
- const unsigned MinJumpTableSize = TLI.getMinimumJumpTableEntries();
+ const unsigned MinJumpTableEntries = TLI.getMinimumJumpTableEntries();
+ const unsigned SmallNumberOfEntries = MinJumpTableEntries / 2;
+ const unsigned MaxJumpTableSize =
+ OptForSize || TLI.getMaximumJumpTableSize() == 0
+ ? UINT_MAX : TLI.getMaximumJumpTableSize();
+
+ if (N < 2 || N < MinJumpTableEntries)
+ return;
// TotalCases[i]: Total nbr of cases in Clusters[0..i].
SmallVector<unsigned, 8> TotalCases(N);
-
for (unsigned i = 0; i < N; ++i) {
- APInt Hi = Clusters[i].High->getValue();
- APInt Lo = Clusters[i].Low->getValue();
+ const APInt &Hi = Clusters[i].High->getValue();
+ const APInt &Lo = Clusters[i].Low->getValue();
TotalCases[i] = (Hi - Lo).getLimitedValue() + 1;
if (i != 0)
TotalCases[i] += TotalCases[i - 1];
}
- unsigned MinDensity = JumpTableDensity;
- if (DefaultMBB->getParent()->getFunction()->optForSize())
- MinDensity = OptsizeJumpTableDensity;
- if (N >= MinJumpTableSize
- && isDense(Clusters, &TotalCases[0], 0, N - 1, MinDensity)) {
- // Cheap case: the whole range might be suitable for jump table.
+ const unsigned MinDensity =
+ OptForSize ? OptsizeJumpTableDensity : JumpTableDensity;
+
+ // Cheap case: the whole range may be suitable for a jump table.
+ unsigned JumpTableSize = (Clusters[N - 1].High->getValue() -
+ Clusters[0].Low->getValue())
+ .getLimitedValue(UINT_MAX - 1) + 1;
+ if (JumpTableSize <= MaxJumpTableSize &&
+ isDense(Clusters, TotalCases, 0, N - 1, MinDensity)) {
CaseCluster JTCluster;
if (buildJumpTable(Clusters, 0, N - 1, SI, DefaultMBB, JTCluster)) {
Clusters[0] = JTCluster;
@@ -8368,14 +8585,23 @@ void SelectionDAGBuilder::findJumpTables(CaseClusterVector &Clusters,
SmallVector<unsigned, 8> MinPartitions(N);
// LastElement[i] is the last element of the partition starting at i.
SmallVector<unsigned, 8> LastElement(N);
- // NumTables[i]: nbr of >= MinJumpTableSize partitions from Clusters[i..N-1].
- SmallVector<unsigned, 8> NumTables(N);
+ // PartitionsScore[i] is used to break ties when choosing between two
+ // partitionings resulting in the same number of partitions.
+ SmallVector<unsigned, 8> PartitionsScore(N);
+ // For PartitionsScore, a small number of comparisons is considered as good as
+ // a jump table and a single comparison is considered better than a jump
+ // table.
+ enum PartitionScores : unsigned {
+ NoTable = 0,
+ Table = 1,
+ FewCases = 1,
+ SingleCase = 2
+ };
// Base case: There is only one way to partition Clusters[N-1].
MinPartitions[N - 1] = 1;
LastElement[N - 1] = N - 1;
- assert(MinJumpTableSize > 1);
- NumTables[N - 1] = 0;
+ PartitionsScore[N - 1] = PartitionScores::SingleCase;
// Note: loop indexes are signed to avoid underflow.
for (int64_t i = N - 2; i >= 0; i--) {
@@ -8383,23 +8609,34 @@ void SelectionDAGBuilder::findJumpTables(CaseClusterVector &Clusters,
// Baseline: Put Clusters[i] into a partition on its own.
MinPartitions[i] = MinPartitions[i + 1] + 1;
LastElement[i] = i;
- NumTables[i] = NumTables[i + 1];
+ PartitionsScore[i] = PartitionsScore[i + 1] + PartitionScores::SingleCase;
// Search for a solution that results in fewer partitions.
for (int64_t j = N - 1; j > i; j--) {
// Try building a partition from Clusters[i..j].
- if (isDense(Clusters, &TotalCases[0], i, j, MinDensity)) {
+ JumpTableSize = (Clusters[j].High->getValue() -
+ Clusters[i].Low->getValue())
+ .getLimitedValue(UINT_MAX - 1) + 1;
+ if (JumpTableSize <= MaxJumpTableSize &&
+ isDense(Clusters, TotalCases, i, j, MinDensity)) {
unsigned NumPartitions = 1 + (j == N - 1 ? 0 : MinPartitions[j + 1]);
- bool IsTable = j - i + 1 >= MinJumpTableSize;
- unsigned Tables = IsTable + (j == N - 1 ? 0 : NumTables[j + 1]);
-
- // If this j leads to fewer partitions, or same number of partitions
- // with more lookup tables, it is a better partitioning.
+ unsigned Score = j == N - 1 ? 0 : PartitionsScore[j + 1];
+ int64_t NumEntries = j - i + 1;
+
+ if (NumEntries == 1)
+ Score += PartitionScores::SingleCase;
+ else if (NumEntries <= SmallNumberOfEntries)
+ Score += PartitionScores::FewCases;
+ else if (NumEntries >= MinJumpTableEntries)
+ Score += PartitionScores::Table;
+
+ // If this leads to fewer partitions, or to the same number of
+ // partitions with a better score, it is a better partitioning.
if (NumPartitions < MinPartitions[i] ||
- (NumPartitions == MinPartitions[i] && Tables > NumTables[i])) {
+ (NumPartitions == MinPartitions[i] && Score > PartitionsScore[i])) {
MinPartitions[i] = NumPartitions;
LastElement[i] = j;
- NumTables[i] = Tables;
+ PartitionsScore[i] = Score;
}
}
}
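
Pulling this hunk together, a compact model of the search (a sketch under assumptions: CanBeTable(i, j) stands in for the density and max-size tests, MinJT for getMinimumJumpTableEntries(), scores per the PartitionScores enum):

#include <functional>
#include <vector>

static void planPartitions(unsigned N, unsigned MinJT,
                           std::function<bool(unsigned, unsigned)> CanBeTable,
                           std::vector<unsigned> &MinParts,
                           std::vector<unsigned> &Last) {
  // Assumes N >= 2; the caller bails out earlier otherwise.
  std::vector<unsigned> Score(N);
  MinParts.assign(N, 0);
  Last.assign(N, 0);
  MinParts[N - 1] = 1; Last[N - 1] = N - 1; Score[N - 1] = 2; // SingleCase
  for (int i = int(N) - 2; i >= 0; --i) {
    // Baseline: Clusters[i] forms a partition of its own.
    MinParts[i] = MinParts[i + 1] + 1;
    Last[i] = i;
    Score[i] = Score[i + 1] + 2;
    for (unsigned j = N - 1; j > unsigned(i); --j) {
      if (!CanBeTable(unsigned(i), j))
        continue;
      unsigned Parts = 1 + (j == N - 1 ? 0 : MinParts[j + 1]);
      unsigned S = j == N - 1 ? 0 : Score[j + 1];
      unsigned Len = j - unsigned(i) + 1;
      // SingleCase = 2, FewCases = 1, Table = 1, NoTable = 0.
      S += Len == 1 ? 2 : Len <= MinJT / 2 ? 1 : Len >= MinJT ? 1 : 0;
      if (Parts < MinParts[i] || (Parts == MinParts[i] && S > Score[i])) {
        MinParts[i] = Parts; Last[i] = j; Score[i] = S;
      }
    }
  }
}

Fewer partitions always wins outright; the score only breaks ties, preferring lone comparisons and small case groups over emitting a table.
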
@@ -8414,7 +8651,7 @@ void SelectionDAGBuilder::findJumpTables(CaseClusterVector &Clusters,
unsigned NumClusters = Last - First + 1;
CaseCluster JTCluster;
- if (NumClusters >= MinJumpTableSize &&
+ if (NumClusters >= MinJumpTableEntries &&
buildJumpTable(Clusters, First, Last, SI, DefaultMBB, JTCluster)) {
Clusters[DstIndex++] = JTCluster;
} else {
@@ -9107,7 +9344,8 @@ void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) {
WorkList.pop_back();
unsigned NumClusters = W.LastCluster - W.FirstCluster + 1;
- if (NumClusters > 3 && TM.getOptLevel() != CodeGenOpt::None) {
+ if (NumClusters > 3 && TM.getOptLevel() != CodeGenOpt::None &&
+ !DefaultMBB->getParent()->getFunction()->optForMinSize()) {
// For optimized builds, lower large range as a balanced binary tree.
splitWorkItem(WorkList, W, SI.getCondition(), SwitchMBB);
continue;
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
index b9888ae..abde8a8 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
@@ -305,12 +305,13 @@ private:
};
/// Check whether a range of clusters is dense enough for a jump table.
- bool isDense(const CaseClusterVector &Clusters, unsigned *TotalCases,
- unsigned First, unsigned Last, unsigned MinDensity);
+ bool isDense(const CaseClusterVector &Clusters,
+ const SmallVectorImpl<unsigned> &TotalCases,
+ unsigned First, unsigned Last, unsigned MinDensity) const;
/// Build a jump table cluster from Clusters[First..Last]. Returns false if it
/// decides it's not a good idea.
- bool buildJumpTable(CaseClusterVector &Clusters, unsigned First,
+ bool buildJumpTable(const CaseClusterVector &Clusters, unsigned First,
unsigned Last, const SwitchInst *SI,
MachineBasicBlock *DefaultMBB, CaseCluster &JTCluster);
@@ -652,8 +653,6 @@ public:
return CurInst ? CurInst->getDebugLoc() : DebugLoc();
}
- unsigned getSDNodeOrder() const { return SDNodeOrder; }
-
void CopyValueToVirtualRegister(const Value *V, unsigned Reg);
void visit(const Instruction &I);
@@ -875,8 +874,8 @@ private:
void visitAlloca(const AllocaInst &I);
void visitLoad(const LoadInst &I);
void visitStore(const StoreInst &I);
- void visitMaskedLoad(const CallInst &I);
- void visitMaskedStore(const CallInst &I);
+ void visitMaskedLoad(const CallInst &I, bool IsExpanding = false);
+ void visitMaskedStore(const CallInst &I, bool IsCompressing = false);
void visitMaskedGather(const CallInst &I);
void visitMaskedScatter(const CallInst &I);
void visitAtomicCmpXchg(const AtomicCmpXchgInst &I);
@@ -885,6 +884,7 @@ private:
void visitPHI(const PHINode &I);
void visitCall(const CallInst &I);
bool visitMemCmpCall(const CallInst &I);
+ bool visitMemPCpyCall(const CallInst &I);
bool visitMemChrCall(const CallInst &I);
bool visitStrCpyCall(const CallInst &I, bool isStpcpy);
bool visitStrCmpCall(const CallInst &I);
@@ -941,6 +941,11 @@ private:
/// Update the DAG and DAG builder with the relevant information after
/// a new root node has been created which could be a tail call.
void updateDAGForMaybeTailCall(SDValue MaybeTC);
+
+ /// Return the appropriate SDDbgValue based on N.
+ SDDbgValue *getDbgValue(SDValue N, DILocalVariable *Variable,
+ DIExpression *Expr, int64_t Offset, DebugLoc dl,
+ unsigned DbgSDNodeOrder);
};
/// RegsForValue - This struct represents the registers (physical or virtual)
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
index 93ac6d6..0faaad8 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
@@ -100,6 +100,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::JumpTable: return "JumpTable";
case ISD::GLOBAL_OFFSET_TABLE: return "GLOBAL_OFFSET_TABLE";
case ISD::RETURNADDR: return "RETURNADDR";
+ case ISD::ADDROFRETURNADDR: return "ADDROFRETURNADDR";
case ISD::FRAMEADDR: return "FRAMEADDR";
case ISD::LOCAL_RECOVER: return "LOCAL_RECOVER";
case ISD::READ_REGISTER: return "READ_REGISTER";
@@ -120,7 +121,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
unsigned OpNo = getOpcode() == ISD::INTRINSIC_WO_CHAIN ? 0 : 1;
unsigned IID = cast<ConstantSDNode>(getOperand(OpNo))->getZExtValue();
if (IID < Intrinsic::num_intrinsics)
- return Intrinsic::getName((Intrinsic::ID)IID);
+ return Intrinsic::getName((Intrinsic::ID)IID, None);
else if (const TargetIntrinsicInfo *TII = G->getTarget().getIntrinsicInfo())
return TII->getName(IID);
llvm_unreachable("Invalid intrinsic ID");
@@ -261,21 +262,6 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::FP16_TO_FP: return "fp16_to_fp";
case ISD::FP_TO_FP16: return "fp_to_fp16";
- case ISD::CONVERT_RNDSAT: {
- switch (cast<CvtRndSatSDNode>(this)->getCvtCode()) {
- default: llvm_unreachable("Unknown cvt code!");
- case ISD::CVT_FF: return "cvt_ff";
- case ISD::CVT_FS: return "cvt_fs";
- case ISD::CVT_FU: return "cvt_fu";
- case ISD::CVT_SF: return "cvt_sf";
- case ISD::CVT_UF: return "cvt_uf";
- case ISD::CVT_SS: return "cvt_ss";
- case ISD::CVT_SU: return "cvt_su";
- case ISD::CVT_US: return "cvt_us";
- case ISD::CVT_UU: return "cvt_uu";
- }
- }
-
// Control flow instructions
case ISD::BR: return "br";
case ISD::BRIND: return "brind";
@@ -321,7 +307,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::CTTZ_ZERO_UNDEF: return "cttz_zero_undef";
case ISD::CTLZ: return "ctlz";
case ISD::CTLZ_ZERO_UNDEF: return "ctlz_zero_undef";
-
+
// Trampolines
case ISD::INIT_TRAMPOLINE: return "init_trampoline";
case ISD::ADJUST_TRAMPOLINE: return "adjust_trampoline";
@@ -424,9 +410,9 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const {
} else if (const ConstantSDNode *CSDN = dyn_cast<ConstantSDNode>(this)) {
OS << '<' << CSDN->getAPIntValue() << '>';
} else if (const ConstantFPSDNode *CSDN = dyn_cast<ConstantFPSDNode>(this)) {
- if (&CSDN->getValueAPF().getSemantics()==&APFloat::IEEEsingle)
+ if (&CSDN->getValueAPF().getSemantics()==&APFloat::IEEEsingle())
OS << '<' << CSDN->getValueAPF().convertToFloat() << '>';
- else if (&CSDN->getValueAPF().getSemantics()==&APFloat::IEEEdouble)
+ else if (&CSDN->getValueAPF().getSemantics()==&APFloat::IEEEdouble())
OS << '<' << CSDN->getValueAPF().convertToDouble() << '>';
else {
OS << "<APFloat(";
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index 1d61657..64e6c22 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -426,6 +426,10 @@ static void SplitCriticalSideEffectEdges(Function &Fn) {
}
bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
+ // If we already selected that function, we do not need to run SDISel.
+ if (mf.getProperties().hasProperty(
+ MachineFunctionProperties::Property::Selected))
+ return false;
// Do some sanity-checking on the command-line options.
assert((!EnableFastISelVerbose || TM.Options.EnableFastISel) &&
"-fast-isel-verbose requires -fast-isel");
@@ -594,16 +598,16 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
}
// Determine if there are any calls in this machine function.
- MachineFrameInfo *MFI = MF->getFrameInfo();
+ MachineFrameInfo &MFI = MF->getFrameInfo();
for (const auto &MBB : *MF) {
- if (MFI->hasCalls() && MF->hasInlineAsm())
+ if (MFI.hasCalls() && MF->hasInlineAsm())
break;
for (const auto &MI : MBB) {
const MCInstrDesc &MCID = TII->get(MI.getOpcode());
if ((MCID.isCall() && !MCID.isReturn()) ||
MI.isStackAligningInlineAsm()) {
- MFI->setHasCalls(true);
+ MFI.setHasCalls(true);
}
if (MI.isInlineAsm()) {
MF->setHasInlineAsm(true);
@@ -645,7 +649,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
}
if (TLI->hasCopyImplyingStackAdjustment(MF))
- MFI->setHasCopyImplyingStackAdjustment(true);
+ MFI.setHasCopyImplyingStackAdjustment(true);
// Freeze the set of reserved registers now that MachineFrameInfo has been
// set up. All the information required by getReservedRegs() should be
@@ -721,9 +725,8 @@ void SelectionDAGISel::ComputeLiveOutVRegInfo() {
}
void SelectionDAGISel::CodeGenAndEmitDAG() {
- std::string GroupName;
- if (TimePassesIsEnabled)
- GroupName = "Instruction Selection and Scheduling";
+ StringRef GroupName = "sdag";
+ StringRef GroupDescription = "Instruction Selection and Scheduling";
std::string BlockName;
int BlockNumber = -1;
(void)BlockNumber;
@@ -751,7 +754,8 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
// Run the DAG combiner in pre-legalize mode.
{
- NamedRegionTimer T("DAG Combining 1", GroupName, TimePassesIsEnabled);
+ NamedRegionTimer T("combine1", "DAG Combining 1", GroupName,
+ GroupDescription, TimePassesIsEnabled);
CurDAG->Combine(BeforeLegalizeTypes, *AA, OptLevel);
}
@@ -765,7 +769,8 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
bool Changed;
{
- NamedRegionTimer T("Type Legalization", GroupName, TimePassesIsEnabled);
+ NamedRegionTimer T("legalize_types", "Type Legalization", GroupName,
+ GroupDescription, TimePassesIsEnabled);
Changed = CurDAG->LegalizeTypes();
}
@@ -780,8 +785,8 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
// Run the DAG combiner in post-type-legalize mode.
{
- NamedRegionTimer T("DAG Combining after legalize types", GroupName,
- TimePassesIsEnabled);
+ NamedRegionTimer T("combine_lt", "DAG Combining after legalize types",
+ GroupName, GroupDescription, TimePassesIsEnabled);
CurDAG->Combine(AfterLegalizeTypes, *AA, OptLevel);
}
@@ -791,13 +796,15 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
}
{
- NamedRegionTimer T("Vector Legalization", GroupName, TimePassesIsEnabled);
+ NamedRegionTimer T("legalize_vec", "Vector Legalization", GroupName,
+ GroupDescription, TimePassesIsEnabled);
Changed = CurDAG->LegalizeVectors();
}
if (Changed) {
{
- NamedRegionTimer T("Type Legalization 2", GroupName, TimePassesIsEnabled);
+ NamedRegionTimer T("legalize_types2", "Type Legalization 2", GroupName,
+ GroupDescription, TimePassesIsEnabled);
CurDAG->LegalizeTypes();
}
@@ -806,8 +813,8 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
// Run the DAG combiner in post-type-legalize mode.
{
- NamedRegionTimer T("DAG Combining after legalize vectors", GroupName,
- TimePassesIsEnabled);
+ NamedRegionTimer T("combine_lv", "DAG Combining after legalize vectors",
+ GroupName, GroupDescription, TimePassesIsEnabled);
CurDAG->Combine(AfterLegalizeVectorOps, *AA, OptLevel);
}
@@ -819,7 +826,8 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
CurDAG->viewGraph("legalize input for " + BlockName);
{
- NamedRegionTimer T("DAG Legalization", GroupName, TimePassesIsEnabled);
+ NamedRegionTimer T("legalize", "DAG Legalization", GroupName,
+ GroupDescription, TimePassesIsEnabled);
CurDAG->Legalize();
}
@@ -831,7 +839,8 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
// Run the DAG combiner in post-legalize mode.
{
- NamedRegionTimer T("DAG Combining 2", GroupName, TimePassesIsEnabled);
+ NamedRegionTimer T("combine2", "DAG Combining 2", GroupName,
+ GroupDescription, TimePassesIsEnabled);
CurDAG->Combine(AfterLegalizeDAG, *AA, OptLevel);
}
@@ -847,7 +856,8 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
// Third, instruction select all of the operations to machine code, adding the
// code to the MachineBasicBlock.
{
- NamedRegionTimer T("Instruction Selection", GroupName, TimePassesIsEnabled);
+ NamedRegionTimer T("isel", "Instruction Selection", GroupName,
+ GroupDescription, TimePassesIsEnabled);
DoInstructionSelection();
}
@@ -860,8 +870,8 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
// Schedule machine code.
ScheduleDAGSDNodes *Scheduler = CreateScheduler();
{
- NamedRegionTimer T("Instruction Scheduling", GroupName,
- TimePassesIsEnabled);
+ NamedRegionTimer T("sched", "Instruction Scheduling", GroupName,
+ GroupDescription, TimePassesIsEnabled);
Scheduler->Run(CurDAG, FuncInfo->MBB);
}
@@ -872,7 +882,8 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
// inserted into.
MachineBasicBlock *FirstMBB = FuncInfo->MBB, *LastMBB;
{
- NamedRegionTimer T("Instruction Creation", GroupName, TimePassesIsEnabled);
+ NamedRegionTimer T("emit", "Instruction Creation", GroupName,
+ GroupDescription, TimePassesIsEnabled);
// FuncInfo->InsertPt is passed by reference and set to the end of the
// scheduled instructions.
@@ -886,8 +897,8 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
// Free the scheduler state.
{
- NamedRegionTimer T("Instruction Scheduling Cleanup", GroupName,
- TimePassesIsEnabled);
+ NamedRegionTimer T("cleanup", "Instruction Scheduling Cleanup", GroupName,
+ GroupDescription, TimePassesIsEnabled);
delete Scheduler;
}
@@ -1003,10 +1014,10 @@ bool SelectionDAGISel::PrepareEHLandingPad() {
// Add a label to mark the beginning of the landing pad. Deletion of the
// landing pad can thus be detected via the MachineModuleInfo.
- MCSymbol *Label = MF->getMMI().addLandingPad(MBB);
+ MCSymbol *Label = MF->addLandingPad(MBB);
// Assign the call site to the landing pad's begin label.
- MF->getMMI().setCallSiteLandingPad(Label, SDB->LPadToCallSiteMap[MBB]);
+ MF->setCallSiteLandingPad(Label, SDB->LPadToCallSiteMap[MBB]);
const MCInstrDesc &II = TII->get(TargetOpcode::EH_LABEL);
BuildMI(*MBB, FuncInfo->InsertPt, SDB->getCurDebugLoc(), II)
@@ -1153,14 +1164,22 @@ static void setupSwiftErrorVals(const Function &Fn, const TargetLowering *TLI,
return;
FuncInfo->SwiftErrorVals.clear();
- FuncInfo->SwiftErrorMap.clear();
- FuncInfo->SwiftErrorWorklist.clear();
+ FuncInfo->SwiftErrorVRegDefMap.clear();
+ FuncInfo->SwiftErrorVRegUpwardsUse.clear();
+ FuncInfo->SwiftErrorArg = nullptr;
// Check if function has a swifterror argument.
+ bool HaveSeenSwiftErrorArg = false;
for (Function::const_arg_iterator AI = Fn.arg_begin(), AE = Fn.arg_end();
AI != AE; ++AI)
- if (AI->hasSwiftErrorAttr())
+ if (AI->hasSwiftErrorAttr()) {
+ assert(!HaveSeenSwiftErrorArg &&
+ "Must have only one swifterror parameter");
+ (void)HaveSeenSwiftErrorArg; // silence warning.
+ HaveSeenSwiftErrorArg = true;
+ FuncInfo->SwiftErrorArg = &*AI;
FuncInfo->SwiftErrorVals.push_back(&*AI);
+ }
for (const auto &LLVMBB : Fn)
for (const auto &Inst : LLVMBB) {
@@ -1170,95 +1189,152 @@ static void setupSwiftErrorVals(const Function &Fn, const TargetLowering *TLI,
}
}
-/// For each basic block, merge incoming swifterror values or simply propagate
-/// them. The merged results will be saved in SwiftErrorMap. For predecessors
-/// that are not yet visited, we create virtual registers to hold the swifterror
-/// values and save them in SwiftErrorWorklist.
-static void mergeIncomingSwiftErrors(FunctionLoweringInfo *FuncInfo,
- const TargetLowering *TLI,
- const TargetInstrInfo *TII,
- const BasicBlock *LLVMBB,
- SelectionDAGBuilder *SDB) {
+static void createSwiftErrorEntriesInEntryBlock(FunctionLoweringInfo *FuncInfo,
+ const TargetLowering *TLI,
+ const TargetInstrInfo *TII,
+ const BasicBlock *LLVMBB,
+ SelectionDAGBuilder *SDB) {
if (!TLI->supportSwiftError())
return;
- // We should only do this when we have swifterror parameter or swifterror
+ // We only need to do this when we have a swifterror parameter or swifterror
// alloc.
if (FuncInfo->SwiftErrorVals.empty())
return;
- // At beginning of a basic block, insert PHI nodes or get the virtual
- // register from the only predecessor, and update SwiftErrorMap; if one
- // of the predecessors is not visited, update SwiftErrorWorklist.
- // At end of a basic block, if a block is in SwiftErrorWorklist, insert copy
- // to sync up the virtual register assignment.
-
- // Always create a virtual register for each swifterror value in entry block.
- auto &DL = SDB->DAG.getDataLayout();
- const TargetRegisterClass *RC = TLI->getRegClassFor(TLI->getPointerTy(DL));
if (pred_begin(LLVMBB) == pred_end(LLVMBB)) {
- for (unsigned I = 0, E = FuncInfo->SwiftErrorVals.size(); I < E; I++) {
+ auto &DL = FuncInfo->MF->getDataLayout();
+ auto const *RC = TLI->getRegClassFor(TLI->getPointerTy(DL));
+ for (const auto *SwiftErrorVal : FuncInfo->SwiftErrorVals) {
+ // We will always generate a copy from the argument. It is always used at
+ // least by the 'return' of the swifterror.
+ if (FuncInfo->SwiftErrorArg && FuncInfo->SwiftErrorArg == SwiftErrorVal)
+ continue;
unsigned VReg = FuncInfo->MF->getRegInfo().createVirtualRegister(RC);
// Assign Undef to Vreg. We construct MI directly to make sure it works
// with FastISel.
- BuildMI(*FuncInfo->MBB, FuncInfo->InsertPt, SDB->getCurDebugLoc(),
- TII->get(TargetOpcode::IMPLICIT_DEF), VReg);
- FuncInfo->SwiftErrorMap[FuncInfo->MBB].push_back(VReg);
+ BuildMI(*FuncInfo->MBB, FuncInfo->MBB->getFirstNonPHI(),
+ SDB->getCurDebugLoc(), TII->get(TargetOpcode::IMPLICIT_DEF),
+ VReg);
+ FuncInfo->setCurrentSwiftErrorVReg(FuncInfo->MBB, SwiftErrorVal, VReg);
}
- return;
}
+}
- if (auto *UniquePred = LLVMBB->getUniquePredecessor()) {
- auto *UniquePredMBB = FuncInfo->MBBMap[UniquePred];
- if (!FuncInfo->SwiftErrorMap.count(UniquePredMBB)) {
- // Update SwiftErrorWorklist with a new virtual register.
- for (unsigned I = 0, E = FuncInfo->SwiftErrorVals.size(); I < E; I++) {
- unsigned VReg = FuncInfo->MF->getRegInfo().createVirtualRegister(RC);
- FuncInfo->SwiftErrorWorklist[UniquePredMBB].push_back(VReg);
- // Propagate the information from the single predecessor.
- FuncInfo->SwiftErrorMap[FuncInfo->MBB].push_back(VReg);
- }
- return;
- }
- // Propagate the information from the single predecessor.
- FuncInfo->SwiftErrorMap[FuncInfo->MBB] =
- FuncInfo->SwiftErrorMap[UniquePredMBB];
+/// Propagate swifterror values through the machine function CFG.
+static void propagateSwiftErrorVRegs(FunctionLoweringInfo *FuncInfo) {
+ auto *TLI = FuncInfo->TLI;
+ if (!TLI->supportSwiftError())
return;
- }
- // For the case of multiple predecessors, update SwiftErrorWorklist.
- // Handle the case where we have two or more predecessors being the same.
- for (const_pred_iterator PI = pred_begin(LLVMBB), PE = pred_end(LLVMBB);
- PI != PE; ++PI) {
- auto *PredMBB = FuncInfo->MBBMap[*PI];
- if (!FuncInfo->SwiftErrorMap.count(PredMBB) &&
- !FuncInfo->SwiftErrorWorklist.count(PredMBB)) {
- for (unsigned I = 0, E = FuncInfo->SwiftErrorVals.size(); I < E; I++) {
- unsigned VReg = FuncInfo->MF->getRegInfo().createVirtualRegister(RC);
- // When we actually visit the basic block PredMBB, we will materialize
- // the virtual register assignment in copySwiftErrorsToFinalVRegs.
- FuncInfo->SwiftErrorWorklist[PredMBB].push_back(VReg);
+ // We only need to do this when we have a swifterror parameter or swifterror
+ // alloc.
+ if (FuncInfo->SwiftErrorVals.empty())
+ return;
+
+ // For each machine basic block in reverse post order.
+ ReversePostOrderTraversal<MachineFunction *> RPOT(FuncInfo->MF);
+ for (ReversePostOrderTraversal<MachineFunction *>::rpo_iterator
+ It = RPOT.begin(),
+ E = RPOT.end();
+ It != E; ++It) {
+ MachineBasicBlock *MBB = *It;
+
+ // For each swifterror value in the function.
+ for(const auto *SwiftErrorVal : FuncInfo->SwiftErrorVals) {
+ auto Key = std::make_pair(MBB, SwiftErrorVal);
+ auto UUseIt = FuncInfo->SwiftErrorVRegUpwardsUse.find(Key);
+ auto VRegDefIt = FuncInfo->SwiftErrorVRegDefMap.find(Key);
+ bool UpwardsUse = UUseIt != FuncInfo->SwiftErrorVRegUpwardsUse.end();
+ unsigned UUseVReg = UpwardsUse ? UUseIt->second : 0;
+ bool DownwardDef = VRegDefIt != FuncInfo->SwiftErrorVRegDefMap.end();
+ assert(!(UpwardsUse && !DownwardDef) &&
+ "We can't have an upwards use but no downwards def");
+
+ // If there is no upwards exposed use and an entry for the swifterror in
+ // the def map for this value, we don't need to do anything: we already
+ // have a downward def for this basic block.
+ if (!UpwardsUse && DownwardDef)
+ continue;
+
+ // Otherwise we either have an upwards exposed use vreg that we need to
+ // materialize or need to forward the downward def from predecessors.
+
+ // Check whether we have a single vreg def from all predecessors.
+ // Otherwise we need a phi.
+ SmallVector<std::pair<MachineBasicBlock *, unsigned>, 4> VRegs;
+ SmallSet<const MachineBasicBlock*, 8> Visited;
+ for (auto *Pred : MBB->predecessors()) {
+ if (!Visited.insert(Pred).second)
+ continue;
+ VRegs.push_back(std::make_pair(
+ Pred, FuncInfo->getOrCreateSwiftErrorVReg(Pred, SwiftErrorVal)));
+ if (Pred != MBB)
+ continue;
+ // We have a self-edge.
+ // If there was no upwards use in this basic block, there is now one: the
+ // phi needs to use itself.
+ if (!UpwardsUse) {
+ UpwardsUse = true;
+ UUseIt = FuncInfo->SwiftErrorVRegUpwardsUse.find(Key);
+ assert(UUseIt != FuncInfo->SwiftErrorVRegUpwardsUse.end());
+ UUseVReg = UUseIt->second;
+ }
+ }
+
+ // We need a phi node if we have more than one predecessor with different
+ // downward defs.
+ bool needPHI =
+ VRegs.size() >= 1 &&
+ std::find_if(
+ VRegs.begin(), VRegs.end(),
+ [&](const std::pair<const MachineBasicBlock *, unsigned> &V)
+ -> bool { return V.second != VRegs[0].second; }) !=
+ VRegs.end();
+
+ // If there is no upwards exposed use and we don't need a phi, just
+ // forward the swifterror vreg from the predecessor(s).
+ if (!UpwardsUse && !needPHI) {
+ assert(!VRegs.empty() &&
+ "No predecessors? The entry block should bail out earlier");
+ // Just forward the swifterror vreg from the predecessor(s).
+ FuncInfo->setCurrentSwiftErrorVReg(MBB, SwiftErrorVal, VRegs[0].second);
+ continue;
}
- }
- }
- // For the case of multiple predecessors, create a virtual register for
- // each swifterror value and generate Phi node.
- for (unsigned I = 0, E = FuncInfo->SwiftErrorVals.size(); I < E; I++) {
- unsigned VReg = FuncInfo->MF->getRegInfo().createVirtualRegister(RC);
- FuncInfo->SwiftErrorMap[FuncInfo->MBB].push_back(VReg);
-
- MachineInstrBuilder SwiftErrorPHI = BuildMI(*FuncInfo->MBB,
- FuncInfo->MBB->begin(), SDB->getCurDebugLoc(),
- TII->get(TargetOpcode::PHI), VReg);
- for (const_pred_iterator PI = pred_begin(LLVMBB), PE = pred_end(LLVMBB);
- PI != PE; ++PI) {
- auto *PredMBB = FuncInfo->MBBMap[*PI];
- unsigned SwiftErrorReg = FuncInfo->SwiftErrorMap.count(PredMBB) ?
- FuncInfo->SwiftErrorMap[PredMBB][I] :
- FuncInfo->SwiftErrorWorklist[PredMBB][I];
- SwiftErrorPHI.addReg(SwiftErrorReg)
- .addMBB(PredMBB);
+ auto DLoc = isa<Instruction>(SwiftErrorVal)
+ ? dyn_cast<Instruction>(SwiftErrorVal)->getDebugLoc()
+ : DebugLoc();
+ const auto *TII = FuncInfo->MF->getSubtarget().getInstrInfo();
+
+ // If we don't need a phi create a copy to the upward exposed vreg.
+ if (!needPHI) {
+ assert(UpwardsUse);
+ unsigned DestReg = UUseVReg;
+ BuildMI(*MBB, MBB->getFirstNonPHI(), DLoc, TII->get(TargetOpcode::COPY),
+ DestReg)
+ .addReg(VRegs[0].second);
+ continue;
+ }
+
+ // We need a phi: if there is an upwards exposed use we already have a
+ // destination virtual register number; otherwise we generate a new one.
+ auto &DL = FuncInfo->MF->getDataLayout();
+ auto const *RC = TLI->getRegClassFor(TLI->getPointerTy(DL));
+ unsigned PHIVReg =
+ UpwardsUse ? UUseVReg
+ : FuncInfo->MF->getRegInfo().createVirtualRegister(RC);
+ MachineInstrBuilder SwiftErrorPHI =
+ BuildMI(*MBB, MBB->getFirstNonPHI(), DLoc,
+ TII->get(TargetOpcode::PHI), PHIVReg);
+ for (auto BBRegPair : VRegs) {
+ SwiftErrorPHI.addReg(BBRegPair.second).addMBB(BBRegPair.first);
+ }
+
+ // We did not have a definition in this block before: store the phi's vreg
+ // as this block's downward exposed def.
+ if (!UpwardsUse)
+ FuncInfo->setCurrentSwiftErrorVReg(MBB, SwiftErrorVal, PHIVReg);
}
}
}
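
The merge decision above, reduced to its core predicate (sketch; vreg numbers stand in for the per-predecessor swifterror defs collected in VRegs):

#include <algorithm>
#include <vector>

// A phi is needed exactly when at least one predecessor carries a
// different vreg than the first one collected.
static bool needPhi(const std::vector<unsigned> &PredVRegs) {
  return !PredVRegs.empty() &&
         std::any_of(PredVRegs.begin(), PredVRegs.end(),
                     [&](unsigned V) { return V != PredVRegs.front(); });
}

With agreeing predecessors and no upward-exposed use the def is simply forwarded; with an upward use a COPY into the upward-use vreg suffices; otherwise the PHI is built at the block's first non-phi position.
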
@@ -1309,7 +1385,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
if (!FuncInfo->MBB)
continue; // Some blocks like catchpads have no code or MBB.
FuncInfo->InsertPt = FuncInfo->MBB->getFirstNonPHI();
- mergeIncomingSwiftErrors(FuncInfo, TLI, TII, LLVMBB, SDB);
+ createSwiftErrorEntriesInEntryBlock(FuncInfo, TLI, TII, LLVMBB, SDB);
// Setup an EH landing-pad block.
FuncInfo->ExceptionPointerVirtReg = 0;
@@ -1486,6 +1562,8 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
FuncInfo->PHINodesToUpdate.clear();
}
+ propagateSwiftErrorVRegs(FuncInfo);
+
delete FastIS;
SDB->clearDanglingDebugInfo();
SDB->SPDescriptor.resetPerFunctionState();
@@ -2170,7 +2248,7 @@ GetVBR(uint64_t Val, const unsigned char *MatcherTable, unsigned &Idx) {
/// to use the new results.
void SelectionDAGISel::UpdateChains(
SDNode *NodeToMatch, SDValue InputChain,
- const SmallVectorImpl<SDNode *> &ChainNodesMatched, bool isMorphNodeTo) {
+ SmallVectorImpl<SDNode *> &ChainNodesMatched, bool isMorphNodeTo) {
SmallVector<SDNode*, 4> NowDeadNodes;
// Now that all the normal results are replaced, we replace the chain and
@@ -2182,6 +2260,11 @@ void SelectionDAGISel::UpdateChains(
// Replace all the chain results with the final chain we ended up with.
for (unsigned i = 0, e = ChainNodesMatched.size(); i != e; ++i) {
SDNode *ChainNode = ChainNodesMatched[i];
+ // If ChainNode is null, it's because we replaced it on a previous
+ // iteration and we cleared it out of the map. Just skip it.
+ if (!ChainNode)
+ continue;
+
assert(ChainNode->getOpcode() != ISD::DELETED_NODE &&
"Deleted node left in chain");
@@ -2194,6 +2277,11 @@ void SelectionDAGISel::UpdateChains(
if (ChainVal.getValueType() == MVT::Glue)
ChainVal = ChainVal.getValue(ChainVal->getNumValues()-2);
assert(ChainVal.getValueType() == MVT::Other && "Not a chain?");
+ SelectionDAG::DAGNodeDeletedListener NDL(
+ *CurDAG, [&](SDNode *N, SDNode *E) {
+ std::replace(ChainNodesMatched.begin(), ChainNodesMatched.end(), N,
+ static_cast<SDNode *>(nullptr));
+ });
CurDAG->ReplaceAllUsesOfValueWith(ChainVal, InputChain);
// If the node became dead and we haven't already seen it, delete it.
@@ -2694,14 +2782,15 @@ struct MatchScope {
/// for this.
class MatchStateUpdater : public SelectionDAG::DAGUpdateListener
{
- SmallVectorImpl<std::pair<SDValue, SDNode*> > &RecordedNodes;
- SmallVectorImpl<MatchScope> &MatchScopes;
+ SDNode **NodeToMatch;
+ SmallVectorImpl<std::pair<SDValue, SDNode *>> &RecordedNodes;
+ SmallVectorImpl<MatchScope> &MatchScopes;
public:
- MatchStateUpdater(SelectionDAG &DAG,
- SmallVectorImpl<std::pair<SDValue, SDNode*> > &RN,
- SmallVectorImpl<MatchScope> &MS) :
- SelectionDAG::DAGUpdateListener(DAG),
- RecordedNodes(RN), MatchScopes(MS) { }
+ MatchStateUpdater(SelectionDAG &DAG, SDNode **NodeToMatch,
+ SmallVectorImpl<std::pair<SDValue, SDNode *>> &RN,
+ SmallVectorImpl<MatchScope> &MS)
+ : SelectionDAG::DAGUpdateListener(DAG), NodeToMatch(NodeToMatch),
+ RecordedNodes(RN), MatchScopes(MS) {}
void NodeDeleted(SDNode *N, SDNode *E) override {
// Some early-returns here to avoid the search if we deleted the node or
@@ -2711,6 +2800,9 @@ public:
// update listener during matching a complex patterns.
if (!E || E->isMachineOpcode())
return;
+ // Check if NodeToMatch was updated.
+ if (N == *NodeToMatch)
+ *NodeToMatch = E;
// Performing linear search here does not matter because we almost never
// run this code. You'd have to have a CSE during complex pattern
// matching.
@@ -3003,7 +3095,7 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch,
// consistent.
std::unique_ptr<MatchStateUpdater> MSU;
if (ComplexPatternFuncMutatesDAG())
- MSU.reset(new MatchStateUpdater(*CurDAG, RecordedNodes,
+ MSU.reset(new MatchStateUpdater(*CurDAG, &NodeToMatch, RecordedNodes,
MatchScopes));
if (!CheckComplexPattern(NodeToMatch, RecordedNodes[RecNo].second,
@@ -3388,7 +3480,7 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch,
SelectionDAG::DAGNodeDeletedListener NDL(*CurDAG, [&](SDNode *N,
SDNode *E) {
auto &Chain = ChainNodesMatched;
- assert((!E || llvm::find(Chain, N) == Chain.end()) &&
+ assert((!E || !is_contained(Chain, N)) &&
"Chain node replaced during MorphNode");
Chain.erase(std::remove(Chain.begin(), Chain.end(), N), Chain.end());
});
@@ -3487,7 +3579,7 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch,
NodeToMatch->getValueType(i) == MVT::iPTR ||
Res.getValueType() == MVT::iPTR ||
NodeToMatch->getValueType(i).getSizeInBits() ==
- Res.getValueType().getSizeInBits()) &&
+ Res.getValueSizeInBits()) &&
"invalid replacement");
CurDAG->ReplaceAllUsesOfValueWith(SDValue(NodeToMatch, i), Res);
}
@@ -3579,7 +3671,7 @@ void SelectionDAGISel::CannotYetSelect(SDNode *N) {
unsigned iid =
cast<ConstantSDNode>(N->getOperand(HasInputChain))->getZExtValue();
if (iid < Intrinsic::num_intrinsics)
- Msg << "intrinsic %" << Intrinsic::getName((Intrinsic::ID)iid);
+ Msg << "intrinsic %" << Intrinsic::getName((Intrinsic::ID)iid, None);
else if (const TargetIntrinsicInfo *TII = TM.getIntrinsicInfo())
Msg << "target intrinsic %" << TII->getName(iid);
else
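The new NodeToMatch parameter makes MatchStateUpdater double as a pointer fixup: if a complex-pattern check mutates the DAG and CSE replaces the very node being matched, the matcher's local pointer has to follow the replacement or it would keep pointing at a deleted node. A standalone sketch of that pattern (plain C++ with illustrative names, not the DAGUpdateListener API):

    #include <cassert>

    struct Node {};

    struct PointerFixupListener {
      Node **Tracked; // e.g. &NodeToMatch in the matcher

      // Called with (deleted node, replacement node), like NodeDeleted above.
      void nodeDeleted(Node *N, Node *E) {
        if (E && N == *Tracked)
          *Tracked = E; // follow the CSE replacement
      }
    };

    int main() {
      Node Old, New;
      Node *NodeToMatch = &Old;
      PointerFixupListener L{&NodeToMatch};
      L.nodeDeleted(&Old, &New); // Old was CSE'd into New during matching
      assert(NodeToMatch == &New);
    }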
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
index 90aaba2..d27e245 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
@@ -55,7 +55,8 @@ void StatepointLoweringState::startNewStatepoint(SelectionDAGBuilder &Builder) {
NextSlotToAllocate = 0;
// Need to resize this on each safepoint - we need the two to stay in sync and
// the clear patterns of a SelectionDAGBuilder have no relation to
- // FunctionLoweringInfo. SmallBitVector::reset initializes all bits to false.
+ // FunctionLoweringInfo. Also need to ensure used bits get cleared.
+ AllocatedStackSlots.clear();
AllocatedStackSlots.resize(Builder.FuncInfo.StatepointStackSlots.size());
}
@@ -70,7 +71,7 @@ SDValue
StatepointLoweringState::allocateStackSlot(EVT ValueType,
SelectionDAGBuilder &Builder) {
NumSlotsAllocatedForStatepoints++;
- auto *MFI = Builder.DAG.getMachineFunction().getFrameInfo();
+ MachineFrameInfo &MFI = Builder.DAG.getMachineFunction().getFrameInfo();
unsigned SpillSize = ValueType.getSizeInBits() / 8;
assert((SpillSize * 8) == ValueType.getSizeInBits() && "Size not in bytes?");
@@ -82,16 +83,16 @@ StatepointLoweringState::allocateStackSlot(EVT ValueType,
const size_t NumSlots = AllocatedStackSlots.size();
assert(NextSlotToAllocate <= NumSlots && "Broken invariant");
- // The stack slots in StatepointStackSlots beyond the first NumSlots were
- // added in this instance of StatepointLoweringState, and cannot be re-used.
- assert(NumSlots <= Builder.FuncInfo.StatepointStackSlots.size() &&
+ assert(AllocatedStackSlots.size() ==
+ Builder.FuncInfo.StatepointStackSlots.size() &&
"Broken invariant");
for (; NextSlotToAllocate < NumSlots; NextSlotToAllocate++) {
if (!AllocatedStackSlots.test(NextSlotToAllocate)) {
const int FI = Builder.FuncInfo.StatepointStackSlots[NextSlotToAllocate];
- if (MFI->getObjectSize(FI) == SpillSize) {
+ if (MFI.getObjectSize(FI) == SpillSize) {
AllocatedStackSlots.set(NextSlotToAllocate);
+ // TODO: Is ValueType the right thing to use here?
return Builder.DAG.getFrameIndex(FI, ValueType);
}
}
@@ -101,9 +102,13 @@ StatepointLoweringState::allocateStackSlot(EVT ValueType,
SDValue SpillSlot = Builder.DAG.CreateStackTemporary(ValueType);
const unsigned FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
- MFI->markAsStatepointSpillSlotObjectIndex(FI);
+ MFI.markAsStatepointSpillSlotObjectIndex(FI);
Builder.FuncInfo.StatepointStackSlots.push_back(FI);
+ AllocatedStackSlots.resize(AllocatedStackSlots.size()+1, true);
+ assert(AllocatedStackSlots.size() ==
+ Builder.FuncInfo.StatepointStackSlots.size() &&
+ "Broken invariant");
StatepointMaxSlotsRequired = std::max<unsigned long>(
StatepointMaxSlotsRequired, Builder.FuncInfo.StatepointStackSlots.size());
@@ -350,9 +355,8 @@ spillIncomingStatepointValue(SDValue Incoming, SDValue Chain,
// vary since we spill vectors of pointers too). At some point we
// can consider allowing spills of smaller values to larger slots
// (i.e. change the '==' in the assert below to a '>=').
- auto *MFI = Builder.DAG.getMachineFunction().getFrameInfo();
- assert((MFI->getObjectSize(Index) * 8) ==
- Incoming.getValueType().getSizeInBits() &&
+ MachineFrameInfo &MFI = Builder.DAG.getMachineFunction().getFrameInfo();
+ assert((MFI.getObjectSize(Index) * 8) == Incoming.getValueSizeInBits() &&
"Bad spill: stack slot does not match!");
#endif
@@ -370,7 +374,7 @@ spillIncomingStatepointValue(SDValue Incoming, SDValue Chain,
/// Lower a single value incoming to a statepoint node. This value can be
/// either a deopt value or a gc value, the handling is the same. We special
/// case constants and allocas, then fall back to spilling if required.
-static void lowerIncomingStatepointValue(SDValue Incoming,
+static void lowerIncomingStatepointValue(SDValue Incoming, bool LiveInOnly,
SmallVectorImpl<SDValue> &Ops,
SelectionDAGBuilder &Builder) {
SDValue Chain = Builder.getRoot();
@@ -389,6 +393,14 @@ static void lowerIncomingStatepointValue(SDValue Incoming,
// relocate the address of the alloca itself?)
Ops.push_back(Builder.DAG.getTargetFrameIndex(FI->getIndex(),
Incoming.getValueType()));
+ } else if (LiveInOnly) {
+ // If this value is live in (not live-on-return, or live-through), we can
+ // treat it the same way patchpoint treats its "live in" values. We'll
+ // end up folding some of these into stack references, but they'll be
+ // handled by the register allocator. Note that we do not have the notion
+ // of a late use so these values might be placed in registers which are
+ // clobbered by the call. This is fine for live-in.
+ Ops.push_back(Incoming);
} else {
// Otherwise, locate a spill slot and explicitly spill it so it
// can be found by the runtime later. We currently do not support
@@ -439,19 +451,38 @@ lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops,
"non gc managed derived pointer found in statepoint");
}
}
+ assert(SI.Bases.size() == SI.Ptrs.size() && "Pointer without base!");
} else {
assert(SI.Bases.empty() && "No gc specified, so cannot relocate pointers!");
assert(SI.Ptrs.empty() && "No gc specified, so cannot relocate pointers!");
}
#endif
+ // Figure out what lowering strategy we're going to use for each part.
+ // Note: It is conservatively correct to lower both "live-in" and "live-out"
+ // as "live-through". A "live-through" variable is one which is "live-in",
+ // "live-out", and live throughout the lifetime of the call (i.e. we can find
+ // it from any PC within the transitive callee of the statepoint). In
+ // particular, if the callee spills callee preserved registers we may not
+ // be able to find a value placed in that register during the call. This is
+ // fine for live-out, but not for live-through. If we were willing to make
+ // assumptions about the code generator producing the callee, we could
+ // potentially allow live-through values in callee saved registers.
+ const bool LiveInDeopt =
+ SI.StatepointFlags & (uint64_t)StatepointFlags::DeoptLiveIn;
+
+ auto isGCValue = [&](const Value *V) {
+ return is_contained(SI.Ptrs, V) || is_contained(SI.Bases, V);
+ };
+
// Before we actually start lowering (and allocating spill slots for values),
// reserve any stack slots which we judge to be profitable to reuse for a
// particular value. This is purely an optimization over the code below and
// doesn't change semantics at all. It is important for performance that we
// reserve slots for both deopt and gc values before lowering either.
for (const Value *V : SI.DeoptState) {
- reservePreviousStackSlotForValue(V, Builder);
+ if (!LiveInDeopt || isGCValue(V))
+ reservePreviousStackSlotForValue(V, Builder);
}
for (unsigned i = 0; i < SI.Bases.size(); ++i) {
reservePreviousStackSlotForValue(SI.Bases[i], Builder);
@@ -468,7 +499,8 @@ lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops,
// what type of values are contained within.
for (const Value *V : SI.DeoptState) {
SDValue Incoming = Builder.getValue(V);
- lowerIncomingStatepointValue(Incoming, Ops, Builder);
+ const bool LiveInValue = LiveInDeopt && !isGCValue(V);
+ lowerIncomingStatepointValue(Incoming, LiveInValue, Ops, Builder);
}
// Finally, go ahead and lower all the gc arguments. There's no prefixed
@@ -478,10 +510,12 @@ lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops,
// (base[0], ptr[0], base[1], ptr[1], ...)
for (unsigned i = 0; i < SI.Bases.size(); ++i) {
const Value *Base = SI.Bases[i];
- lowerIncomingStatepointValue(Builder.getValue(Base), Ops, Builder);
+ lowerIncomingStatepointValue(Builder.getValue(Base), /*LiveInOnly*/ false,
+ Ops, Builder);
const Value *Ptr = SI.Ptrs[i];
- lowerIncomingStatepointValue(Builder.getValue(Ptr), Ops, Builder);
+ lowerIncomingStatepointValue(Builder.getValue(Ptr), /*LiveInOnly*/ false,
+ Ops, Builder);
}
// If there are any explicit spill slots passed to the statepoint, record
@@ -889,7 +923,7 @@ void SelectionDAGBuilder::visitGCResult(const GCResultInst &CI) {
void SelectionDAGBuilder::visitGCRelocate(const GCRelocateInst &Relocate) {
#ifndef NDEBUG
// Consistency check
- // We skip this check for relocates not in the same basic block as thier
+ // We skip this check for relocates not in the same basic block as their
// statepoint. It would be too expensive to preserve validation info through
// different basic blocks.
if (Relocate.getStatepoint()->getParent() == Relocate.getParent())
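Both the slot-reservation loop and the lowering loop above hinge on one predicate: a deopt value may be passed live-in only when the statepoint carries the DeoptLiveIn flag and the value is not also a gc base or derived pointer, since gc values must still be spilled so the runtime can relocate them. A sketch of that predicate in plain C++ (Value is an opaque stand-in):

    #include <algorithm>
    #include <vector>

    struct Value; // opaque stand-in for llvm::Value

    bool lowerAsLiveIn(const Value *V, bool LiveInDeopt,
                       const std::vector<const Value *> &Bases,
                       const std::vector<const Value *> &Ptrs) {
      auto contains = [&](const std::vector<const Value *> &Vs) {
        return std::find(Vs.begin(), Vs.end(), V) != Vs.end();
      };
      // gc values always take the spill path, flag or no flag.
      return LiveInDeopt && !contains(Bases) && !contains(Ptrs);
    }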
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 806646f..690f0d2 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -216,7 +216,7 @@ void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT,
case ISD::SETUEQ:
LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 :
(VT == MVT::f64) ? RTLIB::UO_F64 :
- (VT == MVT::f128) ? RTLIB::UO_F64 : RTLIB::UO_PPCF128;
+ (VT == MVT::f128) ? RTLIB::UO_F128 : RTLIB::UO_PPCF128;
LC2 = (VT == MVT::f32) ? RTLIB::OEQ_F32 :
(VT == MVT::f64) ? RTLIB::OEQ_F64 :
(VT == MVT::f128) ? RTLIB::OEQ_F128 : RTLIB::OEQ_PPCF128;
@@ -418,6 +418,58 @@ bool TargetLowering::TargetLoweringOpt::ShrinkDemandedOp(SDValue Op,
return false;
}
+bool
+TargetLowering::TargetLoweringOpt::SimplifyDemandedBits(SDNode *User,
+ unsigned OpIdx,
+ const APInt &Demanded,
+ DAGCombinerInfo &DCI) {
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ SDValue Op = User->getOperand(OpIdx);
+ APInt KnownZero, KnownOne;
+
+ if (!TLI.SimplifyDemandedBits(Op, Demanded, KnownZero, KnownOne,
+ *this, 0, true))
+ return false;
+
+
+ // Old will not always be the same as Op. For example:
+ //
+ // Demanded = 0xffffff
+ // Op = i64 truncate (i32 and x, 0xffffff)
+ // In this case simplify demand bits will want to replace the 'and' node
+ // with the value 'x', which will give us:
+ // Old = i32 and x, 0xffffff
+ // New = x
+ if (Old.hasOneUse()) {
+ // For the one use case, we just commit the change.
+ DCI.CommitTargetLoweringOpt(*this);
+ return true;
+ }
+
+ // If Old has more than one use then it must be Op, because the
+ // AssumeSingleUse flag is not propagated to recursive calls of
+ // SimplifyDemandedBits, so the only node with multiple uses that
+ // it will attempt to combine will be Op.
+ assert(Old == Op);
+
+ SmallVector<SDValue, 4> NewOps;
+ for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) {
+ if (i == OpIdx) {
+ NewOps.push_back(New);
+ continue;
+ }
+ NewOps.push_back(User->getOperand(i));
+ }
+ DAG.UpdateNodeOperands(User, NewOps);
+ // Op has fewer users now, so we may be able to perform additional combines
+ // with it.
+ DCI.AddToWorklist(Op.getNode());
+ // User's operands have been updated, so we may be able to do new combines
+ // with it.
+ DCI.AddToWorklist(User);
+ return true;
+}
+
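The Old/Op distinction handled above comes from folds like the truncate/and example in the comment: once only the low 24 bits are demanded, the 'and' contributes nothing and the operand collapses to x. The identity behind that fold is easy to verify in isolation (a standalone C++ check, not LLVM code):

    #include <cassert>
    #include <cstdint>

    int main() {
      const uint32_t Demanded = 0xffffff; // only the low 24 bits are used
      for (uint64_t X : {0x0ull, 0x0123456789abcdefull, ~0ull}) {
        uint64_t Simplified = X;           // the 'and' folded away
        uint64_t Original = X & 0xffffff;  // i32 and x, 0xffffff
        // After truncation to i32, the two agree on every demanded bit.
        assert((uint32_t(Simplified) & Demanded) ==
               (uint32_t(Original) & Demanded));
      }
    }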
/// Look at Op. At this point, we know that only the DemandedMask bits of the
/// result of Op are ever used downstream. If we can use this information to
/// simplify Op, create a new simplified DAG node and return true, returning the
@@ -430,9 +482,10 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
APInt &KnownZero,
APInt &KnownOne,
TargetLoweringOpt &TLO,
- unsigned Depth) const {
+ unsigned Depth,
+ bool AssumeSingleUse) const {
unsigned BitWidth = DemandedMask.getBitWidth();
- assert(Op.getValueType().getScalarType().getSizeInBits() == BitWidth &&
+ assert(Op.getScalarValueSizeInBits() == BitWidth &&
"Mask size mismatches value type size!");
APInt NewMask = DemandedMask;
SDLoc dl(Op);
@@ -442,7 +495,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
KnownZero = KnownOne = APInt(BitWidth, 0);
// Other users may use these bits.
- if (!Op.getNode()->hasOneUse()) {
+ if (!Op.getNode()->hasOneUse() && !AssumeSingleUse) {
if (Depth != 0) {
// If not at the root, Just compute the KnownZero/KnownOne bits to
// simplify things downstream.
@@ -468,22 +521,63 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
KnownOne = cast<ConstantSDNode>(Op)->getAPIntValue();
KnownZero = ~KnownOne;
return false; // Don't fall through, will infinitely loop.
+ case ISD::BUILD_VECTOR:
+ // Collect the known bits that are shared by every constant vector element.
+ KnownZero = KnownOne = APInt::getAllOnesValue(BitWidth);
+ for (SDValue SrcOp : Op->ops()) {
+ if (!isa<ConstantSDNode>(SrcOp)) {
+ // We can only handle all constant values - bail out with no known bits.
+ KnownZero = KnownOne = APInt(BitWidth, 0);
+ return false;
+ }
+ KnownOne2 = cast<ConstantSDNode>(SrcOp)->getAPIntValue();
+ KnownZero2 = ~KnownOne2;
+
+ // BUILD_VECTOR can implicitly truncate sources; we must handle this.
+ if (KnownOne2.getBitWidth() != BitWidth) {
+ assert(KnownOne2.getBitWidth() > BitWidth &&
+ KnownZero2.getBitWidth() > BitWidth &&
+ "Expected BUILD_VECTOR implicit truncation");
+ KnownOne2 = KnownOne2.trunc(BitWidth);
+ KnownZero2 = KnownZero2.trunc(BitWidth);
+ }
+
+ // Known bits are the values that are shared by every element.
+ // TODO: support per-element known bits.
+ KnownOne &= KnownOne2;
+ KnownZero &= KnownZero2;
+ }
+ return false; // Don't fall through, will infinitely loop.
case ISD::AND:
// If the RHS is a constant, check to see if the LHS would be zero without
// using the bits from the RHS. Below, we use knowledge about the RHS to
// simplify the LHS, here we're using information from the LHS to simplify
// the RHS.
if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+ SDValue Op0 = Op.getOperand(0);
APInt LHSZero, LHSOne;
// Do not increment Depth here; that can cause an infinite loop.
- TLO.DAG.computeKnownBits(Op.getOperand(0), LHSZero, LHSOne, Depth);
+ TLO.DAG.computeKnownBits(Op0, LHSZero, LHSOne, Depth);
// If the LHS already has zeros where RHSC does, this and is dead.
if ((LHSZero & NewMask) == (~RHSC->getAPIntValue() & NewMask))
- return TLO.CombineTo(Op, Op.getOperand(0));
+ return TLO.CombineTo(Op, Op0);
+
// If any of the set bits in the RHS are known zero on the LHS, shrink
// the constant.
if (TLO.ShrinkDemandedConstant(Op, ~LHSZero & NewMask))
return true;
+
+ // Bitwise-not (xor X, -1) is a special case: we don't usually shrink its
+ // constant, but if this 'and' is only clearing bits that were just set by
+ // the xor, then this 'and' can be eliminated by shrinking the mask of
+ // the xor. For example, for a 32-bit X:
+ // and (xor (srl X, 31), -1), 1 --> xor (srl X, 31), 1
+ if (isBitwiseNot(Op0) && Op0.hasOneUse() &&
+ LHSOne == ~RHSC->getAPIntValue()) {
+ SDValue Xor = TLO.DAG.getNode(ISD::XOR, dl, Op.getValueType(),
+ Op0.getOperand(0), Op.getOperand(1));
+ return TLO.CombineTo(Op, Xor);
+ }
}
if (SimplifyDemandedBits(Op.getOperand(1), NewMask, KnownZero,
@@ -599,10 +693,10 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
// If the RHS is a constant, see if we can simplify it.
// for XOR, we prefer to force bits to 1 if they will make a -1.
- // if we can't force bits, try to shrink constant
+ // If we can't force bits, try to shrink the constant.
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
APInt Expanded = C->getAPIntValue() | (~NewMask);
- // if we can expand it to have all bits set, do it
+ // If we can expand it to have all bits set, do it.
if (Expanded.isAllOnesValue()) {
if (Expanded != C->getAPIntValue()) {
EVT VT = Op.getValueType();
@@ -610,7 +704,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
TLO.DAG.getConstant(Expanded, dl, VT));
return TLO.CombineTo(Op, New);
}
- // if it already has all the bits set, nothing to change
+ // If it already has all the bits set, nothing to change
// but don't shrink either!
} else if (TLO.ShrinkDemandedConstant(Op, NewMask)) {
return true;
@@ -823,7 +917,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
// demand the input sign bit.
APInt HighBits = APInt::getHighBitsSet(BitWidth, ShAmt);
if (HighBits.intersects(NewMask))
- InDemandedMask |= APInt::getSignBit(VT.getScalarType().getSizeInBits());
+ InDemandedMask |= APInt::getSignBit(VT.getScalarSizeInBits());
if (SimplifyDemandedBits(Op.getOperand(0), InDemandedMask,
KnownZero, KnownOne, TLO, Depth+1))
@@ -866,9 +960,9 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
APInt MsbMask = APInt::getHighBitsSet(BitWidth, 1);
// If we only care about the highest bit, don't bother shifting right.
if (MsbMask == NewMask) {
- unsigned ShAmt = ExVT.getScalarType().getSizeInBits();
+ unsigned ShAmt = ExVT.getScalarSizeInBits();
SDValue InOp = Op.getOperand(0);
- unsigned VTBits = Op->getValueType(0).getScalarType().getSizeInBits();
+ unsigned VTBits = Op->getValueType(0).getScalarSizeInBits();
bool AlreadySignExtended =
TLO.DAG.ComputeNumSignBits(InOp) >= VTBits-ShAmt+1;
// However if the input is already sign extended we expect the sign
@@ -892,17 +986,17 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
// present in the input.
APInt NewBits =
APInt::getHighBitsSet(BitWidth,
- BitWidth - ExVT.getScalarType().getSizeInBits());
+ BitWidth - ExVT.getScalarSizeInBits());
// If none of the extended bits are demanded, eliminate the sextinreg.
if ((NewBits & NewMask) == 0)
return TLO.CombineTo(Op, Op.getOperand(0));
APInt InSignBit =
- APInt::getSignBit(ExVT.getScalarType().getSizeInBits()).zext(BitWidth);
+ APInt::getSignBit(ExVT.getScalarSizeInBits()).zext(BitWidth);
APInt InputDemandedBits =
APInt::getLowBitsSet(BitWidth,
- ExVT.getScalarType().getSizeInBits()) &
+ ExVT.getScalarSizeInBits()) &
NewMask;
// Since the sign extended bits are demanded, we know that the sign
@@ -919,8 +1013,8 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
// If the input sign bit is known zero, convert this into a zero extension.
if (KnownZero.intersects(InSignBit))
- return TLO.CombineTo(Op,
- TLO.DAG.getZeroExtendInReg(Op.getOperand(0),dl,ExVT));
+ return TLO.CombineTo(Op, TLO.DAG.getZeroExtendInReg(
+ Op.getOperand(0), dl, ExVT.getScalarType()));
if (KnownOne.intersects(InSignBit)) { // Input sign bit known set
KnownOne |= NewBits;
@@ -957,8 +1051,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
break;
}
case ISD::ZERO_EXTEND: {
- unsigned OperandBitWidth =
- Op.getOperand(0).getValueType().getScalarType().getSizeInBits();
+ unsigned OperandBitWidth = Op.getOperand(0).getScalarValueSizeInBits();
APInt InMask = NewMask.trunc(OperandBitWidth);
// If none of the top bits are demanded, convert this into an any_extend.
@@ -980,7 +1073,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
}
case ISD::SIGN_EXTEND: {
EVT InVT = Op.getOperand(0).getValueType();
- unsigned InBits = InVT.getScalarType().getSizeInBits();
+ unsigned InBits = InVT.getScalarSizeInBits();
APInt InMask = APInt::getLowBitsSet(BitWidth, InBits);
APInt InSignBit = APInt::getBitsSet(BitWidth, InBits - 1, InBits);
APInt NewBits = ~InMask & NewMask;
@@ -1020,8 +1113,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
break;
}
case ISD::ANY_EXTEND: {
- unsigned OperandBitWidth =
- Op.getOperand(0).getValueType().getScalarType().getSizeInBits();
+ unsigned OperandBitWidth = Op.getOperand(0).getScalarValueSizeInBits();
APInt InMask = NewMask.trunc(OperandBitWidth);
if (SimplifyDemandedBits(Op.getOperand(0), InMask,
KnownZero, KnownOne, TLO, Depth+1))
@@ -1034,8 +1126,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
case ISD::TRUNCATE: {
// Simplify the input, using demanded bit information, and compute the known
// zero/one bits live out.
- unsigned OperandBitWidth =
- Op.getOperand(0).getValueType().getScalarType().getSizeInBits();
+ unsigned OperandBitWidth = Op.getOperand(0).getScalarValueSizeInBits();
APInt TruncMask = NewMask.zext(OperandBitWidth);
if (SimplifyDemandedBits(Op.getOperand(0), TruncMask,
KnownZero, KnownOne, TLO, Depth+1))
@@ -1109,7 +1200,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
if (!TLO.LegalOperations() &&
!Op.getValueType().isVector() &&
!Op.getOperand(0).getValueType().isVector() &&
- NewMask == APInt::getSignBit(Op.getValueType().getSizeInBits()) &&
+ NewMask == APInt::getSignBit(Op.getValueSizeInBits()) &&
Op.getOperand(0).getValueType().isFloatingPoint()) {
bool OpVTLegal = isOperationLegalOrCustom(ISD::FGETSIGN, Op.getValueType());
bool i32Legal = isOperationLegalOrCustom(ISD::FGETSIGN, MVT::i32);
@@ -1120,10 +1211,10 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
// Make a FGETSIGN + SHL to move the sign bit into the appropriate
// place. We expect the SHL to be eliminated by other optimizations.
SDValue Sign = TLO.DAG.getNode(ISD::FGETSIGN, dl, Ty, Op.getOperand(0));
- unsigned OpVTSizeInBits = Op.getValueType().getSizeInBits();
+ unsigned OpVTSizeInBits = Op.getValueSizeInBits();
if (!OpVTLegal && OpVTSizeInBits > 32)
Sign = TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, Op.getValueType(), Sign);
- unsigned ShVal = Op.getValueType().getSizeInBits()-1;
+ unsigned ShVal = Op.getValueSizeInBits() - 1;
SDValue ShAmt = TLO.DAG.getConstant(ShVal, dl, Op.getValueType());
return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, dl,
Op.getValueType(),
@@ -1139,16 +1230,27 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
APInt LoMask = APInt::getLowBitsSet(BitWidth,
BitWidth - NewMask.countLeadingZeros());
if (SimplifyDemandedBits(Op.getOperand(0), LoMask, KnownZero2,
- KnownOne2, TLO, Depth+1))
- return true;
- if (SimplifyDemandedBits(Op.getOperand(1), LoMask, KnownZero2,
- KnownOne2, TLO, Depth+1))
- return true;
- // See if the operation should be performed at a smaller bit width.
- if (TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl))
+ KnownOne2, TLO, Depth+1) ||
+ SimplifyDemandedBits(Op.getOperand(1), LoMask, KnownZero2,
+ KnownOne2, TLO, Depth+1) ||
+ // See if the operation should be performed at a smaller bit width.
+ TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl)) {
+ const SDNodeFlags *Flags = Op.getNode()->getFlags();
+ if (Flags->hasNoSignedWrap() || Flags->hasNoUnsignedWrap()) {
+ // Disable the nsw and nuw flags. We can no longer guarantee that we
+ // won't wrap after simplification.
+ SDNodeFlags NewFlags = *Flags;
+ NewFlags.setNoSignedWrap(false);
+ NewFlags.setNoUnsignedWrap(false);
+ SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, Op.getValueType(),
+ Op.getOperand(0), Op.getOperand(1),
+ &NewFlags);
+ return TLO.CombineTo(Op, NewOp);
+ }
return true;
+ }
+ LLVM_FALLTHROUGH;
}
- // FALL THROUGH
default:
// Just use computeKnownBits to compute output bits.
TLO.DAG.computeKnownBits(Op, KnownZero, KnownOne, Depth);
@@ -1214,11 +1316,11 @@ bool TargetLowering::isConstTrueVal(const SDNode *N) const {
if (!BV)
return false;
- BitVector UndefElements;
- CN = BV->getConstantSplatNode(&UndefElements);
- // Only interested in constant splats, and we don't try to handle undef
- // elements in identifying boolean constants.
- if (!CN || UndefElements.none())
+ // Only interested in constant splats; we don't care about undef
+ // elements in identifying boolean constants, and getConstantSplatNode
+ // returns null if all ops are undef.
+ CN = BV->getConstantSplatNode();
+ if (!CN)
return false;
}
@@ -1254,11 +1356,11 @@ bool TargetLowering::isConstFalseVal(const SDNode *N) const {
if (!BV)
return false;
- BitVector UndefElements;
- CN = BV->getConstantSplatNode(&UndefElements);
- // Only interested in constant splats, and we don't try to handle undef
- // elements in identifying boolean constants.
- if (!CN || UndefElements.none())
+ // Only interested in constant splats; we don't care about undef
+ // elements in identifying boolean constants, and getConstantSplatNode
+ // returns null if all ops are undef.
+ CN = BV->getConstantSplatNode();
+ if (!CN)
return false;
}
@@ -1390,7 +1492,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
const APInt &ShAmt
= cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
- ShAmt == Log2_32(N0.getValueType().getSizeInBits())) {
+ ShAmt == Log2_32(N0.getValueSizeInBits())) {
if ((C1 == 0) == (Cond == ISD::SETEQ)) {
// (srl (ctlz x), 5) == 0 -> X != 0
// (srl (ctlz x), 5) != 1 -> X != 0
@@ -1412,8 +1514,8 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
CTPOP = N0.getOperand(0);
if (CTPOP.hasOneUse() && CTPOP.getOpcode() == ISD::CTPOP &&
- (N0 == CTPOP || N0.getValueType().getSizeInBits() >
- Log2_32_Ceil(CTPOP.getValueType().getSizeInBits()))) {
+ (N0 == CTPOP ||
+ N0.getValueSizeInBits() > Log2_32_Ceil(CTPOP.getValueSizeInBits()))) {
EVT CTVT = CTPOP.getValueType();
SDValue CTOp = CTPOP.getOperand(0);
@@ -1478,6 +1580,10 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
if (isTypeDesirableForOp(ISD::SETCC, MinVT)) {
// Will get folded away.
SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, MinVT, PreExt);
+ if (MinBits == 1 && C1 == 1)
+ // Invert the condition.
+ return DAG.getSetCC(dl, VT, Trunc, DAG.getConstant(0, dl, MVT::i1),
+ Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
SDValue C = DAG.getConstant(C1.trunc(MinBits), dl, MinVT);
return DAG.getSetCC(dl, VT, Trunc, C, Cond);
}
@@ -1530,7 +1636,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
APInt bestMask;
unsigned bestWidth = 0, bestOffset = 0;
if (!Lod->isVolatile() && Lod->isUnindexed()) {
- unsigned origWidth = N0.getValueType().getSizeInBits();
+ unsigned origWidth = N0.getValueSizeInBits();
unsigned maskWidth = origWidth;
// We can narrow (e.g.) 16-bit extending loads on 32-bit target to
// 8 bits, but have to be careful...
@@ -1577,7 +1683,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
// If the LHS is a ZERO_EXTEND, perform the comparison on the input.
if (N0.getOpcode() == ISD::ZERO_EXTEND) {
- unsigned InSize = N0.getOperand(0).getValueType().getSizeInBits();
+ unsigned InSize = N0.getOperand(0).getValueSizeInBits();
// If the comparison constant has bits in the upper part, the
// zero-extended value could never match.
@@ -2297,7 +2403,7 @@ void TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
Ops.push_back(Op);
return;
}
- // fall through
+ LLVM_FALLTHROUGH;
case 'i': // Simple Integer or Relocatable Constant
case 'n': // Simple Integer
case 's': { // Relocatable Constant
@@ -2946,7 +3052,7 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, const APInt &Divisor,
Q = SDValue(DAG.getNode(ISD::UMUL_LOHI, dl, DAG.getVTList(VT, VT), Q,
DAG.getConstant(magics.m, dl, VT)).getNode(), 1);
else
- return SDValue(); // No mulhu or equvialent
+ return SDValue(); // No mulhu or equivalent
Created->push_back(Q.getNode());
@@ -2987,108 +3093,190 @@ verifyReturnAddressArgumentIsConstant(SDValue Op, SelectionDAG &DAG) const {
// Legalization Utilities
//===----------------------------------------------------------------------===//
-bool TargetLowering::expandMUL(SDNode *N, SDValue &Lo, SDValue &Hi, EVT HiLoVT,
- SelectionDAG &DAG, SDValue LL, SDValue LH,
- SDValue RL, SDValue RH) const {
- EVT VT = N->getValueType(0);
- SDLoc dl(N);
-
- bool HasMULHS = isOperationLegalOrCustom(ISD::MULHS, HiLoVT);
- bool HasMULHU = isOperationLegalOrCustom(ISD::MULHU, HiLoVT);
- bool HasSMUL_LOHI = isOperationLegalOrCustom(ISD::SMUL_LOHI, HiLoVT);
- bool HasUMUL_LOHI = isOperationLegalOrCustom(ISD::UMUL_LOHI, HiLoVT);
- if (HasMULHU || HasMULHS || HasUMUL_LOHI || HasSMUL_LOHI) {
- unsigned OuterBitSize = VT.getSizeInBits();
- unsigned InnerBitSize = HiLoVT.getSizeInBits();
- unsigned LHSSB = DAG.ComputeNumSignBits(N->getOperand(0));
- unsigned RHSSB = DAG.ComputeNumSignBits(N->getOperand(1));
-
- // LL, LH, RL, and RH must be either all NULL or all set to a value.
- assert((LL.getNode() && LH.getNode() && RL.getNode() && RH.getNode()) ||
- (!LL.getNode() && !LH.getNode() && !RL.getNode() && !RH.getNode()));
-
- if (!LL.getNode() && !RL.getNode() &&
- isOperationLegalOrCustom(ISD::TRUNCATE, HiLoVT)) {
- LL = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, N->getOperand(0));
- RL = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, N->getOperand(1));
- }
-
- if (!LL.getNode())
- return false;
+bool TargetLowering::expandMUL_LOHI(unsigned Opcode, EVT VT, SDLoc dl,
+ SDValue LHS, SDValue RHS,
+ SmallVectorImpl<SDValue> &Result,
+ EVT HiLoVT, SelectionDAG &DAG,
+ MulExpansionKind Kind, SDValue LL,
+ SDValue LH, SDValue RL, SDValue RH) const {
+ assert(Opcode == ISD::MUL || Opcode == ISD::UMUL_LOHI ||
+ Opcode == ISD::SMUL_LOHI);
+
+ bool HasMULHS = (Kind == MulExpansionKind::Always) ||
+ isOperationLegalOrCustom(ISD::MULHS, HiLoVT);
+ bool HasMULHU = (Kind == MulExpansionKind::Always) ||
+ isOperationLegalOrCustom(ISD::MULHU, HiLoVT);
+ bool HasSMUL_LOHI = (Kind == MulExpansionKind::Always) ||
+ isOperationLegalOrCustom(ISD::SMUL_LOHI, HiLoVT);
+ bool HasUMUL_LOHI = (Kind == MulExpansionKind::Always) ||
+ isOperationLegalOrCustom(ISD::UMUL_LOHI, HiLoVT);
+
+ if (!HasMULHU && !HasMULHS && !HasUMUL_LOHI && !HasSMUL_LOHI)
+ return false;
- APInt HighMask = APInt::getHighBitsSet(OuterBitSize, InnerBitSize);
- if (DAG.MaskedValueIsZero(N->getOperand(0), HighMask) &&
- DAG.MaskedValueIsZero(N->getOperand(1), HighMask)) {
- // The inputs are both zero-extended.
- if (HasUMUL_LOHI) {
- // We can emit a umul_lohi.
- Lo = DAG.getNode(ISD::UMUL_LOHI, dl, DAG.getVTList(HiLoVT, HiLoVT), LL,
- RL);
- Hi = SDValue(Lo.getNode(), 1);
- return true;
- }
- if (HasMULHU) {
- // We can emit a mulhu+mul.
- Lo = DAG.getNode(ISD::MUL, dl, HiLoVT, LL, RL);
- Hi = DAG.getNode(ISD::MULHU, dl, HiLoVT, LL, RL);
- return true;
- }
+ unsigned OuterBitSize = VT.getScalarSizeInBits();
+ unsigned InnerBitSize = HiLoVT.getScalarSizeInBits();
+ unsigned LHSSB = DAG.ComputeNumSignBits(LHS);
+ unsigned RHSSB = DAG.ComputeNumSignBits(RHS);
+
+ // LL, LH, RL, and RH must be either all NULL or all set to a value.
+ assert((LL.getNode() && LH.getNode() && RL.getNode() && RH.getNode()) ||
+ (!LL.getNode() && !LH.getNode() && !RL.getNode() && !RH.getNode()));
+
+ SDVTList VTs = DAG.getVTList(HiLoVT, HiLoVT);
+ auto MakeMUL_LOHI = [&](SDValue L, SDValue R, SDValue &Lo, SDValue &Hi,
+ bool Signed) -> bool {
+ if ((Signed && HasSMUL_LOHI) || (!Signed && HasUMUL_LOHI)) {
+ Lo = DAG.getNode(Signed ? ISD::SMUL_LOHI : ISD::UMUL_LOHI, dl, VTs, L, R);
+ Hi = SDValue(Lo.getNode(), 1);
+ return true;
}
- if (LHSSB > InnerBitSize && RHSSB > InnerBitSize) {
- // The input values are both sign-extended.
- if (HasSMUL_LOHI) {
- // We can emit a smul_lohi.
- Lo = DAG.getNode(ISD::SMUL_LOHI, dl, DAG.getVTList(HiLoVT, HiLoVT), LL,
- RL);
- Hi = SDValue(Lo.getNode(), 1);
- return true;
- }
- if (HasMULHS) {
- // We can emit a mulhs+mul.
- Lo = DAG.getNode(ISD::MUL, dl, HiLoVT, LL, RL);
- Hi = DAG.getNode(ISD::MULHS, dl, HiLoVT, LL, RL);
- return true;
- }
+ if ((Signed && HasMULHS) || (!Signed && HasMULHU)) {
+ Lo = DAG.getNode(ISD::MUL, dl, HiLoVT, L, R);
+ Hi = DAG.getNode(Signed ? ISD::MULHS : ISD::MULHU, dl, HiLoVT, L, R);
+ return true;
}
+ return false;
+ };
- if (!LH.getNode() && !RH.getNode() &&
- isOperationLegalOrCustom(ISD::SRL, VT) &&
- isOperationLegalOrCustom(ISD::TRUNCATE, HiLoVT)) {
- auto &DL = DAG.getDataLayout();
- unsigned ShiftAmt = VT.getSizeInBits() - HiLoVT.getSizeInBits();
- SDValue Shift = DAG.getConstant(ShiftAmt, dl, getShiftAmountTy(VT, DL));
- LH = DAG.getNode(ISD::SRL, dl, VT, N->getOperand(0), Shift);
- LH = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, LH);
- RH = DAG.getNode(ISD::SRL, dl, VT, N->getOperand(1), Shift);
- RH = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, RH);
- }
+ SDValue Lo, Hi;
- if (!LH.getNode())
- return false;
+ if (!LL.getNode() && !RL.getNode() &&
+ isOperationLegalOrCustom(ISD::TRUNCATE, HiLoVT)) {
+ LL = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, LHS);
+ RL = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, RHS);
+ }
+
+ if (!LL.getNode())
+ return false;
- if (HasUMUL_LOHI) {
- // Lo,Hi = umul LHS, RHS.
- SDValue UMulLOHI = DAG.getNode(ISD::UMUL_LOHI, dl,
- DAG.getVTList(HiLoVT, HiLoVT), LL, RL);
- Lo = UMulLOHI;
- Hi = UMulLOHI.getValue(1);
- RH = DAG.getNode(ISD::MUL, dl, HiLoVT, LL, RH);
- LH = DAG.getNode(ISD::MUL, dl, HiLoVT, LH, RL);
- Hi = DAG.getNode(ISD::ADD, dl, HiLoVT, Hi, RH);
- Hi = DAG.getNode(ISD::ADD, dl, HiLoVT, Hi, LH);
+ APInt HighMask = APInt::getHighBitsSet(OuterBitSize, InnerBitSize);
+ if (DAG.MaskedValueIsZero(LHS, HighMask) &&
+ DAG.MaskedValueIsZero(RHS, HighMask)) {
+ // The inputs are both zero-extended.
+ if (MakeMUL_LOHI(LL, RL, Lo, Hi, false)) {
+ Result.push_back(Lo);
+ Result.push_back(Hi);
+ if (Opcode != ISD::MUL) {
+ SDValue Zero = DAG.getConstant(0, dl, HiLoVT);
+ Result.push_back(Zero);
+ Result.push_back(Zero);
+ }
return true;
}
- if (HasMULHU) {
- Lo = DAG.getNode(ISD::MUL, dl, HiLoVT, LL, RL);
- Hi = DAG.getNode(ISD::MULHU, dl, HiLoVT, LL, RL);
- RH = DAG.getNode(ISD::MUL, dl, HiLoVT, LL, RH);
- LH = DAG.getNode(ISD::MUL, dl, HiLoVT, LH, RL);
- Hi = DAG.getNode(ISD::ADD, dl, HiLoVT, Hi, RH);
- Hi = DAG.getNode(ISD::ADD, dl, HiLoVT, Hi, LH);
+ }
+
+ if (!VT.isVector() && Opcode == ISD::MUL && LHSSB > InnerBitSize &&
+ RHSSB > InnerBitSize) {
+ // The input values are both sign-extended.
+ // TODO non-MUL case?
+ if (MakeMUL_LOHI(LL, RL, Lo, Hi, true)) {
+ Result.push_back(Lo);
+ Result.push_back(Hi);
return true;
}
}
- return false;
+
+ unsigned ShiftAmount = OuterBitSize - InnerBitSize;
+ EVT ShiftAmountTy = getShiftAmountTy(VT, DAG.getDataLayout());
+ if (APInt::getMaxValue(ShiftAmountTy.getSizeInBits()).ult(ShiftAmount)) {
+ // FIXME getShiftAmountTy does not always return a sensible result when VT
+ // is an illegal type, and so the type may be too small to fit the shift
+ // amount. Override it with i32. The shift will have to be legalized.
+ ShiftAmountTy = MVT::i32;
+ }
+ SDValue Shift = DAG.getConstant(ShiftAmount, dl, ShiftAmountTy);
+
+ if (!LH.getNode() && !RH.getNode() &&
+ isOperationLegalOrCustom(ISD::SRL, VT) &&
+ isOperationLegalOrCustom(ISD::TRUNCATE, HiLoVT)) {
+ LH = DAG.getNode(ISD::SRL, dl, VT, LHS, Shift);
+ LH = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, LH);
+ RH = DAG.getNode(ISD::SRL, dl, VT, RHS, Shift);
+ RH = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, RH);
+ }
+
+ if (!LH.getNode())
+ return false;
+
+ if (!MakeMUL_LOHI(LL, RL, Lo, Hi, false))
+ return false;
+
+ Result.push_back(Lo);
+
+ if (Opcode == ISD::MUL) {
+ RH = DAG.getNode(ISD::MUL, dl, HiLoVT, LL, RH);
+ LH = DAG.getNode(ISD::MUL, dl, HiLoVT, LH, RL);
+ Hi = DAG.getNode(ISD::ADD, dl, HiLoVT, Hi, RH);
+ Hi = DAG.getNode(ISD::ADD, dl, HiLoVT, Hi, LH);
+ Result.push_back(Hi);
+ return true;
+ }
+
+ // Compute the full-width result.
+ auto Merge = [&](SDValue Lo, SDValue Hi) -> SDValue {
+ Lo = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Lo);
+ Hi = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Hi);
+ Hi = DAG.getNode(ISD::SHL, dl, VT, Hi, Shift);
+ return DAG.getNode(ISD::OR, dl, VT, Lo, Hi);
+ };
+
+ SDValue Next = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Hi);
+ if (!MakeMUL_LOHI(LL, RH, Lo, Hi, false))
+ return false;
+
+ // This is effectively the add part of a multiply-add of half-sized operands,
+ // so it cannot overflow.
+ Next = DAG.getNode(ISD::ADD, dl, VT, Next, Merge(Lo, Hi));
+
+ if (!MakeMUL_LOHI(LH, RL, Lo, Hi, false))
+ return false;
+
+ Next = DAG.getNode(ISD::ADDC, dl, DAG.getVTList(VT, MVT::Glue), Next,
+ Merge(Lo, Hi));
+
+ SDValue Carry = Next.getValue(1);
+ Result.push_back(DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, Next));
+ Next = DAG.getNode(ISD::SRL, dl, VT, Next, Shift);
+
+ if (!MakeMUL_LOHI(LH, RH, Lo, Hi, Opcode == ISD::SMUL_LOHI))
+ return false;
+
+ SDValue Zero = DAG.getConstant(0, dl, HiLoVT);
+ Hi = DAG.getNode(ISD::ADDE, dl, DAG.getVTList(HiLoVT, MVT::Glue), Hi, Zero,
+ Carry);
+ Next = DAG.getNode(ISD::ADD, dl, VT, Next, Merge(Lo, Hi));
+
+ if (Opcode == ISD::SMUL_LOHI) {
+ SDValue NextSub = DAG.getNode(ISD::SUB, dl, VT, Next,
+ DAG.getNode(ISD::ZERO_EXTEND, dl, VT, RL));
+ Next = DAG.getSelectCC(dl, LH, Zero, NextSub, Next, ISD::SETLT);
+
+ NextSub = DAG.getNode(ISD::SUB, dl, VT, Next,
+ DAG.getNode(ISD::ZERO_EXTEND, dl, VT, LL));
+ Next = DAG.getSelectCC(dl, RH, Zero, NextSub, Next, ISD::SETLT);
+ }
+
+ Result.push_back(DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, Next));
+ Next = DAG.getNode(ISD::SRL, dl, VT, Next, Shift);
+ Result.push_back(DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, Next));
+ return true;
+}
+
+bool TargetLowering::expandMUL(SDNode *N, SDValue &Lo, SDValue &Hi, EVT HiLoVT,
+ SelectionDAG &DAG, MulExpansionKind Kind,
+ SDValue LL, SDValue LH, SDValue RL,
+ SDValue RH) const {
+ SmallVector<SDValue, 2> Result;
+ bool Ok = expandMUL_LOHI(N->getOpcode(), N->getValueType(0), N,
+ N->getOperand(0), N->getOperand(1), Result, HiLoVT,
+ DAG, Kind, LL, LH, RL, RH);
+ if (Ok) {
+ assert(Result.size() == 2);
+ Lo = Result[0];
+ Hi = Result[1];
+ }
+ return Ok;
}
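The expansion above is schoolbook multiplication on half-width digits: with L = LH*2^n + LL and R = RH*2^n + RL, the product is LL*RL + (LL*RH + LH*RL)*2^n + LH*RH*2^(2n), and the ADDC/ADDE pair threads the carry between the partial sums. A minimal standalone version of the unsigned case for n = 32 (plain C++, mirroring but not reproducing the DAG expansion):

    #include <cassert>
    #include <cstdint>

    // 64x64 -> 128-bit unsigned multiply from 32x32 -> 64 pieces. The
    // 'Cross' sum cannot overflow: (2^32-1)^2 + 2*(2^32-1) < 2^64.
    void umul_lohi(uint64_t L, uint64_t R, uint64_t &Hi, uint64_t &Lo) {
      uint64_t LL = L & 0xffffffff, LH = L >> 32;
      uint64_t RL = R & 0xffffffff, RH = R >> 32;
      uint64_t LoLo = LL * RL, HiLo = LH * RL;
      uint64_t LoHi = LL * RH, HiHi = LH * RH;
      uint64_t Cross = (LoLo >> 32) + (HiLo & 0xffffffff) + LoHi;
      Hi = (HiLo >> 32) + (Cross >> 32) + HiHi;
      Lo = (Cross << 32) | (LoLo & 0xffffffff);
    }

    int main() {
      uint64_t Hi, Lo;
      umul_lohi(~0ull, ~0ull, Hi, Lo); // (2^64-1)^2 = 2^128 - 2^65 + 1
      assert(Hi == 0xfffffffffffffffeull && Lo == 1);
    }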
bool TargetLowering::expandFP_TO_SINT(SDNode *Node, SDValue &Result,
@@ -3190,7 +3378,7 @@ SDValue TargetLowering::scalarizeVectorLoad(LoadSDNode *LD,
}
SDValue NewChain = DAG.getNode(ISD::TokenFactor, SL, MVT::Other, LoadChains);
- SDValue Value = DAG.getNode(ISD::BUILD_VECTOR, SL, LD->getValueType(0), Vals);
+ SDValue Value = DAG.getBuildVector(LD->getValueType(0), SL, Vals);
return DAG.getMergeValues({ Value, NewChain }, SL);
}
@@ -3518,6 +3706,81 @@ SDValue TargetLowering::expandUnalignedStore(StoreSDNode *ST,
return Result;
}
+SDValue
+TargetLowering::IncrementMemoryAddress(SDValue Addr, SDValue Mask,
+ const SDLoc &DL, EVT DataVT,
+ SelectionDAG &DAG,
+ bool IsCompressedMemory) const {
+ SDValue Increment;
+ EVT AddrVT = Addr.getValueType();
+ EVT MaskVT = Mask.getValueType();
+ assert(DataVT.getVectorNumElements() == MaskVT.getVectorNumElements() &&
+ "Incompatible types of Data and Mask");
+ if (IsCompressedMemory) {
+ // Increment the pointer according to the number of '1's in the mask.
+ EVT MaskIntVT = EVT::getIntegerVT(*DAG.getContext(), MaskVT.getSizeInBits());
+ SDValue MaskInIntReg = DAG.getBitcast(MaskIntVT, Mask);
+ if (MaskIntVT.getSizeInBits() < 32) {
+ MaskInIntReg = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, MaskInIntReg);
+ MaskIntVT = MVT::i32;
+ }
+
+ // Count '1's with POPCNT.
+ Increment = DAG.getNode(ISD::CTPOP, DL, MaskIntVT, MaskInIntReg);
+ Increment = DAG.getZExtOrTrunc(Increment, DL, AddrVT);
+ // Scale is an element size in bytes.
+ SDValue Scale = DAG.getConstant(DataVT.getScalarSizeInBits() / 8, DL,
+ AddrVT);
+ Increment = DAG.getNode(ISD::MUL, DL, AddrVT, Increment, Scale);
+ } else
+ Increment = DAG.getConstant(DataVT.getSizeInBits() / 8, DL, AddrVT);
+
+ return DAG.getNode(ISD::ADD, DL, AddrVT, Addr, Increment);
+}
+
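In the compressed case the pointer advances by the number of active lanes rather than the full vector width, i.e. increment = popcount(mask) * element-size-in-bytes. In plain C++, with a portable popcount standing in for ISD::CTPOP (illustrative names, not the DAG API):

    #include <cstddef>
    #include <cstdint>

    static unsigned popcount32(uint32_t M) {
      unsigned N = 0;
      for (; M; M &= M - 1) // clear the lowest set bit
        ++N;
      return N;
    }

    // Address following a masked store of NumElts elements of EltBytes each.
    uintptr_t nextAddr(uintptr_t Addr, uint32_t Mask, size_t EltBytes,
                       size_t NumElts, bool Compressed) {
      size_t Stored = Compressed ? popcount32(Mask) : NumElts;
      return Addr + Stored * EltBytes;
    }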
+static SDValue clampDynamicVectorIndex(SelectionDAG &DAG,
+ SDValue Idx,
+ EVT VecVT,
+ const SDLoc &dl) {
+ if (isa<ConstantSDNode>(Idx))
+ return Idx;
+
+ EVT IdxVT = Idx.getValueType();
+ unsigned NElts = VecVT.getVectorNumElements();
+ if (isPowerOf2_32(NElts)) {
+ APInt Imm = APInt::getLowBitsSet(IdxVT.getSizeInBits(),
+ Log2_32(NElts));
+ return DAG.getNode(ISD::AND, dl, IdxVT, Idx,
+ DAG.getConstant(Imm, dl, IdxVT));
+ }
+
+ return DAG.getNode(ISD::UMIN, dl, IdxVT, Idx,
+ DAG.getConstant(NElts - 1, dl, IdxVT));
+}
+
+SDValue TargetLowering::getVectorElementPointer(SelectionDAG &DAG,
+ SDValue VecPtr, EVT VecVT,
+ SDValue Index) const {
+ SDLoc dl(Index);
+ // Make sure the index type is big enough to compute in.
+ Index = DAG.getZExtOrTrunc(Index, dl, getPointerTy(DAG.getDataLayout()));
+
+ EVT EltVT = VecVT.getVectorElementType();
+
+ // Calculate the element offset and add it to the pointer.
+ unsigned EltSize = EltVT.getSizeInBits() / 8; // FIXME: should be ABI size.
+ assert(EltSize * 8 == EltVT.getSizeInBits() &&
+ "Converting bits to bytes lost precision");
+
+ Index = clampDynamicVectorIndex(DAG, Index, VecVT, dl);
+
+ EVT IdxVT = Index.getValueType();
+
+ Index = DAG.getNode(ISD::MUL, dl, IdxVT, Index,
+ DAG.getConstant(EltSize, dl, IdxVT));
+ return DAG.getNode(ISD::ADD, dl, IdxVT, Index, VecPtr);
+}
+
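Together the two helpers compute VecPtr + clamp(Index) * EltSize, clamping with an AND when the element count is a power of two and a UMIN otherwise; either is acceptable because an out-of-range dynamic index selects an undefined element anyway. A plain C++ rendering under the same byte-sized-element assumption the FIXME notes (illustrative, not the DAG lowering itself):

    #include <algorithm>
    #include <cstdint>

    // Assumes NumElts > 0 and elements that are a whole number of bytes.
    uintptr_t elementPointer(uintptr_t VecPtr, uint64_t Idx,
                             unsigned NumElts, unsigned EltBytes) {
      if ((NumElts & (NumElts - 1)) == 0)
        Idx &= NumElts - 1;                         // power of two: cheap AND
      else
        Idx = std::min<uint64_t>(Idx, NumElts - 1); // ISD::UMIN
      return VecPtr + Idx * EltBytes;
    }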
//===----------------------------------------------------------------------===//
// Implementation of Emulated TLS Model
//===----------------------------------------------------------------------===//
@@ -3550,11 +3813,36 @@ SDValue TargetLowering::LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA,
// TLSADDR will be codegen'ed as call. Inform MFI that function has calls.
// At last for X86 targets, maybe good for other targets too?
- MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
- MFI->setAdjustsStack(true); // Is this only for X86 target?
- MFI->setHasCalls(true);
+ MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
+ MFI.setAdjustsStack(true); // Is this only for X86 target?
+ MFI.setHasCalls(true);
assert((GA->getOffset() == 0) &&
"Emulated TLS must have zero offset in GlobalAddressSDNode");
return CallResult.first;
}
+
+SDValue TargetLowering::lowerCmpEqZeroToCtlzSrl(SDValue Op,
+ SelectionDAG &DAG) const {
+ assert((Op->getOpcode() == ISD::SETCC) && "Input has to be a SETCC node.");
+ if (!isCtlzFast())
+ return SDValue();
+ ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
+ SDLoc dl(Op);
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+ if (C->isNullValue() && CC == ISD::SETEQ) {
+ EVT VT = Op.getOperand(0).getValueType();
+ SDValue Zext = Op.getOperand(0);
+ if (VT.bitsLT(MVT::i32)) {
+ VT = MVT::i32;
+ Zext = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Op.getOperand(0));
+ }
+ unsigned Log2b = Log2_32(VT.getSizeInBits());
+ SDValue Clz = DAG.getNode(ISD::CTLZ, dl, VT, Zext);
+ SDValue Scc = DAG.getNode(ISD::SRL, dl, VT, Clz,
+ DAG.getConstant(Log2b, dl, MVT::i32));
+ return DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Scc);
+ }
+ }
+ return SDValue();
+}
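The fold rests on the identity (x == 0) == (ctlz(x) >> log2(w)) for a w-bit x, where CTLZ follows the ISD::CTLZ convention of returning w for a zero input: only x == 0 yields a count of w, the unique count with bit log2(w) set. A standalone check for w = 32 (plain C++; the loop-based ctlz is just a portable stand-in):

    #include <cassert>
    #include <cstdint>

    // ISD::CTLZ semantics: defined to return 32 for a zero input.
    static unsigned ctlz32(uint32_t X) {
      unsigned N = 32;
      for (; X; X >>= 1)
        --N;
      return N;
    }

    int main() {
      for (uint32_t X : {0u, 1u, 0x80000000u, 0xffffffffu}) {
        unsigned Setcc = (X == 0) ? 1u : 0u;
        assert(Setcc == (ctlz32(X) >> 5)); // log2(32) == 5
      }
    }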
diff --git a/contrib/llvm/lib/CodeGen/ShadowStackGCLowering.cpp b/contrib/llvm/lib/CodeGen/ShadowStackGCLowering.cpp
index 1efc440..ff7d205 100644
--- a/contrib/llvm/lib/CodeGen/ShadowStackGCLowering.cpp
+++ b/contrib/llvm/lib/CodeGen/ShadowStackGCLowering.cpp
@@ -23,6 +23,7 @@
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
+#include "llvm/Transforms/Utils/EscapeEnumerator.h"
using namespace llvm;
@@ -81,121 +82,6 @@ ShadowStackGCLowering::ShadowStackGCLowering()
initializeShadowStackGCLoweringPass(*PassRegistry::getPassRegistry());
}
-namespace {
-/// EscapeEnumerator - This is a little algorithm to find all escape points
-/// from a function so that "finally"-style code can be inserted. In addition
-/// to finding the existing return and unwind instructions, it also (if
-/// necessary) transforms any call instructions into invokes and sends them to
-/// a landing pad.
-///
-/// It's wrapped up in a state machine using the same transform C# uses for
-/// 'yield return' enumerators, This transform allows it to be non-allocating.
-class EscapeEnumerator {
- Function &F;
- const char *CleanupBBName;
-
- // State.
- int State;
- Function::iterator StateBB, StateE;
- IRBuilder<> Builder;
-
-public:
- EscapeEnumerator(Function &F, const char *N = "cleanup")
- : F(F), CleanupBBName(N), State(0), Builder(F.getContext()) {}
-
- IRBuilder<> *Next() {
- switch (State) {
- default:
- return nullptr;
-
- case 0:
- StateBB = F.begin();
- StateE = F.end();
- State = 1;
-
- case 1:
- // Find all 'return', 'resume', and 'unwind' instructions.
- while (StateBB != StateE) {
- BasicBlock *CurBB = &*StateBB++;
-
- // Branches and invokes do not escape, only unwind, resume, and return
- // do.
- TerminatorInst *TI = CurBB->getTerminator();
- if (!isa<ReturnInst>(TI) && !isa<ResumeInst>(TI))
- continue;
-
- Builder.SetInsertPoint(TI);
- return &Builder;
- }
-
- State = 2;
-
- // Find all 'call' instructions.
- SmallVector<Instruction *, 16> Calls;
- for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
- for (BasicBlock::iterator II = BB->begin(), EE = BB->end(); II != EE;
- ++II)
- if (CallInst *CI = dyn_cast<CallInst>(II))
- if (!CI->getCalledFunction() ||
- !CI->getCalledFunction()->getIntrinsicID())
- Calls.push_back(CI);
-
- if (Calls.empty())
- return nullptr;
-
- // Create a cleanup block.
- LLVMContext &C = F.getContext();
- BasicBlock *CleanupBB = BasicBlock::Create(C, CleanupBBName, &F);
- Type *ExnTy =
- StructType::get(Type::getInt8PtrTy(C), Type::getInt32Ty(C), nullptr);
- if (!F.hasPersonalityFn()) {
- Constant *PersFn = F.getParent()->getOrInsertFunction(
- "__gcc_personality_v0",
- FunctionType::get(Type::getInt32Ty(C), true));
- F.setPersonalityFn(PersFn);
- }
- LandingPadInst *LPad =
- LandingPadInst::Create(ExnTy, 1, "cleanup.lpad", CleanupBB);
- LPad->setCleanup(true);
- ResumeInst *RI = ResumeInst::Create(LPad, CleanupBB);
-
- // Transform the 'call' instructions into 'invoke's branching to the
- // cleanup block. Go in reverse order to make prettier BB names.
- SmallVector<Value *, 16> Args;
- for (unsigned I = Calls.size(); I != 0;) {
- CallInst *CI = cast<CallInst>(Calls[--I]);
-
- // Split the basic block containing the function call.
- BasicBlock *CallBB = CI->getParent();
- BasicBlock *NewBB = CallBB->splitBasicBlock(
- CI->getIterator(), CallBB->getName() + ".cont");
-
- // Remove the unconditional branch inserted at the end of CallBB.
- CallBB->getInstList().pop_back();
- NewBB->getInstList().remove(CI);
-
- // Create a new invoke instruction.
- Args.clear();
- CallSite CS(CI);
- Args.append(CS.arg_begin(), CS.arg_end());
-
- InvokeInst *II =
- InvokeInst::Create(CI->getCalledValue(), NewBB, CleanupBB, Args,
- CI->getName(), CallBB);
- II->setCallingConv(CI->getCallingConv());
- II->setAttributes(CI->getAttributes());
- CI->replaceAllUsesWith(II);
- delete CI;
- }
-
- Builder.SetInsertPoint(RI);
- return &Builder;
- }
- }
-};
-}
-
-
Constant *ShadowStackGCLowering::GetFrameMap(Function &F) {
// doInitialization creates the abstract type of this value.
Type *VoidPtr = Type::getInt8PtrTy(F.getContext());
diff --git a/contrib/llvm/lib/CodeGen/ShrinkWrap.cpp b/contrib/llvm/lib/CodeGen/ShrinkWrap.cpp
index d361a6c..4837495 100644
--- a/contrib/llvm/lib/CodeGen/ShrinkWrap.cpp
+++ b/contrib/llvm/lib/CodeGen/ShrinkWrap.cpp
@@ -199,9 +199,7 @@ public:
MachineFunctionPass::getAnalysisUsage(AU);
}
- const char *getPassName() const override {
- return "Shrink Wrapping analysis";
- }
+ StringRef getPassName() const override { return "Shrink Wrapping analysis"; }
/// \brief Perform the shrink-wrapping analysis and update
/// the MachineFrameInfo attached to \p MF with the results.
@@ -256,8 +254,8 @@ bool ShrinkWrap::useOrDefCSROrFI(const MachineInstr &MI,
/// \brief Helper function to find the immediate (post) dominator.
template <typename ListOfBBs, typename DominanceAnalysis>
-MachineBasicBlock *FindIDom(MachineBasicBlock &Block, ListOfBBs BBs,
- DominanceAnalysis &Dom) {
+static MachineBasicBlock *FindIDom(MachineBasicBlock &Block, ListOfBBs BBs,
+ DominanceAnalysis &Dom) {
MachineBasicBlock *IDom = &Block;
for (MachineBasicBlock *BB : BBs) {
IDom = Dom.findNearestCommonDominator(IDom, BB);
@@ -521,9 +519,9 @@ bool ShrinkWrap::runOnMachineFunction(MachineFunction &MF) {
<< ' ' << Save->getName() << "\nRestore: "
<< Restore->getNumber() << ' ' << Restore->getName() << '\n');
- MachineFrameInfo *MFI = MF.getFrameInfo();
- MFI->setSavePoint(Save);
- MFI->setRestorePoint(Restore);
+ MachineFrameInfo &MFI = MF.getFrameInfo();
+ MFI.setSavePoint(Save);
+ MFI.setRestorePoint(Restore);
++NumCandidates;
return false;
}
diff --git a/contrib/llvm/lib/CodeGen/SjLjEHPrepare.cpp b/contrib/llvm/lib/CodeGen/SjLjEHPrepare.cpp
index ce01c5f..209bbe5 100644
--- a/contrib/llvm/lib/CodeGen/SjLjEHPrepare.cpp
+++ b/contrib/llvm/lib/CodeGen/SjLjEHPrepare.cpp
@@ -58,7 +58,7 @@ public:
bool runOnFunction(Function &F) override;
void getAnalysisUsage(AnalysisUsage &AU) const override {}
- const char *getPassName() const override {
+ StringRef getPassName() const override {
return "SJLJ Exception Handling preparation";
}
diff --git a/contrib/llvm/lib/CodeGen/SplitKit.cpp b/contrib/llvm/lib/CodeGen/SplitKit.cpp
index 07be24b..1c6a84e 100644
--- a/contrib/llvm/lib/CodeGen/SplitKit.cpp
+++ b/contrib/llvm/lib/CodeGen/SplitKit.cpp
@@ -381,9 +381,59 @@ LLVM_DUMP_METHOD void SplitEditor::dump() const {
}
#endif
+LiveInterval::SubRange &SplitEditor::getSubRangeForMask(LaneBitmask LM,
+ LiveInterval &LI) {
+ for (LiveInterval::SubRange &S : LI.subranges())
+ if (S.LaneMask == LM)
+ return S;
+ llvm_unreachable("SubRange for this mask not found");
+}
+
+void SplitEditor::addDeadDef(LiveInterval &LI, VNInfo *VNI, bool Original) {
+ if (!LI.hasSubRanges()) {
+ LI.createDeadDef(VNI);
+ return;
+ }
+
+ SlotIndex Def = VNI->def;
+ if (Original) {
+ // If we are transferring a def from the original interval, make sure
+ // to only update the subranges for which the original subranges had
+ // a def at this location.
+ for (LiveInterval::SubRange &S : LI.subranges()) {
+ auto &PS = getSubRangeForMask(S.LaneMask, Edit->getParent());
+ VNInfo *PV = PS.getVNInfoAt(Def);
+ if (PV != nullptr && PV->def == Def)
+ S.createDeadDef(Def, LIS.getVNInfoAllocator());
+ }
+ } else {
+ // This is a new def: either from rematerialization, or from an inserted
+ // copy. Since rematerialization can regenerate a definition of a sub-
+ // register, we need to check which subranges need to be updated.
+ const MachineInstr *DefMI = LIS.getInstructionFromIndex(Def);
+ assert(DefMI != nullptr);
+ LaneBitmask LM;
+ for (const MachineOperand &DefOp : DefMI->defs()) {
+ unsigned R = DefOp.getReg();
+ if (R != LI.reg)
+ continue;
+ if (unsigned SR = DefOp.getSubReg())
+ LM |= TRI.getSubRegIndexLaneMask(SR);
+ else {
+ LM = MRI.getMaxLaneMaskForVReg(R);
+ break;
+ }
+ }
+ for (LiveInterval::SubRange &S : LI.subranges())
+ if ((S.LaneMask & LM).any())
+ S.createDeadDef(Def, LIS.getVNInfoAllocator());
+ }
+}
+
VNInfo *SplitEditor::defValue(unsigned RegIdx,
const VNInfo *ParentVNI,
- SlotIndex Idx) {
+ SlotIndex Idx,
+ bool Original) {
assert(ParentVNI && "Mapping NULL value");
assert(Idx.isValid() && "Invalid SlotIndex");
assert(Edit->getParent().getVNInfoAt(Idx) == ParentVNI && "Bad Parent VNI");
@@ -392,28 +442,28 @@ VNInfo *SplitEditor::defValue(unsigned RegIdx,
// Create a new value.
VNInfo *VNI = LI->getNextValue(Idx, LIS.getVNInfoAllocator());
+ bool Force = LI->hasSubRanges();
+ ValueForcePair FP(Force ? nullptr : VNI, Force);
// Use insert for lookup, so we can add missing values with a second lookup.
std::pair<ValueMap::iterator, bool> InsP =
- Values.insert(std::make_pair(std::make_pair(RegIdx, ParentVNI->id),
- ValueForcePair(VNI, false)));
+ Values.insert(std::make_pair(std::make_pair(RegIdx, ParentVNI->id), FP));
- // This was the first time (RegIdx, ParentVNI) was mapped.
- // Keep it as a simple def without any liveness.
- if (InsP.second)
+ // This was the first time (RegIdx, ParentVNI) was mapped, and it is not
+ // forced. Keep it as a simple def without any liveness.
+ if (!Force && InsP.second)
return VNI;
// If the previous value was a simple mapping, add liveness for it now.
if (VNInfo *OldVNI = InsP.first->second.getPointer()) {
- SlotIndex Def = OldVNI->def;
- LI->addSegment(LiveInterval::Segment(Def, Def.getDeadSlot(), OldVNI));
- // No longer a simple mapping. Switch to a complex, non-forced mapping.
- InsP.first->second = ValueForcePair();
+ addDeadDef(*LI, OldVNI, Original);
+
+ // No longer a simple mapping. Switch to a complex mapping. If the
+ // interval has subranges, make it a forced mapping.
+ InsP.first->second = ValueForcePair(nullptr, Force);
}
// This is a complex mapping, add liveness for VNI
- SlotIndex Def = VNI->def;
- LI->addSegment(LiveInterval::Segment(Def, Def.getDeadSlot(), VNI));
-
+ addDeadDef(*LI, VNI, Original);
return VNI;
}
@@ -431,9 +481,8 @@ void SplitEditor::forceRecompute(unsigned RegIdx, const VNInfo *ParentVNI) {
// This was previously a single mapping. Make sure the old def is represented
// by a trivial live range.
- SlotIndex Def = VNI->def;
- LiveInterval *LI = &LIS.getInterval(Edit->get(RegIdx));
- LI->addSegment(LiveInterval::Segment(Def, Def.getDeadSlot(), VNI));
+ addDeadDef(LIS.getInterval(Edit->get(RegIdx)), VNI, false);
+
// Mark as complex mapped, forced.
VFP = ValueForcePair(nullptr, true);
}
@@ -455,13 +504,18 @@ VNInfo *SplitEditor::defFromParent(unsigned RegIdx,
unsigned Original = VRM.getOriginal(Edit->get(RegIdx));
LiveInterval &OrigLI = LIS.getInterval(Original);
VNInfo *OrigVNI = OrigLI.getVNInfoAt(UseIdx);
- LiveRangeEdit::Remat RM(ParentVNI);
- RM.OrigMI = LIS.getInstructionFromIndex(OrigVNI->def);
- if (Edit->canRematerializeAt(RM, OrigVNI, UseIdx, true)) {
- Def = Edit->rematerializeAt(MBB, I, LI->reg, RM, TRI, Late);
- ++NumRemats;
- } else {
+ bool DidRemat = false;
+ if (OrigVNI) {
+ LiveRangeEdit::Remat RM(ParentVNI);
+ RM.OrigMI = LIS.getInstructionFromIndex(OrigVNI->def);
+ if (Edit->canRematerializeAt(RM, OrigVNI, UseIdx, true)) {
+ Def = Edit->rematerializeAt(MBB, I, LI->reg, RM, TRI, Late);
+ ++NumRemats;
+ DidRemat = true;
+ }
+ }
+ if (!DidRemat) {
// Can't remat, just insert a copy from parent.
CopyMI = BuildMI(MBB, I, DebugLoc(), TII.get(TargetOpcode::COPY), LI->reg)
.addReg(Edit->getReg());
@@ -472,7 +526,7 @@ VNInfo *SplitEditor::defFromParent(unsigned RegIdx,
}
// Define the value in Reg.
- return defValue(RegIdx, ParentVNI, Def);
+ return defValue(RegIdx, ParentVNI, Def, false);
}
/// Create a new virtual register and live interval.
@@ -621,7 +675,7 @@ SlotIndex SplitEditor::leaveIntvAtTop(MachineBasicBlock &MBB) {
}
VNInfo *VNI = defFromParent(0, ParentVNI, Start, MBB,
- MBB.SkipPHIsAndLabels(MBB.begin()));
+ MBB.SkipPHIsLabelsAndDebug(MBB.begin()));
RegAssign.insert(Start, VNI->def, OpenIdx);
DEBUG(dump());
return VNI->def;
@@ -944,14 +998,15 @@ bool SplitEditor::transferValues() {
}
// The interval [Start;End) is continuously mapped to RegIdx, ParentVNI.
- DEBUG(dbgs() << " [" << Start << ';' << End << ")=" << RegIdx);
- LiveRange &LR = LIS.getInterval(Edit->get(RegIdx));
+ DEBUG(dbgs() << " [" << Start << ';' << End << ")=" << RegIdx
+ << '(' << PrintReg(Edit->get(RegIdx)) << ')');
+ LiveInterval &LI = LIS.getInterval(Edit->get(RegIdx));
// Check for a simply defined value that can be blitted directly.
ValueForcePair VFP = Values.lookup(std::make_pair(RegIdx, ParentVNI->id));
if (VNInfo *VNI = VFP.getPointer()) {
DEBUG(dbgs() << ':' << VNI->id);
- LR.addSegment(LiveInterval::Segment(Start, End, VNI));
+ LI.addSegment(LiveInterval::Segment(Start, End, VNI));
Start = End;
continue;
}
@@ -975,7 +1030,7 @@ bool SplitEditor::transferValues() {
// The first block may be live-in, or it may have its own def.
if (Start != BlockStart) {
- VNInfo *VNI = LR.extendInBlock(BlockStart, std::min(BlockEnd, End));
+ VNInfo *VNI = LI.extendInBlock(BlockStart, std::min(BlockEnd, End));
assert(VNI && "Missing def for complex mapped value");
DEBUG(dbgs() << ':' << VNI->id << "*BB#" << MBB->getNumber());
// MBB has its own def. Is it also live-out?
@@ -995,7 +1050,7 @@ bool SplitEditor::transferValues() {
if (BlockStart == ParentVNI->def) {
// This block has the def of a parent PHI, so it isn't live-in.
assert(ParentVNI->isPHIDef() && "Non-phi defined at block start?");
- VNInfo *VNI = LR.extendInBlock(BlockStart, std::min(BlockEnd, End));
+ VNInfo *VNI = LI.extendInBlock(BlockStart, std::min(BlockEnd, End));
assert(VNI && "Missing def for complex mapped parent PHI");
if (End >= BlockEnd)
LRC.setLiveOutValue(&*MBB, VNI); // Live-out as well.
@@ -1003,10 +1058,10 @@ bool SplitEditor::transferValues() {
// This block needs a live-in value. The last block covered may not
// be live-out.
if (End < BlockEnd)
- LRC.addLiveInBlock(LR, MDT[&*MBB], End);
+ LRC.addLiveInBlock(LI, MDT[&*MBB], End);
else {
// Live-through, and we don't know the value.
- LRC.addLiveInBlock(LR, MDT[&*MBB]);
+ LRC.addLiveInBlock(LI, MDT[&*MBB]);
LRC.setLiveOutValue(&*MBB, nullptr);
}
}
@@ -1025,42 +1080,90 @@ bool SplitEditor::transferValues() {
return Skipped;
}
+static bool removeDeadSegment(SlotIndex Def, LiveRange &LR) {
+ const LiveRange::Segment *Seg = LR.getSegmentContaining(Def);
+ if (Seg == nullptr)
+ return true;
+ if (Seg->end != Def.getDeadSlot())
+ return false;
+ // This is a dead PHI. Remove it.
+ LR.removeSegment(*Seg, true);
+ return true;
+}
+
+void SplitEditor::extendPHIRange(MachineBasicBlock &B, LiveRangeCalc &LRC,
+ LiveRange &LR, LaneBitmask LM,
+ ArrayRef<SlotIndex> Undefs) {
+ for (MachineBasicBlock *P : B.predecessors()) {
+ SlotIndex End = LIS.getMBBEndIdx(P);
+ SlotIndex LastUse = End.getPrevSlot();
+ // The predecessor may not have a live-out value. That is OK, like an
+ // undef PHI operand.
+ LiveInterval &PLI = Edit->getParent();
+ // Need the cast because the inputs to ?: would otherwise be deemed
+ // "incompatible": SubRange vs LiveInterval.
+ LiveRange &PSR = !LM.all() ? getSubRangeForMask(LM, PLI)
+ : static_cast<LiveRange&>(PLI);
+ if (PSR.liveAt(LastUse))
+ LRC.extend(LR, End, /*PhysReg=*/0, Undefs);
+ }
+}
+
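What extendPHIRange does per predecessor can be modeled simply: if the parent range is live at the predecessor's last slot, the new range is extended to that predecessor's end index; otherwise the predecessor behaves like an undef PHI operand and is skipped. A simplified sketch with slot indices as plain integers and liveness as a set (real SlotIndex and LiveRange semantics are richer than this):

```cpp
#include <iostream>
#include <set>
#include <vector>

struct Range { std::set<int> Live; };      // set of live slots (toy model)
bool liveAt(const Range &R, int S) { return R.Live.count(S) != 0; }
void extendTo(Range &R, int S) { R.Live.insert(S); }

void extendPHIRange(const std::vector<int> &PredEnds, const Range &Parent,
                    Range &LR) {
  for (int End : PredEnds) {
    int LastUse = End - 1;
    // A predecessor without a live-out parent value is fine: it acts like
    // an undef PHI operand and contributes nothing.
    if (liveAt(Parent, LastUse))
      extendTo(LR, End);
  }
}

int main() {
  Range Parent{{3, 7}}; // parent live at slots 3 and 7
  Range LR;
  extendPHIRange({4, 8, 12}, Parent, LR); // preds end at 4, 8, 12
  for (int S : LR.Live) std::cout << S << ' '; // prints: 4 8
  std::cout << '\n';
}
```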
void SplitEditor::extendPHIKillRanges() {
// Extend live ranges to be live-out for successor PHI values.
- for (const VNInfo *PHIVNI : Edit->getParent().valnos) {
- if (PHIVNI->isUnused() || !PHIVNI->isPHIDef())
- continue;
- unsigned RegIdx = RegAssign.lookup(PHIVNI->def);
- LiveRange &LR = LIS.getInterval(Edit->get(RegIdx));
-
- // Check whether PHI is dead.
- const LiveRange::Segment *Segment = LR.getSegmentContaining(PHIVNI->def);
- assert(Segment != nullptr && "Missing segment for VNI");
- if (Segment->end == PHIVNI->def.getDeadSlot()) {
- // This is a dead PHI. Remove it.
- LR.removeSegment(*Segment, true);
+
+ // Visit each PHI def slot in the parent live interval. If the def is dead,
+ // remove it. Otherwise, extend the live interval to reach the end indexes
+ // of all predecessor blocks.
+
+ LiveInterval &ParentLI = Edit->getParent();
+ for (const VNInfo *V : ParentLI.valnos) {
+ if (V->isUnused() || !V->isPHIDef())
continue;
- }
+ unsigned RegIdx = RegAssign.lookup(V->def);
+ LiveInterval &LI = LIS.getInterval(Edit->get(RegIdx));
LiveRangeCalc &LRC = getLRCalc(RegIdx);
- MachineBasicBlock *MBB = LIS.getMBBFromIndex(PHIVNI->def);
- for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(),
- PE = MBB->pred_end(); PI != PE; ++PI) {
- SlotIndex End = LIS.getMBBEndIdx(*PI);
- SlotIndex LastUse = End.getPrevSlot();
- // The predecessor may not have a live-out value. That is OK, like an
- // undef PHI operand.
- if (Edit->getParent().liveAt(LastUse)) {
- assert(RegAssign.lookup(LastUse) == RegIdx &&
- "Different register assignment in phi predecessor");
- LRC.extend(LR, End);
- }
+ MachineBasicBlock &B = *LIS.getMBBFromIndex(V->def);
+ if (!removeDeadSegment(V->def, LI))
+ extendPHIRange(B, LRC, LI, LaneBitmask::getAll(), /*Undefs=*/{});
+ }
+
+ SmallVector<SlotIndex, 4> Undefs;
+ LiveRangeCalc SubLRC;
+
+ for (LiveInterval::SubRange &PS : ParentLI.subranges()) {
+ for (const VNInfo *V : PS.valnos) {
+ if (V->isUnused() || !V->isPHIDef())
+ continue;
+ unsigned RegIdx = RegAssign.lookup(V->def);
+ LiveInterval &LI = LIS.getInterval(Edit->get(RegIdx));
+ LiveInterval::SubRange &S = getSubRangeForMask(PS.LaneMask, LI);
+ if (removeDeadSegment(V->def, S))
+ continue;
+
+ MachineBasicBlock &B = *LIS.getMBBFromIndex(V->def);
+ SubLRC.reset(&VRM.getMachineFunction(), LIS.getSlotIndexes(), &MDT,
+ &LIS.getVNInfoAllocator());
+ Undefs.clear();
+ LI.computeSubRangeUndefs(Undefs, PS.LaneMask, MRI, *LIS.getSlotIndexes());
+ extendPHIRange(B, SubLRC, S, PS.LaneMask, Undefs);
}
}
}
/// rewriteAssigned - Rewrite all uses of Edit->getReg().
void SplitEditor::rewriteAssigned(bool ExtendRanges) {
+ struct ExtPoint {
+ ExtPoint(const MachineOperand &O, unsigned R, SlotIndex N)
+ : MO(O), RegIdx(R), Next(N) {}
+ MachineOperand MO;
+ unsigned RegIdx;
+ SlotIndex Next;
+ };
+
+ SmallVector<ExtPoint,4> ExtPoints;
+
for (MachineRegisterInfo::reg_iterator RI = MRI.reg_begin(Edit->getReg()),
RE = MRI.reg_end(); RI != RE;) {
MachineOperand &MO = *RI;
@@ -1082,8 +1185,8 @@ void SplitEditor::rewriteAssigned(bool ExtendRanges) {
// Rewrite to the mapped register at Idx.
unsigned RegIdx = RegAssign.lookup(Idx);
- LiveInterval *LI = &LIS.getInterval(Edit->get(RegIdx));
- MO.setReg(LI->reg);
+ LiveInterval &LI = LIS.getInterval(Edit->get(RegIdx));
+ MO.setReg(LI.reg);
DEBUG(dbgs() << " rewr BB#" << MI->getParent()->getNumber() << '\t'
<< Idx << ':' << RegIdx << '\t' << *MI);
@@ -1095,7 +1198,7 @@ void SplitEditor::rewriteAssigned(bool ExtendRanges) {
if (MO.isDef()) {
if (!MO.getSubReg() && !MO.isEarlyClobber())
continue;
- // We may wan't to extend a live range for a partial redef, or for a use
+ // We may want to extend a live range for a partial redef, or for a use
// tied to an early clobber.
Idx = Idx.getPrevSlot();
if (!Edit->getParent().liveAt(Idx))
@@ -1103,7 +1206,53 @@ void SplitEditor::rewriteAssigned(bool ExtendRanges) {
} else
Idx = Idx.getRegSlot(true);
- getLRCalc(RegIdx).extend(*LI, Idx.getNextSlot());
+ SlotIndex Next = Idx.getNextSlot();
+ if (LI.hasSubRanges()) {
+ // We have to delay extending subranges until we have seen all operands
+ // defining the register. This is because a <def,read-undef> operand
+ // will create an "undef" point, and we cannot extend any subranges
+ // until all of them have been accounted for.
+ if (MO.isUse())
+ ExtPoints.push_back(ExtPoint(MO, RegIdx, Next));
+ } else {
+ LiveRangeCalc &LRC = getLRCalc(RegIdx);
+ LRC.extend(LI, Next, 0, ArrayRef<SlotIndex>());
+ }
+ }
+
+ for (ExtPoint &EP : ExtPoints) {
+ LiveInterval &LI = LIS.getInterval(Edit->get(EP.RegIdx));
+ assert(LI.hasSubRanges());
+
+ LiveRangeCalc SubLRC;
+ unsigned Reg = EP.MO.getReg(), Sub = EP.MO.getSubReg();
+ LaneBitmask LM = Sub != 0 ? TRI.getSubRegIndexLaneMask(Sub)
+ : MRI.getMaxLaneMaskForVReg(Reg);
+ for (LiveInterval::SubRange &S : LI.subranges()) {
+ if ((S.LaneMask & LM).none())
+ continue;
+ // The problem here can be that the new register may have been created
+ // for a partially defined original register. For example:
+ // %vreg827:subreg_hireg<def,read-undef> = ...
+ // ...
+ // %vreg828<def> = COPY %vreg827
+ if (S.empty())
+ continue;
+ SubLRC.reset(&VRM.getMachineFunction(), LIS.getSlotIndexes(), &MDT,
+ &LIS.getVNInfoAllocator());
+ SmallVector<SlotIndex, 4> Undefs;
+ LI.computeSubRangeUndefs(Undefs, S.LaneMask, MRI, *LIS.getSlotIndexes());
+ SubLRC.extend(S, EP.Next, 0, Undefs);
+ }
+ }
+
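The loop just above completes a two-phase scheme: uses of intervals with subranges are queued as ExtPoints while operands are scanned, and only extended once every def — and hence every <def,read-undef> undef point — is known. A toy restatement with purely illustrative types:

```cpp
#include <iostream>
#include <vector>

struct ExtPoint { unsigned RegIdx; int NextSlot; };

int main() {
  std::vector<ExtPoint> ExtPoints;
  std::vector<int> Undefs;

  // Phase 1: walk all operands; defs may register undef points, uses are
  // merely queued.
  Undefs.push_back(10);         // a <def,read-undef> at slot 10
  ExtPoints.push_back({0, 14}); // a use whose range must reach slot 14

  // Phase 2: with the full set of undef points known, each queued use can
  // be extended without crossing an undef location.
  for (const ExtPoint &EP : ExtPoints)
    std::cout << "extend reg " << EP.RegIdx << " to slot " << EP.NextSlot
              << " honoring " << Undefs.size() << " undef point(s)\n";
}
```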
+ for (unsigned R : *Edit) {
+ LiveInterval &LI = LIS.getInterval(R);
+ if (!LI.hasSubRanges())
+ continue;
+ LI.clear();
+ LI.removeEmptySubRanges();
+ LIS.constructMainRangeFromSubranges(LI);
}
}
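The final loop rebuilds each interval's main range from its subranges; conceptually that is a union of segment lists. A minimal sketch over half-open integer segments:

```cpp
#include <algorithm>
#include <iostream>
#include <utility>
#include <vector>

using Seg = std::pair<int, int>; // half-open [start, end)

std::vector<Seg> unionOf(std::vector<Seg> Segs) {
  std::sort(Segs.begin(), Segs.end());
  std::vector<Seg> Out;
  for (const Seg &S : Segs) {
    if (!Out.empty() && S.first <= Out.back().second)
      Out.back().second = std::max(Out.back().second, S.second); // merge overlap
    else
      Out.push_back(S);
  }
  return Out;
}

int main() {
  // Two subranges: lanes A live on [0,4) and [8,12), lanes B on [2,6).
  auto Main = unionOf({{0, 4}, {8, 12}, {2, 6}});
  for (const Seg &S : Main)
    std::cout << '[' << S.first << ',' << S.second << ") ";
  std::cout << '\n'; // [0,6) [8,12)
}
```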
@@ -1146,7 +1295,7 @@ void SplitEditor::finish(SmallVectorImpl<unsigned> *LRMap) {
if (ParentVNI->isUnused())
continue;
unsigned RegIdx = RegAssign.lookup(ParentVNI->def);
- defValue(RegIdx, ParentVNI, ParentVNI->def);
+ defValue(RegIdx, ParentVNI, ParentVNI->def, true);
// Force rematted values to be recomputed everywhere.
// The new live ranges may be truncated.
@@ -1182,8 +1331,9 @@ void SplitEditor::finish(SmallVectorImpl<unsigned> *LRMap) {
deleteRematVictims();
// Get rid of unused values and set phi-kill flags.
- for (LiveRangeEdit::iterator I = Edit->begin(), E = Edit->end(); I != E; ++I) {
- LiveInterval &LI = LIS.getInterval(*I);
+ for (unsigned Reg : *Edit) {
+ LiveInterval &LI = LIS.getInterval(Reg);
+ LI.removeEmptySubRanges();
LI.RenumberValues();
}
diff --git a/contrib/llvm/lib/CodeGen/SplitKit.h b/contrib/llvm/lib/CodeGen/SplitKit.h
index a968494..a75738a 100644
--- a/contrib/llvm/lib/CodeGen/SplitKit.h
+++ b/contrib/llvm/lib/CodeGen/SplitKit.h
@@ -325,12 +325,30 @@ private:
return LRCalc[SpillMode != SM_Partition && RegIdx != 0];
}
+ /// Find a subrange corresponding to the lane mask @p LM in the live
+ /// interval @p LI. The interval @p LI is assumed to contain such a subrange.
+ /// This function is used to find corresponding subranges between the
+ /// original interval and the new intervals.
+ LiveInterval::SubRange &getSubRangeForMask(LaneBitmask LM, LiveInterval &LI);
+
+ /// Add a segment to the interval LI for the value number VNI. If LI has
+ /// subranges, corresponding segments will be added to them as well, but
+ /// with newly created value numbers. If Original is true, dead def will
+ /// only be added a subrange of LI if the corresponding subrange of the
+ /// original interval has a def at this index. Otherwise, all subranges
+ /// of LI will be updated.
+ void addDeadDef(LiveInterval &LI, VNInfo *VNI, bool Original);
+
/// defValue - define a value in RegIdx from ParentVNI at Idx.
/// Idx does not have to be ParentVNI->def, but it must be contained within
/// ParentVNI's live range in ParentLI. The new value is added to the value
- /// map.
+ /// map. The value being defined may either come from rematerialization
+ /// (or an inserted copy), or it may be coming from the original interval.
+ /// The parameter Original should be true in the latter case, otherwise
+ /// it should be false.
/// Return the new LI value.
- VNInfo *defValue(unsigned RegIdx, const VNInfo *ParentVNI, SlotIndex Idx);
+ VNInfo *defValue(unsigned RegIdx, const VNInfo *ParentVNI, SlotIndex Idx,
+ bool Original);
/// forceRecompute - Force the live range of ParentVNI in RegIdx to be
/// recomputed by LiveRangeCalc::extend regardless of the number of defs.
@@ -368,6 +386,15 @@ private:
/// Return true if any ranges were skipped.
bool transferValues();
+ /// Live range @p LR corresponding to the lane mask @p LM has a live
+ /// PHI def at the beginning of block @p B. Extend the range @p LR of
+ /// all predecessor values that reach this def. If @p LR is a subrange,
+ /// the array @p Undefs is the set of all locations where it is undefined
+ /// via <def,read-undef> in other subranges for the same register.
+ void extendPHIRange(MachineBasicBlock &B, LiveRangeCalc &LRC,
+ LiveRange &LR, LaneBitmask LM,
+ ArrayRef<SlotIndex> Undefs);
+
/// extendPHIKillRanges - Extend the ranges of all values killed by original
/// parent PHIDefs.
void extendPHIKillRanges();
diff --git a/contrib/llvm/lib/CodeGen/StackColoring.cpp b/contrib/llvm/lib/CodeGen/StackColoring.cpp
index 87cd470..89c4b57 100644
--- a/contrib/llvm/lib/CodeGen/StackColoring.cpp
+++ b/contrib/llvm/lib/CodeGen/StackColoring.cpp
@@ -778,10 +778,9 @@ void StackColoring::remapInstructions(DenseMap<int, int> &SlotRemap) {
unsigned FixedInstr = 0;
unsigned FixedMemOp = 0;
unsigned FixedDbg = 0;
- MachineModuleInfo *MMI = &MF->getMMI();
// Remap debug information that refers to stack slots.
- for (auto &VI : MMI->getVariableDbgInfo()) {
+ for (auto &VI : MF->getVariableDbgInfo()) {
if (!VI.Var)
continue;
if (SlotRemap.count(VI.Slot)) {
@@ -980,7 +979,7 @@ bool StackColoring::runOnMachineFunction(MachineFunction &Func) {
<< "********** Function: "
<< ((const Value*)Func.getFunction())->getName() << '\n');
MF = &Func;
- MFI = MF->getFrameInfo();
+ MFI = &MF->getFrameInfo();
Indexes = &getAnalysis<SlotIndexes>();
SP = &getAnalysis<StackProtector>();
BlockLiveness.clear();
diff --git a/contrib/llvm/lib/CodeGen/StackMapLivenessAnalysis.cpp b/contrib/llvm/lib/CodeGen/StackMapLivenessAnalysis.cpp
index 87e4eb6..a5ef7c8 100644
--- a/contrib/llvm/lib/CodeGen/StackMapLivenessAnalysis.cpp
+++ b/contrib/llvm/lib/CodeGen/StackMapLivenessAnalysis.cpp
@@ -17,7 +17,6 @@
#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineFunctionAnalysis.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/Support/CommandLine.h"
@@ -64,7 +63,7 @@ public:
MachineFunctionProperties getRequiredProperties() const override {
return MachineFunctionProperties().set(
- MachineFunctionProperties::Property::AllVRegsAllocated);
+ MachineFunctionProperties::Property::NoVRegs);
}
/// \brief Calculate the liveness information for the given machine function.
@@ -113,7 +112,7 @@ bool StackMapLiveness::runOnMachineFunction(MachineFunction &MF) {
++NumStackMapFuncVisited;
// Skip this function if there are no patchpoints to process.
- if (!MF.getFrameInfo()->hasPatchPoint()) {
+ if (!MF.getFrameInfo().hasPatchPoint()) {
++NumStackMapFuncSkipped;
return false;
}
@@ -126,7 +125,7 @@ bool StackMapLiveness::calculateLiveness(MachineFunction &MF) {
// For all basic blocks in the function.
for (auto &MBB : MF) {
DEBUG(dbgs() << "****** BB " << MBB.getName() << " ******\n");
- LiveRegs.init(TRI);
+ LiveRegs.init(*TRI);
// FIXME: This should probably be addLiveOuts().
LiveRegs.addLiveOutsNoPristines(MBB);
bool HasStackMap = false;
diff --git a/contrib/llvm/lib/CodeGen/StackMaps.cpp b/contrib/llvm/lib/CodeGen/StackMaps.cpp
index d91bb80..9b7dd400 100644
--- a/contrib/llvm/lib/CodeGen/StackMaps.cpp
+++ b/contrib/llvm/lib/CodeGen/StackMaps.cpp
@@ -30,16 +30,20 @@ using namespace llvm;
#define DEBUG_TYPE "stackmaps"
static cl::opt<int> StackMapVersion(
- "stackmap-version", cl::init(1),
- cl::desc("Specify the stackmap encoding version (default = 1)"));
+ "stackmap-version", cl::init(2),
+ cl::desc("Specify the stackmap encoding version (default = 2)"));
const char *StackMaps::WSMP = "Stack Maps: ";
+StackMapOpers::StackMapOpers(const MachineInstr *MI)
+ : MI(MI) {
+ assert(getVarIdx() <= MI->getNumOperands() &&
+ "invalid stackmap definition");
+}
+
PatchPointOpers::PatchPointOpers(const MachineInstr *MI)
: MI(MI), HasDef(MI->getOperand(0).isReg() && MI->getOperand(0).isDef() &&
- !MI->getOperand(0).isImplicit()),
- IsAnyReg(MI->getOperand(getMetaIdx(CCPos)).getImm() ==
- CallingConv::AnyReg) {
+ !MI->getOperand(0).isImplicit()) {
#ifndef NDEBUG
unsigned CheckStartIdx = 0, e = MI->getNumOperands();
while (CheckStartIdx < e && MI->getOperand(CheckStartIdx).isReg() &&
@@ -70,7 +74,7 @@ unsigned PatchPointOpers::getNextScratchIdx(unsigned StartIdx) const {
}
StackMaps::StackMaps(AsmPrinter &AP) : AP(AP) {
- if (StackMapVersion != 1)
+ if (StackMapVersion != 2)
llvm_unreachable("Unsupported stackmap version!");
}
@@ -272,8 +276,7 @@ StackMaps::parseRegisterLiveOutMask(const uint32_t *Mask) const {
}
LiveOuts.erase(
- std::remove_if(LiveOuts.begin(), LiveOuts.end(),
- [](const LiveOutReg &LO) { return LO.Reg == 0; }),
+ remove_if(LiveOuts, [](const LiveOutReg &LO) { return LO.Reg == 0; }),
LiveOuts.end());
return LiveOuts;
@@ -332,20 +335,26 @@ void StackMaps::recordStackMapOpers(const MachineInstr &MI, uint64_t ID,
CSInfos.emplace_back(CSOffsetExpr, ID, std::move(Locations),
std::move(LiveOuts));
- // Record the stack size of the current function.
- const MachineFrameInfo *MFI = AP.MF->getFrameInfo();
+ // Record the stack size of the current function and update callsite count.
+ const MachineFrameInfo &MFI = AP.MF->getFrameInfo();
const TargetRegisterInfo *RegInfo = AP.MF->getSubtarget().getRegisterInfo();
bool HasDynamicFrameSize =
- MFI->hasVarSizedObjects() || RegInfo->needsStackRealignment(*(AP.MF));
- FnStackSize[AP.CurrentFnSym] =
- HasDynamicFrameSize ? UINT64_MAX : MFI->getStackSize();
+ MFI.hasVarSizedObjects() || RegInfo->needsStackRealignment(*(AP.MF));
+ uint64_t FrameSize = HasDynamicFrameSize ? UINT64_MAX : MFI.getStackSize();
+
+ auto CurrentIt = FnInfos.find(AP.CurrentFnSym);
+ if (CurrentIt != FnInfos.end())
+ CurrentIt->second.RecordCount++;
+ else
+ FnInfos.insert(std::make_pair(AP.CurrentFnSym, FunctionInfo(FrameSize)));
}
void StackMaps::recordStackMap(const MachineInstr &MI) {
assert(MI.getOpcode() == TargetOpcode::STACKMAP && "expected stackmap");
- int64_t ID = MI.getOperand(0).getImm();
- recordStackMapOpers(MI, ID, std::next(MI.operands_begin(), 2),
+ StackMapOpers opers(&MI);
+ const int64_t ID = MI.getOperand(PatchPointOpers::IDPos).getImm();
+ recordStackMapOpers(MI, ID, std::next(MI.operands_begin(), opers.getVarIdx()),
MI.operands_end());
}
@@ -353,8 +362,7 @@ void StackMaps::recordPatchPoint(const MachineInstr &MI) {
assert(MI.getOpcode() == TargetOpcode::PATCHPOINT && "expected patchpoint");
PatchPointOpers opers(&MI);
- int64_t ID = opers.getMetaOper(PatchPointOpers::IDPos).getImm();
-
+ const int64_t ID = opers.getID();
auto MOI = std::next(MI.operands_begin(), opers.getStackMapStartIdx());
recordStackMapOpers(MI, ID, MOI, MI.operands_end(),
opers.isAnyReg() && opers.hasDef());
@@ -363,7 +371,7 @@ void StackMaps::recordPatchPoint(const MachineInstr &MI) {
// verify anyregcc
auto &Locations = CSInfos.back().Locations;
if (opers.isAnyReg()) {
- unsigned NArgs = opers.getMetaOper(PatchPointOpers::NArgPos).getImm();
+ unsigned NArgs = opers.getNumCallArgs();
for (unsigned i = 0, e = (opers.hasDef() ? NArgs + 1 : NArgs); i != e; ++i)
assert(Locations[i].Type == Location::Register &&
"anyreg arg must be in reg.");
@@ -384,7 +392,7 @@ void StackMaps::recordStatepoint(const MachineInstr &MI) {
/// Emit the stackmap header.
///
/// Header {
-/// uint8 : Stack Map Version (currently 1)
+/// uint8 : Stack Map Version (currently 2)
/// uint8 : Reserved (expected to be 0)
/// uint16 : Reserved (expected to be 0)
/// }
@@ -398,8 +406,8 @@ void StackMaps::emitStackmapHeader(MCStreamer &OS) {
OS.EmitIntValue(0, 2); // Reserved.
// Num functions.
- DEBUG(dbgs() << WSMP << "#functions = " << FnStackSize.size() << '\n');
- OS.EmitIntValue(FnStackSize.size(), 4);
+ DEBUG(dbgs() << WSMP << "#functions = " << FnInfos.size() << '\n');
+ OS.EmitIntValue(FnInfos.size(), 4);
// Num constants.
DEBUG(dbgs() << WSMP << "#constants = " << ConstPool.size() << '\n');
OS.EmitIntValue(ConstPool.size(), 4);
@@ -413,15 +421,18 @@ void StackMaps::emitStackmapHeader(MCStreamer &OS) {
/// StkSizeRecord[NumFunctions] {
/// uint64 : Function Address
/// uint64 : Stack Size
+/// uint64 : Record Count
/// }
void StackMaps::emitFunctionFrameRecords(MCStreamer &OS) {
// Function Frame records.
DEBUG(dbgs() << WSMP << "functions:\n");
- for (auto const &FR : FnStackSize) {
+ for (auto const &FR : FnInfos) {
DEBUG(dbgs() << WSMP << "function addr: " << FR.first
- << " frame size: " << FR.second);
+ << " frame size: " << FR.second.StackSize
+ << " callsite count: " << FR.second.RecordCount << '\n');
OS.EmitSymbolValue(FR.first, 8);
- OS.EmitIntValue(FR.second, 8);
+ OS.EmitIntValue(FR.second.StackSize, 8);
+ OS.EmitIntValue(FR.second.RecordCount, 8);
}
}
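The version-2 record layout documented above can be made concrete with a byte-level sketch. The field widths follow the layout comments in this diff (4-byte header, then three little-endian uint64 fields per function); the function address, frame size, and record count below are invented:

```cpp
#include <cstdint>
#include <cstdio>
#include <vector>

static void emitU64(std::vector<uint8_t> &Out, uint64_t V) {
  for (int i = 0; i < 8; ++i)
    Out.push_back(uint8_t(V >> (8 * i))); // little-endian
}

int main() {
  std::vector<uint8_t> Section;
  Section.push_back(2); // uint8 : stack map version (now 2)
  Section.push_back(0); // uint8 : reserved
  Section.push_back(0); // uint16: reserved
  Section.push_back(0);

  // One StkSizeRecord: function at 0x401000, 64-byte frame, 3 call sites.
  emitU64(Section, 0x401000);
  emitU64(Section, 64);
  emitU64(Section, 3);

  std::printf("%zu bytes (4 header + 24 per function)\n", Section.size());
}
```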
@@ -522,7 +533,7 @@ void StackMaps::serializeToStackMapSection() {
// Bail out if there's no stack map data.
assert((!CSInfos.empty() || ConstPool.empty()) &&
"Expected empty constant pool too!");
- assert((!CSInfos.empty() || FnStackSize.empty()) &&
+ assert((!CSInfos.empty() || FnInfos.empty()) &&
"Expected empty function record too!");
if (CSInfos.empty())
return;
diff --git a/contrib/llvm/lib/CodeGen/StackProtector.cpp b/contrib/llvm/lib/CodeGen/StackProtector.cpp
index 89868e4..c2c010a 100644
--- a/contrib/llvm/lib/CodeGen/StackProtector.cpp
+++ b/contrib/llvm/lib/CodeGen/StackProtector.cpp
@@ -50,7 +50,7 @@ static cl::opt<bool> EnableSelectionDAGSP("enable-selectiondag-sp",
cl::init(true), cl::Hidden);
char StackProtector::ID = 0;
-INITIALIZE_PASS(StackProtector, "stack-protector", "Insert stack protectors",
+INITIALIZE_TM_PASS(StackProtector, "stack-protector", "Insert stack protectors",
false, true)
FunctionPass *llvm::createStackProtectorPass(const TargetMachine *TM) {
@@ -236,11 +236,6 @@ bool StackProtector::RequiresStackProtector() {
for (const Instruction &I : BB) {
if (const AllocaInst *AI = dyn_cast<AllocaInst>(&I)) {
if (AI->isArrayAllocation()) {
- // SSP-Strong: Enable protectors for any call to alloca, regardless
- // of size.
- if (Strong)
- return true;
-
if (const auto *CI = dyn_cast<ConstantInt>(AI->getArraySize())) {
if (CI->getLimitedValue(SSPBufferSize) >= SSPBufferSize) {
// A call to alloca with size >= SSPBufferSize requires
diff --git a/contrib/llvm/lib/CodeGen/StackSlotColoring.cpp b/contrib/llvm/lib/CodeGen/StackSlotColoring.cpp
index d996714..234b204 100644
--- a/contrib/llvm/lib/CodeGen/StackSlotColoring.cpp
+++ b/contrib/llvm/lib/CodeGen/StackSlotColoring.cpp
@@ -381,7 +381,6 @@ bool StackSlotColoring::RemoveDeadStores(MachineBasicBlock* MBB) {
I != E; ++I) {
if (DCELimit != -1 && (int)NumDead >= DCELimit)
break;
-
int FirstSS, SecondSS;
if (TII->isStackSlotCopy(*I, FirstSS, SecondSS) && FirstSS == SecondSS &&
FirstSS != -1) {
@@ -392,12 +391,18 @@ bool StackSlotColoring::RemoveDeadStores(MachineBasicBlock* MBB) {
}
MachineBasicBlock::iterator NextMI = std::next(I);
- if (NextMI == MBB->end()) continue;
+ MachineBasicBlock::iterator ProbableLoadMI = I;
unsigned LoadReg = 0;
unsigned StoreReg = 0;
if (!(LoadReg = TII->isLoadFromStackSlot(*I, FirstSS)))
continue;
+ // Skip the debug-value pseudo-instructions between a load and its store.
+ while ((NextMI != E) && NextMI->isDebugValue()) {
+ ++NextMI;
+ ++I;
+ }
+ if (NextMI == E) continue;
if (!(StoreReg = TII->isStoreToStackSlot(*NextMI, SecondSS)))
continue;
if (FirstSS != SecondSS || LoadReg != StoreReg || FirstSS == -1) continue;
@@ -407,7 +412,7 @@ bool StackSlotColoring::RemoveDeadStores(MachineBasicBlock* MBB) {
if (NextMI->findRegisterUseOperandIdx(LoadReg, true, nullptr) != -1) {
++NumDead;
- toErase.push_back(&*I);
+ toErase.push_back(&*ProbableLoadMI);
}
toErase.push_back(&*NextMI);
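A toy version of the scan above: a reload from a stack slot immediately followed by a store of the same register back to the same slot forms a removable pair, and DBG_VALUE pseudo-instructions in between must be stepped over rather than treated as a barrier. Everything below is a simplified model, not MachineInstr:

```cpp
#include <cstddef>
#include <iostream>
#include <vector>

struct Inst {
  enum Kind { Load, Store, DbgValue, Other } K;
  int Slot = -1, Reg = -1;
};

int main() {
  std::vector<Inst> MBB = {
      {Inst::Load, 5, 2},  // %r2 = load slot5
      {Inst::DbgValue},    // debug info, no codegen effect
      {Inst::Store, 5, 2}, // store %r2, slot5  <- dead pair
      {Inst::Other},
  };
  for (size_t I = 0; I + 1 < MBB.size(); ++I) {
    if (MBB[I].K != Inst::Load)
      continue;
    size_t Next = I + 1;
    while (Next < MBB.size() && MBB[Next].K == Inst::DbgValue)
      ++Next; // skip debug pseudo-instructions
    if (Next < MBB.size() && MBB[Next].K == Inst::Store &&
        MBB[Next].Slot == MBB[I].Slot && MBB[Next].Reg == MBB[I].Reg)
      std::cout << "dead load/store pair at " << I << " and " << Next << '\n';
  }
}
```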
@@ -428,7 +433,7 @@ bool StackSlotColoring::runOnMachineFunction(MachineFunction &MF) {
<< "********** Function: " << MF.getName() << '\n';
});
- MFI = MF.getFrameInfo();
+ MFI = &MF.getFrameInfo();
TII = MF.getSubtarget().getInstrInfo();
LS = &getAnalysis<LiveStacks>();
MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
diff --git a/contrib/llvm/lib/CodeGen/TailDuplication.cpp b/contrib/llvm/lib/CodeGen/TailDuplication.cpp
index 2b1fb12..e2377d8 100644
--- a/contrib/llvm/lib/CodeGen/TailDuplication.cpp
+++ b/contrib/llvm/lib/CodeGen/TailDuplication.cpp
@@ -47,13 +47,12 @@ bool TailDuplicatePass::runOnMachineFunction(MachineFunction &MF) {
if (skipFunction(*MF.getFunction()))
return false;
- auto MMI = getAnalysisIfAvailable<MachineModuleInfo>();
auto MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
- Duplicator.initMF(MF, MMI, MBPI);
+ Duplicator.initMF(MF, MBPI, /* LayoutMode */ false);
bool MadeChange = false;
- while (Duplicator.tailDuplicateBlocks(MF))
+ while (Duplicator.tailDuplicateBlocks())
MadeChange = true;
return MadeChange;
diff --git a/contrib/llvm/lib/CodeGen/TailDuplicator.cpp b/contrib/llvm/lib/CodeGen/TailDuplicator.cpp
index 847a093..7709236 100644
--- a/contrib/llvm/lib/CodeGen/TailDuplicator.cpp
+++ b/contrib/llvm/lib/CodeGen/TailDuplicator.cpp
@@ -20,6 +20,7 @@
#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/IR/Function.h"
@@ -40,12 +41,20 @@ STATISTIC(NumTailDupRemoved,
STATISTIC(NumDeadBlocks, "Number of dead blocks removed");
STATISTIC(NumAddedPHIs, "Number of phis added");
+namespace llvm {
+
// Heuristic for tail duplication.
static cl::opt<unsigned> TailDuplicateSize(
"tail-dup-size",
cl::desc("Maximum instructions to consider tail duplicating"), cl::init(2),
cl::Hidden);
+cl::opt<unsigned> TailDupIndirectBranchSize(
+ "tail-dup-indirect-size",
+ cl::desc("Maximum instructions to consider tail duplicating blocks that "
+ "end with indirect branches."), cl::init(20),
+ cl::Hidden);
+
static cl::opt<bool>
TailDupVerify("tail-dup-verify",
cl::desc("Verify sanity of PHI instructions during taildup"),
@@ -54,18 +63,20 @@ static cl::opt<bool>
static cl::opt<unsigned> TailDupLimit("tail-dup-limit", cl::init(~0U),
cl::Hidden);
-namespace llvm {
-
-void TailDuplicator::initMF(MachineFunction &MF, const MachineModuleInfo *MMIin,
- const MachineBranchProbabilityInfo *MBPIin) {
- TII = MF.getSubtarget().getInstrInfo();
- TRI = MF.getSubtarget().getRegisterInfo();
- MRI = &MF.getRegInfo();
- MMI = MMIin;
+void TailDuplicator::initMF(MachineFunction &MFin,
+ const MachineBranchProbabilityInfo *MBPIin,
+ bool LayoutModeIn, unsigned TailDupSizeIn) {
+ MF = &MFin;
+ TII = MF->getSubtarget().getInstrInfo();
+ TRI = MF->getSubtarget().getRegisterInfo();
+ MRI = &MF->getRegInfo();
+ MMI = &MF->getMMI();
MBPI = MBPIin;
+ TailDupSize = TailDupSizeIn;
assert(MBPI != nullptr && "Machine Branch Probability Info required");
+ LayoutMode = LayoutModeIn;
PreRegAlloc = MRI->isSSA();
}
@@ -78,10 +89,7 @@ static void VerifyPHIs(MachineFunction &MF, bool CheckExtra) {
while (MI != MBB->end()) {
if (!MI->isPHI())
break;
- for (SmallSetVector<MachineBasicBlock *, 8>::iterator PI = Preds.begin(),
- PE = Preds.end();
- PI != PE; ++PI) {
- MachineBasicBlock *PredBB = *PI;
+ for (MachineBasicBlock *PredBB : Preds) {
bool Found = false;
for (unsigned i = 1, e = MI->getNumOperands(); i != e; i += 2) {
MachineBasicBlock *PHIBB = MI->getOperand(i + 1).getMBB();
@@ -119,21 +127,31 @@ static void VerifyPHIs(MachineFunction &MF, bool CheckExtra) {
}
/// Tail duplicate the block and cleanup.
-bool TailDuplicator::tailDuplicateAndUpdate(MachineFunction &MF, bool IsSimple,
- MachineBasicBlock *MBB) {
+/// \p IsSimple - return value of isSimpleBB
+/// \p MBB - block to be duplicated
+/// \p ForcedLayoutPred - If non-null, treat this block as the layout
+/// predecessor, instead of using the ordering in MF
+/// \p DuplicatedPreds - if non-null, \p DuplicatedPreds will contain a list of
+/// all Preds that received a copy of \p MBB.
+/// \p RemovalCallback - if non-null, called just before MBB is deleted.
+bool TailDuplicator::tailDuplicateAndUpdate(
+ bool IsSimple, MachineBasicBlock *MBB,
+ MachineBasicBlock *ForcedLayoutPred,
+ SmallVectorImpl<MachineBasicBlock*> *DuplicatedPreds,
+ llvm::function_ref<void(MachineBasicBlock *)> *RemovalCallback) {
// Save the successors list.
SmallSetVector<MachineBasicBlock *, 8> Succs(MBB->succ_begin(),
MBB->succ_end());
SmallVector<MachineBasicBlock *, 8> TDBBs;
SmallVector<MachineInstr *, 16> Copies;
- if (!tailDuplicate(MF, IsSimple, MBB, TDBBs, Copies))
+ if (!tailDuplicate(IsSimple, MBB, ForcedLayoutPred, TDBBs, Copies))
return false;
++NumTails;
SmallVector<MachineInstr *, 8> NewPHIs;
- MachineSSAUpdater SSAUpdate(MF, &NewPHIs);
+ MachineSSAUpdater SSAUpdate(*MF, &NewPHIs);
// TailBB's immediate successors are now successors of those predecessors
// which duplicated TailBB. Add the predecessors as sources to the PHI
@@ -145,7 +163,7 @@ bool TailDuplicator::tailDuplicateAndUpdate(MachineFunction &MF, bool IsSimple,
// If it is dead, remove it.
if (isDead) {
NumTailDupRemoved += MBB->size();
- removeDeadBlock(MBB);
+ removeDeadBlock(MBB, RemovalCallback);
++NumDeadBlocks;
}
@@ -216,21 +234,24 @@ bool TailDuplicator::tailDuplicateAndUpdate(MachineFunction &MF, bool IsSimple,
if (NewPHIs.size())
NumAddedPHIs += NewPHIs.size();
+ if (DuplicatedPreds)
+ *DuplicatedPreds = std::move(TDBBs);
+
return true;
}
/// Look for small blocks that are unconditionally branched to and do not fall
/// through. Tail-duplicate their instructions into their predecessors to
/// eliminate (dynamic) branches.
-bool TailDuplicator::tailDuplicateBlocks(MachineFunction &MF) {
+bool TailDuplicator::tailDuplicateBlocks() {
bool MadeChange = false;
if (PreRegAlloc && TailDupVerify) {
DEBUG(dbgs() << "\n*** Before tail-duplicating\n");
- VerifyPHIs(MF, true);
+ VerifyPHIs(*MF, true);
}
- for (MachineFunction::iterator I = ++MF.begin(), E = MF.end(); I != E;) {
+ for (MachineFunction::iterator I = ++MF->begin(), E = MF->end(); I != E;) {
MachineBasicBlock *MBB = &*I++;
if (NumTails == TailDupLimit)
@@ -238,14 +259,14 @@ bool TailDuplicator::tailDuplicateBlocks(MachineFunction &MF) {
bool IsSimple = isSimpleBB(MBB);
- if (!shouldTailDuplicate(MF, IsSimple, *MBB))
+ if (!shouldTailDuplicate(IsSimple, *MBB))
continue;
- MadeChange |= tailDuplicateAndUpdate(MF, IsSimple, MBB);
+ MadeChange |= tailDuplicateAndUpdate(IsSimple, MBB, nullptr);
}
if (PreRegAlloc && TailDupVerify)
- VerifyPHIs(MF, false);
+ VerifyPHIs(*MF, false);
return MadeChange;
}
@@ -334,10 +355,9 @@ void TailDuplicator::processPHI(
/// the source operands due to earlier PHI translation.
void TailDuplicator::duplicateInstruction(
MachineInstr *MI, MachineBasicBlock *TailBB, MachineBasicBlock *PredBB,
- MachineFunction &MF,
DenseMap<unsigned, RegSubRegPair> &LocalVRMap,
const DenseSet<unsigned> &UsedByPhi) {
- MachineInstr *NewMI = TII->duplicate(*MI, MF);
+ MachineInstr *NewMI = TII->duplicate(*MI, *MF);
if (PreRegAlloc) {
for (unsigned i = 0, e = NewMI->getNumOperands(); i != e; ++i) {
MachineOperand &MO = NewMI->getOperand(i);
@@ -421,18 +441,14 @@ void TailDuplicator::updateSuccessorsPHIs(
MachineBasicBlock *FromBB, bool isDead,
SmallVectorImpl<MachineBasicBlock *> &TDBBs,
SmallSetVector<MachineBasicBlock *, 8> &Succs) {
- for (SmallSetVector<MachineBasicBlock *, 8>::iterator SI = Succs.begin(),
- SE = Succs.end();
- SI != SE; ++SI) {
- MachineBasicBlock *SuccBB = *SI;
- for (MachineBasicBlock::iterator II = SuccBB->begin(), EE = SuccBB->end();
- II != EE; ++II) {
- if (!II->isPHI())
+ for (MachineBasicBlock *SuccBB : Succs) {
+ for (MachineInstr &MI : *SuccBB) {
+ if (!MI.isPHI())
break;
- MachineInstrBuilder MIB(*FromBB->getParent(), II);
+ MachineInstrBuilder MIB(*FromBB->getParent(), MI);
unsigned Idx = 0;
- for (unsigned i = 1, e = II->getNumOperands(); i != e; i += 2) {
- MachineOperand &MO = II->getOperand(i + 1);
+ for (unsigned i = 1, e = MI.getNumOperands(); i != e; i += 2) {
+ MachineOperand &MO = MI.getOperand(i + 1);
if (MO.getMBB() == FromBB) {
Idx = i;
break;
@@ -440,17 +456,17 @@ void TailDuplicator::updateSuccessorsPHIs(
}
assert(Idx != 0);
- MachineOperand &MO0 = II->getOperand(Idx);
+ MachineOperand &MO0 = MI.getOperand(Idx);
unsigned Reg = MO0.getReg();
if (isDead) {
// Folded into the previous BB.
// There could be duplicate phi source entries. FIXME: Should sdisel
// or an earlier pass have fixed this?
- for (unsigned i = II->getNumOperands() - 2; i != Idx; i -= 2) {
- MachineOperand &MO = II->getOperand(i + 1);
+ for (unsigned i = MI.getNumOperands() - 2; i != Idx; i -= 2) {
+ MachineOperand &MO = MI.getOperand(i + 1);
if (MO.getMBB() == FromBB) {
- II->RemoveOperand(i + 1);
- II->RemoveOperand(i);
+ MI.RemoveOperand(i + 1);
+ MI.RemoveOperand(i);
}
}
} else
@@ -474,8 +490,8 @@ void TailDuplicator::updateSuccessorsPHIs(
unsigned SrcReg = LI->second[j].second;
if (Idx != 0) {
- II->getOperand(Idx).setReg(SrcReg);
- II->getOperand(Idx + 1).setMBB(SrcBB);
+ MI.getOperand(Idx).setReg(SrcReg);
+ MI.getOperand(Idx + 1).setMBB(SrcBB);
Idx = 0;
} else {
MIB.addReg(SrcReg).addMBB(SrcBB);
@@ -486,8 +502,8 @@ void TailDuplicator::updateSuccessorsPHIs(
for (unsigned j = 0, ee = TDBBs.size(); j != ee; ++j) {
MachineBasicBlock *SrcBB = TDBBs[j];
if (Idx != 0) {
- II->getOperand(Idx).setReg(Reg);
- II->getOperand(Idx + 1).setMBB(SrcBB);
+ MI.getOperand(Idx).setReg(Reg);
+ MI.getOperand(Idx + 1).setMBB(SrcBB);
Idx = 0;
} else {
MIB.addReg(Reg).addMBB(SrcBB);
@@ -495,19 +511,20 @@ void TailDuplicator::updateSuccessorsPHIs(
}
}
if (Idx != 0) {
- II->RemoveOperand(Idx + 1);
- II->RemoveOperand(Idx);
+ MI.RemoveOperand(Idx + 1);
+ MI.RemoveOperand(Idx);
}
}
}
}
/// Determine if it is profitable to duplicate this block.
-bool TailDuplicator::shouldTailDuplicate(const MachineFunction &MF,
- bool IsSimple,
+bool TailDuplicator::shouldTailDuplicate(bool IsSimple,
MachineBasicBlock &TailBB) {
- // Only duplicate blocks that end with unconditional branches.
- if (TailBB.canFallThrough())
+ // When doing tail-duplication during layout, the block ordering is in flux,
+ // so canFallThrough returns a result based on incorrect information and
+ // should just be ignored.
+ if (!LayoutMode && TailBB.canFallThrough())
return false;
// Don't try to tail-duplicate single-block loops.
@@ -518,12 +535,24 @@ bool TailDuplicator::shouldTailDuplicate(const MachineFunction &MF,
// duplicate only one, because one branch instruction can be eliminated to
// compensate for the duplication.
unsigned MaxDuplicateCount;
- if (TailDuplicateSize.getNumOccurrences() == 0 &&
- // FIXME: Use Function::optForSize().
- MF.getFunction()->hasFnAttribute(Attribute::OptimizeForSize))
+ if (TailDupSize == 0 &&
+ TailDuplicateSize.getNumOccurrences() == 0 &&
+ MF->getFunction()->optForSize())
MaxDuplicateCount = 1;
- else
+ else if (TailDupSize == 0)
MaxDuplicateCount = TailDuplicateSize;
+ else
+ MaxDuplicateCount = TailDupSize;
+
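The threshold selection above can be restated as a standalone function (parameter names are invented; the behavior mirrors the if/else chain): an explicit caller-supplied size wins, then the -tail-dup-size flag, then a size-optimization default of 1.

```cpp
#include <iostream>

unsigned maxDuplicateCount(unsigned TailDupSizeIn, bool FlagWasSet,
                           unsigned FlagValue, bool OptForSize) {
  if (TailDupSizeIn != 0)
    return TailDupSizeIn; // caller override (e.g. from block placement)
  if (!FlagWasSet && OptForSize)
    return 1;             // minimize growth when optimizing for size
  return FlagValue;       // -tail-dup-size (default 2)
}

int main() {
  std::cout << maxDuplicateCount(0, false, 2, false) << '\n'; // 2
  std::cout << maxDuplicateCount(0, false, 2, true) << '\n';  // 1
  std::cout << maxDuplicateCount(4, false, 2, true) << '\n';  // 4
}
```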
+ // If the block to be duplicated ends in an unanalyzable fallthrough, don't
+ // duplicate it.
+ // A similar check is necessary in MachineBlockPlacement to make sure pairs of
+ // blocks with unanalyzable fallthrough get laid out contiguously.
+ MachineBasicBlock *PredTBB = nullptr, *PredFBB = nullptr;
+ SmallVector<MachineOperand, 4> PredCond;
+ if (TII->analyzeBranch(TailBB, PredTBB, PredFBB, PredCond) &&
+ TailBB.canFallThrough())
+ return false;
// If the target has hardware branch prediction that can handle indirect
// branches, duplicating them can often make them predictable when there
@@ -536,7 +565,7 @@ bool TailDuplicator::shouldTailDuplicate(const MachineFunction &MF,
HasIndirectbr = TailBB.back().isIndirectBranch();
if (HasIndirectbr && PreRegAlloc)
- MaxDuplicateCount = 20;
+ MaxDuplicateCount = TailDupIndirectBranchSize;
// Check the instructions in the block to determine whether tail-duplication
// is invalid or unlikely to be profitable.
@@ -631,7 +660,7 @@ bool TailDuplicator::canCompletelyDuplicateBB(MachineBasicBlock &BB) {
MachineBasicBlock *PredTBB = nullptr, *PredFBB = nullptr;
SmallVector<MachineOperand, 4> PredCond;
- if (TII->analyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true))
+ if (TII->analyzeBranch(*PredBB, PredTBB, PredFBB, PredCond))
return false;
if (!PredCond.empty())
@@ -649,11 +678,7 @@ bool TailDuplicator::duplicateSimpleBB(
SmallVector<MachineBasicBlock *, 8> Preds(TailBB->pred_begin(),
TailBB->pred_end());
bool Changed = false;
- for (SmallSetVector<MachineBasicBlock *, 8>::iterator PI = Preds.begin(),
- PE = Preds.end();
- PI != PE; ++PI) {
- MachineBasicBlock *PredBB = *PI;
-
+ for (MachineBasicBlock *PredBB : Preds) {
if (PredBB->hasEHPadSuccessor())
continue;
@@ -662,7 +687,7 @@ bool TailDuplicator::duplicateSimpleBB(
MachineBasicBlock *PredTBB = nullptr, *PredFBB = nullptr;
SmallVector<MachineOperand, 4> PredCond;
- if (TII->analyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true))
+ if (TII->analyzeBranch(*PredBB, PredTBB, PredFBB, PredCond))
continue;
Changed = true;
@@ -670,7 +695,7 @@ bool TailDuplicator::duplicateSimpleBB(
<< "From simple Succ: " << *TailBB);
MachineBasicBlock *NewTarget = *TailBB->succ_begin();
- MachineBasicBlock *NextBB = &*std::next(PredBB->getIterator());
+ MachineBasicBlock *NextBB = PredBB->getNextNode();
// Make PredFBB explicit.
if (PredCond.empty())
@@ -700,7 +725,7 @@ bool TailDuplicator::duplicateSimpleBB(
if (PredTBB == NextBB && PredFBB == nullptr)
PredTBB = nullptr;
- TII->RemoveBranch(*PredBB);
+ TII->removeBranch(*PredBB);
if (!PredBB->isSuccessor(NewTarget))
PredBB->replaceSuccessor(TailBB, NewTarget);
@@ -710,17 +735,40 @@ bool TailDuplicator::duplicateSimpleBB(
}
if (PredTBB)
- TII->InsertBranch(*PredBB, PredTBB, PredFBB, PredCond, DebugLoc());
+ TII->insertBranch(*PredBB, PredTBB, PredFBB, PredCond, DebugLoc());
TDBBs.push_back(PredBB);
}
return Changed;
}
+bool TailDuplicator::canTailDuplicate(MachineBasicBlock *TailBB,
+ MachineBasicBlock *PredBB) {
+ // EH edges are ignored by analyzeBranch.
+ if (PredBB->succ_size() > 1)
+ return false;
+
+ MachineBasicBlock *PredTBB, *PredFBB;
+ SmallVector<MachineOperand, 4> PredCond;
+ if (TII->analyzeBranch(*PredBB, PredTBB, PredFBB, PredCond))
+ return false;
+ if (!PredCond.empty())
+ return false;
+ return true;
+}
+
/// If it is profitable, duplicate TailBB's contents in each
/// of its predecessors.
-bool TailDuplicator::tailDuplicate(MachineFunction &MF, bool IsSimple,
- MachineBasicBlock *TailBB,
+/// \p IsSimple result of isSimpleBB
+/// \p TailBB Block to be duplicated.
+/// \p ForcedLayoutPred When non-null, use this block as the layout predecessor
+/// instead of the previous block in MF's order.
+/// \p TDBBs A vector to keep track of all blocks tail-duplicated
+/// into.
+/// \p Copies A vector of copy instructions inserted. Used later to
+/// walk all the inserted copies and remove redundant ones.
+bool TailDuplicator::tailDuplicate(bool IsSimple, MachineBasicBlock *TailBB,
+ MachineBasicBlock *ForcedLayoutPred,
SmallVectorImpl<MachineBasicBlock *> &TDBBs,
SmallVectorImpl<MachineInstr *> &Copies) {
DEBUG(dbgs() << "\n*** Tail-duplicating BB#" << TailBB->getNumber() << '\n');
@@ -737,25 +785,20 @@ bool TailDuplicator::tailDuplicate(MachineFunction &MF, bool IsSimple,
bool Changed = false;
SmallSetVector<MachineBasicBlock *, 8> Preds(TailBB->pred_begin(),
TailBB->pred_end());
- for (SmallSetVector<MachineBasicBlock *, 8>::iterator PI = Preds.begin(),
- PE = Preds.end();
- PI != PE; ++PI) {
- MachineBasicBlock *PredBB = *PI;
-
+ for (MachineBasicBlock *PredBB : Preds) {
assert(TailBB != PredBB &&
"Single-block loop should have been rejected earlier!");
- // EH edges are ignored by AnalyzeBranch.
- if (PredBB->succ_size() > 1)
- continue;
- MachineBasicBlock *PredTBB, *PredFBB;
- SmallVector<MachineOperand, 4> PredCond;
- if (TII->analyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true))
- continue;
- if (!PredCond.empty())
+ if (!canTailDuplicate(TailBB, PredBB))
continue;
+
// Don't duplicate into a fall-through predecessor (at least for now).
- if (PredBB->isLayoutSuccessor(TailBB) && PredBB->canFallThrough())
+ bool IsLayoutSuccessor = false;
+ if (ForcedLayoutPred)
+ IsLayoutSuccessor = (ForcedLayoutPred == PredBB);
+ else if (PredBB->isLayoutSuccessor(TailBB) && PredBB->canFallThrough())
+ IsLayoutSuccessor = true;
+ if (IsLayoutSuccessor)
continue;
DEBUG(dbgs() << "\nTail-duplicating into PredBB: " << *PredBB
@@ -764,7 +807,7 @@ bool TailDuplicator::tailDuplicate(MachineFunction &MF, bool IsSimple,
TDBBs.push_back(PredBB);
// Remove PredBB's unconditional branch.
- TII->RemoveBranch(*PredBB);
+ TII->removeBranch(*PredBB);
// Clone the contents of TailBB into PredBB.
DenseMap<unsigned, RegSubRegPair> LocalVRMap;
@@ -782,13 +825,15 @@ bool TailDuplicator::tailDuplicate(MachineFunction &MF, bool IsSimple,
} else {
// Replace def of virtual registers with new registers, and update
// uses with PHI source register or the new registers.
- duplicateInstruction(MI, TailBB, PredBB, MF, LocalVRMap, UsedByPhi);
+ duplicateInstruction(MI, TailBB, PredBB, LocalVRMap, UsedByPhi);
}
}
appendCopies(PredBB, CopyInfos, Copies);
// Simplify
- TII->analyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true);
+ MachineBasicBlock *PredTBB, *PredFBB;
+ SmallVector<MachineOperand, 4> PredCond;
+ TII->analyzeBranch(*PredBB, PredTBB, PredFBB, PredCond);
NumTailDupAdded += TailBB->size() - 1; // subtract one for removed branch
@@ -796,10 +841,8 @@ bool TailDuplicator::tailDuplicate(MachineFunction &MF, bool IsSimple,
PredBB->removeSuccessor(PredBB->succ_begin());
assert(PredBB->succ_empty() &&
"TailDuplicate called on block with multiple successors!");
- for (MachineBasicBlock::succ_iterator I = TailBB->succ_begin(),
- E = TailBB->succ_end();
- I != E; ++I)
- PredBB->addSuccessor(*I, MBPI->getEdgeProbability(TailBB, I));
+ for (MachineBasicBlock *Succ : TailBB->successors())
+ PredBB->addSuccessor(Succ, MBPI->getEdgeProbability(TailBB, Succ));
Changed = true;
++NumTailDups;
@@ -808,17 +851,27 @@ bool TailDuplicator::tailDuplicate(MachineFunction &MF, bool IsSimple,
// If TailBB was duplicated into all its predecessors except for the prior
// block, which falls through unconditionally, move the contents of this
// block into the prior block.
- MachineBasicBlock *PrevBB = &*std::prev(TailBB->getIterator());
+ MachineBasicBlock *PrevBB = ForcedLayoutPred;
+ if (!PrevBB)
+ PrevBB = &*std::prev(TailBB->getIterator());
MachineBasicBlock *PriorTBB = nullptr, *PriorFBB = nullptr;
SmallVector<MachineOperand, 4> PriorCond;
// This has to check PrevBB->succ_size() because EH edges are ignored by
- // AnalyzeBranch.
+ // analyzeBranch.
if (PrevBB->succ_size() == 1 &&
- !TII->analyzeBranch(*PrevBB, PriorTBB, PriorFBB, PriorCond, true) &&
- PriorCond.empty() && !PriorTBB && TailBB->pred_size() == 1 &&
+ // Layout preds are not always CFG preds. Check.
+ *PrevBB->succ_begin() == TailBB &&
+ !TII->analyzeBranch(*PrevBB, PriorTBB, PriorFBB, PriorCond) &&
+ PriorCond.empty() &&
+ (!PriorTBB || PriorTBB == TailBB) &&
+ TailBB->pred_size() == 1 &&
!TailBB->hasAddressTaken()) {
DEBUG(dbgs() << "\nMerging into block: " << *PrevBB
<< "From MBB: " << *TailBB);
+ // There may be a branch to the layout successor. This is unlikely but it
+ // happens. The correct thing to do is to remove the branch before
+ // duplicating the instructions in all cases.
+ TII->removeBranch(*PrevBB);
if (PreRegAlloc) {
DenseMap<unsigned, RegSubRegPair> LocalVRMap;
SmallVector<std::pair<unsigned, RegSubRegPair>, 4> CopyInfos;
@@ -837,11 +890,12 @@ bool TailDuplicator::tailDuplicate(MachineFunction &MF, bool IsSimple,
// uses with PHI source register or the new registers.
MachineInstr *MI = &*I++;
assert(!MI->isBundle() && "Not expecting bundles before regalloc!");
- duplicateInstruction(MI, TailBB, PrevBB, MF, LocalVRMap, UsedByPhi);
+ duplicateInstruction(MI, TailBB, PrevBB, LocalVRMap, UsedByPhi);
MI->eraseFromParent();
}
appendCopies(PrevBB, CopyInfos, Copies);
} else {
+ TII->removeBranch(*PrevBB);
// No PHIs to worry about, just splice the instructions over.
PrevBB->splice(PrevBB->end(), TailBB, TailBB->begin(), TailBB->end());
}
@@ -874,11 +928,8 @@ bool TailDuplicator::tailDuplicate(MachineFunction &MF, bool IsSimple,
// What we do here is introduce a copy in 3 of the register defined by the
// phi, just like when we are duplicating 2 into 3, but we don't copy any
// real instructions or remove the 3 -> 2 edge from the phi in 2.
- for (SmallSetVector<MachineBasicBlock *, 8>::iterator PI = Preds.begin(),
- PE = Preds.end();
- PI != PE; ++PI) {
- MachineBasicBlock *PredBB = *PI;
- if (std::find(TDBBs.begin(), TDBBs.end(), PredBB) != TDBBs.end())
+ for (MachineBasicBlock *PredBB : Preds) {
+ if (is_contained(TDBBs, PredBB))
continue;
// EH edges
@@ -917,10 +968,15 @@ void TailDuplicator::appendCopies(MachineBasicBlock *MBB,
/// Remove the specified dead machine basic block from the function, updating
/// the CFG.
-void TailDuplicator::removeDeadBlock(MachineBasicBlock *MBB) {
+void TailDuplicator::removeDeadBlock(
+ MachineBasicBlock *MBB,
+ llvm::function_ref<void(MachineBasicBlock *)> *RemovalCallback) {
assert(MBB->pred_empty() && "MBB must be dead!");
DEBUG(dbgs() << "\nRemoving MBB: " << *MBB);
+ if (RemovalCallback)
+ (*RemovalCallback)(MBB);
+
// Remove all successors.
while (!MBB->succ_empty())
MBB->removeSuccessor(MBB->succ_end() - 1);
diff --git a/contrib/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp b/contrib/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp
index cac7e63..f082add 100644
--- a/contrib/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp
+++ b/contrib/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp
@@ -40,7 +40,7 @@ bool TargetFrameLowering::noFramePointerElim(const MachineFunction &MF) const {
/// is overridden for some targets.
int TargetFrameLowering::getFrameIndexReference(const MachineFunction &MF,
int FI, unsigned &FrameReg) const {
- const MachineFrameInfo *MFI = MF.getFrameInfo();
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo();
// By default, assume all frame indices are referenced via whatever
@@ -48,13 +48,13 @@ int TargetFrameLowering::getFrameIndexReference(const MachineFunction &MF,
// something different.
FrameReg = RI->getFrameRegister(MF);
- return MFI->getObjectOffset(FI) + MFI->getStackSize() -
- getOffsetOfLocalArea() + MFI->getOffsetAdjustment();
+ return MFI.getObjectOffset(FI) + MFI.getStackSize() -
+ getOffsetOfLocalArea() + MFI.getOffsetAdjustment();
}
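A worked instance of this default formula with made-up numbers, showing how the four terms combine — an object at offset -8 from the incoming stack pointer, a 32-byte frame, a zero local-area offset, and no adjustment resolves to frame-register offset 24:

```cpp
#include <iostream>

int frameIndexReference(int ObjectOffset, int StackSize, int LocalAreaOffset,
                        int OffsetAdjustment) {
  return ObjectOffset + StackSize - LocalAreaOffset + OffsetAdjustment;
}

int main() {
  std::cout << frameIndexReference(-8, 32, 0, 0) << '\n'; // 24
}
```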
bool TargetFrameLowering::needsFrameIndexResolution(
const MachineFunction &MF) const {
- return MF.getFrameInfo()->hasStackObjects();
+ return MF.getFrameInfo().hasStackObjects();
}
void TargetFrameLowering::determineCalleeSaves(MachineFunction &MF,
@@ -84,7 +84,7 @@ void TargetFrameLowering::determineCalleeSaves(MachineFunction &MF,
return;
// Functions which call __builtin_unwind_init get all their registers saved.
- bool CallsUnwindInit = MF.getMMI().callsUnwindInit();
+ bool CallsUnwindInit = MF.callsUnwindInit();
const MachineRegisterInfo &MRI = MF.getRegInfo();
for (unsigned i = 0; CSRegs[i]; ++i) {
unsigned Reg = CSRegs[i];
diff --git a/contrib/llvm/lib/CodeGen/TargetInstrInfo.cpp b/contrib/llvm/lib/CodeGen/TargetInstrInfo.cpp
index e7330c6..01f91b9 100644
--- a/contrib/llvm/lib/CodeGen/TargetInstrInfo.cpp
+++ b/contrib/llvm/lib/CodeGen/TargetInstrInfo.cpp
@@ -84,8 +84,8 @@ unsigned TargetInstrInfo::getInlineAsmLength(const char *Str,
if (*Str == '\n' || strncmp(Str, MAI.getSeparatorString(),
strlen(MAI.getSeparatorString())) == 0) {
atInsnStart = true;
- } else if (strncmp(Str, MAI.getCommentString(),
- strlen(MAI.getCommentString())) == 0) {
+ } else if (strncmp(Str, MAI.getCommentString().data(),
+ MAI.getCommentString().size()) == 0) {
// Stop counting as an instruction after a comment until the next
// separator.
atInsnStart = false;
@@ -119,7 +119,7 @@ TargetInstrInfo::ReplaceTailWithBranchTo(MachineBasicBlock::iterator Tail,
// If MBB isn't immediately before MBB, insert a branch to it.
if (++MachineFunction::iterator(MBB) != MachineFunction::iterator(NewDest))
- InsertBranch(*MBB, NewDest, nullptr, SmallVector<MachineOperand, 0>(), DL);
+ insertBranch(*MBB, NewDest, nullptr, SmallVector<MachineOperand, 0>(), DL);
MBB->addSuccessor(NewDest);
}
@@ -437,13 +437,20 @@ static MachineInstr *foldPatchpoint(MachineFunction &MF, MachineInstr &MI,
const TargetInstrInfo &TII) {
unsigned StartIdx = 0;
switch (MI.getOpcode()) {
- case TargetOpcode::STACKMAP:
- StartIdx = 2; // Skip ID, nShadowBytes.
+ case TargetOpcode::STACKMAP: {
+ // StackMapLiveValues are foldable
+ StartIdx = StackMapOpers(&MI).getVarIdx();
break;
+ }
case TargetOpcode::PATCHPOINT: {
- // For PatchPoint, the call args are not foldable.
- PatchPointOpers opers(&MI);
- StartIdx = opers.getVarIdx();
+ // For PatchPoint, the call args are not foldable (even if reported in the
+ // stackmap e.g. via anyregcc).
+ StartIdx = PatchPointOpers(&MI).getVarIdx();
+ break;
+ }
+ case TargetOpcode::STATEPOINT: {
+ // For statepoints, fold deopt and gc arguments, but not call arguments.
+ StartIdx = StatepointOpers(&MI).getVarIdx();
break;
}
default:
@@ -467,7 +474,7 @@ static MachineInstr *foldPatchpoint(MachineFunction &MF, MachineInstr &MI,
for (unsigned i = StartIdx; i < MI.getNumOperands(); ++i) {
MachineOperand &MO = MI.getOperand(i);
- if (std::find(Ops.begin(), Ops.end(), i) != Ops.end()) {
+ if (is_contained(Ops, i)) {
unsigned SpillSize;
unsigned SpillOffset;
// Compute the spill slot size and offset.
@@ -508,10 +515,36 @@ MachineInstr *TargetInstrInfo::foldMemoryOperand(MachineInstr &MI,
assert(MBB && "foldMemoryOperand needs an inserted instruction");
MachineFunction &MF = *MBB->getParent();
+ // If we're not folding a load into a subreg, the size of the load is the
+ // size of the spill slot. But if we are, we need to figure out what the
+ // actual load size is.
+ int64_t MemSize = 0;
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
+ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
+
+ if (Flags & MachineMemOperand::MOStore) {
+ MemSize = MFI.getObjectSize(FI);
+ } else {
+ for (unsigned Idx : Ops) {
+ int64_t OpSize = MFI.getObjectSize(FI);
+
+ if (auto SubReg = MI.getOperand(Idx).getSubReg()) {
+ unsigned SubRegSize = TRI->getSubRegIdxSize(SubReg);
+ if (SubRegSize > 0 && !(SubRegSize % 8))
+ OpSize = SubRegSize / 8;
+ }
+
+ MemSize = std::max(MemSize, OpSize);
+ }
+ }
+
+ assert(MemSize && "Did not expect a zero-sized stack slot");
+
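The size computation above restated standalone: stores fold at the full spill-slot size, while a load folded into a subregister operand shrinks to that subregister's byte size when it is a whole number of bytes, taking the maximum over all folded operands. A sketch with invented inputs (subregister sizes are passed directly in bits rather than looked up from an index):

```cpp
#include <algorithm>
#include <cstdint>
#include <iostream>
#include <vector>

int64_t foldedMemSize(bool IsStore, int64_t SlotSize,
                      const std::vector<unsigned> &OpSubRegBits) {
  if (IsStore)
    return SlotSize;
  int64_t MemSize = 0;
  for (unsigned Bits : OpSubRegBits) {
    int64_t OpSize = SlotSize;   // default: whole slot
    if (Bits > 0 && Bits % 8 == 0)
      OpSize = Bits / 8;         // subregister load is narrower
    MemSize = std::max(MemSize, OpSize);
  }
  return MemSize;
}

int main() {
  // 16-byte slot, loading only a 32-bit subregister: 4-byte access.
  std::cout << foldedMemSize(false, 16, {32}) << '\n'; // 4
  // Full-register operand (0 = no subreg): whole 16-byte slot.
  std::cout << foldedMemSize(false, 16, {0}) << '\n';  // 16
}
```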
MachineInstr *NewMI = nullptr;
if (MI.getOpcode() == TargetOpcode::STACKMAP ||
- MI.getOpcode() == TargetOpcode::PATCHPOINT) {
+ MI.getOpcode() == TargetOpcode::PATCHPOINT ||
+ MI.getOpcode() == TargetOpcode::STATEPOINT) {
// Fold stackmap/patchpoint.
NewMI = foldPatchpoint(MF, MI, Ops, FI, *this);
if (NewMI)
@@ -530,10 +563,9 @@ MachineInstr *TargetInstrInfo::foldMemoryOperand(MachineInstr &MI,
assert((!(Flags & MachineMemOperand::MOLoad) ||
NewMI->mayLoad()) &&
"Folded a use to a non-load!");
- const MachineFrameInfo &MFI = *MF.getFrameInfo();
assert(MFI.getObjectOffset(FI) != -1);
MachineMemOperand *MMO = MF.getMachineMemOperand(
- MachinePointerInfo::getFixedStack(MF, FI), Flags, MFI.getObjectSize(FI),
+ MachinePointerInfo::getFixedStack(MF, FI), Flags, MemSize,
MFI.getObjectAlignment(FI));
NewMI->addMemOperand(MF, MMO);
@@ -550,7 +582,6 @@ MachineInstr *TargetInstrInfo::foldMemoryOperand(MachineInstr &MI,
const MachineOperand &MO = MI.getOperand(1 - Ops[0]);
MachineBasicBlock::iterator Pos = MI;
- const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
if (Flags == MachineMemOperand::MOStore)
storeRegToStackSlot(*MBB, Pos, MO.getReg(), MO.isKill(), FI, RC, TRI);
@@ -792,7 +823,8 @@ MachineInstr *TargetInstrInfo::foldMemoryOperand(MachineInstr &MI,
int FrameIndex = 0;
if ((MI.getOpcode() == TargetOpcode::STACKMAP ||
- MI.getOpcode() == TargetOpcode::PATCHPOINT) &&
+ MI.getOpcode() == TargetOpcode::PATCHPOINT ||
+ MI.getOpcode() == TargetOpcode::STATEPOINT) &&
isLoadFromStackSlot(LoadMI, FrameIndex)) {
// Fold stackmap/patchpoint.
NewMI = foldPatchpoint(MF, MI, Ops, FrameIndex, *this);
@@ -844,7 +876,7 @@ bool TargetInstrInfo::isReallyTriviallyReMaterializableGeneric(
// simple, and a common case.
int FrameIdx = 0;
if (isLoadFromStackSlot(MI, FrameIdx) &&
- MF.getFrameInfo()->isImmutableObjectIndex(FrameIdx))
+ MF.getFrameInfo().isImmutableObjectIndex(FrameIdx))
return true;
// Avoid instructions obviously unsafe for remat.
@@ -857,7 +889,7 @@ bool TargetInstrInfo::isReallyTriviallyReMaterializableGeneric(
return false;
// Avoid instructions which load from potentially varying memory.
- if (MI.mayLoad() && !MI.isInvariantLoad(AA))
+ if (MI.mayLoad() && !MI.isDereferenceableInvariantLoad(AA))
return false;
// If any of the registers accessed are non-constant, conservatively assume
@@ -875,7 +907,7 @@ bool TargetInstrInfo::isReallyTriviallyReMaterializableGeneric(
// If the physreg has no defs anywhere, it's just an ambient register
// and we can freely move its uses. Alternatively, if it's allocatable,
// it could get allocated to something with a def during allocation.
- if (!MRI.isConstantPhysReg(Reg, MF))
+ if (!MRI.isConstantPhysReg(Reg))
return false;
} else {
// A physreg def. We can't remat it.
@@ -1091,35 +1123,6 @@ int TargetInstrInfo::computeDefOperandLatency(
return -1;
}
-unsigned TargetInstrInfo::computeOperandLatency(
- const InstrItineraryData *ItinData, const MachineInstr &DefMI,
- unsigned DefIdx, const MachineInstr *UseMI, unsigned UseIdx) const {
-
- int DefLatency = computeDefOperandLatency(ItinData, DefMI);
- if (DefLatency >= 0)
- return DefLatency;
-
- assert(ItinData && !ItinData->isEmpty() && "computeDefOperandLatency fail");
-
- int OperLatency = 0;
- if (UseMI)
- OperLatency = getOperandLatency(ItinData, DefMI, DefIdx, *UseMI, UseIdx);
- else {
- unsigned DefClass = DefMI.getDesc().getSchedClass();
- OperLatency = ItinData->getOperandCycle(DefClass, DefIdx);
- }
- if (OperLatency >= 0)
- return OperLatency;
-
- // No operand latency was found.
- unsigned InstrLatency = getInstrLatency(ItinData, DefMI);
-
- // Expected latency is the max of the stage latency and itinerary props.
- InstrLatency = std::max(InstrLatency,
- defaultDefLatency(ItinData->SchedModel, DefMI));
- return InstrLatency;
-}
-
bool TargetInstrInfo::getRegSequenceInputs(
const MachineInstr &MI, unsigned DefIdx,
SmallVectorImpl<RegSubRegPairAndIdx> &InputRegs) const {
diff --git a/contrib/llvm/lib/CodeGen/TargetLoweringBase.cpp b/contrib/llvm/lib/CodeGen/TargetLoweringBase.cpp
index 6d3fe8c..003311b 100644
--- a/contrib/llvm/lib/CodeGen/TargetLoweringBase.cpp
+++ b/contrib/llvm/lib/CodeGen/TargetLoweringBase.cpp
@@ -14,6 +14,7 @@
#include "llvm/Target/TargetLowering.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/Triple.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
@@ -44,6 +45,14 @@ static cl::opt<bool> JumpIsExpensiveOverride(
cl::desc("Do not create extra branches to split comparison logic."),
cl::Hidden);
+static cl::opt<unsigned> MinimumJumpTableEntries
+ ("min-jump-table-entries", cl::init(4), cl::Hidden,
+ cl::desc("Set minimum number of entries to use a jump table."));
+
+static cl::opt<unsigned> MaximumJumpTableSize
+ ("max-jump-table-size", cl::init(0), cl::Hidden,
+ cl::desc("Set maximum size of jump tables; zero for no limit."));
+
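For reference, these are plain command-line flags, so the jump-table heuristics can be tuned without rebuilding — e.g. `llc -min-jump-table-entries=8 -max-jump-table-size=64 input.ll` (an illustrative invocation; the defaults are the cl::init values above, with zero meaning no size limit).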
// Although this default value is arbitrary, it is not random. It is assumed
// that a condition that evaluates the same way by a higher percentage than this
// is best represented as control flow. Therefore, the default value N should be
@@ -352,6 +361,11 @@ static void InitLibcallNames(const char **Names, const Triple &TT) {
Names[RTLIB::MEMCPY] = "memcpy";
Names[RTLIB::MEMMOVE] = "memmove";
Names[RTLIB::MEMSET] = "memset";
+ Names[RTLIB::MEMCPY_ELEMENT_ATOMIC_1] = "__llvm_memcpy_element_atomic_1";
+ Names[RTLIB::MEMCPY_ELEMENT_ATOMIC_2] = "__llvm_memcpy_element_atomic_2";
+ Names[RTLIB::MEMCPY_ELEMENT_ATOMIC_4] = "__llvm_memcpy_element_atomic_4";
+ Names[RTLIB::MEMCPY_ELEMENT_ATOMIC_8] = "__llvm_memcpy_element_atomic_8";
+ Names[RTLIB::MEMCPY_ELEMENT_ATOMIC_16] = "__llvm_memcpy_element_atomic_16";
Names[RTLIB::UNWIND_RESUME] = "_Unwind_Resume";
Names[RTLIB::SYNC_VAL_COMPARE_AND_SWAP_1] = "__sync_val_compare_and_swap_1";
Names[RTLIB::SYNC_VAL_COMPARE_AND_SWAP_2] = "__sync_val_compare_and_swap_2";
@@ -488,12 +502,10 @@ static void InitLibcallNames(const char **Names, const Triple &TT) {
Names[RTLIB::DEOPTIMIZE] = "__llvm_deoptimize";
}
-/// InitLibcallCallingConvs - Set default libcall CallingConvs.
-///
+/// Set default libcall CallingConvs.
static void InitLibcallCallingConvs(CallingConv::ID *CCs) {
- for (int i = 0; i < RTLIB::UNKNOWN_LIBCALL; ++i) {
- CCs[i] = CallingConv::C;
- }
+ for (int LC = 0; LC < RTLIB::UNKNOWN_LIBCALL; ++LC)
+ CCs[LC] = CallingConv::C;
}
/// getFPEXT - Return the FPEXT_*_* value for the given types, or
@@ -756,6 +768,24 @@ RTLIB::Libcall RTLIB::getSYNC(unsigned Opc, MVT VT) {
return UNKNOWN_LIBCALL;
}
+RTLIB::Libcall RTLIB::getMEMCPY_ELEMENT_ATOMIC(uint64_t ElementSize) {
+ switch (ElementSize) {
+ case 1:
+ return MEMCPY_ELEMENT_ATOMIC_1;
+ case 2:
+ return MEMCPY_ELEMENT_ATOMIC_2;
+ case 4:
+ return MEMCPY_ELEMENT_ATOMIC_4;
+ case 8:
+ return MEMCPY_ELEMENT_ATOMIC_8;
+ case 16:
+ return MEMCPY_ELEMENT_ATOMIC_16;
+ default:
+ return UNKNOWN_LIBCALL;
+ }
+}
+
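For illustration, the new mapping composes with the Names[] table above so that an element-atomic memcpy with element size 4 resolves to the runtime symbol "__llvm_memcpy_element_atomic_4". A self-contained sketch (enum and table simplified from the patch; not LLVM's actual declarations):

    #include <cstdint>
    #include <cstdio>

    // Simplified stand-ins for RTLIB::Libcall and the Names[] table.
    enum Libcall { MEMCPY_EA_1, MEMCPY_EA_2, MEMCPY_EA_4, MEMCPY_EA_8,
                   MEMCPY_EA_16, UNKNOWN_LIBCALL };

    static const char *Names[] = {
        "__llvm_memcpy_element_atomic_1", "__llvm_memcpy_element_atomic_2",
        "__llvm_memcpy_element_atomic_4", "__llvm_memcpy_element_atomic_8",
        "__llvm_memcpy_element_atomic_16"};

    // Mirrors RTLIB::getMEMCPY_ELEMENT_ATOMIC: only power-of-two element
    // sizes up to 16 have a libcall; anything else is unsupported.
    static Libcall getMemcpyElementAtomic(uint64_t ElementSize) {
      switch (ElementSize) {
      case 1:  return MEMCPY_EA_1;
      case 2:  return MEMCPY_EA_2;
      case 4:  return MEMCPY_EA_4;
      case 8:  return MEMCPY_EA_8;
      case 16: return MEMCPY_EA_16;
      default: return UNKNOWN_LIBCALL;
      }
    }

    int main() {
      Libcall LC = getMemcpyElementAtomic(4);
      if (LC != UNKNOWN_LIBCALL)
        std::printf("%s\n", Names[LC]); // __llvm_memcpy_element_atomic_4
    }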
/// InitCmpLibcallCCs - Set default comparison libcall CC.
///
static void InitCmpLibcallCCs(ISD::CondCode *CCs) {
@@ -804,10 +834,8 @@ TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm) : TM(tm) {
= MaxStoresPerMemmoveOptSize = 4;
UseUnderscoreSetJmp = false;
UseUnderscoreLongJmp = false;
- SelectIsExpensive = false;
HasMultipleConditionRegisters = false;
HasExtractBitsInsn = false;
- FsqrtIsCheap = false;
JumpIsExpensive = JumpIsExpensiveOverride;
PredictableSelectIsExpensive = false;
MaskAndBranchFoldingIsLegal = false;
@@ -825,7 +853,6 @@ TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm) : TM(tm) {
PrefLoopAlignment = 0;
GatherAllAliasesMaxDepth = 6;
MinStackArgumentAlignment = 1;
- MinimumJumpTableEntries = 4;
// TODO: the default will be switched to 0 in the next commit, along
// with the Target-specific changes necessary.
MaxAtomicSizeInBitsSupported = 1024;
@@ -956,15 +983,11 @@ EVT TargetLoweringBase::getShiftAmountTy(EVT LHSTy,
return getScalarShiftAmountTy(DL, LHSTy);
}
-/// canOpTrap - Returns true if the operation can trap for the value type.
-/// VT must be a legal type.
bool TargetLoweringBase::canOpTrap(unsigned Op, EVT VT) const {
assert(isTypeLegal(VT));
switch (Op) {
default:
return false;
- case ISD::FDIV:
- case ISD::FREM:
case ISD::SDIV:
case ISD::UDIV:
case ISD::SREM:
@@ -1177,7 +1200,7 @@ TargetLoweringBase::emitPatchPoint(MachineInstr &InitialMI,
MachineBasicBlock *MBB) const {
MachineInstr *MI = &InitialMI;
MachineFunction &MF = *MI->getParent()->getParent();
- MachineFrameInfo &MFI = *MF.getFrameInfo();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
// We're handling multiple types of operands here:
// PATCHPOINT MetaArgs - live-in, read only, direct
@@ -1402,7 +1425,7 @@ void TargetLoweringBase::computeRegisterProperties(
MVT SVT = (MVT::SimpleValueType) nVT;
// Promote vectors of integers to vectors with the same number
// of elements, with a wider element type.
- if (SVT.getVectorElementType().getSizeInBits() > EltVT.getSizeInBits() &&
+ if (SVT.getScalarSizeInBits() > EltVT.getSizeInBits() &&
SVT.getVectorNumElements() == NElts && isTypeLegal(SVT)) {
TransformToType[i] = SVT;
RegisterTypeForVT[i] = SVT;
@@ -1754,9 +1777,41 @@ TargetLoweringBase::getTypeLegalizationCost(const DataLayout &DL,
}
}
+Value *TargetLoweringBase::getDefaultSafeStackPointerLocation(IRBuilder<> &IRB,
+ bool UseTLS) const {
+ // compiler-rt provides a variable with a magic name. Targets that do not
+ // link with compiler-rt may also provide such a variable.
+ Module *M = IRB.GetInsertBlock()->getParent()->getParent();
+ const char *UnsafeStackPtrVar = "__safestack_unsafe_stack_ptr";
+ auto UnsafeStackPtr =
+ dyn_cast_or_null<GlobalVariable>(M->getNamedValue(UnsafeStackPtrVar));
+
+ Type *StackPtrTy = Type::getInt8PtrTy(M->getContext());
+
+ if (!UnsafeStackPtr) {
+ auto TLSModel = UseTLS ?
+ GlobalValue::InitialExecTLSModel :
+ GlobalValue::NotThreadLocal;
+    // The global variable is not defined yet; define it ourselves.
+ // We use the initial-exec TLS model because we do not support the
+ // variable living anywhere other than in the main executable.
+ UnsafeStackPtr = new GlobalVariable(
+ *M, StackPtrTy, false, GlobalValue::ExternalLinkage, nullptr,
+ UnsafeStackPtrVar, nullptr, TLSModel);
+ } else {
+    // The variable exists; check its type and attributes.
+ if (UnsafeStackPtr->getValueType() != StackPtrTy)
+ report_fatal_error(Twine(UnsafeStackPtrVar) + " must have void* type");
+ if (UseTLS != UnsafeStackPtr->isThreadLocal())
+ report_fatal_error(Twine(UnsafeStackPtrVar) + " must " +
+ (UseTLS ? "" : "not ") + "be thread-local");
+ }
+ return UnsafeStackPtr;
+}
+
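For reference, the runtime-provided variable this lowering binds to looks roughly like the following C declaration (a sketch, not compiler-rt's verbatim source); a __thread definition in the main executable is what makes the initial-exec TLS model above safe:

    // Sketch of the definition expected from compiler-rt (or a substitute
    // runtime) when UseTLS is true:
    __thread void *__safestack_unsafe_stack_ptr; // initial-exec TLS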
Value *TargetLoweringBase::getSafeStackPointerLocation(IRBuilder<> &IRB) const {
if (!TM.getTargetTriple().isAndroid())
- return nullptr;
+ return getDefaultSafeStackPointerLocation(IRB, true);
// Android provides a libc function to retrieve the address of the current
// thread's unsafe stack pointer.
@@ -1818,9 +1873,7 @@ Value *TargetLoweringBase::getIRStackGuard(IRBuilder<> &IRB) const {
if (getTargetMachine().getTargetTriple().isOSOpenBSD()) {
Module &M = *IRB.GetInsertBlock()->getParent()->getParent();
PointerType *PtrTy = Type::getInt8PtrTy(M.getContext());
- auto Guard = cast<GlobalValue>(M.getOrInsertGlobal("__guard_local", PtrTy));
- Guard->setVisibility(GlobalValue::HiddenVisibility);
- return Guard;
+ return M.getOrInsertGlobal("__guard_local", PtrTy);
}
return nullptr;
}
@@ -1840,3 +1893,207 @@ Value *TargetLoweringBase::getSDagStackGuard(const Module &M) const {
Value *TargetLoweringBase::getSSPStackGuardCheck(const Module &M) const {
return nullptr;
}
+
+unsigned TargetLoweringBase::getMinimumJumpTableEntries() const {
+ return MinimumJumpTableEntries;
+}
+
+void TargetLoweringBase::setMinimumJumpTableEntries(unsigned Val) {
+ MinimumJumpTableEntries = Val;
+}
+
+unsigned TargetLoweringBase::getMaximumJumpTableSize() const {
+ return MaximumJumpTableSize;
+}
+
+void TargetLoweringBase::setMaximumJumpTableSize(unsigned Val) {
+ MaximumJumpTableSize = Val;
+}
+
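A minimal sketch of how a switch-lowering client might consult these two thresholds (the struct and helper names here are illustrative, not part of the patch):

    // Illustrative stand-ins for the accessors defined above.
    struct JumpTableLimits {
      unsigned MinEntries = 4; // -min-jump-table-entries
      unsigned MaxSize = 0;    // -max-jump-table-size; zero means "no limit"
    };

    // A switch with NumCases destinations is jump-table eligible only when
    // it meets the minimum and does not exceed the optional maximum.
    static bool shouldUseJumpTable(unsigned NumCases,
                                   const JumpTableLimits &L) {
      if (NumCases < L.MinEntries)
        return false;
      return L.MaxSize == 0 || NumCases <= L.MaxSize;
    }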
+//===----------------------------------------------------------------------===//
+// Reciprocal Estimates
+//===----------------------------------------------------------------------===//
+
+/// Get the reciprocal estimate attribute string for a function that will
+/// override the target defaults.
+static StringRef getRecipEstimateForFunc(MachineFunction &MF) {
+ const Function *F = MF.getFunction();
+ StringRef RecipAttrName = "reciprocal-estimates";
+ if (!F->hasFnAttribute(RecipAttrName))
+ return StringRef();
+
+ return F->getFnAttribute(RecipAttrName).getValueAsString();
+}
+
+/// Construct a string for the given reciprocal operation of the given type.
+/// This string should match the corresponding option to the front-end's
+/// "-mrecip" flag assuming those strings have been passed through in an
+/// attribute string. For example, "vec-divf" for a division of a vXf32.
+static std::string getReciprocalOpName(bool IsSqrt, EVT VT) {
+ std::string Name = VT.isVector() ? "vec-" : "";
+
+ Name += IsSqrt ? "sqrt" : "div";
+
+ // TODO: Handle "half" or other float types?
+ if (VT.getScalarType() == MVT::f64) {
+ Name += "d";
+ } else {
+ assert(VT.getScalarType() == MVT::f32 &&
+ "Unexpected FP type for reciprocal estimate");
+ Name += "f";
+ }
+
+ return Name;
+}
+
+/// Return the character position and value (a single numeric character) of a
+/// customized refinement operation in the input string if it exists. Return
+/// false if there is no customized refinement step count.
+static bool parseRefinementStep(StringRef In, size_t &Position,
+ uint8_t &Value) {
+ const char RefStepToken = ':';
+ Position = In.find(RefStepToken);
+ if (Position == StringRef::npos)
+ return false;
+
+ StringRef RefStepString = In.substr(Position + 1);
+ // Allow exactly one numeric character for the additional refinement
+ // step parameter.
+ if (RefStepString.size() == 1) {
+ char RefStepChar = RefStepString[0];
+ if (RefStepChar >= '0' && RefStepChar <= '9') {
+ Value = RefStepChar - '0';
+ return true;
+ }
+ }
+ report_fatal_error("Invalid refinement step for -recip.");
+}
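To make the ':'-suffix syntax concrete, here is a self-contained sketch of the same tokenizing logic over std::string (the real helper operates on StringRef and calls report_fatal_error on a malformed step):

    #include <cstdint>
    #include <cstdio>
    #include <string>

    // Parses "<op>[:<steps>]" where <steps> must be a single digit.
    static bool parseStep(const std::string &In, size_t &Pos, uint8_t &Val) {
      Pos = In.find(':');
      if (Pos == std::string::npos)
        return false;
      std::string Step = In.substr(Pos + 1);
      if (Step.size() == 1 && Step[0] >= '0' && Step[0] <= '9') {
        Val = Step[0] - '0';
        return true;
      }
      return false; // the helper above reports a fatal error instead
    }

    int main() {
      size_t Pos;
      uint8_t Steps;
      if (parseStep("vec-divf:2", Pos, Steps))
        std::printf("op=%s steps=%u\n",
                    std::string("vec-divf:2").substr(0, Pos).c_str(),
                    static_cast<unsigned>(Steps)); // op=vec-divf steps=2
    }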
+
+/// For the input attribute string, return one of the ReciprocalEstimate enum
+/// status values (enabled, disabled, or not specified) for this operation on
+/// the specified data type.
+static int getOpEnabled(bool IsSqrt, EVT VT, StringRef Override) {
+ if (Override.empty())
+ return TargetLoweringBase::ReciprocalEstimate::Unspecified;
+
+ SmallVector<StringRef, 4> OverrideVector;
+ SplitString(Override, OverrideVector, ",");
+ unsigned NumArgs = OverrideVector.size();
+
+ // Check if "all", "none", or "default" was specified.
+ if (NumArgs == 1) {
+ // Look for an optional setting of the number of refinement steps needed
+ // for this type of reciprocal operation.
+ size_t RefPos;
+ uint8_t RefSteps;
+ if (parseRefinementStep(Override, RefPos, RefSteps)) {
+ // Split the string for further processing.
+ Override = Override.substr(0, RefPos);
+ }
+
+ // All reciprocal types are enabled.
+ if (Override == "all")
+ return TargetLoweringBase::ReciprocalEstimate::Enabled;
+
+ // All reciprocal types are disabled.
+ if (Override == "none")
+ return TargetLoweringBase::ReciprocalEstimate::Disabled;
+
+ // Target defaults for enablement are used.
+ if (Override == "default")
+ return TargetLoweringBase::ReciprocalEstimate::Unspecified;
+ }
+
+ // The attribute string may omit the size suffix ('f'/'d').
+ std::string VTName = getReciprocalOpName(IsSqrt, VT);
+ std::string VTNameNoSize = VTName;
+ VTNameNoSize.pop_back();
+ static const char DisabledPrefix = '!';
+
+ for (StringRef RecipType : OverrideVector) {
+ size_t RefPos;
+ uint8_t RefSteps;
+ if (parseRefinementStep(RecipType, RefPos, RefSteps))
+ RecipType = RecipType.substr(0, RefPos);
+
+ // Ignore the disablement token for string matching.
+ bool IsDisabled = RecipType[0] == DisabledPrefix;
+ if (IsDisabled)
+ RecipType = RecipType.substr(1);
+
+ if (RecipType.equals(VTName) || RecipType.equals(VTNameNoSize))
+ return IsDisabled ? TargetLoweringBase::ReciprocalEstimate::Disabled
+ : TargetLoweringBase::ReciprocalEstimate::Enabled;
+ }
+
+ return TargetLoweringBase::ReciprocalEstimate::Unspecified;
+}
+
+/// For the input attribute string, return the customized refinement step count
+/// for this operation on the specified data type. If the step count does not
+/// exist, return the ReciprocalEstimate enum value for unspecified.
+static int getOpRefinementSteps(bool IsSqrt, EVT VT, StringRef Override) {
+ if (Override.empty())
+ return TargetLoweringBase::ReciprocalEstimate::Unspecified;
+
+ SmallVector<StringRef, 4> OverrideVector;
+ SplitString(Override, OverrideVector, ",");
+ unsigned NumArgs = OverrideVector.size();
+
+ // Check if "all", "default", or "none" was specified.
+ if (NumArgs == 1) {
+ // Look for an optional setting of the number of refinement steps needed
+ // for this type of reciprocal operation.
+ size_t RefPos;
+ uint8_t RefSteps;
+ if (!parseRefinementStep(Override, RefPos, RefSteps))
+ return TargetLoweringBase::ReciprocalEstimate::Unspecified;
+
+ // Split the string for further processing.
+ Override = Override.substr(0, RefPos);
+ assert(Override != "none" &&
+         "Disabled reciprocals, but specified refinement steps?");
+
+ // If this is a general override, return the specified number of steps.
+ if (Override == "all" || Override == "default")
+ return RefSteps;
+ }
+
+ // The attribute string may omit the size suffix ('f'/'d').
+ std::string VTName = getReciprocalOpName(IsSqrt, VT);
+ std::string VTNameNoSize = VTName;
+ VTNameNoSize.pop_back();
+
+ for (StringRef RecipType : OverrideVector) {
+ size_t RefPos;
+ uint8_t RefSteps;
+ if (!parseRefinementStep(RecipType, RefPos, RefSteps))
+ continue;
+
+ RecipType = RecipType.substr(0, RefPos);
+ if (RecipType.equals(VTName) || RecipType.equals(VTNameNoSize))
+ return RefSteps;
+ }
+
+ return TargetLoweringBase::ReciprocalEstimate::Unspecified;
+}
+
+int TargetLoweringBase::getRecipEstimateSqrtEnabled(EVT VT,
+ MachineFunction &MF) const {
+ return getOpEnabled(true, VT, getRecipEstimateForFunc(MF));
+}
+
+int TargetLoweringBase::getRecipEstimateDivEnabled(EVT VT,
+ MachineFunction &MF) const {
+ return getOpEnabled(false, VT, getRecipEstimateForFunc(MF));
+}
+
+int TargetLoweringBase::getSqrtRefinementSteps(EVT VT,
+ MachineFunction &MF) const {
+ return getOpRefinementSteps(true, VT, getRecipEstimateForFunc(MF));
+}
+
+int TargetLoweringBase::getDivRefinementSteps(EVT VT,
+ MachineFunction &MF) const {
+ return getOpRefinementSteps(false, VT, getRecipEstimateForFunc(MF));
+}
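Putting the pieces together: a front end or a test can drive these hooks by attaching the attribute to a function. The string below is a hypothetical example mirroring clang's -mrecip values, assuming F is an llvm::Function*:

    F->addFnAttr("reciprocal-estimates", "!sqrtf,vec-divf:2,divd");
    // With this string attached, the hooks above would report:
    //   getRecipEstimateSqrtEnabled(f32, MF)  -> Disabled  ("!sqrtf")
    //   getRecipEstimateDivEnabled(v4f32, MF) -> Enabled   ("vec-divf")
    //   getDivRefinementSteps(v4f32, MF)      -> 2         (":2" suffix)
    //   getRecipEstimateDivEnabled(f64, MF)   -> Enabled   ("divd")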
diff --git a/contrib/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/contrib/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
index 5f814c9..eb2a28f 100644
--- a/contrib/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
+++ b/contrib/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
@@ -50,14 +50,14 @@ using namespace dwarf;
//===----------------------------------------------------------------------===//
MCSymbol *TargetLoweringObjectFileELF::getCFIPersonalitySymbol(
- const GlobalValue *GV, Mangler &Mang, const TargetMachine &TM,
+ const GlobalValue *GV, const TargetMachine &TM,
MachineModuleInfo *MMI) const {
unsigned Encoding = getPersonalityEncoding();
if ((Encoding & 0x80) == dwarf::DW_EH_PE_indirect)
return getContext().getOrCreateSymbol(StringRef("DW.ref.") +
- TM.getSymbol(GV, Mang)->getName());
+ TM.getSymbol(GV)->getName());
if ((Encoding & 0x70) == dwarf::DW_EH_PE_absptr)
- return TM.getSymbol(GV, Mang);
+ return TM.getSymbol(GV);
report_fatal_error("We do not support this DWARF encoding yet!");
}
@@ -84,20 +84,19 @@ void TargetLoweringObjectFileELF::emitPersonalityValue(
}
const MCExpr *TargetLoweringObjectFileELF::getTTypeGlobalReference(
- const GlobalValue *GV, unsigned Encoding, Mangler &Mang,
- const TargetMachine &TM, MachineModuleInfo *MMI,
- MCStreamer &Streamer) const {
+ const GlobalValue *GV, unsigned Encoding, const TargetMachine &TM,
+ MachineModuleInfo *MMI, MCStreamer &Streamer) const {
if (Encoding & dwarf::DW_EH_PE_indirect) {
MachineModuleInfoELF &ELFMMI = MMI->getObjFileInfo<MachineModuleInfoELF>();
- MCSymbol *SSym = getSymbolWithGlobalValueBase(GV, ".DW.stub", Mang, TM);
+ MCSymbol *SSym = getSymbolWithGlobalValueBase(GV, ".DW.stub", TM);
// Add information about the stub reference to ELFMMI so that the stub
// gets emitted by the asmprinter.
MachineModuleInfoImpl::StubValueTy &StubSym = ELFMMI.getGVStubEntry(SSym);
if (!StubSym.getPointer()) {
- MCSymbol *Sym = TM.getSymbol(GV, Mang);
+ MCSymbol *Sym = TM.getSymbol(GV);
StubSym = MachineModuleInfoImpl::StubValueTy(Sym, !GV->hasLocalLinkage());
}
@@ -106,8 +105,8 @@ const MCExpr *TargetLoweringObjectFileELF::getTTypeGlobalReference(
Encoding & ~dwarf::DW_EH_PE_indirect, Streamer);
}
- return TargetLoweringObjectFile::
- getTTypeGlobalReference(GV, Encoding, Mang, TM, MMI, Streamer);
+ return TargetLoweringObjectFile::getTTypeGlobalReference(GV, Encoding, TM,
+ MMI, Streamer);
}
static SectionKind
@@ -152,6 +151,11 @@ getELFKindForNamedSection(StringRef Name, SectionKind K) {
static unsigned getELFSectionType(StringRef Name, SectionKind K) {
+  // Use SHT_NOTE for sections whose names start with ".note" to allow
+  // emitting ELF notes from a C variable declaration.
+ // See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=77609
+ if (Name.startswith(".note"))
+ return ELF::SHT_NOTE;
if (Name == ".init_array")
return ELF::SHT_INIT_ARRAY;
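The GCC bug cited above concerns defining ELF notes from C. A hedged sketch of that use case (structure and values are illustrative); with this change such a variable's section gets type SHT_NOTE instead of SHT_PROGBITS:

    // Standard ELF note layout: sizes, a type, a padded name, a descriptor.
    struct Note {
      unsigned namesz, descsz, type; // namesz counts the name's NUL too
      char name[8];                  // "MYAPP" padded to a 4-byte boundary
      unsigned desc;                 // 4-byte descriptor payload
    };
    __attribute__((section(".note.myapp"), used, aligned(4)))
    static const Note MyNote = {6, 4, 1, "MYAPP", 0x2a};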
@@ -177,6 +181,9 @@ static unsigned getELFSectionFlags(SectionKind K) {
if (K.isText())
Flags |= ELF::SHF_EXECINSTR;
+ if (K.isExecuteOnly())
+ Flags |= ELF::SHF_ARM_PURECODE;
+
if (K.isWriteable())
Flags |= ELF::SHF_WRITE;
@@ -205,16 +212,15 @@ static const Comdat *getELFComdat(const GlobalValue *GV) {
}
MCSection *TargetLoweringObjectFileELF::getExplicitSectionGlobal(
- const GlobalValue *GV, SectionKind Kind, Mangler &Mang,
- const TargetMachine &TM) const {
- StringRef SectionName = GV->getSection();
+ const GlobalObject *GO, SectionKind Kind, const TargetMachine &TM) const {
+ StringRef SectionName = GO->getSection();
// Infer section flags from the section name if we can.
Kind = getELFKindForNamedSection(SectionName, Kind);
StringRef Group = "";
unsigned Flags = getELFSectionFlags(Kind);
- if (const Comdat *C = getELFComdat(GV)) {
+ if (const Comdat *C = getELFComdat(GO)) {
Group = C->getName();
Flags |= ELF::SHF_GROUP;
}
@@ -243,7 +249,7 @@ static StringRef getSectionPrefixForGlobal(SectionKind Kind) {
}
static MCSectionELF *
-selectELFSectionForGlobal(MCContext &Ctx, const GlobalValue *GV,
+selectELFSectionForGlobal(MCContext &Ctx, const GlobalObject *GO,
SectionKind Kind, Mangler &Mang,
const TargetMachine &TM, bool EmitUniqueSection,
unsigned Flags, unsigned *NextUniqueID) {
@@ -271,7 +277,7 @@ selectELFSectionForGlobal(MCContext &Ctx, const GlobalValue *GV,
}
StringRef Group = "";
- if (const Comdat *C = getELFComdat(GV)) {
+ if (const Comdat *C = getELFComdat(GO)) {
Flags |= ELF::SHF_GROUP;
Group = C->getName();
}
@@ -282,8 +288,8 @@ selectELFSectionForGlobal(MCContext &Ctx, const GlobalValue *GV,
// We also need alignment here.
// FIXME: this is getting the alignment of the character, not the
// alignment of the global!
- unsigned Align = GV->getParent()->getDataLayout().getPreferredAlignment(
- cast<GlobalVariable>(GV));
+ unsigned Align = GO->getParent()->getDataLayout().getPreferredAlignment(
+ cast<GlobalVariable>(GO));
std::string SizeSpec = ".rodata.str" + utostr(EntrySize) + ".";
Name = SizeSpec + utostr(Align);
@@ -293,25 +299,31 @@ selectELFSectionForGlobal(MCContext &Ctx, const GlobalValue *GV,
} else {
Name = getSectionPrefixForGlobal(Kind);
}
- // FIXME: Extend the section prefix to include hotness catagories such as .hot
- // or .unlikely for functions.
+
+ if (const auto *F = dyn_cast<Function>(GO)) {
+ const auto &OptionalPrefix = F->getSectionPrefix();
+ if (OptionalPrefix)
+ Name += *OptionalPrefix;
+ }
if (EmitUniqueSection && UniqueSectionNames) {
Name.push_back('.');
- TM.getNameWithPrefix(Name, GV, Mang, true);
+ TM.getNameWithPrefix(Name, GO, Mang, true);
}
unsigned UniqueID = MCContext::GenericSectionID;
if (EmitUniqueSection && !UniqueSectionNames) {
UniqueID = *NextUniqueID;
(*NextUniqueID)++;
}
+  // Use 0 as the unique ID for execute-only text sections.
+ if (Kind.isExecuteOnly())
+ UniqueID = 0;
return Ctx.getELFSection(Name, getELFSectionType(Name, Kind), Flags,
EntrySize, Group, UniqueID);
}
MCSection *TargetLoweringObjectFileELF::SelectSectionForGlobal(
- const GlobalValue *GV, SectionKind Kind, Mangler &Mang,
- const TargetMachine &TM) const {
+ const GlobalObject *GO, SectionKind Kind, const TargetMachine &TM) const {
unsigned Flags = getELFSectionFlags(Kind);
// If we have -ffunction-section or -fdata-section then we should emit the
@@ -323,14 +335,14 @@ MCSection *TargetLoweringObjectFileELF::SelectSectionForGlobal(
else
EmitUniqueSection = TM.getDataSections();
}
- EmitUniqueSection |= GV->hasComdat();
+ EmitUniqueSection |= GO->hasComdat();
- return selectELFSectionForGlobal(getContext(), GV, Kind, Mang, TM,
+ return selectELFSectionForGlobal(getContext(), GO, Kind, getMangler(), TM,
EmitUniqueSection, Flags, &NextUniqueID);
}
MCSection *TargetLoweringObjectFileELF::getSectionForJumpTable(
- const Function &F, Mangler &Mang, const TargetMachine &TM) const {
+ const Function &F, const TargetMachine &TM) const {
// If the function can be removed, produce a unique section so that
// the table doesn't prevent the removal.
const Comdat *C = F.getComdat();
@@ -339,7 +351,7 @@ MCSection *TargetLoweringObjectFileELF::getSectionForJumpTable(
return ReadOnlySection;
return selectELFSectionForGlobal(getContext(), &F, SectionKind::getReadOnly(),
- Mang, TM, EmitUniqueSection, ELF::SHF_ALLOC,
+ getMangler(), TM, EmitUniqueSection, ELF::SHF_ALLOC,
&NextUniqueID);
}
@@ -423,7 +435,7 @@ MCSection *TargetLoweringObjectFileELF::getStaticDtorSection(
}
const MCExpr *TargetLoweringObjectFileELF::lowerRelativeReference(
- const GlobalValue *LHS, const GlobalValue *RHS, Mangler &Mang,
+ const GlobalValue *LHS, const GlobalValue *RHS,
const TargetMachine &TM) const {
// We may only use a PLT-relative relocation to refer to unnamed_addr
// functions.
@@ -437,22 +449,28 @@ const MCExpr *TargetLoweringObjectFileELF::lowerRelativeReference(
return nullptr;
return MCBinaryExpr::createSub(
- MCSymbolRefExpr::create(TM.getSymbol(LHS, Mang), PLTRelativeVariantKind,
+ MCSymbolRefExpr::create(TM.getSymbol(LHS), PLTRelativeVariantKind,
getContext()),
- MCSymbolRefExpr::create(TM.getSymbol(RHS, Mang), getContext()),
- getContext());
+ MCSymbolRefExpr::create(TM.getSymbol(RHS), getContext()), getContext());
}
void
TargetLoweringObjectFileELF::InitializeELF(bool UseInitArray_) {
UseInitArray = UseInitArray_;
- if (!UseInitArray)
+ MCContext &Ctx = getContext();
+ if (!UseInitArray) {
+ StaticCtorSection = Ctx.getELFSection(".ctors", ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC | ELF::SHF_WRITE);
+
+ StaticDtorSection = Ctx.getELFSection(".dtors", ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC | ELF::SHF_WRITE);
return;
+ }
- StaticCtorSection = getContext().getELFSection(
- ".init_array", ELF::SHT_INIT_ARRAY, ELF::SHF_WRITE | ELF::SHF_ALLOC);
- StaticDtorSection = getContext().getELFSection(
- ".fini_array", ELF::SHT_FINI_ARRAY, ELF::SHF_WRITE | ELF::SHF_ALLOC);
+ StaticCtorSection = Ctx.getELFSection(".init_array", ELF::SHT_INIT_ARRAY,
+ ELF::SHF_WRITE | ELF::SHF_ALLOC);
+ StaticDtorSection = Ctx.getELFSection(".fini_array", ELF::SHT_FINI_ARRAY,
+ ELF::SHF_WRITE | ELF::SHF_ALLOC);
}
//===----------------------------------------------------------------------===//
@@ -464,11 +482,28 @@ TargetLoweringObjectFileMachO::TargetLoweringObjectFileMachO()
SupportIndirectSymViaGOTPCRel = true;
}
+void TargetLoweringObjectFileMachO::Initialize(MCContext &Ctx,
+ const TargetMachine &TM) {
+ TargetLoweringObjectFile::Initialize(Ctx, TM);
+ if (TM.getRelocationModel() == Reloc::Static) {
+ StaticCtorSection = Ctx.getMachOSection("__TEXT", "__constructor", 0,
+ SectionKind::getData());
+ StaticDtorSection = Ctx.getMachOSection("__TEXT", "__destructor", 0,
+ SectionKind::getData());
+ } else {
+ StaticCtorSection = Ctx.getMachOSection("__DATA", "__mod_init_func",
+ MachO::S_MOD_INIT_FUNC_POINTERS,
+ SectionKind::getData());
+ StaticDtorSection = Ctx.getMachOSection("__DATA", "__mod_term_func",
+ MachO::S_MOD_TERM_FUNC_POINTERS,
+ SectionKind::getData());
+ }
+}
+
/// emitModuleFlags - Perform code emission for module flags.
-void TargetLoweringObjectFileMachO::
-emitModuleFlags(MCStreamer &Streamer,
- ArrayRef<Module::ModuleFlagEntry> ModuleFlags,
- Mangler &Mang, const TargetMachine &TM) const {
+void TargetLoweringObjectFileMachO::emitModuleFlags(
+ MCStreamer &Streamer, ArrayRef<Module::ModuleFlagEntry> ModuleFlags,
+ const TargetMachine &TM) const {
unsigned VersionVal = 0;
unsigned ImageInfoFlags = 0;
MDNode *LinkerOptions = nullptr;
@@ -542,23 +577,22 @@ static void checkMachOComdat(const GlobalValue *GV) {
}
MCSection *TargetLoweringObjectFileMachO::getExplicitSectionGlobal(
- const GlobalValue *GV, SectionKind Kind, Mangler &Mang,
- const TargetMachine &TM) const {
+ const GlobalObject *GO, SectionKind Kind, const TargetMachine &TM) const {
// Parse the section specifier and create it if valid.
StringRef Segment, Section;
unsigned TAA = 0, StubSize = 0;
bool TAAParsed;
- checkMachOComdat(GV);
+ checkMachOComdat(GO);
std::string ErrorCode =
- MCSectionMachO::ParseSectionSpecifier(GV->getSection(), Segment, Section,
+ MCSectionMachO::ParseSectionSpecifier(GO->getSection(), Segment, Section,
TAA, TAAParsed, StubSize);
if (!ErrorCode.empty()) {
// If invalid, report the error with report_fatal_error.
- report_fatal_error("Global variable '" + GV->getName() +
+ report_fatal_error("Global variable '" + GO->getName() +
"' has an invalid section specifier '" +
- GV->getSection() + "': " + ErrorCode + ".");
+ GO->getSection() + "': " + ErrorCode + ".");
}
// Get the section.
@@ -575,7 +609,7 @@ MCSection *TargetLoweringObjectFileMachO::getExplicitSectionGlobal(
// to reject it here.
if (S->getTypeAndAttributes() != TAA || S->getStubSize() != StubSize) {
// If invalid, report the error with report_fatal_error.
- report_fatal_error("Global variable '" + GV->getName() +
+ report_fatal_error("Global variable '" + GO->getName() +
"' section type or attributes does not match previous"
" section specifier");
}
@@ -584,20 +618,19 @@ MCSection *TargetLoweringObjectFileMachO::getExplicitSectionGlobal(
}
MCSection *TargetLoweringObjectFileMachO::SelectSectionForGlobal(
- const GlobalValue *GV, SectionKind Kind, Mangler &Mang,
- const TargetMachine &TM) const {
- checkMachOComdat(GV);
+ const GlobalObject *GO, SectionKind Kind, const TargetMachine &TM) const {
+ checkMachOComdat(GO);
// Handle thread local data.
if (Kind.isThreadBSS()) return TLSBSSSection;
if (Kind.isThreadData()) return TLSDataSection;
if (Kind.isText())
- return GV->isWeakForLinker() ? TextCoalSection : TextSection;
+ return GO->isWeakForLinker() ? TextCoalSection : TextSection;
// If this is weak/linkonce, put this in a coalescable section, either in text
// or data depending on if it is writable.
- if (GV->isWeakForLinker()) {
+ if (GO->isWeakForLinker()) {
if (Kind.isReadOnly())
return ConstTextCoalSection;
return DataCoalSection;
@@ -605,21 +638,21 @@ MCSection *TargetLoweringObjectFileMachO::SelectSectionForGlobal(
// FIXME: Alignment check should be handled by section classifier.
if (Kind.isMergeable1ByteCString() &&
- GV->getParent()->getDataLayout().getPreferredAlignment(
- cast<GlobalVariable>(GV)) < 32)
+ GO->getParent()->getDataLayout().getPreferredAlignment(
+ cast<GlobalVariable>(GO)) < 32)
return CStringSection;
// Do not put 16-bit arrays in the UString section if they have an
// externally visible label, this runs into issues with certain linker
// versions.
- if (Kind.isMergeable2ByteCString() && !GV->hasExternalLinkage() &&
- GV->getParent()->getDataLayout().getPreferredAlignment(
- cast<GlobalVariable>(GV)) < 32)
+ if (Kind.isMergeable2ByteCString() && !GO->hasExternalLinkage() &&
+ GO->getParent()->getDataLayout().getPreferredAlignment(
+ cast<GlobalVariable>(GO)) < 32)
return UStringSection;
// With MachO only variables whose corresponding symbol starts with 'l' or
// 'L' can be merged, so we only try merging GVs with private linkage.
- if (GV->hasPrivateLinkage() && Kind.isMergeableConst()) {
+ if (GO->hasPrivateLinkage() && Kind.isMergeableConst()) {
if (Kind.isMergeableConst4())
return FourByteConstantSection;
if (Kind.isMergeableConst8())
@@ -670,23 +703,21 @@ MCSection *TargetLoweringObjectFileMachO::getSectionForConstant(
}
const MCExpr *TargetLoweringObjectFileMachO::getTTypeGlobalReference(
- const GlobalValue *GV, unsigned Encoding, Mangler &Mang,
- const TargetMachine &TM, MachineModuleInfo *MMI,
- MCStreamer &Streamer) const {
+ const GlobalValue *GV, unsigned Encoding, const TargetMachine &TM,
+ MachineModuleInfo *MMI, MCStreamer &Streamer) const {
// The mach-o version of this method defaults to returning a stub reference.
if (Encoding & DW_EH_PE_indirect) {
MachineModuleInfoMachO &MachOMMI =
MMI->getObjFileInfo<MachineModuleInfoMachO>();
- MCSymbol *SSym =
- getSymbolWithGlobalValueBase(GV, "$non_lazy_ptr", Mang, TM);
+ MCSymbol *SSym = getSymbolWithGlobalValueBase(GV, "$non_lazy_ptr", TM);
// Add information about the stub reference to MachOMMI so that the stub
// gets emitted by the asmprinter.
MachineModuleInfoImpl::StubValueTy &StubSym = MachOMMI.getGVStubEntry(SSym);
if (!StubSym.getPointer()) {
- MCSymbol *Sym = TM.getSymbol(GV, Mang);
+ MCSymbol *Sym = TM.getSymbol(GV);
StubSym = MachineModuleInfoImpl::StubValueTy(Sym, !GV->hasLocalLinkage());
}
@@ -695,24 +726,24 @@ const MCExpr *TargetLoweringObjectFileMachO::getTTypeGlobalReference(
Encoding & ~dwarf::DW_EH_PE_indirect, Streamer);
}
- return TargetLoweringObjectFile::getTTypeGlobalReference(GV, Encoding, Mang,
- TM, MMI, Streamer);
+ return TargetLoweringObjectFile::getTTypeGlobalReference(GV, Encoding, TM,
+ MMI, Streamer);
}
MCSymbol *TargetLoweringObjectFileMachO::getCFIPersonalitySymbol(
- const GlobalValue *GV, Mangler &Mang, const TargetMachine &TM,
+ const GlobalValue *GV, const TargetMachine &TM,
MachineModuleInfo *MMI) const {
// The mach-o version of this method defaults to returning a stub reference.
MachineModuleInfoMachO &MachOMMI =
MMI->getObjFileInfo<MachineModuleInfoMachO>();
- MCSymbol *SSym = getSymbolWithGlobalValueBase(GV, "$non_lazy_ptr", Mang, TM);
+ MCSymbol *SSym = getSymbolWithGlobalValueBase(GV, "$non_lazy_ptr", TM);
// Add information about the stub reference to MachOMMI so that the stub
// gets emitted by the asmprinter.
MachineModuleInfoImpl::StubValueTy &StubSym = MachOMMI.getGVStubEntry(SSym);
if (!StubSym.getPointer()) {
- MCSymbol *Sym = TM.getSymbol(GV, Mang);
+ MCSymbol *Sym = TM.getSymbol(GV);
StubSym = MachineModuleInfoImpl::StubValueTy(Sym, !GV->hasLocalLinkage());
}
@@ -793,13 +824,16 @@ static bool canUsePrivateLabel(const MCAsmInfo &AsmInfo,
}
void TargetLoweringObjectFileMachO::getNameWithPrefix(
- SmallVectorImpl<char> &OutName, const GlobalValue *GV, Mangler &Mang,
+ SmallVectorImpl<char> &OutName, const GlobalValue *GV,
const TargetMachine &TM) const {
- SectionKind GVKind = TargetLoweringObjectFile::getKindForGlobal(GV, TM);
- const MCSection *TheSection = SectionForGlobal(GV, GVKind, Mang, TM);
- bool CannotUsePrivateLabel =
- !canUsePrivateLabel(*TM.getMCAsmInfo(), *TheSection);
- Mang.getNameWithPrefix(OutName, GV, CannotUsePrivateLabel);
+ bool CannotUsePrivateLabel = true;
+ if (auto *GO = GV->getBaseObject()) {
+ SectionKind GOKind = TargetLoweringObjectFile::getKindForGlobal(GO, TM);
+ const MCSection *TheSection = SectionForGlobal(GO, GOKind, TM);
+ CannotUsePrivateLabel =
+ !canUsePrivateLabel(*TM.getMCAsmInfo(), *TheSection);
+ }
+ getMangler().getNameWithPrefix(OutName, GV, CannotUsePrivateLabel);
}
//===----------------------------------------------------------------------===//
@@ -886,22 +920,21 @@ static int getSelectionForCOFF(const GlobalValue *GV) {
}
MCSection *TargetLoweringObjectFileCOFF::getExplicitSectionGlobal(
- const GlobalValue *GV, SectionKind Kind, Mangler &Mang,
- const TargetMachine &TM) const {
+ const GlobalObject *GO, SectionKind Kind, const TargetMachine &TM) const {
int Selection = 0;
unsigned Characteristics = getCOFFSectionFlags(Kind, TM);
- StringRef Name = GV->getSection();
+ StringRef Name = GO->getSection();
StringRef COMDATSymName = "";
- if (GV->hasComdat()) {
- Selection = getSelectionForCOFF(GV);
+ if (GO->hasComdat()) {
+ Selection = getSelectionForCOFF(GO);
const GlobalValue *ComdatGV;
if (Selection == COFF::IMAGE_COMDAT_SELECT_ASSOCIATIVE)
- ComdatGV = getComdatGVForCOFF(GV);
+ ComdatGV = getComdatGVForCOFF(GO);
else
- ComdatGV = GV;
+ ComdatGV = GO;
if (!ComdatGV->hasPrivateLinkage()) {
- MCSymbol *Sym = TM.getSymbol(ComdatGV, Mang);
+ MCSymbol *Sym = TM.getSymbol(ComdatGV);
COMDATSymName = Sym->getName();
Characteristics |= COFF::IMAGE_SCN_LNK_COMDAT;
} else {
@@ -926,8 +959,7 @@ static const char *getCOFFSectionNameForUniqueGlobal(SectionKind Kind) {
}
MCSection *TargetLoweringObjectFileCOFF::SelectSectionForGlobal(
- const GlobalValue *GV, SectionKind Kind, Mangler &Mang,
- const TargetMachine &TM) const {
+ const GlobalObject *GO, SectionKind Kind, const TargetMachine &TM) const {
// If we have -ffunction-sections then we should emit the global value to a
// uniqued section specifically for it.
bool EmitUniquedSection;
@@ -936,32 +968,32 @@ MCSection *TargetLoweringObjectFileCOFF::SelectSectionForGlobal(
else
EmitUniquedSection = TM.getDataSections();
- if ((EmitUniquedSection && !Kind.isCommon()) || GV->hasComdat()) {
+ if ((EmitUniquedSection && !Kind.isCommon()) || GO->hasComdat()) {
const char *Name = getCOFFSectionNameForUniqueGlobal(Kind);
unsigned Characteristics = getCOFFSectionFlags(Kind, TM);
Characteristics |= COFF::IMAGE_SCN_LNK_COMDAT;
- int Selection = getSelectionForCOFF(GV);
+ int Selection = getSelectionForCOFF(GO);
if (!Selection)
Selection = COFF::IMAGE_COMDAT_SELECT_NODUPLICATES;
const GlobalValue *ComdatGV;
- if (GV->hasComdat())
- ComdatGV = getComdatGVForCOFF(GV);
+ if (GO->hasComdat())
+ ComdatGV = getComdatGVForCOFF(GO);
else
- ComdatGV = GV;
+ ComdatGV = GO;
unsigned UniqueID = MCContext::GenericSectionID;
if (EmitUniquedSection)
UniqueID = NextUniqueID++;
if (!ComdatGV->hasPrivateLinkage()) {
- MCSymbol *Sym = TM.getSymbol(ComdatGV, Mang);
+ MCSymbol *Sym = TM.getSymbol(ComdatGV);
StringRef COMDATSymName = Sym->getName();
return getContext().getCOFFSection(Name, Characteristics, Kind,
COMDATSymName, Selection, UniqueID);
} else {
SmallString<256> TmpData;
- Mang.getNameWithPrefix(TmpData, GV, /*CannotUsePrivateLabel=*/true);
+ getMangler().getNameWithPrefix(TmpData, GO, /*CannotUsePrivateLabel=*/true);
return getContext().getCOFFSection(Name, Characteristics, Kind, TmpData,
Selection, UniqueID);
}
@@ -986,7 +1018,7 @@ MCSection *TargetLoweringObjectFileCOFF::SelectSectionForGlobal(
}
void TargetLoweringObjectFileCOFF::getNameWithPrefix(
- SmallVectorImpl<char> &OutName, const GlobalValue *GV, Mangler &Mang,
+ SmallVectorImpl<char> &OutName, const GlobalValue *GV,
const TargetMachine &TM) const {
bool CannotUsePrivateLabel = false;
if (GV->hasPrivateLinkage() &&
@@ -994,11 +1026,11 @@ void TargetLoweringObjectFileCOFF::getNameWithPrefix(
(isa<GlobalVariable>(GV) && TM.getDataSections())))
CannotUsePrivateLabel = true;
- Mang.getNameWithPrefix(OutName, GV, CannotUsePrivateLabel);
+ getMangler().getNameWithPrefix(OutName, GV, CannotUsePrivateLabel);
}
MCSection *TargetLoweringObjectFileCOFF::getSectionForJumpTable(
- const Function &F, Mangler &Mang, const TargetMachine &TM) const {
+ const Function &F, const TargetMachine &TM) const {
// If the function can be removed, produce a unique section so that
// the table doesn't prevent the removal.
const Comdat *C = F.getComdat();
@@ -1010,7 +1042,7 @@ MCSection *TargetLoweringObjectFileCOFF::getSectionForJumpTable(
if (F.hasPrivateLinkage())
return ReadOnlySection;
- MCSymbol *Sym = TM.getSymbol(&F, Mang);
+ MCSymbol *Sym = TM.getSymbol(&F);
StringRef COMDATSymName = Sym->getName();
SectionKind Kind = SectionKind::getReadOnly();
@@ -1023,10 +1055,9 @@ MCSection *TargetLoweringObjectFileCOFF::getSectionForJumpTable(
COFF::IMAGE_COMDAT_SELECT_ASSOCIATIVE, UniqueID);
}
-void TargetLoweringObjectFileCOFF::
-emitModuleFlags(MCStreamer &Streamer,
- ArrayRef<Module::ModuleFlagEntry> ModuleFlags,
- Mangler &Mang, const TargetMachine &TM) const {
+void TargetLoweringObjectFileCOFF::emitModuleFlags(
+ MCStreamer &Streamer, ArrayRef<Module::ModuleFlagEntry> ModuleFlags,
+ const TargetMachine &TM) const {
MDNode *LinkerOptions = nullptr;
for (const auto &MFE : ModuleFlags) {
@@ -1052,6 +1083,31 @@ emitModuleFlags(MCStreamer &Streamer,
}
}
+void TargetLoweringObjectFileCOFF::Initialize(MCContext &Ctx,
+ const TargetMachine &TM) {
+ TargetLoweringObjectFile::Initialize(Ctx, TM);
+ const Triple &T = TM.getTargetTriple();
+ if (T.isKnownWindowsMSVCEnvironment() || T.isWindowsItaniumEnvironment()) {
+ StaticCtorSection =
+ Ctx.getCOFFSection(".CRT$XCU", COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ,
+ SectionKind::getReadOnly());
+ StaticDtorSection =
+ Ctx.getCOFFSection(".CRT$XTX", COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ,
+ SectionKind::getReadOnly());
+ } else {
+ StaticCtorSection = Ctx.getCOFFSection(
+ ".ctors", COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ | COFF::IMAGE_SCN_MEM_WRITE,
+ SectionKind::getData());
+ StaticDtorSection = Ctx.getCOFFSection(
+ ".dtors", COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ | COFF::IMAGE_SCN_MEM_WRITE,
+ SectionKind::getData());
+ }
+}
+
MCSection *TargetLoweringObjectFileCOFF::getStaticCtorSection(
unsigned Priority, const MCSymbol *KeySym) const {
return getContext().getAssociativeCOFFSection(
@@ -1065,7 +1121,7 @@ MCSection *TargetLoweringObjectFileCOFF::getStaticDtorSection(
}
void TargetLoweringObjectFileCOFF::emitLinkerFlagsForGlobal(
- raw_ostream &OS, const GlobalValue *GV, const Mangler &Mang) const {
+ raw_ostream &OS, const GlobalValue *GV) const {
if (!GV->hasDLLExportStorageClass() || GV->isDeclaration())
return;
@@ -1079,14 +1135,14 @@ void TargetLoweringObjectFileCOFF::emitLinkerFlagsForGlobal(
if (TT.isWindowsGNUEnvironment() || TT.isWindowsCygwinEnvironment()) {
std::string Flag;
raw_string_ostream FlagOS(Flag);
- Mang.getNameWithPrefix(FlagOS, GV, false);
+ getMangler().getNameWithPrefix(FlagOS, GV, false);
FlagOS.flush();
if (Flag[0] == GV->getParent()->getDataLayout().getGlobalPrefix())
OS << Flag.substr(1);
else
OS << Flag;
} else {
- Mang.getNameWithPrefix(OS, GV, false);
+ getMangler().getNameWithPrefix(OS, GV, false);
}
if (!GV->getValueType()->isFunctionTy()) {
diff --git a/contrib/llvm/lib/CodeGen/TargetOptionsImpl.cpp b/contrib/llvm/lib/CodeGen/TargetOptionsImpl.cpp
index 8d2048f..b6da8e0 100644
--- a/contrib/llvm/lib/CodeGen/TargetOptionsImpl.cpp
+++ b/contrib/llvm/lib/CodeGen/TargetOptionsImpl.cpp
@@ -29,7 +29,7 @@ bool TargetOptions::DisableFramePointerElim(const MachineFunction &MF) const {
// Check to see if we should eliminate non-leaf frame pointers.
if (MF.getFunction()->hasFnAttribute("no-frame-pointer-elim-non-leaf"))
- return MF.getFrameInfo()->hasCalls();
+ return MF.getFrameInfo().hasCalls();
return false;
}
diff --git a/contrib/llvm/lib/CodeGen/TargetPassConfig.cpp b/contrib/llvm/lib/CodeGen/TargetPassConfig.cpp
index b8c8209..e7ea2b4 100644
--- a/contrib/llvm/lib/CodeGen/TargetPassConfig.cpp
+++ b/contrib/llvm/lib/CodeGen/TargetPassConfig.cpp
@@ -38,8 +38,8 @@
using namespace llvm;
-static cl::opt<bool> DisablePostRA("disable-post-ra", cl::Hidden,
- cl::desc("Disable Post Regalloc"));
+static cl::opt<bool> DisablePostRASched("disable-post-ra", cl::Hidden,
+ cl::desc("Disable Post Regalloc Scheduler"));
static cl::opt<bool> DisableBranchFold("disable-branch-fold", cl::Hidden,
cl::desc("Disable branch folding"));
static cl::opt<bool> DisableTailDuplicate("disable-tail-duplicate", cl::Hidden,
@@ -98,6 +98,14 @@ PrintMachineInstrs("print-machineinstrs", cl::ValueOptional,
cl::desc("Print machine instrs"),
cl::value_desc("pass-name"), cl::init("option-unspecified"));
+static cl::opt<int> EnableGlobalISelAbort(
+ "global-isel-abort", cl::Hidden,
+ cl::desc("Enable abort calls when \"global\" instruction selection "
+ "fails to lower/select an instruction: 0 disable the abort, "
+ "1 enable the abort, and "
+ "2 disable the abort but emit a diagnostic on failure"),
+ cl::init(1));
+
// Temporary option to allow experimenting with MachineScheduler as a post-RA
// scheduler. Targets can "properly" enable this with
// substitutePass(&PostRASchedulerID, &PostMachineSchedulerID).
@@ -121,8 +129,7 @@ static cl::opt<CFLAAType> UseCFLAA(
clEnumValN(CFLAAType::Andersen, "anders",
"Enable inclusion-based CFL-AA"),
clEnumValN(CFLAAType::Both, "both",
- "Enable both variants of CFL-AA"),
- clEnumValEnd));
+ "Enable both variants of CFL-AA")));
/// Allow standard passes to be disabled by command line options. This supports
/// simple binary flags that either suppress the pass or do nothing.
@@ -150,7 +157,7 @@ static IdentifyingPassPtr applyDisable(IdentifyingPassPtr PassID,
static IdentifyingPassPtr overridePass(AnalysisID StandardID,
IdentifyingPassPtr TargetID) {
if (StandardID == &PostRASchedulerID)
- return applyDisable(TargetID, DisablePostRA);
+ return applyDisable(TargetID, DisablePostRASched);
if (StandardID == &BranchFolderPassID)
return applyDisable(TargetID, DisableBranchFold);
@@ -252,8 +259,7 @@ TargetPassConfig::~TargetPassConfig() {
// Out of line constructor provides default values for pass options and
// registers all common codegen passes.
TargetPassConfig::TargetPassConfig(TargetMachine *tm, PassManagerBase &pm)
- : ImmutablePass(ID), PM(&pm), StartBefore(nullptr), StartAfter(nullptr),
- StopAfter(nullptr), Started(true), Stopped(false),
+ : ImmutablePass(ID), PM(&pm), Started(true), Stopped(false),
AddingMachinePasses(false), TM(tm), Impl(nullptr), Initialized(false),
DisableVerify(false), EnableTailMerge(true) {
@@ -347,6 +353,8 @@ void TargetPassConfig::addPass(Pass *P, bool verifyAfter, bool printAfter) {
if (StartBefore == PassID)
Started = true;
+ if (StopBefore == PassID)
+ Stopped = true;
if (Started && !Stopped) {
std::string Banner;
// Construct banner message before PM->add() as that may delete the pass.
@@ -469,12 +477,17 @@ void TargetPassConfig::addIRPasses() {
if (getOptLevel() != CodeGenOpt::None && !DisablePartialLibcallInlining)
addPass(createPartiallyInlineLibCallsPass());
+
+ // Insert calls to mcount-like functions.
+ addPass(createCountingFunctionInserterPass());
}
/// Turn exception handling constructs into something the code generators can
/// handle.
void TargetPassConfig::addPassesToHandleExceptions() {
- switch (TM->getMCAsmInfo()->getExceptionHandlingType()) {
+ const MCAsmInfo *MCAI = TM->getMCAsmInfo();
+ assert(MCAI && "No MCAsmInfo");
+ switch (MCAI->getExceptionHandlingType()) {
case ExceptionHandling::SjLj:
// SjLj piggy-backs on dwarf for this bit. The cleanups done apply to both
// Dwarf EH prepare needs to be run after SjLj prepare. Otherwise,
@@ -483,7 +496,7 @@ void TargetPassConfig::addPassesToHandleExceptions() {
// pad is shared by multiple invokes and is also a target of a normal
// edge from elsewhere.
addPass(createSjLjEHPreparePass());
- // FALLTHROUGH
+ LLVM_FALLTHROUGH;
case ExceptionHandling::DwarfCFI:
case ExceptionHandling::ARM:
addPass(createDwarfEHPass(TM));
@@ -557,9 +570,6 @@ void TargetPassConfig::addISelPrepare() {
void TargetPassConfig::addMachinePasses() {
AddingMachinePasses = true;
- if (TM->Options.EnableIPRA)
- addPass(createRegUsageInfoPropPass());
-
// Insert a machine instr printer pass after the specified pass.
if (!StringRef(PrintMachineInstrs.getValue()).equals("") &&
!StringRef(PrintMachineInstrs.getValue()).equals("option-unspecified")) {
@@ -575,6 +585,9 @@ void TargetPassConfig::addMachinePasses() {
// Print the instruction selected machine code...
printAndVerify("After Instruction Selection");
+ if (TM->Options.EnableIPRA)
+ addPass(createRegUsageInfoPropPass());
+
// Expand pseudo-instructions emitted by ISel.
addPass(&ExpandISelPseudosID);
@@ -886,3 +899,14 @@ void TargetPassConfig::addBlockPlacement() {
addPass(&MachineBlockPlacementStatsID);
}
}
+
+//===---------------------------------------------------------------------===//
+/// GlobalISel Configuration
+//===---------------------------------------------------------------------===//
+bool TargetPassConfig::isGlobalISelAbortEnabled() const {
+ return EnableGlobalISelAbort == 1;
+}
+
+bool TargetPassConfig::reportDiagnosticWhenGlobalISelFallback() const {
+ return EnableGlobalISelAbort == 2;
+}
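A hypothetical caller-side sketch (only the two hooks above come from the patch; the other names are placeholders) of how an instruction selector might react to the three -global-isel-abort modes:

    // Mode 1 aborts, mode 2 falls back but emits a diagnostic, and mode 0
    // falls back silently. selectInstr, emitFallbackRemark and
    // fallBackToSelectionDAG are illustrative placeholders.
    if (!selectInstr(MI)) {
      if (Config.isGlobalISelAbortEnabled())
        report_fatal_error("unable to select instruction");
      if (Config.reportDiagnosticWhenGlobalISelFallback())
        emitFallbackRemark(MI);
      return fallBackToSelectionDAG();
    }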
diff --git a/contrib/llvm/lib/CodeGen/TargetRegisterInfo.cpp b/contrib/llvm/lib/CodeGen/TargetRegisterInfo.cpp
index e1d90cb..cd50c5b 100644
--- a/contrib/llvm/lib/CodeGen/TargetRegisterInfo.cpp
+++ b/contrib/llvm/lib/CodeGen/TargetRegisterInfo.cpp
@@ -30,8 +30,8 @@ using namespace llvm;
TargetRegisterInfo::TargetRegisterInfo(const TargetRegisterInfoDesc *ID,
regclass_iterator RCB, regclass_iterator RCE,
const char *const *SRINames,
- const unsigned *SRILaneMasks,
- unsigned SRICoveringLanes)
+ const LaneBitmask *SRILaneMasks,
+ LaneBitmask SRICoveringLanes)
: InfoDesc(ID), SubRegIndexNames(SRINames),
SubRegIndexLaneMasks(SRILaneMasks),
RegClassBegin(RCB), RegClassEnd(RCE),
@@ -40,6 +40,36 @@ TargetRegisterInfo::TargetRegisterInfo(const TargetRegisterInfoDesc *ID,
TargetRegisterInfo::~TargetRegisterInfo() {}
+void TargetRegisterInfo::markSuperRegs(BitVector &RegisterSet, unsigned Reg)
+ const {
+ for (MCSuperRegIterator AI(Reg, this, true); AI.isValid(); ++AI)
+ RegisterSet.set(*AI);
+}
+
+bool TargetRegisterInfo::checkAllSuperRegsMarked(const BitVector &RegisterSet,
+ ArrayRef<MCPhysReg> Exceptions) const {
+ // Check that all super registers of reserved regs are reserved as well.
+ BitVector Checked(getNumRegs());
+ for (int Reg = RegisterSet.find_first(); Reg>=0;
+ Reg = RegisterSet.find_next(Reg)) {
+ if (Checked[Reg])
+ continue;
+ for (MCSuperRegIterator SR(Reg, this); SR.isValid(); ++SR) {
+ if (!RegisterSet[*SR] && !is_contained(Exceptions, Reg)) {
+ dbgs() << "Error: Super register " << PrintReg(*SR, this)
+ << " of reserved register " << PrintReg(Reg, this)
+ << " is not reserved.\n";
+ return false;
+ }
+
+      // We check superregs transitively, so we can remember this for later
+      // to avoid compile-time explosion in deep register hierarchies.
+ Checked.set(*SR);
+ }
+ }
+ return true;
+}
+
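A hedged sketch of how a target's getReservedRegs might use the two new helpers (the target and register names are placeholders):

    BitVector
    MyTargetRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
      BitVector Reserved(getNumRegs());
      // Reserving SP or PC must also reserve every register containing them.
      markSuperRegs(Reserved, MyTarget::SP);
      markSuperRegs(Reserved, MyTarget::PC);
      assert(checkAllSuperRegsMarked(Reserved) && "missed a super-register");
      return Reserved;
    }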
namespace llvm {
Printable PrintReg(unsigned Reg, const TargetRegisterInfo *TRI,
@@ -97,12 +127,6 @@ Printable PrintVRegOrUnit(unsigned Unit, const TargetRegisterInfo *TRI) {
});
}
-Printable PrintLaneMask(LaneBitmask LaneMask) {
- return Printable([LaneMask](raw_ostream &OS) {
- OS << format("%08X", LaneMask);
- });
-}
-
} // End of llvm namespace
/// getAllocatableClass - Return the maximal subclass of the given register
@@ -354,7 +378,7 @@ TargetRegisterInfo::getRegAllocationHints(unsigned VirtReg,
// Check that Phys is in the allocation order. We shouldn't heed hints
// from VirtReg's register class if they aren't in the allocation order. The
// target probably has a reason for removing the register.
- if (std::find(Order.begin(), Order.end(), Phys) == Order.end())
+ if (!is_contained(Order, Phys))
return;
// All clear, tell the register allocator to prefer this register.
@@ -367,11 +391,11 @@ bool TargetRegisterInfo::canRealignStack(const MachineFunction &MF) const {
bool TargetRegisterInfo::needsStackRealignment(
const MachineFunction &MF) const {
- const MachineFrameInfo *MFI = MF.getFrameInfo();
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
const Function *F = MF.getFunction();
unsigned StackAlign = TFI->getStackAlignment();
- bool requiresRealignment = ((MFI->getMaxAlignment() > StackAlign) ||
+ bool requiresRealignment = ((MFI.getMaxAlignment() > StackAlign) ||
F->hasFnAttribute(Attribute::StackAlignment));
if (MF.getFunction()->hasFnAttribute("stackrealign") || requiresRealignment) {
if (canRealignStack(MF))
diff --git a/contrib/llvm/lib/CodeGen/TargetSchedule.cpp b/contrib/llvm/lib/CodeGen/TargetSchedule.cpp
index 022e912..83e52d3 100644
--- a/contrib/llvm/lib/CodeGen/TargetSchedule.cpp
+++ b/contrib/llvm/lib/CodeGen/TargetSchedule.cpp
@@ -144,7 +144,7 @@ static unsigned findUseIdx(const MachineInstr *MI, unsigned UseOperIdx) {
unsigned UseIdx = 0;
for (unsigned i = 0; i != UseOperIdx; ++i) {
const MachineOperand &MO = MI->getOperand(i);
- if (MO.isReg() && MO.readsReg())
+ if (MO.isReg() && MO.readsReg() && !MO.isDef())
++UseIdx;
}
return UseIdx;
diff --git a/contrib/llvm/lib/CodeGen/TargetSubtargetInfo.cpp b/contrib/llvm/lib/CodeGen/TargetSubtargetInfo.cpp
new file mode 100644
index 0000000..c74707d
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/TargetSubtargetInfo.cpp
@@ -0,0 +1,54 @@
+//===-- TargetSubtargetInfo.cpp - General Target Information ---------------==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file This file describes the general parts of a Subtarget.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Target/TargetSubtargetInfo.h"
+using namespace llvm;
+
+//---------------------------------------------------------------------------
+// TargetSubtargetInfo Class
+//
+TargetSubtargetInfo::TargetSubtargetInfo(
+ const Triple &TT, StringRef CPU, StringRef FS,
+ ArrayRef<SubtargetFeatureKV> PF, ArrayRef<SubtargetFeatureKV> PD,
+ const SubtargetInfoKV *ProcSched, const MCWriteProcResEntry *WPR,
+ const MCWriteLatencyEntry *WL, const MCReadAdvanceEntry *RA,
+ const InstrStage *IS, const unsigned *OC, const unsigned *FP)
+ : MCSubtargetInfo(TT, CPU, FS, PF, PD, ProcSched, WPR, WL, RA, IS, OC, FP) {
+}
+
+TargetSubtargetInfo::~TargetSubtargetInfo() {}
+
+bool TargetSubtargetInfo::enableAtomicExpand() const {
+ return true;
+}
+
+bool TargetSubtargetInfo::enableMachineScheduler() const {
+ return false;
+}
+
+bool TargetSubtargetInfo::enableJoinGlobalCopies() const {
+ return enableMachineScheduler();
+}
+
+bool TargetSubtargetInfo::enableRALocalReassignment(
+ CodeGenOpt::Level OptLevel) const {
+ return true;
+}
+
+bool TargetSubtargetInfo::enablePostRAScheduler() const {
+ return getSchedModel().PostRAScheduler;
+}
+
+bool TargetSubtargetInfo::useAA() const {
+ return false;
+}
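These defaults are meant to be overridden per target; a minimal sketch of a hypothetical subtarget doing so (constructor forwarding the MC tables omitted for brevity):

    class MyTargetSubtarget : public TargetSubtargetInfo {
    public:
      // Opting in to the MachineScheduler also flips
      // enableJoinGlobalCopies(), whose default above simply forwards here.
      bool enableMachineScheduler() const override { return true; }
      // Let the scheduler query alias analysis for memory operations.
      bool useAA() const override { return true; }
    };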
diff --git a/contrib/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp b/contrib/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp
index 8feb18b..0f1b2ed 100644
--- a/contrib/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp
+++ b/contrib/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp
@@ -109,7 +109,7 @@ class TwoAddressInstructionPass : public MachineFunctionPass {
bool isProfitableToCommute(unsigned regA, unsigned regB, unsigned regC,
MachineInstr *MI, unsigned Dist);
- bool commuteInstruction(MachineInstr *MI,
+ bool commuteInstruction(MachineInstr *MI, unsigned DstIdx,
unsigned RegBIdx, unsigned RegCIdx, unsigned Dist);
bool isProfitableToConv3Addr(unsigned RegA, unsigned RegB);
@@ -651,6 +651,7 @@ isProfitableToCommute(unsigned regA, unsigned regB, unsigned regC,
/// Commute a two-address instruction and update the basic block, distance map,
/// and live variables if needed. Return true if it is successful.
bool TwoAddressInstructionPass::commuteInstruction(MachineInstr *MI,
+ unsigned DstIdx,
unsigned RegBIdx,
unsigned RegCIdx,
unsigned Dist) {
@@ -671,7 +672,7 @@ bool TwoAddressInstructionPass::commuteInstruction(MachineInstr *MI,
// Update source register map.
unsigned FromRegC = getMappedReg(RegC, SrcRegMap);
if (FromRegC) {
- unsigned RegA = MI->getOperand(0).getReg();
+ unsigned RegA = MI->getOperand(DstIdx).getReg();
SrcRegMap[RegA] = FromRegC;
}
@@ -1171,6 +1172,9 @@ bool TwoAddressInstructionPass::tryInstructionCommute(MachineInstr *MI,
unsigned BaseOpIdx,
bool BaseOpKilled,
unsigned Dist) {
+ if (!MI->isCommutable())
+ return false;
+
unsigned DstOpReg = MI->getOperand(DstOpIdx).getReg();
unsigned BaseOpReg = MI->getOperand(BaseOpIdx).getReg();
unsigned OpsNum = MI->getDesc().getNumOperands();
@@ -1180,7 +1184,7 @@ bool TwoAddressInstructionPass::tryInstructionCommute(MachineInstr *MI,
// and OtherOpIdx are commutable, it does not really search for
// other commutable operands and does not change the values of passed
// variables.
- if (OtherOpIdx == BaseOpIdx ||
+ if (OtherOpIdx == BaseOpIdx || !MI->getOperand(OtherOpIdx).isReg() ||
!TII->findCommutedOpIndices(*MI, BaseOpIdx, OtherOpIdx))
continue;
@@ -1199,7 +1203,8 @@ bool TwoAddressInstructionPass::tryInstructionCommute(MachineInstr *MI,
}
// If it's profitable to commute, try to do so.
- if (DoCommute && commuteInstruction(MI, BaseOpIdx, OtherOpIdx, Dist)) {
+ if (DoCommute && commuteInstruction(MI, DstOpIdx, BaseOpIdx, OtherOpIdx,
+ Dist)) {
++NumCommuted;
if (AggressiveCommute)
++NumAggrCommuted;
@@ -1567,14 +1572,14 @@ TwoAddressInstructionPass::processTiedPairs(MachineInstr *MI,
if (!IsEarlyClobber) {
// Replace other (un-tied) uses of regB with LastCopiedReg.
for (MachineOperand &MO : MI->operands()) {
- if (MO.isReg() && MO.getReg() == RegB && MO.getSubReg() == SubRegB &&
+ if (MO.isReg() && MO.getReg() == RegB &&
MO.isUse()) {
if (MO.isKill()) {
MO.setIsKill(false);
RemovedKillFlag = true;
}
MO.setReg(LastCopiedReg);
- MO.setSubReg(0);
+ MO.setSubReg(MO.getSubReg());
}
}
}
diff --git a/contrib/llvm/lib/CodeGen/UnreachableBlockElim.cpp b/contrib/llvm/lib/CodeGen/UnreachableBlockElim.cpp
index 501e01c..c2db56a 100644
--- a/contrib/llvm/lib/CodeGen/UnreachableBlockElim.cpp
+++ b/contrib/llvm/lib/CodeGen/UnreachableBlockElim.cpp
@@ -40,7 +40,7 @@
using namespace llvm;
static bool eliminateUnreachableBlock(Function &F) {
- SmallPtrSet<BasicBlock*, 8> Reachable;
+ df_iterator_default_set<BasicBlock*> Reachable;
// Mark all reachable blocks.
for (BasicBlock *BB : depth_first_ext(&F, Reachable))
@@ -130,7 +130,7 @@ void UnreachableMachineBlockElim::getAnalysisUsage(AnalysisUsage &AU) const {
}
bool UnreachableMachineBlockElim::runOnMachineFunction(MachineFunction &F) {
- SmallPtrSet<MachineBasicBlock*, 8> Reachable;
+ df_iterator_default_set<MachineBasicBlock*> Reachable;
bool ModifiedPHI = false;
MMI = getAnalysisIfAvailable<MachineModuleInfo>();
diff --git a/contrib/llvm/lib/CodeGen/VirtRegMap.cpp b/contrib/llvm/lib/CodeGen/VirtRegMap.cpp
index 8a3a032..0d506d6 100644
--- a/contrib/llvm/lib/CodeGen/VirtRegMap.cpp
+++ b/contrib/llvm/lib/CodeGen/VirtRegMap.cpp
@@ -73,8 +73,8 @@ void VirtRegMap::grow() {
}
unsigned VirtRegMap::createSpillSlot(const TargetRegisterClass *RC) {
- int SS = MF->getFrameInfo()->CreateSpillStackObject(RC->getSize(),
- RC->getAlignment());
+ int SS = MF->getFrameInfo().CreateSpillStackObject(RC->getSize(),
+ RC->getAlignment());
++NumSpillSlots;
return SS;
}
@@ -110,7 +110,7 @@ void VirtRegMap::assignVirt2StackSlot(unsigned virtReg, int SS) {
assert(Virt2StackSlotMap[virtReg] == NO_STACK_SLOT &&
"attempt to assign stack slot to already spilled register");
assert((SS >= 0 ||
- (SS >= MF->getFrameInfo()->getObjectIndexBegin())) &&
+ (SS >= MF->getFrameInfo().getObjectIndexBegin())) &&
"illegal fixed frame index");
Virt2StackSlotMap[virtReg] = SS;
}
@@ -177,7 +177,7 @@ public:
bool runOnMachineFunction(MachineFunction&) override;
MachineFunctionProperties getSetProperties() const override {
return MachineFunctionProperties().set(
- MachineFunctionProperties::Property::AllVRegsAllocated);
+ MachineFunctionProperties::Property::NoVRegs);
}
};
} // end anonymous namespace
@@ -266,7 +266,7 @@ void VirtRegRewriter::addLiveInsForSubRanges(const LiveInterval &LI,
SlotIndex MBBBegin = MBBI->first;
// Advance all subrange iterators so that their end position is just
// behind MBBBegin (or the iterator is at the end).
- LaneBitmask LaneMask = 0;
+ LaneBitmask LaneMask;
for (auto &RangeIterPair : SubRanges) {
const LiveInterval::SubRange *SR = RangeIterPair.first;
LiveInterval::const_iterator &SRI = RangeIterPair.second;
@@ -277,7 +277,7 @@ void VirtRegRewriter::addLiveInsForSubRanges(const LiveInterval &LI,
if (SRI->start <= MBBBegin)
LaneMask |= SR->LaneMask;
}
- if (LaneMask == 0)
+ if (LaneMask.none())
continue;
MachineBasicBlock *MBB = MBBI->second;
MBB->addLiveIn(PhysReg, LaneMask);
@@ -338,10 +338,11 @@ bool VirtRegRewriter::readsUndefSubreg(const MachineOperand &MO) const {
assert(LI.liveAt(BaseIndex) &&
"Reads of completely dead register should be marked undef already");
unsigned SubRegIdx = MO.getSubReg();
+ assert(SubRegIdx != 0 && LI.hasSubRanges());
LaneBitmask UseMask = TRI->getSubRegIndexLaneMask(SubRegIdx);
// See if any of the relevant subregister liveranges is defined at this point.
for (const LiveInterval::SubRange &SR : LI.subranges()) {
- if ((SR.LaneMask & UseMask) != 0 && SR.liveAt(BaseIndex))
+ if ((SR.LaneMask & UseMask).any() && SR.liveAt(BaseIndex))
return false;
}
return true;
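The LaneBitmask edits in the two hunks above follow the type's move from a plain unsigned to a class; an illustrative fragment of the replacement idioms (assuming llvm::LaneBitmask):

    LaneBitmask Mask;                // default-constructed: empty (was "= 0")
    Mask |= SR->LaneMask;            // accumulation is unchanged
    if (Mask.none())                 // replaces "Mask == 0"
      return;
    if ((Mask & UseMask).any())      // replaces "(Mask & UseMask) != 0"
      return;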
diff --git a/contrib/llvm/lib/CodeGen/WinEHPrepare.cpp b/contrib/llvm/lib/CodeGen/WinEHPrepare.cpp
index 041fb7b..568720c 100644
--- a/contrib/llvm/lib/CodeGen/WinEHPrepare.cpp
+++ b/contrib/llvm/lib/CodeGen/WinEHPrepare.cpp
@@ -62,7 +62,7 @@ public:
void getAnalysisUsage(AnalysisUsage &AU) const override;
- const char *getPassName() const override {
+ StringRef getPassName() const override {
return "Windows exception handling preparation";
}
@@ -521,7 +521,7 @@ void llvm::calculateClrEHStateNumbers(const Function *Fn,
if (const auto *Cleanup = dyn_cast<CleanupPadInst>(Pad)) {
// Create the entry for this cleanup with the appropriate handler
- // properties. Finaly and fault handlers are distinguished by arity.
+ // properties. Finally and fault handlers are distinguished by arity.
ClrHandlerType HandlerType =
(Cleanup->getNumArgOperands() ? ClrHandlerType::Fault
: ClrHandlerType::Finally);
@@ -708,7 +708,7 @@ void WinEHPrepare::demotePHIsOnFunclets(Function &F) {
void WinEHPrepare::cloneCommonBlocks(Function &F) {
// We need to clone all blocks which belong to multiple funclets. Values are
- // remapped throughout the funclet to propogate both the new instructions
+ // remapped throughout the funclet to propagate both the new instructions
// *and* the new basic blocks themselves.
for (auto &Funclets : FuncletBlocks) {
BasicBlock *FuncletPadBB = Funclets.first;
@@ -1202,8 +1202,12 @@ void WinEHPrepare::replaceUseWithLoad(Value *V, Use &U, AllocaInst *&SpillSlot,
Goto->setSuccessor(0, PHIBlock);
CatchRet->setSuccessor(NewBlock);
// Update the color mapping for the newly split edge.
+ // Grab a reference to the ColorVector to be inserted before getting the
+ // reference to the vector we are copying because inserting the new
+ // element in BlockColors might cause the map to be reallocated.
+ ColorVector &ColorsForNewBlock = BlockColors[NewBlock];
ColorVector &ColorsForPHIBlock = BlockColors[PHIBlock];
- BlockColors[NewBlock] = ColorsForPHIBlock;
+ ColorsForNewBlock = ColorsForPHIBlock;
for (BasicBlock *FuncletPad : ColorsForPHIBlock)
FuncletBlocks[FuncletPad].push_back(NewBlock);
// Treat the new block as incoming for load insertion.
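The comment introduced in this hunk documents a genuine DenseMap pitfall:
operator[] may grow the table, and growth invalidates references into the map
(unlike the node-based standard containers). A minimal sketch of the hazard
and of the fix pattern used here, against a plain DenseMap rather than the
actual BlockColors type:

    #include "llvm/ADT/DenseMap.h"
    using namespace llvm;

    // Copy the value stored under ExistingKey into a fresh entry for NewKey.
    void copyEntry(DenseMap<int, int> &M, int ExistingKey, int NewKey) {
      // BAD (the bug this hunk fixes):
      //   int &Src = M[ExistingKey];
      //   M[NewKey] = Src; // inserting NewKey may grow M, leaving Src dangling
      //
      // GOOD: take the potentially-inserting reference first. The second
      // lookup is safe only because ExistingKey is already present (the same
      // assumption the original code makes about PHIBlock), so it cannot
      // trigger a grow.
      int &Dst = M[NewKey];
      Dst = M[ExistingKey];
    }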
diff --git a/contrib/llvm/lib/CodeGen/XRayInstrumentation.cpp b/contrib/llvm/lib/CodeGen/XRayInstrumentation.cpp
index 1f95708..63bd762 100644
--- a/contrib/llvm/lib/CodeGen/XRayInstrumentation.cpp
+++ b/contrib/llvm/lib/CodeGen/XRayInstrumentation.cpp
@@ -34,7 +34,82 @@ struct XRayInstrumentation : public MachineFunctionPass {
}
bool runOnMachineFunction(MachineFunction &MF) override;
+
+private:
+ // Replace the original RET instruction with the exit sled code ("patchable
+ // ret" pseudo-instruction), so that at runtime XRay can replace the sled
+  // with code that jumps to the XRay trampoline, which calls the tracing
+  // handler and, in the end, issues the RET instruction.
+  // This is the approach to take on CPUs that have a single RET instruction,
+ // like x86/x86_64.
+ void replaceRetWithPatchableRet(MachineFunction &MF,
+ const TargetInstrInfo *TII);
+
+ // Prepend the original return instruction with the exit sled code ("patchable
+ // function exit" pseudo-instruction), preserving the original return
+ // instruction just after the exit sled code.
+  // This is the approach to take on CPUs that have several kinds of return
+  // instruction, like ARM. For such CPUs we can't simply jump into the
+  // XRay trampoline and issue a single return instruction there; instead we
+  // have to call the trampoline and return from it to the original return
+  // instruction of the function being instrumented.
+ void prependRetWithPatchableExit(MachineFunction &MF,
+ const TargetInstrInfo *TII);
};
+} // anonymous namespace
+
+void XRayInstrumentation::replaceRetWithPatchableRet(MachineFunction &MF,
+ const TargetInstrInfo *TII)
+{
+ // We look for *all* terminators and returns, then replace those with
+ // PATCHABLE_RET instructions.
+ SmallVector<MachineInstr *, 4> Terminators;
+ for (auto &MBB : MF) {
+ for (auto &T : MBB.terminators()) {
+ unsigned Opc = 0;
+ if (T.isReturn() && T.getOpcode() == TII->getReturnOpcode()) {
+ // Replace return instructions with:
+ // PATCHABLE_RET <Opcode>, <Operand>...
+ Opc = TargetOpcode::PATCHABLE_RET;
+ }
+ if (TII->isTailCall(T)) {
+ // Treat the tail call as a return instruction, which has a
+ // different-looking sled than the normal return case.
+ Opc = TargetOpcode::PATCHABLE_TAIL_CALL;
+ }
+ if (Opc != 0) {
+ auto MIB = BuildMI(MBB, T, T.getDebugLoc(), TII->get(Opc))
+ .addImm(T.getOpcode());
+ for (auto &MO : T.operands())
+ MIB.addOperand(MO);
+ Terminators.push_back(&T);
+ }
+ }
+ }
+
+ for (auto &I : Terminators)
+ I->eraseFromParent();
+}
+
+void XRayInstrumentation::prependRetWithPatchableExit(MachineFunction &MF,
+ const TargetInstrInfo *TII)
+{
+ for (auto &MBB : MF) {
+ for (auto &T : MBB.terminators()) {
+ unsigned Opc = 0;
+ if (T.isReturn()) {
+ Opc = TargetOpcode::PATCHABLE_FUNCTION_EXIT;
+ }
+ if (TII->isTailCall(T)) {
+ Opc = TargetOpcode::PATCHABLE_TAIL_CALL;
+ }
+ if (Opc != 0) {
+        // Prepend the return instruction with PATCHABLE_FUNCTION_EXIT or
+        // PATCHABLE_TAIL_CALL.
+        BuildMI(MBB, T, T.getDebugLoc(), TII->get(Opc));
+ }
+ }
+ }
}
bool XRayInstrumentation::runOnMachineFunction(MachineFunction &MF) {
@@ -54,39 +129,43 @@ bool XRayInstrumentation::runOnMachineFunction(MachineFunction &MF) {
return false; // Function is too small.
}
+ // We look for the first non-empty MachineBasicBlock, so that we can insert
+ // the function instrumentation in the appropriate place.
+ auto MBI =
+ find_if(MF, [&](const MachineBasicBlock &MBB) { return !MBB.empty(); });
+ if (MBI == MF.end())
+ return false; // The function is empty.
+
+ auto *TII = MF.getSubtarget().getInstrInfo();
+ auto &FirstMBB = *MBI;
+ auto &FirstMI = *FirstMBB.begin();
+
+ if (!MF.getSubtarget().isXRaySupported()) {
+ FirstMI.emitError("An attempt to perform XRay instrumentation for an"
+ " unsupported target.");
+ return false;
+ }
+
// FIXME: Do the loop triviality analysis here or in an earlier pass.
  // First, insert a PATCHABLE_FUNCTION_ENTER as the first instruction of the
// MachineFunction.
- auto &FirstMBB = *MF.begin();
- auto &FirstMI = *FirstMBB.begin();
- auto *TII = MF.getSubtarget().getInstrInfo();
BuildMI(FirstMBB, FirstMI, FirstMI.getDebugLoc(),
TII->get(TargetOpcode::PATCHABLE_FUNCTION_ENTER));
- // Then we look for *all* terminators and returns, then replace those with
- // PATCHABLE_RET instructions.
- SmallVector<MachineInstr *, 4> Terminators;
- for (auto &MBB : MF) {
- for (auto &T : MBB.terminators()) {
- // FIXME: Handle tail calls here too?
- if (T.isReturn() && T.getOpcode() == TII->getReturnOpcode()) {
- // Replace return instructions with:
- // PATCHABLE_RET <Opcode>, <Operand>...
- auto MIB = BuildMI(MBB, T, T.getDebugLoc(),
- TII->get(TargetOpcode::PATCHABLE_RET))
- .addImm(T.getOpcode());
- for (auto &MO : T.operands())
- MIB.addOperand(MO);
- Terminators.push_back(&T);
- break;
- }
- }
+ switch (MF.getTarget().getTargetTriple().getArch()) {
+ case Triple::ArchType::arm:
+ case Triple::ArchType::thumb:
+ case Triple::ArchType::aarch64:
+    // For the architectures that don't have a single return instruction.
+ prependRetWithPatchableExit(MF, TII);
+ break;
+ default:
+ // For the architectures that have a single return instruction (such as
+ // RETQ on x86_64).
+ replaceRetWithPatchableRet(MF, TII);
+ break;
}
-
- for (auto &I : Terminators)
- I->eraseFromParent();
-
return true;
}
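One idiom worth calling out in replaceRetWithPatchableRet above (and in the
pre-existing code it replaces): terminators are collected into a SmallVector
and only erased after the walk, because erasing an instruction while
iterating MBB.terminators() would invalidate the iterator. A minimal,
XRay-independent sketch of that deferred-erase pattern:

    #include "llvm/ADT/SmallVector.h"
    #include "llvm/CodeGen/MachineBasicBlock.h"
    #include "llvm/CodeGen/MachineFunction.h"
    #include "llvm/CodeGen/MachineInstr.h"
    using namespace llvm;

    static void eraseAllReturns(MachineFunction &MF) {
      SmallVector<MachineInstr *, 4> ToErase;
      for (MachineBasicBlock &MBB : MF)
        for (MachineInstr &T : MBB.terminators())
          if (T.isReturn())
            ToErase.push_back(&T); // record now ...
      for (MachineInstr *I : ToErase)
        I->eraseFromParent();      // ... erase after the walk is done
    }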